{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999867178472287, "eval_steps": 500, "global_step": 56466, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.7709537028428233e-05, "grad_norm": 19.739229202270508, "learning_rate": 1.7709563164108617e-08, "loss": 2.2781, "step": 1 }, { "epoch": 3.5419074056856467e-05, "grad_norm": 15.84681224822998, "learning_rate": 3.5419126328217235e-08, "loss": 2.2477, "step": 2 }, { "epoch": 5.31286110852847e-05, "grad_norm": 15.511771202087402, "learning_rate": 5.312868949232586e-08, "loss": 2.2286, "step": 3 }, { "epoch": 7.083814811371293e-05, "grad_norm": 14.167211532592773, "learning_rate": 7.083825265643447e-08, "loss": 2.223, "step": 4 }, { "epoch": 8.854768514214117e-05, "grad_norm": 13.162442207336426, "learning_rate": 8.85478158205431e-08, "loss": 2.1688, "step": 5 }, { "epoch": 0.0001062572221705694, "grad_norm": 19.040489196777344, "learning_rate": 1.0625737898465172e-07, "loss": 2.2942, "step": 6 }, { "epoch": 0.00012396675919899765, "grad_norm": 14.012434005737305, "learning_rate": 1.2396694214876034e-07, "loss": 2.2691, "step": 7 }, { "epoch": 0.00014167629622742587, "grad_norm": 14.043083190917969, "learning_rate": 1.4167650531286894e-07, "loss": 2.0859, "step": 8 }, { "epoch": 0.0001593858332558541, "grad_norm": 14.835147857666016, "learning_rate": 1.5938606847697757e-07, "loss": 2.1622, "step": 9 }, { "epoch": 0.00017709537028428235, "grad_norm": 15.412961959838867, "learning_rate": 1.770956316410862e-07, "loss": 2.2182, "step": 10 }, { "epoch": 0.00019480490731271059, "grad_norm": 16.22896385192871, "learning_rate": 1.9480519480519483e-07, "loss": 2.1574, "step": 11 }, { "epoch": 0.0002125144443411388, "grad_norm": 14.72508430480957, "learning_rate": 2.1251475796930344e-07, "loss": 2.1334, "step": 12 }, { "epoch": 0.00023022398136956704, "grad_norm": 14.418340682983398, "learning_rate": 2.3022432113341204e-07, "loss": 2.0927, "step": 13 }, { "epoch": 0.0002479335183979953, "grad_norm": 16.856901168823242, "learning_rate": 2.4793388429752067e-07, "loss": 2.2385, "step": 14 }, { "epoch": 0.0002656430554264235, "grad_norm": 12.675034523010254, "learning_rate": 2.656434474616293e-07, "loss": 2.2108, "step": 15 }, { "epoch": 0.00028335259245485173, "grad_norm": 13.113791465759277, "learning_rate": 2.833530106257379e-07, "loss": 2.1177, "step": 16 }, { "epoch": 0.00030106212948328, "grad_norm": 14.935504913330078, "learning_rate": 3.010625737898465e-07, "loss": 2.245, "step": 17 }, { "epoch": 0.0003187716665117082, "grad_norm": 13.57802677154541, "learning_rate": 3.1877213695395514e-07, "loss": 2.0212, "step": 18 }, { "epoch": 0.0003364812035401364, "grad_norm": 13.645076751708984, "learning_rate": 3.364817001180637e-07, "loss": 2.0775, "step": 19 }, { "epoch": 0.0003541907405685647, "grad_norm": 14.050622940063477, "learning_rate": 3.541912632821724e-07, "loss": 2.1778, "step": 20 }, { "epoch": 0.0003719002775969929, "grad_norm": 13.942513465881348, "learning_rate": 3.7190082644628103e-07, "loss": 2.2283, "step": 21 }, { "epoch": 0.00038960981462542117, "grad_norm": 13.610316276550293, "learning_rate": 3.8961038961038966e-07, "loss": 2.1111, "step": 22 }, { "epoch": 0.0004073193516538494, "grad_norm": 15.762763023376465, "learning_rate": 4.0731995277449824e-07, "loss": 2.1507, "step": 23 }, { "epoch": 0.0004250288886822776, "grad_norm": 14.590259552001953, "learning_rate": 4.2502951593860687e-07, "loss": 2.1095, "step": 24 }, { "epoch": 0.00044273842571070587, "grad_norm": 14.321306228637695, "learning_rate": 4.427390791027155e-07, "loss": 2.0575, "step": 25 }, { "epoch": 0.0004604479627391341, "grad_norm": 14.686238288879395, "learning_rate": 4.604486422668241e-07, "loss": 2.0118, "step": 26 }, { "epoch": 0.00047815749976756235, "grad_norm": 12.713171005249023, "learning_rate": 4.781582054309327e-07, "loss": 2.0851, "step": 27 }, { "epoch": 0.0004958670367959906, "grad_norm": 12.607099533081055, "learning_rate": 4.958677685950413e-07, "loss": 1.9747, "step": 28 }, { "epoch": 0.0005135765738244188, "grad_norm": 11.334381103515625, "learning_rate": 5.1357733175915e-07, "loss": 2.0636, "step": 29 }, { "epoch": 0.000531286110852847, "grad_norm": 13.812267303466797, "learning_rate": 5.312868949232586e-07, "loss": 1.9541, "step": 30 }, { "epoch": 0.0005489956478812753, "grad_norm": 13.900432586669922, "learning_rate": 5.489964580873671e-07, "loss": 2.0255, "step": 31 }, { "epoch": 0.0005667051849097035, "grad_norm": 11.75472354888916, "learning_rate": 5.667060212514758e-07, "loss": 1.934, "step": 32 }, { "epoch": 0.0005844147219381318, "grad_norm": 11.980932235717773, "learning_rate": 5.844155844155844e-07, "loss": 1.9332, "step": 33 }, { "epoch": 0.00060212425896656, "grad_norm": 10.460787773132324, "learning_rate": 6.02125147579693e-07, "loss": 1.8655, "step": 34 }, { "epoch": 0.0006198337959949882, "grad_norm": 10.629040718078613, "learning_rate": 6.198347107438017e-07, "loss": 1.9403, "step": 35 }, { "epoch": 0.0006375433330234164, "grad_norm": 10.02330207824707, "learning_rate": 6.375442739079103e-07, "loss": 1.8403, "step": 36 }, { "epoch": 0.0006552528700518446, "grad_norm": 10.462778091430664, "learning_rate": 6.552538370720189e-07, "loss": 1.9219, "step": 37 }, { "epoch": 0.0006729624070802729, "grad_norm": 9.715096473693848, "learning_rate": 6.729634002361274e-07, "loss": 1.7198, "step": 38 }, { "epoch": 0.0006906719441087012, "grad_norm": 10.04427433013916, "learning_rate": 6.906729634002361e-07, "loss": 1.6954, "step": 39 }, { "epoch": 0.0007083814811371294, "grad_norm": 9.301004409790039, "learning_rate": 7.083825265643448e-07, "loss": 1.7723, "step": 40 }, { "epoch": 0.0007260910181655576, "grad_norm": 7.490121841430664, "learning_rate": 7.260920897284534e-07, "loss": 1.6849, "step": 41 }, { "epoch": 0.0007438005551939858, "grad_norm": 8.967666625976562, "learning_rate": 7.438016528925621e-07, "loss": 1.8284, "step": 42 }, { "epoch": 0.000761510092222414, "grad_norm": 8.640925407409668, "learning_rate": 7.615112160566707e-07, "loss": 1.603, "step": 43 }, { "epoch": 0.0007792196292508423, "grad_norm": 9.44016170501709, "learning_rate": 7.792207792207793e-07, "loss": 1.634, "step": 44 }, { "epoch": 0.0007969291662792706, "grad_norm": 10.646740913391113, "learning_rate": 7.969303423848879e-07, "loss": 1.6182, "step": 45 }, { "epoch": 0.0008146387033076988, "grad_norm": 8.642082214355469, "learning_rate": 8.146399055489965e-07, "loss": 1.568, "step": 46 }, { "epoch": 0.000832348240336127, "grad_norm": 7.671570777893066, "learning_rate": 8.323494687131051e-07, "loss": 1.6198, "step": 47 }, { "epoch": 0.0008500577773645552, "grad_norm": 9.241918563842773, "learning_rate": 8.500590318772137e-07, "loss": 1.5174, "step": 48 }, { "epoch": 0.0008677673143929835, "grad_norm": 8.290735244750977, "learning_rate": 8.677685950413224e-07, "loss": 1.5158, "step": 49 }, { "epoch": 0.0008854768514214117, "grad_norm": 7.959954261779785, "learning_rate": 8.85478158205431e-07, "loss": 1.4829, "step": 50 }, { "epoch": 0.0009031863884498399, "grad_norm": 8.826253890991211, "learning_rate": 9.031877213695395e-07, "loss": 1.4767, "step": 51 }, { "epoch": 0.0009208959254782682, "grad_norm": 8.060569763183594, "learning_rate": 9.208972845336482e-07, "loss": 1.6214, "step": 52 }, { "epoch": 0.0009386054625066964, "grad_norm": 7.728250503540039, "learning_rate": 9.386068476977569e-07, "loss": 1.5862, "step": 53 }, { "epoch": 0.0009563149995351247, "grad_norm": 7.031571865081787, "learning_rate": 9.563164108618654e-07, "loss": 1.4721, "step": 54 }, { "epoch": 0.0009740245365635529, "grad_norm": 7.787759780883789, "learning_rate": 9.74025974025974e-07, "loss": 1.3596, "step": 55 }, { "epoch": 0.0009917340735919812, "grad_norm": 6.317153453826904, "learning_rate": 9.917355371900827e-07, "loss": 1.4428, "step": 56 }, { "epoch": 0.0010094436106204093, "grad_norm": 6.657107353210449, "learning_rate": 1.0094451003541912e-06, "loss": 1.3894, "step": 57 }, { "epoch": 0.0010271531476488377, "grad_norm": 6.670893669128418, "learning_rate": 1.0271546635183e-06, "loss": 1.4386, "step": 58 }, { "epoch": 0.0010448626846772658, "grad_norm": 6.024390697479248, "learning_rate": 1.0448642266824085e-06, "loss": 1.338, "step": 59 }, { "epoch": 0.001062572221705694, "grad_norm": 7.3216447830200195, "learning_rate": 1.0625737898465172e-06, "loss": 1.357, "step": 60 }, { "epoch": 0.0010802817587341224, "grad_norm": 6.036355018615723, "learning_rate": 1.0802833530106257e-06, "loss": 1.2982, "step": 61 }, { "epoch": 0.0010979912957625505, "grad_norm": 6.530137062072754, "learning_rate": 1.0979929161747343e-06, "loss": 1.3201, "step": 62 }, { "epoch": 0.0011157008327909788, "grad_norm": 6.2906174659729, "learning_rate": 1.115702479338843e-06, "loss": 1.1411, "step": 63 }, { "epoch": 0.001133410369819407, "grad_norm": 5.560519695281982, "learning_rate": 1.1334120425029515e-06, "loss": 1.3283, "step": 64 }, { "epoch": 0.0011511199068478352, "grad_norm": 5.856861591339111, "learning_rate": 1.1511216056670603e-06, "loss": 1.2309, "step": 65 }, { "epoch": 0.0011688294438762636, "grad_norm": 7.32969331741333, "learning_rate": 1.1688311688311688e-06, "loss": 1.1798, "step": 66 }, { "epoch": 0.0011865389809046917, "grad_norm": 6.199501037597656, "learning_rate": 1.1865407319952775e-06, "loss": 1.2152, "step": 67 }, { "epoch": 0.00120424851793312, "grad_norm": 5.323037147521973, "learning_rate": 1.204250295159386e-06, "loss": 1.1556, "step": 68 }, { "epoch": 0.001221958054961548, "grad_norm": 6.373168468475342, "learning_rate": 1.2219598583234946e-06, "loss": 1.1697, "step": 69 }, { "epoch": 0.0012396675919899764, "grad_norm": 5.707713603973389, "learning_rate": 1.2396694214876033e-06, "loss": 1.2199, "step": 70 }, { "epoch": 0.0012573771290184047, "grad_norm": 5.629158973693848, "learning_rate": 1.2573789846517118e-06, "loss": 1.2006, "step": 71 }, { "epoch": 0.0012750866660468328, "grad_norm": 6.118285655975342, "learning_rate": 1.2750885478158206e-06, "loss": 1.2409, "step": 72 }, { "epoch": 0.0012927962030752612, "grad_norm": 7.074717044830322, "learning_rate": 1.292798110979929e-06, "loss": 1.1845, "step": 73 }, { "epoch": 0.0013105057401036893, "grad_norm": 5.234448432922363, "learning_rate": 1.3105076741440378e-06, "loss": 1.1837, "step": 74 }, { "epoch": 0.0013282152771321176, "grad_norm": 5.6512370109558105, "learning_rate": 1.3282172373081464e-06, "loss": 1.1773, "step": 75 }, { "epoch": 0.0013459248141605457, "grad_norm": 5.976123809814453, "learning_rate": 1.3459268004722549e-06, "loss": 1.1565, "step": 76 }, { "epoch": 0.001363634351188974, "grad_norm": 5.916675090789795, "learning_rate": 1.3636363636363636e-06, "loss": 1.0426, "step": 77 }, { "epoch": 0.0013813438882174023, "grad_norm": 5.821821689605713, "learning_rate": 1.3813459268004721e-06, "loss": 1.1518, "step": 78 }, { "epoch": 0.0013990534252458304, "grad_norm": 5.166565895080566, "learning_rate": 1.3990554899645809e-06, "loss": 1.2095, "step": 79 }, { "epoch": 0.0014167629622742588, "grad_norm": 5.694742679595947, "learning_rate": 1.4167650531286896e-06, "loss": 1.022, "step": 80 }, { "epoch": 0.0014344724993026869, "grad_norm": 4.716714382171631, "learning_rate": 1.4344746162927983e-06, "loss": 1.098, "step": 81 }, { "epoch": 0.0014521820363311152, "grad_norm": 5.740160942077637, "learning_rate": 1.4521841794569069e-06, "loss": 1.0866, "step": 82 }, { "epoch": 0.0014698915733595435, "grad_norm": 5.325934410095215, "learning_rate": 1.4698937426210154e-06, "loss": 1.1084, "step": 83 }, { "epoch": 0.0014876011103879716, "grad_norm": 6.218843460083008, "learning_rate": 1.4876033057851241e-06, "loss": 1.2292, "step": 84 }, { "epoch": 0.0015053106474164, "grad_norm": 4.860445976257324, "learning_rate": 1.5053128689492327e-06, "loss": 1.1051, "step": 85 }, { "epoch": 0.001523020184444828, "grad_norm": 4.924854755401611, "learning_rate": 1.5230224321133414e-06, "loss": 1.0975, "step": 86 }, { "epoch": 0.0015407297214732564, "grad_norm": 5.222801208496094, "learning_rate": 1.54073199527745e-06, "loss": 1.0578, "step": 87 }, { "epoch": 0.0015584392585016847, "grad_norm": 5.786187171936035, "learning_rate": 1.5584415584415587e-06, "loss": 1.0661, "step": 88 }, { "epoch": 0.0015761487955301128, "grad_norm": 5.3542022705078125, "learning_rate": 1.5761511216056672e-06, "loss": 0.9591, "step": 89 }, { "epoch": 0.0015938583325585411, "grad_norm": 5.385840892791748, "learning_rate": 1.5938606847697757e-06, "loss": 1.0002, "step": 90 }, { "epoch": 0.0016115678695869692, "grad_norm": 5.187088966369629, "learning_rate": 1.6115702479338844e-06, "loss": 1.0354, "step": 91 }, { "epoch": 0.0016292774066153975, "grad_norm": 5.5410475730896, "learning_rate": 1.629279811097993e-06, "loss": 1.0262, "step": 92 }, { "epoch": 0.0016469869436438259, "grad_norm": 5.396633625030518, "learning_rate": 1.6469893742621017e-06, "loss": 1.0239, "step": 93 }, { "epoch": 0.001664696480672254, "grad_norm": 5.3420867919921875, "learning_rate": 1.6646989374262102e-06, "loss": 1.0589, "step": 94 }, { "epoch": 0.0016824060177006823, "grad_norm": 5.214301109313965, "learning_rate": 1.6824085005903188e-06, "loss": 0.9799, "step": 95 }, { "epoch": 0.0017001155547291104, "grad_norm": 4.46592378616333, "learning_rate": 1.7001180637544275e-06, "loss": 0.8847, "step": 96 }, { "epoch": 0.0017178250917575387, "grad_norm": 5.294605255126953, "learning_rate": 1.717827626918536e-06, "loss": 1.0541, "step": 97 }, { "epoch": 0.001735534628785967, "grad_norm": 4.611186981201172, "learning_rate": 1.7355371900826448e-06, "loss": 1.0477, "step": 98 }, { "epoch": 0.0017532441658143951, "grad_norm": 5.1175103187561035, "learning_rate": 1.7532467532467533e-06, "loss": 0.9266, "step": 99 }, { "epoch": 0.0017709537028428235, "grad_norm": 5.59054708480835, "learning_rate": 1.770956316410862e-06, "loss": 0.9337, "step": 100 }, { "epoch": 0.0017886632398712516, "grad_norm": 4.743609428405762, "learning_rate": 1.7886658795749705e-06, "loss": 0.9711, "step": 101 }, { "epoch": 0.0018063727768996799, "grad_norm": 5.4983110427856445, "learning_rate": 1.806375442739079e-06, "loss": 0.8728, "step": 102 }, { "epoch": 0.0018240823139281082, "grad_norm": 4.748778820037842, "learning_rate": 1.8240850059031878e-06, "loss": 0.911, "step": 103 }, { "epoch": 0.0018417918509565363, "grad_norm": 5.034358978271484, "learning_rate": 1.8417945690672963e-06, "loss": 0.8813, "step": 104 }, { "epoch": 0.0018595013879849646, "grad_norm": 4.79362154006958, "learning_rate": 1.859504132231405e-06, "loss": 0.9606, "step": 105 }, { "epoch": 0.0018772109250133927, "grad_norm": 5.511528015136719, "learning_rate": 1.8772136953955138e-06, "loss": 0.9606, "step": 106 }, { "epoch": 0.001894920462041821, "grad_norm": 5.463324069976807, "learning_rate": 1.8949232585596221e-06, "loss": 0.9679, "step": 107 }, { "epoch": 0.0019126299990702494, "grad_norm": 4.855085849761963, "learning_rate": 1.912632821723731e-06, "loss": 0.9487, "step": 108 }, { "epoch": 0.0019303395360986775, "grad_norm": 5.021084785461426, "learning_rate": 1.9303423848878396e-06, "loss": 0.9125, "step": 109 }, { "epoch": 0.0019480490731271058, "grad_norm": 4.070415496826172, "learning_rate": 1.948051948051948e-06, "loss": 0.8279, "step": 110 }, { "epoch": 0.001965758610155534, "grad_norm": 4.946351051330566, "learning_rate": 1.9657615112160566e-06, "loss": 0.8853, "step": 111 }, { "epoch": 0.0019834681471839625, "grad_norm": 4.549526691436768, "learning_rate": 1.9834710743801654e-06, "loss": 0.8492, "step": 112 }, { "epoch": 0.0020011776842123906, "grad_norm": 4.486638069152832, "learning_rate": 2.001180637544274e-06, "loss": 0.9351, "step": 113 }, { "epoch": 0.0020188872212408187, "grad_norm": 4.613684177398682, "learning_rate": 2.0188902007083824e-06, "loss": 0.8934, "step": 114 }, { "epoch": 0.0020365967582692468, "grad_norm": 5.119312286376953, "learning_rate": 2.036599763872491e-06, "loss": 0.8827, "step": 115 }, { "epoch": 0.0020543062952976753, "grad_norm": 5.398226737976074, "learning_rate": 2.0543093270366e-06, "loss": 0.8615, "step": 116 }, { "epoch": 0.0020720158323261034, "grad_norm": 4.82933235168457, "learning_rate": 2.072018890200708e-06, "loss": 0.8666, "step": 117 }, { "epoch": 0.0020897253693545315, "grad_norm": 5.166830539703369, "learning_rate": 2.089728453364817e-06, "loss": 0.7967, "step": 118 }, { "epoch": 0.00210743490638296, "grad_norm": 4.65884256362915, "learning_rate": 2.1074380165289257e-06, "loss": 0.9533, "step": 119 }, { "epoch": 0.002125144443411388, "grad_norm": 5.296113014221191, "learning_rate": 2.1251475796930344e-06, "loss": 0.8266, "step": 120 }, { "epoch": 0.0021428539804398163, "grad_norm": 4.054265022277832, "learning_rate": 2.1428571428571427e-06, "loss": 0.7759, "step": 121 }, { "epoch": 0.002160563517468245, "grad_norm": 4.403802871704102, "learning_rate": 2.1605667060212515e-06, "loss": 0.8353, "step": 122 }, { "epoch": 0.002178273054496673, "grad_norm": 4.017107009887695, "learning_rate": 2.17827626918536e-06, "loss": 0.7473, "step": 123 }, { "epoch": 0.002195982591525101, "grad_norm": 4.615809440612793, "learning_rate": 2.1959858323494685e-06, "loss": 0.8086, "step": 124 }, { "epoch": 0.002213692128553529, "grad_norm": 4.09709358215332, "learning_rate": 2.2136953955135773e-06, "loss": 0.7667, "step": 125 }, { "epoch": 0.0022314016655819576, "grad_norm": 4.607713222503662, "learning_rate": 2.231404958677686e-06, "loss": 0.8076, "step": 126 }, { "epoch": 0.0022491112026103858, "grad_norm": 4.8284196853637695, "learning_rate": 2.2491145218417947e-06, "loss": 0.7924, "step": 127 }, { "epoch": 0.002266820739638814, "grad_norm": 4.710180282592773, "learning_rate": 2.266824085005903e-06, "loss": 0.7644, "step": 128 }, { "epoch": 0.0022845302766672424, "grad_norm": 4.449590682983398, "learning_rate": 2.2845336481700118e-06, "loss": 0.8119, "step": 129 }, { "epoch": 0.0023022398136956705, "grad_norm": 5.348082065582275, "learning_rate": 2.3022432113341205e-06, "loss": 0.7978, "step": 130 }, { "epoch": 0.0023199493507240986, "grad_norm": 4.9466118812561035, "learning_rate": 2.319952774498229e-06, "loss": 0.8028, "step": 131 }, { "epoch": 0.002337658887752527, "grad_norm": 4.6952409744262695, "learning_rate": 2.3376623376623376e-06, "loss": 0.7905, "step": 132 }, { "epoch": 0.0023553684247809552, "grad_norm": 4.482764720916748, "learning_rate": 2.3553719008264463e-06, "loss": 0.7152, "step": 133 }, { "epoch": 0.0023730779618093834, "grad_norm": 5.522718906402588, "learning_rate": 2.373081463990555e-06, "loss": 0.7379, "step": 134 }, { "epoch": 0.0023907874988378115, "grad_norm": 5.42115592956543, "learning_rate": 2.3907910271546633e-06, "loss": 0.7836, "step": 135 }, { "epoch": 0.00240849703586624, "grad_norm": 4.680845260620117, "learning_rate": 2.408500590318772e-06, "loss": 0.8472, "step": 136 }, { "epoch": 0.002426206572894668, "grad_norm": 4.138347148895264, "learning_rate": 2.426210153482881e-06, "loss": 0.7056, "step": 137 }, { "epoch": 0.002443916109923096, "grad_norm": 4.883685111999512, "learning_rate": 2.443919716646989e-06, "loss": 0.746, "step": 138 }, { "epoch": 0.0024616256469515247, "grad_norm": 4.100607872009277, "learning_rate": 2.461629279811098e-06, "loss": 0.72, "step": 139 }, { "epoch": 0.002479335183979953, "grad_norm": 4.905400276184082, "learning_rate": 2.4793388429752066e-06, "loss": 0.7397, "step": 140 }, { "epoch": 0.002497044721008381, "grad_norm": 5.2684736251831055, "learning_rate": 2.4970484061393153e-06, "loss": 0.8023, "step": 141 }, { "epoch": 0.0025147542580368095, "grad_norm": 5.413041591644287, "learning_rate": 2.5147579693034237e-06, "loss": 0.7364, "step": 142 }, { "epoch": 0.0025324637950652376, "grad_norm": 5.319970607757568, "learning_rate": 2.5324675324675324e-06, "loss": 0.7446, "step": 143 }, { "epoch": 0.0025501733320936657, "grad_norm": 3.8883988857269287, "learning_rate": 2.550177095631641e-06, "loss": 0.7445, "step": 144 }, { "epoch": 0.002567882869122094, "grad_norm": 5.256938934326172, "learning_rate": 2.5678866587957494e-06, "loss": 0.7521, "step": 145 }, { "epoch": 0.0025855924061505223, "grad_norm": 6.334427833557129, "learning_rate": 2.585596221959858e-06, "loss": 0.8103, "step": 146 }, { "epoch": 0.0026033019431789504, "grad_norm": 5.348402976989746, "learning_rate": 2.603305785123967e-06, "loss": 0.7269, "step": 147 }, { "epoch": 0.0026210114802073785, "grad_norm": 4.51717472076416, "learning_rate": 2.6210153482880757e-06, "loss": 0.7147, "step": 148 }, { "epoch": 0.002638721017235807, "grad_norm": 4.620640754699707, "learning_rate": 2.638724911452184e-06, "loss": 0.7224, "step": 149 }, { "epoch": 0.002656430554264235, "grad_norm": 3.8978404998779297, "learning_rate": 2.6564344746162927e-06, "loss": 0.6571, "step": 150 }, { "epoch": 0.0026741400912926633, "grad_norm": 4.899637222290039, "learning_rate": 2.6741440377804014e-06, "loss": 0.7643, "step": 151 }, { "epoch": 0.0026918496283210914, "grad_norm": 5.499406814575195, "learning_rate": 2.6918536009445098e-06, "loss": 0.7622, "step": 152 }, { "epoch": 0.00270955916534952, "grad_norm": 4.681412696838379, "learning_rate": 2.7095631641086185e-06, "loss": 0.6814, "step": 153 }, { "epoch": 0.002727268702377948, "grad_norm": 4.523877143859863, "learning_rate": 2.7272727272727272e-06, "loss": 0.748, "step": 154 }, { "epoch": 0.002744978239406376, "grad_norm": 4.976410865783691, "learning_rate": 2.744982290436836e-06, "loss": 0.6572, "step": 155 }, { "epoch": 0.0027626877764348047, "grad_norm": 4.540420055389404, "learning_rate": 2.7626918536009443e-06, "loss": 0.6783, "step": 156 }, { "epoch": 0.002780397313463233, "grad_norm": 5.299781322479248, "learning_rate": 2.780401416765053e-06, "loss": 0.7191, "step": 157 }, { "epoch": 0.002798106850491661, "grad_norm": 4.200704574584961, "learning_rate": 2.7981109799291617e-06, "loss": 0.6349, "step": 158 }, { "epoch": 0.0028158163875200894, "grad_norm": 3.857712984085083, "learning_rate": 2.8158205430932705e-06, "loss": 0.6375, "step": 159 }, { "epoch": 0.0028335259245485175, "grad_norm": 5.092162609100342, "learning_rate": 2.8335301062573792e-06, "loss": 0.6834, "step": 160 }, { "epoch": 0.0028512354615769456, "grad_norm": 4.633670806884766, "learning_rate": 2.851239669421488e-06, "loss": 0.6712, "step": 161 }, { "epoch": 0.0028689449986053737, "grad_norm": 5.115223407745361, "learning_rate": 2.8689492325855967e-06, "loss": 0.6765, "step": 162 }, { "epoch": 0.0028866545356338023, "grad_norm": 4.1504058837890625, "learning_rate": 2.886658795749705e-06, "loss": 0.6373, "step": 163 }, { "epoch": 0.0029043640726622304, "grad_norm": 3.731731414794922, "learning_rate": 2.9043683589138137e-06, "loss": 0.6298, "step": 164 }, { "epoch": 0.0029220736096906585, "grad_norm": 4.508049964904785, "learning_rate": 2.9220779220779225e-06, "loss": 0.6146, "step": 165 }, { "epoch": 0.002939783146719087, "grad_norm": 4.729813098907471, "learning_rate": 2.939787485242031e-06, "loss": 0.6604, "step": 166 }, { "epoch": 0.002957492683747515, "grad_norm": 4.942739963531494, "learning_rate": 2.9574970484061395e-06, "loss": 0.6747, "step": 167 }, { "epoch": 0.0029752022207759432, "grad_norm": 4.775129795074463, "learning_rate": 2.9752066115702483e-06, "loss": 0.6886, "step": 168 }, { "epoch": 0.0029929117578043718, "grad_norm": 4.351351261138916, "learning_rate": 2.992916174734357e-06, "loss": 0.5941, "step": 169 }, { "epoch": 0.0030106212948328, "grad_norm": 4.636556625366211, "learning_rate": 3.0106257378984653e-06, "loss": 0.716, "step": 170 }, { "epoch": 0.003028330831861228, "grad_norm": 4.196171283721924, "learning_rate": 3.028335301062574e-06, "loss": 0.6936, "step": 171 }, { "epoch": 0.003046040368889656, "grad_norm": 4.432174205780029, "learning_rate": 3.046044864226683e-06, "loss": 0.6533, "step": 172 }, { "epoch": 0.0030637499059180846, "grad_norm": 4.744258880615234, "learning_rate": 3.063754427390791e-06, "loss": 0.5814, "step": 173 }, { "epoch": 0.0030814594429465127, "grad_norm": 3.9817054271698, "learning_rate": 3.0814639905549e-06, "loss": 0.5371, "step": 174 }, { "epoch": 0.003099168979974941, "grad_norm": 4.324345588684082, "learning_rate": 3.0991735537190086e-06, "loss": 0.6421, "step": 175 }, { "epoch": 0.0031168785170033694, "grad_norm": 4.503774642944336, "learning_rate": 3.1168831168831173e-06, "loss": 0.5998, "step": 176 }, { "epoch": 0.0031345880540317975, "grad_norm": 4.447761058807373, "learning_rate": 3.1345926800472256e-06, "loss": 0.6215, "step": 177 }, { "epoch": 0.0031522975910602256, "grad_norm": 5.07835054397583, "learning_rate": 3.1523022432113344e-06, "loss": 0.6335, "step": 178 }, { "epoch": 0.003170007128088654, "grad_norm": 4.894444465637207, "learning_rate": 3.170011806375443e-06, "loss": 0.6027, "step": 179 }, { "epoch": 0.0031877166651170822, "grad_norm": 4.950722694396973, "learning_rate": 3.1877213695395514e-06, "loss": 0.6263, "step": 180 }, { "epoch": 0.0032054262021455103, "grad_norm": 4.361475944519043, "learning_rate": 3.20543093270366e-06, "loss": 0.5925, "step": 181 }, { "epoch": 0.0032231357391739384, "grad_norm": 4.336880683898926, "learning_rate": 3.223140495867769e-06, "loss": 0.6246, "step": 182 }, { "epoch": 0.003240845276202367, "grad_norm": 3.9839630126953125, "learning_rate": 3.240850059031877e-06, "loss": 0.6315, "step": 183 }, { "epoch": 0.003258554813230795, "grad_norm": 4.537562847137451, "learning_rate": 3.258559622195986e-06, "loss": 0.6583, "step": 184 }, { "epoch": 0.003276264350259223, "grad_norm": 4.632065773010254, "learning_rate": 3.2762691853600947e-06, "loss": 0.649, "step": 185 }, { "epoch": 0.0032939738872876517, "grad_norm": 4.5564069747924805, "learning_rate": 3.2939787485242034e-06, "loss": 0.62, "step": 186 }, { "epoch": 0.00331168342431608, "grad_norm": 4.53033447265625, "learning_rate": 3.3116883116883117e-06, "loss": 0.578, "step": 187 }, { "epoch": 0.003329392961344508, "grad_norm": 4.219237804412842, "learning_rate": 3.3293978748524205e-06, "loss": 0.5431, "step": 188 }, { "epoch": 0.0033471024983729365, "grad_norm": 4.354885101318359, "learning_rate": 3.347107438016529e-06, "loss": 0.5869, "step": 189 }, { "epoch": 0.0033648120354013646, "grad_norm": 4.237489700317383, "learning_rate": 3.3648170011806375e-06, "loss": 0.5368, "step": 190 }, { "epoch": 0.0033825215724297927, "grad_norm": 4.441393852233887, "learning_rate": 3.3825265643447462e-06, "loss": 0.6446, "step": 191 }, { "epoch": 0.003400231109458221, "grad_norm": 4.229732036590576, "learning_rate": 3.400236127508855e-06, "loss": 0.5688, "step": 192 }, { "epoch": 0.0034179406464866493, "grad_norm": 4.546323776245117, "learning_rate": 3.4179456906729637e-06, "loss": 0.5711, "step": 193 }, { "epoch": 0.0034356501835150774, "grad_norm": 4.744283676147461, "learning_rate": 3.435655253837072e-06, "loss": 0.6098, "step": 194 }, { "epoch": 0.0034533597205435055, "grad_norm": 4.611690044403076, "learning_rate": 3.4533648170011808e-06, "loss": 0.5507, "step": 195 }, { "epoch": 0.003471069257571934, "grad_norm": 5.210282802581787, "learning_rate": 3.4710743801652895e-06, "loss": 0.6606, "step": 196 }, { "epoch": 0.003488778794600362, "grad_norm": 4.682610988616943, "learning_rate": 3.488783943329398e-06, "loss": 0.5897, "step": 197 }, { "epoch": 0.0035064883316287903, "grad_norm": 4.590041160583496, "learning_rate": 3.5064935064935066e-06, "loss": 0.5886, "step": 198 }, { "epoch": 0.003524197868657219, "grad_norm": 4.862149715423584, "learning_rate": 3.5242030696576153e-06, "loss": 0.5496, "step": 199 }, { "epoch": 0.003541907405685647, "grad_norm": 4.634613037109375, "learning_rate": 3.541912632821724e-06, "loss": 0.5736, "step": 200 }, { "epoch": 0.003559616942714075, "grad_norm": 4.640380859375, "learning_rate": 3.5596221959858323e-06, "loss": 0.5618, "step": 201 }, { "epoch": 0.003577326479742503, "grad_norm": 4.653122901916504, "learning_rate": 3.577331759149941e-06, "loss": 0.5448, "step": 202 }, { "epoch": 0.0035950360167709317, "grad_norm": 3.9752581119537354, "learning_rate": 3.59504132231405e-06, "loss": 0.5923, "step": 203 }, { "epoch": 0.0036127455537993598, "grad_norm": 6.206792831420898, "learning_rate": 3.612750885478158e-06, "loss": 0.5033, "step": 204 }, { "epoch": 0.003630455090827788, "grad_norm": 5.355326175689697, "learning_rate": 3.630460448642267e-06, "loss": 0.5318, "step": 205 }, { "epoch": 0.0036481646278562164, "grad_norm": 5.074928283691406, "learning_rate": 3.6481700118063756e-06, "loss": 0.5366, "step": 206 }, { "epoch": 0.0036658741648846445, "grad_norm": 4.5688395500183105, "learning_rate": 3.6658795749704843e-06, "loss": 0.5732, "step": 207 }, { "epoch": 0.0036835837019130726, "grad_norm": 4.258020401000977, "learning_rate": 3.6835891381345927e-06, "loss": 0.5411, "step": 208 }, { "epoch": 0.003701293238941501, "grad_norm": 3.7857308387756348, "learning_rate": 3.7012987012987014e-06, "loss": 0.5174, "step": 209 }, { "epoch": 0.0037190027759699293, "grad_norm": 4.629384994506836, "learning_rate": 3.71900826446281e-06, "loss": 0.5714, "step": 210 }, { "epoch": 0.0037367123129983574, "grad_norm": 4.244004249572754, "learning_rate": 3.7367178276269184e-06, "loss": 0.5382, "step": 211 }, { "epoch": 0.0037544218500267855, "grad_norm": 3.7344613075256348, "learning_rate": 3.7544273907910276e-06, "loss": 0.4777, "step": 212 }, { "epoch": 0.003772131387055214, "grad_norm": 4.90242338180542, "learning_rate": 3.772136953955136e-06, "loss": 0.6148, "step": 213 }, { "epoch": 0.003789840924083642, "grad_norm": 4.194389343261719, "learning_rate": 3.7898465171192442e-06, "loss": 0.5935, "step": 214 }, { "epoch": 0.0038075504611120702, "grad_norm": 5.073704719543457, "learning_rate": 3.8075560802833534e-06, "loss": 0.5931, "step": 215 }, { "epoch": 0.0038252599981404988, "grad_norm": 5.481753349304199, "learning_rate": 3.825265643447462e-06, "loss": 0.48, "step": 216 }, { "epoch": 0.003842969535168927, "grad_norm": 4.463499069213867, "learning_rate": 3.84297520661157e-06, "loss": 0.5986, "step": 217 }, { "epoch": 0.003860679072197355, "grad_norm": 5.315115928649902, "learning_rate": 3.860684769775679e-06, "loss": 0.5775, "step": 218 }, { "epoch": 0.0038783886092257835, "grad_norm": 4.524503707885742, "learning_rate": 3.8783943329397875e-06, "loss": 0.4862, "step": 219 }, { "epoch": 0.0038960981462542116, "grad_norm": 4.162378311157227, "learning_rate": 3.896103896103896e-06, "loss": 0.5333, "step": 220 }, { "epoch": 0.00391380768328264, "grad_norm": 4.242223262786865, "learning_rate": 3.913813459268005e-06, "loss": 0.5088, "step": 221 }, { "epoch": 0.003931517220311068, "grad_norm": 5.314175128936768, "learning_rate": 3.931523022432113e-06, "loss": 0.5845, "step": 222 }, { "epoch": 0.003949226757339496, "grad_norm": 4.3998260498046875, "learning_rate": 3.9492325855962224e-06, "loss": 0.5119, "step": 223 }, { "epoch": 0.003966936294367925, "grad_norm": 3.8959505558013916, "learning_rate": 3.966942148760331e-06, "loss": 0.4896, "step": 224 }, { "epoch": 0.003984645831396353, "grad_norm": 5.979248523712158, "learning_rate": 3.984651711924439e-06, "loss": 0.6755, "step": 225 }, { "epoch": 0.004002355368424781, "grad_norm": 4.318150043487549, "learning_rate": 4.002361275088548e-06, "loss": 0.4681, "step": 226 }, { "epoch": 0.004020064905453209, "grad_norm": 4.027387619018555, "learning_rate": 4.0200708382526565e-06, "loss": 0.5743, "step": 227 }, { "epoch": 0.004037774442481637, "grad_norm": 4.202825546264648, "learning_rate": 4.037780401416765e-06, "loss": 0.4754, "step": 228 }, { "epoch": 0.004055483979510065, "grad_norm": 3.441228151321411, "learning_rate": 4.055489964580874e-06, "loss": 0.4311, "step": 229 }, { "epoch": 0.0040731935165384935, "grad_norm": 5.612522125244141, "learning_rate": 4.073199527744982e-06, "loss": 0.5192, "step": 230 }, { "epoch": 0.0040909030535669225, "grad_norm": 4.250944137573242, "learning_rate": 4.090909090909091e-06, "loss": 0.5606, "step": 231 }, { "epoch": 0.004108612590595351, "grad_norm": 4.148431777954102, "learning_rate": 4.1086186540732e-06, "loss": 0.5203, "step": 232 }, { "epoch": 0.004126322127623779, "grad_norm": 4.602533340454102, "learning_rate": 4.126328217237308e-06, "loss": 0.5974, "step": 233 }, { "epoch": 0.004144031664652207, "grad_norm": 5.25504732131958, "learning_rate": 4.144037780401416e-06, "loss": 0.6013, "step": 234 }, { "epoch": 0.004161741201680635, "grad_norm": 5.135586738586426, "learning_rate": 4.1617473435655256e-06, "loss": 0.5718, "step": 235 }, { "epoch": 0.004179450738709063, "grad_norm": 4.912363052368164, "learning_rate": 4.179456906729634e-06, "loss": 0.5465, "step": 236 }, { "epoch": 0.004197160275737491, "grad_norm": 4.211596488952637, "learning_rate": 4.197166469893743e-06, "loss": 0.4148, "step": 237 }, { "epoch": 0.00421486981276592, "grad_norm": 4.972733020782471, "learning_rate": 4.214876033057851e-06, "loss": 0.4931, "step": 238 }, { "epoch": 0.004232579349794348, "grad_norm": 4.534011363983154, "learning_rate": 4.23258559622196e-06, "loss": 0.5546, "step": 239 }, { "epoch": 0.004250288886822776, "grad_norm": 4.6393513679504395, "learning_rate": 4.250295159386069e-06, "loss": 0.5404, "step": 240 }, { "epoch": 0.004267998423851204, "grad_norm": 5.780306816101074, "learning_rate": 4.268004722550177e-06, "loss": 0.5125, "step": 241 }, { "epoch": 0.0042857079608796325, "grad_norm": 4.689870834350586, "learning_rate": 4.2857142857142855e-06, "loss": 0.5709, "step": 242 }, { "epoch": 0.004303417497908061, "grad_norm": 6.299881935119629, "learning_rate": 4.303423848878395e-06, "loss": 0.5211, "step": 243 }, { "epoch": 0.00432112703493649, "grad_norm": 4.9436445236206055, "learning_rate": 4.321133412042503e-06, "loss": 0.4649, "step": 244 }, { "epoch": 0.004338836571964918, "grad_norm": 4.239124298095703, "learning_rate": 4.338842975206611e-06, "loss": 0.5224, "step": 245 }, { "epoch": 0.004356546108993346, "grad_norm": 5.246542930603027, "learning_rate": 4.35655253837072e-06, "loss": 0.487, "step": 246 }, { "epoch": 0.004374255646021774, "grad_norm": 4.220561981201172, "learning_rate": 4.374262101534829e-06, "loss": 0.4386, "step": 247 }, { "epoch": 0.004391965183050202, "grad_norm": 4.2034406661987305, "learning_rate": 4.391971664698937e-06, "loss": 0.4856, "step": 248 }, { "epoch": 0.00440967472007863, "grad_norm": 3.863534450531006, "learning_rate": 4.409681227863046e-06, "loss": 0.4737, "step": 249 }, { "epoch": 0.004427384257107058, "grad_norm": 4.434429168701172, "learning_rate": 4.4273907910271545e-06, "loss": 0.4582, "step": 250 }, { "epoch": 0.004445093794135487, "grad_norm": 4.096792221069336, "learning_rate": 4.445100354191263e-06, "loss": 0.4486, "step": 251 }, { "epoch": 0.004462803331163915, "grad_norm": 3.574784994125366, "learning_rate": 4.462809917355372e-06, "loss": 0.4543, "step": 252 }, { "epoch": 0.004480512868192343, "grad_norm": 4.734442710876465, "learning_rate": 4.48051948051948e-06, "loss": 0.5165, "step": 253 }, { "epoch": 0.0044982224052207715, "grad_norm": 5.7441277503967285, "learning_rate": 4.4982290436835895e-06, "loss": 0.5167, "step": 254 }, { "epoch": 0.0045159319422492, "grad_norm": 3.5251331329345703, "learning_rate": 4.515938606847698e-06, "loss": 0.4844, "step": 255 }, { "epoch": 0.004533641479277628, "grad_norm": 6.242320537567139, "learning_rate": 4.533648170011806e-06, "loss": 0.4492, "step": 256 }, { "epoch": 0.004551351016306056, "grad_norm": 3.763449192047119, "learning_rate": 4.551357733175915e-06, "loss": 0.4194, "step": 257 }, { "epoch": 0.004569060553334485, "grad_norm": 4.283092021942139, "learning_rate": 4.5690672963400236e-06, "loss": 0.4438, "step": 258 }, { "epoch": 0.004586770090362913, "grad_norm": 4.696651935577393, "learning_rate": 4.586776859504132e-06, "loss": 0.5108, "step": 259 }, { "epoch": 0.004604479627391341, "grad_norm": 4.217391490936279, "learning_rate": 4.604486422668241e-06, "loss": 0.4799, "step": 260 }, { "epoch": 0.004622189164419769, "grad_norm": 3.9510228633880615, "learning_rate": 4.622195985832349e-06, "loss": 0.4862, "step": 261 }, { "epoch": 0.004639898701448197, "grad_norm": 4.303753852844238, "learning_rate": 4.639905548996458e-06, "loss": 0.4399, "step": 262 }, { "epoch": 0.004657608238476625, "grad_norm": 4.228268623352051, "learning_rate": 4.657615112160567e-06, "loss": 0.4784, "step": 263 }, { "epoch": 0.004675317775505054, "grad_norm": 3.5818989276885986, "learning_rate": 4.675324675324675e-06, "loss": 0.5006, "step": 264 }, { "epoch": 0.004693027312533482, "grad_norm": 4.059908390045166, "learning_rate": 4.6930342384887834e-06, "loss": 0.4234, "step": 265 }, { "epoch": 0.0047107368495619105, "grad_norm": 4.4708123207092285, "learning_rate": 4.710743801652893e-06, "loss": 0.3845, "step": 266 }, { "epoch": 0.004728446386590339, "grad_norm": 3.91793155670166, "learning_rate": 4.728453364817001e-06, "loss": 0.4752, "step": 267 }, { "epoch": 0.004746155923618767, "grad_norm": 4.043045997619629, "learning_rate": 4.74616292798111e-06, "loss": 0.4742, "step": 268 }, { "epoch": 0.004763865460647195, "grad_norm": 3.3903756141662598, "learning_rate": 4.763872491145218e-06, "loss": 0.4216, "step": 269 }, { "epoch": 0.004781574997675623, "grad_norm": 4.567605495452881, "learning_rate": 4.781582054309327e-06, "loss": 0.4709, "step": 270 }, { "epoch": 0.004799284534704052, "grad_norm": 4.651115417480469, "learning_rate": 4.799291617473436e-06, "loss": 0.5103, "step": 271 }, { "epoch": 0.00481699407173248, "grad_norm": 5.103347301483154, "learning_rate": 4.817001180637544e-06, "loss": 0.4093, "step": 272 }, { "epoch": 0.004834703608760908, "grad_norm": 4.570180416107178, "learning_rate": 4.8347107438016525e-06, "loss": 0.4573, "step": 273 }, { "epoch": 0.004852413145789336, "grad_norm": 3.547973871231079, "learning_rate": 4.852420306965762e-06, "loss": 0.431, "step": 274 }, { "epoch": 0.004870122682817764, "grad_norm": 3.797386646270752, "learning_rate": 4.87012987012987e-06, "loss": 0.4397, "step": 275 }, { "epoch": 0.004887832219846192, "grad_norm": 3.7837769985198975, "learning_rate": 4.887839433293978e-06, "loss": 0.5073, "step": 276 }, { "epoch": 0.0049055417568746205, "grad_norm": 3.6770901679992676, "learning_rate": 4.9055489964580874e-06, "loss": 0.4176, "step": 277 }, { "epoch": 0.0049232512939030495, "grad_norm": 5.331933498382568, "learning_rate": 4.923258559622196e-06, "loss": 0.51, "step": 278 }, { "epoch": 0.004940960830931478, "grad_norm": 3.957670211791992, "learning_rate": 4.940968122786304e-06, "loss": 0.3566, "step": 279 }, { "epoch": 0.004958670367959906, "grad_norm": 4.068375587463379, "learning_rate": 4.958677685950413e-06, "loss": 0.4383, "step": 280 }, { "epoch": 0.004976379904988334, "grad_norm": 4.2856059074401855, "learning_rate": 4.9763872491145215e-06, "loss": 0.4485, "step": 281 }, { "epoch": 0.004994089442016762, "grad_norm": 4.067772388458252, "learning_rate": 4.994096812278631e-06, "loss": 0.422, "step": 282 }, { "epoch": 0.00501179897904519, "grad_norm": 4.916604995727539, "learning_rate": 5.011806375442739e-06, "loss": 0.5078, "step": 283 }, { "epoch": 0.005029508516073619, "grad_norm": 4.192014217376709, "learning_rate": 5.029515938606847e-06, "loss": 0.4957, "step": 284 }, { "epoch": 0.005047218053102047, "grad_norm": 4.309875965118408, "learning_rate": 5.0472255017709565e-06, "loss": 0.467, "step": 285 }, { "epoch": 0.005064927590130475, "grad_norm": 4.0538434982299805, "learning_rate": 5.064935064935065e-06, "loss": 0.4861, "step": 286 }, { "epoch": 0.005082637127158903, "grad_norm": 4.1516947746276855, "learning_rate": 5.082644628099173e-06, "loss": 0.4334, "step": 287 }, { "epoch": 0.005100346664187331, "grad_norm": 4.338875770568848, "learning_rate": 5.100354191263282e-06, "loss": 0.4519, "step": 288 }, { "epoch": 0.0051180562012157595, "grad_norm": 3.7111563682556152, "learning_rate": 5.1180637544273906e-06, "loss": 0.4404, "step": 289 }, { "epoch": 0.005135765738244188, "grad_norm": 4.164034366607666, "learning_rate": 5.135773317591499e-06, "loss": 0.4228, "step": 290 }, { "epoch": 0.005153475275272617, "grad_norm": 4.5167999267578125, "learning_rate": 5.153482880755608e-06, "loss": 0.4463, "step": 291 }, { "epoch": 0.005171184812301045, "grad_norm": 4.352184772491455, "learning_rate": 5.171192443919716e-06, "loss": 0.3875, "step": 292 }, { "epoch": 0.005188894349329473, "grad_norm": 4.204676628112793, "learning_rate": 5.188902007083825e-06, "loss": 0.4756, "step": 293 }, { "epoch": 0.005206603886357901, "grad_norm": 5.798733234405518, "learning_rate": 5.206611570247934e-06, "loss": 0.5357, "step": 294 }, { "epoch": 0.005224313423386329, "grad_norm": 4.450980186462402, "learning_rate": 5.224321133412042e-06, "loss": 0.4196, "step": 295 }, { "epoch": 0.005242022960414757, "grad_norm": 4.849542617797852, "learning_rate": 5.242030696576151e-06, "loss": 0.4157, "step": 296 }, { "epoch": 0.005259732497443185, "grad_norm": 4.19887113571167, "learning_rate": 5.25974025974026e-06, "loss": 0.5304, "step": 297 }, { "epoch": 0.005277442034471614, "grad_norm": 5.330066680908203, "learning_rate": 5.277449822904368e-06, "loss": 0.5042, "step": 298 }, { "epoch": 0.005295151571500042, "grad_norm": 4.679723739624023, "learning_rate": 5.295159386068477e-06, "loss": 0.398, "step": 299 }, { "epoch": 0.00531286110852847, "grad_norm": 4.680940628051758, "learning_rate": 5.312868949232585e-06, "loss": 0.4598, "step": 300 }, { "epoch": 0.0053305706455568985, "grad_norm": 3.9143331050872803, "learning_rate": 5.330578512396694e-06, "loss": 0.4656, "step": 301 }, { "epoch": 0.005348280182585327, "grad_norm": 5.376965045928955, "learning_rate": 5.348288075560803e-06, "loss": 0.3997, "step": 302 }, { "epoch": 0.005365989719613755, "grad_norm": 4.211335182189941, "learning_rate": 5.365997638724911e-06, "loss": 0.4618, "step": 303 }, { "epoch": 0.005383699256642183, "grad_norm": 4.112772464752197, "learning_rate": 5.3837072018890195e-06, "loss": 0.4082, "step": 304 }, { "epoch": 0.005401408793670612, "grad_norm": 4.834102153778076, "learning_rate": 5.401416765053129e-06, "loss": 0.453, "step": 305 }, { "epoch": 0.00541911833069904, "grad_norm": 3.675689935684204, "learning_rate": 5.419126328217237e-06, "loss": 0.3818, "step": 306 }, { "epoch": 0.005436827867727468, "grad_norm": 4.332947254180908, "learning_rate": 5.436835891381345e-06, "loss": 0.4197, "step": 307 }, { "epoch": 0.005454537404755896, "grad_norm": 4.537837028503418, "learning_rate": 5.4545454545454545e-06, "loss": 0.405, "step": 308 }, { "epoch": 0.005472246941784324, "grad_norm": 4.358576774597168, "learning_rate": 5.472255017709563e-06, "loss": 0.4374, "step": 309 }, { "epoch": 0.005489956478812752, "grad_norm": 5.204386234283447, "learning_rate": 5.489964580873672e-06, "loss": 0.4744, "step": 310 }, { "epoch": 0.005507666015841181, "grad_norm": 4.442570686340332, "learning_rate": 5.50767414403778e-06, "loss": 0.4515, "step": 311 }, { "epoch": 0.005525375552869609, "grad_norm": 3.3154032230377197, "learning_rate": 5.5253837072018886e-06, "loss": 0.3902, "step": 312 }, { "epoch": 0.0055430850898980375, "grad_norm": 4.3279709815979, "learning_rate": 5.543093270365998e-06, "loss": 0.4191, "step": 313 }, { "epoch": 0.005560794626926466, "grad_norm": 4.834677696228027, "learning_rate": 5.560802833530106e-06, "loss": 0.3949, "step": 314 }, { "epoch": 0.005578504163954894, "grad_norm": 3.7082629203796387, "learning_rate": 5.578512396694214e-06, "loss": 0.4135, "step": 315 }, { "epoch": 0.005596213700983322, "grad_norm": 3.5873665809631348, "learning_rate": 5.5962219598583235e-06, "loss": 0.3974, "step": 316 }, { "epoch": 0.00561392323801175, "grad_norm": 4.048012733459473, "learning_rate": 5.613931523022432e-06, "loss": 0.4502, "step": 317 }, { "epoch": 0.005631632775040179, "grad_norm": 4.139986515045166, "learning_rate": 5.631641086186541e-06, "loss": 0.4531, "step": 318 }, { "epoch": 0.005649342312068607, "grad_norm": 4.311934471130371, "learning_rate": 5.64935064935065e-06, "loss": 0.4819, "step": 319 }, { "epoch": 0.005667051849097035, "grad_norm": 4.010392189025879, "learning_rate": 5.6670602125147584e-06, "loss": 0.4934, "step": 320 }, { "epoch": 0.005684761386125463, "grad_norm": 4.497403621673584, "learning_rate": 5.684769775678867e-06, "loss": 0.38, "step": 321 }, { "epoch": 0.005702470923153891, "grad_norm": 3.7809085845947266, "learning_rate": 5.702479338842976e-06, "loss": 0.4232, "step": 322 }, { "epoch": 0.005720180460182319, "grad_norm": 3.281883478164673, "learning_rate": 5.720188902007084e-06, "loss": 0.3616, "step": 323 }, { "epoch": 0.0057378899972107475, "grad_norm": 5.347336292266846, "learning_rate": 5.737898465171193e-06, "loss": 0.4456, "step": 324 }, { "epoch": 0.0057555995342391765, "grad_norm": 4.645716190338135, "learning_rate": 5.755608028335302e-06, "loss": 0.439, "step": 325 }, { "epoch": 0.005773309071267605, "grad_norm": 4.307283401489258, "learning_rate": 5.77331759149941e-06, "loss": 0.4221, "step": 326 }, { "epoch": 0.005791018608296033, "grad_norm": 4.990793704986572, "learning_rate": 5.791027154663519e-06, "loss": 0.4179, "step": 327 }, { "epoch": 0.005808728145324461, "grad_norm": 4.872641563415527, "learning_rate": 5.8087367178276275e-06, "loss": 0.4852, "step": 328 }, { "epoch": 0.005826437682352889, "grad_norm": 4.485213756561279, "learning_rate": 5.826446280991736e-06, "loss": 0.3661, "step": 329 }, { "epoch": 0.005844147219381317, "grad_norm": 4.372236728668213, "learning_rate": 5.844155844155845e-06, "loss": 0.4122, "step": 330 }, { "epoch": 0.005861856756409746, "grad_norm": 4.422037601470947, "learning_rate": 5.861865407319953e-06, "loss": 0.4312, "step": 331 }, { "epoch": 0.005879566293438174, "grad_norm": 3.449848175048828, "learning_rate": 5.879574970484062e-06, "loss": 0.4413, "step": 332 }, { "epoch": 0.005897275830466602, "grad_norm": 4.391119956970215, "learning_rate": 5.897284533648171e-06, "loss": 0.4606, "step": 333 }, { "epoch": 0.00591498536749503, "grad_norm": 4.363178253173828, "learning_rate": 5.914994096812279e-06, "loss": 0.3538, "step": 334 }, { "epoch": 0.005932694904523458, "grad_norm": 4.586402893066406, "learning_rate": 5.932703659976387e-06, "loss": 0.4466, "step": 335 }, { "epoch": 0.0059504044415518865, "grad_norm": 3.392624855041504, "learning_rate": 5.9504132231404965e-06, "loss": 0.3736, "step": 336 }, { "epoch": 0.005968113978580315, "grad_norm": 3.640761375427246, "learning_rate": 5.968122786304605e-06, "loss": 0.3643, "step": 337 }, { "epoch": 0.0059858235156087436, "grad_norm": 4.031898498535156, "learning_rate": 5.985832349468714e-06, "loss": 0.4285, "step": 338 }, { "epoch": 0.006003533052637172, "grad_norm": 3.8163459300994873, "learning_rate": 6.003541912632822e-06, "loss": 0.3717, "step": 339 }, { "epoch": 0.0060212425896656, "grad_norm": 3.3648884296417236, "learning_rate": 6.021251475796931e-06, "loss": 0.3328, "step": 340 }, { "epoch": 0.006038952126694028, "grad_norm": 3.7447891235351562, "learning_rate": 6.03896103896104e-06, "loss": 0.304, "step": 341 }, { "epoch": 0.006056661663722456, "grad_norm": 3.731780529022217, "learning_rate": 6.056670602125148e-06, "loss": 0.3563, "step": 342 }, { "epoch": 0.006074371200750884, "grad_norm": 4.6880388259887695, "learning_rate": 6.074380165289256e-06, "loss": 0.4254, "step": 343 }, { "epoch": 0.006092080737779312, "grad_norm": 4.031008243560791, "learning_rate": 6.092089728453366e-06, "loss": 0.4223, "step": 344 }, { "epoch": 0.006109790274807741, "grad_norm": 3.5578808784484863, "learning_rate": 6.109799291617474e-06, "loss": 0.3786, "step": 345 }, { "epoch": 0.006127499811836169, "grad_norm": 3.9119343757629395, "learning_rate": 6.127508854781582e-06, "loss": 0.3979, "step": 346 }, { "epoch": 0.006145209348864597, "grad_norm": 3.8535077571868896, "learning_rate": 6.145218417945691e-06, "loss": 0.471, "step": 347 }, { "epoch": 0.0061629188858930255, "grad_norm": 4.139798641204834, "learning_rate": 6.1629279811098e-06, "loss": 0.4022, "step": 348 }, { "epoch": 0.006180628422921454, "grad_norm": 3.9747626781463623, "learning_rate": 6.180637544273908e-06, "loss": 0.3809, "step": 349 }, { "epoch": 0.006198337959949882, "grad_norm": 4.534278392791748, "learning_rate": 6.198347107438017e-06, "loss": 0.3325, "step": 350 }, { "epoch": 0.006216047496978311, "grad_norm": 3.908843755722046, "learning_rate": 6.2160566706021255e-06, "loss": 0.3612, "step": 351 }, { "epoch": 0.006233757034006739, "grad_norm": 3.850062370300293, "learning_rate": 6.233766233766235e-06, "loss": 0.3603, "step": 352 }, { "epoch": 0.006251466571035167, "grad_norm": 4.600862979888916, "learning_rate": 6.251475796930343e-06, "loss": 0.3824, "step": 353 }, { "epoch": 0.006269176108063595, "grad_norm": 3.9684720039367676, "learning_rate": 6.269185360094451e-06, "loss": 0.4739, "step": 354 }, { "epoch": 0.006286885645092023, "grad_norm": 3.8911266326904297, "learning_rate": 6.28689492325856e-06, "loss": 0.3883, "step": 355 }, { "epoch": 0.006304595182120451, "grad_norm": 4.029200553894043, "learning_rate": 6.304604486422669e-06, "loss": 0.3782, "step": 356 }, { "epoch": 0.006322304719148879, "grad_norm": 3.8787455558776855, "learning_rate": 6.322314049586777e-06, "loss": 0.3825, "step": 357 }, { "epoch": 0.006340014256177308, "grad_norm": 5.138105392456055, "learning_rate": 6.340023612750886e-06, "loss": 0.4076, "step": 358 }, { "epoch": 0.006357723793205736, "grad_norm": 4.304817199707031, "learning_rate": 6.3577331759149945e-06, "loss": 0.4087, "step": 359 }, { "epoch": 0.0063754333302341645, "grad_norm": 3.748053789138794, "learning_rate": 6.375442739079103e-06, "loss": 0.3942, "step": 360 }, { "epoch": 0.0063931428672625926, "grad_norm": 3.5275371074676514, "learning_rate": 6.393152302243212e-06, "loss": 0.3011, "step": 361 }, { "epoch": 0.006410852404291021, "grad_norm": 4.233658790588379, "learning_rate": 6.41086186540732e-06, "loss": 0.3674, "step": 362 }, { "epoch": 0.006428561941319449, "grad_norm": 4.101852893829346, "learning_rate": 6.428571428571429e-06, "loss": 0.3736, "step": 363 }, { "epoch": 0.006446271478347877, "grad_norm": 3.4169909954071045, "learning_rate": 6.446280991735538e-06, "loss": 0.3578, "step": 364 }, { "epoch": 0.006463981015376306, "grad_norm": 5.230625152587891, "learning_rate": 6.463990554899646e-06, "loss": 0.4564, "step": 365 }, { "epoch": 0.006481690552404734, "grad_norm": 2.645723581314087, "learning_rate": 6.481700118063754e-06, "loss": 0.3646, "step": 366 }, { "epoch": 0.006499400089433162, "grad_norm": 3.5762650966644287, "learning_rate": 6.4994096812278636e-06, "loss": 0.3555, "step": 367 }, { "epoch": 0.00651710962646159, "grad_norm": 3.517948865890503, "learning_rate": 6.517119244391972e-06, "loss": 0.4258, "step": 368 }, { "epoch": 0.006534819163490018, "grad_norm": 3.931680917739868, "learning_rate": 6.534828807556081e-06, "loss": 0.3962, "step": 369 }, { "epoch": 0.006552528700518446, "grad_norm": 4.584248065948486, "learning_rate": 6.552538370720189e-06, "loss": 0.3586, "step": 370 }, { "epoch": 0.006570238237546875, "grad_norm": 4.446023941040039, "learning_rate": 6.570247933884298e-06, "loss": 0.3672, "step": 371 }, { "epoch": 0.0065879477745753035, "grad_norm": 3.492002010345459, "learning_rate": 6.587957497048407e-06, "loss": 0.3897, "step": 372 }, { "epoch": 0.0066056573116037316, "grad_norm": 4.364515781402588, "learning_rate": 6.605667060212515e-06, "loss": 0.4168, "step": 373 }, { "epoch": 0.00662336684863216, "grad_norm": 3.9480397701263428, "learning_rate": 6.6233766233766234e-06, "loss": 0.4044, "step": 374 }, { "epoch": 0.006641076385660588, "grad_norm": 4.331887722015381, "learning_rate": 6.641086186540733e-06, "loss": 0.4064, "step": 375 }, { "epoch": 0.006658785922689016, "grad_norm": 5.124655723571777, "learning_rate": 6.658795749704841e-06, "loss": 0.3995, "step": 376 }, { "epoch": 0.006676495459717444, "grad_norm": 4.387752532958984, "learning_rate": 6.676505312868949e-06, "loss": 0.4261, "step": 377 }, { "epoch": 0.006694204996745873, "grad_norm": 4.282907485961914, "learning_rate": 6.694214876033058e-06, "loss": 0.4183, "step": 378 }, { "epoch": 0.006711914533774301, "grad_norm": 3.6495323181152344, "learning_rate": 6.711924439197167e-06, "loss": 0.3664, "step": 379 }, { "epoch": 0.006729624070802729, "grad_norm": 5.611471176147461, "learning_rate": 6.729634002361275e-06, "loss": 0.3981, "step": 380 }, { "epoch": 0.006747333607831157, "grad_norm": 4.648621559143066, "learning_rate": 6.747343565525384e-06, "loss": 0.3979, "step": 381 }, { "epoch": 0.006765043144859585, "grad_norm": 3.232480764389038, "learning_rate": 6.7650531286894925e-06, "loss": 0.3478, "step": 382 }, { "epoch": 0.0067827526818880135, "grad_norm": 4.257722854614258, "learning_rate": 6.782762691853602e-06, "loss": 0.3783, "step": 383 }, { "epoch": 0.006800462218916442, "grad_norm": 2.8525073528289795, "learning_rate": 6.80047225501771e-06, "loss": 0.276, "step": 384 }, { "epoch": 0.0068181717559448705, "grad_norm": 3.703336238861084, "learning_rate": 6.818181818181818e-06, "loss": 0.3478, "step": 385 }, { "epoch": 0.006835881292973299, "grad_norm": 3.03556227684021, "learning_rate": 6.8358913813459274e-06, "loss": 0.3511, "step": 386 }, { "epoch": 0.006853590830001727, "grad_norm": 5.144900321960449, "learning_rate": 6.853600944510036e-06, "loss": 0.4065, "step": 387 }, { "epoch": 0.006871300367030155, "grad_norm": 4.583066940307617, "learning_rate": 6.871310507674144e-06, "loss": 0.4139, "step": 388 }, { "epoch": 0.006889009904058583, "grad_norm": 3.783829689025879, "learning_rate": 6.889020070838253e-06, "loss": 0.3704, "step": 389 }, { "epoch": 0.006906719441087011, "grad_norm": 3.546769380569458, "learning_rate": 6.9067296340023615e-06, "loss": 0.4206, "step": 390 }, { "epoch": 0.00692442897811544, "grad_norm": 4.942490577697754, "learning_rate": 6.92443919716647e-06, "loss": 0.3883, "step": 391 }, { "epoch": 0.006942138515143868, "grad_norm": 4.083221912384033, "learning_rate": 6.942148760330579e-06, "loss": 0.3636, "step": 392 }, { "epoch": 0.006959848052172296, "grad_norm": 4.133819580078125, "learning_rate": 6.959858323494687e-06, "loss": 0.3908, "step": 393 }, { "epoch": 0.006977557589200724, "grad_norm": 4.228703498840332, "learning_rate": 6.977567886658796e-06, "loss": 0.3694, "step": 394 }, { "epoch": 0.0069952671262291525, "grad_norm": 4.500380039215088, "learning_rate": 6.995277449822905e-06, "loss": 0.3606, "step": 395 }, { "epoch": 0.0070129766632575806, "grad_norm": 3.80035400390625, "learning_rate": 7.012987012987013e-06, "loss": 0.3565, "step": 396 }, { "epoch": 0.007030686200286009, "grad_norm": 4.2072978019714355, "learning_rate": 7.030696576151122e-06, "loss": 0.3834, "step": 397 }, { "epoch": 0.007048395737314438, "grad_norm": 4.474222660064697, "learning_rate": 7.048406139315231e-06, "loss": 0.3685, "step": 398 }, { "epoch": 0.007066105274342866, "grad_norm": 4.010639667510986, "learning_rate": 7.066115702479339e-06, "loss": 0.3865, "step": 399 }, { "epoch": 0.007083814811371294, "grad_norm": 3.8165857791900635, "learning_rate": 7.083825265643448e-06, "loss": 0.3441, "step": 400 }, { "epoch": 0.007101524348399722, "grad_norm": 4.709686756134033, "learning_rate": 7.101534828807556e-06, "loss": 0.3933, "step": 401 }, { "epoch": 0.00711923388542815, "grad_norm": 4.211243629455566, "learning_rate": 7.119244391971665e-06, "loss": 0.4029, "step": 402 }, { "epoch": 0.007136943422456578, "grad_norm": 4.50501823425293, "learning_rate": 7.136953955135774e-06, "loss": 0.4013, "step": 403 }, { "epoch": 0.007154652959485006, "grad_norm": 3.264988660812378, "learning_rate": 7.154663518299882e-06, "loss": 0.3911, "step": 404 }, { "epoch": 0.007172362496513435, "grad_norm": 3.456624984741211, "learning_rate": 7.1723730814639905e-06, "loss": 0.3467, "step": 405 }, { "epoch": 0.007190072033541863, "grad_norm": 3.8943302631378174, "learning_rate": 7.1900826446281e-06, "loss": 0.3584, "step": 406 }, { "epoch": 0.0072077815705702914, "grad_norm": 3.031559705734253, "learning_rate": 7.207792207792208e-06, "loss": 0.3534, "step": 407 }, { "epoch": 0.0072254911075987195, "grad_norm": 3.732365846633911, "learning_rate": 7.225501770956316e-06, "loss": 0.302, "step": 408 }, { "epoch": 0.007243200644627148, "grad_norm": 4.715466499328613, "learning_rate": 7.243211334120425e-06, "loss": 0.3956, "step": 409 }, { "epoch": 0.007260910181655576, "grad_norm": 5.629728317260742, "learning_rate": 7.260920897284534e-06, "loss": 0.4469, "step": 410 }, { "epoch": 0.007278619718684005, "grad_norm": 4.140046119689941, "learning_rate": 7.278630460448643e-06, "loss": 0.3856, "step": 411 }, { "epoch": 0.007296329255712433, "grad_norm": 4.220736503601074, "learning_rate": 7.296340023612751e-06, "loss": 0.3639, "step": 412 }, { "epoch": 0.007314038792740861, "grad_norm": 2.8911848068237305, "learning_rate": 7.3140495867768595e-06, "loss": 0.3673, "step": 413 }, { "epoch": 0.007331748329769289, "grad_norm": 4.056469917297363, "learning_rate": 7.331759149940969e-06, "loss": 0.3085, "step": 414 }, { "epoch": 0.007349457866797717, "grad_norm": 4.975326061248779, "learning_rate": 7.349468713105077e-06, "loss": 0.3759, "step": 415 }, { "epoch": 0.007367167403826145, "grad_norm": 3.3136887550354004, "learning_rate": 7.367178276269185e-06, "loss": 0.362, "step": 416 }, { "epoch": 0.007384876940854573, "grad_norm": 3.1191229820251465, "learning_rate": 7.3848878394332945e-06, "loss": 0.3401, "step": 417 }, { "epoch": 0.007402586477883002, "grad_norm": 4.193265914916992, "learning_rate": 7.402597402597403e-06, "loss": 0.2984, "step": 418 }, { "epoch": 0.00742029601491143, "grad_norm": 4.364076137542725, "learning_rate": 7.420306965761511e-06, "loss": 0.4044, "step": 419 }, { "epoch": 0.0074380055519398585, "grad_norm": 4.326837062835693, "learning_rate": 7.43801652892562e-06, "loss": 0.4047, "step": 420 }, { "epoch": 0.007455715088968287, "grad_norm": 3.4538941383361816, "learning_rate": 7.4557260920897286e-06, "loss": 0.2959, "step": 421 }, { "epoch": 0.007473424625996715, "grad_norm": 3.985891580581665, "learning_rate": 7.473435655253837e-06, "loss": 0.3815, "step": 422 }, { "epoch": 0.007491134163025143, "grad_norm": 3.982536792755127, "learning_rate": 7.491145218417946e-06, "loss": 0.378, "step": 423 }, { "epoch": 0.007508843700053571, "grad_norm": 3.631007671356201, "learning_rate": 7.508854781582055e-06, "loss": 0.3482, "step": 424 }, { "epoch": 0.007526553237082, "grad_norm": 4.199782371520996, "learning_rate": 7.5265643447461635e-06, "loss": 0.3724, "step": 425 }, { "epoch": 0.007544262774110428, "grad_norm": 3.3629870414733887, "learning_rate": 7.544273907910272e-06, "loss": 0.3122, "step": 426 }, { "epoch": 0.007561972311138856, "grad_norm": 4.510176658630371, "learning_rate": 7.56198347107438e-06, "loss": 0.432, "step": 427 }, { "epoch": 0.007579681848167284, "grad_norm": 3.6941421031951904, "learning_rate": 7.5796930342384884e-06, "loss": 0.3063, "step": 428 }, { "epoch": 0.007597391385195712, "grad_norm": 3.8394408226013184, "learning_rate": 7.597402597402597e-06, "loss": 0.3188, "step": 429 }, { "epoch": 0.0076151009222241405, "grad_norm": 3.301008939743042, "learning_rate": 7.615112160566707e-06, "loss": 0.3732, "step": 430 }, { "epoch": 0.007632810459252569, "grad_norm": 3.1187171936035156, "learning_rate": 7.632821723730815e-06, "loss": 0.3377, "step": 431 }, { "epoch": 0.0076505199962809975, "grad_norm": 3.9344284534454346, "learning_rate": 7.650531286894923e-06, "loss": 0.324, "step": 432 }, { "epoch": 0.007668229533309426, "grad_norm": 3.698336601257324, "learning_rate": 7.668240850059032e-06, "loss": 0.358, "step": 433 }, { "epoch": 0.007685939070337854, "grad_norm": 3.856339454650879, "learning_rate": 7.68595041322314e-06, "loss": 0.3555, "step": 434 }, { "epoch": 0.007703648607366282, "grad_norm": 3.7962889671325684, "learning_rate": 7.70365997638725e-06, "loss": 0.3582, "step": 435 }, { "epoch": 0.00772135814439471, "grad_norm": 4.274860382080078, "learning_rate": 7.721369539551358e-06, "loss": 0.3385, "step": 436 }, { "epoch": 0.007739067681423138, "grad_norm": 4.402339458465576, "learning_rate": 7.739079102715467e-06, "loss": 0.3812, "step": 437 }, { "epoch": 0.007756777218451567, "grad_norm": 4.688227653503418, "learning_rate": 7.756788665879575e-06, "loss": 0.3847, "step": 438 }, { "epoch": 0.007774486755479995, "grad_norm": 2.931818962097168, "learning_rate": 7.774498229043683e-06, "loss": 0.3198, "step": 439 }, { "epoch": 0.007792196292508423, "grad_norm": 3.680401563644409, "learning_rate": 7.792207792207792e-06, "loss": 0.3056, "step": 440 }, { "epoch": 0.007809905829536851, "grad_norm": 3.923002243041992, "learning_rate": 7.809917355371902e-06, "loss": 0.3589, "step": 441 }, { "epoch": 0.00782761536656528, "grad_norm": 4.0456366539001465, "learning_rate": 7.82762691853601e-06, "loss": 0.2646, "step": 442 }, { "epoch": 0.007845324903593708, "grad_norm": 4.477214336395264, "learning_rate": 7.845336481700118e-06, "loss": 0.3858, "step": 443 }, { "epoch": 0.007863034440622136, "grad_norm": 4.1668243408203125, "learning_rate": 7.863046044864227e-06, "loss": 0.2861, "step": 444 }, { "epoch": 0.007880743977650564, "grad_norm": 3.2481513023376465, "learning_rate": 7.880755608028335e-06, "loss": 0.3083, "step": 445 }, { "epoch": 0.007898453514678992, "grad_norm": 5.174544811248779, "learning_rate": 7.898465171192445e-06, "loss": 0.352, "step": 446 }, { "epoch": 0.00791616305170742, "grad_norm": 4.203968048095703, "learning_rate": 7.916174734356553e-06, "loss": 0.3423, "step": 447 }, { "epoch": 0.00793387258873585, "grad_norm": 3.947993516921997, "learning_rate": 7.933884297520661e-06, "loss": 0.3351, "step": 448 }, { "epoch": 0.007951582125764278, "grad_norm": 3.999462127685547, "learning_rate": 7.95159386068477e-06, "loss": 0.2886, "step": 449 }, { "epoch": 0.007969291662792706, "grad_norm": 3.346590042114258, "learning_rate": 7.969303423848878e-06, "loss": 0.3502, "step": 450 }, { "epoch": 0.007987001199821134, "grad_norm": 4.706239223480225, "learning_rate": 7.987012987012986e-06, "loss": 0.3951, "step": 451 }, { "epoch": 0.008004710736849562, "grad_norm": 3.2842650413513184, "learning_rate": 8.004722550177096e-06, "loss": 0.3158, "step": 452 }, { "epoch": 0.00802242027387799, "grad_norm": 4.330171585083008, "learning_rate": 8.022432113341205e-06, "loss": 0.3135, "step": 453 }, { "epoch": 0.008040129810906418, "grad_norm": 3.849376916885376, "learning_rate": 8.040141676505313e-06, "loss": 0.3755, "step": 454 }, { "epoch": 0.008057839347934847, "grad_norm": 3.4784603118896484, "learning_rate": 8.057851239669421e-06, "loss": 0.3253, "step": 455 }, { "epoch": 0.008075548884963275, "grad_norm": 4.572418689727783, "learning_rate": 8.07556080283353e-06, "loss": 0.4081, "step": 456 }, { "epoch": 0.008093258421991703, "grad_norm": 3.540642499923706, "learning_rate": 8.093270365997638e-06, "loss": 0.3392, "step": 457 }, { "epoch": 0.00811096795902013, "grad_norm": 3.3840675354003906, "learning_rate": 8.110979929161748e-06, "loss": 0.3533, "step": 458 }, { "epoch": 0.008128677496048559, "grad_norm": 4.6807756423950195, "learning_rate": 8.128689492325856e-06, "loss": 0.3663, "step": 459 }, { "epoch": 0.008146387033076987, "grad_norm": 3.5887725353240967, "learning_rate": 8.146399055489965e-06, "loss": 0.2955, "step": 460 }, { "epoch": 0.008164096570105415, "grad_norm": 4.504231929779053, "learning_rate": 8.164108618654073e-06, "loss": 0.3741, "step": 461 }, { "epoch": 0.008181806107133845, "grad_norm": 3.9071128368377686, "learning_rate": 8.181818181818181e-06, "loss": 0.3916, "step": 462 }, { "epoch": 0.008199515644162273, "grad_norm": 4.490426063537598, "learning_rate": 8.199527744982291e-06, "loss": 0.3123, "step": 463 }, { "epoch": 0.008217225181190701, "grad_norm": 3.112659454345703, "learning_rate": 8.2172373081464e-06, "loss": 0.3101, "step": 464 }, { "epoch": 0.00823493471821913, "grad_norm": 3.6749868392944336, "learning_rate": 8.234946871310508e-06, "loss": 0.2836, "step": 465 }, { "epoch": 0.008252644255247557, "grad_norm": 3.1626293659210205, "learning_rate": 8.252656434474616e-06, "loss": 0.2959, "step": 466 }, { "epoch": 0.008270353792275986, "grad_norm": 3.797560930252075, "learning_rate": 8.270365997638725e-06, "loss": 0.3266, "step": 467 }, { "epoch": 0.008288063329304414, "grad_norm": 4.067280292510986, "learning_rate": 8.288075560802833e-06, "loss": 0.3037, "step": 468 }, { "epoch": 0.008305772866332842, "grad_norm": 4.0564656257629395, "learning_rate": 8.305785123966943e-06, "loss": 0.3542, "step": 469 }, { "epoch": 0.00832348240336127, "grad_norm": 4.14343786239624, "learning_rate": 8.323494687131051e-06, "loss": 0.2947, "step": 470 }, { "epoch": 0.008341191940389698, "grad_norm": 4.042548656463623, "learning_rate": 8.34120425029516e-06, "loss": 0.3291, "step": 471 }, { "epoch": 0.008358901477418126, "grad_norm": 6.312155723571777, "learning_rate": 8.358913813459268e-06, "loss": 0.3197, "step": 472 }, { "epoch": 0.008376611014446554, "grad_norm": 3.8181121349334717, "learning_rate": 8.376623376623376e-06, "loss": 0.3311, "step": 473 }, { "epoch": 0.008394320551474982, "grad_norm": 5.870604038238525, "learning_rate": 8.394332939787486e-06, "loss": 0.4297, "step": 474 }, { "epoch": 0.008412030088503412, "grad_norm": 3.5805516242980957, "learning_rate": 8.412042502951594e-06, "loss": 0.2909, "step": 475 }, { "epoch": 0.00842973962553184, "grad_norm": 3.8785789012908936, "learning_rate": 8.429752066115703e-06, "loss": 0.3091, "step": 476 }, { "epoch": 0.008447449162560268, "grad_norm": 4.021095275878906, "learning_rate": 8.447461629279811e-06, "loss": 0.407, "step": 477 }, { "epoch": 0.008465158699588696, "grad_norm": 4.525032997131348, "learning_rate": 8.46517119244392e-06, "loss": 0.3667, "step": 478 }, { "epoch": 0.008482868236617125, "grad_norm": 3.7026326656341553, "learning_rate": 8.482880755608028e-06, "loss": 0.3965, "step": 479 }, { "epoch": 0.008500577773645553, "grad_norm": 4.409277439117432, "learning_rate": 8.500590318772138e-06, "loss": 0.3767, "step": 480 }, { "epoch": 0.00851828731067398, "grad_norm": 3.754185914993286, "learning_rate": 8.518299881936246e-06, "loss": 0.3076, "step": 481 }, { "epoch": 0.008535996847702409, "grad_norm": 3.489647150039673, "learning_rate": 8.536009445100354e-06, "loss": 0.2952, "step": 482 }, { "epoch": 0.008553706384730837, "grad_norm": 3.6653151512145996, "learning_rate": 8.553719008264463e-06, "loss": 0.3332, "step": 483 }, { "epoch": 0.008571415921759265, "grad_norm": 2.7302405834198, "learning_rate": 8.571428571428571e-06, "loss": 0.2765, "step": 484 }, { "epoch": 0.008589125458787693, "grad_norm": 4.133355617523193, "learning_rate": 8.58913813459268e-06, "loss": 0.3582, "step": 485 }, { "epoch": 0.008606834995816121, "grad_norm": 4.09158992767334, "learning_rate": 8.60684769775679e-06, "loss": 0.3662, "step": 486 }, { "epoch": 0.00862454453284455, "grad_norm": 3.3076579570770264, "learning_rate": 8.624557260920898e-06, "loss": 0.3156, "step": 487 }, { "epoch": 0.00864225406987298, "grad_norm": 5.091364860534668, "learning_rate": 8.642266824085006e-06, "loss": 0.3658, "step": 488 }, { "epoch": 0.008659963606901407, "grad_norm": 4.68433141708374, "learning_rate": 8.659976387249114e-06, "loss": 0.3238, "step": 489 }, { "epoch": 0.008677673143929835, "grad_norm": 4.313590049743652, "learning_rate": 8.677685950413222e-06, "loss": 0.3323, "step": 490 }, { "epoch": 0.008695382680958263, "grad_norm": 4.018658638000488, "learning_rate": 8.695395513577332e-06, "loss": 0.2598, "step": 491 }, { "epoch": 0.008713092217986692, "grad_norm": 4.669622898101807, "learning_rate": 8.71310507674144e-06, "loss": 0.4818, "step": 492 }, { "epoch": 0.00873080175501512, "grad_norm": 4.037537574768066, "learning_rate": 8.730814639905549e-06, "loss": 0.357, "step": 493 }, { "epoch": 0.008748511292043548, "grad_norm": 3.905165672302246, "learning_rate": 8.748524203069657e-06, "loss": 0.3316, "step": 494 }, { "epoch": 0.008766220829071976, "grad_norm": 3.5288257598876953, "learning_rate": 8.766233766233766e-06, "loss": 0.3054, "step": 495 }, { "epoch": 0.008783930366100404, "grad_norm": 7.365562915802002, "learning_rate": 8.783943329397874e-06, "loss": 0.3206, "step": 496 }, { "epoch": 0.008801639903128832, "grad_norm": 4.046594619750977, "learning_rate": 8.801652892561984e-06, "loss": 0.3199, "step": 497 }, { "epoch": 0.00881934944015726, "grad_norm": 3.676159620285034, "learning_rate": 8.819362455726092e-06, "loss": 0.2977, "step": 498 }, { "epoch": 0.008837058977185688, "grad_norm": 4.479018211364746, "learning_rate": 8.8370720188902e-06, "loss": 0.3549, "step": 499 }, { "epoch": 0.008854768514214116, "grad_norm": 4.7529826164245605, "learning_rate": 8.854781582054309e-06, "loss": 0.3534, "step": 500 }, { "epoch": 0.008872478051242545, "grad_norm": 4.226685523986816, "learning_rate": 8.872491145218417e-06, "loss": 0.3151, "step": 501 }, { "epoch": 0.008890187588270974, "grad_norm": 3.9462623596191406, "learning_rate": 8.890200708382526e-06, "loss": 0.27, "step": 502 }, { "epoch": 0.008907897125299402, "grad_norm": 4.824769496917725, "learning_rate": 8.907910271546636e-06, "loss": 0.3685, "step": 503 }, { "epoch": 0.00892560666232783, "grad_norm": 3.073777437210083, "learning_rate": 8.925619834710744e-06, "loss": 0.314, "step": 504 }, { "epoch": 0.008943316199356259, "grad_norm": 3.5724966526031494, "learning_rate": 8.943329397874852e-06, "loss": 0.2657, "step": 505 }, { "epoch": 0.008961025736384687, "grad_norm": 3.4372730255126953, "learning_rate": 8.96103896103896e-06, "loss": 0.323, "step": 506 }, { "epoch": 0.008978735273413115, "grad_norm": 3.406008720397949, "learning_rate": 8.978748524203069e-06, "loss": 0.3357, "step": 507 }, { "epoch": 0.008996444810441543, "grad_norm": 3.9918322563171387, "learning_rate": 8.996458087367179e-06, "loss": 0.308, "step": 508 }, { "epoch": 0.009014154347469971, "grad_norm": 3.9215893745422363, "learning_rate": 9.014167650531287e-06, "loss": 0.3072, "step": 509 }, { "epoch": 0.0090318638844984, "grad_norm": 3.6448872089385986, "learning_rate": 9.031877213695396e-06, "loss": 0.2983, "step": 510 }, { "epoch": 0.009049573421526827, "grad_norm": 3.8280205726623535, "learning_rate": 9.049586776859504e-06, "loss": 0.3128, "step": 511 }, { "epoch": 0.009067282958555255, "grad_norm": 3.8062844276428223, "learning_rate": 9.067296340023612e-06, "loss": 0.2778, "step": 512 }, { "epoch": 0.009084992495583684, "grad_norm": 3.317850351333618, "learning_rate": 9.08500590318772e-06, "loss": 0.3027, "step": 513 }, { "epoch": 0.009102702032612112, "grad_norm": 3.8457162380218506, "learning_rate": 9.10271546635183e-06, "loss": 0.2742, "step": 514 }, { "epoch": 0.009120411569640541, "grad_norm": 4.149758338928223, "learning_rate": 9.120425029515939e-06, "loss": 0.3795, "step": 515 }, { "epoch": 0.00913812110666897, "grad_norm": 3.5364601612091064, "learning_rate": 9.138134592680047e-06, "loss": 0.2696, "step": 516 }, { "epoch": 0.009155830643697398, "grad_norm": 2.8844990730285645, "learning_rate": 9.155844155844155e-06, "loss": 0.3241, "step": 517 }, { "epoch": 0.009173540180725826, "grad_norm": 3.778808355331421, "learning_rate": 9.173553719008264e-06, "loss": 0.3055, "step": 518 }, { "epoch": 0.009191249717754254, "grad_norm": 4.33472204208374, "learning_rate": 9.191263282172374e-06, "loss": 0.3233, "step": 519 }, { "epoch": 0.009208959254782682, "grad_norm": 4.151673316955566, "learning_rate": 9.208972845336482e-06, "loss": 0.3457, "step": 520 }, { "epoch": 0.00922666879181111, "grad_norm": 3.8210811614990234, "learning_rate": 9.22668240850059e-06, "loss": 0.321, "step": 521 }, { "epoch": 0.009244378328839538, "grad_norm": 3.885615825653076, "learning_rate": 9.244391971664699e-06, "loss": 0.3195, "step": 522 }, { "epoch": 0.009262087865867966, "grad_norm": 3.5450832843780518, "learning_rate": 9.262101534828807e-06, "loss": 0.3059, "step": 523 }, { "epoch": 0.009279797402896394, "grad_norm": 4.309544086456299, "learning_rate": 9.279811097992915e-06, "loss": 0.3104, "step": 524 }, { "epoch": 0.009297506939924823, "grad_norm": 3.526477813720703, "learning_rate": 9.297520661157025e-06, "loss": 0.2878, "step": 525 }, { "epoch": 0.00931521647695325, "grad_norm": 3.7035679817199707, "learning_rate": 9.315230224321134e-06, "loss": 0.3017, "step": 526 }, { "epoch": 0.009332926013981679, "grad_norm": 4.962423801422119, "learning_rate": 9.332939787485242e-06, "loss": 0.3076, "step": 527 }, { "epoch": 0.009350635551010109, "grad_norm": 3.4271607398986816, "learning_rate": 9.35064935064935e-06, "loss": 0.3017, "step": 528 }, { "epoch": 0.009368345088038537, "grad_norm": 3.0530099868774414, "learning_rate": 9.368358913813459e-06, "loss": 0.2943, "step": 529 }, { "epoch": 0.009386054625066965, "grad_norm": 4.826744079589844, "learning_rate": 9.386068476977567e-06, "loss": 0.3374, "step": 530 }, { "epoch": 0.009403764162095393, "grad_norm": 4.028501033782959, "learning_rate": 9.403778040141677e-06, "loss": 0.3157, "step": 531 }, { "epoch": 0.009421473699123821, "grad_norm": 3.6143834590911865, "learning_rate": 9.421487603305785e-06, "loss": 0.3909, "step": 532 }, { "epoch": 0.009439183236152249, "grad_norm": 4.030798435211182, "learning_rate": 9.439197166469894e-06, "loss": 0.3048, "step": 533 }, { "epoch": 0.009456892773180677, "grad_norm": 4.620660305023193, "learning_rate": 9.456906729634002e-06, "loss": 0.2566, "step": 534 }, { "epoch": 0.009474602310209105, "grad_norm": 4.063000679016113, "learning_rate": 9.47461629279811e-06, "loss": 0.3197, "step": 535 }, { "epoch": 0.009492311847237533, "grad_norm": 4.10441255569458, "learning_rate": 9.49232585596222e-06, "loss": 0.3176, "step": 536 }, { "epoch": 0.009510021384265962, "grad_norm": 3.4101455211639404, "learning_rate": 9.510035419126328e-06, "loss": 0.2835, "step": 537 }, { "epoch": 0.00952773092129439, "grad_norm": 3.847109317779541, "learning_rate": 9.527744982290437e-06, "loss": 0.3209, "step": 538 }, { "epoch": 0.009545440458322818, "grad_norm": 3.484576463699341, "learning_rate": 9.545454545454545e-06, "loss": 0.2866, "step": 539 }, { "epoch": 0.009563149995351246, "grad_norm": 2.7592995166778564, "learning_rate": 9.563164108618653e-06, "loss": 0.316, "step": 540 }, { "epoch": 0.009580859532379674, "grad_norm": 3.93188738822937, "learning_rate": 9.580873671782762e-06, "loss": 0.3163, "step": 541 }, { "epoch": 0.009598569069408104, "grad_norm": 3.227245569229126, "learning_rate": 9.598583234946872e-06, "loss": 0.316, "step": 542 }, { "epoch": 0.009616278606436532, "grad_norm": 3.2343342304229736, "learning_rate": 9.61629279811098e-06, "loss": 0.3197, "step": 543 }, { "epoch": 0.00963398814346496, "grad_norm": 4.310835361480713, "learning_rate": 9.634002361275088e-06, "loss": 0.3171, "step": 544 }, { "epoch": 0.009651697680493388, "grad_norm": 3.397613048553467, "learning_rate": 9.651711924439197e-06, "loss": 0.3223, "step": 545 }, { "epoch": 0.009669407217521816, "grad_norm": 3.095853090286255, "learning_rate": 9.669421487603305e-06, "loss": 0.302, "step": 546 }, { "epoch": 0.009687116754550244, "grad_norm": 3.201669931411743, "learning_rate": 9.687131050767415e-06, "loss": 0.3377, "step": 547 }, { "epoch": 0.009704826291578672, "grad_norm": 3.29960298538208, "learning_rate": 9.704840613931523e-06, "loss": 0.2701, "step": 548 }, { "epoch": 0.0097225358286071, "grad_norm": 4.221160411834717, "learning_rate": 9.722550177095632e-06, "loss": 0.3648, "step": 549 }, { "epoch": 0.009740245365635529, "grad_norm": 3.8034472465515137, "learning_rate": 9.74025974025974e-06, "loss": 0.3082, "step": 550 }, { "epoch": 0.009757954902663957, "grad_norm": 4.270373344421387, "learning_rate": 9.757969303423848e-06, "loss": 0.3936, "step": 551 }, { "epoch": 0.009775664439692385, "grad_norm": 3.564638614654541, "learning_rate": 9.775678866587957e-06, "loss": 0.343, "step": 552 }, { "epoch": 0.009793373976720813, "grad_norm": 3.3180792331695557, "learning_rate": 9.793388429752067e-06, "loss": 0.3008, "step": 553 }, { "epoch": 0.009811083513749241, "grad_norm": 4.168432712554932, "learning_rate": 9.811097992916175e-06, "loss": 0.2996, "step": 554 }, { "epoch": 0.00982879305077767, "grad_norm": 3.8727171421051025, "learning_rate": 9.828807556080283e-06, "loss": 0.3276, "step": 555 }, { "epoch": 0.009846502587806099, "grad_norm": 4.325137138366699, "learning_rate": 9.846517119244391e-06, "loss": 0.288, "step": 556 }, { "epoch": 0.009864212124834527, "grad_norm": 3.2228660583496094, "learning_rate": 9.8642266824085e-06, "loss": 0.2646, "step": 557 }, { "epoch": 0.009881921661862955, "grad_norm": 3.5273869037628174, "learning_rate": 9.881936245572608e-06, "loss": 0.3285, "step": 558 }, { "epoch": 0.009899631198891383, "grad_norm": 3.895221471786499, "learning_rate": 9.899645808736718e-06, "loss": 0.3069, "step": 559 }, { "epoch": 0.009917340735919811, "grad_norm": 3.74955153465271, "learning_rate": 9.917355371900826e-06, "loss": 0.2742, "step": 560 }, { "epoch": 0.00993505027294824, "grad_norm": 3.6209962368011475, "learning_rate": 9.935064935064935e-06, "loss": 0.3515, "step": 561 }, { "epoch": 0.009952759809976668, "grad_norm": 3.1937108039855957, "learning_rate": 9.952774498229043e-06, "loss": 0.295, "step": 562 }, { "epoch": 0.009970469347005096, "grad_norm": 3.2969648838043213, "learning_rate": 9.970484061393151e-06, "loss": 0.286, "step": 563 }, { "epoch": 0.009988178884033524, "grad_norm": 2.5522377490997314, "learning_rate": 9.988193624557261e-06, "loss": 0.3229, "step": 564 }, { "epoch": 0.010005888421061952, "grad_norm": 2.914608955383301, "learning_rate": 1.000590318772137e-05, "loss": 0.2652, "step": 565 }, { "epoch": 0.01002359795809038, "grad_norm": 4.065354824066162, "learning_rate": 1.0023612750885478e-05, "loss": 0.2274, "step": 566 }, { "epoch": 0.010041307495118808, "grad_norm": 3.022474765777588, "learning_rate": 1.0041322314049586e-05, "loss": 0.2932, "step": 567 }, { "epoch": 0.010059017032147238, "grad_norm": 3.259490966796875, "learning_rate": 1.0059031877213695e-05, "loss": 0.3501, "step": 568 }, { "epoch": 0.010076726569175666, "grad_norm": 3.745133638381958, "learning_rate": 1.0076741440377803e-05, "loss": 0.3002, "step": 569 }, { "epoch": 0.010094436106204094, "grad_norm": 3.829441785812378, "learning_rate": 1.0094451003541913e-05, "loss": 0.3049, "step": 570 }, { "epoch": 0.010112145643232522, "grad_norm": 3.9210150241851807, "learning_rate": 1.0112160566706021e-05, "loss": 0.3417, "step": 571 }, { "epoch": 0.01012985518026095, "grad_norm": 4.0420823097229, "learning_rate": 1.012987012987013e-05, "loss": 0.3421, "step": 572 }, { "epoch": 0.010147564717289378, "grad_norm": 2.950167179107666, "learning_rate": 1.0147579693034238e-05, "loss": 0.3265, "step": 573 }, { "epoch": 0.010165274254317807, "grad_norm": 4.702620983123779, "learning_rate": 1.0165289256198346e-05, "loss": 0.3587, "step": 574 }, { "epoch": 0.010182983791346235, "grad_norm": 3.601080894470215, "learning_rate": 1.0182998819362456e-05, "loss": 0.3111, "step": 575 }, { "epoch": 0.010200693328374663, "grad_norm": 3.76023530960083, "learning_rate": 1.0200708382526565e-05, "loss": 0.3144, "step": 576 }, { "epoch": 0.010218402865403091, "grad_norm": 3.6817502975463867, "learning_rate": 1.0218417945690673e-05, "loss": 0.3144, "step": 577 }, { "epoch": 0.010236112402431519, "grad_norm": 3.9969866275787354, "learning_rate": 1.0236127508854781e-05, "loss": 0.2322, "step": 578 }, { "epoch": 0.010253821939459947, "grad_norm": 3.376215934753418, "learning_rate": 1.025383707201889e-05, "loss": 0.2813, "step": 579 }, { "epoch": 0.010271531476488375, "grad_norm": 4.869962215423584, "learning_rate": 1.0271546635182998e-05, "loss": 0.3442, "step": 580 }, { "epoch": 0.010289241013516803, "grad_norm": 3.5758609771728516, "learning_rate": 1.0289256198347108e-05, "loss": 0.3174, "step": 581 }, { "epoch": 0.010306950550545233, "grad_norm": 3.8253557682037354, "learning_rate": 1.0306965761511216e-05, "loss": 0.2426, "step": 582 }, { "epoch": 0.010324660087573661, "grad_norm": 3.981196880340576, "learning_rate": 1.0324675324675324e-05, "loss": 0.292, "step": 583 }, { "epoch": 0.01034236962460209, "grad_norm": 3.8605055809020996, "learning_rate": 1.0342384887839433e-05, "loss": 0.2784, "step": 584 }, { "epoch": 0.010360079161630517, "grad_norm": 4.183393955230713, "learning_rate": 1.0360094451003541e-05, "loss": 0.3163, "step": 585 }, { "epoch": 0.010377788698658946, "grad_norm": 3.9528818130493164, "learning_rate": 1.037780401416765e-05, "loss": 0.3469, "step": 586 }, { "epoch": 0.010395498235687374, "grad_norm": 4.179790496826172, "learning_rate": 1.039551357733176e-05, "loss": 0.3546, "step": 587 }, { "epoch": 0.010413207772715802, "grad_norm": 3.3767471313476562, "learning_rate": 1.0413223140495868e-05, "loss": 0.29, "step": 588 }, { "epoch": 0.01043091730974423, "grad_norm": 3.6124207973480225, "learning_rate": 1.0430932703659976e-05, "loss": 0.2776, "step": 589 }, { "epoch": 0.010448626846772658, "grad_norm": 3.4886744022369385, "learning_rate": 1.0448642266824084e-05, "loss": 0.3573, "step": 590 }, { "epoch": 0.010466336383801086, "grad_norm": 2.913076877593994, "learning_rate": 1.0466351829988193e-05, "loss": 0.339, "step": 591 }, { "epoch": 0.010484045920829514, "grad_norm": 4.217906951904297, "learning_rate": 1.0484061393152303e-05, "loss": 0.3768, "step": 592 }, { "epoch": 0.010501755457857942, "grad_norm": 4.9797797203063965, "learning_rate": 1.0501770956316411e-05, "loss": 0.2933, "step": 593 }, { "epoch": 0.01051946499488637, "grad_norm": 4.1745285987854, "learning_rate": 1.051948051948052e-05, "loss": 0.3132, "step": 594 }, { "epoch": 0.0105371745319148, "grad_norm": 3.5498006343841553, "learning_rate": 1.0537190082644628e-05, "loss": 0.3376, "step": 595 }, { "epoch": 0.010554884068943228, "grad_norm": 4.4333930015563965, "learning_rate": 1.0554899645808736e-05, "loss": 0.3185, "step": 596 }, { "epoch": 0.010572593605971656, "grad_norm": 3.4258532524108887, "learning_rate": 1.0572609208972844e-05, "loss": 0.2893, "step": 597 }, { "epoch": 0.010590303143000085, "grad_norm": 3.290158748626709, "learning_rate": 1.0590318772136954e-05, "loss": 0.3067, "step": 598 }, { "epoch": 0.010608012680028513, "grad_norm": 2.6695854663848877, "learning_rate": 1.0608028335301063e-05, "loss": 0.2579, "step": 599 }, { "epoch": 0.01062572221705694, "grad_norm": 3.7849619388580322, "learning_rate": 1.062573789846517e-05, "loss": 0.2981, "step": 600 }, { "epoch": 0.010643431754085369, "grad_norm": 3.567471742630005, "learning_rate": 1.0643447461629279e-05, "loss": 0.2513, "step": 601 }, { "epoch": 0.010661141291113797, "grad_norm": 6.131582736968994, "learning_rate": 1.0661157024793387e-05, "loss": 0.2624, "step": 602 }, { "epoch": 0.010678850828142225, "grad_norm": 3.0529515743255615, "learning_rate": 1.0678866587957497e-05, "loss": 0.3009, "step": 603 }, { "epoch": 0.010696560365170653, "grad_norm": 4.007580757141113, "learning_rate": 1.0696576151121606e-05, "loss": 0.2729, "step": 604 }, { "epoch": 0.010714269902199081, "grad_norm": 3.8831732273101807, "learning_rate": 1.0714285714285714e-05, "loss": 0.3213, "step": 605 }, { "epoch": 0.01073197943922751, "grad_norm": 3.9968113899230957, "learning_rate": 1.0731995277449822e-05, "loss": 0.323, "step": 606 }, { "epoch": 0.010749688976255938, "grad_norm": 4.528337001800537, "learning_rate": 1.074970484061393e-05, "loss": 0.3355, "step": 607 }, { "epoch": 0.010767398513284366, "grad_norm": 3.1429264545440674, "learning_rate": 1.0767414403778039e-05, "loss": 0.3292, "step": 608 }, { "epoch": 0.010785108050312795, "grad_norm": 2.9933083057403564, "learning_rate": 1.0785123966942149e-05, "loss": 0.273, "step": 609 }, { "epoch": 0.010802817587341224, "grad_norm": 3.0423476696014404, "learning_rate": 1.0802833530106257e-05, "loss": 0.2796, "step": 610 }, { "epoch": 0.010820527124369652, "grad_norm": 4.479870319366455, "learning_rate": 1.0820543093270366e-05, "loss": 0.3451, "step": 611 }, { "epoch": 0.01083823666139808, "grad_norm": 3.7816433906555176, "learning_rate": 1.0838252656434474e-05, "loss": 0.2586, "step": 612 }, { "epoch": 0.010855946198426508, "grad_norm": 3.57519793510437, "learning_rate": 1.0855962219598582e-05, "loss": 0.2831, "step": 613 }, { "epoch": 0.010873655735454936, "grad_norm": 2.670597791671753, "learning_rate": 1.087367178276269e-05, "loss": 0.2595, "step": 614 }, { "epoch": 0.010891365272483364, "grad_norm": 3.080822467803955, "learning_rate": 1.08913813459268e-05, "loss": 0.2887, "step": 615 }, { "epoch": 0.010909074809511792, "grad_norm": 4.420372486114502, "learning_rate": 1.0909090909090909e-05, "loss": 0.296, "step": 616 }, { "epoch": 0.01092678434654022, "grad_norm": 3.551335334777832, "learning_rate": 1.0926800472255017e-05, "loss": 0.2742, "step": 617 }, { "epoch": 0.010944493883568648, "grad_norm": 3.5174143314361572, "learning_rate": 1.0944510035419126e-05, "loss": 0.2398, "step": 618 }, { "epoch": 0.010962203420597076, "grad_norm": 3.7353086471557617, "learning_rate": 1.0962219598583234e-05, "loss": 0.2893, "step": 619 }, { "epoch": 0.010979912957625505, "grad_norm": 3.01607084274292, "learning_rate": 1.0979929161747344e-05, "loss": 0.2234, "step": 620 }, { "epoch": 0.010997622494653933, "grad_norm": 10.035015106201172, "learning_rate": 1.0997638724911452e-05, "loss": 0.3282, "step": 621 }, { "epoch": 0.011015332031682363, "grad_norm": 5.625340938568115, "learning_rate": 1.101534828807556e-05, "loss": 0.3208, "step": 622 }, { "epoch": 0.01103304156871079, "grad_norm": 4.167219638824463, "learning_rate": 1.1033057851239669e-05, "loss": 0.3262, "step": 623 }, { "epoch": 0.011050751105739219, "grad_norm": 4.236057281494141, "learning_rate": 1.1050767414403777e-05, "loss": 0.3194, "step": 624 }, { "epoch": 0.011068460642767647, "grad_norm": 4.513891220092773, "learning_rate": 1.1068476977567885e-05, "loss": 0.2884, "step": 625 }, { "epoch": 0.011086170179796075, "grad_norm": 3.8267359733581543, "learning_rate": 1.1086186540731995e-05, "loss": 0.268, "step": 626 }, { "epoch": 0.011103879716824503, "grad_norm": 2.999478816986084, "learning_rate": 1.1103896103896104e-05, "loss": 0.2225, "step": 627 }, { "epoch": 0.011121589253852931, "grad_norm": 3.5239903926849365, "learning_rate": 1.1121605667060212e-05, "loss": 0.2649, "step": 628 }, { "epoch": 0.01113929879088136, "grad_norm": 3.5992603302001953, "learning_rate": 1.113931523022432e-05, "loss": 0.3085, "step": 629 }, { "epoch": 0.011157008327909787, "grad_norm": 3.537876844406128, "learning_rate": 1.1157024793388429e-05, "loss": 0.2611, "step": 630 }, { "epoch": 0.011174717864938215, "grad_norm": 4.015173435211182, "learning_rate": 1.1174734356552537e-05, "loss": 0.3234, "step": 631 }, { "epoch": 0.011192427401966644, "grad_norm": 4.639187812805176, "learning_rate": 1.1192443919716647e-05, "loss": 0.3091, "step": 632 }, { "epoch": 0.011210136938995072, "grad_norm": 3.310431480407715, "learning_rate": 1.1210153482880755e-05, "loss": 0.3229, "step": 633 }, { "epoch": 0.0112278464760235, "grad_norm": 6.903495788574219, "learning_rate": 1.1227863046044864e-05, "loss": 0.2701, "step": 634 }, { "epoch": 0.01124555601305193, "grad_norm": 4.010343074798584, "learning_rate": 1.1245572609208974e-05, "loss": 0.3088, "step": 635 }, { "epoch": 0.011263265550080358, "grad_norm": 2.7269959449768066, "learning_rate": 1.1263282172373082e-05, "loss": 0.2888, "step": 636 }, { "epoch": 0.011280975087108786, "grad_norm": 4.066575050354004, "learning_rate": 1.128099173553719e-05, "loss": 0.2841, "step": 637 }, { "epoch": 0.011298684624137214, "grad_norm": 3.1763012409210205, "learning_rate": 1.12987012987013e-05, "loss": 0.278, "step": 638 }, { "epoch": 0.011316394161165642, "grad_norm": 3.280134916305542, "learning_rate": 1.1316410861865409e-05, "loss": 0.278, "step": 639 }, { "epoch": 0.01133410369819407, "grad_norm": 3.4392690658569336, "learning_rate": 1.1334120425029517e-05, "loss": 0.2218, "step": 640 }, { "epoch": 0.011351813235222498, "grad_norm": 7.339663028717041, "learning_rate": 1.1351829988193625e-05, "loss": 0.2865, "step": 641 }, { "epoch": 0.011369522772250926, "grad_norm": 3.851538896560669, "learning_rate": 1.1369539551357734e-05, "loss": 0.2494, "step": 642 }, { "epoch": 0.011387232309279354, "grad_norm": 2.8633644580841064, "learning_rate": 1.1387249114521844e-05, "loss": 0.2892, "step": 643 }, { "epoch": 0.011404941846307783, "grad_norm": 3.081355571746826, "learning_rate": 1.1404958677685952e-05, "loss": 0.2824, "step": 644 }, { "epoch": 0.01142265138333621, "grad_norm": 4.0398149490356445, "learning_rate": 1.142266824085006e-05, "loss": 0.2483, "step": 645 }, { "epoch": 0.011440360920364639, "grad_norm": 3.5588786602020264, "learning_rate": 1.1440377804014168e-05, "loss": 0.2525, "step": 646 }, { "epoch": 0.011458070457393067, "grad_norm": 3.6606709957122803, "learning_rate": 1.1458087367178277e-05, "loss": 0.3264, "step": 647 }, { "epoch": 0.011475779994421495, "grad_norm": 3.9685158729553223, "learning_rate": 1.1475796930342387e-05, "loss": 0.3005, "step": 648 }, { "epoch": 0.011493489531449925, "grad_norm": 3.840383529663086, "learning_rate": 1.1493506493506495e-05, "loss": 0.3084, "step": 649 }, { "epoch": 0.011511199068478353, "grad_norm": 3.0051181316375732, "learning_rate": 1.1511216056670603e-05, "loss": 0.2865, "step": 650 }, { "epoch": 0.011528908605506781, "grad_norm": 3.594433307647705, "learning_rate": 1.1528925619834712e-05, "loss": 0.2846, "step": 651 }, { "epoch": 0.01154661814253521, "grad_norm": 4.195858478546143, "learning_rate": 1.154663518299882e-05, "loss": 0.2906, "step": 652 }, { "epoch": 0.011564327679563637, "grad_norm": 4.0655837059021, "learning_rate": 1.1564344746162928e-05, "loss": 0.2824, "step": 653 }, { "epoch": 0.011582037216592065, "grad_norm": 2.5835375785827637, "learning_rate": 1.1582054309327038e-05, "loss": 0.2598, "step": 654 }, { "epoch": 0.011599746753620493, "grad_norm": 3.130460262298584, "learning_rate": 1.1599763872491147e-05, "loss": 0.3135, "step": 655 }, { "epoch": 0.011617456290648922, "grad_norm": 2.3559834957122803, "learning_rate": 1.1617473435655255e-05, "loss": 0.2733, "step": 656 }, { "epoch": 0.01163516582767735, "grad_norm": 3.4577314853668213, "learning_rate": 1.1635182998819363e-05, "loss": 0.2879, "step": 657 }, { "epoch": 0.011652875364705778, "grad_norm": 4.782825469970703, "learning_rate": 1.1652892561983472e-05, "loss": 0.2911, "step": 658 }, { "epoch": 0.011670584901734206, "grad_norm": 2.451289653778076, "learning_rate": 1.167060212514758e-05, "loss": 0.2397, "step": 659 }, { "epoch": 0.011688294438762634, "grad_norm": 3.8965938091278076, "learning_rate": 1.168831168831169e-05, "loss": 0.2747, "step": 660 }, { "epoch": 0.011706003975791062, "grad_norm": 3.4624102115631104, "learning_rate": 1.1706021251475798e-05, "loss": 0.3192, "step": 661 }, { "epoch": 0.011723713512819492, "grad_norm": 3.1148345470428467, "learning_rate": 1.1723730814639907e-05, "loss": 0.2211, "step": 662 }, { "epoch": 0.01174142304984792, "grad_norm": 3.735468626022339, "learning_rate": 1.1741440377804015e-05, "loss": 0.24, "step": 663 }, { "epoch": 0.011759132586876348, "grad_norm": 3.5258948802948, "learning_rate": 1.1759149940968123e-05, "loss": 0.2468, "step": 664 }, { "epoch": 0.011776842123904776, "grad_norm": 3.3994412422180176, "learning_rate": 1.1776859504132233e-05, "loss": 0.3325, "step": 665 }, { "epoch": 0.011794551660933204, "grad_norm": 4.187801361083984, "learning_rate": 1.1794569067296342e-05, "loss": 0.2725, "step": 666 }, { "epoch": 0.011812261197961632, "grad_norm": 3.1754541397094727, "learning_rate": 1.181227863046045e-05, "loss": 0.3334, "step": 667 }, { "epoch": 0.01182997073499006, "grad_norm": 3.7650043964385986, "learning_rate": 1.1829988193624558e-05, "loss": 0.2612, "step": 668 }, { "epoch": 0.011847680272018489, "grad_norm": 3.42564058303833, "learning_rate": 1.1847697756788666e-05, "loss": 0.2535, "step": 669 }, { "epoch": 0.011865389809046917, "grad_norm": 3.970458507537842, "learning_rate": 1.1865407319952775e-05, "loss": 0.2641, "step": 670 }, { "epoch": 0.011883099346075345, "grad_norm": 2.8349921703338623, "learning_rate": 1.1883116883116885e-05, "loss": 0.2703, "step": 671 }, { "epoch": 0.011900808883103773, "grad_norm": 3.983018159866333, "learning_rate": 1.1900826446280993e-05, "loss": 0.2378, "step": 672 }, { "epoch": 0.011918518420132201, "grad_norm": 3.304542303085327, "learning_rate": 1.1918536009445101e-05, "loss": 0.3015, "step": 673 }, { "epoch": 0.01193622795716063, "grad_norm": 4.072509288787842, "learning_rate": 1.193624557260921e-05, "loss": 0.3551, "step": 674 }, { "epoch": 0.011953937494189059, "grad_norm": 3.30562162399292, "learning_rate": 1.1953955135773318e-05, "loss": 0.2989, "step": 675 }, { "epoch": 0.011971647031217487, "grad_norm": 4.19535493850708, "learning_rate": 1.1971664698937428e-05, "loss": 0.3075, "step": 676 }, { "epoch": 0.011989356568245915, "grad_norm": 4.513827800750732, "learning_rate": 1.1989374262101536e-05, "loss": 0.3383, "step": 677 }, { "epoch": 0.012007066105274343, "grad_norm": 3.213318109512329, "learning_rate": 1.2007083825265645e-05, "loss": 0.2447, "step": 678 }, { "epoch": 0.012024775642302771, "grad_norm": 3.7242379188537598, "learning_rate": 1.2024793388429753e-05, "loss": 0.295, "step": 679 }, { "epoch": 0.0120424851793312, "grad_norm": 3.2992897033691406, "learning_rate": 1.2042502951593861e-05, "loss": 0.2869, "step": 680 }, { "epoch": 0.012060194716359628, "grad_norm": 2.8634984493255615, "learning_rate": 1.206021251475797e-05, "loss": 0.2888, "step": 681 }, { "epoch": 0.012077904253388056, "grad_norm": 3.613853931427002, "learning_rate": 1.207792207792208e-05, "loss": 0.3136, "step": 682 }, { "epoch": 0.012095613790416484, "grad_norm": 3.677427053451538, "learning_rate": 1.2095631641086188e-05, "loss": 0.3073, "step": 683 }, { "epoch": 0.012113323327444912, "grad_norm": 3.0802109241485596, "learning_rate": 1.2113341204250296e-05, "loss": 0.2884, "step": 684 }, { "epoch": 0.01213103286447334, "grad_norm": 3.0449116230010986, "learning_rate": 1.2131050767414405e-05, "loss": 0.314, "step": 685 }, { "epoch": 0.012148742401501768, "grad_norm": 4.58173942565918, "learning_rate": 1.2148760330578513e-05, "loss": 0.2594, "step": 686 }, { "epoch": 0.012166451938530196, "grad_norm": 4.352831840515137, "learning_rate": 1.2166469893742621e-05, "loss": 0.3262, "step": 687 }, { "epoch": 0.012184161475558624, "grad_norm": 4.425669193267822, "learning_rate": 1.2184179456906731e-05, "loss": 0.3035, "step": 688 }, { "epoch": 0.012201871012587054, "grad_norm": 3.215141534805298, "learning_rate": 1.220188902007084e-05, "loss": 0.2923, "step": 689 }, { "epoch": 0.012219580549615482, "grad_norm": 3.5604069232940674, "learning_rate": 1.2219598583234948e-05, "loss": 0.2898, "step": 690 }, { "epoch": 0.01223729008664391, "grad_norm": 3.9341981410980225, "learning_rate": 1.2237308146399056e-05, "loss": 0.2544, "step": 691 }, { "epoch": 0.012254999623672339, "grad_norm": 5.867823123931885, "learning_rate": 1.2255017709563164e-05, "loss": 0.2577, "step": 692 }, { "epoch": 0.012272709160700767, "grad_norm": 4.298845291137695, "learning_rate": 1.2272727272727274e-05, "loss": 0.2118, "step": 693 }, { "epoch": 0.012290418697729195, "grad_norm": 3.127088785171509, "learning_rate": 1.2290436835891383e-05, "loss": 0.3114, "step": 694 }, { "epoch": 0.012308128234757623, "grad_norm": 3.2110626697540283, "learning_rate": 1.2308146399055491e-05, "loss": 0.2319, "step": 695 }, { "epoch": 0.012325837771786051, "grad_norm": 3.02630877494812, "learning_rate": 1.23258559622196e-05, "loss": 0.2713, "step": 696 }, { "epoch": 0.012343547308814479, "grad_norm": 3.506972312927246, "learning_rate": 1.2343565525383708e-05, "loss": 0.3113, "step": 697 }, { "epoch": 0.012361256845842907, "grad_norm": 3.1195507049560547, "learning_rate": 1.2361275088547816e-05, "loss": 0.2491, "step": 698 }, { "epoch": 0.012378966382871335, "grad_norm": 3.1061453819274902, "learning_rate": 1.2378984651711926e-05, "loss": 0.2713, "step": 699 }, { "epoch": 0.012396675919899763, "grad_norm": 3.657191276550293, "learning_rate": 1.2396694214876034e-05, "loss": 0.3078, "step": 700 }, { "epoch": 0.012414385456928191, "grad_norm": 5.176729202270508, "learning_rate": 1.2414403778040143e-05, "loss": 0.3318, "step": 701 }, { "epoch": 0.012432094993956621, "grad_norm": 3.712618827819824, "learning_rate": 1.2432113341204251e-05, "loss": 0.2964, "step": 702 }, { "epoch": 0.01244980453098505, "grad_norm": 3.9182965755462646, "learning_rate": 1.244982290436836e-05, "loss": 0.2984, "step": 703 }, { "epoch": 0.012467514068013478, "grad_norm": 3.615446090698242, "learning_rate": 1.246753246753247e-05, "loss": 0.28, "step": 704 }, { "epoch": 0.012485223605041906, "grad_norm": 4.456589221954346, "learning_rate": 1.2485242030696578e-05, "loss": 0.2795, "step": 705 }, { "epoch": 0.012502933142070334, "grad_norm": 2.9628186225891113, "learning_rate": 1.2502951593860686e-05, "loss": 0.2679, "step": 706 }, { "epoch": 0.012520642679098762, "grad_norm": 4.419187068939209, "learning_rate": 1.2520661157024794e-05, "loss": 0.2828, "step": 707 }, { "epoch": 0.01253835221612719, "grad_norm": 3.8366713523864746, "learning_rate": 1.2538370720188903e-05, "loss": 0.2728, "step": 708 }, { "epoch": 0.012556061753155618, "grad_norm": 4.38347053527832, "learning_rate": 1.255608028335301e-05, "loss": 0.2517, "step": 709 }, { "epoch": 0.012573771290184046, "grad_norm": 3.0783307552337646, "learning_rate": 1.257378984651712e-05, "loss": 0.217, "step": 710 }, { "epoch": 0.012591480827212474, "grad_norm": 4.222809314727783, "learning_rate": 1.2591499409681229e-05, "loss": 0.2936, "step": 711 }, { "epoch": 0.012609190364240902, "grad_norm": 3.582655191421509, "learning_rate": 1.2609208972845337e-05, "loss": 0.2613, "step": 712 }, { "epoch": 0.01262689990126933, "grad_norm": 2.720393180847168, "learning_rate": 1.2626918536009446e-05, "loss": 0.2577, "step": 713 }, { "epoch": 0.012644609438297759, "grad_norm": 3.051708936691284, "learning_rate": 1.2644628099173554e-05, "loss": 0.252, "step": 714 }, { "epoch": 0.012662318975326188, "grad_norm": 2.8675472736358643, "learning_rate": 1.2662337662337662e-05, "loss": 0.2525, "step": 715 }, { "epoch": 0.012680028512354617, "grad_norm": 4.664363861083984, "learning_rate": 1.2680047225501772e-05, "loss": 0.2874, "step": 716 }, { "epoch": 0.012697738049383045, "grad_norm": 3.769902467727661, "learning_rate": 1.269775678866588e-05, "loss": 0.2963, "step": 717 }, { "epoch": 0.012715447586411473, "grad_norm": 3.966372489929199, "learning_rate": 1.2715466351829989e-05, "loss": 0.2224, "step": 718 }, { "epoch": 0.0127331571234399, "grad_norm": 3.763244390487671, "learning_rate": 1.2733175914994097e-05, "loss": 0.2616, "step": 719 }, { "epoch": 0.012750866660468329, "grad_norm": 3.926788091659546, "learning_rate": 1.2750885478158206e-05, "loss": 0.2143, "step": 720 }, { "epoch": 0.012768576197496757, "grad_norm": 3.9870142936706543, "learning_rate": 1.2768595041322316e-05, "loss": 0.2992, "step": 721 }, { "epoch": 0.012786285734525185, "grad_norm": 3.496575355529785, "learning_rate": 1.2786304604486424e-05, "loss": 0.2812, "step": 722 }, { "epoch": 0.012803995271553613, "grad_norm": 4.020864963531494, "learning_rate": 1.2804014167650532e-05, "loss": 0.2718, "step": 723 }, { "epoch": 0.012821704808582041, "grad_norm": 3.7069718837738037, "learning_rate": 1.282172373081464e-05, "loss": 0.2375, "step": 724 }, { "epoch": 0.01283941434561047, "grad_norm": 3.7505338191986084, "learning_rate": 1.2839433293978749e-05, "loss": 0.2998, "step": 725 }, { "epoch": 0.012857123882638898, "grad_norm": 2.6950008869171143, "learning_rate": 1.2857142857142857e-05, "loss": 0.288, "step": 726 }, { "epoch": 0.012874833419667326, "grad_norm": 3.530226945877075, "learning_rate": 1.2874852420306967e-05, "loss": 0.2892, "step": 727 }, { "epoch": 0.012892542956695754, "grad_norm": 2.848052740097046, "learning_rate": 1.2892561983471076e-05, "loss": 0.2223, "step": 728 }, { "epoch": 0.012910252493724184, "grad_norm": 5.148857116699219, "learning_rate": 1.2910271546635184e-05, "loss": 0.2886, "step": 729 }, { "epoch": 0.012927962030752612, "grad_norm": 2.8496510982513428, "learning_rate": 1.2927981109799292e-05, "loss": 0.3131, "step": 730 }, { "epoch": 0.01294567156778104, "grad_norm": 3.3177995681762695, "learning_rate": 1.29456906729634e-05, "loss": 0.2729, "step": 731 }, { "epoch": 0.012963381104809468, "grad_norm": 2.3948779106140137, "learning_rate": 1.2963400236127509e-05, "loss": 0.2632, "step": 732 }, { "epoch": 0.012981090641837896, "grad_norm": 2.965151309967041, "learning_rate": 1.2981109799291619e-05, "loss": 0.2888, "step": 733 }, { "epoch": 0.012998800178866324, "grad_norm": 3.1736984252929688, "learning_rate": 1.2998819362455727e-05, "loss": 0.3525, "step": 734 }, { "epoch": 0.013016509715894752, "grad_norm": 3.0777838230133057, "learning_rate": 1.3016528925619835e-05, "loss": 0.2967, "step": 735 }, { "epoch": 0.01303421925292318, "grad_norm": 3.9619569778442383, "learning_rate": 1.3034238488783944e-05, "loss": 0.3134, "step": 736 }, { "epoch": 0.013051928789951608, "grad_norm": 2.971637010574341, "learning_rate": 1.3051948051948052e-05, "loss": 0.2366, "step": 737 }, { "epoch": 0.013069638326980037, "grad_norm": 3.3658945560455322, "learning_rate": 1.3069657615112162e-05, "loss": 0.276, "step": 738 }, { "epoch": 0.013087347864008465, "grad_norm": 3.4731478691101074, "learning_rate": 1.308736717827627e-05, "loss": 0.2873, "step": 739 }, { "epoch": 0.013105057401036893, "grad_norm": 5.139403343200684, "learning_rate": 1.3105076741440379e-05, "loss": 0.2533, "step": 740 }, { "epoch": 0.01312276693806532, "grad_norm": 4.561427593231201, "learning_rate": 1.3122786304604487e-05, "loss": 0.2757, "step": 741 }, { "epoch": 0.01314047647509375, "grad_norm": 2.6998484134674072, "learning_rate": 1.3140495867768595e-05, "loss": 0.2613, "step": 742 }, { "epoch": 0.013158186012122179, "grad_norm": 4.087787628173828, "learning_rate": 1.3158205430932704e-05, "loss": 0.281, "step": 743 }, { "epoch": 0.013175895549150607, "grad_norm": 4.097916126251221, "learning_rate": 1.3175914994096814e-05, "loss": 0.2348, "step": 744 }, { "epoch": 0.013193605086179035, "grad_norm": 2.9009573459625244, "learning_rate": 1.3193624557260922e-05, "loss": 0.2281, "step": 745 }, { "epoch": 0.013211314623207463, "grad_norm": 4.0431928634643555, "learning_rate": 1.321133412042503e-05, "loss": 0.2964, "step": 746 }, { "epoch": 0.013229024160235891, "grad_norm": 3.3004584312438965, "learning_rate": 1.3229043683589139e-05, "loss": 0.2645, "step": 747 }, { "epoch": 0.01324673369726432, "grad_norm": 3.297995090484619, "learning_rate": 1.3246753246753247e-05, "loss": 0.2992, "step": 748 }, { "epoch": 0.013264443234292747, "grad_norm": 3.598801374435425, "learning_rate": 1.3264462809917357e-05, "loss": 0.2212, "step": 749 }, { "epoch": 0.013282152771321176, "grad_norm": 3.0572006702423096, "learning_rate": 1.3282172373081465e-05, "loss": 0.3175, "step": 750 }, { "epoch": 0.013299862308349604, "grad_norm": 3.0344865322113037, "learning_rate": 1.3299881936245574e-05, "loss": 0.3282, "step": 751 }, { "epoch": 0.013317571845378032, "grad_norm": 3.3999550342559814, "learning_rate": 1.3317591499409682e-05, "loss": 0.2438, "step": 752 }, { "epoch": 0.01333528138240646, "grad_norm": 3.7291440963745117, "learning_rate": 1.333530106257379e-05, "loss": 0.2482, "step": 753 }, { "epoch": 0.013352990919434888, "grad_norm": 2.8178164958953857, "learning_rate": 1.3353010625737898e-05, "loss": 0.2795, "step": 754 }, { "epoch": 0.013370700456463316, "grad_norm": 3.4611573219299316, "learning_rate": 1.3370720188902008e-05, "loss": 0.2166, "step": 755 }, { "epoch": 0.013388409993491746, "grad_norm": 3.008009910583496, "learning_rate": 1.3388429752066117e-05, "loss": 0.2779, "step": 756 }, { "epoch": 0.013406119530520174, "grad_norm": 3.6844804286956787, "learning_rate": 1.3406139315230225e-05, "loss": 0.296, "step": 757 }, { "epoch": 0.013423829067548602, "grad_norm": 3.4051706790924072, "learning_rate": 1.3423848878394333e-05, "loss": 0.2109, "step": 758 }, { "epoch": 0.01344153860457703, "grad_norm": 4.389113426208496, "learning_rate": 1.3441558441558442e-05, "loss": 0.3116, "step": 759 }, { "epoch": 0.013459248141605458, "grad_norm": 3.6325273513793945, "learning_rate": 1.345926800472255e-05, "loss": 0.2757, "step": 760 }, { "epoch": 0.013476957678633886, "grad_norm": 3.606416940689087, "learning_rate": 1.347697756788666e-05, "loss": 0.2266, "step": 761 }, { "epoch": 0.013494667215662315, "grad_norm": 2.944957971572876, "learning_rate": 1.3494687131050768e-05, "loss": 0.2368, "step": 762 }, { "epoch": 0.013512376752690743, "grad_norm": 3.5843470096588135, "learning_rate": 1.3512396694214877e-05, "loss": 0.2679, "step": 763 }, { "epoch": 0.01353008628971917, "grad_norm": 3.885674238204956, "learning_rate": 1.3530106257378985e-05, "loss": 0.304, "step": 764 }, { "epoch": 0.013547795826747599, "grad_norm": 3.636401653289795, "learning_rate": 1.3547815820543093e-05, "loss": 0.2178, "step": 765 }, { "epoch": 0.013565505363776027, "grad_norm": 3.298795461654663, "learning_rate": 1.3565525383707203e-05, "loss": 0.3319, "step": 766 }, { "epoch": 0.013583214900804455, "grad_norm": 3.767066478729248, "learning_rate": 1.3583234946871312e-05, "loss": 0.2749, "step": 767 }, { "epoch": 0.013600924437832883, "grad_norm": 2.561018705368042, "learning_rate": 1.360094451003542e-05, "loss": 0.2199, "step": 768 }, { "epoch": 0.013618633974861313, "grad_norm": 3.6686172485351562, "learning_rate": 1.3618654073199528e-05, "loss": 0.2632, "step": 769 }, { "epoch": 0.013636343511889741, "grad_norm": 3.4040157794952393, "learning_rate": 1.3636363636363637e-05, "loss": 0.2402, "step": 770 }, { "epoch": 0.01365405304891817, "grad_norm": 3.7989394664764404, "learning_rate": 1.3654073199527745e-05, "loss": 0.2967, "step": 771 }, { "epoch": 0.013671762585946597, "grad_norm": 3.609119415283203, "learning_rate": 1.3671782762691855e-05, "loss": 0.3143, "step": 772 }, { "epoch": 0.013689472122975025, "grad_norm": 3.085630416870117, "learning_rate": 1.3689492325855963e-05, "loss": 0.2414, "step": 773 }, { "epoch": 0.013707181660003454, "grad_norm": 2.410400867462158, "learning_rate": 1.3707201889020072e-05, "loss": 0.2168, "step": 774 }, { "epoch": 0.013724891197031882, "grad_norm": 3.089731454849243, "learning_rate": 1.372491145218418e-05, "loss": 0.2708, "step": 775 }, { "epoch": 0.01374260073406031, "grad_norm": 2.837721824645996, "learning_rate": 1.3742621015348288e-05, "loss": 0.2889, "step": 776 }, { "epoch": 0.013760310271088738, "grad_norm": 3.668142557144165, "learning_rate": 1.3760330578512398e-05, "loss": 0.2436, "step": 777 }, { "epoch": 0.013778019808117166, "grad_norm": 2.9912331104278564, "learning_rate": 1.3778040141676506e-05, "loss": 0.2588, "step": 778 }, { "epoch": 0.013795729345145594, "grad_norm": 3.153373956680298, "learning_rate": 1.3795749704840615e-05, "loss": 0.3132, "step": 779 }, { "epoch": 0.013813438882174022, "grad_norm": 4.183038711547852, "learning_rate": 1.3813459268004723e-05, "loss": 0.2839, "step": 780 }, { "epoch": 0.01383114841920245, "grad_norm": 4.561267375946045, "learning_rate": 1.3831168831168831e-05, "loss": 0.2867, "step": 781 }, { "epoch": 0.01384885795623088, "grad_norm": 2.7727127075195312, "learning_rate": 1.384887839433294e-05, "loss": 0.2824, "step": 782 }, { "epoch": 0.013866567493259308, "grad_norm": 4.690083026885986, "learning_rate": 1.386658795749705e-05, "loss": 0.2707, "step": 783 }, { "epoch": 0.013884277030287736, "grad_norm": 3.0775704383850098, "learning_rate": 1.3884297520661158e-05, "loss": 0.2758, "step": 784 }, { "epoch": 0.013901986567316164, "grad_norm": 3.1322579383850098, "learning_rate": 1.3902007083825266e-05, "loss": 0.2451, "step": 785 }, { "epoch": 0.013919696104344592, "grad_norm": 3.0992825031280518, "learning_rate": 1.3919716646989375e-05, "loss": 0.2813, "step": 786 }, { "epoch": 0.01393740564137302, "grad_norm": 3.689483880996704, "learning_rate": 1.3937426210153483e-05, "loss": 0.3097, "step": 787 }, { "epoch": 0.013955115178401449, "grad_norm": 2.900174379348755, "learning_rate": 1.3955135773317591e-05, "loss": 0.2808, "step": 788 }, { "epoch": 0.013972824715429877, "grad_norm": 3.473814010620117, "learning_rate": 1.3972845336481701e-05, "loss": 0.2278, "step": 789 }, { "epoch": 0.013990534252458305, "grad_norm": 2.8068809509277344, "learning_rate": 1.399055489964581e-05, "loss": 0.2808, "step": 790 }, { "epoch": 0.014008243789486733, "grad_norm": 2.7631659507751465, "learning_rate": 1.4008264462809918e-05, "loss": 0.2243, "step": 791 }, { "epoch": 0.014025953326515161, "grad_norm": 3.2092325687408447, "learning_rate": 1.4025974025974026e-05, "loss": 0.2509, "step": 792 }, { "epoch": 0.01404366286354359, "grad_norm": 3.537273406982422, "learning_rate": 1.4043683589138135e-05, "loss": 0.2945, "step": 793 }, { "epoch": 0.014061372400572017, "grad_norm": 3.3613250255584717, "learning_rate": 1.4061393152302245e-05, "loss": 0.272, "step": 794 }, { "epoch": 0.014079081937600445, "grad_norm": 3.476778030395508, "learning_rate": 1.4079102715466353e-05, "loss": 0.2752, "step": 795 }, { "epoch": 0.014096791474628875, "grad_norm": 2.395601749420166, "learning_rate": 1.4096812278630461e-05, "loss": 0.2478, "step": 796 }, { "epoch": 0.014114501011657303, "grad_norm": 3.4920754432678223, "learning_rate": 1.411452184179457e-05, "loss": 0.212, "step": 797 }, { "epoch": 0.014132210548685731, "grad_norm": 2.903257131576538, "learning_rate": 1.4132231404958678e-05, "loss": 0.2288, "step": 798 }, { "epoch": 0.01414992008571416, "grad_norm": 2.9657886028289795, "learning_rate": 1.4149940968122786e-05, "loss": 0.2195, "step": 799 }, { "epoch": 0.014167629622742588, "grad_norm": 2.6865811347961426, "learning_rate": 1.4167650531286896e-05, "loss": 0.2796, "step": 800 }, { "epoch": 0.014185339159771016, "grad_norm": 3.0191493034362793, "learning_rate": 1.4185360094451004e-05, "loss": 0.2191, "step": 801 }, { "epoch": 0.014203048696799444, "grad_norm": 3.664377450942993, "learning_rate": 1.4203069657615113e-05, "loss": 0.2125, "step": 802 }, { "epoch": 0.014220758233827872, "grad_norm": 5.142484188079834, "learning_rate": 1.4220779220779221e-05, "loss": 0.235, "step": 803 }, { "epoch": 0.0142384677708563, "grad_norm": 4.334110260009766, "learning_rate": 1.423848878394333e-05, "loss": 0.2323, "step": 804 }, { "epoch": 0.014256177307884728, "grad_norm": 4.824215888977051, "learning_rate": 1.425619834710744e-05, "loss": 0.2869, "step": 805 }, { "epoch": 0.014273886844913156, "grad_norm": 4.136261463165283, "learning_rate": 1.4273907910271548e-05, "loss": 0.244, "step": 806 }, { "epoch": 0.014291596381941584, "grad_norm": 3.108125686645508, "learning_rate": 1.4291617473435656e-05, "loss": 0.2813, "step": 807 }, { "epoch": 0.014309305918970013, "grad_norm": 2.976840019226074, "learning_rate": 1.4309327036599764e-05, "loss": 0.2172, "step": 808 }, { "epoch": 0.014327015455998442, "grad_norm": 3.9331977367401123, "learning_rate": 1.4327036599763873e-05, "loss": 0.2848, "step": 809 }, { "epoch": 0.01434472499302687, "grad_norm": 3.794069766998291, "learning_rate": 1.4344746162927981e-05, "loss": 0.2666, "step": 810 }, { "epoch": 0.014362434530055299, "grad_norm": 3.6306304931640625, "learning_rate": 1.4362455726092091e-05, "loss": 0.2238, "step": 811 }, { "epoch": 0.014380144067083727, "grad_norm": 2.680814266204834, "learning_rate": 1.43801652892562e-05, "loss": 0.2914, "step": 812 }, { "epoch": 0.014397853604112155, "grad_norm": 3.7049028873443604, "learning_rate": 1.4397874852420308e-05, "loss": 0.2047, "step": 813 }, { "epoch": 0.014415563141140583, "grad_norm": 2.81430983543396, "learning_rate": 1.4415584415584416e-05, "loss": 0.2509, "step": 814 }, { "epoch": 0.014433272678169011, "grad_norm": 2.757364273071289, "learning_rate": 1.4433293978748524e-05, "loss": 0.2151, "step": 815 }, { "epoch": 0.014450982215197439, "grad_norm": 3.548339605331421, "learning_rate": 1.4451003541912633e-05, "loss": 0.278, "step": 816 }, { "epoch": 0.014468691752225867, "grad_norm": 3.8404781818389893, "learning_rate": 1.4468713105076743e-05, "loss": 0.2418, "step": 817 }, { "epoch": 0.014486401289254295, "grad_norm": 3.707854747772217, "learning_rate": 1.448642266824085e-05, "loss": 0.2704, "step": 818 }, { "epoch": 0.014504110826282723, "grad_norm": 3.973817825317383, "learning_rate": 1.4504132231404959e-05, "loss": 0.2195, "step": 819 }, { "epoch": 0.014521820363311152, "grad_norm": 2.7335357666015625, "learning_rate": 1.4521841794569067e-05, "loss": 0.2667, "step": 820 }, { "epoch": 0.01453952990033958, "grad_norm": 3.7470943927764893, "learning_rate": 1.4539551357733176e-05, "loss": 0.2911, "step": 821 }, { "epoch": 0.01455723943736801, "grad_norm": 3.904993772506714, "learning_rate": 1.4557260920897286e-05, "loss": 0.2456, "step": 822 }, { "epoch": 0.014574948974396438, "grad_norm": 3.239161252975464, "learning_rate": 1.4574970484061394e-05, "loss": 0.3195, "step": 823 }, { "epoch": 0.014592658511424866, "grad_norm": 4.492678165435791, "learning_rate": 1.4592680047225502e-05, "loss": 0.2093, "step": 824 }, { "epoch": 0.014610368048453294, "grad_norm": 4.2188801765441895, "learning_rate": 1.461038961038961e-05, "loss": 0.2453, "step": 825 }, { "epoch": 0.014628077585481722, "grad_norm": 3.0873801708221436, "learning_rate": 1.4628099173553719e-05, "loss": 0.2658, "step": 826 }, { "epoch": 0.01464578712251015, "grad_norm": 2.9514925479888916, "learning_rate": 1.4645808736717827e-05, "loss": 0.2429, "step": 827 }, { "epoch": 0.014663496659538578, "grad_norm": 3.299889326095581, "learning_rate": 1.4663518299881937e-05, "loss": 0.2068, "step": 828 }, { "epoch": 0.014681206196567006, "grad_norm": 3.612199306488037, "learning_rate": 1.4681227863046046e-05, "loss": 0.2563, "step": 829 }, { "epoch": 0.014698915733595434, "grad_norm": 3.2055506706237793, "learning_rate": 1.4698937426210154e-05, "loss": 0.2917, "step": 830 }, { "epoch": 0.014716625270623862, "grad_norm": 2.9018101692199707, "learning_rate": 1.4716646989374262e-05, "loss": 0.2221, "step": 831 }, { "epoch": 0.01473433480765229, "grad_norm": 2.9657998085021973, "learning_rate": 1.473435655253837e-05, "loss": 0.2287, "step": 832 }, { "epoch": 0.014752044344680719, "grad_norm": 4.151045799255371, "learning_rate": 1.475206611570248e-05, "loss": 0.3064, "step": 833 }, { "epoch": 0.014769753881709147, "grad_norm": 2.9256603717803955, "learning_rate": 1.4769775678866589e-05, "loss": 0.2783, "step": 834 }, { "epoch": 0.014787463418737575, "grad_norm": 3.065783977508545, "learning_rate": 1.4787485242030697e-05, "loss": 0.3395, "step": 835 }, { "epoch": 0.014805172955766005, "grad_norm": 2.985644578933716, "learning_rate": 1.4805194805194806e-05, "loss": 0.2448, "step": 836 }, { "epoch": 0.014822882492794433, "grad_norm": 4.5487165451049805, "learning_rate": 1.4822904368358914e-05, "loss": 0.3007, "step": 837 }, { "epoch": 0.01484059202982286, "grad_norm": 2.944786787033081, "learning_rate": 1.4840613931523022e-05, "loss": 0.3014, "step": 838 }, { "epoch": 0.014858301566851289, "grad_norm": 3.6491661071777344, "learning_rate": 1.4858323494687132e-05, "loss": 0.3096, "step": 839 }, { "epoch": 0.014876011103879717, "grad_norm": 3.8329031467437744, "learning_rate": 1.487603305785124e-05, "loss": 0.241, "step": 840 }, { "epoch": 0.014893720640908145, "grad_norm": 3.3749892711639404, "learning_rate": 1.4893742621015349e-05, "loss": 0.3151, "step": 841 }, { "epoch": 0.014911430177936573, "grad_norm": 3.46456241607666, "learning_rate": 1.4911452184179457e-05, "loss": 0.2501, "step": 842 }, { "epoch": 0.014929139714965001, "grad_norm": 3.3024566173553467, "learning_rate": 1.4929161747343565e-05, "loss": 0.2559, "step": 843 }, { "epoch": 0.01494684925199343, "grad_norm": 4.155496597290039, "learning_rate": 1.4946871310507674e-05, "loss": 0.2524, "step": 844 }, { "epoch": 0.014964558789021858, "grad_norm": 3.2795374393463135, "learning_rate": 1.4964580873671784e-05, "loss": 0.2727, "step": 845 }, { "epoch": 0.014982268326050286, "grad_norm": 3.9566869735717773, "learning_rate": 1.4982290436835892e-05, "loss": 0.2637, "step": 846 }, { "epoch": 0.014999977863078714, "grad_norm": 2.521868944168091, "learning_rate": 1.5e-05, "loss": 0.2678, "step": 847 }, { "epoch": 0.015017687400107142, "grad_norm": 4.314074993133545, "learning_rate": 1.501770956316411e-05, "loss": 0.2602, "step": 848 }, { "epoch": 0.015035396937135572, "grad_norm": 3.464693307876587, "learning_rate": 1.5035419126328217e-05, "loss": 0.2632, "step": 849 }, { "epoch": 0.015053106474164, "grad_norm": 3.7536888122558594, "learning_rate": 1.5053128689492327e-05, "loss": 0.3091, "step": 850 }, { "epoch": 0.015070816011192428, "grad_norm": 2.9464943408966064, "learning_rate": 1.5070838252656434e-05, "loss": 0.2291, "step": 851 }, { "epoch": 0.015088525548220856, "grad_norm": 3.2326269149780273, "learning_rate": 1.5088547815820544e-05, "loss": 0.226, "step": 852 }, { "epoch": 0.015106235085249284, "grad_norm": 2.7311813831329346, "learning_rate": 1.5106257378984654e-05, "loss": 0.2378, "step": 853 }, { "epoch": 0.015123944622277712, "grad_norm": 2.488077402114868, "learning_rate": 1.512396694214876e-05, "loss": 0.2467, "step": 854 }, { "epoch": 0.01514165415930614, "grad_norm": 2.9365711212158203, "learning_rate": 1.514167650531287e-05, "loss": 0.2227, "step": 855 }, { "epoch": 0.015159363696334568, "grad_norm": 4.591442584991455, "learning_rate": 1.5159386068476977e-05, "loss": 0.2644, "step": 856 }, { "epoch": 0.015177073233362997, "grad_norm": 3.277339458465576, "learning_rate": 1.5177095631641087e-05, "loss": 0.2541, "step": 857 }, { "epoch": 0.015194782770391425, "grad_norm": 3.346364974975586, "learning_rate": 1.5194805194805194e-05, "loss": 0.2009, "step": 858 }, { "epoch": 0.015212492307419853, "grad_norm": 3.0102381706237793, "learning_rate": 1.5212514757969304e-05, "loss": 0.2533, "step": 859 }, { "epoch": 0.015230201844448281, "grad_norm": 3.1340084075927734, "learning_rate": 1.5230224321133414e-05, "loss": 0.1962, "step": 860 }, { "epoch": 0.015247911381476709, "grad_norm": 3.240091323852539, "learning_rate": 1.524793388429752e-05, "loss": 0.2647, "step": 861 }, { "epoch": 0.015265620918505139, "grad_norm": 2.600087881088257, "learning_rate": 1.526564344746163e-05, "loss": 0.1943, "step": 862 }, { "epoch": 0.015283330455533567, "grad_norm": 3.38022780418396, "learning_rate": 1.528335301062574e-05, "loss": 0.2166, "step": 863 }, { "epoch": 0.015301039992561995, "grad_norm": 3.079639196395874, "learning_rate": 1.5301062573789847e-05, "loss": 0.2392, "step": 864 }, { "epoch": 0.015318749529590423, "grad_norm": 3.2306058406829834, "learning_rate": 1.531877213695396e-05, "loss": 0.1999, "step": 865 }, { "epoch": 0.015336459066618851, "grad_norm": 4.025371551513672, "learning_rate": 1.5336481700118063e-05, "loss": 0.2838, "step": 866 }, { "epoch": 0.01535416860364728, "grad_norm": 2.749201774597168, "learning_rate": 1.5354191263282175e-05, "loss": 0.2693, "step": 867 }, { "epoch": 0.015371878140675707, "grad_norm": 3.9273173809051514, "learning_rate": 1.537190082644628e-05, "loss": 0.2593, "step": 868 }, { "epoch": 0.015389587677704136, "grad_norm": 3.5236895084381104, "learning_rate": 1.5389610389610392e-05, "loss": 0.2659, "step": 869 }, { "epoch": 0.015407297214732564, "grad_norm": 4.353926658630371, "learning_rate": 1.54073199527745e-05, "loss": 0.2621, "step": 870 }, { "epoch": 0.015425006751760992, "grad_norm": 3.1412129402160645, "learning_rate": 1.542502951593861e-05, "loss": 0.2869, "step": 871 }, { "epoch": 0.01544271628878942, "grad_norm": 2.978135585784912, "learning_rate": 1.5442739079102717e-05, "loss": 0.2319, "step": 872 }, { "epoch": 0.015460425825817848, "grad_norm": 2.349982261657715, "learning_rate": 1.5460448642266825e-05, "loss": 0.279, "step": 873 }, { "epoch": 0.015478135362846276, "grad_norm": 3.8427600860595703, "learning_rate": 1.5478158205430933e-05, "loss": 0.18, "step": 874 }, { "epoch": 0.015495844899874704, "grad_norm": 4.309515476226807, "learning_rate": 1.549586776859504e-05, "loss": 0.255, "step": 875 }, { "epoch": 0.015513554436903134, "grad_norm": 3.7857656478881836, "learning_rate": 1.551357733175915e-05, "loss": 0.2924, "step": 876 }, { "epoch": 0.015531263973931562, "grad_norm": 3.363541603088379, "learning_rate": 1.553128689492326e-05, "loss": 0.281, "step": 877 }, { "epoch": 0.01554897351095999, "grad_norm": 4.259315490722656, "learning_rate": 1.5548996458087367e-05, "loss": 0.3278, "step": 878 }, { "epoch": 0.015566683047988418, "grad_norm": 2.654956340789795, "learning_rate": 1.5566706021251478e-05, "loss": 0.2573, "step": 879 }, { "epoch": 0.015584392585016846, "grad_norm": 3.0276215076446533, "learning_rate": 1.5584415584415583e-05, "loss": 0.2226, "step": 880 }, { "epoch": 0.015602102122045275, "grad_norm": 3.4632363319396973, "learning_rate": 1.5602125147579695e-05, "loss": 0.2405, "step": 881 }, { "epoch": 0.015619811659073703, "grad_norm": 3.2919554710388184, "learning_rate": 1.5619834710743803e-05, "loss": 0.3074, "step": 882 }, { "epoch": 0.015637521196102133, "grad_norm": 4.607194900512695, "learning_rate": 1.563754427390791e-05, "loss": 0.2962, "step": 883 }, { "epoch": 0.01565523073313056, "grad_norm": 3.030284881591797, "learning_rate": 1.565525383707202e-05, "loss": 0.246, "step": 884 }, { "epoch": 0.01567294027015899, "grad_norm": 4.185439586639404, "learning_rate": 1.5672963400236128e-05, "loss": 0.2584, "step": 885 }, { "epoch": 0.015690649807187415, "grad_norm": 3.5303122997283936, "learning_rate": 1.5690672963400236e-05, "loss": 0.2757, "step": 886 }, { "epoch": 0.015708359344215845, "grad_norm": 3.4683430194854736, "learning_rate": 1.5708382526564348e-05, "loss": 0.2315, "step": 887 }, { "epoch": 0.01572606888124427, "grad_norm": 3.6390490531921387, "learning_rate": 1.5726092089728453e-05, "loss": 0.2207, "step": 888 }, { "epoch": 0.0157437784182727, "grad_norm": 3.171565294265747, "learning_rate": 1.5743801652892565e-05, "loss": 0.1695, "step": 889 }, { "epoch": 0.015761487955301128, "grad_norm": 4.045363426208496, "learning_rate": 1.576151121605667e-05, "loss": 0.2394, "step": 890 }, { "epoch": 0.015779197492329557, "grad_norm": 3.153245210647583, "learning_rate": 1.577922077922078e-05, "loss": 0.2777, "step": 891 }, { "epoch": 0.015796907029357984, "grad_norm": 3.3663675785064697, "learning_rate": 1.579693034238489e-05, "loss": 0.2422, "step": 892 }, { "epoch": 0.015814616566386414, "grad_norm": 3.752943754196167, "learning_rate": 1.5814639905548998e-05, "loss": 0.2634, "step": 893 }, { "epoch": 0.01583232610341484, "grad_norm": 3.3741626739501953, "learning_rate": 1.5832349468713106e-05, "loss": 0.3222, "step": 894 }, { "epoch": 0.01585003564044327, "grad_norm": 4.233578205108643, "learning_rate": 1.5850059031877215e-05, "loss": 0.1815, "step": 895 }, { "epoch": 0.0158677451774717, "grad_norm": 3.541214942932129, "learning_rate": 1.5867768595041323e-05, "loss": 0.259, "step": 896 }, { "epoch": 0.015885454714500126, "grad_norm": 2.6871728897094727, "learning_rate": 1.588547815820543e-05, "loss": 0.2484, "step": 897 }, { "epoch": 0.015903164251528556, "grad_norm": 3.4927871227264404, "learning_rate": 1.590318772136954e-05, "loss": 0.1959, "step": 898 }, { "epoch": 0.015920873788556982, "grad_norm": 2.9719631671905518, "learning_rate": 1.592089728453365e-05, "loss": 0.2675, "step": 899 }, { "epoch": 0.015938583325585412, "grad_norm": 3.2292299270629883, "learning_rate": 1.5938606847697756e-05, "loss": 0.204, "step": 900 }, { "epoch": 0.01595629286261384, "grad_norm": 2.2454049587249756, "learning_rate": 1.5956316410861868e-05, "loss": 0.208, "step": 901 }, { "epoch": 0.015974002399642268, "grad_norm": 2.575477123260498, "learning_rate": 1.5974025974025973e-05, "loss": 0.1877, "step": 902 }, { "epoch": 0.015991711936670695, "grad_norm": 3.048327922821045, "learning_rate": 1.5991735537190085e-05, "loss": 0.2582, "step": 903 }, { "epoch": 0.016009421473699124, "grad_norm": 3.6841773986816406, "learning_rate": 1.6009445100354193e-05, "loss": 0.2545, "step": 904 }, { "epoch": 0.01602713101072755, "grad_norm": 3.531911849975586, "learning_rate": 1.60271546635183e-05, "loss": 0.2433, "step": 905 }, { "epoch": 0.01604484054775598, "grad_norm": 3.175097942352295, "learning_rate": 1.604486422668241e-05, "loss": 0.2347, "step": 906 }, { "epoch": 0.016062550084784407, "grad_norm": 2.915935754776001, "learning_rate": 1.6062573789846518e-05, "loss": 0.2797, "step": 907 }, { "epoch": 0.016080259621812837, "grad_norm": 2.286107063293457, "learning_rate": 1.6080283353010626e-05, "loss": 0.1849, "step": 908 }, { "epoch": 0.016097969158841267, "grad_norm": 3.88871693611145, "learning_rate": 1.6097992916174738e-05, "loss": 0.2348, "step": 909 }, { "epoch": 0.016115678695869693, "grad_norm": 2.6220860481262207, "learning_rate": 1.6115702479338843e-05, "loss": 0.2266, "step": 910 }, { "epoch": 0.016133388232898123, "grad_norm": 3.3515818119049072, "learning_rate": 1.6133412042502954e-05, "loss": 0.2657, "step": 911 }, { "epoch": 0.01615109776992655, "grad_norm": 2.685786485671997, "learning_rate": 1.615112160566706e-05, "loss": 0.1973, "step": 912 }, { "epoch": 0.01616880730695498, "grad_norm": 3.8153436183929443, "learning_rate": 1.616883116883117e-05, "loss": 0.2048, "step": 913 }, { "epoch": 0.016186516843983405, "grad_norm": 4.228230953216553, "learning_rate": 1.6186540731995276e-05, "loss": 0.1877, "step": 914 }, { "epoch": 0.016204226381011835, "grad_norm": 3.253657341003418, "learning_rate": 1.6204250295159388e-05, "loss": 0.2321, "step": 915 }, { "epoch": 0.01622193591804026, "grad_norm": 3.2480666637420654, "learning_rate": 1.6221959858323496e-05, "loss": 0.2514, "step": 916 }, { "epoch": 0.01623964545506869, "grad_norm": 3.33115553855896, "learning_rate": 1.6239669421487604e-05, "loss": 0.2073, "step": 917 }, { "epoch": 0.016257354992097118, "grad_norm": 3.7297778129577637, "learning_rate": 1.6257378984651713e-05, "loss": 0.2729, "step": 918 }, { "epoch": 0.016275064529125548, "grad_norm": 3.178342580795288, "learning_rate": 1.627508854781582e-05, "loss": 0.2186, "step": 919 }, { "epoch": 0.016292774066153974, "grad_norm": 3.8393757343292236, "learning_rate": 1.629279811097993e-05, "loss": 0.3243, "step": 920 }, { "epoch": 0.016310483603182404, "grad_norm": 2.622913360595703, "learning_rate": 1.631050767414404e-05, "loss": 0.23, "step": 921 }, { "epoch": 0.01632819314021083, "grad_norm": 2.167600154876709, "learning_rate": 1.6328217237308146e-05, "loss": 0.2179, "step": 922 }, { "epoch": 0.01634590267723926, "grad_norm": 2.8961596488952637, "learning_rate": 1.6345926800472258e-05, "loss": 0.2749, "step": 923 }, { "epoch": 0.01636361221426769, "grad_norm": 3.217485189437866, "learning_rate": 1.6363636363636363e-05, "loss": 0.3334, "step": 924 }, { "epoch": 0.016381321751296116, "grad_norm": 3.3530070781707764, "learning_rate": 1.6381345926800474e-05, "loss": 0.3045, "step": 925 }, { "epoch": 0.016399031288324546, "grad_norm": 2.641184091567993, "learning_rate": 1.6399055489964583e-05, "loss": 0.2175, "step": 926 }, { "epoch": 0.016416740825352973, "grad_norm": 3.8989193439483643, "learning_rate": 1.641676505312869e-05, "loss": 0.2177, "step": 927 }, { "epoch": 0.016434450362381402, "grad_norm": 3.7609825134277344, "learning_rate": 1.64344746162928e-05, "loss": 0.2285, "step": 928 }, { "epoch": 0.01645215989940983, "grad_norm": 2.9629650115966797, "learning_rate": 1.6452184179456907e-05, "loss": 0.2146, "step": 929 }, { "epoch": 0.01646986943643826, "grad_norm": 2.6247634887695312, "learning_rate": 1.6469893742621016e-05, "loss": 0.2812, "step": 930 }, { "epoch": 0.016487578973466685, "grad_norm": 3.5467019081115723, "learning_rate": 1.6487603305785124e-05, "loss": 0.2484, "step": 931 }, { "epoch": 0.016505288510495115, "grad_norm": 2.9275145530700684, "learning_rate": 1.6505312868949232e-05, "loss": 0.2428, "step": 932 }, { "epoch": 0.01652299804752354, "grad_norm": 2.7302353382110596, "learning_rate": 1.6523022432113344e-05, "loss": 0.2456, "step": 933 }, { "epoch": 0.01654070758455197, "grad_norm": 2.8160812854766846, "learning_rate": 1.654073199527745e-05, "loss": 0.252, "step": 934 }, { "epoch": 0.016558417121580397, "grad_norm": 5.478418827056885, "learning_rate": 1.655844155844156e-05, "loss": 0.2881, "step": 935 }, { "epoch": 0.016576126658608827, "grad_norm": 3.8003594875335693, "learning_rate": 1.6576151121605666e-05, "loss": 0.2622, "step": 936 }, { "epoch": 0.016593836195637257, "grad_norm": 3.172234296798706, "learning_rate": 1.6593860684769777e-05, "loss": 0.327, "step": 937 }, { "epoch": 0.016611545732665683, "grad_norm": 2.1022143363952637, "learning_rate": 1.6611570247933886e-05, "loss": 0.2176, "step": 938 }, { "epoch": 0.016629255269694113, "grad_norm": 3.758087396621704, "learning_rate": 1.6629279811097994e-05, "loss": 0.2683, "step": 939 }, { "epoch": 0.01664696480672254, "grad_norm": 3.5263760089874268, "learning_rate": 1.6646989374262102e-05, "loss": 0.254, "step": 940 }, { "epoch": 0.01666467434375097, "grad_norm": 3.4156525135040283, "learning_rate": 1.666469893742621e-05, "loss": 0.2393, "step": 941 }, { "epoch": 0.016682383880779396, "grad_norm": 3.4841597080230713, "learning_rate": 1.668240850059032e-05, "loss": 0.2691, "step": 942 }, { "epoch": 0.016700093417807826, "grad_norm": 3.552879571914673, "learning_rate": 1.670011806375443e-05, "loss": 0.2551, "step": 943 }, { "epoch": 0.016717802954836252, "grad_norm": 3.0578012466430664, "learning_rate": 1.6717827626918536e-05, "loss": 0.2194, "step": 944 }, { "epoch": 0.016735512491864682, "grad_norm": 3.277918577194214, "learning_rate": 1.6735537190082647e-05, "loss": 0.2421, "step": 945 }, { "epoch": 0.01675322202889311, "grad_norm": 3.2593159675598145, "learning_rate": 1.6753246753246752e-05, "loss": 0.2127, "step": 946 }, { "epoch": 0.016770931565921538, "grad_norm": 2.0844647884368896, "learning_rate": 1.6770956316410864e-05, "loss": 0.2211, "step": 947 }, { "epoch": 0.016788641102949965, "grad_norm": 3.707261800765991, "learning_rate": 1.6788665879574972e-05, "loss": 0.188, "step": 948 }, { "epoch": 0.016806350639978394, "grad_norm": 3.283175468444824, "learning_rate": 1.680637544273908e-05, "loss": 0.2656, "step": 949 }, { "epoch": 0.016824060177006824, "grad_norm": 2.3402602672576904, "learning_rate": 1.682408500590319e-05, "loss": 0.1999, "step": 950 }, { "epoch": 0.01684176971403525, "grad_norm": 4.789072036743164, "learning_rate": 1.6841794569067297e-05, "loss": 0.3022, "step": 951 }, { "epoch": 0.01685947925106368, "grad_norm": 3.0140631198883057, "learning_rate": 1.6859504132231405e-05, "loss": 0.3205, "step": 952 }, { "epoch": 0.016877188788092107, "grad_norm": 3.565828800201416, "learning_rate": 1.6877213695395514e-05, "loss": 0.2032, "step": 953 }, { "epoch": 0.016894898325120537, "grad_norm": 3.2908740043640137, "learning_rate": 1.6894923258559622e-05, "loss": 0.2456, "step": 954 }, { "epoch": 0.016912607862148963, "grad_norm": 3.0197947025299072, "learning_rate": 1.6912632821723734e-05, "loss": 0.2444, "step": 955 }, { "epoch": 0.016930317399177393, "grad_norm": 4.088615417480469, "learning_rate": 1.693034238488784e-05, "loss": 0.2536, "step": 956 }, { "epoch": 0.01694802693620582, "grad_norm": 3.127398729324341, "learning_rate": 1.694805194805195e-05, "loss": 0.2913, "step": 957 }, { "epoch": 0.01696573647323425, "grad_norm": 2.4292328357696533, "learning_rate": 1.6965761511216055e-05, "loss": 0.2333, "step": 958 }, { "epoch": 0.016983446010262675, "grad_norm": 2.803481340408325, "learning_rate": 1.6983471074380167e-05, "loss": 0.266, "step": 959 }, { "epoch": 0.017001155547291105, "grad_norm": 3.5114879608154297, "learning_rate": 1.7001180637544275e-05, "loss": 0.2292, "step": 960 }, { "epoch": 0.01701886508431953, "grad_norm": 3.350294828414917, "learning_rate": 1.7018890200708384e-05, "loss": 0.2371, "step": 961 }, { "epoch": 0.01703657462134796, "grad_norm": 3.5249600410461426, "learning_rate": 1.7036599763872492e-05, "loss": 0.2149, "step": 962 }, { "epoch": 0.01705428415837639, "grad_norm": 2.598485231399536, "learning_rate": 1.70543093270366e-05, "loss": 0.1987, "step": 963 }, { "epoch": 0.017071993695404818, "grad_norm": 3.7802722454071045, "learning_rate": 1.707201889020071e-05, "loss": 0.2938, "step": 964 }, { "epoch": 0.017089703232433247, "grad_norm": 4.241633415222168, "learning_rate": 1.708972845336482e-05, "loss": 0.2835, "step": 965 }, { "epoch": 0.017107412769461674, "grad_norm": 3.9193944931030273, "learning_rate": 1.7107438016528925e-05, "loss": 0.2594, "step": 966 }, { "epoch": 0.017125122306490104, "grad_norm": 4.287761688232422, "learning_rate": 1.7125147579693037e-05, "loss": 0.2105, "step": 967 }, { "epoch": 0.01714283184351853, "grad_norm": 2.6021957397460938, "learning_rate": 1.7142857142857142e-05, "loss": 0.1787, "step": 968 }, { "epoch": 0.01716054138054696, "grad_norm": 3.5871431827545166, "learning_rate": 1.7160566706021254e-05, "loss": 0.2643, "step": 969 }, { "epoch": 0.017178250917575386, "grad_norm": 2.2529048919677734, "learning_rate": 1.717827626918536e-05, "loss": 0.2426, "step": 970 }, { "epoch": 0.017195960454603816, "grad_norm": 2.5329082012176514, "learning_rate": 1.719598583234947e-05, "loss": 0.2357, "step": 971 }, { "epoch": 0.017213669991632242, "grad_norm": 2.723510265350342, "learning_rate": 1.721369539551358e-05, "loss": 0.2246, "step": 972 }, { "epoch": 0.017231379528660672, "grad_norm": 2.90854811668396, "learning_rate": 1.7231404958677687e-05, "loss": 0.2013, "step": 973 }, { "epoch": 0.0172490890656891, "grad_norm": 3.066727876663208, "learning_rate": 1.7249114521841795e-05, "loss": 0.1911, "step": 974 }, { "epoch": 0.01726679860271753, "grad_norm": 3.428598403930664, "learning_rate": 1.7266824085005903e-05, "loss": 0.2305, "step": 975 }, { "epoch": 0.01728450813974596, "grad_norm": 3.2870044708251953, "learning_rate": 1.7284533648170012e-05, "loss": 0.2154, "step": 976 }, { "epoch": 0.017302217676774385, "grad_norm": 3.3409979343414307, "learning_rate": 1.7302243211334123e-05, "loss": 0.2869, "step": 977 }, { "epoch": 0.017319927213802815, "grad_norm": 3.834258794784546, "learning_rate": 1.731995277449823e-05, "loss": 0.235, "step": 978 }, { "epoch": 0.01733763675083124, "grad_norm": 2.42409610748291, "learning_rate": 1.733766233766234e-05, "loss": 0.3033, "step": 979 }, { "epoch": 0.01735534628785967, "grad_norm": 3.947303295135498, "learning_rate": 1.7355371900826445e-05, "loss": 0.2721, "step": 980 }, { "epoch": 0.017373055824888097, "grad_norm": 2.730459213256836, "learning_rate": 1.7373081463990557e-05, "loss": 0.2288, "step": 981 }, { "epoch": 0.017390765361916527, "grad_norm": 2.8603603839874268, "learning_rate": 1.7390791027154665e-05, "loss": 0.2486, "step": 982 }, { "epoch": 0.017408474898944953, "grad_norm": 3.558769941329956, "learning_rate": 1.7408500590318773e-05, "loss": 0.2692, "step": 983 }, { "epoch": 0.017426184435973383, "grad_norm": 2.897613763809204, "learning_rate": 1.742621015348288e-05, "loss": 0.2089, "step": 984 }, { "epoch": 0.01744389397300181, "grad_norm": 3.739163398742676, "learning_rate": 1.744391971664699e-05, "loss": 0.1949, "step": 985 }, { "epoch": 0.01746160351003024, "grad_norm": 2.44085431098938, "learning_rate": 1.7461629279811098e-05, "loss": 0.2136, "step": 986 }, { "epoch": 0.017479313047058666, "grad_norm": 5.566951274871826, "learning_rate": 1.7479338842975207e-05, "loss": 0.2591, "step": 987 }, { "epoch": 0.017497022584087096, "grad_norm": 2.819683790206909, "learning_rate": 1.7497048406139315e-05, "loss": 0.2174, "step": 988 }, { "epoch": 0.017514732121115525, "grad_norm": 2.3070919513702393, "learning_rate": 1.7514757969303427e-05, "loss": 0.2244, "step": 989 }, { "epoch": 0.017532441658143952, "grad_norm": 3.8266940116882324, "learning_rate": 1.753246753246753e-05, "loss": 0.2736, "step": 990 }, { "epoch": 0.01755015119517238, "grad_norm": 3.611830472946167, "learning_rate": 1.7550177095631643e-05, "loss": 0.1839, "step": 991 }, { "epoch": 0.017567860732200808, "grad_norm": 5.336496353149414, "learning_rate": 1.7567886658795748e-05, "loss": 0.2296, "step": 992 }, { "epoch": 0.017585570269229238, "grad_norm": 3.109227418899536, "learning_rate": 1.758559622195986e-05, "loss": 0.2151, "step": 993 }, { "epoch": 0.017603279806257664, "grad_norm": 3.256679058074951, "learning_rate": 1.7603305785123968e-05, "loss": 0.2996, "step": 994 }, { "epoch": 0.017620989343286094, "grad_norm": 3.2043955326080322, "learning_rate": 1.7621015348288076e-05, "loss": 0.2274, "step": 995 }, { "epoch": 0.01763869888031452, "grad_norm": 2.8394157886505127, "learning_rate": 1.7638724911452185e-05, "loss": 0.276, "step": 996 }, { "epoch": 0.01765640841734295, "grad_norm": 3.7785580158233643, "learning_rate": 1.7656434474616293e-05, "loss": 0.232, "step": 997 }, { "epoch": 0.017674117954371377, "grad_norm": 2.5105810165405273, "learning_rate": 1.76741440377804e-05, "loss": 0.2336, "step": 998 }, { "epoch": 0.017691827491399807, "grad_norm": 3.46427583694458, "learning_rate": 1.7691853600944513e-05, "loss": 0.275, "step": 999 }, { "epoch": 0.017709537028428233, "grad_norm": 3.0922727584838867, "learning_rate": 1.7709563164108618e-05, "loss": 0.2612, "step": 1000 }, { "epoch": 0.017727246565456663, "grad_norm": 2.9071552753448486, "learning_rate": 1.772727272727273e-05, "loss": 0.2197, "step": 1001 }, { "epoch": 0.01774495610248509, "grad_norm": 3.127441167831421, "learning_rate": 1.7744982290436835e-05, "loss": 0.2353, "step": 1002 }, { "epoch": 0.01776266563951352, "grad_norm": 3.25630521774292, "learning_rate": 1.7762691853600946e-05, "loss": 0.221, "step": 1003 }, { "epoch": 0.01778037517654195, "grad_norm": 4.461640357971191, "learning_rate": 1.778040141676505e-05, "loss": 0.2081, "step": 1004 }, { "epoch": 0.017798084713570375, "grad_norm": 3.642925500869751, "learning_rate": 1.7798110979929163e-05, "loss": 0.2704, "step": 1005 }, { "epoch": 0.017815794250598805, "grad_norm": 2.97226619720459, "learning_rate": 1.781582054309327e-05, "loss": 0.2255, "step": 1006 }, { "epoch": 0.01783350378762723, "grad_norm": 4.078212261199951, "learning_rate": 1.783353010625738e-05, "loss": 0.2418, "step": 1007 }, { "epoch": 0.01785121332465566, "grad_norm": 3.1347265243530273, "learning_rate": 1.7851239669421488e-05, "loss": 0.2382, "step": 1008 }, { "epoch": 0.017868922861684088, "grad_norm": 3.5144200325012207, "learning_rate": 1.7868949232585596e-05, "loss": 0.2576, "step": 1009 }, { "epoch": 0.017886632398712517, "grad_norm": 2.928436040878296, "learning_rate": 1.7886658795749705e-05, "loss": 0.2501, "step": 1010 }, { "epoch": 0.017904341935740944, "grad_norm": 4.053630828857422, "learning_rate": 1.7904368358913816e-05, "loss": 0.3028, "step": 1011 }, { "epoch": 0.017922051472769374, "grad_norm": 3.8268725872039795, "learning_rate": 1.792207792207792e-05, "loss": 0.233, "step": 1012 }, { "epoch": 0.0179397610097978, "grad_norm": 3.6330182552337646, "learning_rate": 1.7939787485242033e-05, "loss": 0.2359, "step": 1013 }, { "epoch": 0.01795747054682623, "grad_norm": 3.2659342288970947, "learning_rate": 1.7957497048406138e-05, "loss": 0.2655, "step": 1014 }, { "epoch": 0.017975180083854656, "grad_norm": 3.633756637573242, "learning_rate": 1.797520661157025e-05, "loss": 0.2713, "step": 1015 }, { "epoch": 0.017992889620883086, "grad_norm": 2.73190975189209, "learning_rate": 1.7992916174734358e-05, "loss": 0.1829, "step": 1016 }, { "epoch": 0.018010599157911516, "grad_norm": 3.2639780044555664, "learning_rate": 1.8010625737898466e-05, "loss": 0.2552, "step": 1017 }, { "epoch": 0.018028308694939942, "grad_norm": 2.8062283992767334, "learning_rate": 1.8028335301062574e-05, "loss": 0.2218, "step": 1018 }, { "epoch": 0.018046018231968372, "grad_norm": 2.458418846130371, "learning_rate": 1.8046044864226683e-05, "loss": 0.248, "step": 1019 }, { "epoch": 0.0180637277689968, "grad_norm": 3.058807373046875, "learning_rate": 1.806375442739079e-05, "loss": 0.195, "step": 1020 }, { "epoch": 0.01808143730602523, "grad_norm": 3.8380842208862305, "learning_rate": 1.8081463990554903e-05, "loss": 0.2592, "step": 1021 }, { "epoch": 0.018099146843053655, "grad_norm": 3.3306586742401123, "learning_rate": 1.8099173553719008e-05, "loss": 0.2164, "step": 1022 }, { "epoch": 0.018116856380082084, "grad_norm": 3.269367218017578, "learning_rate": 1.811688311688312e-05, "loss": 0.3, "step": 1023 }, { "epoch": 0.01813456591711051, "grad_norm": 2.841618061065674, "learning_rate": 1.8134592680047224e-05, "loss": 0.2295, "step": 1024 }, { "epoch": 0.01815227545413894, "grad_norm": 3.710519313812256, "learning_rate": 1.8152302243211336e-05, "loss": 0.2315, "step": 1025 }, { "epoch": 0.018169984991167367, "grad_norm": 3.1069111824035645, "learning_rate": 1.817001180637544e-05, "loss": 0.2509, "step": 1026 }, { "epoch": 0.018187694528195797, "grad_norm": 3.3706884384155273, "learning_rate": 1.8187721369539553e-05, "loss": 0.26, "step": 1027 }, { "epoch": 0.018205404065224223, "grad_norm": 3.5368711948394775, "learning_rate": 1.820543093270366e-05, "loss": 0.2231, "step": 1028 }, { "epoch": 0.018223113602252653, "grad_norm": 3.134873628616333, "learning_rate": 1.822314049586777e-05, "loss": 0.2396, "step": 1029 }, { "epoch": 0.018240823139281083, "grad_norm": 2.500868558883667, "learning_rate": 1.8240850059031878e-05, "loss": 0.2082, "step": 1030 }, { "epoch": 0.01825853267630951, "grad_norm": 2.9874684810638428, "learning_rate": 1.8258559622195986e-05, "loss": 0.2043, "step": 1031 }, { "epoch": 0.01827624221333794, "grad_norm": 2.605351209640503, "learning_rate": 1.8276269185360094e-05, "loss": 0.2744, "step": 1032 }, { "epoch": 0.018293951750366366, "grad_norm": 3.6388587951660156, "learning_rate": 1.8293978748524206e-05, "loss": 0.2509, "step": 1033 }, { "epoch": 0.018311661287394795, "grad_norm": 2.710402011871338, "learning_rate": 1.831168831168831e-05, "loss": 0.2397, "step": 1034 }, { "epoch": 0.018329370824423222, "grad_norm": 2.8121283054351807, "learning_rate": 1.8329397874852423e-05, "loss": 0.2402, "step": 1035 }, { "epoch": 0.01834708036145165, "grad_norm": 3.6591291427612305, "learning_rate": 1.8347107438016527e-05, "loss": 0.2741, "step": 1036 }, { "epoch": 0.018364789898480078, "grad_norm": 3.5171608924865723, "learning_rate": 1.836481700118064e-05, "loss": 0.2351, "step": 1037 }, { "epoch": 0.018382499435508508, "grad_norm": 3.1773386001586914, "learning_rate": 1.8382526564344747e-05, "loss": 0.2257, "step": 1038 }, { "epoch": 0.018400208972536934, "grad_norm": 3.2639663219451904, "learning_rate": 1.8400236127508856e-05, "loss": 0.2563, "step": 1039 }, { "epoch": 0.018417918509565364, "grad_norm": 4.127315998077393, "learning_rate": 1.8417945690672964e-05, "loss": 0.2214, "step": 1040 }, { "epoch": 0.01843562804659379, "grad_norm": 2.5280752182006836, "learning_rate": 1.8435655253837072e-05, "loss": 0.1869, "step": 1041 }, { "epoch": 0.01845333758362222, "grad_norm": 2.879472255706787, "learning_rate": 1.845336481700118e-05, "loss": 0.1613, "step": 1042 }, { "epoch": 0.01847104712065065, "grad_norm": 2.2941322326660156, "learning_rate": 1.847107438016529e-05, "loss": 0.2185, "step": 1043 }, { "epoch": 0.018488756657679076, "grad_norm": 2.394294261932373, "learning_rate": 1.8488783943329397e-05, "loss": 0.1571, "step": 1044 }, { "epoch": 0.018506466194707506, "grad_norm": 2.9643893241882324, "learning_rate": 1.850649350649351e-05, "loss": 0.2665, "step": 1045 }, { "epoch": 0.018524175731735933, "grad_norm": 3.332854986190796, "learning_rate": 1.8524203069657614e-05, "loss": 0.2338, "step": 1046 }, { "epoch": 0.018541885268764362, "grad_norm": 3.9719936847686768, "learning_rate": 1.8541912632821726e-05, "loss": 0.241, "step": 1047 }, { "epoch": 0.01855959480579279, "grad_norm": 5.225637912750244, "learning_rate": 1.855962219598583e-05, "loss": 0.2837, "step": 1048 }, { "epoch": 0.01857730434282122, "grad_norm": 4.428311347961426, "learning_rate": 1.8577331759149942e-05, "loss": 0.2629, "step": 1049 }, { "epoch": 0.018595013879849645, "grad_norm": 3.4688382148742676, "learning_rate": 1.859504132231405e-05, "loss": 0.2104, "step": 1050 }, { "epoch": 0.018612723416878075, "grad_norm": 3.4681382179260254, "learning_rate": 1.861275088547816e-05, "loss": 0.2638, "step": 1051 }, { "epoch": 0.0186304329539065, "grad_norm": 4.1070685386657715, "learning_rate": 1.8630460448642267e-05, "loss": 0.2404, "step": 1052 }, { "epoch": 0.01864814249093493, "grad_norm": 2.780747413635254, "learning_rate": 1.8648170011806376e-05, "loss": 0.2482, "step": 1053 }, { "epoch": 0.018665852027963357, "grad_norm": 3.801996946334839, "learning_rate": 1.8665879574970484e-05, "loss": 0.2807, "step": 1054 }, { "epoch": 0.018683561564991787, "grad_norm": 2.8485820293426514, "learning_rate": 1.8683589138134596e-05, "loss": 0.2272, "step": 1055 }, { "epoch": 0.018701271102020217, "grad_norm": 2.6444711685180664, "learning_rate": 1.87012987012987e-05, "loss": 0.2463, "step": 1056 }, { "epoch": 0.018718980639048644, "grad_norm": 2.6030726432800293, "learning_rate": 1.8719008264462812e-05, "loss": 0.2166, "step": 1057 }, { "epoch": 0.018736690176077073, "grad_norm": 3.7089507579803467, "learning_rate": 1.8736717827626917e-05, "loss": 0.2291, "step": 1058 }, { "epoch": 0.0187543997131055, "grad_norm": 2.966357946395874, "learning_rate": 1.875442739079103e-05, "loss": 0.2257, "step": 1059 }, { "epoch": 0.01877210925013393, "grad_norm": 2.9645509719848633, "learning_rate": 1.8772136953955134e-05, "loss": 0.2257, "step": 1060 }, { "epoch": 0.018789818787162356, "grad_norm": 2.8492183685302734, "learning_rate": 1.8789846517119245e-05, "loss": 0.22, "step": 1061 }, { "epoch": 0.018807528324190786, "grad_norm": 2.7402431964874268, "learning_rate": 1.8807556080283354e-05, "loss": 0.2377, "step": 1062 }, { "epoch": 0.018825237861219212, "grad_norm": 2.3771777153015137, "learning_rate": 1.8825265643447462e-05, "loss": 0.2232, "step": 1063 }, { "epoch": 0.018842947398247642, "grad_norm": 4.7808122634887695, "learning_rate": 1.884297520661157e-05, "loss": 0.2816, "step": 1064 }, { "epoch": 0.01886065693527607, "grad_norm": 6.211142539978027, "learning_rate": 1.886068476977568e-05, "loss": 0.2241, "step": 1065 }, { "epoch": 0.018878366472304498, "grad_norm": 3.500793695449829, "learning_rate": 1.8878394332939787e-05, "loss": 0.2778, "step": 1066 }, { "epoch": 0.018896076009332925, "grad_norm": 3.1964094638824463, "learning_rate": 1.88961038961039e-05, "loss": 0.2467, "step": 1067 }, { "epoch": 0.018913785546361354, "grad_norm": 3.109814167022705, "learning_rate": 1.8913813459268004e-05, "loss": 0.2027, "step": 1068 }, { "epoch": 0.01893149508338978, "grad_norm": 2.943338394165039, "learning_rate": 1.8931523022432115e-05, "loss": 0.249, "step": 1069 }, { "epoch": 0.01894920462041821, "grad_norm": 2.254657506942749, "learning_rate": 1.894923258559622e-05, "loss": 0.2016, "step": 1070 }, { "epoch": 0.01896691415744664, "grad_norm": 2.593017101287842, "learning_rate": 1.8966942148760332e-05, "loss": 0.1673, "step": 1071 }, { "epoch": 0.018984623694475067, "grad_norm": 2.7556028366088867, "learning_rate": 1.898465171192444e-05, "loss": 0.2299, "step": 1072 }, { "epoch": 0.019002333231503497, "grad_norm": 2.720900297164917, "learning_rate": 1.900236127508855e-05, "loss": 0.22, "step": 1073 }, { "epoch": 0.019020042768531923, "grad_norm": 3.0816409587860107, "learning_rate": 1.9020070838252657e-05, "loss": 0.2497, "step": 1074 }, { "epoch": 0.019037752305560353, "grad_norm": 2.6803228855133057, "learning_rate": 1.9037780401416765e-05, "loss": 0.3025, "step": 1075 }, { "epoch": 0.01905546184258878, "grad_norm": 4.559927940368652, "learning_rate": 1.9055489964580874e-05, "loss": 0.2315, "step": 1076 }, { "epoch": 0.01907317137961721, "grad_norm": 2.372323989868164, "learning_rate": 1.9073199527744985e-05, "loss": 0.255, "step": 1077 }, { "epoch": 0.019090880916645635, "grad_norm": 3.4978346824645996, "learning_rate": 1.909090909090909e-05, "loss": 0.1735, "step": 1078 }, { "epoch": 0.019108590453674065, "grad_norm": 2.7241663932800293, "learning_rate": 1.9108618654073202e-05, "loss": 0.2105, "step": 1079 }, { "epoch": 0.01912629999070249, "grad_norm": 2.4999477863311768, "learning_rate": 1.9126328217237307e-05, "loss": 0.253, "step": 1080 }, { "epoch": 0.01914400952773092, "grad_norm": 2.3323912620544434, "learning_rate": 1.914403778040142e-05, "loss": 0.2732, "step": 1081 }, { "epoch": 0.019161719064759348, "grad_norm": 3.2442193031311035, "learning_rate": 1.9161747343565523e-05, "loss": 0.265, "step": 1082 }, { "epoch": 0.019179428601787778, "grad_norm": 2.4891843795776367, "learning_rate": 1.9179456906729635e-05, "loss": 0.2436, "step": 1083 }, { "epoch": 0.019197138138816208, "grad_norm": 3.1843409538269043, "learning_rate": 1.9197166469893743e-05, "loss": 0.2803, "step": 1084 }, { "epoch": 0.019214847675844634, "grad_norm": 2.6842079162597656, "learning_rate": 1.9214876033057852e-05, "loss": 0.2087, "step": 1085 }, { "epoch": 0.019232557212873064, "grad_norm": 3.069079637527466, "learning_rate": 1.923258559622196e-05, "loss": 0.2038, "step": 1086 }, { "epoch": 0.01925026674990149, "grad_norm": 3.774153470993042, "learning_rate": 1.925029515938607e-05, "loss": 0.2449, "step": 1087 }, { "epoch": 0.01926797628692992, "grad_norm": 2.970914125442505, "learning_rate": 1.9268004722550177e-05, "loss": 0.2256, "step": 1088 }, { "epoch": 0.019285685823958346, "grad_norm": 3.558340072631836, "learning_rate": 1.928571428571429e-05, "loss": 0.2217, "step": 1089 }, { "epoch": 0.019303395360986776, "grad_norm": 3.0897276401519775, "learning_rate": 1.9303423848878393e-05, "loss": 0.2466, "step": 1090 }, { "epoch": 0.019321104898015203, "grad_norm": 2.3315775394439697, "learning_rate": 1.9321133412042505e-05, "loss": 0.2141, "step": 1091 }, { "epoch": 0.019338814435043632, "grad_norm": 3.3453519344329834, "learning_rate": 1.933884297520661e-05, "loss": 0.2273, "step": 1092 }, { "epoch": 0.01935652397207206, "grad_norm": 3.7382607460021973, "learning_rate": 1.935655253837072e-05, "loss": 0.2424, "step": 1093 }, { "epoch": 0.01937423350910049, "grad_norm": 3.4400925636291504, "learning_rate": 1.937426210153483e-05, "loss": 0.2163, "step": 1094 }, { "epoch": 0.019391943046128915, "grad_norm": 3.4013757705688477, "learning_rate": 1.9391971664698938e-05, "loss": 0.2534, "step": 1095 }, { "epoch": 0.019409652583157345, "grad_norm": 2.6005449295043945, "learning_rate": 1.9409681227863047e-05, "loss": 0.2308, "step": 1096 }, { "epoch": 0.019427362120185775, "grad_norm": 3.2109668254852295, "learning_rate": 1.9427390791027155e-05, "loss": 0.2244, "step": 1097 }, { "epoch": 0.0194450716572142, "grad_norm": 3.1210501194000244, "learning_rate": 1.9445100354191263e-05, "loss": 0.2362, "step": 1098 }, { "epoch": 0.01946278119424263, "grad_norm": 3.445817470550537, "learning_rate": 1.946280991735537e-05, "loss": 0.2482, "step": 1099 }, { "epoch": 0.019480490731271057, "grad_norm": 2.736848831176758, "learning_rate": 1.948051948051948e-05, "loss": 0.2104, "step": 1100 }, { "epoch": 0.019498200268299487, "grad_norm": 2.9480350017547607, "learning_rate": 1.949822904368359e-05, "loss": 0.2017, "step": 1101 }, { "epoch": 0.019515909805327913, "grad_norm": 3.7093968391418457, "learning_rate": 1.9515938606847696e-05, "loss": 0.2901, "step": 1102 }, { "epoch": 0.019533619342356343, "grad_norm": 3.225334405899048, "learning_rate": 1.9533648170011808e-05, "loss": 0.2858, "step": 1103 }, { "epoch": 0.01955132887938477, "grad_norm": 2.517820358276367, "learning_rate": 1.9551357733175913e-05, "loss": 0.2229, "step": 1104 }, { "epoch": 0.0195690384164132, "grad_norm": 2.317817449569702, "learning_rate": 1.9569067296340025e-05, "loss": 0.1964, "step": 1105 }, { "epoch": 0.019586747953441626, "grad_norm": 3.5171759128570557, "learning_rate": 1.9586776859504133e-05, "loss": 0.234, "step": 1106 }, { "epoch": 0.019604457490470056, "grad_norm": 2.3346383571624756, "learning_rate": 1.960448642266824e-05, "loss": 0.2099, "step": 1107 }, { "epoch": 0.019622167027498482, "grad_norm": 3.114544630050659, "learning_rate": 1.962219598583235e-05, "loss": 0.2406, "step": 1108 }, { "epoch": 0.019639876564526912, "grad_norm": 3.5456738471984863, "learning_rate": 1.9639905548996458e-05, "loss": 0.3051, "step": 1109 }, { "epoch": 0.01965758610155534, "grad_norm": 2.8695716857910156, "learning_rate": 1.9657615112160566e-05, "loss": 0.2449, "step": 1110 }, { "epoch": 0.019675295638583768, "grad_norm": 2.651825189590454, "learning_rate": 1.9675324675324678e-05, "loss": 0.2288, "step": 1111 }, { "epoch": 0.019693005175612198, "grad_norm": 3.9352636337280273, "learning_rate": 1.9693034238488783e-05, "loss": 0.22, "step": 1112 }, { "epoch": 0.019710714712640624, "grad_norm": 3.2876126766204834, "learning_rate": 1.9710743801652895e-05, "loss": 0.2676, "step": 1113 }, { "epoch": 0.019728424249669054, "grad_norm": 2.614063024520874, "learning_rate": 1.9728453364817e-05, "loss": 0.209, "step": 1114 }, { "epoch": 0.01974613378669748, "grad_norm": 2.698777437210083, "learning_rate": 1.974616292798111e-05, "loss": 0.2405, "step": 1115 }, { "epoch": 0.01976384332372591, "grad_norm": 3.248406171798706, "learning_rate": 1.9763872491145216e-05, "loss": 0.2532, "step": 1116 }, { "epoch": 0.019781552860754337, "grad_norm": 2.992158889770508, "learning_rate": 1.9781582054309328e-05, "loss": 0.2667, "step": 1117 }, { "epoch": 0.019799262397782767, "grad_norm": 3.5051653385162354, "learning_rate": 1.9799291617473436e-05, "loss": 0.258, "step": 1118 }, { "epoch": 0.019816971934811193, "grad_norm": 4.286552429199219, "learning_rate": 1.9817001180637545e-05, "loss": 0.2492, "step": 1119 }, { "epoch": 0.019834681471839623, "grad_norm": 2.7200703620910645, "learning_rate": 1.9834710743801653e-05, "loss": 0.2089, "step": 1120 }, { "epoch": 0.01985239100886805, "grad_norm": 4.668503761291504, "learning_rate": 1.985242030696576e-05, "loss": 0.2802, "step": 1121 }, { "epoch": 0.01987010054589648, "grad_norm": 3.534067153930664, "learning_rate": 1.987012987012987e-05, "loss": 0.2593, "step": 1122 }, { "epoch": 0.01988781008292491, "grad_norm": 3.2121775150299072, "learning_rate": 1.988783943329398e-05, "loss": 0.2236, "step": 1123 }, { "epoch": 0.019905519619953335, "grad_norm": 2.272920846939087, "learning_rate": 1.9905548996458086e-05, "loss": 0.2399, "step": 1124 }, { "epoch": 0.019923229156981765, "grad_norm": 2.8292150497436523, "learning_rate": 1.9923258559622198e-05, "loss": 0.1993, "step": 1125 }, { "epoch": 0.01994093869401019, "grad_norm": 3.2729814052581787, "learning_rate": 1.9940968122786303e-05, "loss": 0.2472, "step": 1126 }, { "epoch": 0.01995864823103862, "grad_norm": 3.471618175506592, "learning_rate": 1.9958677685950414e-05, "loss": 0.2393, "step": 1127 }, { "epoch": 0.019976357768067048, "grad_norm": 3.0512280464172363, "learning_rate": 1.9976387249114523e-05, "loss": 0.2238, "step": 1128 }, { "epoch": 0.019994067305095477, "grad_norm": 3.4578771591186523, "learning_rate": 1.999409681227863e-05, "loss": 0.2209, "step": 1129 }, { "epoch": 0.020011776842123904, "grad_norm": 3.4134554862976074, "learning_rate": 2.001180637544274e-05, "loss": 0.252, "step": 1130 }, { "epoch": 0.020029486379152334, "grad_norm": 2.356433391571045, "learning_rate": 2.0029515938606848e-05, "loss": 0.2165, "step": 1131 }, { "epoch": 0.02004719591618076, "grad_norm": 3.5799450874328613, "learning_rate": 2.0047225501770956e-05, "loss": 0.1865, "step": 1132 }, { "epoch": 0.02006490545320919, "grad_norm": 3.309002161026001, "learning_rate": 2.0064935064935064e-05, "loss": 0.2151, "step": 1133 }, { "epoch": 0.020082614990237616, "grad_norm": 3.7405502796173096, "learning_rate": 2.0082644628099173e-05, "loss": 0.2263, "step": 1134 }, { "epoch": 0.020100324527266046, "grad_norm": 3.0783135890960693, "learning_rate": 2.0100354191263284e-05, "loss": 0.2391, "step": 1135 }, { "epoch": 0.020118034064294476, "grad_norm": 3.6866073608398438, "learning_rate": 2.011806375442739e-05, "loss": 0.1942, "step": 1136 }, { "epoch": 0.020135743601322902, "grad_norm": 2.6282131671905518, "learning_rate": 2.01357733175915e-05, "loss": 0.2347, "step": 1137 }, { "epoch": 0.020153453138351332, "grad_norm": 3.162929058074951, "learning_rate": 2.0153482880755606e-05, "loss": 0.2287, "step": 1138 }, { "epoch": 0.02017116267537976, "grad_norm": 2.528510332107544, "learning_rate": 2.0171192443919718e-05, "loss": 0.2324, "step": 1139 }, { "epoch": 0.02018887221240819, "grad_norm": 5.639596939086914, "learning_rate": 2.0188902007083826e-05, "loss": 0.1968, "step": 1140 }, { "epoch": 0.020206581749436615, "grad_norm": 3.2862064838409424, "learning_rate": 2.0206611570247934e-05, "loss": 0.2283, "step": 1141 }, { "epoch": 0.020224291286465045, "grad_norm": 2.8719239234924316, "learning_rate": 2.0224321133412043e-05, "loss": 0.2794, "step": 1142 }, { "epoch": 0.02024200082349347, "grad_norm": 3.520916223526001, "learning_rate": 2.024203069657615e-05, "loss": 0.2385, "step": 1143 }, { "epoch": 0.0202597103605219, "grad_norm": 3.1041159629821777, "learning_rate": 2.025974025974026e-05, "loss": 0.2433, "step": 1144 }, { "epoch": 0.020277419897550327, "grad_norm": 3.9419143199920654, "learning_rate": 2.027744982290437e-05, "loss": 0.2477, "step": 1145 }, { "epoch": 0.020295129434578757, "grad_norm": 3.2544147968292236, "learning_rate": 2.0295159386068476e-05, "loss": 0.2891, "step": 1146 }, { "epoch": 0.020312838971607183, "grad_norm": 3.252856492996216, "learning_rate": 2.0312868949232587e-05, "loss": 0.2371, "step": 1147 }, { "epoch": 0.020330548508635613, "grad_norm": 2.7551229000091553, "learning_rate": 2.0330578512396692e-05, "loss": 0.2248, "step": 1148 }, { "epoch": 0.02034825804566404, "grad_norm": 2.7347404956817627, "learning_rate": 2.0348288075560804e-05, "loss": 0.211, "step": 1149 }, { "epoch": 0.02036596758269247, "grad_norm": 2.4150774478912354, "learning_rate": 2.0365997638724912e-05, "loss": 0.2539, "step": 1150 }, { "epoch": 0.0203836771197209, "grad_norm": 3.048077344894409, "learning_rate": 2.038370720188902e-05, "loss": 0.2303, "step": 1151 }, { "epoch": 0.020401386656749326, "grad_norm": 3.750525712966919, "learning_rate": 2.040141676505313e-05, "loss": 0.2312, "step": 1152 }, { "epoch": 0.020419096193777755, "grad_norm": 2.797610282897949, "learning_rate": 2.0419126328217237e-05, "loss": 0.201, "step": 1153 }, { "epoch": 0.020436805730806182, "grad_norm": 3.3869082927703857, "learning_rate": 2.0436835891381346e-05, "loss": 0.247, "step": 1154 }, { "epoch": 0.02045451526783461, "grad_norm": 3.313248872756958, "learning_rate": 2.0454545454545454e-05, "loss": 0.249, "step": 1155 }, { "epoch": 0.020472224804863038, "grad_norm": 2.9977757930755615, "learning_rate": 2.0472255017709562e-05, "loss": 0.2315, "step": 1156 }, { "epoch": 0.020489934341891468, "grad_norm": 3.494988441467285, "learning_rate": 2.0489964580873674e-05, "loss": 0.2592, "step": 1157 }, { "epoch": 0.020507643878919894, "grad_norm": 4.442539691925049, "learning_rate": 2.050767414403778e-05, "loss": 0.2674, "step": 1158 }, { "epoch": 0.020525353415948324, "grad_norm": 2.723020315170288, "learning_rate": 2.052538370720189e-05, "loss": 0.2314, "step": 1159 }, { "epoch": 0.02054306295297675, "grad_norm": 1.8844321966171265, "learning_rate": 2.0543093270365996e-05, "loss": 0.2274, "step": 1160 }, { "epoch": 0.02056077249000518, "grad_norm": 3.2954373359680176, "learning_rate": 2.0560802833530107e-05, "loss": 0.2208, "step": 1161 }, { "epoch": 0.020578482027033607, "grad_norm": 2.350541830062866, "learning_rate": 2.0578512396694216e-05, "loss": 0.2541, "step": 1162 }, { "epoch": 0.020596191564062036, "grad_norm": 4.081457614898682, "learning_rate": 2.0596221959858324e-05, "loss": 0.2332, "step": 1163 }, { "epoch": 0.020613901101090466, "grad_norm": 2.7540481090545654, "learning_rate": 2.0613931523022432e-05, "loss": 0.2284, "step": 1164 }, { "epoch": 0.020631610638118893, "grad_norm": 2.37937331199646, "learning_rate": 2.063164108618654e-05, "loss": 0.2494, "step": 1165 }, { "epoch": 0.020649320175147323, "grad_norm": 2.6153030395507812, "learning_rate": 2.064935064935065e-05, "loss": 0.1966, "step": 1166 }, { "epoch": 0.02066702971217575, "grad_norm": 2.338740348815918, "learning_rate": 2.066706021251476e-05, "loss": 0.206, "step": 1167 }, { "epoch": 0.02068473924920418, "grad_norm": 2.9289770126342773, "learning_rate": 2.0684769775678865e-05, "loss": 0.2083, "step": 1168 }, { "epoch": 0.020702448786232605, "grad_norm": 2.3757050037384033, "learning_rate": 2.0702479338842977e-05, "loss": 0.1843, "step": 1169 }, { "epoch": 0.020720158323261035, "grad_norm": 2.5869665145874023, "learning_rate": 2.0720188902007082e-05, "loss": 0.1582, "step": 1170 }, { "epoch": 0.02073786786028946, "grad_norm": 3.306159019470215, "learning_rate": 2.0737898465171194e-05, "loss": 0.2231, "step": 1171 }, { "epoch": 0.02075557739731789, "grad_norm": 2.569404363632202, "learning_rate": 2.07556080283353e-05, "loss": 0.2021, "step": 1172 }, { "epoch": 0.020773286934346318, "grad_norm": 3.1835765838623047, "learning_rate": 2.077331759149941e-05, "loss": 0.2007, "step": 1173 }, { "epoch": 0.020790996471374747, "grad_norm": 3.3974545001983643, "learning_rate": 2.079102715466352e-05, "loss": 0.2126, "step": 1174 }, { "epoch": 0.020808706008403174, "grad_norm": 3.8742377758026123, "learning_rate": 2.0808736717827627e-05, "loss": 0.2562, "step": 1175 }, { "epoch": 0.020826415545431604, "grad_norm": 3.348170518875122, "learning_rate": 2.0826446280991735e-05, "loss": 0.2422, "step": 1176 }, { "epoch": 0.020844125082460033, "grad_norm": 3.9869461059570312, "learning_rate": 2.0844155844155844e-05, "loss": 0.1996, "step": 1177 }, { "epoch": 0.02086183461948846, "grad_norm": 3.0052707195281982, "learning_rate": 2.0861865407319952e-05, "loss": 0.2502, "step": 1178 }, { "epoch": 0.02087954415651689, "grad_norm": 2.304192066192627, "learning_rate": 2.0879574970484064e-05, "loss": 0.2159, "step": 1179 }, { "epoch": 0.020897253693545316, "grad_norm": 4.193364143371582, "learning_rate": 2.089728453364817e-05, "loss": 0.2519, "step": 1180 }, { "epoch": 0.020914963230573746, "grad_norm": 2.8145592212677, "learning_rate": 2.091499409681228e-05, "loss": 0.2868, "step": 1181 }, { "epoch": 0.020932672767602172, "grad_norm": 2.5838253498077393, "learning_rate": 2.0932703659976385e-05, "loss": 0.2043, "step": 1182 }, { "epoch": 0.020950382304630602, "grad_norm": 3.1374402046203613, "learning_rate": 2.0950413223140497e-05, "loss": 0.2599, "step": 1183 }, { "epoch": 0.02096809184165903, "grad_norm": 4.033368110656738, "learning_rate": 2.0968122786304605e-05, "loss": 0.2784, "step": 1184 }, { "epoch": 0.020985801378687458, "grad_norm": 3.116999864578247, "learning_rate": 2.0985832349468714e-05, "loss": 0.2565, "step": 1185 }, { "epoch": 0.021003510915715885, "grad_norm": 2.8315980434417725, "learning_rate": 2.1003541912632822e-05, "loss": 0.2494, "step": 1186 }, { "epoch": 0.021021220452744314, "grad_norm": 2.9687516689300537, "learning_rate": 2.102125147579693e-05, "loss": 0.1804, "step": 1187 }, { "epoch": 0.02103892998977274, "grad_norm": 4.031305313110352, "learning_rate": 2.103896103896104e-05, "loss": 0.2905, "step": 1188 }, { "epoch": 0.02105663952680117, "grad_norm": 3.013493299484253, "learning_rate": 2.1056670602125147e-05, "loss": 0.223, "step": 1189 }, { "epoch": 0.0210743490638296, "grad_norm": 2.7764534950256348, "learning_rate": 2.1074380165289255e-05, "loss": 0.2305, "step": 1190 }, { "epoch": 0.021092058600858027, "grad_norm": 3.1693928241729736, "learning_rate": 2.1092089728453367e-05, "loss": 0.2454, "step": 1191 }, { "epoch": 0.021109768137886457, "grad_norm": 4.210748195648193, "learning_rate": 2.1109799291617472e-05, "loss": 0.2331, "step": 1192 }, { "epoch": 0.021127477674914883, "grad_norm": 2.8249475955963135, "learning_rate": 2.1127508854781583e-05, "loss": 0.2298, "step": 1193 }, { "epoch": 0.021145187211943313, "grad_norm": 2.463437795639038, "learning_rate": 2.114521841794569e-05, "loss": 0.199, "step": 1194 }, { "epoch": 0.02116289674897174, "grad_norm": 2.9098033905029297, "learning_rate": 2.11629279811098e-05, "loss": 0.1999, "step": 1195 }, { "epoch": 0.02118060628600017, "grad_norm": 2.890873908996582, "learning_rate": 2.118063754427391e-05, "loss": 0.1807, "step": 1196 }, { "epoch": 0.021198315823028595, "grad_norm": 4.463277816772461, "learning_rate": 2.1198347107438017e-05, "loss": 0.17, "step": 1197 }, { "epoch": 0.021216025360057025, "grad_norm": 3.0301153659820557, "learning_rate": 2.1216056670602125e-05, "loss": 0.1978, "step": 1198 }, { "epoch": 0.02123373489708545, "grad_norm": 3.6958255767822266, "learning_rate": 2.1233766233766233e-05, "loss": 0.2458, "step": 1199 }, { "epoch": 0.02125144443411388, "grad_norm": 2.218797445297241, "learning_rate": 2.125147579693034e-05, "loss": 0.2114, "step": 1200 }, { "epoch": 0.021269153971142308, "grad_norm": 2.4781975746154785, "learning_rate": 2.1269185360094453e-05, "loss": 0.2176, "step": 1201 }, { "epoch": 0.021286863508170738, "grad_norm": 2.8767597675323486, "learning_rate": 2.1286894923258558e-05, "loss": 0.2047, "step": 1202 }, { "epoch": 0.021304573045199168, "grad_norm": 3.0902488231658936, "learning_rate": 2.130460448642267e-05, "loss": 0.1922, "step": 1203 }, { "epoch": 0.021322282582227594, "grad_norm": 2.488739490509033, "learning_rate": 2.1322314049586775e-05, "loss": 0.27, "step": 1204 }, { "epoch": 0.021339992119256024, "grad_norm": 2.49159574508667, "learning_rate": 2.1340023612750887e-05, "loss": 0.2142, "step": 1205 }, { "epoch": 0.02135770165628445, "grad_norm": 2.5178074836730957, "learning_rate": 2.1357733175914995e-05, "loss": 0.2292, "step": 1206 }, { "epoch": 0.02137541119331288, "grad_norm": 3.3364241123199463, "learning_rate": 2.1375442739079103e-05, "loss": 0.2455, "step": 1207 }, { "epoch": 0.021393120730341306, "grad_norm": 3.1266868114471436, "learning_rate": 2.139315230224321e-05, "loss": 0.2286, "step": 1208 }, { "epoch": 0.021410830267369736, "grad_norm": 2.1508562564849854, "learning_rate": 2.141086186540732e-05, "loss": 0.1629, "step": 1209 }, { "epoch": 0.021428539804398163, "grad_norm": 3.165841817855835, "learning_rate": 2.1428571428571428e-05, "loss": 0.1918, "step": 1210 }, { "epoch": 0.021446249341426592, "grad_norm": 3.133472442626953, "learning_rate": 2.1446280991735536e-05, "loss": 0.2708, "step": 1211 }, { "epoch": 0.02146395887845502, "grad_norm": 3.271463632583618, "learning_rate": 2.1463990554899645e-05, "loss": 0.2641, "step": 1212 }, { "epoch": 0.02148166841548345, "grad_norm": 3.0405666828155518, "learning_rate": 2.1481700118063756e-05, "loss": 0.2048, "step": 1213 }, { "epoch": 0.021499377952511875, "grad_norm": 3.2905259132385254, "learning_rate": 2.149940968122786e-05, "loss": 0.2594, "step": 1214 }, { "epoch": 0.021517087489540305, "grad_norm": 4.363830089569092, "learning_rate": 2.1517119244391973e-05, "loss": 0.2137, "step": 1215 }, { "epoch": 0.02153479702656873, "grad_norm": 2.7633039951324463, "learning_rate": 2.1534828807556078e-05, "loss": 0.2075, "step": 1216 }, { "epoch": 0.02155250656359716, "grad_norm": 2.976863384246826, "learning_rate": 2.155253837072019e-05, "loss": 0.2193, "step": 1217 }, { "epoch": 0.02157021610062559, "grad_norm": 3.2432079315185547, "learning_rate": 2.1570247933884298e-05, "loss": 0.2638, "step": 1218 }, { "epoch": 0.021587925637654017, "grad_norm": 2.502978563308716, "learning_rate": 2.1587957497048406e-05, "loss": 0.2166, "step": 1219 }, { "epoch": 0.021605635174682447, "grad_norm": 2.338869333267212, "learning_rate": 2.1605667060212515e-05, "loss": 0.2492, "step": 1220 }, { "epoch": 0.021623344711710873, "grad_norm": 2.4066271781921387, "learning_rate": 2.1623376623376623e-05, "loss": 0.203, "step": 1221 }, { "epoch": 0.021641054248739303, "grad_norm": 2.4647376537323, "learning_rate": 2.164108618654073e-05, "loss": 0.1913, "step": 1222 }, { "epoch": 0.02165876378576773, "grad_norm": 2.221907377243042, "learning_rate": 2.1658795749704843e-05, "loss": 0.2581, "step": 1223 }, { "epoch": 0.02167647332279616, "grad_norm": 2.248154640197754, "learning_rate": 2.1676505312868948e-05, "loss": 0.2098, "step": 1224 }, { "epoch": 0.021694182859824586, "grad_norm": 2.8117971420288086, "learning_rate": 2.169421487603306e-05, "loss": 0.1814, "step": 1225 }, { "epoch": 0.021711892396853016, "grad_norm": 3.1908583641052246, "learning_rate": 2.1711924439197165e-05, "loss": 0.199, "step": 1226 }, { "epoch": 0.021729601933881442, "grad_norm": 3.0651378631591797, "learning_rate": 2.1729634002361276e-05, "loss": 0.2128, "step": 1227 }, { "epoch": 0.021747311470909872, "grad_norm": 3.0012736320495605, "learning_rate": 2.174734356552538e-05, "loss": 0.2713, "step": 1228 }, { "epoch": 0.0217650210079383, "grad_norm": 2.475425958633423, "learning_rate": 2.1765053128689493e-05, "loss": 0.2289, "step": 1229 }, { "epoch": 0.021782730544966728, "grad_norm": 3.3496124744415283, "learning_rate": 2.17827626918536e-05, "loss": 0.2132, "step": 1230 }, { "epoch": 0.021800440081995158, "grad_norm": 3.2890708446502686, "learning_rate": 2.180047225501771e-05, "loss": 0.2006, "step": 1231 }, { "epoch": 0.021818149619023584, "grad_norm": 3.354532241821289, "learning_rate": 2.1818181818181818e-05, "loss": 0.2477, "step": 1232 }, { "epoch": 0.021835859156052014, "grad_norm": 3.051370859146118, "learning_rate": 2.1835891381345926e-05, "loss": 0.2076, "step": 1233 }, { "epoch": 0.02185356869308044, "grad_norm": 2.42962908744812, "learning_rate": 2.1853600944510034e-05, "loss": 0.2338, "step": 1234 }, { "epoch": 0.02187127823010887, "grad_norm": 2.7457587718963623, "learning_rate": 2.1871310507674146e-05, "loss": 0.2238, "step": 1235 }, { "epoch": 0.021888987767137297, "grad_norm": 3.335847854614258, "learning_rate": 2.188902007083825e-05, "loss": 0.243, "step": 1236 }, { "epoch": 0.021906697304165727, "grad_norm": 3.851555585861206, "learning_rate": 2.1906729634002363e-05, "loss": 0.2519, "step": 1237 }, { "epoch": 0.021924406841194153, "grad_norm": 2.671828508377075, "learning_rate": 2.1924439197166468e-05, "loss": 0.2275, "step": 1238 }, { "epoch": 0.021942116378222583, "grad_norm": 3.309086322784424, "learning_rate": 2.194214876033058e-05, "loss": 0.2901, "step": 1239 }, { "epoch": 0.02195982591525101, "grad_norm": 3.645740270614624, "learning_rate": 2.1959858323494688e-05, "loss": 0.1816, "step": 1240 }, { "epoch": 0.02197753545227944, "grad_norm": 2.911776304244995, "learning_rate": 2.1977567886658796e-05, "loss": 0.2122, "step": 1241 }, { "epoch": 0.021995244989307865, "grad_norm": 3.3773744106292725, "learning_rate": 2.1995277449822904e-05, "loss": 0.2799, "step": 1242 }, { "epoch": 0.022012954526336295, "grad_norm": 2.4071502685546875, "learning_rate": 2.2012987012987013e-05, "loss": 0.188, "step": 1243 }, { "epoch": 0.022030664063364725, "grad_norm": 3.6280269622802734, "learning_rate": 2.203069657615112e-05, "loss": 0.2325, "step": 1244 }, { "epoch": 0.02204837360039315, "grad_norm": 3.14595365524292, "learning_rate": 2.204840613931523e-05, "loss": 0.2381, "step": 1245 }, { "epoch": 0.02206608313742158, "grad_norm": 3.1829774379730225, "learning_rate": 2.2066115702479338e-05, "loss": 0.2377, "step": 1246 }, { "epoch": 0.022083792674450008, "grad_norm": 2.448495864868164, "learning_rate": 2.208382526564345e-05, "loss": 0.2409, "step": 1247 }, { "epoch": 0.022101502211478437, "grad_norm": 2.9201645851135254, "learning_rate": 2.2101534828807554e-05, "loss": 0.2471, "step": 1248 }, { "epoch": 0.022119211748506864, "grad_norm": 2.8361122608184814, "learning_rate": 2.2119244391971666e-05, "loss": 0.2396, "step": 1249 }, { "epoch": 0.022136921285535294, "grad_norm": 2.698026657104492, "learning_rate": 2.213695395513577e-05, "loss": 0.2153, "step": 1250 }, { "epoch": 0.02215463082256372, "grad_norm": 3.333526134490967, "learning_rate": 2.2154663518299883e-05, "loss": 0.2369, "step": 1251 }, { "epoch": 0.02217234035959215, "grad_norm": 5.451172351837158, "learning_rate": 2.217237308146399e-05, "loss": 0.2651, "step": 1252 }, { "epoch": 0.022190049896620576, "grad_norm": 2.8745527267456055, "learning_rate": 2.21900826446281e-05, "loss": 0.2302, "step": 1253 }, { "epoch": 0.022207759433649006, "grad_norm": 3.1012051105499268, "learning_rate": 2.2207792207792207e-05, "loss": 0.2432, "step": 1254 }, { "epoch": 0.022225468970677432, "grad_norm": 3.0492191314697266, "learning_rate": 2.2225501770956316e-05, "loss": 0.2401, "step": 1255 }, { "epoch": 0.022243178507705862, "grad_norm": 3.071268320083618, "learning_rate": 2.2243211334120424e-05, "loss": 0.2327, "step": 1256 }, { "epoch": 0.022260888044734292, "grad_norm": 2.5920636653900146, "learning_rate": 2.2260920897284536e-05, "loss": 0.2133, "step": 1257 }, { "epoch": 0.02227859758176272, "grad_norm": 4.811889171600342, "learning_rate": 2.227863046044864e-05, "loss": 0.2807, "step": 1258 }, { "epoch": 0.02229630711879115, "grad_norm": 2.640648126602173, "learning_rate": 2.2296340023612752e-05, "loss": 0.2576, "step": 1259 }, { "epoch": 0.022314016655819575, "grad_norm": 2.4451956748962402, "learning_rate": 2.2314049586776857e-05, "loss": 0.2633, "step": 1260 }, { "epoch": 0.022331726192848005, "grad_norm": 3.93778920173645, "learning_rate": 2.233175914994097e-05, "loss": 0.2466, "step": 1261 }, { "epoch": 0.02234943572987643, "grad_norm": 2.958994150161743, "learning_rate": 2.2349468713105074e-05, "loss": 0.2365, "step": 1262 }, { "epoch": 0.02236714526690486, "grad_norm": 3.269099712371826, "learning_rate": 2.2367178276269186e-05, "loss": 0.2376, "step": 1263 }, { "epoch": 0.022384854803933287, "grad_norm": 2.927625894546509, "learning_rate": 2.2384887839433294e-05, "loss": 0.2046, "step": 1264 }, { "epoch": 0.022402564340961717, "grad_norm": 2.431689977645874, "learning_rate": 2.2402597402597402e-05, "loss": 0.1934, "step": 1265 }, { "epoch": 0.022420273877990143, "grad_norm": 2.8889858722686768, "learning_rate": 2.242030696576151e-05, "loss": 0.2095, "step": 1266 }, { "epoch": 0.022437983415018573, "grad_norm": 4.170443534851074, "learning_rate": 2.243801652892562e-05, "loss": 0.219, "step": 1267 }, { "epoch": 0.022455692952047, "grad_norm": 3.66253662109375, "learning_rate": 2.2455726092089727e-05, "loss": 0.2605, "step": 1268 }, { "epoch": 0.02247340248907543, "grad_norm": 2.728379249572754, "learning_rate": 2.247343565525384e-05, "loss": 0.2735, "step": 1269 }, { "epoch": 0.02249111202610386, "grad_norm": 3.196887493133545, "learning_rate": 2.2491145218417947e-05, "loss": 0.1958, "step": 1270 }, { "epoch": 0.022508821563132286, "grad_norm": 3.2048227787017822, "learning_rate": 2.2508854781582056e-05, "loss": 0.2613, "step": 1271 }, { "epoch": 0.022526531100160715, "grad_norm": 3.097825765609741, "learning_rate": 2.2526564344746164e-05, "loss": 0.3268, "step": 1272 }, { "epoch": 0.022544240637189142, "grad_norm": 2.7878756523132324, "learning_rate": 2.2544273907910272e-05, "loss": 0.2506, "step": 1273 }, { "epoch": 0.02256195017421757, "grad_norm": 2.597989559173584, "learning_rate": 2.256198347107438e-05, "loss": 0.2277, "step": 1274 }, { "epoch": 0.022579659711245998, "grad_norm": 6.134589195251465, "learning_rate": 2.257969303423849e-05, "loss": 0.3529, "step": 1275 }, { "epoch": 0.022597369248274428, "grad_norm": 4.238209247589111, "learning_rate": 2.25974025974026e-05, "loss": 0.2506, "step": 1276 }, { "epoch": 0.022615078785302854, "grad_norm": 3.3241183757781982, "learning_rate": 2.2615112160566705e-05, "loss": 0.2228, "step": 1277 }, { "epoch": 0.022632788322331284, "grad_norm": 3.534719705581665, "learning_rate": 2.2632821723730817e-05, "loss": 0.2188, "step": 1278 }, { "epoch": 0.02265049785935971, "grad_norm": 2.9141154289245605, "learning_rate": 2.2650531286894925e-05, "loss": 0.2172, "step": 1279 }, { "epoch": 0.02266820739638814, "grad_norm": 2.2506103515625, "learning_rate": 2.2668240850059034e-05, "loss": 0.2145, "step": 1280 }, { "epoch": 0.022685916933416567, "grad_norm": 3.343284845352173, "learning_rate": 2.2685950413223142e-05, "loss": 0.275, "step": 1281 }, { "epoch": 0.022703626470444997, "grad_norm": 3.6248178482055664, "learning_rate": 2.270365997638725e-05, "loss": 0.2023, "step": 1282 }, { "epoch": 0.022721336007473426, "grad_norm": 2.4692769050598145, "learning_rate": 2.272136953955136e-05, "loss": 0.2006, "step": 1283 }, { "epoch": 0.022739045544501853, "grad_norm": 2.6897687911987305, "learning_rate": 2.2739079102715467e-05, "loss": 0.2392, "step": 1284 }, { "epoch": 0.022756755081530283, "grad_norm": 2.9966773986816406, "learning_rate": 2.2756788665879575e-05, "loss": 0.2454, "step": 1285 }, { "epoch": 0.02277446461855871, "grad_norm": 3.2094063758850098, "learning_rate": 2.2774498229043687e-05, "loss": 0.2399, "step": 1286 }, { "epoch": 0.02279217415558714, "grad_norm": 3.0764851570129395, "learning_rate": 2.2792207792207792e-05, "loss": 0.2343, "step": 1287 }, { "epoch": 0.022809883692615565, "grad_norm": 2.6950809955596924, "learning_rate": 2.2809917355371904e-05, "loss": 0.1929, "step": 1288 }, { "epoch": 0.022827593229643995, "grad_norm": 3.6675031185150146, "learning_rate": 2.282762691853601e-05, "loss": 0.2193, "step": 1289 }, { "epoch": 0.02284530276667242, "grad_norm": 2.4359259605407715, "learning_rate": 2.284533648170012e-05, "loss": 0.1664, "step": 1290 }, { "epoch": 0.02286301230370085, "grad_norm": 3.6481876373291016, "learning_rate": 2.286304604486423e-05, "loss": 0.2282, "step": 1291 }, { "epoch": 0.022880721840729278, "grad_norm": 2.9413163661956787, "learning_rate": 2.2880755608028337e-05, "loss": 0.2122, "step": 1292 }, { "epoch": 0.022898431377757707, "grad_norm": 2.5194759368896484, "learning_rate": 2.2898465171192445e-05, "loss": 0.241, "step": 1293 }, { "epoch": 0.022916140914786134, "grad_norm": 2.9643218517303467, "learning_rate": 2.2916174734356554e-05, "loss": 0.2346, "step": 1294 }, { "epoch": 0.022933850451814564, "grad_norm": 2.7493157386779785, "learning_rate": 2.2933884297520662e-05, "loss": 0.241, "step": 1295 }, { "epoch": 0.02295155998884299, "grad_norm": 2.2905519008636475, "learning_rate": 2.2951593860684774e-05, "loss": 0.2088, "step": 1296 }, { "epoch": 0.02296926952587142, "grad_norm": 3.208134889602661, "learning_rate": 2.296930342384888e-05, "loss": 0.2313, "step": 1297 }, { "epoch": 0.02298697906289985, "grad_norm": 2.3693478107452393, "learning_rate": 2.298701298701299e-05, "loss": 0.2246, "step": 1298 }, { "epoch": 0.023004688599928276, "grad_norm": 2.7767977714538574, "learning_rate": 2.3004722550177095e-05, "loss": 0.268, "step": 1299 }, { "epoch": 0.023022398136956706, "grad_norm": 2.9469542503356934, "learning_rate": 2.3022432113341207e-05, "loss": 0.2281, "step": 1300 }, { "epoch": 0.023040107673985132, "grad_norm": 3.202636241912842, "learning_rate": 2.3040141676505312e-05, "loss": 0.2347, "step": 1301 }, { "epoch": 0.023057817211013562, "grad_norm": 2.9055776596069336, "learning_rate": 2.3057851239669423e-05, "loss": 0.1794, "step": 1302 }, { "epoch": 0.02307552674804199, "grad_norm": 2.5960729122161865, "learning_rate": 2.3075560802833532e-05, "loss": 0.168, "step": 1303 }, { "epoch": 0.02309323628507042, "grad_norm": 3.30688738822937, "learning_rate": 2.309327036599764e-05, "loss": 0.2232, "step": 1304 }, { "epoch": 0.023110945822098845, "grad_norm": 2.440359115600586, "learning_rate": 2.311097992916175e-05, "loss": 0.2523, "step": 1305 }, { "epoch": 0.023128655359127274, "grad_norm": 3.634507417678833, "learning_rate": 2.3128689492325857e-05, "loss": 0.2567, "step": 1306 }, { "epoch": 0.0231463648961557, "grad_norm": 2.563175678253174, "learning_rate": 2.3146399055489965e-05, "loss": 0.1945, "step": 1307 }, { "epoch": 0.02316407443318413, "grad_norm": 2.790828227996826, "learning_rate": 2.3164108618654077e-05, "loss": 0.2109, "step": 1308 }, { "epoch": 0.023181783970212557, "grad_norm": 2.47916316986084, "learning_rate": 2.318181818181818e-05, "loss": 0.2179, "step": 1309 }, { "epoch": 0.023199493507240987, "grad_norm": 3.115570068359375, "learning_rate": 2.3199527744982293e-05, "loss": 0.2117, "step": 1310 }, { "epoch": 0.023217203044269417, "grad_norm": 6.515268802642822, "learning_rate": 2.3217237308146398e-05, "loss": 0.2875, "step": 1311 }, { "epoch": 0.023234912581297843, "grad_norm": 2.1368889808654785, "learning_rate": 2.323494687131051e-05, "loss": 0.185, "step": 1312 }, { "epoch": 0.023252622118326273, "grad_norm": 4.319392681121826, "learning_rate": 2.3252656434474618e-05, "loss": 0.2173, "step": 1313 }, { "epoch": 0.0232703316553547, "grad_norm": 2.5158333778381348, "learning_rate": 2.3270365997638727e-05, "loss": 0.2275, "step": 1314 }, { "epoch": 0.02328804119238313, "grad_norm": 4.347591400146484, "learning_rate": 2.3288075560802835e-05, "loss": 0.2262, "step": 1315 }, { "epoch": 0.023305750729411556, "grad_norm": 3.498663902282715, "learning_rate": 2.3305785123966943e-05, "loss": 0.2231, "step": 1316 }, { "epoch": 0.023323460266439985, "grad_norm": 2.8334314823150635, "learning_rate": 2.332349468713105e-05, "loss": 0.2599, "step": 1317 }, { "epoch": 0.023341169803468412, "grad_norm": 2.8764023780822754, "learning_rate": 2.334120425029516e-05, "loss": 0.2384, "step": 1318 }, { "epoch": 0.02335887934049684, "grad_norm": 3.9745898246765137, "learning_rate": 2.3358913813459268e-05, "loss": 0.2058, "step": 1319 }, { "epoch": 0.023376588877525268, "grad_norm": 2.5624094009399414, "learning_rate": 2.337662337662338e-05, "loss": 0.2217, "step": 1320 }, { "epoch": 0.023394298414553698, "grad_norm": 2.539064407348633, "learning_rate": 2.3394332939787485e-05, "loss": 0.2033, "step": 1321 }, { "epoch": 0.023412007951582124, "grad_norm": 2.312983751296997, "learning_rate": 2.3412042502951596e-05, "loss": 0.2099, "step": 1322 }, { "epoch": 0.023429717488610554, "grad_norm": 2.6267952919006348, "learning_rate": 2.34297520661157e-05, "loss": 0.2052, "step": 1323 }, { "epoch": 0.023447427025638984, "grad_norm": 3.597630262374878, "learning_rate": 2.3447461629279813e-05, "loss": 0.2725, "step": 1324 }, { "epoch": 0.02346513656266741, "grad_norm": 3.039414167404175, "learning_rate": 2.346517119244392e-05, "loss": 0.2131, "step": 1325 }, { "epoch": 0.02348284609969584, "grad_norm": 3.6861836910247803, "learning_rate": 2.348288075560803e-05, "loss": 0.2425, "step": 1326 }, { "epoch": 0.023500555636724266, "grad_norm": 2.532602548599243, "learning_rate": 2.3500590318772138e-05, "loss": 0.2783, "step": 1327 }, { "epoch": 0.023518265173752696, "grad_norm": 2.381554365158081, "learning_rate": 2.3518299881936246e-05, "loss": 0.2374, "step": 1328 }, { "epoch": 0.023535974710781123, "grad_norm": 3.1751019954681396, "learning_rate": 2.3536009445100355e-05, "loss": 0.2016, "step": 1329 }, { "epoch": 0.023553684247809552, "grad_norm": 2.3332512378692627, "learning_rate": 2.3553719008264466e-05, "loss": 0.227, "step": 1330 }, { "epoch": 0.02357139378483798, "grad_norm": 2.6536126136779785, "learning_rate": 2.357142857142857e-05, "loss": 0.2445, "step": 1331 }, { "epoch": 0.02358910332186641, "grad_norm": 2.0443246364593506, "learning_rate": 2.3589138134592683e-05, "loss": 0.2149, "step": 1332 }, { "epoch": 0.023606812858894835, "grad_norm": 2.708801746368408, "learning_rate": 2.3606847697756788e-05, "loss": 0.1957, "step": 1333 }, { "epoch": 0.023624522395923265, "grad_norm": 3.825857162475586, "learning_rate": 2.36245572609209e-05, "loss": 0.242, "step": 1334 }, { "epoch": 0.02364223193295169, "grad_norm": 2.7070419788360596, "learning_rate": 2.3642266824085005e-05, "loss": 0.1665, "step": 1335 }, { "epoch": 0.02365994146998012, "grad_norm": 3.0113022327423096, "learning_rate": 2.3659976387249116e-05, "loss": 0.2041, "step": 1336 }, { "epoch": 0.02367765100700855, "grad_norm": 2.0658411979675293, "learning_rate": 2.3677685950413225e-05, "loss": 0.2448, "step": 1337 }, { "epoch": 0.023695360544036977, "grad_norm": 3.0948166847229004, "learning_rate": 2.3695395513577333e-05, "loss": 0.2622, "step": 1338 }, { "epoch": 0.023713070081065407, "grad_norm": 3.9283149242401123, "learning_rate": 2.371310507674144e-05, "loss": 0.2253, "step": 1339 }, { "epoch": 0.023730779618093834, "grad_norm": 3.547339677810669, "learning_rate": 2.373081463990555e-05, "loss": 0.2255, "step": 1340 }, { "epoch": 0.023748489155122263, "grad_norm": 4.005363941192627, "learning_rate": 2.3748524203069658e-05, "loss": 0.2414, "step": 1341 }, { "epoch": 0.02376619869215069, "grad_norm": 4.164314270019531, "learning_rate": 2.376623376623377e-05, "loss": 0.2191, "step": 1342 }, { "epoch": 0.02378390822917912, "grad_norm": 2.5803842544555664, "learning_rate": 2.3783943329397874e-05, "loss": 0.1994, "step": 1343 }, { "epoch": 0.023801617766207546, "grad_norm": 3.2211966514587402, "learning_rate": 2.3801652892561986e-05, "loss": 0.2358, "step": 1344 }, { "epoch": 0.023819327303235976, "grad_norm": 3.042497396469116, "learning_rate": 2.381936245572609e-05, "loss": 0.282, "step": 1345 }, { "epoch": 0.023837036840264402, "grad_norm": 3.145017623901367, "learning_rate": 2.3837072018890203e-05, "loss": 0.2436, "step": 1346 }, { "epoch": 0.023854746377292832, "grad_norm": 2.6687264442443848, "learning_rate": 2.385478158205431e-05, "loss": 0.2472, "step": 1347 }, { "epoch": 0.02387245591432126, "grad_norm": 2.691990852355957, "learning_rate": 2.387249114521842e-05, "loss": 0.2402, "step": 1348 }, { "epoch": 0.023890165451349688, "grad_norm": 2.7659807205200195, "learning_rate": 2.3890200708382528e-05, "loss": 0.2242, "step": 1349 }, { "epoch": 0.023907874988378118, "grad_norm": 2.7492387294769287, "learning_rate": 2.3907910271546636e-05, "loss": 0.1725, "step": 1350 }, { "epoch": 0.023925584525406544, "grad_norm": 2.6352076530456543, "learning_rate": 2.3925619834710744e-05, "loss": 0.2408, "step": 1351 }, { "epoch": 0.023943294062434974, "grad_norm": 2.861091136932373, "learning_rate": 2.3943329397874856e-05, "loss": 0.2077, "step": 1352 }, { "epoch": 0.0239610035994634, "grad_norm": 2.711583375930786, "learning_rate": 2.396103896103896e-05, "loss": 0.21, "step": 1353 }, { "epoch": 0.02397871313649183, "grad_norm": 3.9012644290924072, "learning_rate": 2.3978748524203073e-05, "loss": 0.2421, "step": 1354 }, { "epoch": 0.023996422673520257, "grad_norm": 2.967503547668457, "learning_rate": 2.3996458087367178e-05, "loss": 0.2256, "step": 1355 }, { "epoch": 0.024014132210548687, "grad_norm": 2.1771347522735596, "learning_rate": 2.401416765053129e-05, "loss": 0.2298, "step": 1356 }, { "epoch": 0.024031841747577113, "grad_norm": 3.2618584632873535, "learning_rate": 2.4031877213695394e-05, "loss": 0.2627, "step": 1357 }, { "epoch": 0.024049551284605543, "grad_norm": 2.738431692123413, "learning_rate": 2.4049586776859506e-05, "loss": 0.2339, "step": 1358 }, { "epoch": 0.02406726082163397, "grad_norm": 2.5746748447418213, "learning_rate": 2.4067296340023614e-05, "loss": 0.221, "step": 1359 }, { "epoch": 0.0240849703586624, "grad_norm": 2.9693095684051514, "learning_rate": 2.4085005903187723e-05, "loss": 0.2059, "step": 1360 }, { "epoch": 0.024102679895690825, "grad_norm": 3.867568016052246, "learning_rate": 2.410271546635183e-05, "loss": 0.2806, "step": 1361 }, { "epoch": 0.024120389432719255, "grad_norm": 2.8165998458862305, "learning_rate": 2.412042502951594e-05, "loss": 0.2172, "step": 1362 }, { "epoch": 0.02413809896974768, "grad_norm": 3.306201457977295, "learning_rate": 2.4138134592680047e-05, "loss": 0.2713, "step": 1363 }, { "epoch": 0.02415580850677611, "grad_norm": 3.226264715194702, "learning_rate": 2.415584415584416e-05, "loss": 0.1769, "step": 1364 }, { "epoch": 0.02417351804380454, "grad_norm": 2.785763740539551, "learning_rate": 2.4173553719008264e-05, "loss": 0.2388, "step": 1365 }, { "epoch": 0.024191227580832968, "grad_norm": 2.900602102279663, "learning_rate": 2.4191263282172376e-05, "loss": 0.1938, "step": 1366 }, { "epoch": 0.024208937117861398, "grad_norm": 2.932274341583252, "learning_rate": 2.420897284533648e-05, "loss": 0.2843, "step": 1367 }, { "epoch": 0.024226646654889824, "grad_norm": 3.912766218185425, "learning_rate": 2.4226682408500592e-05, "loss": 0.2127, "step": 1368 }, { "epoch": 0.024244356191918254, "grad_norm": 3.1578903198242188, "learning_rate": 2.42443919716647e-05, "loss": 0.2471, "step": 1369 }, { "epoch": 0.02426206572894668, "grad_norm": 2.837794780731201, "learning_rate": 2.426210153482881e-05, "loss": 0.1896, "step": 1370 }, { "epoch": 0.02427977526597511, "grad_norm": 3.310556411743164, "learning_rate": 2.4279811097992917e-05, "loss": 0.2398, "step": 1371 }, { "epoch": 0.024297484803003536, "grad_norm": 2.3313000202178955, "learning_rate": 2.4297520661157026e-05, "loss": 0.2203, "step": 1372 }, { "epoch": 0.024315194340031966, "grad_norm": 3.4077799320220947, "learning_rate": 2.4315230224321134e-05, "loss": 0.2169, "step": 1373 }, { "epoch": 0.024332903877060393, "grad_norm": 2.6897757053375244, "learning_rate": 2.4332939787485242e-05, "loss": 0.1942, "step": 1374 }, { "epoch": 0.024350613414088822, "grad_norm": 3.194545030593872, "learning_rate": 2.435064935064935e-05, "loss": 0.2324, "step": 1375 }, { "epoch": 0.02436832295111725, "grad_norm": 4.151400089263916, "learning_rate": 2.4368358913813462e-05, "loss": 0.1765, "step": 1376 }, { "epoch": 0.02438603248814568, "grad_norm": 2.0715203285217285, "learning_rate": 2.4386068476977567e-05, "loss": 0.1884, "step": 1377 }, { "epoch": 0.02440374202517411, "grad_norm": 4.864234447479248, "learning_rate": 2.440377804014168e-05, "loss": 0.224, "step": 1378 }, { "epoch": 0.024421451562202535, "grad_norm": 2.633721113204956, "learning_rate": 2.4421487603305784e-05, "loss": 0.2706, "step": 1379 }, { "epoch": 0.024439161099230965, "grad_norm": 2.876042127609253, "learning_rate": 2.4439197166469896e-05, "loss": 0.1778, "step": 1380 }, { "epoch": 0.02445687063625939, "grad_norm": 2.867253065109253, "learning_rate": 2.4456906729634004e-05, "loss": 0.2995, "step": 1381 }, { "epoch": 0.02447458017328782, "grad_norm": 3.5423154830932617, "learning_rate": 2.4474616292798112e-05, "loss": 0.255, "step": 1382 }, { "epoch": 0.024492289710316247, "grad_norm": 3.6509711742401123, "learning_rate": 2.449232585596222e-05, "loss": 0.2089, "step": 1383 }, { "epoch": 0.024509999247344677, "grad_norm": 2.501493453979492, "learning_rate": 2.451003541912633e-05, "loss": 0.2302, "step": 1384 }, { "epoch": 0.024527708784373103, "grad_norm": 3.4441256523132324, "learning_rate": 2.4527744982290437e-05, "loss": 0.2399, "step": 1385 }, { "epoch": 0.024545418321401533, "grad_norm": 3.475132942199707, "learning_rate": 2.454545454545455e-05, "loss": 0.2612, "step": 1386 }, { "epoch": 0.02456312785842996, "grad_norm": 2.431737184524536, "learning_rate": 2.4563164108618654e-05, "loss": 0.1731, "step": 1387 }, { "epoch": 0.02458083739545839, "grad_norm": 2.8922808170318604, "learning_rate": 2.4580873671782765e-05, "loss": 0.2451, "step": 1388 }, { "epoch": 0.024598546932486816, "grad_norm": 3.0357260704040527, "learning_rate": 2.459858323494687e-05, "loss": 0.1656, "step": 1389 }, { "epoch": 0.024616256469515246, "grad_norm": 3.384870767593384, "learning_rate": 2.4616292798110982e-05, "loss": 0.2155, "step": 1390 }, { "epoch": 0.024633966006543676, "grad_norm": 2.599003791809082, "learning_rate": 2.4634002361275087e-05, "loss": 0.1944, "step": 1391 }, { "epoch": 0.024651675543572102, "grad_norm": 3.238501787185669, "learning_rate": 2.46517119244392e-05, "loss": 0.251, "step": 1392 }, { "epoch": 0.02466938508060053, "grad_norm": 2.4665277004241943, "learning_rate": 2.4669421487603307e-05, "loss": 0.1964, "step": 1393 }, { "epoch": 0.024687094617628958, "grad_norm": 2.0964107513427734, "learning_rate": 2.4687131050767415e-05, "loss": 0.2048, "step": 1394 }, { "epoch": 0.024704804154657388, "grad_norm": 3.2321979999542236, "learning_rate": 2.4704840613931524e-05, "loss": 0.274, "step": 1395 }, { "epoch": 0.024722513691685814, "grad_norm": 2.6262903213500977, "learning_rate": 2.4722550177095632e-05, "loss": 0.2189, "step": 1396 }, { "epoch": 0.024740223228714244, "grad_norm": 2.6378729343414307, "learning_rate": 2.474025974025974e-05, "loss": 0.2656, "step": 1397 }, { "epoch": 0.02475793276574267, "grad_norm": 3.266932964324951, "learning_rate": 2.4757969303423852e-05, "loss": 0.1891, "step": 1398 }, { "epoch": 0.0247756423027711, "grad_norm": 2.5977134704589844, "learning_rate": 2.4775678866587957e-05, "loss": 0.1958, "step": 1399 }, { "epoch": 0.024793351839799527, "grad_norm": 2.0251593589782715, "learning_rate": 2.479338842975207e-05, "loss": 0.1604, "step": 1400 }, { "epoch": 0.024811061376827957, "grad_norm": 2.4550375938415527, "learning_rate": 2.4811097992916174e-05, "loss": 0.1972, "step": 1401 }, { "epoch": 0.024828770913856383, "grad_norm": 3.1555557250976562, "learning_rate": 2.4828807556080285e-05, "loss": 0.1717, "step": 1402 }, { "epoch": 0.024846480450884813, "grad_norm": 2.1042582988739014, "learning_rate": 2.4846517119244394e-05, "loss": 0.182, "step": 1403 }, { "epoch": 0.024864189987913243, "grad_norm": 2.4925270080566406, "learning_rate": 2.4864226682408502e-05, "loss": 0.1887, "step": 1404 }, { "epoch": 0.02488189952494167, "grad_norm": 3.3956243991851807, "learning_rate": 2.488193624557261e-05, "loss": 0.2069, "step": 1405 }, { "epoch": 0.0248996090619701, "grad_norm": 3.6796090602874756, "learning_rate": 2.489964580873672e-05, "loss": 0.2222, "step": 1406 }, { "epoch": 0.024917318598998525, "grad_norm": 2.7411327362060547, "learning_rate": 2.4917355371900827e-05, "loss": 0.2202, "step": 1407 }, { "epoch": 0.024935028136026955, "grad_norm": 2.75634503364563, "learning_rate": 2.493506493506494e-05, "loss": 0.2061, "step": 1408 }, { "epoch": 0.02495273767305538, "grad_norm": 2.640225410461426, "learning_rate": 2.4952774498229043e-05, "loss": 0.2724, "step": 1409 }, { "epoch": 0.02497044721008381, "grad_norm": 1.8774627447128296, "learning_rate": 2.4970484061393155e-05, "loss": 0.2336, "step": 1410 }, { "epoch": 0.024988156747112238, "grad_norm": 3.348250389099121, "learning_rate": 2.498819362455726e-05, "loss": 0.2339, "step": 1411 }, { "epoch": 0.025005866284140667, "grad_norm": 1.5991325378417969, "learning_rate": 2.5005903187721372e-05, "loss": 0.2143, "step": 1412 }, { "epoch": 0.025023575821169094, "grad_norm": 2.2314348220825195, "learning_rate": 2.5023612750885477e-05, "loss": 0.2236, "step": 1413 }, { "epoch": 0.025041285358197524, "grad_norm": 3.015507936477661, "learning_rate": 2.504132231404959e-05, "loss": 0.2015, "step": 1414 }, { "epoch": 0.02505899489522595, "grad_norm": 2.0357120037078857, "learning_rate": 2.5059031877213697e-05, "loss": 0.1969, "step": 1415 }, { "epoch": 0.02507670443225438, "grad_norm": 3.0842528343200684, "learning_rate": 2.5076741440377805e-05, "loss": 0.235, "step": 1416 }, { "epoch": 0.02509441396928281, "grad_norm": 2.326018810272217, "learning_rate": 2.5094451003541913e-05, "loss": 0.2033, "step": 1417 }, { "epoch": 0.025112123506311236, "grad_norm": 2.136911153793335, "learning_rate": 2.511216056670602e-05, "loss": 0.1737, "step": 1418 }, { "epoch": 0.025129833043339666, "grad_norm": 3.926013946533203, "learning_rate": 2.512987012987013e-05, "loss": 0.2537, "step": 1419 }, { "epoch": 0.025147542580368092, "grad_norm": 2.037090539932251, "learning_rate": 2.514757969303424e-05, "loss": 0.2109, "step": 1420 }, { "epoch": 0.025165252117396522, "grad_norm": 3.5652780532836914, "learning_rate": 2.5165289256198347e-05, "loss": 0.2352, "step": 1421 }, { "epoch": 0.02518296165442495, "grad_norm": 2.5713536739349365, "learning_rate": 2.5182998819362458e-05, "loss": 0.2601, "step": 1422 }, { "epoch": 0.02520067119145338, "grad_norm": 2.443295955657959, "learning_rate": 2.5200708382526563e-05, "loss": 0.2294, "step": 1423 }, { "epoch": 0.025218380728481805, "grad_norm": 2.4936559200286865, "learning_rate": 2.5218417945690675e-05, "loss": 0.2257, "step": 1424 }, { "epoch": 0.025236090265510235, "grad_norm": 3.7178540229797363, "learning_rate": 2.5236127508854783e-05, "loss": 0.2568, "step": 1425 }, { "epoch": 0.02525379980253866, "grad_norm": 2.891442060470581, "learning_rate": 2.525383707201889e-05, "loss": 0.2998, "step": 1426 }, { "epoch": 0.02527150933956709, "grad_norm": 2.4450464248657227, "learning_rate": 2.5271546635183e-05, "loss": 0.1966, "step": 1427 }, { "epoch": 0.025289218876595517, "grad_norm": 3.173722982406616, "learning_rate": 2.5289256198347108e-05, "loss": 0.2436, "step": 1428 }, { "epoch": 0.025306928413623947, "grad_norm": 3.035489082336426, "learning_rate": 2.5306965761511216e-05, "loss": 0.1733, "step": 1429 }, { "epoch": 0.025324637950652377, "grad_norm": 2.8730766773223877, "learning_rate": 2.5324675324675325e-05, "loss": 0.1901, "step": 1430 }, { "epoch": 0.025342347487680803, "grad_norm": 2.6592307090759277, "learning_rate": 2.5342384887839433e-05, "loss": 0.2324, "step": 1431 }, { "epoch": 0.025360057024709233, "grad_norm": 3.2107839584350586, "learning_rate": 2.5360094451003545e-05, "loss": 0.2615, "step": 1432 }, { "epoch": 0.02537776656173766, "grad_norm": 2.908832550048828, "learning_rate": 2.537780401416765e-05, "loss": 0.2061, "step": 1433 }, { "epoch": 0.02539547609876609, "grad_norm": 2.566666841506958, "learning_rate": 2.539551357733176e-05, "loss": 0.2257, "step": 1434 }, { "epoch": 0.025413185635794516, "grad_norm": 3.0858471393585205, "learning_rate": 2.5413223140495866e-05, "loss": 0.2345, "step": 1435 }, { "epoch": 0.025430895172822945, "grad_norm": 2.725393772125244, "learning_rate": 2.5430932703659978e-05, "loss": 0.2783, "step": 1436 }, { "epoch": 0.025448604709851372, "grad_norm": 3.0685572624206543, "learning_rate": 2.5448642266824086e-05, "loss": 0.2516, "step": 1437 }, { "epoch": 0.0254663142468798, "grad_norm": 2.8400566577911377, "learning_rate": 2.5466351829988195e-05, "loss": 0.2308, "step": 1438 }, { "epoch": 0.025484023783908228, "grad_norm": 2.861359119415283, "learning_rate": 2.5484061393152303e-05, "loss": 0.2628, "step": 1439 }, { "epoch": 0.025501733320936658, "grad_norm": 5.089741230010986, "learning_rate": 2.550177095631641e-05, "loss": 0.2119, "step": 1440 }, { "epoch": 0.025519442857965084, "grad_norm": 3.952207565307617, "learning_rate": 2.551948051948052e-05, "loss": 0.2344, "step": 1441 }, { "epoch": 0.025537152394993514, "grad_norm": 3.656445026397705, "learning_rate": 2.553719008264463e-05, "loss": 0.2135, "step": 1442 }, { "epoch": 0.02555486193202194, "grad_norm": 2.322624444961548, "learning_rate": 2.5554899645808736e-05, "loss": 0.2623, "step": 1443 }, { "epoch": 0.02557257146905037, "grad_norm": 3.054271697998047, "learning_rate": 2.5572609208972848e-05, "loss": 0.1848, "step": 1444 }, { "epoch": 0.0255902810060788, "grad_norm": 2.277453899383545, "learning_rate": 2.5590318772136953e-05, "loss": 0.2186, "step": 1445 }, { "epoch": 0.025607990543107226, "grad_norm": 2.5769267082214355, "learning_rate": 2.5608028335301065e-05, "loss": 0.1691, "step": 1446 }, { "epoch": 0.025625700080135656, "grad_norm": 2.742615222930908, "learning_rate": 2.562573789846517e-05, "loss": 0.2339, "step": 1447 }, { "epoch": 0.025643409617164083, "grad_norm": 2.2701711654663086, "learning_rate": 2.564344746162928e-05, "loss": 0.2074, "step": 1448 }, { "epoch": 0.025661119154192513, "grad_norm": 2.8797080516815186, "learning_rate": 2.566115702479339e-05, "loss": 0.2045, "step": 1449 }, { "epoch": 0.02567882869122094, "grad_norm": 2.9553439617156982, "learning_rate": 2.5678866587957498e-05, "loss": 0.2612, "step": 1450 }, { "epoch": 0.02569653822824937, "grad_norm": 3.1474783420562744, "learning_rate": 2.5696576151121606e-05, "loss": 0.2699, "step": 1451 }, { "epoch": 0.025714247765277795, "grad_norm": 2.443730115890503, "learning_rate": 2.5714285714285714e-05, "loss": 0.2214, "step": 1452 }, { "epoch": 0.025731957302306225, "grad_norm": 2.1337368488311768, "learning_rate": 2.5731995277449823e-05, "loss": 0.1703, "step": 1453 }, { "epoch": 0.02574966683933465, "grad_norm": 2.87211275100708, "learning_rate": 2.5749704840613934e-05, "loss": 0.1678, "step": 1454 }, { "epoch": 0.02576737637636308, "grad_norm": 2.62957763671875, "learning_rate": 2.576741440377804e-05, "loss": 0.2097, "step": 1455 }, { "epoch": 0.025785085913391508, "grad_norm": 2.29287052154541, "learning_rate": 2.578512396694215e-05, "loss": 0.1892, "step": 1456 }, { "epoch": 0.025802795450419937, "grad_norm": 2.9328131675720215, "learning_rate": 2.5802833530106256e-05, "loss": 0.1955, "step": 1457 }, { "epoch": 0.025820504987448367, "grad_norm": 2.7516753673553467, "learning_rate": 2.5820543093270368e-05, "loss": 0.2268, "step": 1458 }, { "epoch": 0.025838214524476794, "grad_norm": 2.1402904987335205, "learning_rate": 2.5838252656434476e-05, "loss": 0.2178, "step": 1459 }, { "epoch": 0.025855924061505223, "grad_norm": 1.6841528415679932, "learning_rate": 2.5855962219598584e-05, "loss": 0.1862, "step": 1460 }, { "epoch": 0.02587363359853365, "grad_norm": 3.0051279067993164, "learning_rate": 2.5873671782762693e-05, "loss": 0.2597, "step": 1461 }, { "epoch": 0.02589134313556208, "grad_norm": 3.4539482593536377, "learning_rate": 2.58913813459268e-05, "loss": 0.188, "step": 1462 }, { "epoch": 0.025909052672590506, "grad_norm": 3.0631439685821533, "learning_rate": 2.590909090909091e-05, "loss": 0.2192, "step": 1463 }, { "epoch": 0.025926762209618936, "grad_norm": 3.2742230892181396, "learning_rate": 2.5926800472255018e-05, "loss": 0.2524, "step": 1464 }, { "epoch": 0.025944471746647362, "grad_norm": 3.4499807357788086, "learning_rate": 2.5944510035419126e-05, "loss": 0.2247, "step": 1465 }, { "epoch": 0.025962181283675792, "grad_norm": 2.488284111022949, "learning_rate": 2.5962219598583238e-05, "loss": 0.235, "step": 1466 }, { "epoch": 0.02597989082070422, "grad_norm": 2.410860300064087, "learning_rate": 2.5979929161747343e-05, "loss": 0.147, "step": 1467 }, { "epoch": 0.025997600357732648, "grad_norm": 2.743396759033203, "learning_rate": 2.5997638724911454e-05, "loss": 0.2076, "step": 1468 }, { "epoch": 0.026015309894761075, "grad_norm": 3.801486015319824, "learning_rate": 2.601534828807556e-05, "loss": 0.2044, "step": 1469 }, { "epoch": 0.026033019431789504, "grad_norm": 6.8612213134765625, "learning_rate": 2.603305785123967e-05, "loss": 0.2774, "step": 1470 }, { "epoch": 0.026050728968817934, "grad_norm": 2.568298578262329, "learning_rate": 2.605076741440378e-05, "loss": 0.206, "step": 1471 }, { "epoch": 0.02606843850584636, "grad_norm": 2.8535349369049072, "learning_rate": 2.6068476977567887e-05, "loss": 0.228, "step": 1472 }, { "epoch": 0.02608614804287479, "grad_norm": 3.592813491821289, "learning_rate": 2.6086186540731996e-05, "loss": 0.1908, "step": 1473 }, { "epoch": 0.026103857579903217, "grad_norm": 2.5624966621398926, "learning_rate": 2.6103896103896104e-05, "loss": 0.2289, "step": 1474 }, { "epoch": 0.026121567116931647, "grad_norm": 4.089432716369629, "learning_rate": 2.6121605667060212e-05, "loss": 0.2112, "step": 1475 }, { "epoch": 0.026139276653960073, "grad_norm": 2.6381709575653076, "learning_rate": 2.6139315230224324e-05, "loss": 0.2478, "step": 1476 }, { "epoch": 0.026156986190988503, "grad_norm": 2.816854953765869, "learning_rate": 2.615702479338843e-05, "loss": 0.1761, "step": 1477 }, { "epoch": 0.02617469572801693, "grad_norm": 2.3975367546081543, "learning_rate": 2.617473435655254e-05, "loss": 0.2134, "step": 1478 }, { "epoch": 0.02619240526504536, "grad_norm": 2.604034662246704, "learning_rate": 2.6192443919716646e-05, "loss": 0.2286, "step": 1479 }, { "epoch": 0.026210114802073785, "grad_norm": 3.290055751800537, "learning_rate": 2.6210153482880757e-05, "loss": 0.211, "step": 1480 }, { "epoch": 0.026227824339102215, "grad_norm": 1.8501187562942505, "learning_rate": 2.6227863046044866e-05, "loss": 0.2009, "step": 1481 }, { "epoch": 0.02624553387613064, "grad_norm": 1.9670050144195557, "learning_rate": 2.6245572609208974e-05, "loss": 0.2253, "step": 1482 }, { "epoch": 0.02626324341315907, "grad_norm": 3.8647401332855225, "learning_rate": 2.6263282172373082e-05, "loss": 0.236, "step": 1483 }, { "epoch": 0.0262809529501875, "grad_norm": 3.264671564102173, "learning_rate": 2.628099173553719e-05, "loss": 0.2371, "step": 1484 }, { "epoch": 0.026298662487215928, "grad_norm": 2.061224937438965, "learning_rate": 2.62987012987013e-05, "loss": 0.1653, "step": 1485 }, { "epoch": 0.026316372024244358, "grad_norm": 2.895435094833374, "learning_rate": 2.6316410861865407e-05, "loss": 0.242, "step": 1486 }, { "epoch": 0.026334081561272784, "grad_norm": 2.6153461933135986, "learning_rate": 2.6334120425029516e-05, "loss": 0.2201, "step": 1487 }, { "epoch": 0.026351791098301214, "grad_norm": 2.586974859237671, "learning_rate": 2.6351829988193627e-05, "loss": 0.2646, "step": 1488 }, { "epoch": 0.02636950063532964, "grad_norm": 2.6524643898010254, "learning_rate": 2.6369539551357732e-05, "loss": 0.2212, "step": 1489 }, { "epoch": 0.02638721017235807, "grad_norm": 1.9795809984207153, "learning_rate": 2.6387249114521844e-05, "loss": 0.1746, "step": 1490 }, { "epoch": 0.026404919709386496, "grad_norm": 2.3253681659698486, "learning_rate": 2.640495867768595e-05, "loss": 0.2123, "step": 1491 }, { "epoch": 0.026422629246414926, "grad_norm": 2.7136120796203613, "learning_rate": 2.642266824085006e-05, "loss": 0.2023, "step": 1492 }, { "epoch": 0.026440338783443353, "grad_norm": 2.888613700866699, "learning_rate": 2.644037780401417e-05, "loss": 0.2514, "step": 1493 }, { "epoch": 0.026458048320471782, "grad_norm": 3.0649914741516113, "learning_rate": 2.6458087367178277e-05, "loss": 0.2523, "step": 1494 }, { "epoch": 0.02647575785750021, "grad_norm": 2.842682123184204, "learning_rate": 2.6475796930342385e-05, "loss": 0.2, "step": 1495 }, { "epoch": 0.02649346739452864, "grad_norm": 2.5775558948516846, "learning_rate": 2.6493506493506494e-05, "loss": 0.2196, "step": 1496 }, { "epoch": 0.02651117693155707, "grad_norm": 2.7747347354888916, "learning_rate": 2.6511216056670602e-05, "loss": 0.2128, "step": 1497 }, { "epoch": 0.026528886468585495, "grad_norm": 2.7602930068969727, "learning_rate": 2.6528925619834714e-05, "loss": 0.2383, "step": 1498 }, { "epoch": 0.026546596005613925, "grad_norm": 2.926172971725464, "learning_rate": 2.654663518299882e-05, "loss": 0.2459, "step": 1499 }, { "epoch": 0.02656430554264235, "grad_norm": 3.143378973007202, "learning_rate": 2.656434474616293e-05, "loss": 0.2416, "step": 1500 }, { "epoch": 0.02658201507967078, "grad_norm": 1.7359278202056885, "learning_rate": 2.6582054309327035e-05, "loss": 0.2147, "step": 1501 }, { "epoch": 0.026599724616699207, "grad_norm": 3.3084537982940674, "learning_rate": 2.6599763872491147e-05, "loss": 0.2634, "step": 1502 }, { "epoch": 0.026617434153727637, "grad_norm": 2.8954150676727295, "learning_rate": 2.6617473435655252e-05, "loss": 0.2373, "step": 1503 }, { "epoch": 0.026635143690756063, "grad_norm": 2.839406967163086, "learning_rate": 2.6635182998819364e-05, "loss": 0.2439, "step": 1504 }, { "epoch": 0.026652853227784493, "grad_norm": 2.6155312061309814, "learning_rate": 2.6652892561983472e-05, "loss": 0.2103, "step": 1505 }, { "epoch": 0.02667056276481292, "grad_norm": 3.239893674850464, "learning_rate": 2.667060212514758e-05, "loss": 0.2382, "step": 1506 }, { "epoch": 0.02668827230184135, "grad_norm": 1.9205121994018555, "learning_rate": 2.668831168831169e-05, "loss": 0.1901, "step": 1507 }, { "epoch": 0.026705981838869776, "grad_norm": 3.302642583847046, "learning_rate": 2.6706021251475797e-05, "loss": 0.2075, "step": 1508 }, { "epoch": 0.026723691375898206, "grad_norm": 2.2234978675842285, "learning_rate": 2.6723730814639905e-05, "loss": 0.2241, "step": 1509 }, { "epoch": 0.026741400912926632, "grad_norm": 2.733546733856201, "learning_rate": 2.6741440377804017e-05, "loss": 0.2311, "step": 1510 }, { "epoch": 0.026759110449955062, "grad_norm": 2.7774624824523926, "learning_rate": 2.6759149940968122e-05, "loss": 0.2107, "step": 1511 }, { "epoch": 0.026776819986983492, "grad_norm": 3.4645514488220215, "learning_rate": 2.6776859504132234e-05, "loss": 0.2499, "step": 1512 }, { "epoch": 0.026794529524011918, "grad_norm": 2.057664155960083, "learning_rate": 2.679456906729634e-05, "loss": 0.1771, "step": 1513 }, { "epoch": 0.026812239061040348, "grad_norm": 2.7750868797302246, "learning_rate": 2.681227863046045e-05, "loss": 0.2163, "step": 1514 }, { "epoch": 0.026829948598068774, "grad_norm": 2.195906639099121, "learning_rate": 2.682998819362456e-05, "loss": 0.2463, "step": 1515 }, { "epoch": 0.026847658135097204, "grad_norm": 2.162440538406372, "learning_rate": 2.6847697756788667e-05, "loss": 0.1675, "step": 1516 }, { "epoch": 0.02686536767212563, "grad_norm": 2.9185423851013184, "learning_rate": 2.6865407319952775e-05, "loss": 0.1842, "step": 1517 }, { "epoch": 0.02688307720915406, "grad_norm": 2.935487747192383, "learning_rate": 2.6883116883116883e-05, "loss": 0.2711, "step": 1518 }, { "epoch": 0.026900786746182487, "grad_norm": 2.5214180946350098, "learning_rate": 2.6900826446280992e-05, "loss": 0.2526, "step": 1519 }, { "epoch": 0.026918496283210917, "grad_norm": 2.3453309535980225, "learning_rate": 2.69185360094451e-05, "loss": 0.2909, "step": 1520 }, { "epoch": 0.026936205820239343, "grad_norm": 2.6656882762908936, "learning_rate": 2.693624557260921e-05, "loss": 0.2113, "step": 1521 }, { "epoch": 0.026953915357267773, "grad_norm": 2.4502503871917725, "learning_rate": 2.695395513577332e-05, "loss": 0.2574, "step": 1522 }, { "epoch": 0.0269716248942962, "grad_norm": 2.8595783710479736, "learning_rate": 2.6971664698937425e-05, "loss": 0.2327, "step": 1523 }, { "epoch": 0.02698933443132463, "grad_norm": 2.871455669403076, "learning_rate": 2.6989374262101537e-05, "loss": 0.2183, "step": 1524 }, { "epoch": 0.02700704396835306, "grad_norm": 2.579617500305176, "learning_rate": 2.700708382526564e-05, "loss": 0.209, "step": 1525 }, { "epoch": 0.027024753505381485, "grad_norm": 2.4078867435455322, "learning_rate": 2.7024793388429753e-05, "loss": 0.1928, "step": 1526 }, { "epoch": 0.027042463042409915, "grad_norm": 2.335122585296631, "learning_rate": 2.704250295159386e-05, "loss": 0.206, "step": 1527 }, { "epoch": 0.02706017257943834, "grad_norm": 2.281550168991089, "learning_rate": 2.706021251475797e-05, "loss": 0.1917, "step": 1528 }, { "epoch": 0.02707788211646677, "grad_norm": 3.244192600250244, "learning_rate": 2.7077922077922078e-05, "loss": 0.2281, "step": 1529 }, { "epoch": 0.027095591653495198, "grad_norm": 2.0267021656036377, "learning_rate": 2.7095631641086187e-05, "loss": 0.1945, "step": 1530 }, { "epoch": 0.027113301190523627, "grad_norm": 2.641606092453003, "learning_rate": 2.7113341204250295e-05, "loss": 0.2067, "step": 1531 }, { "epoch": 0.027131010727552054, "grad_norm": 3.320889949798584, "learning_rate": 2.7131050767414407e-05, "loss": 0.2467, "step": 1532 }, { "epoch": 0.027148720264580484, "grad_norm": 2.187145471572876, "learning_rate": 2.714876033057851e-05, "loss": 0.3157, "step": 1533 }, { "epoch": 0.02716642980160891, "grad_norm": 2.358673572540283, "learning_rate": 2.7166469893742623e-05, "loss": 0.2422, "step": 1534 }, { "epoch": 0.02718413933863734, "grad_norm": 3.0629072189331055, "learning_rate": 2.7184179456906728e-05, "loss": 0.193, "step": 1535 }, { "epoch": 0.027201848875665766, "grad_norm": 2.4952332973480225, "learning_rate": 2.720188902007084e-05, "loss": 0.2059, "step": 1536 }, { "epoch": 0.027219558412694196, "grad_norm": 2.4978551864624023, "learning_rate": 2.7219598583234948e-05, "loss": 0.2308, "step": 1537 }, { "epoch": 0.027237267949722626, "grad_norm": 3.156177043914795, "learning_rate": 2.7237308146399056e-05, "loss": 0.1792, "step": 1538 }, { "epoch": 0.027254977486751052, "grad_norm": 3.025653600692749, "learning_rate": 2.7255017709563165e-05, "loss": 0.2149, "step": 1539 }, { "epoch": 0.027272687023779482, "grad_norm": 3.088665246963501, "learning_rate": 2.7272727272727273e-05, "loss": 0.2137, "step": 1540 }, { "epoch": 0.02729039656080791, "grad_norm": 3.090444803237915, "learning_rate": 2.729043683589138e-05, "loss": 0.1707, "step": 1541 }, { "epoch": 0.02730810609783634, "grad_norm": 2.5892980098724365, "learning_rate": 2.730814639905549e-05, "loss": 0.1879, "step": 1542 }, { "epoch": 0.027325815634864765, "grad_norm": 2.7601258754730225, "learning_rate": 2.7325855962219598e-05, "loss": 0.2314, "step": 1543 }, { "epoch": 0.027343525171893195, "grad_norm": 2.9066696166992188, "learning_rate": 2.734356552538371e-05, "loss": 0.2773, "step": 1544 }, { "epoch": 0.02736123470892162, "grad_norm": 3.7877919673919678, "learning_rate": 2.7361275088547815e-05, "loss": 0.2448, "step": 1545 }, { "epoch": 0.02737894424595005, "grad_norm": 2.311199903488159, "learning_rate": 2.7378984651711926e-05, "loss": 0.2448, "step": 1546 }, { "epoch": 0.027396653782978477, "grad_norm": 3.0726118087768555, "learning_rate": 2.739669421487603e-05, "loss": 0.209, "step": 1547 }, { "epoch": 0.027414363320006907, "grad_norm": 3.0673632621765137, "learning_rate": 2.7414403778040143e-05, "loss": 0.2284, "step": 1548 }, { "epoch": 0.027432072857035333, "grad_norm": 2.5011229515075684, "learning_rate": 2.743211334120425e-05, "loss": 0.1927, "step": 1549 }, { "epoch": 0.027449782394063763, "grad_norm": 1.9595084190368652, "learning_rate": 2.744982290436836e-05, "loss": 0.2379, "step": 1550 }, { "epoch": 0.027467491931092193, "grad_norm": 3.680929183959961, "learning_rate": 2.7467532467532468e-05, "loss": 0.246, "step": 1551 }, { "epoch": 0.02748520146812062, "grad_norm": 3.610175848007202, "learning_rate": 2.7485242030696576e-05, "loss": 0.2418, "step": 1552 }, { "epoch": 0.02750291100514905, "grad_norm": 2.568791627883911, "learning_rate": 2.7502951593860685e-05, "loss": 0.1972, "step": 1553 }, { "epoch": 0.027520620542177476, "grad_norm": 2.802333116531372, "learning_rate": 2.7520661157024796e-05, "loss": 0.2384, "step": 1554 }, { "epoch": 0.027538330079205905, "grad_norm": 2.7911019325256348, "learning_rate": 2.75383707201889e-05, "loss": 0.1605, "step": 1555 }, { "epoch": 0.027556039616234332, "grad_norm": 3.4265685081481934, "learning_rate": 2.7556080283353013e-05, "loss": 0.1939, "step": 1556 }, { "epoch": 0.02757374915326276, "grad_norm": 4.419022083282471, "learning_rate": 2.7573789846517118e-05, "loss": 0.2084, "step": 1557 }, { "epoch": 0.027591458690291188, "grad_norm": 2.524085521697998, "learning_rate": 2.759149940968123e-05, "loss": 0.2219, "step": 1558 }, { "epoch": 0.027609168227319618, "grad_norm": 5.670626640319824, "learning_rate": 2.7609208972845334e-05, "loss": 0.2223, "step": 1559 }, { "epoch": 0.027626877764348044, "grad_norm": 2.5162527561187744, "learning_rate": 2.7626918536009446e-05, "loss": 0.1965, "step": 1560 }, { "epoch": 0.027644587301376474, "grad_norm": 2.586879253387451, "learning_rate": 2.7644628099173554e-05, "loss": 0.2264, "step": 1561 }, { "epoch": 0.0276622968384049, "grad_norm": 2.1289007663726807, "learning_rate": 2.7662337662337663e-05, "loss": 0.2024, "step": 1562 }, { "epoch": 0.02768000637543333, "grad_norm": 2.4486796855926514, "learning_rate": 2.768004722550177e-05, "loss": 0.1615, "step": 1563 }, { "epoch": 0.02769771591246176, "grad_norm": 2.4948017597198486, "learning_rate": 2.769775678866588e-05, "loss": 0.2593, "step": 1564 }, { "epoch": 0.027715425449490187, "grad_norm": 2.17087459564209, "learning_rate": 2.7715466351829988e-05, "loss": 0.2194, "step": 1565 }, { "epoch": 0.027733134986518616, "grad_norm": 2.932187795639038, "learning_rate": 2.77331759149941e-05, "loss": 0.2131, "step": 1566 }, { "epoch": 0.027750844523547043, "grad_norm": 2.728731155395508, "learning_rate": 2.7750885478158204e-05, "loss": 0.2161, "step": 1567 }, { "epoch": 0.027768554060575473, "grad_norm": 2.294975519180298, "learning_rate": 2.7768595041322316e-05, "loss": 0.2029, "step": 1568 }, { "epoch": 0.0277862635976039, "grad_norm": 2.010667085647583, "learning_rate": 2.778630460448642e-05, "loss": 0.1823, "step": 1569 }, { "epoch": 0.02780397313463233, "grad_norm": 3.649599075317383, "learning_rate": 2.7804014167650533e-05, "loss": 0.233, "step": 1570 }, { "epoch": 0.027821682671660755, "grad_norm": 2.2440974712371826, "learning_rate": 2.782172373081464e-05, "loss": 0.1992, "step": 1571 }, { "epoch": 0.027839392208689185, "grad_norm": 1.812654972076416, "learning_rate": 2.783943329397875e-05, "loss": 0.1892, "step": 1572 }, { "epoch": 0.02785710174571761, "grad_norm": 5.444638729095459, "learning_rate": 2.7857142857142858e-05, "loss": 0.223, "step": 1573 }, { "epoch": 0.02787481128274604, "grad_norm": 4.901094913482666, "learning_rate": 2.7874852420306966e-05, "loss": 0.2927, "step": 1574 }, { "epoch": 0.027892520819774468, "grad_norm": 4.9424262046813965, "learning_rate": 2.7892561983471074e-05, "loss": 0.2329, "step": 1575 }, { "epoch": 0.027910230356802897, "grad_norm": 2.725874662399292, "learning_rate": 2.7910271546635183e-05, "loss": 0.1883, "step": 1576 }, { "epoch": 0.027927939893831327, "grad_norm": 2.3230717182159424, "learning_rate": 2.792798110979929e-05, "loss": 0.2272, "step": 1577 }, { "epoch": 0.027945649430859754, "grad_norm": 2.1880428791046143, "learning_rate": 2.7945690672963403e-05, "loss": 0.2143, "step": 1578 }, { "epoch": 0.027963358967888183, "grad_norm": 2.757606267929077, "learning_rate": 2.7963400236127507e-05, "loss": 0.2205, "step": 1579 }, { "epoch": 0.02798106850491661, "grad_norm": 2.6836090087890625, "learning_rate": 2.798110979929162e-05, "loss": 0.1889, "step": 1580 }, { "epoch": 0.02799877804194504, "grad_norm": 3.378918409347534, "learning_rate": 2.7998819362455724e-05, "loss": 0.2342, "step": 1581 }, { "epoch": 0.028016487578973466, "grad_norm": 3.170837640762329, "learning_rate": 2.8016528925619836e-05, "loss": 0.2091, "step": 1582 }, { "epoch": 0.028034197116001896, "grad_norm": 2.8039538860321045, "learning_rate": 2.8034238488783944e-05, "loss": 0.1756, "step": 1583 }, { "epoch": 0.028051906653030322, "grad_norm": 3.469496250152588, "learning_rate": 2.8051948051948052e-05, "loss": 0.2042, "step": 1584 }, { "epoch": 0.028069616190058752, "grad_norm": 3.1182453632354736, "learning_rate": 2.806965761511216e-05, "loss": 0.2284, "step": 1585 }, { "epoch": 0.02808732572708718, "grad_norm": 4.890115261077881, "learning_rate": 2.808736717827627e-05, "loss": 0.2428, "step": 1586 }, { "epoch": 0.02810503526411561, "grad_norm": 2.9341378211975098, "learning_rate": 2.8105076741440377e-05, "loss": 0.2128, "step": 1587 }, { "epoch": 0.028122744801144035, "grad_norm": 2.8342437744140625, "learning_rate": 2.812278630460449e-05, "loss": 0.1914, "step": 1588 }, { "epoch": 0.028140454338172464, "grad_norm": 2.6269185543060303, "learning_rate": 2.8140495867768594e-05, "loss": 0.2734, "step": 1589 }, { "epoch": 0.02815816387520089, "grad_norm": 2.503126859664917, "learning_rate": 2.8158205430932706e-05, "loss": 0.2327, "step": 1590 }, { "epoch": 0.02817587341222932, "grad_norm": 2.43961763381958, "learning_rate": 2.817591499409681e-05, "loss": 0.2474, "step": 1591 }, { "epoch": 0.02819358294925775, "grad_norm": 2.6676273345947266, "learning_rate": 2.8193624557260922e-05, "loss": 0.1961, "step": 1592 }, { "epoch": 0.028211292486286177, "grad_norm": 2.548938751220703, "learning_rate": 2.8211334120425027e-05, "loss": 0.2397, "step": 1593 }, { "epoch": 0.028229002023314607, "grad_norm": 2.348769187927246, "learning_rate": 2.822904368358914e-05, "loss": 0.231, "step": 1594 }, { "epoch": 0.028246711560343033, "grad_norm": 1.9244883060455322, "learning_rate": 2.8246753246753247e-05, "loss": 0.2558, "step": 1595 }, { "epoch": 0.028264421097371463, "grad_norm": 4.005422592163086, "learning_rate": 2.8264462809917356e-05, "loss": 0.2252, "step": 1596 }, { "epoch": 0.02828213063439989, "grad_norm": 2.747918128967285, "learning_rate": 2.8282172373081464e-05, "loss": 0.2694, "step": 1597 }, { "epoch": 0.02829984017142832, "grad_norm": 2.683718681335449, "learning_rate": 2.8299881936245572e-05, "loss": 0.2428, "step": 1598 }, { "epoch": 0.028317549708456746, "grad_norm": 2.0314273834228516, "learning_rate": 2.831759149940968e-05, "loss": 0.2031, "step": 1599 }, { "epoch": 0.028335259245485175, "grad_norm": 2.0518176555633545, "learning_rate": 2.8335301062573792e-05, "loss": 0.2117, "step": 1600 }, { "epoch": 0.028352968782513602, "grad_norm": 2.362600088119507, "learning_rate": 2.8353010625737897e-05, "loss": 0.1648, "step": 1601 }, { "epoch": 0.02837067831954203, "grad_norm": 2.3037047386169434, "learning_rate": 2.837072018890201e-05, "loss": 0.1909, "step": 1602 }, { "epoch": 0.028388387856570458, "grad_norm": 2.535851240158081, "learning_rate": 2.8388429752066114e-05, "loss": 0.1601, "step": 1603 }, { "epoch": 0.028406097393598888, "grad_norm": 2.400670289993286, "learning_rate": 2.8406139315230225e-05, "loss": 0.1981, "step": 1604 }, { "epoch": 0.028423806930627318, "grad_norm": 3.9627768993377686, "learning_rate": 2.8423848878394334e-05, "loss": 0.2304, "step": 1605 }, { "epoch": 0.028441516467655744, "grad_norm": 1.8717331886291504, "learning_rate": 2.8441558441558442e-05, "loss": 0.2213, "step": 1606 }, { "epoch": 0.028459226004684174, "grad_norm": 2.562387228012085, "learning_rate": 2.845926800472255e-05, "loss": 0.1816, "step": 1607 }, { "epoch": 0.0284769355417126, "grad_norm": 4.206682205200195, "learning_rate": 2.847697756788666e-05, "loss": 0.237, "step": 1608 }, { "epoch": 0.02849464507874103, "grad_norm": 2.6953680515289307, "learning_rate": 2.8494687131050767e-05, "loss": 0.2215, "step": 1609 }, { "epoch": 0.028512354615769456, "grad_norm": 2.7340075969696045, "learning_rate": 2.851239669421488e-05, "loss": 0.223, "step": 1610 }, { "epoch": 0.028530064152797886, "grad_norm": 2.6246461868286133, "learning_rate": 2.8530106257378984e-05, "loss": 0.2613, "step": 1611 }, { "epoch": 0.028547773689826313, "grad_norm": 2.67979097366333, "learning_rate": 2.8547815820543095e-05, "loss": 0.2321, "step": 1612 }, { "epoch": 0.028565483226854742, "grad_norm": 2.1460344791412354, "learning_rate": 2.85655253837072e-05, "loss": 0.1258, "step": 1613 }, { "epoch": 0.02858319276388317, "grad_norm": 1.745981216430664, "learning_rate": 2.8583234946871312e-05, "loss": 0.2021, "step": 1614 }, { "epoch": 0.0286009023009116, "grad_norm": 2.682851552963257, "learning_rate": 2.8600944510035417e-05, "loss": 0.2021, "step": 1615 }, { "epoch": 0.028618611837940025, "grad_norm": 2.3288187980651855, "learning_rate": 2.861865407319953e-05, "loss": 0.2094, "step": 1616 }, { "epoch": 0.028636321374968455, "grad_norm": 2.1817426681518555, "learning_rate": 2.8636363636363637e-05, "loss": 0.1981, "step": 1617 }, { "epoch": 0.028654030911996885, "grad_norm": 2.937725067138672, "learning_rate": 2.8654073199527745e-05, "loss": 0.2241, "step": 1618 }, { "epoch": 0.02867174044902531, "grad_norm": 1.7908066511154175, "learning_rate": 2.8671782762691854e-05, "loss": 0.2187, "step": 1619 }, { "epoch": 0.02868944998605374, "grad_norm": 2.9421894550323486, "learning_rate": 2.8689492325855962e-05, "loss": 0.1623, "step": 1620 }, { "epoch": 0.028707159523082167, "grad_norm": 1.940549612045288, "learning_rate": 2.870720188902007e-05, "loss": 0.1761, "step": 1621 }, { "epoch": 0.028724869060110597, "grad_norm": 2.646627426147461, "learning_rate": 2.8724911452184182e-05, "loss": 0.1988, "step": 1622 }, { "epoch": 0.028742578597139024, "grad_norm": 2.2619082927703857, "learning_rate": 2.8742621015348287e-05, "loss": 0.2016, "step": 1623 }, { "epoch": 0.028760288134167453, "grad_norm": 5.749508380889893, "learning_rate": 2.87603305785124e-05, "loss": 0.2244, "step": 1624 }, { "epoch": 0.02877799767119588, "grad_norm": 2.3819446563720703, "learning_rate": 2.8778040141676503e-05, "loss": 0.226, "step": 1625 }, { "epoch": 0.02879570720822431, "grad_norm": 2.2078967094421387, "learning_rate": 2.8795749704840615e-05, "loss": 0.1885, "step": 1626 }, { "epoch": 0.028813416745252736, "grad_norm": 2.786677122116089, "learning_rate": 2.8813459268004723e-05, "loss": 0.1724, "step": 1627 }, { "epoch": 0.028831126282281166, "grad_norm": 4.038115978240967, "learning_rate": 2.8831168831168832e-05, "loss": 0.2421, "step": 1628 }, { "epoch": 0.028848835819309592, "grad_norm": 3.0823402404785156, "learning_rate": 2.884887839433294e-05, "loss": 0.2268, "step": 1629 }, { "epoch": 0.028866545356338022, "grad_norm": 2.3979434967041016, "learning_rate": 2.886658795749705e-05, "loss": 0.2331, "step": 1630 }, { "epoch": 0.028884254893366452, "grad_norm": 3.05082106590271, "learning_rate": 2.8884297520661157e-05, "loss": 0.2021, "step": 1631 }, { "epoch": 0.028901964430394878, "grad_norm": 2.285433530807495, "learning_rate": 2.8902007083825265e-05, "loss": 0.2419, "step": 1632 }, { "epoch": 0.028919673967423308, "grad_norm": 2.059668779373169, "learning_rate": 2.8919716646989373e-05, "loss": 0.1508, "step": 1633 }, { "epoch": 0.028937383504451734, "grad_norm": 2.7970736026763916, "learning_rate": 2.8937426210153485e-05, "loss": 0.2777, "step": 1634 }, { "epoch": 0.028955093041480164, "grad_norm": 2.7912652492523193, "learning_rate": 2.895513577331759e-05, "loss": 0.193, "step": 1635 }, { "epoch": 0.02897280257850859, "grad_norm": 2.67448091506958, "learning_rate": 2.89728453364817e-05, "loss": 0.2178, "step": 1636 }, { "epoch": 0.02899051211553702, "grad_norm": 3.4983246326446533, "learning_rate": 2.8990554899645807e-05, "loss": 0.2875, "step": 1637 }, { "epoch": 0.029008221652565447, "grad_norm": 2.492966413497925, "learning_rate": 2.9008264462809918e-05, "loss": 0.2412, "step": 1638 }, { "epoch": 0.029025931189593877, "grad_norm": 2.4299747943878174, "learning_rate": 2.9025974025974027e-05, "loss": 0.2201, "step": 1639 }, { "epoch": 0.029043640726622303, "grad_norm": 2.9760987758636475, "learning_rate": 2.9043683589138135e-05, "loss": 0.1835, "step": 1640 }, { "epoch": 0.029061350263650733, "grad_norm": 1.7022783756256104, "learning_rate": 2.9061393152302243e-05, "loss": 0.1902, "step": 1641 }, { "epoch": 0.02907905980067916, "grad_norm": 2.3024892807006836, "learning_rate": 2.907910271546635e-05, "loss": 0.2258, "step": 1642 }, { "epoch": 0.02909676933770759, "grad_norm": 2.4476890563964844, "learning_rate": 2.909681227863046e-05, "loss": 0.2078, "step": 1643 }, { "epoch": 0.02911447887473602, "grad_norm": 3.883331060409546, "learning_rate": 2.911452184179457e-05, "loss": 0.229, "step": 1644 }, { "epoch": 0.029132188411764445, "grad_norm": 2.2359886169433594, "learning_rate": 2.9132231404958676e-05, "loss": 0.2279, "step": 1645 }, { "epoch": 0.029149897948792875, "grad_norm": 2.014756917953491, "learning_rate": 2.9149940968122788e-05, "loss": 0.1933, "step": 1646 }, { "epoch": 0.0291676074858213, "grad_norm": 2.7747812271118164, "learning_rate": 2.9167650531286893e-05, "loss": 0.1921, "step": 1647 }, { "epoch": 0.02918531702284973, "grad_norm": 2.1722395420074463, "learning_rate": 2.9185360094451005e-05, "loss": 0.2149, "step": 1648 }, { "epoch": 0.029203026559878158, "grad_norm": 4.396351337432861, "learning_rate": 2.920306965761511e-05, "loss": 0.2205, "step": 1649 }, { "epoch": 0.029220736096906588, "grad_norm": 4.507271766662598, "learning_rate": 2.922077922077922e-05, "loss": 0.2278, "step": 1650 }, { "epoch": 0.029238445633935014, "grad_norm": 1.8857364654541016, "learning_rate": 2.923848878394333e-05, "loss": 0.2039, "step": 1651 }, { "epoch": 0.029256155170963444, "grad_norm": 2.570111036300659, "learning_rate": 2.9256198347107438e-05, "loss": 0.2429, "step": 1652 }, { "epoch": 0.02927386470799187, "grad_norm": 2.5733706951141357, "learning_rate": 2.9273907910271546e-05, "loss": 0.2019, "step": 1653 }, { "epoch": 0.0292915742450203, "grad_norm": 2.01139235496521, "learning_rate": 2.9291617473435655e-05, "loss": 0.1802, "step": 1654 }, { "epoch": 0.029309283782048726, "grad_norm": 3.283074378967285, "learning_rate": 2.9309327036599763e-05, "loss": 0.2295, "step": 1655 }, { "epoch": 0.029326993319077156, "grad_norm": 3.526217222213745, "learning_rate": 2.9327036599763875e-05, "loss": 0.2051, "step": 1656 }, { "epoch": 0.029344702856105583, "grad_norm": 2.3609061241149902, "learning_rate": 2.934474616292798e-05, "loss": 0.2237, "step": 1657 }, { "epoch": 0.029362412393134012, "grad_norm": 2.0373942852020264, "learning_rate": 2.936245572609209e-05, "loss": 0.1877, "step": 1658 }, { "epoch": 0.029380121930162442, "grad_norm": 2.4823553562164307, "learning_rate": 2.9380165289256196e-05, "loss": 0.1877, "step": 1659 }, { "epoch": 0.02939783146719087, "grad_norm": 2.747537136077881, "learning_rate": 2.9397874852420308e-05, "loss": 0.2087, "step": 1660 }, { "epoch": 0.0294155410042193, "grad_norm": 2.628568172454834, "learning_rate": 2.9415584415584416e-05, "loss": 0.197, "step": 1661 }, { "epoch": 0.029433250541247725, "grad_norm": 2.182156801223755, "learning_rate": 2.9433293978748525e-05, "loss": 0.2512, "step": 1662 }, { "epoch": 0.029450960078276155, "grad_norm": 2.4220845699310303, "learning_rate": 2.9451003541912633e-05, "loss": 0.2169, "step": 1663 }, { "epoch": 0.02946866961530458, "grad_norm": 2.6988325119018555, "learning_rate": 2.946871310507674e-05, "loss": 0.2452, "step": 1664 }, { "epoch": 0.02948637915233301, "grad_norm": 2.11460542678833, "learning_rate": 2.948642266824085e-05, "loss": 0.2314, "step": 1665 }, { "epoch": 0.029504088689361437, "grad_norm": 2.1446714401245117, "learning_rate": 2.950413223140496e-05, "loss": 0.2103, "step": 1666 }, { "epoch": 0.029521798226389867, "grad_norm": 3.5513203144073486, "learning_rate": 2.9521841794569066e-05, "loss": 0.208, "step": 1667 }, { "epoch": 0.029539507763418293, "grad_norm": 2.2129967212677, "learning_rate": 2.9539551357733178e-05, "loss": 0.2442, "step": 1668 }, { "epoch": 0.029557217300446723, "grad_norm": 3.1769046783447266, "learning_rate": 2.9557260920897283e-05, "loss": 0.2233, "step": 1669 }, { "epoch": 0.02957492683747515, "grad_norm": 2.8578672409057617, "learning_rate": 2.9574970484061394e-05, "loss": 0.1663, "step": 1670 }, { "epoch": 0.02959263637450358, "grad_norm": 2.23781681060791, "learning_rate": 2.95926800472255e-05, "loss": 0.1915, "step": 1671 }, { "epoch": 0.02961034591153201, "grad_norm": 2.3750791549682617, "learning_rate": 2.961038961038961e-05, "loss": 0.181, "step": 1672 }, { "epoch": 0.029628055448560436, "grad_norm": 2.7913296222686768, "learning_rate": 2.962809917355372e-05, "loss": 0.2483, "step": 1673 }, { "epoch": 0.029645764985588866, "grad_norm": 2.36212158203125, "learning_rate": 2.9645808736717828e-05, "loss": 0.1796, "step": 1674 }, { "epoch": 0.029663474522617292, "grad_norm": 2.819007158279419, "learning_rate": 2.9663518299881936e-05, "loss": 0.1917, "step": 1675 }, { "epoch": 0.02968118405964572, "grad_norm": 3.012659788131714, "learning_rate": 2.9681227863046044e-05, "loss": 0.2089, "step": 1676 }, { "epoch": 0.029698893596674148, "grad_norm": 2.764552116394043, "learning_rate": 2.9698937426210153e-05, "loss": 0.1869, "step": 1677 }, { "epoch": 0.029716603133702578, "grad_norm": 2.4785406589508057, "learning_rate": 2.9716646989374264e-05, "loss": 0.1518, "step": 1678 }, { "epoch": 0.029734312670731004, "grad_norm": 2.360736846923828, "learning_rate": 2.973435655253837e-05, "loss": 0.2473, "step": 1679 }, { "epoch": 0.029752022207759434, "grad_norm": 3.0762171745300293, "learning_rate": 2.975206611570248e-05, "loss": 0.2328, "step": 1680 }, { "epoch": 0.02976973174478786, "grad_norm": 2.0487608909606934, "learning_rate": 2.9769775678866586e-05, "loss": 0.2447, "step": 1681 }, { "epoch": 0.02978744128181629, "grad_norm": 2.9326839447021484, "learning_rate": 2.9787485242030698e-05, "loss": 0.2306, "step": 1682 }, { "epoch": 0.029805150818844717, "grad_norm": 2.416590452194214, "learning_rate": 2.9805194805194806e-05, "loss": 0.1877, "step": 1683 }, { "epoch": 0.029822860355873147, "grad_norm": 3.328813314437866, "learning_rate": 2.9822904368358914e-05, "loss": 0.2548, "step": 1684 }, { "epoch": 0.029840569892901576, "grad_norm": 2.580632448196411, "learning_rate": 2.9840613931523023e-05, "loss": 0.2173, "step": 1685 }, { "epoch": 0.029858279429930003, "grad_norm": 2.5726816654205322, "learning_rate": 2.985832349468713e-05, "loss": 0.1876, "step": 1686 }, { "epoch": 0.029875988966958433, "grad_norm": 3.165820837020874, "learning_rate": 2.987603305785124e-05, "loss": 0.1867, "step": 1687 }, { "epoch": 0.02989369850398686, "grad_norm": 2.377047300338745, "learning_rate": 2.9893742621015347e-05, "loss": 0.2585, "step": 1688 }, { "epoch": 0.02991140804101529, "grad_norm": 2.647552490234375, "learning_rate": 2.9911452184179456e-05, "loss": 0.2247, "step": 1689 }, { "epoch": 0.029929117578043715, "grad_norm": 2.694312334060669, "learning_rate": 2.9929161747343568e-05, "loss": 0.244, "step": 1690 }, { "epoch": 0.029946827115072145, "grad_norm": 4.158058166503906, "learning_rate": 2.9946871310507676e-05, "loss": 0.2193, "step": 1691 }, { "epoch": 0.02996453665210057, "grad_norm": 2.477278470993042, "learning_rate": 2.9964580873671784e-05, "loss": 0.2241, "step": 1692 }, { "epoch": 0.029982246189129, "grad_norm": 2.072052240371704, "learning_rate": 2.9982290436835892e-05, "loss": 0.1622, "step": 1693 }, { "epoch": 0.029999955726157428, "grad_norm": 2.8740615844726562, "learning_rate": 3e-05, "loss": 0.2095, "step": 1694 }, { "epoch": 0.030017665263185857, "grad_norm": 3.1922519207000732, "learning_rate": 2.9999999975325758e-05, "loss": 0.2167, "step": 1695 }, { "epoch": 0.030035374800214284, "grad_norm": 2.2962586879730225, "learning_rate": 2.9999999901303037e-05, "loss": 0.1733, "step": 1696 }, { "epoch": 0.030053084337242714, "grad_norm": 2.6412882804870605, "learning_rate": 2.999999977793183e-05, "loss": 0.1899, "step": 1697 }, { "epoch": 0.030070793874271144, "grad_norm": 2.22198224067688, "learning_rate": 2.9999999605212137e-05, "loss": 0.1687, "step": 1698 }, { "epoch": 0.03008850341129957, "grad_norm": 2.4171571731567383, "learning_rate": 2.9999999383143968e-05, "loss": 0.2021, "step": 1699 }, { "epoch": 0.030106212948328, "grad_norm": 2.9561774730682373, "learning_rate": 2.999999911172732e-05, "loss": 0.2602, "step": 1700 }, { "epoch": 0.030123922485356426, "grad_norm": 2.4069550037384033, "learning_rate": 2.9999998790962187e-05, "loss": 0.1903, "step": 1701 }, { "epoch": 0.030141632022384856, "grad_norm": 2.0496819019317627, "learning_rate": 2.999999842084858e-05, "loss": 0.2189, "step": 1702 }, { "epoch": 0.030159341559413282, "grad_norm": 2.0318543910980225, "learning_rate": 2.999999800138649e-05, "loss": 0.2078, "step": 1703 }, { "epoch": 0.030177051096441712, "grad_norm": 2.2180912494659424, "learning_rate": 2.9999997532575926e-05, "loss": 0.1897, "step": 1704 }, { "epoch": 0.03019476063347014, "grad_norm": 2.502767562866211, "learning_rate": 2.9999997014416886e-05, "loss": 0.2143, "step": 1705 }, { "epoch": 0.03021247017049857, "grad_norm": 2.8452839851379395, "learning_rate": 2.9999996446909375e-05, "loss": 0.224, "step": 1706 }, { "epoch": 0.030230179707526995, "grad_norm": 2.2589728832244873, "learning_rate": 2.9999995830053393e-05, "loss": 0.2755, "step": 1707 }, { "epoch": 0.030247889244555425, "grad_norm": 2.8572187423706055, "learning_rate": 2.9999995163848942e-05, "loss": 0.2545, "step": 1708 }, { "epoch": 0.03026559878158385, "grad_norm": 2.1511189937591553, "learning_rate": 2.9999994448296024e-05, "loss": 0.2272, "step": 1709 }, { "epoch": 0.03028330831861228, "grad_norm": 2.800546646118164, "learning_rate": 2.999999368339464e-05, "loss": 0.2089, "step": 1710 }, { "epoch": 0.03030101785564071, "grad_norm": 2.241126537322998, "learning_rate": 2.9999992869144795e-05, "loss": 0.1845, "step": 1711 }, { "epoch": 0.030318727392669137, "grad_norm": 2.0057387351989746, "learning_rate": 2.999999200554649e-05, "loss": 0.2009, "step": 1712 }, { "epoch": 0.030336436929697567, "grad_norm": 1.639704942703247, "learning_rate": 2.999999109259973e-05, "loss": 0.2018, "step": 1713 }, { "epoch": 0.030354146466725993, "grad_norm": 2.142895221710205, "learning_rate": 2.9999990130304513e-05, "loss": 0.2184, "step": 1714 }, { "epoch": 0.030371856003754423, "grad_norm": 2.5767595767974854, "learning_rate": 2.9999989118660846e-05, "loss": 0.2028, "step": 1715 }, { "epoch": 0.03038956554078285, "grad_norm": 2.7071571350097656, "learning_rate": 2.999998805766874e-05, "loss": 0.1982, "step": 1716 }, { "epoch": 0.03040727507781128, "grad_norm": 2.4610683917999268, "learning_rate": 2.9999986947328176e-05, "loss": 0.2047, "step": 1717 }, { "epoch": 0.030424984614839706, "grad_norm": 2.1619362831115723, "learning_rate": 2.9999985787639183e-05, "loss": 0.1883, "step": 1718 }, { "epoch": 0.030442694151868135, "grad_norm": 3.3524746894836426, "learning_rate": 2.999998457860175e-05, "loss": 0.183, "step": 1719 }, { "epoch": 0.030460403688896562, "grad_norm": 1.8958051204681396, "learning_rate": 2.9999983320215887e-05, "loss": 0.1933, "step": 1720 }, { "epoch": 0.03047811322592499, "grad_norm": 2.885643482208252, "learning_rate": 2.9999982012481594e-05, "loss": 0.2088, "step": 1721 }, { "epoch": 0.030495822762953418, "grad_norm": 4.992901802062988, "learning_rate": 2.999998065539888e-05, "loss": 0.1678, "step": 1722 }, { "epoch": 0.030513532299981848, "grad_norm": 2.7111246585845947, "learning_rate": 2.9999979248967744e-05, "loss": 0.2265, "step": 1723 }, { "epoch": 0.030531241837010278, "grad_norm": 2.5892751216888428, "learning_rate": 2.99999777931882e-05, "loss": 0.1231, "step": 1724 }, { "epoch": 0.030548951374038704, "grad_norm": 2.7991983890533447, "learning_rate": 2.9999976288060237e-05, "loss": 0.1967, "step": 1725 }, { "epoch": 0.030566660911067134, "grad_norm": 1.588302493095398, "learning_rate": 2.9999974733583875e-05, "loss": 0.1659, "step": 1726 }, { "epoch": 0.03058437044809556, "grad_norm": 2.1578714847564697, "learning_rate": 2.999997312975911e-05, "loss": 0.2473, "step": 1727 }, { "epoch": 0.03060207998512399, "grad_norm": 2.4398977756500244, "learning_rate": 2.999997147658595e-05, "loss": 0.2322, "step": 1728 }, { "epoch": 0.030619789522152416, "grad_norm": 2.39328932762146, "learning_rate": 2.9999969774064402e-05, "loss": 0.2211, "step": 1729 }, { "epoch": 0.030637499059180846, "grad_norm": 1.8242993354797363, "learning_rate": 2.9999968022194476e-05, "loss": 0.1697, "step": 1730 }, { "epoch": 0.030655208596209273, "grad_norm": 2.717905044555664, "learning_rate": 2.9999966220976162e-05, "loss": 0.193, "step": 1731 }, { "epoch": 0.030672918133237703, "grad_norm": 2.5574405193328857, "learning_rate": 2.9999964370409482e-05, "loss": 0.1944, "step": 1732 }, { "epoch": 0.03069062767026613, "grad_norm": 2.604346513748169, "learning_rate": 2.999996247049444e-05, "loss": 0.2298, "step": 1733 }, { "epoch": 0.03070833720729456, "grad_norm": 2.9126698970794678, "learning_rate": 2.9999960521231033e-05, "loss": 0.2141, "step": 1734 }, { "epoch": 0.030726046744322985, "grad_norm": 2.392829179763794, "learning_rate": 2.9999958522619277e-05, "loss": 0.1911, "step": 1735 }, { "epoch": 0.030743756281351415, "grad_norm": 1.994145154953003, "learning_rate": 2.999995647465917e-05, "loss": 0.2313, "step": 1736 }, { "epoch": 0.03076146581837984, "grad_norm": 2.6799352169036865, "learning_rate": 2.999995437735073e-05, "loss": 0.2005, "step": 1737 }, { "epoch": 0.03077917535540827, "grad_norm": 2.9827518463134766, "learning_rate": 2.999995223069395e-05, "loss": 0.1589, "step": 1738 }, { "epoch": 0.0307968848924367, "grad_norm": 2.1834704875946045, "learning_rate": 2.9999950034688853e-05, "loss": 0.2152, "step": 1739 }, { "epoch": 0.030814594429465127, "grad_norm": 3.050957202911377, "learning_rate": 2.999994778933543e-05, "loss": 0.177, "step": 1740 }, { "epoch": 0.030832303966493557, "grad_norm": 2.999016046524048, "learning_rate": 2.9999945494633696e-05, "loss": 0.2046, "step": 1741 }, { "epoch": 0.030850013503521984, "grad_norm": 2.568363904953003, "learning_rate": 2.999994315058366e-05, "loss": 0.2089, "step": 1742 }, { "epoch": 0.030867723040550413, "grad_norm": 2.462632179260254, "learning_rate": 2.999994075718533e-05, "loss": 0.2334, "step": 1743 }, { "epoch": 0.03088543257757884, "grad_norm": 2.1161839962005615, "learning_rate": 2.9999938314438713e-05, "loss": 0.2274, "step": 1744 }, { "epoch": 0.03090314211460727, "grad_norm": 2.572249174118042, "learning_rate": 2.999993582234381e-05, "loss": 0.2754, "step": 1745 }, { "epoch": 0.030920851651635696, "grad_norm": 2.456145763397217, "learning_rate": 2.9999933280900643e-05, "loss": 0.2358, "step": 1746 }, { "epoch": 0.030938561188664126, "grad_norm": 1.9822627305984497, "learning_rate": 2.9999930690109207e-05, "loss": 0.1844, "step": 1747 }, { "epoch": 0.030956270725692552, "grad_norm": 2.154475450515747, "learning_rate": 2.999992804996952e-05, "loss": 0.2535, "step": 1748 }, { "epoch": 0.030973980262720982, "grad_norm": 3.148165464401245, "learning_rate": 2.9999925360481585e-05, "loss": 0.2581, "step": 1749 }, { "epoch": 0.03099168979974941, "grad_norm": 2.1009602546691895, "learning_rate": 2.999992262164541e-05, "loss": 0.2471, "step": 1750 }, { "epoch": 0.031009399336777838, "grad_norm": 2.2983553409576416, "learning_rate": 2.9999919833461012e-05, "loss": 0.214, "step": 1751 }, { "epoch": 0.031027108873806268, "grad_norm": 2.4024062156677246, "learning_rate": 2.999991699592839e-05, "loss": 0.2309, "step": 1752 }, { "epoch": 0.031044818410834694, "grad_norm": 1.6448572874069214, "learning_rate": 2.9999914109047563e-05, "loss": 0.1866, "step": 1753 }, { "epoch": 0.031062527947863124, "grad_norm": 4.376161575317383, "learning_rate": 2.999991117281853e-05, "loss": 0.2183, "step": 1754 }, { "epoch": 0.03108023748489155, "grad_norm": 2.310882329940796, "learning_rate": 2.999990818724131e-05, "loss": 0.2756, "step": 1755 }, { "epoch": 0.03109794702191998, "grad_norm": 2.772268295288086, "learning_rate": 2.9999905152315912e-05, "loss": 0.2356, "step": 1756 }, { "epoch": 0.031115656558948407, "grad_norm": 2.9048664569854736, "learning_rate": 2.999990206804234e-05, "loss": 0.2669, "step": 1757 }, { "epoch": 0.031133366095976837, "grad_norm": 1.608648419380188, "learning_rate": 2.999989893442061e-05, "loss": 0.1737, "step": 1758 }, { "epoch": 0.031151075633005263, "grad_norm": 1.8875187635421753, "learning_rate": 2.9999895751450724e-05, "loss": 0.1816, "step": 1759 }, { "epoch": 0.031168785170033693, "grad_norm": 2.804023504257202, "learning_rate": 2.9999892519132702e-05, "loss": 0.2148, "step": 1760 }, { "epoch": 0.03118649470706212, "grad_norm": 1.8456350564956665, "learning_rate": 2.9999889237466548e-05, "loss": 0.201, "step": 1761 }, { "epoch": 0.03120420424409055, "grad_norm": 2.819697856903076, "learning_rate": 2.9999885906452278e-05, "loss": 0.2392, "step": 1762 }, { "epoch": 0.031221913781118976, "grad_norm": 3.6516330242156982, "learning_rate": 2.99998825260899e-05, "loss": 0.2327, "step": 1763 }, { "epoch": 0.031239623318147405, "grad_norm": 2.155762195587158, "learning_rate": 2.9999879096379427e-05, "loss": 0.159, "step": 1764 }, { "epoch": 0.03125733285517583, "grad_norm": 2.0823357105255127, "learning_rate": 2.9999875617320867e-05, "loss": 0.2009, "step": 1765 }, { "epoch": 0.031275042392204265, "grad_norm": 3.2696714401245117, "learning_rate": 2.9999872088914235e-05, "loss": 0.1943, "step": 1766 }, { "epoch": 0.03129275192923269, "grad_norm": 2.5845441818237305, "learning_rate": 2.9999868511159538e-05, "loss": 0.1919, "step": 1767 }, { "epoch": 0.03131046146626112, "grad_norm": 2.0790722370147705, "learning_rate": 2.9999864884056793e-05, "loss": 0.2296, "step": 1768 }, { "epoch": 0.031328171003289544, "grad_norm": 2.3684322834014893, "learning_rate": 2.999986120760601e-05, "loss": 0.1929, "step": 1769 }, { "epoch": 0.03134588054031798, "grad_norm": 2.342740774154663, "learning_rate": 2.99998574818072e-05, "loss": 0.1502, "step": 1770 }, { "epoch": 0.031363590077346404, "grad_norm": 2.0198512077331543, "learning_rate": 2.999985370666038e-05, "loss": 0.206, "step": 1771 }, { "epoch": 0.03138129961437483, "grad_norm": 2.675664186477661, "learning_rate": 2.999984988216555e-05, "loss": 0.2182, "step": 1772 }, { "epoch": 0.03139900915140326, "grad_norm": 2.5956244468688965, "learning_rate": 2.999984600832274e-05, "loss": 0.2019, "step": 1773 }, { "epoch": 0.03141671868843169, "grad_norm": 2.3662006855010986, "learning_rate": 2.9999842085131946e-05, "loss": 0.2369, "step": 1774 }, { "epoch": 0.031434428225460116, "grad_norm": 2.4221765995025635, "learning_rate": 2.9999838112593194e-05, "loss": 0.2405, "step": 1775 }, { "epoch": 0.03145213776248854, "grad_norm": 2.5108513832092285, "learning_rate": 2.999983409070649e-05, "loss": 0.2589, "step": 1776 }, { "epoch": 0.03146984729951697, "grad_norm": 2.4272541999816895, "learning_rate": 2.9999830019471847e-05, "loss": 0.2152, "step": 1777 }, { "epoch": 0.0314875568365454, "grad_norm": 2.738532543182373, "learning_rate": 2.9999825898889282e-05, "loss": 0.2155, "step": 1778 }, { "epoch": 0.03150526637357383, "grad_norm": 2.5793354511260986, "learning_rate": 2.9999821728958805e-05, "loss": 0.183, "step": 1779 }, { "epoch": 0.031522975910602255, "grad_norm": 1.908286452293396, "learning_rate": 2.999981750968044e-05, "loss": 0.1945, "step": 1780 }, { "epoch": 0.03154068544763069, "grad_norm": 2.4058780670166016, "learning_rate": 2.9999813241054182e-05, "loss": 0.2528, "step": 1781 }, { "epoch": 0.031558394984659115, "grad_norm": 2.6224610805511475, "learning_rate": 2.9999808923080062e-05, "loss": 0.2499, "step": 1782 }, { "epoch": 0.03157610452168754, "grad_norm": 2.422938823699951, "learning_rate": 2.9999804555758084e-05, "loss": 0.3, "step": 1783 }, { "epoch": 0.03159381405871597, "grad_norm": 2.096282958984375, "learning_rate": 2.999980013908827e-05, "loss": 0.239, "step": 1784 }, { "epoch": 0.0316115235957444, "grad_norm": 2.9666197299957275, "learning_rate": 2.9999795673070625e-05, "loss": 0.1798, "step": 1785 }, { "epoch": 0.03162923313277283, "grad_norm": 2.214970588684082, "learning_rate": 2.9999791157705177e-05, "loss": 0.2172, "step": 1786 }, { "epoch": 0.031646942669801253, "grad_norm": 2.7075657844543457, "learning_rate": 2.999978659299193e-05, "loss": 0.1945, "step": 1787 }, { "epoch": 0.03166465220682968, "grad_norm": 2.707385540008545, "learning_rate": 2.9999781978930897e-05, "loss": 0.1932, "step": 1788 }, { "epoch": 0.03168236174385811, "grad_norm": 2.9866409301757812, "learning_rate": 2.9999777315522105e-05, "loss": 0.2002, "step": 1789 }, { "epoch": 0.03170007128088654, "grad_norm": 1.818095326423645, "learning_rate": 2.999977260276556e-05, "loss": 0.1787, "step": 1790 }, { "epoch": 0.031717780817914966, "grad_norm": 2.214944362640381, "learning_rate": 2.999976784066128e-05, "loss": 0.2234, "step": 1791 }, { "epoch": 0.0317354903549434, "grad_norm": 1.5969702005386353, "learning_rate": 2.9999763029209283e-05, "loss": 0.2145, "step": 1792 }, { "epoch": 0.031753199891971826, "grad_norm": 2.539012908935547, "learning_rate": 2.999975816840958e-05, "loss": 0.1908, "step": 1793 }, { "epoch": 0.03177090942900025, "grad_norm": 2.222928047180176, "learning_rate": 2.9999753258262194e-05, "loss": 0.237, "step": 1794 }, { "epoch": 0.03178861896602868, "grad_norm": 2.655707836151123, "learning_rate": 2.999974829876713e-05, "loss": 0.1928, "step": 1795 }, { "epoch": 0.03180632850305711, "grad_norm": 2.1023073196411133, "learning_rate": 2.9999743289924417e-05, "loss": 0.253, "step": 1796 }, { "epoch": 0.03182403804008554, "grad_norm": 2.3089652061462402, "learning_rate": 2.9999738231734063e-05, "loss": 0.2371, "step": 1797 }, { "epoch": 0.031841747577113964, "grad_norm": 2.083495855331421, "learning_rate": 2.999973312419609e-05, "loss": 0.1854, "step": 1798 }, { "epoch": 0.03185945711414239, "grad_norm": 2.5884501934051514, "learning_rate": 2.9999727967310512e-05, "loss": 0.2362, "step": 1799 }, { "epoch": 0.031877166651170824, "grad_norm": 2.2440545558929443, "learning_rate": 2.9999722761077345e-05, "loss": 0.1952, "step": 1800 }, { "epoch": 0.03189487618819925, "grad_norm": 2.866589307785034, "learning_rate": 2.9999717505496605e-05, "loss": 0.2035, "step": 1801 }, { "epoch": 0.03191258572522768, "grad_norm": 1.80193030834198, "learning_rate": 2.9999712200568315e-05, "loss": 0.1952, "step": 1802 }, { "epoch": 0.0319302952622561, "grad_norm": 1.9224921464920044, "learning_rate": 2.9999706846292484e-05, "loss": 0.1781, "step": 1803 }, { "epoch": 0.031948004799284536, "grad_norm": 1.8918901681900024, "learning_rate": 2.999970144266914e-05, "loss": 0.2157, "step": 1804 }, { "epoch": 0.03196571433631296, "grad_norm": 4.215412139892578, "learning_rate": 2.9999695989698296e-05, "loss": 0.2222, "step": 1805 }, { "epoch": 0.03198342387334139, "grad_norm": 2.7490789890289307, "learning_rate": 2.9999690487379965e-05, "loss": 0.1912, "step": 1806 }, { "epoch": 0.03200113341036982, "grad_norm": 1.9281971454620361, "learning_rate": 2.9999684935714173e-05, "loss": 0.2056, "step": 1807 }, { "epoch": 0.03201884294739825, "grad_norm": 2.3775906562805176, "learning_rate": 2.9999679334700935e-05, "loss": 0.2003, "step": 1808 }, { "epoch": 0.032036552484426675, "grad_norm": 2.766352653503418, "learning_rate": 2.9999673684340266e-05, "loss": 0.2653, "step": 1809 }, { "epoch": 0.0320542620214551, "grad_norm": 2.412569761276245, "learning_rate": 2.999966798463219e-05, "loss": 0.1878, "step": 1810 }, { "epoch": 0.032071971558483535, "grad_norm": 3.3818235397338867, "learning_rate": 2.999966223557672e-05, "loss": 0.2322, "step": 1811 }, { "epoch": 0.03208968109551196, "grad_norm": 3.2786967754364014, "learning_rate": 2.999965643717388e-05, "loss": 0.2131, "step": 1812 }, { "epoch": 0.03210739063254039, "grad_norm": 2.3582370281219482, "learning_rate": 2.999965058942369e-05, "loss": 0.2557, "step": 1813 }, { "epoch": 0.032125100169568814, "grad_norm": 1.5234642028808594, "learning_rate": 2.9999644692326164e-05, "loss": 0.1845, "step": 1814 }, { "epoch": 0.03214280970659725, "grad_norm": 2.8272011280059814, "learning_rate": 2.9999638745881324e-05, "loss": 0.2219, "step": 1815 }, { "epoch": 0.032160519243625674, "grad_norm": 1.9720220565795898, "learning_rate": 2.9999632750089194e-05, "loss": 0.1535, "step": 1816 }, { "epoch": 0.0321782287806541, "grad_norm": 2.4863932132720947, "learning_rate": 2.999962670494978e-05, "loss": 0.1782, "step": 1817 }, { "epoch": 0.03219593831768253, "grad_norm": 2.4773616790771484, "learning_rate": 2.999962061046312e-05, "loss": 0.1937, "step": 1818 }, { "epoch": 0.03221364785471096, "grad_norm": 1.7365185022354126, "learning_rate": 2.9999614466629222e-05, "loss": 0.2168, "step": 1819 }, { "epoch": 0.032231357391739386, "grad_norm": 2.0612664222717285, "learning_rate": 2.9999608273448112e-05, "loss": 0.258, "step": 1820 }, { "epoch": 0.03224906692876781, "grad_norm": 2.5161027908325195, "learning_rate": 2.9999602030919805e-05, "loss": 0.1851, "step": 1821 }, { "epoch": 0.032266776465796246, "grad_norm": 2.628225088119507, "learning_rate": 2.9999595739044323e-05, "loss": 0.1854, "step": 1822 }, { "epoch": 0.03228448600282467, "grad_norm": 1.8137168884277344, "learning_rate": 2.999958939782169e-05, "loss": 0.1917, "step": 1823 }, { "epoch": 0.0323021955398531, "grad_norm": 3.4449353218078613, "learning_rate": 2.999958300725193e-05, "loss": 0.2583, "step": 1824 }, { "epoch": 0.032319905076881525, "grad_norm": 2.8332176208496094, "learning_rate": 2.999957656733505e-05, "loss": 0.2122, "step": 1825 }, { "epoch": 0.03233761461390996, "grad_norm": 1.919183373451233, "learning_rate": 2.9999570078071086e-05, "loss": 0.2297, "step": 1826 }, { "epoch": 0.032355324150938385, "grad_norm": 4.333654880523682, "learning_rate": 2.999956353946005e-05, "loss": 0.1946, "step": 1827 }, { "epoch": 0.03237303368796681, "grad_norm": 3.8901517391204834, "learning_rate": 2.9999556951501972e-05, "loss": 0.2301, "step": 1828 }, { "epoch": 0.03239074322499524, "grad_norm": 4.016636371612549, "learning_rate": 2.9999550314196866e-05, "loss": 0.2315, "step": 1829 }, { "epoch": 0.03240845276202367, "grad_norm": 2.47420334815979, "learning_rate": 2.9999543627544756e-05, "loss": 0.1521, "step": 1830 }, { "epoch": 0.0324261622990521, "grad_norm": 2.081713914871216, "learning_rate": 2.9999536891545665e-05, "loss": 0.2174, "step": 1831 }, { "epoch": 0.03244387183608052, "grad_norm": 3.136591911315918, "learning_rate": 2.9999530106199617e-05, "loss": 0.1848, "step": 1832 }, { "epoch": 0.03246158137310896, "grad_norm": 3.1633682250976562, "learning_rate": 2.999952327150663e-05, "loss": 0.2103, "step": 1833 }, { "epoch": 0.03247929091013738, "grad_norm": 2.794672966003418, "learning_rate": 2.9999516387466728e-05, "loss": 0.2075, "step": 1834 }, { "epoch": 0.03249700044716581, "grad_norm": 2.8431317806243896, "learning_rate": 2.9999509454079937e-05, "loss": 0.1806, "step": 1835 }, { "epoch": 0.032514709984194236, "grad_norm": 3.12935471534729, "learning_rate": 2.999950247134627e-05, "loss": 0.2478, "step": 1836 }, { "epoch": 0.03253241952122267, "grad_norm": 1.828811764717102, "learning_rate": 2.9999495439265763e-05, "loss": 0.2218, "step": 1837 }, { "epoch": 0.032550129058251095, "grad_norm": 2.6610238552093506, "learning_rate": 2.9999488357838434e-05, "loss": 0.208, "step": 1838 }, { "epoch": 0.03256783859527952, "grad_norm": 2.4065985679626465, "learning_rate": 2.9999481227064306e-05, "loss": 0.2297, "step": 1839 }, { "epoch": 0.03258554813230795, "grad_norm": 3.2885947227478027, "learning_rate": 2.9999474046943395e-05, "loss": 0.2275, "step": 1840 }, { "epoch": 0.03260325766933638, "grad_norm": 2.478978157043457, "learning_rate": 2.9999466817475736e-05, "loss": 0.1879, "step": 1841 }, { "epoch": 0.03262096720636481, "grad_norm": 3.413325309753418, "learning_rate": 2.9999459538661347e-05, "loss": 0.2445, "step": 1842 }, { "epoch": 0.032638676743393234, "grad_norm": 3.9369382858276367, "learning_rate": 2.9999452210500252e-05, "loss": 0.24, "step": 1843 }, { "epoch": 0.03265638628042166, "grad_norm": 2.8852319717407227, "learning_rate": 2.999944483299248e-05, "loss": 0.2983, "step": 1844 }, { "epoch": 0.032674095817450094, "grad_norm": 2.423675060272217, "learning_rate": 2.9999437406138052e-05, "loss": 0.2023, "step": 1845 }, { "epoch": 0.03269180535447852, "grad_norm": 2.293455123901367, "learning_rate": 2.9999429929936994e-05, "loss": 0.2387, "step": 1846 }, { "epoch": 0.03270951489150695, "grad_norm": 2.560502529144287, "learning_rate": 2.999942240438932e-05, "loss": 0.1509, "step": 1847 }, { "epoch": 0.03272722442853538, "grad_norm": 2.222580909729004, "learning_rate": 2.999941482949507e-05, "loss": 0.2117, "step": 1848 }, { "epoch": 0.032744933965563806, "grad_norm": 4.149318695068359, "learning_rate": 2.999940720525426e-05, "loss": 0.1838, "step": 1849 }, { "epoch": 0.03276264350259223, "grad_norm": 2.770440101623535, "learning_rate": 2.9999399531666918e-05, "loss": 0.1879, "step": 1850 }, { "epoch": 0.03278035303962066, "grad_norm": 2.845264196395874, "learning_rate": 2.9999391808733067e-05, "loss": 0.195, "step": 1851 }, { "epoch": 0.03279806257664909, "grad_norm": 1.8160464763641357, "learning_rate": 2.9999384036452736e-05, "loss": 0.1736, "step": 1852 }, { "epoch": 0.03281577211367752, "grad_norm": 2.1748061180114746, "learning_rate": 2.9999376214825947e-05, "loss": 0.1703, "step": 1853 }, { "epoch": 0.032833481650705945, "grad_norm": 2.5249295234680176, "learning_rate": 2.999936834385273e-05, "loss": 0.2031, "step": 1854 }, { "epoch": 0.03285119118773437, "grad_norm": 2.7034332752227783, "learning_rate": 2.9999360423533106e-05, "loss": 0.1966, "step": 1855 }, { "epoch": 0.032868900724762805, "grad_norm": 2.4689345359802246, "learning_rate": 2.9999352453867106e-05, "loss": 0.1736, "step": 1856 }, { "epoch": 0.03288661026179123, "grad_norm": 2.3145244121551514, "learning_rate": 2.999934443485475e-05, "loss": 0.1594, "step": 1857 }, { "epoch": 0.03290431979881966, "grad_norm": 1.5869982242584229, "learning_rate": 2.999933636649607e-05, "loss": 0.178, "step": 1858 }, { "epoch": 0.03292202933584809, "grad_norm": 2.107912540435791, "learning_rate": 2.999932824879109e-05, "loss": 0.2145, "step": 1859 }, { "epoch": 0.03293973887287652, "grad_norm": 2.654987335205078, "learning_rate": 2.999932008173984e-05, "loss": 0.1859, "step": 1860 }, { "epoch": 0.032957448409904944, "grad_norm": 1.987683653831482, "learning_rate": 2.999931186534234e-05, "loss": 0.2184, "step": 1861 }, { "epoch": 0.03297515794693337, "grad_norm": 2.0481653213500977, "learning_rate": 2.9999303599598622e-05, "loss": 0.1904, "step": 1862 }, { "epoch": 0.0329928674839618, "grad_norm": 2.771674394607544, "learning_rate": 2.9999295284508713e-05, "loss": 0.1811, "step": 1863 }, { "epoch": 0.03301057702099023, "grad_norm": 2.3693971633911133, "learning_rate": 2.9999286920072637e-05, "loss": 0.2152, "step": 1864 }, { "epoch": 0.033028286558018656, "grad_norm": 2.521068811416626, "learning_rate": 2.9999278506290423e-05, "loss": 0.2115, "step": 1865 }, { "epoch": 0.03304599609504708, "grad_norm": 2.6125290393829346, "learning_rate": 2.9999270043162106e-05, "loss": 0.1962, "step": 1866 }, { "epoch": 0.033063705632075516, "grad_norm": 2.2146806716918945, "learning_rate": 2.9999261530687703e-05, "loss": 0.235, "step": 1867 }, { "epoch": 0.03308141516910394, "grad_norm": 2.1060609817504883, "learning_rate": 2.9999252968867247e-05, "loss": 0.231, "step": 1868 }, { "epoch": 0.03309912470613237, "grad_norm": 1.993875503540039, "learning_rate": 2.999924435770077e-05, "loss": 0.2092, "step": 1869 }, { "epoch": 0.033116834243160795, "grad_norm": 2.4707794189453125, "learning_rate": 2.9999235697188294e-05, "loss": 0.2221, "step": 1870 }, { "epoch": 0.03313454378018923, "grad_norm": 2.0689446926116943, "learning_rate": 2.9999226987329847e-05, "loss": 0.1837, "step": 1871 }, { "epoch": 0.033152253317217655, "grad_norm": 2.108670711517334, "learning_rate": 2.9999218228125463e-05, "loss": 0.2088, "step": 1872 }, { "epoch": 0.03316996285424608, "grad_norm": 2.4932332038879395, "learning_rate": 2.9999209419575165e-05, "loss": 0.2194, "step": 1873 }, { "epoch": 0.033187672391274514, "grad_norm": 2.487386465072632, "learning_rate": 2.9999200561678988e-05, "loss": 0.2379, "step": 1874 }, { "epoch": 0.03320538192830294, "grad_norm": 2.5801291465759277, "learning_rate": 2.999919165443696e-05, "loss": 0.1828, "step": 1875 }, { "epoch": 0.03322309146533137, "grad_norm": 2.553447961807251, "learning_rate": 2.9999182697849103e-05, "loss": 0.2052, "step": 1876 }, { "epoch": 0.03324080100235979, "grad_norm": 2.158396005630493, "learning_rate": 2.9999173691915457e-05, "loss": 0.2201, "step": 1877 }, { "epoch": 0.03325851053938823, "grad_norm": 2.753021717071533, "learning_rate": 2.9999164636636045e-05, "loss": 0.2042, "step": 1878 }, { "epoch": 0.03327622007641665, "grad_norm": 1.92559814453125, "learning_rate": 2.9999155532010897e-05, "loss": 0.2266, "step": 1879 }, { "epoch": 0.03329392961344508, "grad_norm": 2.9521782398223877, "learning_rate": 2.9999146378040042e-05, "loss": 0.1801, "step": 1880 }, { "epoch": 0.033311639150473506, "grad_norm": 2.80912709236145, "learning_rate": 2.999913717472352e-05, "loss": 0.1723, "step": 1881 }, { "epoch": 0.03332934868750194, "grad_norm": 3.218515634536743, "learning_rate": 2.999912792206135e-05, "loss": 0.1669, "step": 1882 }, { "epoch": 0.033347058224530365, "grad_norm": 2.236497640609741, "learning_rate": 2.9999118620053567e-05, "loss": 0.2156, "step": 1883 }, { "epoch": 0.03336476776155879, "grad_norm": 2.257725954055786, "learning_rate": 2.9999109268700196e-05, "loss": 0.2316, "step": 1884 }, { "epoch": 0.033382477298587225, "grad_norm": 2.572608709335327, "learning_rate": 2.9999099868001278e-05, "loss": 0.1489, "step": 1885 }, { "epoch": 0.03340018683561565, "grad_norm": 1.9130573272705078, "learning_rate": 2.9999090417956835e-05, "loss": 0.1962, "step": 1886 }, { "epoch": 0.03341789637264408, "grad_norm": 1.9210525751113892, "learning_rate": 2.9999080918566903e-05, "loss": 0.2094, "step": 1887 }, { "epoch": 0.033435605909672504, "grad_norm": 1.7875536680221558, "learning_rate": 2.999907136983151e-05, "loss": 0.2307, "step": 1888 }, { "epoch": 0.03345331544670094, "grad_norm": 6.128395080566406, "learning_rate": 2.9999061771750694e-05, "loss": 0.2191, "step": 1889 }, { "epoch": 0.033471024983729364, "grad_norm": 1.6781724691390991, "learning_rate": 2.999905212432448e-05, "loss": 0.1998, "step": 1890 }, { "epoch": 0.03348873452075779, "grad_norm": 2.39540958404541, "learning_rate": 2.99990424275529e-05, "loss": 0.1956, "step": 1891 }, { "epoch": 0.03350644405778622, "grad_norm": 1.4538320302963257, "learning_rate": 2.9999032681435993e-05, "loss": 0.1947, "step": 1892 }, { "epoch": 0.03352415359481465, "grad_norm": 2.357279062271118, "learning_rate": 2.9999022885973778e-05, "loss": 0.2074, "step": 1893 }, { "epoch": 0.033541863131843076, "grad_norm": 1.9774788618087769, "learning_rate": 2.9999013041166297e-05, "loss": 0.1646, "step": 1894 }, { "epoch": 0.0335595726688715, "grad_norm": 1.4897546768188477, "learning_rate": 2.9999003147013583e-05, "loss": 0.1639, "step": 1895 }, { "epoch": 0.03357728220589993, "grad_norm": 3.118340015411377, "learning_rate": 2.9998993203515664e-05, "loss": 0.2059, "step": 1896 }, { "epoch": 0.03359499174292836, "grad_norm": 3.235734462738037, "learning_rate": 2.9998983210672573e-05, "loss": 0.2004, "step": 1897 }, { "epoch": 0.03361270127995679, "grad_norm": 2.041700601577759, "learning_rate": 2.9998973168484345e-05, "loss": 0.1853, "step": 1898 }, { "epoch": 0.033630410816985215, "grad_norm": 1.7308520078659058, "learning_rate": 2.9998963076951013e-05, "loss": 0.1638, "step": 1899 }, { "epoch": 0.03364812035401365, "grad_norm": 1.7766201496124268, "learning_rate": 2.9998952936072607e-05, "loss": 0.2138, "step": 1900 }, { "epoch": 0.033665829891042075, "grad_norm": 3.671501398086548, "learning_rate": 2.9998942745849166e-05, "loss": 0.2657, "step": 1901 }, { "epoch": 0.0336835394280705, "grad_norm": 2.5528080463409424, "learning_rate": 2.999893250628072e-05, "loss": 0.1674, "step": 1902 }, { "epoch": 0.03370124896509893, "grad_norm": 2.8108878135681152, "learning_rate": 2.9998922217367304e-05, "loss": 0.1528, "step": 1903 }, { "epoch": 0.03371895850212736, "grad_norm": 2.28184175491333, "learning_rate": 2.999891187910895e-05, "loss": 0.2167, "step": 1904 }, { "epoch": 0.03373666803915579, "grad_norm": 2.413902759552002, "learning_rate": 2.9998901491505692e-05, "loss": 0.195, "step": 1905 }, { "epoch": 0.033754377576184214, "grad_norm": 2.366434097290039, "learning_rate": 2.9998891054557562e-05, "loss": 0.1888, "step": 1906 }, { "epoch": 0.03377208711321264, "grad_norm": 3.7628209590911865, "learning_rate": 2.9998880568264603e-05, "loss": 0.2315, "step": 1907 }, { "epoch": 0.03378979665024107, "grad_norm": 2.2828800678253174, "learning_rate": 2.9998870032626845e-05, "loss": 0.2069, "step": 1908 }, { "epoch": 0.0338075061872695, "grad_norm": 1.7488197088241577, "learning_rate": 2.9998859447644315e-05, "loss": 0.1613, "step": 1909 }, { "epoch": 0.033825215724297926, "grad_norm": 2.601870536804199, "learning_rate": 2.999884881331706e-05, "loss": 0.2329, "step": 1910 }, { "epoch": 0.03384292526132635, "grad_norm": 3.001080274581909, "learning_rate": 2.9998838129645107e-05, "loss": 0.2681, "step": 1911 }, { "epoch": 0.033860634798354786, "grad_norm": 3.7272751331329346, "learning_rate": 2.9998827396628492e-05, "loss": 0.222, "step": 1912 }, { "epoch": 0.03387834433538321, "grad_norm": 2.301988363265991, "learning_rate": 2.9998816614267255e-05, "loss": 0.1395, "step": 1913 }, { "epoch": 0.03389605387241164, "grad_norm": 1.6190003156661987, "learning_rate": 2.999880578256143e-05, "loss": 0.1852, "step": 1914 }, { "epoch": 0.03391376340944007, "grad_norm": 2.641460418701172, "learning_rate": 2.999879490151105e-05, "loss": 0.1859, "step": 1915 }, { "epoch": 0.0339314729464685, "grad_norm": 1.3256503343582153, "learning_rate": 2.9998783971116147e-05, "loss": 0.1714, "step": 1916 }, { "epoch": 0.033949182483496924, "grad_norm": 2.425506114959717, "learning_rate": 2.9998772991376765e-05, "loss": 0.1978, "step": 1917 }, { "epoch": 0.03396689202052535, "grad_norm": 2.783635377883911, "learning_rate": 2.9998761962292937e-05, "loss": 0.2108, "step": 1918 }, { "epoch": 0.033984601557553784, "grad_norm": 2.8154296875, "learning_rate": 2.99987508838647e-05, "loss": 0.2282, "step": 1919 }, { "epoch": 0.03400231109458221, "grad_norm": 1.3980129957199097, "learning_rate": 2.999873975609209e-05, "loss": 0.1919, "step": 1920 }, { "epoch": 0.03402002063161064, "grad_norm": 2.8293073177337646, "learning_rate": 2.999872857897514e-05, "loss": 0.1977, "step": 1921 }, { "epoch": 0.03403773016863906, "grad_norm": 2.078246831893921, "learning_rate": 2.999871735251389e-05, "loss": 0.2174, "step": 1922 }, { "epoch": 0.034055439705667497, "grad_norm": 2.2795119285583496, "learning_rate": 2.9998706076708382e-05, "loss": 0.1721, "step": 1923 }, { "epoch": 0.03407314924269592, "grad_norm": 2.92990779876709, "learning_rate": 2.9998694751558644e-05, "loss": 0.2427, "step": 1924 }, { "epoch": 0.03409085877972435, "grad_norm": 1.9525021314620972, "learning_rate": 2.9998683377064717e-05, "loss": 0.1419, "step": 1925 }, { "epoch": 0.03410856831675278, "grad_norm": 2.421506881713867, "learning_rate": 2.9998671953226642e-05, "loss": 0.2385, "step": 1926 }, { "epoch": 0.03412627785378121, "grad_norm": 2.281479835510254, "learning_rate": 2.999866048004445e-05, "loss": 0.2045, "step": 1927 }, { "epoch": 0.034143987390809635, "grad_norm": 1.9419951438903809, "learning_rate": 2.9998648957518186e-05, "loss": 0.194, "step": 1928 }, { "epoch": 0.03416169692783806, "grad_norm": 2.5481064319610596, "learning_rate": 2.999863738564788e-05, "loss": 0.2495, "step": 1929 }, { "epoch": 0.034179406464866495, "grad_norm": 2.3142693042755127, "learning_rate": 2.9998625764433573e-05, "loss": 0.1662, "step": 1930 }, { "epoch": 0.03419711600189492, "grad_norm": 1.7791438102722168, "learning_rate": 2.999861409387531e-05, "loss": 0.1644, "step": 1931 }, { "epoch": 0.03421482553892335, "grad_norm": 3.9030652046203613, "learning_rate": 2.999860237397312e-05, "loss": 0.2177, "step": 1932 }, { "epoch": 0.034232535075951774, "grad_norm": 3.5678200721740723, "learning_rate": 2.9998590604727047e-05, "loss": 0.1742, "step": 1933 }, { "epoch": 0.03425024461298021, "grad_norm": 2.517186403274536, "learning_rate": 2.9998578786137124e-05, "loss": 0.2387, "step": 1934 }, { "epoch": 0.034267954150008634, "grad_norm": 3.172654390335083, "learning_rate": 2.9998566918203395e-05, "loss": 0.2364, "step": 1935 }, { "epoch": 0.03428566368703706, "grad_norm": 2.489813804626465, "learning_rate": 2.9998555000925896e-05, "loss": 0.252, "step": 1936 }, { "epoch": 0.034303373224065487, "grad_norm": 2.1145503520965576, "learning_rate": 2.9998543034304677e-05, "loss": 0.1733, "step": 1937 }, { "epoch": 0.03432108276109392, "grad_norm": 2.460684299468994, "learning_rate": 2.999853101833976e-05, "loss": 0.1583, "step": 1938 }, { "epoch": 0.034338792298122346, "grad_norm": 2.3820743560791016, "learning_rate": 2.9998518953031196e-05, "loss": 0.1931, "step": 1939 }, { "epoch": 0.03435650183515077, "grad_norm": 1.989437460899353, "learning_rate": 2.9998506838379022e-05, "loss": 0.186, "step": 1940 }, { "epoch": 0.034374211372179206, "grad_norm": 2.2453413009643555, "learning_rate": 2.9998494674383276e-05, "loss": 0.1728, "step": 1941 }, { "epoch": 0.03439192090920763, "grad_norm": 2.723623275756836, "learning_rate": 2.9998482461044e-05, "loss": 0.1604, "step": 1942 }, { "epoch": 0.03440963044623606, "grad_norm": 2.4812679290771484, "learning_rate": 2.9998470198361234e-05, "loss": 0.266, "step": 1943 }, { "epoch": 0.034427339983264485, "grad_norm": 3.607783794403076, "learning_rate": 2.9998457886335015e-05, "loss": 0.2112, "step": 1944 }, { "epoch": 0.03444504952029292, "grad_norm": 2.8826799392700195, "learning_rate": 2.9998445524965393e-05, "loss": 0.2186, "step": 1945 }, { "epoch": 0.034462759057321345, "grad_norm": 2.30582857131958, "learning_rate": 2.9998433114252396e-05, "loss": 0.1692, "step": 1946 }, { "epoch": 0.03448046859434977, "grad_norm": 1.9448070526123047, "learning_rate": 2.9998420654196077e-05, "loss": 0.1852, "step": 1947 }, { "epoch": 0.0344981781313782, "grad_norm": 3.3345253467559814, "learning_rate": 2.9998408144796466e-05, "loss": 0.196, "step": 1948 }, { "epoch": 0.03451588766840663, "grad_norm": 2.598554849624634, "learning_rate": 2.9998395586053613e-05, "loss": 0.175, "step": 1949 }, { "epoch": 0.03453359720543506, "grad_norm": 3.557544708251953, "learning_rate": 2.9998382977967552e-05, "loss": 0.2247, "step": 1950 }, { "epoch": 0.03455130674246348, "grad_norm": 2.0630414485931396, "learning_rate": 2.999837032053833e-05, "loss": 0.1774, "step": 1951 }, { "epoch": 0.03456901627949192, "grad_norm": 1.7807891368865967, "learning_rate": 2.999835761376599e-05, "loss": 0.2288, "step": 1952 }, { "epoch": 0.03458672581652034, "grad_norm": 2.0545341968536377, "learning_rate": 2.9998344857650564e-05, "loss": 0.155, "step": 1953 }, { "epoch": 0.03460443535354877, "grad_norm": 3.3600032329559326, "learning_rate": 2.9998332052192105e-05, "loss": 0.258, "step": 1954 }, { "epoch": 0.034622144890577196, "grad_norm": 2.2468338012695312, "learning_rate": 2.9998319197390646e-05, "loss": 0.192, "step": 1955 }, { "epoch": 0.03463985442760563, "grad_norm": 2.5133471488952637, "learning_rate": 2.9998306293246235e-05, "loss": 0.2902, "step": 1956 }, { "epoch": 0.034657563964634056, "grad_norm": 2.234950065612793, "learning_rate": 2.9998293339758913e-05, "loss": 0.221, "step": 1957 }, { "epoch": 0.03467527350166248, "grad_norm": 2.5470643043518066, "learning_rate": 2.9998280336928726e-05, "loss": 0.1827, "step": 1958 }, { "epoch": 0.03469298303869091, "grad_norm": 2.3557045459747314, "learning_rate": 2.9998267284755713e-05, "loss": 0.1907, "step": 1959 }, { "epoch": 0.03471069257571934, "grad_norm": 1.7972513437271118, "learning_rate": 2.9998254183239916e-05, "loss": 0.2221, "step": 1960 }, { "epoch": 0.03472840211274777, "grad_norm": 2.3487436771392822, "learning_rate": 2.999824103238138e-05, "loss": 0.1995, "step": 1961 }, { "epoch": 0.034746111649776194, "grad_norm": 1.8072688579559326, "learning_rate": 2.999822783218015e-05, "loss": 0.1826, "step": 1962 }, { "epoch": 0.03476382118680462, "grad_norm": 2.6454315185546875, "learning_rate": 2.9998214582636267e-05, "loss": 0.1821, "step": 1963 }, { "epoch": 0.034781530723833054, "grad_norm": 2.324018955230713, "learning_rate": 2.9998201283749772e-05, "loss": 0.2749, "step": 1964 }, { "epoch": 0.03479924026086148, "grad_norm": 2.50747013092041, "learning_rate": 2.9998187935520712e-05, "loss": 0.2233, "step": 1965 }, { "epoch": 0.03481694979788991, "grad_norm": 1.9132124185562134, "learning_rate": 2.9998174537949133e-05, "loss": 0.2592, "step": 1966 }, { "epoch": 0.03483465933491834, "grad_norm": 2.249924898147583, "learning_rate": 2.999816109103508e-05, "loss": 0.2236, "step": 1967 }, { "epoch": 0.034852368871946766, "grad_norm": 2.2372941970825195, "learning_rate": 2.9998147594778586e-05, "loss": 0.1875, "step": 1968 }, { "epoch": 0.03487007840897519, "grad_norm": 3.327547788619995, "learning_rate": 2.9998134049179712e-05, "loss": 0.2402, "step": 1969 }, { "epoch": 0.03488778794600362, "grad_norm": 4.652193546295166, "learning_rate": 2.999812045423849e-05, "loss": 0.2067, "step": 1970 }, { "epoch": 0.03490549748303205, "grad_norm": 2.3963441848754883, "learning_rate": 2.9998106809954964e-05, "loss": 0.1865, "step": 1971 }, { "epoch": 0.03492320702006048, "grad_norm": 7.655791282653809, "learning_rate": 2.999809311632919e-05, "loss": 0.2305, "step": 1972 }, { "epoch": 0.034940916557088905, "grad_norm": 2.212611436843872, "learning_rate": 2.9998079373361206e-05, "loss": 0.1869, "step": 1973 }, { "epoch": 0.03495862609411733, "grad_norm": 4.0419158935546875, "learning_rate": 2.9998065581051054e-05, "loss": 0.1833, "step": 1974 }, { "epoch": 0.034976335631145765, "grad_norm": 2.4496917724609375, "learning_rate": 2.9998051739398786e-05, "loss": 0.2052, "step": 1975 }, { "epoch": 0.03499404516817419, "grad_norm": 3.100717544555664, "learning_rate": 2.9998037848404444e-05, "loss": 0.1864, "step": 1976 }, { "epoch": 0.03501175470520262, "grad_norm": 2.9184799194335938, "learning_rate": 2.9998023908068074e-05, "loss": 0.2012, "step": 1977 }, { "epoch": 0.03502946424223105, "grad_norm": 2.310774326324463, "learning_rate": 2.9998009918389724e-05, "loss": 0.16, "step": 1978 }, { "epoch": 0.03504717377925948, "grad_norm": 2.2080941200256348, "learning_rate": 2.999799587936944e-05, "loss": 0.1731, "step": 1979 }, { "epoch": 0.035064883316287904, "grad_norm": 2.649616003036499, "learning_rate": 2.9997981791007262e-05, "loss": 0.2207, "step": 1980 }, { "epoch": 0.03508259285331633, "grad_norm": 4.088893413543701, "learning_rate": 2.999796765330324e-05, "loss": 0.2079, "step": 1981 }, { "epoch": 0.03510030239034476, "grad_norm": 2.19524884223938, "learning_rate": 2.9997953466257422e-05, "loss": 0.1934, "step": 1982 }, { "epoch": 0.03511801192737319, "grad_norm": 2.287811040878296, "learning_rate": 2.999793922986986e-05, "loss": 0.2172, "step": 1983 }, { "epoch": 0.035135721464401616, "grad_norm": 2.4948315620422363, "learning_rate": 2.9997924944140587e-05, "loss": 0.1895, "step": 1984 }, { "epoch": 0.03515343100143004, "grad_norm": 4.159839630126953, "learning_rate": 2.9997910609069663e-05, "loss": 0.2322, "step": 1985 }, { "epoch": 0.035171140538458476, "grad_norm": 2.0421738624572754, "learning_rate": 2.9997896224657128e-05, "loss": 0.2007, "step": 1986 }, { "epoch": 0.0351888500754869, "grad_norm": 2.2701480388641357, "learning_rate": 2.9997881790903033e-05, "loss": 0.1852, "step": 1987 }, { "epoch": 0.03520655961251533, "grad_norm": 3.899777889251709, "learning_rate": 2.9997867307807422e-05, "loss": 0.1974, "step": 1988 }, { "epoch": 0.035224269149543755, "grad_norm": 1.9315680265426636, "learning_rate": 2.9997852775370346e-05, "loss": 0.2094, "step": 1989 }, { "epoch": 0.03524197868657219, "grad_norm": 2.104356527328491, "learning_rate": 2.999783819359185e-05, "loss": 0.23, "step": 1990 }, { "epoch": 0.035259688223600615, "grad_norm": 2.511618137359619, "learning_rate": 2.9997823562471987e-05, "loss": 0.1943, "step": 1991 }, { "epoch": 0.03527739776062904, "grad_norm": 3.012556314468384, "learning_rate": 2.9997808882010798e-05, "loss": 0.2293, "step": 1992 }, { "epoch": 0.035295107297657474, "grad_norm": 4.350891590118408, "learning_rate": 2.9997794152208334e-05, "loss": 0.2478, "step": 1993 }, { "epoch": 0.0353128168346859, "grad_norm": 2.664389133453369, "learning_rate": 2.9997779373064645e-05, "loss": 0.2406, "step": 1994 }, { "epoch": 0.03533052637171433, "grad_norm": 5.565785884857178, "learning_rate": 2.999776454457978e-05, "loss": 0.1921, "step": 1995 }, { "epoch": 0.03534823590874275, "grad_norm": 2.602003812789917, "learning_rate": 2.9997749666753785e-05, "loss": 0.2236, "step": 1996 }, { "epoch": 0.03536594544577119, "grad_norm": 2.151245355606079, "learning_rate": 2.999773473958671e-05, "loss": 0.2144, "step": 1997 }, { "epoch": 0.03538365498279961, "grad_norm": 1.8347917795181274, "learning_rate": 2.9997719763078607e-05, "loss": 0.2177, "step": 1998 }, { "epoch": 0.03540136451982804, "grad_norm": 2.812134265899658, "learning_rate": 2.9997704737229522e-05, "loss": 0.172, "step": 1999 }, { "epoch": 0.035419074056856466, "grad_norm": 1.947991132736206, "learning_rate": 2.9997689662039504e-05, "loss": 0.1609, "step": 2000 }, { "epoch": 0.0354367835938849, "grad_norm": 3.080084800720215, "learning_rate": 2.9997674537508607e-05, "loss": 0.2191, "step": 2001 }, { "epoch": 0.035454493130913325, "grad_norm": 2.5415093898773193, "learning_rate": 2.999765936363688e-05, "loss": 0.225, "step": 2002 }, { "epoch": 0.03547220266794175, "grad_norm": 2.1830427646636963, "learning_rate": 2.999764414042436e-05, "loss": 0.2155, "step": 2003 }, { "epoch": 0.03548991220497018, "grad_norm": 2.039956569671631, "learning_rate": 2.999762886787112e-05, "loss": 0.181, "step": 2004 }, { "epoch": 0.03550762174199861, "grad_norm": 2.846949815750122, "learning_rate": 2.9997613545977188e-05, "loss": 0.25, "step": 2005 }, { "epoch": 0.03552533127902704, "grad_norm": 2.6948885917663574, "learning_rate": 2.9997598174742626e-05, "loss": 0.1933, "step": 2006 }, { "epoch": 0.035543040816055464, "grad_norm": 1.9354041814804077, "learning_rate": 2.999758275416749e-05, "loss": 0.1796, "step": 2007 }, { "epoch": 0.0355607503530839, "grad_norm": 2.799272060394287, "learning_rate": 2.9997567284251818e-05, "loss": 0.1993, "step": 2008 }, { "epoch": 0.035578459890112324, "grad_norm": 2.059406280517578, "learning_rate": 2.9997551764995667e-05, "loss": 0.2044, "step": 2009 }, { "epoch": 0.03559616942714075, "grad_norm": 3.8309788703918457, "learning_rate": 2.999753619639909e-05, "loss": 0.242, "step": 2010 }, { "epoch": 0.03561387896416918, "grad_norm": 2.1763038635253906, "learning_rate": 2.999752057846213e-05, "loss": 0.1844, "step": 2011 }, { "epoch": 0.03563158850119761, "grad_norm": 2.199199914932251, "learning_rate": 2.999750491118485e-05, "loss": 0.2292, "step": 2012 }, { "epoch": 0.035649298038226036, "grad_norm": 2.0327227115631104, "learning_rate": 2.9997489194567295e-05, "loss": 0.2419, "step": 2013 }, { "epoch": 0.03566700757525446, "grad_norm": 1.9814680814743042, "learning_rate": 2.9997473428609515e-05, "loss": 0.1585, "step": 2014 }, { "epoch": 0.03568471711228289, "grad_norm": 2.292417049407959, "learning_rate": 2.9997457613311564e-05, "loss": 0.2555, "step": 2015 }, { "epoch": 0.03570242664931132, "grad_norm": 3.601030111312866, "learning_rate": 2.99974417486735e-05, "loss": 0.1832, "step": 2016 }, { "epoch": 0.03572013618633975, "grad_norm": 2.023629665374756, "learning_rate": 2.9997425834695364e-05, "loss": 0.1635, "step": 2017 }, { "epoch": 0.035737845723368175, "grad_norm": 3.0491158962249756, "learning_rate": 2.9997409871377213e-05, "loss": 0.1756, "step": 2018 }, { "epoch": 0.03575555526039661, "grad_norm": 2.2002952098846436, "learning_rate": 2.9997393858719108e-05, "loss": 0.1799, "step": 2019 }, { "epoch": 0.035773264797425035, "grad_norm": 2.0409786701202393, "learning_rate": 2.999737779672109e-05, "loss": 0.1809, "step": 2020 }, { "epoch": 0.03579097433445346, "grad_norm": 3.2536566257476807, "learning_rate": 2.999736168538321e-05, "loss": 0.233, "step": 2021 }, { "epoch": 0.03580868387148189, "grad_norm": 2.63645601272583, "learning_rate": 2.9997345524705535e-05, "loss": 0.2629, "step": 2022 }, { "epoch": 0.03582639340851032, "grad_norm": 2.791656732559204, "learning_rate": 2.9997329314688106e-05, "loss": 0.2061, "step": 2023 }, { "epoch": 0.03584410294553875, "grad_norm": 2.1786139011383057, "learning_rate": 2.9997313055330978e-05, "loss": 0.1752, "step": 2024 }, { "epoch": 0.035861812482567174, "grad_norm": 2.0283913612365723, "learning_rate": 2.9997296746634214e-05, "loss": 0.2037, "step": 2025 }, { "epoch": 0.0358795220195956, "grad_norm": 2.6042935848236084, "learning_rate": 2.9997280388597856e-05, "loss": 0.2204, "step": 2026 }, { "epoch": 0.03589723155662403, "grad_norm": 2.8785743713378906, "learning_rate": 2.9997263981221964e-05, "loss": 0.1815, "step": 2027 }, { "epoch": 0.03591494109365246, "grad_norm": 1.5539253950119019, "learning_rate": 2.999724752450659e-05, "loss": 0.1974, "step": 2028 }, { "epoch": 0.035932650630680886, "grad_norm": 2.5421676635742188, "learning_rate": 2.999723101845179e-05, "loss": 0.1888, "step": 2029 }, { "epoch": 0.03595036016770931, "grad_norm": 1.8660677671432495, "learning_rate": 2.9997214463057615e-05, "loss": 0.1483, "step": 2030 }, { "epoch": 0.035968069704737746, "grad_norm": 2.6479218006134033, "learning_rate": 2.999719785832412e-05, "loss": 0.2296, "step": 2031 }, { "epoch": 0.03598577924176617, "grad_norm": 1.7494897842407227, "learning_rate": 2.9997181204251363e-05, "loss": 0.18, "step": 2032 }, { "epoch": 0.0360034887787946, "grad_norm": 1.8461532592773438, "learning_rate": 2.9997164500839394e-05, "loss": 0.1995, "step": 2033 }, { "epoch": 0.03602119831582303, "grad_norm": 1.9784257411956787, "learning_rate": 2.999714774808827e-05, "loss": 0.1913, "step": 2034 }, { "epoch": 0.03603890785285146, "grad_norm": 2.702082872390747, "learning_rate": 2.9997130945998054e-05, "loss": 0.1661, "step": 2035 }, { "epoch": 0.036056617389879884, "grad_norm": 2.117901563644409, "learning_rate": 2.999711409456879e-05, "loss": 0.1472, "step": 2036 }, { "epoch": 0.03607432692690831, "grad_norm": 1.6648008823394775, "learning_rate": 2.9997097193800535e-05, "loss": 0.1792, "step": 2037 }, { "epoch": 0.036092036463936744, "grad_norm": 2.0051381587982178, "learning_rate": 2.999708024369335e-05, "loss": 0.1594, "step": 2038 }, { "epoch": 0.03610974600096517, "grad_norm": 1.5916060209274292, "learning_rate": 2.9997063244247287e-05, "loss": 0.1688, "step": 2039 }, { "epoch": 0.0361274555379936, "grad_norm": 2.465602397918701, "learning_rate": 2.9997046195462404e-05, "loss": 0.1844, "step": 2040 }, { "epoch": 0.03614516507502202, "grad_norm": 2.2449374198913574, "learning_rate": 2.9997029097338753e-05, "loss": 0.1736, "step": 2041 }, { "epoch": 0.03616287461205046, "grad_norm": 1.8395166397094727, "learning_rate": 2.9997011949876395e-05, "loss": 0.154, "step": 2042 }, { "epoch": 0.03618058414907888, "grad_norm": 2.5713000297546387, "learning_rate": 2.9996994753075385e-05, "loss": 0.2321, "step": 2043 }, { "epoch": 0.03619829368610731, "grad_norm": 1.7518560886383057, "learning_rate": 2.9996977506935776e-05, "loss": 0.1389, "step": 2044 }, { "epoch": 0.03621600322313574, "grad_norm": 1.9239754676818848, "learning_rate": 2.9996960211457633e-05, "loss": 0.2043, "step": 2045 }, { "epoch": 0.03623371276016417, "grad_norm": 4.072599411010742, "learning_rate": 2.9996942866641003e-05, "loss": 0.2542, "step": 2046 }, { "epoch": 0.036251422297192595, "grad_norm": 1.9529670476913452, "learning_rate": 2.999692547248595e-05, "loss": 0.1562, "step": 2047 }, { "epoch": 0.03626913183422102, "grad_norm": 2.4668920040130615, "learning_rate": 2.999690802899253e-05, "loss": 0.2111, "step": 2048 }, { "epoch": 0.036286841371249455, "grad_norm": 2.4784045219421387, "learning_rate": 2.9996890536160797e-05, "loss": 0.1803, "step": 2049 }, { "epoch": 0.03630455090827788, "grad_norm": 3.884716033935547, "learning_rate": 2.9996872993990813e-05, "loss": 0.1861, "step": 2050 }, { "epoch": 0.03632226044530631, "grad_norm": 2.8544509410858154, "learning_rate": 2.9996855402482634e-05, "loss": 0.2032, "step": 2051 }, { "epoch": 0.036339969982334734, "grad_norm": 2.010774850845337, "learning_rate": 2.9996837761636317e-05, "loss": 0.1949, "step": 2052 }, { "epoch": 0.03635767951936317, "grad_norm": 2.55561900138855, "learning_rate": 2.999682007145192e-05, "loss": 0.2173, "step": 2053 }, { "epoch": 0.036375389056391594, "grad_norm": 1.6775516271591187, "learning_rate": 2.9996802331929504e-05, "loss": 0.1776, "step": 2054 }, { "epoch": 0.03639309859342002, "grad_norm": 1.6130657196044922, "learning_rate": 2.999678454306912e-05, "loss": 0.1811, "step": 2055 }, { "epoch": 0.03641080813044845, "grad_norm": 2.2669739723205566, "learning_rate": 2.9996766704870835e-05, "loss": 0.1495, "step": 2056 }, { "epoch": 0.03642851766747688, "grad_norm": 3.5107264518737793, "learning_rate": 2.9996748817334707e-05, "loss": 0.1723, "step": 2057 }, { "epoch": 0.036446227204505306, "grad_norm": 2.4429380893707275, "learning_rate": 2.9996730880460786e-05, "loss": 0.1642, "step": 2058 }, { "epoch": 0.03646393674153373, "grad_norm": 2.293142557144165, "learning_rate": 2.9996712894249142e-05, "loss": 0.1891, "step": 2059 }, { "epoch": 0.036481646278562166, "grad_norm": 2.7423863410949707, "learning_rate": 2.9996694858699827e-05, "loss": 0.2694, "step": 2060 }, { "epoch": 0.03649935581559059, "grad_norm": 2.168126344680786, "learning_rate": 2.9996676773812907e-05, "loss": 0.2321, "step": 2061 }, { "epoch": 0.03651706535261902, "grad_norm": 2.2360236644744873, "learning_rate": 2.999665863958843e-05, "loss": 0.1788, "step": 2062 }, { "epoch": 0.036534774889647445, "grad_norm": 2.937854051589966, "learning_rate": 2.999664045602647e-05, "loss": 0.1799, "step": 2063 }, { "epoch": 0.03655248442667588, "grad_norm": 2.1753885746002197, "learning_rate": 2.9996622223127076e-05, "loss": 0.1673, "step": 2064 }, { "epoch": 0.036570193963704305, "grad_norm": 5.372445106506348, "learning_rate": 2.999660394089031e-05, "loss": 0.2321, "step": 2065 }, { "epoch": 0.03658790350073273, "grad_norm": 2.3216092586517334, "learning_rate": 2.999658560931624e-05, "loss": 0.1911, "step": 2066 }, { "epoch": 0.03660561303776116, "grad_norm": 2.9195635318756104, "learning_rate": 2.9996567228404914e-05, "loss": 0.1771, "step": 2067 }, { "epoch": 0.03662332257478959, "grad_norm": 3.343878984451294, "learning_rate": 2.99965487981564e-05, "loss": 0.1584, "step": 2068 }, { "epoch": 0.03664103211181802, "grad_norm": 2.2139642238616943, "learning_rate": 2.9996530318570757e-05, "loss": 0.1972, "step": 2069 }, { "epoch": 0.036658741648846443, "grad_norm": 2.582723379135132, "learning_rate": 2.9996511789648047e-05, "loss": 0.2215, "step": 2070 }, { "epoch": 0.03667645118587487, "grad_norm": 1.6840053796768188, "learning_rate": 2.999649321138833e-05, "loss": 0.1434, "step": 2071 }, { "epoch": 0.0366941607229033, "grad_norm": 1.8134186267852783, "learning_rate": 2.9996474583791674e-05, "loss": 0.219, "step": 2072 }, { "epoch": 0.03671187025993173, "grad_norm": 2.3848955631256104, "learning_rate": 2.9996455906858125e-05, "loss": 0.1935, "step": 2073 }, { "epoch": 0.036729579796960156, "grad_norm": 2.937896490097046, "learning_rate": 2.9996437180587754e-05, "loss": 0.216, "step": 2074 }, { "epoch": 0.03674728933398859, "grad_norm": 1.9299174547195435, "learning_rate": 2.9996418404980624e-05, "loss": 0.1789, "step": 2075 }, { "epoch": 0.036764998871017016, "grad_norm": 2.3488876819610596, "learning_rate": 2.9996399580036792e-05, "loss": 0.2281, "step": 2076 }, { "epoch": 0.03678270840804544, "grad_norm": 2.6578235626220703, "learning_rate": 2.999638070575632e-05, "loss": 0.1937, "step": 2077 }, { "epoch": 0.03680041794507387, "grad_norm": 2.335752248764038, "learning_rate": 2.9996361782139276e-05, "loss": 0.2436, "step": 2078 }, { "epoch": 0.0368181274821023, "grad_norm": 2.0036134719848633, "learning_rate": 2.999634280918572e-05, "loss": 0.1657, "step": 2079 }, { "epoch": 0.03683583701913073, "grad_norm": 2.2354142665863037, "learning_rate": 2.999632378689571e-05, "loss": 0.169, "step": 2080 }, { "epoch": 0.036853546556159154, "grad_norm": 1.973707675933838, "learning_rate": 2.9996304715269312e-05, "loss": 0.1916, "step": 2081 }, { "epoch": 0.03687125609318758, "grad_norm": 2.0526814460754395, "learning_rate": 2.9996285594306585e-05, "loss": 0.21, "step": 2082 }, { "epoch": 0.036888965630216014, "grad_norm": 2.7715415954589844, "learning_rate": 2.99962664240076e-05, "loss": 0.2138, "step": 2083 }, { "epoch": 0.03690667516724444, "grad_norm": 2.2397232055664062, "learning_rate": 2.9996247204372412e-05, "loss": 0.2069, "step": 2084 }, { "epoch": 0.03692438470427287, "grad_norm": 2.255136728286743, "learning_rate": 2.9996227935401092e-05, "loss": 0.2294, "step": 2085 }, { "epoch": 0.0369420942413013, "grad_norm": 1.6952934265136719, "learning_rate": 2.9996208617093695e-05, "loss": 0.1894, "step": 2086 }, { "epoch": 0.036959803778329726, "grad_norm": 1.9313265085220337, "learning_rate": 2.999618924945029e-05, "loss": 0.1661, "step": 2087 }, { "epoch": 0.03697751331535815, "grad_norm": 2.297537326812744, "learning_rate": 2.999616983247094e-05, "loss": 0.155, "step": 2088 }, { "epoch": 0.03699522285238658, "grad_norm": 1.9605461359024048, "learning_rate": 2.9996150366155703e-05, "loss": 0.1793, "step": 2089 }, { "epoch": 0.03701293238941501, "grad_norm": 2.0162174701690674, "learning_rate": 2.9996130850504652e-05, "loss": 0.1601, "step": 2090 }, { "epoch": 0.03703064192644344, "grad_norm": 2.183216094970703, "learning_rate": 2.9996111285517848e-05, "loss": 0.2237, "step": 2091 }, { "epoch": 0.037048351463471865, "grad_norm": 2.607102394104004, "learning_rate": 2.9996091671195356e-05, "loss": 0.2068, "step": 2092 }, { "epoch": 0.03706606100050029, "grad_norm": 2.512045383453369, "learning_rate": 2.9996072007537234e-05, "loss": 0.2249, "step": 2093 }, { "epoch": 0.037083770537528725, "grad_norm": 1.795575499534607, "learning_rate": 2.9996052294543555e-05, "loss": 0.2019, "step": 2094 }, { "epoch": 0.03710148007455715, "grad_norm": 2.9776177406311035, "learning_rate": 2.9996032532214385e-05, "loss": 0.2105, "step": 2095 }, { "epoch": 0.03711918961158558, "grad_norm": 3.4469921588897705, "learning_rate": 2.9996012720549774e-05, "loss": 0.2232, "step": 2096 }, { "epoch": 0.037136899148614004, "grad_norm": 1.8495793342590332, "learning_rate": 2.9995992859549807e-05, "loss": 0.2152, "step": 2097 }, { "epoch": 0.03715460868564244, "grad_norm": 3.380619764328003, "learning_rate": 2.9995972949214535e-05, "loss": 0.1827, "step": 2098 }, { "epoch": 0.037172318222670864, "grad_norm": 2.2560346126556396, "learning_rate": 2.9995952989544032e-05, "loss": 0.2217, "step": 2099 }, { "epoch": 0.03719002775969929, "grad_norm": 1.874945878982544, "learning_rate": 2.999593298053836e-05, "loss": 0.1868, "step": 2100 }, { "epoch": 0.03720773729672772, "grad_norm": 2.366464376449585, "learning_rate": 2.9995912922197585e-05, "loss": 0.2029, "step": 2101 }, { "epoch": 0.03722544683375615, "grad_norm": 2.067063808441162, "learning_rate": 2.9995892814521773e-05, "loss": 0.2424, "step": 2102 }, { "epoch": 0.037243156370784576, "grad_norm": 2.891650438308716, "learning_rate": 2.9995872657510987e-05, "loss": 0.1901, "step": 2103 }, { "epoch": 0.037260865907813, "grad_norm": 2.958216667175293, "learning_rate": 2.9995852451165303e-05, "loss": 0.2063, "step": 2104 }, { "epoch": 0.037278575444841436, "grad_norm": 2.1563339233398438, "learning_rate": 2.9995832195484778e-05, "loss": 0.231, "step": 2105 }, { "epoch": 0.03729628498186986, "grad_norm": 3.63415789604187, "learning_rate": 2.9995811890469483e-05, "loss": 0.1862, "step": 2106 }, { "epoch": 0.03731399451889829, "grad_norm": 2.690019369125366, "learning_rate": 2.999579153611948e-05, "loss": 0.2184, "step": 2107 }, { "epoch": 0.037331704055926715, "grad_norm": 1.8326890468597412, "learning_rate": 2.999577113243484e-05, "loss": 0.1586, "step": 2108 }, { "epoch": 0.03734941359295515, "grad_norm": 2.395220994949341, "learning_rate": 2.9995750679415635e-05, "loss": 0.1556, "step": 2109 }, { "epoch": 0.037367123129983575, "grad_norm": 1.951304316520691, "learning_rate": 2.999573017706192e-05, "loss": 0.2235, "step": 2110 }, { "epoch": 0.037384832667012, "grad_norm": 3.392333507537842, "learning_rate": 2.9995709625373776e-05, "loss": 0.2427, "step": 2111 }, { "epoch": 0.037402542204040434, "grad_norm": 4.332752227783203, "learning_rate": 2.9995689024351264e-05, "loss": 0.1845, "step": 2112 }, { "epoch": 0.03742025174106886, "grad_norm": 1.8884210586547852, "learning_rate": 2.9995668373994446e-05, "loss": 0.24, "step": 2113 }, { "epoch": 0.03743796127809729, "grad_norm": 2.6878159046173096, "learning_rate": 2.99956476743034e-05, "loss": 0.2298, "step": 2114 }, { "epoch": 0.03745567081512571, "grad_norm": 3.381054639816284, "learning_rate": 2.9995626925278187e-05, "loss": 0.1975, "step": 2115 }, { "epoch": 0.03747338035215415, "grad_norm": 2.290027141571045, "learning_rate": 2.9995606126918883e-05, "loss": 0.1766, "step": 2116 }, { "epoch": 0.03749108988918257, "grad_norm": 1.8865522146224976, "learning_rate": 2.9995585279225546e-05, "loss": 0.205, "step": 2117 }, { "epoch": 0.037508799426211, "grad_norm": 3.1949384212493896, "learning_rate": 2.9995564382198253e-05, "loss": 0.1757, "step": 2118 }, { "epoch": 0.037526508963239426, "grad_norm": 3.2540626525878906, "learning_rate": 2.999554343583707e-05, "loss": 0.1958, "step": 2119 }, { "epoch": 0.03754421850026786, "grad_norm": 1.7555750608444214, "learning_rate": 2.9995522440142065e-05, "loss": 0.2021, "step": 2120 }, { "epoch": 0.037561928037296285, "grad_norm": 2.222078323364258, "learning_rate": 2.9995501395113308e-05, "loss": 0.2258, "step": 2121 }, { "epoch": 0.03757963757432471, "grad_norm": 2.064394950866699, "learning_rate": 2.999548030075087e-05, "loss": 0.1803, "step": 2122 }, { "epoch": 0.03759734711135314, "grad_norm": 2.157418966293335, "learning_rate": 2.9995459157054815e-05, "loss": 0.2345, "step": 2123 }, { "epoch": 0.03761505664838157, "grad_norm": 2.857867479324341, "learning_rate": 2.999543796402522e-05, "loss": 0.2003, "step": 2124 }, { "epoch": 0.03763276618541, "grad_norm": 2.5981411933898926, "learning_rate": 2.9995416721662145e-05, "loss": 0.1885, "step": 2125 }, { "epoch": 0.037650475722438424, "grad_norm": 2.3462579250335693, "learning_rate": 2.9995395429965675e-05, "loss": 0.193, "step": 2126 }, { "epoch": 0.03766818525946686, "grad_norm": 2.1109650135040283, "learning_rate": 2.9995374088935864e-05, "loss": 0.1808, "step": 2127 }, { "epoch": 0.037685894796495284, "grad_norm": 2.7621572017669678, "learning_rate": 2.9995352698572788e-05, "loss": 0.1827, "step": 2128 }, { "epoch": 0.03770360433352371, "grad_norm": 1.5855622291564941, "learning_rate": 2.999533125887652e-05, "loss": 0.2147, "step": 2129 }, { "epoch": 0.03772131387055214, "grad_norm": 2.183319330215454, "learning_rate": 2.999530976984713e-05, "loss": 0.2036, "step": 2130 }, { "epoch": 0.03773902340758057, "grad_norm": 1.782381534576416, "learning_rate": 2.9995288231484687e-05, "loss": 0.1998, "step": 2131 }, { "epoch": 0.037756732944608996, "grad_norm": 1.579725742340088, "learning_rate": 2.9995266643789263e-05, "loss": 0.1759, "step": 2132 }, { "epoch": 0.03777444248163742, "grad_norm": 2.1226513385772705, "learning_rate": 2.999524500676093e-05, "loss": 0.1665, "step": 2133 }, { "epoch": 0.03779215201866585, "grad_norm": 2.311689853668213, "learning_rate": 2.999522332039975e-05, "loss": 0.2116, "step": 2134 }, { "epoch": 0.03780986155569428, "grad_norm": 1.3594621419906616, "learning_rate": 2.999520158470581e-05, "loss": 0.173, "step": 2135 }, { "epoch": 0.03782757109272271, "grad_norm": 2.917381763458252, "learning_rate": 2.9995179799679167e-05, "loss": 0.1866, "step": 2136 }, { "epoch": 0.037845280629751135, "grad_norm": 2.3791885375976562, "learning_rate": 2.9995157965319905e-05, "loss": 0.1459, "step": 2137 }, { "epoch": 0.03786299016677956, "grad_norm": 2.2416162490844727, "learning_rate": 2.9995136081628086e-05, "loss": 0.2272, "step": 2138 }, { "epoch": 0.037880699703807995, "grad_norm": 2.2548937797546387, "learning_rate": 2.999511414860379e-05, "loss": 0.1922, "step": 2139 }, { "epoch": 0.03789840924083642, "grad_norm": 2.2965683937072754, "learning_rate": 2.9995092166247084e-05, "loss": 0.1648, "step": 2140 }, { "epoch": 0.03791611877786485, "grad_norm": 2.1284711360931396, "learning_rate": 2.9995070134558036e-05, "loss": 0.2086, "step": 2141 }, { "epoch": 0.03793382831489328, "grad_norm": 1.9862658977508545, "learning_rate": 2.9995048053536732e-05, "loss": 0.1767, "step": 2142 }, { "epoch": 0.03795153785192171, "grad_norm": 2.328784942626953, "learning_rate": 2.999502592318323e-05, "loss": 0.2341, "step": 2143 }, { "epoch": 0.037969247388950134, "grad_norm": 3.4253361225128174, "learning_rate": 2.9995003743497615e-05, "loss": 0.2008, "step": 2144 }, { "epoch": 0.03798695692597856, "grad_norm": 3.4684267044067383, "learning_rate": 2.9994981514479946e-05, "loss": 0.1967, "step": 2145 }, { "epoch": 0.03800466646300699, "grad_norm": 2.581272602081299, "learning_rate": 2.9994959236130313e-05, "loss": 0.2333, "step": 2146 }, { "epoch": 0.03802237600003542, "grad_norm": 2.0961685180664062, "learning_rate": 2.9994936908448777e-05, "loss": 0.1268, "step": 2147 }, { "epoch": 0.038040085537063846, "grad_norm": 1.8199667930603027, "learning_rate": 2.9994914531435415e-05, "loss": 0.2284, "step": 2148 }, { "epoch": 0.03805779507409227, "grad_norm": 1.9010655879974365, "learning_rate": 2.99948921050903e-05, "loss": 0.1772, "step": 2149 }, { "epoch": 0.038075504611120706, "grad_norm": 2.757153034210205, "learning_rate": 2.9994869629413512e-05, "loss": 0.1926, "step": 2150 }, { "epoch": 0.03809321414814913, "grad_norm": 3.069042682647705, "learning_rate": 2.9994847104405117e-05, "loss": 0.1913, "step": 2151 }, { "epoch": 0.03811092368517756, "grad_norm": 2.2231621742248535, "learning_rate": 2.999482453006519e-05, "loss": 0.212, "step": 2152 }, { "epoch": 0.03812863322220599, "grad_norm": 2.5834872722625732, "learning_rate": 2.9994801906393805e-05, "loss": 0.202, "step": 2153 }, { "epoch": 0.03814634275923442, "grad_norm": 1.5341964960098267, "learning_rate": 2.999477923339104e-05, "loss": 0.1805, "step": 2154 }, { "epoch": 0.038164052296262845, "grad_norm": 2.346238374710083, "learning_rate": 2.999475651105697e-05, "loss": 0.2324, "step": 2155 }, { "epoch": 0.03818176183329127, "grad_norm": 2.2154808044433594, "learning_rate": 2.9994733739391668e-05, "loss": 0.2093, "step": 2156 }, { "epoch": 0.038199471370319704, "grad_norm": 1.950007438659668, "learning_rate": 2.999471091839521e-05, "loss": 0.1805, "step": 2157 }, { "epoch": 0.03821718090734813, "grad_norm": 2.548402786254883, "learning_rate": 2.9994688048067663e-05, "loss": 0.2741, "step": 2158 }, { "epoch": 0.03823489044437656, "grad_norm": 1.9700485467910767, "learning_rate": 2.999466512840911e-05, "loss": 0.1495, "step": 2159 }, { "epoch": 0.03825259998140498, "grad_norm": 1.9244680404663086, "learning_rate": 2.999464215941963e-05, "loss": 0.2085, "step": 2160 }, { "epoch": 0.03827030951843342, "grad_norm": 2.133146047592163, "learning_rate": 2.999461914109929e-05, "loss": 0.181, "step": 2161 }, { "epoch": 0.03828801905546184, "grad_norm": 3.737030029296875, "learning_rate": 2.9994596073448177e-05, "loss": 0.1943, "step": 2162 }, { "epoch": 0.03830572859249027, "grad_norm": 2.0689969062805176, "learning_rate": 2.9994572956466354e-05, "loss": 0.2198, "step": 2163 }, { "epoch": 0.038323438129518696, "grad_norm": 1.9442510604858398, "learning_rate": 2.99945497901539e-05, "loss": 0.172, "step": 2164 }, { "epoch": 0.03834114766654713, "grad_norm": 2.188572883605957, "learning_rate": 2.9994526574510898e-05, "loss": 0.1521, "step": 2165 }, { "epoch": 0.038358857203575555, "grad_norm": 1.784666657447815, "learning_rate": 2.9994503309537417e-05, "loss": 0.1532, "step": 2166 }, { "epoch": 0.03837656674060398, "grad_norm": 1.5715923309326172, "learning_rate": 2.9994479995233536e-05, "loss": 0.1706, "step": 2167 }, { "epoch": 0.038394276277632415, "grad_norm": 2.170325517654419, "learning_rate": 2.9994456631599338e-05, "loss": 0.2287, "step": 2168 }, { "epoch": 0.03841198581466084, "grad_norm": 1.4810506105422974, "learning_rate": 2.999443321863489e-05, "loss": 0.1696, "step": 2169 }, { "epoch": 0.03842969535168927, "grad_norm": 2.431321859359741, "learning_rate": 2.9994409756340272e-05, "loss": 0.1899, "step": 2170 }, { "epoch": 0.038447404888717694, "grad_norm": 2.306309700012207, "learning_rate": 2.9994386244715567e-05, "loss": 0.193, "step": 2171 }, { "epoch": 0.03846511442574613, "grad_norm": 2.282731056213379, "learning_rate": 2.9994362683760846e-05, "loss": 0.1773, "step": 2172 }, { "epoch": 0.038482823962774554, "grad_norm": 1.6951574087142944, "learning_rate": 2.999433907347619e-05, "loss": 0.1766, "step": 2173 }, { "epoch": 0.03850053349980298, "grad_norm": 1.953503131866455, "learning_rate": 2.999431541386167e-05, "loss": 0.1896, "step": 2174 }, { "epoch": 0.03851824303683141, "grad_norm": 2.5856783390045166, "learning_rate": 2.9994291704917377e-05, "loss": 0.1761, "step": 2175 }, { "epoch": 0.03853595257385984, "grad_norm": 2.046098232269287, "learning_rate": 2.9994267946643373e-05, "loss": 0.2329, "step": 2176 }, { "epoch": 0.038553662110888266, "grad_norm": 1.7810989618301392, "learning_rate": 2.9994244139039745e-05, "loss": 0.1789, "step": 2177 }, { "epoch": 0.03857137164791669, "grad_norm": 2.635258197784424, "learning_rate": 2.9994220282106572e-05, "loss": 0.1802, "step": 2178 }, { "epoch": 0.038589081184945126, "grad_norm": 2.1512997150421143, "learning_rate": 2.9994196375843935e-05, "loss": 0.232, "step": 2179 }, { "epoch": 0.03860679072197355, "grad_norm": 2.4924798011779785, "learning_rate": 2.9994172420251905e-05, "loss": 0.2019, "step": 2180 }, { "epoch": 0.03862450025900198, "grad_norm": 2.123204231262207, "learning_rate": 2.999414841533056e-05, "loss": 0.1554, "step": 2181 }, { "epoch": 0.038642209796030405, "grad_norm": 3.1940200328826904, "learning_rate": 2.9994124361079988e-05, "loss": 0.1894, "step": 2182 }, { "epoch": 0.03865991933305884, "grad_norm": 2.666325569152832, "learning_rate": 2.9994100257500264e-05, "loss": 0.1896, "step": 2183 }, { "epoch": 0.038677628870087265, "grad_norm": 2.334503173828125, "learning_rate": 2.9994076104591465e-05, "loss": 0.1465, "step": 2184 }, { "epoch": 0.03869533840711569, "grad_norm": 2.3441689014434814, "learning_rate": 2.9994051902353673e-05, "loss": 0.1988, "step": 2185 }, { "epoch": 0.03871304794414412, "grad_norm": 1.9569885730743408, "learning_rate": 2.999402765078697e-05, "loss": 0.2226, "step": 2186 }, { "epoch": 0.03873075748117255, "grad_norm": 2.240356206893921, "learning_rate": 2.9994003349891427e-05, "loss": 0.2074, "step": 2187 }, { "epoch": 0.03874846701820098, "grad_norm": 1.7760297060012817, "learning_rate": 2.999397899966713e-05, "loss": 0.1801, "step": 2188 }, { "epoch": 0.038766176555229404, "grad_norm": 2.0066521167755127, "learning_rate": 2.9993954600114158e-05, "loss": 0.19, "step": 2189 }, { "epoch": 0.03878388609225783, "grad_norm": 2.745940923690796, "learning_rate": 2.9993930151232596e-05, "loss": 0.1855, "step": 2190 }, { "epoch": 0.03880159562928626, "grad_norm": 1.705955147743225, "learning_rate": 2.999390565302252e-05, "loss": 0.1431, "step": 2191 }, { "epoch": 0.03881930516631469, "grad_norm": 1.7151196002960205, "learning_rate": 2.999388110548401e-05, "loss": 0.2073, "step": 2192 }, { "epoch": 0.038837014703343116, "grad_norm": 1.8171594142913818, "learning_rate": 2.9993856508617146e-05, "loss": 0.238, "step": 2193 }, { "epoch": 0.03885472424037155, "grad_norm": 2.191462278366089, "learning_rate": 2.9993831862422007e-05, "loss": 0.1796, "step": 2194 }, { "epoch": 0.038872433777399976, "grad_norm": 1.775893211364746, "learning_rate": 2.9993807166898684e-05, "loss": 0.1727, "step": 2195 }, { "epoch": 0.0388901433144284, "grad_norm": 2.8712313175201416, "learning_rate": 2.9993782422047252e-05, "loss": 0.2057, "step": 2196 }, { "epoch": 0.03890785285145683, "grad_norm": 2.024813175201416, "learning_rate": 2.999375762786779e-05, "loss": 0.1767, "step": 2197 }, { "epoch": 0.03892556238848526, "grad_norm": 2.0319082736968994, "learning_rate": 2.999373278436038e-05, "loss": 0.1437, "step": 2198 }, { "epoch": 0.03894327192551369, "grad_norm": 2.5832247734069824, "learning_rate": 2.9993707891525114e-05, "loss": 0.2245, "step": 2199 }, { "epoch": 0.038960981462542114, "grad_norm": 3.0693936347961426, "learning_rate": 2.9993682949362057e-05, "loss": 0.1919, "step": 2200 }, { "epoch": 0.03897869099957054, "grad_norm": 2.0259087085723877, "learning_rate": 2.99936579578713e-05, "loss": 0.1755, "step": 2201 }, { "epoch": 0.038996400536598974, "grad_norm": 2.9896109104156494, "learning_rate": 2.999363291705293e-05, "loss": 0.2272, "step": 2202 }, { "epoch": 0.0390141100736274, "grad_norm": 2.0410401821136475, "learning_rate": 2.999360782690702e-05, "loss": 0.1978, "step": 2203 }, { "epoch": 0.03903181961065583, "grad_norm": 2.061823606491089, "learning_rate": 2.999358268743366e-05, "loss": 0.1566, "step": 2204 }, { "epoch": 0.03904952914768425, "grad_norm": 2.7093026638031006, "learning_rate": 2.9993557498632927e-05, "loss": 0.1942, "step": 2205 }, { "epoch": 0.039067238684712687, "grad_norm": 2.5242319107055664, "learning_rate": 2.9993532260504906e-05, "loss": 0.2062, "step": 2206 }, { "epoch": 0.03908494822174111, "grad_norm": 2.9076335430145264, "learning_rate": 2.999350697304968e-05, "loss": 0.1793, "step": 2207 }, { "epoch": 0.03910265775876954, "grad_norm": 1.6472676992416382, "learning_rate": 2.9993481636267336e-05, "loss": 0.2445, "step": 2208 }, { "epoch": 0.03912036729579797, "grad_norm": 2.1527445316314697, "learning_rate": 2.9993456250157953e-05, "loss": 0.1447, "step": 2209 }, { "epoch": 0.0391380768328264, "grad_norm": 2.9094152450561523, "learning_rate": 2.999343081472161e-05, "loss": 0.1849, "step": 2210 }, { "epoch": 0.039155786369854825, "grad_norm": 1.6674261093139648, "learning_rate": 2.9993405329958405e-05, "loss": 0.1782, "step": 2211 }, { "epoch": 0.03917349590688325, "grad_norm": 2.1557259559631348, "learning_rate": 2.9993379795868406e-05, "loss": 0.1757, "step": 2212 }, { "epoch": 0.039191205443911685, "grad_norm": 2.440213203430176, "learning_rate": 2.9993354212451707e-05, "loss": 0.1906, "step": 2213 }, { "epoch": 0.03920891498094011, "grad_norm": 2.1662423610687256, "learning_rate": 2.999332857970839e-05, "loss": 0.211, "step": 2214 }, { "epoch": 0.03922662451796854, "grad_norm": 2.0816028118133545, "learning_rate": 2.9993302897638536e-05, "loss": 0.191, "step": 2215 }, { "epoch": 0.039244334054996964, "grad_norm": 2.4445583820343018, "learning_rate": 2.9993277166242234e-05, "loss": 0.1749, "step": 2216 }, { "epoch": 0.0392620435920254, "grad_norm": 1.9648315906524658, "learning_rate": 2.9993251385519564e-05, "loss": 0.19, "step": 2217 }, { "epoch": 0.039279753129053824, "grad_norm": 2.2795073986053467, "learning_rate": 2.9993225555470615e-05, "loss": 0.1812, "step": 2218 }, { "epoch": 0.03929746266608225, "grad_norm": 1.7769005298614502, "learning_rate": 2.999319967609547e-05, "loss": 0.151, "step": 2219 }, { "epoch": 0.03931517220311068, "grad_norm": 2.0698201656341553, "learning_rate": 2.9993173747394214e-05, "loss": 0.2019, "step": 2220 }, { "epoch": 0.03933288174013911, "grad_norm": 1.988866925239563, "learning_rate": 2.9993147769366936e-05, "loss": 0.191, "step": 2221 }, { "epoch": 0.039350591277167536, "grad_norm": 1.3283588886260986, "learning_rate": 2.9993121742013716e-05, "loss": 0.1615, "step": 2222 }, { "epoch": 0.03936830081419596, "grad_norm": 1.9497700929641724, "learning_rate": 2.999309566533464e-05, "loss": 0.1786, "step": 2223 }, { "epoch": 0.039386010351224396, "grad_norm": 3.0079896450042725, "learning_rate": 2.9993069539329795e-05, "loss": 0.1432, "step": 2224 }, { "epoch": 0.03940371988825282, "grad_norm": 2.3498573303222656, "learning_rate": 2.9993043363999273e-05, "loss": 0.1574, "step": 2225 }, { "epoch": 0.03942142942528125, "grad_norm": 2.2314674854278564, "learning_rate": 2.9993017139343153e-05, "loss": 0.1789, "step": 2226 }, { "epoch": 0.039439138962309675, "grad_norm": 2.402060031890869, "learning_rate": 2.9992990865361522e-05, "loss": 0.1793, "step": 2227 }, { "epoch": 0.03945684849933811, "grad_norm": 1.6915172338485718, "learning_rate": 2.999296454205447e-05, "loss": 0.169, "step": 2228 }, { "epoch": 0.039474558036366535, "grad_norm": 2.823782205581665, "learning_rate": 2.9992938169422076e-05, "loss": 0.1894, "step": 2229 }, { "epoch": 0.03949226757339496, "grad_norm": 2.752408266067505, "learning_rate": 2.9992911747464437e-05, "loss": 0.2273, "step": 2230 }, { "epoch": 0.03950997711042339, "grad_norm": 2.0805938243865967, "learning_rate": 2.999288527618163e-05, "loss": 0.1618, "step": 2231 }, { "epoch": 0.03952768664745182, "grad_norm": 2.9657676219940186, "learning_rate": 2.999285875557375e-05, "loss": 0.1946, "step": 2232 }, { "epoch": 0.03954539618448025, "grad_norm": 1.5728561878204346, "learning_rate": 2.999283218564088e-05, "loss": 0.1972, "step": 2233 }, { "epoch": 0.03956310572150867, "grad_norm": 2.4837772846221924, "learning_rate": 2.9992805566383107e-05, "loss": 0.2222, "step": 2234 }, { "epoch": 0.03958081525853711, "grad_norm": 1.7880438566207886, "learning_rate": 2.9992778897800526e-05, "loss": 0.1863, "step": 2235 }, { "epoch": 0.03959852479556553, "grad_norm": 1.7893940210342407, "learning_rate": 2.9992752179893214e-05, "loss": 0.1195, "step": 2236 }, { "epoch": 0.03961623433259396, "grad_norm": 3.46136212348938, "learning_rate": 2.9992725412661262e-05, "loss": 0.1773, "step": 2237 }, { "epoch": 0.039633943869622386, "grad_norm": 3.0677428245544434, "learning_rate": 2.9992698596104766e-05, "loss": 0.264, "step": 2238 }, { "epoch": 0.03965165340665082, "grad_norm": 3.3591723442077637, "learning_rate": 2.9992671730223803e-05, "loss": 0.1852, "step": 2239 }, { "epoch": 0.039669362943679246, "grad_norm": 2.316706895828247, "learning_rate": 2.9992644815018468e-05, "loss": 0.2219, "step": 2240 }, { "epoch": 0.03968707248070767, "grad_norm": 2.472792148590088, "learning_rate": 2.9992617850488846e-05, "loss": 0.1915, "step": 2241 }, { "epoch": 0.0397047820177361, "grad_norm": 2.186516523361206, "learning_rate": 2.9992590836635032e-05, "loss": 0.2282, "step": 2242 }, { "epoch": 0.03972249155476453, "grad_norm": 1.4827419519424438, "learning_rate": 2.9992563773457108e-05, "loss": 0.2251, "step": 2243 }, { "epoch": 0.03974020109179296, "grad_norm": 1.7855803966522217, "learning_rate": 2.9992536660955163e-05, "loss": 0.1608, "step": 2244 }, { "epoch": 0.039757910628821384, "grad_norm": 2.12776780128479, "learning_rate": 2.9992509499129295e-05, "loss": 0.1624, "step": 2245 }, { "epoch": 0.03977562016584982, "grad_norm": 2.05539608001709, "learning_rate": 2.9992482287979583e-05, "loss": 0.1887, "step": 2246 }, { "epoch": 0.039793329702878244, "grad_norm": 1.9025096893310547, "learning_rate": 2.9992455027506123e-05, "loss": 0.1831, "step": 2247 }, { "epoch": 0.03981103923990667, "grad_norm": 1.978398323059082, "learning_rate": 2.9992427717709e-05, "loss": 0.2083, "step": 2248 }, { "epoch": 0.0398287487769351, "grad_norm": 2.358508586883545, "learning_rate": 2.9992400358588306e-05, "loss": 0.1947, "step": 2249 }, { "epoch": 0.03984645831396353, "grad_norm": 2.134312629699707, "learning_rate": 2.9992372950144133e-05, "loss": 0.1832, "step": 2250 }, { "epoch": 0.039864167850991956, "grad_norm": 2.4868268966674805, "learning_rate": 2.999234549237657e-05, "loss": 0.1605, "step": 2251 }, { "epoch": 0.03988187738802038, "grad_norm": 1.8198455572128296, "learning_rate": 2.9992317985285703e-05, "loss": 0.1914, "step": 2252 }, { "epoch": 0.03989958692504881, "grad_norm": 2.3427059650421143, "learning_rate": 2.9992290428871626e-05, "loss": 0.1849, "step": 2253 }, { "epoch": 0.03991729646207724, "grad_norm": 2.3148274421691895, "learning_rate": 2.9992262823134435e-05, "loss": 0.2375, "step": 2254 }, { "epoch": 0.03993500599910567, "grad_norm": 2.2553470134735107, "learning_rate": 2.9992235168074213e-05, "loss": 0.1754, "step": 2255 }, { "epoch": 0.039952715536134095, "grad_norm": 2.103837728500366, "learning_rate": 2.9992207463691056e-05, "loss": 0.2452, "step": 2256 }, { "epoch": 0.03997042507316252, "grad_norm": 1.9470832347869873, "learning_rate": 2.999217970998505e-05, "loss": 0.1649, "step": 2257 }, { "epoch": 0.039988134610190955, "grad_norm": 1.9897642135620117, "learning_rate": 2.999215190695629e-05, "loss": 0.236, "step": 2258 }, { "epoch": 0.04000584414721938, "grad_norm": 1.7433058023452759, "learning_rate": 2.9992124054604863e-05, "loss": 0.1663, "step": 2259 }, { "epoch": 0.04002355368424781, "grad_norm": 2.0155367851257324, "learning_rate": 2.999209615293087e-05, "loss": 0.1514, "step": 2260 }, { "epoch": 0.04004126322127624, "grad_norm": 1.820598840713501, "learning_rate": 2.9992068201934396e-05, "loss": 0.1605, "step": 2261 }, { "epoch": 0.04005897275830467, "grad_norm": 4.064627170562744, "learning_rate": 2.9992040201615532e-05, "loss": 0.1668, "step": 2262 }, { "epoch": 0.040076682295333094, "grad_norm": 2.4820075035095215, "learning_rate": 2.999201215197437e-05, "loss": 0.2541, "step": 2263 }, { "epoch": 0.04009439183236152, "grad_norm": 1.5491236448287964, "learning_rate": 2.9991984053011004e-05, "loss": 0.1992, "step": 2264 }, { "epoch": 0.04011210136938995, "grad_norm": 1.8806393146514893, "learning_rate": 2.9991955904725534e-05, "loss": 0.205, "step": 2265 }, { "epoch": 0.04012981090641838, "grad_norm": 1.7052725553512573, "learning_rate": 2.999192770711804e-05, "loss": 0.1544, "step": 2266 }, { "epoch": 0.040147520443446806, "grad_norm": 2.267317295074463, "learning_rate": 2.999189946018862e-05, "loss": 0.1995, "step": 2267 }, { "epoch": 0.04016522998047523, "grad_norm": 2.6106300354003906, "learning_rate": 2.999187116393737e-05, "loss": 0.1844, "step": 2268 }, { "epoch": 0.040182939517503666, "grad_norm": 1.9762868881225586, "learning_rate": 2.9991842818364376e-05, "loss": 0.174, "step": 2269 }, { "epoch": 0.04020064905453209, "grad_norm": 3.5630416870117188, "learning_rate": 2.999181442346974e-05, "loss": 0.1562, "step": 2270 }, { "epoch": 0.04021835859156052, "grad_norm": 2.3727545738220215, "learning_rate": 2.999178597925355e-05, "loss": 0.1879, "step": 2271 }, { "epoch": 0.04023606812858895, "grad_norm": 2.2693259716033936, "learning_rate": 2.99917574857159e-05, "loss": 0.1681, "step": 2272 }, { "epoch": 0.04025377766561738, "grad_norm": 2.6182713508605957, "learning_rate": 2.9991728942856883e-05, "loss": 0.1454, "step": 2273 }, { "epoch": 0.040271487202645805, "grad_norm": 1.7335734367370605, "learning_rate": 2.9991700350676593e-05, "loss": 0.2458, "step": 2274 }, { "epoch": 0.04028919673967423, "grad_norm": 1.7952141761779785, "learning_rate": 2.999167170917513e-05, "loss": 0.1863, "step": 2275 }, { "epoch": 0.040306906276702664, "grad_norm": 1.6565366983413696, "learning_rate": 2.999164301835258e-05, "loss": 0.1662, "step": 2276 }, { "epoch": 0.04032461581373109, "grad_norm": 1.8164803981781006, "learning_rate": 2.999161427820904e-05, "loss": 0.1585, "step": 2277 }, { "epoch": 0.04034232535075952, "grad_norm": 3.1876962184906006, "learning_rate": 2.999158548874461e-05, "loss": 0.2109, "step": 2278 }, { "epoch": 0.04036003488778794, "grad_norm": 3.0875988006591797, "learning_rate": 2.999155664995937e-05, "loss": 0.1906, "step": 2279 }, { "epoch": 0.04037774442481638, "grad_norm": 2.395995855331421, "learning_rate": 2.9991527761853433e-05, "loss": 0.196, "step": 2280 }, { "epoch": 0.0403954539618448, "grad_norm": 2.4933156967163086, "learning_rate": 2.9991498824426885e-05, "loss": 0.1576, "step": 2281 }, { "epoch": 0.04041316349887323, "grad_norm": 4.452755451202393, "learning_rate": 2.9991469837679824e-05, "loss": 0.2259, "step": 2282 }, { "epoch": 0.040430873035901656, "grad_norm": 2.561126232147217, "learning_rate": 2.999144080161234e-05, "loss": 0.1934, "step": 2283 }, { "epoch": 0.04044858257293009, "grad_norm": 2.2886338233947754, "learning_rate": 2.999141171622453e-05, "loss": 0.1909, "step": 2284 }, { "epoch": 0.040466292109958515, "grad_norm": 2.362994909286499, "learning_rate": 2.9991382581516497e-05, "loss": 0.2083, "step": 2285 }, { "epoch": 0.04048400164698694, "grad_norm": 3.9381909370422363, "learning_rate": 2.9991353397488327e-05, "loss": 0.2318, "step": 2286 }, { "epoch": 0.040501711184015375, "grad_norm": 1.674422025680542, "learning_rate": 2.9991324164140126e-05, "loss": 0.1954, "step": 2287 }, { "epoch": 0.0405194207210438, "grad_norm": 3.476508617401123, "learning_rate": 2.999129488147198e-05, "loss": 0.2026, "step": 2288 }, { "epoch": 0.04053713025807223, "grad_norm": 1.579620361328125, "learning_rate": 2.9991265549483994e-05, "loss": 0.1998, "step": 2289 }, { "epoch": 0.040554839795100654, "grad_norm": 1.9066990613937378, "learning_rate": 2.9991236168176257e-05, "loss": 0.1737, "step": 2290 }, { "epoch": 0.04057254933212909, "grad_norm": 2.2759716510772705, "learning_rate": 2.9991206737548872e-05, "loss": 0.207, "step": 2291 }, { "epoch": 0.040590258869157514, "grad_norm": 2.081772565841675, "learning_rate": 2.9991177257601932e-05, "loss": 0.1987, "step": 2292 }, { "epoch": 0.04060796840618594, "grad_norm": 1.8805029392242432, "learning_rate": 2.9991147728335534e-05, "loss": 0.1649, "step": 2293 }, { "epoch": 0.04062567794321437, "grad_norm": 1.9541208744049072, "learning_rate": 2.9991118149749778e-05, "loss": 0.1958, "step": 2294 }, { "epoch": 0.0406433874802428, "grad_norm": 1.4784775972366333, "learning_rate": 2.999108852184476e-05, "loss": 0.1756, "step": 2295 }, { "epoch": 0.040661097017271226, "grad_norm": 2.169450521469116, "learning_rate": 2.9991058844620574e-05, "loss": 0.2177, "step": 2296 }, { "epoch": 0.04067880655429965, "grad_norm": 1.9870245456695557, "learning_rate": 2.9991029118077325e-05, "loss": 0.2155, "step": 2297 }, { "epoch": 0.04069651609132808, "grad_norm": 2.2819924354553223, "learning_rate": 2.9990999342215102e-05, "loss": 0.201, "step": 2298 }, { "epoch": 0.04071422562835651, "grad_norm": 1.8707712888717651, "learning_rate": 2.999096951703401e-05, "loss": 0.2038, "step": 2299 }, { "epoch": 0.04073193516538494, "grad_norm": 1.6800458431243896, "learning_rate": 2.9990939642534143e-05, "loss": 0.2009, "step": 2300 }, { "epoch": 0.040749644702413365, "grad_norm": 1.7658039331436157, "learning_rate": 2.99909097187156e-05, "loss": 0.1602, "step": 2301 }, { "epoch": 0.0407673542394418, "grad_norm": 2.0798757076263428, "learning_rate": 2.9990879745578485e-05, "loss": 0.1663, "step": 2302 }, { "epoch": 0.040785063776470225, "grad_norm": 2.3782336711883545, "learning_rate": 2.9990849723122888e-05, "loss": 0.1601, "step": 2303 }, { "epoch": 0.04080277331349865, "grad_norm": 1.6531038284301758, "learning_rate": 2.9990819651348912e-05, "loss": 0.1769, "step": 2304 }, { "epoch": 0.04082048285052708, "grad_norm": 2.1239089965820312, "learning_rate": 2.9990789530256656e-05, "loss": 0.1614, "step": 2305 }, { "epoch": 0.04083819238755551, "grad_norm": 3.2422962188720703, "learning_rate": 2.999075935984622e-05, "loss": 0.2309, "step": 2306 }, { "epoch": 0.04085590192458394, "grad_norm": 2.6997506618499756, "learning_rate": 2.9990729140117698e-05, "loss": 0.1909, "step": 2307 }, { "epoch": 0.040873611461612364, "grad_norm": 1.9340237379074097, "learning_rate": 2.9990698871071195e-05, "loss": 0.1615, "step": 2308 }, { "epoch": 0.04089132099864079, "grad_norm": 1.8646397590637207, "learning_rate": 2.9990668552706813e-05, "loss": 0.2212, "step": 2309 }, { "epoch": 0.04090903053566922, "grad_norm": 2.4987387657165527, "learning_rate": 2.9990638185024645e-05, "loss": 0.1951, "step": 2310 }, { "epoch": 0.04092674007269765, "grad_norm": 3.273069143295288, "learning_rate": 2.9990607768024793e-05, "loss": 0.2202, "step": 2311 }, { "epoch": 0.040944449609726076, "grad_norm": 1.6663880348205566, "learning_rate": 2.999057730170736e-05, "loss": 0.1897, "step": 2312 }, { "epoch": 0.04096215914675451, "grad_norm": 1.9473615884780884, "learning_rate": 2.9990546786072443e-05, "loss": 0.1858, "step": 2313 }, { "epoch": 0.040979868683782936, "grad_norm": 1.6341389417648315, "learning_rate": 2.999051622112014e-05, "loss": 0.179, "step": 2314 }, { "epoch": 0.04099757822081136, "grad_norm": 2.1640822887420654, "learning_rate": 2.9990485606850557e-05, "loss": 0.2207, "step": 2315 }, { "epoch": 0.04101528775783979, "grad_norm": 1.9358415603637695, "learning_rate": 2.999045494326379e-05, "loss": 0.1864, "step": 2316 }, { "epoch": 0.04103299729486822, "grad_norm": 1.8057280778884888, "learning_rate": 2.9990424230359947e-05, "loss": 0.1976, "step": 2317 }, { "epoch": 0.04105070683189665, "grad_norm": 2.1759426593780518, "learning_rate": 2.999039346813912e-05, "loss": 0.199, "step": 2318 }, { "epoch": 0.041068416368925074, "grad_norm": 2.203697919845581, "learning_rate": 2.9990362656601417e-05, "loss": 0.1747, "step": 2319 }, { "epoch": 0.0410861259059535, "grad_norm": 2.1750996112823486, "learning_rate": 2.999033179574694e-05, "loss": 0.1888, "step": 2320 }, { "epoch": 0.041103835442981934, "grad_norm": 3.4346506595611572, "learning_rate": 2.9990300885575782e-05, "loss": 0.153, "step": 2321 }, { "epoch": 0.04112154498001036, "grad_norm": 3.1778109073638916, "learning_rate": 2.9990269926088055e-05, "loss": 0.2613, "step": 2322 }, { "epoch": 0.04113925451703879, "grad_norm": 2.610346555709839, "learning_rate": 2.999023891728385e-05, "loss": 0.2071, "step": 2323 }, { "epoch": 0.04115696405406721, "grad_norm": 2.130425214767456, "learning_rate": 2.9990207859163284e-05, "loss": 0.2384, "step": 2324 }, { "epoch": 0.04117467359109565, "grad_norm": 1.6088100671768188, "learning_rate": 2.9990176751726444e-05, "loss": 0.1633, "step": 2325 }, { "epoch": 0.04119238312812407, "grad_norm": 1.9003608226776123, "learning_rate": 2.999014559497344e-05, "loss": 0.1595, "step": 2326 }, { "epoch": 0.0412100926651525, "grad_norm": 1.5790456533432007, "learning_rate": 2.9990114388904374e-05, "loss": 0.1857, "step": 2327 }, { "epoch": 0.04122780220218093, "grad_norm": 2.3855128288269043, "learning_rate": 2.9990083133519345e-05, "loss": 0.1755, "step": 2328 }, { "epoch": 0.04124551173920936, "grad_norm": 2.268404245376587, "learning_rate": 2.999005182881846e-05, "loss": 0.1901, "step": 2329 }, { "epoch": 0.041263221276237785, "grad_norm": 2.256453037261963, "learning_rate": 2.9990020474801822e-05, "loss": 0.1774, "step": 2330 }, { "epoch": 0.04128093081326621, "grad_norm": 1.6875417232513428, "learning_rate": 2.9989989071469536e-05, "loss": 0.1655, "step": 2331 }, { "epoch": 0.041298640350294645, "grad_norm": 1.307291865348816, "learning_rate": 2.9989957618821697e-05, "loss": 0.2247, "step": 2332 }, { "epoch": 0.04131634988732307, "grad_norm": 2.3932158946990967, "learning_rate": 2.9989926116858414e-05, "loss": 0.166, "step": 2333 }, { "epoch": 0.0413340594243515, "grad_norm": 1.7019392251968384, "learning_rate": 2.9989894565579792e-05, "loss": 0.205, "step": 2334 }, { "epoch": 0.041351768961379924, "grad_norm": 2.563568115234375, "learning_rate": 2.9989862964985932e-05, "loss": 0.1668, "step": 2335 }, { "epoch": 0.04136947849840836, "grad_norm": 3.878545045852661, "learning_rate": 2.9989831315076937e-05, "loss": 0.1908, "step": 2336 }, { "epoch": 0.041387188035436784, "grad_norm": 3.427387237548828, "learning_rate": 2.9989799615852915e-05, "loss": 0.2395, "step": 2337 }, { "epoch": 0.04140489757246521, "grad_norm": 2.3279836177825928, "learning_rate": 2.9989767867313967e-05, "loss": 0.1784, "step": 2338 }, { "epoch": 0.041422607109493643, "grad_norm": 2.539620876312256, "learning_rate": 2.9989736069460205e-05, "loss": 0.2109, "step": 2339 }, { "epoch": 0.04144031664652207, "grad_norm": 2.266814708709717, "learning_rate": 2.998970422229172e-05, "loss": 0.2176, "step": 2340 }, { "epoch": 0.041458026183550496, "grad_norm": 2.416750907897949, "learning_rate": 2.9989672325808626e-05, "loss": 0.1411, "step": 2341 }, { "epoch": 0.04147573572057892, "grad_norm": 2.44842267036438, "learning_rate": 2.9989640380011026e-05, "loss": 0.2139, "step": 2342 }, { "epoch": 0.041493445257607356, "grad_norm": 2.0632517337799072, "learning_rate": 2.998960838489903e-05, "loss": 0.1983, "step": 2343 }, { "epoch": 0.04151115479463578, "grad_norm": 2.442232131958008, "learning_rate": 2.9989576340472736e-05, "loss": 0.224, "step": 2344 }, { "epoch": 0.04152886433166421, "grad_norm": 1.5497033596038818, "learning_rate": 2.998954424673225e-05, "loss": 0.1302, "step": 2345 }, { "epoch": 0.041546573868692635, "grad_norm": 2.3982021808624268, "learning_rate": 2.9989512103677677e-05, "loss": 0.1885, "step": 2346 }, { "epoch": 0.04156428340572107, "grad_norm": 1.8937608003616333, "learning_rate": 2.9989479911309132e-05, "loss": 0.1455, "step": 2347 }, { "epoch": 0.041581992942749495, "grad_norm": 2.2127151489257812, "learning_rate": 2.998944766962671e-05, "loss": 0.1769, "step": 2348 }, { "epoch": 0.04159970247977792, "grad_norm": 1.6522692441940308, "learning_rate": 2.9989415378630523e-05, "loss": 0.1774, "step": 2349 }, { "epoch": 0.04161741201680635, "grad_norm": 2.072058916091919, "learning_rate": 2.9989383038320677e-05, "loss": 0.1855, "step": 2350 }, { "epoch": 0.04163512155383478, "grad_norm": 2.01857590675354, "learning_rate": 2.9989350648697273e-05, "loss": 0.1525, "step": 2351 }, { "epoch": 0.04165283109086321, "grad_norm": 1.5208297967910767, "learning_rate": 2.9989318209760425e-05, "loss": 0.1725, "step": 2352 }, { "epoch": 0.041670540627891633, "grad_norm": 2.198089122772217, "learning_rate": 2.9989285721510234e-05, "loss": 0.1909, "step": 2353 }, { "epoch": 0.04168825016492007, "grad_norm": 2.262284755706787, "learning_rate": 2.9989253183946807e-05, "loss": 0.2185, "step": 2354 }, { "epoch": 0.04170595970194849, "grad_norm": 1.884792685508728, "learning_rate": 2.9989220597070254e-05, "loss": 0.1465, "step": 2355 }, { "epoch": 0.04172366923897692, "grad_norm": 2.139300584793091, "learning_rate": 2.9989187960880682e-05, "loss": 0.2674, "step": 2356 }, { "epoch": 0.041741378776005346, "grad_norm": 2.189673662185669, "learning_rate": 2.99891552753782e-05, "loss": 0.1689, "step": 2357 }, { "epoch": 0.04175908831303378, "grad_norm": 1.6822640895843506, "learning_rate": 2.9989122540562912e-05, "loss": 0.1599, "step": 2358 }, { "epoch": 0.041776797850062206, "grad_norm": 1.7229876518249512, "learning_rate": 2.9989089756434927e-05, "loss": 0.1774, "step": 2359 }, { "epoch": 0.04179450738709063, "grad_norm": 2.853323221206665, "learning_rate": 2.9989056922994354e-05, "loss": 0.2903, "step": 2360 }, { "epoch": 0.04181221692411906, "grad_norm": 2.5372743606567383, "learning_rate": 2.9989024040241295e-05, "loss": 0.1722, "step": 2361 }, { "epoch": 0.04182992646114749, "grad_norm": 2.1700961589813232, "learning_rate": 2.9988991108175864e-05, "loss": 0.1415, "step": 2362 }, { "epoch": 0.04184763599817592, "grad_norm": 1.8207899332046509, "learning_rate": 2.998895812679817e-05, "loss": 0.1386, "step": 2363 }, { "epoch": 0.041865345535204344, "grad_norm": 2.4792404174804688, "learning_rate": 2.9988925096108324e-05, "loss": 0.1971, "step": 2364 }, { "epoch": 0.04188305507223277, "grad_norm": 1.693324327468872, "learning_rate": 2.9988892016106425e-05, "loss": 0.1784, "step": 2365 }, { "epoch": 0.041900764609261204, "grad_norm": 1.9218323230743408, "learning_rate": 2.998885888679259e-05, "loss": 0.1699, "step": 2366 }, { "epoch": 0.04191847414628963, "grad_norm": 1.6716034412384033, "learning_rate": 2.9988825708166922e-05, "loss": 0.1619, "step": 2367 }, { "epoch": 0.04193618368331806, "grad_norm": 1.6655079126358032, "learning_rate": 2.9988792480229536e-05, "loss": 0.156, "step": 2368 }, { "epoch": 0.04195389322034649, "grad_norm": 2.0268733501434326, "learning_rate": 2.9988759202980537e-05, "loss": 0.1832, "step": 2369 }, { "epoch": 0.041971602757374916, "grad_norm": 2.0143651962280273, "learning_rate": 2.998872587642004e-05, "loss": 0.2036, "step": 2370 }, { "epoch": 0.04198931229440334, "grad_norm": 1.6032085418701172, "learning_rate": 2.9988692500548148e-05, "loss": 0.1961, "step": 2371 }, { "epoch": 0.04200702183143177, "grad_norm": 2.100083827972412, "learning_rate": 2.9988659075364974e-05, "loss": 0.2213, "step": 2372 }, { "epoch": 0.0420247313684602, "grad_norm": 2.2812728881835938, "learning_rate": 2.998862560087063e-05, "loss": 0.1928, "step": 2373 }, { "epoch": 0.04204244090548863, "grad_norm": 2.4423601627349854, "learning_rate": 2.9988592077065217e-05, "loss": 0.2018, "step": 2374 }, { "epoch": 0.042060150442517055, "grad_norm": 1.4373822212219238, "learning_rate": 2.9988558503948857e-05, "loss": 0.1844, "step": 2375 }, { "epoch": 0.04207785997954548, "grad_norm": 2.5370965003967285, "learning_rate": 2.9988524881521654e-05, "loss": 0.1939, "step": 2376 }, { "epoch": 0.042095569516573915, "grad_norm": 3.0197553634643555, "learning_rate": 2.9988491209783723e-05, "loss": 0.2049, "step": 2377 }, { "epoch": 0.04211327905360234, "grad_norm": 2.9278383255004883, "learning_rate": 2.998845748873517e-05, "loss": 0.2247, "step": 2378 }, { "epoch": 0.04213098859063077, "grad_norm": 1.4671846628189087, "learning_rate": 2.998842371837611e-05, "loss": 0.1809, "step": 2379 }, { "epoch": 0.0421486981276592, "grad_norm": 2.2904677391052246, "learning_rate": 2.998838989870665e-05, "loss": 0.1889, "step": 2380 }, { "epoch": 0.04216640766468763, "grad_norm": 4.1198554039001465, "learning_rate": 2.99883560297269e-05, "loss": 0.2097, "step": 2381 }, { "epoch": 0.042184117201716054, "grad_norm": 1.4759812355041504, "learning_rate": 2.998832211143698e-05, "loss": 0.137, "step": 2382 }, { "epoch": 0.04220182673874448, "grad_norm": 2.769514322280884, "learning_rate": 2.9988288143836992e-05, "loss": 0.2749, "step": 2383 }, { "epoch": 0.04221953627577291, "grad_norm": 1.757370114326477, "learning_rate": 2.9988254126927057e-05, "loss": 0.139, "step": 2384 }, { "epoch": 0.04223724581280134, "grad_norm": 2.2628514766693115, "learning_rate": 2.9988220060707278e-05, "loss": 0.2324, "step": 2385 }, { "epoch": 0.042254955349829766, "grad_norm": 2.2808175086975098, "learning_rate": 2.998818594517777e-05, "loss": 0.178, "step": 2386 }, { "epoch": 0.04227266488685819, "grad_norm": 1.4748125076293945, "learning_rate": 2.998815178033865e-05, "loss": 0.1706, "step": 2387 }, { "epoch": 0.042290374423886626, "grad_norm": 1.7904075384140015, "learning_rate": 2.9988117566190024e-05, "loss": 0.1638, "step": 2388 }, { "epoch": 0.04230808396091505, "grad_norm": 2.7006113529205322, "learning_rate": 2.9988083302732007e-05, "loss": 0.2208, "step": 2389 }, { "epoch": 0.04232579349794348, "grad_norm": 2.108989715576172, "learning_rate": 2.9988048989964715e-05, "loss": 0.1868, "step": 2390 }, { "epoch": 0.042343503034971905, "grad_norm": 3.1680197715759277, "learning_rate": 2.9988014627888258e-05, "loss": 0.1767, "step": 2391 }, { "epoch": 0.04236121257200034, "grad_norm": 1.9586859941482544, "learning_rate": 2.9987980216502746e-05, "loss": 0.2068, "step": 2392 }, { "epoch": 0.042378922109028765, "grad_norm": 1.4244674444198608, "learning_rate": 2.9987945755808293e-05, "loss": 0.1609, "step": 2393 }, { "epoch": 0.04239663164605719, "grad_norm": 2.264227867126465, "learning_rate": 2.998791124580502e-05, "loss": 0.1671, "step": 2394 }, { "epoch": 0.042414341183085624, "grad_norm": 1.875732660293579, "learning_rate": 2.998787668649303e-05, "loss": 0.1487, "step": 2395 }, { "epoch": 0.04243205072011405, "grad_norm": 2.657472848892212, "learning_rate": 2.9987842077872444e-05, "loss": 0.2055, "step": 2396 }, { "epoch": 0.04244976025714248, "grad_norm": 2.166447877883911, "learning_rate": 2.9987807419943374e-05, "loss": 0.1658, "step": 2397 }, { "epoch": 0.0424674697941709, "grad_norm": 2.111726999282837, "learning_rate": 2.998777271270593e-05, "loss": 0.1816, "step": 2398 }, { "epoch": 0.04248517933119934, "grad_norm": 2.9985125064849854, "learning_rate": 2.9987737956160236e-05, "loss": 0.1631, "step": 2399 }, { "epoch": 0.04250288886822776, "grad_norm": 2.531907320022583, "learning_rate": 2.9987703150306392e-05, "loss": 0.1882, "step": 2400 }, { "epoch": 0.04252059840525619, "grad_norm": 1.722689151763916, "learning_rate": 2.9987668295144527e-05, "loss": 0.1914, "step": 2401 }, { "epoch": 0.042538307942284616, "grad_norm": 1.5270062685012817, "learning_rate": 2.9987633390674744e-05, "loss": 0.1861, "step": 2402 }, { "epoch": 0.04255601747931305, "grad_norm": 2.4398603439331055, "learning_rate": 2.9987598436897167e-05, "loss": 0.2012, "step": 2403 }, { "epoch": 0.042573727016341475, "grad_norm": 2.904587507247925, "learning_rate": 2.9987563433811908e-05, "loss": 0.1854, "step": 2404 }, { "epoch": 0.0425914365533699, "grad_norm": 1.956911563873291, "learning_rate": 2.998752838141908e-05, "loss": 0.1814, "step": 2405 }, { "epoch": 0.042609146090398335, "grad_norm": 1.673769235610962, "learning_rate": 2.99874932797188e-05, "loss": 0.1552, "step": 2406 }, { "epoch": 0.04262685562742676, "grad_norm": 1.8364001512527466, "learning_rate": 2.9987458128711175e-05, "loss": 0.1806, "step": 2407 }, { "epoch": 0.04264456516445519, "grad_norm": 2.027263641357422, "learning_rate": 2.998742292839634e-05, "loss": 0.1713, "step": 2408 }, { "epoch": 0.042662274701483614, "grad_norm": 1.8570787906646729, "learning_rate": 2.998738767877439e-05, "loss": 0.1794, "step": 2409 }, { "epoch": 0.04267998423851205, "grad_norm": 2.1603641510009766, "learning_rate": 2.998735237984546e-05, "loss": 0.1563, "step": 2410 }, { "epoch": 0.042697693775540474, "grad_norm": 1.8842077255249023, "learning_rate": 2.998731703160965e-05, "loss": 0.1694, "step": 2411 }, { "epoch": 0.0427154033125689, "grad_norm": 4.941252708435059, "learning_rate": 2.9987281634067087e-05, "loss": 0.1876, "step": 2412 }, { "epoch": 0.04273311284959733, "grad_norm": 2.067849636077881, "learning_rate": 2.9987246187217882e-05, "loss": 0.1765, "step": 2413 }, { "epoch": 0.04275082238662576, "grad_norm": 2.0754594802856445, "learning_rate": 2.998721069106215e-05, "loss": 0.17, "step": 2414 }, { "epoch": 0.042768531923654186, "grad_norm": 2.6190686225891113, "learning_rate": 2.998717514560001e-05, "loss": 0.2222, "step": 2415 }, { "epoch": 0.04278624146068261, "grad_norm": 2.0698020458221436, "learning_rate": 2.9987139550831587e-05, "loss": 0.1647, "step": 2416 }, { "epoch": 0.04280395099771104, "grad_norm": 1.931342601776123, "learning_rate": 2.998710390675698e-05, "loss": 0.1611, "step": 2417 }, { "epoch": 0.04282166053473947, "grad_norm": 2.0007669925689697, "learning_rate": 2.9987068213376326e-05, "loss": 0.196, "step": 2418 }, { "epoch": 0.0428393700717679, "grad_norm": 1.8517093658447266, "learning_rate": 2.998703247068973e-05, "loss": 0.2025, "step": 2419 }, { "epoch": 0.042857079608796325, "grad_norm": 2.863279342651367, "learning_rate": 2.9986996678697318e-05, "loss": 0.1392, "step": 2420 }, { "epoch": 0.04287478914582476, "grad_norm": 2.2585954666137695, "learning_rate": 2.9986960837399194e-05, "loss": 0.177, "step": 2421 }, { "epoch": 0.042892498682853185, "grad_norm": 2.7121548652648926, "learning_rate": 2.998692494679549e-05, "loss": 0.1734, "step": 2422 }, { "epoch": 0.04291020821988161, "grad_norm": 2.0438425540924072, "learning_rate": 2.9986889006886316e-05, "loss": 0.1641, "step": 2423 }, { "epoch": 0.04292791775691004, "grad_norm": 1.2960937023162842, "learning_rate": 2.9986853017671795e-05, "loss": 0.1282, "step": 2424 }, { "epoch": 0.04294562729393847, "grad_norm": 1.9453133344650269, "learning_rate": 2.9986816979152045e-05, "loss": 0.1653, "step": 2425 }, { "epoch": 0.0429633368309669, "grad_norm": 1.607733964920044, "learning_rate": 2.998678089132718e-05, "loss": 0.1833, "step": 2426 }, { "epoch": 0.042981046367995324, "grad_norm": 2.958164691925049, "learning_rate": 2.9986744754197322e-05, "loss": 0.2107, "step": 2427 }, { "epoch": 0.04299875590502375, "grad_norm": 1.9028297662734985, "learning_rate": 2.998670856776259e-05, "loss": 0.1831, "step": 2428 }, { "epoch": 0.04301646544205218, "grad_norm": 4.298436164855957, "learning_rate": 2.9986672332023104e-05, "loss": 0.2005, "step": 2429 }, { "epoch": 0.04303417497908061, "grad_norm": 10.363285064697266, "learning_rate": 2.9986636046978982e-05, "loss": 0.2124, "step": 2430 }, { "epoch": 0.043051884516109036, "grad_norm": 4.687328815460205, "learning_rate": 2.998659971263034e-05, "loss": 0.245, "step": 2431 }, { "epoch": 0.04306959405313746, "grad_norm": 1.6871998310089111, "learning_rate": 2.99865633289773e-05, "loss": 0.1838, "step": 2432 }, { "epoch": 0.043087303590165896, "grad_norm": 2.6095612049102783, "learning_rate": 2.998652689601999e-05, "loss": 0.1409, "step": 2433 }, { "epoch": 0.04310501312719432, "grad_norm": 3.162137985229492, "learning_rate": 2.9986490413758516e-05, "loss": 0.232, "step": 2434 }, { "epoch": 0.04312272266422275, "grad_norm": 2.661500930786133, "learning_rate": 2.9986453882193007e-05, "loss": 0.1898, "step": 2435 }, { "epoch": 0.04314043220125118, "grad_norm": 1.8885793685913086, "learning_rate": 2.998641730132358e-05, "loss": 0.2108, "step": 2436 }, { "epoch": 0.04315814173827961, "grad_norm": 2.137852430343628, "learning_rate": 2.998638067115035e-05, "loss": 0.2403, "step": 2437 }, { "epoch": 0.043175851275308035, "grad_norm": 1.6249338388442993, "learning_rate": 2.998634399167345e-05, "loss": 0.1766, "step": 2438 }, { "epoch": 0.04319356081233646, "grad_norm": 2.182440757751465, "learning_rate": 2.9986307262892993e-05, "loss": 0.2086, "step": 2439 }, { "epoch": 0.043211270349364894, "grad_norm": 2.0191385746002197, "learning_rate": 2.9986270484809098e-05, "loss": 0.2427, "step": 2440 }, { "epoch": 0.04322897988639332, "grad_norm": 2.1882917881011963, "learning_rate": 2.9986233657421892e-05, "loss": 0.2211, "step": 2441 }, { "epoch": 0.04324668942342175, "grad_norm": 1.7249832153320312, "learning_rate": 2.9986196780731494e-05, "loss": 0.1443, "step": 2442 }, { "epoch": 0.04326439896045017, "grad_norm": 1.5881520509719849, "learning_rate": 2.9986159854738022e-05, "loss": 0.1634, "step": 2443 }, { "epoch": 0.04328210849747861, "grad_norm": 1.627715826034546, "learning_rate": 2.9986122879441604e-05, "loss": 0.2472, "step": 2444 }, { "epoch": 0.04329981803450703, "grad_norm": 1.91647469997406, "learning_rate": 2.998608585484235e-05, "loss": 0.1677, "step": 2445 }, { "epoch": 0.04331752757153546, "grad_norm": 1.6521059274673462, "learning_rate": 2.9986048780940395e-05, "loss": 0.1353, "step": 2446 }, { "epoch": 0.04333523710856389, "grad_norm": 2.7087082862854004, "learning_rate": 2.9986011657735853e-05, "loss": 0.2813, "step": 2447 }, { "epoch": 0.04335294664559232, "grad_norm": 2.2987122535705566, "learning_rate": 2.998597448522885e-05, "loss": 0.1581, "step": 2448 }, { "epoch": 0.043370656182620745, "grad_norm": 1.3442714214324951, "learning_rate": 2.9985937263419506e-05, "loss": 0.1461, "step": 2449 }, { "epoch": 0.04338836571964917, "grad_norm": 2.1906697750091553, "learning_rate": 2.998589999230794e-05, "loss": 0.1992, "step": 2450 }, { "epoch": 0.043406075256677605, "grad_norm": 2.4950027465820312, "learning_rate": 2.9985862671894286e-05, "loss": 0.1534, "step": 2451 }, { "epoch": 0.04342378479370603, "grad_norm": 1.2667490243911743, "learning_rate": 2.9985825302178657e-05, "loss": 0.1346, "step": 2452 }, { "epoch": 0.04344149433073446, "grad_norm": 1.8795877695083618, "learning_rate": 2.9985787883161174e-05, "loss": 0.207, "step": 2453 }, { "epoch": 0.043459203867762884, "grad_norm": 1.78037428855896, "learning_rate": 2.998575041484197e-05, "loss": 0.2038, "step": 2454 }, { "epoch": 0.04347691340479132, "grad_norm": 1.7112971544265747, "learning_rate": 2.998571289722116e-05, "loss": 0.1614, "step": 2455 }, { "epoch": 0.043494622941819744, "grad_norm": 1.7094064950942993, "learning_rate": 2.9985675330298873e-05, "loss": 0.1575, "step": 2456 }, { "epoch": 0.04351233247884817, "grad_norm": 2.7086737155914307, "learning_rate": 2.998563771407523e-05, "loss": 0.1556, "step": 2457 }, { "epoch": 0.0435300420158766, "grad_norm": 2.5390682220458984, "learning_rate": 2.998560004855035e-05, "loss": 0.1943, "step": 2458 }, { "epoch": 0.04354775155290503, "grad_norm": 1.8669462203979492, "learning_rate": 2.9985562333724363e-05, "loss": 0.1798, "step": 2459 }, { "epoch": 0.043565461089933456, "grad_norm": 1.8486831188201904, "learning_rate": 2.9985524569597397e-05, "loss": 0.154, "step": 2460 }, { "epoch": 0.04358317062696188, "grad_norm": 2.150186061859131, "learning_rate": 2.9985486756169566e-05, "loss": 0.1591, "step": 2461 }, { "epoch": 0.043600880163990316, "grad_norm": 1.9050308465957642, "learning_rate": 2.9985448893441002e-05, "loss": 0.1689, "step": 2462 }, { "epoch": 0.04361858970101874, "grad_norm": 1.4373042583465576, "learning_rate": 2.998541098141182e-05, "loss": 0.155, "step": 2463 }, { "epoch": 0.04363629923804717, "grad_norm": 5.27461576461792, "learning_rate": 2.9985373020082157e-05, "loss": 0.1551, "step": 2464 }, { "epoch": 0.043654008775075595, "grad_norm": 3.419527530670166, "learning_rate": 2.9985335009452134e-05, "loss": 0.187, "step": 2465 }, { "epoch": 0.04367171831210403, "grad_norm": 1.5169216394424438, "learning_rate": 2.9985296949521875e-05, "loss": 0.1885, "step": 2466 }, { "epoch": 0.043689427849132455, "grad_norm": 2.241786479949951, "learning_rate": 2.99852588402915e-05, "loss": 0.1865, "step": 2467 }, { "epoch": 0.04370713738616088, "grad_norm": 1.7066271305084229, "learning_rate": 2.998522068176114e-05, "loss": 0.1371, "step": 2468 }, { "epoch": 0.04372484692318931, "grad_norm": 2.1733999252319336, "learning_rate": 2.9985182473930922e-05, "loss": 0.2485, "step": 2469 }, { "epoch": 0.04374255646021774, "grad_norm": 1.6984002590179443, "learning_rate": 2.998514421680097e-05, "loss": 0.1725, "step": 2470 }, { "epoch": 0.04376026599724617, "grad_norm": 2.2005884647369385, "learning_rate": 2.9985105910371404e-05, "loss": 0.1404, "step": 2471 }, { "epoch": 0.043777975534274594, "grad_norm": 2.0415685176849365, "learning_rate": 2.998506755464236e-05, "loss": 0.1599, "step": 2472 }, { "epoch": 0.04379568507130303, "grad_norm": 2.23721981048584, "learning_rate": 2.9985029149613958e-05, "loss": 0.1974, "step": 2473 }, { "epoch": 0.04381339460833145, "grad_norm": 2.0049984455108643, "learning_rate": 2.998499069528632e-05, "loss": 0.129, "step": 2474 }, { "epoch": 0.04383110414535988, "grad_norm": 2.84621000289917, "learning_rate": 2.998495219165959e-05, "loss": 0.1826, "step": 2475 }, { "epoch": 0.043848813682388306, "grad_norm": 1.9303710460662842, "learning_rate": 2.9984913638733878e-05, "loss": 0.1907, "step": 2476 }, { "epoch": 0.04386652321941674, "grad_norm": 2.347398519515991, "learning_rate": 2.9984875036509315e-05, "loss": 0.1431, "step": 2477 }, { "epoch": 0.043884232756445166, "grad_norm": 2.439121961593628, "learning_rate": 2.9984836384986025e-05, "loss": 0.1736, "step": 2478 }, { "epoch": 0.04390194229347359, "grad_norm": 2.593766450881958, "learning_rate": 2.9984797684164144e-05, "loss": 0.2017, "step": 2479 }, { "epoch": 0.04391965183050202, "grad_norm": 1.6711124181747437, "learning_rate": 2.9984758934043793e-05, "loss": 0.1373, "step": 2480 }, { "epoch": 0.04393736136753045, "grad_norm": 1.4097473621368408, "learning_rate": 2.9984720134625098e-05, "loss": 0.1888, "step": 2481 }, { "epoch": 0.04395507090455888, "grad_norm": 1.9373217821121216, "learning_rate": 2.998468128590819e-05, "loss": 0.209, "step": 2482 }, { "epoch": 0.043972780441587304, "grad_norm": 2.22566556930542, "learning_rate": 2.99846423878932e-05, "loss": 0.2023, "step": 2483 }, { "epoch": 0.04399048997861573, "grad_norm": 2.0700395107269287, "learning_rate": 2.998460344058025e-05, "loss": 0.1986, "step": 2484 }, { "epoch": 0.044008199515644164, "grad_norm": 2.0915846824645996, "learning_rate": 2.998456444396947e-05, "loss": 0.1792, "step": 2485 }, { "epoch": 0.04402590905267259, "grad_norm": 2.3726208209991455, "learning_rate": 2.9984525398060988e-05, "loss": 0.1923, "step": 2486 }, { "epoch": 0.04404361858970102, "grad_norm": 1.572406530380249, "learning_rate": 2.9984486302854935e-05, "loss": 0.1479, "step": 2487 }, { "epoch": 0.04406132812672945, "grad_norm": 2.1198928356170654, "learning_rate": 2.9984447158351436e-05, "loss": 0.1865, "step": 2488 }, { "epoch": 0.044079037663757877, "grad_norm": 2.098003387451172, "learning_rate": 2.9984407964550623e-05, "loss": 0.2202, "step": 2489 }, { "epoch": 0.0440967472007863, "grad_norm": 1.7335485219955444, "learning_rate": 2.998436872145262e-05, "loss": 0.1327, "step": 2490 }, { "epoch": 0.04411445673781473, "grad_norm": 1.8441543579101562, "learning_rate": 2.9984329429057563e-05, "loss": 0.1765, "step": 2491 }, { "epoch": 0.04413216627484316, "grad_norm": 1.8586108684539795, "learning_rate": 2.9984290087365576e-05, "loss": 0.1776, "step": 2492 }, { "epoch": 0.04414987581187159, "grad_norm": 2.4969258308410645, "learning_rate": 2.9984250696376794e-05, "loss": 0.1798, "step": 2493 }, { "epoch": 0.044167585348900015, "grad_norm": 2.3285882472991943, "learning_rate": 2.9984211256091335e-05, "loss": 0.1915, "step": 2494 }, { "epoch": 0.04418529488592844, "grad_norm": 2.0276854038238525, "learning_rate": 2.9984171766509344e-05, "loss": 0.2052, "step": 2495 }, { "epoch": 0.044203004422956875, "grad_norm": 4.562910556793213, "learning_rate": 2.998413222763094e-05, "loss": 0.1631, "step": 2496 }, { "epoch": 0.0442207139599853, "grad_norm": 1.9663569927215576, "learning_rate": 2.9984092639456256e-05, "loss": 0.1639, "step": 2497 }, { "epoch": 0.04423842349701373, "grad_norm": 1.7711421251296997, "learning_rate": 2.9984053001985423e-05, "loss": 0.1435, "step": 2498 }, { "epoch": 0.044256133034042154, "grad_norm": 1.7748204469680786, "learning_rate": 2.9984013315218573e-05, "loss": 0.1522, "step": 2499 }, { "epoch": 0.04427384257107059, "grad_norm": 1.8222805261611938, "learning_rate": 2.9983973579155832e-05, "loss": 0.1496, "step": 2500 }, { "epoch": 0.044291552108099014, "grad_norm": 1.152324914932251, "learning_rate": 2.9983933793797336e-05, "loss": 0.1829, "step": 2501 }, { "epoch": 0.04430926164512744, "grad_norm": 1.9601757526397705, "learning_rate": 2.9983893959143213e-05, "loss": 0.1965, "step": 2502 }, { "epoch": 0.04432697118215587, "grad_norm": 1.670836091041565, "learning_rate": 2.9983854075193595e-05, "loss": 0.1775, "step": 2503 }, { "epoch": 0.0443446807191843, "grad_norm": 2.509019613265991, "learning_rate": 2.9983814141948615e-05, "loss": 0.1753, "step": 2504 }, { "epoch": 0.044362390256212726, "grad_norm": 1.5353474617004395, "learning_rate": 2.9983774159408393e-05, "loss": 0.1232, "step": 2505 }, { "epoch": 0.04438009979324115, "grad_norm": 2.9970192909240723, "learning_rate": 2.9983734127573074e-05, "loss": 0.2215, "step": 2506 }, { "epoch": 0.044397809330269586, "grad_norm": 1.975069284439087, "learning_rate": 2.9983694046442785e-05, "loss": 0.2202, "step": 2507 }, { "epoch": 0.04441551886729801, "grad_norm": 1.5181915760040283, "learning_rate": 2.9983653916017662e-05, "loss": 0.1784, "step": 2508 }, { "epoch": 0.04443322840432644, "grad_norm": 2.272921323776245, "learning_rate": 2.9983613736297834e-05, "loss": 0.1894, "step": 2509 }, { "epoch": 0.044450937941354865, "grad_norm": 2.0561933517456055, "learning_rate": 2.9983573507283426e-05, "loss": 0.2007, "step": 2510 }, { "epoch": 0.0444686474783833, "grad_norm": 1.4494210481643677, "learning_rate": 2.9983533228974577e-05, "loss": 0.152, "step": 2511 }, { "epoch": 0.044486357015411725, "grad_norm": 2.3468589782714844, "learning_rate": 2.9983492901371425e-05, "loss": 0.218, "step": 2512 }, { "epoch": 0.04450406655244015, "grad_norm": 1.375985026359558, "learning_rate": 2.9983452524474094e-05, "loss": 0.1383, "step": 2513 }, { "epoch": 0.044521776089468584, "grad_norm": 1.2103805541992188, "learning_rate": 2.9983412098282718e-05, "loss": 0.1811, "step": 2514 }, { "epoch": 0.04453948562649701, "grad_norm": 1.5035001039505005, "learning_rate": 2.998337162279743e-05, "loss": 0.2033, "step": 2515 }, { "epoch": 0.04455719516352544, "grad_norm": 1.7735185623168945, "learning_rate": 2.998333109801837e-05, "loss": 0.1758, "step": 2516 }, { "epoch": 0.04457490470055386, "grad_norm": 2.458148241043091, "learning_rate": 2.9983290523945664e-05, "loss": 0.1822, "step": 2517 }, { "epoch": 0.0445926142375823, "grad_norm": 1.4281105995178223, "learning_rate": 2.9983249900579445e-05, "loss": 0.158, "step": 2518 }, { "epoch": 0.04461032377461072, "grad_norm": 1.7621126174926758, "learning_rate": 2.9983209227919855e-05, "loss": 0.173, "step": 2519 }, { "epoch": 0.04462803331163915, "grad_norm": 3.72261381149292, "learning_rate": 2.998316850596702e-05, "loss": 0.1713, "step": 2520 }, { "epoch": 0.044645742848667576, "grad_norm": 2.3557777404785156, "learning_rate": 2.9983127734721073e-05, "loss": 0.2282, "step": 2521 }, { "epoch": 0.04466345238569601, "grad_norm": 1.4698460102081299, "learning_rate": 2.9983086914182154e-05, "loss": 0.2064, "step": 2522 }, { "epoch": 0.044681161922724436, "grad_norm": 1.506950855255127, "learning_rate": 2.9983046044350394e-05, "loss": 0.182, "step": 2523 }, { "epoch": 0.04469887145975286, "grad_norm": 2.022977352142334, "learning_rate": 2.9983005125225925e-05, "loss": 0.1941, "step": 2524 }, { "epoch": 0.04471658099678129, "grad_norm": 1.7029755115509033, "learning_rate": 2.9982964156808888e-05, "loss": 0.1527, "step": 2525 }, { "epoch": 0.04473429053380972, "grad_norm": 1.7205836772918701, "learning_rate": 2.9982923139099415e-05, "loss": 0.1322, "step": 2526 }, { "epoch": 0.04475200007083815, "grad_norm": 1.5444940328598022, "learning_rate": 2.998288207209764e-05, "loss": 0.1354, "step": 2527 }, { "epoch": 0.044769709607866574, "grad_norm": 1.634353518486023, "learning_rate": 2.9982840955803697e-05, "loss": 0.2038, "step": 2528 }, { "epoch": 0.04478741914489501, "grad_norm": 1.7578186988830566, "learning_rate": 2.9982799790217722e-05, "loss": 0.1604, "step": 2529 }, { "epoch": 0.044805128681923434, "grad_norm": 3.156792163848877, "learning_rate": 2.9982758575339854e-05, "loss": 0.2042, "step": 2530 }, { "epoch": 0.04482283821895186, "grad_norm": 2.105464220046997, "learning_rate": 2.998271731117022e-05, "loss": 0.199, "step": 2531 }, { "epoch": 0.04484054775598029, "grad_norm": 1.8668879270553589, "learning_rate": 2.9982675997708967e-05, "loss": 0.2316, "step": 2532 }, { "epoch": 0.04485825729300872, "grad_norm": 2.137086868286133, "learning_rate": 2.9982634634956222e-05, "loss": 0.1938, "step": 2533 }, { "epoch": 0.044875966830037146, "grad_norm": 1.602533221244812, "learning_rate": 2.998259322291213e-05, "loss": 0.1872, "step": 2534 }, { "epoch": 0.04489367636706557, "grad_norm": 2.3678860664367676, "learning_rate": 2.998255176157682e-05, "loss": 0.1641, "step": 2535 }, { "epoch": 0.044911385904094, "grad_norm": 2.459117889404297, "learning_rate": 2.998251025095042e-05, "loss": 0.1757, "step": 2536 }, { "epoch": 0.04492909544112243, "grad_norm": 1.751866340637207, "learning_rate": 2.9982468691033086e-05, "loss": 0.2214, "step": 2537 }, { "epoch": 0.04494680497815086, "grad_norm": 1.6908048391342163, "learning_rate": 2.9982427081824945e-05, "loss": 0.157, "step": 2538 }, { "epoch": 0.044964514515179285, "grad_norm": 2.0545620918273926, "learning_rate": 2.9982385423326134e-05, "loss": 0.208, "step": 2539 }, { "epoch": 0.04498222405220772, "grad_norm": 1.6972901821136475, "learning_rate": 2.9982343715536788e-05, "loss": 0.1804, "step": 2540 }, { "epoch": 0.044999933589236145, "grad_norm": 2.1413326263427734, "learning_rate": 2.9982301958457047e-05, "loss": 0.1892, "step": 2541 }, { "epoch": 0.04501764312626457, "grad_norm": 1.985202431678772, "learning_rate": 2.998226015208705e-05, "loss": 0.1694, "step": 2542 }, { "epoch": 0.045035352663293, "grad_norm": 2.3250725269317627, "learning_rate": 2.998221829642693e-05, "loss": 0.1798, "step": 2543 }, { "epoch": 0.04505306220032143, "grad_norm": 1.8851033449172974, "learning_rate": 2.9982176391476832e-05, "loss": 0.2515, "step": 2544 }, { "epoch": 0.04507077173734986, "grad_norm": 2.0241103172302246, "learning_rate": 2.9982134437236885e-05, "loss": 0.1548, "step": 2545 }, { "epoch": 0.045088481274378284, "grad_norm": 2.1660311222076416, "learning_rate": 2.9982092433707232e-05, "loss": 0.1724, "step": 2546 }, { "epoch": 0.04510619081140671, "grad_norm": 1.786618709564209, "learning_rate": 2.998205038088801e-05, "loss": 0.1525, "step": 2547 }, { "epoch": 0.04512390034843514, "grad_norm": 1.9267420768737793, "learning_rate": 2.9982008278779357e-05, "loss": 0.1828, "step": 2548 }, { "epoch": 0.04514160988546357, "grad_norm": 1.427704930305481, "learning_rate": 2.998196612738141e-05, "loss": 0.1457, "step": 2549 }, { "epoch": 0.045159319422491996, "grad_norm": 1.7700005769729614, "learning_rate": 2.9981923926694314e-05, "loss": 0.1719, "step": 2550 }, { "epoch": 0.04517702895952042, "grad_norm": 1.4404723644256592, "learning_rate": 2.9981881676718203e-05, "loss": 0.1366, "step": 2551 }, { "epoch": 0.045194738496548856, "grad_norm": 3.2025582790374756, "learning_rate": 2.9981839377453218e-05, "loss": 0.2196, "step": 2552 }, { "epoch": 0.04521244803357728, "grad_norm": 1.7729920148849487, "learning_rate": 2.9981797028899494e-05, "loss": 0.1638, "step": 2553 }, { "epoch": 0.04523015757060571, "grad_norm": 1.803598165512085, "learning_rate": 2.9981754631057173e-05, "loss": 0.1601, "step": 2554 }, { "epoch": 0.04524786710763414, "grad_norm": 1.5210049152374268, "learning_rate": 2.9981712183926395e-05, "loss": 0.1671, "step": 2555 }, { "epoch": 0.04526557664466257, "grad_norm": 1.9993326663970947, "learning_rate": 2.99816696875073e-05, "loss": 0.1219, "step": 2556 }, { "epoch": 0.045283286181690995, "grad_norm": 1.8954236507415771, "learning_rate": 2.9981627141800025e-05, "loss": 0.1925, "step": 2557 }, { "epoch": 0.04530099571871942, "grad_norm": 1.807051181793213, "learning_rate": 2.9981584546804715e-05, "loss": 0.1582, "step": 2558 }, { "epoch": 0.045318705255747854, "grad_norm": 1.5390077829360962, "learning_rate": 2.9981541902521506e-05, "loss": 0.1829, "step": 2559 }, { "epoch": 0.04533641479277628, "grad_norm": 1.7588456869125366, "learning_rate": 2.9981499208950534e-05, "loss": 0.2354, "step": 2560 }, { "epoch": 0.04535412432980471, "grad_norm": 1.8480587005615234, "learning_rate": 2.998145646609195e-05, "loss": 0.2423, "step": 2561 }, { "epoch": 0.04537183386683313, "grad_norm": 1.3256620168685913, "learning_rate": 2.998141367394589e-05, "loss": 0.1459, "step": 2562 }, { "epoch": 0.04538954340386157, "grad_norm": 2.2453765869140625, "learning_rate": 2.9981370832512494e-05, "loss": 0.2112, "step": 2563 }, { "epoch": 0.04540725294088999, "grad_norm": 1.8947352170944214, "learning_rate": 2.9981327941791906e-05, "loss": 0.1896, "step": 2564 }, { "epoch": 0.04542496247791842, "grad_norm": 1.7132415771484375, "learning_rate": 2.9981285001784257e-05, "loss": 0.1644, "step": 2565 }, { "epoch": 0.04544267201494685, "grad_norm": 1.4057778120040894, "learning_rate": 2.9981242012489702e-05, "loss": 0.1529, "step": 2566 }, { "epoch": 0.04546038155197528, "grad_norm": 2.3252487182617188, "learning_rate": 2.9981198973908374e-05, "loss": 0.1906, "step": 2567 }, { "epoch": 0.045478091089003705, "grad_norm": 1.9252400398254395, "learning_rate": 2.9981155886040414e-05, "loss": 0.1885, "step": 2568 }, { "epoch": 0.04549580062603213, "grad_norm": 1.9103201627731323, "learning_rate": 2.9981112748885972e-05, "loss": 0.1372, "step": 2569 }, { "epoch": 0.045513510163060565, "grad_norm": 2.1774187088012695, "learning_rate": 2.998106956244518e-05, "loss": 0.201, "step": 2570 }, { "epoch": 0.04553121970008899, "grad_norm": 2.225958824157715, "learning_rate": 2.9981026326718186e-05, "loss": 0.2498, "step": 2571 }, { "epoch": 0.04554892923711742, "grad_norm": 2.1985089778900146, "learning_rate": 2.9980983041705127e-05, "loss": 0.2426, "step": 2572 }, { "epoch": 0.045566638774145844, "grad_norm": 1.3467127084732056, "learning_rate": 2.9980939707406155e-05, "loss": 0.1847, "step": 2573 }, { "epoch": 0.04558434831117428, "grad_norm": 1.6844978332519531, "learning_rate": 2.9980896323821402e-05, "loss": 0.2154, "step": 2574 }, { "epoch": 0.045602057848202704, "grad_norm": 2.0126919746398926, "learning_rate": 2.998085289095102e-05, "loss": 0.1568, "step": 2575 }, { "epoch": 0.04561976738523113, "grad_norm": 1.5756232738494873, "learning_rate": 2.9980809408795143e-05, "loss": 0.1662, "step": 2576 }, { "epoch": 0.04563747692225956, "grad_norm": 2.104489803314209, "learning_rate": 2.9980765877353918e-05, "loss": 0.1744, "step": 2577 }, { "epoch": 0.04565518645928799, "grad_norm": 1.4279826879501343, "learning_rate": 2.998072229662749e-05, "loss": 0.1913, "step": 2578 }, { "epoch": 0.045672895996316416, "grad_norm": 2.263439416885376, "learning_rate": 2.9980678666616002e-05, "loss": 0.1892, "step": 2579 }, { "epoch": 0.04569060553334484, "grad_norm": 1.6031067371368408, "learning_rate": 2.9980634987319596e-05, "loss": 0.1954, "step": 2580 }, { "epoch": 0.045708315070373276, "grad_norm": 1.6753218173980713, "learning_rate": 2.9980591258738415e-05, "loss": 0.155, "step": 2581 }, { "epoch": 0.0457260246074017, "grad_norm": 1.732743501663208, "learning_rate": 2.9980547480872606e-05, "loss": 0.1411, "step": 2582 }, { "epoch": 0.04574373414443013, "grad_norm": 2.0850260257720947, "learning_rate": 2.998050365372231e-05, "loss": 0.1662, "step": 2583 }, { "epoch": 0.045761443681458555, "grad_norm": 1.902511477470398, "learning_rate": 2.9980459777287668e-05, "loss": 0.1306, "step": 2584 }, { "epoch": 0.04577915321848699, "grad_norm": 2.0567078590393066, "learning_rate": 2.9980415851568832e-05, "loss": 0.1696, "step": 2585 }, { "epoch": 0.045796862755515415, "grad_norm": 1.8677716255187988, "learning_rate": 2.9980371876565943e-05, "loss": 0.212, "step": 2586 }, { "epoch": 0.04581457229254384, "grad_norm": 1.43766188621521, "learning_rate": 2.998032785227915e-05, "loss": 0.1429, "step": 2587 }, { "epoch": 0.04583228182957227, "grad_norm": 1.40674889087677, "learning_rate": 2.9980283778708588e-05, "loss": 0.2257, "step": 2588 }, { "epoch": 0.0458499913666007, "grad_norm": 1.762457013130188, "learning_rate": 2.9980239655854404e-05, "loss": 0.1621, "step": 2589 }, { "epoch": 0.04586770090362913, "grad_norm": 2.1408960819244385, "learning_rate": 2.9980195483716753e-05, "loss": 0.2008, "step": 2590 }, { "epoch": 0.045885410440657554, "grad_norm": 2.562091112136841, "learning_rate": 2.998015126229577e-05, "loss": 0.16, "step": 2591 }, { "epoch": 0.04590311997768598, "grad_norm": 2.7890548706054688, "learning_rate": 2.998010699159161e-05, "loss": 0.2229, "step": 2592 }, { "epoch": 0.04592082951471441, "grad_norm": 1.7665079832077026, "learning_rate": 2.9980062671604408e-05, "loss": 0.1666, "step": 2593 }, { "epoch": 0.04593853905174284, "grad_norm": 1.8696693181991577, "learning_rate": 2.9980018302334316e-05, "loss": 0.1566, "step": 2594 }, { "epoch": 0.045956248588771266, "grad_norm": 1.333473563194275, "learning_rate": 2.9979973883781482e-05, "loss": 0.138, "step": 2595 }, { "epoch": 0.0459739581257997, "grad_norm": 2.2081265449523926, "learning_rate": 2.997992941594605e-05, "loss": 0.1662, "step": 2596 }, { "epoch": 0.045991667662828126, "grad_norm": 1.9787554740905762, "learning_rate": 2.9979884898828162e-05, "loss": 0.2232, "step": 2597 }, { "epoch": 0.04600937719985655, "grad_norm": 2.236600875854492, "learning_rate": 2.9979840332427967e-05, "loss": 0.2001, "step": 2598 }, { "epoch": 0.04602708673688498, "grad_norm": 1.3712018728256226, "learning_rate": 2.9979795716745612e-05, "loss": 0.1763, "step": 2599 }, { "epoch": 0.04604479627391341, "grad_norm": 1.101959466934204, "learning_rate": 2.997975105178125e-05, "loss": 0.1442, "step": 2600 }, { "epoch": 0.04606250581094184, "grad_norm": 1.1772494316101074, "learning_rate": 2.9979706337535016e-05, "loss": 0.1409, "step": 2601 }, { "epoch": 0.046080215347970264, "grad_norm": 1.3677902221679688, "learning_rate": 2.9979661574007066e-05, "loss": 0.2024, "step": 2602 }, { "epoch": 0.04609792488499869, "grad_norm": 1.8728842735290527, "learning_rate": 2.9979616761197543e-05, "loss": 0.2476, "step": 2603 }, { "epoch": 0.046115634422027124, "grad_norm": 2.3003931045532227, "learning_rate": 2.99795718991066e-05, "loss": 0.1872, "step": 2604 }, { "epoch": 0.04613334395905555, "grad_norm": 1.7872792482376099, "learning_rate": 2.9979526987734374e-05, "loss": 0.181, "step": 2605 }, { "epoch": 0.04615105349608398, "grad_norm": 1.818526029586792, "learning_rate": 2.9979482027081023e-05, "loss": 0.2042, "step": 2606 }, { "epoch": 0.04616876303311241, "grad_norm": 1.8602324724197388, "learning_rate": 2.997943701714669e-05, "loss": 0.1859, "step": 2607 }, { "epoch": 0.04618647257014084, "grad_norm": 2.0682966709136963, "learning_rate": 2.9979391957931527e-05, "loss": 0.2036, "step": 2608 }, { "epoch": 0.04620418210716926, "grad_norm": 1.9991354942321777, "learning_rate": 2.9979346849435677e-05, "loss": 0.1646, "step": 2609 }, { "epoch": 0.04622189164419769, "grad_norm": 2.0298221111297607, "learning_rate": 2.997930169165929e-05, "loss": 0.1791, "step": 2610 }, { "epoch": 0.04623960118122612, "grad_norm": 1.915655493736267, "learning_rate": 2.997925648460252e-05, "loss": 0.1502, "step": 2611 }, { "epoch": 0.04625731071825455, "grad_norm": 1.6928459405899048, "learning_rate": 2.997921122826551e-05, "loss": 0.1693, "step": 2612 }, { "epoch": 0.046275020255282975, "grad_norm": 1.9359744787216187, "learning_rate": 2.9979165922648405e-05, "loss": 0.1519, "step": 2613 }, { "epoch": 0.0462927297923114, "grad_norm": 1.3229843378067017, "learning_rate": 2.9979120567751362e-05, "loss": 0.1329, "step": 2614 }, { "epoch": 0.046310439329339835, "grad_norm": 1.8399903774261475, "learning_rate": 2.9979075163574528e-05, "loss": 0.2111, "step": 2615 }, { "epoch": 0.04632814886636826, "grad_norm": 1.825600266456604, "learning_rate": 2.9979029710118052e-05, "loss": 0.1825, "step": 2616 }, { "epoch": 0.04634585840339669, "grad_norm": 1.6459757089614868, "learning_rate": 2.9978984207382083e-05, "loss": 0.169, "step": 2617 }, { "epoch": 0.046363567940425114, "grad_norm": 1.1455320119857788, "learning_rate": 2.997893865536677e-05, "loss": 0.1579, "step": 2618 }, { "epoch": 0.04638127747745355, "grad_norm": 1.5305365324020386, "learning_rate": 2.9978893054072265e-05, "loss": 0.1752, "step": 2619 }, { "epoch": 0.046398987014481974, "grad_norm": 1.6092201471328735, "learning_rate": 2.9978847403498717e-05, "loss": 0.1468, "step": 2620 }, { "epoch": 0.0464166965515104, "grad_norm": 1.7624555826187134, "learning_rate": 2.997880170364628e-05, "loss": 0.1477, "step": 2621 }, { "epoch": 0.046434406088538834, "grad_norm": 1.8936991691589355, "learning_rate": 2.997875595451509e-05, "loss": 0.1819, "step": 2622 }, { "epoch": 0.04645211562556726, "grad_norm": 2.3372669219970703, "learning_rate": 2.9978710156105318e-05, "loss": 0.229, "step": 2623 }, { "epoch": 0.046469825162595686, "grad_norm": 1.6778687238693237, "learning_rate": 2.99786643084171e-05, "loss": 0.2136, "step": 2624 }, { "epoch": 0.04648753469962411, "grad_norm": 2.0520973205566406, "learning_rate": 2.997861841145059e-05, "loss": 0.2278, "step": 2625 }, { "epoch": 0.046505244236652546, "grad_norm": 1.670636773109436, "learning_rate": 2.9978572465205942e-05, "loss": 0.1798, "step": 2626 }, { "epoch": 0.04652295377368097, "grad_norm": 2.127058506011963, "learning_rate": 2.997852646968331e-05, "loss": 0.2388, "step": 2627 }, { "epoch": 0.0465406633107094, "grad_norm": 1.5874862670898438, "learning_rate": 2.9978480424882836e-05, "loss": 0.1728, "step": 2628 }, { "epoch": 0.046558372847737825, "grad_norm": 1.7931638956069946, "learning_rate": 2.997843433080468e-05, "loss": 0.1818, "step": 2629 }, { "epoch": 0.04657608238476626, "grad_norm": 1.8319926261901855, "learning_rate": 2.9978388187448985e-05, "loss": 0.2004, "step": 2630 }, { "epoch": 0.046593791921794685, "grad_norm": 1.673984169960022, "learning_rate": 2.997834199481591e-05, "loss": 0.1373, "step": 2631 }, { "epoch": 0.04661150145882311, "grad_norm": 1.9390535354614258, "learning_rate": 2.9978295752905605e-05, "loss": 0.2019, "step": 2632 }, { "epoch": 0.046629210995851544, "grad_norm": 1.5962989330291748, "learning_rate": 2.9978249461718223e-05, "loss": 0.1643, "step": 2633 }, { "epoch": 0.04664692053287997, "grad_norm": 2.1097774505615234, "learning_rate": 2.9978203121253914e-05, "loss": 0.1801, "step": 2634 }, { "epoch": 0.0466646300699084, "grad_norm": 1.557174563407898, "learning_rate": 2.9978156731512832e-05, "loss": 0.1782, "step": 2635 }, { "epoch": 0.046682339606936823, "grad_norm": 1.7956620454788208, "learning_rate": 2.9978110292495128e-05, "loss": 0.1261, "step": 2636 }, { "epoch": 0.04670004914396526, "grad_norm": 2.438577890396118, "learning_rate": 2.997806380420096e-05, "loss": 0.2034, "step": 2637 }, { "epoch": 0.04671775868099368, "grad_norm": 1.9179133176803589, "learning_rate": 2.997801726663047e-05, "loss": 0.1164, "step": 2638 }, { "epoch": 0.04673546821802211, "grad_norm": 1.9359207153320312, "learning_rate": 2.9977970679783827e-05, "loss": 0.2175, "step": 2639 }, { "epoch": 0.046753177755050536, "grad_norm": 1.9558801651000977, "learning_rate": 2.997792404366117e-05, "loss": 0.1728, "step": 2640 }, { "epoch": 0.04677088729207897, "grad_norm": 1.7636207342147827, "learning_rate": 2.9977877358262658e-05, "loss": 0.1559, "step": 2641 }, { "epoch": 0.046788596829107396, "grad_norm": 1.594603419303894, "learning_rate": 2.9977830623588442e-05, "loss": 0.143, "step": 2642 }, { "epoch": 0.04680630636613582, "grad_norm": 1.6243133544921875, "learning_rate": 2.9977783839638685e-05, "loss": 0.1691, "step": 2643 }, { "epoch": 0.04682401590316425, "grad_norm": 1.6123768091201782, "learning_rate": 2.9977737006413527e-05, "loss": 0.2142, "step": 2644 }, { "epoch": 0.04684172544019268, "grad_norm": 3.6376113891601562, "learning_rate": 2.9977690123913133e-05, "loss": 0.14, "step": 2645 }, { "epoch": 0.04685943497722111, "grad_norm": 2.9294261932373047, "learning_rate": 2.9977643192137654e-05, "loss": 0.1782, "step": 2646 }, { "epoch": 0.046877144514249534, "grad_norm": 1.6237003803253174, "learning_rate": 2.9977596211087243e-05, "loss": 0.1844, "step": 2647 }, { "epoch": 0.04689485405127797, "grad_norm": 1.8089818954467773, "learning_rate": 2.9977549180762055e-05, "loss": 0.1511, "step": 2648 }, { "epoch": 0.046912563588306394, "grad_norm": 2.155940532684326, "learning_rate": 2.9977502101162244e-05, "loss": 0.1736, "step": 2649 }, { "epoch": 0.04693027312533482, "grad_norm": 2.2263336181640625, "learning_rate": 2.9977454972287968e-05, "loss": 0.2693, "step": 2650 }, { "epoch": 0.04694798266236325, "grad_norm": 1.9197582006454468, "learning_rate": 2.997740779413938e-05, "loss": 0.1759, "step": 2651 }, { "epoch": 0.04696569219939168, "grad_norm": 1.5257139205932617, "learning_rate": 2.9977360566716635e-05, "loss": 0.1504, "step": 2652 }, { "epoch": 0.046983401736420106, "grad_norm": 1.7561298608779907, "learning_rate": 2.9977313290019887e-05, "loss": 0.1941, "step": 2653 }, { "epoch": 0.04700111127344853, "grad_norm": 1.7701205015182495, "learning_rate": 2.9977265964049295e-05, "loss": 0.1499, "step": 2654 }, { "epoch": 0.04701882081047696, "grad_norm": 1.9801201820373535, "learning_rate": 2.9977218588805013e-05, "loss": 0.1664, "step": 2655 }, { "epoch": 0.04703653034750539, "grad_norm": 2.101966619491577, "learning_rate": 2.9977171164287192e-05, "loss": 0.1826, "step": 2656 }, { "epoch": 0.04705423988453382, "grad_norm": 1.9627333879470825, "learning_rate": 2.9977123690495998e-05, "loss": 0.1624, "step": 2657 }, { "epoch": 0.047071949421562245, "grad_norm": 1.5219312906265259, "learning_rate": 2.997707616743158e-05, "loss": 0.1651, "step": 2658 }, { "epoch": 0.04708965895859067, "grad_norm": 1.9317833185195923, "learning_rate": 2.9977028595094094e-05, "loss": 0.237, "step": 2659 }, { "epoch": 0.047107368495619105, "grad_norm": 2.787123441696167, "learning_rate": 2.9976980973483704e-05, "loss": 0.208, "step": 2660 }, { "epoch": 0.04712507803264753, "grad_norm": 2.2434990406036377, "learning_rate": 2.997693330260056e-05, "loss": 0.1898, "step": 2661 }, { "epoch": 0.04714278756967596, "grad_norm": 1.6523746252059937, "learning_rate": 2.9976885582444818e-05, "loss": 0.1944, "step": 2662 }, { "epoch": 0.04716049710670439, "grad_norm": 2.303032636642456, "learning_rate": 2.997683781301664e-05, "loss": 0.1732, "step": 2663 }, { "epoch": 0.04717820664373282, "grad_norm": 1.8467382192611694, "learning_rate": 2.997678999431618e-05, "loss": 0.201, "step": 2664 }, { "epoch": 0.047195916180761244, "grad_norm": 2.1439130306243896, "learning_rate": 2.9976742126343592e-05, "loss": 0.1669, "step": 2665 }, { "epoch": 0.04721362571778967, "grad_norm": 1.521218180656433, "learning_rate": 2.997669420909904e-05, "loss": 0.1429, "step": 2666 }, { "epoch": 0.0472313352548181, "grad_norm": 1.5563980340957642, "learning_rate": 2.9976646242582677e-05, "loss": 0.1978, "step": 2667 }, { "epoch": 0.04724904479184653, "grad_norm": 1.854658842086792, "learning_rate": 2.9976598226794666e-05, "loss": 0.1921, "step": 2668 }, { "epoch": 0.047266754328874956, "grad_norm": 1.7466328144073486, "learning_rate": 2.9976550161735158e-05, "loss": 0.2052, "step": 2669 }, { "epoch": 0.04728446386590338, "grad_norm": 1.5880041122436523, "learning_rate": 2.9976502047404316e-05, "loss": 0.1393, "step": 2670 }, { "epoch": 0.047302173402931816, "grad_norm": 2.15632700920105, "learning_rate": 2.99764538838023e-05, "loss": 0.1711, "step": 2671 }, { "epoch": 0.04731988293996024, "grad_norm": 3.3156886100769043, "learning_rate": 2.9976405670929258e-05, "loss": 0.1685, "step": 2672 }, { "epoch": 0.04733759247698867, "grad_norm": 1.7722173929214478, "learning_rate": 2.9976357408785363e-05, "loss": 0.1658, "step": 2673 }, { "epoch": 0.0473553020140171, "grad_norm": 1.8793518543243408, "learning_rate": 2.9976309097370768e-05, "loss": 0.1756, "step": 2674 }, { "epoch": 0.04737301155104553, "grad_norm": 1.9217017889022827, "learning_rate": 2.9976260736685625e-05, "loss": 0.1825, "step": 2675 }, { "epoch": 0.047390721088073955, "grad_norm": 1.538160800933838, "learning_rate": 2.99762123267301e-05, "loss": 0.1735, "step": 2676 }, { "epoch": 0.04740843062510238, "grad_norm": 1.5932650566101074, "learning_rate": 2.9976163867504356e-05, "loss": 0.1874, "step": 2677 }, { "epoch": 0.047426140162130814, "grad_norm": 2.5715229511260986, "learning_rate": 2.9976115359008542e-05, "loss": 0.1842, "step": 2678 }, { "epoch": 0.04744384969915924, "grad_norm": 2.0285801887512207, "learning_rate": 2.9976066801242827e-05, "loss": 0.1687, "step": 2679 }, { "epoch": 0.04746155923618767, "grad_norm": 2.0558993816375732, "learning_rate": 2.9976018194207367e-05, "loss": 0.1939, "step": 2680 }, { "epoch": 0.04747926877321609, "grad_norm": 2.2264344692230225, "learning_rate": 2.9975969537902318e-05, "loss": 0.1764, "step": 2681 }, { "epoch": 0.04749697831024453, "grad_norm": 1.8748157024383545, "learning_rate": 2.997592083232785e-05, "loss": 0.1604, "step": 2682 }, { "epoch": 0.04751468784727295, "grad_norm": 3.0021748542785645, "learning_rate": 2.9975872077484113e-05, "loss": 0.131, "step": 2683 }, { "epoch": 0.04753239738430138, "grad_norm": 1.8173341751098633, "learning_rate": 2.9975823273371273e-05, "loss": 0.1653, "step": 2684 }, { "epoch": 0.047550106921329806, "grad_norm": 2.054471731185913, "learning_rate": 2.997577441998949e-05, "loss": 0.1946, "step": 2685 }, { "epoch": 0.04756781645835824, "grad_norm": 2.124411106109619, "learning_rate": 2.997572551733892e-05, "loss": 0.2186, "step": 2686 }, { "epoch": 0.047585525995386665, "grad_norm": 2.272366523742676, "learning_rate": 2.9975676565419733e-05, "loss": 0.1939, "step": 2687 }, { "epoch": 0.04760323553241509, "grad_norm": 1.5025014877319336, "learning_rate": 2.9975627564232077e-05, "loss": 0.1589, "step": 2688 }, { "epoch": 0.047620945069443525, "grad_norm": 1.7659411430358887, "learning_rate": 2.9975578513776126e-05, "loss": 0.1588, "step": 2689 }, { "epoch": 0.04763865460647195, "grad_norm": 2.192265033721924, "learning_rate": 2.997552941405204e-05, "loss": 0.225, "step": 2690 }, { "epoch": 0.04765636414350038, "grad_norm": 1.8249107599258423, "learning_rate": 2.997548026505997e-05, "loss": 0.1928, "step": 2691 }, { "epoch": 0.047674073680528804, "grad_norm": 2.7225987911224365, "learning_rate": 2.997543106680009e-05, "loss": 0.1836, "step": 2692 }, { "epoch": 0.04769178321755724, "grad_norm": 2.188990592956543, "learning_rate": 2.9975381819272556e-05, "loss": 0.1437, "step": 2693 }, { "epoch": 0.047709492754585664, "grad_norm": 1.976647138595581, "learning_rate": 2.9975332522477527e-05, "loss": 0.183, "step": 2694 }, { "epoch": 0.04772720229161409, "grad_norm": 1.4039450883865356, "learning_rate": 2.997528317641517e-05, "loss": 0.1344, "step": 2695 }, { "epoch": 0.04774491182864252, "grad_norm": 2.2157163619995117, "learning_rate": 2.9975233781085646e-05, "loss": 0.2258, "step": 2696 }, { "epoch": 0.04776262136567095, "grad_norm": 2.681471347808838, "learning_rate": 2.997518433648912e-05, "loss": 0.2001, "step": 2697 }, { "epoch": 0.047780330902699376, "grad_norm": 2.4046380519866943, "learning_rate": 2.997513484262575e-05, "loss": 0.1936, "step": 2698 }, { "epoch": 0.0477980404397278, "grad_norm": 1.7457486391067505, "learning_rate": 2.99750852994957e-05, "loss": 0.1931, "step": 2699 }, { "epoch": 0.047815749976756236, "grad_norm": 1.3098053932189941, "learning_rate": 2.9975035707099133e-05, "loss": 0.1359, "step": 2700 }, { "epoch": 0.04783345951378466, "grad_norm": 1.5603445768356323, "learning_rate": 2.9974986065436218e-05, "loss": 0.1525, "step": 2701 }, { "epoch": 0.04785116905081309, "grad_norm": 1.9239853620529175, "learning_rate": 2.997493637450711e-05, "loss": 0.0963, "step": 2702 }, { "epoch": 0.047868878587841515, "grad_norm": 1.864026427268982, "learning_rate": 2.997488663431198e-05, "loss": 0.1802, "step": 2703 }, { "epoch": 0.04788658812486995, "grad_norm": 2.456207513809204, "learning_rate": 2.9974836844850982e-05, "loss": 0.1862, "step": 2704 }, { "epoch": 0.047904297661898375, "grad_norm": 1.9736324548721313, "learning_rate": 2.9974787006124286e-05, "loss": 0.1763, "step": 2705 }, { "epoch": 0.0479220071989268, "grad_norm": 2.265176296234131, "learning_rate": 2.997473711813206e-05, "loss": 0.22, "step": 2706 }, { "epoch": 0.04793971673595523, "grad_norm": 1.261269211769104, "learning_rate": 2.997468718087446e-05, "loss": 0.152, "step": 2707 }, { "epoch": 0.04795742627298366, "grad_norm": 1.6695139408111572, "learning_rate": 2.9974637194351653e-05, "loss": 0.1559, "step": 2708 }, { "epoch": 0.04797513581001209, "grad_norm": 2.4198522567749023, "learning_rate": 2.9974587158563806e-05, "loss": 0.2048, "step": 2709 }, { "epoch": 0.047992845347040514, "grad_norm": 1.2989453077316284, "learning_rate": 2.9974537073511083e-05, "loss": 0.1525, "step": 2710 }, { "epoch": 0.04801055488406894, "grad_norm": 2.0481133460998535, "learning_rate": 2.9974486939193643e-05, "loss": 0.1728, "step": 2711 }, { "epoch": 0.04802826442109737, "grad_norm": 2.0454013347625732, "learning_rate": 2.9974436755611662e-05, "loss": 0.2357, "step": 2712 }, { "epoch": 0.0480459739581258, "grad_norm": 1.7982118129730225, "learning_rate": 2.997438652276529e-05, "loss": 0.1478, "step": 2713 }, { "epoch": 0.048063683495154226, "grad_norm": 1.5058221817016602, "learning_rate": 2.997433624065471e-05, "loss": 0.2021, "step": 2714 }, { "epoch": 0.04808139303218266, "grad_norm": 2.0847978591918945, "learning_rate": 2.9974285909280075e-05, "loss": 0.169, "step": 2715 }, { "epoch": 0.048099102569211086, "grad_norm": 1.6160825490951538, "learning_rate": 2.9974235528641554e-05, "loss": 0.2068, "step": 2716 }, { "epoch": 0.04811681210623951, "grad_norm": 5.025996685028076, "learning_rate": 2.9974185098739312e-05, "loss": 0.182, "step": 2717 }, { "epoch": 0.04813452164326794, "grad_norm": 1.958884596824646, "learning_rate": 2.9974134619573513e-05, "loss": 0.1795, "step": 2718 }, { "epoch": 0.04815223118029637, "grad_norm": 2.676203489303589, "learning_rate": 2.997408409114433e-05, "loss": 0.2309, "step": 2719 }, { "epoch": 0.0481699407173248, "grad_norm": 1.7949854135513306, "learning_rate": 2.9974033513451923e-05, "loss": 0.1798, "step": 2720 }, { "epoch": 0.048187650254353225, "grad_norm": 2.0347161293029785, "learning_rate": 2.9973982886496462e-05, "loss": 0.1868, "step": 2721 }, { "epoch": 0.04820535979138165, "grad_norm": 2.9469785690307617, "learning_rate": 2.9973932210278106e-05, "loss": 0.1537, "step": 2722 }, { "epoch": 0.048223069328410084, "grad_norm": 1.8717178106307983, "learning_rate": 2.9973881484797033e-05, "loss": 0.1543, "step": 2723 }, { "epoch": 0.04824077886543851, "grad_norm": 1.9452978372573853, "learning_rate": 2.99738307100534e-05, "loss": 0.1114, "step": 2724 }, { "epoch": 0.04825848840246694, "grad_norm": 1.4051529169082642, "learning_rate": 2.9973779886047387e-05, "loss": 0.1377, "step": 2725 }, { "epoch": 0.04827619793949536, "grad_norm": 1.7389246225357056, "learning_rate": 2.997372901277915e-05, "loss": 0.2193, "step": 2726 }, { "epoch": 0.0482939074765238, "grad_norm": 1.9566961526870728, "learning_rate": 2.9973678090248854e-05, "loss": 0.1917, "step": 2727 }, { "epoch": 0.04831161701355222, "grad_norm": 1.929262399673462, "learning_rate": 2.9973627118456674e-05, "loss": 0.163, "step": 2728 }, { "epoch": 0.04832932655058065, "grad_norm": 2.259004831314087, "learning_rate": 2.9973576097402773e-05, "loss": 0.1837, "step": 2729 }, { "epoch": 0.04834703608760908, "grad_norm": 2.184138774871826, "learning_rate": 2.9973525027087323e-05, "loss": 0.1883, "step": 2730 }, { "epoch": 0.04836474562463751, "grad_norm": 1.7252370119094849, "learning_rate": 2.9973473907510493e-05, "loss": 0.1908, "step": 2731 }, { "epoch": 0.048382455161665935, "grad_norm": 2.299853801727295, "learning_rate": 2.9973422738672446e-05, "loss": 0.1498, "step": 2732 }, { "epoch": 0.04840016469869436, "grad_norm": 3.4859960079193115, "learning_rate": 2.997337152057335e-05, "loss": 0.1653, "step": 2733 }, { "epoch": 0.048417874235722795, "grad_norm": 2.885972023010254, "learning_rate": 2.997332025321338e-05, "loss": 0.1735, "step": 2734 }, { "epoch": 0.04843558377275122, "grad_norm": 1.8205137252807617, "learning_rate": 2.99732689365927e-05, "loss": 0.1568, "step": 2735 }, { "epoch": 0.04845329330977965, "grad_norm": 1.8949774503707886, "learning_rate": 2.9973217570711477e-05, "loss": 0.1809, "step": 2736 }, { "epoch": 0.048471002846808074, "grad_norm": 2.901909828186035, "learning_rate": 2.9973166155569886e-05, "loss": 0.2491, "step": 2737 }, { "epoch": 0.04848871238383651, "grad_norm": 2.1068601608276367, "learning_rate": 2.9973114691168095e-05, "loss": 0.1914, "step": 2738 }, { "epoch": 0.048506421920864934, "grad_norm": 1.3148609399795532, "learning_rate": 2.9973063177506262e-05, "loss": 0.1918, "step": 2739 }, { "epoch": 0.04852413145789336, "grad_norm": 3.1413233280181885, "learning_rate": 2.9973011614584575e-05, "loss": 0.1975, "step": 2740 }, { "epoch": 0.048541840994921794, "grad_norm": 3.720099687576294, "learning_rate": 2.9972960002403188e-05, "loss": 0.1774, "step": 2741 }, { "epoch": 0.04855955053195022, "grad_norm": 2.678842067718506, "learning_rate": 2.9972908340962285e-05, "loss": 0.2327, "step": 2742 }, { "epoch": 0.048577260068978646, "grad_norm": 2.2587802410125732, "learning_rate": 2.9972856630262022e-05, "loss": 0.2552, "step": 2743 }, { "epoch": 0.04859496960600707, "grad_norm": 2.0413432121276855, "learning_rate": 2.9972804870302575e-05, "loss": 0.1858, "step": 2744 }, { "epoch": 0.048612679143035506, "grad_norm": 1.810652256011963, "learning_rate": 2.9972753061084117e-05, "loss": 0.2349, "step": 2745 }, { "epoch": 0.04863038868006393, "grad_norm": 1.5955208539962769, "learning_rate": 2.997270120260681e-05, "loss": 0.1911, "step": 2746 }, { "epoch": 0.04864809821709236, "grad_norm": 1.8988699913024902, "learning_rate": 2.9972649294870833e-05, "loss": 0.218, "step": 2747 }, { "epoch": 0.048665807754120785, "grad_norm": 2.3670361042022705, "learning_rate": 2.997259733787636e-05, "loss": 0.1711, "step": 2748 }, { "epoch": 0.04868351729114922, "grad_norm": 1.604511022567749, "learning_rate": 2.9972545331623548e-05, "loss": 0.2022, "step": 2749 }, { "epoch": 0.048701226828177645, "grad_norm": 2.1380598545074463, "learning_rate": 2.997249327611258e-05, "loss": 0.1871, "step": 2750 }, { "epoch": 0.04871893636520607, "grad_norm": 1.7836476564407349, "learning_rate": 2.9972441171343625e-05, "loss": 0.1764, "step": 2751 }, { "epoch": 0.0487366459022345, "grad_norm": 2.5099802017211914, "learning_rate": 2.997238901731685e-05, "loss": 0.2584, "step": 2752 }, { "epoch": 0.04875435543926293, "grad_norm": 1.3345853090286255, "learning_rate": 2.997233681403243e-05, "loss": 0.125, "step": 2753 }, { "epoch": 0.04877206497629136, "grad_norm": 1.6904387474060059, "learning_rate": 2.997228456149054e-05, "loss": 0.2116, "step": 2754 }, { "epoch": 0.048789774513319784, "grad_norm": 1.892164707183838, "learning_rate": 2.9972232259691345e-05, "loss": 0.1543, "step": 2755 }, { "epoch": 0.04880748405034822, "grad_norm": 2.2566800117492676, "learning_rate": 2.997217990863502e-05, "loss": 0.1532, "step": 2756 }, { "epoch": 0.04882519358737664, "grad_norm": 1.9173139333724976, "learning_rate": 2.997212750832174e-05, "loss": 0.241, "step": 2757 }, { "epoch": 0.04884290312440507, "grad_norm": 1.5949126482009888, "learning_rate": 2.997207505875167e-05, "loss": 0.1376, "step": 2758 }, { "epoch": 0.048860612661433496, "grad_norm": 2.104368209838867, "learning_rate": 2.997202255992499e-05, "loss": 0.1822, "step": 2759 }, { "epoch": 0.04887832219846193, "grad_norm": 2.94309663772583, "learning_rate": 2.997197001184187e-05, "loss": 0.1856, "step": 2760 }, { "epoch": 0.048896031735490356, "grad_norm": 2.1224160194396973, "learning_rate": 2.9971917414502484e-05, "loss": 0.1695, "step": 2761 }, { "epoch": 0.04891374127251878, "grad_norm": 1.7142034769058228, "learning_rate": 2.9971864767907e-05, "loss": 0.1557, "step": 2762 }, { "epoch": 0.04893145080954721, "grad_norm": 1.2828295230865479, "learning_rate": 2.99718120720556e-05, "loss": 0.1326, "step": 2763 }, { "epoch": 0.04894916034657564, "grad_norm": 3.3007254600524902, "learning_rate": 2.997175932694845e-05, "loss": 0.1638, "step": 2764 }, { "epoch": 0.04896686988360407, "grad_norm": 1.6467818021774292, "learning_rate": 2.9971706532585728e-05, "loss": 0.1338, "step": 2765 }, { "epoch": 0.048984579420632494, "grad_norm": 1.8504358530044556, "learning_rate": 2.9971653688967605e-05, "loss": 0.151, "step": 2766 }, { "epoch": 0.04900228895766093, "grad_norm": 1.8478623628616333, "learning_rate": 2.997160079609426e-05, "loss": 0.1431, "step": 2767 }, { "epoch": 0.049019998494689354, "grad_norm": 1.3727312088012695, "learning_rate": 2.9971547853965856e-05, "loss": 0.1571, "step": 2768 }, { "epoch": 0.04903770803171778, "grad_norm": 2.0648491382598877, "learning_rate": 2.9971494862582574e-05, "loss": 0.1592, "step": 2769 }, { "epoch": 0.04905541756874621, "grad_norm": 1.8921353816986084, "learning_rate": 2.997144182194459e-05, "loss": 0.1794, "step": 2770 }, { "epoch": 0.04907312710577464, "grad_norm": 2.770516872406006, "learning_rate": 2.997138873205208e-05, "loss": 0.1909, "step": 2771 }, { "epoch": 0.049090836642803067, "grad_norm": 1.3887972831726074, "learning_rate": 2.997133559290521e-05, "loss": 0.1677, "step": 2772 }, { "epoch": 0.04910854617983149, "grad_norm": 2.6480300426483154, "learning_rate": 2.997128240450416e-05, "loss": 0.1567, "step": 2773 }, { "epoch": 0.04912625571685992, "grad_norm": 2.0256872177124023, "learning_rate": 2.9971229166849107e-05, "loss": 0.2003, "step": 2774 }, { "epoch": 0.04914396525388835, "grad_norm": 1.589294672012329, "learning_rate": 2.997117587994023e-05, "loss": 0.2108, "step": 2775 }, { "epoch": 0.04916167479091678, "grad_norm": 2.0572211742401123, "learning_rate": 2.9971122543777688e-05, "loss": 0.1684, "step": 2776 }, { "epoch": 0.049179384327945205, "grad_norm": 2.216914415359497, "learning_rate": 2.9971069158361673e-05, "loss": 0.1695, "step": 2777 }, { "epoch": 0.04919709386497363, "grad_norm": 1.6482467651367188, "learning_rate": 2.997101572369236e-05, "loss": 0.1581, "step": 2778 }, { "epoch": 0.049214803402002065, "grad_norm": 1.4928991794586182, "learning_rate": 2.9970962239769912e-05, "loss": 0.1497, "step": 2779 }, { "epoch": 0.04923251293903049, "grad_norm": 1.8783477544784546, "learning_rate": 2.9970908706594515e-05, "loss": 0.1857, "step": 2780 }, { "epoch": 0.04925022247605892, "grad_norm": 1.9318821430206299, "learning_rate": 2.997085512416634e-05, "loss": 0.1999, "step": 2781 }, { "epoch": 0.04926793201308735, "grad_norm": 1.4578834772109985, "learning_rate": 2.997080149248557e-05, "loss": 0.1933, "step": 2782 }, { "epoch": 0.04928564155011578, "grad_norm": 1.8888115882873535, "learning_rate": 2.9970747811552373e-05, "loss": 0.1451, "step": 2783 }, { "epoch": 0.049303351087144204, "grad_norm": 1.7565685510635376, "learning_rate": 2.997069408136693e-05, "loss": 0.1798, "step": 2784 }, { "epoch": 0.04932106062417263, "grad_norm": 2.393073558807373, "learning_rate": 2.997064030192942e-05, "loss": 0.2022, "step": 2785 }, { "epoch": 0.04933877016120106, "grad_norm": 1.6474641561508179, "learning_rate": 2.9970586473240017e-05, "loss": 0.1512, "step": 2786 }, { "epoch": 0.04935647969822949, "grad_norm": 1.8833023309707642, "learning_rate": 2.99705325952989e-05, "loss": 0.1638, "step": 2787 }, { "epoch": 0.049374189235257916, "grad_norm": 1.7649409770965576, "learning_rate": 2.9970478668106243e-05, "loss": 0.1749, "step": 2788 }, { "epoch": 0.04939189877228634, "grad_norm": 2.211167812347412, "learning_rate": 2.9970424691662228e-05, "loss": 0.197, "step": 2789 }, { "epoch": 0.049409608309314776, "grad_norm": 1.4764140844345093, "learning_rate": 2.997037066596703e-05, "loss": 0.1348, "step": 2790 }, { "epoch": 0.0494273178463432, "grad_norm": 1.4637154340744019, "learning_rate": 2.9970316591020824e-05, "loss": 0.1408, "step": 2791 }, { "epoch": 0.04944502738337163, "grad_norm": 1.5571955442428589, "learning_rate": 2.997026246682379e-05, "loss": 0.1819, "step": 2792 }, { "epoch": 0.049462736920400055, "grad_norm": 1.9479122161865234, "learning_rate": 2.9970208293376104e-05, "loss": 0.1691, "step": 2793 }, { "epoch": 0.04948044645742849, "grad_norm": 1.3526268005371094, "learning_rate": 2.9970154070677954e-05, "loss": 0.1561, "step": 2794 }, { "epoch": 0.049498155994456915, "grad_norm": 1.6397696733474731, "learning_rate": 2.9970099798729508e-05, "loss": 0.1552, "step": 2795 }, { "epoch": 0.04951586553148534, "grad_norm": 1.7750474214553833, "learning_rate": 2.9970045477530944e-05, "loss": 0.1501, "step": 2796 }, { "epoch": 0.049533575068513774, "grad_norm": 1.7686697244644165, "learning_rate": 2.9969991107082452e-05, "loss": 0.172, "step": 2797 }, { "epoch": 0.0495512846055422, "grad_norm": 1.8542811870574951, "learning_rate": 2.9969936687384202e-05, "loss": 0.2078, "step": 2798 }, { "epoch": 0.04956899414257063, "grad_norm": 1.8316383361816406, "learning_rate": 2.996988221843637e-05, "loss": 0.1956, "step": 2799 }, { "epoch": 0.04958670367959905, "grad_norm": 1.5348871946334839, "learning_rate": 2.9969827700239142e-05, "loss": 0.2172, "step": 2800 }, { "epoch": 0.04960441321662749, "grad_norm": 2.235522508621216, "learning_rate": 2.9969773132792694e-05, "loss": 0.1461, "step": 2801 }, { "epoch": 0.04962212275365591, "grad_norm": 1.8218308687210083, "learning_rate": 2.9969718516097207e-05, "loss": 0.1889, "step": 2802 }, { "epoch": 0.04963983229068434, "grad_norm": 1.8910046815872192, "learning_rate": 2.996966385015286e-05, "loss": 0.1263, "step": 2803 }, { "epoch": 0.049657541827712766, "grad_norm": 1.4347338676452637, "learning_rate": 2.9969609134959833e-05, "loss": 0.1518, "step": 2804 }, { "epoch": 0.0496752513647412, "grad_norm": 2.80582857131958, "learning_rate": 2.996955437051831e-05, "loss": 0.1508, "step": 2805 }, { "epoch": 0.049692960901769626, "grad_norm": 1.9162460565567017, "learning_rate": 2.996949955682846e-05, "loss": 0.1475, "step": 2806 }, { "epoch": 0.04971067043879805, "grad_norm": 2.2789206504821777, "learning_rate": 2.9969444693890476e-05, "loss": 0.1516, "step": 2807 }, { "epoch": 0.049728379975826485, "grad_norm": 1.7560280561447144, "learning_rate": 2.996938978170453e-05, "loss": 0.2053, "step": 2808 }, { "epoch": 0.04974608951285491, "grad_norm": 1.718000888824463, "learning_rate": 2.9969334820270807e-05, "loss": 0.1182, "step": 2809 }, { "epoch": 0.04976379904988334, "grad_norm": 2.2156600952148438, "learning_rate": 2.9969279809589487e-05, "loss": 0.1865, "step": 2810 }, { "epoch": 0.049781508586911764, "grad_norm": 1.4910354614257812, "learning_rate": 2.996922474966075e-05, "loss": 0.1923, "step": 2811 }, { "epoch": 0.0497992181239402, "grad_norm": 2.087214708328247, "learning_rate": 2.9969169640484776e-05, "loss": 0.1795, "step": 2812 }, { "epoch": 0.049816927660968624, "grad_norm": 1.6427122354507446, "learning_rate": 2.9969114482061747e-05, "loss": 0.1838, "step": 2813 }, { "epoch": 0.04983463719799705, "grad_norm": 1.5226006507873535, "learning_rate": 2.9969059274391853e-05, "loss": 0.1321, "step": 2814 }, { "epoch": 0.04985234673502548, "grad_norm": 1.631838083267212, "learning_rate": 2.996900401747526e-05, "loss": 0.1624, "step": 2815 }, { "epoch": 0.04987005627205391, "grad_norm": 2.3306431770324707, "learning_rate": 2.9968948711312162e-05, "loss": 0.1513, "step": 2816 }, { "epoch": 0.049887765809082336, "grad_norm": 1.3120676279067993, "learning_rate": 2.9968893355902734e-05, "loss": 0.1422, "step": 2817 }, { "epoch": 0.04990547534611076, "grad_norm": 1.7524746656417847, "learning_rate": 2.9968837951247163e-05, "loss": 0.188, "step": 2818 }, { "epoch": 0.04992318488313919, "grad_norm": 1.6734025478363037, "learning_rate": 2.996878249734563e-05, "loss": 0.1527, "step": 2819 }, { "epoch": 0.04994089442016762, "grad_norm": 2.635624408721924, "learning_rate": 2.9968726994198313e-05, "loss": 0.2046, "step": 2820 }, { "epoch": 0.04995860395719605, "grad_norm": 2.2619049549102783, "learning_rate": 2.99686714418054e-05, "loss": 0.1901, "step": 2821 }, { "epoch": 0.049976313494224475, "grad_norm": 1.9661785364151, "learning_rate": 2.9968615840167072e-05, "loss": 0.1746, "step": 2822 }, { "epoch": 0.04999402303125291, "grad_norm": 1.9804461002349854, "learning_rate": 2.9968560189283512e-05, "loss": 0.1659, "step": 2823 }, { "epoch": 0.050011732568281335, "grad_norm": 1.195099115371704, "learning_rate": 2.9968504489154904e-05, "loss": 0.1652, "step": 2824 }, { "epoch": 0.05002944210530976, "grad_norm": 1.8693407773971558, "learning_rate": 2.996844873978143e-05, "loss": 0.1649, "step": 2825 }, { "epoch": 0.05004715164233819, "grad_norm": 1.6061052083969116, "learning_rate": 2.9968392941163272e-05, "loss": 0.2432, "step": 2826 }, { "epoch": 0.05006486117936662, "grad_norm": 1.510818600654602, "learning_rate": 2.996833709330062e-05, "loss": 0.19, "step": 2827 }, { "epoch": 0.05008257071639505, "grad_norm": 1.489907145500183, "learning_rate": 2.9968281196193644e-05, "loss": 0.2197, "step": 2828 }, { "epoch": 0.050100280253423474, "grad_norm": 3.1491031646728516, "learning_rate": 2.9968225249842543e-05, "loss": 0.153, "step": 2829 }, { "epoch": 0.0501179897904519, "grad_norm": 3.1116509437561035, "learning_rate": 2.996816925424749e-05, "loss": 0.1933, "step": 2830 }, { "epoch": 0.05013569932748033, "grad_norm": 2.2408576011657715, "learning_rate": 2.996811320940868e-05, "loss": 0.2162, "step": 2831 }, { "epoch": 0.05015340886450876, "grad_norm": 1.9384742975234985, "learning_rate": 2.9968057115326288e-05, "loss": 0.1745, "step": 2832 }, { "epoch": 0.050171118401537186, "grad_norm": 1.5602768659591675, "learning_rate": 2.9968000972000504e-05, "loss": 0.152, "step": 2833 }, { "epoch": 0.05018882793856562, "grad_norm": 1.7345722913742065, "learning_rate": 2.996794477943151e-05, "loss": 0.2149, "step": 2834 }, { "epoch": 0.050206537475594046, "grad_norm": 2.001284122467041, "learning_rate": 2.996788853761949e-05, "loss": 0.1679, "step": 2835 }, { "epoch": 0.05022424701262247, "grad_norm": 1.9462043046951294, "learning_rate": 2.9967832246564635e-05, "loss": 0.1774, "step": 2836 }, { "epoch": 0.0502419565496509, "grad_norm": 2.252777576446533, "learning_rate": 2.996777590626712e-05, "loss": 0.1571, "step": 2837 }, { "epoch": 0.05025966608667933, "grad_norm": 2.042508602142334, "learning_rate": 2.996771951672714e-05, "loss": 0.1122, "step": 2838 }, { "epoch": 0.05027737562370776, "grad_norm": 1.3304685354232788, "learning_rate": 2.9967663077944876e-05, "loss": 0.1517, "step": 2839 }, { "epoch": 0.050295085160736185, "grad_norm": 1.5880573987960815, "learning_rate": 2.9967606589920515e-05, "loss": 0.2001, "step": 2840 }, { "epoch": 0.05031279469776461, "grad_norm": 1.8837469816207886, "learning_rate": 2.9967550052654242e-05, "loss": 0.1607, "step": 2841 }, { "epoch": 0.050330504234793044, "grad_norm": 1.7888336181640625, "learning_rate": 2.9967493466146244e-05, "loss": 0.1278, "step": 2842 }, { "epoch": 0.05034821377182147, "grad_norm": 1.6104336977005005, "learning_rate": 2.99674368303967e-05, "loss": 0.156, "step": 2843 }, { "epoch": 0.0503659233088499, "grad_norm": 1.5688773393630981, "learning_rate": 2.996738014540581e-05, "loss": 0.1476, "step": 2844 }, { "epoch": 0.05038363284587832, "grad_norm": 1.9808000326156616, "learning_rate": 2.996732341117375e-05, "loss": 0.1636, "step": 2845 }, { "epoch": 0.05040134238290676, "grad_norm": 1.6575112342834473, "learning_rate": 2.996726662770071e-05, "loss": 0.1474, "step": 2846 }, { "epoch": 0.05041905191993518, "grad_norm": 2.7694289684295654, "learning_rate": 2.9967209794986877e-05, "loss": 0.1619, "step": 2847 }, { "epoch": 0.05043676145696361, "grad_norm": 1.7342643737792969, "learning_rate": 2.9967152913032442e-05, "loss": 0.1884, "step": 2848 }, { "epoch": 0.05045447099399204, "grad_norm": 1.6160266399383545, "learning_rate": 2.9967095981837583e-05, "loss": 0.2048, "step": 2849 }, { "epoch": 0.05047218053102047, "grad_norm": 1.6854689121246338, "learning_rate": 2.9967039001402493e-05, "loss": 0.1628, "step": 2850 }, { "epoch": 0.050489890068048895, "grad_norm": 2.0358612537384033, "learning_rate": 2.9966981971727355e-05, "loss": 0.1541, "step": 2851 }, { "epoch": 0.05050759960507732, "grad_norm": 1.5928356647491455, "learning_rate": 2.9966924892812362e-05, "loss": 0.1601, "step": 2852 }, { "epoch": 0.050525309142105755, "grad_norm": 1.5490349531173706, "learning_rate": 2.99668677646577e-05, "loss": 0.1726, "step": 2853 }, { "epoch": 0.05054301867913418, "grad_norm": 1.7301132678985596, "learning_rate": 2.996681058726356e-05, "loss": 0.2359, "step": 2854 }, { "epoch": 0.05056072821616261, "grad_norm": 1.734790325164795, "learning_rate": 2.9966753360630122e-05, "loss": 0.159, "step": 2855 }, { "epoch": 0.050578437753191034, "grad_norm": 1.3833256959915161, "learning_rate": 2.9966696084757584e-05, "loss": 0.1673, "step": 2856 }, { "epoch": 0.05059614729021947, "grad_norm": 2.038306713104248, "learning_rate": 2.9966638759646126e-05, "loss": 0.1547, "step": 2857 }, { "epoch": 0.050613856827247894, "grad_norm": 1.7508355379104614, "learning_rate": 2.9966581385295945e-05, "loss": 0.145, "step": 2858 }, { "epoch": 0.05063156636427632, "grad_norm": 1.9003413915634155, "learning_rate": 2.9966523961707224e-05, "loss": 0.1468, "step": 2859 }, { "epoch": 0.050649275901304754, "grad_norm": 1.3837649822235107, "learning_rate": 2.9966466488880148e-05, "loss": 0.1478, "step": 2860 }, { "epoch": 0.05066698543833318, "grad_norm": 2.4761736392974854, "learning_rate": 2.996640896681492e-05, "loss": 0.2154, "step": 2861 }, { "epoch": 0.050684694975361606, "grad_norm": 1.6917959451675415, "learning_rate": 2.9966351395511712e-05, "loss": 0.1891, "step": 2862 }, { "epoch": 0.05070240451239003, "grad_norm": 1.8075060844421387, "learning_rate": 2.996629377497072e-05, "loss": 0.2137, "step": 2863 }, { "epoch": 0.050720114049418466, "grad_norm": 2.1912901401519775, "learning_rate": 2.996623610519214e-05, "loss": 0.1558, "step": 2864 }, { "epoch": 0.05073782358644689, "grad_norm": 1.7271045446395874, "learning_rate": 2.996617838617616e-05, "loss": 0.168, "step": 2865 }, { "epoch": 0.05075553312347532, "grad_norm": 1.604385495185852, "learning_rate": 2.996612061792296e-05, "loss": 0.173, "step": 2866 }, { "epoch": 0.050773242660503745, "grad_norm": 2.1153564453125, "learning_rate": 2.9966062800432742e-05, "loss": 0.1794, "step": 2867 }, { "epoch": 0.05079095219753218, "grad_norm": 1.9806652069091797, "learning_rate": 2.996600493370569e-05, "loss": 0.1757, "step": 2868 }, { "epoch": 0.050808661734560605, "grad_norm": 1.949289083480835, "learning_rate": 2.9965947017741995e-05, "loss": 0.1649, "step": 2869 }, { "epoch": 0.05082637127158903, "grad_norm": 3.3874311447143555, "learning_rate": 2.9965889052541844e-05, "loss": 0.14, "step": 2870 }, { "epoch": 0.05084408080861746, "grad_norm": 3.580360174179077, "learning_rate": 2.9965831038105436e-05, "loss": 0.1281, "step": 2871 }, { "epoch": 0.05086179034564589, "grad_norm": 1.6851513385772705, "learning_rate": 2.9965772974432957e-05, "loss": 0.1773, "step": 2872 }, { "epoch": 0.05087949988267432, "grad_norm": 1.5893830060958862, "learning_rate": 2.99657148615246e-05, "loss": 0.2141, "step": 2873 }, { "epoch": 0.050897209419702744, "grad_norm": 2.068000078201294, "learning_rate": 2.9965656699380552e-05, "loss": 0.1687, "step": 2874 }, { "epoch": 0.05091491895673118, "grad_norm": 1.544479489326477, "learning_rate": 2.9965598488001008e-05, "loss": 0.176, "step": 2875 }, { "epoch": 0.0509326284937596, "grad_norm": 1.8949942588806152, "learning_rate": 2.9965540227386163e-05, "loss": 0.1919, "step": 2876 }, { "epoch": 0.05095033803078803, "grad_norm": 2.7771706581115723, "learning_rate": 2.99654819175362e-05, "loss": 0.2258, "step": 2877 }, { "epoch": 0.050968047567816456, "grad_norm": 1.8289819955825806, "learning_rate": 2.9965423558451315e-05, "loss": 0.1626, "step": 2878 }, { "epoch": 0.05098575710484489, "grad_norm": 2.072307586669922, "learning_rate": 2.9965365150131698e-05, "loss": 0.1654, "step": 2879 }, { "epoch": 0.051003466641873316, "grad_norm": 1.9016218185424805, "learning_rate": 2.996530669257755e-05, "loss": 0.1542, "step": 2880 }, { "epoch": 0.05102117617890174, "grad_norm": 1.6514992713928223, "learning_rate": 2.996524818578905e-05, "loss": 0.172, "step": 2881 }, { "epoch": 0.05103888571593017, "grad_norm": 1.1663488149642944, "learning_rate": 2.99651896297664e-05, "loss": 0.2233, "step": 2882 }, { "epoch": 0.0510565952529586, "grad_norm": 1.6394288539886475, "learning_rate": 2.996513102450979e-05, "loss": 0.1641, "step": 2883 }, { "epoch": 0.05107430478998703, "grad_norm": 1.5757339000701904, "learning_rate": 2.9965072370019417e-05, "loss": 0.1966, "step": 2884 }, { "epoch": 0.051092014327015454, "grad_norm": 1.9013994932174683, "learning_rate": 2.996501366629546e-05, "loss": 0.2003, "step": 2885 }, { "epoch": 0.05110972386404388, "grad_norm": 1.4607352018356323, "learning_rate": 2.9964954913338127e-05, "loss": 0.1569, "step": 2886 }, { "epoch": 0.051127433401072314, "grad_norm": 1.8090730905532837, "learning_rate": 2.9964896111147603e-05, "loss": 0.1807, "step": 2887 }, { "epoch": 0.05114514293810074, "grad_norm": 2.0571448802948, "learning_rate": 2.996483725972409e-05, "loss": 0.1708, "step": 2888 }, { "epoch": 0.05116285247512917, "grad_norm": 1.3089743852615356, "learning_rate": 2.996477835906777e-05, "loss": 0.1421, "step": 2889 }, { "epoch": 0.0511805620121576, "grad_norm": 1.5732332468032837, "learning_rate": 2.9964719409178845e-05, "loss": 0.1685, "step": 2890 }, { "epoch": 0.05119827154918603, "grad_norm": 1.5146938562393188, "learning_rate": 2.996466041005751e-05, "loss": 0.117, "step": 2891 }, { "epoch": 0.05121598108621445, "grad_norm": 1.9527556896209717, "learning_rate": 2.996460136170395e-05, "loss": 0.1753, "step": 2892 }, { "epoch": 0.05123369062324288, "grad_norm": 1.6064260005950928, "learning_rate": 2.9964542264118374e-05, "loss": 0.1377, "step": 2893 }, { "epoch": 0.05125140016027131, "grad_norm": 2.3974485397338867, "learning_rate": 2.996448311730096e-05, "loss": 0.2214, "step": 2894 }, { "epoch": 0.05126910969729974, "grad_norm": 1.7267189025878906, "learning_rate": 2.996442392125191e-05, "loss": 0.1317, "step": 2895 }, { "epoch": 0.051286819234328165, "grad_norm": 1.7815476655960083, "learning_rate": 2.996436467597142e-05, "loss": 0.1922, "step": 2896 }, { "epoch": 0.05130452877135659, "grad_norm": 1.5635255575180054, "learning_rate": 2.9964305381459686e-05, "loss": 0.1476, "step": 2897 }, { "epoch": 0.051322238308385025, "grad_norm": 1.9847973585128784, "learning_rate": 2.99642460377169e-05, "loss": 0.1743, "step": 2898 }, { "epoch": 0.05133994784541345, "grad_norm": 2.0676729679107666, "learning_rate": 2.9964186644743257e-05, "loss": 0.1524, "step": 2899 }, { "epoch": 0.05135765738244188, "grad_norm": 1.4661142826080322, "learning_rate": 2.9964127202538955e-05, "loss": 0.1694, "step": 2900 }, { "epoch": 0.05137536691947031, "grad_norm": 1.5896449089050293, "learning_rate": 2.9964067711104186e-05, "loss": 0.2102, "step": 2901 }, { "epoch": 0.05139307645649874, "grad_norm": 1.8568918704986572, "learning_rate": 2.996400817043915e-05, "loss": 0.1569, "step": 2902 }, { "epoch": 0.051410785993527164, "grad_norm": 1.7801066637039185, "learning_rate": 2.996394858054404e-05, "loss": 0.1361, "step": 2903 }, { "epoch": 0.05142849553055559, "grad_norm": 2.138288974761963, "learning_rate": 2.996388894141905e-05, "loss": 0.1615, "step": 2904 }, { "epoch": 0.051446205067584024, "grad_norm": 2.4081966876983643, "learning_rate": 2.996382925306438e-05, "loss": 0.1513, "step": 2905 }, { "epoch": 0.05146391460461245, "grad_norm": 2.47067928314209, "learning_rate": 2.9963769515480228e-05, "loss": 0.2497, "step": 2906 }, { "epoch": 0.051481624141640876, "grad_norm": 3.1342833042144775, "learning_rate": 2.996370972866678e-05, "loss": 0.2, "step": 2907 }, { "epoch": 0.0514993336786693, "grad_norm": 2.0339667797088623, "learning_rate": 2.996364989262425e-05, "loss": 0.1494, "step": 2908 }, { "epoch": 0.051517043215697736, "grad_norm": 1.863795280456543, "learning_rate": 2.9963590007352824e-05, "loss": 0.1468, "step": 2909 }, { "epoch": 0.05153475275272616, "grad_norm": 2.01179575920105, "learning_rate": 2.9963530072852695e-05, "loss": 0.1913, "step": 2910 }, { "epoch": 0.05155246228975459, "grad_norm": 2.4979193210601807, "learning_rate": 2.996347008912407e-05, "loss": 0.1971, "step": 2911 }, { "epoch": 0.051570171826783015, "grad_norm": 1.605202078819275, "learning_rate": 2.996341005616714e-05, "loss": 0.1647, "step": 2912 }, { "epoch": 0.05158788136381145, "grad_norm": 1.138383388519287, "learning_rate": 2.9963349973982107e-05, "loss": 0.1582, "step": 2913 }, { "epoch": 0.051605590900839875, "grad_norm": 1.686611533164978, "learning_rate": 2.9963289842569163e-05, "loss": 0.1189, "step": 2914 }, { "epoch": 0.0516233004378683, "grad_norm": 1.5630115270614624, "learning_rate": 2.996322966192851e-05, "loss": 0.1703, "step": 2915 }, { "epoch": 0.051641009974896734, "grad_norm": 1.1990487575531006, "learning_rate": 2.9963169432060342e-05, "loss": 0.1472, "step": 2916 }, { "epoch": 0.05165871951192516, "grad_norm": 1.8535441160202026, "learning_rate": 2.9963109152964866e-05, "loss": 0.1825, "step": 2917 }, { "epoch": 0.05167642904895359, "grad_norm": 1.32803475856781, "learning_rate": 2.9963048824642267e-05, "loss": 0.1711, "step": 2918 }, { "epoch": 0.051694138585982014, "grad_norm": 1.6902350187301636, "learning_rate": 2.9962988447092756e-05, "loss": 0.177, "step": 2919 }, { "epoch": 0.05171184812301045, "grad_norm": 1.3032222986221313, "learning_rate": 2.9962928020316523e-05, "loss": 0.1648, "step": 2920 }, { "epoch": 0.05172955766003887, "grad_norm": 1.8911528587341309, "learning_rate": 2.9962867544313777e-05, "loss": 0.1471, "step": 2921 }, { "epoch": 0.0517472671970673, "grad_norm": 1.7563873529434204, "learning_rate": 2.9962807019084702e-05, "loss": 0.192, "step": 2922 }, { "epoch": 0.051764976734095726, "grad_norm": 1.668947696685791, "learning_rate": 2.996274644462951e-05, "loss": 0.1774, "step": 2923 }, { "epoch": 0.05178268627112416, "grad_norm": 2.656409502029419, "learning_rate": 2.9962685820948397e-05, "loss": 0.1486, "step": 2924 }, { "epoch": 0.051800395808152586, "grad_norm": 1.4500575065612793, "learning_rate": 2.9962625148041557e-05, "loss": 0.1872, "step": 2925 }, { "epoch": 0.05181810534518101, "grad_norm": 1.6284257173538208, "learning_rate": 2.9962564425909194e-05, "loss": 0.1758, "step": 2926 }, { "epoch": 0.051835814882209445, "grad_norm": 1.6403672695159912, "learning_rate": 2.9962503654551508e-05, "loss": 0.1797, "step": 2927 }, { "epoch": 0.05185352441923787, "grad_norm": 2.3890879154205322, "learning_rate": 2.99624428339687e-05, "loss": 0.1842, "step": 2928 }, { "epoch": 0.0518712339562663, "grad_norm": 1.7505162954330444, "learning_rate": 2.9962381964160966e-05, "loss": 0.1657, "step": 2929 }, { "epoch": 0.051888943493294724, "grad_norm": 1.745857834815979, "learning_rate": 2.9962321045128513e-05, "loss": 0.1614, "step": 2930 }, { "epoch": 0.05190665303032316, "grad_norm": 1.139630913734436, "learning_rate": 2.996226007687153e-05, "loss": 0.1414, "step": 2931 }, { "epoch": 0.051924362567351584, "grad_norm": 1.3897449970245361, "learning_rate": 2.996219905939023e-05, "loss": 0.1937, "step": 2932 }, { "epoch": 0.05194207210438001, "grad_norm": 2.0802953243255615, "learning_rate": 2.996213799268481e-05, "loss": 0.2042, "step": 2933 }, { "epoch": 0.05195978164140844, "grad_norm": 1.6198638677597046, "learning_rate": 2.9962076876755466e-05, "loss": 0.1426, "step": 2934 }, { "epoch": 0.05197749117843687, "grad_norm": 2.229257822036743, "learning_rate": 2.9962015711602404e-05, "loss": 0.2131, "step": 2935 }, { "epoch": 0.051995200715465296, "grad_norm": 1.6796576976776123, "learning_rate": 2.9961954497225822e-05, "loss": 0.179, "step": 2936 }, { "epoch": 0.05201291025249372, "grad_norm": 1.97540283203125, "learning_rate": 2.9961893233625923e-05, "loss": 0.1592, "step": 2937 }, { "epoch": 0.05203061978952215, "grad_norm": 1.4738292694091797, "learning_rate": 2.996183192080291e-05, "loss": 0.1735, "step": 2938 }, { "epoch": 0.05204832932655058, "grad_norm": 1.8926447629928589, "learning_rate": 2.996177055875698e-05, "loss": 0.1596, "step": 2939 }, { "epoch": 0.05206603886357901, "grad_norm": 1.9840285778045654, "learning_rate": 2.9961709147488344e-05, "loss": 0.1825, "step": 2940 }, { "epoch": 0.052083748400607435, "grad_norm": 2.4733264446258545, "learning_rate": 2.9961647686997195e-05, "loss": 0.1355, "step": 2941 }, { "epoch": 0.05210145793763587, "grad_norm": 2.7828516960144043, "learning_rate": 2.9961586177283738e-05, "loss": 0.2273, "step": 2942 }, { "epoch": 0.052119167474664295, "grad_norm": 2.3253836631774902, "learning_rate": 2.9961524618348177e-05, "loss": 0.1691, "step": 2943 }, { "epoch": 0.05213687701169272, "grad_norm": 2.3589134216308594, "learning_rate": 2.996146301019071e-05, "loss": 0.1837, "step": 2944 }, { "epoch": 0.05215458654872115, "grad_norm": 1.833799958229065, "learning_rate": 2.9961401352811544e-05, "loss": 0.1788, "step": 2945 }, { "epoch": 0.05217229608574958, "grad_norm": 1.8994762897491455, "learning_rate": 2.9961339646210882e-05, "loss": 0.1547, "step": 2946 }, { "epoch": 0.05219000562277801, "grad_norm": 1.9278082847595215, "learning_rate": 2.9961277890388926e-05, "loss": 0.1966, "step": 2947 }, { "epoch": 0.052207715159806434, "grad_norm": 1.3029042482376099, "learning_rate": 2.996121608534588e-05, "loss": 0.1718, "step": 2948 }, { "epoch": 0.05222542469683486, "grad_norm": 1.4559763669967651, "learning_rate": 2.9961154231081943e-05, "loss": 0.2022, "step": 2949 }, { "epoch": 0.05224313423386329, "grad_norm": 1.2289637327194214, "learning_rate": 2.9961092327597326e-05, "loss": 0.1708, "step": 2950 }, { "epoch": 0.05226084377089172, "grad_norm": 2.1572673320770264, "learning_rate": 2.9961030374892227e-05, "loss": 0.1689, "step": 2951 }, { "epoch": 0.052278553307920146, "grad_norm": 1.4718669652938843, "learning_rate": 2.996096837296685e-05, "loss": 0.1883, "step": 2952 }, { "epoch": 0.05229626284494857, "grad_norm": 2.129794120788574, "learning_rate": 2.9960906321821396e-05, "loss": 0.1489, "step": 2953 }, { "epoch": 0.052313972381977006, "grad_norm": 1.9823817014694214, "learning_rate": 2.996084422145608e-05, "loss": 0.2139, "step": 2954 }, { "epoch": 0.05233168191900543, "grad_norm": 1.9934303760528564, "learning_rate": 2.99607820718711e-05, "loss": 0.1574, "step": 2955 }, { "epoch": 0.05234939145603386, "grad_norm": 1.4278243780136108, "learning_rate": 2.996071987306666e-05, "loss": 0.1829, "step": 2956 }, { "epoch": 0.05236710099306229, "grad_norm": 1.6155383586883545, "learning_rate": 2.996065762504296e-05, "loss": 0.13, "step": 2957 }, { "epoch": 0.05238481053009072, "grad_norm": 0.9455142021179199, "learning_rate": 2.9960595327800214e-05, "loss": 0.1096, "step": 2958 }, { "epoch": 0.052402520067119145, "grad_norm": 1.7101646661758423, "learning_rate": 2.9960532981338623e-05, "loss": 0.1383, "step": 2959 }, { "epoch": 0.05242022960414757, "grad_norm": 1.1821749210357666, "learning_rate": 2.9960470585658387e-05, "loss": 0.1722, "step": 2960 }, { "epoch": 0.052437939141176004, "grad_norm": 1.9309344291687012, "learning_rate": 2.9960408140759722e-05, "loss": 0.1754, "step": 2961 }, { "epoch": 0.05245564867820443, "grad_norm": 1.6680498123168945, "learning_rate": 2.996034564664282e-05, "loss": 0.1556, "step": 2962 }, { "epoch": 0.05247335821523286, "grad_norm": 1.337128758430481, "learning_rate": 2.99602831033079e-05, "loss": 0.1544, "step": 2963 }, { "epoch": 0.05249106775226128, "grad_norm": 1.43620765209198, "learning_rate": 2.996022051075516e-05, "loss": 0.2168, "step": 2964 }, { "epoch": 0.05250877728928972, "grad_norm": 2.278960704803467, "learning_rate": 2.9960157868984806e-05, "loss": 0.1814, "step": 2965 }, { "epoch": 0.05252648682631814, "grad_norm": 2.4192609786987305, "learning_rate": 2.996009517799705e-05, "loss": 0.1488, "step": 2966 }, { "epoch": 0.05254419636334657, "grad_norm": 2.5451433658599854, "learning_rate": 2.996003243779209e-05, "loss": 0.1801, "step": 2967 }, { "epoch": 0.052561905900375, "grad_norm": 2.495353937149048, "learning_rate": 2.9959969648370134e-05, "loss": 0.186, "step": 2968 }, { "epoch": 0.05257961543740343, "grad_norm": 1.436368703842163, "learning_rate": 2.9959906809731396e-05, "loss": 0.1595, "step": 2969 }, { "epoch": 0.052597324974431856, "grad_norm": 2.0110886096954346, "learning_rate": 2.995984392187608e-05, "loss": 0.1638, "step": 2970 }, { "epoch": 0.05261503451146028, "grad_norm": 1.5290825366973877, "learning_rate": 2.9959780984804387e-05, "loss": 0.1485, "step": 2971 }, { "epoch": 0.052632744048488715, "grad_norm": 2.0858960151672363, "learning_rate": 2.9959717998516527e-05, "loss": 0.2024, "step": 2972 }, { "epoch": 0.05265045358551714, "grad_norm": 1.7816225290298462, "learning_rate": 2.9959654963012708e-05, "loss": 0.1784, "step": 2973 }, { "epoch": 0.05266816312254557, "grad_norm": 2.245579242706299, "learning_rate": 2.9959591878293134e-05, "loss": 0.205, "step": 2974 }, { "epoch": 0.052685872659573994, "grad_norm": 1.433483600616455, "learning_rate": 2.9959528744358022e-05, "loss": 0.159, "step": 2975 }, { "epoch": 0.05270358219660243, "grad_norm": 2.475973606109619, "learning_rate": 2.9959465561207572e-05, "loss": 0.2306, "step": 2976 }, { "epoch": 0.052721291733630854, "grad_norm": 1.4377803802490234, "learning_rate": 2.995940232884199e-05, "loss": 0.1327, "step": 2977 }, { "epoch": 0.05273900127065928, "grad_norm": 2.1385722160339355, "learning_rate": 2.995933904726149e-05, "loss": 0.1509, "step": 2978 }, { "epoch": 0.05275671080768771, "grad_norm": 1.6361504793167114, "learning_rate": 2.9959275716466276e-05, "loss": 0.1315, "step": 2979 }, { "epoch": 0.05277442034471614, "grad_norm": 1.8842047452926636, "learning_rate": 2.9959212336456563e-05, "loss": 0.189, "step": 2980 }, { "epoch": 0.052792129881744566, "grad_norm": 1.9423691034317017, "learning_rate": 2.995914890723255e-05, "loss": 0.1556, "step": 2981 }, { "epoch": 0.05280983941877299, "grad_norm": 1.823688268661499, "learning_rate": 2.995908542879445e-05, "loss": 0.2073, "step": 2982 }, { "epoch": 0.052827548955801426, "grad_norm": 1.898791790008545, "learning_rate": 2.9959021901142472e-05, "loss": 0.171, "step": 2983 }, { "epoch": 0.05284525849282985, "grad_norm": 2.617786169052124, "learning_rate": 2.9958958324276827e-05, "loss": 0.1806, "step": 2984 }, { "epoch": 0.05286296802985828, "grad_norm": 2.146052360534668, "learning_rate": 2.995889469819772e-05, "loss": 0.2227, "step": 2985 }, { "epoch": 0.052880677566886705, "grad_norm": 1.5650092363357544, "learning_rate": 2.9958831022905363e-05, "loss": 0.212, "step": 2986 }, { "epoch": 0.05289838710391514, "grad_norm": 2.091089963912964, "learning_rate": 2.995876729839996e-05, "loss": 0.224, "step": 2987 }, { "epoch": 0.052916096640943565, "grad_norm": 2.1446311473846436, "learning_rate": 2.9958703524681735e-05, "loss": 0.218, "step": 2988 }, { "epoch": 0.05293380617797199, "grad_norm": 1.8113582134246826, "learning_rate": 2.9958639701750884e-05, "loss": 0.1469, "step": 2989 }, { "epoch": 0.05295151571500042, "grad_norm": 1.897697925567627, "learning_rate": 2.9958575829607623e-05, "loss": 0.1817, "step": 2990 }, { "epoch": 0.05296922525202885, "grad_norm": 1.5419095754623413, "learning_rate": 2.995851190825216e-05, "loss": 0.1815, "step": 2991 }, { "epoch": 0.05298693478905728, "grad_norm": 1.95848548412323, "learning_rate": 2.9958447937684703e-05, "loss": 0.2003, "step": 2992 }, { "epoch": 0.053004644326085704, "grad_norm": 1.7585309743881226, "learning_rate": 2.9958383917905466e-05, "loss": 0.144, "step": 2993 }, { "epoch": 0.05302235386311414, "grad_norm": 2.037454605102539, "learning_rate": 2.995831984891466e-05, "loss": 0.209, "step": 2994 }, { "epoch": 0.05304006340014256, "grad_norm": 2.0942142009735107, "learning_rate": 2.9958255730712495e-05, "loss": 0.1899, "step": 2995 }, { "epoch": 0.05305777293717099, "grad_norm": 1.9584362506866455, "learning_rate": 2.9958191563299183e-05, "loss": 0.1835, "step": 2996 }, { "epoch": 0.053075482474199416, "grad_norm": 1.4896894693374634, "learning_rate": 2.995812734667493e-05, "loss": 0.1322, "step": 2997 }, { "epoch": 0.05309319201122785, "grad_norm": 1.634549856185913, "learning_rate": 2.9958063080839953e-05, "loss": 0.2015, "step": 2998 }, { "epoch": 0.053110901548256276, "grad_norm": 1.5110434293746948, "learning_rate": 2.9957998765794462e-05, "loss": 0.2053, "step": 2999 }, { "epoch": 0.0531286110852847, "grad_norm": 2.000849962234497, "learning_rate": 2.9957934401538665e-05, "loss": 0.1352, "step": 3000 }, { "epoch": 0.05314632062231313, "grad_norm": 1.68721604347229, "learning_rate": 2.995786998807278e-05, "loss": 0.103, "step": 3001 }, { "epoch": 0.05316403015934156, "grad_norm": 1.4681165218353271, "learning_rate": 2.9957805525397014e-05, "loss": 0.1591, "step": 3002 }, { "epoch": 0.05318173969636999, "grad_norm": 1.5526474714279175, "learning_rate": 2.9957741013511583e-05, "loss": 0.1293, "step": 3003 }, { "epoch": 0.053199449233398415, "grad_norm": 1.9216783046722412, "learning_rate": 2.9957676452416693e-05, "loss": 0.1741, "step": 3004 }, { "epoch": 0.05321715877042684, "grad_norm": 2.352524757385254, "learning_rate": 2.9957611842112563e-05, "loss": 0.1931, "step": 3005 }, { "epoch": 0.053234868307455274, "grad_norm": 1.5069565773010254, "learning_rate": 2.9957547182599403e-05, "loss": 0.1455, "step": 3006 }, { "epoch": 0.0532525778444837, "grad_norm": 2.1698062419891357, "learning_rate": 2.9957482473877427e-05, "loss": 0.1316, "step": 3007 }, { "epoch": 0.05327028738151213, "grad_norm": 1.6946600675582886, "learning_rate": 2.9957417715946845e-05, "loss": 0.1695, "step": 3008 }, { "epoch": 0.05328799691854056, "grad_norm": 1.8363921642303467, "learning_rate": 2.9957352908807867e-05, "loss": 0.1579, "step": 3009 }, { "epoch": 0.05330570645556899, "grad_norm": 2.084749937057495, "learning_rate": 2.9957288052460717e-05, "loss": 0.1787, "step": 3010 }, { "epoch": 0.05332341599259741, "grad_norm": 1.8340436220169067, "learning_rate": 2.9957223146905598e-05, "loss": 0.2136, "step": 3011 }, { "epoch": 0.05334112552962584, "grad_norm": 2.1651229858398438, "learning_rate": 2.995715819214273e-05, "loss": 0.171, "step": 3012 }, { "epoch": 0.05335883506665427, "grad_norm": 2.0625720024108887, "learning_rate": 2.9957093188172326e-05, "loss": 0.1864, "step": 3013 }, { "epoch": 0.0533765446036827, "grad_norm": 1.7842941284179688, "learning_rate": 2.9957028134994593e-05, "loss": 0.1513, "step": 3014 }, { "epoch": 0.053394254140711125, "grad_norm": 1.9432533979415894, "learning_rate": 2.9956963032609756e-05, "loss": 0.1979, "step": 3015 }, { "epoch": 0.05341196367773955, "grad_norm": 2.1642231941223145, "learning_rate": 2.9956897881018023e-05, "loss": 0.2288, "step": 3016 }, { "epoch": 0.053429673214767985, "grad_norm": 1.5800055265426636, "learning_rate": 2.9956832680219605e-05, "loss": 0.1578, "step": 3017 }, { "epoch": 0.05344738275179641, "grad_norm": 1.4581599235534668, "learning_rate": 2.9956767430214723e-05, "loss": 0.1657, "step": 3018 }, { "epoch": 0.05346509228882484, "grad_norm": 3.6940300464630127, "learning_rate": 2.9956702131003586e-05, "loss": 0.179, "step": 3019 }, { "epoch": 0.053482801825853264, "grad_norm": 2.415088415145874, "learning_rate": 2.9956636782586414e-05, "loss": 0.193, "step": 3020 }, { "epoch": 0.0535005113628817, "grad_norm": 1.811599612236023, "learning_rate": 2.9956571384963418e-05, "loss": 0.1771, "step": 3021 }, { "epoch": 0.053518220899910124, "grad_norm": 1.808807373046875, "learning_rate": 2.995650593813482e-05, "loss": 0.1772, "step": 3022 }, { "epoch": 0.05353593043693855, "grad_norm": 4.446116924285889, "learning_rate": 2.9956440442100823e-05, "loss": 0.1863, "step": 3023 }, { "epoch": 0.053553639973966984, "grad_norm": 4.222898006439209, "learning_rate": 2.9956374896861655e-05, "loss": 0.2009, "step": 3024 }, { "epoch": 0.05357134951099541, "grad_norm": 1.6549779176712036, "learning_rate": 2.9956309302417523e-05, "loss": 0.1697, "step": 3025 }, { "epoch": 0.053589059048023836, "grad_norm": 1.81899094581604, "learning_rate": 2.9956243658768647e-05, "loss": 0.1583, "step": 3026 }, { "epoch": 0.05360676858505226, "grad_norm": 2.368025302886963, "learning_rate": 2.9956177965915245e-05, "loss": 0.1548, "step": 3027 }, { "epoch": 0.053624478122080696, "grad_norm": 1.9075745344161987, "learning_rate": 2.995611222385753e-05, "loss": 0.1561, "step": 3028 }, { "epoch": 0.05364218765910912, "grad_norm": 1.82602059841156, "learning_rate": 2.9956046432595713e-05, "loss": 0.1456, "step": 3029 }, { "epoch": 0.05365989719613755, "grad_norm": 1.8850635290145874, "learning_rate": 2.995598059213002e-05, "loss": 0.1773, "step": 3030 }, { "epoch": 0.053677606733165975, "grad_norm": 1.4989126920700073, "learning_rate": 2.9955914702460666e-05, "loss": 0.1486, "step": 3031 }, { "epoch": 0.05369531627019441, "grad_norm": 2.514918565750122, "learning_rate": 2.9955848763587862e-05, "loss": 0.1816, "step": 3032 }, { "epoch": 0.053713025807222835, "grad_norm": 2.539216995239258, "learning_rate": 2.995578277551183e-05, "loss": 0.177, "step": 3033 }, { "epoch": 0.05373073534425126, "grad_norm": 1.8843106031417847, "learning_rate": 2.9955716738232784e-05, "loss": 0.1471, "step": 3034 }, { "epoch": 0.053748444881279694, "grad_norm": 1.7327748537063599, "learning_rate": 2.995565065175094e-05, "loss": 0.1449, "step": 3035 }, { "epoch": 0.05376615441830812, "grad_norm": 1.8285456895828247, "learning_rate": 2.995558451606652e-05, "loss": 0.166, "step": 3036 }, { "epoch": 0.05378386395533655, "grad_norm": 2.546926498413086, "learning_rate": 2.9955518331179742e-05, "loss": 0.1747, "step": 3037 }, { "epoch": 0.053801573492364974, "grad_norm": 0.9200562834739685, "learning_rate": 2.995545209709082e-05, "loss": 0.1498, "step": 3038 }, { "epoch": 0.05381928302939341, "grad_norm": 5.128407955169678, "learning_rate": 2.9955385813799972e-05, "loss": 0.1395, "step": 3039 }, { "epoch": 0.05383699256642183, "grad_norm": 1.5271594524383545, "learning_rate": 2.9955319481307417e-05, "loss": 0.1402, "step": 3040 }, { "epoch": 0.05385470210345026, "grad_norm": 42.93827438354492, "learning_rate": 2.9955253099613378e-05, "loss": 0.1872, "step": 3041 }, { "epoch": 0.053872411640478686, "grad_norm": 1.8349692821502686, "learning_rate": 2.995518666871806e-05, "loss": 0.1696, "step": 3042 }, { "epoch": 0.05389012117750712, "grad_norm": 3.184649705886841, "learning_rate": 2.99551201886217e-05, "loss": 0.1873, "step": 3043 }, { "epoch": 0.053907830714535546, "grad_norm": 2.0990164279937744, "learning_rate": 2.9955053659324502e-05, "loss": 0.1838, "step": 3044 }, { "epoch": 0.05392554025156397, "grad_norm": 1.7578805685043335, "learning_rate": 2.9954987080826694e-05, "loss": 0.1812, "step": 3045 }, { "epoch": 0.0539432497885924, "grad_norm": 1.6836134195327759, "learning_rate": 2.995492045312849e-05, "loss": 0.2078, "step": 3046 }, { "epoch": 0.05396095932562083, "grad_norm": 1.9351081848144531, "learning_rate": 2.9954853776230105e-05, "loss": 0.1557, "step": 3047 }, { "epoch": 0.05397866886264926, "grad_norm": 2.901787757873535, "learning_rate": 2.9954787050131768e-05, "loss": 0.1398, "step": 3048 }, { "epoch": 0.053996378399677684, "grad_norm": 2.134690046310425, "learning_rate": 2.9954720274833695e-05, "loss": 0.1319, "step": 3049 }, { "epoch": 0.05401408793670612, "grad_norm": 1.8633005619049072, "learning_rate": 2.9954653450336103e-05, "loss": 0.1861, "step": 3050 }, { "epoch": 0.054031797473734544, "grad_norm": 2.0381734371185303, "learning_rate": 2.9954586576639215e-05, "loss": 0.2045, "step": 3051 }, { "epoch": 0.05404950701076297, "grad_norm": 2.6277241706848145, "learning_rate": 2.995451965374325e-05, "loss": 0.1893, "step": 3052 }, { "epoch": 0.0540672165477914, "grad_norm": 1.7505089044570923, "learning_rate": 2.9954452681648424e-05, "loss": 0.148, "step": 3053 }, { "epoch": 0.05408492608481983, "grad_norm": 6.671162128448486, "learning_rate": 2.9954385660354964e-05, "loss": 0.1861, "step": 3054 }, { "epoch": 0.05410263562184826, "grad_norm": 1.8185328245162964, "learning_rate": 2.995431858986309e-05, "loss": 0.2173, "step": 3055 }, { "epoch": 0.05412034515887668, "grad_norm": 1.868167519569397, "learning_rate": 2.995425147017302e-05, "loss": 0.1777, "step": 3056 }, { "epoch": 0.05413805469590511, "grad_norm": 1.3897343873977661, "learning_rate": 2.9954184301284972e-05, "loss": 0.1244, "step": 3057 }, { "epoch": 0.05415576423293354, "grad_norm": 1.6543318033218384, "learning_rate": 2.9954117083199172e-05, "loss": 0.163, "step": 3058 }, { "epoch": 0.05417347376996197, "grad_norm": 1.5785808563232422, "learning_rate": 2.995404981591584e-05, "loss": 0.1637, "step": 3059 }, { "epoch": 0.054191183306990395, "grad_norm": 1.18126380443573, "learning_rate": 2.9953982499435195e-05, "loss": 0.1301, "step": 3060 }, { "epoch": 0.05420889284401883, "grad_norm": 1.3287314176559448, "learning_rate": 2.995391513375746e-05, "loss": 0.2138, "step": 3061 }, { "epoch": 0.054226602381047255, "grad_norm": 1.9588837623596191, "learning_rate": 2.9953847718882856e-05, "loss": 0.1503, "step": 3062 }, { "epoch": 0.05424431191807568, "grad_norm": 2.4544677734375, "learning_rate": 2.9953780254811607e-05, "loss": 0.2236, "step": 3063 }, { "epoch": 0.05426202145510411, "grad_norm": 2.453047275543213, "learning_rate": 2.9953712741543935e-05, "loss": 0.1629, "step": 3064 }, { "epoch": 0.05427973099213254, "grad_norm": 2.1427130699157715, "learning_rate": 2.9953645179080062e-05, "loss": 0.1773, "step": 3065 }, { "epoch": 0.05429744052916097, "grad_norm": 1.1604602336883545, "learning_rate": 2.9953577567420203e-05, "loss": 0.129, "step": 3066 }, { "epoch": 0.054315150066189394, "grad_norm": 1.5062071084976196, "learning_rate": 2.995350990656459e-05, "loss": 0.1517, "step": 3067 }, { "epoch": 0.05433285960321782, "grad_norm": 1.930431604385376, "learning_rate": 2.9953442196513438e-05, "loss": 0.1727, "step": 3068 }, { "epoch": 0.05435056914024625, "grad_norm": 1.6454713344573975, "learning_rate": 2.995337443726698e-05, "loss": 0.1446, "step": 3069 }, { "epoch": 0.05436827867727468, "grad_norm": 1.319266438484192, "learning_rate": 2.9953306628825425e-05, "loss": 0.143, "step": 3070 }, { "epoch": 0.054385988214303106, "grad_norm": 2.1794168949127197, "learning_rate": 2.9953238771189008e-05, "loss": 0.1669, "step": 3071 }, { "epoch": 0.05440369775133153, "grad_norm": 2.034370183944702, "learning_rate": 2.9953170864357947e-05, "loss": 0.176, "step": 3072 }, { "epoch": 0.054421407288359966, "grad_norm": 2.2017576694488525, "learning_rate": 2.995310290833247e-05, "loss": 0.1382, "step": 3073 }, { "epoch": 0.05443911682538839, "grad_norm": 2.6826624870300293, "learning_rate": 2.9953034903112792e-05, "loss": 0.1881, "step": 3074 }, { "epoch": 0.05445682636241682, "grad_norm": 1.9335163831710815, "learning_rate": 2.9952966848699142e-05, "loss": 0.1662, "step": 3075 }, { "epoch": 0.05447453589944525, "grad_norm": 2.0956389904022217, "learning_rate": 2.9952898745091746e-05, "loss": 0.1805, "step": 3076 }, { "epoch": 0.05449224543647368, "grad_norm": 2.0328025817871094, "learning_rate": 2.9952830592290825e-05, "loss": 0.17, "step": 3077 }, { "epoch": 0.054509954973502105, "grad_norm": 1.5285793542861938, "learning_rate": 2.99527623902966e-05, "loss": 0.1861, "step": 3078 }, { "epoch": 0.05452766451053053, "grad_norm": 1.7393057346343994, "learning_rate": 2.9952694139109304e-05, "loss": 0.1766, "step": 3079 }, { "epoch": 0.054545374047558964, "grad_norm": 2.9488160610198975, "learning_rate": 2.995262583872915e-05, "loss": 0.1646, "step": 3080 }, { "epoch": 0.05456308358458739, "grad_norm": 1.5609049797058105, "learning_rate": 2.9952557489156378e-05, "loss": 0.1731, "step": 3081 }, { "epoch": 0.05458079312161582, "grad_norm": 1.239553451538086, "learning_rate": 2.99524890903912e-05, "loss": 0.1559, "step": 3082 }, { "epoch": 0.05459850265864424, "grad_norm": 1.4561052322387695, "learning_rate": 2.995242064243385e-05, "loss": 0.1529, "step": 3083 }, { "epoch": 0.05461621219567268, "grad_norm": 1.5994904041290283, "learning_rate": 2.995235214528454e-05, "loss": 0.1703, "step": 3084 }, { "epoch": 0.0546339217327011, "grad_norm": 1.4082419872283936, "learning_rate": 2.9952283598943507e-05, "loss": 0.1991, "step": 3085 }, { "epoch": 0.05465163126972953, "grad_norm": 1.6316627264022827, "learning_rate": 2.9952215003410973e-05, "loss": 0.1276, "step": 3086 }, { "epoch": 0.054669340806757956, "grad_norm": 1.7027376890182495, "learning_rate": 2.995214635868717e-05, "loss": 0.1777, "step": 3087 }, { "epoch": 0.05468705034378639, "grad_norm": 2.327688694000244, "learning_rate": 2.9952077664772312e-05, "loss": 0.1657, "step": 3088 }, { "epoch": 0.054704759880814816, "grad_norm": 1.6704884767532349, "learning_rate": 2.9952008921666632e-05, "loss": 0.1609, "step": 3089 }, { "epoch": 0.05472246941784324, "grad_norm": 2.001314878463745, "learning_rate": 2.995194012937035e-05, "loss": 0.1114, "step": 3090 }, { "epoch": 0.054740178954871675, "grad_norm": 1.6718106269836426, "learning_rate": 2.9951871287883704e-05, "loss": 0.209, "step": 3091 }, { "epoch": 0.0547578884919001, "grad_norm": 2.413115978240967, "learning_rate": 2.995180239720691e-05, "loss": 0.1887, "step": 3092 }, { "epoch": 0.05477559802892853, "grad_norm": 1.2634514570236206, "learning_rate": 2.9951733457340204e-05, "loss": 0.1459, "step": 3093 }, { "epoch": 0.054793307565956954, "grad_norm": 2.6839218139648438, "learning_rate": 2.9951664468283803e-05, "loss": 0.1814, "step": 3094 }, { "epoch": 0.05481101710298539, "grad_norm": 6.511263370513916, "learning_rate": 2.9951595430037935e-05, "loss": 0.1624, "step": 3095 }, { "epoch": 0.054828726640013814, "grad_norm": 1.8713525533676147, "learning_rate": 2.9951526342602835e-05, "loss": 0.1682, "step": 3096 }, { "epoch": 0.05484643617704224, "grad_norm": 1.6204214096069336, "learning_rate": 2.9951457205978723e-05, "loss": 0.1923, "step": 3097 }, { "epoch": 0.05486414571407067, "grad_norm": 2.952298879623413, "learning_rate": 2.995138802016583e-05, "loss": 0.1597, "step": 3098 }, { "epoch": 0.0548818552510991, "grad_norm": 4.0093674659729, "learning_rate": 2.9951318785164385e-05, "loss": 0.17, "step": 3099 }, { "epoch": 0.054899564788127526, "grad_norm": 1.5635864734649658, "learning_rate": 2.9951249500974614e-05, "loss": 0.1555, "step": 3100 }, { "epoch": 0.05491727432515595, "grad_norm": 1.6325414180755615, "learning_rate": 2.9951180167596742e-05, "loss": 0.1457, "step": 3101 }, { "epoch": 0.054934983862184386, "grad_norm": 1.8557521104812622, "learning_rate": 2.9951110785030995e-05, "loss": 0.1997, "step": 3102 }, { "epoch": 0.05495269339921281, "grad_norm": 1.87331223487854, "learning_rate": 2.9951041353277612e-05, "loss": 0.2113, "step": 3103 }, { "epoch": 0.05497040293624124, "grad_norm": 2.136538505554199, "learning_rate": 2.9950971872336816e-05, "loss": 0.2136, "step": 3104 }, { "epoch": 0.054988112473269665, "grad_norm": 1.7809871435165405, "learning_rate": 2.995090234220883e-05, "loss": 0.1545, "step": 3105 }, { "epoch": 0.0550058220102981, "grad_norm": 2.2632813453674316, "learning_rate": 2.995083276289389e-05, "loss": 0.1497, "step": 3106 }, { "epoch": 0.055023531547326525, "grad_norm": 2.266582489013672, "learning_rate": 2.995076313439222e-05, "loss": 0.2073, "step": 3107 }, { "epoch": 0.05504124108435495, "grad_norm": 1.8336381912231445, "learning_rate": 2.9950693456704053e-05, "loss": 0.1875, "step": 3108 }, { "epoch": 0.05505895062138338, "grad_norm": 2.7723445892333984, "learning_rate": 2.995062372982962e-05, "loss": 0.1831, "step": 3109 }, { "epoch": 0.05507666015841181, "grad_norm": 1.3992512226104736, "learning_rate": 2.9950553953769143e-05, "loss": 0.1402, "step": 3110 }, { "epoch": 0.05509436969544024, "grad_norm": 2.1364223957061768, "learning_rate": 2.9950484128522854e-05, "loss": 0.1595, "step": 3111 }, { "epoch": 0.055112079232468664, "grad_norm": 1.85945463180542, "learning_rate": 2.995041425409099e-05, "loss": 0.1708, "step": 3112 }, { "epoch": 0.05512978876949709, "grad_norm": 1.5114651918411255, "learning_rate": 2.995034433047377e-05, "loss": 0.1621, "step": 3113 }, { "epoch": 0.05514749830652552, "grad_norm": 1.891526699066162, "learning_rate": 2.995027435767143e-05, "loss": 0.1798, "step": 3114 }, { "epoch": 0.05516520784355395, "grad_norm": 1.6485899686813354, "learning_rate": 2.9950204335684203e-05, "loss": 0.1885, "step": 3115 }, { "epoch": 0.055182917380582376, "grad_norm": 1.6961718797683716, "learning_rate": 2.9950134264512316e-05, "loss": 0.1416, "step": 3116 }, { "epoch": 0.05520062691761081, "grad_norm": 1.4221293926239014, "learning_rate": 2.9950064144155993e-05, "loss": 0.1772, "step": 3117 }, { "epoch": 0.055218336454639236, "grad_norm": 2.169005870819092, "learning_rate": 2.9949993974615476e-05, "loss": 0.1905, "step": 3118 }, { "epoch": 0.05523604599166766, "grad_norm": 1.529056429862976, "learning_rate": 2.9949923755890995e-05, "loss": 0.1188, "step": 3119 }, { "epoch": 0.05525375552869609, "grad_norm": 2.283264636993408, "learning_rate": 2.9949853487982766e-05, "loss": 0.1711, "step": 3120 }, { "epoch": 0.05527146506572452, "grad_norm": 2.8148152828216553, "learning_rate": 2.9949783170891038e-05, "loss": 0.2633, "step": 3121 }, { "epoch": 0.05528917460275295, "grad_norm": 3.2431578636169434, "learning_rate": 2.994971280461604e-05, "loss": 0.2333, "step": 3122 }, { "epoch": 0.055306884139781375, "grad_norm": 1.1384646892547607, "learning_rate": 2.9949642389157992e-05, "loss": 0.1194, "step": 3123 }, { "epoch": 0.0553245936768098, "grad_norm": 1.976019263267517, "learning_rate": 2.9949571924517132e-05, "loss": 0.2204, "step": 3124 }, { "epoch": 0.055342303213838234, "grad_norm": 1.9763120412826538, "learning_rate": 2.9949501410693694e-05, "loss": 0.1817, "step": 3125 }, { "epoch": 0.05536001275086666, "grad_norm": 1.484298825263977, "learning_rate": 2.994943084768791e-05, "loss": 0.1842, "step": 3126 }, { "epoch": 0.05537772228789509, "grad_norm": 1.233750581741333, "learning_rate": 2.994936023550001e-05, "loss": 0.2176, "step": 3127 }, { "epoch": 0.05539543182492352, "grad_norm": 2.108530282974243, "learning_rate": 2.994928957413023e-05, "loss": 0.202, "step": 3128 }, { "epoch": 0.05541314136195195, "grad_norm": 1.5295392274856567, "learning_rate": 2.9949218863578794e-05, "loss": 0.1574, "step": 3129 }, { "epoch": 0.05543085089898037, "grad_norm": 2.0625672340393066, "learning_rate": 2.994914810384594e-05, "loss": 0.1377, "step": 3130 }, { "epoch": 0.0554485604360088, "grad_norm": 1.7021028995513916, "learning_rate": 2.99490772949319e-05, "loss": 0.1861, "step": 3131 }, { "epoch": 0.05546626997303723, "grad_norm": 1.2971174716949463, "learning_rate": 2.9949006436836914e-05, "loss": 0.1706, "step": 3132 }, { "epoch": 0.05548397951006566, "grad_norm": 1.4187394380569458, "learning_rate": 2.9948935529561207e-05, "loss": 0.1157, "step": 3133 }, { "epoch": 0.055501689047094085, "grad_norm": 1.6458680629730225, "learning_rate": 2.9948864573105008e-05, "loss": 0.1758, "step": 3134 }, { "epoch": 0.05551939858412251, "grad_norm": 1.307558298110962, "learning_rate": 2.9948793567468565e-05, "loss": 0.1774, "step": 3135 }, { "epoch": 0.055537108121150945, "grad_norm": 1.5251208543777466, "learning_rate": 2.9948722512652096e-05, "loss": 0.1555, "step": 3136 }, { "epoch": 0.05555481765817937, "grad_norm": 1.6897035837173462, "learning_rate": 2.9948651408655846e-05, "loss": 0.1611, "step": 3137 }, { "epoch": 0.0555725271952078, "grad_norm": 2.1324944496154785, "learning_rate": 2.9948580255480043e-05, "loss": 0.1908, "step": 3138 }, { "epoch": 0.055590236732236224, "grad_norm": 2.5251166820526123, "learning_rate": 2.9948509053124925e-05, "loss": 0.1913, "step": 3139 }, { "epoch": 0.05560794626926466, "grad_norm": 1.4810336828231812, "learning_rate": 2.994843780159072e-05, "loss": 0.1555, "step": 3140 }, { "epoch": 0.055625655806293084, "grad_norm": 3.4840035438537598, "learning_rate": 2.9948366500877673e-05, "loss": 0.1245, "step": 3141 }, { "epoch": 0.05564336534332151, "grad_norm": 1.6676936149597168, "learning_rate": 2.9948295150986004e-05, "loss": 0.1899, "step": 3142 }, { "epoch": 0.055661074880349944, "grad_norm": 1.8174461126327515, "learning_rate": 2.994822375191596e-05, "loss": 0.2063, "step": 3143 }, { "epoch": 0.05567878441737837, "grad_norm": 2.3635356426239014, "learning_rate": 2.9948152303667777e-05, "loss": 0.212, "step": 3144 }, { "epoch": 0.055696493954406796, "grad_norm": 1.3562159538269043, "learning_rate": 2.9948080806241678e-05, "loss": 0.1347, "step": 3145 }, { "epoch": 0.05571420349143522, "grad_norm": 2.0154120922088623, "learning_rate": 2.994800925963791e-05, "loss": 0.1851, "step": 3146 }, { "epoch": 0.055731913028463656, "grad_norm": 1.3292773962020874, "learning_rate": 2.99479376638567e-05, "loss": 0.1654, "step": 3147 }, { "epoch": 0.05574962256549208, "grad_norm": 1.9835261106491089, "learning_rate": 2.9947866018898286e-05, "loss": 0.2051, "step": 3148 }, { "epoch": 0.05576733210252051, "grad_norm": 1.8613721132278442, "learning_rate": 2.9947794324762906e-05, "loss": 0.1741, "step": 3149 }, { "epoch": 0.055785041639548935, "grad_norm": 1.2977619171142578, "learning_rate": 2.9947722581450793e-05, "loss": 0.1503, "step": 3150 }, { "epoch": 0.05580275117657737, "grad_norm": 2.050063133239746, "learning_rate": 2.9947650788962186e-05, "loss": 0.1966, "step": 3151 }, { "epoch": 0.055820460713605795, "grad_norm": 1.689945101737976, "learning_rate": 2.9947578947297322e-05, "loss": 0.1275, "step": 3152 }, { "epoch": 0.05583817025063422, "grad_norm": 1.890464186668396, "learning_rate": 2.9947507056456433e-05, "loss": 0.1625, "step": 3153 }, { "epoch": 0.055855879787662654, "grad_norm": 1.2636265754699707, "learning_rate": 2.994743511643976e-05, "loss": 0.1214, "step": 3154 }, { "epoch": 0.05587358932469108, "grad_norm": 1.3456422090530396, "learning_rate": 2.994736312724753e-05, "loss": 0.125, "step": 3155 }, { "epoch": 0.05589129886171951, "grad_norm": 3.3441994190216064, "learning_rate": 2.9947291088879993e-05, "loss": 0.1444, "step": 3156 }, { "epoch": 0.055909008398747934, "grad_norm": 1.5821634531021118, "learning_rate": 2.9947219001337383e-05, "loss": 0.1869, "step": 3157 }, { "epoch": 0.05592671793577637, "grad_norm": 1.7667428255081177, "learning_rate": 2.994714686461993e-05, "loss": 0.1703, "step": 3158 }, { "epoch": 0.05594442747280479, "grad_norm": 1.2793257236480713, "learning_rate": 2.9947074678727876e-05, "loss": 0.129, "step": 3159 }, { "epoch": 0.05596213700983322, "grad_norm": 1.8876688480377197, "learning_rate": 2.994700244366146e-05, "loss": 0.1736, "step": 3160 }, { "epoch": 0.055979846546861646, "grad_norm": 1.7280843257904053, "learning_rate": 2.9946930159420913e-05, "loss": 0.1464, "step": 3161 }, { "epoch": 0.05599755608389008, "grad_norm": 1.354785442352295, "learning_rate": 2.994685782600648e-05, "loss": 0.1458, "step": 3162 }, { "epoch": 0.056015265620918506, "grad_norm": 1.6406793594360352, "learning_rate": 2.99467854434184e-05, "loss": 0.1821, "step": 3163 }, { "epoch": 0.05603297515794693, "grad_norm": 2.0268871784210205, "learning_rate": 2.9946713011656903e-05, "loss": 0.1817, "step": 3164 }, { "epoch": 0.05605068469497536, "grad_norm": 1.4501962661743164, "learning_rate": 2.9946640530722237e-05, "loss": 0.127, "step": 3165 }, { "epoch": 0.05606839423200379, "grad_norm": 1.8663156032562256, "learning_rate": 2.994656800061463e-05, "loss": 0.1753, "step": 3166 }, { "epoch": 0.05608610376903222, "grad_norm": 2.5708847045898438, "learning_rate": 2.994649542133433e-05, "loss": 0.1847, "step": 3167 }, { "epoch": 0.056103813306060644, "grad_norm": 3.0818512439727783, "learning_rate": 2.994642279288157e-05, "loss": 0.1327, "step": 3168 }, { "epoch": 0.05612152284308908, "grad_norm": 1.8340792655944824, "learning_rate": 2.994635011525659e-05, "loss": 0.1973, "step": 3169 }, { "epoch": 0.056139232380117504, "grad_norm": 2.0386667251586914, "learning_rate": 2.994627738845963e-05, "loss": 0.1704, "step": 3170 }, { "epoch": 0.05615694191714593, "grad_norm": 1.2451614141464233, "learning_rate": 2.994620461249093e-05, "loss": 0.1663, "step": 3171 }, { "epoch": 0.05617465145417436, "grad_norm": 1.9769651889801025, "learning_rate": 2.994613178735073e-05, "loss": 0.1555, "step": 3172 }, { "epoch": 0.05619236099120279, "grad_norm": 2.3720340728759766, "learning_rate": 2.9946058913039267e-05, "loss": 0.1673, "step": 3173 }, { "epoch": 0.05621007052823122, "grad_norm": 3.017087936401367, "learning_rate": 2.994598598955678e-05, "loss": 0.1433, "step": 3174 }, { "epoch": 0.05622778006525964, "grad_norm": 1.0471782684326172, "learning_rate": 2.9945913016903514e-05, "loss": 0.1387, "step": 3175 }, { "epoch": 0.05624548960228807, "grad_norm": 1.667553186416626, "learning_rate": 2.9945839995079704e-05, "loss": 0.1668, "step": 3176 }, { "epoch": 0.0562631991393165, "grad_norm": 1.3927613496780396, "learning_rate": 2.994576692408559e-05, "loss": 0.2035, "step": 3177 }, { "epoch": 0.05628090867634493, "grad_norm": 1.780455470085144, "learning_rate": 2.9945693803921418e-05, "loss": 0.1511, "step": 3178 }, { "epoch": 0.056298618213373355, "grad_norm": 2.2161595821380615, "learning_rate": 2.9945620634587423e-05, "loss": 0.1956, "step": 3179 }, { "epoch": 0.05631632775040178, "grad_norm": 1.5970312356948853, "learning_rate": 2.994554741608385e-05, "loss": 0.1371, "step": 3180 }, { "epoch": 0.056334037287430215, "grad_norm": 2.09515380859375, "learning_rate": 2.9945474148410935e-05, "loss": 0.1561, "step": 3181 }, { "epoch": 0.05635174682445864, "grad_norm": 2.089996576309204, "learning_rate": 2.9945400831568924e-05, "loss": 0.2143, "step": 3182 }, { "epoch": 0.05636945636148707, "grad_norm": 1.1493223905563354, "learning_rate": 2.9945327465558053e-05, "loss": 0.1181, "step": 3183 }, { "epoch": 0.0563871658985155, "grad_norm": 1.2810077667236328, "learning_rate": 2.994525405037857e-05, "loss": 0.1735, "step": 3184 }, { "epoch": 0.05640487543554393, "grad_norm": 2.0135490894317627, "learning_rate": 2.994518058603071e-05, "loss": 0.1909, "step": 3185 }, { "epoch": 0.056422584972572354, "grad_norm": 1.305864691734314, "learning_rate": 2.994510707251472e-05, "loss": 0.1408, "step": 3186 }, { "epoch": 0.05644029450960078, "grad_norm": 1.4225398302078247, "learning_rate": 2.9945033509830838e-05, "loss": 0.1666, "step": 3187 }, { "epoch": 0.056458004046629214, "grad_norm": 1.3988471031188965, "learning_rate": 2.9944959897979302e-05, "loss": 0.1706, "step": 3188 }, { "epoch": 0.05647571358365764, "grad_norm": 1.2285047769546509, "learning_rate": 2.9944886236960367e-05, "loss": 0.158, "step": 3189 }, { "epoch": 0.056493423120686066, "grad_norm": 2.142256021499634, "learning_rate": 2.9944812526774267e-05, "loss": 0.1856, "step": 3190 }, { "epoch": 0.05651113265771449, "grad_norm": 1.8457266092300415, "learning_rate": 2.9944738767421244e-05, "loss": 0.1588, "step": 3191 }, { "epoch": 0.056528842194742926, "grad_norm": 1.6198481321334839, "learning_rate": 2.994466495890154e-05, "loss": 0.1802, "step": 3192 }, { "epoch": 0.05654655173177135, "grad_norm": 1.3854146003723145, "learning_rate": 2.9944591101215402e-05, "loss": 0.1714, "step": 3193 }, { "epoch": 0.05656426126879978, "grad_norm": 1.785885214805603, "learning_rate": 2.994451719436307e-05, "loss": 0.2019, "step": 3194 }, { "epoch": 0.05658197080582821, "grad_norm": 2.1762447357177734, "learning_rate": 2.9944443238344787e-05, "loss": 0.1687, "step": 3195 }, { "epoch": 0.05659968034285664, "grad_norm": 1.7175897359848022, "learning_rate": 2.99443692331608e-05, "loss": 0.2409, "step": 3196 }, { "epoch": 0.056617389879885065, "grad_norm": 1.2666029930114746, "learning_rate": 2.9944295178811346e-05, "loss": 0.139, "step": 3197 }, { "epoch": 0.05663509941691349, "grad_norm": 1.2802774906158447, "learning_rate": 2.9944221075296677e-05, "loss": 0.1705, "step": 3198 }, { "epoch": 0.056652808953941924, "grad_norm": 1.6187005043029785, "learning_rate": 2.9944146922617034e-05, "loss": 0.1708, "step": 3199 }, { "epoch": 0.05667051849097035, "grad_norm": 1.256197214126587, "learning_rate": 2.994407272077265e-05, "loss": 0.1658, "step": 3200 }, { "epoch": 0.05668822802799878, "grad_norm": 1.1434366703033447, "learning_rate": 2.9943998469763786e-05, "loss": 0.1505, "step": 3201 }, { "epoch": 0.056705937565027204, "grad_norm": 1.6414072513580322, "learning_rate": 2.9943924169590676e-05, "loss": 0.1178, "step": 3202 }, { "epoch": 0.05672364710205564, "grad_norm": 1.087730050086975, "learning_rate": 2.9943849820253567e-05, "loss": 0.1703, "step": 3203 }, { "epoch": 0.05674135663908406, "grad_norm": 1.8436156511306763, "learning_rate": 2.99437754217527e-05, "loss": 0.1861, "step": 3204 }, { "epoch": 0.05675906617611249, "grad_norm": 1.0570789575576782, "learning_rate": 2.994370097408833e-05, "loss": 0.1464, "step": 3205 }, { "epoch": 0.056776775713140916, "grad_norm": 1.7464516162872314, "learning_rate": 2.9943626477260694e-05, "loss": 0.1882, "step": 3206 }, { "epoch": 0.05679448525016935, "grad_norm": 1.6296048164367676, "learning_rate": 2.9943551931270033e-05, "loss": 0.1571, "step": 3207 }, { "epoch": 0.056812194787197776, "grad_norm": 1.785431146621704, "learning_rate": 2.9943477336116605e-05, "loss": 0.1574, "step": 3208 }, { "epoch": 0.0568299043242262, "grad_norm": 2.2722485065460205, "learning_rate": 2.9943402691800642e-05, "loss": 0.1403, "step": 3209 }, { "epoch": 0.056847613861254635, "grad_norm": 1.7146251201629639, "learning_rate": 2.9943327998322395e-05, "loss": 0.1711, "step": 3210 }, { "epoch": 0.05686532339828306, "grad_norm": 2.271815538406372, "learning_rate": 2.9943253255682116e-05, "loss": 0.1788, "step": 3211 }, { "epoch": 0.05688303293531149, "grad_norm": 0.8955349922180176, "learning_rate": 2.994317846388004e-05, "loss": 0.1335, "step": 3212 }, { "epoch": 0.056900742472339914, "grad_norm": 1.4716415405273438, "learning_rate": 2.994310362291642e-05, "loss": 0.1378, "step": 3213 }, { "epoch": 0.05691845200936835, "grad_norm": 1.4347509145736694, "learning_rate": 2.9943028732791504e-05, "loss": 0.1399, "step": 3214 }, { "epoch": 0.056936161546396774, "grad_norm": 2.4660372734069824, "learning_rate": 2.994295379350553e-05, "loss": 0.2049, "step": 3215 }, { "epoch": 0.0569538710834252, "grad_norm": 1.561966061592102, "learning_rate": 2.9942878805058752e-05, "loss": 0.1476, "step": 3216 }, { "epoch": 0.05697158062045363, "grad_norm": 1.701991319656372, "learning_rate": 2.9942803767451412e-05, "loss": 0.1534, "step": 3217 }, { "epoch": 0.05698929015748206, "grad_norm": 1.9492794275283813, "learning_rate": 2.9942728680683764e-05, "loss": 0.204, "step": 3218 }, { "epoch": 0.057006999694510486, "grad_norm": 2.294523239135742, "learning_rate": 2.9942653544756045e-05, "loss": 0.1999, "step": 3219 }, { "epoch": 0.05702470923153891, "grad_norm": 1.3465728759765625, "learning_rate": 2.994257835966851e-05, "loss": 0.1475, "step": 3220 }, { "epoch": 0.057042418768567346, "grad_norm": 2.8797881603240967, "learning_rate": 2.99425031254214e-05, "loss": 0.1725, "step": 3221 }, { "epoch": 0.05706012830559577, "grad_norm": 1.2663339376449585, "learning_rate": 2.994242784201497e-05, "loss": 0.1464, "step": 3222 }, { "epoch": 0.0570778378426242, "grad_norm": 1.503638744354248, "learning_rate": 2.994235250944946e-05, "loss": 0.1565, "step": 3223 }, { "epoch": 0.057095547379652625, "grad_norm": 2.2808477878570557, "learning_rate": 2.994227712772512e-05, "loss": 0.2225, "step": 3224 }, { "epoch": 0.05711325691668106, "grad_norm": 1.3539959192276, "learning_rate": 2.9942201696842207e-05, "loss": 0.1298, "step": 3225 }, { "epoch": 0.057130966453709485, "grad_norm": 1.559299111366272, "learning_rate": 2.9942126216800956e-05, "loss": 0.1812, "step": 3226 }, { "epoch": 0.05714867599073791, "grad_norm": 1.2285102605819702, "learning_rate": 2.9942050687601623e-05, "loss": 0.1495, "step": 3227 }, { "epoch": 0.05716638552776634, "grad_norm": 1.5155444145202637, "learning_rate": 2.994197510924446e-05, "loss": 0.1969, "step": 3228 }, { "epoch": 0.05718409506479477, "grad_norm": 1.3047218322753906, "learning_rate": 2.99418994817297e-05, "loss": 0.1899, "step": 3229 }, { "epoch": 0.0572018046018232, "grad_norm": 1.2020965814590454, "learning_rate": 2.9941823805057604e-05, "loss": 0.1645, "step": 3230 }, { "epoch": 0.057219514138851624, "grad_norm": 1.3897366523742676, "learning_rate": 2.9941748079228422e-05, "loss": 0.1671, "step": 3231 }, { "epoch": 0.05723722367588005, "grad_norm": 1.8595614433288574, "learning_rate": 2.99416723042424e-05, "loss": 0.1496, "step": 3232 }, { "epoch": 0.05725493321290848, "grad_norm": 1.4228860139846802, "learning_rate": 2.9941596480099786e-05, "loss": 0.1532, "step": 3233 }, { "epoch": 0.05727264274993691, "grad_norm": 1.3688369989395142, "learning_rate": 2.9941520606800837e-05, "loss": 0.1378, "step": 3234 }, { "epoch": 0.057290352286965336, "grad_norm": 2.292708396911621, "learning_rate": 2.9941444684345787e-05, "loss": 0.1418, "step": 3235 }, { "epoch": 0.05730806182399377, "grad_norm": 3.746958017349243, "learning_rate": 2.99413687127349e-05, "loss": 0.1854, "step": 3236 }, { "epoch": 0.057325771361022196, "grad_norm": 1.129480004310608, "learning_rate": 2.9941292691968424e-05, "loss": 0.1624, "step": 3237 }, { "epoch": 0.05734348089805062, "grad_norm": 1.9792214632034302, "learning_rate": 2.99412166220466e-05, "loss": 0.1537, "step": 3238 }, { "epoch": 0.05736119043507905, "grad_norm": 3.1963040828704834, "learning_rate": 2.9941140502969685e-05, "loss": 0.1989, "step": 3239 }, { "epoch": 0.05737889997210748, "grad_norm": 2.0784623622894287, "learning_rate": 2.9941064334737936e-05, "loss": 0.2032, "step": 3240 }, { "epoch": 0.05739660950913591, "grad_norm": 1.8117700815200806, "learning_rate": 2.9940988117351587e-05, "loss": 0.1715, "step": 3241 }, { "epoch": 0.057414319046164335, "grad_norm": 1.6526153087615967, "learning_rate": 2.9940911850810907e-05, "loss": 0.2018, "step": 3242 }, { "epoch": 0.05743202858319276, "grad_norm": 1.4612758159637451, "learning_rate": 2.9940835535116133e-05, "loss": 0.1595, "step": 3243 }, { "epoch": 0.057449738120221194, "grad_norm": 1.9655826091766357, "learning_rate": 2.9940759170267525e-05, "loss": 0.1646, "step": 3244 }, { "epoch": 0.05746744765724962, "grad_norm": 1.5445709228515625, "learning_rate": 2.9940682756265324e-05, "loss": 0.1869, "step": 3245 }, { "epoch": 0.05748515719427805, "grad_norm": 1.7791767120361328, "learning_rate": 2.994060629310979e-05, "loss": 0.1631, "step": 3246 }, { "epoch": 0.05750286673130647, "grad_norm": 3.020930767059326, "learning_rate": 2.9940529780801176e-05, "loss": 0.1505, "step": 3247 }, { "epoch": 0.05752057626833491, "grad_norm": 1.4240349531173706, "learning_rate": 2.994045321933973e-05, "loss": 0.1973, "step": 3248 }, { "epoch": 0.05753828580536333, "grad_norm": 1.4512451887130737, "learning_rate": 2.9940376608725703e-05, "loss": 0.1201, "step": 3249 }, { "epoch": 0.05755599534239176, "grad_norm": 1.6266895532608032, "learning_rate": 2.994029994895935e-05, "loss": 0.1688, "step": 3250 }, { "epoch": 0.05757370487942019, "grad_norm": 1.9276416301727295, "learning_rate": 2.9940223240040916e-05, "loss": 0.1387, "step": 3251 }, { "epoch": 0.05759141441644862, "grad_norm": 2.050046920776367, "learning_rate": 2.9940146481970662e-05, "loss": 0.209, "step": 3252 }, { "epoch": 0.057609123953477046, "grad_norm": 1.4453366994857788, "learning_rate": 2.9940069674748838e-05, "loss": 0.1553, "step": 3253 }, { "epoch": 0.05762683349050547, "grad_norm": 1.3661863803863525, "learning_rate": 2.993999281837569e-05, "loss": 0.1683, "step": 3254 }, { "epoch": 0.057644543027533905, "grad_norm": 1.8207685947418213, "learning_rate": 2.9939915912851482e-05, "loss": 0.1633, "step": 3255 }, { "epoch": 0.05766225256456233, "grad_norm": 1.439125418663025, "learning_rate": 2.9939838958176465e-05, "loss": 0.1943, "step": 3256 }, { "epoch": 0.05767996210159076, "grad_norm": 1.878604769706726, "learning_rate": 2.9939761954350884e-05, "loss": 0.1675, "step": 3257 }, { "epoch": 0.057697671638619184, "grad_norm": 1.9580094814300537, "learning_rate": 2.9939684901375e-05, "loss": 0.148, "step": 3258 }, { "epoch": 0.05771538117564762, "grad_norm": 1.364364743232727, "learning_rate": 2.993960779924906e-05, "loss": 0.1284, "step": 3259 }, { "epoch": 0.057733090712676044, "grad_norm": 2.5642306804656982, "learning_rate": 2.9939530647973322e-05, "loss": 0.2184, "step": 3260 }, { "epoch": 0.05775080024970447, "grad_norm": 2.3771042823791504, "learning_rate": 2.9939453447548043e-05, "loss": 0.1525, "step": 3261 }, { "epoch": 0.057768509786732904, "grad_norm": 1.492961049079895, "learning_rate": 2.993937619797347e-05, "loss": 0.1472, "step": 3262 }, { "epoch": 0.05778621932376133, "grad_norm": 1.7003391981124878, "learning_rate": 2.9939298899249863e-05, "loss": 0.1534, "step": 3263 }, { "epoch": 0.057803928860789756, "grad_norm": 1.3230255842208862, "learning_rate": 2.9939221551377473e-05, "loss": 0.1589, "step": 3264 }, { "epoch": 0.05782163839781818, "grad_norm": 1.3221741914749146, "learning_rate": 2.9939144154356552e-05, "loss": 0.1532, "step": 3265 }, { "epoch": 0.057839347934846616, "grad_norm": 1.7526911497116089, "learning_rate": 2.993906670818736e-05, "loss": 0.1878, "step": 3266 }, { "epoch": 0.05785705747187504, "grad_norm": 1.716229796409607, "learning_rate": 2.993898921287015e-05, "loss": 0.221, "step": 3267 }, { "epoch": 0.05787476700890347, "grad_norm": 1.718772292137146, "learning_rate": 2.9938911668405177e-05, "loss": 0.1731, "step": 3268 }, { "epoch": 0.057892476545931895, "grad_norm": 1.952904462814331, "learning_rate": 2.9938834074792693e-05, "loss": 0.1764, "step": 3269 }, { "epoch": 0.05791018608296033, "grad_norm": 1.570652961730957, "learning_rate": 2.9938756432032955e-05, "loss": 0.1799, "step": 3270 }, { "epoch": 0.057927895619988755, "grad_norm": 1.6028578281402588, "learning_rate": 2.9938678740126224e-05, "loss": 0.1524, "step": 3271 }, { "epoch": 0.05794560515701718, "grad_norm": 1.550383448600769, "learning_rate": 2.9938600999072747e-05, "loss": 0.2012, "step": 3272 }, { "epoch": 0.05796331469404561, "grad_norm": 1.3347307443618774, "learning_rate": 2.9938523208872783e-05, "loss": 0.1587, "step": 3273 }, { "epoch": 0.05798102423107404, "grad_norm": 1.8444286584854126, "learning_rate": 2.993844536952659e-05, "loss": 0.1493, "step": 3274 }, { "epoch": 0.05799873376810247, "grad_norm": 1.4681864976882935, "learning_rate": 2.9938367481034423e-05, "loss": 0.1844, "step": 3275 }, { "epoch": 0.058016443305130894, "grad_norm": 1.7332091331481934, "learning_rate": 2.993828954339654e-05, "loss": 0.1175, "step": 3276 }, { "epoch": 0.05803415284215933, "grad_norm": 1.7305303812026978, "learning_rate": 2.9938211556613194e-05, "loss": 0.1674, "step": 3277 }, { "epoch": 0.05805186237918775, "grad_norm": 1.514931559562683, "learning_rate": 2.993813352068464e-05, "loss": 0.1353, "step": 3278 }, { "epoch": 0.05806957191621618, "grad_norm": 1.5773088932037354, "learning_rate": 2.9938055435611143e-05, "loss": 0.1406, "step": 3279 }, { "epoch": 0.058087281453244606, "grad_norm": 1.629582166671753, "learning_rate": 2.993797730139295e-05, "loss": 0.136, "step": 3280 }, { "epoch": 0.05810499099027304, "grad_norm": 1.5355935096740723, "learning_rate": 2.9937899118030322e-05, "loss": 0.1571, "step": 3281 }, { "epoch": 0.058122700527301466, "grad_norm": 1.7176094055175781, "learning_rate": 2.9937820885523525e-05, "loss": 0.1267, "step": 3282 }, { "epoch": 0.05814041006432989, "grad_norm": 1.1550261974334717, "learning_rate": 2.9937742603872798e-05, "loss": 0.1618, "step": 3283 }, { "epoch": 0.05815811960135832, "grad_norm": 1.3895232677459717, "learning_rate": 2.9937664273078412e-05, "loss": 0.2194, "step": 3284 }, { "epoch": 0.05817582913838675, "grad_norm": 1.6543196439743042, "learning_rate": 2.9937585893140625e-05, "loss": 0.2219, "step": 3285 }, { "epoch": 0.05819353867541518, "grad_norm": 2.2384073734283447, "learning_rate": 2.9937507464059687e-05, "loss": 0.2279, "step": 3286 }, { "epoch": 0.058211248212443605, "grad_norm": 1.428132176399231, "learning_rate": 2.9937428985835863e-05, "loss": 0.1757, "step": 3287 }, { "epoch": 0.05822895774947204, "grad_norm": 1.440608263015747, "learning_rate": 2.9937350458469406e-05, "loss": 0.1391, "step": 3288 }, { "epoch": 0.058246667286500464, "grad_norm": 2.029179811477661, "learning_rate": 2.993727188196058e-05, "loss": 0.2202, "step": 3289 }, { "epoch": 0.05826437682352889, "grad_norm": 1.0373945236206055, "learning_rate": 2.9937193256309635e-05, "loss": 0.1239, "step": 3290 }, { "epoch": 0.05828208636055732, "grad_norm": 1.6862872838974, "learning_rate": 2.993711458151684e-05, "loss": 0.1378, "step": 3291 }, { "epoch": 0.05829979589758575, "grad_norm": 1.1125348806381226, "learning_rate": 2.9937035857582445e-05, "loss": 0.1397, "step": 3292 }, { "epoch": 0.05831750543461418, "grad_norm": 0.8167054057121277, "learning_rate": 2.9936957084506716e-05, "loss": 0.133, "step": 3293 }, { "epoch": 0.0583352149716426, "grad_norm": 1.3819445371627808, "learning_rate": 2.993687826228991e-05, "loss": 0.1684, "step": 3294 }, { "epoch": 0.05835292450867103, "grad_norm": 1.6358258724212646, "learning_rate": 2.993679939093228e-05, "loss": 0.1437, "step": 3295 }, { "epoch": 0.05837063404569946, "grad_norm": 1.7394543886184692, "learning_rate": 2.9936720470434095e-05, "loss": 0.1656, "step": 3296 }, { "epoch": 0.05838834358272789, "grad_norm": 2.1728291511535645, "learning_rate": 2.993664150079561e-05, "loss": 0.1375, "step": 3297 }, { "epoch": 0.058406053119756315, "grad_norm": 1.785304069519043, "learning_rate": 2.9936562482017085e-05, "loss": 0.183, "step": 3298 }, { "epoch": 0.05842376265678474, "grad_norm": 1.394012451171875, "learning_rate": 2.993648341409878e-05, "loss": 0.1784, "step": 3299 }, { "epoch": 0.058441472193813175, "grad_norm": 1.73690664768219, "learning_rate": 2.9936404297040954e-05, "loss": 0.1623, "step": 3300 }, { "epoch": 0.0584591817308416, "grad_norm": 1.7571158409118652, "learning_rate": 2.9936325130843868e-05, "loss": 0.1438, "step": 3301 }, { "epoch": 0.05847689126787003, "grad_norm": 1.3222856521606445, "learning_rate": 2.9936245915507785e-05, "loss": 0.1995, "step": 3302 }, { "epoch": 0.05849460080489846, "grad_norm": 1.6889991760253906, "learning_rate": 2.9936166651032962e-05, "loss": 0.1303, "step": 3303 }, { "epoch": 0.05851231034192689, "grad_norm": 2.60392165184021, "learning_rate": 2.9936087337419665e-05, "loss": 0.1319, "step": 3304 }, { "epoch": 0.058530019878955314, "grad_norm": 1.9306989908218384, "learning_rate": 2.9936007974668146e-05, "loss": 0.1755, "step": 3305 }, { "epoch": 0.05854772941598374, "grad_norm": 1.4709551334381104, "learning_rate": 2.9935928562778677e-05, "loss": 0.1675, "step": 3306 }, { "epoch": 0.058565438953012174, "grad_norm": 2.008873462677002, "learning_rate": 2.9935849101751513e-05, "loss": 0.1242, "step": 3307 }, { "epoch": 0.0585831484900406, "grad_norm": 2.1065032482147217, "learning_rate": 2.9935769591586913e-05, "loss": 0.1875, "step": 3308 }, { "epoch": 0.058600858027069026, "grad_norm": 1.489488959312439, "learning_rate": 2.9935690032285143e-05, "loss": 0.1596, "step": 3309 }, { "epoch": 0.05861856756409745, "grad_norm": 1.4304357767105103, "learning_rate": 2.9935610423846463e-05, "loss": 0.179, "step": 3310 }, { "epoch": 0.058636277101125886, "grad_norm": 1.8827522993087769, "learning_rate": 2.9935530766271134e-05, "loss": 0.1938, "step": 3311 }, { "epoch": 0.05865398663815431, "grad_norm": 1.8097245693206787, "learning_rate": 2.9935451059559424e-05, "loss": 0.1797, "step": 3312 }, { "epoch": 0.05867169617518274, "grad_norm": 2.0872011184692383, "learning_rate": 2.9935371303711583e-05, "loss": 0.1544, "step": 3313 }, { "epoch": 0.058689405712211165, "grad_norm": 2.1911730766296387, "learning_rate": 2.993529149872789e-05, "loss": 0.1398, "step": 3314 }, { "epoch": 0.0587071152492396, "grad_norm": 1.8724415302276611, "learning_rate": 2.9935211644608594e-05, "loss": 0.2026, "step": 3315 }, { "epoch": 0.058724824786268025, "grad_norm": 1.4649677276611328, "learning_rate": 2.993513174135396e-05, "loss": 0.1207, "step": 3316 }, { "epoch": 0.05874253432329645, "grad_norm": 1.3442054986953735, "learning_rate": 2.9935051788964258e-05, "loss": 0.1326, "step": 3317 }, { "epoch": 0.058760243860324884, "grad_norm": 1.9531450271606445, "learning_rate": 2.9934971787439746e-05, "loss": 0.1474, "step": 3318 }, { "epoch": 0.05877795339735331, "grad_norm": 1.5026682615280151, "learning_rate": 2.9934891736780685e-05, "loss": 0.1619, "step": 3319 }, { "epoch": 0.05879566293438174, "grad_norm": 1.224913239479065, "learning_rate": 2.993481163698734e-05, "loss": 0.1358, "step": 3320 }, { "epoch": 0.058813372471410164, "grad_norm": 1.878110408782959, "learning_rate": 2.993473148805998e-05, "loss": 0.1995, "step": 3321 }, { "epoch": 0.0588310820084386, "grad_norm": 1.1973369121551514, "learning_rate": 2.993465128999886e-05, "loss": 0.1376, "step": 3322 }, { "epoch": 0.05884879154546702, "grad_norm": 1.8210997581481934, "learning_rate": 2.9934571042804245e-05, "loss": 0.1769, "step": 3323 }, { "epoch": 0.05886650108249545, "grad_norm": 1.6896549463272095, "learning_rate": 2.9934490746476403e-05, "loss": 0.1849, "step": 3324 }, { "epoch": 0.058884210619523876, "grad_norm": 1.4532936811447144, "learning_rate": 2.9934410401015602e-05, "loss": 0.1218, "step": 3325 }, { "epoch": 0.05890192015655231, "grad_norm": 2.056985855102539, "learning_rate": 2.9934330006422096e-05, "loss": 0.1737, "step": 3326 }, { "epoch": 0.058919629693580736, "grad_norm": 1.8530972003936768, "learning_rate": 2.993424956269616e-05, "loss": 0.2217, "step": 3327 }, { "epoch": 0.05893733923060916, "grad_norm": 1.6643002033233643, "learning_rate": 2.993416906983805e-05, "loss": 0.1727, "step": 3328 }, { "epoch": 0.058955048767637595, "grad_norm": 1.3831758499145508, "learning_rate": 2.9934088527848035e-05, "loss": 0.1636, "step": 3329 }, { "epoch": 0.05897275830466602, "grad_norm": 1.102357029914856, "learning_rate": 2.993400793672638e-05, "loss": 0.1427, "step": 3330 }, { "epoch": 0.05899046784169445, "grad_norm": 1.5681371688842773, "learning_rate": 2.9933927296473348e-05, "loss": 0.2015, "step": 3331 }, { "epoch": 0.059008177378722874, "grad_norm": 1.7585471868515015, "learning_rate": 2.993384660708921e-05, "loss": 0.1847, "step": 3332 }, { "epoch": 0.05902588691575131, "grad_norm": 1.428077220916748, "learning_rate": 2.993376586857422e-05, "loss": 0.1737, "step": 3333 }, { "epoch": 0.059043596452779734, "grad_norm": 1.4339141845703125, "learning_rate": 2.9933685080928655e-05, "loss": 0.1877, "step": 3334 }, { "epoch": 0.05906130598980816, "grad_norm": 1.6822433471679688, "learning_rate": 2.9933604244152773e-05, "loss": 0.1369, "step": 3335 }, { "epoch": 0.05907901552683659, "grad_norm": 1.5534319877624512, "learning_rate": 2.993352335824685e-05, "loss": 0.1329, "step": 3336 }, { "epoch": 0.05909672506386502, "grad_norm": 1.5047686100006104, "learning_rate": 2.993344242321114e-05, "loss": 0.1914, "step": 3337 }, { "epoch": 0.05911443460089345, "grad_norm": 1.6118853092193604, "learning_rate": 2.993336143904592e-05, "loss": 0.216, "step": 3338 }, { "epoch": 0.05913214413792187, "grad_norm": 1.3220863342285156, "learning_rate": 2.9933280405751447e-05, "loss": 0.1642, "step": 3339 }, { "epoch": 0.0591498536749503, "grad_norm": 1.5540592670440674, "learning_rate": 2.993319932332799e-05, "loss": 0.1895, "step": 3340 }, { "epoch": 0.05916756321197873, "grad_norm": 1.5448148250579834, "learning_rate": 2.993311819177582e-05, "loss": 0.1646, "step": 3341 }, { "epoch": 0.05918527274900716, "grad_norm": 1.8423148393630981, "learning_rate": 2.9933037011095204e-05, "loss": 0.1817, "step": 3342 }, { "epoch": 0.059202982286035585, "grad_norm": 1.3287022113800049, "learning_rate": 2.9932955781286406e-05, "loss": 0.1858, "step": 3343 }, { "epoch": 0.05922069182306402, "grad_norm": 2.4700379371643066, "learning_rate": 2.993287450234969e-05, "loss": 0.2132, "step": 3344 }, { "epoch": 0.059238401360092445, "grad_norm": 2.050124168395996, "learning_rate": 2.993279317428533e-05, "loss": 0.1566, "step": 3345 }, { "epoch": 0.05925611089712087, "grad_norm": 1.6437318325042725, "learning_rate": 2.9932711797093588e-05, "loss": 0.1864, "step": 3346 }, { "epoch": 0.0592738204341493, "grad_norm": 2.335975408554077, "learning_rate": 2.993263037077474e-05, "loss": 0.1659, "step": 3347 }, { "epoch": 0.05929152997117773, "grad_norm": 1.5385648012161255, "learning_rate": 2.9932548895329045e-05, "loss": 0.1552, "step": 3348 }, { "epoch": 0.05930923950820616, "grad_norm": 1.9240809679031372, "learning_rate": 2.9932467370756775e-05, "loss": 0.1696, "step": 3349 }, { "epoch": 0.059326949045234584, "grad_norm": 2.1165008544921875, "learning_rate": 2.9932385797058194e-05, "loss": 0.2019, "step": 3350 }, { "epoch": 0.05934465858226301, "grad_norm": 1.7043896913528442, "learning_rate": 2.9932304174233582e-05, "loss": 0.178, "step": 3351 }, { "epoch": 0.05936236811929144, "grad_norm": 1.4913890361785889, "learning_rate": 2.9932222502283194e-05, "loss": 0.1456, "step": 3352 }, { "epoch": 0.05938007765631987, "grad_norm": 1.637043833732605, "learning_rate": 2.9932140781207307e-05, "loss": 0.152, "step": 3353 }, { "epoch": 0.059397787193348296, "grad_norm": 2.0354843139648438, "learning_rate": 2.9932059011006182e-05, "loss": 0.1781, "step": 3354 }, { "epoch": 0.05941549673037673, "grad_norm": 0.9394916296005249, "learning_rate": 2.9931977191680098e-05, "loss": 0.1304, "step": 3355 }, { "epoch": 0.059433206267405156, "grad_norm": 1.572217345237732, "learning_rate": 2.9931895323229318e-05, "loss": 0.122, "step": 3356 }, { "epoch": 0.05945091580443358, "grad_norm": 1.3510619401931763, "learning_rate": 2.9931813405654113e-05, "loss": 0.1339, "step": 3357 }, { "epoch": 0.05946862534146201, "grad_norm": 1.3195230960845947, "learning_rate": 2.9931731438954752e-05, "loss": 0.1423, "step": 3358 }, { "epoch": 0.05948633487849044, "grad_norm": 1.4382405281066895, "learning_rate": 2.9931649423131508e-05, "loss": 0.1337, "step": 3359 }, { "epoch": 0.05950404441551887, "grad_norm": 1.7038366794586182, "learning_rate": 2.9931567358184642e-05, "loss": 0.1526, "step": 3360 }, { "epoch": 0.059521753952547295, "grad_norm": 1.4682351350784302, "learning_rate": 2.9931485244114433e-05, "loss": 0.0902, "step": 3361 }, { "epoch": 0.05953946348957572, "grad_norm": 1.443432092666626, "learning_rate": 2.9931403080921145e-05, "loss": 0.1677, "step": 3362 }, { "epoch": 0.059557173026604154, "grad_norm": 1.3652468919754028, "learning_rate": 2.9931320868605054e-05, "loss": 0.1682, "step": 3363 }, { "epoch": 0.05957488256363258, "grad_norm": 1.2397184371948242, "learning_rate": 2.9931238607166427e-05, "loss": 0.1752, "step": 3364 }, { "epoch": 0.05959259210066101, "grad_norm": 1.9076184034347534, "learning_rate": 2.9931156296605533e-05, "loss": 0.1961, "step": 3365 }, { "epoch": 0.05961030163768943, "grad_norm": 2.168395757675171, "learning_rate": 2.9931073936922647e-05, "loss": 0.145, "step": 3366 }, { "epoch": 0.05962801117471787, "grad_norm": 1.8333549499511719, "learning_rate": 2.993099152811804e-05, "loss": 0.1039, "step": 3367 }, { "epoch": 0.05964572071174629, "grad_norm": 2.0432510375976562, "learning_rate": 2.9930909070191977e-05, "loss": 0.1874, "step": 3368 }, { "epoch": 0.05966343024877472, "grad_norm": 2.0275375843048096, "learning_rate": 2.9930826563144735e-05, "loss": 0.1545, "step": 3369 }, { "epoch": 0.05968113978580315, "grad_norm": 1.4811279773712158, "learning_rate": 2.9930744006976582e-05, "loss": 0.1674, "step": 3370 }, { "epoch": 0.05969884932283158, "grad_norm": 1.5294609069824219, "learning_rate": 2.9930661401687792e-05, "loss": 0.1508, "step": 3371 }, { "epoch": 0.059716558859860006, "grad_norm": 1.84352707862854, "learning_rate": 2.9930578747278637e-05, "loss": 0.1313, "step": 3372 }, { "epoch": 0.05973426839688843, "grad_norm": 2.077239513397217, "learning_rate": 2.993049604374939e-05, "loss": 0.1691, "step": 3373 }, { "epoch": 0.059751977933916865, "grad_norm": 1.4637212753295898, "learning_rate": 2.9930413291100317e-05, "loss": 0.1662, "step": 3374 }, { "epoch": 0.05976968747094529, "grad_norm": 1.2035809755325317, "learning_rate": 2.9930330489331695e-05, "loss": 0.1322, "step": 3375 }, { "epoch": 0.05978739700797372, "grad_norm": 1.3691518306732178, "learning_rate": 2.9930247638443798e-05, "loss": 0.1663, "step": 3376 }, { "epoch": 0.059805106545002144, "grad_norm": 1.9075013399124146, "learning_rate": 2.99301647384369e-05, "loss": 0.1839, "step": 3377 }, { "epoch": 0.05982281608203058, "grad_norm": 1.592477798461914, "learning_rate": 2.9930081789311265e-05, "loss": 0.1856, "step": 3378 }, { "epoch": 0.059840525619059004, "grad_norm": 1.8296147584915161, "learning_rate": 2.9929998791067174e-05, "loss": 0.1456, "step": 3379 }, { "epoch": 0.05985823515608743, "grad_norm": 1.442355751991272, "learning_rate": 2.9929915743704893e-05, "loss": 0.1797, "step": 3380 }, { "epoch": 0.05987594469311586, "grad_norm": 1.2998422384262085, "learning_rate": 2.9929832647224706e-05, "loss": 0.1262, "step": 3381 }, { "epoch": 0.05989365423014429, "grad_norm": 1.8238890171051025, "learning_rate": 2.992974950162687e-05, "loss": 0.1552, "step": 3382 }, { "epoch": 0.059911363767172716, "grad_norm": 1.2246978282928467, "learning_rate": 2.9929666306911675e-05, "loss": 0.1806, "step": 3383 }, { "epoch": 0.05992907330420114, "grad_norm": 1.1909842491149902, "learning_rate": 2.9929583063079386e-05, "loss": 0.1526, "step": 3384 }, { "epoch": 0.059946782841229576, "grad_norm": 1.7289228439331055, "learning_rate": 2.992949977013028e-05, "loss": 0.1181, "step": 3385 }, { "epoch": 0.059964492378258, "grad_norm": 1.3165712356567383, "learning_rate": 2.9929416428064626e-05, "loss": 0.1384, "step": 3386 }, { "epoch": 0.05998220191528643, "grad_norm": 2.076558828353882, "learning_rate": 2.9929333036882706e-05, "loss": 0.1621, "step": 3387 }, { "epoch": 0.059999911452314855, "grad_norm": 1.3153921365737915, "learning_rate": 2.992924959658479e-05, "loss": 0.1345, "step": 3388 }, { "epoch": 0.06001762098934329, "grad_norm": 2.594484329223633, "learning_rate": 2.992916610717115e-05, "loss": 0.1858, "step": 3389 }, { "epoch": 0.060035330526371715, "grad_norm": 1.580630898475647, "learning_rate": 2.9929082568642066e-05, "loss": 0.1495, "step": 3390 }, { "epoch": 0.06005304006340014, "grad_norm": 1.6832456588745117, "learning_rate": 2.992899898099781e-05, "loss": 0.1542, "step": 3391 }, { "epoch": 0.06007074960042857, "grad_norm": 1.928563117980957, "learning_rate": 2.9928915344238654e-05, "loss": 0.2032, "step": 3392 }, { "epoch": 0.060088459137457, "grad_norm": 1.440686821937561, "learning_rate": 2.9928831658364885e-05, "loss": 0.1393, "step": 3393 }, { "epoch": 0.06010616867448543, "grad_norm": 1.787306308746338, "learning_rate": 2.992874792337676e-05, "loss": 0.1765, "step": 3394 }, { "epoch": 0.060123878211513854, "grad_norm": 1.4489580392837524, "learning_rate": 2.992866413927457e-05, "loss": 0.1515, "step": 3395 }, { "epoch": 0.06014158774854229, "grad_norm": 1.5496914386749268, "learning_rate": 2.9928580306058583e-05, "loss": 0.2164, "step": 3396 }, { "epoch": 0.06015929728557071, "grad_norm": 1.9430577754974365, "learning_rate": 2.9928496423729078e-05, "loss": 0.1808, "step": 3397 }, { "epoch": 0.06017700682259914, "grad_norm": 1.4894399642944336, "learning_rate": 2.992841249228633e-05, "loss": 0.1743, "step": 3398 }, { "epoch": 0.060194716359627566, "grad_norm": 1.2115182876586914, "learning_rate": 2.9928328511730614e-05, "loss": 0.1455, "step": 3399 }, { "epoch": 0.060212425896656, "grad_norm": 1.4171758890151978, "learning_rate": 2.9928244482062205e-05, "loss": 0.1562, "step": 3400 }, { "epoch": 0.060230135433684426, "grad_norm": 1.3574509620666504, "learning_rate": 2.992816040328138e-05, "loss": 0.1517, "step": 3401 }, { "epoch": 0.06024784497071285, "grad_norm": 1.077505350112915, "learning_rate": 2.992807627538842e-05, "loss": 0.1415, "step": 3402 }, { "epoch": 0.06026555450774128, "grad_norm": 2.244487762451172, "learning_rate": 2.9927992098383602e-05, "loss": 0.1904, "step": 3403 }, { "epoch": 0.06028326404476971, "grad_norm": 2.2126612663269043, "learning_rate": 2.9927907872267198e-05, "loss": 0.213, "step": 3404 }, { "epoch": 0.06030097358179814, "grad_norm": 1.532943844795227, "learning_rate": 2.9927823597039485e-05, "loss": 0.1448, "step": 3405 }, { "epoch": 0.060318683118826565, "grad_norm": 1.8619794845581055, "learning_rate": 2.9927739272700744e-05, "loss": 0.1495, "step": 3406 }, { "epoch": 0.06033639265585499, "grad_norm": 1.9014629125595093, "learning_rate": 2.992765489925125e-05, "loss": 0.202, "step": 3407 }, { "epoch": 0.060354102192883424, "grad_norm": 1.4870915412902832, "learning_rate": 2.992757047669128e-05, "loss": 0.1538, "step": 3408 }, { "epoch": 0.06037181172991185, "grad_norm": 2.1607422828674316, "learning_rate": 2.9927486005021116e-05, "loss": 0.1553, "step": 3409 }, { "epoch": 0.06038952126694028, "grad_norm": 2.330627679824829, "learning_rate": 2.992740148424103e-05, "loss": 0.1703, "step": 3410 }, { "epoch": 0.06040723080396871, "grad_norm": 1.7256066799163818, "learning_rate": 2.9927316914351305e-05, "loss": 0.1968, "step": 3411 }, { "epoch": 0.06042494034099714, "grad_norm": 1.165461540222168, "learning_rate": 2.9927232295352214e-05, "loss": 0.1632, "step": 3412 }, { "epoch": 0.06044264987802556, "grad_norm": 1.1346688270568848, "learning_rate": 2.9927147627244044e-05, "loss": 0.1491, "step": 3413 }, { "epoch": 0.06046035941505399, "grad_norm": 1.1457387208938599, "learning_rate": 2.992706291002706e-05, "loss": 0.1943, "step": 3414 }, { "epoch": 0.06047806895208242, "grad_norm": 1.9553642272949219, "learning_rate": 2.992697814370156e-05, "loss": 0.1652, "step": 3415 }, { "epoch": 0.06049577848911085, "grad_norm": 1.873936653137207, "learning_rate": 2.9926893328267806e-05, "loss": 0.1614, "step": 3416 }, { "epoch": 0.060513488026139275, "grad_norm": 1.2911953926086426, "learning_rate": 2.992680846372608e-05, "loss": 0.1745, "step": 3417 }, { "epoch": 0.0605311975631677, "grad_norm": 1.3485844135284424, "learning_rate": 2.992672355007667e-05, "loss": 0.1957, "step": 3418 }, { "epoch": 0.060548907100196135, "grad_norm": 1.8063982725143433, "learning_rate": 2.992663858731984e-05, "loss": 0.1795, "step": 3419 }, { "epoch": 0.06056661663722456, "grad_norm": 2.2437331676483154, "learning_rate": 2.9926553575455886e-05, "loss": 0.1802, "step": 3420 }, { "epoch": 0.06058432617425299, "grad_norm": 1.195750117301941, "learning_rate": 2.9926468514485076e-05, "loss": 0.1751, "step": 3421 }, { "epoch": 0.06060203571128142, "grad_norm": 1.415350079536438, "learning_rate": 2.9926383404407697e-05, "loss": 0.1561, "step": 3422 }, { "epoch": 0.06061974524830985, "grad_norm": 1.3500264883041382, "learning_rate": 2.992629824522403e-05, "loss": 0.1748, "step": 3423 }, { "epoch": 0.060637454785338274, "grad_norm": 1.8659427165985107, "learning_rate": 2.992621303693435e-05, "loss": 0.1389, "step": 3424 }, { "epoch": 0.0606551643223667, "grad_norm": 1.4291613101959229, "learning_rate": 2.992612777953893e-05, "loss": 0.1571, "step": 3425 }, { "epoch": 0.060672873859395134, "grad_norm": 1.1818162202835083, "learning_rate": 2.9926042473038068e-05, "loss": 0.1613, "step": 3426 }, { "epoch": 0.06069058339642356, "grad_norm": 1.4245707988739014, "learning_rate": 2.992595711743203e-05, "loss": 0.135, "step": 3427 }, { "epoch": 0.060708292933451986, "grad_norm": 1.4956321716308594, "learning_rate": 2.992587171272111e-05, "loss": 0.1832, "step": 3428 }, { "epoch": 0.06072600247048041, "grad_norm": 2.905524253845215, "learning_rate": 2.9925786258905577e-05, "loss": 0.2213, "step": 3429 }, { "epoch": 0.060743712007508846, "grad_norm": 1.630570650100708, "learning_rate": 2.9925700755985722e-05, "loss": 0.1641, "step": 3430 }, { "epoch": 0.06076142154453727, "grad_norm": 1.62313973903656, "learning_rate": 2.9925615203961815e-05, "loss": 0.1857, "step": 3431 }, { "epoch": 0.0607791310815657, "grad_norm": 1.9182707071304321, "learning_rate": 2.9925529602834148e-05, "loss": 0.187, "step": 3432 }, { "epoch": 0.060796840618594125, "grad_norm": 1.6599419116973877, "learning_rate": 2.9925443952602994e-05, "loss": 0.144, "step": 3433 }, { "epoch": 0.06081455015562256, "grad_norm": 2.8398401737213135, "learning_rate": 2.992535825326864e-05, "loss": 0.1926, "step": 3434 }, { "epoch": 0.060832259692650985, "grad_norm": 1.7834489345550537, "learning_rate": 2.992527250483137e-05, "loss": 0.1385, "step": 3435 }, { "epoch": 0.06084996922967941, "grad_norm": 3.5783863067626953, "learning_rate": 2.9925186707291462e-05, "loss": 0.1651, "step": 3436 }, { "epoch": 0.060867678766707844, "grad_norm": 1.805887222290039, "learning_rate": 2.9925100860649194e-05, "loss": 0.2092, "step": 3437 }, { "epoch": 0.06088538830373627, "grad_norm": 1.6959381103515625, "learning_rate": 2.992501496490486e-05, "loss": 0.1831, "step": 3438 }, { "epoch": 0.0609030978407647, "grad_norm": 1.3868685960769653, "learning_rate": 2.9924929020058738e-05, "loss": 0.1212, "step": 3439 }, { "epoch": 0.060920807377793124, "grad_norm": 2.580054759979248, "learning_rate": 2.99248430261111e-05, "loss": 0.2359, "step": 3440 }, { "epoch": 0.06093851691482156, "grad_norm": 3.54889178276062, "learning_rate": 2.992475698306225e-05, "loss": 0.1472, "step": 3441 }, { "epoch": 0.06095622645184998, "grad_norm": 1.6984974145889282, "learning_rate": 2.992467089091245e-05, "loss": 0.1524, "step": 3442 }, { "epoch": 0.06097393598887841, "grad_norm": 2.7888145446777344, "learning_rate": 2.9924584749661995e-05, "loss": 0.234, "step": 3443 }, { "epoch": 0.060991645525906836, "grad_norm": 1.8376940488815308, "learning_rate": 2.9924498559311165e-05, "loss": 0.1718, "step": 3444 }, { "epoch": 0.06100935506293527, "grad_norm": 1.4050565958023071, "learning_rate": 2.9924412319860245e-05, "loss": 0.1395, "step": 3445 }, { "epoch": 0.061027064599963696, "grad_norm": 1.4402354955673218, "learning_rate": 2.9924326031309518e-05, "loss": 0.1627, "step": 3446 }, { "epoch": 0.06104477413699212, "grad_norm": 1.8644909858703613, "learning_rate": 2.992423969365927e-05, "loss": 0.1749, "step": 3447 }, { "epoch": 0.061062483674020555, "grad_norm": 1.5272592306137085, "learning_rate": 2.992415330690978e-05, "loss": 0.2164, "step": 3448 }, { "epoch": 0.06108019321104898, "grad_norm": 1.4772063493728638, "learning_rate": 2.9924066871061335e-05, "loss": 0.2107, "step": 3449 }, { "epoch": 0.06109790274807741, "grad_norm": 1.27153480052948, "learning_rate": 2.992398038611422e-05, "loss": 0.1797, "step": 3450 }, { "epoch": 0.061115612285105834, "grad_norm": 2.11330509185791, "learning_rate": 2.9923893852068718e-05, "loss": 0.162, "step": 3451 }, { "epoch": 0.06113332182213427, "grad_norm": 1.4923440217971802, "learning_rate": 2.9923807268925116e-05, "loss": 0.1674, "step": 3452 }, { "epoch": 0.061151031359162694, "grad_norm": 1.2374396324157715, "learning_rate": 2.99237206366837e-05, "loss": 0.1642, "step": 3453 }, { "epoch": 0.06116874089619112, "grad_norm": 3.1474556922912598, "learning_rate": 2.992363395534475e-05, "loss": 0.1559, "step": 3454 }, { "epoch": 0.06118645043321955, "grad_norm": 1.8870916366577148, "learning_rate": 2.9923547224908553e-05, "loss": 0.1885, "step": 3455 }, { "epoch": 0.06120415997024798, "grad_norm": 1.7845004796981812, "learning_rate": 2.99234604453754e-05, "loss": 0.151, "step": 3456 }, { "epoch": 0.06122186950727641, "grad_norm": 2.3386943340301514, "learning_rate": 2.9923373616745565e-05, "loss": 0.2049, "step": 3457 }, { "epoch": 0.06123957904430483, "grad_norm": 2.0132827758789062, "learning_rate": 2.992328673901934e-05, "loss": 0.1794, "step": 3458 }, { "epoch": 0.06125728858133326, "grad_norm": 2.3603172302246094, "learning_rate": 2.9923199812197015e-05, "loss": 0.1392, "step": 3459 }, { "epoch": 0.06127499811836169, "grad_norm": 1.6675021648406982, "learning_rate": 2.992311283627887e-05, "loss": 0.16, "step": 3460 }, { "epoch": 0.06129270765539012, "grad_norm": 1.4504766464233398, "learning_rate": 2.9923025811265195e-05, "loss": 0.1629, "step": 3461 }, { "epoch": 0.061310417192418545, "grad_norm": 1.9839718341827393, "learning_rate": 2.9922938737156272e-05, "loss": 0.2417, "step": 3462 }, { "epoch": 0.06132812672944698, "grad_norm": 1.4177943468093872, "learning_rate": 2.9922851613952394e-05, "loss": 0.1213, "step": 3463 }, { "epoch": 0.061345836266475405, "grad_norm": 1.5844982862472534, "learning_rate": 2.992276444165384e-05, "loss": 0.1654, "step": 3464 }, { "epoch": 0.06136354580350383, "grad_norm": 1.815903902053833, "learning_rate": 2.99226772202609e-05, "loss": 0.1768, "step": 3465 }, { "epoch": 0.06138125534053226, "grad_norm": 1.0287439823150635, "learning_rate": 2.9922589949773863e-05, "loss": 0.1466, "step": 3466 }, { "epoch": 0.06139896487756069, "grad_norm": 1.8870158195495605, "learning_rate": 2.9922502630193013e-05, "loss": 0.1415, "step": 3467 }, { "epoch": 0.06141667441458912, "grad_norm": 1.6955180168151855, "learning_rate": 2.9922415261518642e-05, "loss": 0.1777, "step": 3468 }, { "epoch": 0.061434383951617544, "grad_norm": 2.100100517272949, "learning_rate": 2.992232784375103e-05, "loss": 0.1462, "step": 3469 }, { "epoch": 0.06145209348864597, "grad_norm": 1.608696460723877, "learning_rate": 2.992224037689047e-05, "loss": 0.18, "step": 3470 }, { "epoch": 0.061469803025674404, "grad_norm": 2.123208999633789, "learning_rate": 2.992215286093725e-05, "loss": 0.1448, "step": 3471 }, { "epoch": 0.06148751256270283, "grad_norm": 1.8579102754592896, "learning_rate": 2.9922065295891648e-05, "loss": 0.1576, "step": 3472 }, { "epoch": 0.061505222099731256, "grad_norm": 1.2410061359405518, "learning_rate": 2.9921977681753973e-05, "loss": 0.1681, "step": 3473 }, { "epoch": 0.06152293163675968, "grad_norm": 1.3012828826904297, "learning_rate": 2.9921890018524497e-05, "loss": 0.1763, "step": 3474 }, { "epoch": 0.061540641173788116, "grad_norm": 1.9067634344100952, "learning_rate": 2.9921802306203506e-05, "loss": 0.1514, "step": 3475 }, { "epoch": 0.06155835071081654, "grad_norm": 2.0276777744293213, "learning_rate": 2.99217145447913e-05, "loss": 0.1551, "step": 3476 }, { "epoch": 0.06157606024784497, "grad_norm": 1.3033705949783325, "learning_rate": 2.9921626734288162e-05, "loss": 0.1556, "step": 3477 }, { "epoch": 0.0615937697848734, "grad_norm": 1.4255539178848267, "learning_rate": 2.992153887469438e-05, "loss": 0.1463, "step": 3478 }, { "epoch": 0.06161147932190183, "grad_norm": 2.446906805038452, "learning_rate": 2.9921450966010246e-05, "loss": 0.1937, "step": 3479 }, { "epoch": 0.061629188858930255, "grad_norm": 2.0830464363098145, "learning_rate": 2.9921363008236047e-05, "loss": 0.1997, "step": 3480 }, { "epoch": 0.06164689839595868, "grad_norm": 1.7669754028320312, "learning_rate": 2.992127500137207e-05, "loss": 0.1793, "step": 3481 }, { "epoch": 0.061664607932987114, "grad_norm": 1.4640977382659912, "learning_rate": 2.9921186945418608e-05, "loss": 0.1938, "step": 3482 }, { "epoch": 0.06168231747001554, "grad_norm": 2.1358470916748047, "learning_rate": 2.9921098840375954e-05, "loss": 0.2021, "step": 3483 }, { "epoch": 0.06170002700704397, "grad_norm": 1.3928247690200806, "learning_rate": 2.992101068624439e-05, "loss": 0.1708, "step": 3484 }, { "epoch": 0.061717736544072394, "grad_norm": 1.3200136423110962, "learning_rate": 2.9920922483024215e-05, "loss": 0.1664, "step": 3485 }, { "epoch": 0.06173544608110083, "grad_norm": 1.6513996124267578, "learning_rate": 2.992083423071571e-05, "loss": 0.2196, "step": 3486 }, { "epoch": 0.06175315561812925, "grad_norm": 1.9173154830932617, "learning_rate": 2.992074592931917e-05, "loss": 0.1723, "step": 3487 }, { "epoch": 0.06177086515515768, "grad_norm": 1.4352731704711914, "learning_rate": 2.9920657578834888e-05, "loss": 0.1403, "step": 3488 }, { "epoch": 0.06178857469218611, "grad_norm": 1.9203310012817383, "learning_rate": 2.9920569179263147e-05, "loss": 0.104, "step": 3489 }, { "epoch": 0.06180628422921454, "grad_norm": 1.2790709733963013, "learning_rate": 2.9920480730604248e-05, "loss": 0.1227, "step": 3490 }, { "epoch": 0.061823993766242966, "grad_norm": 1.7261199951171875, "learning_rate": 2.9920392232858472e-05, "loss": 0.163, "step": 3491 }, { "epoch": 0.06184170330327139, "grad_norm": 1.580391764640808, "learning_rate": 2.9920303686026116e-05, "loss": 0.1406, "step": 3492 }, { "epoch": 0.061859412840299825, "grad_norm": 1.6653186082839966, "learning_rate": 2.9920215090107466e-05, "loss": 0.2239, "step": 3493 }, { "epoch": 0.06187712237732825, "grad_norm": 2.073429822921753, "learning_rate": 2.9920126445102823e-05, "loss": 0.1583, "step": 3494 }, { "epoch": 0.06189483191435668, "grad_norm": 1.2344353199005127, "learning_rate": 2.9920037751012472e-05, "loss": 0.1681, "step": 3495 }, { "epoch": 0.061912541451385104, "grad_norm": 1.5678633451461792, "learning_rate": 2.9919949007836707e-05, "loss": 0.1873, "step": 3496 }, { "epoch": 0.06193025098841354, "grad_norm": 1.5233365297317505, "learning_rate": 2.9919860215575816e-05, "loss": 0.1771, "step": 3497 }, { "epoch": 0.061947960525441964, "grad_norm": 1.7433936595916748, "learning_rate": 2.9919771374230098e-05, "loss": 0.1203, "step": 3498 }, { "epoch": 0.06196567006247039, "grad_norm": 1.7097967863082886, "learning_rate": 2.9919682483799837e-05, "loss": 0.1102, "step": 3499 }, { "epoch": 0.06198337959949882, "grad_norm": 1.4697601795196533, "learning_rate": 2.991959354428533e-05, "loss": 0.1638, "step": 3500 }, { "epoch": 0.06200108913652725, "grad_norm": 1.4964441061019897, "learning_rate": 2.9919504555686873e-05, "loss": 0.1847, "step": 3501 }, { "epoch": 0.062018798673555676, "grad_norm": 2.4219329357147217, "learning_rate": 2.9919415518004752e-05, "loss": 0.172, "step": 3502 }, { "epoch": 0.0620365082105841, "grad_norm": 1.87660813331604, "learning_rate": 2.9919326431239264e-05, "loss": 0.1798, "step": 3503 }, { "epoch": 0.062054217747612536, "grad_norm": 1.2951703071594238, "learning_rate": 2.99192372953907e-05, "loss": 0.1414, "step": 3504 }, { "epoch": 0.06207192728464096, "grad_norm": 1.1533102989196777, "learning_rate": 2.9919148110459355e-05, "loss": 0.1866, "step": 3505 }, { "epoch": 0.06208963682166939, "grad_norm": 2.1885271072387695, "learning_rate": 2.991905887644552e-05, "loss": 0.1193, "step": 3506 }, { "epoch": 0.062107346358697815, "grad_norm": 1.5110173225402832, "learning_rate": 2.991896959334949e-05, "loss": 0.1229, "step": 3507 }, { "epoch": 0.06212505589572625, "grad_norm": 2.306419849395752, "learning_rate": 2.9918880261171565e-05, "loss": 0.1948, "step": 3508 }, { "epoch": 0.062142765432754675, "grad_norm": 2.386725902557373, "learning_rate": 2.9918790879912027e-05, "loss": 0.2234, "step": 3509 }, { "epoch": 0.0621604749697831, "grad_norm": 1.5361632108688354, "learning_rate": 2.9918701449571178e-05, "loss": 0.0999, "step": 3510 }, { "epoch": 0.06217818450681153, "grad_norm": 1.9245048761367798, "learning_rate": 2.991861197014931e-05, "loss": 0.2011, "step": 3511 }, { "epoch": 0.06219589404383996, "grad_norm": 2.429445505142212, "learning_rate": 2.9918522441646714e-05, "loss": 0.1373, "step": 3512 }, { "epoch": 0.06221360358086839, "grad_norm": 1.8873395919799805, "learning_rate": 2.9918432864063692e-05, "loss": 0.1347, "step": 3513 }, { "epoch": 0.062231313117896814, "grad_norm": 1.4560372829437256, "learning_rate": 2.9918343237400535e-05, "loss": 0.1242, "step": 3514 }, { "epoch": 0.06224902265492525, "grad_norm": 1.5061542987823486, "learning_rate": 2.9918253561657538e-05, "loss": 0.1186, "step": 3515 }, { "epoch": 0.06226673219195367, "grad_norm": 1.4012088775634766, "learning_rate": 2.9918163836834996e-05, "loss": 0.1915, "step": 3516 }, { "epoch": 0.0622844417289821, "grad_norm": 1.4007785320281982, "learning_rate": 2.99180740629332e-05, "loss": 0.1485, "step": 3517 }, { "epoch": 0.062302151266010526, "grad_norm": 1.6690322160720825, "learning_rate": 2.991798423995245e-05, "loss": 0.1461, "step": 3518 }, { "epoch": 0.06231986080303896, "grad_norm": 2.3167452812194824, "learning_rate": 2.991789436789304e-05, "loss": 0.1909, "step": 3519 }, { "epoch": 0.062337570340067386, "grad_norm": 1.5129483938217163, "learning_rate": 2.991780444675527e-05, "loss": 0.201, "step": 3520 }, { "epoch": 0.06235527987709581, "grad_norm": 1.7729889154434204, "learning_rate": 2.9917714476539437e-05, "loss": 0.1778, "step": 3521 }, { "epoch": 0.06237298941412424, "grad_norm": 1.2277674674987793, "learning_rate": 2.9917624457245822e-05, "loss": 0.1125, "step": 3522 }, { "epoch": 0.06239069895115267, "grad_norm": 1.8197424411773682, "learning_rate": 2.9917534388874733e-05, "loss": 0.1144, "step": 3523 }, { "epoch": 0.0624084084881811, "grad_norm": 1.8735641241073608, "learning_rate": 2.991744427142647e-05, "loss": 0.1588, "step": 3524 }, { "epoch": 0.062426118025209525, "grad_norm": 1.9098763465881348, "learning_rate": 2.991735410490132e-05, "loss": 0.142, "step": 3525 }, { "epoch": 0.06244382756223795, "grad_norm": 2.049380302429199, "learning_rate": 2.9917263889299584e-05, "loss": 0.1553, "step": 3526 }, { "epoch": 0.062461537099266384, "grad_norm": 1.365897297859192, "learning_rate": 2.9917173624621562e-05, "loss": 0.1905, "step": 3527 }, { "epoch": 0.06247924663629481, "grad_norm": 1.1871874332427979, "learning_rate": 2.9917083310867545e-05, "loss": 0.1401, "step": 3528 }, { "epoch": 0.06249695617332324, "grad_norm": 1.736837387084961, "learning_rate": 2.991699294803783e-05, "loss": 0.1799, "step": 3529 }, { "epoch": 0.06251466571035166, "grad_norm": 1.5283820629119873, "learning_rate": 2.991690253613272e-05, "loss": 0.1689, "step": 3530 }, { "epoch": 0.06253237524738009, "grad_norm": 1.295778512954712, "learning_rate": 2.9916812075152515e-05, "loss": 0.1316, "step": 3531 }, { "epoch": 0.06255008478440853, "grad_norm": 1.9046149253845215, "learning_rate": 2.9916721565097497e-05, "loss": 0.1644, "step": 3532 }, { "epoch": 0.06256779432143696, "grad_norm": 1.723680853843689, "learning_rate": 2.991663100596798e-05, "loss": 0.1491, "step": 3533 }, { "epoch": 0.06258550385846538, "grad_norm": 1.4786421060562134, "learning_rate": 2.9916540397764253e-05, "loss": 0.1873, "step": 3534 }, { "epoch": 0.06260321339549381, "grad_norm": 1.682854175567627, "learning_rate": 2.9916449740486615e-05, "loss": 0.1364, "step": 3535 }, { "epoch": 0.06262092293252224, "grad_norm": 1.4337810277938843, "learning_rate": 2.9916359034135372e-05, "loss": 0.1538, "step": 3536 }, { "epoch": 0.06263863246955066, "grad_norm": 1.2935261726379395, "learning_rate": 2.991626827871081e-05, "loss": 0.1517, "step": 3537 }, { "epoch": 0.06265634200657909, "grad_norm": 1.060869812965393, "learning_rate": 2.991617747421324e-05, "loss": 0.1208, "step": 3538 }, { "epoch": 0.06267405154360751, "grad_norm": 1.2252384424209595, "learning_rate": 2.991608662064295e-05, "loss": 0.1707, "step": 3539 }, { "epoch": 0.06269176108063595, "grad_norm": 1.3352900743484497, "learning_rate": 2.9915995718000247e-05, "loss": 0.1705, "step": 3540 }, { "epoch": 0.06270947061766438, "grad_norm": 1.3722426891326904, "learning_rate": 2.9915904766285422e-05, "loss": 0.1467, "step": 3541 }, { "epoch": 0.06272718015469281, "grad_norm": 2.2916481494903564, "learning_rate": 2.9915813765498784e-05, "loss": 0.1686, "step": 3542 }, { "epoch": 0.06274488969172123, "grad_norm": 2.4153192043304443, "learning_rate": 2.9915722715640626e-05, "loss": 0.2064, "step": 3543 }, { "epoch": 0.06276259922874966, "grad_norm": 1.3358567953109741, "learning_rate": 2.991563161671125e-05, "loss": 0.1756, "step": 3544 }, { "epoch": 0.06278030876577809, "grad_norm": 0.8964038491249084, "learning_rate": 2.991554046871095e-05, "loss": 0.1277, "step": 3545 }, { "epoch": 0.06279801830280651, "grad_norm": 1.6394697427749634, "learning_rate": 2.9915449271640033e-05, "loss": 0.1845, "step": 3546 }, { "epoch": 0.06281572783983495, "grad_norm": 0.9538173079490662, "learning_rate": 2.99153580254988e-05, "loss": 0.1821, "step": 3547 }, { "epoch": 0.06283343737686338, "grad_norm": 1.5111654996871948, "learning_rate": 2.9915266730287546e-05, "loss": 0.1512, "step": 3548 }, { "epoch": 0.0628511469138918, "grad_norm": 1.3200596570968628, "learning_rate": 2.991517538600657e-05, "loss": 0.1536, "step": 3549 }, { "epoch": 0.06286885645092023, "grad_norm": 1.412581205368042, "learning_rate": 2.991508399265618e-05, "loss": 0.1562, "step": 3550 }, { "epoch": 0.06288656598794866, "grad_norm": 1.6526073217391968, "learning_rate": 2.9914992550236667e-05, "loss": 0.1115, "step": 3551 }, { "epoch": 0.06290427552497709, "grad_norm": 1.0329498052597046, "learning_rate": 2.9914901058748338e-05, "loss": 0.129, "step": 3552 }, { "epoch": 0.06292198506200551, "grad_norm": 1.2892093658447266, "learning_rate": 2.9914809518191496e-05, "loss": 0.1457, "step": 3553 }, { "epoch": 0.06293969459903394, "grad_norm": 2.308220386505127, "learning_rate": 2.9914717928566438e-05, "loss": 0.1709, "step": 3554 }, { "epoch": 0.06295740413606238, "grad_norm": 1.5035016536712646, "learning_rate": 2.991462628987347e-05, "loss": 0.1609, "step": 3555 }, { "epoch": 0.0629751136730908, "grad_norm": 1.1177178621292114, "learning_rate": 2.9914534602112885e-05, "loss": 0.1374, "step": 3556 }, { "epoch": 0.06299282321011923, "grad_norm": 1.7631927728652954, "learning_rate": 2.991444286528499e-05, "loss": 0.1369, "step": 3557 }, { "epoch": 0.06301053274714766, "grad_norm": 1.1441372632980347, "learning_rate": 2.991435107939009e-05, "loss": 0.1577, "step": 3558 }, { "epoch": 0.06302824228417608, "grad_norm": 1.3769712448120117, "learning_rate": 2.991425924442848e-05, "loss": 0.1522, "step": 3559 }, { "epoch": 0.06304595182120451, "grad_norm": 1.659306526184082, "learning_rate": 2.991416736040047e-05, "loss": 0.15, "step": 3560 }, { "epoch": 0.06306366135823294, "grad_norm": 1.5773166418075562, "learning_rate": 2.991407542730635e-05, "loss": 0.1292, "step": 3561 }, { "epoch": 0.06308137089526138, "grad_norm": 1.8565033674240112, "learning_rate": 2.9913983445146434e-05, "loss": 0.1675, "step": 3562 }, { "epoch": 0.0630990804322898, "grad_norm": 1.5491485595703125, "learning_rate": 2.9913891413921024e-05, "loss": 0.134, "step": 3563 }, { "epoch": 0.06311678996931823, "grad_norm": 1.180353045463562, "learning_rate": 2.9913799333630417e-05, "loss": 0.1165, "step": 3564 }, { "epoch": 0.06313449950634666, "grad_norm": 1.9552375078201294, "learning_rate": 2.991370720427492e-05, "loss": 0.1869, "step": 3565 }, { "epoch": 0.06315220904337508, "grad_norm": 1.780030608177185, "learning_rate": 2.9913615025854836e-05, "loss": 0.1902, "step": 3566 }, { "epoch": 0.06316991858040351, "grad_norm": 2.417754888534546, "learning_rate": 2.9913522798370465e-05, "loss": 0.1983, "step": 3567 }, { "epoch": 0.06318762811743193, "grad_norm": 1.6788891553878784, "learning_rate": 2.991343052182211e-05, "loss": 0.1504, "step": 3568 }, { "epoch": 0.06320533765446038, "grad_norm": 3.4502947330474854, "learning_rate": 2.991333819621008e-05, "loss": 0.1986, "step": 3569 }, { "epoch": 0.0632230471914888, "grad_norm": 1.7723497152328491, "learning_rate": 2.9913245821534674e-05, "loss": 0.1435, "step": 3570 }, { "epoch": 0.06324075672851723, "grad_norm": 1.5384968519210815, "learning_rate": 2.99131533977962e-05, "loss": 0.1511, "step": 3571 }, { "epoch": 0.06325846626554565, "grad_norm": 1.8004398345947266, "learning_rate": 2.9913060924994958e-05, "loss": 0.1754, "step": 3572 }, { "epoch": 0.06327617580257408, "grad_norm": 1.801315426826477, "learning_rate": 2.9912968403131256e-05, "loss": 0.1484, "step": 3573 }, { "epoch": 0.06329388533960251, "grad_norm": 1.258144736289978, "learning_rate": 2.9912875832205393e-05, "loss": 0.1663, "step": 3574 }, { "epoch": 0.06331159487663093, "grad_norm": 1.4866276979446411, "learning_rate": 2.9912783212217677e-05, "loss": 0.1589, "step": 3575 }, { "epoch": 0.06332930441365936, "grad_norm": 1.8432555198669434, "learning_rate": 2.9912690543168417e-05, "loss": 0.2131, "step": 3576 }, { "epoch": 0.0633470139506878, "grad_norm": 1.3344712257385254, "learning_rate": 2.9912597825057908e-05, "loss": 0.1109, "step": 3577 }, { "epoch": 0.06336472348771623, "grad_norm": 1.8308943510055542, "learning_rate": 2.9912505057886464e-05, "loss": 0.1544, "step": 3578 }, { "epoch": 0.06338243302474465, "grad_norm": 1.2628880739212036, "learning_rate": 2.9912412241654388e-05, "loss": 0.1315, "step": 3579 }, { "epoch": 0.06340014256177308, "grad_norm": 2.104029893875122, "learning_rate": 2.991231937636198e-05, "loss": 0.1742, "step": 3580 }, { "epoch": 0.0634178520988015, "grad_norm": 1.7379472255706787, "learning_rate": 2.9912226462009553e-05, "loss": 0.2005, "step": 3581 }, { "epoch": 0.06343556163582993, "grad_norm": 1.7149279117584229, "learning_rate": 2.9912133498597408e-05, "loss": 0.1439, "step": 3582 }, { "epoch": 0.06345327117285836, "grad_norm": 2.439746618270874, "learning_rate": 2.991204048612585e-05, "loss": 0.1395, "step": 3583 }, { "epoch": 0.0634709807098868, "grad_norm": 1.664516806602478, "learning_rate": 2.9911947424595186e-05, "loss": 0.1973, "step": 3584 }, { "epoch": 0.06348869024691522, "grad_norm": 1.3782802820205688, "learning_rate": 2.991185431400573e-05, "loss": 0.1532, "step": 3585 }, { "epoch": 0.06350639978394365, "grad_norm": 1.3861701488494873, "learning_rate": 2.9911761154357772e-05, "loss": 0.1159, "step": 3586 }, { "epoch": 0.06352410932097208, "grad_norm": 1.0139660835266113, "learning_rate": 2.9911667945651635e-05, "loss": 0.1801, "step": 3587 }, { "epoch": 0.0635418188580005, "grad_norm": 2.3060991764068604, "learning_rate": 2.991157468788762e-05, "loss": 0.1371, "step": 3588 }, { "epoch": 0.06355952839502893, "grad_norm": 3.5823774337768555, "learning_rate": 2.9911481381066026e-05, "loss": 0.1848, "step": 3589 }, { "epoch": 0.06357723793205736, "grad_norm": 1.5158387422561646, "learning_rate": 2.9911388025187172e-05, "loss": 0.1267, "step": 3590 }, { "epoch": 0.06359494746908578, "grad_norm": 1.8667434453964233, "learning_rate": 2.9911294620251358e-05, "loss": 0.1438, "step": 3591 }, { "epoch": 0.06361265700611422, "grad_norm": 1.6237770318984985, "learning_rate": 2.9911201166258895e-05, "loss": 0.1338, "step": 3592 }, { "epoch": 0.06363036654314265, "grad_norm": 2.4615538120269775, "learning_rate": 2.9911107663210086e-05, "loss": 0.1458, "step": 3593 }, { "epoch": 0.06364807608017108, "grad_norm": 1.87467360496521, "learning_rate": 2.9911014111105238e-05, "loss": 0.2066, "step": 3594 }, { "epoch": 0.0636657856171995, "grad_norm": 1.2177529335021973, "learning_rate": 2.9910920509944666e-05, "loss": 0.1749, "step": 3595 }, { "epoch": 0.06368349515422793, "grad_norm": 1.540473461151123, "learning_rate": 2.991082685972867e-05, "loss": 0.1765, "step": 3596 }, { "epoch": 0.06370120469125636, "grad_norm": 1.2559131383895874, "learning_rate": 2.9910733160457566e-05, "loss": 0.1137, "step": 3597 }, { "epoch": 0.06371891422828478, "grad_norm": 1.6061519384384155, "learning_rate": 2.9910639412131654e-05, "loss": 0.1643, "step": 3598 }, { "epoch": 0.06373662376531322, "grad_norm": 1.7873605489730835, "learning_rate": 2.991054561475125e-05, "loss": 0.1613, "step": 3599 }, { "epoch": 0.06375433330234165, "grad_norm": 1.8197405338287354, "learning_rate": 2.9910451768316658e-05, "loss": 0.1797, "step": 3600 }, { "epoch": 0.06377204283937007, "grad_norm": 1.5253679752349854, "learning_rate": 2.9910357872828187e-05, "loss": 0.1305, "step": 3601 }, { "epoch": 0.0637897523763985, "grad_norm": 1.5452009439468384, "learning_rate": 2.9910263928286147e-05, "loss": 0.1769, "step": 3602 }, { "epoch": 0.06380746191342693, "grad_norm": 1.7159818410873413, "learning_rate": 2.9910169934690848e-05, "loss": 0.1566, "step": 3603 }, { "epoch": 0.06382517145045535, "grad_norm": 1.1114985942840576, "learning_rate": 2.9910075892042596e-05, "loss": 0.1306, "step": 3604 }, { "epoch": 0.06384288098748378, "grad_norm": 1.6031272411346436, "learning_rate": 2.9909981800341704e-05, "loss": 0.1455, "step": 3605 }, { "epoch": 0.0638605905245122, "grad_norm": 1.6692757606506348, "learning_rate": 2.9909887659588477e-05, "loss": 0.15, "step": 3606 }, { "epoch": 0.06387830006154065, "grad_norm": 1.819420337677002, "learning_rate": 2.990979346978323e-05, "loss": 0.1644, "step": 3607 }, { "epoch": 0.06389600959856907, "grad_norm": 1.3096519708633423, "learning_rate": 2.990969923092627e-05, "loss": 0.1843, "step": 3608 }, { "epoch": 0.0639137191355975, "grad_norm": 1.826184630393982, "learning_rate": 2.990960494301791e-05, "loss": 0.1909, "step": 3609 }, { "epoch": 0.06393142867262593, "grad_norm": 1.4067541360855103, "learning_rate": 2.9909510606058453e-05, "loss": 0.1816, "step": 3610 }, { "epoch": 0.06394913820965435, "grad_norm": 3.415881872177124, "learning_rate": 2.990941622004822e-05, "loss": 0.1889, "step": 3611 }, { "epoch": 0.06396684774668278, "grad_norm": 1.0461585521697998, "learning_rate": 2.9909321784987505e-05, "loss": 0.127, "step": 3612 }, { "epoch": 0.0639845572837112, "grad_norm": 1.854543685913086, "learning_rate": 2.990922730087664e-05, "loss": 0.1494, "step": 3613 }, { "epoch": 0.06400226682073965, "grad_norm": 1.4637993574142456, "learning_rate": 2.9909132767715924e-05, "loss": 0.1414, "step": 3614 }, { "epoch": 0.06401997635776807, "grad_norm": 1.6720435619354248, "learning_rate": 2.9909038185505665e-05, "loss": 0.1369, "step": 3615 }, { "epoch": 0.0640376858947965, "grad_norm": 1.2094142436981201, "learning_rate": 2.990894355424618e-05, "loss": 0.161, "step": 3616 }, { "epoch": 0.06405539543182492, "grad_norm": 1.743513584136963, "learning_rate": 2.990884887393778e-05, "loss": 0.1873, "step": 3617 }, { "epoch": 0.06407310496885335, "grad_norm": 1.4150199890136719, "learning_rate": 2.9908754144580774e-05, "loss": 0.1458, "step": 3618 }, { "epoch": 0.06409081450588178, "grad_norm": 1.8183330297470093, "learning_rate": 2.9908659366175472e-05, "loss": 0.196, "step": 3619 }, { "epoch": 0.0641085240429102, "grad_norm": 1.7383520603179932, "learning_rate": 2.9908564538722192e-05, "loss": 0.1747, "step": 3620 }, { "epoch": 0.06412623357993863, "grad_norm": 1.1595934629440308, "learning_rate": 2.9908469662221246e-05, "loss": 0.1256, "step": 3621 }, { "epoch": 0.06414394311696707, "grad_norm": 1.6012712717056274, "learning_rate": 2.9908374736672935e-05, "loss": 0.1696, "step": 3622 }, { "epoch": 0.0641616526539955, "grad_norm": 3.0368552207946777, "learning_rate": 2.990827976207758e-05, "loss": 0.1538, "step": 3623 }, { "epoch": 0.06417936219102392, "grad_norm": 1.3718334436416626, "learning_rate": 2.99081847384355e-05, "loss": 0.1888, "step": 3624 }, { "epoch": 0.06419707172805235, "grad_norm": 0.9908298254013062, "learning_rate": 2.9908089665746994e-05, "loss": 0.1235, "step": 3625 }, { "epoch": 0.06421478126508078, "grad_norm": 1.4635337591171265, "learning_rate": 2.990799454401238e-05, "loss": 0.1541, "step": 3626 }, { "epoch": 0.0642324908021092, "grad_norm": 1.7915115356445312, "learning_rate": 2.9907899373231976e-05, "loss": 0.1561, "step": 3627 }, { "epoch": 0.06425020033913763, "grad_norm": 1.2684717178344727, "learning_rate": 2.990780415340609e-05, "loss": 0.1182, "step": 3628 }, { "epoch": 0.06426790987616607, "grad_norm": 1.7728188037872314, "learning_rate": 2.9907708884535036e-05, "loss": 0.1745, "step": 3629 }, { "epoch": 0.0642856194131945, "grad_norm": 1.891465663909912, "learning_rate": 2.9907613566619123e-05, "loss": 0.2079, "step": 3630 }, { "epoch": 0.06430332895022292, "grad_norm": 1.6504695415496826, "learning_rate": 2.9907518199658673e-05, "loss": 0.1687, "step": 3631 }, { "epoch": 0.06432103848725135, "grad_norm": 1.6507946252822876, "learning_rate": 2.9907422783653996e-05, "loss": 0.1712, "step": 3632 }, { "epoch": 0.06433874802427977, "grad_norm": 1.5005037784576416, "learning_rate": 2.9907327318605407e-05, "loss": 0.1952, "step": 3633 }, { "epoch": 0.0643564575613082, "grad_norm": 2.215320348739624, "learning_rate": 2.9907231804513217e-05, "loss": 0.1371, "step": 3634 }, { "epoch": 0.06437416709833663, "grad_norm": 1.1520904302597046, "learning_rate": 2.9907136241377744e-05, "loss": 0.1386, "step": 3635 }, { "epoch": 0.06439187663536507, "grad_norm": 1.2452094554901123, "learning_rate": 2.9907040629199295e-05, "loss": 0.1613, "step": 3636 }, { "epoch": 0.0644095861723935, "grad_norm": 1.551122784614563, "learning_rate": 2.9906944967978197e-05, "loss": 0.1728, "step": 3637 }, { "epoch": 0.06442729570942192, "grad_norm": 1.4848365783691406, "learning_rate": 2.9906849257714755e-05, "loss": 0.1596, "step": 3638 }, { "epoch": 0.06444500524645035, "grad_norm": 1.9259300231933594, "learning_rate": 2.9906753498409285e-05, "loss": 0.1987, "step": 3639 }, { "epoch": 0.06446271478347877, "grad_norm": 1.1739628314971924, "learning_rate": 2.9906657690062108e-05, "loss": 0.1617, "step": 3640 }, { "epoch": 0.0644804243205072, "grad_norm": 2.2914910316467285, "learning_rate": 2.9906561832673532e-05, "loss": 0.1986, "step": 3641 }, { "epoch": 0.06449813385753563, "grad_norm": 1.3511908054351807, "learning_rate": 2.9906465926243877e-05, "loss": 0.1433, "step": 3642 }, { "epoch": 0.06451584339456405, "grad_norm": 1.2359317541122437, "learning_rate": 2.9906369970773454e-05, "loss": 0.1867, "step": 3643 }, { "epoch": 0.06453355293159249, "grad_norm": 1.3503170013427734, "learning_rate": 2.990627396626258e-05, "loss": 0.1785, "step": 3644 }, { "epoch": 0.06455126246862092, "grad_norm": 1.3680704832077026, "learning_rate": 2.9906177912711576e-05, "loss": 0.1587, "step": 3645 }, { "epoch": 0.06456897200564934, "grad_norm": 1.8973486423492432, "learning_rate": 2.9906081810120755e-05, "loss": 0.1459, "step": 3646 }, { "epoch": 0.06458668154267777, "grad_norm": 1.8402111530303955, "learning_rate": 2.990598565849043e-05, "loss": 0.1833, "step": 3647 }, { "epoch": 0.0646043910797062, "grad_norm": 1.5185943841934204, "learning_rate": 2.990588945782092e-05, "loss": 0.1438, "step": 3648 }, { "epoch": 0.06462210061673462, "grad_norm": 1.7662632465362549, "learning_rate": 2.9905793208112544e-05, "loss": 0.1632, "step": 3649 }, { "epoch": 0.06463981015376305, "grad_norm": 2.7089595794677734, "learning_rate": 2.9905696909365614e-05, "loss": 0.1833, "step": 3650 }, { "epoch": 0.06465751969079149, "grad_norm": 1.6888400316238403, "learning_rate": 2.990560056158045e-05, "loss": 0.1391, "step": 3651 }, { "epoch": 0.06467522922781992, "grad_norm": 2.515892267227173, "learning_rate": 2.9905504164757366e-05, "loss": 0.1379, "step": 3652 }, { "epoch": 0.06469293876484834, "grad_norm": 1.1522955894470215, "learning_rate": 2.9905407718896682e-05, "loss": 0.1769, "step": 3653 }, { "epoch": 0.06471064830187677, "grad_norm": 2.981405735015869, "learning_rate": 2.9905311223998717e-05, "loss": 0.1799, "step": 3654 }, { "epoch": 0.0647283578389052, "grad_norm": 2.3041837215423584, "learning_rate": 2.990521468006378e-05, "loss": 0.2138, "step": 3655 }, { "epoch": 0.06474606737593362, "grad_norm": 1.3169491291046143, "learning_rate": 2.9905118087092197e-05, "loss": 0.1493, "step": 3656 }, { "epoch": 0.06476377691296205, "grad_norm": 1.4147855043411255, "learning_rate": 2.9905021445084284e-05, "loss": 0.1321, "step": 3657 }, { "epoch": 0.06478148644999047, "grad_norm": 1.4792262315750122, "learning_rate": 2.9904924754040356e-05, "loss": 0.1901, "step": 3658 }, { "epoch": 0.06479919598701891, "grad_norm": 1.4147346019744873, "learning_rate": 2.9904828013960735e-05, "loss": 0.1877, "step": 3659 }, { "epoch": 0.06481690552404734, "grad_norm": 1.3731634616851807, "learning_rate": 2.9904731224845736e-05, "loss": 0.141, "step": 3660 }, { "epoch": 0.06483461506107577, "grad_norm": 1.4786185026168823, "learning_rate": 2.9904634386695678e-05, "loss": 0.1895, "step": 3661 }, { "epoch": 0.0648523245981042, "grad_norm": 1.9842348098754883, "learning_rate": 2.990453749951088e-05, "loss": 0.1739, "step": 3662 }, { "epoch": 0.06487003413513262, "grad_norm": 1.6883933544158936, "learning_rate": 2.9904440563291666e-05, "loss": 0.1627, "step": 3663 }, { "epoch": 0.06488774367216105, "grad_norm": 2.5478944778442383, "learning_rate": 2.9904343578038345e-05, "loss": 0.1443, "step": 3664 }, { "epoch": 0.06490545320918947, "grad_norm": 3.4527149200439453, "learning_rate": 2.9904246543751242e-05, "loss": 0.1293, "step": 3665 }, { "epoch": 0.06492316274621791, "grad_norm": 1.3278207778930664, "learning_rate": 2.9904149460430676e-05, "loss": 0.1511, "step": 3666 }, { "epoch": 0.06494087228324634, "grad_norm": 1.3313504457473755, "learning_rate": 2.9904052328076968e-05, "loss": 0.1542, "step": 3667 }, { "epoch": 0.06495858182027477, "grad_norm": 1.9510425329208374, "learning_rate": 2.990395514669043e-05, "loss": 0.1675, "step": 3668 }, { "epoch": 0.06497629135730319, "grad_norm": 1.8267531394958496, "learning_rate": 2.990385791627139e-05, "loss": 0.2211, "step": 3669 }, { "epoch": 0.06499400089433162, "grad_norm": 1.3795883655548096, "learning_rate": 2.9903760636820167e-05, "loss": 0.2164, "step": 3670 }, { "epoch": 0.06501171043136005, "grad_norm": 1.7021269798278809, "learning_rate": 2.9903663308337074e-05, "loss": 0.1616, "step": 3671 }, { "epoch": 0.06502941996838847, "grad_norm": 1.959450602531433, "learning_rate": 2.9903565930822443e-05, "loss": 0.1359, "step": 3672 }, { "epoch": 0.0650471295054169, "grad_norm": 1.3107954263687134, "learning_rate": 2.990346850427658e-05, "loss": 0.1617, "step": 3673 }, { "epoch": 0.06506483904244534, "grad_norm": 1.435616374015808, "learning_rate": 2.9903371028699817e-05, "loss": 0.1722, "step": 3674 }, { "epoch": 0.06508254857947376, "grad_norm": 1.4546412229537964, "learning_rate": 2.9903273504092473e-05, "loss": 0.1584, "step": 3675 }, { "epoch": 0.06510025811650219, "grad_norm": 1.6092325448989868, "learning_rate": 2.990317593045486e-05, "loss": 0.1168, "step": 3676 }, { "epoch": 0.06511796765353062, "grad_norm": 1.5536773204803467, "learning_rate": 2.9903078307787312e-05, "loss": 0.1528, "step": 3677 }, { "epoch": 0.06513567719055904, "grad_norm": 1.8819568157196045, "learning_rate": 2.9902980636090142e-05, "loss": 0.1834, "step": 3678 }, { "epoch": 0.06515338672758747, "grad_norm": 1.5921306610107422, "learning_rate": 2.9902882915363673e-05, "loss": 0.1744, "step": 3679 }, { "epoch": 0.0651710962646159, "grad_norm": 1.3365745544433594, "learning_rate": 2.9902785145608226e-05, "loss": 0.1704, "step": 3680 }, { "epoch": 0.06518880580164434, "grad_norm": 1.7009297609329224, "learning_rate": 2.990268732682412e-05, "loss": 0.2015, "step": 3681 }, { "epoch": 0.06520651533867276, "grad_norm": 2.0703067779541016, "learning_rate": 2.9902589459011688e-05, "loss": 0.2076, "step": 3682 }, { "epoch": 0.06522422487570119, "grad_norm": 1.4832907915115356, "learning_rate": 2.9902491542171237e-05, "loss": 0.1374, "step": 3683 }, { "epoch": 0.06524193441272962, "grad_norm": 2.1211090087890625, "learning_rate": 2.9902393576303095e-05, "loss": 0.1991, "step": 3684 }, { "epoch": 0.06525964394975804, "grad_norm": 1.2614881992340088, "learning_rate": 2.990229556140759e-05, "loss": 0.143, "step": 3685 }, { "epoch": 0.06527735348678647, "grad_norm": 1.0277454853057861, "learning_rate": 2.9902197497485037e-05, "loss": 0.1111, "step": 3686 }, { "epoch": 0.0652950630238149, "grad_norm": 1.6047033071517944, "learning_rate": 2.9902099384535767e-05, "loss": 0.1496, "step": 3687 }, { "epoch": 0.06531277256084332, "grad_norm": 1.6912453174591064, "learning_rate": 2.990200122256009e-05, "loss": 0.1907, "step": 3688 }, { "epoch": 0.06533048209787176, "grad_norm": 1.6042321920394897, "learning_rate": 2.990190301155834e-05, "loss": 0.1803, "step": 3689 }, { "epoch": 0.06534819163490019, "grad_norm": 3.4282307624816895, "learning_rate": 2.9901804751530836e-05, "loss": 0.1704, "step": 3690 }, { "epoch": 0.06536590117192861, "grad_norm": 1.458446979522705, "learning_rate": 2.99017064424779e-05, "loss": 0.1731, "step": 3691 }, { "epoch": 0.06538361070895704, "grad_norm": 1.692870855331421, "learning_rate": 2.9901608084399857e-05, "loss": 0.1573, "step": 3692 }, { "epoch": 0.06540132024598547, "grad_norm": 1.6784217357635498, "learning_rate": 2.990150967729703e-05, "loss": 0.1788, "step": 3693 }, { "epoch": 0.0654190297830139, "grad_norm": 1.367201566696167, "learning_rate": 2.9901411221169747e-05, "loss": 0.1619, "step": 3694 }, { "epoch": 0.06543673932004232, "grad_norm": 1.3421549797058105, "learning_rate": 2.990131271601833e-05, "loss": 0.1427, "step": 3695 }, { "epoch": 0.06545444885707076, "grad_norm": 0.7309148907661438, "learning_rate": 2.99012141618431e-05, "loss": 0.1627, "step": 3696 }, { "epoch": 0.06547215839409919, "grad_norm": 1.9421520233154297, "learning_rate": 2.990111555864438e-05, "loss": 0.1388, "step": 3697 }, { "epoch": 0.06548986793112761, "grad_norm": 1.5354937314987183, "learning_rate": 2.9901016906422494e-05, "loss": 0.2124, "step": 3698 }, { "epoch": 0.06550757746815604, "grad_norm": 1.4517585039138794, "learning_rate": 2.9900918205177776e-05, "loss": 0.1804, "step": 3699 }, { "epoch": 0.06552528700518447, "grad_norm": 1.4666513204574585, "learning_rate": 2.990081945491054e-05, "loss": 0.1742, "step": 3700 }, { "epoch": 0.06554299654221289, "grad_norm": 1.410317063331604, "learning_rate": 2.9900720655621117e-05, "loss": 0.1536, "step": 3701 }, { "epoch": 0.06556070607924132, "grad_norm": 1.862117886543274, "learning_rate": 2.9900621807309827e-05, "loss": 0.1855, "step": 3702 }, { "epoch": 0.06557841561626976, "grad_norm": 1.377569317817688, "learning_rate": 2.9900522909977005e-05, "loss": 0.1188, "step": 3703 }, { "epoch": 0.06559612515329818, "grad_norm": 3.236860990524292, "learning_rate": 2.990042396362296e-05, "loss": 0.1635, "step": 3704 }, { "epoch": 0.06561383469032661, "grad_norm": 1.8857766389846802, "learning_rate": 2.9900324968248036e-05, "loss": 0.1526, "step": 3705 }, { "epoch": 0.06563154422735504, "grad_norm": 0.976881206035614, "learning_rate": 2.9900225923852544e-05, "loss": 0.1426, "step": 3706 }, { "epoch": 0.06564925376438346, "grad_norm": 1.233932614326477, "learning_rate": 2.9900126830436817e-05, "loss": 0.1568, "step": 3707 }, { "epoch": 0.06566696330141189, "grad_norm": 1.1492222547531128, "learning_rate": 2.9900027688001182e-05, "loss": 0.1383, "step": 3708 }, { "epoch": 0.06568467283844032, "grad_norm": 1.0850255489349365, "learning_rate": 2.989992849654597e-05, "loss": 0.1717, "step": 3709 }, { "epoch": 0.06570238237546874, "grad_norm": 1.2332650423049927, "learning_rate": 2.9899829256071488e-05, "loss": 0.1292, "step": 3710 }, { "epoch": 0.06572009191249718, "grad_norm": 2.217949628829956, "learning_rate": 2.989972996657808e-05, "loss": 0.158, "step": 3711 }, { "epoch": 0.06573780144952561, "grad_norm": 1.2164218425750732, "learning_rate": 2.9899630628066067e-05, "loss": 0.1413, "step": 3712 }, { "epoch": 0.06575551098655404, "grad_norm": 1.8456987142562866, "learning_rate": 2.9899531240535773e-05, "loss": 0.1348, "step": 3713 }, { "epoch": 0.06577322052358246, "grad_norm": 1.9121496677398682, "learning_rate": 2.989943180398753e-05, "loss": 0.1694, "step": 3714 }, { "epoch": 0.06579093006061089, "grad_norm": 1.6582485437393188, "learning_rate": 2.9899332318421665e-05, "loss": 0.1574, "step": 3715 }, { "epoch": 0.06580863959763932, "grad_norm": 1.9444595575332642, "learning_rate": 2.9899232783838503e-05, "loss": 0.1752, "step": 3716 }, { "epoch": 0.06582634913466774, "grad_norm": 1.5351133346557617, "learning_rate": 2.9899133200238375e-05, "loss": 0.1573, "step": 3717 }, { "epoch": 0.06584405867169618, "grad_norm": 1.6967442035675049, "learning_rate": 2.98990335676216e-05, "loss": 0.1395, "step": 3718 }, { "epoch": 0.06586176820872461, "grad_norm": 1.8263219594955444, "learning_rate": 2.989893388598852e-05, "loss": 0.1954, "step": 3719 }, { "epoch": 0.06587947774575303, "grad_norm": 2.413691520690918, "learning_rate": 2.9898834155339448e-05, "loss": 0.1722, "step": 3720 }, { "epoch": 0.06589718728278146, "grad_norm": 2.132282257080078, "learning_rate": 2.9898734375674716e-05, "loss": 0.1108, "step": 3721 }, { "epoch": 0.06591489681980989, "grad_norm": 5.825462818145752, "learning_rate": 2.989863454699466e-05, "loss": 0.215, "step": 3722 }, { "epoch": 0.06593260635683831, "grad_norm": 2.078287124633789, "learning_rate": 2.9898534669299602e-05, "loss": 0.1791, "step": 3723 }, { "epoch": 0.06595031589386674, "grad_norm": 1.2453036308288574, "learning_rate": 2.9898434742589873e-05, "loss": 0.1685, "step": 3724 }, { "epoch": 0.06596802543089517, "grad_norm": 1.3716002702713013, "learning_rate": 2.98983347668658e-05, "loss": 0.1348, "step": 3725 }, { "epoch": 0.0659857349679236, "grad_norm": 1.2744536399841309, "learning_rate": 2.9898234742127707e-05, "loss": 0.206, "step": 3726 }, { "epoch": 0.06600344450495203, "grad_norm": 1.7803481817245483, "learning_rate": 2.9898134668375935e-05, "loss": 0.1605, "step": 3727 }, { "epoch": 0.06602115404198046, "grad_norm": 2.4221181869506836, "learning_rate": 2.9898034545610805e-05, "loss": 0.2156, "step": 3728 }, { "epoch": 0.06603886357900889, "grad_norm": 0.8711928129196167, "learning_rate": 2.989793437383265e-05, "loss": 0.0939, "step": 3729 }, { "epoch": 0.06605657311603731, "grad_norm": 1.5134623050689697, "learning_rate": 2.989783415304179e-05, "loss": 0.189, "step": 3730 }, { "epoch": 0.06607428265306574, "grad_norm": 2.046764850616455, "learning_rate": 2.989773388323857e-05, "loss": 0.1157, "step": 3731 }, { "epoch": 0.06609199219009416, "grad_norm": 1.1660770177841187, "learning_rate": 2.9897633564423315e-05, "loss": 0.1759, "step": 3732 }, { "epoch": 0.0661097017271226, "grad_norm": 1.6227816343307495, "learning_rate": 2.9897533196596347e-05, "loss": 0.205, "step": 3733 }, { "epoch": 0.06612741126415103, "grad_norm": 1.5155400037765503, "learning_rate": 2.9897432779758003e-05, "loss": 0.1772, "step": 3734 }, { "epoch": 0.06614512080117946, "grad_norm": 1.2153828144073486, "learning_rate": 2.989733231390861e-05, "loss": 0.1581, "step": 3735 }, { "epoch": 0.06616283033820788, "grad_norm": 1.3359156847000122, "learning_rate": 2.98972317990485e-05, "loss": 0.166, "step": 3736 }, { "epoch": 0.06618053987523631, "grad_norm": 1.427787184715271, "learning_rate": 2.989713123517801e-05, "loss": 0.1573, "step": 3737 }, { "epoch": 0.06619824941226474, "grad_norm": 1.6464471817016602, "learning_rate": 2.9897030622297458e-05, "loss": 0.1313, "step": 3738 }, { "epoch": 0.06621595894929316, "grad_norm": 1.490177869796753, "learning_rate": 2.9896929960407187e-05, "loss": 0.1968, "step": 3739 }, { "epoch": 0.06623366848632159, "grad_norm": 2.172095775604248, "learning_rate": 2.989682924950752e-05, "loss": 0.1815, "step": 3740 }, { "epoch": 0.06625137802335003, "grad_norm": 1.6339913606643677, "learning_rate": 2.9896728489598794e-05, "loss": 0.2045, "step": 3741 }, { "epoch": 0.06626908756037846, "grad_norm": 1.8469089269638062, "learning_rate": 2.9896627680681336e-05, "loss": 0.133, "step": 3742 }, { "epoch": 0.06628679709740688, "grad_norm": 1.325290322303772, "learning_rate": 2.9896526822755477e-05, "loss": 0.186, "step": 3743 }, { "epoch": 0.06630450663443531, "grad_norm": 1.6095408201217651, "learning_rate": 2.9896425915821556e-05, "loss": 0.17, "step": 3744 }, { "epoch": 0.06632221617146374, "grad_norm": 1.9766441583633423, "learning_rate": 2.9896324959879904e-05, "loss": 0.1456, "step": 3745 }, { "epoch": 0.06633992570849216, "grad_norm": 1.6832524538040161, "learning_rate": 2.9896223954930843e-05, "loss": 0.1651, "step": 3746 }, { "epoch": 0.06635763524552059, "grad_norm": 1.4408169984817505, "learning_rate": 2.9896122900974716e-05, "loss": 0.1613, "step": 3747 }, { "epoch": 0.06637534478254903, "grad_norm": 1.1971371173858643, "learning_rate": 2.9896021798011848e-05, "loss": 0.1696, "step": 3748 }, { "epoch": 0.06639305431957745, "grad_norm": 1.9872455596923828, "learning_rate": 2.9895920646042574e-05, "loss": 0.1856, "step": 3749 }, { "epoch": 0.06641076385660588, "grad_norm": 1.2007343769073486, "learning_rate": 2.989581944506723e-05, "loss": 0.1746, "step": 3750 }, { "epoch": 0.06642847339363431, "grad_norm": 0.9546710848808289, "learning_rate": 2.9895718195086147e-05, "loss": 0.1406, "step": 3751 }, { "epoch": 0.06644618293066273, "grad_norm": 1.0691931247711182, "learning_rate": 2.9895616896099654e-05, "loss": 0.114, "step": 3752 }, { "epoch": 0.06646389246769116, "grad_norm": 1.4269473552703857, "learning_rate": 2.9895515548108095e-05, "loss": 0.1289, "step": 3753 }, { "epoch": 0.06648160200471959, "grad_norm": 1.3640649318695068, "learning_rate": 2.989541415111179e-05, "loss": 0.183, "step": 3754 }, { "epoch": 0.06649931154174801, "grad_norm": 1.427253007888794, "learning_rate": 2.9895312705111084e-05, "loss": 0.1453, "step": 3755 }, { "epoch": 0.06651702107877645, "grad_norm": 1.6017335653305054, "learning_rate": 2.9895211210106304e-05, "loss": 0.1132, "step": 3756 }, { "epoch": 0.06653473061580488, "grad_norm": 2.1647214889526367, "learning_rate": 2.989510966609778e-05, "loss": 0.166, "step": 3757 }, { "epoch": 0.0665524401528333, "grad_norm": 1.8009167909622192, "learning_rate": 2.9895008073085858e-05, "loss": 0.1407, "step": 3758 }, { "epoch": 0.06657014968986173, "grad_norm": 1.6355787515640259, "learning_rate": 2.9894906431070866e-05, "loss": 0.1374, "step": 3759 }, { "epoch": 0.06658785922689016, "grad_norm": 1.3522469997406006, "learning_rate": 2.9894804740053138e-05, "loss": 0.2135, "step": 3760 }, { "epoch": 0.06660556876391859, "grad_norm": 1.4200060367584229, "learning_rate": 2.9894703000033006e-05, "loss": 0.1523, "step": 3761 }, { "epoch": 0.06662327830094701, "grad_norm": 1.6910343170166016, "learning_rate": 2.989460121101081e-05, "loss": 0.1566, "step": 3762 }, { "epoch": 0.06664098783797545, "grad_norm": 1.4128891229629517, "learning_rate": 2.989449937298688e-05, "loss": 0.1362, "step": 3763 }, { "epoch": 0.06665869737500388, "grad_norm": 1.6309784650802612, "learning_rate": 2.9894397485961557e-05, "loss": 0.1555, "step": 3764 }, { "epoch": 0.0666764069120323, "grad_norm": 1.3756133317947388, "learning_rate": 2.989429554993517e-05, "loss": 0.163, "step": 3765 }, { "epoch": 0.06669411644906073, "grad_norm": 1.6531575918197632, "learning_rate": 2.989419356490806e-05, "loss": 0.1535, "step": 3766 }, { "epoch": 0.06671182598608916, "grad_norm": 1.865427017211914, "learning_rate": 2.989409153088056e-05, "loss": 0.1339, "step": 3767 }, { "epoch": 0.06672953552311758, "grad_norm": 1.6902207136154175, "learning_rate": 2.9893989447853003e-05, "loss": 0.1762, "step": 3768 }, { "epoch": 0.06674724506014601, "grad_norm": 1.1531649827957153, "learning_rate": 2.9893887315825728e-05, "loss": 0.1652, "step": 3769 }, { "epoch": 0.06676495459717445, "grad_norm": 1.4383260011672974, "learning_rate": 2.9893785134799067e-05, "loss": 0.1213, "step": 3770 }, { "epoch": 0.06678266413420288, "grad_norm": 0.991833508014679, "learning_rate": 2.9893682904773363e-05, "loss": 0.1184, "step": 3771 }, { "epoch": 0.0668003736712313, "grad_norm": 1.850813388824463, "learning_rate": 2.989358062574895e-05, "loss": 0.193, "step": 3772 }, { "epoch": 0.06681808320825973, "grad_norm": 1.0457136631011963, "learning_rate": 2.9893478297726163e-05, "loss": 0.1443, "step": 3773 }, { "epoch": 0.06683579274528816, "grad_norm": 1.3431401252746582, "learning_rate": 2.9893375920705334e-05, "loss": 0.1531, "step": 3774 }, { "epoch": 0.06685350228231658, "grad_norm": 1.1871495246887207, "learning_rate": 2.9893273494686808e-05, "loss": 0.1587, "step": 3775 }, { "epoch": 0.06687121181934501, "grad_norm": 1.100223183631897, "learning_rate": 2.9893171019670918e-05, "loss": 0.1679, "step": 3776 }, { "epoch": 0.06688892135637343, "grad_norm": 2.83567476272583, "learning_rate": 2.9893068495658e-05, "loss": 0.2132, "step": 3777 }, { "epoch": 0.06690663089340187, "grad_norm": 1.913478970527649, "learning_rate": 2.9892965922648396e-05, "loss": 0.1449, "step": 3778 }, { "epoch": 0.0669243404304303, "grad_norm": 1.7951143980026245, "learning_rate": 2.989286330064244e-05, "loss": 0.1193, "step": 3779 }, { "epoch": 0.06694204996745873, "grad_norm": 1.357507586479187, "learning_rate": 2.9892760629640464e-05, "loss": 0.1574, "step": 3780 }, { "epoch": 0.06695975950448715, "grad_norm": 1.0889167785644531, "learning_rate": 2.9892657909642822e-05, "loss": 0.1547, "step": 3781 }, { "epoch": 0.06697746904151558, "grad_norm": 1.3764203786849976, "learning_rate": 2.9892555140649834e-05, "loss": 0.134, "step": 3782 }, { "epoch": 0.066995178578544, "grad_norm": 1.7087554931640625, "learning_rate": 2.989245232266185e-05, "loss": 0.1366, "step": 3783 }, { "epoch": 0.06701288811557243, "grad_norm": 1.2670284509658813, "learning_rate": 2.9892349455679205e-05, "loss": 0.1449, "step": 3784 }, { "epoch": 0.06703059765260087, "grad_norm": 1.1330077648162842, "learning_rate": 2.9892246539702232e-05, "loss": 0.1701, "step": 3785 }, { "epoch": 0.0670483071896293, "grad_norm": 1.5901116132736206, "learning_rate": 2.9892143574731278e-05, "loss": 0.111, "step": 3786 }, { "epoch": 0.06706601672665773, "grad_norm": 1.0531558990478516, "learning_rate": 2.9892040560766674e-05, "loss": 0.1308, "step": 3787 }, { "epoch": 0.06708372626368615, "grad_norm": 1.683419942855835, "learning_rate": 2.9891937497808763e-05, "loss": 0.1265, "step": 3788 }, { "epoch": 0.06710143580071458, "grad_norm": 1.3405039310455322, "learning_rate": 2.989183438585789e-05, "loss": 0.1192, "step": 3789 }, { "epoch": 0.067119145337743, "grad_norm": 1.6412547826766968, "learning_rate": 2.9891731224914385e-05, "loss": 0.1722, "step": 3790 }, { "epoch": 0.06713685487477143, "grad_norm": 1.732503056526184, "learning_rate": 2.989162801497859e-05, "loss": 0.1302, "step": 3791 }, { "epoch": 0.06715456441179986, "grad_norm": 1.2270852327346802, "learning_rate": 2.9891524756050844e-05, "loss": 0.1292, "step": 3792 }, { "epoch": 0.0671722739488283, "grad_norm": 1.2350713014602661, "learning_rate": 2.9891421448131485e-05, "loss": 0.1331, "step": 3793 }, { "epoch": 0.06718998348585672, "grad_norm": 1.7462859153747559, "learning_rate": 2.989131809122086e-05, "loss": 0.1567, "step": 3794 }, { "epoch": 0.06720769302288515, "grad_norm": 1.3760426044464111, "learning_rate": 2.9891214685319302e-05, "loss": 0.1063, "step": 3795 }, { "epoch": 0.06722540255991358, "grad_norm": 1.109375, "learning_rate": 2.9891111230427154e-05, "loss": 0.1237, "step": 3796 }, { "epoch": 0.067243112096942, "grad_norm": 1.1937437057495117, "learning_rate": 2.989100772654476e-05, "loss": 0.0954, "step": 3797 }, { "epoch": 0.06726082163397043, "grad_norm": 1.8281503915786743, "learning_rate": 2.9890904173672454e-05, "loss": 0.1781, "step": 3798 }, { "epoch": 0.06727853117099886, "grad_norm": 1.4667634963989258, "learning_rate": 2.989080057181058e-05, "loss": 0.1387, "step": 3799 }, { "epoch": 0.0672962407080273, "grad_norm": 1.7219215631484985, "learning_rate": 2.989069692095948e-05, "loss": 0.1781, "step": 3800 }, { "epoch": 0.06731395024505572, "grad_norm": 1.6240143775939941, "learning_rate": 2.989059322111949e-05, "loss": 0.155, "step": 3801 }, { "epoch": 0.06733165978208415, "grad_norm": 2.137242078781128, "learning_rate": 2.9890489472290956e-05, "loss": 0.174, "step": 3802 }, { "epoch": 0.06734936931911258, "grad_norm": 3.856635570526123, "learning_rate": 2.9890385674474216e-05, "loss": 0.165, "step": 3803 }, { "epoch": 0.067367078856141, "grad_norm": 1.797323226928711, "learning_rate": 2.9890281827669614e-05, "loss": 0.1407, "step": 3804 }, { "epoch": 0.06738478839316943, "grad_norm": 1.7140214443206787, "learning_rate": 2.989017793187749e-05, "loss": 0.2164, "step": 3805 }, { "epoch": 0.06740249793019785, "grad_norm": 1.0269883871078491, "learning_rate": 2.9890073987098186e-05, "loss": 0.1325, "step": 3806 }, { "epoch": 0.06742020746722628, "grad_norm": 1.925530195236206, "learning_rate": 2.988996999333205e-05, "loss": 0.158, "step": 3807 }, { "epoch": 0.06743791700425472, "grad_norm": 1.3468003273010254, "learning_rate": 2.9889865950579416e-05, "loss": 0.1383, "step": 3808 }, { "epoch": 0.06745562654128315, "grad_norm": 1.6204397678375244, "learning_rate": 2.9889761858840627e-05, "loss": 0.1424, "step": 3809 }, { "epoch": 0.06747333607831157, "grad_norm": 1.6327613592147827, "learning_rate": 2.988965771811603e-05, "loss": 0.1731, "step": 3810 }, { "epoch": 0.06749104561534, "grad_norm": 1.3445230722427368, "learning_rate": 2.9889553528405963e-05, "loss": 0.1686, "step": 3811 }, { "epoch": 0.06750875515236843, "grad_norm": 1.8860893249511719, "learning_rate": 2.9889449289710777e-05, "loss": 0.1489, "step": 3812 }, { "epoch": 0.06752646468939685, "grad_norm": 1.4851621389389038, "learning_rate": 2.98893450020308e-05, "loss": 0.1541, "step": 3813 }, { "epoch": 0.06754417422642528, "grad_norm": 1.2737853527069092, "learning_rate": 2.9889240665366387e-05, "loss": 0.164, "step": 3814 }, { "epoch": 0.06756188376345372, "grad_norm": 1.2295087575912476, "learning_rate": 2.9889136279717876e-05, "loss": 0.2092, "step": 3815 }, { "epoch": 0.06757959330048215, "grad_norm": 1.618823766708374, "learning_rate": 2.9889031845085618e-05, "loss": 0.191, "step": 3816 }, { "epoch": 0.06759730283751057, "grad_norm": 1.4228105545043945, "learning_rate": 2.988892736146995e-05, "loss": 0.1301, "step": 3817 }, { "epoch": 0.067615012374539, "grad_norm": 1.2284536361694336, "learning_rate": 2.9888822828871213e-05, "loss": 0.1367, "step": 3818 }, { "epoch": 0.06763272191156743, "grad_norm": 1.691175103187561, "learning_rate": 2.9888718247289758e-05, "loss": 0.1645, "step": 3819 }, { "epoch": 0.06765043144859585, "grad_norm": 1.5795706510543823, "learning_rate": 2.9888613616725925e-05, "loss": 0.1558, "step": 3820 }, { "epoch": 0.06766814098562428, "grad_norm": 1.709302306175232, "learning_rate": 2.988850893718006e-05, "loss": 0.1536, "step": 3821 }, { "epoch": 0.0676858505226527, "grad_norm": 1.0640844106674194, "learning_rate": 2.9888404208652507e-05, "loss": 0.1451, "step": 3822 }, { "epoch": 0.06770356005968114, "grad_norm": 1.4088127613067627, "learning_rate": 2.9888299431143606e-05, "loss": 0.1649, "step": 3823 }, { "epoch": 0.06772126959670957, "grad_norm": 1.981795310974121, "learning_rate": 2.988819460465371e-05, "loss": 0.1865, "step": 3824 }, { "epoch": 0.067738979133738, "grad_norm": 1.42597234249115, "learning_rate": 2.9888089729183155e-05, "loss": 0.1477, "step": 3825 }, { "epoch": 0.06775668867076642, "grad_norm": 1.4645360708236694, "learning_rate": 2.9887984804732297e-05, "loss": 0.1838, "step": 3826 }, { "epoch": 0.06777439820779485, "grad_norm": 1.8881101608276367, "learning_rate": 2.988787983130147e-05, "loss": 0.1693, "step": 3827 }, { "epoch": 0.06779210774482328, "grad_norm": 1.3906480073928833, "learning_rate": 2.9887774808891027e-05, "loss": 0.1654, "step": 3828 }, { "epoch": 0.0678098172818517, "grad_norm": 1.5609785318374634, "learning_rate": 2.988766973750131e-05, "loss": 0.1244, "step": 3829 }, { "epoch": 0.06782752681888014, "grad_norm": 1.6561132669448853, "learning_rate": 2.988756461713266e-05, "loss": 0.193, "step": 3830 }, { "epoch": 0.06784523635590857, "grad_norm": 1.3077973127365112, "learning_rate": 2.9887459447785437e-05, "loss": 0.1601, "step": 3831 }, { "epoch": 0.067862945892937, "grad_norm": 1.2963274717330933, "learning_rate": 2.988735422945997e-05, "loss": 0.1395, "step": 3832 }, { "epoch": 0.06788065542996542, "grad_norm": 2.348057508468628, "learning_rate": 2.988724896215662e-05, "loss": 0.1337, "step": 3833 }, { "epoch": 0.06789836496699385, "grad_norm": 1.019524097442627, "learning_rate": 2.9887143645875726e-05, "loss": 0.1393, "step": 3834 }, { "epoch": 0.06791607450402228, "grad_norm": 1.6474002599716187, "learning_rate": 2.9887038280617632e-05, "loss": 0.1699, "step": 3835 }, { "epoch": 0.0679337840410507, "grad_norm": 1.454897165298462, "learning_rate": 2.9886932866382687e-05, "loss": 0.081, "step": 3836 }, { "epoch": 0.06795149357807914, "grad_norm": 1.6989867687225342, "learning_rate": 2.9886827403171244e-05, "loss": 0.1547, "step": 3837 }, { "epoch": 0.06796920311510757, "grad_norm": 2.548597574234009, "learning_rate": 2.988672189098364e-05, "loss": 0.1382, "step": 3838 }, { "epoch": 0.067986912652136, "grad_norm": 0.8390227556228638, "learning_rate": 2.9886616329820227e-05, "loss": 0.1729, "step": 3839 }, { "epoch": 0.06800462218916442, "grad_norm": 2.2400760650634766, "learning_rate": 2.9886510719681354e-05, "loss": 0.1698, "step": 3840 }, { "epoch": 0.06802233172619285, "grad_norm": 1.6371289491653442, "learning_rate": 2.9886405060567365e-05, "loss": 0.1717, "step": 3841 }, { "epoch": 0.06804004126322127, "grad_norm": 1.2840468883514404, "learning_rate": 2.9886299352478608e-05, "loss": 0.1676, "step": 3842 }, { "epoch": 0.0680577508002497, "grad_norm": 0.9308936595916748, "learning_rate": 2.9886193595415433e-05, "loss": 0.1163, "step": 3843 }, { "epoch": 0.06807546033727813, "grad_norm": 1.9062788486480713, "learning_rate": 2.9886087789378185e-05, "loss": 0.1622, "step": 3844 }, { "epoch": 0.06809316987430657, "grad_norm": 1.7406282424926758, "learning_rate": 2.9885981934367217e-05, "loss": 0.1663, "step": 3845 }, { "epoch": 0.06811087941133499, "grad_norm": 1.443605661392212, "learning_rate": 2.9885876030382874e-05, "loss": 0.1674, "step": 3846 }, { "epoch": 0.06812858894836342, "grad_norm": 1.4685357809066772, "learning_rate": 2.9885770077425505e-05, "loss": 0.1641, "step": 3847 }, { "epoch": 0.06814629848539185, "grad_norm": 1.7819052934646606, "learning_rate": 2.9885664075495453e-05, "loss": 0.1762, "step": 3848 }, { "epoch": 0.06816400802242027, "grad_norm": 1.5706748962402344, "learning_rate": 2.9885558024593076e-05, "loss": 0.1444, "step": 3849 }, { "epoch": 0.0681817175594487, "grad_norm": 1.3441858291625977, "learning_rate": 2.9885451924718716e-05, "loss": 0.1754, "step": 3850 }, { "epoch": 0.06819942709647712, "grad_norm": 1.6659501791000366, "learning_rate": 2.9885345775872727e-05, "loss": 0.1388, "step": 3851 }, { "epoch": 0.06821713663350557, "grad_norm": 1.582441806793213, "learning_rate": 2.9885239578055456e-05, "loss": 0.174, "step": 3852 }, { "epoch": 0.06823484617053399, "grad_norm": 1.2633755207061768, "learning_rate": 2.9885133331267254e-05, "loss": 0.1398, "step": 3853 }, { "epoch": 0.06825255570756242, "grad_norm": 1.3198968172073364, "learning_rate": 2.9885027035508465e-05, "loss": 0.1574, "step": 3854 }, { "epoch": 0.06827026524459084, "grad_norm": 1.5186058282852173, "learning_rate": 2.988492069077944e-05, "loss": 0.1694, "step": 3855 }, { "epoch": 0.06828797478161927, "grad_norm": 1.3500384092330933, "learning_rate": 2.9884814297080537e-05, "loss": 0.1321, "step": 3856 }, { "epoch": 0.0683056843186477, "grad_norm": 1.225285291671753, "learning_rate": 2.9884707854412103e-05, "loss": 0.1325, "step": 3857 }, { "epoch": 0.06832339385567612, "grad_norm": 1.785548448562622, "learning_rate": 2.9884601362774482e-05, "loss": 0.1664, "step": 3858 }, { "epoch": 0.06834110339270455, "grad_norm": 1.3777658939361572, "learning_rate": 2.9884494822168026e-05, "loss": 0.125, "step": 3859 }, { "epoch": 0.06835881292973299, "grad_norm": 1.9259295463562012, "learning_rate": 2.9884388232593088e-05, "loss": 0.1832, "step": 3860 }, { "epoch": 0.06837652246676142, "grad_norm": 1.5496314764022827, "learning_rate": 2.9884281594050023e-05, "loss": 0.2021, "step": 3861 }, { "epoch": 0.06839423200378984, "grad_norm": 1.2777583599090576, "learning_rate": 2.9884174906539173e-05, "loss": 0.1731, "step": 3862 }, { "epoch": 0.06841194154081827, "grad_norm": 1.3896520137786865, "learning_rate": 2.988406817006089e-05, "loss": 0.1758, "step": 3863 }, { "epoch": 0.0684296510778467, "grad_norm": 2.176257371902466, "learning_rate": 2.9883961384615534e-05, "loss": 0.1949, "step": 3864 }, { "epoch": 0.06844736061487512, "grad_norm": 1.6402041912078857, "learning_rate": 2.9883854550203445e-05, "loss": 0.1346, "step": 3865 }, { "epoch": 0.06846507015190355, "grad_norm": 1.5159333944320679, "learning_rate": 2.9883747666824985e-05, "loss": 0.2161, "step": 3866 }, { "epoch": 0.06848277968893199, "grad_norm": 0.9898371696472168, "learning_rate": 2.98836407344805e-05, "loss": 0.1444, "step": 3867 }, { "epoch": 0.06850048922596041, "grad_norm": 1.2573909759521484, "learning_rate": 2.9883533753170343e-05, "loss": 0.1408, "step": 3868 }, { "epoch": 0.06851819876298884, "grad_norm": 1.6986188888549805, "learning_rate": 2.988342672289486e-05, "loss": 0.137, "step": 3869 }, { "epoch": 0.06853590830001727, "grad_norm": 1.2677048444747925, "learning_rate": 2.9883319643654412e-05, "loss": 0.1529, "step": 3870 }, { "epoch": 0.0685536178370457, "grad_norm": 1.4330636262893677, "learning_rate": 2.988321251544935e-05, "loss": 0.1176, "step": 3871 }, { "epoch": 0.06857132737407412, "grad_norm": 1.7321637868881226, "learning_rate": 2.9883105338280018e-05, "loss": 0.1646, "step": 3872 }, { "epoch": 0.06858903691110255, "grad_norm": 1.8052153587341309, "learning_rate": 2.988299811214678e-05, "loss": 0.1523, "step": 3873 }, { "epoch": 0.06860674644813097, "grad_norm": 1.5598095655441284, "learning_rate": 2.988289083704998e-05, "loss": 0.1537, "step": 3874 }, { "epoch": 0.06862445598515941, "grad_norm": 1.4210875034332275, "learning_rate": 2.9882783512989978e-05, "loss": 0.1149, "step": 3875 }, { "epoch": 0.06864216552218784, "grad_norm": 1.776543378829956, "learning_rate": 2.988267613996712e-05, "loss": 0.115, "step": 3876 }, { "epoch": 0.06865987505921627, "grad_norm": 2.360816717147827, "learning_rate": 2.988256871798177e-05, "loss": 0.1939, "step": 3877 }, { "epoch": 0.06867758459624469, "grad_norm": 1.4449414014816284, "learning_rate": 2.9882461247034266e-05, "loss": 0.1743, "step": 3878 }, { "epoch": 0.06869529413327312, "grad_norm": 1.451339840888977, "learning_rate": 2.9882353727124973e-05, "loss": 0.1251, "step": 3879 }, { "epoch": 0.06871300367030155, "grad_norm": 1.7682570219039917, "learning_rate": 2.988224615825424e-05, "loss": 0.1273, "step": 3880 }, { "epoch": 0.06873071320732997, "grad_norm": 1.3604971170425415, "learning_rate": 2.9882138540422423e-05, "loss": 0.175, "step": 3881 }, { "epoch": 0.06874842274435841, "grad_norm": 1.1787670850753784, "learning_rate": 2.9882030873629878e-05, "loss": 0.105, "step": 3882 }, { "epoch": 0.06876613228138684, "grad_norm": 1.792516827583313, "learning_rate": 2.9881923157876955e-05, "loss": 0.0993, "step": 3883 }, { "epoch": 0.06878384181841526, "grad_norm": 1.4273302555084229, "learning_rate": 2.988181539316401e-05, "loss": 0.1121, "step": 3884 }, { "epoch": 0.06880155135544369, "grad_norm": 1.4490846395492554, "learning_rate": 2.9881707579491395e-05, "loss": 0.1667, "step": 3885 }, { "epoch": 0.06881926089247212, "grad_norm": 1.920148253440857, "learning_rate": 2.988159971685947e-05, "loss": 0.1457, "step": 3886 }, { "epoch": 0.06883697042950054, "grad_norm": 1.7715448141098022, "learning_rate": 2.9881491805268586e-05, "loss": 0.2153, "step": 3887 }, { "epoch": 0.06885467996652897, "grad_norm": 1.269858956336975, "learning_rate": 2.98813838447191e-05, "loss": 0.1339, "step": 3888 }, { "epoch": 0.06887238950355741, "grad_norm": 1.7747669219970703, "learning_rate": 2.9881275835211365e-05, "loss": 0.2156, "step": 3889 }, { "epoch": 0.06889009904058584, "grad_norm": 1.7691456079483032, "learning_rate": 2.988116777674574e-05, "loss": 0.1655, "step": 3890 }, { "epoch": 0.06890780857761426, "grad_norm": 1.5824836492538452, "learning_rate": 2.9881059669322577e-05, "loss": 0.1703, "step": 3891 }, { "epoch": 0.06892551811464269, "grad_norm": 1.2958669662475586, "learning_rate": 2.988095151294223e-05, "loss": 0.1471, "step": 3892 }, { "epoch": 0.06894322765167112, "grad_norm": 1.18690824508667, "learning_rate": 2.988084330760506e-05, "loss": 0.1741, "step": 3893 }, { "epoch": 0.06896093718869954, "grad_norm": 1.2950782775878906, "learning_rate": 2.9880735053311426e-05, "loss": 0.1386, "step": 3894 }, { "epoch": 0.06897864672572797, "grad_norm": 2.2969517707824707, "learning_rate": 2.988062675006167e-05, "loss": 0.1706, "step": 3895 }, { "epoch": 0.0689963562627564, "grad_norm": 0.7399702668190002, "learning_rate": 2.988051839785616e-05, "loss": 0.1245, "step": 3896 }, { "epoch": 0.06901406579978484, "grad_norm": 1.3495137691497803, "learning_rate": 2.9880409996695254e-05, "loss": 0.1252, "step": 3897 }, { "epoch": 0.06903177533681326, "grad_norm": 1.5085939168930054, "learning_rate": 2.98803015465793e-05, "loss": 0.1168, "step": 3898 }, { "epoch": 0.06904948487384169, "grad_norm": 1.7280679941177368, "learning_rate": 2.9880193047508658e-05, "loss": 0.2059, "step": 3899 }, { "epoch": 0.06906719441087011, "grad_norm": 1.433477520942688, "learning_rate": 2.9880084499483692e-05, "loss": 0.1615, "step": 3900 }, { "epoch": 0.06908490394789854, "grad_norm": 1.8862448930740356, "learning_rate": 2.987997590250475e-05, "loss": 0.1588, "step": 3901 }, { "epoch": 0.06910261348492697, "grad_norm": 1.6545785665512085, "learning_rate": 2.9879867256572183e-05, "loss": 0.1252, "step": 3902 }, { "epoch": 0.0691203230219554, "grad_norm": 1.227459192276001, "learning_rate": 2.9879758561686368e-05, "loss": 0.1356, "step": 3903 }, { "epoch": 0.06913803255898383, "grad_norm": 1.5988513231277466, "learning_rate": 2.9879649817847653e-05, "loss": 0.1191, "step": 3904 }, { "epoch": 0.06915574209601226, "grad_norm": 1.2648663520812988, "learning_rate": 2.987954102505639e-05, "loss": 0.1161, "step": 3905 }, { "epoch": 0.06917345163304069, "grad_norm": 1.343461036682129, "learning_rate": 2.9879432183312944e-05, "loss": 0.1666, "step": 3906 }, { "epoch": 0.06919116117006911, "grad_norm": 2.2490181922912598, "learning_rate": 2.9879323292617672e-05, "loss": 0.1537, "step": 3907 }, { "epoch": 0.06920887070709754, "grad_norm": 1.6114927530288696, "learning_rate": 2.9879214352970926e-05, "loss": 0.1663, "step": 3908 }, { "epoch": 0.06922658024412597, "grad_norm": 1.8309768438339233, "learning_rate": 2.9879105364373076e-05, "loss": 0.1793, "step": 3909 }, { "epoch": 0.06924428978115439, "grad_norm": 1.0754382610321045, "learning_rate": 2.9878996326824473e-05, "loss": 0.2124, "step": 3910 }, { "epoch": 0.06926199931818282, "grad_norm": 1.4278041124343872, "learning_rate": 2.9878887240325477e-05, "loss": 0.1372, "step": 3911 }, { "epoch": 0.06927970885521126, "grad_norm": 1.4211572408676147, "learning_rate": 2.9878778104876444e-05, "loss": 0.1474, "step": 3912 }, { "epoch": 0.06929741839223968, "grad_norm": 1.6605265140533447, "learning_rate": 2.987866892047774e-05, "loss": 0.1126, "step": 3913 }, { "epoch": 0.06931512792926811, "grad_norm": 1.087224006652832, "learning_rate": 2.9878559687129715e-05, "loss": 0.1051, "step": 3914 }, { "epoch": 0.06933283746629654, "grad_norm": 1.3777623176574707, "learning_rate": 2.9878450404832738e-05, "loss": 0.175, "step": 3915 }, { "epoch": 0.06935054700332496, "grad_norm": 1.3892834186553955, "learning_rate": 2.9878341073587163e-05, "loss": 0.1492, "step": 3916 }, { "epoch": 0.06936825654035339, "grad_norm": 1.721418857574463, "learning_rate": 2.987823169339335e-05, "loss": 0.1659, "step": 3917 }, { "epoch": 0.06938596607738182, "grad_norm": 2.2551615238189697, "learning_rate": 2.9878122264251665e-05, "loss": 0.1751, "step": 3918 }, { "epoch": 0.06940367561441026, "grad_norm": 2.125839948654175, "learning_rate": 2.9878012786162456e-05, "loss": 0.1631, "step": 3919 }, { "epoch": 0.06942138515143868, "grad_norm": 1.3031219244003296, "learning_rate": 2.987790325912609e-05, "loss": 0.1699, "step": 3920 }, { "epoch": 0.06943909468846711, "grad_norm": 2.97390079498291, "learning_rate": 2.987779368314293e-05, "loss": 0.1674, "step": 3921 }, { "epoch": 0.06945680422549554, "grad_norm": 2.614903450012207, "learning_rate": 2.9877684058213332e-05, "loss": 0.1679, "step": 3922 }, { "epoch": 0.06947451376252396, "grad_norm": 1.4424026012420654, "learning_rate": 2.9877574384337658e-05, "loss": 0.1744, "step": 3923 }, { "epoch": 0.06949222329955239, "grad_norm": 1.8071929216384888, "learning_rate": 2.9877464661516267e-05, "loss": 0.1404, "step": 3924 }, { "epoch": 0.06950993283658082, "grad_norm": 1.5467219352722168, "learning_rate": 2.9877354889749527e-05, "loss": 0.2392, "step": 3925 }, { "epoch": 0.06952764237360924, "grad_norm": 1.2677325010299683, "learning_rate": 2.9877245069037792e-05, "loss": 0.181, "step": 3926 }, { "epoch": 0.06954535191063768, "grad_norm": 2.0182721614837646, "learning_rate": 2.9877135199381423e-05, "loss": 0.1327, "step": 3927 }, { "epoch": 0.06956306144766611, "grad_norm": 1.365071177482605, "learning_rate": 2.9877025280780787e-05, "loss": 0.1493, "step": 3928 }, { "epoch": 0.06958077098469453, "grad_norm": 2.0421254634857178, "learning_rate": 2.987691531323624e-05, "loss": 0.1332, "step": 3929 }, { "epoch": 0.06959848052172296, "grad_norm": 2.0048370361328125, "learning_rate": 2.987680529674815e-05, "loss": 0.1677, "step": 3930 }, { "epoch": 0.06961619005875139, "grad_norm": 1.2895387411117554, "learning_rate": 2.987669523131687e-05, "loss": 0.1715, "step": 3931 }, { "epoch": 0.06963389959577981, "grad_norm": 1.7755577564239502, "learning_rate": 2.987658511694277e-05, "loss": 0.1434, "step": 3932 }, { "epoch": 0.06965160913280824, "grad_norm": 1.118835687637329, "learning_rate": 2.9876474953626213e-05, "loss": 0.121, "step": 3933 }, { "epoch": 0.06966931866983668, "grad_norm": 0.9271125197410583, "learning_rate": 2.9876364741367558e-05, "loss": 0.128, "step": 3934 }, { "epoch": 0.0696870282068651, "grad_norm": 1.107096552848816, "learning_rate": 2.9876254480167163e-05, "loss": 0.1626, "step": 3935 }, { "epoch": 0.06970473774389353, "grad_norm": 1.79450523853302, "learning_rate": 2.98761441700254e-05, "loss": 0.1567, "step": 3936 }, { "epoch": 0.06972244728092196, "grad_norm": 1.0847728252410889, "learning_rate": 2.987603381094262e-05, "loss": 0.0872, "step": 3937 }, { "epoch": 0.06974015681795039, "grad_norm": 3.9696738719940186, "learning_rate": 2.98759234029192e-05, "loss": 0.1844, "step": 3938 }, { "epoch": 0.06975786635497881, "grad_norm": 1.4718842506408691, "learning_rate": 2.9875812945955495e-05, "loss": 0.1538, "step": 3939 }, { "epoch": 0.06977557589200724, "grad_norm": 1.442906379699707, "learning_rate": 2.9875702440051866e-05, "loss": 0.1109, "step": 3940 }, { "epoch": 0.06979328542903566, "grad_norm": 1.552984356880188, "learning_rate": 2.9875591885208686e-05, "loss": 0.1831, "step": 3941 }, { "epoch": 0.0698109949660641, "grad_norm": 1.5161681175231934, "learning_rate": 2.9875481281426314e-05, "loss": 0.1584, "step": 3942 }, { "epoch": 0.06982870450309253, "grad_norm": 1.3276004791259766, "learning_rate": 2.987537062870511e-05, "loss": 0.1435, "step": 3943 }, { "epoch": 0.06984641404012096, "grad_norm": 1.5873340368270874, "learning_rate": 2.987525992704544e-05, "loss": 0.1684, "step": 3944 }, { "epoch": 0.06986412357714938, "grad_norm": 2.58453369140625, "learning_rate": 2.987514917644767e-05, "loss": 0.1752, "step": 3945 }, { "epoch": 0.06988183311417781, "grad_norm": 1.1931276321411133, "learning_rate": 2.9875038376912167e-05, "loss": 0.16, "step": 3946 }, { "epoch": 0.06989954265120624, "grad_norm": 1.4066574573516846, "learning_rate": 2.987492752843929e-05, "loss": 0.1302, "step": 3947 }, { "epoch": 0.06991725218823466, "grad_norm": 1.6012375354766846, "learning_rate": 2.9874816631029406e-05, "loss": 0.1995, "step": 3948 }, { "epoch": 0.0699349617252631, "grad_norm": 1.6387914419174194, "learning_rate": 2.987470568468288e-05, "loss": 0.1675, "step": 3949 }, { "epoch": 0.06995267126229153, "grad_norm": 1.5010840892791748, "learning_rate": 2.987459468940008e-05, "loss": 0.1674, "step": 3950 }, { "epoch": 0.06997038079931996, "grad_norm": 2.292382001876831, "learning_rate": 2.987448364518136e-05, "loss": 0.1452, "step": 3951 }, { "epoch": 0.06998809033634838, "grad_norm": 1.1259822845458984, "learning_rate": 2.98743725520271e-05, "loss": 0.126, "step": 3952 }, { "epoch": 0.07000579987337681, "grad_norm": 3.032346487045288, "learning_rate": 2.9874261409937658e-05, "loss": 0.1492, "step": 3953 }, { "epoch": 0.07002350941040524, "grad_norm": 2.489605665206909, "learning_rate": 2.9874150218913398e-05, "loss": 0.1683, "step": 3954 }, { "epoch": 0.07004121894743366, "grad_norm": 1.5953425168991089, "learning_rate": 2.987403897895469e-05, "loss": 0.1696, "step": 3955 }, { "epoch": 0.0700589284844621, "grad_norm": 1.430031657218933, "learning_rate": 2.98739276900619e-05, "loss": 0.1727, "step": 3956 }, { "epoch": 0.07007663802149053, "grad_norm": 1.3055676221847534, "learning_rate": 2.9873816352235387e-05, "loss": 0.1953, "step": 3957 }, { "epoch": 0.07009434755851895, "grad_norm": 1.9920300245285034, "learning_rate": 2.9873704965475522e-05, "loss": 0.1502, "step": 3958 }, { "epoch": 0.07011205709554738, "grad_norm": 1.655858039855957, "learning_rate": 2.9873593529782678e-05, "loss": 0.1481, "step": 3959 }, { "epoch": 0.07012976663257581, "grad_norm": 1.4980456829071045, "learning_rate": 2.9873482045157212e-05, "loss": 0.1804, "step": 3960 }, { "epoch": 0.07014747616960423, "grad_norm": 1.1362658739089966, "learning_rate": 2.9873370511599496e-05, "loss": 0.1753, "step": 3961 }, { "epoch": 0.07016518570663266, "grad_norm": 1.5244582891464233, "learning_rate": 2.9873258929109894e-05, "loss": 0.1645, "step": 3962 }, { "epoch": 0.07018289524366109, "grad_norm": 2.6753594875335693, "learning_rate": 2.9873147297688773e-05, "loss": 0.1674, "step": 3963 }, { "epoch": 0.07020060478068953, "grad_norm": 1.8921010494232178, "learning_rate": 2.9873035617336507e-05, "loss": 0.1554, "step": 3964 }, { "epoch": 0.07021831431771795, "grad_norm": 1.1032394170761108, "learning_rate": 2.9872923888053455e-05, "loss": 0.1232, "step": 3965 }, { "epoch": 0.07023602385474638, "grad_norm": 1.2385362386703491, "learning_rate": 2.9872812109839986e-05, "loss": 0.1354, "step": 3966 }, { "epoch": 0.0702537333917748, "grad_norm": 1.3067750930786133, "learning_rate": 2.9872700282696473e-05, "loss": 0.1801, "step": 3967 }, { "epoch": 0.07027144292880323, "grad_norm": 1.6453962326049805, "learning_rate": 2.987258840662328e-05, "loss": 0.1369, "step": 3968 }, { "epoch": 0.07028915246583166, "grad_norm": 1.3964812755584717, "learning_rate": 2.9872476481620776e-05, "loss": 0.1158, "step": 3969 }, { "epoch": 0.07030686200286008, "grad_norm": 1.5723363161087036, "learning_rate": 2.9872364507689323e-05, "loss": 0.1551, "step": 3970 }, { "epoch": 0.07032457153988853, "grad_norm": 1.6038686037063599, "learning_rate": 2.98722524848293e-05, "loss": 0.1336, "step": 3971 }, { "epoch": 0.07034228107691695, "grad_norm": 1.429330587387085, "learning_rate": 2.987214041304107e-05, "loss": 0.1524, "step": 3972 }, { "epoch": 0.07035999061394538, "grad_norm": 2.2596020698547363, "learning_rate": 2.9872028292324997e-05, "loss": 0.1755, "step": 3973 }, { "epoch": 0.0703777001509738, "grad_norm": 1.5968601703643799, "learning_rate": 2.9871916122681465e-05, "loss": 0.1804, "step": 3974 }, { "epoch": 0.07039540968800223, "grad_norm": 1.2973790168762207, "learning_rate": 2.9871803904110824e-05, "loss": 0.1239, "step": 3975 }, { "epoch": 0.07041311922503066, "grad_norm": 1.3511948585510254, "learning_rate": 2.9871691636613456e-05, "loss": 0.1426, "step": 3976 }, { "epoch": 0.07043082876205908, "grad_norm": 1.171542763710022, "learning_rate": 2.987157932018973e-05, "loss": 0.1586, "step": 3977 }, { "epoch": 0.07044853829908751, "grad_norm": 1.9309238195419312, "learning_rate": 2.987146695484001e-05, "loss": 0.1675, "step": 3978 }, { "epoch": 0.07046624783611595, "grad_norm": 1.0977259874343872, "learning_rate": 2.9871354540564664e-05, "loss": 0.1479, "step": 3979 }, { "epoch": 0.07048395737314438, "grad_norm": 1.502425193786621, "learning_rate": 2.987124207736407e-05, "loss": 0.1588, "step": 3980 }, { "epoch": 0.0705016669101728, "grad_norm": 0.9557260274887085, "learning_rate": 2.9871129565238593e-05, "loss": 0.13, "step": 3981 }, { "epoch": 0.07051937644720123, "grad_norm": 1.4679903984069824, "learning_rate": 2.98710170041886e-05, "loss": 0.1093, "step": 3982 }, { "epoch": 0.07053708598422966, "grad_norm": 1.5706796646118164, "learning_rate": 2.987090439421447e-05, "loss": 0.1169, "step": 3983 }, { "epoch": 0.07055479552125808, "grad_norm": 1.4158916473388672, "learning_rate": 2.987079173531657e-05, "loss": 0.1832, "step": 3984 }, { "epoch": 0.07057250505828651, "grad_norm": 1.4582953453063965, "learning_rate": 2.9870679027495263e-05, "loss": 0.1487, "step": 3985 }, { "epoch": 0.07059021459531495, "grad_norm": 1.2967818975448608, "learning_rate": 2.9870566270750933e-05, "loss": 0.1569, "step": 3986 }, { "epoch": 0.07060792413234337, "grad_norm": 1.2300153970718384, "learning_rate": 2.9870453465083937e-05, "loss": 0.1108, "step": 3987 }, { "epoch": 0.0706256336693718, "grad_norm": 1.2790381908416748, "learning_rate": 2.9870340610494657e-05, "loss": 0.15, "step": 3988 }, { "epoch": 0.07064334320640023, "grad_norm": 1.733345866203308, "learning_rate": 2.987022770698346e-05, "loss": 0.1607, "step": 3989 }, { "epoch": 0.07066105274342865, "grad_norm": 2.149080276489258, "learning_rate": 2.9870114754550718e-05, "loss": 0.1545, "step": 3990 }, { "epoch": 0.07067876228045708, "grad_norm": 1.5111172199249268, "learning_rate": 2.9870001753196804e-05, "loss": 0.1775, "step": 3991 }, { "epoch": 0.0706964718174855, "grad_norm": 1.2380390167236328, "learning_rate": 2.986988870292209e-05, "loss": 0.158, "step": 3992 }, { "epoch": 0.07071418135451393, "grad_norm": 1.8414727449417114, "learning_rate": 2.986977560372694e-05, "loss": 0.164, "step": 3993 }, { "epoch": 0.07073189089154237, "grad_norm": 1.4891105890274048, "learning_rate": 2.9869662455611733e-05, "loss": 0.1433, "step": 3994 }, { "epoch": 0.0707496004285708, "grad_norm": 1.1208469867706299, "learning_rate": 2.986954925857684e-05, "loss": 0.1392, "step": 3995 }, { "epoch": 0.07076730996559923, "grad_norm": 1.5277466773986816, "learning_rate": 2.986943601262264e-05, "loss": 0.1478, "step": 3996 }, { "epoch": 0.07078501950262765, "grad_norm": 1.0474953651428223, "learning_rate": 2.9869322717749494e-05, "loss": 0.1414, "step": 3997 }, { "epoch": 0.07080272903965608, "grad_norm": 1.2792561054229736, "learning_rate": 2.9869209373957782e-05, "loss": 0.1405, "step": 3998 }, { "epoch": 0.0708204385766845, "grad_norm": 1.3410563468933105, "learning_rate": 2.9869095981247877e-05, "loss": 0.1788, "step": 3999 }, { "epoch": 0.07083814811371293, "grad_norm": 1.1198670864105225, "learning_rate": 2.9868982539620146e-05, "loss": 0.191, "step": 4000 }, { "epoch": 0.07085585765074137, "grad_norm": 1.2795170545578003, "learning_rate": 2.986886904907497e-05, "loss": 0.1791, "step": 4001 }, { "epoch": 0.0708735671877698, "grad_norm": 2.1251025199890137, "learning_rate": 2.986875550961272e-05, "loss": 0.1448, "step": 4002 }, { "epoch": 0.07089127672479822, "grad_norm": 2.369182825088501, "learning_rate": 2.9868641921233765e-05, "loss": 0.1512, "step": 4003 }, { "epoch": 0.07090898626182665, "grad_norm": 1.2844417095184326, "learning_rate": 2.9868528283938482e-05, "loss": 0.0962, "step": 4004 }, { "epoch": 0.07092669579885508, "grad_norm": 1.6280556917190552, "learning_rate": 2.9868414597727246e-05, "loss": 0.1577, "step": 4005 }, { "epoch": 0.0709444053358835, "grad_norm": 1.4729288816452026, "learning_rate": 2.9868300862600427e-05, "loss": 0.1649, "step": 4006 }, { "epoch": 0.07096211487291193, "grad_norm": 1.4706521034240723, "learning_rate": 2.9868187078558407e-05, "loss": 0.1551, "step": 4007 }, { "epoch": 0.07097982440994036, "grad_norm": 1.4837641716003418, "learning_rate": 2.9868073245601552e-05, "loss": 0.161, "step": 4008 }, { "epoch": 0.0709975339469688, "grad_norm": 1.946671485900879, "learning_rate": 2.986795936373024e-05, "loss": 0.1784, "step": 4009 }, { "epoch": 0.07101524348399722, "grad_norm": 2.034782648086548, "learning_rate": 2.9867845432944847e-05, "loss": 0.1521, "step": 4010 }, { "epoch": 0.07103295302102565, "grad_norm": 1.6773700714111328, "learning_rate": 2.9867731453245745e-05, "loss": 0.1707, "step": 4011 }, { "epoch": 0.07105066255805408, "grad_norm": 1.4470373392105103, "learning_rate": 2.986761742463331e-05, "loss": 0.1127, "step": 4012 }, { "epoch": 0.0710683720950825, "grad_norm": 1.3742108345031738, "learning_rate": 2.9867503347107915e-05, "loss": 0.1641, "step": 4013 }, { "epoch": 0.07108608163211093, "grad_norm": 1.854792594909668, "learning_rate": 2.9867389220669943e-05, "loss": 0.1812, "step": 4014 }, { "epoch": 0.07110379116913935, "grad_norm": 1.2954860925674438, "learning_rate": 2.9867275045319762e-05, "loss": 0.1596, "step": 4015 }, { "epoch": 0.0711215007061678, "grad_norm": 2.2348930835723877, "learning_rate": 2.9867160821057752e-05, "loss": 0.1918, "step": 4016 }, { "epoch": 0.07113921024319622, "grad_norm": 2.069833755493164, "learning_rate": 2.986704654788428e-05, "loss": 0.1443, "step": 4017 }, { "epoch": 0.07115691978022465, "grad_norm": 1.1327412128448486, "learning_rate": 2.9866932225799737e-05, "loss": 0.1061, "step": 4018 }, { "epoch": 0.07117462931725307, "grad_norm": 4.948402404785156, "learning_rate": 2.9866817854804485e-05, "loss": 0.1995, "step": 4019 }, { "epoch": 0.0711923388542815, "grad_norm": 2.0198471546173096, "learning_rate": 2.9866703434898907e-05, "loss": 0.1337, "step": 4020 }, { "epoch": 0.07121004839130993, "grad_norm": 1.4340624809265137, "learning_rate": 2.986658896608338e-05, "loss": 0.1197, "step": 4021 }, { "epoch": 0.07122775792833835, "grad_norm": 1.5408352613449097, "learning_rate": 2.986647444835828e-05, "loss": 0.171, "step": 4022 }, { "epoch": 0.0712454674653668, "grad_norm": 1.452042818069458, "learning_rate": 2.986635988172398e-05, "loss": 0.1647, "step": 4023 }, { "epoch": 0.07126317700239522, "grad_norm": 1.853063702583313, "learning_rate": 2.986624526618086e-05, "loss": 0.1938, "step": 4024 }, { "epoch": 0.07128088653942365, "grad_norm": 1.5193265676498413, "learning_rate": 2.9866130601729296e-05, "loss": 0.1863, "step": 4025 }, { "epoch": 0.07129859607645207, "grad_norm": 2.1218230724334717, "learning_rate": 2.986601588836967e-05, "loss": 0.1902, "step": 4026 }, { "epoch": 0.0713163056134805, "grad_norm": 1.2158153057098389, "learning_rate": 2.986590112610235e-05, "loss": 0.142, "step": 4027 }, { "epoch": 0.07133401515050893, "grad_norm": 1.3403139114379883, "learning_rate": 2.9865786314927724e-05, "loss": 0.1546, "step": 4028 }, { "epoch": 0.07135172468753735, "grad_norm": 1.6262974739074707, "learning_rate": 2.986567145484616e-05, "loss": 0.1992, "step": 4029 }, { "epoch": 0.07136943422456578, "grad_norm": 1.954994797706604, "learning_rate": 2.986555654585804e-05, "loss": 0.1412, "step": 4030 }, { "epoch": 0.07138714376159422, "grad_norm": 1.2846873998641968, "learning_rate": 2.9865441587963745e-05, "loss": 0.1264, "step": 4031 }, { "epoch": 0.07140485329862264, "grad_norm": 1.7008992433547974, "learning_rate": 2.9865326581163655e-05, "loss": 0.1868, "step": 4032 }, { "epoch": 0.07142256283565107, "grad_norm": 1.6775014400482178, "learning_rate": 2.986521152545814e-05, "loss": 0.1301, "step": 4033 }, { "epoch": 0.0714402723726795, "grad_norm": 1.9645167589187622, "learning_rate": 2.986509642084758e-05, "loss": 0.1407, "step": 4034 }, { "epoch": 0.07145798190970792, "grad_norm": 2.184846878051758, "learning_rate": 2.9864981267332357e-05, "loss": 0.1738, "step": 4035 }, { "epoch": 0.07147569144673635, "grad_norm": 1.3016917705535889, "learning_rate": 2.986486606491285e-05, "loss": 0.1849, "step": 4036 }, { "epoch": 0.07149340098376478, "grad_norm": 1.3133175373077393, "learning_rate": 2.986475081358944e-05, "loss": 0.1642, "step": 4037 }, { "epoch": 0.07151111052079322, "grad_norm": 1.2998923063278198, "learning_rate": 2.98646355133625e-05, "loss": 0.1218, "step": 4038 }, { "epoch": 0.07152882005782164, "grad_norm": 1.7598696947097778, "learning_rate": 2.9864520164232417e-05, "loss": 0.1678, "step": 4039 }, { "epoch": 0.07154652959485007, "grad_norm": 1.0239912271499634, "learning_rate": 2.986440476619956e-05, "loss": 0.1124, "step": 4040 }, { "epoch": 0.0715642391318785, "grad_norm": 1.478246808052063, "learning_rate": 2.9864289319264314e-05, "loss": 0.1234, "step": 4041 }, { "epoch": 0.07158194866890692, "grad_norm": 1.3784600496292114, "learning_rate": 2.9864173823427068e-05, "loss": 0.1705, "step": 4042 }, { "epoch": 0.07159965820593535, "grad_norm": 1.4083921909332275, "learning_rate": 2.9864058278688188e-05, "loss": 0.1601, "step": 4043 }, { "epoch": 0.07161736774296378, "grad_norm": 1.8369691371917725, "learning_rate": 2.986394268504806e-05, "loss": 0.1754, "step": 4044 }, { "epoch": 0.0716350772799922, "grad_norm": 1.2060625553131104, "learning_rate": 2.9863827042507065e-05, "loss": 0.1241, "step": 4045 }, { "epoch": 0.07165278681702064, "grad_norm": 1.4171020984649658, "learning_rate": 2.9863711351065582e-05, "loss": 0.176, "step": 4046 }, { "epoch": 0.07167049635404907, "grad_norm": 2.2152833938598633, "learning_rate": 2.9863595610723992e-05, "loss": 0.1584, "step": 4047 }, { "epoch": 0.0716882058910775, "grad_norm": 1.4761013984680176, "learning_rate": 2.9863479821482675e-05, "loss": 0.1626, "step": 4048 }, { "epoch": 0.07170591542810592, "grad_norm": 2.6453921794891357, "learning_rate": 2.9863363983342014e-05, "loss": 0.2047, "step": 4049 }, { "epoch": 0.07172362496513435, "grad_norm": 1.491808295249939, "learning_rate": 2.9863248096302385e-05, "loss": 0.1532, "step": 4050 }, { "epoch": 0.07174133450216277, "grad_norm": 1.3998103141784668, "learning_rate": 2.9863132160364177e-05, "loss": 0.1638, "step": 4051 }, { "epoch": 0.0717590440391912, "grad_norm": 2.177239418029785, "learning_rate": 2.9863016175527767e-05, "loss": 0.165, "step": 4052 }, { "epoch": 0.07177675357621964, "grad_norm": 1.23148512840271, "learning_rate": 2.9862900141793537e-05, "loss": 0.1524, "step": 4053 }, { "epoch": 0.07179446311324807, "grad_norm": 1.346524715423584, "learning_rate": 2.986278405916187e-05, "loss": 0.2046, "step": 4054 }, { "epoch": 0.07181217265027649, "grad_norm": 2.170154094696045, "learning_rate": 2.9862667927633144e-05, "loss": 0.1776, "step": 4055 }, { "epoch": 0.07182988218730492, "grad_norm": 1.1542731523513794, "learning_rate": 2.9862551747207743e-05, "loss": 0.1435, "step": 4056 }, { "epoch": 0.07184759172433335, "grad_norm": 1.8180429935455322, "learning_rate": 2.9862435517886057e-05, "loss": 0.1286, "step": 4057 }, { "epoch": 0.07186530126136177, "grad_norm": 1.264263391494751, "learning_rate": 2.9862319239668456e-05, "loss": 0.1402, "step": 4058 }, { "epoch": 0.0718830107983902, "grad_norm": 1.805871605873108, "learning_rate": 2.986220291255533e-05, "loss": 0.1961, "step": 4059 }, { "epoch": 0.07190072033541862, "grad_norm": 1.5859441757202148, "learning_rate": 2.986208653654706e-05, "loss": 0.1045, "step": 4060 }, { "epoch": 0.07191842987244706, "grad_norm": 1.330101728439331, "learning_rate": 2.9861970111644022e-05, "loss": 0.1532, "step": 4061 }, { "epoch": 0.07193613940947549, "grad_norm": 1.182384967803955, "learning_rate": 2.9861853637846612e-05, "loss": 0.1701, "step": 4062 }, { "epoch": 0.07195384894650392, "grad_norm": 2.560502052307129, "learning_rate": 2.9861737115155204e-05, "loss": 0.1611, "step": 4063 }, { "epoch": 0.07197155848353234, "grad_norm": 1.0750387907028198, "learning_rate": 2.9861620543570184e-05, "loss": 0.1383, "step": 4064 }, { "epoch": 0.07198926802056077, "grad_norm": 1.2799186706542969, "learning_rate": 2.986150392309194e-05, "loss": 0.1235, "step": 4065 }, { "epoch": 0.0720069775575892, "grad_norm": 1.871860146522522, "learning_rate": 2.9861387253720848e-05, "loss": 0.1237, "step": 4066 }, { "epoch": 0.07202468709461762, "grad_norm": 1.4929717779159546, "learning_rate": 2.986127053545729e-05, "loss": 0.2347, "step": 4067 }, { "epoch": 0.07204239663164606, "grad_norm": 1.6912585496902466, "learning_rate": 2.986115376830166e-05, "loss": 0.1371, "step": 4068 }, { "epoch": 0.07206010616867449, "grad_norm": 1.466995358467102, "learning_rate": 2.986103695225434e-05, "loss": 0.1484, "step": 4069 }, { "epoch": 0.07207781570570292, "grad_norm": 1.2948859930038452, "learning_rate": 2.9860920087315706e-05, "loss": 0.1236, "step": 4070 }, { "epoch": 0.07209552524273134, "grad_norm": 1.5800844430923462, "learning_rate": 2.9860803173486153e-05, "loss": 0.178, "step": 4071 }, { "epoch": 0.07211323477975977, "grad_norm": 1.1408993005752563, "learning_rate": 2.9860686210766058e-05, "loss": 0.1251, "step": 4072 }, { "epoch": 0.0721309443167882, "grad_norm": 2.2463107109069824, "learning_rate": 2.9860569199155808e-05, "loss": 0.188, "step": 4073 }, { "epoch": 0.07214865385381662, "grad_norm": 1.3589656352996826, "learning_rate": 2.9860452138655792e-05, "loss": 0.1737, "step": 4074 }, { "epoch": 0.07216636339084505, "grad_norm": 1.9659919738769531, "learning_rate": 2.9860335029266386e-05, "loss": 0.1519, "step": 4075 }, { "epoch": 0.07218407292787349, "grad_norm": 2.0572311878204346, "learning_rate": 2.9860217870987983e-05, "loss": 0.1886, "step": 4076 }, { "epoch": 0.07220178246490191, "grad_norm": 1.068573236465454, "learning_rate": 2.986010066382097e-05, "loss": 0.1275, "step": 4077 }, { "epoch": 0.07221949200193034, "grad_norm": 1.4680932760238647, "learning_rate": 2.9859983407765722e-05, "loss": 0.1162, "step": 4078 }, { "epoch": 0.07223720153895877, "grad_norm": 1.2757065296173096, "learning_rate": 2.9859866102822636e-05, "loss": 0.1364, "step": 4079 }, { "epoch": 0.0722549110759872, "grad_norm": 2.0835635662078857, "learning_rate": 2.9859748748992095e-05, "loss": 0.1535, "step": 4080 }, { "epoch": 0.07227262061301562, "grad_norm": 2.0287926197052, "learning_rate": 2.9859631346274478e-05, "loss": 0.1262, "step": 4081 }, { "epoch": 0.07229033015004405, "grad_norm": 1.5187097787857056, "learning_rate": 2.9859513894670182e-05, "loss": 0.2049, "step": 4082 }, { "epoch": 0.07230803968707249, "grad_norm": 1.5226517915725708, "learning_rate": 2.9859396394179582e-05, "loss": 0.1426, "step": 4083 }, { "epoch": 0.07232574922410091, "grad_norm": 2.296311378479004, "learning_rate": 2.9859278844803076e-05, "loss": 0.197, "step": 4084 }, { "epoch": 0.07234345876112934, "grad_norm": 1.842989444732666, "learning_rate": 2.985916124654105e-05, "loss": 0.1712, "step": 4085 }, { "epoch": 0.07236116829815777, "grad_norm": 1.435959815979004, "learning_rate": 2.9859043599393877e-05, "loss": 0.1653, "step": 4086 }, { "epoch": 0.07237887783518619, "grad_norm": 0.8836579322814941, "learning_rate": 2.9858925903361958e-05, "loss": 0.135, "step": 4087 }, { "epoch": 0.07239658737221462, "grad_norm": 1.4243309497833252, "learning_rate": 2.9858808158445673e-05, "loss": 0.1873, "step": 4088 }, { "epoch": 0.07241429690924304, "grad_norm": 0.9943320155143738, "learning_rate": 2.9858690364645416e-05, "loss": 0.1313, "step": 4089 }, { "epoch": 0.07243200644627149, "grad_norm": 1.5376390218734741, "learning_rate": 2.985857252196157e-05, "loss": 0.1661, "step": 4090 }, { "epoch": 0.07244971598329991, "grad_norm": 1.3052347898483276, "learning_rate": 2.985845463039452e-05, "loss": 0.172, "step": 4091 }, { "epoch": 0.07246742552032834, "grad_norm": 1.5403262376785278, "learning_rate": 2.985833668994466e-05, "loss": 0.17, "step": 4092 }, { "epoch": 0.07248513505735676, "grad_norm": 1.9466700553894043, "learning_rate": 2.9858218700612376e-05, "loss": 0.1706, "step": 4093 }, { "epoch": 0.07250284459438519, "grad_norm": 1.3492480516433716, "learning_rate": 2.9858100662398052e-05, "loss": 0.1059, "step": 4094 }, { "epoch": 0.07252055413141362, "grad_norm": 0.8755335211753845, "learning_rate": 2.9857982575302083e-05, "loss": 0.1372, "step": 4095 }, { "epoch": 0.07253826366844204, "grad_norm": 1.2907289266586304, "learning_rate": 2.9857864439324852e-05, "loss": 0.1335, "step": 4096 }, { "epoch": 0.07255597320547047, "grad_norm": 1.2421715259552002, "learning_rate": 2.9857746254466752e-05, "loss": 0.1595, "step": 4097 }, { "epoch": 0.07257368274249891, "grad_norm": 2.1796231269836426, "learning_rate": 2.9857628020728164e-05, "loss": 0.1431, "step": 4098 }, { "epoch": 0.07259139227952734, "grad_norm": 1.78104567527771, "learning_rate": 2.9857509738109485e-05, "loss": 0.1474, "step": 4099 }, { "epoch": 0.07260910181655576, "grad_norm": 1.674487590789795, "learning_rate": 2.9857391406611104e-05, "loss": 0.1632, "step": 4100 }, { "epoch": 0.07262681135358419, "grad_norm": 1.3143317699432373, "learning_rate": 2.985727302623341e-05, "loss": 0.1865, "step": 4101 }, { "epoch": 0.07264452089061262, "grad_norm": 2.2710330486297607, "learning_rate": 2.9857154596976783e-05, "loss": 0.1839, "step": 4102 }, { "epoch": 0.07266223042764104, "grad_norm": 1.3874318599700928, "learning_rate": 2.9857036118841627e-05, "loss": 0.1469, "step": 4103 }, { "epoch": 0.07267993996466947, "grad_norm": 2.049912691116333, "learning_rate": 2.9856917591828322e-05, "loss": 0.149, "step": 4104 }, { "epoch": 0.07269764950169791, "grad_norm": 1.493552803993225, "learning_rate": 2.9856799015937263e-05, "loss": 0.1586, "step": 4105 }, { "epoch": 0.07271535903872633, "grad_norm": 1.9259716272354126, "learning_rate": 2.9856680391168834e-05, "loss": 0.1792, "step": 4106 }, { "epoch": 0.07273306857575476, "grad_norm": 1.5148409605026245, "learning_rate": 2.985656171752343e-05, "loss": 0.1711, "step": 4107 }, { "epoch": 0.07275077811278319, "grad_norm": 1.2389249801635742, "learning_rate": 2.9856442995001444e-05, "loss": 0.1054, "step": 4108 }, { "epoch": 0.07276848764981161, "grad_norm": 1.1746454238891602, "learning_rate": 2.985632422360326e-05, "loss": 0.1616, "step": 4109 }, { "epoch": 0.07278619718684004, "grad_norm": 1.368612289428711, "learning_rate": 2.9856205403329274e-05, "loss": 0.121, "step": 4110 }, { "epoch": 0.07280390672386847, "grad_norm": 0.9961474537849426, "learning_rate": 2.9856086534179873e-05, "loss": 0.1541, "step": 4111 }, { "epoch": 0.0728216162608969, "grad_norm": 1.444512128829956, "learning_rate": 2.9855967616155452e-05, "loss": 0.1534, "step": 4112 }, { "epoch": 0.07283932579792533, "grad_norm": 1.2424683570861816, "learning_rate": 2.98558486492564e-05, "loss": 0.1522, "step": 4113 }, { "epoch": 0.07285703533495376, "grad_norm": 1.678479552268982, "learning_rate": 2.9855729633483106e-05, "loss": 0.1245, "step": 4114 }, { "epoch": 0.07287474487198219, "grad_norm": 1.256824016571045, "learning_rate": 2.9855610568835967e-05, "loss": 0.141, "step": 4115 }, { "epoch": 0.07289245440901061, "grad_norm": 1.7176769971847534, "learning_rate": 2.9855491455315365e-05, "loss": 0.1649, "step": 4116 }, { "epoch": 0.07291016394603904, "grad_norm": 2.1178600788116455, "learning_rate": 2.985537229292171e-05, "loss": 0.132, "step": 4117 }, { "epoch": 0.07292787348306747, "grad_norm": 1.2410225868225098, "learning_rate": 2.9855253081655375e-05, "loss": 0.1128, "step": 4118 }, { "epoch": 0.07294558302009589, "grad_norm": 1.9519280195236206, "learning_rate": 2.985513382151676e-05, "loss": 0.2062, "step": 4119 }, { "epoch": 0.07296329255712433, "grad_norm": 1.4975107908248901, "learning_rate": 2.985501451250626e-05, "loss": 0.1511, "step": 4120 }, { "epoch": 0.07298100209415276, "grad_norm": 1.588769793510437, "learning_rate": 2.9854895154624263e-05, "loss": 0.1355, "step": 4121 }, { "epoch": 0.07299871163118118, "grad_norm": 1.4877781867980957, "learning_rate": 2.9854775747871165e-05, "loss": 0.1881, "step": 4122 }, { "epoch": 0.07301642116820961, "grad_norm": 1.5752986669540405, "learning_rate": 2.9854656292247353e-05, "loss": 0.1427, "step": 4123 }, { "epoch": 0.07303413070523804, "grad_norm": 3.091315507888794, "learning_rate": 2.985453678775323e-05, "loss": 0.1923, "step": 4124 }, { "epoch": 0.07305184024226646, "grad_norm": 1.1564953327178955, "learning_rate": 2.985441723438918e-05, "loss": 0.117, "step": 4125 }, { "epoch": 0.07306954977929489, "grad_norm": 1.6963253021240234, "learning_rate": 2.9854297632155598e-05, "loss": 0.1367, "step": 4126 }, { "epoch": 0.07308725931632332, "grad_norm": 1.3734720945358276, "learning_rate": 2.9854177981052885e-05, "loss": 0.1152, "step": 4127 }, { "epoch": 0.07310496885335176, "grad_norm": 0.9920089244842529, "learning_rate": 2.9854058281081424e-05, "loss": 0.1312, "step": 4128 }, { "epoch": 0.07312267839038018, "grad_norm": 1.2679880857467651, "learning_rate": 2.9853938532241612e-05, "loss": 0.1523, "step": 4129 }, { "epoch": 0.07314038792740861, "grad_norm": 3.2829041481018066, "learning_rate": 2.9853818734533854e-05, "loss": 0.1917, "step": 4130 }, { "epoch": 0.07315809746443704, "grad_norm": 1.1895750761032104, "learning_rate": 2.9853698887958526e-05, "loss": 0.133, "step": 4131 }, { "epoch": 0.07317580700146546, "grad_norm": 1.6827993392944336, "learning_rate": 2.9853578992516034e-05, "loss": 0.1661, "step": 4132 }, { "epoch": 0.07319351653849389, "grad_norm": 1.1947362422943115, "learning_rate": 2.9853459048206772e-05, "loss": 0.1494, "step": 4133 }, { "epoch": 0.07321122607552231, "grad_norm": 2.636570930480957, "learning_rate": 2.985333905503113e-05, "loss": 0.1687, "step": 4134 }, { "epoch": 0.07322893561255076, "grad_norm": 2.1882412433624268, "learning_rate": 2.98532190129895e-05, "loss": 0.2123, "step": 4135 }, { "epoch": 0.07324664514957918, "grad_norm": 1.0026859045028687, "learning_rate": 2.9853098922082287e-05, "loss": 0.1356, "step": 4136 }, { "epoch": 0.07326435468660761, "grad_norm": 1.6965452432632446, "learning_rate": 2.985297878230988e-05, "loss": 0.1849, "step": 4137 }, { "epoch": 0.07328206422363603, "grad_norm": 2.0245442390441895, "learning_rate": 2.9852858593672672e-05, "loss": 0.146, "step": 4138 }, { "epoch": 0.07329977376066446, "grad_norm": 1.7164568901062012, "learning_rate": 2.9852738356171066e-05, "loss": 0.2188, "step": 4139 }, { "epoch": 0.07331748329769289, "grad_norm": 1.5984865427017212, "learning_rate": 2.9852618069805454e-05, "loss": 0.1665, "step": 4140 }, { "epoch": 0.07333519283472131, "grad_norm": 1.7769005298614502, "learning_rate": 2.9852497734576226e-05, "loss": 0.1591, "step": 4141 }, { "epoch": 0.07335290237174974, "grad_norm": 1.9012269973754883, "learning_rate": 2.9852377350483785e-05, "loss": 0.1347, "step": 4142 }, { "epoch": 0.07337061190877818, "grad_norm": 1.21457839012146, "learning_rate": 2.9852256917528523e-05, "loss": 0.1704, "step": 4143 }, { "epoch": 0.0733883214458066, "grad_norm": 0.9558283090591431, "learning_rate": 2.9852136435710838e-05, "loss": 0.1165, "step": 4144 }, { "epoch": 0.07340603098283503, "grad_norm": 2.1710426807403564, "learning_rate": 2.985201590503113e-05, "loss": 0.1694, "step": 4145 }, { "epoch": 0.07342374051986346, "grad_norm": 1.4617639780044556, "learning_rate": 2.9851895325489792e-05, "loss": 0.1309, "step": 4146 }, { "epoch": 0.07344145005689189, "grad_norm": 1.5401562452316284, "learning_rate": 2.9851774697087218e-05, "loss": 0.1459, "step": 4147 }, { "epoch": 0.07345915959392031, "grad_norm": 1.3798205852508545, "learning_rate": 2.9851654019823805e-05, "loss": 0.1322, "step": 4148 }, { "epoch": 0.07347686913094874, "grad_norm": 1.1663272380828857, "learning_rate": 2.9851533293699954e-05, "loss": 0.1428, "step": 4149 }, { "epoch": 0.07349457866797718, "grad_norm": 1.7707023620605469, "learning_rate": 2.9851412518716065e-05, "loss": 0.1742, "step": 4150 }, { "epoch": 0.0735122882050056, "grad_norm": 2.0090105533599854, "learning_rate": 2.9851291694872527e-05, "loss": 0.2369, "step": 4151 }, { "epoch": 0.07352999774203403, "grad_norm": 1.3846465349197388, "learning_rate": 2.985117082216974e-05, "loss": 0.1475, "step": 4152 }, { "epoch": 0.07354770727906246, "grad_norm": 2.339008331298828, "learning_rate": 2.9851049900608106e-05, "loss": 0.1624, "step": 4153 }, { "epoch": 0.07356541681609088, "grad_norm": 1.0256397724151611, "learning_rate": 2.9850928930188017e-05, "loss": 0.1849, "step": 4154 }, { "epoch": 0.07358312635311931, "grad_norm": 3.2130651473999023, "learning_rate": 2.9850807910909875e-05, "loss": 0.1634, "step": 4155 }, { "epoch": 0.07360083589014774, "grad_norm": 1.5399879217147827, "learning_rate": 2.985068684277408e-05, "loss": 0.1954, "step": 4156 }, { "epoch": 0.07361854542717618, "grad_norm": 1.5561326742172241, "learning_rate": 2.9850565725781024e-05, "loss": 0.1552, "step": 4157 }, { "epoch": 0.0736362549642046, "grad_norm": 1.29883873462677, "learning_rate": 2.985044455993111e-05, "loss": 0.1558, "step": 4158 }, { "epoch": 0.07365396450123303, "grad_norm": 1.2969094514846802, "learning_rate": 2.9850323345224738e-05, "loss": 0.1587, "step": 4159 }, { "epoch": 0.07367167403826146, "grad_norm": 1.3734804391860962, "learning_rate": 2.98502020816623e-05, "loss": 0.1599, "step": 4160 }, { "epoch": 0.07368938357528988, "grad_norm": 1.7332912683486938, "learning_rate": 2.9850080769244202e-05, "loss": 0.2168, "step": 4161 }, { "epoch": 0.07370709311231831, "grad_norm": 1.150794267654419, "learning_rate": 2.984995940797084e-05, "loss": 0.1236, "step": 4162 }, { "epoch": 0.07372480264934674, "grad_norm": 1.1099706888198853, "learning_rate": 2.984983799784261e-05, "loss": 0.1001, "step": 4163 }, { "epoch": 0.07374251218637516, "grad_norm": 1.202277660369873, "learning_rate": 2.9849716538859922e-05, "loss": 0.1264, "step": 4164 }, { "epoch": 0.0737602217234036, "grad_norm": 1.5476372241973877, "learning_rate": 2.9849595031023164e-05, "loss": 0.1248, "step": 4165 }, { "epoch": 0.07377793126043203, "grad_norm": 1.395337462425232, "learning_rate": 2.9849473474332737e-05, "loss": 0.1755, "step": 4166 }, { "epoch": 0.07379564079746045, "grad_norm": 1.217400312423706, "learning_rate": 2.984935186878905e-05, "loss": 0.1784, "step": 4167 }, { "epoch": 0.07381335033448888, "grad_norm": 2.2879583835601807, "learning_rate": 2.9849230214392493e-05, "loss": 0.1432, "step": 4168 }, { "epoch": 0.07383105987151731, "grad_norm": 1.8344403505325317, "learning_rate": 2.9849108511143476e-05, "loss": 0.1874, "step": 4169 }, { "epoch": 0.07384876940854573, "grad_norm": 1.8901922702789307, "learning_rate": 2.9848986759042385e-05, "loss": 0.1576, "step": 4170 }, { "epoch": 0.07386647894557416, "grad_norm": 3.203404664993286, "learning_rate": 2.9848864958089636e-05, "loss": 0.1519, "step": 4171 }, { "epoch": 0.0738841884826026, "grad_norm": 1.0511271953582764, "learning_rate": 2.9848743108285622e-05, "loss": 0.1676, "step": 4172 }, { "epoch": 0.07390189801963103, "grad_norm": 1.555920958518982, "learning_rate": 2.9848621209630746e-05, "loss": 0.1384, "step": 4173 }, { "epoch": 0.07391960755665945, "grad_norm": 1.649993658065796, "learning_rate": 2.984849926212541e-05, "loss": 0.2127, "step": 4174 }, { "epoch": 0.07393731709368788, "grad_norm": 1.4906638860702515, "learning_rate": 2.984837726577001e-05, "loss": 0.1424, "step": 4175 }, { "epoch": 0.0739550266307163, "grad_norm": 1.2208117246627808, "learning_rate": 2.9848255220564954e-05, "loss": 0.1285, "step": 4176 }, { "epoch": 0.07397273616774473, "grad_norm": 1.1075928211212158, "learning_rate": 2.9848133126510632e-05, "loss": 0.135, "step": 4177 }, { "epoch": 0.07399044570477316, "grad_norm": 1.609646201133728, "learning_rate": 2.9848010983607462e-05, "loss": 0.131, "step": 4178 }, { "epoch": 0.07400815524180158, "grad_norm": 1.270715594291687, "learning_rate": 2.9847888791855835e-05, "loss": 0.1285, "step": 4179 }, { "epoch": 0.07402586477883003, "grad_norm": 1.3638231754302979, "learning_rate": 2.9847766551256153e-05, "loss": 0.1481, "step": 4180 }, { "epoch": 0.07404357431585845, "grad_norm": 2.087451934814453, "learning_rate": 2.9847644261808823e-05, "loss": 0.1547, "step": 4181 }, { "epoch": 0.07406128385288688, "grad_norm": 1.5174158811569214, "learning_rate": 2.9847521923514245e-05, "loss": 0.1377, "step": 4182 }, { "epoch": 0.0740789933899153, "grad_norm": 2.2721548080444336, "learning_rate": 2.9847399536372825e-05, "loss": 0.1913, "step": 4183 }, { "epoch": 0.07409670292694373, "grad_norm": 2.1303882598876953, "learning_rate": 2.9847277100384956e-05, "loss": 0.2023, "step": 4184 }, { "epoch": 0.07411441246397216, "grad_norm": 1.308815598487854, "learning_rate": 2.9847154615551052e-05, "loss": 0.1163, "step": 4185 }, { "epoch": 0.07413212200100058, "grad_norm": 1.4858222007751465, "learning_rate": 2.984703208187151e-05, "loss": 0.1666, "step": 4186 }, { "epoch": 0.07414983153802902, "grad_norm": 1.7277708053588867, "learning_rate": 2.9846909499346734e-05, "loss": 0.1814, "step": 4187 }, { "epoch": 0.07416754107505745, "grad_norm": 1.5106943845748901, "learning_rate": 2.9846786867977123e-05, "loss": 0.1769, "step": 4188 }, { "epoch": 0.07418525061208588, "grad_norm": 1.523970127105713, "learning_rate": 2.984666418776309e-05, "loss": 0.1016, "step": 4189 }, { "epoch": 0.0742029601491143, "grad_norm": 1.2294232845306396, "learning_rate": 2.984654145870503e-05, "loss": 0.1199, "step": 4190 }, { "epoch": 0.07422066968614273, "grad_norm": 1.6276929378509521, "learning_rate": 2.9846418680803353e-05, "loss": 0.1247, "step": 4191 }, { "epoch": 0.07423837922317116, "grad_norm": 2.038370132446289, "learning_rate": 2.984629585405846e-05, "loss": 0.1754, "step": 4192 }, { "epoch": 0.07425608876019958, "grad_norm": 0.9523389935493469, "learning_rate": 2.9846172978470755e-05, "loss": 0.1272, "step": 4193 }, { "epoch": 0.07427379829722801, "grad_norm": 1.4314943552017212, "learning_rate": 2.9846050054040643e-05, "loss": 0.1336, "step": 4194 }, { "epoch": 0.07429150783425645, "grad_norm": 2.07018780708313, "learning_rate": 2.9845927080768524e-05, "loss": 0.19, "step": 4195 }, { "epoch": 0.07430921737128487, "grad_norm": 1.8038734197616577, "learning_rate": 2.984580405865481e-05, "loss": 0.1565, "step": 4196 }, { "epoch": 0.0743269269083133, "grad_norm": 1.0642645359039307, "learning_rate": 2.9845680987699902e-05, "loss": 0.1296, "step": 4197 }, { "epoch": 0.07434463644534173, "grad_norm": 1.8026396036148071, "learning_rate": 2.9845557867904205e-05, "loss": 0.177, "step": 4198 }, { "epoch": 0.07436234598237015, "grad_norm": 1.436820387840271, "learning_rate": 2.9845434699268124e-05, "loss": 0.1953, "step": 4199 }, { "epoch": 0.07438005551939858, "grad_norm": 1.953382134437561, "learning_rate": 2.9845311481792066e-05, "loss": 0.1472, "step": 4200 }, { "epoch": 0.074397765056427, "grad_norm": 1.1233997344970703, "learning_rate": 2.9845188215476434e-05, "loss": 0.1242, "step": 4201 }, { "epoch": 0.07441547459345545, "grad_norm": 1.2158149480819702, "learning_rate": 2.9845064900321635e-05, "loss": 0.1319, "step": 4202 }, { "epoch": 0.07443318413048387, "grad_norm": 1.2234477996826172, "learning_rate": 2.984494153632807e-05, "loss": 0.1662, "step": 4203 }, { "epoch": 0.0744508936675123, "grad_norm": 1.3815051317214966, "learning_rate": 2.9844818123496155e-05, "loss": 0.0901, "step": 4204 }, { "epoch": 0.07446860320454073, "grad_norm": 1.229468584060669, "learning_rate": 2.9844694661826282e-05, "loss": 0.1175, "step": 4205 }, { "epoch": 0.07448631274156915, "grad_norm": 1.3646680116653442, "learning_rate": 2.984457115131887e-05, "loss": 0.1447, "step": 4206 }, { "epoch": 0.07450402227859758, "grad_norm": 1.3041797876358032, "learning_rate": 2.984444759197432e-05, "loss": 0.1482, "step": 4207 }, { "epoch": 0.074521731815626, "grad_norm": 1.3000168800354004, "learning_rate": 2.9844323983793038e-05, "loss": 0.1178, "step": 4208 }, { "epoch": 0.07453944135265443, "grad_norm": 2.5189383029937744, "learning_rate": 2.9844200326775433e-05, "loss": 0.2193, "step": 4209 }, { "epoch": 0.07455715088968287, "grad_norm": 1.2911686897277832, "learning_rate": 2.9844076620921905e-05, "loss": 0.1574, "step": 4210 }, { "epoch": 0.0745748604267113, "grad_norm": 1.7710037231445312, "learning_rate": 2.9843952866232873e-05, "loss": 0.2154, "step": 4211 }, { "epoch": 0.07459256996373972, "grad_norm": 1.3314610719680786, "learning_rate": 2.9843829062708736e-05, "loss": 0.1203, "step": 4212 }, { "epoch": 0.07461027950076815, "grad_norm": 1.6995680332183838, "learning_rate": 2.98437052103499e-05, "loss": 0.1904, "step": 4213 }, { "epoch": 0.07462798903779658, "grad_norm": 1.3654162883758545, "learning_rate": 2.9843581309156775e-05, "loss": 0.1076, "step": 4214 }, { "epoch": 0.074645698574825, "grad_norm": 1.7557629346847534, "learning_rate": 2.9843457359129775e-05, "loss": 0.1884, "step": 4215 }, { "epoch": 0.07466340811185343, "grad_norm": 1.2463403940200806, "learning_rate": 2.9843333360269292e-05, "loss": 0.1586, "step": 4216 }, { "epoch": 0.07468111764888187, "grad_norm": 2.095341205596924, "learning_rate": 2.984320931257575e-05, "loss": 0.1354, "step": 4217 }, { "epoch": 0.0746988271859103, "grad_norm": 1.2895102500915527, "learning_rate": 2.984308521604955e-05, "loss": 0.182, "step": 4218 }, { "epoch": 0.07471653672293872, "grad_norm": 1.5566424131393433, "learning_rate": 2.9842961070691102e-05, "loss": 0.136, "step": 4219 }, { "epoch": 0.07473424625996715, "grad_norm": 1.9876810312271118, "learning_rate": 2.984283687650081e-05, "loss": 0.1687, "step": 4220 }, { "epoch": 0.07475195579699558, "grad_norm": 1.3427023887634277, "learning_rate": 2.984271263347909e-05, "loss": 0.1353, "step": 4221 }, { "epoch": 0.074769665334024, "grad_norm": 2.001512050628662, "learning_rate": 2.9842588341626343e-05, "loss": 0.1667, "step": 4222 }, { "epoch": 0.07478737487105243, "grad_norm": 1.3943201303482056, "learning_rate": 2.9842464000942984e-05, "loss": 0.1468, "step": 4223 }, { "epoch": 0.07480508440808087, "grad_norm": 1.5769721269607544, "learning_rate": 2.9842339611429416e-05, "loss": 0.1597, "step": 4224 }, { "epoch": 0.0748227939451093, "grad_norm": 2.7528445720672607, "learning_rate": 2.9842215173086058e-05, "loss": 0.1425, "step": 4225 }, { "epoch": 0.07484050348213772, "grad_norm": 1.3404484987258911, "learning_rate": 2.984209068591331e-05, "loss": 0.1235, "step": 4226 }, { "epoch": 0.07485821301916615, "grad_norm": 1.5468522310256958, "learning_rate": 2.984196614991158e-05, "loss": 0.1678, "step": 4227 }, { "epoch": 0.07487592255619457, "grad_norm": 1.8549233675003052, "learning_rate": 2.984184156508129e-05, "loss": 0.1281, "step": 4228 }, { "epoch": 0.074893632093223, "grad_norm": 1.8243696689605713, "learning_rate": 2.984171693142284e-05, "loss": 0.1907, "step": 4229 }, { "epoch": 0.07491134163025143, "grad_norm": 1.1475118398666382, "learning_rate": 2.984159224893664e-05, "loss": 0.1246, "step": 4230 }, { "epoch": 0.07492905116727985, "grad_norm": 1.4813275337219238, "learning_rate": 2.9841467517623107e-05, "loss": 0.1029, "step": 4231 }, { "epoch": 0.0749467607043083, "grad_norm": 1.1664904356002808, "learning_rate": 2.9841342737482646e-05, "loss": 0.1652, "step": 4232 }, { "epoch": 0.07496447024133672, "grad_norm": 1.2828850746154785, "learning_rate": 2.9841217908515662e-05, "loss": 0.1167, "step": 4233 }, { "epoch": 0.07498217977836515, "grad_norm": 1.402844786643982, "learning_rate": 2.9841093030722582e-05, "loss": 0.1508, "step": 4234 }, { "epoch": 0.07499988931539357, "grad_norm": 1.3563045263290405, "learning_rate": 2.9840968104103802e-05, "loss": 0.1584, "step": 4235 }, { "epoch": 0.075017598852422, "grad_norm": 3.336148738861084, "learning_rate": 2.9840843128659736e-05, "loss": 0.126, "step": 4236 }, { "epoch": 0.07503530838945043, "grad_norm": 1.4223453998565674, "learning_rate": 2.9840718104390803e-05, "loss": 0.1704, "step": 4237 }, { "epoch": 0.07505301792647885, "grad_norm": 1.200476884841919, "learning_rate": 2.98405930312974e-05, "loss": 0.1677, "step": 4238 }, { "epoch": 0.07507072746350729, "grad_norm": 1.1023221015930176, "learning_rate": 2.9840467909379952e-05, "loss": 0.1858, "step": 4239 }, { "epoch": 0.07508843700053572, "grad_norm": 1.4026974439620972, "learning_rate": 2.9840342738638867e-05, "loss": 0.192, "step": 4240 }, { "epoch": 0.07510614653756414, "grad_norm": 1.4213889837265015, "learning_rate": 2.9840217519074556e-05, "loss": 0.1619, "step": 4241 }, { "epoch": 0.07512385607459257, "grad_norm": 1.7119594812393188, "learning_rate": 2.9840092250687428e-05, "loss": 0.1398, "step": 4242 }, { "epoch": 0.075141565611621, "grad_norm": 1.7887301445007324, "learning_rate": 2.9839966933477897e-05, "loss": 0.1995, "step": 4243 }, { "epoch": 0.07515927514864942, "grad_norm": 1.4816895723342896, "learning_rate": 2.9839841567446376e-05, "loss": 0.1313, "step": 4244 }, { "epoch": 0.07517698468567785, "grad_norm": 1.8467987775802612, "learning_rate": 2.983971615259328e-05, "loss": 0.1307, "step": 4245 }, { "epoch": 0.07519469422270628, "grad_norm": 1.7345000505447388, "learning_rate": 2.9839590688919015e-05, "loss": 0.14, "step": 4246 }, { "epoch": 0.07521240375973472, "grad_norm": 1.2983381748199463, "learning_rate": 2.9839465176424e-05, "loss": 0.1308, "step": 4247 }, { "epoch": 0.07523011329676314, "grad_norm": 1.0632151365280151, "learning_rate": 2.9839339615108642e-05, "loss": 0.137, "step": 4248 }, { "epoch": 0.07524782283379157, "grad_norm": 1.2378404140472412, "learning_rate": 2.983921400497336e-05, "loss": 0.1229, "step": 4249 }, { "epoch": 0.07526553237082, "grad_norm": 1.345223069190979, "learning_rate": 2.9839088346018567e-05, "loss": 0.1265, "step": 4250 }, { "epoch": 0.07528324190784842, "grad_norm": 1.2279889583587646, "learning_rate": 2.9838962638244675e-05, "loss": 0.1343, "step": 4251 }, { "epoch": 0.07530095144487685, "grad_norm": 1.3663362264633179, "learning_rate": 2.9838836881652093e-05, "loss": 0.0903, "step": 4252 }, { "epoch": 0.07531866098190527, "grad_norm": 1.7717927694320679, "learning_rate": 2.9838711076241242e-05, "loss": 0.1839, "step": 4253 }, { "epoch": 0.07533637051893372, "grad_norm": 1.921025037765503, "learning_rate": 2.9838585222012528e-05, "loss": 0.1841, "step": 4254 }, { "epoch": 0.07535408005596214, "grad_norm": 1.6169403791427612, "learning_rate": 2.983845931896637e-05, "loss": 0.1485, "step": 4255 }, { "epoch": 0.07537178959299057, "grad_norm": 3.254743814468384, "learning_rate": 2.983833336710318e-05, "loss": 0.1886, "step": 4256 }, { "epoch": 0.075389499130019, "grad_norm": 2.5548980236053467, "learning_rate": 2.983820736642338e-05, "loss": 0.1661, "step": 4257 }, { "epoch": 0.07540720866704742, "grad_norm": 1.3824539184570312, "learning_rate": 2.983808131692738e-05, "loss": 0.1567, "step": 4258 }, { "epoch": 0.07542491820407585, "grad_norm": 1.208168387413025, "learning_rate": 2.9837955218615583e-05, "loss": 0.1748, "step": 4259 }, { "epoch": 0.07544262774110427, "grad_norm": 1.0562145709991455, "learning_rate": 2.9837829071488422e-05, "loss": 0.1674, "step": 4260 }, { "epoch": 0.0754603372781327, "grad_norm": 1.337501049041748, "learning_rate": 2.9837702875546305e-05, "loss": 0.1523, "step": 4261 }, { "epoch": 0.07547804681516114, "grad_norm": 1.2323613166809082, "learning_rate": 2.9837576630789642e-05, "loss": 0.1342, "step": 4262 }, { "epoch": 0.07549575635218957, "grad_norm": 1.093999981880188, "learning_rate": 2.9837450337218853e-05, "loss": 0.1378, "step": 4263 }, { "epoch": 0.07551346588921799, "grad_norm": 1.8527493476867676, "learning_rate": 2.9837323994834355e-05, "loss": 0.1153, "step": 4264 }, { "epoch": 0.07553117542624642, "grad_norm": 2.2409098148345947, "learning_rate": 2.9837197603636557e-05, "loss": 0.1491, "step": 4265 }, { "epoch": 0.07554888496327485, "grad_norm": 1.5911462306976318, "learning_rate": 2.9837071163625884e-05, "loss": 0.1649, "step": 4266 }, { "epoch": 0.07556659450030327, "grad_norm": 1.9379854202270508, "learning_rate": 2.983694467480275e-05, "loss": 0.1283, "step": 4267 }, { "epoch": 0.0755843040373317, "grad_norm": 1.9096051454544067, "learning_rate": 2.9836818137167565e-05, "loss": 0.1845, "step": 4268 }, { "epoch": 0.07560201357436014, "grad_norm": 1.460174322128296, "learning_rate": 2.983669155072075e-05, "loss": 0.1283, "step": 4269 }, { "epoch": 0.07561972311138856, "grad_norm": 1.3774690628051758, "learning_rate": 2.983656491546272e-05, "loss": 0.1586, "step": 4270 }, { "epoch": 0.07563743264841699, "grad_norm": 1.5459578037261963, "learning_rate": 2.9836438231393892e-05, "loss": 0.129, "step": 4271 }, { "epoch": 0.07565514218544542, "grad_norm": 1.4741706848144531, "learning_rate": 2.9836311498514683e-05, "loss": 0.1452, "step": 4272 }, { "epoch": 0.07567285172247384, "grad_norm": 1.440338373184204, "learning_rate": 2.983618471682551e-05, "loss": 0.1361, "step": 4273 }, { "epoch": 0.07569056125950227, "grad_norm": 2.2840778827667236, "learning_rate": 2.983605788632679e-05, "loss": 0.2057, "step": 4274 }, { "epoch": 0.0757082707965307, "grad_norm": 1.2745633125305176, "learning_rate": 2.983593100701894e-05, "loss": 0.1436, "step": 4275 }, { "epoch": 0.07572598033355912, "grad_norm": 2.139305353164673, "learning_rate": 2.9835804078902377e-05, "loss": 0.1803, "step": 4276 }, { "epoch": 0.07574368987058756, "grad_norm": 3.029042959213257, "learning_rate": 2.983567710197752e-05, "loss": 0.2265, "step": 4277 }, { "epoch": 0.07576139940761599, "grad_norm": 1.3282798528671265, "learning_rate": 2.9835550076244786e-05, "loss": 0.1279, "step": 4278 }, { "epoch": 0.07577910894464442, "grad_norm": 1.3818674087524414, "learning_rate": 2.9835423001704588e-05, "loss": 0.1478, "step": 4279 }, { "epoch": 0.07579681848167284, "grad_norm": 1.4322923421859741, "learning_rate": 2.9835295878357356e-05, "loss": 0.1349, "step": 4280 }, { "epoch": 0.07581452801870127, "grad_norm": 1.16624116897583, "learning_rate": 2.98351687062035e-05, "loss": 0.1167, "step": 4281 }, { "epoch": 0.0758322375557297, "grad_norm": 1.808815836906433, "learning_rate": 2.9835041485243438e-05, "loss": 0.1654, "step": 4282 }, { "epoch": 0.07584994709275812, "grad_norm": 2.140932321548462, "learning_rate": 2.9834914215477585e-05, "loss": 0.1488, "step": 4283 }, { "epoch": 0.07586765662978656, "grad_norm": 1.6833839416503906, "learning_rate": 2.9834786896906375e-05, "loss": 0.1437, "step": 4284 }, { "epoch": 0.07588536616681499, "grad_norm": 1.2917664051055908, "learning_rate": 2.9834659529530208e-05, "loss": 0.207, "step": 4285 }, { "epoch": 0.07590307570384341, "grad_norm": 2.1290807723999023, "learning_rate": 2.9834532113349517e-05, "loss": 0.1594, "step": 4286 }, { "epoch": 0.07592078524087184, "grad_norm": 1.7508395910263062, "learning_rate": 2.9834404648364708e-05, "loss": 0.1934, "step": 4287 }, { "epoch": 0.07593849477790027, "grad_norm": 1.042380452156067, "learning_rate": 2.9834277134576212e-05, "loss": 0.1236, "step": 4288 }, { "epoch": 0.0759562043149287, "grad_norm": 1.6397298574447632, "learning_rate": 2.983414957198445e-05, "loss": 0.1668, "step": 4289 }, { "epoch": 0.07597391385195712, "grad_norm": 1.6253776550292969, "learning_rate": 2.9834021960589833e-05, "loss": 0.1577, "step": 4290 }, { "epoch": 0.07599162338898556, "grad_norm": 1.5419114828109741, "learning_rate": 2.9833894300392784e-05, "loss": 0.1771, "step": 4291 }, { "epoch": 0.07600933292601399, "grad_norm": 1.3328938484191895, "learning_rate": 2.983376659139372e-05, "loss": 0.151, "step": 4292 }, { "epoch": 0.07602704246304241, "grad_norm": 1.5069963932037354, "learning_rate": 2.9833638833593064e-05, "loss": 0.1232, "step": 4293 }, { "epoch": 0.07604475200007084, "grad_norm": 1.4008268117904663, "learning_rate": 2.9833511026991242e-05, "loss": 0.1253, "step": 4294 }, { "epoch": 0.07606246153709927, "grad_norm": 1.2958319187164307, "learning_rate": 2.9833383171588663e-05, "loss": 0.1826, "step": 4295 }, { "epoch": 0.07608017107412769, "grad_norm": 1.1858303546905518, "learning_rate": 2.983325526738576e-05, "loss": 0.1188, "step": 4296 }, { "epoch": 0.07609788061115612, "grad_norm": 1.1888214349746704, "learning_rate": 2.983312731438294e-05, "loss": 0.1511, "step": 4297 }, { "epoch": 0.07611559014818454, "grad_norm": 1.2594329118728638, "learning_rate": 2.9832999312580637e-05, "loss": 0.2107, "step": 4298 }, { "epoch": 0.07613329968521299, "grad_norm": 1.4915000200271606, "learning_rate": 2.983287126197927e-05, "loss": 0.1382, "step": 4299 }, { "epoch": 0.07615100922224141, "grad_norm": 1.7382177114486694, "learning_rate": 2.983274316257925e-05, "loss": 0.1509, "step": 4300 }, { "epoch": 0.07616871875926984, "grad_norm": 1.5777419805526733, "learning_rate": 2.9832615014381007e-05, "loss": 0.1952, "step": 4301 }, { "epoch": 0.07618642829629826, "grad_norm": 1.33022940158844, "learning_rate": 2.9832486817384962e-05, "loss": 0.1019, "step": 4302 }, { "epoch": 0.07620413783332669, "grad_norm": 1.78130042552948, "learning_rate": 2.9832358571591535e-05, "loss": 0.1236, "step": 4303 }, { "epoch": 0.07622184737035512, "grad_norm": 1.6957826614379883, "learning_rate": 2.9832230277001147e-05, "loss": 0.1262, "step": 4304 }, { "epoch": 0.07623955690738354, "grad_norm": 2.301398992538452, "learning_rate": 2.983210193361422e-05, "loss": 0.1884, "step": 4305 }, { "epoch": 0.07625726644441198, "grad_norm": 2.758725881576538, "learning_rate": 2.9831973541431183e-05, "loss": 0.1756, "step": 4306 }, { "epoch": 0.07627497598144041, "grad_norm": 1.316240906715393, "learning_rate": 2.983184510045245e-05, "loss": 0.1235, "step": 4307 }, { "epoch": 0.07629268551846884, "grad_norm": 2.2496259212493896, "learning_rate": 2.983171661067845e-05, "loss": 0.1836, "step": 4308 }, { "epoch": 0.07631039505549726, "grad_norm": 1.813167929649353, "learning_rate": 2.98315880721096e-05, "loss": 0.1802, "step": 4309 }, { "epoch": 0.07632810459252569, "grad_norm": 1.2234901189804077, "learning_rate": 2.9831459484746326e-05, "loss": 0.1342, "step": 4310 }, { "epoch": 0.07634581412955412, "grad_norm": 1.3107134103775024, "learning_rate": 2.983133084858905e-05, "loss": 0.1617, "step": 4311 }, { "epoch": 0.07636352366658254, "grad_norm": 1.6599624156951904, "learning_rate": 2.9831202163638196e-05, "loss": 0.1681, "step": 4312 }, { "epoch": 0.07638123320361097, "grad_norm": 1.2149124145507812, "learning_rate": 2.9831073429894184e-05, "loss": 0.1379, "step": 4313 }, { "epoch": 0.07639894274063941, "grad_norm": 2.4598183631896973, "learning_rate": 2.9830944647357442e-05, "loss": 0.1513, "step": 4314 }, { "epoch": 0.07641665227766783, "grad_norm": 1.9178261756896973, "learning_rate": 2.9830815816028397e-05, "loss": 0.1503, "step": 4315 }, { "epoch": 0.07643436181469626, "grad_norm": 1.2641806602478027, "learning_rate": 2.9830686935907463e-05, "loss": 0.1452, "step": 4316 }, { "epoch": 0.07645207135172469, "grad_norm": 1.3098753690719604, "learning_rate": 2.983055800699507e-05, "loss": 0.1276, "step": 4317 }, { "epoch": 0.07646978088875311, "grad_norm": 1.1649816036224365, "learning_rate": 2.9830429029291644e-05, "loss": 0.1369, "step": 4318 }, { "epoch": 0.07648749042578154, "grad_norm": 0.891822099685669, "learning_rate": 2.9830300002797602e-05, "loss": 0.1347, "step": 4319 }, { "epoch": 0.07650519996280997, "grad_norm": 0.9462270736694336, "learning_rate": 2.9830170927513375e-05, "loss": 0.1358, "step": 4320 }, { "epoch": 0.0765229094998384, "grad_norm": 1.264488697052002, "learning_rate": 2.9830041803439383e-05, "loss": 0.1455, "step": 4321 }, { "epoch": 0.07654061903686683, "grad_norm": 1.7793877124786377, "learning_rate": 2.9829912630576056e-05, "loss": 0.1397, "step": 4322 }, { "epoch": 0.07655832857389526, "grad_norm": 2.7457103729248047, "learning_rate": 2.9829783408923818e-05, "loss": 0.1866, "step": 4323 }, { "epoch": 0.07657603811092369, "grad_norm": 1.111722469329834, "learning_rate": 2.9829654138483095e-05, "loss": 0.1323, "step": 4324 }, { "epoch": 0.07659374764795211, "grad_norm": 1.0490775108337402, "learning_rate": 2.98295248192543e-05, "loss": 0.1148, "step": 4325 }, { "epoch": 0.07661145718498054, "grad_norm": 1.4031249284744263, "learning_rate": 2.9829395451237876e-05, "loss": 0.1001, "step": 4326 }, { "epoch": 0.07662916672200897, "grad_norm": 1.4838676452636719, "learning_rate": 2.9829266034434237e-05, "loss": 0.2073, "step": 4327 }, { "epoch": 0.07664687625903739, "grad_norm": 1.3635458946228027, "learning_rate": 2.9829136568843818e-05, "loss": 0.1407, "step": 4328 }, { "epoch": 0.07666458579606583, "grad_norm": 1.75480055809021, "learning_rate": 2.9829007054467035e-05, "loss": 0.1295, "step": 4329 }, { "epoch": 0.07668229533309426, "grad_norm": 1.471014142036438, "learning_rate": 2.982887749130432e-05, "loss": 0.1794, "step": 4330 }, { "epoch": 0.07670000487012268, "grad_norm": 1.9330785274505615, "learning_rate": 2.9828747879356097e-05, "loss": 0.1272, "step": 4331 }, { "epoch": 0.07671771440715111, "grad_norm": 1.07081937789917, "learning_rate": 2.9828618218622795e-05, "loss": 0.109, "step": 4332 }, { "epoch": 0.07673542394417954, "grad_norm": 1.2488986253738403, "learning_rate": 2.9828488509104834e-05, "loss": 0.1228, "step": 4333 }, { "epoch": 0.07675313348120796, "grad_norm": 1.3068370819091797, "learning_rate": 2.982835875080265e-05, "loss": 0.1287, "step": 4334 }, { "epoch": 0.07677084301823639, "grad_norm": 1.0572367906570435, "learning_rate": 2.9828228943716666e-05, "loss": 0.121, "step": 4335 }, { "epoch": 0.07678855255526483, "grad_norm": 1.2715786695480347, "learning_rate": 2.9828099087847305e-05, "loss": 0.216, "step": 4336 }, { "epoch": 0.07680626209229326, "grad_norm": 1.0368725061416626, "learning_rate": 2.9827969183195007e-05, "loss": 0.1537, "step": 4337 }, { "epoch": 0.07682397162932168, "grad_norm": 1.2417798042297363, "learning_rate": 2.982783922976018e-05, "loss": 0.1767, "step": 4338 }, { "epoch": 0.07684168116635011, "grad_norm": 2.3952877521514893, "learning_rate": 2.9827709227543265e-05, "loss": 0.1119, "step": 4339 }, { "epoch": 0.07685939070337854, "grad_norm": 1.8407789468765259, "learning_rate": 2.9827579176544683e-05, "loss": 0.1746, "step": 4340 }, { "epoch": 0.07687710024040696, "grad_norm": 4.423111438751221, "learning_rate": 2.982744907676487e-05, "loss": 0.177, "step": 4341 }, { "epoch": 0.07689480977743539, "grad_norm": 1.5690052509307861, "learning_rate": 2.9827318928204248e-05, "loss": 0.1784, "step": 4342 }, { "epoch": 0.07691251931446381, "grad_norm": 1.9408178329467773, "learning_rate": 2.9827188730863248e-05, "loss": 0.1694, "step": 4343 }, { "epoch": 0.07693022885149225, "grad_norm": 2.2099215984344482, "learning_rate": 2.9827058484742295e-05, "loss": 0.1593, "step": 4344 }, { "epoch": 0.07694793838852068, "grad_norm": 1.5816810131072998, "learning_rate": 2.982692818984182e-05, "loss": 0.1845, "step": 4345 }, { "epoch": 0.07696564792554911, "grad_norm": 4.4235358238220215, "learning_rate": 2.982679784616225e-05, "loss": 0.1691, "step": 4346 }, { "epoch": 0.07698335746257753, "grad_norm": 1.8480958938598633, "learning_rate": 2.9826667453704013e-05, "loss": 0.149, "step": 4347 }, { "epoch": 0.07700106699960596, "grad_norm": 1.539162516593933, "learning_rate": 2.982653701246754e-05, "loss": 0.1682, "step": 4348 }, { "epoch": 0.07701877653663439, "grad_norm": 1.8620637655258179, "learning_rate": 2.9826406522453262e-05, "loss": 0.1247, "step": 4349 }, { "epoch": 0.07703648607366281, "grad_norm": 0.8955290913581848, "learning_rate": 2.9826275983661606e-05, "loss": 0.147, "step": 4350 }, { "epoch": 0.07705419561069125, "grad_norm": 1.5392593145370483, "learning_rate": 2.9826145396093005e-05, "loss": 0.1701, "step": 4351 }, { "epoch": 0.07707190514771968, "grad_norm": 1.3342664241790771, "learning_rate": 2.9826014759747876e-05, "loss": 0.0946, "step": 4352 }, { "epoch": 0.0770896146847481, "grad_norm": 1.4039483070373535, "learning_rate": 2.9825884074626667e-05, "loss": 0.1401, "step": 4353 }, { "epoch": 0.07710732422177653, "grad_norm": 0.9601556658744812, "learning_rate": 2.9825753340729797e-05, "loss": 0.1273, "step": 4354 }, { "epoch": 0.07712503375880496, "grad_norm": 1.6451390981674194, "learning_rate": 2.982562255805769e-05, "loss": 0.2038, "step": 4355 }, { "epoch": 0.07714274329583339, "grad_norm": 1.3906210660934448, "learning_rate": 2.9825491726610795e-05, "loss": 0.1408, "step": 4356 }, { "epoch": 0.07716045283286181, "grad_norm": 1.3188022375106812, "learning_rate": 2.982536084638953e-05, "loss": 0.1094, "step": 4357 }, { "epoch": 0.07717816236989025, "grad_norm": 1.3771331310272217, "learning_rate": 2.9825229917394324e-05, "loss": 0.1554, "step": 4358 }, { "epoch": 0.07719587190691868, "grad_norm": 1.3221290111541748, "learning_rate": 2.9825098939625613e-05, "loss": 0.128, "step": 4359 }, { "epoch": 0.0772135814439471, "grad_norm": 1.240684151649475, "learning_rate": 2.9824967913083824e-05, "loss": 0.0954, "step": 4360 }, { "epoch": 0.07723129098097553, "grad_norm": 1.7216908931732178, "learning_rate": 2.9824836837769386e-05, "loss": 0.1848, "step": 4361 }, { "epoch": 0.07724900051800396, "grad_norm": 1.2740297317504883, "learning_rate": 2.982470571368274e-05, "loss": 0.1611, "step": 4362 }, { "epoch": 0.07726671005503238, "grad_norm": 3.245185613632202, "learning_rate": 2.9824574540824314e-05, "loss": 0.1319, "step": 4363 }, { "epoch": 0.07728441959206081, "grad_norm": 2.4460482597351074, "learning_rate": 2.9824443319194533e-05, "loss": 0.1812, "step": 4364 }, { "epoch": 0.07730212912908924, "grad_norm": 1.3351925611495972, "learning_rate": 2.9824312048793832e-05, "loss": 0.1527, "step": 4365 }, { "epoch": 0.07731983866611768, "grad_norm": 1.431225061416626, "learning_rate": 2.9824180729622648e-05, "loss": 0.1598, "step": 4366 }, { "epoch": 0.0773375482031461, "grad_norm": 3.6402041912078857, "learning_rate": 2.9824049361681407e-05, "loss": 0.1511, "step": 4367 }, { "epoch": 0.07735525774017453, "grad_norm": 2.6141393184661865, "learning_rate": 2.9823917944970543e-05, "loss": 0.1608, "step": 4368 }, { "epoch": 0.07737296727720296, "grad_norm": 1.4487468004226685, "learning_rate": 2.9823786479490487e-05, "loss": 0.0962, "step": 4369 }, { "epoch": 0.07739067681423138, "grad_norm": 1.7386343479156494, "learning_rate": 2.982365496524167e-05, "loss": 0.1538, "step": 4370 }, { "epoch": 0.07740838635125981, "grad_norm": 1.4915348291397095, "learning_rate": 2.9823523402224535e-05, "loss": 0.125, "step": 4371 }, { "epoch": 0.07742609588828823, "grad_norm": 1.2013986110687256, "learning_rate": 2.98233917904395e-05, "loss": 0.1649, "step": 4372 }, { "epoch": 0.07744380542531668, "grad_norm": 0.8234829902648926, "learning_rate": 2.982326012988701e-05, "loss": 0.1518, "step": 4373 }, { "epoch": 0.0774615149623451, "grad_norm": 1.181640625, "learning_rate": 2.9823128420567494e-05, "loss": 0.1244, "step": 4374 }, { "epoch": 0.07747922449937353, "grad_norm": 1.038245677947998, "learning_rate": 2.982299666248138e-05, "loss": 0.1128, "step": 4375 }, { "epoch": 0.07749693403640195, "grad_norm": 1.1549752950668335, "learning_rate": 2.982286485562911e-05, "loss": 0.1093, "step": 4376 }, { "epoch": 0.07751464357343038, "grad_norm": 1.5551557540893555, "learning_rate": 2.982273300001111e-05, "loss": 0.1412, "step": 4377 }, { "epoch": 0.07753235311045881, "grad_norm": 1.4728994369506836, "learning_rate": 2.982260109562782e-05, "loss": 0.1359, "step": 4378 }, { "epoch": 0.07755006264748723, "grad_norm": 2.2306296825408936, "learning_rate": 2.9822469142479676e-05, "loss": 0.1395, "step": 4379 }, { "epoch": 0.07756777218451566, "grad_norm": 1.5663678646087646, "learning_rate": 2.9822337140567105e-05, "loss": 0.2129, "step": 4380 }, { "epoch": 0.0775854817215441, "grad_norm": 1.8199243545532227, "learning_rate": 2.9822205089890542e-05, "loss": 0.1568, "step": 4381 }, { "epoch": 0.07760319125857253, "grad_norm": 1.1272245645523071, "learning_rate": 2.9822072990450425e-05, "loss": 0.1131, "step": 4382 }, { "epoch": 0.07762090079560095, "grad_norm": 1.9845774173736572, "learning_rate": 2.9821940842247183e-05, "loss": 0.1549, "step": 4383 }, { "epoch": 0.07763861033262938, "grad_norm": 1.4035998582839966, "learning_rate": 2.9821808645281258e-05, "loss": 0.1142, "step": 4384 }, { "epoch": 0.0776563198696578, "grad_norm": 2.107095956802368, "learning_rate": 2.9821676399553082e-05, "loss": 0.2034, "step": 4385 }, { "epoch": 0.07767402940668623, "grad_norm": 1.1608456373214722, "learning_rate": 2.9821544105063093e-05, "loss": 0.0988, "step": 4386 }, { "epoch": 0.07769173894371466, "grad_norm": 1.2210620641708374, "learning_rate": 2.9821411761811714e-05, "loss": 0.1183, "step": 4387 }, { "epoch": 0.0777094484807431, "grad_norm": 1.2718125581741333, "learning_rate": 2.9821279369799397e-05, "loss": 0.2394, "step": 4388 }, { "epoch": 0.07772715801777152, "grad_norm": 1.8414231538772583, "learning_rate": 2.9821146929026567e-05, "loss": 0.1554, "step": 4389 }, { "epoch": 0.07774486755479995, "grad_norm": 1.2682381868362427, "learning_rate": 2.9821014439493666e-05, "loss": 0.1311, "step": 4390 }, { "epoch": 0.07776257709182838, "grad_norm": 1.3112578392028809, "learning_rate": 2.9820881901201123e-05, "loss": 0.1682, "step": 4391 }, { "epoch": 0.0777802866288568, "grad_norm": 1.421529769897461, "learning_rate": 2.982074931414938e-05, "loss": 0.1683, "step": 4392 }, { "epoch": 0.07779799616588523, "grad_norm": 1.9454854726791382, "learning_rate": 2.982061667833887e-05, "loss": 0.1382, "step": 4393 }, { "epoch": 0.07781570570291366, "grad_norm": 1.5218505859375, "learning_rate": 2.982048399377003e-05, "loss": 0.1497, "step": 4394 }, { "epoch": 0.07783341523994208, "grad_norm": 1.6069966554641724, "learning_rate": 2.982035126044329e-05, "loss": 0.156, "step": 4395 }, { "epoch": 0.07785112477697052, "grad_norm": 1.7288897037506104, "learning_rate": 2.9820218478359105e-05, "loss": 0.1813, "step": 4396 }, { "epoch": 0.07786883431399895, "grad_norm": 1.5137711763381958, "learning_rate": 2.9820085647517892e-05, "loss": 0.152, "step": 4397 }, { "epoch": 0.07788654385102738, "grad_norm": 1.783636450767517, "learning_rate": 2.98199527679201e-05, "loss": 0.1675, "step": 4398 }, { "epoch": 0.0779042533880558, "grad_norm": 1.50126051902771, "learning_rate": 2.9819819839566163e-05, "loss": 0.1124, "step": 4399 }, { "epoch": 0.07792196292508423, "grad_norm": 1.33329176902771, "learning_rate": 2.9819686862456513e-05, "loss": 0.1515, "step": 4400 }, { "epoch": 0.07793967246211266, "grad_norm": 1.361169457435608, "learning_rate": 2.9819553836591596e-05, "loss": 0.1522, "step": 4401 }, { "epoch": 0.07795738199914108, "grad_norm": 0.9844861626625061, "learning_rate": 2.9819420761971845e-05, "loss": 0.1366, "step": 4402 }, { "epoch": 0.07797509153616952, "grad_norm": 1.2750872373580933, "learning_rate": 2.98192876385977e-05, "loss": 0.1209, "step": 4403 }, { "epoch": 0.07799280107319795, "grad_norm": 0.9550092816352844, "learning_rate": 2.9819154466469597e-05, "loss": 0.1949, "step": 4404 }, { "epoch": 0.07801051061022637, "grad_norm": 1.3238495588302612, "learning_rate": 2.9819021245587968e-05, "loss": 0.145, "step": 4405 }, { "epoch": 0.0780282201472548, "grad_norm": 1.1649057865142822, "learning_rate": 2.9818887975953267e-05, "loss": 0.1373, "step": 4406 }, { "epoch": 0.07804592968428323, "grad_norm": 1.5250033140182495, "learning_rate": 2.9818754657565918e-05, "loss": 0.128, "step": 4407 }, { "epoch": 0.07806363922131165, "grad_norm": 1.401430606842041, "learning_rate": 2.981862129042637e-05, "loss": 0.198, "step": 4408 }, { "epoch": 0.07808134875834008, "grad_norm": 1.187333106994629, "learning_rate": 2.9818487874535054e-05, "loss": 0.1652, "step": 4409 }, { "epoch": 0.0780990582953685, "grad_norm": 1.289313554763794, "learning_rate": 2.9818354409892413e-05, "loss": 0.1542, "step": 4410 }, { "epoch": 0.07811676783239695, "grad_norm": 1.2140638828277588, "learning_rate": 2.981822089649888e-05, "loss": 0.1382, "step": 4411 }, { "epoch": 0.07813447736942537, "grad_norm": 1.288560152053833, "learning_rate": 2.9818087334354902e-05, "loss": 0.1777, "step": 4412 }, { "epoch": 0.0781521869064538, "grad_norm": 1.6462104320526123, "learning_rate": 2.9817953723460916e-05, "loss": 0.1458, "step": 4413 }, { "epoch": 0.07816989644348223, "grad_norm": 1.3866087198257446, "learning_rate": 2.9817820063817362e-05, "loss": 0.122, "step": 4414 }, { "epoch": 0.07818760598051065, "grad_norm": 1.2647969722747803, "learning_rate": 2.9817686355424677e-05, "loss": 0.1699, "step": 4415 }, { "epoch": 0.07820531551753908, "grad_norm": 1.5940043926239014, "learning_rate": 2.9817552598283304e-05, "loss": 0.1259, "step": 4416 }, { "epoch": 0.0782230250545675, "grad_norm": 1.5375077724456787, "learning_rate": 2.981741879239368e-05, "loss": 0.1258, "step": 4417 }, { "epoch": 0.07824073459159595, "grad_norm": 1.2790989875793457, "learning_rate": 2.981728493775625e-05, "loss": 0.1209, "step": 4418 }, { "epoch": 0.07825844412862437, "grad_norm": 1.6611170768737793, "learning_rate": 2.9817151034371447e-05, "loss": 0.1615, "step": 4419 }, { "epoch": 0.0782761536656528, "grad_norm": 1.166059970855713, "learning_rate": 2.9817017082239715e-05, "loss": 0.1379, "step": 4420 }, { "epoch": 0.07829386320268122, "grad_norm": 1.7814923524856567, "learning_rate": 2.98168830813615e-05, "loss": 0.1504, "step": 4421 }, { "epoch": 0.07831157273970965, "grad_norm": 1.3337758779525757, "learning_rate": 2.9816749031737236e-05, "loss": 0.1674, "step": 4422 }, { "epoch": 0.07832928227673808, "grad_norm": 1.2838743925094604, "learning_rate": 2.9816614933367367e-05, "loss": 0.206, "step": 4423 }, { "epoch": 0.0783469918137665, "grad_norm": 1.3062242269515991, "learning_rate": 2.9816480786252333e-05, "loss": 0.2149, "step": 4424 }, { "epoch": 0.07836470135079494, "grad_norm": 1.1001808643341064, "learning_rate": 2.9816346590392576e-05, "loss": 0.1421, "step": 4425 }, { "epoch": 0.07838241088782337, "grad_norm": 1.2283880710601807, "learning_rate": 2.981621234578854e-05, "loss": 0.1714, "step": 4426 }, { "epoch": 0.0784001204248518, "grad_norm": 1.6115193367004395, "learning_rate": 2.9816078052440657e-05, "loss": 0.1357, "step": 4427 }, { "epoch": 0.07841782996188022, "grad_norm": 1.164208173751831, "learning_rate": 2.981594371034938e-05, "loss": 0.1427, "step": 4428 }, { "epoch": 0.07843553949890865, "grad_norm": 1.8570640087127686, "learning_rate": 2.981580931951515e-05, "loss": 0.1656, "step": 4429 }, { "epoch": 0.07845324903593708, "grad_norm": 1.3216429948806763, "learning_rate": 2.98156748799384e-05, "loss": 0.1379, "step": 4430 }, { "epoch": 0.0784709585729655, "grad_norm": 1.6697461605072021, "learning_rate": 2.981554039161958e-05, "loss": 0.1976, "step": 4431 }, { "epoch": 0.07848866810999393, "grad_norm": 1.315569281578064, "learning_rate": 2.9815405854559136e-05, "loss": 0.147, "step": 4432 }, { "epoch": 0.07850637764702237, "grad_norm": 1.9321622848510742, "learning_rate": 2.98152712687575e-05, "loss": 0.1798, "step": 4433 }, { "epoch": 0.0785240871840508, "grad_norm": 1.071709394454956, "learning_rate": 2.9815136634215117e-05, "loss": 0.1194, "step": 4434 }, { "epoch": 0.07854179672107922, "grad_norm": 1.349532127380371, "learning_rate": 2.9815001950932436e-05, "loss": 0.1303, "step": 4435 }, { "epoch": 0.07855950625810765, "grad_norm": 1.335842490196228, "learning_rate": 2.9814867218909896e-05, "loss": 0.212, "step": 4436 }, { "epoch": 0.07857721579513607, "grad_norm": 1.2402455806732178, "learning_rate": 2.9814732438147946e-05, "loss": 0.1991, "step": 4437 }, { "epoch": 0.0785949253321645, "grad_norm": 1.4857233762741089, "learning_rate": 2.9814597608647023e-05, "loss": 0.1277, "step": 4438 }, { "epoch": 0.07861263486919293, "grad_norm": 1.299574613571167, "learning_rate": 2.9814462730407573e-05, "loss": 0.1637, "step": 4439 }, { "epoch": 0.07863034440622137, "grad_norm": 1.930807113647461, "learning_rate": 2.9814327803430036e-05, "loss": 0.121, "step": 4440 }, { "epoch": 0.0786480539432498, "grad_norm": 1.2707575559616089, "learning_rate": 2.9814192827714858e-05, "loss": 0.1705, "step": 4441 }, { "epoch": 0.07866576348027822, "grad_norm": 1.508414626121521, "learning_rate": 2.9814057803262485e-05, "loss": 0.1444, "step": 4442 }, { "epoch": 0.07868347301730665, "grad_norm": 1.4777356386184692, "learning_rate": 2.981392273007336e-05, "loss": 0.1389, "step": 4443 }, { "epoch": 0.07870118255433507, "grad_norm": 1.3080837726593018, "learning_rate": 2.9813787608147928e-05, "loss": 0.1785, "step": 4444 }, { "epoch": 0.0787188920913635, "grad_norm": 1.3846979141235352, "learning_rate": 2.9813652437486636e-05, "loss": 0.1755, "step": 4445 }, { "epoch": 0.07873660162839193, "grad_norm": 1.5689663887023926, "learning_rate": 2.9813517218089925e-05, "loss": 0.1742, "step": 4446 }, { "epoch": 0.07875431116542035, "grad_norm": 1.5806366205215454, "learning_rate": 2.9813381949958238e-05, "loss": 0.1491, "step": 4447 }, { "epoch": 0.07877202070244879, "grad_norm": 1.6580990552902222, "learning_rate": 2.9813246633092025e-05, "loss": 0.1547, "step": 4448 }, { "epoch": 0.07878973023947722, "grad_norm": 1.3257243633270264, "learning_rate": 2.9813111267491725e-05, "loss": 0.0949, "step": 4449 }, { "epoch": 0.07880743977650564, "grad_norm": 1.6676900386810303, "learning_rate": 2.981297585315779e-05, "loss": 0.1607, "step": 4450 }, { "epoch": 0.07882514931353407, "grad_norm": 1.2820085287094116, "learning_rate": 2.9812840390090663e-05, "loss": 0.1313, "step": 4451 }, { "epoch": 0.0788428588505625, "grad_norm": 1.732709288597107, "learning_rate": 2.9812704878290788e-05, "loss": 0.1631, "step": 4452 }, { "epoch": 0.07886056838759092, "grad_norm": 1.1319992542266846, "learning_rate": 2.981256931775861e-05, "loss": 0.1542, "step": 4453 }, { "epoch": 0.07887827792461935, "grad_norm": 1.389978289604187, "learning_rate": 2.9812433708494583e-05, "loss": 0.1599, "step": 4454 }, { "epoch": 0.07889598746164779, "grad_norm": 1.1713377237319946, "learning_rate": 2.9812298050499142e-05, "loss": 0.1268, "step": 4455 }, { "epoch": 0.07891369699867622, "grad_norm": 1.2810089588165283, "learning_rate": 2.981216234377274e-05, "loss": 0.1406, "step": 4456 }, { "epoch": 0.07893140653570464, "grad_norm": 2.3638484477996826, "learning_rate": 2.9812026588315824e-05, "loss": 0.1354, "step": 4457 }, { "epoch": 0.07894911607273307, "grad_norm": 1.748784065246582, "learning_rate": 2.9811890784128843e-05, "loss": 0.1566, "step": 4458 }, { "epoch": 0.0789668256097615, "grad_norm": 1.506777048110962, "learning_rate": 2.9811754931212232e-05, "loss": 0.1831, "step": 4459 }, { "epoch": 0.07898453514678992, "grad_norm": 1.4734622240066528, "learning_rate": 2.9811619029566444e-05, "loss": 0.1639, "step": 4460 }, { "epoch": 0.07900224468381835, "grad_norm": 1.1516759395599365, "learning_rate": 2.981148307919193e-05, "loss": 0.1648, "step": 4461 }, { "epoch": 0.07901995422084677, "grad_norm": 0.9735503792762756, "learning_rate": 2.9811347080089136e-05, "loss": 0.0979, "step": 4462 }, { "epoch": 0.07903766375787522, "grad_norm": 1.3684215545654297, "learning_rate": 2.9811211032258508e-05, "loss": 0.152, "step": 4463 }, { "epoch": 0.07905537329490364, "grad_norm": 1.2597323656082153, "learning_rate": 2.9811074935700497e-05, "loss": 0.155, "step": 4464 }, { "epoch": 0.07907308283193207, "grad_norm": 1.4463558197021484, "learning_rate": 2.981093879041554e-05, "loss": 0.1775, "step": 4465 }, { "epoch": 0.0790907923689605, "grad_norm": 1.3781864643096924, "learning_rate": 2.98108025964041e-05, "loss": 0.1497, "step": 4466 }, { "epoch": 0.07910850190598892, "grad_norm": 1.205863118171692, "learning_rate": 2.9810666353666613e-05, "loss": 0.1347, "step": 4467 }, { "epoch": 0.07912621144301735, "grad_norm": 1.6443589925765991, "learning_rate": 2.9810530062203532e-05, "loss": 0.1533, "step": 4468 }, { "epoch": 0.07914392098004577, "grad_norm": 0.8563595414161682, "learning_rate": 2.9810393722015305e-05, "loss": 0.1213, "step": 4469 }, { "epoch": 0.07916163051707421, "grad_norm": 1.2477712631225586, "learning_rate": 2.9810257333102382e-05, "loss": 0.1714, "step": 4470 }, { "epoch": 0.07917934005410264, "grad_norm": 1.283536672592163, "learning_rate": 2.9810120895465213e-05, "loss": 0.1403, "step": 4471 }, { "epoch": 0.07919704959113107, "grad_norm": 1.4781705141067505, "learning_rate": 2.980998440910424e-05, "loss": 0.1313, "step": 4472 }, { "epoch": 0.07921475912815949, "grad_norm": 1.4109896421432495, "learning_rate": 2.9809847874019914e-05, "loss": 0.1708, "step": 4473 }, { "epoch": 0.07923246866518792, "grad_norm": 1.1908738613128662, "learning_rate": 2.9809711290212692e-05, "loss": 0.1367, "step": 4474 }, { "epoch": 0.07925017820221635, "grad_norm": 1.238538384437561, "learning_rate": 2.9809574657683013e-05, "loss": 0.1604, "step": 4475 }, { "epoch": 0.07926788773924477, "grad_norm": 1.6591835021972656, "learning_rate": 2.980943797643134e-05, "loss": 0.1217, "step": 4476 }, { "epoch": 0.07928559727627321, "grad_norm": 1.183282732963562, "learning_rate": 2.9809301246458102e-05, "loss": 0.1147, "step": 4477 }, { "epoch": 0.07930330681330164, "grad_norm": 1.5430045127868652, "learning_rate": 2.980916446776377e-05, "loss": 0.0954, "step": 4478 }, { "epoch": 0.07932101635033006, "grad_norm": 1.0489907264709473, "learning_rate": 2.980902764034878e-05, "loss": 0.1315, "step": 4479 }, { "epoch": 0.07933872588735849, "grad_norm": 1.410719633102417, "learning_rate": 2.980889076421359e-05, "loss": 0.1318, "step": 4480 }, { "epoch": 0.07935643542438692, "grad_norm": 1.4732333421707153, "learning_rate": 2.9808753839358646e-05, "loss": 0.1482, "step": 4481 }, { "epoch": 0.07937414496141534, "grad_norm": 1.6076749563217163, "learning_rate": 2.9808616865784395e-05, "loss": 0.1508, "step": 4482 }, { "epoch": 0.07939185449844377, "grad_norm": 1.6396350860595703, "learning_rate": 2.9808479843491296e-05, "loss": 0.1661, "step": 4483 }, { "epoch": 0.0794095640354722, "grad_norm": 1.3036519289016724, "learning_rate": 2.98083427724798e-05, "loss": 0.1609, "step": 4484 }, { "epoch": 0.07942727357250064, "grad_norm": 1.194724440574646, "learning_rate": 2.9808205652750353e-05, "loss": 0.1321, "step": 4485 }, { "epoch": 0.07944498310952906, "grad_norm": 1.970322608947754, "learning_rate": 2.98080684843034e-05, "loss": 0.1401, "step": 4486 }, { "epoch": 0.07946269264655749, "grad_norm": 1.318766474723816, "learning_rate": 2.9807931267139407e-05, "loss": 0.1289, "step": 4487 }, { "epoch": 0.07948040218358592, "grad_norm": 1.0542690753936768, "learning_rate": 2.9807794001258813e-05, "loss": 0.1731, "step": 4488 }, { "epoch": 0.07949811172061434, "grad_norm": 1.1081395149230957, "learning_rate": 2.9807656686662077e-05, "loss": 0.1322, "step": 4489 }, { "epoch": 0.07951582125764277, "grad_norm": 1.4449644088745117, "learning_rate": 2.9807519323349645e-05, "loss": 0.1809, "step": 4490 }, { "epoch": 0.0795335307946712, "grad_norm": 1.5610824823379517, "learning_rate": 2.9807381911321975e-05, "loss": 0.1627, "step": 4491 }, { "epoch": 0.07955124033169964, "grad_norm": 1.7146462202072144, "learning_rate": 2.9807244450579517e-05, "loss": 0.1856, "step": 4492 }, { "epoch": 0.07956894986872806, "grad_norm": 1.5254712104797363, "learning_rate": 2.9807106941122723e-05, "loss": 0.163, "step": 4493 }, { "epoch": 0.07958665940575649, "grad_norm": 1.8709439039230347, "learning_rate": 2.980696938295204e-05, "loss": 0.1441, "step": 4494 }, { "epoch": 0.07960436894278491, "grad_norm": 1.4824223518371582, "learning_rate": 2.980683177606793e-05, "loss": 0.1446, "step": 4495 }, { "epoch": 0.07962207847981334, "grad_norm": 1.297377109527588, "learning_rate": 2.980669412047084e-05, "loss": 0.1141, "step": 4496 }, { "epoch": 0.07963978801684177, "grad_norm": 1.1211048364639282, "learning_rate": 2.9806556416161222e-05, "loss": 0.117, "step": 4497 }, { "epoch": 0.0796574975538702, "grad_norm": 1.3934345245361328, "learning_rate": 2.9806418663139532e-05, "loss": 0.1617, "step": 4498 }, { "epoch": 0.07967520709089862, "grad_norm": 3.0185842514038086, "learning_rate": 2.9806280861406224e-05, "loss": 0.136, "step": 4499 }, { "epoch": 0.07969291662792706, "grad_norm": 2.778430700302124, "learning_rate": 2.9806143010961748e-05, "loss": 0.1921, "step": 4500 }, { "epoch": 0.07971062616495549, "grad_norm": 1.1895943880081177, "learning_rate": 2.980600511180656e-05, "loss": 0.1352, "step": 4501 }, { "epoch": 0.07972833570198391, "grad_norm": 1.605324625968933, "learning_rate": 2.9805867163941115e-05, "loss": 0.1328, "step": 4502 }, { "epoch": 0.07974604523901234, "grad_norm": 1.370564341545105, "learning_rate": 2.980572916736586e-05, "loss": 0.1417, "step": 4503 }, { "epoch": 0.07976375477604077, "grad_norm": 1.2542823553085327, "learning_rate": 2.9805591122081253e-05, "loss": 0.1669, "step": 4504 }, { "epoch": 0.07978146431306919, "grad_norm": 1.659535527229309, "learning_rate": 2.9805453028087754e-05, "loss": 0.1536, "step": 4505 }, { "epoch": 0.07979917385009762, "grad_norm": 2.144099473953247, "learning_rate": 2.980531488538581e-05, "loss": 0.1565, "step": 4506 }, { "epoch": 0.07981688338712606, "grad_norm": 1.7225420475006104, "learning_rate": 2.9805176693975874e-05, "loss": 0.146, "step": 4507 }, { "epoch": 0.07983459292415448, "grad_norm": 1.5439180135726929, "learning_rate": 2.9805038453858407e-05, "loss": 0.1449, "step": 4508 }, { "epoch": 0.07985230246118291, "grad_norm": 1.3955364227294922, "learning_rate": 2.9804900165033863e-05, "loss": 0.1135, "step": 4509 }, { "epoch": 0.07987001199821134, "grad_norm": 1.390766978263855, "learning_rate": 2.9804761827502688e-05, "loss": 0.1719, "step": 4510 }, { "epoch": 0.07988772153523976, "grad_norm": 1.4136029481887817, "learning_rate": 2.980462344126535e-05, "loss": 0.1251, "step": 4511 }, { "epoch": 0.07990543107226819, "grad_norm": 2.4988853931427, "learning_rate": 2.9804485006322293e-05, "loss": 0.186, "step": 4512 }, { "epoch": 0.07992314060929662, "grad_norm": 1.389317274093628, "learning_rate": 2.9804346522673984e-05, "loss": 0.1487, "step": 4513 }, { "epoch": 0.07994085014632504, "grad_norm": 1.3610323667526245, "learning_rate": 2.980420799032087e-05, "loss": 0.136, "step": 4514 }, { "epoch": 0.07995855968335348, "grad_norm": 1.5085374116897583, "learning_rate": 2.9804069409263406e-05, "loss": 0.1475, "step": 4515 }, { "epoch": 0.07997626922038191, "grad_norm": 1.7755343914031982, "learning_rate": 2.9803930779502056e-05, "loss": 0.1539, "step": 4516 }, { "epoch": 0.07999397875741034, "grad_norm": 1.7088749408721924, "learning_rate": 2.9803792101037267e-05, "loss": 0.1991, "step": 4517 }, { "epoch": 0.08001168829443876, "grad_norm": 1.3480327129364014, "learning_rate": 2.9803653373869503e-05, "loss": 0.1617, "step": 4518 }, { "epoch": 0.08002939783146719, "grad_norm": 3.9729719161987305, "learning_rate": 2.9803514597999215e-05, "loss": 0.154, "step": 4519 }, { "epoch": 0.08004710736849562, "grad_norm": 2.045031785964966, "learning_rate": 2.9803375773426856e-05, "loss": 0.143, "step": 4520 }, { "epoch": 0.08006481690552404, "grad_norm": 1.330568790435791, "learning_rate": 2.9803236900152895e-05, "loss": 0.1607, "step": 4521 }, { "epoch": 0.08008252644255248, "grad_norm": 1.1666914224624634, "learning_rate": 2.980309797817778e-05, "loss": 0.1372, "step": 4522 }, { "epoch": 0.08010023597958091, "grad_norm": 2.049085855484009, "learning_rate": 2.9802959007501975e-05, "loss": 0.1513, "step": 4523 }, { "epoch": 0.08011794551660933, "grad_norm": 2.1191565990448, "learning_rate": 2.9802819988125923e-05, "loss": 0.1367, "step": 4524 }, { "epoch": 0.08013565505363776, "grad_norm": 2.635023832321167, "learning_rate": 2.9802680920050095e-05, "loss": 0.1938, "step": 4525 }, { "epoch": 0.08015336459066619, "grad_norm": 1.5853404998779297, "learning_rate": 2.980254180327494e-05, "loss": 0.1283, "step": 4526 }, { "epoch": 0.08017107412769461, "grad_norm": 1.1749931573867798, "learning_rate": 2.9802402637800922e-05, "loss": 0.1873, "step": 4527 }, { "epoch": 0.08018878366472304, "grad_norm": 1.0358988046646118, "learning_rate": 2.98022634236285e-05, "loss": 0.0916, "step": 4528 }, { "epoch": 0.08020649320175147, "grad_norm": 2.133690595626831, "learning_rate": 2.9802124160758125e-05, "loss": 0.1282, "step": 4529 }, { "epoch": 0.0802242027387799, "grad_norm": 1.7469079494476318, "learning_rate": 2.9801984849190262e-05, "loss": 0.149, "step": 4530 }, { "epoch": 0.08024191227580833, "grad_norm": 1.0224404335021973, "learning_rate": 2.980184548892536e-05, "loss": 0.1422, "step": 4531 }, { "epoch": 0.08025962181283676, "grad_norm": 1.3596347570419312, "learning_rate": 2.980170607996389e-05, "loss": 0.1714, "step": 4532 }, { "epoch": 0.08027733134986519, "grad_norm": 1.1761261224746704, "learning_rate": 2.9801566622306297e-05, "loss": 0.1411, "step": 4533 }, { "epoch": 0.08029504088689361, "grad_norm": 1.5877548456192017, "learning_rate": 2.9801427115953054e-05, "loss": 0.1517, "step": 4534 }, { "epoch": 0.08031275042392204, "grad_norm": 2.1172282695770264, "learning_rate": 2.980128756090461e-05, "loss": 0.1352, "step": 4535 }, { "epoch": 0.08033045996095046, "grad_norm": 1.4377052783966064, "learning_rate": 2.9801147957161427e-05, "loss": 0.152, "step": 4536 }, { "epoch": 0.0803481694979789, "grad_norm": 5.024900436401367, "learning_rate": 2.9801008304723965e-05, "loss": 0.1526, "step": 4537 }, { "epoch": 0.08036587903500733, "grad_norm": 1.593335509300232, "learning_rate": 2.980086860359268e-05, "loss": 0.1555, "step": 4538 }, { "epoch": 0.08038358857203576, "grad_norm": 1.1022145748138428, "learning_rate": 2.9800728853768036e-05, "loss": 0.1342, "step": 4539 }, { "epoch": 0.08040129810906418, "grad_norm": 2.504387140274048, "learning_rate": 2.9800589055250493e-05, "loss": 0.174, "step": 4540 }, { "epoch": 0.08041900764609261, "grad_norm": 1.2049473524093628, "learning_rate": 2.980044920804051e-05, "loss": 0.1104, "step": 4541 }, { "epoch": 0.08043671718312104, "grad_norm": 1.3097648620605469, "learning_rate": 2.9800309312138537e-05, "loss": 0.1786, "step": 4542 }, { "epoch": 0.08045442672014946, "grad_norm": 1.3579922914505005, "learning_rate": 2.9800169367545053e-05, "loss": 0.1469, "step": 4543 }, { "epoch": 0.0804721362571779, "grad_norm": 1.4836103916168213, "learning_rate": 2.9800029374260503e-05, "loss": 0.1375, "step": 4544 }, { "epoch": 0.08048984579420633, "grad_norm": 1.8375998735427856, "learning_rate": 2.9799889332285356e-05, "loss": 0.1584, "step": 4545 }, { "epoch": 0.08050755533123476, "grad_norm": 1.488490343093872, "learning_rate": 2.9799749241620068e-05, "loss": 0.133, "step": 4546 }, { "epoch": 0.08052526486826318, "grad_norm": 1.2744755744934082, "learning_rate": 2.9799609102265106e-05, "loss": 0.1209, "step": 4547 }, { "epoch": 0.08054297440529161, "grad_norm": 1.1142510175704956, "learning_rate": 2.9799468914220923e-05, "loss": 0.126, "step": 4548 }, { "epoch": 0.08056068394232004, "grad_norm": 2.1825361251831055, "learning_rate": 2.9799328677487984e-05, "loss": 0.1598, "step": 4549 }, { "epoch": 0.08057839347934846, "grad_norm": 1.2975261211395264, "learning_rate": 2.9799188392066755e-05, "loss": 0.1123, "step": 4550 }, { "epoch": 0.08059610301637689, "grad_norm": 1.682287573814392, "learning_rate": 2.979904805795769e-05, "loss": 0.1976, "step": 4551 }, { "epoch": 0.08061381255340533, "grad_norm": 2.1909995079040527, "learning_rate": 2.9798907675161254e-05, "loss": 0.1716, "step": 4552 }, { "epoch": 0.08063152209043375, "grad_norm": 1.3775750398635864, "learning_rate": 2.9798767243677907e-05, "loss": 0.125, "step": 4553 }, { "epoch": 0.08064923162746218, "grad_norm": 1.268512487411499, "learning_rate": 2.9798626763508117e-05, "loss": 0.1161, "step": 4554 }, { "epoch": 0.08066694116449061, "grad_norm": 1.6169897317886353, "learning_rate": 2.979848623465234e-05, "loss": 0.1757, "step": 4555 }, { "epoch": 0.08068465070151903, "grad_norm": 1.1236519813537598, "learning_rate": 2.9798345657111036e-05, "loss": 0.1447, "step": 4556 }, { "epoch": 0.08070236023854746, "grad_norm": 1.479781985282898, "learning_rate": 2.9798205030884674e-05, "loss": 0.1626, "step": 4557 }, { "epoch": 0.08072006977557589, "grad_norm": 1.619450569152832, "learning_rate": 2.9798064355973714e-05, "loss": 0.1766, "step": 4558 }, { "epoch": 0.08073777931260433, "grad_norm": 1.4052006006240845, "learning_rate": 2.979792363237862e-05, "loss": 0.1048, "step": 4559 }, { "epoch": 0.08075548884963275, "grad_norm": 0.819770336151123, "learning_rate": 2.9797782860099855e-05, "loss": 0.1033, "step": 4560 }, { "epoch": 0.08077319838666118, "grad_norm": 1.0018223524093628, "learning_rate": 2.9797642039137878e-05, "loss": 0.122, "step": 4561 }, { "epoch": 0.0807909079236896, "grad_norm": 1.3367795944213867, "learning_rate": 2.9797501169493156e-05, "loss": 0.1482, "step": 4562 }, { "epoch": 0.08080861746071803, "grad_norm": 1.4719997644424438, "learning_rate": 2.9797360251166154e-05, "loss": 0.169, "step": 4563 }, { "epoch": 0.08082632699774646, "grad_norm": 1.366531252861023, "learning_rate": 2.9797219284157333e-05, "loss": 0.1891, "step": 4564 }, { "epoch": 0.08084403653477489, "grad_norm": 1.5857157707214355, "learning_rate": 2.9797078268467157e-05, "loss": 0.1442, "step": 4565 }, { "epoch": 0.08086174607180331, "grad_norm": 1.851655125617981, "learning_rate": 2.9796937204096086e-05, "loss": 0.1439, "step": 4566 }, { "epoch": 0.08087945560883175, "grad_norm": 1.1491233110427856, "learning_rate": 2.9796796091044595e-05, "loss": 0.1686, "step": 4567 }, { "epoch": 0.08089716514586018, "grad_norm": 1.2900748252868652, "learning_rate": 2.9796654929313138e-05, "loss": 0.1352, "step": 4568 }, { "epoch": 0.0809148746828886, "grad_norm": 1.3186914920806885, "learning_rate": 2.979651371890218e-05, "loss": 0.1442, "step": 4569 }, { "epoch": 0.08093258421991703, "grad_norm": 0.9980699419975281, "learning_rate": 2.9796372459812196e-05, "loss": 0.1106, "step": 4570 }, { "epoch": 0.08095029375694546, "grad_norm": 1.2740025520324707, "learning_rate": 2.9796231152043636e-05, "loss": 0.1426, "step": 4571 }, { "epoch": 0.08096800329397388, "grad_norm": 1.2341021299362183, "learning_rate": 2.9796089795596974e-05, "loss": 0.1356, "step": 4572 }, { "epoch": 0.08098571283100231, "grad_norm": 1.3976997137069702, "learning_rate": 2.9795948390472677e-05, "loss": 0.1725, "step": 4573 }, { "epoch": 0.08100342236803075, "grad_norm": 1.630624771118164, "learning_rate": 2.97958069366712e-05, "loss": 0.1704, "step": 4574 }, { "epoch": 0.08102113190505918, "grad_norm": 1.6384645700454712, "learning_rate": 2.9795665434193017e-05, "loss": 0.1304, "step": 4575 }, { "epoch": 0.0810388414420876, "grad_norm": 1.5036276578903198, "learning_rate": 2.9795523883038592e-05, "loss": 0.1549, "step": 4576 }, { "epoch": 0.08105655097911603, "grad_norm": 1.4868899583816528, "learning_rate": 2.9795382283208393e-05, "loss": 0.1143, "step": 4577 }, { "epoch": 0.08107426051614446, "grad_norm": 1.6656298637390137, "learning_rate": 2.979524063470288e-05, "loss": 0.1752, "step": 4578 }, { "epoch": 0.08109197005317288, "grad_norm": 1.5955042839050293, "learning_rate": 2.979509893752252e-05, "loss": 0.1658, "step": 4579 }, { "epoch": 0.08110967959020131, "grad_norm": 1.1672818660736084, "learning_rate": 2.9794957191667785e-05, "loss": 0.1439, "step": 4580 }, { "epoch": 0.08112738912722973, "grad_norm": 1.3559383153915405, "learning_rate": 2.979481539713913e-05, "loss": 0.1374, "step": 4581 }, { "epoch": 0.08114509866425818, "grad_norm": 1.7171685695648193, "learning_rate": 2.9794673553937034e-05, "loss": 0.1243, "step": 4582 }, { "epoch": 0.0811628082012866, "grad_norm": 2.3786745071411133, "learning_rate": 2.979453166206196e-05, "loss": 0.1706, "step": 4583 }, { "epoch": 0.08118051773831503, "grad_norm": 1.811571478843689, "learning_rate": 2.979438972151437e-05, "loss": 0.1583, "step": 4584 }, { "epoch": 0.08119822727534345, "grad_norm": 1.2100051641464233, "learning_rate": 2.9794247732294737e-05, "loss": 0.1484, "step": 4585 }, { "epoch": 0.08121593681237188, "grad_norm": 1.0574710369110107, "learning_rate": 2.9794105694403526e-05, "loss": 0.1684, "step": 4586 }, { "epoch": 0.0812336463494003, "grad_norm": 1.4505850076675415, "learning_rate": 2.97939636078412e-05, "loss": 0.1752, "step": 4587 }, { "epoch": 0.08125135588642873, "grad_norm": 1.4520293474197388, "learning_rate": 2.979382147260823e-05, "loss": 0.1763, "step": 4588 }, { "epoch": 0.08126906542345717, "grad_norm": 1.215578556060791, "learning_rate": 2.9793679288705086e-05, "loss": 0.1629, "step": 4589 }, { "epoch": 0.0812867749604856, "grad_norm": 1.605475902557373, "learning_rate": 2.9793537056132233e-05, "loss": 0.1676, "step": 4590 }, { "epoch": 0.08130448449751403, "grad_norm": 0.9726895093917847, "learning_rate": 2.9793394774890137e-05, "loss": 0.0982, "step": 4591 }, { "epoch": 0.08132219403454245, "grad_norm": 2.369328498840332, "learning_rate": 2.9793252444979272e-05, "loss": 0.1476, "step": 4592 }, { "epoch": 0.08133990357157088, "grad_norm": 1.5578402280807495, "learning_rate": 2.9793110066400097e-05, "loss": 0.1474, "step": 4593 }, { "epoch": 0.0813576131085993, "grad_norm": 1.6281442642211914, "learning_rate": 2.9792967639153095e-05, "loss": 0.167, "step": 4594 }, { "epoch": 0.08137532264562773, "grad_norm": 1.4498217105865479, "learning_rate": 2.9792825163238718e-05, "loss": 0.1468, "step": 4595 }, { "epoch": 0.08139303218265616, "grad_norm": 1.6871198415756226, "learning_rate": 2.9792682638657446e-05, "loss": 0.1769, "step": 4596 }, { "epoch": 0.0814107417196846, "grad_norm": 1.436721682548523, "learning_rate": 2.9792540065409743e-05, "loss": 0.1559, "step": 4597 }, { "epoch": 0.08142845125671302, "grad_norm": 1.531131386756897, "learning_rate": 2.9792397443496078e-05, "loss": 0.1499, "step": 4598 }, { "epoch": 0.08144616079374145, "grad_norm": 1.0948480367660522, "learning_rate": 2.9792254772916928e-05, "loss": 0.0977, "step": 4599 }, { "epoch": 0.08146387033076988, "grad_norm": 1.9703911542892456, "learning_rate": 2.9792112053672746e-05, "loss": 0.1452, "step": 4600 }, { "epoch": 0.0814815798677983, "grad_norm": 2.103104591369629, "learning_rate": 2.9791969285764018e-05, "loss": 0.1317, "step": 4601 }, { "epoch": 0.08149928940482673, "grad_norm": 1.674586534500122, "learning_rate": 2.9791826469191207e-05, "loss": 0.131, "step": 4602 }, { "epoch": 0.08151699894185516, "grad_norm": 2.2706825733184814, "learning_rate": 2.979168360395478e-05, "loss": 0.1244, "step": 4603 }, { "epoch": 0.0815347084788836, "grad_norm": 1.197023868560791, "learning_rate": 2.979154069005521e-05, "loss": 0.1068, "step": 4604 }, { "epoch": 0.08155241801591202, "grad_norm": 1.6068871021270752, "learning_rate": 2.9791397727492976e-05, "loss": 0.2245, "step": 4605 }, { "epoch": 0.08157012755294045, "grad_norm": 2.0562734603881836, "learning_rate": 2.9791254716268527e-05, "loss": 0.1822, "step": 4606 }, { "epoch": 0.08158783708996888, "grad_norm": 1.95699143409729, "learning_rate": 2.979111165638236e-05, "loss": 0.2061, "step": 4607 }, { "epoch": 0.0816055466269973, "grad_norm": 1.710261344909668, "learning_rate": 2.979096854783492e-05, "loss": 0.1333, "step": 4608 }, { "epoch": 0.08162325616402573, "grad_norm": 1.4727041721343994, "learning_rate": 2.979082539062669e-05, "loss": 0.1422, "step": 4609 }, { "epoch": 0.08164096570105416, "grad_norm": 1.0322155952453613, "learning_rate": 2.9790682184758147e-05, "loss": 0.1651, "step": 4610 }, { "epoch": 0.0816586752380826, "grad_norm": 1.0437774658203125, "learning_rate": 2.9790538930229746e-05, "loss": 0.1503, "step": 4611 }, { "epoch": 0.08167638477511102, "grad_norm": 1.7283411026000977, "learning_rate": 2.979039562704198e-05, "loss": 0.1256, "step": 4612 }, { "epoch": 0.08169409431213945, "grad_norm": 2.288179874420166, "learning_rate": 2.97902522751953e-05, "loss": 0.1753, "step": 4613 }, { "epoch": 0.08171180384916787, "grad_norm": 1.151073932647705, "learning_rate": 2.979010887469019e-05, "loss": 0.1413, "step": 4614 }, { "epoch": 0.0817295133861963, "grad_norm": 1.839065670967102, "learning_rate": 2.978996542552711e-05, "loss": 0.1553, "step": 4615 }, { "epoch": 0.08174722292322473, "grad_norm": 1.4475700855255127, "learning_rate": 2.978982192770655e-05, "loss": 0.1526, "step": 4616 }, { "epoch": 0.08176493246025315, "grad_norm": 1.2926872968673706, "learning_rate": 2.9789678381228967e-05, "loss": 0.1881, "step": 4617 }, { "epoch": 0.08178264199728158, "grad_norm": 2.1849377155303955, "learning_rate": 2.978953478609484e-05, "loss": 0.1974, "step": 4618 }, { "epoch": 0.08180035153431002, "grad_norm": 1.7919079065322876, "learning_rate": 2.9789391142304637e-05, "loss": 0.1831, "step": 4619 }, { "epoch": 0.08181806107133845, "grad_norm": 1.2898114919662476, "learning_rate": 2.9789247449858834e-05, "loss": 0.1277, "step": 4620 }, { "epoch": 0.08183577060836687, "grad_norm": 1.3095659017562866, "learning_rate": 2.97891037087579e-05, "loss": 0.1492, "step": 4621 }, { "epoch": 0.0818534801453953, "grad_norm": 1.0600076913833618, "learning_rate": 2.9788959919002315e-05, "loss": 0.1832, "step": 4622 }, { "epoch": 0.08187118968242373, "grad_norm": 1.8929758071899414, "learning_rate": 2.9788816080592548e-05, "loss": 0.1665, "step": 4623 }, { "epoch": 0.08188889921945215, "grad_norm": 2.0444626808166504, "learning_rate": 2.9788672193529066e-05, "loss": 0.1373, "step": 4624 }, { "epoch": 0.08190660875648058, "grad_norm": 1.4777555465698242, "learning_rate": 2.978852825781235e-05, "loss": 0.1481, "step": 4625 }, { "epoch": 0.08192431829350902, "grad_norm": 1.5202863216400146, "learning_rate": 2.9788384273442876e-05, "loss": 0.1765, "step": 4626 }, { "epoch": 0.08194202783053744, "grad_norm": 1.4453792572021484, "learning_rate": 2.9788240240421113e-05, "loss": 0.1536, "step": 4627 }, { "epoch": 0.08195973736756587, "grad_norm": 2.277614116668701, "learning_rate": 2.9788096158747533e-05, "loss": 0.2079, "step": 4628 }, { "epoch": 0.0819774469045943, "grad_norm": 1.694272756576538, "learning_rate": 2.9787952028422613e-05, "loss": 0.2036, "step": 4629 }, { "epoch": 0.08199515644162272, "grad_norm": 1.5334781408309937, "learning_rate": 2.9787807849446824e-05, "loss": 0.1709, "step": 4630 }, { "epoch": 0.08201286597865115, "grad_norm": 1.4594392776489258, "learning_rate": 2.9787663621820647e-05, "loss": 0.1862, "step": 4631 }, { "epoch": 0.08203057551567958, "grad_norm": 1.6704955101013184, "learning_rate": 2.978751934554455e-05, "loss": 0.1766, "step": 4632 }, { "epoch": 0.082048285052708, "grad_norm": 1.5582726001739502, "learning_rate": 2.978737502061901e-05, "loss": 0.1434, "step": 4633 }, { "epoch": 0.08206599458973644, "grad_norm": 1.349892497062683, "learning_rate": 2.9787230647044505e-05, "loss": 0.1818, "step": 4634 }, { "epoch": 0.08208370412676487, "grad_norm": 0.9237607717514038, "learning_rate": 2.9787086224821504e-05, "loss": 0.148, "step": 4635 }, { "epoch": 0.0821014136637933, "grad_norm": 1.0196659564971924, "learning_rate": 2.9786941753950487e-05, "loss": 0.1186, "step": 4636 }, { "epoch": 0.08211912320082172, "grad_norm": 1.9515022039413452, "learning_rate": 2.9786797234431922e-05, "loss": 0.1695, "step": 4637 }, { "epoch": 0.08213683273785015, "grad_norm": 1.2245428562164307, "learning_rate": 2.9786652666266297e-05, "loss": 0.1576, "step": 4638 }, { "epoch": 0.08215454227487858, "grad_norm": 1.2892403602600098, "learning_rate": 2.9786508049454075e-05, "loss": 0.1505, "step": 4639 }, { "epoch": 0.082172251811907, "grad_norm": 1.3417315483093262, "learning_rate": 2.9786363383995738e-05, "loss": 0.1576, "step": 4640 }, { "epoch": 0.08218996134893544, "grad_norm": 1.2663077116012573, "learning_rate": 2.978621866989176e-05, "loss": 0.1449, "step": 4641 }, { "epoch": 0.08220767088596387, "grad_norm": 1.3649587631225586, "learning_rate": 2.9786073907142622e-05, "loss": 0.1762, "step": 4642 }, { "epoch": 0.0822253804229923, "grad_norm": 1.6051255464553833, "learning_rate": 2.9785929095748796e-05, "loss": 0.1544, "step": 4643 }, { "epoch": 0.08224308996002072, "grad_norm": 1.6772589683532715, "learning_rate": 2.9785784235710755e-05, "loss": 0.1499, "step": 4644 }, { "epoch": 0.08226079949704915, "grad_norm": 1.705193042755127, "learning_rate": 2.978563932702898e-05, "loss": 0.1528, "step": 4645 }, { "epoch": 0.08227850903407757, "grad_norm": 1.2876625061035156, "learning_rate": 2.978549436970395e-05, "loss": 0.1367, "step": 4646 }, { "epoch": 0.082296218571106, "grad_norm": 1.5693352222442627, "learning_rate": 2.978534936373614e-05, "loss": 0.1587, "step": 4647 }, { "epoch": 0.08231392810813443, "grad_norm": 1.4331368207931519, "learning_rate": 2.978520430912602e-05, "loss": 0.1917, "step": 4648 }, { "epoch": 0.08233163764516287, "grad_norm": 1.2998120784759521, "learning_rate": 2.9785059205874077e-05, "loss": 0.16, "step": 4649 }, { "epoch": 0.0823493471821913, "grad_norm": 1.2073596715927124, "learning_rate": 2.978491405398078e-05, "loss": 0.1338, "step": 4650 }, { "epoch": 0.08236705671921972, "grad_norm": 1.9136923551559448, "learning_rate": 2.978476885344662e-05, "loss": 0.1527, "step": 4651 }, { "epoch": 0.08238476625624815, "grad_norm": 1.7400517463684082, "learning_rate": 2.9784623604272063e-05, "loss": 0.1728, "step": 4652 }, { "epoch": 0.08240247579327657, "grad_norm": 1.6279773712158203, "learning_rate": 2.9784478306457587e-05, "loss": 0.1458, "step": 4653 }, { "epoch": 0.082420185330305, "grad_norm": 1.5830943584442139, "learning_rate": 2.9784332960003672e-05, "loss": 0.1331, "step": 4654 }, { "epoch": 0.08243789486733342, "grad_norm": 1.053288459777832, "learning_rate": 2.9784187564910802e-05, "loss": 0.1722, "step": 4655 }, { "epoch": 0.08245560440436187, "grad_norm": 1.1117639541625977, "learning_rate": 2.9784042121179447e-05, "loss": 0.1533, "step": 4656 }, { "epoch": 0.08247331394139029, "grad_norm": 1.867409110069275, "learning_rate": 2.9783896628810088e-05, "loss": 0.1543, "step": 4657 }, { "epoch": 0.08249102347841872, "grad_norm": 1.5882530212402344, "learning_rate": 2.9783751087803205e-05, "loss": 0.1795, "step": 4658 }, { "epoch": 0.08250873301544714, "grad_norm": 1.4672725200653076, "learning_rate": 2.9783605498159278e-05, "loss": 0.1275, "step": 4659 }, { "epoch": 0.08252644255247557, "grad_norm": 1.5949950218200684, "learning_rate": 2.978345985987878e-05, "loss": 0.1441, "step": 4660 }, { "epoch": 0.082544152089504, "grad_norm": 1.5610597133636475, "learning_rate": 2.9783314172962198e-05, "loss": 0.1229, "step": 4661 }, { "epoch": 0.08256186162653242, "grad_norm": 1.2918788194656372, "learning_rate": 2.9783168437410005e-05, "loss": 0.1619, "step": 4662 }, { "epoch": 0.08257957116356085, "grad_norm": 1.3504999876022339, "learning_rate": 2.978302265322268e-05, "loss": 0.137, "step": 4663 }, { "epoch": 0.08259728070058929, "grad_norm": 1.1092140674591064, "learning_rate": 2.9782876820400714e-05, "loss": 0.1157, "step": 4664 }, { "epoch": 0.08261499023761772, "grad_norm": 1.030257225036621, "learning_rate": 2.9782730938944574e-05, "loss": 0.1628, "step": 4665 }, { "epoch": 0.08263269977464614, "grad_norm": 1.5393600463867188, "learning_rate": 2.9782585008854748e-05, "loss": 0.1698, "step": 4666 }, { "epoch": 0.08265040931167457, "grad_norm": 1.0821895599365234, "learning_rate": 2.9782439030131708e-05, "loss": 0.1487, "step": 4667 }, { "epoch": 0.082668118848703, "grad_norm": 1.4656466245651245, "learning_rate": 2.978229300277594e-05, "loss": 0.1279, "step": 4668 }, { "epoch": 0.08268582838573142, "grad_norm": 1.1587618589401245, "learning_rate": 2.9782146926787923e-05, "loss": 0.1217, "step": 4669 }, { "epoch": 0.08270353792275985, "grad_norm": 1.3227710723876953, "learning_rate": 2.9782000802168137e-05, "loss": 0.1676, "step": 4670 }, { "epoch": 0.08272124745978829, "grad_norm": 1.1615744829177856, "learning_rate": 2.9781854628917065e-05, "loss": 0.1701, "step": 4671 }, { "epoch": 0.08273895699681671, "grad_norm": 1.7628282308578491, "learning_rate": 2.9781708407035186e-05, "loss": 0.2044, "step": 4672 }, { "epoch": 0.08275666653384514, "grad_norm": 1.4608421325683594, "learning_rate": 2.978156213652298e-05, "loss": 0.1393, "step": 4673 }, { "epoch": 0.08277437607087357, "grad_norm": 1.122475028038025, "learning_rate": 2.9781415817380933e-05, "loss": 0.1571, "step": 4674 }, { "epoch": 0.082792085607902, "grad_norm": 1.4050365686416626, "learning_rate": 2.9781269449609524e-05, "loss": 0.1876, "step": 4675 }, { "epoch": 0.08280979514493042, "grad_norm": 1.4849883317947388, "learning_rate": 2.978112303320923e-05, "loss": 0.1523, "step": 4676 }, { "epoch": 0.08282750468195885, "grad_norm": 0.9689475297927856, "learning_rate": 2.9780976568180532e-05, "loss": 0.1307, "step": 4677 }, { "epoch": 0.08284521421898729, "grad_norm": 0.7894468307495117, "learning_rate": 2.9780830054523927e-05, "loss": 0.0979, "step": 4678 }, { "epoch": 0.08286292375601571, "grad_norm": 1.2397897243499756, "learning_rate": 2.9780683492239876e-05, "loss": 0.104, "step": 4679 }, { "epoch": 0.08288063329304414, "grad_norm": 1.8270034790039062, "learning_rate": 2.978053688132888e-05, "loss": 0.1539, "step": 4680 }, { "epoch": 0.08289834283007257, "grad_norm": 1.2160606384277344, "learning_rate": 2.9780390221791406e-05, "loss": 0.1014, "step": 4681 }, { "epoch": 0.08291605236710099, "grad_norm": 1.3285645246505737, "learning_rate": 2.9780243513627942e-05, "loss": 0.1503, "step": 4682 }, { "epoch": 0.08293376190412942, "grad_norm": 1.4298288822174072, "learning_rate": 2.9780096756838977e-05, "loss": 0.1676, "step": 4683 }, { "epoch": 0.08295147144115785, "grad_norm": 1.3056963682174683, "learning_rate": 2.9779949951424987e-05, "loss": 0.1421, "step": 4684 }, { "epoch": 0.08296918097818627, "grad_norm": 1.1603336334228516, "learning_rate": 2.9779803097386455e-05, "loss": 0.1867, "step": 4685 }, { "epoch": 0.08298689051521471, "grad_norm": 1.4972282648086548, "learning_rate": 2.9779656194723867e-05, "loss": 0.167, "step": 4686 }, { "epoch": 0.08300460005224314, "grad_norm": 1.2699588537216187, "learning_rate": 2.9779509243437702e-05, "loss": 0.1427, "step": 4687 }, { "epoch": 0.08302230958927156, "grad_norm": 1.1267400979995728, "learning_rate": 2.977936224352845e-05, "loss": 0.1174, "step": 4688 }, { "epoch": 0.08304001912629999, "grad_norm": 1.4155040979385376, "learning_rate": 2.9779215194996595e-05, "loss": 0.1755, "step": 4689 }, { "epoch": 0.08305772866332842, "grad_norm": 1.4144575595855713, "learning_rate": 2.977906809784261e-05, "loss": 0.1402, "step": 4690 }, { "epoch": 0.08307543820035684, "grad_norm": 1.020687222480774, "learning_rate": 2.9778920952066986e-05, "loss": 0.1298, "step": 4691 }, { "epoch": 0.08309314773738527, "grad_norm": 1.9361317157745361, "learning_rate": 2.977877375767021e-05, "loss": 0.1105, "step": 4692 }, { "epoch": 0.08311085727441371, "grad_norm": 1.448896884918213, "learning_rate": 2.977862651465276e-05, "loss": 0.1168, "step": 4693 }, { "epoch": 0.08312856681144214, "grad_norm": 1.2475547790527344, "learning_rate": 2.9778479223015127e-05, "loss": 0.1904, "step": 4694 }, { "epoch": 0.08314627634847056, "grad_norm": 1.2624273300170898, "learning_rate": 2.977833188275779e-05, "loss": 0.1372, "step": 4695 }, { "epoch": 0.08316398588549899, "grad_norm": 1.36884605884552, "learning_rate": 2.9778184493881237e-05, "loss": 0.1198, "step": 4696 }, { "epoch": 0.08318169542252742, "grad_norm": 1.576668381690979, "learning_rate": 2.9778037056385953e-05, "loss": 0.2336, "step": 4697 }, { "epoch": 0.08319940495955584, "grad_norm": 1.1548799276351929, "learning_rate": 2.9777889570272418e-05, "loss": 0.1393, "step": 4698 }, { "epoch": 0.08321711449658427, "grad_norm": 1.3032816648483276, "learning_rate": 2.9777742035541126e-05, "loss": 0.115, "step": 4699 }, { "epoch": 0.0832348240336127, "grad_norm": 1.367472767829895, "learning_rate": 2.9777594452192548e-05, "loss": 0.1272, "step": 4700 }, { "epoch": 0.08325253357064114, "grad_norm": 1.567257046699524, "learning_rate": 2.977744682022719e-05, "loss": 0.1312, "step": 4701 }, { "epoch": 0.08327024310766956, "grad_norm": 1.3967825174331665, "learning_rate": 2.977729913964552e-05, "loss": 0.1592, "step": 4702 }, { "epoch": 0.08328795264469799, "grad_norm": 1.1298807859420776, "learning_rate": 2.9777151410448032e-05, "loss": 0.1251, "step": 4703 }, { "epoch": 0.08330566218172641, "grad_norm": 1.3186626434326172, "learning_rate": 2.977700363263521e-05, "loss": 0.1487, "step": 4704 }, { "epoch": 0.08332337171875484, "grad_norm": 1.3841062784194946, "learning_rate": 2.9776855806207538e-05, "loss": 0.176, "step": 4705 }, { "epoch": 0.08334108125578327, "grad_norm": 1.248949646949768, "learning_rate": 2.9776707931165513e-05, "loss": 0.1322, "step": 4706 }, { "epoch": 0.0833587907928117, "grad_norm": 1.2155510187149048, "learning_rate": 2.9776560007509607e-05, "loss": 0.1359, "step": 4707 }, { "epoch": 0.08337650032984013, "grad_norm": 1.0953537225723267, "learning_rate": 2.9776412035240316e-05, "loss": 0.0961, "step": 4708 }, { "epoch": 0.08339420986686856, "grad_norm": 1.4586433172225952, "learning_rate": 2.977626401435812e-05, "loss": 0.1386, "step": 4709 }, { "epoch": 0.08341191940389699, "grad_norm": 1.2030842304229736, "learning_rate": 2.9776115944863517e-05, "loss": 0.1052, "step": 4710 }, { "epoch": 0.08342962894092541, "grad_norm": 1.2358686923980713, "learning_rate": 2.977596782675698e-05, "loss": 0.102, "step": 4711 }, { "epoch": 0.08344733847795384, "grad_norm": 1.416182279586792, "learning_rate": 2.977581966003901e-05, "loss": 0.1465, "step": 4712 }, { "epoch": 0.08346504801498227, "grad_norm": 1.1469497680664062, "learning_rate": 2.9775671444710083e-05, "loss": 0.1082, "step": 4713 }, { "epoch": 0.08348275755201069, "grad_norm": 1.3233379125595093, "learning_rate": 2.977552318077069e-05, "loss": 0.1333, "step": 4714 }, { "epoch": 0.08350046708903912, "grad_norm": 1.6770099401474, "learning_rate": 2.9775374868221322e-05, "loss": 0.1618, "step": 4715 }, { "epoch": 0.08351817662606756, "grad_norm": 1.3085023164749146, "learning_rate": 2.9775226507062464e-05, "loss": 0.1801, "step": 4716 }, { "epoch": 0.08353588616309598, "grad_norm": 1.1101570129394531, "learning_rate": 2.977507809729461e-05, "loss": 0.1267, "step": 4717 }, { "epoch": 0.08355359570012441, "grad_norm": 2.0449678897857666, "learning_rate": 2.977492963891824e-05, "loss": 0.2045, "step": 4718 }, { "epoch": 0.08357130523715284, "grad_norm": 1.4133453369140625, "learning_rate": 2.9774781131933848e-05, "loss": 0.1381, "step": 4719 }, { "epoch": 0.08358901477418126, "grad_norm": 1.3357789516448975, "learning_rate": 2.9774632576341917e-05, "loss": 0.1142, "step": 4720 }, { "epoch": 0.08360672431120969, "grad_norm": 1.5137441158294678, "learning_rate": 2.977448397214294e-05, "loss": 0.1045, "step": 4721 }, { "epoch": 0.08362443384823812, "grad_norm": 1.621044397354126, "learning_rate": 2.9774335319337408e-05, "loss": 0.1248, "step": 4722 }, { "epoch": 0.08364214338526656, "grad_norm": 1.4624192714691162, "learning_rate": 2.9774186617925804e-05, "loss": 0.1254, "step": 4723 }, { "epoch": 0.08365985292229498, "grad_norm": 1.1842405796051025, "learning_rate": 2.977403786790862e-05, "loss": 0.1652, "step": 4724 }, { "epoch": 0.08367756245932341, "grad_norm": 1.3270052671432495, "learning_rate": 2.977388906928635e-05, "loss": 0.1727, "step": 4725 }, { "epoch": 0.08369527199635184, "grad_norm": 1.0202562808990479, "learning_rate": 2.9773740222059477e-05, "loss": 0.1174, "step": 4726 }, { "epoch": 0.08371298153338026, "grad_norm": 0.9299101829528809, "learning_rate": 2.9773591326228493e-05, "loss": 0.1297, "step": 4727 }, { "epoch": 0.08373069107040869, "grad_norm": 1.8994250297546387, "learning_rate": 2.9773442381793887e-05, "loss": 0.1468, "step": 4728 }, { "epoch": 0.08374840060743712, "grad_norm": 1.3835431337356567, "learning_rate": 2.9773293388756148e-05, "loss": 0.1557, "step": 4729 }, { "epoch": 0.08376611014446554, "grad_norm": 1.8637361526489258, "learning_rate": 2.9773144347115774e-05, "loss": 0.1161, "step": 4730 }, { "epoch": 0.08378381968149398, "grad_norm": 1.3858540058135986, "learning_rate": 2.9772995256873242e-05, "loss": 0.1783, "step": 4731 }, { "epoch": 0.08380152921852241, "grad_norm": 1.427344560623169, "learning_rate": 2.9772846118029054e-05, "loss": 0.1643, "step": 4732 }, { "epoch": 0.08381923875555083, "grad_norm": 1.6180020570755005, "learning_rate": 2.9772696930583697e-05, "loss": 0.1849, "step": 4733 }, { "epoch": 0.08383694829257926, "grad_norm": 1.5249639749526978, "learning_rate": 2.9772547694537654e-05, "loss": 0.174, "step": 4734 }, { "epoch": 0.08385465782960769, "grad_norm": 1.4294730424880981, "learning_rate": 2.977239840989143e-05, "loss": 0.1025, "step": 4735 }, { "epoch": 0.08387236736663611, "grad_norm": 1.4588106870651245, "learning_rate": 2.977224907664551e-05, "loss": 0.1193, "step": 4736 }, { "epoch": 0.08389007690366454, "grad_norm": 2.0447704792022705, "learning_rate": 2.9772099694800384e-05, "loss": 0.1617, "step": 4737 }, { "epoch": 0.08390778644069298, "grad_norm": 1.4775938987731934, "learning_rate": 2.9771950264356542e-05, "loss": 0.1317, "step": 4738 }, { "epoch": 0.0839254959777214, "grad_norm": 1.081043004989624, "learning_rate": 2.977180078531448e-05, "loss": 0.1427, "step": 4739 }, { "epoch": 0.08394320551474983, "grad_norm": 0.9599761366844177, "learning_rate": 2.9771651257674688e-05, "loss": 0.1137, "step": 4740 }, { "epoch": 0.08396091505177826, "grad_norm": 1.4426449537277222, "learning_rate": 2.9771501681437654e-05, "loss": 0.1422, "step": 4741 }, { "epoch": 0.08397862458880669, "grad_norm": 0.8710267543792725, "learning_rate": 2.9771352056603874e-05, "loss": 0.1366, "step": 4742 }, { "epoch": 0.08399633412583511, "grad_norm": 1.4370077848434448, "learning_rate": 2.9771202383173845e-05, "loss": 0.1898, "step": 4743 }, { "epoch": 0.08401404366286354, "grad_norm": 1.6732206344604492, "learning_rate": 2.977105266114805e-05, "loss": 0.151, "step": 4744 }, { "epoch": 0.08403175319989198, "grad_norm": 1.204633355140686, "learning_rate": 2.9770902890526983e-05, "loss": 0.1269, "step": 4745 }, { "epoch": 0.0840494627369204, "grad_norm": 1.5398699045181274, "learning_rate": 2.9770753071311142e-05, "loss": 0.1261, "step": 4746 }, { "epoch": 0.08406717227394883, "grad_norm": 1.392634630203247, "learning_rate": 2.977060320350102e-05, "loss": 0.1487, "step": 4747 }, { "epoch": 0.08408488181097726, "grad_norm": 0.8764210939407349, "learning_rate": 2.9770453287097105e-05, "loss": 0.1144, "step": 4748 }, { "epoch": 0.08410259134800568, "grad_norm": 1.4248465299606323, "learning_rate": 2.9770303322099894e-05, "loss": 0.137, "step": 4749 }, { "epoch": 0.08412030088503411, "grad_norm": 1.6258686780929565, "learning_rate": 2.977015330850988e-05, "loss": 0.1651, "step": 4750 }, { "epoch": 0.08413801042206254, "grad_norm": 1.2422560453414917, "learning_rate": 2.977000324632755e-05, "loss": 0.161, "step": 4751 }, { "epoch": 0.08415571995909096, "grad_norm": 0.9540186524391174, "learning_rate": 2.976985313555341e-05, "loss": 0.1377, "step": 4752 }, { "epoch": 0.0841734294961194, "grad_norm": 1.0795974731445312, "learning_rate": 2.976970297618794e-05, "loss": 0.1272, "step": 4753 }, { "epoch": 0.08419113903314783, "grad_norm": 1.3368861675262451, "learning_rate": 2.976955276823165e-05, "loss": 0.1235, "step": 4754 }, { "epoch": 0.08420884857017626, "grad_norm": 2.152916431427002, "learning_rate": 2.9769402511685016e-05, "loss": 0.2096, "step": 4755 }, { "epoch": 0.08422655810720468, "grad_norm": 1.3131227493286133, "learning_rate": 2.9769252206548546e-05, "loss": 0.1473, "step": 4756 }, { "epoch": 0.08424426764423311, "grad_norm": 2.0039193630218506, "learning_rate": 2.9769101852822728e-05, "loss": 0.1649, "step": 4757 }, { "epoch": 0.08426197718126154, "grad_norm": 1.7544809579849243, "learning_rate": 2.9768951450508065e-05, "loss": 0.1489, "step": 4758 }, { "epoch": 0.08427968671828996, "grad_norm": 1.0720561742782593, "learning_rate": 2.9768800999605037e-05, "loss": 0.155, "step": 4759 }, { "epoch": 0.0842973962553184, "grad_norm": 1.066842794418335, "learning_rate": 2.9768650500114153e-05, "loss": 0.1391, "step": 4760 }, { "epoch": 0.08431510579234683, "grad_norm": 1.5099124908447266, "learning_rate": 2.97684999520359e-05, "loss": 0.1112, "step": 4761 }, { "epoch": 0.08433281532937525, "grad_norm": 2.062776565551758, "learning_rate": 2.9768349355370777e-05, "loss": 0.1406, "step": 4762 }, { "epoch": 0.08435052486640368, "grad_norm": 0.7240650653839111, "learning_rate": 2.976819871011928e-05, "loss": 0.1175, "step": 4763 }, { "epoch": 0.08436823440343211, "grad_norm": 1.5837790966033936, "learning_rate": 2.97680480162819e-05, "loss": 0.1481, "step": 4764 }, { "epoch": 0.08438594394046053, "grad_norm": 1.21147882938385, "learning_rate": 2.976789727385914e-05, "loss": 0.1013, "step": 4765 }, { "epoch": 0.08440365347748896, "grad_norm": 1.168107509613037, "learning_rate": 2.9767746482851486e-05, "loss": 0.1663, "step": 4766 }, { "epoch": 0.08442136301451739, "grad_norm": 1.184245228767395, "learning_rate": 2.9767595643259442e-05, "loss": 0.1697, "step": 4767 }, { "epoch": 0.08443907255154583, "grad_norm": 1.622154951095581, "learning_rate": 2.9767444755083502e-05, "loss": 0.1762, "step": 4768 }, { "epoch": 0.08445678208857425, "grad_norm": 1.7476568222045898, "learning_rate": 2.9767293818324164e-05, "loss": 0.1554, "step": 4769 }, { "epoch": 0.08447449162560268, "grad_norm": 1.9543516635894775, "learning_rate": 2.976714283298192e-05, "loss": 0.1346, "step": 4770 }, { "epoch": 0.0844922011626311, "grad_norm": 1.2825788259506226, "learning_rate": 2.9766991799057276e-05, "loss": 0.1096, "step": 4771 }, { "epoch": 0.08450991069965953, "grad_norm": 1.5058093070983887, "learning_rate": 2.9766840716550717e-05, "loss": 0.1412, "step": 4772 }, { "epoch": 0.08452762023668796, "grad_norm": 1.3113394975662231, "learning_rate": 2.9766689585462747e-05, "loss": 0.1301, "step": 4773 }, { "epoch": 0.08454532977371639, "grad_norm": 1.5132302045822144, "learning_rate": 2.976653840579386e-05, "loss": 0.1528, "step": 4774 }, { "epoch": 0.08456303931074483, "grad_norm": 2.83158016204834, "learning_rate": 2.9766387177544555e-05, "loss": 0.1519, "step": 4775 }, { "epoch": 0.08458074884777325, "grad_norm": 1.6609164476394653, "learning_rate": 2.9766235900715333e-05, "loss": 0.1588, "step": 4776 }, { "epoch": 0.08459845838480168, "grad_norm": 1.2700467109680176, "learning_rate": 2.9766084575306684e-05, "loss": 0.0852, "step": 4777 }, { "epoch": 0.0846161679218301, "grad_norm": 1.5712025165557861, "learning_rate": 2.9765933201319114e-05, "loss": 0.1676, "step": 4778 }, { "epoch": 0.08463387745885853, "grad_norm": 2.2944939136505127, "learning_rate": 2.9765781778753117e-05, "loss": 0.1795, "step": 4779 }, { "epoch": 0.08465158699588696, "grad_norm": 2.746366262435913, "learning_rate": 2.9765630307609194e-05, "loss": 0.1822, "step": 4780 }, { "epoch": 0.08466929653291538, "grad_norm": 2.3191449642181396, "learning_rate": 2.9765478787887833e-05, "loss": 0.1231, "step": 4781 }, { "epoch": 0.08468700606994381, "grad_norm": 1.9166851043701172, "learning_rate": 2.9765327219589546e-05, "loss": 0.1433, "step": 4782 }, { "epoch": 0.08470471560697225, "grad_norm": 1.6741658449172974, "learning_rate": 2.9765175602714824e-05, "loss": 0.1213, "step": 4783 }, { "epoch": 0.08472242514400068, "grad_norm": 1.5530341863632202, "learning_rate": 2.976502393726417e-05, "loss": 0.1332, "step": 4784 }, { "epoch": 0.0847401346810291, "grad_norm": 2.0709927082061768, "learning_rate": 2.9764872223238075e-05, "loss": 0.1376, "step": 4785 }, { "epoch": 0.08475784421805753, "grad_norm": 1.0092380046844482, "learning_rate": 2.976472046063705e-05, "loss": 0.1712, "step": 4786 }, { "epoch": 0.08477555375508596, "grad_norm": 1.8187569379806519, "learning_rate": 2.9764568649461586e-05, "loss": 0.1481, "step": 4787 }, { "epoch": 0.08479326329211438, "grad_norm": 1.545764684677124, "learning_rate": 2.9764416789712184e-05, "loss": 0.1501, "step": 4788 }, { "epoch": 0.08481097282914281, "grad_norm": 1.174421787261963, "learning_rate": 2.9764264881389343e-05, "loss": 0.0926, "step": 4789 }, { "epoch": 0.08482868236617125, "grad_norm": 1.0181989669799805, "learning_rate": 2.9764112924493566e-05, "loss": 0.1221, "step": 4790 }, { "epoch": 0.08484639190319967, "grad_norm": 1.7599660158157349, "learning_rate": 2.976396091902535e-05, "loss": 0.1645, "step": 4791 }, { "epoch": 0.0848641014402281, "grad_norm": 1.1429308652877808, "learning_rate": 2.9763808864985194e-05, "loss": 0.1749, "step": 4792 }, { "epoch": 0.08488181097725653, "grad_norm": 1.7352981567382812, "learning_rate": 2.97636567623736e-05, "loss": 0.1372, "step": 4793 }, { "epoch": 0.08489952051428495, "grad_norm": 1.695351243019104, "learning_rate": 2.976350461119107e-05, "loss": 0.1474, "step": 4794 }, { "epoch": 0.08491723005131338, "grad_norm": 1.4749078750610352, "learning_rate": 2.9763352411438105e-05, "loss": 0.1511, "step": 4795 }, { "epoch": 0.0849349395883418, "grad_norm": 1.2060368061065674, "learning_rate": 2.97632001631152e-05, "loss": 0.154, "step": 4796 }, { "epoch": 0.08495264912537023, "grad_norm": 1.7491519451141357, "learning_rate": 2.9763047866222866e-05, "loss": 0.1846, "step": 4797 }, { "epoch": 0.08497035866239867, "grad_norm": 1.570475697517395, "learning_rate": 2.9762895520761594e-05, "loss": 0.1257, "step": 4798 }, { "epoch": 0.0849880681994271, "grad_norm": 1.235515832901001, "learning_rate": 2.9762743126731887e-05, "loss": 0.117, "step": 4799 }, { "epoch": 0.08500577773645553, "grad_norm": 1.2769155502319336, "learning_rate": 2.9762590684134252e-05, "loss": 0.1195, "step": 4800 }, { "epoch": 0.08502348727348395, "grad_norm": 1.1102882623672485, "learning_rate": 2.9762438192969185e-05, "loss": 0.1755, "step": 4801 }, { "epoch": 0.08504119681051238, "grad_norm": 2.0720677375793457, "learning_rate": 2.9762285653237186e-05, "loss": 0.184, "step": 4802 }, { "epoch": 0.0850589063475408, "grad_norm": 1.4432470798492432, "learning_rate": 2.9762133064938765e-05, "loss": 0.1075, "step": 4803 }, { "epoch": 0.08507661588456923, "grad_norm": 1.496438980102539, "learning_rate": 2.9761980428074418e-05, "loss": 0.137, "step": 4804 }, { "epoch": 0.08509432542159767, "grad_norm": 1.8268921375274658, "learning_rate": 2.9761827742644648e-05, "loss": 0.1682, "step": 4805 }, { "epoch": 0.0851120349586261, "grad_norm": 1.2405365705490112, "learning_rate": 2.9761675008649956e-05, "loss": 0.1203, "step": 4806 }, { "epoch": 0.08512974449565452, "grad_norm": 1.2505022287368774, "learning_rate": 2.976152222609085e-05, "loss": 0.1652, "step": 4807 }, { "epoch": 0.08514745403268295, "grad_norm": 1.5859864950180054, "learning_rate": 2.976136939496783e-05, "loss": 0.1577, "step": 4808 }, { "epoch": 0.08516516356971138, "grad_norm": 1.5538277626037598, "learning_rate": 2.976121651528139e-05, "loss": 0.1422, "step": 4809 }, { "epoch": 0.0851828731067398, "grad_norm": 1.0681909322738647, "learning_rate": 2.9761063587032048e-05, "loss": 0.1285, "step": 4810 }, { "epoch": 0.08520058264376823, "grad_norm": 1.1525341272354126, "learning_rate": 2.9760910610220295e-05, "loss": 0.1469, "step": 4811 }, { "epoch": 0.08521829218079667, "grad_norm": 1.5950844287872314, "learning_rate": 2.976075758484664e-05, "loss": 0.1378, "step": 4812 }, { "epoch": 0.0852360017178251, "grad_norm": 1.332808494567871, "learning_rate": 2.976060451091159e-05, "loss": 0.1454, "step": 4813 }, { "epoch": 0.08525371125485352, "grad_norm": 1.7221611738204956, "learning_rate": 2.976045138841564e-05, "loss": 0.1473, "step": 4814 }, { "epoch": 0.08527142079188195, "grad_norm": 1.8164714574813843, "learning_rate": 2.97602982173593e-05, "loss": 0.1807, "step": 4815 }, { "epoch": 0.08528913032891038, "grad_norm": 1.1872217655181885, "learning_rate": 2.976014499774307e-05, "loss": 0.1202, "step": 4816 }, { "epoch": 0.0853068398659388, "grad_norm": 1.0908259153366089, "learning_rate": 2.9759991729567457e-05, "loss": 0.146, "step": 4817 }, { "epoch": 0.08532454940296723, "grad_norm": 1.1772960424423218, "learning_rate": 2.975983841283296e-05, "loss": 0.1324, "step": 4818 }, { "epoch": 0.08534225893999565, "grad_norm": 1.8196521997451782, "learning_rate": 2.9759685047540095e-05, "loss": 0.1677, "step": 4819 }, { "epoch": 0.0853599684770241, "grad_norm": 1.1458152532577515, "learning_rate": 2.9759531633689353e-05, "loss": 0.1631, "step": 4820 }, { "epoch": 0.08537767801405252, "grad_norm": 1.308121919631958, "learning_rate": 2.9759378171281248e-05, "loss": 0.14, "step": 4821 }, { "epoch": 0.08539538755108095, "grad_norm": 1.3346490859985352, "learning_rate": 2.9759224660316278e-05, "loss": 0.1071, "step": 4822 }, { "epoch": 0.08541309708810937, "grad_norm": 1.1232342720031738, "learning_rate": 2.9759071100794954e-05, "loss": 0.117, "step": 4823 }, { "epoch": 0.0854308066251378, "grad_norm": 1.2386806011199951, "learning_rate": 2.9758917492717782e-05, "loss": 0.1323, "step": 4824 }, { "epoch": 0.08544851616216623, "grad_norm": 1.6404290199279785, "learning_rate": 2.975876383608526e-05, "loss": 0.1476, "step": 4825 }, { "epoch": 0.08546622569919465, "grad_norm": 1.8110440969467163, "learning_rate": 2.9758610130897897e-05, "loss": 0.1237, "step": 4826 }, { "epoch": 0.0854839352362231, "grad_norm": 1.274735450744629, "learning_rate": 2.9758456377156205e-05, "loss": 0.1292, "step": 4827 }, { "epoch": 0.08550164477325152, "grad_norm": 1.1083881855010986, "learning_rate": 2.975830257486068e-05, "loss": 0.074, "step": 4828 }, { "epoch": 0.08551935431027995, "grad_norm": 1.3749364614486694, "learning_rate": 2.9758148724011834e-05, "loss": 0.1509, "step": 4829 }, { "epoch": 0.08553706384730837, "grad_norm": 1.5415539741516113, "learning_rate": 2.975799482461017e-05, "loss": 0.1555, "step": 4830 }, { "epoch": 0.0855547733843368, "grad_norm": 1.8761628866195679, "learning_rate": 2.9757840876656194e-05, "loss": 0.1658, "step": 4831 }, { "epoch": 0.08557248292136523, "grad_norm": 0.7894483208656311, "learning_rate": 2.9757686880150417e-05, "loss": 0.1426, "step": 4832 }, { "epoch": 0.08559019245839365, "grad_norm": 1.4786434173583984, "learning_rate": 2.9757532835093342e-05, "loss": 0.1574, "step": 4833 }, { "epoch": 0.08560790199542208, "grad_norm": 1.451184630393982, "learning_rate": 2.9757378741485473e-05, "loss": 0.103, "step": 4834 }, { "epoch": 0.08562561153245052, "grad_norm": 1.6248387098312378, "learning_rate": 2.9757224599327326e-05, "loss": 0.175, "step": 4835 }, { "epoch": 0.08564332106947894, "grad_norm": 1.2562000751495361, "learning_rate": 2.97570704086194e-05, "loss": 0.117, "step": 4836 }, { "epoch": 0.08566103060650737, "grad_norm": 1.3289430141448975, "learning_rate": 2.975691616936221e-05, "loss": 0.1476, "step": 4837 }, { "epoch": 0.0856787401435358, "grad_norm": 1.1721705198287964, "learning_rate": 2.975676188155625e-05, "loss": 0.0986, "step": 4838 }, { "epoch": 0.08569644968056422, "grad_norm": 1.2972933053970337, "learning_rate": 2.975660754520204e-05, "loss": 0.142, "step": 4839 }, { "epoch": 0.08571415921759265, "grad_norm": 1.371741771697998, "learning_rate": 2.9756453160300084e-05, "loss": 0.1823, "step": 4840 }, { "epoch": 0.08573186875462108, "grad_norm": 1.2057101726531982, "learning_rate": 2.975629872685089e-05, "loss": 0.1454, "step": 4841 }, { "epoch": 0.08574957829164952, "grad_norm": 1.3529601097106934, "learning_rate": 2.9756144244854967e-05, "loss": 0.1252, "step": 4842 }, { "epoch": 0.08576728782867794, "grad_norm": 1.4754928350448608, "learning_rate": 2.975598971431282e-05, "loss": 0.1283, "step": 4843 }, { "epoch": 0.08578499736570637, "grad_norm": 1.06416654586792, "learning_rate": 2.975583513522496e-05, "loss": 0.1296, "step": 4844 }, { "epoch": 0.0858027069027348, "grad_norm": 1.1373339891433716, "learning_rate": 2.9755680507591892e-05, "loss": 0.1599, "step": 4845 }, { "epoch": 0.08582041643976322, "grad_norm": 1.2303130626678467, "learning_rate": 2.9755525831414132e-05, "loss": 0.132, "step": 4846 }, { "epoch": 0.08583812597679165, "grad_norm": 1.0256208181381226, "learning_rate": 2.9755371106692186e-05, "loss": 0.1615, "step": 4847 }, { "epoch": 0.08585583551382008, "grad_norm": 1.811270833015442, "learning_rate": 2.9755216333426554e-05, "loss": 0.1698, "step": 4848 }, { "epoch": 0.0858735450508485, "grad_norm": 1.0086597204208374, "learning_rate": 2.9755061511617755e-05, "loss": 0.1236, "step": 4849 }, { "epoch": 0.08589125458787694, "grad_norm": 1.145909070968628, "learning_rate": 2.9754906641266298e-05, "loss": 0.1355, "step": 4850 }, { "epoch": 0.08590896412490537, "grad_norm": 0.9526703953742981, "learning_rate": 2.975475172237269e-05, "loss": 0.0959, "step": 4851 }, { "epoch": 0.0859266736619338, "grad_norm": 1.271407127380371, "learning_rate": 2.975459675493744e-05, "loss": 0.1683, "step": 4852 }, { "epoch": 0.08594438319896222, "grad_norm": 0.7744376063346863, "learning_rate": 2.9754441738961064e-05, "loss": 0.1911, "step": 4853 }, { "epoch": 0.08596209273599065, "grad_norm": 1.5391217470169067, "learning_rate": 2.975428667444406e-05, "loss": 0.1541, "step": 4854 }, { "epoch": 0.08597980227301907, "grad_norm": 1.3060579299926758, "learning_rate": 2.975413156138695e-05, "loss": 0.1452, "step": 4855 }, { "epoch": 0.0859975118100475, "grad_norm": 1.282423734664917, "learning_rate": 2.9753976399790238e-05, "loss": 0.1818, "step": 4856 }, { "epoch": 0.08601522134707594, "grad_norm": 1.1654130220413208, "learning_rate": 2.9753821189654435e-05, "loss": 0.1631, "step": 4857 }, { "epoch": 0.08603293088410437, "grad_norm": 1.7727190256118774, "learning_rate": 2.9753665930980055e-05, "loss": 0.1049, "step": 4858 }, { "epoch": 0.08605064042113279, "grad_norm": 0.9458323121070862, "learning_rate": 2.9753510623767605e-05, "loss": 0.1436, "step": 4859 }, { "epoch": 0.08606834995816122, "grad_norm": 1.386844277381897, "learning_rate": 2.975335526801759e-05, "loss": 0.1281, "step": 4860 }, { "epoch": 0.08608605949518965, "grad_norm": 2.549860715866089, "learning_rate": 2.975319986373054e-05, "loss": 0.1426, "step": 4861 }, { "epoch": 0.08610376903221807, "grad_norm": 1.45297372341156, "learning_rate": 2.975304441090695e-05, "loss": 0.178, "step": 4862 }, { "epoch": 0.0861214785692465, "grad_norm": 0.8534601926803589, "learning_rate": 2.9752888909547338e-05, "loss": 0.114, "step": 4863 }, { "epoch": 0.08613918810627492, "grad_norm": 2.083845853805542, "learning_rate": 2.975273335965221e-05, "loss": 0.2094, "step": 4864 }, { "epoch": 0.08615689764330337, "grad_norm": 1.2363954782485962, "learning_rate": 2.9752577761222083e-05, "loss": 0.1069, "step": 4865 }, { "epoch": 0.08617460718033179, "grad_norm": 1.2051228284835815, "learning_rate": 2.975242211425747e-05, "loss": 0.1537, "step": 4866 }, { "epoch": 0.08619231671736022, "grad_norm": 2.1374449729919434, "learning_rate": 2.9752266418758876e-05, "loss": 0.1376, "step": 4867 }, { "epoch": 0.08621002625438864, "grad_norm": 1.177666425704956, "learning_rate": 2.975211067472682e-05, "loss": 0.1432, "step": 4868 }, { "epoch": 0.08622773579141707, "grad_norm": 1.6002118587493896, "learning_rate": 2.975195488216181e-05, "loss": 0.1521, "step": 4869 }, { "epoch": 0.0862454453284455, "grad_norm": 1.1266757249832153, "learning_rate": 2.9751799041064363e-05, "loss": 0.137, "step": 4870 }, { "epoch": 0.08626315486547392, "grad_norm": 1.8461536169052124, "learning_rate": 2.9751643151434986e-05, "loss": 0.197, "step": 4871 }, { "epoch": 0.08628086440250236, "grad_norm": 2.042978048324585, "learning_rate": 2.9751487213274198e-05, "loss": 0.1488, "step": 4872 }, { "epoch": 0.08629857393953079, "grad_norm": 1.6004761457443237, "learning_rate": 2.9751331226582505e-05, "loss": 0.1422, "step": 4873 }, { "epoch": 0.08631628347655922, "grad_norm": 1.4730353355407715, "learning_rate": 2.9751175191360425e-05, "loss": 0.1545, "step": 4874 }, { "epoch": 0.08633399301358764, "grad_norm": 1.375158429145813, "learning_rate": 2.9751019107608477e-05, "loss": 0.1552, "step": 4875 }, { "epoch": 0.08635170255061607, "grad_norm": 1.412318468093872, "learning_rate": 2.9750862975327166e-05, "loss": 0.1309, "step": 4876 }, { "epoch": 0.0863694120876445, "grad_norm": 1.4956302642822266, "learning_rate": 2.9750706794517002e-05, "loss": 0.1161, "step": 4877 }, { "epoch": 0.08638712162467292, "grad_norm": 1.0829271078109741, "learning_rate": 2.975055056517851e-05, "loss": 0.122, "step": 4878 }, { "epoch": 0.08640483116170136, "grad_norm": 1.5525983572006226, "learning_rate": 2.97503942873122e-05, "loss": 0.1775, "step": 4879 }, { "epoch": 0.08642254069872979, "grad_norm": 0.95660799741745, "learning_rate": 2.9750237960918577e-05, "loss": 0.0837, "step": 4880 }, { "epoch": 0.08644025023575821, "grad_norm": 1.7442572116851807, "learning_rate": 2.9750081585998172e-05, "loss": 0.1244, "step": 4881 }, { "epoch": 0.08645795977278664, "grad_norm": 1.4772777557373047, "learning_rate": 2.9749925162551484e-05, "loss": 0.1492, "step": 4882 }, { "epoch": 0.08647566930981507, "grad_norm": 1.3095144033432007, "learning_rate": 2.9749768690579037e-05, "loss": 0.1355, "step": 4883 }, { "epoch": 0.0864933788468435, "grad_norm": 2.4432780742645264, "learning_rate": 2.9749612170081343e-05, "loss": 0.1575, "step": 4884 }, { "epoch": 0.08651108838387192, "grad_norm": 1.2321321964263916, "learning_rate": 2.9749455601058913e-05, "loss": 0.1034, "step": 4885 }, { "epoch": 0.08652879792090035, "grad_norm": 1.4002041816711426, "learning_rate": 2.974929898351227e-05, "loss": 0.1852, "step": 4886 }, { "epoch": 0.08654650745792879, "grad_norm": 1.6992335319519043, "learning_rate": 2.9749142317441925e-05, "loss": 0.1813, "step": 4887 }, { "epoch": 0.08656421699495721, "grad_norm": 1.5438332557678223, "learning_rate": 2.974898560284839e-05, "loss": 0.1413, "step": 4888 }, { "epoch": 0.08658192653198564, "grad_norm": 0.9868785738945007, "learning_rate": 2.974882883973219e-05, "loss": 0.1765, "step": 4889 }, { "epoch": 0.08659963606901407, "grad_norm": 5.257789611816406, "learning_rate": 2.974867202809383e-05, "loss": 0.1573, "step": 4890 }, { "epoch": 0.08661734560604249, "grad_norm": 1.3951122760772705, "learning_rate": 2.9748515167933836e-05, "loss": 0.1477, "step": 4891 }, { "epoch": 0.08663505514307092, "grad_norm": 2.037278890609741, "learning_rate": 2.9748358259252712e-05, "loss": 0.1754, "step": 4892 }, { "epoch": 0.08665276468009935, "grad_norm": 2.397552013397217, "learning_rate": 2.9748201302050984e-05, "loss": 0.1341, "step": 4893 }, { "epoch": 0.08667047421712779, "grad_norm": 1.0423437356948853, "learning_rate": 2.9748044296329168e-05, "loss": 0.1096, "step": 4894 }, { "epoch": 0.08668818375415621, "grad_norm": 1.0779770612716675, "learning_rate": 2.9747887242087774e-05, "loss": 0.161, "step": 4895 }, { "epoch": 0.08670589329118464, "grad_norm": 2.221008539199829, "learning_rate": 2.974773013932732e-05, "loss": 0.1091, "step": 4896 }, { "epoch": 0.08672360282821306, "grad_norm": 1.4029899835586548, "learning_rate": 2.974757298804833e-05, "loss": 0.145, "step": 4897 }, { "epoch": 0.08674131236524149, "grad_norm": 1.30568528175354, "learning_rate": 2.9747415788251317e-05, "loss": 0.1215, "step": 4898 }, { "epoch": 0.08675902190226992, "grad_norm": 1.2318751811981201, "learning_rate": 2.97472585399368e-05, "loss": 0.214, "step": 4899 }, { "epoch": 0.08677673143929834, "grad_norm": 1.8495763540267944, "learning_rate": 2.9747101243105288e-05, "loss": 0.1202, "step": 4900 }, { "epoch": 0.08679444097632677, "grad_norm": 2.0215537548065186, "learning_rate": 2.974694389775731e-05, "loss": 0.152, "step": 4901 }, { "epoch": 0.08681215051335521, "grad_norm": 1.621809720993042, "learning_rate": 2.9746786503893374e-05, "loss": 0.1683, "step": 4902 }, { "epoch": 0.08682986005038364, "grad_norm": 1.7225911617279053, "learning_rate": 2.9746629061514003e-05, "loss": 0.1521, "step": 4903 }, { "epoch": 0.08684756958741206, "grad_norm": 1.1987227201461792, "learning_rate": 2.9746471570619714e-05, "loss": 0.1481, "step": 4904 }, { "epoch": 0.08686527912444049, "grad_norm": 1.3705826997756958, "learning_rate": 2.9746314031211025e-05, "loss": 0.1389, "step": 4905 }, { "epoch": 0.08688298866146892, "grad_norm": 1.3804010152816772, "learning_rate": 2.9746156443288455e-05, "loss": 0.2041, "step": 4906 }, { "epoch": 0.08690069819849734, "grad_norm": 1.2329750061035156, "learning_rate": 2.974599880685252e-05, "loss": 0.1598, "step": 4907 }, { "epoch": 0.08691840773552577, "grad_norm": 1.2162017822265625, "learning_rate": 2.9745841121903743e-05, "loss": 0.1618, "step": 4908 }, { "epoch": 0.08693611727255421, "grad_norm": 1.6239429712295532, "learning_rate": 2.974568338844264e-05, "loss": 0.1619, "step": 4909 }, { "epoch": 0.08695382680958263, "grad_norm": 1.617853045463562, "learning_rate": 2.974552560646973e-05, "loss": 0.1529, "step": 4910 }, { "epoch": 0.08697153634661106, "grad_norm": 1.1293137073516846, "learning_rate": 2.974536777598553e-05, "loss": 0.1251, "step": 4911 }, { "epoch": 0.08698924588363949, "grad_norm": 1.0542773008346558, "learning_rate": 2.9745209896990563e-05, "loss": 0.1304, "step": 4912 }, { "epoch": 0.08700695542066791, "grad_norm": 1.4355522394180298, "learning_rate": 2.9745051969485346e-05, "loss": 0.1745, "step": 4913 }, { "epoch": 0.08702466495769634, "grad_norm": 2.3117494583129883, "learning_rate": 2.97448939934704e-05, "loss": 0.1419, "step": 4914 }, { "epoch": 0.08704237449472477, "grad_norm": 1.4066888093948364, "learning_rate": 2.9744735968946246e-05, "loss": 0.1278, "step": 4915 }, { "epoch": 0.0870600840317532, "grad_norm": 1.0282717943191528, "learning_rate": 2.97445778959134e-05, "loss": 0.1468, "step": 4916 }, { "epoch": 0.08707779356878163, "grad_norm": 0.8402010202407837, "learning_rate": 2.9744419774372385e-05, "loss": 0.155, "step": 4917 }, { "epoch": 0.08709550310581006, "grad_norm": 1.7932480573654175, "learning_rate": 2.974426160432372e-05, "loss": 0.1178, "step": 4918 }, { "epoch": 0.08711321264283849, "grad_norm": 0.7111843228340149, "learning_rate": 2.9744103385767922e-05, "loss": 0.1135, "step": 4919 }, { "epoch": 0.08713092217986691, "grad_norm": 1.2145644426345825, "learning_rate": 2.974394511870552e-05, "loss": 0.1304, "step": 4920 }, { "epoch": 0.08714863171689534, "grad_norm": 1.203444242477417, "learning_rate": 2.9743786803137026e-05, "loss": 0.1522, "step": 4921 }, { "epoch": 0.08716634125392377, "grad_norm": 1.7494044303894043, "learning_rate": 2.9743628439062965e-05, "loss": 0.1565, "step": 4922 }, { "epoch": 0.08718405079095219, "grad_norm": 1.9343494176864624, "learning_rate": 2.9743470026483863e-05, "loss": 0.1762, "step": 4923 }, { "epoch": 0.08720176032798063, "grad_norm": 1.3355754613876343, "learning_rate": 2.974331156540023e-05, "loss": 0.1544, "step": 4924 }, { "epoch": 0.08721946986500906, "grad_norm": 2.113222360610962, "learning_rate": 2.974315305581259e-05, "loss": 0.133, "step": 4925 }, { "epoch": 0.08723717940203748, "grad_norm": 5.117617607116699, "learning_rate": 2.9742994497721476e-05, "loss": 0.092, "step": 4926 }, { "epoch": 0.08725488893906591, "grad_norm": 1.363749384880066, "learning_rate": 2.9742835891127397e-05, "loss": 0.1752, "step": 4927 }, { "epoch": 0.08727259847609434, "grad_norm": 1.6138007640838623, "learning_rate": 2.9742677236030884e-05, "loss": 0.1447, "step": 4928 }, { "epoch": 0.08729030801312276, "grad_norm": 1.6570703983306885, "learning_rate": 2.9742518532432447e-05, "loss": 0.1226, "step": 4929 }, { "epoch": 0.08730801755015119, "grad_norm": 0.885714590549469, "learning_rate": 2.9742359780332622e-05, "loss": 0.1757, "step": 4930 }, { "epoch": 0.08732572708717962, "grad_norm": 1.0028482675552368, "learning_rate": 2.974220097973192e-05, "loss": 0.0931, "step": 4931 }, { "epoch": 0.08734343662420806, "grad_norm": 1.1822409629821777, "learning_rate": 2.9742042130630867e-05, "loss": 0.132, "step": 4932 }, { "epoch": 0.08736114616123648, "grad_norm": 1.4231020212173462, "learning_rate": 2.974188323302999e-05, "loss": 0.1725, "step": 4933 }, { "epoch": 0.08737885569826491, "grad_norm": 1.0929551124572754, "learning_rate": 2.9741724286929808e-05, "loss": 0.1266, "step": 4934 }, { "epoch": 0.08739656523529334, "grad_norm": 2.30727219581604, "learning_rate": 2.974156529233084e-05, "loss": 0.1842, "step": 4935 }, { "epoch": 0.08741427477232176, "grad_norm": 1.579555630683899, "learning_rate": 2.9741406249233616e-05, "loss": 0.0931, "step": 4936 }, { "epoch": 0.08743198430935019, "grad_norm": 1.3405417203903198, "learning_rate": 2.9741247157638657e-05, "loss": 0.1224, "step": 4937 }, { "epoch": 0.08744969384637861, "grad_norm": 1.1944619417190552, "learning_rate": 2.9741088017546483e-05, "loss": 0.1427, "step": 4938 }, { "epoch": 0.08746740338340706, "grad_norm": 1.0187854766845703, "learning_rate": 2.974092882895762e-05, "loss": 0.1501, "step": 4939 }, { "epoch": 0.08748511292043548, "grad_norm": 1.769906759262085, "learning_rate": 2.97407695918726e-05, "loss": 0.1767, "step": 4940 }, { "epoch": 0.08750282245746391, "grad_norm": 1.0448273420333862, "learning_rate": 2.9740610306291934e-05, "loss": 0.1689, "step": 4941 }, { "epoch": 0.08752053199449233, "grad_norm": 1.7778806686401367, "learning_rate": 2.9740450972216147e-05, "loss": 0.1575, "step": 4942 }, { "epoch": 0.08753824153152076, "grad_norm": 1.1757209300994873, "learning_rate": 2.9740291589645773e-05, "loss": 0.1199, "step": 4943 }, { "epoch": 0.08755595106854919, "grad_norm": 1.3218261003494263, "learning_rate": 2.9740132158581326e-05, "loss": 0.1303, "step": 4944 }, { "epoch": 0.08757366060557761, "grad_norm": 1.1065768003463745, "learning_rate": 2.9739972679023335e-05, "loss": 0.1418, "step": 4945 }, { "epoch": 0.08759137014260605, "grad_norm": 1.1680766344070435, "learning_rate": 2.973981315097233e-05, "loss": 0.147, "step": 4946 }, { "epoch": 0.08760907967963448, "grad_norm": 1.0011181831359863, "learning_rate": 2.9739653574428828e-05, "loss": 0.1323, "step": 4947 }, { "epoch": 0.0876267892166629, "grad_norm": 1.110019326210022, "learning_rate": 2.9739493949393358e-05, "loss": 0.1508, "step": 4948 }, { "epoch": 0.08764449875369133, "grad_norm": 1.1630651950836182, "learning_rate": 2.973933427586644e-05, "loss": 0.1469, "step": 4949 }, { "epoch": 0.08766220829071976, "grad_norm": 1.7583645582199097, "learning_rate": 2.9739174553848603e-05, "loss": 0.1912, "step": 4950 }, { "epoch": 0.08767991782774819, "grad_norm": 1.1314040422439575, "learning_rate": 2.9739014783340375e-05, "loss": 0.1405, "step": 4951 }, { "epoch": 0.08769762736477661, "grad_norm": 3.2468841075897217, "learning_rate": 2.973885496434228e-05, "loss": 0.1094, "step": 4952 }, { "epoch": 0.08771533690180504, "grad_norm": 1.316261649131775, "learning_rate": 2.973869509685484e-05, "loss": 0.1376, "step": 4953 }, { "epoch": 0.08773304643883348, "grad_norm": 1.6280878782272339, "learning_rate": 2.9738535180878582e-05, "loss": 0.1296, "step": 4954 }, { "epoch": 0.0877507559758619, "grad_norm": 1.3202462196350098, "learning_rate": 2.9738375216414043e-05, "loss": 0.1513, "step": 4955 }, { "epoch": 0.08776846551289033, "grad_norm": 1.4261751174926758, "learning_rate": 2.9738215203461732e-05, "loss": 0.1431, "step": 4956 }, { "epoch": 0.08778617504991876, "grad_norm": 1.8456261157989502, "learning_rate": 2.9738055142022183e-05, "loss": 0.1382, "step": 4957 }, { "epoch": 0.08780388458694718, "grad_norm": 3.4466185569763184, "learning_rate": 2.973789503209593e-05, "loss": 0.1387, "step": 4958 }, { "epoch": 0.08782159412397561, "grad_norm": 1.3633166551589966, "learning_rate": 2.973773487368349e-05, "loss": 0.1607, "step": 4959 }, { "epoch": 0.08783930366100404, "grad_norm": 1.4638088941574097, "learning_rate": 2.973757466678539e-05, "loss": 0.1513, "step": 4960 }, { "epoch": 0.08785701319803248, "grad_norm": 1.5118021965026855, "learning_rate": 2.9737414411402164e-05, "loss": 0.139, "step": 4961 }, { "epoch": 0.0878747227350609, "grad_norm": 0.9465568661689758, "learning_rate": 2.9737254107534334e-05, "loss": 0.1639, "step": 4962 }, { "epoch": 0.08789243227208933, "grad_norm": 1.577415108680725, "learning_rate": 2.9737093755182427e-05, "loss": 0.1504, "step": 4963 }, { "epoch": 0.08791014180911776, "grad_norm": 1.6128039360046387, "learning_rate": 2.9736933354346974e-05, "loss": 0.1745, "step": 4964 }, { "epoch": 0.08792785134614618, "grad_norm": 1.76096773147583, "learning_rate": 2.9736772905028503e-05, "loss": 0.1297, "step": 4965 }, { "epoch": 0.08794556088317461, "grad_norm": 1.7950854301452637, "learning_rate": 2.9736612407227534e-05, "loss": 0.1867, "step": 4966 }, { "epoch": 0.08796327042020304, "grad_norm": 1.6441400051116943, "learning_rate": 2.9736451860944604e-05, "loss": 0.186, "step": 4967 }, { "epoch": 0.08798097995723146, "grad_norm": 1.5821802616119385, "learning_rate": 2.9736291266180236e-05, "loss": 0.1348, "step": 4968 }, { "epoch": 0.0879986894942599, "grad_norm": 1.244771122932434, "learning_rate": 2.9736130622934962e-05, "loss": 0.1239, "step": 4969 }, { "epoch": 0.08801639903128833, "grad_norm": 1.2134647369384766, "learning_rate": 2.9735969931209306e-05, "loss": 0.1167, "step": 4970 }, { "epoch": 0.08803410856831675, "grad_norm": 1.1845412254333496, "learning_rate": 2.9735809191003804e-05, "loss": 0.1307, "step": 4971 }, { "epoch": 0.08805181810534518, "grad_norm": 1.4269685745239258, "learning_rate": 2.9735648402318978e-05, "loss": 0.128, "step": 4972 }, { "epoch": 0.08806952764237361, "grad_norm": 1.1927814483642578, "learning_rate": 2.973548756515536e-05, "loss": 0.1531, "step": 4973 }, { "epoch": 0.08808723717940203, "grad_norm": 1.2178125381469727, "learning_rate": 2.9735326679513477e-05, "loss": 0.1235, "step": 4974 }, { "epoch": 0.08810494671643046, "grad_norm": 1.5180481672286987, "learning_rate": 2.973516574539386e-05, "loss": 0.1049, "step": 4975 }, { "epoch": 0.0881226562534589, "grad_norm": 0.878861665725708, "learning_rate": 2.9735004762797032e-05, "loss": 0.0902, "step": 4976 }, { "epoch": 0.08814036579048733, "grad_norm": 1.1724090576171875, "learning_rate": 2.9734843731723536e-05, "loss": 0.1356, "step": 4977 }, { "epoch": 0.08815807532751575, "grad_norm": 1.9598677158355713, "learning_rate": 2.9734682652173894e-05, "loss": 0.1677, "step": 4978 }, { "epoch": 0.08817578486454418, "grad_norm": 0.9672350287437439, "learning_rate": 2.973452152414864e-05, "loss": 0.1456, "step": 4979 }, { "epoch": 0.0881934944015726, "grad_norm": 2.3633670806884766, "learning_rate": 2.973436034764829e-05, "loss": 0.1766, "step": 4980 }, { "epoch": 0.08821120393860103, "grad_norm": 1.1084496974945068, "learning_rate": 2.9734199122673394e-05, "loss": 0.1066, "step": 4981 }, { "epoch": 0.08822891347562946, "grad_norm": 1.8012628555297852, "learning_rate": 2.973403784922447e-05, "loss": 0.1362, "step": 4982 }, { "epoch": 0.08824662301265788, "grad_norm": 1.5048022270202637, "learning_rate": 2.973387652730205e-05, "loss": 0.1585, "step": 4983 }, { "epoch": 0.08826433254968633, "grad_norm": 1.8395304679870605, "learning_rate": 2.973371515690667e-05, "loss": 0.1174, "step": 4984 }, { "epoch": 0.08828204208671475, "grad_norm": 1.3454198837280273, "learning_rate": 2.9733553738038852e-05, "loss": 0.1346, "step": 4985 }, { "epoch": 0.08829975162374318, "grad_norm": 1.7762998342514038, "learning_rate": 2.973339227069914e-05, "loss": 0.1625, "step": 4986 }, { "epoch": 0.0883174611607716, "grad_norm": 1.4581129550933838, "learning_rate": 2.9733230754888052e-05, "loss": 0.1017, "step": 4987 }, { "epoch": 0.08833517069780003, "grad_norm": 1.521200180053711, "learning_rate": 2.9733069190606127e-05, "loss": 0.1497, "step": 4988 }, { "epoch": 0.08835288023482846, "grad_norm": 1.828229308128357, "learning_rate": 2.9732907577853893e-05, "loss": 0.1533, "step": 4989 }, { "epoch": 0.08837058977185688, "grad_norm": 1.229766607284546, "learning_rate": 2.9732745916631883e-05, "loss": 0.122, "step": 4990 }, { "epoch": 0.08838829930888532, "grad_norm": 1.3958600759506226, "learning_rate": 2.9732584206940632e-05, "loss": 0.1408, "step": 4991 }, { "epoch": 0.08840600884591375, "grad_norm": 4.321630001068115, "learning_rate": 2.9732422448780668e-05, "loss": 0.1382, "step": 4992 }, { "epoch": 0.08842371838294218, "grad_norm": 3.348264217376709, "learning_rate": 2.9732260642152524e-05, "loss": 0.1415, "step": 4993 }, { "epoch": 0.0884414279199706, "grad_norm": 3.3459091186523438, "learning_rate": 2.973209878705673e-05, "loss": 0.0974, "step": 4994 }, { "epoch": 0.08845913745699903, "grad_norm": 1.4073973894119263, "learning_rate": 2.9731936883493824e-05, "loss": 0.1972, "step": 4995 }, { "epoch": 0.08847684699402746, "grad_norm": 1.6385263204574585, "learning_rate": 2.9731774931464335e-05, "loss": 0.1527, "step": 4996 }, { "epoch": 0.08849455653105588, "grad_norm": 0.9784613847732544, "learning_rate": 2.9731612930968798e-05, "loss": 0.1267, "step": 4997 }, { "epoch": 0.08851226606808431, "grad_norm": 1.802881121635437, "learning_rate": 2.973145088200774e-05, "loss": 0.1117, "step": 4998 }, { "epoch": 0.08852997560511275, "grad_norm": 1.420070767402649, "learning_rate": 2.97312887845817e-05, "loss": 0.1712, "step": 4999 }, { "epoch": 0.08854768514214117, "grad_norm": 1.3416515588760376, "learning_rate": 2.9731126638691213e-05, "loss": 0.1299, "step": 5000 }, { "epoch": 0.0885653946791696, "grad_norm": 1.2791351079940796, "learning_rate": 2.973096444433681e-05, "loss": 0.1116, "step": 5001 }, { "epoch": 0.08858310421619803, "grad_norm": 1.636086344718933, "learning_rate": 2.973080220151902e-05, "loss": 0.1543, "step": 5002 }, { "epoch": 0.08860081375322645, "grad_norm": 1.582292914390564, "learning_rate": 2.9730639910238383e-05, "loss": 0.1463, "step": 5003 }, { "epoch": 0.08861852329025488, "grad_norm": 0.9903737306594849, "learning_rate": 2.9730477570495426e-05, "loss": 0.098, "step": 5004 }, { "epoch": 0.0886362328272833, "grad_norm": 1.0057517290115356, "learning_rate": 2.9730315182290692e-05, "loss": 0.1437, "step": 5005 }, { "epoch": 0.08865394236431175, "grad_norm": 1.7732161283493042, "learning_rate": 2.9730152745624713e-05, "loss": 0.1673, "step": 5006 }, { "epoch": 0.08867165190134017, "grad_norm": 1.194839358329773, "learning_rate": 2.9729990260498018e-05, "loss": 0.1386, "step": 5007 }, { "epoch": 0.0886893614383686, "grad_norm": 1.0740236043930054, "learning_rate": 2.9729827726911148e-05, "loss": 0.0991, "step": 5008 }, { "epoch": 0.08870707097539703, "grad_norm": 1.327571153640747, "learning_rate": 2.972966514486463e-05, "loss": 0.1684, "step": 5009 }, { "epoch": 0.08872478051242545, "grad_norm": 1.8783546686172485, "learning_rate": 2.9729502514359008e-05, "loss": 0.1657, "step": 5010 }, { "epoch": 0.08874249004945388, "grad_norm": 1.1042126417160034, "learning_rate": 2.972933983539481e-05, "loss": 0.1485, "step": 5011 }, { "epoch": 0.0887601995864823, "grad_norm": 1.4966042041778564, "learning_rate": 2.972917710797257e-05, "loss": 0.1314, "step": 5012 }, { "epoch": 0.08877790912351075, "grad_norm": 1.3231667280197144, "learning_rate": 2.9729014332092838e-05, "loss": 0.119, "step": 5013 }, { "epoch": 0.08879561866053917, "grad_norm": 0.9284341931343079, "learning_rate": 2.9728851507756128e-05, "loss": 0.1203, "step": 5014 }, { "epoch": 0.0888133281975676, "grad_norm": 1.0327969789505005, "learning_rate": 2.972868863496299e-05, "loss": 0.1521, "step": 5015 }, { "epoch": 0.08883103773459602, "grad_norm": 0.9742038249969482, "learning_rate": 2.9728525713713956e-05, "loss": 0.0991, "step": 5016 }, { "epoch": 0.08884874727162445, "grad_norm": 1.3436038494110107, "learning_rate": 2.972836274400956e-05, "loss": 0.1398, "step": 5017 }, { "epoch": 0.08886645680865288, "grad_norm": 2.3623993396759033, "learning_rate": 2.9728199725850344e-05, "loss": 0.1446, "step": 5018 }, { "epoch": 0.0888841663456813, "grad_norm": 1.2964813709259033, "learning_rate": 2.972803665923684e-05, "loss": 0.1457, "step": 5019 }, { "epoch": 0.08890187588270973, "grad_norm": 1.6455821990966797, "learning_rate": 2.9727873544169585e-05, "loss": 0.1654, "step": 5020 }, { "epoch": 0.08891958541973817, "grad_norm": 1.5868802070617676, "learning_rate": 2.9727710380649116e-05, "loss": 0.1676, "step": 5021 }, { "epoch": 0.0889372949567666, "grad_norm": 1.4769059419631958, "learning_rate": 2.9727547168675967e-05, "loss": 0.1649, "step": 5022 }, { "epoch": 0.08895500449379502, "grad_norm": 1.2886624336242676, "learning_rate": 2.9727383908250678e-05, "loss": 0.1175, "step": 5023 }, { "epoch": 0.08897271403082345, "grad_norm": 1.2958556413650513, "learning_rate": 2.9727220599373785e-05, "loss": 0.1583, "step": 5024 }, { "epoch": 0.08899042356785188, "grad_norm": 1.4370909929275513, "learning_rate": 2.972705724204583e-05, "loss": 0.1563, "step": 5025 }, { "epoch": 0.0890081331048803, "grad_norm": 1.2267284393310547, "learning_rate": 2.972689383626734e-05, "loss": 0.1037, "step": 5026 }, { "epoch": 0.08902584264190873, "grad_norm": 1.1054749488830566, "learning_rate": 2.9726730382038863e-05, "loss": 0.138, "step": 5027 }, { "epoch": 0.08904355217893717, "grad_norm": 1.7312853336334229, "learning_rate": 2.972656687936093e-05, "loss": 0.1968, "step": 5028 }, { "epoch": 0.0890612617159656, "grad_norm": 1.528406023979187, "learning_rate": 2.9726403328234085e-05, "loss": 0.1742, "step": 5029 }, { "epoch": 0.08907897125299402, "grad_norm": 1.1905546188354492, "learning_rate": 2.9726239728658863e-05, "loss": 0.1115, "step": 5030 }, { "epoch": 0.08909668079002245, "grad_norm": 0.9481402635574341, "learning_rate": 2.9726076080635796e-05, "loss": 0.1334, "step": 5031 }, { "epoch": 0.08911439032705087, "grad_norm": 1.1494567394256592, "learning_rate": 2.972591238416543e-05, "loss": 0.1633, "step": 5032 }, { "epoch": 0.0891320998640793, "grad_norm": 1.220693588256836, "learning_rate": 2.9725748639248305e-05, "loss": 0.1476, "step": 5033 }, { "epoch": 0.08914980940110773, "grad_norm": 1.1854718923568726, "learning_rate": 2.9725584845884955e-05, "loss": 0.1371, "step": 5034 }, { "epoch": 0.08916751893813615, "grad_norm": 1.720553994178772, "learning_rate": 2.9725421004075918e-05, "loss": 0.1573, "step": 5035 }, { "epoch": 0.0891852284751646, "grad_norm": 1.9880472421646118, "learning_rate": 2.9725257113821737e-05, "loss": 0.1627, "step": 5036 }, { "epoch": 0.08920293801219302, "grad_norm": 1.4942996501922607, "learning_rate": 2.9725093175122952e-05, "loss": 0.1211, "step": 5037 }, { "epoch": 0.08922064754922145, "grad_norm": 1.4168592691421509, "learning_rate": 2.9724929187980097e-05, "loss": 0.1429, "step": 5038 }, { "epoch": 0.08923835708624987, "grad_norm": 1.1049576997756958, "learning_rate": 2.9724765152393717e-05, "loss": 0.1632, "step": 5039 }, { "epoch": 0.0892560666232783, "grad_norm": 1.6566790342330933, "learning_rate": 2.9724601068364347e-05, "loss": 0.1316, "step": 5040 }, { "epoch": 0.08927377616030673, "grad_norm": 2.170409917831421, "learning_rate": 2.9724436935892526e-05, "loss": 0.1853, "step": 5041 }, { "epoch": 0.08929148569733515, "grad_norm": 1.3382093906402588, "learning_rate": 2.9724272754978802e-05, "loss": 0.1056, "step": 5042 }, { "epoch": 0.08930919523436359, "grad_norm": 1.2071675062179565, "learning_rate": 2.9724108525623704e-05, "loss": 0.096, "step": 5043 }, { "epoch": 0.08932690477139202, "grad_norm": 1.635327696800232, "learning_rate": 2.9723944247827782e-05, "loss": 0.1292, "step": 5044 }, { "epoch": 0.08934461430842044, "grad_norm": 1.3144984245300293, "learning_rate": 2.9723779921591573e-05, "loss": 0.0818, "step": 5045 }, { "epoch": 0.08936232384544887, "grad_norm": 1.0788813829421997, "learning_rate": 2.9723615546915617e-05, "loss": 0.1461, "step": 5046 }, { "epoch": 0.0893800333824773, "grad_norm": 1.5506154298782349, "learning_rate": 2.9723451123800452e-05, "loss": 0.1003, "step": 5047 }, { "epoch": 0.08939774291950572, "grad_norm": 1.5523868799209595, "learning_rate": 2.9723286652246624e-05, "loss": 0.144, "step": 5048 }, { "epoch": 0.08941545245653415, "grad_norm": 1.7925829887390137, "learning_rate": 2.9723122132254674e-05, "loss": 0.1476, "step": 5049 }, { "epoch": 0.08943316199356258, "grad_norm": 1.5948034524917603, "learning_rate": 2.9722957563825136e-05, "loss": 0.1646, "step": 5050 }, { "epoch": 0.08945087153059102, "grad_norm": 1.5288492441177368, "learning_rate": 2.9722792946958567e-05, "loss": 0.1602, "step": 5051 }, { "epoch": 0.08946858106761944, "grad_norm": 1.4464255571365356, "learning_rate": 2.9722628281655486e-05, "loss": 0.1238, "step": 5052 }, { "epoch": 0.08948629060464787, "grad_norm": 1.128409504890442, "learning_rate": 2.9722463567916456e-05, "loss": 0.1487, "step": 5053 }, { "epoch": 0.0895040001416763, "grad_norm": 0.9508262872695923, "learning_rate": 2.9722298805742008e-05, "loss": 0.1399, "step": 5054 }, { "epoch": 0.08952170967870472, "grad_norm": 1.1408928632736206, "learning_rate": 2.9722133995132682e-05, "loss": 0.0947, "step": 5055 }, { "epoch": 0.08953941921573315, "grad_norm": 1.7950962781906128, "learning_rate": 2.972196913608903e-05, "loss": 0.1266, "step": 5056 }, { "epoch": 0.08955712875276158, "grad_norm": 1.1564353704452515, "learning_rate": 2.9721804228611585e-05, "loss": 0.1267, "step": 5057 }, { "epoch": 0.08957483828979002, "grad_norm": 1.5599421262741089, "learning_rate": 2.9721639272700892e-05, "loss": 0.1425, "step": 5058 }, { "epoch": 0.08959254782681844, "grad_norm": 1.343186378479004, "learning_rate": 2.97214742683575e-05, "loss": 0.154, "step": 5059 }, { "epoch": 0.08961025736384687, "grad_norm": 1.3137884140014648, "learning_rate": 2.9721309215581937e-05, "loss": 0.1223, "step": 5060 }, { "epoch": 0.0896279669008753, "grad_norm": 1.5999549627304077, "learning_rate": 2.9721144114374767e-05, "loss": 0.1784, "step": 5061 }, { "epoch": 0.08964567643790372, "grad_norm": 1.039596676826477, "learning_rate": 2.9720978964736512e-05, "loss": 0.1217, "step": 5062 }, { "epoch": 0.08966338597493215, "grad_norm": 1.3273035287857056, "learning_rate": 2.9720813766667732e-05, "loss": 0.0945, "step": 5063 }, { "epoch": 0.08968109551196057, "grad_norm": 1.3128111362457275, "learning_rate": 2.9720648520168963e-05, "loss": 0.1468, "step": 5064 }, { "epoch": 0.089698805048989, "grad_norm": 1.8473914861679077, "learning_rate": 2.9720483225240746e-05, "loss": 0.1592, "step": 5065 }, { "epoch": 0.08971651458601744, "grad_norm": 0.9666771292686462, "learning_rate": 2.972031788188363e-05, "loss": 0.1342, "step": 5066 }, { "epoch": 0.08973422412304587, "grad_norm": 1.4879076480865479, "learning_rate": 2.9720152490098158e-05, "loss": 0.1586, "step": 5067 }, { "epoch": 0.08975193366007429, "grad_norm": 1.4528920650482178, "learning_rate": 2.971998704988487e-05, "loss": 0.1724, "step": 5068 }, { "epoch": 0.08976964319710272, "grad_norm": 1.6148262023925781, "learning_rate": 2.9719821561244316e-05, "loss": 0.1116, "step": 5069 }, { "epoch": 0.08978735273413115, "grad_norm": 1.4508529901504517, "learning_rate": 2.971965602417704e-05, "loss": 0.185, "step": 5070 }, { "epoch": 0.08980506227115957, "grad_norm": 1.5574077367782593, "learning_rate": 2.971949043868358e-05, "loss": 0.1602, "step": 5071 }, { "epoch": 0.089822771808188, "grad_norm": 1.122094988822937, "learning_rate": 2.971932480476449e-05, "loss": 0.1623, "step": 5072 }, { "epoch": 0.08984048134521644, "grad_norm": 1.3341931104660034, "learning_rate": 2.9719159122420306e-05, "loss": 0.1222, "step": 5073 }, { "epoch": 0.08985819088224486, "grad_norm": 1.5017591714859009, "learning_rate": 2.9718993391651577e-05, "loss": 0.1324, "step": 5074 }, { "epoch": 0.08987590041927329, "grad_norm": 1.3920046091079712, "learning_rate": 2.971882761245885e-05, "loss": 0.2262, "step": 5075 }, { "epoch": 0.08989360995630172, "grad_norm": 2.2086102962493896, "learning_rate": 2.9718661784842667e-05, "loss": 0.1305, "step": 5076 }, { "epoch": 0.08991131949333014, "grad_norm": 1.30555260181427, "learning_rate": 2.971849590880358e-05, "loss": 0.1406, "step": 5077 }, { "epoch": 0.08992902903035857, "grad_norm": 1.2843592166900635, "learning_rate": 2.9718329984342127e-05, "loss": 0.1688, "step": 5078 }, { "epoch": 0.089946738567387, "grad_norm": 1.0760654211044312, "learning_rate": 2.9718164011458856e-05, "loss": 0.1282, "step": 5079 }, { "epoch": 0.08996444810441544, "grad_norm": 1.1162251234054565, "learning_rate": 2.9717997990154316e-05, "loss": 0.1376, "step": 5080 }, { "epoch": 0.08998215764144386, "grad_norm": 1.0348502397537231, "learning_rate": 2.971783192042905e-05, "loss": 0.1233, "step": 5081 }, { "epoch": 0.08999986717847229, "grad_norm": 2.842388868331909, "learning_rate": 2.9717665802283603e-05, "loss": 0.1546, "step": 5082 }, { "epoch": 0.09001757671550072, "grad_norm": 1.2901642322540283, "learning_rate": 2.971749963571853e-05, "loss": 0.1326, "step": 5083 }, { "epoch": 0.09003528625252914, "grad_norm": 1.5080604553222656, "learning_rate": 2.971733342073437e-05, "loss": 0.1121, "step": 5084 }, { "epoch": 0.09005299578955757, "grad_norm": 1.6312966346740723, "learning_rate": 2.971716715733167e-05, "loss": 0.0936, "step": 5085 }, { "epoch": 0.090070705326586, "grad_norm": 1.7503571510314941, "learning_rate": 2.971700084551098e-05, "loss": 0.127, "step": 5086 }, { "epoch": 0.09008841486361442, "grad_norm": 1.3993316888809204, "learning_rate": 2.971683448527285e-05, "loss": 0.1588, "step": 5087 }, { "epoch": 0.09010612440064286, "grad_norm": 1.1597614288330078, "learning_rate": 2.971666807661781e-05, "loss": 0.1376, "step": 5088 }, { "epoch": 0.09012383393767129, "grad_norm": 1.5227559804916382, "learning_rate": 2.9716501619546432e-05, "loss": 0.169, "step": 5089 }, { "epoch": 0.09014154347469971, "grad_norm": 1.030648946762085, "learning_rate": 2.971633511405925e-05, "loss": 0.1208, "step": 5090 }, { "epoch": 0.09015925301172814, "grad_norm": 1.0673024654388428, "learning_rate": 2.971616856015681e-05, "loss": 0.1011, "step": 5091 }, { "epoch": 0.09017696254875657, "grad_norm": 2.0415546894073486, "learning_rate": 2.971600195783967e-05, "loss": 0.1621, "step": 5092 }, { "epoch": 0.090194672085785, "grad_norm": 1.2722046375274658, "learning_rate": 2.9715835307108366e-05, "loss": 0.1584, "step": 5093 }, { "epoch": 0.09021238162281342, "grad_norm": 1.0851175785064697, "learning_rate": 2.971566860796346e-05, "loss": 0.1499, "step": 5094 }, { "epoch": 0.09023009115984186, "grad_norm": 1.2199201583862305, "learning_rate": 2.9715501860405486e-05, "loss": 0.1678, "step": 5095 }, { "epoch": 0.09024780069687029, "grad_norm": 1.0050569772720337, "learning_rate": 2.9715335064434998e-05, "loss": 0.1269, "step": 5096 }, { "epoch": 0.09026551023389871, "grad_norm": 1.2698253393173218, "learning_rate": 2.971516822005255e-05, "loss": 0.1324, "step": 5097 }, { "epoch": 0.09028321977092714, "grad_norm": 1.1319489479064941, "learning_rate": 2.971500132725869e-05, "loss": 0.1594, "step": 5098 }, { "epoch": 0.09030092930795557, "grad_norm": 1.9284310340881348, "learning_rate": 2.9714834386053957e-05, "loss": 0.1797, "step": 5099 }, { "epoch": 0.09031863884498399, "grad_norm": 1.0836024284362793, "learning_rate": 2.971466739643891e-05, "loss": 0.1038, "step": 5100 }, { "epoch": 0.09033634838201242, "grad_norm": 1.5577243566513062, "learning_rate": 2.9714500358414095e-05, "loss": 0.1276, "step": 5101 }, { "epoch": 0.09035405791904084, "grad_norm": 1.5106877088546753, "learning_rate": 2.9714333271980065e-05, "loss": 0.1688, "step": 5102 }, { "epoch": 0.09037176745606929, "grad_norm": 1.3300669193267822, "learning_rate": 2.9714166137137363e-05, "loss": 0.1374, "step": 5103 }, { "epoch": 0.09038947699309771, "grad_norm": 1.524884581565857, "learning_rate": 2.971399895388654e-05, "loss": 0.1579, "step": 5104 }, { "epoch": 0.09040718653012614, "grad_norm": 1.3201295137405396, "learning_rate": 2.9713831722228156e-05, "loss": 0.1023, "step": 5105 }, { "epoch": 0.09042489606715456, "grad_norm": 1.2001231908798218, "learning_rate": 2.971366444216275e-05, "loss": 0.144, "step": 5106 }, { "epoch": 0.09044260560418299, "grad_norm": 1.1718333959579468, "learning_rate": 2.9713497113690878e-05, "loss": 0.1617, "step": 5107 }, { "epoch": 0.09046031514121142, "grad_norm": 1.4437052011489868, "learning_rate": 2.9713329736813085e-05, "loss": 0.1707, "step": 5108 }, { "epoch": 0.09047802467823984, "grad_norm": 1.388763666152954, "learning_rate": 2.9713162311529927e-05, "loss": 0.1704, "step": 5109 }, { "epoch": 0.09049573421526828, "grad_norm": 1.2270900011062622, "learning_rate": 2.9712994837841957e-05, "loss": 0.1167, "step": 5110 }, { "epoch": 0.09051344375229671, "grad_norm": 1.3995803594589233, "learning_rate": 2.971282731574972e-05, "loss": 0.1244, "step": 5111 }, { "epoch": 0.09053115328932514, "grad_norm": 0.80002760887146, "learning_rate": 2.971265974525377e-05, "loss": 0.1014, "step": 5112 }, { "epoch": 0.09054886282635356, "grad_norm": 1.2285093069076538, "learning_rate": 2.9712492126354655e-05, "loss": 0.1517, "step": 5113 }, { "epoch": 0.09056657236338199, "grad_norm": 1.5511276721954346, "learning_rate": 2.971232445905293e-05, "loss": 0.1793, "step": 5114 }, { "epoch": 0.09058428190041042, "grad_norm": 1.9048994779586792, "learning_rate": 2.9712156743349145e-05, "loss": 0.1488, "step": 5115 }, { "epoch": 0.09060199143743884, "grad_norm": 1.594909429550171, "learning_rate": 2.971198897924385e-05, "loss": 0.1536, "step": 5116 }, { "epoch": 0.09061970097446727, "grad_norm": 1.4298609495162964, "learning_rate": 2.9711821166737605e-05, "loss": 0.1319, "step": 5117 }, { "epoch": 0.09063741051149571, "grad_norm": 1.468615174293518, "learning_rate": 2.9711653305830953e-05, "loss": 0.1377, "step": 5118 }, { "epoch": 0.09065512004852413, "grad_norm": 1.2218924760818481, "learning_rate": 2.971148539652445e-05, "loss": 0.1491, "step": 5119 }, { "epoch": 0.09067282958555256, "grad_norm": 1.3122378587722778, "learning_rate": 2.9711317438818647e-05, "loss": 0.1463, "step": 5120 }, { "epoch": 0.09069053912258099, "grad_norm": 1.6715806722640991, "learning_rate": 2.97111494327141e-05, "loss": 0.0976, "step": 5121 }, { "epoch": 0.09070824865960941, "grad_norm": 1.1206480264663696, "learning_rate": 2.9710981378211357e-05, "loss": 0.1724, "step": 5122 }, { "epoch": 0.09072595819663784, "grad_norm": 1.5556755065917969, "learning_rate": 2.9710813275310976e-05, "loss": 0.1246, "step": 5123 }, { "epoch": 0.09074366773366627, "grad_norm": 1.4379866123199463, "learning_rate": 2.9710645124013504e-05, "loss": 0.145, "step": 5124 }, { "epoch": 0.09076137727069471, "grad_norm": 1.1439510583877563, "learning_rate": 2.97104769243195e-05, "loss": 0.1431, "step": 5125 }, { "epoch": 0.09077908680772313, "grad_norm": 1.251935362815857, "learning_rate": 2.9710308676229513e-05, "loss": 0.1142, "step": 5126 }, { "epoch": 0.09079679634475156, "grad_norm": 1.4599723815917969, "learning_rate": 2.97101403797441e-05, "loss": 0.1414, "step": 5127 }, { "epoch": 0.09081450588177999, "grad_norm": 1.4434763193130493, "learning_rate": 2.9709972034863814e-05, "loss": 0.1352, "step": 5128 }, { "epoch": 0.09083221541880841, "grad_norm": 1.1229534149169922, "learning_rate": 2.9709803641589202e-05, "loss": 0.105, "step": 5129 }, { "epoch": 0.09084992495583684, "grad_norm": 1.0812277793884277, "learning_rate": 2.970963519992083e-05, "loss": 0.1061, "step": 5130 }, { "epoch": 0.09086763449286527, "grad_norm": 3.2907824516296387, "learning_rate": 2.9709466709859242e-05, "loss": 0.1458, "step": 5131 }, { "epoch": 0.0908853440298937, "grad_norm": 1.6921648979187012, "learning_rate": 2.9709298171405e-05, "loss": 0.1421, "step": 5132 }, { "epoch": 0.09090305356692213, "grad_norm": 1.2261468172073364, "learning_rate": 2.970912958455865e-05, "loss": 0.1582, "step": 5133 }, { "epoch": 0.09092076310395056, "grad_norm": 1.7795586585998535, "learning_rate": 2.9708960949320755e-05, "loss": 0.0942, "step": 5134 }, { "epoch": 0.09093847264097898, "grad_norm": 1.2717863321304321, "learning_rate": 2.9708792265691864e-05, "loss": 0.1461, "step": 5135 }, { "epoch": 0.09095618217800741, "grad_norm": 1.537598967552185, "learning_rate": 2.9708623533672534e-05, "loss": 0.174, "step": 5136 }, { "epoch": 0.09097389171503584, "grad_norm": 1.9032243490219116, "learning_rate": 2.9708454753263326e-05, "loss": 0.167, "step": 5137 }, { "epoch": 0.09099160125206426, "grad_norm": 1.4457569122314453, "learning_rate": 2.9708285924464783e-05, "loss": 0.1315, "step": 5138 }, { "epoch": 0.09100931078909269, "grad_norm": 0.946901261806488, "learning_rate": 2.970811704727747e-05, "loss": 0.1497, "step": 5139 }, { "epoch": 0.09102702032612113, "grad_norm": 2.6022706031799316, "learning_rate": 2.970794812170194e-05, "loss": 0.1558, "step": 5140 }, { "epoch": 0.09104472986314956, "grad_norm": 2.2990424633026123, "learning_rate": 2.9707779147738748e-05, "loss": 0.232, "step": 5141 }, { "epoch": 0.09106243940017798, "grad_norm": 0.9396705031394958, "learning_rate": 2.9707610125388446e-05, "loss": 0.1588, "step": 5142 }, { "epoch": 0.09108014893720641, "grad_norm": 3.768770217895508, "learning_rate": 2.97074410546516e-05, "loss": 0.1673, "step": 5143 }, { "epoch": 0.09109785847423484, "grad_norm": 1.1842843294143677, "learning_rate": 2.9707271935528754e-05, "loss": 0.1223, "step": 5144 }, { "epoch": 0.09111556801126326, "grad_norm": 1.2782063484191895, "learning_rate": 2.9707102768020475e-05, "loss": 0.1357, "step": 5145 }, { "epoch": 0.09113327754829169, "grad_norm": 1.5212392807006836, "learning_rate": 2.9706933552127316e-05, "loss": 0.1461, "step": 5146 }, { "epoch": 0.09115098708532013, "grad_norm": 1.4653874635696411, "learning_rate": 2.9706764287849835e-05, "loss": 0.1241, "step": 5147 }, { "epoch": 0.09116869662234856, "grad_norm": 1.3343430757522583, "learning_rate": 2.9706594975188583e-05, "loss": 0.173, "step": 5148 }, { "epoch": 0.09118640615937698, "grad_norm": 1.2983876466751099, "learning_rate": 2.970642561414412e-05, "loss": 0.1556, "step": 5149 }, { "epoch": 0.09120411569640541, "grad_norm": 1.527662754058838, "learning_rate": 2.9706256204717008e-05, "loss": 0.1578, "step": 5150 }, { "epoch": 0.09122182523343383, "grad_norm": 1.07017183303833, "learning_rate": 2.9706086746907803e-05, "loss": 0.1538, "step": 5151 }, { "epoch": 0.09123953477046226, "grad_norm": 1.4620305299758911, "learning_rate": 2.9705917240717053e-05, "loss": 0.1573, "step": 5152 }, { "epoch": 0.09125724430749069, "grad_norm": 1.1874860525131226, "learning_rate": 2.970574768614533e-05, "loss": 0.1322, "step": 5153 }, { "epoch": 0.09127495384451911, "grad_norm": 1.1596258878707886, "learning_rate": 2.970557808319318e-05, "loss": 0.1109, "step": 5154 }, { "epoch": 0.09129266338154755, "grad_norm": 0.9552448987960815, "learning_rate": 2.9705408431861165e-05, "loss": 0.1274, "step": 5155 }, { "epoch": 0.09131037291857598, "grad_norm": 1.3895463943481445, "learning_rate": 2.9705238732149844e-05, "loss": 0.0939, "step": 5156 }, { "epoch": 0.0913280824556044, "grad_norm": 1.0447492599487305, "learning_rate": 2.9705068984059775e-05, "loss": 0.1199, "step": 5157 }, { "epoch": 0.09134579199263283, "grad_norm": 1.758507251739502, "learning_rate": 2.9704899187591518e-05, "loss": 0.1338, "step": 5158 }, { "epoch": 0.09136350152966126, "grad_norm": 1.1512610912322998, "learning_rate": 2.9704729342745627e-05, "loss": 0.0963, "step": 5159 }, { "epoch": 0.09138121106668969, "grad_norm": 1.7027982473373413, "learning_rate": 2.9704559449522667e-05, "loss": 0.1207, "step": 5160 }, { "epoch": 0.09139892060371811, "grad_norm": 0.7936742901802063, "learning_rate": 2.970438950792319e-05, "loss": 0.1544, "step": 5161 }, { "epoch": 0.09141663014074655, "grad_norm": 1.334448218345642, "learning_rate": 2.9704219517947763e-05, "loss": 0.1064, "step": 5162 }, { "epoch": 0.09143433967777498, "grad_norm": 1.0943104028701782, "learning_rate": 2.970404947959694e-05, "loss": 0.1355, "step": 5163 }, { "epoch": 0.0914520492148034, "grad_norm": 1.0099825859069824, "learning_rate": 2.970387939287128e-05, "loss": 0.1232, "step": 5164 }, { "epoch": 0.09146975875183183, "grad_norm": 1.3963637351989746, "learning_rate": 2.9703709257771343e-05, "loss": 0.1416, "step": 5165 }, { "epoch": 0.09148746828886026, "grad_norm": 1.5523312091827393, "learning_rate": 2.9703539074297693e-05, "loss": 0.1337, "step": 5166 }, { "epoch": 0.09150517782588868, "grad_norm": 4.4363322257995605, "learning_rate": 2.9703368842450882e-05, "loss": 0.1576, "step": 5167 }, { "epoch": 0.09152288736291711, "grad_norm": 1.1690157651901245, "learning_rate": 2.9703198562231476e-05, "loss": 0.1335, "step": 5168 }, { "epoch": 0.09154059689994554, "grad_norm": 1.457483172416687, "learning_rate": 2.970302823364004e-05, "loss": 0.1542, "step": 5169 }, { "epoch": 0.09155830643697398, "grad_norm": 1.2615593671798706, "learning_rate": 2.970285785667712e-05, "loss": 0.1552, "step": 5170 }, { "epoch": 0.0915760159740024, "grad_norm": 1.158553123474121, "learning_rate": 2.970268743134329e-05, "loss": 0.1137, "step": 5171 }, { "epoch": 0.09159372551103083, "grad_norm": 0.9328778386116028, "learning_rate": 2.9702516957639102e-05, "loss": 0.1248, "step": 5172 }, { "epoch": 0.09161143504805926, "grad_norm": 1.2281171083450317, "learning_rate": 2.9702346435565125e-05, "loss": 0.0991, "step": 5173 }, { "epoch": 0.09162914458508768, "grad_norm": 1.5180031061172485, "learning_rate": 2.9702175865121912e-05, "loss": 0.1528, "step": 5174 }, { "epoch": 0.09164685412211611, "grad_norm": 1.5170109272003174, "learning_rate": 2.9702005246310027e-05, "loss": 0.1625, "step": 5175 }, { "epoch": 0.09166456365914454, "grad_norm": 2.0194265842437744, "learning_rate": 2.9701834579130033e-05, "loss": 0.1771, "step": 5176 }, { "epoch": 0.09168227319617298, "grad_norm": 1.0218149423599243, "learning_rate": 2.970166386358249e-05, "loss": 0.1324, "step": 5177 }, { "epoch": 0.0916999827332014, "grad_norm": 1.4353700876235962, "learning_rate": 2.970149309966796e-05, "loss": 0.1175, "step": 5178 }, { "epoch": 0.09171769227022983, "grad_norm": 1.520491361618042, "learning_rate": 2.9701322287387007e-05, "loss": 0.1755, "step": 5179 }, { "epoch": 0.09173540180725825, "grad_norm": 1.1359282732009888, "learning_rate": 2.970115142674019e-05, "loss": 0.1468, "step": 5180 }, { "epoch": 0.09175311134428668, "grad_norm": 0.93144291639328, "learning_rate": 2.9700980517728063e-05, "loss": 0.1376, "step": 5181 }, { "epoch": 0.09177082088131511, "grad_norm": 1.406034231185913, "learning_rate": 2.9700809560351207e-05, "loss": 0.1174, "step": 5182 }, { "epoch": 0.09178853041834353, "grad_norm": 1.6732873916625977, "learning_rate": 2.970063855461017e-05, "loss": 0.1713, "step": 5183 }, { "epoch": 0.09180623995537196, "grad_norm": 1.319818139076233, "learning_rate": 2.9700467500505522e-05, "loss": 0.1151, "step": 5184 }, { "epoch": 0.0918239494924004, "grad_norm": 1.5099998712539673, "learning_rate": 2.970029639803782e-05, "loss": 0.1725, "step": 5185 }, { "epoch": 0.09184165902942883, "grad_norm": 1.2500121593475342, "learning_rate": 2.9700125247207632e-05, "loss": 0.1708, "step": 5186 }, { "epoch": 0.09185936856645725, "grad_norm": 1.2667235136032104, "learning_rate": 2.9699954048015517e-05, "loss": 0.126, "step": 5187 }, { "epoch": 0.09187707810348568, "grad_norm": 1.076453447341919, "learning_rate": 2.969978280046204e-05, "loss": 0.0987, "step": 5188 }, { "epoch": 0.0918947876405141, "grad_norm": 1.5542370080947876, "learning_rate": 2.9699611504547765e-05, "loss": 0.1578, "step": 5189 }, { "epoch": 0.09191249717754253, "grad_norm": 0.9713289737701416, "learning_rate": 2.9699440160273255e-05, "loss": 0.1414, "step": 5190 }, { "epoch": 0.09193020671457096, "grad_norm": 1.1330111026763916, "learning_rate": 2.9699268767639073e-05, "loss": 0.1297, "step": 5191 }, { "epoch": 0.0919479162515994, "grad_norm": 1.3411328792572021, "learning_rate": 2.9699097326645784e-05, "loss": 0.1332, "step": 5192 }, { "epoch": 0.09196562578862782, "grad_norm": 1.3923802375793457, "learning_rate": 2.9698925837293952e-05, "loss": 0.1366, "step": 5193 }, { "epoch": 0.09198333532565625, "grad_norm": 2.2098169326782227, "learning_rate": 2.969875429958414e-05, "loss": 0.1063, "step": 5194 }, { "epoch": 0.09200104486268468, "grad_norm": 0.9233964681625366, "learning_rate": 2.9698582713516914e-05, "loss": 0.1918, "step": 5195 }, { "epoch": 0.0920187543997131, "grad_norm": 1.5639630556106567, "learning_rate": 2.9698411079092833e-05, "loss": 0.1299, "step": 5196 }, { "epoch": 0.09203646393674153, "grad_norm": 1.5365492105484009, "learning_rate": 2.9698239396312472e-05, "loss": 0.1496, "step": 5197 }, { "epoch": 0.09205417347376996, "grad_norm": 1.1664761304855347, "learning_rate": 2.969806766517639e-05, "loss": 0.1311, "step": 5198 }, { "epoch": 0.0920718830107984, "grad_norm": 1.0613876581192017, "learning_rate": 2.9697895885685146e-05, "loss": 0.1339, "step": 5199 }, { "epoch": 0.09208959254782682, "grad_norm": 1.7233484983444214, "learning_rate": 2.9697724057839317e-05, "loss": 0.1115, "step": 5200 }, { "epoch": 0.09210730208485525, "grad_norm": 1.8987834453582764, "learning_rate": 2.969755218163946e-05, "loss": 0.1657, "step": 5201 }, { "epoch": 0.09212501162188368, "grad_norm": 1.0829293727874756, "learning_rate": 2.9697380257086144e-05, "loss": 0.1306, "step": 5202 }, { "epoch": 0.0921427211589121, "grad_norm": 1.0256595611572266, "learning_rate": 2.969720828417993e-05, "loss": 0.1061, "step": 5203 }, { "epoch": 0.09216043069594053, "grad_norm": 1.5981892347335815, "learning_rate": 2.9697036262921394e-05, "loss": 0.1431, "step": 5204 }, { "epoch": 0.09217814023296896, "grad_norm": 1.3147451877593994, "learning_rate": 2.969686419331109e-05, "loss": 0.1485, "step": 5205 }, { "epoch": 0.09219584976999738, "grad_norm": 1.4229836463928223, "learning_rate": 2.969669207534959e-05, "loss": 0.1571, "step": 5206 }, { "epoch": 0.09221355930702582, "grad_norm": 1.052575945854187, "learning_rate": 2.969651990903746e-05, "loss": 0.1654, "step": 5207 }, { "epoch": 0.09223126884405425, "grad_norm": 1.4520347118377686, "learning_rate": 2.9696347694375266e-05, "loss": 0.143, "step": 5208 }, { "epoch": 0.09224897838108267, "grad_norm": 3.7238047122955322, "learning_rate": 2.9696175431363577e-05, "loss": 0.1566, "step": 5209 }, { "epoch": 0.0922666879181111, "grad_norm": 1.4772791862487793, "learning_rate": 2.9696003120002952e-05, "loss": 0.1704, "step": 5210 }, { "epoch": 0.09228439745513953, "grad_norm": 2.293372869491577, "learning_rate": 2.9695830760293968e-05, "loss": 0.1688, "step": 5211 }, { "epoch": 0.09230210699216795, "grad_norm": 1.1026763916015625, "learning_rate": 2.9695658352237187e-05, "loss": 0.1514, "step": 5212 }, { "epoch": 0.09231981652919638, "grad_norm": 0.9384638071060181, "learning_rate": 2.9695485895833175e-05, "loss": 0.1357, "step": 5213 }, { "epoch": 0.09233752606622482, "grad_norm": 0.9830913543701172, "learning_rate": 2.96953133910825e-05, "loss": 0.1464, "step": 5214 }, { "epoch": 0.09235523560325325, "grad_norm": 0.9411222338676453, "learning_rate": 2.9695140837985726e-05, "loss": 0.1123, "step": 5215 }, { "epoch": 0.09237294514028167, "grad_norm": 1.113219976425171, "learning_rate": 2.9694968236543436e-05, "loss": 0.1103, "step": 5216 }, { "epoch": 0.0923906546773101, "grad_norm": 1.7185412645339966, "learning_rate": 2.9694795586756177e-05, "loss": 0.1741, "step": 5217 }, { "epoch": 0.09240836421433853, "grad_norm": 1.753105878829956, "learning_rate": 2.9694622888624534e-05, "loss": 0.1144, "step": 5218 }, { "epoch": 0.09242607375136695, "grad_norm": 1.334687352180481, "learning_rate": 2.9694450142149064e-05, "loss": 0.139, "step": 5219 }, { "epoch": 0.09244378328839538, "grad_norm": 1.5604006052017212, "learning_rate": 2.9694277347330337e-05, "loss": 0.1008, "step": 5220 }, { "epoch": 0.0924614928254238, "grad_norm": 1.4100062847137451, "learning_rate": 2.9694104504168927e-05, "loss": 0.1007, "step": 5221 }, { "epoch": 0.09247920236245225, "grad_norm": 1.4391629695892334, "learning_rate": 2.96939316126654e-05, "loss": 0.1538, "step": 5222 }, { "epoch": 0.09249691189948067, "grad_norm": 1.7789273262023926, "learning_rate": 2.9693758672820322e-05, "loss": 0.099, "step": 5223 }, { "epoch": 0.0925146214365091, "grad_norm": 0.9052527546882629, "learning_rate": 2.9693585684634267e-05, "loss": 0.1516, "step": 5224 }, { "epoch": 0.09253233097353752, "grad_norm": 1.1262922286987305, "learning_rate": 2.9693412648107797e-05, "loss": 0.1458, "step": 5225 }, { "epoch": 0.09255004051056595, "grad_norm": 1.4535257816314697, "learning_rate": 2.969323956324149e-05, "loss": 0.1504, "step": 5226 }, { "epoch": 0.09256775004759438, "grad_norm": 1.2954686880111694, "learning_rate": 2.9693066430035906e-05, "loss": 0.1667, "step": 5227 }, { "epoch": 0.0925854595846228, "grad_norm": 1.4777069091796875, "learning_rate": 2.9692893248491622e-05, "loss": 0.1416, "step": 5228 }, { "epoch": 0.09260316912165124, "grad_norm": 1.1591942310333252, "learning_rate": 2.9692720018609208e-05, "loss": 0.1227, "step": 5229 }, { "epoch": 0.09262087865867967, "grad_norm": 1.4259960651397705, "learning_rate": 2.969254674038923e-05, "loss": 0.1486, "step": 5230 }, { "epoch": 0.0926385881957081, "grad_norm": 1.4408594369888306, "learning_rate": 2.9692373413832258e-05, "loss": 0.1925, "step": 5231 }, { "epoch": 0.09265629773273652, "grad_norm": 1.9857113361358643, "learning_rate": 2.969220003893886e-05, "loss": 0.1113, "step": 5232 }, { "epoch": 0.09267400726976495, "grad_norm": 1.96419358253479, "learning_rate": 2.969202661570962e-05, "loss": 0.1356, "step": 5233 }, { "epoch": 0.09269171680679338, "grad_norm": 0.9589937329292297, "learning_rate": 2.969185314414509e-05, "loss": 0.1424, "step": 5234 }, { "epoch": 0.0927094263438218, "grad_norm": 1.1334874629974365, "learning_rate": 2.969167962424585e-05, "loss": 0.157, "step": 5235 }, { "epoch": 0.09272713588085023, "grad_norm": 1.2422380447387695, "learning_rate": 2.969150605601247e-05, "loss": 0.1526, "step": 5236 }, { "epoch": 0.09274484541787867, "grad_norm": 1.5223921537399292, "learning_rate": 2.9691332439445524e-05, "loss": 0.1627, "step": 5237 }, { "epoch": 0.0927625549549071, "grad_norm": 1.339275598526001, "learning_rate": 2.969115877454558e-05, "loss": 0.116, "step": 5238 }, { "epoch": 0.09278026449193552, "grad_norm": 1.1978681087493896, "learning_rate": 2.9690985061313208e-05, "loss": 0.1631, "step": 5239 }, { "epoch": 0.09279797402896395, "grad_norm": 2.0119245052337646, "learning_rate": 2.969081129974898e-05, "loss": 0.1376, "step": 5240 }, { "epoch": 0.09281568356599237, "grad_norm": 1.6547729969024658, "learning_rate": 2.969063748985347e-05, "loss": 0.0952, "step": 5241 }, { "epoch": 0.0928333931030208, "grad_norm": 1.2397587299346924, "learning_rate": 2.969046363162725e-05, "loss": 0.2163, "step": 5242 }, { "epoch": 0.09285110264004923, "grad_norm": 1.0813202857971191, "learning_rate": 2.9690289725070888e-05, "loss": 0.0937, "step": 5243 }, { "epoch": 0.09286881217707767, "grad_norm": 0.7530693411827087, "learning_rate": 2.969011577018496e-05, "loss": 0.1047, "step": 5244 }, { "epoch": 0.0928865217141061, "grad_norm": 1.3556030988693237, "learning_rate": 2.968994176697004e-05, "loss": 0.1599, "step": 5245 }, { "epoch": 0.09290423125113452, "grad_norm": 1.3030503988265991, "learning_rate": 2.968976771542669e-05, "loss": 0.1667, "step": 5246 }, { "epoch": 0.09292194078816295, "grad_norm": 1.3340357542037964, "learning_rate": 2.9689593615555494e-05, "loss": 0.1733, "step": 5247 }, { "epoch": 0.09293965032519137, "grad_norm": 1.369158387184143, "learning_rate": 2.9689419467357018e-05, "loss": 0.1674, "step": 5248 }, { "epoch": 0.0929573598622198, "grad_norm": 0.9996693134307861, "learning_rate": 2.968924527083184e-05, "loss": 0.1399, "step": 5249 }, { "epoch": 0.09297506939924823, "grad_norm": 1.3340102434158325, "learning_rate": 2.968907102598053e-05, "loss": 0.1193, "step": 5250 }, { "epoch": 0.09299277893627665, "grad_norm": 1.904747486114502, "learning_rate": 2.9688896732803668e-05, "loss": 0.1763, "step": 5251 }, { "epoch": 0.09301048847330509, "grad_norm": 1.3906890153884888, "learning_rate": 2.968872239130181e-05, "loss": 0.1321, "step": 5252 }, { "epoch": 0.09302819801033352, "grad_norm": 1.8013070821762085, "learning_rate": 2.9688548001475552e-05, "loss": 0.1621, "step": 5253 }, { "epoch": 0.09304590754736194, "grad_norm": 1.8564362525939941, "learning_rate": 2.9688373563325456e-05, "loss": 0.1149, "step": 5254 }, { "epoch": 0.09306361708439037, "grad_norm": 1.0304638147354126, "learning_rate": 2.968819907685209e-05, "loss": 0.0849, "step": 5255 }, { "epoch": 0.0930813266214188, "grad_norm": 1.6451144218444824, "learning_rate": 2.9688024542056034e-05, "loss": 0.1114, "step": 5256 }, { "epoch": 0.09309903615844722, "grad_norm": 1.1554710865020752, "learning_rate": 2.968784995893787e-05, "loss": 0.1337, "step": 5257 }, { "epoch": 0.09311674569547565, "grad_norm": 1.688130259513855, "learning_rate": 2.9687675327498162e-05, "loss": 0.1157, "step": 5258 }, { "epoch": 0.09313445523250409, "grad_norm": 1.1226401329040527, "learning_rate": 2.968750064773749e-05, "loss": 0.1604, "step": 5259 }, { "epoch": 0.09315216476953252, "grad_norm": 1.1664412021636963, "learning_rate": 2.968732591965642e-05, "loss": 0.1679, "step": 5260 }, { "epoch": 0.09316987430656094, "grad_norm": 1.4430887699127197, "learning_rate": 2.968715114325554e-05, "loss": 0.1405, "step": 5261 }, { "epoch": 0.09318758384358937, "grad_norm": 1.4231537580490112, "learning_rate": 2.9686976318535413e-05, "loss": 0.1724, "step": 5262 }, { "epoch": 0.0932052933806178, "grad_norm": 1.636359691619873, "learning_rate": 2.9686801445496624e-05, "loss": 0.154, "step": 5263 }, { "epoch": 0.09322300291764622, "grad_norm": 1.0559548139572144, "learning_rate": 2.9686626524139738e-05, "loss": 0.1491, "step": 5264 }, { "epoch": 0.09324071245467465, "grad_norm": 1.0592069625854492, "learning_rate": 2.9686451554465342e-05, "loss": 0.1672, "step": 5265 }, { "epoch": 0.09325842199170309, "grad_norm": 1.0360233783721924, "learning_rate": 2.9686276536474e-05, "loss": 0.1444, "step": 5266 }, { "epoch": 0.09327613152873152, "grad_norm": 0.9267216324806213, "learning_rate": 2.9686101470166298e-05, "loss": 0.0938, "step": 5267 }, { "epoch": 0.09329384106575994, "grad_norm": 1.1528507471084595, "learning_rate": 2.968592635554281e-05, "loss": 0.1228, "step": 5268 }, { "epoch": 0.09331155060278837, "grad_norm": 0.8796104192733765, "learning_rate": 2.9685751192604107e-05, "loss": 0.0991, "step": 5269 }, { "epoch": 0.0933292601398168, "grad_norm": 0.7797181606292725, "learning_rate": 2.9685575981350764e-05, "loss": 0.1415, "step": 5270 }, { "epoch": 0.09334696967684522, "grad_norm": 1.1472158432006836, "learning_rate": 2.9685400721783363e-05, "loss": 0.147, "step": 5271 }, { "epoch": 0.09336467921387365, "grad_norm": 1.423147439956665, "learning_rate": 2.9685225413902487e-05, "loss": 0.1216, "step": 5272 }, { "epoch": 0.09338238875090207, "grad_norm": 1.110607624053955, "learning_rate": 2.9685050057708695e-05, "loss": 0.1223, "step": 5273 }, { "epoch": 0.09340009828793051, "grad_norm": 1.5093573331832886, "learning_rate": 2.9684874653202576e-05, "loss": 0.1623, "step": 5274 }, { "epoch": 0.09341780782495894, "grad_norm": 1.5783777236938477, "learning_rate": 2.9684699200384706e-05, "loss": 0.1822, "step": 5275 }, { "epoch": 0.09343551736198737, "grad_norm": 1.131347417831421, "learning_rate": 2.9684523699255662e-05, "loss": 0.0861, "step": 5276 }, { "epoch": 0.09345322689901579, "grad_norm": 0.9912177324295044, "learning_rate": 2.968434814981602e-05, "loss": 0.1194, "step": 5277 }, { "epoch": 0.09347093643604422, "grad_norm": 1.24705970287323, "learning_rate": 2.9684172552066357e-05, "loss": 0.1524, "step": 5278 }, { "epoch": 0.09348864597307265, "grad_norm": 1.10701322555542, "learning_rate": 2.9683996906007252e-05, "loss": 0.1547, "step": 5279 }, { "epoch": 0.09350635551010107, "grad_norm": 1.4732701778411865, "learning_rate": 2.968382121163928e-05, "loss": 0.1539, "step": 5280 }, { "epoch": 0.09352406504712951, "grad_norm": 2.436929702758789, "learning_rate": 2.968364546896302e-05, "loss": 0.1294, "step": 5281 }, { "epoch": 0.09354177458415794, "grad_norm": 0.9474748373031616, "learning_rate": 2.968346967797906e-05, "loss": 0.1157, "step": 5282 }, { "epoch": 0.09355948412118636, "grad_norm": 1.5840034484863281, "learning_rate": 2.9683293838687963e-05, "loss": 0.1062, "step": 5283 }, { "epoch": 0.09357719365821479, "grad_norm": 1.0756864547729492, "learning_rate": 2.9683117951090315e-05, "loss": 0.0832, "step": 5284 }, { "epoch": 0.09359490319524322, "grad_norm": 1.5909852981567383, "learning_rate": 2.9682942015186696e-05, "loss": 0.1216, "step": 5285 }, { "epoch": 0.09361261273227164, "grad_norm": 1.5391371250152588, "learning_rate": 2.968276603097768e-05, "loss": 0.1142, "step": 5286 }, { "epoch": 0.09363032226930007, "grad_norm": 1.1740012168884277, "learning_rate": 2.968258999846385e-05, "loss": 0.1519, "step": 5287 }, { "epoch": 0.0936480318063285, "grad_norm": 1.863857626914978, "learning_rate": 2.968241391764579e-05, "loss": 0.158, "step": 5288 }, { "epoch": 0.09366574134335694, "grad_norm": 1.2903904914855957, "learning_rate": 2.9682237788524065e-05, "loss": 0.1226, "step": 5289 }, { "epoch": 0.09368345088038536, "grad_norm": 1.4251604080200195, "learning_rate": 2.9682061611099264e-05, "loss": 0.1214, "step": 5290 }, { "epoch": 0.09370116041741379, "grad_norm": 1.9923607110977173, "learning_rate": 2.9681885385371968e-05, "loss": 0.105, "step": 5291 }, { "epoch": 0.09371886995444222, "grad_norm": 1.1500986814498901, "learning_rate": 2.968170911134275e-05, "loss": 0.1514, "step": 5292 }, { "epoch": 0.09373657949147064, "grad_norm": 1.0667351484298706, "learning_rate": 2.9681532789012198e-05, "loss": 0.1078, "step": 5293 }, { "epoch": 0.09375428902849907, "grad_norm": 1.1767994165420532, "learning_rate": 2.9681356418380886e-05, "loss": 0.1165, "step": 5294 }, { "epoch": 0.0937719985655275, "grad_norm": 1.6714404821395874, "learning_rate": 2.96811799994494e-05, "loss": 0.1241, "step": 5295 }, { "epoch": 0.09378970810255594, "grad_norm": 1.5032529830932617, "learning_rate": 2.9681003532218313e-05, "loss": 0.1099, "step": 5296 }, { "epoch": 0.09380741763958436, "grad_norm": 1.847987174987793, "learning_rate": 2.968082701668821e-05, "loss": 0.1318, "step": 5297 }, { "epoch": 0.09382512717661279, "grad_norm": 1.2934279441833496, "learning_rate": 2.968065045285967e-05, "loss": 0.1053, "step": 5298 }, { "epoch": 0.09384283671364121, "grad_norm": 1.1339486837387085, "learning_rate": 2.9680473840733276e-05, "loss": 0.1343, "step": 5299 }, { "epoch": 0.09386054625066964, "grad_norm": 1.2210029363632202, "learning_rate": 2.968029718030961e-05, "loss": 0.1386, "step": 5300 }, { "epoch": 0.09387825578769807, "grad_norm": 1.320026159286499, "learning_rate": 2.9680120471589248e-05, "loss": 0.14, "step": 5301 }, { "epoch": 0.0938959653247265, "grad_norm": 1.060840129852295, "learning_rate": 2.9679943714572777e-05, "loss": 0.1377, "step": 5302 }, { "epoch": 0.09391367486175492, "grad_norm": 1.0308444499969482, "learning_rate": 2.9679766909260772e-05, "loss": 0.1132, "step": 5303 }, { "epoch": 0.09393138439878336, "grad_norm": 1.570163607597351, "learning_rate": 2.967959005565382e-05, "loss": 0.126, "step": 5304 }, { "epoch": 0.09394909393581179, "grad_norm": 1.3146661520004272, "learning_rate": 2.9679413153752507e-05, "loss": 0.1675, "step": 5305 }, { "epoch": 0.09396680347284021, "grad_norm": 1.7394633293151855, "learning_rate": 2.9679236203557403e-05, "loss": 0.1634, "step": 5306 }, { "epoch": 0.09398451300986864, "grad_norm": 1.463675856590271, "learning_rate": 2.9679059205069095e-05, "loss": 0.1138, "step": 5307 }, { "epoch": 0.09400222254689707, "grad_norm": 1.233718752861023, "learning_rate": 2.9678882158288173e-05, "loss": 0.1633, "step": 5308 }, { "epoch": 0.09401993208392549, "grad_norm": 1.4351762533187866, "learning_rate": 2.9678705063215215e-05, "loss": 0.1502, "step": 5309 }, { "epoch": 0.09403764162095392, "grad_norm": 1.3030091524124146, "learning_rate": 2.96785279198508e-05, "loss": 0.1285, "step": 5310 }, { "epoch": 0.09405535115798236, "grad_norm": 1.240120768547058, "learning_rate": 2.9678350728195506e-05, "loss": 0.1423, "step": 5311 }, { "epoch": 0.09407306069501079, "grad_norm": 1.1808626651763916, "learning_rate": 2.967817348824993e-05, "loss": 0.1461, "step": 5312 }, { "epoch": 0.09409077023203921, "grad_norm": 2.0450613498687744, "learning_rate": 2.9677996200014642e-05, "loss": 0.1922, "step": 5313 }, { "epoch": 0.09410847976906764, "grad_norm": 1.1320427656173706, "learning_rate": 2.9677818863490233e-05, "loss": 0.1185, "step": 5314 }, { "epoch": 0.09412618930609606, "grad_norm": 1.143824815750122, "learning_rate": 2.9677641478677286e-05, "loss": 0.1447, "step": 5315 }, { "epoch": 0.09414389884312449, "grad_norm": 1.132852554321289, "learning_rate": 2.9677464045576383e-05, "loss": 0.175, "step": 5316 }, { "epoch": 0.09416160838015292, "grad_norm": 1.0708125829696655, "learning_rate": 2.9677286564188107e-05, "loss": 0.1332, "step": 5317 }, { "epoch": 0.09417931791718134, "grad_norm": 1.3524558544158936, "learning_rate": 2.9677109034513047e-05, "loss": 0.1636, "step": 5318 }, { "epoch": 0.09419702745420978, "grad_norm": 1.5819835662841797, "learning_rate": 2.967693145655178e-05, "loss": 0.1236, "step": 5319 }, { "epoch": 0.09421473699123821, "grad_norm": 1.8552273511886597, "learning_rate": 2.9676753830304892e-05, "loss": 0.1199, "step": 5320 }, { "epoch": 0.09423244652826664, "grad_norm": 0.997168779373169, "learning_rate": 2.9676576155772968e-05, "loss": 0.074, "step": 5321 }, { "epoch": 0.09425015606529506, "grad_norm": 1.0451633930206299, "learning_rate": 2.9676398432956595e-05, "loss": 0.1348, "step": 5322 }, { "epoch": 0.09426786560232349, "grad_norm": 1.368035078048706, "learning_rate": 2.9676220661856353e-05, "loss": 0.1366, "step": 5323 }, { "epoch": 0.09428557513935192, "grad_norm": 1.125442624092102, "learning_rate": 2.9676042842472834e-05, "loss": 0.1618, "step": 5324 }, { "epoch": 0.09430328467638034, "grad_norm": 1.0396016836166382, "learning_rate": 2.967586497480661e-05, "loss": 0.1113, "step": 5325 }, { "epoch": 0.09432099421340878, "grad_norm": 1.1314297914505005, "learning_rate": 2.9675687058858282e-05, "loss": 0.12, "step": 5326 }, { "epoch": 0.09433870375043721, "grad_norm": 1.2934635877609253, "learning_rate": 2.9675509094628422e-05, "loss": 0.0973, "step": 5327 }, { "epoch": 0.09435641328746563, "grad_norm": 1.4181987047195435, "learning_rate": 2.9675331082117627e-05, "loss": 0.1418, "step": 5328 }, { "epoch": 0.09437412282449406, "grad_norm": 1.708583116531372, "learning_rate": 2.9675153021326475e-05, "loss": 0.1424, "step": 5329 }, { "epoch": 0.09439183236152249, "grad_norm": 0.8642504215240479, "learning_rate": 2.9674974912255553e-05, "loss": 0.1018, "step": 5330 }, { "epoch": 0.09440954189855091, "grad_norm": 1.5761839151382446, "learning_rate": 2.9674796754905448e-05, "loss": 0.139, "step": 5331 }, { "epoch": 0.09442725143557934, "grad_norm": 1.5929316282272339, "learning_rate": 2.9674618549276748e-05, "loss": 0.1756, "step": 5332 }, { "epoch": 0.09444496097260778, "grad_norm": 1.127227783203125, "learning_rate": 2.967444029537004e-05, "loss": 0.121, "step": 5333 }, { "epoch": 0.0944626705096362, "grad_norm": 0.945132315158844, "learning_rate": 2.9674261993185902e-05, "loss": 0.1164, "step": 5334 }, { "epoch": 0.09448038004666463, "grad_norm": 0.9970144033432007, "learning_rate": 2.967408364272493e-05, "loss": 0.1113, "step": 5335 }, { "epoch": 0.09449808958369306, "grad_norm": 1.4549297094345093, "learning_rate": 2.96739052439877e-05, "loss": 0.1177, "step": 5336 }, { "epoch": 0.09451579912072149, "grad_norm": 1.160865306854248, "learning_rate": 2.9673726796974812e-05, "loss": 0.1383, "step": 5337 }, { "epoch": 0.09453350865774991, "grad_norm": 1.2884896993637085, "learning_rate": 2.967354830168685e-05, "loss": 0.109, "step": 5338 }, { "epoch": 0.09455121819477834, "grad_norm": 0.996896505355835, "learning_rate": 2.967336975812439e-05, "loss": 0.1019, "step": 5339 }, { "epoch": 0.09456892773180677, "grad_norm": 0.8281222581863403, "learning_rate": 2.9673191166288032e-05, "loss": 0.1694, "step": 5340 }, { "epoch": 0.0945866372688352, "grad_norm": 1.5139265060424805, "learning_rate": 2.967301252617836e-05, "loss": 0.1561, "step": 5341 }, { "epoch": 0.09460434680586363, "grad_norm": 1.8000692129135132, "learning_rate": 2.967283383779596e-05, "loss": 0.1443, "step": 5342 }, { "epoch": 0.09462205634289206, "grad_norm": 1.6169003248214722, "learning_rate": 2.9672655101141417e-05, "loss": 0.1189, "step": 5343 }, { "epoch": 0.09463976587992048, "grad_norm": 1.1311217546463013, "learning_rate": 2.9672476316215328e-05, "loss": 0.1285, "step": 5344 }, { "epoch": 0.09465747541694891, "grad_norm": 1.193833827972412, "learning_rate": 2.9672297483018277e-05, "loss": 0.1731, "step": 5345 }, { "epoch": 0.09467518495397734, "grad_norm": 1.2079817056655884, "learning_rate": 2.9672118601550845e-05, "loss": 0.1513, "step": 5346 }, { "epoch": 0.09469289449100576, "grad_norm": 1.111327052116394, "learning_rate": 2.967193967181363e-05, "loss": 0.1578, "step": 5347 }, { "epoch": 0.0947106040280342, "grad_norm": 1.618686556816101, "learning_rate": 2.9671760693807218e-05, "loss": 0.2013, "step": 5348 }, { "epoch": 0.09472831356506263, "grad_norm": 0.8521748185157776, "learning_rate": 2.96715816675322e-05, "loss": 0.156, "step": 5349 }, { "epoch": 0.09474602310209106, "grad_norm": 1.2847718000411987, "learning_rate": 2.9671402592989156e-05, "loss": 0.148, "step": 5350 }, { "epoch": 0.09476373263911948, "grad_norm": 1.9349853992462158, "learning_rate": 2.9671223470178686e-05, "loss": 0.1517, "step": 5351 }, { "epoch": 0.09478144217614791, "grad_norm": 1.5833066701889038, "learning_rate": 2.9671044299101372e-05, "loss": 0.1188, "step": 5352 }, { "epoch": 0.09479915171317634, "grad_norm": 1.1274147033691406, "learning_rate": 2.967086507975781e-05, "loss": 0.0959, "step": 5353 }, { "epoch": 0.09481686125020476, "grad_norm": 0.8655201196670532, "learning_rate": 2.9670685812148582e-05, "loss": 0.1188, "step": 5354 }, { "epoch": 0.09483457078723319, "grad_norm": 1.0614430904388428, "learning_rate": 2.9670506496274283e-05, "loss": 0.1272, "step": 5355 }, { "epoch": 0.09485228032426163, "grad_norm": 1.1354295015335083, "learning_rate": 2.96703271321355e-05, "loss": 0.1037, "step": 5356 }, { "epoch": 0.09486998986129005, "grad_norm": 1.126366376876831, "learning_rate": 2.9670147719732827e-05, "loss": 0.1633, "step": 5357 }, { "epoch": 0.09488769939831848, "grad_norm": 1.1119418144226074, "learning_rate": 2.9669968259066848e-05, "loss": 0.151, "step": 5358 }, { "epoch": 0.09490540893534691, "grad_norm": 1.0498406887054443, "learning_rate": 2.966978875013816e-05, "loss": 0.122, "step": 5359 }, { "epoch": 0.09492311847237533, "grad_norm": 1.2867498397827148, "learning_rate": 2.9669609192947352e-05, "loss": 0.1129, "step": 5360 }, { "epoch": 0.09494082800940376, "grad_norm": 1.7988253831863403, "learning_rate": 2.9669429587495015e-05, "loss": 0.1403, "step": 5361 }, { "epoch": 0.09495853754643219, "grad_norm": 1.38666832447052, "learning_rate": 2.9669249933781736e-05, "loss": 0.1529, "step": 5362 }, { "epoch": 0.09497624708346063, "grad_norm": 1.1894513368606567, "learning_rate": 2.9669070231808108e-05, "loss": 0.1908, "step": 5363 }, { "epoch": 0.09499395662048905, "grad_norm": 1.5779013633728027, "learning_rate": 2.966889048157472e-05, "loss": 0.1634, "step": 5364 }, { "epoch": 0.09501166615751748, "grad_norm": 1.1053965091705322, "learning_rate": 2.9668710683082168e-05, "loss": 0.1651, "step": 5365 }, { "epoch": 0.0950293756945459, "grad_norm": 1.5701082944869995, "learning_rate": 2.9668530836331043e-05, "loss": 0.123, "step": 5366 }, { "epoch": 0.09504708523157433, "grad_norm": 1.5180083513259888, "learning_rate": 2.9668350941321933e-05, "loss": 0.15, "step": 5367 }, { "epoch": 0.09506479476860276, "grad_norm": 1.114882230758667, "learning_rate": 2.966817099805543e-05, "loss": 0.1753, "step": 5368 }, { "epoch": 0.09508250430563119, "grad_norm": 1.1219348907470703, "learning_rate": 2.9667991006532135e-05, "loss": 0.1026, "step": 5369 }, { "epoch": 0.09510021384265961, "grad_norm": 1.4143284559249878, "learning_rate": 2.966781096675263e-05, "loss": 0.165, "step": 5370 }, { "epoch": 0.09511792337968805, "grad_norm": 1.3075406551361084, "learning_rate": 2.9667630878717508e-05, "loss": 0.1552, "step": 5371 }, { "epoch": 0.09513563291671648, "grad_norm": 1.0974541902542114, "learning_rate": 2.9667450742427367e-05, "loss": 0.1399, "step": 5372 }, { "epoch": 0.0951533424537449, "grad_norm": 1.1887749433517456, "learning_rate": 2.9667270557882795e-05, "loss": 0.1665, "step": 5373 }, { "epoch": 0.09517105199077333, "grad_norm": 0.8809696435928345, "learning_rate": 2.9667090325084386e-05, "loss": 0.1182, "step": 5374 }, { "epoch": 0.09518876152780176, "grad_norm": 1.1259205341339111, "learning_rate": 2.9666910044032736e-05, "loss": 0.093, "step": 5375 }, { "epoch": 0.09520647106483018, "grad_norm": 1.6348780393600464, "learning_rate": 2.9666729714728435e-05, "loss": 0.1449, "step": 5376 }, { "epoch": 0.09522418060185861, "grad_norm": 1.9423201084136963, "learning_rate": 2.9666549337172068e-05, "loss": 0.1454, "step": 5377 }, { "epoch": 0.09524189013888705, "grad_norm": 0.9851625561714172, "learning_rate": 2.966636891136425e-05, "loss": 0.1371, "step": 5378 }, { "epoch": 0.09525959967591548, "grad_norm": 1.145267367362976, "learning_rate": 2.9666188437305555e-05, "loss": 0.1437, "step": 5379 }, { "epoch": 0.0952773092129439, "grad_norm": 1.2868443727493286, "learning_rate": 2.9666007914996583e-05, "loss": 0.128, "step": 5380 }, { "epoch": 0.09529501874997233, "grad_norm": 1.2463568449020386, "learning_rate": 2.966582734443793e-05, "loss": 0.1388, "step": 5381 }, { "epoch": 0.09531272828700076, "grad_norm": 0.9091947078704834, "learning_rate": 2.9665646725630187e-05, "loss": 0.1332, "step": 5382 }, { "epoch": 0.09533043782402918, "grad_norm": 1.916898250579834, "learning_rate": 2.966546605857395e-05, "loss": 0.1312, "step": 5383 }, { "epoch": 0.09534814736105761, "grad_norm": 1.0636879205703735, "learning_rate": 2.9665285343269817e-05, "loss": 0.1319, "step": 5384 }, { "epoch": 0.09536585689808603, "grad_norm": 0.9050979018211365, "learning_rate": 2.9665104579718374e-05, "loss": 0.1557, "step": 5385 }, { "epoch": 0.09538356643511448, "grad_norm": 1.316719889640808, "learning_rate": 2.9664923767920223e-05, "loss": 0.1427, "step": 5386 }, { "epoch": 0.0954012759721429, "grad_norm": 1.4866902828216553, "learning_rate": 2.9664742907875953e-05, "loss": 0.1509, "step": 5387 }, { "epoch": 0.09541898550917133, "grad_norm": 1.4602152109146118, "learning_rate": 2.9664561999586163e-05, "loss": 0.1561, "step": 5388 }, { "epoch": 0.09543669504619975, "grad_norm": 0.9353485107421875, "learning_rate": 2.966438104305145e-05, "loss": 0.1174, "step": 5389 }, { "epoch": 0.09545440458322818, "grad_norm": 0.7311817407608032, "learning_rate": 2.96642000382724e-05, "loss": 0.0924, "step": 5390 }, { "epoch": 0.09547211412025661, "grad_norm": 0.9656736254692078, "learning_rate": 2.9664018985249624e-05, "loss": 0.116, "step": 5391 }, { "epoch": 0.09548982365728503, "grad_norm": 1.6404712200164795, "learning_rate": 2.96638378839837e-05, "loss": 0.133, "step": 5392 }, { "epoch": 0.09550753319431347, "grad_norm": 1.413062572479248, "learning_rate": 2.966365673447524e-05, "loss": 0.1438, "step": 5393 }, { "epoch": 0.0955252427313419, "grad_norm": 1.3553190231323242, "learning_rate": 2.966347553672483e-05, "loss": 0.1331, "step": 5394 }, { "epoch": 0.09554295226837033, "grad_norm": 1.281958818435669, "learning_rate": 2.9663294290733066e-05, "loss": 0.1117, "step": 5395 }, { "epoch": 0.09556066180539875, "grad_norm": 1.3538645505905151, "learning_rate": 2.9663112996500547e-05, "loss": 0.1394, "step": 5396 }, { "epoch": 0.09557837134242718, "grad_norm": 1.3327478170394897, "learning_rate": 2.9662931654027873e-05, "loss": 0.1116, "step": 5397 }, { "epoch": 0.0955960808794556, "grad_norm": 1.6552129983901978, "learning_rate": 2.966275026331563e-05, "loss": 0.1376, "step": 5398 }, { "epoch": 0.09561379041648403, "grad_norm": 0.9718282222747803, "learning_rate": 2.966256882436443e-05, "loss": 0.1205, "step": 5399 }, { "epoch": 0.09563149995351247, "grad_norm": 1.3936522006988525, "learning_rate": 2.9662387337174858e-05, "loss": 0.1811, "step": 5400 }, { "epoch": 0.0956492094905409, "grad_norm": 1.5788564682006836, "learning_rate": 2.9662205801747516e-05, "loss": 0.1681, "step": 5401 }, { "epoch": 0.09566691902756932, "grad_norm": 0.8822656869888306, "learning_rate": 2.9662024218083e-05, "loss": 0.1225, "step": 5402 }, { "epoch": 0.09568462856459775, "grad_norm": 2.0324506759643555, "learning_rate": 2.9661842586181903e-05, "loss": 0.1413, "step": 5403 }, { "epoch": 0.09570233810162618, "grad_norm": 1.2438502311706543, "learning_rate": 2.966166090604483e-05, "loss": 0.1727, "step": 5404 }, { "epoch": 0.0957200476386546, "grad_norm": 1.2584152221679688, "learning_rate": 2.966147917767238e-05, "loss": 0.0949, "step": 5405 }, { "epoch": 0.09573775717568303, "grad_norm": 1.4735453128814697, "learning_rate": 2.966129740106514e-05, "loss": 0.1351, "step": 5406 }, { "epoch": 0.09575546671271146, "grad_norm": 1.7772603034973145, "learning_rate": 2.9661115576223718e-05, "loss": 0.0904, "step": 5407 }, { "epoch": 0.0957731762497399, "grad_norm": 1.6298998594284058, "learning_rate": 2.9660933703148703e-05, "loss": 0.1688, "step": 5408 }, { "epoch": 0.09579088578676832, "grad_norm": 1.607654333114624, "learning_rate": 2.9660751781840706e-05, "loss": 0.1273, "step": 5409 }, { "epoch": 0.09580859532379675, "grad_norm": 1.2720654010772705, "learning_rate": 2.9660569812300314e-05, "loss": 0.117, "step": 5410 }, { "epoch": 0.09582630486082518, "grad_norm": 0.9608213305473328, "learning_rate": 2.9660387794528135e-05, "loss": 0.1185, "step": 5411 }, { "epoch": 0.0958440143978536, "grad_norm": 1.555738091468811, "learning_rate": 2.966020572852476e-05, "loss": 0.1524, "step": 5412 }, { "epoch": 0.09586172393488203, "grad_norm": 1.0641443729400635, "learning_rate": 2.9660023614290787e-05, "loss": 0.1305, "step": 5413 }, { "epoch": 0.09587943347191046, "grad_norm": 1.336928367614746, "learning_rate": 2.965984145182682e-05, "loss": 0.1538, "step": 5414 }, { "epoch": 0.0958971430089389, "grad_norm": 0.9324845671653748, "learning_rate": 2.9659659241133463e-05, "loss": 0.1089, "step": 5415 }, { "epoch": 0.09591485254596732, "grad_norm": 1.0296528339385986, "learning_rate": 2.9659476982211305e-05, "loss": 0.1628, "step": 5416 }, { "epoch": 0.09593256208299575, "grad_norm": 1.2716792821884155, "learning_rate": 2.9659294675060955e-05, "loss": 0.1213, "step": 5417 }, { "epoch": 0.09595027162002417, "grad_norm": 1.4866323471069336, "learning_rate": 2.9659112319683006e-05, "loss": 0.1345, "step": 5418 }, { "epoch": 0.0959679811570526, "grad_norm": 1.0147864818572998, "learning_rate": 2.965892991607806e-05, "loss": 0.1403, "step": 5419 }, { "epoch": 0.09598569069408103, "grad_norm": 1.0185613632202148, "learning_rate": 2.9658747464246717e-05, "loss": 0.1398, "step": 5420 }, { "epoch": 0.09600340023110945, "grad_norm": 1.0349905490875244, "learning_rate": 2.9658564964189577e-05, "loss": 0.1287, "step": 5421 }, { "epoch": 0.09602110976813788, "grad_norm": 0.9737305045127869, "learning_rate": 2.965838241590724e-05, "loss": 0.1328, "step": 5422 }, { "epoch": 0.09603881930516632, "grad_norm": 1.5460221767425537, "learning_rate": 2.9658199819400307e-05, "loss": 0.1903, "step": 5423 }, { "epoch": 0.09605652884219475, "grad_norm": 1.3887660503387451, "learning_rate": 2.965801717466938e-05, "loss": 0.1557, "step": 5424 }, { "epoch": 0.09607423837922317, "grad_norm": 1.0691616535186768, "learning_rate": 2.965783448171506e-05, "loss": 0.1669, "step": 5425 }, { "epoch": 0.0960919479162516, "grad_norm": 1.5995172262191772, "learning_rate": 2.965765174053795e-05, "loss": 0.1517, "step": 5426 }, { "epoch": 0.09610965745328003, "grad_norm": 1.3001199960708618, "learning_rate": 2.9657468951138647e-05, "loss": 0.1093, "step": 5427 }, { "epoch": 0.09612736699030845, "grad_norm": 3.443268060684204, "learning_rate": 2.965728611351775e-05, "loss": 0.0976, "step": 5428 }, { "epoch": 0.09614507652733688, "grad_norm": 2.104219913482666, "learning_rate": 2.9657103227675866e-05, "loss": 0.1858, "step": 5429 }, { "epoch": 0.09616278606436532, "grad_norm": 1.4950249195098877, "learning_rate": 2.9656920293613595e-05, "loss": 0.1157, "step": 5430 }, { "epoch": 0.09618049560139375, "grad_norm": 1.4317305088043213, "learning_rate": 2.9656737311331542e-05, "loss": 0.1296, "step": 5431 }, { "epoch": 0.09619820513842217, "grad_norm": 0.9919546842575073, "learning_rate": 2.9656554280830298e-05, "loss": 0.1891, "step": 5432 }, { "epoch": 0.0962159146754506, "grad_norm": 1.3077336549758911, "learning_rate": 2.9656371202110478e-05, "loss": 0.1935, "step": 5433 }, { "epoch": 0.09623362421247902, "grad_norm": 1.4806963205337524, "learning_rate": 2.965618807517268e-05, "loss": 0.1588, "step": 5434 }, { "epoch": 0.09625133374950745, "grad_norm": 1.2385605573654175, "learning_rate": 2.9656004900017504e-05, "loss": 0.0979, "step": 5435 }, { "epoch": 0.09626904328653588, "grad_norm": 1.4775266647338867, "learning_rate": 2.9655821676645557e-05, "loss": 0.1051, "step": 5436 }, { "epoch": 0.0962867528235643, "grad_norm": 1.5305911302566528, "learning_rate": 2.9655638405057432e-05, "loss": 0.1495, "step": 5437 }, { "epoch": 0.09630446236059274, "grad_norm": 0.7453545331954956, "learning_rate": 2.9655455085253745e-05, "loss": 0.1423, "step": 5438 }, { "epoch": 0.09632217189762117, "grad_norm": 1.8960323333740234, "learning_rate": 2.965527171723509e-05, "loss": 0.1489, "step": 5439 }, { "epoch": 0.0963398814346496, "grad_norm": 1.2754379510879517, "learning_rate": 2.9655088301002076e-05, "loss": 0.1133, "step": 5440 }, { "epoch": 0.09635759097167802, "grad_norm": 1.7859687805175781, "learning_rate": 2.9654904836555303e-05, "loss": 0.1351, "step": 5441 }, { "epoch": 0.09637530050870645, "grad_norm": 0.9079410433769226, "learning_rate": 2.9654721323895374e-05, "loss": 0.1237, "step": 5442 }, { "epoch": 0.09639301004573488, "grad_norm": 1.5212758779525757, "learning_rate": 2.9654537763022898e-05, "loss": 0.1597, "step": 5443 }, { "epoch": 0.0964107195827633, "grad_norm": 1.5356436967849731, "learning_rate": 2.9654354153938466e-05, "loss": 0.1405, "step": 5444 }, { "epoch": 0.09642842911979174, "grad_norm": 1.4106526374816895, "learning_rate": 2.96541704966427e-05, "loss": 0.1444, "step": 5445 }, { "epoch": 0.09644613865682017, "grad_norm": 1.0231646299362183, "learning_rate": 2.9653986791136192e-05, "loss": 0.133, "step": 5446 }, { "epoch": 0.0964638481938486, "grad_norm": 1.257609486579895, "learning_rate": 2.965380303741955e-05, "loss": 0.1214, "step": 5447 }, { "epoch": 0.09648155773087702, "grad_norm": 0.9008616209030151, "learning_rate": 2.9653619235493376e-05, "loss": 0.1216, "step": 5448 }, { "epoch": 0.09649926726790545, "grad_norm": 1.0216684341430664, "learning_rate": 2.9653435385358282e-05, "loss": 0.112, "step": 5449 }, { "epoch": 0.09651697680493387, "grad_norm": 2.23734712600708, "learning_rate": 2.9653251487014866e-05, "loss": 0.181, "step": 5450 }, { "epoch": 0.0965346863419623, "grad_norm": 1.1576521396636963, "learning_rate": 2.965306754046373e-05, "loss": 0.14, "step": 5451 }, { "epoch": 0.09655239587899073, "grad_norm": 1.364982008934021, "learning_rate": 2.9652883545705488e-05, "loss": 0.1469, "step": 5452 }, { "epoch": 0.09657010541601917, "grad_norm": 0.9386643767356873, "learning_rate": 2.965269950274074e-05, "loss": 0.1095, "step": 5453 }, { "epoch": 0.0965878149530476, "grad_norm": 1.563585638999939, "learning_rate": 2.9652515411570093e-05, "loss": 0.1516, "step": 5454 }, { "epoch": 0.09660552449007602, "grad_norm": 1.611382246017456, "learning_rate": 2.965233127219415e-05, "loss": 0.1805, "step": 5455 }, { "epoch": 0.09662323402710445, "grad_norm": 1.188709020614624, "learning_rate": 2.965214708461352e-05, "loss": 0.1199, "step": 5456 }, { "epoch": 0.09664094356413287, "grad_norm": 1.1870836019515991, "learning_rate": 2.9651962848828812e-05, "loss": 0.1445, "step": 5457 }, { "epoch": 0.0966586531011613, "grad_norm": 1.136733055114746, "learning_rate": 2.9651778564840627e-05, "loss": 0.1795, "step": 5458 }, { "epoch": 0.09667636263818973, "grad_norm": 1.0280158519744873, "learning_rate": 2.9651594232649572e-05, "loss": 0.1546, "step": 5459 }, { "epoch": 0.09669407217521817, "grad_norm": 1.1547088623046875, "learning_rate": 2.9651409852256254e-05, "loss": 0.1024, "step": 5460 }, { "epoch": 0.09671178171224659, "grad_norm": 1.1647683382034302, "learning_rate": 2.9651225423661276e-05, "loss": 0.1022, "step": 5461 }, { "epoch": 0.09672949124927502, "grad_norm": 1.5508592128753662, "learning_rate": 2.965104094686525e-05, "loss": 0.1432, "step": 5462 }, { "epoch": 0.09674720078630344, "grad_norm": 1.0017565488815308, "learning_rate": 2.9650856421868782e-05, "loss": 0.1345, "step": 5463 }, { "epoch": 0.09676491032333187, "grad_norm": 1.305922269821167, "learning_rate": 2.965067184867248e-05, "loss": 0.1302, "step": 5464 }, { "epoch": 0.0967826198603603, "grad_norm": 1.338387370109558, "learning_rate": 2.9650487227276946e-05, "loss": 0.1126, "step": 5465 }, { "epoch": 0.09680032939738872, "grad_norm": 0.7153000831604004, "learning_rate": 2.9650302557682794e-05, "loss": 0.0925, "step": 5466 }, { "epoch": 0.09681803893441716, "grad_norm": 1.0823460817337036, "learning_rate": 2.9650117839890626e-05, "loss": 0.1153, "step": 5467 }, { "epoch": 0.09683574847144559, "grad_norm": 1.6544733047485352, "learning_rate": 2.964993307390105e-05, "loss": 0.1558, "step": 5468 }, { "epoch": 0.09685345800847402, "grad_norm": 0.9854970574378967, "learning_rate": 2.964974825971468e-05, "loss": 0.1173, "step": 5469 }, { "epoch": 0.09687116754550244, "grad_norm": 0.8065592050552368, "learning_rate": 2.964956339733212e-05, "loss": 0.0969, "step": 5470 }, { "epoch": 0.09688887708253087, "grad_norm": 1.1036999225616455, "learning_rate": 2.964937848675398e-05, "loss": 0.1742, "step": 5471 }, { "epoch": 0.0969065866195593, "grad_norm": 1.2934538125991821, "learning_rate": 2.964919352798086e-05, "loss": 0.1148, "step": 5472 }, { "epoch": 0.09692429615658772, "grad_norm": 2.411724805831909, "learning_rate": 2.9649008521013376e-05, "loss": 0.1776, "step": 5473 }, { "epoch": 0.09694200569361615, "grad_norm": 1.0763624906539917, "learning_rate": 2.964882346585214e-05, "loss": 0.1434, "step": 5474 }, { "epoch": 0.09695971523064459, "grad_norm": 1.5863600969314575, "learning_rate": 2.9648638362497755e-05, "loss": 0.171, "step": 5475 }, { "epoch": 0.09697742476767302, "grad_norm": 1.2404818534851074, "learning_rate": 2.964845321095083e-05, "loss": 0.1039, "step": 5476 }, { "epoch": 0.09699513430470144, "grad_norm": 1.2496472597122192, "learning_rate": 2.9648268011211976e-05, "loss": 0.1518, "step": 5477 }, { "epoch": 0.09701284384172987, "grad_norm": 1.1987366676330566, "learning_rate": 2.96480827632818e-05, "loss": 0.1611, "step": 5478 }, { "epoch": 0.0970305533787583, "grad_norm": 1.6774557828903198, "learning_rate": 2.9647897467160917e-05, "loss": 0.1559, "step": 5479 }, { "epoch": 0.09704826291578672, "grad_norm": 1.9779034852981567, "learning_rate": 2.9647712122849932e-05, "loss": 0.1821, "step": 5480 }, { "epoch": 0.09706597245281515, "grad_norm": 2.1432902812957764, "learning_rate": 2.9647526730349454e-05, "loss": 0.1497, "step": 5481 }, { "epoch": 0.09708368198984359, "grad_norm": 1.2317826747894287, "learning_rate": 2.9647341289660096e-05, "loss": 0.1393, "step": 5482 }, { "epoch": 0.09710139152687201, "grad_norm": 1.303497314453125, "learning_rate": 2.9647155800782465e-05, "loss": 0.1886, "step": 5483 }, { "epoch": 0.09711910106390044, "grad_norm": 0.7501279711723328, "learning_rate": 2.9646970263717176e-05, "loss": 0.1238, "step": 5484 }, { "epoch": 0.09713681060092887, "grad_norm": 1.2443548440933228, "learning_rate": 2.964678467846483e-05, "loss": 0.1412, "step": 5485 }, { "epoch": 0.09715452013795729, "grad_norm": 1.1655783653259277, "learning_rate": 2.964659904502605e-05, "loss": 0.1338, "step": 5486 }, { "epoch": 0.09717222967498572, "grad_norm": 1.6037629842758179, "learning_rate": 2.964641336340144e-05, "loss": 0.1752, "step": 5487 }, { "epoch": 0.09718993921201415, "grad_norm": 1.1823021173477173, "learning_rate": 2.964622763359161e-05, "loss": 0.1476, "step": 5488 }, { "epoch": 0.09720764874904257, "grad_norm": 0.9580889940261841, "learning_rate": 2.964604185559717e-05, "loss": 0.1245, "step": 5489 }, { "epoch": 0.09722535828607101, "grad_norm": 1.562277913093567, "learning_rate": 2.9645856029418735e-05, "loss": 0.1577, "step": 5490 }, { "epoch": 0.09724306782309944, "grad_norm": 1.5309100151062012, "learning_rate": 2.964567015505692e-05, "loss": 0.1663, "step": 5491 }, { "epoch": 0.09726077736012786, "grad_norm": 1.6301305294036865, "learning_rate": 2.964548423251233e-05, "loss": 0.1946, "step": 5492 }, { "epoch": 0.09727848689715629, "grad_norm": 1.605614185333252, "learning_rate": 2.964529826178557e-05, "loss": 0.1277, "step": 5493 }, { "epoch": 0.09729619643418472, "grad_norm": 1.3082597255706787, "learning_rate": 2.964511224287727e-05, "loss": 0.1443, "step": 5494 }, { "epoch": 0.09731390597121314, "grad_norm": 1.1660724878311157, "learning_rate": 2.9644926175788026e-05, "loss": 0.1257, "step": 5495 }, { "epoch": 0.09733161550824157, "grad_norm": 1.5450119972229004, "learning_rate": 2.964474006051846e-05, "loss": 0.1892, "step": 5496 }, { "epoch": 0.09734932504527001, "grad_norm": 0.9916715025901794, "learning_rate": 2.9644553897069178e-05, "loss": 0.0846, "step": 5497 }, { "epoch": 0.09736703458229844, "grad_norm": 1.1524407863616943, "learning_rate": 2.9644367685440796e-05, "loss": 0.1534, "step": 5498 }, { "epoch": 0.09738474411932686, "grad_norm": 2.055739402770996, "learning_rate": 2.9644181425633922e-05, "loss": 0.1675, "step": 5499 }, { "epoch": 0.09740245365635529, "grad_norm": 1.423177719116211, "learning_rate": 2.9643995117649176e-05, "loss": 0.0954, "step": 5500 }, { "epoch": 0.09742016319338372, "grad_norm": 0.9438083171844482, "learning_rate": 2.9643808761487167e-05, "loss": 0.1323, "step": 5501 }, { "epoch": 0.09743787273041214, "grad_norm": 1.482751488685608, "learning_rate": 2.964362235714851e-05, "loss": 0.1236, "step": 5502 }, { "epoch": 0.09745558226744057, "grad_norm": 1.1834604740142822, "learning_rate": 2.964343590463381e-05, "loss": 0.1205, "step": 5503 }, { "epoch": 0.097473291804469, "grad_norm": 1.3854187726974487, "learning_rate": 2.9643249403943692e-05, "loss": 0.1194, "step": 5504 }, { "epoch": 0.09749100134149744, "grad_norm": 1.286825180053711, "learning_rate": 2.9643062855078764e-05, "loss": 0.156, "step": 5505 }, { "epoch": 0.09750871087852586, "grad_norm": 2.0404279232025146, "learning_rate": 2.964287625803964e-05, "loss": 0.1154, "step": 5506 }, { "epoch": 0.09752642041555429, "grad_norm": 1.2157113552093506, "learning_rate": 2.9642689612826938e-05, "loss": 0.1147, "step": 5507 }, { "epoch": 0.09754412995258271, "grad_norm": 1.2679612636566162, "learning_rate": 2.964250291944126e-05, "loss": 0.1225, "step": 5508 }, { "epoch": 0.09756183948961114, "grad_norm": 0.8463433384895325, "learning_rate": 2.9642316177883234e-05, "loss": 0.1032, "step": 5509 }, { "epoch": 0.09757954902663957, "grad_norm": 1.1583935022354126, "learning_rate": 2.9642129388153466e-05, "loss": 0.0865, "step": 5510 }, { "epoch": 0.097597258563668, "grad_norm": 1.3426214456558228, "learning_rate": 2.9641942550252575e-05, "loss": 0.1529, "step": 5511 }, { "epoch": 0.09761496810069643, "grad_norm": 2.705657720565796, "learning_rate": 2.9641755664181175e-05, "loss": 0.1253, "step": 5512 }, { "epoch": 0.09763267763772486, "grad_norm": 1.3721076250076294, "learning_rate": 2.9641568729939875e-05, "loss": 0.1443, "step": 5513 }, { "epoch": 0.09765038717475329, "grad_norm": 1.46921706199646, "learning_rate": 2.9641381747529302e-05, "loss": 0.1667, "step": 5514 }, { "epoch": 0.09766809671178171, "grad_norm": 1.254006028175354, "learning_rate": 2.9641194716950056e-05, "loss": 0.1265, "step": 5515 }, { "epoch": 0.09768580624881014, "grad_norm": 1.205550193786621, "learning_rate": 2.9641007638202767e-05, "loss": 0.1184, "step": 5516 }, { "epoch": 0.09770351578583857, "grad_norm": 1.3084640502929688, "learning_rate": 2.964082051128804e-05, "loss": 0.1749, "step": 5517 }, { "epoch": 0.09772122532286699, "grad_norm": 1.231984257698059, "learning_rate": 2.96406333362065e-05, "loss": 0.1145, "step": 5518 }, { "epoch": 0.09773893485989542, "grad_norm": 0.7222150564193726, "learning_rate": 2.964044611295875e-05, "loss": 0.1206, "step": 5519 }, { "epoch": 0.09775664439692386, "grad_norm": 1.1146764755249023, "learning_rate": 2.9640258841545417e-05, "loss": 0.1033, "step": 5520 }, { "epoch": 0.09777435393395228, "grad_norm": 1.8694074153900146, "learning_rate": 2.9640071521967108e-05, "loss": 0.1808, "step": 5521 }, { "epoch": 0.09779206347098071, "grad_norm": 0.7990036010742188, "learning_rate": 2.9639884154224452e-05, "loss": 0.1291, "step": 5522 }, { "epoch": 0.09780977300800914, "grad_norm": 1.1660264730453491, "learning_rate": 2.9639696738318053e-05, "loss": 0.1664, "step": 5523 }, { "epoch": 0.09782748254503756, "grad_norm": 1.276249885559082, "learning_rate": 2.9639509274248534e-05, "loss": 0.1256, "step": 5524 }, { "epoch": 0.09784519208206599, "grad_norm": 1.4194790124893188, "learning_rate": 2.963932176201651e-05, "loss": 0.1487, "step": 5525 }, { "epoch": 0.09786290161909442, "grad_norm": 1.1328445672988892, "learning_rate": 2.9639134201622595e-05, "loss": 0.1578, "step": 5526 }, { "epoch": 0.09788061115612286, "grad_norm": 1.2039185762405396, "learning_rate": 2.9638946593067415e-05, "loss": 0.1742, "step": 5527 }, { "epoch": 0.09789832069315128, "grad_norm": 0.795330286026001, "learning_rate": 2.963875893635158e-05, "loss": 0.0979, "step": 5528 }, { "epoch": 0.09791603023017971, "grad_norm": 1.2409188747406006, "learning_rate": 2.9638571231475705e-05, "loss": 0.1505, "step": 5529 }, { "epoch": 0.09793373976720814, "grad_norm": 1.3443037271499634, "learning_rate": 2.963838347844041e-05, "loss": 0.1496, "step": 5530 }, { "epoch": 0.09795144930423656, "grad_norm": 1.4265518188476562, "learning_rate": 2.963819567724632e-05, "loss": 0.1606, "step": 5531 }, { "epoch": 0.09796915884126499, "grad_norm": 1.0749150514602661, "learning_rate": 2.9638007827894047e-05, "loss": 0.1553, "step": 5532 }, { "epoch": 0.09798686837829342, "grad_norm": 1.2143778800964355, "learning_rate": 2.9637819930384203e-05, "loss": 0.1494, "step": 5533 }, { "epoch": 0.09800457791532186, "grad_norm": 1.2822076082229614, "learning_rate": 2.9637631984717415e-05, "loss": 0.1672, "step": 5534 }, { "epoch": 0.09802228745235028, "grad_norm": 0.6863235235214233, "learning_rate": 2.9637443990894296e-05, "loss": 0.1154, "step": 5535 }, { "epoch": 0.09803999698937871, "grad_norm": 1.2749834060668945, "learning_rate": 2.963725594891547e-05, "loss": 0.0934, "step": 5536 }, { "epoch": 0.09805770652640713, "grad_norm": 1.243805170059204, "learning_rate": 2.9637067858781556e-05, "loss": 0.1136, "step": 5537 }, { "epoch": 0.09807541606343556, "grad_norm": 1.6217917203903198, "learning_rate": 2.963687972049316e-05, "loss": 0.1304, "step": 5538 }, { "epoch": 0.09809312560046399, "grad_norm": 1.121659755706787, "learning_rate": 2.9636691534050918e-05, "loss": 0.1155, "step": 5539 }, { "epoch": 0.09811083513749241, "grad_norm": 1.3492019176483154, "learning_rate": 2.9636503299455435e-05, "loss": 0.1047, "step": 5540 }, { "epoch": 0.09812854467452084, "grad_norm": 1.2063251733779907, "learning_rate": 2.963631501670734e-05, "loss": 0.167, "step": 5541 }, { "epoch": 0.09814625421154928, "grad_norm": 1.2329679727554321, "learning_rate": 2.963612668580725e-05, "loss": 0.1644, "step": 5542 }, { "epoch": 0.0981639637485777, "grad_norm": 1.417626976966858, "learning_rate": 2.963593830675578e-05, "loss": 0.1247, "step": 5543 }, { "epoch": 0.09818167328560613, "grad_norm": 1.7880858182907104, "learning_rate": 2.9635749879553556e-05, "loss": 0.1359, "step": 5544 }, { "epoch": 0.09819938282263456, "grad_norm": 1.1438939571380615, "learning_rate": 2.9635561404201193e-05, "loss": 0.1263, "step": 5545 }, { "epoch": 0.09821709235966299, "grad_norm": 0.9594239592552185, "learning_rate": 2.963537288069932e-05, "loss": 0.1142, "step": 5546 }, { "epoch": 0.09823480189669141, "grad_norm": 1.3694443702697754, "learning_rate": 2.9635184309048543e-05, "loss": 0.1536, "step": 5547 }, { "epoch": 0.09825251143371984, "grad_norm": 1.5786128044128418, "learning_rate": 2.9634995689249493e-05, "loss": 0.1434, "step": 5548 }, { "epoch": 0.09827022097074828, "grad_norm": 0.8754352331161499, "learning_rate": 2.9634807021302786e-05, "loss": 0.1219, "step": 5549 }, { "epoch": 0.0982879305077767, "grad_norm": 1.300480842590332, "learning_rate": 2.9634618305209042e-05, "loss": 0.128, "step": 5550 }, { "epoch": 0.09830564004480513, "grad_norm": 1.1995294094085693, "learning_rate": 2.963442954096889e-05, "loss": 0.1457, "step": 5551 }, { "epoch": 0.09832334958183356, "grad_norm": 1.3866268396377563, "learning_rate": 2.9634240728582943e-05, "loss": 0.1315, "step": 5552 }, { "epoch": 0.09834105911886198, "grad_norm": 0.7334070801734924, "learning_rate": 2.963405186805182e-05, "loss": 0.1224, "step": 5553 }, { "epoch": 0.09835876865589041, "grad_norm": 1.1273818016052246, "learning_rate": 2.963386295937615e-05, "loss": 0.1334, "step": 5554 }, { "epoch": 0.09837647819291884, "grad_norm": 1.5329910516738892, "learning_rate": 2.9633674002556548e-05, "loss": 0.1508, "step": 5555 }, { "epoch": 0.09839418772994726, "grad_norm": 1.166477918624878, "learning_rate": 2.963348499759364e-05, "loss": 0.117, "step": 5556 }, { "epoch": 0.0984118972669757, "grad_norm": 1.6190065145492554, "learning_rate": 2.963329594448805e-05, "loss": 0.1281, "step": 5557 }, { "epoch": 0.09842960680400413, "grad_norm": 1.207562804222107, "learning_rate": 2.9633106843240393e-05, "loss": 0.1381, "step": 5558 }, { "epoch": 0.09844731634103256, "grad_norm": 1.540125846862793, "learning_rate": 2.96329176938513e-05, "loss": 0.1482, "step": 5559 }, { "epoch": 0.09846502587806098, "grad_norm": 1.3169922828674316, "learning_rate": 2.963272849632138e-05, "loss": 0.0494, "step": 5560 }, { "epoch": 0.09848273541508941, "grad_norm": 1.29395592212677, "learning_rate": 2.9632539250651267e-05, "loss": 0.1297, "step": 5561 }, { "epoch": 0.09850044495211784, "grad_norm": 0.9115697741508484, "learning_rate": 2.963234995684158e-05, "loss": 0.127, "step": 5562 }, { "epoch": 0.09851815448914626, "grad_norm": 1.104960322380066, "learning_rate": 2.963216061489294e-05, "loss": 0.1255, "step": 5563 }, { "epoch": 0.0985358640261747, "grad_norm": 1.5806077718734741, "learning_rate": 2.963197122480597e-05, "loss": 0.1468, "step": 5564 }, { "epoch": 0.09855357356320313, "grad_norm": 0.7786734104156494, "learning_rate": 2.96317817865813e-05, "loss": 0.1065, "step": 5565 }, { "epoch": 0.09857128310023155, "grad_norm": 1.14259672164917, "learning_rate": 2.9631592300219546e-05, "loss": 0.1523, "step": 5566 }, { "epoch": 0.09858899263725998, "grad_norm": 1.1382949352264404, "learning_rate": 2.963140276572133e-05, "loss": 0.1646, "step": 5567 }, { "epoch": 0.09860670217428841, "grad_norm": 1.6076241731643677, "learning_rate": 2.9631213183087277e-05, "loss": 0.0993, "step": 5568 }, { "epoch": 0.09862441171131683, "grad_norm": 1.1324902772903442, "learning_rate": 2.963102355231802e-05, "loss": 0.1438, "step": 5569 }, { "epoch": 0.09864212124834526, "grad_norm": 1.4083502292633057, "learning_rate": 2.9630833873414173e-05, "loss": 0.1403, "step": 5570 }, { "epoch": 0.09865983078537369, "grad_norm": 1.1352304220199585, "learning_rate": 2.963064414637636e-05, "loss": 0.175, "step": 5571 }, { "epoch": 0.09867754032240213, "grad_norm": 1.138594627380371, "learning_rate": 2.9630454371205207e-05, "loss": 0.1537, "step": 5572 }, { "epoch": 0.09869524985943055, "grad_norm": 0.8922473192214966, "learning_rate": 2.963026454790134e-05, "loss": 0.1625, "step": 5573 }, { "epoch": 0.09871295939645898, "grad_norm": 1.1069509983062744, "learning_rate": 2.9630074676465386e-05, "loss": 0.1627, "step": 5574 }, { "epoch": 0.0987306689334874, "grad_norm": 1.439460277557373, "learning_rate": 2.962988475689796e-05, "loss": 0.1395, "step": 5575 }, { "epoch": 0.09874837847051583, "grad_norm": 1.7120988368988037, "learning_rate": 2.9629694789199695e-05, "loss": 0.2051, "step": 5576 }, { "epoch": 0.09876608800754426, "grad_norm": 1.0725276470184326, "learning_rate": 2.9629504773371217e-05, "loss": 0.114, "step": 5577 }, { "epoch": 0.09878379754457269, "grad_norm": 1.4534825086593628, "learning_rate": 2.9629314709413146e-05, "loss": 0.1426, "step": 5578 }, { "epoch": 0.09880150708160113, "grad_norm": 1.443345546722412, "learning_rate": 2.962912459732611e-05, "loss": 0.1206, "step": 5579 }, { "epoch": 0.09881921661862955, "grad_norm": 1.1288012266159058, "learning_rate": 2.9628934437110732e-05, "loss": 0.1404, "step": 5580 }, { "epoch": 0.09883692615565798, "grad_norm": 1.1243095397949219, "learning_rate": 2.962874422876764e-05, "loss": 0.1629, "step": 5581 }, { "epoch": 0.0988546356926864, "grad_norm": 0.8977570533752441, "learning_rate": 2.962855397229746e-05, "loss": 0.1906, "step": 5582 }, { "epoch": 0.09887234522971483, "grad_norm": 1.208646297454834, "learning_rate": 2.9628363667700817e-05, "loss": 0.1247, "step": 5583 }, { "epoch": 0.09889005476674326, "grad_norm": 1.2251251935958862, "learning_rate": 2.9628173314978334e-05, "loss": 0.1215, "step": 5584 }, { "epoch": 0.09890776430377168, "grad_norm": 1.6862674951553345, "learning_rate": 2.962798291413064e-05, "loss": 0.1277, "step": 5585 }, { "epoch": 0.09892547384080011, "grad_norm": 1.1628508567810059, "learning_rate": 2.962779246515837e-05, "loss": 0.1153, "step": 5586 }, { "epoch": 0.09894318337782855, "grad_norm": 1.109575867652893, "learning_rate": 2.962760196806213e-05, "loss": 0.154, "step": 5587 }, { "epoch": 0.09896089291485698, "grad_norm": 1.0718735456466675, "learning_rate": 2.962741142284257e-05, "loss": 0.1443, "step": 5588 }, { "epoch": 0.0989786024518854, "grad_norm": 0.8575027585029602, "learning_rate": 2.96272208295003e-05, "loss": 0.1362, "step": 5589 }, { "epoch": 0.09899631198891383, "grad_norm": 1.2214125394821167, "learning_rate": 2.9627030188035956e-05, "loss": 0.1374, "step": 5590 }, { "epoch": 0.09901402152594226, "grad_norm": 1.1316148042678833, "learning_rate": 2.962683949845016e-05, "loss": 0.1344, "step": 5591 }, { "epoch": 0.09903173106297068, "grad_norm": 1.3110886812210083, "learning_rate": 2.9626648760743543e-05, "loss": 0.1227, "step": 5592 }, { "epoch": 0.09904944059999911, "grad_norm": 0.8444863557815552, "learning_rate": 2.9626457974916732e-05, "loss": 0.1152, "step": 5593 }, { "epoch": 0.09906715013702755, "grad_norm": 0.8794445991516113, "learning_rate": 2.962626714097035e-05, "loss": 0.1203, "step": 5594 }, { "epoch": 0.09908485967405598, "grad_norm": 1.1740671396255493, "learning_rate": 2.962607625890503e-05, "loss": 0.1373, "step": 5595 }, { "epoch": 0.0991025692110844, "grad_norm": 1.1901417970657349, "learning_rate": 2.9625885328721397e-05, "loss": 0.1239, "step": 5596 }, { "epoch": 0.09912027874811283, "grad_norm": 1.1347508430480957, "learning_rate": 2.9625694350420082e-05, "loss": 0.1756, "step": 5597 }, { "epoch": 0.09913798828514125, "grad_norm": 1.418973445892334, "learning_rate": 2.962550332400171e-05, "loss": 0.1306, "step": 5598 }, { "epoch": 0.09915569782216968, "grad_norm": 1.5775679349899292, "learning_rate": 2.962531224946692e-05, "loss": 0.1414, "step": 5599 }, { "epoch": 0.0991734073591981, "grad_norm": 1.4072932004928589, "learning_rate": 2.9625121126816322e-05, "loss": 0.1498, "step": 5600 }, { "epoch": 0.09919111689622655, "grad_norm": 0.9824303984642029, "learning_rate": 2.9624929956050557e-05, "loss": 0.1332, "step": 5601 }, { "epoch": 0.09920882643325497, "grad_norm": 1.0364688634872437, "learning_rate": 2.9624738737170255e-05, "loss": 0.0781, "step": 5602 }, { "epoch": 0.0992265359702834, "grad_norm": 0.9890822172164917, "learning_rate": 2.962454747017604e-05, "loss": 0.1455, "step": 5603 }, { "epoch": 0.09924424550731183, "grad_norm": 1.364507794380188, "learning_rate": 2.962435615506854e-05, "loss": 0.1181, "step": 5604 }, { "epoch": 0.09926195504434025, "grad_norm": 2.040787935256958, "learning_rate": 2.962416479184839e-05, "loss": 0.1268, "step": 5605 }, { "epoch": 0.09927966458136868, "grad_norm": 1.5063704252243042, "learning_rate": 2.962397338051622e-05, "loss": 0.1094, "step": 5606 }, { "epoch": 0.0992973741183971, "grad_norm": 1.594443917274475, "learning_rate": 2.9623781921072653e-05, "loss": 0.1076, "step": 5607 }, { "epoch": 0.09931508365542553, "grad_norm": 1.3258302211761475, "learning_rate": 2.9623590413518325e-05, "loss": 0.18, "step": 5608 }, { "epoch": 0.09933279319245397, "grad_norm": 1.8973654508590698, "learning_rate": 2.962339885785386e-05, "loss": 0.1379, "step": 5609 }, { "epoch": 0.0993505027294824, "grad_norm": 0.902339518070221, "learning_rate": 2.9623207254079893e-05, "loss": 0.1194, "step": 5610 }, { "epoch": 0.09936821226651082, "grad_norm": 1.295585036277771, "learning_rate": 2.9623015602197052e-05, "loss": 0.1777, "step": 5611 }, { "epoch": 0.09938592180353925, "grad_norm": 1.5562233924865723, "learning_rate": 2.9622823902205973e-05, "loss": 0.1336, "step": 5612 }, { "epoch": 0.09940363134056768, "grad_norm": 1.1903049945831299, "learning_rate": 2.962263215410728e-05, "loss": 0.1389, "step": 5613 }, { "epoch": 0.0994213408775961, "grad_norm": 1.4590004682540894, "learning_rate": 2.9622440357901604e-05, "loss": 0.1803, "step": 5614 }, { "epoch": 0.09943905041462453, "grad_norm": 1.2523154020309448, "learning_rate": 2.962224851358958e-05, "loss": 0.1682, "step": 5615 }, { "epoch": 0.09945675995165297, "grad_norm": 1.099125623703003, "learning_rate": 2.962205662117184e-05, "loss": 0.1266, "step": 5616 }, { "epoch": 0.0994744694886814, "grad_norm": 1.2897511720657349, "learning_rate": 2.9621864680649012e-05, "loss": 0.181, "step": 5617 }, { "epoch": 0.09949217902570982, "grad_norm": 1.018976092338562, "learning_rate": 2.9621672692021725e-05, "loss": 0.1171, "step": 5618 }, { "epoch": 0.09950988856273825, "grad_norm": 1.2314777374267578, "learning_rate": 2.9621480655290613e-05, "loss": 0.1387, "step": 5619 }, { "epoch": 0.09952759809976668, "grad_norm": 1.0608265399932861, "learning_rate": 2.962128857045631e-05, "loss": 0.1141, "step": 5620 }, { "epoch": 0.0995453076367951, "grad_norm": 1.12784743309021, "learning_rate": 2.962109643751945e-05, "loss": 0.1315, "step": 5621 }, { "epoch": 0.09956301717382353, "grad_norm": 0.9371468424797058, "learning_rate": 2.9620904256480655e-05, "loss": 0.1029, "step": 5622 }, { "epoch": 0.09958072671085196, "grad_norm": 1.0680758953094482, "learning_rate": 2.9620712027340572e-05, "loss": 0.1145, "step": 5623 }, { "epoch": 0.0995984362478804, "grad_norm": 1.1850941181182861, "learning_rate": 2.9620519750099822e-05, "loss": 0.1616, "step": 5624 }, { "epoch": 0.09961614578490882, "grad_norm": 1.1682707071304321, "learning_rate": 2.9620327424759036e-05, "loss": 0.1326, "step": 5625 }, { "epoch": 0.09963385532193725, "grad_norm": 1.3727003335952759, "learning_rate": 2.9620135051318857e-05, "loss": 0.1662, "step": 5626 }, { "epoch": 0.09965156485896567, "grad_norm": 1.0651981830596924, "learning_rate": 2.961994262977991e-05, "loss": 0.1225, "step": 5627 }, { "epoch": 0.0996692743959941, "grad_norm": 1.42606520652771, "learning_rate": 2.961975016014283e-05, "loss": 0.167, "step": 5628 }, { "epoch": 0.09968698393302253, "grad_norm": 1.5019071102142334, "learning_rate": 2.961955764240825e-05, "loss": 0.1421, "step": 5629 }, { "epoch": 0.09970469347005095, "grad_norm": 1.4292713403701782, "learning_rate": 2.9619365076576807e-05, "loss": 0.1247, "step": 5630 }, { "epoch": 0.0997224030070794, "grad_norm": 1.1544076204299927, "learning_rate": 2.961917246264913e-05, "loss": 0.1044, "step": 5631 }, { "epoch": 0.09974011254410782, "grad_norm": 1.357422113418579, "learning_rate": 2.9618979800625857e-05, "loss": 0.1404, "step": 5632 }, { "epoch": 0.09975782208113625, "grad_norm": 1.110935091972351, "learning_rate": 2.9618787090507615e-05, "loss": 0.1409, "step": 5633 }, { "epoch": 0.09977553161816467, "grad_norm": 1.3534166812896729, "learning_rate": 2.9618594332295045e-05, "loss": 0.1379, "step": 5634 }, { "epoch": 0.0997932411551931, "grad_norm": 0.9038876295089722, "learning_rate": 2.9618401525988775e-05, "loss": 0.1212, "step": 5635 }, { "epoch": 0.09981095069222153, "grad_norm": 1.171341896057129, "learning_rate": 2.9618208671589445e-05, "loss": 0.1307, "step": 5636 }, { "epoch": 0.09982866022924995, "grad_norm": 0.9810640215873718, "learning_rate": 2.9618015769097687e-05, "loss": 0.1145, "step": 5637 }, { "epoch": 0.09984636976627838, "grad_norm": 1.150694727897644, "learning_rate": 2.9617822818514134e-05, "loss": 0.1537, "step": 5638 }, { "epoch": 0.09986407930330682, "grad_norm": 1.3386247158050537, "learning_rate": 2.9617629819839424e-05, "loss": 0.1558, "step": 5639 }, { "epoch": 0.09988178884033524, "grad_norm": 1.0302308797836304, "learning_rate": 2.961743677307419e-05, "loss": 0.1402, "step": 5640 }, { "epoch": 0.09989949837736367, "grad_norm": 1.0551170110702515, "learning_rate": 2.9617243678219067e-05, "loss": 0.1525, "step": 5641 }, { "epoch": 0.0999172079143921, "grad_norm": 1.290394902229309, "learning_rate": 2.961705053527469e-05, "loss": 0.1373, "step": 5642 }, { "epoch": 0.09993491745142052, "grad_norm": 1.6641017198562622, "learning_rate": 2.9616857344241698e-05, "loss": 0.1596, "step": 5643 }, { "epoch": 0.09995262698844895, "grad_norm": 1.3248822689056396, "learning_rate": 2.961666410512072e-05, "loss": 0.1279, "step": 5644 }, { "epoch": 0.09997033652547738, "grad_norm": 1.1893513202667236, "learning_rate": 2.96164708179124e-05, "loss": 0.125, "step": 5645 }, { "epoch": 0.09998804606250582, "grad_norm": 1.116339921951294, "learning_rate": 2.9616277482617368e-05, "loss": 0.1162, "step": 5646 }, { "epoch": 0.10000575559953424, "grad_norm": 0.9940365552902222, "learning_rate": 2.9616084099236258e-05, "loss": 0.1295, "step": 5647 }, { "epoch": 0.10002346513656267, "grad_norm": 1.190593957901001, "learning_rate": 2.9615890667769715e-05, "loss": 0.1401, "step": 5648 }, { "epoch": 0.1000411746735911, "grad_norm": 1.2087396383285522, "learning_rate": 2.9615697188218368e-05, "loss": 0.0963, "step": 5649 }, { "epoch": 0.10005888421061952, "grad_norm": 1.158515214920044, "learning_rate": 2.961550366058285e-05, "loss": 0.1618, "step": 5650 }, { "epoch": 0.10007659374764795, "grad_norm": 1.0284279584884644, "learning_rate": 2.961531008486381e-05, "loss": 0.1357, "step": 5651 }, { "epoch": 0.10009430328467638, "grad_norm": 1.4605531692504883, "learning_rate": 2.9615116461061874e-05, "loss": 0.1799, "step": 5652 }, { "epoch": 0.1001120128217048, "grad_norm": 1.6080743074417114, "learning_rate": 2.9614922789177687e-05, "loss": 0.1568, "step": 5653 }, { "epoch": 0.10012972235873324, "grad_norm": 1.4247907400131226, "learning_rate": 2.9614729069211878e-05, "loss": 0.1221, "step": 5654 }, { "epoch": 0.10014743189576167, "grad_norm": 1.85458505153656, "learning_rate": 2.961453530116509e-05, "loss": 0.1643, "step": 5655 }, { "epoch": 0.1001651414327901, "grad_norm": 1.0649733543395996, "learning_rate": 2.961434148503796e-05, "loss": 0.1231, "step": 5656 }, { "epoch": 0.10018285096981852, "grad_norm": 1.9290763139724731, "learning_rate": 2.9614147620831125e-05, "loss": 0.1062, "step": 5657 }, { "epoch": 0.10020056050684695, "grad_norm": 0.8576098680496216, "learning_rate": 2.961395370854522e-05, "loss": 0.1005, "step": 5658 }, { "epoch": 0.10021827004387537, "grad_norm": 1.2731837034225464, "learning_rate": 2.961375974818089e-05, "loss": 0.1574, "step": 5659 }, { "epoch": 0.1002359795809038, "grad_norm": 0.8880783319473267, "learning_rate": 2.9613565739738767e-05, "loss": 0.1688, "step": 5660 }, { "epoch": 0.10025368911793224, "grad_norm": 0.9691652655601501, "learning_rate": 2.9613371683219487e-05, "loss": 0.1144, "step": 5661 }, { "epoch": 0.10027139865496067, "grad_norm": 2.0648958683013916, "learning_rate": 2.9613177578623697e-05, "loss": 0.1222, "step": 5662 }, { "epoch": 0.1002891081919891, "grad_norm": 1.9849849939346313, "learning_rate": 2.9612983425952025e-05, "loss": 0.1575, "step": 5663 }, { "epoch": 0.10030681772901752, "grad_norm": 1.3410032987594604, "learning_rate": 2.961278922520512e-05, "loss": 0.1616, "step": 5664 }, { "epoch": 0.10032452726604595, "grad_norm": 1.4370217323303223, "learning_rate": 2.9612594976383618e-05, "loss": 0.1202, "step": 5665 }, { "epoch": 0.10034223680307437, "grad_norm": 1.8228175640106201, "learning_rate": 2.961240067948815e-05, "loss": 0.1614, "step": 5666 }, { "epoch": 0.1003599463401028, "grad_norm": 1.1740974187850952, "learning_rate": 2.9612206334519366e-05, "loss": 0.1383, "step": 5667 }, { "epoch": 0.10037765587713124, "grad_norm": 1.5754231214523315, "learning_rate": 2.9612011941477904e-05, "loss": 0.1481, "step": 5668 }, { "epoch": 0.10039536541415967, "grad_norm": 1.2213228940963745, "learning_rate": 2.9611817500364398e-05, "loss": 0.1632, "step": 5669 }, { "epoch": 0.10041307495118809, "grad_norm": 1.843971848487854, "learning_rate": 2.961162301117949e-05, "loss": 0.1662, "step": 5670 }, { "epoch": 0.10043078448821652, "grad_norm": 1.4773885011672974, "learning_rate": 2.9611428473923817e-05, "loss": 0.1368, "step": 5671 }, { "epoch": 0.10044849402524494, "grad_norm": 1.4761598110198975, "learning_rate": 2.9611233888598025e-05, "loss": 0.1766, "step": 5672 }, { "epoch": 0.10046620356227337, "grad_norm": 1.260624647140503, "learning_rate": 2.961103925520275e-05, "loss": 0.1283, "step": 5673 }, { "epoch": 0.1004839130993018, "grad_norm": 2.2113945484161377, "learning_rate": 2.9610844573738636e-05, "loss": 0.1711, "step": 5674 }, { "epoch": 0.10050162263633022, "grad_norm": 1.6821329593658447, "learning_rate": 2.9610649844206322e-05, "loss": 0.1197, "step": 5675 }, { "epoch": 0.10051933217335866, "grad_norm": 1.0220777988433838, "learning_rate": 2.9610455066606447e-05, "loss": 0.1425, "step": 5676 }, { "epoch": 0.10053704171038709, "grad_norm": 1.655243158340454, "learning_rate": 2.961026024093965e-05, "loss": 0.1668, "step": 5677 }, { "epoch": 0.10055475124741552, "grad_norm": 0.7453826665878296, "learning_rate": 2.9610065367206578e-05, "loss": 0.159, "step": 5678 }, { "epoch": 0.10057246078444394, "grad_norm": 1.0012941360473633, "learning_rate": 2.9609870445407864e-05, "loss": 0.114, "step": 5679 }, { "epoch": 0.10059017032147237, "grad_norm": 1.0525524616241455, "learning_rate": 2.9609675475544158e-05, "loss": 0.168, "step": 5680 }, { "epoch": 0.1006078798585008, "grad_norm": 1.1298537254333496, "learning_rate": 2.9609480457616096e-05, "loss": 0.1092, "step": 5681 }, { "epoch": 0.10062558939552922, "grad_norm": 1.214871883392334, "learning_rate": 2.9609285391624325e-05, "loss": 0.1552, "step": 5682 }, { "epoch": 0.10064329893255766, "grad_norm": 1.1986219882965088, "learning_rate": 2.9609090277569478e-05, "loss": 0.1058, "step": 5683 }, { "epoch": 0.10066100846958609, "grad_norm": 1.2575881481170654, "learning_rate": 2.96088951154522e-05, "loss": 0.1369, "step": 5684 }, { "epoch": 0.10067871800661451, "grad_norm": 0.8793315887451172, "learning_rate": 2.9608699905273134e-05, "loss": 0.1191, "step": 5685 }, { "epoch": 0.10069642754364294, "grad_norm": 1.4860855340957642, "learning_rate": 2.960850464703293e-05, "loss": 0.1371, "step": 5686 }, { "epoch": 0.10071413708067137, "grad_norm": 1.047674298286438, "learning_rate": 2.960830934073222e-05, "loss": 0.1596, "step": 5687 }, { "epoch": 0.1007318466176998, "grad_norm": 1.4563621282577515, "learning_rate": 2.960811398637165e-05, "loss": 0.1517, "step": 5688 }, { "epoch": 0.10074955615472822, "grad_norm": 1.4382188320159912, "learning_rate": 2.960791858395186e-05, "loss": 0.1418, "step": 5689 }, { "epoch": 0.10076726569175665, "grad_norm": 1.3730766773223877, "learning_rate": 2.9607723133473498e-05, "loss": 0.1413, "step": 5690 }, { "epoch": 0.10078497522878509, "grad_norm": 2.3697144985198975, "learning_rate": 2.9607527634937203e-05, "loss": 0.1491, "step": 5691 }, { "epoch": 0.10080268476581351, "grad_norm": 1.120216965675354, "learning_rate": 2.9607332088343623e-05, "loss": 0.1593, "step": 5692 }, { "epoch": 0.10082039430284194, "grad_norm": 1.1863563060760498, "learning_rate": 2.9607136493693392e-05, "loss": 0.1338, "step": 5693 }, { "epoch": 0.10083810383987037, "grad_norm": 1.153555989265442, "learning_rate": 2.9606940850987164e-05, "loss": 0.1611, "step": 5694 }, { "epoch": 0.10085581337689879, "grad_norm": 1.5599262714385986, "learning_rate": 2.9606745160225574e-05, "loss": 0.1412, "step": 5695 }, { "epoch": 0.10087352291392722, "grad_norm": 1.2295870780944824, "learning_rate": 2.9606549421409272e-05, "loss": 0.1332, "step": 5696 }, { "epoch": 0.10089123245095565, "grad_norm": 1.250945806503296, "learning_rate": 2.96063536345389e-05, "loss": 0.1496, "step": 5697 }, { "epoch": 0.10090894198798409, "grad_norm": 1.0700029134750366, "learning_rate": 2.9606157799615098e-05, "loss": 0.1417, "step": 5698 }, { "epoch": 0.10092665152501251, "grad_norm": 1.4848448038101196, "learning_rate": 2.960596191663852e-05, "loss": 0.1266, "step": 5699 }, { "epoch": 0.10094436106204094, "grad_norm": 1.2999392747879028, "learning_rate": 2.9605765985609805e-05, "loss": 0.1343, "step": 5700 }, { "epoch": 0.10096207059906936, "grad_norm": 1.65998113155365, "learning_rate": 2.9605570006529595e-05, "loss": 0.1165, "step": 5701 }, { "epoch": 0.10097978013609779, "grad_norm": 1.3893567323684692, "learning_rate": 2.9605373979398534e-05, "loss": 0.1387, "step": 5702 }, { "epoch": 0.10099748967312622, "grad_norm": 1.1482264995574951, "learning_rate": 2.9605177904217273e-05, "loss": 0.1148, "step": 5703 }, { "epoch": 0.10101519921015464, "grad_norm": 1.089512586593628, "learning_rate": 2.9604981780986454e-05, "loss": 0.0987, "step": 5704 }, { "epoch": 0.10103290874718307, "grad_norm": 1.4242392778396606, "learning_rate": 2.960478560970672e-05, "loss": 0.1328, "step": 5705 }, { "epoch": 0.10105061828421151, "grad_norm": 1.643492579460144, "learning_rate": 2.960458939037872e-05, "loss": 0.1539, "step": 5706 }, { "epoch": 0.10106832782123994, "grad_norm": 1.0214860439300537, "learning_rate": 2.9604393123003097e-05, "loss": 0.1461, "step": 5707 }, { "epoch": 0.10108603735826836, "grad_norm": 1.2435221672058105, "learning_rate": 2.96041968075805e-05, "loss": 0.1645, "step": 5708 }, { "epoch": 0.10110374689529679, "grad_norm": 1.4440957307815552, "learning_rate": 2.960400044411157e-05, "loss": 0.1908, "step": 5709 }, { "epoch": 0.10112145643232522, "grad_norm": 1.4479544162750244, "learning_rate": 2.960380403259696e-05, "loss": 0.1521, "step": 5710 }, { "epoch": 0.10113916596935364, "grad_norm": 1.4160412549972534, "learning_rate": 2.9603607573037306e-05, "loss": 0.1386, "step": 5711 }, { "epoch": 0.10115687550638207, "grad_norm": 1.1382414102554321, "learning_rate": 2.9603411065433263e-05, "loss": 0.1656, "step": 5712 }, { "epoch": 0.10117458504341051, "grad_norm": 1.3133131265640259, "learning_rate": 2.9603214509785475e-05, "loss": 0.1534, "step": 5713 }, { "epoch": 0.10119229458043894, "grad_norm": 1.7415518760681152, "learning_rate": 2.9603017906094587e-05, "loss": 0.1613, "step": 5714 }, { "epoch": 0.10121000411746736, "grad_norm": 0.9304753541946411, "learning_rate": 2.9602821254361248e-05, "loss": 0.1759, "step": 5715 }, { "epoch": 0.10122771365449579, "grad_norm": 1.1726065874099731, "learning_rate": 2.9602624554586107e-05, "loss": 0.1207, "step": 5716 }, { "epoch": 0.10124542319152421, "grad_norm": 0.9273986220359802, "learning_rate": 2.9602427806769806e-05, "loss": 0.1371, "step": 5717 }, { "epoch": 0.10126313272855264, "grad_norm": 1.0253933668136597, "learning_rate": 2.960223101091299e-05, "loss": 0.1053, "step": 5718 }, { "epoch": 0.10128084226558107, "grad_norm": 1.6998367309570312, "learning_rate": 2.9602034167016315e-05, "loss": 0.1277, "step": 5719 }, { "epoch": 0.10129855180260951, "grad_norm": 1.4846868515014648, "learning_rate": 2.9601837275080424e-05, "loss": 0.1537, "step": 5720 }, { "epoch": 0.10131626133963793, "grad_norm": 1.2889180183410645, "learning_rate": 2.9601640335105967e-05, "loss": 0.1506, "step": 5721 }, { "epoch": 0.10133397087666636, "grad_norm": 1.1726884841918945, "learning_rate": 2.960144334709359e-05, "loss": 0.1214, "step": 5722 }, { "epoch": 0.10135168041369479, "grad_norm": 1.3414257764816284, "learning_rate": 2.960124631104394e-05, "loss": 0.1423, "step": 5723 }, { "epoch": 0.10136938995072321, "grad_norm": 1.4654736518859863, "learning_rate": 2.9601049226957664e-05, "loss": 0.1716, "step": 5724 }, { "epoch": 0.10138709948775164, "grad_norm": 0.8788184523582458, "learning_rate": 2.9600852094835417e-05, "loss": 0.1218, "step": 5725 }, { "epoch": 0.10140480902478007, "grad_norm": 1.438673973083496, "learning_rate": 2.960065491467784e-05, "loss": 0.1643, "step": 5726 }, { "epoch": 0.10142251856180849, "grad_norm": 1.3804609775543213, "learning_rate": 2.960045768648559e-05, "loss": 0.165, "step": 5727 }, { "epoch": 0.10144022809883693, "grad_norm": 1.1254006624221802, "learning_rate": 2.9600260410259306e-05, "loss": 0.1333, "step": 5728 }, { "epoch": 0.10145793763586536, "grad_norm": 2.427009105682373, "learning_rate": 2.960006308599964e-05, "loss": 0.1159, "step": 5729 }, { "epoch": 0.10147564717289378, "grad_norm": 1.2733110189437866, "learning_rate": 2.959986571370725e-05, "loss": 0.1101, "step": 5730 }, { "epoch": 0.10149335670992221, "grad_norm": 1.226806640625, "learning_rate": 2.9599668293382773e-05, "loss": 0.1559, "step": 5731 }, { "epoch": 0.10151106624695064, "grad_norm": 1.8927987813949585, "learning_rate": 2.9599470825026867e-05, "loss": 0.1889, "step": 5732 }, { "epoch": 0.10152877578397906, "grad_norm": 1.1394610404968262, "learning_rate": 2.9599273308640177e-05, "loss": 0.1221, "step": 5733 }, { "epoch": 0.10154648532100749, "grad_norm": 0.9471078515052795, "learning_rate": 2.959907574422336e-05, "loss": 0.1235, "step": 5734 }, { "epoch": 0.10156419485803593, "grad_norm": 1.2480857372283936, "learning_rate": 2.959887813177705e-05, "loss": 0.1123, "step": 5735 }, { "epoch": 0.10158190439506436, "grad_norm": 0.9119039177894592, "learning_rate": 2.959868047130192e-05, "loss": 0.0842, "step": 5736 }, { "epoch": 0.10159961393209278, "grad_norm": 1.0388014316558838, "learning_rate": 2.9598482762798596e-05, "loss": 0.1248, "step": 5737 }, { "epoch": 0.10161732346912121, "grad_norm": 1.4106087684631348, "learning_rate": 2.9598285006267745e-05, "loss": 0.1435, "step": 5738 }, { "epoch": 0.10163503300614964, "grad_norm": 1.264219045639038, "learning_rate": 2.9598087201710015e-05, "loss": 0.192, "step": 5739 }, { "epoch": 0.10165274254317806, "grad_norm": 1.0698496103286743, "learning_rate": 2.9597889349126052e-05, "loss": 0.1614, "step": 5740 }, { "epoch": 0.10167045208020649, "grad_norm": 1.544248104095459, "learning_rate": 2.9597691448516507e-05, "loss": 0.165, "step": 5741 }, { "epoch": 0.10168816161723492, "grad_norm": 1.7388874292373657, "learning_rate": 2.9597493499882037e-05, "loss": 0.1885, "step": 5742 }, { "epoch": 0.10170587115426336, "grad_norm": 1.303795576095581, "learning_rate": 2.9597295503223292e-05, "loss": 0.148, "step": 5743 }, { "epoch": 0.10172358069129178, "grad_norm": 0.8205512762069702, "learning_rate": 2.959709745854092e-05, "loss": 0.1086, "step": 5744 }, { "epoch": 0.10174129022832021, "grad_norm": 1.4464603662490845, "learning_rate": 2.959689936583557e-05, "loss": 0.1239, "step": 5745 }, { "epoch": 0.10175899976534863, "grad_norm": 0.8879359364509583, "learning_rate": 2.9596701225107903e-05, "loss": 0.1045, "step": 5746 }, { "epoch": 0.10177670930237706, "grad_norm": 1.5921303033828735, "learning_rate": 2.959650303635856e-05, "loss": 0.1127, "step": 5747 }, { "epoch": 0.10179441883940549, "grad_norm": 1.1351003646850586, "learning_rate": 2.95963047995882e-05, "loss": 0.1299, "step": 5748 }, { "epoch": 0.10181212837643391, "grad_norm": 1.5357491970062256, "learning_rate": 2.9596106514797476e-05, "loss": 0.1232, "step": 5749 }, { "epoch": 0.10182983791346235, "grad_norm": 1.2445309162139893, "learning_rate": 2.959590818198704e-05, "loss": 0.1374, "step": 5750 }, { "epoch": 0.10184754745049078, "grad_norm": 1.0118002891540527, "learning_rate": 2.9595709801157534e-05, "loss": 0.1271, "step": 5751 }, { "epoch": 0.1018652569875192, "grad_norm": 2.1416800022125244, "learning_rate": 2.9595511372309627e-05, "loss": 0.1291, "step": 5752 }, { "epoch": 0.10188296652454763, "grad_norm": 2.347846031188965, "learning_rate": 2.959531289544396e-05, "loss": 0.1173, "step": 5753 }, { "epoch": 0.10190067606157606, "grad_norm": 1.07451593875885, "learning_rate": 2.9595114370561186e-05, "loss": 0.1121, "step": 5754 }, { "epoch": 0.10191838559860449, "grad_norm": 1.0870277881622314, "learning_rate": 2.959491579766197e-05, "loss": 0.1548, "step": 5755 }, { "epoch": 0.10193609513563291, "grad_norm": 1.0509958267211914, "learning_rate": 2.9594717176746956e-05, "loss": 0.1558, "step": 5756 }, { "epoch": 0.10195380467266134, "grad_norm": 1.9136943817138672, "learning_rate": 2.9594518507816796e-05, "loss": 0.1466, "step": 5757 }, { "epoch": 0.10197151420968978, "grad_norm": 1.3986103534698486, "learning_rate": 2.9594319790872143e-05, "loss": 0.1314, "step": 5758 }, { "epoch": 0.1019892237467182, "grad_norm": 1.27279531955719, "learning_rate": 2.9594121025913658e-05, "loss": 0.1182, "step": 5759 }, { "epoch": 0.10200693328374663, "grad_norm": 1.3880683183670044, "learning_rate": 2.959392221294199e-05, "loss": 0.1837, "step": 5760 }, { "epoch": 0.10202464282077506, "grad_norm": 0.5302793383598328, "learning_rate": 2.9593723351957796e-05, "loss": 0.1063, "step": 5761 }, { "epoch": 0.10204235235780348, "grad_norm": 1.0066975355148315, "learning_rate": 2.9593524442961724e-05, "loss": 0.1122, "step": 5762 }, { "epoch": 0.10206006189483191, "grad_norm": 1.267310380935669, "learning_rate": 2.9593325485954435e-05, "loss": 0.1372, "step": 5763 }, { "epoch": 0.10207777143186034, "grad_norm": 1.6345025300979614, "learning_rate": 2.9593126480936584e-05, "loss": 0.1745, "step": 5764 }, { "epoch": 0.10209548096888878, "grad_norm": 9.562207221984863, "learning_rate": 2.959292742790882e-05, "loss": 0.1819, "step": 5765 }, { "epoch": 0.1021131905059172, "grad_norm": 1.8267645835876465, "learning_rate": 2.95927283268718e-05, "loss": 0.1416, "step": 5766 }, { "epoch": 0.10213090004294563, "grad_norm": 1.3855572938919067, "learning_rate": 2.959252917782618e-05, "loss": 0.1648, "step": 5767 }, { "epoch": 0.10214860957997406, "grad_norm": 1.3733891248703003, "learning_rate": 2.959232998077262e-05, "loss": 0.1001, "step": 5768 }, { "epoch": 0.10216631911700248, "grad_norm": 1.9761074781417847, "learning_rate": 2.9592130735711764e-05, "loss": 0.1564, "step": 5769 }, { "epoch": 0.10218402865403091, "grad_norm": 1.2348953485488892, "learning_rate": 2.9591931442644272e-05, "loss": 0.1495, "step": 5770 }, { "epoch": 0.10220173819105934, "grad_norm": 4.1055731773376465, "learning_rate": 2.9591732101570805e-05, "loss": 0.1597, "step": 5771 }, { "epoch": 0.10221944772808776, "grad_norm": 1.0895482301712036, "learning_rate": 2.9591532712492016e-05, "loss": 0.1449, "step": 5772 }, { "epoch": 0.1022371572651162, "grad_norm": 1.5606365203857422, "learning_rate": 2.9591333275408564e-05, "loss": 0.1748, "step": 5773 }, { "epoch": 0.10225486680214463, "grad_norm": 1.2992854118347168, "learning_rate": 2.9591133790321095e-05, "loss": 0.0906, "step": 5774 }, { "epoch": 0.10227257633917305, "grad_norm": 1.021523118019104, "learning_rate": 2.9590934257230273e-05, "loss": 0.1192, "step": 5775 }, { "epoch": 0.10229028587620148, "grad_norm": 3.700949192047119, "learning_rate": 2.9590734676136753e-05, "loss": 0.1113, "step": 5776 }, { "epoch": 0.10230799541322991, "grad_norm": 5.441983699798584, "learning_rate": 2.959053504704119e-05, "loss": 0.1694, "step": 5777 }, { "epoch": 0.10232570495025833, "grad_norm": 6.7926025390625, "learning_rate": 2.9590335369944248e-05, "loss": 0.1159, "step": 5778 }, { "epoch": 0.10234341448728676, "grad_norm": 5.466456890106201, "learning_rate": 2.9590135644846575e-05, "loss": 0.1243, "step": 5779 }, { "epoch": 0.1023611240243152, "grad_norm": 1.617378830909729, "learning_rate": 2.9589935871748832e-05, "loss": 0.1003, "step": 5780 }, { "epoch": 0.10237883356134363, "grad_norm": 1.4270788431167603, "learning_rate": 2.9589736050651673e-05, "loss": 0.1702, "step": 5781 }, { "epoch": 0.10239654309837205, "grad_norm": 1.210654616355896, "learning_rate": 2.9589536181555763e-05, "loss": 0.1363, "step": 5782 }, { "epoch": 0.10241425263540048, "grad_norm": 1.3215346336364746, "learning_rate": 2.9589336264461755e-05, "loss": 0.141, "step": 5783 }, { "epoch": 0.1024319621724289, "grad_norm": 2.5123510360717773, "learning_rate": 2.9589136299370303e-05, "loss": 0.1536, "step": 5784 }, { "epoch": 0.10244967170945733, "grad_norm": 3.6473469734191895, "learning_rate": 2.9588936286282065e-05, "loss": 0.1452, "step": 5785 }, { "epoch": 0.10246738124648576, "grad_norm": 1.062914490699768, "learning_rate": 2.9588736225197704e-05, "loss": 0.0912, "step": 5786 }, { "epoch": 0.1024850907835142, "grad_norm": 0.9615958333015442, "learning_rate": 2.9588536116117878e-05, "loss": 0.144, "step": 5787 }, { "epoch": 0.10250280032054263, "grad_norm": 2.115126848220825, "learning_rate": 2.9588335959043244e-05, "loss": 0.1777, "step": 5788 }, { "epoch": 0.10252050985757105, "grad_norm": 1.0794669389724731, "learning_rate": 2.958813575397446e-05, "loss": 0.1431, "step": 5789 }, { "epoch": 0.10253821939459948, "grad_norm": 1.0606021881103516, "learning_rate": 2.958793550091219e-05, "loss": 0.1124, "step": 5790 }, { "epoch": 0.1025559289316279, "grad_norm": 1.0658199787139893, "learning_rate": 2.958773519985708e-05, "loss": 0.1026, "step": 5791 }, { "epoch": 0.10257363846865633, "grad_norm": 1.6469268798828125, "learning_rate": 2.95875348508098e-05, "loss": 0.1497, "step": 5792 }, { "epoch": 0.10259134800568476, "grad_norm": 1.6417230367660522, "learning_rate": 2.9587334453771003e-05, "loss": 0.1521, "step": 5793 }, { "epoch": 0.10260905754271318, "grad_norm": 1.719348669052124, "learning_rate": 2.9587134008741356e-05, "loss": 0.1604, "step": 5794 }, { "epoch": 0.10262676707974162, "grad_norm": 0.9434744119644165, "learning_rate": 2.958693351572151e-05, "loss": 0.1449, "step": 5795 }, { "epoch": 0.10264447661677005, "grad_norm": 0.8918817043304443, "learning_rate": 2.958673297471213e-05, "loss": 0.1418, "step": 5796 }, { "epoch": 0.10266218615379848, "grad_norm": 0.887042760848999, "learning_rate": 2.958653238571387e-05, "loss": 0.0813, "step": 5797 }, { "epoch": 0.1026798956908269, "grad_norm": 1.3904286623001099, "learning_rate": 2.9586331748727394e-05, "loss": 0.1617, "step": 5798 }, { "epoch": 0.10269760522785533, "grad_norm": 1.994460105895996, "learning_rate": 2.9586131063753367e-05, "loss": 0.1448, "step": 5799 }, { "epoch": 0.10271531476488376, "grad_norm": 1.1482914686203003, "learning_rate": 2.9585930330792444e-05, "loss": 0.1283, "step": 5800 }, { "epoch": 0.10273302430191218, "grad_norm": 1.070450782775879, "learning_rate": 2.958572954984528e-05, "loss": 0.1119, "step": 5801 }, { "epoch": 0.10275073383894062, "grad_norm": 1.4099599123001099, "learning_rate": 2.9585528720912543e-05, "loss": 0.1051, "step": 5802 }, { "epoch": 0.10276844337596905, "grad_norm": 1.2144439220428467, "learning_rate": 2.958532784399489e-05, "loss": 0.1311, "step": 5803 }, { "epoch": 0.10278615291299747, "grad_norm": 1.1833698749542236, "learning_rate": 2.9585126919092988e-05, "loss": 0.1223, "step": 5804 }, { "epoch": 0.1028038624500259, "grad_norm": 1.3630480766296387, "learning_rate": 2.9584925946207495e-05, "loss": 0.1938, "step": 5805 }, { "epoch": 0.10282157198705433, "grad_norm": 1.0512157678604126, "learning_rate": 2.9584724925339067e-05, "loss": 0.1185, "step": 5806 }, { "epoch": 0.10283928152408275, "grad_norm": 1.4004788398742676, "learning_rate": 2.9584523856488367e-05, "loss": 0.1104, "step": 5807 }, { "epoch": 0.10285699106111118, "grad_norm": 0.9218780398368835, "learning_rate": 2.9584322739656062e-05, "loss": 0.15, "step": 5808 }, { "epoch": 0.1028747005981396, "grad_norm": 1.1314828395843506, "learning_rate": 2.9584121574842814e-05, "loss": 0.1576, "step": 5809 }, { "epoch": 0.10289241013516805, "grad_norm": 0.8790839910507202, "learning_rate": 2.9583920362049272e-05, "loss": 0.1608, "step": 5810 }, { "epoch": 0.10291011967219647, "grad_norm": 1.2844269275665283, "learning_rate": 2.9583719101276112e-05, "loss": 0.124, "step": 5811 }, { "epoch": 0.1029278292092249, "grad_norm": 1.2715883255004883, "learning_rate": 2.9583517792523994e-05, "loss": 0.153, "step": 5812 }, { "epoch": 0.10294553874625333, "grad_norm": 1.1238696575164795, "learning_rate": 2.9583316435793575e-05, "loss": 0.1179, "step": 5813 }, { "epoch": 0.10296324828328175, "grad_norm": 1.4265000820159912, "learning_rate": 2.9583115031085515e-05, "loss": 0.1392, "step": 5814 }, { "epoch": 0.10298095782031018, "grad_norm": 1.1368643045425415, "learning_rate": 2.958291357840049e-05, "loss": 0.0785, "step": 5815 }, { "epoch": 0.1029986673573386, "grad_norm": 0.9355973601341248, "learning_rate": 2.958271207773915e-05, "loss": 0.1726, "step": 5816 }, { "epoch": 0.10301637689436705, "grad_norm": 1.0701930522918701, "learning_rate": 2.9582510529102164e-05, "loss": 0.0925, "step": 5817 }, { "epoch": 0.10303408643139547, "grad_norm": 1.29472017288208, "learning_rate": 2.958230893249019e-05, "loss": 0.0927, "step": 5818 }, { "epoch": 0.1030517959684239, "grad_norm": 1.5712507963180542, "learning_rate": 2.95821072879039e-05, "loss": 0.125, "step": 5819 }, { "epoch": 0.10306950550545232, "grad_norm": 1.0141104459762573, "learning_rate": 2.9581905595343947e-05, "loss": 0.1536, "step": 5820 }, { "epoch": 0.10308721504248075, "grad_norm": 1.617944359779358, "learning_rate": 2.9581703854811003e-05, "loss": 0.097, "step": 5821 }, { "epoch": 0.10310492457950918, "grad_norm": 0.5712664127349854, "learning_rate": 2.9581502066305722e-05, "loss": 0.1243, "step": 5822 }, { "epoch": 0.1031226341165376, "grad_norm": 1.3128232955932617, "learning_rate": 2.9581300229828778e-05, "loss": 0.1681, "step": 5823 }, { "epoch": 0.10314034365356603, "grad_norm": 1.0050220489501953, "learning_rate": 2.9581098345380833e-05, "loss": 0.1698, "step": 5824 }, { "epoch": 0.10315805319059447, "grad_norm": 1.1626266241073608, "learning_rate": 2.9580896412962547e-05, "loss": 0.1301, "step": 5825 }, { "epoch": 0.1031757627276229, "grad_norm": 1.217055320739746, "learning_rate": 2.9580694432574583e-05, "loss": 0.1398, "step": 5826 }, { "epoch": 0.10319347226465132, "grad_norm": 1.195687174797058, "learning_rate": 2.9580492404217615e-05, "loss": 0.1587, "step": 5827 }, { "epoch": 0.10321118180167975, "grad_norm": 1.1704270839691162, "learning_rate": 2.95802903278923e-05, "loss": 0.1483, "step": 5828 }, { "epoch": 0.10322889133870818, "grad_norm": 1.611161231994629, "learning_rate": 2.9580088203599306e-05, "loss": 0.158, "step": 5829 }, { "epoch": 0.1032466008757366, "grad_norm": 2.8280317783355713, "learning_rate": 2.957988603133929e-05, "loss": 0.185, "step": 5830 }, { "epoch": 0.10326431041276503, "grad_norm": 1.3042291402816772, "learning_rate": 2.9579683811112934e-05, "loss": 0.1343, "step": 5831 }, { "epoch": 0.10328201994979347, "grad_norm": 1.4853934049606323, "learning_rate": 2.9579481542920884e-05, "loss": 0.1329, "step": 5832 }, { "epoch": 0.1032997294868219, "grad_norm": 1.1562925577163696, "learning_rate": 2.9579279226763818e-05, "loss": 0.1366, "step": 5833 }, { "epoch": 0.10331743902385032, "grad_norm": 1.3392952680587769, "learning_rate": 2.95790768626424e-05, "loss": 0.1732, "step": 5834 }, { "epoch": 0.10333514856087875, "grad_norm": 1.076064944267273, "learning_rate": 2.957887445055729e-05, "loss": 0.0976, "step": 5835 }, { "epoch": 0.10335285809790717, "grad_norm": 1.558510661125183, "learning_rate": 2.9578671990509157e-05, "loss": 0.1678, "step": 5836 }, { "epoch": 0.1033705676349356, "grad_norm": 1.6867283582687378, "learning_rate": 2.957846948249867e-05, "loss": 0.1515, "step": 5837 }, { "epoch": 0.10338827717196403, "grad_norm": 1.3341127634048462, "learning_rate": 2.9578266926526496e-05, "loss": 0.1225, "step": 5838 }, { "epoch": 0.10340598670899245, "grad_norm": 1.3450862169265747, "learning_rate": 2.9578064322593293e-05, "loss": 0.1731, "step": 5839 }, { "epoch": 0.1034236962460209, "grad_norm": 0.884541392326355, "learning_rate": 2.9577861670699732e-05, "loss": 0.1305, "step": 5840 }, { "epoch": 0.10344140578304932, "grad_norm": 0.9974664449691772, "learning_rate": 2.9577658970846486e-05, "loss": 0.1257, "step": 5841 }, { "epoch": 0.10345911532007775, "grad_norm": 1.0488866567611694, "learning_rate": 2.9577456223034213e-05, "loss": 0.1377, "step": 5842 }, { "epoch": 0.10347682485710617, "grad_norm": 1.4652705192565918, "learning_rate": 2.9577253427263585e-05, "loss": 0.1178, "step": 5843 }, { "epoch": 0.1034945343941346, "grad_norm": 1.4481042623519897, "learning_rate": 2.9577050583535266e-05, "loss": 0.1693, "step": 5844 }, { "epoch": 0.10351224393116303, "grad_norm": 1.5811424255371094, "learning_rate": 2.9576847691849924e-05, "loss": 0.1434, "step": 5845 }, { "epoch": 0.10352995346819145, "grad_norm": 1.3519476652145386, "learning_rate": 2.957664475220823e-05, "loss": 0.157, "step": 5846 }, { "epoch": 0.10354766300521989, "grad_norm": 1.5322000980377197, "learning_rate": 2.957644176461085e-05, "loss": 0.1574, "step": 5847 }, { "epoch": 0.10356537254224832, "grad_norm": 1.2332141399383545, "learning_rate": 2.957623872905845e-05, "loss": 0.1398, "step": 5848 }, { "epoch": 0.10358308207927674, "grad_norm": 2.108494520187378, "learning_rate": 2.95760356455517e-05, "loss": 0.1495, "step": 5849 }, { "epoch": 0.10360079161630517, "grad_norm": 1.0012801885604858, "learning_rate": 2.957583251409126e-05, "loss": 0.142, "step": 5850 }, { "epoch": 0.1036185011533336, "grad_norm": 1.5834686756134033, "learning_rate": 2.9575629334677813e-05, "loss": 0.1728, "step": 5851 }, { "epoch": 0.10363621069036202, "grad_norm": 1.1558655500411987, "learning_rate": 2.9575426107312016e-05, "loss": 0.1524, "step": 5852 }, { "epoch": 0.10365392022739045, "grad_norm": 0.8220642805099487, "learning_rate": 2.9575222831994543e-05, "loss": 0.0721, "step": 5853 }, { "epoch": 0.10367162976441889, "grad_norm": 1.4303406476974487, "learning_rate": 2.9575019508726063e-05, "loss": 0.161, "step": 5854 }, { "epoch": 0.10368933930144732, "grad_norm": 1.4462674856185913, "learning_rate": 2.9574816137507238e-05, "loss": 0.1284, "step": 5855 }, { "epoch": 0.10370704883847574, "grad_norm": 1.1153814792633057, "learning_rate": 2.9574612718338745e-05, "loss": 0.1406, "step": 5856 }, { "epoch": 0.10372475837550417, "grad_norm": 1.6229392290115356, "learning_rate": 2.9574409251221248e-05, "loss": 0.1412, "step": 5857 }, { "epoch": 0.1037424679125326, "grad_norm": 1.2274420261383057, "learning_rate": 2.957420573615542e-05, "loss": 0.1246, "step": 5858 }, { "epoch": 0.10376017744956102, "grad_norm": 1.0249133110046387, "learning_rate": 2.957400217314193e-05, "loss": 0.1276, "step": 5859 }, { "epoch": 0.10377788698658945, "grad_norm": 1.260568380355835, "learning_rate": 2.9573798562181448e-05, "loss": 0.1324, "step": 5860 }, { "epoch": 0.10379559652361788, "grad_norm": 1.5242009162902832, "learning_rate": 2.9573594903274642e-05, "loss": 0.1163, "step": 5861 }, { "epoch": 0.10381330606064632, "grad_norm": 1.355654239654541, "learning_rate": 2.9573391196422183e-05, "loss": 0.1448, "step": 5862 }, { "epoch": 0.10383101559767474, "grad_norm": 1.0041569471359253, "learning_rate": 2.9573187441624738e-05, "loss": 0.1582, "step": 5863 }, { "epoch": 0.10384872513470317, "grad_norm": 1.097395896911621, "learning_rate": 2.9572983638882982e-05, "loss": 0.1387, "step": 5864 }, { "epoch": 0.1038664346717316, "grad_norm": 1.300789713859558, "learning_rate": 2.9572779788197583e-05, "loss": 0.1611, "step": 5865 }, { "epoch": 0.10388414420876002, "grad_norm": 1.426123857498169, "learning_rate": 2.9572575889569218e-05, "loss": 0.1363, "step": 5866 }, { "epoch": 0.10390185374578845, "grad_norm": 1.4127849340438843, "learning_rate": 2.9572371942998547e-05, "loss": 0.1452, "step": 5867 }, { "epoch": 0.10391956328281687, "grad_norm": 1.5724414587020874, "learning_rate": 2.9572167948486245e-05, "loss": 0.1471, "step": 5868 }, { "epoch": 0.10393727281984531, "grad_norm": 1.4447962045669556, "learning_rate": 2.9571963906032986e-05, "loss": 0.1468, "step": 5869 }, { "epoch": 0.10395498235687374, "grad_norm": 1.0498000383377075, "learning_rate": 2.957175981563944e-05, "loss": 0.1157, "step": 5870 }, { "epoch": 0.10397269189390217, "grad_norm": 1.0583736896514893, "learning_rate": 2.957155567730628e-05, "loss": 0.1384, "step": 5871 }, { "epoch": 0.10399040143093059, "grad_norm": 1.267025113105774, "learning_rate": 2.9571351491034173e-05, "loss": 0.107, "step": 5872 }, { "epoch": 0.10400811096795902, "grad_norm": 0.785493791103363, "learning_rate": 2.9571147256823795e-05, "loss": 0.1203, "step": 5873 }, { "epoch": 0.10402582050498745, "grad_norm": 1.1820573806762695, "learning_rate": 2.9570942974675815e-05, "loss": 0.1513, "step": 5874 }, { "epoch": 0.10404353004201587, "grad_norm": 1.4053936004638672, "learning_rate": 2.9570738644590906e-05, "loss": 0.1521, "step": 5875 }, { "epoch": 0.1040612395790443, "grad_norm": 1.1280219554901123, "learning_rate": 2.9570534266569743e-05, "loss": 0.1738, "step": 5876 }, { "epoch": 0.10407894911607274, "grad_norm": 1.2637611627578735, "learning_rate": 2.9570329840612996e-05, "loss": 0.1403, "step": 5877 }, { "epoch": 0.10409665865310117, "grad_norm": 0.8595660328865051, "learning_rate": 2.9570125366721334e-05, "loss": 0.1033, "step": 5878 }, { "epoch": 0.10411436819012959, "grad_norm": 0.96438068151474, "learning_rate": 2.9569920844895437e-05, "loss": 0.1167, "step": 5879 }, { "epoch": 0.10413207772715802, "grad_norm": 1.5544288158416748, "learning_rate": 2.9569716275135972e-05, "loss": 0.1143, "step": 5880 }, { "epoch": 0.10414978726418644, "grad_norm": 0.6775166392326355, "learning_rate": 2.9569511657443615e-05, "loss": 0.0871, "step": 5881 }, { "epoch": 0.10416749680121487, "grad_norm": 1.0597716569900513, "learning_rate": 2.9569306991819038e-05, "loss": 0.1802, "step": 5882 }, { "epoch": 0.1041852063382433, "grad_norm": 1.2233257293701172, "learning_rate": 2.9569102278262913e-05, "loss": 0.132, "step": 5883 }, { "epoch": 0.10420291587527174, "grad_norm": 1.5467239618301392, "learning_rate": 2.9568897516775915e-05, "loss": 0.1018, "step": 5884 }, { "epoch": 0.10422062541230016, "grad_norm": 1.0224812030792236, "learning_rate": 2.9568692707358724e-05, "loss": 0.131, "step": 5885 }, { "epoch": 0.10423833494932859, "grad_norm": 0.748054027557373, "learning_rate": 2.9568487850012e-05, "loss": 0.1205, "step": 5886 }, { "epoch": 0.10425604448635702, "grad_norm": 0.9459899067878723, "learning_rate": 2.956828294473643e-05, "loss": 0.0907, "step": 5887 }, { "epoch": 0.10427375402338544, "grad_norm": 1.0650544166564941, "learning_rate": 2.9568077991532675e-05, "loss": 0.123, "step": 5888 }, { "epoch": 0.10429146356041387, "grad_norm": 1.2414820194244385, "learning_rate": 2.9567872990401424e-05, "loss": 0.1435, "step": 5889 }, { "epoch": 0.1043091730974423, "grad_norm": 0.9151628613471985, "learning_rate": 2.9567667941343342e-05, "loss": 0.0973, "step": 5890 }, { "epoch": 0.10432688263447072, "grad_norm": 0.8013228178024292, "learning_rate": 2.9567462844359105e-05, "loss": 0.1271, "step": 5891 }, { "epoch": 0.10434459217149916, "grad_norm": 1.370274305343628, "learning_rate": 2.956725769944939e-05, "loss": 0.1479, "step": 5892 }, { "epoch": 0.10436230170852759, "grad_norm": 1.367047667503357, "learning_rate": 2.956705250661487e-05, "loss": 0.0983, "step": 5893 }, { "epoch": 0.10438001124555601, "grad_norm": 1.1571837663650513, "learning_rate": 2.956684726585622e-05, "loss": 0.1519, "step": 5894 }, { "epoch": 0.10439772078258444, "grad_norm": 1.2811980247497559, "learning_rate": 2.9566641977174116e-05, "loss": 0.1293, "step": 5895 }, { "epoch": 0.10441543031961287, "grad_norm": 1.2000645399093628, "learning_rate": 2.9566436640569233e-05, "loss": 0.1519, "step": 5896 }, { "epoch": 0.1044331398566413, "grad_norm": 1.2035324573516846, "learning_rate": 2.956623125604225e-05, "loss": 0.1076, "step": 5897 }, { "epoch": 0.10445084939366972, "grad_norm": 0.7877079844474792, "learning_rate": 2.9566025823593834e-05, "loss": 0.1427, "step": 5898 }, { "epoch": 0.10446855893069816, "grad_norm": 1.1188331842422485, "learning_rate": 2.956582034322467e-05, "loss": 0.117, "step": 5899 }, { "epoch": 0.10448626846772659, "grad_norm": 0.9868970513343811, "learning_rate": 2.9565614814935432e-05, "loss": 0.1266, "step": 5900 }, { "epoch": 0.10450397800475501, "grad_norm": 2.4890730381011963, "learning_rate": 2.9565409238726788e-05, "loss": 0.1743, "step": 5901 }, { "epoch": 0.10452168754178344, "grad_norm": 1.760391354560852, "learning_rate": 2.956520361459943e-05, "loss": 0.1456, "step": 5902 }, { "epoch": 0.10453939707881187, "grad_norm": 1.56492018699646, "learning_rate": 2.9564997942554016e-05, "loss": 0.1593, "step": 5903 }, { "epoch": 0.10455710661584029, "grad_norm": 1.612625002861023, "learning_rate": 2.9564792222591236e-05, "loss": 0.1371, "step": 5904 }, { "epoch": 0.10457481615286872, "grad_norm": 1.1504762172698975, "learning_rate": 2.9564586454711765e-05, "loss": 0.1252, "step": 5905 }, { "epoch": 0.10459252568989715, "grad_norm": 1.0501573085784912, "learning_rate": 2.9564380638916274e-05, "loss": 0.16, "step": 5906 }, { "epoch": 0.10461023522692559, "grad_norm": 1.6131861209869385, "learning_rate": 2.9564174775205444e-05, "loss": 0.1805, "step": 5907 }, { "epoch": 0.10462794476395401, "grad_norm": 1.0106631517410278, "learning_rate": 2.9563968863579952e-05, "loss": 0.1267, "step": 5908 }, { "epoch": 0.10464565430098244, "grad_norm": 1.3641483783721924, "learning_rate": 2.956376290404048e-05, "loss": 0.18, "step": 5909 }, { "epoch": 0.10466336383801086, "grad_norm": 1.7091718912124634, "learning_rate": 2.95635568965877e-05, "loss": 0.1234, "step": 5910 }, { "epoch": 0.10468107337503929, "grad_norm": 1.046317458152771, "learning_rate": 2.9563350841222286e-05, "loss": 0.1202, "step": 5911 }, { "epoch": 0.10469878291206772, "grad_norm": 1.5250071287155151, "learning_rate": 2.9563144737944924e-05, "loss": 0.1701, "step": 5912 }, { "epoch": 0.10471649244909614, "grad_norm": 1.5684734582901, "learning_rate": 2.956293858675629e-05, "loss": 0.1973, "step": 5913 }, { "epoch": 0.10473420198612458, "grad_norm": 1.269972562789917, "learning_rate": 2.9562732387657058e-05, "loss": 0.1312, "step": 5914 }, { "epoch": 0.10475191152315301, "grad_norm": 1.0711677074432373, "learning_rate": 2.956252614064791e-05, "loss": 0.1435, "step": 5915 }, { "epoch": 0.10476962106018144, "grad_norm": 0.9501212239265442, "learning_rate": 2.9562319845729525e-05, "loss": 0.1175, "step": 5916 }, { "epoch": 0.10478733059720986, "grad_norm": 1.2851225137710571, "learning_rate": 2.9562113502902578e-05, "loss": 0.1472, "step": 5917 }, { "epoch": 0.10480504013423829, "grad_norm": 1.1307820081710815, "learning_rate": 2.9561907112167757e-05, "loss": 0.164, "step": 5918 }, { "epoch": 0.10482274967126672, "grad_norm": 1.0050337314605713, "learning_rate": 2.9561700673525728e-05, "loss": 0.1297, "step": 5919 }, { "epoch": 0.10484045920829514, "grad_norm": 1.476542353630066, "learning_rate": 2.956149418697718e-05, "loss": 0.1541, "step": 5920 }, { "epoch": 0.10485816874532358, "grad_norm": 1.0901378393173218, "learning_rate": 2.9561287652522787e-05, "loss": 0.1229, "step": 5921 }, { "epoch": 0.10487587828235201, "grad_norm": 2.8282182216644287, "learning_rate": 2.956108107016323e-05, "loss": 0.1562, "step": 5922 }, { "epoch": 0.10489358781938043, "grad_norm": 1.5033811330795288, "learning_rate": 2.956087443989919e-05, "loss": 0.1635, "step": 5923 }, { "epoch": 0.10491129735640886, "grad_norm": 1.3177130222320557, "learning_rate": 2.9560667761731345e-05, "loss": 0.1076, "step": 5924 }, { "epoch": 0.10492900689343729, "grad_norm": 1.2493563890457153, "learning_rate": 2.9560461035660378e-05, "loss": 0.1641, "step": 5925 }, { "epoch": 0.10494671643046571, "grad_norm": 1.3327728509902954, "learning_rate": 2.9560254261686966e-05, "loss": 0.0933, "step": 5926 }, { "epoch": 0.10496442596749414, "grad_norm": 1.1115103960037231, "learning_rate": 2.956004743981179e-05, "loss": 0.1254, "step": 5927 }, { "epoch": 0.10498213550452257, "grad_norm": 1.1836800575256348, "learning_rate": 2.955984057003553e-05, "loss": 0.138, "step": 5928 }, { "epoch": 0.10499984504155101, "grad_norm": 1.543631672859192, "learning_rate": 2.9559633652358865e-05, "loss": 0.121, "step": 5929 }, { "epoch": 0.10501755457857943, "grad_norm": 1.0294475555419922, "learning_rate": 2.9559426686782484e-05, "loss": 0.1482, "step": 5930 }, { "epoch": 0.10503526411560786, "grad_norm": 1.2856295108795166, "learning_rate": 2.9559219673307057e-05, "loss": 0.1268, "step": 5931 }, { "epoch": 0.10505297365263629, "grad_norm": 1.3927632570266724, "learning_rate": 2.955901261193327e-05, "loss": 0.1905, "step": 5932 }, { "epoch": 0.10507068318966471, "grad_norm": 1.7010048627853394, "learning_rate": 2.955880550266181e-05, "loss": 0.2168, "step": 5933 }, { "epoch": 0.10508839272669314, "grad_norm": 0.9376681447029114, "learning_rate": 2.955859834549334e-05, "loss": 0.1098, "step": 5934 }, { "epoch": 0.10510610226372157, "grad_norm": 1.2614742517471313, "learning_rate": 2.9558391140428562e-05, "loss": 0.1127, "step": 5935 }, { "epoch": 0.10512381180075, "grad_norm": 1.5796338319778442, "learning_rate": 2.9558183887468153e-05, "loss": 0.152, "step": 5936 }, { "epoch": 0.10514152133777843, "grad_norm": 1.324449062347412, "learning_rate": 2.9557976586612788e-05, "loss": 0.1358, "step": 5937 }, { "epoch": 0.10515923087480686, "grad_norm": 1.3283584117889404, "learning_rate": 2.955776923786315e-05, "loss": 0.1492, "step": 5938 }, { "epoch": 0.10517694041183528, "grad_norm": 0.8139317035675049, "learning_rate": 2.9557561841219926e-05, "loss": 0.1108, "step": 5939 }, { "epoch": 0.10519464994886371, "grad_norm": 1.3206877708435059, "learning_rate": 2.9557354396683795e-05, "loss": 0.1649, "step": 5940 }, { "epoch": 0.10521235948589214, "grad_norm": 1.7702409029006958, "learning_rate": 2.955714690425544e-05, "loss": 0.1576, "step": 5941 }, { "epoch": 0.10523006902292056, "grad_norm": 1.273974061012268, "learning_rate": 2.9556939363935548e-05, "loss": 0.1286, "step": 5942 }, { "epoch": 0.10524777855994899, "grad_norm": 1.3400909900665283, "learning_rate": 2.9556731775724793e-05, "loss": 0.1462, "step": 5943 }, { "epoch": 0.10526548809697743, "grad_norm": 1.3181560039520264, "learning_rate": 2.9556524139623867e-05, "loss": 0.1097, "step": 5944 }, { "epoch": 0.10528319763400586, "grad_norm": 1.0035403966903687, "learning_rate": 2.9556316455633446e-05, "loss": 0.1217, "step": 5945 }, { "epoch": 0.10530090717103428, "grad_norm": 1.2329126596450806, "learning_rate": 2.9556108723754215e-05, "loss": 0.1018, "step": 5946 }, { "epoch": 0.10531861670806271, "grad_norm": 1.0719473361968994, "learning_rate": 2.9555900943986862e-05, "loss": 0.1282, "step": 5947 }, { "epoch": 0.10533632624509114, "grad_norm": 1.1433475017547607, "learning_rate": 2.9555693116332064e-05, "loss": 0.1389, "step": 5948 }, { "epoch": 0.10535403578211956, "grad_norm": 0.9594194293022156, "learning_rate": 2.9555485240790512e-05, "loss": 0.1335, "step": 5949 }, { "epoch": 0.10537174531914799, "grad_norm": 1.4907078742980957, "learning_rate": 2.9555277317362885e-05, "loss": 0.1496, "step": 5950 }, { "epoch": 0.10538945485617643, "grad_norm": 1.2207460403442383, "learning_rate": 2.9555069346049862e-05, "loss": 0.1286, "step": 5951 }, { "epoch": 0.10540716439320486, "grad_norm": 1.0860706567764282, "learning_rate": 2.955486132685214e-05, "loss": 0.1093, "step": 5952 }, { "epoch": 0.10542487393023328, "grad_norm": 1.5258270502090454, "learning_rate": 2.955465325977039e-05, "loss": 0.1126, "step": 5953 }, { "epoch": 0.10544258346726171, "grad_norm": 1.0911808013916016, "learning_rate": 2.9554445144805306e-05, "loss": 0.2124, "step": 5954 }, { "epoch": 0.10546029300429013, "grad_norm": 1.0833890438079834, "learning_rate": 2.955423698195757e-05, "loss": 0.1528, "step": 5955 }, { "epoch": 0.10547800254131856, "grad_norm": 1.917415738105774, "learning_rate": 2.9554028771227868e-05, "loss": 0.1601, "step": 5956 }, { "epoch": 0.10549571207834699, "grad_norm": 1.5162992477416992, "learning_rate": 2.9553820512616882e-05, "loss": 0.0999, "step": 5957 }, { "epoch": 0.10551342161537541, "grad_norm": 0.9656375646591187, "learning_rate": 2.95536122061253e-05, "loss": 0.1412, "step": 5958 }, { "epoch": 0.10553113115240385, "grad_norm": 1.611756443977356, "learning_rate": 2.9553403851753803e-05, "loss": 0.1594, "step": 5959 }, { "epoch": 0.10554884068943228, "grad_norm": 0.6734880805015564, "learning_rate": 2.955319544950308e-05, "loss": 0.1452, "step": 5960 }, { "epoch": 0.1055665502264607, "grad_norm": 1.4348211288452148, "learning_rate": 2.9552986999373813e-05, "loss": 0.1419, "step": 5961 }, { "epoch": 0.10558425976348913, "grad_norm": 1.0144004821777344, "learning_rate": 2.9552778501366695e-05, "loss": 0.1838, "step": 5962 }, { "epoch": 0.10560196930051756, "grad_norm": 1.2503830194473267, "learning_rate": 2.9552569955482408e-05, "loss": 0.1088, "step": 5963 }, { "epoch": 0.10561967883754599, "grad_norm": 1.1566886901855469, "learning_rate": 2.9552361361721637e-05, "loss": 0.076, "step": 5964 }, { "epoch": 0.10563738837457441, "grad_norm": 1.2825679779052734, "learning_rate": 2.9552152720085067e-05, "loss": 0.1493, "step": 5965 }, { "epoch": 0.10565509791160285, "grad_norm": 1.545137643814087, "learning_rate": 2.9551944030573388e-05, "loss": 0.1292, "step": 5966 }, { "epoch": 0.10567280744863128, "grad_norm": 0.7743906378746033, "learning_rate": 2.9551735293187284e-05, "loss": 0.1091, "step": 5967 }, { "epoch": 0.1056905169856597, "grad_norm": 1.6183457374572754, "learning_rate": 2.9551526507927445e-05, "loss": 0.1367, "step": 5968 }, { "epoch": 0.10570822652268813, "grad_norm": 1.5157132148742676, "learning_rate": 2.9551317674794553e-05, "loss": 0.1391, "step": 5969 }, { "epoch": 0.10572593605971656, "grad_norm": 4.3623199462890625, "learning_rate": 2.9551108793789297e-05, "loss": 0.1052, "step": 5970 }, { "epoch": 0.10574364559674498, "grad_norm": 1.2757420539855957, "learning_rate": 2.955089986491237e-05, "loss": 0.1234, "step": 5971 }, { "epoch": 0.10576135513377341, "grad_norm": 1.5807210206985474, "learning_rate": 2.9550690888164448e-05, "loss": 0.1324, "step": 5972 }, { "epoch": 0.10577906467080184, "grad_norm": 2.345946788787842, "learning_rate": 2.955048186354623e-05, "loss": 0.1343, "step": 5973 }, { "epoch": 0.10579677420783028, "grad_norm": 1.210256814956665, "learning_rate": 2.9550272791058392e-05, "loss": 0.1695, "step": 5974 }, { "epoch": 0.1058144837448587, "grad_norm": 1.0358757972717285, "learning_rate": 2.9550063670701635e-05, "loss": 0.1548, "step": 5975 }, { "epoch": 0.10583219328188713, "grad_norm": 0.9321252107620239, "learning_rate": 2.9549854502476635e-05, "loss": 0.1055, "step": 5976 }, { "epoch": 0.10584990281891556, "grad_norm": 1.1595362424850464, "learning_rate": 2.9549645286384088e-05, "loss": 0.1328, "step": 5977 }, { "epoch": 0.10586761235594398, "grad_norm": 1.2131376266479492, "learning_rate": 2.9549436022424676e-05, "loss": 0.1332, "step": 5978 }, { "epoch": 0.10588532189297241, "grad_norm": 2.4372196197509766, "learning_rate": 2.9549226710599097e-05, "loss": 0.1217, "step": 5979 }, { "epoch": 0.10590303143000084, "grad_norm": 1.4849311113357544, "learning_rate": 2.954901735090803e-05, "loss": 0.1721, "step": 5980 }, { "epoch": 0.10592074096702928, "grad_norm": 0.9624840021133423, "learning_rate": 2.9548807943352167e-05, "loss": 0.1459, "step": 5981 }, { "epoch": 0.1059384505040577, "grad_norm": 2.474881649017334, "learning_rate": 2.95485984879322e-05, "loss": 0.1355, "step": 5982 }, { "epoch": 0.10595616004108613, "grad_norm": 1.0188242197036743, "learning_rate": 2.9548388984648815e-05, "loss": 0.1119, "step": 5983 }, { "epoch": 0.10597386957811455, "grad_norm": 3.2123591899871826, "learning_rate": 2.95481794335027e-05, "loss": 0.1436, "step": 5984 }, { "epoch": 0.10599157911514298, "grad_norm": 1.1236921548843384, "learning_rate": 2.9547969834494546e-05, "loss": 0.1328, "step": 5985 }, { "epoch": 0.10600928865217141, "grad_norm": 1.5812633037567139, "learning_rate": 2.9547760187625044e-05, "loss": 0.125, "step": 5986 }, { "epoch": 0.10602699818919983, "grad_norm": 1.2495707273483276, "learning_rate": 2.9547550492894884e-05, "loss": 0.1168, "step": 5987 }, { "epoch": 0.10604470772622827, "grad_norm": 1.3056602478027344, "learning_rate": 2.954734075030475e-05, "loss": 0.1233, "step": 5988 }, { "epoch": 0.1060624172632567, "grad_norm": 1.1912988424301147, "learning_rate": 2.9547130959855336e-05, "loss": 0.1264, "step": 5989 }, { "epoch": 0.10608012680028513, "grad_norm": 1.1611032485961914, "learning_rate": 2.9546921121547338e-05, "loss": 0.1287, "step": 5990 }, { "epoch": 0.10609783633731355, "grad_norm": 1.0429296493530273, "learning_rate": 2.954671123538144e-05, "loss": 0.0979, "step": 5991 }, { "epoch": 0.10611554587434198, "grad_norm": 1.3695791959762573, "learning_rate": 2.954650130135833e-05, "loss": 0.1226, "step": 5992 }, { "epoch": 0.1061332554113704, "grad_norm": 1.7883727550506592, "learning_rate": 2.9546291319478704e-05, "loss": 0.149, "step": 5993 }, { "epoch": 0.10615096494839883, "grad_norm": 0.9545249938964844, "learning_rate": 2.954608128974325e-05, "loss": 0.1217, "step": 5994 }, { "epoch": 0.10616867448542726, "grad_norm": 1.708465337753296, "learning_rate": 2.9545871212152658e-05, "loss": 0.1508, "step": 5995 }, { "epoch": 0.1061863840224557, "grad_norm": 1.1544355154037476, "learning_rate": 2.9545661086707623e-05, "loss": 0.1142, "step": 5996 }, { "epoch": 0.10620409355948413, "grad_norm": 1.6662124395370483, "learning_rate": 2.9545450913408834e-05, "loss": 0.1502, "step": 5997 }, { "epoch": 0.10622180309651255, "grad_norm": 0.927807629108429, "learning_rate": 2.9545240692256984e-05, "loss": 0.1391, "step": 5998 }, { "epoch": 0.10623951263354098, "grad_norm": 1.117433786392212, "learning_rate": 2.954503042325276e-05, "loss": 0.2138, "step": 5999 }, { "epoch": 0.1062572221705694, "grad_norm": 1.1560893058776855, "learning_rate": 2.9544820106396862e-05, "loss": 0.1546, "step": 6000 }, { "epoch": 0.10627493170759783, "grad_norm": 1.416272759437561, "learning_rate": 2.9544609741689973e-05, "loss": 0.1781, "step": 6001 }, { "epoch": 0.10629264124462626, "grad_norm": 1.0661357641220093, "learning_rate": 2.954439932913279e-05, "loss": 0.1375, "step": 6002 }, { "epoch": 0.1063103507816547, "grad_norm": 1.5360863208770752, "learning_rate": 2.9544188868726004e-05, "loss": 0.1513, "step": 6003 }, { "epoch": 0.10632806031868312, "grad_norm": 1.558618187904358, "learning_rate": 2.954397836047031e-05, "loss": 0.1196, "step": 6004 }, { "epoch": 0.10634576985571155, "grad_norm": 0.9165611863136292, "learning_rate": 2.9543767804366396e-05, "loss": 0.1007, "step": 6005 }, { "epoch": 0.10636347939273998, "grad_norm": 1.6094850301742554, "learning_rate": 2.9543557200414956e-05, "loss": 0.1614, "step": 6006 }, { "epoch": 0.1063811889297684, "grad_norm": 1.108586311340332, "learning_rate": 2.954334654861669e-05, "loss": 0.1325, "step": 6007 }, { "epoch": 0.10639889846679683, "grad_norm": 1.104303002357483, "learning_rate": 2.9543135848972277e-05, "loss": 0.1125, "step": 6008 }, { "epoch": 0.10641660800382526, "grad_norm": 1.2828642129898071, "learning_rate": 2.954292510148242e-05, "loss": 0.1367, "step": 6009 }, { "epoch": 0.10643431754085368, "grad_norm": 1.6464091539382935, "learning_rate": 2.954271430614781e-05, "loss": 0.1269, "step": 6010 }, { "epoch": 0.10645202707788212, "grad_norm": 1.128185749053955, "learning_rate": 2.9542503462969143e-05, "loss": 0.1459, "step": 6011 }, { "epoch": 0.10646973661491055, "grad_norm": 1.0917762517929077, "learning_rate": 2.9542292571947107e-05, "loss": 0.1474, "step": 6012 }, { "epoch": 0.10648744615193897, "grad_norm": 1.3092588186264038, "learning_rate": 2.9542081633082404e-05, "loss": 0.1132, "step": 6013 }, { "epoch": 0.1065051556889674, "grad_norm": 0.9420502781867981, "learning_rate": 2.9541870646375718e-05, "loss": 0.1235, "step": 6014 }, { "epoch": 0.10652286522599583, "grad_norm": 1.5897600650787354, "learning_rate": 2.9541659611827758e-05, "loss": 0.1838, "step": 6015 }, { "epoch": 0.10654057476302425, "grad_norm": 1.610480546951294, "learning_rate": 2.95414485294392e-05, "loss": 0.1179, "step": 6016 }, { "epoch": 0.10655828430005268, "grad_norm": 0.9234992861747742, "learning_rate": 2.9541237399210747e-05, "loss": 0.1451, "step": 6017 }, { "epoch": 0.10657599383708112, "grad_norm": 0.8768957853317261, "learning_rate": 2.95410262211431e-05, "loss": 0.1378, "step": 6018 }, { "epoch": 0.10659370337410955, "grad_norm": 0.8173255920410156, "learning_rate": 2.9540814995236942e-05, "loss": 0.1081, "step": 6019 }, { "epoch": 0.10661141291113797, "grad_norm": 2.095266103744507, "learning_rate": 2.9540603721492976e-05, "loss": 0.1743, "step": 6020 }, { "epoch": 0.1066291224481664, "grad_norm": 1.0359745025634766, "learning_rate": 2.9540392399911892e-05, "loss": 0.1137, "step": 6021 }, { "epoch": 0.10664683198519483, "grad_norm": 1.0986063480377197, "learning_rate": 2.9540181030494386e-05, "loss": 0.1138, "step": 6022 }, { "epoch": 0.10666454152222325, "grad_norm": 1.0494872331619263, "learning_rate": 2.953996961324116e-05, "loss": 0.1407, "step": 6023 }, { "epoch": 0.10668225105925168, "grad_norm": 1.0419083833694458, "learning_rate": 2.9539758148152903e-05, "loss": 0.1376, "step": 6024 }, { "epoch": 0.1066999605962801, "grad_norm": 1.019934058189392, "learning_rate": 2.9539546635230312e-05, "loss": 0.0939, "step": 6025 }, { "epoch": 0.10671767013330855, "grad_norm": 0.9400906562805176, "learning_rate": 2.9539335074474084e-05, "loss": 0.0806, "step": 6026 }, { "epoch": 0.10673537967033697, "grad_norm": 0.8779181838035583, "learning_rate": 2.9539123465884915e-05, "loss": 0.0938, "step": 6027 }, { "epoch": 0.1067530892073654, "grad_norm": 0.9154511094093323, "learning_rate": 2.9538911809463498e-05, "loss": 0.147, "step": 6028 }, { "epoch": 0.10677079874439382, "grad_norm": 2.4196348190307617, "learning_rate": 2.953870010521053e-05, "loss": 0.1321, "step": 6029 }, { "epoch": 0.10678850828142225, "grad_norm": 1.4505388736724854, "learning_rate": 2.9538488353126713e-05, "loss": 0.1582, "step": 6030 }, { "epoch": 0.10680621781845068, "grad_norm": 1.1291903257369995, "learning_rate": 2.9538276553212735e-05, "loss": 0.1324, "step": 6031 }, { "epoch": 0.1068239273554791, "grad_norm": 0.6002070307731628, "learning_rate": 2.9538064705469302e-05, "loss": 0.1269, "step": 6032 }, { "epoch": 0.10684163689250754, "grad_norm": 0.8842212557792664, "learning_rate": 2.9537852809897104e-05, "loss": 0.1126, "step": 6033 }, { "epoch": 0.10685934642953597, "grad_norm": 1.387620449066162, "learning_rate": 2.9537640866496846e-05, "loss": 0.1091, "step": 6034 }, { "epoch": 0.1068770559665644, "grad_norm": 1.3648878335952759, "learning_rate": 2.9537428875269217e-05, "loss": 0.1416, "step": 6035 }, { "epoch": 0.10689476550359282, "grad_norm": 1.295100212097168, "learning_rate": 2.9537216836214916e-05, "loss": 0.1082, "step": 6036 }, { "epoch": 0.10691247504062125, "grad_norm": 1.160428762435913, "learning_rate": 2.9537004749334642e-05, "loss": 0.1614, "step": 6037 }, { "epoch": 0.10693018457764968, "grad_norm": 1.5445247888565063, "learning_rate": 2.9536792614629092e-05, "loss": 0.1817, "step": 6038 }, { "epoch": 0.1069478941146781, "grad_norm": 1.5322000980377197, "learning_rate": 2.9536580432098967e-05, "loss": 0.1501, "step": 6039 }, { "epoch": 0.10696560365170653, "grad_norm": 1.2991665601730347, "learning_rate": 2.953636820174496e-05, "loss": 0.1014, "step": 6040 }, { "epoch": 0.10698331318873497, "grad_norm": 1.2485647201538086, "learning_rate": 2.9536155923567772e-05, "loss": 0.0821, "step": 6041 }, { "epoch": 0.1070010227257634, "grad_norm": 1.0690245628356934, "learning_rate": 2.9535943597568104e-05, "loss": 0.1029, "step": 6042 }, { "epoch": 0.10701873226279182, "grad_norm": 1.4234111309051514, "learning_rate": 2.953573122374665e-05, "loss": 0.135, "step": 6043 }, { "epoch": 0.10703644179982025, "grad_norm": 1.628729224205017, "learning_rate": 2.953551880210411e-05, "loss": 0.1203, "step": 6044 }, { "epoch": 0.10705415133684867, "grad_norm": 1.5642316341400146, "learning_rate": 2.9535306332641187e-05, "loss": 0.2002, "step": 6045 }, { "epoch": 0.1070718608738771, "grad_norm": 1.0203561782836914, "learning_rate": 2.9535093815358572e-05, "loss": 0.1066, "step": 6046 }, { "epoch": 0.10708957041090553, "grad_norm": 1.502821922302246, "learning_rate": 2.953488125025697e-05, "loss": 0.1351, "step": 6047 }, { "epoch": 0.10710727994793397, "grad_norm": 1.5284901857376099, "learning_rate": 2.9534668637337076e-05, "loss": 0.1294, "step": 6048 }, { "epoch": 0.1071249894849624, "grad_norm": 0.9693112373352051, "learning_rate": 2.9534455976599596e-05, "loss": 0.1161, "step": 6049 }, { "epoch": 0.10714269902199082, "grad_norm": 1.5755267143249512, "learning_rate": 2.9534243268045226e-05, "loss": 0.1339, "step": 6050 }, { "epoch": 0.10716040855901925, "grad_norm": 0.9828916192054749, "learning_rate": 2.9534030511674664e-05, "loss": 0.136, "step": 6051 }, { "epoch": 0.10717811809604767, "grad_norm": 1.1507402658462524, "learning_rate": 2.9533817707488614e-05, "loss": 0.1094, "step": 6052 }, { "epoch": 0.1071958276330761, "grad_norm": 2.064939498901367, "learning_rate": 2.953360485548777e-05, "loss": 0.1391, "step": 6053 }, { "epoch": 0.10721353717010453, "grad_norm": 1.3340812921524048, "learning_rate": 2.9533391955672837e-05, "loss": 0.1455, "step": 6054 }, { "epoch": 0.10723124670713297, "grad_norm": 2.140355110168457, "learning_rate": 2.9533179008044515e-05, "loss": 0.1509, "step": 6055 }, { "epoch": 0.10724895624416139, "grad_norm": 1.1628925800323486, "learning_rate": 2.953296601260351e-05, "loss": 0.125, "step": 6056 }, { "epoch": 0.10726666578118982, "grad_norm": 1.3621056079864502, "learning_rate": 2.953275296935051e-05, "loss": 0.1383, "step": 6057 }, { "epoch": 0.10728437531821824, "grad_norm": 1.2128525972366333, "learning_rate": 2.9532539878286222e-05, "loss": 0.1292, "step": 6058 }, { "epoch": 0.10730208485524667, "grad_norm": 1.151507019996643, "learning_rate": 2.953232673941135e-05, "loss": 0.145, "step": 6059 }, { "epoch": 0.1073197943922751, "grad_norm": 1.439041256904602, "learning_rate": 2.953211355272659e-05, "loss": 0.1509, "step": 6060 }, { "epoch": 0.10733750392930352, "grad_norm": 1.057186245918274, "learning_rate": 2.953190031823265e-05, "loss": 0.1133, "step": 6061 }, { "epoch": 0.10735521346633195, "grad_norm": 1.52206552028656, "learning_rate": 2.9531687035930224e-05, "loss": 0.1647, "step": 6062 }, { "epoch": 0.10737292300336039, "grad_norm": 1.7105025053024292, "learning_rate": 2.9531473705820022e-05, "loss": 0.1275, "step": 6063 }, { "epoch": 0.10739063254038882, "grad_norm": 1.38475501537323, "learning_rate": 2.953126032790274e-05, "loss": 0.1432, "step": 6064 }, { "epoch": 0.10740834207741724, "grad_norm": 1.4358195066452026, "learning_rate": 2.953104690217908e-05, "loss": 0.0947, "step": 6065 }, { "epoch": 0.10742605161444567, "grad_norm": 1.162025809288025, "learning_rate": 2.9530833428649746e-05, "loss": 0.1592, "step": 6066 }, { "epoch": 0.1074437611514741, "grad_norm": 1.5066158771514893, "learning_rate": 2.953061990731544e-05, "loss": 0.1413, "step": 6067 }, { "epoch": 0.10746147068850252, "grad_norm": 1.20095694065094, "learning_rate": 2.953040633817686e-05, "loss": 0.1186, "step": 6068 }, { "epoch": 0.10747918022553095, "grad_norm": 0.899560809135437, "learning_rate": 2.9530192721234716e-05, "loss": 0.1504, "step": 6069 }, { "epoch": 0.10749688976255939, "grad_norm": 1.3024100065231323, "learning_rate": 2.9529979056489714e-05, "loss": 0.1209, "step": 6070 }, { "epoch": 0.10751459929958782, "grad_norm": 1.0215245485305786, "learning_rate": 2.952976534394254e-05, "loss": 0.1648, "step": 6071 }, { "epoch": 0.10753230883661624, "grad_norm": 1.241425633430481, "learning_rate": 2.9529551583593912e-05, "loss": 0.1476, "step": 6072 }, { "epoch": 0.10755001837364467, "grad_norm": 1.2097811698913574, "learning_rate": 2.952933777544453e-05, "loss": 0.1052, "step": 6073 }, { "epoch": 0.1075677279106731, "grad_norm": 1.237940788269043, "learning_rate": 2.95291239194951e-05, "loss": 0.1102, "step": 6074 }, { "epoch": 0.10758543744770152, "grad_norm": 1.6108888387680054, "learning_rate": 2.9528910015746314e-05, "loss": 0.1492, "step": 6075 }, { "epoch": 0.10760314698472995, "grad_norm": 1.0537290573120117, "learning_rate": 2.9528696064198887e-05, "loss": 0.1207, "step": 6076 }, { "epoch": 0.10762085652175837, "grad_norm": 1.1491984128952026, "learning_rate": 2.952848206485352e-05, "loss": 0.1191, "step": 6077 }, { "epoch": 0.10763856605878681, "grad_norm": 1.0473887920379639, "learning_rate": 2.9528268017710916e-05, "loss": 0.1529, "step": 6078 }, { "epoch": 0.10765627559581524, "grad_norm": 1.3905760049819946, "learning_rate": 2.9528053922771778e-05, "loss": 0.1132, "step": 6079 }, { "epoch": 0.10767398513284367, "grad_norm": 1.5210610628128052, "learning_rate": 2.9527839780036814e-05, "loss": 0.0907, "step": 6080 }, { "epoch": 0.10769169466987209, "grad_norm": 1.139656901359558, "learning_rate": 2.9527625589506725e-05, "loss": 0.1572, "step": 6081 }, { "epoch": 0.10770940420690052, "grad_norm": 1.934881329536438, "learning_rate": 2.9527411351182216e-05, "loss": 0.1416, "step": 6082 }, { "epoch": 0.10772711374392895, "grad_norm": 1.0256754159927368, "learning_rate": 2.9527197065063998e-05, "loss": 0.1505, "step": 6083 }, { "epoch": 0.10774482328095737, "grad_norm": 1.750678539276123, "learning_rate": 2.9526982731152766e-05, "loss": 0.1087, "step": 6084 }, { "epoch": 0.10776253281798581, "grad_norm": 1.1789132356643677, "learning_rate": 2.9526768349449232e-05, "loss": 0.1497, "step": 6085 }, { "epoch": 0.10778024235501424, "grad_norm": 1.126035451889038, "learning_rate": 2.9526553919954103e-05, "loss": 0.1614, "step": 6086 }, { "epoch": 0.10779795189204266, "grad_norm": 1.377023696899414, "learning_rate": 2.9526339442668074e-05, "loss": 0.1186, "step": 6087 }, { "epoch": 0.10781566142907109, "grad_norm": 1.2599033117294312, "learning_rate": 2.9526124917591863e-05, "loss": 0.1403, "step": 6088 }, { "epoch": 0.10783337096609952, "grad_norm": 1.0450421571731567, "learning_rate": 2.952591034472617e-05, "loss": 0.1236, "step": 6089 }, { "epoch": 0.10785108050312794, "grad_norm": 1.1381292343139648, "learning_rate": 2.95256957240717e-05, "loss": 0.1467, "step": 6090 }, { "epoch": 0.10786879004015637, "grad_norm": 1.285143494606018, "learning_rate": 2.952548105562916e-05, "loss": 0.103, "step": 6091 }, { "epoch": 0.1078864995771848, "grad_norm": 0.9525443911552429, "learning_rate": 2.9525266339399255e-05, "loss": 0.0736, "step": 6092 }, { "epoch": 0.10790420911421324, "grad_norm": 1.2087959051132202, "learning_rate": 2.952505157538269e-05, "loss": 0.1472, "step": 6093 }, { "epoch": 0.10792191865124166, "grad_norm": 1.186932921409607, "learning_rate": 2.952483676358018e-05, "loss": 0.1384, "step": 6094 }, { "epoch": 0.10793962818827009, "grad_norm": 1.0207395553588867, "learning_rate": 2.9524621903992422e-05, "loss": 0.1064, "step": 6095 }, { "epoch": 0.10795733772529852, "grad_norm": 1.69343101978302, "learning_rate": 2.9524406996620132e-05, "loss": 0.1174, "step": 6096 }, { "epoch": 0.10797504726232694, "grad_norm": 1.3202537298202515, "learning_rate": 2.952419204146401e-05, "loss": 0.1474, "step": 6097 }, { "epoch": 0.10799275679935537, "grad_norm": 1.915975570678711, "learning_rate": 2.9523977038524767e-05, "loss": 0.1505, "step": 6098 }, { "epoch": 0.1080104663363838, "grad_norm": 2.6801328659057617, "learning_rate": 2.9523761987803105e-05, "loss": 0.1584, "step": 6099 }, { "epoch": 0.10802817587341224, "grad_norm": 1.3337879180908203, "learning_rate": 2.9523546889299737e-05, "loss": 0.1684, "step": 6100 }, { "epoch": 0.10804588541044066, "grad_norm": 1.07521390914917, "learning_rate": 2.9523331743015367e-05, "loss": 0.1212, "step": 6101 }, { "epoch": 0.10806359494746909, "grad_norm": 2.216121196746826, "learning_rate": 2.952311654895071e-05, "loss": 0.1483, "step": 6102 }, { "epoch": 0.10808130448449751, "grad_norm": 1.5037145614624023, "learning_rate": 2.9522901307106463e-05, "loss": 0.097, "step": 6103 }, { "epoch": 0.10809901402152594, "grad_norm": 1.6230885982513428, "learning_rate": 2.952268601748334e-05, "loss": 0.1791, "step": 6104 }, { "epoch": 0.10811672355855437, "grad_norm": 0.9501044154167175, "learning_rate": 2.952247068008205e-05, "loss": 0.1216, "step": 6105 }, { "epoch": 0.1081344330955828, "grad_norm": 1.5632684230804443, "learning_rate": 2.9522255294903296e-05, "loss": 0.1535, "step": 6106 }, { "epoch": 0.10815214263261122, "grad_norm": 1.2424625158309937, "learning_rate": 2.9522039861947794e-05, "loss": 0.1342, "step": 6107 }, { "epoch": 0.10816985216963966, "grad_norm": 1.5752229690551758, "learning_rate": 2.9521824381216256e-05, "loss": 0.1059, "step": 6108 }, { "epoch": 0.10818756170666809, "grad_norm": 3.5775609016418457, "learning_rate": 2.9521608852709376e-05, "loss": 0.1472, "step": 6109 }, { "epoch": 0.10820527124369651, "grad_norm": 2.4313416481018066, "learning_rate": 2.9521393276427877e-05, "loss": 0.1754, "step": 6110 }, { "epoch": 0.10822298078072494, "grad_norm": 1.2096552848815918, "learning_rate": 2.952117765237246e-05, "loss": 0.1816, "step": 6111 }, { "epoch": 0.10824069031775337, "grad_norm": 1.4415589570999146, "learning_rate": 2.9520961980543836e-05, "loss": 0.1847, "step": 6112 }, { "epoch": 0.10825839985478179, "grad_norm": 1.395214557647705, "learning_rate": 2.9520746260942722e-05, "loss": 0.1349, "step": 6113 }, { "epoch": 0.10827610939181022, "grad_norm": 0.9939817190170288, "learning_rate": 2.9520530493569814e-05, "loss": 0.1138, "step": 6114 }, { "epoch": 0.10829381892883866, "grad_norm": 1.3178958892822266, "learning_rate": 2.9520314678425832e-05, "loss": 0.1786, "step": 6115 }, { "epoch": 0.10831152846586709, "grad_norm": 1.1351590156555176, "learning_rate": 2.9520098815511487e-05, "loss": 0.1569, "step": 6116 }, { "epoch": 0.10832923800289551, "grad_norm": 2.694983959197998, "learning_rate": 2.951988290482748e-05, "loss": 0.1271, "step": 6117 }, { "epoch": 0.10834694753992394, "grad_norm": 0.9604008793830872, "learning_rate": 2.9519666946374528e-05, "loss": 0.1545, "step": 6118 }, { "epoch": 0.10836465707695236, "grad_norm": 1.3748570680618286, "learning_rate": 2.9519450940153345e-05, "loss": 0.156, "step": 6119 }, { "epoch": 0.10838236661398079, "grad_norm": 0.5885289311408997, "learning_rate": 2.9519234886164633e-05, "loss": 0.1269, "step": 6120 }, { "epoch": 0.10840007615100922, "grad_norm": 1.980633020401001, "learning_rate": 2.951901878440911e-05, "loss": 0.1159, "step": 6121 }, { "epoch": 0.10841778568803766, "grad_norm": 1.3522651195526123, "learning_rate": 2.951880263488748e-05, "loss": 0.1213, "step": 6122 }, { "epoch": 0.10843549522506608, "grad_norm": 1.3356374502182007, "learning_rate": 2.9518586437600454e-05, "loss": 0.1466, "step": 6123 }, { "epoch": 0.10845320476209451, "grad_norm": 0.7382290363311768, "learning_rate": 2.9518370192548756e-05, "loss": 0.1062, "step": 6124 }, { "epoch": 0.10847091429912294, "grad_norm": 1.2271161079406738, "learning_rate": 2.9518153899733084e-05, "loss": 0.1357, "step": 6125 }, { "epoch": 0.10848862383615136, "grad_norm": 1.2417536973953247, "learning_rate": 2.9517937559154152e-05, "loss": 0.1536, "step": 6126 }, { "epoch": 0.10850633337317979, "grad_norm": 1.3156251907348633, "learning_rate": 2.951772117081268e-05, "loss": 0.1308, "step": 6127 }, { "epoch": 0.10852404291020822, "grad_norm": 1.26352059841156, "learning_rate": 2.951750473470937e-05, "loss": 0.1368, "step": 6128 }, { "epoch": 0.10854175244723664, "grad_norm": 1.156655192375183, "learning_rate": 2.9517288250844935e-05, "loss": 0.1396, "step": 6129 }, { "epoch": 0.10855946198426508, "grad_norm": 1.0887367725372314, "learning_rate": 2.9517071719220093e-05, "loss": 0.0889, "step": 6130 }, { "epoch": 0.10857717152129351, "grad_norm": 1.2676188945770264, "learning_rate": 2.9516855139835553e-05, "loss": 0.1717, "step": 6131 }, { "epoch": 0.10859488105832193, "grad_norm": 1.8290587663650513, "learning_rate": 2.9516638512692026e-05, "loss": 0.1646, "step": 6132 }, { "epoch": 0.10861259059535036, "grad_norm": 0.9781219959259033, "learning_rate": 2.9516421837790234e-05, "loss": 0.1114, "step": 6133 }, { "epoch": 0.10863030013237879, "grad_norm": 1.2342036962509155, "learning_rate": 2.9516205115130874e-05, "loss": 0.167, "step": 6134 }, { "epoch": 0.10864800966940721, "grad_norm": 1.6529487371444702, "learning_rate": 2.951598834471467e-05, "loss": 0.1121, "step": 6135 }, { "epoch": 0.10866571920643564, "grad_norm": 1.6327357292175293, "learning_rate": 2.9515771526542335e-05, "loss": 0.1306, "step": 6136 }, { "epoch": 0.10868342874346408, "grad_norm": 0.9610931873321533, "learning_rate": 2.951555466061458e-05, "loss": 0.1565, "step": 6137 }, { "epoch": 0.1087011382804925, "grad_norm": 1.2759543657302856, "learning_rate": 2.9515337746932114e-05, "loss": 0.1413, "step": 6138 }, { "epoch": 0.10871884781752093, "grad_norm": 1.1695687770843506, "learning_rate": 2.951512078549566e-05, "loss": 0.181, "step": 6139 }, { "epoch": 0.10873655735454936, "grad_norm": 1.260833740234375, "learning_rate": 2.9514903776305922e-05, "loss": 0.1929, "step": 6140 }, { "epoch": 0.10875426689157779, "grad_norm": 1.2459636926651, "learning_rate": 2.951468671936362e-05, "loss": 0.1358, "step": 6141 }, { "epoch": 0.10877197642860621, "grad_norm": 1.493797779083252, "learning_rate": 2.9514469614669467e-05, "loss": 0.1314, "step": 6142 }, { "epoch": 0.10878968596563464, "grad_norm": 1.2682150602340698, "learning_rate": 2.9514252462224177e-05, "loss": 0.1511, "step": 6143 }, { "epoch": 0.10880739550266307, "grad_norm": 2.776541233062744, "learning_rate": 2.9514035262028465e-05, "loss": 0.1175, "step": 6144 }, { "epoch": 0.1088251050396915, "grad_norm": 1.1369938850402832, "learning_rate": 2.9513818014083045e-05, "loss": 0.1299, "step": 6145 }, { "epoch": 0.10884281457671993, "grad_norm": 1.486340880393982, "learning_rate": 2.9513600718388634e-05, "loss": 0.1725, "step": 6146 }, { "epoch": 0.10886052411374836, "grad_norm": 0.9239571690559387, "learning_rate": 2.9513383374945935e-05, "loss": 0.0873, "step": 6147 }, { "epoch": 0.10887823365077678, "grad_norm": 0.9984976649284363, "learning_rate": 2.9513165983755684e-05, "loss": 0.1389, "step": 6148 }, { "epoch": 0.10889594318780521, "grad_norm": 1.696166753768921, "learning_rate": 2.951294854481858e-05, "loss": 0.1407, "step": 6149 }, { "epoch": 0.10891365272483364, "grad_norm": 0.9822277426719666, "learning_rate": 2.951273105813534e-05, "loss": 0.1054, "step": 6150 }, { "epoch": 0.10893136226186206, "grad_norm": 1.241342544555664, "learning_rate": 2.9512513523706693e-05, "loss": 0.1001, "step": 6151 }, { "epoch": 0.1089490717988905, "grad_norm": 0.9269837737083435, "learning_rate": 2.9512295941533335e-05, "loss": 0.1203, "step": 6152 }, { "epoch": 0.10896678133591893, "grad_norm": 1.2492961883544922, "learning_rate": 2.9512078311615993e-05, "loss": 0.1514, "step": 6153 }, { "epoch": 0.10898449087294736, "grad_norm": 1.1599348783493042, "learning_rate": 2.9511860633955384e-05, "loss": 0.1365, "step": 6154 }, { "epoch": 0.10900220040997578, "grad_norm": 1.1116355657577515, "learning_rate": 2.951164290855222e-05, "loss": 0.1383, "step": 6155 }, { "epoch": 0.10901990994700421, "grad_norm": 0.7376075983047485, "learning_rate": 2.951142513540722e-05, "loss": 0.146, "step": 6156 }, { "epoch": 0.10903761948403264, "grad_norm": 1.6551121473312378, "learning_rate": 2.9511207314521096e-05, "loss": 0.1362, "step": 6157 }, { "epoch": 0.10905532902106106, "grad_norm": 1.5306015014648438, "learning_rate": 2.9510989445894565e-05, "loss": 0.1235, "step": 6158 }, { "epoch": 0.10907303855808949, "grad_norm": 0.7177351713180542, "learning_rate": 2.9510771529528352e-05, "loss": 0.1805, "step": 6159 }, { "epoch": 0.10909074809511793, "grad_norm": 0.8203222155570984, "learning_rate": 2.951055356542317e-05, "loss": 0.1, "step": 6160 }, { "epoch": 0.10910845763214636, "grad_norm": 0.7762220501899719, "learning_rate": 2.951033555357973e-05, "loss": 0.1489, "step": 6161 }, { "epoch": 0.10912616716917478, "grad_norm": 1.1350302696228027, "learning_rate": 2.9510117493998753e-05, "loss": 0.1346, "step": 6162 }, { "epoch": 0.10914387670620321, "grad_norm": 1.9955391883850098, "learning_rate": 2.950989938668096e-05, "loss": 0.1486, "step": 6163 }, { "epoch": 0.10916158624323163, "grad_norm": 0.8815780878067017, "learning_rate": 2.9509681231627063e-05, "loss": 0.125, "step": 6164 }, { "epoch": 0.10917929578026006, "grad_norm": 1.3020813465118408, "learning_rate": 2.9509463028837784e-05, "loss": 0.0991, "step": 6165 }, { "epoch": 0.10919700531728849, "grad_norm": 0.7930856347084045, "learning_rate": 2.950924477831384e-05, "loss": 0.1409, "step": 6166 }, { "epoch": 0.10921471485431693, "grad_norm": 1.4730236530303955, "learning_rate": 2.9509026480055943e-05, "loss": 0.1843, "step": 6167 }, { "epoch": 0.10923242439134535, "grad_norm": 1.2222377061843872, "learning_rate": 2.950880813406482e-05, "loss": 0.1203, "step": 6168 }, { "epoch": 0.10925013392837378, "grad_norm": 1.693734049797058, "learning_rate": 2.950858974034119e-05, "loss": 0.1691, "step": 6169 }, { "epoch": 0.1092678434654022, "grad_norm": 1.4367780685424805, "learning_rate": 2.950837129888576e-05, "loss": 0.13, "step": 6170 }, { "epoch": 0.10928555300243063, "grad_norm": 1.5259960889816284, "learning_rate": 2.9508152809699264e-05, "loss": 0.1428, "step": 6171 }, { "epoch": 0.10930326253945906, "grad_norm": 1.0854508876800537, "learning_rate": 2.9507934272782406e-05, "loss": 0.0922, "step": 6172 }, { "epoch": 0.10932097207648749, "grad_norm": 1.5045676231384277, "learning_rate": 2.950771568813591e-05, "loss": 0.1207, "step": 6173 }, { "epoch": 0.10933868161351591, "grad_norm": 1.525385856628418, "learning_rate": 2.9507497055760504e-05, "loss": 0.1242, "step": 6174 }, { "epoch": 0.10935639115054435, "grad_norm": 1.0951273441314697, "learning_rate": 2.9507278375656894e-05, "loss": 0.1649, "step": 6175 }, { "epoch": 0.10937410068757278, "grad_norm": 1.081619143486023, "learning_rate": 2.9507059647825806e-05, "loss": 0.1395, "step": 6176 }, { "epoch": 0.1093918102246012, "grad_norm": 1.0248709917068481, "learning_rate": 2.9506840872267962e-05, "loss": 0.1387, "step": 6177 }, { "epoch": 0.10940951976162963, "grad_norm": 1.4561172723770142, "learning_rate": 2.9506622048984078e-05, "loss": 0.1744, "step": 6178 }, { "epoch": 0.10942722929865806, "grad_norm": 1.1855493783950806, "learning_rate": 2.9506403177974874e-05, "loss": 0.1411, "step": 6179 }, { "epoch": 0.10944493883568648, "grad_norm": 1.609076976776123, "learning_rate": 2.9506184259241073e-05, "loss": 0.1096, "step": 6180 }, { "epoch": 0.10946264837271491, "grad_norm": 0.6750141382217407, "learning_rate": 2.950596529278339e-05, "loss": 0.0994, "step": 6181 }, { "epoch": 0.10948035790974335, "grad_norm": 1.542748212814331, "learning_rate": 2.950574627860255e-05, "loss": 0.1504, "step": 6182 }, { "epoch": 0.10949806744677178, "grad_norm": 0.9350191950798035, "learning_rate": 2.9505527216699273e-05, "loss": 0.1175, "step": 6183 }, { "epoch": 0.1095157769838002, "grad_norm": 1.3878612518310547, "learning_rate": 2.9505308107074277e-05, "loss": 0.1136, "step": 6184 }, { "epoch": 0.10953348652082863, "grad_norm": 1.2961301803588867, "learning_rate": 2.9505088949728283e-05, "loss": 0.1671, "step": 6185 }, { "epoch": 0.10955119605785706, "grad_norm": 1.2146389484405518, "learning_rate": 2.9504869744662014e-05, "loss": 0.165, "step": 6186 }, { "epoch": 0.10956890559488548, "grad_norm": 0.9179638624191284, "learning_rate": 2.9504650491876192e-05, "loss": 0.143, "step": 6187 }, { "epoch": 0.10958661513191391, "grad_norm": 1.4928150177001953, "learning_rate": 2.9504431191371536e-05, "loss": 0.1337, "step": 6188 }, { "epoch": 0.10960432466894235, "grad_norm": 2.0608761310577393, "learning_rate": 2.9504211843148767e-05, "loss": 0.164, "step": 6189 }, { "epoch": 0.10962203420597078, "grad_norm": 1.267831802368164, "learning_rate": 2.950399244720861e-05, "loss": 0.1431, "step": 6190 }, { "epoch": 0.1096397437429992, "grad_norm": 1.066103458404541, "learning_rate": 2.9503773003551785e-05, "loss": 0.1169, "step": 6191 }, { "epoch": 0.10965745328002763, "grad_norm": 1.623186469078064, "learning_rate": 2.9503553512179012e-05, "loss": 0.0997, "step": 6192 }, { "epoch": 0.10967516281705605, "grad_norm": 0.8536043763160706, "learning_rate": 2.950333397309102e-05, "loss": 0.1356, "step": 6193 }, { "epoch": 0.10969287235408448, "grad_norm": 0.9009843468666077, "learning_rate": 2.9503114386288522e-05, "loss": 0.1169, "step": 6194 }, { "epoch": 0.10971058189111291, "grad_norm": 1.1771591901779175, "learning_rate": 2.9502894751772243e-05, "loss": 0.1734, "step": 6195 }, { "epoch": 0.10972829142814133, "grad_norm": 0.8986955285072327, "learning_rate": 2.9502675069542912e-05, "loss": 0.1054, "step": 6196 }, { "epoch": 0.10974600096516977, "grad_norm": 1.132405400276184, "learning_rate": 2.9502455339601243e-05, "loss": 0.1161, "step": 6197 }, { "epoch": 0.1097637105021982, "grad_norm": 1.0881351232528687, "learning_rate": 2.9502235561947962e-05, "loss": 0.1498, "step": 6198 }, { "epoch": 0.10978142003922663, "grad_norm": 1.5825735330581665, "learning_rate": 2.95020157365838e-05, "loss": 0.1284, "step": 6199 }, { "epoch": 0.10979912957625505, "grad_norm": 1.3327667713165283, "learning_rate": 2.9501795863509465e-05, "loss": 0.1708, "step": 6200 }, { "epoch": 0.10981683911328348, "grad_norm": 1.2739923000335693, "learning_rate": 2.9501575942725692e-05, "loss": 0.1325, "step": 6201 }, { "epoch": 0.1098345486503119, "grad_norm": 1.3747483491897583, "learning_rate": 2.9501355974233198e-05, "loss": 0.1257, "step": 6202 }, { "epoch": 0.10985225818734033, "grad_norm": 0.9326984882354736, "learning_rate": 2.9501135958032715e-05, "loss": 0.1196, "step": 6203 }, { "epoch": 0.10986996772436877, "grad_norm": 1.7168807983398438, "learning_rate": 2.9500915894124955e-05, "loss": 0.1422, "step": 6204 }, { "epoch": 0.1098876772613972, "grad_norm": 1.0578385591506958, "learning_rate": 2.950069578251065e-05, "loss": 0.0993, "step": 6205 }, { "epoch": 0.10990538679842562, "grad_norm": 1.1668387651443481, "learning_rate": 2.9500475623190523e-05, "loss": 0.1502, "step": 6206 }, { "epoch": 0.10992309633545405, "grad_norm": 1.2745835781097412, "learning_rate": 2.95002554161653e-05, "loss": 0.1578, "step": 6207 }, { "epoch": 0.10994080587248248, "grad_norm": 1.0749785900115967, "learning_rate": 2.9500035161435706e-05, "loss": 0.1191, "step": 6208 }, { "epoch": 0.1099585154095109, "grad_norm": 0.9187670946121216, "learning_rate": 2.9499814859002458e-05, "loss": 0.1978, "step": 6209 }, { "epoch": 0.10997622494653933, "grad_norm": 1.1683911085128784, "learning_rate": 2.9499594508866286e-05, "loss": 0.1292, "step": 6210 }, { "epoch": 0.10999393448356776, "grad_norm": 1.5273315906524658, "learning_rate": 2.9499374111027913e-05, "loss": 0.1719, "step": 6211 }, { "epoch": 0.1100116440205962, "grad_norm": 0.827346920967102, "learning_rate": 2.949915366548807e-05, "loss": 0.1442, "step": 6212 }, { "epoch": 0.11002935355762462, "grad_norm": 0.8647903203964233, "learning_rate": 2.9498933172247477e-05, "loss": 0.1275, "step": 6213 }, { "epoch": 0.11004706309465305, "grad_norm": 0.9857375025749207, "learning_rate": 2.9498712631306858e-05, "loss": 0.1407, "step": 6214 }, { "epoch": 0.11006477263168148, "grad_norm": 1.1798738241195679, "learning_rate": 2.949849204266694e-05, "loss": 0.1516, "step": 6215 }, { "epoch": 0.1100824821687099, "grad_norm": 1.6204136610031128, "learning_rate": 2.9498271406328453e-05, "loss": 0.1513, "step": 6216 }, { "epoch": 0.11010019170573833, "grad_norm": 1.4420678615570068, "learning_rate": 2.9498050722292117e-05, "loss": 0.1135, "step": 6217 }, { "epoch": 0.11011790124276676, "grad_norm": 1.130057454109192, "learning_rate": 2.9497829990558665e-05, "loss": 0.1574, "step": 6218 }, { "epoch": 0.1101356107797952, "grad_norm": 1.1335530281066895, "learning_rate": 2.949760921112881e-05, "loss": 0.1271, "step": 6219 }, { "epoch": 0.11015332031682362, "grad_norm": 1.330098032951355, "learning_rate": 2.9497388384003295e-05, "loss": 0.1317, "step": 6220 }, { "epoch": 0.11017102985385205, "grad_norm": 1.059086799621582, "learning_rate": 2.9497167509182837e-05, "loss": 0.1031, "step": 6221 }, { "epoch": 0.11018873939088047, "grad_norm": 1.1280802488327026, "learning_rate": 2.9496946586668167e-05, "loss": 0.1393, "step": 6222 }, { "epoch": 0.1102064489279089, "grad_norm": 1.0692191123962402, "learning_rate": 2.949672561646e-05, "loss": 0.0874, "step": 6223 }, { "epoch": 0.11022415846493733, "grad_norm": 1.7213003635406494, "learning_rate": 2.949650459855908e-05, "loss": 0.2119, "step": 6224 }, { "epoch": 0.11024186800196575, "grad_norm": 0.775193452835083, "learning_rate": 2.949628353296612e-05, "loss": 0.161, "step": 6225 }, { "epoch": 0.11025957753899418, "grad_norm": 1.1038684844970703, "learning_rate": 2.949606241968186e-05, "loss": 0.1097, "step": 6226 }, { "epoch": 0.11027728707602262, "grad_norm": 1.6192373037338257, "learning_rate": 2.9495841258707015e-05, "loss": 0.1249, "step": 6227 }, { "epoch": 0.11029499661305105, "grad_norm": 1.1515620946884155, "learning_rate": 2.949562005004232e-05, "loss": 0.0972, "step": 6228 }, { "epoch": 0.11031270615007947, "grad_norm": 1.3372918367385864, "learning_rate": 2.9495398793688503e-05, "loss": 0.0873, "step": 6229 }, { "epoch": 0.1103304156871079, "grad_norm": 1.1634126901626587, "learning_rate": 2.949517748964629e-05, "loss": 0.1344, "step": 6230 }, { "epoch": 0.11034812522413633, "grad_norm": 1.74513840675354, "learning_rate": 2.9494956137916413e-05, "loss": 0.1941, "step": 6231 }, { "epoch": 0.11036583476116475, "grad_norm": 1.1982009410858154, "learning_rate": 2.9494734738499587e-05, "loss": 0.1194, "step": 6232 }, { "epoch": 0.11038354429819318, "grad_norm": 0.8687193989753723, "learning_rate": 2.9494513291396555e-05, "loss": 0.1242, "step": 6233 }, { "epoch": 0.11040125383522162, "grad_norm": 1.15312922000885, "learning_rate": 2.949429179660804e-05, "loss": 0.1663, "step": 6234 }, { "epoch": 0.11041896337225005, "grad_norm": 0.7433673143386841, "learning_rate": 2.9494070254134774e-05, "loss": 0.1231, "step": 6235 }, { "epoch": 0.11043667290927847, "grad_norm": 1.0785325765609741, "learning_rate": 2.9493848663977476e-05, "loss": 0.1051, "step": 6236 }, { "epoch": 0.1104543824463069, "grad_norm": 0.9085976481437683, "learning_rate": 2.9493627026136884e-05, "loss": 0.1353, "step": 6237 }, { "epoch": 0.11047209198333532, "grad_norm": 1.4399681091308594, "learning_rate": 2.9493405340613732e-05, "loss": 0.1017, "step": 6238 }, { "epoch": 0.11048980152036375, "grad_norm": 1.749390721321106, "learning_rate": 2.9493183607408734e-05, "loss": 0.1386, "step": 6239 }, { "epoch": 0.11050751105739218, "grad_norm": 1.2878708839416504, "learning_rate": 2.949296182652263e-05, "loss": 0.1841, "step": 6240 }, { "epoch": 0.1105252205944206, "grad_norm": 1.3591853380203247, "learning_rate": 2.949273999795615e-05, "loss": 0.11, "step": 6241 }, { "epoch": 0.11054293013144904, "grad_norm": 0.8516919016838074, "learning_rate": 2.949251812171002e-05, "loss": 0.1001, "step": 6242 }, { "epoch": 0.11056063966847747, "grad_norm": 0.9519446492195129, "learning_rate": 2.949229619778497e-05, "loss": 0.126, "step": 6243 }, { "epoch": 0.1105783492055059, "grad_norm": 1.1653858423233032, "learning_rate": 2.949207422618173e-05, "loss": 0.1194, "step": 6244 }, { "epoch": 0.11059605874253432, "grad_norm": 0.8752722144126892, "learning_rate": 2.9491852206901035e-05, "loss": 0.1543, "step": 6245 }, { "epoch": 0.11061376827956275, "grad_norm": 1.0839751958847046, "learning_rate": 2.9491630139943608e-05, "loss": 0.1474, "step": 6246 }, { "epoch": 0.11063147781659118, "grad_norm": 0.9734991788864136, "learning_rate": 2.949140802531019e-05, "loss": 0.1078, "step": 6247 }, { "epoch": 0.1106491873536196, "grad_norm": 1.6142821311950684, "learning_rate": 2.94911858630015e-05, "loss": 0.1575, "step": 6248 }, { "epoch": 0.11066689689064804, "grad_norm": 1.2953459024429321, "learning_rate": 2.9490963653018273e-05, "loss": 0.1489, "step": 6249 }, { "epoch": 0.11068460642767647, "grad_norm": 1.440246820449829, "learning_rate": 2.949074139536125e-05, "loss": 0.0999, "step": 6250 }, { "epoch": 0.1107023159647049, "grad_norm": 1.1485804319381714, "learning_rate": 2.9490519090031142e-05, "loss": 0.1632, "step": 6251 }, { "epoch": 0.11072002550173332, "grad_norm": 1.210963249206543, "learning_rate": 2.94902967370287e-05, "loss": 0.1345, "step": 6252 }, { "epoch": 0.11073773503876175, "grad_norm": 0.8623694777488708, "learning_rate": 2.9490074336354642e-05, "loss": 0.0924, "step": 6253 }, { "epoch": 0.11075544457579017, "grad_norm": 1.5013058185577393, "learning_rate": 2.9489851888009707e-05, "loss": 0.1424, "step": 6254 }, { "epoch": 0.1107731541128186, "grad_norm": 1.1510372161865234, "learning_rate": 2.9489629391994626e-05, "loss": 0.126, "step": 6255 }, { "epoch": 0.11079086364984704, "grad_norm": 1.5445410013198853, "learning_rate": 2.9489406848310133e-05, "loss": 0.116, "step": 6256 }, { "epoch": 0.11080857318687547, "grad_norm": 1.5627707242965698, "learning_rate": 2.948918425695695e-05, "loss": 0.1418, "step": 6257 }, { "epoch": 0.1108262827239039, "grad_norm": 1.2837857007980347, "learning_rate": 2.948896161793582e-05, "loss": 0.1559, "step": 6258 }, { "epoch": 0.11084399226093232, "grad_norm": 1.0354644060134888, "learning_rate": 2.9488738931247472e-05, "loss": 0.153, "step": 6259 }, { "epoch": 0.11086170179796075, "grad_norm": 1.8227916955947876, "learning_rate": 2.9488516196892634e-05, "loss": 0.1475, "step": 6260 }, { "epoch": 0.11087941133498917, "grad_norm": 1.1431862115859985, "learning_rate": 2.948829341487205e-05, "loss": 0.1289, "step": 6261 }, { "epoch": 0.1108971208720176, "grad_norm": 0.9863327741622925, "learning_rate": 2.948807058518644e-05, "loss": 0.1194, "step": 6262 }, { "epoch": 0.11091483040904603, "grad_norm": 1.3043794631958008, "learning_rate": 2.948784770783655e-05, "loss": 0.148, "step": 6263 }, { "epoch": 0.11093253994607447, "grad_norm": 0.8384320139884949, "learning_rate": 2.94876247828231e-05, "loss": 0.1177, "step": 6264 }, { "epoch": 0.11095024948310289, "grad_norm": 0.9685684442520142, "learning_rate": 2.9487401810146832e-05, "loss": 0.0987, "step": 6265 }, { "epoch": 0.11096795902013132, "grad_norm": 2.7983291149139404, "learning_rate": 2.948717878980848e-05, "loss": 0.1446, "step": 6266 }, { "epoch": 0.11098566855715974, "grad_norm": 1.5620423555374146, "learning_rate": 2.948695572180877e-05, "loss": 0.1336, "step": 6267 }, { "epoch": 0.11100337809418817, "grad_norm": 1.5099705457687378, "learning_rate": 2.9486732606148443e-05, "loss": 0.1627, "step": 6268 }, { "epoch": 0.1110210876312166, "grad_norm": 1.3665056228637695, "learning_rate": 2.9486509442828233e-05, "loss": 0.1079, "step": 6269 }, { "epoch": 0.11103879716824502, "grad_norm": 1.3197182416915894, "learning_rate": 2.9486286231848873e-05, "loss": 0.1349, "step": 6270 }, { "epoch": 0.11105650670527346, "grad_norm": 1.0677909851074219, "learning_rate": 2.948606297321109e-05, "loss": 0.1387, "step": 6271 }, { "epoch": 0.11107421624230189, "grad_norm": 0.926754891872406, "learning_rate": 2.948583966691563e-05, "loss": 0.1015, "step": 6272 }, { "epoch": 0.11109192577933032, "grad_norm": 1.1596758365631104, "learning_rate": 2.9485616312963222e-05, "loss": 0.1233, "step": 6273 }, { "epoch": 0.11110963531635874, "grad_norm": 1.2457472085952759, "learning_rate": 2.9485392911354602e-05, "loss": 0.1738, "step": 6274 }, { "epoch": 0.11112734485338717, "grad_norm": 1.3392332792282104, "learning_rate": 2.9485169462090505e-05, "loss": 0.1463, "step": 6275 }, { "epoch": 0.1111450543904156, "grad_norm": 1.0356554985046387, "learning_rate": 2.9484945965171664e-05, "loss": 0.1341, "step": 6276 }, { "epoch": 0.11116276392744402, "grad_norm": 1.3225525617599487, "learning_rate": 2.9484722420598817e-05, "loss": 0.0947, "step": 6277 }, { "epoch": 0.11118047346447245, "grad_norm": 0.8508710861206055, "learning_rate": 2.9484498828372696e-05, "loss": 0.0822, "step": 6278 }, { "epoch": 0.11119818300150089, "grad_norm": 0.8196133375167847, "learning_rate": 2.948427518849404e-05, "loss": 0.0948, "step": 6279 }, { "epoch": 0.11121589253852932, "grad_norm": 1.1776186227798462, "learning_rate": 2.9484051500963583e-05, "loss": 0.1129, "step": 6280 }, { "epoch": 0.11123360207555774, "grad_norm": 1.0691441297531128, "learning_rate": 2.948382776578206e-05, "loss": 0.1238, "step": 6281 }, { "epoch": 0.11125131161258617, "grad_norm": 0.798219621181488, "learning_rate": 2.9483603982950214e-05, "loss": 0.0791, "step": 6282 }, { "epoch": 0.1112690211496146, "grad_norm": 1.6948559284210205, "learning_rate": 2.9483380152468775e-05, "loss": 0.1008, "step": 6283 }, { "epoch": 0.11128673068664302, "grad_norm": 1.5177016258239746, "learning_rate": 2.9483156274338477e-05, "loss": 0.1477, "step": 6284 }, { "epoch": 0.11130444022367145, "grad_norm": 1.4931753873825073, "learning_rate": 2.948293234856006e-05, "loss": 0.1015, "step": 6285 }, { "epoch": 0.11132214976069989, "grad_norm": 0.947325587272644, "learning_rate": 2.9482708375134263e-05, "loss": 0.1231, "step": 6286 }, { "epoch": 0.11133985929772831, "grad_norm": 1.3408992290496826, "learning_rate": 2.9482484354061816e-05, "loss": 0.1585, "step": 6287 }, { "epoch": 0.11135756883475674, "grad_norm": 0.8723849058151245, "learning_rate": 2.9482260285343463e-05, "loss": 0.1286, "step": 6288 }, { "epoch": 0.11137527837178517, "grad_norm": 1.0545239448547363, "learning_rate": 2.9482036168979937e-05, "loss": 0.1339, "step": 6289 }, { "epoch": 0.11139298790881359, "grad_norm": 0.9410812854766846, "learning_rate": 2.948181200497198e-05, "loss": 0.112, "step": 6290 }, { "epoch": 0.11141069744584202, "grad_norm": 1.1079554557800293, "learning_rate": 2.9481587793320325e-05, "loss": 0.08, "step": 6291 }, { "epoch": 0.11142840698287045, "grad_norm": 1.305256962776184, "learning_rate": 2.948136353402571e-05, "loss": 0.1366, "step": 6292 }, { "epoch": 0.11144611651989887, "grad_norm": 1.5904853343963623, "learning_rate": 2.9481139227088877e-05, "loss": 0.1113, "step": 6293 }, { "epoch": 0.11146382605692731, "grad_norm": 1.134912133216858, "learning_rate": 2.9480914872510552e-05, "loss": 0.112, "step": 6294 }, { "epoch": 0.11148153559395574, "grad_norm": 1.1493805646896362, "learning_rate": 2.948069047029149e-05, "loss": 0.138, "step": 6295 }, { "epoch": 0.11149924513098416, "grad_norm": 0.8002210259437561, "learning_rate": 2.9480466020432417e-05, "loss": 0.1073, "step": 6296 }, { "epoch": 0.11151695466801259, "grad_norm": 1.1450908184051514, "learning_rate": 2.9480241522934077e-05, "loss": 0.1097, "step": 6297 }, { "epoch": 0.11153466420504102, "grad_norm": 1.281858205795288, "learning_rate": 2.9480016977797205e-05, "loss": 0.084, "step": 6298 }, { "epoch": 0.11155237374206944, "grad_norm": 1.3216147422790527, "learning_rate": 2.9479792385022546e-05, "loss": 0.1368, "step": 6299 }, { "epoch": 0.11157008327909787, "grad_norm": 1.4241366386413574, "learning_rate": 2.947956774461083e-05, "loss": 0.1236, "step": 6300 }, { "epoch": 0.11158779281612631, "grad_norm": 1.8454234600067139, "learning_rate": 2.9479343056562802e-05, "loss": 0.1998, "step": 6301 }, { "epoch": 0.11160550235315474, "grad_norm": 1.3079415559768677, "learning_rate": 2.9479118320879204e-05, "loss": 0.1104, "step": 6302 }, { "epoch": 0.11162321189018316, "grad_norm": 1.181054711341858, "learning_rate": 2.9478893537560767e-05, "loss": 0.1371, "step": 6303 }, { "epoch": 0.11164092142721159, "grad_norm": 0.8073400855064392, "learning_rate": 2.9478668706608232e-05, "loss": 0.1335, "step": 6304 }, { "epoch": 0.11165863096424002, "grad_norm": 1.1835684776306152, "learning_rate": 2.9478443828022346e-05, "loss": 0.1296, "step": 6305 }, { "epoch": 0.11167634050126844, "grad_norm": 1.4015146493911743, "learning_rate": 2.947821890180384e-05, "loss": 0.1438, "step": 6306 }, { "epoch": 0.11169405003829687, "grad_norm": 0.859153151512146, "learning_rate": 2.9477993927953466e-05, "loss": 0.0967, "step": 6307 }, { "epoch": 0.11171175957532531, "grad_norm": 1.8587944507598877, "learning_rate": 2.9477768906471953e-05, "loss": 0.1359, "step": 6308 }, { "epoch": 0.11172946911235374, "grad_norm": 1.3929554224014282, "learning_rate": 2.947754383736004e-05, "loss": 0.1767, "step": 6309 }, { "epoch": 0.11174717864938216, "grad_norm": 1.774078369140625, "learning_rate": 2.9477318720618476e-05, "loss": 0.0912, "step": 6310 }, { "epoch": 0.11176488818641059, "grad_norm": 1.1859657764434814, "learning_rate": 2.9477093556247997e-05, "loss": 0.1222, "step": 6311 }, { "epoch": 0.11178259772343901, "grad_norm": 1.184303879737854, "learning_rate": 2.9476868344249343e-05, "loss": 0.1416, "step": 6312 }, { "epoch": 0.11180030726046744, "grad_norm": 1.6374512910842896, "learning_rate": 2.947664308462326e-05, "loss": 0.1536, "step": 6313 }, { "epoch": 0.11181801679749587, "grad_norm": 0.9574883580207825, "learning_rate": 2.9476417777370482e-05, "loss": 0.147, "step": 6314 }, { "epoch": 0.1118357263345243, "grad_norm": 1.686394453048706, "learning_rate": 2.9476192422491752e-05, "loss": 0.1513, "step": 6315 }, { "epoch": 0.11185343587155273, "grad_norm": 1.4160081148147583, "learning_rate": 2.9475967019987816e-05, "loss": 0.1314, "step": 6316 }, { "epoch": 0.11187114540858116, "grad_norm": 1.0580980777740479, "learning_rate": 2.9475741569859415e-05, "loss": 0.1511, "step": 6317 }, { "epoch": 0.11188885494560959, "grad_norm": 2.2089548110961914, "learning_rate": 2.9475516072107288e-05, "loss": 0.1507, "step": 6318 }, { "epoch": 0.11190656448263801, "grad_norm": 0.9536860585212708, "learning_rate": 2.9475290526732173e-05, "loss": 0.1119, "step": 6319 }, { "epoch": 0.11192427401966644, "grad_norm": 1.760345220565796, "learning_rate": 2.947506493373482e-05, "loss": 0.1519, "step": 6320 }, { "epoch": 0.11194198355669487, "grad_norm": 1.029801607131958, "learning_rate": 2.9474839293115966e-05, "loss": 0.0816, "step": 6321 }, { "epoch": 0.11195969309372329, "grad_norm": 0.8023664951324463, "learning_rate": 2.9474613604876354e-05, "loss": 0.095, "step": 6322 }, { "epoch": 0.11197740263075173, "grad_norm": 1.1190524101257324, "learning_rate": 2.947438786901673e-05, "loss": 0.1434, "step": 6323 }, { "epoch": 0.11199511216778016, "grad_norm": 1.0191489458084106, "learning_rate": 2.947416208553783e-05, "loss": 0.1318, "step": 6324 }, { "epoch": 0.11201282170480859, "grad_norm": 0.8801543712615967, "learning_rate": 2.94739362544404e-05, "loss": 0.1443, "step": 6325 }, { "epoch": 0.11203053124183701, "grad_norm": 1.420288324356079, "learning_rate": 2.947371037572519e-05, "loss": 0.153, "step": 6326 }, { "epoch": 0.11204824077886544, "grad_norm": 0.9603327512741089, "learning_rate": 2.9473484449392935e-05, "loss": 0.0969, "step": 6327 }, { "epoch": 0.11206595031589386, "grad_norm": 1.1169291734695435, "learning_rate": 2.9473258475444374e-05, "loss": 0.1247, "step": 6328 }, { "epoch": 0.11208365985292229, "grad_norm": 1.0781501531600952, "learning_rate": 2.947303245388026e-05, "loss": 0.1229, "step": 6329 }, { "epoch": 0.11210136938995072, "grad_norm": 0.8636693358421326, "learning_rate": 2.9472806384701334e-05, "loss": 0.1281, "step": 6330 }, { "epoch": 0.11211907892697916, "grad_norm": 0.9353713989257812, "learning_rate": 2.947258026790834e-05, "loss": 0.1328, "step": 6331 }, { "epoch": 0.11213678846400758, "grad_norm": 1.3186054229736328, "learning_rate": 2.947235410350202e-05, "loss": 0.1191, "step": 6332 }, { "epoch": 0.11215449800103601, "grad_norm": 0.7749024629592896, "learning_rate": 2.9472127891483114e-05, "loss": 0.1313, "step": 6333 }, { "epoch": 0.11217220753806444, "grad_norm": 1.542676568031311, "learning_rate": 2.947190163185238e-05, "loss": 0.1351, "step": 6334 }, { "epoch": 0.11218991707509286, "grad_norm": 0.963553786277771, "learning_rate": 2.9471675324610547e-05, "loss": 0.1275, "step": 6335 }, { "epoch": 0.11220762661212129, "grad_norm": 1.0888339281082153, "learning_rate": 2.9471448969758367e-05, "loss": 0.098, "step": 6336 }, { "epoch": 0.11222533614914972, "grad_norm": 0.9388276934623718, "learning_rate": 2.9471222567296586e-05, "loss": 0.1132, "step": 6337 }, { "epoch": 0.11224304568617816, "grad_norm": 1.1504508256912231, "learning_rate": 2.9470996117225943e-05, "loss": 0.1094, "step": 6338 }, { "epoch": 0.11226075522320658, "grad_norm": 0.9909408688545227, "learning_rate": 2.9470769619547187e-05, "loss": 0.1456, "step": 6339 }, { "epoch": 0.11227846476023501, "grad_norm": 0.984589159488678, "learning_rate": 2.9470543074261065e-05, "loss": 0.1378, "step": 6340 }, { "epoch": 0.11229617429726343, "grad_norm": 1.2800004482269287, "learning_rate": 2.9470316481368318e-05, "loss": 0.1413, "step": 6341 }, { "epoch": 0.11231388383429186, "grad_norm": 1.137223482131958, "learning_rate": 2.9470089840869692e-05, "loss": 0.139, "step": 6342 }, { "epoch": 0.11233159337132029, "grad_norm": 1.0440056324005127, "learning_rate": 2.9469863152765935e-05, "loss": 0.1496, "step": 6343 }, { "epoch": 0.11234930290834871, "grad_norm": 1.2681713104248047, "learning_rate": 2.946963641705779e-05, "loss": 0.1481, "step": 6344 }, { "epoch": 0.11236701244537714, "grad_norm": 1.0591685771942139, "learning_rate": 2.946940963374601e-05, "loss": 0.1008, "step": 6345 }, { "epoch": 0.11238472198240558, "grad_norm": 1.1757967472076416, "learning_rate": 2.9469182802831332e-05, "loss": 0.1183, "step": 6346 }, { "epoch": 0.112402431519434, "grad_norm": 1.2823350429534912, "learning_rate": 2.9468955924314508e-05, "loss": 0.1317, "step": 6347 }, { "epoch": 0.11242014105646243, "grad_norm": 0.9996956586837769, "learning_rate": 2.946872899819628e-05, "loss": 0.1322, "step": 6348 }, { "epoch": 0.11243785059349086, "grad_norm": 1.4148304462432861, "learning_rate": 2.9468502024477402e-05, "loss": 0.1357, "step": 6349 }, { "epoch": 0.11245556013051929, "grad_norm": 1.8759182691574097, "learning_rate": 2.946827500315861e-05, "loss": 0.1219, "step": 6350 }, { "epoch": 0.11247326966754771, "grad_norm": 0.9964178800582886, "learning_rate": 2.946804793424066e-05, "loss": 0.126, "step": 6351 }, { "epoch": 0.11249097920457614, "grad_norm": 1.3664792776107788, "learning_rate": 2.9467820817724297e-05, "loss": 0.123, "step": 6352 }, { "epoch": 0.11250868874160458, "grad_norm": 1.352921724319458, "learning_rate": 2.9467593653610263e-05, "loss": 0.1508, "step": 6353 }, { "epoch": 0.112526398278633, "grad_norm": 1.2812031507492065, "learning_rate": 2.946736644189931e-05, "loss": 0.2051, "step": 6354 }, { "epoch": 0.11254410781566143, "grad_norm": 1.1410987377166748, "learning_rate": 2.9467139182592183e-05, "loss": 0.1245, "step": 6355 }, { "epoch": 0.11256181735268986, "grad_norm": 1.0454411506652832, "learning_rate": 2.9466911875689635e-05, "loss": 0.1171, "step": 6356 }, { "epoch": 0.11257952688971828, "grad_norm": 1.1494271755218506, "learning_rate": 2.946668452119241e-05, "loss": 0.1065, "step": 6357 }, { "epoch": 0.11259723642674671, "grad_norm": 1.4179209470748901, "learning_rate": 2.946645711910125e-05, "loss": 0.1368, "step": 6358 }, { "epoch": 0.11261494596377514, "grad_norm": 1.209366798400879, "learning_rate": 2.9466229669416916e-05, "loss": 0.1316, "step": 6359 }, { "epoch": 0.11263265550080356, "grad_norm": 1.1310532093048096, "learning_rate": 2.946600217214015e-05, "loss": 0.11, "step": 6360 }, { "epoch": 0.112650365037832, "grad_norm": 1.5386285781860352, "learning_rate": 2.9465774627271696e-05, "loss": 0.1495, "step": 6361 }, { "epoch": 0.11266807457486043, "grad_norm": 1.9728538990020752, "learning_rate": 2.946554703481231e-05, "loss": 0.1707, "step": 6362 }, { "epoch": 0.11268578411188886, "grad_norm": 1.140125036239624, "learning_rate": 2.9465319394762733e-05, "loss": 0.1175, "step": 6363 }, { "epoch": 0.11270349364891728, "grad_norm": 1.3442997932434082, "learning_rate": 2.946509170712372e-05, "loss": 0.1461, "step": 6364 }, { "epoch": 0.11272120318594571, "grad_norm": 0.9552537202835083, "learning_rate": 2.9464863971896017e-05, "loss": 0.1333, "step": 6365 }, { "epoch": 0.11273891272297414, "grad_norm": 1.3270798921585083, "learning_rate": 2.9464636189080384e-05, "loss": 0.1955, "step": 6366 }, { "epoch": 0.11275662226000256, "grad_norm": 1.2620484828948975, "learning_rate": 2.9464408358677552e-05, "loss": 0.1192, "step": 6367 }, { "epoch": 0.112774331797031, "grad_norm": 2.0512640476226807, "learning_rate": 2.946418048068828e-05, "loss": 0.1247, "step": 6368 }, { "epoch": 0.11279204133405943, "grad_norm": 1.4112741947174072, "learning_rate": 2.946395255511332e-05, "loss": 0.1834, "step": 6369 }, { "epoch": 0.11280975087108785, "grad_norm": 2.499150514602661, "learning_rate": 2.9463724581953417e-05, "loss": 0.1161, "step": 6370 }, { "epoch": 0.11282746040811628, "grad_norm": 1.1069865226745605, "learning_rate": 2.9463496561209325e-05, "loss": 0.1173, "step": 6371 }, { "epoch": 0.11284516994514471, "grad_norm": 1.451161503791809, "learning_rate": 2.9463268492881786e-05, "loss": 0.1896, "step": 6372 }, { "epoch": 0.11286287948217313, "grad_norm": 1.6580201387405396, "learning_rate": 2.9463040376971564e-05, "loss": 0.1283, "step": 6373 }, { "epoch": 0.11288058901920156, "grad_norm": 1.6906120777130127, "learning_rate": 2.94628122134794e-05, "loss": 0.1943, "step": 6374 }, { "epoch": 0.11289829855623, "grad_norm": 1.097449779510498, "learning_rate": 2.9462584002406046e-05, "loss": 0.1148, "step": 6375 }, { "epoch": 0.11291600809325843, "grad_norm": 1.2540922164916992, "learning_rate": 2.946235574375225e-05, "loss": 0.114, "step": 6376 }, { "epoch": 0.11293371763028685, "grad_norm": 1.0545767545700073, "learning_rate": 2.946212743751877e-05, "loss": 0.1107, "step": 6377 }, { "epoch": 0.11295142716731528, "grad_norm": 1.7115278244018555, "learning_rate": 2.9461899083706354e-05, "loss": 0.1482, "step": 6378 }, { "epoch": 0.1129691367043437, "grad_norm": 1.3684184551239014, "learning_rate": 2.946167068231575e-05, "loss": 0.1249, "step": 6379 }, { "epoch": 0.11298684624137213, "grad_norm": 1.794479489326477, "learning_rate": 2.9461442233347715e-05, "loss": 0.1764, "step": 6380 }, { "epoch": 0.11300455577840056, "grad_norm": 1.4078766107559204, "learning_rate": 2.9461213736802997e-05, "loss": 0.1209, "step": 6381 }, { "epoch": 0.11302226531542899, "grad_norm": 0.7822393774986267, "learning_rate": 2.9460985192682347e-05, "loss": 0.1057, "step": 6382 }, { "epoch": 0.11303997485245743, "grad_norm": 0.8311421275138855, "learning_rate": 2.9460756600986523e-05, "loss": 0.0994, "step": 6383 }, { "epoch": 0.11305768438948585, "grad_norm": 1.4505794048309326, "learning_rate": 2.9460527961716266e-05, "loss": 0.1273, "step": 6384 }, { "epoch": 0.11307539392651428, "grad_norm": 1.2779357433319092, "learning_rate": 2.946029927487234e-05, "loss": 0.1119, "step": 6385 }, { "epoch": 0.1130931034635427, "grad_norm": 1.3401554822921753, "learning_rate": 2.946007054045549e-05, "loss": 0.1275, "step": 6386 }, { "epoch": 0.11311081300057113, "grad_norm": 1.456911325454712, "learning_rate": 2.945984175846647e-05, "loss": 0.1669, "step": 6387 }, { "epoch": 0.11312852253759956, "grad_norm": 1.143812894821167, "learning_rate": 2.9459612928906035e-05, "loss": 0.1492, "step": 6388 }, { "epoch": 0.11314623207462798, "grad_norm": 1.3788472414016724, "learning_rate": 2.9459384051774934e-05, "loss": 0.1133, "step": 6389 }, { "epoch": 0.11316394161165642, "grad_norm": 1.4557358026504517, "learning_rate": 2.945915512707392e-05, "loss": 0.2073, "step": 6390 }, { "epoch": 0.11318165114868485, "grad_norm": 0.7215054035186768, "learning_rate": 2.945892615480375e-05, "loss": 0.1558, "step": 6391 }, { "epoch": 0.11319936068571328, "grad_norm": 1.59913170337677, "learning_rate": 2.9458697134965174e-05, "loss": 0.1078, "step": 6392 }, { "epoch": 0.1132170702227417, "grad_norm": 2.871408462524414, "learning_rate": 2.9458468067558953e-05, "loss": 0.1263, "step": 6393 }, { "epoch": 0.11323477975977013, "grad_norm": 1.227656602859497, "learning_rate": 2.9458238952585825e-05, "loss": 0.1273, "step": 6394 }, { "epoch": 0.11325248929679856, "grad_norm": 1.630678415298462, "learning_rate": 2.9458009790046562e-05, "loss": 0.113, "step": 6395 }, { "epoch": 0.11327019883382698, "grad_norm": 0.9593499302864075, "learning_rate": 2.9457780579941903e-05, "loss": 0.1135, "step": 6396 }, { "epoch": 0.11328790837085541, "grad_norm": 1.5417288541793823, "learning_rate": 2.945755132227261e-05, "loss": 0.1991, "step": 6397 }, { "epoch": 0.11330561790788385, "grad_norm": 0.914312481880188, "learning_rate": 2.9457322017039437e-05, "loss": 0.1832, "step": 6398 }, { "epoch": 0.11332332744491228, "grad_norm": 1.1872879266738892, "learning_rate": 2.9457092664243137e-05, "loss": 0.1157, "step": 6399 }, { "epoch": 0.1133410369819407, "grad_norm": 1.048351764678955, "learning_rate": 2.9456863263884463e-05, "loss": 0.1287, "step": 6400 }, { "epoch": 0.11335874651896913, "grad_norm": 1.1878631114959717, "learning_rate": 2.945663381596417e-05, "loss": 0.1351, "step": 6401 }, { "epoch": 0.11337645605599755, "grad_norm": 1.2247579097747803, "learning_rate": 2.9456404320483016e-05, "loss": 0.0903, "step": 6402 }, { "epoch": 0.11339416559302598, "grad_norm": 1.628749132156372, "learning_rate": 2.9456174777441754e-05, "loss": 0.1268, "step": 6403 }, { "epoch": 0.11341187513005441, "grad_norm": 1.2434996366500854, "learning_rate": 2.9455945186841136e-05, "loss": 0.1196, "step": 6404 }, { "epoch": 0.11342958466708285, "grad_norm": 0.815190315246582, "learning_rate": 2.9455715548681923e-05, "loss": 0.1012, "step": 6405 }, { "epoch": 0.11344729420411127, "grad_norm": 1.4094158411026, "learning_rate": 2.945548586296487e-05, "loss": 0.1281, "step": 6406 }, { "epoch": 0.1134650037411397, "grad_norm": 0.9742622375488281, "learning_rate": 2.9455256129690724e-05, "loss": 0.1215, "step": 6407 }, { "epoch": 0.11348271327816813, "grad_norm": 1.395559310913086, "learning_rate": 2.9455026348860253e-05, "loss": 0.1343, "step": 6408 }, { "epoch": 0.11350042281519655, "grad_norm": 1.140527367591858, "learning_rate": 2.9454796520474204e-05, "loss": 0.1409, "step": 6409 }, { "epoch": 0.11351813235222498, "grad_norm": 1.6134992837905884, "learning_rate": 2.9454566644533344e-05, "loss": 0.1879, "step": 6410 }, { "epoch": 0.1135358418892534, "grad_norm": 1.53749418258667, "learning_rate": 2.9454336721038415e-05, "loss": 0.1357, "step": 6411 }, { "epoch": 0.11355355142628183, "grad_norm": 1.508020281791687, "learning_rate": 2.945410674999018e-05, "loss": 0.17, "step": 6412 }, { "epoch": 0.11357126096331027, "grad_norm": 1.0751270055770874, "learning_rate": 2.9453876731389396e-05, "loss": 0.2014, "step": 6413 }, { "epoch": 0.1135889705003387, "grad_norm": 0.8525680303573608, "learning_rate": 2.9453646665236818e-05, "loss": 0.1154, "step": 6414 }, { "epoch": 0.11360668003736712, "grad_norm": 0.8403554558753967, "learning_rate": 2.945341655153321e-05, "loss": 0.1012, "step": 6415 }, { "epoch": 0.11362438957439555, "grad_norm": 1.2071921825408936, "learning_rate": 2.945318639027932e-05, "loss": 0.1442, "step": 6416 }, { "epoch": 0.11364209911142398, "grad_norm": 2.504887342453003, "learning_rate": 2.945295618147591e-05, "loss": 0.1767, "step": 6417 }, { "epoch": 0.1136598086484524, "grad_norm": 2.2106552124023438, "learning_rate": 2.9452725925123732e-05, "loss": 0.1541, "step": 6418 }, { "epoch": 0.11367751818548083, "grad_norm": 1.2340965270996094, "learning_rate": 2.9452495621223555e-05, "loss": 0.153, "step": 6419 }, { "epoch": 0.11369522772250927, "grad_norm": 1.163656234741211, "learning_rate": 2.9452265269776125e-05, "loss": 0.1225, "step": 6420 }, { "epoch": 0.1137129372595377, "grad_norm": 1.1634994745254517, "learning_rate": 2.9452034870782204e-05, "loss": 0.1023, "step": 6421 }, { "epoch": 0.11373064679656612, "grad_norm": 0.9396840929985046, "learning_rate": 2.9451804424242546e-05, "loss": 0.1069, "step": 6422 }, { "epoch": 0.11374835633359455, "grad_norm": 1.4154632091522217, "learning_rate": 2.945157393015792e-05, "loss": 0.1273, "step": 6423 }, { "epoch": 0.11376606587062298, "grad_norm": 0.9603788256645203, "learning_rate": 2.9451343388529074e-05, "loss": 0.1087, "step": 6424 }, { "epoch": 0.1137837754076514, "grad_norm": 1.5472731590270996, "learning_rate": 2.945111279935677e-05, "loss": 0.1653, "step": 6425 }, { "epoch": 0.11380148494467983, "grad_norm": 0.9301677942276001, "learning_rate": 2.9450882162641765e-05, "loss": 0.1058, "step": 6426 }, { "epoch": 0.11381919448170826, "grad_norm": 1.1845351457595825, "learning_rate": 2.945065147838482e-05, "loss": 0.1191, "step": 6427 }, { "epoch": 0.1138369040187367, "grad_norm": 1.3915057182312012, "learning_rate": 2.9450420746586695e-05, "loss": 0.1331, "step": 6428 }, { "epoch": 0.11385461355576512, "grad_norm": 0.8212749361991882, "learning_rate": 2.9450189967248147e-05, "loss": 0.0966, "step": 6429 }, { "epoch": 0.11387232309279355, "grad_norm": 1.2805348634719849, "learning_rate": 2.9449959140369936e-05, "loss": 0.0985, "step": 6430 }, { "epoch": 0.11389003262982197, "grad_norm": 1.0472933053970337, "learning_rate": 2.944972826595282e-05, "loss": 0.1321, "step": 6431 }, { "epoch": 0.1139077421668504, "grad_norm": 0.8376052975654602, "learning_rate": 2.944949734399756e-05, "loss": 0.0994, "step": 6432 }, { "epoch": 0.11392545170387883, "grad_norm": 1.3663642406463623, "learning_rate": 2.9449266374504914e-05, "loss": 0.1046, "step": 6433 }, { "epoch": 0.11394316124090725, "grad_norm": 0.9255065321922302, "learning_rate": 2.9449035357475647e-05, "loss": 0.1485, "step": 6434 }, { "epoch": 0.1139608707779357, "grad_norm": 1.4007965326309204, "learning_rate": 2.944880429291051e-05, "loss": 0.1174, "step": 6435 }, { "epoch": 0.11397858031496412, "grad_norm": 1.1077286005020142, "learning_rate": 2.944857318081027e-05, "loss": 0.1357, "step": 6436 }, { "epoch": 0.11399628985199255, "grad_norm": 1.1887097358703613, "learning_rate": 2.9448342021175687e-05, "loss": 0.1329, "step": 6437 }, { "epoch": 0.11401399938902097, "grad_norm": 1.2183623313903809, "learning_rate": 2.944811081400752e-05, "loss": 0.1131, "step": 6438 }, { "epoch": 0.1140317089260494, "grad_norm": 1.3159480094909668, "learning_rate": 2.9447879559306527e-05, "loss": 0.1279, "step": 6439 }, { "epoch": 0.11404941846307783, "grad_norm": 1.4210772514343262, "learning_rate": 2.9447648257073473e-05, "loss": 0.1595, "step": 6440 }, { "epoch": 0.11406712800010625, "grad_norm": 1.2460390329360962, "learning_rate": 2.944741690730912e-05, "loss": 0.1492, "step": 6441 }, { "epoch": 0.11408483753713469, "grad_norm": 1.4166587591171265, "learning_rate": 2.9447185510014225e-05, "loss": 0.1212, "step": 6442 }, { "epoch": 0.11410254707416312, "grad_norm": 1.041695475578308, "learning_rate": 2.9446954065189545e-05, "loss": 0.1281, "step": 6443 }, { "epoch": 0.11412025661119155, "grad_norm": 1.4536080360412598, "learning_rate": 2.9446722572835857e-05, "loss": 0.1, "step": 6444 }, { "epoch": 0.11413796614821997, "grad_norm": 1.0768568515777588, "learning_rate": 2.9446491032953907e-05, "loss": 0.1103, "step": 6445 }, { "epoch": 0.1141556756852484, "grad_norm": 1.7861080169677734, "learning_rate": 2.9446259445544464e-05, "loss": 0.2118, "step": 6446 }, { "epoch": 0.11417338522227682, "grad_norm": 1.2390060424804688, "learning_rate": 2.944602781060829e-05, "loss": 0.1339, "step": 6447 }, { "epoch": 0.11419109475930525, "grad_norm": 1.2091253995895386, "learning_rate": 2.9445796128146142e-05, "loss": 0.1262, "step": 6448 }, { "epoch": 0.11420880429633368, "grad_norm": 1.5998623371124268, "learning_rate": 2.944556439815879e-05, "loss": 0.2062, "step": 6449 }, { "epoch": 0.11422651383336212, "grad_norm": 0.9956096410751343, "learning_rate": 2.9445332620646995e-05, "loss": 0.0926, "step": 6450 }, { "epoch": 0.11424422337039054, "grad_norm": 1.26437509059906, "learning_rate": 2.944510079561151e-05, "loss": 0.1568, "step": 6451 }, { "epoch": 0.11426193290741897, "grad_norm": 1.0907106399536133, "learning_rate": 2.9444868923053106e-05, "loss": 0.1254, "step": 6452 }, { "epoch": 0.1142796424444474, "grad_norm": 1.2477213144302368, "learning_rate": 2.9444637002972544e-05, "loss": 0.1307, "step": 6453 }, { "epoch": 0.11429735198147582, "grad_norm": 1.1946945190429688, "learning_rate": 2.944440503537059e-05, "loss": 0.1211, "step": 6454 }, { "epoch": 0.11431506151850425, "grad_norm": 1.6836220026016235, "learning_rate": 2.9444173020248005e-05, "loss": 0.139, "step": 6455 }, { "epoch": 0.11433277105553268, "grad_norm": 1.6877546310424805, "learning_rate": 2.9443940957605547e-05, "loss": 0.1515, "step": 6456 }, { "epoch": 0.11435048059256112, "grad_norm": 1.3736358880996704, "learning_rate": 2.9443708847443988e-05, "loss": 0.1369, "step": 6457 }, { "epoch": 0.11436819012958954, "grad_norm": 0.815427839756012, "learning_rate": 2.9443476689764084e-05, "loss": 0.1181, "step": 6458 }, { "epoch": 0.11438589966661797, "grad_norm": 0.7850918769836426, "learning_rate": 2.9443244484566607e-05, "loss": 0.0883, "step": 6459 }, { "epoch": 0.1144036092036464, "grad_norm": 1.205082654953003, "learning_rate": 2.9443012231852313e-05, "loss": 0.1475, "step": 6460 }, { "epoch": 0.11442131874067482, "grad_norm": 1.4157212972640991, "learning_rate": 2.9442779931621972e-05, "loss": 0.1251, "step": 6461 }, { "epoch": 0.11443902827770325, "grad_norm": 1.0260454416275024, "learning_rate": 2.9442547583876345e-05, "loss": 0.103, "step": 6462 }, { "epoch": 0.11445673781473167, "grad_norm": 1.0829495191574097, "learning_rate": 2.9442315188616194e-05, "loss": 0.1844, "step": 6463 }, { "epoch": 0.1144744473517601, "grad_norm": 1.2089358568191528, "learning_rate": 2.9442082745842293e-05, "loss": 0.1448, "step": 6464 }, { "epoch": 0.11449215688878854, "grad_norm": 0.9066780805587769, "learning_rate": 2.9441850255555397e-05, "loss": 0.0806, "step": 6465 }, { "epoch": 0.11450986642581697, "grad_norm": 3.5255401134490967, "learning_rate": 2.9441617717756274e-05, "loss": 0.1115, "step": 6466 }, { "epoch": 0.1145275759628454, "grad_norm": 1.4180679321289062, "learning_rate": 2.944138513244569e-05, "loss": 0.1229, "step": 6467 }, { "epoch": 0.11454528549987382, "grad_norm": 1.35042142868042, "learning_rate": 2.944115249962441e-05, "loss": 0.1578, "step": 6468 }, { "epoch": 0.11456299503690225, "grad_norm": 1.671008825302124, "learning_rate": 2.9440919819293198e-05, "loss": 0.1687, "step": 6469 }, { "epoch": 0.11458070457393067, "grad_norm": 1.4633687734603882, "learning_rate": 2.944068709145282e-05, "loss": 0.1176, "step": 6470 }, { "epoch": 0.1145984141109591, "grad_norm": 0.6827748417854309, "learning_rate": 2.9440454316104047e-05, "loss": 0.1376, "step": 6471 }, { "epoch": 0.11461612364798754, "grad_norm": 1.1408841609954834, "learning_rate": 2.944022149324763e-05, "loss": 0.1615, "step": 6472 }, { "epoch": 0.11463383318501597, "grad_norm": 1.3864176273345947, "learning_rate": 2.9439988622884353e-05, "loss": 0.1198, "step": 6473 }, { "epoch": 0.11465154272204439, "grad_norm": 1.8194869756698608, "learning_rate": 2.9439755705014974e-05, "loss": 0.1425, "step": 6474 }, { "epoch": 0.11466925225907282, "grad_norm": 0.8846833109855652, "learning_rate": 2.9439522739640257e-05, "loss": 0.0954, "step": 6475 }, { "epoch": 0.11468696179610124, "grad_norm": 1.2604901790618896, "learning_rate": 2.943928972676097e-05, "loss": 0.1209, "step": 6476 }, { "epoch": 0.11470467133312967, "grad_norm": 0.6355501413345337, "learning_rate": 2.943905666637788e-05, "loss": 0.0909, "step": 6477 }, { "epoch": 0.1147223808701581, "grad_norm": 1.3631501197814941, "learning_rate": 2.9438823558491756e-05, "loss": 0.1419, "step": 6478 }, { "epoch": 0.11474009040718652, "grad_norm": 0.6720353960990906, "learning_rate": 2.943859040310336e-05, "loss": 0.1006, "step": 6479 }, { "epoch": 0.11475779994421496, "grad_norm": 1.6913002729415894, "learning_rate": 2.9438357200213464e-05, "loss": 0.1401, "step": 6480 }, { "epoch": 0.11477550948124339, "grad_norm": 1.5131665468215942, "learning_rate": 2.9438123949822836e-05, "loss": 0.1399, "step": 6481 }, { "epoch": 0.11479321901827182, "grad_norm": 1.3032492399215698, "learning_rate": 2.9437890651932234e-05, "loss": 0.1741, "step": 6482 }, { "epoch": 0.11481092855530024, "grad_norm": 1.3340495824813843, "learning_rate": 2.9437657306542437e-05, "loss": 0.1669, "step": 6483 }, { "epoch": 0.11482863809232867, "grad_norm": 1.4236172437667847, "learning_rate": 2.9437423913654206e-05, "loss": 0.1287, "step": 6484 }, { "epoch": 0.1148463476293571, "grad_norm": 1.2425236701965332, "learning_rate": 2.9437190473268312e-05, "loss": 0.1055, "step": 6485 }, { "epoch": 0.11486405716638552, "grad_norm": 1.395051121711731, "learning_rate": 2.943695698538552e-05, "loss": 0.1702, "step": 6486 }, { "epoch": 0.11488176670341396, "grad_norm": 0.9767583608627319, "learning_rate": 2.9436723450006597e-05, "loss": 0.1354, "step": 6487 }, { "epoch": 0.11489947624044239, "grad_norm": 1.5161187648773193, "learning_rate": 2.9436489867132323e-05, "loss": 0.1158, "step": 6488 }, { "epoch": 0.11491718577747081, "grad_norm": 1.211970567703247, "learning_rate": 2.943625623676345e-05, "loss": 0.1493, "step": 6489 }, { "epoch": 0.11493489531449924, "grad_norm": 1.6245936155319214, "learning_rate": 2.9436022558900752e-05, "loss": 0.1547, "step": 6490 }, { "epoch": 0.11495260485152767, "grad_norm": 1.284022569656372, "learning_rate": 2.943578883354501e-05, "loss": 0.1513, "step": 6491 }, { "epoch": 0.1149703143885561, "grad_norm": 1.420545220375061, "learning_rate": 2.943555506069697e-05, "loss": 0.1073, "step": 6492 }, { "epoch": 0.11498802392558452, "grad_norm": 1.082114577293396, "learning_rate": 2.9435321240357423e-05, "loss": 0.1172, "step": 6493 }, { "epoch": 0.11500573346261295, "grad_norm": 1.7032076120376587, "learning_rate": 2.9435087372527126e-05, "loss": 0.118, "step": 6494 }, { "epoch": 0.11502344299964139, "grad_norm": 1.3877031803131104, "learning_rate": 2.943485345720685e-05, "loss": 0.1072, "step": 6495 }, { "epoch": 0.11504115253666981, "grad_norm": 1.4980854988098145, "learning_rate": 2.943461949439737e-05, "loss": 0.1505, "step": 6496 }, { "epoch": 0.11505886207369824, "grad_norm": 1.0749167203903198, "learning_rate": 2.943438548409945e-05, "loss": 0.1684, "step": 6497 }, { "epoch": 0.11507657161072667, "grad_norm": 1.364050030708313, "learning_rate": 2.9434151426313864e-05, "loss": 0.1065, "step": 6498 }, { "epoch": 0.11509428114775509, "grad_norm": 1.1559839248657227, "learning_rate": 2.9433917321041377e-05, "loss": 0.1166, "step": 6499 }, { "epoch": 0.11511199068478352, "grad_norm": 1.7904155254364014, "learning_rate": 2.9433683168282762e-05, "loss": 0.1496, "step": 6500 }, { "epoch": 0.11512970022181195, "grad_norm": 1.1709002256393433, "learning_rate": 2.943344896803879e-05, "loss": 0.138, "step": 6501 }, { "epoch": 0.11514740975884039, "grad_norm": 1.0480443239212036, "learning_rate": 2.9433214720310228e-05, "loss": 0.1449, "step": 6502 }, { "epoch": 0.11516511929586881, "grad_norm": 1.029573917388916, "learning_rate": 2.943298042509785e-05, "loss": 0.1113, "step": 6503 }, { "epoch": 0.11518282883289724, "grad_norm": 1.287657618522644, "learning_rate": 2.9432746082402427e-05, "loss": 0.1658, "step": 6504 }, { "epoch": 0.11520053836992566, "grad_norm": 1.4986534118652344, "learning_rate": 2.9432511692224733e-05, "loss": 0.1666, "step": 6505 }, { "epoch": 0.11521824790695409, "grad_norm": 1.8926396369934082, "learning_rate": 2.9432277254565534e-05, "loss": 0.1435, "step": 6506 }, { "epoch": 0.11523595744398252, "grad_norm": 1.2002720832824707, "learning_rate": 2.94320427694256e-05, "loss": 0.092, "step": 6507 }, { "epoch": 0.11525366698101094, "grad_norm": 1.1055347919464111, "learning_rate": 2.9431808236805706e-05, "loss": 0.1118, "step": 6508 }, { "epoch": 0.11527137651803938, "grad_norm": 1.2219436168670654, "learning_rate": 2.9431573656706623e-05, "loss": 0.1621, "step": 6509 }, { "epoch": 0.11528908605506781, "grad_norm": 1.1142560243606567, "learning_rate": 2.943133902912912e-05, "loss": 0.1228, "step": 6510 }, { "epoch": 0.11530679559209624, "grad_norm": 1.0801658630371094, "learning_rate": 2.9431104354073976e-05, "loss": 0.117, "step": 6511 }, { "epoch": 0.11532450512912466, "grad_norm": 1.3677382469177246, "learning_rate": 2.9430869631541953e-05, "loss": 0.1716, "step": 6512 }, { "epoch": 0.11534221466615309, "grad_norm": 0.8976718187332153, "learning_rate": 2.9430634861533835e-05, "loss": 0.1193, "step": 6513 }, { "epoch": 0.11535992420318152, "grad_norm": 1.5796303749084473, "learning_rate": 2.9430400044050384e-05, "loss": 0.1278, "step": 6514 }, { "epoch": 0.11537763374020994, "grad_norm": 0.9129429459571838, "learning_rate": 2.9430165179092373e-05, "loss": 0.0833, "step": 6515 }, { "epoch": 0.11539534327723837, "grad_norm": 1.1897425651550293, "learning_rate": 2.942993026666058e-05, "loss": 0.1322, "step": 6516 }, { "epoch": 0.11541305281426681, "grad_norm": 0.7327213287353516, "learning_rate": 2.942969530675578e-05, "loss": 0.1054, "step": 6517 }, { "epoch": 0.11543076235129524, "grad_norm": 1.0671882629394531, "learning_rate": 2.942946029937874e-05, "loss": 0.1005, "step": 6518 }, { "epoch": 0.11544847188832366, "grad_norm": 0.9382221102714539, "learning_rate": 2.942922524453023e-05, "loss": 0.1305, "step": 6519 }, { "epoch": 0.11546618142535209, "grad_norm": 1.2695636749267578, "learning_rate": 2.942899014221104e-05, "loss": 0.1478, "step": 6520 }, { "epoch": 0.11548389096238051, "grad_norm": 1.018293023109436, "learning_rate": 2.942875499242192e-05, "loss": 0.1188, "step": 6521 }, { "epoch": 0.11550160049940894, "grad_norm": 1.2070766687393188, "learning_rate": 2.942851979516366e-05, "loss": 0.1534, "step": 6522 }, { "epoch": 0.11551931003643737, "grad_norm": 3.0664944648742676, "learning_rate": 2.942828455043703e-05, "loss": 0.1389, "step": 6523 }, { "epoch": 0.11553701957346581, "grad_norm": 1.2200638055801392, "learning_rate": 2.9428049258242803e-05, "loss": 0.1237, "step": 6524 }, { "epoch": 0.11555472911049423, "grad_norm": 0.9002518653869629, "learning_rate": 2.942781391858175e-05, "loss": 0.088, "step": 6525 }, { "epoch": 0.11557243864752266, "grad_norm": 1.4036258459091187, "learning_rate": 2.9427578531454654e-05, "loss": 0.1288, "step": 6526 }, { "epoch": 0.11559014818455109, "grad_norm": 1.4838719367980957, "learning_rate": 2.942734309686228e-05, "loss": 0.169, "step": 6527 }, { "epoch": 0.11560785772157951, "grad_norm": 1.3743200302124023, "learning_rate": 2.9427107614805408e-05, "loss": 0.1676, "step": 6528 }, { "epoch": 0.11562556725860794, "grad_norm": 0.6488073468208313, "learning_rate": 2.942687208528481e-05, "loss": 0.0941, "step": 6529 }, { "epoch": 0.11564327679563637, "grad_norm": 1.3918724060058594, "learning_rate": 2.9426636508301262e-05, "loss": 0.1491, "step": 6530 }, { "epoch": 0.11566098633266479, "grad_norm": 1.0846959352493286, "learning_rate": 2.9426400883855543e-05, "loss": 0.1532, "step": 6531 }, { "epoch": 0.11567869586969323, "grad_norm": 0.886036217212677, "learning_rate": 2.942616521194842e-05, "loss": 0.1215, "step": 6532 }, { "epoch": 0.11569640540672166, "grad_norm": 1.2957583665847778, "learning_rate": 2.9425929492580675e-05, "loss": 0.1331, "step": 6533 }, { "epoch": 0.11571411494375008, "grad_norm": 1.1929776668548584, "learning_rate": 2.9425693725753075e-05, "loss": 0.1628, "step": 6534 }, { "epoch": 0.11573182448077851, "grad_norm": 1.274558424949646, "learning_rate": 2.942545791146641e-05, "loss": 0.1483, "step": 6535 }, { "epoch": 0.11574953401780694, "grad_norm": 1.1244122982025146, "learning_rate": 2.942522204972144e-05, "loss": 0.1677, "step": 6536 }, { "epoch": 0.11576724355483536, "grad_norm": 1.1415259838104248, "learning_rate": 2.9424986140518953e-05, "loss": 0.1719, "step": 6537 }, { "epoch": 0.11578495309186379, "grad_norm": 1.5087103843688965, "learning_rate": 2.9424750183859722e-05, "loss": 0.1575, "step": 6538 }, { "epoch": 0.11580266262889223, "grad_norm": 1.4344782829284668, "learning_rate": 2.942451417974452e-05, "loss": 0.1133, "step": 6539 }, { "epoch": 0.11582037216592066, "grad_norm": 1.1800568103790283, "learning_rate": 2.9424278128174122e-05, "loss": 0.0974, "step": 6540 }, { "epoch": 0.11583808170294908, "grad_norm": 1.127259373664856, "learning_rate": 2.942404202914931e-05, "loss": 0.1356, "step": 6541 }, { "epoch": 0.11585579123997751, "grad_norm": 1.2643141746520996, "learning_rate": 2.942380588267086e-05, "loss": 0.1235, "step": 6542 }, { "epoch": 0.11587350077700594, "grad_norm": 1.1458443403244019, "learning_rate": 2.942356968873955e-05, "loss": 0.1318, "step": 6543 }, { "epoch": 0.11589121031403436, "grad_norm": 1.5037521123886108, "learning_rate": 2.9423333447356152e-05, "loss": 0.1861, "step": 6544 }, { "epoch": 0.11590891985106279, "grad_norm": 1.2738332748413086, "learning_rate": 2.9423097158521445e-05, "loss": 0.0998, "step": 6545 }, { "epoch": 0.11592662938809122, "grad_norm": 0.8149670958518982, "learning_rate": 2.9422860822236207e-05, "loss": 0.1061, "step": 6546 }, { "epoch": 0.11594433892511966, "grad_norm": 1.4908535480499268, "learning_rate": 2.942262443850122e-05, "loss": 0.0839, "step": 6547 }, { "epoch": 0.11596204846214808, "grad_norm": 0.8709651827812195, "learning_rate": 2.9422388007317252e-05, "loss": 0.1561, "step": 6548 }, { "epoch": 0.11597975799917651, "grad_norm": 1.170662522315979, "learning_rate": 2.9422151528685084e-05, "loss": 0.1081, "step": 6549 }, { "epoch": 0.11599746753620493, "grad_norm": 1.6135231256484985, "learning_rate": 2.9421915002605502e-05, "loss": 0.1309, "step": 6550 }, { "epoch": 0.11601517707323336, "grad_norm": 1.4053252935409546, "learning_rate": 2.9421678429079277e-05, "loss": 0.1234, "step": 6551 }, { "epoch": 0.11603288661026179, "grad_norm": 0.7374605536460876, "learning_rate": 2.942144180810719e-05, "loss": 0.1177, "step": 6552 }, { "epoch": 0.11605059614729021, "grad_norm": 0.8781150579452515, "learning_rate": 2.9421205139690013e-05, "loss": 0.1579, "step": 6553 }, { "epoch": 0.11606830568431865, "grad_norm": 1.5511970520019531, "learning_rate": 2.9420968423828535e-05, "loss": 0.1742, "step": 6554 }, { "epoch": 0.11608601522134708, "grad_norm": 1.0497989654541016, "learning_rate": 2.9420731660523525e-05, "loss": 0.1577, "step": 6555 }, { "epoch": 0.1161037247583755, "grad_norm": 0.96379554271698, "learning_rate": 2.9420494849775766e-05, "loss": 0.1311, "step": 6556 }, { "epoch": 0.11612143429540393, "grad_norm": 0.8028421998023987, "learning_rate": 2.9420257991586044e-05, "loss": 0.109, "step": 6557 }, { "epoch": 0.11613914383243236, "grad_norm": 1.6243040561676025, "learning_rate": 2.9420021085955125e-05, "loss": 0.1721, "step": 6558 }, { "epoch": 0.11615685336946079, "grad_norm": 0.9389899373054504, "learning_rate": 2.94197841328838e-05, "loss": 0.1751, "step": 6559 }, { "epoch": 0.11617456290648921, "grad_norm": 0.909188985824585, "learning_rate": 2.9419547132372842e-05, "loss": 0.1368, "step": 6560 }, { "epoch": 0.11619227244351764, "grad_norm": 0.9485960602760315, "learning_rate": 2.941931008442303e-05, "loss": 0.1043, "step": 6561 }, { "epoch": 0.11620998198054608, "grad_norm": 1.3392695188522339, "learning_rate": 2.9419072989035152e-05, "loss": 0.1171, "step": 6562 }, { "epoch": 0.1162276915175745, "grad_norm": 1.0706926584243774, "learning_rate": 2.9418835846209976e-05, "loss": 0.1712, "step": 6563 }, { "epoch": 0.11624540105460293, "grad_norm": 1.041224718093872, "learning_rate": 2.9418598655948292e-05, "loss": 0.1145, "step": 6564 }, { "epoch": 0.11626311059163136, "grad_norm": 1.4921749830245972, "learning_rate": 2.941836141825088e-05, "loss": 0.1442, "step": 6565 }, { "epoch": 0.11628082012865978, "grad_norm": 1.0124658346176147, "learning_rate": 2.941812413311851e-05, "loss": 0.1065, "step": 6566 }, { "epoch": 0.11629852966568821, "grad_norm": 1.339317798614502, "learning_rate": 2.9417886800551974e-05, "loss": 0.108, "step": 6567 }, { "epoch": 0.11631623920271664, "grad_norm": 1.1852105855941772, "learning_rate": 2.9417649420552046e-05, "loss": 0.1421, "step": 6568 }, { "epoch": 0.11633394873974508, "grad_norm": 1.0071051120758057, "learning_rate": 2.941741199311951e-05, "loss": 0.1386, "step": 6569 }, { "epoch": 0.1163516582767735, "grad_norm": 1.065019130706787, "learning_rate": 2.9417174518255153e-05, "loss": 0.1394, "step": 6570 }, { "epoch": 0.11636936781380193, "grad_norm": 1.0921250581741333, "learning_rate": 2.941693699595974e-05, "loss": 0.113, "step": 6571 }, { "epoch": 0.11638707735083036, "grad_norm": 1.0406131744384766, "learning_rate": 2.9416699426234067e-05, "loss": 0.1146, "step": 6572 }, { "epoch": 0.11640478688785878, "grad_norm": 0.8685389757156372, "learning_rate": 2.9416461809078917e-05, "loss": 0.1572, "step": 6573 }, { "epoch": 0.11642249642488721, "grad_norm": 0.7763631939888, "learning_rate": 2.9416224144495056e-05, "loss": 0.1152, "step": 6574 }, { "epoch": 0.11644020596191564, "grad_norm": 1.3650509119033813, "learning_rate": 2.9415986432483282e-05, "loss": 0.1518, "step": 6575 }, { "epoch": 0.11645791549894408, "grad_norm": 0.9342799186706543, "learning_rate": 2.9415748673044372e-05, "loss": 0.1559, "step": 6576 }, { "epoch": 0.1164756250359725, "grad_norm": 1.3015772104263306, "learning_rate": 2.94155108661791e-05, "loss": 0.1176, "step": 6577 }, { "epoch": 0.11649333457300093, "grad_norm": 1.127114176750183, "learning_rate": 2.9415273011888262e-05, "loss": 0.095, "step": 6578 }, { "epoch": 0.11651104411002935, "grad_norm": 0.9694299697875977, "learning_rate": 2.9415035110172628e-05, "loss": 0.1222, "step": 6579 }, { "epoch": 0.11652875364705778, "grad_norm": 0.9858259558677673, "learning_rate": 2.9414797161032988e-05, "loss": 0.1138, "step": 6580 }, { "epoch": 0.11654646318408621, "grad_norm": 1.3986440896987915, "learning_rate": 2.9414559164470125e-05, "loss": 0.1227, "step": 6581 }, { "epoch": 0.11656417272111463, "grad_norm": 2.7872731685638428, "learning_rate": 2.941432112048482e-05, "loss": 0.1425, "step": 6582 }, { "epoch": 0.11658188225814306, "grad_norm": 0.846010684967041, "learning_rate": 2.941408302907786e-05, "loss": 0.1253, "step": 6583 }, { "epoch": 0.1165995917951715, "grad_norm": 1.4906994104385376, "learning_rate": 2.941384489025002e-05, "loss": 0.2003, "step": 6584 }, { "epoch": 0.11661730133219993, "grad_norm": 1.161045789718628, "learning_rate": 2.941360670400209e-05, "loss": 0.1117, "step": 6585 }, { "epoch": 0.11663501086922835, "grad_norm": 0.718938946723938, "learning_rate": 2.941336847033485e-05, "loss": 0.1457, "step": 6586 }, { "epoch": 0.11665272040625678, "grad_norm": 0.7771636247634888, "learning_rate": 2.9413130189249085e-05, "loss": 0.1473, "step": 6587 }, { "epoch": 0.1166704299432852, "grad_norm": 0.8575987219810486, "learning_rate": 2.941289186074558e-05, "loss": 0.1269, "step": 6588 }, { "epoch": 0.11668813948031363, "grad_norm": 0.8246989846229553, "learning_rate": 2.9412653484825123e-05, "loss": 0.1342, "step": 6589 }, { "epoch": 0.11670584901734206, "grad_norm": 1.1234948635101318, "learning_rate": 2.941241506148849e-05, "loss": 0.1174, "step": 6590 }, { "epoch": 0.1167235585543705, "grad_norm": 1.1236207485198975, "learning_rate": 2.941217659073647e-05, "loss": 0.0896, "step": 6591 }, { "epoch": 0.11674126809139893, "grad_norm": 1.4702563285827637, "learning_rate": 2.941193807256985e-05, "loss": 0.1976, "step": 6592 }, { "epoch": 0.11675897762842735, "grad_norm": 1.1994054317474365, "learning_rate": 2.9411699506989404e-05, "loss": 0.1117, "step": 6593 }, { "epoch": 0.11677668716545578, "grad_norm": 1.2676899433135986, "learning_rate": 2.941146089399593e-05, "loss": 0.1544, "step": 6594 }, { "epoch": 0.1167943967024842, "grad_norm": 0.8085352778434753, "learning_rate": 2.9411222233590203e-05, "loss": 0.1009, "step": 6595 }, { "epoch": 0.11681210623951263, "grad_norm": 0.8568081259727478, "learning_rate": 2.9410983525773015e-05, "loss": 0.1338, "step": 6596 }, { "epoch": 0.11682981577654106, "grad_norm": 1.2477991580963135, "learning_rate": 2.9410744770545147e-05, "loss": 0.0995, "step": 6597 }, { "epoch": 0.11684752531356948, "grad_norm": 1.3481998443603516, "learning_rate": 2.941050596790739e-05, "loss": 0.1348, "step": 6598 }, { "epoch": 0.11686523485059792, "grad_norm": 1.0418065786361694, "learning_rate": 2.9410267117860523e-05, "loss": 0.1119, "step": 6599 }, { "epoch": 0.11688294438762635, "grad_norm": 0.9565759301185608, "learning_rate": 2.941002822040534e-05, "loss": 0.0845, "step": 6600 }, { "epoch": 0.11690065392465478, "grad_norm": 1.4349232912063599, "learning_rate": 2.9409789275542617e-05, "loss": 0.133, "step": 6601 }, { "epoch": 0.1169183634616832, "grad_norm": 1.35871160030365, "learning_rate": 2.940955028327314e-05, "loss": 0.1602, "step": 6602 }, { "epoch": 0.11693607299871163, "grad_norm": 1.5939793586730957, "learning_rate": 2.9409311243597707e-05, "loss": 0.1281, "step": 6603 }, { "epoch": 0.11695378253574006, "grad_norm": 0.8664671778678894, "learning_rate": 2.9409072156517095e-05, "loss": 0.1299, "step": 6604 }, { "epoch": 0.11697149207276848, "grad_norm": 1.2567696571350098, "learning_rate": 2.9408833022032095e-05, "loss": 0.1182, "step": 6605 }, { "epoch": 0.11698920160979692, "grad_norm": 1.1819573640823364, "learning_rate": 2.940859384014349e-05, "loss": 0.1306, "step": 6606 }, { "epoch": 0.11700691114682535, "grad_norm": 0.9008628726005554, "learning_rate": 2.940835461085207e-05, "loss": 0.135, "step": 6607 }, { "epoch": 0.11702462068385378, "grad_norm": 1.3251912593841553, "learning_rate": 2.9408115334158616e-05, "loss": 0.0957, "step": 6608 }, { "epoch": 0.1170423302208822, "grad_norm": 1.51101815700531, "learning_rate": 2.9407876010063925e-05, "loss": 0.0934, "step": 6609 }, { "epoch": 0.11706003975791063, "grad_norm": 1.6010324954986572, "learning_rate": 2.9407636638568775e-05, "loss": 0.1168, "step": 6610 }, { "epoch": 0.11707774929493905, "grad_norm": 1.027734637260437, "learning_rate": 2.940739721967396e-05, "loss": 0.1377, "step": 6611 }, { "epoch": 0.11709545883196748, "grad_norm": 0.8975421190261841, "learning_rate": 2.9407157753380266e-05, "loss": 0.1506, "step": 6612 }, { "epoch": 0.1171131683689959, "grad_norm": 0.7008670568466187, "learning_rate": 2.9406918239688478e-05, "loss": 0.1489, "step": 6613 }, { "epoch": 0.11713087790602435, "grad_norm": 1.9673957824707031, "learning_rate": 2.9406678678599393e-05, "loss": 0.1265, "step": 6614 }, { "epoch": 0.11714858744305277, "grad_norm": 1.1993411779403687, "learning_rate": 2.9406439070113786e-05, "loss": 0.1317, "step": 6615 }, { "epoch": 0.1171662969800812, "grad_norm": 1.7555620670318604, "learning_rate": 2.9406199414232454e-05, "loss": 0.1296, "step": 6616 }, { "epoch": 0.11718400651710963, "grad_norm": 1.1392236948013306, "learning_rate": 2.940595971095618e-05, "loss": 0.112, "step": 6617 }, { "epoch": 0.11720171605413805, "grad_norm": 1.1334675550460815, "learning_rate": 2.9405719960285756e-05, "loss": 0.1309, "step": 6618 }, { "epoch": 0.11721942559116648, "grad_norm": 1.6627919673919678, "learning_rate": 2.940548016222197e-05, "loss": 0.1863, "step": 6619 }, { "epoch": 0.1172371351281949, "grad_norm": 0.9719165563583374, "learning_rate": 2.9405240316765613e-05, "loss": 0.1559, "step": 6620 }, { "epoch": 0.11725484466522335, "grad_norm": 0.6651251912117004, "learning_rate": 2.9405000423917473e-05, "loss": 0.1047, "step": 6621 }, { "epoch": 0.11727255420225177, "grad_norm": 1.7153332233428955, "learning_rate": 2.9404760483678338e-05, "loss": 0.1264, "step": 6622 }, { "epoch": 0.1172902637392802, "grad_norm": 1.0574582815170288, "learning_rate": 2.9404520496049e-05, "loss": 0.097, "step": 6623 }, { "epoch": 0.11730797327630862, "grad_norm": 1.7061326503753662, "learning_rate": 2.9404280461030242e-05, "loss": 0.1436, "step": 6624 }, { "epoch": 0.11732568281333705, "grad_norm": 2.0039916038513184, "learning_rate": 2.940404037862286e-05, "loss": 0.15, "step": 6625 }, { "epoch": 0.11734339235036548, "grad_norm": 0.6337059736251831, "learning_rate": 2.9403800248827642e-05, "loss": 0.1343, "step": 6626 }, { "epoch": 0.1173611018873939, "grad_norm": 1.66916024684906, "learning_rate": 2.9403560071645382e-05, "loss": 0.1043, "step": 6627 }, { "epoch": 0.11737881142442233, "grad_norm": 1.5806217193603516, "learning_rate": 2.9403319847076863e-05, "loss": 0.2009, "step": 6628 }, { "epoch": 0.11739652096145077, "grad_norm": 1.3255772590637207, "learning_rate": 2.9403079575122878e-05, "loss": 0.1025, "step": 6629 }, { "epoch": 0.1174142304984792, "grad_norm": 1.9751261472702026, "learning_rate": 2.940283925578422e-05, "loss": 0.1774, "step": 6630 }, { "epoch": 0.11743194003550762, "grad_norm": 1.5542417764663696, "learning_rate": 2.9402598889061675e-05, "loss": 0.0934, "step": 6631 }, { "epoch": 0.11744964957253605, "grad_norm": 1.4498125314712524, "learning_rate": 2.9402358474956036e-05, "loss": 0.123, "step": 6632 }, { "epoch": 0.11746735910956448, "grad_norm": 0.8553786277770996, "learning_rate": 2.94021180134681e-05, "loss": 0.11, "step": 6633 }, { "epoch": 0.1174850686465929, "grad_norm": 1.6310219764709473, "learning_rate": 2.9401877504598646e-05, "loss": 0.1679, "step": 6634 }, { "epoch": 0.11750277818362133, "grad_norm": 3.297787666320801, "learning_rate": 2.940163694834847e-05, "loss": 0.1359, "step": 6635 }, { "epoch": 0.11752048772064977, "grad_norm": 1.4381705522537231, "learning_rate": 2.9401396344718373e-05, "loss": 0.1522, "step": 6636 }, { "epoch": 0.1175381972576782, "grad_norm": 1.2010340690612793, "learning_rate": 2.940115569370913e-05, "loss": 0.1416, "step": 6637 }, { "epoch": 0.11755590679470662, "grad_norm": 1.3698467016220093, "learning_rate": 2.9400914995321552e-05, "loss": 0.135, "step": 6638 }, { "epoch": 0.11757361633173505, "grad_norm": 1.557074785232544, "learning_rate": 2.940067424955641e-05, "loss": 0.1521, "step": 6639 }, { "epoch": 0.11759132586876347, "grad_norm": 0.905785858631134, "learning_rate": 2.940043345641451e-05, "loss": 0.1005, "step": 6640 }, { "epoch": 0.1176090354057919, "grad_norm": 0.9834039807319641, "learning_rate": 2.9400192615896638e-05, "loss": 0.0967, "step": 6641 }, { "epoch": 0.11762674494282033, "grad_norm": 0.9746255874633789, "learning_rate": 2.9399951728003594e-05, "loss": 0.1345, "step": 6642 }, { "epoch": 0.11764445447984877, "grad_norm": 1.4817214012145996, "learning_rate": 2.9399710792736163e-05, "loss": 0.1696, "step": 6643 }, { "epoch": 0.1176621640168772, "grad_norm": 1.3952860832214355, "learning_rate": 2.9399469810095142e-05, "loss": 0.1342, "step": 6644 }, { "epoch": 0.11767987355390562, "grad_norm": 1.1538457870483398, "learning_rate": 2.939922878008132e-05, "loss": 0.0804, "step": 6645 }, { "epoch": 0.11769758309093405, "grad_norm": 0.7475193738937378, "learning_rate": 2.939898770269549e-05, "loss": 0.1367, "step": 6646 }, { "epoch": 0.11771529262796247, "grad_norm": 1.0978596210479736, "learning_rate": 2.9398746577938447e-05, "loss": 0.1111, "step": 6647 }, { "epoch": 0.1177330021649909, "grad_norm": 1.6151992082595825, "learning_rate": 2.9398505405810985e-05, "loss": 0.1544, "step": 6648 }, { "epoch": 0.11775071170201933, "grad_norm": 0.8167439103126526, "learning_rate": 2.93982641863139e-05, "loss": 0.1271, "step": 6649 }, { "epoch": 0.11776842123904775, "grad_norm": 1.2805187702178955, "learning_rate": 2.9398022919447978e-05, "loss": 0.1447, "step": 6650 }, { "epoch": 0.11778613077607619, "grad_norm": 1.0429728031158447, "learning_rate": 2.939778160521402e-05, "loss": 0.1001, "step": 6651 }, { "epoch": 0.11780384031310462, "grad_norm": 1.477304458618164, "learning_rate": 2.9397540243612812e-05, "loss": 0.1193, "step": 6652 }, { "epoch": 0.11782154985013304, "grad_norm": 1.1459132432937622, "learning_rate": 2.9397298834645153e-05, "loss": 0.0971, "step": 6653 }, { "epoch": 0.11783925938716147, "grad_norm": 0.7653710842132568, "learning_rate": 2.9397057378311842e-05, "loss": 0.1333, "step": 6654 }, { "epoch": 0.1178569689241899, "grad_norm": 1.0967597961425781, "learning_rate": 2.9396815874613662e-05, "loss": 0.1477, "step": 6655 }, { "epoch": 0.11787467846121832, "grad_norm": 0.9191855192184448, "learning_rate": 2.939657432355142e-05, "loss": 0.1687, "step": 6656 }, { "epoch": 0.11789238799824675, "grad_norm": 0.8735758066177368, "learning_rate": 2.93963327251259e-05, "loss": 0.1472, "step": 6657 }, { "epoch": 0.11791009753527519, "grad_norm": 1.6399928331375122, "learning_rate": 2.9396091079337907e-05, "loss": 0.1301, "step": 6658 }, { "epoch": 0.11792780707230362, "grad_norm": 1.0005416870117188, "learning_rate": 2.9395849386188224e-05, "loss": 0.1331, "step": 6659 }, { "epoch": 0.11794551660933204, "grad_norm": 1.0205882787704468, "learning_rate": 2.9395607645677657e-05, "loss": 0.1523, "step": 6660 }, { "epoch": 0.11796322614636047, "grad_norm": 0.9759616851806641, "learning_rate": 2.9395365857806997e-05, "loss": 0.1021, "step": 6661 }, { "epoch": 0.1179809356833889, "grad_norm": 0.8081018328666687, "learning_rate": 2.9395124022577035e-05, "loss": 0.1309, "step": 6662 }, { "epoch": 0.11799864522041732, "grad_norm": 1.0933377742767334, "learning_rate": 2.9394882139988576e-05, "loss": 0.1022, "step": 6663 }, { "epoch": 0.11801635475744575, "grad_norm": 1.4420623779296875, "learning_rate": 2.9394640210042406e-05, "loss": 0.1377, "step": 6664 }, { "epoch": 0.11803406429447418, "grad_norm": 1.2435636520385742, "learning_rate": 2.9394398232739333e-05, "loss": 0.1342, "step": 6665 }, { "epoch": 0.11805177383150262, "grad_norm": 1.0478863716125488, "learning_rate": 2.939415620808014e-05, "loss": 0.1393, "step": 6666 }, { "epoch": 0.11806948336853104, "grad_norm": 1.1699471473693848, "learning_rate": 2.939391413606563e-05, "loss": 0.1277, "step": 6667 }, { "epoch": 0.11808719290555947, "grad_norm": 1.4183688163757324, "learning_rate": 2.9393672016696598e-05, "loss": 0.1394, "step": 6668 }, { "epoch": 0.1181049024425879, "grad_norm": 1.380407691001892, "learning_rate": 2.939342984997384e-05, "loss": 0.1337, "step": 6669 }, { "epoch": 0.11812261197961632, "grad_norm": 1.0553081035614014, "learning_rate": 2.9393187635898153e-05, "loss": 0.1201, "step": 6670 }, { "epoch": 0.11814032151664475, "grad_norm": 1.6522603034973145, "learning_rate": 2.939294537447034e-05, "loss": 0.131, "step": 6671 }, { "epoch": 0.11815803105367317, "grad_norm": 1.3134127855300903, "learning_rate": 2.9392703065691185e-05, "loss": 0.144, "step": 6672 }, { "epoch": 0.11817574059070161, "grad_norm": 1.1767051219940186, "learning_rate": 2.9392460709561496e-05, "loss": 0.1189, "step": 6673 }, { "epoch": 0.11819345012773004, "grad_norm": 1.3359757661819458, "learning_rate": 2.939221830608207e-05, "loss": 0.1486, "step": 6674 }, { "epoch": 0.11821115966475847, "grad_norm": 1.0254648923873901, "learning_rate": 2.9391975855253698e-05, "loss": 0.1355, "step": 6675 }, { "epoch": 0.1182288692017869, "grad_norm": 0.8439029455184937, "learning_rate": 2.939173335707718e-05, "loss": 0.1159, "step": 6676 }, { "epoch": 0.11824657873881532, "grad_norm": 1.015629768371582, "learning_rate": 2.9391490811553316e-05, "loss": 0.1189, "step": 6677 }, { "epoch": 0.11826428827584375, "grad_norm": 1.2725149393081665, "learning_rate": 2.9391248218682906e-05, "loss": 0.1265, "step": 6678 }, { "epoch": 0.11828199781287217, "grad_norm": 1.1272186040878296, "learning_rate": 2.9391005578466742e-05, "loss": 0.1715, "step": 6679 }, { "epoch": 0.1182997073499006, "grad_norm": 1.010432481765747, "learning_rate": 2.9390762890905624e-05, "loss": 0.102, "step": 6680 }, { "epoch": 0.11831741688692904, "grad_norm": 1.4452033042907715, "learning_rate": 2.9390520156000352e-05, "loss": 0.1242, "step": 6681 }, { "epoch": 0.11833512642395747, "grad_norm": 1.5454367399215698, "learning_rate": 2.939027737375173e-05, "loss": 0.1289, "step": 6682 }, { "epoch": 0.11835283596098589, "grad_norm": 1.0471866130828857, "learning_rate": 2.9390034544160546e-05, "loss": 0.1528, "step": 6683 }, { "epoch": 0.11837054549801432, "grad_norm": 1.3358134031295776, "learning_rate": 2.9389791667227602e-05, "loss": 0.159, "step": 6684 }, { "epoch": 0.11838825503504274, "grad_norm": 1.1327078342437744, "learning_rate": 2.9389548742953704e-05, "loss": 0.122, "step": 6685 }, { "epoch": 0.11840596457207117, "grad_norm": 2.4583094120025635, "learning_rate": 2.9389305771339644e-05, "loss": 0.1729, "step": 6686 }, { "epoch": 0.1184236741090996, "grad_norm": 0.7835233807563782, "learning_rate": 2.9389062752386223e-05, "loss": 0.087, "step": 6687 }, { "epoch": 0.11844138364612804, "grad_norm": 0.6752554774284363, "learning_rate": 2.938881968609424e-05, "loss": 0.1461, "step": 6688 }, { "epoch": 0.11845909318315646, "grad_norm": 1.1500575542449951, "learning_rate": 2.9388576572464493e-05, "loss": 0.1418, "step": 6689 }, { "epoch": 0.11847680272018489, "grad_norm": 1.3157188892364502, "learning_rate": 2.938833341149779e-05, "loss": 0.1057, "step": 6690 }, { "epoch": 0.11849451225721332, "grad_norm": 1.0290178060531616, "learning_rate": 2.938809020319492e-05, "loss": 0.1507, "step": 6691 }, { "epoch": 0.11851222179424174, "grad_norm": 2.6546592712402344, "learning_rate": 2.9387846947556692e-05, "loss": 0.0936, "step": 6692 }, { "epoch": 0.11852993133127017, "grad_norm": 1.0136909484863281, "learning_rate": 2.9387603644583902e-05, "loss": 0.15, "step": 6693 }, { "epoch": 0.1185476408682986, "grad_norm": 1.5474591255187988, "learning_rate": 2.9387360294277355e-05, "loss": 0.1718, "step": 6694 }, { "epoch": 0.11856535040532702, "grad_norm": 0.9682610034942627, "learning_rate": 2.9387116896637842e-05, "loss": 0.1505, "step": 6695 }, { "epoch": 0.11858305994235546, "grad_norm": 2.9212851524353027, "learning_rate": 2.938687345166617e-05, "loss": 0.165, "step": 6696 }, { "epoch": 0.11860076947938389, "grad_norm": 1.355674386024475, "learning_rate": 2.9386629959363143e-05, "loss": 0.1032, "step": 6697 }, { "epoch": 0.11861847901641231, "grad_norm": 2.433748960494995, "learning_rate": 2.9386386419729557e-05, "loss": 0.1535, "step": 6698 }, { "epoch": 0.11863618855344074, "grad_norm": 0.8901601433753967, "learning_rate": 2.9386142832766217e-05, "loss": 0.132, "step": 6699 }, { "epoch": 0.11865389809046917, "grad_norm": 1.1231366395950317, "learning_rate": 2.9385899198473914e-05, "loss": 0.1283, "step": 6700 }, { "epoch": 0.1186716076274976, "grad_norm": 1.3131732940673828, "learning_rate": 2.9385655516853466e-05, "loss": 0.1109, "step": 6701 }, { "epoch": 0.11868931716452602, "grad_norm": 1.6517494916915894, "learning_rate": 2.938541178790566e-05, "loss": 0.1242, "step": 6702 }, { "epoch": 0.11870702670155446, "grad_norm": 1.1177794933319092, "learning_rate": 2.938516801163131e-05, "loss": 0.12, "step": 6703 }, { "epoch": 0.11872473623858289, "grad_norm": 0.9947576522827148, "learning_rate": 2.9384924188031214e-05, "loss": 0.1192, "step": 6704 }, { "epoch": 0.11874244577561131, "grad_norm": 1.2763102054595947, "learning_rate": 2.9384680317106166e-05, "loss": 0.1445, "step": 6705 }, { "epoch": 0.11876015531263974, "grad_norm": 1.056239366531372, "learning_rate": 2.938443639885698e-05, "loss": 0.1434, "step": 6706 }, { "epoch": 0.11877786484966817, "grad_norm": 1.0016263723373413, "learning_rate": 2.9384192433284448e-05, "loss": 0.1011, "step": 6707 }, { "epoch": 0.11879557438669659, "grad_norm": 1.6424369812011719, "learning_rate": 2.938394842038938e-05, "loss": 0.1283, "step": 6708 }, { "epoch": 0.11881328392372502, "grad_norm": 0.8643039464950562, "learning_rate": 2.9383704360172577e-05, "loss": 0.1336, "step": 6709 }, { "epoch": 0.11883099346075346, "grad_norm": 1.1928819417953491, "learning_rate": 2.9383460252634842e-05, "loss": 0.1096, "step": 6710 }, { "epoch": 0.11884870299778189, "grad_norm": 1.247012972831726, "learning_rate": 2.9383216097776974e-05, "loss": 0.1476, "step": 6711 }, { "epoch": 0.11886641253481031, "grad_norm": 1.3092340230941772, "learning_rate": 2.9382971895599784e-05, "loss": 0.1324, "step": 6712 }, { "epoch": 0.11888412207183874, "grad_norm": 1.6264026165008545, "learning_rate": 2.9382727646104074e-05, "loss": 0.1684, "step": 6713 }, { "epoch": 0.11890183160886716, "grad_norm": 1.299180507659912, "learning_rate": 2.9382483349290636e-05, "loss": 0.1302, "step": 6714 }, { "epoch": 0.11891954114589559, "grad_norm": 0.8288941383361816, "learning_rate": 2.938223900516029e-05, "loss": 0.1208, "step": 6715 }, { "epoch": 0.11893725068292402, "grad_norm": 0.9452264904975891, "learning_rate": 2.9381994613713827e-05, "loss": 0.186, "step": 6716 }, { "epoch": 0.11895496021995244, "grad_norm": 1.1552412509918213, "learning_rate": 2.938175017495206e-05, "loss": 0.1703, "step": 6717 }, { "epoch": 0.11897266975698088, "grad_norm": 1.1781600713729858, "learning_rate": 2.938150568887579e-05, "loss": 0.1046, "step": 6718 }, { "epoch": 0.11899037929400931, "grad_norm": 1.1043864488601685, "learning_rate": 2.938126115548582e-05, "loss": 0.1283, "step": 6719 }, { "epoch": 0.11900808883103774, "grad_norm": 1.4364961385726929, "learning_rate": 2.938101657478295e-05, "loss": 0.1203, "step": 6720 }, { "epoch": 0.11902579836806616, "grad_norm": 0.6621330380439758, "learning_rate": 2.9380771946767996e-05, "loss": 0.0955, "step": 6721 }, { "epoch": 0.11904350790509459, "grad_norm": 1.3777828216552734, "learning_rate": 2.938052727144176e-05, "loss": 0.1603, "step": 6722 }, { "epoch": 0.11906121744212302, "grad_norm": 0.8873653411865234, "learning_rate": 2.9380282548805035e-05, "loss": 0.1055, "step": 6723 }, { "epoch": 0.11907892697915144, "grad_norm": 1.341168999671936, "learning_rate": 2.938003777885864e-05, "loss": 0.1007, "step": 6724 }, { "epoch": 0.11909663651617988, "grad_norm": 1.173521637916565, "learning_rate": 2.9379792961603377e-05, "loss": 0.1462, "step": 6725 }, { "epoch": 0.11911434605320831, "grad_norm": 0.7509279251098633, "learning_rate": 2.9379548097040042e-05, "loss": 0.1371, "step": 6726 }, { "epoch": 0.11913205559023674, "grad_norm": 1.2684236764907837, "learning_rate": 2.9379303185169452e-05, "loss": 0.1472, "step": 6727 }, { "epoch": 0.11914976512726516, "grad_norm": 1.3102701902389526, "learning_rate": 2.9379058225992412e-05, "loss": 0.1079, "step": 6728 }, { "epoch": 0.11916747466429359, "grad_norm": 1.3077353239059448, "learning_rate": 2.9378813219509723e-05, "loss": 0.1277, "step": 6729 }, { "epoch": 0.11918518420132201, "grad_norm": 1.492777705192566, "learning_rate": 2.9378568165722192e-05, "loss": 0.1324, "step": 6730 }, { "epoch": 0.11920289373835044, "grad_norm": 2.526466131210327, "learning_rate": 2.937832306463063e-05, "loss": 0.1451, "step": 6731 }, { "epoch": 0.11922060327537887, "grad_norm": 1.002381682395935, "learning_rate": 2.9378077916235833e-05, "loss": 0.1278, "step": 6732 }, { "epoch": 0.11923831281240731, "grad_norm": 1.51311457157135, "learning_rate": 2.937783272053862e-05, "loss": 0.1356, "step": 6733 }, { "epoch": 0.11925602234943573, "grad_norm": 1.7195245027542114, "learning_rate": 2.9377587477539784e-05, "loss": 0.1736, "step": 6734 }, { "epoch": 0.11927373188646416, "grad_norm": 1.4072033166885376, "learning_rate": 2.9377342187240145e-05, "loss": 0.131, "step": 6735 }, { "epoch": 0.11929144142349259, "grad_norm": 1.167149305343628, "learning_rate": 2.9377096849640504e-05, "loss": 0.1185, "step": 6736 }, { "epoch": 0.11930915096052101, "grad_norm": 1.2750883102416992, "learning_rate": 2.937685146474167e-05, "loss": 0.1727, "step": 6737 }, { "epoch": 0.11932686049754944, "grad_norm": 1.2670080661773682, "learning_rate": 2.9376606032544445e-05, "loss": 0.1215, "step": 6738 }, { "epoch": 0.11934457003457787, "grad_norm": 1.4690877199172974, "learning_rate": 2.9376360553049644e-05, "loss": 0.1002, "step": 6739 }, { "epoch": 0.1193622795716063, "grad_norm": 0.8279964923858643, "learning_rate": 2.9376115026258068e-05, "loss": 0.0902, "step": 6740 }, { "epoch": 0.11937998910863473, "grad_norm": 1.2946921586990356, "learning_rate": 2.9375869452170527e-05, "loss": 0.1827, "step": 6741 }, { "epoch": 0.11939769864566316, "grad_norm": 1.5696098804473877, "learning_rate": 2.937562383078783e-05, "loss": 0.1466, "step": 6742 }, { "epoch": 0.11941540818269158, "grad_norm": 1.7429544925689697, "learning_rate": 2.937537816211079e-05, "loss": 0.1299, "step": 6743 }, { "epoch": 0.11943311771972001, "grad_norm": 1.2375816106796265, "learning_rate": 2.9375132446140202e-05, "loss": 0.1284, "step": 6744 }, { "epoch": 0.11945082725674844, "grad_norm": 1.4334620237350464, "learning_rate": 2.9374886682876885e-05, "loss": 0.1005, "step": 6745 }, { "epoch": 0.11946853679377686, "grad_norm": 1.1549830436706543, "learning_rate": 2.937464087232165e-05, "loss": 0.1322, "step": 6746 }, { "epoch": 0.11948624633080529, "grad_norm": 0.9917389750480652, "learning_rate": 2.9374395014475292e-05, "loss": 0.1169, "step": 6747 }, { "epoch": 0.11950395586783373, "grad_norm": 1.530152678489685, "learning_rate": 2.9374149109338633e-05, "loss": 0.1203, "step": 6748 }, { "epoch": 0.11952166540486216, "grad_norm": 1.229225516319275, "learning_rate": 2.9373903156912472e-05, "loss": 0.1288, "step": 6749 }, { "epoch": 0.11953937494189058, "grad_norm": 1.270233154296875, "learning_rate": 2.9373657157197628e-05, "loss": 0.0719, "step": 6750 }, { "epoch": 0.11955708447891901, "grad_norm": 2.961078405380249, "learning_rate": 2.937341111019491e-05, "loss": 0.1881, "step": 6751 }, { "epoch": 0.11957479401594744, "grad_norm": 0.8922566175460815, "learning_rate": 2.9373165015905117e-05, "loss": 0.1023, "step": 6752 }, { "epoch": 0.11959250355297586, "grad_norm": 1.1994433403015137, "learning_rate": 2.9372918874329066e-05, "loss": 0.1692, "step": 6753 }, { "epoch": 0.11961021309000429, "grad_norm": 0.7527751326560974, "learning_rate": 2.9372672685467566e-05, "loss": 0.1328, "step": 6754 }, { "epoch": 0.11962792262703273, "grad_norm": 1.759376883506775, "learning_rate": 2.9372426449321425e-05, "loss": 0.1318, "step": 6755 }, { "epoch": 0.11964563216406116, "grad_norm": 1.3233866691589355, "learning_rate": 2.937218016589146e-05, "loss": 0.0972, "step": 6756 }, { "epoch": 0.11966334170108958, "grad_norm": 0.8839173913002014, "learning_rate": 2.9371933835178473e-05, "loss": 0.1224, "step": 6757 }, { "epoch": 0.11968105123811801, "grad_norm": 1.2268610000610352, "learning_rate": 2.9371687457183275e-05, "loss": 0.136, "step": 6758 }, { "epoch": 0.11969876077514643, "grad_norm": 1.0807108879089355, "learning_rate": 2.9371441031906678e-05, "loss": 0.1438, "step": 6759 }, { "epoch": 0.11971647031217486, "grad_norm": 1.3663161993026733, "learning_rate": 2.9371194559349497e-05, "loss": 0.1661, "step": 6760 }, { "epoch": 0.11973417984920329, "grad_norm": 1.2342230081558228, "learning_rate": 2.937094803951254e-05, "loss": 0.1721, "step": 6761 }, { "epoch": 0.11975188938623171, "grad_norm": 1.0347903966903687, "learning_rate": 2.9370701472396617e-05, "loss": 0.1083, "step": 6762 }, { "epoch": 0.11976959892326015, "grad_norm": 1.5059946775436401, "learning_rate": 2.937045485800254e-05, "loss": 0.114, "step": 6763 }, { "epoch": 0.11978730846028858, "grad_norm": 1.116357445716858, "learning_rate": 2.9370208196331118e-05, "loss": 0.0995, "step": 6764 }, { "epoch": 0.119805017997317, "grad_norm": 1.4499270915985107, "learning_rate": 2.9369961487383166e-05, "loss": 0.142, "step": 6765 }, { "epoch": 0.11982272753434543, "grad_norm": 1.4466063976287842, "learning_rate": 2.936971473115949e-05, "loss": 0.1609, "step": 6766 }, { "epoch": 0.11984043707137386, "grad_norm": 0.8708063960075378, "learning_rate": 2.936946792766091e-05, "loss": 0.0821, "step": 6767 }, { "epoch": 0.11985814660840229, "grad_norm": 1.4041200876235962, "learning_rate": 2.9369221076888234e-05, "loss": 0.1433, "step": 6768 }, { "epoch": 0.11987585614543071, "grad_norm": 1.7223130464553833, "learning_rate": 2.936897417884227e-05, "loss": 0.1291, "step": 6769 }, { "epoch": 0.11989356568245915, "grad_norm": 1.246468424797058, "learning_rate": 2.9368727233523842e-05, "loss": 0.1191, "step": 6770 }, { "epoch": 0.11991127521948758, "grad_norm": 1.308103084564209, "learning_rate": 2.9368480240933752e-05, "loss": 0.1241, "step": 6771 }, { "epoch": 0.119928984756516, "grad_norm": 1.0702753067016602, "learning_rate": 2.936823320107281e-05, "loss": 0.1494, "step": 6772 }, { "epoch": 0.11994669429354443, "grad_norm": 1.045119047164917, "learning_rate": 2.936798611394184e-05, "loss": 0.1123, "step": 6773 }, { "epoch": 0.11996440383057286, "grad_norm": 1.5215246677398682, "learning_rate": 2.9367738979541644e-05, "loss": 0.172, "step": 6774 }, { "epoch": 0.11998211336760128, "grad_norm": 0.9322366714477539, "learning_rate": 2.9367491797873044e-05, "loss": 0.0935, "step": 6775 }, { "epoch": 0.11999982290462971, "grad_norm": 0.8235464096069336, "learning_rate": 2.9367244568936848e-05, "loss": 0.1192, "step": 6776 }, { "epoch": 0.12001753244165815, "grad_norm": 1.7338320016860962, "learning_rate": 2.936699729273387e-05, "loss": 0.1411, "step": 6777 }, { "epoch": 0.12003524197868658, "grad_norm": 1.126795768737793, "learning_rate": 2.936674996926492e-05, "loss": 0.1479, "step": 6778 }, { "epoch": 0.120052951515715, "grad_norm": 1.2043287754058838, "learning_rate": 2.9366502598530824e-05, "loss": 0.1343, "step": 6779 }, { "epoch": 0.12007066105274343, "grad_norm": 0.6331952214241028, "learning_rate": 2.936625518053238e-05, "loss": 0.096, "step": 6780 }, { "epoch": 0.12008837058977186, "grad_norm": 1.013946533203125, "learning_rate": 2.9366007715270412e-05, "loss": 0.1334, "step": 6781 }, { "epoch": 0.12010608012680028, "grad_norm": 1.4537922143936157, "learning_rate": 2.936576020274574e-05, "loss": 0.1743, "step": 6782 }, { "epoch": 0.12012378966382871, "grad_norm": 0.8753652572631836, "learning_rate": 2.936551264295916e-05, "loss": 0.1242, "step": 6783 }, { "epoch": 0.12014149920085714, "grad_norm": 1.0031262636184692, "learning_rate": 2.93652650359115e-05, "loss": 0.1424, "step": 6784 }, { "epoch": 0.12015920873788558, "grad_norm": 1.3372093439102173, "learning_rate": 2.9365017381603566e-05, "loss": 0.1479, "step": 6785 }, { "epoch": 0.120176918274914, "grad_norm": 0.9835739135742188, "learning_rate": 2.9364769680036182e-05, "loss": 0.131, "step": 6786 }, { "epoch": 0.12019462781194243, "grad_norm": 2.4058241844177246, "learning_rate": 2.936452193121016e-05, "loss": 0.1623, "step": 6787 }, { "epoch": 0.12021233734897085, "grad_norm": 1.3899099826812744, "learning_rate": 2.936427413512631e-05, "loss": 0.1417, "step": 6788 }, { "epoch": 0.12023004688599928, "grad_norm": 1.4259353876113892, "learning_rate": 2.9364026291785452e-05, "loss": 0.1462, "step": 6789 }, { "epoch": 0.12024775642302771, "grad_norm": 1.1527100801467896, "learning_rate": 2.9363778401188402e-05, "loss": 0.1581, "step": 6790 }, { "epoch": 0.12026546596005613, "grad_norm": 1.2656465768814087, "learning_rate": 2.9363530463335973e-05, "loss": 0.1543, "step": 6791 }, { "epoch": 0.12028317549708457, "grad_norm": 1.291478157043457, "learning_rate": 2.9363282478228986e-05, "loss": 0.1339, "step": 6792 }, { "epoch": 0.120300885034113, "grad_norm": 5.332714080810547, "learning_rate": 2.936303444586825e-05, "loss": 0.1434, "step": 6793 }, { "epoch": 0.12031859457114143, "grad_norm": 0.8678814768791199, "learning_rate": 2.9362786366254578e-05, "loss": 0.138, "step": 6794 }, { "epoch": 0.12033630410816985, "grad_norm": 0.7267546653747559, "learning_rate": 2.9362538239388797e-05, "loss": 0.1276, "step": 6795 }, { "epoch": 0.12035401364519828, "grad_norm": 1.1918628215789795, "learning_rate": 2.9362290065271713e-05, "loss": 0.1397, "step": 6796 }, { "epoch": 0.1203717231822267, "grad_norm": 1.3555504083633423, "learning_rate": 2.9362041843904153e-05, "loss": 0.1574, "step": 6797 }, { "epoch": 0.12038943271925513, "grad_norm": 1.317515254020691, "learning_rate": 2.936179357528692e-05, "loss": 0.1513, "step": 6798 }, { "epoch": 0.12040714225628356, "grad_norm": 1.1556041240692139, "learning_rate": 2.9361545259420848e-05, "loss": 0.1216, "step": 6799 }, { "epoch": 0.120424851793312, "grad_norm": 1.3960933685302734, "learning_rate": 2.936129689630674e-05, "loss": 0.1142, "step": 6800 }, { "epoch": 0.12044256133034043, "grad_norm": 0.9191359281539917, "learning_rate": 2.936104848594542e-05, "loss": 0.1646, "step": 6801 }, { "epoch": 0.12046027086736885, "grad_norm": 1.1529313325881958, "learning_rate": 2.9360800028337703e-05, "loss": 0.1207, "step": 6802 }, { "epoch": 0.12047798040439728, "grad_norm": 2.0810446739196777, "learning_rate": 2.9360551523484404e-05, "loss": 0.1398, "step": 6803 }, { "epoch": 0.1204956899414257, "grad_norm": 1.5003600120544434, "learning_rate": 2.9360302971386345e-05, "loss": 0.1405, "step": 6804 }, { "epoch": 0.12051339947845413, "grad_norm": 0.9299284815788269, "learning_rate": 2.936005437204434e-05, "loss": 0.1111, "step": 6805 }, { "epoch": 0.12053110901548256, "grad_norm": 1.3361693620681763, "learning_rate": 2.935980572545921e-05, "loss": 0.1042, "step": 6806 }, { "epoch": 0.120548818552511, "grad_norm": 0.8122140169143677, "learning_rate": 2.935955703163177e-05, "loss": 0.1294, "step": 6807 }, { "epoch": 0.12056652808953942, "grad_norm": 1.9576025009155273, "learning_rate": 2.935930829056284e-05, "loss": 0.1922, "step": 6808 }, { "epoch": 0.12058423762656785, "grad_norm": 1.4524097442626953, "learning_rate": 2.9359059502253237e-05, "loss": 0.1248, "step": 6809 }, { "epoch": 0.12060194716359628, "grad_norm": 0.7233214974403381, "learning_rate": 2.9358810666703785e-05, "loss": 0.1479, "step": 6810 }, { "epoch": 0.1206196567006247, "grad_norm": 1.2170727252960205, "learning_rate": 2.935856178391529e-05, "loss": 0.1316, "step": 6811 }, { "epoch": 0.12063736623765313, "grad_norm": 0.9867627024650574, "learning_rate": 2.9358312853888587e-05, "loss": 0.1217, "step": 6812 }, { "epoch": 0.12065507577468156, "grad_norm": 1.5657343864440918, "learning_rate": 2.9358063876624485e-05, "loss": 0.1297, "step": 6813 }, { "epoch": 0.12067278531170998, "grad_norm": 1.0827757120132446, "learning_rate": 2.9357814852123805e-05, "loss": 0.1698, "step": 6814 }, { "epoch": 0.12069049484873842, "grad_norm": 1.0580663681030273, "learning_rate": 2.9357565780387364e-05, "loss": 0.1324, "step": 6815 }, { "epoch": 0.12070820438576685, "grad_norm": 1.8315365314483643, "learning_rate": 2.935731666141599e-05, "loss": 0.1504, "step": 6816 }, { "epoch": 0.12072591392279527, "grad_norm": 1.1613264083862305, "learning_rate": 2.9357067495210488e-05, "loss": 0.1557, "step": 6817 }, { "epoch": 0.1207436234598237, "grad_norm": 1.5787594318389893, "learning_rate": 2.935681828177169e-05, "loss": 0.0924, "step": 6818 }, { "epoch": 0.12076133299685213, "grad_norm": 1.221279263496399, "learning_rate": 2.9356569021100412e-05, "loss": 0.1133, "step": 6819 }, { "epoch": 0.12077904253388055, "grad_norm": 1.2686384916305542, "learning_rate": 2.9356319713197473e-05, "loss": 0.14, "step": 6820 }, { "epoch": 0.12079675207090898, "grad_norm": 0.9534274339675903, "learning_rate": 2.9356070358063696e-05, "loss": 0.1506, "step": 6821 }, { "epoch": 0.12081446160793742, "grad_norm": 1.6561627388000488, "learning_rate": 2.9355820955699898e-05, "loss": 0.0974, "step": 6822 }, { "epoch": 0.12083217114496585, "grad_norm": 0.9976150989532471, "learning_rate": 2.9355571506106904e-05, "loss": 0.1107, "step": 6823 }, { "epoch": 0.12084988068199427, "grad_norm": 1.141736388206482, "learning_rate": 2.9355322009285528e-05, "loss": 0.1256, "step": 6824 }, { "epoch": 0.1208675902190227, "grad_norm": 1.311448097229004, "learning_rate": 2.9355072465236597e-05, "loss": 0.1288, "step": 6825 }, { "epoch": 0.12088529975605113, "grad_norm": 0.8256204128265381, "learning_rate": 2.935482287396093e-05, "loss": 0.113, "step": 6826 }, { "epoch": 0.12090300929307955, "grad_norm": 1.2667325735092163, "learning_rate": 2.9354573235459345e-05, "loss": 0.1138, "step": 6827 }, { "epoch": 0.12092071883010798, "grad_norm": 1.113171935081482, "learning_rate": 2.9354323549732664e-05, "loss": 0.1087, "step": 6828 }, { "epoch": 0.1209384283671364, "grad_norm": 1.5003941059112549, "learning_rate": 2.9354073816781715e-05, "loss": 0.1239, "step": 6829 }, { "epoch": 0.12095613790416485, "grad_norm": 0.7469476461410522, "learning_rate": 2.9353824036607316e-05, "loss": 0.0881, "step": 6830 }, { "epoch": 0.12097384744119327, "grad_norm": 1.4759923219680786, "learning_rate": 2.9353574209210286e-05, "loss": 0.122, "step": 6831 }, { "epoch": 0.1209915569782217, "grad_norm": 1.2661802768707275, "learning_rate": 2.9353324334591443e-05, "loss": 0.0946, "step": 6832 }, { "epoch": 0.12100926651525012, "grad_norm": 1.599982738494873, "learning_rate": 2.935307441275162e-05, "loss": 0.1249, "step": 6833 }, { "epoch": 0.12102697605227855, "grad_norm": 1.2161550521850586, "learning_rate": 2.9352824443691637e-05, "loss": 0.0932, "step": 6834 }, { "epoch": 0.12104468558930698, "grad_norm": 1.0335206985473633, "learning_rate": 2.935257442741231e-05, "loss": 0.1516, "step": 6835 }, { "epoch": 0.1210623951263354, "grad_norm": 1.0092170238494873, "learning_rate": 2.935232436391446e-05, "loss": 0.1276, "step": 6836 }, { "epoch": 0.12108010466336384, "grad_norm": 1.317287802696228, "learning_rate": 2.9352074253198923e-05, "loss": 0.1248, "step": 6837 }, { "epoch": 0.12109781420039227, "grad_norm": 1.0444308519363403, "learning_rate": 2.9351824095266505e-05, "loss": 0.1322, "step": 6838 }, { "epoch": 0.1211155237374207, "grad_norm": 1.0024763345718384, "learning_rate": 2.9351573890118042e-05, "loss": 0.0995, "step": 6839 }, { "epoch": 0.12113323327444912, "grad_norm": 1.1219825744628906, "learning_rate": 2.9351323637754356e-05, "loss": 0.1161, "step": 6840 }, { "epoch": 0.12115094281147755, "grad_norm": 1.1827712059020996, "learning_rate": 2.935107333817626e-05, "loss": 0.1333, "step": 6841 }, { "epoch": 0.12116865234850598, "grad_norm": 1.1859130859375, "learning_rate": 2.9350822991384587e-05, "loss": 0.162, "step": 6842 }, { "epoch": 0.1211863618855344, "grad_norm": 1.2032932043075562, "learning_rate": 2.935057259738016e-05, "loss": 0.1259, "step": 6843 }, { "epoch": 0.12120407142256284, "grad_norm": 1.2132148742675781, "learning_rate": 2.9350322156163798e-05, "loss": 0.1563, "step": 6844 }, { "epoch": 0.12122178095959127, "grad_norm": 0.792839527130127, "learning_rate": 2.935007166773633e-05, "loss": 0.1133, "step": 6845 }, { "epoch": 0.1212394904966197, "grad_norm": 1.4434856176376343, "learning_rate": 2.9349821132098575e-05, "loss": 0.1253, "step": 6846 }, { "epoch": 0.12125720003364812, "grad_norm": 1.0940903425216675, "learning_rate": 2.934957054925136e-05, "loss": 0.1302, "step": 6847 }, { "epoch": 0.12127490957067655, "grad_norm": 1.2146360874176025, "learning_rate": 2.9349319919195508e-05, "loss": 0.0841, "step": 6848 }, { "epoch": 0.12129261910770497, "grad_norm": 0.9570266008377075, "learning_rate": 2.9349069241931846e-05, "loss": 0.1353, "step": 6849 }, { "epoch": 0.1213103286447334, "grad_norm": 1.0955392122268677, "learning_rate": 2.93488185174612e-05, "loss": 0.148, "step": 6850 }, { "epoch": 0.12132803818176183, "grad_norm": 1.1886435747146606, "learning_rate": 2.934856774578439e-05, "loss": 0.126, "step": 6851 }, { "epoch": 0.12134574771879027, "grad_norm": 1.104993224143982, "learning_rate": 2.9348316926902244e-05, "loss": 0.087, "step": 6852 }, { "epoch": 0.1213634572558187, "grad_norm": 1.0767300128936768, "learning_rate": 2.9348066060815588e-05, "loss": 0.1149, "step": 6853 }, { "epoch": 0.12138116679284712, "grad_norm": 1.4854973554611206, "learning_rate": 2.9347815147525246e-05, "loss": 0.1129, "step": 6854 }, { "epoch": 0.12139887632987555, "grad_norm": 2.88043212890625, "learning_rate": 2.934756418703204e-05, "loss": 0.1159, "step": 6855 }, { "epoch": 0.12141658586690397, "grad_norm": 1.5745313167572021, "learning_rate": 2.93473131793368e-05, "loss": 0.1557, "step": 6856 }, { "epoch": 0.1214342954039324, "grad_norm": 1.3790225982666016, "learning_rate": 2.9347062124440353e-05, "loss": 0.1555, "step": 6857 }, { "epoch": 0.12145200494096083, "grad_norm": 1.1619563102722168, "learning_rate": 2.934681102234352e-05, "loss": 0.1448, "step": 6858 }, { "epoch": 0.12146971447798927, "grad_norm": 1.0507566928863525, "learning_rate": 2.934655987304713e-05, "loss": 0.1499, "step": 6859 }, { "epoch": 0.12148742401501769, "grad_norm": 1.1808247566223145, "learning_rate": 2.934630867655201e-05, "loss": 0.1246, "step": 6860 }, { "epoch": 0.12150513355204612, "grad_norm": 1.0250115394592285, "learning_rate": 2.934605743285899e-05, "loss": 0.1281, "step": 6861 }, { "epoch": 0.12152284308907454, "grad_norm": 0.8645066618919373, "learning_rate": 2.9345806141968884e-05, "loss": 0.1423, "step": 6862 }, { "epoch": 0.12154055262610297, "grad_norm": 1.2053786516189575, "learning_rate": 2.934555480388253e-05, "loss": 0.1135, "step": 6863 }, { "epoch": 0.1215582621631314, "grad_norm": 1.8782479763031006, "learning_rate": 2.9345303418600755e-05, "loss": 0.0961, "step": 6864 }, { "epoch": 0.12157597170015982, "grad_norm": 1.1522842645645142, "learning_rate": 2.9345051986124377e-05, "loss": 0.1235, "step": 6865 }, { "epoch": 0.12159368123718825, "grad_norm": 2.01057505607605, "learning_rate": 2.9344800506454234e-05, "loss": 0.1042, "step": 6866 }, { "epoch": 0.12161139077421669, "grad_norm": 1.0782215595245361, "learning_rate": 2.9344548979591145e-05, "loss": 0.1118, "step": 6867 }, { "epoch": 0.12162910031124512, "grad_norm": 0.8980597257614136, "learning_rate": 2.9344297405535944e-05, "loss": 0.1137, "step": 6868 }, { "epoch": 0.12164680984827354, "grad_norm": 1.1668148040771484, "learning_rate": 2.934404578428945e-05, "loss": 0.1655, "step": 6869 }, { "epoch": 0.12166451938530197, "grad_norm": 1.216943383216858, "learning_rate": 2.9343794115852503e-05, "loss": 0.131, "step": 6870 }, { "epoch": 0.1216822289223304, "grad_norm": 1.2917097806930542, "learning_rate": 2.934354240022592e-05, "loss": 0.159, "step": 6871 }, { "epoch": 0.12169993845935882, "grad_norm": 1.0238651037216187, "learning_rate": 2.934329063741053e-05, "loss": 0.1142, "step": 6872 }, { "epoch": 0.12171764799638725, "grad_norm": 1.909979224205017, "learning_rate": 2.934303882740717e-05, "loss": 0.0955, "step": 6873 }, { "epoch": 0.12173535753341569, "grad_norm": 0.8849340677261353, "learning_rate": 2.934278697021666e-05, "loss": 0.1144, "step": 6874 }, { "epoch": 0.12175306707044412, "grad_norm": 1.1386375427246094, "learning_rate": 2.9342535065839835e-05, "loss": 0.16, "step": 6875 }, { "epoch": 0.12177077660747254, "grad_norm": 0.7808980941772461, "learning_rate": 2.934228311427752e-05, "loss": 0.1138, "step": 6876 }, { "epoch": 0.12178848614450097, "grad_norm": 1.2314881086349487, "learning_rate": 2.934203111553054e-05, "loss": 0.1225, "step": 6877 }, { "epoch": 0.1218061956815294, "grad_norm": 1.3055269718170166, "learning_rate": 2.9341779069599732e-05, "loss": 0.1349, "step": 6878 }, { "epoch": 0.12182390521855782, "grad_norm": 1.017067551612854, "learning_rate": 2.934152697648592e-05, "loss": 0.1495, "step": 6879 }, { "epoch": 0.12184161475558625, "grad_norm": 1.1374022960662842, "learning_rate": 2.934127483618993e-05, "loss": 0.1265, "step": 6880 }, { "epoch": 0.12185932429261467, "grad_norm": 1.5710643529891968, "learning_rate": 2.9341022648712606e-05, "loss": 0.1161, "step": 6881 }, { "epoch": 0.12187703382964311, "grad_norm": 1.1743688583374023, "learning_rate": 2.9340770414054763e-05, "loss": 0.151, "step": 6882 }, { "epoch": 0.12189474336667154, "grad_norm": 2.2394421100616455, "learning_rate": 2.9340518132217236e-05, "loss": 0.1351, "step": 6883 }, { "epoch": 0.12191245290369997, "grad_norm": 1.0733722448349, "learning_rate": 2.9340265803200854e-05, "loss": 0.1189, "step": 6884 }, { "epoch": 0.12193016244072839, "grad_norm": 0.927535891532898, "learning_rate": 2.934001342700645e-05, "loss": 0.136, "step": 6885 }, { "epoch": 0.12194787197775682, "grad_norm": 1.0646393299102783, "learning_rate": 2.933976100363485e-05, "loss": 0.1254, "step": 6886 }, { "epoch": 0.12196558151478525, "grad_norm": 1.5076258182525635, "learning_rate": 2.9339508533086888e-05, "loss": 0.1179, "step": 6887 }, { "epoch": 0.12198329105181367, "grad_norm": 1.2350763082504272, "learning_rate": 2.933925601536339e-05, "loss": 0.1322, "step": 6888 }, { "epoch": 0.12200100058884211, "grad_norm": 1.304645299911499, "learning_rate": 2.9339003450465192e-05, "loss": 0.1203, "step": 6889 }, { "epoch": 0.12201871012587054, "grad_norm": 1.1516613960266113, "learning_rate": 2.9338750838393127e-05, "loss": 0.1281, "step": 6890 }, { "epoch": 0.12203641966289897, "grad_norm": 1.164397120475769, "learning_rate": 2.933849817914802e-05, "loss": 0.1484, "step": 6891 }, { "epoch": 0.12205412919992739, "grad_norm": 0.8810755014419556, "learning_rate": 2.9338245472730704e-05, "loss": 0.1143, "step": 6892 }, { "epoch": 0.12207183873695582, "grad_norm": 1.3135206699371338, "learning_rate": 2.9337992719142005e-05, "loss": 0.1482, "step": 6893 }, { "epoch": 0.12208954827398424, "grad_norm": 1.3836135864257812, "learning_rate": 2.9337739918382763e-05, "loss": 0.0969, "step": 6894 }, { "epoch": 0.12210725781101267, "grad_norm": 1.0662760734558105, "learning_rate": 2.933748707045381e-05, "loss": 0.1094, "step": 6895 }, { "epoch": 0.12212496734804111, "grad_norm": 0.8878738880157471, "learning_rate": 2.9337234175355973e-05, "loss": 0.1376, "step": 6896 }, { "epoch": 0.12214267688506954, "grad_norm": 1.1617813110351562, "learning_rate": 2.933698123309009e-05, "loss": 0.0931, "step": 6897 }, { "epoch": 0.12216038642209796, "grad_norm": 1.330322027206421, "learning_rate": 2.9336728243656986e-05, "loss": 0.1049, "step": 6898 }, { "epoch": 0.12217809595912639, "grad_norm": 1.1612666845321655, "learning_rate": 2.9336475207057493e-05, "loss": 0.1062, "step": 6899 }, { "epoch": 0.12219580549615482, "grad_norm": 1.2040669918060303, "learning_rate": 2.9336222123292448e-05, "loss": 0.1536, "step": 6900 }, { "epoch": 0.12221351503318324, "grad_norm": 0.9079346060752869, "learning_rate": 2.9335968992362684e-05, "loss": 0.1399, "step": 6901 }, { "epoch": 0.12223122457021167, "grad_norm": 0.8895795345306396, "learning_rate": 2.933571581426903e-05, "loss": 0.1288, "step": 6902 }, { "epoch": 0.1222489341072401, "grad_norm": 0.7640013098716736, "learning_rate": 2.9335462589012322e-05, "loss": 0.1158, "step": 6903 }, { "epoch": 0.12226664364426854, "grad_norm": 0.964898407459259, "learning_rate": 2.9335209316593392e-05, "loss": 0.1065, "step": 6904 }, { "epoch": 0.12228435318129696, "grad_norm": 0.8977356553077698, "learning_rate": 2.9334955997013073e-05, "loss": 0.0979, "step": 6905 }, { "epoch": 0.12230206271832539, "grad_norm": 0.8430138230323792, "learning_rate": 2.93347026302722e-05, "loss": 0.1311, "step": 6906 }, { "epoch": 0.12231977225535381, "grad_norm": 1.2093394994735718, "learning_rate": 2.93344492163716e-05, "loss": 0.1416, "step": 6907 }, { "epoch": 0.12233748179238224, "grad_norm": 0.9726892709732056, "learning_rate": 2.933419575531212e-05, "loss": 0.1155, "step": 6908 }, { "epoch": 0.12235519132941067, "grad_norm": 1.302964448928833, "learning_rate": 2.9333942247094583e-05, "loss": 0.1547, "step": 6909 }, { "epoch": 0.1223729008664391, "grad_norm": 1.1721488237380981, "learning_rate": 2.933368869171982e-05, "loss": 0.1485, "step": 6910 }, { "epoch": 0.12239061040346753, "grad_norm": 1.0145403146743774, "learning_rate": 2.9333435089188678e-05, "loss": 0.1437, "step": 6911 }, { "epoch": 0.12240831994049596, "grad_norm": 0.804824709892273, "learning_rate": 2.933318143950198e-05, "loss": 0.1477, "step": 6912 }, { "epoch": 0.12242602947752439, "grad_norm": 1.334155559539795, "learning_rate": 2.933292774266057e-05, "loss": 0.1308, "step": 6913 }, { "epoch": 0.12244373901455281, "grad_norm": 1.1948893070220947, "learning_rate": 2.9332673998665273e-05, "loss": 0.1379, "step": 6914 }, { "epoch": 0.12246144855158124, "grad_norm": 1.7858372926712036, "learning_rate": 2.933242020751693e-05, "loss": 0.1421, "step": 6915 }, { "epoch": 0.12247915808860967, "grad_norm": 0.885847806930542, "learning_rate": 2.933216636921637e-05, "loss": 0.0788, "step": 6916 }, { "epoch": 0.12249686762563809, "grad_norm": 1.2234584093093872, "learning_rate": 2.9331912483764435e-05, "loss": 0.122, "step": 6917 }, { "epoch": 0.12251457716266652, "grad_norm": 1.3771237134933472, "learning_rate": 2.933165855116196e-05, "loss": 0.1448, "step": 6918 }, { "epoch": 0.12253228669969496, "grad_norm": 1.3193693161010742, "learning_rate": 2.9331404571409774e-05, "loss": 0.1663, "step": 6919 }, { "epoch": 0.12254999623672339, "grad_norm": 1.2063753604888916, "learning_rate": 2.933115054450872e-05, "loss": 0.1331, "step": 6920 }, { "epoch": 0.12256770577375181, "grad_norm": 0.9278913736343384, "learning_rate": 2.9330896470459632e-05, "loss": 0.1186, "step": 6921 }, { "epoch": 0.12258541531078024, "grad_norm": 1.2555447816848755, "learning_rate": 2.9330642349263338e-05, "loss": 0.1404, "step": 6922 }, { "epoch": 0.12260312484780866, "grad_norm": 1.0614439249038696, "learning_rate": 2.933038818092068e-05, "loss": 0.1391, "step": 6923 }, { "epoch": 0.12262083438483709, "grad_norm": 0.9926783442497253, "learning_rate": 2.9330133965432503e-05, "loss": 0.0852, "step": 6924 }, { "epoch": 0.12263854392186552, "grad_norm": 1.2619966268539429, "learning_rate": 2.9329879702799626e-05, "loss": 0.1149, "step": 6925 }, { "epoch": 0.12265625345889396, "grad_norm": 0.9787118434906006, "learning_rate": 2.9329625393022897e-05, "loss": 0.1265, "step": 6926 }, { "epoch": 0.12267396299592238, "grad_norm": 0.903846263885498, "learning_rate": 2.932937103610315e-05, "loss": 0.1144, "step": 6927 }, { "epoch": 0.12269167253295081, "grad_norm": 1.3034319877624512, "learning_rate": 2.9329116632041224e-05, "loss": 0.153, "step": 6928 }, { "epoch": 0.12270938206997924, "grad_norm": 0.8659222722053528, "learning_rate": 2.9328862180837952e-05, "loss": 0.11, "step": 6929 }, { "epoch": 0.12272709160700766, "grad_norm": 1.024054765701294, "learning_rate": 2.932860768249417e-05, "loss": 0.1107, "step": 6930 }, { "epoch": 0.12274480114403609, "grad_norm": 0.992980420589447, "learning_rate": 2.932835313701072e-05, "loss": 0.115, "step": 6931 }, { "epoch": 0.12276251068106452, "grad_norm": 0.9879109263420105, "learning_rate": 2.9328098544388437e-05, "loss": 0.1401, "step": 6932 }, { "epoch": 0.12278022021809294, "grad_norm": 1.0391981601715088, "learning_rate": 2.932784390462816e-05, "loss": 0.1127, "step": 6933 }, { "epoch": 0.12279792975512138, "grad_norm": 1.0335887670516968, "learning_rate": 2.932758921773072e-05, "loss": 0.1469, "step": 6934 }, { "epoch": 0.12281563929214981, "grad_norm": 0.8072028756141663, "learning_rate": 2.9327334483696968e-05, "loss": 0.103, "step": 6935 }, { "epoch": 0.12283334882917823, "grad_norm": 1.2612513303756714, "learning_rate": 2.9327079702527732e-05, "loss": 0.1126, "step": 6936 }, { "epoch": 0.12285105836620666, "grad_norm": 0.9553078413009644, "learning_rate": 2.9326824874223854e-05, "loss": 0.1214, "step": 6937 }, { "epoch": 0.12286876790323509, "grad_norm": 0.9009528160095215, "learning_rate": 2.9326569998786174e-05, "loss": 0.1288, "step": 6938 }, { "epoch": 0.12288647744026351, "grad_norm": 1.0671309232711792, "learning_rate": 2.9326315076215518e-05, "loss": 0.1194, "step": 6939 }, { "epoch": 0.12290418697729194, "grad_norm": 1.5813870429992676, "learning_rate": 2.932606010651274e-05, "loss": 0.1433, "step": 6940 }, { "epoch": 0.12292189651432038, "grad_norm": 1.130739688873291, "learning_rate": 2.9325805089678678e-05, "loss": 0.1614, "step": 6941 }, { "epoch": 0.12293960605134881, "grad_norm": 1.5102976560592651, "learning_rate": 2.9325550025714164e-05, "loss": 0.1696, "step": 6942 }, { "epoch": 0.12295731558837723, "grad_norm": 0.6428033709526062, "learning_rate": 2.9325294914620033e-05, "loss": 0.0943, "step": 6943 }, { "epoch": 0.12297502512540566, "grad_norm": 0.9886006116867065, "learning_rate": 2.9325039756397137e-05, "loss": 0.1628, "step": 6944 }, { "epoch": 0.12299273466243409, "grad_norm": 1.15678870677948, "learning_rate": 2.9324784551046307e-05, "loss": 0.0808, "step": 6945 }, { "epoch": 0.12301044419946251, "grad_norm": 1.1717318296432495, "learning_rate": 2.9324529298568383e-05, "loss": 0.1384, "step": 6946 }, { "epoch": 0.12302815373649094, "grad_norm": 1.3374236822128296, "learning_rate": 2.932427399896421e-05, "loss": 0.1402, "step": 6947 }, { "epoch": 0.12304586327351937, "grad_norm": 1.7051876783370972, "learning_rate": 2.9324018652234622e-05, "loss": 0.0931, "step": 6948 }, { "epoch": 0.1230635728105478, "grad_norm": 2.729738235473633, "learning_rate": 2.932376325838046e-05, "loss": 0.1312, "step": 6949 }, { "epoch": 0.12308128234757623, "grad_norm": 1.4115186929702759, "learning_rate": 2.932350781740257e-05, "loss": 0.1135, "step": 6950 }, { "epoch": 0.12309899188460466, "grad_norm": 1.0625848770141602, "learning_rate": 2.9323252329301788e-05, "loss": 0.1583, "step": 6951 }, { "epoch": 0.12311670142163308, "grad_norm": 1.9246989488601685, "learning_rate": 2.932299679407895e-05, "loss": 0.0937, "step": 6952 }, { "epoch": 0.12313441095866151, "grad_norm": 0.714584469795227, "learning_rate": 2.9322741211734905e-05, "loss": 0.1627, "step": 6953 }, { "epoch": 0.12315212049568994, "grad_norm": 3.5822091102600098, "learning_rate": 2.9322485582270487e-05, "loss": 0.1348, "step": 6954 }, { "epoch": 0.12316983003271836, "grad_norm": 1.6704093217849731, "learning_rate": 2.9322229905686545e-05, "loss": 0.1344, "step": 6955 }, { "epoch": 0.1231875395697468, "grad_norm": 0.5807987451553345, "learning_rate": 2.932197418198391e-05, "loss": 0.1128, "step": 6956 }, { "epoch": 0.12320524910677523, "grad_norm": 1.2880724668502808, "learning_rate": 2.9321718411163426e-05, "loss": 0.1402, "step": 6957 }, { "epoch": 0.12322295864380366, "grad_norm": 1.940233826637268, "learning_rate": 2.9321462593225945e-05, "loss": 0.1407, "step": 6958 }, { "epoch": 0.12324066818083208, "grad_norm": 1.2180490493774414, "learning_rate": 2.9321206728172296e-05, "loss": 0.1315, "step": 6959 }, { "epoch": 0.12325837771786051, "grad_norm": 1.3189167976379395, "learning_rate": 2.9320950816003328e-05, "loss": 0.1274, "step": 6960 }, { "epoch": 0.12327608725488894, "grad_norm": 1.2388619184494019, "learning_rate": 2.932069485671988e-05, "loss": 0.16, "step": 6961 }, { "epoch": 0.12329379679191736, "grad_norm": 1.041087031364441, "learning_rate": 2.932043885032279e-05, "loss": 0.0855, "step": 6962 }, { "epoch": 0.1233115063289458, "grad_norm": 1.1324094533920288, "learning_rate": 2.9320182796812908e-05, "loss": 0.1418, "step": 6963 }, { "epoch": 0.12332921586597423, "grad_norm": 1.9325624704360962, "learning_rate": 2.9319926696191072e-05, "loss": 0.1723, "step": 6964 }, { "epoch": 0.12334692540300266, "grad_norm": 0.9046569466590881, "learning_rate": 2.9319670548458127e-05, "loss": 0.1194, "step": 6965 }, { "epoch": 0.12336463494003108, "grad_norm": 1.484878420829773, "learning_rate": 2.9319414353614914e-05, "loss": 0.1791, "step": 6966 }, { "epoch": 0.12338234447705951, "grad_norm": 1.2538139820098877, "learning_rate": 2.9319158111662274e-05, "loss": 0.1003, "step": 6967 }, { "epoch": 0.12340005401408793, "grad_norm": 1.60073721408844, "learning_rate": 2.9318901822601052e-05, "loss": 0.1106, "step": 6968 }, { "epoch": 0.12341776355111636, "grad_norm": 1.2090150117874146, "learning_rate": 2.9318645486432093e-05, "loss": 0.1514, "step": 6969 }, { "epoch": 0.12343547308814479, "grad_norm": 1.2721996307373047, "learning_rate": 2.9318389103156236e-05, "loss": 0.1204, "step": 6970 }, { "epoch": 0.12345318262517323, "grad_norm": 1.1283750534057617, "learning_rate": 2.9318132672774333e-05, "loss": 0.0954, "step": 6971 }, { "epoch": 0.12347089216220165, "grad_norm": 0.8828780651092529, "learning_rate": 2.9317876195287217e-05, "loss": 0.1184, "step": 6972 }, { "epoch": 0.12348860169923008, "grad_norm": 1.4146496057510376, "learning_rate": 2.931761967069574e-05, "loss": 0.136, "step": 6973 }, { "epoch": 0.1235063112362585, "grad_norm": 1.5286414623260498, "learning_rate": 2.931736309900074e-05, "loss": 0.1294, "step": 6974 }, { "epoch": 0.12352402077328693, "grad_norm": 0.8366476893424988, "learning_rate": 2.9317106480203063e-05, "loss": 0.1762, "step": 6975 }, { "epoch": 0.12354173031031536, "grad_norm": 2.0353293418884277, "learning_rate": 2.9316849814303554e-05, "loss": 0.1375, "step": 6976 }, { "epoch": 0.12355943984734379, "grad_norm": 0.8070055842399597, "learning_rate": 2.931659310130306e-05, "loss": 0.12, "step": 6977 }, { "epoch": 0.12357714938437223, "grad_norm": 1.284158706665039, "learning_rate": 2.931633634120242e-05, "loss": 0.1889, "step": 6978 }, { "epoch": 0.12359485892140065, "grad_norm": 1.0166904926300049, "learning_rate": 2.931607953400248e-05, "loss": 0.118, "step": 6979 }, { "epoch": 0.12361256845842908, "grad_norm": 1.5510503053665161, "learning_rate": 2.931582267970409e-05, "loss": 0.1543, "step": 6980 }, { "epoch": 0.1236302779954575, "grad_norm": 0.706517219543457, "learning_rate": 2.9315565778308087e-05, "loss": 0.1396, "step": 6981 }, { "epoch": 0.12364798753248593, "grad_norm": 1.0745751857757568, "learning_rate": 2.9315308829815324e-05, "loss": 0.1167, "step": 6982 }, { "epoch": 0.12366569706951436, "grad_norm": 1.135176420211792, "learning_rate": 2.9315051834226642e-05, "loss": 0.1354, "step": 6983 }, { "epoch": 0.12368340660654278, "grad_norm": 1.4065895080566406, "learning_rate": 2.9314794791542887e-05, "loss": 0.1557, "step": 6984 }, { "epoch": 0.12370111614357121, "grad_norm": 1.4243437051773071, "learning_rate": 2.9314537701764903e-05, "loss": 0.1727, "step": 6985 }, { "epoch": 0.12371882568059965, "grad_norm": 1.2328879833221436, "learning_rate": 2.931428056489354e-05, "loss": 0.1165, "step": 6986 }, { "epoch": 0.12373653521762808, "grad_norm": 1.0275449752807617, "learning_rate": 2.9314023380929643e-05, "loss": 0.1446, "step": 6987 }, { "epoch": 0.1237542447546565, "grad_norm": 0.870806872844696, "learning_rate": 2.931376614987405e-05, "loss": 0.1088, "step": 6988 }, { "epoch": 0.12377195429168493, "grad_norm": 1.2711069583892822, "learning_rate": 2.9313508871727625e-05, "loss": 0.1194, "step": 6989 }, { "epoch": 0.12378966382871336, "grad_norm": 1.4572381973266602, "learning_rate": 2.93132515464912e-05, "loss": 0.1526, "step": 6990 }, { "epoch": 0.12380737336574178, "grad_norm": 1.497262954711914, "learning_rate": 2.9312994174165623e-05, "loss": 0.1101, "step": 6991 }, { "epoch": 0.12382508290277021, "grad_norm": 1.0177562236785889, "learning_rate": 2.9312736754751743e-05, "loss": 0.1129, "step": 6992 }, { "epoch": 0.12384279243979865, "grad_norm": 1.1397758722305298, "learning_rate": 2.9312479288250405e-05, "loss": 0.1109, "step": 6993 }, { "epoch": 0.12386050197682708, "grad_norm": 1.3754074573516846, "learning_rate": 2.9312221774662457e-05, "loss": 0.1519, "step": 6994 }, { "epoch": 0.1238782115138555, "grad_norm": 1.7860605716705322, "learning_rate": 2.9311964213988754e-05, "loss": 0.1491, "step": 6995 }, { "epoch": 0.12389592105088393, "grad_norm": 0.9975094795227051, "learning_rate": 2.931170660623013e-05, "loss": 0.0991, "step": 6996 }, { "epoch": 0.12391363058791235, "grad_norm": 1.1310160160064697, "learning_rate": 2.9311448951387444e-05, "loss": 0.1134, "step": 6997 }, { "epoch": 0.12393134012494078, "grad_norm": 1.4209586381912231, "learning_rate": 2.931119124946153e-05, "loss": 0.1278, "step": 6998 }, { "epoch": 0.12394904966196921, "grad_norm": 1.148439645767212, "learning_rate": 2.931093350045325e-05, "loss": 0.1231, "step": 6999 }, { "epoch": 0.12396675919899763, "grad_norm": 1.1289703845977783, "learning_rate": 2.9310675704363446e-05, "loss": 0.1221, "step": 7000 }, { "epoch": 0.12398446873602607, "grad_norm": 0.7230361104011536, "learning_rate": 2.9310417861192966e-05, "loss": 0.1015, "step": 7001 }, { "epoch": 0.1240021782730545, "grad_norm": 1.4917675256729126, "learning_rate": 2.931015997094266e-05, "loss": 0.1242, "step": 7002 }, { "epoch": 0.12401988781008293, "grad_norm": 1.2441030740737915, "learning_rate": 2.9309902033613375e-05, "loss": 0.0825, "step": 7003 }, { "epoch": 0.12403759734711135, "grad_norm": 1.2049932479858398, "learning_rate": 2.930964404920596e-05, "loss": 0.1256, "step": 7004 }, { "epoch": 0.12405530688413978, "grad_norm": 1.6834279298782349, "learning_rate": 2.9309386017721257e-05, "loss": 0.1363, "step": 7005 }, { "epoch": 0.1240730164211682, "grad_norm": 1.0053136348724365, "learning_rate": 2.930912793916013e-05, "loss": 0.1486, "step": 7006 }, { "epoch": 0.12409072595819663, "grad_norm": 0.9885342121124268, "learning_rate": 2.930886981352341e-05, "loss": 0.153, "step": 7007 }, { "epoch": 0.12410843549522507, "grad_norm": 1.168309211730957, "learning_rate": 2.930861164081196e-05, "loss": 0.0986, "step": 7008 }, { "epoch": 0.1241261450322535, "grad_norm": 0.8417467474937439, "learning_rate": 2.9308353421026627e-05, "loss": 0.0923, "step": 7009 }, { "epoch": 0.12414385456928193, "grad_norm": 1.0685054063796997, "learning_rate": 2.930809515416826e-05, "loss": 0.1379, "step": 7010 }, { "epoch": 0.12416156410631035, "grad_norm": 1.536195158958435, "learning_rate": 2.9307836840237706e-05, "loss": 0.1384, "step": 7011 }, { "epoch": 0.12417927364333878, "grad_norm": 0.8798844814300537, "learning_rate": 2.9307578479235812e-05, "loss": 0.0864, "step": 7012 }, { "epoch": 0.1241969831803672, "grad_norm": 1.1826690435409546, "learning_rate": 2.9307320071163437e-05, "loss": 0.1247, "step": 7013 }, { "epoch": 0.12421469271739563, "grad_norm": 1.1422382593154907, "learning_rate": 2.9307061616021424e-05, "loss": 0.0988, "step": 7014 }, { "epoch": 0.12423240225442406, "grad_norm": 1.5222197771072388, "learning_rate": 2.9306803113810627e-05, "loss": 0.109, "step": 7015 }, { "epoch": 0.1242501117914525, "grad_norm": 1.5040770769119263, "learning_rate": 2.930654456453189e-05, "loss": 0.1367, "step": 7016 }, { "epoch": 0.12426782132848092, "grad_norm": 1.074936866760254, "learning_rate": 2.930628596818607e-05, "loss": 0.1437, "step": 7017 }, { "epoch": 0.12428553086550935, "grad_norm": 1.4463986158370972, "learning_rate": 2.930602732477402e-05, "loss": 0.1977, "step": 7018 }, { "epoch": 0.12430324040253778, "grad_norm": 1.2642723321914673, "learning_rate": 2.9305768634296583e-05, "loss": 0.1548, "step": 7019 }, { "epoch": 0.1243209499395662, "grad_norm": 0.9968997836112976, "learning_rate": 2.9305509896754615e-05, "loss": 0.0768, "step": 7020 }, { "epoch": 0.12433865947659463, "grad_norm": 1.4595024585723877, "learning_rate": 2.9305251112148966e-05, "loss": 0.1305, "step": 7021 }, { "epoch": 0.12435636901362306, "grad_norm": 0.7818520069122314, "learning_rate": 2.9304992280480486e-05, "loss": 0.0949, "step": 7022 }, { "epoch": 0.1243740785506515, "grad_norm": 1.0843193531036377, "learning_rate": 2.9304733401750035e-05, "loss": 0.149, "step": 7023 }, { "epoch": 0.12439178808767992, "grad_norm": 1.3068714141845703, "learning_rate": 2.9304474475958454e-05, "loss": 0.1249, "step": 7024 }, { "epoch": 0.12440949762470835, "grad_norm": 1.7550839185714722, "learning_rate": 2.9304215503106597e-05, "loss": 0.1499, "step": 7025 }, { "epoch": 0.12442720716173677, "grad_norm": 0.7442371249198914, "learning_rate": 2.9303956483195317e-05, "loss": 0.1523, "step": 7026 }, { "epoch": 0.1244449166987652, "grad_norm": 0.8705452084541321, "learning_rate": 2.9303697416225472e-05, "loss": 0.1088, "step": 7027 }, { "epoch": 0.12446262623579363, "grad_norm": 1.0019440650939941, "learning_rate": 2.930343830219791e-05, "loss": 0.1088, "step": 7028 }, { "epoch": 0.12448033577282205, "grad_norm": 1.7948685884475708, "learning_rate": 2.9303179141113473e-05, "loss": 0.1669, "step": 7029 }, { "epoch": 0.1244980453098505, "grad_norm": 1.1560715436935425, "learning_rate": 2.9302919932973028e-05, "loss": 0.1763, "step": 7030 }, { "epoch": 0.12451575484687892, "grad_norm": 0.9691892266273499, "learning_rate": 2.9302660677777428e-05, "loss": 0.0912, "step": 7031 }, { "epoch": 0.12453346438390735, "grad_norm": 0.8545749187469482, "learning_rate": 2.930240137552752e-05, "loss": 0.12, "step": 7032 }, { "epoch": 0.12455117392093577, "grad_norm": 1.1163954734802246, "learning_rate": 2.9302142026224154e-05, "loss": 0.1083, "step": 7033 }, { "epoch": 0.1245688834579642, "grad_norm": 1.2127777338027954, "learning_rate": 2.9301882629868186e-05, "loss": 0.1316, "step": 7034 }, { "epoch": 0.12458659299499263, "grad_norm": 1.701211929321289, "learning_rate": 2.9301623186460473e-05, "loss": 0.1346, "step": 7035 }, { "epoch": 0.12460430253202105, "grad_norm": 1.0418843030929565, "learning_rate": 2.930136369600187e-05, "loss": 0.1113, "step": 7036 }, { "epoch": 0.12462201206904948, "grad_norm": 1.2945497035980225, "learning_rate": 2.930110415849322e-05, "loss": 0.1378, "step": 7037 }, { "epoch": 0.12463972160607792, "grad_norm": 1.54978609085083, "learning_rate": 2.9300844573935393e-05, "loss": 0.1547, "step": 7038 }, { "epoch": 0.12465743114310635, "grad_norm": 0.917944610118866, "learning_rate": 2.9300584942329224e-05, "loss": 0.0802, "step": 7039 }, { "epoch": 0.12467514068013477, "grad_norm": 1.3596748113632202, "learning_rate": 2.9300325263675586e-05, "loss": 0.1406, "step": 7040 }, { "epoch": 0.1246928502171632, "grad_norm": 1.0349584817886353, "learning_rate": 2.930006553797532e-05, "loss": 0.1312, "step": 7041 }, { "epoch": 0.12471055975419162, "grad_norm": 1.581804871559143, "learning_rate": 2.929980576522928e-05, "loss": 0.1368, "step": 7042 }, { "epoch": 0.12472826929122005, "grad_norm": 1.1518036127090454, "learning_rate": 2.9299545945438327e-05, "loss": 0.1132, "step": 7043 }, { "epoch": 0.12474597882824848, "grad_norm": 1.1095237731933594, "learning_rate": 2.929928607860332e-05, "loss": 0.1047, "step": 7044 }, { "epoch": 0.12476368836527692, "grad_norm": 1.382123351097107, "learning_rate": 2.9299026164725106e-05, "loss": 0.124, "step": 7045 }, { "epoch": 0.12478139790230534, "grad_norm": 0.8104090094566345, "learning_rate": 2.929876620380454e-05, "loss": 0.0841, "step": 7046 }, { "epoch": 0.12479910743933377, "grad_norm": 0.8248339295387268, "learning_rate": 2.929850619584248e-05, "loss": 0.1302, "step": 7047 }, { "epoch": 0.1248168169763622, "grad_norm": 0.9882720112800598, "learning_rate": 2.9298246140839783e-05, "loss": 0.1077, "step": 7048 }, { "epoch": 0.12483452651339062, "grad_norm": 1.0906128883361816, "learning_rate": 2.9297986038797302e-05, "loss": 0.1634, "step": 7049 }, { "epoch": 0.12485223605041905, "grad_norm": 1.4875152111053467, "learning_rate": 2.929772588971589e-05, "loss": 0.1141, "step": 7050 }, { "epoch": 0.12486994558744748, "grad_norm": 1.43636953830719, "learning_rate": 2.929746569359641e-05, "loss": 0.1627, "step": 7051 }, { "epoch": 0.1248876551244759, "grad_norm": 1.4672259092330933, "learning_rate": 2.929720545043971e-05, "loss": 0.1019, "step": 7052 }, { "epoch": 0.12490536466150434, "grad_norm": 1.9853641986846924, "learning_rate": 2.9296945160246654e-05, "loss": 0.1113, "step": 7053 }, { "epoch": 0.12492307419853277, "grad_norm": 1.3951435089111328, "learning_rate": 2.9296684823018093e-05, "loss": 0.1318, "step": 7054 }, { "epoch": 0.1249407837355612, "grad_norm": 0.9079066514968872, "learning_rate": 2.929642443875488e-05, "loss": 0.1673, "step": 7055 }, { "epoch": 0.12495849327258962, "grad_norm": 0.9581667184829712, "learning_rate": 2.9296164007457882e-05, "loss": 0.1629, "step": 7056 }, { "epoch": 0.12497620280961805, "grad_norm": 1.0126286745071411, "learning_rate": 2.9295903529127947e-05, "loss": 0.135, "step": 7057 }, { "epoch": 0.12499391234664647, "grad_norm": 1.3557748794555664, "learning_rate": 2.929564300376594e-05, "loss": 0.1368, "step": 7058 }, { "epoch": 0.1250116218836749, "grad_norm": 1.2427959442138672, "learning_rate": 2.9295382431372713e-05, "loss": 0.1377, "step": 7059 }, { "epoch": 0.12502933142070333, "grad_norm": 0.9176596403121948, "learning_rate": 2.9295121811949118e-05, "loss": 0.0976, "step": 7060 }, { "epoch": 0.12504704095773175, "grad_norm": 1.5853525400161743, "learning_rate": 2.9294861145496025e-05, "loss": 0.1373, "step": 7061 }, { "epoch": 0.12506475049476018, "grad_norm": 0.9511769413948059, "learning_rate": 2.929460043201428e-05, "loss": 0.1333, "step": 7062 }, { "epoch": 0.1250824600317886, "grad_norm": 1.4324853420257568, "learning_rate": 2.9294339671504747e-05, "loss": 0.1167, "step": 7063 }, { "epoch": 0.12510016956881706, "grad_norm": 1.1152160167694092, "learning_rate": 2.929407886396828e-05, "loss": 0.1334, "step": 7064 }, { "epoch": 0.1251178791058455, "grad_norm": 2.3450686931610107, "learning_rate": 2.9293818009405745e-05, "loss": 0.1073, "step": 7065 }, { "epoch": 0.1251355886428739, "grad_norm": 1.3399412631988525, "learning_rate": 2.929355710781799e-05, "loss": 0.1255, "step": 7066 }, { "epoch": 0.12515329817990234, "grad_norm": 1.3642091751098633, "learning_rate": 2.929329615920588e-05, "loss": 0.1594, "step": 7067 }, { "epoch": 0.12517100771693077, "grad_norm": 1.1247751712799072, "learning_rate": 2.9293035163570266e-05, "loss": 0.1066, "step": 7068 }, { "epoch": 0.1251887172539592, "grad_norm": 1.8478914499282837, "learning_rate": 2.929277412091202e-05, "loss": 0.1379, "step": 7069 }, { "epoch": 0.12520642679098762, "grad_norm": 0.8660967946052551, "learning_rate": 2.929251303123199e-05, "loss": 0.106, "step": 7070 }, { "epoch": 0.12522413632801604, "grad_norm": 0.6794450879096985, "learning_rate": 2.929225189453103e-05, "loss": 0.1228, "step": 7071 }, { "epoch": 0.12524184586504447, "grad_norm": 1.2920864820480347, "learning_rate": 2.9291990710810018e-05, "loss": 0.1103, "step": 7072 }, { "epoch": 0.1252595554020729, "grad_norm": 1.0311639308929443, "learning_rate": 2.9291729480069796e-05, "loss": 0.1291, "step": 7073 }, { "epoch": 0.12527726493910132, "grad_norm": 1.0539915561676025, "learning_rate": 2.9291468202311232e-05, "loss": 0.0969, "step": 7074 }, { "epoch": 0.12529497447612975, "grad_norm": 1.0925045013427734, "learning_rate": 2.929120687753518e-05, "loss": 0.1444, "step": 7075 }, { "epoch": 0.12531268401315818, "grad_norm": 0.9432128071784973, "learning_rate": 2.9290945505742506e-05, "loss": 0.1329, "step": 7076 }, { "epoch": 0.1253303935501866, "grad_norm": 1.0372602939605713, "learning_rate": 2.9290684086934065e-05, "loss": 0.1145, "step": 7077 }, { "epoch": 0.12534810308721503, "grad_norm": 0.7957093715667725, "learning_rate": 2.929042262111072e-05, "loss": 0.1177, "step": 7078 }, { "epoch": 0.12536581262424348, "grad_norm": 1.1623125076293945, "learning_rate": 2.929016110827333e-05, "loss": 0.139, "step": 7079 }, { "epoch": 0.1253835221612719, "grad_norm": 1.3789604902267456, "learning_rate": 2.9289899548422757e-05, "loss": 0.1176, "step": 7080 }, { "epoch": 0.12540123169830034, "grad_norm": 1.0173239707946777, "learning_rate": 2.928963794155986e-05, "loss": 0.1368, "step": 7081 }, { "epoch": 0.12541894123532876, "grad_norm": 1.3186743259429932, "learning_rate": 2.9289376287685493e-05, "loss": 0.1684, "step": 7082 }, { "epoch": 0.1254366507723572, "grad_norm": 1.1043075323104858, "learning_rate": 2.9289114586800528e-05, "loss": 0.1426, "step": 7083 }, { "epoch": 0.12545436030938562, "grad_norm": 0.6889842748641968, "learning_rate": 2.928885283890582e-05, "loss": 0.1023, "step": 7084 }, { "epoch": 0.12547206984641404, "grad_norm": 2.115553617477417, "learning_rate": 2.928859104400223e-05, "loss": 0.1412, "step": 7085 }, { "epoch": 0.12548977938344247, "grad_norm": 1.1041074991226196, "learning_rate": 2.9288329202090628e-05, "loss": 0.148, "step": 7086 }, { "epoch": 0.1255074889204709, "grad_norm": 1.2727795839309692, "learning_rate": 2.9288067313171863e-05, "loss": 0.088, "step": 7087 }, { "epoch": 0.12552519845749932, "grad_norm": 1.163130283355713, "learning_rate": 2.92878053772468e-05, "loss": 0.1437, "step": 7088 }, { "epoch": 0.12554290799452775, "grad_norm": 0.8378774523735046, "learning_rate": 2.92875433943163e-05, "loss": 0.1425, "step": 7089 }, { "epoch": 0.12556061753155617, "grad_norm": 1.2873191833496094, "learning_rate": 2.9287281364381232e-05, "loss": 0.1149, "step": 7090 }, { "epoch": 0.1255783270685846, "grad_norm": 0.741492509841919, "learning_rate": 2.9287019287442455e-05, "loss": 0.1084, "step": 7091 }, { "epoch": 0.12559603660561303, "grad_norm": 1.2752043008804321, "learning_rate": 2.9286757163500826e-05, "loss": 0.1202, "step": 7092 }, { "epoch": 0.12561374614264145, "grad_norm": 1.4400159120559692, "learning_rate": 2.9286494992557214e-05, "loss": 0.1786, "step": 7093 }, { "epoch": 0.1256314556796699, "grad_norm": 0.920488715171814, "learning_rate": 2.928623277461248e-05, "loss": 0.1282, "step": 7094 }, { "epoch": 0.12564916521669833, "grad_norm": 1.395738124847412, "learning_rate": 2.9285970509667478e-05, "loss": 0.1326, "step": 7095 }, { "epoch": 0.12566687475372676, "grad_norm": 1.2623953819274902, "learning_rate": 2.9285708197723082e-05, "loss": 0.1385, "step": 7096 }, { "epoch": 0.12568458429075519, "grad_norm": 1.0252965688705444, "learning_rate": 2.9285445838780152e-05, "loss": 0.1286, "step": 7097 }, { "epoch": 0.1257022938277836, "grad_norm": 1.2010480165481567, "learning_rate": 2.9285183432839547e-05, "loss": 0.1269, "step": 7098 }, { "epoch": 0.12572000336481204, "grad_norm": 1.3973591327667236, "learning_rate": 2.9284920979902133e-05, "loss": 0.1337, "step": 7099 }, { "epoch": 0.12573771290184046, "grad_norm": 1.0008600950241089, "learning_rate": 2.9284658479968776e-05, "loss": 0.127, "step": 7100 }, { "epoch": 0.1257554224388689, "grad_norm": 1.5205104351043701, "learning_rate": 2.928439593304034e-05, "loss": 0.1304, "step": 7101 }, { "epoch": 0.12577313197589732, "grad_norm": 1.354599118232727, "learning_rate": 2.9284133339117684e-05, "loss": 0.1436, "step": 7102 }, { "epoch": 0.12579084151292574, "grad_norm": 1.3823516368865967, "learning_rate": 2.9283870698201673e-05, "loss": 0.11, "step": 7103 }, { "epoch": 0.12580855104995417, "grad_norm": 1.295455813407898, "learning_rate": 2.9283608010293172e-05, "loss": 0.127, "step": 7104 }, { "epoch": 0.1258262605869826, "grad_norm": 1.3580894470214844, "learning_rate": 2.9283345275393045e-05, "loss": 0.1372, "step": 7105 }, { "epoch": 0.12584397012401102, "grad_norm": 1.220017671585083, "learning_rate": 2.928308249350216e-05, "loss": 0.1351, "step": 7106 }, { "epoch": 0.12586167966103945, "grad_norm": 1.247136116027832, "learning_rate": 2.9282819664621375e-05, "loss": 0.1579, "step": 7107 }, { "epoch": 0.12587938919806788, "grad_norm": 1.209704041481018, "learning_rate": 2.9282556788751557e-05, "loss": 0.133, "step": 7108 }, { "epoch": 0.12589709873509633, "grad_norm": 1.1939977407455444, "learning_rate": 2.9282293865893576e-05, "loss": 0.1372, "step": 7109 }, { "epoch": 0.12591480827212476, "grad_norm": 1.2431151866912842, "learning_rate": 2.9282030896048288e-05, "loss": 0.1216, "step": 7110 }, { "epoch": 0.12593251780915318, "grad_norm": 0.7374699115753174, "learning_rate": 2.9281767879216565e-05, "loss": 0.114, "step": 7111 }, { "epoch": 0.1259502273461816, "grad_norm": 1.8340457677841187, "learning_rate": 2.9281504815399274e-05, "loss": 0.1074, "step": 7112 }, { "epoch": 0.12596793688321004, "grad_norm": 0.9342846870422363, "learning_rate": 2.928124170459727e-05, "loss": 0.1573, "step": 7113 }, { "epoch": 0.12598564642023846, "grad_norm": 1.306886076927185, "learning_rate": 2.9280978546811433e-05, "loss": 0.097, "step": 7114 }, { "epoch": 0.1260033559572669, "grad_norm": 1.8204057216644287, "learning_rate": 2.9280715342042614e-05, "loss": 0.1606, "step": 7115 }, { "epoch": 0.12602106549429531, "grad_norm": 1.393652081489563, "learning_rate": 2.928045209029169e-05, "loss": 0.1619, "step": 7116 }, { "epoch": 0.12603877503132374, "grad_norm": 2.530268907546997, "learning_rate": 2.9280188791559518e-05, "loss": 0.1226, "step": 7117 }, { "epoch": 0.12605648456835217, "grad_norm": 0.9022555351257324, "learning_rate": 2.9279925445846975e-05, "loss": 0.1176, "step": 7118 }, { "epoch": 0.1260741941053806, "grad_norm": 1.2837631702423096, "learning_rate": 2.927966205315492e-05, "loss": 0.1287, "step": 7119 }, { "epoch": 0.12609190364240902, "grad_norm": 1.3431141376495361, "learning_rate": 2.927939861348422e-05, "loss": 0.1463, "step": 7120 }, { "epoch": 0.12610961317943745, "grad_norm": 0.988177478313446, "learning_rate": 2.927913512683574e-05, "loss": 0.1292, "step": 7121 }, { "epoch": 0.12612732271646587, "grad_norm": 1.1209901571273804, "learning_rate": 2.9278871593210355e-05, "loss": 0.1126, "step": 7122 }, { "epoch": 0.1261450322534943, "grad_norm": 0.9178664684295654, "learning_rate": 2.927860801260893e-05, "loss": 0.0854, "step": 7123 }, { "epoch": 0.12616274179052275, "grad_norm": 1.166882038116455, "learning_rate": 2.927834438503232e-05, "loss": 0.105, "step": 7124 }, { "epoch": 0.12618045132755118, "grad_norm": 1.393604040145874, "learning_rate": 2.9278080710481406e-05, "loss": 0.1465, "step": 7125 }, { "epoch": 0.1261981608645796, "grad_norm": 1.557581901550293, "learning_rate": 2.9277816988957048e-05, "loss": 0.1648, "step": 7126 }, { "epoch": 0.12621587040160803, "grad_norm": 1.3140558004379272, "learning_rate": 2.9277553220460115e-05, "loss": 0.1217, "step": 7127 }, { "epoch": 0.12623357993863646, "grad_norm": 0.8925879597663879, "learning_rate": 2.927728940499148e-05, "loss": 0.117, "step": 7128 }, { "epoch": 0.12625128947566489, "grad_norm": 1.0798535346984863, "learning_rate": 2.9277025542552005e-05, "loss": 0.1141, "step": 7129 }, { "epoch": 0.1262689990126933, "grad_norm": 1.8113155364990234, "learning_rate": 2.927676163314256e-05, "loss": 0.1317, "step": 7130 }, { "epoch": 0.12628670854972174, "grad_norm": 0.9978067874908447, "learning_rate": 2.9276497676764012e-05, "loss": 0.1637, "step": 7131 }, { "epoch": 0.12630441808675016, "grad_norm": 1.5013781785964966, "learning_rate": 2.9276233673417234e-05, "loss": 0.1546, "step": 7132 }, { "epoch": 0.1263221276237786, "grad_norm": 1.1932419538497925, "learning_rate": 2.927596962310309e-05, "loss": 0.1411, "step": 7133 }, { "epoch": 0.12633983716080702, "grad_norm": 1.2661798000335693, "learning_rate": 2.927570552582245e-05, "loss": 0.1727, "step": 7134 }, { "epoch": 0.12635754669783544, "grad_norm": 1.665749192237854, "learning_rate": 2.9275441381576182e-05, "loss": 0.1166, "step": 7135 }, { "epoch": 0.12637525623486387, "grad_norm": 0.9087540507316589, "learning_rate": 2.9275177190365154e-05, "loss": 0.11, "step": 7136 }, { "epoch": 0.1263929657718923, "grad_norm": 0.9837380647659302, "learning_rate": 2.927491295219024e-05, "loss": 0.1141, "step": 7137 }, { "epoch": 0.12641067530892075, "grad_norm": 1.1069846153259277, "learning_rate": 2.9274648667052304e-05, "loss": 0.0797, "step": 7138 }, { "epoch": 0.12642838484594918, "grad_norm": 1.139750361442566, "learning_rate": 2.9274384334952218e-05, "loss": 0.1518, "step": 7139 }, { "epoch": 0.1264460943829776, "grad_norm": 1.7454171180725098, "learning_rate": 2.927411995589085e-05, "loss": 0.1259, "step": 7140 }, { "epoch": 0.12646380392000603, "grad_norm": 1.5919426679611206, "learning_rate": 2.9273855529869075e-05, "loss": 0.1119, "step": 7141 }, { "epoch": 0.12648151345703446, "grad_norm": 1.367798089981079, "learning_rate": 2.9273591056887755e-05, "loss": 0.19, "step": 7142 }, { "epoch": 0.12649922299406288, "grad_norm": 1.4759074449539185, "learning_rate": 2.927332653694777e-05, "loss": 0.1325, "step": 7143 }, { "epoch": 0.1265169325310913, "grad_norm": 1.4236195087432861, "learning_rate": 2.9273061970049978e-05, "loss": 0.1452, "step": 7144 }, { "epoch": 0.12653464206811973, "grad_norm": 1.707688808441162, "learning_rate": 2.9272797356195258e-05, "loss": 0.1266, "step": 7145 }, { "epoch": 0.12655235160514816, "grad_norm": 1.350588083267212, "learning_rate": 2.9272532695384477e-05, "loss": 0.1674, "step": 7146 }, { "epoch": 0.1265700611421766, "grad_norm": 1.2115172147750854, "learning_rate": 2.927226798761851e-05, "loss": 0.1059, "step": 7147 }, { "epoch": 0.12658777067920501, "grad_norm": 1.8325406312942505, "learning_rate": 2.9272003232898223e-05, "loss": 0.1195, "step": 7148 }, { "epoch": 0.12660548021623344, "grad_norm": 1.0116379261016846, "learning_rate": 2.9271738431224487e-05, "loss": 0.1306, "step": 7149 }, { "epoch": 0.12662318975326187, "grad_norm": 0.7471093535423279, "learning_rate": 2.9271473582598177e-05, "loss": 0.1466, "step": 7150 }, { "epoch": 0.1266408992902903, "grad_norm": 0.7049247622489929, "learning_rate": 2.9271208687020167e-05, "loss": 0.1035, "step": 7151 }, { "epoch": 0.12665860882731872, "grad_norm": 0.8972417712211609, "learning_rate": 2.9270943744491317e-05, "loss": 0.1451, "step": 7152 }, { "epoch": 0.12667631836434717, "grad_norm": 0.8057044744491577, "learning_rate": 2.9270678755012508e-05, "loss": 0.1204, "step": 7153 }, { "epoch": 0.1266940279013756, "grad_norm": 1.2653635740280151, "learning_rate": 2.9270413718584608e-05, "loss": 0.1684, "step": 7154 }, { "epoch": 0.12671173743840403, "grad_norm": 1.3235890865325928, "learning_rate": 2.9270148635208494e-05, "loss": 0.1078, "step": 7155 }, { "epoch": 0.12672944697543245, "grad_norm": 0.9047909379005432, "learning_rate": 2.926988350488503e-05, "loss": 0.1324, "step": 7156 }, { "epoch": 0.12674715651246088, "grad_norm": 1.3333022594451904, "learning_rate": 2.9269618327615093e-05, "loss": 0.1402, "step": 7157 }, { "epoch": 0.1267648660494893, "grad_norm": 1.1440247297286987, "learning_rate": 2.9269353103399558e-05, "loss": 0.1321, "step": 7158 }, { "epoch": 0.12678257558651773, "grad_norm": 0.8587185740470886, "learning_rate": 2.926908783223929e-05, "loss": 0.1487, "step": 7159 }, { "epoch": 0.12680028512354616, "grad_norm": 0.8087301850318909, "learning_rate": 2.926882251413517e-05, "loss": 0.1354, "step": 7160 }, { "epoch": 0.12681799466057458, "grad_norm": 1.0081932544708252, "learning_rate": 2.9268557149088067e-05, "loss": 0.1308, "step": 7161 }, { "epoch": 0.126835704197603, "grad_norm": 0.9774448871612549, "learning_rate": 2.9268291737098853e-05, "loss": 0.1175, "step": 7162 }, { "epoch": 0.12685341373463144, "grad_norm": 0.8420494198799133, "learning_rate": 2.9268026278168404e-05, "loss": 0.1534, "step": 7163 }, { "epoch": 0.12687112327165986, "grad_norm": 1.2469171285629272, "learning_rate": 2.926776077229759e-05, "loss": 0.142, "step": 7164 }, { "epoch": 0.1268888328086883, "grad_norm": 0.8007935285568237, "learning_rate": 2.9267495219487287e-05, "loss": 0.1136, "step": 7165 }, { "epoch": 0.12690654234571672, "grad_norm": 0.8007517457008362, "learning_rate": 2.9267229619738367e-05, "loss": 0.1275, "step": 7166 }, { "epoch": 0.12692425188274514, "grad_norm": 0.8019488453865051, "learning_rate": 2.9266963973051702e-05, "loss": 0.1047, "step": 7167 }, { "epoch": 0.1269419614197736, "grad_norm": 1.0742485523223877, "learning_rate": 2.9266698279428173e-05, "loss": 0.112, "step": 7168 }, { "epoch": 0.12695967095680202, "grad_norm": 1.0292418003082275, "learning_rate": 2.9266432538868652e-05, "loss": 0.132, "step": 7169 }, { "epoch": 0.12697738049383045, "grad_norm": 1.1101511716842651, "learning_rate": 2.9266166751374004e-05, "loss": 0.1412, "step": 7170 }, { "epoch": 0.12699509003085888, "grad_norm": 0.9029587507247925, "learning_rate": 2.9265900916945113e-05, "loss": 0.0988, "step": 7171 }, { "epoch": 0.1270127995678873, "grad_norm": 1.2347718477249146, "learning_rate": 2.9265635035582853e-05, "loss": 0.1209, "step": 7172 }, { "epoch": 0.12703050910491573, "grad_norm": 1.463080644607544, "learning_rate": 2.9265369107288095e-05, "loss": 0.1159, "step": 7173 }, { "epoch": 0.12704821864194416, "grad_norm": 1.1005587577819824, "learning_rate": 2.9265103132061717e-05, "loss": 0.1312, "step": 7174 }, { "epoch": 0.12706592817897258, "grad_norm": 1.197059154510498, "learning_rate": 2.926483710990459e-05, "loss": 0.1293, "step": 7175 }, { "epoch": 0.127083637716001, "grad_norm": 1.1520966291427612, "learning_rate": 2.9264571040817594e-05, "loss": 0.1215, "step": 7176 }, { "epoch": 0.12710134725302943, "grad_norm": 1.2679872512817383, "learning_rate": 2.9264304924801598e-05, "loss": 0.1441, "step": 7177 }, { "epoch": 0.12711905679005786, "grad_norm": 1.0734916925430298, "learning_rate": 2.9264038761857486e-05, "loss": 0.1088, "step": 7178 }, { "epoch": 0.1271367663270863, "grad_norm": 1.3495662212371826, "learning_rate": 2.9263772551986126e-05, "loss": 0.1271, "step": 7179 }, { "epoch": 0.1271544758641147, "grad_norm": 0.9616808891296387, "learning_rate": 2.92635062951884e-05, "loss": 0.1347, "step": 7180 }, { "epoch": 0.12717218540114314, "grad_norm": 1.0412278175354004, "learning_rate": 2.9263239991465178e-05, "loss": 0.1483, "step": 7181 }, { "epoch": 0.12718989493817157, "grad_norm": 1.5156655311584473, "learning_rate": 2.9262973640817342e-05, "loss": 0.1746, "step": 7182 }, { "epoch": 0.12720760447520002, "grad_norm": 1.2020516395568848, "learning_rate": 2.9262707243245764e-05, "loss": 0.1115, "step": 7183 }, { "epoch": 0.12722531401222845, "grad_norm": 1.2350212335586548, "learning_rate": 2.9262440798751323e-05, "loss": 0.1192, "step": 7184 }, { "epoch": 0.12724302354925687, "grad_norm": 1.4404338598251343, "learning_rate": 2.926217430733489e-05, "loss": 0.1581, "step": 7185 }, { "epoch": 0.1272607330862853, "grad_norm": 0.9943919777870178, "learning_rate": 2.926190776899735e-05, "loss": 0.1101, "step": 7186 }, { "epoch": 0.12727844262331373, "grad_norm": 1.7004021406173706, "learning_rate": 2.9261641183739576e-05, "loss": 0.1509, "step": 7187 }, { "epoch": 0.12729615216034215, "grad_norm": 1.0665595531463623, "learning_rate": 2.926137455156244e-05, "loss": 0.1138, "step": 7188 }, { "epoch": 0.12731386169737058, "grad_norm": 1.28787362575531, "learning_rate": 2.9261107872466832e-05, "loss": 0.1669, "step": 7189 }, { "epoch": 0.127331571234399, "grad_norm": 1.0777355432510376, "learning_rate": 2.9260841146453618e-05, "loss": 0.1009, "step": 7190 }, { "epoch": 0.12734928077142743, "grad_norm": 0.852550745010376, "learning_rate": 2.926057437352368e-05, "loss": 0.0951, "step": 7191 }, { "epoch": 0.12736699030845586, "grad_norm": 1.3369340896606445, "learning_rate": 2.926030755367789e-05, "loss": 0.1772, "step": 7192 }, { "epoch": 0.12738469984548428, "grad_norm": 0.8846178650856018, "learning_rate": 2.9260040686917134e-05, "loss": 0.1344, "step": 7193 }, { "epoch": 0.1274024093825127, "grad_norm": 1.176820158958435, "learning_rate": 2.925977377324229e-05, "loss": 0.0925, "step": 7194 }, { "epoch": 0.12742011891954114, "grad_norm": 1.488789677619934, "learning_rate": 2.9259506812654228e-05, "loss": 0.1393, "step": 7195 }, { "epoch": 0.12743782845656956, "grad_norm": 1.1470471620559692, "learning_rate": 2.9259239805153832e-05, "loss": 0.1535, "step": 7196 }, { "epoch": 0.127455537993598, "grad_norm": 1.3250761032104492, "learning_rate": 2.9258972750741976e-05, "loss": 0.1267, "step": 7197 }, { "epoch": 0.12747324753062644, "grad_norm": 1.1366773843765259, "learning_rate": 2.9258705649419546e-05, "loss": 0.1169, "step": 7198 }, { "epoch": 0.12749095706765487, "grad_norm": 1.2880902290344238, "learning_rate": 2.9258438501187414e-05, "loss": 0.1231, "step": 7199 }, { "epoch": 0.1275086666046833, "grad_norm": 1.056643009185791, "learning_rate": 2.9258171306046463e-05, "loss": 0.1621, "step": 7200 }, { "epoch": 0.12752637614171172, "grad_norm": 1.5085030794143677, "learning_rate": 2.9257904063997572e-05, "loss": 0.1634, "step": 7201 }, { "epoch": 0.12754408567874015, "grad_norm": 1.5632587671279907, "learning_rate": 2.9257636775041616e-05, "loss": 0.1463, "step": 7202 }, { "epoch": 0.12756179521576858, "grad_norm": 1.1717296838760376, "learning_rate": 2.925736943917948e-05, "loss": 0.1405, "step": 7203 }, { "epoch": 0.127579504752797, "grad_norm": 1.0004982948303223, "learning_rate": 2.925710205641204e-05, "loss": 0.1469, "step": 7204 }, { "epoch": 0.12759721428982543, "grad_norm": 0.7302985787391663, "learning_rate": 2.925683462674017e-05, "loss": 0.1182, "step": 7205 }, { "epoch": 0.12761492382685385, "grad_norm": 1.0213314294815063, "learning_rate": 2.9256567150164762e-05, "loss": 0.1221, "step": 7206 }, { "epoch": 0.12763263336388228, "grad_norm": 0.6655212044715881, "learning_rate": 2.925629962668669e-05, "loss": 0.1095, "step": 7207 }, { "epoch": 0.1276503429009107, "grad_norm": 1.0862149000167847, "learning_rate": 2.925603205630683e-05, "loss": 0.1152, "step": 7208 }, { "epoch": 0.12766805243793913, "grad_norm": 1.5599730014801025, "learning_rate": 2.9255764439026074e-05, "loss": 0.1664, "step": 7209 }, { "epoch": 0.12768576197496756, "grad_norm": 1.5959138870239258, "learning_rate": 2.925549677484529e-05, "loss": 0.1196, "step": 7210 }, { "epoch": 0.127703471511996, "grad_norm": 1.3421223163604736, "learning_rate": 2.9255229063765365e-05, "loss": 0.108, "step": 7211 }, { "epoch": 0.1277211810490244, "grad_norm": 0.8574727177619934, "learning_rate": 2.9254961305787175e-05, "loss": 0.159, "step": 7212 }, { "epoch": 0.12773889058605287, "grad_norm": 0.9705176949501038, "learning_rate": 2.9254693500911606e-05, "loss": 0.0934, "step": 7213 }, { "epoch": 0.1277566001230813, "grad_norm": 1.9304819107055664, "learning_rate": 2.925442564913954e-05, "loss": 0.1206, "step": 7214 }, { "epoch": 0.12777430966010972, "grad_norm": 1.2709892988204956, "learning_rate": 2.9254157750471856e-05, "loss": 0.1564, "step": 7215 }, { "epoch": 0.12779201919713815, "grad_norm": 1.280441403388977, "learning_rate": 2.925388980490943e-05, "loss": 0.1442, "step": 7216 }, { "epoch": 0.12780972873416657, "grad_norm": 1.0117632150650024, "learning_rate": 2.925362181245315e-05, "loss": 0.129, "step": 7217 }, { "epoch": 0.127827438271195, "grad_norm": 1.0043984651565552, "learning_rate": 2.9253353773103896e-05, "loss": 0.1484, "step": 7218 }, { "epoch": 0.12784514780822342, "grad_norm": 1.0192888975143433, "learning_rate": 2.925308568686255e-05, "loss": 0.1241, "step": 7219 }, { "epoch": 0.12786285734525185, "grad_norm": 1.4861950874328613, "learning_rate": 2.925281755372999e-05, "loss": 0.1484, "step": 7220 }, { "epoch": 0.12788056688228028, "grad_norm": 1.488814115524292, "learning_rate": 2.9252549373707107e-05, "loss": 0.1333, "step": 7221 }, { "epoch": 0.1278982764193087, "grad_norm": 1.0672167539596558, "learning_rate": 2.9252281146794776e-05, "loss": 0.1538, "step": 7222 }, { "epoch": 0.12791598595633713, "grad_norm": 0.9577605724334717, "learning_rate": 2.925201287299388e-05, "loss": 0.1221, "step": 7223 }, { "epoch": 0.12793369549336556, "grad_norm": 0.8887894153594971, "learning_rate": 2.9251744552305306e-05, "loss": 0.0765, "step": 7224 }, { "epoch": 0.12795140503039398, "grad_norm": 0.6183681488037109, "learning_rate": 2.9251476184729934e-05, "loss": 0.1319, "step": 7225 }, { "epoch": 0.1279691145674224, "grad_norm": 1.7801299095153809, "learning_rate": 2.9251207770268646e-05, "loss": 0.121, "step": 7226 }, { "epoch": 0.12798682410445084, "grad_norm": 1.099830985069275, "learning_rate": 2.9250939308922326e-05, "loss": 0.1011, "step": 7227 }, { "epoch": 0.1280045336414793, "grad_norm": 1.62611985206604, "learning_rate": 2.9250670800691855e-05, "loss": 0.1287, "step": 7228 }, { "epoch": 0.12802224317850772, "grad_norm": 1.2170809507369995, "learning_rate": 2.925040224557812e-05, "loss": 0.1526, "step": 7229 }, { "epoch": 0.12803995271553614, "grad_norm": 1.2770094871520996, "learning_rate": 2.9250133643582003e-05, "loss": 0.1304, "step": 7230 }, { "epoch": 0.12805766225256457, "grad_norm": 0.8484607934951782, "learning_rate": 2.9249864994704387e-05, "loss": 0.1559, "step": 7231 }, { "epoch": 0.128075371789593, "grad_norm": 1.6722320318222046, "learning_rate": 2.9249596298946158e-05, "loss": 0.1652, "step": 7232 }, { "epoch": 0.12809308132662142, "grad_norm": 1.034993052482605, "learning_rate": 2.92493275563082e-05, "loss": 0.0968, "step": 7233 }, { "epoch": 0.12811079086364985, "grad_norm": 0.8844557404518127, "learning_rate": 2.924905876679139e-05, "loss": 0.0961, "step": 7234 }, { "epoch": 0.12812850040067827, "grad_norm": 0.9748209118843079, "learning_rate": 2.9248789930396622e-05, "loss": 0.0794, "step": 7235 }, { "epoch": 0.1281462099377067, "grad_norm": 0.9793394804000854, "learning_rate": 2.9248521047124777e-05, "loss": 0.1011, "step": 7236 }, { "epoch": 0.12816391947473513, "grad_norm": 1.3643585443496704, "learning_rate": 2.9248252116976732e-05, "loss": 0.1182, "step": 7237 }, { "epoch": 0.12818162901176355, "grad_norm": 1.7200706005096436, "learning_rate": 2.9247983139953386e-05, "loss": 0.13, "step": 7238 }, { "epoch": 0.12819933854879198, "grad_norm": 1.129936933517456, "learning_rate": 2.9247714116055612e-05, "loss": 0.1319, "step": 7239 }, { "epoch": 0.1282170480858204, "grad_norm": 1.3824049234390259, "learning_rate": 2.9247445045284306e-05, "loss": 0.1239, "step": 7240 }, { "epoch": 0.12823475762284883, "grad_norm": 1.5630414485931396, "learning_rate": 2.9247175927640338e-05, "loss": 0.1173, "step": 7241 }, { "epoch": 0.12825246715987726, "grad_norm": 1.2138605117797852, "learning_rate": 2.924690676312461e-05, "loss": 0.1537, "step": 7242 }, { "epoch": 0.1282701766969057, "grad_norm": 1.169132947921753, "learning_rate": 2.9246637551737997e-05, "loss": 0.1558, "step": 7243 }, { "epoch": 0.12828788623393414, "grad_norm": 1.4914215803146362, "learning_rate": 2.924636829348139e-05, "loss": 0.1147, "step": 7244 }, { "epoch": 0.12830559577096257, "grad_norm": 1.056818962097168, "learning_rate": 2.924609898835567e-05, "loss": 0.1172, "step": 7245 }, { "epoch": 0.128323305307991, "grad_norm": 0.963188111782074, "learning_rate": 2.924582963636172e-05, "loss": 0.096, "step": 7246 }, { "epoch": 0.12834101484501942, "grad_norm": 1.2716857194900513, "learning_rate": 2.924556023750044e-05, "loss": 0.1344, "step": 7247 }, { "epoch": 0.12835872438204785, "grad_norm": 1.100285291671753, "learning_rate": 2.9245290791772703e-05, "loss": 0.1554, "step": 7248 }, { "epoch": 0.12837643391907627, "grad_norm": 0.8661668300628662, "learning_rate": 2.92450212991794e-05, "loss": 0.1349, "step": 7249 }, { "epoch": 0.1283941434561047, "grad_norm": 1.2515658140182495, "learning_rate": 2.924475175972142e-05, "loss": 0.2034, "step": 7250 }, { "epoch": 0.12841185299313312, "grad_norm": 1.3670940399169922, "learning_rate": 2.9244482173399643e-05, "loss": 0.182, "step": 7251 }, { "epoch": 0.12842956253016155, "grad_norm": 2.1303257942199707, "learning_rate": 2.9244212540214964e-05, "loss": 0.2102, "step": 7252 }, { "epoch": 0.12844727206718998, "grad_norm": 1.9286675453186035, "learning_rate": 2.9243942860168264e-05, "loss": 0.1482, "step": 7253 }, { "epoch": 0.1284649816042184, "grad_norm": 1.1039983034133911, "learning_rate": 2.9243673133260436e-05, "loss": 0.1138, "step": 7254 }, { "epoch": 0.12848269114124683, "grad_norm": 1.5171927213668823, "learning_rate": 2.9243403359492364e-05, "loss": 0.164, "step": 7255 }, { "epoch": 0.12850040067827526, "grad_norm": 1.6509912014007568, "learning_rate": 2.9243133538864937e-05, "loss": 0.1549, "step": 7256 }, { "epoch": 0.1285181102153037, "grad_norm": 1.2934446334838867, "learning_rate": 2.9242863671379035e-05, "loss": 0.1578, "step": 7257 }, { "epoch": 0.12853581975233214, "grad_norm": 5.549740314483643, "learning_rate": 2.9242593757035555e-05, "loss": 0.1147, "step": 7258 }, { "epoch": 0.12855352928936056, "grad_norm": 237.7852020263672, "learning_rate": 2.9242323795835385e-05, "loss": 1.2872, "step": 7259 }, { "epoch": 0.128571238826389, "grad_norm": 3.523237943649292, "learning_rate": 2.924205378777941e-05, "loss": 0.2415, "step": 7260 }, { "epoch": 0.12858894836341742, "grad_norm": 3.9147870540618896, "learning_rate": 2.9241783732868514e-05, "loss": 0.2888, "step": 7261 }, { "epoch": 0.12860665790044584, "grad_norm": 2.9971680641174316, "learning_rate": 2.9241513631103595e-05, "loss": 0.2002, "step": 7262 }, { "epoch": 0.12862436743747427, "grad_norm": 2.298784017562866, "learning_rate": 2.924124348248553e-05, "loss": 0.2069, "step": 7263 }, { "epoch": 0.1286420769745027, "grad_norm": 1.594488501548767, "learning_rate": 2.924097328701522e-05, "loss": 0.1742, "step": 7264 }, { "epoch": 0.12865978651153112, "grad_norm": 1.399267315864563, "learning_rate": 2.9240703044693545e-05, "loss": 0.1211, "step": 7265 }, { "epoch": 0.12867749604855955, "grad_norm": 1.842609167098999, "learning_rate": 2.92404327555214e-05, "loss": 0.1794, "step": 7266 }, { "epoch": 0.12869520558558797, "grad_norm": 1.8215744495391846, "learning_rate": 2.924016241949967e-05, "loss": 0.1269, "step": 7267 }, { "epoch": 0.1287129151226164, "grad_norm": 2.645399570465088, "learning_rate": 2.9239892036629248e-05, "loss": 0.1486, "step": 7268 }, { "epoch": 0.12873062465964483, "grad_norm": 1.1324093341827393, "learning_rate": 2.923962160691102e-05, "loss": 0.1556, "step": 7269 }, { "epoch": 0.12874833419667325, "grad_norm": 1.0209689140319824, "learning_rate": 2.9239351130345878e-05, "loss": 0.093, "step": 7270 }, { "epoch": 0.12876604373370168, "grad_norm": 1.1781400442123413, "learning_rate": 2.9239080606934707e-05, "loss": 0.141, "step": 7271 }, { "epoch": 0.12878375327073013, "grad_norm": 2.2188024520874023, "learning_rate": 2.9238810036678406e-05, "loss": 0.1643, "step": 7272 }, { "epoch": 0.12880146280775856, "grad_norm": 1.5253199338912964, "learning_rate": 2.9238539419577857e-05, "loss": 0.1459, "step": 7273 }, { "epoch": 0.128819172344787, "grad_norm": 1.4623414278030396, "learning_rate": 2.9238268755633955e-05, "loss": 0.1026, "step": 7274 }, { "epoch": 0.1288368818818154, "grad_norm": 1.1843578815460205, "learning_rate": 2.923799804484759e-05, "loss": 0.1089, "step": 7275 }, { "epoch": 0.12885459141884384, "grad_norm": 1.6176565885543823, "learning_rate": 2.923772728721965e-05, "loss": 0.0924, "step": 7276 }, { "epoch": 0.12887230095587227, "grad_norm": 1.223657250404358, "learning_rate": 2.9237456482751028e-05, "loss": 0.1878, "step": 7277 }, { "epoch": 0.1288900104929007, "grad_norm": 1.5643155574798584, "learning_rate": 2.9237185631442616e-05, "loss": 0.1685, "step": 7278 }, { "epoch": 0.12890772002992912, "grad_norm": 1.0109517574310303, "learning_rate": 2.9236914733295297e-05, "loss": 0.1519, "step": 7279 }, { "epoch": 0.12892542956695754, "grad_norm": 1.1173726320266724, "learning_rate": 2.9236643788309974e-05, "loss": 0.1387, "step": 7280 }, { "epoch": 0.12894313910398597, "grad_norm": 1.1871684789657593, "learning_rate": 2.923637279648753e-05, "loss": 0.1071, "step": 7281 }, { "epoch": 0.1289608486410144, "grad_norm": 1.0180379152297974, "learning_rate": 2.923610175782886e-05, "loss": 0.1518, "step": 7282 }, { "epoch": 0.12897855817804282, "grad_norm": 1.2831602096557617, "learning_rate": 2.9235830672334856e-05, "loss": 0.12, "step": 7283 }, { "epoch": 0.12899626771507125, "grad_norm": 1.2524702548980713, "learning_rate": 2.923555954000641e-05, "loss": 0.0997, "step": 7284 }, { "epoch": 0.12901397725209968, "grad_norm": 1.2363333702087402, "learning_rate": 2.9235288360844416e-05, "loss": 0.1186, "step": 7285 }, { "epoch": 0.1290316867891281, "grad_norm": 1.435369610786438, "learning_rate": 2.9235017134849756e-05, "loss": 0.1201, "step": 7286 }, { "epoch": 0.12904939632615656, "grad_norm": 1.5141026973724365, "learning_rate": 2.9234745862023333e-05, "loss": 0.1363, "step": 7287 }, { "epoch": 0.12906710586318498, "grad_norm": 1.0831408500671387, "learning_rate": 2.9234474542366033e-05, "loss": 0.1253, "step": 7288 }, { "epoch": 0.1290848154002134, "grad_norm": 0.9039520025253296, "learning_rate": 2.923420317587875e-05, "loss": 0.1499, "step": 7289 }, { "epoch": 0.12910252493724184, "grad_norm": 0.9466784000396729, "learning_rate": 2.9233931762562386e-05, "loss": 0.1123, "step": 7290 }, { "epoch": 0.12912023447427026, "grad_norm": 1.3437656164169312, "learning_rate": 2.923366030241782e-05, "loss": 0.1167, "step": 7291 }, { "epoch": 0.1291379440112987, "grad_norm": 0.9857016205787659, "learning_rate": 2.9233388795445958e-05, "loss": 0.1217, "step": 7292 }, { "epoch": 0.12915565354832712, "grad_norm": 3.225266695022583, "learning_rate": 2.923311724164768e-05, "loss": 0.1222, "step": 7293 }, { "epoch": 0.12917336308535554, "grad_norm": 1.3607099056243896, "learning_rate": 2.9232845641023888e-05, "loss": 0.1084, "step": 7294 }, { "epoch": 0.12919107262238397, "grad_norm": 1.260801911354065, "learning_rate": 2.9232573993575476e-05, "loss": 0.1347, "step": 7295 }, { "epoch": 0.1292087821594124, "grad_norm": 1.6249140501022339, "learning_rate": 2.9232302299303327e-05, "loss": 0.1528, "step": 7296 }, { "epoch": 0.12922649169644082, "grad_norm": 1.1732094287872314, "learning_rate": 2.923203055820835e-05, "loss": 0.1311, "step": 7297 }, { "epoch": 0.12924420123346925, "grad_norm": 1.3543635606765747, "learning_rate": 2.923175877029143e-05, "loss": 0.0845, "step": 7298 }, { "epoch": 0.12926191077049767, "grad_norm": 1.2246307134628296, "learning_rate": 2.9231486935553465e-05, "loss": 0.1379, "step": 7299 }, { "epoch": 0.1292796203075261, "grad_norm": 1.3563053607940674, "learning_rate": 2.9231215053995345e-05, "loss": 0.135, "step": 7300 }, { "epoch": 0.12929732984455453, "grad_norm": 3.909849166870117, "learning_rate": 2.9230943125617967e-05, "loss": 0.1808, "step": 7301 }, { "epoch": 0.12931503938158298, "grad_norm": 1.3545725345611572, "learning_rate": 2.9230671150422228e-05, "loss": 0.1404, "step": 7302 }, { "epoch": 0.1293327489186114, "grad_norm": 1.0682830810546875, "learning_rate": 2.9230399128409017e-05, "loss": 0.1149, "step": 7303 }, { "epoch": 0.12935045845563983, "grad_norm": 1.0447196960449219, "learning_rate": 2.923012705957923e-05, "loss": 0.1303, "step": 7304 }, { "epoch": 0.12936816799266826, "grad_norm": 0.8608657121658325, "learning_rate": 2.922985494393377e-05, "loss": 0.0998, "step": 7305 }, { "epoch": 0.12938587752969669, "grad_norm": 1.1123735904693604, "learning_rate": 2.9229582781473525e-05, "loss": 0.1028, "step": 7306 }, { "epoch": 0.1294035870667251, "grad_norm": 0.7293652296066284, "learning_rate": 2.922931057219939e-05, "loss": 0.1525, "step": 7307 }, { "epoch": 0.12942129660375354, "grad_norm": 1.1920255422592163, "learning_rate": 2.9229038316112264e-05, "loss": 0.1367, "step": 7308 }, { "epoch": 0.12943900614078196, "grad_norm": 1.4456970691680908, "learning_rate": 2.922876601321304e-05, "loss": 0.1296, "step": 7309 }, { "epoch": 0.1294567156778104, "grad_norm": 0.9024927020072937, "learning_rate": 2.9228493663502616e-05, "loss": 0.1064, "step": 7310 }, { "epoch": 0.12947442521483882, "grad_norm": 1.816089391708374, "learning_rate": 2.9228221266981888e-05, "loss": 0.1708, "step": 7311 }, { "epoch": 0.12949213475186724, "grad_norm": 1.1575549840927124, "learning_rate": 2.9227948823651748e-05, "loss": 0.2011, "step": 7312 }, { "epoch": 0.12950984428889567, "grad_norm": 1.4281675815582275, "learning_rate": 2.92276763335131e-05, "loss": 0.1307, "step": 7313 }, { "epoch": 0.1295275538259241, "grad_norm": 1.5330462455749512, "learning_rate": 2.9227403796566828e-05, "loss": 0.1629, "step": 7314 }, { "epoch": 0.12954526336295252, "grad_norm": 1.539652705192566, "learning_rate": 2.9227131212813848e-05, "loss": 0.1361, "step": 7315 }, { "epoch": 0.12956297289998095, "grad_norm": 0.952849805355072, "learning_rate": 2.9226858582255034e-05, "loss": 0.1118, "step": 7316 }, { "epoch": 0.1295806824370094, "grad_norm": 0.8855245113372803, "learning_rate": 2.9226585904891303e-05, "loss": 0.1202, "step": 7317 }, { "epoch": 0.12959839197403783, "grad_norm": 1.468575119972229, "learning_rate": 2.9226313180723536e-05, "loss": 0.1104, "step": 7318 }, { "epoch": 0.12961610151106626, "grad_norm": 1.8047380447387695, "learning_rate": 2.9226040409752643e-05, "loss": 0.1475, "step": 7319 }, { "epoch": 0.12963381104809468, "grad_norm": 1.4801971912384033, "learning_rate": 2.922576759197951e-05, "loss": 0.1752, "step": 7320 }, { "epoch": 0.1296515205851231, "grad_norm": 1.4113343954086304, "learning_rate": 2.9225494727405048e-05, "loss": 0.1539, "step": 7321 }, { "epoch": 0.12966923012215154, "grad_norm": 1.2623200416564941, "learning_rate": 2.9225221816030142e-05, "loss": 0.1388, "step": 7322 }, { "epoch": 0.12968693965917996, "grad_norm": 1.0435484647750854, "learning_rate": 2.9224948857855697e-05, "loss": 0.1124, "step": 7323 }, { "epoch": 0.1297046491962084, "grad_norm": 1.3045380115509033, "learning_rate": 2.9224675852882607e-05, "loss": 0.1177, "step": 7324 }, { "epoch": 0.12972235873323681, "grad_norm": 1.232992172241211, "learning_rate": 2.9224402801111777e-05, "loss": 0.0968, "step": 7325 }, { "epoch": 0.12974006827026524, "grad_norm": 1.3014824390411377, "learning_rate": 2.9224129702544098e-05, "loss": 0.1101, "step": 7326 }, { "epoch": 0.12975777780729367, "grad_norm": 1.6801015138626099, "learning_rate": 2.922385655718047e-05, "loss": 0.1478, "step": 7327 }, { "epoch": 0.1297754873443221, "grad_norm": 1.0743356943130493, "learning_rate": 2.9223583365021787e-05, "loss": 0.1667, "step": 7328 }, { "epoch": 0.12979319688135052, "grad_norm": 1.0632749795913696, "learning_rate": 2.9223310126068964e-05, "loss": 0.1337, "step": 7329 }, { "epoch": 0.12981090641837895, "grad_norm": 0.9796637296676636, "learning_rate": 2.9223036840322884e-05, "loss": 0.124, "step": 7330 }, { "epoch": 0.12982861595540737, "grad_norm": 0.978751003742218, "learning_rate": 2.9222763507784454e-05, "loss": 0.1779, "step": 7331 }, { "epoch": 0.12984632549243583, "grad_norm": 1.0839617252349854, "learning_rate": 2.9222490128454565e-05, "loss": 0.0793, "step": 7332 }, { "epoch": 0.12986403502946425, "grad_norm": 2.7692923545837402, "learning_rate": 2.9222216702334126e-05, "loss": 0.1319, "step": 7333 }, { "epoch": 0.12988174456649268, "grad_norm": 0.8369758129119873, "learning_rate": 2.9221943229424034e-05, "loss": 0.0967, "step": 7334 }, { "epoch": 0.1298994541035211, "grad_norm": 1.3047144412994385, "learning_rate": 2.9221669709725183e-05, "loss": 0.1399, "step": 7335 }, { "epoch": 0.12991716364054953, "grad_norm": 0.8889278769493103, "learning_rate": 2.922139614323848e-05, "loss": 0.0849, "step": 7336 }, { "epoch": 0.12993487317757796, "grad_norm": 1.1115224361419678, "learning_rate": 2.9221122529964823e-05, "loss": 0.1076, "step": 7337 }, { "epoch": 0.12995258271460638, "grad_norm": 1.1134147644042969, "learning_rate": 2.9220848869905108e-05, "loss": 0.1417, "step": 7338 }, { "epoch": 0.1299702922516348, "grad_norm": 1.2238436937332153, "learning_rate": 2.922057516306024e-05, "loss": 0.1411, "step": 7339 }, { "epoch": 0.12998800178866324, "grad_norm": 0.849916398525238, "learning_rate": 2.922030140943112e-05, "loss": 0.1252, "step": 7340 }, { "epoch": 0.13000571132569166, "grad_norm": 1.1133568286895752, "learning_rate": 2.9220027609018646e-05, "loss": 0.1119, "step": 7341 }, { "epoch": 0.1300234208627201, "grad_norm": 1.2607405185699463, "learning_rate": 2.921975376182372e-05, "loss": 0.1397, "step": 7342 }, { "epoch": 0.13004113039974852, "grad_norm": 0.9133511781692505, "learning_rate": 2.921947986784724e-05, "loss": 0.1151, "step": 7343 }, { "epoch": 0.13005883993677694, "grad_norm": 1.1304575204849243, "learning_rate": 2.9219205927090107e-05, "loss": 0.124, "step": 7344 }, { "epoch": 0.13007654947380537, "grad_norm": 0.8247532844543457, "learning_rate": 2.921893193955323e-05, "loss": 0.1347, "step": 7345 }, { "epoch": 0.1300942590108338, "grad_norm": 0.7568877339363098, "learning_rate": 2.92186579052375e-05, "loss": 0.1253, "step": 7346 }, { "epoch": 0.13011196854786225, "grad_norm": 1.1524460315704346, "learning_rate": 2.9218383824143828e-05, "loss": 0.126, "step": 7347 }, { "epoch": 0.13012967808489068, "grad_norm": 1.2274439334869385, "learning_rate": 2.921810969627311e-05, "loss": 0.1277, "step": 7348 }, { "epoch": 0.1301473876219191, "grad_norm": 1.022196650505066, "learning_rate": 2.9217835521626246e-05, "loss": 0.1404, "step": 7349 }, { "epoch": 0.13016509715894753, "grad_norm": 1.051263451576233, "learning_rate": 2.9217561300204146e-05, "loss": 0.1115, "step": 7350 }, { "epoch": 0.13018280669597596, "grad_norm": 0.941329836845398, "learning_rate": 2.9217287032007705e-05, "loss": 0.1384, "step": 7351 }, { "epoch": 0.13020051623300438, "grad_norm": 1.0945560932159424, "learning_rate": 2.921701271703783e-05, "loss": 0.0948, "step": 7352 }, { "epoch": 0.1302182257700328, "grad_norm": 1.2221812009811401, "learning_rate": 2.921673835529542e-05, "loss": 0.1071, "step": 7353 }, { "epoch": 0.13023593530706123, "grad_norm": 1.2734496593475342, "learning_rate": 2.9216463946781376e-05, "loss": 0.0924, "step": 7354 }, { "epoch": 0.13025364484408966, "grad_norm": 1.2461353540420532, "learning_rate": 2.9216189491496607e-05, "loss": 0.1303, "step": 7355 }, { "epoch": 0.1302713543811181, "grad_norm": 0.7430467009544373, "learning_rate": 2.9215914989442012e-05, "loss": 0.0873, "step": 7356 }, { "epoch": 0.1302890639181465, "grad_norm": 1.04097318649292, "learning_rate": 2.921564044061849e-05, "loss": 0.1456, "step": 7357 }, { "epoch": 0.13030677345517494, "grad_norm": 0.9296790957450867, "learning_rate": 2.9215365845026952e-05, "loss": 0.1266, "step": 7358 }, { "epoch": 0.13032448299220337, "grad_norm": 0.978237509727478, "learning_rate": 2.9215091202668302e-05, "loss": 0.1666, "step": 7359 }, { "epoch": 0.1303421925292318, "grad_norm": 0.9618648886680603, "learning_rate": 2.9214816513543435e-05, "loss": 0.1456, "step": 7360 }, { "epoch": 0.13035990206626022, "grad_norm": 1.2411619424819946, "learning_rate": 2.921454177765326e-05, "loss": 0.2122, "step": 7361 }, { "epoch": 0.13037761160328867, "grad_norm": 0.9509791731834412, "learning_rate": 2.9214266994998683e-05, "loss": 0.1385, "step": 7362 }, { "epoch": 0.1303953211403171, "grad_norm": 0.9269832968711853, "learning_rate": 2.92139921655806e-05, "loss": 0.1082, "step": 7363 }, { "epoch": 0.13041303067734553, "grad_norm": 0.8406513929367065, "learning_rate": 2.9213717289399924e-05, "loss": 0.1015, "step": 7364 }, { "epoch": 0.13043074021437395, "grad_norm": 0.860686182975769, "learning_rate": 2.9213442366457558e-05, "loss": 0.115, "step": 7365 }, { "epoch": 0.13044844975140238, "grad_norm": 1.3741564750671387, "learning_rate": 2.92131673967544e-05, "loss": 0.1228, "step": 7366 }, { "epoch": 0.1304661592884308, "grad_norm": 1.0731375217437744, "learning_rate": 2.9212892380291363e-05, "loss": 0.1164, "step": 7367 }, { "epoch": 0.13048386882545923, "grad_norm": 1.1182861328125, "learning_rate": 2.9212617317069344e-05, "loss": 0.1385, "step": 7368 }, { "epoch": 0.13050157836248766, "grad_norm": 1.4885010719299316, "learning_rate": 2.9212342207089252e-05, "loss": 0.1179, "step": 7369 }, { "epoch": 0.13051928789951608, "grad_norm": 1.6093087196350098, "learning_rate": 2.9212067050351993e-05, "loss": 0.1794, "step": 7370 }, { "epoch": 0.1305369974365445, "grad_norm": 0.9010266065597534, "learning_rate": 2.921179184685847e-05, "loss": 0.1368, "step": 7371 }, { "epoch": 0.13055470697357294, "grad_norm": 1.1592285633087158, "learning_rate": 2.921151659660959e-05, "loss": 0.1334, "step": 7372 }, { "epoch": 0.13057241651060136, "grad_norm": 0.9624170660972595, "learning_rate": 2.9211241299606258e-05, "loss": 0.1268, "step": 7373 }, { "epoch": 0.1305901260476298, "grad_norm": 0.9487460255622864, "learning_rate": 2.9210965955849376e-05, "loss": 0.1355, "step": 7374 }, { "epoch": 0.13060783558465822, "grad_norm": 1.3773260116577148, "learning_rate": 2.921069056533986e-05, "loss": 0.1431, "step": 7375 }, { "epoch": 0.13062554512168664, "grad_norm": 1.1693055629730225, "learning_rate": 2.92104151280786e-05, "loss": 0.1754, "step": 7376 }, { "epoch": 0.1306432546587151, "grad_norm": 1.406050205230713, "learning_rate": 2.921013964406652e-05, "loss": 0.1261, "step": 7377 }, { "epoch": 0.13066096419574352, "grad_norm": 1.0048257112503052, "learning_rate": 2.9209864113304512e-05, "loss": 0.0909, "step": 7378 }, { "epoch": 0.13067867373277195, "grad_norm": 0.7712417244911194, "learning_rate": 2.9209588535793496e-05, "loss": 0.0767, "step": 7379 }, { "epoch": 0.13069638326980038, "grad_norm": 1.2614880800247192, "learning_rate": 2.9209312911534365e-05, "loss": 0.104, "step": 7380 }, { "epoch": 0.1307140928068288, "grad_norm": 1.1456722021102905, "learning_rate": 2.9209037240528028e-05, "loss": 0.1434, "step": 7381 }, { "epoch": 0.13073180234385723, "grad_norm": 1.2468653917312622, "learning_rate": 2.9208761522775403e-05, "loss": 0.1077, "step": 7382 }, { "epoch": 0.13074951188088565, "grad_norm": 1.6321500539779663, "learning_rate": 2.9208485758277388e-05, "loss": 0.1221, "step": 7383 }, { "epoch": 0.13076722141791408, "grad_norm": 1.1841555833816528, "learning_rate": 2.920820994703489e-05, "loss": 0.1373, "step": 7384 }, { "epoch": 0.1307849309549425, "grad_norm": 0.9897195100784302, "learning_rate": 2.920793408904882e-05, "loss": 0.0997, "step": 7385 }, { "epoch": 0.13080264049197093, "grad_norm": 0.9070276618003845, "learning_rate": 2.9207658184320085e-05, "loss": 0.1128, "step": 7386 }, { "epoch": 0.13082035002899936, "grad_norm": 1.43431556224823, "learning_rate": 2.9207382232849586e-05, "loss": 0.1671, "step": 7387 }, { "epoch": 0.1308380595660278, "grad_norm": 0.8480346202850342, "learning_rate": 2.920710623463824e-05, "loss": 0.1134, "step": 7388 }, { "epoch": 0.1308557691030562, "grad_norm": 1.0632495880126953, "learning_rate": 2.9206830189686955e-05, "loss": 0.1044, "step": 7389 }, { "epoch": 0.13087347864008464, "grad_norm": 1.0517902374267578, "learning_rate": 2.9206554097996633e-05, "loss": 0.1377, "step": 7390 }, { "epoch": 0.1308911881771131, "grad_norm": 1.0029810667037964, "learning_rate": 2.9206277959568185e-05, "loss": 0.1443, "step": 7391 }, { "epoch": 0.13090889771414152, "grad_norm": 0.7947275042533875, "learning_rate": 2.9206001774402515e-05, "loss": 0.1021, "step": 7392 }, { "epoch": 0.13092660725116995, "grad_norm": 1.2097264528274536, "learning_rate": 2.9205725542500543e-05, "loss": 0.1155, "step": 7393 }, { "epoch": 0.13094431678819837, "grad_norm": 0.8958892226219177, "learning_rate": 2.9205449263863168e-05, "loss": 0.1144, "step": 7394 }, { "epoch": 0.1309620263252268, "grad_norm": 0.9818747639656067, "learning_rate": 2.92051729384913e-05, "loss": 0.1129, "step": 7395 }, { "epoch": 0.13097973586225523, "grad_norm": 3.713324785232544, "learning_rate": 2.9204896566385847e-05, "loss": 0.0953, "step": 7396 }, { "epoch": 0.13099744539928365, "grad_norm": 1.0013434886932373, "learning_rate": 2.9204620147547728e-05, "loss": 0.1409, "step": 7397 }, { "epoch": 0.13101515493631208, "grad_norm": 1.1074352264404297, "learning_rate": 2.9204343681977846e-05, "loss": 0.122, "step": 7398 }, { "epoch": 0.1310328644733405, "grad_norm": 1.2694432735443115, "learning_rate": 2.92040671696771e-05, "loss": 0.144, "step": 7399 }, { "epoch": 0.13105057401036893, "grad_norm": 1.115519404411316, "learning_rate": 2.920379061064642e-05, "loss": 0.1133, "step": 7400 }, { "epoch": 0.13106828354739736, "grad_norm": 1.0933231115341187, "learning_rate": 2.9203514004886703e-05, "loss": 0.1071, "step": 7401 }, { "epoch": 0.13108599308442578, "grad_norm": 1.189272165298462, "learning_rate": 2.920323735239886e-05, "loss": 0.1282, "step": 7402 }, { "epoch": 0.1311037026214542, "grad_norm": 0.8978736996650696, "learning_rate": 2.9202960653183805e-05, "loss": 0.0965, "step": 7403 }, { "epoch": 0.13112141215848264, "grad_norm": 0.9339725971221924, "learning_rate": 2.920268390724244e-05, "loss": 0.1517, "step": 7404 }, { "epoch": 0.13113912169551106, "grad_norm": 1.0646027326583862, "learning_rate": 2.9202407114575686e-05, "loss": 0.0957, "step": 7405 }, { "epoch": 0.13115683123253952, "grad_norm": 0.756150484085083, "learning_rate": 2.920213027518445e-05, "loss": 0.1323, "step": 7406 }, { "epoch": 0.13117454076956794, "grad_norm": 1.066943645477295, "learning_rate": 2.920185338906964e-05, "loss": 0.1355, "step": 7407 }, { "epoch": 0.13119225030659637, "grad_norm": 1.0982052087783813, "learning_rate": 2.920157645623217e-05, "loss": 0.1644, "step": 7408 }, { "epoch": 0.1312099598436248, "grad_norm": 0.887299120426178, "learning_rate": 2.9201299476672946e-05, "loss": 0.0978, "step": 7409 }, { "epoch": 0.13122766938065322, "grad_norm": 1.4382574558258057, "learning_rate": 2.9201022450392887e-05, "loss": 0.1812, "step": 7410 }, { "epoch": 0.13124537891768165, "grad_norm": 1.1079034805297852, "learning_rate": 2.9200745377392897e-05, "loss": 0.1213, "step": 7411 }, { "epoch": 0.13126308845471008, "grad_norm": 0.7370003461837769, "learning_rate": 2.9200468257673896e-05, "loss": 0.1261, "step": 7412 }, { "epoch": 0.1312807979917385, "grad_norm": 1.0007102489471436, "learning_rate": 2.9200191091236786e-05, "loss": 0.1537, "step": 7413 }, { "epoch": 0.13129850752876693, "grad_norm": 1.1939667463302612, "learning_rate": 2.9199913878082485e-05, "loss": 0.1007, "step": 7414 }, { "epoch": 0.13131621706579535, "grad_norm": 1.281200885772705, "learning_rate": 2.9199636618211905e-05, "loss": 0.1011, "step": 7415 }, { "epoch": 0.13133392660282378, "grad_norm": 1.9204697608947754, "learning_rate": 2.9199359311625954e-05, "loss": 0.1542, "step": 7416 }, { "epoch": 0.1313516361398522, "grad_norm": 1.3353943824768066, "learning_rate": 2.9199081958325553e-05, "loss": 0.1303, "step": 7417 }, { "epoch": 0.13136934567688063, "grad_norm": 0.8233826160430908, "learning_rate": 2.9198804558311603e-05, "loss": 0.1176, "step": 7418 }, { "epoch": 0.13138705521390906, "grad_norm": 2.769984006881714, "learning_rate": 2.9198527111585023e-05, "loss": 0.1486, "step": 7419 }, { "epoch": 0.13140476475093749, "grad_norm": 0.6559020280838013, "learning_rate": 2.9198249618146727e-05, "loss": 0.084, "step": 7420 }, { "epoch": 0.13142247428796594, "grad_norm": 0.6539106369018555, "learning_rate": 2.9197972077997627e-05, "loss": 0.0979, "step": 7421 }, { "epoch": 0.13144018382499437, "grad_norm": 2.636850595474243, "learning_rate": 2.919769449113863e-05, "loss": 0.1157, "step": 7422 }, { "epoch": 0.1314578933620228, "grad_norm": 1.5298722982406616, "learning_rate": 2.9197416857570657e-05, "loss": 0.1183, "step": 7423 }, { "epoch": 0.13147560289905122, "grad_norm": 1.0531421899795532, "learning_rate": 2.919713917729462e-05, "loss": 0.0876, "step": 7424 }, { "epoch": 0.13149331243607965, "grad_norm": 0.7576688528060913, "learning_rate": 2.9196861450311427e-05, "loss": 0.0816, "step": 7425 }, { "epoch": 0.13151102197310807, "grad_norm": 1.3869677782058716, "learning_rate": 2.9196583676621998e-05, "loss": 0.123, "step": 7426 }, { "epoch": 0.1315287315101365, "grad_norm": 1.2998193502426147, "learning_rate": 2.9196305856227243e-05, "loss": 0.1112, "step": 7427 }, { "epoch": 0.13154644104716492, "grad_norm": 1.1141955852508545, "learning_rate": 2.919602798912808e-05, "loss": 0.1176, "step": 7428 }, { "epoch": 0.13156415058419335, "grad_norm": 1.1285128593444824, "learning_rate": 2.919575007532542e-05, "loss": 0.0954, "step": 7429 }, { "epoch": 0.13158186012122178, "grad_norm": 0.6872220635414124, "learning_rate": 2.919547211482018e-05, "loss": 0.1269, "step": 7430 }, { "epoch": 0.1315995696582502, "grad_norm": 1.1911027431488037, "learning_rate": 2.9195194107613267e-05, "loss": 0.1231, "step": 7431 }, { "epoch": 0.13161727919527863, "grad_norm": 1.4390166997909546, "learning_rate": 2.9194916053705607e-05, "loss": 0.1552, "step": 7432 }, { "epoch": 0.13163498873230706, "grad_norm": 0.9720426201820374, "learning_rate": 2.9194637953098106e-05, "loss": 0.1139, "step": 7433 }, { "epoch": 0.13165269826933548, "grad_norm": 0.9625604152679443, "learning_rate": 2.9194359805791683e-05, "loss": 0.0931, "step": 7434 }, { "epoch": 0.1316704078063639, "grad_norm": 0.9867298603057861, "learning_rate": 2.9194081611787253e-05, "loss": 0.087, "step": 7435 }, { "epoch": 0.13168811734339236, "grad_norm": 1.9975415468215942, "learning_rate": 2.9193803371085727e-05, "loss": 0.1633, "step": 7436 }, { "epoch": 0.1317058268804208, "grad_norm": 1.4638572931289673, "learning_rate": 2.9193525083688027e-05, "loss": 0.1073, "step": 7437 }, { "epoch": 0.13172353641744922, "grad_norm": 1.1902382373809814, "learning_rate": 2.919324674959506e-05, "loss": 0.1927, "step": 7438 }, { "epoch": 0.13174124595447764, "grad_norm": 0.8874852061271667, "learning_rate": 2.919296836880775e-05, "loss": 0.1241, "step": 7439 }, { "epoch": 0.13175895549150607, "grad_norm": 1.2366917133331299, "learning_rate": 2.9192689941327005e-05, "loss": 0.1161, "step": 7440 }, { "epoch": 0.1317766650285345, "grad_norm": 0.8255090713500977, "learning_rate": 2.9192411467153753e-05, "loss": 0.1228, "step": 7441 }, { "epoch": 0.13179437456556292, "grad_norm": 1.2805352210998535, "learning_rate": 2.91921329462889e-05, "loss": 0.1357, "step": 7442 }, { "epoch": 0.13181208410259135, "grad_norm": 1.29740571975708, "learning_rate": 2.9191854378733366e-05, "loss": 0.1562, "step": 7443 }, { "epoch": 0.13182979363961977, "grad_norm": 1.3431066274642944, "learning_rate": 2.9191575764488064e-05, "loss": 0.1127, "step": 7444 }, { "epoch": 0.1318475031766482, "grad_norm": 1.0969547033309937, "learning_rate": 2.9191297103553916e-05, "loss": 0.1563, "step": 7445 }, { "epoch": 0.13186521271367663, "grad_norm": 1.1390020847320557, "learning_rate": 2.9191018395931834e-05, "loss": 0.109, "step": 7446 }, { "epoch": 0.13188292225070505, "grad_norm": 1.1437232494354248, "learning_rate": 2.9190739641622736e-05, "loss": 0.1314, "step": 7447 }, { "epoch": 0.13190063178773348, "grad_norm": 1.0030527114868164, "learning_rate": 2.9190460840627544e-05, "loss": 0.1113, "step": 7448 }, { "epoch": 0.1319183413247619, "grad_norm": 0.905413031578064, "learning_rate": 2.9190181992947164e-05, "loss": 0.1209, "step": 7449 }, { "epoch": 0.13193605086179033, "grad_norm": 1.1333072185516357, "learning_rate": 2.9189903098582527e-05, "loss": 0.1125, "step": 7450 }, { "epoch": 0.1319537603988188, "grad_norm": 0.7869049906730652, "learning_rate": 2.918962415753454e-05, "loss": 0.1272, "step": 7451 }, { "epoch": 0.1319714699358472, "grad_norm": 1.2393417358398438, "learning_rate": 2.918934516980413e-05, "loss": 0.1112, "step": 7452 }, { "epoch": 0.13198917947287564, "grad_norm": 1.2648310661315918, "learning_rate": 2.9189066135392204e-05, "loss": 0.1455, "step": 7453 }, { "epoch": 0.13200688900990407, "grad_norm": 0.9328197240829468, "learning_rate": 2.9188787054299683e-05, "loss": 0.1042, "step": 7454 }, { "epoch": 0.1320245985469325, "grad_norm": 1.4809828996658325, "learning_rate": 2.9188507926527493e-05, "loss": 0.1468, "step": 7455 }, { "epoch": 0.13204230808396092, "grad_norm": 1.044511079788208, "learning_rate": 2.918822875207655e-05, "loss": 0.1277, "step": 7456 }, { "epoch": 0.13206001762098935, "grad_norm": 0.8976600766181946, "learning_rate": 2.9187949530947766e-05, "loss": 0.1157, "step": 7457 }, { "epoch": 0.13207772715801777, "grad_norm": 1.331639051437378, "learning_rate": 2.918767026314206e-05, "loss": 0.1229, "step": 7458 }, { "epoch": 0.1320954366950462, "grad_norm": 0.857770562171936, "learning_rate": 2.9187390948660354e-05, "loss": 0.1429, "step": 7459 }, { "epoch": 0.13211314623207462, "grad_norm": 0.9476569294929504, "learning_rate": 2.9187111587503574e-05, "loss": 0.0735, "step": 7460 }, { "epoch": 0.13213085576910305, "grad_norm": 1.0934394598007202, "learning_rate": 2.9186832179672626e-05, "loss": 0.0906, "step": 7461 }, { "epoch": 0.13214856530613148, "grad_norm": 0.9708653688430786, "learning_rate": 2.9186552725168435e-05, "loss": 0.1235, "step": 7462 }, { "epoch": 0.1321662748431599, "grad_norm": 1.0615911483764648, "learning_rate": 2.918627322399192e-05, "loss": 0.0666, "step": 7463 }, { "epoch": 0.13218398438018833, "grad_norm": 1.4969310760498047, "learning_rate": 2.9185993676144005e-05, "loss": 0.1648, "step": 7464 }, { "epoch": 0.13220169391721676, "grad_norm": 0.9806920289993286, "learning_rate": 2.91857140816256e-05, "loss": 0.1043, "step": 7465 }, { "epoch": 0.1322194034542452, "grad_norm": 1.0633805990219116, "learning_rate": 2.9185434440437634e-05, "loss": 0.1269, "step": 7466 }, { "epoch": 0.13223711299127364, "grad_norm": 0.9264194965362549, "learning_rate": 2.9185154752581026e-05, "loss": 0.1169, "step": 7467 }, { "epoch": 0.13225482252830206, "grad_norm": 0.8419871926307678, "learning_rate": 2.918487501805669e-05, "loss": 0.1163, "step": 7468 }, { "epoch": 0.1322725320653305, "grad_norm": 1.3586736917495728, "learning_rate": 2.9184595236865552e-05, "loss": 0.1512, "step": 7469 }, { "epoch": 0.13229024160235892, "grad_norm": 0.9469600319862366, "learning_rate": 2.918431540900853e-05, "loss": 0.1456, "step": 7470 }, { "epoch": 0.13230795113938734, "grad_norm": 0.9092691540718079, "learning_rate": 2.918403553448654e-05, "loss": 0.1181, "step": 7471 }, { "epoch": 0.13232566067641577, "grad_norm": 1.4150185585021973, "learning_rate": 2.9183755613300514e-05, "loss": 0.1321, "step": 7472 }, { "epoch": 0.1323433702134442, "grad_norm": 1.2518255710601807, "learning_rate": 2.918347564545136e-05, "loss": 0.1281, "step": 7473 }, { "epoch": 0.13236107975047262, "grad_norm": 1.0285183191299438, "learning_rate": 2.918319563094001e-05, "loss": 0.1636, "step": 7474 }, { "epoch": 0.13237878928750105, "grad_norm": 1.353346347808838, "learning_rate": 2.9182915569767387e-05, "loss": 0.0908, "step": 7475 }, { "epoch": 0.13239649882452947, "grad_norm": 1.085364818572998, "learning_rate": 2.9182635461934398e-05, "loss": 0.1266, "step": 7476 }, { "epoch": 0.1324142083615579, "grad_norm": 1.1524094343185425, "learning_rate": 2.9182355307441973e-05, "loss": 0.126, "step": 7477 }, { "epoch": 0.13243191789858633, "grad_norm": 1.118436574935913, "learning_rate": 2.918207510629104e-05, "loss": 0.1637, "step": 7478 }, { "epoch": 0.13244962743561475, "grad_norm": 0.964227557182312, "learning_rate": 2.9181794858482504e-05, "loss": 0.0923, "step": 7479 }, { "epoch": 0.13246733697264318, "grad_norm": 1.0297644138336182, "learning_rate": 2.9181514564017305e-05, "loss": 0.1181, "step": 7480 }, { "epoch": 0.13248504650967163, "grad_norm": 1.2582250833511353, "learning_rate": 2.9181234222896355e-05, "loss": 0.088, "step": 7481 }, { "epoch": 0.13250275604670006, "grad_norm": 1.2228516340255737, "learning_rate": 2.918095383512058e-05, "loss": 0.1333, "step": 7482 }, { "epoch": 0.1325204655837285, "grad_norm": 1.274749517440796, "learning_rate": 2.91806734006909e-05, "loss": 0.1636, "step": 7483 }, { "epoch": 0.1325381751207569, "grad_norm": 0.8825529217720032, "learning_rate": 2.9180392919608242e-05, "loss": 0.1554, "step": 7484 }, { "epoch": 0.13255588465778534, "grad_norm": 1.1305748224258423, "learning_rate": 2.918011239187352e-05, "loss": 0.2049, "step": 7485 }, { "epoch": 0.13257359419481377, "grad_norm": 1.1487640142440796, "learning_rate": 2.917983181748767e-05, "loss": 0.1202, "step": 7486 }, { "epoch": 0.1325913037318422, "grad_norm": 1.0335686206817627, "learning_rate": 2.91795511964516e-05, "loss": 0.1116, "step": 7487 }, { "epoch": 0.13260901326887062, "grad_norm": 1.4615445137023926, "learning_rate": 2.917927052876624e-05, "loss": 0.1385, "step": 7488 }, { "epoch": 0.13262672280589904, "grad_norm": 1.5237388610839844, "learning_rate": 2.9178989814432522e-05, "loss": 0.1136, "step": 7489 }, { "epoch": 0.13264443234292747, "grad_norm": 0.8650116920471191, "learning_rate": 2.9178709053451354e-05, "loss": 0.1336, "step": 7490 }, { "epoch": 0.1326621418799559, "grad_norm": 1.2672890424728394, "learning_rate": 2.9178428245823674e-05, "loss": 0.1078, "step": 7491 }, { "epoch": 0.13267985141698432, "grad_norm": 0.9129905104637146, "learning_rate": 2.9178147391550395e-05, "loss": 0.1106, "step": 7492 }, { "epoch": 0.13269756095401275, "grad_norm": 1.0981552600860596, "learning_rate": 2.9177866490632445e-05, "loss": 0.1104, "step": 7493 }, { "epoch": 0.13271527049104118, "grad_norm": 1.3196427822113037, "learning_rate": 2.9177585543070748e-05, "loss": 0.1651, "step": 7494 }, { "epoch": 0.1327329800280696, "grad_norm": 1.414760708808899, "learning_rate": 2.917730454886623e-05, "loss": 0.1486, "step": 7495 }, { "epoch": 0.13275068956509806, "grad_norm": 1.128752589225769, "learning_rate": 2.9177023508019816e-05, "loss": 0.1386, "step": 7496 }, { "epoch": 0.13276839910212648, "grad_norm": 1.1211320161819458, "learning_rate": 2.9176742420532424e-05, "loss": 0.0923, "step": 7497 }, { "epoch": 0.1327861086391549, "grad_norm": 0.8349193930625916, "learning_rate": 2.9176461286404985e-05, "loss": 0.1077, "step": 7498 }, { "epoch": 0.13280381817618334, "grad_norm": 0.9375842213630676, "learning_rate": 2.917618010563842e-05, "loss": 0.1073, "step": 7499 }, { "epoch": 0.13282152771321176, "grad_norm": 1.058213710784912, "learning_rate": 2.9175898878233664e-05, "loss": 0.1663, "step": 7500 }, { "epoch": 0.1328392372502402, "grad_norm": 1.6717414855957031, "learning_rate": 2.9175617604191626e-05, "loss": 0.1515, "step": 7501 }, { "epoch": 0.13285694678726861, "grad_norm": 0.7965503931045532, "learning_rate": 2.9175336283513247e-05, "loss": 0.0962, "step": 7502 }, { "epoch": 0.13287465632429704, "grad_norm": 1.0925534963607788, "learning_rate": 2.917505491619944e-05, "loss": 0.1016, "step": 7503 }, { "epoch": 0.13289236586132547, "grad_norm": 0.6950823664665222, "learning_rate": 2.9174773502251137e-05, "loss": 0.11, "step": 7504 }, { "epoch": 0.1329100753983539, "grad_norm": 1.3901902437210083, "learning_rate": 2.917449204166927e-05, "loss": 0.0832, "step": 7505 }, { "epoch": 0.13292778493538232, "grad_norm": 1.0869438648223877, "learning_rate": 2.9174210534454747e-05, "loss": 0.1235, "step": 7506 }, { "epoch": 0.13294549447241075, "grad_norm": 0.5859490633010864, "learning_rate": 2.917392898060851e-05, "loss": 0.1218, "step": 7507 }, { "epoch": 0.13296320400943917, "grad_norm": 1.1117229461669922, "learning_rate": 2.917364738013148e-05, "loss": 0.1335, "step": 7508 }, { "epoch": 0.1329809135464676, "grad_norm": 1.2125555276870728, "learning_rate": 2.9173365733024584e-05, "loss": 0.1222, "step": 7509 }, { "epoch": 0.13299862308349603, "grad_norm": 0.8731268644332886, "learning_rate": 2.9173084039288746e-05, "loss": 0.1429, "step": 7510 }, { "epoch": 0.13301633262052448, "grad_norm": 1.3205676078796387, "learning_rate": 2.9172802298924898e-05, "loss": 0.1133, "step": 7511 }, { "epoch": 0.1330340421575529, "grad_norm": 1.154166579246521, "learning_rate": 2.917252051193396e-05, "loss": 0.1332, "step": 7512 }, { "epoch": 0.13305175169458133, "grad_norm": 0.8112698793411255, "learning_rate": 2.9172238678316868e-05, "loss": 0.1486, "step": 7513 }, { "epoch": 0.13306946123160976, "grad_norm": 1.0739781856536865, "learning_rate": 2.9171956798074542e-05, "loss": 0.1787, "step": 7514 }, { "epoch": 0.13308717076863819, "grad_norm": 1.1497352123260498, "learning_rate": 2.917167487120791e-05, "loss": 0.1041, "step": 7515 }, { "epoch": 0.1331048803056666, "grad_norm": 1.0633128881454468, "learning_rate": 2.9171392897717906e-05, "loss": 0.1214, "step": 7516 }, { "epoch": 0.13312258984269504, "grad_norm": 1.2447203397750854, "learning_rate": 2.9171110877605446e-05, "loss": 0.1439, "step": 7517 }, { "epoch": 0.13314029937972346, "grad_norm": 0.9585633277893066, "learning_rate": 2.9170828810871466e-05, "loss": 0.0926, "step": 7518 }, { "epoch": 0.1331580089167519, "grad_norm": 1.1199015378952026, "learning_rate": 2.9170546697516894e-05, "loss": 0.1451, "step": 7519 }, { "epoch": 0.13317571845378032, "grad_norm": 1.0041314363479614, "learning_rate": 2.917026453754266e-05, "loss": 0.1253, "step": 7520 }, { "epoch": 0.13319342799080874, "grad_norm": 0.6874716877937317, "learning_rate": 2.9169982330949685e-05, "loss": 0.1194, "step": 7521 }, { "epoch": 0.13321113752783717, "grad_norm": 1.2312686443328857, "learning_rate": 2.91697000777389e-05, "loss": 0.1152, "step": 7522 }, { "epoch": 0.1332288470648656, "grad_norm": 0.8120663166046143, "learning_rate": 2.916941777791124e-05, "loss": 0.0984, "step": 7523 }, { "epoch": 0.13324655660189402, "grad_norm": 1.0191888809204102, "learning_rate": 2.9169135431467627e-05, "loss": 0.0881, "step": 7524 }, { "epoch": 0.13326426613892248, "grad_norm": 1.4252973794937134, "learning_rate": 2.916885303840899e-05, "loss": 0.1391, "step": 7525 }, { "epoch": 0.1332819756759509, "grad_norm": 0.9341335296630859, "learning_rate": 2.916857059873626e-05, "loss": 0.0995, "step": 7526 }, { "epoch": 0.13329968521297933, "grad_norm": 0.7483333349227905, "learning_rate": 2.9168288112450363e-05, "loss": 0.0886, "step": 7527 }, { "epoch": 0.13331739475000776, "grad_norm": 0.9732539653778076, "learning_rate": 2.9168005579552234e-05, "loss": 0.0846, "step": 7528 }, { "epoch": 0.13333510428703618, "grad_norm": 0.7065516114234924, "learning_rate": 2.91677230000428e-05, "loss": 0.084, "step": 7529 }, { "epoch": 0.1333528138240646, "grad_norm": 1.0413713455200195, "learning_rate": 2.916744037392299e-05, "loss": 0.0964, "step": 7530 }, { "epoch": 0.13337052336109304, "grad_norm": 0.9951986074447632, "learning_rate": 2.9167157701193735e-05, "loss": 0.0825, "step": 7531 }, { "epoch": 0.13338823289812146, "grad_norm": 0.8569962382316589, "learning_rate": 2.9166874981855963e-05, "loss": 0.1291, "step": 7532 }, { "epoch": 0.1334059424351499, "grad_norm": 0.8347509503364563, "learning_rate": 2.9166592215910603e-05, "loss": 0.1609, "step": 7533 }, { "epoch": 0.13342365197217831, "grad_norm": 1.8208085298538208, "learning_rate": 2.9166309403358594e-05, "loss": 0.1323, "step": 7534 }, { "epoch": 0.13344136150920674, "grad_norm": 0.6344942450523376, "learning_rate": 2.9166026544200853e-05, "loss": 0.0879, "step": 7535 }, { "epoch": 0.13345907104623517, "grad_norm": 1.145788311958313, "learning_rate": 2.916574363843832e-05, "loss": 0.1732, "step": 7536 }, { "epoch": 0.1334767805832636, "grad_norm": 1.2421914339065552, "learning_rate": 2.9165460686071925e-05, "loss": 0.0973, "step": 7537 }, { "epoch": 0.13349449012029202, "grad_norm": 1.4228310585021973, "learning_rate": 2.916517768710259e-05, "loss": 0.104, "step": 7538 }, { "epoch": 0.13351219965732045, "grad_norm": 1.392472267150879, "learning_rate": 2.9164894641531265e-05, "loss": 0.0778, "step": 7539 }, { "epoch": 0.1335299091943489, "grad_norm": 1.6756885051727295, "learning_rate": 2.916461154935886e-05, "loss": 0.1134, "step": 7540 }, { "epoch": 0.13354761873137733, "grad_norm": 1.5291640758514404, "learning_rate": 2.9164328410586317e-05, "loss": 0.1246, "step": 7541 }, { "epoch": 0.13356532826840575, "grad_norm": 0.9506994485855103, "learning_rate": 2.9164045225214565e-05, "loss": 0.1343, "step": 7542 }, { "epoch": 0.13358303780543418, "grad_norm": 0.8936184048652649, "learning_rate": 2.9163761993244543e-05, "loss": 0.1128, "step": 7543 }, { "epoch": 0.1336007473424626, "grad_norm": 1.1304019689559937, "learning_rate": 2.916347871467717e-05, "loss": 0.0754, "step": 7544 }, { "epoch": 0.13361845687949103, "grad_norm": 1.4910705089569092, "learning_rate": 2.9163195389513388e-05, "loss": 0.1063, "step": 7545 }, { "epoch": 0.13363616641651946, "grad_norm": 1.156288981437683, "learning_rate": 2.9162912017754127e-05, "loss": 0.1247, "step": 7546 }, { "epoch": 0.13365387595354788, "grad_norm": 1.3475401401519775, "learning_rate": 2.9162628599400318e-05, "loss": 0.101, "step": 7547 }, { "epoch": 0.1336715854905763, "grad_norm": 1.0481476783752441, "learning_rate": 2.916234513445289e-05, "loss": 0.1603, "step": 7548 }, { "epoch": 0.13368929502760474, "grad_norm": 0.743218719959259, "learning_rate": 2.916206162291278e-05, "loss": 0.138, "step": 7549 }, { "epoch": 0.13370700456463316, "grad_norm": 0.7998074293136597, "learning_rate": 2.9161778064780923e-05, "loss": 0.1185, "step": 7550 }, { "epoch": 0.1337247141016616, "grad_norm": 1.073875904083252, "learning_rate": 2.9161494460058243e-05, "loss": 0.1262, "step": 7551 }, { "epoch": 0.13374242363869002, "grad_norm": 0.8819498419761658, "learning_rate": 2.9161210808745684e-05, "loss": 0.0891, "step": 7552 }, { "epoch": 0.13376013317571844, "grad_norm": 0.9489238262176514, "learning_rate": 2.916092711084417e-05, "loss": 0.1137, "step": 7553 }, { "epoch": 0.13377784271274687, "grad_norm": 1.188913106918335, "learning_rate": 2.916064336635464e-05, "loss": 0.1306, "step": 7554 }, { "epoch": 0.13379555224977532, "grad_norm": 1.3198059797286987, "learning_rate": 2.9160359575278026e-05, "loss": 0.1109, "step": 7555 }, { "epoch": 0.13381326178680375, "grad_norm": 0.9860581159591675, "learning_rate": 2.9160075737615258e-05, "loss": 0.1196, "step": 7556 }, { "epoch": 0.13383097132383218, "grad_norm": 0.6364176273345947, "learning_rate": 2.9159791853367277e-05, "loss": 0.1107, "step": 7557 }, { "epoch": 0.1338486808608606, "grad_norm": 1.386646032333374, "learning_rate": 2.915950792253501e-05, "loss": 0.1415, "step": 7558 }, { "epoch": 0.13386639039788903, "grad_norm": 1.4662089347839355, "learning_rate": 2.9159223945119396e-05, "loss": 0.1133, "step": 7559 }, { "epoch": 0.13388409993491746, "grad_norm": 0.7523108720779419, "learning_rate": 2.9158939921121364e-05, "loss": 0.1167, "step": 7560 }, { "epoch": 0.13390180947194588, "grad_norm": 0.9893003106117249, "learning_rate": 2.9158655850541855e-05, "loss": 0.1338, "step": 7561 }, { "epoch": 0.1339195190089743, "grad_norm": 1.396836280822754, "learning_rate": 2.91583717333818e-05, "loss": 0.1126, "step": 7562 }, { "epoch": 0.13393722854600273, "grad_norm": 1.1532591581344604, "learning_rate": 2.915808756964213e-05, "loss": 0.1273, "step": 7563 }, { "epoch": 0.13395493808303116, "grad_norm": 1.326060175895691, "learning_rate": 2.9157803359323792e-05, "loss": 0.1247, "step": 7564 }, { "epoch": 0.1339726476200596, "grad_norm": 0.7726870775222778, "learning_rate": 2.915751910242771e-05, "loss": 0.0867, "step": 7565 }, { "epoch": 0.133990357157088, "grad_norm": 1.060988426208496, "learning_rate": 2.915723479895482e-05, "loss": 0.0893, "step": 7566 }, { "epoch": 0.13400806669411644, "grad_norm": 1.6153841018676758, "learning_rate": 2.9156950448906058e-05, "loss": 0.1464, "step": 7567 }, { "epoch": 0.13402577623114487, "grad_norm": 1.022944450378418, "learning_rate": 2.9156666052282362e-05, "loss": 0.0953, "step": 7568 }, { "epoch": 0.1340434857681733, "grad_norm": 0.8429528474807739, "learning_rate": 2.9156381609084667e-05, "loss": 0.0891, "step": 7569 }, { "epoch": 0.13406119530520175, "grad_norm": 1.3476706743240356, "learning_rate": 2.915609711931391e-05, "loss": 0.1294, "step": 7570 }, { "epoch": 0.13407890484223017, "grad_norm": 1.1332974433898926, "learning_rate": 2.9155812582971025e-05, "loss": 0.1161, "step": 7571 }, { "epoch": 0.1340966143792586, "grad_norm": 0.9715867042541504, "learning_rate": 2.9155528000056947e-05, "loss": 0.0982, "step": 7572 }, { "epoch": 0.13411432391628703, "grad_norm": 0.9911920428276062, "learning_rate": 2.9155243370572614e-05, "loss": 0.0996, "step": 7573 }, { "epoch": 0.13413203345331545, "grad_norm": 0.9034011960029602, "learning_rate": 2.9154958694518963e-05, "loss": 0.1115, "step": 7574 }, { "epoch": 0.13414974299034388, "grad_norm": 1.067611575126648, "learning_rate": 2.9154673971896927e-05, "loss": 0.1216, "step": 7575 }, { "epoch": 0.1341674525273723, "grad_norm": 0.6759408712387085, "learning_rate": 2.9154389202707446e-05, "loss": 0.1008, "step": 7576 }, { "epoch": 0.13418516206440073, "grad_norm": 0.9162687659263611, "learning_rate": 2.9154104386951457e-05, "loss": 0.1295, "step": 7577 }, { "epoch": 0.13420287160142916, "grad_norm": 1.0086431503295898, "learning_rate": 2.9153819524629897e-05, "loss": 0.1356, "step": 7578 }, { "epoch": 0.13422058113845758, "grad_norm": 1.2366708517074585, "learning_rate": 2.9153534615743704e-05, "loss": 0.1506, "step": 7579 }, { "epoch": 0.134238290675486, "grad_norm": 0.8779020309448242, "learning_rate": 2.915324966029381e-05, "loss": 0.0922, "step": 7580 }, { "epoch": 0.13425600021251444, "grad_norm": 1.5064311027526855, "learning_rate": 2.9152964658281157e-05, "loss": 0.095, "step": 7581 }, { "epoch": 0.13427370974954286, "grad_norm": 1.028993010520935, "learning_rate": 2.915267960970668e-05, "loss": 0.0969, "step": 7582 }, { "epoch": 0.1342914192865713, "grad_norm": 1.3093863725662231, "learning_rate": 2.9152394514571322e-05, "loss": 0.1299, "step": 7583 }, { "epoch": 0.13430912882359972, "grad_norm": 1.9789079427719116, "learning_rate": 2.9152109372876016e-05, "loss": 0.108, "step": 7584 }, { "epoch": 0.13432683836062817, "grad_norm": 1.1405115127563477, "learning_rate": 2.91518241846217e-05, "loss": 0.1074, "step": 7585 }, { "epoch": 0.1343445478976566, "grad_norm": 1.645377516746521, "learning_rate": 2.9151538949809317e-05, "loss": 0.1152, "step": 7586 }, { "epoch": 0.13436225743468502, "grad_norm": 1.5670779943466187, "learning_rate": 2.9151253668439802e-05, "loss": 0.1376, "step": 7587 }, { "epoch": 0.13437996697171345, "grad_norm": 0.9162499904632568, "learning_rate": 2.915096834051409e-05, "loss": 0.1288, "step": 7588 }, { "epoch": 0.13439767650874188, "grad_norm": 1.0394848585128784, "learning_rate": 2.9150682966033127e-05, "loss": 0.1056, "step": 7589 }, { "epoch": 0.1344153860457703, "grad_norm": 0.9343019127845764, "learning_rate": 2.9150397544997847e-05, "loss": 0.1301, "step": 7590 }, { "epoch": 0.13443309558279873, "grad_norm": 1.6309386491775513, "learning_rate": 2.9150112077409193e-05, "loss": 0.1151, "step": 7591 }, { "epoch": 0.13445080511982715, "grad_norm": 1.2578643560409546, "learning_rate": 2.91498265632681e-05, "loss": 0.1535, "step": 7592 }, { "epoch": 0.13446851465685558, "grad_norm": 1.1908094882965088, "learning_rate": 2.9149541002575507e-05, "loss": 0.1359, "step": 7593 }, { "epoch": 0.134486224193884, "grad_norm": 1.3398211002349854, "learning_rate": 2.9149255395332353e-05, "loss": 0.1514, "step": 7594 }, { "epoch": 0.13450393373091243, "grad_norm": 1.2596014738082886, "learning_rate": 2.9148969741539585e-05, "loss": 0.1245, "step": 7595 }, { "epoch": 0.13452164326794086, "grad_norm": 1.224136233329773, "learning_rate": 2.914868404119813e-05, "loss": 0.1178, "step": 7596 }, { "epoch": 0.1345393528049693, "grad_norm": 0.8307498693466187, "learning_rate": 2.9148398294308945e-05, "loss": 0.0996, "step": 7597 }, { "epoch": 0.1345570623419977, "grad_norm": 1.1100881099700928, "learning_rate": 2.914811250087296e-05, "loss": 0.132, "step": 7598 }, { "epoch": 0.13457477187902614, "grad_norm": 1.9882428646087646, "learning_rate": 2.9147826660891108e-05, "loss": 0.134, "step": 7599 }, { "epoch": 0.1345924814160546, "grad_norm": 1.491113305091858, "learning_rate": 2.9147540774364342e-05, "loss": 0.0952, "step": 7600 }, { "epoch": 0.13461019095308302, "grad_norm": 0.9677945375442505, "learning_rate": 2.91472548412936e-05, "loss": 0.0945, "step": 7601 }, { "epoch": 0.13462790049011145, "grad_norm": 1.2896751165390015, "learning_rate": 2.914696886167982e-05, "loss": 0.1294, "step": 7602 }, { "epoch": 0.13464561002713987, "grad_norm": 0.7157572507858276, "learning_rate": 2.914668283552394e-05, "loss": 0.1232, "step": 7603 }, { "epoch": 0.1346633195641683, "grad_norm": 1.1239206790924072, "learning_rate": 2.9146396762826903e-05, "loss": 0.147, "step": 7604 }, { "epoch": 0.13468102910119673, "grad_norm": 1.123052954673767, "learning_rate": 2.9146110643589656e-05, "loss": 0.0793, "step": 7605 }, { "epoch": 0.13469873863822515, "grad_norm": 0.8077875971794128, "learning_rate": 2.9145824477813132e-05, "loss": 0.1332, "step": 7606 }, { "epoch": 0.13471644817525358, "grad_norm": 0.8519202470779419, "learning_rate": 2.914553826549828e-05, "loss": 0.0868, "step": 7607 }, { "epoch": 0.134734157712282, "grad_norm": 1.0792840719223022, "learning_rate": 2.9145252006646036e-05, "loss": 0.1155, "step": 7608 }, { "epoch": 0.13475186724931043, "grad_norm": 1.8245488405227661, "learning_rate": 2.914496570125734e-05, "loss": 0.1641, "step": 7609 }, { "epoch": 0.13476957678633886, "grad_norm": 0.8809342384338379, "learning_rate": 2.9144679349333143e-05, "loss": 0.1341, "step": 7610 }, { "epoch": 0.13478728632336728, "grad_norm": 0.8929827213287354, "learning_rate": 2.9144392950874382e-05, "loss": 0.1274, "step": 7611 }, { "epoch": 0.1348049958603957, "grad_norm": 1.234744668006897, "learning_rate": 2.9144106505881995e-05, "loss": 0.1145, "step": 7612 }, { "epoch": 0.13482270539742414, "grad_norm": 1.2658748626708984, "learning_rate": 2.9143820014356932e-05, "loss": 0.101, "step": 7613 }, { "epoch": 0.13484041493445256, "grad_norm": 0.6867822408676147, "learning_rate": 2.914353347630013e-05, "loss": 0.1076, "step": 7614 }, { "epoch": 0.13485812447148102, "grad_norm": 1.1880122423171997, "learning_rate": 2.914324689171253e-05, "loss": 0.1593, "step": 7615 }, { "epoch": 0.13487583400850944, "grad_norm": 0.9515403509140015, "learning_rate": 2.9142960260595083e-05, "loss": 0.1081, "step": 7616 }, { "epoch": 0.13489354354553787, "grad_norm": 0.7792133688926697, "learning_rate": 2.9142673582948726e-05, "loss": 0.1046, "step": 7617 }, { "epoch": 0.1349112530825663, "grad_norm": 1.2695256471633911, "learning_rate": 2.9142386858774405e-05, "loss": 0.1654, "step": 7618 }, { "epoch": 0.13492896261959472, "grad_norm": 1.0772464275360107, "learning_rate": 2.914210008807306e-05, "loss": 0.1112, "step": 7619 }, { "epoch": 0.13494667215662315, "grad_norm": 0.8804036378860474, "learning_rate": 2.914181327084563e-05, "loss": 0.1493, "step": 7620 }, { "epoch": 0.13496438169365158, "grad_norm": 1.3709880113601685, "learning_rate": 2.914152640709307e-05, "loss": 0.1265, "step": 7621 }, { "epoch": 0.13498209123068, "grad_norm": 1.0341681241989136, "learning_rate": 2.914123949681632e-05, "loss": 0.1252, "step": 7622 }, { "epoch": 0.13499980076770843, "grad_norm": 1.6505768299102783, "learning_rate": 2.9140952540016318e-05, "loss": 0.1509, "step": 7623 }, { "epoch": 0.13501751030473685, "grad_norm": 0.9500051736831665, "learning_rate": 2.914066553669402e-05, "loss": 0.1009, "step": 7624 }, { "epoch": 0.13503521984176528, "grad_norm": 1.1386734247207642, "learning_rate": 2.9140378486850357e-05, "loss": 0.1183, "step": 7625 }, { "epoch": 0.1350529293787937, "grad_norm": 1.2158938646316528, "learning_rate": 2.914009139048628e-05, "loss": 0.1731, "step": 7626 }, { "epoch": 0.13507063891582213, "grad_norm": 1.5832127332687378, "learning_rate": 2.913980424760273e-05, "loss": 0.0926, "step": 7627 }, { "epoch": 0.13508834845285056, "grad_norm": 1.3387212753295898, "learning_rate": 2.9139517058200658e-05, "loss": 0.1351, "step": 7628 }, { "epoch": 0.13510605798987899, "grad_norm": 1.1444742679595947, "learning_rate": 2.9139229822281004e-05, "loss": 0.1385, "step": 7629 }, { "epoch": 0.13512376752690744, "grad_norm": 1.4176747798919678, "learning_rate": 2.9138942539844714e-05, "loss": 0.0956, "step": 7630 }, { "epoch": 0.13514147706393587, "grad_norm": 0.8513776659965515, "learning_rate": 2.913865521089273e-05, "loss": 0.1384, "step": 7631 }, { "epoch": 0.1351591866009643, "grad_norm": 0.9013084173202515, "learning_rate": 2.9138367835426006e-05, "loss": 0.12, "step": 7632 }, { "epoch": 0.13517689613799272, "grad_norm": 0.8755764961242676, "learning_rate": 2.913808041344548e-05, "loss": 0.1043, "step": 7633 }, { "epoch": 0.13519460567502115, "grad_norm": 1.1059439182281494, "learning_rate": 2.9137792944952096e-05, "loss": 0.1045, "step": 7634 }, { "epoch": 0.13521231521204957, "grad_norm": 0.9440594911575317, "learning_rate": 2.9137505429946806e-05, "loss": 0.1285, "step": 7635 }, { "epoch": 0.135230024749078, "grad_norm": 1.354251742362976, "learning_rate": 2.9137217868430554e-05, "loss": 0.1138, "step": 7636 }, { "epoch": 0.13524773428610642, "grad_norm": 0.901574432849884, "learning_rate": 2.9136930260404285e-05, "loss": 0.1132, "step": 7637 }, { "epoch": 0.13526544382313485, "grad_norm": 1.275336742401123, "learning_rate": 2.913664260586894e-05, "loss": 0.1271, "step": 7638 }, { "epoch": 0.13528315336016328, "grad_norm": 0.9919061064720154, "learning_rate": 2.913635490482548e-05, "loss": 0.1377, "step": 7639 }, { "epoch": 0.1353008628971917, "grad_norm": 0.7831404209136963, "learning_rate": 2.9136067157274835e-05, "loss": 0.1248, "step": 7640 }, { "epoch": 0.13531857243422013, "grad_norm": 0.7586182951927185, "learning_rate": 2.913577936321796e-05, "loss": 0.1132, "step": 7641 }, { "epoch": 0.13533628197124856, "grad_norm": 1.1930756568908691, "learning_rate": 2.91354915226558e-05, "loss": 0.1349, "step": 7642 }, { "epoch": 0.13535399150827698, "grad_norm": 0.9737448692321777, "learning_rate": 2.913520363558931e-05, "loss": 0.1061, "step": 7643 }, { "epoch": 0.1353717010453054, "grad_norm": 1.1085001230239868, "learning_rate": 2.913491570201942e-05, "loss": 0.1248, "step": 7644 }, { "epoch": 0.13538941058233386, "grad_norm": 1.0293384790420532, "learning_rate": 2.913462772194709e-05, "loss": 0.0939, "step": 7645 }, { "epoch": 0.1354071201193623, "grad_norm": 0.9931119084358215, "learning_rate": 2.9134339695373267e-05, "loss": 0.1268, "step": 7646 }, { "epoch": 0.13542482965639072, "grad_norm": 0.9266719222068787, "learning_rate": 2.9134051622298893e-05, "loss": 0.1388, "step": 7647 }, { "epoch": 0.13544253919341914, "grad_norm": 1.1982237100601196, "learning_rate": 2.913376350272492e-05, "loss": 0.1284, "step": 7648 }, { "epoch": 0.13546024873044757, "grad_norm": 1.0083820819854736, "learning_rate": 2.9133475336652293e-05, "loss": 0.1134, "step": 7649 }, { "epoch": 0.135477958267476, "grad_norm": 0.91825270652771, "learning_rate": 2.9133187124081963e-05, "loss": 0.1219, "step": 7650 }, { "epoch": 0.13549566780450442, "grad_norm": 1.1625500917434692, "learning_rate": 2.9132898865014875e-05, "loss": 0.0964, "step": 7651 }, { "epoch": 0.13551337734153285, "grad_norm": 1.044414758682251, "learning_rate": 2.9132610559451984e-05, "loss": 0.137, "step": 7652 }, { "epoch": 0.13553108687856127, "grad_norm": 1.2988632917404175, "learning_rate": 2.9132322207394227e-05, "loss": 0.1715, "step": 7653 }, { "epoch": 0.1355487964155897, "grad_norm": 1.4272018671035767, "learning_rate": 2.9132033808842565e-05, "loss": 0.1276, "step": 7654 }, { "epoch": 0.13556650595261813, "grad_norm": 1.894922137260437, "learning_rate": 2.9131745363797938e-05, "loss": 0.1391, "step": 7655 }, { "epoch": 0.13558421548964655, "grad_norm": 2.3025758266448975, "learning_rate": 2.91314568722613e-05, "loss": 0.1479, "step": 7656 }, { "epoch": 0.13560192502667498, "grad_norm": 1.297036051750183, "learning_rate": 2.91311683342336e-05, "loss": 0.1229, "step": 7657 }, { "epoch": 0.1356196345637034, "grad_norm": 0.7182132005691528, "learning_rate": 2.9130879749715782e-05, "loss": 0.1202, "step": 7658 }, { "epoch": 0.13563734410073186, "grad_norm": 1.1570854187011719, "learning_rate": 2.91305911187088e-05, "loss": 0.1324, "step": 7659 }, { "epoch": 0.1356550536377603, "grad_norm": 1.358628749847412, "learning_rate": 2.9130302441213604e-05, "loss": 0.1364, "step": 7660 }, { "epoch": 0.1356727631747887, "grad_norm": 0.9561678171157837, "learning_rate": 2.913001371723114e-05, "loss": 0.1271, "step": 7661 }, { "epoch": 0.13569047271181714, "grad_norm": 1.0423578023910522, "learning_rate": 2.912972494676236e-05, "loss": 0.1379, "step": 7662 }, { "epoch": 0.13570818224884557, "grad_norm": 1.2309855222702026, "learning_rate": 2.9129436129808214e-05, "loss": 0.1427, "step": 7663 }, { "epoch": 0.135725891785874, "grad_norm": 1.0932894945144653, "learning_rate": 2.9129147266369657e-05, "loss": 0.1412, "step": 7664 }, { "epoch": 0.13574360132290242, "grad_norm": 0.8369607925415039, "learning_rate": 2.9128858356447626e-05, "loss": 0.1168, "step": 7665 }, { "epoch": 0.13576131085993084, "grad_norm": 1.2266225814819336, "learning_rate": 2.9128569400043086e-05, "loss": 0.107, "step": 7666 }, { "epoch": 0.13577902039695927, "grad_norm": 0.6715453267097473, "learning_rate": 2.912828039715698e-05, "loss": 0.0934, "step": 7667 }, { "epoch": 0.1357967299339877, "grad_norm": 0.8331812620162964, "learning_rate": 2.9127991347790263e-05, "loss": 0.1379, "step": 7668 }, { "epoch": 0.13581443947101612, "grad_norm": 1.193427324295044, "learning_rate": 2.912770225194388e-05, "loss": 0.1825, "step": 7669 }, { "epoch": 0.13583214900804455, "grad_norm": 0.8088653683662415, "learning_rate": 2.9127413109618783e-05, "loss": 0.1114, "step": 7670 }, { "epoch": 0.13584985854507298, "grad_norm": 1.177916407585144, "learning_rate": 2.912712392081593e-05, "loss": 0.127, "step": 7671 }, { "epoch": 0.1358675680821014, "grad_norm": 0.7003226280212402, "learning_rate": 2.912683468553627e-05, "loss": 0.0949, "step": 7672 }, { "epoch": 0.13588527761912983, "grad_norm": 0.9974634647369385, "learning_rate": 2.9126545403780748e-05, "loss": 0.1456, "step": 7673 }, { "epoch": 0.13590298715615828, "grad_norm": 1.6118104457855225, "learning_rate": 2.9126256075550323e-05, "loss": 0.0806, "step": 7674 }, { "epoch": 0.1359206966931867, "grad_norm": 1.2912728786468506, "learning_rate": 2.9125966700845946e-05, "loss": 0.119, "step": 7675 }, { "epoch": 0.13593840623021514, "grad_norm": 0.9499444365501404, "learning_rate": 2.9125677279668566e-05, "loss": 0.1, "step": 7676 }, { "epoch": 0.13595611576724356, "grad_norm": 0.9883235692977905, "learning_rate": 2.9125387812019135e-05, "loss": 0.1257, "step": 7677 }, { "epoch": 0.135973825304272, "grad_norm": 1.1103790998458862, "learning_rate": 2.912509829789861e-05, "loss": 0.1827, "step": 7678 }, { "epoch": 0.13599153484130042, "grad_norm": 2.059781789779663, "learning_rate": 2.9124808737307935e-05, "loss": 0.1694, "step": 7679 }, { "epoch": 0.13600924437832884, "grad_norm": 1.4241164922714233, "learning_rate": 2.9124519130248074e-05, "loss": 0.0822, "step": 7680 }, { "epoch": 0.13602695391535727, "grad_norm": 1.045973539352417, "learning_rate": 2.912422947671997e-05, "loss": 0.1243, "step": 7681 }, { "epoch": 0.1360446634523857, "grad_norm": 0.9389161467552185, "learning_rate": 2.912393977672458e-05, "loss": 0.1041, "step": 7682 }, { "epoch": 0.13606237298941412, "grad_norm": 0.8376295566558838, "learning_rate": 2.9123650030262854e-05, "loss": 0.0978, "step": 7683 }, { "epoch": 0.13608008252644255, "grad_norm": 0.9283280372619629, "learning_rate": 2.9123360237335752e-05, "loss": 0.0914, "step": 7684 }, { "epoch": 0.13609779206347097, "grad_norm": 0.8898047208786011, "learning_rate": 2.912307039794422e-05, "loss": 0.1766, "step": 7685 }, { "epoch": 0.1361155016004994, "grad_norm": 1.2437602281570435, "learning_rate": 2.912278051208922e-05, "loss": 0.1228, "step": 7686 }, { "epoch": 0.13613321113752783, "grad_norm": 0.8905254006385803, "learning_rate": 2.9122490579771698e-05, "loss": 0.1022, "step": 7687 }, { "epoch": 0.13615092067455625, "grad_norm": 1.1237832307815552, "learning_rate": 2.9122200600992608e-05, "loss": 0.0983, "step": 7688 }, { "epoch": 0.1361686302115847, "grad_norm": 1.5430887937545776, "learning_rate": 2.9121910575752906e-05, "loss": 0.1329, "step": 7689 }, { "epoch": 0.13618633974861313, "grad_norm": 1.2053889036178589, "learning_rate": 2.912162050405355e-05, "loss": 0.1337, "step": 7690 }, { "epoch": 0.13620404928564156, "grad_norm": 0.9565767049789429, "learning_rate": 2.912133038589549e-05, "loss": 0.1385, "step": 7691 }, { "epoch": 0.13622175882266999, "grad_norm": 1.4859943389892578, "learning_rate": 2.912104022127968e-05, "loss": 0.1403, "step": 7692 }, { "epoch": 0.1362394683596984, "grad_norm": 0.9199531078338623, "learning_rate": 2.9120750010207076e-05, "loss": 0.1333, "step": 7693 }, { "epoch": 0.13625717789672684, "grad_norm": 1.5445574522018433, "learning_rate": 2.9120459752678635e-05, "loss": 0.1467, "step": 7694 }, { "epoch": 0.13627488743375527, "grad_norm": 0.9289002418518066, "learning_rate": 2.9120169448695305e-05, "loss": 0.1097, "step": 7695 }, { "epoch": 0.1362925969707837, "grad_norm": 0.700183629989624, "learning_rate": 2.9119879098258053e-05, "loss": 0.1179, "step": 7696 }, { "epoch": 0.13631030650781212, "grad_norm": 1.2661406993865967, "learning_rate": 2.9119588701367823e-05, "loss": 0.1199, "step": 7697 }, { "epoch": 0.13632801604484054, "grad_norm": 1.4450820684432983, "learning_rate": 2.9119298258025574e-05, "loss": 0.1293, "step": 7698 }, { "epoch": 0.13634572558186897, "grad_norm": 1.1973336935043335, "learning_rate": 2.9119007768232262e-05, "loss": 0.1121, "step": 7699 }, { "epoch": 0.1363634351188974, "grad_norm": 1.1049518585205078, "learning_rate": 2.9118717231988842e-05, "loss": 0.1033, "step": 7700 }, { "epoch": 0.13638114465592582, "grad_norm": 1.1472177505493164, "learning_rate": 2.911842664929627e-05, "loss": 0.1264, "step": 7701 }, { "epoch": 0.13639885419295425, "grad_norm": 0.7290621995925903, "learning_rate": 2.9118136020155505e-05, "loss": 0.1036, "step": 7702 }, { "epoch": 0.13641656372998268, "grad_norm": 0.9727233648300171, "learning_rate": 2.9117845344567498e-05, "loss": 0.1073, "step": 7703 }, { "epoch": 0.13643427326701113, "grad_norm": 0.9419469237327576, "learning_rate": 2.9117554622533205e-05, "loss": 0.1241, "step": 7704 }, { "epoch": 0.13645198280403956, "grad_norm": 1.5087811946868896, "learning_rate": 2.9117263854053592e-05, "loss": 0.1443, "step": 7705 }, { "epoch": 0.13646969234106798, "grad_norm": 1.563431739807129, "learning_rate": 2.91169730391296e-05, "loss": 0.1087, "step": 7706 }, { "epoch": 0.1364874018780964, "grad_norm": 1.1799981594085693, "learning_rate": 2.9116682177762203e-05, "loss": 0.1189, "step": 7707 }, { "epoch": 0.13650511141512484, "grad_norm": 1.4515026807785034, "learning_rate": 2.9116391269952345e-05, "loss": 0.1077, "step": 7708 }, { "epoch": 0.13652282095215326, "grad_norm": 1.3636890649795532, "learning_rate": 2.911610031570099e-05, "loss": 0.1355, "step": 7709 }, { "epoch": 0.1365405304891817, "grad_norm": 0.9240149259567261, "learning_rate": 2.911580931500909e-05, "loss": 0.1409, "step": 7710 }, { "epoch": 0.13655824002621011, "grad_norm": 0.7584317922592163, "learning_rate": 2.9115518267877607e-05, "loss": 0.0829, "step": 7711 }, { "epoch": 0.13657594956323854, "grad_norm": 1.3223373889923096, "learning_rate": 2.9115227174307498e-05, "loss": 0.1115, "step": 7712 }, { "epoch": 0.13659365910026697, "grad_norm": 0.9528888463973999, "learning_rate": 2.9114936034299715e-05, "loss": 0.1282, "step": 7713 }, { "epoch": 0.1366113686372954, "grad_norm": 0.7961279153823853, "learning_rate": 2.9114644847855224e-05, "loss": 0.1128, "step": 7714 }, { "epoch": 0.13662907817432382, "grad_norm": 1.2413841485977173, "learning_rate": 2.9114353614974978e-05, "loss": 0.1341, "step": 7715 }, { "epoch": 0.13664678771135225, "grad_norm": 1.8050782680511475, "learning_rate": 2.9114062335659932e-05, "loss": 0.0908, "step": 7716 }, { "epoch": 0.13666449724838067, "grad_norm": 1.3230209350585938, "learning_rate": 2.9113771009911054e-05, "loss": 0.1457, "step": 7717 }, { "epoch": 0.1366822067854091, "grad_norm": 1.272345781326294, "learning_rate": 2.9113479637729294e-05, "loss": 0.1295, "step": 7718 }, { "epoch": 0.13669991632243755, "grad_norm": 0.9878420233726501, "learning_rate": 2.9113188219115617e-05, "loss": 0.1486, "step": 7719 }, { "epoch": 0.13671762585946598, "grad_norm": 1.6170430183410645, "learning_rate": 2.9112896754070976e-05, "loss": 0.1619, "step": 7720 }, { "epoch": 0.1367353353964944, "grad_norm": 1.3488622903823853, "learning_rate": 2.911260524259633e-05, "loss": 0.1341, "step": 7721 }, { "epoch": 0.13675304493352283, "grad_norm": 0.9381994605064392, "learning_rate": 2.9112313684692643e-05, "loss": 0.1295, "step": 7722 }, { "epoch": 0.13677075447055126, "grad_norm": 0.669844925403595, "learning_rate": 2.911202208036087e-05, "loss": 0.1057, "step": 7723 }, { "epoch": 0.13678846400757969, "grad_norm": 1.0024373531341553, "learning_rate": 2.9111730429601975e-05, "loss": 0.1161, "step": 7724 }, { "epoch": 0.1368061735446081, "grad_norm": 1.2597883939743042, "learning_rate": 2.911143873241691e-05, "loss": 0.1555, "step": 7725 }, { "epoch": 0.13682388308163654, "grad_norm": 1.6617802381515503, "learning_rate": 2.9111146988806638e-05, "loss": 0.1335, "step": 7726 }, { "epoch": 0.13684159261866496, "grad_norm": 1.1598684787750244, "learning_rate": 2.9110855198772124e-05, "loss": 0.1301, "step": 7727 }, { "epoch": 0.1368593021556934, "grad_norm": 0.8613225817680359, "learning_rate": 2.9110563362314322e-05, "loss": 0.0905, "step": 7728 }, { "epoch": 0.13687701169272182, "grad_norm": 0.8487030267715454, "learning_rate": 2.9110271479434193e-05, "loss": 0.1216, "step": 7729 }, { "epoch": 0.13689472122975024, "grad_norm": 1.0619596242904663, "learning_rate": 2.91099795501327e-05, "loss": 0.0875, "step": 7730 }, { "epoch": 0.13691243076677867, "grad_norm": 1.167917251586914, "learning_rate": 2.9109687574410797e-05, "loss": 0.1284, "step": 7731 }, { "epoch": 0.1369301403038071, "grad_norm": 0.8549022674560547, "learning_rate": 2.910939555226945e-05, "loss": 0.1139, "step": 7732 }, { "epoch": 0.13694784984083552, "grad_norm": 1.031272292137146, "learning_rate": 2.910910348370962e-05, "loss": 0.1266, "step": 7733 }, { "epoch": 0.13696555937786398, "grad_norm": 1.6355637311935425, "learning_rate": 2.9108811368732268e-05, "loss": 0.1346, "step": 7734 }, { "epoch": 0.1369832689148924, "grad_norm": 0.8368094563484192, "learning_rate": 2.910851920733835e-05, "loss": 0.0999, "step": 7735 }, { "epoch": 0.13700097845192083, "grad_norm": 0.8989651799201965, "learning_rate": 2.9108226999528837e-05, "loss": 0.1252, "step": 7736 }, { "epoch": 0.13701868798894926, "grad_norm": 0.9826761484146118, "learning_rate": 2.910793474530468e-05, "loss": 0.1738, "step": 7737 }, { "epoch": 0.13703639752597768, "grad_norm": 0.9907819628715515, "learning_rate": 2.9107642444666843e-05, "loss": 0.1512, "step": 7738 }, { "epoch": 0.1370541070630061, "grad_norm": 1.033267855644226, "learning_rate": 2.9107350097616293e-05, "loss": 0.0877, "step": 7739 }, { "epoch": 0.13707181660003454, "grad_norm": 1.2198444604873657, "learning_rate": 2.9107057704153987e-05, "loss": 0.0954, "step": 7740 }, { "epoch": 0.13708952613706296, "grad_norm": 0.7965579628944397, "learning_rate": 2.9106765264280888e-05, "loss": 0.1239, "step": 7741 }, { "epoch": 0.1371072356740914, "grad_norm": 1.0670020580291748, "learning_rate": 2.9106472777997956e-05, "loss": 0.1576, "step": 7742 }, { "epoch": 0.13712494521111981, "grad_norm": 0.7480011582374573, "learning_rate": 2.9106180245306157e-05, "loss": 0.0984, "step": 7743 }, { "epoch": 0.13714265474814824, "grad_norm": 1.3280456066131592, "learning_rate": 2.9105887666206453e-05, "loss": 0.1191, "step": 7744 }, { "epoch": 0.13716036428517667, "grad_norm": 1.068070888519287, "learning_rate": 2.9105595040699802e-05, "loss": 0.1164, "step": 7745 }, { "epoch": 0.1371780738222051, "grad_norm": 1.5034468173980713, "learning_rate": 2.9105302368787174e-05, "loss": 0.0925, "step": 7746 }, { "epoch": 0.13719578335923352, "grad_norm": 1.1954419612884521, "learning_rate": 2.9105009650469524e-05, "loss": 0.1545, "step": 7747 }, { "epoch": 0.13721349289626195, "grad_norm": 1.6205581426620483, "learning_rate": 2.910471688574782e-05, "loss": 0.1357, "step": 7748 }, { "epoch": 0.1372312024332904, "grad_norm": 1.1774027347564697, "learning_rate": 2.9104424074623024e-05, "loss": 0.1508, "step": 7749 }, { "epoch": 0.13724891197031883, "grad_norm": 1.0704333782196045, "learning_rate": 2.9104131217096102e-05, "loss": 0.1151, "step": 7750 }, { "epoch": 0.13726662150734725, "grad_norm": 0.776602566242218, "learning_rate": 2.910383831316801e-05, "loss": 0.0982, "step": 7751 }, { "epoch": 0.13728433104437568, "grad_norm": 0.9463326334953308, "learning_rate": 2.9103545362839722e-05, "loss": 0.1447, "step": 7752 }, { "epoch": 0.1373020405814041, "grad_norm": 1.324352741241455, "learning_rate": 2.9103252366112195e-05, "loss": 0.1134, "step": 7753 }, { "epoch": 0.13731975011843253, "grad_norm": 1.5731524229049683, "learning_rate": 2.910295932298639e-05, "loss": 0.145, "step": 7754 }, { "epoch": 0.13733745965546096, "grad_norm": 1.0688698291778564, "learning_rate": 2.910266623346328e-05, "loss": 0.1429, "step": 7755 }, { "epoch": 0.13735516919248938, "grad_norm": 0.9506707787513733, "learning_rate": 2.910237309754382e-05, "loss": 0.1283, "step": 7756 }, { "epoch": 0.1373728787295178, "grad_norm": 0.7445931434631348, "learning_rate": 2.9102079915228982e-05, "loss": 0.1246, "step": 7757 }, { "epoch": 0.13739058826654624, "grad_norm": 0.9559561014175415, "learning_rate": 2.9101786686519728e-05, "loss": 0.1327, "step": 7758 }, { "epoch": 0.13740829780357466, "grad_norm": 1.2932758331298828, "learning_rate": 2.910149341141702e-05, "loss": 0.1386, "step": 7759 }, { "epoch": 0.1374260073406031, "grad_norm": 0.4706224203109741, "learning_rate": 2.9101200089921826e-05, "loss": 0.1471, "step": 7760 }, { "epoch": 0.13744371687763152, "grad_norm": 0.8311017751693726, "learning_rate": 2.910090672203511e-05, "loss": 0.1278, "step": 7761 }, { "epoch": 0.13746142641465994, "grad_norm": 0.8215199708938599, "learning_rate": 2.910061330775784e-05, "loss": 0.1274, "step": 7762 }, { "epoch": 0.13747913595168837, "grad_norm": 1.3155876398086548, "learning_rate": 2.9100319847090974e-05, "loss": 0.131, "step": 7763 }, { "epoch": 0.13749684548871682, "grad_norm": 0.8621727228164673, "learning_rate": 2.910002634003548e-05, "loss": 0.1051, "step": 7764 }, { "epoch": 0.13751455502574525, "grad_norm": 1.3675776720046997, "learning_rate": 2.909973278659233e-05, "loss": 0.1276, "step": 7765 }, { "epoch": 0.13753226456277368, "grad_norm": 2.484750509262085, "learning_rate": 2.9099439186762486e-05, "loss": 0.1169, "step": 7766 }, { "epoch": 0.1375499740998021, "grad_norm": 1.7269599437713623, "learning_rate": 2.9099145540546907e-05, "loss": 0.1967, "step": 7767 }, { "epoch": 0.13756768363683053, "grad_norm": 1.4728455543518066, "learning_rate": 2.909885184794657e-05, "loss": 0.0759, "step": 7768 }, { "epoch": 0.13758539317385896, "grad_norm": 1.4632134437561035, "learning_rate": 2.9098558108962438e-05, "loss": 0.0942, "step": 7769 }, { "epoch": 0.13760310271088738, "grad_norm": 0.8398409485816956, "learning_rate": 2.909826432359547e-05, "loss": 0.0997, "step": 7770 }, { "epoch": 0.1376208122479158, "grad_norm": 1.1004339456558228, "learning_rate": 2.9097970491846645e-05, "loss": 0.1431, "step": 7771 }, { "epoch": 0.13763852178494423, "grad_norm": 0.9868416786193848, "learning_rate": 2.909767661371692e-05, "loss": 0.0892, "step": 7772 }, { "epoch": 0.13765623132197266, "grad_norm": 1.3502296209335327, "learning_rate": 2.9097382689207264e-05, "loss": 0.139, "step": 7773 }, { "epoch": 0.1376739408590011, "grad_norm": 2.784966230392456, "learning_rate": 2.9097088718318644e-05, "loss": 0.1266, "step": 7774 }, { "epoch": 0.1376916503960295, "grad_norm": 1.20030677318573, "learning_rate": 2.9096794701052032e-05, "loss": 0.132, "step": 7775 }, { "epoch": 0.13770935993305794, "grad_norm": 1.6001341342926025, "learning_rate": 2.9096500637408388e-05, "loss": 0.0884, "step": 7776 }, { "epoch": 0.13772706947008637, "grad_norm": 1.2217257022857666, "learning_rate": 2.9096206527388684e-05, "loss": 0.1291, "step": 7777 }, { "epoch": 0.13774477900711482, "grad_norm": 1.5618816614151, "learning_rate": 2.9095912370993885e-05, "loss": 0.1608, "step": 7778 }, { "epoch": 0.13776248854414325, "grad_norm": 1.1424915790557861, "learning_rate": 2.9095618168224964e-05, "loss": 0.1105, "step": 7779 }, { "epoch": 0.13778019808117167, "grad_norm": 1.2276397943496704, "learning_rate": 2.9095323919082876e-05, "loss": 0.1112, "step": 7780 }, { "epoch": 0.1377979076182001, "grad_norm": 1.0191991329193115, "learning_rate": 2.9095029623568604e-05, "loss": 0.1011, "step": 7781 }, { "epoch": 0.13781561715522853, "grad_norm": 0.9119118452072144, "learning_rate": 2.9094735281683114e-05, "loss": 0.1123, "step": 7782 }, { "epoch": 0.13783332669225695, "grad_norm": 1.0418131351470947, "learning_rate": 2.9094440893427364e-05, "loss": 0.1143, "step": 7783 }, { "epoch": 0.13785103622928538, "grad_norm": 0.8352648019790649, "learning_rate": 2.9094146458802334e-05, "loss": 0.1266, "step": 7784 }, { "epoch": 0.1378687457663138, "grad_norm": 1.1156175136566162, "learning_rate": 2.909385197780898e-05, "loss": 0.127, "step": 7785 }, { "epoch": 0.13788645530334223, "grad_norm": 1.2600946426391602, "learning_rate": 2.9093557450448287e-05, "loss": 0.1497, "step": 7786 }, { "epoch": 0.13790416484037066, "grad_norm": 0.6260522603988647, "learning_rate": 2.9093262876721212e-05, "loss": 0.1419, "step": 7787 }, { "epoch": 0.13792187437739908, "grad_norm": 1.3125280141830444, "learning_rate": 2.9092968256628726e-05, "loss": 0.1343, "step": 7788 }, { "epoch": 0.1379395839144275, "grad_norm": 0.9544423818588257, "learning_rate": 2.90926735901718e-05, "loss": 0.1094, "step": 7789 }, { "epoch": 0.13795729345145594, "grad_norm": 1.051222562789917, "learning_rate": 2.9092378877351407e-05, "loss": 0.1346, "step": 7790 }, { "epoch": 0.13797500298848436, "grad_norm": 0.8245951533317566, "learning_rate": 2.909208411816851e-05, "loss": 0.08, "step": 7791 }, { "epoch": 0.1379927125255128, "grad_norm": 1.1361980438232422, "learning_rate": 2.909178931262408e-05, "loss": 0.0767, "step": 7792 }, { "epoch": 0.13801042206254124, "grad_norm": 0.8212150931358337, "learning_rate": 2.909149446071909e-05, "loss": 0.0861, "step": 7793 }, { "epoch": 0.13802813159956967, "grad_norm": 1.1424007415771484, "learning_rate": 2.909119956245451e-05, "loss": 0.1066, "step": 7794 }, { "epoch": 0.1380458411365981, "grad_norm": 1.6258903741836548, "learning_rate": 2.909090461783131e-05, "loss": 0.1159, "step": 7795 }, { "epoch": 0.13806355067362652, "grad_norm": 1.2049063444137573, "learning_rate": 2.9090609626850456e-05, "loss": 0.1053, "step": 7796 }, { "epoch": 0.13808126021065495, "grad_norm": 0.9707575440406799, "learning_rate": 2.9090314589512922e-05, "loss": 0.1077, "step": 7797 }, { "epoch": 0.13809896974768338, "grad_norm": 0.8899399042129517, "learning_rate": 2.909001950581968e-05, "loss": 0.1022, "step": 7798 }, { "epoch": 0.1381166792847118, "grad_norm": 1.245018720626831, "learning_rate": 2.9089724375771696e-05, "loss": 0.0964, "step": 7799 }, { "epoch": 0.13813438882174023, "grad_norm": 1.3298667669296265, "learning_rate": 2.9089429199369945e-05, "loss": 0.1368, "step": 7800 }, { "epoch": 0.13815209835876865, "grad_norm": 1.2629188299179077, "learning_rate": 2.9089133976615398e-05, "loss": 0.1389, "step": 7801 }, { "epoch": 0.13816980789579708, "grad_norm": 0.8407302498817444, "learning_rate": 2.9088838707509025e-05, "loss": 0.1291, "step": 7802 }, { "epoch": 0.1381875174328255, "grad_norm": 1.0556455850601196, "learning_rate": 2.9088543392051797e-05, "loss": 0.1441, "step": 7803 }, { "epoch": 0.13820522696985393, "grad_norm": 1.1417094469070435, "learning_rate": 2.9088248030244685e-05, "loss": 0.1532, "step": 7804 }, { "epoch": 0.13822293650688236, "grad_norm": 1.0272963047027588, "learning_rate": 2.9087952622088663e-05, "loss": 0.1138, "step": 7805 }, { "epoch": 0.1382406460439108, "grad_norm": 1.2757781744003296, "learning_rate": 2.9087657167584705e-05, "loss": 0.1231, "step": 7806 }, { "epoch": 0.1382583555809392, "grad_norm": 0.9003292322158813, "learning_rate": 2.9087361666733777e-05, "loss": 0.1249, "step": 7807 }, { "epoch": 0.13827606511796767, "grad_norm": 2.2195236682891846, "learning_rate": 2.908706611953685e-05, "loss": 0.1162, "step": 7808 }, { "epoch": 0.1382937746549961, "grad_norm": 1.0639057159423828, "learning_rate": 2.9086770525994902e-05, "loss": 0.1311, "step": 7809 }, { "epoch": 0.13831148419202452, "grad_norm": 1.068384051322937, "learning_rate": 2.908647488610891e-05, "loss": 0.1304, "step": 7810 }, { "epoch": 0.13832919372905295, "grad_norm": 1.3512591123580933, "learning_rate": 2.9086179199879835e-05, "loss": 0.1368, "step": 7811 }, { "epoch": 0.13834690326608137, "grad_norm": 1.1418043375015259, "learning_rate": 2.908588346730866e-05, "loss": 0.0971, "step": 7812 }, { "epoch": 0.1383646128031098, "grad_norm": 1.0981069803237915, "learning_rate": 2.9085587688396347e-05, "loss": 0.1368, "step": 7813 }, { "epoch": 0.13838232234013823, "grad_norm": 1.2545182704925537, "learning_rate": 2.9085291863143876e-05, "loss": 0.1415, "step": 7814 }, { "epoch": 0.13840003187716665, "grad_norm": 1.3091822862625122, "learning_rate": 2.908499599155222e-05, "loss": 0.1312, "step": 7815 }, { "epoch": 0.13841774141419508, "grad_norm": 0.9208365678787231, "learning_rate": 2.9084700073622355e-05, "loss": 0.1011, "step": 7816 }, { "epoch": 0.1384354509512235, "grad_norm": 1.3066502809524536, "learning_rate": 2.9084404109355247e-05, "loss": 0.1261, "step": 7817 }, { "epoch": 0.13845316048825193, "grad_norm": 0.7451284527778625, "learning_rate": 2.908410809875188e-05, "loss": 0.0913, "step": 7818 }, { "epoch": 0.13847087002528036, "grad_norm": 1.4783458709716797, "learning_rate": 2.9083812041813216e-05, "loss": 0.1308, "step": 7819 }, { "epoch": 0.13848857956230878, "grad_norm": 0.9675626754760742, "learning_rate": 2.908351593854024e-05, "loss": 0.0991, "step": 7820 }, { "epoch": 0.1385062890993372, "grad_norm": 1.5877729654312134, "learning_rate": 2.9083219788933916e-05, "loss": 0.1422, "step": 7821 }, { "epoch": 0.13852399863636564, "grad_norm": 0.7497804760932922, "learning_rate": 2.9082923592995225e-05, "loss": 0.1049, "step": 7822 }, { "epoch": 0.1385417081733941, "grad_norm": 1.2357988357543945, "learning_rate": 2.908262735072514e-05, "loss": 0.1167, "step": 7823 }, { "epoch": 0.13855941771042252, "grad_norm": 0.5613104104995728, "learning_rate": 2.9082331062124638e-05, "loss": 0.0958, "step": 7824 }, { "epoch": 0.13857712724745094, "grad_norm": 1.1996499300003052, "learning_rate": 2.908203472719469e-05, "loss": 0.1132, "step": 7825 }, { "epoch": 0.13859483678447937, "grad_norm": 0.888157308101654, "learning_rate": 2.9081738345936266e-05, "loss": 0.1293, "step": 7826 }, { "epoch": 0.1386125463215078, "grad_norm": 2.1118805408477783, "learning_rate": 2.908144191835035e-05, "loss": 0.148, "step": 7827 }, { "epoch": 0.13863025585853622, "grad_norm": 0.748660147190094, "learning_rate": 2.9081145444437918e-05, "loss": 0.1183, "step": 7828 }, { "epoch": 0.13864796539556465, "grad_norm": 0.8227342963218689, "learning_rate": 2.9080848924199938e-05, "loss": 0.1062, "step": 7829 }, { "epoch": 0.13866567493259307, "grad_norm": 1.620086908340454, "learning_rate": 2.9080552357637395e-05, "loss": 0.0774, "step": 7830 }, { "epoch": 0.1386833844696215, "grad_norm": 1.0722497701644897, "learning_rate": 2.9080255744751253e-05, "loss": 0.1289, "step": 7831 }, { "epoch": 0.13870109400664993, "grad_norm": 1.1935303211212158, "learning_rate": 2.9079959085542498e-05, "loss": 0.1382, "step": 7832 }, { "epoch": 0.13871880354367835, "grad_norm": 1.2589080333709717, "learning_rate": 2.9079662380012097e-05, "loss": 0.1388, "step": 7833 }, { "epoch": 0.13873651308070678, "grad_norm": 1.5743656158447266, "learning_rate": 2.9079365628161035e-05, "loss": 0.1084, "step": 7834 }, { "epoch": 0.1387542226177352, "grad_norm": 1.0682971477508545, "learning_rate": 2.9079068829990282e-05, "loss": 0.098, "step": 7835 }, { "epoch": 0.13877193215476363, "grad_norm": 1.3395137786865234, "learning_rate": 2.9078771985500816e-05, "loss": 0.1237, "step": 7836 }, { "epoch": 0.13878964169179206, "grad_norm": 0.9051039814949036, "learning_rate": 2.9078475094693616e-05, "loss": 0.0817, "step": 7837 }, { "epoch": 0.1388073512288205, "grad_norm": 1.3814432621002197, "learning_rate": 2.9078178157569655e-05, "loss": 0.1399, "step": 7838 }, { "epoch": 0.13882506076584894, "grad_norm": 1.2188501358032227, "learning_rate": 2.9077881174129912e-05, "loss": 0.1464, "step": 7839 }, { "epoch": 0.13884277030287737, "grad_norm": 0.8360077738761902, "learning_rate": 2.9077584144375367e-05, "loss": 0.1193, "step": 7840 }, { "epoch": 0.1388604798399058, "grad_norm": 1.4372681379318237, "learning_rate": 2.907728706830699e-05, "loss": 0.0884, "step": 7841 }, { "epoch": 0.13887818937693422, "grad_norm": 1.0890165567398071, "learning_rate": 2.9076989945925765e-05, "loss": 0.1087, "step": 7842 }, { "epoch": 0.13889589891396265, "grad_norm": 1.1472936868667603, "learning_rate": 2.9076692777232666e-05, "loss": 0.1634, "step": 7843 }, { "epoch": 0.13891360845099107, "grad_norm": 0.8517131209373474, "learning_rate": 2.907639556222867e-05, "loss": 0.1102, "step": 7844 }, { "epoch": 0.1389313179880195, "grad_norm": 1.1419072151184082, "learning_rate": 2.9076098300914762e-05, "loss": 0.1514, "step": 7845 }, { "epoch": 0.13894902752504792, "grad_norm": 1.5253283977508545, "learning_rate": 2.907580099329191e-05, "loss": 0.1108, "step": 7846 }, { "epoch": 0.13896673706207635, "grad_norm": 1.0009465217590332, "learning_rate": 2.9075503639361097e-05, "loss": 0.1226, "step": 7847 }, { "epoch": 0.13898444659910478, "grad_norm": 1.2305729389190674, "learning_rate": 2.90752062391233e-05, "loss": 0.1147, "step": 7848 }, { "epoch": 0.1390021561361332, "grad_norm": 1.4818166494369507, "learning_rate": 2.9074908792579497e-05, "loss": 0.1014, "step": 7849 }, { "epoch": 0.13901986567316163, "grad_norm": 0.8991113305091858, "learning_rate": 2.9074611299730667e-05, "loss": 0.1038, "step": 7850 }, { "epoch": 0.13903757521019006, "grad_norm": 0.9297117590904236, "learning_rate": 2.907431376057779e-05, "loss": 0.1593, "step": 7851 }, { "epoch": 0.13905528474721848, "grad_norm": 0.7529505491256714, "learning_rate": 2.907401617512185e-05, "loss": 0.0954, "step": 7852 }, { "epoch": 0.13907299428424694, "grad_norm": 0.938593864440918, "learning_rate": 2.907371854336381e-05, "loss": 0.1103, "step": 7853 }, { "epoch": 0.13909070382127536, "grad_norm": 0.9371369481086731, "learning_rate": 2.907342086530467e-05, "loss": 0.0987, "step": 7854 }, { "epoch": 0.1391084133583038, "grad_norm": 0.797373354434967, "learning_rate": 2.9073123140945392e-05, "loss": 0.1373, "step": 7855 }, { "epoch": 0.13912612289533222, "grad_norm": 1.3557848930358887, "learning_rate": 2.9072825370286967e-05, "loss": 0.1535, "step": 7856 }, { "epoch": 0.13914383243236064, "grad_norm": 1.5864319801330566, "learning_rate": 2.907252755333037e-05, "loss": 0.1418, "step": 7857 }, { "epoch": 0.13916154196938907, "grad_norm": 1.2178945541381836, "learning_rate": 2.9072229690076575e-05, "loss": 0.1048, "step": 7858 }, { "epoch": 0.1391792515064175, "grad_norm": 0.7828069925308228, "learning_rate": 2.9071931780526573e-05, "loss": 0.087, "step": 7859 }, { "epoch": 0.13919696104344592, "grad_norm": 0.7673678398132324, "learning_rate": 2.9071633824681335e-05, "loss": 0.1244, "step": 7860 }, { "epoch": 0.13921467058047435, "grad_norm": 0.8314254283905029, "learning_rate": 2.9071335822541843e-05, "loss": 0.1318, "step": 7861 }, { "epoch": 0.13923238011750277, "grad_norm": 1.3112131357192993, "learning_rate": 2.9071037774109083e-05, "loss": 0.0936, "step": 7862 }, { "epoch": 0.1392500896545312, "grad_norm": 0.7470086216926575, "learning_rate": 2.9070739679384032e-05, "loss": 0.0832, "step": 7863 }, { "epoch": 0.13926779919155963, "grad_norm": 0.9047475457191467, "learning_rate": 2.907044153836767e-05, "loss": 0.1419, "step": 7864 }, { "epoch": 0.13928550872858805, "grad_norm": 1.1702646017074585, "learning_rate": 2.9070143351060983e-05, "loss": 0.1367, "step": 7865 }, { "epoch": 0.13930321826561648, "grad_norm": 0.9696024060249329, "learning_rate": 2.906984511746494e-05, "loss": 0.1011, "step": 7866 }, { "epoch": 0.1393209278026449, "grad_norm": 1.4366917610168457, "learning_rate": 2.9069546837580533e-05, "loss": 0.1305, "step": 7867 }, { "epoch": 0.13933863733967336, "grad_norm": 1.2153466939926147, "learning_rate": 2.906924851140874e-05, "loss": 0.0964, "step": 7868 }, { "epoch": 0.1393563468767018, "grad_norm": 0.9356049299240112, "learning_rate": 2.9068950138950543e-05, "loss": 0.0732, "step": 7869 }, { "epoch": 0.1393740564137302, "grad_norm": 1.7726471424102783, "learning_rate": 2.9068651720206925e-05, "loss": 0.1431, "step": 7870 }, { "epoch": 0.13939176595075864, "grad_norm": 0.9508774280548096, "learning_rate": 2.906835325517886e-05, "loss": 0.1042, "step": 7871 }, { "epoch": 0.13940947548778707, "grad_norm": 1.1214878559112549, "learning_rate": 2.906805474386734e-05, "loss": 0.1299, "step": 7872 }, { "epoch": 0.1394271850248155, "grad_norm": 1.0759822130203247, "learning_rate": 2.9067756186273342e-05, "loss": 0.0923, "step": 7873 }, { "epoch": 0.13944489456184392, "grad_norm": 1.1352020502090454, "learning_rate": 2.906745758239785e-05, "loss": 0.103, "step": 7874 }, { "epoch": 0.13946260409887234, "grad_norm": 1.0564630031585693, "learning_rate": 2.9067158932241843e-05, "loss": 0.1298, "step": 7875 }, { "epoch": 0.13948031363590077, "grad_norm": 1.2681868076324463, "learning_rate": 2.9066860235806304e-05, "loss": 0.1164, "step": 7876 }, { "epoch": 0.1394980231729292, "grad_norm": 1.4073727130889893, "learning_rate": 2.9066561493092225e-05, "loss": 0.1519, "step": 7877 }, { "epoch": 0.13951573270995762, "grad_norm": 0.9666545987129211, "learning_rate": 2.906626270410058e-05, "loss": 0.0894, "step": 7878 }, { "epoch": 0.13953344224698605, "grad_norm": 1.4446386098861694, "learning_rate": 2.9065963868832344e-05, "loss": 0.1299, "step": 7879 }, { "epoch": 0.13955115178401448, "grad_norm": 0.8227309584617615, "learning_rate": 2.906566498728852e-05, "loss": 0.1018, "step": 7880 }, { "epoch": 0.1395688613210429, "grad_norm": 1.2875334024429321, "learning_rate": 2.9065366059470073e-05, "loss": 0.1071, "step": 7881 }, { "epoch": 0.13958657085807133, "grad_norm": 1.2812492847442627, "learning_rate": 2.9065067085378e-05, "loss": 0.1137, "step": 7882 }, { "epoch": 0.13960428039509978, "grad_norm": 0.7409243583679199, "learning_rate": 2.9064768065013274e-05, "loss": 0.1066, "step": 7883 }, { "epoch": 0.1396219899321282, "grad_norm": 0.7873639464378357, "learning_rate": 2.906446899837689e-05, "loss": 0.1157, "step": 7884 }, { "epoch": 0.13963969946915664, "grad_norm": 0.9309523105621338, "learning_rate": 2.9064169885469822e-05, "loss": 0.1229, "step": 7885 }, { "epoch": 0.13965740900618506, "grad_norm": 1.5560705661773682, "learning_rate": 2.906387072629306e-05, "loss": 0.1082, "step": 7886 }, { "epoch": 0.1396751185432135, "grad_norm": 1.2834570407867432, "learning_rate": 2.9063571520847577e-05, "loss": 0.1195, "step": 7887 }, { "epoch": 0.13969282808024192, "grad_norm": 1.1248193979263306, "learning_rate": 2.9063272269134375e-05, "loss": 0.1338, "step": 7888 }, { "epoch": 0.13971053761727034, "grad_norm": 1.0230669975280762, "learning_rate": 2.9062972971154427e-05, "loss": 0.0922, "step": 7889 }, { "epoch": 0.13972824715429877, "grad_norm": 0.9744355082511902, "learning_rate": 2.9062673626908723e-05, "loss": 0.0879, "step": 7890 }, { "epoch": 0.1397459566913272, "grad_norm": 0.8603931069374084, "learning_rate": 2.9062374236398243e-05, "loss": 0.1349, "step": 7891 }, { "epoch": 0.13976366622835562, "grad_norm": 0.7894555330276489, "learning_rate": 2.906207479962397e-05, "loss": 0.0692, "step": 7892 }, { "epoch": 0.13978137576538405, "grad_norm": 1.2746984958648682, "learning_rate": 2.90617753165869e-05, "loss": 0.0993, "step": 7893 }, { "epoch": 0.13979908530241247, "grad_norm": 1.467592477798462, "learning_rate": 2.9061475787288008e-05, "loss": 0.1485, "step": 7894 }, { "epoch": 0.1398167948394409, "grad_norm": 1.2025614976882935, "learning_rate": 2.9061176211728282e-05, "loss": 0.1091, "step": 7895 }, { "epoch": 0.13983450437646933, "grad_norm": 0.991912841796875, "learning_rate": 2.9060876589908713e-05, "loss": 0.1257, "step": 7896 }, { "epoch": 0.13985221391349775, "grad_norm": 1.4983469247817993, "learning_rate": 2.906057692183028e-05, "loss": 0.1385, "step": 7897 }, { "epoch": 0.1398699234505262, "grad_norm": 1.0255334377288818, "learning_rate": 2.9060277207493972e-05, "loss": 0.1352, "step": 7898 }, { "epoch": 0.13988763298755463, "grad_norm": 0.925347626209259, "learning_rate": 2.905997744690077e-05, "loss": 0.1324, "step": 7899 }, { "epoch": 0.13990534252458306, "grad_norm": 1.857517957687378, "learning_rate": 2.9059677640051664e-05, "loss": 0.1478, "step": 7900 }, { "epoch": 0.13992305206161149, "grad_norm": 0.643042266368866, "learning_rate": 2.9059377786947644e-05, "loss": 0.1253, "step": 7901 }, { "epoch": 0.1399407615986399, "grad_norm": 1.06086003780365, "learning_rate": 2.9059077887589693e-05, "loss": 0.1179, "step": 7902 }, { "epoch": 0.13995847113566834, "grad_norm": 0.703086793422699, "learning_rate": 2.9058777941978796e-05, "loss": 0.0983, "step": 7903 }, { "epoch": 0.13997618067269677, "grad_norm": 0.7177636027336121, "learning_rate": 2.9058477950115942e-05, "loss": 0.1041, "step": 7904 }, { "epoch": 0.1399938902097252, "grad_norm": 1.701303243637085, "learning_rate": 2.905817791200212e-05, "loss": 0.1623, "step": 7905 }, { "epoch": 0.14001159974675362, "grad_norm": 1.2237839698791504, "learning_rate": 2.905787782763831e-05, "loss": 0.1534, "step": 7906 }, { "epoch": 0.14002930928378204, "grad_norm": 0.8002790212631226, "learning_rate": 2.9057577697025508e-05, "loss": 0.1949, "step": 7907 }, { "epoch": 0.14004701882081047, "grad_norm": 0.7463768720626831, "learning_rate": 2.9057277520164692e-05, "loss": 0.1062, "step": 7908 }, { "epoch": 0.1400647283578389, "grad_norm": 1.0304807424545288, "learning_rate": 2.9056977297056864e-05, "loss": 0.1141, "step": 7909 }, { "epoch": 0.14008243789486732, "grad_norm": 2.0469884872436523, "learning_rate": 2.9056677027702995e-05, "loss": 0.0837, "step": 7910 }, { "epoch": 0.14010014743189575, "grad_norm": 0.9328092932701111, "learning_rate": 2.905637671210408e-05, "loss": 0.1177, "step": 7911 }, { "epoch": 0.1401178569689242, "grad_norm": 0.9557792544364929, "learning_rate": 2.9056076350261113e-05, "loss": 0.1442, "step": 7912 }, { "epoch": 0.14013556650595263, "grad_norm": 1.141753077507019, "learning_rate": 2.905577594217507e-05, "loss": 0.09, "step": 7913 }, { "epoch": 0.14015327604298106, "grad_norm": 0.8922107815742493, "learning_rate": 2.905547548784695e-05, "loss": 0.1168, "step": 7914 }, { "epoch": 0.14017098558000948, "grad_norm": 0.8638213872909546, "learning_rate": 2.9055174987277735e-05, "loss": 0.1403, "step": 7915 }, { "epoch": 0.1401886951170379, "grad_norm": 0.9257450699806213, "learning_rate": 2.905487444046842e-05, "loss": 0.1192, "step": 7916 }, { "epoch": 0.14020640465406634, "grad_norm": 0.9639191031455994, "learning_rate": 2.9054573847419984e-05, "loss": 0.0943, "step": 7917 }, { "epoch": 0.14022411419109476, "grad_norm": 0.9557640552520752, "learning_rate": 2.9054273208133424e-05, "loss": 0.1187, "step": 7918 }, { "epoch": 0.1402418237281232, "grad_norm": 1.03947114944458, "learning_rate": 2.905397252260973e-05, "loss": 0.1291, "step": 7919 }, { "epoch": 0.14025953326515161, "grad_norm": 0.806777834892273, "learning_rate": 2.9053671790849882e-05, "loss": 0.0958, "step": 7920 }, { "epoch": 0.14027724280218004, "grad_norm": 1.013244390487671, "learning_rate": 2.9053371012854876e-05, "loss": 0.1189, "step": 7921 }, { "epoch": 0.14029495233920847, "grad_norm": 1.0239598751068115, "learning_rate": 2.9053070188625702e-05, "loss": 0.1562, "step": 7922 }, { "epoch": 0.1403126618762369, "grad_norm": 1.0566174983978271, "learning_rate": 2.9052769318163352e-05, "loss": 0.1585, "step": 7923 }, { "epoch": 0.14033037141326532, "grad_norm": 1.5699617862701416, "learning_rate": 2.905246840146881e-05, "loss": 0.1536, "step": 7924 }, { "epoch": 0.14034808095029375, "grad_norm": 0.7520493865013123, "learning_rate": 2.9052167438543068e-05, "loss": 0.0864, "step": 7925 }, { "epoch": 0.14036579048732217, "grad_norm": 1.406205415725708, "learning_rate": 2.9051866429387116e-05, "loss": 0.1192, "step": 7926 }, { "epoch": 0.14038350002435063, "grad_norm": 1.2680445909500122, "learning_rate": 2.905156537400195e-05, "loss": 0.1173, "step": 7927 }, { "epoch": 0.14040120956137905, "grad_norm": 1.0871870517730713, "learning_rate": 2.9051264272388546e-05, "loss": 0.1411, "step": 7928 }, { "epoch": 0.14041891909840748, "grad_norm": 1.0817983150482178, "learning_rate": 2.9050963124547906e-05, "loss": 0.1045, "step": 7929 }, { "epoch": 0.1404366286354359, "grad_norm": 0.6664702892303467, "learning_rate": 2.905066193048102e-05, "loss": 0.1081, "step": 7930 }, { "epoch": 0.14045433817246433, "grad_norm": 0.9952887892723083, "learning_rate": 2.905036069018888e-05, "loss": 0.1378, "step": 7931 }, { "epoch": 0.14047204770949276, "grad_norm": 1.0317072868347168, "learning_rate": 2.9050059403672473e-05, "loss": 0.0949, "step": 7932 }, { "epoch": 0.14048975724652119, "grad_norm": 0.9861408472061157, "learning_rate": 2.9049758070932788e-05, "loss": 0.127, "step": 7933 }, { "epoch": 0.1405074667835496, "grad_norm": 1.1281795501708984, "learning_rate": 2.9049456691970827e-05, "loss": 0.1435, "step": 7934 }, { "epoch": 0.14052517632057804, "grad_norm": 1.2847707271575928, "learning_rate": 2.9049155266787565e-05, "loss": 0.1101, "step": 7935 }, { "epoch": 0.14054288585760646, "grad_norm": 0.5829545855522156, "learning_rate": 2.9048853795384013e-05, "loss": 0.0718, "step": 7936 }, { "epoch": 0.1405605953946349, "grad_norm": 0.8135583996772766, "learning_rate": 2.9048552277761145e-05, "loss": 0.0932, "step": 7937 }, { "epoch": 0.14057830493166332, "grad_norm": 0.844679057598114, "learning_rate": 2.9048250713919965e-05, "loss": 0.0994, "step": 7938 }, { "epoch": 0.14059601446869174, "grad_norm": 0.9525856971740723, "learning_rate": 2.9047949103861463e-05, "loss": 0.1227, "step": 7939 }, { "epoch": 0.14061372400572017, "grad_norm": 1.0395786762237549, "learning_rate": 2.9047647447586624e-05, "loss": 0.0886, "step": 7940 }, { "epoch": 0.1406314335427486, "grad_norm": 1.1150470972061157, "learning_rate": 2.9047345745096447e-05, "loss": 0.1123, "step": 7941 }, { "epoch": 0.14064914307977705, "grad_norm": 0.932308554649353, "learning_rate": 2.904704399639193e-05, "loss": 0.1409, "step": 7942 }, { "epoch": 0.14066685261680548, "grad_norm": 1.2685269117355347, "learning_rate": 2.904674220147405e-05, "loss": 0.1828, "step": 7943 }, { "epoch": 0.1406845621538339, "grad_norm": 1.6885249614715576, "learning_rate": 2.904644036034381e-05, "loss": 0.1855, "step": 7944 }, { "epoch": 0.14070227169086233, "grad_norm": 1.2299121618270874, "learning_rate": 2.904613847300221e-05, "loss": 0.0932, "step": 7945 }, { "epoch": 0.14071998122789076, "grad_norm": 0.9994919896125793, "learning_rate": 2.9045836539450226e-05, "loss": 0.1796, "step": 7946 }, { "epoch": 0.14073769076491918, "grad_norm": 1.0480133295059204, "learning_rate": 2.9045534559688866e-05, "loss": 0.1022, "step": 7947 }, { "epoch": 0.1407554003019476, "grad_norm": 0.9135172963142395, "learning_rate": 2.9045232533719114e-05, "loss": 0.1566, "step": 7948 }, { "epoch": 0.14077310983897603, "grad_norm": 0.8166214823722839, "learning_rate": 2.904493046154197e-05, "loss": 0.1304, "step": 7949 }, { "epoch": 0.14079081937600446, "grad_norm": 1.0224370956420898, "learning_rate": 2.9044628343158424e-05, "loss": 0.111, "step": 7950 }, { "epoch": 0.1408085289130329, "grad_norm": 0.9029039740562439, "learning_rate": 2.9044326178569474e-05, "loss": 0.1509, "step": 7951 }, { "epoch": 0.14082623845006131, "grad_norm": 1.7680983543395996, "learning_rate": 2.904402396777611e-05, "loss": 0.1281, "step": 7952 }, { "epoch": 0.14084394798708974, "grad_norm": 1.2205514907836914, "learning_rate": 2.9043721710779324e-05, "loss": 0.1013, "step": 7953 }, { "epoch": 0.14086165752411817, "grad_norm": 0.8564586639404297, "learning_rate": 2.9043419407580117e-05, "loss": 0.1282, "step": 7954 }, { "epoch": 0.1408793670611466, "grad_norm": 1.272003173828125, "learning_rate": 2.904311705817948e-05, "loss": 0.13, "step": 7955 }, { "epoch": 0.14089707659817502, "grad_norm": 0.874859094619751, "learning_rate": 2.9042814662578412e-05, "loss": 0.1179, "step": 7956 }, { "epoch": 0.14091478613520347, "grad_norm": 0.8701591491699219, "learning_rate": 2.90425122207779e-05, "loss": 0.0936, "step": 7957 }, { "epoch": 0.1409324956722319, "grad_norm": 0.5582547783851624, "learning_rate": 2.9042209732778942e-05, "loss": 0.1371, "step": 7958 }, { "epoch": 0.14095020520926033, "grad_norm": 1.468030333518982, "learning_rate": 2.904190719858254e-05, "loss": 0.1603, "step": 7959 }, { "epoch": 0.14096791474628875, "grad_norm": 1.1276706457138062, "learning_rate": 2.904160461818968e-05, "loss": 0.1338, "step": 7960 }, { "epoch": 0.14098562428331718, "grad_norm": 1.0430576801300049, "learning_rate": 2.9041301991601358e-05, "loss": 0.1219, "step": 7961 }, { "epoch": 0.1410033338203456, "grad_norm": 1.1608171463012695, "learning_rate": 2.904099931881858e-05, "loss": 0.1604, "step": 7962 }, { "epoch": 0.14102104335737403, "grad_norm": 0.9272089600563049, "learning_rate": 2.904069659984233e-05, "loss": 0.1251, "step": 7963 }, { "epoch": 0.14103875289440246, "grad_norm": 1.1890273094177246, "learning_rate": 2.9040393834673606e-05, "loss": 0.113, "step": 7964 }, { "epoch": 0.14105646243143088, "grad_norm": 1.0626380443572998, "learning_rate": 2.9040091023313408e-05, "loss": 0.16, "step": 7965 }, { "epoch": 0.1410741719684593, "grad_norm": 0.9186572432518005, "learning_rate": 2.9039788165762735e-05, "loss": 0.0981, "step": 7966 }, { "epoch": 0.14109188150548774, "grad_norm": 0.7940202355384827, "learning_rate": 2.9039485262022576e-05, "loss": 0.1057, "step": 7967 }, { "epoch": 0.14110959104251616, "grad_norm": 0.8865506052970886, "learning_rate": 2.9039182312093927e-05, "loss": 0.0855, "step": 7968 }, { "epoch": 0.1411273005795446, "grad_norm": 1.0461993217468262, "learning_rate": 2.903887931597779e-05, "loss": 0.0912, "step": 7969 }, { "epoch": 0.14114501011657302, "grad_norm": 1.0604840517044067, "learning_rate": 2.9038576273675167e-05, "loss": 0.1189, "step": 7970 }, { "epoch": 0.14116271965360144, "grad_norm": 1.456807017326355, "learning_rate": 2.903827318518704e-05, "loss": 0.1635, "step": 7971 }, { "epoch": 0.1411804291906299, "grad_norm": 1.1923710107803345, "learning_rate": 2.9037970050514416e-05, "loss": 0.1297, "step": 7972 }, { "epoch": 0.14119813872765832, "grad_norm": 1.0015491247177124, "learning_rate": 2.9037666869658287e-05, "loss": 0.1204, "step": 7973 }, { "epoch": 0.14121584826468675, "grad_norm": 1.1365089416503906, "learning_rate": 2.903736364261966e-05, "loss": 0.1377, "step": 7974 }, { "epoch": 0.14123355780171518, "grad_norm": 1.000716209411621, "learning_rate": 2.9037060369399522e-05, "loss": 0.0984, "step": 7975 }, { "epoch": 0.1412512673387436, "grad_norm": 0.848635733127594, "learning_rate": 2.9036757049998876e-05, "loss": 0.118, "step": 7976 }, { "epoch": 0.14126897687577203, "grad_norm": 1.134724497795105, "learning_rate": 2.9036453684418722e-05, "loss": 0.1301, "step": 7977 }, { "epoch": 0.14128668641280046, "grad_norm": 1.4571845531463623, "learning_rate": 2.9036150272660056e-05, "loss": 0.1195, "step": 7978 }, { "epoch": 0.14130439594982888, "grad_norm": 0.8119239807128906, "learning_rate": 2.903584681472387e-05, "loss": 0.1324, "step": 7979 }, { "epoch": 0.1413221054868573, "grad_norm": 1.0482828617095947, "learning_rate": 2.9035543310611167e-05, "loss": 0.1634, "step": 7980 }, { "epoch": 0.14133981502388573, "grad_norm": 0.9188486933708191, "learning_rate": 2.9035239760322953e-05, "loss": 0.1018, "step": 7981 }, { "epoch": 0.14135752456091416, "grad_norm": 0.8906669020652771, "learning_rate": 2.903493616386021e-05, "loss": 0.1219, "step": 7982 }, { "epoch": 0.1413752340979426, "grad_norm": 1.1011340618133545, "learning_rate": 2.903463252122396e-05, "loss": 0.1206, "step": 7983 }, { "epoch": 0.141392943634971, "grad_norm": 0.8979130983352661, "learning_rate": 2.903432883241518e-05, "loss": 0.1085, "step": 7984 }, { "epoch": 0.14141065317199944, "grad_norm": 0.7825950384140015, "learning_rate": 2.9034025097434876e-05, "loss": 0.1132, "step": 7985 }, { "epoch": 0.14142836270902787, "grad_norm": 0.9651725888252258, "learning_rate": 2.903372131628405e-05, "loss": 0.1113, "step": 7986 }, { "epoch": 0.14144607224605632, "grad_norm": 1.0698344707489014, "learning_rate": 2.9033417488963703e-05, "loss": 0.1084, "step": 7987 }, { "epoch": 0.14146378178308475, "grad_norm": 1.530102252960205, "learning_rate": 2.903311361547483e-05, "loss": 0.1044, "step": 7988 }, { "epoch": 0.14148149132011317, "grad_norm": 1.254399061203003, "learning_rate": 2.9032809695818436e-05, "loss": 0.1177, "step": 7989 }, { "epoch": 0.1414992008571416, "grad_norm": 0.8582084774971008, "learning_rate": 2.9032505729995515e-05, "loss": 0.1006, "step": 7990 }, { "epoch": 0.14151691039417003, "grad_norm": 1.0875121355056763, "learning_rate": 2.9032201718007067e-05, "loss": 0.1357, "step": 7991 }, { "epoch": 0.14153461993119845, "grad_norm": 0.8943343758583069, "learning_rate": 2.9031897659854096e-05, "loss": 0.158, "step": 7992 }, { "epoch": 0.14155232946822688, "grad_norm": 0.9438438415527344, "learning_rate": 2.90315935555376e-05, "loss": 0.1357, "step": 7993 }, { "epoch": 0.1415700390052553, "grad_norm": 0.7208147048950195, "learning_rate": 2.9031289405058586e-05, "loss": 0.0826, "step": 7994 }, { "epoch": 0.14158774854228373, "grad_norm": 1.1517881155014038, "learning_rate": 2.9030985208418043e-05, "loss": 0.0964, "step": 7995 }, { "epoch": 0.14160545807931216, "grad_norm": 1.151015043258667, "learning_rate": 2.9030680965616982e-05, "loss": 0.0824, "step": 7996 }, { "epoch": 0.14162316761634058, "grad_norm": 1.051890254020691, "learning_rate": 2.9030376676656393e-05, "loss": 0.0949, "step": 7997 }, { "epoch": 0.141640877153369, "grad_norm": 1.0903379917144775, "learning_rate": 2.9030072341537293e-05, "loss": 0.1063, "step": 7998 }, { "epoch": 0.14165858669039744, "grad_norm": 1.4467477798461914, "learning_rate": 2.9029767960260667e-05, "loss": 0.1162, "step": 7999 }, { "epoch": 0.14167629622742586, "grad_norm": 1.1496974229812622, "learning_rate": 2.9029463532827528e-05, "loss": 0.114, "step": 8000 }, { "epoch": 0.1416940057644543, "grad_norm": 1.2646324634552002, "learning_rate": 2.9029159059238866e-05, "loss": 0.1476, "step": 8001 }, { "epoch": 0.14171171530148274, "grad_norm": 0.9574103951454163, "learning_rate": 2.90288545394957e-05, "loss": 0.0969, "step": 8002 }, { "epoch": 0.14172942483851117, "grad_norm": 1.107818603515625, "learning_rate": 2.9028549973599018e-05, "loss": 0.1014, "step": 8003 }, { "epoch": 0.1417471343755396, "grad_norm": 1.09075129032135, "learning_rate": 2.9028245361549817e-05, "loss": 0.1057, "step": 8004 }, { "epoch": 0.14176484391256802, "grad_norm": 0.9658418893814087, "learning_rate": 2.902794070334912e-05, "loss": 0.1228, "step": 8005 }, { "epoch": 0.14178255344959645, "grad_norm": 0.7314751744270325, "learning_rate": 2.9027635998997907e-05, "loss": 0.1186, "step": 8006 }, { "epoch": 0.14180026298662488, "grad_norm": 1.0292870998382568, "learning_rate": 2.9027331248497197e-05, "loss": 0.1354, "step": 8007 }, { "epoch": 0.1418179725236533, "grad_norm": 1.1478897333145142, "learning_rate": 2.902702645184798e-05, "loss": 0.1248, "step": 8008 }, { "epoch": 0.14183568206068173, "grad_norm": 1.0682549476623535, "learning_rate": 2.902672160905127e-05, "loss": 0.1107, "step": 8009 }, { "epoch": 0.14185339159771015, "grad_norm": 0.8561519980430603, "learning_rate": 2.9026416720108063e-05, "loss": 0.1394, "step": 8010 }, { "epoch": 0.14187110113473858, "grad_norm": 0.9906296133995056, "learning_rate": 2.902611178501936e-05, "loss": 0.1328, "step": 8011 }, { "epoch": 0.141888810671767, "grad_norm": 1.0557475090026855, "learning_rate": 2.9025806803786175e-05, "loss": 0.1102, "step": 8012 }, { "epoch": 0.14190652020879543, "grad_norm": 0.7611133456230164, "learning_rate": 2.90255017764095e-05, "loss": 0.0839, "step": 8013 }, { "epoch": 0.14192422974582386, "grad_norm": 0.8889872431755066, "learning_rate": 2.9025196702890346e-05, "loss": 0.1353, "step": 8014 }, { "epoch": 0.1419419392828523, "grad_norm": 1.1328214406967163, "learning_rate": 2.9024891583229705e-05, "loss": 0.1159, "step": 8015 }, { "epoch": 0.1419596488198807, "grad_norm": 1.0205551385879517, "learning_rate": 2.90245864174286e-05, "loss": 0.0813, "step": 8016 }, { "epoch": 0.14197735835690917, "grad_norm": 1.305302381515503, "learning_rate": 2.902428120548802e-05, "loss": 0.1244, "step": 8017 }, { "epoch": 0.1419950678939376, "grad_norm": 1.715816855430603, "learning_rate": 2.902397594740897e-05, "loss": 0.1048, "step": 8018 }, { "epoch": 0.14201277743096602, "grad_norm": 1.232552170753479, "learning_rate": 2.9023670643192462e-05, "loss": 0.0944, "step": 8019 }, { "epoch": 0.14203048696799445, "grad_norm": 0.6838304996490479, "learning_rate": 2.902336529283949e-05, "loss": 0.1196, "step": 8020 }, { "epoch": 0.14204819650502287, "grad_norm": 1.044907808303833, "learning_rate": 2.902305989635107e-05, "loss": 0.1164, "step": 8021 }, { "epoch": 0.1420659060420513, "grad_norm": 1.45291006565094, "learning_rate": 2.90227544537282e-05, "loss": 0.1226, "step": 8022 }, { "epoch": 0.14208361557907973, "grad_norm": 0.8370017409324646, "learning_rate": 2.9022448964971887e-05, "loss": 0.1046, "step": 8023 }, { "epoch": 0.14210132511610815, "grad_norm": 1.2004116773605347, "learning_rate": 2.9022143430083133e-05, "loss": 0.117, "step": 8024 }, { "epoch": 0.14211903465313658, "grad_norm": 1.0377328395843506, "learning_rate": 2.9021837849062948e-05, "loss": 0.0874, "step": 8025 }, { "epoch": 0.142136744190165, "grad_norm": 1.6081435680389404, "learning_rate": 2.9021532221912333e-05, "loss": 0.1661, "step": 8026 }, { "epoch": 0.14215445372719343, "grad_norm": 0.8247132897377014, "learning_rate": 2.9021226548632297e-05, "loss": 0.0881, "step": 8027 }, { "epoch": 0.14217216326422186, "grad_norm": 1.0222700834274292, "learning_rate": 2.902092082922384e-05, "loss": 0.1415, "step": 8028 }, { "epoch": 0.14218987280125028, "grad_norm": 1.021789312362671, "learning_rate": 2.9020615063687976e-05, "loss": 0.0724, "step": 8029 }, { "epoch": 0.1422075823382787, "grad_norm": 1.0407458543777466, "learning_rate": 2.9020309252025704e-05, "loss": 0.1047, "step": 8030 }, { "epoch": 0.14222529187530714, "grad_norm": 1.311490774154663, "learning_rate": 2.9020003394238033e-05, "loss": 0.1333, "step": 8031 }, { "epoch": 0.1422430014123356, "grad_norm": 1.0377519130706787, "learning_rate": 2.9019697490325968e-05, "loss": 0.1172, "step": 8032 }, { "epoch": 0.14226071094936402, "grad_norm": 1.0364668369293213, "learning_rate": 2.901939154029052e-05, "loss": 0.0931, "step": 8033 }, { "epoch": 0.14227842048639244, "grad_norm": 0.9819226861000061, "learning_rate": 2.9019085544132685e-05, "loss": 0.1259, "step": 8034 }, { "epoch": 0.14229613002342087, "grad_norm": 1.7423222064971924, "learning_rate": 2.9018779501853482e-05, "loss": 0.1132, "step": 8035 }, { "epoch": 0.1423138395604493, "grad_norm": 0.7484779953956604, "learning_rate": 2.901847341345391e-05, "loss": 0.1267, "step": 8036 }, { "epoch": 0.14233154909747772, "grad_norm": 1.1039915084838867, "learning_rate": 2.901816727893498e-05, "loss": 0.1466, "step": 8037 }, { "epoch": 0.14234925863450615, "grad_norm": 1.019748568534851, "learning_rate": 2.9017861098297695e-05, "loss": 0.0873, "step": 8038 }, { "epoch": 0.14236696817153457, "grad_norm": 1.2386237382888794, "learning_rate": 2.9017554871543068e-05, "loss": 0.1168, "step": 8039 }, { "epoch": 0.142384677708563, "grad_norm": 0.8439168930053711, "learning_rate": 2.9017248598672105e-05, "loss": 0.1082, "step": 8040 }, { "epoch": 0.14240238724559143, "grad_norm": 0.7332102060317993, "learning_rate": 2.901694227968581e-05, "loss": 0.0874, "step": 8041 }, { "epoch": 0.14242009678261985, "grad_norm": 1.04574716091156, "learning_rate": 2.9016635914585185e-05, "loss": 0.1671, "step": 8042 }, { "epoch": 0.14243780631964828, "grad_norm": 1.3358887434005737, "learning_rate": 2.9016329503371255e-05, "loss": 0.1287, "step": 8043 }, { "epoch": 0.1424555158566767, "grad_norm": 0.7228671312332153, "learning_rate": 2.9016023046045014e-05, "loss": 0.1513, "step": 8044 }, { "epoch": 0.14247322539370513, "grad_norm": 0.7928351163864136, "learning_rate": 2.9015716542607478e-05, "loss": 0.1072, "step": 8045 }, { "epoch": 0.1424909349307336, "grad_norm": 0.7599784135818481, "learning_rate": 2.9015409993059653e-05, "loss": 0.1331, "step": 8046 }, { "epoch": 0.142508644467762, "grad_norm": 1.275710940361023, "learning_rate": 2.9015103397402544e-05, "loss": 0.0964, "step": 8047 }, { "epoch": 0.14252635400479044, "grad_norm": 0.8524417877197266, "learning_rate": 2.9014796755637158e-05, "loss": 0.1434, "step": 8048 }, { "epoch": 0.14254406354181887, "grad_norm": 1.1168478727340698, "learning_rate": 2.9014490067764514e-05, "loss": 0.1025, "step": 8049 }, { "epoch": 0.1425617730788473, "grad_norm": 1.3159105777740479, "learning_rate": 2.9014183333785616e-05, "loss": 0.1666, "step": 8050 }, { "epoch": 0.14257948261587572, "grad_norm": 0.6661510467529297, "learning_rate": 2.9013876553701465e-05, "loss": 0.0851, "step": 8051 }, { "epoch": 0.14259719215290415, "grad_norm": 0.7460588216781616, "learning_rate": 2.9013569727513086e-05, "loss": 0.1421, "step": 8052 }, { "epoch": 0.14261490168993257, "grad_norm": 1.0442168712615967, "learning_rate": 2.9013262855221477e-05, "loss": 0.1541, "step": 8053 }, { "epoch": 0.142632611226961, "grad_norm": 1.2151970863342285, "learning_rate": 2.901295593682765e-05, "loss": 0.1048, "step": 8054 }, { "epoch": 0.14265032076398942, "grad_norm": 0.9885944128036499, "learning_rate": 2.9012648972332615e-05, "loss": 0.0961, "step": 8055 }, { "epoch": 0.14266803030101785, "grad_norm": 0.94219970703125, "learning_rate": 2.9012341961737382e-05, "loss": 0.1422, "step": 8056 }, { "epoch": 0.14268573983804628, "grad_norm": 0.8433796167373657, "learning_rate": 2.9012034905042967e-05, "loss": 0.1269, "step": 8057 }, { "epoch": 0.1427034493750747, "grad_norm": 1.1920485496520996, "learning_rate": 2.9011727802250362e-05, "loss": 0.1407, "step": 8058 }, { "epoch": 0.14272115891210313, "grad_norm": 0.9593645930290222, "learning_rate": 2.90114206533606e-05, "loss": 0.1392, "step": 8059 }, { "epoch": 0.14273886844913156, "grad_norm": 1.7712819576263428, "learning_rate": 2.9011113458374678e-05, "loss": 0.1469, "step": 8060 }, { "epoch": 0.14275657798616, "grad_norm": 1.1278600692749023, "learning_rate": 2.9010806217293607e-05, "loss": 0.156, "step": 8061 }, { "epoch": 0.14277428752318844, "grad_norm": 1.8705618381500244, "learning_rate": 2.901049893011841e-05, "loss": 0.1367, "step": 8062 }, { "epoch": 0.14279199706021686, "grad_norm": 1.830032229423523, "learning_rate": 2.901019159685008e-05, "loss": 0.1256, "step": 8063 }, { "epoch": 0.1428097065972453, "grad_norm": 0.6587288975715637, "learning_rate": 2.900988421748964e-05, "loss": 0.0847, "step": 8064 }, { "epoch": 0.14282741613427372, "grad_norm": 1.0559090375900269, "learning_rate": 2.9009576792038095e-05, "loss": 0.1391, "step": 8065 }, { "epoch": 0.14284512567130214, "grad_norm": 1.092610239982605, "learning_rate": 2.900926932049646e-05, "loss": 0.136, "step": 8066 }, { "epoch": 0.14286283520833057, "grad_norm": 1.097941279411316, "learning_rate": 2.900896180286575e-05, "loss": 0.1646, "step": 8067 }, { "epoch": 0.142880544745359, "grad_norm": 0.6502113938331604, "learning_rate": 2.900865423914697e-05, "loss": 0.0871, "step": 8068 }, { "epoch": 0.14289825428238742, "grad_norm": 0.8621475696563721, "learning_rate": 2.9008346629341133e-05, "loss": 0.1526, "step": 8069 }, { "epoch": 0.14291596381941585, "grad_norm": 0.8666287660598755, "learning_rate": 2.9008038973449255e-05, "loss": 0.1249, "step": 8070 }, { "epoch": 0.14293367335644427, "grad_norm": 1.295289397239685, "learning_rate": 2.9007731271472343e-05, "loss": 0.1478, "step": 8071 }, { "epoch": 0.1429513828934727, "grad_norm": 0.950835108757019, "learning_rate": 2.9007423523411415e-05, "loss": 0.0685, "step": 8072 }, { "epoch": 0.14296909243050113, "grad_norm": 1.208185076713562, "learning_rate": 2.900711572926748e-05, "loss": 0.1694, "step": 8073 }, { "epoch": 0.14298680196752955, "grad_norm": 0.8162720203399658, "learning_rate": 2.900680788904155e-05, "loss": 0.1282, "step": 8074 }, { "epoch": 0.14300451150455798, "grad_norm": 0.6050163507461548, "learning_rate": 2.9006500002734637e-05, "loss": 0.1032, "step": 8075 }, { "epoch": 0.14302222104158643, "grad_norm": 0.9320036172866821, "learning_rate": 2.9006192070347758e-05, "loss": 0.127, "step": 8076 }, { "epoch": 0.14303993057861486, "grad_norm": 3.442843198776245, "learning_rate": 2.9005884091881922e-05, "loss": 0.1289, "step": 8077 }, { "epoch": 0.1430576401156433, "grad_norm": 1.0660780668258667, "learning_rate": 2.9005576067338143e-05, "loss": 0.143, "step": 8078 }, { "epoch": 0.1430753496526717, "grad_norm": 0.8082324862480164, "learning_rate": 2.9005267996717436e-05, "loss": 0.0972, "step": 8079 }, { "epoch": 0.14309305918970014, "grad_norm": 1.237259864807129, "learning_rate": 2.9004959880020817e-05, "loss": 0.139, "step": 8080 }, { "epoch": 0.14311076872672857, "grad_norm": 1.065075159072876, "learning_rate": 2.9004651717249295e-05, "loss": 0.09, "step": 8081 }, { "epoch": 0.143128478263757, "grad_norm": 1.3291616439819336, "learning_rate": 2.9004343508403887e-05, "loss": 0.1105, "step": 8082 }, { "epoch": 0.14314618780078542, "grad_norm": 1.6923927068710327, "learning_rate": 2.9004035253485602e-05, "loss": 0.1808, "step": 8083 }, { "epoch": 0.14316389733781384, "grad_norm": 0.9295291304588318, "learning_rate": 2.9003726952495457e-05, "loss": 0.1344, "step": 8084 }, { "epoch": 0.14318160687484227, "grad_norm": 1.152395248413086, "learning_rate": 2.9003418605434465e-05, "loss": 0.1304, "step": 8085 }, { "epoch": 0.1431993164118707, "grad_norm": 1.6562778949737549, "learning_rate": 2.900311021230365e-05, "loss": 0.1512, "step": 8086 }, { "epoch": 0.14321702594889912, "grad_norm": 1.0514326095581055, "learning_rate": 2.9002801773104012e-05, "loss": 0.105, "step": 8087 }, { "epoch": 0.14323473548592755, "grad_norm": 1.0724444389343262, "learning_rate": 2.9002493287836574e-05, "loss": 0.1216, "step": 8088 }, { "epoch": 0.14325244502295598, "grad_norm": 0.9820640087127686, "learning_rate": 2.9002184756502348e-05, "loss": 0.1602, "step": 8089 }, { "epoch": 0.1432701545599844, "grad_norm": 1.1484674215316772, "learning_rate": 2.9001876179102357e-05, "loss": 0.1358, "step": 8090 }, { "epoch": 0.14328786409701286, "grad_norm": 1.1774914264678955, "learning_rate": 2.90015675556376e-05, "loss": 0.1419, "step": 8091 }, { "epoch": 0.14330557363404128, "grad_norm": 1.345078706741333, "learning_rate": 2.9001258886109106e-05, "loss": 0.1219, "step": 8092 }, { "epoch": 0.1433232831710697, "grad_norm": 1.0128695964813232, "learning_rate": 2.900095017051789e-05, "loss": 0.1229, "step": 8093 }, { "epoch": 0.14334099270809814, "grad_norm": 1.320044755935669, "learning_rate": 2.900064140886496e-05, "loss": 0.141, "step": 8094 }, { "epoch": 0.14335870224512656, "grad_norm": 0.9047884345054626, "learning_rate": 2.900033260115134e-05, "loss": 0.0899, "step": 8095 }, { "epoch": 0.143376411782155, "grad_norm": 1.4308347702026367, "learning_rate": 2.9000023747378033e-05, "loss": 0.1434, "step": 8096 }, { "epoch": 0.14339412131918342, "grad_norm": 1.5203279256820679, "learning_rate": 2.8999714847546074e-05, "loss": 0.1481, "step": 8097 }, { "epoch": 0.14341183085621184, "grad_norm": 1.0776952505111694, "learning_rate": 2.8999405901656465e-05, "loss": 0.1127, "step": 8098 }, { "epoch": 0.14342954039324027, "grad_norm": 0.8281843662261963, "learning_rate": 2.8999096909710228e-05, "loss": 0.1091, "step": 8099 }, { "epoch": 0.1434472499302687, "grad_norm": 0.8207547068595886, "learning_rate": 2.899878787170837e-05, "loss": 0.132, "step": 8100 }, { "epoch": 0.14346495946729712, "grad_norm": 1.2131718397140503, "learning_rate": 2.8998478787651926e-05, "loss": 0.1093, "step": 8101 }, { "epoch": 0.14348266900432555, "grad_norm": 1.3807148933410645, "learning_rate": 2.89981696575419e-05, "loss": 0.1351, "step": 8102 }, { "epoch": 0.14350037854135397, "grad_norm": 0.9265442490577698, "learning_rate": 2.8997860481379306e-05, "loss": 0.1049, "step": 8103 }, { "epoch": 0.1435180880783824, "grad_norm": 1.9624266624450684, "learning_rate": 2.8997551259165174e-05, "loss": 0.1618, "step": 8104 }, { "epoch": 0.14353579761541083, "grad_norm": 1.1749423742294312, "learning_rate": 2.8997241990900514e-05, "loss": 0.1025, "step": 8105 }, { "epoch": 0.14355350715243928, "grad_norm": 0.8194381594657898, "learning_rate": 2.899693267658634e-05, "loss": 0.1151, "step": 8106 }, { "epoch": 0.1435712166894677, "grad_norm": 1.1816215515136719, "learning_rate": 2.8996623316223673e-05, "loss": 0.1439, "step": 8107 }, { "epoch": 0.14358892622649613, "grad_norm": 1.0400136709213257, "learning_rate": 2.8996313909813533e-05, "loss": 0.1171, "step": 8108 }, { "epoch": 0.14360663576352456, "grad_norm": 1.4757970571517944, "learning_rate": 2.8996004457356937e-05, "loss": 0.1512, "step": 8109 }, { "epoch": 0.14362434530055299, "grad_norm": 0.6995194554328918, "learning_rate": 2.8995694958854897e-05, "loss": 0.1021, "step": 8110 }, { "epoch": 0.1436420548375814, "grad_norm": 1.0812785625457764, "learning_rate": 2.899538541430844e-05, "loss": 0.1102, "step": 8111 }, { "epoch": 0.14365976437460984, "grad_norm": 1.3218814134597778, "learning_rate": 2.8995075823718583e-05, "loss": 0.1606, "step": 8112 }, { "epoch": 0.14367747391163826, "grad_norm": 1.6819089651107788, "learning_rate": 2.8994766187086335e-05, "loss": 0.1444, "step": 8113 }, { "epoch": 0.1436951834486667, "grad_norm": 1.0728224515914917, "learning_rate": 2.899445650441273e-05, "loss": 0.0765, "step": 8114 }, { "epoch": 0.14371289298569512, "grad_norm": 1.3058111667633057, "learning_rate": 2.8994146775698773e-05, "loss": 0.1247, "step": 8115 }, { "epoch": 0.14373060252272354, "grad_norm": 0.9156899452209473, "learning_rate": 2.8993837000945487e-05, "loss": 0.1187, "step": 8116 }, { "epoch": 0.14374831205975197, "grad_norm": 1.489853024482727, "learning_rate": 2.8993527180153896e-05, "loss": 0.1722, "step": 8117 }, { "epoch": 0.1437660215967804, "grad_norm": 1.1449217796325684, "learning_rate": 2.8993217313325018e-05, "loss": 0.1322, "step": 8118 }, { "epoch": 0.14378373113380882, "grad_norm": 1.2031038999557495, "learning_rate": 2.8992907400459865e-05, "loss": 0.1251, "step": 8119 }, { "epoch": 0.14380144067083725, "grad_norm": 1.091045618057251, "learning_rate": 2.8992597441559464e-05, "loss": 0.1428, "step": 8120 }, { "epoch": 0.1438191502078657, "grad_norm": 0.8419917225837708, "learning_rate": 2.8992287436624835e-05, "loss": 0.1067, "step": 8121 }, { "epoch": 0.14383685974489413, "grad_norm": 1.140537142753601, "learning_rate": 2.8991977385656994e-05, "loss": 0.14, "step": 8122 }, { "epoch": 0.14385456928192256, "grad_norm": 0.880843460559845, "learning_rate": 2.8991667288656962e-05, "loss": 0.1253, "step": 8123 }, { "epoch": 0.14387227881895098, "grad_norm": 0.9409007430076599, "learning_rate": 2.8991357145625757e-05, "loss": 0.1452, "step": 8124 }, { "epoch": 0.1438899883559794, "grad_norm": 1.284814476966858, "learning_rate": 2.8991046956564407e-05, "loss": 0.119, "step": 8125 }, { "epoch": 0.14390769789300784, "grad_norm": 1.1417304277420044, "learning_rate": 2.8990736721473925e-05, "loss": 0.1568, "step": 8126 }, { "epoch": 0.14392540743003626, "grad_norm": 0.9785062074661255, "learning_rate": 2.899042644035534e-05, "loss": 0.1024, "step": 8127 }, { "epoch": 0.1439431169670647, "grad_norm": 1.1552097797393799, "learning_rate": 2.899011611320966e-05, "loss": 0.121, "step": 8128 }, { "epoch": 0.14396082650409311, "grad_norm": 1.0149019956588745, "learning_rate": 2.8989805740037916e-05, "loss": 0.0948, "step": 8129 }, { "epoch": 0.14397853604112154, "grad_norm": 0.8353173732757568, "learning_rate": 2.8989495320841126e-05, "loss": 0.1123, "step": 8130 }, { "epoch": 0.14399624557814997, "grad_norm": 0.7038736343383789, "learning_rate": 2.898918485562031e-05, "loss": 0.1138, "step": 8131 }, { "epoch": 0.1440139551151784, "grad_norm": 1.5956251621246338, "learning_rate": 2.8988874344376493e-05, "loss": 0.1402, "step": 8132 }, { "epoch": 0.14403166465220682, "grad_norm": 0.8532955050468445, "learning_rate": 2.898856378711069e-05, "loss": 0.0874, "step": 8133 }, { "epoch": 0.14404937418923525, "grad_norm": 1.2338606119155884, "learning_rate": 2.898825318382393e-05, "loss": 0.1727, "step": 8134 }, { "epoch": 0.14406708372626367, "grad_norm": 0.8859164118766785, "learning_rate": 2.8987942534517233e-05, "loss": 0.1032, "step": 8135 }, { "epoch": 0.14408479326329213, "grad_norm": 1.0930075645446777, "learning_rate": 2.898763183919162e-05, "loss": 0.0906, "step": 8136 }, { "epoch": 0.14410250280032055, "grad_norm": 1.1369438171386719, "learning_rate": 2.898732109784811e-05, "loss": 0.0739, "step": 8137 }, { "epoch": 0.14412021233734898, "grad_norm": 0.7857069969177246, "learning_rate": 2.898701031048773e-05, "loss": 0.0991, "step": 8138 }, { "epoch": 0.1441379218743774, "grad_norm": 1.0903449058532715, "learning_rate": 2.89866994771115e-05, "loss": 0.0946, "step": 8139 }, { "epoch": 0.14415563141140583, "grad_norm": 0.8447869420051575, "learning_rate": 2.8986388597720446e-05, "loss": 0.0876, "step": 8140 }, { "epoch": 0.14417334094843426, "grad_norm": 0.7675719261169434, "learning_rate": 2.8986077672315584e-05, "loss": 0.0979, "step": 8141 }, { "epoch": 0.14419105048546269, "grad_norm": 1.0732700824737549, "learning_rate": 2.898576670089794e-05, "loss": 0.0957, "step": 8142 }, { "epoch": 0.1442087600224911, "grad_norm": 0.8840409517288208, "learning_rate": 2.8985455683468543e-05, "loss": 0.1027, "step": 8143 }, { "epoch": 0.14422646955951954, "grad_norm": 1.521847128868103, "learning_rate": 2.898514462002841e-05, "loss": 0.1209, "step": 8144 }, { "epoch": 0.14424417909654796, "grad_norm": 0.5904234647750854, "learning_rate": 2.8984833510578566e-05, "loss": 0.0939, "step": 8145 }, { "epoch": 0.1442618886335764, "grad_norm": 1.6900639533996582, "learning_rate": 2.8984522355120034e-05, "loss": 0.1133, "step": 8146 }, { "epoch": 0.14427959817060482, "grad_norm": 1.1265336275100708, "learning_rate": 2.898421115365384e-05, "loss": 0.0704, "step": 8147 }, { "epoch": 0.14429730770763324, "grad_norm": 3.398137092590332, "learning_rate": 2.8983899906181e-05, "loss": 0.1288, "step": 8148 }, { "epoch": 0.14431501724466167, "grad_norm": 0.9196200966835022, "learning_rate": 2.8983588612702548e-05, "loss": 0.11, "step": 8149 }, { "epoch": 0.1443327267816901, "grad_norm": 0.8657050132751465, "learning_rate": 2.8983277273219504e-05, "loss": 0.0791, "step": 8150 }, { "epoch": 0.14435043631871855, "grad_norm": 1.1208174228668213, "learning_rate": 2.8982965887732888e-05, "loss": 0.0933, "step": 8151 }, { "epoch": 0.14436814585574698, "grad_norm": 0.9883320331573486, "learning_rate": 2.898265445624373e-05, "loss": 0.1444, "step": 8152 }, { "epoch": 0.1443858553927754, "grad_norm": 1.1595757007598877, "learning_rate": 2.898234297875306e-05, "loss": 0.1187, "step": 8153 }, { "epoch": 0.14440356492980383, "grad_norm": 1.9901782274246216, "learning_rate": 2.898203145526189e-05, "loss": 0.1471, "step": 8154 }, { "epoch": 0.14442127446683226, "grad_norm": 1.1330935955047607, "learning_rate": 2.8981719885771246e-05, "loss": 0.1425, "step": 8155 }, { "epoch": 0.14443898400386068, "grad_norm": 1.3743085861206055, "learning_rate": 2.8981408270282168e-05, "loss": 0.1428, "step": 8156 }, { "epoch": 0.1444566935408891, "grad_norm": 0.946162223815918, "learning_rate": 2.8981096608795663e-05, "loss": 0.1067, "step": 8157 }, { "epoch": 0.14447440307791753, "grad_norm": 0.5312217473983765, "learning_rate": 2.898078490131277e-05, "loss": 0.0478, "step": 8158 }, { "epoch": 0.14449211261494596, "grad_norm": 0.6925959587097168, "learning_rate": 2.8980473147834504e-05, "loss": 0.104, "step": 8159 }, { "epoch": 0.1445098221519744, "grad_norm": 0.9510782361030579, "learning_rate": 2.8980161348361897e-05, "loss": 0.1055, "step": 8160 }, { "epoch": 0.14452753168900281, "grad_norm": 0.8108930587768555, "learning_rate": 2.8979849502895974e-05, "loss": 0.0984, "step": 8161 }, { "epoch": 0.14454524122603124, "grad_norm": 0.9820871353149414, "learning_rate": 2.897953761143776e-05, "loss": 0.1102, "step": 8162 }, { "epoch": 0.14456295076305967, "grad_norm": 0.9778751730918884, "learning_rate": 2.897922567398828e-05, "loss": 0.1112, "step": 8163 }, { "epoch": 0.1445806603000881, "grad_norm": 1.1021993160247803, "learning_rate": 2.897891369054856e-05, "loss": 0.1054, "step": 8164 }, { "epoch": 0.14459836983711652, "grad_norm": 0.8594469428062439, "learning_rate": 2.8978601661119634e-05, "loss": 0.0979, "step": 8165 }, { "epoch": 0.14461607937414497, "grad_norm": 1.577412724494934, "learning_rate": 2.8978289585702517e-05, "loss": 0.1353, "step": 8166 }, { "epoch": 0.1446337889111734, "grad_norm": 1.0638254880905151, "learning_rate": 2.8977977464298243e-05, "loss": 0.1216, "step": 8167 }, { "epoch": 0.14465149844820183, "grad_norm": 1.3899600505828857, "learning_rate": 2.8977665296907836e-05, "loss": 0.1138, "step": 8168 }, { "epoch": 0.14466920798523025, "grad_norm": 0.9166818857192993, "learning_rate": 2.8977353083532322e-05, "loss": 0.0879, "step": 8169 }, { "epoch": 0.14468691752225868, "grad_norm": 1.1359878778457642, "learning_rate": 2.8977040824172734e-05, "loss": 0.1318, "step": 8170 }, { "epoch": 0.1447046270592871, "grad_norm": 1.4436243772506714, "learning_rate": 2.8976728518830093e-05, "loss": 0.1704, "step": 8171 }, { "epoch": 0.14472233659631553, "grad_norm": 1.2840827703475952, "learning_rate": 2.897641616750543e-05, "loss": 0.1062, "step": 8172 }, { "epoch": 0.14474004613334396, "grad_norm": 1.048804759979248, "learning_rate": 2.897610377019977e-05, "loss": 0.1111, "step": 8173 }, { "epoch": 0.14475775567037238, "grad_norm": 1.6906110048294067, "learning_rate": 2.897579132691414e-05, "loss": 0.1357, "step": 8174 }, { "epoch": 0.1447754652074008, "grad_norm": 0.8379197120666504, "learning_rate": 2.8975478837649578e-05, "loss": 0.1156, "step": 8175 }, { "epoch": 0.14479317474442924, "grad_norm": 1.1613620519638062, "learning_rate": 2.89751663024071e-05, "loss": 0.1252, "step": 8176 }, { "epoch": 0.14481088428145766, "grad_norm": 1.1109318733215332, "learning_rate": 2.897485372118774e-05, "loss": 0.1112, "step": 8177 }, { "epoch": 0.1448285938184861, "grad_norm": 1.0520563125610352, "learning_rate": 2.897454109399252e-05, "loss": 0.1631, "step": 8178 }, { "epoch": 0.14484630335551452, "grad_norm": 0.7479009032249451, "learning_rate": 2.8974228420822473e-05, "loss": 0.0899, "step": 8179 }, { "epoch": 0.14486401289254297, "grad_norm": 1.4460227489471436, "learning_rate": 2.897391570167863e-05, "loss": 0.1094, "step": 8180 }, { "epoch": 0.1448817224295714, "grad_norm": 0.7775472402572632, "learning_rate": 2.8973602936562018e-05, "loss": 0.121, "step": 8181 }, { "epoch": 0.14489943196659982, "grad_norm": 1.1402889490127563, "learning_rate": 2.8973290125473666e-05, "loss": 0.0887, "step": 8182 }, { "epoch": 0.14491714150362825, "grad_norm": 1.47563636302948, "learning_rate": 2.8972977268414606e-05, "loss": 0.1214, "step": 8183 }, { "epoch": 0.14493485104065668, "grad_norm": 0.9232064485549927, "learning_rate": 2.897266436538586e-05, "loss": 0.1166, "step": 8184 }, { "epoch": 0.1449525605776851, "grad_norm": 0.909173846244812, "learning_rate": 2.8972351416388464e-05, "loss": 0.131, "step": 8185 }, { "epoch": 0.14497027011471353, "grad_norm": 1.001694679260254, "learning_rate": 2.8972038421423442e-05, "loss": 0.1266, "step": 8186 }, { "epoch": 0.14498797965174196, "grad_norm": 1.0563912391662598, "learning_rate": 2.8971725380491826e-05, "loss": 0.1209, "step": 8187 }, { "epoch": 0.14500568918877038, "grad_norm": 1.2473170757293701, "learning_rate": 2.897141229359465e-05, "loss": 0.0809, "step": 8188 }, { "epoch": 0.1450233987257988, "grad_norm": 1.0837429761886597, "learning_rate": 2.897109916073294e-05, "loss": 0.146, "step": 8189 }, { "epoch": 0.14504110826282723, "grad_norm": 0.9085285663604736, "learning_rate": 2.8970785981907727e-05, "loss": 0.0987, "step": 8190 }, { "epoch": 0.14505881779985566, "grad_norm": 1.4781429767608643, "learning_rate": 2.8970472757120037e-05, "loss": 0.1418, "step": 8191 }, { "epoch": 0.1450765273368841, "grad_norm": 0.9943220019340515, "learning_rate": 2.897015948637091e-05, "loss": 0.1203, "step": 8192 }, { "epoch": 0.1450942368739125, "grad_norm": 0.7752689123153687, "learning_rate": 2.896984616966137e-05, "loss": 0.1127, "step": 8193 }, { "epoch": 0.14511194641094094, "grad_norm": 1.3182357549667358, "learning_rate": 2.8969532806992448e-05, "loss": 0.1297, "step": 8194 }, { "epoch": 0.1451296559479694, "grad_norm": 0.8618564605712891, "learning_rate": 2.8969219398365178e-05, "loss": 0.0929, "step": 8195 }, { "epoch": 0.14514736548499782, "grad_norm": 0.9638327956199646, "learning_rate": 2.8968905943780586e-05, "loss": 0.1405, "step": 8196 }, { "epoch": 0.14516507502202625, "grad_norm": 0.7653736472129822, "learning_rate": 2.8968592443239708e-05, "loss": 0.1202, "step": 8197 }, { "epoch": 0.14518278455905467, "grad_norm": 2.571924924850464, "learning_rate": 2.8968278896743574e-05, "loss": 0.1361, "step": 8198 }, { "epoch": 0.1452004940960831, "grad_norm": 1.3382463455200195, "learning_rate": 2.8967965304293217e-05, "loss": 0.1314, "step": 8199 }, { "epoch": 0.14521820363311153, "grad_norm": 0.9592819809913635, "learning_rate": 2.8967651665889666e-05, "loss": 0.0836, "step": 8200 }, { "epoch": 0.14523591317013995, "grad_norm": 0.8019886016845703, "learning_rate": 2.896733798153395e-05, "loss": 0.1193, "step": 8201 }, { "epoch": 0.14525362270716838, "grad_norm": 0.6556316614151001, "learning_rate": 2.8967024251227107e-05, "loss": 0.1467, "step": 8202 }, { "epoch": 0.1452713322441968, "grad_norm": 1.5144582986831665, "learning_rate": 2.896671047497017e-05, "loss": 0.1193, "step": 8203 }, { "epoch": 0.14528904178122523, "grad_norm": 0.7955426573753357, "learning_rate": 2.8966396652764162e-05, "loss": 0.1059, "step": 8204 }, { "epoch": 0.14530675131825366, "grad_norm": 1.4624392986297607, "learning_rate": 2.8966082784610127e-05, "loss": 0.1065, "step": 8205 }, { "epoch": 0.14532446085528208, "grad_norm": 0.9948825240135193, "learning_rate": 2.8965768870509088e-05, "loss": 0.1265, "step": 8206 }, { "epoch": 0.1453421703923105, "grad_norm": 0.9304824471473694, "learning_rate": 2.8965454910462088e-05, "loss": 0.0964, "step": 8207 }, { "epoch": 0.14535987992933894, "grad_norm": 0.9304362535476685, "learning_rate": 2.896514090447015e-05, "loss": 0.0904, "step": 8208 }, { "epoch": 0.14537758946636736, "grad_norm": 0.7452035546302795, "learning_rate": 2.896482685253431e-05, "loss": 0.1038, "step": 8209 }, { "epoch": 0.14539529900339582, "grad_norm": 0.7095679044723511, "learning_rate": 2.89645127546556e-05, "loss": 0.0804, "step": 8210 }, { "epoch": 0.14541300854042424, "grad_norm": 0.7587662935256958, "learning_rate": 2.896419861083506e-05, "loss": 0.1132, "step": 8211 }, { "epoch": 0.14543071807745267, "grad_norm": 1.4493119716644287, "learning_rate": 2.8963884421073717e-05, "loss": 0.1115, "step": 8212 }, { "epoch": 0.1454484276144811, "grad_norm": 1.1344372034072876, "learning_rate": 2.896357018537261e-05, "loss": 0.1279, "step": 8213 }, { "epoch": 0.14546613715150952, "grad_norm": 0.7838382124900818, "learning_rate": 2.8963255903732765e-05, "loss": 0.1396, "step": 8214 }, { "epoch": 0.14548384668853795, "grad_norm": 1.0080510377883911, "learning_rate": 2.896294157615522e-05, "loss": 0.1294, "step": 8215 }, { "epoch": 0.14550155622556638, "grad_norm": 0.5936316251754761, "learning_rate": 2.8962627202641015e-05, "loss": 0.0892, "step": 8216 }, { "epoch": 0.1455192657625948, "grad_norm": 0.7502555847167969, "learning_rate": 2.8962312783191174e-05, "loss": 0.0828, "step": 8217 }, { "epoch": 0.14553697529962323, "grad_norm": 1.4584909677505493, "learning_rate": 2.8961998317806737e-05, "loss": 0.1515, "step": 8218 }, { "epoch": 0.14555468483665165, "grad_norm": 1.0636661052703857, "learning_rate": 2.896168380648874e-05, "loss": 0.1275, "step": 8219 }, { "epoch": 0.14557239437368008, "grad_norm": 1.784081220626831, "learning_rate": 2.8961369249238214e-05, "loss": 0.1172, "step": 8220 }, { "epoch": 0.1455901039107085, "grad_norm": 0.9317235946655273, "learning_rate": 2.8961054646056197e-05, "loss": 0.0861, "step": 8221 }, { "epoch": 0.14560781344773693, "grad_norm": 0.8679249286651611, "learning_rate": 2.8960739996943717e-05, "loss": 0.1248, "step": 8222 }, { "epoch": 0.14562552298476536, "grad_norm": 0.7597274780273438, "learning_rate": 2.8960425301901817e-05, "loss": 0.1247, "step": 8223 }, { "epoch": 0.1456432325217938, "grad_norm": 0.9506083726882935, "learning_rate": 2.896011056093153e-05, "loss": 0.1029, "step": 8224 }, { "epoch": 0.14566094205882224, "grad_norm": 0.745732843875885, "learning_rate": 2.895979577403389e-05, "loss": 0.1288, "step": 8225 }, { "epoch": 0.14567865159585067, "grad_norm": 0.8333691954612732, "learning_rate": 2.8959480941209938e-05, "loss": 0.0966, "step": 8226 }, { "epoch": 0.1456963611328791, "grad_norm": 1.1886976957321167, "learning_rate": 2.8959166062460703e-05, "loss": 0.1357, "step": 8227 }, { "epoch": 0.14571407066990752, "grad_norm": 0.6383328437805176, "learning_rate": 2.8958851137787226e-05, "loss": 0.1205, "step": 8228 }, { "epoch": 0.14573178020693595, "grad_norm": 1.1127272844314575, "learning_rate": 2.8958536167190534e-05, "loss": 0.1087, "step": 8229 }, { "epoch": 0.14574948974396437, "grad_norm": 0.9705687165260315, "learning_rate": 2.895822115067168e-05, "loss": 0.1149, "step": 8230 }, { "epoch": 0.1457671992809928, "grad_norm": 1.5551371574401855, "learning_rate": 2.895790608823168e-05, "loss": 0.1216, "step": 8231 }, { "epoch": 0.14578490881802122, "grad_norm": 1.6912575960159302, "learning_rate": 2.8957590979871584e-05, "loss": 0.1537, "step": 8232 }, { "epoch": 0.14580261835504965, "grad_norm": 1.322267770767212, "learning_rate": 2.895727582559243e-05, "loss": 0.1084, "step": 8233 }, { "epoch": 0.14582032789207808, "grad_norm": 0.8602104187011719, "learning_rate": 2.8956960625395245e-05, "loss": 0.1051, "step": 8234 }, { "epoch": 0.1458380374291065, "grad_norm": 1.0754334926605225, "learning_rate": 2.8956645379281072e-05, "loss": 0.1031, "step": 8235 }, { "epoch": 0.14585574696613493, "grad_norm": 1.0918476581573486, "learning_rate": 2.8956330087250948e-05, "loss": 0.1346, "step": 8236 }, { "epoch": 0.14587345650316336, "grad_norm": 1.0904476642608643, "learning_rate": 2.8956014749305908e-05, "loss": 0.0795, "step": 8237 }, { "epoch": 0.14589116604019178, "grad_norm": 0.8066553473472595, "learning_rate": 2.8955699365446993e-05, "loss": 0.1434, "step": 8238 }, { "epoch": 0.1459088755772202, "grad_norm": 1.029081106185913, "learning_rate": 2.895538393567524e-05, "loss": 0.1563, "step": 8239 }, { "epoch": 0.14592658511424866, "grad_norm": 0.9704173803329468, "learning_rate": 2.8955068459991683e-05, "loss": 0.1311, "step": 8240 }, { "epoch": 0.1459442946512771, "grad_norm": 1.375266671180725, "learning_rate": 2.8954752938397365e-05, "loss": 0.1415, "step": 8241 }, { "epoch": 0.14596200418830552, "grad_norm": 0.7884438037872314, "learning_rate": 2.8954437370893318e-05, "loss": 0.0785, "step": 8242 }, { "epoch": 0.14597971372533394, "grad_norm": 1.250495195388794, "learning_rate": 2.8954121757480584e-05, "loss": 0.1059, "step": 8243 }, { "epoch": 0.14599742326236237, "grad_norm": 0.7164839506149292, "learning_rate": 2.8953806098160202e-05, "loss": 0.1571, "step": 8244 }, { "epoch": 0.1460151327993908, "grad_norm": 1.3216429948806763, "learning_rate": 2.8953490392933206e-05, "loss": 0.146, "step": 8245 }, { "epoch": 0.14603284233641922, "grad_norm": 1.011225938796997, "learning_rate": 2.8953174641800643e-05, "loss": 0.1213, "step": 8246 }, { "epoch": 0.14605055187344765, "grad_norm": 1.4293653964996338, "learning_rate": 2.8952858844763543e-05, "loss": 0.1288, "step": 8247 }, { "epoch": 0.14606826141047607, "grad_norm": 1.3807815313339233, "learning_rate": 2.8952543001822952e-05, "loss": 0.1274, "step": 8248 }, { "epoch": 0.1460859709475045, "grad_norm": 1.4905412197113037, "learning_rate": 2.8952227112979905e-05, "loss": 0.1269, "step": 8249 }, { "epoch": 0.14610368048453293, "grad_norm": 1.068044900894165, "learning_rate": 2.895191117823544e-05, "loss": 0.125, "step": 8250 }, { "epoch": 0.14612139002156135, "grad_norm": 0.5798987150192261, "learning_rate": 2.8951595197590596e-05, "loss": 0.1111, "step": 8251 }, { "epoch": 0.14613909955858978, "grad_norm": 1.0758224725723267, "learning_rate": 2.895127917104642e-05, "loss": 0.1978, "step": 8252 }, { "epoch": 0.1461568090956182, "grad_norm": 0.9017530083656311, "learning_rate": 2.8950963098603946e-05, "loss": 0.1069, "step": 8253 }, { "epoch": 0.14617451863264663, "grad_norm": 1.7433321475982666, "learning_rate": 2.8950646980264212e-05, "loss": 0.1393, "step": 8254 }, { "epoch": 0.1461922281696751, "grad_norm": 1.034582495689392, "learning_rate": 2.895033081602826e-05, "loss": 0.1361, "step": 8255 }, { "epoch": 0.1462099377067035, "grad_norm": 1.0349400043487549, "learning_rate": 2.8950014605897135e-05, "loss": 0.1046, "step": 8256 }, { "epoch": 0.14622764724373194, "grad_norm": 1.2726949453353882, "learning_rate": 2.8949698349871868e-05, "loss": 0.1465, "step": 8257 }, { "epoch": 0.14624535678076037, "grad_norm": 1.0797315835952759, "learning_rate": 2.8949382047953505e-05, "loss": 0.1164, "step": 8258 }, { "epoch": 0.1462630663177888, "grad_norm": 1.4919792413711548, "learning_rate": 2.894906570014309e-05, "loss": 0.1361, "step": 8259 }, { "epoch": 0.14628077585481722, "grad_norm": 0.9929150342941284, "learning_rate": 2.8948749306441657e-05, "loss": 0.0937, "step": 8260 }, { "epoch": 0.14629848539184565, "grad_norm": 1.5854295492172241, "learning_rate": 2.894843286685025e-05, "loss": 0.131, "step": 8261 }, { "epoch": 0.14631619492887407, "grad_norm": 1.3037383556365967, "learning_rate": 2.8948116381369912e-05, "loss": 0.0904, "step": 8262 }, { "epoch": 0.1463339044659025, "grad_norm": 1.1562681198120117, "learning_rate": 2.8947799850001674e-05, "loss": 0.1224, "step": 8263 }, { "epoch": 0.14635161400293092, "grad_norm": 0.6406860947608948, "learning_rate": 2.894748327274659e-05, "loss": 0.095, "step": 8264 }, { "epoch": 0.14636932353995935, "grad_norm": 0.8841346502304077, "learning_rate": 2.8947166649605698e-05, "loss": 0.1154, "step": 8265 }, { "epoch": 0.14638703307698778, "grad_norm": 0.9466145038604736, "learning_rate": 2.894684998058004e-05, "loss": 0.1051, "step": 8266 }, { "epoch": 0.1464047426140162, "grad_norm": 0.9524233937263489, "learning_rate": 2.8946533265670652e-05, "loss": 0.1461, "step": 8267 }, { "epoch": 0.14642245215104463, "grad_norm": 1.0010597705841064, "learning_rate": 2.8946216504878584e-05, "loss": 0.1373, "step": 8268 }, { "epoch": 0.14644016168807306, "grad_norm": 1.2914910316467285, "learning_rate": 2.894589969820487e-05, "loss": 0.1345, "step": 8269 }, { "epoch": 0.1464578712251015, "grad_norm": 0.8532371520996094, "learning_rate": 2.894558284565056e-05, "loss": 0.1468, "step": 8270 }, { "epoch": 0.14647558076212994, "grad_norm": 0.7134577035903931, "learning_rate": 2.8945265947216694e-05, "loss": 0.1003, "step": 8271 }, { "epoch": 0.14649329029915836, "grad_norm": 0.8924064040184021, "learning_rate": 2.8944949002904312e-05, "loss": 0.1496, "step": 8272 }, { "epoch": 0.1465109998361868, "grad_norm": 1.184678316116333, "learning_rate": 2.8944632012714454e-05, "loss": 0.1544, "step": 8273 }, { "epoch": 0.14652870937321522, "grad_norm": 1.1134393215179443, "learning_rate": 2.894431497664817e-05, "loss": 0.1163, "step": 8274 }, { "epoch": 0.14654641891024364, "grad_norm": 1.2541884183883667, "learning_rate": 2.8943997894706504e-05, "loss": 0.1198, "step": 8275 }, { "epoch": 0.14656412844727207, "grad_norm": 0.7763073444366455, "learning_rate": 2.894368076689049e-05, "loss": 0.0996, "step": 8276 }, { "epoch": 0.1465818379843005, "grad_norm": 1.1718740463256836, "learning_rate": 2.8943363593201178e-05, "loss": 0.0955, "step": 8277 }, { "epoch": 0.14659954752132892, "grad_norm": 1.031756043434143, "learning_rate": 2.8943046373639612e-05, "loss": 0.1069, "step": 8278 }, { "epoch": 0.14661725705835735, "grad_norm": 1.06956946849823, "learning_rate": 2.8942729108206832e-05, "loss": 0.1193, "step": 8279 }, { "epoch": 0.14663496659538577, "grad_norm": 1.6417251825332642, "learning_rate": 2.8942411796903883e-05, "loss": 0.1299, "step": 8280 }, { "epoch": 0.1466526761324142, "grad_norm": 0.7890477180480957, "learning_rate": 2.8942094439731814e-05, "loss": 0.1301, "step": 8281 }, { "epoch": 0.14667038566944263, "grad_norm": 0.7511901259422302, "learning_rate": 2.8941777036691656e-05, "loss": 0.1261, "step": 8282 }, { "epoch": 0.14668809520647105, "grad_norm": 0.9621019959449768, "learning_rate": 2.894145958778447e-05, "loss": 0.1649, "step": 8283 }, { "epoch": 0.14670580474349948, "grad_norm": 1.3186182975769043, "learning_rate": 2.8941142093011287e-05, "loss": 0.1105, "step": 8284 }, { "epoch": 0.14672351428052793, "grad_norm": 0.9635287523269653, "learning_rate": 2.8940824552373158e-05, "loss": 0.0751, "step": 8285 }, { "epoch": 0.14674122381755636, "grad_norm": 0.9280521273612976, "learning_rate": 2.8940506965871126e-05, "loss": 0.0858, "step": 8286 }, { "epoch": 0.1467589333545848, "grad_norm": 1.1860095262527466, "learning_rate": 2.8940189333506237e-05, "loss": 0.101, "step": 8287 }, { "epoch": 0.1467766428916132, "grad_norm": 0.871720016002655, "learning_rate": 2.8939871655279533e-05, "loss": 0.0884, "step": 8288 }, { "epoch": 0.14679435242864164, "grad_norm": 2.057413101196289, "learning_rate": 2.893955393119206e-05, "loss": 0.1173, "step": 8289 }, { "epoch": 0.14681206196567007, "grad_norm": 1.3919352293014526, "learning_rate": 2.8939236161244872e-05, "loss": 0.1282, "step": 8290 }, { "epoch": 0.1468297715026985, "grad_norm": 1.0574042797088623, "learning_rate": 2.8938918345438997e-05, "loss": 0.117, "step": 8291 }, { "epoch": 0.14684748103972692, "grad_norm": 1.01448392868042, "learning_rate": 2.8938600483775496e-05, "loss": 0.134, "step": 8292 }, { "epoch": 0.14686519057675534, "grad_norm": 1.1224066019058228, "learning_rate": 2.893828257625541e-05, "loss": 0.1242, "step": 8293 }, { "epoch": 0.14688290011378377, "grad_norm": 0.9692729115486145, "learning_rate": 2.8937964622879784e-05, "loss": 0.0961, "step": 8294 }, { "epoch": 0.1469006096508122, "grad_norm": 1.1326956748962402, "learning_rate": 2.8937646623649662e-05, "loss": 0.1161, "step": 8295 }, { "epoch": 0.14691831918784062, "grad_norm": 1.2091763019561768, "learning_rate": 2.8937328578566093e-05, "loss": 0.1216, "step": 8296 }, { "epoch": 0.14693602872486905, "grad_norm": 0.832278847694397, "learning_rate": 2.8937010487630124e-05, "loss": 0.1097, "step": 8297 }, { "epoch": 0.14695373826189748, "grad_norm": 1.059230089187622, "learning_rate": 2.8936692350842793e-05, "loss": 0.1179, "step": 8298 }, { "epoch": 0.1469714477989259, "grad_norm": 1.2813935279846191, "learning_rate": 2.8936374168205163e-05, "loss": 0.105, "step": 8299 }, { "epoch": 0.14698915733595436, "grad_norm": 0.8993552923202515, "learning_rate": 2.893605593971827e-05, "loss": 0.1243, "step": 8300 }, { "epoch": 0.14700686687298278, "grad_norm": 0.8473024964332581, "learning_rate": 2.8935737665383163e-05, "loss": 0.1251, "step": 8301 }, { "epoch": 0.1470245764100112, "grad_norm": 0.9734718203544617, "learning_rate": 2.893541934520088e-05, "loss": 0.1829, "step": 8302 }, { "epoch": 0.14704228594703964, "grad_norm": 1.3022699356079102, "learning_rate": 2.8935100979172486e-05, "loss": 0.1132, "step": 8303 }, { "epoch": 0.14705999548406806, "grad_norm": 0.9739146828651428, "learning_rate": 2.8934782567299016e-05, "loss": 0.1444, "step": 8304 }, { "epoch": 0.1470777050210965, "grad_norm": 1.083857774734497, "learning_rate": 2.893446410958152e-05, "loss": 0.1126, "step": 8305 }, { "epoch": 0.14709541455812492, "grad_norm": 1.2160931825637817, "learning_rate": 2.893414560602105e-05, "loss": 0.111, "step": 8306 }, { "epoch": 0.14711312409515334, "grad_norm": 0.8404926061630249, "learning_rate": 2.8933827056618648e-05, "loss": 0.1031, "step": 8307 }, { "epoch": 0.14713083363218177, "grad_norm": 0.9121972322463989, "learning_rate": 2.8933508461375362e-05, "loss": 0.1092, "step": 8308 }, { "epoch": 0.1471485431692102, "grad_norm": 1.1348559856414795, "learning_rate": 2.8933189820292245e-05, "loss": 0.0976, "step": 8309 }, { "epoch": 0.14716625270623862, "grad_norm": 1.1052851676940918, "learning_rate": 2.8932871133370345e-05, "loss": 0.1306, "step": 8310 }, { "epoch": 0.14718396224326705, "grad_norm": 0.7714388370513916, "learning_rate": 2.8932552400610707e-05, "loss": 0.1693, "step": 8311 }, { "epoch": 0.14720167178029547, "grad_norm": 0.8293941617012024, "learning_rate": 2.8932233622014377e-05, "loss": 0.1072, "step": 8312 }, { "epoch": 0.1472193813173239, "grad_norm": 0.9238527417182922, "learning_rate": 2.893191479758241e-05, "loss": 0.0852, "step": 8313 }, { "epoch": 0.14723709085435235, "grad_norm": 1.2317779064178467, "learning_rate": 2.8931595927315854e-05, "loss": 0.1172, "step": 8314 }, { "epoch": 0.14725480039138078, "grad_norm": 1.2356176376342773, "learning_rate": 2.8931277011215752e-05, "loss": 0.1096, "step": 8315 }, { "epoch": 0.1472725099284092, "grad_norm": 0.9456567168235779, "learning_rate": 2.893095804928316e-05, "loss": 0.0913, "step": 8316 }, { "epoch": 0.14729021946543763, "grad_norm": 0.9641491174697876, "learning_rate": 2.8930639041519126e-05, "loss": 0.1083, "step": 8317 }, { "epoch": 0.14730792900246606, "grad_norm": 1.0717676877975464, "learning_rate": 2.89303199879247e-05, "loss": 0.1171, "step": 8318 }, { "epoch": 0.14732563853949449, "grad_norm": 1.3443052768707275, "learning_rate": 2.893000088850093e-05, "loss": 0.1829, "step": 8319 }, { "epoch": 0.1473433480765229, "grad_norm": 1.971853256225586, "learning_rate": 2.8929681743248863e-05, "loss": 0.1838, "step": 8320 }, { "epoch": 0.14736105761355134, "grad_norm": 1.0450263023376465, "learning_rate": 2.892936255216955e-05, "loss": 0.1111, "step": 8321 }, { "epoch": 0.14737876715057976, "grad_norm": 1.0030750036239624, "learning_rate": 2.8929043315264048e-05, "loss": 0.0838, "step": 8322 }, { "epoch": 0.1473964766876082, "grad_norm": 0.8045002818107605, "learning_rate": 2.89287240325334e-05, "loss": 0.117, "step": 8323 }, { "epoch": 0.14741418622463662, "grad_norm": 0.8836508393287659, "learning_rate": 2.892840470397866e-05, "loss": 0.1014, "step": 8324 }, { "epoch": 0.14743189576166504, "grad_norm": 1.299026370048523, "learning_rate": 2.8928085329600876e-05, "loss": 0.1191, "step": 8325 }, { "epoch": 0.14744960529869347, "grad_norm": 0.989152193069458, "learning_rate": 2.8927765909401103e-05, "loss": 0.08, "step": 8326 }, { "epoch": 0.1474673148357219, "grad_norm": 0.947111189365387, "learning_rate": 2.8927446443380385e-05, "loss": 0.1481, "step": 8327 }, { "epoch": 0.14748502437275032, "grad_norm": 1.2436801195144653, "learning_rate": 2.8927126931539782e-05, "loss": 0.149, "step": 8328 }, { "epoch": 0.14750273390977878, "grad_norm": 1.123696208000183, "learning_rate": 2.8926807373880338e-05, "loss": 0.1249, "step": 8329 }, { "epoch": 0.1475204434468072, "grad_norm": 1.291167140007019, "learning_rate": 2.89264877704031e-05, "loss": 0.1548, "step": 8330 }, { "epoch": 0.14753815298383563, "grad_norm": 1.6513087749481201, "learning_rate": 2.8926168121109133e-05, "loss": 0.1242, "step": 8331 }, { "epoch": 0.14755586252086406, "grad_norm": 1.1561083793640137, "learning_rate": 2.892584842599948e-05, "loss": 0.1098, "step": 8332 }, { "epoch": 0.14757357205789248, "grad_norm": 1.254805088043213, "learning_rate": 2.8925528685075192e-05, "loss": 0.1465, "step": 8333 }, { "epoch": 0.1475912815949209, "grad_norm": 0.8978260159492493, "learning_rate": 2.8925208898337327e-05, "loss": 0.1146, "step": 8334 }, { "epoch": 0.14760899113194934, "grad_norm": 1.165871262550354, "learning_rate": 2.892488906578693e-05, "loss": 0.1008, "step": 8335 }, { "epoch": 0.14762670066897776, "grad_norm": 0.846491813659668, "learning_rate": 2.892456918742506e-05, "loss": 0.0992, "step": 8336 }, { "epoch": 0.1476444102060062, "grad_norm": 0.6066442728042603, "learning_rate": 2.892424926325276e-05, "loss": 0.1139, "step": 8337 }, { "epoch": 0.14766211974303461, "grad_norm": 0.9852542877197266, "learning_rate": 2.8923929293271092e-05, "loss": 0.1489, "step": 8338 }, { "epoch": 0.14767982928006304, "grad_norm": 1.1987037658691406, "learning_rate": 2.89236092774811e-05, "loss": 0.1138, "step": 8339 }, { "epoch": 0.14769753881709147, "grad_norm": 1.4870195388793945, "learning_rate": 2.8923289215883848e-05, "loss": 0.1369, "step": 8340 }, { "epoch": 0.1477152483541199, "grad_norm": 1.0431016683578491, "learning_rate": 2.892296910848038e-05, "loss": 0.1145, "step": 8341 }, { "epoch": 0.14773295789114832, "grad_norm": 1.3811109066009521, "learning_rate": 2.892264895527175e-05, "loss": 0.1383, "step": 8342 }, { "epoch": 0.14775066742817675, "grad_norm": 1.8585976362228394, "learning_rate": 2.8922328756259017e-05, "loss": 0.1171, "step": 8343 }, { "epoch": 0.1477683769652052, "grad_norm": 1.1697824001312256, "learning_rate": 2.892200851144323e-05, "loss": 0.1441, "step": 8344 }, { "epoch": 0.14778608650223363, "grad_norm": 1.8095420598983765, "learning_rate": 2.8921688220825442e-05, "loss": 0.0992, "step": 8345 }, { "epoch": 0.14780379603926205, "grad_norm": 0.9050758481025696, "learning_rate": 2.892136788440671e-05, "loss": 0.1139, "step": 8346 }, { "epoch": 0.14782150557629048, "grad_norm": 1.6033291816711426, "learning_rate": 2.8921047502188082e-05, "loss": 0.1007, "step": 8347 }, { "epoch": 0.1478392151133189, "grad_norm": 1.1137140989303589, "learning_rate": 2.8920727074170616e-05, "loss": 0.1132, "step": 8348 }, { "epoch": 0.14785692465034733, "grad_norm": 0.9865022897720337, "learning_rate": 2.8920406600355372e-05, "loss": 0.1087, "step": 8349 }, { "epoch": 0.14787463418737576, "grad_norm": 0.8724144101142883, "learning_rate": 2.892008608074339e-05, "loss": 0.0925, "step": 8350 }, { "epoch": 0.14789234372440418, "grad_norm": 1.3824340105056763, "learning_rate": 2.891976551533574e-05, "loss": 0.1229, "step": 8351 }, { "epoch": 0.1479100532614326, "grad_norm": 1.1404629945755005, "learning_rate": 2.8919444904133464e-05, "loss": 0.1294, "step": 8352 }, { "epoch": 0.14792776279846104, "grad_norm": 1.1419496536254883, "learning_rate": 2.8919124247137625e-05, "loss": 0.1521, "step": 8353 }, { "epoch": 0.14794547233548946, "grad_norm": 1.961695909500122, "learning_rate": 2.8918803544349274e-05, "loss": 0.1517, "step": 8354 }, { "epoch": 0.1479631818725179, "grad_norm": 1.926366925239563, "learning_rate": 2.8918482795769465e-05, "loss": 0.2236, "step": 8355 }, { "epoch": 0.14798089140954632, "grad_norm": 0.9718257784843445, "learning_rate": 2.8918162001399258e-05, "loss": 0.1348, "step": 8356 }, { "epoch": 0.14799860094657474, "grad_norm": 0.8059102892875671, "learning_rate": 2.891784116123971e-05, "loss": 0.0924, "step": 8357 }, { "epoch": 0.14801631048360317, "grad_norm": 1.5458104610443115, "learning_rate": 2.891752027529186e-05, "loss": 0.1674, "step": 8358 }, { "epoch": 0.14803402002063162, "grad_norm": 0.8034359812736511, "learning_rate": 2.891719934355679e-05, "loss": 0.0721, "step": 8359 }, { "epoch": 0.14805172955766005, "grad_norm": 1.0621273517608643, "learning_rate": 2.8916878366035534e-05, "loss": 0.1104, "step": 8360 }, { "epoch": 0.14806943909468848, "grad_norm": 1.3948748111724854, "learning_rate": 2.8916557342729154e-05, "loss": 0.1593, "step": 8361 }, { "epoch": 0.1480871486317169, "grad_norm": 0.9430567026138306, "learning_rate": 2.8916236273638714e-05, "loss": 0.0957, "step": 8362 }, { "epoch": 0.14810485816874533, "grad_norm": 1.2411750555038452, "learning_rate": 2.8915915158765263e-05, "loss": 0.1056, "step": 8363 }, { "epoch": 0.14812256770577376, "grad_norm": 0.5548197031021118, "learning_rate": 2.891559399810985e-05, "loss": 0.1264, "step": 8364 }, { "epoch": 0.14814027724280218, "grad_norm": 1.2325940132141113, "learning_rate": 2.891527279167355e-05, "loss": 0.0951, "step": 8365 }, { "epoch": 0.1481579867798306, "grad_norm": 0.8824803829193115, "learning_rate": 2.8914951539457408e-05, "loss": 0.0963, "step": 8366 }, { "epoch": 0.14817569631685903, "grad_norm": 1.1993663311004639, "learning_rate": 2.891463024146248e-05, "loss": 0.1188, "step": 8367 }, { "epoch": 0.14819340585388746, "grad_norm": 0.9175613522529602, "learning_rate": 2.891430889768983e-05, "loss": 0.1306, "step": 8368 }, { "epoch": 0.1482111153909159, "grad_norm": 1.6921789646148682, "learning_rate": 2.8913987508140505e-05, "loss": 0.0981, "step": 8369 }, { "epoch": 0.1482288249279443, "grad_norm": 1.1843442916870117, "learning_rate": 2.8913666072815578e-05, "loss": 0.1178, "step": 8370 }, { "epoch": 0.14824653446497274, "grad_norm": 1.0331841707229614, "learning_rate": 2.891334459171609e-05, "loss": 0.1099, "step": 8371 }, { "epoch": 0.14826424400200117, "grad_norm": 0.7860304713249207, "learning_rate": 2.8913023064843105e-05, "loss": 0.104, "step": 8372 }, { "epoch": 0.1482819535390296, "grad_norm": 0.7335470914840698, "learning_rate": 2.8912701492197684e-05, "loss": 0.0988, "step": 8373 }, { "epoch": 0.14829966307605805, "grad_norm": 0.9674996137619019, "learning_rate": 2.8912379873780883e-05, "loss": 0.1283, "step": 8374 }, { "epoch": 0.14831737261308647, "grad_norm": 1.078711748123169, "learning_rate": 2.8912058209593755e-05, "loss": 0.0964, "step": 8375 }, { "epoch": 0.1483350821501149, "grad_norm": 1.1103872060775757, "learning_rate": 2.8911736499637364e-05, "loss": 0.1368, "step": 8376 }, { "epoch": 0.14835279168714333, "grad_norm": 0.7709578275680542, "learning_rate": 2.891141474391277e-05, "loss": 0.1144, "step": 8377 }, { "epoch": 0.14837050122417175, "grad_norm": 1.157982349395752, "learning_rate": 2.8911092942421027e-05, "loss": 0.1344, "step": 8378 }, { "epoch": 0.14838821076120018, "grad_norm": 1.19827139377594, "learning_rate": 2.8910771095163195e-05, "loss": 0.1051, "step": 8379 }, { "epoch": 0.1484059202982286, "grad_norm": 1.2937114238739014, "learning_rate": 2.891044920214033e-05, "loss": 0.127, "step": 8380 }, { "epoch": 0.14842362983525703, "grad_norm": 2.365654945373535, "learning_rate": 2.8910127263353504e-05, "loss": 0.147, "step": 8381 }, { "epoch": 0.14844133937228546, "grad_norm": 0.839387834072113, "learning_rate": 2.8909805278803757e-05, "loss": 0.124, "step": 8382 }, { "epoch": 0.14845904890931388, "grad_norm": 0.9206784963607788, "learning_rate": 2.890948324849216e-05, "loss": 0.121, "step": 8383 }, { "epoch": 0.1484767584463423, "grad_norm": 1.0216809511184692, "learning_rate": 2.890916117241977e-05, "loss": 0.0713, "step": 8384 }, { "epoch": 0.14849446798337074, "grad_norm": 0.9044396281242371, "learning_rate": 2.8908839050587647e-05, "loss": 0.0775, "step": 8385 }, { "epoch": 0.14851217752039916, "grad_norm": 1.0573738813400269, "learning_rate": 2.890851688299685e-05, "loss": 0.1299, "step": 8386 }, { "epoch": 0.1485298870574276, "grad_norm": 1.2569894790649414, "learning_rate": 2.8908194669648438e-05, "loss": 0.1359, "step": 8387 }, { "epoch": 0.14854759659445602, "grad_norm": 1.1158515214920044, "learning_rate": 2.8907872410543474e-05, "loss": 0.1287, "step": 8388 }, { "epoch": 0.14856530613148447, "grad_norm": 1.2336297035217285, "learning_rate": 2.8907550105683014e-05, "loss": 0.1254, "step": 8389 }, { "epoch": 0.1485830156685129, "grad_norm": 1.23321533203125, "learning_rate": 2.8907227755068128e-05, "loss": 0.133, "step": 8390 }, { "epoch": 0.14860072520554132, "grad_norm": 2.5660533905029297, "learning_rate": 2.8906905358699862e-05, "loss": 0.1092, "step": 8391 }, { "epoch": 0.14861843474256975, "grad_norm": 1.2013347148895264, "learning_rate": 2.8906582916579285e-05, "loss": 0.1485, "step": 8392 }, { "epoch": 0.14863614427959818, "grad_norm": 0.7658651471138, "learning_rate": 2.8906260428707456e-05, "loss": 0.0513, "step": 8393 }, { "epoch": 0.1486538538166266, "grad_norm": 1.0780737400054932, "learning_rate": 2.890593789508544e-05, "loss": 0.1147, "step": 8394 }, { "epoch": 0.14867156335365503, "grad_norm": 1.3951219320297241, "learning_rate": 2.8905615315714297e-05, "loss": 0.1193, "step": 8395 }, { "epoch": 0.14868927289068345, "grad_norm": 1.0132256746292114, "learning_rate": 2.890529269059508e-05, "loss": 0.144, "step": 8396 }, { "epoch": 0.14870698242771188, "grad_norm": 1.2107820510864258, "learning_rate": 2.8904970019728857e-05, "loss": 0.1396, "step": 8397 }, { "epoch": 0.1487246919647403, "grad_norm": 0.8677024841308594, "learning_rate": 2.890464730311669e-05, "loss": 0.1078, "step": 8398 }, { "epoch": 0.14874240150176873, "grad_norm": 0.6832256317138672, "learning_rate": 2.8904324540759643e-05, "loss": 0.0952, "step": 8399 }, { "epoch": 0.14876011103879716, "grad_norm": 1.0203030109405518, "learning_rate": 2.8904001732658772e-05, "loss": 0.1193, "step": 8400 }, { "epoch": 0.1487778205758256, "grad_norm": 1.0279512405395508, "learning_rate": 2.890367887881514e-05, "loss": 0.1407, "step": 8401 }, { "epoch": 0.148795530112854, "grad_norm": 1.0343096256256104, "learning_rate": 2.8903355979229815e-05, "loss": 0.1102, "step": 8402 }, { "epoch": 0.14881323964988244, "grad_norm": 0.8586990833282471, "learning_rate": 2.890303303390385e-05, "loss": 0.0959, "step": 8403 }, { "epoch": 0.1488309491869109, "grad_norm": 1.4311891794204712, "learning_rate": 2.8902710042838313e-05, "loss": 0.1556, "step": 8404 }, { "epoch": 0.14884865872393932, "grad_norm": 0.877223789691925, "learning_rate": 2.8902387006034274e-05, "loss": 0.1234, "step": 8405 }, { "epoch": 0.14886636826096775, "grad_norm": 0.8676758408546448, "learning_rate": 2.890206392349278e-05, "loss": 0.1143, "step": 8406 }, { "epoch": 0.14888407779799617, "grad_norm": 1.4801284074783325, "learning_rate": 2.8901740795214905e-05, "loss": 0.1634, "step": 8407 }, { "epoch": 0.1489017873350246, "grad_norm": 1.1707432270050049, "learning_rate": 2.890141762120171e-05, "loss": 0.0978, "step": 8408 }, { "epoch": 0.14891949687205303, "grad_norm": 1.040602445602417, "learning_rate": 2.890109440145425e-05, "loss": 0.102, "step": 8409 }, { "epoch": 0.14893720640908145, "grad_norm": 1.1286710500717163, "learning_rate": 2.8900771135973603e-05, "loss": 0.118, "step": 8410 }, { "epoch": 0.14895491594610988, "grad_norm": 1.1731113195419312, "learning_rate": 2.8900447824760823e-05, "loss": 0.1097, "step": 8411 }, { "epoch": 0.1489726254831383, "grad_norm": 1.5713787078857422, "learning_rate": 2.8900124467816975e-05, "loss": 0.0983, "step": 8412 }, { "epoch": 0.14899033502016673, "grad_norm": 1.7299832105636597, "learning_rate": 2.8899801065143122e-05, "loss": 0.093, "step": 8413 }, { "epoch": 0.14900804455719516, "grad_norm": 0.6995404958724976, "learning_rate": 2.8899477616740332e-05, "loss": 0.0974, "step": 8414 }, { "epoch": 0.14902575409422358, "grad_norm": 1.1159030199050903, "learning_rate": 2.8899154122609663e-05, "loss": 0.1007, "step": 8415 }, { "epoch": 0.149043463631252, "grad_norm": 1.2319774627685547, "learning_rate": 2.8898830582752183e-05, "loss": 0.1045, "step": 8416 }, { "epoch": 0.14906117316828044, "grad_norm": 0.7561239004135132, "learning_rate": 2.8898506997168962e-05, "loss": 0.0818, "step": 8417 }, { "epoch": 0.14907888270530886, "grad_norm": 0.9801322817802429, "learning_rate": 2.8898183365861052e-05, "loss": 0.0998, "step": 8418 }, { "epoch": 0.14909659224233732, "grad_norm": 1.1801217794418335, "learning_rate": 2.8897859688829525e-05, "loss": 0.0727, "step": 8419 }, { "epoch": 0.14911430177936574, "grad_norm": 1.0008399486541748, "learning_rate": 2.889753596607545e-05, "loss": 0.1727, "step": 8420 }, { "epoch": 0.14913201131639417, "grad_norm": 1.2830852270126343, "learning_rate": 2.8897212197599886e-05, "loss": 0.1191, "step": 8421 }, { "epoch": 0.1491497208534226, "grad_norm": 1.360553503036499, "learning_rate": 2.8896888383403895e-05, "loss": 0.1038, "step": 8422 }, { "epoch": 0.14916743039045102, "grad_norm": 0.758114218711853, "learning_rate": 2.8896564523488556e-05, "loss": 0.1342, "step": 8423 }, { "epoch": 0.14918513992747945, "grad_norm": 0.7594456076622009, "learning_rate": 2.8896240617854918e-05, "loss": 0.1052, "step": 8424 }, { "epoch": 0.14920284946450788, "grad_norm": 1.1451326608657837, "learning_rate": 2.8895916666504056e-05, "loss": 0.1308, "step": 8425 }, { "epoch": 0.1492205590015363, "grad_norm": 1.113378643989563, "learning_rate": 2.889559266943703e-05, "loss": 0.1015, "step": 8426 }, { "epoch": 0.14923826853856473, "grad_norm": 0.6781612634658813, "learning_rate": 2.8895268626654917e-05, "loss": 0.1074, "step": 8427 }, { "epoch": 0.14925597807559315, "grad_norm": 0.6910512447357178, "learning_rate": 2.889494453815877e-05, "loss": 0.0854, "step": 8428 }, { "epoch": 0.14927368761262158, "grad_norm": 1.5841455459594727, "learning_rate": 2.8894620403949664e-05, "loss": 0.1296, "step": 8429 }, { "epoch": 0.14929139714965, "grad_norm": 1.0564621686935425, "learning_rate": 2.8894296224028655e-05, "loss": 0.0897, "step": 8430 }, { "epoch": 0.14930910668667843, "grad_norm": 1.2814737558364868, "learning_rate": 2.8893971998396826e-05, "loss": 0.1349, "step": 8431 }, { "epoch": 0.14932681622370686, "grad_norm": 1.0102301836013794, "learning_rate": 2.889364772705523e-05, "loss": 0.1107, "step": 8432 }, { "epoch": 0.1493445257607353, "grad_norm": 1.0984925031661987, "learning_rate": 2.889332341000494e-05, "loss": 0.1308, "step": 8433 }, { "epoch": 0.14936223529776374, "grad_norm": 1.0074912309646606, "learning_rate": 2.889299904724702e-05, "loss": 0.1021, "step": 8434 }, { "epoch": 0.14937994483479217, "grad_norm": 1.5232259035110474, "learning_rate": 2.8892674638782536e-05, "loss": 0.1224, "step": 8435 }, { "epoch": 0.1493976543718206, "grad_norm": 1.399901032447815, "learning_rate": 2.889235018461256e-05, "loss": 0.1061, "step": 8436 }, { "epoch": 0.14941536390884902, "grad_norm": 2.247126340866089, "learning_rate": 2.889202568473816e-05, "loss": 0.1187, "step": 8437 }, { "epoch": 0.14943307344587745, "grad_norm": 1.1823614835739136, "learning_rate": 2.8891701139160394e-05, "loss": 0.1433, "step": 8438 }, { "epoch": 0.14945078298290587, "grad_norm": 1.3209166526794434, "learning_rate": 2.8891376547880338e-05, "loss": 0.1275, "step": 8439 }, { "epoch": 0.1494684925199343, "grad_norm": 0.8908463716506958, "learning_rate": 2.8891051910899063e-05, "loss": 0.127, "step": 8440 }, { "epoch": 0.14948620205696272, "grad_norm": 1.0504826307296753, "learning_rate": 2.8890727228217627e-05, "loss": 0.1546, "step": 8441 }, { "epoch": 0.14950391159399115, "grad_norm": 1.2708479166030884, "learning_rate": 2.8890402499837106e-05, "loss": 0.1113, "step": 8442 }, { "epoch": 0.14952162113101958, "grad_norm": 1.1135810613632202, "learning_rate": 2.8890077725758565e-05, "loss": 0.1322, "step": 8443 }, { "epoch": 0.149539330668048, "grad_norm": 0.6763830780982971, "learning_rate": 2.8889752905983077e-05, "loss": 0.0616, "step": 8444 }, { "epoch": 0.14955704020507643, "grad_norm": 0.9180071949958801, "learning_rate": 2.8889428040511697e-05, "loss": 0.1279, "step": 8445 }, { "epoch": 0.14957474974210486, "grad_norm": 1.1544638872146606, "learning_rate": 2.8889103129345515e-05, "loss": 0.1969, "step": 8446 }, { "epoch": 0.14959245927913328, "grad_norm": 0.8418632745742798, "learning_rate": 2.8888778172485578e-05, "loss": 0.0979, "step": 8447 }, { "epoch": 0.14961016881616174, "grad_norm": 0.771562933921814, "learning_rate": 2.8888453169932974e-05, "loss": 0.1007, "step": 8448 }, { "epoch": 0.14962787835319016, "grad_norm": 1.2569187879562378, "learning_rate": 2.888812812168876e-05, "loss": 0.125, "step": 8449 }, { "epoch": 0.1496455878902186, "grad_norm": 2.67130446434021, "learning_rate": 2.888780302775401e-05, "loss": 0.1167, "step": 8450 }, { "epoch": 0.14966329742724702, "grad_norm": 1.139557957649231, "learning_rate": 2.888747788812979e-05, "loss": 0.1323, "step": 8451 }, { "epoch": 0.14968100696427544, "grad_norm": 1.1552053689956665, "learning_rate": 2.888715270281718e-05, "loss": 0.121, "step": 8452 }, { "epoch": 0.14969871650130387, "grad_norm": 2.9399070739746094, "learning_rate": 2.8886827471817236e-05, "loss": 0.1622, "step": 8453 }, { "epoch": 0.1497164260383323, "grad_norm": 0.8790324926376343, "learning_rate": 2.888650219513104e-05, "loss": 0.0844, "step": 8454 }, { "epoch": 0.14973413557536072, "grad_norm": 1.2323966026306152, "learning_rate": 2.8886176872759648e-05, "loss": 0.1604, "step": 8455 }, { "epoch": 0.14975184511238915, "grad_norm": 1.035044550895691, "learning_rate": 2.8885851504704145e-05, "loss": 0.1324, "step": 8456 }, { "epoch": 0.14976955464941757, "grad_norm": 1.14603590965271, "learning_rate": 2.888552609096559e-05, "loss": 0.1133, "step": 8457 }, { "epoch": 0.149787264186446, "grad_norm": 1.2344928979873657, "learning_rate": 2.888520063154506e-05, "loss": 0.114, "step": 8458 }, { "epoch": 0.14980497372347443, "grad_norm": 0.9872365593910217, "learning_rate": 2.8884875126443625e-05, "loss": 0.1201, "step": 8459 }, { "epoch": 0.14982268326050285, "grad_norm": 1.0176174640655518, "learning_rate": 2.8884549575662356e-05, "loss": 0.1233, "step": 8460 }, { "epoch": 0.14984039279753128, "grad_norm": 1.0952434539794922, "learning_rate": 2.888422397920232e-05, "loss": 0.1271, "step": 8461 }, { "epoch": 0.1498581023345597, "grad_norm": 0.8686836361885071, "learning_rate": 2.8883898337064592e-05, "loss": 0.1103, "step": 8462 }, { "epoch": 0.14987581187158816, "grad_norm": 0.94154953956604, "learning_rate": 2.888357264925025e-05, "loss": 0.1159, "step": 8463 }, { "epoch": 0.1498935214086166, "grad_norm": 1.0549254417419434, "learning_rate": 2.888324691576035e-05, "loss": 0.0906, "step": 8464 }, { "epoch": 0.149911230945645, "grad_norm": 0.7733936309814453, "learning_rate": 2.8882921136595977e-05, "loss": 0.1089, "step": 8465 }, { "epoch": 0.14992894048267344, "grad_norm": 1.0097665786743164, "learning_rate": 2.8882595311758194e-05, "loss": 0.0921, "step": 8466 }, { "epoch": 0.14994665001970187, "grad_norm": 1.148660659790039, "learning_rate": 2.8882269441248076e-05, "loss": 0.0973, "step": 8467 }, { "epoch": 0.1499643595567303, "grad_norm": 1.049277901649475, "learning_rate": 2.88819435250667e-05, "loss": 0.1315, "step": 8468 }, { "epoch": 0.14998206909375872, "grad_norm": 0.6770144701004028, "learning_rate": 2.8881617563215132e-05, "loss": 0.1024, "step": 8469 }, { "epoch": 0.14999977863078715, "grad_norm": 1.0655908584594727, "learning_rate": 2.8881291555694443e-05, "loss": 0.1037, "step": 8470 }, { "epoch": 0.15001748816781557, "grad_norm": 0.8552361726760864, "learning_rate": 2.8880965502505716e-05, "loss": 0.0965, "step": 8471 }, { "epoch": 0.150035197704844, "grad_norm": 1.232188105583191, "learning_rate": 2.888063940365001e-05, "loss": 0.1317, "step": 8472 }, { "epoch": 0.15005290724187242, "grad_norm": 1.5405503511428833, "learning_rate": 2.888031325912841e-05, "loss": 0.1407, "step": 8473 }, { "epoch": 0.15007061677890085, "grad_norm": 1.1838558912277222, "learning_rate": 2.887998706894198e-05, "loss": 0.1262, "step": 8474 }, { "epoch": 0.15008832631592928, "grad_norm": 1.1482027769088745, "learning_rate": 2.8879660833091792e-05, "loss": 0.1179, "step": 8475 }, { "epoch": 0.1501060358529577, "grad_norm": 0.7201055884361267, "learning_rate": 2.887933455157893e-05, "loss": 0.1174, "step": 8476 }, { "epoch": 0.15012374538998613, "grad_norm": 1.1508705615997314, "learning_rate": 2.887900822440446e-05, "loss": 0.1071, "step": 8477 }, { "epoch": 0.15014145492701458, "grad_norm": 1.5558091402053833, "learning_rate": 2.8878681851569452e-05, "loss": 0.0875, "step": 8478 }, { "epoch": 0.150159164464043, "grad_norm": 0.9755881428718567, "learning_rate": 2.887835543307499e-05, "loss": 0.1331, "step": 8479 }, { "epoch": 0.15017687400107144, "grad_norm": 0.8184041380882263, "learning_rate": 2.8878028968922137e-05, "loss": 0.1331, "step": 8480 }, { "epoch": 0.15019458353809986, "grad_norm": 1.15324068069458, "learning_rate": 2.887770245911198e-05, "loss": 0.1185, "step": 8481 }, { "epoch": 0.1502122930751283, "grad_norm": 1.3758103847503662, "learning_rate": 2.887737590364558e-05, "loss": 0.1234, "step": 8482 }, { "epoch": 0.15023000261215672, "grad_norm": 0.8440849781036377, "learning_rate": 2.887704930252402e-05, "loss": 0.0922, "step": 8483 }, { "epoch": 0.15024771214918514, "grad_norm": 0.9696694612503052, "learning_rate": 2.887672265574837e-05, "loss": 0.0987, "step": 8484 }, { "epoch": 0.15026542168621357, "grad_norm": 0.9084112048149109, "learning_rate": 2.8876395963319705e-05, "loss": 0.1115, "step": 8485 }, { "epoch": 0.150283131223242, "grad_norm": 0.7735928893089294, "learning_rate": 2.8876069225239103e-05, "loss": 0.1115, "step": 8486 }, { "epoch": 0.15030084076027042, "grad_norm": 1.0309346914291382, "learning_rate": 2.8875742441507633e-05, "loss": 0.1322, "step": 8487 }, { "epoch": 0.15031855029729885, "grad_norm": 1.099478840827942, "learning_rate": 2.887541561212638e-05, "loss": 0.1264, "step": 8488 }, { "epoch": 0.15033625983432727, "grad_norm": 0.8774245977401733, "learning_rate": 2.8875088737096407e-05, "loss": 0.1064, "step": 8489 }, { "epoch": 0.1503539693713557, "grad_norm": 1.7108654975891113, "learning_rate": 2.8874761816418798e-05, "loss": 0.1086, "step": 8490 }, { "epoch": 0.15037167890838413, "grad_norm": 0.9110877513885498, "learning_rate": 2.8874434850094625e-05, "loss": 0.133, "step": 8491 }, { "epoch": 0.15038938844541255, "grad_norm": 1.2353016138076782, "learning_rate": 2.8874107838124964e-05, "loss": 0.1172, "step": 8492 }, { "epoch": 0.150407097982441, "grad_norm": 0.9424992203712463, "learning_rate": 2.8873780780510893e-05, "loss": 0.1233, "step": 8493 }, { "epoch": 0.15042480751946943, "grad_norm": 1.167583703994751, "learning_rate": 2.8873453677253486e-05, "loss": 0.1188, "step": 8494 }, { "epoch": 0.15044251705649786, "grad_norm": 0.725687563419342, "learning_rate": 2.887312652835382e-05, "loss": 0.088, "step": 8495 }, { "epoch": 0.1504602265935263, "grad_norm": 0.863555371761322, "learning_rate": 2.887279933381297e-05, "loss": 0.1219, "step": 8496 }, { "epoch": 0.1504779361305547, "grad_norm": 0.799205482006073, "learning_rate": 2.8872472093632013e-05, "loss": 0.1173, "step": 8497 }, { "epoch": 0.15049564566758314, "grad_norm": 1.1303815841674805, "learning_rate": 2.8872144807812026e-05, "loss": 0.1133, "step": 8498 }, { "epoch": 0.15051335520461157, "grad_norm": 0.7032261490821838, "learning_rate": 2.8871817476354087e-05, "loss": 0.1094, "step": 8499 }, { "epoch": 0.15053106474164, "grad_norm": 1.259566307067871, "learning_rate": 2.8871490099259274e-05, "loss": 0.1206, "step": 8500 }, { "epoch": 0.15054877427866842, "grad_norm": 1.521990418434143, "learning_rate": 2.887116267652866e-05, "loss": 0.1281, "step": 8501 }, { "epoch": 0.15056648381569684, "grad_norm": 1.5362221002578735, "learning_rate": 2.887083520816332e-05, "loss": 0.1488, "step": 8502 }, { "epoch": 0.15058419335272527, "grad_norm": 1.1575520038604736, "learning_rate": 2.8870507694164337e-05, "loss": 0.1127, "step": 8503 }, { "epoch": 0.1506019028897537, "grad_norm": 1.19972825050354, "learning_rate": 2.8870180134532788e-05, "loss": 0.1191, "step": 8504 }, { "epoch": 0.15061961242678212, "grad_norm": 0.9292722940444946, "learning_rate": 2.8869852529269748e-05, "loss": 0.152, "step": 8505 }, { "epoch": 0.15063732196381055, "grad_norm": 1.414446473121643, "learning_rate": 2.8869524878376295e-05, "loss": 0.1281, "step": 8506 }, { "epoch": 0.15065503150083898, "grad_norm": 1.0569467544555664, "learning_rate": 2.886919718185351e-05, "loss": 0.1573, "step": 8507 }, { "epoch": 0.15067274103786743, "grad_norm": 1.3265691995620728, "learning_rate": 2.8868869439702473e-05, "loss": 0.1486, "step": 8508 }, { "epoch": 0.15069045057489586, "grad_norm": 2.1975066661834717, "learning_rate": 2.8868541651924253e-05, "loss": 0.1725, "step": 8509 }, { "epoch": 0.15070816011192428, "grad_norm": 1.1872526407241821, "learning_rate": 2.8868213818519932e-05, "loss": 0.0909, "step": 8510 }, { "epoch": 0.1507258696489527, "grad_norm": 0.7449542880058289, "learning_rate": 2.8867885939490596e-05, "loss": 0.0953, "step": 8511 }, { "epoch": 0.15074357918598114, "grad_norm": 1.4265722036361694, "learning_rate": 2.886755801483731e-05, "loss": 0.1321, "step": 8512 }, { "epoch": 0.15076128872300956, "grad_norm": 1.0932093858718872, "learning_rate": 2.8867230044561166e-05, "loss": 0.1126, "step": 8513 }, { "epoch": 0.150778998260038, "grad_norm": 1.8246396780014038, "learning_rate": 2.8866902028663234e-05, "loss": 0.1503, "step": 8514 }, { "epoch": 0.15079670779706641, "grad_norm": 0.934782862663269, "learning_rate": 2.8866573967144606e-05, "loss": 0.1034, "step": 8515 }, { "epoch": 0.15081441733409484, "grad_norm": 1.1627452373504639, "learning_rate": 2.8866245860006346e-05, "loss": 0.1096, "step": 8516 }, { "epoch": 0.15083212687112327, "grad_norm": 1.2737019062042236, "learning_rate": 2.8865917707249537e-05, "loss": 0.0966, "step": 8517 }, { "epoch": 0.1508498364081517, "grad_norm": 1.1653136014938354, "learning_rate": 2.8865589508875264e-05, "loss": 0.1192, "step": 8518 }, { "epoch": 0.15086754594518012, "grad_norm": 1.381087064743042, "learning_rate": 2.8865261264884607e-05, "loss": 0.1299, "step": 8519 }, { "epoch": 0.15088525548220855, "grad_norm": 1.4734209775924683, "learning_rate": 2.886493297527864e-05, "loss": 0.1365, "step": 8520 }, { "epoch": 0.15090296501923697, "grad_norm": 1.009887933731079, "learning_rate": 2.886460464005844e-05, "loss": 0.0977, "step": 8521 }, { "epoch": 0.1509206745562654, "grad_norm": 1.4857169389724731, "learning_rate": 2.88642762592251e-05, "loss": 0.1514, "step": 8522 }, { "epoch": 0.15093838409329385, "grad_norm": 1.4029507637023926, "learning_rate": 2.8863947832779695e-05, "loss": 0.147, "step": 8523 }, { "epoch": 0.15095609363032228, "grad_norm": 2.1163504123687744, "learning_rate": 2.88636193607233e-05, "loss": 0.0959, "step": 8524 }, { "epoch": 0.1509738031673507, "grad_norm": 0.9777604937553406, "learning_rate": 2.8863290843057e-05, "loss": 0.0971, "step": 8525 }, { "epoch": 0.15099151270437913, "grad_norm": 0.9119082689285278, "learning_rate": 2.8862962279781878e-05, "loss": 0.1164, "step": 8526 }, { "epoch": 0.15100922224140756, "grad_norm": 1.2948472499847412, "learning_rate": 2.8862633670899006e-05, "loss": 0.13, "step": 8527 }, { "epoch": 0.15102693177843599, "grad_norm": 1.091691017150879, "learning_rate": 2.8862305016409475e-05, "loss": 0.1432, "step": 8528 }, { "epoch": 0.1510446413154644, "grad_norm": 1.4786741733551025, "learning_rate": 2.8861976316314368e-05, "loss": 0.1647, "step": 8529 }, { "epoch": 0.15106235085249284, "grad_norm": 0.8876942992210388, "learning_rate": 2.886164757061475e-05, "loss": 0.1047, "step": 8530 }, { "epoch": 0.15108006038952126, "grad_norm": 0.798292875289917, "learning_rate": 2.886131877931172e-05, "loss": 0.147, "step": 8531 }, { "epoch": 0.1510977699265497, "grad_norm": 0.6377320885658264, "learning_rate": 2.8860989942406354e-05, "loss": 0.0966, "step": 8532 }, { "epoch": 0.15111547946357812, "grad_norm": 0.9393572211265564, "learning_rate": 2.8860661059899734e-05, "loss": 0.0962, "step": 8533 }, { "epoch": 0.15113318900060654, "grad_norm": 1.347420334815979, "learning_rate": 2.886033213179294e-05, "loss": 0.1521, "step": 8534 }, { "epoch": 0.15115089853763497, "grad_norm": 1.3789350986480713, "learning_rate": 2.886000315808705e-05, "loss": 0.1208, "step": 8535 }, { "epoch": 0.1511686080746634, "grad_norm": 0.9229636192321777, "learning_rate": 2.8859674138783156e-05, "loss": 0.1231, "step": 8536 }, { "epoch": 0.15118631761169182, "grad_norm": 0.8938892483711243, "learning_rate": 2.8859345073882333e-05, "loss": 0.1032, "step": 8537 }, { "epoch": 0.15120402714872028, "grad_norm": 0.6995444297790527, "learning_rate": 2.8859015963385672e-05, "loss": 0.0845, "step": 8538 }, { "epoch": 0.1512217366857487, "grad_norm": 0.9208232760429382, "learning_rate": 2.8858686807294248e-05, "loss": 0.0967, "step": 8539 }, { "epoch": 0.15123944622277713, "grad_norm": 1.0977641344070435, "learning_rate": 2.8858357605609144e-05, "loss": 0.1201, "step": 8540 }, { "epoch": 0.15125715575980556, "grad_norm": 0.8876867890357971, "learning_rate": 2.8858028358331448e-05, "loss": 0.1493, "step": 8541 }, { "epoch": 0.15127486529683398, "grad_norm": 0.9087730646133423, "learning_rate": 2.8857699065462237e-05, "loss": 0.107, "step": 8542 }, { "epoch": 0.1512925748338624, "grad_norm": 1.1859322786331177, "learning_rate": 2.88573697270026e-05, "loss": 0.1497, "step": 8543 }, { "epoch": 0.15131028437089084, "grad_norm": 1.0813921689987183, "learning_rate": 2.885704034295362e-05, "loss": 0.1452, "step": 8544 }, { "epoch": 0.15132799390791926, "grad_norm": 1.204817533493042, "learning_rate": 2.8856710913316374e-05, "loss": 0.1127, "step": 8545 }, { "epoch": 0.1513457034449477, "grad_norm": 0.767676591873169, "learning_rate": 2.885638143809195e-05, "loss": 0.094, "step": 8546 }, { "epoch": 0.15136341298197611, "grad_norm": 0.9785438776016235, "learning_rate": 2.8856051917281437e-05, "loss": 0.1343, "step": 8547 }, { "epoch": 0.15138112251900454, "grad_norm": 1.319545030593872, "learning_rate": 2.8855722350885915e-05, "loss": 0.1302, "step": 8548 }, { "epoch": 0.15139883205603297, "grad_norm": 0.9151449203491211, "learning_rate": 2.8855392738906467e-05, "loss": 0.1137, "step": 8549 }, { "epoch": 0.1514165415930614, "grad_norm": 0.9981586933135986, "learning_rate": 2.8855063081344175e-05, "loss": 0.1197, "step": 8550 }, { "epoch": 0.15143425113008982, "grad_norm": 1.2811380624771118, "learning_rate": 2.885473337820013e-05, "loss": 0.1346, "step": 8551 }, { "epoch": 0.15145196066711825, "grad_norm": 1.1348806619644165, "learning_rate": 2.8854403629475412e-05, "loss": 0.1117, "step": 8552 }, { "epoch": 0.1514696702041467, "grad_norm": 1.297568678855896, "learning_rate": 2.885407383517111e-05, "loss": 0.1085, "step": 8553 }, { "epoch": 0.15148737974117513, "grad_norm": 1.1215403079986572, "learning_rate": 2.8853743995288303e-05, "loss": 0.1052, "step": 8554 }, { "epoch": 0.15150508927820355, "grad_norm": 0.8776131272315979, "learning_rate": 2.8853414109828083e-05, "loss": 0.1317, "step": 8555 }, { "epoch": 0.15152279881523198, "grad_norm": 0.9089791178703308, "learning_rate": 2.8853084178791528e-05, "loss": 0.1706, "step": 8556 }, { "epoch": 0.1515405083522604, "grad_norm": 1.1304031610488892, "learning_rate": 2.885275420217973e-05, "loss": 0.1098, "step": 8557 }, { "epoch": 0.15155821788928883, "grad_norm": 1.0216115713119507, "learning_rate": 2.885242417999377e-05, "loss": 0.1395, "step": 8558 }, { "epoch": 0.15157592742631726, "grad_norm": 1.038915753364563, "learning_rate": 2.8852094112234733e-05, "loss": 0.0895, "step": 8559 }, { "epoch": 0.15159363696334568, "grad_norm": 0.9035724997520447, "learning_rate": 2.8851763998903714e-05, "loss": 0.0928, "step": 8560 }, { "epoch": 0.1516113465003741, "grad_norm": 0.9448494911193848, "learning_rate": 2.8851433840001788e-05, "loss": 0.1148, "step": 8561 }, { "epoch": 0.15162905603740254, "grad_norm": 0.9821330308914185, "learning_rate": 2.8851103635530044e-05, "loss": 0.1241, "step": 8562 }, { "epoch": 0.15164676557443096, "grad_norm": 0.9537966251373291, "learning_rate": 2.8850773385489578e-05, "loss": 0.1393, "step": 8563 }, { "epoch": 0.1516644751114594, "grad_norm": 0.8826560974121094, "learning_rate": 2.885044308988146e-05, "loss": 0.0998, "step": 8564 }, { "epoch": 0.15168218464848782, "grad_norm": 1.6379492282867432, "learning_rate": 2.8850112748706787e-05, "loss": 0.144, "step": 8565 }, { "epoch": 0.15169989418551624, "grad_norm": 0.8357781767845154, "learning_rate": 2.8849782361966645e-05, "loss": 0.1085, "step": 8566 }, { "epoch": 0.1517176037225447, "grad_norm": 1.1466223001480103, "learning_rate": 2.884945192966212e-05, "loss": 0.1879, "step": 8567 }, { "epoch": 0.15173531325957312, "grad_norm": 0.9710361361503601, "learning_rate": 2.88491214517943e-05, "loss": 0.107, "step": 8568 }, { "epoch": 0.15175302279660155, "grad_norm": 1.4362034797668457, "learning_rate": 2.884879092836427e-05, "loss": 0.1153, "step": 8569 }, { "epoch": 0.15177073233362998, "grad_norm": 1.045353889465332, "learning_rate": 2.884846035937312e-05, "loss": 0.0907, "step": 8570 }, { "epoch": 0.1517884418706584, "grad_norm": 0.9986693859100342, "learning_rate": 2.8848129744821934e-05, "loss": 0.1305, "step": 8571 }, { "epoch": 0.15180615140768683, "grad_norm": 1.3411227464675903, "learning_rate": 2.88477990847118e-05, "loss": 0.1556, "step": 8572 }, { "epoch": 0.15182386094471526, "grad_norm": 0.8294422626495361, "learning_rate": 2.8847468379043815e-05, "loss": 0.0778, "step": 8573 }, { "epoch": 0.15184157048174368, "grad_norm": 1.1015740633010864, "learning_rate": 2.8847137627819057e-05, "loss": 0.1048, "step": 8574 }, { "epoch": 0.1518592800187721, "grad_norm": 1.069612979888916, "learning_rate": 2.884680683103861e-05, "loss": 0.1526, "step": 8575 }, { "epoch": 0.15187698955580053, "grad_norm": 1.2536344528198242, "learning_rate": 2.8846475988703572e-05, "loss": 0.1155, "step": 8576 }, { "epoch": 0.15189469909282896, "grad_norm": 1.1400240659713745, "learning_rate": 2.884614510081503e-05, "loss": 0.1297, "step": 8577 }, { "epoch": 0.1519124086298574, "grad_norm": 1.150886058807373, "learning_rate": 2.8845814167374075e-05, "loss": 0.1307, "step": 8578 }, { "epoch": 0.1519301181668858, "grad_norm": 1.000622034072876, "learning_rate": 2.8845483188381788e-05, "loss": 0.1162, "step": 8579 }, { "epoch": 0.15194782770391424, "grad_norm": 1.0105830430984497, "learning_rate": 2.884515216383926e-05, "loss": 0.1299, "step": 8580 }, { "epoch": 0.15196553724094267, "grad_norm": 0.8805416822433472, "learning_rate": 2.8844821093747587e-05, "loss": 0.1089, "step": 8581 }, { "epoch": 0.15198324677797112, "grad_norm": 0.9684417843818665, "learning_rate": 2.8844489978107848e-05, "loss": 0.1187, "step": 8582 }, { "epoch": 0.15200095631499955, "grad_norm": 1.7225053310394287, "learning_rate": 2.8844158816921135e-05, "loss": 0.1481, "step": 8583 }, { "epoch": 0.15201866585202797, "grad_norm": 1.4356448650360107, "learning_rate": 2.8843827610188546e-05, "loss": 0.1414, "step": 8584 }, { "epoch": 0.1520363753890564, "grad_norm": 0.9094599485397339, "learning_rate": 2.884349635791116e-05, "loss": 0.0996, "step": 8585 }, { "epoch": 0.15205408492608483, "grad_norm": 0.9736344218254089, "learning_rate": 2.8843165060090075e-05, "loss": 0.0917, "step": 8586 }, { "epoch": 0.15207179446311325, "grad_norm": 1.0375792980194092, "learning_rate": 2.8842833716726376e-05, "loss": 0.1315, "step": 8587 }, { "epoch": 0.15208950400014168, "grad_norm": 1.231746792793274, "learning_rate": 2.8842502327821154e-05, "loss": 0.1487, "step": 8588 }, { "epoch": 0.1521072135371701, "grad_norm": 0.6742162108421326, "learning_rate": 2.88421708933755e-05, "loss": 0.127, "step": 8589 }, { "epoch": 0.15212492307419853, "grad_norm": 1.1907190084457397, "learning_rate": 2.88418394133905e-05, "loss": 0.1409, "step": 8590 }, { "epoch": 0.15214263261122696, "grad_norm": 1.0285588502883911, "learning_rate": 2.884150788786725e-05, "loss": 0.1151, "step": 8591 }, { "epoch": 0.15216034214825538, "grad_norm": 1.1102174520492554, "learning_rate": 2.884117631680684e-05, "loss": 0.1071, "step": 8592 }, { "epoch": 0.1521780516852838, "grad_norm": 1.53147554397583, "learning_rate": 2.884084470021036e-05, "loss": 0.1273, "step": 8593 }, { "epoch": 0.15219576122231224, "grad_norm": 1.1725016832351685, "learning_rate": 2.8840513038078903e-05, "loss": 0.1132, "step": 8594 }, { "epoch": 0.15221347075934066, "grad_norm": 1.082909107208252, "learning_rate": 2.8840181330413553e-05, "loss": 0.1006, "step": 8595 }, { "epoch": 0.1522311802963691, "grad_norm": 1.0213932991027832, "learning_rate": 2.8839849577215408e-05, "loss": 0.1769, "step": 8596 }, { "epoch": 0.15224888983339754, "grad_norm": 0.7925437092781067, "learning_rate": 2.8839517778485563e-05, "loss": 0.0739, "step": 8597 }, { "epoch": 0.15226659937042597, "grad_norm": 1.532185435295105, "learning_rate": 2.8839185934225098e-05, "loss": 0.1411, "step": 8598 }, { "epoch": 0.1522843089074544, "grad_norm": 4.160783767700195, "learning_rate": 2.883885404443511e-05, "loss": 0.112, "step": 8599 }, { "epoch": 0.15230201844448282, "grad_norm": 2.8460123538970947, "learning_rate": 2.8838522109116697e-05, "loss": 0.1526, "step": 8600 }, { "epoch": 0.15231972798151125, "grad_norm": 0.9897413849830627, "learning_rate": 2.8838190128270943e-05, "loss": 0.1206, "step": 8601 }, { "epoch": 0.15233743751853968, "grad_norm": 1.1430655717849731, "learning_rate": 2.8837858101898947e-05, "loss": 0.1431, "step": 8602 }, { "epoch": 0.1523551470555681, "grad_norm": 1.4540632963180542, "learning_rate": 2.8837526030001793e-05, "loss": 0.155, "step": 8603 }, { "epoch": 0.15237285659259653, "grad_norm": 1.5887404680252075, "learning_rate": 2.8837193912580578e-05, "loss": 0.1316, "step": 8604 }, { "epoch": 0.15239056612962495, "grad_norm": 1.09453284740448, "learning_rate": 2.8836861749636395e-05, "loss": 0.1035, "step": 8605 }, { "epoch": 0.15240827566665338, "grad_norm": 0.8393604159355164, "learning_rate": 2.8836529541170336e-05, "loss": 0.1009, "step": 8606 }, { "epoch": 0.1524259852036818, "grad_norm": 0.7382755875587463, "learning_rate": 2.88361972871835e-05, "loss": 0.1057, "step": 8607 }, { "epoch": 0.15244369474071023, "grad_norm": 1.3131213188171387, "learning_rate": 2.883586498767697e-05, "loss": 0.132, "step": 8608 }, { "epoch": 0.15246140427773866, "grad_norm": 0.8813315629959106, "learning_rate": 2.8835532642651842e-05, "loss": 0.0757, "step": 8609 }, { "epoch": 0.1524791138147671, "grad_norm": 1.1399562358856201, "learning_rate": 2.883520025210921e-05, "loss": 0.1246, "step": 8610 }, { "epoch": 0.1524968233517955, "grad_norm": 1.912914752960205, "learning_rate": 2.883486781605017e-05, "loss": 0.1097, "step": 8611 }, { "epoch": 0.15251453288882397, "grad_norm": 1.179848074913025, "learning_rate": 2.8834535334475817e-05, "loss": 0.1157, "step": 8612 }, { "epoch": 0.1525322424258524, "grad_norm": 1.2479255199432373, "learning_rate": 2.883420280738724e-05, "loss": 0.1251, "step": 8613 }, { "epoch": 0.15254995196288082, "grad_norm": 0.9076572060585022, "learning_rate": 2.8833870234785535e-05, "loss": 0.148, "step": 8614 }, { "epoch": 0.15256766149990925, "grad_norm": 1.1123285293579102, "learning_rate": 2.8833537616671796e-05, "loss": 0.135, "step": 8615 }, { "epoch": 0.15258537103693767, "grad_norm": 0.6363422870635986, "learning_rate": 2.8833204953047118e-05, "loss": 0.1117, "step": 8616 }, { "epoch": 0.1526030805739661, "grad_norm": 1.10365891456604, "learning_rate": 2.8832872243912593e-05, "loss": 0.1341, "step": 8617 }, { "epoch": 0.15262079011099453, "grad_norm": 1.3616560697555542, "learning_rate": 2.883253948926932e-05, "loss": 0.1556, "step": 8618 }, { "epoch": 0.15263849964802295, "grad_norm": 0.9729346632957458, "learning_rate": 2.883220668911839e-05, "loss": 0.1279, "step": 8619 }, { "epoch": 0.15265620918505138, "grad_norm": 0.7675600051879883, "learning_rate": 2.88318738434609e-05, "loss": 0.0917, "step": 8620 }, { "epoch": 0.1526739187220798, "grad_norm": 1.7182941436767578, "learning_rate": 2.8831540952297946e-05, "loss": 0.1452, "step": 8621 }, { "epoch": 0.15269162825910823, "grad_norm": 0.7810162305831909, "learning_rate": 2.8831208015630615e-05, "loss": 0.1015, "step": 8622 }, { "epoch": 0.15270933779613666, "grad_norm": 1.5193597078323364, "learning_rate": 2.8830875033460014e-05, "loss": 0.1769, "step": 8623 }, { "epoch": 0.15272704733316508, "grad_norm": 1.1993175745010376, "learning_rate": 2.883054200578723e-05, "loss": 0.0562, "step": 8624 }, { "epoch": 0.1527447568701935, "grad_norm": 1.118532419204712, "learning_rate": 2.8830208932613364e-05, "loss": 0.1123, "step": 8625 }, { "epoch": 0.15276246640722194, "grad_norm": 0.8508358001708984, "learning_rate": 2.8829875813939507e-05, "loss": 0.1271, "step": 8626 }, { "epoch": 0.1527801759442504, "grad_norm": 1.36273992061615, "learning_rate": 2.8829542649766757e-05, "loss": 0.1328, "step": 8627 }, { "epoch": 0.15279788548127882, "grad_norm": 0.8470105528831482, "learning_rate": 2.8829209440096215e-05, "loss": 0.0724, "step": 8628 }, { "epoch": 0.15281559501830724, "grad_norm": 1.0583726167678833, "learning_rate": 2.8828876184928972e-05, "loss": 0.136, "step": 8629 }, { "epoch": 0.15283330455533567, "grad_norm": 0.8326586484909058, "learning_rate": 2.8828542884266122e-05, "loss": 0.0925, "step": 8630 }, { "epoch": 0.1528510140923641, "grad_norm": 0.6975737810134888, "learning_rate": 2.8828209538108763e-05, "loss": 0.1017, "step": 8631 }, { "epoch": 0.15286872362939252, "grad_norm": 0.8898029923439026, "learning_rate": 2.8827876146457997e-05, "loss": 0.0945, "step": 8632 }, { "epoch": 0.15288643316642095, "grad_norm": 1.4647202491760254, "learning_rate": 2.8827542709314917e-05, "loss": 0.1465, "step": 8633 }, { "epoch": 0.15290414270344937, "grad_norm": 1.3299745321273804, "learning_rate": 2.8827209226680618e-05, "loss": 0.1699, "step": 8634 }, { "epoch": 0.1529218522404778, "grad_norm": 0.6269090175628662, "learning_rate": 2.8826875698556198e-05, "loss": 0.0624, "step": 8635 }, { "epoch": 0.15293956177750623, "grad_norm": 1.1214005947113037, "learning_rate": 2.882654212494276e-05, "loss": 0.1235, "step": 8636 }, { "epoch": 0.15295727131453465, "grad_norm": 1.5161104202270508, "learning_rate": 2.8826208505841395e-05, "loss": 0.1291, "step": 8637 }, { "epoch": 0.15297498085156308, "grad_norm": 1.9140254259109497, "learning_rate": 2.88258748412532e-05, "loss": 0.0944, "step": 8638 }, { "epoch": 0.1529926903885915, "grad_norm": 1.3609373569488525, "learning_rate": 2.8825541131179277e-05, "loss": 0.1055, "step": 8639 }, { "epoch": 0.15301039992561993, "grad_norm": 0.7970651388168335, "learning_rate": 2.8825207375620723e-05, "loss": 0.098, "step": 8640 }, { "epoch": 0.15302810946264836, "grad_norm": 0.9747058749198914, "learning_rate": 2.8824873574578632e-05, "loss": 0.0719, "step": 8641 }, { "epoch": 0.1530458189996768, "grad_norm": 1.109908938407898, "learning_rate": 2.8824539728054107e-05, "loss": 0.1449, "step": 8642 }, { "epoch": 0.15306352853670524, "grad_norm": 1.5883597135543823, "learning_rate": 2.8824205836048246e-05, "loss": 0.097, "step": 8643 }, { "epoch": 0.15308123807373367, "grad_norm": 0.9226470589637756, "learning_rate": 2.8823871898562147e-05, "loss": 0.1237, "step": 8644 }, { "epoch": 0.1530989476107621, "grad_norm": 0.9032711982727051, "learning_rate": 2.8823537915596907e-05, "loss": 0.1283, "step": 8645 }, { "epoch": 0.15311665714779052, "grad_norm": 1.6167490482330322, "learning_rate": 2.8823203887153625e-05, "loss": 0.1427, "step": 8646 }, { "epoch": 0.15313436668481895, "grad_norm": 1.5182946920394897, "learning_rate": 2.8822869813233395e-05, "loss": 0.1001, "step": 8647 }, { "epoch": 0.15315207622184737, "grad_norm": 0.9057402610778809, "learning_rate": 2.8822535693837328e-05, "loss": 0.1022, "step": 8648 }, { "epoch": 0.1531697857588758, "grad_norm": 1.4846662282943726, "learning_rate": 2.882220152896651e-05, "loss": 0.1161, "step": 8649 }, { "epoch": 0.15318749529590422, "grad_norm": 1.2771143913269043, "learning_rate": 2.8821867318622054e-05, "loss": 0.1103, "step": 8650 }, { "epoch": 0.15320520483293265, "grad_norm": 1.1821714639663696, "learning_rate": 2.8821533062805052e-05, "loss": 0.1072, "step": 8651 }, { "epoch": 0.15322291436996108, "grad_norm": 0.997597873210907, "learning_rate": 2.88211987615166e-05, "loss": 0.1067, "step": 8652 }, { "epoch": 0.1532406239069895, "grad_norm": 1.0467528104782104, "learning_rate": 2.8820864414757803e-05, "loss": 0.1352, "step": 8653 }, { "epoch": 0.15325833344401793, "grad_norm": 1.4602782726287842, "learning_rate": 2.8820530022529764e-05, "loss": 0.1035, "step": 8654 }, { "epoch": 0.15327604298104636, "grad_norm": 1.6081632375717163, "learning_rate": 2.8820195584833575e-05, "loss": 0.1534, "step": 8655 }, { "epoch": 0.15329375251807478, "grad_norm": 0.8544208407402039, "learning_rate": 2.8819861101670342e-05, "loss": 0.1133, "step": 8656 }, { "epoch": 0.15331146205510324, "grad_norm": 0.7022674083709717, "learning_rate": 2.8819526573041162e-05, "loss": 0.0857, "step": 8657 }, { "epoch": 0.15332917159213166, "grad_norm": 1.018089771270752, "learning_rate": 2.881919199894714e-05, "loss": 0.1317, "step": 8658 }, { "epoch": 0.1533468811291601, "grad_norm": 1.296174168586731, "learning_rate": 2.8818857379389373e-05, "loss": 0.1237, "step": 8659 }, { "epoch": 0.15336459066618852, "grad_norm": 0.9465892314910889, "learning_rate": 2.8818522714368962e-05, "loss": 0.1234, "step": 8660 }, { "epoch": 0.15338230020321694, "grad_norm": 0.9992998242378235, "learning_rate": 2.881818800388701e-05, "loss": 0.1389, "step": 8661 }, { "epoch": 0.15340000974024537, "grad_norm": 0.9402461647987366, "learning_rate": 2.8817853247944618e-05, "loss": 0.1231, "step": 8662 }, { "epoch": 0.1534177192772738, "grad_norm": 0.940422534942627, "learning_rate": 2.8817518446542884e-05, "loss": 0.122, "step": 8663 }, { "epoch": 0.15343542881430222, "grad_norm": 1.0909162759780884, "learning_rate": 2.8817183599682914e-05, "loss": 0.1195, "step": 8664 }, { "epoch": 0.15345313835133065, "grad_norm": 1.0815198421478271, "learning_rate": 2.8816848707365803e-05, "loss": 0.128, "step": 8665 }, { "epoch": 0.15347084788835907, "grad_norm": 1.0917479991912842, "learning_rate": 2.8816513769592665e-05, "loss": 0.095, "step": 8666 }, { "epoch": 0.1534885574253875, "grad_norm": 1.3555610179901123, "learning_rate": 2.881617878636459e-05, "loss": 0.1371, "step": 8667 }, { "epoch": 0.15350626696241593, "grad_norm": 1.0879428386688232, "learning_rate": 2.881584375768268e-05, "loss": 0.1232, "step": 8668 }, { "epoch": 0.15352397649944435, "grad_norm": 1.282752275466919, "learning_rate": 2.8815508683548047e-05, "loss": 0.1503, "step": 8669 }, { "epoch": 0.15354168603647278, "grad_norm": 0.8264439105987549, "learning_rate": 2.8815173563961788e-05, "loss": 0.0983, "step": 8670 }, { "epoch": 0.1535593955735012, "grad_norm": 1.3661974668502808, "learning_rate": 2.8814838398925004e-05, "loss": 0.1385, "step": 8671 }, { "epoch": 0.15357710511052966, "grad_norm": 1.5734792947769165, "learning_rate": 2.8814503188438798e-05, "loss": 0.0959, "step": 8672 }, { "epoch": 0.1535948146475581, "grad_norm": 1.2858667373657227, "learning_rate": 2.8814167932504273e-05, "loss": 0.0813, "step": 8673 }, { "epoch": 0.1536125241845865, "grad_norm": 1.1198827028274536, "learning_rate": 2.8813832631122534e-05, "loss": 0.1481, "step": 8674 }, { "epoch": 0.15363023372161494, "grad_norm": 0.6578332185745239, "learning_rate": 2.8813497284294686e-05, "loss": 0.0813, "step": 8675 }, { "epoch": 0.15364794325864337, "grad_norm": 1.1133748292922974, "learning_rate": 2.8813161892021822e-05, "loss": 0.0894, "step": 8676 }, { "epoch": 0.1536656527956718, "grad_norm": 1.741227626800537, "learning_rate": 2.8812826454305058e-05, "loss": 0.124, "step": 8677 }, { "epoch": 0.15368336233270022, "grad_norm": 1.2640159130096436, "learning_rate": 2.8812490971145492e-05, "loss": 0.1369, "step": 8678 }, { "epoch": 0.15370107186972864, "grad_norm": 0.9901644587516785, "learning_rate": 2.8812155442544224e-05, "loss": 0.124, "step": 8679 }, { "epoch": 0.15371878140675707, "grad_norm": 0.9896200299263, "learning_rate": 2.8811819868502362e-05, "loss": 0.0931, "step": 8680 }, { "epoch": 0.1537364909437855, "grad_norm": 0.8918898105621338, "learning_rate": 2.8811484249021015e-05, "loss": 0.1287, "step": 8681 }, { "epoch": 0.15375420048081392, "grad_norm": 0.9862200617790222, "learning_rate": 2.8811148584101273e-05, "loss": 0.1221, "step": 8682 }, { "epoch": 0.15377191001784235, "grad_norm": 1.553796410560608, "learning_rate": 2.8810812873744258e-05, "loss": 0.1052, "step": 8683 }, { "epoch": 0.15378961955487078, "grad_norm": 1.871568202972412, "learning_rate": 2.8810477117951064e-05, "loss": 0.1048, "step": 8684 }, { "epoch": 0.1538073290918992, "grad_norm": 1.2094837427139282, "learning_rate": 2.8810141316722793e-05, "loss": 0.124, "step": 8685 }, { "epoch": 0.15382503862892763, "grad_norm": 1.0681440830230713, "learning_rate": 2.8809805470060555e-05, "loss": 0.1357, "step": 8686 }, { "epoch": 0.15384274816595608, "grad_norm": 0.9605876207351685, "learning_rate": 2.880946957796546e-05, "loss": 0.1603, "step": 8687 }, { "epoch": 0.1538604577029845, "grad_norm": 0.9385454058647156, "learning_rate": 2.8809133640438598e-05, "loss": 0.0968, "step": 8688 }, { "epoch": 0.15387816724001294, "grad_norm": 0.6286852359771729, "learning_rate": 2.8808797657481085e-05, "loss": 0.1266, "step": 8689 }, { "epoch": 0.15389587677704136, "grad_norm": 0.6222802400588989, "learning_rate": 2.8808461629094027e-05, "loss": 0.0975, "step": 8690 }, { "epoch": 0.1539135863140698, "grad_norm": 0.5742712020874023, "learning_rate": 2.8808125555278525e-05, "loss": 0.0761, "step": 8691 }, { "epoch": 0.15393129585109822, "grad_norm": 0.7485565543174744, "learning_rate": 2.8807789436035687e-05, "loss": 0.1327, "step": 8692 }, { "epoch": 0.15394900538812664, "grad_norm": 1.110369086265564, "learning_rate": 2.8807453271366618e-05, "loss": 0.0913, "step": 8693 }, { "epoch": 0.15396671492515507, "grad_norm": 0.671647310256958, "learning_rate": 2.880711706127243e-05, "loss": 0.0658, "step": 8694 }, { "epoch": 0.1539844244621835, "grad_norm": 1.0713132619857788, "learning_rate": 2.880678080575421e-05, "loss": 0.1446, "step": 8695 }, { "epoch": 0.15400213399921192, "grad_norm": 1.013941764831543, "learning_rate": 2.880644450481309e-05, "loss": 0.1123, "step": 8696 }, { "epoch": 0.15401984353624035, "grad_norm": 1.2732475996017456, "learning_rate": 2.8806108158450158e-05, "loss": 0.1221, "step": 8697 }, { "epoch": 0.15403755307326877, "grad_norm": 1.4690077304840088, "learning_rate": 2.880577176666653e-05, "loss": 0.1023, "step": 8698 }, { "epoch": 0.1540552626102972, "grad_norm": 0.8149709105491638, "learning_rate": 2.880543532946331e-05, "loss": 0.1009, "step": 8699 }, { "epoch": 0.15407297214732563, "grad_norm": 0.873673677444458, "learning_rate": 2.8805098846841597e-05, "loss": 0.1108, "step": 8700 }, { "epoch": 0.15409068168435408, "grad_norm": 1.238241195678711, "learning_rate": 2.8804762318802512e-05, "loss": 0.1058, "step": 8701 }, { "epoch": 0.1541083912213825, "grad_norm": 0.9191115498542786, "learning_rate": 2.8804425745347153e-05, "loss": 0.1469, "step": 8702 }, { "epoch": 0.15412610075841093, "grad_norm": 1.092624306678772, "learning_rate": 2.8804089126476633e-05, "loss": 0.0855, "step": 8703 }, { "epoch": 0.15414381029543936, "grad_norm": 2.206368923187256, "learning_rate": 2.8803752462192048e-05, "loss": 0.1249, "step": 8704 }, { "epoch": 0.15416151983246779, "grad_norm": 0.890052080154419, "learning_rate": 2.8803415752494516e-05, "loss": 0.101, "step": 8705 }, { "epoch": 0.1541792293694962, "grad_norm": 1.3013941049575806, "learning_rate": 2.8803078997385146e-05, "loss": 0.0865, "step": 8706 }, { "epoch": 0.15419693890652464, "grad_norm": 1.114845871925354, "learning_rate": 2.8802742196865042e-05, "loss": 0.148, "step": 8707 }, { "epoch": 0.15421464844355307, "grad_norm": 1.6546205282211304, "learning_rate": 2.880240535093531e-05, "loss": 0.1137, "step": 8708 }, { "epoch": 0.1542323579805815, "grad_norm": 1.0534937381744385, "learning_rate": 2.8802068459597064e-05, "loss": 0.1004, "step": 8709 }, { "epoch": 0.15425006751760992, "grad_norm": 0.9150084257125854, "learning_rate": 2.8801731522851406e-05, "loss": 0.0965, "step": 8710 }, { "epoch": 0.15426777705463834, "grad_norm": 1.1846318244934082, "learning_rate": 2.880139454069945e-05, "loss": 0.0912, "step": 8711 }, { "epoch": 0.15428548659166677, "grad_norm": 1.0048691034317017, "learning_rate": 2.8801057513142294e-05, "loss": 0.128, "step": 8712 }, { "epoch": 0.1543031961286952, "grad_norm": 0.840074896812439, "learning_rate": 2.8800720440181062e-05, "loss": 0.1162, "step": 8713 }, { "epoch": 0.15432090566572362, "grad_norm": 1.1198153495788574, "learning_rate": 2.8800383321816856e-05, "loss": 0.1052, "step": 8714 }, { "epoch": 0.15433861520275205, "grad_norm": 1.0124897956848145, "learning_rate": 2.8800046158050782e-05, "loss": 0.0855, "step": 8715 }, { "epoch": 0.1543563247397805, "grad_norm": 1.2895513772964478, "learning_rate": 2.8799708948883953e-05, "loss": 0.123, "step": 8716 }, { "epoch": 0.15437403427680893, "grad_norm": 0.747286319732666, "learning_rate": 2.8799371694317472e-05, "loss": 0.101, "step": 8717 }, { "epoch": 0.15439174381383736, "grad_norm": 0.8855647444725037, "learning_rate": 2.8799034394352458e-05, "loss": 0.102, "step": 8718 }, { "epoch": 0.15440945335086578, "grad_norm": 0.8688555955886841, "learning_rate": 2.879869704899002e-05, "loss": 0.1011, "step": 8719 }, { "epoch": 0.1544271628878942, "grad_norm": 0.559009850025177, "learning_rate": 2.879835965823126e-05, "loss": 0.0989, "step": 8720 }, { "epoch": 0.15444487242492264, "grad_norm": 1.0527700185775757, "learning_rate": 2.879802222207729e-05, "loss": 0.0627, "step": 8721 }, { "epoch": 0.15446258196195106, "grad_norm": 1.2496508359909058, "learning_rate": 2.879768474052923e-05, "loss": 0.0868, "step": 8722 }, { "epoch": 0.1544802914989795, "grad_norm": 0.8468443751335144, "learning_rate": 2.8797347213588182e-05, "loss": 0.1199, "step": 8723 }, { "epoch": 0.15449800103600791, "grad_norm": 0.7522751688957214, "learning_rate": 2.879700964125525e-05, "loss": 0.1026, "step": 8724 }, { "epoch": 0.15451571057303634, "grad_norm": 1.3123453855514526, "learning_rate": 2.8796672023531556e-05, "loss": 0.1703, "step": 8725 }, { "epoch": 0.15453342011006477, "grad_norm": 1.3006925582885742, "learning_rate": 2.8796334360418205e-05, "loss": 0.1101, "step": 8726 }, { "epoch": 0.1545511296470932, "grad_norm": 1.18882155418396, "learning_rate": 2.8795996651916308e-05, "loss": 0.1122, "step": 8727 }, { "epoch": 0.15456883918412162, "grad_norm": 1.3227827548980713, "learning_rate": 2.8795658898026985e-05, "loss": 0.1611, "step": 8728 }, { "epoch": 0.15458654872115005, "grad_norm": 0.9915305376052856, "learning_rate": 2.8795321098751336e-05, "loss": 0.0849, "step": 8729 }, { "epoch": 0.15460425825817847, "grad_norm": 0.9912057518959045, "learning_rate": 2.8794983254090475e-05, "loss": 0.0978, "step": 8730 }, { "epoch": 0.15462196779520693, "grad_norm": 0.7816666960716248, "learning_rate": 2.879464536404551e-05, "loss": 0.108, "step": 8731 }, { "epoch": 0.15463967733223535, "grad_norm": 1.282820701599121, "learning_rate": 2.8794307428617562e-05, "loss": 0.1658, "step": 8732 }, { "epoch": 0.15465738686926378, "grad_norm": 1.2788350582122803, "learning_rate": 2.8793969447807736e-05, "loss": 0.1401, "step": 8733 }, { "epoch": 0.1546750964062922, "grad_norm": 1.246097207069397, "learning_rate": 2.8793631421617144e-05, "loss": 0.1338, "step": 8734 }, { "epoch": 0.15469280594332063, "grad_norm": 1.0443508625030518, "learning_rate": 2.8793293350046904e-05, "loss": 0.1262, "step": 8735 }, { "epoch": 0.15471051548034906, "grad_norm": 1.2285435199737549, "learning_rate": 2.879295523309812e-05, "loss": 0.1201, "step": 8736 }, { "epoch": 0.15472822501737749, "grad_norm": 1.3755871057510376, "learning_rate": 2.8792617070771913e-05, "loss": 0.1229, "step": 8737 }, { "epoch": 0.1547459345544059, "grad_norm": 0.7894703149795532, "learning_rate": 2.879227886306939e-05, "loss": 0.1188, "step": 8738 }, { "epoch": 0.15476364409143434, "grad_norm": 1.0470346212387085, "learning_rate": 2.8791940609991662e-05, "loss": 0.156, "step": 8739 }, { "epoch": 0.15478135362846276, "grad_norm": 0.8706836700439453, "learning_rate": 2.8791602311539846e-05, "loss": 0.138, "step": 8740 }, { "epoch": 0.1547990631654912, "grad_norm": 1.131908655166626, "learning_rate": 2.8791263967715053e-05, "loss": 0.0944, "step": 8741 }, { "epoch": 0.15481677270251962, "grad_norm": 0.7951298952102661, "learning_rate": 2.8790925578518398e-05, "loss": 0.1239, "step": 8742 }, { "epoch": 0.15483448223954804, "grad_norm": 0.9402720332145691, "learning_rate": 2.8790587143950986e-05, "loss": 0.1321, "step": 8743 }, { "epoch": 0.15485219177657647, "grad_norm": 0.970001220703125, "learning_rate": 2.8790248664013944e-05, "loss": 0.0975, "step": 8744 }, { "epoch": 0.1548699013136049, "grad_norm": 0.9369348287582397, "learning_rate": 2.8789910138708375e-05, "loss": 0.1084, "step": 8745 }, { "epoch": 0.15488761085063335, "grad_norm": 1.1196097135543823, "learning_rate": 2.87895715680354e-05, "loss": 0.1144, "step": 8746 }, { "epoch": 0.15490532038766178, "grad_norm": 1.7367124557495117, "learning_rate": 2.8789232951996128e-05, "loss": 0.116, "step": 8747 }, { "epoch": 0.1549230299246902, "grad_norm": 1.080122470855713, "learning_rate": 2.878889429059167e-05, "loss": 0.1561, "step": 8748 }, { "epoch": 0.15494073946171863, "grad_norm": 0.8425098061561584, "learning_rate": 2.878855558382315e-05, "loss": 0.1171, "step": 8749 }, { "epoch": 0.15495844899874706, "grad_norm": 1.089377760887146, "learning_rate": 2.8788216831691677e-05, "loss": 0.1038, "step": 8750 }, { "epoch": 0.15497615853577548, "grad_norm": 1.2762035131454468, "learning_rate": 2.8787878034198358e-05, "loss": 0.1646, "step": 8751 }, { "epoch": 0.1549938680728039, "grad_norm": 1.392601728439331, "learning_rate": 2.878753919134432e-05, "loss": 0.1367, "step": 8752 }, { "epoch": 0.15501157760983234, "grad_norm": 1.600110650062561, "learning_rate": 2.8787200303130677e-05, "loss": 0.1009, "step": 8753 }, { "epoch": 0.15502928714686076, "grad_norm": 0.9014406800270081, "learning_rate": 2.878686136955853e-05, "loss": 0.0959, "step": 8754 }, { "epoch": 0.1550469966838892, "grad_norm": 0.939048707485199, "learning_rate": 2.878652239062901e-05, "loss": 0.1261, "step": 8755 }, { "epoch": 0.15506470622091761, "grad_norm": 1.0155941247940063, "learning_rate": 2.8786183366343223e-05, "loss": 0.1003, "step": 8756 }, { "epoch": 0.15508241575794604, "grad_norm": 1.5236672163009644, "learning_rate": 2.878584429670229e-05, "loss": 0.1344, "step": 8757 }, { "epoch": 0.15510012529497447, "grad_norm": 0.9673433303833008, "learning_rate": 2.878550518170732e-05, "loss": 0.141, "step": 8758 }, { "epoch": 0.1551178348320029, "grad_norm": 0.9892401695251465, "learning_rate": 2.8785166021359434e-05, "loss": 0.1324, "step": 8759 }, { "epoch": 0.15513554436903132, "grad_norm": 0.42202088236808777, "learning_rate": 2.8784826815659745e-05, "loss": 0.0844, "step": 8760 }, { "epoch": 0.15515325390605977, "grad_norm": 1.0837664604187012, "learning_rate": 2.8784487564609374e-05, "loss": 0.1069, "step": 8761 }, { "epoch": 0.1551709634430882, "grad_norm": 1.206356406211853, "learning_rate": 2.878414826820943e-05, "loss": 0.1287, "step": 8762 }, { "epoch": 0.15518867298011663, "grad_norm": 1.2065927982330322, "learning_rate": 2.8783808926461035e-05, "loss": 0.1397, "step": 8763 }, { "epoch": 0.15520638251714505, "grad_norm": 1.0577322244644165, "learning_rate": 2.8783469539365297e-05, "loss": 0.114, "step": 8764 }, { "epoch": 0.15522409205417348, "grad_norm": 0.7487345337867737, "learning_rate": 2.878313010692334e-05, "loss": 0.1198, "step": 8765 }, { "epoch": 0.1552418015912019, "grad_norm": 0.5565875172615051, "learning_rate": 2.8782790629136284e-05, "loss": 0.0979, "step": 8766 }, { "epoch": 0.15525951112823033, "grad_norm": 1.5468438863754272, "learning_rate": 2.878245110600524e-05, "loss": 0.1279, "step": 8767 }, { "epoch": 0.15527722066525876, "grad_norm": 1.024415135383606, "learning_rate": 2.8782111537531323e-05, "loss": 0.1273, "step": 8768 }, { "epoch": 0.15529493020228718, "grad_norm": 1.0556203126907349, "learning_rate": 2.8781771923715653e-05, "loss": 0.1083, "step": 8769 }, { "epoch": 0.1553126397393156, "grad_norm": 1.0276716947555542, "learning_rate": 2.878143226455935e-05, "loss": 0.0894, "step": 8770 }, { "epoch": 0.15533034927634404, "grad_norm": 1.157080054283142, "learning_rate": 2.8781092560063527e-05, "loss": 0.131, "step": 8771 }, { "epoch": 0.15534805881337246, "grad_norm": 0.9103273153305054, "learning_rate": 2.8780752810229304e-05, "loss": 0.1194, "step": 8772 }, { "epoch": 0.1553657683504009, "grad_norm": 0.9527855515480042, "learning_rate": 2.8780413015057795e-05, "loss": 0.1205, "step": 8773 }, { "epoch": 0.15538347788742932, "grad_norm": 2.162137031555176, "learning_rate": 2.878007317455012e-05, "loss": 0.0962, "step": 8774 }, { "epoch": 0.15540118742445774, "grad_norm": 1.0249112844467163, "learning_rate": 2.8779733288707404e-05, "loss": 0.1377, "step": 8775 }, { "epoch": 0.1554188969614862, "grad_norm": 1.321466326713562, "learning_rate": 2.8779393357530756e-05, "loss": 0.1242, "step": 8776 }, { "epoch": 0.15543660649851462, "grad_norm": 1.386003851890564, "learning_rate": 2.87790533810213e-05, "loss": 0.1481, "step": 8777 }, { "epoch": 0.15545431603554305, "grad_norm": 1.2911264896392822, "learning_rate": 2.877871335918015e-05, "loss": 0.1611, "step": 8778 }, { "epoch": 0.15547202557257148, "grad_norm": 1.910788655281067, "learning_rate": 2.8778373292008427e-05, "loss": 0.1671, "step": 8779 }, { "epoch": 0.1554897351095999, "grad_norm": 0.8680071234703064, "learning_rate": 2.8778033179507245e-05, "loss": 0.1602, "step": 8780 }, { "epoch": 0.15550744464662833, "grad_norm": 1.3518682718276978, "learning_rate": 2.8777693021677733e-05, "loss": 0.0996, "step": 8781 }, { "epoch": 0.15552515418365676, "grad_norm": 1.0995341539382935, "learning_rate": 2.8777352818521004e-05, "loss": 0.1527, "step": 8782 }, { "epoch": 0.15554286372068518, "grad_norm": 1.0576523542404175, "learning_rate": 2.8777012570038176e-05, "loss": 0.144, "step": 8783 }, { "epoch": 0.1555605732577136, "grad_norm": 1.1662479639053345, "learning_rate": 2.8776672276230372e-05, "loss": 0.1121, "step": 8784 }, { "epoch": 0.15557828279474203, "grad_norm": 0.9022353887557983, "learning_rate": 2.877633193709871e-05, "loss": 0.1373, "step": 8785 }, { "epoch": 0.15559599233177046, "grad_norm": 0.6973216533660889, "learning_rate": 2.8775991552644308e-05, "loss": 0.1013, "step": 8786 }, { "epoch": 0.1556137018687989, "grad_norm": 0.7503734827041626, "learning_rate": 2.8775651122868284e-05, "loss": 0.1161, "step": 8787 }, { "epoch": 0.1556314114058273, "grad_norm": 0.9500711560249329, "learning_rate": 2.8775310647771766e-05, "loss": 0.1139, "step": 8788 }, { "epoch": 0.15564912094285574, "grad_norm": 1.4838978052139282, "learning_rate": 2.8774970127355865e-05, "loss": 0.2241, "step": 8789 }, { "epoch": 0.15566683047988417, "grad_norm": 2.132150888442993, "learning_rate": 2.877462956162171e-05, "loss": 0.1101, "step": 8790 }, { "epoch": 0.15568454001691262, "grad_norm": 1.1025117635726929, "learning_rate": 2.8774288950570413e-05, "loss": 0.1069, "step": 8791 }, { "epoch": 0.15570224955394105, "grad_norm": 1.2064297199249268, "learning_rate": 2.87739482942031e-05, "loss": 0.1428, "step": 8792 }, { "epoch": 0.15571995909096947, "grad_norm": 0.9693011045455933, "learning_rate": 2.8773607592520893e-05, "loss": 0.0943, "step": 8793 }, { "epoch": 0.1557376686279979, "grad_norm": 1.0997469425201416, "learning_rate": 2.8773266845524905e-05, "loss": 0.151, "step": 8794 }, { "epoch": 0.15575537816502633, "grad_norm": 0.7138884663581848, "learning_rate": 2.8772926053216266e-05, "loss": 0.0971, "step": 8795 }, { "epoch": 0.15577308770205475, "grad_norm": 0.7177891731262207, "learning_rate": 2.877258521559609e-05, "loss": 0.108, "step": 8796 }, { "epoch": 0.15579079723908318, "grad_norm": 1.050279974937439, "learning_rate": 2.8772244332665508e-05, "loss": 0.0891, "step": 8797 }, { "epoch": 0.1558085067761116, "grad_norm": 1.585289716720581, "learning_rate": 2.877190340442563e-05, "loss": 0.1292, "step": 8798 }, { "epoch": 0.15582621631314003, "grad_norm": 1.1867471933364868, "learning_rate": 2.8771562430877583e-05, "loss": 0.1011, "step": 8799 }, { "epoch": 0.15584392585016846, "grad_norm": 1.078477144241333, "learning_rate": 2.877122141202249e-05, "loss": 0.1271, "step": 8800 }, { "epoch": 0.15586163538719688, "grad_norm": 1.0619127750396729, "learning_rate": 2.877088034786147e-05, "loss": 0.1128, "step": 8801 }, { "epoch": 0.1558793449242253, "grad_norm": 6.499042510986328, "learning_rate": 2.877053923839565e-05, "loss": 0.1236, "step": 8802 }, { "epoch": 0.15589705446125374, "grad_norm": 1.40885329246521, "learning_rate": 2.8770198083626146e-05, "loss": 0.133, "step": 8803 }, { "epoch": 0.15591476399828216, "grad_norm": 1.1820107698440552, "learning_rate": 2.876985688355408e-05, "loss": 0.1215, "step": 8804 }, { "epoch": 0.1559324735353106, "grad_norm": 0.9902054071426392, "learning_rate": 2.8769515638180583e-05, "loss": 0.1464, "step": 8805 }, { "epoch": 0.15595018307233904, "grad_norm": 0.7264845371246338, "learning_rate": 2.8769174347506767e-05, "loss": 0.1006, "step": 8806 }, { "epoch": 0.15596789260936747, "grad_norm": 0.9660214185714722, "learning_rate": 2.8768833011533765e-05, "loss": 0.1123, "step": 8807 }, { "epoch": 0.1559856021463959, "grad_norm": 1.2391536235809326, "learning_rate": 2.8768491630262694e-05, "loss": 0.0893, "step": 8808 }, { "epoch": 0.15600331168342432, "grad_norm": 1.3397736549377441, "learning_rate": 2.8768150203694676e-05, "loss": 0.0997, "step": 8809 }, { "epoch": 0.15602102122045275, "grad_norm": 1.2588878870010376, "learning_rate": 2.876780873183084e-05, "loss": 0.1361, "step": 8810 }, { "epoch": 0.15603873075748118, "grad_norm": 1.3612834215164185, "learning_rate": 2.87674672146723e-05, "loss": 0.1228, "step": 8811 }, { "epoch": 0.1560564402945096, "grad_norm": 1.8767361640930176, "learning_rate": 2.8767125652220194e-05, "loss": 0.1273, "step": 8812 }, { "epoch": 0.15607414983153803, "grad_norm": 0.997154712677002, "learning_rate": 2.8766784044475627e-05, "loss": 0.1447, "step": 8813 }, { "epoch": 0.15609185936856645, "grad_norm": 1.1026347875595093, "learning_rate": 2.8766442391439737e-05, "loss": 0.1195, "step": 8814 }, { "epoch": 0.15610956890559488, "grad_norm": 1.138021469116211, "learning_rate": 2.8766100693113643e-05, "loss": 0.1021, "step": 8815 }, { "epoch": 0.1561272784426233, "grad_norm": 1.1863785982131958, "learning_rate": 2.8765758949498475e-05, "loss": 0.1232, "step": 8816 }, { "epoch": 0.15614498797965173, "grad_norm": 0.806581974029541, "learning_rate": 2.876541716059535e-05, "loss": 0.1212, "step": 8817 }, { "epoch": 0.15616269751668016, "grad_norm": 1.367854118347168, "learning_rate": 2.8765075326405395e-05, "loss": 0.1165, "step": 8818 }, { "epoch": 0.1561804070537086, "grad_norm": 1.0795015096664429, "learning_rate": 2.876473344692973e-05, "loss": 0.1061, "step": 8819 }, { "epoch": 0.156198116590737, "grad_norm": 1.154531478881836, "learning_rate": 2.876439152216949e-05, "loss": 0.1326, "step": 8820 }, { "epoch": 0.15621582612776547, "grad_norm": 1.4177144765853882, "learning_rate": 2.876404955212579e-05, "loss": 0.1571, "step": 8821 }, { "epoch": 0.1562335356647939, "grad_norm": 0.8842942714691162, "learning_rate": 2.876370753679976e-05, "loss": 0.0924, "step": 8822 }, { "epoch": 0.15625124520182232, "grad_norm": 1.0741466283798218, "learning_rate": 2.876336547619253e-05, "loss": 0.119, "step": 8823 }, { "epoch": 0.15626895473885075, "grad_norm": 0.8839545845985413, "learning_rate": 2.876302337030521e-05, "loss": 0.1051, "step": 8824 }, { "epoch": 0.15628666427587917, "grad_norm": 1.1069079637527466, "learning_rate": 2.8762681219138943e-05, "loss": 0.1275, "step": 8825 }, { "epoch": 0.1563043738129076, "grad_norm": 0.9833061695098877, "learning_rate": 2.8762339022694842e-05, "loss": 0.1199, "step": 8826 }, { "epoch": 0.15632208334993603, "grad_norm": 0.9280309081077576, "learning_rate": 2.876199678097404e-05, "loss": 0.0696, "step": 8827 }, { "epoch": 0.15633979288696445, "grad_norm": 1.017526388168335, "learning_rate": 2.876165449397766e-05, "loss": 0.1152, "step": 8828 }, { "epoch": 0.15635750242399288, "grad_norm": 1.2007032632827759, "learning_rate": 2.8761312161706828e-05, "loss": 0.1281, "step": 8829 }, { "epoch": 0.1563752119610213, "grad_norm": 0.7185596227645874, "learning_rate": 2.8760969784162672e-05, "loss": 0.1139, "step": 8830 }, { "epoch": 0.15639292149804973, "grad_norm": 1.0820401906967163, "learning_rate": 2.8760627361346316e-05, "loss": 0.1485, "step": 8831 }, { "epoch": 0.15641063103507816, "grad_norm": 0.9779603481292725, "learning_rate": 2.876028489325889e-05, "loss": 0.0927, "step": 8832 }, { "epoch": 0.15642834057210658, "grad_norm": 1.003645658493042, "learning_rate": 2.875994237990151e-05, "loss": 0.0881, "step": 8833 }, { "epoch": 0.156446050109135, "grad_norm": 1.0644022226333618, "learning_rate": 2.875959982127532e-05, "loss": 0.1054, "step": 8834 }, { "epoch": 0.15646375964616346, "grad_norm": 0.9533789753913879, "learning_rate": 2.8759257217381438e-05, "loss": 0.1654, "step": 8835 }, { "epoch": 0.1564814691831919, "grad_norm": 0.9795439839363098, "learning_rate": 2.875891456822099e-05, "loss": 0.0698, "step": 8836 }, { "epoch": 0.15649917872022032, "grad_norm": 0.9423592686653137, "learning_rate": 2.8758571873795102e-05, "loss": 0.113, "step": 8837 }, { "epoch": 0.15651688825724874, "grad_norm": 1.513484239578247, "learning_rate": 2.8758229134104907e-05, "loss": 0.0915, "step": 8838 }, { "epoch": 0.15653459779427717, "grad_norm": 0.8966332077980042, "learning_rate": 2.875788634915153e-05, "loss": 0.0909, "step": 8839 }, { "epoch": 0.1565523073313056, "grad_norm": 1.129685401916504, "learning_rate": 2.87575435189361e-05, "loss": 0.1302, "step": 8840 }, { "epoch": 0.15657001686833402, "grad_norm": 1.0227363109588623, "learning_rate": 2.875720064345974e-05, "loss": 0.1449, "step": 8841 }, { "epoch": 0.15658772640536245, "grad_norm": 1.2612457275390625, "learning_rate": 2.8756857722723583e-05, "loss": 0.1369, "step": 8842 }, { "epoch": 0.15660543594239087, "grad_norm": 0.8687033653259277, "learning_rate": 2.8756514756728752e-05, "loss": 0.1296, "step": 8843 }, { "epoch": 0.1566231454794193, "grad_norm": 0.9827057123184204, "learning_rate": 2.8756171745476385e-05, "loss": 0.1506, "step": 8844 }, { "epoch": 0.15664085501644773, "grad_norm": 1.5906099081039429, "learning_rate": 2.8755828688967597e-05, "loss": 0.1173, "step": 8845 }, { "epoch": 0.15665856455347615, "grad_norm": 0.8496119379997253, "learning_rate": 2.8755485587203532e-05, "loss": 0.1134, "step": 8846 }, { "epoch": 0.15667627409050458, "grad_norm": 1.388567566871643, "learning_rate": 2.8755142440185306e-05, "loss": 0.1029, "step": 8847 }, { "epoch": 0.156693983627533, "grad_norm": 0.9103668332099915, "learning_rate": 2.8754799247914053e-05, "loss": 0.1231, "step": 8848 }, { "epoch": 0.15671169316456143, "grad_norm": 1.6203562021255493, "learning_rate": 2.8754456010390902e-05, "loss": 0.1409, "step": 8849 }, { "epoch": 0.1567294027015899, "grad_norm": 0.6577801704406738, "learning_rate": 2.875411272761698e-05, "loss": 0.1162, "step": 8850 }, { "epoch": 0.1567471122386183, "grad_norm": 0.9874176383018494, "learning_rate": 2.875376939959342e-05, "loss": 0.1409, "step": 8851 }, { "epoch": 0.15676482177564674, "grad_norm": 1.4840657711029053, "learning_rate": 2.875342602632135e-05, "loss": 0.1354, "step": 8852 }, { "epoch": 0.15678253131267517, "grad_norm": 0.8346155285835266, "learning_rate": 2.8753082607801903e-05, "loss": 0.1339, "step": 8853 }, { "epoch": 0.1568002408497036, "grad_norm": 0.8359714150428772, "learning_rate": 2.87527391440362e-05, "loss": 0.1021, "step": 8854 }, { "epoch": 0.15681795038673202, "grad_norm": 1.0420523881912231, "learning_rate": 2.875239563502538e-05, "loss": 0.1178, "step": 8855 }, { "epoch": 0.15683565992376045, "grad_norm": 1.0717862844467163, "learning_rate": 2.875205208077057e-05, "loss": 0.142, "step": 8856 }, { "epoch": 0.15685336946078887, "grad_norm": 1.578176498413086, "learning_rate": 2.8751708481272892e-05, "loss": 0.1168, "step": 8857 }, { "epoch": 0.1568710789978173, "grad_norm": 1.4294790029525757, "learning_rate": 2.8751364836533488e-05, "loss": 0.1182, "step": 8858 }, { "epoch": 0.15688878853484572, "grad_norm": 1.5025489330291748, "learning_rate": 2.8751021146553485e-05, "loss": 0.1485, "step": 8859 }, { "epoch": 0.15690649807187415, "grad_norm": 0.9073846340179443, "learning_rate": 2.8750677411334015e-05, "loss": 0.1216, "step": 8860 }, { "epoch": 0.15692420760890258, "grad_norm": 0.8972967863082886, "learning_rate": 2.8750333630876206e-05, "loss": 0.1012, "step": 8861 }, { "epoch": 0.156941917145931, "grad_norm": 0.9629007577896118, "learning_rate": 2.874998980518119e-05, "loss": 0.088, "step": 8862 }, { "epoch": 0.15695962668295943, "grad_norm": 1.0506360530853271, "learning_rate": 2.87496459342501e-05, "loss": 0.1066, "step": 8863 }, { "epoch": 0.15697733621998786, "grad_norm": 1.1240785121917725, "learning_rate": 2.874930201808406e-05, "loss": 0.1253, "step": 8864 }, { "epoch": 0.1569950457570163, "grad_norm": 1.1012613773345947, "learning_rate": 2.874895805668421e-05, "loss": 0.1107, "step": 8865 }, { "epoch": 0.15701275529404474, "grad_norm": 1.1469659805297852, "learning_rate": 2.874861405005168e-05, "loss": 0.0979, "step": 8866 }, { "epoch": 0.15703046483107316, "grad_norm": 0.7035771012306213, "learning_rate": 2.87482699981876e-05, "loss": 0.1405, "step": 8867 }, { "epoch": 0.1570481743681016, "grad_norm": 0.9092094898223877, "learning_rate": 2.8747925901093102e-05, "loss": 0.088, "step": 8868 }, { "epoch": 0.15706588390513002, "grad_norm": 0.7721137404441833, "learning_rate": 2.874758175876932e-05, "loss": 0.1299, "step": 8869 }, { "epoch": 0.15708359344215844, "grad_norm": 1.0816298723220825, "learning_rate": 2.8747237571217382e-05, "loss": 0.1601, "step": 8870 }, { "epoch": 0.15710130297918687, "grad_norm": 1.4457907676696777, "learning_rate": 2.8746893338438423e-05, "loss": 0.1311, "step": 8871 }, { "epoch": 0.1571190125162153, "grad_norm": 1.0998765230178833, "learning_rate": 2.874654906043358e-05, "loss": 0.0994, "step": 8872 }, { "epoch": 0.15713672205324372, "grad_norm": 1.0727627277374268, "learning_rate": 2.8746204737203976e-05, "loss": 0.1211, "step": 8873 }, { "epoch": 0.15715443159027215, "grad_norm": 0.9953015446662903, "learning_rate": 2.8745860368750748e-05, "loss": 0.097, "step": 8874 }, { "epoch": 0.15717214112730057, "grad_norm": 0.8349462151527405, "learning_rate": 2.874551595507503e-05, "loss": 0.0843, "step": 8875 }, { "epoch": 0.157189850664329, "grad_norm": 0.9821386933326721, "learning_rate": 2.8745171496177956e-05, "loss": 0.1063, "step": 8876 }, { "epoch": 0.15720756020135743, "grad_norm": 0.8566862940788269, "learning_rate": 2.874482699206066e-05, "loss": 0.0886, "step": 8877 }, { "epoch": 0.15722526973838585, "grad_norm": 1.1152805089950562, "learning_rate": 2.874448244272427e-05, "loss": 0.1267, "step": 8878 }, { "epoch": 0.15724297927541428, "grad_norm": 0.6970918774604797, "learning_rate": 2.8744137848169926e-05, "loss": 0.084, "step": 8879 }, { "epoch": 0.15726068881244273, "grad_norm": 0.7695069909095764, "learning_rate": 2.8743793208398757e-05, "loss": 0.1215, "step": 8880 }, { "epoch": 0.15727839834947116, "grad_norm": 0.9138755202293396, "learning_rate": 2.87434485234119e-05, "loss": 0.0952, "step": 8881 }, { "epoch": 0.1572961078864996, "grad_norm": 1.0819392204284668, "learning_rate": 2.8743103793210486e-05, "loss": 0.1211, "step": 8882 }, { "epoch": 0.157313817423528, "grad_norm": 1.137754201889038, "learning_rate": 2.8742759017795647e-05, "loss": 0.0804, "step": 8883 }, { "epoch": 0.15733152696055644, "grad_norm": 0.8566388487815857, "learning_rate": 2.8742414197168527e-05, "loss": 0.0836, "step": 8884 }, { "epoch": 0.15734923649758487, "grad_norm": 0.8380873799324036, "learning_rate": 2.8742069331330248e-05, "loss": 0.0978, "step": 8885 }, { "epoch": 0.1573669460346133, "grad_norm": 1.1780056953430176, "learning_rate": 2.874172442028196e-05, "loss": 0.1638, "step": 8886 }, { "epoch": 0.15738465557164172, "grad_norm": 1.2872315645217896, "learning_rate": 2.8741379464024782e-05, "loss": 0.1174, "step": 8887 }, { "epoch": 0.15740236510867014, "grad_norm": 1.1391299962997437, "learning_rate": 2.8741034462559855e-05, "loss": 0.1102, "step": 8888 }, { "epoch": 0.15742007464569857, "grad_norm": 1.1557245254516602, "learning_rate": 2.8740689415888316e-05, "loss": 0.0848, "step": 8889 }, { "epoch": 0.157437784182727, "grad_norm": 1.1609190702438354, "learning_rate": 2.87403443240113e-05, "loss": 0.1218, "step": 8890 }, { "epoch": 0.15745549371975542, "grad_norm": 1.3305590152740479, "learning_rate": 2.873999918692994e-05, "loss": 0.1399, "step": 8891 }, { "epoch": 0.15747320325678385, "grad_norm": 1.1496999263763428, "learning_rate": 2.873965400464537e-05, "loss": 0.1487, "step": 8892 }, { "epoch": 0.15749091279381228, "grad_norm": 1.0706297159194946, "learning_rate": 2.873930877715873e-05, "loss": 0.1289, "step": 8893 }, { "epoch": 0.1575086223308407, "grad_norm": 1.71881902217865, "learning_rate": 2.8738963504471156e-05, "loss": 0.1324, "step": 8894 }, { "epoch": 0.15752633186786916, "grad_norm": 1.269006371498108, "learning_rate": 2.8738618186583776e-05, "loss": 0.133, "step": 8895 }, { "epoch": 0.15754404140489758, "grad_norm": 1.1814154386520386, "learning_rate": 2.8738272823497733e-05, "loss": 0.1432, "step": 8896 }, { "epoch": 0.157561750941926, "grad_norm": 0.6811233162879944, "learning_rate": 2.8737927415214164e-05, "loss": 0.0868, "step": 8897 }, { "epoch": 0.15757946047895444, "grad_norm": 1.177626371383667, "learning_rate": 2.8737581961734207e-05, "loss": 0.1339, "step": 8898 }, { "epoch": 0.15759717001598286, "grad_norm": 0.9370623230934143, "learning_rate": 2.873723646305899e-05, "loss": 0.1111, "step": 8899 }, { "epoch": 0.1576148795530113, "grad_norm": 1.2362051010131836, "learning_rate": 2.8736890919189656e-05, "loss": 0.1469, "step": 8900 }, { "epoch": 0.15763258909003972, "grad_norm": 0.7086539268493652, "learning_rate": 2.873654533012734e-05, "loss": 0.0668, "step": 8901 }, { "epoch": 0.15765029862706814, "grad_norm": 1.0438750982284546, "learning_rate": 2.8736199695873177e-05, "loss": 0.1214, "step": 8902 }, { "epoch": 0.15766800816409657, "grad_norm": 0.8072900176048279, "learning_rate": 2.873585401642831e-05, "loss": 0.1029, "step": 8903 }, { "epoch": 0.157685717701125, "grad_norm": 1.4760936498641968, "learning_rate": 2.873550829179387e-05, "loss": 0.1417, "step": 8904 }, { "epoch": 0.15770342723815342, "grad_norm": 1.0717124938964844, "learning_rate": 2.8735162521971e-05, "loss": 0.141, "step": 8905 }, { "epoch": 0.15772113677518185, "grad_norm": 0.9470397233963013, "learning_rate": 2.8734816706960835e-05, "loss": 0.1616, "step": 8906 }, { "epoch": 0.15773884631221027, "grad_norm": 1.1579954624176025, "learning_rate": 2.8734470846764504e-05, "loss": 0.1218, "step": 8907 }, { "epoch": 0.1577565558492387, "grad_norm": 1.1041290760040283, "learning_rate": 2.873412494138316e-05, "loss": 0.1412, "step": 8908 }, { "epoch": 0.15777426538626713, "grad_norm": 1.4118781089782715, "learning_rate": 2.873377899081793e-05, "loss": 0.104, "step": 8909 }, { "epoch": 0.15779197492329558, "grad_norm": 0.7626907825469971, "learning_rate": 2.8733432995069962e-05, "loss": 0.0984, "step": 8910 }, { "epoch": 0.157809684460324, "grad_norm": 1.3513396978378296, "learning_rate": 2.8733086954140388e-05, "loss": 0.1571, "step": 8911 }, { "epoch": 0.15782739399735243, "grad_norm": 1.0604994297027588, "learning_rate": 2.8732740868030345e-05, "loss": 0.1055, "step": 8912 }, { "epoch": 0.15784510353438086, "grad_norm": 0.7685645818710327, "learning_rate": 2.873239473674097e-05, "loss": 0.0968, "step": 8913 }, { "epoch": 0.15786281307140929, "grad_norm": 0.9914777874946594, "learning_rate": 2.873204856027341e-05, "loss": 0.1557, "step": 8914 }, { "epoch": 0.1578805226084377, "grad_norm": 0.78696209192276, "learning_rate": 2.8731702338628797e-05, "loss": 0.0772, "step": 8915 }, { "epoch": 0.15789823214546614, "grad_norm": 0.6297850012779236, "learning_rate": 2.8731356071808272e-05, "loss": 0.0938, "step": 8916 }, { "epoch": 0.15791594168249456, "grad_norm": 1.4628088474273682, "learning_rate": 2.8731009759812977e-05, "loss": 0.1264, "step": 8917 }, { "epoch": 0.157933651219523, "grad_norm": 1.264255166053772, "learning_rate": 2.8730663402644045e-05, "loss": 0.0932, "step": 8918 }, { "epoch": 0.15795136075655142, "grad_norm": 1.0007091760635376, "learning_rate": 2.8730317000302623e-05, "loss": 0.1188, "step": 8919 }, { "epoch": 0.15796907029357984, "grad_norm": 0.7975606918334961, "learning_rate": 2.8729970552789845e-05, "loss": 0.124, "step": 8920 }, { "epoch": 0.15798677983060827, "grad_norm": 0.805631160736084, "learning_rate": 2.8729624060106853e-05, "loss": 0.1109, "step": 8921 }, { "epoch": 0.1580044893676367, "grad_norm": 0.7184317111968994, "learning_rate": 2.8729277522254786e-05, "loss": 0.0635, "step": 8922 }, { "epoch": 0.15802219890466512, "grad_norm": 0.9480992555618286, "learning_rate": 2.872893093923478e-05, "loss": 0.1224, "step": 8923 }, { "epoch": 0.15803990844169355, "grad_norm": 1.242858648300171, "learning_rate": 2.872858431104799e-05, "loss": 0.1327, "step": 8924 }, { "epoch": 0.158057617978722, "grad_norm": 1.2479026317596436, "learning_rate": 2.8728237637695536e-05, "loss": 0.1315, "step": 8925 }, { "epoch": 0.15807532751575043, "grad_norm": 1.0872217416763306, "learning_rate": 2.872789091917857e-05, "loss": 0.1308, "step": 8926 }, { "epoch": 0.15809303705277886, "grad_norm": 0.815059244632721, "learning_rate": 2.8727544155498237e-05, "loss": 0.1171, "step": 8927 }, { "epoch": 0.15811074658980728, "grad_norm": 3.1419057846069336, "learning_rate": 2.872719734665567e-05, "loss": 0.1324, "step": 8928 }, { "epoch": 0.1581284561268357, "grad_norm": 0.7600656151771545, "learning_rate": 2.8726850492652012e-05, "loss": 0.1194, "step": 8929 }, { "epoch": 0.15814616566386414, "grad_norm": 0.8800998330116272, "learning_rate": 2.8726503593488405e-05, "loss": 0.1057, "step": 8930 }, { "epoch": 0.15816387520089256, "grad_norm": 1.124815821647644, "learning_rate": 2.8726156649165986e-05, "loss": 0.1427, "step": 8931 }, { "epoch": 0.158181584737921, "grad_norm": 2.5584824085235596, "learning_rate": 2.8725809659685905e-05, "loss": 0.086, "step": 8932 }, { "epoch": 0.15819929427494941, "grad_norm": 0.6005983352661133, "learning_rate": 2.8725462625049296e-05, "loss": 0.1006, "step": 8933 }, { "epoch": 0.15821700381197784, "grad_norm": 0.7955952882766724, "learning_rate": 2.8725115545257302e-05, "loss": 0.0911, "step": 8934 }, { "epoch": 0.15823471334900627, "grad_norm": 1.6972252130508423, "learning_rate": 2.872476842031107e-05, "loss": 0.1054, "step": 8935 }, { "epoch": 0.1582524228860347, "grad_norm": 0.8034749031066895, "learning_rate": 2.8724421250211733e-05, "loss": 0.1541, "step": 8936 }, { "epoch": 0.15827013242306312, "grad_norm": 1.4772100448608398, "learning_rate": 2.8724074034960442e-05, "loss": 0.1432, "step": 8937 }, { "epoch": 0.15828784196009155, "grad_norm": 0.9060435891151428, "learning_rate": 2.8723726774558334e-05, "loss": 0.1146, "step": 8938 }, { "epoch": 0.15830555149711997, "grad_norm": 1.050032377243042, "learning_rate": 2.8723379469006554e-05, "loss": 0.1013, "step": 8939 }, { "epoch": 0.15832326103414843, "grad_norm": 0.6243327856063843, "learning_rate": 2.8723032118306244e-05, "loss": 0.1099, "step": 8940 }, { "epoch": 0.15834097057117685, "grad_norm": 0.917873740196228, "learning_rate": 2.872268472245854e-05, "loss": 0.1005, "step": 8941 }, { "epoch": 0.15835868010820528, "grad_norm": 1.1172446012496948, "learning_rate": 2.87223372814646e-05, "loss": 0.1261, "step": 8942 }, { "epoch": 0.1583763896452337, "grad_norm": 1.2037922143936157, "learning_rate": 2.8721989795325554e-05, "loss": 0.1154, "step": 8943 }, { "epoch": 0.15839409918226213, "grad_norm": 1.0258901119232178, "learning_rate": 2.8721642264042553e-05, "loss": 0.1567, "step": 8944 }, { "epoch": 0.15841180871929056, "grad_norm": 0.9711400270462036, "learning_rate": 2.872129468761673e-05, "loss": 0.1172, "step": 8945 }, { "epoch": 0.15842951825631899, "grad_norm": 0.8286972045898438, "learning_rate": 2.872094706604924e-05, "loss": 0.1485, "step": 8946 }, { "epoch": 0.1584472277933474, "grad_norm": 1.055290937423706, "learning_rate": 2.872059939934122e-05, "loss": 0.1598, "step": 8947 }, { "epoch": 0.15846493733037584, "grad_norm": 0.9651479721069336, "learning_rate": 2.8720251687493813e-05, "loss": 0.1105, "step": 8948 }, { "epoch": 0.15848264686740426, "grad_norm": 0.5861930251121521, "learning_rate": 2.8719903930508174e-05, "loss": 0.0904, "step": 8949 }, { "epoch": 0.1585003564044327, "grad_norm": 0.9892210960388184, "learning_rate": 2.8719556128385433e-05, "loss": 0.1239, "step": 8950 }, { "epoch": 0.15851806594146112, "grad_norm": 1.1512829065322876, "learning_rate": 2.871920828112674e-05, "loss": 0.112, "step": 8951 }, { "epoch": 0.15853577547848954, "grad_norm": 1.34071946144104, "learning_rate": 2.8718860388733243e-05, "loss": 0.1128, "step": 8952 }, { "epoch": 0.15855348501551797, "grad_norm": 1.156197190284729, "learning_rate": 2.8718512451206078e-05, "loss": 0.1636, "step": 8953 }, { "epoch": 0.15857119455254642, "grad_norm": 0.9991310834884644, "learning_rate": 2.87181644685464e-05, "loss": 0.1481, "step": 8954 }, { "epoch": 0.15858890408957485, "grad_norm": 1.4250845909118652, "learning_rate": 2.8717816440755344e-05, "loss": 0.1418, "step": 8955 }, { "epoch": 0.15860661362660328, "grad_norm": 1.4996395111083984, "learning_rate": 2.871746836783406e-05, "loss": 0.1392, "step": 8956 }, { "epoch": 0.1586243231636317, "grad_norm": 1.0883440971374512, "learning_rate": 2.8717120249783692e-05, "loss": 0.1169, "step": 8957 }, { "epoch": 0.15864203270066013, "grad_norm": 1.1696603298187256, "learning_rate": 2.871677208660539e-05, "loss": 0.1494, "step": 8958 }, { "epoch": 0.15865974223768856, "grad_norm": 0.9799208641052246, "learning_rate": 2.8716423878300292e-05, "loss": 0.0828, "step": 8959 }, { "epoch": 0.15867745177471698, "grad_norm": 0.8199596405029297, "learning_rate": 2.871607562486955e-05, "loss": 0.0892, "step": 8960 }, { "epoch": 0.1586951613117454, "grad_norm": 0.8347567319869995, "learning_rate": 2.87157273263143e-05, "loss": 0.1275, "step": 8961 }, { "epoch": 0.15871287084877383, "grad_norm": 0.9156501889228821, "learning_rate": 2.8715378982635698e-05, "loss": 0.1299, "step": 8962 }, { "epoch": 0.15873058038580226, "grad_norm": 3.264091968536377, "learning_rate": 2.8715030593834888e-05, "loss": 0.1319, "step": 8963 }, { "epoch": 0.1587482899228307, "grad_norm": 1.3936985731124878, "learning_rate": 2.8714682159913012e-05, "loss": 0.1304, "step": 8964 }, { "epoch": 0.15876599945985911, "grad_norm": 0.6721452474594116, "learning_rate": 2.871433368087122e-05, "loss": 0.1382, "step": 8965 }, { "epoch": 0.15878370899688754, "grad_norm": 1.3280022144317627, "learning_rate": 2.871398515671066e-05, "loss": 0.1569, "step": 8966 }, { "epoch": 0.15880141853391597, "grad_norm": 1.1144672632217407, "learning_rate": 2.871363658743247e-05, "loss": 0.1199, "step": 8967 }, { "epoch": 0.1588191280709444, "grad_norm": 0.9707752466201782, "learning_rate": 2.8713287973037805e-05, "loss": 0.0952, "step": 8968 }, { "epoch": 0.15883683760797285, "grad_norm": 0.5861685276031494, "learning_rate": 2.871293931352781e-05, "loss": 0.0965, "step": 8969 }, { "epoch": 0.15885454714500127, "grad_norm": 1.4928185939788818, "learning_rate": 2.8712590608903633e-05, "loss": 0.1313, "step": 8970 }, { "epoch": 0.1588722566820297, "grad_norm": 1.950809359550476, "learning_rate": 2.8712241859166418e-05, "loss": 0.122, "step": 8971 }, { "epoch": 0.15888996621905813, "grad_norm": 0.8107472658157349, "learning_rate": 2.8711893064317315e-05, "loss": 0.1362, "step": 8972 }, { "epoch": 0.15890767575608655, "grad_norm": 0.87882399559021, "learning_rate": 2.8711544224357467e-05, "loss": 0.1117, "step": 8973 }, { "epoch": 0.15892538529311498, "grad_norm": 0.8482578992843628, "learning_rate": 2.8711195339288036e-05, "loss": 0.1112, "step": 8974 }, { "epoch": 0.1589430948301434, "grad_norm": 1.0380115509033203, "learning_rate": 2.871084640911015e-05, "loss": 0.0865, "step": 8975 }, { "epoch": 0.15896080436717183, "grad_norm": 1.1519794464111328, "learning_rate": 2.8710497433824967e-05, "loss": 0.1292, "step": 8976 }, { "epoch": 0.15897851390420026, "grad_norm": 1.0139660835266113, "learning_rate": 2.8710148413433637e-05, "loss": 0.1213, "step": 8977 }, { "epoch": 0.15899622344122868, "grad_norm": 1.2747910022735596, "learning_rate": 2.8709799347937304e-05, "loss": 0.1194, "step": 8978 }, { "epoch": 0.1590139329782571, "grad_norm": 1.134548544883728, "learning_rate": 2.8709450237337117e-05, "loss": 0.1055, "step": 8979 }, { "epoch": 0.15903164251528554, "grad_norm": 1.1419472694396973, "learning_rate": 2.8709101081634225e-05, "loss": 0.1222, "step": 8980 }, { "epoch": 0.15904935205231396, "grad_norm": 0.9734914898872375, "learning_rate": 2.8708751880829777e-05, "loss": 0.0895, "step": 8981 }, { "epoch": 0.1590670615893424, "grad_norm": 1.2364665269851685, "learning_rate": 2.8708402634924923e-05, "loss": 0.1328, "step": 8982 }, { "epoch": 0.15908477112637082, "grad_norm": 0.9534461498260498, "learning_rate": 2.870805334392081e-05, "loss": 0.1111, "step": 8983 }, { "epoch": 0.15910248066339927, "grad_norm": 0.7350040674209595, "learning_rate": 2.8707704007818592e-05, "loss": 0.1219, "step": 8984 }, { "epoch": 0.1591201902004277, "grad_norm": 0.8755776286125183, "learning_rate": 2.8707354626619406e-05, "loss": 0.1296, "step": 8985 }, { "epoch": 0.15913789973745612, "grad_norm": 0.8132104873657227, "learning_rate": 2.870700520032442e-05, "loss": 0.1357, "step": 8986 }, { "epoch": 0.15915560927448455, "grad_norm": 0.5299890637397766, "learning_rate": 2.8706655728934763e-05, "loss": 0.1471, "step": 8987 }, { "epoch": 0.15917331881151298, "grad_norm": 1.1601518392562866, "learning_rate": 2.87063062124516e-05, "loss": 0.089, "step": 8988 }, { "epoch": 0.1591910283485414, "grad_norm": 0.856157660484314, "learning_rate": 2.870595665087608e-05, "loss": 0.124, "step": 8989 }, { "epoch": 0.15920873788556983, "grad_norm": 0.7036252617835999, "learning_rate": 2.8705607044209344e-05, "loss": 0.1278, "step": 8990 }, { "epoch": 0.15922644742259826, "grad_norm": 0.8540633320808411, "learning_rate": 2.8705257392452547e-05, "loss": 0.0668, "step": 8991 }, { "epoch": 0.15924415695962668, "grad_norm": 1.1813009977340698, "learning_rate": 2.870490769560684e-05, "loss": 0.1048, "step": 8992 }, { "epoch": 0.1592618664966551, "grad_norm": 1.013746976852417, "learning_rate": 2.8704557953673374e-05, "loss": 0.0988, "step": 8993 }, { "epoch": 0.15927957603368353, "grad_norm": 1.126509666442871, "learning_rate": 2.8704208166653296e-05, "loss": 0.1555, "step": 8994 }, { "epoch": 0.15929728557071196, "grad_norm": 0.7954094409942627, "learning_rate": 2.870385833454776e-05, "loss": 0.0945, "step": 8995 }, { "epoch": 0.1593149951077404, "grad_norm": 0.9057723879814148, "learning_rate": 2.870350845735792e-05, "loss": 0.1348, "step": 8996 }, { "epoch": 0.1593327046447688, "grad_norm": 0.6963258385658264, "learning_rate": 2.8703158535084918e-05, "loss": 0.1091, "step": 8997 }, { "epoch": 0.15935041418179724, "grad_norm": 0.5757218599319458, "learning_rate": 2.8702808567729912e-05, "loss": 0.0909, "step": 8998 }, { "epoch": 0.1593681237188257, "grad_norm": 1.1519291400909424, "learning_rate": 2.8702458555294055e-05, "loss": 0.1134, "step": 8999 }, { "epoch": 0.15938583325585412, "grad_norm": 1.053788423538208, "learning_rate": 2.8702108497778487e-05, "loss": 0.0714, "step": 9000 }, { "epoch": 0.15940354279288255, "grad_norm": 0.7840074896812439, "learning_rate": 2.8701758395184376e-05, "loss": 0.1226, "step": 9001 }, { "epoch": 0.15942125232991097, "grad_norm": 0.985195517539978, "learning_rate": 2.8701408247512864e-05, "loss": 0.1247, "step": 9002 }, { "epoch": 0.1594389618669394, "grad_norm": 0.935705304145813, "learning_rate": 2.8701058054765102e-05, "loss": 0.1194, "step": 9003 }, { "epoch": 0.15945667140396783, "grad_norm": 0.8450241088867188, "learning_rate": 2.8700707816942248e-05, "loss": 0.0763, "step": 9004 }, { "epoch": 0.15947438094099625, "grad_norm": 1.1392191648483276, "learning_rate": 2.8700357534045447e-05, "loss": 0.105, "step": 9005 }, { "epoch": 0.15949209047802468, "grad_norm": 0.9827702641487122, "learning_rate": 2.870000720607586e-05, "loss": 0.1434, "step": 9006 }, { "epoch": 0.1595098000150531, "grad_norm": 1.0430917739868164, "learning_rate": 2.8699656833034632e-05, "loss": 0.1249, "step": 9007 }, { "epoch": 0.15952750955208153, "grad_norm": 1.2065256834030151, "learning_rate": 2.869930641492292e-05, "loss": 0.0986, "step": 9008 }, { "epoch": 0.15954521908910996, "grad_norm": 1.0891355276107788, "learning_rate": 2.8698955951741877e-05, "loss": 0.1304, "step": 9009 }, { "epoch": 0.15956292862613838, "grad_norm": 1.3842295408248901, "learning_rate": 2.869860544349265e-05, "loss": 0.1805, "step": 9010 }, { "epoch": 0.1595806381631668, "grad_norm": 1.3838518857955933, "learning_rate": 2.86982548901764e-05, "loss": 0.1389, "step": 9011 }, { "epoch": 0.15959834770019524, "grad_norm": 0.7960590720176697, "learning_rate": 2.8697904291794278e-05, "loss": 0.1035, "step": 9012 }, { "epoch": 0.15961605723722366, "grad_norm": 1.1735033988952637, "learning_rate": 2.8697553648347433e-05, "loss": 0.1047, "step": 9013 }, { "epoch": 0.15963376677425212, "grad_norm": 0.7892924547195435, "learning_rate": 2.869720295983702e-05, "loss": 0.1143, "step": 9014 }, { "epoch": 0.15965147631128054, "grad_norm": 1.0056416988372803, "learning_rate": 2.8696852226264203e-05, "loss": 0.1289, "step": 9015 }, { "epoch": 0.15966918584830897, "grad_norm": 1.0128439664840698, "learning_rate": 2.869650144763012e-05, "loss": 0.0929, "step": 9016 }, { "epoch": 0.1596868953853374, "grad_norm": 0.9725878834724426, "learning_rate": 2.8696150623935936e-05, "loss": 0.1165, "step": 9017 }, { "epoch": 0.15970460492236582, "grad_norm": 1.4017930030822754, "learning_rate": 2.86957997551828e-05, "loss": 0.1268, "step": 9018 }, { "epoch": 0.15972231445939425, "grad_norm": 0.8767735362052917, "learning_rate": 2.8695448841371868e-05, "loss": 0.121, "step": 9019 }, { "epoch": 0.15974002399642268, "grad_norm": 1.0550676584243774, "learning_rate": 2.8695097882504296e-05, "loss": 0.1138, "step": 9020 }, { "epoch": 0.1597577335334511, "grad_norm": 0.9291868805885315, "learning_rate": 2.869474687858124e-05, "loss": 0.0899, "step": 9021 }, { "epoch": 0.15977544307047953, "grad_norm": 1.2441869974136353, "learning_rate": 2.8694395829603846e-05, "loss": 0.1497, "step": 9022 }, { "epoch": 0.15979315260750795, "grad_norm": 1.175055980682373, "learning_rate": 2.8694044735573278e-05, "loss": 0.1344, "step": 9023 }, { "epoch": 0.15981086214453638, "grad_norm": 0.931610643863678, "learning_rate": 2.8693693596490688e-05, "loss": 0.1186, "step": 9024 }, { "epoch": 0.1598285716815648, "grad_norm": 1.0504491329193115, "learning_rate": 2.8693342412357228e-05, "loss": 0.1529, "step": 9025 }, { "epoch": 0.15984628121859323, "grad_norm": 1.946268916130066, "learning_rate": 2.869299118317406e-05, "loss": 0.0869, "step": 9026 }, { "epoch": 0.15986399075562166, "grad_norm": 0.9871273040771484, "learning_rate": 2.869263990894233e-05, "loss": 0.1378, "step": 9027 }, { "epoch": 0.1598817002926501, "grad_norm": 1.140021800994873, "learning_rate": 2.869228858966321e-05, "loss": 0.1077, "step": 9028 }, { "epoch": 0.15989940982967854, "grad_norm": 1.4404067993164062, "learning_rate": 2.8691937225337835e-05, "loss": 0.1394, "step": 9029 }, { "epoch": 0.15991711936670697, "grad_norm": 0.9028124809265137, "learning_rate": 2.869158581596738e-05, "loss": 0.1283, "step": 9030 }, { "epoch": 0.1599348289037354, "grad_norm": 0.8880149126052856, "learning_rate": 2.8691234361552988e-05, "loss": 0.0994, "step": 9031 }, { "epoch": 0.15995253844076382, "grad_norm": 0.8986853957176208, "learning_rate": 2.8690882862095823e-05, "loss": 0.1482, "step": 9032 }, { "epoch": 0.15997024797779225, "grad_norm": 1.1477328538894653, "learning_rate": 2.8690531317597034e-05, "loss": 0.1075, "step": 9033 }, { "epoch": 0.15998795751482067, "grad_norm": 0.48565009236335754, "learning_rate": 2.8690179728057782e-05, "loss": 0.1109, "step": 9034 }, { "epoch": 0.1600056670518491, "grad_norm": 1.0207297801971436, "learning_rate": 2.8689828093479228e-05, "loss": 0.1036, "step": 9035 }, { "epoch": 0.16002337658887753, "grad_norm": 1.3830372095108032, "learning_rate": 2.868947641386252e-05, "loss": 0.1237, "step": 9036 }, { "epoch": 0.16004108612590595, "grad_norm": 1.407047986984253, "learning_rate": 2.8689124689208817e-05, "loss": 0.1107, "step": 9037 }, { "epoch": 0.16005879566293438, "grad_norm": 0.9709406495094299, "learning_rate": 2.868877291951928e-05, "loss": 0.1327, "step": 9038 }, { "epoch": 0.1600765051999628, "grad_norm": 0.7983800768852234, "learning_rate": 2.8688421104795067e-05, "loss": 0.1023, "step": 9039 }, { "epoch": 0.16009421473699123, "grad_norm": 0.7538876533508301, "learning_rate": 2.8688069245037334e-05, "loss": 0.0947, "step": 9040 }, { "epoch": 0.16011192427401966, "grad_norm": 0.8828462362289429, "learning_rate": 2.8687717340247236e-05, "loss": 0.0809, "step": 9041 }, { "epoch": 0.16012963381104808, "grad_norm": 0.7579813003540039, "learning_rate": 2.8687365390425932e-05, "loss": 0.0929, "step": 9042 }, { "epoch": 0.1601473433480765, "grad_norm": 1.0262399911880493, "learning_rate": 2.8687013395574582e-05, "loss": 0.1301, "step": 9043 }, { "epoch": 0.16016505288510496, "grad_norm": 0.8933939933776855, "learning_rate": 2.8686661355694336e-05, "loss": 0.1141, "step": 9044 }, { "epoch": 0.1601827624221334, "grad_norm": 0.8672217130661011, "learning_rate": 2.8686309270786365e-05, "loss": 0.101, "step": 9045 }, { "epoch": 0.16020047195916182, "grad_norm": 1.0237237215042114, "learning_rate": 2.868595714085182e-05, "loss": 0.1304, "step": 9046 }, { "epoch": 0.16021818149619024, "grad_norm": 1.32489812374115, "learning_rate": 2.868560496589186e-05, "loss": 0.1522, "step": 9047 }, { "epoch": 0.16023589103321867, "grad_norm": 0.9083908796310425, "learning_rate": 2.8685252745907642e-05, "loss": 0.1069, "step": 9048 }, { "epoch": 0.1602536005702471, "grad_norm": 1.126225233078003, "learning_rate": 2.8684900480900324e-05, "loss": 0.1137, "step": 9049 }, { "epoch": 0.16027131010727552, "grad_norm": 0.917268693447113, "learning_rate": 2.8684548170871073e-05, "loss": 0.0974, "step": 9050 }, { "epoch": 0.16028901964430395, "grad_norm": 0.9728431105613708, "learning_rate": 2.8684195815821045e-05, "loss": 0.1211, "step": 9051 }, { "epoch": 0.16030672918133237, "grad_norm": 2.9006142616271973, "learning_rate": 2.8683843415751393e-05, "loss": 0.1072, "step": 9052 }, { "epoch": 0.1603244387183608, "grad_norm": 1.3605635166168213, "learning_rate": 2.8683490970663278e-05, "loss": 0.1402, "step": 9053 }, { "epoch": 0.16034214825538923, "grad_norm": 1.0903249979019165, "learning_rate": 2.868313848055786e-05, "loss": 0.1303, "step": 9054 }, { "epoch": 0.16035985779241765, "grad_norm": 1.2770951986312866, "learning_rate": 2.8682785945436303e-05, "loss": 0.1573, "step": 9055 }, { "epoch": 0.16037756732944608, "grad_norm": 0.8960994482040405, "learning_rate": 2.868243336529977e-05, "loss": 0.1317, "step": 9056 }, { "epoch": 0.1603952768664745, "grad_norm": 0.8192460536956787, "learning_rate": 2.868208074014941e-05, "loss": 0.1006, "step": 9057 }, { "epoch": 0.16041298640350293, "grad_norm": 0.9888143539428711, "learning_rate": 2.8681728069986385e-05, "loss": 0.104, "step": 9058 }, { "epoch": 0.1604306959405314, "grad_norm": 0.7900488972663879, "learning_rate": 2.8681375354811864e-05, "loss": 0.0758, "step": 9059 }, { "epoch": 0.1604484054775598, "grad_norm": 1.1906639337539673, "learning_rate": 2.8681022594626997e-05, "loss": 0.1259, "step": 9060 }, { "epoch": 0.16046611501458824, "grad_norm": 1.5381730794906616, "learning_rate": 2.8680669789432955e-05, "loss": 0.111, "step": 9061 }, { "epoch": 0.16048382455161667, "grad_norm": 1.2659478187561035, "learning_rate": 2.868031693923089e-05, "loss": 0.1808, "step": 9062 }, { "epoch": 0.1605015340886451, "grad_norm": 1.121732473373413, "learning_rate": 2.8679964044021964e-05, "loss": 0.0869, "step": 9063 }, { "epoch": 0.16051924362567352, "grad_norm": 1.4413323402404785, "learning_rate": 2.8679611103807343e-05, "loss": 0.1455, "step": 9064 }, { "epoch": 0.16053695316270195, "grad_norm": 1.1832808256149292, "learning_rate": 2.8679258118588187e-05, "loss": 0.0832, "step": 9065 }, { "epoch": 0.16055466269973037, "grad_norm": 1.1832696199417114, "learning_rate": 2.867890508836565e-05, "loss": 0.1218, "step": 9066 }, { "epoch": 0.1605723722367588, "grad_norm": 0.9054111242294312, "learning_rate": 2.8678552013140905e-05, "loss": 0.0934, "step": 9067 }, { "epoch": 0.16059008177378722, "grad_norm": 0.9533539414405823, "learning_rate": 2.8678198892915103e-05, "loss": 0.0991, "step": 9068 }, { "epoch": 0.16060779131081565, "grad_norm": 0.8906692266464233, "learning_rate": 2.867784572768941e-05, "loss": 0.0938, "step": 9069 }, { "epoch": 0.16062550084784408, "grad_norm": 1.0923398733139038, "learning_rate": 2.867749251746499e-05, "loss": 0.1523, "step": 9070 }, { "epoch": 0.1606432103848725, "grad_norm": 0.9065157175064087, "learning_rate": 2.8677139262243e-05, "loss": 0.0994, "step": 9071 }, { "epoch": 0.16066091992190093, "grad_norm": 1.410044550895691, "learning_rate": 2.867678596202461e-05, "loss": 0.1305, "step": 9072 }, { "epoch": 0.16067862945892936, "grad_norm": 0.7596257925033569, "learning_rate": 2.8676432616810974e-05, "loss": 0.1067, "step": 9073 }, { "epoch": 0.1606963389959578, "grad_norm": 1.3871170282363892, "learning_rate": 2.867607922660326e-05, "loss": 0.1443, "step": 9074 }, { "epoch": 0.16071404853298624, "grad_norm": 0.9076303839683533, "learning_rate": 2.8675725791402624e-05, "loss": 0.1132, "step": 9075 }, { "epoch": 0.16073175807001466, "grad_norm": 1.0670830011367798, "learning_rate": 2.8675372311210235e-05, "loss": 0.1323, "step": 9076 }, { "epoch": 0.1607494676070431, "grad_norm": 0.9622491002082825, "learning_rate": 2.867501878602726e-05, "loss": 0.0902, "step": 9077 }, { "epoch": 0.16076717714407152, "grad_norm": 0.9646140336990356, "learning_rate": 2.867466521585485e-05, "loss": 0.0877, "step": 9078 }, { "epoch": 0.16078488668109994, "grad_norm": 0.9501488208770752, "learning_rate": 2.8674311600694178e-05, "loss": 0.1077, "step": 9079 }, { "epoch": 0.16080259621812837, "grad_norm": 0.9964986443519592, "learning_rate": 2.86739579405464e-05, "loss": 0.1104, "step": 9080 }, { "epoch": 0.1608203057551568, "grad_norm": 0.9200759530067444, "learning_rate": 2.867360423541269e-05, "loss": 0.1336, "step": 9081 }, { "epoch": 0.16083801529218522, "grad_norm": 0.9279144406318665, "learning_rate": 2.86732504852942e-05, "loss": 0.1047, "step": 9082 }, { "epoch": 0.16085572482921365, "grad_norm": 1.9875752925872803, "learning_rate": 2.86728966901921e-05, "loss": 0.1457, "step": 9083 }, { "epoch": 0.16087343436624207, "grad_norm": 0.8434213995933533, "learning_rate": 2.8672542850107553e-05, "loss": 0.0742, "step": 9084 }, { "epoch": 0.1608911439032705, "grad_norm": 2.2996647357940674, "learning_rate": 2.867218896504172e-05, "loss": 0.1132, "step": 9085 }, { "epoch": 0.16090885344029893, "grad_norm": 1.3234596252441406, "learning_rate": 2.867183503499577e-05, "loss": 0.1173, "step": 9086 }, { "epoch": 0.16092656297732735, "grad_norm": 1.8367806673049927, "learning_rate": 2.8671481059970867e-05, "loss": 0.0995, "step": 9087 }, { "epoch": 0.1609442725143558, "grad_norm": 0.6069349646568298, "learning_rate": 2.8671127039968176e-05, "loss": 0.0805, "step": 9088 }, { "epoch": 0.16096198205138423, "grad_norm": 0.7154369354248047, "learning_rate": 2.8670772974988858e-05, "loss": 0.1173, "step": 9089 }, { "epoch": 0.16097969158841266, "grad_norm": 1.012078881263733, "learning_rate": 2.867041886503408e-05, "loss": 0.1131, "step": 9090 }, { "epoch": 0.1609974011254411, "grad_norm": 1.0664232969284058, "learning_rate": 2.8670064710105003e-05, "loss": 0.0895, "step": 9091 }, { "epoch": 0.1610151106624695, "grad_norm": 1.4665898084640503, "learning_rate": 2.86697105102028e-05, "loss": 0.0977, "step": 9092 }, { "epoch": 0.16103282019949794, "grad_norm": 1.7938134670257568, "learning_rate": 2.866935626532863e-05, "loss": 0.1221, "step": 9093 }, { "epoch": 0.16105052973652637, "grad_norm": 0.8330878615379333, "learning_rate": 2.866900197548366e-05, "loss": 0.1141, "step": 9094 }, { "epoch": 0.1610682392735548, "grad_norm": 0.9133893847465515, "learning_rate": 2.8668647640669057e-05, "loss": 0.0961, "step": 9095 }, { "epoch": 0.16108594881058322, "grad_norm": 0.6222397685050964, "learning_rate": 2.8668293260885987e-05, "loss": 0.0939, "step": 9096 }, { "epoch": 0.16110365834761164, "grad_norm": 1.059372901916504, "learning_rate": 2.8667938836135615e-05, "loss": 0.1376, "step": 9097 }, { "epoch": 0.16112136788464007, "grad_norm": 1.2160978317260742, "learning_rate": 2.86675843664191e-05, "loss": 0.1148, "step": 9098 }, { "epoch": 0.1611390774216685, "grad_norm": 0.9298089146614075, "learning_rate": 2.8667229851737624e-05, "loss": 0.1118, "step": 9099 }, { "epoch": 0.16115678695869692, "grad_norm": 1.4662071466445923, "learning_rate": 2.866687529209234e-05, "loss": 0.1295, "step": 9100 }, { "epoch": 0.16117449649572535, "grad_norm": 1.1040209531784058, "learning_rate": 2.8666520687484417e-05, "loss": 0.1076, "step": 9101 }, { "epoch": 0.16119220603275378, "grad_norm": 1.192752718925476, "learning_rate": 2.8666166037915025e-05, "loss": 0.1283, "step": 9102 }, { "epoch": 0.16120991556978223, "grad_norm": 0.8240300416946411, "learning_rate": 2.8665811343385327e-05, "loss": 0.1016, "step": 9103 }, { "epoch": 0.16122762510681066, "grad_norm": 0.7285215854644775, "learning_rate": 2.8665456603896495e-05, "loss": 0.0765, "step": 9104 }, { "epoch": 0.16124533464383908, "grad_norm": 1.031366229057312, "learning_rate": 2.8665101819449692e-05, "loss": 0.1209, "step": 9105 }, { "epoch": 0.1612630441808675, "grad_norm": 0.8774465918540955, "learning_rate": 2.8664746990046088e-05, "loss": 0.0859, "step": 9106 }, { "epoch": 0.16128075371789594, "grad_norm": 0.8566232323646545, "learning_rate": 2.8664392115686843e-05, "loss": 0.127, "step": 9107 }, { "epoch": 0.16129846325492436, "grad_norm": 1.3719730377197266, "learning_rate": 2.8664037196373133e-05, "loss": 0.1462, "step": 9108 }, { "epoch": 0.1613161727919528, "grad_norm": 1.1335240602493286, "learning_rate": 2.8663682232106122e-05, "loss": 0.1384, "step": 9109 }, { "epoch": 0.16133388232898122, "grad_norm": 0.8917019963264465, "learning_rate": 2.866332722288698e-05, "loss": 0.1211, "step": 9110 }, { "epoch": 0.16135159186600964, "grad_norm": 1.04978609085083, "learning_rate": 2.8662972168716874e-05, "loss": 0.1425, "step": 9111 }, { "epoch": 0.16136930140303807, "grad_norm": 0.9995663166046143, "learning_rate": 2.866261706959697e-05, "loss": 0.0988, "step": 9112 }, { "epoch": 0.1613870109400665, "grad_norm": 1.098376989364624, "learning_rate": 2.8662261925528437e-05, "loss": 0.1238, "step": 9113 }, { "epoch": 0.16140472047709492, "grad_norm": 0.9745564460754395, "learning_rate": 2.8661906736512445e-05, "loss": 0.1138, "step": 9114 }, { "epoch": 0.16142243001412335, "grad_norm": 0.8564494848251343, "learning_rate": 2.8661551502550163e-05, "loss": 0.0911, "step": 9115 }, { "epoch": 0.16144013955115177, "grad_norm": 1.735904335975647, "learning_rate": 2.8661196223642755e-05, "loss": 0.1134, "step": 9116 }, { "epoch": 0.1614578490881802, "grad_norm": 1.0123862028121948, "learning_rate": 2.86608408997914e-05, "loss": 0.1167, "step": 9117 }, { "epoch": 0.16147555862520865, "grad_norm": 0.9921805262565613, "learning_rate": 2.8660485530997253e-05, "loss": 0.0963, "step": 9118 }, { "epoch": 0.16149326816223708, "grad_norm": 0.7508294582366943, "learning_rate": 2.8660130117261495e-05, "loss": 0.0946, "step": 9119 }, { "epoch": 0.1615109776992655, "grad_norm": 0.6794629693031311, "learning_rate": 2.8659774658585286e-05, "loss": 0.1218, "step": 9120 }, { "epoch": 0.16152868723629393, "grad_norm": 1.2569478750228882, "learning_rate": 2.86594191549698e-05, "loss": 0.1311, "step": 9121 }, { "epoch": 0.16154639677332236, "grad_norm": 1.5613343715667725, "learning_rate": 2.865906360641621e-05, "loss": 0.0874, "step": 9122 }, { "epoch": 0.16156410631035079, "grad_norm": 1.387853980064392, "learning_rate": 2.865870801292568e-05, "loss": 0.1097, "step": 9123 }, { "epoch": 0.1615818158473792, "grad_norm": 0.9066696166992188, "learning_rate": 2.8658352374499384e-05, "loss": 0.0859, "step": 9124 }, { "epoch": 0.16159952538440764, "grad_norm": 2.219534158706665, "learning_rate": 2.865799669113849e-05, "loss": 0.1597, "step": 9125 }, { "epoch": 0.16161723492143606, "grad_norm": 0.9247276782989502, "learning_rate": 2.8657640962844168e-05, "loss": 0.1074, "step": 9126 }, { "epoch": 0.1616349444584645, "grad_norm": 1.060194492340088, "learning_rate": 2.865728518961759e-05, "loss": 0.1045, "step": 9127 }, { "epoch": 0.16165265399549292, "grad_norm": 1.8291618824005127, "learning_rate": 2.8656929371459922e-05, "loss": 0.1767, "step": 9128 }, { "epoch": 0.16167036353252134, "grad_norm": 0.7641096711158752, "learning_rate": 2.8656573508372338e-05, "loss": 0.0872, "step": 9129 }, { "epoch": 0.16168807306954977, "grad_norm": 1.5606485605239868, "learning_rate": 2.865621760035601e-05, "loss": 0.1146, "step": 9130 }, { "epoch": 0.1617057826065782, "grad_norm": 0.7735611796379089, "learning_rate": 2.865586164741211e-05, "loss": 0.0834, "step": 9131 }, { "epoch": 0.16172349214360662, "grad_norm": 0.8398951292037964, "learning_rate": 2.86555056495418e-05, "loss": 0.1123, "step": 9132 }, { "epoch": 0.16174120168063508, "grad_norm": 0.9111669659614563, "learning_rate": 2.865514960674626e-05, "loss": 0.1091, "step": 9133 }, { "epoch": 0.1617589112176635, "grad_norm": 1.4307223558425903, "learning_rate": 2.865479351902666e-05, "loss": 0.1101, "step": 9134 }, { "epoch": 0.16177662075469193, "grad_norm": 1.1079630851745605, "learning_rate": 2.865443738638417e-05, "loss": 0.1181, "step": 9135 }, { "epoch": 0.16179433029172036, "grad_norm": 1.175911784172058, "learning_rate": 2.8654081208819963e-05, "loss": 0.1326, "step": 9136 }, { "epoch": 0.16181203982874878, "grad_norm": 0.8255929350852966, "learning_rate": 2.865372498633521e-05, "loss": 0.1071, "step": 9137 }, { "epoch": 0.1618297493657772, "grad_norm": 0.7810240387916565, "learning_rate": 2.8653368718931084e-05, "loss": 0.0788, "step": 9138 }, { "epoch": 0.16184745890280564, "grad_norm": 1.1588460206985474, "learning_rate": 2.865301240660875e-05, "loss": 0.1057, "step": 9139 }, { "epoch": 0.16186516843983406, "grad_norm": 0.8213565945625305, "learning_rate": 2.8652656049369394e-05, "loss": 0.1517, "step": 9140 }, { "epoch": 0.1618828779768625, "grad_norm": 1.393896460533142, "learning_rate": 2.865229964721417e-05, "loss": 0.1631, "step": 9141 }, { "epoch": 0.16190058751389091, "grad_norm": 2.8791749477386475, "learning_rate": 2.865194320014427e-05, "loss": 0.1259, "step": 9142 }, { "epoch": 0.16191829705091934, "grad_norm": 1.8584709167480469, "learning_rate": 2.8651586708160856e-05, "loss": 0.0962, "step": 9143 }, { "epoch": 0.16193600658794777, "grad_norm": 1.9803904294967651, "learning_rate": 2.86512301712651e-05, "loss": 0.1382, "step": 9144 }, { "epoch": 0.1619537161249762, "grad_norm": 1.2186577320098877, "learning_rate": 2.8650873589458183e-05, "loss": 0.1212, "step": 9145 }, { "epoch": 0.16197142566200462, "grad_norm": 1.4854426383972168, "learning_rate": 2.8650516962741263e-05, "loss": 0.118, "step": 9146 }, { "epoch": 0.16198913519903305, "grad_norm": 0.9941027164459229, "learning_rate": 2.865016029111553e-05, "loss": 0.092, "step": 9147 }, { "epoch": 0.1620068447360615, "grad_norm": 0.9247725605964661, "learning_rate": 2.864980357458215e-05, "loss": 0.1189, "step": 9148 }, { "epoch": 0.16202455427308993, "grad_norm": 0.9369238615036011, "learning_rate": 2.8649446813142295e-05, "loss": 0.0992, "step": 9149 }, { "epoch": 0.16204226381011835, "grad_norm": 1.6432660818099976, "learning_rate": 2.864909000679714e-05, "loss": 0.1214, "step": 9150 }, { "epoch": 0.16205997334714678, "grad_norm": 0.8410406708717346, "learning_rate": 2.8648733155547863e-05, "loss": 0.1212, "step": 9151 }, { "epoch": 0.1620776828841752, "grad_norm": 1.0343296527862549, "learning_rate": 2.864837625939563e-05, "loss": 0.1232, "step": 9152 }, { "epoch": 0.16209539242120363, "grad_norm": 1.579839825630188, "learning_rate": 2.8648019318341622e-05, "loss": 0.144, "step": 9153 }, { "epoch": 0.16211310195823206, "grad_norm": 1.0982282161712646, "learning_rate": 2.864766233238701e-05, "loss": 0.1026, "step": 9154 }, { "epoch": 0.16213081149526049, "grad_norm": 1.2579096555709839, "learning_rate": 2.864730530153297e-05, "loss": 0.1051, "step": 9155 }, { "epoch": 0.1621485210322889, "grad_norm": 1.671416997909546, "learning_rate": 2.8646948225780674e-05, "loss": 0.1308, "step": 9156 }, { "epoch": 0.16216623056931734, "grad_norm": 2.354339838027954, "learning_rate": 2.86465911051313e-05, "loss": 0.1216, "step": 9157 }, { "epoch": 0.16218394010634576, "grad_norm": 0.8162134289741516, "learning_rate": 2.8646233939586022e-05, "loss": 0.0923, "step": 9158 }, { "epoch": 0.1622016496433742, "grad_norm": 1.1057168245315552, "learning_rate": 2.8645876729146016e-05, "loss": 0.14, "step": 9159 }, { "epoch": 0.16221935918040262, "grad_norm": 0.8052575588226318, "learning_rate": 2.8645519473812454e-05, "loss": 0.1171, "step": 9160 }, { "epoch": 0.16223706871743104, "grad_norm": 0.7127360701560974, "learning_rate": 2.864516217358651e-05, "loss": 0.1166, "step": 9161 }, { "epoch": 0.16225477825445947, "grad_norm": 1.4172190427780151, "learning_rate": 2.8644804828469368e-05, "loss": 0.1346, "step": 9162 }, { "epoch": 0.16227248779148792, "grad_norm": 1.0952811241149902, "learning_rate": 2.8644447438462195e-05, "loss": 0.1426, "step": 9163 }, { "epoch": 0.16229019732851635, "grad_norm": 1.1682454347610474, "learning_rate": 2.864409000356617e-05, "loss": 0.179, "step": 9164 }, { "epoch": 0.16230790686554478, "grad_norm": 0.8915332555770874, "learning_rate": 2.864373252378247e-05, "loss": 0.128, "step": 9165 }, { "epoch": 0.1623256164025732, "grad_norm": 1.2032943964004517, "learning_rate": 2.864337499911227e-05, "loss": 0.1275, "step": 9166 }, { "epoch": 0.16234332593960163, "grad_norm": 1.385344386100769, "learning_rate": 2.8643017429556747e-05, "loss": 0.1145, "step": 9167 }, { "epoch": 0.16236103547663006, "grad_norm": 1.2651358842849731, "learning_rate": 2.8642659815117075e-05, "loss": 0.0631, "step": 9168 }, { "epoch": 0.16237874501365848, "grad_norm": 1.295870065689087, "learning_rate": 2.8642302155794435e-05, "loss": 0.1318, "step": 9169 }, { "epoch": 0.1623964545506869, "grad_norm": 0.9861395359039307, "learning_rate": 2.8641944451589996e-05, "loss": 0.1236, "step": 9170 }, { "epoch": 0.16241416408771533, "grad_norm": 0.9669197201728821, "learning_rate": 2.8641586702504944e-05, "loss": 0.1067, "step": 9171 }, { "epoch": 0.16243187362474376, "grad_norm": 0.939058244228363, "learning_rate": 2.8641228908540447e-05, "loss": 0.1426, "step": 9172 }, { "epoch": 0.1624495831617722, "grad_norm": 0.8428022861480713, "learning_rate": 2.864087106969769e-05, "loss": 0.0883, "step": 9173 }, { "epoch": 0.1624672926988006, "grad_norm": 1.076140284538269, "learning_rate": 2.8640513185977844e-05, "loss": 0.0831, "step": 9174 }, { "epoch": 0.16248500223582904, "grad_norm": 0.8210229873657227, "learning_rate": 2.8640155257382095e-05, "loss": 0.1028, "step": 9175 }, { "epoch": 0.16250271177285747, "grad_norm": 1.5066826343536377, "learning_rate": 2.863979728391161e-05, "loss": 0.144, "step": 9176 }, { "epoch": 0.1625204213098859, "grad_norm": 1.4454586505889893, "learning_rate": 2.8639439265567572e-05, "loss": 0.1489, "step": 9177 }, { "epoch": 0.16253813084691435, "grad_norm": 0.9576887488365173, "learning_rate": 2.8639081202351156e-05, "loss": 0.1474, "step": 9178 }, { "epoch": 0.16255584038394277, "grad_norm": 0.9081063270568848, "learning_rate": 2.8638723094263546e-05, "loss": 0.1299, "step": 9179 }, { "epoch": 0.1625735499209712, "grad_norm": 1.273709774017334, "learning_rate": 2.863836494130592e-05, "loss": 0.1199, "step": 9180 }, { "epoch": 0.16259125945799963, "grad_norm": 1.0491992235183716, "learning_rate": 2.8638006743479445e-05, "loss": 0.1093, "step": 9181 }, { "epoch": 0.16260896899502805, "grad_norm": 1.583174705505371, "learning_rate": 2.863764850078531e-05, "loss": 0.0976, "step": 9182 }, { "epoch": 0.16262667853205648, "grad_norm": 1.0550602674484253, "learning_rate": 2.8637290213224692e-05, "loss": 0.0768, "step": 9183 }, { "epoch": 0.1626443880690849, "grad_norm": 0.49589046835899353, "learning_rate": 2.863693188079877e-05, "loss": 0.0844, "step": 9184 }, { "epoch": 0.16266209760611333, "grad_norm": 1.4098198413848877, "learning_rate": 2.8636573503508716e-05, "loss": 0.1348, "step": 9185 }, { "epoch": 0.16267980714314176, "grad_norm": 0.6699489951133728, "learning_rate": 2.863621508135572e-05, "loss": 0.1384, "step": 9186 }, { "epoch": 0.16269751668017018, "grad_norm": 0.8382313847541809, "learning_rate": 2.8635856614340953e-05, "loss": 0.1111, "step": 9187 }, { "epoch": 0.1627152262171986, "grad_norm": 2.765002965927124, "learning_rate": 2.8635498102465598e-05, "loss": 0.1291, "step": 9188 }, { "epoch": 0.16273293575422704, "grad_norm": 0.6878914833068848, "learning_rate": 2.8635139545730832e-05, "loss": 0.0911, "step": 9189 }, { "epoch": 0.16275064529125546, "grad_norm": 0.9770885109901428, "learning_rate": 2.8634780944137835e-05, "loss": 0.1383, "step": 9190 }, { "epoch": 0.1627683548282839, "grad_norm": 1.1862794160842896, "learning_rate": 2.863442229768779e-05, "loss": 0.1046, "step": 9191 }, { "epoch": 0.16278606436531232, "grad_norm": 1.1254479885101318, "learning_rate": 2.8634063606381878e-05, "loss": 0.1187, "step": 9192 }, { "epoch": 0.16280377390234077, "grad_norm": 0.7166727781295776, "learning_rate": 2.8633704870221268e-05, "loss": 0.0829, "step": 9193 }, { "epoch": 0.1628214834393692, "grad_norm": 1.1134135723114014, "learning_rate": 2.8633346089207153e-05, "loss": 0.0846, "step": 9194 }, { "epoch": 0.16283919297639762, "grad_norm": 0.9561274647712708, "learning_rate": 2.8632987263340706e-05, "loss": 0.1327, "step": 9195 }, { "epoch": 0.16285690251342605, "grad_norm": 1.4563599824905396, "learning_rate": 2.8632628392623116e-05, "loss": 0.1179, "step": 9196 }, { "epoch": 0.16287461205045448, "grad_norm": 1.2067253589630127, "learning_rate": 2.8632269477055548e-05, "loss": 0.0858, "step": 9197 }, { "epoch": 0.1628923215874829, "grad_norm": 1.0570532083511353, "learning_rate": 2.8631910516639198e-05, "loss": 0.1271, "step": 9198 }, { "epoch": 0.16291003112451133, "grad_norm": 1.1587328910827637, "learning_rate": 2.863155151137524e-05, "loss": 0.1235, "step": 9199 }, { "epoch": 0.16292774066153975, "grad_norm": 1.1108366250991821, "learning_rate": 2.8631192461264855e-05, "loss": 0.1286, "step": 9200 }, { "epoch": 0.16294545019856818, "grad_norm": 0.7956637740135193, "learning_rate": 2.8630833366309224e-05, "loss": 0.0789, "step": 9201 }, { "epoch": 0.1629631597355966, "grad_norm": 1.123699426651001, "learning_rate": 2.8630474226509538e-05, "loss": 0.1167, "step": 9202 }, { "epoch": 0.16298086927262503, "grad_norm": 1.7528396844863892, "learning_rate": 2.8630115041866965e-05, "loss": 0.1823, "step": 9203 }, { "epoch": 0.16299857880965346, "grad_norm": 1.0907368659973145, "learning_rate": 2.862975581238269e-05, "loss": 0.0907, "step": 9204 }, { "epoch": 0.1630162883466819, "grad_norm": 1.1248400211334229, "learning_rate": 2.8629396538057903e-05, "loss": 0.0958, "step": 9205 }, { "epoch": 0.1630339978837103, "grad_norm": 0.7798048853874207, "learning_rate": 2.8629037218893772e-05, "loss": 0.0963, "step": 9206 }, { "epoch": 0.16305170742073874, "grad_norm": 0.9573701620101929, "learning_rate": 2.8628677854891488e-05, "loss": 0.1038, "step": 9207 }, { "epoch": 0.1630694169577672, "grad_norm": 0.8754021525382996, "learning_rate": 2.862831844605224e-05, "loss": 0.1375, "step": 9208 }, { "epoch": 0.16308712649479562, "grad_norm": 1.194701075553894, "learning_rate": 2.8627958992377194e-05, "loss": 0.1255, "step": 9209 }, { "epoch": 0.16310483603182405, "grad_norm": 1.057397484779358, "learning_rate": 2.8627599493867548e-05, "loss": 0.1143, "step": 9210 }, { "epoch": 0.16312254556885247, "grad_norm": 1.3754467964172363, "learning_rate": 2.8627239950524476e-05, "loss": 0.098, "step": 9211 }, { "epoch": 0.1631402551058809, "grad_norm": 1.3238075971603394, "learning_rate": 2.8626880362349162e-05, "loss": 0.1388, "step": 9212 }, { "epoch": 0.16315796464290933, "grad_norm": 0.7331779599189758, "learning_rate": 2.862652072934279e-05, "loss": 0.1067, "step": 9213 }, { "epoch": 0.16317567417993775, "grad_norm": 1.0134047269821167, "learning_rate": 2.8626161051506543e-05, "loss": 0.1549, "step": 9214 }, { "epoch": 0.16319338371696618, "grad_norm": 0.9108197093009949, "learning_rate": 2.8625801328841604e-05, "loss": 0.1427, "step": 9215 }, { "epoch": 0.1632110932539946, "grad_norm": 1.0100276470184326, "learning_rate": 2.862544156134916e-05, "loss": 0.1275, "step": 9216 }, { "epoch": 0.16322880279102303, "grad_norm": 0.9628457427024841, "learning_rate": 2.8625081749030388e-05, "loss": 0.0985, "step": 9217 }, { "epoch": 0.16324651232805146, "grad_norm": 1.090881586074829, "learning_rate": 2.8624721891886474e-05, "loss": 0.1626, "step": 9218 }, { "epoch": 0.16326422186507988, "grad_norm": 0.9345003962516785, "learning_rate": 2.8624361989918606e-05, "loss": 0.1218, "step": 9219 }, { "epoch": 0.1632819314021083, "grad_norm": 0.9210751056671143, "learning_rate": 2.8624002043127964e-05, "loss": 0.0993, "step": 9220 }, { "epoch": 0.16329964093913674, "grad_norm": 1.0853172540664673, "learning_rate": 2.8623642051515734e-05, "loss": 0.1312, "step": 9221 }, { "epoch": 0.1633173504761652, "grad_norm": 0.9510403871536255, "learning_rate": 2.86232820150831e-05, "loss": 0.106, "step": 9222 }, { "epoch": 0.16333506001319362, "grad_norm": 0.9836308360099792, "learning_rate": 2.8622921933831243e-05, "loss": 0.1287, "step": 9223 }, { "epoch": 0.16335276955022204, "grad_norm": 0.8256052732467651, "learning_rate": 2.862256180776135e-05, "loss": 0.1038, "step": 9224 }, { "epoch": 0.16337047908725047, "grad_norm": 1.1529450416564941, "learning_rate": 2.862220163687461e-05, "loss": 0.0968, "step": 9225 }, { "epoch": 0.1633881886242789, "grad_norm": 1.429059624671936, "learning_rate": 2.8621841421172205e-05, "loss": 0.1315, "step": 9226 }, { "epoch": 0.16340589816130732, "grad_norm": 0.9821475148200989, "learning_rate": 2.8621481160655318e-05, "loss": 0.1262, "step": 9227 }, { "epoch": 0.16342360769833575, "grad_norm": 0.7740317583084106, "learning_rate": 2.8621120855325133e-05, "loss": 0.086, "step": 9228 }, { "epoch": 0.16344131723536418, "grad_norm": 0.844265878200531, "learning_rate": 2.8620760505182843e-05, "loss": 0.0997, "step": 9229 }, { "epoch": 0.1634590267723926, "grad_norm": 1.0109556913375854, "learning_rate": 2.8620400110229627e-05, "loss": 0.1089, "step": 9230 }, { "epoch": 0.16347673630942103, "grad_norm": 0.7903071641921997, "learning_rate": 2.862003967046667e-05, "loss": 0.0864, "step": 9231 }, { "epoch": 0.16349444584644945, "grad_norm": 0.8748589158058167, "learning_rate": 2.8619679185895166e-05, "loss": 0.1629, "step": 9232 }, { "epoch": 0.16351215538347788, "grad_norm": 0.7796515822410583, "learning_rate": 2.8619318656516286e-05, "loss": 0.1105, "step": 9233 }, { "epoch": 0.1635298649205063, "grad_norm": 1.2671830654144287, "learning_rate": 2.861895808233123e-05, "loss": 0.1298, "step": 9234 }, { "epoch": 0.16354757445753473, "grad_norm": 0.9025423526763916, "learning_rate": 2.861859746334118e-05, "loss": 0.1429, "step": 9235 }, { "epoch": 0.16356528399456316, "grad_norm": 0.8281335830688477, "learning_rate": 2.8618236799547323e-05, "loss": 0.1277, "step": 9236 }, { "epoch": 0.16358299353159161, "grad_norm": 1.988653302192688, "learning_rate": 2.861787609095084e-05, "loss": 0.1204, "step": 9237 }, { "epoch": 0.16360070306862004, "grad_norm": 1.3175482749938965, "learning_rate": 2.861751533755293e-05, "loss": 0.1482, "step": 9238 }, { "epoch": 0.16361841260564847, "grad_norm": 1.2081764936447144, "learning_rate": 2.8617154539354763e-05, "loss": 0.1138, "step": 9239 }, { "epoch": 0.1636361221426769, "grad_norm": 0.9397351145744324, "learning_rate": 2.8616793696357537e-05, "loss": 0.1221, "step": 9240 }, { "epoch": 0.16365383167970532, "grad_norm": 0.6469448208808899, "learning_rate": 2.861643280856244e-05, "loss": 0.1127, "step": 9241 }, { "epoch": 0.16367154121673375, "grad_norm": 0.891141414642334, "learning_rate": 2.8616071875970655e-05, "loss": 0.0954, "step": 9242 }, { "epoch": 0.16368925075376217, "grad_norm": 1.3075486421585083, "learning_rate": 2.8615710898583368e-05, "loss": 0.0933, "step": 9243 }, { "epoch": 0.1637069602907906, "grad_norm": 1.1624454259872437, "learning_rate": 2.861534987640177e-05, "loss": 0.138, "step": 9244 }, { "epoch": 0.16372466982781902, "grad_norm": 1.0649138689041138, "learning_rate": 2.8614988809427053e-05, "loss": 0.1413, "step": 9245 }, { "epoch": 0.16374237936484745, "grad_norm": 0.7546588182449341, "learning_rate": 2.8614627697660397e-05, "loss": 0.1276, "step": 9246 }, { "epoch": 0.16376008890187588, "grad_norm": 1.0152497291564941, "learning_rate": 2.861426654110299e-05, "loss": 0.1079, "step": 9247 }, { "epoch": 0.1637777984389043, "grad_norm": 0.7242705821990967, "learning_rate": 2.8613905339756022e-05, "loss": 0.0876, "step": 9248 }, { "epoch": 0.16379550797593273, "grad_norm": 1.258628010749817, "learning_rate": 2.861354409362069e-05, "loss": 0.1471, "step": 9249 }, { "epoch": 0.16381321751296116, "grad_norm": 0.9735628962516785, "learning_rate": 2.861318280269817e-05, "loss": 0.1183, "step": 9250 }, { "epoch": 0.16383092704998958, "grad_norm": 0.9087284803390503, "learning_rate": 2.861282146698966e-05, "loss": 0.0781, "step": 9251 }, { "epoch": 0.16384863658701804, "grad_norm": 1.0259112119674683, "learning_rate": 2.8612460086496336e-05, "loss": 0.1286, "step": 9252 }, { "epoch": 0.16386634612404646, "grad_norm": 1.3002732992172241, "learning_rate": 2.8612098661219402e-05, "loss": 0.1138, "step": 9253 }, { "epoch": 0.1638840556610749, "grad_norm": 1.0554335117340088, "learning_rate": 2.8611737191160036e-05, "loss": 0.0834, "step": 9254 }, { "epoch": 0.16390176519810332, "grad_norm": 0.999397873878479, "learning_rate": 2.8611375676319434e-05, "loss": 0.1114, "step": 9255 }, { "epoch": 0.16391947473513174, "grad_norm": 1.072529911994934, "learning_rate": 2.861101411669878e-05, "loss": 0.1324, "step": 9256 }, { "epoch": 0.16393718427216017, "grad_norm": 1.0151889324188232, "learning_rate": 2.8610652512299268e-05, "loss": 0.123, "step": 9257 }, { "epoch": 0.1639548938091886, "grad_norm": 1.6198410987854004, "learning_rate": 2.8610290863122086e-05, "loss": 0.1067, "step": 9258 }, { "epoch": 0.16397260334621702, "grad_norm": 0.7965756058692932, "learning_rate": 2.8609929169168425e-05, "loss": 0.1105, "step": 9259 }, { "epoch": 0.16399031288324545, "grad_norm": 0.8170693516731262, "learning_rate": 2.8609567430439468e-05, "loss": 0.1057, "step": 9260 }, { "epoch": 0.16400802242027387, "grad_norm": 1.0161792039871216, "learning_rate": 2.8609205646936417e-05, "loss": 0.142, "step": 9261 }, { "epoch": 0.1640257319573023, "grad_norm": 1.2276469469070435, "learning_rate": 2.8608843818660454e-05, "loss": 0.1425, "step": 9262 }, { "epoch": 0.16404344149433073, "grad_norm": 0.9068129062652588, "learning_rate": 2.8608481945612773e-05, "loss": 0.1287, "step": 9263 }, { "epoch": 0.16406115103135915, "grad_norm": 0.600382387638092, "learning_rate": 2.860812002779456e-05, "loss": 0.1044, "step": 9264 }, { "epoch": 0.16407886056838758, "grad_norm": 1.1452856063842773, "learning_rate": 2.8607758065207008e-05, "loss": 0.089, "step": 9265 }, { "epoch": 0.164096570105416, "grad_norm": 1.3324939012527466, "learning_rate": 2.860739605785131e-05, "loss": 0.0975, "step": 9266 }, { "epoch": 0.16411427964244446, "grad_norm": 1.0359368324279785, "learning_rate": 2.8607034005728655e-05, "loss": 0.1046, "step": 9267 }, { "epoch": 0.1641319891794729, "grad_norm": 1.2080621719360352, "learning_rate": 2.8606671908840232e-05, "loss": 0.1152, "step": 9268 }, { "epoch": 0.1641496987165013, "grad_norm": 1.0277786254882812, "learning_rate": 2.860630976718724e-05, "loss": 0.1109, "step": 9269 }, { "epoch": 0.16416740825352974, "grad_norm": 1.1151376962661743, "learning_rate": 2.860594758077086e-05, "loss": 0.1116, "step": 9270 }, { "epoch": 0.16418511779055817, "grad_norm": 0.8639999032020569, "learning_rate": 2.860558534959229e-05, "loss": 0.1067, "step": 9271 }, { "epoch": 0.1642028273275866, "grad_norm": 1.0964627265930176, "learning_rate": 2.8605223073652725e-05, "loss": 0.1027, "step": 9272 }, { "epoch": 0.16422053686461502, "grad_norm": 0.9851518869400024, "learning_rate": 2.8604860752953345e-05, "loss": 0.1173, "step": 9273 }, { "epoch": 0.16423824640164345, "grad_norm": 0.9228296875953674, "learning_rate": 2.8604498387495352e-05, "loss": 0.1225, "step": 9274 }, { "epoch": 0.16425595593867187, "grad_norm": 0.6373488903045654, "learning_rate": 2.8604135977279938e-05, "loss": 0.1039, "step": 9275 }, { "epoch": 0.1642736654757003, "grad_norm": 0.6938695907592773, "learning_rate": 2.8603773522308286e-05, "loss": 0.1155, "step": 9276 }, { "epoch": 0.16429137501272872, "grad_norm": 1.0506190061569214, "learning_rate": 2.86034110225816e-05, "loss": 0.1003, "step": 9277 }, { "epoch": 0.16430908454975715, "grad_norm": 0.5911566019058228, "learning_rate": 2.8603048478101067e-05, "loss": 0.0741, "step": 9278 }, { "epoch": 0.16432679408678558, "grad_norm": 0.7358670830726624, "learning_rate": 2.8602685888867883e-05, "loss": 0.1193, "step": 9279 }, { "epoch": 0.164344503623814, "grad_norm": 0.7760373950004578, "learning_rate": 2.8602323254883237e-05, "loss": 0.1289, "step": 9280 }, { "epoch": 0.16436221316084243, "grad_norm": 1.4819377660751343, "learning_rate": 2.860196057614832e-05, "loss": 0.1089, "step": 9281 }, { "epoch": 0.16437992269787088, "grad_norm": 0.8275805711746216, "learning_rate": 2.860159785266433e-05, "loss": 0.1192, "step": 9282 }, { "epoch": 0.1643976322348993, "grad_norm": 1.0788522958755493, "learning_rate": 2.8601235084432457e-05, "loss": 0.1202, "step": 9283 }, { "epoch": 0.16441534177192774, "grad_norm": 0.9656455516815186, "learning_rate": 2.86008722714539e-05, "loss": 0.0771, "step": 9284 }, { "epoch": 0.16443305130895616, "grad_norm": 1.2432712316513062, "learning_rate": 2.8600509413729848e-05, "loss": 0.116, "step": 9285 }, { "epoch": 0.1644507608459846, "grad_norm": 0.8282486796379089, "learning_rate": 2.860014651126149e-05, "loss": 0.1479, "step": 9286 }, { "epoch": 0.16446847038301302, "grad_norm": 0.6000875234603882, "learning_rate": 2.859978356405003e-05, "loss": 0.0773, "step": 9287 }, { "epoch": 0.16448617992004144, "grad_norm": 1.5017822980880737, "learning_rate": 2.859942057209666e-05, "loss": 0.0957, "step": 9288 }, { "epoch": 0.16450388945706987, "grad_norm": 1.1919621229171753, "learning_rate": 2.859905753540257e-05, "loss": 0.0858, "step": 9289 }, { "epoch": 0.1645215989940983, "grad_norm": 1.0423731803894043, "learning_rate": 2.859869445396896e-05, "loss": 0.0762, "step": 9290 }, { "epoch": 0.16453930853112672, "grad_norm": 1.419262170791626, "learning_rate": 2.8598331327797012e-05, "loss": 0.1415, "step": 9291 }, { "epoch": 0.16455701806815515, "grad_norm": 1.163309931755066, "learning_rate": 2.8597968156887936e-05, "loss": 0.0915, "step": 9292 }, { "epoch": 0.16457472760518357, "grad_norm": 1.086765170097351, "learning_rate": 2.8597604941242916e-05, "loss": 0.0666, "step": 9293 }, { "epoch": 0.164592437142212, "grad_norm": 1.6178739070892334, "learning_rate": 2.8597241680863155e-05, "loss": 0.1148, "step": 9294 }, { "epoch": 0.16461014667924043, "grad_norm": 1.1743813753128052, "learning_rate": 2.859687837574984e-05, "loss": 0.1032, "step": 9295 }, { "epoch": 0.16462785621626885, "grad_norm": 1.1107269525527954, "learning_rate": 2.8596515025904172e-05, "loss": 0.1285, "step": 9296 }, { "epoch": 0.1646455657532973, "grad_norm": 0.7496302723884583, "learning_rate": 2.859615163132735e-05, "loss": 0.105, "step": 9297 }, { "epoch": 0.16466327529032573, "grad_norm": 1.2228131294250488, "learning_rate": 2.8595788192020555e-05, "loss": 0.1176, "step": 9298 }, { "epoch": 0.16468098482735416, "grad_norm": 1.4409202337265015, "learning_rate": 2.8595424707984997e-05, "loss": 0.1279, "step": 9299 }, { "epoch": 0.1646986943643826, "grad_norm": 0.8861536383628845, "learning_rate": 2.8595061179221872e-05, "loss": 0.0839, "step": 9300 }, { "epoch": 0.164716403901411, "grad_norm": 1.174979567527771, "learning_rate": 2.8594697605732364e-05, "loss": 0.1027, "step": 9301 }, { "epoch": 0.16473411343843944, "grad_norm": 1.0407785177230835, "learning_rate": 2.8594333987517673e-05, "loss": 0.1254, "step": 9302 }, { "epoch": 0.16475182297546787, "grad_norm": 1.1267774105072021, "learning_rate": 2.8593970324579003e-05, "loss": 0.1184, "step": 9303 }, { "epoch": 0.1647695325124963, "grad_norm": 1.419071912765503, "learning_rate": 2.8593606616917548e-05, "loss": 0.1268, "step": 9304 }, { "epoch": 0.16478724204952472, "grad_norm": 1.0418411493301392, "learning_rate": 2.85932428645345e-05, "loss": 0.1114, "step": 9305 }, { "epoch": 0.16480495158655314, "grad_norm": 0.7251076698303223, "learning_rate": 2.8592879067431054e-05, "loss": 0.0673, "step": 9306 }, { "epoch": 0.16482266112358157, "grad_norm": 0.9124817252159119, "learning_rate": 2.8592515225608414e-05, "loss": 0.0753, "step": 9307 }, { "epoch": 0.16484037066061, "grad_norm": 0.8289561867713928, "learning_rate": 2.8592151339067773e-05, "loss": 0.1054, "step": 9308 }, { "epoch": 0.16485808019763842, "grad_norm": 1.1639724969863892, "learning_rate": 2.859178740781033e-05, "loss": 0.1291, "step": 9309 }, { "epoch": 0.16487578973466685, "grad_norm": 0.9421271681785583, "learning_rate": 2.859142343183728e-05, "loss": 0.1073, "step": 9310 }, { "epoch": 0.16489349927169528, "grad_norm": 0.9799255728721619, "learning_rate": 2.8591059411149822e-05, "loss": 0.1431, "step": 9311 }, { "epoch": 0.16491120880872373, "grad_norm": 1.3481212854385376, "learning_rate": 2.8590695345749152e-05, "loss": 0.1073, "step": 9312 }, { "epoch": 0.16492891834575216, "grad_norm": 1.0571094751358032, "learning_rate": 2.859033123563647e-05, "loss": 0.0752, "step": 9313 }, { "epoch": 0.16494662788278058, "grad_norm": 0.9598175287246704, "learning_rate": 2.8589967080812973e-05, "loss": 0.1125, "step": 9314 }, { "epoch": 0.164964337419809, "grad_norm": 1.0034610033035278, "learning_rate": 2.858960288127986e-05, "loss": 0.1133, "step": 9315 }, { "epoch": 0.16498204695683744, "grad_norm": 0.942436158657074, "learning_rate": 2.8589238637038326e-05, "loss": 0.0977, "step": 9316 }, { "epoch": 0.16499975649386586, "grad_norm": 1.003061056137085, "learning_rate": 2.8588874348089574e-05, "loss": 0.1093, "step": 9317 }, { "epoch": 0.1650174660308943, "grad_norm": 1.2934787273406982, "learning_rate": 2.8588510014434793e-05, "loss": 0.1051, "step": 9318 }, { "epoch": 0.16503517556792272, "grad_norm": 0.8857508301734924, "learning_rate": 2.8588145636075195e-05, "loss": 0.1206, "step": 9319 }, { "epoch": 0.16505288510495114, "grad_norm": 1.6899943351745605, "learning_rate": 2.858778121301197e-05, "loss": 0.0822, "step": 9320 }, { "epoch": 0.16507059464197957, "grad_norm": 1.6163074970245361, "learning_rate": 2.8587416745246323e-05, "loss": 0.1172, "step": 9321 }, { "epoch": 0.165088304179008, "grad_norm": 0.7429532408714294, "learning_rate": 2.8587052232779445e-05, "loss": 0.1071, "step": 9322 }, { "epoch": 0.16510601371603642, "grad_norm": 1.5770366191864014, "learning_rate": 2.8586687675612537e-05, "loss": 0.1565, "step": 9323 }, { "epoch": 0.16512372325306485, "grad_norm": 1.269486427307129, "learning_rate": 2.8586323073746806e-05, "loss": 0.1338, "step": 9324 }, { "epoch": 0.16514143279009327, "grad_norm": 0.7520358562469482, "learning_rate": 2.8585958427183447e-05, "loss": 0.1139, "step": 9325 }, { "epoch": 0.1651591423271217, "grad_norm": 1.985024333000183, "learning_rate": 2.8585593735923655e-05, "loss": 0.1306, "step": 9326 }, { "epoch": 0.16517685186415015, "grad_norm": 1.6153870820999146, "learning_rate": 2.8585228999968634e-05, "loss": 0.0971, "step": 9327 }, { "epoch": 0.16519456140117858, "grad_norm": 0.9367659687995911, "learning_rate": 2.8584864219319584e-05, "loss": 0.1272, "step": 9328 }, { "epoch": 0.165212270938207, "grad_norm": 0.9275274276733398, "learning_rate": 2.858449939397771e-05, "loss": 0.0974, "step": 9329 }, { "epoch": 0.16522998047523543, "grad_norm": 1.3828965425491333, "learning_rate": 2.85841345239442e-05, "loss": 0.1163, "step": 9330 }, { "epoch": 0.16524769001226386, "grad_norm": 1.272786021232605, "learning_rate": 2.8583769609220262e-05, "loss": 0.1048, "step": 9331 }, { "epoch": 0.16526539954929229, "grad_norm": 0.7913694977760315, "learning_rate": 2.8583404649807098e-05, "loss": 0.1086, "step": 9332 }, { "epoch": 0.1652831090863207, "grad_norm": 0.803305447101593, "learning_rate": 2.8583039645705905e-05, "loss": 0.0898, "step": 9333 }, { "epoch": 0.16530081862334914, "grad_norm": 0.9217231273651123, "learning_rate": 2.8582674596917886e-05, "loss": 0.0932, "step": 9334 }, { "epoch": 0.16531852816037756, "grad_norm": 0.9668385982513428, "learning_rate": 2.8582309503444244e-05, "loss": 0.1029, "step": 9335 }, { "epoch": 0.165336237697406, "grad_norm": 1.1493542194366455, "learning_rate": 2.8581944365286176e-05, "loss": 0.1088, "step": 9336 }, { "epoch": 0.16535394723443442, "grad_norm": 0.9007049798965454, "learning_rate": 2.858157918244488e-05, "loss": 0.1554, "step": 9337 }, { "epoch": 0.16537165677146284, "grad_norm": 1.4275376796722412, "learning_rate": 2.858121395492157e-05, "loss": 0.1205, "step": 9338 }, { "epoch": 0.16538936630849127, "grad_norm": 0.9730335474014282, "learning_rate": 2.8580848682717433e-05, "loss": 0.1407, "step": 9339 }, { "epoch": 0.1654070758455197, "grad_norm": 1.001637578010559, "learning_rate": 2.8580483365833684e-05, "loss": 0.1343, "step": 9340 }, { "epoch": 0.16542478538254812, "grad_norm": 1.0291728973388672, "learning_rate": 2.8580118004271516e-05, "loss": 0.1194, "step": 9341 }, { "epoch": 0.16544249491957658, "grad_norm": 0.7467126846313477, "learning_rate": 2.857975259803213e-05, "loss": 0.0946, "step": 9342 }, { "epoch": 0.165460204456605, "grad_norm": 1.0529340505599976, "learning_rate": 2.8579387147116735e-05, "loss": 0.1488, "step": 9343 }, { "epoch": 0.16547791399363343, "grad_norm": 1.271384358406067, "learning_rate": 2.8579021651526528e-05, "loss": 0.1232, "step": 9344 }, { "epoch": 0.16549562353066186, "grad_norm": 1.0637680292129517, "learning_rate": 2.8578656111262712e-05, "loss": 0.1163, "step": 9345 }, { "epoch": 0.16551333306769028, "grad_norm": 0.8644570112228394, "learning_rate": 2.8578290526326493e-05, "loss": 0.1179, "step": 9346 }, { "epoch": 0.1655310426047187, "grad_norm": 1.1009118556976318, "learning_rate": 2.857792489671907e-05, "loss": 0.1586, "step": 9347 }, { "epoch": 0.16554875214174714, "grad_norm": 1.1347298622131348, "learning_rate": 2.857755922244165e-05, "loss": 0.0747, "step": 9348 }, { "epoch": 0.16556646167877556, "grad_norm": 0.7166604399681091, "learning_rate": 2.857719350349543e-05, "loss": 0.1062, "step": 9349 }, { "epoch": 0.165584171215804, "grad_norm": 1.1322380304336548, "learning_rate": 2.857682773988162e-05, "loss": 0.1502, "step": 9350 }, { "epoch": 0.16560188075283241, "grad_norm": 0.9791074395179749, "learning_rate": 2.857646193160142e-05, "loss": 0.1089, "step": 9351 }, { "epoch": 0.16561959028986084, "grad_norm": 0.9830495715141296, "learning_rate": 2.857609607865603e-05, "loss": 0.1153, "step": 9352 }, { "epoch": 0.16563729982688927, "grad_norm": 1.1112931966781616, "learning_rate": 2.857573018104666e-05, "loss": 0.1212, "step": 9353 }, { "epoch": 0.1656550093639177, "grad_norm": 1.0690237283706665, "learning_rate": 2.857536423877451e-05, "loss": 0.1138, "step": 9354 }, { "epoch": 0.16567271890094612, "grad_norm": 0.8533393144607544, "learning_rate": 2.8574998251840782e-05, "loss": 0.1295, "step": 9355 }, { "epoch": 0.16569042843797457, "grad_norm": 0.8689027428627014, "learning_rate": 2.8574632220246686e-05, "loss": 0.0762, "step": 9356 }, { "epoch": 0.165708137975003, "grad_norm": 0.9585733413696289, "learning_rate": 2.857426614399342e-05, "loss": 0.1136, "step": 9357 }, { "epoch": 0.16572584751203143, "grad_norm": 0.8796278238296509, "learning_rate": 2.8573900023082198e-05, "loss": 0.0926, "step": 9358 }, { "epoch": 0.16574355704905985, "grad_norm": 0.9918760657310486, "learning_rate": 2.8573533857514215e-05, "loss": 0.1118, "step": 9359 }, { "epoch": 0.16576126658608828, "grad_norm": 0.8350846171379089, "learning_rate": 2.8573167647290675e-05, "loss": 0.1164, "step": 9360 }, { "epoch": 0.1657789761231167, "grad_norm": 0.8274527788162231, "learning_rate": 2.8572801392412788e-05, "loss": 0.1207, "step": 9361 }, { "epoch": 0.16579668566014513, "grad_norm": 0.8841550946235657, "learning_rate": 2.8572435092881754e-05, "loss": 0.1145, "step": 9362 }, { "epoch": 0.16581439519717356, "grad_norm": 0.9290022850036621, "learning_rate": 2.8572068748698785e-05, "loss": 0.0802, "step": 9363 }, { "epoch": 0.16583210473420198, "grad_norm": 0.7591924667358398, "learning_rate": 2.8571702359865087e-05, "loss": 0.1155, "step": 9364 }, { "epoch": 0.1658498142712304, "grad_norm": 0.8419449925422668, "learning_rate": 2.8571335926381853e-05, "loss": 0.1381, "step": 9365 }, { "epoch": 0.16586752380825884, "grad_norm": 0.9668233394622803, "learning_rate": 2.85709694482503e-05, "loss": 0.1101, "step": 9366 }, { "epoch": 0.16588523334528726, "grad_norm": 1.5739824771881104, "learning_rate": 2.8570602925471626e-05, "loss": 0.1137, "step": 9367 }, { "epoch": 0.1659029428823157, "grad_norm": 1.0596336126327515, "learning_rate": 2.8570236358047046e-05, "loss": 0.1661, "step": 9368 }, { "epoch": 0.16592065241934412, "grad_norm": 0.9523754715919495, "learning_rate": 2.8569869745977764e-05, "loss": 0.0844, "step": 9369 }, { "epoch": 0.16593836195637254, "grad_norm": 1.0849635601043701, "learning_rate": 2.8569503089264977e-05, "loss": 0.1296, "step": 9370 }, { "epoch": 0.165956071493401, "grad_norm": 1.666707992553711, "learning_rate": 2.8569136387909898e-05, "loss": 0.1301, "step": 9371 }, { "epoch": 0.16597378103042942, "grad_norm": 1.0430362224578857, "learning_rate": 2.8568769641913733e-05, "loss": 0.0956, "step": 9372 }, { "epoch": 0.16599149056745785, "grad_norm": 1.0622012615203857, "learning_rate": 2.8568402851277688e-05, "loss": 0.1312, "step": 9373 }, { "epoch": 0.16600920010448628, "grad_norm": 0.9339731931686401, "learning_rate": 2.8568036016002973e-05, "loss": 0.1201, "step": 9374 }, { "epoch": 0.1660269096415147, "grad_norm": 1.1509946584701538, "learning_rate": 2.8567669136090787e-05, "loss": 0.1339, "step": 9375 }, { "epoch": 0.16604461917854313, "grad_norm": 0.902644157409668, "learning_rate": 2.856730221154235e-05, "loss": 0.1092, "step": 9376 }, { "epoch": 0.16606232871557156, "grad_norm": 1.2644360065460205, "learning_rate": 2.8566935242358852e-05, "loss": 0.1423, "step": 9377 }, { "epoch": 0.16608003825259998, "grad_norm": 1.1329759359359741, "learning_rate": 2.856656822854151e-05, "loss": 0.1164, "step": 9378 }, { "epoch": 0.1660977477896284, "grad_norm": 1.1076973676681519, "learning_rate": 2.8566201170091538e-05, "loss": 0.1562, "step": 9379 }, { "epoch": 0.16611545732665683, "grad_norm": 0.752020001411438, "learning_rate": 2.856583406701013e-05, "loss": 0.1094, "step": 9380 }, { "epoch": 0.16613316686368526, "grad_norm": 1.4382365942001343, "learning_rate": 2.8565466919298505e-05, "loss": 0.1705, "step": 9381 }, { "epoch": 0.1661508764007137, "grad_norm": 0.9626526832580566, "learning_rate": 2.856509972695786e-05, "loss": 0.1088, "step": 9382 }, { "epoch": 0.1661685859377421, "grad_norm": 0.9635062217712402, "learning_rate": 2.8564732489989412e-05, "loss": 0.1353, "step": 9383 }, { "epoch": 0.16618629547477054, "grad_norm": 0.9338418841362, "learning_rate": 2.8564365208394365e-05, "loss": 0.1169, "step": 9384 }, { "epoch": 0.16620400501179897, "grad_norm": 1.3335089683532715, "learning_rate": 2.856399788217393e-05, "loss": 0.1078, "step": 9385 }, { "epoch": 0.16622171454882742, "grad_norm": 1.0579135417938232, "learning_rate": 2.8563630511329312e-05, "loss": 0.1142, "step": 9386 }, { "epoch": 0.16623942408585585, "grad_norm": 0.9886431097984314, "learning_rate": 2.8563263095861724e-05, "loss": 0.1516, "step": 9387 }, { "epoch": 0.16625713362288427, "grad_norm": 0.9639809131622314, "learning_rate": 2.8562895635772373e-05, "loss": 0.1216, "step": 9388 }, { "epoch": 0.1662748431599127, "grad_norm": 0.798039972782135, "learning_rate": 2.856252813106246e-05, "loss": 0.1029, "step": 9389 }, { "epoch": 0.16629255269694113, "grad_norm": 0.9016119837760925, "learning_rate": 2.856216058173321e-05, "loss": 0.1481, "step": 9390 }, { "epoch": 0.16631026223396955, "grad_norm": 1.4258546829223633, "learning_rate": 2.8561792987785818e-05, "loss": 0.1483, "step": 9391 }, { "epoch": 0.16632797177099798, "grad_norm": 1.2793604135513306, "learning_rate": 2.8561425349221505e-05, "loss": 0.1207, "step": 9392 }, { "epoch": 0.1663456813080264, "grad_norm": 1.4334484338760376, "learning_rate": 2.8561057666041467e-05, "loss": 0.135, "step": 9393 }, { "epoch": 0.16636339084505483, "grad_norm": 1.1452540159225464, "learning_rate": 2.8560689938246923e-05, "loss": 0.0961, "step": 9394 }, { "epoch": 0.16638110038208326, "grad_norm": 0.9382977485656738, "learning_rate": 2.8560322165839078e-05, "loss": 0.1149, "step": 9395 }, { "epoch": 0.16639880991911168, "grad_norm": 1.135593295097351, "learning_rate": 2.855995434881915e-05, "loss": 0.1467, "step": 9396 }, { "epoch": 0.1664165194561401, "grad_norm": 1.1407982110977173, "learning_rate": 2.8559586487188342e-05, "loss": 0.2039, "step": 9397 }, { "epoch": 0.16643422899316854, "grad_norm": 1.5227288007736206, "learning_rate": 2.855921858094787e-05, "loss": 0.0712, "step": 9398 }, { "epoch": 0.16645193853019696, "grad_norm": 0.7686276435852051, "learning_rate": 2.855885063009893e-05, "loss": 0.1311, "step": 9399 }, { "epoch": 0.1664696480672254, "grad_norm": 1.3881791830062866, "learning_rate": 2.855848263464275e-05, "loss": 0.1195, "step": 9400 }, { "epoch": 0.16648735760425384, "grad_norm": 0.8662548065185547, "learning_rate": 2.8558114594580532e-05, "loss": 0.0984, "step": 9401 }, { "epoch": 0.16650506714128227, "grad_norm": 1.090195655822754, "learning_rate": 2.8557746509913487e-05, "loss": 0.0982, "step": 9402 }, { "epoch": 0.1665227766783107, "grad_norm": 1.4187896251678467, "learning_rate": 2.8557378380642832e-05, "loss": 0.1325, "step": 9403 }, { "epoch": 0.16654048621533912, "grad_norm": 1.2701600790023804, "learning_rate": 2.855701020676977e-05, "loss": 0.123, "step": 9404 }, { "epoch": 0.16655819575236755, "grad_norm": 0.8368890881538391, "learning_rate": 2.8556641988295514e-05, "loss": 0.119, "step": 9405 }, { "epoch": 0.16657590528939598, "grad_norm": 0.9837709069252014, "learning_rate": 2.855627372522128e-05, "loss": 0.1152, "step": 9406 }, { "epoch": 0.1665936148264244, "grad_norm": 2.5709104537963867, "learning_rate": 2.8555905417548275e-05, "loss": 0.093, "step": 9407 }, { "epoch": 0.16661132436345283, "grad_norm": 1.4477945566177368, "learning_rate": 2.855553706527771e-05, "loss": 0.1186, "step": 9408 }, { "epoch": 0.16662903390048125, "grad_norm": 1.270772099494934, "learning_rate": 2.8555168668410805e-05, "loss": 0.1327, "step": 9409 }, { "epoch": 0.16664674343750968, "grad_norm": 1.1378899812698364, "learning_rate": 2.8554800226948763e-05, "loss": 0.1096, "step": 9410 }, { "epoch": 0.1666644529745381, "grad_norm": 1.1000185012817383, "learning_rate": 2.8554431740892797e-05, "loss": 0.0996, "step": 9411 }, { "epoch": 0.16668216251156653, "grad_norm": 1.3415740728378296, "learning_rate": 2.8554063210244125e-05, "loss": 0.1248, "step": 9412 }, { "epoch": 0.16669987204859496, "grad_norm": 1.1016134023666382, "learning_rate": 2.8553694635003956e-05, "loss": 0.1118, "step": 9413 }, { "epoch": 0.1667175815856234, "grad_norm": 0.8746788501739502, "learning_rate": 2.85533260151735e-05, "loss": 0.1074, "step": 9414 }, { "epoch": 0.1667352911226518, "grad_norm": 0.7735227346420288, "learning_rate": 2.8552957350753973e-05, "loss": 0.0832, "step": 9415 }, { "epoch": 0.16675300065968027, "grad_norm": 1.2993112802505493, "learning_rate": 2.8552588641746586e-05, "loss": 0.1284, "step": 9416 }, { "epoch": 0.1667707101967087, "grad_norm": 1.2717552185058594, "learning_rate": 2.8552219888152554e-05, "loss": 0.1255, "step": 9417 }, { "epoch": 0.16678841973373712, "grad_norm": 1.143873691558838, "learning_rate": 2.8551851089973093e-05, "loss": 0.1462, "step": 9418 }, { "epoch": 0.16680612927076555, "grad_norm": 0.8295691609382629, "learning_rate": 2.8551482247209405e-05, "loss": 0.0954, "step": 9419 }, { "epoch": 0.16682383880779397, "grad_norm": 0.9553481340408325, "learning_rate": 2.8551113359862717e-05, "loss": 0.1394, "step": 9420 }, { "epoch": 0.1668415483448224, "grad_norm": 1.2497271299362183, "learning_rate": 2.8550744427934236e-05, "loss": 0.1339, "step": 9421 }, { "epoch": 0.16685925788185083, "grad_norm": 0.8327664732933044, "learning_rate": 2.8550375451425178e-05, "loss": 0.0963, "step": 9422 }, { "epoch": 0.16687696741887925, "grad_norm": 1.0955299139022827, "learning_rate": 2.855000643033675e-05, "loss": 0.1317, "step": 9423 }, { "epoch": 0.16689467695590768, "grad_norm": 0.9598916172981262, "learning_rate": 2.8549637364670177e-05, "loss": 0.1344, "step": 9424 }, { "epoch": 0.1669123864929361, "grad_norm": 1.1815496683120728, "learning_rate": 2.854926825442666e-05, "loss": 0.1266, "step": 9425 }, { "epoch": 0.16693009602996453, "grad_norm": 0.5561860799789429, "learning_rate": 2.8548899099607427e-05, "loss": 0.1369, "step": 9426 }, { "epoch": 0.16694780556699296, "grad_norm": 1.1869773864746094, "learning_rate": 2.8548529900213686e-05, "loss": 0.1036, "step": 9427 }, { "epoch": 0.16696551510402138, "grad_norm": 1.009057879447937, "learning_rate": 2.8548160656246653e-05, "loss": 0.1295, "step": 9428 }, { "epoch": 0.1669832246410498, "grad_norm": 0.8964099884033203, "learning_rate": 2.854779136770754e-05, "loss": 0.1067, "step": 9429 }, { "epoch": 0.16700093417807824, "grad_norm": 0.8847422003746033, "learning_rate": 2.854742203459756e-05, "loss": 0.1235, "step": 9430 }, { "epoch": 0.1670186437151067, "grad_norm": 1.147326111793518, "learning_rate": 2.8547052656917932e-05, "loss": 0.1316, "step": 9431 }, { "epoch": 0.16703635325213512, "grad_norm": 1.063999891281128, "learning_rate": 2.8546683234669877e-05, "loss": 0.1283, "step": 9432 }, { "epoch": 0.16705406278916354, "grad_norm": 1.0097696781158447, "learning_rate": 2.85463137678546e-05, "loss": 0.0841, "step": 9433 }, { "epoch": 0.16707177232619197, "grad_norm": 0.7181469798088074, "learning_rate": 2.8545944256473327e-05, "loss": 0.107, "step": 9434 }, { "epoch": 0.1670894818632204, "grad_norm": 1.5611635446548462, "learning_rate": 2.854557470052726e-05, "loss": 0.1567, "step": 9435 }, { "epoch": 0.16710719140024882, "grad_norm": 1.0836100578308105, "learning_rate": 2.8545205100017623e-05, "loss": 0.1239, "step": 9436 }, { "epoch": 0.16712490093727725, "grad_norm": 1.5761164426803589, "learning_rate": 2.8544835454945635e-05, "loss": 0.1632, "step": 9437 }, { "epoch": 0.16714261047430568, "grad_norm": 1.036326289176941, "learning_rate": 2.8544465765312505e-05, "loss": 0.1168, "step": 9438 }, { "epoch": 0.1671603200113341, "grad_norm": 1.5849649906158447, "learning_rate": 2.8544096031119458e-05, "loss": 0.0999, "step": 9439 }, { "epoch": 0.16717802954836253, "grad_norm": 1.070590615272522, "learning_rate": 2.85437262523677e-05, "loss": 0.1269, "step": 9440 }, { "epoch": 0.16719573908539095, "grad_norm": 1.5427504777908325, "learning_rate": 2.8543356429058452e-05, "loss": 0.1579, "step": 9441 }, { "epoch": 0.16721344862241938, "grad_norm": 1.3240879774093628, "learning_rate": 2.8542986561192934e-05, "loss": 0.1158, "step": 9442 }, { "epoch": 0.1672311581594478, "grad_norm": 1.2182377576828003, "learning_rate": 2.854261664877236e-05, "loss": 0.1476, "step": 9443 }, { "epoch": 0.16724886769647623, "grad_norm": 0.6584374308586121, "learning_rate": 2.8542246691797942e-05, "loss": 0.0961, "step": 9444 }, { "epoch": 0.16726657723350466, "grad_norm": 0.646772563457489, "learning_rate": 2.8541876690270906e-05, "loss": 0.1018, "step": 9445 }, { "epoch": 0.1672842867705331, "grad_norm": 1.0602978467941284, "learning_rate": 2.8541506644192467e-05, "loss": 0.1293, "step": 9446 }, { "epoch": 0.16730199630756154, "grad_norm": 0.997704029083252, "learning_rate": 2.854113655356384e-05, "loss": 0.1019, "step": 9447 }, { "epoch": 0.16731970584458997, "grad_norm": 0.907037615776062, "learning_rate": 2.8540766418386242e-05, "loss": 0.1055, "step": 9448 }, { "epoch": 0.1673374153816184, "grad_norm": 0.8890054821968079, "learning_rate": 2.8540396238660892e-05, "loss": 0.0979, "step": 9449 }, { "epoch": 0.16735512491864682, "grad_norm": 1.1119612455368042, "learning_rate": 2.854002601438901e-05, "loss": 0.1053, "step": 9450 }, { "epoch": 0.16737283445567525, "grad_norm": 1.4322165250778198, "learning_rate": 2.853965574557181e-05, "loss": 0.0794, "step": 9451 }, { "epoch": 0.16739054399270367, "grad_norm": 1.082252025604248, "learning_rate": 2.853928543221051e-05, "loss": 0.1412, "step": 9452 }, { "epoch": 0.1674082535297321, "grad_norm": 1.4619780778884888, "learning_rate": 2.8538915074306334e-05, "loss": 0.1145, "step": 9453 }, { "epoch": 0.16742596306676052, "grad_norm": 0.9889599680900574, "learning_rate": 2.8538544671860494e-05, "loss": 0.1502, "step": 9454 }, { "epoch": 0.16744367260378895, "grad_norm": 0.8070142269134521, "learning_rate": 2.8538174224874213e-05, "loss": 0.1015, "step": 9455 }, { "epoch": 0.16746138214081738, "grad_norm": 0.6859434843063354, "learning_rate": 2.853780373334871e-05, "loss": 0.1212, "step": 9456 }, { "epoch": 0.1674790916778458, "grad_norm": 1.7255325317382812, "learning_rate": 2.8537433197285197e-05, "loss": 0.1343, "step": 9457 }, { "epoch": 0.16749680121487423, "grad_norm": 1.3238897323608398, "learning_rate": 2.8537062616684903e-05, "loss": 0.156, "step": 9458 }, { "epoch": 0.16751451075190266, "grad_norm": 1.0433189868927002, "learning_rate": 2.853669199154904e-05, "loss": 0.1267, "step": 9459 }, { "epoch": 0.16753222028893108, "grad_norm": 0.8231715559959412, "learning_rate": 2.8536321321878826e-05, "loss": 0.0985, "step": 9460 }, { "epoch": 0.16754992982595954, "grad_norm": 1.4448392391204834, "learning_rate": 2.8535950607675493e-05, "loss": 0.1246, "step": 9461 }, { "epoch": 0.16756763936298796, "grad_norm": 1.1175481081008911, "learning_rate": 2.8535579848940246e-05, "loss": 0.1093, "step": 9462 }, { "epoch": 0.1675853489000164, "grad_norm": 1.8645981550216675, "learning_rate": 2.8535209045674306e-05, "loss": 0.1544, "step": 9463 }, { "epoch": 0.16760305843704482, "grad_norm": 0.858373761177063, "learning_rate": 2.85348381978789e-05, "loss": 0.1248, "step": 9464 }, { "epoch": 0.16762076797407324, "grad_norm": 0.997943103313446, "learning_rate": 2.853446730555525e-05, "loss": 0.0824, "step": 9465 }, { "epoch": 0.16763847751110167, "grad_norm": 1.2094706296920776, "learning_rate": 2.853409636870457e-05, "loss": 0.1473, "step": 9466 }, { "epoch": 0.1676561870481301, "grad_norm": 0.8285605907440186, "learning_rate": 2.853372538732808e-05, "loss": 0.0995, "step": 9467 }, { "epoch": 0.16767389658515852, "grad_norm": 1.124721646308899, "learning_rate": 2.8533354361427005e-05, "loss": 0.0832, "step": 9468 }, { "epoch": 0.16769160612218695, "grad_norm": 0.9397109150886536, "learning_rate": 2.8532983291002556e-05, "loss": 0.117, "step": 9469 }, { "epoch": 0.16770931565921537, "grad_norm": 1.0588525533676147, "learning_rate": 2.8532612176055965e-05, "loss": 0.1216, "step": 9470 }, { "epoch": 0.1677270251962438, "grad_norm": 0.7213224172592163, "learning_rate": 2.853224101658845e-05, "loss": 0.1058, "step": 9471 }, { "epoch": 0.16774473473327223, "grad_norm": 1.6495493650436401, "learning_rate": 2.8531869812601226e-05, "loss": 0.1378, "step": 9472 }, { "epoch": 0.16776244427030065, "grad_norm": 1.2359538078308105, "learning_rate": 2.8531498564095527e-05, "loss": 0.1504, "step": 9473 }, { "epoch": 0.16778015380732908, "grad_norm": 0.7734817862510681, "learning_rate": 2.8531127271072565e-05, "loss": 0.1258, "step": 9474 }, { "epoch": 0.1677978633443575, "grad_norm": 0.7657071948051453, "learning_rate": 2.853075593353356e-05, "loss": 0.1014, "step": 9475 }, { "epoch": 0.16781557288138596, "grad_norm": 1.1749253273010254, "learning_rate": 2.8530384551479737e-05, "loss": 0.1402, "step": 9476 }, { "epoch": 0.1678332824184144, "grad_norm": 1.3887661695480347, "learning_rate": 2.8530013124912313e-05, "loss": 0.1384, "step": 9477 }, { "epoch": 0.1678509919554428, "grad_norm": 1.3221632242202759, "learning_rate": 2.8529641653832515e-05, "loss": 0.1123, "step": 9478 }, { "epoch": 0.16786870149247124, "grad_norm": 0.9234195947647095, "learning_rate": 2.8529270138241573e-05, "loss": 0.1012, "step": 9479 }, { "epoch": 0.16788641102949967, "grad_norm": 0.771201491355896, "learning_rate": 2.8528898578140693e-05, "loss": 0.1048, "step": 9480 }, { "epoch": 0.1679041205665281, "grad_norm": 0.9699147939682007, "learning_rate": 2.852852697353111e-05, "loss": 0.1062, "step": 9481 }, { "epoch": 0.16792183010355652, "grad_norm": 1.1848621368408203, "learning_rate": 2.8528155324414038e-05, "loss": 0.0997, "step": 9482 }, { "epoch": 0.16793953964058494, "grad_norm": 1.0267841815948486, "learning_rate": 2.8527783630790703e-05, "loss": 0.1292, "step": 9483 }, { "epoch": 0.16795724917761337, "grad_norm": 0.9612479209899902, "learning_rate": 2.8527411892662328e-05, "loss": 0.1314, "step": 9484 }, { "epoch": 0.1679749587146418, "grad_norm": 1.0074844360351562, "learning_rate": 2.852704011003014e-05, "loss": 0.0947, "step": 9485 }, { "epoch": 0.16799266825167022, "grad_norm": 0.928088903427124, "learning_rate": 2.8526668282895352e-05, "loss": 0.1137, "step": 9486 }, { "epoch": 0.16801037778869865, "grad_norm": 1.0977888107299805, "learning_rate": 2.85262964112592e-05, "loss": 0.1397, "step": 9487 }, { "epoch": 0.16802808732572708, "grad_norm": 0.5295854210853577, "learning_rate": 2.85259244951229e-05, "loss": 0.0997, "step": 9488 }, { "epoch": 0.1680457968627555, "grad_norm": 0.8386576175689697, "learning_rate": 2.852555253448767e-05, "loss": 0.0869, "step": 9489 }, { "epoch": 0.16806350639978396, "grad_norm": 1.1922837495803833, "learning_rate": 2.852518052935475e-05, "loss": 0.0845, "step": 9490 }, { "epoch": 0.16808121593681238, "grad_norm": 1.0396063327789307, "learning_rate": 2.8524808479725343e-05, "loss": 0.064, "step": 9491 }, { "epoch": 0.1680989254738408, "grad_norm": 1.3297536373138428, "learning_rate": 2.8524436385600693e-05, "loss": 0.0803, "step": 9492 }, { "epoch": 0.16811663501086924, "grad_norm": 1.0505855083465576, "learning_rate": 2.8524064246982012e-05, "loss": 0.1105, "step": 9493 }, { "epoch": 0.16813434454789766, "grad_norm": 1.2485394477844238, "learning_rate": 2.8523692063870527e-05, "loss": 0.1339, "step": 9494 }, { "epoch": 0.1681520540849261, "grad_norm": 1.2707980871200562, "learning_rate": 2.852331983626746e-05, "loss": 0.1218, "step": 9495 }, { "epoch": 0.16816976362195452, "grad_norm": 1.0670166015625, "learning_rate": 2.8522947564174043e-05, "loss": 0.0904, "step": 9496 }, { "epoch": 0.16818747315898294, "grad_norm": 0.8764817714691162, "learning_rate": 2.8522575247591497e-05, "loss": 0.1306, "step": 9497 }, { "epoch": 0.16820518269601137, "grad_norm": 0.8938935995101929, "learning_rate": 2.8522202886521045e-05, "loss": 0.0976, "step": 9498 }, { "epoch": 0.1682228922330398, "grad_norm": 1.425464391708374, "learning_rate": 2.8521830480963912e-05, "loss": 0.1293, "step": 9499 }, { "epoch": 0.16824060177006822, "grad_norm": 0.7664563655853271, "learning_rate": 2.8521458030921324e-05, "loss": 0.0999, "step": 9500 }, { "epoch": 0.16825831130709665, "grad_norm": 1.3984791040420532, "learning_rate": 2.852108553639451e-05, "loss": 0.1255, "step": 9501 }, { "epoch": 0.16827602084412507, "grad_norm": 1.3495599031448364, "learning_rate": 2.8520712997384686e-05, "loss": 0.1137, "step": 9502 }, { "epoch": 0.1682937303811535, "grad_norm": 0.38361239433288574, "learning_rate": 2.8520340413893086e-05, "loss": 0.0468, "step": 9503 }, { "epoch": 0.16831143991818193, "grad_norm": 1.078055500984192, "learning_rate": 2.851996778592094e-05, "loss": 0.121, "step": 9504 }, { "epoch": 0.16832914945521038, "grad_norm": 1.0896095037460327, "learning_rate": 2.8519595113469462e-05, "loss": 0.1478, "step": 9505 }, { "epoch": 0.1683468589922388, "grad_norm": 2.2631235122680664, "learning_rate": 2.851922239653988e-05, "loss": 0.0948, "step": 9506 }, { "epoch": 0.16836456852926723, "grad_norm": 0.8481941819190979, "learning_rate": 2.851884963513343e-05, "loss": 0.0913, "step": 9507 }, { "epoch": 0.16838227806629566, "grad_norm": 0.9292656779289246, "learning_rate": 2.8518476829251326e-05, "loss": 0.091, "step": 9508 }, { "epoch": 0.16839998760332409, "grad_norm": 1.1018037796020508, "learning_rate": 2.8518103978894807e-05, "loss": 0.1, "step": 9509 }, { "epoch": 0.1684176971403525, "grad_norm": 0.7732083797454834, "learning_rate": 2.851773108406509e-05, "loss": 0.1445, "step": 9510 }, { "epoch": 0.16843540667738094, "grad_norm": 1.0492972135543823, "learning_rate": 2.8517358144763407e-05, "loss": 0.1061, "step": 9511 }, { "epoch": 0.16845311621440937, "grad_norm": 1.5093811750411987, "learning_rate": 2.8516985160990982e-05, "loss": 0.134, "step": 9512 }, { "epoch": 0.1684708257514378, "grad_norm": 0.9729232788085938, "learning_rate": 2.851661213274904e-05, "loss": 0.0954, "step": 9513 }, { "epoch": 0.16848853528846622, "grad_norm": 1.2495181560516357, "learning_rate": 2.8516239060038817e-05, "loss": 0.1785, "step": 9514 }, { "epoch": 0.16850624482549464, "grad_norm": 1.0975788831710815, "learning_rate": 2.851586594286153e-05, "loss": 0.103, "step": 9515 }, { "epoch": 0.16852395436252307, "grad_norm": 0.6187878847122192, "learning_rate": 2.8515492781218412e-05, "loss": 0.1022, "step": 9516 }, { "epoch": 0.1685416638995515, "grad_norm": 1.0433987379074097, "learning_rate": 2.851511957511069e-05, "loss": 0.1263, "step": 9517 }, { "epoch": 0.16855937343657992, "grad_norm": 0.9565390348434448, "learning_rate": 2.8514746324539592e-05, "loss": 0.1447, "step": 9518 }, { "epoch": 0.16857708297360835, "grad_norm": 0.7166225910186768, "learning_rate": 2.851437302950634e-05, "loss": 0.1227, "step": 9519 }, { "epoch": 0.1685947925106368, "grad_norm": 0.379043310880661, "learning_rate": 2.8513999690012177e-05, "loss": 0.0665, "step": 9520 }, { "epoch": 0.16861250204766523, "grad_norm": 0.9083613753318787, "learning_rate": 2.8513626306058315e-05, "loss": 0.1401, "step": 9521 }, { "epoch": 0.16863021158469366, "grad_norm": 0.8283191919326782, "learning_rate": 2.8513252877645994e-05, "loss": 0.0933, "step": 9522 }, { "epoch": 0.16864792112172208, "grad_norm": 0.6211908459663391, "learning_rate": 2.8512879404776437e-05, "loss": 0.1121, "step": 9523 }, { "epoch": 0.1686656306587505, "grad_norm": 1.1909968852996826, "learning_rate": 2.851250588745087e-05, "loss": 0.1133, "step": 9524 }, { "epoch": 0.16868334019577894, "grad_norm": 1.1387560367584229, "learning_rate": 2.851213232567053e-05, "loss": 0.088, "step": 9525 }, { "epoch": 0.16870104973280736, "grad_norm": 0.491077184677124, "learning_rate": 2.8511758719436635e-05, "loss": 0.1093, "step": 9526 }, { "epoch": 0.1687187592698358, "grad_norm": 0.8872612118721008, "learning_rate": 2.8511385068750427e-05, "loss": 0.1023, "step": 9527 }, { "epoch": 0.16873646880686421, "grad_norm": 1.1253994703292847, "learning_rate": 2.8511011373613126e-05, "loss": 0.1232, "step": 9528 }, { "epoch": 0.16875417834389264, "grad_norm": 0.8726817965507507, "learning_rate": 2.8510637634025966e-05, "loss": 0.1051, "step": 9529 }, { "epoch": 0.16877188788092107, "grad_norm": 0.7823722958564758, "learning_rate": 2.8510263849990172e-05, "loss": 0.1275, "step": 9530 }, { "epoch": 0.1687895974179495, "grad_norm": 0.9335771799087524, "learning_rate": 2.850989002150698e-05, "loss": 0.0865, "step": 9531 }, { "epoch": 0.16880730695497792, "grad_norm": 0.6744059920310974, "learning_rate": 2.850951614857761e-05, "loss": 0.0921, "step": 9532 }, { "epoch": 0.16882501649200635, "grad_norm": 1.6892526149749756, "learning_rate": 2.85091422312033e-05, "loss": 0.1413, "step": 9533 }, { "epoch": 0.16884272602903477, "grad_norm": 1.0465242862701416, "learning_rate": 2.8508768269385283e-05, "loss": 0.1527, "step": 9534 }, { "epoch": 0.16886043556606323, "grad_norm": 1.361745834350586, "learning_rate": 2.850839426312478e-05, "loss": 0.1319, "step": 9535 }, { "epoch": 0.16887814510309165, "grad_norm": 0.8977975845336914, "learning_rate": 2.8508020212423026e-05, "loss": 0.109, "step": 9536 }, { "epoch": 0.16889585464012008, "grad_norm": 1.0630204677581787, "learning_rate": 2.8507646117281256e-05, "loss": 0.1588, "step": 9537 }, { "epoch": 0.1689135641771485, "grad_norm": 0.879997193813324, "learning_rate": 2.8507271977700696e-05, "loss": 0.1279, "step": 9538 }, { "epoch": 0.16893127371417693, "grad_norm": 1.1529508829116821, "learning_rate": 2.8506897793682575e-05, "loss": 0.1155, "step": 9539 }, { "epoch": 0.16894898325120536, "grad_norm": 1.2747535705566406, "learning_rate": 2.8506523565228127e-05, "loss": 0.1274, "step": 9540 }, { "epoch": 0.16896669278823379, "grad_norm": 1.5448272228240967, "learning_rate": 2.8506149292338584e-05, "loss": 0.1237, "step": 9541 }, { "epoch": 0.1689844023252622, "grad_norm": 1.2735984325408936, "learning_rate": 2.8505774975015173e-05, "loss": 0.1091, "step": 9542 }, { "epoch": 0.16900211186229064, "grad_norm": 1.247323751449585, "learning_rate": 2.850540061325913e-05, "loss": 0.1204, "step": 9543 }, { "epoch": 0.16901982139931906, "grad_norm": 1.9454224109649658, "learning_rate": 2.8505026207071685e-05, "loss": 0.1096, "step": 9544 }, { "epoch": 0.1690375309363475, "grad_norm": 0.8330686688423157, "learning_rate": 2.850465175645407e-05, "loss": 0.1186, "step": 9545 }, { "epoch": 0.16905524047337592, "grad_norm": 1.143920660018921, "learning_rate": 2.8504277261407518e-05, "loss": 0.1855, "step": 9546 }, { "epoch": 0.16907295001040434, "grad_norm": 0.784636914730072, "learning_rate": 2.8503902721933257e-05, "loss": 0.1324, "step": 9547 }, { "epoch": 0.16909065954743277, "grad_norm": 1.129318356513977, "learning_rate": 2.8503528138032522e-05, "loss": 0.1422, "step": 9548 }, { "epoch": 0.1691083690844612, "grad_norm": 1.3610659837722778, "learning_rate": 2.8503153509706545e-05, "loss": 0.1233, "step": 9549 }, { "epoch": 0.16912607862148965, "grad_norm": 0.955633819103241, "learning_rate": 2.850277883695656e-05, "loss": 0.1048, "step": 9550 }, { "epoch": 0.16914378815851808, "grad_norm": 0.8678750991821289, "learning_rate": 2.85024041197838e-05, "loss": 0.128, "step": 9551 }, { "epoch": 0.1691614976955465, "grad_norm": 1.2785296440124512, "learning_rate": 2.8502029358189493e-05, "loss": 0.1545, "step": 9552 }, { "epoch": 0.16917920723257493, "grad_norm": 0.8285242915153503, "learning_rate": 2.850165455217488e-05, "loss": 0.0812, "step": 9553 }, { "epoch": 0.16919691676960336, "grad_norm": 1.8039358854293823, "learning_rate": 2.850127970174118e-05, "loss": 0.1501, "step": 9554 }, { "epoch": 0.16921462630663178, "grad_norm": 0.9523371458053589, "learning_rate": 2.8500904806889645e-05, "loss": 0.092, "step": 9555 }, { "epoch": 0.1692323358436602, "grad_norm": 0.7581359148025513, "learning_rate": 2.850052986762149e-05, "loss": 0.0887, "step": 9556 }, { "epoch": 0.16925004538068864, "grad_norm": 0.8638479113578796, "learning_rate": 2.850015488393797e-05, "loss": 0.1085, "step": 9557 }, { "epoch": 0.16926775491771706, "grad_norm": 1.2373321056365967, "learning_rate": 2.849977985584029e-05, "loss": 0.1448, "step": 9558 }, { "epoch": 0.1692854644547455, "grad_norm": 0.8440315127372742, "learning_rate": 2.8499404783329708e-05, "loss": 0.0696, "step": 9559 }, { "epoch": 0.16930317399177391, "grad_norm": 0.9148353338241577, "learning_rate": 2.849902966640745e-05, "loss": 0.1152, "step": 9560 }, { "epoch": 0.16932088352880234, "grad_norm": 1.057904839515686, "learning_rate": 2.849865450507475e-05, "loss": 0.119, "step": 9561 }, { "epoch": 0.16933859306583077, "grad_norm": 1.5484980344772339, "learning_rate": 2.849827929933284e-05, "loss": 0.157, "step": 9562 }, { "epoch": 0.1693563026028592, "grad_norm": 0.6950673460960388, "learning_rate": 2.849790404918296e-05, "loss": 0.0963, "step": 9563 }, { "epoch": 0.16937401213988762, "grad_norm": 1.2218434810638428, "learning_rate": 2.8497528754626338e-05, "loss": 0.133, "step": 9564 }, { "epoch": 0.16939172167691607, "grad_norm": 1.4110054969787598, "learning_rate": 2.849715341566421e-05, "loss": 0.1658, "step": 9565 }, { "epoch": 0.1694094312139445, "grad_norm": 1.1961712837219238, "learning_rate": 2.8496778032297815e-05, "loss": 0.1092, "step": 9566 }, { "epoch": 0.16942714075097293, "grad_norm": 1.0738319158554077, "learning_rate": 2.8496402604528383e-05, "loss": 0.1294, "step": 9567 }, { "epoch": 0.16944485028800135, "grad_norm": 0.9897875785827637, "learning_rate": 2.8496027132357155e-05, "loss": 0.1102, "step": 9568 }, { "epoch": 0.16946255982502978, "grad_norm": 0.7623896598815918, "learning_rate": 2.8495651615785358e-05, "loss": 0.1103, "step": 9569 }, { "epoch": 0.1694802693620582, "grad_norm": 0.9115681052207947, "learning_rate": 2.8495276054814235e-05, "loss": 0.1274, "step": 9570 }, { "epoch": 0.16949797889908663, "grad_norm": 0.8447659611701965, "learning_rate": 2.849490044944502e-05, "loss": 0.0993, "step": 9571 }, { "epoch": 0.16951568843611506, "grad_norm": 1.5127602815628052, "learning_rate": 2.8494524799678946e-05, "loss": 0.1249, "step": 9572 }, { "epoch": 0.16953339797314348, "grad_norm": 1.4605458974838257, "learning_rate": 2.849414910551725e-05, "loss": 0.1357, "step": 9573 }, { "epoch": 0.1695511075101719, "grad_norm": 0.9943959712982178, "learning_rate": 2.8493773366961172e-05, "loss": 0.1342, "step": 9574 }, { "epoch": 0.16956881704720034, "grad_norm": 1.013149619102478, "learning_rate": 2.849339758401194e-05, "loss": 0.1323, "step": 9575 }, { "epoch": 0.16958652658422876, "grad_norm": 1.6576781272888184, "learning_rate": 2.8493021756670792e-05, "loss": 0.1344, "step": 9576 }, { "epoch": 0.1696042361212572, "grad_norm": 0.6167601346969604, "learning_rate": 2.849264588493897e-05, "loss": 0.116, "step": 9577 }, { "epoch": 0.16962194565828562, "grad_norm": 0.8461760878562927, "learning_rate": 2.849226996881771e-05, "loss": 0.1086, "step": 9578 }, { "epoch": 0.16963965519531404, "grad_norm": 1.1254647970199585, "learning_rate": 2.8491894008308246e-05, "loss": 0.1208, "step": 9579 }, { "epoch": 0.1696573647323425, "grad_norm": 1.4675605297088623, "learning_rate": 2.8491518003411814e-05, "loss": 0.0956, "step": 9580 }, { "epoch": 0.16967507426937092, "grad_norm": 0.9251965880393982, "learning_rate": 2.8491141954129656e-05, "loss": 0.1519, "step": 9581 }, { "epoch": 0.16969278380639935, "grad_norm": 1.3467278480529785, "learning_rate": 2.8490765860463003e-05, "loss": 0.1308, "step": 9582 }, { "epoch": 0.16971049334342778, "grad_norm": 1.128204345703125, "learning_rate": 2.849038972241309e-05, "loss": 0.1003, "step": 9583 }, { "epoch": 0.1697282028804562, "grad_norm": 1.0493929386138916, "learning_rate": 2.8490013539981165e-05, "loss": 0.131, "step": 9584 }, { "epoch": 0.16974591241748463, "grad_norm": 1.0914883613586426, "learning_rate": 2.848963731316846e-05, "loss": 0.103, "step": 9585 }, { "epoch": 0.16976362195451306, "grad_norm": 0.9311854839324951, "learning_rate": 2.848926104197621e-05, "loss": 0.1314, "step": 9586 }, { "epoch": 0.16978133149154148, "grad_norm": 0.7013888359069824, "learning_rate": 2.8488884726405658e-05, "loss": 0.1229, "step": 9587 }, { "epoch": 0.1697990410285699, "grad_norm": 1.1922742128372192, "learning_rate": 2.8488508366458036e-05, "loss": 0.0937, "step": 9588 }, { "epoch": 0.16981675056559833, "grad_norm": 0.8904172778129578, "learning_rate": 2.8488131962134588e-05, "loss": 0.0921, "step": 9589 }, { "epoch": 0.16983446010262676, "grad_norm": 1.4228392839431763, "learning_rate": 2.848775551343655e-05, "loss": 0.0953, "step": 9590 }, { "epoch": 0.1698521696396552, "grad_norm": 0.6255881190299988, "learning_rate": 2.848737902036516e-05, "loss": 0.0901, "step": 9591 }, { "epoch": 0.1698698791766836, "grad_norm": 1.0091992616653442, "learning_rate": 2.8487002482921658e-05, "loss": 0.1044, "step": 9592 }, { "epoch": 0.16988758871371204, "grad_norm": 0.5982216596603394, "learning_rate": 2.8486625901107284e-05, "loss": 0.0736, "step": 9593 }, { "epoch": 0.16990529825074047, "grad_norm": 0.9457924365997314, "learning_rate": 2.8486249274923272e-05, "loss": 0.1284, "step": 9594 }, { "epoch": 0.16992300778776892, "grad_norm": 1.342279314994812, "learning_rate": 2.8485872604370864e-05, "loss": 0.1423, "step": 9595 }, { "epoch": 0.16994071732479735, "grad_norm": 0.9160152077674866, "learning_rate": 2.84854958894513e-05, "loss": 0.1154, "step": 9596 }, { "epoch": 0.16995842686182577, "grad_norm": 0.8386903405189514, "learning_rate": 2.848511913016582e-05, "loss": 0.081, "step": 9597 }, { "epoch": 0.1699761363988542, "grad_norm": 1.21435546875, "learning_rate": 2.8484742326515657e-05, "loss": 0.1412, "step": 9598 }, { "epoch": 0.16999384593588263, "grad_norm": 1.2547765970230103, "learning_rate": 2.848436547850206e-05, "loss": 0.1033, "step": 9599 }, { "epoch": 0.17001155547291105, "grad_norm": 0.8545218110084534, "learning_rate": 2.848398858612626e-05, "loss": 0.0952, "step": 9600 }, { "epoch": 0.17002926500993948, "grad_norm": 1.0275746583938599, "learning_rate": 2.8483611649389508e-05, "loss": 0.1486, "step": 9601 }, { "epoch": 0.1700469745469679, "grad_norm": 1.396996021270752, "learning_rate": 2.8483234668293032e-05, "loss": 0.1592, "step": 9602 }, { "epoch": 0.17006468408399633, "grad_norm": 1.0569900274276733, "learning_rate": 2.848285764283808e-05, "loss": 0.1125, "step": 9603 }, { "epoch": 0.17008239362102476, "grad_norm": 1.0375638008117676, "learning_rate": 2.8482480573025892e-05, "loss": 0.0886, "step": 9604 }, { "epoch": 0.17010010315805318, "grad_norm": 0.6248774528503418, "learning_rate": 2.84821034588577e-05, "loss": 0.0876, "step": 9605 }, { "epoch": 0.1701178126950816, "grad_norm": 0.8200719356536865, "learning_rate": 2.848172630033476e-05, "loss": 0.1562, "step": 9606 }, { "epoch": 0.17013552223211004, "grad_norm": 0.9342297315597534, "learning_rate": 2.8481349097458298e-05, "loss": 0.1248, "step": 9607 }, { "epoch": 0.17015323176913846, "grad_norm": 1.0272281169891357, "learning_rate": 2.8480971850229563e-05, "loss": 0.0834, "step": 9608 }, { "epoch": 0.17017094130616692, "grad_norm": 1.358957052230835, "learning_rate": 2.8480594558649796e-05, "loss": 0.101, "step": 9609 }, { "epoch": 0.17018865084319534, "grad_norm": 0.868299663066864, "learning_rate": 2.8480217222720234e-05, "loss": 0.1049, "step": 9610 }, { "epoch": 0.17020636038022377, "grad_norm": 1.4873130321502686, "learning_rate": 2.8479839842442118e-05, "loss": 0.0799, "step": 9611 }, { "epoch": 0.1702240699172522, "grad_norm": 0.9899519085884094, "learning_rate": 2.84794624178167e-05, "loss": 0.1024, "step": 9612 }, { "epoch": 0.17024177945428062, "grad_norm": 1.503692388534546, "learning_rate": 2.8479084948845206e-05, "loss": 0.1401, "step": 9613 }, { "epoch": 0.17025948899130905, "grad_norm": 2.254816770553589, "learning_rate": 2.8478707435528893e-05, "loss": 0.0991, "step": 9614 }, { "epoch": 0.17027719852833748, "grad_norm": 1.1354652643203735, "learning_rate": 2.847832987786899e-05, "loss": 0.134, "step": 9615 }, { "epoch": 0.1702949080653659, "grad_norm": 0.9346436858177185, "learning_rate": 2.8477952275866748e-05, "loss": 0.1095, "step": 9616 }, { "epoch": 0.17031261760239433, "grad_norm": 1.1471525430679321, "learning_rate": 2.8477574629523406e-05, "loss": 0.0999, "step": 9617 }, { "epoch": 0.17033032713942275, "grad_norm": 0.801741898059845, "learning_rate": 2.8477196938840204e-05, "loss": 0.1051, "step": 9618 }, { "epoch": 0.17034803667645118, "grad_norm": 0.8349654674530029, "learning_rate": 2.847681920381839e-05, "loss": 0.0784, "step": 9619 }, { "epoch": 0.1703657462134796, "grad_norm": 0.6963618993759155, "learning_rate": 2.84764414244592e-05, "loss": 0.0864, "step": 9620 }, { "epoch": 0.17038345575050803, "grad_norm": 1.1345821619033813, "learning_rate": 2.8476063600763883e-05, "loss": 0.1523, "step": 9621 }, { "epoch": 0.17040116528753646, "grad_norm": 1.4664119482040405, "learning_rate": 2.8475685732733686e-05, "loss": 0.1524, "step": 9622 }, { "epoch": 0.1704188748245649, "grad_norm": 0.9847407341003418, "learning_rate": 2.8475307820369835e-05, "loss": 0.1006, "step": 9623 }, { "epoch": 0.17043658436159334, "grad_norm": 0.9450722336769104, "learning_rate": 2.847492986367359e-05, "loss": 0.0969, "step": 9624 }, { "epoch": 0.17045429389862177, "grad_norm": 1.1410061120986938, "learning_rate": 2.847455186264619e-05, "loss": 0.1361, "step": 9625 }, { "epoch": 0.1704720034356502, "grad_norm": 0.6544849872589111, "learning_rate": 2.847417381728888e-05, "loss": 0.12, "step": 9626 }, { "epoch": 0.17048971297267862, "grad_norm": 0.9912419319152832, "learning_rate": 2.8473795727602892e-05, "loss": 0.0879, "step": 9627 }, { "epoch": 0.17050742250970705, "grad_norm": 0.9547034502029419, "learning_rate": 2.8473417593589482e-05, "loss": 0.1224, "step": 9628 }, { "epoch": 0.17052513204673547, "grad_norm": 2.028982400894165, "learning_rate": 2.8473039415249896e-05, "loss": 0.1061, "step": 9629 }, { "epoch": 0.1705428415837639, "grad_norm": 0.931089460849762, "learning_rate": 2.8472661192585368e-05, "loss": 0.1352, "step": 9630 }, { "epoch": 0.17056055112079233, "grad_norm": 1.1841124296188354, "learning_rate": 2.847228292559715e-05, "loss": 0.1092, "step": 9631 }, { "epoch": 0.17057826065782075, "grad_norm": 1.2020772695541382, "learning_rate": 2.8471904614286483e-05, "loss": 0.1101, "step": 9632 }, { "epoch": 0.17059597019484918, "grad_norm": 0.7826893329620361, "learning_rate": 2.847152625865461e-05, "loss": 0.1002, "step": 9633 }, { "epoch": 0.1706136797318776, "grad_norm": 1.0310893058776855, "learning_rate": 2.8471147858702782e-05, "loss": 0.1127, "step": 9634 }, { "epoch": 0.17063138926890603, "grad_norm": 1.5002315044403076, "learning_rate": 2.8470769414432235e-05, "loss": 0.1173, "step": 9635 }, { "epoch": 0.17064909880593446, "grad_norm": 0.7307422757148743, "learning_rate": 2.8470390925844224e-05, "loss": 0.1061, "step": 9636 }, { "epoch": 0.17066680834296288, "grad_norm": 1.0547621250152588, "learning_rate": 2.847001239293999e-05, "loss": 0.1509, "step": 9637 }, { "epoch": 0.1706845178799913, "grad_norm": 1.8033348321914673, "learning_rate": 2.8469633815720775e-05, "loss": 0.105, "step": 9638 }, { "epoch": 0.17070222741701976, "grad_norm": 1.3109290599822998, "learning_rate": 2.8469255194187825e-05, "loss": 0.1227, "step": 9639 }, { "epoch": 0.1707199369540482, "grad_norm": 1.197267770767212, "learning_rate": 2.846887652834239e-05, "loss": 0.0927, "step": 9640 }, { "epoch": 0.17073764649107662, "grad_norm": 0.8853595852851868, "learning_rate": 2.8468497818185718e-05, "loss": 0.1294, "step": 9641 }, { "epoch": 0.17075535602810504, "grad_norm": 1.0106191635131836, "learning_rate": 2.8468119063719044e-05, "loss": 0.1377, "step": 9642 }, { "epoch": 0.17077306556513347, "grad_norm": 0.8322656750679016, "learning_rate": 2.846774026494362e-05, "loss": 0.0826, "step": 9643 }, { "epoch": 0.1707907751021619, "grad_norm": 0.8673305511474609, "learning_rate": 2.8467361421860695e-05, "loss": 0.1048, "step": 9644 }, { "epoch": 0.17080848463919032, "grad_norm": 1.0130351781845093, "learning_rate": 2.8466982534471513e-05, "loss": 0.123, "step": 9645 }, { "epoch": 0.17082619417621875, "grad_norm": 0.5775085687637329, "learning_rate": 2.8466603602777323e-05, "loss": 0.1019, "step": 9646 }, { "epoch": 0.17084390371324717, "grad_norm": 0.8353055715560913, "learning_rate": 2.8466224626779367e-05, "loss": 0.0842, "step": 9647 }, { "epoch": 0.1708616132502756, "grad_norm": 0.9104469418525696, "learning_rate": 2.8465845606478895e-05, "loss": 0.1098, "step": 9648 }, { "epoch": 0.17087932278730403, "grad_norm": 1.5451496839523315, "learning_rate": 2.8465466541877152e-05, "loss": 0.1205, "step": 9649 }, { "epoch": 0.17089703232433245, "grad_norm": 0.7858447432518005, "learning_rate": 2.8465087432975383e-05, "loss": 0.1091, "step": 9650 }, { "epoch": 0.17091474186136088, "grad_norm": 0.7802098393440247, "learning_rate": 2.8464708279774844e-05, "loss": 0.1194, "step": 9651 }, { "epoch": 0.1709324513983893, "grad_norm": 1.4437090158462524, "learning_rate": 2.8464329082276773e-05, "loss": 0.0887, "step": 9652 }, { "epoch": 0.17095016093541773, "grad_norm": 1.2080371379852295, "learning_rate": 2.8463949840482422e-05, "loss": 0.1012, "step": 9653 }, { "epoch": 0.1709678704724462, "grad_norm": 0.7434712052345276, "learning_rate": 2.846357055439304e-05, "loss": 0.1441, "step": 9654 }, { "epoch": 0.1709855800094746, "grad_norm": 1.1406309604644775, "learning_rate": 2.846319122400987e-05, "loss": 0.0992, "step": 9655 }, { "epoch": 0.17100328954650304, "grad_norm": 1.004723072052002, "learning_rate": 2.8462811849334164e-05, "loss": 0.0652, "step": 9656 }, { "epoch": 0.17102099908353147, "grad_norm": 0.9272672533988953, "learning_rate": 2.8462432430367166e-05, "loss": 0.1267, "step": 9657 }, { "epoch": 0.1710387086205599, "grad_norm": 1.1029069423675537, "learning_rate": 2.846205296711013e-05, "loss": 0.1355, "step": 9658 }, { "epoch": 0.17105641815758832, "grad_norm": 0.9614199995994568, "learning_rate": 2.84616734595643e-05, "loss": 0.1195, "step": 9659 }, { "epoch": 0.17107412769461675, "grad_norm": 0.8282696008682251, "learning_rate": 2.8461293907730924e-05, "loss": 0.0854, "step": 9660 }, { "epoch": 0.17109183723164517, "grad_norm": 1.0605974197387695, "learning_rate": 2.8460914311611257e-05, "loss": 0.1095, "step": 9661 }, { "epoch": 0.1711095467686736, "grad_norm": 0.5815750956535339, "learning_rate": 2.846053467120654e-05, "loss": 0.0649, "step": 9662 }, { "epoch": 0.17112725630570202, "grad_norm": 0.9234621524810791, "learning_rate": 2.8460154986518027e-05, "loss": 0.101, "step": 9663 }, { "epoch": 0.17114496584273045, "grad_norm": 1.3512355089187622, "learning_rate": 2.8459775257546964e-05, "loss": 0.1123, "step": 9664 }, { "epoch": 0.17116267537975888, "grad_norm": 0.7295389771461487, "learning_rate": 2.8459395484294605e-05, "loss": 0.116, "step": 9665 }, { "epoch": 0.1711803849167873, "grad_norm": 1.1147438287734985, "learning_rate": 2.8459015666762192e-05, "loss": 0.1635, "step": 9666 }, { "epoch": 0.17119809445381573, "grad_norm": 1.0076444149017334, "learning_rate": 2.845863580495098e-05, "loss": 0.0931, "step": 9667 }, { "epoch": 0.17121580399084416, "grad_norm": 1.710950255393982, "learning_rate": 2.8458255898862218e-05, "loss": 0.1236, "step": 9668 }, { "epoch": 0.1712335135278726, "grad_norm": 1.433102011680603, "learning_rate": 2.8457875948497153e-05, "loss": 0.1178, "step": 9669 }, { "epoch": 0.17125122306490104, "grad_norm": 0.8228042125701904, "learning_rate": 2.845749595385704e-05, "loss": 0.1076, "step": 9670 }, { "epoch": 0.17126893260192946, "grad_norm": 0.7456841468811035, "learning_rate": 2.845711591494313e-05, "loss": 0.1054, "step": 9671 }, { "epoch": 0.1712866421389579, "grad_norm": 1.1633005142211914, "learning_rate": 2.8456735831756665e-05, "loss": 0.1133, "step": 9672 }, { "epoch": 0.17130435167598632, "grad_norm": 1.2888603210449219, "learning_rate": 2.84563557042989e-05, "loss": 0.0976, "step": 9673 }, { "epoch": 0.17132206121301474, "grad_norm": 0.7604508399963379, "learning_rate": 2.845597553257109e-05, "loss": 0.0904, "step": 9674 }, { "epoch": 0.17133977075004317, "grad_norm": 1.014048457145691, "learning_rate": 2.8455595316574473e-05, "loss": 0.1045, "step": 9675 }, { "epoch": 0.1713574802870716, "grad_norm": 1.0760456323623657, "learning_rate": 2.8455215056310316e-05, "loss": 0.104, "step": 9676 }, { "epoch": 0.17137518982410002, "grad_norm": 1.1967287063598633, "learning_rate": 2.845483475177986e-05, "loss": 0.119, "step": 9677 }, { "epoch": 0.17139289936112845, "grad_norm": 1.5766271352767944, "learning_rate": 2.845445440298436e-05, "loss": 0.1467, "step": 9678 }, { "epoch": 0.17141060889815687, "grad_norm": 1.048200249671936, "learning_rate": 2.8454074009925066e-05, "loss": 0.1554, "step": 9679 }, { "epoch": 0.1714283184351853, "grad_norm": 0.803631067276001, "learning_rate": 2.8453693572603226e-05, "loss": 0.1059, "step": 9680 }, { "epoch": 0.17144602797221373, "grad_norm": 0.5737667679786682, "learning_rate": 2.8453313091020093e-05, "loss": 0.0977, "step": 9681 }, { "epoch": 0.17146373750924215, "grad_norm": 1.2016998529434204, "learning_rate": 2.8452932565176924e-05, "loss": 0.1132, "step": 9682 }, { "epoch": 0.17148144704627058, "grad_norm": 1.2795495986938477, "learning_rate": 2.8452551995074972e-05, "loss": 0.1342, "step": 9683 }, { "epoch": 0.17149915658329903, "grad_norm": 1.1380590200424194, "learning_rate": 2.845217138071548e-05, "loss": 0.1, "step": 9684 }, { "epoch": 0.17151686612032746, "grad_norm": 0.9118197560310364, "learning_rate": 2.8451790722099704e-05, "loss": 0.0923, "step": 9685 }, { "epoch": 0.1715345756573559, "grad_norm": 1.1150999069213867, "learning_rate": 2.8451410019228902e-05, "loss": 0.0896, "step": 9686 }, { "epoch": 0.1715522851943843, "grad_norm": 1.0299720764160156, "learning_rate": 2.8451029272104315e-05, "loss": 0.1721, "step": 9687 }, { "epoch": 0.17156999473141274, "grad_norm": 1.3050892353057861, "learning_rate": 2.845064848072721e-05, "loss": 0.1286, "step": 9688 }, { "epoch": 0.17158770426844117, "grad_norm": 1.3679547309875488, "learning_rate": 2.8450267645098823e-05, "loss": 0.1447, "step": 9689 }, { "epoch": 0.1716054138054696, "grad_norm": 0.9995366930961609, "learning_rate": 2.844988676522042e-05, "loss": 0.0952, "step": 9690 }, { "epoch": 0.17162312334249802, "grad_norm": 1.118523120880127, "learning_rate": 2.844950584109325e-05, "loss": 0.1052, "step": 9691 }, { "epoch": 0.17164083287952644, "grad_norm": 0.9221858382225037, "learning_rate": 2.8449124872718565e-05, "loss": 0.1043, "step": 9692 }, { "epoch": 0.17165854241655487, "grad_norm": 1.5564910173416138, "learning_rate": 2.844874386009762e-05, "loss": 0.1176, "step": 9693 }, { "epoch": 0.1716762519535833, "grad_norm": 0.9150396585464478, "learning_rate": 2.844836280323167e-05, "loss": 0.0941, "step": 9694 }, { "epoch": 0.17169396149061172, "grad_norm": 1.6891340017318726, "learning_rate": 2.8447981702121965e-05, "loss": 0.1435, "step": 9695 }, { "epoch": 0.17171167102764015, "grad_norm": 0.9507501721382141, "learning_rate": 2.844760055676976e-05, "loss": 0.1672, "step": 9696 }, { "epoch": 0.17172938056466858, "grad_norm": 0.8168551325798035, "learning_rate": 2.844721936717631e-05, "loss": 0.1281, "step": 9697 }, { "epoch": 0.171747090101697, "grad_norm": 1.1991031169891357, "learning_rate": 2.8446838133342868e-05, "loss": 0.1197, "step": 9698 }, { "epoch": 0.17176479963872546, "grad_norm": 0.9731524586677551, "learning_rate": 2.8446456855270687e-05, "loss": 0.12, "step": 9699 }, { "epoch": 0.17178250917575388, "grad_norm": 1.1622105836868286, "learning_rate": 2.8446075532961025e-05, "loss": 0.1105, "step": 9700 }, { "epoch": 0.1718002187127823, "grad_norm": 1.6252647638320923, "learning_rate": 2.8445694166415135e-05, "loss": 0.125, "step": 9701 }, { "epoch": 0.17181792824981074, "grad_norm": 1.1110788583755493, "learning_rate": 2.844531275563427e-05, "loss": 0.1561, "step": 9702 }, { "epoch": 0.17183563778683916, "grad_norm": 1.3022571802139282, "learning_rate": 2.8444931300619688e-05, "loss": 0.1628, "step": 9703 }, { "epoch": 0.1718533473238676, "grad_norm": 0.9121233820915222, "learning_rate": 2.844454980137264e-05, "loss": 0.0799, "step": 9704 }, { "epoch": 0.17187105686089602, "grad_norm": 1.2463253736495972, "learning_rate": 2.8444168257894384e-05, "loss": 0.1286, "step": 9705 }, { "epoch": 0.17188876639792444, "grad_norm": 1.3752853870391846, "learning_rate": 2.844378667018617e-05, "loss": 0.0915, "step": 9706 }, { "epoch": 0.17190647593495287, "grad_norm": 0.5712572932243347, "learning_rate": 2.844340503824926e-05, "loss": 0.1342, "step": 9707 }, { "epoch": 0.1719241854719813, "grad_norm": 1.2207832336425781, "learning_rate": 2.844302336208491e-05, "loss": 0.1538, "step": 9708 }, { "epoch": 0.17194189500900972, "grad_norm": 0.5219871401786804, "learning_rate": 2.844264164169437e-05, "loss": 0.1041, "step": 9709 }, { "epoch": 0.17195960454603815, "grad_norm": 1.4888277053833008, "learning_rate": 2.8442259877078898e-05, "loss": 0.0954, "step": 9710 }, { "epoch": 0.17197731408306657, "grad_norm": 0.6815608143806458, "learning_rate": 2.8441878068239755e-05, "loss": 0.0703, "step": 9711 }, { "epoch": 0.171995023620095, "grad_norm": 1.048115849494934, "learning_rate": 2.8441496215178188e-05, "loss": 0.125, "step": 9712 }, { "epoch": 0.17201273315712343, "grad_norm": 0.8668369650840759, "learning_rate": 2.8441114317895463e-05, "loss": 0.1001, "step": 9713 }, { "epoch": 0.17203044269415188, "grad_norm": 0.8659136295318604, "learning_rate": 2.8440732376392823e-05, "loss": 0.1372, "step": 9714 }, { "epoch": 0.1720481522311803, "grad_norm": 0.9722048044204712, "learning_rate": 2.8440350390671544e-05, "loss": 0.1074, "step": 9715 }, { "epoch": 0.17206586176820873, "grad_norm": 1.3025166988372803, "learning_rate": 2.8439968360732865e-05, "loss": 0.1145, "step": 9716 }, { "epoch": 0.17208357130523716, "grad_norm": 1.8122550249099731, "learning_rate": 2.8439586286578046e-05, "loss": 0.109, "step": 9717 }, { "epoch": 0.17210128084226559, "grad_norm": 1.0911505222320557, "learning_rate": 2.843920416820835e-05, "loss": 0.154, "step": 9718 }, { "epoch": 0.172118990379294, "grad_norm": 0.9134747982025146, "learning_rate": 2.8438822005625037e-05, "loss": 0.1563, "step": 9719 }, { "epoch": 0.17213669991632244, "grad_norm": 0.7622043490409851, "learning_rate": 2.8438439798829355e-05, "loss": 0.1176, "step": 9720 }, { "epoch": 0.17215440945335087, "grad_norm": 0.9583179950714111, "learning_rate": 2.8438057547822567e-05, "loss": 0.0884, "step": 9721 }, { "epoch": 0.1721721189903793, "grad_norm": 0.9837190508842468, "learning_rate": 2.8437675252605925e-05, "loss": 0.0976, "step": 9722 }, { "epoch": 0.17218982852740772, "grad_norm": 0.9370942711830139, "learning_rate": 2.8437292913180692e-05, "loss": 0.1567, "step": 9723 }, { "epoch": 0.17220753806443614, "grad_norm": 0.876524806022644, "learning_rate": 2.8436910529548125e-05, "loss": 0.1227, "step": 9724 }, { "epoch": 0.17222524760146457, "grad_norm": 0.7732200026512146, "learning_rate": 2.843652810170948e-05, "loss": 0.1071, "step": 9725 }, { "epoch": 0.172242957138493, "grad_norm": 0.9881596565246582, "learning_rate": 2.843614562966602e-05, "loss": 0.1505, "step": 9726 }, { "epoch": 0.17226066667552142, "grad_norm": 0.8933213949203491, "learning_rate": 2.8435763113418998e-05, "loss": 0.1461, "step": 9727 }, { "epoch": 0.17227837621254985, "grad_norm": 1.1712220907211304, "learning_rate": 2.8435380552969675e-05, "loss": 0.1104, "step": 9728 }, { "epoch": 0.1722960857495783, "grad_norm": 0.7211456298828125, "learning_rate": 2.8434997948319307e-05, "loss": 0.0841, "step": 9729 }, { "epoch": 0.17231379528660673, "grad_norm": 0.981401801109314, "learning_rate": 2.8434615299469154e-05, "loss": 0.1057, "step": 9730 }, { "epoch": 0.17233150482363516, "grad_norm": 0.8386268019676208, "learning_rate": 2.8434232606420483e-05, "loss": 0.082, "step": 9731 }, { "epoch": 0.17234921436066358, "grad_norm": 0.8137356638908386, "learning_rate": 2.8433849869174537e-05, "loss": 0.0925, "step": 9732 }, { "epoch": 0.172366923897692, "grad_norm": 0.6351497769355774, "learning_rate": 2.8433467087732584e-05, "loss": 0.1284, "step": 9733 }, { "epoch": 0.17238463343472044, "grad_norm": 0.7849167585372925, "learning_rate": 2.8433084262095887e-05, "loss": 0.0735, "step": 9734 }, { "epoch": 0.17240234297174886, "grad_norm": 1.1544079780578613, "learning_rate": 2.84327013922657e-05, "loss": 0.1384, "step": 9735 }, { "epoch": 0.1724200525087773, "grad_norm": 0.7675365805625916, "learning_rate": 2.8432318478243284e-05, "loss": 0.0897, "step": 9736 }, { "epoch": 0.17243776204580571, "grad_norm": 0.9578295946121216, "learning_rate": 2.8431935520029905e-05, "loss": 0.1041, "step": 9737 }, { "epoch": 0.17245547158283414, "grad_norm": 1.032612919807434, "learning_rate": 2.8431552517626804e-05, "loss": 0.1189, "step": 9738 }, { "epoch": 0.17247318111986257, "grad_norm": 1.2716526985168457, "learning_rate": 2.8431169471035264e-05, "loss": 0.1239, "step": 9739 }, { "epoch": 0.172490890656891, "grad_norm": 0.9255229830741882, "learning_rate": 2.8430786380256533e-05, "loss": 0.1064, "step": 9740 }, { "epoch": 0.17250860019391942, "grad_norm": 0.9342133402824402, "learning_rate": 2.8430403245291868e-05, "loss": 0.1242, "step": 9741 }, { "epoch": 0.17252630973094785, "grad_norm": 1.2781836986541748, "learning_rate": 2.8430020066142538e-05, "loss": 0.1055, "step": 9742 }, { "epoch": 0.1725440192679763, "grad_norm": 0.544649064540863, "learning_rate": 2.8429636842809802e-05, "loss": 0.1246, "step": 9743 }, { "epoch": 0.17256172880500473, "grad_norm": 1.1167083978652954, "learning_rate": 2.8429253575294915e-05, "loss": 0.1073, "step": 9744 }, { "epoch": 0.17257943834203315, "grad_norm": 0.7783976793289185, "learning_rate": 2.8428870263599146e-05, "loss": 0.1174, "step": 9745 }, { "epoch": 0.17259714787906158, "grad_norm": 1.2406680583953857, "learning_rate": 2.8428486907723755e-05, "loss": 0.1224, "step": 9746 }, { "epoch": 0.17261485741609, "grad_norm": 0.7558615207672119, "learning_rate": 2.842810350766999e-05, "loss": 0.1117, "step": 9747 }, { "epoch": 0.17263256695311843, "grad_norm": 1.0597410202026367, "learning_rate": 2.842772006343913e-05, "loss": 0.1049, "step": 9748 }, { "epoch": 0.17265027649014686, "grad_norm": 0.6198666095733643, "learning_rate": 2.842733657503243e-05, "loss": 0.0945, "step": 9749 }, { "epoch": 0.17266798602717529, "grad_norm": 1.3267107009887695, "learning_rate": 2.842695304245115e-05, "loss": 0.1461, "step": 9750 }, { "epoch": 0.1726856955642037, "grad_norm": 0.9399333000183105, "learning_rate": 2.8426569465696548e-05, "loss": 0.0896, "step": 9751 }, { "epoch": 0.17270340510123214, "grad_norm": 0.9894494414329529, "learning_rate": 2.8426185844769893e-05, "loss": 0.1209, "step": 9752 }, { "epoch": 0.17272111463826056, "grad_norm": 0.7595548629760742, "learning_rate": 2.842580217967245e-05, "loss": 0.1004, "step": 9753 }, { "epoch": 0.172738824175289, "grad_norm": 0.9935443997383118, "learning_rate": 2.842541847040547e-05, "loss": 0.126, "step": 9754 }, { "epoch": 0.17275653371231742, "grad_norm": 0.9558256268501282, "learning_rate": 2.842503471697022e-05, "loss": 0.0984, "step": 9755 }, { "epoch": 0.17277424324934584, "grad_norm": 0.7789865732192993, "learning_rate": 2.8424650919367968e-05, "loss": 0.1056, "step": 9756 }, { "epoch": 0.17279195278637427, "grad_norm": 0.8260749578475952, "learning_rate": 2.8424267077599966e-05, "loss": 0.1296, "step": 9757 }, { "epoch": 0.17280966232340272, "grad_norm": 0.49241793155670166, "learning_rate": 2.8423883191667487e-05, "loss": 0.0848, "step": 9758 }, { "epoch": 0.17282737186043115, "grad_norm": 1.4969193935394287, "learning_rate": 2.842349926157179e-05, "loss": 0.1748, "step": 9759 }, { "epoch": 0.17284508139745958, "grad_norm": 1.109923005104065, "learning_rate": 2.8423115287314138e-05, "loss": 0.1221, "step": 9760 }, { "epoch": 0.172862790934488, "grad_norm": 0.8307128548622131, "learning_rate": 2.84227312688958e-05, "loss": 0.131, "step": 9761 }, { "epoch": 0.17288050047151643, "grad_norm": 1.2537587881088257, "learning_rate": 2.8422347206318025e-05, "loss": 0.1171, "step": 9762 }, { "epoch": 0.17289821000854486, "grad_norm": 1.2190406322479248, "learning_rate": 2.8421963099582088e-05, "loss": 0.1598, "step": 9763 }, { "epoch": 0.17291591954557328, "grad_norm": 0.5794843435287476, "learning_rate": 2.842157894868925e-05, "loss": 0.0675, "step": 9764 }, { "epoch": 0.1729336290826017, "grad_norm": 1.1575124263763428, "learning_rate": 2.8421194753640777e-05, "loss": 0.119, "step": 9765 }, { "epoch": 0.17295133861963013, "grad_norm": 0.7479633092880249, "learning_rate": 2.8420810514437927e-05, "loss": 0.0879, "step": 9766 }, { "epoch": 0.17296904815665856, "grad_norm": 1.7946522235870361, "learning_rate": 2.842042623108197e-05, "loss": 0.1133, "step": 9767 }, { "epoch": 0.172986757693687, "grad_norm": 0.930485188961029, "learning_rate": 2.842004190357417e-05, "loss": 0.1204, "step": 9768 }, { "epoch": 0.17300446723071541, "grad_norm": 0.9773157835006714, "learning_rate": 2.8419657531915787e-05, "loss": 0.1201, "step": 9769 }, { "epoch": 0.17302217676774384, "grad_norm": 0.7851682901382446, "learning_rate": 2.8419273116108092e-05, "loss": 0.0933, "step": 9770 }, { "epoch": 0.17303988630477227, "grad_norm": 0.9273061752319336, "learning_rate": 2.8418888656152338e-05, "loss": 0.1551, "step": 9771 }, { "epoch": 0.1730575958418007, "grad_norm": 0.8221361637115479, "learning_rate": 2.8418504152049804e-05, "loss": 0.1103, "step": 9772 }, { "epoch": 0.17307530537882915, "grad_norm": 0.8395161032676697, "learning_rate": 2.8418119603801745e-05, "loss": 0.1181, "step": 9773 }, { "epoch": 0.17309301491585757, "grad_norm": 1.286824345588684, "learning_rate": 2.8417735011409432e-05, "loss": 0.117, "step": 9774 }, { "epoch": 0.173110724452886, "grad_norm": 0.7474492788314819, "learning_rate": 2.8417350374874126e-05, "loss": 0.1109, "step": 9775 }, { "epoch": 0.17312843398991443, "grad_norm": 1.0296692848205566, "learning_rate": 2.8416965694197097e-05, "loss": 0.1249, "step": 9776 }, { "epoch": 0.17314614352694285, "grad_norm": 1.5705794095993042, "learning_rate": 2.8416580969379606e-05, "loss": 0.1064, "step": 9777 }, { "epoch": 0.17316385306397128, "grad_norm": 1.1719881296157837, "learning_rate": 2.841619620042292e-05, "loss": 0.1413, "step": 9778 }, { "epoch": 0.1731815626009997, "grad_norm": 1.1718686819076538, "learning_rate": 2.841581138732831e-05, "loss": 0.1319, "step": 9779 }, { "epoch": 0.17319927213802813, "grad_norm": 1.136078953742981, "learning_rate": 2.841542653009703e-05, "loss": 0.1328, "step": 9780 }, { "epoch": 0.17321698167505656, "grad_norm": 0.6222586631774902, "learning_rate": 2.8415041628730358e-05, "loss": 0.1042, "step": 9781 }, { "epoch": 0.17323469121208498, "grad_norm": 0.9969844818115234, "learning_rate": 2.8414656683229556e-05, "loss": 0.105, "step": 9782 }, { "epoch": 0.1732524007491134, "grad_norm": 1.1004960536956787, "learning_rate": 2.841427169359589e-05, "loss": 0.1045, "step": 9783 }, { "epoch": 0.17327011028614184, "grad_norm": 0.9720809459686279, "learning_rate": 2.8413886659830627e-05, "loss": 0.1434, "step": 9784 }, { "epoch": 0.17328781982317026, "grad_norm": 0.9202247262001038, "learning_rate": 2.8413501581935027e-05, "loss": 0.0988, "step": 9785 }, { "epoch": 0.1733055293601987, "grad_norm": 1.1295303106307983, "learning_rate": 2.841311645991037e-05, "loss": 0.1538, "step": 9786 }, { "epoch": 0.17332323889722712, "grad_norm": 0.7747654914855957, "learning_rate": 2.841273129375792e-05, "loss": 0.0773, "step": 9787 }, { "epoch": 0.17334094843425557, "grad_norm": 1.0797736644744873, "learning_rate": 2.8412346083478937e-05, "loss": 0.1095, "step": 9788 }, { "epoch": 0.173358657971284, "grad_norm": 0.9770507216453552, "learning_rate": 2.8411960829074692e-05, "loss": 0.1129, "step": 9789 }, { "epoch": 0.17337636750831242, "grad_norm": 2.792896032333374, "learning_rate": 2.841157553054645e-05, "loss": 0.1425, "step": 9790 }, { "epoch": 0.17339407704534085, "grad_norm": 0.9471035003662109, "learning_rate": 2.8411190187895485e-05, "loss": 0.1286, "step": 9791 }, { "epoch": 0.17341178658236928, "grad_norm": 0.9890639781951904, "learning_rate": 2.841080480112306e-05, "loss": 0.1652, "step": 9792 }, { "epoch": 0.1734294961193977, "grad_norm": 1.0695961713790894, "learning_rate": 2.841041937023044e-05, "loss": 0.1181, "step": 9793 }, { "epoch": 0.17344720565642613, "grad_norm": 1.3812317848205566, "learning_rate": 2.84100338952189e-05, "loss": 0.1135, "step": 9794 }, { "epoch": 0.17346491519345456, "grad_norm": 1.197515606880188, "learning_rate": 2.8409648376089704e-05, "loss": 0.1328, "step": 9795 }, { "epoch": 0.17348262473048298, "grad_norm": 1.307058334350586, "learning_rate": 2.8409262812844124e-05, "loss": 0.1077, "step": 9796 }, { "epoch": 0.1735003342675114, "grad_norm": 1.0564653873443604, "learning_rate": 2.8408877205483422e-05, "loss": 0.1022, "step": 9797 }, { "epoch": 0.17351804380453983, "grad_norm": 1.1773864030838013, "learning_rate": 2.8408491554008868e-05, "loss": 0.128, "step": 9798 }, { "epoch": 0.17353575334156826, "grad_norm": 0.7644208073616028, "learning_rate": 2.840810585842174e-05, "loss": 0.1065, "step": 9799 }, { "epoch": 0.1735534628785967, "grad_norm": 1.0941849946975708, "learning_rate": 2.8407720118723296e-05, "loss": 0.1451, "step": 9800 }, { "epoch": 0.1735711724156251, "grad_norm": 1.0856574773788452, "learning_rate": 2.8407334334914806e-05, "loss": 0.1309, "step": 9801 }, { "epoch": 0.17358888195265354, "grad_norm": 0.6335338354110718, "learning_rate": 2.8406948506997546e-05, "loss": 0.104, "step": 9802 }, { "epoch": 0.173606591489682, "grad_norm": 1.1812217235565186, "learning_rate": 2.840656263497278e-05, "loss": 0.1292, "step": 9803 }, { "epoch": 0.17362430102671042, "grad_norm": 0.7260771989822388, "learning_rate": 2.8406176718841775e-05, "loss": 0.1549, "step": 9804 }, { "epoch": 0.17364201056373885, "grad_norm": 2.873483896255493, "learning_rate": 2.8405790758605812e-05, "loss": 0.1557, "step": 9805 }, { "epoch": 0.17365972010076727, "grad_norm": 1.2602108716964722, "learning_rate": 2.840540475426615e-05, "loss": 0.0876, "step": 9806 }, { "epoch": 0.1736774296377957, "grad_norm": 0.9358903169631958, "learning_rate": 2.8405018705824063e-05, "loss": 0.1241, "step": 9807 }, { "epoch": 0.17369513917482413, "grad_norm": 1.0435656309127808, "learning_rate": 2.840463261328082e-05, "loss": 0.117, "step": 9808 }, { "epoch": 0.17371284871185255, "grad_norm": 1.3369591236114502, "learning_rate": 2.8404246476637688e-05, "loss": 0.145, "step": 9809 }, { "epoch": 0.17373055824888098, "grad_norm": 1.1778764724731445, "learning_rate": 2.8403860295895945e-05, "loss": 0.1265, "step": 9810 }, { "epoch": 0.1737482677859094, "grad_norm": 0.88893061876297, "learning_rate": 2.8403474071056858e-05, "loss": 0.0968, "step": 9811 }, { "epoch": 0.17376597732293783, "grad_norm": 0.7764165997505188, "learning_rate": 2.8403087802121695e-05, "loss": 0.1357, "step": 9812 }, { "epoch": 0.17378368685996626, "grad_norm": 0.8599885106086731, "learning_rate": 2.8402701489091733e-05, "loss": 0.0842, "step": 9813 }, { "epoch": 0.17380139639699468, "grad_norm": 0.9700383543968201, "learning_rate": 2.840231513196823e-05, "loss": 0.1332, "step": 9814 }, { "epoch": 0.1738191059340231, "grad_norm": 2.0511436462402344, "learning_rate": 2.8401928730752474e-05, "loss": 0.1116, "step": 9815 }, { "epoch": 0.17383681547105154, "grad_norm": 0.9457430839538574, "learning_rate": 2.8401542285445723e-05, "loss": 0.1214, "step": 9816 }, { "epoch": 0.17385452500807996, "grad_norm": 1.4719494581222534, "learning_rate": 2.8401155796049254e-05, "loss": 0.1297, "step": 9817 }, { "epoch": 0.17387223454510842, "grad_norm": 2.143069267272949, "learning_rate": 2.840076926256434e-05, "loss": 0.1246, "step": 9818 }, { "epoch": 0.17388994408213684, "grad_norm": 1.3969862461090088, "learning_rate": 2.840038268499225e-05, "loss": 0.1071, "step": 9819 }, { "epoch": 0.17390765361916527, "grad_norm": 1.0967413187026978, "learning_rate": 2.8399996063334254e-05, "loss": 0.0713, "step": 9820 }, { "epoch": 0.1739253631561937, "grad_norm": 0.8599191904067993, "learning_rate": 2.8399609397591627e-05, "loss": 0.0927, "step": 9821 }, { "epoch": 0.17394307269322212, "grad_norm": 1.0186152458190918, "learning_rate": 2.8399222687765645e-05, "loss": 0.1357, "step": 9822 }, { "epoch": 0.17396078223025055, "grad_norm": 0.7910923361778259, "learning_rate": 2.839883593385757e-05, "loss": 0.1489, "step": 9823 }, { "epoch": 0.17397849176727898, "grad_norm": 1.0643917322158813, "learning_rate": 2.839844913586868e-05, "loss": 0.0992, "step": 9824 }, { "epoch": 0.1739962013043074, "grad_norm": 0.9695929288864136, "learning_rate": 2.839806229380025e-05, "loss": 0.1395, "step": 9825 }, { "epoch": 0.17401391084133583, "grad_norm": 1.141825556755066, "learning_rate": 2.8397675407653548e-05, "loss": 0.1163, "step": 9826 }, { "epoch": 0.17403162037836425, "grad_norm": 1.0106257200241089, "learning_rate": 2.8397288477429854e-05, "loss": 0.076, "step": 9827 }, { "epoch": 0.17404932991539268, "grad_norm": 1.236631155014038, "learning_rate": 2.839690150313043e-05, "loss": 0.1149, "step": 9828 }, { "epoch": 0.1740670394524211, "grad_norm": 1.2812284231185913, "learning_rate": 2.8396514484756563e-05, "loss": 0.1495, "step": 9829 }, { "epoch": 0.17408474898944953, "grad_norm": 0.8736917972564697, "learning_rate": 2.8396127422309507e-05, "loss": 0.1032, "step": 9830 }, { "epoch": 0.17410245852647796, "grad_norm": 1.2188297510147095, "learning_rate": 2.8395740315790555e-05, "loss": 0.1084, "step": 9831 }, { "epoch": 0.1741201680635064, "grad_norm": 1.2175832986831665, "learning_rate": 2.839535316520097e-05, "loss": 0.137, "step": 9832 }, { "epoch": 0.17413787760053484, "grad_norm": 0.9328343868255615, "learning_rate": 2.8394965970542026e-05, "loss": 0.124, "step": 9833 }, { "epoch": 0.17415558713756327, "grad_norm": 1.5206812620162964, "learning_rate": 2.8394578731815003e-05, "loss": 0.124, "step": 9834 }, { "epoch": 0.1741732966745917, "grad_norm": 1.1058080196380615, "learning_rate": 2.8394191449021166e-05, "loss": 0.1457, "step": 9835 }, { "epoch": 0.17419100621162012, "grad_norm": 0.8082460165023804, "learning_rate": 2.83938041221618e-05, "loss": 0.1356, "step": 9836 }, { "epoch": 0.17420871574864855, "grad_norm": 1.3298001289367676, "learning_rate": 2.839341675123817e-05, "loss": 0.1308, "step": 9837 }, { "epoch": 0.17422642528567697, "grad_norm": 0.9099335074424744, "learning_rate": 2.839302933625155e-05, "loss": 0.1224, "step": 9838 }, { "epoch": 0.1742441348227054, "grad_norm": 1.1777684688568115, "learning_rate": 2.839264187720322e-05, "loss": 0.1361, "step": 9839 }, { "epoch": 0.17426184435973383, "grad_norm": 3.18153715133667, "learning_rate": 2.8392254374094456e-05, "loss": 0.083, "step": 9840 }, { "epoch": 0.17427955389676225, "grad_norm": 0.9894368648529053, "learning_rate": 2.839186682692653e-05, "loss": 0.1144, "step": 9841 }, { "epoch": 0.17429726343379068, "grad_norm": 0.8629376292228699, "learning_rate": 2.839147923570071e-05, "loss": 0.08, "step": 9842 }, { "epoch": 0.1743149729708191, "grad_norm": 1.1106951236724854, "learning_rate": 2.8391091600418283e-05, "loss": 0.0793, "step": 9843 }, { "epoch": 0.17433268250784753, "grad_norm": 2.431396484375, "learning_rate": 2.839070392108052e-05, "loss": 0.1576, "step": 9844 }, { "epoch": 0.17435039204487596, "grad_norm": 1.0026543140411377, "learning_rate": 2.839031619768869e-05, "loss": 0.1593, "step": 9845 }, { "epoch": 0.17436810158190438, "grad_norm": 1.069933295249939, "learning_rate": 2.8389928430244084e-05, "loss": 0.1327, "step": 9846 }, { "epoch": 0.1743858111189328, "grad_norm": 1.6574393510818481, "learning_rate": 2.838954061874796e-05, "loss": 0.1127, "step": 9847 }, { "epoch": 0.17440352065596126, "grad_norm": 0.6604463458061218, "learning_rate": 2.83891527632016e-05, "loss": 0.1133, "step": 9848 }, { "epoch": 0.1744212301929897, "grad_norm": 1.400056004524231, "learning_rate": 2.8388764863606285e-05, "loss": 0.1191, "step": 9849 }, { "epoch": 0.17443893973001812, "grad_norm": 1.4312825202941895, "learning_rate": 2.8388376919963285e-05, "loss": 0.1363, "step": 9850 }, { "epoch": 0.17445664926704654, "grad_norm": 0.9157159924507141, "learning_rate": 2.838798893227388e-05, "loss": 0.0657, "step": 9851 }, { "epoch": 0.17447435880407497, "grad_norm": 1.5230515003204346, "learning_rate": 2.8387600900539354e-05, "loss": 0.142, "step": 9852 }, { "epoch": 0.1744920683411034, "grad_norm": 1.6731009483337402, "learning_rate": 2.8387212824760966e-05, "loss": 0.1273, "step": 9853 }, { "epoch": 0.17450977787813182, "grad_norm": 1.415999174118042, "learning_rate": 2.8386824704940005e-05, "loss": 0.111, "step": 9854 }, { "epoch": 0.17452748741516025, "grad_norm": 1.1932954788208008, "learning_rate": 2.8386436541077743e-05, "loss": 0.1381, "step": 9855 }, { "epoch": 0.17454519695218867, "grad_norm": 0.714989423751831, "learning_rate": 2.838604833317546e-05, "loss": 0.1014, "step": 9856 }, { "epoch": 0.1745629064892171, "grad_norm": 0.9029973745346069, "learning_rate": 2.838566008123443e-05, "loss": 0.1373, "step": 9857 }, { "epoch": 0.17458061602624553, "grad_norm": 1.0787824392318726, "learning_rate": 2.8385271785255932e-05, "loss": 0.0746, "step": 9858 }, { "epoch": 0.17459832556327395, "grad_norm": 0.9250756502151489, "learning_rate": 2.8384883445241246e-05, "loss": 0.0658, "step": 9859 }, { "epoch": 0.17461603510030238, "grad_norm": 1.6671130657196045, "learning_rate": 2.8384495061191648e-05, "loss": 0.0962, "step": 9860 }, { "epoch": 0.1746337446373308, "grad_norm": 0.7424905300140381, "learning_rate": 2.8384106633108415e-05, "loss": 0.099, "step": 9861 }, { "epoch": 0.17465145417435923, "grad_norm": 1.053702712059021, "learning_rate": 2.838371816099282e-05, "loss": 0.0956, "step": 9862 }, { "epoch": 0.1746691637113877, "grad_norm": 1.1311067342758179, "learning_rate": 2.838332964484615e-05, "loss": 0.1079, "step": 9863 }, { "epoch": 0.1746868732484161, "grad_norm": 2.0809812545776367, "learning_rate": 2.838294108466968e-05, "loss": 0.0753, "step": 9864 }, { "epoch": 0.17470458278544454, "grad_norm": 1.1006730794906616, "learning_rate": 2.8382552480464688e-05, "loss": 0.1496, "step": 9865 }, { "epoch": 0.17472229232247297, "grad_norm": 0.8869113922119141, "learning_rate": 2.8382163832232445e-05, "loss": 0.1377, "step": 9866 }, { "epoch": 0.1747400018595014, "grad_norm": 1.2906819581985474, "learning_rate": 2.8381775139974246e-05, "loss": 0.1308, "step": 9867 }, { "epoch": 0.17475771139652982, "grad_norm": 1.1142687797546387, "learning_rate": 2.8381386403691353e-05, "loss": 0.122, "step": 9868 }, { "epoch": 0.17477542093355825, "grad_norm": 1.1383354663848877, "learning_rate": 2.8380997623385055e-05, "loss": 0.189, "step": 9869 }, { "epoch": 0.17479313047058667, "grad_norm": 0.8204053640365601, "learning_rate": 2.8380608799056628e-05, "loss": 0.0869, "step": 9870 }, { "epoch": 0.1748108400076151, "grad_norm": 1.5764820575714111, "learning_rate": 2.8380219930707353e-05, "loss": 0.1324, "step": 9871 }, { "epoch": 0.17482854954464352, "grad_norm": 0.8806646466255188, "learning_rate": 2.8379831018338504e-05, "loss": 0.1202, "step": 9872 }, { "epoch": 0.17484625908167195, "grad_norm": 0.6955965757369995, "learning_rate": 2.8379442061951366e-05, "loss": 0.1095, "step": 9873 }, { "epoch": 0.17486396861870038, "grad_norm": 1.213453769683838, "learning_rate": 2.837905306154722e-05, "loss": 0.1491, "step": 9874 }, { "epoch": 0.1748816781557288, "grad_norm": 0.7574252486228943, "learning_rate": 2.837866401712734e-05, "loss": 0.1042, "step": 9875 }, { "epoch": 0.17489938769275723, "grad_norm": 0.7737370133399963, "learning_rate": 2.8378274928693006e-05, "loss": 0.0787, "step": 9876 }, { "epoch": 0.17491709722978568, "grad_norm": 1.1813299655914307, "learning_rate": 2.8377885796245506e-05, "loss": 0.1123, "step": 9877 }, { "epoch": 0.1749348067668141, "grad_norm": 0.8668351173400879, "learning_rate": 2.837749661978611e-05, "loss": 0.1205, "step": 9878 }, { "epoch": 0.17495251630384254, "grad_norm": 1.055519461631775, "learning_rate": 2.8377107399316108e-05, "loss": 0.1334, "step": 9879 }, { "epoch": 0.17497022584087096, "grad_norm": 0.892003059387207, "learning_rate": 2.837671813483677e-05, "loss": 0.1129, "step": 9880 }, { "epoch": 0.1749879353778994, "grad_norm": 0.8180942535400391, "learning_rate": 2.8376328826349387e-05, "loss": 0.1014, "step": 9881 }, { "epoch": 0.17500564491492782, "grad_norm": 1.034430742263794, "learning_rate": 2.837593947385523e-05, "loss": 0.1002, "step": 9882 }, { "epoch": 0.17502335445195624, "grad_norm": 0.8384910225868225, "learning_rate": 2.837555007735559e-05, "loss": 0.1217, "step": 9883 }, { "epoch": 0.17504106398898467, "grad_norm": 0.9534763097763062, "learning_rate": 2.837516063685174e-05, "loss": 0.1137, "step": 9884 }, { "epoch": 0.1750587735260131, "grad_norm": 0.800358772277832, "learning_rate": 2.8374771152344966e-05, "loss": 0.088, "step": 9885 }, { "epoch": 0.17507648306304152, "grad_norm": 0.829922080039978, "learning_rate": 2.837438162383655e-05, "loss": 0.0924, "step": 9886 }, { "epoch": 0.17509419260006995, "grad_norm": 1.487427830696106, "learning_rate": 2.8373992051327765e-05, "loss": 0.1198, "step": 9887 }, { "epoch": 0.17511190213709837, "grad_norm": 1.7032294273376465, "learning_rate": 2.8373602434819902e-05, "loss": 0.1224, "step": 9888 }, { "epoch": 0.1751296116741268, "grad_norm": 1.0877844095230103, "learning_rate": 2.837321277431424e-05, "loss": 0.1731, "step": 9889 }, { "epoch": 0.17514732121115523, "grad_norm": 1.0239191055297852, "learning_rate": 2.8372823069812063e-05, "loss": 0.1468, "step": 9890 }, { "epoch": 0.17516503074818365, "grad_norm": 0.834419310092926, "learning_rate": 2.8372433321314647e-05, "loss": 0.1038, "step": 9891 }, { "epoch": 0.1751827402852121, "grad_norm": 1.566501259803772, "learning_rate": 2.837204352882328e-05, "loss": 0.1062, "step": 9892 }, { "epoch": 0.17520044982224053, "grad_norm": 0.5972846150398254, "learning_rate": 2.837165369233924e-05, "loss": 0.0905, "step": 9893 }, { "epoch": 0.17521815935926896, "grad_norm": 1.4257385730743408, "learning_rate": 2.8371263811863813e-05, "loss": 0.1035, "step": 9894 }, { "epoch": 0.1752358688962974, "grad_norm": 1.0887902975082397, "learning_rate": 2.837087388739828e-05, "loss": 0.0917, "step": 9895 }, { "epoch": 0.1752535784333258, "grad_norm": 0.9497483968734741, "learning_rate": 2.8370483918943926e-05, "loss": 0.1159, "step": 9896 }, { "epoch": 0.17527128797035424, "grad_norm": 1.0144788026809692, "learning_rate": 2.8370093906502035e-05, "loss": 0.1452, "step": 9897 }, { "epoch": 0.17528899750738267, "grad_norm": 1.3085474967956543, "learning_rate": 2.8369703850073882e-05, "loss": 0.1254, "step": 9898 }, { "epoch": 0.1753067070444111, "grad_norm": 0.6883156895637512, "learning_rate": 2.8369313749660755e-05, "loss": 0.1656, "step": 9899 }, { "epoch": 0.17532441658143952, "grad_norm": 1.1561001539230347, "learning_rate": 2.8368923605263942e-05, "loss": 0.1272, "step": 9900 }, { "epoch": 0.17534212611846794, "grad_norm": 1.411819338798523, "learning_rate": 2.836853341688472e-05, "loss": 0.135, "step": 9901 }, { "epoch": 0.17535983565549637, "grad_norm": 1.3606224060058594, "learning_rate": 2.8368143184524377e-05, "loss": 0.0863, "step": 9902 }, { "epoch": 0.1753775451925248, "grad_norm": 1.1700129508972168, "learning_rate": 2.836775290818419e-05, "loss": 0.1346, "step": 9903 }, { "epoch": 0.17539525472955322, "grad_norm": 1.061843991279602, "learning_rate": 2.8367362587865458e-05, "loss": 0.1016, "step": 9904 }, { "epoch": 0.17541296426658165, "grad_norm": 1.3713648319244385, "learning_rate": 2.8366972223569445e-05, "loss": 0.1526, "step": 9905 }, { "epoch": 0.17543067380361008, "grad_norm": 1.039082646369934, "learning_rate": 2.836658181529745e-05, "loss": 0.1265, "step": 9906 }, { "epoch": 0.17544838334063853, "grad_norm": 0.6566823124885559, "learning_rate": 2.8366191363050754e-05, "loss": 0.143, "step": 9907 }, { "epoch": 0.17546609287766696, "grad_norm": 1.1145416498184204, "learning_rate": 2.836580086683064e-05, "loss": 0.1446, "step": 9908 }, { "epoch": 0.17548380241469538, "grad_norm": 0.8717415928840637, "learning_rate": 2.8365410326638387e-05, "loss": 0.146, "step": 9909 }, { "epoch": 0.1755015119517238, "grad_norm": 0.9734825491905212, "learning_rate": 2.8365019742475294e-05, "loss": 0.1128, "step": 9910 }, { "epoch": 0.17551922148875224, "grad_norm": 1.1633034944534302, "learning_rate": 2.8364629114342636e-05, "loss": 0.1135, "step": 9911 }, { "epoch": 0.17553693102578066, "grad_norm": 0.7341876029968262, "learning_rate": 2.8364238442241695e-05, "loss": 0.0961, "step": 9912 }, { "epoch": 0.1755546405628091, "grad_norm": 1.2680972814559937, "learning_rate": 2.8363847726173766e-05, "loss": 0.1385, "step": 9913 }, { "epoch": 0.17557235009983752, "grad_norm": 0.895143449306488, "learning_rate": 2.836345696614013e-05, "loss": 0.1031, "step": 9914 }, { "epoch": 0.17559005963686594, "grad_norm": 0.9178650379180908, "learning_rate": 2.8363066162142065e-05, "loss": 0.1543, "step": 9915 }, { "epoch": 0.17560776917389437, "grad_norm": 0.7722197771072388, "learning_rate": 2.836267531418087e-05, "loss": 0.1248, "step": 9916 }, { "epoch": 0.1756254787109228, "grad_norm": 1.1384347677230835, "learning_rate": 2.8362284422257827e-05, "loss": 0.0981, "step": 9917 }, { "epoch": 0.17564318824795122, "grad_norm": 1.1376155614852905, "learning_rate": 2.8361893486374214e-05, "loss": 0.1277, "step": 9918 }, { "epoch": 0.17566089778497965, "grad_norm": 1.1221791505813599, "learning_rate": 2.836150250653133e-05, "loss": 0.1034, "step": 9919 }, { "epoch": 0.17567860732200807, "grad_norm": 0.8766219615936279, "learning_rate": 2.836111148273045e-05, "loss": 0.1126, "step": 9920 }, { "epoch": 0.1756963168590365, "grad_norm": 0.8156414031982422, "learning_rate": 2.836072041497286e-05, "loss": 0.0752, "step": 9921 }, { "epoch": 0.17571402639606495, "grad_norm": 1.2617032527923584, "learning_rate": 2.8360329303259855e-05, "loss": 0.1283, "step": 9922 }, { "epoch": 0.17573173593309338, "grad_norm": 2.279474973678589, "learning_rate": 2.8359938147592723e-05, "loss": 0.0958, "step": 9923 }, { "epoch": 0.1757494454701218, "grad_norm": 1.1545703411102295, "learning_rate": 2.835954694797274e-05, "loss": 0.103, "step": 9924 }, { "epoch": 0.17576715500715023, "grad_norm": 1.4667574167251587, "learning_rate": 2.83591557044012e-05, "loss": 0.161, "step": 9925 }, { "epoch": 0.17578486454417866, "grad_norm": 1.3529363870620728, "learning_rate": 2.835876441687939e-05, "loss": 0.1659, "step": 9926 }, { "epoch": 0.17580257408120709, "grad_norm": 1.2799240350723267, "learning_rate": 2.8358373085408594e-05, "loss": 0.1424, "step": 9927 }, { "epoch": 0.1758202836182355, "grad_norm": 0.6338253617286682, "learning_rate": 2.83579817099901e-05, "loss": 0.1506, "step": 9928 }, { "epoch": 0.17583799315526394, "grad_norm": 0.9778760075569153, "learning_rate": 2.8357590290625204e-05, "loss": 0.0875, "step": 9929 }, { "epoch": 0.17585570269229236, "grad_norm": 1.0364311933517456, "learning_rate": 2.8357198827315184e-05, "loss": 0.1024, "step": 9930 }, { "epoch": 0.1758734122293208, "grad_norm": 0.5843451023101807, "learning_rate": 2.835680732006133e-05, "loss": 0.0873, "step": 9931 }, { "epoch": 0.17589112176634922, "grad_norm": 0.8559237718582153, "learning_rate": 2.835641576886493e-05, "loss": 0.1028, "step": 9932 }, { "epoch": 0.17590883130337764, "grad_norm": 0.9936057925224304, "learning_rate": 2.8356024173727274e-05, "loss": 0.1132, "step": 9933 }, { "epoch": 0.17592654084040607, "grad_norm": 1.4126570224761963, "learning_rate": 2.835563253464965e-05, "loss": 0.1499, "step": 9934 }, { "epoch": 0.1759442503774345, "grad_norm": 1.0355943441390991, "learning_rate": 2.8355240851633348e-05, "loss": 0.0974, "step": 9935 }, { "epoch": 0.17596195991446292, "grad_norm": 1.1732643842697144, "learning_rate": 2.8354849124679647e-05, "loss": 0.1184, "step": 9936 }, { "epoch": 0.17597966945149138, "grad_norm": 1.1648926734924316, "learning_rate": 2.835445735378985e-05, "loss": 0.1015, "step": 9937 }, { "epoch": 0.1759973789885198, "grad_norm": 1.152114748954773, "learning_rate": 2.8354065538965236e-05, "loss": 0.1389, "step": 9938 }, { "epoch": 0.17601508852554823, "grad_norm": 0.726082980632782, "learning_rate": 2.83536736802071e-05, "loss": 0.162, "step": 9939 }, { "epoch": 0.17603279806257666, "grad_norm": 0.9747398495674133, "learning_rate": 2.8353281777516723e-05, "loss": 0.1482, "step": 9940 }, { "epoch": 0.17605050759960508, "grad_norm": 1.1267287731170654, "learning_rate": 2.8352889830895402e-05, "loss": 0.1431, "step": 9941 }, { "epoch": 0.1760682171366335, "grad_norm": 1.200977087020874, "learning_rate": 2.8352497840344425e-05, "loss": 0.1057, "step": 9942 }, { "epoch": 0.17608592667366194, "grad_norm": 0.927020251750946, "learning_rate": 2.8352105805865082e-05, "loss": 0.117, "step": 9943 }, { "epoch": 0.17610363621069036, "grad_norm": 0.8834071159362793, "learning_rate": 2.835171372745866e-05, "loss": 0.0967, "step": 9944 }, { "epoch": 0.1761213457477188, "grad_norm": 0.7080544829368591, "learning_rate": 2.8351321605126448e-05, "loss": 0.0828, "step": 9945 }, { "epoch": 0.17613905528474721, "grad_norm": 1.0856471061706543, "learning_rate": 2.835092943886974e-05, "loss": 0.1364, "step": 9946 }, { "epoch": 0.17615676482177564, "grad_norm": 0.8848108053207397, "learning_rate": 2.8350537228689826e-05, "loss": 0.1267, "step": 9947 }, { "epoch": 0.17617447435880407, "grad_norm": 0.7847734093666077, "learning_rate": 2.8350144974587993e-05, "loss": 0.1257, "step": 9948 }, { "epoch": 0.1761921838958325, "grad_norm": 1.1026536226272583, "learning_rate": 2.8349752676565537e-05, "loss": 0.1109, "step": 9949 }, { "epoch": 0.17620989343286092, "grad_norm": 1.0697799921035767, "learning_rate": 2.834936033462374e-05, "loss": 0.1169, "step": 9950 }, { "epoch": 0.17622760296988935, "grad_norm": 0.8913809061050415, "learning_rate": 2.8348967948763897e-05, "loss": 0.1274, "step": 9951 }, { "epoch": 0.1762453125069178, "grad_norm": 0.9689176678657532, "learning_rate": 2.8348575518987302e-05, "loss": 0.1436, "step": 9952 }, { "epoch": 0.17626302204394623, "grad_norm": 1.0895744562149048, "learning_rate": 2.8348183045295244e-05, "loss": 0.1076, "step": 9953 }, { "epoch": 0.17628073158097465, "grad_norm": 0.9835482239723206, "learning_rate": 2.8347790527689016e-05, "loss": 0.1184, "step": 9954 }, { "epoch": 0.17629844111800308, "grad_norm": 0.9078835248947144, "learning_rate": 2.8347397966169904e-05, "loss": 0.0833, "step": 9955 }, { "epoch": 0.1763161506550315, "grad_norm": 1.4277029037475586, "learning_rate": 2.83470053607392e-05, "loss": 0.1332, "step": 9956 }, { "epoch": 0.17633386019205993, "grad_norm": 1.0168377161026, "learning_rate": 2.834661271139821e-05, "loss": 0.1399, "step": 9957 }, { "epoch": 0.17635156972908836, "grad_norm": 1.1354660987854004, "learning_rate": 2.8346220018148204e-05, "loss": 0.1603, "step": 9958 }, { "epoch": 0.17636927926611679, "grad_norm": 0.9563764333724976, "learning_rate": 2.8345827280990484e-05, "loss": 0.1084, "step": 9959 }, { "epoch": 0.1763869888031452, "grad_norm": 0.9663645625114441, "learning_rate": 2.8345434499926344e-05, "loss": 0.1153, "step": 9960 }, { "epoch": 0.17640469834017364, "grad_norm": 0.8560625314712524, "learning_rate": 2.8345041674957075e-05, "loss": 0.1147, "step": 9961 }, { "epoch": 0.17642240787720206, "grad_norm": 1.2779895067214966, "learning_rate": 2.8344648806083967e-05, "loss": 0.1492, "step": 9962 }, { "epoch": 0.1764401174142305, "grad_norm": 0.9193150997161865, "learning_rate": 2.8344255893308315e-05, "loss": 0.1011, "step": 9963 }, { "epoch": 0.17645782695125892, "grad_norm": 0.9473816752433777, "learning_rate": 2.8343862936631412e-05, "loss": 0.0957, "step": 9964 }, { "epoch": 0.17647553648828734, "grad_norm": 1.2903515100479126, "learning_rate": 2.834346993605455e-05, "loss": 0.1204, "step": 9965 }, { "epoch": 0.17649324602531577, "grad_norm": 0.8717302680015564, "learning_rate": 2.834307689157902e-05, "loss": 0.0801, "step": 9966 }, { "epoch": 0.17651095556234422, "grad_norm": 1.7966219186782837, "learning_rate": 2.8342683803206116e-05, "loss": 0.1528, "step": 9967 }, { "epoch": 0.17652866509937265, "grad_norm": 0.9695708751678467, "learning_rate": 2.8342290670937133e-05, "loss": 0.0958, "step": 9968 }, { "epoch": 0.17654637463640108, "grad_norm": 1.0957437753677368, "learning_rate": 2.8341897494773363e-05, "loss": 0.0966, "step": 9969 }, { "epoch": 0.1765640841734295, "grad_norm": 1.6326607465744019, "learning_rate": 2.8341504274716102e-05, "loss": 0.117, "step": 9970 }, { "epoch": 0.17658179371045793, "grad_norm": 2.4083869457244873, "learning_rate": 2.8341111010766635e-05, "loss": 0.1191, "step": 9971 }, { "epoch": 0.17659950324748636, "grad_norm": 0.76140296459198, "learning_rate": 2.8340717702926267e-05, "loss": 0.137, "step": 9972 }, { "epoch": 0.17661721278451478, "grad_norm": 1.2765671014785767, "learning_rate": 2.8340324351196285e-05, "loss": 0.1336, "step": 9973 }, { "epoch": 0.1766349223215432, "grad_norm": 1.4647732973098755, "learning_rate": 2.833993095557799e-05, "loss": 0.0827, "step": 9974 }, { "epoch": 0.17665263185857163, "grad_norm": 1.0914413928985596, "learning_rate": 2.8339537516072665e-05, "loss": 0.1316, "step": 9975 }, { "epoch": 0.17667034139560006, "grad_norm": 1.0908467769622803, "learning_rate": 2.8339144032681612e-05, "loss": 0.0863, "step": 9976 }, { "epoch": 0.1766880509326285, "grad_norm": 1.1787554025650024, "learning_rate": 2.8338750505406125e-05, "loss": 0.1334, "step": 9977 }, { "epoch": 0.17670576046965691, "grad_norm": 0.9133784174919128, "learning_rate": 2.83383569342475e-05, "loss": 0.1316, "step": 9978 }, { "epoch": 0.17672347000668534, "grad_norm": 0.9333680868148804, "learning_rate": 2.833796331920703e-05, "loss": 0.1196, "step": 9979 }, { "epoch": 0.17674117954371377, "grad_norm": 1.1663274765014648, "learning_rate": 2.8337569660286007e-05, "loss": 0.0936, "step": 9980 }, { "epoch": 0.1767588890807422, "grad_norm": 0.6623028516769409, "learning_rate": 2.8337175957485734e-05, "loss": 0.1016, "step": 9981 }, { "epoch": 0.17677659861777065, "grad_norm": 0.6940836906433105, "learning_rate": 2.8336782210807497e-05, "loss": 0.1045, "step": 9982 }, { "epoch": 0.17679430815479907, "grad_norm": 1.0859215259552002, "learning_rate": 2.8336388420252594e-05, "loss": 0.114, "step": 9983 }, { "epoch": 0.1768120176918275, "grad_norm": 0.7132570147514343, "learning_rate": 2.8335994585822326e-05, "loss": 0.1466, "step": 9984 }, { "epoch": 0.17682972722885593, "grad_norm": 1.0028605461120605, "learning_rate": 2.8335600707517978e-05, "loss": 0.1286, "step": 9985 }, { "epoch": 0.17684743676588435, "grad_norm": 1.4084978103637695, "learning_rate": 2.8335206785340857e-05, "loss": 0.1173, "step": 9986 }, { "epoch": 0.17686514630291278, "grad_norm": 0.8707653284072876, "learning_rate": 2.8334812819292257e-05, "loss": 0.1071, "step": 9987 }, { "epoch": 0.1768828558399412, "grad_norm": 1.2264162302017212, "learning_rate": 2.8334418809373466e-05, "loss": 0.0808, "step": 9988 }, { "epoch": 0.17690056537696963, "grad_norm": 0.960426390171051, "learning_rate": 2.8334024755585792e-05, "loss": 0.1125, "step": 9989 }, { "epoch": 0.17691827491399806, "grad_norm": 1.459501028060913, "learning_rate": 2.833363065793052e-05, "loss": 0.1871, "step": 9990 }, { "epoch": 0.17693598445102648, "grad_norm": 0.4680997431278229, "learning_rate": 2.8333236516408954e-05, "loss": 0.0815, "step": 9991 }, { "epoch": 0.1769536939880549, "grad_norm": 0.9249992370605469, "learning_rate": 2.8332842331022386e-05, "loss": 0.0941, "step": 9992 }, { "epoch": 0.17697140352508334, "grad_norm": 1.0539764165878296, "learning_rate": 2.8332448101772118e-05, "loss": 0.1468, "step": 9993 }, { "epoch": 0.17698911306211176, "grad_norm": 0.9436476826667786, "learning_rate": 2.8332053828659445e-05, "loss": 0.0942, "step": 9994 }, { "epoch": 0.1770068225991402, "grad_norm": 0.8612077832221985, "learning_rate": 2.833165951168566e-05, "loss": 0.0974, "step": 9995 }, { "epoch": 0.17702453213616862, "grad_norm": 1.0822221040725708, "learning_rate": 2.8331265150852067e-05, "loss": 0.1172, "step": 9996 }, { "epoch": 0.17704224167319707, "grad_norm": 0.8920081257820129, "learning_rate": 2.833087074615996e-05, "loss": 0.0594, "step": 9997 }, { "epoch": 0.1770599512102255, "grad_norm": 1.3364834785461426, "learning_rate": 2.8330476297610634e-05, "loss": 0.1536, "step": 9998 }, { "epoch": 0.17707766074725392, "grad_norm": 1.3487884998321533, "learning_rate": 2.833008180520539e-05, "loss": 0.0855, "step": 9999 }, { "epoch": 0.17709537028428235, "grad_norm": 0.48938271403312683, "learning_rate": 2.8329687268945526e-05, "loss": 0.0918, "step": 10000 }, { "epoch": 0.17711307982131078, "grad_norm": 1.1016696691513062, "learning_rate": 2.8329292688832334e-05, "loss": 0.1245, "step": 10001 }, { "epoch": 0.1771307893583392, "grad_norm": 0.7480627298355103, "learning_rate": 2.8328898064867125e-05, "loss": 0.1008, "step": 10002 }, { "epoch": 0.17714849889536763, "grad_norm": 0.7670410871505737, "learning_rate": 2.832850339705119e-05, "loss": 0.1232, "step": 10003 }, { "epoch": 0.17716620843239606, "grad_norm": 0.8295215368270874, "learning_rate": 2.8328108685385823e-05, "loss": 0.1111, "step": 10004 }, { "epoch": 0.17718391796942448, "grad_norm": 1.1350734233856201, "learning_rate": 2.8327713929872324e-05, "loss": 0.1269, "step": 10005 }, { "epoch": 0.1772016275064529, "grad_norm": 1.2083977460861206, "learning_rate": 2.8327319130511996e-05, "loss": 0.1258, "step": 10006 }, { "epoch": 0.17721933704348133, "grad_norm": 1.1512309312820435, "learning_rate": 2.8326924287306136e-05, "loss": 0.107, "step": 10007 }, { "epoch": 0.17723704658050976, "grad_norm": 0.709905207157135, "learning_rate": 2.8326529400256044e-05, "loss": 0.1197, "step": 10008 }, { "epoch": 0.1772547561175382, "grad_norm": 0.9461525082588196, "learning_rate": 2.832613446936302e-05, "loss": 0.1274, "step": 10009 }, { "epoch": 0.1772724656545666, "grad_norm": 0.9448668360710144, "learning_rate": 2.8325739494628358e-05, "loss": 0.1151, "step": 10010 }, { "epoch": 0.17729017519159507, "grad_norm": 0.9431101083755493, "learning_rate": 2.8325344476053365e-05, "loss": 0.1352, "step": 10011 }, { "epoch": 0.1773078847286235, "grad_norm": 0.7107659578323364, "learning_rate": 2.8324949413639332e-05, "loss": 0.0664, "step": 10012 }, { "epoch": 0.17732559426565192, "grad_norm": 1.2169716358184814, "learning_rate": 2.8324554307387566e-05, "loss": 0.0596, "step": 10013 }, { "epoch": 0.17734330380268035, "grad_norm": 0.5878856182098389, "learning_rate": 2.8324159157299365e-05, "loss": 0.0671, "step": 10014 }, { "epoch": 0.17736101333970877, "grad_norm": 1.3809154033660889, "learning_rate": 2.8323763963376026e-05, "loss": 0.1676, "step": 10015 }, { "epoch": 0.1773787228767372, "grad_norm": 0.9446235299110413, "learning_rate": 2.832336872561885e-05, "loss": 0.0617, "step": 10016 }, { "epoch": 0.17739643241376563, "grad_norm": 0.39366328716278076, "learning_rate": 2.832297344402914e-05, "loss": 0.0655, "step": 10017 }, { "epoch": 0.17741414195079405, "grad_norm": 0.664526104927063, "learning_rate": 2.8322578118608196e-05, "loss": 0.0865, "step": 10018 }, { "epoch": 0.17743185148782248, "grad_norm": 0.6733464598655701, "learning_rate": 2.832218274935731e-05, "loss": 0.1098, "step": 10019 }, { "epoch": 0.1774495610248509, "grad_norm": 0.989747166633606, "learning_rate": 2.83217873362778e-05, "loss": 0.1218, "step": 10020 }, { "epoch": 0.17746727056187933, "grad_norm": 1.576159119606018, "learning_rate": 2.8321391879370952e-05, "loss": 0.1158, "step": 10021 }, { "epoch": 0.17748498009890776, "grad_norm": 1.0663756132125854, "learning_rate": 2.8320996378638074e-05, "loss": 0.1333, "step": 10022 }, { "epoch": 0.17750268963593618, "grad_norm": 1.3462567329406738, "learning_rate": 2.8320600834080466e-05, "loss": 0.0853, "step": 10023 }, { "epoch": 0.1775203991729646, "grad_norm": 0.9822452664375305, "learning_rate": 2.8320205245699427e-05, "loss": 0.1291, "step": 10024 }, { "epoch": 0.17753810870999304, "grad_norm": 0.9053953289985657, "learning_rate": 2.8319809613496264e-05, "loss": 0.1089, "step": 10025 }, { "epoch": 0.1775558182470215, "grad_norm": 1.0176959037780762, "learning_rate": 2.831941393747227e-05, "loss": 0.1176, "step": 10026 }, { "epoch": 0.17757352778404992, "grad_norm": 0.8032612204551697, "learning_rate": 2.831901821762875e-05, "loss": 0.1183, "step": 10027 }, { "epoch": 0.17759123732107834, "grad_norm": 0.9342600703239441, "learning_rate": 2.831862245396701e-05, "loss": 0.1174, "step": 10028 }, { "epoch": 0.17760894685810677, "grad_norm": 0.8087084889411926, "learning_rate": 2.831822664648835e-05, "loss": 0.0747, "step": 10029 }, { "epoch": 0.1776266563951352, "grad_norm": 0.9277584552764893, "learning_rate": 2.831783079519407e-05, "loss": 0.1156, "step": 10030 }, { "epoch": 0.17764436593216362, "grad_norm": 0.8072733283042908, "learning_rate": 2.8317434900085474e-05, "loss": 0.1156, "step": 10031 }, { "epoch": 0.17766207546919205, "grad_norm": 0.8629382252693176, "learning_rate": 2.831703896116386e-05, "loss": 0.1371, "step": 10032 }, { "epoch": 0.17767978500622048, "grad_norm": 0.7092472314834595, "learning_rate": 2.8316642978430543e-05, "loss": 0.0591, "step": 10033 }, { "epoch": 0.1776974945432489, "grad_norm": 0.975882351398468, "learning_rate": 2.831624695188681e-05, "loss": 0.0837, "step": 10034 }, { "epoch": 0.17771520408027733, "grad_norm": 0.9963095784187317, "learning_rate": 2.831585088153398e-05, "loss": 0.1306, "step": 10035 }, { "epoch": 0.17773291361730575, "grad_norm": 0.7084612250328064, "learning_rate": 2.831545476737334e-05, "loss": 0.092, "step": 10036 }, { "epoch": 0.17775062315433418, "grad_norm": 0.7344238758087158, "learning_rate": 2.83150586094062e-05, "loss": 0.0972, "step": 10037 }, { "epoch": 0.1777683326913626, "grad_norm": 1.176193118095398, "learning_rate": 2.8314662407633872e-05, "loss": 0.1182, "step": 10038 }, { "epoch": 0.17778604222839103, "grad_norm": 0.6696869134902954, "learning_rate": 2.831426616205764e-05, "loss": 0.0887, "step": 10039 }, { "epoch": 0.17780375176541946, "grad_norm": 0.8405266404151917, "learning_rate": 2.8313869872678827e-05, "loss": 0.1257, "step": 10040 }, { "epoch": 0.17782146130244791, "grad_norm": 1.2120016813278198, "learning_rate": 2.8313473539498725e-05, "loss": 0.1113, "step": 10041 }, { "epoch": 0.17783917083947634, "grad_norm": 1.2316381931304932, "learning_rate": 2.831307716251865e-05, "loss": 0.1205, "step": 10042 }, { "epoch": 0.17785688037650477, "grad_norm": 0.6122353672981262, "learning_rate": 2.8312680741739884e-05, "loss": 0.0788, "step": 10043 }, { "epoch": 0.1778745899135332, "grad_norm": 0.685052752494812, "learning_rate": 2.8312284277163756e-05, "loss": 0.1038, "step": 10044 }, { "epoch": 0.17789229945056162, "grad_norm": 0.7526736855506897, "learning_rate": 2.8311887768791552e-05, "loss": 0.1351, "step": 10045 }, { "epoch": 0.17791000898759005, "grad_norm": 1.2726138830184937, "learning_rate": 2.831149121662459e-05, "loss": 0.1388, "step": 10046 }, { "epoch": 0.17792771852461847, "grad_norm": 0.6786186099052429, "learning_rate": 2.8311094620664166e-05, "loss": 0.1074, "step": 10047 }, { "epoch": 0.1779454280616469, "grad_norm": 1.107541799545288, "learning_rate": 2.831069798091158e-05, "loss": 0.1346, "step": 10048 }, { "epoch": 0.17796313759867533, "grad_norm": 1.810388207435608, "learning_rate": 2.8310301297368157e-05, "loss": 0.1327, "step": 10049 }, { "epoch": 0.17798084713570375, "grad_norm": 0.6395211219787598, "learning_rate": 2.8309904570035178e-05, "loss": 0.0956, "step": 10050 }, { "epoch": 0.17799855667273218, "grad_norm": 1.061428189277649, "learning_rate": 2.8309507798913964e-05, "loss": 0.0875, "step": 10051 }, { "epoch": 0.1780162662097606, "grad_norm": 0.612368643283844, "learning_rate": 2.8309110984005812e-05, "loss": 0.0911, "step": 10052 }, { "epoch": 0.17803397574678903, "grad_norm": 1.247782588005066, "learning_rate": 2.8308714125312037e-05, "loss": 0.1245, "step": 10053 }, { "epoch": 0.17805168528381746, "grad_norm": 0.7936265468597412, "learning_rate": 2.8308317222833936e-05, "loss": 0.104, "step": 10054 }, { "epoch": 0.17806939482084588, "grad_norm": 1.2044872045516968, "learning_rate": 2.8307920276572814e-05, "loss": 0.0933, "step": 10055 }, { "epoch": 0.17808710435787434, "grad_norm": 0.7940226197242737, "learning_rate": 2.830752328652998e-05, "loss": 0.1187, "step": 10056 }, { "epoch": 0.17810481389490276, "grad_norm": 0.9720093607902527, "learning_rate": 2.8307126252706747e-05, "loss": 0.1116, "step": 10057 }, { "epoch": 0.1781225234319312, "grad_norm": 0.8514139652252197, "learning_rate": 2.830672917510441e-05, "loss": 0.1137, "step": 10058 }, { "epoch": 0.17814023296895962, "grad_norm": 1.4800387620925903, "learning_rate": 2.8306332053724282e-05, "loss": 0.1232, "step": 10059 }, { "epoch": 0.17815794250598804, "grad_norm": 0.8625746369361877, "learning_rate": 2.8305934888567664e-05, "loss": 0.1087, "step": 10060 }, { "epoch": 0.17817565204301647, "grad_norm": 0.7654323577880859, "learning_rate": 2.830553767963587e-05, "loss": 0.1184, "step": 10061 }, { "epoch": 0.1781933615800449, "grad_norm": 1.0353254079818726, "learning_rate": 2.83051404269302e-05, "loss": 0.1345, "step": 10062 }, { "epoch": 0.17821107111707332, "grad_norm": 0.9214807152748108, "learning_rate": 2.8304743130451966e-05, "loss": 0.1243, "step": 10063 }, { "epoch": 0.17822878065410175, "grad_norm": 0.8203766942024231, "learning_rate": 2.8304345790202474e-05, "loss": 0.0777, "step": 10064 }, { "epoch": 0.17824649019113017, "grad_norm": 0.5887287259101868, "learning_rate": 2.830394840618302e-05, "loss": 0.0562, "step": 10065 }, { "epoch": 0.1782641997281586, "grad_norm": 0.7762017250061035, "learning_rate": 2.8303550978394934e-05, "loss": 0.0986, "step": 10066 }, { "epoch": 0.17828190926518703, "grad_norm": 1.329163670539856, "learning_rate": 2.8303153506839503e-05, "loss": 0.0947, "step": 10067 }, { "epoch": 0.17829961880221545, "grad_norm": 1.103550910949707, "learning_rate": 2.830275599151805e-05, "loss": 0.0963, "step": 10068 }, { "epoch": 0.17831732833924388, "grad_norm": 0.9725162386894226, "learning_rate": 2.830235843243187e-05, "loss": 0.1168, "step": 10069 }, { "epoch": 0.1783350378762723, "grad_norm": 0.9182353019714355, "learning_rate": 2.8301960829582275e-05, "loss": 0.0783, "step": 10070 }, { "epoch": 0.17835274741330076, "grad_norm": 0.855557382106781, "learning_rate": 2.8301563182970574e-05, "loss": 0.1002, "step": 10071 }, { "epoch": 0.1783704569503292, "grad_norm": 0.7295685410499573, "learning_rate": 2.830116549259808e-05, "loss": 0.1215, "step": 10072 }, { "epoch": 0.1783881664873576, "grad_norm": 0.976935088634491, "learning_rate": 2.8300767758466093e-05, "loss": 0.1237, "step": 10073 }, { "epoch": 0.17840587602438604, "grad_norm": 1.0017997026443481, "learning_rate": 2.8300369980575927e-05, "loss": 0.1168, "step": 10074 }, { "epoch": 0.17842358556141447, "grad_norm": 1.2454487085342407, "learning_rate": 2.8299972158928887e-05, "loss": 0.1003, "step": 10075 }, { "epoch": 0.1784412950984429, "grad_norm": 1.0372893810272217, "learning_rate": 2.829957429352629e-05, "loss": 0.1291, "step": 10076 }, { "epoch": 0.17845900463547132, "grad_norm": 0.7939231395721436, "learning_rate": 2.8299176384369432e-05, "loss": 0.0961, "step": 10077 }, { "epoch": 0.17847671417249975, "grad_norm": 0.8350643515586853, "learning_rate": 2.8298778431459634e-05, "loss": 0.123, "step": 10078 }, { "epoch": 0.17849442370952817, "grad_norm": 0.7733582854270935, "learning_rate": 2.8298380434798192e-05, "loss": 0.1286, "step": 10079 }, { "epoch": 0.1785121332465566, "grad_norm": 0.9572000503540039, "learning_rate": 2.829798239438643e-05, "loss": 0.084, "step": 10080 }, { "epoch": 0.17852984278358502, "grad_norm": 1.075378179550171, "learning_rate": 2.829758431022565e-05, "loss": 0.0874, "step": 10081 }, { "epoch": 0.17854755232061345, "grad_norm": 0.693310022354126, "learning_rate": 2.8297186182317163e-05, "loss": 0.0969, "step": 10082 }, { "epoch": 0.17856526185764188, "grad_norm": 0.9191024303436279, "learning_rate": 2.829678801066228e-05, "loss": 0.1271, "step": 10083 }, { "epoch": 0.1785829713946703, "grad_norm": 1.129947304725647, "learning_rate": 2.8296389795262303e-05, "loss": 0.1191, "step": 10084 }, { "epoch": 0.17860068093169873, "grad_norm": 1.589705228805542, "learning_rate": 2.8295991536118554e-05, "loss": 0.0795, "step": 10085 }, { "epoch": 0.17861839046872718, "grad_norm": 0.8524532318115234, "learning_rate": 2.8295593233232333e-05, "loss": 0.1459, "step": 10086 }, { "epoch": 0.1786361000057556, "grad_norm": 0.9477987885475159, "learning_rate": 2.8295194886604958e-05, "loss": 0.1038, "step": 10087 }, { "epoch": 0.17865380954278404, "grad_norm": 1.228830099105835, "learning_rate": 2.829479649623774e-05, "loss": 0.0766, "step": 10088 }, { "epoch": 0.17867151907981246, "grad_norm": 0.648229718208313, "learning_rate": 2.8294398062131983e-05, "loss": 0.0671, "step": 10089 }, { "epoch": 0.1786892286168409, "grad_norm": 1.2499984502792358, "learning_rate": 2.8293999584288996e-05, "loss": 0.1415, "step": 10090 }, { "epoch": 0.17870693815386932, "grad_norm": 1.3602972030639648, "learning_rate": 2.8293601062710104e-05, "loss": 0.127, "step": 10091 }, { "epoch": 0.17872464769089774, "grad_norm": 0.9314883947372437, "learning_rate": 2.8293202497396606e-05, "loss": 0.1295, "step": 10092 }, { "epoch": 0.17874235722792617, "grad_norm": 0.7034397125244141, "learning_rate": 2.8292803888349815e-05, "loss": 0.1244, "step": 10093 }, { "epoch": 0.1787600667649546, "grad_norm": 0.4927031397819519, "learning_rate": 2.829240523557104e-05, "loss": 0.1049, "step": 10094 }, { "epoch": 0.17877777630198302, "grad_norm": 1.4161795377731323, "learning_rate": 2.8292006539061603e-05, "loss": 0.1086, "step": 10095 }, { "epoch": 0.17879548583901145, "grad_norm": 1.4089090824127197, "learning_rate": 2.8291607798822804e-05, "loss": 0.1483, "step": 10096 }, { "epoch": 0.17881319537603987, "grad_norm": 0.6229123473167419, "learning_rate": 2.829120901485596e-05, "loss": 0.097, "step": 10097 }, { "epoch": 0.1788309049130683, "grad_norm": 1.1938716173171997, "learning_rate": 2.8290810187162386e-05, "loss": 0.1182, "step": 10098 }, { "epoch": 0.17884861445009673, "grad_norm": 1.183448314666748, "learning_rate": 2.829041131574339e-05, "loss": 0.1296, "step": 10099 }, { "epoch": 0.17886632398712515, "grad_norm": 0.854982852935791, "learning_rate": 2.8290012400600283e-05, "loss": 0.1242, "step": 10100 }, { "epoch": 0.1788840335241536, "grad_norm": 0.75299072265625, "learning_rate": 2.828961344173438e-05, "loss": 0.0923, "step": 10101 }, { "epoch": 0.17890174306118203, "grad_norm": 1.6100698709487915, "learning_rate": 2.8289214439146995e-05, "loss": 0.1689, "step": 10102 }, { "epoch": 0.17891945259821046, "grad_norm": 0.9033620357513428, "learning_rate": 2.8288815392839438e-05, "loss": 0.0847, "step": 10103 }, { "epoch": 0.1789371621352389, "grad_norm": 0.692945659160614, "learning_rate": 2.8288416302813017e-05, "loss": 0.1031, "step": 10104 }, { "epoch": 0.1789548716722673, "grad_norm": 0.8695270419120789, "learning_rate": 2.8288017169069054e-05, "loss": 0.0855, "step": 10105 }, { "epoch": 0.17897258120929574, "grad_norm": 0.8279191255569458, "learning_rate": 2.8287617991608864e-05, "loss": 0.0735, "step": 10106 }, { "epoch": 0.17899029074632417, "grad_norm": 0.8945555090904236, "learning_rate": 2.828721877043375e-05, "loss": 0.1283, "step": 10107 }, { "epoch": 0.1790080002833526, "grad_norm": 0.980631411075592, "learning_rate": 2.828681950554503e-05, "loss": 0.1457, "step": 10108 }, { "epoch": 0.17902570982038102, "grad_norm": 0.9224551320075989, "learning_rate": 2.8286420196944016e-05, "loss": 0.0946, "step": 10109 }, { "epoch": 0.17904341935740944, "grad_norm": 1.1106661558151245, "learning_rate": 2.8286020844632027e-05, "loss": 0.1753, "step": 10110 }, { "epoch": 0.17906112889443787, "grad_norm": 1.4300944805145264, "learning_rate": 2.828562144861037e-05, "loss": 0.1156, "step": 10111 }, { "epoch": 0.1790788384314663, "grad_norm": 1.1438817977905273, "learning_rate": 2.8285222008880366e-05, "loss": 0.1211, "step": 10112 }, { "epoch": 0.17909654796849472, "grad_norm": 0.8952279090881348, "learning_rate": 2.8284822525443324e-05, "loss": 0.1134, "step": 10113 }, { "epoch": 0.17911425750552315, "grad_norm": 0.6644834280014038, "learning_rate": 2.8284422998300556e-05, "loss": 0.1321, "step": 10114 }, { "epoch": 0.17913196704255158, "grad_norm": 1.458417296409607, "learning_rate": 2.8284023427453386e-05, "loss": 0.1365, "step": 10115 }, { "epoch": 0.17914967657958003, "grad_norm": 1.0256614685058594, "learning_rate": 2.8283623812903118e-05, "loss": 0.1323, "step": 10116 }, { "epoch": 0.17916738611660846, "grad_norm": 1.2659376859664917, "learning_rate": 2.8283224154651072e-05, "loss": 0.1344, "step": 10117 }, { "epoch": 0.17918509565363688, "grad_norm": 1.1478440761566162, "learning_rate": 2.828282445269856e-05, "loss": 0.1505, "step": 10118 }, { "epoch": 0.1792028051906653, "grad_norm": 1.0494190454483032, "learning_rate": 2.8282424707046903e-05, "loss": 0.0733, "step": 10119 }, { "epoch": 0.17922051472769374, "grad_norm": 0.8401657938957214, "learning_rate": 2.8282024917697413e-05, "loss": 0.1324, "step": 10120 }, { "epoch": 0.17923822426472216, "grad_norm": 1.1146341562271118, "learning_rate": 2.8281625084651404e-05, "loss": 0.1072, "step": 10121 }, { "epoch": 0.1792559338017506, "grad_norm": 0.8984317183494568, "learning_rate": 2.8281225207910188e-05, "loss": 0.1489, "step": 10122 }, { "epoch": 0.17927364333877902, "grad_norm": 1.22982656955719, "learning_rate": 2.8280825287475085e-05, "loss": 0.1071, "step": 10123 }, { "epoch": 0.17929135287580744, "grad_norm": 1.058403491973877, "learning_rate": 2.828042532334741e-05, "loss": 0.1368, "step": 10124 }, { "epoch": 0.17930906241283587, "grad_norm": 0.7616161108016968, "learning_rate": 2.8280025315528482e-05, "loss": 0.1433, "step": 10125 }, { "epoch": 0.1793267719498643, "grad_norm": 0.7966130971908569, "learning_rate": 2.8279625264019612e-05, "loss": 0.167, "step": 10126 }, { "epoch": 0.17934448148689272, "grad_norm": 0.8506786227226257, "learning_rate": 2.8279225168822117e-05, "loss": 0.1027, "step": 10127 }, { "epoch": 0.17936219102392115, "grad_norm": 0.866383969783783, "learning_rate": 2.827882502993732e-05, "loss": 0.1169, "step": 10128 }, { "epoch": 0.17937990056094957, "grad_norm": 0.9585006237030029, "learning_rate": 2.8278424847366525e-05, "loss": 0.0988, "step": 10129 }, { "epoch": 0.179397610097978, "grad_norm": 1.403944492340088, "learning_rate": 2.8278024621111057e-05, "loss": 0.1578, "step": 10130 }, { "epoch": 0.17941531963500645, "grad_norm": 0.7096691727638245, "learning_rate": 2.8277624351172234e-05, "loss": 0.0847, "step": 10131 }, { "epoch": 0.17943302917203488, "grad_norm": 0.8814910650253296, "learning_rate": 2.8277224037551366e-05, "loss": 0.1338, "step": 10132 }, { "epoch": 0.1794507387090633, "grad_norm": 1.331386923789978, "learning_rate": 2.8276823680249774e-05, "loss": 0.1245, "step": 10133 }, { "epoch": 0.17946844824609173, "grad_norm": 0.6393643617630005, "learning_rate": 2.8276423279268777e-05, "loss": 0.1105, "step": 10134 }, { "epoch": 0.17948615778312016, "grad_norm": 0.9004690051078796, "learning_rate": 2.8276022834609686e-05, "loss": 0.1266, "step": 10135 }, { "epoch": 0.17950386732014859, "grad_norm": 0.7879291772842407, "learning_rate": 2.827562234627383e-05, "loss": 0.107, "step": 10136 }, { "epoch": 0.179521576857177, "grad_norm": 1.2848504781723022, "learning_rate": 2.8275221814262514e-05, "loss": 0.0939, "step": 10137 }, { "epoch": 0.17953928639420544, "grad_norm": 1.1828168630599976, "learning_rate": 2.827482123857706e-05, "loss": 0.096, "step": 10138 }, { "epoch": 0.17955699593123386, "grad_norm": 0.850020170211792, "learning_rate": 2.8274420619218794e-05, "loss": 0.1353, "step": 10139 }, { "epoch": 0.1795747054682623, "grad_norm": 0.5163761973381042, "learning_rate": 2.827401995618902e-05, "loss": 0.0791, "step": 10140 }, { "epoch": 0.17959241500529072, "grad_norm": 0.7079324722290039, "learning_rate": 2.8273619249489064e-05, "loss": 0.0978, "step": 10141 }, { "epoch": 0.17961012454231914, "grad_norm": 0.9390697479248047, "learning_rate": 2.8273218499120244e-05, "loss": 0.0997, "step": 10142 }, { "epoch": 0.17962783407934757, "grad_norm": 1.0204081535339355, "learning_rate": 2.8272817705083874e-05, "loss": 0.1062, "step": 10143 }, { "epoch": 0.179645543616376, "grad_norm": 0.8732492327690125, "learning_rate": 2.8272416867381285e-05, "loss": 0.0856, "step": 10144 }, { "epoch": 0.17966325315340445, "grad_norm": 0.46473509073257446, "learning_rate": 2.8272015986013782e-05, "loss": 0.0847, "step": 10145 }, { "epoch": 0.17968096269043288, "grad_norm": 0.8550340533256531, "learning_rate": 2.8271615060982686e-05, "loss": 0.1441, "step": 10146 }, { "epoch": 0.1796986722274613, "grad_norm": 0.8590682148933411, "learning_rate": 2.827121409228932e-05, "loss": 0.133, "step": 10147 }, { "epoch": 0.17971638176448973, "grad_norm": 0.9471752047538757, "learning_rate": 2.8270813079935007e-05, "loss": 0.1063, "step": 10148 }, { "epoch": 0.17973409130151816, "grad_norm": 1.0768855810165405, "learning_rate": 2.8270412023921057e-05, "loss": 0.0906, "step": 10149 }, { "epoch": 0.17975180083854658, "grad_norm": 1.0270814895629883, "learning_rate": 2.8270010924248792e-05, "loss": 0.1394, "step": 10150 }, { "epoch": 0.179769510375575, "grad_norm": 0.8641895055770874, "learning_rate": 2.8269609780919538e-05, "loss": 0.1196, "step": 10151 }, { "epoch": 0.17978721991260344, "grad_norm": 1.1230889558792114, "learning_rate": 2.8269208593934606e-05, "loss": 0.0719, "step": 10152 }, { "epoch": 0.17980492944963186, "grad_norm": 0.659687340259552, "learning_rate": 2.826880736329532e-05, "loss": 0.098, "step": 10153 }, { "epoch": 0.1798226389866603, "grad_norm": 0.9312443137168884, "learning_rate": 2.8268406089003e-05, "loss": 0.138, "step": 10154 }, { "epoch": 0.17984034852368871, "grad_norm": 0.8679712414741516, "learning_rate": 2.826800477105897e-05, "loss": 0.1083, "step": 10155 }, { "epoch": 0.17985805806071714, "grad_norm": 1.446434497833252, "learning_rate": 2.8267603409464544e-05, "loss": 0.1491, "step": 10156 }, { "epoch": 0.17987576759774557, "grad_norm": 1.335951566696167, "learning_rate": 2.826720200422104e-05, "loss": 0.0921, "step": 10157 }, { "epoch": 0.179893477134774, "grad_norm": 1.0525102615356445, "learning_rate": 2.826680055532979e-05, "loss": 0.1464, "step": 10158 }, { "epoch": 0.17991118667180242, "grad_norm": 1.962480902671814, "learning_rate": 2.8266399062792102e-05, "loss": 0.0831, "step": 10159 }, { "epoch": 0.17992889620883087, "grad_norm": 1.2747650146484375, "learning_rate": 2.826599752660931e-05, "loss": 0.089, "step": 10160 }, { "epoch": 0.1799466057458593, "grad_norm": 1.3173633813858032, "learning_rate": 2.8265595946782724e-05, "loss": 0.1417, "step": 10161 }, { "epoch": 0.17996431528288773, "grad_norm": 1.1760245561599731, "learning_rate": 2.826519432331367e-05, "loss": 0.1019, "step": 10162 }, { "epoch": 0.17998202481991615, "grad_norm": 1.2761926651000977, "learning_rate": 2.8264792656203467e-05, "loss": 0.0886, "step": 10163 }, { "epoch": 0.17999973435694458, "grad_norm": 0.7104843258857727, "learning_rate": 2.826439094545344e-05, "loss": 0.1127, "step": 10164 }, { "epoch": 0.180017443893973, "grad_norm": 1.1886723041534424, "learning_rate": 2.8263989191064906e-05, "loss": 0.1181, "step": 10165 }, { "epoch": 0.18003515343100143, "grad_norm": 1.8061548471450806, "learning_rate": 2.8263587393039187e-05, "loss": 0.096, "step": 10166 }, { "epoch": 0.18005286296802986, "grad_norm": 2.113755226135254, "learning_rate": 2.826318555137761e-05, "loss": 0.1097, "step": 10167 }, { "epoch": 0.18007057250505829, "grad_norm": 1.0995303392410278, "learning_rate": 2.8262783666081495e-05, "loss": 0.0724, "step": 10168 }, { "epoch": 0.1800882820420867, "grad_norm": 1.936652421951294, "learning_rate": 2.826238173715216e-05, "loss": 0.123, "step": 10169 }, { "epoch": 0.18010599157911514, "grad_norm": 0.677354097366333, "learning_rate": 2.8261979764590936e-05, "loss": 0.107, "step": 10170 }, { "epoch": 0.18012370111614356, "grad_norm": 1.0087559223175049, "learning_rate": 2.8261577748399133e-05, "loss": 0.1149, "step": 10171 }, { "epoch": 0.180141410653172, "grad_norm": 0.9691013097763062, "learning_rate": 2.826117568857809e-05, "loss": 0.1328, "step": 10172 }, { "epoch": 0.18015912019020042, "grad_norm": 0.8427352905273438, "learning_rate": 2.8260773585129115e-05, "loss": 0.1138, "step": 10173 }, { "epoch": 0.18017682972722884, "grad_norm": 1.1108702421188354, "learning_rate": 2.8260371438053533e-05, "loss": 0.1189, "step": 10174 }, { "epoch": 0.1801945392642573, "grad_norm": 0.9543166756629944, "learning_rate": 2.825996924735267e-05, "loss": 0.1092, "step": 10175 }, { "epoch": 0.18021224880128572, "grad_norm": 1.2405202388763428, "learning_rate": 2.8259567013027857e-05, "loss": 0.1466, "step": 10176 }, { "epoch": 0.18022995833831415, "grad_norm": 1.3559597730636597, "learning_rate": 2.8259164735080403e-05, "loss": 0.1028, "step": 10177 }, { "epoch": 0.18024766787534258, "grad_norm": 1.110055685043335, "learning_rate": 2.825876241351164e-05, "loss": 0.169, "step": 10178 }, { "epoch": 0.180265377412371, "grad_norm": 0.5962437391281128, "learning_rate": 2.825836004832289e-05, "loss": 0.0762, "step": 10179 }, { "epoch": 0.18028308694939943, "grad_norm": 1.001584529876709, "learning_rate": 2.8257957639515474e-05, "loss": 0.1491, "step": 10180 }, { "epoch": 0.18030079648642786, "grad_norm": 0.9119728803634644, "learning_rate": 2.825755518709072e-05, "loss": 0.1268, "step": 10181 }, { "epoch": 0.18031850602345628, "grad_norm": 1.0076947212219238, "learning_rate": 2.825715269104995e-05, "loss": 0.1306, "step": 10182 }, { "epoch": 0.1803362155604847, "grad_norm": 1.0993685722351074, "learning_rate": 2.825675015139449e-05, "loss": 0.134, "step": 10183 }, { "epoch": 0.18035392509751313, "grad_norm": 1.0522228479385376, "learning_rate": 2.8256347568125663e-05, "loss": 0.1013, "step": 10184 }, { "epoch": 0.18037163463454156, "grad_norm": 1.3771061897277832, "learning_rate": 2.8255944941244792e-05, "loss": 0.115, "step": 10185 }, { "epoch": 0.18038934417157, "grad_norm": 0.8612002730369568, "learning_rate": 2.8255542270753203e-05, "loss": 0.1165, "step": 10186 }, { "epoch": 0.1804070537085984, "grad_norm": 1.315782070159912, "learning_rate": 2.825513955665222e-05, "loss": 0.133, "step": 10187 }, { "epoch": 0.18042476324562684, "grad_norm": 0.9931589365005493, "learning_rate": 2.825473679894317e-05, "loss": 0.1159, "step": 10188 }, { "epoch": 0.18044247278265527, "grad_norm": 0.5998713374137878, "learning_rate": 2.8254333997627374e-05, "loss": 0.1076, "step": 10189 }, { "epoch": 0.18046018231968372, "grad_norm": 0.9376753568649292, "learning_rate": 2.8253931152706168e-05, "loss": 0.0891, "step": 10190 }, { "epoch": 0.18047789185671215, "grad_norm": 1.070462942123413, "learning_rate": 2.8253528264180858e-05, "loss": 0.0947, "step": 10191 }, { "epoch": 0.18049560139374057, "grad_norm": 1.0180432796478271, "learning_rate": 2.8253125332052786e-05, "loss": 0.1118, "step": 10192 }, { "epoch": 0.180513310930769, "grad_norm": 0.988827645778656, "learning_rate": 2.825272235632327e-05, "loss": 0.132, "step": 10193 }, { "epoch": 0.18053102046779743, "grad_norm": 1.0715057849884033, "learning_rate": 2.8252319336993643e-05, "loss": 0.1047, "step": 10194 }, { "epoch": 0.18054873000482585, "grad_norm": 1.026196002960205, "learning_rate": 2.825191627406522e-05, "loss": 0.0884, "step": 10195 }, { "epoch": 0.18056643954185428, "grad_norm": 1.1658257246017456, "learning_rate": 2.8251513167539337e-05, "loss": 0.1144, "step": 10196 }, { "epoch": 0.1805841490788827, "grad_norm": 1.5124109983444214, "learning_rate": 2.8251110017417313e-05, "loss": 0.087, "step": 10197 }, { "epoch": 0.18060185861591113, "grad_norm": 1.4834976196289062, "learning_rate": 2.8250706823700476e-05, "loss": 0.1212, "step": 10198 }, { "epoch": 0.18061956815293956, "grad_norm": 1.1588313579559326, "learning_rate": 2.825030358639016e-05, "loss": 0.1007, "step": 10199 }, { "epoch": 0.18063727768996798, "grad_norm": 1.0309324264526367, "learning_rate": 2.8249900305487683e-05, "loss": 0.0752, "step": 10200 }, { "epoch": 0.1806549872269964, "grad_norm": 0.9431357979774475, "learning_rate": 2.824949698099437e-05, "loss": 0.1137, "step": 10201 }, { "epoch": 0.18067269676402484, "grad_norm": 1.5420732498168945, "learning_rate": 2.8249093612911558e-05, "loss": 0.1396, "step": 10202 }, { "epoch": 0.18069040630105326, "grad_norm": 0.9237303137779236, "learning_rate": 2.824869020124056e-05, "loss": 0.1191, "step": 10203 }, { "epoch": 0.1807081158380817, "grad_norm": 1.142896294593811, "learning_rate": 2.824828674598272e-05, "loss": 0.17, "step": 10204 }, { "epoch": 0.18072582537511014, "grad_norm": 0.9246733784675598, "learning_rate": 2.8247883247139346e-05, "loss": 0.098, "step": 10205 }, { "epoch": 0.18074353491213857, "grad_norm": 0.9562126994132996, "learning_rate": 2.8247479704711787e-05, "loss": 0.1594, "step": 10206 }, { "epoch": 0.180761244449167, "grad_norm": 1.1774115562438965, "learning_rate": 2.8247076118701354e-05, "loss": 0.1251, "step": 10207 }, { "epoch": 0.18077895398619542, "grad_norm": 1.257710337638855, "learning_rate": 2.824667248910938e-05, "loss": 0.0994, "step": 10208 }, { "epoch": 0.18079666352322385, "grad_norm": 1.0449302196502686, "learning_rate": 2.82462688159372e-05, "loss": 0.1042, "step": 10209 }, { "epoch": 0.18081437306025228, "grad_norm": 0.827275276184082, "learning_rate": 2.824586509918613e-05, "loss": 0.1239, "step": 10210 }, { "epoch": 0.1808320825972807, "grad_norm": 0.9117798209190369, "learning_rate": 2.8245461338857505e-05, "loss": 0.092, "step": 10211 }, { "epoch": 0.18084979213430913, "grad_norm": 0.9937015175819397, "learning_rate": 2.8245057534952647e-05, "loss": 0.1363, "step": 10212 }, { "epoch": 0.18086750167133755, "grad_norm": 1.1314094066619873, "learning_rate": 2.824465368747289e-05, "loss": 0.0986, "step": 10213 }, { "epoch": 0.18088521120836598, "grad_norm": 1.2189629077911377, "learning_rate": 2.824424979641957e-05, "loss": 0.1235, "step": 10214 }, { "epoch": 0.1809029207453944, "grad_norm": 0.9398388266563416, "learning_rate": 2.8243845861794003e-05, "loss": 0.0904, "step": 10215 }, { "epoch": 0.18092063028242283, "grad_norm": 0.6940770745277405, "learning_rate": 2.824344188359752e-05, "loss": 0.1088, "step": 10216 }, { "epoch": 0.18093833981945126, "grad_norm": 1.596177339553833, "learning_rate": 2.8243037861831458e-05, "loss": 0.1708, "step": 10217 }, { "epoch": 0.1809560493564797, "grad_norm": 1.1905192136764526, "learning_rate": 2.8242633796497135e-05, "loss": 0.1475, "step": 10218 }, { "epoch": 0.1809737588935081, "grad_norm": 0.9301444888114929, "learning_rate": 2.824222968759589e-05, "loss": 0.0941, "step": 10219 }, { "epoch": 0.18099146843053657, "grad_norm": 0.7456848621368408, "learning_rate": 2.8241825535129053e-05, "loss": 0.0909, "step": 10220 }, { "epoch": 0.181009177967565, "grad_norm": 1.1774606704711914, "learning_rate": 2.824142133909794e-05, "loss": 0.0836, "step": 10221 }, { "epoch": 0.18102688750459342, "grad_norm": 1.102799654006958, "learning_rate": 2.8241017099503894e-05, "loss": 0.0941, "step": 10222 }, { "epoch": 0.18104459704162185, "grad_norm": 1.4635508060455322, "learning_rate": 2.8240612816348242e-05, "loss": 0.1357, "step": 10223 }, { "epoch": 0.18106230657865027, "grad_norm": 0.7664229273796082, "learning_rate": 2.8240208489632314e-05, "loss": 0.0827, "step": 10224 }, { "epoch": 0.1810800161156787, "grad_norm": 1.0205445289611816, "learning_rate": 2.8239804119357437e-05, "loss": 0.1111, "step": 10225 }, { "epoch": 0.18109772565270713, "grad_norm": 1.0133610963821411, "learning_rate": 2.8239399705524946e-05, "loss": 0.104, "step": 10226 }, { "epoch": 0.18111543518973555, "grad_norm": 1.0677294731140137, "learning_rate": 2.8238995248136166e-05, "loss": 0.146, "step": 10227 }, { "epoch": 0.18113314472676398, "grad_norm": 1.1264944076538086, "learning_rate": 2.8238590747192432e-05, "loss": 0.1167, "step": 10228 }, { "epoch": 0.1811508542637924, "grad_norm": 0.6945620775222778, "learning_rate": 2.823818620269507e-05, "loss": 0.0913, "step": 10229 }, { "epoch": 0.18116856380082083, "grad_norm": 1.469844102859497, "learning_rate": 2.823778161464542e-05, "loss": 0.1014, "step": 10230 }, { "epoch": 0.18118627333784926, "grad_norm": 0.9831147789955139, "learning_rate": 2.8237376983044805e-05, "loss": 0.1408, "step": 10231 }, { "epoch": 0.18120398287487768, "grad_norm": 1.1327513456344604, "learning_rate": 2.823697230789456e-05, "loss": 0.084, "step": 10232 }, { "epoch": 0.1812216924119061, "grad_norm": 0.7291767001152039, "learning_rate": 2.8236567589196013e-05, "loss": 0.11, "step": 10233 }, { "epoch": 0.18123940194893454, "grad_norm": 1.4225068092346191, "learning_rate": 2.82361628269505e-05, "loss": 0.1284, "step": 10234 }, { "epoch": 0.181257111485963, "grad_norm": 1.531543493270874, "learning_rate": 2.8235758021159347e-05, "loss": 0.1144, "step": 10235 }, { "epoch": 0.18127482102299142, "grad_norm": 1.087356686592102, "learning_rate": 2.823535317182389e-05, "loss": 0.1241, "step": 10236 }, { "epoch": 0.18129253056001984, "grad_norm": 1.1202892065048218, "learning_rate": 2.8234948278945464e-05, "loss": 0.134, "step": 10237 }, { "epoch": 0.18131024009704827, "grad_norm": 1.7170706987380981, "learning_rate": 2.823454334252539e-05, "loss": 0.1835, "step": 10238 }, { "epoch": 0.1813279496340767, "grad_norm": 0.8610799908638, "learning_rate": 2.8234138362565008e-05, "loss": 0.0852, "step": 10239 }, { "epoch": 0.18134565917110512, "grad_norm": 1.1385769844055176, "learning_rate": 2.8233733339065653e-05, "loss": 0.0964, "step": 10240 }, { "epoch": 0.18136336870813355, "grad_norm": 1.1181451082229614, "learning_rate": 2.823332827202865e-05, "loss": 0.0942, "step": 10241 }, { "epoch": 0.18138107824516198, "grad_norm": 1.4656600952148438, "learning_rate": 2.8232923161455337e-05, "loss": 0.1236, "step": 10242 }, { "epoch": 0.1813987877821904, "grad_norm": 0.7013381123542786, "learning_rate": 2.8232518007347046e-05, "loss": 0.1112, "step": 10243 }, { "epoch": 0.18141649731921883, "grad_norm": 0.9191396236419678, "learning_rate": 2.823211280970511e-05, "loss": 0.0821, "step": 10244 }, { "epoch": 0.18143420685624725, "grad_norm": 0.8911729454994202, "learning_rate": 2.8231707568530857e-05, "loss": 0.1348, "step": 10245 }, { "epoch": 0.18145191639327568, "grad_norm": 1.0614415407180786, "learning_rate": 2.823130228382563e-05, "loss": 0.1223, "step": 10246 }, { "epoch": 0.1814696259303041, "grad_norm": 1.6945316791534424, "learning_rate": 2.8230896955590753e-05, "loss": 0.1383, "step": 10247 }, { "epoch": 0.18148733546733253, "grad_norm": 1.1748080253601074, "learning_rate": 2.8230491583827568e-05, "loss": 0.0985, "step": 10248 }, { "epoch": 0.18150504500436096, "grad_norm": 0.9755350351333618, "learning_rate": 2.8230086168537396e-05, "loss": 0.1345, "step": 10249 }, { "epoch": 0.18152275454138941, "grad_norm": 1.2473303079605103, "learning_rate": 2.8229680709721585e-05, "loss": 0.1096, "step": 10250 }, { "epoch": 0.18154046407841784, "grad_norm": 1.4950816631317139, "learning_rate": 2.822927520738146e-05, "loss": 0.1421, "step": 10251 }, { "epoch": 0.18155817361544627, "grad_norm": 0.9872257113456726, "learning_rate": 2.8228869661518355e-05, "loss": 0.1107, "step": 10252 }, { "epoch": 0.1815758831524747, "grad_norm": 2.3080945014953613, "learning_rate": 2.822846407213361e-05, "loss": 0.1397, "step": 10253 }, { "epoch": 0.18159359268950312, "grad_norm": 0.779762327671051, "learning_rate": 2.822805843922856e-05, "loss": 0.1116, "step": 10254 }, { "epoch": 0.18161130222653155, "grad_norm": 0.9787821173667908, "learning_rate": 2.822765276280453e-05, "loss": 0.1045, "step": 10255 }, { "epoch": 0.18162901176355997, "grad_norm": 0.9136310815811157, "learning_rate": 2.8227247042862864e-05, "loss": 0.1099, "step": 10256 }, { "epoch": 0.1816467213005884, "grad_norm": 0.951824963092804, "learning_rate": 2.8226841279404888e-05, "loss": 0.1154, "step": 10257 }, { "epoch": 0.18166443083761682, "grad_norm": 1.5403647422790527, "learning_rate": 2.8226435472431942e-05, "loss": 0.1004, "step": 10258 }, { "epoch": 0.18168214037464525, "grad_norm": 0.7109566330909729, "learning_rate": 2.8226029621945366e-05, "loss": 0.1093, "step": 10259 }, { "epoch": 0.18169984991167368, "grad_norm": 1.3367518186569214, "learning_rate": 2.8225623727946487e-05, "loss": 0.1345, "step": 10260 }, { "epoch": 0.1817175594487021, "grad_norm": 0.8604671359062195, "learning_rate": 2.8225217790436647e-05, "loss": 0.1056, "step": 10261 }, { "epoch": 0.18173526898573053, "grad_norm": 2.2250735759735107, "learning_rate": 2.8224811809417178e-05, "loss": 0.1143, "step": 10262 }, { "epoch": 0.18175297852275896, "grad_norm": 1.0851211547851562, "learning_rate": 2.8224405784889414e-05, "loss": 0.1347, "step": 10263 }, { "epoch": 0.1817706880597874, "grad_norm": 0.8318812847137451, "learning_rate": 2.822399971685469e-05, "loss": 0.1277, "step": 10264 }, { "epoch": 0.18178839759681584, "grad_norm": 1.9679216146469116, "learning_rate": 2.8223593605314348e-05, "loss": 0.1455, "step": 10265 }, { "epoch": 0.18180610713384426, "grad_norm": 1.3911807537078857, "learning_rate": 2.8223187450269724e-05, "loss": 0.1053, "step": 10266 }, { "epoch": 0.1818238166708727, "grad_norm": 1.0586175918579102, "learning_rate": 2.822278125172214e-05, "loss": 0.1143, "step": 10267 }, { "epoch": 0.18184152620790112, "grad_norm": 0.6910115480422974, "learning_rate": 2.822237500967295e-05, "loss": 0.1273, "step": 10268 }, { "epoch": 0.18185923574492954, "grad_norm": 0.9567924737930298, "learning_rate": 2.8221968724123484e-05, "loss": 0.103, "step": 10269 }, { "epoch": 0.18187694528195797, "grad_norm": 1.3104937076568604, "learning_rate": 2.822156239507508e-05, "loss": 0.1376, "step": 10270 }, { "epoch": 0.1818946548189864, "grad_norm": 1.074855923652649, "learning_rate": 2.8221156022529067e-05, "loss": 0.1098, "step": 10271 }, { "epoch": 0.18191236435601482, "grad_norm": 1.0899046659469604, "learning_rate": 2.8220749606486795e-05, "loss": 0.1212, "step": 10272 }, { "epoch": 0.18193007389304325, "grad_norm": 1.2158994674682617, "learning_rate": 2.8220343146949587e-05, "loss": 0.1123, "step": 10273 }, { "epoch": 0.18194778343007167, "grad_norm": 0.7284682393074036, "learning_rate": 2.821993664391879e-05, "loss": 0.1495, "step": 10274 }, { "epoch": 0.1819654929671001, "grad_norm": 1.2148504257202148, "learning_rate": 2.821953009739574e-05, "loss": 0.1158, "step": 10275 }, { "epoch": 0.18198320250412853, "grad_norm": 1.272445559501648, "learning_rate": 2.8219123507381773e-05, "loss": 0.1422, "step": 10276 }, { "epoch": 0.18200091204115695, "grad_norm": 21.14447021484375, "learning_rate": 2.8218716873878222e-05, "loss": 0.1589, "step": 10277 }, { "epoch": 0.18201862157818538, "grad_norm": 2.653238296508789, "learning_rate": 2.8218310196886433e-05, "loss": 0.1337, "step": 10278 }, { "epoch": 0.18203633111521383, "grad_norm": 2.0193212032318115, "learning_rate": 2.8217903476407743e-05, "loss": 0.1498, "step": 10279 }, { "epoch": 0.18205404065224226, "grad_norm": 1.7987744808197021, "learning_rate": 2.8217496712443482e-05, "loss": 0.1082, "step": 10280 }, { "epoch": 0.1820717501892707, "grad_norm": 2.1958320140838623, "learning_rate": 2.8217089904995e-05, "loss": 0.1329, "step": 10281 }, { "epoch": 0.1820894597262991, "grad_norm": 1.919392466545105, "learning_rate": 2.8216683054063623e-05, "loss": 0.1207, "step": 10282 }, { "epoch": 0.18210716926332754, "grad_norm": 0.8708932995796204, "learning_rate": 2.8216276159650698e-05, "loss": 0.1598, "step": 10283 }, { "epoch": 0.18212487880035597, "grad_norm": 0.9090081453323364, "learning_rate": 2.8215869221757563e-05, "loss": 0.0858, "step": 10284 }, { "epoch": 0.1821425883373844, "grad_norm": 1.1934870481491089, "learning_rate": 2.8215462240385552e-05, "loss": 0.1551, "step": 10285 }, { "epoch": 0.18216029787441282, "grad_norm": 0.7232547402381897, "learning_rate": 2.8215055215536008e-05, "loss": 0.1338, "step": 10286 }, { "epoch": 0.18217800741144125, "grad_norm": 0.7276062965393066, "learning_rate": 2.821464814721027e-05, "loss": 0.0963, "step": 10287 }, { "epoch": 0.18219571694846967, "grad_norm": 1.0828351974487305, "learning_rate": 2.8214241035409674e-05, "loss": 0.1319, "step": 10288 }, { "epoch": 0.1822134264854981, "grad_norm": 1.2590571641921997, "learning_rate": 2.8213833880135562e-05, "loss": 0.1786, "step": 10289 }, { "epoch": 0.18223113602252652, "grad_norm": 1.205971598625183, "learning_rate": 2.821342668138928e-05, "loss": 0.1627, "step": 10290 }, { "epoch": 0.18224884555955495, "grad_norm": 1.1531339883804321, "learning_rate": 2.821301943917215e-05, "loss": 0.1355, "step": 10291 }, { "epoch": 0.18226655509658338, "grad_norm": 1.1069804430007935, "learning_rate": 2.8212612153485525e-05, "loss": 0.1349, "step": 10292 }, { "epoch": 0.1822842646336118, "grad_norm": 0.8601309061050415, "learning_rate": 2.8212204824330748e-05, "loss": 0.1163, "step": 10293 }, { "epoch": 0.18230197417064026, "grad_norm": 1.2832744121551514, "learning_rate": 2.8211797451709147e-05, "loss": 0.1265, "step": 10294 }, { "epoch": 0.18231968370766868, "grad_norm": 1.5438055992126465, "learning_rate": 2.821139003562207e-05, "loss": 0.1143, "step": 10295 }, { "epoch": 0.1823373932446971, "grad_norm": 0.9179930090904236, "learning_rate": 2.8210982576070857e-05, "loss": 0.0704, "step": 10296 }, { "epoch": 0.18235510278172554, "grad_norm": 1.156711459159851, "learning_rate": 2.821057507305685e-05, "loss": 0.119, "step": 10297 }, { "epoch": 0.18237281231875396, "grad_norm": 0.9385142922401428, "learning_rate": 2.8210167526581385e-05, "loss": 0.1338, "step": 10298 }, { "epoch": 0.1823905218557824, "grad_norm": 0.5148574113845825, "learning_rate": 2.8209759936645808e-05, "loss": 0.1151, "step": 10299 }, { "epoch": 0.18240823139281082, "grad_norm": 0.9723930358886719, "learning_rate": 2.8209352303251453e-05, "loss": 0.1051, "step": 10300 }, { "epoch": 0.18242594092983924, "grad_norm": 0.9826551675796509, "learning_rate": 2.8208944626399666e-05, "loss": 0.1199, "step": 10301 }, { "epoch": 0.18244365046686767, "grad_norm": 0.9581248164176941, "learning_rate": 2.820853690609179e-05, "loss": 0.1162, "step": 10302 }, { "epoch": 0.1824613600038961, "grad_norm": 1.086858868598938, "learning_rate": 2.820812914232916e-05, "loss": 0.1533, "step": 10303 }, { "epoch": 0.18247906954092452, "grad_norm": 0.7938152551651001, "learning_rate": 2.820772133511312e-05, "loss": 0.0887, "step": 10304 }, { "epoch": 0.18249677907795295, "grad_norm": 4.54545259475708, "learning_rate": 2.8207313484445013e-05, "loss": 0.1046, "step": 10305 }, { "epoch": 0.18251448861498137, "grad_norm": 1.1204255819320679, "learning_rate": 2.8206905590326185e-05, "loss": 0.1031, "step": 10306 }, { "epoch": 0.1825321981520098, "grad_norm": 0.9517515897750854, "learning_rate": 2.820649765275797e-05, "loss": 0.1057, "step": 10307 }, { "epoch": 0.18254990768903823, "grad_norm": 1.0160527229309082, "learning_rate": 2.8206089671741712e-05, "loss": 0.0886, "step": 10308 }, { "epoch": 0.18256761722606668, "grad_norm": 0.942118227481842, "learning_rate": 2.820568164727876e-05, "loss": 0.0964, "step": 10309 }, { "epoch": 0.1825853267630951, "grad_norm": 1.0984687805175781, "learning_rate": 2.8205273579370446e-05, "loss": 0.1208, "step": 10310 }, { "epoch": 0.18260303630012353, "grad_norm": 0.9238941669464111, "learning_rate": 2.8204865468018117e-05, "loss": 0.1435, "step": 10311 }, { "epoch": 0.18262074583715196, "grad_norm": 0.7471827864646912, "learning_rate": 2.820445731322312e-05, "loss": 0.0965, "step": 10312 }, { "epoch": 0.1826384553741804, "grad_norm": 0.9149797558784485, "learning_rate": 2.8204049114986793e-05, "loss": 0.1283, "step": 10313 }, { "epoch": 0.1826561649112088, "grad_norm": 1.2275753021240234, "learning_rate": 2.820364087331048e-05, "loss": 0.1302, "step": 10314 }, { "epoch": 0.18267387444823724, "grad_norm": 1.0381476879119873, "learning_rate": 2.8203232588195524e-05, "loss": 0.1069, "step": 10315 }, { "epoch": 0.18269158398526567, "grad_norm": 0.9695706367492676, "learning_rate": 2.8202824259643265e-05, "loss": 0.0917, "step": 10316 }, { "epoch": 0.1827092935222941, "grad_norm": 1.0350112915039062, "learning_rate": 2.8202415887655052e-05, "loss": 0.0943, "step": 10317 }, { "epoch": 0.18272700305932252, "grad_norm": 0.6829397082328796, "learning_rate": 2.820200747223223e-05, "loss": 0.092, "step": 10318 }, { "epoch": 0.18274471259635094, "grad_norm": 1.0221055746078491, "learning_rate": 2.8201599013376133e-05, "loss": 0.0973, "step": 10319 }, { "epoch": 0.18276242213337937, "grad_norm": 1.3383405208587646, "learning_rate": 2.820119051108811e-05, "loss": 0.1032, "step": 10320 }, { "epoch": 0.1827801316704078, "grad_norm": 0.9381941556930542, "learning_rate": 2.820078196536951e-05, "loss": 0.1208, "step": 10321 }, { "epoch": 0.18279784120743622, "grad_norm": 1.3317253589630127, "learning_rate": 2.8200373376221672e-05, "loss": 0.1313, "step": 10322 }, { "epoch": 0.18281555074446465, "grad_norm": 0.6901673078536987, "learning_rate": 2.8199964743645938e-05, "loss": 0.0982, "step": 10323 }, { "epoch": 0.1828332602814931, "grad_norm": 1.2067902088165283, "learning_rate": 2.8199556067643658e-05, "loss": 0.1274, "step": 10324 }, { "epoch": 0.18285096981852153, "grad_norm": 1.0233514308929443, "learning_rate": 2.8199147348216167e-05, "loss": 0.1159, "step": 10325 }, { "epoch": 0.18286867935554996, "grad_norm": 1.2767367362976074, "learning_rate": 2.8198738585364824e-05, "loss": 0.0981, "step": 10326 }, { "epoch": 0.18288638889257838, "grad_norm": 1.1744438409805298, "learning_rate": 2.819832977909096e-05, "loss": 0.1189, "step": 10327 }, { "epoch": 0.1829040984296068, "grad_norm": 0.9951429963111877, "learning_rate": 2.8197920929395932e-05, "loss": 0.1275, "step": 10328 }, { "epoch": 0.18292180796663524, "grad_norm": 1.2605557441711426, "learning_rate": 2.8197512036281076e-05, "loss": 0.1354, "step": 10329 }, { "epoch": 0.18293951750366366, "grad_norm": 0.6132369041442871, "learning_rate": 2.8197103099747738e-05, "loss": 0.0707, "step": 10330 }, { "epoch": 0.1829572270406921, "grad_norm": 1.1412869691848755, "learning_rate": 2.8196694119797273e-05, "loss": 0.1441, "step": 10331 }, { "epoch": 0.18297493657772052, "grad_norm": 0.8863256573677063, "learning_rate": 2.819628509643101e-05, "loss": 0.0906, "step": 10332 }, { "epoch": 0.18299264611474894, "grad_norm": 0.9703170657157898, "learning_rate": 2.8195876029650307e-05, "loss": 0.1392, "step": 10333 }, { "epoch": 0.18301035565177737, "grad_norm": 0.9900509119033813, "learning_rate": 2.8195466919456512e-05, "loss": 0.138, "step": 10334 }, { "epoch": 0.1830280651888058, "grad_norm": 1.0809826850891113, "learning_rate": 2.8195057765850958e-05, "loss": 0.1415, "step": 10335 }, { "epoch": 0.18304577472583422, "grad_norm": 0.9585011601448059, "learning_rate": 2.8194648568835002e-05, "loss": 0.1106, "step": 10336 }, { "epoch": 0.18306348426286265, "grad_norm": 0.887271523475647, "learning_rate": 2.8194239328409987e-05, "loss": 0.142, "step": 10337 }, { "epoch": 0.18308119379989107, "grad_norm": 0.9400715231895447, "learning_rate": 2.8193830044577257e-05, "loss": 0.1204, "step": 10338 }, { "epoch": 0.18309890333691953, "grad_norm": 0.9075803160667419, "learning_rate": 2.819342071733816e-05, "loss": 0.1297, "step": 10339 }, { "epoch": 0.18311661287394795, "grad_norm": 1.2142916917800903, "learning_rate": 2.8193011346694047e-05, "loss": 0.1735, "step": 10340 }, { "epoch": 0.18313432241097638, "grad_norm": 1.0620949268341064, "learning_rate": 2.819260193264626e-05, "loss": 0.0936, "step": 10341 }, { "epoch": 0.1831520319480048, "grad_norm": 1.0350563526153564, "learning_rate": 2.8192192475196148e-05, "loss": 0.0931, "step": 10342 }, { "epoch": 0.18316974148503323, "grad_norm": 0.8308811187744141, "learning_rate": 2.8191782974345053e-05, "loss": 0.0928, "step": 10343 }, { "epoch": 0.18318745102206166, "grad_norm": 0.9994953274726868, "learning_rate": 2.8191373430094327e-05, "loss": 0.0734, "step": 10344 }, { "epoch": 0.18320516055909009, "grad_norm": 0.7707306742668152, "learning_rate": 2.819096384244532e-05, "loss": 0.1088, "step": 10345 }, { "epoch": 0.1832228700961185, "grad_norm": 1.0369689464569092, "learning_rate": 2.8190554211399374e-05, "loss": 0.0827, "step": 10346 }, { "epoch": 0.18324057963314694, "grad_norm": 1.3204116821289062, "learning_rate": 2.8190144536957838e-05, "loss": 0.0917, "step": 10347 }, { "epoch": 0.18325828917017536, "grad_norm": 1.274326205253601, "learning_rate": 2.8189734819122065e-05, "loss": 0.1159, "step": 10348 }, { "epoch": 0.1832759987072038, "grad_norm": 0.8953652381896973, "learning_rate": 2.8189325057893394e-05, "loss": 0.0972, "step": 10349 }, { "epoch": 0.18329370824423222, "grad_norm": 1.4851130247116089, "learning_rate": 2.8188915253273176e-05, "loss": 0.1168, "step": 10350 }, { "epoch": 0.18331141778126064, "grad_norm": 1.2184110879898071, "learning_rate": 2.8188505405262768e-05, "loss": 0.1693, "step": 10351 }, { "epoch": 0.18332912731828907, "grad_norm": 0.7435591816902161, "learning_rate": 2.8188095513863507e-05, "loss": 0.12, "step": 10352 }, { "epoch": 0.1833468368553175, "grad_norm": 0.8375227451324463, "learning_rate": 2.8187685579076744e-05, "loss": 0.1036, "step": 10353 }, { "epoch": 0.18336454639234595, "grad_norm": 1.674981951713562, "learning_rate": 2.8187275600903833e-05, "loss": 0.1158, "step": 10354 }, { "epoch": 0.18338225592937438, "grad_norm": 1.3581255674362183, "learning_rate": 2.818686557934611e-05, "loss": 0.1178, "step": 10355 }, { "epoch": 0.1833999654664028, "grad_norm": 0.819446861743927, "learning_rate": 2.8186455514404944e-05, "loss": 0.0827, "step": 10356 }, { "epoch": 0.18341767500343123, "grad_norm": 1.1740270853042603, "learning_rate": 2.818604540608167e-05, "loss": 0.1248, "step": 10357 }, { "epoch": 0.18343538454045966, "grad_norm": 0.919380247592926, "learning_rate": 2.8185635254377634e-05, "loss": 0.0968, "step": 10358 }, { "epoch": 0.18345309407748808, "grad_norm": 1.1721272468566895, "learning_rate": 2.81852250592942e-05, "loss": 0.1168, "step": 10359 }, { "epoch": 0.1834708036145165, "grad_norm": 0.9078735113143921, "learning_rate": 2.8184814820832707e-05, "loss": 0.1134, "step": 10360 }, { "epoch": 0.18348851315154494, "grad_norm": 0.8881794810295105, "learning_rate": 2.8184404538994503e-05, "loss": 0.0889, "step": 10361 }, { "epoch": 0.18350622268857336, "grad_norm": 1.067604660987854, "learning_rate": 2.8183994213780948e-05, "loss": 0.1447, "step": 10362 }, { "epoch": 0.1835239322256018, "grad_norm": 0.9717968106269836, "learning_rate": 2.818358384519338e-05, "loss": 0.0974, "step": 10363 }, { "epoch": 0.18354164176263021, "grad_norm": 0.8257808685302734, "learning_rate": 2.8183173433233155e-05, "loss": 0.0899, "step": 10364 }, { "epoch": 0.18355935129965864, "grad_norm": 0.7375658750534058, "learning_rate": 2.8182762977901625e-05, "loss": 0.0956, "step": 10365 }, { "epoch": 0.18357706083668707, "grad_norm": 0.8266454339027405, "learning_rate": 2.818235247920014e-05, "loss": 0.1138, "step": 10366 }, { "epoch": 0.1835947703737155, "grad_norm": 1.0481247901916504, "learning_rate": 2.8181941937130044e-05, "loss": 0.1369, "step": 10367 }, { "epoch": 0.18361247991074392, "grad_norm": 0.7644380927085876, "learning_rate": 2.8181531351692697e-05, "loss": 0.0938, "step": 10368 }, { "epoch": 0.18363018944777237, "grad_norm": 0.9333551526069641, "learning_rate": 2.818112072288944e-05, "loss": 0.1413, "step": 10369 }, { "epoch": 0.1836478989848008, "grad_norm": 0.8399622440338135, "learning_rate": 2.8180710050721634e-05, "loss": 0.1147, "step": 10370 }, { "epoch": 0.18366560852182923, "grad_norm": 0.7083638906478882, "learning_rate": 2.818029933519062e-05, "loss": 0.0671, "step": 10371 }, { "epoch": 0.18368331805885765, "grad_norm": 0.8101193308830261, "learning_rate": 2.8179888576297756e-05, "loss": 0.0716, "step": 10372 }, { "epoch": 0.18370102759588608, "grad_norm": 1.1690775156021118, "learning_rate": 2.8179477774044397e-05, "loss": 0.1016, "step": 10373 }, { "epoch": 0.1837187371329145, "grad_norm": 0.8087595701217651, "learning_rate": 2.817906692843188e-05, "loss": 0.0864, "step": 10374 }, { "epoch": 0.18373644666994293, "grad_norm": 0.6211724877357483, "learning_rate": 2.817865603946157e-05, "loss": 0.0993, "step": 10375 }, { "epoch": 0.18375415620697136, "grad_norm": 1.28114914894104, "learning_rate": 2.8178245107134818e-05, "loss": 0.1074, "step": 10376 }, { "epoch": 0.18377186574399978, "grad_norm": 0.9446329474449158, "learning_rate": 2.817783413145297e-05, "loss": 0.0828, "step": 10377 }, { "epoch": 0.1837895752810282, "grad_norm": 0.7767227292060852, "learning_rate": 2.8177423112417378e-05, "loss": 0.1107, "step": 10378 }, { "epoch": 0.18380728481805664, "grad_norm": 0.7706063389778137, "learning_rate": 2.8177012050029402e-05, "loss": 0.0918, "step": 10379 }, { "epoch": 0.18382499435508506, "grad_norm": 0.7908567190170288, "learning_rate": 2.8176600944290386e-05, "loss": 0.1247, "step": 10380 }, { "epoch": 0.1838427038921135, "grad_norm": 0.9595304131507874, "learning_rate": 2.8176189795201683e-05, "loss": 0.1062, "step": 10381 }, { "epoch": 0.18386041342914192, "grad_norm": 1.1726617813110352, "learning_rate": 2.817577860276465e-05, "loss": 0.0939, "step": 10382 }, { "epoch": 0.18387812296617034, "grad_norm": 0.9188712239265442, "learning_rate": 2.817536736698064e-05, "loss": 0.1184, "step": 10383 }, { "epoch": 0.1838958325031988, "grad_norm": 1.0514296293258667, "learning_rate": 2.8174956087851005e-05, "loss": 0.1136, "step": 10384 }, { "epoch": 0.18391354204022722, "grad_norm": 0.9094589948654175, "learning_rate": 2.8174544765377094e-05, "loss": 0.136, "step": 10385 }, { "epoch": 0.18393125157725565, "grad_norm": 0.9528067708015442, "learning_rate": 2.8174133399560266e-05, "loss": 0.1181, "step": 10386 }, { "epoch": 0.18394896111428408, "grad_norm": 1.0095189809799194, "learning_rate": 2.8173721990401868e-05, "loss": 0.09, "step": 10387 }, { "epoch": 0.1839666706513125, "grad_norm": 0.8701847195625305, "learning_rate": 2.817331053790326e-05, "loss": 0.0984, "step": 10388 }, { "epoch": 0.18398438018834093, "grad_norm": 0.905920684337616, "learning_rate": 2.817289904206579e-05, "loss": 0.0825, "step": 10389 }, { "epoch": 0.18400208972536936, "grad_norm": 0.7403720021247864, "learning_rate": 2.817248750289082e-05, "loss": 0.11, "step": 10390 }, { "epoch": 0.18401979926239778, "grad_norm": 0.9964848756790161, "learning_rate": 2.817207592037969e-05, "loss": 0.1215, "step": 10391 }, { "epoch": 0.1840375087994262, "grad_norm": 0.926263689994812, "learning_rate": 2.817166429453377e-05, "loss": 0.1406, "step": 10392 }, { "epoch": 0.18405521833645463, "grad_norm": 1.0607552528381348, "learning_rate": 2.8171252625354404e-05, "loss": 0.1452, "step": 10393 }, { "epoch": 0.18407292787348306, "grad_norm": 1.0981695652008057, "learning_rate": 2.817084091284295e-05, "loss": 0.1178, "step": 10394 }, { "epoch": 0.1840906374105115, "grad_norm": 1.0345669984817505, "learning_rate": 2.817042915700076e-05, "loss": 0.0982, "step": 10395 }, { "epoch": 0.1841083469475399, "grad_norm": 0.9922787547111511, "learning_rate": 2.817001735782919e-05, "loss": 0.0926, "step": 10396 }, { "epoch": 0.18412605648456834, "grad_norm": 0.8674080967903137, "learning_rate": 2.8169605515329593e-05, "loss": 0.1229, "step": 10397 }, { "epoch": 0.1841437660215968, "grad_norm": 1.0818827152252197, "learning_rate": 2.8169193629503328e-05, "loss": 0.1027, "step": 10398 }, { "epoch": 0.18416147555862522, "grad_norm": 0.774397075176239, "learning_rate": 2.8168781700351752e-05, "loss": 0.0991, "step": 10399 }, { "epoch": 0.18417918509565365, "grad_norm": 1.149318814277649, "learning_rate": 2.816836972787621e-05, "loss": 0.1221, "step": 10400 }, { "epoch": 0.18419689463268207, "grad_norm": 1.0367549657821655, "learning_rate": 2.8167957712078065e-05, "loss": 0.1388, "step": 10401 }, { "epoch": 0.1842146041697105, "grad_norm": 1.0875380039215088, "learning_rate": 2.8167545652958672e-05, "loss": 0.1314, "step": 10402 }, { "epoch": 0.18423231370673893, "grad_norm": 1.3287280797958374, "learning_rate": 2.8167133550519385e-05, "loss": 0.1119, "step": 10403 }, { "epoch": 0.18425002324376735, "grad_norm": 1.0321286916732788, "learning_rate": 2.8166721404761565e-05, "loss": 0.0932, "step": 10404 }, { "epoch": 0.18426773278079578, "grad_norm": 1.1151748895645142, "learning_rate": 2.816630921568656e-05, "loss": 0.1421, "step": 10405 }, { "epoch": 0.1842854423178242, "grad_norm": 0.7634310722351074, "learning_rate": 2.8165896983295727e-05, "loss": 0.0675, "step": 10406 }, { "epoch": 0.18430315185485263, "grad_norm": 0.6426058411598206, "learning_rate": 2.816548470759043e-05, "loss": 0.1079, "step": 10407 }, { "epoch": 0.18432086139188106, "grad_norm": 0.8234237432479858, "learning_rate": 2.8165072388572013e-05, "loss": 0.1132, "step": 10408 }, { "epoch": 0.18433857092890948, "grad_norm": 0.8138535022735596, "learning_rate": 2.8164660026241842e-05, "loss": 0.078, "step": 10409 }, { "epoch": 0.1843562804659379, "grad_norm": 0.9066152572631836, "learning_rate": 2.8164247620601273e-05, "loss": 0.0863, "step": 10410 }, { "epoch": 0.18437399000296634, "grad_norm": 1.2807708978652954, "learning_rate": 2.816383517165166e-05, "loss": 0.1054, "step": 10411 }, { "epoch": 0.18439169953999476, "grad_norm": 1.2185046672821045, "learning_rate": 2.816342267939436e-05, "loss": 0.1424, "step": 10412 }, { "epoch": 0.18440940907702322, "grad_norm": 1.1680883169174194, "learning_rate": 2.816301014383073e-05, "loss": 0.1177, "step": 10413 }, { "epoch": 0.18442711861405164, "grad_norm": 0.8782331943511963, "learning_rate": 2.8162597564962128e-05, "loss": 0.1221, "step": 10414 }, { "epoch": 0.18444482815108007, "grad_norm": 0.6161668300628662, "learning_rate": 2.8162184942789913e-05, "loss": 0.1308, "step": 10415 }, { "epoch": 0.1844625376881085, "grad_norm": 1.0017666816711426, "learning_rate": 2.816177227731544e-05, "loss": 0.1005, "step": 10416 }, { "epoch": 0.18448024722513692, "grad_norm": 0.6786485910415649, "learning_rate": 2.8161359568540067e-05, "loss": 0.096, "step": 10417 }, { "epoch": 0.18449795676216535, "grad_norm": 0.5306575298309326, "learning_rate": 2.8160946816465154e-05, "loss": 0.094, "step": 10418 }, { "epoch": 0.18451566629919378, "grad_norm": 1.2156792879104614, "learning_rate": 2.8160534021092054e-05, "loss": 0.1132, "step": 10419 }, { "epoch": 0.1845333758362222, "grad_norm": 0.8861389756202698, "learning_rate": 2.816012118242213e-05, "loss": 0.1187, "step": 10420 }, { "epoch": 0.18455108537325063, "grad_norm": 0.8928191065788269, "learning_rate": 2.8159708300456738e-05, "loss": 0.1134, "step": 10421 }, { "epoch": 0.18456879491027905, "grad_norm": 0.9853481650352478, "learning_rate": 2.8159295375197238e-05, "loss": 0.1358, "step": 10422 }, { "epoch": 0.18458650444730748, "grad_norm": 0.9321956634521484, "learning_rate": 2.8158882406644983e-05, "loss": 0.1338, "step": 10423 }, { "epoch": 0.1846042139843359, "grad_norm": 0.9822232127189636, "learning_rate": 2.8158469394801342e-05, "loss": 0.1036, "step": 10424 }, { "epoch": 0.18462192352136433, "grad_norm": 0.5585010647773743, "learning_rate": 2.8158056339667666e-05, "loss": 0.0604, "step": 10425 }, { "epoch": 0.18463963305839276, "grad_norm": 1.5467215776443481, "learning_rate": 2.815764324124531e-05, "loss": 0.1073, "step": 10426 }, { "epoch": 0.1846573425954212, "grad_norm": 1.2425702810287476, "learning_rate": 2.8157230099535645e-05, "loss": 0.149, "step": 10427 }, { "epoch": 0.18467505213244964, "grad_norm": 0.6157933473587036, "learning_rate": 2.8156816914540025e-05, "loss": 0.0703, "step": 10428 }, { "epoch": 0.18469276166947807, "grad_norm": 0.6946881413459778, "learning_rate": 2.8156403686259804e-05, "loss": 0.1108, "step": 10429 }, { "epoch": 0.1847104712065065, "grad_norm": 1.057180404663086, "learning_rate": 2.8155990414696345e-05, "loss": 0.122, "step": 10430 }, { "epoch": 0.18472818074353492, "grad_norm": 0.8843851685523987, "learning_rate": 2.815557709985101e-05, "loss": 0.1125, "step": 10431 }, { "epoch": 0.18474589028056335, "grad_norm": 0.7528828382492065, "learning_rate": 2.8155163741725156e-05, "loss": 0.1023, "step": 10432 }, { "epoch": 0.18476359981759177, "grad_norm": 0.6105936169624329, "learning_rate": 2.8154750340320145e-05, "loss": 0.089, "step": 10433 }, { "epoch": 0.1847813093546202, "grad_norm": 1.0035966634750366, "learning_rate": 2.8154336895637332e-05, "loss": 0.1623, "step": 10434 }, { "epoch": 0.18479901889164863, "grad_norm": 1.0379478931427002, "learning_rate": 2.8153923407678087e-05, "loss": 0.1176, "step": 10435 }, { "epoch": 0.18481672842867705, "grad_norm": 1.2188962697982788, "learning_rate": 2.815350987644376e-05, "loss": 0.1571, "step": 10436 }, { "epoch": 0.18483443796570548, "grad_norm": 1.0898714065551758, "learning_rate": 2.8153096301935718e-05, "loss": 0.1214, "step": 10437 }, { "epoch": 0.1848521475027339, "grad_norm": 1.155440330505371, "learning_rate": 2.8152682684155318e-05, "loss": 0.0913, "step": 10438 }, { "epoch": 0.18486985703976233, "grad_norm": 0.9179189205169678, "learning_rate": 2.8152269023103925e-05, "loss": 0.0977, "step": 10439 }, { "epoch": 0.18488756657679076, "grad_norm": 0.7306157350540161, "learning_rate": 2.8151855318782897e-05, "loss": 0.1106, "step": 10440 }, { "epoch": 0.18490527611381918, "grad_norm": 0.68089759349823, "learning_rate": 2.815144157119359e-05, "loss": 0.1085, "step": 10441 }, { "epoch": 0.1849229856508476, "grad_norm": 1.492537260055542, "learning_rate": 2.8151027780337376e-05, "loss": 0.096, "step": 10442 }, { "epoch": 0.18494069518787606, "grad_norm": 0.6868723034858704, "learning_rate": 2.815061394621561e-05, "loss": 0.1402, "step": 10443 }, { "epoch": 0.1849584047249045, "grad_norm": 1.0642701387405396, "learning_rate": 2.815020006882965e-05, "loss": 0.13, "step": 10444 }, { "epoch": 0.18497611426193292, "grad_norm": 1.09943687915802, "learning_rate": 2.8149786148180866e-05, "loss": 0.1436, "step": 10445 }, { "epoch": 0.18499382379896134, "grad_norm": 1.180288553237915, "learning_rate": 2.8149372184270614e-05, "loss": 0.0822, "step": 10446 }, { "epoch": 0.18501153333598977, "grad_norm": 1.1265223026275635, "learning_rate": 2.814895817710026e-05, "loss": 0.1131, "step": 10447 }, { "epoch": 0.1850292428730182, "grad_norm": 0.9009881615638733, "learning_rate": 2.8148544126671162e-05, "loss": 0.1316, "step": 10448 }, { "epoch": 0.18504695241004662, "grad_norm": 0.7663016319274902, "learning_rate": 2.8148130032984684e-05, "loss": 0.1079, "step": 10449 }, { "epoch": 0.18506466194707505, "grad_norm": 1.1179426908493042, "learning_rate": 2.8147715896042185e-05, "loss": 0.1017, "step": 10450 }, { "epoch": 0.18508237148410348, "grad_norm": 1.0682390928268433, "learning_rate": 2.8147301715845033e-05, "loss": 0.0859, "step": 10451 }, { "epoch": 0.1851000810211319, "grad_norm": 1.1562526226043701, "learning_rate": 2.8146887492394585e-05, "loss": 0.1205, "step": 10452 }, { "epoch": 0.18511779055816033, "grad_norm": 1.0578571557998657, "learning_rate": 2.8146473225692212e-05, "loss": 0.1314, "step": 10453 }, { "epoch": 0.18513550009518875, "grad_norm": 1.0132217407226562, "learning_rate": 2.8146058915739272e-05, "loss": 0.1334, "step": 10454 }, { "epoch": 0.18515320963221718, "grad_norm": 0.832125723361969, "learning_rate": 2.8145644562537122e-05, "loss": 0.1089, "step": 10455 }, { "epoch": 0.1851709191692456, "grad_norm": 0.9420949816703796, "learning_rate": 2.8145230166087137e-05, "loss": 0.0905, "step": 10456 }, { "epoch": 0.18518862870627403, "grad_norm": 0.6214367747306824, "learning_rate": 2.8144815726390674e-05, "loss": 0.1075, "step": 10457 }, { "epoch": 0.1852063382433025, "grad_norm": 1.166468858718872, "learning_rate": 2.814440124344909e-05, "loss": 0.1011, "step": 10458 }, { "epoch": 0.1852240477803309, "grad_norm": 1.3443865776062012, "learning_rate": 2.814398671726376e-05, "loss": 0.1641, "step": 10459 }, { "epoch": 0.18524175731735934, "grad_norm": 1.1310330629348755, "learning_rate": 2.8143572147836048e-05, "loss": 0.1425, "step": 10460 }, { "epoch": 0.18525946685438777, "grad_norm": 0.6146723031997681, "learning_rate": 2.8143157535167303e-05, "loss": 0.1376, "step": 10461 }, { "epoch": 0.1852771763914162, "grad_norm": 1.0099602937698364, "learning_rate": 2.8142742879258908e-05, "loss": 0.1317, "step": 10462 }, { "epoch": 0.18529488592844462, "grad_norm": 0.9621031880378723, "learning_rate": 2.814232818011221e-05, "loss": 0.1524, "step": 10463 }, { "epoch": 0.18531259546547305, "grad_norm": 1.0564864873886108, "learning_rate": 2.8141913437728587e-05, "loss": 0.0854, "step": 10464 }, { "epoch": 0.18533030500250147, "grad_norm": 0.5815514326095581, "learning_rate": 2.8141498652109398e-05, "loss": 0.0787, "step": 10465 }, { "epoch": 0.1853480145395299, "grad_norm": 0.8287315964698792, "learning_rate": 2.8141083823256008e-05, "loss": 0.0925, "step": 10466 }, { "epoch": 0.18536572407655832, "grad_norm": 2.4438257217407227, "learning_rate": 2.814066895116978e-05, "loss": 0.1, "step": 10467 }, { "epoch": 0.18538343361358675, "grad_norm": 2.739370346069336, "learning_rate": 2.814025403585208e-05, "loss": 0.1127, "step": 10468 }, { "epoch": 0.18540114315061518, "grad_norm": 0.6359924674034119, "learning_rate": 2.8139839077304273e-05, "loss": 0.1421, "step": 10469 }, { "epoch": 0.1854188526876436, "grad_norm": 0.7540038228034973, "learning_rate": 2.8139424075527725e-05, "loss": 0.1089, "step": 10470 }, { "epoch": 0.18543656222467203, "grad_norm": 1.244555950164795, "learning_rate": 2.8139009030523802e-05, "loss": 0.1613, "step": 10471 }, { "epoch": 0.18545427176170046, "grad_norm": 1.3585177659988403, "learning_rate": 2.8138593942293867e-05, "loss": 0.0987, "step": 10472 }, { "epoch": 0.1854719812987289, "grad_norm": 0.7456543445587158, "learning_rate": 2.8138178810839288e-05, "loss": 0.0947, "step": 10473 }, { "epoch": 0.18548969083575734, "grad_norm": 1.3302439451217651, "learning_rate": 2.8137763636161424e-05, "loss": 0.1679, "step": 10474 }, { "epoch": 0.18550740037278576, "grad_norm": 1.1288774013519287, "learning_rate": 2.8137348418261654e-05, "loss": 0.0822, "step": 10475 }, { "epoch": 0.1855251099098142, "grad_norm": 1.4322468042373657, "learning_rate": 2.813693315714133e-05, "loss": 0.1329, "step": 10476 }, { "epoch": 0.18554281944684262, "grad_norm": 1.2504130601882935, "learning_rate": 2.813651785280183e-05, "loss": 0.1114, "step": 10477 }, { "epoch": 0.18556052898387104, "grad_norm": 0.9050321578979492, "learning_rate": 2.8136102505244513e-05, "loss": 0.0712, "step": 10478 }, { "epoch": 0.18557823852089947, "grad_norm": 0.5335842370986938, "learning_rate": 2.8135687114470746e-05, "loss": 0.0818, "step": 10479 }, { "epoch": 0.1855959480579279, "grad_norm": 0.964661180973053, "learning_rate": 2.8135271680481898e-05, "loss": 0.1296, "step": 10480 }, { "epoch": 0.18561365759495632, "grad_norm": 0.5832924842834473, "learning_rate": 2.8134856203279336e-05, "loss": 0.1456, "step": 10481 }, { "epoch": 0.18563136713198475, "grad_norm": 1.3801591396331787, "learning_rate": 2.8134440682864425e-05, "loss": 0.1367, "step": 10482 }, { "epoch": 0.18564907666901317, "grad_norm": 1.209842324256897, "learning_rate": 2.813402511923853e-05, "loss": 0.0985, "step": 10483 }, { "epoch": 0.1856667862060416, "grad_norm": 0.979819655418396, "learning_rate": 2.8133609512403023e-05, "loss": 0.1319, "step": 10484 }, { "epoch": 0.18568449574307003, "grad_norm": 0.8279260993003845, "learning_rate": 2.813319386235927e-05, "loss": 0.1388, "step": 10485 }, { "epoch": 0.18570220528009845, "grad_norm": 0.6701928377151489, "learning_rate": 2.8132778169108636e-05, "loss": 0.0791, "step": 10486 }, { "epoch": 0.18571991481712688, "grad_norm": 0.7108091115951538, "learning_rate": 2.813236243265249e-05, "loss": 0.0602, "step": 10487 }, { "epoch": 0.18573762435415533, "grad_norm": 0.8621261119842529, "learning_rate": 2.8131946652992204e-05, "loss": 0.1413, "step": 10488 }, { "epoch": 0.18575533389118376, "grad_norm": 1.0329811573028564, "learning_rate": 2.8131530830129136e-05, "loss": 0.073, "step": 10489 }, { "epoch": 0.1857730434282122, "grad_norm": 1.205898404121399, "learning_rate": 2.813111496406466e-05, "loss": 0.1117, "step": 10490 }, { "epoch": 0.1857907529652406, "grad_norm": 0.8603911399841309, "learning_rate": 2.8130699054800147e-05, "loss": 0.1263, "step": 10491 }, { "epoch": 0.18580846250226904, "grad_norm": 0.9281827807426453, "learning_rate": 2.813028310233696e-05, "loss": 0.0865, "step": 10492 }, { "epoch": 0.18582617203929747, "grad_norm": 1.0962945222854614, "learning_rate": 2.8129867106676474e-05, "loss": 0.1139, "step": 10493 }, { "epoch": 0.1858438815763259, "grad_norm": 1.000496745109558, "learning_rate": 2.812945106782005e-05, "loss": 0.0986, "step": 10494 }, { "epoch": 0.18586159111335432, "grad_norm": 0.9066200852394104, "learning_rate": 2.8129034985769057e-05, "loss": 0.0727, "step": 10495 }, { "epoch": 0.18587930065038274, "grad_norm": 0.966289222240448, "learning_rate": 2.8128618860524874e-05, "loss": 0.1011, "step": 10496 }, { "epoch": 0.18589701018741117, "grad_norm": 0.7170847058296204, "learning_rate": 2.8128202692088854e-05, "loss": 0.0843, "step": 10497 }, { "epoch": 0.1859147197244396, "grad_norm": 1.3912458419799805, "learning_rate": 2.8127786480462384e-05, "loss": 0.0831, "step": 10498 }, { "epoch": 0.18593242926146802, "grad_norm": 1.0171254873275757, "learning_rate": 2.8127370225646818e-05, "loss": 0.0899, "step": 10499 }, { "epoch": 0.18595013879849645, "grad_norm": 1.2433985471725464, "learning_rate": 2.812695392764353e-05, "loss": 0.115, "step": 10500 }, { "epoch": 0.18596784833552488, "grad_norm": 0.9334208965301514, "learning_rate": 2.8126537586453897e-05, "loss": 0.1287, "step": 10501 }, { "epoch": 0.1859855578725533, "grad_norm": 0.8384504914283752, "learning_rate": 2.8126121202079282e-05, "loss": 0.0735, "step": 10502 }, { "epoch": 0.18600326740958176, "grad_norm": 0.9051746129989624, "learning_rate": 2.8125704774521054e-05, "loss": 0.1189, "step": 10503 }, { "epoch": 0.18602097694661018, "grad_norm": 1.0868839025497437, "learning_rate": 2.8125288303780584e-05, "loss": 0.1384, "step": 10504 }, { "epoch": 0.1860386864836386, "grad_norm": 0.9418359994888306, "learning_rate": 2.8124871789859246e-05, "loss": 0.1308, "step": 10505 }, { "epoch": 0.18605639602066704, "grad_norm": 1.3075660467147827, "learning_rate": 2.8124455232758405e-05, "loss": 0.1051, "step": 10506 }, { "epoch": 0.18607410555769546, "grad_norm": 0.7523918151855469, "learning_rate": 2.8124038632479435e-05, "loss": 0.1266, "step": 10507 }, { "epoch": 0.1860918150947239, "grad_norm": 0.6800905466079712, "learning_rate": 2.8123621989023706e-05, "loss": 0.0647, "step": 10508 }, { "epoch": 0.18610952463175232, "grad_norm": 0.5321642756462097, "learning_rate": 2.8123205302392587e-05, "loss": 0.0829, "step": 10509 }, { "epoch": 0.18612723416878074, "grad_norm": 0.7887300848960876, "learning_rate": 2.812278857258745e-05, "loss": 0.0737, "step": 10510 }, { "epoch": 0.18614494370580917, "grad_norm": 1.170286774635315, "learning_rate": 2.8122371799609667e-05, "loss": 0.0878, "step": 10511 }, { "epoch": 0.1861626532428376, "grad_norm": 0.8379935026168823, "learning_rate": 2.8121954983460604e-05, "loss": 0.0792, "step": 10512 }, { "epoch": 0.18618036277986602, "grad_norm": 1.12766695022583, "learning_rate": 2.8121538124141642e-05, "loss": 0.1097, "step": 10513 }, { "epoch": 0.18619807231689445, "grad_norm": 0.6587085127830505, "learning_rate": 2.812112122165414e-05, "loss": 0.1229, "step": 10514 }, { "epoch": 0.18621578185392287, "grad_norm": 0.6175415515899658, "learning_rate": 2.812070427599948e-05, "loss": 0.0764, "step": 10515 }, { "epoch": 0.1862334913909513, "grad_norm": 0.49978265166282654, "learning_rate": 2.8120287287179033e-05, "loss": 0.0789, "step": 10516 }, { "epoch": 0.18625120092797973, "grad_norm": 1.2075310945510864, "learning_rate": 2.8119870255194164e-05, "loss": 0.0945, "step": 10517 }, { "epoch": 0.18626891046500818, "grad_norm": 1.0629349946975708, "learning_rate": 2.811945318004625e-05, "loss": 0.1314, "step": 10518 }, { "epoch": 0.1862866200020366, "grad_norm": 0.8042479753494263, "learning_rate": 2.8119036061736662e-05, "loss": 0.1059, "step": 10519 }, { "epoch": 0.18630432953906503, "grad_norm": 1.250316858291626, "learning_rate": 2.811861890026677e-05, "loss": 0.1026, "step": 10520 }, { "epoch": 0.18632203907609346, "grad_norm": 1.0165542364120483, "learning_rate": 2.811820169563795e-05, "loss": 0.1075, "step": 10521 }, { "epoch": 0.18633974861312189, "grad_norm": 0.9556318521499634, "learning_rate": 2.8117784447851578e-05, "loss": 0.1074, "step": 10522 }, { "epoch": 0.1863574581501503, "grad_norm": 0.9495221972465515, "learning_rate": 2.8117367156909017e-05, "loss": 0.1208, "step": 10523 }, { "epoch": 0.18637516768717874, "grad_norm": 1.3026297092437744, "learning_rate": 2.8116949822811643e-05, "loss": 0.1279, "step": 10524 }, { "epoch": 0.18639287722420717, "grad_norm": 0.8627350330352783, "learning_rate": 2.8116532445560837e-05, "loss": 0.0968, "step": 10525 }, { "epoch": 0.1864105867612356, "grad_norm": 0.8349089026451111, "learning_rate": 2.811611502515796e-05, "loss": 0.1123, "step": 10526 }, { "epoch": 0.18642829629826402, "grad_norm": 0.8946444988250732, "learning_rate": 2.8115697561604394e-05, "loss": 0.1161, "step": 10527 }, { "epoch": 0.18644600583529244, "grad_norm": 0.642705500125885, "learning_rate": 2.811528005490151e-05, "loss": 0.0928, "step": 10528 }, { "epoch": 0.18646371537232087, "grad_norm": 0.7488372921943665, "learning_rate": 2.8114862505050683e-05, "loss": 0.0915, "step": 10529 }, { "epoch": 0.1864814249093493, "grad_norm": 0.8437197208404541, "learning_rate": 2.811444491205328e-05, "loss": 0.1292, "step": 10530 }, { "epoch": 0.18649913444637772, "grad_norm": 1.08226478099823, "learning_rate": 2.8114027275910682e-05, "loss": 0.0959, "step": 10531 }, { "epoch": 0.18651684398340618, "grad_norm": 1.3136669397354126, "learning_rate": 2.811360959662426e-05, "loss": 0.1395, "step": 10532 }, { "epoch": 0.1865345535204346, "grad_norm": 0.6592121720314026, "learning_rate": 2.811319187419539e-05, "loss": 0.0957, "step": 10533 }, { "epoch": 0.18655226305746303, "grad_norm": 0.5014148950576782, "learning_rate": 2.811277410862544e-05, "loss": 0.0922, "step": 10534 }, { "epoch": 0.18656997259449146, "grad_norm": 1.2629711627960205, "learning_rate": 2.81123562999158e-05, "loss": 0.143, "step": 10535 }, { "epoch": 0.18658768213151988, "grad_norm": 0.9775171279907227, "learning_rate": 2.8111938448067827e-05, "loss": 0.1448, "step": 10536 }, { "epoch": 0.1866053916685483, "grad_norm": 0.8818159699440002, "learning_rate": 2.81115205530829e-05, "loss": 0.1113, "step": 10537 }, { "epoch": 0.18662310120557674, "grad_norm": 1.3262745141983032, "learning_rate": 2.8111102614962404e-05, "loss": 0.1414, "step": 10538 }, { "epoch": 0.18664081074260516, "grad_norm": 1.1496061086654663, "learning_rate": 2.81106846337077e-05, "loss": 0.1274, "step": 10539 }, { "epoch": 0.1866585202796336, "grad_norm": 0.9576653242111206, "learning_rate": 2.8110266609320172e-05, "loss": 0.1439, "step": 10540 }, { "epoch": 0.18667622981666201, "grad_norm": 0.7869819402694702, "learning_rate": 2.8109848541801194e-05, "loss": 0.1341, "step": 10541 }, { "epoch": 0.18669393935369044, "grad_norm": 0.5559887290000916, "learning_rate": 2.8109430431152145e-05, "loss": 0.0978, "step": 10542 }, { "epoch": 0.18671164889071887, "grad_norm": 1.2141908407211304, "learning_rate": 2.8109012277374387e-05, "loss": 0.0985, "step": 10543 }, { "epoch": 0.1867293584277473, "grad_norm": 0.7510805130004883, "learning_rate": 2.8108594080469306e-05, "loss": 0.1381, "step": 10544 }, { "epoch": 0.18674706796477572, "grad_norm": 0.9265447854995728, "learning_rate": 2.810817584043828e-05, "loss": 0.1415, "step": 10545 }, { "epoch": 0.18676477750180415, "grad_norm": 1.0508915185928345, "learning_rate": 2.8107757557282676e-05, "loss": 0.1119, "step": 10546 }, { "epoch": 0.1867824870388326, "grad_norm": 0.9728513360023499, "learning_rate": 2.8107339231003884e-05, "loss": 0.0926, "step": 10547 }, { "epoch": 0.18680019657586103, "grad_norm": 1.4063585996627808, "learning_rate": 2.8106920861603264e-05, "loss": 0.1465, "step": 10548 }, { "epoch": 0.18681790611288945, "grad_norm": 0.9116583466529846, "learning_rate": 2.8106502449082203e-05, "loss": 0.1194, "step": 10549 }, { "epoch": 0.18683561564991788, "grad_norm": 0.6013453602790833, "learning_rate": 2.8106083993442076e-05, "loss": 0.1485, "step": 10550 }, { "epoch": 0.1868533251869463, "grad_norm": 0.817275881767273, "learning_rate": 2.810566549468425e-05, "loss": 0.1213, "step": 10551 }, { "epoch": 0.18687103472397473, "grad_norm": 0.744094967842102, "learning_rate": 2.810524695281012e-05, "loss": 0.1378, "step": 10552 }, { "epoch": 0.18688874426100316, "grad_norm": 1.4832533597946167, "learning_rate": 2.810482836782105e-05, "loss": 0.122, "step": 10553 }, { "epoch": 0.18690645379803159, "grad_norm": 1.315475583076477, "learning_rate": 2.8104409739718416e-05, "loss": 0.0972, "step": 10554 }, { "epoch": 0.18692416333506, "grad_norm": 1.519748568534851, "learning_rate": 2.8103991068503606e-05, "loss": 0.089, "step": 10555 }, { "epoch": 0.18694187287208844, "grad_norm": 1.0839303731918335, "learning_rate": 2.8103572354177987e-05, "loss": 0.1068, "step": 10556 }, { "epoch": 0.18695958240911686, "grad_norm": 1.0807822942733765, "learning_rate": 2.810315359674294e-05, "loss": 0.1197, "step": 10557 }, { "epoch": 0.1869772919461453, "grad_norm": 1.0583174228668213, "learning_rate": 2.8102734796199845e-05, "loss": 0.1429, "step": 10558 }, { "epoch": 0.18699500148317372, "grad_norm": 0.7338154315948486, "learning_rate": 2.810231595255008e-05, "loss": 0.1197, "step": 10559 }, { "epoch": 0.18701271102020214, "grad_norm": 1.0169520378112793, "learning_rate": 2.8101897065795017e-05, "loss": 0.137, "step": 10560 }, { "epoch": 0.18703042055723057, "grad_norm": 0.8041121363639832, "learning_rate": 2.8101478135936037e-05, "loss": 0.0747, "step": 10561 }, { "epoch": 0.18704813009425902, "grad_norm": 1.2462787628173828, "learning_rate": 2.810105916297452e-05, "loss": 0.1134, "step": 10562 }, { "epoch": 0.18706583963128745, "grad_norm": 1.1054328680038452, "learning_rate": 2.8100640146911843e-05, "loss": 0.0945, "step": 10563 }, { "epoch": 0.18708354916831588, "grad_norm": 0.6747390031814575, "learning_rate": 2.8100221087749384e-05, "loss": 0.0836, "step": 10564 }, { "epoch": 0.1871012587053443, "grad_norm": 1.035142421722412, "learning_rate": 2.809980198548853e-05, "loss": 0.1003, "step": 10565 }, { "epoch": 0.18711896824237273, "grad_norm": 1.1475074291229248, "learning_rate": 2.8099382840130647e-05, "loss": 0.0775, "step": 10566 }, { "epoch": 0.18713667777940116, "grad_norm": 1.0379406213760376, "learning_rate": 2.809896365167712e-05, "loss": 0.111, "step": 10567 }, { "epoch": 0.18715438731642958, "grad_norm": 4.967631816864014, "learning_rate": 2.809854442012932e-05, "loss": 0.1342, "step": 10568 }, { "epoch": 0.187172096853458, "grad_norm": 1.1314828395843506, "learning_rate": 2.8098125145488642e-05, "loss": 0.1153, "step": 10569 }, { "epoch": 0.18718980639048644, "grad_norm": 1.5040580034255981, "learning_rate": 2.809770582775646e-05, "loss": 0.1269, "step": 10570 }, { "epoch": 0.18720751592751486, "grad_norm": 1.1995536088943481, "learning_rate": 2.8097286466934144e-05, "loss": 0.0758, "step": 10571 }, { "epoch": 0.1872252254645433, "grad_norm": 0.8498504757881165, "learning_rate": 2.8096867063023085e-05, "loss": 0.1177, "step": 10572 }, { "epoch": 0.18724293500157171, "grad_norm": 1.3909885883331299, "learning_rate": 2.8096447616024653e-05, "loss": 0.0987, "step": 10573 }, { "epoch": 0.18726064453860014, "grad_norm": 1.1085169315338135, "learning_rate": 2.8096028125940233e-05, "loss": 0.1251, "step": 10574 }, { "epoch": 0.18727835407562857, "grad_norm": 0.5302780270576477, "learning_rate": 2.8095608592771208e-05, "loss": 0.1082, "step": 10575 }, { "epoch": 0.187296063612657, "grad_norm": 0.7961003184318542, "learning_rate": 2.8095189016518957e-05, "loss": 0.1164, "step": 10576 }, { "epoch": 0.18731377314968545, "grad_norm": 1.0842212438583374, "learning_rate": 2.8094769397184855e-05, "loss": 0.1084, "step": 10577 }, { "epoch": 0.18733148268671387, "grad_norm": 0.9478570222854614, "learning_rate": 2.8094349734770287e-05, "loss": 0.1003, "step": 10578 }, { "epoch": 0.1873491922237423, "grad_norm": 0.9529094696044922, "learning_rate": 2.8093930029276634e-05, "loss": 0.1371, "step": 10579 }, { "epoch": 0.18736690176077073, "grad_norm": 1.1257823705673218, "learning_rate": 2.8093510280705274e-05, "loss": 0.1309, "step": 10580 }, { "epoch": 0.18738461129779915, "grad_norm": 1.0514460802078247, "learning_rate": 2.8093090489057592e-05, "loss": 0.135, "step": 10581 }, { "epoch": 0.18740232083482758, "grad_norm": 1.069480538368225, "learning_rate": 2.8092670654334965e-05, "loss": 0.1053, "step": 10582 }, { "epoch": 0.187420030371856, "grad_norm": 0.697551965713501, "learning_rate": 2.8092250776538774e-05, "loss": 0.0769, "step": 10583 }, { "epoch": 0.18743773990888443, "grad_norm": 1.490659475326538, "learning_rate": 2.809183085567041e-05, "loss": 0.1103, "step": 10584 }, { "epoch": 0.18745544944591286, "grad_norm": 0.9540086388587952, "learning_rate": 2.8091410891731236e-05, "loss": 0.0724, "step": 10585 }, { "epoch": 0.18747315898294128, "grad_norm": 1.2557218074798584, "learning_rate": 2.809099088472265e-05, "loss": 0.1007, "step": 10586 }, { "epoch": 0.1874908685199697, "grad_norm": 1.3079479932785034, "learning_rate": 2.8090570834646022e-05, "loss": 0.1675, "step": 10587 }, { "epoch": 0.18750857805699814, "grad_norm": 0.5452004671096802, "learning_rate": 2.809015074150275e-05, "loss": 0.0955, "step": 10588 }, { "epoch": 0.18752628759402656, "grad_norm": 0.9048922061920166, "learning_rate": 2.80897306052942e-05, "loss": 0.0931, "step": 10589 }, { "epoch": 0.187543997131055, "grad_norm": 1.2884502410888672, "learning_rate": 2.808931042602176e-05, "loss": 0.1116, "step": 10590 }, { "epoch": 0.18756170666808342, "grad_norm": 1.5157698392868042, "learning_rate": 2.8088890203686817e-05, "loss": 0.0907, "step": 10591 }, { "epoch": 0.18757941620511187, "grad_norm": 0.9981040954589844, "learning_rate": 2.808846993829074e-05, "loss": 0.1381, "step": 10592 }, { "epoch": 0.1875971257421403, "grad_norm": 1.1333292722702026, "learning_rate": 2.808804962983493e-05, "loss": 0.1331, "step": 10593 }, { "epoch": 0.18761483527916872, "grad_norm": 1.0418529510498047, "learning_rate": 2.8087629278320757e-05, "loss": 0.0863, "step": 10594 }, { "epoch": 0.18763254481619715, "grad_norm": 0.6681395173072815, "learning_rate": 2.808720888374961e-05, "loss": 0.1355, "step": 10595 }, { "epoch": 0.18765025435322558, "grad_norm": 1.075423240661621, "learning_rate": 2.8086788446122862e-05, "loss": 0.116, "step": 10596 }, { "epoch": 0.187667963890254, "grad_norm": 1.1064138412475586, "learning_rate": 2.8086367965441908e-05, "loss": 0.123, "step": 10597 }, { "epoch": 0.18768567342728243, "grad_norm": 1.1653611660003662, "learning_rate": 2.8085947441708133e-05, "loss": 0.1035, "step": 10598 }, { "epoch": 0.18770338296431086, "grad_norm": 1.0180714130401611, "learning_rate": 2.8085526874922908e-05, "loss": 0.0987, "step": 10599 }, { "epoch": 0.18772109250133928, "grad_norm": 0.9075543284416199, "learning_rate": 2.808510626508762e-05, "loss": 0.0867, "step": 10600 }, { "epoch": 0.1877388020383677, "grad_norm": 0.7452749609947205, "learning_rate": 2.8084685612203662e-05, "loss": 0.1378, "step": 10601 }, { "epoch": 0.18775651157539613, "grad_norm": 1.17698335647583, "learning_rate": 2.8084264916272412e-05, "loss": 0.1253, "step": 10602 }, { "epoch": 0.18777422111242456, "grad_norm": 0.7034304141998291, "learning_rate": 2.8083844177295252e-05, "loss": 0.0786, "step": 10603 }, { "epoch": 0.187791930649453, "grad_norm": 1.112114429473877, "learning_rate": 2.8083423395273565e-05, "loss": 0.1093, "step": 10604 }, { "epoch": 0.1878096401864814, "grad_norm": 0.8644895553588867, "learning_rate": 2.808300257020874e-05, "loss": 0.1118, "step": 10605 }, { "epoch": 0.18782734972350984, "grad_norm": 1.1093850135803223, "learning_rate": 2.8082581702102163e-05, "loss": 0.1246, "step": 10606 }, { "epoch": 0.1878450592605383, "grad_norm": 0.6877453327178955, "learning_rate": 2.8082160790955213e-05, "loss": 0.117, "step": 10607 }, { "epoch": 0.18786276879756672, "grad_norm": 1.0129393339157104, "learning_rate": 2.8081739836769275e-05, "loss": 0.0826, "step": 10608 }, { "epoch": 0.18788047833459515, "grad_norm": 0.962247371673584, "learning_rate": 2.808131883954574e-05, "loss": 0.0787, "step": 10609 }, { "epoch": 0.18789818787162357, "grad_norm": 1.174063801765442, "learning_rate": 2.8080897799285987e-05, "loss": 0.12, "step": 10610 }, { "epoch": 0.187915897408652, "grad_norm": 1.2011680603027344, "learning_rate": 2.8080476715991405e-05, "loss": 0.1168, "step": 10611 }, { "epoch": 0.18793360694568043, "grad_norm": 0.9651569724082947, "learning_rate": 2.8080055589663378e-05, "loss": 0.153, "step": 10612 }, { "epoch": 0.18795131648270885, "grad_norm": 0.7232292890548706, "learning_rate": 2.807963442030329e-05, "loss": 0.1301, "step": 10613 }, { "epoch": 0.18796902601973728, "grad_norm": 0.8788661956787109, "learning_rate": 2.8079213207912524e-05, "loss": 0.0997, "step": 10614 }, { "epoch": 0.1879867355567657, "grad_norm": 0.855268120765686, "learning_rate": 2.8078791952492474e-05, "loss": 0.1129, "step": 10615 }, { "epoch": 0.18800444509379413, "grad_norm": 1.1915746927261353, "learning_rate": 2.807837065404452e-05, "loss": 0.0858, "step": 10616 }, { "epoch": 0.18802215463082256, "grad_norm": 0.7417541742324829, "learning_rate": 2.807794931257005e-05, "loss": 0.0732, "step": 10617 }, { "epoch": 0.18803986416785098, "grad_norm": 0.8988126516342163, "learning_rate": 2.8077527928070447e-05, "loss": 0.0834, "step": 10618 }, { "epoch": 0.1880575737048794, "grad_norm": 0.9767519235610962, "learning_rate": 2.80771065005471e-05, "loss": 0.1031, "step": 10619 }, { "epoch": 0.18807528324190784, "grad_norm": 0.8643035292625427, "learning_rate": 2.80766850300014e-05, "loss": 0.1057, "step": 10620 }, { "epoch": 0.18809299277893626, "grad_norm": 0.8425744771957397, "learning_rate": 2.8076263516434727e-05, "loss": 0.0874, "step": 10621 }, { "epoch": 0.18811070231596472, "grad_norm": 0.7321418523788452, "learning_rate": 2.8075841959848468e-05, "loss": 0.1175, "step": 10622 }, { "epoch": 0.18812841185299314, "grad_norm": 1.291222095489502, "learning_rate": 2.8075420360244014e-05, "loss": 0.1172, "step": 10623 }, { "epoch": 0.18814612139002157, "grad_norm": 0.9519978165626526, "learning_rate": 2.8074998717622745e-05, "loss": 0.1141, "step": 10624 }, { "epoch": 0.18816383092705, "grad_norm": 1.066828966140747, "learning_rate": 2.8074577031986052e-05, "loss": 0.0859, "step": 10625 }, { "epoch": 0.18818154046407842, "grad_norm": 1.0030479431152344, "learning_rate": 2.807415530333533e-05, "loss": 0.1252, "step": 10626 }, { "epoch": 0.18819925000110685, "grad_norm": 0.5400281548500061, "learning_rate": 2.8073733531671958e-05, "loss": 0.0986, "step": 10627 }, { "epoch": 0.18821695953813528, "grad_norm": 1.3664872646331787, "learning_rate": 2.807331171699732e-05, "loss": 0.1189, "step": 10628 }, { "epoch": 0.1882346690751637, "grad_norm": 0.8312751650810242, "learning_rate": 2.8072889859312808e-05, "loss": 0.1251, "step": 10629 }, { "epoch": 0.18825237861219213, "grad_norm": 1.9262592792510986, "learning_rate": 2.8072467958619813e-05, "loss": 0.1925, "step": 10630 }, { "epoch": 0.18827008814922055, "grad_norm": 0.7837759256362915, "learning_rate": 2.8072046014919724e-05, "loss": 0.1068, "step": 10631 }, { "epoch": 0.18828779768624898, "grad_norm": 0.6916061639785767, "learning_rate": 2.8071624028213922e-05, "loss": 0.0823, "step": 10632 }, { "epoch": 0.1883055072232774, "grad_norm": 0.8666630983352661, "learning_rate": 2.8071201998503803e-05, "loss": 0.1314, "step": 10633 }, { "epoch": 0.18832321676030583, "grad_norm": 0.9734048247337341, "learning_rate": 2.807077992579075e-05, "loss": 0.0998, "step": 10634 }, { "epoch": 0.18834092629733426, "grad_norm": 0.6295486688613892, "learning_rate": 2.8070357810076148e-05, "loss": 0.0758, "step": 10635 }, { "epoch": 0.1883586358343627, "grad_norm": 1.19472074508667, "learning_rate": 2.8069935651361397e-05, "loss": 0.1202, "step": 10636 }, { "epoch": 0.18837634537139114, "grad_norm": 0.9612004160881042, "learning_rate": 2.806951344964788e-05, "loss": 0.1171, "step": 10637 }, { "epoch": 0.18839405490841957, "grad_norm": 0.6844362020492554, "learning_rate": 2.8069091204936983e-05, "loss": 0.1161, "step": 10638 }, { "epoch": 0.188411764445448, "grad_norm": 1.3007291555404663, "learning_rate": 2.8068668917230095e-05, "loss": 0.1099, "step": 10639 }, { "epoch": 0.18842947398247642, "grad_norm": 1.3882039785385132, "learning_rate": 2.806824658652861e-05, "loss": 0.1154, "step": 10640 }, { "epoch": 0.18844718351950485, "grad_norm": 0.7918422818183899, "learning_rate": 2.8067824212833916e-05, "loss": 0.152, "step": 10641 }, { "epoch": 0.18846489305653327, "grad_norm": 0.6197357177734375, "learning_rate": 2.8067401796147403e-05, "loss": 0.1237, "step": 10642 }, { "epoch": 0.1884826025935617, "grad_norm": 1.1775014400482178, "learning_rate": 2.806697933647046e-05, "loss": 0.1239, "step": 10643 }, { "epoch": 0.18850031213059013, "grad_norm": 1.0709441900253296, "learning_rate": 2.8066556833804478e-05, "loss": 0.1699, "step": 10644 }, { "epoch": 0.18851802166761855, "grad_norm": 0.6102509498596191, "learning_rate": 2.806613428815084e-05, "loss": 0.08, "step": 10645 }, { "epoch": 0.18853573120464698, "grad_norm": 0.8936747312545776, "learning_rate": 2.8065711699510946e-05, "loss": 0.0907, "step": 10646 }, { "epoch": 0.1885534407416754, "grad_norm": 1.1325435638427734, "learning_rate": 2.806528906788618e-05, "loss": 0.1449, "step": 10647 }, { "epoch": 0.18857115027870383, "grad_norm": 1.119600772857666, "learning_rate": 2.806486639327794e-05, "loss": 0.0935, "step": 10648 }, { "epoch": 0.18858885981573226, "grad_norm": 0.8157209157943726, "learning_rate": 2.8064443675687604e-05, "loss": 0.1533, "step": 10649 }, { "epoch": 0.18860656935276068, "grad_norm": 0.888990044593811, "learning_rate": 2.8064020915116574e-05, "loss": 0.0994, "step": 10650 }, { "epoch": 0.1886242788897891, "grad_norm": 1.1735718250274658, "learning_rate": 2.8063598111566234e-05, "loss": 0.1311, "step": 10651 }, { "epoch": 0.18864198842681756, "grad_norm": 1.113933801651001, "learning_rate": 2.8063175265037975e-05, "loss": 0.0866, "step": 10652 }, { "epoch": 0.188659697963846, "grad_norm": 0.7286727428436279, "learning_rate": 2.8062752375533196e-05, "loss": 0.0937, "step": 10653 }, { "epoch": 0.18867740750087442, "grad_norm": 0.6349745392799377, "learning_rate": 2.8062329443053285e-05, "loss": 0.099, "step": 10654 }, { "epoch": 0.18869511703790284, "grad_norm": 1.21125066280365, "learning_rate": 2.8061906467599625e-05, "loss": 0.1155, "step": 10655 }, { "epoch": 0.18871282657493127, "grad_norm": 0.6623073816299438, "learning_rate": 2.8061483449173616e-05, "loss": 0.117, "step": 10656 }, { "epoch": 0.1887305361119597, "grad_norm": 0.7592986822128296, "learning_rate": 2.8061060387776645e-05, "loss": 0.1005, "step": 10657 }, { "epoch": 0.18874824564898812, "grad_norm": 1.0326688289642334, "learning_rate": 2.806063728341011e-05, "loss": 0.1351, "step": 10658 }, { "epoch": 0.18876595518601655, "grad_norm": 1.2267699241638184, "learning_rate": 2.80602141360754e-05, "loss": 0.1109, "step": 10659 }, { "epoch": 0.18878366472304497, "grad_norm": 0.9358350038528442, "learning_rate": 2.8059790945773903e-05, "loss": 0.1051, "step": 10660 }, { "epoch": 0.1888013742600734, "grad_norm": 1.0535190105438232, "learning_rate": 2.805936771250702e-05, "loss": 0.0575, "step": 10661 }, { "epoch": 0.18881908379710183, "grad_norm": 0.9655090570449829, "learning_rate": 2.8058944436276134e-05, "loss": 0.1364, "step": 10662 }, { "epoch": 0.18883679333413025, "grad_norm": 0.8321799039840698, "learning_rate": 2.805852111708264e-05, "loss": 0.1164, "step": 10663 }, { "epoch": 0.18885450287115868, "grad_norm": 0.8292449712753296, "learning_rate": 2.8058097754927936e-05, "loss": 0.111, "step": 10664 }, { "epoch": 0.1888722124081871, "grad_norm": 1.1868544816970825, "learning_rate": 2.8057674349813412e-05, "loss": 0.0823, "step": 10665 }, { "epoch": 0.18888992194521556, "grad_norm": 1.0448049306869507, "learning_rate": 2.8057250901740457e-05, "loss": 0.1489, "step": 10666 }, { "epoch": 0.188907631482244, "grad_norm": 0.9737271070480347, "learning_rate": 2.805682741071047e-05, "loss": 0.1665, "step": 10667 }, { "epoch": 0.1889253410192724, "grad_norm": 0.7485194802284241, "learning_rate": 2.805640387672484e-05, "loss": 0.0654, "step": 10668 }, { "epoch": 0.18894305055630084, "grad_norm": 0.9691326022148132, "learning_rate": 2.805598029978496e-05, "loss": 0.1276, "step": 10669 }, { "epoch": 0.18896076009332927, "grad_norm": 0.924291729927063, "learning_rate": 2.8055556679892224e-05, "loss": 0.0917, "step": 10670 }, { "epoch": 0.1889784696303577, "grad_norm": 0.797365128993988, "learning_rate": 2.805513301704803e-05, "loss": 0.0979, "step": 10671 }, { "epoch": 0.18899617916738612, "grad_norm": 1.3453383445739746, "learning_rate": 2.8054709311253768e-05, "loss": 0.0872, "step": 10672 }, { "epoch": 0.18901388870441455, "grad_norm": 1.3165102005004883, "learning_rate": 2.8054285562510832e-05, "loss": 0.1065, "step": 10673 }, { "epoch": 0.18903159824144297, "grad_norm": 0.5055698156356812, "learning_rate": 2.8053861770820618e-05, "loss": 0.067, "step": 10674 }, { "epoch": 0.1890493077784714, "grad_norm": 1.1187996864318848, "learning_rate": 2.8053437936184523e-05, "loss": 0.0939, "step": 10675 }, { "epoch": 0.18906701731549982, "grad_norm": 0.9371117353439331, "learning_rate": 2.805301405860393e-05, "loss": 0.1356, "step": 10676 }, { "epoch": 0.18908472685252825, "grad_norm": 1.348351240158081, "learning_rate": 2.8052590138080247e-05, "loss": 0.1078, "step": 10677 }, { "epoch": 0.18910243638955668, "grad_norm": 1.1006532907485962, "learning_rate": 2.8052166174614862e-05, "loss": 0.1147, "step": 10678 }, { "epoch": 0.1891201459265851, "grad_norm": 1.3691835403442383, "learning_rate": 2.805174216820917e-05, "loss": 0.1026, "step": 10679 }, { "epoch": 0.18913785546361353, "grad_norm": 0.6741890907287598, "learning_rate": 2.8051318118864563e-05, "loss": 0.0924, "step": 10680 }, { "epoch": 0.18915556500064198, "grad_norm": 1.0302170515060425, "learning_rate": 2.805089402658244e-05, "loss": 0.094, "step": 10681 }, { "epoch": 0.1891732745376704, "grad_norm": 0.7148250937461853, "learning_rate": 2.8050469891364202e-05, "loss": 0.1166, "step": 10682 }, { "epoch": 0.18919098407469884, "grad_norm": 1.2422809600830078, "learning_rate": 2.805004571321123e-05, "loss": 0.0979, "step": 10683 }, { "epoch": 0.18920869361172726, "grad_norm": 0.797552764415741, "learning_rate": 2.8049621492124934e-05, "loss": 0.0684, "step": 10684 }, { "epoch": 0.1892264031487557, "grad_norm": 0.9804165959358215, "learning_rate": 2.8049197228106695e-05, "loss": 0.1453, "step": 10685 }, { "epoch": 0.18924411268578412, "grad_norm": 1.1701101064682007, "learning_rate": 2.8048772921157927e-05, "loss": 0.1194, "step": 10686 }, { "epoch": 0.18926182222281254, "grad_norm": 0.8332316279411316, "learning_rate": 2.8048348571280008e-05, "loss": 0.0945, "step": 10687 }, { "epoch": 0.18927953175984097, "grad_norm": 0.7067227959632874, "learning_rate": 2.8047924178474343e-05, "loss": 0.0932, "step": 10688 }, { "epoch": 0.1892972412968694, "grad_norm": 0.5752179026603699, "learning_rate": 2.804749974274233e-05, "loss": 0.1049, "step": 10689 }, { "epoch": 0.18931495083389782, "grad_norm": 0.9364463686943054, "learning_rate": 2.804707526408536e-05, "loss": 0.0782, "step": 10690 }, { "epoch": 0.18933266037092625, "grad_norm": 0.9776371121406555, "learning_rate": 2.8046650742504833e-05, "loss": 0.0984, "step": 10691 }, { "epoch": 0.18935036990795467, "grad_norm": 1.0231999158859253, "learning_rate": 2.8046226178002148e-05, "loss": 0.1077, "step": 10692 }, { "epoch": 0.1893680794449831, "grad_norm": 0.6525600552558899, "learning_rate": 2.8045801570578694e-05, "loss": 0.0786, "step": 10693 }, { "epoch": 0.18938578898201153, "grad_norm": 0.6499469876289368, "learning_rate": 2.804537692023587e-05, "loss": 0.1144, "step": 10694 }, { "epoch": 0.18940349851903995, "grad_norm": 0.8783102631568909, "learning_rate": 2.8044952226975084e-05, "loss": 0.1096, "step": 10695 }, { "epoch": 0.1894212080560684, "grad_norm": 1.079847812652588, "learning_rate": 2.804452749079772e-05, "loss": 0.1156, "step": 10696 }, { "epoch": 0.18943891759309683, "grad_norm": 0.8961337208747864, "learning_rate": 2.8044102711705177e-05, "loss": 0.1344, "step": 10697 }, { "epoch": 0.18945662713012526, "grad_norm": 1.485020399093628, "learning_rate": 2.8043677889698862e-05, "loss": 0.1232, "step": 10698 }, { "epoch": 0.1894743366671537, "grad_norm": 0.9392207860946655, "learning_rate": 2.804325302478016e-05, "loss": 0.1266, "step": 10699 }, { "epoch": 0.1894920462041821, "grad_norm": 0.9889879822731018, "learning_rate": 2.8042828116950478e-05, "loss": 0.1161, "step": 10700 }, { "epoch": 0.18950975574121054, "grad_norm": 1.5755099058151245, "learning_rate": 2.804240316621121e-05, "loss": 0.1183, "step": 10701 }, { "epoch": 0.18952746527823897, "grad_norm": 1.0842608213424683, "learning_rate": 2.8041978172563756e-05, "loss": 0.108, "step": 10702 }, { "epoch": 0.1895451748152674, "grad_norm": 0.7735379338264465, "learning_rate": 2.804155313600951e-05, "loss": 0.1294, "step": 10703 }, { "epoch": 0.18956288435229582, "grad_norm": 0.8472142815589905, "learning_rate": 2.8041128056549873e-05, "loss": 0.1043, "step": 10704 }, { "epoch": 0.18958059388932424, "grad_norm": 0.7290612459182739, "learning_rate": 2.8040702934186247e-05, "loss": 0.0997, "step": 10705 }, { "epoch": 0.18959830342635267, "grad_norm": 0.6350107192993164, "learning_rate": 2.804027776892003e-05, "loss": 0.1031, "step": 10706 }, { "epoch": 0.1896160129633811, "grad_norm": 0.6857130527496338, "learning_rate": 2.803985256075261e-05, "loss": 0.1102, "step": 10707 }, { "epoch": 0.18963372250040952, "grad_norm": 1.2537435293197632, "learning_rate": 2.8039427309685398e-05, "loss": 0.0963, "step": 10708 }, { "epoch": 0.18965143203743795, "grad_norm": 0.8300507068634033, "learning_rate": 2.803900201571979e-05, "loss": 0.101, "step": 10709 }, { "epoch": 0.18966914157446638, "grad_norm": 0.9363502860069275, "learning_rate": 2.8038576678857185e-05, "loss": 0.094, "step": 10710 }, { "epoch": 0.18968685111149483, "grad_norm": 1.6133437156677246, "learning_rate": 2.8038151299098977e-05, "loss": 0.1084, "step": 10711 }, { "epoch": 0.18970456064852326, "grad_norm": 0.922980010509491, "learning_rate": 2.8037725876446572e-05, "loss": 0.1365, "step": 10712 }, { "epoch": 0.18972227018555168, "grad_norm": 1.236799955368042, "learning_rate": 2.8037300410901368e-05, "loss": 0.1041, "step": 10713 }, { "epoch": 0.1897399797225801, "grad_norm": 0.8357802033424377, "learning_rate": 2.8036874902464767e-05, "loss": 0.0995, "step": 10714 }, { "epoch": 0.18975768925960854, "grad_norm": 0.8633826971054077, "learning_rate": 2.803644935113816e-05, "loss": 0.0892, "step": 10715 }, { "epoch": 0.18977539879663696, "grad_norm": 0.5109897255897522, "learning_rate": 2.803602375692296e-05, "loss": 0.0885, "step": 10716 }, { "epoch": 0.1897931083336654, "grad_norm": 1.8817002773284912, "learning_rate": 2.8035598119820557e-05, "loss": 0.1548, "step": 10717 }, { "epoch": 0.18981081787069382, "grad_norm": 0.7951784729957581, "learning_rate": 2.803517243983235e-05, "loss": 0.0881, "step": 10718 }, { "epoch": 0.18982852740772224, "grad_norm": 1.0478307008743286, "learning_rate": 2.803474671695975e-05, "loss": 0.1104, "step": 10719 }, { "epoch": 0.18984623694475067, "grad_norm": 0.9798327088356018, "learning_rate": 2.803432095120415e-05, "loss": 0.0869, "step": 10720 }, { "epoch": 0.1898639464817791, "grad_norm": 0.8835124969482422, "learning_rate": 2.8033895142566954e-05, "loss": 0.132, "step": 10721 }, { "epoch": 0.18988165601880752, "grad_norm": 0.6575950980186462, "learning_rate": 2.8033469291049557e-05, "loss": 0.1137, "step": 10722 }, { "epoch": 0.18989936555583595, "grad_norm": 0.761879563331604, "learning_rate": 2.8033043396653368e-05, "loss": 0.0937, "step": 10723 }, { "epoch": 0.18991707509286437, "grad_norm": 0.7194252014160156, "learning_rate": 2.8032617459379787e-05, "loss": 0.1305, "step": 10724 }, { "epoch": 0.1899347846298928, "grad_norm": 1.178252100944519, "learning_rate": 2.803219147923021e-05, "loss": 0.0893, "step": 10725 }, { "epoch": 0.18995249416692125, "grad_norm": 0.9247148036956787, "learning_rate": 2.8031765456206042e-05, "loss": 0.0879, "step": 10726 }, { "epoch": 0.18997020370394968, "grad_norm": 1.1848642826080322, "learning_rate": 2.803133939030868e-05, "loss": 0.1572, "step": 10727 }, { "epoch": 0.1899879132409781, "grad_norm": 1.231067419052124, "learning_rate": 2.8030913281539534e-05, "loss": 0.0837, "step": 10728 }, { "epoch": 0.19000562277800653, "grad_norm": 1.139386534690857, "learning_rate": 2.80304871299e-05, "loss": 0.1437, "step": 10729 }, { "epoch": 0.19002333231503496, "grad_norm": 0.7123058438301086, "learning_rate": 2.8030060935391476e-05, "loss": 0.0955, "step": 10730 }, { "epoch": 0.19004104185206339, "grad_norm": 1.067826509475708, "learning_rate": 2.8029634698015376e-05, "loss": 0.1186, "step": 10731 }, { "epoch": 0.1900587513890918, "grad_norm": 1.0513380765914917, "learning_rate": 2.8029208417773093e-05, "loss": 0.117, "step": 10732 }, { "epoch": 0.19007646092612024, "grad_norm": 0.7489902973175049, "learning_rate": 2.8028782094666034e-05, "loss": 0.1301, "step": 10733 }, { "epoch": 0.19009417046314867, "grad_norm": 0.9907151460647583, "learning_rate": 2.8028355728695598e-05, "loss": 0.1193, "step": 10734 }, { "epoch": 0.1901118800001771, "grad_norm": 0.9588985443115234, "learning_rate": 2.802792931986319e-05, "loss": 0.0765, "step": 10735 }, { "epoch": 0.19012958953720552, "grad_norm": 1.368739128112793, "learning_rate": 2.802750286817021e-05, "loss": 0.1709, "step": 10736 }, { "epoch": 0.19014729907423394, "grad_norm": 1.1779723167419434, "learning_rate": 2.8027076373618067e-05, "loss": 0.1099, "step": 10737 }, { "epoch": 0.19016500861126237, "grad_norm": 0.982447624206543, "learning_rate": 2.802664983620816e-05, "loss": 0.0701, "step": 10738 }, { "epoch": 0.1901827181482908, "grad_norm": 1.0003507137298584, "learning_rate": 2.8026223255941893e-05, "loss": 0.0783, "step": 10739 }, { "epoch": 0.19020042768531922, "grad_norm": 0.8450701832771301, "learning_rate": 2.8025796632820667e-05, "loss": 0.1088, "step": 10740 }, { "epoch": 0.19021813722234768, "grad_norm": 1.3369196653366089, "learning_rate": 2.8025369966845886e-05, "loss": 0.1211, "step": 10741 }, { "epoch": 0.1902358467593761, "grad_norm": 2.627401113510132, "learning_rate": 2.802494325801896e-05, "loss": 0.1446, "step": 10742 }, { "epoch": 0.19025355629640453, "grad_norm": 0.9802867770195007, "learning_rate": 2.8024516506341283e-05, "loss": 0.1124, "step": 10743 }, { "epoch": 0.19027126583343296, "grad_norm": 0.6768308877944946, "learning_rate": 2.802408971181427e-05, "loss": 0.1167, "step": 10744 }, { "epoch": 0.19028897537046138, "grad_norm": 1.008662462234497, "learning_rate": 2.802366287443931e-05, "loss": 0.1211, "step": 10745 }, { "epoch": 0.1903066849074898, "grad_norm": 0.684446394443512, "learning_rate": 2.8023235994217823e-05, "loss": 0.1424, "step": 10746 }, { "epoch": 0.19032439444451824, "grad_norm": 1.3076249361038208, "learning_rate": 2.8022809071151208e-05, "loss": 0.127, "step": 10747 }, { "epoch": 0.19034210398154666, "grad_norm": 0.8077059984207153, "learning_rate": 2.8022382105240868e-05, "loss": 0.09, "step": 10748 }, { "epoch": 0.1903598135185751, "grad_norm": 0.6994741559028625, "learning_rate": 2.8021955096488204e-05, "loss": 0.0599, "step": 10749 }, { "epoch": 0.19037752305560351, "grad_norm": 1.0962351560592651, "learning_rate": 2.8021528044894626e-05, "loss": 0.1296, "step": 10750 }, { "epoch": 0.19039523259263194, "grad_norm": 0.7880675196647644, "learning_rate": 2.8021100950461537e-05, "loss": 0.0854, "step": 10751 }, { "epoch": 0.19041294212966037, "grad_norm": 1.033829927444458, "learning_rate": 2.8020673813190343e-05, "loss": 0.1278, "step": 10752 }, { "epoch": 0.1904306516666888, "grad_norm": 0.9241205453872681, "learning_rate": 2.8020246633082453e-05, "loss": 0.1068, "step": 10753 }, { "epoch": 0.19044836120371722, "grad_norm": 0.9190375208854675, "learning_rate": 2.8019819410139266e-05, "loss": 0.1252, "step": 10754 }, { "epoch": 0.19046607074074565, "grad_norm": 1.4099751710891724, "learning_rate": 2.801939214436219e-05, "loss": 0.1229, "step": 10755 }, { "epoch": 0.1904837802777741, "grad_norm": 0.612727701663971, "learning_rate": 2.8018964835752632e-05, "loss": 0.1016, "step": 10756 }, { "epoch": 0.19050148981480253, "grad_norm": 0.706645667552948, "learning_rate": 2.8018537484311993e-05, "loss": 0.1277, "step": 10757 }, { "epoch": 0.19051919935183095, "grad_norm": 0.9991999268531799, "learning_rate": 2.8018110090041685e-05, "loss": 0.0946, "step": 10758 }, { "epoch": 0.19053690888885938, "grad_norm": 0.7332342267036438, "learning_rate": 2.801768265294311e-05, "loss": 0.1115, "step": 10759 }, { "epoch": 0.1905546184258878, "grad_norm": 0.6074641346931458, "learning_rate": 2.8017255173017678e-05, "loss": 0.063, "step": 10760 }, { "epoch": 0.19057232796291623, "grad_norm": 1.460155963897705, "learning_rate": 2.8016827650266793e-05, "loss": 0.1357, "step": 10761 }, { "epoch": 0.19059003749994466, "grad_norm": 1.4516807794570923, "learning_rate": 2.801640008469186e-05, "loss": 0.1131, "step": 10762 }, { "epoch": 0.19060774703697309, "grad_norm": 0.8487445116043091, "learning_rate": 2.8015972476294288e-05, "loss": 0.1092, "step": 10763 }, { "epoch": 0.1906254565740015, "grad_norm": 0.9552221894264221, "learning_rate": 2.801554482507548e-05, "loss": 0.101, "step": 10764 }, { "epoch": 0.19064316611102994, "grad_norm": 0.5116912126541138, "learning_rate": 2.8015117131036853e-05, "loss": 0.0847, "step": 10765 }, { "epoch": 0.19066087564805836, "grad_norm": 0.9110063314437866, "learning_rate": 2.80146893941798e-05, "loss": 0.0989, "step": 10766 }, { "epoch": 0.1906785851850868, "grad_norm": 0.7629339694976807, "learning_rate": 2.8014261614505737e-05, "loss": 0.1436, "step": 10767 }, { "epoch": 0.19069629472211522, "grad_norm": 0.7877369523048401, "learning_rate": 2.8013833792016073e-05, "loss": 0.0975, "step": 10768 }, { "epoch": 0.19071400425914364, "grad_norm": 1.1413750648498535, "learning_rate": 2.801340592671221e-05, "loss": 0.1584, "step": 10769 }, { "epoch": 0.19073171379617207, "grad_norm": 0.7971199750900269, "learning_rate": 2.8012978018595555e-05, "loss": 0.1139, "step": 10770 }, { "epoch": 0.19074942333320052, "grad_norm": 0.8437857031822205, "learning_rate": 2.801255006766752e-05, "loss": 0.0985, "step": 10771 }, { "epoch": 0.19076713287022895, "grad_norm": 0.6138041019439697, "learning_rate": 2.801212207392952e-05, "loss": 0.1168, "step": 10772 }, { "epoch": 0.19078484240725738, "grad_norm": 1.147324562072754, "learning_rate": 2.8011694037382944e-05, "loss": 0.1089, "step": 10773 }, { "epoch": 0.1908025519442858, "grad_norm": 1.251893401145935, "learning_rate": 2.8011265958029214e-05, "loss": 0.1226, "step": 10774 }, { "epoch": 0.19082026148131423, "grad_norm": 1.165188193321228, "learning_rate": 2.8010837835869735e-05, "loss": 0.068, "step": 10775 }, { "epoch": 0.19083797101834266, "grad_norm": 0.9203739762306213, "learning_rate": 2.8010409670905914e-05, "loss": 0.0998, "step": 10776 }, { "epoch": 0.19085568055537108, "grad_norm": 0.9117616415023804, "learning_rate": 2.800998146313916e-05, "loss": 0.0958, "step": 10777 }, { "epoch": 0.1908733900923995, "grad_norm": 0.6161444783210754, "learning_rate": 2.8009553212570885e-05, "loss": 0.0724, "step": 10778 }, { "epoch": 0.19089109962942793, "grad_norm": 0.7082911133766174, "learning_rate": 2.80091249192025e-05, "loss": 0.0727, "step": 10779 }, { "epoch": 0.19090880916645636, "grad_norm": 0.8651742339134216, "learning_rate": 2.8008696583035404e-05, "loss": 0.1039, "step": 10780 }, { "epoch": 0.1909265187034848, "grad_norm": 0.8382953405380249, "learning_rate": 2.8008268204071013e-05, "loss": 0.0811, "step": 10781 }, { "epoch": 0.19094422824051321, "grad_norm": 0.49363842606544495, "learning_rate": 2.800783978231074e-05, "loss": 0.0837, "step": 10782 }, { "epoch": 0.19096193777754164, "grad_norm": 1.0505292415618896, "learning_rate": 2.8007411317755983e-05, "loss": 0.1216, "step": 10783 }, { "epoch": 0.19097964731457007, "grad_norm": 0.8718956708908081, "learning_rate": 2.800698281040816e-05, "loss": 0.0789, "step": 10784 }, { "epoch": 0.19099735685159852, "grad_norm": 0.8329504132270813, "learning_rate": 2.8006554260268682e-05, "loss": 0.1542, "step": 10785 }, { "epoch": 0.19101506638862695, "grad_norm": 0.6219523549079895, "learning_rate": 2.8006125667338954e-05, "loss": 0.1096, "step": 10786 }, { "epoch": 0.19103277592565537, "grad_norm": 0.9547090530395508, "learning_rate": 2.800569703162039e-05, "loss": 0.0757, "step": 10787 }, { "epoch": 0.1910504854626838, "grad_norm": 1.07590651512146, "learning_rate": 2.80052683531144e-05, "loss": 0.1118, "step": 10788 }, { "epoch": 0.19106819499971223, "grad_norm": 1.328258991241455, "learning_rate": 2.8004839631822384e-05, "loss": 0.0958, "step": 10789 }, { "epoch": 0.19108590453674065, "grad_norm": 0.7680158615112305, "learning_rate": 2.8004410867745766e-05, "loss": 0.0964, "step": 10790 }, { "epoch": 0.19110361407376908, "grad_norm": 0.807126522064209, "learning_rate": 2.8003982060885954e-05, "loss": 0.1262, "step": 10791 }, { "epoch": 0.1911213236107975, "grad_norm": 0.912842869758606, "learning_rate": 2.800355321124435e-05, "loss": 0.0664, "step": 10792 }, { "epoch": 0.19113903314782593, "grad_norm": 0.825107991695404, "learning_rate": 2.8003124318822377e-05, "loss": 0.0982, "step": 10793 }, { "epoch": 0.19115674268485436, "grad_norm": 0.955400288105011, "learning_rate": 2.800269538362144e-05, "loss": 0.0817, "step": 10794 }, { "epoch": 0.19117445222188278, "grad_norm": 1.1023215055465698, "learning_rate": 2.800226640564295e-05, "loss": 0.1116, "step": 10795 }, { "epoch": 0.1911921617589112, "grad_norm": 1.0489903688430786, "learning_rate": 2.8001837384888313e-05, "loss": 0.1062, "step": 10796 }, { "epoch": 0.19120987129593964, "grad_norm": 0.877874493598938, "learning_rate": 2.800140832135895e-05, "loss": 0.1073, "step": 10797 }, { "epoch": 0.19122758083296806, "grad_norm": 0.7485938668251038, "learning_rate": 2.800097921505627e-05, "loss": 0.1023, "step": 10798 }, { "epoch": 0.1912452903699965, "grad_norm": 1.0618109703063965, "learning_rate": 2.8000550065981682e-05, "loss": 0.1326, "step": 10799 }, { "epoch": 0.19126299990702494, "grad_norm": 0.8613438606262207, "learning_rate": 2.80001208741366e-05, "loss": 0.0844, "step": 10800 }, { "epoch": 0.19128070944405337, "grad_norm": 0.9137830138206482, "learning_rate": 2.7999691639522436e-05, "loss": 0.0851, "step": 10801 }, { "epoch": 0.1912984189810818, "grad_norm": 0.8422757387161255, "learning_rate": 2.7999262362140596e-05, "loss": 0.1256, "step": 10802 }, { "epoch": 0.19131612851811022, "grad_norm": 0.715510368347168, "learning_rate": 2.7998833041992503e-05, "loss": 0.1362, "step": 10803 }, { "epoch": 0.19133383805513865, "grad_norm": 0.9524945020675659, "learning_rate": 2.7998403679079566e-05, "loss": 0.1411, "step": 10804 }, { "epoch": 0.19135154759216708, "grad_norm": 0.6588782072067261, "learning_rate": 2.7997974273403188e-05, "loss": 0.1156, "step": 10805 }, { "epoch": 0.1913692571291955, "grad_norm": 0.867904007434845, "learning_rate": 2.79975448249648e-05, "loss": 0.1004, "step": 10806 }, { "epoch": 0.19138696666622393, "grad_norm": 1.3319567441940308, "learning_rate": 2.7997115333765792e-05, "loss": 0.1503, "step": 10807 }, { "epoch": 0.19140467620325236, "grad_norm": 0.8665207028388977, "learning_rate": 2.79966857998076e-05, "loss": 0.1109, "step": 10808 }, { "epoch": 0.19142238574028078, "grad_norm": 0.5855766534805298, "learning_rate": 2.799625622309162e-05, "loss": 0.1124, "step": 10809 }, { "epoch": 0.1914400952773092, "grad_norm": 0.9597911834716797, "learning_rate": 2.7995826603619268e-05, "loss": 0.117, "step": 10810 }, { "epoch": 0.19145780481433763, "grad_norm": 0.656514585018158, "learning_rate": 2.799539694139197e-05, "loss": 0.0987, "step": 10811 }, { "epoch": 0.19147551435136606, "grad_norm": 1.0745978355407715, "learning_rate": 2.7994967236411126e-05, "loss": 0.1391, "step": 10812 }, { "epoch": 0.1914932238883945, "grad_norm": 0.9378113746643066, "learning_rate": 2.7994537488678155e-05, "loss": 0.1264, "step": 10813 }, { "epoch": 0.1915109334254229, "grad_norm": 0.9919568300247192, "learning_rate": 2.7994107698194465e-05, "loss": 0.1371, "step": 10814 }, { "epoch": 0.19152864296245137, "grad_norm": 1.0044503211975098, "learning_rate": 2.799367786496148e-05, "loss": 0.1087, "step": 10815 }, { "epoch": 0.1915463524994798, "grad_norm": 0.7549639940261841, "learning_rate": 2.7993247988980606e-05, "loss": 0.0921, "step": 10816 }, { "epoch": 0.19156406203650822, "grad_norm": 0.7603256106376648, "learning_rate": 2.7992818070253265e-05, "loss": 0.1053, "step": 10817 }, { "epoch": 0.19158177157353665, "grad_norm": 1.3262267112731934, "learning_rate": 2.799238810878086e-05, "loss": 0.1348, "step": 10818 }, { "epoch": 0.19159948111056507, "grad_norm": 1.4184136390686035, "learning_rate": 2.7991958104564822e-05, "loss": 0.0977, "step": 10819 }, { "epoch": 0.1916171906475935, "grad_norm": 1.0515437126159668, "learning_rate": 2.799152805760655e-05, "loss": 0.0704, "step": 10820 }, { "epoch": 0.19163490018462193, "grad_norm": 1.127131462097168, "learning_rate": 2.799109796790746e-05, "loss": 0.1117, "step": 10821 }, { "epoch": 0.19165260972165035, "grad_norm": 0.8033617734909058, "learning_rate": 2.799066783546898e-05, "loss": 0.1093, "step": 10822 }, { "epoch": 0.19167031925867878, "grad_norm": 0.9142471551895142, "learning_rate": 2.7990237660292516e-05, "loss": 0.1066, "step": 10823 }, { "epoch": 0.1916880287957072, "grad_norm": 0.5637396574020386, "learning_rate": 2.798980744237948e-05, "loss": 0.0718, "step": 10824 }, { "epoch": 0.19170573833273563, "grad_norm": 0.8890259861946106, "learning_rate": 2.7989377181731292e-05, "loss": 0.0791, "step": 10825 }, { "epoch": 0.19172344786976406, "grad_norm": 2.142321825027466, "learning_rate": 2.7988946878349366e-05, "loss": 0.0978, "step": 10826 }, { "epoch": 0.19174115740679248, "grad_norm": 0.6992320418357849, "learning_rate": 2.7988516532235123e-05, "loss": 0.1117, "step": 10827 }, { "epoch": 0.1917588669438209, "grad_norm": 2.854689836502075, "learning_rate": 2.798808614338997e-05, "loss": 0.1474, "step": 10828 }, { "epoch": 0.19177657648084934, "grad_norm": 25.0710506439209, "learning_rate": 2.7987655711815328e-05, "loss": 0.1497, "step": 10829 }, { "epoch": 0.1917942860178778, "grad_norm": 1.838011384010315, "learning_rate": 2.7987225237512616e-05, "loss": 0.1343, "step": 10830 }, { "epoch": 0.19181199555490622, "grad_norm": 2.308478593826294, "learning_rate": 2.7986794720483245e-05, "loss": 0.1576, "step": 10831 }, { "epoch": 0.19182970509193464, "grad_norm": 1.0311152935028076, "learning_rate": 2.7986364160728627e-05, "loss": 0.1158, "step": 10832 }, { "epoch": 0.19184741462896307, "grad_norm": 1.6699624061584473, "learning_rate": 2.7985933558250192e-05, "loss": 0.1153, "step": 10833 }, { "epoch": 0.1918651241659915, "grad_norm": 1.5696569681167603, "learning_rate": 2.7985502913049344e-05, "loss": 0.1238, "step": 10834 }, { "epoch": 0.19188283370301992, "grad_norm": 1.3360651731491089, "learning_rate": 2.7985072225127508e-05, "loss": 0.1568, "step": 10835 }, { "epoch": 0.19190054324004835, "grad_norm": 0.957831621170044, "learning_rate": 2.7984641494486093e-05, "loss": 0.1668, "step": 10836 }, { "epoch": 0.19191825277707678, "grad_norm": 1.2010570764541626, "learning_rate": 2.7984210721126527e-05, "loss": 0.1263, "step": 10837 }, { "epoch": 0.1919359623141052, "grad_norm": 1.2049280405044556, "learning_rate": 2.798377990505022e-05, "loss": 0.1016, "step": 10838 }, { "epoch": 0.19195367185113363, "grad_norm": 1.436418890953064, "learning_rate": 2.7983349046258588e-05, "loss": 0.1617, "step": 10839 }, { "epoch": 0.19197138138816205, "grad_norm": 1.1347984075546265, "learning_rate": 2.798291814475305e-05, "loss": 0.1456, "step": 10840 }, { "epoch": 0.19198909092519048, "grad_norm": 0.8713871240615845, "learning_rate": 2.7982487200535026e-05, "loss": 0.0835, "step": 10841 }, { "epoch": 0.1920068004622189, "grad_norm": 1.08645498752594, "learning_rate": 2.7982056213605935e-05, "loss": 0.1079, "step": 10842 }, { "epoch": 0.19202450999924733, "grad_norm": 1.0491353273391724, "learning_rate": 2.7981625183967186e-05, "loss": 0.1216, "step": 10843 }, { "epoch": 0.19204221953627576, "grad_norm": 0.6913058161735535, "learning_rate": 2.798119411162021e-05, "loss": 0.1268, "step": 10844 }, { "epoch": 0.19205992907330421, "grad_norm": 1.8907173871994019, "learning_rate": 2.798076299656641e-05, "loss": 0.0835, "step": 10845 }, { "epoch": 0.19207763861033264, "grad_norm": 0.9150447249412537, "learning_rate": 2.798033183880722e-05, "loss": 0.1027, "step": 10846 }, { "epoch": 0.19209534814736107, "grad_norm": 1.5929293632507324, "learning_rate": 2.7979900638344046e-05, "loss": 0.1553, "step": 10847 }, { "epoch": 0.1921130576843895, "grad_norm": 0.6712344884872437, "learning_rate": 2.7979469395178316e-05, "loss": 0.0964, "step": 10848 }, { "epoch": 0.19213076722141792, "grad_norm": 0.8119227290153503, "learning_rate": 2.7979038109311443e-05, "loss": 0.0935, "step": 10849 }, { "epoch": 0.19214847675844635, "grad_norm": 0.8466207385063171, "learning_rate": 2.7978606780744847e-05, "loss": 0.1161, "step": 10850 }, { "epoch": 0.19216618629547477, "grad_norm": 0.9579577445983887, "learning_rate": 2.797817540947995e-05, "loss": 0.127, "step": 10851 }, { "epoch": 0.1921838958325032, "grad_norm": 0.7312619090080261, "learning_rate": 2.7977743995518164e-05, "loss": 0.0859, "step": 10852 }, { "epoch": 0.19220160536953163, "grad_norm": 1.092452049255371, "learning_rate": 2.7977312538860914e-05, "loss": 0.1462, "step": 10853 }, { "epoch": 0.19221931490656005, "grad_norm": 0.9860503673553467, "learning_rate": 2.797688103950962e-05, "loss": 0.1176, "step": 10854 }, { "epoch": 0.19223702444358848, "grad_norm": 1.6113210916519165, "learning_rate": 2.79764494974657e-05, "loss": 0.11, "step": 10855 }, { "epoch": 0.1922547339806169, "grad_norm": 0.7213249206542969, "learning_rate": 2.7976017912730572e-05, "loss": 0.1269, "step": 10856 }, { "epoch": 0.19227244351764533, "grad_norm": 1.3270868062973022, "learning_rate": 2.7975586285305662e-05, "loss": 0.1017, "step": 10857 }, { "epoch": 0.19229015305467376, "grad_norm": 1.4792829751968384, "learning_rate": 2.797515461519238e-05, "loss": 0.1058, "step": 10858 }, { "epoch": 0.19230786259170218, "grad_norm": 0.9536152482032776, "learning_rate": 2.7974722902392153e-05, "loss": 0.1371, "step": 10859 }, { "epoch": 0.19232557212873064, "grad_norm": 0.9045251607894897, "learning_rate": 2.7974291146906402e-05, "loss": 0.1005, "step": 10860 }, { "epoch": 0.19234328166575906, "grad_norm": 0.8336524367332458, "learning_rate": 2.797385934873654e-05, "loss": 0.1193, "step": 10861 }, { "epoch": 0.1923609912027875, "grad_norm": 1.1072927713394165, "learning_rate": 2.7973427507883997e-05, "loss": 0.1498, "step": 10862 }, { "epoch": 0.19237870073981592, "grad_norm": 0.6650424003601074, "learning_rate": 2.7972995624350192e-05, "loss": 0.1075, "step": 10863 }, { "epoch": 0.19239641027684434, "grad_norm": 0.9254482388496399, "learning_rate": 2.7972563698136538e-05, "loss": 0.0929, "step": 10864 }, { "epoch": 0.19241411981387277, "grad_norm": 0.9268476366996765, "learning_rate": 2.7972131729244465e-05, "loss": 0.0988, "step": 10865 }, { "epoch": 0.1924318293509012, "grad_norm": 0.9809966683387756, "learning_rate": 2.7971699717675393e-05, "loss": 0.0937, "step": 10866 }, { "epoch": 0.19244953888792962, "grad_norm": 1.425907015800476, "learning_rate": 2.7971267663430737e-05, "loss": 0.1112, "step": 10867 }, { "epoch": 0.19246724842495805, "grad_norm": 0.9767981767654419, "learning_rate": 2.7970835566511924e-05, "loss": 0.0922, "step": 10868 }, { "epoch": 0.19248495796198647, "grad_norm": 0.620398998260498, "learning_rate": 2.7970403426920374e-05, "loss": 0.1114, "step": 10869 }, { "epoch": 0.1925026674990149, "grad_norm": 1.6235262155532837, "learning_rate": 2.796997124465751e-05, "loss": 0.1028, "step": 10870 }, { "epoch": 0.19252037703604333, "grad_norm": 1.2842466831207275, "learning_rate": 2.7969539019724748e-05, "loss": 0.0934, "step": 10871 }, { "epoch": 0.19253808657307175, "grad_norm": 1.0059685707092285, "learning_rate": 2.7969106752123518e-05, "loss": 0.1334, "step": 10872 }, { "epoch": 0.19255579611010018, "grad_norm": 1.0090055465698242, "learning_rate": 2.7968674441855236e-05, "loss": 0.1018, "step": 10873 }, { "epoch": 0.1925735056471286, "grad_norm": 0.7074437141418457, "learning_rate": 2.7968242088921328e-05, "loss": 0.1202, "step": 10874 }, { "epoch": 0.19259121518415706, "grad_norm": 0.882788896560669, "learning_rate": 2.7967809693323218e-05, "loss": 0.1251, "step": 10875 }, { "epoch": 0.1926089247211855, "grad_norm": 1.0214754343032837, "learning_rate": 2.796737725506232e-05, "loss": 0.1235, "step": 10876 }, { "epoch": 0.1926266342582139, "grad_norm": 1.4946151971817017, "learning_rate": 2.7966944774140065e-05, "loss": 0.11, "step": 10877 }, { "epoch": 0.19264434379524234, "grad_norm": 2.0805857181549072, "learning_rate": 2.7966512250557875e-05, "loss": 0.0838, "step": 10878 }, { "epoch": 0.19266205333227077, "grad_norm": 0.9075706005096436, "learning_rate": 2.7966079684317168e-05, "loss": 0.1143, "step": 10879 }, { "epoch": 0.1926797628692992, "grad_norm": 0.7772325277328491, "learning_rate": 2.7965647075419377e-05, "loss": 0.1561, "step": 10880 }, { "epoch": 0.19269747240632762, "grad_norm": 1.1062142848968506, "learning_rate": 2.7965214423865915e-05, "loss": 0.1049, "step": 10881 }, { "epoch": 0.19271518194335605, "grad_norm": 0.6864903569221497, "learning_rate": 2.7964781729658204e-05, "loss": 0.095, "step": 10882 }, { "epoch": 0.19273289148038447, "grad_norm": 1.0175328254699707, "learning_rate": 2.796434899279768e-05, "loss": 0.0804, "step": 10883 }, { "epoch": 0.1927506010174129, "grad_norm": 1.68919837474823, "learning_rate": 2.796391621328575e-05, "loss": 0.0929, "step": 10884 }, { "epoch": 0.19276831055444132, "grad_norm": 0.7054436802864075, "learning_rate": 2.7963483391123854e-05, "loss": 0.0904, "step": 10885 }, { "epoch": 0.19278602009146975, "grad_norm": 1.6061863899230957, "learning_rate": 2.796305052631341e-05, "loss": 0.1211, "step": 10886 }, { "epoch": 0.19280372962849818, "grad_norm": 1.0807925462722778, "learning_rate": 2.7962617618855836e-05, "loss": 0.0856, "step": 10887 }, { "epoch": 0.1928214391655266, "grad_norm": 0.9746885299682617, "learning_rate": 2.796218466875256e-05, "loss": 0.1198, "step": 10888 }, { "epoch": 0.19283914870255503, "grad_norm": 1.23284912109375, "learning_rate": 2.796175167600501e-05, "loss": 0.1099, "step": 10889 }, { "epoch": 0.19285685823958348, "grad_norm": 0.7774403691291809, "learning_rate": 2.796131864061461e-05, "loss": 0.0655, "step": 10890 }, { "epoch": 0.1928745677766119, "grad_norm": 0.8234028816223145, "learning_rate": 2.7960885562582784e-05, "loss": 0.0965, "step": 10891 }, { "epoch": 0.19289227731364034, "grad_norm": 0.7170693874359131, "learning_rate": 2.7960452441910953e-05, "loss": 0.0898, "step": 10892 }, { "epoch": 0.19290998685066876, "grad_norm": 0.9299899339675903, "learning_rate": 2.7960019278600543e-05, "loss": 0.1017, "step": 10893 }, { "epoch": 0.1929276963876972, "grad_norm": 1.7042851448059082, "learning_rate": 2.795958607265298e-05, "loss": 0.1609, "step": 10894 }, { "epoch": 0.19294540592472562, "grad_norm": 0.8406022787094116, "learning_rate": 2.795915282406969e-05, "loss": 0.1192, "step": 10895 }, { "epoch": 0.19296311546175404, "grad_norm": 0.7910848259925842, "learning_rate": 2.79587195328521e-05, "loss": 0.085, "step": 10896 }, { "epoch": 0.19298082499878247, "grad_norm": 0.6619073748588562, "learning_rate": 2.795828619900163e-05, "loss": 0.0955, "step": 10897 }, { "epoch": 0.1929985345358109, "grad_norm": 1.21695876121521, "learning_rate": 2.7957852822519716e-05, "loss": 0.1239, "step": 10898 }, { "epoch": 0.19301624407283932, "grad_norm": 0.8494668006896973, "learning_rate": 2.795741940340777e-05, "loss": 0.1151, "step": 10899 }, { "epoch": 0.19303395360986775, "grad_norm": 1.1009976863861084, "learning_rate": 2.7956985941667228e-05, "loss": 0.1164, "step": 10900 }, { "epoch": 0.19305166314689617, "grad_norm": 0.7911309599876404, "learning_rate": 2.795655243729951e-05, "loss": 0.113, "step": 10901 }, { "epoch": 0.1930693726839246, "grad_norm": 0.9005919694900513, "learning_rate": 2.7956118890306045e-05, "loss": 0.1059, "step": 10902 }, { "epoch": 0.19308708222095303, "grad_norm": 0.7144604921340942, "learning_rate": 2.7955685300688263e-05, "loss": 0.0978, "step": 10903 }, { "epoch": 0.19310479175798145, "grad_norm": 1.4858583211898804, "learning_rate": 2.7955251668447586e-05, "loss": 0.0809, "step": 10904 }, { "epoch": 0.1931225012950099, "grad_norm": 0.8645807504653931, "learning_rate": 2.7954817993585438e-05, "loss": 0.0744, "step": 10905 }, { "epoch": 0.19314021083203833, "grad_norm": 1.0364006757736206, "learning_rate": 2.795438427610325e-05, "loss": 0.1033, "step": 10906 }, { "epoch": 0.19315792036906676, "grad_norm": 1.2230395078659058, "learning_rate": 2.795395051600245e-05, "loss": 0.1005, "step": 10907 }, { "epoch": 0.1931756299060952, "grad_norm": 1.0449697971343994, "learning_rate": 2.7953516713284463e-05, "loss": 0.0766, "step": 10908 }, { "epoch": 0.1931933394431236, "grad_norm": 0.9640657901763916, "learning_rate": 2.7953082867950713e-05, "loss": 0.1219, "step": 10909 }, { "epoch": 0.19321104898015204, "grad_norm": 1.0340163707733154, "learning_rate": 2.7952648980002634e-05, "loss": 0.0763, "step": 10910 }, { "epoch": 0.19322875851718047, "grad_norm": 1.231021761894226, "learning_rate": 2.7952215049441646e-05, "loss": 0.1105, "step": 10911 }, { "epoch": 0.1932464680542089, "grad_norm": 0.650850236415863, "learning_rate": 2.795178107626918e-05, "loss": 0.0781, "step": 10912 }, { "epoch": 0.19326417759123732, "grad_norm": 0.9956876635551453, "learning_rate": 2.7951347060486673e-05, "loss": 0.1112, "step": 10913 }, { "epoch": 0.19328188712826574, "grad_norm": 0.6145312190055847, "learning_rate": 2.795091300209554e-05, "loss": 0.1177, "step": 10914 }, { "epoch": 0.19329959666529417, "grad_norm": 1.0135258436203003, "learning_rate": 2.7950478901097213e-05, "loss": 0.1225, "step": 10915 }, { "epoch": 0.1933173062023226, "grad_norm": 0.9176693558692932, "learning_rate": 2.7950044757493117e-05, "loss": 0.1226, "step": 10916 }, { "epoch": 0.19333501573935102, "grad_norm": 0.8805510401725769, "learning_rate": 2.7949610571284686e-05, "loss": 0.0981, "step": 10917 }, { "epoch": 0.19335272527637945, "grad_norm": 1.042475938796997, "learning_rate": 2.7949176342473345e-05, "loss": 0.0807, "step": 10918 }, { "epoch": 0.1933704348134079, "grad_norm": 1.2059437036514282, "learning_rate": 2.7948742071060525e-05, "loss": 0.0745, "step": 10919 }, { "epoch": 0.19338814435043633, "grad_norm": 0.6724398732185364, "learning_rate": 2.794830775704765e-05, "loss": 0.1011, "step": 10920 }, { "epoch": 0.19340585388746476, "grad_norm": 1.0764106512069702, "learning_rate": 2.7947873400436154e-05, "loss": 0.1236, "step": 10921 }, { "epoch": 0.19342356342449318, "grad_norm": 0.682462751865387, "learning_rate": 2.7947439001227465e-05, "loss": 0.0919, "step": 10922 }, { "epoch": 0.1934412729615216, "grad_norm": 1.254820466041565, "learning_rate": 2.794700455942301e-05, "loss": 0.1458, "step": 10923 }, { "epoch": 0.19345898249855004, "grad_norm": 0.8423499464988708, "learning_rate": 2.794657007502422e-05, "loss": 0.0766, "step": 10924 }, { "epoch": 0.19347669203557846, "grad_norm": 1.0788193941116333, "learning_rate": 2.7946135548032525e-05, "loss": 0.12, "step": 10925 }, { "epoch": 0.1934944015726069, "grad_norm": 0.8469552397727966, "learning_rate": 2.7945700978449353e-05, "loss": 0.129, "step": 10926 }, { "epoch": 0.19351211110963532, "grad_norm": 1.5308390855789185, "learning_rate": 2.7945266366276137e-05, "loss": 0.0888, "step": 10927 }, { "epoch": 0.19352982064666374, "grad_norm": 0.9643183350563049, "learning_rate": 2.79448317115143e-05, "loss": 0.1028, "step": 10928 }, { "epoch": 0.19354753018369217, "grad_norm": 0.39416229724884033, "learning_rate": 2.7944397014165276e-05, "loss": 0.0712, "step": 10929 }, { "epoch": 0.1935652397207206, "grad_norm": 1.0180872678756714, "learning_rate": 2.7943962274230496e-05, "loss": 0.1814, "step": 10930 }, { "epoch": 0.19358294925774902, "grad_norm": 1.1283907890319824, "learning_rate": 2.794352749171139e-05, "loss": 0.0874, "step": 10931 }, { "epoch": 0.19360065879477745, "grad_norm": 0.8528424501419067, "learning_rate": 2.7943092666609388e-05, "loss": 0.0958, "step": 10932 }, { "epoch": 0.19361836833180587, "grad_norm": 0.639653205871582, "learning_rate": 2.7942657798925916e-05, "loss": 0.0726, "step": 10933 }, { "epoch": 0.19363607786883433, "grad_norm": 1.1529539823532104, "learning_rate": 2.7942222888662413e-05, "loss": 0.1204, "step": 10934 }, { "epoch": 0.19365378740586275, "grad_norm": 1.0606740713119507, "learning_rate": 2.7941787935820305e-05, "loss": 0.0974, "step": 10935 }, { "epoch": 0.19367149694289118, "grad_norm": 0.8511730432510376, "learning_rate": 2.794135294040102e-05, "loss": 0.0868, "step": 10936 }, { "epoch": 0.1936892064799196, "grad_norm": 0.814395546913147, "learning_rate": 2.7940917902405997e-05, "loss": 0.1209, "step": 10937 }, { "epoch": 0.19370691601694803, "grad_norm": 0.8661714196205139, "learning_rate": 2.7940482821836667e-05, "loss": 0.1556, "step": 10938 }, { "epoch": 0.19372462555397646, "grad_norm": 0.9953076243400574, "learning_rate": 2.7940047698694448e-05, "loss": 0.137, "step": 10939 }, { "epoch": 0.19374233509100489, "grad_norm": 0.8631892204284668, "learning_rate": 2.793961253298079e-05, "loss": 0.1235, "step": 10940 }, { "epoch": 0.1937600446280333, "grad_norm": 0.9578085541725159, "learning_rate": 2.7939177324697106e-05, "loss": 0.0841, "step": 10941 }, { "epoch": 0.19377775416506174, "grad_norm": 0.9830888509750366, "learning_rate": 2.7938742073844846e-05, "loss": 0.1333, "step": 10942 }, { "epoch": 0.19379546370209016, "grad_norm": 0.6999830603599548, "learning_rate": 2.7938306780425427e-05, "loss": 0.0884, "step": 10943 }, { "epoch": 0.1938131732391186, "grad_norm": 0.8541196584701538, "learning_rate": 2.793787144444029e-05, "loss": 0.109, "step": 10944 }, { "epoch": 0.19383088277614702, "grad_norm": 0.9065552949905396, "learning_rate": 2.7937436065890865e-05, "loss": 0.0735, "step": 10945 }, { "epoch": 0.19384859231317544, "grad_norm": 1.1532942056655884, "learning_rate": 2.7937000644778586e-05, "loss": 0.1214, "step": 10946 }, { "epoch": 0.19386630185020387, "grad_norm": 1.169352412223816, "learning_rate": 2.7936565181104877e-05, "loss": 0.1329, "step": 10947 }, { "epoch": 0.1938840113872323, "grad_norm": 1.0914117097854614, "learning_rate": 2.793612967487118e-05, "loss": 0.0726, "step": 10948 }, { "epoch": 0.19390172092426075, "grad_norm": 0.7995134592056274, "learning_rate": 2.793569412607893e-05, "loss": 0.0809, "step": 10949 }, { "epoch": 0.19391943046128918, "grad_norm": 0.6426599621772766, "learning_rate": 2.7935258534729547e-05, "loss": 0.0889, "step": 10950 }, { "epoch": 0.1939371399983176, "grad_norm": 1.1450073719024658, "learning_rate": 2.793482290082448e-05, "loss": 0.1081, "step": 10951 }, { "epoch": 0.19395484953534603, "grad_norm": 0.6400823593139648, "learning_rate": 2.793438722436515e-05, "loss": 0.1318, "step": 10952 }, { "epoch": 0.19397255907237446, "grad_norm": 0.8787088990211487, "learning_rate": 2.793395150535299e-05, "loss": 0.1051, "step": 10953 }, { "epoch": 0.19399026860940288, "grad_norm": 0.7342649698257446, "learning_rate": 2.7933515743789437e-05, "loss": 0.1257, "step": 10954 }, { "epoch": 0.1940079781464313, "grad_norm": 1.2252745628356934, "learning_rate": 2.793307993967593e-05, "loss": 0.0998, "step": 10955 }, { "epoch": 0.19402568768345974, "grad_norm": 1.4837839603424072, "learning_rate": 2.79326440930139e-05, "loss": 0.1246, "step": 10956 }, { "epoch": 0.19404339722048816, "grad_norm": 0.9128055572509766, "learning_rate": 2.7932208203804776e-05, "loss": 0.1375, "step": 10957 }, { "epoch": 0.1940611067575166, "grad_norm": 1.1978260278701782, "learning_rate": 2.7931772272049994e-05, "loss": 0.1138, "step": 10958 }, { "epoch": 0.19407881629454501, "grad_norm": 0.6954236030578613, "learning_rate": 2.793133629775099e-05, "loss": 0.0868, "step": 10959 }, { "epoch": 0.19409652583157344, "grad_norm": 1.0146180391311646, "learning_rate": 2.7930900280909197e-05, "loss": 0.0867, "step": 10960 }, { "epoch": 0.19411423536860187, "grad_norm": 0.981647253036499, "learning_rate": 2.7930464221526056e-05, "loss": 0.1218, "step": 10961 }, { "epoch": 0.1941319449056303, "grad_norm": 1.3559517860412598, "learning_rate": 2.793002811960299e-05, "loss": 0.1456, "step": 10962 }, { "epoch": 0.19414965444265872, "grad_norm": 1.4082034826278687, "learning_rate": 2.7929591975141435e-05, "loss": 0.151, "step": 10963 }, { "epoch": 0.19416736397968717, "grad_norm": 2.3213562965393066, "learning_rate": 2.7929155788142836e-05, "loss": 0.1234, "step": 10964 }, { "epoch": 0.1941850735167156, "grad_norm": 0.8608248829841614, "learning_rate": 2.7928719558608624e-05, "loss": 0.0754, "step": 10965 }, { "epoch": 0.19420278305374403, "grad_norm": 0.7580646872520447, "learning_rate": 2.792828328654023e-05, "loss": 0.1451, "step": 10966 }, { "epoch": 0.19422049259077245, "grad_norm": 1.7469285726547241, "learning_rate": 2.792784697193909e-05, "loss": 0.1551, "step": 10967 }, { "epoch": 0.19423820212780088, "grad_norm": 0.9940736889839172, "learning_rate": 2.7927410614806637e-05, "loss": 0.1204, "step": 10968 }, { "epoch": 0.1942559116648293, "grad_norm": 0.8982288837432861, "learning_rate": 2.792697421514432e-05, "loss": 0.0945, "step": 10969 }, { "epoch": 0.19427362120185773, "grad_norm": 0.9800222516059875, "learning_rate": 2.7926537772953562e-05, "loss": 0.1096, "step": 10970 }, { "epoch": 0.19429133073888616, "grad_norm": 0.755272388458252, "learning_rate": 2.79261012882358e-05, "loss": 0.0853, "step": 10971 }, { "epoch": 0.19430904027591459, "grad_norm": 1.104328989982605, "learning_rate": 2.7925664760992476e-05, "loss": 0.097, "step": 10972 }, { "epoch": 0.194326749812943, "grad_norm": 0.9018140435218811, "learning_rate": 2.7925228191225018e-05, "loss": 0.0953, "step": 10973 }, { "epoch": 0.19434445934997144, "grad_norm": 0.7154865264892578, "learning_rate": 2.7924791578934866e-05, "loss": 0.0951, "step": 10974 }, { "epoch": 0.19436216888699986, "grad_norm": 1.0571870803833008, "learning_rate": 2.792435492412346e-05, "loss": 0.1321, "step": 10975 }, { "epoch": 0.1943798784240283, "grad_norm": 0.7562663555145264, "learning_rate": 2.7923918226792233e-05, "loss": 0.0728, "step": 10976 }, { "epoch": 0.19439758796105672, "grad_norm": 0.989574670791626, "learning_rate": 2.7923481486942622e-05, "loss": 0.1077, "step": 10977 }, { "epoch": 0.19441529749808514, "grad_norm": 0.9465590119361877, "learning_rate": 2.7923044704576063e-05, "loss": 0.1013, "step": 10978 }, { "epoch": 0.1944330070351136, "grad_norm": 0.8629117012023926, "learning_rate": 2.7922607879693997e-05, "loss": 0.0927, "step": 10979 }, { "epoch": 0.19445071657214202, "grad_norm": 1.2173806428909302, "learning_rate": 2.7922171012297857e-05, "loss": 0.1426, "step": 10980 }, { "epoch": 0.19446842610917045, "grad_norm": 0.8238360285758972, "learning_rate": 2.792173410238908e-05, "loss": 0.1309, "step": 10981 }, { "epoch": 0.19448613564619888, "grad_norm": 0.8477230072021484, "learning_rate": 2.7921297149969103e-05, "loss": 0.1538, "step": 10982 }, { "epoch": 0.1945038451832273, "grad_norm": 1.0547703504562378, "learning_rate": 2.792086015503937e-05, "loss": 0.0926, "step": 10983 }, { "epoch": 0.19452155472025573, "grad_norm": 1.0072757005691528, "learning_rate": 2.792042311760131e-05, "loss": 0.1408, "step": 10984 }, { "epoch": 0.19453926425728416, "grad_norm": 0.803935170173645, "learning_rate": 2.791998603765637e-05, "loss": 0.1443, "step": 10985 }, { "epoch": 0.19455697379431258, "grad_norm": 1.2176638841629028, "learning_rate": 2.7919548915205974e-05, "loss": 0.0911, "step": 10986 }, { "epoch": 0.194574683331341, "grad_norm": 1.0817468166351318, "learning_rate": 2.7919111750251574e-05, "loss": 0.1202, "step": 10987 }, { "epoch": 0.19459239286836943, "grad_norm": 1.2601665258407593, "learning_rate": 2.7918674542794606e-05, "loss": 0.1461, "step": 10988 }, { "epoch": 0.19461010240539786, "grad_norm": 1.0068241357803345, "learning_rate": 2.7918237292836496e-05, "loss": 0.0936, "step": 10989 }, { "epoch": 0.1946278119424263, "grad_norm": 1.0048191547393799, "learning_rate": 2.7917800000378704e-05, "loss": 0.0746, "step": 10990 }, { "epoch": 0.19464552147945471, "grad_norm": 1.0462079048156738, "learning_rate": 2.791736266542265e-05, "loss": 0.0724, "step": 10991 }, { "epoch": 0.19466323101648314, "grad_norm": 0.9712231159210205, "learning_rate": 2.7916925287969778e-05, "loss": 0.0855, "step": 10992 }, { "epoch": 0.19468094055351157, "grad_norm": 0.9160686731338501, "learning_rate": 2.7916487868021533e-05, "loss": 0.1288, "step": 10993 }, { "epoch": 0.19469865009054002, "grad_norm": 0.6556528806686401, "learning_rate": 2.7916050405579343e-05, "loss": 0.1152, "step": 10994 }, { "epoch": 0.19471635962756845, "grad_norm": 1.0266270637512207, "learning_rate": 2.791561290064466e-05, "loss": 0.1172, "step": 10995 }, { "epoch": 0.19473406916459687, "grad_norm": 1.0783334970474243, "learning_rate": 2.7915175353218914e-05, "loss": 0.0925, "step": 10996 }, { "epoch": 0.1947517787016253, "grad_norm": 0.5682010054588318, "learning_rate": 2.7914737763303547e-05, "loss": 0.0782, "step": 10997 }, { "epoch": 0.19476948823865373, "grad_norm": 1.1289511919021606, "learning_rate": 2.7914300130899995e-05, "loss": 0.1075, "step": 10998 }, { "epoch": 0.19478719777568215, "grad_norm": 0.7260118722915649, "learning_rate": 2.7913862456009708e-05, "loss": 0.1, "step": 10999 }, { "epoch": 0.19480490731271058, "grad_norm": 0.8800624012947083, "learning_rate": 2.791342473863412e-05, "loss": 0.1418, "step": 11000 }, { "epoch": 0.194822616849739, "grad_norm": 0.6426596641540527, "learning_rate": 2.7912986978774665e-05, "loss": 0.1139, "step": 11001 }, { "epoch": 0.19484032638676743, "grad_norm": 0.8703128099441528, "learning_rate": 2.7912549176432794e-05, "loss": 0.1487, "step": 11002 }, { "epoch": 0.19485803592379586, "grad_norm": 0.9210977554321289, "learning_rate": 2.7912111331609943e-05, "loss": 0.115, "step": 11003 }, { "epoch": 0.19487574546082428, "grad_norm": 0.9543769359588623, "learning_rate": 2.791167344430755e-05, "loss": 0.1234, "step": 11004 }, { "epoch": 0.1948934549978527, "grad_norm": 1.1352903842926025, "learning_rate": 2.7911235514527054e-05, "loss": 0.1353, "step": 11005 }, { "epoch": 0.19491116453488114, "grad_norm": 1.3664246797561646, "learning_rate": 2.7910797542269902e-05, "loss": 0.1398, "step": 11006 }, { "epoch": 0.19492887407190956, "grad_norm": 0.8318536281585693, "learning_rate": 2.791035952753753e-05, "loss": 0.1087, "step": 11007 }, { "epoch": 0.194946583608938, "grad_norm": 1.0357716083526611, "learning_rate": 2.7909921470331384e-05, "loss": 0.1288, "step": 11008 }, { "epoch": 0.19496429314596644, "grad_norm": 1.1757218837738037, "learning_rate": 2.79094833706529e-05, "loss": 0.0886, "step": 11009 }, { "epoch": 0.19498200268299487, "grad_norm": 1.2878955602645874, "learning_rate": 2.7909045228503522e-05, "loss": 0.0878, "step": 11010 }, { "epoch": 0.1949997122200233, "grad_norm": 0.8408857583999634, "learning_rate": 2.7908607043884694e-05, "loss": 0.1147, "step": 11011 }, { "epoch": 0.19501742175705172, "grad_norm": 1.1144013404846191, "learning_rate": 2.7908168816797853e-05, "loss": 0.0998, "step": 11012 }, { "epoch": 0.19503513129408015, "grad_norm": 0.7174482345581055, "learning_rate": 2.7907730547244436e-05, "loss": 0.1158, "step": 11013 }, { "epoch": 0.19505284083110858, "grad_norm": 1.0435121059417725, "learning_rate": 2.7907292235225896e-05, "loss": 0.0784, "step": 11014 }, { "epoch": 0.195070550368137, "grad_norm": 0.8198110461235046, "learning_rate": 2.790685388074367e-05, "loss": 0.0931, "step": 11015 }, { "epoch": 0.19508825990516543, "grad_norm": 0.76866215467453, "learning_rate": 2.7906415483799204e-05, "loss": 0.1129, "step": 11016 }, { "epoch": 0.19510596944219386, "grad_norm": 1.6459333896636963, "learning_rate": 2.790597704439393e-05, "loss": 0.1318, "step": 11017 }, { "epoch": 0.19512367897922228, "grad_norm": 1.1074402332305908, "learning_rate": 2.7905538562529298e-05, "loss": 0.1064, "step": 11018 }, { "epoch": 0.1951413885162507, "grad_norm": 1.0190542936325073, "learning_rate": 2.790510003820675e-05, "loss": 0.1481, "step": 11019 }, { "epoch": 0.19515909805327913, "grad_norm": 0.767001211643219, "learning_rate": 2.790466147142773e-05, "loss": 0.1089, "step": 11020 }, { "epoch": 0.19517680759030756, "grad_norm": 0.8931148648262024, "learning_rate": 2.7904222862193678e-05, "loss": 0.1, "step": 11021 }, { "epoch": 0.195194517127336, "grad_norm": 0.7648594379425049, "learning_rate": 2.790378421050604e-05, "loss": 0.106, "step": 11022 }, { "epoch": 0.1952122266643644, "grad_norm": 1.0687025785446167, "learning_rate": 2.7903345516366258e-05, "loss": 0.1021, "step": 11023 }, { "epoch": 0.19522993620139287, "grad_norm": 0.5471607446670532, "learning_rate": 2.7902906779775766e-05, "loss": 0.1086, "step": 11024 }, { "epoch": 0.1952476457384213, "grad_norm": 0.9559116959571838, "learning_rate": 2.7902468000736023e-05, "loss": 0.0741, "step": 11025 }, { "epoch": 0.19526535527544972, "grad_norm": 0.7679923176765442, "learning_rate": 2.7902029179248463e-05, "loss": 0.0892, "step": 11026 }, { "epoch": 0.19528306481247815, "grad_norm": 0.9306771755218506, "learning_rate": 2.7901590315314533e-05, "loss": 0.1235, "step": 11027 }, { "epoch": 0.19530077434950657, "grad_norm": 0.8614455461502075, "learning_rate": 2.7901151408935675e-05, "loss": 0.0843, "step": 11028 }, { "epoch": 0.195318483886535, "grad_norm": 0.8205417990684509, "learning_rate": 2.790071246011333e-05, "loss": 0.0964, "step": 11029 }, { "epoch": 0.19533619342356343, "grad_norm": 0.6969673037528992, "learning_rate": 2.7900273468848953e-05, "loss": 0.1126, "step": 11030 }, { "epoch": 0.19535390296059185, "grad_norm": 1.0384495258331299, "learning_rate": 2.7899834435143976e-05, "loss": 0.0819, "step": 11031 }, { "epoch": 0.19537161249762028, "grad_norm": 1.4595876932144165, "learning_rate": 2.789939535899985e-05, "loss": 0.1824, "step": 11032 }, { "epoch": 0.1953893220346487, "grad_norm": 1.1723042726516724, "learning_rate": 2.789895624041802e-05, "loss": 0.1545, "step": 11033 }, { "epoch": 0.19540703157167713, "grad_norm": 0.9453449845314026, "learning_rate": 2.7898517079399927e-05, "loss": 0.1243, "step": 11034 }, { "epoch": 0.19542474110870556, "grad_norm": 0.5959196090698242, "learning_rate": 2.7898077875947017e-05, "loss": 0.0803, "step": 11035 }, { "epoch": 0.19544245064573398, "grad_norm": 1.5972670316696167, "learning_rate": 2.7897638630060734e-05, "loss": 0.0937, "step": 11036 }, { "epoch": 0.1954601601827624, "grad_norm": 0.7376150488853455, "learning_rate": 2.7897199341742526e-05, "loss": 0.0982, "step": 11037 }, { "epoch": 0.19547786971979084, "grad_norm": 0.9886593222618103, "learning_rate": 2.7896760010993838e-05, "loss": 0.0811, "step": 11038 }, { "epoch": 0.1954955792568193, "grad_norm": 0.6912877559661865, "learning_rate": 2.789632063781611e-05, "loss": 0.0835, "step": 11039 }, { "epoch": 0.19551328879384772, "grad_norm": 1.3516680002212524, "learning_rate": 2.7895881222210796e-05, "loss": 0.121, "step": 11040 }, { "epoch": 0.19553099833087614, "grad_norm": 0.7989287376403809, "learning_rate": 2.7895441764179338e-05, "loss": 0.1556, "step": 11041 }, { "epoch": 0.19554870786790457, "grad_norm": 0.8135287165641785, "learning_rate": 2.7895002263723175e-05, "loss": 0.1052, "step": 11042 }, { "epoch": 0.195566417404933, "grad_norm": 0.7852874994277954, "learning_rate": 2.7894562720843763e-05, "loss": 0.0977, "step": 11043 }, { "epoch": 0.19558412694196142, "grad_norm": 1.0302491188049316, "learning_rate": 2.789412313554254e-05, "loss": 0.0908, "step": 11044 }, { "epoch": 0.19560183647898985, "grad_norm": 1.0768545866012573, "learning_rate": 2.7893683507820962e-05, "loss": 0.1033, "step": 11045 }, { "epoch": 0.19561954601601828, "grad_norm": 1.0646522045135498, "learning_rate": 2.7893243837680467e-05, "loss": 0.142, "step": 11046 }, { "epoch": 0.1956372555530467, "grad_norm": 1.4778578281402588, "learning_rate": 2.7892804125122503e-05, "loss": 0.1402, "step": 11047 }, { "epoch": 0.19565496509007513, "grad_norm": 0.7335883378982544, "learning_rate": 2.7892364370148515e-05, "loss": 0.1319, "step": 11048 }, { "epoch": 0.19567267462710355, "grad_norm": 0.8691079616546631, "learning_rate": 2.7891924572759957e-05, "loss": 0.111, "step": 11049 }, { "epoch": 0.19569038416413198, "grad_norm": 0.6253191232681274, "learning_rate": 2.7891484732958265e-05, "loss": 0.1226, "step": 11050 }, { "epoch": 0.1957080937011604, "grad_norm": 0.9991952776908875, "learning_rate": 2.7891044850744898e-05, "loss": 0.1337, "step": 11051 }, { "epoch": 0.19572580323818883, "grad_norm": 1.0382646322250366, "learning_rate": 2.7890604926121296e-05, "loss": 0.117, "step": 11052 }, { "epoch": 0.1957435127752173, "grad_norm": 0.934893012046814, "learning_rate": 2.789016495908891e-05, "loss": 0.1269, "step": 11053 }, { "epoch": 0.19576122231224571, "grad_norm": 0.8504903316497803, "learning_rate": 2.7889724949649177e-05, "loss": 0.1004, "step": 11054 }, { "epoch": 0.19577893184927414, "grad_norm": 1.348906397819519, "learning_rate": 2.7889284897803557e-05, "loss": 0.1191, "step": 11055 }, { "epoch": 0.19579664138630257, "grad_norm": 0.8684676885604858, "learning_rate": 2.7888844803553493e-05, "loss": 0.1168, "step": 11056 }, { "epoch": 0.195814350923331, "grad_norm": 0.7321626543998718, "learning_rate": 2.7888404666900437e-05, "loss": 0.1188, "step": 11057 }, { "epoch": 0.19583206046035942, "grad_norm": 1.1784913539886475, "learning_rate": 2.788796448784583e-05, "loss": 0.1364, "step": 11058 }, { "epoch": 0.19584976999738785, "grad_norm": 0.8239169716835022, "learning_rate": 2.7887524266391123e-05, "loss": 0.1048, "step": 11059 }, { "epoch": 0.19586747953441627, "grad_norm": 0.7261561155319214, "learning_rate": 2.7887084002537767e-05, "loss": 0.0898, "step": 11060 }, { "epoch": 0.1958851890714447, "grad_norm": 0.7756428122520447, "learning_rate": 2.7886643696287208e-05, "loss": 0.0765, "step": 11061 }, { "epoch": 0.19590289860847312, "grad_norm": 1.2437342405319214, "learning_rate": 2.7886203347640894e-05, "loss": 0.1107, "step": 11062 }, { "epoch": 0.19592060814550155, "grad_norm": 0.9586135149002075, "learning_rate": 2.788576295660027e-05, "loss": 0.0937, "step": 11063 }, { "epoch": 0.19593831768252998, "grad_norm": 1.0125237703323364, "learning_rate": 2.7885322523166795e-05, "loss": 0.1142, "step": 11064 }, { "epoch": 0.1959560272195584, "grad_norm": 0.9559198021888733, "learning_rate": 2.7884882047341907e-05, "loss": 0.1149, "step": 11065 }, { "epoch": 0.19597373675658683, "grad_norm": 0.8740067481994629, "learning_rate": 2.7884441529127065e-05, "loss": 0.1018, "step": 11066 }, { "epoch": 0.19599144629361526, "grad_norm": 1.1399836540222168, "learning_rate": 2.788400096852371e-05, "loss": 0.073, "step": 11067 }, { "epoch": 0.1960091558306437, "grad_norm": 1.5486819744110107, "learning_rate": 2.7883560365533296e-05, "loss": 0.1413, "step": 11068 }, { "epoch": 0.19602686536767214, "grad_norm": 1.056327223777771, "learning_rate": 2.788311972015727e-05, "loss": 0.0953, "step": 11069 }, { "epoch": 0.19604457490470056, "grad_norm": 1.411045789718628, "learning_rate": 2.7882679032397085e-05, "loss": 0.0759, "step": 11070 }, { "epoch": 0.196062284441729, "grad_norm": 0.6578629612922668, "learning_rate": 2.788223830225419e-05, "loss": 0.1091, "step": 11071 }, { "epoch": 0.19607999397875742, "grad_norm": 1.603368878364563, "learning_rate": 2.7881797529730034e-05, "loss": 0.1823, "step": 11072 }, { "epoch": 0.19609770351578584, "grad_norm": 1.265318751335144, "learning_rate": 2.7881356714826066e-05, "loss": 0.1337, "step": 11073 }, { "epoch": 0.19611541305281427, "grad_norm": 1.2053735256195068, "learning_rate": 2.788091585754374e-05, "loss": 0.1135, "step": 11074 }, { "epoch": 0.1961331225898427, "grad_norm": 1.2535552978515625, "learning_rate": 2.78804749578845e-05, "loss": 0.1006, "step": 11075 }, { "epoch": 0.19615083212687112, "grad_norm": 0.9039390683174133, "learning_rate": 2.7880034015849803e-05, "loss": 0.1423, "step": 11076 }, { "epoch": 0.19616854166389955, "grad_norm": 1.404271125793457, "learning_rate": 2.787959303144109e-05, "loss": 0.1317, "step": 11077 }, { "epoch": 0.19618625120092797, "grad_norm": 1.3496904373168945, "learning_rate": 2.7879152004659827e-05, "loss": 0.1038, "step": 11078 }, { "epoch": 0.1962039607379564, "grad_norm": 0.6596718430519104, "learning_rate": 2.787871093550745e-05, "loss": 0.0659, "step": 11079 }, { "epoch": 0.19622167027498483, "grad_norm": 0.8280007839202881, "learning_rate": 2.7878269823985424e-05, "loss": 0.1019, "step": 11080 }, { "epoch": 0.19623937981201325, "grad_norm": 1.0345873832702637, "learning_rate": 2.7877828670095187e-05, "loss": 0.0956, "step": 11081 }, { "epoch": 0.19625708934904168, "grad_norm": 1.2952560186386108, "learning_rate": 2.78773874738382e-05, "loss": 0.085, "step": 11082 }, { "epoch": 0.19627479888607013, "grad_norm": 1.1322518587112427, "learning_rate": 2.787694623521591e-05, "loss": 0.0992, "step": 11083 }, { "epoch": 0.19629250842309856, "grad_norm": 0.9797077775001526, "learning_rate": 2.7876504954229765e-05, "loss": 0.1117, "step": 11084 }, { "epoch": 0.196310217960127, "grad_norm": 1.253428339958191, "learning_rate": 2.7876063630881226e-05, "loss": 0.081, "step": 11085 }, { "epoch": 0.1963279274971554, "grad_norm": 0.877738356590271, "learning_rate": 2.7875622265171736e-05, "loss": 0.1097, "step": 11086 }, { "epoch": 0.19634563703418384, "grad_norm": 0.6175910830497742, "learning_rate": 2.7875180857102752e-05, "loss": 0.0743, "step": 11087 }, { "epoch": 0.19636334657121227, "grad_norm": 1.0683494806289673, "learning_rate": 2.787473940667573e-05, "loss": 0.0981, "step": 11088 }, { "epoch": 0.1963810561082407, "grad_norm": 0.7046083211898804, "learning_rate": 2.787429791389211e-05, "loss": 0.094, "step": 11089 }, { "epoch": 0.19639876564526912, "grad_norm": 0.6354217529296875, "learning_rate": 2.7873856378753358e-05, "loss": 0.1206, "step": 11090 }, { "epoch": 0.19641647518229755, "grad_norm": 0.8799327611923218, "learning_rate": 2.7873414801260918e-05, "loss": 0.1472, "step": 11091 }, { "epoch": 0.19643418471932597, "grad_norm": 0.7702499032020569, "learning_rate": 2.7872973181416244e-05, "loss": 0.1074, "step": 11092 }, { "epoch": 0.1964518942563544, "grad_norm": 1.1323435306549072, "learning_rate": 2.787253151922079e-05, "loss": 0.1152, "step": 11093 }, { "epoch": 0.19646960379338282, "grad_norm": 1.6279959678649902, "learning_rate": 2.7872089814676014e-05, "loss": 0.1485, "step": 11094 }, { "epoch": 0.19648731333041125, "grad_norm": 1.0078332424163818, "learning_rate": 2.787164806778336e-05, "loss": 0.1139, "step": 11095 }, { "epoch": 0.19650502286743968, "grad_norm": 0.7785099148750305, "learning_rate": 2.7871206278544293e-05, "loss": 0.1068, "step": 11096 }, { "epoch": 0.1965227324044681, "grad_norm": 1.0532455444335938, "learning_rate": 2.787076444696025e-05, "loss": 0.1489, "step": 11097 }, { "epoch": 0.19654044194149656, "grad_norm": 1.1209781169891357, "learning_rate": 2.78703225730327e-05, "loss": 0.1509, "step": 11098 }, { "epoch": 0.19655815147852498, "grad_norm": 0.725102424621582, "learning_rate": 2.7869880656763087e-05, "loss": 0.0942, "step": 11099 }, { "epoch": 0.1965758610155534, "grad_norm": 0.8943255543708801, "learning_rate": 2.7869438698152868e-05, "loss": 0.1283, "step": 11100 }, { "epoch": 0.19659357055258184, "grad_norm": 0.6544119119644165, "learning_rate": 2.7868996697203498e-05, "loss": 0.1223, "step": 11101 }, { "epoch": 0.19661128008961026, "grad_norm": 0.8931328654289246, "learning_rate": 2.7868554653916432e-05, "loss": 0.1172, "step": 11102 }, { "epoch": 0.1966289896266387, "grad_norm": 0.912817120552063, "learning_rate": 2.786811256829312e-05, "loss": 0.0647, "step": 11103 }, { "epoch": 0.19664669916366712, "grad_norm": 0.972843587398529, "learning_rate": 2.7867670440335023e-05, "loss": 0.1102, "step": 11104 }, { "epoch": 0.19666440870069554, "grad_norm": 0.8913133144378662, "learning_rate": 2.786722827004359e-05, "loss": 0.1355, "step": 11105 }, { "epoch": 0.19668211823772397, "grad_norm": 0.8917816281318665, "learning_rate": 2.7866786057420276e-05, "loss": 0.0895, "step": 11106 }, { "epoch": 0.1966998277747524, "grad_norm": 1.5148729085922241, "learning_rate": 2.786634380246654e-05, "loss": 0.1244, "step": 11107 }, { "epoch": 0.19671753731178082, "grad_norm": 1.0308541059494019, "learning_rate": 2.7865901505183833e-05, "loss": 0.1213, "step": 11108 }, { "epoch": 0.19673524684880925, "grad_norm": 0.7814550399780273, "learning_rate": 2.7865459165573614e-05, "loss": 0.0753, "step": 11109 }, { "epoch": 0.19675295638583767, "grad_norm": 0.9000691175460815, "learning_rate": 2.786501678363733e-05, "loss": 0.133, "step": 11110 }, { "epoch": 0.1967706659228661, "grad_norm": 1.0230398178100586, "learning_rate": 2.7864574359376445e-05, "loss": 0.0808, "step": 11111 }, { "epoch": 0.19678837545989453, "grad_norm": 0.577127993106842, "learning_rate": 2.786413189279241e-05, "loss": 0.1281, "step": 11112 }, { "epoch": 0.19680608499692298, "grad_norm": 1.0858675241470337, "learning_rate": 2.7863689383886684e-05, "loss": 0.0821, "step": 11113 }, { "epoch": 0.1968237945339514, "grad_norm": 1.4603053331375122, "learning_rate": 2.786324683266072e-05, "loss": 0.0984, "step": 11114 }, { "epoch": 0.19684150407097983, "grad_norm": 0.6002959609031677, "learning_rate": 2.7862804239115976e-05, "loss": 0.1083, "step": 11115 }, { "epoch": 0.19685921360800826, "grad_norm": 0.8182567358016968, "learning_rate": 2.786236160325391e-05, "loss": 0.0938, "step": 11116 }, { "epoch": 0.1968769231450367, "grad_norm": 1.1110795736312866, "learning_rate": 2.786191892507597e-05, "loss": 0.1262, "step": 11117 }, { "epoch": 0.1968946326820651, "grad_norm": 0.9722668528556824, "learning_rate": 2.786147620458362e-05, "loss": 0.1088, "step": 11118 }, { "epoch": 0.19691234221909354, "grad_norm": 1.175146460533142, "learning_rate": 2.7861033441778314e-05, "loss": 0.1501, "step": 11119 }, { "epoch": 0.19693005175612197, "grad_norm": 0.6388826370239258, "learning_rate": 2.7860590636661508e-05, "loss": 0.097, "step": 11120 }, { "epoch": 0.1969477612931504, "grad_norm": 1.1469776630401611, "learning_rate": 2.786014778923466e-05, "loss": 0.1365, "step": 11121 }, { "epoch": 0.19696547083017882, "grad_norm": 1.7429240942001343, "learning_rate": 2.7859704899499226e-05, "loss": 0.1051, "step": 11122 }, { "epoch": 0.19698318036720724, "grad_norm": 0.878735363483429, "learning_rate": 2.7859261967456664e-05, "loss": 0.1143, "step": 11123 }, { "epoch": 0.19700088990423567, "grad_norm": 1.4841668605804443, "learning_rate": 2.7858818993108432e-05, "loss": 0.1455, "step": 11124 }, { "epoch": 0.1970185994412641, "grad_norm": 1.1724200248718262, "learning_rate": 2.7858375976455986e-05, "loss": 0.1255, "step": 11125 }, { "epoch": 0.19703630897829252, "grad_norm": 0.9026124477386475, "learning_rate": 2.785793291750078e-05, "loss": 0.0944, "step": 11126 }, { "epoch": 0.19705401851532095, "grad_norm": 1.8860419988632202, "learning_rate": 2.7857489816244274e-05, "loss": 0.0898, "step": 11127 }, { "epoch": 0.1970717280523494, "grad_norm": 0.8995334506034851, "learning_rate": 2.7857046672687936e-05, "loss": 0.103, "step": 11128 }, { "epoch": 0.19708943758937783, "grad_norm": 0.7837330102920532, "learning_rate": 2.785660348683321e-05, "loss": 0.096, "step": 11129 }, { "epoch": 0.19710714712640626, "grad_norm": 1.3231700658798218, "learning_rate": 2.7856160258681555e-05, "loss": 0.1172, "step": 11130 }, { "epoch": 0.19712485666343468, "grad_norm": 0.9187365174293518, "learning_rate": 2.7855716988234436e-05, "loss": 0.1055, "step": 11131 }, { "epoch": 0.1971425662004631, "grad_norm": 1.3617496490478516, "learning_rate": 2.7855273675493312e-05, "loss": 0.1067, "step": 11132 }, { "epoch": 0.19716027573749154, "grad_norm": 1.0304481983184814, "learning_rate": 2.7854830320459634e-05, "loss": 0.1158, "step": 11133 }, { "epoch": 0.19717798527451996, "grad_norm": 1.000365138053894, "learning_rate": 2.7854386923134863e-05, "loss": 0.116, "step": 11134 }, { "epoch": 0.1971956948115484, "grad_norm": 0.9152172207832336, "learning_rate": 2.7853943483520462e-05, "loss": 0.1145, "step": 11135 }, { "epoch": 0.19721340434857682, "grad_norm": 0.9072765707969666, "learning_rate": 2.7853500001617887e-05, "loss": 0.1206, "step": 11136 }, { "epoch": 0.19723111388560524, "grad_norm": 0.5690075159072876, "learning_rate": 2.78530564774286e-05, "loss": 0.13, "step": 11137 }, { "epoch": 0.19724882342263367, "grad_norm": 0.48500052094459534, "learning_rate": 2.785261291095405e-05, "loss": 0.1175, "step": 11138 }, { "epoch": 0.1972665329596621, "grad_norm": 0.8074181079864502, "learning_rate": 2.7852169302195707e-05, "loss": 0.0957, "step": 11139 }, { "epoch": 0.19728424249669052, "grad_norm": 0.9227724671363831, "learning_rate": 2.7851725651155028e-05, "loss": 0.1167, "step": 11140 }, { "epoch": 0.19730195203371895, "grad_norm": 0.9509746432304382, "learning_rate": 2.785128195783347e-05, "loss": 0.0769, "step": 11141 }, { "epoch": 0.19731966157074737, "grad_norm": 0.9819214344024658, "learning_rate": 2.7850838222232495e-05, "loss": 0.1138, "step": 11142 }, { "epoch": 0.19733737110777583, "grad_norm": 0.8707996010780334, "learning_rate": 2.785039444435356e-05, "loss": 0.1556, "step": 11143 }, { "epoch": 0.19735508064480425, "grad_norm": 0.9874244928359985, "learning_rate": 2.784995062419813e-05, "loss": 0.0829, "step": 11144 }, { "epoch": 0.19737279018183268, "grad_norm": 0.9687397480010986, "learning_rate": 2.784950676176766e-05, "loss": 0.1185, "step": 11145 }, { "epoch": 0.1973904997188611, "grad_norm": 1.2047585248947144, "learning_rate": 2.7849062857063613e-05, "loss": 0.0853, "step": 11146 }, { "epoch": 0.19740820925588953, "grad_norm": 0.9508150815963745, "learning_rate": 2.784861891008745e-05, "loss": 0.113, "step": 11147 }, { "epoch": 0.19742591879291796, "grad_norm": 1.524436354637146, "learning_rate": 2.784817492084063e-05, "loss": 0.1525, "step": 11148 }, { "epoch": 0.19744362832994639, "grad_norm": 0.8785485625267029, "learning_rate": 2.7847730889324614e-05, "loss": 0.1575, "step": 11149 }, { "epoch": 0.1974613378669748, "grad_norm": 0.6176673769950867, "learning_rate": 2.7847286815540864e-05, "loss": 0.0838, "step": 11150 }, { "epoch": 0.19747904740400324, "grad_norm": 0.8799505233764648, "learning_rate": 2.7846842699490833e-05, "loss": 0.0971, "step": 11151 }, { "epoch": 0.19749675694103166, "grad_norm": 0.9110878109931946, "learning_rate": 2.7846398541176e-05, "loss": 0.0896, "step": 11152 }, { "epoch": 0.1975144664780601, "grad_norm": 0.7893260717391968, "learning_rate": 2.7845954340597804e-05, "loss": 0.1219, "step": 11153 }, { "epoch": 0.19753217601508852, "grad_norm": 0.7308303117752075, "learning_rate": 2.7845510097757726e-05, "loss": 0.1277, "step": 11154 }, { "epoch": 0.19754988555211694, "grad_norm": 0.8557037711143494, "learning_rate": 2.7845065812657216e-05, "loss": 0.1349, "step": 11155 }, { "epoch": 0.19756759508914537, "grad_norm": 0.6938474178314209, "learning_rate": 2.7844621485297737e-05, "loss": 0.113, "step": 11156 }, { "epoch": 0.1975853046261738, "grad_norm": 0.990278422832489, "learning_rate": 2.7844177115680754e-05, "loss": 0.1271, "step": 11157 }, { "epoch": 0.19760301416320225, "grad_norm": 1.2572619915008545, "learning_rate": 2.7843732703807726e-05, "loss": 0.1467, "step": 11158 }, { "epoch": 0.19762072370023068, "grad_norm": 1.106437087059021, "learning_rate": 2.7843288249680117e-05, "loss": 0.0797, "step": 11159 }, { "epoch": 0.1976384332372591, "grad_norm": 1.046120285987854, "learning_rate": 2.784284375329939e-05, "loss": 0.1379, "step": 11160 }, { "epoch": 0.19765614277428753, "grad_norm": 1.095776915550232, "learning_rate": 2.7842399214667006e-05, "loss": 0.1266, "step": 11161 }, { "epoch": 0.19767385231131596, "grad_norm": 1.0993659496307373, "learning_rate": 2.7841954633784427e-05, "loss": 0.1001, "step": 11162 }, { "epoch": 0.19769156184834438, "grad_norm": 1.5408005714416504, "learning_rate": 2.7841510010653115e-05, "loss": 0.1615, "step": 11163 }, { "epoch": 0.1977092713853728, "grad_norm": 1.0534268617630005, "learning_rate": 2.7841065345274537e-05, "loss": 0.1011, "step": 11164 }, { "epoch": 0.19772698092240124, "grad_norm": 0.6817558407783508, "learning_rate": 2.7840620637650147e-05, "loss": 0.1195, "step": 11165 }, { "epoch": 0.19774469045942966, "grad_norm": 1.1634459495544434, "learning_rate": 2.784017588778142e-05, "loss": 0.061, "step": 11166 }, { "epoch": 0.1977623999964581, "grad_norm": 0.804690420627594, "learning_rate": 2.783973109566981e-05, "loss": 0.1147, "step": 11167 }, { "epoch": 0.19778010953348651, "grad_norm": 1.045606255531311, "learning_rate": 2.7839286261316784e-05, "loss": 0.1095, "step": 11168 }, { "epoch": 0.19779781907051494, "grad_norm": 1.1228611469268799, "learning_rate": 2.7838841384723805e-05, "loss": 0.1603, "step": 11169 }, { "epoch": 0.19781552860754337, "grad_norm": 1.228344202041626, "learning_rate": 2.7838396465892336e-05, "loss": 0.0897, "step": 11170 }, { "epoch": 0.1978332381445718, "grad_norm": 0.9587256908416748, "learning_rate": 2.783795150482384e-05, "loss": 0.1554, "step": 11171 }, { "epoch": 0.19785094768160022, "grad_norm": 0.795281171798706, "learning_rate": 2.7837506501519784e-05, "loss": 0.1071, "step": 11172 }, { "epoch": 0.19786865721862867, "grad_norm": 1.0417510271072388, "learning_rate": 2.783706145598163e-05, "loss": 0.0785, "step": 11173 }, { "epoch": 0.1978863667556571, "grad_norm": 0.5547196269035339, "learning_rate": 2.7836616368210844e-05, "loss": 0.1002, "step": 11174 }, { "epoch": 0.19790407629268553, "grad_norm": 1.0713690519332886, "learning_rate": 2.7836171238208884e-05, "loss": 0.0968, "step": 11175 }, { "epoch": 0.19792178582971395, "grad_norm": 0.9824205636978149, "learning_rate": 2.7835726065977222e-05, "loss": 0.0741, "step": 11176 }, { "epoch": 0.19793949536674238, "grad_norm": 0.6055492758750916, "learning_rate": 2.783528085151732e-05, "loss": 0.0949, "step": 11177 }, { "epoch": 0.1979572049037708, "grad_norm": 0.6533147692680359, "learning_rate": 2.783483559483064e-05, "loss": 0.1012, "step": 11178 }, { "epoch": 0.19797491444079923, "grad_norm": 1.0525199174880981, "learning_rate": 2.783439029591865e-05, "loss": 0.1257, "step": 11179 }, { "epoch": 0.19799262397782766, "grad_norm": 1.435013771057129, "learning_rate": 2.783394495478281e-05, "loss": 0.1104, "step": 11180 }, { "epoch": 0.19801033351485609, "grad_norm": 0.9468785524368286, "learning_rate": 2.7833499571424595e-05, "loss": 0.1318, "step": 11181 }, { "epoch": 0.1980280430518845, "grad_norm": 0.9807043075561523, "learning_rate": 2.783305414584546e-05, "loss": 0.0847, "step": 11182 }, { "epoch": 0.19804575258891294, "grad_norm": 1.1290500164031982, "learning_rate": 2.7832608678046883e-05, "loss": 0.0876, "step": 11183 }, { "epoch": 0.19806346212594136, "grad_norm": 0.9375523328781128, "learning_rate": 2.7832163168030313e-05, "loss": 0.1302, "step": 11184 }, { "epoch": 0.1980811716629698, "grad_norm": 0.9632869958877563, "learning_rate": 2.7831717615797225e-05, "loss": 0.1202, "step": 11185 }, { "epoch": 0.19809888119999822, "grad_norm": 0.6530216932296753, "learning_rate": 2.7831272021349085e-05, "loss": 0.092, "step": 11186 }, { "epoch": 0.19811659073702667, "grad_norm": 0.46431368589401245, "learning_rate": 2.783082638468736e-05, "loss": 0.096, "step": 11187 }, { "epoch": 0.1981343002740551, "grad_norm": 1.0845589637756348, "learning_rate": 2.7830380705813513e-05, "loss": 0.1269, "step": 11188 }, { "epoch": 0.19815200981108352, "grad_norm": 0.9132553935050964, "learning_rate": 2.7829934984729014e-05, "loss": 0.0978, "step": 11189 }, { "epoch": 0.19816971934811195, "grad_norm": 0.849711000919342, "learning_rate": 2.7829489221435322e-05, "loss": 0.0943, "step": 11190 }, { "epoch": 0.19818742888514038, "grad_norm": 1.067179799079895, "learning_rate": 2.7829043415933908e-05, "loss": 0.1441, "step": 11191 }, { "epoch": 0.1982051384221688, "grad_norm": 0.7017533779144287, "learning_rate": 2.782859756822624e-05, "loss": 0.0923, "step": 11192 }, { "epoch": 0.19822284795919723, "grad_norm": 0.8202859163284302, "learning_rate": 2.782815167831378e-05, "loss": 0.126, "step": 11193 }, { "epoch": 0.19824055749622566, "grad_norm": 0.8600384593009949, "learning_rate": 2.7827705746198007e-05, "loss": 0.1535, "step": 11194 }, { "epoch": 0.19825826703325408, "grad_norm": 0.8604657053947449, "learning_rate": 2.7827259771880374e-05, "loss": 0.1221, "step": 11195 }, { "epoch": 0.1982759765702825, "grad_norm": 1.1519666910171509, "learning_rate": 2.7826813755362354e-05, "loss": 0.1021, "step": 11196 }, { "epoch": 0.19829368610731093, "grad_norm": 0.9010130763053894, "learning_rate": 2.7826367696645413e-05, "loss": 0.1004, "step": 11197 }, { "epoch": 0.19831139564433936, "grad_norm": 2.59944748878479, "learning_rate": 2.7825921595731023e-05, "loss": 0.103, "step": 11198 }, { "epoch": 0.1983291051813678, "grad_norm": 0.7140133380889893, "learning_rate": 2.782547545262065e-05, "loss": 0.1323, "step": 11199 }, { "epoch": 0.1983468147183962, "grad_norm": 0.8101217746734619, "learning_rate": 2.7825029267315753e-05, "loss": 0.119, "step": 11200 }, { "epoch": 0.19836452425542464, "grad_norm": 0.907443106174469, "learning_rate": 2.782458303981781e-05, "loss": 0.0678, "step": 11201 }, { "epoch": 0.1983822337924531, "grad_norm": 0.8270094394683838, "learning_rate": 2.7824136770128283e-05, "loss": 0.1086, "step": 11202 }, { "epoch": 0.19839994332948152, "grad_norm": 0.900661826133728, "learning_rate": 2.7823690458248648e-05, "loss": 0.1096, "step": 11203 }, { "epoch": 0.19841765286650995, "grad_norm": 0.958085834980011, "learning_rate": 2.7823244104180365e-05, "loss": 0.1064, "step": 11204 }, { "epoch": 0.19843536240353837, "grad_norm": 0.8517810106277466, "learning_rate": 2.782279770792491e-05, "loss": 0.0756, "step": 11205 }, { "epoch": 0.1984530719405668, "grad_norm": 0.8720978498458862, "learning_rate": 2.7822351269483745e-05, "loss": 0.1042, "step": 11206 }, { "epoch": 0.19847078147759523, "grad_norm": 0.8634043335914612, "learning_rate": 2.7821904788858345e-05, "loss": 0.0938, "step": 11207 }, { "epoch": 0.19848849101462365, "grad_norm": 0.7769199013710022, "learning_rate": 2.782145826605017e-05, "loss": 0.138, "step": 11208 }, { "epoch": 0.19850620055165208, "grad_norm": 0.7002426385879517, "learning_rate": 2.7821011701060693e-05, "loss": 0.106, "step": 11209 }, { "epoch": 0.1985239100886805, "grad_norm": 0.9086418747901917, "learning_rate": 2.7820565093891387e-05, "loss": 0.0968, "step": 11210 }, { "epoch": 0.19854161962570893, "grad_norm": 0.8282265663146973, "learning_rate": 2.782011844454372e-05, "loss": 0.0759, "step": 11211 }, { "epoch": 0.19855932916273736, "grad_norm": 0.9090074300765991, "learning_rate": 2.781967175301916e-05, "loss": 0.1105, "step": 11212 }, { "epoch": 0.19857703869976578, "grad_norm": 1.104360580444336, "learning_rate": 2.7819225019319173e-05, "loss": 0.1232, "step": 11213 }, { "epoch": 0.1985947482367942, "grad_norm": 0.7979938983917236, "learning_rate": 2.7818778243445233e-05, "loss": 0.111, "step": 11214 }, { "epoch": 0.19861245777382264, "grad_norm": 1.3717515468597412, "learning_rate": 2.781833142539881e-05, "loss": 0.1315, "step": 11215 }, { "epoch": 0.19863016731085106, "grad_norm": 2.8509719371795654, "learning_rate": 2.781788456518137e-05, "loss": 0.1174, "step": 11216 }, { "epoch": 0.19864787684787952, "grad_norm": 0.6436930894851685, "learning_rate": 2.7817437662794394e-05, "loss": 0.122, "step": 11217 }, { "epoch": 0.19866558638490794, "grad_norm": 1.4743467569351196, "learning_rate": 2.781699071823934e-05, "loss": 0.1161, "step": 11218 }, { "epoch": 0.19868329592193637, "grad_norm": 0.8489155769348145, "learning_rate": 2.7816543731517682e-05, "loss": 0.0784, "step": 11219 }, { "epoch": 0.1987010054589648, "grad_norm": 0.8640199303627014, "learning_rate": 2.781609670263089e-05, "loss": 0.1059, "step": 11220 }, { "epoch": 0.19871871499599322, "grad_norm": 0.7758638262748718, "learning_rate": 2.781564963158044e-05, "loss": 0.1201, "step": 11221 }, { "epoch": 0.19873642453302165, "grad_norm": 1.308298110961914, "learning_rate": 2.78152025183678e-05, "loss": 0.1177, "step": 11222 }, { "epoch": 0.19875413407005008, "grad_norm": 0.6290401220321655, "learning_rate": 2.7814755362994435e-05, "loss": 0.1106, "step": 11223 }, { "epoch": 0.1987718436070785, "grad_norm": 1.213020920753479, "learning_rate": 2.7814308165461822e-05, "loss": 0.0978, "step": 11224 }, { "epoch": 0.19878955314410693, "grad_norm": 0.7831798791885376, "learning_rate": 2.7813860925771435e-05, "loss": 0.1131, "step": 11225 }, { "epoch": 0.19880726268113535, "grad_norm": 0.9675009846687317, "learning_rate": 2.781341364392474e-05, "loss": 0.1187, "step": 11226 }, { "epoch": 0.19882497221816378, "grad_norm": 1.092820644378662, "learning_rate": 2.781296631992321e-05, "loss": 0.1161, "step": 11227 }, { "epoch": 0.1988426817551922, "grad_norm": 1.2715784311294556, "learning_rate": 2.7812518953768312e-05, "loss": 0.1084, "step": 11228 }, { "epoch": 0.19886039129222063, "grad_norm": 0.750530481338501, "learning_rate": 2.7812071545461532e-05, "loss": 0.1066, "step": 11229 }, { "epoch": 0.19887810082924906, "grad_norm": 1.1973539590835571, "learning_rate": 2.7811624095004325e-05, "loss": 0.1492, "step": 11230 }, { "epoch": 0.1988958103662775, "grad_norm": 0.8125539422035217, "learning_rate": 2.7811176602398174e-05, "loss": 0.1601, "step": 11231 }, { "epoch": 0.19891351990330594, "grad_norm": 0.7843680381774902, "learning_rate": 2.7810729067644546e-05, "loss": 0.0873, "step": 11232 }, { "epoch": 0.19893122944033437, "grad_norm": 1.065706491470337, "learning_rate": 2.7810281490744915e-05, "loss": 0.1464, "step": 11233 }, { "epoch": 0.1989489389773628, "grad_norm": 1.222859263420105, "learning_rate": 2.7809833871700752e-05, "loss": 0.1334, "step": 11234 }, { "epoch": 0.19896664851439122, "grad_norm": 1.5441958904266357, "learning_rate": 2.7809386210513532e-05, "loss": 0.0949, "step": 11235 }, { "epoch": 0.19898435805141965, "grad_norm": 0.7045955061912537, "learning_rate": 2.780893850718473e-05, "loss": 0.1231, "step": 11236 }, { "epoch": 0.19900206758844807, "grad_norm": 1.1432647705078125, "learning_rate": 2.7808490761715814e-05, "loss": 0.109, "step": 11237 }, { "epoch": 0.1990197771254765, "grad_norm": 0.7491928339004517, "learning_rate": 2.7808042974108262e-05, "loss": 0.0696, "step": 11238 }, { "epoch": 0.19903748666250493, "grad_norm": 1.0551296472549438, "learning_rate": 2.7807595144363536e-05, "loss": 0.0998, "step": 11239 }, { "epoch": 0.19905519619953335, "grad_norm": 1.1721675395965576, "learning_rate": 2.7807147272483125e-05, "loss": 0.1235, "step": 11240 }, { "epoch": 0.19907290573656178, "grad_norm": 0.770560622215271, "learning_rate": 2.7806699358468494e-05, "loss": 0.1439, "step": 11241 }, { "epoch": 0.1990906152735902, "grad_norm": 0.9843904376029968, "learning_rate": 2.7806251402321115e-05, "loss": 0.1098, "step": 11242 }, { "epoch": 0.19910832481061863, "grad_norm": 0.7826224565505981, "learning_rate": 2.7805803404042463e-05, "loss": 0.119, "step": 11243 }, { "epoch": 0.19912603434764706, "grad_norm": 1.773554801940918, "learning_rate": 2.7805355363634016e-05, "loss": 0.0653, "step": 11244 }, { "epoch": 0.19914374388467548, "grad_norm": 1.5197577476501465, "learning_rate": 2.780490728109724e-05, "loss": 0.1696, "step": 11245 }, { "epoch": 0.1991614534217039, "grad_norm": 1.316150188446045, "learning_rate": 2.780445915643362e-05, "loss": 0.1381, "step": 11246 }, { "epoch": 0.19917916295873236, "grad_norm": 0.683691680431366, "learning_rate": 2.7804010989644623e-05, "loss": 0.0901, "step": 11247 }, { "epoch": 0.1991968724957608, "grad_norm": 1.313034176826477, "learning_rate": 2.7803562780731727e-05, "loss": 0.1601, "step": 11248 }, { "epoch": 0.19921458203278922, "grad_norm": 1.4307630062103271, "learning_rate": 2.78031145296964e-05, "loss": 0.0868, "step": 11249 }, { "epoch": 0.19923229156981764, "grad_norm": 1.0562214851379395, "learning_rate": 2.780266623654012e-05, "loss": 0.1127, "step": 11250 }, { "epoch": 0.19925000110684607, "grad_norm": 1.0308817625045776, "learning_rate": 2.780221790126437e-05, "loss": 0.0976, "step": 11251 }, { "epoch": 0.1992677106438745, "grad_norm": 1.3689320087432861, "learning_rate": 2.7801769523870614e-05, "loss": 0.1666, "step": 11252 }, { "epoch": 0.19928542018090292, "grad_norm": 1.0760577917099, "learning_rate": 2.7801321104360332e-05, "loss": 0.1238, "step": 11253 }, { "epoch": 0.19930312971793135, "grad_norm": 0.6883609294891357, "learning_rate": 2.7800872642734997e-05, "loss": 0.091, "step": 11254 }, { "epoch": 0.19932083925495978, "grad_norm": 0.8242987394332886, "learning_rate": 2.7800424138996085e-05, "loss": 0.1101, "step": 11255 }, { "epoch": 0.1993385487919882, "grad_norm": 1.5297831296920776, "learning_rate": 2.7799975593145074e-05, "loss": 0.1058, "step": 11256 }, { "epoch": 0.19935625832901663, "grad_norm": 0.7075707316398621, "learning_rate": 2.7799527005183436e-05, "loss": 0.0822, "step": 11257 }, { "epoch": 0.19937396786604505, "grad_norm": 1.3956618309020996, "learning_rate": 2.779907837511265e-05, "loss": 0.136, "step": 11258 }, { "epoch": 0.19939167740307348, "grad_norm": 2.1170294284820557, "learning_rate": 2.7798629702934192e-05, "loss": 0.0728, "step": 11259 }, { "epoch": 0.1994093869401019, "grad_norm": 0.8550407290458679, "learning_rate": 2.7798180988649537e-05, "loss": 0.1233, "step": 11260 }, { "epoch": 0.19942709647713033, "grad_norm": 0.9084757566452026, "learning_rate": 2.7797732232260155e-05, "loss": 0.1314, "step": 11261 }, { "epoch": 0.1994448060141588, "grad_norm": 0.9514579772949219, "learning_rate": 2.7797283433767537e-05, "loss": 0.0915, "step": 11262 }, { "epoch": 0.19946251555118721, "grad_norm": 1.1349388360977173, "learning_rate": 2.779683459317315e-05, "loss": 0.1071, "step": 11263 }, { "epoch": 0.19948022508821564, "grad_norm": 1.4180878400802612, "learning_rate": 2.7796385710478466e-05, "loss": 0.1195, "step": 11264 }, { "epoch": 0.19949793462524407, "grad_norm": 0.8373978137969971, "learning_rate": 2.7795936785684968e-05, "loss": 0.124, "step": 11265 }, { "epoch": 0.1995156441622725, "grad_norm": 1.2594820261001587, "learning_rate": 2.7795487818794132e-05, "loss": 0.1071, "step": 11266 }, { "epoch": 0.19953335369930092, "grad_norm": 1.1380770206451416, "learning_rate": 2.779503880980744e-05, "loss": 0.1123, "step": 11267 }, { "epoch": 0.19955106323632935, "grad_norm": 0.7394042015075684, "learning_rate": 2.7794589758726357e-05, "loss": 0.1223, "step": 11268 }, { "epoch": 0.19956877277335777, "grad_norm": 0.8454982042312622, "learning_rate": 2.7794140665552376e-05, "loss": 0.0937, "step": 11269 }, { "epoch": 0.1995864823103862, "grad_norm": 0.9128254055976868, "learning_rate": 2.7793691530286965e-05, "loss": 0.1061, "step": 11270 }, { "epoch": 0.19960419184741462, "grad_norm": 0.7801406383514404, "learning_rate": 2.77932423529316e-05, "loss": 0.12, "step": 11271 }, { "epoch": 0.19962190138444305, "grad_norm": 0.6082634925842285, "learning_rate": 2.779279313348776e-05, "loss": 0.0659, "step": 11272 }, { "epoch": 0.19963961092147148, "grad_norm": 0.9990745782852173, "learning_rate": 2.7792343871956928e-05, "loss": 0.0907, "step": 11273 }, { "epoch": 0.1996573204584999, "grad_norm": 1.2181358337402344, "learning_rate": 2.7791894568340575e-05, "loss": 0.1303, "step": 11274 }, { "epoch": 0.19967502999552833, "grad_norm": 0.6860535144805908, "learning_rate": 2.7791445222640187e-05, "loss": 0.1038, "step": 11275 }, { "epoch": 0.19969273953255676, "grad_norm": 1.3526545763015747, "learning_rate": 2.7790995834857233e-05, "loss": 0.1172, "step": 11276 }, { "epoch": 0.1997104490695852, "grad_norm": 1.1705163717269897, "learning_rate": 2.77905464049932e-05, "loss": 0.1087, "step": 11277 }, { "epoch": 0.19972815860661364, "grad_norm": 1.0148078203201294, "learning_rate": 2.7790096933049566e-05, "loss": 0.1287, "step": 11278 }, { "epoch": 0.19974586814364206, "grad_norm": 0.6665489077568054, "learning_rate": 2.77896474190278e-05, "loss": 0.0929, "step": 11279 }, { "epoch": 0.1997635776806705, "grad_norm": 0.5320103764533997, "learning_rate": 2.778919786292939e-05, "loss": 0.0815, "step": 11280 }, { "epoch": 0.19978128721769892, "grad_norm": 0.6501403450965881, "learning_rate": 2.7788748264755813e-05, "loss": 0.0934, "step": 11281 }, { "epoch": 0.19979899675472734, "grad_norm": 1.0684778690338135, "learning_rate": 2.778829862450855e-05, "loss": 0.1015, "step": 11282 }, { "epoch": 0.19981670629175577, "grad_norm": 0.44710448384284973, "learning_rate": 2.7787848942189076e-05, "loss": 0.0924, "step": 11283 }, { "epoch": 0.1998344158287842, "grad_norm": 0.8850420713424683, "learning_rate": 2.778739921779887e-05, "loss": 0.1182, "step": 11284 }, { "epoch": 0.19985212536581262, "grad_norm": 1.1408019065856934, "learning_rate": 2.7786949451339416e-05, "loss": 0.082, "step": 11285 }, { "epoch": 0.19986983490284105, "grad_norm": 1.2627606391906738, "learning_rate": 2.7786499642812195e-05, "loss": 0.1379, "step": 11286 }, { "epoch": 0.19988754443986947, "grad_norm": 0.7296827435493469, "learning_rate": 2.778604979221868e-05, "loss": 0.1277, "step": 11287 }, { "epoch": 0.1999052539768979, "grad_norm": 0.9518330097198486, "learning_rate": 2.778559989956035e-05, "loss": 0.1019, "step": 11288 }, { "epoch": 0.19992296351392633, "grad_norm": 1.0450252294540405, "learning_rate": 2.77851499648387e-05, "loss": 0.1329, "step": 11289 }, { "epoch": 0.19994067305095475, "grad_norm": 0.8000916838645935, "learning_rate": 2.778469998805519e-05, "loss": 0.1117, "step": 11290 }, { "epoch": 0.19995838258798318, "grad_norm": 0.9707915186882019, "learning_rate": 2.7784249969211315e-05, "loss": 0.0916, "step": 11291 }, { "epoch": 0.19997609212501163, "grad_norm": 1.1913585662841797, "learning_rate": 2.7783799908308548e-05, "loss": 0.0896, "step": 11292 }, { "epoch": 0.19999380166204006, "grad_norm": 0.8803589344024658, "learning_rate": 2.778334980534838e-05, "loss": 0.1104, "step": 11293 }, { "epoch": 0.2000115111990685, "grad_norm": 0.7639653086662292, "learning_rate": 2.7782899660332278e-05, "loss": 0.1133, "step": 11294 }, { "epoch": 0.2000292207360969, "grad_norm": 1.9528125524520874, "learning_rate": 2.7782449473261727e-05, "loss": 0.125, "step": 11295 }, { "epoch": 0.20004693027312534, "grad_norm": 1.048356533050537, "learning_rate": 2.7781999244138212e-05, "loss": 0.1484, "step": 11296 }, { "epoch": 0.20006463981015377, "grad_norm": 0.9609624147415161, "learning_rate": 2.7781548972963212e-05, "loss": 0.0914, "step": 11297 }, { "epoch": 0.2000823493471822, "grad_norm": 1.0812039375305176, "learning_rate": 2.778109865973821e-05, "loss": 0.1299, "step": 11298 }, { "epoch": 0.20010005888421062, "grad_norm": 1.1161508560180664, "learning_rate": 2.7780648304464683e-05, "loss": 0.0869, "step": 11299 }, { "epoch": 0.20011776842123905, "grad_norm": 0.9834915995597839, "learning_rate": 2.7780197907144118e-05, "loss": 0.0815, "step": 11300 }, { "epoch": 0.20013547795826747, "grad_norm": 0.8079222440719604, "learning_rate": 2.7779747467777997e-05, "loss": 0.1114, "step": 11301 }, { "epoch": 0.2001531874952959, "grad_norm": 1.021661400794983, "learning_rate": 2.7779296986367796e-05, "loss": 0.0934, "step": 11302 }, { "epoch": 0.20017089703232432, "grad_norm": 1.2306886911392212, "learning_rate": 2.7778846462915003e-05, "loss": 0.1357, "step": 11303 }, { "epoch": 0.20018860656935275, "grad_norm": 0.7816832065582275, "learning_rate": 2.77783958974211e-05, "loss": 0.1036, "step": 11304 }, { "epoch": 0.20020631610638118, "grad_norm": 1.0827373266220093, "learning_rate": 2.777794528988756e-05, "loss": 0.1084, "step": 11305 }, { "epoch": 0.2002240256434096, "grad_norm": 0.598370373249054, "learning_rate": 2.777749464031587e-05, "loss": 0.0865, "step": 11306 }, { "epoch": 0.20024173518043806, "grad_norm": 1.3621567487716675, "learning_rate": 2.7777043948707526e-05, "loss": 0.0957, "step": 11307 }, { "epoch": 0.20025944471746648, "grad_norm": 0.8776227831840515, "learning_rate": 2.7776593215063995e-05, "loss": 0.1174, "step": 11308 }, { "epoch": 0.2002771542544949, "grad_norm": 1.1936230659484863, "learning_rate": 2.7776142439386766e-05, "loss": 0.1317, "step": 11309 }, { "epoch": 0.20029486379152334, "grad_norm": 0.8298741579055786, "learning_rate": 2.7775691621677315e-05, "loss": 0.1044, "step": 11310 }, { "epoch": 0.20031257332855176, "grad_norm": 1.1405448913574219, "learning_rate": 2.777524076193713e-05, "loss": 0.1357, "step": 11311 }, { "epoch": 0.2003302828655802, "grad_norm": 0.7559540867805481, "learning_rate": 2.7774789860167705e-05, "loss": 0.1043, "step": 11312 }, { "epoch": 0.20034799240260862, "grad_norm": 1.0226621627807617, "learning_rate": 2.777433891637051e-05, "loss": 0.0719, "step": 11313 }, { "epoch": 0.20036570193963704, "grad_norm": 0.9268821477890015, "learning_rate": 2.7773887930547028e-05, "loss": 0.0801, "step": 11314 }, { "epoch": 0.20038341147666547, "grad_norm": 1.129399299621582, "learning_rate": 2.7773436902698746e-05, "loss": 0.0782, "step": 11315 }, { "epoch": 0.2004011210136939, "grad_norm": 0.9090906381607056, "learning_rate": 2.777298583282715e-05, "loss": 0.1164, "step": 11316 }, { "epoch": 0.20041883055072232, "grad_norm": 0.5522928833961487, "learning_rate": 2.7772534720933724e-05, "loss": 0.0956, "step": 11317 }, { "epoch": 0.20043654008775075, "grad_norm": 1.0591996908187866, "learning_rate": 2.777208356701995e-05, "loss": 0.1211, "step": 11318 }, { "epoch": 0.20045424962477917, "grad_norm": 0.8555706143379211, "learning_rate": 2.7771632371087314e-05, "loss": 0.1212, "step": 11319 }, { "epoch": 0.2004719591618076, "grad_norm": 0.7182166576385498, "learning_rate": 2.77711811331373e-05, "loss": 0.1219, "step": 11320 }, { "epoch": 0.20048966869883605, "grad_norm": 1.5366352796554565, "learning_rate": 2.777072985317139e-05, "loss": 0.1132, "step": 11321 }, { "epoch": 0.20050737823586448, "grad_norm": 0.6231274008750916, "learning_rate": 2.7770278531191072e-05, "loss": 0.1079, "step": 11322 }, { "epoch": 0.2005250877728929, "grad_norm": 0.6450133919715881, "learning_rate": 2.7769827167197822e-05, "loss": 0.0773, "step": 11323 }, { "epoch": 0.20054279730992133, "grad_norm": 1.1569724082946777, "learning_rate": 2.776937576119314e-05, "loss": 0.1192, "step": 11324 }, { "epoch": 0.20056050684694976, "grad_norm": 1.3617475032806396, "learning_rate": 2.7768924313178503e-05, "loss": 0.1021, "step": 11325 }, { "epoch": 0.2005782163839782, "grad_norm": 0.9784817695617676, "learning_rate": 2.7768472823155394e-05, "loss": 0.1009, "step": 11326 }, { "epoch": 0.2005959259210066, "grad_norm": 0.9459272027015686, "learning_rate": 2.7768021291125302e-05, "loss": 0.0825, "step": 11327 }, { "epoch": 0.20061363545803504, "grad_norm": 0.3441724479198456, "learning_rate": 2.776756971708971e-05, "loss": 0.0991, "step": 11328 }, { "epoch": 0.20063134499506347, "grad_norm": 0.9498668909072876, "learning_rate": 2.7767118101050105e-05, "loss": 0.1216, "step": 11329 }, { "epoch": 0.2006490545320919, "grad_norm": 1.2228076457977295, "learning_rate": 2.7766666443007976e-05, "loss": 0.1289, "step": 11330 }, { "epoch": 0.20066676406912032, "grad_norm": 1.065617561340332, "learning_rate": 2.7766214742964802e-05, "loss": 0.095, "step": 11331 }, { "epoch": 0.20068447360614874, "grad_norm": 0.8706362843513489, "learning_rate": 2.7765763000922076e-05, "loss": 0.101, "step": 11332 }, { "epoch": 0.20070218314317717, "grad_norm": 0.5612645149230957, "learning_rate": 2.776531121688128e-05, "loss": 0.1092, "step": 11333 }, { "epoch": 0.2007198926802056, "grad_norm": 0.7994305491447449, "learning_rate": 2.7764859390843903e-05, "loss": 0.1233, "step": 11334 }, { "epoch": 0.20073760221723402, "grad_norm": 0.9712890386581421, "learning_rate": 2.7764407522811422e-05, "loss": 0.1329, "step": 11335 }, { "epoch": 0.20075531175426248, "grad_norm": 0.8762954473495483, "learning_rate": 2.776395561278534e-05, "loss": 0.097, "step": 11336 }, { "epoch": 0.2007730212912909, "grad_norm": 0.7248011231422424, "learning_rate": 2.776350366076713e-05, "loss": 0.0639, "step": 11337 }, { "epoch": 0.20079073082831933, "grad_norm": 0.5618357062339783, "learning_rate": 2.7763051666758286e-05, "loss": 0.1046, "step": 11338 }, { "epoch": 0.20080844036534776, "grad_norm": 0.8550315499305725, "learning_rate": 2.7762599630760294e-05, "loss": 0.0787, "step": 11339 }, { "epoch": 0.20082614990237618, "grad_norm": 1.0464162826538086, "learning_rate": 2.7762147552774635e-05, "loss": 0.0793, "step": 11340 }, { "epoch": 0.2008438594394046, "grad_norm": 1.3749396800994873, "learning_rate": 2.7761695432802806e-05, "loss": 0.1455, "step": 11341 }, { "epoch": 0.20086156897643304, "grad_norm": 0.9679125547409058, "learning_rate": 2.7761243270846288e-05, "loss": 0.1007, "step": 11342 }, { "epoch": 0.20087927851346146, "grad_norm": 1.0366750955581665, "learning_rate": 2.7760791066906572e-05, "loss": 0.0795, "step": 11343 }, { "epoch": 0.2008969880504899, "grad_norm": 1.3537826538085938, "learning_rate": 2.7760338820985142e-05, "loss": 0.1186, "step": 11344 }, { "epoch": 0.20091469758751831, "grad_norm": 0.8247703909873962, "learning_rate": 2.7759886533083492e-05, "loss": 0.1104, "step": 11345 }, { "epoch": 0.20093240712454674, "grad_norm": 0.5503036975860596, "learning_rate": 2.77594342032031e-05, "loss": 0.0715, "step": 11346 }, { "epoch": 0.20095011666157517, "grad_norm": 0.759015679359436, "learning_rate": 2.775898183134546e-05, "loss": 0.1117, "step": 11347 }, { "epoch": 0.2009678261986036, "grad_norm": 0.9177021980285645, "learning_rate": 2.7758529417512065e-05, "loss": 0.0963, "step": 11348 }, { "epoch": 0.20098553573563202, "grad_norm": 0.7710884809494019, "learning_rate": 2.77580769617044e-05, "loss": 0.0864, "step": 11349 }, { "epoch": 0.20100324527266045, "grad_norm": 1.2268397808074951, "learning_rate": 2.7757624463923946e-05, "loss": 0.1193, "step": 11350 }, { "epoch": 0.2010209548096889, "grad_norm": 1.221777081489563, "learning_rate": 2.77571719241722e-05, "loss": 0.1269, "step": 11351 }, { "epoch": 0.20103866434671733, "grad_norm": 0.9196329712867737, "learning_rate": 2.775671934245065e-05, "loss": 0.0865, "step": 11352 }, { "epoch": 0.20105637388374575, "grad_norm": 0.6846612095832825, "learning_rate": 2.7756266718760786e-05, "loss": 0.1022, "step": 11353 }, { "epoch": 0.20107408342077418, "grad_norm": 1.160424828529358, "learning_rate": 2.7755814053104088e-05, "loss": 0.1753, "step": 11354 }, { "epoch": 0.2010917929578026, "grad_norm": 0.8936575651168823, "learning_rate": 2.7755361345482058e-05, "loss": 0.1074, "step": 11355 }, { "epoch": 0.20110950249483103, "grad_norm": 0.7793446183204651, "learning_rate": 2.7754908595896175e-05, "loss": 0.0978, "step": 11356 }, { "epoch": 0.20112721203185946, "grad_norm": 0.5186123847961426, "learning_rate": 2.7754455804347935e-05, "loss": 0.0648, "step": 11357 }, { "epoch": 0.20114492156888789, "grad_norm": 1.0267513990402222, "learning_rate": 2.7754002970838828e-05, "loss": 0.1778, "step": 11358 }, { "epoch": 0.2011626311059163, "grad_norm": 0.9083809852600098, "learning_rate": 2.7753550095370338e-05, "loss": 0.0878, "step": 11359 }, { "epoch": 0.20118034064294474, "grad_norm": 1.176746129989624, "learning_rate": 2.7753097177943955e-05, "loss": 0.0975, "step": 11360 }, { "epoch": 0.20119805017997316, "grad_norm": 1.7467060089111328, "learning_rate": 2.7752644218561178e-05, "loss": 0.1225, "step": 11361 }, { "epoch": 0.2012157597170016, "grad_norm": 0.6471445560455322, "learning_rate": 2.775219121722349e-05, "loss": 0.0968, "step": 11362 }, { "epoch": 0.20123346925403002, "grad_norm": 1.0625284910202026, "learning_rate": 2.7751738173932383e-05, "loss": 0.0876, "step": 11363 }, { "epoch": 0.20125117879105844, "grad_norm": 0.5429611802101135, "learning_rate": 2.7751285088689346e-05, "loss": 0.0893, "step": 11364 }, { "epoch": 0.20126888832808687, "grad_norm": 0.9831764698028564, "learning_rate": 2.775083196149587e-05, "loss": 0.1029, "step": 11365 }, { "epoch": 0.20128659786511532, "grad_norm": 1.9036039113998413, "learning_rate": 2.7750378792353444e-05, "loss": 0.1491, "step": 11366 }, { "epoch": 0.20130430740214375, "grad_norm": 0.785952627658844, "learning_rate": 2.7749925581263568e-05, "loss": 0.0718, "step": 11367 }, { "epoch": 0.20132201693917218, "grad_norm": 0.694908082485199, "learning_rate": 2.7749472328227723e-05, "loss": 0.1183, "step": 11368 }, { "epoch": 0.2013397264762006, "grad_norm": 1.1710857152938843, "learning_rate": 2.7749019033247402e-05, "loss": 0.1509, "step": 11369 }, { "epoch": 0.20135743601322903, "grad_norm": 0.9480679035186768, "learning_rate": 2.77485656963241e-05, "loss": 0.108, "step": 11370 }, { "epoch": 0.20137514555025746, "grad_norm": 0.7598590850830078, "learning_rate": 2.7748112317459304e-05, "loss": 0.0933, "step": 11371 }, { "epoch": 0.20139285508728588, "grad_norm": 0.9891464114189148, "learning_rate": 2.7747658896654513e-05, "loss": 0.1798, "step": 11372 }, { "epoch": 0.2014105646243143, "grad_norm": 0.7588522434234619, "learning_rate": 2.774720543391121e-05, "loss": 0.1108, "step": 11373 }, { "epoch": 0.20142827416134274, "grad_norm": 0.49012115597724915, "learning_rate": 2.774675192923089e-05, "loss": 0.0786, "step": 11374 }, { "epoch": 0.20144598369837116, "grad_norm": 0.8819424510002136, "learning_rate": 2.7746298382615045e-05, "loss": 0.1002, "step": 11375 }, { "epoch": 0.2014636932353996, "grad_norm": 0.6125308275222778, "learning_rate": 2.774584479406517e-05, "loss": 0.0837, "step": 11376 }, { "epoch": 0.20148140277242801, "grad_norm": 0.8893592953681946, "learning_rate": 2.7745391163582752e-05, "loss": 0.1246, "step": 11377 }, { "epoch": 0.20149911230945644, "grad_norm": 0.9102808237075806, "learning_rate": 2.7744937491169287e-05, "loss": 0.0997, "step": 11378 }, { "epoch": 0.20151682184648487, "grad_norm": 0.9735626578330994, "learning_rate": 2.7744483776826268e-05, "loss": 0.1072, "step": 11379 }, { "epoch": 0.2015345313835133, "grad_norm": 0.7144157290458679, "learning_rate": 2.7744030020555185e-05, "loss": 0.1234, "step": 11380 }, { "epoch": 0.20155224092054175, "grad_norm": 0.7428461313247681, "learning_rate": 2.7743576222357534e-05, "loss": 0.0851, "step": 11381 }, { "epoch": 0.20156995045757017, "grad_norm": 0.7476575970649719, "learning_rate": 2.7743122382234807e-05, "loss": 0.1174, "step": 11382 }, { "epoch": 0.2015876599945986, "grad_norm": 1.3704379796981812, "learning_rate": 2.7742668500188494e-05, "loss": 0.1121, "step": 11383 }, { "epoch": 0.20160536953162703, "grad_norm": 0.9483518004417419, "learning_rate": 2.774221457622009e-05, "loss": 0.1406, "step": 11384 }, { "epoch": 0.20162307906865545, "grad_norm": 0.7469671964645386, "learning_rate": 2.7741760610331088e-05, "loss": 0.076, "step": 11385 }, { "epoch": 0.20164078860568388, "grad_norm": 1.0216580629348755, "learning_rate": 2.7741306602522987e-05, "loss": 0.1286, "step": 11386 }, { "epoch": 0.2016584981427123, "grad_norm": 0.511562168598175, "learning_rate": 2.7740852552797272e-05, "loss": 0.0479, "step": 11387 }, { "epoch": 0.20167620767974073, "grad_norm": 0.793494701385498, "learning_rate": 2.774039846115544e-05, "loss": 0.0946, "step": 11388 }, { "epoch": 0.20169391721676916, "grad_norm": 1.220279574394226, "learning_rate": 2.7739944327598987e-05, "loss": 0.123, "step": 11389 }, { "epoch": 0.20171162675379758, "grad_norm": 0.8994317650794983, "learning_rate": 2.7739490152129408e-05, "loss": 0.0902, "step": 11390 }, { "epoch": 0.201729336290826, "grad_norm": 0.6492707133293152, "learning_rate": 2.7739035934748195e-05, "loss": 0.1051, "step": 11391 }, { "epoch": 0.20174704582785444, "grad_norm": 1.139647126197815, "learning_rate": 2.773858167545684e-05, "loss": 0.1055, "step": 11392 }, { "epoch": 0.20176475536488286, "grad_norm": 1.1687731742858887, "learning_rate": 2.7738127374256837e-05, "loss": 0.1456, "step": 11393 }, { "epoch": 0.2017824649019113, "grad_norm": 0.6615344285964966, "learning_rate": 2.7737673031149687e-05, "loss": 0.0867, "step": 11394 }, { "epoch": 0.20180017443893972, "grad_norm": 0.816540002822876, "learning_rate": 2.773721864613688e-05, "loss": 0.1361, "step": 11395 }, { "epoch": 0.20181788397596817, "grad_norm": 0.7320220470428467, "learning_rate": 2.7736764219219913e-05, "loss": 0.1183, "step": 11396 }, { "epoch": 0.2018355935129966, "grad_norm": 0.9755159616470337, "learning_rate": 2.773630975040028e-05, "loss": 0.1212, "step": 11397 }, { "epoch": 0.20185330305002502, "grad_norm": 0.5969609022140503, "learning_rate": 2.7735855239679475e-05, "loss": 0.0601, "step": 11398 }, { "epoch": 0.20187101258705345, "grad_norm": 0.48585230112075806, "learning_rate": 2.7735400687058997e-05, "loss": 0.0776, "step": 11399 }, { "epoch": 0.20188872212408188, "grad_norm": 1.0915498733520508, "learning_rate": 2.7734946092540335e-05, "loss": 0.0745, "step": 11400 }, { "epoch": 0.2019064316611103, "grad_norm": 0.8004282116889954, "learning_rate": 2.773449145612499e-05, "loss": 0.081, "step": 11401 }, { "epoch": 0.20192414119813873, "grad_norm": 1.2000083923339844, "learning_rate": 2.773403677781446e-05, "loss": 0.1189, "step": 11402 }, { "epoch": 0.20194185073516716, "grad_norm": 0.5618786811828613, "learning_rate": 2.773358205761023e-05, "loss": 0.0934, "step": 11403 }, { "epoch": 0.20195956027219558, "grad_norm": 0.8880959749221802, "learning_rate": 2.7733127295513808e-05, "loss": 0.11, "step": 11404 }, { "epoch": 0.201977269809224, "grad_norm": 1.2385125160217285, "learning_rate": 2.7732672491526683e-05, "loss": 0.1245, "step": 11405 }, { "epoch": 0.20199497934625243, "grad_norm": 0.8984488844871521, "learning_rate": 2.7732217645650354e-05, "loss": 0.1563, "step": 11406 }, { "epoch": 0.20201268888328086, "grad_norm": 1.3878183364868164, "learning_rate": 2.7731762757886318e-05, "loss": 0.1113, "step": 11407 }, { "epoch": 0.2020303984203093, "grad_norm": 1.227268099784851, "learning_rate": 2.7731307828236065e-05, "loss": 0.1191, "step": 11408 }, { "epoch": 0.2020481079573377, "grad_norm": 0.9536821842193604, "learning_rate": 2.77308528567011e-05, "loss": 0.1238, "step": 11409 }, { "epoch": 0.20206581749436614, "grad_norm": 1.03434157371521, "learning_rate": 2.773039784328292e-05, "loss": 0.0819, "step": 11410 }, { "epoch": 0.2020835270313946, "grad_norm": 1.604289174079895, "learning_rate": 2.7729942787983013e-05, "loss": 0.0849, "step": 11411 }, { "epoch": 0.20210123656842302, "grad_norm": 0.8884332776069641, "learning_rate": 2.7729487690802888e-05, "loss": 0.1505, "step": 11412 }, { "epoch": 0.20211894610545145, "grad_norm": 1.0503101348876953, "learning_rate": 2.772903255174403e-05, "loss": 0.114, "step": 11413 }, { "epoch": 0.20213665564247987, "grad_norm": 0.9571001529693604, "learning_rate": 2.772857737080795e-05, "loss": 0.1041, "step": 11414 }, { "epoch": 0.2021543651795083, "grad_norm": 0.5159904360771179, "learning_rate": 2.7728122147996127e-05, "loss": 0.0708, "step": 11415 }, { "epoch": 0.20217207471653673, "grad_norm": 1.0649000406265259, "learning_rate": 2.772766688331008e-05, "loss": 0.1025, "step": 11416 }, { "epoch": 0.20218978425356515, "grad_norm": 0.6810111999511719, "learning_rate": 2.772721157675129e-05, "loss": 0.105, "step": 11417 }, { "epoch": 0.20220749379059358, "grad_norm": 0.6236575245857239, "learning_rate": 2.772675622832126e-05, "loss": 0.079, "step": 11418 }, { "epoch": 0.202225203327622, "grad_norm": 1.1187978982925415, "learning_rate": 2.7726300838021495e-05, "loss": 0.1378, "step": 11419 }, { "epoch": 0.20224291286465043, "grad_norm": 1.2371500730514526, "learning_rate": 2.7725845405853486e-05, "loss": 0.0852, "step": 11420 }, { "epoch": 0.20226062240167886, "grad_norm": 0.7753334641456604, "learning_rate": 2.7725389931818733e-05, "loss": 0.1275, "step": 11421 }, { "epoch": 0.20227833193870728, "grad_norm": 1.0349338054656982, "learning_rate": 2.7724934415918733e-05, "loss": 0.0865, "step": 11422 }, { "epoch": 0.2022960414757357, "grad_norm": 1.2506675720214844, "learning_rate": 2.7724478858154983e-05, "loss": 0.1279, "step": 11423 }, { "epoch": 0.20231375101276414, "grad_norm": 1.0387206077575684, "learning_rate": 2.7724023258528988e-05, "loss": 0.0825, "step": 11424 }, { "epoch": 0.20233146054979256, "grad_norm": 1.360878348350525, "learning_rate": 2.772356761704224e-05, "loss": 0.1034, "step": 11425 }, { "epoch": 0.20234917008682102, "grad_norm": 0.9861024618148804, "learning_rate": 2.7723111933696245e-05, "loss": 0.1124, "step": 11426 }, { "epoch": 0.20236687962384944, "grad_norm": 0.9421954154968262, "learning_rate": 2.77226562084925e-05, "loss": 0.0911, "step": 11427 }, { "epoch": 0.20238458916087787, "grad_norm": 0.8450961709022522, "learning_rate": 2.7722200441432498e-05, "loss": 0.1199, "step": 11428 }, { "epoch": 0.2024022986979063, "grad_norm": 0.8352589011192322, "learning_rate": 2.7721744632517747e-05, "loss": 0.1418, "step": 11429 }, { "epoch": 0.20242000823493472, "grad_norm": 1.148276686668396, "learning_rate": 2.7721288781749742e-05, "loss": 0.1034, "step": 11430 }, { "epoch": 0.20243771777196315, "grad_norm": 0.8844524025917053, "learning_rate": 2.7720832889129985e-05, "loss": 0.1344, "step": 11431 }, { "epoch": 0.20245542730899158, "grad_norm": 1.184127926826477, "learning_rate": 2.7720376954659973e-05, "loss": 0.1363, "step": 11432 }, { "epoch": 0.20247313684602, "grad_norm": 0.871894359588623, "learning_rate": 2.77199209783412e-05, "loss": 0.0939, "step": 11433 }, { "epoch": 0.20249084638304843, "grad_norm": 0.7280687689781189, "learning_rate": 2.7719464960175186e-05, "loss": 0.079, "step": 11434 }, { "epoch": 0.20250855592007685, "grad_norm": 0.8438846468925476, "learning_rate": 2.7719008900163413e-05, "loss": 0.0711, "step": 11435 }, { "epoch": 0.20252626545710528, "grad_norm": 0.3954765498638153, "learning_rate": 2.771855279830739e-05, "loss": 0.0799, "step": 11436 }, { "epoch": 0.2025439749941337, "grad_norm": 0.6232025027275085, "learning_rate": 2.7718096654608607e-05, "loss": 0.0947, "step": 11437 }, { "epoch": 0.20256168453116213, "grad_norm": 0.7383496761322021, "learning_rate": 2.771764046906858e-05, "loss": 0.1255, "step": 11438 }, { "epoch": 0.20257939406819056, "grad_norm": 0.6956263184547424, "learning_rate": 2.7717184241688802e-05, "loss": 0.1077, "step": 11439 }, { "epoch": 0.20259710360521901, "grad_norm": 0.5728869438171387, "learning_rate": 2.771672797247077e-05, "loss": 0.1055, "step": 11440 }, { "epoch": 0.20261481314224744, "grad_norm": 1.064780831336975, "learning_rate": 2.7716271661415992e-05, "loss": 0.118, "step": 11441 }, { "epoch": 0.20263252267927587, "grad_norm": 0.7667142748832703, "learning_rate": 2.7715815308525965e-05, "loss": 0.1164, "step": 11442 }, { "epoch": 0.2026502322163043, "grad_norm": 1.0282925367355347, "learning_rate": 2.771535891380219e-05, "loss": 0.1158, "step": 11443 }, { "epoch": 0.20266794175333272, "grad_norm": 0.8845109343528748, "learning_rate": 2.7714902477246172e-05, "loss": 0.126, "step": 11444 }, { "epoch": 0.20268565129036115, "grad_norm": 1.1121530532836914, "learning_rate": 2.7714445998859415e-05, "loss": 0.0937, "step": 11445 }, { "epoch": 0.20270336082738957, "grad_norm": 0.9024092555046082, "learning_rate": 2.7713989478643413e-05, "loss": 0.1338, "step": 11446 }, { "epoch": 0.202721070364418, "grad_norm": 0.872797429561615, "learning_rate": 2.7713532916599667e-05, "loss": 0.0836, "step": 11447 }, { "epoch": 0.20273877990144643, "grad_norm": 1.1607894897460938, "learning_rate": 2.7713076312729692e-05, "loss": 0.107, "step": 11448 }, { "epoch": 0.20275648943847485, "grad_norm": 1.06657874584198, "learning_rate": 2.7712619667034978e-05, "loss": 0.1016, "step": 11449 }, { "epoch": 0.20277419897550328, "grad_norm": 0.9315540194511414, "learning_rate": 2.7712162979517033e-05, "loss": 0.0887, "step": 11450 }, { "epoch": 0.2027919085125317, "grad_norm": 0.9091252684593201, "learning_rate": 2.7711706250177355e-05, "loss": 0.0779, "step": 11451 }, { "epoch": 0.20280961804956013, "grad_norm": 0.8438332676887512, "learning_rate": 2.771124947901745e-05, "loss": 0.127, "step": 11452 }, { "epoch": 0.20282732758658856, "grad_norm": 0.5683190822601318, "learning_rate": 2.771079266603882e-05, "loss": 0.0935, "step": 11453 }, { "epoch": 0.20284503712361698, "grad_norm": 0.9129083752632141, "learning_rate": 2.7710335811242966e-05, "loss": 0.0973, "step": 11454 }, { "epoch": 0.20286274666064544, "grad_norm": 1.0933538675308228, "learning_rate": 2.7709878914631394e-05, "loss": 0.1464, "step": 11455 }, { "epoch": 0.20288045619767386, "grad_norm": 1.2470674514770508, "learning_rate": 2.7709421976205604e-05, "loss": 0.1233, "step": 11456 }, { "epoch": 0.2028981657347023, "grad_norm": 0.6967697143554688, "learning_rate": 2.7708964995967106e-05, "loss": 0.0859, "step": 11457 }, { "epoch": 0.20291587527173072, "grad_norm": 0.835813045501709, "learning_rate": 2.7708507973917395e-05, "loss": 0.0581, "step": 11458 }, { "epoch": 0.20293358480875914, "grad_norm": 0.9905252456665039, "learning_rate": 2.770805091005798e-05, "loss": 0.1037, "step": 11459 }, { "epoch": 0.20295129434578757, "grad_norm": 0.8322873115539551, "learning_rate": 2.770759380439036e-05, "loss": 0.1237, "step": 11460 }, { "epoch": 0.202969003882816, "grad_norm": 0.954167366027832, "learning_rate": 2.7707136656916043e-05, "loss": 0.0844, "step": 11461 }, { "epoch": 0.20298671341984442, "grad_norm": 1.5916070938110352, "learning_rate": 2.7706679467636535e-05, "loss": 0.0994, "step": 11462 }, { "epoch": 0.20300442295687285, "grad_norm": 1.0827847719192505, "learning_rate": 2.7706222236553333e-05, "loss": 0.1047, "step": 11463 }, { "epoch": 0.20302213249390128, "grad_norm": 0.7897376418113708, "learning_rate": 2.7705764963667947e-05, "loss": 0.1096, "step": 11464 }, { "epoch": 0.2030398420309297, "grad_norm": 1.1667590141296387, "learning_rate": 2.770530764898188e-05, "loss": 0.1489, "step": 11465 }, { "epoch": 0.20305755156795813, "grad_norm": 0.7933666110038757, "learning_rate": 2.7704850292496634e-05, "loss": 0.0862, "step": 11466 }, { "epoch": 0.20307526110498655, "grad_norm": 1.3315684795379639, "learning_rate": 2.7704392894213712e-05, "loss": 0.1623, "step": 11467 }, { "epoch": 0.20309297064201498, "grad_norm": 0.6293829083442688, "learning_rate": 2.7703935454134627e-05, "loss": 0.0821, "step": 11468 }, { "epoch": 0.2031106801790434, "grad_norm": 0.9926144480705261, "learning_rate": 2.7703477972260877e-05, "loss": 0.0654, "step": 11469 }, { "epoch": 0.20312838971607186, "grad_norm": 0.9155864715576172, "learning_rate": 2.7703020448593972e-05, "loss": 0.1262, "step": 11470 }, { "epoch": 0.2031460992531003, "grad_norm": 1.0599212646484375, "learning_rate": 2.7702562883135417e-05, "loss": 0.1196, "step": 11471 }, { "epoch": 0.2031638087901287, "grad_norm": 0.5528805255889893, "learning_rate": 2.770210527588671e-05, "loss": 0.1006, "step": 11472 }, { "epoch": 0.20318151832715714, "grad_norm": 0.9988638758659363, "learning_rate": 2.7701647626849363e-05, "loss": 0.1114, "step": 11473 }, { "epoch": 0.20319922786418557, "grad_norm": 0.6839088201522827, "learning_rate": 2.7701189936024876e-05, "loss": 0.09, "step": 11474 }, { "epoch": 0.203216937401214, "grad_norm": 1.1734780073165894, "learning_rate": 2.7700732203414767e-05, "loss": 0.0898, "step": 11475 }, { "epoch": 0.20323464693824242, "grad_norm": 0.8403647541999817, "learning_rate": 2.770027442902053e-05, "loss": 0.1368, "step": 11476 }, { "epoch": 0.20325235647527085, "grad_norm": 1.0627310276031494, "learning_rate": 2.769981661284367e-05, "loss": 0.089, "step": 11477 }, { "epoch": 0.20327006601229927, "grad_norm": 0.9144383072853088, "learning_rate": 2.76993587548857e-05, "loss": 0.0971, "step": 11478 }, { "epoch": 0.2032877755493277, "grad_norm": 1.1610634326934814, "learning_rate": 2.7698900855148126e-05, "loss": 0.1283, "step": 11479 }, { "epoch": 0.20330548508635612, "grad_norm": 0.611173152923584, "learning_rate": 2.7698442913632455e-05, "loss": 0.0704, "step": 11480 }, { "epoch": 0.20332319462338455, "grad_norm": 1.2792448997497559, "learning_rate": 2.7697984930340187e-05, "loss": 0.1308, "step": 11481 }, { "epoch": 0.20334090416041298, "grad_norm": 1.325687050819397, "learning_rate": 2.7697526905272834e-05, "loss": 0.1045, "step": 11482 }, { "epoch": 0.2033586136974414, "grad_norm": 1.011635422706604, "learning_rate": 2.7697068838431902e-05, "loss": 0.1054, "step": 11483 }, { "epoch": 0.20337632323446983, "grad_norm": 1.0074788331985474, "learning_rate": 2.7696610729818897e-05, "loss": 0.1011, "step": 11484 }, { "epoch": 0.20339403277149828, "grad_norm": 0.716200590133667, "learning_rate": 2.7696152579435327e-05, "loss": 0.0981, "step": 11485 }, { "epoch": 0.2034117423085267, "grad_norm": 1.0515974760055542, "learning_rate": 2.76956943872827e-05, "loss": 0.078, "step": 11486 }, { "epoch": 0.20342945184555514, "grad_norm": 0.883491575717926, "learning_rate": 2.7695236153362526e-05, "loss": 0.0809, "step": 11487 }, { "epoch": 0.20344716138258356, "grad_norm": 0.9319970607757568, "learning_rate": 2.7694777877676307e-05, "loss": 0.0959, "step": 11488 }, { "epoch": 0.203464870919612, "grad_norm": 1.2131260633468628, "learning_rate": 2.769431956022555e-05, "loss": 0.0984, "step": 11489 }, { "epoch": 0.20348258045664042, "grad_norm": 0.808796226978302, "learning_rate": 2.7693861201011766e-05, "loss": 0.107, "step": 11490 }, { "epoch": 0.20350028999366884, "grad_norm": 0.7976753115653992, "learning_rate": 2.7693402800036463e-05, "loss": 0.1073, "step": 11491 }, { "epoch": 0.20351799953069727, "grad_norm": 0.8092987537384033, "learning_rate": 2.769294435730115e-05, "loss": 0.0839, "step": 11492 }, { "epoch": 0.2035357090677257, "grad_norm": 0.7175940275192261, "learning_rate": 2.7692485872807335e-05, "loss": 0.1133, "step": 11493 }, { "epoch": 0.20355341860475412, "grad_norm": 1.3284832239151, "learning_rate": 2.7692027346556523e-05, "loss": 0.104, "step": 11494 }, { "epoch": 0.20357112814178255, "grad_norm": 1.2343709468841553, "learning_rate": 2.7691568778550225e-05, "loss": 0.1046, "step": 11495 }, { "epoch": 0.20358883767881097, "grad_norm": 0.6964352130889893, "learning_rate": 2.769111016878995e-05, "loss": 0.0869, "step": 11496 }, { "epoch": 0.2036065472158394, "grad_norm": 0.7432032823562622, "learning_rate": 2.7690651517277205e-05, "loss": 0.0714, "step": 11497 }, { "epoch": 0.20362425675286783, "grad_norm": 0.8705253005027771, "learning_rate": 2.7690192824013498e-05, "loss": 0.0785, "step": 11498 }, { "epoch": 0.20364196628989625, "grad_norm": 0.9712387323379517, "learning_rate": 2.7689734089000347e-05, "loss": 0.0923, "step": 11499 }, { "epoch": 0.2036596758269247, "grad_norm": 0.6617199182510376, "learning_rate": 2.768927531223925e-05, "loss": 0.0742, "step": 11500 }, { "epoch": 0.20367738536395313, "grad_norm": 0.9994856119155884, "learning_rate": 2.7688816493731722e-05, "loss": 0.1201, "step": 11501 }, { "epoch": 0.20369509490098156, "grad_norm": 1.1469932794570923, "learning_rate": 2.7688357633479268e-05, "loss": 0.1399, "step": 11502 }, { "epoch": 0.20371280443801, "grad_norm": 0.8167834281921387, "learning_rate": 2.7687898731483404e-05, "loss": 0.1495, "step": 11503 }, { "epoch": 0.2037305139750384, "grad_norm": 1.3299304246902466, "learning_rate": 2.7687439787745634e-05, "loss": 0.1228, "step": 11504 }, { "epoch": 0.20374822351206684, "grad_norm": 0.8195064663887024, "learning_rate": 2.7686980802267472e-05, "loss": 0.1271, "step": 11505 }, { "epoch": 0.20376593304909527, "grad_norm": 1.100675344467163, "learning_rate": 2.7686521775050425e-05, "loss": 0.1325, "step": 11506 }, { "epoch": 0.2037836425861237, "grad_norm": 1.1882174015045166, "learning_rate": 2.7686062706096004e-05, "loss": 0.0995, "step": 11507 }, { "epoch": 0.20380135212315212, "grad_norm": 1.0006440877914429, "learning_rate": 2.7685603595405723e-05, "loss": 0.1061, "step": 11508 }, { "epoch": 0.20381906166018054, "grad_norm": 4.016328811645508, "learning_rate": 2.7685144442981085e-05, "loss": 0.1024, "step": 11509 }, { "epoch": 0.20383677119720897, "grad_norm": 0.6242197751998901, "learning_rate": 2.768468524882361e-05, "loss": 0.1454, "step": 11510 }, { "epoch": 0.2038544807342374, "grad_norm": 0.9264329075813293, "learning_rate": 2.7684226012934802e-05, "loss": 0.103, "step": 11511 }, { "epoch": 0.20387219027126582, "grad_norm": 1.003400206565857, "learning_rate": 2.768376673531617e-05, "loss": 0.1114, "step": 11512 }, { "epoch": 0.20388989980829425, "grad_norm": 1.076332688331604, "learning_rate": 2.768330741596923e-05, "loss": 0.0884, "step": 11513 }, { "epoch": 0.20390760934532268, "grad_norm": 0.8722721338272095, "learning_rate": 2.768284805489549e-05, "loss": 0.1447, "step": 11514 }, { "epoch": 0.20392531888235113, "grad_norm": 1.0667577981948853, "learning_rate": 2.7682388652096468e-05, "loss": 0.1193, "step": 11515 }, { "epoch": 0.20394302841937956, "grad_norm": 0.7896566987037659, "learning_rate": 2.768192920757367e-05, "loss": 0.1113, "step": 11516 }, { "epoch": 0.20396073795640798, "grad_norm": 0.5080448389053345, "learning_rate": 2.7681469721328604e-05, "loss": 0.0588, "step": 11517 }, { "epoch": 0.2039784474934364, "grad_norm": 1.7621285915374756, "learning_rate": 2.7681010193362784e-05, "loss": 0.1237, "step": 11518 }, { "epoch": 0.20399615703046484, "grad_norm": 0.9242430925369263, "learning_rate": 2.7680550623677722e-05, "loss": 0.0977, "step": 11519 }, { "epoch": 0.20401386656749326, "grad_norm": 0.9237914681434631, "learning_rate": 2.7680091012274935e-05, "loss": 0.1141, "step": 11520 }, { "epoch": 0.2040315761045217, "grad_norm": 0.7219533324241638, "learning_rate": 2.7679631359155932e-05, "loss": 0.1033, "step": 11521 }, { "epoch": 0.20404928564155012, "grad_norm": 0.7755159139633179, "learning_rate": 2.767917166432222e-05, "loss": 0.0981, "step": 11522 }, { "epoch": 0.20406699517857854, "grad_norm": 1.5605107545852661, "learning_rate": 2.767871192777532e-05, "loss": 0.1396, "step": 11523 }, { "epoch": 0.20408470471560697, "grad_norm": 1.1568926572799683, "learning_rate": 2.7678252149516736e-05, "loss": 0.1164, "step": 11524 }, { "epoch": 0.2041024142526354, "grad_norm": 0.9310675263404846, "learning_rate": 2.767779232954799e-05, "loss": 0.1172, "step": 11525 }, { "epoch": 0.20412012378966382, "grad_norm": 1.0711959600448608, "learning_rate": 2.7677332467870586e-05, "loss": 0.1193, "step": 11526 }, { "epoch": 0.20413783332669225, "grad_norm": 0.8505004048347473, "learning_rate": 2.767687256448604e-05, "loss": 0.101, "step": 11527 }, { "epoch": 0.20415554286372067, "grad_norm": 0.8626888394355774, "learning_rate": 2.7676412619395863e-05, "loss": 0.1136, "step": 11528 }, { "epoch": 0.2041732524007491, "grad_norm": 1.214995265007019, "learning_rate": 2.7675952632601572e-05, "loss": 0.1203, "step": 11529 }, { "epoch": 0.20419096193777755, "grad_norm": 0.8864603638648987, "learning_rate": 2.7675492604104685e-05, "loss": 0.1132, "step": 11530 }, { "epoch": 0.20420867147480598, "grad_norm": 1.195416808128357, "learning_rate": 2.7675032533906706e-05, "loss": 0.1554, "step": 11531 }, { "epoch": 0.2042263810118344, "grad_norm": 1.0326671600341797, "learning_rate": 2.767457242200915e-05, "loss": 0.1066, "step": 11532 }, { "epoch": 0.20424409054886283, "grad_norm": 1.0371402502059937, "learning_rate": 2.767411226841353e-05, "loss": 0.1345, "step": 11533 }, { "epoch": 0.20426180008589126, "grad_norm": 1.0127053260803223, "learning_rate": 2.767365207312137e-05, "loss": 0.1204, "step": 11534 }, { "epoch": 0.20427950962291969, "grad_norm": 0.9172870516777039, "learning_rate": 2.7673191836134168e-05, "loss": 0.1026, "step": 11535 }, { "epoch": 0.2042972191599481, "grad_norm": 1.785683035850525, "learning_rate": 2.767273155745345e-05, "loss": 0.1331, "step": 11536 }, { "epoch": 0.20431492869697654, "grad_norm": 1.0028772354125977, "learning_rate": 2.7672271237080734e-05, "loss": 0.1075, "step": 11537 }, { "epoch": 0.20433263823400497, "grad_norm": 0.4650551676750183, "learning_rate": 2.767181087501752e-05, "loss": 0.0887, "step": 11538 }, { "epoch": 0.2043503477710334, "grad_norm": 1.2398254871368408, "learning_rate": 2.7671350471265334e-05, "loss": 0.1192, "step": 11539 }, { "epoch": 0.20436805730806182, "grad_norm": 0.9619360566139221, "learning_rate": 2.7670890025825685e-05, "loss": 0.069, "step": 11540 }, { "epoch": 0.20438576684509024, "grad_norm": 1.3129627704620361, "learning_rate": 2.7670429538700088e-05, "loss": 0.1078, "step": 11541 }, { "epoch": 0.20440347638211867, "grad_norm": 0.8601673245429993, "learning_rate": 2.7669969009890058e-05, "loss": 0.0806, "step": 11542 }, { "epoch": 0.2044211859191471, "grad_norm": 0.5660907030105591, "learning_rate": 2.7669508439397113e-05, "loss": 0.0664, "step": 11543 }, { "epoch": 0.20443889545617552, "grad_norm": 1.1289384365081787, "learning_rate": 2.7669047827222765e-05, "loss": 0.1056, "step": 11544 }, { "epoch": 0.20445660499320398, "grad_norm": 1.0973588228225708, "learning_rate": 2.7668587173368534e-05, "loss": 0.137, "step": 11545 }, { "epoch": 0.2044743145302324, "grad_norm": 1.47423255443573, "learning_rate": 2.7668126477835932e-05, "loss": 0.1474, "step": 11546 }, { "epoch": 0.20449202406726083, "grad_norm": 0.7369469404220581, "learning_rate": 2.7667665740626477e-05, "loss": 0.0915, "step": 11547 }, { "epoch": 0.20450973360428926, "grad_norm": 1.1877800226211548, "learning_rate": 2.766720496174168e-05, "loss": 0.1192, "step": 11548 }, { "epoch": 0.20452744314131768, "grad_norm": 0.8853763341903687, "learning_rate": 2.7666744141183063e-05, "loss": 0.1005, "step": 11549 }, { "epoch": 0.2045451526783461, "grad_norm": 1.0297356843948364, "learning_rate": 2.7666283278952134e-05, "loss": 0.0858, "step": 11550 }, { "epoch": 0.20456286221537454, "grad_norm": 0.6956087350845337, "learning_rate": 2.7665822375050416e-05, "loss": 0.0798, "step": 11551 }, { "epoch": 0.20458057175240296, "grad_norm": 0.7301480174064636, "learning_rate": 2.7665361429479424e-05, "loss": 0.1121, "step": 11552 }, { "epoch": 0.2045982812894314, "grad_norm": 0.6124001741409302, "learning_rate": 2.7664900442240676e-05, "loss": 0.0991, "step": 11553 }, { "epoch": 0.20461599082645981, "grad_norm": 0.6614834666252136, "learning_rate": 2.7664439413335682e-05, "loss": 0.1385, "step": 11554 }, { "epoch": 0.20463370036348824, "grad_norm": 0.6804279685020447, "learning_rate": 2.7663978342765968e-05, "loss": 0.1009, "step": 11555 }, { "epoch": 0.20465140990051667, "grad_norm": 0.5479106903076172, "learning_rate": 2.766351723053304e-05, "loss": 0.0923, "step": 11556 }, { "epoch": 0.2046691194375451, "grad_norm": 0.5901158452033997, "learning_rate": 2.7663056076638432e-05, "loss": 0.0803, "step": 11557 }, { "epoch": 0.20468682897457352, "grad_norm": 0.7931162714958191, "learning_rate": 2.766259488108364e-05, "loss": 0.0874, "step": 11558 }, { "epoch": 0.20470453851160195, "grad_norm": 1.165224313735962, "learning_rate": 2.7662133643870195e-05, "loss": 0.1427, "step": 11559 }, { "epoch": 0.2047222480486304, "grad_norm": 1.4970811605453491, "learning_rate": 2.7661672364999612e-05, "loss": 0.1167, "step": 11560 }, { "epoch": 0.20473995758565883, "grad_norm": 0.4542124569416046, "learning_rate": 2.766121104447341e-05, "loss": 0.1405, "step": 11561 }, { "epoch": 0.20475766712268725, "grad_norm": 0.7803812026977539, "learning_rate": 2.7660749682293098e-05, "loss": 0.099, "step": 11562 }, { "epoch": 0.20477537665971568, "grad_norm": 0.6434224247932434, "learning_rate": 2.76602882784602e-05, "loss": 0.0904, "step": 11563 }, { "epoch": 0.2047930861967441, "grad_norm": 0.8307257294654846, "learning_rate": 2.765982683297624e-05, "loss": 0.0841, "step": 11564 }, { "epoch": 0.20481079573377253, "grad_norm": 1.1814570426940918, "learning_rate": 2.7659365345842728e-05, "loss": 0.1165, "step": 11565 }, { "epoch": 0.20482850527080096, "grad_norm": 0.7853264212608337, "learning_rate": 2.7658903817061183e-05, "loss": 0.1126, "step": 11566 }, { "epoch": 0.20484621480782939, "grad_norm": 0.9896315932273865, "learning_rate": 2.7658442246633125e-05, "loss": 0.1044, "step": 11567 }, { "epoch": 0.2048639243448578, "grad_norm": 0.9274057745933533, "learning_rate": 2.7657980634560073e-05, "loss": 0.0776, "step": 11568 }, { "epoch": 0.20488163388188624, "grad_norm": 1.220134973526001, "learning_rate": 2.7657518980843546e-05, "loss": 0.1018, "step": 11569 }, { "epoch": 0.20489934341891466, "grad_norm": 0.7967913746833801, "learning_rate": 2.765705728548506e-05, "loss": 0.0847, "step": 11570 }, { "epoch": 0.2049170529559431, "grad_norm": 1.3334858417510986, "learning_rate": 2.7656595548486134e-05, "loss": 0.1461, "step": 11571 }, { "epoch": 0.20493476249297152, "grad_norm": 0.9488868117332458, "learning_rate": 2.7656133769848294e-05, "loss": 0.0756, "step": 11572 }, { "epoch": 0.20495247202999994, "grad_norm": 1.3819502592086792, "learning_rate": 2.765567194957305e-05, "loss": 0.1397, "step": 11573 }, { "epoch": 0.2049701815670284, "grad_norm": 0.9417620301246643, "learning_rate": 2.7655210087661926e-05, "loss": 0.0956, "step": 11574 }, { "epoch": 0.20498789110405682, "grad_norm": 1.094862937927246, "learning_rate": 2.765474818411644e-05, "loss": 0.1208, "step": 11575 }, { "epoch": 0.20500560064108525, "grad_norm": 0.6362271308898926, "learning_rate": 2.7654286238938117e-05, "loss": 0.0573, "step": 11576 }, { "epoch": 0.20502331017811368, "grad_norm": 0.8253543972969055, "learning_rate": 2.765382425212847e-05, "loss": 0.1167, "step": 11577 }, { "epoch": 0.2050410197151421, "grad_norm": 0.7696610689163208, "learning_rate": 2.7653362223689016e-05, "loss": 0.061, "step": 11578 }, { "epoch": 0.20505872925217053, "grad_norm": 1.4968370199203491, "learning_rate": 2.7652900153621288e-05, "loss": 0.1101, "step": 11579 }, { "epoch": 0.20507643878919896, "grad_norm": 0.8628866672515869, "learning_rate": 2.765243804192679e-05, "loss": 0.1078, "step": 11580 }, { "epoch": 0.20509414832622738, "grad_norm": 1.1002434492111206, "learning_rate": 2.7651975888607056e-05, "loss": 0.0918, "step": 11581 }, { "epoch": 0.2051118578632558, "grad_norm": 1.229506254196167, "learning_rate": 2.76515136936636e-05, "loss": 0.0993, "step": 11582 }, { "epoch": 0.20512956740028424, "grad_norm": 1.0020452737808228, "learning_rate": 2.765105145709794e-05, "loss": 0.1013, "step": 11583 }, { "epoch": 0.20514727693731266, "grad_norm": 0.5701237320899963, "learning_rate": 2.765058917891161e-05, "loss": 0.1393, "step": 11584 }, { "epoch": 0.2051649864743411, "grad_norm": 0.8835271596908569, "learning_rate": 2.7650126859106114e-05, "loss": 0.1235, "step": 11585 }, { "epoch": 0.20518269601136951, "grad_norm": 0.6547019481658936, "learning_rate": 2.764966449768298e-05, "loss": 0.0649, "step": 11586 }, { "epoch": 0.20520040554839794, "grad_norm": 1.1562654972076416, "learning_rate": 2.7649202094643733e-05, "loss": 0.0858, "step": 11587 }, { "epoch": 0.20521811508542637, "grad_norm": 0.8791245818138123, "learning_rate": 2.7648739649989887e-05, "loss": 0.1201, "step": 11588 }, { "epoch": 0.20523582462245482, "grad_norm": 0.6135692596435547, "learning_rate": 2.7648277163722966e-05, "loss": 0.1147, "step": 11589 }, { "epoch": 0.20525353415948325, "grad_norm": 0.7188690900802612, "learning_rate": 2.7647814635844494e-05, "loss": 0.1049, "step": 11590 }, { "epoch": 0.20527124369651167, "grad_norm": 0.6784407496452332, "learning_rate": 2.764735206635599e-05, "loss": 0.0679, "step": 11591 }, { "epoch": 0.2052889532335401, "grad_norm": 0.762392520904541, "learning_rate": 2.764688945525898e-05, "loss": 0.1184, "step": 11592 }, { "epoch": 0.20530666277056853, "grad_norm": 0.7440862655639648, "learning_rate": 2.7646426802554977e-05, "loss": 0.0806, "step": 11593 }, { "epoch": 0.20532437230759695, "grad_norm": 0.9424849152565002, "learning_rate": 2.764596410824552e-05, "loss": 0.1302, "step": 11594 }, { "epoch": 0.20534208184462538, "grad_norm": 0.7651657462120056, "learning_rate": 2.7645501372332107e-05, "loss": 0.0986, "step": 11595 }, { "epoch": 0.2053597913816538, "grad_norm": 1.0404527187347412, "learning_rate": 2.7645038594816284e-05, "loss": 0.1204, "step": 11596 }, { "epoch": 0.20537750091868223, "grad_norm": 1.0744816064834595, "learning_rate": 2.7644575775699557e-05, "loss": 0.1549, "step": 11597 }, { "epoch": 0.20539521045571066, "grad_norm": 1.0224025249481201, "learning_rate": 2.7644112914983456e-05, "loss": 0.1074, "step": 11598 }, { "epoch": 0.20541291999273908, "grad_norm": 0.5550744533538818, "learning_rate": 2.7643650012669503e-05, "loss": 0.0877, "step": 11599 }, { "epoch": 0.2054306295297675, "grad_norm": 0.9255151748657227, "learning_rate": 2.764318706875922e-05, "loss": 0.1374, "step": 11600 }, { "epoch": 0.20544833906679594, "grad_norm": 0.6069380640983582, "learning_rate": 2.764272408325413e-05, "loss": 0.1051, "step": 11601 }, { "epoch": 0.20546604860382436, "grad_norm": 1.0359083414077759, "learning_rate": 2.7642261056155755e-05, "loss": 0.0971, "step": 11602 }, { "epoch": 0.2054837581408528, "grad_norm": 0.8926191329956055, "learning_rate": 2.764179798746562e-05, "loss": 0.1052, "step": 11603 }, { "epoch": 0.20550146767788124, "grad_norm": 0.7590829133987427, "learning_rate": 2.764133487718525e-05, "loss": 0.0833, "step": 11604 }, { "epoch": 0.20551917721490967, "grad_norm": 0.5724372267723083, "learning_rate": 2.7640871725316165e-05, "loss": 0.0931, "step": 11605 }, { "epoch": 0.2055368867519381, "grad_norm": 0.7404726147651672, "learning_rate": 2.7640408531859895e-05, "loss": 0.0979, "step": 11606 }, { "epoch": 0.20555459628896652, "grad_norm": 0.891279935836792, "learning_rate": 2.7639945296817955e-05, "loss": 0.0787, "step": 11607 }, { "epoch": 0.20557230582599495, "grad_norm": 0.8100512027740479, "learning_rate": 2.7639482020191876e-05, "loss": 0.1044, "step": 11608 }, { "epoch": 0.20559001536302338, "grad_norm": 1.4056679010391235, "learning_rate": 2.7639018701983177e-05, "loss": 0.1306, "step": 11609 }, { "epoch": 0.2056077249000518, "grad_norm": 0.6681508421897888, "learning_rate": 2.763855534219339e-05, "loss": 0.0986, "step": 11610 }, { "epoch": 0.20562543443708023, "grad_norm": 1.71709144115448, "learning_rate": 2.763809194082403e-05, "loss": 0.107, "step": 11611 }, { "epoch": 0.20564314397410866, "grad_norm": 0.7057551145553589, "learning_rate": 2.7637628497876623e-05, "loss": 0.1091, "step": 11612 }, { "epoch": 0.20566085351113708, "grad_norm": 1.060078501701355, "learning_rate": 2.7637165013352703e-05, "loss": 0.0944, "step": 11613 }, { "epoch": 0.2056785630481655, "grad_norm": 0.8180335760116577, "learning_rate": 2.7636701487253784e-05, "loss": 0.0808, "step": 11614 }, { "epoch": 0.20569627258519393, "grad_norm": 0.635570764541626, "learning_rate": 2.7636237919581396e-05, "loss": 0.122, "step": 11615 }, { "epoch": 0.20571398212222236, "grad_norm": 0.9067633152008057, "learning_rate": 2.7635774310337062e-05, "loss": 0.1496, "step": 11616 }, { "epoch": 0.2057316916592508, "grad_norm": 0.6398248076438904, "learning_rate": 2.7635310659522313e-05, "loss": 0.1028, "step": 11617 }, { "epoch": 0.2057494011962792, "grad_norm": 0.957728385925293, "learning_rate": 2.7634846967138666e-05, "loss": 0.1078, "step": 11618 }, { "epoch": 0.20576711073330767, "grad_norm": 0.8300969004631042, "learning_rate": 2.7634383233187653e-05, "loss": 0.0777, "step": 11619 }, { "epoch": 0.2057848202703361, "grad_norm": 0.8737062811851501, "learning_rate": 2.7633919457670795e-05, "loss": 0.0824, "step": 11620 }, { "epoch": 0.20580252980736452, "grad_norm": 1.1857364177703857, "learning_rate": 2.763345564058962e-05, "loss": 0.1291, "step": 11621 }, { "epoch": 0.20582023934439295, "grad_norm": 0.9847942590713501, "learning_rate": 2.763299178194565e-05, "loss": 0.0873, "step": 11622 }, { "epoch": 0.20583794888142137, "grad_norm": 1.1420204639434814, "learning_rate": 2.763252788174042e-05, "loss": 0.1169, "step": 11623 }, { "epoch": 0.2058556584184498, "grad_norm": 0.7095332145690918, "learning_rate": 2.763206393997545e-05, "loss": 0.1237, "step": 11624 }, { "epoch": 0.20587336795547823, "grad_norm": 0.6972426772117615, "learning_rate": 2.763159995665227e-05, "loss": 0.1163, "step": 11625 }, { "epoch": 0.20589107749250665, "grad_norm": 0.5183624625205994, "learning_rate": 2.76311359317724e-05, "loss": 0.0817, "step": 11626 }, { "epoch": 0.20590878702953508, "grad_norm": 0.9310973286628723, "learning_rate": 2.7630671865337374e-05, "loss": 0.1075, "step": 11627 }, { "epoch": 0.2059264965665635, "grad_norm": 0.6327198147773743, "learning_rate": 2.7630207757348714e-05, "loss": 0.1122, "step": 11628 }, { "epoch": 0.20594420610359193, "grad_norm": 0.8608894348144531, "learning_rate": 2.7629743607807944e-05, "loss": 0.1076, "step": 11629 }, { "epoch": 0.20596191564062036, "grad_norm": 1.1206817626953125, "learning_rate": 2.7629279416716597e-05, "loss": 0.1324, "step": 11630 }, { "epoch": 0.20597962517764878, "grad_norm": 1.6250510215759277, "learning_rate": 2.7628815184076197e-05, "loss": 0.0944, "step": 11631 }, { "epoch": 0.2059973347146772, "grad_norm": 0.6868305802345276, "learning_rate": 2.7628350909888275e-05, "loss": 0.1037, "step": 11632 }, { "epoch": 0.20601504425170564, "grad_norm": 0.5381285548210144, "learning_rate": 2.7627886594154356e-05, "loss": 0.058, "step": 11633 }, { "epoch": 0.2060327537887341, "grad_norm": 0.8741653561592102, "learning_rate": 2.7627422236875965e-05, "loss": 0.0696, "step": 11634 }, { "epoch": 0.20605046332576252, "grad_norm": 1.0197042226791382, "learning_rate": 2.7626957838054632e-05, "loss": 0.1225, "step": 11635 }, { "epoch": 0.20606817286279094, "grad_norm": 0.6807467937469482, "learning_rate": 2.7626493397691884e-05, "loss": 0.134, "step": 11636 }, { "epoch": 0.20608588239981937, "grad_norm": 0.7143934965133667, "learning_rate": 2.7626028915789254e-05, "loss": 0.1164, "step": 11637 }, { "epoch": 0.2061035919368478, "grad_norm": 1.1750067472457886, "learning_rate": 2.7625564392348263e-05, "loss": 0.1544, "step": 11638 }, { "epoch": 0.20612130147387622, "grad_norm": 1.3600940704345703, "learning_rate": 2.762509982737044e-05, "loss": 0.1577, "step": 11639 }, { "epoch": 0.20613901101090465, "grad_norm": 1.1025776863098145, "learning_rate": 2.762463522085732e-05, "loss": 0.1116, "step": 11640 }, { "epoch": 0.20615672054793308, "grad_norm": 0.8866145014762878, "learning_rate": 2.7624170572810425e-05, "loss": 0.0843, "step": 11641 }, { "epoch": 0.2061744300849615, "grad_norm": 1.2992502450942993, "learning_rate": 2.7623705883231288e-05, "loss": 0.1253, "step": 11642 }, { "epoch": 0.20619213962198993, "grad_norm": 1.1870567798614502, "learning_rate": 2.762324115212143e-05, "loss": 0.0923, "step": 11643 }, { "epoch": 0.20620984915901835, "grad_norm": 0.7999727129936218, "learning_rate": 2.7622776379482392e-05, "loss": 0.1153, "step": 11644 }, { "epoch": 0.20622755869604678, "grad_norm": 0.6937324404716492, "learning_rate": 2.762231156531569e-05, "loss": 0.0959, "step": 11645 }, { "epoch": 0.2062452682330752, "grad_norm": 0.955723226070404, "learning_rate": 2.7621846709622864e-05, "loss": 0.1045, "step": 11646 }, { "epoch": 0.20626297777010363, "grad_norm": 0.7016438245773315, "learning_rate": 2.7621381812405438e-05, "loss": 0.1142, "step": 11647 }, { "epoch": 0.20628068730713206, "grad_norm": 0.8245543241500854, "learning_rate": 2.762091687366494e-05, "loss": 0.0809, "step": 11648 }, { "epoch": 0.20629839684416051, "grad_norm": 0.9068359136581421, "learning_rate": 2.7620451893402906e-05, "loss": 0.099, "step": 11649 }, { "epoch": 0.20631610638118894, "grad_norm": 0.7983658909797668, "learning_rate": 2.761998687162086e-05, "loss": 0.1138, "step": 11650 }, { "epoch": 0.20633381591821737, "grad_norm": 0.786601722240448, "learning_rate": 2.7619521808320336e-05, "loss": 0.088, "step": 11651 }, { "epoch": 0.2063515254552458, "grad_norm": 0.6256248354911804, "learning_rate": 2.7619056703502856e-05, "loss": 0.0608, "step": 11652 }, { "epoch": 0.20636923499227422, "grad_norm": 0.9993883371353149, "learning_rate": 2.7618591557169957e-05, "loss": 0.0922, "step": 11653 }, { "epoch": 0.20638694452930265, "grad_norm": 1.2553242444992065, "learning_rate": 2.7618126369323174e-05, "loss": 0.1025, "step": 11654 }, { "epoch": 0.20640465406633107, "grad_norm": 1.2184028625488281, "learning_rate": 2.7617661139964024e-05, "loss": 0.0794, "step": 11655 }, { "epoch": 0.2064223636033595, "grad_norm": 1.0439774990081787, "learning_rate": 2.761719586909405e-05, "loss": 0.1141, "step": 11656 }, { "epoch": 0.20644007314038793, "grad_norm": 1.0940027236938477, "learning_rate": 2.7616730556714778e-05, "loss": 0.0919, "step": 11657 }, { "epoch": 0.20645778267741635, "grad_norm": 0.9243964552879333, "learning_rate": 2.7616265202827734e-05, "loss": 0.0899, "step": 11658 }, { "epoch": 0.20647549221444478, "grad_norm": 0.9444183707237244, "learning_rate": 2.761579980743446e-05, "loss": 0.0965, "step": 11659 }, { "epoch": 0.2064932017514732, "grad_norm": 0.7154809832572937, "learning_rate": 2.7615334370536476e-05, "loss": 0.0988, "step": 11660 }, { "epoch": 0.20651091128850163, "grad_norm": 0.5364027619361877, "learning_rate": 2.761486889213532e-05, "loss": 0.0753, "step": 11661 }, { "epoch": 0.20652862082553006, "grad_norm": 0.7313262224197388, "learning_rate": 2.761440337223252e-05, "loss": 0.097, "step": 11662 }, { "epoch": 0.20654633036255848, "grad_norm": 0.8035022020339966, "learning_rate": 2.761393781082961e-05, "loss": 0.1059, "step": 11663 }, { "epoch": 0.20656403989958694, "grad_norm": 0.7815883159637451, "learning_rate": 2.7613472207928116e-05, "loss": 0.0756, "step": 11664 }, { "epoch": 0.20658174943661536, "grad_norm": 1.3260442018508911, "learning_rate": 2.7613006563529578e-05, "loss": 0.1357, "step": 11665 }, { "epoch": 0.2065994589736438, "grad_norm": 0.7842608094215393, "learning_rate": 2.7612540877635522e-05, "loss": 0.1239, "step": 11666 }, { "epoch": 0.20661716851067222, "grad_norm": 0.8300888538360596, "learning_rate": 2.7612075150247485e-05, "loss": 0.0986, "step": 11667 }, { "epoch": 0.20663487804770064, "grad_norm": 0.9584978222846985, "learning_rate": 2.7611609381367e-05, "loss": 0.1118, "step": 11668 }, { "epoch": 0.20665258758472907, "grad_norm": 1.1301754713058472, "learning_rate": 2.7611143570995587e-05, "loss": 0.1459, "step": 11669 }, { "epoch": 0.2066702971217575, "grad_norm": 1.0448955297470093, "learning_rate": 2.761067771913479e-05, "loss": 0.1374, "step": 11670 }, { "epoch": 0.20668800665878592, "grad_norm": 0.70539391040802, "learning_rate": 2.761021182578614e-05, "loss": 0.0872, "step": 11671 }, { "epoch": 0.20670571619581435, "grad_norm": 1.1495946645736694, "learning_rate": 2.760974589095117e-05, "loss": 0.1569, "step": 11672 }, { "epoch": 0.20672342573284277, "grad_norm": 1.0856475830078125, "learning_rate": 2.760927991463141e-05, "loss": 0.087, "step": 11673 }, { "epoch": 0.2067411352698712, "grad_norm": 1.1163794994354248, "learning_rate": 2.7608813896828396e-05, "loss": 0.098, "step": 11674 }, { "epoch": 0.20675884480689963, "grad_norm": 0.5280570387840271, "learning_rate": 2.760834783754366e-05, "loss": 0.0927, "step": 11675 }, { "epoch": 0.20677655434392805, "grad_norm": 0.8274634480476379, "learning_rate": 2.760788173677873e-05, "loss": 0.0963, "step": 11676 }, { "epoch": 0.20679426388095648, "grad_norm": 0.9924614429473877, "learning_rate": 2.7607415594535147e-05, "loss": 0.1189, "step": 11677 }, { "epoch": 0.2068119734179849, "grad_norm": 0.680629551410675, "learning_rate": 2.7606949410814444e-05, "loss": 0.1481, "step": 11678 }, { "epoch": 0.20682968295501336, "grad_norm": 1.1189100742340088, "learning_rate": 2.7606483185618147e-05, "loss": 0.1515, "step": 11679 }, { "epoch": 0.2068473924920418, "grad_norm": 1.088653564453125, "learning_rate": 2.76060169189478e-05, "loss": 0.1276, "step": 11680 }, { "epoch": 0.2068651020290702, "grad_norm": 0.5978455543518066, "learning_rate": 2.760555061080493e-05, "loss": 0.079, "step": 11681 }, { "epoch": 0.20688281156609864, "grad_norm": 0.6258072257041931, "learning_rate": 2.7605084261191077e-05, "loss": 0.0696, "step": 11682 }, { "epoch": 0.20690052110312707, "grad_norm": 1.564361810684204, "learning_rate": 2.7604617870107768e-05, "loss": 0.1723, "step": 11683 }, { "epoch": 0.2069182306401555, "grad_norm": 0.8881898522377014, "learning_rate": 2.7604151437556545e-05, "loss": 0.1035, "step": 11684 }, { "epoch": 0.20693594017718392, "grad_norm": 0.9535655379295349, "learning_rate": 2.7603684963538932e-05, "loss": 0.1161, "step": 11685 }, { "epoch": 0.20695364971421235, "grad_norm": 0.8048695921897888, "learning_rate": 2.7603218448056474e-05, "loss": 0.0958, "step": 11686 }, { "epoch": 0.20697135925124077, "grad_norm": 1.0383853912353516, "learning_rate": 2.7602751891110704e-05, "loss": 0.1002, "step": 11687 }, { "epoch": 0.2069890687882692, "grad_norm": 0.9764443039894104, "learning_rate": 2.7602285292703152e-05, "loss": 0.0803, "step": 11688 }, { "epoch": 0.20700677832529762, "grad_norm": 0.5056140422821045, "learning_rate": 2.760181865283536e-05, "loss": 0.0603, "step": 11689 }, { "epoch": 0.20702448786232605, "grad_norm": 0.8026083707809448, "learning_rate": 2.7601351971508856e-05, "loss": 0.1051, "step": 11690 }, { "epoch": 0.20704219739935448, "grad_norm": 0.620967447757721, "learning_rate": 2.760088524872518e-05, "loss": 0.1022, "step": 11691 }, { "epoch": 0.2070599069363829, "grad_norm": 1.0588234663009644, "learning_rate": 2.7600418484485868e-05, "loss": 0.1231, "step": 11692 }, { "epoch": 0.20707761647341133, "grad_norm": 1.0054032802581787, "learning_rate": 2.759995167879245e-05, "loss": 0.1055, "step": 11693 }, { "epoch": 0.20709532601043978, "grad_norm": 0.8627681732177734, "learning_rate": 2.7599484831646467e-05, "loss": 0.1131, "step": 11694 }, { "epoch": 0.2071130355474682, "grad_norm": 0.6159939169883728, "learning_rate": 2.7599017943049452e-05, "loss": 0.0788, "step": 11695 }, { "epoch": 0.20713074508449664, "grad_norm": 1.0001524686813354, "learning_rate": 2.7598551013002945e-05, "loss": 0.1016, "step": 11696 }, { "epoch": 0.20714845462152506, "grad_norm": 0.8129002451896667, "learning_rate": 2.7598084041508476e-05, "loss": 0.1015, "step": 11697 }, { "epoch": 0.2071661641585535, "grad_norm": 0.9006738066673279, "learning_rate": 2.7597617028567587e-05, "loss": 0.0844, "step": 11698 }, { "epoch": 0.20718387369558192, "grad_norm": 0.7723904848098755, "learning_rate": 2.759714997418181e-05, "loss": 0.1051, "step": 11699 }, { "epoch": 0.20720158323261034, "grad_norm": 1.0480812788009644, "learning_rate": 2.7596682878352682e-05, "loss": 0.1445, "step": 11700 }, { "epoch": 0.20721929276963877, "grad_norm": 0.8488158583641052, "learning_rate": 2.7596215741081746e-05, "loss": 0.1007, "step": 11701 }, { "epoch": 0.2072370023066672, "grad_norm": 1.09621000289917, "learning_rate": 2.7595748562370536e-05, "loss": 0.1114, "step": 11702 }, { "epoch": 0.20725471184369562, "grad_norm": 0.6772289276123047, "learning_rate": 2.759528134222058e-05, "loss": 0.0748, "step": 11703 }, { "epoch": 0.20727242138072405, "grad_norm": 0.6742847561836243, "learning_rate": 2.7594814080633427e-05, "loss": 0.1353, "step": 11704 }, { "epoch": 0.20729013091775247, "grad_norm": 1.0278887748718262, "learning_rate": 2.7594346777610605e-05, "loss": 0.095, "step": 11705 }, { "epoch": 0.2073078404547809, "grad_norm": 0.8906141519546509, "learning_rate": 2.759387943315366e-05, "loss": 0.1419, "step": 11706 }, { "epoch": 0.20732554999180933, "grad_norm": 1.4195539951324463, "learning_rate": 2.759341204726413e-05, "loss": 0.1134, "step": 11707 }, { "epoch": 0.20734325952883778, "grad_norm": 1.106509804725647, "learning_rate": 2.7592944619943538e-05, "loss": 0.0887, "step": 11708 }, { "epoch": 0.2073609690658662, "grad_norm": 0.9223926067352295, "learning_rate": 2.7592477151193444e-05, "loss": 0.1277, "step": 11709 }, { "epoch": 0.20737867860289463, "grad_norm": 0.8519135117530823, "learning_rate": 2.7592009641015362e-05, "loss": 0.1307, "step": 11710 }, { "epoch": 0.20739638813992306, "grad_norm": 1.1892445087432861, "learning_rate": 2.7591542089410846e-05, "loss": 0.1275, "step": 11711 }, { "epoch": 0.2074140976769515, "grad_norm": 0.8545219302177429, "learning_rate": 2.759107449638143e-05, "loss": 0.1033, "step": 11712 }, { "epoch": 0.2074318072139799, "grad_norm": 0.9253093600273132, "learning_rate": 2.7590606861928655e-05, "loss": 0.106, "step": 11713 }, { "epoch": 0.20744951675100834, "grad_norm": 0.5086922645568848, "learning_rate": 2.7590139186054055e-05, "loss": 0.0684, "step": 11714 }, { "epoch": 0.20746722628803677, "grad_norm": 0.9772682785987854, "learning_rate": 2.758967146875917e-05, "loss": 0.0936, "step": 11715 }, { "epoch": 0.2074849358250652, "grad_norm": 0.6769326329231262, "learning_rate": 2.7589203710045543e-05, "loss": 0.0762, "step": 11716 }, { "epoch": 0.20750264536209362, "grad_norm": 0.8714045882225037, "learning_rate": 2.7588735909914706e-05, "loss": 0.0915, "step": 11717 }, { "epoch": 0.20752035489912204, "grad_norm": 0.6764907240867615, "learning_rate": 2.75882680683682e-05, "loss": 0.0969, "step": 11718 }, { "epoch": 0.20753806443615047, "grad_norm": 0.9137148261070251, "learning_rate": 2.7587800185407567e-05, "loss": 0.1008, "step": 11719 }, { "epoch": 0.2075557739731789, "grad_norm": 1.19169282913208, "learning_rate": 2.758733226103434e-05, "loss": 0.1031, "step": 11720 }, { "epoch": 0.20757348351020732, "grad_norm": 1.096677541732788, "learning_rate": 2.7586864295250068e-05, "loss": 0.0982, "step": 11721 }, { "epoch": 0.20759119304723575, "grad_norm": 1.0399796962738037, "learning_rate": 2.7586396288056288e-05, "loss": 0.1213, "step": 11722 }, { "epoch": 0.2076089025842642, "grad_norm": 1.1610791683197021, "learning_rate": 2.7585928239454533e-05, "loss": 0.1243, "step": 11723 }, { "epoch": 0.20762661212129263, "grad_norm": 1.1551915407180786, "learning_rate": 2.7585460149446345e-05, "loss": 0.1113, "step": 11724 }, { "epoch": 0.20764432165832106, "grad_norm": 0.9721426367759705, "learning_rate": 2.7584992018033273e-05, "loss": 0.0975, "step": 11725 }, { "epoch": 0.20766203119534948, "grad_norm": 0.9985890984535217, "learning_rate": 2.7584523845216844e-05, "loss": 0.1159, "step": 11726 }, { "epoch": 0.2076797407323779, "grad_norm": 0.8682447075843811, "learning_rate": 2.758405563099861e-05, "loss": 0.1298, "step": 11727 }, { "epoch": 0.20769745026940634, "grad_norm": 0.682462751865387, "learning_rate": 2.7583587375380102e-05, "loss": 0.0884, "step": 11728 }, { "epoch": 0.20771515980643476, "grad_norm": 0.8022685647010803, "learning_rate": 2.7583119078362862e-05, "loss": 0.0917, "step": 11729 }, { "epoch": 0.2077328693434632, "grad_norm": 1.2263027429580688, "learning_rate": 2.7582650739948432e-05, "loss": 0.1713, "step": 11730 }, { "epoch": 0.20775057888049162, "grad_norm": 1.2492564916610718, "learning_rate": 2.758218236013836e-05, "loss": 0.1514, "step": 11731 }, { "epoch": 0.20776828841752004, "grad_norm": 1.1797078847885132, "learning_rate": 2.758171393893418e-05, "loss": 0.0866, "step": 11732 }, { "epoch": 0.20778599795454847, "grad_norm": 1.1596171855926514, "learning_rate": 2.758124547633743e-05, "loss": 0.1358, "step": 11733 }, { "epoch": 0.2078037074915769, "grad_norm": 1.3211497068405151, "learning_rate": 2.7580776972349655e-05, "loss": 0.0763, "step": 11734 }, { "epoch": 0.20782141702860532, "grad_norm": 0.7723435163497925, "learning_rate": 2.7580308426972398e-05, "loss": 0.1024, "step": 11735 }, { "epoch": 0.20783912656563375, "grad_norm": 0.9859676957130432, "learning_rate": 2.75798398402072e-05, "loss": 0.1239, "step": 11736 }, { "epoch": 0.20785683610266217, "grad_norm": 0.9071505665779114, "learning_rate": 2.7579371212055603e-05, "loss": 0.1354, "step": 11737 }, { "epoch": 0.20787454563969063, "grad_norm": 1.1025660037994385, "learning_rate": 2.757890254251914e-05, "loss": 0.0715, "step": 11738 }, { "epoch": 0.20789225517671905, "grad_norm": 0.8727431297302246, "learning_rate": 2.7578433831599367e-05, "loss": 0.0908, "step": 11739 }, { "epoch": 0.20790996471374748, "grad_norm": 0.8453667759895325, "learning_rate": 2.7577965079297812e-05, "loss": 0.0876, "step": 11740 }, { "epoch": 0.2079276742507759, "grad_norm": 2.035555839538574, "learning_rate": 2.757749628561603e-05, "loss": 0.1199, "step": 11741 }, { "epoch": 0.20794538378780433, "grad_norm": 0.46571245789527893, "learning_rate": 2.7577027450555552e-05, "loss": 0.0891, "step": 11742 }, { "epoch": 0.20796309332483276, "grad_norm": 1.1494568586349487, "learning_rate": 2.757655857411793e-05, "loss": 0.0914, "step": 11743 }, { "epoch": 0.20798080286186119, "grad_norm": 0.6533000469207764, "learning_rate": 2.75760896563047e-05, "loss": 0.1036, "step": 11744 }, { "epoch": 0.2079985123988896, "grad_norm": 0.621880054473877, "learning_rate": 2.757562069711741e-05, "loss": 0.1237, "step": 11745 }, { "epoch": 0.20801622193591804, "grad_norm": 0.8901107907295227, "learning_rate": 2.7575151696557598e-05, "loss": 0.0846, "step": 11746 }, { "epoch": 0.20803393147294647, "grad_norm": 0.7069132328033447, "learning_rate": 2.7574682654626807e-05, "loss": 0.0957, "step": 11747 }, { "epoch": 0.2080516410099749, "grad_norm": 0.7031182050704956, "learning_rate": 2.7574213571326586e-05, "loss": 0.0838, "step": 11748 }, { "epoch": 0.20806935054700332, "grad_norm": 1.2211908102035522, "learning_rate": 2.7573744446658475e-05, "loss": 0.1223, "step": 11749 }, { "epoch": 0.20808706008403174, "grad_norm": 0.6197636127471924, "learning_rate": 2.7573275280624012e-05, "loss": 0.1208, "step": 11750 }, { "epoch": 0.20810476962106017, "grad_norm": 0.7572680711746216, "learning_rate": 2.7572806073224742e-05, "loss": 0.0829, "step": 11751 }, { "epoch": 0.2081224791580886, "grad_norm": 2.825810670852661, "learning_rate": 2.7572336824462215e-05, "loss": 0.1132, "step": 11752 }, { "epoch": 0.20814018869511705, "grad_norm": 0.7482007741928101, "learning_rate": 2.7571867534337974e-05, "loss": 0.0849, "step": 11753 }, { "epoch": 0.20815789823214548, "grad_norm": 0.5981278419494629, "learning_rate": 2.757139820285356e-05, "loss": 0.0884, "step": 11754 }, { "epoch": 0.2081756077691739, "grad_norm": 1.1566272974014282, "learning_rate": 2.7570928830010516e-05, "loss": 0.1559, "step": 11755 }, { "epoch": 0.20819331730620233, "grad_norm": 0.9538241624832153, "learning_rate": 2.757045941581039e-05, "loss": 0.0858, "step": 11756 }, { "epoch": 0.20821102684323076, "grad_norm": 0.6717127561569214, "learning_rate": 2.7569989960254722e-05, "loss": 0.0653, "step": 11757 }, { "epoch": 0.20822873638025918, "grad_norm": 1.2013169527053833, "learning_rate": 2.7569520463345056e-05, "loss": 0.1546, "step": 11758 }, { "epoch": 0.2082464459172876, "grad_norm": 1.0160586833953857, "learning_rate": 2.7569050925082944e-05, "loss": 0.0994, "step": 11759 }, { "epoch": 0.20826415545431604, "grad_norm": 1.42330002784729, "learning_rate": 2.7568581345469923e-05, "loss": 0.1547, "step": 11760 }, { "epoch": 0.20828186499134446, "grad_norm": 0.9350844621658325, "learning_rate": 2.7568111724507542e-05, "loss": 0.0817, "step": 11761 }, { "epoch": 0.2082995745283729, "grad_norm": 1.1023417711257935, "learning_rate": 2.756764206219734e-05, "loss": 0.1138, "step": 11762 }, { "epoch": 0.20831728406540131, "grad_norm": 0.9620265364646912, "learning_rate": 2.756717235854087e-05, "loss": 0.1284, "step": 11763 }, { "epoch": 0.20833499360242974, "grad_norm": 0.8091345429420471, "learning_rate": 2.756670261353968e-05, "loss": 0.1428, "step": 11764 }, { "epoch": 0.20835270313945817, "grad_norm": 0.8590015769004822, "learning_rate": 2.7566232827195298e-05, "loss": 0.1425, "step": 11765 }, { "epoch": 0.2083704126764866, "grad_norm": 0.9750894904136658, "learning_rate": 2.756576299950929e-05, "loss": 0.1322, "step": 11766 }, { "epoch": 0.20838812221351502, "grad_norm": 0.8921986818313599, "learning_rate": 2.756529313048319e-05, "loss": 0.1058, "step": 11767 }, { "epoch": 0.20840583175054347, "grad_norm": 0.9978339076042175, "learning_rate": 2.7564823220118544e-05, "loss": 0.1029, "step": 11768 }, { "epoch": 0.2084235412875719, "grad_norm": 0.8210716247558594, "learning_rate": 2.7564353268416902e-05, "loss": 0.1567, "step": 11769 }, { "epoch": 0.20844125082460033, "grad_norm": 0.8378511667251587, "learning_rate": 2.7563883275379805e-05, "loss": 0.1028, "step": 11770 }, { "epoch": 0.20845896036162875, "grad_norm": 1.0057933330535889, "learning_rate": 2.756341324100881e-05, "loss": 0.1316, "step": 11771 }, { "epoch": 0.20847666989865718, "grad_norm": 0.7635596990585327, "learning_rate": 2.7562943165305453e-05, "loss": 0.1119, "step": 11772 }, { "epoch": 0.2084943794356856, "grad_norm": 0.9905412197113037, "learning_rate": 2.7562473048271285e-05, "loss": 0.1071, "step": 11773 }, { "epoch": 0.20851208897271403, "grad_norm": 1.2149720191955566, "learning_rate": 2.7562002889907848e-05, "loss": 0.1145, "step": 11774 }, { "epoch": 0.20852979850974246, "grad_norm": 1.2331680059432983, "learning_rate": 2.7561532690216694e-05, "loss": 0.1037, "step": 11775 }, { "epoch": 0.20854750804677089, "grad_norm": 1.110383152961731, "learning_rate": 2.756106244919937e-05, "loss": 0.1341, "step": 11776 }, { "epoch": 0.2085652175837993, "grad_norm": 1.340743899345398, "learning_rate": 2.7560592166857416e-05, "loss": 0.1488, "step": 11777 }, { "epoch": 0.20858292712082774, "grad_norm": 0.7375162243843079, "learning_rate": 2.7560121843192386e-05, "loss": 0.1013, "step": 11778 }, { "epoch": 0.20860063665785616, "grad_norm": 1.2402445077896118, "learning_rate": 2.7559651478205826e-05, "loss": 0.1147, "step": 11779 }, { "epoch": 0.2086183461948846, "grad_norm": 0.6304312348365784, "learning_rate": 2.7559181071899284e-05, "loss": 0.1188, "step": 11780 }, { "epoch": 0.20863605573191302, "grad_norm": 0.6584668159484863, "learning_rate": 2.755871062427431e-05, "loss": 0.0732, "step": 11781 }, { "epoch": 0.20865376526894144, "grad_norm": 1.1906758546829224, "learning_rate": 2.7558240135332442e-05, "loss": 0.1295, "step": 11782 }, { "epoch": 0.2086714748059699, "grad_norm": 0.7710145711898804, "learning_rate": 2.7557769605075236e-05, "loss": 0.1243, "step": 11783 }, { "epoch": 0.20868918434299832, "grad_norm": 1.4053064584732056, "learning_rate": 2.7557299033504238e-05, "loss": 0.1028, "step": 11784 }, { "epoch": 0.20870689388002675, "grad_norm": 0.7276658415794373, "learning_rate": 2.7556828420621e-05, "loss": 0.091, "step": 11785 }, { "epoch": 0.20872460341705518, "grad_norm": 0.7241148948669434, "learning_rate": 2.755635776642706e-05, "loss": 0.1025, "step": 11786 }, { "epoch": 0.2087423129540836, "grad_norm": 1.0956202745437622, "learning_rate": 2.7555887070923976e-05, "loss": 0.1451, "step": 11787 }, { "epoch": 0.20876002249111203, "grad_norm": 0.7510272264480591, "learning_rate": 2.7555416334113296e-05, "loss": 0.1226, "step": 11788 }, { "epoch": 0.20877773202814046, "grad_norm": 0.9168498516082764, "learning_rate": 2.7554945555996566e-05, "loss": 0.0709, "step": 11789 }, { "epoch": 0.20879544156516888, "grad_norm": 1.1886014938354492, "learning_rate": 2.755447473657533e-05, "loss": 0.129, "step": 11790 }, { "epoch": 0.2088131511021973, "grad_norm": 1.0516756772994995, "learning_rate": 2.7554003875851145e-05, "loss": 0.0971, "step": 11791 }, { "epoch": 0.20883086063922573, "grad_norm": 0.746965229511261, "learning_rate": 2.7553532973825557e-05, "loss": 0.0839, "step": 11792 }, { "epoch": 0.20884857017625416, "grad_norm": 0.8150016069412231, "learning_rate": 2.7553062030500113e-05, "loss": 0.1111, "step": 11793 }, { "epoch": 0.2088662797132826, "grad_norm": 0.9247784614562988, "learning_rate": 2.7552591045876368e-05, "loss": 0.1218, "step": 11794 }, { "epoch": 0.20888398925031101, "grad_norm": 1.0294699668884277, "learning_rate": 2.7552120019955867e-05, "loss": 0.1083, "step": 11795 }, { "epoch": 0.20890169878733944, "grad_norm": 0.8262442946434021, "learning_rate": 2.755164895274016e-05, "loss": 0.0902, "step": 11796 }, { "epoch": 0.20891940832436787, "grad_norm": 1.1941875219345093, "learning_rate": 2.7551177844230798e-05, "loss": 0.1171, "step": 11797 }, { "epoch": 0.20893711786139632, "grad_norm": 0.8402411937713623, "learning_rate": 2.755070669442933e-05, "loss": 0.1314, "step": 11798 }, { "epoch": 0.20895482739842475, "grad_norm": 0.6803410053253174, "learning_rate": 2.755023550333731e-05, "loss": 0.0947, "step": 11799 }, { "epoch": 0.20897253693545317, "grad_norm": 1.1397168636322021, "learning_rate": 2.754976427095628e-05, "loss": 0.0984, "step": 11800 }, { "epoch": 0.2089902464724816, "grad_norm": 0.6000545620918274, "learning_rate": 2.7549292997287795e-05, "loss": 0.1129, "step": 11801 }, { "epoch": 0.20900795600951003, "grad_norm": 0.7061305046081543, "learning_rate": 2.7548821682333405e-05, "loss": 0.0502, "step": 11802 }, { "epoch": 0.20902566554653845, "grad_norm": 1.0613387823104858, "learning_rate": 2.7548350326094666e-05, "loss": 0.105, "step": 11803 }, { "epoch": 0.20904337508356688, "grad_norm": 0.7335426807403564, "learning_rate": 2.754787892857312e-05, "loss": 0.101, "step": 11804 }, { "epoch": 0.2090610846205953, "grad_norm": 0.9364948272705078, "learning_rate": 2.754740748977032e-05, "loss": 0.1114, "step": 11805 }, { "epoch": 0.20907879415762373, "grad_norm": 1.492016315460205, "learning_rate": 2.7546936009687818e-05, "loss": 0.1124, "step": 11806 }, { "epoch": 0.20909650369465216, "grad_norm": 1.2099860906600952, "learning_rate": 2.754646448832717e-05, "loss": 0.0963, "step": 11807 }, { "epoch": 0.20911421323168058, "grad_norm": 1.2250398397445679, "learning_rate": 2.754599292568992e-05, "loss": 0.1102, "step": 11808 }, { "epoch": 0.209131922768709, "grad_norm": 0.7828361988067627, "learning_rate": 2.754552132177762e-05, "loss": 0.104, "step": 11809 }, { "epoch": 0.20914963230573744, "grad_norm": 1.233488917350769, "learning_rate": 2.7545049676591828e-05, "loss": 0.098, "step": 11810 }, { "epoch": 0.20916734184276586, "grad_norm": 0.9314021468162537, "learning_rate": 2.7544577990134093e-05, "loss": 0.1388, "step": 11811 }, { "epoch": 0.2091850513797943, "grad_norm": 0.9725871682167053, "learning_rate": 2.7544106262405958e-05, "loss": 0.1058, "step": 11812 }, { "epoch": 0.20920276091682274, "grad_norm": 0.9044157266616821, "learning_rate": 2.7543634493408985e-05, "loss": 0.0993, "step": 11813 }, { "epoch": 0.20922047045385117, "grad_norm": 0.9147806763648987, "learning_rate": 2.7543162683144727e-05, "loss": 0.0916, "step": 11814 }, { "epoch": 0.2092381799908796, "grad_norm": 1.0483996868133545, "learning_rate": 2.7542690831614727e-05, "loss": 0.0994, "step": 11815 }, { "epoch": 0.20925588952790802, "grad_norm": 1.240728735923767, "learning_rate": 2.754221893882055e-05, "loss": 0.1428, "step": 11816 }, { "epoch": 0.20927359906493645, "grad_norm": 1.043032169342041, "learning_rate": 2.7541747004763733e-05, "loss": 0.0991, "step": 11817 }, { "epoch": 0.20929130860196488, "grad_norm": 1.0219184160232544, "learning_rate": 2.7541275029445842e-05, "loss": 0.0701, "step": 11818 }, { "epoch": 0.2093090181389933, "grad_norm": 0.9090622067451477, "learning_rate": 2.7540803012868427e-05, "loss": 0.115, "step": 11819 }, { "epoch": 0.20932672767602173, "grad_norm": 0.8245015144348145, "learning_rate": 2.754033095503303e-05, "loss": 0.1243, "step": 11820 }, { "epoch": 0.20934443721305016, "grad_norm": 1.0154346227645874, "learning_rate": 2.753985885594122e-05, "loss": 0.1185, "step": 11821 }, { "epoch": 0.20936214675007858, "grad_norm": 1.0078065395355225, "learning_rate": 2.753938671559454e-05, "loss": 0.1094, "step": 11822 }, { "epoch": 0.209379856287107, "grad_norm": 0.542570948600769, "learning_rate": 2.7538914533994547e-05, "loss": 0.0931, "step": 11823 }, { "epoch": 0.20939756582413543, "grad_norm": 0.9216623306274414, "learning_rate": 2.753844231114279e-05, "loss": 0.1232, "step": 11824 }, { "epoch": 0.20941527536116386, "grad_norm": 0.9530903697013855, "learning_rate": 2.753797004704083e-05, "loss": 0.1251, "step": 11825 }, { "epoch": 0.2094329848981923, "grad_norm": 0.8052566647529602, "learning_rate": 2.7537497741690218e-05, "loss": 0.1093, "step": 11826 }, { "epoch": 0.2094506944352207, "grad_norm": 0.8207805752754211, "learning_rate": 2.75370253950925e-05, "loss": 0.0858, "step": 11827 }, { "epoch": 0.20946840397224917, "grad_norm": 0.8741869330406189, "learning_rate": 2.7536553007249238e-05, "loss": 0.1161, "step": 11828 }, { "epoch": 0.2094861135092776, "grad_norm": 0.8166245818138123, "learning_rate": 2.7536080578161987e-05, "loss": 0.104, "step": 11829 }, { "epoch": 0.20950382304630602, "grad_norm": 1.1162683963775635, "learning_rate": 2.75356081078323e-05, "loss": 0.1824, "step": 11830 }, { "epoch": 0.20952153258333445, "grad_norm": 0.9457471966743469, "learning_rate": 2.753513559626173e-05, "loss": 0.0904, "step": 11831 }, { "epoch": 0.20953924212036287, "grad_norm": 0.79029780626297, "learning_rate": 2.753466304345183e-05, "loss": 0.1107, "step": 11832 }, { "epoch": 0.2095569516573913, "grad_norm": 0.762931227684021, "learning_rate": 2.753419044940416e-05, "loss": 0.1132, "step": 11833 }, { "epoch": 0.20957466119441973, "grad_norm": 1.1470136642456055, "learning_rate": 2.7533717814120263e-05, "loss": 0.1143, "step": 11834 }, { "epoch": 0.20959237073144815, "grad_norm": 1.011641502380371, "learning_rate": 2.753324513760171e-05, "loss": 0.0975, "step": 11835 }, { "epoch": 0.20961008026847658, "grad_norm": 1.0506715774536133, "learning_rate": 2.7532772419850043e-05, "loss": 0.0897, "step": 11836 }, { "epoch": 0.209627789805505, "grad_norm": 1.0387517213821411, "learning_rate": 2.7532299660866826e-05, "loss": 0.0998, "step": 11837 }, { "epoch": 0.20964549934253343, "grad_norm": 0.667401134967804, "learning_rate": 2.753182686065361e-05, "loss": 0.1036, "step": 11838 }, { "epoch": 0.20966320887956186, "grad_norm": 1.1153485774993896, "learning_rate": 2.753135401921195e-05, "loss": 0.1159, "step": 11839 }, { "epoch": 0.20968091841659028, "grad_norm": 0.8274309635162354, "learning_rate": 2.75308811365434e-05, "loss": 0.0961, "step": 11840 }, { "epoch": 0.2096986279536187, "grad_norm": 0.8236773610115051, "learning_rate": 2.7530408212649527e-05, "loss": 0.0953, "step": 11841 }, { "epoch": 0.20971633749064716, "grad_norm": 0.5573323965072632, "learning_rate": 2.752993524753187e-05, "loss": 0.1133, "step": 11842 }, { "epoch": 0.2097340470276756, "grad_norm": 1.3061320781707764, "learning_rate": 2.7529462241192e-05, "loss": 0.1461, "step": 11843 }, { "epoch": 0.20975175656470402, "grad_norm": 0.7998281121253967, "learning_rate": 2.752898919363146e-05, "loss": 0.1184, "step": 11844 }, { "epoch": 0.20976946610173244, "grad_norm": 0.8465624451637268, "learning_rate": 2.7528516104851816e-05, "loss": 0.1425, "step": 11845 }, { "epoch": 0.20978717563876087, "grad_norm": 0.9001927971839905, "learning_rate": 2.752804297485462e-05, "loss": 0.1339, "step": 11846 }, { "epoch": 0.2098048851757893, "grad_norm": 1.3853739500045776, "learning_rate": 2.7527569803641434e-05, "loss": 0.172, "step": 11847 }, { "epoch": 0.20982259471281772, "grad_norm": 0.9242575168609619, "learning_rate": 2.7527096591213807e-05, "loss": 0.1238, "step": 11848 }, { "epoch": 0.20984030424984615, "grad_norm": 0.5739115476608276, "learning_rate": 2.7526623337573297e-05, "loss": 0.1412, "step": 11849 }, { "epoch": 0.20985801378687458, "grad_norm": 0.8566415309906006, "learning_rate": 2.7526150042721466e-05, "loss": 0.0808, "step": 11850 }, { "epoch": 0.209875723323903, "grad_norm": 1.0194748640060425, "learning_rate": 2.7525676706659866e-05, "loss": 0.1273, "step": 11851 }, { "epoch": 0.20989343286093143, "grad_norm": 1.0648102760314941, "learning_rate": 2.7525203329390063e-05, "loss": 0.1235, "step": 11852 }, { "epoch": 0.20991114239795985, "grad_norm": 1.7027761936187744, "learning_rate": 2.7524729910913604e-05, "loss": 0.1262, "step": 11853 }, { "epoch": 0.20992885193498828, "grad_norm": 0.8143429160118103, "learning_rate": 2.7524256451232052e-05, "loss": 0.1069, "step": 11854 }, { "epoch": 0.2099465614720167, "grad_norm": 0.962445080280304, "learning_rate": 2.7523782950346955e-05, "loss": 0.0768, "step": 11855 }, { "epoch": 0.20996427100904513, "grad_norm": 1.1880823373794556, "learning_rate": 2.7523309408259888e-05, "loss": 0.1064, "step": 11856 }, { "epoch": 0.2099819805460736, "grad_norm": 1.1459972858428955, "learning_rate": 2.7522835824972393e-05, "loss": 0.1157, "step": 11857 }, { "epoch": 0.20999969008310201, "grad_norm": 0.7466773986816406, "learning_rate": 2.752236220048604e-05, "loss": 0.0861, "step": 11858 }, { "epoch": 0.21001739962013044, "grad_norm": 1.9042003154754639, "learning_rate": 2.752188853480238e-05, "loss": 0.1127, "step": 11859 }, { "epoch": 0.21003510915715887, "grad_norm": 1.1991418600082397, "learning_rate": 2.752141482792297e-05, "loss": 0.1185, "step": 11860 }, { "epoch": 0.2100528186941873, "grad_norm": 1.0173894166946411, "learning_rate": 2.7520941079849377e-05, "loss": 0.088, "step": 11861 }, { "epoch": 0.21007052823121572, "grad_norm": 1.2395652532577515, "learning_rate": 2.7520467290583155e-05, "loss": 0.1026, "step": 11862 }, { "epoch": 0.21008823776824415, "grad_norm": 0.6721280813217163, "learning_rate": 2.751999346012586e-05, "loss": 0.0747, "step": 11863 }, { "epoch": 0.21010594730527257, "grad_norm": 0.46612510085105896, "learning_rate": 2.751951958847905e-05, "loss": 0.0923, "step": 11864 }, { "epoch": 0.210123656842301, "grad_norm": 1.382131814956665, "learning_rate": 2.751904567564429e-05, "loss": 0.1101, "step": 11865 }, { "epoch": 0.21014136637932943, "grad_norm": 0.8781008720397949, "learning_rate": 2.751857172162313e-05, "loss": 0.1304, "step": 11866 }, { "epoch": 0.21015907591635785, "grad_norm": 0.9439311623573303, "learning_rate": 2.7518097726417145e-05, "loss": 0.1075, "step": 11867 }, { "epoch": 0.21017678545338628, "grad_norm": 0.9652137160301208, "learning_rate": 2.7517623690027877e-05, "loss": 0.1182, "step": 11868 }, { "epoch": 0.2101944949904147, "grad_norm": 1.289651870727539, "learning_rate": 2.75171496124569e-05, "loss": 0.1022, "step": 11869 }, { "epoch": 0.21021220452744313, "grad_norm": 1.321622371673584, "learning_rate": 2.751667549370576e-05, "loss": 0.1276, "step": 11870 }, { "epoch": 0.21022991406447156, "grad_norm": 0.6855002641677856, "learning_rate": 2.7516201333776025e-05, "loss": 0.0934, "step": 11871 }, { "epoch": 0.2102476236015, "grad_norm": 1.5062291622161865, "learning_rate": 2.7515727132669258e-05, "loss": 0.1059, "step": 11872 }, { "epoch": 0.21026533313852844, "grad_norm": 0.7900852560997009, "learning_rate": 2.751525289038701e-05, "loss": 0.1092, "step": 11873 }, { "epoch": 0.21028304267555686, "grad_norm": 1.2106350660324097, "learning_rate": 2.7514778606930846e-05, "loss": 0.1008, "step": 11874 }, { "epoch": 0.2103007522125853, "grad_norm": 1.0132287740707397, "learning_rate": 2.7514304282302328e-05, "loss": 0.0953, "step": 11875 }, { "epoch": 0.21031846174961372, "grad_norm": 0.6786735653877258, "learning_rate": 2.7513829916503015e-05, "loss": 0.1267, "step": 11876 }, { "epoch": 0.21033617128664214, "grad_norm": 1.3432023525238037, "learning_rate": 2.7513355509534465e-05, "loss": 0.1235, "step": 11877 }, { "epoch": 0.21035388082367057, "grad_norm": 0.9894353747367859, "learning_rate": 2.751288106139824e-05, "loss": 0.1253, "step": 11878 }, { "epoch": 0.210371590360699, "grad_norm": 1.4755046367645264, "learning_rate": 2.7512406572095906e-05, "loss": 0.1101, "step": 11879 }, { "epoch": 0.21038929989772742, "grad_norm": 1.0716830492019653, "learning_rate": 2.7511932041629017e-05, "loss": 0.1129, "step": 11880 }, { "epoch": 0.21040700943475585, "grad_norm": 0.6579023599624634, "learning_rate": 2.751145746999914e-05, "loss": 0.0969, "step": 11881 }, { "epoch": 0.21042471897178427, "grad_norm": 1.2290269136428833, "learning_rate": 2.7510982857207828e-05, "loss": 0.1329, "step": 11882 }, { "epoch": 0.2104424285088127, "grad_norm": 0.7325996160507202, "learning_rate": 2.7510508203256657e-05, "loss": 0.0977, "step": 11883 }, { "epoch": 0.21046013804584113, "grad_norm": 1.6611015796661377, "learning_rate": 2.7510033508147174e-05, "loss": 0.1127, "step": 11884 }, { "epoch": 0.21047784758286955, "grad_norm": 0.6291844844818115, "learning_rate": 2.7509558771880945e-05, "loss": 0.1132, "step": 11885 }, { "epoch": 0.21049555711989798, "grad_norm": 0.7334240078926086, "learning_rate": 2.7509083994459534e-05, "loss": 0.1051, "step": 11886 }, { "epoch": 0.21051326665692643, "grad_norm": 0.7754197120666504, "learning_rate": 2.75086091758845e-05, "loss": 0.0768, "step": 11887 }, { "epoch": 0.21053097619395486, "grad_norm": 1.3271448612213135, "learning_rate": 2.7508134316157407e-05, "loss": 0.1054, "step": 11888 }, { "epoch": 0.2105486857309833, "grad_norm": 0.7992666959762573, "learning_rate": 2.7507659415279822e-05, "loss": 0.0781, "step": 11889 }, { "epoch": 0.2105663952680117, "grad_norm": 0.7372426986694336, "learning_rate": 2.75071844732533e-05, "loss": 0.069, "step": 11890 }, { "epoch": 0.21058410480504014, "grad_norm": 1.0232762098312378, "learning_rate": 2.7506709490079402e-05, "loss": 0.0694, "step": 11891 }, { "epoch": 0.21060181434206857, "grad_norm": 1.1508620977401733, "learning_rate": 2.75062344657597e-05, "loss": 0.1372, "step": 11892 }, { "epoch": 0.210619523879097, "grad_norm": 0.7825685143470764, "learning_rate": 2.7505759400295748e-05, "loss": 0.0996, "step": 11893 }, { "epoch": 0.21063723341612542, "grad_norm": 0.5599676966667175, "learning_rate": 2.7505284293689117e-05, "loss": 0.0862, "step": 11894 }, { "epoch": 0.21065494295315385, "grad_norm": 1.120439052581787, "learning_rate": 2.750480914594136e-05, "loss": 0.096, "step": 11895 }, { "epoch": 0.21067265249018227, "grad_norm": 0.606678307056427, "learning_rate": 2.750433395705405e-05, "loss": 0.0658, "step": 11896 }, { "epoch": 0.2106903620272107, "grad_norm": 0.8897769451141357, "learning_rate": 2.7503858727028746e-05, "loss": 0.1242, "step": 11897 }, { "epoch": 0.21070807156423912, "grad_norm": 0.8082751035690308, "learning_rate": 2.7503383455867008e-05, "loss": 0.0844, "step": 11898 }, { "epoch": 0.21072578110126755, "grad_norm": 0.7260662317276001, "learning_rate": 2.7502908143570407e-05, "loss": 0.0854, "step": 11899 }, { "epoch": 0.21074349063829598, "grad_norm": 0.8231229782104492, "learning_rate": 2.7502432790140502e-05, "loss": 0.0967, "step": 11900 }, { "epoch": 0.2107612001753244, "grad_norm": 0.7176458835601807, "learning_rate": 2.750195739557885e-05, "loss": 0.1208, "step": 11901 }, { "epoch": 0.21077890971235286, "grad_norm": 1.226457118988037, "learning_rate": 2.7501481959887035e-05, "loss": 0.1484, "step": 11902 }, { "epoch": 0.21079661924938128, "grad_norm": 0.6313685774803162, "learning_rate": 2.75010064830666e-05, "loss": 0.1149, "step": 11903 }, { "epoch": 0.2108143287864097, "grad_norm": 1.0193666219711304, "learning_rate": 2.7500530965119122e-05, "loss": 0.1215, "step": 11904 }, { "epoch": 0.21083203832343814, "grad_norm": 0.8956096768379211, "learning_rate": 2.750005540604616e-05, "loss": 0.1146, "step": 11905 }, { "epoch": 0.21084974786046656, "grad_norm": 0.8981322646141052, "learning_rate": 2.7499579805849283e-05, "loss": 0.0953, "step": 11906 }, { "epoch": 0.210867457397495, "grad_norm": 1.2037733793258667, "learning_rate": 2.749910416453005e-05, "loss": 0.1098, "step": 11907 }, { "epoch": 0.21088516693452342, "grad_norm": 0.3473382890224457, "learning_rate": 2.7498628482090026e-05, "loss": 0.0588, "step": 11908 }, { "epoch": 0.21090287647155184, "grad_norm": 1.0850495100021362, "learning_rate": 2.749815275853078e-05, "loss": 0.1022, "step": 11909 }, { "epoch": 0.21092058600858027, "grad_norm": 0.9709604382514954, "learning_rate": 2.749767699385388e-05, "loss": 0.0769, "step": 11910 }, { "epoch": 0.2109382955456087, "grad_norm": 0.7078951597213745, "learning_rate": 2.7497201188060885e-05, "loss": 0.0987, "step": 11911 }, { "epoch": 0.21095600508263712, "grad_norm": 1.4712010622024536, "learning_rate": 2.749672534115336e-05, "loss": 0.12, "step": 11912 }, { "epoch": 0.21097371461966555, "grad_norm": 1.1542236804962158, "learning_rate": 2.7496249453132872e-05, "loss": 0.1002, "step": 11913 }, { "epoch": 0.21099142415669397, "grad_norm": 1.4017047882080078, "learning_rate": 2.749577352400099e-05, "loss": 0.1494, "step": 11914 }, { "epoch": 0.2110091336937224, "grad_norm": 1.018297553062439, "learning_rate": 2.7495297553759274e-05, "loss": 0.1331, "step": 11915 }, { "epoch": 0.21102684323075083, "grad_norm": 0.7716536521911621, "learning_rate": 2.7494821542409295e-05, "loss": 0.1225, "step": 11916 }, { "epoch": 0.21104455276777928, "grad_norm": 0.785054087638855, "learning_rate": 2.7494345489952617e-05, "loss": 0.0942, "step": 11917 }, { "epoch": 0.2110622623048077, "grad_norm": 1.1267722845077515, "learning_rate": 2.7493869396390802e-05, "loss": 0.1229, "step": 11918 }, { "epoch": 0.21107997184183613, "grad_norm": 1.0361441373825073, "learning_rate": 2.749339326172543e-05, "loss": 0.0884, "step": 11919 }, { "epoch": 0.21109768137886456, "grad_norm": 1.0853768587112427, "learning_rate": 2.7492917085958047e-05, "loss": 0.1261, "step": 11920 }, { "epoch": 0.211115390915893, "grad_norm": 1.2033584117889404, "learning_rate": 2.7492440869090235e-05, "loss": 0.0968, "step": 11921 }, { "epoch": 0.2111331004529214, "grad_norm": 1.26689612865448, "learning_rate": 2.7491964611123553e-05, "loss": 0.0986, "step": 11922 }, { "epoch": 0.21115080998994984, "grad_norm": 1.082227349281311, "learning_rate": 2.749148831205958e-05, "loss": 0.1113, "step": 11923 }, { "epoch": 0.21116851952697827, "grad_norm": 0.8012351989746094, "learning_rate": 2.7491011971899866e-05, "loss": 0.1059, "step": 11924 }, { "epoch": 0.2111862290640067, "grad_norm": 1.3917466402053833, "learning_rate": 2.7490535590645984e-05, "loss": 0.1315, "step": 11925 }, { "epoch": 0.21120393860103512, "grad_norm": 1.1404638290405273, "learning_rate": 2.749005916829951e-05, "loss": 0.1717, "step": 11926 }, { "epoch": 0.21122164813806354, "grad_norm": 1.2790318727493286, "learning_rate": 2.7489582704862e-05, "loss": 0.1477, "step": 11927 }, { "epoch": 0.21123935767509197, "grad_norm": 1.102066159248352, "learning_rate": 2.748910620033503e-05, "loss": 0.1433, "step": 11928 }, { "epoch": 0.2112570672121204, "grad_norm": 0.9276514053344727, "learning_rate": 2.748862965472016e-05, "loss": 0.106, "step": 11929 }, { "epoch": 0.21127477674914882, "grad_norm": 0.817127525806427, "learning_rate": 2.7488153068018965e-05, "loss": 0.1206, "step": 11930 }, { "epoch": 0.21129248628617725, "grad_norm": 1.197437047958374, "learning_rate": 2.7487676440233006e-05, "loss": 0.1217, "step": 11931 }, { "epoch": 0.2113101958232057, "grad_norm": 1.1027826070785522, "learning_rate": 2.7487199771363856e-05, "loss": 0.1087, "step": 11932 }, { "epoch": 0.21132790536023413, "grad_norm": 0.884220540523529, "learning_rate": 2.7486723061413082e-05, "loss": 0.1029, "step": 11933 }, { "epoch": 0.21134561489726256, "grad_norm": 0.7871856093406677, "learning_rate": 2.748624631038225e-05, "loss": 0.1359, "step": 11934 }, { "epoch": 0.21136332443429098, "grad_norm": 0.8940601944923401, "learning_rate": 2.7485769518272934e-05, "loss": 0.0974, "step": 11935 }, { "epoch": 0.2113810339713194, "grad_norm": 0.8685569763183594, "learning_rate": 2.74852926850867e-05, "loss": 0.1205, "step": 11936 }, { "epoch": 0.21139874350834784, "grad_norm": 0.878080427646637, "learning_rate": 2.7484815810825114e-05, "loss": 0.1154, "step": 11937 }, { "epoch": 0.21141645304537626, "grad_norm": 0.6685788631439209, "learning_rate": 2.748433889548974e-05, "loss": 0.1175, "step": 11938 }, { "epoch": 0.2114341625824047, "grad_norm": 0.669130802154541, "learning_rate": 2.7483861939082162e-05, "loss": 0.095, "step": 11939 }, { "epoch": 0.21145187211943312, "grad_norm": 1.3089020252227783, "learning_rate": 2.748338494160394e-05, "loss": 0.1133, "step": 11940 }, { "epoch": 0.21146958165646154, "grad_norm": 1.156433343887329, "learning_rate": 2.7482907903056648e-05, "loss": 0.108, "step": 11941 }, { "epoch": 0.21148729119348997, "grad_norm": 1.0014374256134033, "learning_rate": 2.7482430823441845e-05, "loss": 0.0922, "step": 11942 }, { "epoch": 0.2115050007305184, "grad_norm": 1.0209980010986328, "learning_rate": 2.748195370276111e-05, "loss": 0.0808, "step": 11943 }, { "epoch": 0.21152271026754682, "grad_norm": 1.1123780012130737, "learning_rate": 2.7481476541016005e-05, "loss": 0.1476, "step": 11944 }, { "epoch": 0.21154041980457525, "grad_norm": 1.2335047721862793, "learning_rate": 2.748099933820811e-05, "loss": 0.1233, "step": 11945 }, { "epoch": 0.21155812934160367, "grad_norm": 1.3785138130187988, "learning_rate": 2.7480522094338988e-05, "loss": 0.1117, "step": 11946 }, { "epoch": 0.21157583887863213, "grad_norm": 0.9499008059501648, "learning_rate": 2.748004480941021e-05, "loss": 0.114, "step": 11947 }, { "epoch": 0.21159354841566055, "grad_norm": 2.693798780441284, "learning_rate": 2.7479567483423343e-05, "loss": 0.103, "step": 11948 }, { "epoch": 0.21161125795268898, "grad_norm": 0.8720285296440125, "learning_rate": 2.747909011637997e-05, "loss": 0.1367, "step": 11949 }, { "epoch": 0.2116289674897174, "grad_norm": 1.4201678037643433, "learning_rate": 2.747861270828165e-05, "loss": 0.1222, "step": 11950 }, { "epoch": 0.21164667702674583, "grad_norm": 1.4130924940109253, "learning_rate": 2.7478135259129953e-05, "loss": 0.1648, "step": 11951 }, { "epoch": 0.21166438656377426, "grad_norm": 0.7764090895652771, "learning_rate": 2.7477657768926452e-05, "loss": 0.0895, "step": 11952 }, { "epoch": 0.21168209610080269, "grad_norm": 0.7399837374687195, "learning_rate": 2.7477180237672722e-05, "loss": 0.1263, "step": 11953 }, { "epoch": 0.2116998056378311, "grad_norm": 0.6528707146644592, "learning_rate": 2.7476702665370333e-05, "loss": 0.0948, "step": 11954 }, { "epoch": 0.21171751517485954, "grad_norm": 0.9738554358482361, "learning_rate": 2.747622505202085e-05, "loss": 0.1099, "step": 11955 }, { "epoch": 0.21173522471188796, "grad_norm": 0.7635704278945923, "learning_rate": 2.7475747397625848e-05, "loss": 0.0946, "step": 11956 }, { "epoch": 0.2117529342489164, "grad_norm": 0.9194570779800415, "learning_rate": 2.74752697021869e-05, "loss": 0.1094, "step": 11957 }, { "epoch": 0.21177064378594482, "grad_norm": 0.9574493169784546, "learning_rate": 2.7474791965705582e-05, "loss": 0.0905, "step": 11958 }, { "epoch": 0.21178835332297324, "grad_norm": 0.7239304780960083, "learning_rate": 2.7474314188183457e-05, "loss": 0.1156, "step": 11959 }, { "epoch": 0.21180606286000167, "grad_norm": 0.7994765639305115, "learning_rate": 2.7473836369622098e-05, "loss": 0.1131, "step": 11960 }, { "epoch": 0.21182377239703012, "grad_norm": 1.3720792531967163, "learning_rate": 2.7473358510023076e-05, "loss": 0.1533, "step": 11961 }, { "epoch": 0.21184148193405855, "grad_norm": 0.9742757081985474, "learning_rate": 2.7472880609387972e-05, "loss": 0.1084, "step": 11962 }, { "epoch": 0.21185919147108698, "grad_norm": 0.7639182209968567, "learning_rate": 2.747240266771835e-05, "loss": 0.1048, "step": 11963 }, { "epoch": 0.2118769010081154, "grad_norm": 0.9794225096702576, "learning_rate": 2.7471924685015784e-05, "loss": 0.1027, "step": 11964 }, { "epoch": 0.21189461054514383, "grad_norm": 0.5677722096443176, "learning_rate": 2.747144666128185e-05, "loss": 0.06, "step": 11965 }, { "epoch": 0.21191232008217226, "grad_norm": 0.6040748357772827, "learning_rate": 2.747096859651812e-05, "loss": 0.0839, "step": 11966 }, { "epoch": 0.21193002961920068, "grad_norm": 0.8103699088096619, "learning_rate": 2.747049049072616e-05, "loss": 0.0832, "step": 11967 }, { "epoch": 0.2119477391562291, "grad_norm": 0.7356539964675903, "learning_rate": 2.7470012343907546e-05, "loss": 0.1285, "step": 11968 }, { "epoch": 0.21196544869325754, "grad_norm": 1.1012057065963745, "learning_rate": 2.7469534156063854e-05, "loss": 0.0718, "step": 11969 }, { "epoch": 0.21198315823028596, "grad_norm": 1.2333290576934814, "learning_rate": 2.7469055927196658e-05, "loss": 0.1343, "step": 11970 }, { "epoch": 0.2120008677673144, "grad_norm": 1.0662630796432495, "learning_rate": 2.746857765730753e-05, "loss": 0.063, "step": 11971 }, { "epoch": 0.21201857730434281, "grad_norm": 0.8988332152366638, "learning_rate": 2.7468099346398036e-05, "loss": 0.0967, "step": 11972 }, { "epoch": 0.21203628684137124, "grad_norm": 1.3629374504089355, "learning_rate": 2.7467620994469764e-05, "loss": 0.1048, "step": 11973 }, { "epoch": 0.21205399637839967, "grad_norm": 0.7279009222984314, "learning_rate": 2.7467142601524274e-05, "loss": 0.0792, "step": 11974 }, { "epoch": 0.2120717059154281, "grad_norm": 1.0692201852798462, "learning_rate": 2.746666416756315e-05, "loss": 0.1065, "step": 11975 }, { "epoch": 0.21208941545245655, "grad_norm": 1.0927995443344116, "learning_rate": 2.746618569258796e-05, "loss": 0.1239, "step": 11976 }, { "epoch": 0.21210712498948497, "grad_norm": 1.21375572681427, "learning_rate": 2.746570717660028e-05, "loss": 0.0995, "step": 11977 }, { "epoch": 0.2121248345265134, "grad_norm": 0.7350465655326843, "learning_rate": 2.7465228619601686e-05, "loss": 0.112, "step": 11978 }, { "epoch": 0.21214254406354183, "grad_norm": 1.0357271432876587, "learning_rate": 2.746475002159375e-05, "loss": 0.1296, "step": 11979 }, { "epoch": 0.21216025360057025, "grad_norm": 1.281126856803894, "learning_rate": 2.7464271382578048e-05, "loss": 0.0986, "step": 11980 }, { "epoch": 0.21217796313759868, "grad_norm": 1.0229895114898682, "learning_rate": 2.746379270255615e-05, "loss": 0.1195, "step": 11981 }, { "epoch": 0.2121956726746271, "grad_norm": 1.5567810535430908, "learning_rate": 2.7463313981529637e-05, "loss": 0.1082, "step": 11982 }, { "epoch": 0.21221338221165553, "grad_norm": 0.8700738549232483, "learning_rate": 2.746283521950008e-05, "loss": 0.1057, "step": 11983 }, { "epoch": 0.21223109174868396, "grad_norm": 0.888154149055481, "learning_rate": 2.7462356416469057e-05, "loss": 0.0593, "step": 11984 }, { "epoch": 0.21224880128571239, "grad_norm": 0.9623886942863464, "learning_rate": 2.7461877572438146e-05, "loss": 0.1181, "step": 11985 }, { "epoch": 0.2122665108227408, "grad_norm": 0.7393441796302795, "learning_rate": 2.7461398687408913e-05, "loss": 0.0866, "step": 11986 }, { "epoch": 0.21228422035976924, "grad_norm": 1.021584391593933, "learning_rate": 2.746091976138294e-05, "loss": 0.1469, "step": 11987 }, { "epoch": 0.21230192989679766, "grad_norm": 0.6088911294937134, "learning_rate": 2.74604407943618e-05, "loss": 0.0631, "step": 11988 }, { "epoch": 0.2123196394338261, "grad_norm": 0.8621990084648132, "learning_rate": 2.7459961786347073e-05, "loss": 0.1195, "step": 11989 }, { "epoch": 0.21233734897085452, "grad_norm": 0.8783260583877563, "learning_rate": 2.745948273734033e-05, "loss": 0.1161, "step": 11990 }, { "epoch": 0.21235505850788297, "grad_norm": 0.5946685671806335, "learning_rate": 2.745900364734315e-05, "loss": 0.1134, "step": 11991 }, { "epoch": 0.2123727680449114, "grad_norm": 0.9135231375694275, "learning_rate": 2.745852451635711e-05, "loss": 0.1166, "step": 11992 }, { "epoch": 0.21239047758193982, "grad_norm": 1.0428839921951294, "learning_rate": 2.7458045344383783e-05, "loss": 0.1121, "step": 11993 }, { "epoch": 0.21240818711896825, "grad_norm": 1.0141379833221436, "learning_rate": 2.745756613142474e-05, "loss": 0.1442, "step": 11994 }, { "epoch": 0.21242589665599668, "grad_norm": 0.6749669909477234, "learning_rate": 2.7457086877481573e-05, "loss": 0.1146, "step": 11995 }, { "epoch": 0.2124436061930251, "grad_norm": 0.7698509693145752, "learning_rate": 2.7456607582555847e-05, "loss": 0.1117, "step": 11996 }, { "epoch": 0.21246131573005353, "grad_norm": 0.9893009662628174, "learning_rate": 2.7456128246649143e-05, "loss": 0.099, "step": 11997 }, { "epoch": 0.21247902526708196, "grad_norm": 1.3077305555343628, "learning_rate": 2.7455648869763036e-05, "loss": 0.1364, "step": 11998 }, { "epoch": 0.21249673480411038, "grad_norm": 0.8509182929992676, "learning_rate": 2.7455169451899104e-05, "loss": 0.1556, "step": 11999 }, { "epoch": 0.2125144443411388, "grad_norm": 0.6807008385658264, "learning_rate": 2.7454689993058923e-05, "loss": 0.081, "step": 12000 }, { "epoch": 0.21253215387816723, "grad_norm": 0.7651553750038147, "learning_rate": 2.7454210493244076e-05, "loss": 0.1041, "step": 12001 }, { "epoch": 0.21254986341519566, "grad_norm": 0.8339100480079651, "learning_rate": 2.745373095245613e-05, "loss": 0.1252, "step": 12002 }, { "epoch": 0.2125675729522241, "grad_norm": 1.149455189704895, "learning_rate": 2.7453251370696674e-05, "loss": 0.1383, "step": 12003 }, { "epoch": 0.21258528248925251, "grad_norm": 0.8055863976478577, "learning_rate": 2.745277174796728e-05, "loss": 0.1398, "step": 12004 }, { "epoch": 0.21260299202628094, "grad_norm": 0.8765118718147278, "learning_rate": 2.7452292084269525e-05, "loss": 0.1102, "step": 12005 }, { "epoch": 0.2126207015633094, "grad_norm": 0.8343167304992676, "learning_rate": 2.7451812379604988e-05, "loss": 0.1228, "step": 12006 }, { "epoch": 0.21263841110033782, "grad_norm": 0.8944414854049683, "learning_rate": 2.745133263397525e-05, "loss": 0.0892, "step": 12007 }, { "epoch": 0.21265612063736625, "grad_norm": 0.5862358212471008, "learning_rate": 2.7450852847381885e-05, "loss": 0.0866, "step": 12008 }, { "epoch": 0.21267383017439467, "grad_norm": 0.7144233584403992, "learning_rate": 2.7450373019826477e-05, "loss": 0.0805, "step": 12009 }, { "epoch": 0.2126915397114231, "grad_norm": 1.0339924097061157, "learning_rate": 2.7449893151310595e-05, "loss": 0.1118, "step": 12010 }, { "epoch": 0.21270924924845153, "grad_norm": 0.6070607900619507, "learning_rate": 2.7449413241835825e-05, "loss": 0.0887, "step": 12011 }, { "epoch": 0.21272695878547995, "grad_norm": 1.1600708961486816, "learning_rate": 2.744893329140375e-05, "loss": 0.1708, "step": 12012 }, { "epoch": 0.21274466832250838, "grad_norm": 0.6966260671615601, "learning_rate": 2.7448453300015937e-05, "loss": 0.0695, "step": 12013 }, { "epoch": 0.2127623778595368, "grad_norm": 0.6521174907684326, "learning_rate": 2.7447973267673977e-05, "loss": 0.1294, "step": 12014 }, { "epoch": 0.21278008739656523, "grad_norm": 1.0800977945327759, "learning_rate": 2.7447493194379437e-05, "loss": 0.0909, "step": 12015 }, { "epoch": 0.21279779693359366, "grad_norm": 1.3702507019042969, "learning_rate": 2.744701308013391e-05, "loss": 0.0908, "step": 12016 }, { "epoch": 0.21281550647062208, "grad_norm": 0.730486273765564, "learning_rate": 2.744653292493897e-05, "loss": 0.1345, "step": 12017 }, { "epoch": 0.2128332160076505, "grad_norm": 0.7526103258132935, "learning_rate": 2.744605272879619e-05, "loss": 0.1144, "step": 12018 }, { "epoch": 0.21285092554467894, "grad_norm": 1.0972881317138672, "learning_rate": 2.744557249170716e-05, "loss": 0.122, "step": 12019 }, { "epoch": 0.21286863508170736, "grad_norm": 0.982492208480835, "learning_rate": 2.7445092213673454e-05, "loss": 0.1214, "step": 12020 }, { "epoch": 0.21288634461873582, "grad_norm": 1.0979340076446533, "learning_rate": 2.744461189469665e-05, "loss": 0.1185, "step": 12021 }, { "epoch": 0.21290405415576424, "grad_norm": 1.1426914930343628, "learning_rate": 2.7444131534778336e-05, "loss": 0.134, "step": 12022 }, { "epoch": 0.21292176369279267, "grad_norm": 1.1293178796768188, "learning_rate": 2.7443651133920085e-05, "loss": 0.0952, "step": 12023 }, { "epoch": 0.2129394732298211, "grad_norm": 0.6642139554023743, "learning_rate": 2.7443170692123482e-05, "loss": 0.0743, "step": 12024 }, { "epoch": 0.21295718276684952, "grad_norm": 1.1002732515335083, "learning_rate": 2.7442690209390105e-05, "loss": 0.1001, "step": 12025 }, { "epoch": 0.21297489230387795, "grad_norm": 1.1437314748764038, "learning_rate": 2.7442209685721534e-05, "loss": 0.1168, "step": 12026 }, { "epoch": 0.21299260184090638, "grad_norm": 0.8734322786331177, "learning_rate": 2.7441729121119357e-05, "loss": 0.0929, "step": 12027 }, { "epoch": 0.2130103113779348, "grad_norm": 1.1324506998062134, "learning_rate": 2.7441248515585145e-05, "loss": 0.1225, "step": 12028 }, { "epoch": 0.21302802091496323, "grad_norm": 0.7622053027153015, "learning_rate": 2.7440767869120483e-05, "loss": 0.1281, "step": 12029 }, { "epoch": 0.21304573045199166, "grad_norm": 12.778517723083496, "learning_rate": 2.7440287181726955e-05, "loss": 0.1087, "step": 12030 }, { "epoch": 0.21306343998902008, "grad_norm": 1.8739770650863647, "learning_rate": 2.7439806453406143e-05, "loss": 0.0819, "step": 12031 }, { "epoch": 0.2130811495260485, "grad_norm": 1.1571729183197021, "learning_rate": 2.7439325684159623e-05, "loss": 0.1237, "step": 12032 }, { "epoch": 0.21309885906307693, "grad_norm": 1.2701044082641602, "learning_rate": 2.743884487398898e-05, "loss": 0.1422, "step": 12033 }, { "epoch": 0.21311656860010536, "grad_norm": 0.8661103844642639, "learning_rate": 2.7438364022895798e-05, "loss": 0.1385, "step": 12034 }, { "epoch": 0.2131342781371338, "grad_norm": 0.7722225785255432, "learning_rate": 2.7437883130881654e-05, "loss": 0.1169, "step": 12035 }, { "epoch": 0.21315198767416224, "grad_norm": 0.9132043719291687, "learning_rate": 2.743740219794813e-05, "loss": 0.1173, "step": 12036 }, { "epoch": 0.21316969721119067, "grad_norm": 1.4467865228652954, "learning_rate": 2.7436921224096816e-05, "loss": 0.1194, "step": 12037 }, { "epoch": 0.2131874067482191, "grad_norm": 0.7586389183998108, "learning_rate": 2.7436440209329285e-05, "loss": 0.0809, "step": 12038 }, { "epoch": 0.21320511628524752, "grad_norm": 1.0424067974090576, "learning_rate": 2.7435959153647122e-05, "loss": 0.1231, "step": 12039 }, { "epoch": 0.21322282582227595, "grad_norm": 2.1301231384277344, "learning_rate": 2.7435478057051916e-05, "loss": 0.0836, "step": 12040 }, { "epoch": 0.21324053535930437, "grad_norm": 0.8401663899421692, "learning_rate": 2.7434996919545246e-05, "loss": 0.1255, "step": 12041 }, { "epoch": 0.2132582448963328, "grad_norm": 0.881616473197937, "learning_rate": 2.7434515741128686e-05, "loss": 0.0846, "step": 12042 }, { "epoch": 0.21327595443336123, "grad_norm": 0.7037340402603149, "learning_rate": 2.7434034521803834e-05, "loss": 0.0925, "step": 12043 }, { "epoch": 0.21329366397038965, "grad_norm": 0.895451009273529, "learning_rate": 2.7433553261572264e-05, "loss": 0.1163, "step": 12044 }, { "epoch": 0.21331137350741808, "grad_norm": 0.7839975357055664, "learning_rate": 2.7433071960435562e-05, "loss": 0.0961, "step": 12045 }, { "epoch": 0.2133290830444465, "grad_norm": 0.9292768836021423, "learning_rate": 2.7432590618395307e-05, "loss": 0.0939, "step": 12046 }, { "epoch": 0.21334679258147493, "grad_norm": 1.2444806098937988, "learning_rate": 2.7432109235453095e-05, "loss": 0.1103, "step": 12047 }, { "epoch": 0.21336450211850336, "grad_norm": 1.3033092021942139, "learning_rate": 2.743162781161049e-05, "loss": 0.097, "step": 12048 }, { "epoch": 0.21338221165553178, "grad_norm": 1.111018180847168, "learning_rate": 2.7431146346869094e-05, "loss": 0.1382, "step": 12049 }, { "epoch": 0.2133999211925602, "grad_norm": 1.706627607345581, "learning_rate": 2.7430664841230483e-05, "loss": 0.1305, "step": 12050 }, { "epoch": 0.21341763072958866, "grad_norm": 0.7418352961540222, "learning_rate": 2.7430183294696238e-05, "loss": 0.0741, "step": 12051 }, { "epoch": 0.2134353402666171, "grad_norm": 0.996788501739502, "learning_rate": 2.7429701707267953e-05, "loss": 0.1237, "step": 12052 }, { "epoch": 0.21345304980364552, "grad_norm": 1.1284726858139038, "learning_rate": 2.7429220078947204e-05, "loss": 0.1273, "step": 12053 }, { "epoch": 0.21347075934067394, "grad_norm": 1.6291583776474, "learning_rate": 2.7428738409735577e-05, "loss": 0.0956, "step": 12054 }, { "epoch": 0.21348846887770237, "grad_norm": 0.8240261673927307, "learning_rate": 2.7428256699634658e-05, "loss": 0.1055, "step": 12055 }, { "epoch": 0.2135061784147308, "grad_norm": 0.5829989910125732, "learning_rate": 2.7427774948646032e-05, "loss": 0.084, "step": 12056 }, { "epoch": 0.21352388795175922, "grad_norm": 1.4615166187286377, "learning_rate": 2.7427293156771285e-05, "loss": 0.1431, "step": 12057 }, { "epoch": 0.21354159748878765, "grad_norm": 1.0211306810379028, "learning_rate": 2.7426811324011996e-05, "loss": 0.0971, "step": 12058 }, { "epoch": 0.21355930702581608, "grad_norm": 0.7513306140899658, "learning_rate": 2.7426329450369764e-05, "loss": 0.1285, "step": 12059 }, { "epoch": 0.2135770165628445, "grad_norm": 1.4953587055206299, "learning_rate": 2.7425847535846157e-05, "loss": 0.1218, "step": 12060 }, { "epoch": 0.21359472609987293, "grad_norm": 0.7796705365180969, "learning_rate": 2.742536558044277e-05, "loss": 0.0832, "step": 12061 }, { "epoch": 0.21361243563690135, "grad_norm": 0.9094902276992798, "learning_rate": 2.7424883584161187e-05, "loss": 0.116, "step": 12062 }, { "epoch": 0.21363014517392978, "grad_norm": 0.9299246072769165, "learning_rate": 2.7424401547002992e-05, "loss": 0.1175, "step": 12063 }, { "epoch": 0.2136478547109582, "grad_norm": 0.872480034828186, "learning_rate": 2.7423919468969775e-05, "loss": 0.0995, "step": 12064 }, { "epoch": 0.21366556424798663, "grad_norm": 1.023611307144165, "learning_rate": 2.742343735006312e-05, "loss": 0.1406, "step": 12065 }, { "epoch": 0.2136832737850151, "grad_norm": 1.3370051383972168, "learning_rate": 2.7422955190284608e-05, "loss": 0.1403, "step": 12066 }, { "epoch": 0.21370098332204351, "grad_norm": 1.2943483591079712, "learning_rate": 2.7422472989635837e-05, "loss": 0.1616, "step": 12067 }, { "epoch": 0.21371869285907194, "grad_norm": 0.7308619618415833, "learning_rate": 2.742199074811838e-05, "loss": 0.0693, "step": 12068 }, { "epoch": 0.21373640239610037, "grad_norm": 0.9954579472541809, "learning_rate": 2.7421508465733836e-05, "loss": 0.0951, "step": 12069 }, { "epoch": 0.2137541119331288, "grad_norm": 1.198455572128296, "learning_rate": 2.742102614248378e-05, "loss": 0.1474, "step": 12070 }, { "epoch": 0.21377182147015722, "grad_norm": 0.8197362422943115, "learning_rate": 2.7420543778369807e-05, "loss": 0.0831, "step": 12071 }, { "epoch": 0.21378953100718565, "grad_norm": 1.2226372957229614, "learning_rate": 2.7420061373393497e-05, "loss": 0.1062, "step": 12072 }, { "epoch": 0.21380724054421407, "grad_norm": 0.9964457750320435, "learning_rate": 2.741957892755645e-05, "loss": 0.1139, "step": 12073 }, { "epoch": 0.2138249500812425, "grad_norm": 1.075370192527771, "learning_rate": 2.7419096440860238e-05, "loss": 0.1293, "step": 12074 }, { "epoch": 0.21384265961827092, "grad_norm": 0.7146409153938293, "learning_rate": 2.7418613913306456e-05, "loss": 0.1117, "step": 12075 }, { "epoch": 0.21386036915529935, "grad_norm": 0.6437020897865295, "learning_rate": 2.741813134489669e-05, "loss": 0.0914, "step": 12076 }, { "epoch": 0.21387807869232778, "grad_norm": 1.137948751449585, "learning_rate": 2.7417648735632534e-05, "loss": 0.1359, "step": 12077 }, { "epoch": 0.2138957882293562, "grad_norm": 0.9016258120536804, "learning_rate": 2.7417166085515558e-05, "loss": 0.1241, "step": 12078 }, { "epoch": 0.21391349776638463, "grad_norm": 0.9791063070297241, "learning_rate": 2.741668339454737e-05, "loss": 0.1026, "step": 12079 }, { "epoch": 0.21393120730341306, "grad_norm": 0.9744675159454346, "learning_rate": 2.7416200662729546e-05, "loss": 0.1021, "step": 12080 }, { "epoch": 0.2139489168404415, "grad_norm": 1.254204511642456, "learning_rate": 2.741571789006368e-05, "loss": 0.1452, "step": 12081 }, { "epoch": 0.21396662637746994, "grad_norm": 1.0644292831420898, "learning_rate": 2.741523507655136e-05, "loss": 0.1077, "step": 12082 }, { "epoch": 0.21398433591449836, "grad_norm": 1.0047862529754639, "learning_rate": 2.7414752222194166e-05, "loss": 0.1236, "step": 12083 }, { "epoch": 0.2140020454515268, "grad_norm": 0.909930408000946, "learning_rate": 2.7414269326993696e-05, "loss": 0.088, "step": 12084 }, { "epoch": 0.21401975498855522, "grad_norm": 1.1327648162841797, "learning_rate": 2.7413786390951536e-05, "loss": 0.128, "step": 12085 }, { "epoch": 0.21403746452558364, "grad_norm": 0.8052230477333069, "learning_rate": 2.7413303414069277e-05, "loss": 0.0845, "step": 12086 }, { "epoch": 0.21405517406261207, "grad_norm": 0.8389812111854553, "learning_rate": 2.74128203963485e-05, "loss": 0.1169, "step": 12087 }, { "epoch": 0.2140728835996405, "grad_norm": 0.9371370673179626, "learning_rate": 2.74123373377908e-05, "loss": 0.0923, "step": 12088 }, { "epoch": 0.21409059313666892, "grad_norm": 0.7842544317245483, "learning_rate": 2.7411854238397764e-05, "loss": 0.1179, "step": 12089 }, { "epoch": 0.21410830267369735, "grad_norm": 0.41860339045524597, "learning_rate": 2.7411371098170987e-05, "loss": 0.0728, "step": 12090 }, { "epoch": 0.21412601221072577, "grad_norm": 0.8433542251586914, "learning_rate": 2.7410887917112055e-05, "loss": 0.0849, "step": 12091 }, { "epoch": 0.2141437217477542, "grad_norm": 0.6758030652999878, "learning_rate": 2.7410404695222554e-05, "loss": 0.0865, "step": 12092 }, { "epoch": 0.21416143128478263, "grad_norm": 0.8457604646682739, "learning_rate": 2.740992143250408e-05, "loss": 0.072, "step": 12093 }, { "epoch": 0.21417914082181105, "grad_norm": 0.9066845178604126, "learning_rate": 2.740943812895822e-05, "loss": 0.1131, "step": 12094 }, { "epoch": 0.2141968503588395, "grad_norm": 0.8055296540260315, "learning_rate": 2.740895478458656e-05, "loss": 0.0983, "step": 12095 }, { "epoch": 0.21421455989586793, "grad_norm": 1.0180723667144775, "learning_rate": 2.7408471399390696e-05, "loss": 0.0756, "step": 12096 }, { "epoch": 0.21423226943289636, "grad_norm": 1.6505476236343384, "learning_rate": 2.740798797337221e-05, "loss": 0.135, "step": 12097 }, { "epoch": 0.2142499789699248, "grad_norm": 1.4884123802185059, "learning_rate": 2.740750450653271e-05, "loss": 0.1437, "step": 12098 }, { "epoch": 0.2142676885069532, "grad_norm": 1.2547216415405273, "learning_rate": 2.7407020998873766e-05, "loss": 0.1222, "step": 12099 }, { "epoch": 0.21428539804398164, "grad_norm": 0.6295280456542969, "learning_rate": 2.740653745039698e-05, "loss": 0.1054, "step": 12100 }, { "epoch": 0.21430310758101007, "grad_norm": 0.7697655558586121, "learning_rate": 2.7406053861103943e-05, "loss": 0.1049, "step": 12101 }, { "epoch": 0.2143208171180385, "grad_norm": 0.9011271595954895, "learning_rate": 2.740557023099624e-05, "loss": 0.1425, "step": 12102 }, { "epoch": 0.21433852665506692, "grad_norm": 0.7380586862564087, "learning_rate": 2.7405086560075476e-05, "loss": 0.1059, "step": 12103 }, { "epoch": 0.21435623619209535, "grad_norm": 0.7822529077529907, "learning_rate": 2.740460284834322e-05, "loss": 0.0892, "step": 12104 }, { "epoch": 0.21437394572912377, "grad_norm": 0.7830172777175903, "learning_rate": 2.7404119095801083e-05, "loss": 0.0806, "step": 12105 }, { "epoch": 0.2143916552661522, "grad_norm": 0.6978353261947632, "learning_rate": 2.740363530245064e-05, "loss": 0.0998, "step": 12106 }, { "epoch": 0.21440936480318062, "grad_norm": 1.4906672239303589, "learning_rate": 2.7403151468293496e-05, "loss": 0.126, "step": 12107 }, { "epoch": 0.21442707434020905, "grad_norm": 1.037114143371582, "learning_rate": 2.740266759333124e-05, "loss": 0.1077, "step": 12108 }, { "epoch": 0.21444478387723748, "grad_norm": 1.386872410774231, "learning_rate": 2.740218367756546e-05, "loss": 0.1509, "step": 12109 }, { "epoch": 0.21446249341426593, "grad_norm": 0.8266937732696533, "learning_rate": 2.740169972099775e-05, "loss": 0.0922, "step": 12110 }, { "epoch": 0.21448020295129436, "grad_norm": 1.4724832773208618, "learning_rate": 2.7401215723629705e-05, "loss": 0.1083, "step": 12111 }, { "epoch": 0.21449791248832278, "grad_norm": 0.7988204956054688, "learning_rate": 2.740073168546291e-05, "loss": 0.1363, "step": 12112 }, { "epoch": 0.2145156220253512, "grad_norm": 0.7343606948852539, "learning_rate": 2.7400247606498966e-05, "loss": 0.1033, "step": 12113 }, { "epoch": 0.21453333156237964, "grad_norm": 0.6529625654220581, "learning_rate": 2.739976348673946e-05, "loss": 0.093, "step": 12114 }, { "epoch": 0.21455104109940806, "grad_norm": 1.0530604124069214, "learning_rate": 2.7399279326185987e-05, "loss": 0.1071, "step": 12115 }, { "epoch": 0.2145687506364365, "grad_norm": 0.8799625039100647, "learning_rate": 2.7398795124840136e-05, "loss": 0.083, "step": 12116 }, { "epoch": 0.21458646017346492, "grad_norm": 0.79949951171875, "learning_rate": 2.739831088270351e-05, "loss": 0.1106, "step": 12117 }, { "epoch": 0.21460416971049334, "grad_norm": 1.0186811685562134, "learning_rate": 2.739782659977769e-05, "loss": 0.1369, "step": 12118 }, { "epoch": 0.21462187924752177, "grad_norm": 0.9939776062965393, "learning_rate": 2.7397342276064274e-05, "loss": 0.0831, "step": 12119 }, { "epoch": 0.2146395887845502, "grad_norm": 0.6383013725280762, "learning_rate": 2.7396857911564858e-05, "loss": 0.119, "step": 12120 }, { "epoch": 0.21465729832157862, "grad_norm": 1.2169052362442017, "learning_rate": 2.7396373506281032e-05, "loss": 0.1153, "step": 12121 }, { "epoch": 0.21467500785860705, "grad_norm": 0.7693837881088257, "learning_rate": 2.7395889060214393e-05, "loss": 0.1001, "step": 12122 }, { "epoch": 0.21469271739563547, "grad_norm": 0.8457045555114746, "learning_rate": 2.739540457336653e-05, "loss": 0.1188, "step": 12123 }, { "epoch": 0.2147104269326639, "grad_norm": 1.158691644668579, "learning_rate": 2.7394920045739043e-05, "loss": 0.1413, "step": 12124 }, { "epoch": 0.21472813646969235, "grad_norm": 0.7370795011520386, "learning_rate": 2.739443547733352e-05, "loss": 0.1287, "step": 12125 }, { "epoch": 0.21474584600672078, "grad_norm": 1.1285992860794067, "learning_rate": 2.7393950868151558e-05, "loss": 0.1271, "step": 12126 }, { "epoch": 0.2147635555437492, "grad_norm": 0.679598331451416, "learning_rate": 2.739346621819475e-05, "loss": 0.0858, "step": 12127 }, { "epoch": 0.21478126508077763, "grad_norm": 1.1657482385635376, "learning_rate": 2.7392981527464694e-05, "loss": 0.1253, "step": 12128 }, { "epoch": 0.21479897461780606, "grad_norm": 1.1638468503952026, "learning_rate": 2.739249679596298e-05, "loss": 0.105, "step": 12129 }, { "epoch": 0.2148166841548345, "grad_norm": 0.9217689037322998, "learning_rate": 2.739201202369121e-05, "loss": 0.0841, "step": 12130 }, { "epoch": 0.2148343936918629, "grad_norm": 0.7764356136322021, "learning_rate": 2.739152721065097e-05, "loss": 0.0996, "step": 12131 }, { "epoch": 0.21485210322889134, "grad_norm": 0.6473864316940308, "learning_rate": 2.739104235684386e-05, "loss": 0.0858, "step": 12132 }, { "epoch": 0.21486981276591977, "grad_norm": 0.7044298052787781, "learning_rate": 2.7390557462271474e-05, "loss": 0.1028, "step": 12133 }, { "epoch": 0.2148875223029482, "grad_norm": 0.772842526435852, "learning_rate": 2.7390072526935406e-05, "loss": 0.0805, "step": 12134 }, { "epoch": 0.21490523183997662, "grad_norm": 0.8136147260665894, "learning_rate": 2.7389587550837253e-05, "loss": 0.1064, "step": 12135 }, { "epoch": 0.21492294137700504, "grad_norm": 0.8942804932594299, "learning_rate": 2.738910253397861e-05, "loss": 0.0854, "step": 12136 }, { "epoch": 0.21494065091403347, "grad_norm": 1.1167606115341187, "learning_rate": 2.738861747636107e-05, "loss": 0.1477, "step": 12137 }, { "epoch": 0.2149583604510619, "grad_norm": 0.7419834136962891, "learning_rate": 2.7388132377986234e-05, "loss": 0.1135, "step": 12138 }, { "epoch": 0.21497606998809032, "grad_norm": 0.9794779419898987, "learning_rate": 2.73876472388557e-05, "loss": 0.1267, "step": 12139 }, { "epoch": 0.21499377952511878, "grad_norm": 0.7644814252853394, "learning_rate": 2.738716205897105e-05, "loss": 0.1277, "step": 12140 }, { "epoch": 0.2150114890621472, "grad_norm": 0.6143444776535034, "learning_rate": 2.73866768383339e-05, "loss": 0.1158, "step": 12141 }, { "epoch": 0.21502919859917563, "grad_norm": 0.7359100580215454, "learning_rate": 2.738619157694583e-05, "loss": 0.1299, "step": 12142 }, { "epoch": 0.21504690813620406, "grad_norm": 0.807677686214447, "learning_rate": 2.7385706274808443e-05, "loss": 0.0738, "step": 12143 }, { "epoch": 0.21506461767323248, "grad_norm": 1.2137928009033203, "learning_rate": 2.738522093192334e-05, "loss": 0.1465, "step": 12144 }, { "epoch": 0.2150823272102609, "grad_norm": 0.7846404910087585, "learning_rate": 2.7384735548292102e-05, "loss": 0.127, "step": 12145 }, { "epoch": 0.21510003674728934, "grad_norm": 1.0076566934585571, "learning_rate": 2.7384250123916347e-05, "loss": 0.0771, "step": 12146 }, { "epoch": 0.21511774628431776, "grad_norm": 0.7716740965843201, "learning_rate": 2.7383764658797657e-05, "loss": 0.1015, "step": 12147 }, { "epoch": 0.2151354558213462, "grad_norm": 1.0400147438049316, "learning_rate": 2.7383279152937635e-05, "loss": 0.1264, "step": 12148 }, { "epoch": 0.21515316535837462, "grad_norm": 0.8811898827552795, "learning_rate": 2.7382793606337875e-05, "loss": 0.0976, "step": 12149 }, { "epoch": 0.21517087489540304, "grad_norm": 1.1265183687210083, "learning_rate": 2.7382308018999978e-05, "loss": 0.162, "step": 12150 }, { "epoch": 0.21518858443243147, "grad_norm": 1.620943307876587, "learning_rate": 2.7381822390925542e-05, "loss": 0.121, "step": 12151 }, { "epoch": 0.2152062939694599, "grad_norm": 0.8665394186973572, "learning_rate": 2.738133672211616e-05, "loss": 0.0948, "step": 12152 }, { "epoch": 0.21522400350648832, "grad_norm": 0.917103111743927, "learning_rate": 2.7380851012573435e-05, "loss": 0.1387, "step": 12153 }, { "epoch": 0.21524171304351675, "grad_norm": 0.7897868752479553, "learning_rate": 2.738036526229896e-05, "loss": 0.0738, "step": 12154 }, { "epoch": 0.2152594225805452, "grad_norm": 1.1293288469314575, "learning_rate": 2.7379879471294334e-05, "loss": 0.1352, "step": 12155 }, { "epoch": 0.21527713211757363, "grad_norm": 0.6200907230377197, "learning_rate": 2.7379393639561163e-05, "loss": 0.0825, "step": 12156 }, { "epoch": 0.21529484165460205, "grad_norm": 0.6841623187065125, "learning_rate": 2.7378907767101032e-05, "loss": 0.0891, "step": 12157 }, { "epoch": 0.21531255119163048, "grad_norm": 0.740469753742218, "learning_rate": 2.737842185391555e-05, "loss": 0.0913, "step": 12158 }, { "epoch": 0.2153302607286589, "grad_norm": 1.3201459646224976, "learning_rate": 2.737793590000631e-05, "loss": 0.1426, "step": 12159 }, { "epoch": 0.21534797026568733, "grad_norm": 1.2839245796203613, "learning_rate": 2.7377449905374917e-05, "loss": 0.132, "step": 12160 }, { "epoch": 0.21536567980271576, "grad_norm": 1.1551955938339233, "learning_rate": 2.7376963870022964e-05, "loss": 0.1111, "step": 12161 }, { "epoch": 0.21538338933974419, "grad_norm": 1.0855439901351929, "learning_rate": 2.737647779395205e-05, "loss": 0.1319, "step": 12162 }, { "epoch": 0.2154010988767726, "grad_norm": 0.621316134929657, "learning_rate": 2.7375991677163775e-05, "loss": 0.0964, "step": 12163 }, { "epoch": 0.21541880841380104, "grad_norm": 0.6563187837600708, "learning_rate": 2.7375505519659744e-05, "loss": 0.1034, "step": 12164 }, { "epoch": 0.21543651795082946, "grad_norm": 0.8158758282661438, "learning_rate": 2.7375019321441546e-05, "loss": 0.1026, "step": 12165 }, { "epoch": 0.2154542274878579, "grad_norm": 0.8028849363327026, "learning_rate": 2.737453308251079e-05, "loss": 0.0915, "step": 12166 }, { "epoch": 0.21547193702488632, "grad_norm": 0.6337559819221497, "learning_rate": 2.737404680286907e-05, "loss": 0.1071, "step": 12167 }, { "epoch": 0.21548964656191474, "grad_norm": 0.9058346748352051, "learning_rate": 2.737356048251799e-05, "loss": 0.0876, "step": 12168 }, { "epoch": 0.21550735609894317, "grad_norm": 0.9473334550857544, "learning_rate": 2.737307412145914e-05, "loss": 0.097, "step": 12169 }, { "epoch": 0.21552506563597162, "grad_norm": 0.7160012125968933, "learning_rate": 2.7372587719694135e-05, "loss": 0.1036, "step": 12170 }, { "epoch": 0.21554277517300005, "grad_norm": 1.338254451751709, "learning_rate": 2.7372101277224567e-05, "loss": 0.1176, "step": 12171 }, { "epoch": 0.21556048471002848, "grad_norm": 0.7920942902565002, "learning_rate": 2.7371614794052038e-05, "loss": 0.1347, "step": 12172 }, { "epoch": 0.2155781942470569, "grad_norm": 1.3009915351867676, "learning_rate": 2.7371128270178143e-05, "loss": 0.1243, "step": 12173 }, { "epoch": 0.21559590378408533, "grad_norm": 0.9914253354072571, "learning_rate": 2.737064170560449e-05, "loss": 0.1109, "step": 12174 }, { "epoch": 0.21561361332111376, "grad_norm": 0.9093942642211914, "learning_rate": 2.7370155100332677e-05, "loss": 0.0995, "step": 12175 }, { "epoch": 0.21563132285814218, "grad_norm": 1.3253129720687866, "learning_rate": 2.7369668454364305e-05, "loss": 0.1257, "step": 12176 }, { "epoch": 0.2156490323951706, "grad_norm": 0.6346606612205505, "learning_rate": 2.7369181767700973e-05, "loss": 0.1014, "step": 12177 }, { "epoch": 0.21566674193219904, "grad_norm": 0.7088409662246704, "learning_rate": 2.736869504034429e-05, "loss": 0.0893, "step": 12178 }, { "epoch": 0.21568445146922746, "grad_norm": 1.1235170364379883, "learning_rate": 2.7368208272295843e-05, "loss": 0.0822, "step": 12179 }, { "epoch": 0.2157021610062559, "grad_norm": 0.9100358486175537, "learning_rate": 2.7367721463557245e-05, "loss": 0.0933, "step": 12180 }, { "epoch": 0.21571987054328431, "grad_norm": 0.7985988259315491, "learning_rate": 2.73672346141301e-05, "loss": 0.1445, "step": 12181 }, { "epoch": 0.21573758008031274, "grad_norm": 0.7215781211853027, "learning_rate": 2.7366747724016e-05, "loss": 0.1238, "step": 12182 }, { "epoch": 0.21575528961734117, "grad_norm": 0.9327002763748169, "learning_rate": 2.7366260793216546e-05, "loss": 0.1052, "step": 12183 }, { "epoch": 0.2157729991543696, "grad_norm": 0.9230952262878418, "learning_rate": 2.7365773821733348e-05, "loss": 0.0862, "step": 12184 }, { "epoch": 0.21579070869139805, "grad_norm": 0.6333752274513245, "learning_rate": 2.7365286809568008e-05, "loss": 0.0563, "step": 12185 }, { "epoch": 0.21580841822842647, "grad_norm": 1.0299006700515747, "learning_rate": 2.736479975672212e-05, "loss": 0.1231, "step": 12186 }, { "epoch": 0.2158261277654549, "grad_norm": 1.0107414722442627, "learning_rate": 2.7364312663197298e-05, "loss": 0.0909, "step": 12187 }, { "epoch": 0.21584383730248333, "grad_norm": 1.2349375486373901, "learning_rate": 2.7363825528995134e-05, "loss": 0.1226, "step": 12188 }, { "epoch": 0.21586154683951175, "grad_norm": 1.459726333618164, "learning_rate": 2.736333835411723e-05, "loss": 0.1152, "step": 12189 }, { "epoch": 0.21587925637654018, "grad_norm": 0.857904851436615, "learning_rate": 2.73628511385652e-05, "loss": 0.1749, "step": 12190 }, { "epoch": 0.2158969659135686, "grad_norm": 0.5485751628875732, "learning_rate": 2.7362363882340638e-05, "loss": 0.1016, "step": 12191 }, { "epoch": 0.21591467545059703, "grad_norm": 1.1604431867599487, "learning_rate": 2.736187658544515e-05, "loss": 0.1527, "step": 12192 }, { "epoch": 0.21593238498762546, "grad_norm": 0.6333447694778442, "learning_rate": 2.7361389247880335e-05, "loss": 0.0879, "step": 12193 }, { "epoch": 0.21595009452465388, "grad_norm": 0.7385645508766174, "learning_rate": 2.7360901869647806e-05, "loss": 0.1149, "step": 12194 }, { "epoch": 0.2159678040616823, "grad_norm": 0.6763142943382263, "learning_rate": 2.736041445074915e-05, "loss": 0.0863, "step": 12195 }, { "epoch": 0.21598551359871074, "grad_norm": 0.8176849484443665, "learning_rate": 2.735992699118599e-05, "loss": 0.1141, "step": 12196 }, { "epoch": 0.21600322313573916, "grad_norm": 0.6252087354660034, "learning_rate": 2.7359439490959918e-05, "loss": 0.0799, "step": 12197 }, { "epoch": 0.2160209326727676, "grad_norm": 1.1009653806686401, "learning_rate": 2.7358951950072543e-05, "loss": 0.1386, "step": 12198 }, { "epoch": 0.21603864220979602, "grad_norm": 1.0855857133865356, "learning_rate": 2.735846436852546e-05, "loss": 0.1579, "step": 12199 }, { "epoch": 0.21605635174682447, "grad_norm": 0.7048550248146057, "learning_rate": 2.735797674632028e-05, "loss": 0.1077, "step": 12200 }, { "epoch": 0.2160740612838529, "grad_norm": 0.499885618686676, "learning_rate": 2.735748908345861e-05, "loss": 0.0718, "step": 12201 }, { "epoch": 0.21609177082088132, "grad_norm": 1.2244869470596313, "learning_rate": 2.7357001379942045e-05, "loss": 0.0929, "step": 12202 }, { "epoch": 0.21610948035790975, "grad_norm": 1.1893401145935059, "learning_rate": 2.73565136357722e-05, "loss": 0.0987, "step": 12203 }, { "epoch": 0.21612718989493818, "grad_norm": 1.1274642944335938, "learning_rate": 2.735602585095068e-05, "loss": 0.1313, "step": 12204 }, { "epoch": 0.2161448994319666, "grad_norm": 0.8017359375953674, "learning_rate": 2.7355538025479073e-05, "loss": 0.0803, "step": 12205 }, { "epoch": 0.21616260896899503, "grad_norm": 1.1112608909606934, "learning_rate": 2.7355050159359005e-05, "loss": 0.1064, "step": 12206 }, { "epoch": 0.21618031850602346, "grad_norm": 0.7559111714363098, "learning_rate": 2.7354562252592066e-05, "loss": 0.1302, "step": 12207 }, { "epoch": 0.21619802804305188, "grad_norm": 0.7307064533233643, "learning_rate": 2.7354074305179867e-05, "loss": 0.1176, "step": 12208 }, { "epoch": 0.2162157375800803, "grad_norm": 0.7567156553268433, "learning_rate": 2.7353586317124016e-05, "loss": 0.1071, "step": 12209 }, { "epoch": 0.21623344711710873, "grad_norm": 1.1390551328659058, "learning_rate": 2.7353098288426118e-05, "loss": 0.1553, "step": 12210 }, { "epoch": 0.21625115665413716, "grad_norm": 1.039162039756775, "learning_rate": 2.7352610219087772e-05, "loss": 0.0921, "step": 12211 }, { "epoch": 0.2162688661911656, "grad_norm": 1.1794167757034302, "learning_rate": 2.735212210911059e-05, "loss": 0.0846, "step": 12212 }, { "epoch": 0.216286575728194, "grad_norm": 0.7766614556312561, "learning_rate": 2.7351633958496172e-05, "loss": 0.087, "step": 12213 }, { "epoch": 0.21630428526522244, "grad_norm": 0.8962628245353699, "learning_rate": 2.7351145767246126e-05, "loss": 0.1498, "step": 12214 }, { "epoch": 0.2163219948022509, "grad_norm": 0.7631019949913025, "learning_rate": 2.7350657535362066e-05, "loss": 0.1074, "step": 12215 }, { "epoch": 0.21633970433927932, "grad_norm": 0.8664437532424927, "learning_rate": 2.7350169262845585e-05, "loss": 0.1276, "step": 12216 }, { "epoch": 0.21635741387630775, "grad_norm": 0.9617770314216614, "learning_rate": 2.7349680949698303e-05, "loss": 0.0877, "step": 12217 }, { "epoch": 0.21637512341333617, "grad_norm": 0.9173829555511475, "learning_rate": 2.7349192595921818e-05, "loss": 0.1323, "step": 12218 }, { "epoch": 0.2163928329503646, "grad_norm": 0.4373340606689453, "learning_rate": 2.734870420151774e-05, "loss": 0.12, "step": 12219 }, { "epoch": 0.21641054248739303, "grad_norm": 1.0689736604690552, "learning_rate": 2.734821576648767e-05, "loss": 0.0921, "step": 12220 }, { "epoch": 0.21642825202442145, "grad_norm": 0.8719617128372192, "learning_rate": 2.7347727290833217e-05, "loss": 0.1097, "step": 12221 }, { "epoch": 0.21644596156144988, "grad_norm": 1.0814781188964844, "learning_rate": 2.7347238774555996e-05, "loss": 0.0858, "step": 12222 }, { "epoch": 0.2164636710984783, "grad_norm": 1.0634976625442505, "learning_rate": 2.7346750217657608e-05, "loss": 0.1171, "step": 12223 }, { "epoch": 0.21648138063550673, "grad_norm": 0.721660852432251, "learning_rate": 2.7346261620139657e-05, "loss": 0.1074, "step": 12224 }, { "epoch": 0.21649909017253516, "grad_norm": 0.9860914945602417, "learning_rate": 2.734577298200376e-05, "loss": 0.1319, "step": 12225 }, { "epoch": 0.21651679970956358, "grad_norm": 0.9905055165290833, "learning_rate": 2.7345284303251507e-05, "loss": 0.108, "step": 12226 }, { "epoch": 0.216534509246592, "grad_norm": 0.8063143491744995, "learning_rate": 2.734479558388453e-05, "loss": 0.0775, "step": 12227 }, { "epoch": 0.21655221878362044, "grad_norm": 1.127600073814392, "learning_rate": 2.7344306823904416e-05, "loss": 0.1043, "step": 12228 }, { "epoch": 0.2165699283206489, "grad_norm": 0.8009840250015259, "learning_rate": 2.7343818023312783e-05, "loss": 0.0879, "step": 12229 }, { "epoch": 0.21658763785767732, "grad_norm": 0.658153772354126, "learning_rate": 2.734332918211124e-05, "loss": 0.0937, "step": 12230 }, { "epoch": 0.21660534739470574, "grad_norm": 0.5689609050750732, "learning_rate": 2.7342840300301387e-05, "loss": 0.073, "step": 12231 }, { "epoch": 0.21662305693173417, "grad_norm": 0.8572801947593689, "learning_rate": 2.734235137788484e-05, "loss": 0.1051, "step": 12232 }, { "epoch": 0.2166407664687626, "grad_norm": 0.9599549174308777, "learning_rate": 2.734186241486321e-05, "loss": 0.1049, "step": 12233 }, { "epoch": 0.21665847600579102, "grad_norm": 1.2091009616851807, "learning_rate": 2.73413734112381e-05, "loss": 0.138, "step": 12234 }, { "epoch": 0.21667618554281945, "grad_norm": 0.7458576560020447, "learning_rate": 2.7340884367011116e-05, "loss": 0.1048, "step": 12235 }, { "epoch": 0.21669389507984788, "grad_norm": 1.8958265781402588, "learning_rate": 2.7340395282183873e-05, "loss": 0.155, "step": 12236 }, { "epoch": 0.2167116046168763, "grad_norm": 0.5491364002227783, "learning_rate": 2.7339906156757977e-05, "loss": 0.0941, "step": 12237 }, { "epoch": 0.21672931415390473, "grad_norm": 1.2007226943969727, "learning_rate": 2.7339416990735036e-05, "loss": 0.1351, "step": 12238 }, { "epoch": 0.21674702369093315, "grad_norm": 1.1809409856796265, "learning_rate": 2.7338927784116662e-05, "loss": 0.1217, "step": 12239 }, { "epoch": 0.21676473322796158, "grad_norm": 0.7796650528907776, "learning_rate": 2.7338438536904464e-05, "loss": 0.1191, "step": 12240 }, { "epoch": 0.21678244276499, "grad_norm": 0.674851655960083, "learning_rate": 2.7337949249100052e-05, "loss": 0.1107, "step": 12241 }, { "epoch": 0.21680015230201843, "grad_norm": 0.9822721481323242, "learning_rate": 2.7337459920705035e-05, "loss": 0.1423, "step": 12242 }, { "epoch": 0.21681786183904686, "grad_norm": 0.8953041434288025, "learning_rate": 2.7336970551721023e-05, "loss": 0.1125, "step": 12243 }, { "epoch": 0.21683557137607531, "grad_norm": 0.6009477972984314, "learning_rate": 2.7336481142149624e-05, "loss": 0.0651, "step": 12244 }, { "epoch": 0.21685328091310374, "grad_norm": 0.9997972249984741, "learning_rate": 2.733599169199245e-05, "loss": 0.1952, "step": 12245 }, { "epoch": 0.21687099045013217, "grad_norm": 0.8514408469200134, "learning_rate": 2.7335502201251113e-05, "loss": 0.0876, "step": 12246 }, { "epoch": 0.2168886999871606, "grad_norm": 1.3171073198318481, "learning_rate": 2.733501266992722e-05, "loss": 0.125, "step": 12247 }, { "epoch": 0.21690640952418902, "grad_norm": 2.137986421585083, "learning_rate": 2.733452309802238e-05, "loss": 0.1329, "step": 12248 }, { "epoch": 0.21692411906121745, "grad_norm": 0.9538151621818542, "learning_rate": 2.733403348553821e-05, "loss": 0.1012, "step": 12249 }, { "epoch": 0.21694182859824587, "grad_norm": 0.8721570372581482, "learning_rate": 2.7333543832476316e-05, "loss": 0.1118, "step": 12250 }, { "epoch": 0.2169595381352743, "grad_norm": 0.7428445219993591, "learning_rate": 2.7333054138838314e-05, "loss": 0.1076, "step": 12251 }, { "epoch": 0.21697724767230273, "grad_norm": 1.0365151166915894, "learning_rate": 2.7332564404625807e-05, "loss": 0.1444, "step": 12252 }, { "epoch": 0.21699495720933115, "grad_norm": 0.4628147780895233, "learning_rate": 2.7332074629840412e-05, "loss": 0.085, "step": 12253 }, { "epoch": 0.21701266674635958, "grad_norm": 1.2508320808410645, "learning_rate": 2.733158481448374e-05, "loss": 0.1292, "step": 12254 }, { "epoch": 0.217030376283388, "grad_norm": 0.9933894872665405, "learning_rate": 2.7331094958557398e-05, "loss": 0.1156, "step": 12255 }, { "epoch": 0.21704808582041643, "grad_norm": 0.8128739595413208, "learning_rate": 2.7330605062063e-05, "loss": 0.1085, "step": 12256 }, { "epoch": 0.21706579535744486, "grad_norm": 0.8737818598747253, "learning_rate": 2.7330115125002163e-05, "loss": 0.062, "step": 12257 }, { "epoch": 0.21708350489447328, "grad_norm": 0.837792694568634, "learning_rate": 2.7329625147376494e-05, "loss": 0.1029, "step": 12258 }, { "epoch": 0.21710121443150174, "grad_norm": 0.9606443643569946, "learning_rate": 2.73291351291876e-05, "loss": 0.1252, "step": 12259 }, { "epoch": 0.21711892396853016, "grad_norm": 0.8036962747573853, "learning_rate": 2.7328645070437106e-05, "loss": 0.1032, "step": 12260 }, { "epoch": 0.2171366335055586, "grad_norm": 1.0253826379776, "learning_rate": 2.7328154971126613e-05, "loss": 0.1705, "step": 12261 }, { "epoch": 0.21715434304258702, "grad_norm": 0.7947515845298767, "learning_rate": 2.7327664831257735e-05, "loss": 0.1012, "step": 12262 }, { "epoch": 0.21717205257961544, "grad_norm": 0.7708759307861328, "learning_rate": 2.732717465083209e-05, "loss": 0.1001, "step": 12263 }, { "epoch": 0.21718976211664387, "grad_norm": 1.1884558200836182, "learning_rate": 2.7326684429851284e-05, "loss": 0.0864, "step": 12264 }, { "epoch": 0.2172074716536723, "grad_norm": 0.6990227103233337, "learning_rate": 2.7326194168316935e-05, "loss": 0.0973, "step": 12265 }, { "epoch": 0.21722518119070072, "grad_norm": 1.6816447973251343, "learning_rate": 2.7325703866230654e-05, "loss": 0.1475, "step": 12266 }, { "epoch": 0.21724289072772915, "grad_norm": 0.5575215220451355, "learning_rate": 2.7325213523594056e-05, "loss": 0.089, "step": 12267 }, { "epoch": 0.21726060026475758, "grad_norm": 0.768520176410675, "learning_rate": 2.732472314040875e-05, "loss": 0.0842, "step": 12268 }, { "epoch": 0.217278309801786, "grad_norm": 1.437045693397522, "learning_rate": 2.732423271667635e-05, "loss": 0.1135, "step": 12269 }, { "epoch": 0.21729601933881443, "grad_norm": 0.7005396485328674, "learning_rate": 2.7323742252398473e-05, "loss": 0.111, "step": 12270 }, { "epoch": 0.21731372887584285, "grad_norm": 0.9889710545539856, "learning_rate": 2.7323251747576726e-05, "loss": 0.1081, "step": 12271 }, { "epoch": 0.21733143841287128, "grad_norm": 1.1687482595443726, "learning_rate": 2.7322761202212733e-05, "loss": 0.0771, "step": 12272 }, { "epoch": 0.2173491479498997, "grad_norm": 1.309513807296753, "learning_rate": 2.7322270616308097e-05, "loss": 0.1191, "step": 12273 }, { "epoch": 0.21736685748692816, "grad_norm": 0.625701367855072, "learning_rate": 2.7321779989864443e-05, "loss": 0.1074, "step": 12274 }, { "epoch": 0.2173845670239566, "grad_norm": 1.2955503463745117, "learning_rate": 2.7321289322883378e-05, "loss": 0.1563, "step": 12275 }, { "epoch": 0.217402276560985, "grad_norm": 0.9511438608169556, "learning_rate": 2.7320798615366515e-05, "loss": 0.0926, "step": 12276 }, { "epoch": 0.21741998609801344, "grad_norm": 1.1014306545257568, "learning_rate": 2.7320307867315472e-05, "loss": 0.1692, "step": 12277 }, { "epoch": 0.21743769563504187, "grad_norm": 0.7578684091567993, "learning_rate": 2.7319817078731857e-05, "loss": 0.1037, "step": 12278 }, { "epoch": 0.2174554051720703, "grad_norm": 0.7596701383590698, "learning_rate": 2.7319326249617295e-05, "loss": 0.1154, "step": 12279 }, { "epoch": 0.21747311470909872, "grad_norm": 1.2210787534713745, "learning_rate": 2.7318835379973396e-05, "loss": 0.104, "step": 12280 }, { "epoch": 0.21749082424612715, "grad_norm": 1.1053948402404785, "learning_rate": 2.7318344469801772e-05, "loss": 0.0988, "step": 12281 }, { "epoch": 0.21750853378315557, "grad_norm": 0.9808877110481262, "learning_rate": 2.7317853519104043e-05, "loss": 0.1262, "step": 12282 }, { "epoch": 0.217526243320184, "grad_norm": 0.8917765021324158, "learning_rate": 2.7317362527881822e-05, "loss": 0.0962, "step": 12283 }, { "epoch": 0.21754395285721242, "grad_norm": 0.7928444743156433, "learning_rate": 2.7316871496136718e-05, "loss": 0.0905, "step": 12284 }, { "epoch": 0.21756166239424085, "grad_norm": 0.808461606502533, "learning_rate": 2.731638042387036e-05, "loss": 0.1411, "step": 12285 }, { "epoch": 0.21757937193126928, "grad_norm": 0.7237096428871155, "learning_rate": 2.7315889311084353e-05, "loss": 0.1095, "step": 12286 }, { "epoch": 0.2175970814682977, "grad_norm": 0.9120150208473206, "learning_rate": 2.7315398157780317e-05, "loss": 0.1167, "step": 12287 }, { "epoch": 0.21761479100532613, "grad_norm": 0.9527977705001831, "learning_rate": 2.7314906963959864e-05, "loss": 0.0828, "step": 12288 }, { "epoch": 0.21763250054235458, "grad_norm": 1.0231074094772339, "learning_rate": 2.7314415729624614e-05, "loss": 0.1161, "step": 12289 }, { "epoch": 0.217650210079383, "grad_norm": 0.7443048357963562, "learning_rate": 2.7313924454776186e-05, "loss": 0.0807, "step": 12290 }, { "epoch": 0.21766791961641144, "grad_norm": 0.8403790593147278, "learning_rate": 2.731343313941619e-05, "loss": 0.132, "step": 12291 }, { "epoch": 0.21768562915343986, "grad_norm": 1.1186437606811523, "learning_rate": 2.731294178354624e-05, "loss": 0.112, "step": 12292 }, { "epoch": 0.2177033386904683, "grad_norm": 1.149806261062622, "learning_rate": 2.7312450387167962e-05, "loss": 0.0966, "step": 12293 }, { "epoch": 0.21772104822749672, "grad_norm": 1.666735291481018, "learning_rate": 2.7311958950282964e-05, "loss": 0.1601, "step": 12294 }, { "epoch": 0.21773875776452514, "grad_norm": 0.9999938011169434, "learning_rate": 2.7311467472892867e-05, "loss": 0.0872, "step": 12295 }, { "epoch": 0.21775646730155357, "grad_norm": 0.7982105612754822, "learning_rate": 2.7310975954999285e-05, "loss": 0.1251, "step": 12296 }, { "epoch": 0.217774176838582, "grad_norm": 1.0006641149520874, "learning_rate": 2.7310484396603847e-05, "loss": 0.067, "step": 12297 }, { "epoch": 0.21779188637561042, "grad_norm": 0.8258119225502014, "learning_rate": 2.730999279770815e-05, "loss": 0.0816, "step": 12298 }, { "epoch": 0.21780959591263885, "grad_norm": 0.9094732999801636, "learning_rate": 2.7309501158313825e-05, "loss": 0.0865, "step": 12299 }, { "epoch": 0.21782730544966727, "grad_norm": 1.098454236984253, "learning_rate": 2.7309009478422488e-05, "loss": 0.1023, "step": 12300 }, { "epoch": 0.2178450149866957, "grad_norm": 1.2489620447158813, "learning_rate": 2.7308517758035756e-05, "loss": 0.1384, "step": 12301 }, { "epoch": 0.21786272452372413, "grad_norm": 0.8866801261901855, "learning_rate": 2.7308025997155238e-05, "loss": 0.11, "step": 12302 }, { "epoch": 0.21788043406075255, "grad_norm": 1.1566283702850342, "learning_rate": 2.7307534195782567e-05, "loss": 0.1164, "step": 12303 }, { "epoch": 0.217898143597781, "grad_norm": 0.791990339756012, "learning_rate": 2.730704235391935e-05, "loss": 0.1052, "step": 12304 }, { "epoch": 0.21791585313480943, "grad_norm": 0.9926612377166748, "learning_rate": 2.730655047156721e-05, "loss": 0.1163, "step": 12305 }, { "epoch": 0.21793356267183786, "grad_norm": 0.8117513060569763, "learning_rate": 2.7306058548727765e-05, "loss": 0.0932, "step": 12306 }, { "epoch": 0.2179512722088663, "grad_norm": 1.4388967752456665, "learning_rate": 2.7305566585402625e-05, "loss": 0.0749, "step": 12307 }, { "epoch": 0.2179689817458947, "grad_norm": 1.036699652671814, "learning_rate": 2.7305074581593424e-05, "loss": 0.0708, "step": 12308 }, { "epoch": 0.21798669128292314, "grad_norm": 0.8360115885734558, "learning_rate": 2.7304582537301767e-05, "loss": 0.1673, "step": 12309 }, { "epoch": 0.21800440081995157, "grad_norm": 0.933436930179596, "learning_rate": 2.730409045252928e-05, "loss": 0.1244, "step": 12310 }, { "epoch": 0.21802211035698, "grad_norm": 1.6429154872894287, "learning_rate": 2.730359832727758e-05, "loss": 0.1077, "step": 12311 }, { "epoch": 0.21803981989400842, "grad_norm": 0.9703196883201599, "learning_rate": 2.7303106161548285e-05, "loss": 0.0823, "step": 12312 }, { "epoch": 0.21805752943103685, "grad_norm": 0.8840948343276978, "learning_rate": 2.7302613955343017e-05, "loss": 0.086, "step": 12313 }, { "epoch": 0.21807523896806527, "grad_norm": 1.2578966617584229, "learning_rate": 2.730212170866339e-05, "loss": 0.102, "step": 12314 }, { "epoch": 0.2180929485050937, "grad_norm": 0.9962010979652405, "learning_rate": 2.730162942151103e-05, "loss": 0.1094, "step": 12315 }, { "epoch": 0.21811065804212212, "grad_norm": 1.0655640363693237, "learning_rate": 2.7301137093887554e-05, "loss": 0.0876, "step": 12316 }, { "epoch": 0.21812836757915055, "grad_norm": 1.2201555967330933, "learning_rate": 2.7300644725794578e-05, "loss": 0.0904, "step": 12317 }, { "epoch": 0.21814607711617898, "grad_norm": 0.728656530380249, "learning_rate": 2.730015231723373e-05, "loss": 0.0755, "step": 12318 }, { "epoch": 0.21816378665320743, "grad_norm": 0.6479318141937256, "learning_rate": 2.729965986820662e-05, "loss": 0.0873, "step": 12319 }, { "epoch": 0.21818149619023586, "grad_norm": 0.8397854566574097, "learning_rate": 2.7299167378714872e-05, "loss": 0.1088, "step": 12320 }, { "epoch": 0.21819920572726428, "grad_norm": 1.0198620557785034, "learning_rate": 2.729867484876011e-05, "loss": 0.0927, "step": 12321 }, { "epoch": 0.2182169152642927, "grad_norm": 1.0145916938781738, "learning_rate": 2.729818227834395e-05, "loss": 0.1052, "step": 12322 }, { "epoch": 0.21823462480132114, "grad_norm": 0.9819802045822144, "learning_rate": 2.7297689667468018e-05, "loss": 0.1112, "step": 12323 }, { "epoch": 0.21825233433834956, "grad_norm": 0.7100381255149841, "learning_rate": 2.7297197016133926e-05, "loss": 0.1343, "step": 12324 }, { "epoch": 0.218270043875378, "grad_norm": 0.9550193548202515, "learning_rate": 2.7296704324343305e-05, "loss": 0.1477, "step": 12325 }, { "epoch": 0.21828775341240642, "grad_norm": 1.2986887693405151, "learning_rate": 2.729621159209777e-05, "loss": 0.1116, "step": 12326 }, { "epoch": 0.21830546294943484, "grad_norm": 0.9431817531585693, "learning_rate": 2.7295718819398937e-05, "loss": 0.0924, "step": 12327 }, { "epoch": 0.21832317248646327, "grad_norm": 1.102975606918335, "learning_rate": 2.7295226006248434e-05, "loss": 0.1107, "step": 12328 }, { "epoch": 0.2183408820234917, "grad_norm": 0.6506676077842712, "learning_rate": 2.7294733152647885e-05, "loss": 0.1242, "step": 12329 }, { "epoch": 0.21835859156052012, "grad_norm": 0.632273256778717, "learning_rate": 2.7294240258598908e-05, "loss": 0.0619, "step": 12330 }, { "epoch": 0.21837630109754855, "grad_norm": 0.8137792348861694, "learning_rate": 2.729374732410312e-05, "loss": 0.0775, "step": 12331 }, { "epoch": 0.21839401063457697, "grad_norm": 0.9981764554977417, "learning_rate": 2.7293254349162145e-05, "loss": 0.1241, "step": 12332 }, { "epoch": 0.2184117201716054, "grad_norm": 1.1248115301132202, "learning_rate": 2.729276133377761e-05, "loss": 0.1303, "step": 12333 }, { "epoch": 0.21842942970863385, "grad_norm": 1.1807219982147217, "learning_rate": 2.7292268277951133e-05, "loss": 0.1031, "step": 12334 }, { "epoch": 0.21844713924566228, "grad_norm": 1.1442726850509644, "learning_rate": 2.729177518168434e-05, "loss": 0.1127, "step": 12335 }, { "epoch": 0.2184648487826907, "grad_norm": 1.24809730052948, "learning_rate": 2.7291282044978844e-05, "loss": 0.0791, "step": 12336 }, { "epoch": 0.21848255831971913, "grad_norm": 1.888299584388733, "learning_rate": 2.7290788867836277e-05, "loss": 0.1019, "step": 12337 }, { "epoch": 0.21850026785674756, "grad_norm": 0.7578706741333008, "learning_rate": 2.7290295650258257e-05, "loss": 0.1036, "step": 12338 }, { "epoch": 0.218517977393776, "grad_norm": 0.7185315489768982, "learning_rate": 2.7289802392246405e-05, "loss": 0.0716, "step": 12339 }, { "epoch": 0.2185356869308044, "grad_norm": 0.9475241303443909, "learning_rate": 2.7289309093802354e-05, "loss": 0.1151, "step": 12340 }, { "epoch": 0.21855339646783284, "grad_norm": 0.8055185079574585, "learning_rate": 2.728881575492771e-05, "loss": 0.1002, "step": 12341 }, { "epoch": 0.21857110600486127, "grad_norm": 0.7241965532302856, "learning_rate": 2.7288322375624107e-05, "loss": 0.0853, "step": 12342 }, { "epoch": 0.2185888155418897, "grad_norm": 1.1565968990325928, "learning_rate": 2.7287828955893167e-05, "loss": 0.1201, "step": 12343 }, { "epoch": 0.21860652507891812, "grad_norm": 1.7460298538208008, "learning_rate": 2.728733549573652e-05, "loss": 0.0924, "step": 12344 }, { "epoch": 0.21862423461594654, "grad_norm": 0.7565045952796936, "learning_rate": 2.728684199515577e-05, "loss": 0.1161, "step": 12345 }, { "epoch": 0.21864194415297497, "grad_norm": 0.860844612121582, "learning_rate": 2.728634845415256e-05, "loss": 0.1105, "step": 12346 }, { "epoch": 0.2186596536900034, "grad_norm": 1.5135635137557983, "learning_rate": 2.7285854872728505e-05, "loss": 0.1123, "step": 12347 }, { "epoch": 0.21867736322703182, "grad_norm": 0.8060370683670044, "learning_rate": 2.7285361250885228e-05, "loss": 0.088, "step": 12348 }, { "epoch": 0.21869507276406028, "grad_norm": 0.7440477609634399, "learning_rate": 2.7284867588624357e-05, "loss": 0.105, "step": 12349 }, { "epoch": 0.2187127823010887, "grad_norm": 0.9551728367805481, "learning_rate": 2.7284373885947517e-05, "loss": 0.0818, "step": 12350 }, { "epoch": 0.21873049183811713, "grad_norm": 0.8789604306221008, "learning_rate": 2.7283880142856325e-05, "loss": 0.1291, "step": 12351 }, { "epoch": 0.21874820137514556, "grad_norm": 1.1977511644363403, "learning_rate": 2.7283386359352412e-05, "loss": 0.0796, "step": 12352 }, { "epoch": 0.21876591091217398, "grad_norm": 0.9938164949417114, "learning_rate": 2.7282892535437394e-05, "loss": 0.1389, "step": 12353 }, { "epoch": 0.2187836204492024, "grad_norm": 0.7046528458595276, "learning_rate": 2.7282398671112908e-05, "loss": 0.1233, "step": 12354 }, { "epoch": 0.21880132998623084, "grad_norm": 1.3898887634277344, "learning_rate": 2.7281904766380572e-05, "loss": 0.1627, "step": 12355 }, { "epoch": 0.21881903952325926, "grad_norm": 0.6457729339599609, "learning_rate": 2.7281410821242014e-05, "loss": 0.0905, "step": 12356 }, { "epoch": 0.2188367490602877, "grad_norm": 0.8337993025779724, "learning_rate": 2.7280916835698852e-05, "loss": 0.0804, "step": 12357 }, { "epoch": 0.21885445859731611, "grad_norm": 0.7885002493858337, "learning_rate": 2.7280422809752717e-05, "loss": 0.1065, "step": 12358 }, { "epoch": 0.21887216813434454, "grad_norm": 0.9948632717132568, "learning_rate": 2.7279928743405235e-05, "loss": 0.1122, "step": 12359 }, { "epoch": 0.21888987767137297, "grad_norm": 1.2860444784164429, "learning_rate": 2.7279434636658025e-05, "loss": 0.0888, "step": 12360 }, { "epoch": 0.2189075872084014, "grad_norm": 0.7997264862060547, "learning_rate": 2.7278940489512724e-05, "loss": 0.1013, "step": 12361 }, { "epoch": 0.21892529674542982, "grad_norm": 1.2709578275680542, "learning_rate": 2.7278446301970948e-05, "loss": 0.1108, "step": 12362 }, { "epoch": 0.21894300628245827, "grad_norm": 1.0548720359802246, "learning_rate": 2.7277952074034323e-05, "loss": 0.1217, "step": 12363 }, { "epoch": 0.2189607158194867, "grad_norm": 0.936539351940155, "learning_rate": 2.7277457805704475e-05, "loss": 0.1042, "step": 12364 }, { "epoch": 0.21897842535651513, "grad_norm": 0.7976734638214111, "learning_rate": 2.7276963496983038e-05, "loss": 0.1036, "step": 12365 }, { "epoch": 0.21899613489354355, "grad_norm": 0.9873694181442261, "learning_rate": 2.727646914787163e-05, "loss": 0.136, "step": 12366 }, { "epoch": 0.21901384443057198, "grad_norm": 1.008371114730835, "learning_rate": 2.7275974758371884e-05, "loss": 0.0896, "step": 12367 }, { "epoch": 0.2190315539676004, "grad_norm": 1.0392460823059082, "learning_rate": 2.727548032848542e-05, "loss": 0.1371, "step": 12368 }, { "epoch": 0.21904926350462883, "grad_norm": 0.8282773494720459, "learning_rate": 2.7274985858213865e-05, "loss": 0.119, "step": 12369 }, { "epoch": 0.21906697304165726, "grad_norm": 0.9243664741516113, "learning_rate": 2.7274491347558855e-05, "loss": 0.1197, "step": 12370 }, { "epoch": 0.21908468257868569, "grad_norm": 0.9954864978790283, "learning_rate": 2.7273996796522003e-05, "loss": 0.0836, "step": 12371 }, { "epoch": 0.2191023921157141, "grad_norm": 0.9373611807823181, "learning_rate": 2.7273502205104947e-05, "loss": 0.1052, "step": 12372 }, { "epoch": 0.21912010165274254, "grad_norm": 1.089345097541809, "learning_rate": 2.7273007573309307e-05, "loss": 0.133, "step": 12373 }, { "epoch": 0.21913781118977096, "grad_norm": 1.7814829349517822, "learning_rate": 2.7272512901136716e-05, "loss": 0.0895, "step": 12374 }, { "epoch": 0.2191555207267994, "grad_norm": 0.9501543641090393, "learning_rate": 2.7272018188588797e-05, "loss": 0.0869, "step": 12375 }, { "epoch": 0.21917323026382782, "grad_norm": 0.7462413907051086, "learning_rate": 2.7271523435667183e-05, "loss": 0.0872, "step": 12376 }, { "epoch": 0.21919093980085624, "grad_norm": 1.4390513896942139, "learning_rate": 2.7271028642373494e-05, "loss": 0.1021, "step": 12377 }, { "epoch": 0.2192086493378847, "grad_norm": 1.0510965585708618, "learning_rate": 2.7270533808709365e-05, "loss": 0.1011, "step": 12378 }, { "epoch": 0.21922635887491312, "grad_norm": 2.2498695850372314, "learning_rate": 2.7270038934676417e-05, "loss": 0.1266, "step": 12379 }, { "epoch": 0.21924406841194155, "grad_norm": 1.0048576593399048, "learning_rate": 2.7269544020276286e-05, "loss": 0.1182, "step": 12380 }, { "epoch": 0.21926177794896998, "grad_norm": 0.9097558259963989, "learning_rate": 2.7269049065510592e-05, "loss": 0.0929, "step": 12381 }, { "epoch": 0.2192794874859984, "grad_norm": 1.4444868564605713, "learning_rate": 2.726855407038097e-05, "loss": 0.1076, "step": 12382 }, { "epoch": 0.21929719702302683, "grad_norm": 0.5525742173194885, "learning_rate": 2.7268059034889045e-05, "loss": 0.1074, "step": 12383 }, { "epoch": 0.21931490656005526, "grad_norm": 0.413326621055603, "learning_rate": 2.7267563959036448e-05, "loss": 0.0954, "step": 12384 }, { "epoch": 0.21933261609708368, "grad_norm": 1.0915021896362305, "learning_rate": 2.726706884282481e-05, "loss": 0.1269, "step": 12385 }, { "epoch": 0.2193503256341121, "grad_norm": 0.8752127885818481, "learning_rate": 2.726657368625575e-05, "loss": 0.0982, "step": 12386 }, { "epoch": 0.21936803517114054, "grad_norm": 0.8449836373329163, "learning_rate": 2.7266078489330903e-05, "loss": 0.1367, "step": 12387 }, { "epoch": 0.21938574470816896, "grad_norm": 0.9459269046783447, "learning_rate": 2.7265583252051905e-05, "loss": 0.1004, "step": 12388 }, { "epoch": 0.2194034542451974, "grad_norm": 1.1147780418395996, "learning_rate": 2.7265087974420372e-05, "loss": 0.097, "step": 12389 }, { "epoch": 0.21942116378222581, "grad_norm": 0.8417267799377441, "learning_rate": 2.7264592656437942e-05, "loss": 0.1093, "step": 12390 }, { "epoch": 0.21943887331925424, "grad_norm": 0.8414265513420105, "learning_rate": 2.726409729810624e-05, "loss": 0.093, "step": 12391 }, { "epoch": 0.21945658285628267, "grad_norm": 1.0396441221237183, "learning_rate": 2.7263601899426906e-05, "loss": 0.1106, "step": 12392 }, { "epoch": 0.21947429239331112, "grad_norm": 0.6744351983070374, "learning_rate": 2.7263106460401556e-05, "loss": 0.0886, "step": 12393 }, { "epoch": 0.21949200193033955, "grad_norm": 1.1747214794158936, "learning_rate": 2.726261098103183e-05, "loss": 0.1234, "step": 12394 }, { "epoch": 0.21950971146736797, "grad_norm": 2.6418206691741943, "learning_rate": 2.726211546131935e-05, "loss": 0.1213, "step": 12395 }, { "epoch": 0.2195274210043964, "grad_norm": 1.2373242378234863, "learning_rate": 2.7261619901265755e-05, "loss": 0.1075, "step": 12396 }, { "epoch": 0.21954513054142483, "grad_norm": 0.9664932489395142, "learning_rate": 2.7261124300872664e-05, "loss": 0.1354, "step": 12397 }, { "epoch": 0.21956284007845325, "grad_norm": 0.7560564875602722, "learning_rate": 2.726062866014172e-05, "loss": 0.085, "step": 12398 }, { "epoch": 0.21958054961548168, "grad_norm": 1.3754322528839111, "learning_rate": 2.726013297907455e-05, "loss": 0.0873, "step": 12399 }, { "epoch": 0.2195982591525101, "grad_norm": 1.205604910850525, "learning_rate": 2.7259637257672773e-05, "loss": 0.1128, "step": 12400 }, { "epoch": 0.21961596868953853, "grad_norm": 0.8124565482139587, "learning_rate": 2.7259141495938035e-05, "loss": 0.1121, "step": 12401 }, { "epoch": 0.21963367822656696, "grad_norm": 0.7242051959037781, "learning_rate": 2.7258645693871962e-05, "loss": 0.0816, "step": 12402 }, { "epoch": 0.21965138776359538, "grad_norm": 0.8053460121154785, "learning_rate": 2.7258149851476184e-05, "loss": 0.0944, "step": 12403 }, { "epoch": 0.2196690973006238, "grad_norm": 0.7738752365112305, "learning_rate": 2.7257653968752334e-05, "loss": 0.1241, "step": 12404 }, { "epoch": 0.21968680683765224, "grad_norm": 1.0898070335388184, "learning_rate": 2.725715804570204e-05, "loss": 0.0996, "step": 12405 }, { "epoch": 0.21970451637468066, "grad_norm": 0.8517255783081055, "learning_rate": 2.7256662082326936e-05, "loss": 0.0893, "step": 12406 }, { "epoch": 0.2197222259117091, "grad_norm": 1.3039342164993286, "learning_rate": 2.7256166078628654e-05, "loss": 0.0884, "step": 12407 }, { "epoch": 0.21973993544873754, "grad_norm": 0.9424622058868408, "learning_rate": 2.7255670034608826e-05, "loss": 0.1011, "step": 12408 }, { "epoch": 0.21975764498576597, "grad_norm": 0.964701771736145, "learning_rate": 2.7255173950269085e-05, "loss": 0.1354, "step": 12409 }, { "epoch": 0.2197753545227944, "grad_norm": 0.7737416625022888, "learning_rate": 2.7254677825611056e-05, "loss": 0.1021, "step": 12410 }, { "epoch": 0.21979306405982282, "grad_norm": 1.357486605644226, "learning_rate": 2.725418166063638e-05, "loss": 0.0908, "step": 12411 }, { "epoch": 0.21981077359685125, "grad_norm": 0.8647405505180359, "learning_rate": 2.7253685455346686e-05, "loss": 0.1161, "step": 12412 }, { "epoch": 0.21982848313387968, "grad_norm": 0.693185567855835, "learning_rate": 2.7253189209743605e-05, "loss": 0.103, "step": 12413 }, { "epoch": 0.2198461926709081, "grad_norm": 0.6688523888587952, "learning_rate": 2.725269292382877e-05, "loss": 0.1131, "step": 12414 }, { "epoch": 0.21986390220793653, "grad_norm": 0.8226237893104553, "learning_rate": 2.7252196597603813e-05, "loss": 0.0994, "step": 12415 }, { "epoch": 0.21988161174496496, "grad_norm": 0.7994938492774963, "learning_rate": 2.7251700231070373e-05, "loss": 0.0868, "step": 12416 }, { "epoch": 0.21989932128199338, "grad_norm": 0.5886068940162659, "learning_rate": 2.7251203824230077e-05, "loss": 0.0972, "step": 12417 }, { "epoch": 0.2199170308190218, "grad_norm": 1.091291904449463, "learning_rate": 2.725070737708456e-05, "loss": 0.086, "step": 12418 }, { "epoch": 0.21993474035605023, "grad_norm": 1.2598965167999268, "learning_rate": 2.725021088963545e-05, "loss": 0.0741, "step": 12419 }, { "epoch": 0.21995244989307866, "grad_norm": 0.9927111268043518, "learning_rate": 2.7249714361884394e-05, "loss": 0.1146, "step": 12420 }, { "epoch": 0.2199701594301071, "grad_norm": 1.2626235485076904, "learning_rate": 2.7249217793833013e-05, "loss": 0.1111, "step": 12421 }, { "epoch": 0.2199878689671355, "grad_norm": 1.4366737604141235, "learning_rate": 2.724872118548294e-05, "loss": 0.1091, "step": 12422 }, { "epoch": 0.22000557850416397, "grad_norm": 1.8569846153259277, "learning_rate": 2.7248224536835814e-05, "loss": 0.097, "step": 12423 }, { "epoch": 0.2200232880411924, "grad_norm": 0.772020697593689, "learning_rate": 2.7247727847893272e-05, "loss": 0.1254, "step": 12424 }, { "epoch": 0.22004099757822082, "grad_norm": 1.1065200567245483, "learning_rate": 2.7247231118656943e-05, "loss": 0.0806, "step": 12425 }, { "epoch": 0.22005870711524925, "grad_norm": 0.7443081736564636, "learning_rate": 2.7246734349128462e-05, "loss": 0.1251, "step": 12426 }, { "epoch": 0.22007641665227767, "grad_norm": 1.0374411344528198, "learning_rate": 2.7246237539309467e-05, "loss": 0.1296, "step": 12427 }, { "epoch": 0.2200941261893061, "grad_norm": 0.6614388823509216, "learning_rate": 2.7245740689201583e-05, "loss": 0.0943, "step": 12428 }, { "epoch": 0.22011183572633453, "grad_norm": 0.7630223631858826, "learning_rate": 2.7245243798806454e-05, "loss": 0.1235, "step": 12429 }, { "epoch": 0.22012954526336295, "grad_norm": 1.4860354661941528, "learning_rate": 2.724474686812571e-05, "loss": 0.0976, "step": 12430 }, { "epoch": 0.22014725480039138, "grad_norm": 1.4253222942352295, "learning_rate": 2.7244249897160987e-05, "loss": 0.1407, "step": 12431 }, { "epoch": 0.2201649643374198, "grad_norm": 1.2119460105895996, "learning_rate": 2.7243752885913925e-05, "loss": 0.1174, "step": 12432 }, { "epoch": 0.22018267387444823, "grad_norm": 1.0192174911499023, "learning_rate": 2.724325583438615e-05, "loss": 0.1316, "step": 12433 }, { "epoch": 0.22020038341147666, "grad_norm": 0.40852805972099304, "learning_rate": 2.7242758742579302e-05, "loss": 0.0673, "step": 12434 }, { "epoch": 0.22021809294850508, "grad_norm": 0.9037997722625732, "learning_rate": 2.7242261610495017e-05, "loss": 0.1113, "step": 12435 }, { "epoch": 0.2202358024855335, "grad_norm": 0.9579473733901978, "learning_rate": 2.724176443813493e-05, "loss": 0.1046, "step": 12436 }, { "epoch": 0.22025351202256194, "grad_norm": 0.8673532605171204, "learning_rate": 2.724126722550067e-05, "loss": 0.0734, "step": 12437 }, { "epoch": 0.2202712215595904, "grad_norm": 1.0719633102416992, "learning_rate": 2.724076997259389e-05, "loss": 0.1142, "step": 12438 }, { "epoch": 0.22028893109661882, "grad_norm": 0.6448632478713989, "learning_rate": 2.7240272679416207e-05, "loss": 0.1173, "step": 12439 }, { "epoch": 0.22030664063364724, "grad_norm": 0.8843797445297241, "learning_rate": 2.7239775345969267e-05, "loss": 0.0914, "step": 12440 }, { "epoch": 0.22032435017067567, "grad_norm": 0.7623472213745117, "learning_rate": 2.7239277972254706e-05, "loss": 0.1062, "step": 12441 }, { "epoch": 0.2203420597077041, "grad_norm": 0.8663541674613953, "learning_rate": 2.7238780558274153e-05, "loss": 0.0983, "step": 12442 }, { "epoch": 0.22035976924473252, "grad_norm": 0.7933915853500366, "learning_rate": 2.723828310402926e-05, "loss": 0.1058, "step": 12443 }, { "epoch": 0.22037747878176095, "grad_norm": 0.5426042675971985, "learning_rate": 2.723778560952164e-05, "loss": 0.1027, "step": 12444 }, { "epoch": 0.22039518831878938, "grad_norm": 0.7764717936515808, "learning_rate": 2.723728807475295e-05, "loss": 0.0934, "step": 12445 }, { "epoch": 0.2204128978558178, "grad_norm": 1.1077815294265747, "learning_rate": 2.7236790499724823e-05, "loss": 0.106, "step": 12446 }, { "epoch": 0.22043060739284623, "grad_norm": 0.8088765144348145, "learning_rate": 2.7236292884438888e-05, "loss": 0.1018, "step": 12447 }, { "epoch": 0.22044831692987465, "grad_norm": 0.8936681747436523, "learning_rate": 2.723579522889679e-05, "loss": 0.0935, "step": 12448 }, { "epoch": 0.22046602646690308, "grad_norm": 1.4321861267089844, "learning_rate": 2.7235297533100164e-05, "loss": 0.1592, "step": 12449 }, { "epoch": 0.2204837360039315, "grad_norm": 1.0889936685562134, "learning_rate": 2.7234799797050644e-05, "loss": 0.0934, "step": 12450 }, { "epoch": 0.22050144554095993, "grad_norm": 0.939848780632019, "learning_rate": 2.7234302020749868e-05, "loss": 0.1104, "step": 12451 }, { "epoch": 0.22051915507798836, "grad_norm": 1.0223326683044434, "learning_rate": 2.723380420419948e-05, "loss": 0.1202, "step": 12452 }, { "epoch": 0.22053686461501681, "grad_norm": 0.946086585521698, "learning_rate": 2.7233306347401113e-05, "loss": 0.1196, "step": 12453 }, { "epoch": 0.22055457415204524, "grad_norm": 0.5778943300247192, "learning_rate": 2.7232808450356406e-05, "loss": 0.1038, "step": 12454 }, { "epoch": 0.22057228368907367, "grad_norm": 0.678685188293457, "learning_rate": 2.723231051306699e-05, "loss": 0.1138, "step": 12455 }, { "epoch": 0.2205899932261021, "grad_norm": 1.0188004970550537, "learning_rate": 2.723181253553452e-05, "loss": 0.1263, "step": 12456 }, { "epoch": 0.22060770276313052, "grad_norm": 0.8836138844490051, "learning_rate": 2.7231314517760615e-05, "loss": 0.0998, "step": 12457 }, { "epoch": 0.22062541230015895, "grad_norm": 0.8381696939468384, "learning_rate": 2.723081645974693e-05, "loss": 0.093, "step": 12458 }, { "epoch": 0.22064312183718737, "grad_norm": 1.3863288164138794, "learning_rate": 2.7230318361495093e-05, "loss": 0.0821, "step": 12459 }, { "epoch": 0.2206608313742158, "grad_norm": 0.8066737055778503, "learning_rate": 2.722982022300674e-05, "loss": 0.1119, "step": 12460 }, { "epoch": 0.22067854091124423, "grad_norm": 1.092164158821106, "learning_rate": 2.722932204428352e-05, "loss": 0.1173, "step": 12461 }, { "epoch": 0.22069625044827265, "grad_norm": 0.5401098132133484, "learning_rate": 2.722882382532707e-05, "loss": 0.0753, "step": 12462 }, { "epoch": 0.22071395998530108, "grad_norm": 0.9660764336585999, "learning_rate": 2.722832556613902e-05, "loss": 0.1084, "step": 12463 }, { "epoch": 0.2207316695223295, "grad_norm": 0.9964619278907776, "learning_rate": 2.7227827266721023e-05, "loss": 0.0922, "step": 12464 }, { "epoch": 0.22074937905935793, "grad_norm": 0.8617786169052124, "learning_rate": 2.7227328927074707e-05, "loss": 0.0731, "step": 12465 }, { "epoch": 0.22076708859638636, "grad_norm": 1.0122560262680054, "learning_rate": 2.7226830547201716e-05, "loss": 0.0895, "step": 12466 }, { "epoch": 0.22078479813341478, "grad_norm": 0.6788411140441895, "learning_rate": 2.722633212710369e-05, "loss": 0.0952, "step": 12467 }, { "epoch": 0.22080250767044324, "grad_norm": 0.8946847915649414, "learning_rate": 2.7225833666782266e-05, "loss": 0.1085, "step": 12468 }, { "epoch": 0.22082021720747166, "grad_norm": 0.934143602848053, "learning_rate": 2.722533516623909e-05, "loss": 0.1012, "step": 12469 }, { "epoch": 0.2208379267445001, "grad_norm": 0.5814099907875061, "learning_rate": 2.7224836625475792e-05, "loss": 0.0671, "step": 12470 }, { "epoch": 0.22085563628152852, "grad_norm": 0.7446613907814026, "learning_rate": 2.7224338044494024e-05, "loss": 0.1064, "step": 12471 }, { "epoch": 0.22087334581855694, "grad_norm": 1.1259151697158813, "learning_rate": 2.7223839423295413e-05, "loss": 0.1341, "step": 12472 }, { "epoch": 0.22089105535558537, "grad_norm": 0.6113245487213135, "learning_rate": 2.7223340761881613e-05, "loss": 0.1201, "step": 12473 }, { "epoch": 0.2209087648926138, "grad_norm": 0.8967807292938232, "learning_rate": 2.722284206025426e-05, "loss": 0.0992, "step": 12474 }, { "epoch": 0.22092647442964222, "grad_norm": 0.603929877281189, "learning_rate": 2.7222343318414985e-05, "loss": 0.0946, "step": 12475 }, { "epoch": 0.22094418396667065, "grad_norm": 1.3147401809692383, "learning_rate": 2.722184453636544e-05, "loss": 0.1416, "step": 12476 }, { "epoch": 0.22096189350369908, "grad_norm": 0.4569435119628906, "learning_rate": 2.7221345714107263e-05, "loss": 0.0681, "step": 12477 }, { "epoch": 0.2209796030407275, "grad_norm": 0.9119472503662109, "learning_rate": 2.7220846851642095e-05, "loss": 0.0957, "step": 12478 }, { "epoch": 0.22099731257775593, "grad_norm": 0.7439149022102356, "learning_rate": 2.7220347948971576e-05, "loss": 0.101, "step": 12479 }, { "epoch": 0.22101502211478435, "grad_norm": 0.9958224892616272, "learning_rate": 2.7219849006097354e-05, "loss": 0.1189, "step": 12480 }, { "epoch": 0.22103273165181278, "grad_norm": 1.3462412357330322, "learning_rate": 2.7219350023021053e-05, "loss": 0.0844, "step": 12481 }, { "epoch": 0.2210504411888412, "grad_norm": 1.4708868265151978, "learning_rate": 2.7218850999744335e-05, "loss": 0.0945, "step": 12482 }, { "epoch": 0.22106815072586966, "grad_norm": 0.742777407169342, "learning_rate": 2.7218351936268833e-05, "loss": 0.1019, "step": 12483 }, { "epoch": 0.2210858602628981, "grad_norm": 0.884777843952179, "learning_rate": 2.7217852832596187e-05, "loss": 0.0703, "step": 12484 }, { "epoch": 0.2211035697999265, "grad_norm": 0.7798135280609131, "learning_rate": 2.721735368872804e-05, "loss": 0.1155, "step": 12485 }, { "epoch": 0.22112127933695494, "grad_norm": 0.6359502673149109, "learning_rate": 2.7216854504666036e-05, "loss": 0.0959, "step": 12486 }, { "epoch": 0.22113898887398337, "grad_norm": 0.7082203030586243, "learning_rate": 2.721635528041182e-05, "loss": 0.0629, "step": 12487 }, { "epoch": 0.2211566984110118, "grad_norm": 1.0773694515228271, "learning_rate": 2.7215856015967023e-05, "loss": 0.1079, "step": 12488 }, { "epoch": 0.22117440794804022, "grad_norm": 0.709498941898346, "learning_rate": 2.7215356711333302e-05, "loss": 0.0701, "step": 12489 }, { "epoch": 0.22119211748506865, "grad_norm": 0.7905908823013306, "learning_rate": 2.7214857366512293e-05, "loss": 0.1228, "step": 12490 }, { "epoch": 0.22120982702209707, "grad_norm": 1.2077863216400146, "learning_rate": 2.7214357981505632e-05, "loss": 0.1408, "step": 12491 }, { "epoch": 0.2212275365591255, "grad_norm": 0.9566108584403992, "learning_rate": 2.7213858556314976e-05, "loss": 0.1237, "step": 12492 }, { "epoch": 0.22124524609615392, "grad_norm": 0.7988021373748779, "learning_rate": 2.7213359090941957e-05, "loss": 0.0973, "step": 12493 }, { "epoch": 0.22126295563318235, "grad_norm": 0.65253084897995, "learning_rate": 2.7212859585388223e-05, "loss": 0.1051, "step": 12494 }, { "epoch": 0.22128066517021078, "grad_norm": 0.695127010345459, "learning_rate": 2.7212360039655417e-05, "loss": 0.1139, "step": 12495 }, { "epoch": 0.2212983747072392, "grad_norm": 0.8779201507568359, "learning_rate": 2.7211860453745175e-05, "loss": 0.1073, "step": 12496 }, { "epoch": 0.22131608424426766, "grad_norm": 1.8787801265716553, "learning_rate": 2.7211360827659158e-05, "loss": 0.1234, "step": 12497 }, { "epoch": 0.22133379378129608, "grad_norm": 1.021389126777649, "learning_rate": 2.721086116139899e-05, "loss": 0.1375, "step": 12498 }, { "epoch": 0.2213515033183245, "grad_norm": 1.5919748544692993, "learning_rate": 2.721036145496633e-05, "loss": 0.0993, "step": 12499 }, { "epoch": 0.22136921285535294, "grad_norm": 0.8334338068962097, "learning_rate": 2.7209861708362814e-05, "loss": 0.1203, "step": 12500 }, { "epoch": 0.22138692239238136, "grad_norm": 1.279595136642456, "learning_rate": 2.7209361921590086e-05, "loss": 0.1073, "step": 12501 }, { "epoch": 0.2214046319294098, "grad_norm": 1.7927601337432861, "learning_rate": 2.7208862094649795e-05, "loss": 0.1452, "step": 12502 }, { "epoch": 0.22142234146643822, "grad_norm": 1.0131577253341675, "learning_rate": 2.7208362227543578e-05, "loss": 0.0979, "step": 12503 }, { "epoch": 0.22144005100346664, "grad_norm": 1.0264511108398438, "learning_rate": 2.7207862320273088e-05, "loss": 0.1167, "step": 12504 }, { "epoch": 0.22145776054049507, "grad_norm": 0.6317465901374817, "learning_rate": 2.7207362372839962e-05, "loss": 0.1076, "step": 12505 }, { "epoch": 0.2214754700775235, "grad_norm": 0.9134203791618347, "learning_rate": 2.7206862385245856e-05, "loss": 0.1256, "step": 12506 }, { "epoch": 0.22149317961455192, "grad_norm": 0.6379525065422058, "learning_rate": 2.72063623574924e-05, "loss": 0.1064, "step": 12507 }, { "epoch": 0.22151088915158035, "grad_norm": 0.953880250453949, "learning_rate": 2.720586228958125e-05, "loss": 0.1023, "step": 12508 }, { "epoch": 0.22152859868860877, "grad_norm": 0.7939432263374329, "learning_rate": 2.720536218151405e-05, "loss": 0.0975, "step": 12509 }, { "epoch": 0.2215463082256372, "grad_norm": 0.9115598201751709, "learning_rate": 2.7204862033292437e-05, "loss": 0.1316, "step": 12510 }, { "epoch": 0.22156401776266563, "grad_norm": 0.9290445446968079, "learning_rate": 2.7204361844918065e-05, "loss": 0.1002, "step": 12511 }, { "epoch": 0.22158172729969408, "grad_norm": 0.8546584248542786, "learning_rate": 2.7203861616392576e-05, "loss": 0.112, "step": 12512 }, { "epoch": 0.2215994368367225, "grad_norm": 0.8176910281181335, "learning_rate": 2.7203361347717617e-05, "loss": 0.0942, "step": 12513 }, { "epoch": 0.22161714637375093, "grad_norm": 1.1867095232009888, "learning_rate": 2.7202861038894835e-05, "loss": 0.1547, "step": 12514 }, { "epoch": 0.22163485591077936, "grad_norm": 0.8959850072860718, "learning_rate": 2.7202360689925872e-05, "loss": 0.0827, "step": 12515 }, { "epoch": 0.2216525654478078, "grad_norm": 0.9196569919586182, "learning_rate": 2.720186030081238e-05, "loss": 0.098, "step": 12516 }, { "epoch": 0.2216702749848362, "grad_norm": 0.810756266117096, "learning_rate": 2.7201359871556e-05, "loss": 0.1298, "step": 12517 }, { "epoch": 0.22168798452186464, "grad_norm": 0.6102362275123596, "learning_rate": 2.720085940215838e-05, "loss": 0.0903, "step": 12518 }, { "epoch": 0.22170569405889307, "grad_norm": 1.7491015195846558, "learning_rate": 2.7200358892621165e-05, "loss": 0.0958, "step": 12519 }, { "epoch": 0.2217234035959215, "grad_norm": 1.128690242767334, "learning_rate": 2.7199858342946007e-05, "loss": 0.1459, "step": 12520 }, { "epoch": 0.22174111313294992, "grad_norm": 0.7600632309913635, "learning_rate": 2.7199357753134546e-05, "loss": 0.0712, "step": 12521 }, { "epoch": 0.22175882266997834, "grad_norm": 1.0377013683319092, "learning_rate": 2.719885712318843e-05, "loss": 0.0698, "step": 12522 }, { "epoch": 0.22177653220700677, "grad_norm": 1.7349729537963867, "learning_rate": 2.719835645310931e-05, "loss": 0.1271, "step": 12523 }, { "epoch": 0.2217942417440352, "grad_norm": 1.0627936124801636, "learning_rate": 2.719785574289883e-05, "loss": 0.1314, "step": 12524 }, { "epoch": 0.22181195128106362, "grad_norm": 1.0988285541534424, "learning_rate": 2.7197354992558645e-05, "loss": 0.1053, "step": 12525 }, { "epoch": 0.22182966081809205, "grad_norm": 1.010528564453125, "learning_rate": 2.719685420209039e-05, "loss": 0.0746, "step": 12526 }, { "epoch": 0.2218473703551205, "grad_norm": 1.127104640007019, "learning_rate": 2.719635337149572e-05, "loss": 0.1344, "step": 12527 }, { "epoch": 0.22186507989214893, "grad_norm": 0.936173141002655, "learning_rate": 2.7195852500776277e-05, "loss": 0.0833, "step": 12528 }, { "epoch": 0.22188278942917736, "grad_norm": 1.0432367324829102, "learning_rate": 2.7195351589933718e-05, "loss": 0.1234, "step": 12529 }, { "epoch": 0.22190049896620578, "grad_norm": 0.6266889572143555, "learning_rate": 2.7194850638969684e-05, "loss": 0.0961, "step": 12530 }, { "epoch": 0.2219182085032342, "grad_norm": 0.7569580674171448, "learning_rate": 2.719434964788582e-05, "loss": 0.097, "step": 12531 }, { "epoch": 0.22193591804026264, "grad_norm": 0.6237920522689819, "learning_rate": 2.7193848616683786e-05, "loss": 0.1048, "step": 12532 }, { "epoch": 0.22195362757729106, "grad_norm": 1.2780334949493408, "learning_rate": 2.719334754536522e-05, "loss": 0.0756, "step": 12533 }, { "epoch": 0.2219713371143195, "grad_norm": 0.801118016242981, "learning_rate": 2.7192846433931774e-05, "loss": 0.0755, "step": 12534 }, { "epoch": 0.22198904665134792, "grad_norm": 0.5251893401145935, "learning_rate": 2.71923452823851e-05, "loss": 0.0866, "step": 12535 }, { "epoch": 0.22200675618837634, "grad_norm": 0.8154137134552002, "learning_rate": 2.7191844090726837e-05, "loss": 0.102, "step": 12536 }, { "epoch": 0.22202446572540477, "grad_norm": 1.1663507223129272, "learning_rate": 2.719134285895865e-05, "loss": 0.1358, "step": 12537 }, { "epoch": 0.2220421752624332, "grad_norm": 0.7744766473770142, "learning_rate": 2.7190841587082167e-05, "loss": 0.1161, "step": 12538 }, { "epoch": 0.22205988479946162, "grad_norm": 0.8143596053123474, "learning_rate": 2.719034027509905e-05, "loss": 0.0852, "step": 12539 }, { "epoch": 0.22207759433649005, "grad_norm": 1.3520691394805908, "learning_rate": 2.718983892301095e-05, "loss": 0.1187, "step": 12540 }, { "epoch": 0.22209530387351847, "grad_norm": 0.9306900501251221, "learning_rate": 2.718933753081951e-05, "loss": 0.0883, "step": 12541 }, { "epoch": 0.22211301341054693, "grad_norm": 0.6973744630813599, "learning_rate": 2.7188836098526386e-05, "loss": 0.1023, "step": 12542 }, { "epoch": 0.22213072294757535, "grad_norm": 1.0200492143630981, "learning_rate": 2.7188334626133228e-05, "loss": 0.1019, "step": 12543 }, { "epoch": 0.22214843248460378, "grad_norm": 1.1583963632583618, "learning_rate": 2.718783311364167e-05, "loss": 0.1121, "step": 12544 }, { "epoch": 0.2221661420216322, "grad_norm": 0.8948832154273987, "learning_rate": 2.7187331561053382e-05, "loss": 0.1, "step": 12545 }, { "epoch": 0.22218385155866063, "grad_norm": 0.7137362957000732, "learning_rate": 2.7186829968370005e-05, "loss": 0.1054, "step": 12546 }, { "epoch": 0.22220156109568906, "grad_norm": 0.9129356145858765, "learning_rate": 2.718632833559319e-05, "loss": 0.0715, "step": 12547 }, { "epoch": 0.22221927063271749, "grad_norm": 0.9100565910339355, "learning_rate": 2.7185826662724586e-05, "loss": 0.1012, "step": 12548 }, { "epoch": 0.2222369801697459, "grad_norm": 1.3360137939453125, "learning_rate": 2.7185324949765848e-05, "loss": 0.1277, "step": 12549 }, { "epoch": 0.22225468970677434, "grad_norm": 0.6988869309425354, "learning_rate": 2.7184823196718615e-05, "loss": 0.1135, "step": 12550 }, { "epoch": 0.22227239924380277, "grad_norm": 1.56271493434906, "learning_rate": 2.7184321403584552e-05, "loss": 0.0948, "step": 12551 }, { "epoch": 0.2222901087808312, "grad_norm": 0.7378360629081726, "learning_rate": 2.7183819570365306e-05, "loss": 0.0857, "step": 12552 }, { "epoch": 0.22230781831785962, "grad_norm": 0.9634642601013184, "learning_rate": 2.7183317697062522e-05, "loss": 0.0816, "step": 12553 }, { "epoch": 0.22232552785488804, "grad_norm": 0.46855348348617554, "learning_rate": 2.718281578367786e-05, "loss": 0.0821, "step": 12554 }, { "epoch": 0.22234323739191647, "grad_norm": 0.9048058986663818, "learning_rate": 2.718231383021296e-05, "loss": 0.0872, "step": 12555 }, { "epoch": 0.2223609469289449, "grad_norm": 0.9924952387809753, "learning_rate": 2.718181183666948e-05, "loss": 0.0657, "step": 12556 }, { "epoch": 0.22237865646597335, "grad_norm": 0.9376689195632935, "learning_rate": 2.7181309803049076e-05, "loss": 0.1126, "step": 12557 }, { "epoch": 0.22239636600300178, "grad_norm": 0.708635687828064, "learning_rate": 2.7180807729353394e-05, "loss": 0.1121, "step": 12558 }, { "epoch": 0.2224140755400302, "grad_norm": 1.5031136274337769, "learning_rate": 2.7180305615584086e-05, "loss": 0.1236, "step": 12559 }, { "epoch": 0.22243178507705863, "grad_norm": 1.267539381980896, "learning_rate": 2.71798034617428e-05, "loss": 0.1182, "step": 12560 }, { "epoch": 0.22244949461408706, "grad_norm": 1.624733805656433, "learning_rate": 2.7179301267831196e-05, "loss": 0.1636, "step": 12561 }, { "epoch": 0.22246720415111548, "grad_norm": 0.7758660912513733, "learning_rate": 2.717879903385092e-05, "loss": 0.0645, "step": 12562 }, { "epoch": 0.2224849136881439, "grad_norm": 0.7496982216835022, "learning_rate": 2.7178296759803637e-05, "loss": 0.1047, "step": 12563 }, { "epoch": 0.22250262322517234, "grad_norm": 0.607614278793335, "learning_rate": 2.717779444569098e-05, "loss": 0.0591, "step": 12564 }, { "epoch": 0.22252033276220076, "grad_norm": 0.8416399359703064, "learning_rate": 2.7177292091514615e-05, "loss": 0.0884, "step": 12565 }, { "epoch": 0.2225380422992292, "grad_norm": 0.8123149871826172, "learning_rate": 2.7176789697276188e-05, "loss": 0.1402, "step": 12566 }, { "epoch": 0.22255575183625761, "grad_norm": 0.734228789806366, "learning_rate": 2.7176287262977354e-05, "loss": 0.0961, "step": 12567 }, { "epoch": 0.22257346137328604, "grad_norm": 1.257554531097412, "learning_rate": 2.7175784788619767e-05, "loss": 0.114, "step": 12568 }, { "epoch": 0.22259117091031447, "grad_norm": 0.7004863619804382, "learning_rate": 2.717528227420508e-05, "loss": 0.0953, "step": 12569 }, { "epoch": 0.2226088804473429, "grad_norm": 0.8592771291732788, "learning_rate": 2.7174779719734946e-05, "loss": 0.1336, "step": 12570 }, { "epoch": 0.22262658998437132, "grad_norm": 0.9392019510269165, "learning_rate": 2.7174277125211018e-05, "loss": 0.1288, "step": 12571 }, { "epoch": 0.22264429952139977, "grad_norm": 1.0216777324676514, "learning_rate": 2.717377449063495e-05, "loss": 0.1447, "step": 12572 }, { "epoch": 0.2226620090584282, "grad_norm": 0.6368405818939209, "learning_rate": 2.7173271816008394e-05, "loss": 0.1458, "step": 12573 }, { "epoch": 0.22267971859545663, "grad_norm": 0.660023033618927, "learning_rate": 2.7172769101333005e-05, "loss": 0.099, "step": 12574 }, { "epoch": 0.22269742813248505, "grad_norm": 1.1012495756149292, "learning_rate": 2.717226634661044e-05, "loss": 0.1068, "step": 12575 }, { "epoch": 0.22271513766951348, "grad_norm": 0.9633846879005432, "learning_rate": 2.717176355184235e-05, "loss": 0.0947, "step": 12576 }, { "epoch": 0.2227328472065419, "grad_norm": 0.6469177603721619, "learning_rate": 2.7171260717030384e-05, "loss": 0.0937, "step": 12577 }, { "epoch": 0.22275055674357033, "grad_norm": 1.0881128311157227, "learning_rate": 2.7170757842176206e-05, "loss": 0.1441, "step": 12578 }, { "epoch": 0.22276826628059876, "grad_norm": 0.9908155202865601, "learning_rate": 2.7170254927281462e-05, "loss": 0.1401, "step": 12579 }, { "epoch": 0.22278597581762719, "grad_norm": 0.8224003911018372, "learning_rate": 2.7169751972347814e-05, "loss": 0.0985, "step": 12580 }, { "epoch": 0.2228036853546556, "grad_norm": 0.7260321974754333, "learning_rate": 2.716924897737691e-05, "loss": 0.102, "step": 12581 }, { "epoch": 0.22282139489168404, "grad_norm": 0.8440296053886414, "learning_rate": 2.7168745942370408e-05, "loss": 0.1032, "step": 12582 }, { "epoch": 0.22283910442871246, "grad_norm": 0.7677128314971924, "learning_rate": 2.7168242867329968e-05, "loss": 0.0907, "step": 12583 }, { "epoch": 0.2228568139657409, "grad_norm": 0.7032915353775024, "learning_rate": 2.716773975225723e-05, "loss": 0.1159, "step": 12584 }, { "epoch": 0.22287452350276932, "grad_norm": 0.8584654927253723, "learning_rate": 2.716723659715387e-05, "loss": 0.096, "step": 12585 }, { "epoch": 0.22289223303979774, "grad_norm": 0.6277837157249451, "learning_rate": 2.7166733402021524e-05, "loss": 0.0894, "step": 12586 }, { "epoch": 0.2229099425768262, "grad_norm": 1.2470993995666504, "learning_rate": 2.7166230166861855e-05, "loss": 0.1064, "step": 12587 }, { "epoch": 0.22292765211385462, "grad_norm": 0.7653771042823792, "learning_rate": 2.7165726891676524e-05, "loss": 0.0839, "step": 12588 }, { "epoch": 0.22294536165088305, "grad_norm": 0.9135160446166992, "learning_rate": 2.716522357646718e-05, "loss": 0.094, "step": 12589 }, { "epoch": 0.22296307118791148, "grad_norm": 0.7668852806091309, "learning_rate": 2.716472022123548e-05, "loss": 0.1539, "step": 12590 }, { "epoch": 0.2229807807249399, "grad_norm": 0.9730843901634216, "learning_rate": 2.716421682598308e-05, "loss": 0.1005, "step": 12591 }, { "epoch": 0.22299849026196833, "grad_norm": 0.7501547932624817, "learning_rate": 2.716371339071164e-05, "loss": 0.1113, "step": 12592 }, { "epoch": 0.22301619979899676, "grad_norm": 0.8926236033439636, "learning_rate": 2.7163209915422813e-05, "loss": 0.1051, "step": 12593 }, { "epoch": 0.22303390933602518, "grad_norm": 1.0940912961959839, "learning_rate": 2.7162706400118252e-05, "loss": 0.0835, "step": 12594 }, { "epoch": 0.2230516188730536, "grad_norm": 1.6315029859542847, "learning_rate": 2.7162202844799624e-05, "loss": 0.1247, "step": 12595 }, { "epoch": 0.22306932841008204, "grad_norm": 1.1878740787506104, "learning_rate": 2.7161699249468576e-05, "loss": 0.0946, "step": 12596 }, { "epoch": 0.22308703794711046, "grad_norm": 1.0076738595962524, "learning_rate": 2.7161195614126764e-05, "loss": 0.1054, "step": 12597 }, { "epoch": 0.2231047474841389, "grad_norm": 0.8049816489219666, "learning_rate": 2.716069193877585e-05, "loss": 0.0692, "step": 12598 }, { "epoch": 0.22312245702116731, "grad_norm": 0.6669755578041077, "learning_rate": 2.716018822341749e-05, "loss": 0.1171, "step": 12599 }, { "epoch": 0.22314016655819574, "grad_norm": 0.9087768793106079, "learning_rate": 2.7159684468053344e-05, "loss": 0.1061, "step": 12600 }, { "epoch": 0.22315787609522417, "grad_norm": 0.9212879538536072, "learning_rate": 2.715918067268506e-05, "loss": 0.1343, "step": 12601 }, { "epoch": 0.22317558563225262, "grad_norm": 0.5039656758308411, "learning_rate": 2.7158676837314304e-05, "loss": 0.1232, "step": 12602 }, { "epoch": 0.22319329516928105, "grad_norm": 1.4671590328216553, "learning_rate": 2.7158172961942728e-05, "loss": 0.1313, "step": 12603 }, { "epoch": 0.22321100470630947, "grad_norm": 0.8754884004592896, "learning_rate": 2.7157669046571997e-05, "loss": 0.1734, "step": 12604 }, { "epoch": 0.2232287142433379, "grad_norm": 0.9276992678642273, "learning_rate": 2.7157165091203763e-05, "loss": 0.0692, "step": 12605 }, { "epoch": 0.22324642378036633, "grad_norm": 0.9463479518890381, "learning_rate": 2.7156661095839684e-05, "loss": 0.0953, "step": 12606 }, { "epoch": 0.22326413331739475, "grad_norm": 0.7319667935371399, "learning_rate": 2.7156157060481427e-05, "loss": 0.0605, "step": 12607 }, { "epoch": 0.22328184285442318, "grad_norm": 0.9325129389762878, "learning_rate": 2.7155652985130634e-05, "loss": 0.078, "step": 12608 }, { "epoch": 0.2232995523914516, "grad_norm": 0.8276948928833008, "learning_rate": 2.7155148869788977e-05, "loss": 0.0936, "step": 12609 }, { "epoch": 0.22331726192848003, "grad_norm": 0.9253665208816528, "learning_rate": 2.7154644714458108e-05, "loss": 0.1269, "step": 12610 }, { "epoch": 0.22333497146550846, "grad_norm": 0.8220722079277039, "learning_rate": 2.7154140519139687e-05, "loss": 0.0868, "step": 12611 }, { "epoch": 0.22335268100253688, "grad_norm": 1.2716349363327026, "learning_rate": 2.7153636283835376e-05, "loss": 0.1231, "step": 12612 }, { "epoch": 0.2233703905395653, "grad_norm": 0.9770463705062866, "learning_rate": 2.7153132008546827e-05, "loss": 0.1231, "step": 12613 }, { "epoch": 0.22338810007659374, "grad_norm": 1.272804617881775, "learning_rate": 2.7152627693275704e-05, "loss": 0.0988, "step": 12614 }, { "epoch": 0.22340580961362216, "grad_norm": 0.9280007481575012, "learning_rate": 2.7152123338023663e-05, "loss": 0.1141, "step": 12615 }, { "epoch": 0.22342351915065062, "grad_norm": 0.6039058566093445, "learning_rate": 2.715161894279237e-05, "loss": 0.0776, "step": 12616 }, { "epoch": 0.22344122868767904, "grad_norm": 0.882233738899231, "learning_rate": 2.7151114507583477e-05, "loss": 0.0946, "step": 12617 }, { "epoch": 0.22345893822470747, "grad_norm": 1.2444958686828613, "learning_rate": 2.7150610032398646e-05, "loss": 0.1288, "step": 12618 }, { "epoch": 0.2234766477617359, "grad_norm": 0.8732183575630188, "learning_rate": 2.7150105517239537e-05, "loss": 0.0802, "step": 12619 }, { "epoch": 0.22349435729876432, "grad_norm": 0.776097297668457, "learning_rate": 2.7149600962107813e-05, "loss": 0.1138, "step": 12620 }, { "epoch": 0.22351206683579275, "grad_norm": 0.9901642203330994, "learning_rate": 2.7149096367005125e-05, "loss": 0.0971, "step": 12621 }, { "epoch": 0.22352977637282118, "grad_norm": 0.9048697352409363, "learning_rate": 2.714859173193314e-05, "loss": 0.1311, "step": 12622 }, { "epoch": 0.2235474859098496, "grad_norm": 0.6213217377662659, "learning_rate": 2.7148087056893522e-05, "loss": 0.0908, "step": 12623 }, { "epoch": 0.22356519544687803, "grad_norm": 0.9269563555717468, "learning_rate": 2.714758234188792e-05, "loss": 0.1043, "step": 12624 }, { "epoch": 0.22358290498390646, "grad_norm": 0.9489111304283142, "learning_rate": 2.7147077586918006e-05, "loss": 0.1143, "step": 12625 }, { "epoch": 0.22360061452093488, "grad_norm": 1.0633537769317627, "learning_rate": 2.714657279198543e-05, "loss": 0.1163, "step": 12626 }, { "epoch": 0.2236183240579633, "grad_norm": 0.5423789620399475, "learning_rate": 2.7146067957091862e-05, "loss": 0.08, "step": 12627 }, { "epoch": 0.22363603359499173, "grad_norm": 1.0764827728271484, "learning_rate": 2.714556308223896e-05, "loss": 0.0941, "step": 12628 }, { "epoch": 0.22365374313202016, "grad_norm": 1.0292032957077026, "learning_rate": 2.7145058167428382e-05, "loss": 0.0946, "step": 12629 }, { "epoch": 0.2236714526690486, "grad_norm": 0.8996865153312683, "learning_rate": 2.714455321266179e-05, "loss": 0.1079, "step": 12630 }, { "epoch": 0.22368916220607704, "grad_norm": 1.068701148033142, "learning_rate": 2.714404821794085e-05, "loss": 0.1162, "step": 12631 }, { "epoch": 0.22370687174310547, "grad_norm": 0.6920168995857239, "learning_rate": 2.7143543183267216e-05, "loss": 0.0911, "step": 12632 }, { "epoch": 0.2237245812801339, "grad_norm": 0.5070422291755676, "learning_rate": 2.714303810864255e-05, "loss": 0.0837, "step": 12633 }, { "epoch": 0.22374229081716232, "grad_norm": 1.0142866373062134, "learning_rate": 2.7142532994068523e-05, "loss": 0.1143, "step": 12634 }, { "epoch": 0.22376000035419075, "grad_norm": 0.6335418820381165, "learning_rate": 2.714202783954679e-05, "loss": 0.1042, "step": 12635 }, { "epoch": 0.22377770989121917, "grad_norm": 0.9273338317871094, "learning_rate": 2.714152264507901e-05, "loss": 0.114, "step": 12636 }, { "epoch": 0.2237954194282476, "grad_norm": 0.8046538829803467, "learning_rate": 2.714101741066685e-05, "loss": 0.1173, "step": 12637 }, { "epoch": 0.22381312896527603, "grad_norm": 0.7153405547142029, "learning_rate": 2.714051213631197e-05, "loss": 0.0721, "step": 12638 }, { "epoch": 0.22383083850230445, "grad_norm": 0.9407280087471008, "learning_rate": 2.7140006822016033e-05, "loss": 0.0838, "step": 12639 }, { "epoch": 0.22384854803933288, "grad_norm": 1.0547829866409302, "learning_rate": 2.7139501467780707e-05, "loss": 0.1212, "step": 12640 }, { "epoch": 0.2238662575763613, "grad_norm": 1.0610090494155884, "learning_rate": 2.7138996073607646e-05, "loss": 0.1027, "step": 12641 }, { "epoch": 0.22388396711338973, "grad_norm": 0.8667235374450684, "learning_rate": 2.7138490639498512e-05, "loss": 0.1315, "step": 12642 }, { "epoch": 0.22390167665041816, "grad_norm": 0.95002281665802, "learning_rate": 2.7137985165454973e-05, "loss": 0.1174, "step": 12643 }, { "epoch": 0.22391938618744658, "grad_norm": 0.5408602356910706, "learning_rate": 2.7137479651478693e-05, "loss": 0.09, "step": 12644 }, { "epoch": 0.223937095724475, "grad_norm": 0.7868276834487915, "learning_rate": 2.7136974097571328e-05, "loss": 0.1307, "step": 12645 }, { "epoch": 0.22395480526150346, "grad_norm": 0.7404110431671143, "learning_rate": 2.7136468503734552e-05, "loss": 0.0897, "step": 12646 }, { "epoch": 0.2239725147985319, "grad_norm": 0.5913466811180115, "learning_rate": 2.713596286997002e-05, "loss": 0.1232, "step": 12647 }, { "epoch": 0.22399022433556032, "grad_norm": 0.7775266170501709, "learning_rate": 2.7135457196279394e-05, "loss": 0.0916, "step": 12648 }, { "epoch": 0.22400793387258874, "grad_norm": 1.1808043718338013, "learning_rate": 2.7134951482664343e-05, "loss": 0.083, "step": 12649 }, { "epoch": 0.22402564340961717, "grad_norm": 0.45051804184913635, "learning_rate": 2.7134445729126532e-05, "loss": 0.0659, "step": 12650 }, { "epoch": 0.2240433529466456, "grad_norm": 0.8285245299339294, "learning_rate": 2.7133939935667618e-05, "loss": 0.0949, "step": 12651 }, { "epoch": 0.22406106248367402, "grad_norm": 0.577525794506073, "learning_rate": 2.713343410228927e-05, "loss": 0.1086, "step": 12652 }, { "epoch": 0.22407877202070245, "grad_norm": 0.775039792060852, "learning_rate": 2.7132928228993154e-05, "loss": 0.1026, "step": 12653 }, { "epoch": 0.22409648155773088, "grad_norm": 0.5248772501945496, "learning_rate": 2.7132422315780928e-05, "loss": 0.0909, "step": 12654 }, { "epoch": 0.2241141910947593, "grad_norm": 0.6184224486351013, "learning_rate": 2.713191636265426e-05, "loss": 0.1038, "step": 12655 }, { "epoch": 0.22413190063178773, "grad_norm": 0.9637089967727661, "learning_rate": 2.7131410369614814e-05, "loss": 0.1322, "step": 12656 }, { "epoch": 0.22414961016881615, "grad_norm": 0.7454419732093811, "learning_rate": 2.713090433666426e-05, "loss": 0.0934, "step": 12657 }, { "epoch": 0.22416731970584458, "grad_norm": 1.4735445976257324, "learning_rate": 2.713039826380425e-05, "loss": 0.1296, "step": 12658 }, { "epoch": 0.224185029242873, "grad_norm": 0.6388847827911377, "learning_rate": 2.712989215103646e-05, "loss": 0.092, "step": 12659 }, { "epoch": 0.22420273877990143, "grad_norm": 0.92588871717453, "learning_rate": 2.712938599836255e-05, "loss": 0.087, "step": 12660 }, { "epoch": 0.2242204483169299, "grad_norm": 0.9475439786911011, "learning_rate": 2.712887980578419e-05, "loss": 0.1278, "step": 12661 }, { "epoch": 0.22423815785395831, "grad_norm": 1.110032081604004, "learning_rate": 2.712837357330304e-05, "loss": 0.0947, "step": 12662 }, { "epoch": 0.22425586739098674, "grad_norm": 0.79212486743927, "learning_rate": 2.7127867300920772e-05, "loss": 0.0853, "step": 12663 }, { "epoch": 0.22427357692801517, "grad_norm": 1.1933352947235107, "learning_rate": 2.7127360988639042e-05, "loss": 0.1178, "step": 12664 }, { "epoch": 0.2242912864650436, "grad_norm": 0.8415406346321106, "learning_rate": 2.7126854636459523e-05, "loss": 0.1066, "step": 12665 }, { "epoch": 0.22430899600207202, "grad_norm": 0.668113112449646, "learning_rate": 2.712634824438388e-05, "loss": 0.0581, "step": 12666 }, { "epoch": 0.22432670553910045, "grad_norm": 0.7186543941497803, "learning_rate": 2.7125841812413775e-05, "loss": 0.086, "step": 12667 }, { "epoch": 0.22434441507612887, "grad_norm": 0.5805327892303467, "learning_rate": 2.712533534055088e-05, "loss": 0.1119, "step": 12668 }, { "epoch": 0.2243621246131573, "grad_norm": 0.7598413825035095, "learning_rate": 2.712482882879685e-05, "loss": 0.1408, "step": 12669 }, { "epoch": 0.22437983415018573, "grad_norm": 0.6843700408935547, "learning_rate": 2.712432227715337e-05, "loss": 0.0912, "step": 12670 }, { "epoch": 0.22439754368721415, "grad_norm": 1.0787065029144287, "learning_rate": 2.7123815685622092e-05, "loss": 0.0955, "step": 12671 }, { "epoch": 0.22441525322424258, "grad_norm": 0.9270743727684021, "learning_rate": 2.7123309054204687e-05, "loss": 0.0875, "step": 12672 }, { "epoch": 0.224432962761271, "grad_norm": 1.0545347929000854, "learning_rate": 2.712280238290282e-05, "loss": 0.1316, "step": 12673 }, { "epoch": 0.22445067229829943, "grad_norm": 0.9884361624717712, "learning_rate": 2.712229567171816e-05, "loss": 0.1228, "step": 12674 }, { "epoch": 0.22446838183532786, "grad_norm": 0.6350904703140259, "learning_rate": 2.712178892065237e-05, "loss": 0.0904, "step": 12675 }, { "epoch": 0.2244860913723563, "grad_norm": 1.0123740434646606, "learning_rate": 2.712128212970713e-05, "loss": 0.0925, "step": 12676 }, { "epoch": 0.22450380090938474, "grad_norm": 0.8728259205818176, "learning_rate": 2.712077529888409e-05, "loss": 0.1355, "step": 12677 }, { "epoch": 0.22452151044641316, "grad_norm": 0.8292286992073059, "learning_rate": 2.7120268428184924e-05, "loss": 0.0957, "step": 12678 }, { "epoch": 0.2245392199834416, "grad_norm": 0.8208956718444824, "learning_rate": 2.7119761517611308e-05, "loss": 0.1193, "step": 12679 }, { "epoch": 0.22455692952047002, "grad_norm": 0.7415727376937866, "learning_rate": 2.7119254567164897e-05, "loss": 0.0783, "step": 12680 }, { "epoch": 0.22457463905749844, "grad_norm": 0.8991319537162781, "learning_rate": 2.7118747576847364e-05, "loss": 0.1304, "step": 12681 }, { "epoch": 0.22459234859452687, "grad_norm": 0.7091094851493835, "learning_rate": 2.711824054666038e-05, "loss": 0.0924, "step": 12682 }, { "epoch": 0.2246100581315553, "grad_norm": 0.7888717651367188, "learning_rate": 2.711773347660561e-05, "loss": 0.066, "step": 12683 }, { "epoch": 0.22462776766858372, "grad_norm": 1.5267198085784912, "learning_rate": 2.7117226366684723e-05, "loss": 0.0854, "step": 12684 }, { "epoch": 0.22464547720561215, "grad_norm": 0.7909937500953674, "learning_rate": 2.711671921689938e-05, "loss": 0.0682, "step": 12685 }, { "epoch": 0.22466318674264057, "grad_norm": 0.6797601580619812, "learning_rate": 2.7116212027251265e-05, "loss": 0.0655, "step": 12686 }, { "epoch": 0.224680896279669, "grad_norm": 1.0331212282180786, "learning_rate": 2.7115704797742033e-05, "loss": 0.1214, "step": 12687 }, { "epoch": 0.22469860581669743, "grad_norm": 0.9352075457572937, "learning_rate": 2.711519752837336e-05, "loss": 0.1087, "step": 12688 }, { "epoch": 0.22471631535372585, "grad_norm": 1.2569290399551392, "learning_rate": 2.7114690219146908e-05, "loss": 0.0989, "step": 12689 }, { "epoch": 0.22473402489075428, "grad_norm": 0.6307828426361084, "learning_rate": 2.7114182870064355e-05, "loss": 0.0891, "step": 12690 }, { "epoch": 0.22475173442778273, "grad_norm": 0.8032616972923279, "learning_rate": 2.7113675481127365e-05, "loss": 0.1212, "step": 12691 }, { "epoch": 0.22476944396481116, "grad_norm": 0.7201942205429077, "learning_rate": 2.7113168052337613e-05, "loss": 0.1277, "step": 12692 }, { "epoch": 0.2247871535018396, "grad_norm": 1.0393339395523071, "learning_rate": 2.7112660583696756e-05, "loss": 0.1151, "step": 12693 }, { "epoch": 0.224804863038868, "grad_norm": 0.9308286905288696, "learning_rate": 2.711215307520647e-05, "loss": 0.1061, "step": 12694 }, { "epoch": 0.22482257257589644, "grad_norm": 1.006460428237915, "learning_rate": 2.711164552686843e-05, "loss": 0.0861, "step": 12695 }, { "epoch": 0.22484028211292487, "grad_norm": 1.0011169910430908, "learning_rate": 2.71111379386843e-05, "loss": 0.1166, "step": 12696 }, { "epoch": 0.2248579916499533, "grad_norm": 0.9999537467956543, "learning_rate": 2.7110630310655747e-05, "loss": 0.0567, "step": 12697 }, { "epoch": 0.22487570118698172, "grad_norm": 0.8283836841583252, "learning_rate": 2.7110122642784454e-05, "loss": 0.0796, "step": 12698 }, { "epoch": 0.22489341072401015, "grad_norm": 0.7858253121376038, "learning_rate": 2.7109614935072075e-05, "loss": 0.0994, "step": 12699 }, { "epoch": 0.22491112026103857, "grad_norm": 0.7213382124900818, "learning_rate": 2.7109107187520288e-05, "loss": 0.1025, "step": 12700 }, { "epoch": 0.224928829798067, "grad_norm": 0.8671007752418518, "learning_rate": 2.7108599400130765e-05, "loss": 0.138, "step": 12701 }, { "epoch": 0.22494653933509542, "grad_norm": 0.7081451416015625, "learning_rate": 2.710809157290518e-05, "loss": 0.1134, "step": 12702 }, { "epoch": 0.22496424887212385, "grad_norm": 1.0235761404037476, "learning_rate": 2.710758370584519e-05, "loss": 0.1197, "step": 12703 }, { "epoch": 0.22498195840915228, "grad_norm": 1.0304189920425415, "learning_rate": 2.7107075798952476e-05, "loss": 0.0731, "step": 12704 }, { "epoch": 0.2249996679461807, "grad_norm": 0.5835845470428467, "learning_rate": 2.710656785222871e-05, "loss": 0.116, "step": 12705 }, { "epoch": 0.22501737748320916, "grad_norm": 0.8351247906684875, "learning_rate": 2.7106059865675555e-05, "loss": 0.0725, "step": 12706 }, { "epoch": 0.22503508702023758, "grad_norm": 0.7146278619766235, "learning_rate": 2.7105551839294694e-05, "loss": 0.0947, "step": 12707 }, { "epoch": 0.225052796557266, "grad_norm": 0.7033913731575012, "learning_rate": 2.7105043773087787e-05, "loss": 0.0747, "step": 12708 }, { "epoch": 0.22507050609429444, "grad_norm": 0.9153547286987305, "learning_rate": 2.7104535667056515e-05, "loss": 0.1161, "step": 12709 }, { "epoch": 0.22508821563132286, "grad_norm": 1.2743010520935059, "learning_rate": 2.7104027521202544e-05, "loss": 0.128, "step": 12710 }, { "epoch": 0.2251059251683513, "grad_norm": 0.9737077355384827, "learning_rate": 2.7103519335527542e-05, "loss": 0.0929, "step": 12711 }, { "epoch": 0.22512363470537972, "grad_norm": 0.8143932819366455, "learning_rate": 2.710301111003319e-05, "loss": 0.1036, "step": 12712 }, { "epoch": 0.22514134424240814, "grad_norm": 0.8003663420677185, "learning_rate": 2.7102502844721156e-05, "loss": 0.1037, "step": 12713 }, { "epoch": 0.22515905377943657, "grad_norm": 0.7374606132507324, "learning_rate": 2.7101994539593106e-05, "loss": 0.0744, "step": 12714 }, { "epoch": 0.225176763316465, "grad_norm": 1.0664259195327759, "learning_rate": 2.7101486194650726e-05, "loss": 0.1234, "step": 12715 }, { "epoch": 0.22519447285349342, "grad_norm": 0.8881377577781677, "learning_rate": 2.7100977809895673e-05, "loss": 0.1075, "step": 12716 }, { "epoch": 0.22521218239052185, "grad_norm": 0.7361786365509033, "learning_rate": 2.710046938532963e-05, "loss": 0.0989, "step": 12717 }, { "epoch": 0.22522989192755027, "grad_norm": 0.9741542339324951, "learning_rate": 2.7099960920954267e-05, "loss": 0.0942, "step": 12718 }, { "epoch": 0.2252476014645787, "grad_norm": 0.8497670888900757, "learning_rate": 2.709945241677126e-05, "loss": 0.1089, "step": 12719 }, { "epoch": 0.22526531100160713, "grad_norm": 0.5981786847114563, "learning_rate": 2.7098943872782274e-05, "loss": 0.0613, "step": 12720 }, { "epoch": 0.22528302053863558, "grad_norm": 0.6639741063117981, "learning_rate": 2.7098435288988987e-05, "loss": 0.1008, "step": 12721 }, { "epoch": 0.225300730075664, "grad_norm": 1.3586536645889282, "learning_rate": 2.709792666539307e-05, "loss": 0.1118, "step": 12722 }, { "epoch": 0.22531843961269243, "grad_norm": 0.9629795551300049, "learning_rate": 2.70974180019962e-05, "loss": 0.1233, "step": 12723 }, { "epoch": 0.22533614914972086, "grad_norm": 1.0089093446731567, "learning_rate": 2.709690929880005e-05, "loss": 0.1758, "step": 12724 }, { "epoch": 0.2253538586867493, "grad_norm": 0.5096702575683594, "learning_rate": 2.7096400555806286e-05, "loss": 0.1041, "step": 12725 }, { "epoch": 0.2253715682237777, "grad_norm": 0.7807444930076599, "learning_rate": 2.709589177301659e-05, "loss": 0.0714, "step": 12726 }, { "epoch": 0.22538927776080614, "grad_norm": 0.9119788408279419, "learning_rate": 2.709538295043264e-05, "loss": 0.1103, "step": 12727 }, { "epoch": 0.22540698729783457, "grad_norm": 0.9565947651863098, "learning_rate": 2.7094874088056098e-05, "loss": 0.1, "step": 12728 }, { "epoch": 0.225424696834863, "grad_norm": 0.6520696878433228, "learning_rate": 2.7094365185888645e-05, "loss": 0.1385, "step": 12729 }, { "epoch": 0.22544240637189142, "grad_norm": 0.9847483038902283, "learning_rate": 2.709385624393195e-05, "loss": 0.1089, "step": 12730 }, { "epoch": 0.22546011590891984, "grad_norm": 1.0127297639846802, "learning_rate": 2.7093347262187694e-05, "loss": 0.0907, "step": 12731 }, { "epoch": 0.22547782544594827, "grad_norm": 0.6774368286132812, "learning_rate": 2.7092838240657547e-05, "loss": 0.0803, "step": 12732 }, { "epoch": 0.2254955349829767, "grad_norm": 0.7302863001823425, "learning_rate": 2.709232917934319e-05, "loss": 0.1064, "step": 12733 }, { "epoch": 0.22551324452000512, "grad_norm": 0.887826681137085, "learning_rate": 2.7091820078246293e-05, "loss": 0.0707, "step": 12734 }, { "epoch": 0.22553095405703355, "grad_norm": 0.8728282451629639, "learning_rate": 2.7091310937368527e-05, "loss": 0.1089, "step": 12735 }, { "epoch": 0.225548663594062, "grad_norm": 0.8554156422615051, "learning_rate": 2.709080175671157e-05, "loss": 0.068, "step": 12736 }, { "epoch": 0.22556637313109043, "grad_norm": 0.8043012619018555, "learning_rate": 2.70902925362771e-05, "loss": 0.1388, "step": 12737 }, { "epoch": 0.22558408266811886, "grad_norm": 0.5785768032073975, "learning_rate": 2.7089783276066787e-05, "loss": 0.1456, "step": 12738 }, { "epoch": 0.22560179220514728, "grad_norm": 0.7425813674926758, "learning_rate": 2.7089273976082318e-05, "loss": 0.0733, "step": 12739 }, { "epoch": 0.2256195017421757, "grad_norm": 0.6071296334266663, "learning_rate": 2.7088764636325355e-05, "loss": 0.1058, "step": 12740 }, { "epoch": 0.22563721127920414, "grad_norm": 0.9115183353424072, "learning_rate": 2.708825525679758e-05, "loss": 0.1049, "step": 12741 }, { "epoch": 0.22565492081623256, "grad_norm": 0.8451993465423584, "learning_rate": 2.7087745837500667e-05, "loss": 0.1251, "step": 12742 }, { "epoch": 0.225672630353261, "grad_norm": 0.9697526097297668, "learning_rate": 2.708723637843629e-05, "loss": 0.1044, "step": 12743 }, { "epoch": 0.22569033989028942, "grad_norm": 1.1720566749572754, "learning_rate": 2.7086726879606135e-05, "loss": 0.1002, "step": 12744 }, { "epoch": 0.22570804942731784, "grad_norm": 0.7751510143280029, "learning_rate": 2.708621734101187e-05, "loss": 0.0797, "step": 12745 }, { "epoch": 0.22572575896434627, "grad_norm": 1.9180212020874023, "learning_rate": 2.7085707762655167e-05, "loss": 0.1028, "step": 12746 }, { "epoch": 0.2257434685013747, "grad_norm": 1.0021710395812988, "learning_rate": 2.7085198144537713e-05, "loss": 0.1224, "step": 12747 }, { "epoch": 0.22576117803840312, "grad_norm": 1.2708380222320557, "learning_rate": 2.7084688486661176e-05, "loss": 0.1104, "step": 12748 }, { "epoch": 0.22577888757543155, "grad_norm": 0.9417273998260498, "learning_rate": 2.7084178789027237e-05, "loss": 0.1189, "step": 12749 }, { "epoch": 0.22579659711246, "grad_norm": 1.0147526264190674, "learning_rate": 2.7083669051637573e-05, "loss": 0.0911, "step": 12750 }, { "epoch": 0.22581430664948843, "grad_norm": 0.6275537610054016, "learning_rate": 2.7083159274493857e-05, "loss": 0.0898, "step": 12751 }, { "epoch": 0.22583201618651685, "grad_norm": 0.8301745653152466, "learning_rate": 2.7082649457597774e-05, "loss": 0.0712, "step": 12752 }, { "epoch": 0.22584972572354528, "grad_norm": 0.7166682481765747, "learning_rate": 2.7082139600950993e-05, "loss": 0.113, "step": 12753 }, { "epoch": 0.2258674352605737, "grad_norm": 0.8985914587974548, "learning_rate": 2.7081629704555196e-05, "loss": 0.1204, "step": 12754 }, { "epoch": 0.22588514479760213, "grad_norm": 0.6427693963050842, "learning_rate": 2.7081119768412056e-05, "loss": 0.088, "step": 12755 }, { "epoch": 0.22590285433463056, "grad_norm": 1.5817029476165771, "learning_rate": 2.7080609792523257e-05, "loss": 0.0971, "step": 12756 }, { "epoch": 0.22592056387165899, "grad_norm": 0.6950384974479675, "learning_rate": 2.7080099776890477e-05, "loss": 0.1212, "step": 12757 }, { "epoch": 0.2259382734086874, "grad_norm": 1.262783169746399, "learning_rate": 2.7079589721515384e-05, "loss": 0.1015, "step": 12758 }, { "epoch": 0.22595598294571584, "grad_norm": 0.6390020847320557, "learning_rate": 2.7079079626399667e-05, "loss": 0.0603, "step": 12759 }, { "epoch": 0.22597369248274427, "grad_norm": 0.9665125608444214, "learning_rate": 2.7078569491544997e-05, "loss": 0.1051, "step": 12760 }, { "epoch": 0.2259914020197727, "grad_norm": 1.0995792150497437, "learning_rate": 2.707805931695306e-05, "loss": 0.095, "step": 12761 }, { "epoch": 0.22600911155680112, "grad_norm": 0.79682856798172, "learning_rate": 2.7077549102625526e-05, "loss": 0.1113, "step": 12762 }, { "epoch": 0.22602682109382954, "grad_norm": 3.0744452476501465, "learning_rate": 2.7077038848564074e-05, "loss": 0.1164, "step": 12763 }, { "epoch": 0.22604453063085797, "grad_norm": 1.4269760847091675, "learning_rate": 2.707652855477039e-05, "loss": 0.0813, "step": 12764 }, { "epoch": 0.22606224016788642, "grad_norm": 1.0806856155395508, "learning_rate": 2.7076018221246146e-05, "loss": 0.1458, "step": 12765 }, { "epoch": 0.22607994970491485, "grad_norm": 1.5194804668426514, "learning_rate": 2.7075507847993026e-05, "loss": 0.0982, "step": 12766 }, { "epoch": 0.22609765924194328, "grad_norm": 0.9459217190742493, "learning_rate": 2.7074997435012702e-05, "loss": 0.1236, "step": 12767 }, { "epoch": 0.2261153687789717, "grad_norm": 0.7636473178863525, "learning_rate": 2.707448698230686e-05, "loss": 0.126, "step": 12768 }, { "epoch": 0.22613307831600013, "grad_norm": 1.2423440217971802, "learning_rate": 2.707397648987718e-05, "loss": 0.1094, "step": 12769 }, { "epoch": 0.22615078785302856, "grad_norm": 0.7042003870010376, "learning_rate": 2.707346595772534e-05, "loss": 0.149, "step": 12770 }, { "epoch": 0.22616849739005698, "grad_norm": 1.0153316259384155, "learning_rate": 2.7072955385853015e-05, "loss": 0.1281, "step": 12771 }, { "epoch": 0.2261862069270854, "grad_norm": 1.0903699398040771, "learning_rate": 2.7072444774261882e-05, "loss": 0.0969, "step": 12772 }, { "epoch": 0.22620391646411384, "grad_norm": 0.5031706094741821, "learning_rate": 2.7071934122953633e-05, "loss": 0.098, "step": 12773 }, { "epoch": 0.22622162600114226, "grad_norm": 0.7289288640022278, "learning_rate": 2.7071423431929943e-05, "loss": 0.0963, "step": 12774 }, { "epoch": 0.2262393355381707, "grad_norm": 0.8890256881713867, "learning_rate": 2.7070912701192486e-05, "loss": 0.1014, "step": 12775 }, { "epoch": 0.22625704507519911, "grad_norm": 0.8264821767807007, "learning_rate": 2.707040193074295e-05, "loss": 0.0701, "step": 12776 }, { "epoch": 0.22627475461222754, "grad_norm": 0.6421096920967102, "learning_rate": 2.7069891120583016e-05, "loss": 0.0979, "step": 12777 }, { "epoch": 0.22629246414925597, "grad_norm": 1.0540598630905151, "learning_rate": 2.7069380270714353e-05, "loss": 0.1019, "step": 12778 }, { "epoch": 0.2263101736862844, "grad_norm": 0.7234168648719788, "learning_rate": 2.7068869381138656e-05, "loss": 0.0666, "step": 12779 }, { "epoch": 0.22632788322331285, "grad_norm": 1.0623490810394287, "learning_rate": 2.70683584518576e-05, "loss": 0.0764, "step": 12780 }, { "epoch": 0.22634559276034127, "grad_norm": 0.45642656087875366, "learning_rate": 2.7067847482872864e-05, "loss": 0.0704, "step": 12781 }, { "epoch": 0.2263633022973697, "grad_norm": 0.7300313115119934, "learning_rate": 2.7067336474186126e-05, "loss": 0.0898, "step": 12782 }, { "epoch": 0.22638101183439813, "grad_norm": 0.7542455196380615, "learning_rate": 2.7066825425799076e-05, "loss": 0.0883, "step": 12783 }, { "epoch": 0.22639872137142655, "grad_norm": 1.058292031288147, "learning_rate": 2.7066314337713388e-05, "loss": 0.1274, "step": 12784 }, { "epoch": 0.22641643090845498, "grad_norm": 0.8720220327377319, "learning_rate": 2.7065803209930752e-05, "loss": 0.0975, "step": 12785 }, { "epoch": 0.2264341404454834, "grad_norm": 0.596458911895752, "learning_rate": 2.706529204245284e-05, "loss": 0.1106, "step": 12786 }, { "epoch": 0.22645184998251183, "grad_norm": 1.1047308444976807, "learning_rate": 2.7064780835281335e-05, "loss": 0.0822, "step": 12787 }, { "epoch": 0.22646955951954026, "grad_norm": 0.7859731316566467, "learning_rate": 2.706426958841792e-05, "loss": 0.0933, "step": 12788 }, { "epoch": 0.22648726905656869, "grad_norm": 0.8344224095344543, "learning_rate": 2.7063758301864286e-05, "loss": 0.0683, "step": 12789 }, { "epoch": 0.2265049785935971, "grad_norm": 0.9048278331756592, "learning_rate": 2.7063246975622104e-05, "loss": 0.0806, "step": 12790 }, { "epoch": 0.22652268813062554, "grad_norm": 0.6822920441627502, "learning_rate": 2.7062735609693055e-05, "loss": 0.0786, "step": 12791 }, { "epoch": 0.22654039766765396, "grad_norm": 0.6022555828094482, "learning_rate": 2.7062224204078835e-05, "loss": 0.0574, "step": 12792 }, { "epoch": 0.2265581072046824, "grad_norm": 0.76369309425354, "learning_rate": 2.7061712758781108e-05, "loss": 0.116, "step": 12793 }, { "epoch": 0.22657581674171082, "grad_norm": 1.3968220949172974, "learning_rate": 2.706120127380157e-05, "loss": 0.1059, "step": 12794 }, { "epoch": 0.22659352627873927, "grad_norm": 0.7532272338867188, "learning_rate": 2.70606897491419e-05, "loss": 0.066, "step": 12795 }, { "epoch": 0.2266112358157677, "grad_norm": 0.7717310190200806, "learning_rate": 2.7060178184803784e-05, "loss": 0.0904, "step": 12796 }, { "epoch": 0.22662894535279612, "grad_norm": 0.9298346042633057, "learning_rate": 2.7059666580788894e-05, "loss": 0.0815, "step": 12797 }, { "epoch": 0.22664665488982455, "grad_norm": 0.9712023138999939, "learning_rate": 2.7059154937098927e-05, "loss": 0.0939, "step": 12798 }, { "epoch": 0.22666436442685298, "grad_norm": 1.2894810438156128, "learning_rate": 2.7058643253735556e-05, "loss": 0.1437, "step": 12799 }, { "epoch": 0.2266820739638814, "grad_norm": 0.9534240961074829, "learning_rate": 2.705813153070047e-05, "loss": 0.0967, "step": 12800 }, { "epoch": 0.22669978350090983, "grad_norm": 0.992262601852417, "learning_rate": 2.705761976799535e-05, "loss": 0.1149, "step": 12801 }, { "epoch": 0.22671749303793826, "grad_norm": 0.8338634967803955, "learning_rate": 2.705710796562188e-05, "loss": 0.0892, "step": 12802 }, { "epoch": 0.22673520257496668, "grad_norm": 1.0750969648361206, "learning_rate": 2.7056596123581748e-05, "loss": 0.1175, "step": 12803 }, { "epoch": 0.2267529121119951, "grad_norm": 1.0568029880523682, "learning_rate": 2.7056084241876634e-05, "loss": 0.0837, "step": 12804 }, { "epoch": 0.22677062164902353, "grad_norm": 0.743999719619751, "learning_rate": 2.7055572320508217e-05, "loss": 0.1087, "step": 12805 }, { "epoch": 0.22678833118605196, "grad_norm": 1.2071517705917358, "learning_rate": 2.705506035947819e-05, "loss": 0.1278, "step": 12806 }, { "epoch": 0.2268060407230804, "grad_norm": 0.6897484064102173, "learning_rate": 2.7054548358788236e-05, "loss": 0.0746, "step": 12807 }, { "epoch": 0.22682375026010881, "grad_norm": 0.9972524046897888, "learning_rate": 2.7054036318440035e-05, "loss": 0.0875, "step": 12808 }, { "epoch": 0.22684145979713724, "grad_norm": 1.24104905128479, "learning_rate": 2.7053524238435272e-05, "loss": 0.1258, "step": 12809 }, { "epoch": 0.2268591693341657, "grad_norm": 1.0384223461151123, "learning_rate": 2.7053012118775635e-05, "loss": 0.118, "step": 12810 }, { "epoch": 0.22687687887119412, "grad_norm": 0.8840258121490479, "learning_rate": 2.7052499959462805e-05, "loss": 0.1258, "step": 12811 }, { "epoch": 0.22689458840822255, "grad_norm": 0.6107568740844727, "learning_rate": 2.7051987760498475e-05, "loss": 0.1045, "step": 12812 }, { "epoch": 0.22691229794525097, "grad_norm": 0.9567149877548218, "learning_rate": 2.7051475521884316e-05, "loss": 0.0993, "step": 12813 }, { "epoch": 0.2269300074822794, "grad_norm": 1.2867203950881958, "learning_rate": 2.7050963243622025e-05, "loss": 0.1042, "step": 12814 }, { "epoch": 0.22694771701930783, "grad_norm": 1.1015979051589966, "learning_rate": 2.7050450925713287e-05, "loss": 0.1019, "step": 12815 }, { "epoch": 0.22696542655633625, "grad_norm": 0.8630722165107727, "learning_rate": 2.7049938568159778e-05, "loss": 0.1073, "step": 12816 }, { "epoch": 0.22698313609336468, "grad_norm": 1.0639028549194336, "learning_rate": 2.70494261709632e-05, "loss": 0.1119, "step": 12817 }, { "epoch": 0.2270008456303931, "grad_norm": 1.3470054864883423, "learning_rate": 2.7048913734125217e-05, "loss": 0.0972, "step": 12818 }, { "epoch": 0.22701855516742153, "grad_norm": 0.9677127599716187, "learning_rate": 2.7048401257647534e-05, "loss": 0.1014, "step": 12819 }, { "epoch": 0.22703626470444996, "grad_norm": 1.0665690898895264, "learning_rate": 2.704788874153183e-05, "loss": 0.1004, "step": 12820 }, { "epoch": 0.22705397424147838, "grad_norm": 1.3910428285598755, "learning_rate": 2.7047376185779786e-05, "loss": 0.1082, "step": 12821 }, { "epoch": 0.2270716837785068, "grad_norm": 1.0883777141571045, "learning_rate": 2.7046863590393093e-05, "loss": 0.1073, "step": 12822 }, { "epoch": 0.22708939331553524, "grad_norm": 0.7585930228233337, "learning_rate": 2.7046350955373437e-05, "loss": 0.1094, "step": 12823 }, { "epoch": 0.22710710285256366, "grad_norm": 0.7598515748977661, "learning_rate": 2.704583828072251e-05, "loss": 0.1003, "step": 12824 }, { "epoch": 0.22712481238959212, "grad_norm": 0.7625137567520142, "learning_rate": 2.704532556644199e-05, "loss": 0.1045, "step": 12825 }, { "epoch": 0.22714252192662054, "grad_norm": 1.3418426513671875, "learning_rate": 2.7044812812533565e-05, "loss": 0.0857, "step": 12826 }, { "epoch": 0.22716023146364897, "grad_norm": 0.9593151807785034, "learning_rate": 2.7044300018998927e-05, "loss": 0.1118, "step": 12827 }, { "epoch": 0.2271779410006774, "grad_norm": 0.9794602394104004, "learning_rate": 2.704378718583976e-05, "loss": 0.081, "step": 12828 }, { "epoch": 0.22719565053770582, "grad_norm": 0.8868820667266846, "learning_rate": 2.704327431305775e-05, "loss": 0.1025, "step": 12829 }, { "epoch": 0.22721336007473425, "grad_norm": 1.2174098491668701, "learning_rate": 2.7042761400654585e-05, "loss": 0.1144, "step": 12830 }, { "epoch": 0.22723106961176268, "grad_norm": 0.7750670313835144, "learning_rate": 2.7042248448631957e-05, "loss": 0.0786, "step": 12831 }, { "epoch": 0.2272487791487911, "grad_norm": 1.0816410779953003, "learning_rate": 2.704173545699155e-05, "loss": 0.1283, "step": 12832 }, { "epoch": 0.22726648868581953, "grad_norm": 0.8927249312400818, "learning_rate": 2.7041222425735044e-05, "loss": 0.0863, "step": 12833 }, { "epoch": 0.22728419822284796, "grad_norm": 1.1563830375671387, "learning_rate": 2.7040709354864143e-05, "loss": 0.1608, "step": 12834 }, { "epoch": 0.22730190775987638, "grad_norm": 1.053200125694275, "learning_rate": 2.704019624438052e-05, "loss": 0.1127, "step": 12835 }, { "epoch": 0.2273196172969048, "grad_norm": 0.876530647277832, "learning_rate": 2.7039683094285868e-05, "loss": 0.0942, "step": 12836 }, { "epoch": 0.22733732683393323, "grad_norm": 0.8406762480735779, "learning_rate": 2.7039169904581882e-05, "loss": 0.0975, "step": 12837 }, { "epoch": 0.22735503637096166, "grad_norm": 0.7054422497749329, "learning_rate": 2.7038656675270243e-05, "loss": 0.1028, "step": 12838 }, { "epoch": 0.2273727459079901, "grad_norm": 1.0840238332748413, "learning_rate": 2.703814340635264e-05, "loss": 0.1034, "step": 12839 }, { "epoch": 0.22739045544501854, "grad_norm": 0.7691830992698669, "learning_rate": 2.703763009783076e-05, "loss": 0.077, "step": 12840 }, { "epoch": 0.22740816498204697, "grad_norm": 0.9021473526954651, "learning_rate": 2.7037116749706296e-05, "loss": 0.1168, "step": 12841 }, { "epoch": 0.2274258745190754, "grad_norm": 0.7190030813217163, "learning_rate": 2.703660336198094e-05, "loss": 0.1155, "step": 12842 }, { "epoch": 0.22744358405610382, "grad_norm": 0.8621625304222107, "learning_rate": 2.7036089934656368e-05, "loss": 0.1236, "step": 12843 }, { "epoch": 0.22746129359313225, "grad_norm": 1.0490576028823853, "learning_rate": 2.7035576467734287e-05, "loss": 0.11, "step": 12844 }, { "epoch": 0.22747900313016067, "grad_norm": 0.757185697555542, "learning_rate": 2.7035062961216374e-05, "loss": 0.0727, "step": 12845 }, { "epoch": 0.2274967126671891, "grad_norm": 0.8589840531349182, "learning_rate": 2.703454941510432e-05, "loss": 0.0913, "step": 12846 }, { "epoch": 0.22751442220421753, "grad_norm": 0.8001002073287964, "learning_rate": 2.7034035829399813e-05, "loss": 0.0929, "step": 12847 }, { "epoch": 0.22753213174124595, "grad_norm": 0.7988490462303162, "learning_rate": 2.7033522204104547e-05, "loss": 0.1053, "step": 12848 }, { "epoch": 0.22754984127827438, "grad_norm": 1.0884534120559692, "learning_rate": 2.703300853922021e-05, "loss": 0.074, "step": 12849 }, { "epoch": 0.2275675508153028, "grad_norm": 1.6849309206008911, "learning_rate": 2.7032494834748492e-05, "loss": 0.1169, "step": 12850 }, { "epoch": 0.22758526035233123, "grad_norm": 0.8579127788543701, "learning_rate": 2.7031981090691082e-05, "loss": 0.1007, "step": 12851 }, { "epoch": 0.22760296988935966, "grad_norm": 1.0340790748596191, "learning_rate": 2.7031467307049677e-05, "loss": 0.0962, "step": 12852 }, { "epoch": 0.22762067942638808, "grad_norm": 1.0681592226028442, "learning_rate": 2.7030953483825955e-05, "loss": 0.0878, "step": 12853 }, { "epoch": 0.2276383889634165, "grad_norm": 1.2489432096481323, "learning_rate": 2.7030439621021614e-05, "loss": 0.1081, "step": 12854 }, { "epoch": 0.22765609850044496, "grad_norm": 0.8194200396537781, "learning_rate": 2.7029925718638347e-05, "loss": 0.0831, "step": 12855 }, { "epoch": 0.2276738080374734, "grad_norm": 0.7852258086204529, "learning_rate": 2.7029411776677838e-05, "loss": 0.1073, "step": 12856 }, { "epoch": 0.22769151757450182, "grad_norm": 0.7955893874168396, "learning_rate": 2.7028897795141777e-05, "loss": 0.1119, "step": 12857 }, { "epoch": 0.22770922711153024, "grad_norm": 0.9413312077522278, "learning_rate": 2.7028383774031867e-05, "loss": 0.1405, "step": 12858 }, { "epoch": 0.22772693664855867, "grad_norm": 0.5612507462501526, "learning_rate": 2.7027869713349787e-05, "loss": 0.067, "step": 12859 }, { "epoch": 0.2277446461855871, "grad_norm": 0.6892274022102356, "learning_rate": 2.7027355613097235e-05, "loss": 0.1071, "step": 12860 }, { "epoch": 0.22776235572261552, "grad_norm": 0.9218487739562988, "learning_rate": 2.7026841473275894e-05, "loss": 0.0825, "step": 12861 }, { "epoch": 0.22778006525964395, "grad_norm": 0.7388232350349426, "learning_rate": 2.7026327293887466e-05, "loss": 0.1014, "step": 12862 }, { "epoch": 0.22779777479667238, "grad_norm": 0.775983452796936, "learning_rate": 2.7025813074933637e-05, "loss": 0.1064, "step": 12863 }, { "epoch": 0.2278154843337008, "grad_norm": 0.8509052395820618, "learning_rate": 2.7025298816416096e-05, "loss": 0.1077, "step": 12864 }, { "epoch": 0.22783319387072923, "grad_norm": 0.9599555730819702, "learning_rate": 2.7024784518336542e-05, "loss": 0.1216, "step": 12865 }, { "epoch": 0.22785090340775765, "grad_norm": 0.7801843881607056, "learning_rate": 2.7024270180696657e-05, "loss": 0.0818, "step": 12866 }, { "epoch": 0.22786861294478608, "grad_norm": 0.9075967669487, "learning_rate": 2.7023755803498145e-05, "loss": 0.0802, "step": 12867 }, { "epoch": 0.2278863224818145, "grad_norm": 1.0728814601898193, "learning_rate": 2.7023241386742692e-05, "loss": 0.1049, "step": 12868 }, { "epoch": 0.22790403201884293, "grad_norm": 0.937212347984314, "learning_rate": 2.702272693043199e-05, "loss": 0.1196, "step": 12869 }, { "epoch": 0.2279217415558714, "grad_norm": 0.716262936592102, "learning_rate": 2.7022212434567733e-05, "loss": 0.0929, "step": 12870 }, { "epoch": 0.22793945109289981, "grad_norm": 0.5182917714118958, "learning_rate": 2.702169789915161e-05, "loss": 0.0939, "step": 12871 }, { "epoch": 0.22795716062992824, "grad_norm": 0.8999847769737244, "learning_rate": 2.702118332418532e-05, "loss": 0.1158, "step": 12872 }, { "epoch": 0.22797487016695667, "grad_norm": 0.5355912446975708, "learning_rate": 2.7020668709670552e-05, "loss": 0.0909, "step": 12873 }, { "epoch": 0.2279925797039851, "grad_norm": 0.6765568256378174, "learning_rate": 2.7020154055609e-05, "loss": 0.0932, "step": 12874 }, { "epoch": 0.22801028924101352, "grad_norm": 0.8810344934463501, "learning_rate": 2.7019639362002357e-05, "loss": 0.1059, "step": 12875 }, { "epoch": 0.22802799877804195, "grad_norm": 0.7161626815795898, "learning_rate": 2.701912462885232e-05, "loss": 0.097, "step": 12876 }, { "epoch": 0.22804570831507037, "grad_norm": 0.7997764945030212, "learning_rate": 2.7018609856160575e-05, "loss": 0.058, "step": 12877 }, { "epoch": 0.2280634178520988, "grad_norm": 0.5817076563835144, "learning_rate": 2.7018095043928817e-05, "loss": 0.0763, "step": 12878 }, { "epoch": 0.22808112738912723, "grad_norm": 1.4208652973175049, "learning_rate": 2.7017580192158747e-05, "loss": 0.1711, "step": 12879 }, { "epoch": 0.22809883692615565, "grad_norm": 1.3881739377975464, "learning_rate": 2.701706530085205e-05, "loss": 0.0909, "step": 12880 }, { "epoch": 0.22811654646318408, "grad_norm": 0.7200109958648682, "learning_rate": 2.7016550370010424e-05, "loss": 0.0755, "step": 12881 }, { "epoch": 0.2281342560002125, "grad_norm": 0.9174588322639465, "learning_rate": 2.7016035399635558e-05, "loss": 0.1031, "step": 12882 }, { "epoch": 0.22815196553724093, "grad_norm": 0.899366021156311, "learning_rate": 2.7015520389729157e-05, "loss": 0.1476, "step": 12883 }, { "epoch": 0.22816967507426938, "grad_norm": 0.8337142467498779, "learning_rate": 2.701500534029291e-05, "loss": 0.0798, "step": 12884 }, { "epoch": 0.2281873846112978, "grad_norm": 1.0056942701339722, "learning_rate": 2.7014490251328505e-05, "loss": 0.1129, "step": 12885 }, { "epoch": 0.22820509414832624, "grad_norm": 1.1898454427719116, "learning_rate": 2.7013975122837647e-05, "loss": 0.1122, "step": 12886 }, { "epoch": 0.22822280368535466, "grad_norm": 0.7254547476768494, "learning_rate": 2.7013459954822026e-05, "loss": 0.1072, "step": 12887 }, { "epoch": 0.2282405132223831, "grad_norm": 1.2621325254440308, "learning_rate": 2.7012944747283336e-05, "loss": 0.1098, "step": 12888 }, { "epoch": 0.22825822275941152, "grad_norm": 0.6268588900566101, "learning_rate": 2.701242950022327e-05, "loss": 0.0807, "step": 12889 }, { "epoch": 0.22827593229643994, "grad_norm": 0.9795658588409424, "learning_rate": 2.7011914213643532e-05, "loss": 0.0972, "step": 12890 }, { "epoch": 0.22829364183346837, "grad_norm": 1.2420918941497803, "learning_rate": 2.7011398887545803e-05, "loss": 0.1446, "step": 12891 }, { "epoch": 0.2283113513704968, "grad_norm": 2.1031932830810547, "learning_rate": 2.7010883521931795e-05, "loss": 0.1343, "step": 12892 }, { "epoch": 0.22832906090752522, "grad_norm": 0.7473735213279724, "learning_rate": 2.701036811680319e-05, "loss": 0.0621, "step": 12893 }, { "epoch": 0.22834677044455365, "grad_norm": 0.9338050484657288, "learning_rate": 2.7009852672161687e-05, "loss": 0.0826, "step": 12894 }, { "epoch": 0.22836447998158207, "grad_norm": 1.1268051862716675, "learning_rate": 2.700933718800899e-05, "loss": 0.1121, "step": 12895 }, { "epoch": 0.2283821895186105, "grad_norm": 0.8169872760772705, "learning_rate": 2.7008821664346784e-05, "loss": 0.072, "step": 12896 }, { "epoch": 0.22839989905563893, "grad_norm": 1.3910059928894043, "learning_rate": 2.700830610117677e-05, "loss": 0.0896, "step": 12897 }, { "epoch": 0.22841760859266735, "grad_norm": 1.1023774147033691, "learning_rate": 2.7007790498500643e-05, "loss": 0.1124, "step": 12898 }, { "epoch": 0.2284353181296958, "grad_norm": 1.0217276811599731, "learning_rate": 2.70072748563201e-05, "loss": 0.0997, "step": 12899 }, { "epoch": 0.22845302766672423, "grad_norm": 1.0018978118896484, "learning_rate": 2.700675917463684e-05, "loss": 0.0941, "step": 12900 }, { "epoch": 0.22847073720375266, "grad_norm": 1.2538492679595947, "learning_rate": 2.7006243453452553e-05, "loss": 0.1014, "step": 12901 }, { "epoch": 0.2284884467407811, "grad_norm": 0.7456783056259155, "learning_rate": 2.700572769276894e-05, "loss": 0.0933, "step": 12902 }, { "epoch": 0.2285061562778095, "grad_norm": 1.9550905227661133, "learning_rate": 2.7005211892587697e-05, "loss": 0.1106, "step": 12903 }, { "epoch": 0.22852386581483794, "grad_norm": 0.8892714381217957, "learning_rate": 2.700469605291052e-05, "loss": 0.1007, "step": 12904 }, { "epoch": 0.22854157535186637, "grad_norm": 0.8343381285667419, "learning_rate": 2.700418017373911e-05, "loss": 0.0939, "step": 12905 }, { "epoch": 0.2285592848888948, "grad_norm": 1.1190550327301025, "learning_rate": 2.700366425507516e-05, "loss": 0.1008, "step": 12906 }, { "epoch": 0.22857699442592322, "grad_norm": 0.9722718596458435, "learning_rate": 2.700314829692037e-05, "loss": 0.1106, "step": 12907 }, { "epoch": 0.22859470396295165, "grad_norm": 1.2647137641906738, "learning_rate": 2.7002632299276434e-05, "loss": 0.0966, "step": 12908 }, { "epoch": 0.22861241349998007, "grad_norm": 1.440598964691162, "learning_rate": 2.7002116262145053e-05, "loss": 0.1195, "step": 12909 }, { "epoch": 0.2286301230370085, "grad_norm": 0.691612720489502, "learning_rate": 2.7001600185527923e-05, "loss": 0.1273, "step": 12910 }, { "epoch": 0.22864783257403692, "grad_norm": 1.84946870803833, "learning_rate": 2.7001084069426745e-05, "loss": 0.0856, "step": 12911 }, { "epoch": 0.22866554211106535, "grad_norm": 0.7825056910514832, "learning_rate": 2.700056791384321e-05, "loss": 0.1319, "step": 12912 }, { "epoch": 0.22868325164809378, "grad_norm": 0.9395638704299927, "learning_rate": 2.7000051718779025e-05, "loss": 0.0721, "step": 12913 }, { "epoch": 0.22870096118512223, "grad_norm": 0.6992195248603821, "learning_rate": 2.699953548423588e-05, "loss": 0.0903, "step": 12914 }, { "epoch": 0.22871867072215066, "grad_norm": 0.90907883644104, "learning_rate": 2.6999019210215477e-05, "loss": 0.0752, "step": 12915 }, { "epoch": 0.22873638025917908, "grad_norm": 0.909766435623169, "learning_rate": 2.699850289671951e-05, "loss": 0.1125, "step": 12916 }, { "epoch": 0.2287540897962075, "grad_norm": 0.6979811191558838, "learning_rate": 2.699798654374969e-05, "loss": 0.09, "step": 12917 }, { "epoch": 0.22877179933323594, "grad_norm": 0.7386991381645203, "learning_rate": 2.6997470151307706e-05, "loss": 0.1219, "step": 12918 }, { "epoch": 0.22878950887026436, "grad_norm": 0.855086088180542, "learning_rate": 2.6996953719395257e-05, "loss": 0.1172, "step": 12919 }, { "epoch": 0.2288072184072928, "grad_norm": 1.240018367767334, "learning_rate": 2.6996437248014048e-05, "loss": 0.1084, "step": 12920 }, { "epoch": 0.22882492794432122, "grad_norm": 0.9520798325538635, "learning_rate": 2.699592073716577e-05, "loss": 0.1495, "step": 12921 }, { "epoch": 0.22884263748134964, "grad_norm": 0.6706000566482544, "learning_rate": 2.6995404186852124e-05, "loss": 0.0885, "step": 12922 }, { "epoch": 0.22886034701837807, "grad_norm": 0.47907501459121704, "learning_rate": 2.699488759707481e-05, "loss": 0.0681, "step": 12923 }, { "epoch": 0.2288780565554065, "grad_norm": 0.6619181036949158, "learning_rate": 2.6994370967835534e-05, "loss": 0.067, "step": 12924 }, { "epoch": 0.22889576609243492, "grad_norm": 0.9944356679916382, "learning_rate": 2.699385429913599e-05, "loss": 0.0947, "step": 12925 }, { "epoch": 0.22891347562946335, "grad_norm": 0.7014405131340027, "learning_rate": 2.6993337590977878e-05, "loss": 0.1115, "step": 12926 }, { "epoch": 0.22893118516649177, "grad_norm": 1.024718165397644, "learning_rate": 2.69928208433629e-05, "loss": 0.1441, "step": 12927 }, { "epoch": 0.2289488947035202, "grad_norm": 0.860894501209259, "learning_rate": 2.6992304056292753e-05, "loss": 0.1042, "step": 12928 }, { "epoch": 0.22896660424054865, "grad_norm": 0.5849151015281677, "learning_rate": 2.6991787229769138e-05, "loss": 0.0952, "step": 12929 }, { "epoch": 0.22898431377757708, "grad_norm": 0.9077529907226562, "learning_rate": 2.6991270363793758e-05, "loss": 0.1282, "step": 12930 }, { "epoch": 0.2290020233146055, "grad_norm": 1.1058319807052612, "learning_rate": 2.699075345836831e-05, "loss": 0.1297, "step": 12931 }, { "epoch": 0.22901973285163393, "grad_norm": 1.3343162536621094, "learning_rate": 2.6990236513494496e-05, "loss": 0.0817, "step": 12932 }, { "epoch": 0.22903744238866236, "grad_norm": 0.7052316069602966, "learning_rate": 2.6989719529174014e-05, "loss": 0.1118, "step": 12933 }, { "epoch": 0.2290551519256908, "grad_norm": 1.002750277519226, "learning_rate": 2.698920250540857e-05, "loss": 0.1209, "step": 12934 }, { "epoch": 0.2290728614627192, "grad_norm": 0.8603062629699707, "learning_rate": 2.698868544219986e-05, "loss": 0.139, "step": 12935 }, { "epoch": 0.22909057099974764, "grad_norm": 0.9596253037452698, "learning_rate": 2.698816833954959e-05, "loss": 0.105, "step": 12936 }, { "epoch": 0.22910828053677607, "grad_norm": 1.586610198020935, "learning_rate": 2.698765119745946e-05, "loss": 0.1251, "step": 12937 }, { "epoch": 0.2291259900738045, "grad_norm": 0.7028403878211975, "learning_rate": 2.6987134015931166e-05, "loss": 0.0864, "step": 12938 }, { "epoch": 0.22914369961083292, "grad_norm": 0.5434010028839111, "learning_rate": 2.6986616794966415e-05, "loss": 0.0712, "step": 12939 }, { "epoch": 0.22916140914786134, "grad_norm": 0.8425318002700806, "learning_rate": 2.6986099534566907e-05, "loss": 0.1021, "step": 12940 }, { "epoch": 0.22917911868488977, "grad_norm": 0.4008435606956482, "learning_rate": 2.6985582234734346e-05, "loss": 0.0874, "step": 12941 }, { "epoch": 0.2291968282219182, "grad_norm": 0.9977790117263794, "learning_rate": 2.698506489547043e-05, "loss": 0.0928, "step": 12942 }, { "epoch": 0.22921453775894662, "grad_norm": 0.5219807028770447, "learning_rate": 2.6984547516776862e-05, "loss": 0.0611, "step": 12943 }, { "epoch": 0.22923224729597508, "grad_norm": 1.0054962635040283, "learning_rate": 2.6984030098655347e-05, "loss": 0.1162, "step": 12944 }, { "epoch": 0.2292499568330035, "grad_norm": 0.6307471990585327, "learning_rate": 2.6983512641107586e-05, "loss": 0.0998, "step": 12945 }, { "epoch": 0.22926766637003193, "grad_norm": 1.4242209196090698, "learning_rate": 2.6982995144135275e-05, "loss": 0.1106, "step": 12946 }, { "epoch": 0.22928537590706036, "grad_norm": 0.6540866494178772, "learning_rate": 2.6982477607740127e-05, "loss": 0.1119, "step": 12947 }, { "epoch": 0.22930308544408878, "grad_norm": 1.3977961540222168, "learning_rate": 2.698196003192384e-05, "loss": 0.1033, "step": 12948 }, { "epoch": 0.2293207949811172, "grad_norm": 0.9569229483604431, "learning_rate": 2.698144241668811e-05, "loss": 0.0997, "step": 12949 }, { "epoch": 0.22933850451814564, "grad_norm": 0.9863066673278809, "learning_rate": 2.6980924762034655e-05, "loss": 0.1238, "step": 12950 }, { "epoch": 0.22935621405517406, "grad_norm": 1.8948078155517578, "learning_rate": 2.6980407067965164e-05, "loss": 0.1465, "step": 12951 }, { "epoch": 0.2293739235922025, "grad_norm": 0.8572563529014587, "learning_rate": 2.6979889334481346e-05, "loss": 0.0827, "step": 12952 }, { "epoch": 0.22939163312923092, "grad_norm": 0.7170151472091675, "learning_rate": 2.6979371561584906e-05, "loss": 0.0909, "step": 12953 }, { "epoch": 0.22940934266625934, "grad_norm": 0.8030628561973572, "learning_rate": 2.697885374927754e-05, "loss": 0.1319, "step": 12954 }, { "epoch": 0.22942705220328777, "grad_norm": 0.7057538628578186, "learning_rate": 2.6978335897560964e-05, "loss": 0.0766, "step": 12955 }, { "epoch": 0.2294447617403162, "grad_norm": 1.3092013597488403, "learning_rate": 2.697781800643687e-05, "loss": 0.1268, "step": 12956 }, { "epoch": 0.22946247127734462, "grad_norm": 1.706244707107544, "learning_rate": 2.6977300075906966e-05, "loss": 0.1497, "step": 12957 }, { "epoch": 0.22948018081437305, "grad_norm": 0.9222841262817383, "learning_rate": 2.697678210597296e-05, "loss": 0.0993, "step": 12958 }, { "epoch": 0.2294978903514015, "grad_norm": 0.6443036198616028, "learning_rate": 2.697626409663655e-05, "loss": 0.0987, "step": 12959 }, { "epoch": 0.22951559988842993, "grad_norm": 1.0308454036712646, "learning_rate": 2.6975746047899438e-05, "loss": 0.1109, "step": 12960 }, { "epoch": 0.22953330942545835, "grad_norm": 0.9701353907585144, "learning_rate": 2.697522795976334e-05, "loss": 0.0763, "step": 12961 }, { "epoch": 0.22955101896248678, "grad_norm": 0.7010281682014465, "learning_rate": 2.6974709832229948e-05, "loss": 0.1184, "step": 12962 }, { "epoch": 0.2295687284995152, "grad_norm": 0.611549437046051, "learning_rate": 2.6974191665300973e-05, "loss": 0.0931, "step": 12963 }, { "epoch": 0.22958643803654363, "grad_norm": 0.8121376633644104, "learning_rate": 2.697367345897812e-05, "loss": 0.1052, "step": 12964 }, { "epoch": 0.22960414757357206, "grad_norm": 0.6940382122993469, "learning_rate": 2.697315521326309e-05, "loss": 0.0984, "step": 12965 }, { "epoch": 0.22962185711060049, "grad_norm": 0.7680528163909912, "learning_rate": 2.6972636928157594e-05, "loss": 0.1424, "step": 12966 }, { "epoch": 0.2296395666476289, "grad_norm": 0.6246111989021301, "learning_rate": 2.697211860366333e-05, "loss": 0.0928, "step": 12967 }, { "epoch": 0.22965727618465734, "grad_norm": 0.7774848937988281, "learning_rate": 2.6971600239782008e-05, "loss": 0.1538, "step": 12968 }, { "epoch": 0.22967498572168576, "grad_norm": 0.6112204790115356, "learning_rate": 2.6971081836515334e-05, "loss": 0.139, "step": 12969 }, { "epoch": 0.2296926952587142, "grad_norm": 0.5784399509429932, "learning_rate": 2.697056339386501e-05, "loss": 0.072, "step": 12970 }, { "epoch": 0.22971040479574262, "grad_norm": 0.789863646030426, "learning_rate": 2.697004491183274e-05, "loss": 0.0975, "step": 12971 }, { "epoch": 0.22972811433277104, "grad_norm": 0.9494429230690002, "learning_rate": 2.6969526390420238e-05, "loss": 0.1051, "step": 12972 }, { "epoch": 0.22974582386979947, "grad_norm": 0.6690828800201416, "learning_rate": 2.69690078296292e-05, "loss": 0.0458, "step": 12973 }, { "epoch": 0.22976353340682792, "grad_norm": 0.84123295545578, "learning_rate": 2.6968489229461345e-05, "loss": 0.0956, "step": 12974 }, { "epoch": 0.22978124294385635, "grad_norm": 0.41925695538520813, "learning_rate": 2.6967970589918364e-05, "loss": 0.0611, "step": 12975 }, { "epoch": 0.22979895248088478, "grad_norm": 1.1949527263641357, "learning_rate": 2.6967451911001968e-05, "loss": 0.1569, "step": 12976 }, { "epoch": 0.2298166620179132, "grad_norm": 0.6985564231872559, "learning_rate": 2.696693319271387e-05, "loss": 0.0818, "step": 12977 }, { "epoch": 0.22983437155494163, "grad_norm": 0.5098971724510193, "learning_rate": 2.6966414435055774e-05, "loss": 0.0862, "step": 12978 }, { "epoch": 0.22985208109197006, "grad_norm": 0.7989724278450012, "learning_rate": 2.6965895638029382e-05, "loss": 0.0951, "step": 12979 }, { "epoch": 0.22986979062899848, "grad_norm": 0.8926742672920227, "learning_rate": 2.6965376801636405e-05, "loss": 0.1259, "step": 12980 }, { "epoch": 0.2298875001660269, "grad_norm": 0.7724067568778992, "learning_rate": 2.6964857925878547e-05, "loss": 0.0878, "step": 12981 }, { "epoch": 0.22990520970305534, "grad_norm": 0.9861605167388916, "learning_rate": 2.6964339010757514e-05, "loss": 0.1015, "step": 12982 }, { "epoch": 0.22992291924008376, "grad_norm": 0.5189476013183594, "learning_rate": 2.696382005627502e-05, "loss": 0.1096, "step": 12983 }, { "epoch": 0.2299406287771122, "grad_norm": 1.1620056629180908, "learning_rate": 2.6963301062432768e-05, "loss": 0.079, "step": 12984 }, { "epoch": 0.22995833831414061, "grad_norm": 0.7690044641494751, "learning_rate": 2.6962782029232465e-05, "loss": 0.1113, "step": 12985 }, { "epoch": 0.22997604785116904, "grad_norm": 0.7446339130401611, "learning_rate": 2.6962262956675816e-05, "loss": 0.1092, "step": 12986 }, { "epoch": 0.22999375738819747, "grad_norm": 0.6776006817817688, "learning_rate": 2.696174384476454e-05, "loss": 0.0935, "step": 12987 }, { "epoch": 0.2300114669252259, "grad_norm": 1.1792665719985962, "learning_rate": 2.696122469350033e-05, "loss": 0.1038, "step": 12988 }, { "epoch": 0.23002917646225435, "grad_norm": 0.9030924439430237, "learning_rate": 2.6960705502884902e-05, "loss": 0.0902, "step": 12989 }, { "epoch": 0.23004688599928277, "grad_norm": 0.899277925491333, "learning_rate": 2.6960186272919957e-05, "loss": 0.128, "step": 12990 }, { "epoch": 0.2300645955363112, "grad_norm": 0.9575717449188232, "learning_rate": 2.6959667003607217e-05, "loss": 0.1016, "step": 12991 }, { "epoch": 0.23008230507333963, "grad_norm": 0.9577398896217346, "learning_rate": 2.695914769494838e-05, "loss": 0.1463, "step": 12992 }, { "epoch": 0.23010001461036805, "grad_norm": 0.7893876433372498, "learning_rate": 2.695862834694516e-05, "loss": 0.1097, "step": 12993 }, { "epoch": 0.23011772414739648, "grad_norm": 0.8893476128578186, "learning_rate": 2.6958108959599252e-05, "loss": 0.1065, "step": 12994 }, { "epoch": 0.2301354336844249, "grad_norm": 0.9993811845779419, "learning_rate": 2.6957589532912382e-05, "loss": 0.108, "step": 12995 }, { "epoch": 0.23015314322145333, "grad_norm": 0.6345527768135071, "learning_rate": 2.6957070066886253e-05, "loss": 0.1081, "step": 12996 }, { "epoch": 0.23017085275848176, "grad_norm": 1.1284986734390259, "learning_rate": 2.6956550561522568e-05, "loss": 0.1337, "step": 12997 }, { "epoch": 0.23018856229551019, "grad_norm": 0.9978005290031433, "learning_rate": 2.6956031016823046e-05, "loss": 0.1148, "step": 12998 }, { "epoch": 0.2302062718325386, "grad_norm": 0.6557340621948242, "learning_rate": 2.6955511432789387e-05, "loss": 0.1069, "step": 12999 }, { "epoch": 0.23022398136956704, "grad_norm": 1.4537692070007324, "learning_rate": 2.695499180942331e-05, "loss": 0.1138, "step": 13000 }, { "epoch": 0.23024169090659546, "grad_norm": 0.5144350528717041, "learning_rate": 2.6954472146726514e-05, "loss": 0.0901, "step": 13001 }, { "epoch": 0.2302594004436239, "grad_norm": 0.7581515908241272, "learning_rate": 2.695395244470072e-05, "loss": 0.0947, "step": 13002 }, { "epoch": 0.23027710998065232, "grad_norm": 0.7202332019805908, "learning_rate": 2.6953432703347627e-05, "loss": 0.1242, "step": 13003 }, { "epoch": 0.23029481951768077, "grad_norm": 0.7414824366569519, "learning_rate": 2.6952912922668948e-05, "loss": 0.0906, "step": 13004 }, { "epoch": 0.2303125290547092, "grad_norm": 1.45334792137146, "learning_rate": 2.6952393102666394e-05, "loss": 0.1155, "step": 13005 }, { "epoch": 0.23033023859173762, "grad_norm": 0.7126156091690063, "learning_rate": 2.695187324334168e-05, "loss": 0.0883, "step": 13006 }, { "epoch": 0.23034794812876605, "grad_norm": 1.221447467803955, "learning_rate": 2.695135334469651e-05, "loss": 0.1589, "step": 13007 }, { "epoch": 0.23036565766579448, "grad_norm": 0.7931292057037354, "learning_rate": 2.6950833406732597e-05, "loss": 0.1161, "step": 13008 }, { "epoch": 0.2303833672028229, "grad_norm": 1.205257534980774, "learning_rate": 2.695031342945165e-05, "loss": 0.1084, "step": 13009 }, { "epoch": 0.23040107673985133, "grad_norm": 0.6559258699417114, "learning_rate": 2.694979341285538e-05, "loss": 0.0906, "step": 13010 }, { "epoch": 0.23041878627687976, "grad_norm": 1.0934593677520752, "learning_rate": 2.6949273356945497e-05, "loss": 0.0921, "step": 13011 }, { "epoch": 0.23043649581390818, "grad_norm": 1.0979063510894775, "learning_rate": 2.6948753261723714e-05, "loss": 0.1136, "step": 13012 }, { "epoch": 0.2304542053509366, "grad_norm": 1.0450254678726196, "learning_rate": 2.6948233127191742e-05, "loss": 0.1442, "step": 13013 }, { "epoch": 0.23047191488796503, "grad_norm": 0.8409127593040466, "learning_rate": 2.6947712953351292e-05, "loss": 0.0906, "step": 13014 }, { "epoch": 0.23048962442499346, "grad_norm": 0.955421507358551, "learning_rate": 2.6947192740204074e-05, "loss": 0.0948, "step": 13015 }, { "epoch": 0.2305073339620219, "grad_norm": 1.111112117767334, "learning_rate": 2.69466724877518e-05, "loss": 0.094, "step": 13016 }, { "epoch": 0.23052504349905031, "grad_norm": 2.8779308795928955, "learning_rate": 2.6946152195996182e-05, "loss": 0.096, "step": 13017 }, { "epoch": 0.23054275303607877, "grad_norm": 1.0101991891860962, "learning_rate": 2.6945631864938936e-05, "loss": 0.1279, "step": 13018 }, { "epoch": 0.2305604625731072, "grad_norm": 0.9541711211204529, "learning_rate": 2.6945111494581766e-05, "loss": 0.1124, "step": 13019 }, { "epoch": 0.23057817211013562, "grad_norm": 0.8743872046470642, "learning_rate": 2.6944591084926388e-05, "loss": 0.1174, "step": 13020 }, { "epoch": 0.23059588164716405, "grad_norm": 1.1523100137710571, "learning_rate": 2.6944070635974512e-05, "loss": 0.1185, "step": 13021 }, { "epoch": 0.23061359118419247, "grad_norm": 0.8901968598365784, "learning_rate": 2.6943550147727852e-05, "loss": 0.1127, "step": 13022 }, { "epoch": 0.2306313007212209, "grad_norm": 1.076228141784668, "learning_rate": 2.694302962018812e-05, "loss": 0.1374, "step": 13023 }, { "epoch": 0.23064901025824933, "grad_norm": 0.8365179300308228, "learning_rate": 2.6942509053357024e-05, "loss": 0.0929, "step": 13024 }, { "epoch": 0.23066671979527775, "grad_norm": 0.8293482065200806, "learning_rate": 2.6941988447236287e-05, "loss": 0.0832, "step": 13025 }, { "epoch": 0.23068442933230618, "grad_norm": 0.8082055449485779, "learning_rate": 2.6941467801827615e-05, "loss": 0.1041, "step": 13026 }, { "epoch": 0.2307021388693346, "grad_norm": 1.5248327255249023, "learning_rate": 2.6940947117132727e-05, "loss": 0.1757, "step": 13027 }, { "epoch": 0.23071984840636303, "grad_norm": 0.7825856804847717, "learning_rate": 2.6940426393153322e-05, "loss": 0.085, "step": 13028 }, { "epoch": 0.23073755794339146, "grad_norm": 1.390842080116272, "learning_rate": 2.6939905629891125e-05, "loss": 0.1163, "step": 13029 }, { "epoch": 0.23075526748041988, "grad_norm": 0.705342710018158, "learning_rate": 2.6939384827347845e-05, "loss": 0.1164, "step": 13030 }, { "epoch": 0.2307729770174483, "grad_norm": 0.4422188997268677, "learning_rate": 2.69388639855252e-05, "loss": 0.0874, "step": 13031 }, { "epoch": 0.23079068655447674, "grad_norm": 0.9883034825325012, "learning_rate": 2.6938343104424895e-05, "loss": 0.1216, "step": 13032 }, { "epoch": 0.2308083960915052, "grad_norm": 1.0233759880065918, "learning_rate": 2.693782218404865e-05, "loss": 0.1063, "step": 13033 }, { "epoch": 0.23082610562853362, "grad_norm": 1.3842978477478027, "learning_rate": 2.6937301224398184e-05, "loss": 0.1574, "step": 13034 }, { "epoch": 0.23084381516556204, "grad_norm": 0.8777868747711182, "learning_rate": 2.6936780225475195e-05, "loss": 0.1024, "step": 13035 }, { "epoch": 0.23086152470259047, "grad_norm": 0.8199424147605896, "learning_rate": 2.6936259187281413e-05, "loss": 0.0925, "step": 13036 }, { "epoch": 0.2308792342396189, "grad_norm": 0.944442868232727, "learning_rate": 2.6935738109818544e-05, "loss": 0.154, "step": 13037 }, { "epoch": 0.23089694377664732, "grad_norm": 1.2161459922790527, "learning_rate": 2.6935216993088298e-05, "loss": 0.1048, "step": 13038 }, { "epoch": 0.23091465331367575, "grad_norm": 0.9491990208625793, "learning_rate": 2.6934695837092404e-05, "loss": 0.128, "step": 13039 }, { "epoch": 0.23093236285070418, "grad_norm": 0.9811795949935913, "learning_rate": 2.693417464183256e-05, "loss": 0.0948, "step": 13040 }, { "epoch": 0.2309500723877326, "grad_norm": 0.7246787548065186, "learning_rate": 2.6933653407310498e-05, "loss": 0.0929, "step": 13041 }, { "epoch": 0.23096778192476103, "grad_norm": 0.8094245791435242, "learning_rate": 2.693313213352792e-05, "loss": 0.1164, "step": 13042 }, { "epoch": 0.23098549146178946, "grad_norm": 1.0257841348648071, "learning_rate": 2.6932610820486536e-05, "loss": 0.1367, "step": 13043 }, { "epoch": 0.23100320099881788, "grad_norm": 0.9471821188926697, "learning_rate": 2.6932089468188076e-05, "loss": 0.0861, "step": 13044 }, { "epoch": 0.2310209105358463, "grad_norm": 1.0734444856643677, "learning_rate": 2.693156807663425e-05, "loss": 0.1615, "step": 13045 }, { "epoch": 0.23103862007287473, "grad_norm": 1.1344196796417236, "learning_rate": 2.693104664582677e-05, "loss": 0.1106, "step": 13046 }, { "epoch": 0.23105632960990316, "grad_norm": 1.22279953956604, "learning_rate": 2.6930525175767354e-05, "loss": 0.0917, "step": 13047 }, { "epoch": 0.23107403914693161, "grad_norm": 0.7535354495048523, "learning_rate": 2.6930003666457713e-05, "loss": 0.1176, "step": 13048 }, { "epoch": 0.23109174868396004, "grad_norm": 0.5964229106903076, "learning_rate": 2.6929482117899567e-05, "loss": 0.1148, "step": 13049 }, { "epoch": 0.23110945822098847, "grad_norm": 0.7159842252731323, "learning_rate": 2.6928960530094635e-05, "loss": 0.1191, "step": 13050 }, { "epoch": 0.2311271677580169, "grad_norm": 0.8734145164489746, "learning_rate": 2.6928438903044632e-05, "loss": 0.1108, "step": 13051 }, { "epoch": 0.23114487729504532, "grad_norm": 1.091264247894287, "learning_rate": 2.6927917236751265e-05, "loss": 0.1022, "step": 13052 }, { "epoch": 0.23116258683207375, "grad_norm": 0.8701056241989136, "learning_rate": 2.6927395531216258e-05, "loss": 0.097, "step": 13053 }, { "epoch": 0.23118029636910217, "grad_norm": 0.8468396663665771, "learning_rate": 2.6926873786441327e-05, "loss": 0.0935, "step": 13054 }, { "epoch": 0.2311980059061306, "grad_norm": 1.9275035858154297, "learning_rate": 2.692635200242819e-05, "loss": 0.1379, "step": 13055 }, { "epoch": 0.23121571544315903, "grad_norm": 0.895871639251709, "learning_rate": 2.6925830179178562e-05, "loss": 0.1024, "step": 13056 }, { "epoch": 0.23123342498018745, "grad_norm": 0.9610932469367981, "learning_rate": 2.6925308316694153e-05, "loss": 0.1277, "step": 13057 }, { "epoch": 0.23125113451721588, "grad_norm": 0.8925360441207886, "learning_rate": 2.6924786414976693e-05, "loss": 0.1315, "step": 13058 }, { "epoch": 0.2312688440542443, "grad_norm": 1.1642874479293823, "learning_rate": 2.6924264474027885e-05, "loss": 0.1303, "step": 13059 }, { "epoch": 0.23128655359127273, "grad_norm": 0.8040661215782166, "learning_rate": 2.6923742493849457e-05, "loss": 0.0805, "step": 13060 }, { "epoch": 0.23130426312830116, "grad_norm": 0.9763756394386292, "learning_rate": 2.692322047444312e-05, "loss": 0.1292, "step": 13061 }, { "epoch": 0.23132197266532958, "grad_norm": 0.8143497705459595, "learning_rate": 2.6922698415810595e-05, "loss": 0.0773, "step": 13062 }, { "epoch": 0.23133968220235804, "grad_norm": 1.5258136987686157, "learning_rate": 2.69221763179536e-05, "loss": 0.0965, "step": 13063 }, { "epoch": 0.23135739173938646, "grad_norm": 0.9128803610801697, "learning_rate": 2.6921654180873844e-05, "loss": 0.0695, "step": 13064 }, { "epoch": 0.2313751012764149, "grad_norm": 0.7967895269393921, "learning_rate": 2.692113200457306e-05, "loss": 0.0713, "step": 13065 }, { "epoch": 0.23139281081344332, "grad_norm": 0.7100009322166443, "learning_rate": 2.692060978905295e-05, "loss": 0.0584, "step": 13066 }, { "epoch": 0.23141052035047174, "grad_norm": 0.863562822341919, "learning_rate": 2.6920087534315245e-05, "loss": 0.0819, "step": 13067 }, { "epoch": 0.23142822988750017, "grad_norm": 0.8179993629455566, "learning_rate": 2.6919565240361655e-05, "loss": 0.1005, "step": 13068 }, { "epoch": 0.2314459394245286, "grad_norm": 1.2222669124603271, "learning_rate": 2.6919042907193905e-05, "loss": 0.1001, "step": 13069 }, { "epoch": 0.23146364896155702, "grad_norm": 1.2040859460830688, "learning_rate": 2.6918520534813707e-05, "loss": 0.0958, "step": 13070 }, { "epoch": 0.23148135849858545, "grad_norm": 0.8903828859329224, "learning_rate": 2.691799812322278e-05, "loss": 0.1066, "step": 13071 }, { "epoch": 0.23149906803561388, "grad_norm": 0.9801318049430847, "learning_rate": 2.6917475672422844e-05, "loss": 0.1036, "step": 13072 }, { "epoch": 0.2315167775726423, "grad_norm": 1.0412324666976929, "learning_rate": 2.6916953182415618e-05, "loss": 0.1822, "step": 13073 }, { "epoch": 0.23153448710967073, "grad_norm": 1.0047311782836914, "learning_rate": 2.6916430653202828e-05, "loss": 0.0997, "step": 13074 }, { "epoch": 0.23155219664669915, "grad_norm": 0.8191807270050049, "learning_rate": 2.691590808478618e-05, "loss": 0.1151, "step": 13075 }, { "epoch": 0.23156990618372758, "grad_norm": 0.831349790096283, "learning_rate": 2.69153854771674e-05, "loss": 0.1028, "step": 13076 }, { "epoch": 0.231587615720756, "grad_norm": 0.6863725781440735, "learning_rate": 2.691486283034821e-05, "loss": 0.0871, "step": 13077 }, { "epoch": 0.23160532525778446, "grad_norm": 0.8394737839698792, "learning_rate": 2.691434014433032e-05, "loss": 0.109, "step": 13078 }, { "epoch": 0.2316230347948129, "grad_norm": 0.5928781032562256, "learning_rate": 2.6913817419115463e-05, "loss": 0.0984, "step": 13079 }, { "epoch": 0.23164074433184131, "grad_norm": 1.1770570278167725, "learning_rate": 2.691329465470535e-05, "loss": 0.1024, "step": 13080 }, { "epoch": 0.23165845386886974, "grad_norm": 0.7909449934959412, "learning_rate": 2.6912771851101702e-05, "loss": 0.0789, "step": 13081 }, { "epoch": 0.23167616340589817, "grad_norm": 0.7080392837524414, "learning_rate": 2.6912249008306243e-05, "loss": 0.1008, "step": 13082 }, { "epoch": 0.2316938729429266, "grad_norm": 0.83647620677948, "learning_rate": 2.6911726126320683e-05, "loss": 0.1093, "step": 13083 }, { "epoch": 0.23171158247995502, "grad_norm": 0.5937470197677612, "learning_rate": 2.691120320514675e-05, "loss": 0.0699, "step": 13084 }, { "epoch": 0.23172929201698345, "grad_norm": 1.263932228088379, "learning_rate": 2.6910680244786167e-05, "loss": 0.0979, "step": 13085 }, { "epoch": 0.23174700155401187, "grad_norm": 0.9920791983604431, "learning_rate": 2.691015724524065e-05, "loss": 0.1235, "step": 13086 }, { "epoch": 0.2317647110910403, "grad_norm": 0.87458336353302, "learning_rate": 2.690963420651192e-05, "loss": 0.0844, "step": 13087 }, { "epoch": 0.23178242062806872, "grad_norm": 0.8224503397941589, "learning_rate": 2.6909111128601695e-05, "loss": 0.1016, "step": 13088 }, { "epoch": 0.23180013016509715, "grad_norm": 0.8111966252326965, "learning_rate": 2.6908588011511702e-05, "loss": 0.081, "step": 13089 }, { "epoch": 0.23181783970212558, "grad_norm": 0.9058008790016174, "learning_rate": 2.6908064855243657e-05, "loss": 0.0982, "step": 13090 }, { "epoch": 0.231835549239154, "grad_norm": 0.7431012392044067, "learning_rate": 2.6907541659799284e-05, "loss": 0.0703, "step": 13091 }, { "epoch": 0.23185325877618243, "grad_norm": 0.650723397731781, "learning_rate": 2.69070184251803e-05, "loss": 0.0991, "step": 13092 }, { "epoch": 0.23187096831321088, "grad_norm": 0.7984852194786072, "learning_rate": 2.690649515138844e-05, "loss": 0.1006, "step": 13093 }, { "epoch": 0.2318886778502393, "grad_norm": 0.9909815788269043, "learning_rate": 2.6905971838425405e-05, "loss": 0.0915, "step": 13094 }, { "epoch": 0.23190638738726774, "grad_norm": 0.7706674933433533, "learning_rate": 2.690544848629293e-05, "loss": 0.0897, "step": 13095 }, { "epoch": 0.23192409692429616, "grad_norm": 0.47413167357444763, "learning_rate": 2.6904925094992735e-05, "loss": 0.1099, "step": 13096 }, { "epoch": 0.2319418064613246, "grad_norm": 1.0421205759048462, "learning_rate": 2.690440166452654e-05, "loss": 0.1091, "step": 13097 }, { "epoch": 0.23195951599835302, "grad_norm": 0.8005855083465576, "learning_rate": 2.6903878194896065e-05, "loss": 0.0801, "step": 13098 }, { "epoch": 0.23197722553538144, "grad_norm": 0.6860608458518982, "learning_rate": 2.690335468610304e-05, "loss": 0.0673, "step": 13099 }, { "epoch": 0.23199493507240987, "grad_norm": 0.979712188243866, "learning_rate": 2.6902831138149177e-05, "loss": 0.0947, "step": 13100 }, { "epoch": 0.2320126446094383, "grad_norm": 0.6256811618804932, "learning_rate": 2.6902307551036207e-05, "loss": 0.0861, "step": 13101 }, { "epoch": 0.23203035414646672, "grad_norm": 0.7830016613006592, "learning_rate": 2.6901783924765846e-05, "loss": 0.0995, "step": 13102 }, { "epoch": 0.23204806368349515, "grad_norm": 1.0321321487426758, "learning_rate": 2.690126025933982e-05, "loss": 0.0935, "step": 13103 }, { "epoch": 0.23206577322052357, "grad_norm": 0.9591166973114014, "learning_rate": 2.690073655475985e-05, "loss": 0.1073, "step": 13104 }, { "epoch": 0.232083482757552, "grad_norm": 0.8113078474998474, "learning_rate": 2.6900212811027667e-05, "loss": 0.0735, "step": 13105 }, { "epoch": 0.23210119229458043, "grad_norm": 1.1376457214355469, "learning_rate": 2.6899689028144983e-05, "loss": 0.1274, "step": 13106 }, { "epoch": 0.23211890183160885, "grad_norm": 0.8163964152336121, "learning_rate": 2.6899165206113524e-05, "loss": 0.0878, "step": 13107 }, { "epoch": 0.2321366113686373, "grad_norm": 1.4193451404571533, "learning_rate": 2.6898641344935017e-05, "loss": 0.1071, "step": 13108 }, { "epoch": 0.23215432090566573, "grad_norm": 0.6712756156921387, "learning_rate": 2.6898117444611184e-05, "loss": 0.067, "step": 13109 }, { "epoch": 0.23217203044269416, "grad_norm": 1.0484384298324585, "learning_rate": 2.6897593505143747e-05, "loss": 0.117, "step": 13110 }, { "epoch": 0.2321897399797226, "grad_norm": 0.698137104511261, "learning_rate": 2.6897069526534432e-05, "loss": 0.1345, "step": 13111 }, { "epoch": 0.232207449516751, "grad_norm": 0.6732574701309204, "learning_rate": 2.689654550878496e-05, "loss": 0.0704, "step": 13112 }, { "epoch": 0.23222515905377944, "grad_norm": 0.9717000126838684, "learning_rate": 2.689602145189706e-05, "loss": 0.1057, "step": 13113 }, { "epoch": 0.23224286859080787, "grad_norm": 0.909053385257721, "learning_rate": 2.6895497355872448e-05, "loss": 0.0987, "step": 13114 }, { "epoch": 0.2322605781278363, "grad_norm": 0.8140896558761597, "learning_rate": 2.6894973220712854e-05, "loss": 0.0965, "step": 13115 }, { "epoch": 0.23227828766486472, "grad_norm": 1.3363583087921143, "learning_rate": 2.6894449046420003e-05, "loss": 0.116, "step": 13116 }, { "epoch": 0.23229599720189315, "grad_norm": 0.8067652583122253, "learning_rate": 2.6893924832995615e-05, "loss": 0.0654, "step": 13117 }, { "epoch": 0.23231370673892157, "grad_norm": 0.8829255104064941, "learning_rate": 2.6893400580441422e-05, "loss": 0.1066, "step": 13118 }, { "epoch": 0.23233141627595, "grad_norm": 0.9004896879196167, "learning_rate": 2.6892876288759138e-05, "loss": 0.1007, "step": 13119 }, { "epoch": 0.23234912581297842, "grad_norm": 0.8184393048286438, "learning_rate": 2.6892351957950502e-05, "loss": 0.1056, "step": 13120 }, { "epoch": 0.23236683535000685, "grad_norm": 0.9986602663993835, "learning_rate": 2.6891827588017224e-05, "loss": 0.1077, "step": 13121 }, { "epoch": 0.23238454488703528, "grad_norm": 0.6869459748268127, "learning_rate": 2.689130317896104e-05, "loss": 0.0811, "step": 13122 }, { "epoch": 0.23240225442406373, "grad_norm": 0.7322047352790833, "learning_rate": 2.689077873078367e-05, "loss": 0.0945, "step": 13123 }, { "epoch": 0.23241996396109216, "grad_norm": 0.6706857085227966, "learning_rate": 2.6890254243486834e-05, "loss": 0.0902, "step": 13124 }, { "epoch": 0.23243767349812058, "grad_norm": 0.6631039977073669, "learning_rate": 2.6889729717072273e-05, "loss": 0.1078, "step": 13125 }, { "epoch": 0.232455383035149, "grad_norm": 1.7237335443496704, "learning_rate": 2.68892051515417e-05, "loss": 0.1322, "step": 13126 }, { "epoch": 0.23247309257217744, "grad_norm": 0.48113951086997986, "learning_rate": 2.6888680546896845e-05, "loss": 0.0854, "step": 13127 }, { "epoch": 0.23249080210920586, "grad_norm": 0.7900812029838562, "learning_rate": 2.6888155903139435e-05, "loss": 0.0835, "step": 13128 }, { "epoch": 0.2325085116462343, "grad_norm": 1.341619610786438, "learning_rate": 2.6887631220271194e-05, "loss": 0.1319, "step": 13129 }, { "epoch": 0.23252622118326272, "grad_norm": 0.6369728446006775, "learning_rate": 2.688710649829385e-05, "loss": 0.0966, "step": 13130 }, { "epoch": 0.23254393072029114, "grad_norm": 0.9821065068244934, "learning_rate": 2.688658173720912e-05, "loss": 0.1099, "step": 13131 }, { "epoch": 0.23256164025731957, "grad_norm": 0.9060849547386169, "learning_rate": 2.6886056937018746e-05, "loss": 0.1091, "step": 13132 }, { "epoch": 0.232579349794348, "grad_norm": 0.7272512912750244, "learning_rate": 2.6885532097724447e-05, "loss": 0.0996, "step": 13133 }, { "epoch": 0.23259705933137642, "grad_norm": 1.030056357383728, "learning_rate": 2.6885007219327947e-05, "loss": 0.0998, "step": 13134 }, { "epoch": 0.23261476886840485, "grad_norm": 0.8387565016746521, "learning_rate": 2.6884482301830976e-05, "loss": 0.0949, "step": 13135 }, { "epoch": 0.23263247840543327, "grad_norm": 0.6802672147750854, "learning_rate": 2.688395734523526e-05, "loss": 0.1113, "step": 13136 }, { "epoch": 0.23265018794246173, "grad_norm": 0.5958704352378845, "learning_rate": 2.6883432349542527e-05, "loss": 0.0702, "step": 13137 }, { "epoch": 0.23266789747949015, "grad_norm": 0.6268079280853271, "learning_rate": 2.68829073147545e-05, "loss": 0.1491, "step": 13138 }, { "epoch": 0.23268560701651858, "grad_norm": 0.586860179901123, "learning_rate": 2.6882382240872913e-05, "loss": 0.0938, "step": 13139 }, { "epoch": 0.232703316553547, "grad_norm": 1.1955827474594116, "learning_rate": 2.6881857127899495e-05, "loss": 0.1017, "step": 13140 }, { "epoch": 0.23272102609057543, "grad_norm": 0.7961146235466003, "learning_rate": 2.688133197583596e-05, "loss": 0.0917, "step": 13141 }, { "epoch": 0.23273873562760386, "grad_norm": 0.9784926176071167, "learning_rate": 2.688080678468405e-05, "loss": 0.1033, "step": 13142 }, { "epoch": 0.2327564451646323, "grad_norm": 1.1732194423675537, "learning_rate": 2.6880281554445482e-05, "loss": 0.1228, "step": 13143 }, { "epoch": 0.2327741547016607, "grad_norm": 0.8247631788253784, "learning_rate": 2.6879756285121996e-05, "loss": 0.0781, "step": 13144 }, { "epoch": 0.23279186423868914, "grad_norm": 1.1850930452346802, "learning_rate": 2.687923097671531e-05, "loss": 0.0827, "step": 13145 }, { "epoch": 0.23280957377571757, "grad_norm": 0.9383001327514648, "learning_rate": 2.6878705629227157e-05, "loss": 0.1244, "step": 13146 }, { "epoch": 0.232827283312746, "grad_norm": 0.7607072591781616, "learning_rate": 2.6878180242659264e-05, "loss": 0.0792, "step": 13147 }, { "epoch": 0.23284499284977442, "grad_norm": 1.5827035903930664, "learning_rate": 2.6877654817013357e-05, "loss": 0.0882, "step": 13148 }, { "epoch": 0.23286270238680284, "grad_norm": 0.7996134161949158, "learning_rate": 2.687712935229117e-05, "loss": 0.1015, "step": 13149 }, { "epoch": 0.23288041192383127, "grad_norm": 0.7365665435791016, "learning_rate": 2.6876603848494426e-05, "loss": 0.093, "step": 13150 }, { "epoch": 0.2328981214608597, "grad_norm": 1.4296574592590332, "learning_rate": 2.6876078305624858e-05, "loss": 0.1205, "step": 13151 }, { "epoch": 0.23291583099788815, "grad_norm": 1.0987719297409058, "learning_rate": 2.6875552723684194e-05, "loss": 0.1032, "step": 13152 }, { "epoch": 0.23293354053491658, "grad_norm": 0.5061723589897156, "learning_rate": 2.6875027102674162e-05, "loss": 0.0971, "step": 13153 }, { "epoch": 0.232951250071945, "grad_norm": 1.1305862665176392, "learning_rate": 2.687450144259649e-05, "loss": 0.0885, "step": 13154 }, { "epoch": 0.23296895960897343, "grad_norm": 1.716723918914795, "learning_rate": 2.6873975743452916e-05, "loss": 0.1778, "step": 13155 }, { "epoch": 0.23298666914600186, "grad_norm": 0.785233199596405, "learning_rate": 2.6873450005245156e-05, "loss": 0.0792, "step": 13156 }, { "epoch": 0.23300437868303028, "grad_norm": 0.5164899230003357, "learning_rate": 2.687292422797495e-05, "loss": 0.0904, "step": 13157 }, { "epoch": 0.2330220882200587, "grad_norm": 1.0109275579452515, "learning_rate": 2.687239841164402e-05, "loss": 0.0987, "step": 13158 }, { "epoch": 0.23303979775708714, "grad_norm": 0.7880386114120483, "learning_rate": 2.68718725562541e-05, "loss": 0.1103, "step": 13159 }, { "epoch": 0.23305750729411556, "grad_norm": 0.8868633508682251, "learning_rate": 2.6871346661806927e-05, "loss": 0.1255, "step": 13160 }, { "epoch": 0.233075216831144, "grad_norm": 0.7858055830001831, "learning_rate": 2.687082072830422e-05, "loss": 0.1444, "step": 13161 }, { "epoch": 0.23309292636817242, "grad_norm": 0.7143232226371765, "learning_rate": 2.687029475574771e-05, "loss": 0.0815, "step": 13162 }, { "epoch": 0.23311063590520084, "grad_norm": 1.0302351713180542, "learning_rate": 2.686976874413914e-05, "loss": 0.0778, "step": 13163 }, { "epoch": 0.23312834544222927, "grad_norm": 0.8301184177398682, "learning_rate": 2.6869242693480224e-05, "loss": 0.0758, "step": 13164 }, { "epoch": 0.2331460549792577, "grad_norm": 0.6264681220054626, "learning_rate": 2.68687166037727e-05, "loss": 0.0865, "step": 13165 }, { "epoch": 0.23316376451628612, "grad_norm": 0.835519552230835, "learning_rate": 2.68681904750183e-05, "loss": 0.1084, "step": 13166 }, { "epoch": 0.23318147405331457, "grad_norm": 1.1270372867584229, "learning_rate": 2.6867664307218754e-05, "loss": 0.0956, "step": 13167 }, { "epoch": 0.233199183590343, "grad_norm": 0.8615754246711731, "learning_rate": 2.6867138100375796e-05, "loss": 0.0921, "step": 13168 }, { "epoch": 0.23321689312737143, "grad_norm": 0.6734720468521118, "learning_rate": 2.6866611854491148e-05, "loss": 0.0916, "step": 13169 }, { "epoch": 0.23323460266439985, "grad_norm": 0.8072080016136169, "learning_rate": 2.6866085569566552e-05, "loss": 0.0849, "step": 13170 }, { "epoch": 0.23325231220142828, "grad_norm": 0.6083186864852905, "learning_rate": 2.6865559245603732e-05, "loss": 0.1319, "step": 13171 }, { "epoch": 0.2332700217384567, "grad_norm": 0.6089119911193848, "learning_rate": 2.6865032882604423e-05, "loss": 0.0919, "step": 13172 }, { "epoch": 0.23328773127548513, "grad_norm": 1.0696818828582764, "learning_rate": 2.6864506480570356e-05, "loss": 0.1224, "step": 13173 }, { "epoch": 0.23330544081251356, "grad_norm": 0.9099845290184021, "learning_rate": 2.6863980039503263e-05, "loss": 0.1181, "step": 13174 }, { "epoch": 0.23332315034954199, "grad_norm": 0.5905926823616028, "learning_rate": 2.6863453559404874e-05, "loss": 0.0834, "step": 13175 }, { "epoch": 0.2333408598865704, "grad_norm": 0.9871971011161804, "learning_rate": 2.6862927040276922e-05, "loss": 0.1198, "step": 13176 }, { "epoch": 0.23335856942359884, "grad_norm": 0.9120281338691711, "learning_rate": 2.6862400482121144e-05, "loss": 0.0894, "step": 13177 }, { "epoch": 0.23337627896062726, "grad_norm": 0.8624253273010254, "learning_rate": 2.6861873884939265e-05, "loss": 0.1068, "step": 13178 }, { "epoch": 0.2333939884976557, "grad_norm": 0.7864360809326172, "learning_rate": 2.6861347248733022e-05, "loss": 0.0648, "step": 13179 }, { "epoch": 0.23341169803468412, "grad_norm": 0.8222494125366211, "learning_rate": 2.686082057350415e-05, "loss": 0.0883, "step": 13180 }, { "epoch": 0.23342940757171254, "grad_norm": 1.2924582958221436, "learning_rate": 2.686029385925437e-05, "loss": 0.106, "step": 13181 }, { "epoch": 0.233447117108741, "grad_norm": 0.7218669056892395, "learning_rate": 2.6859767105985427e-05, "loss": 0.1154, "step": 13182 }, { "epoch": 0.23346482664576942, "grad_norm": 1.223556637763977, "learning_rate": 2.6859240313699053e-05, "loss": 0.1109, "step": 13183 }, { "epoch": 0.23348253618279785, "grad_norm": 0.9579647183418274, "learning_rate": 2.6858713482396973e-05, "loss": 0.1184, "step": 13184 }, { "epoch": 0.23350024571982628, "grad_norm": 0.8651231527328491, "learning_rate": 2.6858186612080927e-05, "loss": 0.1009, "step": 13185 }, { "epoch": 0.2335179552568547, "grad_norm": 0.5816264152526855, "learning_rate": 2.6857659702752645e-05, "loss": 0.0596, "step": 13186 }, { "epoch": 0.23353566479388313, "grad_norm": 1.1202213764190674, "learning_rate": 2.6857132754413864e-05, "loss": 0.0777, "step": 13187 }, { "epoch": 0.23355337433091156, "grad_norm": 0.8588240146636963, "learning_rate": 2.6856605767066315e-05, "loss": 0.0994, "step": 13188 }, { "epoch": 0.23357108386793998, "grad_norm": 0.6597774028778076, "learning_rate": 2.685607874071173e-05, "loss": 0.0759, "step": 13189 }, { "epoch": 0.2335887934049684, "grad_norm": 0.838575541973114, "learning_rate": 2.6855551675351847e-05, "loss": 0.1566, "step": 13190 }, { "epoch": 0.23360650294199684, "grad_norm": 0.7725934982299805, "learning_rate": 2.6855024570988398e-05, "loss": 0.0894, "step": 13191 }, { "epoch": 0.23362421247902526, "grad_norm": 1.1722447872161865, "learning_rate": 2.685449742762312e-05, "loss": 0.1125, "step": 13192 }, { "epoch": 0.2336419220160537, "grad_norm": 0.569580614566803, "learning_rate": 2.6853970245257738e-05, "loss": 0.0919, "step": 13193 }, { "epoch": 0.23365963155308211, "grad_norm": 0.8269039392471313, "learning_rate": 2.6853443023893997e-05, "loss": 0.1048, "step": 13194 }, { "epoch": 0.23367734109011054, "grad_norm": 0.5390129089355469, "learning_rate": 2.6852915763533625e-05, "loss": 0.1077, "step": 13195 }, { "epoch": 0.23369505062713897, "grad_norm": 0.7004663348197937, "learning_rate": 2.6852388464178363e-05, "loss": 0.0962, "step": 13196 }, { "epoch": 0.23371276016416742, "grad_norm": 1.5550665855407715, "learning_rate": 2.685186112582994e-05, "loss": 0.0719, "step": 13197 }, { "epoch": 0.23373046970119585, "grad_norm": 1.2438369989395142, "learning_rate": 2.685133374849009e-05, "loss": 0.0491, "step": 13198 }, { "epoch": 0.23374817923822427, "grad_norm": 0.8193850517272949, "learning_rate": 2.6850806332160555e-05, "loss": 0.0813, "step": 13199 }, { "epoch": 0.2337658887752527, "grad_norm": 0.8564987182617188, "learning_rate": 2.685027887684306e-05, "loss": 0.1024, "step": 13200 }, { "epoch": 0.23378359831228113, "grad_norm": 0.8744658827781677, "learning_rate": 2.684975138253935e-05, "loss": 0.0952, "step": 13201 }, { "epoch": 0.23380130784930955, "grad_norm": 1.054951548576355, "learning_rate": 2.6849223849251157e-05, "loss": 0.0893, "step": 13202 }, { "epoch": 0.23381901738633798, "grad_norm": 0.7662148475646973, "learning_rate": 2.6848696276980216e-05, "loss": 0.1084, "step": 13203 }, { "epoch": 0.2338367269233664, "grad_norm": 0.8085424304008484, "learning_rate": 2.6848168665728263e-05, "loss": 0.1022, "step": 13204 }, { "epoch": 0.23385443646039483, "grad_norm": 1.0338134765625, "learning_rate": 2.684764101549703e-05, "loss": 0.1096, "step": 13205 }, { "epoch": 0.23387214599742326, "grad_norm": 0.7935632467269897, "learning_rate": 2.6847113326288263e-05, "loss": 0.1271, "step": 13206 }, { "epoch": 0.23388985553445168, "grad_norm": 0.9026436805725098, "learning_rate": 2.6846585598103683e-05, "loss": 0.1426, "step": 13207 }, { "epoch": 0.2339075650714801, "grad_norm": 0.5697442889213562, "learning_rate": 2.684605783094504e-05, "loss": 0.0797, "step": 13208 }, { "epoch": 0.23392527460850854, "grad_norm": 0.9877029061317444, "learning_rate": 2.6845530024814065e-05, "loss": 0.0985, "step": 13209 }, { "epoch": 0.23394298414553696, "grad_norm": 0.6688500642776489, "learning_rate": 2.6845002179712494e-05, "loss": 0.0561, "step": 13210 }, { "epoch": 0.2339606936825654, "grad_norm": 0.9224187135696411, "learning_rate": 2.6844474295642064e-05, "loss": 0.0936, "step": 13211 }, { "epoch": 0.23397840321959384, "grad_norm": 1.5176397562026978, "learning_rate": 2.6843946372604503e-05, "loss": 0.1057, "step": 13212 }, { "epoch": 0.23399611275662227, "grad_norm": 0.6465449333190918, "learning_rate": 2.6843418410601566e-05, "loss": 0.0988, "step": 13213 }, { "epoch": 0.2340138222936507, "grad_norm": 1.0436491966247559, "learning_rate": 2.684289040963498e-05, "loss": 0.1052, "step": 13214 }, { "epoch": 0.23403153183067912, "grad_norm": 1.7376359701156616, "learning_rate": 2.684236236970648e-05, "loss": 0.1505, "step": 13215 }, { "epoch": 0.23404924136770755, "grad_norm": 0.5987348556518555, "learning_rate": 2.6841834290817806e-05, "loss": 0.1428, "step": 13216 }, { "epoch": 0.23406695090473598, "grad_norm": 0.85976243019104, "learning_rate": 2.6841306172970694e-05, "loss": 0.0887, "step": 13217 }, { "epoch": 0.2340846604417644, "grad_norm": 1.8171074390411377, "learning_rate": 2.6840778016166886e-05, "loss": 0.1198, "step": 13218 }, { "epoch": 0.23410236997879283, "grad_norm": 0.8449447154998779, "learning_rate": 2.684024982040811e-05, "loss": 0.0872, "step": 13219 }, { "epoch": 0.23412007951582126, "grad_norm": 0.8910098075866699, "learning_rate": 2.6839721585696114e-05, "loss": 0.1093, "step": 13220 }, { "epoch": 0.23413778905284968, "grad_norm": 0.6800181865692139, "learning_rate": 2.683919331203263e-05, "loss": 0.0919, "step": 13221 }, { "epoch": 0.2341554985898781, "grad_norm": 0.7646402716636658, "learning_rate": 2.68386649994194e-05, "loss": 0.1044, "step": 13222 }, { "epoch": 0.23417320812690653, "grad_norm": 0.807073712348938, "learning_rate": 2.683813664785816e-05, "loss": 0.0907, "step": 13223 }, { "epoch": 0.23419091766393496, "grad_norm": 1.148732304573059, "learning_rate": 2.6837608257350643e-05, "loss": 0.146, "step": 13224 }, { "epoch": 0.2342086272009634, "grad_norm": 1.0700926780700684, "learning_rate": 2.6837079827898595e-05, "loss": 0.0942, "step": 13225 }, { "epoch": 0.2342263367379918, "grad_norm": 1.2777544260025024, "learning_rate": 2.683655135950375e-05, "loss": 0.1145, "step": 13226 }, { "epoch": 0.23424404627502027, "grad_norm": 0.7065573334693909, "learning_rate": 2.6836022852167846e-05, "loss": 0.1126, "step": 13227 }, { "epoch": 0.2342617558120487, "grad_norm": 0.9072009921073914, "learning_rate": 2.6835494305892628e-05, "loss": 0.1045, "step": 13228 }, { "epoch": 0.23427946534907712, "grad_norm": 0.9468462467193604, "learning_rate": 2.683496572067983e-05, "loss": 0.0871, "step": 13229 }, { "epoch": 0.23429717488610555, "grad_norm": 0.7502338886260986, "learning_rate": 2.683443709653119e-05, "loss": 0.0776, "step": 13230 }, { "epoch": 0.23431488442313397, "grad_norm": 0.8465172052383423, "learning_rate": 2.6833908433448456e-05, "loss": 0.109, "step": 13231 }, { "epoch": 0.2343325939601624, "grad_norm": 0.8026290535926819, "learning_rate": 2.6833379731433355e-05, "loss": 0.1025, "step": 13232 }, { "epoch": 0.23435030349719083, "grad_norm": 1.1935375928878784, "learning_rate": 2.683285099048763e-05, "loss": 0.1135, "step": 13233 }, { "epoch": 0.23436801303421925, "grad_norm": 0.6505502462387085, "learning_rate": 2.6832322210613022e-05, "loss": 0.0947, "step": 13234 }, { "epoch": 0.23438572257124768, "grad_norm": 1.284781575202942, "learning_rate": 2.6831793391811272e-05, "loss": 0.0869, "step": 13235 }, { "epoch": 0.2344034321082761, "grad_norm": 0.77799391746521, "learning_rate": 2.6831264534084123e-05, "loss": 0.09, "step": 13236 }, { "epoch": 0.23442114164530453, "grad_norm": 0.7291777729988098, "learning_rate": 2.6830735637433302e-05, "loss": 0.1011, "step": 13237 }, { "epoch": 0.23443885118233296, "grad_norm": 0.8791059851646423, "learning_rate": 2.6830206701860566e-05, "loss": 0.129, "step": 13238 }, { "epoch": 0.23445656071936138, "grad_norm": 0.6096634268760681, "learning_rate": 2.6829677727367638e-05, "loss": 0.1116, "step": 13239 }, { "epoch": 0.2344742702563898, "grad_norm": 0.8847484588623047, "learning_rate": 2.6829148713956273e-05, "loss": 0.097, "step": 13240 }, { "epoch": 0.23449197979341824, "grad_norm": 1.028442144393921, "learning_rate": 2.68286196616282e-05, "loss": 0.0877, "step": 13241 }, { "epoch": 0.2345096893304467, "grad_norm": 1.049554705619812, "learning_rate": 2.6828090570385167e-05, "loss": 0.1124, "step": 13242 }, { "epoch": 0.23452739886747512, "grad_norm": 0.8220303058624268, "learning_rate": 2.6827561440228912e-05, "loss": 0.1209, "step": 13243 }, { "epoch": 0.23454510840450354, "grad_norm": 0.8327089548110962, "learning_rate": 2.682703227116118e-05, "loss": 0.1392, "step": 13244 }, { "epoch": 0.23456281794153197, "grad_norm": 1.0258023738861084, "learning_rate": 2.6826503063183704e-05, "loss": 0.1314, "step": 13245 }, { "epoch": 0.2345805274785604, "grad_norm": 0.7940089106559753, "learning_rate": 2.682597381629823e-05, "loss": 0.1011, "step": 13246 }, { "epoch": 0.23459823701558882, "grad_norm": 0.7017790079116821, "learning_rate": 2.68254445305065e-05, "loss": 0.1001, "step": 13247 }, { "epoch": 0.23461594655261725, "grad_norm": 0.7095293402671814, "learning_rate": 2.6824915205810252e-05, "loss": 0.1182, "step": 13248 }, { "epoch": 0.23463365608964568, "grad_norm": 0.7265079617500305, "learning_rate": 2.6824385842211228e-05, "loss": 0.088, "step": 13249 }, { "epoch": 0.2346513656266741, "grad_norm": 0.9037581086158752, "learning_rate": 2.6823856439711173e-05, "loss": 0.0846, "step": 13250 }, { "epoch": 0.23466907516370253, "grad_norm": 0.7557706832885742, "learning_rate": 2.6823326998311824e-05, "loss": 0.0965, "step": 13251 }, { "epoch": 0.23468678470073095, "grad_norm": 1.063067078590393, "learning_rate": 2.6822797518014927e-05, "loss": 0.1146, "step": 13252 }, { "epoch": 0.23470449423775938, "grad_norm": 0.8603524565696716, "learning_rate": 2.682226799882222e-05, "loss": 0.1069, "step": 13253 }, { "epoch": 0.2347222037747878, "grad_norm": 0.788526713848114, "learning_rate": 2.6821738440735446e-05, "loss": 0.0976, "step": 13254 }, { "epoch": 0.23473991331181623, "grad_norm": 0.8640379309654236, "learning_rate": 2.6821208843756355e-05, "loss": 0.1135, "step": 13255 }, { "epoch": 0.23475762284884466, "grad_norm": 0.7220377922058105, "learning_rate": 2.6820679207886674e-05, "loss": 0.1098, "step": 13256 }, { "epoch": 0.23477533238587311, "grad_norm": 1.2604849338531494, "learning_rate": 2.6820149533128156e-05, "loss": 0.0769, "step": 13257 }, { "epoch": 0.23479304192290154, "grad_norm": 0.5214874148368835, "learning_rate": 2.6819619819482545e-05, "loss": 0.1324, "step": 13258 }, { "epoch": 0.23481075145992997, "grad_norm": 0.7286080718040466, "learning_rate": 2.681909006695158e-05, "loss": 0.1144, "step": 13259 }, { "epoch": 0.2348284609969584, "grad_norm": 0.6956993341445923, "learning_rate": 2.6818560275537e-05, "loss": 0.0847, "step": 13260 }, { "epoch": 0.23484617053398682, "grad_norm": 0.7113601565361023, "learning_rate": 2.6818030445240553e-05, "loss": 0.1028, "step": 13261 }, { "epoch": 0.23486388007101525, "grad_norm": 0.6697239279747009, "learning_rate": 2.6817500576063986e-05, "loss": 0.1008, "step": 13262 }, { "epoch": 0.23488158960804367, "grad_norm": 0.9598865509033203, "learning_rate": 2.6816970668009033e-05, "loss": 0.1155, "step": 13263 }, { "epoch": 0.2348992991450721, "grad_norm": 0.7795920372009277, "learning_rate": 2.6816440721077443e-05, "loss": 0.0955, "step": 13264 }, { "epoch": 0.23491700868210053, "grad_norm": 0.9265511631965637, "learning_rate": 2.6815910735270957e-05, "loss": 0.1336, "step": 13265 }, { "epoch": 0.23493471821912895, "grad_norm": 1.1179553270339966, "learning_rate": 2.6815380710591324e-05, "loss": 0.0995, "step": 13266 }, { "epoch": 0.23495242775615738, "grad_norm": 0.38491225242614746, "learning_rate": 2.681485064704028e-05, "loss": 0.092, "step": 13267 }, { "epoch": 0.2349701372931858, "grad_norm": 0.7210397720336914, "learning_rate": 2.6814320544619572e-05, "loss": 0.0747, "step": 13268 }, { "epoch": 0.23498784683021423, "grad_norm": 0.7170727849006653, "learning_rate": 2.6813790403330943e-05, "loss": 0.0892, "step": 13269 }, { "epoch": 0.23500555636724266, "grad_norm": 1.2573275566101074, "learning_rate": 2.681326022317614e-05, "loss": 0.0969, "step": 13270 }, { "epoch": 0.2350232659042711, "grad_norm": 1.0929996967315674, "learning_rate": 2.681273000415691e-05, "loss": 0.148, "step": 13271 }, { "epoch": 0.23504097544129954, "grad_norm": 0.90400230884552, "learning_rate": 2.6812199746274984e-05, "loss": 0.1425, "step": 13272 }, { "epoch": 0.23505868497832796, "grad_norm": 0.8875843286514282, "learning_rate": 2.6811669449532123e-05, "loss": 0.1311, "step": 13273 }, { "epoch": 0.2350763945153564, "grad_norm": 0.5036019086837769, "learning_rate": 2.6811139113930063e-05, "loss": 0.0912, "step": 13274 }, { "epoch": 0.23509410405238482, "grad_norm": 0.8402342796325684, "learning_rate": 2.681060873947055e-05, "loss": 0.123, "step": 13275 }, { "epoch": 0.23511181358941324, "grad_norm": 0.7433314323425293, "learning_rate": 2.6810078326155328e-05, "loss": 0.1053, "step": 13276 }, { "epoch": 0.23512952312644167, "grad_norm": 0.9773169159889221, "learning_rate": 2.6809547873986145e-05, "loss": 0.0751, "step": 13277 }, { "epoch": 0.2351472326634701, "grad_norm": 0.6692507863044739, "learning_rate": 2.680901738296474e-05, "loss": 0.1149, "step": 13278 }, { "epoch": 0.23516494220049852, "grad_norm": 0.9972501993179321, "learning_rate": 2.6808486853092862e-05, "loss": 0.0925, "step": 13279 }, { "epoch": 0.23518265173752695, "grad_norm": 1.070342779159546, "learning_rate": 2.6807956284372262e-05, "loss": 0.0626, "step": 13280 }, { "epoch": 0.23520036127455538, "grad_norm": 0.6758332848548889, "learning_rate": 2.6807425676804673e-05, "loss": 0.0984, "step": 13281 }, { "epoch": 0.2352180708115838, "grad_norm": 0.7040977478027344, "learning_rate": 2.6806895030391853e-05, "loss": 0.0741, "step": 13282 }, { "epoch": 0.23523578034861223, "grad_norm": 0.9408034086227417, "learning_rate": 2.680636434513554e-05, "loss": 0.1067, "step": 13283 }, { "epoch": 0.23525348988564065, "grad_norm": 0.8262820839881897, "learning_rate": 2.6805833621037486e-05, "loss": 0.0787, "step": 13284 }, { "epoch": 0.23527119942266908, "grad_norm": 0.8749720454216003, "learning_rate": 2.6805302858099427e-05, "loss": 0.1499, "step": 13285 }, { "epoch": 0.23528890895969753, "grad_norm": 0.7698259353637695, "learning_rate": 2.680477205632312e-05, "loss": 0.0784, "step": 13286 }, { "epoch": 0.23530661849672596, "grad_norm": 0.7541770339012146, "learning_rate": 2.680424121571031e-05, "loss": 0.0993, "step": 13287 }, { "epoch": 0.2353243280337544, "grad_norm": 0.8613587617874146, "learning_rate": 2.6803710336262735e-05, "loss": 0.1153, "step": 13288 }, { "epoch": 0.2353420375707828, "grad_norm": 0.601847767829895, "learning_rate": 2.6803179417982147e-05, "loss": 0.1165, "step": 13289 }, { "epoch": 0.23535974710781124, "grad_norm": 0.7041382789611816, "learning_rate": 2.6802648460870292e-05, "loss": 0.0839, "step": 13290 }, { "epoch": 0.23537745664483967, "grad_norm": 0.9680834412574768, "learning_rate": 2.6802117464928923e-05, "loss": 0.1066, "step": 13291 }, { "epoch": 0.2353951661818681, "grad_norm": 0.7087642550468445, "learning_rate": 2.680158643015978e-05, "loss": 0.09, "step": 13292 }, { "epoch": 0.23541287571889652, "grad_norm": 1.1143282651901245, "learning_rate": 2.6801055356564604e-05, "loss": 0.0799, "step": 13293 }, { "epoch": 0.23543058525592495, "grad_norm": 0.8159465789794922, "learning_rate": 2.6800524244145156e-05, "loss": 0.1014, "step": 13294 }, { "epoch": 0.23544829479295337, "grad_norm": 0.7247900366783142, "learning_rate": 2.679999309290318e-05, "loss": 0.1131, "step": 13295 }, { "epoch": 0.2354660043299818, "grad_norm": 0.5585156083106995, "learning_rate": 2.679946190284041e-05, "loss": 0.0944, "step": 13296 }, { "epoch": 0.23548371386701022, "grad_norm": 0.5403749346733093, "learning_rate": 2.6798930673958614e-05, "loss": 0.0696, "step": 13297 }, { "epoch": 0.23550142340403865, "grad_norm": 1.0990604162216187, "learning_rate": 2.679839940625952e-05, "loss": 0.1122, "step": 13298 }, { "epoch": 0.23551913294106708, "grad_norm": 0.7655146718025208, "learning_rate": 2.679786809974489e-05, "loss": 0.0885, "step": 13299 }, { "epoch": 0.2355368424780955, "grad_norm": 0.707405686378479, "learning_rate": 2.6797336754416468e-05, "loss": 0.1058, "step": 13300 }, { "epoch": 0.23555455201512396, "grad_norm": 0.6717909574508667, "learning_rate": 2.6796805370276002e-05, "loss": 0.1513, "step": 13301 }, { "epoch": 0.23557226155215238, "grad_norm": 0.47112491726875305, "learning_rate": 2.679627394732524e-05, "loss": 0.0846, "step": 13302 }, { "epoch": 0.2355899710891808, "grad_norm": 1.08255136013031, "learning_rate": 2.6795742485565923e-05, "loss": 0.147, "step": 13303 }, { "epoch": 0.23560768062620924, "grad_norm": 1.366214632987976, "learning_rate": 2.6795210984999816e-05, "loss": 0.0974, "step": 13304 }, { "epoch": 0.23562539016323766, "grad_norm": 0.4931354522705078, "learning_rate": 2.6794679445628654e-05, "loss": 0.059, "step": 13305 }, { "epoch": 0.2356430997002661, "grad_norm": 0.716673731803894, "learning_rate": 2.679414786745419e-05, "loss": 0.1514, "step": 13306 }, { "epoch": 0.23566080923729452, "grad_norm": 1.126474380493164, "learning_rate": 2.6793616250478167e-05, "loss": 0.0888, "step": 13307 }, { "epoch": 0.23567851877432294, "grad_norm": 0.5274786949157715, "learning_rate": 2.6793084594702343e-05, "loss": 0.0748, "step": 13308 }, { "epoch": 0.23569622831135137, "grad_norm": 1.0815589427947998, "learning_rate": 2.6792552900128466e-05, "loss": 0.0759, "step": 13309 }, { "epoch": 0.2357139378483798, "grad_norm": 0.901892900466919, "learning_rate": 2.679202116675828e-05, "loss": 0.0624, "step": 13310 }, { "epoch": 0.23573164738540822, "grad_norm": 1.062685251235962, "learning_rate": 2.6791489394593537e-05, "loss": 0.1454, "step": 13311 }, { "epoch": 0.23574935692243665, "grad_norm": 0.8339834213256836, "learning_rate": 2.6790957583635988e-05, "loss": 0.1056, "step": 13312 }, { "epoch": 0.23576706645946507, "grad_norm": 0.7168293595314026, "learning_rate": 2.679042573388738e-05, "loss": 0.1164, "step": 13313 }, { "epoch": 0.2357847759964935, "grad_norm": 0.5395216941833496, "learning_rate": 2.6789893845349466e-05, "loss": 0.0647, "step": 13314 }, { "epoch": 0.23580248553352193, "grad_norm": 0.5199344754219055, "learning_rate": 2.6789361918023993e-05, "loss": 0.0725, "step": 13315 }, { "epoch": 0.23582019507055038, "grad_norm": 1.3764327764511108, "learning_rate": 2.678882995191271e-05, "loss": 0.132, "step": 13316 }, { "epoch": 0.2358379046075788, "grad_norm": 0.702225387096405, "learning_rate": 2.6788297947017368e-05, "loss": 0.0739, "step": 13317 }, { "epoch": 0.23585561414460723, "grad_norm": 1.1003345251083374, "learning_rate": 2.678776590333972e-05, "loss": 0.0966, "step": 13318 }, { "epoch": 0.23587332368163566, "grad_norm": 1.6570103168487549, "learning_rate": 2.6787233820881513e-05, "loss": 0.0879, "step": 13319 }, { "epoch": 0.2358910332186641, "grad_norm": 1.6446782350540161, "learning_rate": 2.6786701699644496e-05, "loss": 0.1223, "step": 13320 }, { "epoch": 0.2359087427556925, "grad_norm": 3.9566612243652344, "learning_rate": 2.678616953963043e-05, "loss": 0.1248, "step": 13321 }, { "epoch": 0.23592645229272094, "grad_norm": 0.6364396810531616, "learning_rate": 2.6785637340841055e-05, "loss": 0.0793, "step": 13322 }, { "epoch": 0.23594416182974937, "grad_norm": 0.8872259259223938, "learning_rate": 2.6785105103278127e-05, "loss": 0.116, "step": 13323 }, { "epoch": 0.2359618713667778, "grad_norm": 0.7702565789222717, "learning_rate": 2.6784572826943394e-05, "loss": 0.0965, "step": 13324 }, { "epoch": 0.23597958090380622, "grad_norm": 0.7656034231185913, "learning_rate": 2.6784040511838604e-05, "loss": 0.0849, "step": 13325 }, { "epoch": 0.23599729044083465, "grad_norm": 1.893446445465088, "learning_rate": 2.678350815796552e-05, "loss": 0.116, "step": 13326 }, { "epoch": 0.23601499997786307, "grad_norm": 0.6757848262786865, "learning_rate": 2.678297576532588e-05, "loss": 0.0957, "step": 13327 }, { "epoch": 0.2360327095148915, "grad_norm": 1.1744130849838257, "learning_rate": 2.6782443333921442e-05, "loss": 0.1267, "step": 13328 }, { "epoch": 0.23605041905191992, "grad_norm": 1.3369152545928955, "learning_rate": 2.678191086375396e-05, "loss": 0.1024, "step": 13329 }, { "epoch": 0.23606812858894835, "grad_norm": 0.6424720287322998, "learning_rate": 2.6781378354825182e-05, "loss": 0.0916, "step": 13330 }, { "epoch": 0.2360858381259768, "grad_norm": 1.284857153892517, "learning_rate": 2.678084580713686e-05, "loss": 0.1188, "step": 13331 }, { "epoch": 0.23610354766300523, "grad_norm": 0.9471108317375183, "learning_rate": 2.6780313220690745e-05, "loss": 0.0799, "step": 13332 }, { "epoch": 0.23612125720003366, "grad_norm": 0.8798263669013977, "learning_rate": 2.6779780595488596e-05, "loss": 0.1584, "step": 13333 }, { "epoch": 0.23613896673706208, "grad_norm": 0.9112264513969421, "learning_rate": 2.677924793153216e-05, "loss": 0.0899, "step": 13334 }, { "epoch": 0.2361566762740905, "grad_norm": 0.9653853178024292, "learning_rate": 2.6778715228823185e-05, "loss": 0.1025, "step": 13335 }, { "epoch": 0.23617438581111894, "grad_norm": 1.1327474117279053, "learning_rate": 2.6778182487363433e-05, "loss": 0.1392, "step": 13336 }, { "epoch": 0.23619209534814736, "grad_norm": 1.0602781772613525, "learning_rate": 2.6777649707154648e-05, "loss": 0.1058, "step": 13337 }, { "epoch": 0.2362098048851758, "grad_norm": 0.7587546110153198, "learning_rate": 2.6777116888198588e-05, "loss": 0.1508, "step": 13338 }, { "epoch": 0.23622751442220422, "grad_norm": 1.1441603899002075, "learning_rate": 2.6776584030497008e-05, "loss": 0.106, "step": 13339 }, { "epoch": 0.23624522395923264, "grad_norm": 0.6696348786354065, "learning_rate": 2.6776051134051653e-05, "loss": 0.1, "step": 13340 }, { "epoch": 0.23626293349626107, "grad_norm": 1.0518994331359863, "learning_rate": 2.6775518198864282e-05, "loss": 0.1069, "step": 13341 }, { "epoch": 0.2362806430332895, "grad_norm": 0.768680989742279, "learning_rate": 2.6774985224936646e-05, "loss": 0.118, "step": 13342 }, { "epoch": 0.23629835257031792, "grad_norm": 0.7917302250862122, "learning_rate": 2.6774452212270507e-05, "loss": 0.1179, "step": 13343 }, { "epoch": 0.23631606210734635, "grad_norm": 1.1188185214996338, "learning_rate": 2.6773919160867603e-05, "loss": 0.0966, "step": 13344 }, { "epoch": 0.23633377164437477, "grad_norm": 0.7568054795265198, "learning_rate": 2.6773386070729696e-05, "loss": 0.1087, "step": 13345 }, { "epoch": 0.23635148118140323, "grad_norm": 0.9676938652992249, "learning_rate": 2.677285294185854e-05, "loss": 0.1012, "step": 13346 }, { "epoch": 0.23636919071843165, "grad_norm": 0.5254724621772766, "learning_rate": 2.677231977425589e-05, "loss": 0.0664, "step": 13347 }, { "epoch": 0.23638690025546008, "grad_norm": 0.5229126811027527, "learning_rate": 2.6771786567923504e-05, "loss": 0.0994, "step": 13348 }, { "epoch": 0.2364046097924885, "grad_norm": 0.9923967123031616, "learning_rate": 2.6771253322863122e-05, "loss": 0.0972, "step": 13349 }, { "epoch": 0.23642231932951693, "grad_norm": 0.8214187622070312, "learning_rate": 2.6770720039076512e-05, "loss": 0.0898, "step": 13350 }, { "epoch": 0.23644002886654536, "grad_norm": 0.9273199439048767, "learning_rate": 2.6770186716565423e-05, "loss": 0.0698, "step": 13351 }, { "epoch": 0.2364577384035738, "grad_norm": 0.6105358004570007, "learning_rate": 2.676965335533161e-05, "loss": 0.1032, "step": 13352 }, { "epoch": 0.2364754479406022, "grad_norm": 0.8006720542907715, "learning_rate": 2.676911995537683e-05, "loss": 0.1133, "step": 13353 }, { "epoch": 0.23649315747763064, "grad_norm": 0.673018217086792, "learning_rate": 2.676858651670283e-05, "loss": 0.0979, "step": 13354 }, { "epoch": 0.23651086701465907, "grad_norm": 0.5660648345947266, "learning_rate": 2.6768053039311372e-05, "loss": 0.1199, "step": 13355 }, { "epoch": 0.2365285765516875, "grad_norm": 0.7191390991210938, "learning_rate": 2.6767519523204214e-05, "loss": 0.1003, "step": 13356 }, { "epoch": 0.23654628608871592, "grad_norm": 1.1376186609268188, "learning_rate": 2.6766985968383104e-05, "loss": 0.0887, "step": 13357 }, { "epoch": 0.23656399562574434, "grad_norm": 1.2978287935256958, "learning_rate": 2.6766452374849803e-05, "loss": 0.1164, "step": 13358 }, { "epoch": 0.23658170516277277, "grad_norm": 0.6600121855735779, "learning_rate": 2.6765918742606063e-05, "loss": 0.0689, "step": 13359 }, { "epoch": 0.2365994146998012, "grad_norm": 0.812687873840332, "learning_rate": 2.6765385071653636e-05, "loss": 0.1225, "step": 13360 }, { "epoch": 0.23661712423682965, "grad_norm": 0.9426072835922241, "learning_rate": 2.6764851361994286e-05, "loss": 0.1077, "step": 13361 }, { "epoch": 0.23663483377385808, "grad_norm": 0.5787109732627869, "learning_rate": 2.6764317613629766e-05, "loss": 0.0914, "step": 13362 }, { "epoch": 0.2366525433108865, "grad_norm": 0.8002899289131165, "learning_rate": 2.6763783826561828e-05, "loss": 0.1062, "step": 13363 }, { "epoch": 0.23667025284791493, "grad_norm": 0.8681513667106628, "learning_rate": 2.676325000079223e-05, "loss": 0.0805, "step": 13364 }, { "epoch": 0.23668796238494336, "grad_norm": 0.6581158638000488, "learning_rate": 2.6762716136322734e-05, "loss": 0.0843, "step": 13365 }, { "epoch": 0.23670567192197178, "grad_norm": 0.9135076403617859, "learning_rate": 2.676218223315509e-05, "loss": 0.0916, "step": 13366 }, { "epoch": 0.2367233814590002, "grad_norm": 1.0645956993103027, "learning_rate": 2.6761648291291055e-05, "loss": 0.1182, "step": 13367 }, { "epoch": 0.23674109099602864, "grad_norm": 0.9673126339912415, "learning_rate": 2.676111431073239e-05, "loss": 0.1151, "step": 13368 }, { "epoch": 0.23675880053305706, "grad_norm": 1.0828630924224854, "learning_rate": 2.6760580291480843e-05, "loss": 0.0886, "step": 13369 }, { "epoch": 0.2367765100700855, "grad_norm": 1.2826039791107178, "learning_rate": 2.676004623353818e-05, "loss": 0.1556, "step": 13370 }, { "epoch": 0.23679421960711391, "grad_norm": 0.9122865796089172, "learning_rate": 2.6759512136906155e-05, "loss": 0.1296, "step": 13371 }, { "epoch": 0.23681192914414234, "grad_norm": 0.8353773355484009, "learning_rate": 2.675897800158652e-05, "loss": 0.0649, "step": 13372 }, { "epoch": 0.23682963868117077, "grad_norm": 0.7722958326339722, "learning_rate": 2.6758443827581038e-05, "loss": 0.0737, "step": 13373 }, { "epoch": 0.2368473482181992, "grad_norm": 0.664700448513031, "learning_rate": 2.675790961489147e-05, "loss": 0.1105, "step": 13374 }, { "epoch": 0.23686505775522762, "grad_norm": 1.1354200839996338, "learning_rate": 2.6757375363519565e-05, "loss": 0.1199, "step": 13375 }, { "epoch": 0.23688276729225607, "grad_norm": 0.7630809545516968, "learning_rate": 2.6756841073467084e-05, "loss": 0.1225, "step": 13376 }, { "epoch": 0.2369004768292845, "grad_norm": 0.5661141276359558, "learning_rate": 2.6756306744735782e-05, "loss": 0.1596, "step": 13377 }, { "epoch": 0.23691818636631293, "grad_norm": 0.33571043610572815, "learning_rate": 2.6755772377327427e-05, "loss": 0.1129, "step": 13378 }, { "epoch": 0.23693589590334135, "grad_norm": 0.7612356543540955, "learning_rate": 2.675523797124377e-05, "loss": 0.0865, "step": 13379 }, { "epoch": 0.23695360544036978, "grad_norm": 0.9177390933036804, "learning_rate": 2.675470352648656e-05, "loss": 0.1136, "step": 13380 }, { "epoch": 0.2369713149773982, "grad_norm": 0.959240734577179, "learning_rate": 2.675416904305757e-05, "loss": 0.1136, "step": 13381 }, { "epoch": 0.23698902451442663, "grad_norm": 0.44911959767341614, "learning_rate": 2.6753634520958555e-05, "loss": 0.1087, "step": 13382 }, { "epoch": 0.23700673405145506, "grad_norm": 1.294376015663147, "learning_rate": 2.675309996019127e-05, "loss": 0.1433, "step": 13383 }, { "epoch": 0.23702444358848349, "grad_norm": 1.2473584413528442, "learning_rate": 2.6752565360757468e-05, "loss": 0.1035, "step": 13384 }, { "epoch": 0.2370421531255119, "grad_norm": 0.7042420506477356, "learning_rate": 2.675203072265892e-05, "loss": 0.078, "step": 13385 }, { "epoch": 0.23705986266254034, "grad_norm": 0.9901505708694458, "learning_rate": 2.6751496045897384e-05, "loss": 0.1172, "step": 13386 }, { "epoch": 0.23707757219956876, "grad_norm": 0.9577459692955017, "learning_rate": 2.675096133047461e-05, "loss": 0.0896, "step": 13387 }, { "epoch": 0.2370952817365972, "grad_norm": 0.6405777335166931, "learning_rate": 2.6750426576392363e-05, "loss": 0.0938, "step": 13388 }, { "epoch": 0.23711299127362562, "grad_norm": 0.7711221575737, "learning_rate": 2.6749891783652397e-05, "loss": 0.0863, "step": 13389 }, { "epoch": 0.23713070081065404, "grad_norm": 0.7331221699714661, "learning_rate": 2.6749356952256477e-05, "loss": 0.0806, "step": 13390 }, { "epoch": 0.2371484103476825, "grad_norm": 0.722869873046875, "learning_rate": 2.6748822082206364e-05, "loss": 0.0955, "step": 13391 }, { "epoch": 0.23716611988471092, "grad_norm": 0.974371612071991, "learning_rate": 2.6748287173503817e-05, "loss": 0.1521, "step": 13392 }, { "epoch": 0.23718382942173935, "grad_norm": 0.892162561416626, "learning_rate": 2.6747752226150587e-05, "loss": 0.1205, "step": 13393 }, { "epoch": 0.23720153895876778, "grad_norm": 1.2011975049972534, "learning_rate": 2.6747217240148444e-05, "loss": 0.1108, "step": 13394 }, { "epoch": 0.2372192484957962, "grad_norm": 1.043265461921692, "learning_rate": 2.6746682215499144e-05, "loss": 0.139, "step": 13395 }, { "epoch": 0.23723695803282463, "grad_norm": 0.9639753699302673, "learning_rate": 2.6746147152204448e-05, "loss": 0.0982, "step": 13396 }, { "epoch": 0.23725466756985306, "grad_norm": 0.7526950240135193, "learning_rate": 2.6745612050266116e-05, "loss": 0.1013, "step": 13397 }, { "epoch": 0.23727237710688148, "grad_norm": 1.2818984985351562, "learning_rate": 2.6745076909685904e-05, "loss": 0.1112, "step": 13398 }, { "epoch": 0.2372900866439099, "grad_norm": 0.9447178840637207, "learning_rate": 2.674454173046558e-05, "loss": 0.1615, "step": 13399 }, { "epoch": 0.23730779618093834, "grad_norm": 0.9446244835853577, "learning_rate": 2.6744006512606904e-05, "loss": 0.1016, "step": 13400 }, { "epoch": 0.23732550571796676, "grad_norm": 0.9085255265235901, "learning_rate": 2.6743471256111633e-05, "loss": 0.0824, "step": 13401 }, { "epoch": 0.2373432152549952, "grad_norm": 0.6523481607437134, "learning_rate": 2.674293596098153e-05, "loss": 0.106, "step": 13402 }, { "epoch": 0.23736092479202361, "grad_norm": 0.8372043967247009, "learning_rate": 2.6742400627218353e-05, "loss": 0.1373, "step": 13403 }, { "epoch": 0.23737863432905204, "grad_norm": 1.5019513368606567, "learning_rate": 2.674186525482386e-05, "loss": 0.1254, "step": 13404 }, { "epoch": 0.2373963438660805, "grad_norm": 0.8134528398513794, "learning_rate": 2.674132984379983e-05, "loss": 0.1045, "step": 13405 }, { "epoch": 0.23741405340310892, "grad_norm": 0.9381694793701172, "learning_rate": 2.674079439414801e-05, "loss": 0.0944, "step": 13406 }, { "epoch": 0.23743176294013735, "grad_norm": 0.9135011434555054, "learning_rate": 2.6740258905870157e-05, "loss": 0.1037, "step": 13407 }, { "epoch": 0.23744947247716577, "grad_norm": 1.0819586515426636, "learning_rate": 2.6739723378968047e-05, "loss": 0.1059, "step": 13408 }, { "epoch": 0.2374671820141942, "grad_norm": 0.7876102924346924, "learning_rate": 2.673918781344343e-05, "loss": 0.0917, "step": 13409 }, { "epoch": 0.23748489155122263, "grad_norm": 1.0919204950332642, "learning_rate": 2.673865220929807e-05, "loss": 0.0997, "step": 13410 }, { "epoch": 0.23750260108825105, "grad_norm": 0.8106171488761902, "learning_rate": 2.673811656653374e-05, "loss": 0.0932, "step": 13411 }, { "epoch": 0.23752031062527948, "grad_norm": 0.8004403114318848, "learning_rate": 2.6737580885152186e-05, "loss": 0.0901, "step": 13412 }, { "epoch": 0.2375380201623079, "grad_norm": 0.76795893907547, "learning_rate": 2.6737045165155183e-05, "loss": 0.0672, "step": 13413 }, { "epoch": 0.23755572969933633, "grad_norm": 0.9793534874916077, "learning_rate": 2.6736509406544487e-05, "loss": 0.0854, "step": 13414 }, { "epoch": 0.23757343923636476, "grad_norm": 1.19434654712677, "learning_rate": 2.6735973609321857e-05, "loss": 0.1101, "step": 13415 }, { "epoch": 0.23759114877339318, "grad_norm": 0.5333757996559143, "learning_rate": 2.6735437773489067e-05, "loss": 0.1007, "step": 13416 }, { "epoch": 0.2376088583104216, "grad_norm": 2.561891555786133, "learning_rate": 2.673490189904787e-05, "loss": 0.138, "step": 13417 }, { "epoch": 0.23762656784745004, "grad_norm": 0.8734784126281738, "learning_rate": 2.673436598600004e-05, "loss": 0.112, "step": 13418 }, { "epoch": 0.23764427738447846, "grad_norm": 0.7703092694282532, "learning_rate": 2.6733830034347325e-05, "loss": 0.0939, "step": 13419 }, { "epoch": 0.23766198692150692, "grad_norm": 0.953413724899292, "learning_rate": 2.6733294044091498e-05, "loss": 0.1079, "step": 13420 }, { "epoch": 0.23767969645853534, "grad_norm": 0.8414058089256287, "learning_rate": 2.6732758015234322e-05, "loss": 0.0729, "step": 13421 }, { "epoch": 0.23769740599556377, "grad_norm": 0.5235171318054199, "learning_rate": 2.6732221947777557e-05, "loss": 0.0865, "step": 13422 }, { "epoch": 0.2377151155325922, "grad_norm": 0.8494910001754761, "learning_rate": 2.6731685841722968e-05, "loss": 0.0982, "step": 13423 }, { "epoch": 0.23773282506962062, "grad_norm": 0.6540126800537109, "learning_rate": 2.6731149697072323e-05, "loss": 0.0901, "step": 13424 }, { "epoch": 0.23775053460664905, "grad_norm": 0.5477200746536255, "learning_rate": 2.6730613513827377e-05, "loss": 0.0735, "step": 13425 }, { "epoch": 0.23776824414367748, "grad_norm": 1.1655161380767822, "learning_rate": 2.6730077291989903e-05, "loss": 0.1512, "step": 13426 }, { "epoch": 0.2377859536807059, "grad_norm": 0.6916848421096802, "learning_rate": 2.6729541031561658e-05, "loss": 0.0632, "step": 13427 }, { "epoch": 0.23780366321773433, "grad_norm": 1.8489519357681274, "learning_rate": 2.6729004732544412e-05, "loss": 0.127, "step": 13428 }, { "epoch": 0.23782137275476276, "grad_norm": 0.9204082489013672, "learning_rate": 2.6728468394939926e-05, "loss": 0.135, "step": 13429 }, { "epoch": 0.23783908229179118, "grad_norm": 0.8572909832000732, "learning_rate": 2.6727932018749965e-05, "loss": 0.1012, "step": 13430 }, { "epoch": 0.2378567918288196, "grad_norm": 0.7861841320991516, "learning_rate": 2.672739560397629e-05, "loss": 0.0729, "step": 13431 }, { "epoch": 0.23787450136584803, "grad_norm": 1.0518498420715332, "learning_rate": 2.6726859150620674e-05, "loss": 0.0974, "step": 13432 }, { "epoch": 0.23789221090287646, "grad_norm": 0.6925858855247498, "learning_rate": 2.6726322658684877e-05, "loss": 0.0946, "step": 13433 }, { "epoch": 0.2379099204399049, "grad_norm": 0.8205441236495972, "learning_rate": 2.672578612817067e-05, "loss": 0.0889, "step": 13434 }, { "epoch": 0.23792762997693334, "grad_norm": 0.6119678616523743, "learning_rate": 2.6725249559079804e-05, "loss": 0.0833, "step": 13435 }, { "epoch": 0.23794533951396177, "grad_norm": 0.7776346206665039, "learning_rate": 2.6724712951414055e-05, "loss": 0.0859, "step": 13436 }, { "epoch": 0.2379630490509902, "grad_norm": 0.6743221282958984, "learning_rate": 2.6724176305175188e-05, "loss": 0.1045, "step": 13437 }, { "epoch": 0.23798075858801862, "grad_norm": 1.0888603925704956, "learning_rate": 2.6723639620364968e-05, "loss": 0.1044, "step": 13438 }, { "epoch": 0.23799846812504705, "grad_norm": 0.9803746938705444, "learning_rate": 2.6723102896985154e-05, "loss": 0.1408, "step": 13439 }, { "epoch": 0.23801617766207547, "grad_norm": 0.632588803768158, "learning_rate": 2.6722566135037524e-05, "loss": 0.0825, "step": 13440 }, { "epoch": 0.2380338871991039, "grad_norm": 0.6526612043380737, "learning_rate": 2.6722029334523833e-05, "loss": 0.0924, "step": 13441 }, { "epoch": 0.23805159673613233, "grad_norm": 0.6332026720046997, "learning_rate": 2.672149249544585e-05, "loss": 0.0933, "step": 13442 }, { "epoch": 0.23806930627316075, "grad_norm": 1.085466980934143, "learning_rate": 2.672095561780534e-05, "loss": 0.0962, "step": 13443 }, { "epoch": 0.23808701581018918, "grad_norm": 1.3702152967453003, "learning_rate": 2.672041870160408e-05, "loss": 0.1614, "step": 13444 }, { "epoch": 0.2381047253472176, "grad_norm": 1.1040308475494385, "learning_rate": 2.6719881746843826e-05, "loss": 0.1276, "step": 13445 }, { "epoch": 0.23812243488424603, "grad_norm": 1.0921266078948975, "learning_rate": 2.671934475352634e-05, "loss": 0.1454, "step": 13446 }, { "epoch": 0.23814014442127446, "grad_norm": 0.5718269348144531, "learning_rate": 2.67188077216534e-05, "loss": 0.0587, "step": 13447 }, { "epoch": 0.23815785395830288, "grad_norm": 0.817604660987854, "learning_rate": 2.6718270651226766e-05, "loss": 0.1001, "step": 13448 }, { "epoch": 0.2381755634953313, "grad_norm": 1.012600064277649, "learning_rate": 2.671773354224821e-05, "loss": 0.1164, "step": 13449 }, { "epoch": 0.23819327303235976, "grad_norm": 0.7824103832244873, "learning_rate": 2.6717196394719493e-05, "loss": 0.0925, "step": 13450 }, { "epoch": 0.2382109825693882, "grad_norm": 1.0140535831451416, "learning_rate": 2.671665920864238e-05, "loss": 0.1302, "step": 13451 }, { "epoch": 0.23822869210641662, "grad_norm": 0.7615099549293518, "learning_rate": 2.6716121984018657e-05, "loss": 0.1237, "step": 13452 }, { "epoch": 0.23824640164344504, "grad_norm": 0.8158119320869446, "learning_rate": 2.6715584720850068e-05, "loss": 0.0754, "step": 13453 }, { "epoch": 0.23826411118047347, "grad_norm": 1.1189026832580566, "learning_rate": 2.6715047419138394e-05, "loss": 0.0818, "step": 13454 }, { "epoch": 0.2382818207175019, "grad_norm": 0.998916745185852, "learning_rate": 2.6714510078885392e-05, "loss": 0.1227, "step": 13455 }, { "epoch": 0.23829953025453032, "grad_norm": 1.1007134914398193, "learning_rate": 2.671397270009284e-05, "loss": 0.0852, "step": 13456 }, { "epoch": 0.23831723979155875, "grad_norm": 0.8323226571083069, "learning_rate": 2.6713435282762505e-05, "loss": 0.1071, "step": 13457 }, { "epoch": 0.23833494932858718, "grad_norm": 0.716346263885498, "learning_rate": 2.6712897826896153e-05, "loss": 0.0941, "step": 13458 }, { "epoch": 0.2383526588656156, "grad_norm": 1.2297067642211914, "learning_rate": 2.6712360332495552e-05, "loss": 0.1637, "step": 13459 }, { "epoch": 0.23837036840264403, "grad_norm": 1.0326615571975708, "learning_rate": 2.6711822799562467e-05, "loss": 0.1352, "step": 13460 }, { "epoch": 0.23838807793967245, "grad_norm": 0.8882282376289368, "learning_rate": 2.671128522809867e-05, "loss": 0.0926, "step": 13461 }, { "epoch": 0.23840578747670088, "grad_norm": 1.2798190116882324, "learning_rate": 2.6710747618105932e-05, "loss": 0.1157, "step": 13462 }, { "epoch": 0.2384234970137293, "grad_norm": 0.8053379058837891, "learning_rate": 2.6710209969586016e-05, "loss": 0.1257, "step": 13463 }, { "epoch": 0.23844120655075773, "grad_norm": 0.5780537724494934, "learning_rate": 2.6709672282540694e-05, "loss": 0.0715, "step": 13464 }, { "epoch": 0.2384589160877862, "grad_norm": 1.631105661392212, "learning_rate": 2.6709134556971733e-05, "loss": 0.0862, "step": 13465 }, { "epoch": 0.23847662562481461, "grad_norm": 0.9794114232063293, "learning_rate": 2.6708596792880907e-05, "loss": 0.1268, "step": 13466 }, { "epoch": 0.23849433516184304, "grad_norm": 0.630269467830658, "learning_rate": 2.6708058990269984e-05, "loss": 0.0977, "step": 13467 }, { "epoch": 0.23851204469887147, "grad_norm": 1.2230679988861084, "learning_rate": 2.6707521149140725e-05, "loss": 0.0679, "step": 13468 }, { "epoch": 0.2385297542358999, "grad_norm": 0.8224474191665649, "learning_rate": 2.670698326949491e-05, "loss": 0.0722, "step": 13469 }, { "epoch": 0.23854746377292832, "grad_norm": 0.9314221739768982, "learning_rate": 2.67064453513343e-05, "loss": 0.0855, "step": 13470 }, { "epoch": 0.23856517330995675, "grad_norm": 0.8769263029098511, "learning_rate": 2.6705907394660674e-05, "loss": 0.0826, "step": 13471 }, { "epoch": 0.23858288284698517, "grad_norm": 1.1654717922210693, "learning_rate": 2.6705369399475793e-05, "loss": 0.119, "step": 13472 }, { "epoch": 0.2386005923840136, "grad_norm": 1.2665687799453735, "learning_rate": 2.670483136578143e-05, "loss": 0.0976, "step": 13473 }, { "epoch": 0.23861830192104203, "grad_norm": 0.6000884771347046, "learning_rate": 2.670429329357936e-05, "loss": 0.0879, "step": 13474 }, { "epoch": 0.23863601145807045, "grad_norm": 0.7687239646911621, "learning_rate": 2.670375518287134e-05, "loss": 0.0891, "step": 13475 }, { "epoch": 0.23865372099509888, "grad_norm": 0.8471599817276001, "learning_rate": 2.670321703365916e-05, "loss": 0.1177, "step": 13476 }, { "epoch": 0.2386714305321273, "grad_norm": 1.2103034257888794, "learning_rate": 2.6702678845944575e-05, "loss": 0.0624, "step": 13477 }, { "epoch": 0.23868914006915573, "grad_norm": 0.7720752358436584, "learning_rate": 2.6702140619729358e-05, "loss": 0.0889, "step": 13478 }, { "epoch": 0.23870684960618416, "grad_norm": 1.2135437726974487, "learning_rate": 2.6701602355015283e-05, "loss": 0.147, "step": 13479 }, { "epoch": 0.2387245591432126, "grad_norm": 0.7132299542427063, "learning_rate": 2.6701064051804124e-05, "loss": 0.108, "step": 13480 }, { "epoch": 0.23874226868024104, "grad_norm": 0.8698880672454834, "learning_rate": 2.6700525710097645e-05, "loss": 0.127, "step": 13481 }, { "epoch": 0.23875997821726946, "grad_norm": 0.9707176089286804, "learning_rate": 2.6699987329897618e-05, "loss": 0.1061, "step": 13482 }, { "epoch": 0.2387776877542979, "grad_norm": 0.7988152503967285, "learning_rate": 2.669944891120582e-05, "loss": 0.084, "step": 13483 }, { "epoch": 0.23879539729132632, "grad_norm": 0.8539696931838989, "learning_rate": 2.6698910454024015e-05, "loss": 0.1178, "step": 13484 }, { "epoch": 0.23881310682835474, "grad_norm": 0.6422919034957886, "learning_rate": 2.669837195835398e-05, "loss": 0.0927, "step": 13485 }, { "epoch": 0.23883081636538317, "grad_norm": 0.7655261754989624, "learning_rate": 2.6697833424197482e-05, "loss": 0.0764, "step": 13486 }, { "epoch": 0.2388485259024116, "grad_norm": 0.8947840332984924, "learning_rate": 2.6697294851556296e-05, "loss": 0.1057, "step": 13487 }, { "epoch": 0.23886623543944002, "grad_norm": 0.6428353786468506, "learning_rate": 2.6696756240432194e-05, "loss": 0.1045, "step": 13488 }, { "epoch": 0.23888394497646845, "grad_norm": 0.4849996864795685, "learning_rate": 2.6696217590826948e-05, "loss": 0.0824, "step": 13489 }, { "epoch": 0.23890165451349687, "grad_norm": 1.1441823244094849, "learning_rate": 2.669567890274233e-05, "loss": 0.1109, "step": 13490 }, { "epoch": 0.2389193640505253, "grad_norm": 0.847449004650116, "learning_rate": 2.669514017618011e-05, "loss": 0.0952, "step": 13491 }, { "epoch": 0.23893707358755373, "grad_norm": 1.1427949666976929, "learning_rate": 2.6694601411142057e-05, "loss": 0.093, "step": 13492 }, { "epoch": 0.23895478312458215, "grad_norm": 3.417306661605835, "learning_rate": 2.6694062607629952e-05, "loss": 0.1341, "step": 13493 }, { "epoch": 0.23897249266161058, "grad_norm": 0.5231714844703674, "learning_rate": 2.6693523765645564e-05, "loss": 0.0608, "step": 13494 }, { "epoch": 0.23899020219863903, "grad_norm": 0.9541175365447998, "learning_rate": 2.6692984885190666e-05, "loss": 0.0855, "step": 13495 }, { "epoch": 0.23900791173566746, "grad_norm": 0.946562647819519, "learning_rate": 2.6692445966267026e-05, "loss": 0.1491, "step": 13496 }, { "epoch": 0.2390256212726959, "grad_norm": 0.5945079326629639, "learning_rate": 2.669190700887643e-05, "loss": 0.0946, "step": 13497 }, { "epoch": 0.2390433308097243, "grad_norm": 1.1279553174972534, "learning_rate": 2.6691368013020637e-05, "loss": 0.1029, "step": 13498 }, { "epoch": 0.23906104034675274, "grad_norm": 0.9558769464492798, "learning_rate": 2.6690828978701422e-05, "loss": 0.0739, "step": 13499 }, { "epoch": 0.23907874988378117, "grad_norm": 0.8285911083221436, "learning_rate": 2.669028990592057e-05, "loss": 0.0795, "step": 13500 }, { "epoch": 0.2390964594208096, "grad_norm": 0.8234906792640686, "learning_rate": 2.6689750794679843e-05, "loss": 0.0962, "step": 13501 }, { "epoch": 0.23911416895783802, "grad_norm": 0.753410279750824, "learning_rate": 2.668921164498102e-05, "loss": 0.092, "step": 13502 }, { "epoch": 0.23913187849486645, "grad_norm": 1.0043690204620361, "learning_rate": 2.668867245682587e-05, "loss": 0.0903, "step": 13503 }, { "epoch": 0.23914958803189487, "grad_norm": 1.0124363899230957, "learning_rate": 2.6688133230216172e-05, "loss": 0.1032, "step": 13504 }, { "epoch": 0.2391672975689233, "grad_norm": 0.809262752532959, "learning_rate": 2.6687593965153697e-05, "loss": 0.0712, "step": 13505 }, { "epoch": 0.23918500710595172, "grad_norm": 1.0547288656234741, "learning_rate": 2.668705466164022e-05, "loss": 0.0852, "step": 13506 }, { "epoch": 0.23920271664298015, "grad_norm": 1.1662379503250122, "learning_rate": 2.6686515319677516e-05, "loss": 0.1161, "step": 13507 }, { "epoch": 0.23922042618000858, "grad_norm": 0.8346394300460815, "learning_rate": 2.6685975939267358e-05, "loss": 0.0779, "step": 13508 }, { "epoch": 0.239238135717037, "grad_norm": 0.7486051917076111, "learning_rate": 2.668543652041152e-05, "loss": 0.123, "step": 13509 }, { "epoch": 0.23925584525406546, "grad_norm": 0.811364471912384, "learning_rate": 2.668489706311178e-05, "loss": 0.1121, "step": 13510 }, { "epoch": 0.23927355479109388, "grad_norm": 1.108420729637146, "learning_rate": 2.6684357567369906e-05, "loss": 0.1224, "step": 13511 }, { "epoch": 0.2392912643281223, "grad_norm": 0.9118592143058777, "learning_rate": 2.6683818033187682e-05, "loss": 0.0894, "step": 13512 }, { "epoch": 0.23930897386515074, "grad_norm": 0.8216781616210938, "learning_rate": 2.6683278460566876e-05, "loss": 0.1174, "step": 13513 }, { "epoch": 0.23932668340217916, "grad_norm": 0.7853201031684875, "learning_rate": 2.6682738849509267e-05, "loss": 0.1178, "step": 13514 }, { "epoch": 0.2393443929392076, "grad_norm": 0.9944708347320557, "learning_rate": 2.668219920001663e-05, "loss": 0.0721, "step": 13515 }, { "epoch": 0.23936210247623602, "grad_norm": 0.9905447959899902, "learning_rate": 2.668165951209074e-05, "loss": 0.0657, "step": 13516 }, { "epoch": 0.23937981201326444, "grad_norm": 1.0481460094451904, "learning_rate": 2.668111978573337e-05, "loss": 0.0786, "step": 13517 }, { "epoch": 0.23939752155029287, "grad_norm": 1.1215952634811401, "learning_rate": 2.6680580020946293e-05, "loss": 0.1152, "step": 13518 }, { "epoch": 0.2394152310873213, "grad_norm": 0.8410583734512329, "learning_rate": 2.6680040217731294e-05, "loss": 0.0699, "step": 13519 }, { "epoch": 0.23943294062434972, "grad_norm": 0.7896263599395752, "learning_rate": 2.667950037609014e-05, "loss": 0.1443, "step": 13520 }, { "epoch": 0.23945065016137815, "grad_norm": 0.9226348996162415, "learning_rate": 2.6678960496024615e-05, "loss": 0.1236, "step": 13521 }, { "epoch": 0.23946835969840657, "grad_norm": 0.9553388357162476, "learning_rate": 2.6678420577536493e-05, "loss": 0.1163, "step": 13522 }, { "epoch": 0.239486069235435, "grad_norm": 0.8467562198638916, "learning_rate": 2.6677880620627545e-05, "loss": 0.1133, "step": 13523 }, { "epoch": 0.23950377877246343, "grad_norm": 0.7142643332481384, "learning_rate": 2.6677340625299555e-05, "loss": 0.107, "step": 13524 }, { "epoch": 0.23952148830949188, "grad_norm": 0.8796613216400146, "learning_rate": 2.667680059155429e-05, "loss": 0.1143, "step": 13525 }, { "epoch": 0.2395391978465203, "grad_norm": 1.1549049615859985, "learning_rate": 2.6676260519393534e-05, "loss": 0.0953, "step": 13526 }, { "epoch": 0.23955690738354873, "grad_norm": 1.1422207355499268, "learning_rate": 2.6675720408819064e-05, "loss": 0.1613, "step": 13527 }, { "epoch": 0.23957461692057716, "grad_norm": 0.9897768497467041, "learning_rate": 2.6675180259832653e-05, "loss": 0.0693, "step": 13528 }, { "epoch": 0.2395923264576056, "grad_norm": 1.3882646560668945, "learning_rate": 2.6674640072436082e-05, "loss": 0.1273, "step": 13529 }, { "epoch": 0.239610035994634, "grad_norm": 0.7628322839736938, "learning_rate": 2.667409984663112e-05, "loss": 0.0971, "step": 13530 }, { "epoch": 0.23962774553166244, "grad_norm": 1.020856499671936, "learning_rate": 2.6673559582419556e-05, "loss": 0.1166, "step": 13531 }, { "epoch": 0.23964545506869087, "grad_norm": 1.044742226600647, "learning_rate": 2.6673019279803157e-05, "loss": 0.0841, "step": 13532 }, { "epoch": 0.2396631646057193, "grad_norm": 1.2676550149917603, "learning_rate": 2.667247893878371e-05, "loss": 0.1679, "step": 13533 }, { "epoch": 0.23968087414274772, "grad_norm": 1.196285367012024, "learning_rate": 2.6671938559362986e-05, "loss": 0.1342, "step": 13534 }, { "epoch": 0.23969858367977614, "grad_norm": 1.019358515739441, "learning_rate": 2.667139814154276e-05, "loss": 0.1312, "step": 13535 }, { "epoch": 0.23971629321680457, "grad_norm": 0.7092589735984802, "learning_rate": 2.667085768532482e-05, "loss": 0.0718, "step": 13536 }, { "epoch": 0.239734002753833, "grad_norm": 1.0594089031219482, "learning_rate": 2.6670317190710936e-05, "loss": 0.0973, "step": 13537 }, { "epoch": 0.23975171229086142, "grad_norm": 0.98343425989151, "learning_rate": 2.6669776657702894e-05, "loss": 0.0715, "step": 13538 }, { "epoch": 0.23976942182788988, "grad_norm": 1.1417397260665894, "learning_rate": 2.6669236086302466e-05, "loss": 0.1364, "step": 13539 }, { "epoch": 0.2397871313649183, "grad_norm": 0.7438361644744873, "learning_rate": 2.666869547651143e-05, "loss": 0.0923, "step": 13540 }, { "epoch": 0.23980484090194673, "grad_norm": 0.6927298903465271, "learning_rate": 2.6668154828331565e-05, "loss": 0.1028, "step": 13541 }, { "epoch": 0.23982255043897516, "grad_norm": 0.5538045763969421, "learning_rate": 2.6667614141764652e-05, "loss": 0.082, "step": 13542 }, { "epoch": 0.23984025997600358, "grad_norm": 1.0730671882629395, "learning_rate": 2.6667073416812466e-05, "loss": 0.1325, "step": 13543 }, { "epoch": 0.239857969513032, "grad_norm": 1.352698564529419, "learning_rate": 2.6666532653476788e-05, "loss": 0.1077, "step": 13544 }, { "epoch": 0.23987567905006044, "grad_norm": 1.1947404146194458, "learning_rate": 2.66659918517594e-05, "loss": 0.0856, "step": 13545 }, { "epoch": 0.23989338858708886, "grad_norm": 1.3399720191955566, "learning_rate": 2.666545101166208e-05, "loss": 0.1226, "step": 13546 }, { "epoch": 0.2399110981241173, "grad_norm": 0.6737781763076782, "learning_rate": 2.6664910133186604e-05, "loss": 0.0812, "step": 13547 }, { "epoch": 0.23992880766114572, "grad_norm": 1.0599125623703003, "learning_rate": 2.6664369216334753e-05, "loss": 0.1449, "step": 13548 }, { "epoch": 0.23994651719817414, "grad_norm": 0.7431229948997498, "learning_rate": 2.666382826110831e-05, "loss": 0.0849, "step": 13549 }, { "epoch": 0.23996422673520257, "grad_norm": 1.0782480239868164, "learning_rate": 2.6663287267509048e-05, "loss": 0.1166, "step": 13550 }, { "epoch": 0.239981936272231, "grad_norm": 0.9430698156356812, "learning_rate": 2.666274623553876e-05, "loss": 0.0702, "step": 13551 }, { "epoch": 0.23999964580925942, "grad_norm": 1.0860408544540405, "learning_rate": 2.6662205165199205e-05, "loss": 0.1562, "step": 13552 }, { "epoch": 0.24001735534628785, "grad_norm": 0.7410164475440979, "learning_rate": 2.6661664056492175e-05, "loss": 0.0812, "step": 13553 }, { "epoch": 0.2400350648833163, "grad_norm": 0.8851836919784546, "learning_rate": 2.6661122909419453e-05, "loss": 0.1095, "step": 13554 }, { "epoch": 0.24005277442034473, "grad_norm": 0.6343907713890076, "learning_rate": 2.666058172398282e-05, "loss": 0.0973, "step": 13555 }, { "epoch": 0.24007048395737315, "grad_norm": 0.5871973633766174, "learning_rate": 2.666004050018405e-05, "loss": 0.0754, "step": 13556 }, { "epoch": 0.24008819349440158, "grad_norm": 1.0366562604904175, "learning_rate": 2.6659499238024923e-05, "loss": 0.1359, "step": 13557 }, { "epoch": 0.24010590303143, "grad_norm": 0.7924374341964722, "learning_rate": 2.6658957937507226e-05, "loss": 0.0818, "step": 13558 }, { "epoch": 0.24012361256845843, "grad_norm": 0.9467371106147766, "learning_rate": 2.6658416598632732e-05, "loss": 0.1005, "step": 13559 }, { "epoch": 0.24014132210548686, "grad_norm": 0.9189634919166565, "learning_rate": 2.6657875221403234e-05, "loss": 0.0734, "step": 13560 }, { "epoch": 0.24015903164251529, "grad_norm": 0.6648213267326355, "learning_rate": 2.66573338058205e-05, "loss": 0.0933, "step": 13561 }, { "epoch": 0.2401767411795437, "grad_norm": 1.8273429870605469, "learning_rate": 2.665679235188632e-05, "loss": 0.104, "step": 13562 }, { "epoch": 0.24019445071657214, "grad_norm": 1.3872357606887817, "learning_rate": 2.6656250859602466e-05, "loss": 0.0899, "step": 13563 }, { "epoch": 0.24021216025360057, "grad_norm": 0.733712375164032, "learning_rate": 2.6655709328970735e-05, "loss": 0.0841, "step": 13564 }, { "epoch": 0.240229869790629, "grad_norm": 0.8239594101905823, "learning_rate": 2.6655167759992894e-05, "loss": 0.0958, "step": 13565 }, { "epoch": 0.24024757932765742, "grad_norm": 0.7583775520324707, "learning_rate": 2.6654626152670728e-05, "loss": 0.1039, "step": 13566 }, { "epoch": 0.24026528886468584, "grad_norm": 0.7599239349365234, "learning_rate": 2.6654084507006023e-05, "loss": 0.085, "step": 13567 }, { "epoch": 0.24028299840171427, "grad_norm": 0.6382877230644226, "learning_rate": 2.665354282300056e-05, "loss": 0.1044, "step": 13568 }, { "epoch": 0.24030070793874272, "grad_norm": 0.835988461971283, "learning_rate": 2.665300110065612e-05, "loss": 0.0952, "step": 13569 }, { "epoch": 0.24031841747577115, "grad_norm": 1.5522304773330688, "learning_rate": 2.6652459339974482e-05, "loss": 0.1237, "step": 13570 }, { "epoch": 0.24033612701279958, "grad_norm": 0.842918872833252, "learning_rate": 2.6651917540957437e-05, "loss": 0.1163, "step": 13571 }, { "epoch": 0.240353836549828, "grad_norm": 1.463687777519226, "learning_rate": 2.6651375703606754e-05, "loss": 0.079, "step": 13572 }, { "epoch": 0.24037154608685643, "grad_norm": 0.8509623408317566, "learning_rate": 2.665083382792423e-05, "loss": 0.1138, "step": 13573 }, { "epoch": 0.24038925562388486, "grad_norm": 0.8028706312179565, "learning_rate": 2.6650291913911636e-05, "loss": 0.1018, "step": 13574 }, { "epoch": 0.24040696516091328, "grad_norm": 1.0173108577728271, "learning_rate": 2.6649749961570762e-05, "loss": 0.0889, "step": 13575 }, { "epoch": 0.2404246746979417, "grad_norm": 0.8349684476852417, "learning_rate": 2.664920797090339e-05, "loss": 0.0866, "step": 13576 }, { "epoch": 0.24044238423497014, "grad_norm": 0.8875943422317505, "learning_rate": 2.6648665941911302e-05, "loss": 0.0692, "step": 13577 }, { "epoch": 0.24046009377199856, "grad_norm": 0.6665690541267395, "learning_rate": 2.6648123874596276e-05, "loss": 0.1334, "step": 13578 }, { "epoch": 0.240477803309027, "grad_norm": 0.7724396586418152, "learning_rate": 2.664758176896011e-05, "loss": 0.142, "step": 13579 }, { "epoch": 0.24049551284605541, "grad_norm": 0.5792407989501953, "learning_rate": 2.6647039625004576e-05, "loss": 0.1044, "step": 13580 }, { "epoch": 0.24051322238308384, "grad_norm": 0.6747071743011475, "learning_rate": 2.6646497442731455e-05, "loss": 0.1044, "step": 13581 }, { "epoch": 0.24053093192011227, "grad_norm": 0.7458593845367432, "learning_rate": 2.6645955222142534e-05, "loss": 0.0892, "step": 13582 }, { "epoch": 0.2405486414571407, "grad_norm": 1.0624765157699585, "learning_rate": 2.6645412963239603e-05, "loss": 0.1456, "step": 13583 }, { "epoch": 0.24056635099416915, "grad_norm": 1.2597053050994873, "learning_rate": 2.664487066602444e-05, "loss": 0.104, "step": 13584 }, { "epoch": 0.24058406053119757, "grad_norm": 0.9863049983978271, "learning_rate": 2.664432833049883e-05, "loss": 0.0964, "step": 13585 }, { "epoch": 0.240601770068226, "grad_norm": 0.9529504179954529, "learning_rate": 2.6643785956664553e-05, "loss": 0.1301, "step": 13586 }, { "epoch": 0.24061947960525443, "grad_norm": 0.7933729887008667, "learning_rate": 2.6643243544523404e-05, "loss": 0.0935, "step": 13587 }, { "epoch": 0.24063718914228285, "grad_norm": 1.3015899658203125, "learning_rate": 2.6642701094077165e-05, "loss": 0.1167, "step": 13588 }, { "epoch": 0.24065489867931128, "grad_norm": 0.745570719242096, "learning_rate": 2.664215860532761e-05, "loss": 0.1058, "step": 13589 }, { "epoch": 0.2406726082163397, "grad_norm": 1.0524804592132568, "learning_rate": 2.664161607827653e-05, "loss": 0.1195, "step": 13590 }, { "epoch": 0.24069031775336813, "grad_norm": 0.5805643200874329, "learning_rate": 2.664107351292572e-05, "loss": 0.0845, "step": 13591 }, { "epoch": 0.24070802729039656, "grad_norm": 0.7836437821388245, "learning_rate": 2.6640530909276945e-05, "loss": 0.0743, "step": 13592 }, { "epoch": 0.24072573682742499, "grad_norm": 0.8656414151191711, "learning_rate": 2.6639988267332004e-05, "loss": 0.0966, "step": 13593 }, { "epoch": 0.2407434463644534, "grad_norm": 0.8715784549713135, "learning_rate": 2.6639445587092682e-05, "loss": 0.1198, "step": 13594 }, { "epoch": 0.24076115590148184, "grad_norm": 0.7908035516738892, "learning_rate": 2.6638902868560753e-05, "loss": 0.0878, "step": 13595 }, { "epoch": 0.24077886543851026, "grad_norm": 0.8057200908660889, "learning_rate": 2.663836011173802e-05, "loss": 0.1316, "step": 13596 }, { "epoch": 0.2407965749755387, "grad_norm": 0.718299150466919, "learning_rate": 2.6637817316626256e-05, "loss": 0.0675, "step": 13597 }, { "epoch": 0.24081428451256712, "grad_norm": 0.8501506447792053, "learning_rate": 2.663727448322725e-05, "loss": 0.0981, "step": 13598 }, { "epoch": 0.24083199404959557, "grad_norm": 0.9281145930290222, "learning_rate": 2.6636731611542785e-05, "loss": 0.0932, "step": 13599 }, { "epoch": 0.240849703586624, "grad_norm": 1.0255155563354492, "learning_rate": 2.6636188701574654e-05, "loss": 0.0904, "step": 13600 }, { "epoch": 0.24086741312365242, "grad_norm": 0.6700039505958557, "learning_rate": 2.6635645753324638e-05, "loss": 0.0706, "step": 13601 }, { "epoch": 0.24088512266068085, "grad_norm": 0.9787507057189941, "learning_rate": 2.6635102766794526e-05, "loss": 0.1348, "step": 13602 }, { "epoch": 0.24090283219770928, "grad_norm": 1.123632550239563, "learning_rate": 2.6634559741986098e-05, "loss": 0.1109, "step": 13603 }, { "epoch": 0.2409205417347377, "grad_norm": 1.143872618675232, "learning_rate": 2.663401667890115e-05, "loss": 0.1053, "step": 13604 }, { "epoch": 0.24093825127176613, "grad_norm": 0.8340252637863159, "learning_rate": 2.663347357754146e-05, "loss": 0.0982, "step": 13605 }, { "epoch": 0.24095596080879456, "grad_norm": 0.9821324944496155, "learning_rate": 2.6632930437908823e-05, "loss": 0.0863, "step": 13606 }, { "epoch": 0.24097367034582298, "grad_norm": 1.001394271850586, "learning_rate": 2.6632387260005017e-05, "loss": 0.0867, "step": 13607 }, { "epoch": 0.2409913798828514, "grad_norm": 0.7401909828186035, "learning_rate": 2.663184404383184e-05, "loss": 0.0905, "step": 13608 }, { "epoch": 0.24100908941987984, "grad_norm": 1.2737959623336792, "learning_rate": 2.6631300789391068e-05, "loss": 0.0776, "step": 13609 }, { "epoch": 0.24102679895690826, "grad_norm": 1.2014411687850952, "learning_rate": 2.663075749668449e-05, "loss": 0.1208, "step": 13610 }, { "epoch": 0.2410445084939367, "grad_norm": 0.6822860836982727, "learning_rate": 2.6630214165713898e-05, "loss": 0.0894, "step": 13611 }, { "epoch": 0.24106221803096511, "grad_norm": 0.7605714201927185, "learning_rate": 2.662967079648108e-05, "loss": 0.0914, "step": 13612 }, { "epoch": 0.24107992756799354, "grad_norm": 0.4593102037906647, "learning_rate": 2.6629127388987822e-05, "loss": 0.0879, "step": 13613 }, { "epoch": 0.241097637105022, "grad_norm": 0.848193883895874, "learning_rate": 2.6628583943235908e-05, "loss": 0.1149, "step": 13614 }, { "epoch": 0.24111534664205042, "grad_norm": 0.6734873652458191, "learning_rate": 2.6628040459227128e-05, "loss": 0.085, "step": 13615 }, { "epoch": 0.24113305617907885, "grad_norm": 1.3458327054977417, "learning_rate": 2.6627496936963273e-05, "loss": 0.0774, "step": 13616 }, { "epoch": 0.24115076571610727, "grad_norm": 1.2088173627853394, "learning_rate": 2.662695337644613e-05, "loss": 0.1081, "step": 13617 }, { "epoch": 0.2411684752531357, "grad_norm": 0.6628804206848145, "learning_rate": 2.662640977767749e-05, "loss": 0.0588, "step": 13618 }, { "epoch": 0.24118618479016413, "grad_norm": 0.8009606599807739, "learning_rate": 2.662586614065913e-05, "loss": 0.1255, "step": 13619 }, { "epoch": 0.24120389432719255, "grad_norm": 1.4252146482467651, "learning_rate": 2.6625322465392855e-05, "loss": 0.1167, "step": 13620 }, { "epoch": 0.24122160386422098, "grad_norm": 0.7757599353790283, "learning_rate": 2.6624778751880433e-05, "loss": 0.0789, "step": 13621 }, { "epoch": 0.2412393134012494, "grad_norm": 0.8070432543754578, "learning_rate": 2.6624235000123674e-05, "loss": 0.1055, "step": 13622 }, { "epoch": 0.24125702293827783, "grad_norm": 1.0753004550933838, "learning_rate": 2.6623691210124357e-05, "loss": 0.1125, "step": 13623 }, { "epoch": 0.24127473247530626, "grad_norm": 1.1565356254577637, "learning_rate": 2.662314738188427e-05, "loss": 0.1365, "step": 13624 }, { "epoch": 0.24129244201233468, "grad_norm": 1.1473803520202637, "learning_rate": 2.6622603515405203e-05, "loss": 0.1235, "step": 13625 }, { "epoch": 0.2413101515493631, "grad_norm": 0.891116201877594, "learning_rate": 2.6622059610688944e-05, "loss": 0.0566, "step": 13626 }, { "epoch": 0.24132786108639154, "grad_norm": 0.875775933265686, "learning_rate": 2.662151566773729e-05, "loss": 0.0825, "step": 13627 }, { "epoch": 0.24134557062341996, "grad_norm": 0.7359017133712769, "learning_rate": 2.662097168655202e-05, "loss": 0.0997, "step": 13628 }, { "epoch": 0.24136328016044842, "grad_norm": 1.1045901775360107, "learning_rate": 2.6620427667134934e-05, "loss": 0.116, "step": 13629 }, { "epoch": 0.24138098969747684, "grad_norm": 0.7817234396934509, "learning_rate": 2.6619883609487808e-05, "loss": 0.0895, "step": 13630 }, { "epoch": 0.24139869923450527, "grad_norm": 0.7823805212974548, "learning_rate": 2.6619339513612445e-05, "loss": 0.1193, "step": 13631 }, { "epoch": 0.2414164087715337, "grad_norm": 0.7944871783256531, "learning_rate": 2.6618795379510628e-05, "loss": 0.0569, "step": 13632 }, { "epoch": 0.24143411830856212, "grad_norm": 1.0866910219192505, "learning_rate": 2.6618251207184155e-05, "loss": 0.0934, "step": 13633 }, { "epoch": 0.24145182784559055, "grad_norm": 0.9638246893882751, "learning_rate": 2.6617706996634803e-05, "loss": 0.1497, "step": 13634 }, { "epoch": 0.24146953738261898, "grad_norm": 1.4420355558395386, "learning_rate": 2.6617162747864375e-05, "loss": 0.1448, "step": 13635 }, { "epoch": 0.2414872469196474, "grad_norm": 0.594124436378479, "learning_rate": 2.6616618460874656e-05, "loss": 0.0808, "step": 13636 }, { "epoch": 0.24150495645667583, "grad_norm": 0.8580307960510254, "learning_rate": 2.6616074135667437e-05, "loss": 0.1289, "step": 13637 }, { "epoch": 0.24152266599370426, "grad_norm": 0.7911452054977417, "learning_rate": 2.6615529772244507e-05, "loss": 0.0826, "step": 13638 }, { "epoch": 0.24154037553073268, "grad_norm": 0.7853466272354126, "learning_rate": 2.6614985370607662e-05, "loss": 0.1303, "step": 13639 }, { "epoch": 0.2415580850677611, "grad_norm": 0.752345621585846, "learning_rate": 2.6614440930758688e-05, "loss": 0.0983, "step": 13640 }, { "epoch": 0.24157579460478953, "grad_norm": 1.1576303243637085, "learning_rate": 2.661389645269938e-05, "loss": 0.1449, "step": 13641 }, { "epoch": 0.24159350414181796, "grad_norm": 1.010358452796936, "learning_rate": 2.6613351936431526e-05, "loss": 0.0932, "step": 13642 }, { "epoch": 0.2416112136788464, "grad_norm": 1.236340045928955, "learning_rate": 2.6612807381956917e-05, "loss": 0.1299, "step": 13643 }, { "epoch": 0.24162892321587484, "grad_norm": 1.1399763822555542, "learning_rate": 2.6612262789277354e-05, "loss": 0.1089, "step": 13644 }, { "epoch": 0.24164663275290327, "grad_norm": 0.49550896883010864, "learning_rate": 2.661171815839461e-05, "loss": 0.0874, "step": 13645 }, { "epoch": 0.2416643422899317, "grad_norm": 0.715900719165802, "learning_rate": 2.6611173489310493e-05, "loss": 0.0892, "step": 13646 }, { "epoch": 0.24168205182696012, "grad_norm": 0.7755768299102783, "learning_rate": 2.6610628782026786e-05, "loss": 0.132, "step": 13647 }, { "epoch": 0.24169976136398855, "grad_norm": 0.7793046236038208, "learning_rate": 2.661008403654529e-05, "loss": 0.1348, "step": 13648 }, { "epoch": 0.24171747090101697, "grad_norm": 0.9292676448822021, "learning_rate": 2.6609539252867795e-05, "loss": 0.0976, "step": 13649 }, { "epoch": 0.2417351804380454, "grad_norm": 0.7609183192253113, "learning_rate": 2.6608994430996082e-05, "loss": 0.1146, "step": 13650 }, { "epoch": 0.24175288997507383, "grad_norm": 1.0979787111282349, "learning_rate": 2.6608449570931955e-05, "loss": 0.1084, "step": 13651 }, { "epoch": 0.24177059951210225, "grad_norm": 1.2380106449127197, "learning_rate": 2.66079046726772e-05, "loss": 0.0842, "step": 13652 }, { "epoch": 0.24178830904913068, "grad_norm": 0.8761656880378723, "learning_rate": 2.6607359736233618e-05, "loss": 0.102, "step": 13653 }, { "epoch": 0.2418060185861591, "grad_norm": 1.2107828855514526, "learning_rate": 2.6606814761602992e-05, "loss": 0.1244, "step": 13654 }, { "epoch": 0.24182372812318753, "grad_norm": 1.2471249103546143, "learning_rate": 2.6606269748787122e-05, "loss": 0.1048, "step": 13655 }, { "epoch": 0.24184143766021596, "grad_norm": 0.7269659638404846, "learning_rate": 2.6605724697787796e-05, "loss": 0.1086, "step": 13656 }, { "epoch": 0.24185914719724438, "grad_norm": 0.5671206712722778, "learning_rate": 2.660517960860681e-05, "loss": 0.0698, "step": 13657 }, { "epoch": 0.2418768567342728, "grad_norm": 1.008963942527771, "learning_rate": 2.660463448124596e-05, "loss": 0.0903, "step": 13658 }, { "epoch": 0.24189456627130126, "grad_norm": 0.6140121817588806, "learning_rate": 2.6604089315707033e-05, "loss": 0.0875, "step": 13659 }, { "epoch": 0.2419122758083297, "grad_norm": 0.8615350723266602, "learning_rate": 2.6603544111991827e-05, "loss": 0.0941, "step": 13660 }, { "epoch": 0.24192998534535812, "grad_norm": 0.746832013130188, "learning_rate": 2.6602998870102134e-05, "loss": 0.1063, "step": 13661 }, { "epoch": 0.24194769488238654, "grad_norm": 1.0437555313110352, "learning_rate": 2.660245359003975e-05, "loss": 0.0571, "step": 13662 }, { "epoch": 0.24196540441941497, "grad_norm": 0.8648439049720764, "learning_rate": 2.6601908271806468e-05, "loss": 0.1475, "step": 13663 }, { "epoch": 0.2419831139564434, "grad_norm": 0.631183385848999, "learning_rate": 2.6601362915404076e-05, "loss": 0.0885, "step": 13664 }, { "epoch": 0.24200082349347182, "grad_norm": 0.7700727581977844, "learning_rate": 2.6600817520834377e-05, "loss": 0.0612, "step": 13665 }, { "epoch": 0.24201853303050025, "grad_norm": 0.7172255516052246, "learning_rate": 2.6600272088099162e-05, "loss": 0.1158, "step": 13666 }, { "epoch": 0.24203624256752868, "grad_norm": 0.9418005347251892, "learning_rate": 2.6599726617200223e-05, "loss": 0.1346, "step": 13667 }, { "epoch": 0.2420539521045571, "grad_norm": 0.8385457396507263, "learning_rate": 2.659918110813936e-05, "loss": 0.1111, "step": 13668 }, { "epoch": 0.24207166164158553, "grad_norm": 0.7362426519393921, "learning_rate": 2.6598635560918362e-05, "loss": 0.0654, "step": 13669 }, { "epoch": 0.24208937117861395, "grad_norm": 0.8771830201148987, "learning_rate": 2.6598089975539028e-05, "loss": 0.1097, "step": 13670 }, { "epoch": 0.24210708071564238, "grad_norm": 1.2716180086135864, "learning_rate": 2.659754435200315e-05, "loss": 0.1216, "step": 13671 }, { "epoch": 0.2421247902526708, "grad_norm": 0.7865853905677795, "learning_rate": 2.6596998690312524e-05, "loss": 0.0692, "step": 13672 }, { "epoch": 0.24214249978969926, "grad_norm": 0.666123628616333, "learning_rate": 2.6596452990468945e-05, "loss": 0.0774, "step": 13673 }, { "epoch": 0.2421602093267277, "grad_norm": 0.5952386260032654, "learning_rate": 2.6595907252474206e-05, "loss": 0.0734, "step": 13674 }, { "epoch": 0.24217791886375611, "grad_norm": 0.9215867519378662, "learning_rate": 2.6595361476330112e-05, "loss": 0.1191, "step": 13675 }, { "epoch": 0.24219562840078454, "grad_norm": 0.9809666275978088, "learning_rate": 2.6594815662038444e-05, "loss": 0.0694, "step": 13676 }, { "epoch": 0.24221333793781297, "grad_norm": 0.5834991335868835, "learning_rate": 2.659426980960101e-05, "loss": 0.0746, "step": 13677 }, { "epoch": 0.2422310474748414, "grad_norm": 0.5645997524261475, "learning_rate": 2.65937239190196e-05, "loss": 0.0997, "step": 13678 }, { "epoch": 0.24224875701186982, "grad_norm": 0.6436588168144226, "learning_rate": 2.659317799029601e-05, "loss": 0.0648, "step": 13679 }, { "epoch": 0.24226646654889825, "grad_norm": 1.6785327196121216, "learning_rate": 2.6592632023432032e-05, "loss": 0.1367, "step": 13680 }, { "epoch": 0.24228417608592667, "grad_norm": 1.1017875671386719, "learning_rate": 2.6592086018429476e-05, "loss": 0.0845, "step": 13681 }, { "epoch": 0.2423018856229551, "grad_norm": 1.023666262626648, "learning_rate": 2.6591539975290125e-05, "loss": 0.1165, "step": 13682 }, { "epoch": 0.24231959515998353, "grad_norm": 0.9135355949401855, "learning_rate": 2.6590993894015782e-05, "loss": 0.1165, "step": 13683 }, { "epoch": 0.24233730469701195, "grad_norm": 0.772108793258667, "learning_rate": 2.6590447774608243e-05, "loss": 0.1416, "step": 13684 }, { "epoch": 0.24235501423404038, "grad_norm": 0.8566184043884277, "learning_rate": 2.6589901617069293e-05, "loss": 0.0933, "step": 13685 }, { "epoch": 0.2423727237710688, "grad_norm": 0.7843952775001526, "learning_rate": 2.658935542140075e-05, "loss": 0.0914, "step": 13686 }, { "epoch": 0.24239043330809723, "grad_norm": 0.8078283071517944, "learning_rate": 2.6588809187604398e-05, "loss": 0.077, "step": 13687 }, { "epoch": 0.24240814284512568, "grad_norm": 1.0494751930236816, "learning_rate": 2.6588262915682034e-05, "loss": 0.1374, "step": 13688 }, { "epoch": 0.2424258523821541, "grad_norm": 0.5124610066413879, "learning_rate": 2.6587716605635456e-05, "loss": 0.0622, "step": 13689 }, { "epoch": 0.24244356191918254, "grad_norm": 0.8454024195671082, "learning_rate": 2.6587170257466462e-05, "loss": 0.0707, "step": 13690 }, { "epoch": 0.24246127145621096, "grad_norm": 0.8965311646461487, "learning_rate": 2.6586623871176852e-05, "loss": 0.1193, "step": 13691 }, { "epoch": 0.2424789809932394, "grad_norm": 0.9549517035484314, "learning_rate": 2.6586077446768422e-05, "loss": 0.1138, "step": 13692 }, { "epoch": 0.24249669053026782, "grad_norm": 0.6935052275657654, "learning_rate": 2.6585530984242968e-05, "loss": 0.0682, "step": 13693 }, { "epoch": 0.24251440006729624, "grad_norm": 0.9662207365036011, "learning_rate": 2.658498448360229e-05, "loss": 0.11, "step": 13694 }, { "epoch": 0.24253210960432467, "grad_norm": 0.7574399709701538, "learning_rate": 2.6584437944848184e-05, "loss": 0.137, "step": 13695 }, { "epoch": 0.2425498191413531, "grad_norm": 0.7492737174034119, "learning_rate": 2.6583891367982446e-05, "loss": 0.1399, "step": 13696 }, { "epoch": 0.24256752867838152, "grad_norm": 0.6414916515350342, "learning_rate": 2.658334475300688e-05, "loss": 0.1263, "step": 13697 }, { "epoch": 0.24258523821540995, "grad_norm": 0.8589701056480408, "learning_rate": 2.658279809992328e-05, "loss": 0.0809, "step": 13698 }, { "epoch": 0.24260294775243837, "grad_norm": 0.6217740774154663, "learning_rate": 2.658225140873345e-05, "loss": 0.0679, "step": 13699 }, { "epoch": 0.2426206572894668, "grad_norm": 1.0600388050079346, "learning_rate": 2.658170467943918e-05, "loss": 0.0851, "step": 13700 }, { "epoch": 0.24263836682649523, "grad_norm": 1.0408624410629272, "learning_rate": 2.6581157912042273e-05, "loss": 0.1048, "step": 13701 }, { "epoch": 0.24265607636352365, "grad_norm": 1.0271540880203247, "learning_rate": 2.6580611106544525e-05, "loss": 0.1199, "step": 13702 }, { "epoch": 0.2426737859005521, "grad_norm": 1.1889218091964722, "learning_rate": 2.6580064262947743e-05, "loss": 0.1006, "step": 13703 }, { "epoch": 0.24269149543758053, "grad_norm": 0.6547823548316956, "learning_rate": 2.657951738125372e-05, "loss": 0.0782, "step": 13704 }, { "epoch": 0.24270920497460896, "grad_norm": 0.7662676572799683, "learning_rate": 2.6578970461464253e-05, "loss": 0.1087, "step": 13705 }, { "epoch": 0.2427269145116374, "grad_norm": 0.7173214554786682, "learning_rate": 2.6578423503581147e-05, "loss": 0.0588, "step": 13706 }, { "epoch": 0.2427446240486658, "grad_norm": 1.2436927556991577, "learning_rate": 2.65778765076062e-05, "loss": 0.0782, "step": 13707 }, { "epoch": 0.24276233358569424, "grad_norm": 0.732235848903656, "learning_rate": 2.657732947354121e-05, "loss": 0.1322, "step": 13708 }, { "epoch": 0.24278004312272267, "grad_norm": 1.2860108613967896, "learning_rate": 2.6576782401387975e-05, "loss": 0.0851, "step": 13709 }, { "epoch": 0.2427977526597511, "grad_norm": 1.1679049730300903, "learning_rate": 2.65762352911483e-05, "loss": 0.0959, "step": 13710 }, { "epoch": 0.24281546219677952, "grad_norm": 0.5949276089668274, "learning_rate": 2.657568814282398e-05, "loss": 0.0873, "step": 13711 }, { "epoch": 0.24283317173380795, "grad_norm": 0.4990910589694977, "learning_rate": 2.6575140956416812e-05, "loss": 0.081, "step": 13712 }, { "epoch": 0.24285088127083637, "grad_norm": 0.7852666974067688, "learning_rate": 2.6574593731928605e-05, "loss": 0.084, "step": 13713 }, { "epoch": 0.2428685908078648, "grad_norm": 0.8126926422119141, "learning_rate": 2.657404646936116e-05, "loss": 0.0927, "step": 13714 }, { "epoch": 0.24288630034489322, "grad_norm": 0.7427466511726379, "learning_rate": 2.6573499168716267e-05, "loss": 0.1127, "step": 13715 }, { "epoch": 0.24290400988192165, "grad_norm": 0.9441308975219727, "learning_rate": 2.6572951829995734e-05, "loss": 0.0951, "step": 13716 }, { "epoch": 0.24292171941895008, "grad_norm": 0.9892001152038574, "learning_rate": 2.6572404453201357e-05, "loss": 0.1341, "step": 13717 }, { "epoch": 0.24293942895597853, "grad_norm": 1.2170629501342773, "learning_rate": 2.6571857038334947e-05, "loss": 0.0984, "step": 13718 }, { "epoch": 0.24295713849300696, "grad_norm": 0.8640857934951782, "learning_rate": 2.657130958539829e-05, "loss": 0.0729, "step": 13719 }, { "epoch": 0.24297484803003538, "grad_norm": 0.9296094179153442, "learning_rate": 2.65707620943932e-05, "loss": 0.0827, "step": 13720 }, { "epoch": 0.2429925575670638, "grad_norm": 0.7216991186141968, "learning_rate": 2.6570214565321468e-05, "loss": 0.0708, "step": 13721 }, { "epoch": 0.24301026710409224, "grad_norm": 0.601326584815979, "learning_rate": 2.6569666998184907e-05, "loss": 0.0884, "step": 13722 }, { "epoch": 0.24302797664112066, "grad_norm": 0.8405474424362183, "learning_rate": 2.6569119392985305e-05, "loss": 0.1218, "step": 13723 }, { "epoch": 0.2430456861781491, "grad_norm": 0.781918466091156, "learning_rate": 2.6568571749724477e-05, "loss": 0.1065, "step": 13724 }, { "epoch": 0.24306339571517752, "grad_norm": 0.8060815930366516, "learning_rate": 2.6568024068404215e-05, "loss": 0.1279, "step": 13725 }, { "epoch": 0.24308110525220594, "grad_norm": 0.543110728263855, "learning_rate": 2.6567476349026323e-05, "loss": 0.1054, "step": 13726 }, { "epoch": 0.24309881478923437, "grad_norm": 0.624843418598175, "learning_rate": 2.6566928591592603e-05, "loss": 0.1225, "step": 13727 }, { "epoch": 0.2431165243262628, "grad_norm": 0.819771945476532, "learning_rate": 2.656638079610486e-05, "loss": 0.0581, "step": 13728 }, { "epoch": 0.24313423386329122, "grad_norm": 0.6626728773117065, "learning_rate": 2.656583296256489e-05, "loss": 0.0827, "step": 13729 }, { "epoch": 0.24315194340031965, "grad_norm": 0.7839323282241821, "learning_rate": 2.6565285090974507e-05, "loss": 0.0804, "step": 13730 }, { "epoch": 0.24316965293734807, "grad_norm": 1.2823197841644287, "learning_rate": 2.6564737181335498e-05, "loss": 0.103, "step": 13731 }, { "epoch": 0.2431873624743765, "grad_norm": 0.9147917628288269, "learning_rate": 2.6564189233649677e-05, "loss": 0.1222, "step": 13732 }, { "epoch": 0.24320507201140495, "grad_norm": 1.6643610000610352, "learning_rate": 2.6563641247918846e-05, "loss": 0.1004, "step": 13733 }, { "epoch": 0.24322278154843338, "grad_norm": 1.0136548280715942, "learning_rate": 2.6563093224144797e-05, "loss": 0.1153, "step": 13734 }, { "epoch": 0.2432404910854618, "grad_norm": 0.7090227007865906, "learning_rate": 2.6562545162329346e-05, "loss": 0.0982, "step": 13735 }, { "epoch": 0.24325820062249023, "grad_norm": 0.6646261215209961, "learning_rate": 2.656199706247429e-05, "loss": 0.0986, "step": 13736 }, { "epoch": 0.24327591015951866, "grad_norm": 0.9406161904335022, "learning_rate": 2.6561448924581437e-05, "loss": 0.1145, "step": 13737 }, { "epoch": 0.2432936196965471, "grad_norm": 1.1228524446487427, "learning_rate": 2.656090074865258e-05, "loss": 0.0668, "step": 13738 }, { "epoch": 0.2433113292335755, "grad_norm": 0.4880063235759735, "learning_rate": 2.6560352534689534e-05, "loss": 0.0913, "step": 13739 }, { "epoch": 0.24332903877060394, "grad_norm": 0.7112827301025391, "learning_rate": 2.6559804282694092e-05, "loss": 0.119, "step": 13740 }, { "epoch": 0.24334674830763237, "grad_norm": 0.9848655462265015, "learning_rate": 2.655925599266807e-05, "loss": 0.1306, "step": 13741 }, { "epoch": 0.2433644578446608, "grad_norm": 0.6175467371940613, "learning_rate": 2.6558707664613258e-05, "loss": 0.0865, "step": 13742 }, { "epoch": 0.24338216738168922, "grad_norm": 1.1365222930908203, "learning_rate": 2.6558159298531474e-05, "loss": 0.1188, "step": 13743 }, { "epoch": 0.24339987691871764, "grad_norm": 1.2633250951766968, "learning_rate": 2.6557610894424507e-05, "loss": 0.155, "step": 13744 }, { "epoch": 0.24341758645574607, "grad_norm": 0.5446500182151794, "learning_rate": 2.6557062452294174e-05, "loss": 0.1008, "step": 13745 }, { "epoch": 0.2434352959927745, "grad_norm": 1.115365982055664, "learning_rate": 2.6556513972142273e-05, "loss": 0.0995, "step": 13746 }, { "epoch": 0.24345300552980292, "grad_norm": 0.6460556387901306, "learning_rate": 2.655596545397061e-05, "loss": 0.0936, "step": 13747 }, { "epoch": 0.24347071506683138, "grad_norm": 1.0888899564743042, "learning_rate": 2.6555416897780992e-05, "loss": 0.1467, "step": 13748 }, { "epoch": 0.2434884246038598, "grad_norm": 0.8039951324462891, "learning_rate": 2.6554868303575217e-05, "loss": 0.0864, "step": 13749 }, { "epoch": 0.24350613414088823, "grad_norm": 0.7292560338973999, "learning_rate": 2.6554319671355098e-05, "loss": 0.1017, "step": 13750 }, { "epoch": 0.24352384367791666, "grad_norm": 1.1258604526519775, "learning_rate": 2.6553771001122434e-05, "loss": 0.0858, "step": 13751 }, { "epoch": 0.24354155321494508, "grad_norm": 0.6283532381057739, "learning_rate": 2.655322229287903e-05, "loss": 0.0706, "step": 13752 }, { "epoch": 0.2435592627519735, "grad_norm": 0.8452011942863464, "learning_rate": 2.6552673546626695e-05, "loss": 0.1042, "step": 13753 }, { "epoch": 0.24357697228900194, "grad_norm": 0.9411900043487549, "learning_rate": 2.6552124762367235e-05, "loss": 0.0911, "step": 13754 }, { "epoch": 0.24359468182603036, "grad_norm": 0.756624698638916, "learning_rate": 2.655157594010245e-05, "loss": 0.0926, "step": 13755 }, { "epoch": 0.2436123913630588, "grad_norm": 0.7717658281326294, "learning_rate": 2.655102707983415e-05, "loss": 0.086, "step": 13756 }, { "epoch": 0.24363010090008722, "grad_norm": 0.8474063873291016, "learning_rate": 2.655047818156414e-05, "loss": 0.0857, "step": 13757 }, { "epoch": 0.24364781043711564, "grad_norm": 1.0208321809768677, "learning_rate": 2.654992924529422e-05, "loss": 0.1008, "step": 13758 }, { "epoch": 0.24366551997414407, "grad_norm": 0.8268963098526001, "learning_rate": 2.6549380271026205e-05, "loss": 0.0927, "step": 13759 }, { "epoch": 0.2436832295111725, "grad_norm": 1.1436903476715088, "learning_rate": 2.6548831258761898e-05, "loss": 0.0966, "step": 13760 }, { "epoch": 0.24370093904820092, "grad_norm": 0.9546402096748352, "learning_rate": 2.65482822085031e-05, "loss": 0.103, "step": 13761 }, { "epoch": 0.24371864858522935, "grad_norm": 0.6610300540924072, "learning_rate": 2.6547733120251628e-05, "loss": 0.1207, "step": 13762 }, { "epoch": 0.2437363581222578, "grad_norm": 1.0074559450149536, "learning_rate": 2.6547183994009277e-05, "loss": 0.1362, "step": 13763 }, { "epoch": 0.24375406765928623, "grad_norm": 0.6182318329811096, "learning_rate": 2.654663482977786e-05, "loss": 0.0953, "step": 13764 }, { "epoch": 0.24377177719631465, "grad_norm": 0.9153176546096802, "learning_rate": 2.6546085627559184e-05, "loss": 0.0835, "step": 13765 }, { "epoch": 0.24378948673334308, "grad_norm": 0.982148289680481, "learning_rate": 2.6545536387355055e-05, "loss": 0.1181, "step": 13766 }, { "epoch": 0.2438071962703715, "grad_norm": 1.148624062538147, "learning_rate": 2.6544987109167275e-05, "loss": 0.1124, "step": 13767 }, { "epoch": 0.24382490580739993, "grad_norm": 0.8515480160713196, "learning_rate": 2.6544437792997657e-05, "loss": 0.0835, "step": 13768 }, { "epoch": 0.24384261534442836, "grad_norm": 0.8578452467918396, "learning_rate": 2.6543888438848003e-05, "loss": 0.1179, "step": 13769 }, { "epoch": 0.24386032488145679, "grad_norm": 0.9831623435020447, "learning_rate": 2.654333904672013e-05, "loss": 0.1314, "step": 13770 }, { "epoch": 0.2438780344184852, "grad_norm": 0.7799859642982483, "learning_rate": 2.654278961661583e-05, "loss": 0.0826, "step": 13771 }, { "epoch": 0.24389574395551364, "grad_norm": 0.6892592310905457, "learning_rate": 2.654224014853693e-05, "loss": 0.0778, "step": 13772 }, { "epoch": 0.24391345349254206, "grad_norm": 1.340409278869629, "learning_rate": 2.654169064248522e-05, "loss": 0.123, "step": 13773 }, { "epoch": 0.2439311630295705, "grad_norm": 0.814760148525238, "learning_rate": 2.6541141098462516e-05, "loss": 0.1351, "step": 13774 }, { "epoch": 0.24394887256659892, "grad_norm": 0.8106704950332642, "learning_rate": 2.6540591516470628e-05, "loss": 0.0886, "step": 13775 }, { "epoch": 0.24396658210362734, "grad_norm": 1.1811258792877197, "learning_rate": 2.654004189651136e-05, "loss": 0.0755, "step": 13776 }, { "epoch": 0.24398429164065577, "grad_norm": 0.6778011918067932, "learning_rate": 2.6539492238586522e-05, "loss": 0.0849, "step": 13777 }, { "epoch": 0.24400200117768422, "grad_norm": 1.1900384426116943, "learning_rate": 2.653894254269792e-05, "loss": 0.1053, "step": 13778 }, { "epoch": 0.24401971071471265, "grad_norm": 1.1534837484359741, "learning_rate": 2.6538392808847365e-05, "loss": 0.1137, "step": 13779 }, { "epoch": 0.24403742025174108, "grad_norm": 0.7907986044883728, "learning_rate": 2.6537843037036668e-05, "loss": 0.092, "step": 13780 }, { "epoch": 0.2440551297887695, "grad_norm": 1.1942825317382812, "learning_rate": 2.653729322726763e-05, "loss": 0.1161, "step": 13781 }, { "epoch": 0.24407283932579793, "grad_norm": 0.7177379131317139, "learning_rate": 2.6536743379542064e-05, "loss": 0.0731, "step": 13782 }, { "epoch": 0.24409054886282636, "grad_norm": 0.4902766942977905, "learning_rate": 2.6536193493861783e-05, "loss": 0.0726, "step": 13783 }, { "epoch": 0.24410825839985478, "grad_norm": 0.7737630009651184, "learning_rate": 2.653564357022859e-05, "loss": 0.0969, "step": 13784 }, { "epoch": 0.2441259679368832, "grad_norm": 0.8362613916397095, "learning_rate": 2.6535093608644293e-05, "loss": 0.1111, "step": 13785 }, { "epoch": 0.24414367747391164, "grad_norm": 0.5093910098075867, "learning_rate": 2.653454360911071e-05, "loss": 0.1182, "step": 13786 }, { "epoch": 0.24416138701094006, "grad_norm": 1.402982234954834, "learning_rate": 2.6533993571629642e-05, "loss": 0.1371, "step": 13787 }, { "epoch": 0.2441790965479685, "grad_norm": 0.9712098240852356, "learning_rate": 2.6533443496202905e-05, "loss": 0.0961, "step": 13788 }, { "epoch": 0.24419680608499691, "grad_norm": 0.7415396571159363, "learning_rate": 2.65328933828323e-05, "loss": 0.1106, "step": 13789 }, { "epoch": 0.24421451562202534, "grad_norm": 0.567762553691864, "learning_rate": 2.653234323151965e-05, "loss": 0.0867, "step": 13790 }, { "epoch": 0.24423222515905377, "grad_norm": 0.823484480381012, "learning_rate": 2.653179304226675e-05, "loss": 0.1088, "step": 13791 }, { "epoch": 0.24424993469608222, "grad_norm": 0.4895206093788147, "learning_rate": 2.653124281507542e-05, "loss": 0.1122, "step": 13792 }, { "epoch": 0.24426764423311065, "grad_norm": 1.1632360219955444, "learning_rate": 2.6530692549947466e-05, "loss": 0.0961, "step": 13793 }, { "epoch": 0.24428535377013907, "grad_norm": 1.0422099828720093, "learning_rate": 2.65301422468847e-05, "loss": 0.1091, "step": 13794 }, { "epoch": 0.2443030633071675, "grad_norm": 1.0574373006820679, "learning_rate": 2.6529591905888936e-05, "loss": 0.0867, "step": 13795 }, { "epoch": 0.24432077284419593, "grad_norm": 1.4567861557006836, "learning_rate": 2.652904152696198e-05, "loss": 0.1169, "step": 13796 }, { "epoch": 0.24433848238122435, "grad_norm": 0.7068490386009216, "learning_rate": 2.652849111010564e-05, "loss": 0.0897, "step": 13797 }, { "epoch": 0.24435619191825278, "grad_norm": 1.0362457036972046, "learning_rate": 2.652794065532173e-05, "loss": 0.101, "step": 13798 }, { "epoch": 0.2443739014552812, "grad_norm": 0.9759332537651062, "learning_rate": 2.6527390162612066e-05, "loss": 0.0927, "step": 13799 }, { "epoch": 0.24439161099230963, "grad_norm": 1.3460259437561035, "learning_rate": 2.652683963197845e-05, "loss": 0.0997, "step": 13800 }, { "epoch": 0.24440932052933806, "grad_norm": 1.0216137170791626, "learning_rate": 2.65262890634227e-05, "loss": 0.0989, "step": 13801 }, { "epoch": 0.24442703006636649, "grad_norm": 1.2891865968704224, "learning_rate": 2.652573845694662e-05, "loss": 0.0882, "step": 13802 }, { "epoch": 0.2444447396033949, "grad_norm": 0.9106143712997437, "learning_rate": 2.6525187812552034e-05, "loss": 0.0929, "step": 13803 }, { "epoch": 0.24446244914042334, "grad_norm": 0.9361840486526489, "learning_rate": 2.6524637130240743e-05, "loss": 0.1106, "step": 13804 }, { "epoch": 0.24448015867745176, "grad_norm": 0.9959625005722046, "learning_rate": 2.652408641001456e-05, "loss": 0.1112, "step": 13805 }, { "epoch": 0.2444978682144802, "grad_norm": 0.8159355521202087, "learning_rate": 2.65235356518753e-05, "loss": 0.1043, "step": 13806 }, { "epoch": 0.24451557775150864, "grad_norm": 0.7010625004768372, "learning_rate": 2.652298485582477e-05, "loss": 0.1241, "step": 13807 }, { "epoch": 0.24453328728853707, "grad_norm": 0.5716879963874817, "learning_rate": 2.6522434021864785e-05, "loss": 0.0815, "step": 13808 }, { "epoch": 0.2445509968255655, "grad_norm": 1.1266686916351318, "learning_rate": 2.6521883149997163e-05, "loss": 0.1564, "step": 13809 }, { "epoch": 0.24456870636259392, "grad_norm": 0.7582814693450928, "learning_rate": 2.6521332240223708e-05, "loss": 0.0932, "step": 13810 }, { "epoch": 0.24458641589962235, "grad_norm": 0.8421710729598999, "learning_rate": 2.6520781292546233e-05, "loss": 0.0712, "step": 13811 }, { "epoch": 0.24460412543665078, "grad_norm": 0.6148570775985718, "learning_rate": 2.6520230306966555e-05, "loss": 0.0895, "step": 13812 }, { "epoch": 0.2446218349736792, "grad_norm": 0.8791078925132751, "learning_rate": 2.6519679283486483e-05, "loss": 0.0945, "step": 13813 }, { "epoch": 0.24463954451070763, "grad_norm": 0.839370608329773, "learning_rate": 2.6519128222107832e-05, "loss": 0.0949, "step": 13814 }, { "epoch": 0.24465725404773606, "grad_norm": 0.8570754528045654, "learning_rate": 2.6518577122832416e-05, "loss": 0.1116, "step": 13815 }, { "epoch": 0.24467496358476448, "grad_norm": 1.1360903978347778, "learning_rate": 2.6518025985662043e-05, "loss": 0.1312, "step": 13816 }, { "epoch": 0.2446926731217929, "grad_norm": 0.5995941162109375, "learning_rate": 2.6517474810598533e-05, "loss": 0.0608, "step": 13817 }, { "epoch": 0.24471038265882133, "grad_norm": 1.0328307151794434, "learning_rate": 2.6516923597643696e-05, "loss": 0.1153, "step": 13818 }, { "epoch": 0.24472809219584976, "grad_norm": 1.0726791620254517, "learning_rate": 2.651637234679934e-05, "loss": 0.1242, "step": 13819 }, { "epoch": 0.2447458017328782, "grad_norm": 0.8609076142311096, "learning_rate": 2.6515821058067285e-05, "loss": 0.0784, "step": 13820 }, { "epoch": 0.24476351126990661, "grad_norm": 0.8951162695884705, "learning_rate": 2.6515269731449347e-05, "loss": 0.0921, "step": 13821 }, { "epoch": 0.24478122080693507, "grad_norm": 1.0122835636138916, "learning_rate": 2.6514718366947336e-05, "loss": 0.131, "step": 13822 }, { "epoch": 0.2447989303439635, "grad_norm": 0.7907888889312744, "learning_rate": 2.6514166964563066e-05, "loss": 0.0881, "step": 13823 }, { "epoch": 0.24481663988099192, "grad_norm": 0.8028063774108887, "learning_rate": 2.651361552429835e-05, "loss": 0.1157, "step": 13824 }, { "epoch": 0.24483434941802035, "grad_norm": 1.0319007635116577, "learning_rate": 2.6513064046155005e-05, "loss": 0.12, "step": 13825 }, { "epoch": 0.24485205895504877, "grad_norm": 0.8479766845703125, "learning_rate": 2.6512512530134842e-05, "loss": 0.1164, "step": 13826 }, { "epoch": 0.2448697684920772, "grad_norm": 1.4046953916549683, "learning_rate": 2.6511960976239675e-05, "loss": 0.1171, "step": 13827 }, { "epoch": 0.24488747802910563, "grad_norm": 0.7168810367584229, "learning_rate": 2.6511409384471327e-05, "loss": 0.1359, "step": 13828 }, { "epoch": 0.24490518756613405, "grad_norm": 1.016809344291687, "learning_rate": 2.65108577548316e-05, "loss": 0.1003, "step": 13829 }, { "epoch": 0.24492289710316248, "grad_norm": 0.7128047347068787, "learning_rate": 2.651030608732232e-05, "loss": 0.1093, "step": 13830 }, { "epoch": 0.2449406066401909, "grad_norm": 0.8492231369018555, "learning_rate": 2.65097543819453e-05, "loss": 0.0926, "step": 13831 }, { "epoch": 0.24495831617721933, "grad_norm": 0.543038010597229, "learning_rate": 2.6509202638702343e-05, "loss": 0.1032, "step": 13832 }, { "epoch": 0.24497602571424776, "grad_norm": 0.992057204246521, "learning_rate": 2.6508650857595278e-05, "loss": 0.0749, "step": 13833 }, { "epoch": 0.24499373525127618, "grad_norm": 0.7850110530853271, "learning_rate": 2.6508099038625917e-05, "loss": 0.1309, "step": 13834 }, { "epoch": 0.2450114447883046, "grad_norm": 0.9512831568717957, "learning_rate": 2.650754718179607e-05, "loss": 0.1347, "step": 13835 }, { "epoch": 0.24502915432533304, "grad_norm": 0.9031388163566589, "learning_rate": 2.6506995287107562e-05, "loss": 0.1045, "step": 13836 }, { "epoch": 0.2450468638623615, "grad_norm": 0.8258031010627747, "learning_rate": 2.6506443354562203e-05, "loss": 0.0749, "step": 13837 }, { "epoch": 0.24506457339938992, "grad_norm": 0.777605414390564, "learning_rate": 2.6505891384161803e-05, "loss": 0.0884, "step": 13838 }, { "epoch": 0.24508228293641834, "grad_norm": 1.0874264240264893, "learning_rate": 2.650533937590819e-05, "loss": 0.1042, "step": 13839 }, { "epoch": 0.24509999247344677, "grad_norm": 0.8459740877151489, "learning_rate": 2.6504787329803173e-05, "loss": 0.1073, "step": 13840 }, { "epoch": 0.2451177020104752, "grad_norm": 0.6225147843360901, "learning_rate": 2.6504235245848567e-05, "loss": 0.0872, "step": 13841 }, { "epoch": 0.24513541154750362, "grad_norm": 0.72166907787323, "learning_rate": 2.6503683124046195e-05, "loss": 0.0847, "step": 13842 }, { "epoch": 0.24515312108453205, "grad_norm": 0.7635117769241333, "learning_rate": 2.6503130964397865e-05, "loss": 0.0915, "step": 13843 }, { "epoch": 0.24517083062156048, "grad_norm": 1.105076551437378, "learning_rate": 2.65025787669054e-05, "loss": 0.1337, "step": 13844 }, { "epoch": 0.2451885401585889, "grad_norm": 1.0809653997421265, "learning_rate": 2.6502026531570613e-05, "loss": 0.1258, "step": 13845 }, { "epoch": 0.24520624969561733, "grad_norm": 0.7026529908180237, "learning_rate": 2.650147425839532e-05, "loss": 0.0816, "step": 13846 }, { "epoch": 0.24522395923264576, "grad_norm": 0.9929629564285278, "learning_rate": 2.6500921947381343e-05, "loss": 0.1008, "step": 13847 }, { "epoch": 0.24524166876967418, "grad_norm": 0.7341411113739014, "learning_rate": 2.6500369598530496e-05, "loss": 0.0967, "step": 13848 }, { "epoch": 0.2452593783067026, "grad_norm": 0.650568425655365, "learning_rate": 2.6499817211844593e-05, "loss": 0.1175, "step": 13849 }, { "epoch": 0.24527708784373103, "grad_norm": 0.6037958264350891, "learning_rate": 2.649926478732546e-05, "loss": 0.0784, "step": 13850 }, { "epoch": 0.24529479738075946, "grad_norm": 0.9937388896942139, "learning_rate": 2.6498712324974904e-05, "loss": 0.0926, "step": 13851 }, { "epoch": 0.24531250691778791, "grad_norm": 0.8311699032783508, "learning_rate": 2.6498159824794752e-05, "loss": 0.1002, "step": 13852 }, { "epoch": 0.24533021645481634, "grad_norm": 1.1212214231491089, "learning_rate": 2.649760728678681e-05, "loss": 0.1134, "step": 13853 }, { "epoch": 0.24534792599184477, "grad_norm": 1.1995183229446411, "learning_rate": 2.649705471095291e-05, "loss": 0.0926, "step": 13854 }, { "epoch": 0.2453656355288732, "grad_norm": 0.6991331577301025, "learning_rate": 2.649650209729486e-05, "loss": 0.1045, "step": 13855 }, { "epoch": 0.24538334506590162, "grad_norm": 0.6810704469680786, "learning_rate": 2.6495949445814483e-05, "loss": 0.071, "step": 13856 }, { "epoch": 0.24540105460293005, "grad_norm": 1.0828770399093628, "learning_rate": 2.6495396756513595e-05, "loss": 0.1025, "step": 13857 }, { "epoch": 0.24541876413995847, "grad_norm": 0.853116512298584, "learning_rate": 2.649484402939401e-05, "loss": 0.0906, "step": 13858 }, { "epoch": 0.2454364736769869, "grad_norm": 0.7286630272865295, "learning_rate": 2.6494291264457554e-05, "loss": 0.0695, "step": 13859 }, { "epoch": 0.24545418321401533, "grad_norm": 1.0063161849975586, "learning_rate": 2.6493738461706042e-05, "loss": 0.0871, "step": 13860 }, { "epoch": 0.24547189275104375, "grad_norm": 0.7804523706436157, "learning_rate": 2.6493185621141293e-05, "loss": 0.1165, "step": 13861 }, { "epoch": 0.24548960228807218, "grad_norm": 0.8529682159423828, "learning_rate": 2.6492632742765125e-05, "loss": 0.0852, "step": 13862 }, { "epoch": 0.2455073118251006, "grad_norm": 1.009533166885376, "learning_rate": 2.649207982657936e-05, "loss": 0.1306, "step": 13863 }, { "epoch": 0.24552502136212903, "grad_norm": 0.8181564807891846, "learning_rate": 2.6491526872585814e-05, "loss": 0.0988, "step": 13864 }, { "epoch": 0.24554273089915746, "grad_norm": 1.1872161626815796, "learning_rate": 2.6490973880786303e-05, "loss": 0.1459, "step": 13865 }, { "epoch": 0.24556044043618588, "grad_norm": 0.904262363910675, "learning_rate": 2.6490420851182658e-05, "loss": 0.121, "step": 13866 }, { "epoch": 0.24557814997321434, "grad_norm": 1.0249077081680298, "learning_rate": 2.648986778377668e-05, "loss": 0.0903, "step": 13867 }, { "epoch": 0.24559585951024276, "grad_norm": 0.8690705299377441, "learning_rate": 2.648931467857021e-05, "loss": 0.0638, "step": 13868 }, { "epoch": 0.2456135690472712, "grad_norm": 0.6283259391784668, "learning_rate": 2.6488761535565048e-05, "loss": 0.0886, "step": 13869 }, { "epoch": 0.24563127858429962, "grad_norm": 0.6354916095733643, "learning_rate": 2.6488208354763023e-05, "loss": 0.0836, "step": 13870 }, { "epoch": 0.24564898812132804, "grad_norm": 1.1075025796890259, "learning_rate": 2.648765513616596e-05, "loss": 0.1364, "step": 13871 }, { "epoch": 0.24566669765835647, "grad_norm": 0.7011319994926453, "learning_rate": 2.6487101879775672e-05, "loss": 0.0902, "step": 13872 }, { "epoch": 0.2456844071953849, "grad_norm": 0.8991078734397888, "learning_rate": 2.648654858559398e-05, "loss": 0.1074, "step": 13873 }, { "epoch": 0.24570211673241332, "grad_norm": 1.7935622930526733, "learning_rate": 2.6485995253622705e-05, "loss": 0.0845, "step": 13874 }, { "epoch": 0.24571982626944175, "grad_norm": 1.3200877904891968, "learning_rate": 2.648544188386367e-05, "loss": 0.0985, "step": 13875 }, { "epoch": 0.24573753580647018, "grad_norm": 1.5650575160980225, "learning_rate": 2.648488847631869e-05, "loss": 0.1276, "step": 13876 }, { "epoch": 0.2457552453434986, "grad_norm": 0.7186098098754883, "learning_rate": 2.6484335030989587e-05, "loss": 0.1228, "step": 13877 }, { "epoch": 0.24577295488052703, "grad_norm": 0.9302157759666443, "learning_rate": 2.6483781547878183e-05, "loss": 0.0914, "step": 13878 }, { "epoch": 0.24579066441755545, "grad_norm": 0.908737063407898, "learning_rate": 2.64832280269863e-05, "loss": 0.0768, "step": 13879 }, { "epoch": 0.24580837395458388, "grad_norm": 1.2897547483444214, "learning_rate": 2.648267446831576e-05, "loss": 0.1252, "step": 13880 }, { "epoch": 0.2458260834916123, "grad_norm": 0.44282761216163635, "learning_rate": 2.6482120871868383e-05, "loss": 0.087, "step": 13881 }, { "epoch": 0.24584379302864076, "grad_norm": 0.5690823197364807, "learning_rate": 2.648156723764599e-05, "loss": 0.1094, "step": 13882 }, { "epoch": 0.2458615025656692, "grad_norm": 0.8756247758865356, "learning_rate": 2.64810135656504e-05, "loss": 0.1071, "step": 13883 }, { "epoch": 0.24587921210269761, "grad_norm": 0.9217658042907715, "learning_rate": 2.6480459855883435e-05, "loss": 0.1045, "step": 13884 }, { "epoch": 0.24589692163972604, "grad_norm": 0.717636227607727, "learning_rate": 2.6479906108346922e-05, "loss": 0.0668, "step": 13885 }, { "epoch": 0.24591463117675447, "grad_norm": 0.8823670148849487, "learning_rate": 2.6479352323042677e-05, "loss": 0.0784, "step": 13886 }, { "epoch": 0.2459323407137829, "grad_norm": 0.8982601165771484, "learning_rate": 2.6478798499972526e-05, "loss": 0.1068, "step": 13887 }, { "epoch": 0.24595005025081132, "grad_norm": 1.29944908618927, "learning_rate": 2.6478244639138282e-05, "loss": 0.0843, "step": 13888 }, { "epoch": 0.24596775978783975, "grad_norm": 0.6794427037239075, "learning_rate": 2.647769074054178e-05, "loss": 0.0979, "step": 13889 }, { "epoch": 0.24598546932486817, "grad_norm": 0.7887773513793945, "learning_rate": 2.6477136804184836e-05, "loss": 0.0655, "step": 13890 }, { "epoch": 0.2460031788618966, "grad_norm": 1.014554500579834, "learning_rate": 2.6476582830069277e-05, "loss": 0.1046, "step": 13891 }, { "epoch": 0.24602088839892503, "grad_norm": 1.0576308965682983, "learning_rate": 2.6476028818196915e-05, "loss": 0.1129, "step": 13892 }, { "epoch": 0.24603859793595345, "grad_norm": 2.709690809249878, "learning_rate": 2.647547476856958e-05, "loss": 0.1213, "step": 13893 }, { "epoch": 0.24605630747298188, "grad_norm": 0.7950347661972046, "learning_rate": 2.6474920681189095e-05, "loss": 0.0836, "step": 13894 }, { "epoch": 0.2460740170100103, "grad_norm": 0.9138685464859009, "learning_rate": 2.647436655605728e-05, "loss": 0.115, "step": 13895 }, { "epoch": 0.24609172654703873, "grad_norm": 0.7412357330322266, "learning_rate": 2.647381239317596e-05, "loss": 0.1148, "step": 13896 }, { "epoch": 0.24610943608406718, "grad_norm": 3.8732569217681885, "learning_rate": 2.6473258192546958e-05, "loss": 0.1012, "step": 13897 }, { "epoch": 0.2461271456210956, "grad_norm": 0.7208997011184692, "learning_rate": 2.6472703954172098e-05, "loss": 0.0919, "step": 13898 }, { "epoch": 0.24614485515812404, "grad_norm": 0.7252171635627747, "learning_rate": 2.6472149678053202e-05, "loss": 0.1076, "step": 13899 }, { "epoch": 0.24616256469515246, "grad_norm": 1.319341778755188, "learning_rate": 2.6471595364192094e-05, "loss": 0.1039, "step": 13900 }, { "epoch": 0.2461802742321809, "grad_norm": 1.091765284538269, "learning_rate": 2.6471041012590592e-05, "loss": 0.088, "step": 13901 }, { "epoch": 0.24619798376920932, "grad_norm": 0.6835081577301025, "learning_rate": 2.647048662325053e-05, "loss": 0.0944, "step": 13902 }, { "epoch": 0.24621569330623774, "grad_norm": 0.6575958728790283, "learning_rate": 2.6469932196173728e-05, "loss": 0.0755, "step": 13903 }, { "epoch": 0.24623340284326617, "grad_norm": 0.8256029486656189, "learning_rate": 2.646937773136201e-05, "loss": 0.099, "step": 13904 }, { "epoch": 0.2462511123802946, "grad_norm": 0.8836725950241089, "learning_rate": 2.64688232288172e-05, "loss": 0.0942, "step": 13905 }, { "epoch": 0.24626882191732302, "grad_norm": 0.7576885223388672, "learning_rate": 2.646826868854112e-05, "loss": 0.1049, "step": 13906 }, { "epoch": 0.24628653145435145, "grad_norm": 1.0860058069229126, "learning_rate": 2.6467714110535596e-05, "loss": 0.1094, "step": 13907 }, { "epoch": 0.24630424099137987, "grad_norm": 1.0722002983093262, "learning_rate": 2.646715949480245e-05, "loss": 0.0887, "step": 13908 }, { "epoch": 0.2463219505284083, "grad_norm": 0.9652796387672424, "learning_rate": 2.6466604841343514e-05, "loss": 0.1022, "step": 13909 }, { "epoch": 0.24633966006543673, "grad_norm": 0.798822283744812, "learning_rate": 2.6466050150160602e-05, "loss": 0.0694, "step": 13910 }, { "epoch": 0.24635736960246515, "grad_norm": 1.1143746376037598, "learning_rate": 2.646549542125555e-05, "loss": 0.1227, "step": 13911 }, { "epoch": 0.2463750791394936, "grad_norm": 0.7374840378761292, "learning_rate": 2.6464940654630173e-05, "loss": 0.0995, "step": 13912 }, { "epoch": 0.24639278867652203, "grad_norm": 0.7375292778015137, "learning_rate": 2.6464385850286305e-05, "loss": 0.1046, "step": 13913 }, { "epoch": 0.24641049821355046, "grad_norm": 0.7156995534896851, "learning_rate": 2.6463831008225768e-05, "loss": 0.1255, "step": 13914 }, { "epoch": 0.2464282077505789, "grad_norm": 1.0710911750793457, "learning_rate": 2.6463276128450384e-05, "loss": 0.104, "step": 13915 }, { "epoch": 0.2464459172876073, "grad_norm": 1.052001953125, "learning_rate": 2.646272121096198e-05, "loss": 0.0984, "step": 13916 }, { "epoch": 0.24646362682463574, "grad_norm": 0.6119237542152405, "learning_rate": 2.646216625576238e-05, "loss": 0.0765, "step": 13917 }, { "epoch": 0.24648133636166417, "grad_norm": 1.1589094400405884, "learning_rate": 2.6461611262853417e-05, "loss": 0.121, "step": 13918 }, { "epoch": 0.2464990458986926, "grad_norm": 0.9359287619590759, "learning_rate": 2.646105623223691e-05, "loss": 0.1159, "step": 13919 }, { "epoch": 0.24651675543572102, "grad_norm": 1.2109136581420898, "learning_rate": 2.6460501163914693e-05, "loss": 0.1029, "step": 13920 }, { "epoch": 0.24653446497274945, "grad_norm": 1.1573504209518433, "learning_rate": 2.645994605788858e-05, "loss": 0.106, "step": 13921 }, { "epoch": 0.24655217450977787, "grad_norm": 0.7890288233757019, "learning_rate": 2.6459390914160402e-05, "loss": 0.0889, "step": 13922 }, { "epoch": 0.2465698840468063, "grad_norm": 1.0417993068695068, "learning_rate": 2.645883573273199e-05, "loss": 0.1316, "step": 13923 }, { "epoch": 0.24658759358383472, "grad_norm": 0.9389486312866211, "learning_rate": 2.645828051360517e-05, "loss": 0.102, "step": 13924 }, { "epoch": 0.24660530312086315, "grad_norm": 0.8755216598510742, "learning_rate": 2.6457725256781762e-05, "loss": 0.1069, "step": 13925 }, { "epoch": 0.2466230126578916, "grad_norm": 1.7489304542541504, "learning_rate": 2.64571699622636e-05, "loss": 0.105, "step": 13926 }, { "epoch": 0.24664072219492003, "grad_norm": 0.6278191208839417, "learning_rate": 2.6456614630052503e-05, "loss": 0.0582, "step": 13927 }, { "epoch": 0.24665843173194846, "grad_norm": 0.5630273222923279, "learning_rate": 2.6456059260150307e-05, "loss": 0.075, "step": 13928 }, { "epoch": 0.24667614126897688, "grad_norm": 0.9176039099693298, "learning_rate": 2.6455503852558834e-05, "loss": 0.1169, "step": 13929 }, { "epoch": 0.2466938508060053, "grad_norm": 1.534683108329773, "learning_rate": 2.645494840727991e-05, "loss": 0.1421, "step": 13930 }, { "epoch": 0.24671156034303374, "grad_norm": 1.289649248123169, "learning_rate": 2.6454392924315366e-05, "loss": 0.1104, "step": 13931 }, { "epoch": 0.24672926988006216, "grad_norm": 0.7631474733352661, "learning_rate": 2.6453837403667028e-05, "loss": 0.0994, "step": 13932 }, { "epoch": 0.2467469794170906, "grad_norm": 0.80819171667099, "learning_rate": 2.645328184533672e-05, "loss": 0.0874, "step": 13933 }, { "epoch": 0.24676468895411902, "grad_norm": 1.0535945892333984, "learning_rate": 2.645272624932628e-05, "loss": 0.0911, "step": 13934 }, { "epoch": 0.24678239849114744, "grad_norm": 1.152850866317749, "learning_rate": 2.6452170615637524e-05, "loss": 0.139, "step": 13935 }, { "epoch": 0.24680010802817587, "grad_norm": 6.675637722015381, "learning_rate": 2.6451614944272287e-05, "loss": 0.3184, "step": 13936 }, { "epoch": 0.2468178175652043, "grad_norm": 2.0211522579193115, "learning_rate": 2.6451059235232396e-05, "loss": 0.1464, "step": 13937 }, { "epoch": 0.24683552710223272, "grad_norm": 6.322003364562988, "learning_rate": 2.6450503488519674e-05, "loss": 0.1635, "step": 13938 }, { "epoch": 0.24685323663926115, "grad_norm": 1.2954745292663574, "learning_rate": 2.6449947704135956e-05, "loss": 0.1107, "step": 13939 }, { "epoch": 0.24687094617628957, "grad_norm": 1.2897915840148926, "learning_rate": 2.644939188208307e-05, "loss": 0.155, "step": 13940 }, { "epoch": 0.24688865571331803, "grad_norm": 2.9703028202056885, "learning_rate": 2.6448836022362837e-05, "loss": 0.085, "step": 13941 }, { "epoch": 0.24690636525034645, "grad_norm": 1.4427331686019897, "learning_rate": 2.64482801249771e-05, "loss": 0.1094, "step": 13942 }, { "epoch": 0.24692407478737488, "grad_norm": 1.3561360836029053, "learning_rate": 2.644772418992767e-05, "loss": 0.0848, "step": 13943 }, { "epoch": 0.2469417843244033, "grad_norm": 1.700498342514038, "learning_rate": 2.6447168217216394e-05, "loss": 0.1086, "step": 13944 }, { "epoch": 0.24695949386143173, "grad_norm": 2.4534878730773926, "learning_rate": 2.6446612206845088e-05, "loss": 0.1111, "step": 13945 }, { "epoch": 0.24697720339846016, "grad_norm": 1.114008903503418, "learning_rate": 2.6446056158815585e-05, "loss": 0.1081, "step": 13946 }, { "epoch": 0.2469949129354886, "grad_norm": 0.8654639720916748, "learning_rate": 2.6445500073129717e-05, "loss": 0.1207, "step": 13947 }, { "epoch": 0.247012622472517, "grad_norm": 1.354802131652832, "learning_rate": 2.644494394978931e-05, "loss": 0.1284, "step": 13948 }, { "epoch": 0.24703033200954544, "grad_norm": 1.0304367542266846, "learning_rate": 2.6444387788796195e-05, "loss": 0.129, "step": 13949 }, { "epoch": 0.24704804154657387, "grad_norm": 0.9515578746795654, "learning_rate": 2.6443831590152202e-05, "loss": 0.1249, "step": 13950 }, { "epoch": 0.2470657510836023, "grad_norm": 1.0151309967041016, "learning_rate": 2.644327535385916e-05, "loss": 0.0957, "step": 13951 }, { "epoch": 0.24708346062063072, "grad_norm": 0.8052160739898682, "learning_rate": 2.6442719079918902e-05, "loss": 0.0712, "step": 13952 }, { "epoch": 0.24710117015765914, "grad_norm": 0.9208707809448242, "learning_rate": 2.644216276833325e-05, "loss": 0.0932, "step": 13953 }, { "epoch": 0.24711887969468757, "grad_norm": 1.7095283269882202, "learning_rate": 2.6441606419104046e-05, "loss": 0.1245, "step": 13954 }, { "epoch": 0.247136589231716, "grad_norm": 1.0812897682189941, "learning_rate": 2.6441050032233114e-05, "loss": 0.1325, "step": 13955 }, { "epoch": 0.24715429876874445, "grad_norm": 0.8658574819564819, "learning_rate": 2.6440493607722283e-05, "loss": 0.1338, "step": 13956 }, { "epoch": 0.24717200830577288, "grad_norm": 0.8548786044120789, "learning_rate": 2.643993714557338e-05, "loss": 0.1139, "step": 13957 }, { "epoch": 0.2471897178428013, "grad_norm": 0.7670557498931885, "learning_rate": 2.6439380645788248e-05, "loss": 0.1272, "step": 13958 }, { "epoch": 0.24720742737982973, "grad_norm": 1.049678087234497, "learning_rate": 2.6438824108368708e-05, "loss": 0.1102, "step": 13959 }, { "epoch": 0.24722513691685816, "grad_norm": 1.1081606149673462, "learning_rate": 2.6438267533316593e-05, "loss": 0.1264, "step": 13960 }, { "epoch": 0.24724284645388658, "grad_norm": 0.8743500113487244, "learning_rate": 2.6437710920633734e-05, "loss": 0.1136, "step": 13961 }, { "epoch": 0.247260555990915, "grad_norm": 0.9081926345825195, "learning_rate": 2.6437154270321965e-05, "loss": 0.1024, "step": 13962 }, { "epoch": 0.24727826552794344, "grad_norm": 0.615380048751831, "learning_rate": 2.6436597582383116e-05, "loss": 0.0704, "step": 13963 }, { "epoch": 0.24729597506497186, "grad_norm": 1.1536357402801514, "learning_rate": 2.6436040856819015e-05, "loss": 0.1494, "step": 13964 }, { "epoch": 0.2473136846020003, "grad_norm": 0.6741334199905396, "learning_rate": 2.6435484093631497e-05, "loss": 0.0956, "step": 13965 }, { "epoch": 0.24733139413902872, "grad_norm": 1.4869600534439087, "learning_rate": 2.6434927292822394e-05, "loss": 0.077, "step": 13966 }, { "epoch": 0.24734910367605714, "grad_norm": 0.8231313228607178, "learning_rate": 2.6434370454393535e-05, "loss": 0.0927, "step": 13967 }, { "epoch": 0.24736681321308557, "grad_norm": 0.9635069370269775, "learning_rate": 2.643381357834676e-05, "loss": 0.103, "step": 13968 }, { "epoch": 0.247384522750114, "grad_norm": 0.9145362377166748, "learning_rate": 2.6433256664683887e-05, "loss": 0.0858, "step": 13969 }, { "epoch": 0.24740223228714242, "grad_norm": 0.8608038425445557, "learning_rate": 2.643269971340676e-05, "loss": 0.1156, "step": 13970 }, { "epoch": 0.24741994182417087, "grad_norm": 0.580215334892273, "learning_rate": 2.6432142724517206e-05, "loss": 0.0889, "step": 13971 }, { "epoch": 0.2474376513611993, "grad_norm": 0.965186595916748, "learning_rate": 2.6431585698017058e-05, "loss": 0.0906, "step": 13972 }, { "epoch": 0.24745536089822773, "grad_norm": 1.0284178256988525, "learning_rate": 2.6431028633908153e-05, "loss": 0.1269, "step": 13973 }, { "epoch": 0.24747307043525615, "grad_norm": 0.9752861857414246, "learning_rate": 2.6430471532192313e-05, "loss": 0.0951, "step": 13974 }, { "epoch": 0.24749077997228458, "grad_norm": 0.745154619216919, "learning_rate": 2.642991439287138e-05, "loss": 0.0645, "step": 13975 }, { "epoch": 0.247508489509313, "grad_norm": 0.8913313150405884, "learning_rate": 2.642935721594719e-05, "loss": 0.1082, "step": 13976 }, { "epoch": 0.24752619904634143, "grad_norm": 0.8383105993270874, "learning_rate": 2.6428800001421565e-05, "loss": 0.107, "step": 13977 }, { "epoch": 0.24754390858336986, "grad_norm": 0.7284809947013855, "learning_rate": 2.642824274929635e-05, "loss": 0.0688, "step": 13978 }, { "epoch": 0.24756161812039829, "grad_norm": 0.5446036458015442, "learning_rate": 2.6427685459573365e-05, "loss": 0.0751, "step": 13979 }, { "epoch": 0.2475793276574267, "grad_norm": 1.1296696662902832, "learning_rate": 2.6427128132254454e-05, "loss": 0.1127, "step": 13980 }, { "epoch": 0.24759703719445514, "grad_norm": 0.6678929924964905, "learning_rate": 2.642657076734145e-05, "loss": 0.0861, "step": 13981 }, { "epoch": 0.24761474673148356, "grad_norm": 0.7356703877449036, "learning_rate": 2.642601336483618e-05, "loss": 0.106, "step": 13982 }, { "epoch": 0.247632456268512, "grad_norm": 0.8672329187393188, "learning_rate": 2.642545592474048e-05, "loss": 0.0937, "step": 13983 }, { "epoch": 0.24765016580554042, "grad_norm": 0.8958902359008789, "learning_rate": 2.6424898447056193e-05, "loss": 0.134, "step": 13984 }, { "epoch": 0.24766787534256884, "grad_norm": 0.780591607093811, "learning_rate": 2.642434093178514e-05, "loss": 0.1142, "step": 13985 }, { "epoch": 0.2476855848795973, "grad_norm": 1.1164833307266235, "learning_rate": 2.642378337892916e-05, "loss": 0.1014, "step": 13986 }, { "epoch": 0.24770329441662572, "grad_norm": 1.0229002237319946, "learning_rate": 2.6423225788490092e-05, "loss": 0.1386, "step": 13987 }, { "epoch": 0.24772100395365415, "grad_norm": 0.8050715327262878, "learning_rate": 2.6422668160469768e-05, "loss": 0.093, "step": 13988 }, { "epoch": 0.24773871349068258, "grad_norm": 1.1301754713058472, "learning_rate": 2.6422110494870014e-05, "loss": 0.1273, "step": 13989 }, { "epoch": 0.247756423027711, "grad_norm": 1.145089030265808, "learning_rate": 2.6421552791692676e-05, "loss": 0.1516, "step": 13990 }, { "epoch": 0.24777413256473943, "grad_norm": 0.8368217945098877, "learning_rate": 2.642099505093959e-05, "loss": 0.0817, "step": 13991 }, { "epoch": 0.24779184210176786, "grad_norm": 1.0630688667297363, "learning_rate": 2.6420437272612573e-05, "loss": 0.1243, "step": 13992 }, { "epoch": 0.24780955163879628, "grad_norm": 0.689423143863678, "learning_rate": 2.6419879456713484e-05, "loss": 0.1082, "step": 13993 }, { "epoch": 0.2478272611758247, "grad_norm": 0.8402978181838989, "learning_rate": 2.6419321603244136e-05, "loss": 0.084, "step": 13994 }, { "epoch": 0.24784497071285314, "grad_norm": 0.9022905826568604, "learning_rate": 2.6418763712206384e-05, "loss": 0.1248, "step": 13995 }, { "epoch": 0.24786268024988156, "grad_norm": 0.712594211101532, "learning_rate": 2.641820578360205e-05, "loss": 0.0885, "step": 13996 }, { "epoch": 0.24788038978691, "grad_norm": 0.8159325122833252, "learning_rate": 2.6417647817432976e-05, "loss": 0.1006, "step": 13997 }, { "epoch": 0.24789809932393841, "grad_norm": 1.0712416172027588, "learning_rate": 2.6417089813700993e-05, "loss": 0.0762, "step": 13998 }, { "epoch": 0.24791580886096684, "grad_norm": 1.0421198606491089, "learning_rate": 2.6416531772407944e-05, "loss": 0.1018, "step": 13999 }, { "epoch": 0.24793351839799527, "grad_norm": 0.6321924924850464, "learning_rate": 2.641597369355565e-05, "loss": 0.0778, "step": 14000 }, { "epoch": 0.24795122793502372, "grad_norm": 0.8338348269462585, "learning_rate": 2.641541557714597e-05, "loss": 0.0946, "step": 14001 }, { "epoch": 0.24796893747205215, "grad_norm": 0.4865829348564148, "learning_rate": 2.641485742318072e-05, "loss": 0.0714, "step": 14002 }, { "epoch": 0.24798664700908057, "grad_norm": 0.6714741587638855, "learning_rate": 2.6414299231661745e-05, "loss": 0.1201, "step": 14003 }, { "epoch": 0.248004356546109, "grad_norm": 0.9919679760932922, "learning_rate": 2.641374100259088e-05, "loss": 0.0814, "step": 14004 }, { "epoch": 0.24802206608313743, "grad_norm": 0.9359171986579895, "learning_rate": 2.641318273596996e-05, "loss": 0.0672, "step": 14005 }, { "epoch": 0.24803977562016585, "grad_norm": 1.6010528802871704, "learning_rate": 2.6412624431800826e-05, "loss": 0.135, "step": 14006 }, { "epoch": 0.24805748515719428, "grad_norm": 0.7715298533439636, "learning_rate": 2.6412066090085313e-05, "loss": 0.0981, "step": 14007 }, { "epoch": 0.2480751946942227, "grad_norm": 1.0272057056427002, "learning_rate": 2.641150771082525e-05, "loss": 0.1407, "step": 14008 }, { "epoch": 0.24809290423125113, "grad_norm": 0.7574724555015564, "learning_rate": 2.6410949294022485e-05, "loss": 0.0748, "step": 14009 }, { "epoch": 0.24811061376827956, "grad_norm": 1.2858753204345703, "learning_rate": 2.6410390839678854e-05, "loss": 0.118, "step": 14010 }, { "epoch": 0.24812832330530799, "grad_norm": 1.1343693733215332, "learning_rate": 2.640983234779619e-05, "loss": 0.1024, "step": 14011 }, { "epoch": 0.2481460328423364, "grad_norm": 0.8244336247444153, "learning_rate": 2.640927381837633e-05, "loss": 0.1095, "step": 14012 }, { "epoch": 0.24816374237936484, "grad_norm": 1.068400263786316, "learning_rate": 2.6408715251421115e-05, "loss": 0.0804, "step": 14013 }, { "epoch": 0.24818145191639326, "grad_norm": 1.1293625831604004, "learning_rate": 2.6408156646932376e-05, "loss": 0.1215, "step": 14014 }, { "epoch": 0.2481991614534217, "grad_norm": 1.0375725030899048, "learning_rate": 2.640759800491196e-05, "loss": 0.1237, "step": 14015 }, { "epoch": 0.24821687099045014, "grad_norm": 0.7474406361579895, "learning_rate": 2.6407039325361696e-05, "loss": 0.1161, "step": 14016 }, { "epoch": 0.24823458052747857, "grad_norm": 1.1194465160369873, "learning_rate": 2.640648060828343e-05, "loss": 0.126, "step": 14017 }, { "epoch": 0.248252290064507, "grad_norm": 0.3721622824668884, "learning_rate": 2.6405921853678996e-05, "loss": 0.0509, "step": 14018 }, { "epoch": 0.24826999960153542, "grad_norm": 1.0114587545394897, "learning_rate": 2.6405363061550234e-05, "loss": 0.0803, "step": 14019 }, { "epoch": 0.24828770913856385, "grad_norm": 0.6470507979393005, "learning_rate": 2.640480423189898e-05, "loss": 0.0691, "step": 14020 }, { "epoch": 0.24830541867559228, "grad_norm": 1.2730190753936768, "learning_rate": 2.6404245364727075e-05, "loss": 0.0785, "step": 14021 }, { "epoch": 0.2483231282126207, "grad_norm": 0.8970986604690552, "learning_rate": 2.640368646003635e-05, "loss": 0.0788, "step": 14022 }, { "epoch": 0.24834083774964913, "grad_norm": 0.37884336709976196, "learning_rate": 2.6403127517828658e-05, "loss": 0.1177, "step": 14023 }, { "epoch": 0.24835854728667756, "grad_norm": 0.5771740078926086, "learning_rate": 2.6402568538105824e-05, "loss": 0.0743, "step": 14024 }, { "epoch": 0.24837625682370598, "grad_norm": 0.6466518640518188, "learning_rate": 2.6402009520869697e-05, "loss": 0.0733, "step": 14025 }, { "epoch": 0.2483939663607344, "grad_norm": 0.8925042152404785, "learning_rate": 2.6401450466122107e-05, "loss": 0.1319, "step": 14026 }, { "epoch": 0.24841167589776283, "grad_norm": 0.7220746874809265, "learning_rate": 2.6400891373864905e-05, "loss": 0.0698, "step": 14027 }, { "epoch": 0.24842938543479126, "grad_norm": 1.2276438474655151, "learning_rate": 2.6400332244099916e-05, "loss": 0.087, "step": 14028 }, { "epoch": 0.2484470949718197, "grad_norm": 1.0888690948486328, "learning_rate": 2.639977307682899e-05, "loss": 0.1033, "step": 14029 }, { "epoch": 0.2484648045088481, "grad_norm": 0.8601173758506775, "learning_rate": 2.6399213872053964e-05, "loss": 0.075, "step": 14030 }, { "epoch": 0.24848251404587657, "grad_norm": 1.1477644443511963, "learning_rate": 2.6398654629776677e-05, "loss": 0.0851, "step": 14031 }, { "epoch": 0.248500223582905, "grad_norm": 0.82615727186203, "learning_rate": 2.6398095349998974e-05, "loss": 0.1448, "step": 14032 }, { "epoch": 0.24851793311993342, "grad_norm": 0.8379239439964294, "learning_rate": 2.6397536032722683e-05, "loss": 0.107, "step": 14033 }, { "epoch": 0.24853564265696185, "grad_norm": 0.8074853420257568, "learning_rate": 2.6396976677949653e-05, "loss": 0.0948, "step": 14034 }, { "epoch": 0.24855335219399027, "grad_norm": 1.0852100849151611, "learning_rate": 2.6396417285681726e-05, "loss": 0.1404, "step": 14035 }, { "epoch": 0.2485710617310187, "grad_norm": 1.3896652460098267, "learning_rate": 2.6395857855920737e-05, "loss": 0.1018, "step": 14036 }, { "epoch": 0.24858877126804713, "grad_norm": 1.4612338542938232, "learning_rate": 2.6395298388668524e-05, "loss": 0.074, "step": 14037 }, { "epoch": 0.24860648080507555, "grad_norm": 0.7674315571784973, "learning_rate": 2.6394738883926938e-05, "loss": 0.0735, "step": 14038 }, { "epoch": 0.24862419034210398, "grad_norm": 1.0053707361221313, "learning_rate": 2.6394179341697812e-05, "loss": 0.1132, "step": 14039 }, { "epoch": 0.2486418998791324, "grad_norm": 0.8422948122024536, "learning_rate": 2.6393619761982985e-05, "loss": 0.1006, "step": 14040 }, { "epoch": 0.24865960941616083, "grad_norm": 0.7594429850578308, "learning_rate": 2.6393060144784304e-05, "loss": 0.0934, "step": 14041 }, { "epoch": 0.24867731895318926, "grad_norm": 0.5172669291496277, "learning_rate": 2.6392500490103606e-05, "loss": 0.1034, "step": 14042 }, { "epoch": 0.24869502849021768, "grad_norm": 0.6698586940765381, "learning_rate": 2.639194079794274e-05, "loss": 0.087, "step": 14043 }, { "epoch": 0.2487127380272461, "grad_norm": 0.9757418632507324, "learning_rate": 2.639138106830353e-05, "loss": 0.0945, "step": 14044 }, { "epoch": 0.24873044756427454, "grad_norm": 0.7534773945808411, "learning_rate": 2.639082130118784e-05, "loss": 0.0788, "step": 14045 }, { "epoch": 0.248748157101303, "grad_norm": 0.9203715920448303, "learning_rate": 2.639026149659749e-05, "loss": 0.0773, "step": 14046 }, { "epoch": 0.24876586663833142, "grad_norm": 0.8136221766471863, "learning_rate": 2.638970165453434e-05, "loss": 0.1166, "step": 14047 }, { "epoch": 0.24878357617535984, "grad_norm": 0.8758092522621155, "learning_rate": 2.638914177500022e-05, "loss": 0.1143, "step": 14048 }, { "epoch": 0.24880128571238827, "grad_norm": 0.5082105398178101, "learning_rate": 2.6388581857996974e-05, "loss": 0.1331, "step": 14049 }, { "epoch": 0.2488189952494167, "grad_norm": 2.0621602535247803, "learning_rate": 2.6388021903526446e-05, "loss": 0.0791, "step": 14050 }, { "epoch": 0.24883670478644512, "grad_norm": 1.4548406600952148, "learning_rate": 2.638746191159048e-05, "loss": 0.0964, "step": 14051 }, { "epoch": 0.24885441432347355, "grad_norm": 0.9284650087356567, "learning_rate": 2.6386901882190916e-05, "loss": 0.1015, "step": 14052 }, { "epoch": 0.24887212386050198, "grad_norm": 0.6773951053619385, "learning_rate": 2.6386341815329592e-05, "loss": 0.078, "step": 14053 }, { "epoch": 0.2488898333975304, "grad_norm": 1.0690348148345947, "learning_rate": 2.6385781711008355e-05, "loss": 0.1068, "step": 14054 }, { "epoch": 0.24890754293455883, "grad_norm": 1.2276670932769775, "learning_rate": 2.6385221569229058e-05, "loss": 0.0701, "step": 14055 }, { "epoch": 0.24892525247158725, "grad_norm": 0.7038775682449341, "learning_rate": 2.6384661389993523e-05, "loss": 0.1189, "step": 14056 }, { "epoch": 0.24894296200861568, "grad_norm": 1.0900715589523315, "learning_rate": 2.6384101173303608e-05, "loss": 0.0746, "step": 14057 }, { "epoch": 0.2489606715456441, "grad_norm": 0.8278262615203857, "learning_rate": 2.638354091916115e-05, "loss": 0.1493, "step": 14058 }, { "epoch": 0.24897838108267253, "grad_norm": 0.7762850522994995, "learning_rate": 2.6382980627567995e-05, "loss": 0.0943, "step": 14059 }, { "epoch": 0.248996090619701, "grad_norm": 1.0095574855804443, "learning_rate": 2.6382420298525986e-05, "loss": 0.0923, "step": 14060 }, { "epoch": 0.24901380015672941, "grad_norm": 0.8120904564857483, "learning_rate": 2.6381859932036966e-05, "loss": 0.1327, "step": 14061 }, { "epoch": 0.24903150969375784, "grad_norm": 0.9132803678512573, "learning_rate": 2.6381299528102774e-05, "loss": 0.1191, "step": 14062 }, { "epoch": 0.24904921923078627, "grad_norm": 0.6788007616996765, "learning_rate": 2.638073908672526e-05, "loss": 0.0694, "step": 14063 }, { "epoch": 0.2490669287678147, "grad_norm": 0.8208765983581543, "learning_rate": 2.6380178607906265e-05, "loss": 0.0786, "step": 14064 }, { "epoch": 0.24908463830484312, "grad_norm": 0.8789253830909729, "learning_rate": 2.637961809164763e-05, "loss": 0.1028, "step": 14065 }, { "epoch": 0.24910234784187155, "grad_norm": 0.4913403391838074, "learning_rate": 2.637905753795121e-05, "loss": 0.095, "step": 14066 }, { "epoch": 0.24912005737889997, "grad_norm": 1.736594319343567, "learning_rate": 2.6378496946818837e-05, "loss": 0.1297, "step": 14067 }, { "epoch": 0.2491377669159284, "grad_norm": 0.7308431267738342, "learning_rate": 2.6377936318252358e-05, "loss": 0.0903, "step": 14068 }, { "epoch": 0.24915547645295683, "grad_norm": 0.9179704785346985, "learning_rate": 2.6377375652253622e-05, "loss": 0.0977, "step": 14069 }, { "epoch": 0.24917318598998525, "grad_norm": 1.1602773666381836, "learning_rate": 2.6376814948824474e-05, "loss": 0.0926, "step": 14070 }, { "epoch": 0.24919089552701368, "grad_norm": 0.6989523768424988, "learning_rate": 2.637625420796675e-05, "loss": 0.0558, "step": 14071 }, { "epoch": 0.2492086050640421, "grad_norm": 1.013738751411438, "learning_rate": 2.6375693429682304e-05, "loss": 0.0953, "step": 14072 }, { "epoch": 0.24922631460107053, "grad_norm": 1.1008765697479248, "learning_rate": 2.6375132613972975e-05, "loss": 0.0925, "step": 14073 }, { "epoch": 0.24924402413809896, "grad_norm": 0.8803789019584656, "learning_rate": 2.6374571760840613e-05, "loss": 0.0834, "step": 14074 }, { "epoch": 0.2492617336751274, "grad_norm": 0.9174559712409973, "learning_rate": 2.6374010870287057e-05, "loss": 0.0713, "step": 14075 }, { "epoch": 0.24927944321215584, "grad_norm": 0.9925279021263123, "learning_rate": 2.637344994231416e-05, "loss": 0.1262, "step": 14076 }, { "epoch": 0.24929715274918426, "grad_norm": 0.8153873682022095, "learning_rate": 2.637288897692376e-05, "loss": 0.1028, "step": 14077 }, { "epoch": 0.2493148622862127, "grad_norm": 0.9142839312553406, "learning_rate": 2.6372327974117704e-05, "loss": 0.0889, "step": 14078 }, { "epoch": 0.24933257182324112, "grad_norm": 0.8501275777816772, "learning_rate": 2.6371766933897844e-05, "loss": 0.1307, "step": 14079 }, { "epoch": 0.24935028136026954, "grad_norm": 0.8201941847801208, "learning_rate": 2.6371205856266012e-05, "loss": 0.0796, "step": 14080 }, { "epoch": 0.24936799089729797, "grad_norm": 1.2334589958190918, "learning_rate": 2.637064474122407e-05, "loss": 0.1175, "step": 14081 }, { "epoch": 0.2493857004343264, "grad_norm": 0.6335233449935913, "learning_rate": 2.6370083588773856e-05, "loss": 0.0967, "step": 14082 }, { "epoch": 0.24940340997135482, "grad_norm": 0.6358277201652527, "learning_rate": 2.636952239891722e-05, "loss": 0.0979, "step": 14083 }, { "epoch": 0.24942111950838325, "grad_norm": 0.5847216844558716, "learning_rate": 2.6368961171656002e-05, "loss": 0.0706, "step": 14084 }, { "epoch": 0.24943882904541168, "grad_norm": 0.6452047228813171, "learning_rate": 2.636839990699205e-05, "loss": 0.1005, "step": 14085 }, { "epoch": 0.2494565385824401, "grad_norm": 0.5314399600028992, "learning_rate": 2.6367838604927213e-05, "loss": 0.0712, "step": 14086 }, { "epoch": 0.24947424811946853, "grad_norm": 0.7996102571487427, "learning_rate": 2.6367277265463335e-05, "loss": 0.0828, "step": 14087 }, { "epoch": 0.24949195765649695, "grad_norm": 0.9090518355369568, "learning_rate": 2.636671588860227e-05, "loss": 0.1075, "step": 14088 }, { "epoch": 0.24950966719352538, "grad_norm": 0.5983657240867615, "learning_rate": 2.6366154474345856e-05, "loss": 0.0692, "step": 14089 }, { "epoch": 0.24952737673055383, "grad_norm": 0.8328096866607666, "learning_rate": 2.6365593022695944e-05, "loss": 0.1094, "step": 14090 }, { "epoch": 0.24954508626758226, "grad_norm": 0.7247660756111145, "learning_rate": 2.6365031533654383e-05, "loss": 0.0962, "step": 14091 }, { "epoch": 0.2495627958046107, "grad_norm": 0.5858404636383057, "learning_rate": 2.6364470007223012e-05, "loss": 0.1139, "step": 14092 }, { "epoch": 0.24958050534163911, "grad_norm": 0.5436884164810181, "learning_rate": 2.6363908443403688e-05, "loss": 0.0622, "step": 14093 }, { "epoch": 0.24959821487866754, "grad_norm": 0.8688008785247803, "learning_rate": 2.6363346842198254e-05, "loss": 0.0823, "step": 14094 }, { "epoch": 0.24961592441569597, "grad_norm": 1.0701810121536255, "learning_rate": 2.6362785203608557e-05, "loss": 0.1225, "step": 14095 }, { "epoch": 0.2496336339527244, "grad_norm": 0.4454350173473358, "learning_rate": 2.636222352763645e-05, "loss": 0.076, "step": 14096 }, { "epoch": 0.24965134348975282, "grad_norm": 0.6744809746742249, "learning_rate": 2.6361661814283772e-05, "loss": 0.1074, "step": 14097 }, { "epoch": 0.24966905302678125, "grad_norm": 0.5759086608886719, "learning_rate": 2.6361100063552378e-05, "loss": 0.0476, "step": 14098 }, { "epoch": 0.24968676256380967, "grad_norm": 1.0892218351364136, "learning_rate": 2.6360538275444113e-05, "loss": 0.0984, "step": 14099 }, { "epoch": 0.2497044721008381, "grad_norm": 0.8527393341064453, "learning_rate": 2.6359976449960828e-05, "loss": 0.1025, "step": 14100 }, { "epoch": 0.24972218163786652, "grad_norm": 1.3378931283950806, "learning_rate": 2.6359414587104366e-05, "loss": 0.121, "step": 14101 }, { "epoch": 0.24973989117489495, "grad_norm": 0.8169151544570923, "learning_rate": 2.6358852686876582e-05, "loss": 0.1004, "step": 14102 }, { "epoch": 0.24975760071192338, "grad_norm": 1.0226590633392334, "learning_rate": 2.6358290749279322e-05, "loss": 0.1225, "step": 14103 }, { "epoch": 0.2497753102489518, "grad_norm": 1.0175665616989136, "learning_rate": 2.635772877431443e-05, "loss": 0.1057, "step": 14104 }, { "epoch": 0.24979301978598026, "grad_norm": 1.2236522436141968, "learning_rate": 2.6357166761983764e-05, "loss": 0.1567, "step": 14105 }, { "epoch": 0.24981072932300868, "grad_norm": 0.8906488418579102, "learning_rate": 2.6356604712289164e-05, "loss": 0.0992, "step": 14106 }, { "epoch": 0.2498284388600371, "grad_norm": 0.5021268725395203, "learning_rate": 2.6356042625232486e-05, "loss": 0.0772, "step": 14107 }, { "epoch": 0.24984614839706554, "grad_norm": 2.0888397693634033, "learning_rate": 2.6355480500815576e-05, "loss": 0.1203, "step": 14108 }, { "epoch": 0.24986385793409396, "grad_norm": 0.7509682178497314, "learning_rate": 2.6354918339040282e-05, "loss": 0.0733, "step": 14109 }, { "epoch": 0.2498815674711224, "grad_norm": 1.0828710794448853, "learning_rate": 2.6354356139908456e-05, "loss": 0.1171, "step": 14110 }, { "epoch": 0.24989927700815082, "grad_norm": 1.4190690517425537, "learning_rate": 2.635379390342195e-05, "loss": 0.136, "step": 14111 }, { "epoch": 0.24991698654517924, "grad_norm": 0.9407625794410706, "learning_rate": 2.6353231629582607e-05, "loss": 0.1112, "step": 14112 }, { "epoch": 0.24993469608220767, "grad_norm": 0.5498059391975403, "learning_rate": 2.635266931839228e-05, "loss": 0.0469, "step": 14113 }, { "epoch": 0.2499524056192361, "grad_norm": 1.0045939683914185, "learning_rate": 2.6352106969852817e-05, "loss": 0.0892, "step": 14114 }, { "epoch": 0.24997011515626452, "grad_norm": 0.7858878970146179, "learning_rate": 2.635154458396608e-05, "loss": 0.1045, "step": 14115 }, { "epoch": 0.24998782469329295, "grad_norm": 0.6888148188591003, "learning_rate": 2.63509821607339e-05, "loss": 0.084, "step": 14116 }, { "epoch": 0.2500055342303214, "grad_norm": 1.5322571992874146, "learning_rate": 2.635041970015814e-05, "loss": 0.1273, "step": 14117 }, { "epoch": 0.2500232437673498, "grad_norm": 0.9052980542182922, "learning_rate": 2.6349857202240645e-05, "loss": 0.1044, "step": 14118 }, { "epoch": 0.25004095330437826, "grad_norm": 0.6261995434761047, "learning_rate": 2.634929466698327e-05, "loss": 0.0805, "step": 14119 }, { "epoch": 0.25005866284140665, "grad_norm": 1.1056815385818481, "learning_rate": 2.634873209438786e-05, "loss": 0.1205, "step": 14120 }, { "epoch": 0.2500763723784351, "grad_norm": 0.5837410688400269, "learning_rate": 2.6348169484456274e-05, "loss": 0.1063, "step": 14121 }, { "epoch": 0.2500940819154635, "grad_norm": 0.6977564692497253, "learning_rate": 2.6347606837190358e-05, "loss": 0.0924, "step": 14122 }, { "epoch": 0.25011179145249196, "grad_norm": 0.9223339557647705, "learning_rate": 2.634704415259196e-05, "loss": 0.0786, "step": 14123 }, { "epoch": 0.25012950098952036, "grad_norm": 0.5811683535575867, "learning_rate": 2.6346481430662935e-05, "loss": 0.0898, "step": 14124 }, { "epoch": 0.2501472105265488, "grad_norm": 0.7660568952560425, "learning_rate": 2.6345918671405135e-05, "loss": 0.0876, "step": 14125 }, { "epoch": 0.2501649200635772, "grad_norm": 0.7451796531677246, "learning_rate": 2.634535587482041e-05, "loss": 0.0962, "step": 14126 }, { "epoch": 0.25018262960060567, "grad_norm": 0.6062743663787842, "learning_rate": 2.634479304091061e-05, "loss": 0.0853, "step": 14127 }, { "epoch": 0.2502003391376341, "grad_norm": 0.8145233988761902, "learning_rate": 2.6344230169677596e-05, "loss": 0.0819, "step": 14128 }, { "epoch": 0.2502180486746625, "grad_norm": 0.9065536260604858, "learning_rate": 2.63436672611232e-05, "loss": 0.0933, "step": 14129 }, { "epoch": 0.250235758211691, "grad_norm": 0.6645797491073608, "learning_rate": 2.6343104315249294e-05, "loss": 0.0831, "step": 14130 }, { "epoch": 0.25025346774871937, "grad_norm": 0.609932005405426, "learning_rate": 2.634254133205772e-05, "loss": 0.0813, "step": 14131 }, { "epoch": 0.2502711772857478, "grad_norm": 1.2165660858154297, "learning_rate": 2.6341978311550335e-05, "loss": 0.0868, "step": 14132 }, { "epoch": 0.2502888868227762, "grad_norm": 0.5499749779701233, "learning_rate": 2.6341415253728983e-05, "loss": 0.1194, "step": 14133 }, { "epoch": 0.2503065963598047, "grad_norm": 1.130826711654663, "learning_rate": 2.6340852158595524e-05, "loss": 0.0533, "step": 14134 }, { "epoch": 0.2503243058968331, "grad_norm": 1.2093898057937622, "learning_rate": 2.634028902615181e-05, "loss": 0.1558, "step": 14135 }, { "epoch": 0.25034201543386153, "grad_norm": 0.8261436820030212, "learning_rate": 2.6339725856399695e-05, "loss": 0.1064, "step": 14136 }, { "epoch": 0.25035972497088993, "grad_norm": 0.843249499797821, "learning_rate": 2.6339162649341024e-05, "loss": 0.0883, "step": 14137 }, { "epoch": 0.2503774345079184, "grad_norm": 0.6550817489624023, "learning_rate": 2.6338599404977656e-05, "loss": 0.0924, "step": 14138 }, { "epoch": 0.2503951440449468, "grad_norm": 0.8206276893615723, "learning_rate": 2.633803612331144e-05, "loss": 0.1192, "step": 14139 }, { "epoch": 0.25041285358197524, "grad_norm": 0.6031401753425598, "learning_rate": 2.6337472804344238e-05, "loss": 0.0858, "step": 14140 }, { "epoch": 0.25043056311900364, "grad_norm": 1.126563310623169, "learning_rate": 2.6336909448077895e-05, "loss": 0.1212, "step": 14141 }, { "epoch": 0.2504482726560321, "grad_norm": 0.7255061268806458, "learning_rate": 2.633634605451426e-05, "loss": 0.1142, "step": 14142 }, { "epoch": 0.25046598219306054, "grad_norm": 1.1413569450378418, "learning_rate": 2.6335782623655204e-05, "loss": 0.1025, "step": 14143 }, { "epoch": 0.25048369173008894, "grad_norm": 0.8187820911407471, "learning_rate": 2.6335219155502564e-05, "loss": 0.0895, "step": 14144 }, { "epoch": 0.2505014012671174, "grad_norm": 0.5240395069122314, "learning_rate": 2.6334655650058197e-05, "loss": 0.0663, "step": 14145 }, { "epoch": 0.2505191108041458, "grad_norm": 0.8175118565559387, "learning_rate": 2.6334092107323968e-05, "loss": 0.1115, "step": 14146 }, { "epoch": 0.25053682034117425, "grad_norm": 0.4724104106426239, "learning_rate": 2.633352852730171e-05, "loss": 0.0568, "step": 14147 }, { "epoch": 0.25055452987820265, "grad_norm": 1.296175241470337, "learning_rate": 2.6332964909993298e-05, "loss": 0.1328, "step": 14148 }, { "epoch": 0.2505722394152311, "grad_norm": 0.9398148059844971, "learning_rate": 2.6332401255400578e-05, "loss": 0.0972, "step": 14149 }, { "epoch": 0.2505899489522595, "grad_norm": 1.1240452527999878, "learning_rate": 2.63318375635254e-05, "loss": 0.0767, "step": 14150 }, { "epoch": 0.25060765848928795, "grad_norm": 0.7217375636100769, "learning_rate": 2.6331273834369624e-05, "loss": 0.0829, "step": 14151 }, { "epoch": 0.25062536802631635, "grad_norm": 0.6007652282714844, "learning_rate": 2.63307100679351e-05, "loss": 0.0841, "step": 14152 }, { "epoch": 0.2506430775633448, "grad_norm": 0.6050270199775696, "learning_rate": 2.6330146264223694e-05, "loss": 0.0723, "step": 14153 }, { "epoch": 0.2506607871003732, "grad_norm": 0.7686470746994019, "learning_rate": 2.6329582423237248e-05, "loss": 0.1055, "step": 14154 }, { "epoch": 0.25067849663740166, "grad_norm": 0.8031103610992432, "learning_rate": 2.632901854497762e-05, "loss": 0.1032, "step": 14155 }, { "epoch": 0.25069620617443006, "grad_norm": 0.7312447428703308, "learning_rate": 2.6328454629446668e-05, "loss": 0.0781, "step": 14156 }, { "epoch": 0.2507139157114585, "grad_norm": 1.0815924406051636, "learning_rate": 2.6327890676646248e-05, "loss": 0.1324, "step": 14157 }, { "epoch": 0.25073162524848697, "grad_norm": 0.7665587067604065, "learning_rate": 2.6327326686578214e-05, "loss": 0.072, "step": 14158 }, { "epoch": 0.25074933478551537, "grad_norm": 2.3402042388916016, "learning_rate": 2.632676265924442e-05, "loss": 0.1261, "step": 14159 }, { "epoch": 0.2507670443225438, "grad_norm": 1.1264985799789429, "learning_rate": 2.632619859464672e-05, "loss": 0.1399, "step": 14160 }, { "epoch": 0.2507847538595722, "grad_norm": 0.9761722087860107, "learning_rate": 2.6325634492786978e-05, "loss": 0.1179, "step": 14161 }, { "epoch": 0.25080246339660067, "grad_norm": 1.0319949388504028, "learning_rate": 2.6325070353667038e-05, "loss": 0.0889, "step": 14162 }, { "epoch": 0.25082017293362907, "grad_norm": 1.278475284576416, "learning_rate": 2.6324506177288765e-05, "loss": 0.1291, "step": 14163 }, { "epoch": 0.2508378824706575, "grad_norm": 0.7285704016685486, "learning_rate": 2.632394196365401e-05, "loss": 0.1131, "step": 14164 }, { "epoch": 0.2508555920076859, "grad_norm": 0.7701048851013184, "learning_rate": 2.6323377712764632e-05, "loss": 0.0861, "step": 14165 }, { "epoch": 0.2508733015447144, "grad_norm": 0.7878021001815796, "learning_rate": 2.6322813424622486e-05, "loss": 0.0839, "step": 14166 }, { "epoch": 0.2508910110817428, "grad_norm": 0.6844021677970886, "learning_rate": 2.632224909922943e-05, "loss": 0.0987, "step": 14167 }, { "epoch": 0.25090872061877123, "grad_norm": 0.6914643049240112, "learning_rate": 2.6321684736587317e-05, "loss": 0.0523, "step": 14168 }, { "epoch": 0.25092643015579963, "grad_norm": 0.731376051902771, "learning_rate": 2.632112033669801e-05, "loss": 0.0729, "step": 14169 }, { "epoch": 0.2509441396928281, "grad_norm": 1.1814846992492676, "learning_rate": 2.6320555899563363e-05, "loss": 0.1001, "step": 14170 }, { "epoch": 0.2509618492298565, "grad_norm": 0.8013719320297241, "learning_rate": 2.6319991425185228e-05, "loss": 0.0723, "step": 14171 }, { "epoch": 0.25097955876688494, "grad_norm": 1.202723741531372, "learning_rate": 2.6319426913565473e-05, "loss": 0.133, "step": 14172 }, { "epoch": 0.2509972683039134, "grad_norm": 0.95686936378479, "learning_rate": 2.6318862364705942e-05, "loss": 0.1316, "step": 14173 }, { "epoch": 0.2510149778409418, "grad_norm": 0.6993273496627808, "learning_rate": 2.6318297778608498e-05, "loss": 0.0745, "step": 14174 }, { "epoch": 0.25103268737797024, "grad_norm": 0.6716019511222839, "learning_rate": 2.6317733155275004e-05, "loss": 0.0937, "step": 14175 }, { "epoch": 0.25105039691499864, "grad_norm": 0.80375736951828, "learning_rate": 2.6317168494707308e-05, "loss": 0.1079, "step": 14176 }, { "epoch": 0.2510681064520271, "grad_norm": 0.8482691049575806, "learning_rate": 2.6316603796907275e-05, "loss": 0.0692, "step": 14177 }, { "epoch": 0.2510858159890555, "grad_norm": 0.8907110691070557, "learning_rate": 2.631603906187676e-05, "loss": 0.0944, "step": 14178 }, { "epoch": 0.25110352552608395, "grad_norm": 0.741435170173645, "learning_rate": 2.6315474289617622e-05, "loss": 0.1141, "step": 14179 }, { "epoch": 0.25112123506311235, "grad_norm": 0.6326404809951782, "learning_rate": 2.631490948013171e-05, "loss": 0.0699, "step": 14180 }, { "epoch": 0.2511389446001408, "grad_norm": 0.6410948038101196, "learning_rate": 2.63143446334209e-05, "loss": 0.1415, "step": 14181 }, { "epoch": 0.2511566541371692, "grad_norm": 0.8376877903938293, "learning_rate": 2.631377974948704e-05, "loss": 0.0971, "step": 14182 }, { "epoch": 0.25117436367419765, "grad_norm": 0.7370343804359436, "learning_rate": 2.6313214828331983e-05, "loss": 0.122, "step": 14183 }, { "epoch": 0.25119207321122605, "grad_norm": 1.1200083494186401, "learning_rate": 2.6312649869957603e-05, "loss": 0.0941, "step": 14184 }, { "epoch": 0.2512097827482545, "grad_norm": 0.5693433284759521, "learning_rate": 2.631208487436574e-05, "loss": 0.0689, "step": 14185 }, { "epoch": 0.2512274922852829, "grad_norm": 0.9735647439956665, "learning_rate": 2.6311519841558264e-05, "loss": 0.1034, "step": 14186 }, { "epoch": 0.25124520182231136, "grad_norm": 0.6599847674369812, "learning_rate": 2.6310954771537033e-05, "loss": 0.0927, "step": 14187 }, { "epoch": 0.2512629113593398, "grad_norm": 1.3106857538223267, "learning_rate": 2.6310389664303903e-05, "loss": 0.1271, "step": 14188 }, { "epoch": 0.2512806208963682, "grad_norm": 0.9457247257232666, "learning_rate": 2.6309824519860738e-05, "loss": 0.0895, "step": 14189 }, { "epoch": 0.25129833043339667, "grad_norm": 1.113710880279541, "learning_rate": 2.6309259338209394e-05, "loss": 0.0492, "step": 14190 }, { "epoch": 0.25131603997042506, "grad_norm": 0.7425384521484375, "learning_rate": 2.630869411935173e-05, "loss": 0.0883, "step": 14191 }, { "epoch": 0.2513337495074535, "grad_norm": 0.917079508304596, "learning_rate": 2.6308128863289603e-05, "loss": 0.0881, "step": 14192 }, { "epoch": 0.2513514590444819, "grad_norm": 0.6920463442802429, "learning_rate": 2.630756357002488e-05, "loss": 0.1136, "step": 14193 }, { "epoch": 0.25136916858151037, "grad_norm": 0.6503214836120605, "learning_rate": 2.6306998239559415e-05, "loss": 0.1067, "step": 14194 }, { "epoch": 0.25138687811853877, "grad_norm": 0.8063676357269287, "learning_rate": 2.630643287189507e-05, "loss": 0.0682, "step": 14195 }, { "epoch": 0.2514045876555672, "grad_norm": 0.6011866927146912, "learning_rate": 2.63058674670337e-05, "loss": 0.108, "step": 14196 }, { "epoch": 0.2514222971925956, "grad_norm": 0.7156371474266052, "learning_rate": 2.6305302024977175e-05, "loss": 0.1143, "step": 14197 }, { "epoch": 0.2514400067296241, "grad_norm": 0.47753220796585083, "learning_rate": 2.630473654572735e-05, "loss": 0.119, "step": 14198 }, { "epoch": 0.2514577162666525, "grad_norm": 0.7355883717536926, "learning_rate": 2.630417102928608e-05, "loss": 0.0966, "step": 14199 }, { "epoch": 0.25147542580368093, "grad_norm": 1.153197169303894, "learning_rate": 2.6303605475655235e-05, "loss": 0.0874, "step": 14200 }, { "epoch": 0.25149313534070933, "grad_norm": 0.5430096983909607, "learning_rate": 2.6303039884836667e-05, "loss": 0.0711, "step": 14201 }, { "epoch": 0.2515108448777378, "grad_norm": 0.924282431602478, "learning_rate": 2.6302474256832248e-05, "loss": 0.1049, "step": 14202 }, { "epoch": 0.25152855441476624, "grad_norm": 1.3458389043807983, "learning_rate": 2.6301908591643828e-05, "loss": 0.1507, "step": 14203 }, { "epoch": 0.25154626395179464, "grad_norm": 0.7136276960372925, "learning_rate": 2.630134288927327e-05, "loss": 0.0639, "step": 14204 }, { "epoch": 0.2515639734888231, "grad_norm": 0.7781043648719788, "learning_rate": 2.630077714972244e-05, "loss": 0.1105, "step": 14205 }, { "epoch": 0.2515816830258515, "grad_norm": 1.0446562767028809, "learning_rate": 2.6300211372993197e-05, "loss": 0.0958, "step": 14206 }, { "epoch": 0.25159939256287994, "grad_norm": 0.7948789000511169, "learning_rate": 2.6299645559087398e-05, "loss": 0.115, "step": 14207 }, { "epoch": 0.25161710209990834, "grad_norm": 0.8238006830215454, "learning_rate": 2.629907970800691e-05, "loss": 0.0764, "step": 14208 }, { "epoch": 0.2516348116369368, "grad_norm": 0.8240371942520142, "learning_rate": 2.6298513819753595e-05, "loss": 0.0435, "step": 14209 }, { "epoch": 0.2516525211739652, "grad_norm": 0.738215446472168, "learning_rate": 2.6297947894329305e-05, "loss": 0.1117, "step": 14210 }, { "epoch": 0.25167023071099365, "grad_norm": 0.78566974401474, "learning_rate": 2.6297381931735915e-05, "loss": 0.1089, "step": 14211 }, { "epoch": 0.25168794024802205, "grad_norm": 0.8788307309150696, "learning_rate": 2.629681593197528e-05, "loss": 0.108, "step": 14212 }, { "epoch": 0.2517056497850505, "grad_norm": 0.7168081998825073, "learning_rate": 2.629624989504926e-05, "loss": 0.0691, "step": 14213 }, { "epoch": 0.2517233593220789, "grad_norm": 0.6842944622039795, "learning_rate": 2.6295683820959725e-05, "loss": 0.1103, "step": 14214 }, { "epoch": 0.25174106885910735, "grad_norm": 1.127440094947815, "learning_rate": 2.6295117709708532e-05, "loss": 0.1286, "step": 14215 }, { "epoch": 0.25175877839613575, "grad_norm": 0.8039445877075195, "learning_rate": 2.6294551561297543e-05, "loss": 0.0692, "step": 14216 }, { "epoch": 0.2517764879331642, "grad_norm": 1.1504733562469482, "learning_rate": 2.629398537572862e-05, "loss": 0.1261, "step": 14217 }, { "epoch": 0.25179419747019266, "grad_norm": 0.592244565486908, "learning_rate": 2.629341915300363e-05, "loss": 0.0593, "step": 14218 }, { "epoch": 0.25181190700722106, "grad_norm": 0.9388167858123779, "learning_rate": 2.6292852893124434e-05, "loss": 0.0913, "step": 14219 }, { "epoch": 0.2518296165442495, "grad_norm": 0.857871949672699, "learning_rate": 2.6292286596092895e-05, "loss": 0.0741, "step": 14220 }, { "epoch": 0.2518473260812779, "grad_norm": 1.1339360475540161, "learning_rate": 2.629172026191087e-05, "loss": 0.1202, "step": 14221 }, { "epoch": 0.25186503561830637, "grad_norm": 0.7978854775428772, "learning_rate": 2.629115389058023e-05, "loss": 0.1091, "step": 14222 }, { "epoch": 0.25188274515533476, "grad_norm": 0.8741371631622314, "learning_rate": 2.629058748210284e-05, "loss": 0.0882, "step": 14223 }, { "epoch": 0.2519004546923632, "grad_norm": 0.7266908884048462, "learning_rate": 2.6290021036480555e-05, "loss": 0.0672, "step": 14224 }, { "epoch": 0.2519181642293916, "grad_norm": 0.5710696578025818, "learning_rate": 2.628945455371524e-05, "loss": 0.0795, "step": 14225 }, { "epoch": 0.25193587376642007, "grad_norm": 0.8990541696548462, "learning_rate": 2.6288888033808763e-05, "loss": 0.1306, "step": 14226 }, { "epoch": 0.25195358330344847, "grad_norm": 0.8853703737258911, "learning_rate": 2.6288321476762992e-05, "loss": 0.1211, "step": 14227 }, { "epoch": 0.2519712928404769, "grad_norm": 0.5985936522483826, "learning_rate": 2.628775488257978e-05, "loss": 0.0604, "step": 14228 }, { "epoch": 0.2519890023775053, "grad_norm": 1.1310955286026, "learning_rate": 2.6287188251260996e-05, "loss": 0.1058, "step": 14229 }, { "epoch": 0.2520067119145338, "grad_norm": 1.0585873126983643, "learning_rate": 2.6286621582808508e-05, "loss": 0.096, "step": 14230 }, { "epoch": 0.2520244214515622, "grad_norm": 0.4990546703338623, "learning_rate": 2.6286054877224173e-05, "loss": 0.0996, "step": 14231 }, { "epoch": 0.25204213098859063, "grad_norm": 0.8312732577323914, "learning_rate": 2.6285488134509857e-05, "loss": 0.0882, "step": 14232 }, { "epoch": 0.2520598405256191, "grad_norm": 0.7803109884262085, "learning_rate": 2.628492135466743e-05, "loss": 0.0975, "step": 14233 }, { "epoch": 0.2520775500626475, "grad_norm": 0.7676761746406555, "learning_rate": 2.6284354537698754e-05, "loss": 0.1658, "step": 14234 }, { "epoch": 0.25209525959967594, "grad_norm": 0.8636016845703125, "learning_rate": 2.628378768360569e-05, "loss": 0.085, "step": 14235 }, { "epoch": 0.25211296913670433, "grad_norm": 0.957520067691803, "learning_rate": 2.6283220792390107e-05, "loss": 0.1021, "step": 14236 }, { "epoch": 0.2521306786737328, "grad_norm": 0.8590341210365295, "learning_rate": 2.6282653864053867e-05, "loss": 0.0749, "step": 14237 }, { "epoch": 0.2521483882107612, "grad_norm": 0.7319467663764954, "learning_rate": 2.628208689859884e-05, "loss": 0.0564, "step": 14238 }, { "epoch": 0.25216609774778964, "grad_norm": 0.8182671070098877, "learning_rate": 2.6281519896026886e-05, "loss": 0.1136, "step": 14239 }, { "epoch": 0.25218380728481804, "grad_norm": 0.5401938557624817, "learning_rate": 2.6280952856339874e-05, "loss": 0.0744, "step": 14240 }, { "epoch": 0.2522015168218465, "grad_norm": 1.1207163333892822, "learning_rate": 2.628038577953967e-05, "loss": 0.1052, "step": 14241 }, { "epoch": 0.2522192263588749, "grad_norm": 1.024901032447815, "learning_rate": 2.6279818665628134e-05, "loss": 0.1281, "step": 14242 }, { "epoch": 0.25223693589590335, "grad_norm": 1.9863747358322144, "learning_rate": 2.627925151460714e-05, "loss": 0.0927, "step": 14243 }, { "epoch": 0.25225464543293175, "grad_norm": 1.21656334400177, "learning_rate": 2.6278684326478542e-05, "loss": 0.0641, "step": 14244 }, { "epoch": 0.2522723549699602, "grad_norm": 1.2109146118164062, "learning_rate": 2.627811710124422e-05, "loss": 0.1059, "step": 14245 }, { "epoch": 0.2522900645069886, "grad_norm": 0.9362359642982483, "learning_rate": 2.627754983890603e-05, "loss": 0.0968, "step": 14246 }, { "epoch": 0.25230777404401705, "grad_norm": 1.0904653072357178, "learning_rate": 2.627698253946584e-05, "loss": 0.0852, "step": 14247 }, { "epoch": 0.2523254835810455, "grad_norm": 1.387998342514038, "learning_rate": 2.6276415202925525e-05, "loss": 0.1434, "step": 14248 }, { "epoch": 0.2523431931180739, "grad_norm": 0.9979574084281921, "learning_rate": 2.627584782928694e-05, "loss": 0.0888, "step": 14249 }, { "epoch": 0.25236090265510236, "grad_norm": 0.9056638479232788, "learning_rate": 2.6275280418551957e-05, "loss": 0.134, "step": 14250 }, { "epoch": 0.25237861219213076, "grad_norm": 0.8284446597099304, "learning_rate": 2.6274712970722438e-05, "loss": 0.1089, "step": 14251 }, { "epoch": 0.2523963217291592, "grad_norm": 0.5379573106765747, "learning_rate": 2.627414548580026e-05, "loss": 0.0909, "step": 14252 }, { "epoch": 0.2524140312661876, "grad_norm": 0.9377597570419312, "learning_rate": 2.627357796378728e-05, "loss": 0.1215, "step": 14253 }, { "epoch": 0.25243174080321606, "grad_norm": 0.5946604609489441, "learning_rate": 2.627301040468537e-05, "loss": 0.0717, "step": 14254 }, { "epoch": 0.25244945034024446, "grad_norm": 0.5527817010879517, "learning_rate": 2.6272442808496395e-05, "loss": 0.0735, "step": 14255 }, { "epoch": 0.2524671598772729, "grad_norm": 0.5551931262016296, "learning_rate": 2.6271875175222223e-05, "loss": 0.0883, "step": 14256 }, { "epoch": 0.2524848694143013, "grad_norm": 1.6808488368988037, "learning_rate": 2.6271307504864725e-05, "loss": 0.1124, "step": 14257 }, { "epoch": 0.25250257895132977, "grad_norm": 0.8091870546340942, "learning_rate": 2.6270739797425757e-05, "loss": 0.0983, "step": 14258 }, { "epoch": 0.25252028848835817, "grad_norm": 0.8373028039932251, "learning_rate": 2.6270172052907204e-05, "loss": 0.1156, "step": 14259 }, { "epoch": 0.2525379980253866, "grad_norm": 0.7751225829124451, "learning_rate": 2.6269604271310922e-05, "loss": 0.1236, "step": 14260 }, { "epoch": 0.2525557075624151, "grad_norm": 1.4093434810638428, "learning_rate": 2.6269036452638778e-05, "loss": 0.0797, "step": 14261 }, { "epoch": 0.2525734170994435, "grad_norm": 0.9533945918083191, "learning_rate": 2.626846859689265e-05, "loss": 0.0945, "step": 14262 }, { "epoch": 0.25259112663647193, "grad_norm": 0.6422977447509766, "learning_rate": 2.6267900704074395e-05, "loss": 0.0667, "step": 14263 }, { "epoch": 0.25260883617350033, "grad_norm": 1.45469331741333, "learning_rate": 2.6267332774185894e-05, "loss": 0.0683, "step": 14264 }, { "epoch": 0.2526265457105288, "grad_norm": 0.8117700219154358, "learning_rate": 2.6266764807229e-05, "loss": 0.101, "step": 14265 }, { "epoch": 0.2526442552475572, "grad_norm": 0.8795968890190125, "learning_rate": 2.626619680320559e-05, "loss": 0.0813, "step": 14266 }, { "epoch": 0.25266196478458564, "grad_norm": 0.996396541595459, "learning_rate": 2.6265628762117536e-05, "loss": 0.0886, "step": 14267 }, { "epoch": 0.25267967432161403, "grad_norm": 0.6323209404945374, "learning_rate": 2.6265060683966702e-05, "loss": 0.0844, "step": 14268 }, { "epoch": 0.2526973838586425, "grad_norm": 0.7792961597442627, "learning_rate": 2.6264492568754957e-05, "loss": 0.0696, "step": 14269 }, { "epoch": 0.2527150933956709, "grad_norm": 0.6607118844985962, "learning_rate": 2.626392441648417e-05, "loss": 0.0772, "step": 14270 }, { "epoch": 0.25273280293269934, "grad_norm": 0.6597123146057129, "learning_rate": 2.6263356227156208e-05, "loss": 0.0897, "step": 14271 }, { "epoch": 0.25275051246972774, "grad_norm": 0.6638107299804688, "learning_rate": 2.6262788000772947e-05, "loss": 0.079, "step": 14272 }, { "epoch": 0.2527682220067562, "grad_norm": 0.9443886280059814, "learning_rate": 2.6262219737336254e-05, "loss": 0.1206, "step": 14273 }, { "epoch": 0.2527859315437846, "grad_norm": 0.9090209007263184, "learning_rate": 2.6261651436847995e-05, "loss": 0.0898, "step": 14274 }, { "epoch": 0.25280364108081305, "grad_norm": 0.7697435617446899, "learning_rate": 2.6261083099310038e-05, "loss": 0.0694, "step": 14275 }, { "epoch": 0.2528213506178415, "grad_norm": 0.959059476852417, "learning_rate": 2.626051472472426e-05, "loss": 0.1661, "step": 14276 }, { "epoch": 0.2528390601548699, "grad_norm": 0.9324826598167419, "learning_rate": 2.6259946313092534e-05, "loss": 0.0818, "step": 14277 }, { "epoch": 0.25285676969189835, "grad_norm": 0.7865301966667175, "learning_rate": 2.6259377864416715e-05, "loss": 0.0968, "step": 14278 }, { "epoch": 0.25287447922892675, "grad_norm": 0.7145898342132568, "learning_rate": 2.6258809378698684e-05, "loss": 0.088, "step": 14279 }, { "epoch": 0.2528921887659552, "grad_norm": 0.5462795495986938, "learning_rate": 2.6258240855940313e-05, "loss": 0.0827, "step": 14280 }, { "epoch": 0.2529098983029836, "grad_norm": 0.612023651599884, "learning_rate": 2.625767229614346e-05, "loss": 0.0675, "step": 14281 }, { "epoch": 0.25292760784001206, "grad_norm": 0.8809725642204285, "learning_rate": 2.6257103699310013e-05, "loss": 0.0826, "step": 14282 }, { "epoch": 0.25294531737704046, "grad_norm": 0.9898436069488525, "learning_rate": 2.6256535065441827e-05, "loss": 0.1088, "step": 14283 }, { "epoch": 0.2529630269140689, "grad_norm": 0.5874138474464417, "learning_rate": 2.6255966394540782e-05, "loss": 0.0793, "step": 14284 }, { "epoch": 0.2529807364510973, "grad_norm": 0.7338516116142273, "learning_rate": 2.6255397686608744e-05, "loss": 0.0838, "step": 14285 }, { "epoch": 0.25299844598812576, "grad_norm": 0.9075673222541809, "learning_rate": 2.6254828941647583e-05, "loss": 0.0773, "step": 14286 }, { "epoch": 0.25301615552515416, "grad_norm": 0.9203830361366272, "learning_rate": 2.625426015965918e-05, "loss": 0.1098, "step": 14287 }, { "epoch": 0.2530338650621826, "grad_norm": 0.7779557704925537, "learning_rate": 2.6253691340645397e-05, "loss": 0.0979, "step": 14288 }, { "epoch": 0.253051574599211, "grad_norm": 0.9209290742874146, "learning_rate": 2.6253122484608106e-05, "loss": 0.0891, "step": 14289 }, { "epoch": 0.25306928413623947, "grad_norm": 1.196851372718811, "learning_rate": 2.625255359154918e-05, "loss": 0.103, "step": 14290 }, { "epoch": 0.2530869936732679, "grad_norm": 0.7085752487182617, "learning_rate": 2.6251984661470493e-05, "loss": 0.0731, "step": 14291 }, { "epoch": 0.2531047032102963, "grad_norm": 1.2446961402893066, "learning_rate": 2.6251415694373915e-05, "loss": 0.1259, "step": 14292 }, { "epoch": 0.2531224127473248, "grad_norm": 1.0735684633255005, "learning_rate": 2.6250846690261316e-05, "loss": 0.0911, "step": 14293 }, { "epoch": 0.2531401222843532, "grad_norm": 0.46306154131889343, "learning_rate": 2.625027764913457e-05, "loss": 0.1004, "step": 14294 }, { "epoch": 0.25315783182138163, "grad_norm": 0.5146255493164062, "learning_rate": 2.6249708570995544e-05, "loss": 0.0543, "step": 14295 }, { "epoch": 0.25317554135841003, "grad_norm": 1.004414677619934, "learning_rate": 2.624913945584612e-05, "loss": 0.0932, "step": 14296 }, { "epoch": 0.2531932508954385, "grad_norm": 0.9688230156898499, "learning_rate": 2.6248570303688165e-05, "loss": 0.0738, "step": 14297 }, { "epoch": 0.2532109604324669, "grad_norm": 1.1327500343322754, "learning_rate": 2.6248001114523547e-05, "loss": 0.0623, "step": 14298 }, { "epoch": 0.25322866996949533, "grad_norm": 0.6907156109809875, "learning_rate": 2.624743188835415e-05, "loss": 0.0907, "step": 14299 }, { "epoch": 0.25324637950652373, "grad_norm": 1.030664086341858, "learning_rate": 2.6246862625181832e-05, "loss": 0.1145, "step": 14300 }, { "epoch": 0.2532640890435522, "grad_norm": 0.9270278215408325, "learning_rate": 2.624629332500848e-05, "loss": 0.0945, "step": 14301 }, { "epoch": 0.2532817985805806, "grad_norm": 0.916718065738678, "learning_rate": 2.6245723987835957e-05, "loss": 0.0894, "step": 14302 }, { "epoch": 0.25329950811760904, "grad_norm": 0.7459370493888855, "learning_rate": 2.6245154613666143e-05, "loss": 0.09, "step": 14303 }, { "epoch": 0.25331721765463744, "grad_norm": 1.211039423942566, "learning_rate": 2.6244585202500904e-05, "loss": 0.101, "step": 14304 }, { "epoch": 0.2533349271916659, "grad_norm": 1.5987852811813354, "learning_rate": 2.6244015754342114e-05, "loss": 0.1053, "step": 14305 }, { "epoch": 0.25335263672869435, "grad_norm": 0.6495071053504944, "learning_rate": 2.6243446269191654e-05, "loss": 0.0677, "step": 14306 }, { "epoch": 0.25337034626572275, "grad_norm": 0.7197110652923584, "learning_rate": 2.6242876747051394e-05, "loss": 0.0978, "step": 14307 }, { "epoch": 0.2533880558027512, "grad_norm": 1.0115612745285034, "learning_rate": 2.6242307187923208e-05, "loss": 0.1014, "step": 14308 }, { "epoch": 0.2534057653397796, "grad_norm": 0.5506042242050171, "learning_rate": 2.6241737591808962e-05, "loss": 0.0849, "step": 14309 }, { "epoch": 0.25342347487680805, "grad_norm": 2.4095468521118164, "learning_rate": 2.624116795871054e-05, "loss": 0.0721, "step": 14310 }, { "epoch": 0.25344118441383645, "grad_norm": 0.6239637732505798, "learning_rate": 2.6240598288629813e-05, "loss": 0.0753, "step": 14311 }, { "epoch": 0.2534588939508649, "grad_norm": 1.127048373222351, "learning_rate": 2.6240028581568654e-05, "loss": 0.0832, "step": 14312 }, { "epoch": 0.2534766034878933, "grad_norm": 0.6820163726806641, "learning_rate": 2.6239458837528936e-05, "loss": 0.0652, "step": 14313 }, { "epoch": 0.25349431302492176, "grad_norm": 0.673638641834259, "learning_rate": 2.6238889056512538e-05, "loss": 0.0872, "step": 14314 }, { "epoch": 0.25351202256195016, "grad_norm": 0.8025257587432861, "learning_rate": 2.6238319238521328e-05, "loss": 0.086, "step": 14315 }, { "epoch": 0.2535297320989786, "grad_norm": 0.860809326171875, "learning_rate": 2.623774938355719e-05, "loss": 0.0781, "step": 14316 }, { "epoch": 0.253547441636007, "grad_norm": 0.5423476696014404, "learning_rate": 2.6237179491621988e-05, "loss": 0.0759, "step": 14317 }, { "epoch": 0.25356515117303546, "grad_norm": 0.875106155872345, "learning_rate": 2.6236609562717604e-05, "loss": 0.0832, "step": 14318 }, { "epoch": 0.25358286071006386, "grad_norm": 0.8062645792961121, "learning_rate": 2.623603959684591e-05, "loss": 0.1355, "step": 14319 }, { "epoch": 0.2536005702470923, "grad_norm": 0.8253330588340759, "learning_rate": 2.6235469594008785e-05, "loss": 0.1003, "step": 14320 }, { "epoch": 0.25361827978412077, "grad_norm": 0.9098450541496277, "learning_rate": 2.62348995542081e-05, "loss": 0.1111, "step": 14321 }, { "epoch": 0.25363598932114917, "grad_norm": 0.6984755396842957, "learning_rate": 2.623432947744573e-05, "loss": 0.0967, "step": 14322 }, { "epoch": 0.2536536988581776, "grad_norm": 0.9453076720237732, "learning_rate": 2.6233759363723555e-05, "loss": 0.0807, "step": 14323 }, { "epoch": 0.253671408395206, "grad_norm": 0.7996214628219604, "learning_rate": 2.6233189213043445e-05, "loss": 0.1152, "step": 14324 }, { "epoch": 0.2536891179322345, "grad_norm": 0.8579462766647339, "learning_rate": 2.6232619025407284e-05, "loss": 0.1217, "step": 14325 }, { "epoch": 0.2537068274692629, "grad_norm": 0.7380434274673462, "learning_rate": 2.6232048800816936e-05, "loss": 0.0906, "step": 14326 }, { "epoch": 0.25372453700629133, "grad_norm": 0.5573265552520752, "learning_rate": 2.6231478539274288e-05, "loss": 0.0571, "step": 14327 }, { "epoch": 0.2537422465433197, "grad_norm": 0.9206830859184265, "learning_rate": 2.6230908240781206e-05, "loss": 0.1002, "step": 14328 }, { "epoch": 0.2537599560803482, "grad_norm": 1.1982035636901855, "learning_rate": 2.6230337905339578e-05, "loss": 0.126, "step": 14329 }, { "epoch": 0.2537776656173766, "grad_norm": 0.5599522590637207, "learning_rate": 2.622976753295127e-05, "loss": 0.115, "step": 14330 }, { "epoch": 0.25379537515440503, "grad_norm": 0.5855094194412231, "learning_rate": 2.6229197123618168e-05, "loss": 0.0913, "step": 14331 }, { "epoch": 0.25381308469143343, "grad_norm": 0.6922001242637634, "learning_rate": 2.622862667734214e-05, "loss": 0.0696, "step": 14332 }, { "epoch": 0.2538307942284619, "grad_norm": 0.8990504741668701, "learning_rate": 2.6228056194125064e-05, "loss": 0.1011, "step": 14333 }, { "epoch": 0.2538485037654903, "grad_norm": 1.35502290725708, "learning_rate": 2.622748567396882e-05, "loss": 0.1006, "step": 14334 }, { "epoch": 0.25386621330251874, "grad_norm": 0.5014113783836365, "learning_rate": 2.622691511687529e-05, "loss": 0.0843, "step": 14335 }, { "epoch": 0.2538839228395472, "grad_norm": 0.9737098217010498, "learning_rate": 2.6226344522846334e-05, "loss": 0.1175, "step": 14336 }, { "epoch": 0.2539016323765756, "grad_norm": 0.5990156531333923, "learning_rate": 2.6225773891883845e-05, "loss": 0.1225, "step": 14337 }, { "epoch": 0.25391934191360405, "grad_norm": 0.6468972563743591, "learning_rate": 2.6225203223989693e-05, "loss": 0.0681, "step": 14338 }, { "epoch": 0.25393705145063244, "grad_norm": 0.9519240856170654, "learning_rate": 2.6224632519165766e-05, "loss": 0.1153, "step": 14339 }, { "epoch": 0.2539547609876609, "grad_norm": 0.6751115322113037, "learning_rate": 2.6224061777413925e-05, "loss": 0.1037, "step": 14340 }, { "epoch": 0.2539724705246893, "grad_norm": 0.7684540748596191, "learning_rate": 2.622349099873606e-05, "loss": 0.1074, "step": 14341 }, { "epoch": 0.25399018006171775, "grad_norm": 1.021939754486084, "learning_rate": 2.622292018313404e-05, "loss": 0.0823, "step": 14342 }, { "epoch": 0.25400788959874615, "grad_norm": 0.45953476428985596, "learning_rate": 2.622234933060975e-05, "loss": 0.0773, "step": 14343 }, { "epoch": 0.2540255991357746, "grad_norm": 0.895057737827301, "learning_rate": 2.6221778441165073e-05, "loss": 0.1142, "step": 14344 }, { "epoch": 0.254043308672803, "grad_norm": 0.503370463848114, "learning_rate": 2.622120751480187e-05, "loss": 0.1109, "step": 14345 }, { "epoch": 0.25406101820983146, "grad_norm": 0.8306543231010437, "learning_rate": 2.6220636551522033e-05, "loss": 0.0924, "step": 14346 }, { "epoch": 0.25407872774685986, "grad_norm": 1.3971893787384033, "learning_rate": 2.6220065551327438e-05, "loss": 0.0963, "step": 14347 }, { "epoch": 0.2540964372838883, "grad_norm": 0.7835435271263123, "learning_rate": 2.621949451421996e-05, "loss": 0.1003, "step": 14348 }, { "epoch": 0.2541141468209167, "grad_norm": 0.8711139559745789, "learning_rate": 2.621892344020148e-05, "loss": 0.0943, "step": 14349 }, { "epoch": 0.25413185635794516, "grad_norm": 1.2040338516235352, "learning_rate": 2.621835232927388e-05, "loss": 0.0908, "step": 14350 }, { "epoch": 0.2541495658949736, "grad_norm": 0.7502666115760803, "learning_rate": 2.621778118143903e-05, "loss": 0.0982, "step": 14351 }, { "epoch": 0.254167275432002, "grad_norm": 0.9882212281227112, "learning_rate": 2.621720999669882e-05, "loss": 0.0881, "step": 14352 }, { "epoch": 0.25418498496903047, "grad_norm": 0.5301265120506287, "learning_rate": 2.621663877505512e-05, "loss": 0.0927, "step": 14353 }, { "epoch": 0.25420269450605887, "grad_norm": 0.6447235345840454, "learning_rate": 2.6216067516509813e-05, "loss": 0.0862, "step": 14354 }, { "epoch": 0.2542204040430873, "grad_norm": 0.8827106952667236, "learning_rate": 2.6215496221064777e-05, "loss": 0.0868, "step": 14355 }, { "epoch": 0.2542381135801157, "grad_norm": 0.5123857855796814, "learning_rate": 2.6214924888721897e-05, "loss": 0.075, "step": 14356 }, { "epoch": 0.2542558231171442, "grad_norm": 1.0759260654449463, "learning_rate": 2.6214353519483046e-05, "loss": 0.0998, "step": 14357 }, { "epoch": 0.2542735326541726, "grad_norm": 0.7185155153274536, "learning_rate": 2.621378211335011e-05, "loss": 0.1219, "step": 14358 }, { "epoch": 0.25429124219120103, "grad_norm": 0.5203055143356323, "learning_rate": 2.6213210670324957e-05, "loss": 0.0608, "step": 14359 }, { "epoch": 0.2543089517282294, "grad_norm": 0.8755739331245422, "learning_rate": 2.621263919040948e-05, "loss": 0.0887, "step": 14360 }, { "epoch": 0.2543266612652579, "grad_norm": 0.4380987882614136, "learning_rate": 2.6212067673605556e-05, "loss": 0.1109, "step": 14361 }, { "epoch": 0.2543443708022863, "grad_norm": 1.1752086877822876, "learning_rate": 2.621149611991506e-05, "loss": 0.1813, "step": 14362 }, { "epoch": 0.25436208033931473, "grad_norm": 1.1911593675613403, "learning_rate": 2.6210924529339877e-05, "loss": 0.1079, "step": 14363 }, { "epoch": 0.25437978987634313, "grad_norm": 0.9392803907394409, "learning_rate": 2.6210352901881886e-05, "loss": 0.1079, "step": 14364 }, { "epoch": 0.2543974994133716, "grad_norm": 1.1002843379974365, "learning_rate": 2.620978123754297e-05, "loss": 0.0909, "step": 14365 }, { "epoch": 0.25441520895040004, "grad_norm": 1.0785406827926636, "learning_rate": 2.6209209536325004e-05, "loss": 0.0961, "step": 14366 }, { "epoch": 0.25443291848742844, "grad_norm": 0.8265839219093323, "learning_rate": 2.6208637798229876e-05, "loss": 0.1015, "step": 14367 }, { "epoch": 0.2544506280244569, "grad_norm": 0.96113520860672, "learning_rate": 2.6208066023259458e-05, "loss": 0.1253, "step": 14368 }, { "epoch": 0.2544683375614853, "grad_norm": 1.0865962505340576, "learning_rate": 2.620749421141564e-05, "loss": 0.1598, "step": 14369 }, { "epoch": 0.25448604709851375, "grad_norm": 0.7321788668632507, "learning_rate": 2.62069223627003e-05, "loss": 0.0507, "step": 14370 }, { "epoch": 0.25450375663554214, "grad_norm": 0.4722925126552582, "learning_rate": 2.620635047711532e-05, "loss": 0.1116, "step": 14371 }, { "epoch": 0.2545214661725706, "grad_norm": 0.9246276617050171, "learning_rate": 2.6205778554662577e-05, "loss": 0.082, "step": 14372 }, { "epoch": 0.254539175709599, "grad_norm": 0.6986244916915894, "learning_rate": 2.620520659534396e-05, "loss": 0.0722, "step": 14373 }, { "epoch": 0.25455688524662745, "grad_norm": 1.410040020942688, "learning_rate": 2.6204634599161338e-05, "loss": 0.1033, "step": 14374 }, { "epoch": 0.25457459478365585, "grad_norm": 0.5320760011672974, "learning_rate": 2.620406256611661e-05, "loss": 0.0732, "step": 14375 }, { "epoch": 0.2545923043206843, "grad_norm": 0.648971676826477, "learning_rate": 2.6203490496211647e-05, "loss": 0.0641, "step": 14376 }, { "epoch": 0.2546100138577127, "grad_norm": 1.008517861366272, "learning_rate": 2.6202918389448336e-05, "loss": 0.0857, "step": 14377 }, { "epoch": 0.25462772339474116, "grad_norm": 0.6784179210662842, "learning_rate": 2.6202346245828552e-05, "loss": 0.0748, "step": 14378 }, { "epoch": 0.25464543293176956, "grad_norm": 0.5530433058738708, "learning_rate": 2.6201774065354185e-05, "loss": 0.0751, "step": 14379 }, { "epoch": 0.254663142468798, "grad_norm": 1.0027433633804321, "learning_rate": 2.6201201848027112e-05, "loss": 0.101, "step": 14380 }, { "epoch": 0.25468085200582646, "grad_norm": 0.9138604998588562, "learning_rate": 2.6200629593849223e-05, "loss": 0.1099, "step": 14381 }, { "epoch": 0.25469856154285486, "grad_norm": 1.1596838235855103, "learning_rate": 2.6200057302822386e-05, "loss": 0.0975, "step": 14382 }, { "epoch": 0.2547162710798833, "grad_norm": 1.2963814735412598, "learning_rate": 2.61994849749485e-05, "loss": 0.1219, "step": 14383 }, { "epoch": 0.2547339806169117, "grad_norm": 0.8352892398834229, "learning_rate": 2.619891261022944e-05, "loss": 0.0866, "step": 14384 }, { "epoch": 0.25475169015394017, "grad_norm": 0.8322521448135376, "learning_rate": 2.619834020866709e-05, "loss": 0.122, "step": 14385 }, { "epoch": 0.25476939969096857, "grad_norm": 0.6625901460647583, "learning_rate": 2.619776777026333e-05, "loss": 0.0698, "step": 14386 }, { "epoch": 0.254787109227997, "grad_norm": 0.8113823533058167, "learning_rate": 2.6197195295020054e-05, "loss": 0.09, "step": 14387 }, { "epoch": 0.2548048187650254, "grad_norm": 0.8979138135910034, "learning_rate": 2.6196622782939136e-05, "loss": 0.112, "step": 14388 }, { "epoch": 0.2548225283020539, "grad_norm": 0.7459979057312012, "learning_rate": 2.6196050234022462e-05, "loss": 0.1124, "step": 14389 }, { "epoch": 0.2548402378390823, "grad_norm": 1.4390544891357422, "learning_rate": 2.619547764827191e-05, "loss": 0.1032, "step": 14390 }, { "epoch": 0.2548579473761107, "grad_norm": 1.352114200592041, "learning_rate": 2.619490502568937e-05, "loss": 0.1058, "step": 14391 }, { "epoch": 0.2548756569131391, "grad_norm": 0.9371317028999329, "learning_rate": 2.6194332366276726e-05, "loss": 0.0764, "step": 14392 }, { "epoch": 0.2548933664501676, "grad_norm": 1.1856684684753418, "learning_rate": 2.6193759670035863e-05, "loss": 0.1055, "step": 14393 }, { "epoch": 0.254911075987196, "grad_norm": 1.1089937686920166, "learning_rate": 2.6193186936968663e-05, "loss": 0.1034, "step": 14394 }, { "epoch": 0.25492878552422443, "grad_norm": 0.6340092420578003, "learning_rate": 2.619261416707701e-05, "loss": 0.0952, "step": 14395 }, { "epoch": 0.2549464950612529, "grad_norm": 0.6688147187232971, "learning_rate": 2.6192041360362786e-05, "loss": 0.1007, "step": 14396 }, { "epoch": 0.2549642045982813, "grad_norm": 0.8091140389442444, "learning_rate": 2.6191468516827873e-05, "loss": 0.0975, "step": 14397 }, { "epoch": 0.25498191413530974, "grad_norm": 0.7439177632331848, "learning_rate": 2.6190895636474173e-05, "loss": 0.1145, "step": 14398 }, { "epoch": 0.25499962367233814, "grad_norm": 0.9276675581932068, "learning_rate": 2.619032271930355e-05, "loss": 0.0792, "step": 14399 }, { "epoch": 0.2550173332093666, "grad_norm": 1.1068406105041504, "learning_rate": 2.61897497653179e-05, "loss": 0.1034, "step": 14400 }, { "epoch": 0.255035042746395, "grad_norm": 1.2240731716156006, "learning_rate": 2.61891767745191e-05, "loss": 0.1029, "step": 14401 }, { "epoch": 0.25505275228342345, "grad_norm": 1.1592466831207275, "learning_rate": 2.6188603746909045e-05, "loss": 0.1095, "step": 14402 }, { "epoch": 0.25507046182045184, "grad_norm": 0.6880576610565186, "learning_rate": 2.6188030682489617e-05, "loss": 0.1089, "step": 14403 }, { "epoch": 0.2550881713574803, "grad_norm": 0.581095814704895, "learning_rate": 2.6187457581262694e-05, "loss": 0.0808, "step": 14404 }, { "epoch": 0.2551058808945087, "grad_norm": 0.9726307392120361, "learning_rate": 2.6186884443230167e-05, "loss": 0.1061, "step": 14405 }, { "epoch": 0.25512359043153715, "grad_norm": 0.6802576184272766, "learning_rate": 2.6186311268393926e-05, "loss": 0.0829, "step": 14406 }, { "epoch": 0.25514129996856555, "grad_norm": 1.1219708919525146, "learning_rate": 2.6185738056755852e-05, "loss": 0.1064, "step": 14407 }, { "epoch": 0.255159009505594, "grad_norm": 0.928787112236023, "learning_rate": 2.618516480831783e-05, "loss": 0.0782, "step": 14408 }, { "epoch": 0.2551767190426224, "grad_norm": 0.8214039206504822, "learning_rate": 2.6184591523081746e-05, "loss": 0.0859, "step": 14409 }, { "epoch": 0.25519442857965086, "grad_norm": 1.1075680255889893, "learning_rate": 2.618401820104949e-05, "loss": 0.1181, "step": 14410 }, { "epoch": 0.2552121381166793, "grad_norm": 0.9947013854980469, "learning_rate": 2.6183444842222943e-05, "loss": 0.0849, "step": 14411 }, { "epoch": 0.2552298476537077, "grad_norm": 0.7879453897476196, "learning_rate": 2.6182871446603993e-05, "loss": 0.1057, "step": 14412 }, { "epoch": 0.25524755719073616, "grad_norm": 0.9265432357788086, "learning_rate": 2.618229801419453e-05, "loss": 0.0961, "step": 14413 }, { "epoch": 0.25526526672776456, "grad_norm": 1.3383053541183472, "learning_rate": 2.6181724544996434e-05, "loss": 0.0721, "step": 14414 }, { "epoch": 0.255282976264793, "grad_norm": 0.8745623826980591, "learning_rate": 2.61811510390116e-05, "loss": 0.1539, "step": 14415 }, { "epoch": 0.2553006858018214, "grad_norm": 0.5734396576881409, "learning_rate": 2.6180577496241904e-05, "loss": 0.1059, "step": 14416 }, { "epoch": 0.25531839533884987, "grad_norm": 1.4378763437271118, "learning_rate": 2.6180003916689242e-05, "loss": 0.1382, "step": 14417 }, { "epoch": 0.25533610487587827, "grad_norm": 1.6903845071792603, "learning_rate": 2.61794303003555e-05, "loss": 0.0911, "step": 14418 }, { "epoch": 0.2553538144129067, "grad_norm": 1.6138018369674683, "learning_rate": 2.6178856647242558e-05, "loss": 0.1086, "step": 14419 }, { "epoch": 0.2553715239499351, "grad_norm": 2.0858747959136963, "learning_rate": 2.617828295735231e-05, "loss": 0.1183, "step": 14420 }, { "epoch": 0.2553892334869636, "grad_norm": 0.9235548973083496, "learning_rate": 2.6177709230686645e-05, "loss": 0.1194, "step": 14421 }, { "epoch": 0.255406943023992, "grad_norm": 0.9342338442802429, "learning_rate": 2.6177135467247446e-05, "loss": 0.111, "step": 14422 }, { "epoch": 0.2554246525610204, "grad_norm": 0.9807782769203186, "learning_rate": 2.6176561667036602e-05, "loss": 0.0977, "step": 14423 }, { "epoch": 0.2554423620980488, "grad_norm": 1.0659071207046509, "learning_rate": 2.6175987830055997e-05, "loss": 0.0925, "step": 14424 }, { "epoch": 0.2554600716350773, "grad_norm": 0.6718698740005493, "learning_rate": 2.6175413956307525e-05, "loss": 0.1025, "step": 14425 }, { "epoch": 0.25547778117210573, "grad_norm": 1.1291297674179077, "learning_rate": 2.617484004579307e-05, "loss": 0.1913, "step": 14426 }, { "epoch": 0.25549549070913413, "grad_norm": 0.9758081436157227, "learning_rate": 2.6174266098514524e-05, "loss": 0.0717, "step": 14427 }, { "epoch": 0.2555132002461626, "grad_norm": 1.2057723999023438, "learning_rate": 2.6173692114473773e-05, "loss": 0.0755, "step": 14428 }, { "epoch": 0.255530909783191, "grad_norm": 1.0433604717254639, "learning_rate": 2.6173118093672704e-05, "loss": 0.1083, "step": 14429 }, { "epoch": 0.25554861932021944, "grad_norm": 0.9497156143188477, "learning_rate": 2.6172544036113207e-05, "loss": 0.1428, "step": 14430 }, { "epoch": 0.25556632885724784, "grad_norm": 0.8843218088150024, "learning_rate": 2.6171969941797165e-05, "loss": 0.0742, "step": 14431 }, { "epoch": 0.2555840383942763, "grad_norm": 0.8832065463066101, "learning_rate": 2.6171395810726477e-05, "loss": 0.0688, "step": 14432 }, { "epoch": 0.2556017479313047, "grad_norm": 1.163683295249939, "learning_rate": 2.6170821642903026e-05, "loss": 0.1349, "step": 14433 }, { "epoch": 0.25561945746833314, "grad_norm": 0.9060238003730774, "learning_rate": 2.6170247438328702e-05, "loss": 0.1007, "step": 14434 }, { "epoch": 0.25563716700536154, "grad_norm": 0.5588817000389099, "learning_rate": 2.616967319700539e-05, "loss": 0.0949, "step": 14435 }, { "epoch": 0.25565487654239, "grad_norm": 0.8823073506355286, "learning_rate": 2.616909891893499e-05, "loss": 0.0968, "step": 14436 }, { "epoch": 0.2556725860794184, "grad_norm": 0.8908374905586243, "learning_rate": 2.6168524604119383e-05, "loss": 0.104, "step": 14437 }, { "epoch": 0.25569029561644685, "grad_norm": 0.6790704727172852, "learning_rate": 2.6167950252560455e-05, "loss": 0.1083, "step": 14438 }, { "epoch": 0.25570800515347525, "grad_norm": 0.6798839569091797, "learning_rate": 2.6167375864260098e-05, "loss": 0.1045, "step": 14439 }, { "epoch": 0.2557257146905037, "grad_norm": 0.6509383320808411, "learning_rate": 2.616680143922021e-05, "loss": 0.1136, "step": 14440 }, { "epoch": 0.25574342422753216, "grad_norm": 1.0242550373077393, "learning_rate": 2.616622697744267e-05, "loss": 0.125, "step": 14441 }, { "epoch": 0.25576113376456056, "grad_norm": 0.9953365325927734, "learning_rate": 2.616565247892938e-05, "loss": 0.1138, "step": 14442 }, { "epoch": 0.255778843301589, "grad_norm": 0.87687087059021, "learning_rate": 2.6165077943682217e-05, "loss": 0.0785, "step": 14443 }, { "epoch": 0.2557965528386174, "grad_norm": 0.4663150906562805, "learning_rate": 2.616450337170308e-05, "loss": 0.0788, "step": 14444 }, { "epoch": 0.25581426237564586, "grad_norm": 1.0185960531234741, "learning_rate": 2.6163928762993854e-05, "loss": 0.0944, "step": 14445 }, { "epoch": 0.25583197191267426, "grad_norm": 1.740257978439331, "learning_rate": 2.6163354117556428e-05, "loss": 0.0765, "step": 14446 }, { "epoch": 0.2558496814497027, "grad_norm": 0.5914489030838013, "learning_rate": 2.6162779435392704e-05, "loss": 0.0975, "step": 14447 }, { "epoch": 0.2558673909867311, "grad_norm": 1.3217687606811523, "learning_rate": 2.616220471650456e-05, "loss": 0.1154, "step": 14448 }, { "epoch": 0.25588510052375957, "grad_norm": 1.1125965118408203, "learning_rate": 2.616162996089389e-05, "loss": 0.1405, "step": 14449 }, { "epoch": 0.25590281006078797, "grad_norm": 0.8184202313423157, "learning_rate": 2.616105516856259e-05, "loss": 0.1081, "step": 14450 }, { "epoch": 0.2559205195978164, "grad_norm": 0.8598121404647827, "learning_rate": 2.6160480339512547e-05, "loss": 0.0656, "step": 14451 }, { "epoch": 0.2559382291348448, "grad_norm": 0.846259593963623, "learning_rate": 2.615990547374565e-05, "loss": 0.1085, "step": 14452 }, { "epoch": 0.2559559386718733, "grad_norm": 0.801282525062561, "learning_rate": 2.6159330571263794e-05, "loss": 0.1061, "step": 14453 }, { "epoch": 0.25597364820890167, "grad_norm": 0.599215567111969, "learning_rate": 2.6158755632068868e-05, "loss": 0.076, "step": 14454 }, { "epoch": 0.2559913577459301, "grad_norm": 0.8809436559677124, "learning_rate": 2.6158180656162768e-05, "loss": 0.1103, "step": 14455 }, { "epoch": 0.2560090672829586, "grad_norm": 1.0573817491531372, "learning_rate": 2.615760564354738e-05, "loss": 0.098, "step": 14456 }, { "epoch": 0.256026776819987, "grad_norm": 0.6678721904754639, "learning_rate": 2.6157030594224596e-05, "loss": 0.0976, "step": 14457 }, { "epoch": 0.25604448635701543, "grad_norm": 0.6622013449668884, "learning_rate": 2.6156455508196312e-05, "loss": 0.1175, "step": 14458 }, { "epoch": 0.25606219589404383, "grad_norm": 0.9074439406394958, "learning_rate": 2.6155880385464413e-05, "loss": 0.1136, "step": 14459 }, { "epoch": 0.2560799054310723, "grad_norm": 1.2673143148422241, "learning_rate": 2.6155305226030803e-05, "loss": 0.1124, "step": 14460 }, { "epoch": 0.2560976149681007, "grad_norm": 0.7316991090774536, "learning_rate": 2.6154730029897364e-05, "loss": 0.0924, "step": 14461 }, { "epoch": 0.25611532450512914, "grad_norm": 0.8225864768028259, "learning_rate": 2.6154154797065995e-05, "loss": 0.0533, "step": 14462 }, { "epoch": 0.25613303404215754, "grad_norm": 0.7556372880935669, "learning_rate": 2.6153579527538583e-05, "loss": 0.0845, "step": 14463 }, { "epoch": 0.256150743579186, "grad_norm": 0.6649323105812073, "learning_rate": 2.615300422131702e-05, "loss": 0.1138, "step": 14464 }, { "epoch": 0.2561684531162144, "grad_norm": 0.6239635348320007, "learning_rate": 2.6152428878403203e-05, "loss": 0.0715, "step": 14465 }, { "epoch": 0.25618616265324284, "grad_norm": 0.8163975477218628, "learning_rate": 2.615185349879902e-05, "loss": 0.1019, "step": 14466 }, { "epoch": 0.25620387219027124, "grad_norm": 1.6634716987609863, "learning_rate": 2.615127808250637e-05, "loss": 0.1685, "step": 14467 }, { "epoch": 0.2562215817272997, "grad_norm": 0.9294646382331848, "learning_rate": 2.6150702629527147e-05, "loss": 0.1011, "step": 14468 }, { "epoch": 0.2562392912643281, "grad_norm": 1.0952212810516357, "learning_rate": 2.6150127139863228e-05, "loss": 0.0976, "step": 14469 }, { "epoch": 0.25625700080135655, "grad_norm": 0.7457708120346069, "learning_rate": 2.614955161351653e-05, "loss": 0.0992, "step": 14470 }, { "epoch": 0.256274710338385, "grad_norm": 1.2681711912155151, "learning_rate": 2.614897605048893e-05, "loss": 0.0681, "step": 14471 }, { "epoch": 0.2562924198754134, "grad_norm": 0.8097746968269348, "learning_rate": 2.6148400450782327e-05, "loss": 0.0986, "step": 14472 }, { "epoch": 0.25631012941244186, "grad_norm": 0.926173985004425, "learning_rate": 2.6147824814398614e-05, "loss": 0.0707, "step": 14473 }, { "epoch": 0.25632783894947025, "grad_norm": 0.605759859085083, "learning_rate": 2.6147249141339685e-05, "loss": 0.0828, "step": 14474 }, { "epoch": 0.2563455484864987, "grad_norm": 1.1390148401260376, "learning_rate": 2.614667343160744e-05, "loss": 0.1275, "step": 14475 }, { "epoch": 0.2563632580235271, "grad_norm": 1.0585085153579712, "learning_rate": 2.6146097685203757e-05, "loss": 0.1269, "step": 14476 }, { "epoch": 0.25638096756055556, "grad_norm": 0.618704080581665, "learning_rate": 2.6145521902130546e-05, "loss": 0.1369, "step": 14477 }, { "epoch": 0.25639867709758396, "grad_norm": 0.7727486491203308, "learning_rate": 2.614494608238969e-05, "loss": 0.0705, "step": 14478 }, { "epoch": 0.2564163866346124, "grad_norm": 0.49836742877960205, "learning_rate": 2.614437022598309e-05, "loss": 0.0891, "step": 14479 }, { "epoch": 0.2564340961716408, "grad_norm": 0.8351813554763794, "learning_rate": 2.6143794332912645e-05, "loss": 0.1076, "step": 14480 }, { "epoch": 0.25645180570866927, "grad_norm": 0.4789910316467285, "learning_rate": 2.6143218403180236e-05, "loss": 0.0646, "step": 14481 }, { "epoch": 0.25646951524569767, "grad_norm": 0.9249416589736938, "learning_rate": 2.6142642436787773e-05, "loss": 0.1198, "step": 14482 }, { "epoch": 0.2564872247827261, "grad_norm": 1.440883755683899, "learning_rate": 2.6142066433737138e-05, "loss": 0.1272, "step": 14483 }, { "epoch": 0.2565049343197545, "grad_norm": 0.713736355304718, "learning_rate": 2.6141490394030233e-05, "loss": 0.1119, "step": 14484 }, { "epoch": 0.256522643856783, "grad_norm": 1.0330086946487427, "learning_rate": 2.6140914317668954e-05, "loss": 0.1, "step": 14485 }, { "epoch": 0.2565403533938114, "grad_norm": 0.595047116279602, "learning_rate": 2.6140338204655186e-05, "loss": 0.0785, "step": 14486 }, { "epoch": 0.2565580629308398, "grad_norm": 1.355532169342041, "learning_rate": 2.613976205499084e-05, "loss": 0.1524, "step": 14487 }, { "epoch": 0.2565757724678683, "grad_norm": 0.7902871370315552, "learning_rate": 2.61391858686778e-05, "loss": 0.1072, "step": 14488 }, { "epoch": 0.2565934820048967, "grad_norm": 1.2957031726837158, "learning_rate": 2.6138609645717968e-05, "loss": 0.1001, "step": 14489 }, { "epoch": 0.25661119154192513, "grad_norm": 1.0259935855865479, "learning_rate": 2.6138033386113233e-05, "loss": 0.0931, "step": 14490 }, { "epoch": 0.25662890107895353, "grad_norm": 1.4664448499679565, "learning_rate": 2.6137457089865497e-05, "loss": 0.1212, "step": 14491 }, { "epoch": 0.256646610615982, "grad_norm": 0.5085070729255676, "learning_rate": 2.6136880756976654e-05, "loss": 0.1144, "step": 14492 }, { "epoch": 0.2566643201530104, "grad_norm": 0.7653972506523132, "learning_rate": 2.61363043874486e-05, "loss": 0.1173, "step": 14493 }, { "epoch": 0.25668202969003884, "grad_norm": 0.9194136261940002, "learning_rate": 2.613572798128323e-05, "loss": 0.0934, "step": 14494 }, { "epoch": 0.25669973922706724, "grad_norm": 0.7789939641952515, "learning_rate": 2.613515153848244e-05, "loss": 0.0915, "step": 14495 }, { "epoch": 0.2567174487640957, "grad_norm": 0.9896889328956604, "learning_rate": 2.6134575059048127e-05, "loss": 0.1356, "step": 14496 }, { "epoch": 0.2567351583011241, "grad_norm": 0.8372886180877686, "learning_rate": 2.6133998542982192e-05, "loss": 0.1201, "step": 14497 }, { "epoch": 0.25675286783815254, "grad_norm": 0.703517735004425, "learning_rate": 2.6133421990286527e-05, "loss": 0.0904, "step": 14498 }, { "epoch": 0.25677057737518094, "grad_norm": 0.8081948757171631, "learning_rate": 2.6132845400963027e-05, "loss": 0.0815, "step": 14499 }, { "epoch": 0.2567882869122094, "grad_norm": 0.5068190097808838, "learning_rate": 2.613226877501359e-05, "loss": 0.0919, "step": 14500 }, { "epoch": 0.25680599644923785, "grad_norm": 0.7760812640190125, "learning_rate": 2.6131692112440124e-05, "loss": 0.1068, "step": 14501 }, { "epoch": 0.25682370598626625, "grad_norm": 0.8747244477272034, "learning_rate": 2.6131115413244507e-05, "loss": 0.0951, "step": 14502 }, { "epoch": 0.2568414155232947, "grad_norm": 0.7152943015098572, "learning_rate": 2.6130538677428654e-05, "loss": 0.0693, "step": 14503 }, { "epoch": 0.2568591250603231, "grad_norm": 1.2379100322723389, "learning_rate": 2.6129961904994452e-05, "loss": 0.1028, "step": 14504 }, { "epoch": 0.25687683459735156, "grad_norm": 1.0284446477890015, "learning_rate": 2.6129385095943798e-05, "loss": 0.1048, "step": 14505 }, { "epoch": 0.25689454413437995, "grad_norm": 0.962235689163208, "learning_rate": 2.6128808250278593e-05, "loss": 0.1268, "step": 14506 }, { "epoch": 0.2569122536714084, "grad_norm": 0.9772830605506897, "learning_rate": 2.6128231368000738e-05, "loss": 0.0946, "step": 14507 }, { "epoch": 0.2569299632084368, "grad_norm": 0.6512486934661865, "learning_rate": 2.6127654449112123e-05, "loss": 0.0664, "step": 14508 }, { "epoch": 0.25694767274546526, "grad_norm": 0.9941226840019226, "learning_rate": 2.6127077493614653e-05, "loss": 0.0992, "step": 14509 }, { "epoch": 0.25696538228249366, "grad_norm": 0.7727907299995422, "learning_rate": 2.6126500501510225e-05, "loss": 0.1162, "step": 14510 }, { "epoch": 0.2569830918195221, "grad_norm": 0.8189834952354431, "learning_rate": 2.6125923472800733e-05, "loss": 0.0993, "step": 14511 }, { "epoch": 0.2570008013565505, "grad_norm": 0.8942825198173523, "learning_rate": 2.6125346407488077e-05, "loss": 0.0976, "step": 14512 }, { "epoch": 0.25701851089357897, "grad_norm": 0.7748973369598389, "learning_rate": 2.6124769305574162e-05, "loss": 0.1028, "step": 14513 }, { "epoch": 0.2570362204306074, "grad_norm": 1.028701663017273, "learning_rate": 2.612419216706088e-05, "loss": 0.1482, "step": 14514 }, { "epoch": 0.2570539299676358, "grad_norm": 1.0228039026260376, "learning_rate": 2.6123614991950127e-05, "loss": 0.0858, "step": 14515 }, { "epoch": 0.2570716395046643, "grad_norm": 0.8856015205383301, "learning_rate": 2.6123037780243808e-05, "loss": 0.0997, "step": 14516 }, { "epoch": 0.25708934904169267, "grad_norm": 1.1298080682754517, "learning_rate": 2.6122460531943816e-05, "loss": 0.066, "step": 14517 }, { "epoch": 0.2571070585787211, "grad_norm": 0.9338575601577759, "learning_rate": 2.6121883247052058e-05, "loss": 0.0966, "step": 14518 }, { "epoch": 0.2571247681157495, "grad_norm": 0.8252156972885132, "learning_rate": 2.6121305925570425e-05, "loss": 0.1083, "step": 14519 }, { "epoch": 0.257142477652778, "grad_norm": 1.337098479270935, "learning_rate": 2.6120728567500825e-05, "loss": 0.1348, "step": 14520 }, { "epoch": 0.2571601871898064, "grad_norm": 0.8419858813285828, "learning_rate": 2.612015117284515e-05, "loss": 0.051, "step": 14521 }, { "epoch": 0.25717789672683483, "grad_norm": 0.36328110098838806, "learning_rate": 2.6119573741605307e-05, "loss": 0.0499, "step": 14522 }, { "epoch": 0.25719560626386323, "grad_norm": 0.7420661449432373, "learning_rate": 2.6118996273783187e-05, "loss": 0.1257, "step": 14523 }, { "epoch": 0.2572133158008917, "grad_norm": 1.0088609457015991, "learning_rate": 2.6118418769380693e-05, "loss": 0.1242, "step": 14524 }, { "epoch": 0.2572310253379201, "grad_norm": 1.4547924995422363, "learning_rate": 2.611784122839973e-05, "loss": 0.1155, "step": 14525 }, { "epoch": 0.25724873487494854, "grad_norm": 0.9108176231384277, "learning_rate": 2.6117263650842187e-05, "loss": 0.1431, "step": 14526 }, { "epoch": 0.25726644441197694, "grad_norm": 1.5240323543548584, "learning_rate": 2.6116686036709976e-05, "loss": 0.138, "step": 14527 }, { "epoch": 0.2572841539490054, "grad_norm": 1.1777660846710205, "learning_rate": 2.6116108386004994e-05, "loss": 0.0959, "step": 14528 }, { "epoch": 0.25730186348603384, "grad_norm": 0.9065782427787781, "learning_rate": 2.6115530698729136e-05, "loss": 0.0888, "step": 14529 }, { "epoch": 0.25731957302306224, "grad_norm": 0.6293371915817261, "learning_rate": 2.611495297488431e-05, "loss": 0.0773, "step": 14530 }, { "epoch": 0.2573372825600907, "grad_norm": 1.2105895280838013, "learning_rate": 2.611437521447241e-05, "loss": 0.0797, "step": 14531 }, { "epoch": 0.2573549920971191, "grad_norm": 1.5136245489120483, "learning_rate": 2.611379741749534e-05, "loss": 0.1347, "step": 14532 }, { "epoch": 0.25737270163414755, "grad_norm": 0.5341486930847168, "learning_rate": 2.6113219583955e-05, "loss": 0.1017, "step": 14533 }, { "epoch": 0.25739041117117595, "grad_norm": 0.5646679401397705, "learning_rate": 2.6112641713853293e-05, "loss": 0.0562, "step": 14534 }, { "epoch": 0.2574081207082044, "grad_norm": 0.8091954588890076, "learning_rate": 2.6112063807192115e-05, "loss": 0.0986, "step": 14535 }, { "epoch": 0.2574258302452328, "grad_norm": 0.7252422571182251, "learning_rate": 2.6111485863973374e-05, "loss": 0.0908, "step": 14536 }, { "epoch": 0.25744353978226125, "grad_norm": 0.6975424289703369, "learning_rate": 2.6110907884198968e-05, "loss": 0.0917, "step": 14537 }, { "epoch": 0.25746124931928965, "grad_norm": 1.4465546607971191, "learning_rate": 2.61103298678708e-05, "loss": 0.1287, "step": 14538 }, { "epoch": 0.2574789588563181, "grad_norm": 0.4531228840351105, "learning_rate": 2.6109751814990773e-05, "loss": 0.0637, "step": 14539 }, { "epoch": 0.2574966683933465, "grad_norm": 1.0215750932693481, "learning_rate": 2.610917372556078e-05, "loss": 0.1227, "step": 14540 }, { "epoch": 0.25751437793037496, "grad_norm": 0.7345936894416809, "learning_rate": 2.610859559958273e-05, "loss": 0.0741, "step": 14541 }, { "epoch": 0.25753208746740336, "grad_norm": 0.8511857986450195, "learning_rate": 2.6108017437058526e-05, "loss": 0.1234, "step": 14542 }, { "epoch": 0.2575497970044318, "grad_norm": 1.4829403162002563, "learning_rate": 2.6107439237990068e-05, "loss": 0.0944, "step": 14543 }, { "epoch": 0.25756750654146027, "grad_norm": 0.9267546534538269, "learning_rate": 2.6106861002379264e-05, "loss": 0.1095, "step": 14544 }, { "epoch": 0.25758521607848867, "grad_norm": 0.7976185083389282, "learning_rate": 2.6106282730228004e-05, "loss": 0.1031, "step": 14545 }, { "epoch": 0.2576029256155171, "grad_norm": 0.7749096751213074, "learning_rate": 2.61057044215382e-05, "loss": 0.1051, "step": 14546 }, { "epoch": 0.2576206351525455, "grad_norm": 1.3449640274047852, "learning_rate": 2.6105126076311746e-05, "loss": 0.103, "step": 14547 }, { "epoch": 0.257638344689574, "grad_norm": 0.7286016345024109, "learning_rate": 2.6104547694550556e-05, "loss": 0.0873, "step": 14548 }, { "epoch": 0.25765605422660237, "grad_norm": 0.6262508034706116, "learning_rate": 2.6103969276256528e-05, "loss": 0.1082, "step": 14549 }, { "epoch": 0.2576737637636308, "grad_norm": 0.5656583309173584, "learning_rate": 2.610339082143156e-05, "loss": 0.098, "step": 14550 }, { "epoch": 0.2576914733006592, "grad_norm": 0.9643417000770569, "learning_rate": 2.6102812330077562e-05, "loss": 0.074, "step": 14551 }, { "epoch": 0.2577091828376877, "grad_norm": 0.9486006498336792, "learning_rate": 2.6102233802196433e-05, "loss": 0.1366, "step": 14552 }, { "epoch": 0.2577268923747161, "grad_norm": 0.8207620978355408, "learning_rate": 2.6101655237790075e-05, "loss": 0.0879, "step": 14553 }, { "epoch": 0.25774460191174453, "grad_norm": 1.281816005706787, "learning_rate": 2.61010766368604e-05, "loss": 0.0961, "step": 14554 }, { "epoch": 0.25776231144877293, "grad_norm": 0.7103471159934998, "learning_rate": 2.61004979994093e-05, "loss": 0.0906, "step": 14555 }, { "epoch": 0.2577800209858014, "grad_norm": 0.8690153956413269, "learning_rate": 2.6099919325438694e-05, "loss": 0.0787, "step": 14556 }, { "epoch": 0.2577977305228298, "grad_norm": 1.839181661605835, "learning_rate": 2.6099340614950465e-05, "loss": 0.1492, "step": 14557 }, { "epoch": 0.25781544005985824, "grad_norm": 0.7877046465873718, "learning_rate": 2.6098761867946534e-05, "loss": 0.1001, "step": 14558 }, { "epoch": 0.2578331495968867, "grad_norm": 0.7891610264778137, "learning_rate": 2.60981830844288e-05, "loss": 0.0904, "step": 14559 }, { "epoch": 0.2578508591339151, "grad_norm": 0.702551007270813, "learning_rate": 2.6097604264399163e-05, "loss": 0.0826, "step": 14560 }, { "epoch": 0.25786856867094354, "grad_norm": 0.829871654510498, "learning_rate": 2.609702540785953e-05, "loss": 0.0641, "step": 14561 }, { "epoch": 0.25788627820797194, "grad_norm": 0.8183537125587463, "learning_rate": 2.6096446514811805e-05, "loss": 0.1004, "step": 14562 }, { "epoch": 0.2579039877450004, "grad_norm": 0.9492713809013367, "learning_rate": 2.6095867585257898e-05, "loss": 0.0838, "step": 14563 }, { "epoch": 0.2579216972820288, "grad_norm": 1.1413733959197998, "learning_rate": 2.6095288619199703e-05, "loss": 0.1015, "step": 14564 }, { "epoch": 0.25793940681905725, "grad_norm": 0.8259278535842896, "learning_rate": 2.6094709616639132e-05, "loss": 0.1073, "step": 14565 }, { "epoch": 0.25795711635608565, "grad_norm": 0.9420735836029053, "learning_rate": 2.6094130577578096e-05, "loss": 0.1059, "step": 14566 }, { "epoch": 0.2579748258931141, "grad_norm": 1.0771582126617432, "learning_rate": 2.6093551502018484e-05, "loss": 0.1338, "step": 14567 }, { "epoch": 0.2579925354301425, "grad_norm": 0.6944736838340759, "learning_rate": 2.6092972389962215e-05, "loss": 0.0542, "step": 14568 }, { "epoch": 0.25801024496717095, "grad_norm": 0.3821704387664795, "learning_rate": 2.6092393241411185e-05, "loss": 0.0725, "step": 14569 }, { "epoch": 0.25802795450419935, "grad_norm": 0.946723461151123, "learning_rate": 2.6091814056367306e-05, "loss": 0.1354, "step": 14570 }, { "epoch": 0.2580456640412278, "grad_norm": 1.178022861480713, "learning_rate": 2.609123483483248e-05, "loss": 0.1032, "step": 14571 }, { "epoch": 0.2580633735782562, "grad_norm": 1.400377631187439, "learning_rate": 2.609065557680861e-05, "loss": 0.1299, "step": 14572 }, { "epoch": 0.25808108311528466, "grad_norm": 0.8393030166625977, "learning_rate": 2.609007628229761e-05, "loss": 0.1168, "step": 14573 }, { "epoch": 0.2580987926523131, "grad_norm": 1.2476568222045898, "learning_rate": 2.6089496951301375e-05, "loss": 0.1047, "step": 14574 }, { "epoch": 0.2581165021893415, "grad_norm": 0.8967838883399963, "learning_rate": 2.608891758382182e-05, "loss": 0.0941, "step": 14575 }, { "epoch": 0.25813421172636997, "grad_norm": 1.3166128396987915, "learning_rate": 2.6088338179860846e-05, "loss": 0.1344, "step": 14576 }, { "epoch": 0.25815192126339837, "grad_norm": 0.8501853942871094, "learning_rate": 2.6087758739420365e-05, "loss": 0.1104, "step": 14577 }, { "epoch": 0.2581696308004268, "grad_norm": 0.795861005783081, "learning_rate": 2.6087179262502276e-05, "loss": 0.1124, "step": 14578 }, { "epoch": 0.2581873403374552, "grad_norm": 0.7449671626091003, "learning_rate": 2.6086599749108487e-05, "loss": 0.0989, "step": 14579 }, { "epoch": 0.25820504987448367, "grad_norm": 0.9532802700996399, "learning_rate": 2.608602019924091e-05, "loss": 0.108, "step": 14580 }, { "epoch": 0.25822275941151207, "grad_norm": 1.0602864027023315, "learning_rate": 2.6085440612901442e-05, "loss": 0.1171, "step": 14581 }, { "epoch": 0.2582404689485405, "grad_norm": 0.3678090274333954, "learning_rate": 2.6084860990092003e-05, "loss": 0.0644, "step": 14582 }, { "epoch": 0.2582581784855689, "grad_norm": 0.706457257270813, "learning_rate": 2.608428133081449e-05, "loss": 0.1011, "step": 14583 }, { "epoch": 0.2582758880225974, "grad_norm": 0.7753899097442627, "learning_rate": 2.6083701635070808e-05, "loss": 0.1277, "step": 14584 }, { "epoch": 0.2582935975596258, "grad_norm": 1.1543803215026855, "learning_rate": 2.6083121902862876e-05, "loss": 0.1267, "step": 14585 }, { "epoch": 0.25831130709665423, "grad_norm": 0.71061772108078, "learning_rate": 2.6082542134192592e-05, "loss": 0.0801, "step": 14586 }, { "epoch": 0.25832901663368263, "grad_norm": 0.8593974113464355, "learning_rate": 2.6081962329061863e-05, "loss": 0.0574, "step": 14587 }, { "epoch": 0.2583467261707111, "grad_norm": 0.8706740736961365, "learning_rate": 2.60813824874726e-05, "loss": 0.1004, "step": 14588 }, { "epoch": 0.25836443570773954, "grad_norm": 1.0162205696105957, "learning_rate": 2.6080802609426705e-05, "loss": 0.0895, "step": 14589 }, { "epoch": 0.25838214524476794, "grad_norm": 0.9182543754577637, "learning_rate": 2.60802226949261e-05, "loss": 0.1198, "step": 14590 }, { "epoch": 0.2583998547817964, "grad_norm": 0.7651548981666565, "learning_rate": 2.607964274397267e-05, "loss": 0.0857, "step": 14591 }, { "epoch": 0.2584175643188248, "grad_norm": 0.7576450705528259, "learning_rate": 2.607906275656835e-05, "loss": 0.0628, "step": 14592 }, { "epoch": 0.25843527385585324, "grad_norm": 1.2832900285720825, "learning_rate": 2.6078482732715024e-05, "loss": 0.104, "step": 14593 }, { "epoch": 0.25845298339288164, "grad_norm": 1.0458348989486694, "learning_rate": 2.6077902672414617e-05, "loss": 0.1479, "step": 14594 }, { "epoch": 0.2584706929299101, "grad_norm": 1.0670559406280518, "learning_rate": 2.6077322575669024e-05, "loss": 0.1036, "step": 14595 }, { "epoch": 0.2584884024669385, "grad_norm": 1.1112624406814575, "learning_rate": 2.607674244248016e-05, "loss": 0.0937, "step": 14596 }, { "epoch": 0.25850611200396695, "grad_norm": 0.8225091099739075, "learning_rate": 2.607616227284994e-05, "loss": 0.0943, "step": 14597 }, { "epoch": 0.25852382154099535, "grad_norm": 0.7095812559127808, "learning_rate": 2.6075582066780266e-05, "loss": 0.1277, "step": 14598 }, { "epoch": 0.2585415310780238, "grad_norm": 0.5429444909095764, "learning_rate": 2.6075001824273043e-05, "loss": 0.099, "step": 14599 }, { "epoch": 0.2585592406150522, "grad_norm": 0.7061808705329895, "learning_rate": 2.6074421545330186e-05, "loss": 0.0661, "step": 14600 }, { "epoch": 0.25857695015208065, "grad_norm": 0.7627463936805725, "learning_rate": 2.6073841229953602e-05, "loss": 0.1, "step": 14601 }, { "epoch": 0.25859465968910905, "grad_norm": 0.9462427496910095, "learning_rate": 2.60732608781452e-05, "loss": 0.1324, "step": 14602 }, { "epoch": 0.2586123692261375, "grad_norm": 1.102893590927124, "learning_rate": 2.607268048990689e-05, "loss": 0.1231, "step": 14603 }, { "epoch": 0.25863007876316596, "grad_norm": 0.7279250621795654, "learning_rate": 2.607210006524058e-05, "loss": 0.0761, "step": 14604 }, { "epoch": 0.25864778830019436, "grad_norm": 1.0482134819030762, "learning_rate": 2.6071519604148182e-05, "loss": 0.1071, "step": 14605 }, { "epoch": 0.2586654978372228, "grad_norm": 1.0775173902511597, "learning_rate": 2.6070939106631607e-05, "loss": 0.0967, "step": 14606 }, { "epoch": 0.2586832073742512, "grad_norm": 0.7744837999343872, "learning_rate": 2.6070358572692757e-05, "loss": 0.073, "step": 14607 }, { "epoch": 0.25870091691127967, "grad_norm": 0.8804292678833008, "learning_rate": 2.606977800233355e-05, "loss": 0.0875, "step": 14608 }, { "epoch": 0.25871862644830806, "grad_norm": 0.5927222371101379, "learning_rate": 2.6069197395555895e-05, "loss": 0.0957, "step": 14609 }, { "epoch": 0.2587363359853365, "grad_norm": 0.670805037021637, "learning_rate": 2.6068616752361694e-05, "loss": 0.1202, "step": 14610 }, { "epoch": 0.2587540455223649, "grad_norm": 1.3330678939819336, "learning_rate": 2.6068036072752873e-05, "loss": 0.1066, "step": 14611 }, { "epoch": 0.25877175505939337, "grad_norm": 0.7353178262710571, "learning_rate": 2.606745535673132e-05, "loss": 0.0605, "step": 14612 }, { "epoch": 0.25878946459642177, "grad_norm": 0.9698297381401062, "learning_rate": 2.6066874604298968e-05, "loss": 0.0926, "step": 14613 }, { "epoch": 0.2588071741334502, "grad_norm": 1.1684893369674683, "learning_rate": 2.606629381545772e-05, "loss": 0.1236, "step": 14614 }, { "epoch": 0.2588248836704786, "grad_norm": 0.7692575454711914, "learning_rate": 2.6065712990209474e-05, "loss": 0.0766, "step": 14615 }, { "epoch": 0.2588425932075071, "grad_norm": 0.8014565110206604, "learning_rate": 2.606513212855616e-05, "loss": 0.0722, "step": 14616 }, { "epoch": 0.2588603027445355, "grad_norm": 1.6039072275161743, "learning_rate": 2.6064551230499675e-05, "loss": 0.1075, "step": 14617 }, { "epoch": 0.25887801228156393, "grad_norm": 0.7582020163536072, "learning_rate": 2.606397029604194e-05, "loss": 0.0808, "step": 14618 }, { "epoch": 0.2588957218185924, "grad_norm": 0.5517245531082153, "learning_rate": 2.6063389325184858e-05, "loss": 0.0682, "step": 14619 }, { "epoch": 0.2589134313556208, "grad_norm": 0.7879740595817566, "learning_rate": 2.6062808317930343e-05, "loss": 0.1161, "step": 14620 }, { "epoch": 0.25893114089264924, "grad_norm": 0.6300116777420044, "learning_rate": 2.6062227274280317e-05, "loss": 0.0909, "step": 14621 }, { "epoch": 0.25894885042967763, "grad_norm": 1.2786915302276611, "learning_rate": 2.6061646194236673e-05, "loss": 0.1171, "step": 14622 }, { "epoch": 0.2589665599667061, "grad_norm": 0.9436734318733215, "learning_rate": 2.6061065077801336e-05, "loss": 0.1199, "step": 14623 }, { "epoch": 0.2589842695037345, "grad_norm": 1.1701973676681519, "learning_rate": 2.6060483924976213e-05, "loss": 0.0653, "step": 14624 }, { "epoch": 0.25900197904076294, "grad_norm": 0.8955049514770508, "learning_rate": 2.6059902735763213e-05, "loss": 0.0919, "step": 14625 }, { "epoch": 0.25901968857779134, "grad_norm": 0.5664662718772888, "learning_rate": 2.605932151016426e-05, "loss": 0.0795, "step": 14626 }, { "epoch": 0.2590373981148198, "grad_norm": 0.7100342512130737, "learning_rate": 2.6058740248181248e-05, "loss": 0.0944, "step": 14627 }, { "epoch": 0.2590551076518482, "grad_norm": 1.4727572202682495, "learning_rate": 2.6058158949816105e-05, "loss": 0.1215, "step": 14628 }, { "epoch": 0.25907281718887665, "grad_norm": 0.8113971948623657, "learning_rate": 2.6057577615070735e-05, "loss": 0.11, "step": 14629 }, { "epoch": 0.25909052672590505, "grad_norm": 0.7318618893623352, "learning_rate": 2.6056996243947055e-05, "loss": 0.0762, "step": 14630 }, { "epoch": 0.2591082362629335, "grad_norm": 0.955910861492157, "learning_rate": 2.6056414836446972e-05, "loss": 0.0996, "step": 14631 }, { "epoch": 0.2591259457999619, "grad_norm": 0.5716781616210938, "learning_rate": 2.6055833392572408e-05, "loss": 0.0894, "step": 14632 }, { "epoch": 0.25914365533699035, "grad_norm": 0.6548027992248535, "learning_rate": 2.6055251912325266e-05, "loss": 0.079, "step": 14633 }, { "epoch": 0.2591613648740188, "grad_norm": 0.9267958402633667, "learning_rate": 2.605467039570746e-05, "loss": 0.1239, "step": 14634 }, { "epoch": 0.2591790744110472, "grad_norm": 0.7035727500915527, "learning_rate": 2.6054088842720915e-05, "loss": 0.1098, "step": 14635 }, { "epoch": 0.25919678394807566, "grad_norm": 1.0205543041229248, "learning_rate": 2.605350725336753e-05, "loss": 0.1211, "step": 14636 }, { "epoch": 0.25921449348510406, "grad_norm": 0.8514147996902466, "learning_rate": 2.605292562764923e-05, "loss": 0.0853, "step": 14637 }, { "epoch": 0.2592322030221325, "grad_norm": 0.9825121760368347, "learning_rate": 2.6052343965567916e-05, "loss": 0.105, "step": 14638 }, { "epoch": 0.2592499125591609, "grad_norm": 0.983350396156311, "learning_rate": 2.6051762267125514e-05, "loss": 0.093, "step": 14639 }, { "epoch": 0.25926762209618937, "grad_norm": 0.6318044066429138, "learning_rate": 2.6051180532323928e-05, "loss": 0.0823, "step": 14640 }, { "epoch": 0.25928533163321776, "grad_norm": 0.8165894746780396, "learning_rate": 2.605059876116508e-05, "loss": 0.1061, "step": 14641 }, { "epoch": 0.2593030411702462, "grad_norm": 0.9253107905387878, "learning_rate": 2.605001695365087e-05, "loss": 0.0786, "step": 14642 }, { "epoch": 0.2593207507072746, "grad_norm": 0.9962727427482605, "learning_rate": 2.6049435109783233e-05, "loss": 0.1128, "step": 14643 }, { "epoch": 0.25933846024430307, "grad_norm": 0.6379388570785522, "learning_rate": 2.6048853229564064e-05, "loss": 0.1396, "step": 14644 }, { "epoch": 0.25935616978133147, "grad_norm": 0.46255823969841003, "learning_rate": 2.6048271312995293e-05, "loss": 0.0885, "step": 14645 }, { "epoch": 0.2593738793183599, "grad_norm": 0.9677695631980896, "learning_rate": 2.6047689360078823e-05, "loss": 0.0807, "step": 14646 }, { "epoch": 0.2593915888553883, "grad_norm": 0.9332546591758728, "learning_rate": 2.604710737081657e-05, "loss": 0.1232, "step": 14647 }, { "epoch": 0.2594092983924168, "grad_norm": 0.6210776567459106, "learning_rate": 2.6046525345210457e-05, "loss": 0.077, "step": 14648 }, { "epoch": 0.25942700792944523, "grad_norm": 1.0175933837890625, "learning_rate": 2.6045943283262388e-05, "loss": 0.0691, "step": 14649 }, { "epoch": 0.25944471746647363, "grad_norm": 0.9873549938201904, "learning_rate": 2.6045361184974282e-05, "loss": 0.1226, "step": 14650 }, { "epoch": 0.2594624270035021, "grad_norm": 0.6402294635772705, "learning_rate": 2.6044779050348056e-05, "loss": 0.0966, "step": 14651 }, { "epoch": 0.2594801365405305, "grad_norm": 0.8227971196174622, "learning_rate": 2.6044196879385628e-05, "loss": 0.0928, "step": 14652 }, { "epoch": 0.25949784607755894, "grad_norm": 1.1420495510101318, "learning_rate": 2.6043614672088904e-05, "loss": 0.0864, "step": 14653 }, { "epoch": 0.25951555561458733, "grad_norm": 0.6445351839065552, "learning_rate": 2.6043032428459807e-05, "loss": 0.106, "step": 14654 }, { "epoch": 0.2595332651516158, "grad_norm": 0.794427752494812, "learning_rate": 2.604245014850025e-05, "loss": 0.1229, "step": 14655 }, { "epoch": 0.2595509746886442, "grad_norm": 0.7660442590713501, "learning_rate": 2.604186783221215e-05, "loss": 0.1257, "step": 14656 }, { "epoch": 0.25956868422567264, "grad_norm": 0.8598570823669434, "learning_rate": 2.604128547959742e-05, "loss": 0.1024, "step": 14657 }, { "epoch": 0.25958639376270104, "grad_norm": 1.0727710723876953, "learning_rate": 2.6040703090657978e-05, "loss": 0.1121, "step": 14658 }, { "epoch": 0.2596041032997295, "grad_norm": 0.9677048921585083, "learning_rate": 2.6040120665395744e-05, "loss": 0.0882, "step": 14659 }, { "epoch": 0.2596218128367579, "grad_norm": 0.8199235796928406, "learning_rate": 2.603953820381262e-05, "loss": 0.1235, "step": 14660 }, { "epoch": 0.25963952237378635, "grad_norm": 1.266608715057373, "learning_rate": 2.6038955705910542e-05, "loss": 0.1696, "step": 14661 }, { "epoch": 0.25965723191081475, "grad_norm": 0.5303802490234375, "learning_rate": 2.603837317169141e-05, "loss": 0.0497, "step": 14662 }, { "epoch": 0.2596749414478432, "grad_norm": 1.1723569631576538, "learning_rate": 2.6037790601157148e-05, "loss": 0.1567, "step": 14663 }, { "epoch": 0.25969265098487165, "grad_norm": 0.4462224841117859, "learning_rate": 2.6037207994309674e-05, "loss": 0.0927, "step": 14664 }, { "epoch": 0.25971036052190005, "grad_norm": 0.8427419066429138, "learning_rate": 2.60366253511509e-05, "loss": 0.0958, "step": 14665 }, { "epoch": 0.2597280700589285, "grad_norm": 0.8570930361747742, "learning_rate": 2.6036042671682746e-05, "loss": 0.0847, "step": 14666 }, { "epoch": 0.2597457795959569, "grad_norm": 0.6153296828269958, "learning_rate": 2.6035459955907126e-05, "loss": 0.1077, "step": 14667 }, { "epoch": 0.25976348913298536, "grad_norm": 0.8571672439575195, "learning_rate": 2.6034877203825954e-05, "loss": 0.0968, "step": 14668 }, { "epoch": 0.25978119867001376, "grad_norm": 1.002844214439392, "learning_rate": 2.6034294415441162e-05, "loss": 0.1347, "step": 14669 }, { "epoch": 0.2597989082070422, "grad_norm": 1.0892977714538574, "learning_rate": 2.6033711590754653e-05, "loss": 0.089, "step": 14670 }, { "epoch": 0.2598166177440706, "grad_norm": 0.9439561367034912, "learning_rate": 2.603312872976835e-05, "loss": 0.0938, "step": 14671 }, { "epoch": 0.25983432728109906, "grad_norm": 0.8470714092254639, "learning_rate": 2.6032545832484165e-05, "loss": 0.0868, "step": 14672 }, { "epoch": 0.25985203681812746, "grad_norm": 0.7363198399543762, "learning_rate": 2.6031962898904022e-05, "loss": 0.1004, "step": 14673 }, { "epoch": 0.2598697463551559, "grad_norm": 0.9795354008674622, "learning_rate": 2.603137992902984e-05, "loss": 0.1168, "step": 14674 }, { "epoch": 0.2598874558921843, "grad_norm": 0.5700733661651611, "learning_rate": 2.6030796922863532e-05, "loss": 0.0561, "step": 14675 }, { "epoch": 0.25990516542921277, "grad_norm": 1.208885669708252, "learning_rate": 2.6030213880407017e-05, "loss": 0.1077, "step": 14676 }, { "epoch": 0.25992287496624117, "grad_norm": 0.6413106322288513, "learning_rate": 2.6029630801662218e-05, "loss": 0.0982, "step": 14677 }, { "epoch": 0.2599405845032696, "grad_norm": 0.8987407684326172, "learning_rate": 2.6029047686631043e-05, "loss": 0.0869, "step": 14678 }, { "epoch": 0.2599582940402981, "grad_norm": 1.1419464349746704, "learning_rate": 2.602846453531542e-05, "loss": 0.1147, "step": 14679 }, { "epoch": 0.2599760035773265, "grad_norm": 0.7569507360458374, "learning_rate": 2.6027881347717267e-05, "loss": 0.0885, "step": 14680 }, { "epoch": 0.25999371311435493, "grad_norm": 0.826285719871521, "learning_rate": 2.6027298123838496e-05, "loss": 0.0845, "step": 14681 }, { "epoch": 0.26001142265138333, "grad_norm": 0.6615078449249268, "learning_rate": 2.602671486368103e-05, "loss": 0.0799, "step": 14682 }, { "epoch": 0.2600291321884118, "grad_norm": 1.2223528623580933, "learning_rate": 2.6026131567246787e-05, "loss": 0.1116, "step": 14683 }, { "epoch": 0.2600468417254402, "grad_norm": 0.6821671724319458, "learning_rate": 2.6025548234537685e-05, "loss": 0.1029, "step": 14684 }, { "epoch": 0.26006455126246864, "grad_norm": 0.6345797777175903, "learning_rate": 2.6024964865555644e-05, "loss": 0.0647, "step": 14685 }, { "epoch": 0.26008226079949703, "grad_norm": 1.0713484287261963, "learning_rate": 2.6024381460302592e-05, "loss": 0.1271, "step": 14686 }, { "epoch": 0.2600999703365255, "grad_norm": 1.004311203956604, "learning_rate": 2.6023798018780432e-05, "loss": 0.1055, "step": 14687 }, { "epoch": 0.2601176798735539, "grad_norm": 0.841016411781311, "learning_rate": 2.602321454099109e-05, "loss": 0.087, "step": 14688 }, { "epoch": 0.26013538941058234, "grad_norm": 0.7487421631813049, "learning_rate": 2.6022631026936493e-05, "loss": 0.0696, "step": 14689 }, { "epoch": 0.26015309894761074, "grad_norm": 0.5312727689743042, "learning_rate": 2.6022047476618556e-05, "loss": 0.0832, "step": 14690 }, { "epoch": 0.2601708084846392, "grad_norm": 0.7441684007644653, "learning_rate": 2.6021463890039192e-05, "loss": 0.0551, "step": 14691 }, { "epoch": 0.2601885180216676, "grad_norm": 0.9648680090904236, "learning_rate": 2.6020880267200327e-05, "loss": 0.1057, "step": 14692 }, { "epoch": 0.26020622755869605, "grad_norm": 1.245876669883728, "learning_rate": 2.6020296608103884e-05, "loss": 0.1001, "step": 14693 }, { "epoch": 0.2602239370957245, "grad_norm": 0.7699658870697021, "learning_rate": 2.6019712912751778e-05, "loss": 0.1016, "step": 14694 }, { "epoch": 0.2602416466327529, "grad_norm": 0.5969263315200806, "learning_rate": 2.6019129181145933e-05, "loss": 0.0694, "step": 14695 }, { "epoch": 0.26025935616978135, "grad_norm": 0.9603771567344666, "learning_rate": 2.6018545413288264e-05, "loss": 0.1467, "step": 14696 }, { "epoch": 0.26027706570680975, "grad_norm": 0.784956693649292, "learning_rate": 2.6017961609180696e-05, "loss": 0.1015, "step": 14697 }, { "epoch": 0.2602947752438382, "grad_norm": 0.8278798460960388, "learning_rate": 2.6017377768825148e-05, "loss": 0.0727, "step": 14698 }, { "epoch": 0.2603124847808666, "grad_norm": 0.8052574396133423, "learning_rate": 2.6016793892223543e-05, "loss": 0.0922, "step": 14699 }, { "epoch": 0.26033019431789506, "grad_norm": 1.144261121749878, "learning_rate": 2.60162099793778e-05, "loss": 0.1003, "step": 14700 }, { "epoch": 0.26034790385492346, "grad_norm": 1.7844414710998535, "learning_rate": 2.601562603028984e-05, "loss": 0.1129, "step": 14701 }, { "epoch": 0.2603656133919519, "grad_norm": 1.0651768445968628, "learning_rate": 2.6015042044961585e-05, "loss": 0.0701, "step": 14702 }, { "epoch": 0.2603833229289803, "grad_norm": 0.8526955842971802, "learning_rate": 2.6014458023394957e-05, "loss": 0.1015, "step": 14703 }, { "epoch": 0.26040103246600876, "grad_norm": 1.185025691986084, "learning_rate": 2.6013873965591876e-05, "loss": 0.0813, "step": 14704 }, { "epoch": 0.26041874200303716, "grad_norm": 0.7064526081085205, "learning_rate": 2.6013289871554263e-05, "loss": 0.0587, "step": 14705 }, { "epoch": 0.2604364515400656, "grad_norm": 0.6854767203330994, "learning_rate": 2.601270574128404e-05, "loss": 0.0764, "step": 14706 }, { "epoch": 0.260454161077094, "grad_norm": 0.9787240028381348, "learning_rate": 2.6012121574783126e-05, "loss": 0.0915, "step": 14707 }, { "epoch": 0.26047187061412247, "grad_norm": 0.6123991012573242, "learning_rate": 2.6011537372053453e-05, "loss": 0.0901, "step": 14708 }, { "epoch": 0.2604895801511509, "grad_norm": 1.0305296182632446, "learning_rate": 2.601095313309693e-05, "loss": 0.1282, "step": 14709 }, { "epoch": 0.2605072896881793, "grad_norm": 0.935750424861908, "learning_rate": 2.6010368857915484e-05, "loss": 0.145, "step": 14710 }, { "epoch": 0.2605249992252078, "grad_norm": 1.1669903993606567, "learning_rate": 2.6009784546511044e-05, "loss": 0.1265, "step": 14711 }, { "epoch": 0.2605427087622362, "grad_norm": 1.8675411939620972, "learning_rate": 2.6009200198885523e-05, "loss": 0.082, "step": 14712 }, { "epoch": 0.26056041829926463, "grad_norm": 1.2241061925888062, "learning_rate": 2.600861581504085e-05, "loss": 0.1112, "step": 14713 }, { "epoch": 0.260578127836293, "grad_norm": 1.2366498708724976, "learning_rate": 2.6008031394978938e-05, "loss": 0.0568, "step": 14714 }, { "epoch": 0.2605958373733215, "grad_norm": 0.5801372528076172, "learning_rate": 2.600744693870172e-05, "loss": 0.1108, "step": 14715 }, { "epoch": 0.2606135469103499, "grad_norm": 0.8340075612068176, "learning_rate": 2.6006862446211115e-05, "loss": 0.1174, "step": 14716 }, { "epoch": 0.26063125644737833, "grad_norm": 1.1298576593399048, "learning_rate": 2.6006277917509045e-05, "loss": 0.082, "step": 14717 }, { "epoch": 0.26064896598440673, "grad_norm": 0.9912476539611816, "learning_rate": 2.6005693352597432e-05, "loss": 0.1016, "step": 14718 }, { "epoch": 0.2606666755214352, "grad_norm": 1.038230299949646, "learning_rate": 2.6005108751478203e-05, "loss": 0.1097, "step": 14719 }, { "epoch": 0.2606843850584636, "grad_norm": 0.9126453399658203, "learning_rate": 2.600452411415328e-05, "loss": 0.1062, "step": 14720 }, { "epoch": 0.26070209459549204, "grad_norm": 0.570986807346344, "learning_rate": 2.6003939440624588e-05, "loss": 0.0904, "step": 14721 }, { "epoch": 0.26071980413252044, "grad_norm": 2.2603981494903564, "learning_rate": 2.600335473089404e-05, "loss": 0.1125, "step": 14722 }, { "epoch": 0.2607375136695489, "grad_norm": 0.652080237865448, "learning_rate": 2.600276998496358e-05, "loss": 0.1264, "step": 14723 }, { "epoch": 0.26075522320657735, "grad_norm": 0.8227963447570801, "learning_rate": 2.600218520283511e-05, "loss": 0.1037, "step": 14724 }, { "epoch": 0.26077293274360575, "grad_norm": 1.0884332656860352, "learning_rate": 2.600160038451056e-05, "loss": 0.1118, "step": 14725 }, { "epoch": 0.2607906422806342, "grad_norm": 1.9357901811599731, "learning_rate": 2.6001015529991868e-05, "loss": 0.0711, "step": 14726 }, { "epoch": 0.2608083518176626, "grad_norm": 0.7323583364486694, "learning_rate": 2.6000430639280944e-05, "loss": 0.0808, "step": 14727 }, { "epoch": 0.26082606135469105, "grad_norm": 1.8661078214645386, "learning_rate": 2.5999845712379714e-05, "loss": 0.0963, "step": 14728 }, { "epoch": 0.26084377089171945, "grad_norm": 0.5130515694618225, "learning_rate": 2.5999260749290106e-05, "loss": 0.1004, "step": 14729 }, { "epoch": 0.2608614804287479, "grad_norm": 0.5814390778541565, "learning_rate": 2.599867575001404e-05, "loss": 0.0936, "step": 14730 }, { "epoch": 0.2608791899657763, "grad_norm": 0.7933520674705505, "learning_rate": 2.5998090714553447e-05, "loss": 0.0709, "step": 14731 }, { "epoch": 0.26089689950280476, "grad_norm": 1.1571605205535889, "learning_rate": 2.5997505642910245e-05, "loss": 0.1385, "step": 14732 }, { "epoch": 0.26091460903983316, "grad_norm": 0.8686873316764832, "learning_rate": 2.5996920535086366e-05, "loss": 0.074, "step": 14733 }, { "epoch": 0.2609323185768616, "grad_norm": 1.008565902709961, "learning_rate": 2.5996335391083724e-05, "loss": 0.1106, "step": 14734 }, { "epoch": 0.26095002811389, "grad_norm": 0.7192808985710144, "learning_rate": 2.5995750210904254e-05, "loss": 0.1109, "step": 14735 }, { "epoch": 0.26096773765091846, "grad_norm": 0.7223030924797058, "learning_rate": 2.5995164994549877e-05, "loss": 0.0867, "step": 14736 }, { "epoch": 0.26098544718794686, "grad_norm": 0.7719318270683289, "learning_rate": 2.5994579742022525e-05, "loss": 0.0903, "step": 14737 }, { "epoch": 0.2610031567249753, "grad_norm": 0.7447165846824646, "learning_rate": 2.599399445332411e-05, "loss": 0.0863, "step": 14738 }, { "epoch": 0.26102086626200377, "grad_norm": 0.5292726159095764, "learning_rate": 2.599340912845657e-05, "loss": 0.0934, "step": 14739 }, { "epoch": 0.26103857579903217, "grad_norm": 1.275314211845398, "learning_rate": 2.5992823767421824e-05, "loss": 0.1255, "step": 14740 }, { "epoch": 0.2610562853360606, "grad_norm": 0.8368903994560242, "learning_rate": 2.59922383702218e-05, "loss": 0.1102, "step": 14741 }, { "epoch": 0.261073994873089, "grad_norm": 1.012139916419983, "learning_rate": 2.5991652936858422e-05, "loss": 0.1047, "step": 14742 }, { "epoch": 0.2610917044101175, "grad_norm": 0.938766360282898, "learning_rate": 2.5991067467333622e-05, "loss": 0.0859, "step": 14743 }, { "epoch": 0.2611094139471459, "grad_norm": 0.7234940528869629, "learning_rate": 2.5990481961649318e-05, "loss": 0.097, "step": 14744 }, { "epoch": 0.26112712348417433, "grad_norm": 0.5379011631011963, "learning_rate": 2.5989896419807444e-05, "loss": 0.0892, "step": 14745 }, { "epoch": 0.2611448330212027, "grad_norm": 0.5809690356254578, "learning_rate": 2.5989310841809914e-05, "loss": 0.0842, "step": 14746 }, { "epoch": 0.2611625425582312, "grad_norm": 0.670127809047699, "learning_rate": 2.598872522765867e-05, "loss": 0.1022, "step": 14747 }, { "epoch": 0.2611802520952596, "grad_norm": 0.9984736442565918, "learning_rate": 2.598813957735563e-05, "loss": 0.1045, "step": 14748 }, { "epoch": 0.26119796163228803, "grad_norm": 0.4794026017189026, "learning_rate": 2.598755389090272e-05, "loss": 0.0715, "step": 14749 }, { "epoch": 0.26121567116931643, "grad_norm": 0.7624415159225464, "learning_rate": 2.598696816830187e-05, "loss": 0.0774, "step": 14750 }, { "epoch": 0.2612333807063449, "grad_norm": 0.890978217124939, "learning_rate": 2.5986382409555012e-05, "loss": 0.0973, "step": 14751 }, { "epoch": 0.2612510902433733, "grad_norm": 0.9979225993156433, "learning_rate": 2.598579661466406e-05, "loss": 0.0907, "step": 14752 }, { "epoch": 0.26126879978040174, "grad_norm": 0.825080394744873, "learning_rate": 2.598521078363095e-05, "loss": 0.1091, "step": 14753 }, { "epoch": 0.2612865093174302, "grad_norm": 0.6964605450630188, "learning_rate": 2.5984624916457605e-05, "loss": 0.0978, "step": 14754 }, { "epoch": 0.2613042188544586, "grad_norm": 0.585825502872467, "learning_rate": 2.5984039013145962e-05, "loss": 0.089, "step": 14755 }, { "epoch": 0.26132192839148705, "grad_norm": 0.8969826698303223, "learning_rate": 2.5983453073697936e-05, "loss": 0.1106, "step": 14756 }, { "epoch": 0.26133963792851544, "grad_norm": 0.9113646745681763, "learning_rate": 2.598286709811546e-05, "loss": 0.142, "step": 14757 }, { "epoch": 0.2613573474655439, "grad_norm": 0.6906182169914246, "learning_rate": 2.5982281086400466e-05, "loss": 0.1186, "step": 14758 }, { "epoch": 0.2613750570025723, "grad_norm": 1.1805858612060547, "learning_rate": 2.5981695038554875e-05, "loss": 0.1017, "step": 14759 }, { "epoch": 0.26139276653960075, "grad_norm": 0.9969058036804199, "learning_rate": 2.598110895458062e-05, "loss": 0.0789, "step": 14760 }, { "epoch": 0.26141047607662915, "grad_norm": 1.0251917839050293, "learning_rate": 2.5980522834479624e-05, "loss": 0.0638, "step": 14761 }, { "epoch": 0.2614281856136576, "grad_norm": 1.2884101867675781, "learning_rate": 2.597993667825382e-05, "loss": 0.129, "step": 14762 }, { "epoch": 0.261445895150686, "grad_norm": 0.7756012678146362, "learning_rate": 2.5979350485905137e-05, "loss": 0.1269, "step": 14763 }, { "epoch": 0.26146360468771446, "grad_norm": 0.746288001537323, "learning_rate": 2.59787642574355e-05, "loss": 0.0826, "step": 14764 }, { "epoch": 0.26148131422474286, "grad_norm": 1.058125376701355, "learning_rate": 2.5978177992846835e-05, "loss": 0.0948, "step": 14765 }, { "epoch": 0.2614990237617713, "grad_norm": 1.0150014162063599, "learning_rate": 2.597759169214108e-05, "loss": 0.0912, "step": 14766 }, { "epoch": 0.2615167332987997, "grad_norm": 0.8156251907348633, "learning_rate": 2.597700535532016e-05, "loss": 0.1044, "step": 14767 }, { "epoch": 0.26153444283582816, "grad_norm": 1.011741280555725, "learning_rate": 2.5976418982385997e-05, "loss": 0.1169, "step": 14768 }, { "epoch": 0.2615521523728566, "grad_norm": 0.8255223035812378, "learning_rate": 2.597583257334053e-05, "loss": 0.105, "step": 14769 }, { "epoch": 0.261569861909885, "grad_norm": 0.7502027153968811, "learning_rate": 2.5975246128185684e-05, "loss": 0.0932, "step": 14770 }, { "epoch": 0.26158757144691347, "grad_norm": 0.4746754765510559, "learning_rate": 2.5974659646923384e-05, "loss": 0.0697, "step": 14771 }, { "epoch": 0.26160528098394187, "grad_norm": 0.7967153191566467, "learning_rate": 2.597407312955557e-05, "loss": 0.0761, "step": 14772 }, { "epoch": 0.2616229905209703, "grad_norm": 1.3772122859954834, "learning_rate": 2.597348657608416e-05, "loss": 0.0945, "step": 14773 }, { "epoch": 0.2616407000579987, "grad_norm": 0.9252740740776062, "learning_rate": 2.5972899986511092e-05, "loss": 0.1009, "step": 14774 }, { "epoch": 0.2616584095950272, "grad_norm": 0.7373980283737183, "learning_rate": 2.597231336083829e-05, "loss": 0.0843, "step": 14775 }, { "epoch": 0.2616761191320556, "grad_norm": 1.6657674312591553, "learning_rate": 2.597172669906769e-05, "loss": 0.1083, "step": 14776 }, { "epoch": 0.261693828669084, "grad_norm": 1.2297940254211426, "learning_rate": 2.5971140001201217e-05, "loss": 0.1098, "step": 14777 }, { "epoch": 0.2617115382061124, "grad_norm": 0.8890251517295837, "learning_rate": 2.5970553267240802e-05, "loss": 0.1117, "step": 14778 }, { "epoch": 0.2617292477431409, "grad_norm": 1.2945904731750488, "learning_rate": 2.596996649718838e-05, "loss": 0.0829, "step": 14779 }, { "epoch": 0.2617469572801693, "grad_norm": 0.7473850250244141, "learning_rate": 2.5969379691045875e-05, "loss": 0.1085, "step": 14780 }, { "epoch": 0.26176466681719773, "grad_norm": 0.5876367688179016, "learning_rate": 2.596879284881522e-05, "loss": 0.1101, "step": 14781 }, { "epoch": 0.2617823763542262, "grad_norm": 1.1449995040893555, "learning_rate": 2.596820597049835e-05, "loss": 0.0939, "step": 14782 }, { "epoch": 0.2618000858912546, "grad_norm": 0.7493425607681274, "learning_rate": 2.596761905609719e-05, "loss": 0.0966, "step": 14783 }, { "epoch": 0.26181779542828304, "grad_norm": 0.8883476257324219, "learning_rate": 2.596703210561367e-05, "loss": 0.1237, "step": 14784 }, { "epoch": 0.26183550496531144, "grad_norm": 1.2602382898330688, "learning_rate": 2.5966445119049724e-05, "loss": 0.1141, "step": 14785 }, { "epoch": 0.2618532145023399, "grad_norm": 0.7994390726089478, "learning_rate": 2.596585809640728e-05, "loss": 0.1416, "step": 14786 }, { "epoch": 0.2618709240393683, "grad_norm": 0.883378803730011, "learning_rate": 2.5965271037688278e-05, "loss": 0.1008, "step": 14787 }, { "epoch": 0.26188863357639675, "grad_norm": 0.9942126274108887, "learning_rate": 2.5964683942894643e-05, "loss": 0.0979, "step": 14788 }, { "epoch": 0.26190634311342514, "grad_norm": 0.9291870594024658, "learning_rate": 2.5964096812028308e-05, "loss": 0.1353, "step": 14789 }, { "epoch": 0.2619240526504536, "grad_norm": 0.7930245399475098, "learning_rate": 2.5963509645091196e-05, "loss": 0.1232, "step": 14790 }, { "epoch": 0.261941762187482, "grad_norm": 0.8833358287811279, "learning_rate": 2.596292244208525e-05, "loss": 0.1248, "step": 14791 }, { "epoch": 0.26195947172451045, "grad_norm": 0.8244411945343018, "learning_rate": 2.59623352030124e-05, "loss": 0.0821, "step": 14792 }, { "epoch": 0.26197718126153885, "grad_norm": 1.0643361806869507, "learning_rate": 2.5961747927874576e-05, "loss": 0.1058, "step": 14793 }, { "epoch": 0.2619948907985673, "grad_norm": 0.9664126634597778, "learning_rate": 2.5961160616673706e-05, "loss": 0.12, "step": 14794 }, { "epoch": 0.2620126003355957, "grad_norm": 0.8435265421867371, "learning_rate": 2.596057326941173e-05, "loss": 0.1105, "step": 14795 }, { "epoch": 0.26203030987262416, "grad_norm": 1.3648765087127686, "learning_rate": 2.5959985886090576e-05, "loss": 0.1126, "step": 14796 }, { "epoch": 0.2620480194096526, "grad_norm": 0.7628323435783386, "learning_rate": 2.5959398466712178e-05, "loss": 0.0719, "step": 14797 }, { "epoch": 0.262065728946681, "grad_norm": 0.8217112421989441, "learning_rate": 2.5958811011278464e-05, "loss": 0.0802, "step": 14798 }, { "epoch": 0.26208343848370946, "grad_norm": 0.8263654708862305, "learning_rate": 2.5958223519791375e-05, "loss": 0.0788, "step": 14799 }, { "epoch": 0.26210114802073786, "grad_norm": 0.7731134295463562, "learning_rate": 2.5957635992252836e-05, "loss": 0.0993, "step": 14800 }, { "epoch": 0.2621188575577663, "grad_norm": 1.2673094272613525, "learning_rate": 2.5957048428664777e-05, "loss": 0.0874, "step": 14801 }, { "epoch": 0.2621365670947947, "grad_norm": 0.9769918322563171, "learning_rate": 2.5956460829029145e-05, "loss": 0.1033, "step": 14802 }, { "epoch": 0.26215427663182317, "grad_norm": 1.3148986101150513, "learning_rate": 2.5955873193347863e-05, "loss": 0.1095, "step": 14803 }, { "epoch": 0.26217198616885157, "grad_norm": 0.9280118942260742, "learning_rate": 2.5955285521622866e-05, "loss": 0.1263, "step": 14804 }, { "epoch": 0.26218969570588, "grad_norm": 1.0341991186141968, "learning_rate": 2.595469781385609e-05, "loss": 0.145, "step": 14805 }, { "epoch": 0.2622074052429084, "grad_norm": 0.9372594952583313, "learning_rate": 2.5954110070049462e-05, "loss": 0.0738, "step": 14806 }, { "epoch": 0.2622251147799369, "grad_norm": 0.8229652643203735, "learning_rate": 2.5953522290204924e-05, "loss": 0.0765, "step": 14807 }, { "epoch": 0.2622428243169653, "grad_norm": 1.1573970317840576, "learning_rate": 2.59529344743244e-05, "loss": 0.1178, "step": 14808 }, { "epoch": 0.2622605338539937, "grad_norm": 0.7729935050010681, "learning_rate": 2.5952346622409837e-05, "loss": 0.1071, "step": 14809 }, { "epoch": 0.2622782433910221, "grad_norm": 0.7496561408042908, "learning_rate": 2.595175873446316e-05, "loss": 0.1056, "step": 14810 }, { "epoch": 0.2622959529280506, "grad_norm": 0.786475419998169, "learning_rate": 2.5951170810486296e-05, "loss": 0.0748, "step": 14811 }, { "epoch": 0.26231366246507903, "grad_norm": 0.9437966346740723, "learning_rate": 2.5950582850481194e-05, "loss": 0.1214, "step": 14812 }, { "epoch": 0.26233137200210743, "grad_norm": 0.7419259548187256, "learning_rate": 2.5949994854449782e-05, "loss": 0.1122, "step": 14813 }, { "epoch": 0.2623490815391359, "grad_norm": 0.4676688015460968, "learning_rate": 2.5949406822393994e-05, "loss": 0.0655, "step": 14814 }, { "epoch": 0.2623667910761643, "grad_norm": 1.2299398183822632, "learning_rate": 2.5948818754315766e-05, "loss": 0.1345, "step": 14815 }, { "epoch": 0.26238450061319274, "grad_norm": 0.5457404255867004, "learning_rate": 2.594823065021703e-05, "loss": 0.0972, "step": 14816 }, { "epoch": 0.26240221015022114, "grad_norm": 0.46318721771240234, "learning_rate": 2.5947642510099724e-05, "loss": 0.0952, "step": 14817 }, { "epoch": 0.2624199196872496, "grad_norm": 0.7709895968437195, "learning_rate": 2.594705433396578e-05, "loss": 0.0966, "step": 14818 }, { "epoch": 0.262437629224278, "grad_norm": 0.5537992119789124, "learning_rate": 2.594646612181713e-05, "loss": 0.0717, "step": 14819 }, { "epoch": 0.26245533876130644, "grad_norm": 1.0988926887512207, "learning_rate": 2.5945877873655724e-05, "loss": 0.1201, "step": 14820 }, { "epoch": 0.26247304829833484, "grad_norm": 0.5370144844055176, "learning_rate": 2.594528958948348e-05, "loss": 0.0816, "step": 14821 }, { "epoch": 0.2624907578353633, "grad_norm": 0.7762933373451233, "learning_rate": 2.5944701269302344e-05, "loss": 0.1032, "step": 14822 }, { "epoch": 0.2625084673723917, "grad_norm": 0.7942385077476501, "learning_rate": 2.5944112913114247e-05, "loss": 0.1268, "step": 14823 }, { "epoch": 0.26252617690942015, "grad_norm": 1.0811426639556885, "learning_rate": 2.594352452092112e-05, "loss": 0.0943, "step": 14824 }, { "epoch": 0.26254388644644855, "grad_norm": 0.7833890318870544, "learning_rate": 2.594293609272491e-05, "loss": 0.1226, "step": 14825 }, { "epoch": 0.262561595983477, "grad_norm": 0.8562355041503906, "learning_rate": 2.5942347628527547e-05, "loss": 0.0935, "step": 14826 }, { "epoch": 0.26257930552050546, "grad_norm": 1.1022696495056152, "learning_rate": 2.5941759128330964e-05, "loss": 0.1279, "step": 14827 }, { "epoch": 0.26259701505753386, "grad_norm": 0.8881797194480896, "learning_rate": 2.5941170592137097e-05, "loss": 0.1006, "step": 14828 }, { "epoch": 0.2626147245945623, "grad_norm": 0.7497678399085999, "learning_rate": 2.594058201994789e-05, "loss": 0.0845, "step": 14829 }, { "epoch": 0.2626324341315907, "grad_norm": 0.8813840746879578, "learning_rate": 2.5939993411765277e-05, "loss": 0.0756, "step": 14830 }, { "epoch": 0.26265014366861916, "grad_norm": 0.7034708857536316, "learning_rate": 2.5939404767591193e-05, "loss": 0.1092, "step": 14831 }, { "epoch": 0.26266785320564756, "grad_norm": 0.6401782631874084, "learning_rate": 2.593881608742757e-05, "loss": 0.0882, "step": 14832 }, { "epoch": 0.262685562742676, "grad_norm": 0.6894527673721313, "learning_rate": 2.593822737127635e-05, "loss": 0.0691, "step": 14833 }, { "epoch": 0.2627032722797044, "grad_norm": 1.001742959022522, "learning_rate": 2.5937638619139466e-05, "loss": 0.1188, "step": 14834 }, { "epoch": 0.26272098181673287, "grad_norm": 0.9241812229156494, "learning_rate": 2.5937049831018858e-05, "loss": 0.1027, "step": 14835 }, { "epoch": 0.26273869135376127, "grad_norm": 0.5610101819038391, "learning_rate": 2.5936461006916463e-05, "loss": 0.0869, "step": 14836 }, { "epoch": 0.2627564008907897, "grad_norm": 0.6404975056648254, "learning_rate": 2.5935872146834215e-05, "loss": 0.0688, "step": 14837 }, { "epoch": 0.2627741104278181, "grad_norm": 0.864046037197113, "learning_rate": 2.5935283250774057e-05, "loss": 0.0918, "step": 14838 }, { "epoch": 0.2627918199648466, "grad_norm": 0.8352556228637695, "learning_rate": 2.5934694318737923e-05, "loss": 0.0832, "step": 14839 }, { "epoch": 0.26280952950187497, "grad_norm": 1.0895624160766602, "learning_rate": 2.593410535072775e-05, "loss": 0.0664, "step": 14840 }, { "epoch": 0.2628272390389034, "grad_norm": 1.1473331451416016, "learning_rate": 2.5933516346745475e-05, "loss": 0.1468, "step": 14841 }, { "epoch": 0.2628449485759319, "grad_norm": 0.8551828861236572, "learning_rate": 2.5932927306793037e-05, "loss": 0.0995, "step": 14842 }, { "epoch": 0.2628626581129603, "grad_norm": 1.1958458423614502, "learning_rate": 2.593233823087238e-05, "loss": 0.0637, "step": 14843 }, { "epoch": 0.26288036764998873, "grad_norm": 0.43145063519477844, "learning_rate": 2.5931749118985427e-05, "loss": 0.0905, "step": 14844 }, { "epoch": 0.26289807718701713, "grad_norm": 1.0604126453399658, "learning_rate": 2.5931159971134127e-05, "loss": 0.1533, "step": 14845 }, { "epoch": 0.2629157867240456, "grad_norm": 1.4033949375152588, "learning_rate": 2.593057078732042e-05, "loss": 0.1486, "step": 14846 }, { "epoch": 0.262933496261074, "grad_norm": 0.9265289306640625, "learning_rate": 2.592998156754624e-05, "loss": 0.1117, "step": 14847 }, { "epoch": 0.26295120579810244, "grad_norm": 1.074710726737976, "learning_rate": 2.5929392311813527e-05, "loss": 0.0955, "step": 14848 }, { "epoch": 0.26296891533513084, "grad_norm": 0.6295585036277771, "learning_rate": 2.592880302012422e-05, "loss": 0.0951, "step": 14849 }, { "epoch": 0.2629866248721593, "grad_norm": 0.874521017074585, "learning_rate": 2.5928213692480252e-05, "loss": 0.1109, "step": 14850 }, { "epoch": 0.2630043344091877, "grad_norm": 0.9755545854568481, "learning_rate": 2.5927624328883565e-05, "loss": 0.1245, "step": 14851 }, { "epoch": 0.26302204394621614, "grad_norm": 0.6818135380744934, "learning_rate": 2.5927034929336104e-05, "loss": 0.0892, "step": 14852 }, { "epoch": 0.26303975348324454, "grad_norm": 1.0248799324035645, "learning_rate": 2.59264454938398e-05, "loss": 0.0803, "step": 14853 }, { "epoch": 0.263057463020273, "grad_norm": 0.6073268055915833, "learning_rate": 2.5925856022396596e-05, "loss": 0.0865, "step": 14854 }, { "epoch": 0.2630751725573014, "grad_norm": 0.8457728624343872, "learning_rate": 2.5925266515008433e-05, "loss": 0.0743, "step": 14855 }, { "epoch": 0.26309288209432985, "grad_norm": 0.84220951795578, "learning_rate": 2.5924676971677247e-05, "loss": 0.1127, "step": 14856 }, { "epoch": 0.2631105916313583, "grad_norm": 0.65423983335495, "learning_rate": 2.5924087392404976e-05, "loss": 0.0685, "step": 14857 }, { "epoch": 0.2631283011683867, "grad_norm": 0.46950477361679077, "learning_rate": 2.5923497777193568e-05, "loss": 0.0841, "step": 14858 }, { "epoch": 0.26314601070541516, "grad_norm": 1.1077967882156372, "learning_rate": 2.5922908126044954e-05, "loss": 0.0627, "step": 14859 }, { "epoch": 0.26316372024244356, "grad_norm": 0.6953386664390564, "learning_rate": 2.5922318438961076e-05, "loss": 0.0996, "step": 14860 }, { "epoch": 0.263181429779472, "grad_norm": 0.7729781866073608, "learning_rate": 2.5921728715943878e-05, "loss": 0.0669, "step": 14861 }, { "epoch": 0.2631991393165004, "grad_norm": 0.9827237725257874, "learning_rate": 2.5921138956995295e-05, "loss": 0.0991, "step": 14862 }, { "epoch": 0.26321684885352886, "grad_norm": 0.817227303981781, "learning_rate": 2.592054916211727e-05, "loss": 0.1022, "step": 14863 }, { "epoch": 0.26323455839055726, "grad_norm": 0.7396249175071716, "learning_rate": 2.591995933131174e-05, "loss": 0.0784, "step": 14864 }, { "epoch": 0.2632522679275857, "grad_norm": 1.0138790607452393, "learning_rate": 2.5919369464580654e-05, "loss": 0.1255, "step": 14865 }, { "epoch": 0.2632699774646141, "grad_norm": 1.0600087642669678, "learning_rate": 2.5918779561925942e-05, "loss": 0.0954, "step": 14866 }, { "epoch": 0.26328768700164257, "grad_norm": 1.0617049932479858, "learning_rate": 2.5918189623349552e-05, "loss": 0.098, "step": 14867 }, { "epoch": 0.26330539653867097, "grad_norm": 0.8128491044044495, "learning_rate": 2.591759964885342e-05, "loss": 0.1069, "step": 14868 }, { "epoch": 0.2633231060756994, "grad_norm": 0.8429560661315918, "learning_rate": 2.591700963843949e-05, "loss": 0.1032, "step": 14869 }, { "epoch": 0.2633408156127278, "grad_norm": 1.02328360080719, "learning_rate": 2.5916419592109705e-05, "loss": 0.1166, "step": 14870 }, { "epoch": 0.2633585251497563, "grad_norm": 0.5445410013198853, "learning_rate": 2.5915829509866003e-05, "loss": 0.0658, "step": 14871 }, { "epoch": 0.2633762346867847, "grad_norm": 0.8234012722969055, "learning_rate": 2.5915239391710324e-05, "loss": 0.0875, "step": 14872 }, { "epoch": 0.2633939442238131, "grad_norm": 0.7298033833503723, "learning_rate": 2.5914649237644612e-05, "loss": 0.0848, "step": 14873 }, { "epoch": 0.2634116537608416, "grad_norm": 0.9236401319503784, "learning_rate": 2.5914059047670812e-05, "loss": 0.0923, "step": 14874 }, { "epoch": 0.26342936329787, "grad_norm": 0.7622470855712891, "learning_rate": 2.5913468821790858e-05, "loss": 0.1329, "step": 14875 }, { "epoch": 0.26344707283489843, "grad_norm": 0.9080469608306885, "learning_rate": 2.5912878560006696e-05, "loss": 0.0989, "step": 14876 }, { "epoch": 0.26346478237192683, "grad_norm": 0.8680525422096252, "learning_rate": 2.5912288262320266e-05, "loss": 0.1094, "step": 14877 }, { "epoch": 0.2634824919089553, "grad_norm": 1.41261887550354, "learning_rate": 2.591169792873351e-05, "loss": 0.0798, "step": 14878 }, { "epoch": 0.2635002014459837, "grad_norm": 0.8829968571662903, "learning_rate": 2.5911107559248376e-05, "loss": 0.0895, "step": 14879 }, { "epoch": 0.26351791098301214, "grad_norm": 0.6663966774940491, "learning_rate": 2.5910517153866803e-05, "loss": 0.0996, "step": 14880 }, { "epoch": 0.26353562052004054, "grad_norm": 1.1158554553985596, "learning_rate": 2.5909926712590724e-05, "loss": 0.1523, "step": 14881 }, { "epoch": 0.263553330057069, "grad_norm": 0.6975429058074951, "learning_rate": 2.5909336235422096e-05, "loss": 0.0891, "step": 14882 }, { "epoch": 0.2635710395940974, "grad_norm": 0.6921257972717285, "learning_rate": 2.590874572236285e-05, "loss": 0.0699, "step": 14883 }, { "epoch": 0.26358874913112584, "grad_norm": 0.9377722144126892, "learning_rate": 2.5908155173414943e-05, "loss": 0.0823, "step": 14884 }, { "epoch": 0.26360645866815424, "grad_norm": 0.6675862073898315, "learning_rate": 2.59075645885803e-05, "loss": 0.0864, "step": 14885 }, { "epoch": 0.2636241682051827, "grad_norm": 0.8410717248916626, "learning_rate": 2.5906973967860876e-05, "loss": 0.0858, "step": 14886 }, { "epoch": 0.26364187774221115, "grad_norm": 2.093048334121704, "learning_rate": 2.590638331125861e-05, "loss": 0.1272, "step": 14887 }, { "epoch": 0.26365958727923955, "grad_norm": 0.6512764692306519, "learning_rate": 2.5905792618775447e-05, "loss": 0.1073, "step": 14888 }, { "epoch": 0.263677296816268, "grad_norm": 0.6159845590591431, "learning_rate": 2.590520189041333e-05, "loss": 0.101, "step": 14889 }, { "epoch": 0.2636950063532964, "grad_norm": 0.7555111050605774, "learning_rate": 2.5904611126174197e-05, "loss": 0.1131, "step": 14890 }, { "epoch": 0.26371271589032486, "grad_norm": 0.8484063744544983, "learning_rate": 2.5904020326060003e-05, "loss": 0.1176, "step": 14891 }, { "epoch": 0.26373042542735325, "grad_norm": 0.7239037752151489, "learning_rate": 2.5903429490072683e-05, "loss": 0.1209, "step": 14892 }, { "epoch": 0.2637481349643817, "grad_norm": 1.0190056562423706, "learning_rate": 2.5902838618214186e-05, "loss": 0.1269, "step": 14893 }, { "epoch": 0.2637658445014101, "grad_norm": 0.8817144632339478, "learning_rate": 2.5902247710486445e-05, "loss": 0.1084, "step": 14894 }, { "epoch": 0.26378355403843856, "grad_norm": 0.9055063128471375, "learning_rate": 2.590165676689142e-05, "loss": 0.0851, "step": 14895 }, { "epoch": 0.26380126357546696, "grad_norm": 0.7757046222686768, "learning_rate": 2.590106578743104e-05, "loss": 0.0842, "step": 14896 }, { "epoch": 0.2638189731124954, "grad_norm": 0.8378995060920715, "learning_rate": 2.5900474772107255e-05, "loss": 0.069, "step": 14897 }, { "epoch": 0.2638366826495238, "grad_norm": 1.533673882484436, "learning_rate": 2.5899883720922013e-05, "loss": 0.0776, "step": 14898 }, { "epoch": 0.26385439218655227, "grad_norm": 0.8110154867172241, "learning_rate": 2.5899292633877258e-05, "loss": 0.0635, "step": 14899 }, { "epoch": 0.26387210172358067, "grad_norm": 1.0096068382263184, "learning_rate": 2.5898701510974932e-05, "loss": 0.0919, "step": 14900 }, { "epoch": 0.2638898112606091, "grad_norm": 0.7290483713150024, "learning_rate": 2.589811035221698e-05, "loss": 0.0676, "step": 14901 }, { "epoch": 0.2639075207976376, "grad_norm": 0.5275712013244629, "learning_rate": 2.589751915760535e-05, "loss": 0.0934, "step": 14902 }, { "epoch": 0.263925230334666, "grad_norm": 1.054788589477539, "learning_rate": 2.589692792714198e-05, "loss": 0.1084, "step": 14903 }, { "epoch": 0.2639429398716944, "grad_norm": 0.46282216906547546, "learning_rate": 2.589633666082882e-05, "loss": 0.0875, "step": 14904 }, { "epoch": 0.2639606494087228, "grad_norm": 0.6854591369628906, "learning_rate": 2.5895745358667814e-05, "loss": 0.0843, "step": 14905 }, { "epoch": 0.2639783589457513, "grad_norm": 1.0921530723571777, "learning_rate": 2.589515402066091e-05, "loss": 0.1086, "step": 14906 }, { "epoch": 0.2639960684827797, "grad_norm": 0.8002527952194214, "learning_rate": 2.589456264681005e-05, "loss": 0.096, "step": 14907 }, { "epoch": 0.26401377801980813, "grad_norm": 0.8458983302116394, "learning_rate": 2.589397123711718e-05, "loss": 0.1212, "step": 14908 }, { "epoch": 0.26403148755683653, "grad_norm": 0.8854836225509644, "learning_rate": 2.5893379791584247e-05, "loss": 0.0542, "step": 14909 }, { "epoch": 0.264049197093865, "grad_norm": 0.747299313545227, "learning_rate": 2.58927883102132e-05, "loss": 0.0696, "step": 14910 }, { "epoch": 0.2640669066308934, "grad_norm": 1.284859299659729, "learning_rate": 2.589219679300598e-05, "loss": 0.1292, "step": 14911 }, { "epoch": 0.26408461616792184, "grad_norm": 0.852070689201355, "learning_rate": 2.5891605239964527e-05, "loss": 0.1373, "step": 14912 }, { "epoch": 0.26410232570495024, "grad_norm": 0.6813582181930542, "learning_rate": 2.5891013651090796e-05, "loss": 0.1046, "step": 14913 }, { "epoch": 0.2641200352419787, "grad_norm": 1.0419179201126099, "learning_rate": 2.589042202638674e-05, "loss": 0.1034, "step": 14914 }, { "epoch": 0.2641377447790071, "grad_norm": 0.49970054626464844, "learning_rate": 2.588983036585429e-05, "loss": 0.0652, "step": 14915 }, { "epoch": 0.26415545431603554, "grad_norm": 0.930179238319397, "learning_rate": 2.58892386694954e-05, "loss": 0.0908, "step": 14916 }, { "epoch": 0.264173163853064, "grad_norm": 1.1459918022155762, "learning_rate": 2.588864693731202e-05, "loss": 0.1005, "step": 14917 }, { "epoch": 0.2641908733900924, "grad_norm": 0.9262022972106934, "learning_rate": 2.588805516930609e-05, "loss": 0.1368, "step": 14918 }, { "epoch": 0.26420858292712085, "grad_norm": 1.054308533668518, "learning_rate": 2.5887463365479563e-05, "loss": 0.132, "step": 14919 }, { "epoch": 0.26422629246414925, "grad_norm": 0.9278860688209534, "learning_rate": 2.5886871525834383e-05, "loss": 0.0962, "step": 14920 }, { "epoch": 0.2642440020011777, "grad_norm": 0.709552526473999, "learning_rate": 2.5886279650372492e-05, "loss": 0.1124, "step": 14921 }, { "epoch": 0.2642617115382061, "grad_norm": 0.9086069464683533, "learning_rate": 2.5885687739095843e-05, "loss": 0.1239, "step": 14922 }, { "epoch": 0.26427942107523456, "grad_norm": 0.6755024790763855, "learning_rate": 2.588509579200638e-05, "loss": 0.0757, "step": 14923 }, { "epoch": 0.26429713061226295, "grad_norm": 1.051328182220459, "learning_rate": 2.5884503809106062e-05, "loss": 0.0971, "step": 14924 }, { "epoch": 0.2643148401492914, "grad_norm": 0.7658969759941101, "learning_rate": 2.588391179039682e-05, "loss": 0.1425, "step": 14925 }, { "epoch": 0.2643325496863198, "grad_norm": 1.019110083580017, "learning_rate": 2.5883319735880614e-05, "loss": 0.1077, "step": 14926 }, { "epoch": 0.26435025922334826, "grad_norm": 0.818809986114502, "learning_rate": 2.588272764555938e-05, "loss": 0.0831, "step": 14927 }, { "epoch": 0.26436796876037666, "grad_norm": 0.6661375761032104, "learning_rate": 2.5882135519435074e-05, "loss": 0.0615, "step": 14928 }, { "epoch": 0.2643856782974051, "grad_norm": 0.7356210947036743, "learning_rate": 2.5881543357509646e-05, "loss": 0.1227, "step": 14929 }, { "epoch": 0.2644033878344335, "grad_norm": 1.2963284254074097, "learning_rate": 2.588095115978504e-05, "loss": 0.1173, "step": 14930 }, { "epoch": 0.26442109737146197, "grad_norm": 1.0751897096633911, "learning_rate": 2.5880358926263205e-05, "loss": 0.0825, "step": 14931 }, { "epoch": 0.2644388069084904, "grad_norm": 0.4863624572753906, "learning_rate": 2.5879766656946092e-05, "loss": 0.0845, "step": 14932 }, { "epoch": 0.2644565164455188, "grad_norm": 0.7242389917373657, "learning_rate": 2.5879174351835646e-05, "loss": 0.1066, "step": 14933 }, { "epoch": 0.2644742259825473, "grad_norm": 1.131017804145813, "learning_rate": 2.5878582010933817e-05, "loss": 0.0696, "step": 14934 }, { "epoch": 0.26449193551957567, "grad_norm": 0.7186011672019958, "learning_rate": 2.587798963424255e-05, "loss": 0.1265, "step": 14935 }, { "epoch": 0.2645096450566041, "grad_norm": 1.0185773372650146, "learning_rate": 2.5877397221763802e-05, "loss": 0.1168, "step": 14936 }, { "epoch": 0.2645273545936325, "grad_norm": 0.6606224775314331, "learning_rate": 2.5876804773499514e-05, "loss": 0.075, "step": 14937 }, { "epoch": 0.264545064130661, "grad_norm": 0.8209274411201477, "learning_rate": 2.587621228945164e-05, "loss": 0.1419, "step": 14938 }, { "epoch": 0.2645627736676894, "grad_norm": 0.8464984893798828, "learning_rate": 2.5875619769622124e-05, "loss": 0.1069, "step": 14939 }, { "epoch": 0.26458048320471783, "grad_norm": 0.6444608569145203, "learning_rate": 2.5875027214012926e-05, "loss": 0.0989, "step": 14940 }, { "epoch": 0.26459819274174623, "grad_norm": 0.8600311875343323, "learning_rate": 2.5874434622625985e-05, "loss": 0.0959, "step": 14941 }, { "epoch": 0.2646159022787747, "grad_norm": 0.6222544312477112, "learning_rate": 2.5873841995463252e-05, "loss": 0.0883, "step": 14942 }, { "epoch": 0.2646336118158031, "grad_norm": 1.1946851015090942, "learning_rate": 2.5873249332526682e-05, "loss": 0.1077, "step": 14943 }, { "epoch": 0.26465132135283154, "grad_norm": 0.9834634065628052, "learning_rate": 2.5872656633818214e-05, "loss": 0.0802, "step": 14944 }, { "epoch": 0.26466903088985994, "grad_norm": 0.8241690397262573, "learning_rate": 2.5872063899339815e-05, "loss": 0.0819, "step": 14945 }, { "epoch": 0.2646867404268884, "grad_norm": 0.9139531850814819, "learning_rate": 2.587147112909342e-05, "loss": 0.09, "step": 14946 }, { "epoch": 0.26470444996391684, "grad_norm": 1.1281975507736206, "learning_rate": 2.5870878323080985e-05, "loss": 0.103, "step": 14947 }, { "epoch": 0.26472215950094524, "grad_norm": 1.0196436643600464, "learning_rate": 2.587028548130446e-05, "loss": 0.1376, "step": 14948 }, { "epoch": 0.2647398690379737, "grad_norm": 0.7197945713996887, "learning_rate": 2.5869692603765795e-05, "loss": 0.1195, "step": 14949 }, { "epoch": 0.2647575785750021, "grad_norm": 0.9220949411392212, "learning_rate": 2.5869099690466944e-05, "loss": 0.1231, "step": 14950 }, { "epoch": 0.26477528811203055, "grad_norm": 0.7204596996307373, "learning_rate": 2.586850674140985e-05, "loss": 0.1228, "step": 14951 }, { "epoch": 0.26479299764905895, "grad_norm": 0.8250581622123718, "learning_rate": 2.5867913756596472e-05, "loss": 0.1249, "step": 14952 }, { "epoch": 0.2648107071860874, "grad_norm": 0.809916615486145, "learning_rate": 2.5867320736028755e-05, "loss": 0.1151, "step": 14953 }, { "epoch": 0.2648284167231158, "grad_norm": 0.723863422870636, "learning_rate": 2.5866727679708654e-05, "loss": 0.1136, "step": 14954 }, { "epoch": 0.26484612626014425, "grad_norm": 0.6503912210464478, "learning_rate": 2.5866134587638115e-05, "loss": 0.1047, "step": 14955 }, { "epoch": 0.26486383579717265, "grad_norm": 0.701354444026947, "learning_rate": 2.586554145981909e-05, "loss": 0.0974, "step": 14956 }, { "epoch": 0.2648815453342011, "grad_norm": 0.6339819431304932, "learning_rate": 2.586494829625354e-05, "loss": 0.0767, "step": 14957 }, { "epoch": 0.2648992548712295, "grad_norm": 1.1390504837036133, "learning_rate": 2.5864355096943405e-05, "loss": 0.1147, "step": 14958 }, { "epoch": 0.26491696440825796, "grad_norm": 0.7036399841308594, "learning_rate": 2.5863761861890638e-05, "loss": 0.0948, "step": 14959 }, { "epoch": 0.26493467394528636, "grad_norm": 0.6926330327987671, "learning_rate": 2.5863168591097198e-05, "loss": 0.0736, "step": 14960 }, { "epoch": 0.2649523834823148, "grad_norm": 0.9168150424957275, "learning_rate": 2.586257528456503e-05, "loss": 0.0848, "step": 14961 }, { "epoch": 0.26497009301934327, "grad_norm": 1.1846895217895508, "learning_rate": 2.5861981942296085e-05, "loss": 0.131, "step": 14962 }, { "epoch": 0.26498780255637167, "grad_norm": 1.2239031791687012, "learning_rate": 2.5861388564292326e-05, "loss": 0.1078, "step": 14963 }, { "epoch": 0.2650055120934001, "grad_norm": 0.9253865480422974, "learning_rate": 2.586079515055569e-05, "loss": 0.0886, "step": 14964 }, { "epoch": 0.2650232216304285, "grad_norm": 0.7552544474601746, "learning_rate": 2.586020170108814e-05, "loss": 0.0971, "step": 14965 }, { "epoch": 0.265040931167457, "grad_norm": 0.8240410089492798, "learning_rate": 2.5859608215891623e-05, "loss": 0.0909, "step": 14966 }, { "epoch": 0.26505864070448537, "grad_norm": 1.0552055835723877, "learning_rate": 2.5859014694968093e-05, "loss": 0.1269, "step": 14967 }, { "epoch": 0.2650763502415138, "grad_norm": 0.9594316482543945, "learning_rate": 2.58584211383195e-05, "loss": 0.1261, "step": 14968 }, { "epoch": 0.2650940597785422, "grad_norm": 0.6372714638710022, "learning_rate": 2.5857827545947804e-05, "loss": 0.0831, "step": 14969 }, { "epoch": 0.2651117693155707, "grad_norm": 0.6254749298095703, "learning_rate": 2.5857233917854958e-05, "loss": 0.1074, "step": 14970 }, { "epoch": 0.2651294788525991, "grad_norm": 0.5709721446037292, "learning_rate": 2.58566402540429e-05, "loss": 0.0802, "step": 14971 }, { "epoch": 0.26514718838962753, "grad_norm": 1.0037155151367188, "learning_rate": 2.5856046554513602e-05, "loss": 0.0563, "step": 14972 }, { "epoch": 0.26516489792665593, "grad_norm": 0.8156819939613342, "learning_rate": 2.5855452819269005e-05, "loss": 0.1228, "step": 14973 }, { "epoch": 0.2651826074636844, "grad_norm": 0.900562584400177, "learning_rate": 2.585485904831107e-05, "loss": 0.0934, "step": 14974 }, { "epoch": 0.2652003170007128, "grad_norm": 0.7658376097679138, "learning_rate": 2.585426524164174e-05, "loss": 0.1015, "step": 14975 }, { "epoch": 0.26521802653774124, "grad_norm": 0.6812503933906555, "learning_rate": 2.5853671399262983e-05, "loss": 0.0698, "step": 14976 }, { "epoch": 0.2652357360747697, "grad_norm": 0.69384765625, "learning_rate": 2.5853077521176742e-05, "loss": 0.0798, "step": 14977 }, { "epoch": 0.2652534456117981, "grad_norm": 0.8056071400642395, "learning_rate": 2.585248360738497e-05, "loss": 0.0682, "step": 14978 }, { "epoch": 0.26527115514882654, "grad_norm": 0.9304819107055664, "learning_rate": 2.585188965788963e-05, "loss": 0.1129, "step": 14979 }, { "epoch": 0.26528886468585494, "grad_norm": 1.019874930381775, "learning_rate": 2.5851295672692666e-05, "loss": 0.1069, "step": 14980 }, { "epoch": 0.2653065742228834, "grad_norm": 0.835918128490448, "learning_rate": 2.585070165179604e-05, "loss": 0.067, "step": 14981 }, { "epoch": 0.2653242837599118, "grad_norm": 1.1124109029769897, "learning_rate": 2.58501075952017e-05, "loss": 0.1236, "step": 14982 }, { "epoch": 0.26534199329694025, "grad_norm": 0.7643265724182129, "learning_rate": 2.5849513502911605e-05, "loss": 0.0831, "step": 14983 }, { "epoch": 0.26535970283396865, "grad_norm": 0.8922755718231201, "learning_rate": 2.5848919374927707e-05, "loss": 0.1128, "step": 14984 }, { "epoch": 0.2653774123709971, "grad_norm": 0.8159493207931519, "learning_rate": 2.584832521125196e-05, "loss": 0.0654, "step": 14985 }, { "epoch": 0.2653951219080255, "grad_norm": 0.6800519824028015, "learning_rate": 2.584773101188633e-05, "loss": 0.0655, "step": 14986 }, { "epoch": 0.26541283144505395, "grad_norm": 0.7508583068847656, "learning_rate": 2.584713677683275e-05, "loss": 0.1198, "step": 14987 }, { "epoch": 0.26543054098208235, "grad_norm": 0.8282297849655151, "learning_rate": 2.5846542506093193e-05, "loss": 0.1005, "step": 14988 }, { "epoch": 0.2654482505191108, "grad_norm": 0.6943179368972778, "learning_rate": 2.584594819966961e-05, "loss": 0.1157, "step": 14989 }, { "epoch": 0.2654659600561392, "grad_norm": 0.7679041624069214, "learning_rate": 2.584535385756395e-05, "loss": 0.0854, "step": 14990 }, { "epoch": 0.26548366959316766, "grad_norm": 0.8464511632919312, "learning_rate": 2.5844759479778175e-05, "loss": 0.0908, "step": 14991 }, { "epoch": 0.2655013791301961, "grad_norm": 2.22005295753479, "learning_rate": 2.5844165066314236e-05, "loss": 0.0727, "step": 14992 }, { "epoch": 0.2655190886672245, "grad_norm": 0.5497163534164429, "learning_rate": 2.5843570617174096e-05, "loss": 0.0753, "step": 14993 }, { "epoch": 0.26553679820425297, "grad_norm": 0.8389102220535278, "learning_rate": 2.58429761323597e-05, "loss": 0.0762, "step": 14994 }, { "epoch": 0.26555450774128136, "grad_norm": 0.7709354758262634, "learning_rate": 2.5842381611873013e-05, "loss": 0.0876, "step": 14995 }, { "epoch": 0.2655722172783098, "grad_norm": 0.8110032677650452, "learning_rate": 2.5841787055715986e-05, "loss": 0.1142, "step": 14996 }, { "epoch": 0.2655899268153382, "grad_norm": 0.7794337272644043, "learning_rate": 2.5841192463890574e-05, "loss": 0.111, "step": 14997 }, { "epoch": 0.26560763635236667, "grad_norm": 0.8479118943214417, "learning_rate": 2.584059783639874e-05, "loss": 0.1269, "step": 14998 }, { "epoch": 0.26562534588939507, "grad_norm": 0.955576479434967, "learning_rate": 2.5840003173242427e-05, "loss": 0.1407, "step": 14999 }, { "epoch": 0.2656430554264235, "grad_norm": 0.6744577288627625, "learning_rate": 2.5839408474423608e-05, "loss": 0.0786, "step": 15000 }, { "epoch": 0.2656607649634519, "grad_norm": 0.7343684434890747, "learning_rate": 2.5838813739944228e-05, "loss": 0.0848, "step": 15001 }, { "epoch": 0.2656784745004804, "grad_norm": 0.8700651526451111, "learning_rate": 2.5838218969806248e-05, "loss": 0.0896, "step": 15002 }, { "epoch": 0.2656961840375088, "grad_norm": 1.0131360292434692, "learning_rate": 2.583762416401162e-05, "loss": 0.0926, "step": 15003 }, { "epoch": 0.26571389357453723, "grad_norm": 0.578920841217041, "learning_rate": 2.5837029322562313e-05, "loss": 0.096, "step": 15004 }, { "epoch": 0.26573160311156563, "grad_norm": 0.7014847993850708, "learning_rate": 2.5836434445460273e-05, "loss": 0.0911, "step": 15005 }, { "epoch": 0.2657493126485941, "grad_norm": 0.5768924355506897, "learning_rate": 2.5835839532707454e-05, "loss": 0.0715, "step": 15006 }, { "epoch": 0.26576702218562254, "grad_norm": 0.7266510725021362, "learning_rate": 2.5835244584305824e-05, "loss": 0.0612, "step": 15007 }, { "epoch": 0.26578473172265094, "grad_norm": 0.8969127535820007, "learning_rate": 2.5834649600257334e-05, "loss": 0.1008, "step": 15008 }, { "epoch": 0.2658024412596794, "grad_norm": 0.7377622127532959, "learning_rate": 2.5834054580563942e-05, "loss": 0.1041, "step": 15009 }, { "epoch": 0.2658201507967078, "grad_norm": 0.648395836353302, "learning_rate": 2.583345952522761e-05, "loss": 0.0946, "step": 15010 }, { "epoch": 0.26583786033373624, "grad_norm": 0.6604394316673279, "learning_rate": 2.5832864434250288e-05, "loss": 0.0629, "step": 15011 }, { "epoch": 0.26585556987076464, "grad_norm": 0.7815809845924377, "learning_rate": 2.5832269307633935e-05, "loss": 0.0989, "step": 15012 }, { "epoch": 0.2658732794077931, "grad_norm": 0.7193441987037659, "learning_rate": 2.5831674145380518e-05, "loss": 0.079, "step": 15013 }, { "epoch": 0.2658909889448215, "grad_norm": 0.9706149101257324, "learning_rate": 2.5831078947491984e-05, "loss": 0.1736, "step": 15014 }, { "epoch": 0.26590869848184995, "grad_norm": 1.1625287532806396, "learning_rate": 2.5830483713970298e-05, "loss": 0.0964, "step": 15015 }, { "epoch": 0.26592640801887835, "grad_norm": 0.9686691164970398, "learning_rate": 2.5829888444817416e-05, "loss": 0.084, "step": 15016 }, { "epoch": 0.2659441175559068, "grad_norm": 0.846882164478302, "learning_rate": 2.5829293140035293e-05, "loss": 0.0879, "step": 15017 }, { "epoch": 0.2659618270929352, "grad_norm": 0.6878381967544556, "learning_rate": 2.5828697799625896e-05, "loss": 0.0645, "step": 15018 }, { "epoch": 0.26597953662996365, "grad_norm": 0.8405889868736267, "learning_rate": 2.5828102423591175e-05, "loss": 0.0963, "step": 15019 }, { "epoch": 0.26599724616699205, "grad_norm": 1.0171469449996948, "learning_rate": 2.5827507011933094e-05, "loss": 0.1022, "step": 15020 }, { "epoch": 0.2660149557040205, "grad_norm": 0.7778128981590271, "learning_rate": 2.5826911564653608e-05, "loss": 0.1064, "step": 15021 }, { "epoch": 0.26603266524104896, "grad_norm": 0.6325031518936157, "learning_rate": 2.582631608175468e-05, "loss": 0.1246, "step": 15022 }, { "epoch": 0.26605037477807736, "grad_norm": 0.8928722143173218, "learning_rate": 2.582572056323827e-05, "loss": 0.1333, "step": 15023 }, { "epoch": 0.2660680843151058, "grad_norm": 0.5242671370506287, "learning_rate": 2.582512500910633e-05, "loss": 0.0622, "step": 15024 }, { "epoch": 0.2660857938521342, "grad_norm": 0.9168993830680847, "learning_rate": 2.5824529419360824e-05, "loss": 0.0944, "step": 15025 }, { "epoch": 0.26610350338916267, "grad_norm": 0.9630100131034851, "learning_rate": 2.582393379400371e-05, "loss": 0.0987, "step": 15026 }, { "epoch": 0.26612121292619106, "grad_norm": 1.085189700126648, "learning_rate": 2.582333813303695e-05, "loss": 0.0776, "step": 15027 }, { "epoch": 0.2661389224632195, "grad_norm": 0.8422962427139282, "learning_rate": 2.5822742436462502e-05, "loss": 0.0891, "step": 15028 }, { "epoch": 0.2661566320002479, "grad_norm": 1.1664142608642578, "learning_rate": 2.582214670428233e-05, "loss": 0.1371, "step": 15029 }, { "epoch": 0.26617434153727637, "grad_norm": 0.7166464328765869, "learning_rate": 2.5821550936498383e-05, "loss": 0.0991, "step": 15030 }, { "epoch": 0.26619205107430477, "grad_norm": 0.7724164724349976, "learning_rate": 2.5820955133112633e-05, "loss": 0.0801, "step": 15031 }, { "epoch": 0.2662097606113332, "grad_norm": 1.1521402597427368, "learning_rate": 2.5820359294127033e-05, "loss": 0.1034, "step": 15032 }, { "epoch": 0.2662274701483616, "grad_norm": 0.7950180768966675, "learning_rate": 2.5819763419543542e-05, "loss": 0.0919, "step": 15033 }, { "epoch": 0.2662451796853901, "grad_norm": 0.9636478424072266, "learning_rate": 2.581916750936413e-05, "loss": 0.1052, "step": 15034 }, { "epoch": 0.26626288922241853, "grad_norm": 0.6763203144073486, "learning_rate": 2.581857156359075e-05, "loss": 0.107, "step": 15035 }, { "epoch": 0.26628059875944693, "grad_norm": 0.8971275687217712, "learning_rate": 2.5817975582225363e-05, "loss": 0.0959, "step": 15036 }, { "epoch": 0.2662983082964754, "grad_norm": 1.3963350057601929, "learning_rate": 2.5817379565269928e-05, "loss": 0.0968, "step": 15037 }, { "epoch": 0.2663160178335038, "grad_norm": 0.5754190683364868, "learning_rate": 2.581678351272641e-05, "loss": 0.0792, "step": 15038 }, { "epoch": 0.26633372737053224, "grad_norm": 0.9398365020751953, "learning_rate": 2.5816187424596772e-05, "loss": 0.0674, "step": 15039 }, { "epoch": 0.26635143690756063, "grad_norm": 1.3254560232162476, "learning_rate": 2.5815591300882965e-05, "loss": 0.111, "step": 15040 }, { "epoch": 0.2663691464445891, "grad_norm": 0.8625941872596741, "learning_rate": 2.581499514158696e-05, "loss": 0.0912, "step": 15041 }, { "epoch": 0.2663868559816175, "grad_norm": 0.6654816269874573, "learning_rate": 2.5814398946710716e-05, "loss": 0.0969, "step": 15042 }, { "epoch": 0.26640456551864594, "grad_norm": 1.125734567642212, "learning_rate": 2.5813802716256192e-05, "loss": 0.0821, "step": 15043 }, { "epoch": 0.26642227505567434, "grad_norm": 0.5935638546943665, "learning_rate": 2.5813206450225352e-05, "loss": 0.0988, "step": 15044 }, { "epoch": 0.2664399845927028, "grad_norm": 0.5701977014541626, "learning_rate": 2.5812610148620152e-05, "loss": 0.0838, "step": 15045 }, { "epoch": 0.2664576941297312, "grad_norm": 0.9817458391189575, "learning_rate": 2.5812013811442563e-05, "loss": 0.1399, "step": 15046 }, { "epoch": 0.26647540366675965, "grad_norm": 0.6048136949539185, "learning_rate": 2.581141743869454e-05, "loss": 0.1079, "step": 15047 }, { "epoch": 0.26649311320378805, "grad_norm": 0.5472120642662048, "learning_rate": 2.5810821030378048e-05, "loss": 0.0826, "step": 15048 }, { "epoch": 0.2665108227408165, "grad_norm": 0.7376577854156494, "learning_rate": 2.5810224586495043e-05, "loss": 0.0966, "step": 15049 }, { "epoch": 0.26652853227784495, "grad_norm": 1.0743882656097412, "learning_rate": 2.58096281070475e-05, "loss": 0.0644, "step": 15050 }, { "epoch": 0.26654624181487335, "grad_norm": 1.0683192014694214, "learning_rate": 2.5809031592037373e-05, "loss": 0.1361, "step": 15051 }, { "epoch": 0.2665639513519018, "grad_norm": 0.5270046591758728, "learning_rate": 2.580843504146662e-05, "loss": 0.0746, "step": 15052 }, { "epoch": 0.2665816608889302, "grad_norm": 0.8367795348167419, "learning_rate": 2.5807838455337216e-05, "loss": 0.089, "step": 15053 }, { "epoch": 0.26659937042595866, "grad_norm": 0.7061274647712708, "learning_rate": 2.5807241833651112e-05, "loss": 0.0939, "step": 15054 }, { "epoch": 0.26661707996298706, "grad_norm": 1.0064061880111694, "learning_rate": 2.5806645176410272e-05, "loss": 0.0955, "step": 15055 }, { "epoch": 0.2666347895000155, "grad_norm": 1.4335129261016846, "learning_rate": 2.580604848361667e-05, "loss": 0.1373, "step": 15056 }, { "epoch": 0.2666524990370439, "grad_norm": 0.8297359943389893, "learning_rate": 2.5805451755272257e-05, "loss": 0.0981, "step": 15057 }, { "epoch": 0.26667020857407236, "grad_norm": 1.01539146900177, "learning_rate": 2.5804854991379e-05, "loss": 0.1366, "step": 15058 }, { "epoch": 0.26668791811110076, "grad_norm": 0.5637096166610718, "learning_rate": 2.5804258191938867e-05, "loss": 0.0947, "step": 15059 }, { "epoch": 0.2667056276481292, "grad_norm": 0.6630964279174805, "learning_rate": 2.5803661356953818e-05, "loss": 0.1079, "step": 15060 }, { "epoch": 0.2667233371851576, "grad_norm": 0.8821003437042236, "learning_rate": 2.580306448642581e-05, "loss": 0.1015, "step": 15061 }, { "epoch": 0.26674104672218607, "grad_norm": 1.0347185134887695, "learning_rate": 2.5802467580356816e-05, "loss": 0.0804, "step": 15062 }, { "epoch": 0.26675875625921447, "grad_norm": 0.5850035548210144, "learning_rate": 2.5801870638748795e-05, "loss": 0.105, "step": 15063 }, { "epoch": 0.2667764657962429, "grad_norm": 0.6171927452087402, "learning_rate": 2.5801273661603716e-05, "loss": 0.0679, "step": 15064 }, { "epoch": 0.2667941753332714, "grad_norm": 0.5322981476783752, "learning_rate": 2.5800676648923535e-05, "loss": 0.0821, "step": 15065 }, { "epoch": 0.2668118848702998, "grad_norm": 0.6034585237503052, "learning_rate": 2.580007960071022e-05, "loss": 0.1238, "step": 15066 }, { "epoch": 0.26682959440732823, "grad_norm": 0.8722794055938721, "learning_rate": 2.5799482516965736e-05, "loss": 0.0518, "step": 15067 }, { "epoch": 0.26684730394435663, "grad_norm": 0.5765208601951599, "learning_rate": 2.5798885397692045e-05, "loss": 0.09, "step": 15068 }, { "epoch": 0.2668650134813851, "grad_norm": 0.8071450591087341, "learning_rate": 2.579828824289112e-05, "loss": 0.1091, "step": 15069 }, { "epoch": 0.2668827230184135, "grad_norm": 0.5875954627990723, "learning_rate": 2.5797691052564917e-05, "loss": 0.0699, "step": 15070 }, { "epoch": 0.26690043255544194, "grad_norm": 0.9974770545959473, "learning_rate": 2.5797093826715393e-05, "loss": 0.1045, "step": 15071 }, { "epoch": 0.26691814209247033, "grad_norm": 0.539868175983429, "learning_rate": 2.5796496565344533e-05, "loss": 0.077, "step": 15072 }, { "epoch": 0.2669358516294988, "grad_norm": 1.2479848861694336, "learning_rate": 2.579589926845429e-05, "loss": 0.1069, "step": 15073 }, { "epoch": 0.2669535611665272, "grad_norm": 0.7876706719398499, "learning_rate": 2.5795301936046626e-05, "loss": 0.0942, "step": 15074 }, { "epoch": 0.26697127070355564, "grad_norm": 0.9676746726036072, "learning_rate": 2.5794704568123514e-05, "loss": 0.1153, "step": 15075 }, { "epoch": 0.26698898024058404, "grad_norm": 0.5855879783630371, "learning_rate": 2.5794107164686915e-05, "loss": 0.0666, "step": 15076 }, { "epoch": 0.2670066897776125, "grad_norm": 0.622506856918335, "learning_rate": 2.5793509725738796e-05, "loss": 0.0896, "step": 15077 }, { "epoch": 0.2670243993146409, "grad_norm": 0.5010805726051331, "learning_rate": 2.579291225128112e-05, "loss": 0.1122, "step": 15078 }, { "epoch": 0.26704210885166935, "grad_norm": 1.1092782020568848, "learning_rate": 2.5792314741315855e-05, "loss": 0.1047, "step": 15079 }, { "epoch": 0.2670598183886978, "grad_norm": 1.1137681007385254, "learning_rate": 2.5791717195844963e-05, "loss": 0.0904, "step": 15080 }, { "epoch": 0.2670775279257262, "grad_norm": 1.1901750564575195, "learning_rate": 2.5791119614870415e-05, "loss": 0.0903, "step": 15081 }, { "epoch": 0.26709523746275465, "grad_norm": 0.6328064799308777, "learning_rate": 2.5790521998394178e-05, "loss": 0.0789, "step": 15082 }, { "epoch": 0.26711294699978305, "grad_norm": 1.0629448890686035, "learning_rate": 2.578992434641821e-05, "loss": 0.1149, "step": 15083 }, { "epoch": 0.2671306565368115, "grad_norm": 0.7041545510292053, "learning_rate": 2.578932665894449e-05, "loss": 0.1099, "step": 15084 }, { "epoch": 0.2671483660738399, "grad_norm": 0.8580893874168396, "learning_rate": 2.578872893597497e-05, "loss": 0.0746, "step": 15085 }, { "epoch": 0.26716607561086836, "grad_norm": 1.2200517654418945, "learning_rate": 2.5788131177511624e-05, "loss": 0.0862, "step": 15086 }, { "epoch": 0.26718378514789676, "grad_norm": 0.7504784464836121, "learning_rate": 2.578753338355642e-05, "loss": 0.0767, "step": 15087 }, { "epoch": 0.2672014946849252, "grad_norm": 0.45283305644989014, "learning_rate": 2.578693555411132e-05, "loss": 0.0932, "step": 15088 }, { "epoch": 0.2672192042219536, "grad_norm": 0.8799218535423279, "learning_rate": 2.5786337689178293e-05, "loss": 0.1227, "step": 15089 }, { "epoch": 0.26723691375898206, "grad_norm": 1.194849967956543, "learning_rate": 2.5785739788759303e-05, "loss": 0.0817, "step": 15090 }, { "epoch": 0.26725462329601046, "grad_norm": 0.8826555609703064, "learning_rate": 2.5785141852856326e-05, "loss": 0.1076, "step": 15091 }, { "epoch": 0.2672723328330389, "grad_norm": 0.6159137487411499, "learning_rate": 2.578454388147132e-05, "loss": 0.0817, "step": 15092 }, { "epoch": 0.2672900423700673, "grad_norm": 1.2054733037948608, "learning_rate": 2.5783945874606257e-05, "loss": 0.1422, "step": 15093 }, { "epoch": 0.26730775190709577, "grad_norm": 0.7450251579284668, "learning_rate": 2.5783347832263105e-05, "loss": 0.0904, "step": 15094 }, { "epoch": 0.2673254614441242, "grad_norm": 0.886227548122406, "learning_rate": 2.578274975444382e-05, "loss": 0.1302, "step": 15095 }, { "epoch": 0.2673431709811526, "grad_norm": 0.8627745509147644, "learning_rate": 2.578215164115039e-05, "loss": 0.104, "step": 15096 }, { "epoch": 0.2673608805181811, "grad_norm": 0.9276648759841919, "learning_rate": 2.5781553492384764e-05, "loss": 0.0882, "step": 15097 }, { "epoch": 0.2673785900552095, "grad_norm": 0.5927104949951172, "learning_rate": 2.578095530814892e-05, "loss": 0.0769, "step": 15098 }, { "epoch": 0.26739629959223793, "grad_norm": 1.0600451231002808, "learning_rate": 2.5780357088444826e-05, "loss": 0.1003, "step": 15099 }, { "epoch": 0.26741400912926633, "grad_norm": 0.6425819993019104, "learning_rate": 2.5779758833274444e-05, "loss": 0.0952, "step": 15100 }, { "epoch": 0.2674317186662948, "grad_norm": 0.6886480450630188, "learning_rate": 2.5779160542639748e-05, "loss": 0.117, "step": 15101 }, { "epoch": 0.2674494282033232, "grad_norm": 0.636316180229187, "learning_rate": 2.57785622165427e-05, "loss": 0.0653, "step": 15102 }, { "epoch": 0.26746713774035163, "grad_norm": 1.322457194328308, "learning_rate": 2.5777963854985277e-05, "loss": 0.0755, "step": 15103 }, { "epoch": 0.26748484727738003, "grad_norm": 0.6146528720855713, "learning_rate": 2.5777365457969443e-05, "loss": 0.0849, "step": 15104 }, { "epoch": 0.2675025568144085, "grad_norm": 0.798026978969574, "learning_rate": 2.5776767025497162e-05, "loss": 0.1235, "step": 15105 }, { "epoch": 0.2675202663514369, "grad_norm": 0.7537888288497925, "learning_rate": 2.5776168557570413e-05, "loss": 0.1105, "step": 15106 }, { "epoch": 0.26753797588846534, "grad_norm": 0.851855993270874, "learning_rate": 2.5775570054191158e-05, "loss": 0.1089, "step": 15107 }, { "epoch": 0.26755568542549374, "grad_norm": 0.6316390037536621, "learning_rate": 2.5774971515361364e-05, "loss": 0.1083, "step": 15108 }, { "epoch": 0.2675733949625222, "grad_norm": 0.7921813726425171, "learning_rate": 2.577437294108301e-05, "loss": 0.0793, "step": 15109 }, { "epoch": 0.26759110449955065, "grad_norm": 0.9737669229507446, "learning_rate": 2.577377433135805e-05, "loss": 0.0911, "step": 15110 }, { "epoch": 0.26760881403657905, "grad_norm": 0.7370142340660095, "learning_rate": 2.577317568618847e-05, "loss": 0.1169, "step": 15111 }, { "epoch": 0.2676265235736075, "grad_norm": 0.7603501081466675, "learning_rate": 2.577257700557623e-05, "loss": 0.0868, "step": 15112 }, { "epoch": 0.2676442331106359, "grad_norm": 0.5790003538131714, "learning_rate": 2.5771978289523298e-05, "loss": 0.0831, "step": 15113 }, { "epoch": 0.26766194264766435, "grad_norm": 0.9422902464866638, "learning_rate": 2.577137953803165e-05, "loss": 0.1396, "step": 15114 }, { "epoch": 0.26767965218469275, "grad_norm": 1.1173484325408936, "learning_rate": 2.5770780751103253e-05, "loss": 0.0964, "step": 15115 }, { "epoch": 0.2676973617217212, "grad_norm": 1.213748812675476, "learning_rate": 2.5770181928740074e-05, "loss": 0.1012, "step": 15116 }, { "epoch": 0.2677150712587496, "grad_norm": 0.9203784465789795, "learning_rate": 2.5769583070944086e-05, "loss": 0.1079, "step": 15117 }, { "epoch": 0.26773278079577806, "grad_norm": 0.7630331516265869, "learning_rate": 2.5768984177717263e-05, "loss": 0.1023, "step": 15118 }, { "epoch": 0.26775049033280646, "grad_norm": 0.7522683143615723, "learning_rate": 2.5768385249061566e-05, "loss": 0.0973, "step": 15119 }, { "epoch": 0.2677681998698349, "grad_norm": 0.8674209117889404, "learning_rate": 2.5767786284978973e-05, "loss": 0.1036, "step": 15120 }, { "epoch": 0.2677859094068633, "grad_norm": 0.7453047633171082, "learning_rate": 2.5767187285471453e-05, "loss": 0.0968, "step": 15121 }, { "epoch": 0.26780361894389176, "grad_norm": 1.448657751083374, "learning_rate": 2.5766588250540974e-05, "loss": 0.119, "step": 15122 }, { "epoch": 0.26782132848092016, "grad_norm": 0.7313600778579712, "learning_rate": 2.576598918018951e-05, "loss": 0.0827, "step": 15123 }, { "epoch": 0.2678390380179486, "grad_norm": 0.7600354552268982, "learning_rate": 2.5765390074419032e-05, "loss": 0.1002, "step": 15124 }, { "epoch": 0.26785674755497707, "grad_norm": 0.721903920173645, "learning_rate": 2.5764790933231506e-05, "loss": 0.1002, "step": 15125 }, { "epoch": 0.26787445709200547, "grad_norm": 1.1783925294876099, "learning_rate": 2.576419175662891e-05, "loss": 0.1193, "step": 15126 }, { "epoch": 0.2678921666290339, "grad_norm": 1.1076288223266602, "learning_rate": 2.576359254461321e-05, "loss": 0.0942, "step": 15127 }, { "epoch": 0.2679098761660623, "grad_norm": 1.1621665954589844, "learning_rate": 2.5762993297186376e-05, "loss": 0.1259, "step": 15128 }, { "epoch": 0.2679275857030908, "grad_norm": 0.8747960329055786, "learning_rate": 2.5762394014350388e-05, "loss": 0.1087, "step": 15129 }, { "epoch": 0.2679452952401192, "grad_norm": 0.9908204078674316, "learning_rate": 2.5761794696107212e-05, "loss": 0.0729, "step": 15130 }, { "epoch": 0.26796300477714763, "grad_norm": 0.7333277463912964, "learning_rate": 2.5761195342458815e-05, "loss": 0.0854, "step": 15131 }, { "epoch": 0.267980714314176, "grad_norm": 0.7105033993721008, "learning_rate": 2.5760595953407182e-05, "loss": 0.0952, "step": 15132 }, { "epoch": 0.2679984238512045, "grad_norm": 1.256096601486206, "learning_rate": 2.575999652895427e-05, "loss": 0.0697, "step": 15133 }, { "epoch": 0.2680161333882329, "grad_norm": 0.8136866092681885, "learning_rate": 2.575939706910206e-05, "loss": 0.108, "step": 15134 }, { "epoch": 0.26803384292526133, "grad_norm": 1.2453511953353882, "learning_rate": 2.575879757385252e-05, "loss": 0.107, "step": 15135 }, { "epoch": 0.26805155246228973, "grad_norm": 0.6004133224487305, "learning_rate": 2.575819804320763e-05, "loss": 0.0851, "step": 15136 }, { "epoch": 0.2680692619993182, "grad_norm": 1.0641193389892578, "learning_rate": 2.575759847716935e-05, "loss": 0.1172, "step": 15137 }, { "epoch": 0.2680869715363466, "grad_norm": 1.030951738357544, "learning_rate": 2.575699887573966e-05, "loss": 0.092, "step": 15138 }, { "epoch": 0.26810468107337504, "grad_norm": 0.8201059103012085, "learning_rate": 2.575639923892053e-05, "loss": 0.0995, "step": 15139 }, { "epoch": 0.2681223906104035, "grad_norm": 1.042720913887024, "learning_rate": 2.575579956671394e-05, "loss": 0.091, "step": 15140 }, { "epoch": 0.2681401001474319, "grad_norm": 0.5354769229888916, "learning_rate": 2.5755199859121853e-05, "loss": 0.0557, "step": 15141 }, { "epoch": 0.26815780968446035, "grad_norm": 0.7664448618888855, "learning_rate": 2.575460011614625e-05, "loss": 0.1086, "step": 15142 }, { "epoch": 0.26817551922148875, "grad_norm": 1.080627679824829, "learning_rate": 2.57540003377891e-05, "loss": 0.0977, "step": 15143 }, { "epoch": 0.2681932287585172, "grad_norm": 0.7874602675437927, "learning_rate": 2.5753400524052372e-05, "loss": 0.0946, "step": 15144 }, { "epoch": 0.2682109382955456, "grad_norm": 1.084594488143921, "learning_rate": 2.5752800674938046e-05, "loss": 0.1023, "step": 15145 }, { "epoch": 0.26822864783257405, "grad_norm": 0.567613422870636, "learning_rate": 2.5752200790448098e-05, "loss": 0.0876, "step": 15146 }, { "epoch": 0.26824635736960245, "grad_norm": 0.7740535140037537, "learning_rate": 2.575160087058449e-05, "loss": 0.0827, "step": 15147 }, { "epoch": 0.2682640669066309, "grad_norm": 0.9158636331558228, "learning_rate": 2.5751000915349205e-05, "loss": 0.1015, "step": 15148 }, { "epoch": 0.2682817764436593, "grad_norm": 0.7842777371406555, "learning_rate": 2.575040092474422e-05, "loss": 0.1145, "step": 15149 }, { "epoch": 0.26829948598068776, "grad_norm": 0.8765310049057007, "learning_rate": 2.5749800898771494e-05, "loss": 0.0983, "step": 15150 }, { "epoch": 0.26831719551771616, "grad_norm": 0.682261049747467, "learning_rate": 2.5749200837433015e-05, "loss": 0.1059, "step": 15151 }, { "epoch": 0.2683349050547446, "grad_norm": 0.8977707624435425, "learning_rate": 2.5748600740730752e-05, "loss": 0.093, "step": 15152 }, { "epoch": 0.268352614591773, "grad_norm": 0.8596480488777161, "learning_rate": 2.574800060866668e-05, "loss": 0.1015, "step": 15153 }, { "epoch": 0.26837032412880146, "grad_norm": 1.4112820625305176, "learning_rate": 2.5747400441242773e-05, "loss": 0.1157, "step": 15154 }, { "epoch": 0.2683880336658299, "grad_norm": 0.677464485168457, "learning_rate": 2.5746800238461007e-05, "loss": 0.1129, "step": 15155 }, { "epoch": 0.2684057432028583, "grad_norm": 0.992368757724762, "learning_rate": 2.574620000032335e-05, "loss": 0.1169, "step": 15156 }, { "epoch": 0.26842345273988677, "grad_norm": 0.817544162273407, "learning_rate": 2.574559972683179e-05, "loss": 0.0863, "step": 15157 }, { "epoch": 0.26844116227691517, "grad_norm": 0.5333917737007141, "learning_rate": 2.574499941798829e-05, "loss": 0.1054, "step": 15158 }, { "epoch": 0.2684588718139436, "grad_norm": 0.8148513436317444, "learning_rate": 2.5744399073794826e-05, "loss": 0.0855, "step": 15159 }, { "epoch": 0.268476581350972, "grad_norm": 0.9472495317459106, "learning_rate": 2.5743798694253378e-05, "loss": 0.0993, "step": 15160 }, { "epoch": 0.2684942908880005, "grad_norm": 0.9207848310470581, "learning_rate": 2.574319827936592e-05, "loss": 0.1167, "step": 15161 }, { "epoch": 0.2685120004250289, "grad_norm": 0.7535758018493652, "learning_rate": 2.5742597829134428e-05, "loss": 0.1095, "step": 15162 }, { "epoch": 0.26852970996205733, "grad_norm": 0.9968627691268921, "learning_rate": 2.574199734356087e-05, "loss": 0.1381, "step": 15163 }, { "epoch": 0.2685474194990857, "grad_norm": 0.8479620218276978, "learning_rate": 2.574139682264723e-05, "loss": 0.1259, "step": 15164 }, { "epoch": 0.2685651290361142, "grad_norm": 1.1352856159210205, "learning_rate": 2.5740796266395482e-05, "loss": 0.0984, "step": 15165 }, { "epoch": 0.2685828385731426, "grad_norm": 0.6746829748153687, "learning_rate": 2.57401956748076e-05, "loss": 0.0676, "step": 15166 }, { "epoch": 0.26860054811017103, "grad_norm": 0.6754313111305237, "learning_rate": 2.573959504788556e-05, "loss": 0.0844, "step": 15167 }, { "epoch": 0.26861825764719943, "grad_norm": 1.6139789819717407, "learning_rate": 2.573899438563134e-05, "loss": 0.1127, "step": 15168 }, { "epoch": 0.2686359671842279, "grad_norm": 1.5165632963180542, "learning_rate": 2.573839368804691e-05, "loss": 0.0884, "step": 15169 }, { "epoch": 0.26865367672125634, "grad_norm": 1.1813836097717285, "learning_rate": 2.5737792955134258e-05, "loss": 0.0903, "step": 15170 }, { "epoch": 0.26867138625828474, "grad_norm": 1.6600933074951172, "learning_rate": 2.573719218689535e-05, "loss": 0.1121, "step": 15171 }, { "epoch": 0.2686890957953132, "grad_norm": 1.2619980573654175, "learning_rate": 2.5736591383332162e-05, "loss": 0.1171, "step": 15172 }, { "epoch": 0.2687068053323416, "grad_norm": 0.5943142771720886, "learning_rate": 2.5735990544446678e-05, "loss": 0.1156, "step": 15173 }, { "epoch": 0.26872451486937005, "grad_norm": 0.6718878746032715, "learning_rate": 2.5735389670240872e-05, "loss": 0.1028, "step": 15174 }, { "epoch": 0.26874222440639844, "grad_norm": 1.1898784637451172, "learning_rate": 2.5734788760716716e-05, "loss": 0.099, "step": 15175 }, { "epoch": 0.2687599339434269, "grad_norm": 0.6220198273658752, "learning_rate": 2.573418781587619e-05, "loss": 0.1137, "step": 15176 }, { "epoch": 0.2687776434804553, "grad_norm": 1.117656946182251, "learning_rate": 2.573358683572128e-05, "loss": 0.0924, "step": 15177 }, { "epoch": 0.26879535301748375, "grad_norm": 1.226965308189392, "learning_rate": 2.5732985820253948e-05, "loss": 0.1575, "step": 15178 }, { "epoch": 0.26881306255451215, "grad_norm": 1.1537680625915527, "learning_rate": 2.573238476947618e-05, "loss": 0.1062, "step": 15179 }, { "epoch": 0.2688307720915406, "grad_norm": 0.948757529258728, "learning_rate": 2.5731783683389953e-05, "loss": 0.0991, "step": 15180 }, { "epoch": 0.268848481628569, "grad_norm": 1.1610640287399292, "learning_rate": 2.573118256199724e-05, "loss": 0.1099, "step": 15181 }, { "epoch": 0.26886619116559746, "grad_norm": 1.8538728952407837, "learning_rate": 2.5730581405300022e-05, "loss": 0.1258, "step": 15182 }, { "epoch": 0.26888390070262586, "grad_norm": 0.7834913730621338, "learning_rate": 2.572998021330028e-05, "loss": 0.1234, "step": 15183 }, { "epoch": 0.2689016102396543, "grad_norm": 0.8969488739967346, "learning_rate": 2.5729378985999986e-05, "loss": 0.0723, "step": 15184 }, { "epoch": 0.26891931977668276, "grad_norm": 1.0799580812454224, "learning_rate": 2.572877772340112e-05, "loss": 0.0709, "step": 15185 }, { "epoch": 0.26893702931371116, "grad_norm": 0.6002988815307617, "learning_rate": 2.5728176425505663e-05, "loss": 0.0711, "step": 15186 }, { "epoch": 0.2689547388507396, "grad_norm": 1.0801116228103638, "learning_rate": 2.5727575092315586e-05, "loss": 0.1032, "step": 15187 }, { "epoch": 0.268972448387768, "grad_norm": 0.9882497787475586, "learning_rate": 2.5726973723832875e-05, "loss": 0.0859, "step": 15188 }, { "epoch": 0.26899015792479647, "grad_norm": 0.6277173161506653, "learning_rate": 2.572637232005951e-05, "loss": 0.0985, "step": 15189 }, { "epoch": 0.26900786746182487, "grad_norm": 1.1711763143539429, "learning_rate": 2.5725770880997455e-05, "loss": 0.0889, "step": 15190 }, { "epoch": 0.2690255769988533, "grad_norm": 0.6423106789588928, "learning_rate": 2.5725169406648708e-05, "loss": 0.0684, "step": 15191 }, { "epoch": 0.2690432865358817, "grad_norm": 0.6303879618644714, "learning_rate": 2.572456789701523e-05, "loss": 0.0888, "step": 15192 }, { "epoch": 0.2690609960729102, "grad_norm": 1.1377698183059692, "learning_rate": 2.5723966352099013e-05, "loss": 0.1398, "step": 15193 }, { "epoch": 0.2690787056099386, "grad_norm": 0.8328471183776855, "learning_rate": 2.5723364771902033e-05, "loss": 0.0983, "step": 15194 }, { "epoch": 0.269096415146967, "grad_norm": 0.6647769212722778, "learning_rate": 2.5722763156426266e-05, "loss": 0.0873, "step": 15195 }, { "epoch": 0.2691141246839954, "grad_norm": 0.6414621472358704, "learning_rate": 2.5722161505673695e-05, "loss": 0.0907, "step": 15196 }, { "epoch": 0.2691318342210239, "grad_norm": 0.8492636680603027, "learning_rate": 2.572155981964629e-05, "loss": 0.0761, "step": 15197 }, { "epoch": 0.2691495437580523, "grad_norm": 0.7004894614219666, "learning_rate": 2.5720958098346043e-05, "loss": 0.0873, "step": 15198 }, { "epoch": 0.26916725329508073, "grad_norm": 0.8770250082015991, "learning_rate": 2.5720356341774926e-05, "loss": 0.1039, "step": 15199 }, { "epoch": 0.2691849628321092, "grad_norm": 0.615281343460083, "learning_rate": 2.571975454993492e-05, "loss": 0.0926, "step": 15200 }, { "epoch": 0.2692026723691376, "grad_norm": 0.7432109713554382, "learning_rate": 2.571915272282801e-05, "loss": 0.0855, "step": 15201 }, { "epoch": 0.26922038190616604, "grad_norm": 0.6187295317649841, "learning_rate": 2.5718550860456168e-05, "loss": 0.0781, "step": 15202 }, { "epoch": 0.26923809144319444, "grad_norm": 0.8629604578018188, "learning_rate": 2.5717948962821383e-05, "loss": 0.1069, "step": 15203 }, { "epoch": 0.2692558009802229, "grad_norm": 0.5482796430587769, "learning_rate": 2.5717347029925624e-05, "loss": 0.079, "step": 15204 }, { "epoch": 0.2692735105172513, "grad_norm": 1.1015926599502563, "learning_rate": 2.5716745061770878e-05, "loss": 0.0935, "step": 15205 }, { "epoch": 0.26929122005427975, "grad_norm": 0.6531208753585815, "learning_rate": 2.5716143058359127e-05, "loss": 0.107, "step": 15206 }, { "epoch": 0.26930892959130814, "grad_norm": 0.6519860029220581, "learning_rate": 2.571554101969235e-05, "loss": 0.071, "step": 15207 }, { "epoch": 0.2693266391283366, "grad_norm": 0.9445702433586121, "learning_rate": 2.5714938945772527e-05, "loss": 0.0827, "step": 15208 }, { "epoch": 0.269344348665365, "grad_norm": 0.7606781721115112, "learning_rate": 2.571433683660164e-05, "loss": 0.0738, "step": 15209 }, { "epoch": 0.26936205820239345, "grad_norm": 0.6521406769752502, "learning_rate": 2.571373469218166e-05, "loss": 0.0862, "step": 15210 }, { "epoch": 0.26937976773942185, "grad_norm": 1.0860577821731567, "learning_rate": 2.5713132512514585e-05, "loss": 0.1077, "step": 15211 }, { "epoch": 0.2693974772764503, "grad_norm": 0.907596230506897, "learning_rate": 2.5712530297602384e-05, "loss": 0.0983, "step": 15212 }, { "epoch": 0.2694151868134787, "grad_norm": 0.660006046295166, "learning_rate": 2.5711928047447042e-05, "loss": 0.083, "step": 15213 }, { "epoch": 0.26943289635050716, "grad_norm": 1.13310706615448, "learning_rate": 2.5711325762050542e-05, "loss": 0.0978, "step": 15214 }, { "epoch": 0.2694506058875356, "grad_norm": 0.7314051985740662, "learning_rate": 2.571072344141486e-05, "loss": 0.0861, "step": 15215 }, { "epoch": 0.269468315424564, "grad_norm": 0.6818246841430664, "learning_rate": 2.5710121085541983e-05, "loss": 0.0858, "step": 15216 }, { "epoch": 0.26948602496159246, "grad_norm": 0.5666950941085815, "learning_rate": 2.570951869443389e-05, "loss": 0.075, "step": 15217 }, { "epoch": 0.26950373449862086, "grad_norm": 1.019848108291626, "learning_rate": 2.5708916268092566e-05, "loss": 0.1203, "step": 15218 }, { "epoch": 0.2695214440356493, "grad_norm": 1.7859925031661987, "learning_rate": 2.570831380651999e-05, "loss": 0.128, "step": 15219 }, { "epoch": 0.2695391535726777, "grad_norm": 0.8200356364250183, "learning_rate": 2.5707711309718147e-05, "loss": 0.0903, "step": 15220 }, { "epoch": 0.26955686310970617, "grad_norm": 0.6840993762016296, "learning_rate": 2.570710877768901e-05, "loss": 0.0978, "step": 15221 }, { "epoch": 0.26957457264673457, "grad_norm": 0.6904431581497192, "learning_rate": 2.5706506210434573e-05, "loss": 0.1335, "step": 15222 }, { "epoch": 0.269592282183763, "grad_norm": 1.2871127128601074, "learning_rate": 2.5705903607956813e-05, "loss": 0.0895, "step": 15223 }, { "epoch": 0.2696099917207914, "grad_norm": 0.9795688390731812, "learning_rate": 2.5705300970257714e-05, "loss": 0.0858, "step": 15224 }, { "epoch": 0.2696277012578199, "grad_norm": 0.7822130918502808, "learning_rate": 2.5704698297339257e-05, "loss": 0.092, "step": 15225 }, { "epoch": 0.2696454107948483, "grad_norm": 1.2275532484054565, "learning_rate": 2.5704095589203423e-05, "loss": 0.1037, "step": 15226 }, { "epoch": 0.2696631203318767, "grad_norm": 0.6964103579521179, "learning_rate": 2.5703492845852197e-05, "loss": 0.0951, "step": 15227 }, { "epoch": 0.2696808298689051, "grad_norm": 0.9244565367698669, "learning_rate": 2.570289006728757e-05, "loss": 0.0935, "step": 15228 }, { "epoch": 0.2696985394059336, "grad_norm": 1.0710293054580688, "learning_rate": 2.5702287253511503e-05, "loss": 0.0666, "step": 15229 }, { "epoch": 0.26971624894296203, "grad_norm": 1.0697499513626099, "learning_rate": 2.5701684404526006e-05, "loss": 0.1178, "step": 15230 }, { "epoch": 0.26973395847999043, "grad_norm": 1.3329744338989258, "learning_rate": 2.5701081520333046e-05, "loss": 0.0974, "step": 15231 }, { "epoch": 0.2697516680170189, "grad_norm": 0.9660694003105164, "learning_rate": 2.5700478600934604e-05, "loss": 0.0942, "step": 15232 }, { "epoch": 0.2697693775540473, "grad_norm": 1.3833152055740356, "learning_rate": 2.5699875646332677e-05, "loss": 0.1298, "step": 15233 }, { "epoch": 0.26978708709107574, "grad_norm": 0.7400135397911072, "learning_rate": 2.5699272656529244e-05, "loss": 0.1424, "step": 15234 }, { "epoch": 0.26980479662810414, "grad_norm": 1.1529892683029175, "learning_rate": 2.5698669631526278e-05, "loss": 0.1116, "step": 15235 }, { "epoch": 0.2698225061651326, "grad_norm": 0.8423610329627991, "learning_rate": 2.5698066571325774e-05, "loss": 0.1115, "step": 15236 }, { "epoch": 0.269840215702161, "grad_norm": 1.099735140800476, "learning_rate": 2.5697463475929715e-05, "loss": 0.1051, "step": 15237 }, { "epoch": 0.26985792523918944, "grad_norm": 1.0212334394454956, "learning_rate": 2.5696860345340078e-05, "loss": 0.1039, "step": 15238 }, { "epoch": 0.26987563477621784, "grad_norm": 1.359696626663208, "learning_rate": 2.5696257179558858e-05, "loss": 0.13, "step": 15239 }, { "epoch": 0.2698933443132463, "grad_norm": 0.9419753551483154, "learning_rate": 2.5695653978588033e-05, "loss": 0.0966, "step": 15240 }, { "epoch": 0.2699110538502747, "grad_norm": 0.9227922558784485, "learning_rate": 2.5695050742429586e-05, "loss": 0.0864, "step": 15241 }, { "epoch": 0.26992876338730315, "grad_norm": 0.8421314358711243, "learning_rate": 2.5694447471085506e-05, "loss": 0.1722, "step": 15242 }, { "epoch": 0.26994647292433155, "grad_norm": 0.7018389701843262, "learning_rate": 2.569384416455777e-05, "loss": 0.0743, "step": 15243 }, { "epoch": 0.26996418246136, "grad_norm": 0.5748410224914551, "learning_rate": 2.5693240822848377e-05, "loss": 0.1252, "step": 15244 }, { "epoch": 0.26998189199838846, "grad_norm": 0.8107659220695496, "learning_rate": 2.56926374459593e-05, "loss": 0.0897, "step": 15245 }, { "epoch": 0.26999960153541686, "grad_norm": 0.6895743608474731, "learning_rate": 2.5692034033892526e-05, "loss": 0.1023, "step": 15246 }, { "epoch": 0.2700173110724453, "grad_norm": 0.391355961561203, "learning_rate": 2.569143058665004e-05, "loss": 0.0731, "step": 15247 }, { "epoch": 0.2700350206094737, "grad_norm": 1.049133539199829, "learning_rate": 2.569082710423383e-05, "loss": 0.0878, "step": 15248 }, { "epoch": 0.27005273014650216, "grad_norm": 0.8429846167564392, "learning_rate": 2.569022358664588e-05, "loss": 0.1071, "step": 15249 }, { "epoch": 0.27007043968353056, "grad_norm": 1.4352394342422485, "learning_rate": 2.5689620033888176e-05, "loss": 0.0813, "step": 15250 }, { "epoch": 0.270088149220559, "grad_norm": 1.0264455080032349, "learning_rate": 2.568901644596271e-05, "loss": 0.1049, "step": 15251 }, { "epoch": 0.2701058587575874, "grad_norm": 0.5869017243385315, "learning_rate": 2.568841282287145e-05, "loss": 0.0972, "step": 15252 }, { "epoch": 0.27012356829461587, "grad_norm": 0.809471845626831, "learning_rate": 2.5687809164616397e-05, "loss": 0.1164, "step": 15253 }, { "epoch": 0.27014127783164427, "grad_norm": 0.6759240031242371, "learning_rate": 2.568720547119953e-05, "loss": 0.1138, "step": 15254 }, { "epoch": 0.2701589873686727, "grad_norm": 0.9870954751968384, "learning_rate": 2.5686601742622843e-05, "loss": 0.1016, "step": 15255 }, { "epoch": 0.2701766969057011, "grad_norm": 0.8939988613128662, "learning_rate": 2.568599797888832e-05, "loss": 0.093, "step": 15256 }, { "epoch": 0.2701944064427296, "grad_norm": 0.9058349132537842, "learning_rate": 2.5685394179997937e-05, "loss": 0.1046, "step": 15257 }, { "epoch": 0.27021211597975797, "grad_norm": 1.1640063524246216, "learning_rate": 2.5684790345953688e-05, "loss": 0.1242, "step": 15258 }, { "epoch": 0.2702298255167864, "grad_norm": 0.7454048991203308, "learning_rate": 2.5684186476757565e-05, "loss": 0.1067, "step": 15259 }, { "epoch": 0.2702475350538149, "grad_norm": 0.8514893054962158, "learning_rate": 2.5683582572411544e-05, "loss": 0.0857, "step": 15260 }, { "epoch": 0.2702652445908433, "grad_norm": 0.63278728723526, "learning_rate": 2.568297863291762e-05, "loss": 0.1027, "step": 15261 }, { "epoch": 0.27028295412787173, "grad_norm": 0.9622236490249634, "learning_rate": 2.5682374658277776e-05, "loss": 0.0835, "step": 15262 }, { "epoch": 0.27030066366490013, "grad_norm": 0.9415682554244995, "learning_rate": 2.5681770648494e-05, "loss": 0.077, "step": 15263 }, { "epoch": 0.2703183732019286, "grad_norm": 0.5601989030838013, "learning_rate": 2.568116660356828e-05, "loss": 0.0937, "step": 15264 }, { "epoch": 0.270336082738957, "grad_norm": 0.7342483401298523, "learning_rate": 2.5680562523502603e-05, "loss": 0.1098, "step": 15265 }, { "epoch": 0.27035379227598544, "grad_norm": 0.8168221712112427, "learning_rate": 2.5679958408298954e-05, "loss": 0.0931, "step": 15266 }, { "epoch": 0.27037150181301384, "grad_norm": 0.5671694874763489, "learning_rate": 2.5679354257959318e-05, "loss": 0.1176, "step": 15267 }, { "epoch": 0.2703892113500423, "grad_norm": 0.3318415582180023, "learning_rate": 2.5678750072485694e-05, "loss": 0.0759, "step": 15268 }, { "epoch": 0.2704069208870707, "grad_norm": 0.8092553615570068, "learning_rate": 2.5678145851880053e-05, "loss": 0.1032, "step": 15269 }, { "epoch": 0.27042463042409914, "grad_norm": 0.8121762275695801, "learning_rate": 2.56775415961444e-05, "loss": 0.0818, "step": 15270 }, { "epoch": 0.27044233996112754, "grad_norm": 0.8090660572052002, "learning_rate": 2.5676937305280716e-05, "loss": 0.0735, "step": 15271 }, { "epoch": 0.270460049498156, "grad_norm": 0.7963652610778809, "learning_rate": 2.567633297929098e-05, "loss": 0.0951, "step": 15272 }, { "epoch": 0.2704777590351844, "grad_norm": 1.2699562311172485, "learning_rate": 2.5675728618177197e-05, "loss": 0.1358, "step": 15273 }, { "epoch": 0.27049546857221285, "grad_norm": 0.639583170413971, "learning_rate": 2.567512422194134e-05, "loss": 0.095, "step": 15274 }, { "epoch": 0.2705131781092413, "grad_norm": 0.5578666925430298, "learning_rate": 2.567451979058541e-05, "loss": 0.1028, "step": 15275 }, { "epoch": 0.2705308876462697, "grad_norm": 1.0977303981781006, "learning_rate": 2.5673915324111383e-05, "loss": 0.0805, "step": 15276 }, { "epoch": 0.27054859718329816, "grad_norm": 0.9873591661453247, "learning_rate": 2.5673310822521258e-05, "loss": 0.1056, "step": 15277 }, { "epoch": 0.27056630672032655, "grad_norm": 1.4224966764450073, "learning_rate": 2.5672706285817023e-05, "loss": 0.1005, "step": 15278 }, { "epoch": 0.270584016257355, "grad_norm": 0.8458736538887024, "learning_rate": 2.567210171400066e-05, "loss": 0.1058, "step": 15279 }, { "epoch": 0.2706017257943834, "grad_norm": 0.7832011580467224, "learning_rate": 2.567149710707416e-05, "loss": 0.0849, "step": 15280 }, { "epoch": 0.27061943533141186, "grad_norm": 0.9610975384712219, "learning_rate": 2.567089246503952e-05, "loss": 0.0913, "step": 15281 }, { "epoch": 0.27063714486844026, "grad_norm": 0.8198978900909424, "learning_rate": 2.5670287787898716e-05, "loss": 0.0892, "step": 15282 }, { "epoch": 0.2706548544054687, "grad_norm": 0.74806147813797, "learning_rate": 2.566968307565375e-05, "loss": 0.1007, "step": 15283 }, { "epoch": 0.2706725639424971, "grad_norm": 1.277553677558899, "learning_rate": 2.5669078328306604e-05, "loss": 0.1086, "step": 15284 }, { "epoch": 0.27069027347952557, "grad_norm": 0.8008781671524048, "learning_rate": 2.5668473545859266e-05, "loss": 0.0879, "step": 15285 }, { "epoch": 0.27070798301655397, "grad_norm": 0.7888683676719666, "learning_rate": 2.566786872831373e-05, "loss": 0.0816, "step": 15286 }, { "epoch": 0.2707256925535824, "grad_norm": 0.8064649105072021, "learning_rate": 2.566726387567199e-05, "loss": 0.095, "step": 15287 }, { "epoch": 0.2707434020906108, "grad_norm": 0.9651579260826111, "learning_rate": 2.566665898793603e-05, "loss": 0.1101, "step": 15288 }, { "epoch": 0.2707611116276393, "grad_norm": 0.6321965456008911, "learning_rate": 2.5666054065107837e-05, "loss": 0.0948, "step": 15289 }, { "epoch": 0.2707788211646677, "grad_norm": 0.23565825819969177, "learning_rate": 2.5665449107189408e-05, "loss": 0.0871, "step": 15290 }, { "epoch": 0.2707965307016961, "grad_norm": 0.5053369998931885, "learning_rate": 2.5664844114182727e-05, "loss": 0.0614, "step": 15291 }, { "epoch": 0.2708142402387246, "grad_norm": 0.959570586681366, "learning_rate": 2.566423908608979e-05, "loss": 0.0787, "step": 15292 }, { "epoch": 0.270831949775753, "grad_norm": 0.9178187251091003, "learning_rate": 2.5663634022912582e-05, "loss": 0.1001, "step": 15293 }, { "epoch": 0.27084965931278143, "grad_norm": 1.064266324043274, "learning_rate": 2.5663028924653096e-05, "loss": 0.1149, "step": 15294 }, { "epoch": 0.27086736884980983, "grad_norm": 1.0726674795150757, "learning_rate": 2.566242379131333e-05, "loss": 0.0935, "step": 15295 }, { "epoch": 0.2708850783868383, "grad_norm": 0.7951224446296692, "learning_rate": 2.5661818622895264e-05, "loss": 0.0886, "step": 15296 }, { "epoch": 0.2709027879238667, "grad_norm": 1.3734965324401855, "learning_rate": 2.5661213419400893e-05, "loss": 0.1057, "step": 15297 }, { "epoch": 0.27092049746089514, "grad_norm": 0.7331885695457458, "learning_rate": 2.5660608180832208e-05, "loss": 0.0851, "step": 15298 }, { "epoch": 0.27093820699792354, "grad_norm": 0.6734297871589661, "learning_rate": 2.5660002907191203e-05, "loss": 0.0824, "step": 15299 }, { "epoch": 0.270955916534952, "grad_norm": 0.6753703951835632, "learning_rate": 2.5659397598479866e-05, "loss": 0.1095, "step": 15300 }, { "epoch": 0.2709736260719804, "grad_norm": 0.9592221975326538, "learning_rate": 2.5658792254700185e-05, "loss": 0.0727, "step": 15301 }, { "epoch": 0.27099133560900884, "grad_norm": 0.5254005789756775, "learning_rate": 2.5658186875854158e-05, "loss": 0.0755, "step": 15302 }, { "epoch": 0.2710090451460373, "grad_norm": 0.8139560222625732, "learning_rate": 2.5657581461943774e-05, "loss": 0.0834, "step": 15303 }, { "epoch": 0.2710267546830657, "grad_norm": 0.5608734488487244, "learning_rate": 2.565697601297103e-05, "loss": 0.0769, "step": 15304 }, { "epoch": 0.27104446422009415, "grad_norm": 0.8117145895957947, "learning_rate": 2.5656370528937905e-05, "loss": 0.0697, "step": 15305 }, { "epoch": 0.27106217375712255, "grad_norm": 1.2427992820739746, "learning_rate": 2.5655765009846402e-05, "loss": 0.1031, "step": 15306 }, { "epoch": 0.271079883294151, "grad_norm": 0.8997583389282227, "learning_rate": 2.565515945569851e-05, "loss": 0.0907, "step": 15307 }, { "epoch": 0.2710975928311794, "grad_norm": 0.7486866116523743, "learning_rate": 2.565455386649622e-05, "loss": 0.1052, "step": 15308 }, { "epoch": 0.27111530236820786, "grad_norm": 0.8621216416358948, "learning_rate": 2.5653948242241526e-05, "loss": 0.0942, "step": 15309 }, { "epoch": 0.27113301190523625, "grad_norm": 0.7994346022605896, "learning_rate": 2.5653342582936418e-05, "loss": 0.0929, "step": 15310 }, { "epoch": 0.2711507214422647, "grad_norm": 0.7068290710449219, "learning_rate": 2.5652736888582893e-05, "loss": 0.0993, "step": 15311 }, { "epoch": 0.2711684309792931, "grad_norm": 0.957696795463562, "learning_rate": 2.5652131159182942e-05, "loss": 0.1233, "step": 15312 }, { "epoch": 0.27118614051632156, "grad_norm": 1.1656639575958252, "learning_rate": 2.5651525394738555e-05, "loss": 0.1043, "step": 15313 }, { "epoch": 0.27120385005334996, "grad_norm": 0.9972304701805115, "learning_rate": 2.565091959525173e-05, "loss": 0.1221, "step": 15314 }, { "epoch": 0.2712215595903784, "grad_norm": 1.8676530122756958, "learning_rate": 2.5650313760724457e-05, "loss": 0.1139, "step": 15315 }, { "epoch": 0.2712392691274068, "grad_norm": 0.7667573094367981, "learning_rate": 2.5649707891158724e-05, "loss": 0.0883, "step": 15316 }, { "epoch": 0.27125697866443527, "grad_norm": 0.766482949256897, "learning_rate": 2.5649101986556527e-05, "loss": 0.1047, "step": 15317 }, { "epoch": 0.2712746882014637, "grad_norm": 1.258007526397705, "learning_rate": 2.5648496046919868e-05, "loss": 0.0968, "step": 15318 }, { "epoch": 0.2712923977384921, "grad_norm": 0.545371413230896, "learning_rate": 2.564789007225073e-05, "loss": 0.065, "step": 15319 }, { "epoch": 0.2713101072755206, "grad_norm": 0.9431453347206116, "learning_rate": 2.5647284062551114e-05, "loss": 0.0983, "step": 15320 }, { "epoch": 0.27132781681254897, "grad_norm": 0.7769917249679565, "learning_rate": 2.5646678017823013e-05, "loss": 0.0976, "step": 15321 }, { "epoch": 0.2713455263495774, "grad_norm": 0.7278474569320679, "learning_rate": 2.564607193806841e-05, "loss": 0.0975, "step": 15322 }, { "epoch": 0.2713632358866058, "grad_norm": 1.0173022747039795, "learning_rate": 2.5645465823289314e-05, "loss": 0.1141, "step": 15323 }, { "epoch": 0.2713809454236343, "grad_norm": 0.6592957377433777, "learning_rate": 2.5644859673487708e-05, "loss": 0.1081, "step": 15324 }, { "epoch": 0.2713986549606627, "grad_norm": 0.8561589121818542, "learning_rate": 2.5644253488665592e-05, "loss": 0.1185, "step": 15325 }, { "epoch": 0.27141636449769113, "grad_norm": 0.5227159261703491, "learning_rate": 2.564364726882496e-05, "loss": 0.1051, "step": 15326 }, { "epoch": 0.27143407403471953, "grad_norm": 1.0837677717208862, "learning_rate": 2.56430410139678e-05, "loss": 0.1304, "step": 15327 }, { "epoch": 0.271451783571748, "grad_norm": 0.7099774479866028, "learning_rate": 2.5642434724096117e-05, "loss": 0.0758, "step": 15328 }, { "epoch": 0.2714694931087764, "grad_norm": 0.9024806022644043, "learning_rate": 2.5641828399211896e-05, "loss": 0.1127, "step": 15329 }, { "epoch": 0.27148720264580484, "grad_norm": 0.49267593026161194, "learning_rate": 2.564122203931714e-05, "loss": 0.1097, "step": 15330 }, { "epoch": 0.27150491218283324, "grad_norm": 0.8773559331893921, "learning_rate": 2.5640615644413838e-05, "loss": 0.106, "step": 15331 }, { "epoch": 0.2715226217198617, "grad_norm": 0.7925720810890198, "learning_rate": 2.5640009214503985e-05, "loss": 0.0842, "step": 15332 }, { "epoch": 0.27154033125689014, "grad_norm": 0.7330923676490784, "learning_rate": 2.563940274958958e-05, "loss": 0.0898, "step": 15333 }, { "epoch": 0.27155804079391854, "grad_norm": 0.4562738835811615, "learning_rate": 2.5638796249672616e-05, "loss": 0.0808, "step": 15334 }, { "epoch": 0.271575750330947, "grad_norm": 1.2168030738830566, "learning_rate": 2.5638189714755086e-05, "loss": 0.1365, "step": 15335 }, { "epoch": 0.2715934598679754, "grad_norm": 0.8042242527008057, "learning_rate": 2.5637583144838992e-05, "loss": 0.0742, "step": 15336 }, { "epoch": 0.27161116940500385, "grad_norm": 1.6722856760025024, "learning_rate": 2.5636976539926322e-05, "loss": 0.1148, "step": 15337 }, { "epoch": 0.27162887894203225, "grad_norm": 0.8540915250778198, "learning_rate": 2.5636369900019076e-05, "loss": 0.0939, "step": 15338 }, { "epoch": 0.2716465884790607, "grad_norm": 0.651340126991272, "learning_rate": 2.5635763225119248e-05, "loss": 0.0796, "step": 15339 }, { "epoch": 0.2716642980160891, "grad_norm": 0.9983834624290466, "learning_rate": 2.5635156515228835e-05, "loss": 0.081, "step": 15340 }, { "epoch": 0.27168200755311755, "grad_norm": 0.9812217354774475, "learning_rate": 2.5634549770349832e-05, "loss": 0.1177, "step": 15341 }, { "epoch": 0.27169971709014595, "grad_norm": 0.7245916724205017, "learning_rate": 2.5633942990484242e-05, "loss": 0.0801, "step": 15342 }, { "epoch": 0.2717174266271744, "grad_norm": 0.8508406281471252, "learning_rate": 2.5633336175634046e-05, "loss": 0.077, "step": 15343 }, { "epoch": 0.2717351361642028, "grad_norm": 0.6695899367332458, "learning_rate": 2.5632729325801254e-05, "loss": 0.0995, "step": 15344 }, { "epoch": 0.27175284570123126, "grad_norm": 0.8676765561103821, "learning_rate": 2.563212244098786e-05, "loss": 0.0978, "step": 15345 }, { "epoch": 0.27177055523825966, "grad_norm": 0.5071179270744324, "learning_rate": 2.5631515521195854e-05, "loss": 0.1112, "step": 15346 }, { "epoch": 0.2717882647752881, "grad_norm": 0.9051437377929688, "learning_rate": 2.5630908566427243e-05, "loss": 0.1074, "step": 15347 }, { "epoch": 0.27180597431231657, "grad_norm": 0.6865466833114624, "learning_rate": 2.5630301576684015e-05, "loss": 0.0993, "step": 15348 }, { "epoch": 0.27182368384934497, "grad_norm": 0.9129502177238464, "learning_rate": 2.562969455196817e-05, "loss": 0.0978, "step": 15349 }, { "epoch": 0.2718413933863734, "grad_norm": 0.8928062319755554, "learning_rate": 2.56290874922817e-05, "loss": 0.1127, "step": 15350 }, { "epoch": 0.2718591029234018, "grad_norm": 0.6992318034172058, "learning_rate": 2.5628480397626616e-05, "loss": 0.1533, "step": 15351 }, { "epoch": 0.2718768124604303, "grad_norm": 0.9692932963371277, "learning_rate": 2.5627873268004903e-05, "loss": 0.0978, "step": 15352 }, { "epoch": 0.27189452199745867, "grad_norm": 0.775495171546936, "learning_rate": 2.562726610341856e-05, "loss": 0.1177, "step": 15353 }, { "epoch": 0.2719122315344871, "grad_norm": 0.5150043964385986, "learning_rate": 2.5626658903869588e-05, "loss": 0.0567, "step": 15354 }, { "epoch": 0.2719299410715155, "grad_norm": 0.993137001991272, "learning_rate": 2.562605166935998e-05, "loss": 0.1128, "step": 15355 }, { "epoch": 0.271947650608544, "grad_norm": 0.640904426574707, "learning_rate": 2.5625444399891743e-05, "loss": 0.0633, "step": 15356 }, { "epoch": 0.2719653601455724, "grad_norm": 0.9219598770141602, "learning_rate": 2.5624837095466865e-05, "loss": 0.1034, "step": 15357 }, { "epoch": 0.27198306968260083, "grad_norm": 0.7508329749107361, "learning_rate": 2.5624229756087345e-05, "loss": 0.0973, "step": 15358 }, { "epoch": 0.27200077921962923, "grad_norm": 0.7587724924087524, "learning_rate": 2.562362238175519e-05, "loss": 0.1126, "step": 15359 }, { "epoch": 0.2720184887566577, "grad_norm": 0.7330670356750488, "learning_rate": 2.5623014972472384e-05, "loss": 0.1084, "step": 15360 }, { "epoch": 0.2720361982936861, "grad_norm": 1.1103650331497192, "learning_rate": 2.562240752824094e-05, "loss": 0.1089, "step": 15361 }, { "epoch": 0.27205390783071454, "grad_norm": 0.5081562399864197, "learning_rate": 2.5621800049062844e-05, "loss": 0.0558, "step": 15362 }, { "epoch": 0.272071617367743, "grad_norm": 0.5918161869049072, "learning_rate": 2.5621192534940106e-05, "loss": 0.0924, "step": 15363 }, { "epoch": 0.2720893269047714, "grad_norm": 0.8417344689369202, "learning_rate": 2.5620584985874718e-05, "loss": 0.103, "step": 15364 }, { "epoch": 0.27210703644179984, "grad_norm": 0.6025742292404175, "learning_rate": 2.561997740186868e-05, "loss": 0.0913, "step": 15365 }, { "epoch": 0.27212474597882824, "grad_norm": 2.057328939437866, "learning_rate": 2.5619369782923984e-05, "loss": 0.1138, "step": 15366 }, { "epoch": 0.2721424555158567, "grad_norm": 0.9908429384231567, "learning_rate": 2.561876212904264e-05, "loss": 0.1125, "step": 15367 }, { "epoch": 0.2721601650528851, "grad_norm": 1.2590664625167847, "learning_rate": 2.5618154440226644e-05, "loss": 0.1275, "step": 15368 }, { "epoch": 0.27217787458991355, "grad_norm": 0.8449089527130127, "learning_rate": 2.561754671647799e-05, "loss": 0.1429, "step": 15369 }, { "epoch": 0.27219558412694195, "grad_norm": 1.3667117357254028, "learning_rate": 2.5616938957798683e-05, "loss": 0.1695, "step": 15370 }, { "epoch": 0.2722132936639704, "grad_norm": 1.0817488431930542, "learning_rate": 2.5616331164190722e-05, "loss": 0.0792, "step": 15371 }, { "epoch": 0.2722310032009988, "grad_norm": 1.2909244298934937, "learning_rate": 2.5615723335656103e-05, "loss": 0.1167, "step": 15372 }, { "epoch": 0.27224871273802725, "grad_norm": 1.0596354007720947, "learning_rate": 2.561511547219683e-05, "loss": 0.1479, "step": 15373 }, { "epoch": 0.27226642227505565, "grad_norm": 1.8231356143951416, "learning_rate": 2.5614507573814904e-05, "loss": 0.1099, "step": 15374 }, { "epoch": 0.2722841318120841, "grad_norm": 1.4752202033996582, "learning_rate": 2.5613899640512314e-05, "loss": 0.1281, "step": 15375 }, { "epoch": 0.2723018413491125, "grad_norm": 0.7110642194747925, "learning_rate": 2.561329167229107e-05, "loss": 0.1007, "step": 15376 }, { "epoch": 0.27231955088614096, "grad_norm": 1.2268351316452026, "learning_rate": 2.5612683669153175e-05, "loss": 0.0849, "step": 15377 }, { "epoch": 0.2723372604231694, "grad_norm": 0.7952210307121277, "learning_rate": 2.561207563110062e-05, "loss": 0.0958, "step": 15378 }, { "epoch": 0.2723549699601978, "grad_norm": 0.9305896162986755, "learning_rate": 2.5611467558135406e-05, "loss": 0.0973, "step": 15379 }, { "epoch": 0.27237267949722627, "grad_norm": 0.615885317325592, "learning_rate": 2.5610859450259544e-05, "loss": 0.0955, "step": 15380 }, { "epoch": 0.27239038903425467, "grad_norm": 0.9298792481422424, "learning_rate": 2.561025130747502e-05, "loss": 0.0813, "step": 15381 }, { "epoch": 0.2724080985712831, "grad_norm": 0.7301631569862366, "learning_rate": 2.560964312978385e-05, "loss": 0.0792, "step": 15382 }, { "epoch": 0.2724258081083115, "grad_norm": 0.7716534733772278, "learning_rate": 2.5609034917188026e-05, "loss": 0.0786, "step": 15383 }, { "epoch": 0.27244351764533997, "grad_norm": 1.0038362741470337, "learning_rate": 2.560842666968955e-05, "loss": 0.1044, "step": 15384 }, { "epoch": 0.27246122718236837, "grad_norm": 1.2265346050262451, "learning_rate": 2.560781838729042e-05, "loss": 0.1181, "step": 15385 }, { "epoch": 0.2724789367193968, "grad_norm": 0.8584659695625305, "learning_rate": 2.5607210069992638e-05, "loss": 0.0875, "step": 15386 }, { "epoch": 0.2724966462564252, "grad_norm": 0.9508982300758362, "learning_rate": 2.5606601717798212e-05, "loss": 0.122, "step": 15387 }, { "epoch": 0.2725143557934537, "grad_norm": 0.9087121486663818, "learning_rate": 2.560599333070914e-05, "loss": 0.0966, "step": 15388 }, { "epoch": 0.2725320653304821, "grad_norm": 0.7894461154937744, "learning_rate": 2.5605384908727422e-05, "loss": 0.0969, "step": 15389 }, { "epoch": 0.27254977486751053, "grad_norm": 0.682610809803009, "learning_rate": 2.5604776451855062e-05, "loss": 0.0893, "step": 15390 }, { "epoch": 0.27256748440453893, "grad_norm": 1.0635496377944946, "learning_rate": 2.5604167960094056e-05, "loss": 0.1118, "step": 15391 }, { "epoch": 0.2725851939415674, "grad_norm": 0.9946073293685913, "learning_rate": 2.560355943344641e-05, "loss": 0.0928, "step": 15392 }, { "epoch": 0.27260290347859584, "grad_norm": 0.5855004787445068, "learning_rate": 2.560295087191413e-05, "loss": 0.0714, "step": 15393 }, { "epoch": 0.27262061301562424, "grad_norm": 1.007527470588684, "learning_rate": 2.5602342275499213e-05, "loss": 0.1297, "step": 15394 }, { "epoch": 0.2726383225526527, "grad_norm": 0.7212101221084595, "learning_rate": 2.560173364420366e-05, "loss": 0.0935, "step": 15395 }, { "epoch": 0.2726560320896811, "grad_norm": 0.7714841961860657, "learning_rate": 2.5601124978029476e-05, "loss": 0.0739, "step": 15396 }, { "epoch": 0.27267374162670954, "grad_norm": 0.8488646149635315, "learning_rate": 2.5600516276978666e-05, "loss": 0.1227, "step": 15397 }, { "epoch": 0.27269145116373794, "grad_norm": 0.7719512581825256, "learning_rate": 2.5599907541053226e-05, "loss": 0.0976, "step": 15398 }, { "epoch": 0.2727091607007664, "grad_norm": 0.9831995964050293, "learning_rate": 2.5599298770255162e-05, "loss": 0.1008, "step": 15399 }, { "epoch": 0.2727268702377948, "grad_norm": 0.6555895805358887, "learning_rate": 2.559868996458648e-05, "loss": 0.0798, "step": 15400 }, { "epoch": 0.27274457977482325, "grad_norm": 0.7552753686904907, "learning_rate": 2.5598081124049177e-05, "loss": 0.1155, "step": 15401 }, { "epoch": 0.27276228931185165, "grad_norm": 1.1845329999923706, "learning_rate": 2.559747224864526e-05, "loss": 0.1306, "step": 15402 }, { "epoch": 0.2727799988488801, "grad_norm": 0.5974268913269043, "learning_rate": 2.5596863338376734e-05, "loss": 0.0816, "step": 15403 }, { "epoch": 0.2727977083859085, "grad_norm": 0.7229148745536804, "learning_rate": 2.55962543932456e-05, "loss": 0.096, "step": 15404 }, { "epoch": 0.27281541792293695, "grad_norm": 1.9151654243469238, "learning_rate": 2.559564541325386e-05, "loss": 0.0769, "step": 15405 }, { "epoch": 0.27283312745996535, "grad_norm": 0.5338727831840515, "learning_rate": 2.5595036398403515e-05, "loss": 0.1017, "step": 15406 }, { "epoch": 0.2728508369969938, "grad_norm": 0.8210222721099854, "learning_rate": 2.559442734869657e-05, "loss": 0.1097, "step": 15407 }, { "epoch": 0.27286854653402226, "grad_norm": 0.7817084193229675, "learning_rate": 2.5593818264135036e-05, "loss": 0.0856, "step": 15408 }, { "epoch": 0.27288625607105066, "grad_norm": 0.8292754888534546, "learning_rate": 2.5593209144720907e-05, "loss": 0.0864, "step": 15409 }, { "epoch": 0.2729039656080791, "grad_norm": 1.531753420829773, "learning_rate": 2.5592599990456198e-05, "loss": 0.1359, "step": 15410 }, { "epoch": 0.2729216751451075, "grad_norm": 0.8097227215766907, "learning_rate": 2.5591990801342903e-05, "loss": 0.1219, "step": 15411 }, { "epoch": 0.27293938468213597, "grad_norm": 0.8521280884742737, "learning_rate": 2.5591381577383026e-05, "loss": 0.1195, "step": 15412 }, { "epoch": 0.27295709421916436, "grad_norm": 0.6815036535263062, "learning_rate": 2.5590772318578577e-05, "loss": 0.0857, "step": 15413 }, { "epoch": 0.2729748037561928, "grad_norm": 0.8069233894348145, "learning_rate": 2.559016302493156e-05, "loss": 0.1196, "step": 15414 }, { "epoch": 0.2729925132932212, "grad_norm": 0.5817402005195618, "learning_rate": 2.5589553696443975e-05, "loss": 0.0757, "step": 15415 }, { "epoch": 0.27301022283024967, "grad_norm": 1.1560965776443481, "learning_rate": 2.558894433311783e-05, "loss": 0.1181, "step": 15416 }, { "epoch": 0.27302793236727807, "grad_norm": 0.7787560224533081, "learning_rate": 2.558833493495513e-05, "loss": 0.1016, "step": 15417 }, { "epoch": 0.2730456419043065, "grad_norm": 1.0392146110534668, "learning_rate": 2.558772550195788e-05, "loss": 0.082, "step": 15418 }, { "epoch": 0.2730633514413349, "grad_norm": 0.9183699488639832, "learning_rate": 2.5587116034128087e-05, "loss": 0.0823, "step": 15419 }, { "epoch": 0.2730810609783634, "grad_norm": 1.0414917469024658, "learning_rate": 2.558650653146775e-05, "loss": 0.1306, "step": 15420 }, { "epoch": 0.2730987705153918, "grad_norm": 0.5931721925735474, "learning_rate": 2.5585896993978873e-05, "loss": 0.0651, "step": 15421 }, { "epoch": 0.27311648005242023, "grad_norm": 0.903045117855072, "learning_rate": 2.558528742166347e-05, "loss": 0.0916, "step": 15422 }, { "epoch": 0.2731341895894487, "grad_norm": 0.5900062918663025, "learning_rate": 2.558467781452354e-05, "loss": 0.069, "step": 15423 }, { "epoch": 0.2731518991264771, "grad_norm": 0.923559844493866, "learning_rate": 2.558406817256109e-05, "loss": 0.1019, "step": 15424 }, { "epoch": 0.27316960866350554, "grad_norm": 0.7423649430274963, "learning_rate": 2.558345849577813e-05, "loss": 0.079, "step": 15425 }, { "epoch": 0.27318731820053394, "grad_norm": 0.548025369644165, "learning_rate": 2.558284878417666e-05, "loss": 0.0521, "step": 15426 }, { "epoch": 0.2732050277375624, "grad_norm": 0.853978157043457, "learning_rate": 2.5582239037758686e-05, "loss": 0.0913, "step": 15427 }, { "epoch": 0.2732227372745908, "grad_norm": 1.156890630722046, "learning_rate": 2.558162925652622e-05, "loss": 0.1243, "step": 15428 }, { "epoch": 0.27324044681161924, "grad_norm": 0.5496341586112976, "learning_rate": 2.5581019440481263e-05, "loss": 0.0797, "step": 15429 }, { "epoch": 0.27325815634864764, "grad_norm": 0.5555173754692078, "learning_rate": 2.558040958962582e-05, "loss": 0.0689, "step": 15430 }, { "epoch": 0.2732758658856761, "grad_norm": 0.747214138507843, "learning_rate": 2.55797997039619e-05, "loss": 0.0927, "step": 15431 }, { "epoch": 0.2732935754227045, "grad_norm": 0.5960835218429565, "learning_rate": 2.557918978349151e-05, "loss": 0.0959, "step": 15432 }, { "epoch": 0.27331128495973295, "grad_norm": 0.6014496684074402, "learning_rate": 2.5578579828216654e-05, "loss": 0.081, "step": 15433 }, { "epoch": 0.27332899449676135, "grad_norm": 0.7863844633102417, "learning_rate": 2.5577969838139345e-05, "loss": 0.0748, "step": 15434 }, { "epoch": 0.2733467040337898, "grad_norm": 0.7678304314613342, "learning_rate": 2.557735981326158e-05, "loss": 0.0898, "step": 15435 }, { "epoch": 0.2733644135708182, "grad_norm": 0.8580498099327087, "learning_rate": 2.557674975358537e-05, "loss": 0.0954, "step": 15436 }, { "epoch": 0.27338212310784665, "grad_norm": 0.42691415548324585, "learning_rate": 2.557613965911273e-05, "loss": 0.0573, "step": 15437 }, { "epoch": 0.2733998326448751, "grad_norm": 0.7627615928649902, "learning_rate": 2.5575529529845657e-05, "loss": 0.0769, "step": 15438 }, { "epoch": 0.2734175421819035, "grad_norm": 0.8266396522521973, "learning_rate": 2.5574919365786157e-05, "loss": 0.0724, "step": 15439 }, { "epoch": 0.27343525171893196, "grad_norm": 0.7843344807624817, "learning_rate": 2.5574309166936246e-05, "loss": 0.083, "step": 15440 }, { "epoch": 0.27345296125596036, "grad_norm": 0.8898069262504578, "learning_rate": 2.5573698933297927e-05, "loss": 0.1006, "step": 15441 }, { "epoch": 0.2734706707929888, "grad_norm": 1.0530627965927124, "learning_rate": 2.5573088664873207e-05, "loss": 0.1152, "step": 15442 }, { "epoch": 0.2734883803300172, "grad_norm": 0.7243862152099609, "learning_rate": 2.5572478361664094e-05, "loss": 0.0807, "step": 15443 }, { "epoch": 0.27350608986704567, "grad_norm": 0.605097770690918, "learning_rate": 2.5571868023672603e-05, "loss": 0.0726, "step": 15444 }, { "epoch": 0.27352379940407406, "grad_norm": 0.818184494972229, "learning_rate": 2.557125765090073e-05, "loss": 0.0817, "step": 15445 }, { "epoch": 0.2735415089411025, "grad_norm": 1.0170621871948242, "learning_rate": 2.557064724335049e-05, "loss": 0.0876, "step": 15446 }, { "epoch": 0.2735592184781309, "grad_norm": 1.1928324699401855, "learning_rate": 2.557003680102389e-05, "loss": 0.1376, "step": 15447 }, { "epoch": 0.27357692801515937, "grad_norm": 1.3065811395645142, "learning_rate": 2.5569426323922933e-05, "loss": 0.0946, "step": 15448 }, { "epoch": 0.27359463755218777, "grad_norm": 0.6610592007637024, "learning_rate": 2.556881581204964e-05, "loss": 0.102, "step": 15449 }, { "epoch": 0.2736123470892162, "grad_norm": 0.9013481736183167, "learning_rate": 2.5568205265406004e-05, "loss": 0.1031, "step": 15450 }, { "epoch": 0.2736300566262446, "grad_norm": 0.6766629219055176, "learning_rate": 2.556759468399405e-05, "loss": 0.0884, "step": 15451 }, { "epoch": 0.2736477661632731, "grad_norm": 0.8953661918640137, "learning_rate": 2.556698406781577e-05, "loss": 0.094, "step": 15452 }, { "epoch": 0.27366547570030153, "grad_norm": 0.9125103950500488, "learning_rate": 2.5566373416873187e-05, "loss": 0.1088, "step": 15453 }, { "epoch": 0.27368318523732993, "grad_norm": 1.0159087181091309, "learning_rate": 2.55657627311683e-05, "loss": 0.1032, "step": 15454 }, { "epoch": 0.2737008947743584, "grad_norm": 1.296694278717041, "learning_rate": 2.5565152010703128e-05, "loss": 0.1266, "step": 15455 }, { "epoch": 0.2737186043113868, "grad_norm": 0.6913049221038818, "learning_rate": 2.5564541255479673e-05, "loss": 0.0924, "step": 15456 }, { "epoch": 0.27373631384841524, "grad_norm": 1.0237165689468384, "learning_rate": 2.5563930465499943e-05, "loss": 0.1225, "step": 15457 }, { "epoch": 0.27375402338544363, "grad_norm": 0.9924812316894531, "learning_rate": 2.5563319640765952e-05, "loss": 0.1375, "step": 15458 }, { "epoch": 0.2737717329224721, "grad_norm": 1.0956463813781738, "learning_rate": 2.5562708781279704e-05, "loss": 0.0941, "step": 15459 }, { "epoch": 0.2737894424595005, "grad_norm": 0.9015767574310303, "learning_rate": 2.5562097887043215e-05, "loss": 0.092, "step": 15460 }, { "epoch": 0.27380715199652894, "grad_norm": 0.8439764380455017, "learning_rate": 2.5561486958058493e-05, "loss": 0.0809, "step": 15461 }, { "epoch": 0.27382486153355734, "grad_norm": 0.7314735054969788, "learning_rate": 2.5560875994327545e-05, "loss": 0.0886, "step": 15462 }, { "epoch": 0.2738425710705858, "grad_norm": 0.8199371695518494, "learning_rate": 2.556026499585239e-05, "loss": 0.0947, "step": 15463 }, { "epoch": 0.2738602806076142, "grad_norm": 0.982117235660553, "learning_rate": 2.5559653962635025e-05, "loss": 0.0964, "step": 15464 }, { "epoch": 0.27387799014464265, "grad_norm": 0.6579049229621887, "learning_rate": 2.555904289467747e-05, "loss": 0.0704, "step": 15465 }, { "epoch": 0.27389569968167105, "grad_norm": 0.6484406590461731, "learning_rate": 2.5558431791981725e-05, "loss": 0.0762, "step": 15466 }, { "epoch": 0.2739134092186995, "grad_norm": 1.0650562047958374, "learning_rate": 2.5557820654549815e-05, "loss": 0.0888, "step": 15467 }, { "epoch": 0.27393111875572795, "grad_norm": 0.7254875302314758, "learning_rate": 2.5557209482383743e-05, "loss": 0.0719, "step": 15468 }, { "epoch": 0.27394882829275635, "grad_norm": 0.5752281546592712, "learning_rate": 2.5556598275485513e-05, "loss": 0.0846, "step": 15469 }, { "epoch": 0.2739665378297848, "grad_norm": 0.8494105339050293, "learning_rate": 2.555598703385715e-05, "loss": 0.1139, "step": 15470 }, { "epoch": 0.2739842473668132, "grad_norm": 0.744042694568634, "learning_rate": 2.555537575750065e-05, "loss": 0.0793, "step": 15471 }, { "epoch": 0.27400195690384166, "grad_norm": 0.9503820538520813, "learning_rate": 2.5554764446418035e-05, "loss": 0.0871, "step": 15472 }, { "epoch": 0.27401966644087006, "grad_norm": 0.5532832145690918, "learning_rate": 2.5554153100611314e-05, "loss": 0.0647, "step": 15473 }, { "epoch": 0.2740373759778985, "grad_norm": 1.417496681213379, "learning_rate": 2.5553541720082492e-05, "loss": 0.0728, "step": 15474 }, { "epoch": 0.2740550855149269, "grad_norm": 1.3358086347579956, "learning_rate": 2.555293030483359e-05, "loss": 0.1433, "step": 15475 }, { "epoch": 0.27407279505195536, "grad_norm": 1.090906023979187, "learning_rate": 2.5552318854866615e-05, "loss": 0.1179, "step": 15476 }, { "epoch": 0.27409050458898376, "grad_norm": 0.9812159538269043, "learning_rate": 2.5551707370183573e-05, "loss": 0.0875, "step": 15477 }, { "epoch": 0.2741082141260122, "grad_norm": 1.0163241624832153, "learning_rate": 2.5551095850786485e-05, "loss": 0.1082, "step": 15478 }, { "epoch": 0.2741259236630406, "grad_norm": 1.0798957347869873, "learning_rate": 2.5550484296677357e-05, "loss": 0.1284, "step": 15479 }, { "epoch": 0.27414363320006907, "grad_norm": 0.588111937046051, "learning_rate": 2.5549872707858205e-05, "loss": 0.0745, "step": 15480 }, { "epoch": 0.27416134273709747, "grad_norm": 0.9847185611724854, "learning_rate": 2.554926108433104e-05, "loss": 0.0914, "step": 15481 }, { "epoch": 0.2741790522741259, "grad_norm": 0.594520628452301, "learning_rate": 2.554864942609787e-05, "loss": 0.0833, "step": 15482 }, { "epoch": 0.2741967618111544, "grad_norm": 0.8742309212684631, "learning_rate": 2.554803773316071e-05, "loss": 0.0884, "step": 15483 }, { "epoch": 0.2742144713481828, "grad_norm": 0.7501070499420166, "learning_rate": 2.554742600552157e-05, "loss": 0.1029, "step": 15484 }, { "epoch": 0.27423218088521123, "grad_norm": 0.6179106831550598, "learning_rate": 2.554681424318247e-05, "loss": 0.0816, "step": 15485 }, { "epoch": 0.27424989042223963, "grad_norm": 0.6153181791305542, "learning_rate": 2.5546202446145418e-05, "loss": 0.1091, "step": 15486 }, { "epoch": 0.2742675999592681, "grad_norm": 1.107712745666504, "learning_rate": 2.5545590614412422e-05, "loss": 0.1217, "step": 15487 }, { "epoch": 0.2742853094962965, "grad_norm": 0.5882949233055115, "learning_rate": 2.5544978747985505e-05, "loss": 0.0838, "step": 15488 }, { "epoch": 0.27430301903332494, "grad_norm": 0.5798782110214233, "learning_rate": 2.5544366846866674e-05, "loss": 0.0898, "step": 15489 }, { "epoch": 0.27432072857035333, "grad_norm": 0.6498367190361023, "learning_rate": 2.554375491105794e-05, "loss": 0.0966, "step": 15490 }, { "epoch": 0.2743384381073818, "grad_norm": 0.9983013868331909, "learning_rate": 2.554314294056132e-05, "loss": 0.094, "step": 15491 }, { "epoch": 0.2743561476444102, "grad_norm": 0.6327860355377197, "learning_rate": 2.5542530935378826e-05, "loss": 0.0669, "step": 15492 }, { "epoch": 0.27437385718143864, "grad_norm": 0.7422130107879639, "learning_rate": 2.5541918895512474e-05, "loss": 0.1174, "step": 15493 }, { "epoch": 0.27439156671846704, "grad_norm": 0.9037993550300598, "learning_rate": 2.5541306820964267e-05, "loss": 0.0889, "step": 15494 }, { "epoch": 0.2744092762554955, "grad_norm": 0.9046211838722229, "learning_rate": 2.5540694711736237e-05, "loss": 0.0989, "step": 15495 }, { "epoch": 0.2744269857925239, "grad_norm": 0.8168286681175232, "learning_rate": 2.554008256783038e-05, "loss": 0.093, "step": 15496 }, { "epoch": 0.27444469532955235, "grad_norm": 0.6771366596221924, "learning_rate": 2.5539470389248722e-05, "loss": 0.0785, "step": 15497 }, { "epoch": 0.2744624048665808, "grad_norm": 0.721125602722168, "learning_rate": 2.553885817599327e-05, "loss": 0.0888, "step": 15498 }, { "epoch": 0.2744801144036092, "grad_norm": 0.6717817783355713, "learning_rate": 2.553824592806604e-05, "loss": 0.0883, "step": 15499 }, { "epoch": 0.27449782394063765, "grad_norm": 0.8744209408760071, "learning_rate": 2.553763364546905e-05, "loss": 0.0934, "step": 15500 }, { "epoch": 0.27451553347766605, "grad_norm": 0.8226358890533447, "learning_rate": 2.553702132820431e-05, "loss": 0.0727, "step": 15501 }, { "epoch": 0.2745332430146945, "grad_norm": 0.8340471386909485, "learning_rate": 2.5536408976273834e-05, "loss": 0.0723, "step": 15502 }, { "epoch": 0.2745509525517229, "grad_norm": 0.7164602875709534, "learning_rate": 2.5535796589679638e-05, "loss": 0.146, "step": 15503 }, { "epoch": 0.27456866208875136, "grad_norm": 0.8317705988883972, "learning_rate": 2.553518416842374e-05, "loss": 0.1241, "step": 15504 }, { "epoch": 0.27458637162577976, "grad_norm": 0.6842843294143677, "learning_rate": 2.553457171250815e-05, "loss": 0.1096, "step": 15505 }, { "epoch": 0.2746040811628082, "grad_norm": 0.7438713908195496, "learning_rate": 2.553395922193488e-05, "loss": 0.0774, "step": 15506 }, { "epoch": 0.2746217906998366, "grad_norm": 0.8580771088600159, "learning_rate": 2.5533346696705955e-05, "loss": 0.0912, "step": 15507 }, { "epoch": 0.27463950023686506, "grad_norm": 1.5748072862625122, "learning_rate": 2.553273413682338e-05, "loss": 0.1051, "step": 15508 }, { "epoch": 0.27465720977389346, "grad_norm": 0.7140259742736816, "learning_rate": 2.5532121542289178e-05, "loss": 0.084, "step": 15509 }, { "epoch": 0.2746749193109219, "grad_norm": 0.8128994703292847, "learning_rate": 2.5531508913105362e-05, "loss": 0.1162, "step": 15510 }, { "epoch": 0.2746926288479503, "grad_norm": 0.6353231072425842, "learning_rate": 2.5530896249273948e-05, "loss": 0.0808, "step": 15511 }, { "epoch": 0.27471033838497877, "grad_norm": 0.9135794639587402, "learning_rate": 2.553028355079695e-05, "loss": 0.1082, "step": 15512 }, { "epoch": 0.2747280479220072, "grad_norm": 0.6109575629234314, "learning_rate": 2.552967081767638e-05, "loss": 0.0969, "step": 15513 }, { "epoch": 0.2747457574590356, "grad_norm": 0.8561570644378662, "learning_rate": 2.5529058049914267e-05, "loss": 0.092, "step": 15514 }, { "epoch": 0.2747634669960641, "grad_norm": 0.68711256980896, "learning_rate": 2.5528445247512606e-05, "loss": 0.1088, "step": 15515 }, { "epoch": 0.2747811765330925, "grad_norm": 0.7706565856933594, "learning_rate": 2.5527832410473437e-05, "loss": 0.0754, "step": 15516 }, { "epoch": 0.27479888607012093, "grad_norm": 0.5976107120513916, "learning_rate": 2.5527219538798753e-05, "loss": 0.0784, "step": 15517 }, { "epoch": 0.2748165956071493, "grad_norm": 0.9760028719902039, "learning_rate": 2.5526606632490588e-05, "loss": 0.1256, "step": 15518 }, { "epoch": 0.2748343051441778, "grad_norm": 0.5226340293884277, "learning_rate": 2.552599369155095e-05, "loss": 0.0774, "step": 15519 }, { "epoch": 0.2748520146812062, "grad_norm": 1.049545168876648, "learning_rate": 2.552538071598186e-05, "loss": 0.0852, "step": 15520 }, { "epoch": 0.27486972421823463, "grad_norm": 1.237688422203064, "learning_rate": 2.552476770578533e-05, "loss": 0.1336, "step": 15521 }, { "epoch": 0.27488743375526303, "grad_norm": 1.2514528036117554, "learning_rate": 2.552415466096338e-05, "loss": 0.1005, "step": 15522 }, { "epoch": 0.2749051432922915, "grad_norm": 0.9388842582702637, "learning_rate": 2.5523541581518025e-05, "loss": 0.1131, "step": 15523 }, { "epoch": 0.2749228528293199, "grad_norm": 0.6679106950759888, "learning_rate": 2.5522928467451282e-05, "loss": 0.1129, "step": 15524 }, { "epoch": 0.27494056236634834, "grad_norm": 0.7528902292251587, "learning_rate": 2.5522315318765165e-05, "loss": 0.0658, "step": 15525 }, { "epoch": 0.27495827190337674, "grad_norm": 1.0811173915863037, "learning_rate": 2.55217021354617e-05, "loss": 0.0885, "step": 15526 }, { "epoch": 0.2749759814404052, "grad_norm": 0.7516201138496399, "learning_rate": 2.55210889175429e-05, "loss": 0.1135, "step": 15527 }, { "epoch": 0.27499369097743365, "grad_norm": 0.6966935396194458, "learning_rate": 2.5520475665010777e-05, "loss": 0.1152, "step": 15528 }, { "epoch": 0.27501140051446205, "grad_norm": 0.8769236207008362, "learning_rate": 2.5519862377867356e-05, "loss": 0.1036, "step": 15529 }, { "epoch": 0.2750291100514905, "grad_norm": 0.672606348991394, "learning_rate": 2.5519249056114654e-05, "loss": 0.1117, "step": 15530 }, { "epoch": 0.2750468195885189, "grad_norm": 0.6988809108734131, "learning_rate": 2.5518635699754686e-05, "loss": 0.0788, "step": 15531 }, { "epoch": 0.27506452912554735, "grad_norm": 1.1951181888580322, "learning_rate": 2.5518022308789465e-05, "loss": 0.0897, "step": 15532 }, { "epoch": 0.27508223866257575, "grad_norm": 0.7189525365829468, "learning_rate": 2.5517408883221016e-05, "loss": 0.0747, "step": 15533 }, { "epoch": 0.2750999481996042, "grad_norm": 0.7047039866447449, "learning_rate": 2.551679542305136e-05, "loss": 0.091, "step": 15534 }, { "epoch": 0.2751176577366326, "grad_norm": 0.6478304266929626, "learning_rate": 2.551618192828251e-05, "loss": 0.0769, "step": 15535 }, { "epoch": 0.27513536727366106, "grad_norm": 0.956233561038971, "learning_rate": 2.551556839891648e-05, "loss": 0.1081, "step": 15536 }, { "epoch": 0.27515307681068946, "grad_norm": 1.0752756595611572, "learning_rate": 2.5514954834955297e-05, "loss": 0.0909, "step": 15537 }, { "epoch": 0.2751707863477179, "grad_norm": 1.0034594535827637, "learning_rate": 2.5514341236400976e-05, "loss": 0.1174, "step": 15538 }, { "epoch": 0.2751884958847463, "grad_norm": 0.9082440733909607, "learning_rate": 2.5513727603255537e-05, "loss": 0.0699, "step": 15539 }, { "epoch": 0.27520620542177476, "grad_norm": 0.7299848198890686, "learning_rate": 2.5513113935520992e-05, "loss": 0.0856, "step": 15540 }, { "epoch": 0.27522391495880316, "grad_norm": 0.6772722601890564, "learning_rate": 2.551250023319937e-05, "loss": 0.0998, "step": 15541 }, { "epoch": 0.2752416244958316, "grad_norm": 0.8450961709022522, "learning_rate": 2.5511886496292688e-05, "loss": 0.0958, "step": 15542 }, { "epoch": 0.27525933403286007, "grad_norm": 1.5976941585540771, "learning_rate": 2.5511272724802963e-05, "loss": 0.1178, "step": 15543 }, { "epoch": 0.27527704356988847, "grad_norm": 0.6448094844818115, "learning_rate": 2.5510658918732207e-05, "loss": 0.0936, "step": 15544 }, { "epoch": 0.2752947531069169, "grad_norm": 0.78628009557724, "learning_rate": 2.5510045078082453e-05, "loss": 0.0727, "step": 15545 }, { "epoch": 0.2753124626439453, "grad_norm": 0.7035363912582397, "learning_rate": 2.5509431202855712e-05, "loss": 0.0785, "step": 15546 }, { "epoch": 0.2753301721809738, "grad_norm": 0.9022806286811829, "learning_rate": 2.5508817293054005e-05, "loss": 0.0898, "step": 15547 }, { "epoch": 0.2753478817180022, "grad_norm": 0.5915695428848267, "learning_rate": 2.5508203348679347e-05, "loss": 0.0961, "step": 15548 }, { "epoch": 0.27536559125503063, "grad_norm": 1.325255036354065, "learning_rate": 2.5507589369733767e-05, "loss": 0.1146, "step": 15549 }, { "epoch": 0.275383300792059, "grad_norm": 0.5835385322570801, "learning_rate": 2.5506975356219282e-05, "loss": 0.0798, "step": 15550 }, { "epoch": 0.2754010103290875, "grad_norm": 0.5250874161720276, "learning_rate": 2.550636130813791e-05, "loss": 0.0829, "step": 15551 }, { "epoch": 0.2754187198661159, "grad_norm": 0.7551157474517822, "learning_rate": 2.550574722549167e-05, "loss": 0.1124, "step": 15552 }, { "epoch": 0.27543642940314433, "grad_norm": 1.3180638551712036, "learning_rate": 2.5505133108282586e-05, "loss": 0.0926, "step": 15553 }, { "epoch": 0.27545413894017273, "grad_norm": 1.261318564414978, "learning_rate": 2.5504518956512676e-05, "loss": 0.0859, "step": 15554 }, { "epoch": 0.2754718484772012, "grad_norm": 0.7894285321235657, "learning_rate": 2.5503904770183965e-05, "loss": 0.06, "step": 15555 }, { "epoch": 0.27548955801422964, "grad_norm": 0.7579795122146606, "learning_rate": 2.5503290549298464e-05, "loss": 0.0649, "step": 15556 }, { "epoch": 0.27550726755125804, "grad_norm": 0.5060763359069824, "learning_rate": 2.5502676293858203e-05, "loss": 0.0476, "step": 15557 }, { "epoch": 0.2755249770882865, "grad_norm": 1.0925583839416504, "learning_rate": 2.5502062003865195e-05, "loss": 0.12, "step": 15558 }, { "epoch": 0.2755426866253149, "grad_norm": 0.7169957160949707, "learning_rate": 2.550144767932147e-05, "loss": 0.0787, "step": 15559 }, { "epoch": 0.27556039616234335, "grad_norm": 0.6718143224716187, "learning_rate": 2.5500833320229037e-05, "loss": 0.1039, "step": 15560 }, { "epoch": 0.27557810569937174, "grad_norm": 0.7527996897697449, "learning_rate": 2.5500218926589934e-05, "loss": 0.0997, "step": 15561 }, { "epoch": 0.2755958152364002, "grad_norm": 0.8397977352142334, "learning_rate": 2.549960449840617e-05, "loss": 0.1194, "step": 15562 }, { "epoch": 0.2756135247734286, "grad_norm": 0.9629377126693726, "learning_rate": 2.5498990035679766e-05, "loss": 0.0788, "step": 15563 }, { "epoch": 0.27563123431045705, "grad_norm": 0.7232412695884705, "learning_rate": 2.549837553841275e-05, "loss": 0.1014, "step": 15564 }, { "epoch": 0.27564894384748545, "grad_norm": 0.8432209491729736, "learning_rate": 2.5497761006607137e-05, "loss": 0.1238, "step": 15565 }, { "epoch": 0.2756666533845139, "grad_norm": 0.9836902022361755, "learning_rate": 2.5497146440264954e-05, "loss": 0.0861, "step": 15566 }, { "epoch": 0.2756843629215423, "grad_norm": 0.6353415846824646, "learning_rate": 2.549653183938822e-05, "loss": 0.1024, "step": 15567 }, { "epoch": 0.27570207245857076, "grad_norm": 0.5134017467498779, "learning_rate": 2.5495917203978958e-05, "loss": 0.093, "step": 15568 }, { "epoch": 0.27571978199559916, "grad_norm": 1.3388886451721191, "learning_rate": 2.549530253403919e-05, "loss": 0.1296, "step": 15569 }, { "epoch": 0.2757374915326276, "grad_norm": 0.5072305202484131, "learning_rate": 2.549468782957094e-05, "loss": 0.051, "step": 15570 }, { "epoch": 0.27575520106965606, "grad_norm": 0.9316239356994629, "learning_rate": 2.5494073090576228e-05, "loss": 0.1135, "step": 15571 }, { "epoch": 0.27577291060668446, "grad_norm": 0.6288336515426636, "learning_rate": 2.5493458317057078e-05, "loss": 0.0837, "step": 15572 }, { "epoch": 0.2757906201437129, "grad_norm": 0.5491978526115417, "learning_rate": 2.5492843509015507e-05, "loss": 0.0885, "step": 15573 }, { "epoch": 0.2758083296807413, "grad_norm": 1.0191376209259033, "learning_rate": 2.5492228666453546e-05, "loss": 0.102, "step": 15574 }, { "epoch": 0.27582603921776977, "grad_norm": 1.234550952911377, "learning_rate": 2.5491613789373213e-05, "loss": 0.0876, "step": 15575 }, { "epoch": 0.27584374875479817, "grad_norm": 0.6199161410331726, "learning_rate": 2.549099887777653e-05, "loss": 0.1109, "step": 15576 }, { "epoch": 0.2758614582918266, "grad_norm": 0.8228922486305237, "learning_rate": 2.5490383931665524e-05, "loss": 0.0922, "step": 15577 }, { "epoch": 0.275879167828855, "grad_norm": 0.9418334364891052, "learning_rate": 2.5489768951042217e-05, "loss": 0.1302, "step": 15578 }, { "epoch": 0.2758968773658835, "grad_norm": 0.9045594930648804, "learning_rate": 2.5489153935908633e-05, "loss": 0.0846, "step": 15579 }, { "epoch": 0.2759145869029119, "grad_norm": 0.9573771357536316, "learning_rate": 2.5488538886266787e-05, "loss": 0.1026, "step": 15580 }, { "epoch": 0.2759322964399403, "grad_norm": 0.6634828448295593, "learning_rate": 2.548792380211871e-05, "loss": 0.0649, "step": 15581 }, { "epoch": 0.2759500059769687, "grad_norm": 0.9835436940193176, "learning_rate": 2.5487308683466427e-05, "loss": 0.1029, "step": 15582 }, { "epoch": 0.2759677155139972, "grad_norm": 0.930743396282196, "learning_rate": 2.548669353031196e-05, "loss": 0.102, "step": 15583 }, { "epoch": 0.2759854250510256, "grad_norm": 0.8320540189743042, "learning_rate": 2.548607834265733e-05, "loss": 0.121, "step": 15584 }, { "epoch": 0.27600313458805403, "grad_norm": 0.9605464935302734, "learning_rate": 2.5485463120504563e-05, "loss": 0.1062, "step": 15585 }, { "epoch": 0.2760208441250825, "grad_norm": 0.9166173934936523, "learning_rate": 2.548484786385568e-05, "loss": 0.0985, "step": 15586 }, { "epoch": 0.2760385536621109, "grad_norm": 0.808319628238678, "learning_rate": 2.5484232572712715e-05, "loss": 0.0925, "step": 15587 }, { "epoch": 0.27605626319913934, "grad_norm": 1.3778064250946045, "learning_rate": 2.5483617247077682e-05, "loss": 0.1021, "step": 15588 }, { "epoch": 0.27607397273616774, "grad_norm": 0.6291753053665161, "learning_rate": 2.5483001886952606e-05, "loss": 0.0746, "step": 15589 }, { "epoch": 0.2760916822731962, "grad_norm": 0.7833598256111145, "learning_rate": 2.5482386492339517e-05, "loss": 0.0726, "step": 15590 }, { "epoch": 0.2761093918102246, "grad_norm": 0.6652675271034241, "learning_rate": 2.5481771063240437e-05, "loss": 0.088, "step": 15591 }, { "epoch": 0.27612710134725305, "grad_norm": 0.8586334586143494, "learning_rate": 2.5481155599657384e-05, "loss": 0.1003, "step": 15592 }, { "epoch": 0.27614481088428144, "grad_norm": 1.3611408472061157, "learning_rate": 2.54805401015924e-05, "loss": 0.0939, "step": 15593 }, { "epoch": 0.2761625204213099, "grad_norm": 0.7720375061035156, "learning_rate": 2.547992456904749e-05, "loss": 0.1226, "step": 15594 }, { "epoch": 0.2761802299583383, "grad_norm": 0.6536701917648315, "learning_rate": 2.5479309002024693e-05, "loss": 0.1086, "step": 15595 }, { "epoch": 0.27619793949536675, "grad_norm": 1.0355463027954102, "learning_rate": 2.547869340052603e-05, "loss": 0.1094, "step": 15596 }, { "epoch": 0.27621564903239515, "grad_norm": 0.4921638071537018, "learning_rate": 2.5478077764553524e-05, "loss": 0.0903, "step": 15597 }, { "epoch": 0.2762333585694236, "grad_norm": 0.6908032298088074, "learning_rate": 2.54774620941092e-05, "loss": 0.1046, "step": 15598 }, { "epoch": 0.276251068106452, "grad_norm": 0.7654881477355957, "learning_rate": 2.5476846389195084e-05, "loss": 0.0895, "step": 15599 }, { "epoch": 0.27626877764348046, "grad_norm": 0.7385538220405579, "learning_rate": 2.5476230649813208e-05, "loss": 0.0716, "step": 15600 }, { "epoch": 0.2762864871805089, "grad_norm": 0.87159264087677, "learning_rate": 2.547561487596559e-05, "loss": 0.0853, "step": 15601 }, { "epoch": 0.2763041967175373, "grad_norm": 1.1307963132858276, "learning_rate": 2.547499906765426e-05, "loss": 0.1239, "step": 15602 }, { "epoch": 0.27632190625456576, "grad_norm": 0.8883389234542847, "learning_rate": 2.5474383224881242e-05, "loss": 0.1131, "step": 15603 }, { "epoch": 0.27633961579159416, "grad_norm": 0.6545148491859436, "learning_rate": 2.5473767347648562e-05, "loss": 0.0766, "step": 15604 }, { "epoch": 0.2763573253286226, "grad_norm": 1.4264570474624634, "learning_rate": 2.5473151435958247e-05, "loss": 0.0739, "step": 15605 }, { "epoch": 0.276375034865651, "grad_norm": 0.9682358503341675, "learning_rate": 2.5472535489812322e-05, "loss": 0.0712, "step": 15606 }, { "epoch": 0.27639274440267947, "grad_norm": 1.100601315498352, "learning_rate": 2.5471919509212817e-05, "loss": 0.075, "step": 15607 }, { "epoch": 0.27641045393970787, "grad_norm": 1.3399951457977295, "learning_rate": 2.5471303494161755e-05, "loss": 0.1334, "step": 15608 }, { "epoch": 0.2764281634767363, "grad_norm": 0.5014740824699402, "learning_rate": 2.547068744466116e-05, "loss": 0.0888, "step": 15609 }, { "epoch": 0.2764458730137647, "grad_norm": 0.6277970671653748, "learning_rate": 2.5470071360713073e-05, "loss": 0.0879, "step": 15610 }, { "epoch": 0.2764635825507932, "grad_norm": 0.8980984091758728, "learning_rate": 2.5469455242319498e-05, "loss": 0.0737, "step": 15611 }, { "epoch": 0.2764812920878216, "grad_norm": 0.6278683543205261, "learning_rate": 2.5468839089482484e-05, "loss": 0.077, "step": 15612 }, { "epoch": 0.27649900162485, "grad_norm": 1.050701379776001, "learning_rate": 2.546822290220404e-05, "loss": 0.0926, "step": 15613 }, { "epoch": 0.2765167111618784, "grad_norm": 0.7098962664604187, "learning_rate": 2.5467606680486205e-05, "loss": 0.0931, "step": 15614 }, { "epoch": 0.2765344206989069, "grad_norm": 0.7791774868965149, "learning_rate": 2.5466990424331002e-05, "loss": 0.1256, "step": 15615 }, { "epoch": 0.27655213023593533, "grad_norm": 0.9170494675636292, "learning_rate": 2.546637413374046e-05, "loss": 0.1081, "step": 15616 }, { "epoch": 0.27656983977296373, "grad_norm": 1.028639316558838, "learning_rate": 2.5465757808716604e-05, "loss": 0.121, "step": 15617 }, { "epoch": 0.2765875493099922, "grad_norm": 1.083746075630188, "learning_rate": 2.5465141449261464e-05, "loss": 0.1021, "step": 15618 }, { "epoch": 0.2766052588470206, "grad_norm": 0.8500295877456665, "learning_rate": 2.546452505537707e-05, "loss": 0.0983, "step": 15619 }, { "epoch": 0.27662296838404904, "grad_norm": 0.9199430346488953, "learning_rate": 2.5463908627065444e-05, "loss": 0.0739, "step": 15620 }, { "epoch": 0.27664067792107744, "grad_norm": 1.0092859268188477, "learning_rate": 2.546329216432862e-05, "loss": 0.111, "step": 15621 }, { "epoch": 0.2766583874581059, "grad_norm": 1.0005600452423096, "learning_rate": 2.5462675667168622e-05, "loss": 0.1531, "step": 15622 }, { "epoch": 0.2766760969951343, "grad_norm": 0.86020827293396, "learning_rate": 2.5462059135587473e-05, "loss": 0.0828, "step": 15623 }, { "epoch": 0.27669380653216274, "grad_norm": 0.5038046836853027, "learning_rate": 2.5461442569587215e-05, "loss": 0.0827, "step": 15624 }, { "epoch": 0.27671151606919114, "grad_norm": 0.6767106056213379, "learning_rate": 2.5460825969169867e-05, "loss": 0.0919, "step": 15625 }, { "epoch": 0.2767292256062196, "grad_norm": 0.7122169137001038, "learning_rate": 2.5460209334337462e-05, "loss": 0.0595, "step": 15626 }, { "epoch": 0.276746935143248, "grad_norm": 0.8442760705947876, "learning_rate": 2.5459592665092023e-05, "loss": 0.1069, "step": 15627 }, { "epoch": 0.27676464468027645, "grad_norm": 0.8003162145614624, "learning_rate": 2.545897596143558e-05, "loss": 0.1147, "step": 15628 }, { "epoch": 0.27678235421730485, "grad_norm": 0.5665056109428406, "learning_rate": 2.5458359223370172e-05, "loss": 0.0753, "step": 15629 }, { "epoch": 0.2768000637543333, "grad_norm": 0.9577192068099976, "learning_rate": 2.5457742450897813e-05, "loss": 0.0891, "step": 15630 }, { "epoch": 0.27681777329136176, "grad_norm": 0.6589022874832153, "learning_rate": 2.5457125644020538e-05, "loss": 0.1183, "step": 15631 }, { "epoch": 0.27683548282839016, "grad_norm": 1.0112214088439941, "learning_rate": 2.5456508802740383e-05, "loss": 0.0842, "step": 15632 }, { "epoch": 0.2768531923654186, "grad_norm": 0.7973436713218689, "learning_rate": 2.545589192705937e-05, "loss": 0.0786, "step": 15633 }, { "epoch": 0.276870901902447, "grad_norm": 0.7947813868522644, "learning_rate": 2.545527501697953e-05, "loss": 0.1008, "step": 15634 }, { "epoch": 0.27688861143947546, "grad_norm": 0.5668076872825623, "learning_rate": 2.5454658072502894e-05, "loss": 0.0755, "step": 15635 }, { "epoch": 0.27690632097650386, "grad_norm": 1.2550965547561646, "learning_rate": 2.5454041093631482e-05, "loss": 0.0993, "step": 15636 }, { "epoch": 0.2769240305135323, "grad_norm": 1.026938796043396, "learning_rate": 2.5453424080367338e-05, "loss": 0.1443, "step": 15637 }, { "epoch": 0.2769417400505607, "grad_norm": 0.5835875272750854, "learning_rate": 2.5452807032712487e-05, "loss": 0.074, "step": 15638 }, { "epoch": 0.27695944958758917, "grad_norm": 0.6828745007514954, "learning_rate": 2.5452189950668956e-05, "loss": 0.1062, "step": 15639 }, { "epoch": 0.27697715912461757, "grad_norm": 0.8078597784042358, "learning_rate": 2.5451572834238778e-05, "loss": 0.0746, "step": 15640 }, { "epoch": 0.276994868661646, "grad_norm": 0.8274484872817993, "learning_rate": 2.5450955683423988e-05, "loss": 0.1006, "step": 15641 }, { "epoch": 0.2770125781986744, "grad_norm": 0.7099824547767639, "learning_rate": 2.5450338498226606e-05, "loss": 0.0813, "step": 15642 }, { "epoch": 0.2770302877357029, "grad_norm": 0.6208105683326721, "learning_rate": 2.5449721278648665e-05, "loss": 0.0567, "step": 15643 }, { "epoch": 0.2770479972727313, "grad_norm": 0.9086681604385376, "learning_rate": 2.5449104024692204e-05, "loss": 0.0758, "step": 15644 }, { "epoch": 0.2770657068097597, "grad_norm": 0.806426465511322, "learning_rate": 2.544848673635924e-05, "loss": 0.1008, "step": 15645 }, { "epoch": 0.2770834163467882, "grad_norm": 0.9440526962280273, "learning_rate": 2.5447869413651815e-05, "loss": 0.1249, "step": 15646 }, { "epoch": 0.2771011258838166, "grad_norm": 0.7280558347702026, "learning_rate": 2.5447252056571956e-05, "loss": 0.1314, "step": 15647 }, { "epoch": 0.27711883542084503, "grad_norm": 0.8059581518173218, "learning_rate": 2.5446634665121696e-05, "loss": 0.0963, "step": 15648 }, { "epoch": 0.27713654495787343, "grad_norm": 0.7676113843917847, "learning_rate": 2.5446017239303064e-05, "loss": 0.1075, "step": 15649 }, { "epoch": 0.2771542544949019, "grad_norm": 0.9917171001434326, "learning_rate": 2.5445399779118092e-05, "loss": 0.082, "step": 15650 }, { "epoch": 0.2771719640319303, "grad_norm": 0.8384171724319458, "learning_rate": 2.544478228456881e-05, "loss": 0.1077, "step": 15651 }, { "epoch": 0.27718967356895874, "grad_norm": 0.8078637719154358, "learning_rate": 2.544416475565725e-05, "loss": 0.1213, "step": 15652 }, { "epoch": 0.27720738310598714, "grad_norm": 0.7165956497192383, "learning_rate": 2.5443547192385445e-05, "loss": 0.0619, "step": 15653 }, { "epoch": 0.2772250926430156, "grad_norm": 1.0031038522720337, "learning_rate": 2.544292959475543e-05, "loss": 0.0901, "step": 15654 }, { "epoch": 0.277242802180044, "grad_norm": 0.8483866453170776, "learning_rate": 2.5442311962769227e-05, "loss": 0.0946, "step": 15655 }, { "epoch": 0.27726051171707244, "grad_norm": 0.9560806155204773, "learning_rate": 2.5441694296428878e-05, "loss": 0.1128, "step": 15656 }, { "epoch": 0.27727822125410084, "grad_norm": 0.5566312074661255, "learning_rate": 2.544107659573641e-05, "loss": 0.1131, "step": 15657 }, { "epoch": 0.2772959307911293, "grad_norm": 0.9484796524047852, "learning_rate": 2.544045886069385e-05, "loss": 0.0664, "step": 15658 }, { "epoch": 0.2773136403281577, "grad_norm": 0.8052303791046143, "learning_rate": 2.5439841091303242e-05, "loss": 0.0658, "step": 15659 }, { "epoch": 0.27733134986518615, "grad_norm": 0.6726962327957153, "learning_rate": 2.5439223287566617e-05, "loss": 0.0793, "step": 15660 }, { "epoch": 0.2773490594022146, "grad_norm": 1.0700634717941284, "learning_rate": 2.5438605449485997e-05, "loss": 0.086, "step": 15661 }, { "epoch": 0.277366768939243, "grad_norm": 0.644070029258728, "learning_rate": 2.5437987577063425e-05, "loss": 0.0766, "step": 15662 }, { "epoch": 0.27738447847627146, "grad_norm": 1.0779515504837036, "learning_rate": 2.5437369670300925e-05, "loss": 0.112, "step": 15663 }, { "epoch": 0.27740218801329986, "grad_norm": 0.6663919687271118, "learning_rate": 2.5436751729200537e-05, "loss": 0.0729, "step": 15664 }, { "epoch": 0.2774198975503283, "grad_norm": 0.9220744371414185, "learning_rate": 2.543613375376429e-05, "loss": 0.082, "step": 15665 }, { "epoch": 0.2774376070873567, "grad_norm": 0.694661021232605, "learning_rate": 2.543551574399422e-05, "loss": 0.1029, "step": 15666 }, { "epoch": 0.27745531662438516, "grad_norm": 0.64349365234375, "learning_rate": 2.5434897699892358e-05, "loss": 0.133, "step": 15667 }, { "epoch": 0.27747302616141356, "grad_norm": 0.5052288174629211, "learning_rate": 2.5434279621460735e-05, "loss": 0.0715, "step": 15668 }, { "epoch": 0.277490735698442, "grad_norm": 0.8559357523918152, "learning_rate": 2.5433661508701395e-05, "loss": 0.1057, "step": 15669 }, { "epoch": 0.2775084452354704, "grad_norm": 0.4119444489479065, "learning_rate": 2.543304336161636e-05, "loss": 0.0902, "step": 15670 }, { "epoch": 0.27752615477249887, "grad_norm": 0.9491257667541504, "learning_rate": 2.5432425180207665e-05, "loss": 0.1219, "step": 15671 }, { "epoch": 0.27754386430952727, "grad_norm": 1.1461265087127686, "learning_rate": 2.543180696447735e-05, "loss": 0.0805, "step": 15672 }, { "epoch": 0.2775615738465557, "grad_norm": 0.8468232750892639, "learning_rate": 2.543118871442744e-05, "loss": 0.1175, "step": 15673 }, { "epoch": 0.2775792833835841, "grad_norm": 0.9972844123840332, "learning_rate": 2.543057043005998e-05, "loss": 0.0819, "step": 15674 }, { "epoch": 0.2775969929206126, "grad_norm": 1.0006054639816284, "learning_rate": 2.5429952111376997e-05, "loss": 0.1031, "step": 15675 }, { "epoch": 0.277614702457641, "grad_norm": 0.6382328867912292, "learning_rate": 2.5429333758380525e-05, "loss": 0.0858, "step": 15676 }, { "epoch": 0.2776324119946694, "grad_norm": 1.1062369346618652, "learning_rate": 2.5428715371072602e-05, "loss": 0.1013, "step": 15677 }, { "epoch": 0.2776501215316979, "grad_norm": 0.702481210231781, "learning_rate": 2.5428096949455253e-05, "loss": 0.1005, "step": 15678 }, { "epoch": 0.2776678310687263, "grad_norm": 0.9126712083816528, "learning_rate": 2.542747849353053e-05, "loss": 0.1219, "step": 15679 }, { "epoch": 0.27768554060575473, "grad_norm": 0.7250152230262756, "learning_rate": 2.5426860003300453e-05, "loss": 0.1003, "step": 15680 }, { "epoch": 0.27770325014278313, "grad_norm": 0.8171423673629761, "learning_rate": 2.542624147876706e-05, "loss": 0.0605, "step": 15681 }, { "epoch": 0.2777209596798116, "grad_norm": 0.7838090062141418, "learning_rate": 2.5425622919932387e-05, "loss": 0.1105, "step": 15682 }, { "epoch": 0.27773866921684, "grad_norm": 0.9153133034706116, "learning_rate": 2.542500432679847e-05, "loss": 0.1023, "step": 15683 }, { "epoch": 0.27775637875386844, "grad_norm": 1.4205565452575684, "learning_rate": 2.5424385699367345e-05, "loss": 0.1016, "step": 15684 }, { "epoch": 0.27777408829089684, "grad_norm": 0.7936873435974121, "learning_rate": 2.5423767037641045e-05, "loss": 0.1129, "step": 15685 }, { "epoch": 0.2777917978279253, "grad_norm": 0.8069238066673279, "learning_rate": 2.5423148341621603e-05, "loss": 0.0991, "step": 15686 }, { "epoch": 0.2778095073649537, "grad_norm": 0.8271235227584839, "learning_rate": 2.542252961131106e-05, "loss": 0.1123, "step": 15687 }, { "epoch": 0.27782721690198214, "grad_norm": 0.7588816285133362, "learning_rate": 2.5421910846711446e-05, "loss": 0.074, "step": 15688 }, { "epoch": 0.27784492643901054, "grad_norm": 0.49089640378952026, "learning_rate": 2.5421292047824803e-05, "loss": 0.0918, "step": 15689 }, { "epoch": 0.277862635976039, "grad_norm": 0.7586624026298523, "learning_rate": 2.542067321465316e-05, "loss": 0.0951, "step": 15690 }, { "epoch": 0.27788034551306745, "grad_norm": 0.7903540730476379, "learning_rate": 2.5420054347198556e-05, "loss": 0.0712, "step": 15691 }, { "epoch": 0.27789805505009585, "grad_norm": 0.6203528046607971, "learning_rate": 2.541943544546303e-05, "loss": 0.0811, "step": 15692 }, { "epoch": 0.2779157645871243, "grad_norm": 0.8566617965698242, "learning_rate": 2.541881650944861e-05, "loss": 0.0822, "step": 15693 }, { "epoch": 0.2779334741241527, "grad_norm": 0.8492035269737244, "learning_rate": 2.5418197539157344e-05, "loss": 0.0911, "step": 15694 }, { "epoch": 0.27795118366118116, "grad_norm": 0.8877835273742676, "learning_rate": 2.541757853459126e-05, "loss": 0.0997, "step": 15695 }, { "epoch": 0.27796889319820955, "grad_norm": 0.8330169320106506, "learning_rate": 2.5416959495752397e-05, "loss": 0.114, "step": 15696 }, { "epoch": 0.277986602735238, "grad_norm": 1.4759215116500854, "learning_rate": 2.5416340422642788e-05, "loss": 0.0752, "step": 15697 }, { "epoch": 0.2780043122722664, "grad_norm": 0.8524787425994873, "learning_rate": 2.541572131526447e-05, "loss": 0.0987, "step": 15698 }, { "epoch": 0.27802202180929486, "grad_norm": 0.7725102305412292, "learning_rate": 2.5415102173619486e-05, "loss": 0.1334, "step": 15699 }, { "epoch": 0.27803973134632326, "grad_norm": 0.734571099281311, "learning_rate": 2.541448299770987e-05, "loss": 0.1139, "step": 15700 }, { "epoch": 0.2780574408833517, "grad_norm": 1.596641182899475, "learning_rate": 2.5413863787537657e-05, "loss": 0.1224, "step": 15701 }, { "epoch": 0.2780751504203801, "grad_norm": 0.7267503142356873, "learning_rate": 2.5413244543104884e-05, "loss": 0.1083, "step": 15702 }, { "epoch": 0.27809285995740857, "grad_norm": 0.45334315299987793, "learning_rate": 2.54126252644136e-05, "loss": 0.1113, "step": 15703 }, { "epoch": 0.27811056949443697, "grad_norm": 1.0868948698043823, "learning_rate": 2.5412005951465818e-05, "loss": 0.0794, "step": 15704 }, { "epoch": 0.2781282790314654, "grad_norm": 0.8292275667190552, "learning_rate": 2.5411386604263595e-05, "loss": 0.0827, "step": 15705 }, { "epoch": 0.2781459885684939, "grad_norm": 0.7572113275527954, "learning_rate": 2.5410767222808965e-05, "loss": 0.1029, "step": 15706 }, { "epoch": 0.2781636981055223, "grad_norm": 0.8950186371803284, "learning_rate": 2.5410147807103964e-05, "loss": 0.151, "step": 15707 }, { "epoch": 0.2781814076425507, "grad_norm": 1.3142673969268799, "learning_rate": 2.5409528357150627e-05, "loss": 0.1012, "step": 15708 }, { "epoch": 0.2781991171795791, "grad_norm": 0.7237356901168823, "learning_rate": 2.5408908872950992e-05, "loss": 0.0806, "step": 15709 }, { "epoch": 0.2782168267166076, "grad_norm": 0.4718703031539917, "learning_rate": 2.540828935450711e-05, "loss": 0.0844, "step": 15710 }, { "epoch": 0.278234536253636, "grad_norm": 0.761045515537262, "learning_rate": 2.5407669801820998e-05, "loss": 0.0677, "step": 15711 }, { "epoch": 0.27825224579066443, "grad_norm": 1.0741738080978394, "learning_rate": 2.5407050214894708e-05, "loss": 0.0727, "step": 15712 }, { "epoch": 0.27826995532769283, "grad_norm": 0.7184566259384155, "learning_rate": 2.540643059373028e-05, "loss": 0.0773, "step": 15713 }, { "epoch": 0.2782876648647213, "grad_norm": 0.6746540069580078, "learning_rate": 2.540581093832974e-05, "loss": 0.0823, "step": 15714 }, { "epoch": 0.2783053744017497, "grad_norm": 0.9868407249450684, "learning_rate": 2.5405191248695142e-05, "loss": 0.078, "step": 15715 }, { "epoch": 0.27832308393877814, "grad_norm": 1.119075894355774, "learning_rate": 2.540457152482851e-05, "loss": 0.1241, "step": 15716 }, { "epoch": 0.27834079347580654, "grad_norm": 1.2936464548110962, "learning_rate": 2.5403951766731895e-05, "loss": 0.1047, "step": 15717 }, { "epoch": 0.278358503012835, "grad_norm": 0.8384459018707275, "learning_rate": 2.5403331974407332e-05, "loss": 0.1146, "step": 15718 }, { "epoch": 0.2783762125498634, "grad_norm": 0.4943004250526428, "learning_rate": 2.5402712147856854e-05, "loss": 0.1065, "step": 15719 }, { "epoch": 0.27839392208689184, "grad_norm": 0.8374937176704407, "learning_rate": 2.540209228708251e-05, "loss": 0.1225, "step": 15720 }, { "epoch": 0.2784116316239203, "grad_norm": 1.012145757675171, "learning_rate": 2.5401472392086328e-05, "loss": 0.1417, "step": 15721 }, { "epoch": 0.2784293411609487, "grad_norm": 0.5372095704078674, "learning_rate": 2.540085246287036e-05, "loss": 0.0834, "step": 15722 }, { "epoch": 0.27844705069797715, "grad_norm": 0.7659883499145508, "learning_rate": 2.540023249943664e-05, "loss": 0.0869, "step": 15723 }, { "epoch": 0.27846476023500555, "grad_norm": 0.6921195387840271, "learning_rate": 2.5399612501787203e-05, "loss": 0.0894, "step": 15724 }, { "epoch": 0.278482469772034, "grad_norm": 0.8170125484466553, "learning_rate": 2.5398992469924098e-05, "loss": 0.0652, "step": 15725 }, { "epoch": 0.2785001793090624, "grad_norm": 1.1371291875839233, "learning_rate": 2.5398372403849352e-05, "loss": 0.1027, "step": 15726 }, { "epoch": 0.27851788884609086, "grad_norm": 0.824557363986969, "learning_rate": 2.539775230356502e-05, "loss": 0.0693, "step": 15727 }, { "epoch": 0.27853559838311925, "grad_norm": 0.3290999233722687, "learning_rate": 2.539713216907313e-05, "loss": 0.1165, "step": 15728 }, { "epoch": 0.2785533079201477, "grad_norm": 0.7675114274024963, "learning_rate": 2.539651200037573e-05, "loss": 0.08, "step": 15729 }, { "epoch": 0.2785710174571761, "grad_norm": 0.8052238821983337, "learning_rate": 2.5395891797474856e-05, "loss": 0.1166, "step": 15730 }, { "epoch": 0.27858872699420456, "grad_norm": 0.5019052028656006, "learning_rate": 2.5395271560372547e-05, "loss": 0.1144, "step": 15731 }, { "epoch": 0.27860643653123296, "grad_norm": 0.74825119972229, "learning_rate": 2.5394651289070846e-05, "loss": 0.0995, "step": 15732 }, { "epoch": 0.2786241460682614, "grad_norm": 0.8571268916130066, "learning_rate": 2.5394030983571795e-05, "loss": 0.0862, "step": 15733 }, { "epoch": 0.2786418556052898, "grad_norm": 0.8550427556037903, "learning_rate": 2.5393410643877435e-05, "loss": 0.1341, "step": 15734 }, { "epoch": 0.27865956514231827, "grad_norm": 1.131446123123169, "learning_rate": 2.53927902699898e-05, "loss": 0.1041, "step": 15735 }, { "epoch": 0.2786772746793467, "grad_norm": 1.387378454208374, "learning_rate": 2.5392169861910945e-05, "loss": 0.1054, "step": 15736 }, { "epoch": 0.2786949842163751, "grad_norm": 0.49079927802085876, "learning_rate": 2.5391549419642897e-05, "loss": 0.0664, "step": 15737 }, { "epoch": 0.2787126937534036, "grad_norm": 0.8559449911117554, "learning_rate": 2.53909289431877e-05, "loss": 0.098, "step": 15738 }, { "epoch": 0.27873040329043197, "grad_norm": 0.6496095061302185, "learning_rate": 2.5390308432547402e-05, "loss": 0.0971, "step": 15739 }, { "epoch": 0.2787481128274604, "grad_norm": 0.6838752031326294, "learning_rate": 2.5389687887724037e-05, "loss": 0.0824, "step": 15740 }, { "epoch": 0.2787658223644888, "grad_norm": 0.799628496170044, "learning_rate": 2.5389067308719654e-05, "loss": 0.0929, "step": 15741 }, { "epoch": 0.2787835319015173, "grad_norm": 1.0279614925384521, "learning_rate": 2.5388446695536285e-05, "loss": 0.068, "step": 15742 }, { "epoch": 0.2788012414385457, "grad_norm": 0.8564943075180054, "learning_rate": 2.5387826048175984e-05, "loss": 0.1046, "step": 15743 }, { "epoch": 0.27881895097557413, "grad_norm": 0.9539850950241089, "learning_rate": 2.538720536664078e-05, "loss": 0.114, "step": 15744 }, { "epoch": 0.27883666051260253, "grad_norm": 1.9254645109176636, "learning_rate": 2.5386584650932722e-05, "loss": 0.1055, "step": 15745 }, { "epoch": 0.278854370049631, "grad_norm": 1.6496734619140625, "learning_rate": 2.5385963901053852e-05, "loss": 0.1287, "step": 15746 }, { "epoch": 0.2788720795866594, "grad_norm": 0.7003051042556763, "learning_rate": 2.538534311700621e-05, "loss": 0.0792, "step": 15747 }, { "epoch": 0.27888978912368784, "grad_norm": 0.7020289301872253, "learning_rate": 2.538472229879184e-05, "loss": 0.0783, "step": 15748 }, { "epoch": 0.27890749866071624, "grad_norm": 0.8725945353507996, "learning_rate": 2.5384101446412786e-05, "loss": 0.0765, "step": 15749 }, { "epoch": 0.2789252081977447, "grad_norm": 1.1577342748641968, "learning_rate": 2.5383480559871086e-05, "loss": 0.1291, "step": 15750 }, { "epoch": 0.27894291773477314, "grad_norm": 0.5224078893661499, "learning_rate": 2.5382859639168784e-05, "loss": 0.1047, "step": 15751 }, { "epoch": 0.27896062727180154, "grad_norm": 1.019729495048523, "learning_rate": 2.538223868430793e-05, "loss": 0.1087, "step": 15752 }, { "epoch": 0.27897833680883, "grad_norm": 0.9969866871833801, "learning_rate": 2.5381617695290553e-05, "loss": 0.1168, "step": 15753 }, { "epoch": 0.2789960463458584, "grad_norm": 0.6304478645324707, "learning_rate": 2.5380996672118708e-05, "loss": 0.0875, "step": 15754 }, { "epoch": 0.27901375588288685, "grad_norm": 0.7977100014686584, "learning_rate": 2.5380375614794433e-05, "loss": 0.1291, "step": 15755 }, { "epoch": 0.27903146541991525, "grad_norm": 0.8224126100540161, "learning_rate": 2.5379754523319775e-05, "loss": 0.0729, "step": 15756 }, { "epoch": 0.2790491749569437, "grad_norm": 0.616877019405365, "learning_rate": 2.537913339769677e-05, "loss": 0.0554, "step": 15757 }, { "epoch": 0.2790668844939721, "grad_norm": 0.8105337619781494, "learning_rate": 2.5378512237927473e-05, "loss": 0.083, "step": 15758 }, { "epoch": 0.27908459403100055, "grad_norm": 0.721157968044281, "learning_rate": 2.5377891044013914e-05, "loss": 0.0878, "step": 15759 }, { "epoch": 0.27910230356802895, "grad_norm": 1.0062731504440308, "learning_rate": 2.5377269815958145e-05, "loss": 0.106, "step": 15760 }, { "epoch": 0.2791200131050574, "grad_norm": 1.794032335281372, "learning_rate": 2.5376648553762204e-05, "loss": 0.1374, "step": 15761 }, { "epoch": 0.2791377226420858, "grad_norm": 1.0437251329421997, "learning_rate": 2.5376027257428145e-05, "loss": 0.0657, "step": 15762 }, { "epoch": 0.27915543217911426, "grad_norm": 0.7835448980331421, "learning_rate": 2.5375405926958005e-05, "loss": 0.0978, "step": 15763 }, { "epoch": 0.27917314171614266, "grad_norm": 0.9770792722702026, "learning_rate": 2.5374784562353823e-05, "loss": 0.0876, "step": 15764 }, { "epoch": 0.2791908512531711, "grad_norm": 0.5931757092475891, "learning_rate": 2.5374163163617655e-05, "loss": 0.0969, "step": 15765 }, { "epoch": 0.27920856079019957, "grad_norm": 0.6266990303993225, "learning_rate": 2.5373541730751538e-05, "loss": 0.1016, "step": 15766 }, { "epoch": 0.27922627032722797, "grad_norm": 1.0801329612731934, "learning_rate": 2.537292026375752e-05, "loss": 0.1196, "step": 15767 }, { "epoch": 0.2792439798642564, "grad_norm": 0.4430263340473175, "learning_rate": 2.537229876263764e-05, "loss": 0.0698, "step": 15768 }, { "epoch": 0.2792616894012848, "grad_norm": 0.619777262210846, "learning_rate": 2.5371677227393947e-05, "loss": 0.096, "step": 15769 }, { "epoch": 0.2792793989383133, "grad_norm": 0.8310164213180542, "learning_rate": 2.5371055658028487e-05, "loss": 0.0813, "step": 15770 }, { "epoch": 0.27929710847534167, "grad_norm": 0.5206877589225769, "learning_rate": 2.5370434054543297e-05, "loss": 0.1087, "step": 15771 }, { "epoch": 0.2793148180123701, "grad_norm": 0.7582829594612122, "learning_rate": 2.5369812416940437e-05, "loss": 0.1201, "step": 15772 }, { "epoch": 0.2793325275493985, "grad_norm": 0.6320861577987671, "learning_rate": 2.5369190745221935e-05, "loss": 0.0817, "step": 15773 }, { "epoch": 0.279350237086427, "grad_norm": 0.9783825874328613, "learning_rate": 2.5368569039389846e-05, "loss": 0.109, "step": 15774 }, { "epoch": 0.2793679466234554, "grad_norm": 1.358472228050232, "learning_rate": 2.5367947299446218e-05, "loss": 0.0833, "step": 15775 }, { "epoch": 0.27938565616048383, "grad_norm": 0.78192538022995, "learning_rate": 2.5367325525393085e-05, "loss": 0.1023, "step": 15776 }, { "epoch": 0.27940336569751223, "grad_norm": 0.8193898797035217, "learning_rate": 2.5366703717232508e-05, "loss": 0.1176, "step": 15777 }, { "epoch": 0.2794210752345407, "grad_norm": 0.7345989346504211, "learning_rate": 2.5366081874966517e-05, "loss": 0.1169, "step": 15778 }, { "epoch": 0.2794387847715691, "grad_norm": 0.5784133076667786, "learning_rate": 2.536545999859717e-05, "loss": 0.0755, "step": 15779 }, { "epoch": 0.27945649430859754, "grad_norm": 0.4349419176578522, "learning_rate": 2.5364838088126503e-05, "loss": 0.0682, "step": 15780 }, { "epoch": 0.279474203845626, "grad_norm": 0.6931809782981873, "learning_rate": 2.536421614355657e-05, "loss": 0.1004, "step": 15781 }, { "epoch": 0.2794919133826544, "grad_norm": 2.956289768218994, "learning_rate": 2.5363594164889413e-05, "loss": 0.1151, "step": 15782 }, { "epoch": 0.27950962291968284, "grad_norm": 1.2809239625930786, "learning_rate": 2.5362972152127084e-05, "loss": 0.1193, "step": 15783 }, { "epoch": 0.27952733245671124, "grad_norm": 1.9427481889724731, "learning_rate": 2.5362350105271623e-05, "loss": 0.126, "step": 15784 }, { "epoch": 0.2795450419937397, "grad_norm": 1.7874071598052979, "learning_rate": 2.5361728024325073e-05, "loss": 0.1199, "step": 15785 }, { "epoch": 0.2795627515307681, "grad_norm": 1.705552339553833, "learning_rate": 2.536110590928949e-05, "loss": 0.1166, "step": 15786 }, { "epoch": 0.27958046106779655, "grad_norm": 0.8825470805168152, "learning_rate": 2.5360483760166916e-05, "loss": 0.0902, "step": 15787 }, { "epoch": 0.27959817060482495, "grad_norm": 0.8016607761383057, "learning_rate": 2.53598615769594e-05, "loss": 0.1078, "step": 15788 }, { "epoch": 0.2796158801418534, "grad_norm": 2.078068494796753, "learning_rate": 2.5359239359668983e-05, "loss": 0.1069, "step": 15789 }, { "epoch": 0.2796335896788818, "grad_norm": 0.6941519975662231, "learning_rate": 2.535861710829772e-05, "loss": 0.1112, "step": 15790 }, { "epoch": 0.27965129921591025, "grad_norm": 0.9172930121421814, "learning_rate": 2.5357994822847657e-05, "loss": 0.0795, "step": 15791 }, { "epoch": 0.27966900875293865, "grad_norm": 0.9792255163192749, "learning_rate": 2.535737250332083e-05, "loss": 0.1171, "step": 15792 }, { "epoch": 0.2796867182899671, "grad_norm": 0.950208306312561, "learning_rate": 2.53567501497193e-05, "loss": 0.0848, "step": 15793 }, { "epoch": 0.2797044278269955, "grad_norm": 1.1775333881378174, "learning_rate": 2.535612776204511e-05, "loss": 0.0983, "step": 15794 }, { "epoch": 0.27972213736402396, "grad_norm": 0.8513561487197876, "learning_rate": 2.5355505340300306e-05, "loss": 0.0856, "step": 15795 }, { "epoch": 0.2797398469010524, "grad_norm": 0.7028363943099976, "learning_rate": 2.535488288448694e-05, "loss": 0.0866, "step": 15796 }, { "epoch": 0.2797575564380808, "grad_norm": 1.0770773887634277, "learning_rate": 2.5354260394607052e-05, "loss": 0.101, "step": 15797 }, { "epoch": 0.27977526597510927, "grad_norm": 0.9083957076072693, "learning_rate": 2.5353637870662695e-05, "loss": 0.1198, "step": 15798 }, { "epoch": 0.27979297551213766, "grad_norm": 0.7080664038658142, "learning_rate": 2.5353015312655924e-05, "loss": 0.0655, "step": 15799 }, { "epoch": 0.2798106850491661, "grad_norm": 0.3901055157184601, "learning_rate": 2.5352392720588773e-05, "loss": 0.1106, "step": 15800 }, { "epoch": 0.2798283945861945, "grad_norm": 1.0251319408416748, "learning_rate": 2.5351770094463294e-05, "loss": 0.113, "step": 15801 }, { "epoch": 0.27984610412322297, "grad_norm": 1.2492225170135498, "learning_rate": 2.5351147434281545e-05, "loss": 0.1442, "step": 15802 }, { "epoch": 0.27986381366025137, "grad_norm": 0.6362077593803406, "learning_rate": 2.535052474004556e-05, "loss": 0.1037, "step": 15803 }, { "epoch": 0.2798815231972798, "grad_norm": 0.6853954195976257, "learning_rate": 2.5349902011757405e-05, "loss": 0.0715, "step": 15804 }, { "epoch": 0.2798992327343082, "grad_norm": 1.0857453346252441, "learning_rate": 2.5349279249419115e-05, "loss": 0.117, "step": 15805 }, { "epoch": 0.2799169422713367, "grad_norm": 0.9957883954048157, "learning_rate": 2.5348656453032746e-05, "loss": 0.0769, "step": 15806 }, { "epoch": 0.2799346518083651, "grad_norm": 1.2357831001281738, "learning_rate": 2.534803362260034e-05, "loss": 0.0813, "step": 15807 }, { "epoch": 0.27995236134539353, "grad_norm": 0.9833505153656006, "learning_rate": 2.534741075812395e-05, "loss": 0.1242, "step": 15808 }, { "epoch": 0.27997007088242193, "grad_norm": 0.6907361745834351, "learning_rate": 2.5346787859605627e-05, "loss": 0.1158, "step": 15809 }, { "epoch": 0.2799877804194504, "grad_norm": 0.7497665882110596, "learning_rate": 2.5346164927047416e-05, "loss": 0.0472, "step": 15810 }, { "epoch": 0.28000548995647884, "grad_norm": 4.642019271850586, "learning_rate": 2.534554196045137e-05, "loss": 0.1307, "step": 15811 }, { "epoch": 0.28002319949350724, "grad_norm": 1.4376710653305054, "learning_rate": 2.5344918959819536e-05, "loss": 0.1064, "step": 15812 }, { "epoch": 0.2800409090305357, "grad_norm": 1.400388240814209, "learning_rate": 2.534429592515397e-05, "loss": 0.1712, "step": 15813 }, { "epoch": 0.2800586185675641, "grad_norm": 0.7315883040428162, "learning_rate": 2.5343672856456713e-05, "loss": 0.1294, "step": 15814 }, { "epoch": 0.28007632810459254, "grad_norm": 0.646466851234436, "learning_rate": 2.5343049753729817e-05, "loss": 0.0797, "step": 15815 }, { "epoch": 0.28009403764162094, "grad_norm": 0.5082030296325684, "learning_rate": 2.5342426616975337e-05, "loss": 0.0872, "step": 15816 }, { "epoch": 0.2801117471786494, "grad_norm": 1.7023919820785522, "learning_rate": 2.534180344619532e-05, "loss": 0.1318, "step": 15817 }, { "epoch": 0.2801294567156778, "grad_norm": 0.7552840709686279, "learning_rate": 2.5341180241391812e-05, "loss": 0.073, "step": 15818 }, { "epoch": 0.28014716625270625, "grad_norm": 0.7413889765739441, "learning_rate": 2.5340557002566867e-05, "loss": 0.0549, "step": 15819 }, { "epoch": 0.28016487578973465, "grad_norm": 1.9120328426361084, "learning_rate": 2.5339933729722538e-05, "loss": 0.1217, "step": 15820 }, { "epoch": 0.2801825853267631, "grad_norm": 1.3908945322036743, "learning_rate": 2.533931042286087e-05, "loss": 0.1413, "step": 15821 }, { "epoch": 0.2802002948637915, "grad_norm": 1.097054123878479, "learning_rate": 2.5338687081983922e-05, "loss": 0.1416, "step": 15822 }, { "epoch": 0.28021800440081995, "grad_norm": 1.066852331161499, "learning_rate": 2.5338063707093736e-05, "loss": 0.146, "step": 15823 }, { "epoch": 0.2802357139378484, "grad_norm": 1.127273440361023, "learning_rate": 2.5337440298192364e-05, "loss": 0.1465, "step": 15824 }, { "epoch": 0.2802534234748768, "grad_norm": 1.094679355621338, "learning_rate": 2.533681685528186e-05, "loss": 0.1082, "step": 15825 }, { "epoch": 0.28027113301190526, "grad_norm": 0.7375277280807495, "learning_rate": 2.5336193378364272e-05, "loss": 0.0934, "step": 15826 }, { "epoch": 0.28028884254893366, "grad_norm": 0.816875696182251, "learning_rate": 2.5335569867441658e-05, "loss": 0.0962, "step": 15827 }, { "epoch": 0.2803065520859621, "grad_norm": 0.8655363917350769, "learning_rate": 2.5334946322516065e-05, "loss": 0.0977, "step": 15828 }, { "epoch": 0.2803242616229905, "grad_norm": 0.7476261854171753, "learning_rate": 2.533432274358954e-05, "loss": 0.079, "step": 15829 }, { "epoch": 0.28034197116001897, "grad_norm": 0.9180153012275696, "learning_rate": 2.5333699130664138e-05, "loss": 0.1245, "step": 15830 }, { "epoch": 0.28035968069704736, "grad_norm": 0.832670271396637, "learning_rate": 2.5333075483741913e-05, "loss": 0.1182, "step": 15831 }, { "epoch": 0.2803773902340758, "grad_norm": 0.7858685255050659, "learning_rate": 2.5332451802824913e-05, "loss": 0.08, "step": 15832 }, { "epoch": 0.2803950997711042, "grad_norm": 0.7743949890136719, "learning_rate": 2.5331828087915193e-05, "loss": 0.1427, "step": 15833 }, { "epoch": 0.28041280930813267, "grad_norm": 0.6608768701553345, "learning_rate": 2.5331204339014803e-05, "loss": 0.0668, "step": 15834 }, { "epoch": 0.28043051884516107, "grad_norm": 0.7285192608833313, "learning_rate": 2.53305805561258e-05, "loss": 0.0901, "step": 15835 }, { "epoch": 0.2804482283821895, "grad_norm": 0.5793861150741577, "learning_rate": 2.5329956739250227e-05, "loss": 0.0817, "step": 15836 }, { "epoch": 0.2804659379192179, "grad_norm": 0.6615508198738098, "learning_rate": 2.5329332888390143e-05, "loss": 0.113, "step": 15837 }, { "epoch": 0.2804836474562464, "grad_norm": 0.7575063705444336, "learning_rate": 2.5328709003547596e-05, "loss": 0.0664, "step": 15838 }, { "epoch": 0.28050135699327483, "grad_norm": 0.5029103755950928, "learning_rate": 2.5328085084724645e-05, "loss": 0.0586, "step": 15839 }, { "epoch": 0.28051906653030323, "grad_norm": 0.6419529914855957, "learning_rate": 2.5327461131923336e-05, "loss": 0.1222, "step": 15840 }, { "epoch": 0.2805367760673317, "grad_norm": 1.8650530576705933, "learning_rate": 2.5326837145145727e-05, "loss": 0.1236, "step": 15841 }, { "epoch": 0.2805544856043601, "grad_norm": 0.8318203091621399, "learning_rate": 2.5326213124393863e-05, "loss": 0.1242, "step": 15842 }, { "epoch": 0.28057219514138854, "grad_norm": 0.9711102843284607, "learning_rate": 2.5325589069669805e-05, "loss": 0.0701, "step": 15843 }, { "epoch": 0.28058990467841693, "grad_norm": 1.2420932054519653, "learning_rate": 2.5324964980975603e-05, "loss": 0.1664, "step": 15844 }, { "epoch": 0.2806076142154454, "grad_norm": 1.070407509803772, "learning_rate": 2.5324340858313316e-05, "loss": 0.1124, "step": 15845 }, { "epoch": 0.2806253237524738, "grad_norm": 0.9509144425392151, "learning_rate": 2.5323716701684987e-05, "loss": 0.1339, "step": 15846 }, { "epoch": 0.28064303328950224, "grad_norm": 0.9169660806655884, "learning_rate": 2.532309251109267e-05, "loss": 0.1177, "step": 15847 }, { "epoch": 0.28066074282653064, "grad_norm": 0.9838313460350037, "learning_rate": 2.5322468286538428e-05, "loss": 0.1206, "step": 15848 }, { "epoch": 0.2806784523635591, "grad_norm": 1.033630609512329, "learning_rate": 2.5321844028024308e-05, "loss": 0.1224, "step": 15849 }, { "epoch": 0.2806961619005875, "grad_norm": 0.9145085215568542, "learning_rate": 2.532121973555237e-05, "loss": 0.1026, "step": 15850 }, { "epoch": 0.28071387143761595, "grad_norm": 0.8706889152526855, "learning_rate": 2.5320595409124658e-05, "loss": 0.128, "step": 15851 }, { "epoch": 0.28073158097464435, "grad_norm": 0.7019733190536499, "learning_rate": 2.531997104874323e-05, "loss": 0.0738, "step": 15852 }, { "epoch": 0.2807492905116728, "grad_norm": 0.5944635272026062, "learning_rate": 2.531934665441014e-05, "loss": 0.0963, "step": 15853 }, { "epoch": 0.28076700004870125, "grad_norm": 0.5278180837631226, "learning_rate": 2.5318722226127448e-05, "loss": 0.0801, "step": 15854 }, { "epoch": 0.28078470958572965, "grad_norm": 0.9690764546394348, "learning_rate": 2.5318097763897205e-05, "loss": 0.1083, "step": 15855 }, { "epoch": 0.2808024191227581, "grad_norm": 0.6209574937820435, "learning_rate": 2.531747326772146e-05, "loss": 0.0997, "step": 15856 }, { "epoch": 0.2808201286597865, "grad_norm": 0.8465526700019836, "learning_rate": 2.531684873760227e-05, "loss": 0.1061, "step": 15857 }, { "epoch": 0.28083783819681496, "grad_norm": 1.0515471696853638, "learning_rate": 2.5316224173541695e-05, "loss": 0.1183, "step": 15858 }, { "epoch": 0.28085554773384336, "grad_norm": 0.9347808957099915, "learning_rate": 2.5315599575541784e-05, "loss": 0.0694, "step": 15859 }, { "epoch": 0.2808732572708718, "grad_norm": 0.7949824333190918, "learning_rate": 2.5314974943604593e-05, "loss": 0.0802, "step": 15860 }, { "epoch": 0.2808909668079002, "grad_norm": 0.7252131104469299, "learning_rate": 2.531435027773218e-05, "loss": 0.0826, "step": 15861 }, { "epoch": 0.28090867634492867, "grad_norm": 0.6626308560371399, "learning_rate": 2.5313725577926595e-05, "loss": 0.1021, "step": 15862 }, { "epoch": 0.28092638588195706, "grad_norm": 0.4650212824344635, "learning_rate": 2.5313100844189895e-05, "loss": 0.111, "step": 15863 }, { "epoch": 0.2809440954189855, "grad_norm": 0.9290848970413208, "learning_rate": 2.531247607652414e-05, "loss": 0.1086, "step": 15864 }, { "epoch": 0.2809618049560139, "grad_norm": 0.7008875608444214, "learning_rate": 2.5311851274931384e-05, "loss": 0.074, "step": 15865 }, { "epoch": 0.28097951449304237, "grad_norm": 0.9964344501495361, "learning_rate": 2.5311226439413673e-05, "loss": 0.1087, "step": 15866 }, { "epoch": 0.28099722403007077, "grad_norm": 0.6537314653396606, "learning_rate": 2.5310601569973072e-05, "loss": 0.0739, "step": 15867 }, { "epoch": 0.2810149335670992, "grad_norm": 0.5870552062988281, "learning_rate": 2.5309976666611644e-05, "loss": 0.0703, "step": 15868 }, { "epoch": 0.2810326431041277, "grad_norm": 1.776808738708496, "learning_rate": 2.5309351729331425e-05, "loss": 0.1228, "step": 15869 }, { "epoch": 0.2810503526411561, "grad_norm": 0.6474621295928955, "learning_rate": 2.5308726758134483e-05, "loss": 0.0618, "step": 15870 }, { "epoch": 0.28106806217818453, "grad_norm": 0.9875191450119019, "learning_rate": 2.5308101753022877e-05, "loss": 0.1427, "step": 15871 }, { "epoch": 0.28108577171521293, "grad_norm": 0.8127615451812744, "learning_rate": 2.5307476713998653e-05, "loss": 0.093, "step": 15872 }, { "epoch": 0.2811034812522414, "grad_norm": 0.7991455793380737, "learning_rate": 2.5306851641063875e-05, "loss": 0.1011, "step": 15873 }, { "epoch": 0.2811211907892698, "grad_norm": 0.5546412467956543, "learning_rate": 2.53062265342206e-05, "loss": 0.0924, "step": 15874 }, { "epoch": 0.28113890032629824, "grad_norm": 0.7569437623023987, "learning_rate": 2.530560139347088e-05, "loss": 0.0985, "step": 15875 }, { "epoch": 0.28115660986332663, "grad_norm": 0.5330840945243835, "learning_rate": 2.5304976218816776e-05, "loss": 0.0576, "step": 15876 }, { "epoch": 0.2811743194003551, "grad_norm": 1.1336519718170166, "learning_rate": 2.530435101026034e-05, "loss": 0.0772, "step": 15877 }, { "epoch": 0.2811920289373835, "grad_norm": 0.8366576433181763, "learning_rate": 2.5303725767803635e-05, "loss": 0.0836, "step": 15878 }, { "epoch": 0.28120973847441194, "grad_norm": 0.7975041270256042, "learning_rate": 2.530310049144871e-05, "loss": 0.0698, "step": 15879 }, { "epoch": 0.28122744801144034, "grad_norm": 1.1475123167037964, "learning_rate": 2.530247518119763e-05, "loss": 0.1555, "step": 15880 }, { "epoch": 0.2812451575484688, "grad_norm": 0.9842048287391663, "learning_rate": 2.5301849837052446e-05, "loss": 0.0904, "step": 15881 }, { "epoch": 0.2812628670854972, "grad_norm": 0.7660056948661804, "learning_rate": 2.530122445901522e-05, "loss": 0.0752, "step": 15882 }, { "epoch": 0.28128057662252565, "grad_norm": 1.099548101425171, "learning_rate": 2.5300599047088005e-05, "loss": 0.1152, "step": 15883 }, { "epoch": 0.2812982861595541, "grad_norm": 0.9267683625221252, "learning_rate": 2.529997360127286e-05, "loss": 0.0969, "step": 15884 }, { "epoch": 0.2813159956965825, "grad_norm": 0.5610498189926147, "learning_rate": 2.529934812157185e-05, "loss": 0.0877, "step": 15885 }, { "epoch": 0.28133370523361095, "grad_norm": 0.8142337203025818, "learning_rate": 2.5298722607987018e-05, "loss": 0.0791, "step": 15886 }, { "epoch": 0.28135141477063935, "grad_norm": 0.7764913439750671, "learning_rate": 2.529809706052044e-05, "loss": 0.1073, "step": 15887 }, { "epoch": 0.2813691243076678, "grad_norm": 0.5763152837753296, "learning_rate": 2.5297471479174155e-05, "loss": 0.0519, "step": 15888 }, { "epoch": 0.2813868338446962, "grad_norm": 0.9165849089622498, "learning_rate": 2.5296845863950236e-05, "loss": 0.1524, "step": 15889 }, { "epoch": 0.28140454338172466, "grad_norm": 0.493062287569046, "learning_rate": 2.5296220214850735e-05, "loss": 0.09, "step": 15890 }, { "epoch": 0.28142225291875306, "grad_norm": 1.1812055110931396, "learning_rate": 2.529559453187771e-05, "loss": 0.1521, "step": 15891 }, { "epoch": 0.2814399624557815, "grad_norm": 1.3340505361557007, "learning_rate": 2.529496881503322e-05, "loss": 0.1298, "step": 15892 }, { "epoch": 0.2814576719928099, "grad_norm": 0.9301230907440186, "learning_rate": 2.5294343064319325e-05, "loss": 0.0845, "step": 15893 }, { "epoch": 0.28147538152983836, "grad_norm": 0.811128556728363, "learning_rate": 2.5293717279738084e-05, "loss": 0.0871, "step": 15894 }, { "epoch": 0.28149309106686676, "grad_norm": 0.5240210294723511, "learning_rate": 2.5293091461291547e-05, "loss": 0.0495, "step": 15895 }, { "epoch": 0.2815108006038952, "grad_norm": 1.1109880208969116, "learning_rate": 2.529246560898179e-05, "loss": 0.0933, "step": 15896 }, { "epoch": 0.2815285101409236, "grad_norm": 1.0813016891479492, "learning_rate": 2.5291839722810853e-05, "loss": 0.0974, "step": 15897 }, { "epoch": 0.28154621967795207, "grad_norm": 1.014267921447754, "learning_rate": 2.529121380278081e-05, "loss": 0.0753, "step": 15898 }, { "epoch": 0.2815639292149805, "grad_norm": 0.9736046195030212, "learning_rate": 2.5290587848893712e-05, "loss": 0.1055, "step": 15899 }, { "epoch": 0.2815816387520089, "grad_norm": 0.5546492338180542, "learning_rate": 2.528996186115162e-05, "loss": 0.0932, "step": 15900 }, { "epoch": 0.2815993482890374, "grad_norm": 0.8014137744903564, "learning_rate": 2.52893358395566e-05, "loss": 0.0841, "step": 15901 }, { "epoch": 0.2816170578260658, "grad_norm": 0.8302342891693115, "learning_rate": 2.52887097841107e-05, "loss": 0.0723, "step": 15902 }, { "epoch": 0.28163476736309423, "grad_norm": 0.7924015522003174, "learning_rate": 2.528808369481599e-05, "loss": 0.1018, "step": 15903 }, { "epoch": 0.28165247690012263, "grad_norm": 1.0286154747009277, "learning_rate": 2.528745757167452e-05, "loss": 0.1287, "step": 15904 }, { "epoch": 0.2816701864371511, "grad_norm": 0.940356969833374, "learning_rate": 2.5286831414688355e-05, "loss": 0.108, "step": 15905 }, { "epoch": 0.2816878959741795, "grad_norm": 0.7115861177444458, "learning_rate": 2.5286205223859558e-05, "loss": 0.0984, "step": 15906 }, { "epoch": 0.28170560551120793, "grad_norm": 1.6957820653915405, "learning_rate": 2.5285578999190186e-05, "loss": 0.0893, "step": 15907 }, { "epoch": 0.28172331504823633, "grad_norm": 0.5288769006729126, "learning_rate": 2.52849527406823e-05, "loss": 0.0534, "step": 15908 }, { "epoch": 0.2817410245852648, "grad_norm": 0.4812370538711548, "learning_rate": 2.5284326448337963e-05, "loss": 0.0989, "step": 15909 }, { "epoch": 0.2817587341222932, "grad_norm": 0.6017610430717468, "learning_rate": 2.5283700122159227e-05, "loss": 0.0727, "step": 15910 }, { "epoch": 0.28177644365932164, "grad_norm": 0.6303423047065735, "learning_rate": 2.5283073762148155e-05, "loss": 0.0802, "step": 15911 }, { "epoch": 0.28179415319635004, "grad_norm": 0.6658865809440613, "learning_rate": 2.528244736830682e-05, "loss": 0.08, "step": 15912 }, { "epoch": 0.2818118627333785, "grad_norm": 0.8293823599815369, "learning_rate": 2.5281820940637264e-05, "loss": 0.0813, "step": 15913 }, { "epoch": 0.28182957227040695, "grad_norm": 0.6921062469482422, "learning_rate": 2.5281194479141562e-05, "loss": 0.0937, "step": 15914 }, { "epoch": 0.28184728180743535, "grad_norm": 0.542822003364563, "learning_rate": 2.5280567983821768e-05, "loss": 0.0987, "step": 15915 }, { "epoch": 0.2818649913444638, "grad_norm": 0.801112174987793, "learning_rate": 2.5279941454679948e-05, "loss": 0.0979, "step": 15916 }, { "epoch": 0.2818827008814922, "grad_norm": 0.7244669198989868, "learning_rate": 2.5279314891718155e-05, "loss": 0.0733, "step": 15917 }, { "epoch": 0.28190041041852065, "grad_norm": 0.9707888960838318, "learning_rate": 2.5278688294938457e-05, "loss": 0.091, "step": 15918 }, { "epoch": 0.28191811995554905, "grad_norm": 0.7996183633804321, "learning_rate": 2.527806166434292e-05, "loss": 0.1109, "step": 15919 }, { "epoch": 0.2819358294925775, "grad_norm": 0.5221244096755981, "learning_rate": 2.5277434999933595e-05, "loss": 0.0969, "step": 15920 }, { "epoch": 0.2819535390296059, "grad_norm": 0.7823978066444397, "learning_rate": 2.527680830171255e-05, "loss": 0.1133, "step": 15921 }, { "epoch": 0.28197124856663436, "grad_norm": 0.7522883415222168, "learning_rate": 2.5276181569681843e-05, "loss": 0.1283, "step": 15922 }, { "epoch": 0.28198895810366276, "grad_norm": 0.9591156244277954, "learning_rate": 2.527555480384354e-05, "loss": 0.1001, "step": 15923 }, { "epoch": 0.2820066676406912, "grad_norm": 1.089818000793457, "learning_rate": 2.52749280041997e-05, "loss": 0.1016, "step": 15924 }, { "epoch": 0.2820243771777196, "grad_norm": 0.6499664187431335, "learning_rate": 2.5274301170752385e-05, "loss": 0.0509, "step": 15925 }, { "epoch": 0.28204208671474806, "grad_norm": 0.8159813284873962, "learning_rate": 2.5273674303503662e-05, "loss": 0.09, "step": 15926 }, { "epoch": 0.28205979625177646, "grad_norm": 0.6506341695785522, "learning_rate": 2.5273047402455584e-05, "loss": 0.0701, "step": 15927 }, { "epoch": 0.2820775057888049, "grad_norm": 0.9850216507911682, "learning_rate": 2.5272420467610224e-05, "loss": 0.0814, "step": 15928 }, { "epoch": 0.28209521532583337, "grad_norm": 0.6028852462768555, "learning_rate": 2.527179349896964e-05, "loss": 0.0735, "step": 15929 }, { "epoch": 0.28211292486286177, "grad_norm": 0.6150683760643005, "learning_rate": 2.5271166496535894e-05, "loss": 0.0861, "step": 15930 }, { "epoch": 0.2821306343998902, "grad_norm": 1.0363203287124634, "learning_rate": 2.5270539460311044e-05, "loss": 0.1427, "step": 15931 }, { "epoch": 0.2821483439369186, "grad_norm": 0.5965062379837036, "learning_rate": 2.5269912390297162e-05, "loss": 0.0812, "step": 15932 }, { "epoch": 0.2821660534739471, "grad_norm": 0.8464305996894836, "learning_rate": 2.526928528649631e-05, "loss": 0.1392, "step": 15933 }, { "epoch": 0.2821837630109755, "grad_norm": 0.7396256923675537, "learning_rate": 2.526865814891054e-05, "loss": 0.101, "step": 15934 }, { "epoch": 0.28220147254800393, "grad_norm": 0.7673283815383911, "learning_rate": 2.526803097754193e-05, "loss": 0.1045, "step": 15935 }, { "epoch": 0.2822191820850323, "grad_norm": 0.8244577646255493, "learning_rate": 2.526740377239253e-05, "loss": 0.1046, "step": 15936 }, { "epoch": 0.2822368916220608, "grad_norm": 0.5659812688827515, "learning_rate": 2.526677653346442e-05, "loss": 0.0803, "step": 15937 }, { "epoch": 0.2822546011590892, "grad_norm": 0.7409698367118835, "learning_rate": 2.5266149260759644e-05, "loss": 0.0995, "step": 15938 }, { "epoch": 0.28227231069611763, "grad_norm": 0.9282556176185608, "learning_rate": 2.526552195428028e-05, "loss": 0.0785, "step": 15939 }, { "epoch": 0.28229002023314603, "grad_norm": 0.757337749004364, "learning_rate": 2.5264894614028386e-05, "loss": 0.0977, "step": 15940 }, { "epoch": 0.2823077297701745, "grad_norm": 0.7665308713912964, "learning_rate": 2.5264267240006028e-05, "loss": 0.0844, "step": 15941 }, { "epoch": 0.2823254393072029, "grad_norm": 0.5887478590011597, "learning_rate": 2.526363983221527e-05, "loss": 0.1018, "step": 15942 }, { "epoch": 0.28234314884423134, "grad_norm": 0.8553300499916077, "learning_rate": 2.5263012390658173e-05, "loss": 0.1249, "step": 15943 }, { "epoch": 0.2823608583812598, "grad_norm": 1.3290709257125854, "learning_rate": 2.5262384915336803e-05, "loss": 0.1013, "step": 15944 }, { "epoch": 0.2823785679182882, "grad_norm": 1.233703374862671, "learning_rate": 2.5261757406253226e-05, "loss": 0.1216, "step": 15945 }, { "epoch": 0.28239627745531665, "grad_norm": 0.7437189221382141, "learning_rate": 2.5261129863409503e-05, "loss": 0.0928, "step": 15946 }, { "epoch": 0.28241398699234505, "grad_norm": 0.7881909608840942, "learning_rate": 2.5260502286807703e-05, "loss": 0.0884, "step": 15947 }, { "epoch": 0.2824316965293735, "grad_norm": 0.8472310304641724, "learning_rate": 2.5259874676449884e-05, "loss": 0.084, "step": 15948 }, { "epoch": 0.2824494060664019, "grad_norm": 0.44607260823249817, "learning_rate": 2.525924703233812e-05, "loss": 0.0772, "step": 15949 }, { "epoch": 0.28246711560343035, "grad_norm": 0.9162417650222778, "learning_rate": 2.525861935447447e-05, "loss": 0.1178, "step": 15950 }, { "epoch": 0.28248482514045875, "grad_norm": 0.7849816083908081, "learning_rate": 2.5257991642861e-05, "loss": 0.1024, "step": 15951 }, { "epoch": 0.2825025346774872, "grad_norm": 0.7159022688865662, "learning_rate": 2.5257363897499772e-05, "loss": 0.1344, "step": 15952 }, { "epoch": 0.2825202442145156, "grad_norm": 1.9392461776733398, "learning_rate": 2.5256736118392858e-05, "loss": 0.0841, "step": 15953 }, { "epoch": 0.28253795375154406, "grad_norm": 0.6949992775917053, "learning_rate": 2.5256108305542317e-05, "loss": 0.0735, "step": 15954 }, { "epoch": 0.28255566328857246, "grad_norm": 1.121387004852295, "learning_rate": 2.5255480458950218e-05, "loss": 0.0848, "step": 15955 }, { "epoch": 0.2825733728256009, "grad_norm": 1.3645395040512085, "learning_rate": 2.5254852578618628e-05, "loss": 0.1205, "step": 15956 }, { "epoch": 0.2825910823626293, "grad_norm": 1.444750428199768, "learning_rate": 2.5254224664549608e-05, "loss": 0.1388, "step": 15957 }, { "epoch": 0.28260879189965776, "grad_norm": 1.2306318283081055, "learning_rate": 2.5253596716745227e-05, "loss": 0.1119, "step": 15958 }, { "epoch": 0.2826265014366862, "grad_norm": 0.5507181286811829, "learning_rate": 2.5252968735207545e-05, "loss": 0.1575, "step": 15959 }, { "epoch": 0.2826442109737146, "grad_norm": 0.8972038626670837, "learning_rate": 2.525234071993864e-05, "loss": 0.1165, "step": 15960 }, { "epoch": 0.28266192051074307, "grad_norm": 0.6938641667366028, "learning_rate": 2.5251712670940564e-05, "loss": 0.1116, "step": 15961 }, { "epoch": 0.28267963004777147, "grad_norm": 0.600999653339386, "learning_rate": 2.5251084588215395e-05, "loss": 0.0648, "step": 15962 }, { "epoch": 0.2826973395847999, "grad_norm": 0.8574803471565247, "learning_rate": 2.525045647176519e-05, "loss": 0.1001, "step": 15963 }, { "epoch": 0.2827150491218283, "grad_norm": 1.074336290359497, "learning_rate": 2.5249828321592027e-05, "loss": 0.1082, "step": 15964 }, { "epoch": 0.2827327586588568, "grad_norm": 0.5080106258392334, "learning_rate": 2.524920013769796e-05, "loss": 0.0627, "step": 15965 }, { "epoch": 0.2827504681958852, "grad_norm": 0.7810705304145813, "learning_rate": 2.5248571920085065e-05, "loss": 0.091, "step": 15966 }, { "epoch": 0.28276817773291363, "grad_norm": 0.7724252343177795, "learning_rate": 2.5247943668755402e-05, "loss": 0.089, "step": 15967 }, { "epoch": 0.282785887269942, "grad_norm": 0.7156530618667603, "learning_rate": 2.524731538371104e-05, "loss": 0.0848, "step": 15968 }, { "epoch": 0.2828035968069705, "grad_norm": 0.9778617024421692, "learning_rate": 2.5246687064954052e-05, "loss": 0.0964, "step": 15969 }, { "epoch": 0.2828213063439989, "grad_norm": 0.6561960577964783, "learning_rate": 2.5246058712486494e-05, "loss": 0.0462, "step": 15970 }, { "epoch": 0.28283901588102733, "grad_norm": 1.0848829746246338, "learning_rate": 2.5245430326310443e-05, "loss": 0.1065, "step": 15971 }, { "epoch": 0.28285672541805573, "grad_norm": 0.8126159310340881, "learning_rate": 2.524480190642796e-05, "loss": 0.0936, "step": 15972 }, { "epoch": 0.2828744349550842, "grad_norm": 0.8559170961380005, "learning_rate": 2.5244173452841118e-05, "loss": 0.0855, "step": 15973 }, { "epoch": 0.28289214449211264, "grad_norm": 0.8204278945922852, "learning_rate": 2.524354496555198e-05, "loss": 0.0764, "step": 15974 }, { "epoch": 0.28290985402914104, "grad_norm": 1.1867990493774414, "learning_rate": 2.5242916444562615e-05, "loss": 0.1161, "step": 15975 }, { "epoch": 0.2829275635661695, "grad_norm": 0.7248724102973938, "learning_rate": 2.524228788987509e-05, "loss": 0.1004, "step": 15976 }, { "epoch": 0.2829452731031979, "grad_norm": 0.6817300915718079, "learning_rate": 2.5241659301491475e-05, "loss": 0.0649, "step": 15977 }, { "epoch": 0.28296298264022635, "grad_norm": 0.9044510126113892, "learning_rate": 2.5241030679413834e-05, "loss": 0.0872, "step": 15978 }, { "epoch": 0.28298069217725474, "grad_norm": 0.6192435026168823, "learning_rate": 2.524040202364424e-05, "loss": 0.0892, "step": 15979 }, { "epoch": 0.2829984017142832, "grad_norm": 1.1101312637329102, "learning_rate": 2.5239773334184762e-05, "loss": 0.1028, "step": 15980 }, { "epoch": 0.2830161112513116, "grad_norm": 0.7967236042022705, "learning_rate": 2.5239144611037463e-05, "loss": 0.079, "step": 15981 }, { "epoch": 0.28303382078834005, "grad_norm": 0.6104417443275452, "learning_rate": 2.5238515854204413e-05, "loss": 0.0937, "step": 15982 }, { "epoch": 0.28305153032536845, "grad_norm": 1.0177206993103027, "learning_rate": 2.5237887063687684e-05, "loss": 0.0923, "step": 15983 }, { "epoch": 0.2830692398623969, "grad_norm": 1.085796594619751, "learning_rate": 2.5237258239489338e-05, "loss": 0.1264, "step": 15984 }, { "epoch": 0.2830869493994253, "grad_norm": 0.5373337268829346, "learning_rate": 2.523662938161145e-05, "loss": 0.0652, "step": 15985 }, { "epoch": 0.28310465893645376, "grad_norm": 0.9107330441474915, "learning_rate": 2.5236000490056086e-05, "loss": 0.0764, "step": 15986 }, { "epoch": 0.28312236847348216, "grad_norm": 0.9793872237205505, "learning_rate": 2.5235371564825316e-05, "loss": 0.0843, "step": 15987 }, { "epoch": 0.2831400780105106, "grad_norm": 0.5250181555747986, "learning_rate": 2.523474260592121e-05, "loss": 0.0682, "step": 15988 }, { "epoch": 0.28315778754753906, "grad_norm": 0.8779169321060181, "learning_rate": 2.5234113613345832e-05, "loss": 0.1204, "step": 15989 }, { "epoch": 0.28317549708456746, "grad_norm": 1.3730443716049194, "learning_rate": 2.5233484587101258e-05, "loss": 0.0939, "step": 15990 }, { "epoch": 0.2831932066215959, "grad_norm": 0.7145947813987732, "learning_rate": 2.5232855527189556e-05, "loss": 0.0754, "step": 15991 }, { "epoch": 0.2832109161586243, "grad_norm": 0.928239643573761, "learning_rate": 2.5232226433612795e-05, "loss": 0.1021, "step": 15992 }, { "epoch": 0.28322862569565277, "grad_norm": 0.837875485420227, "learning_rate": 2.5231597306373043e-05, "loss": 0.1102, "step": 15993 }, { "epoch": 0.28324633523268117, "grad_norm": 0.8065756559371948, "learning_rate": 2.523096814547237e-05, "loss": 0.1502, "step": 15994 }, { "epoch": 0.2832640447697096, "grad_norm": 0.4398072063922882, "learning_rate": 2.5230338950912843e-05, "loss": 0.1239, "step": 15995 }, { "epoch": 0.283281754306738, "grad_norm": 0.7445842623710632, "learning_rate": 2.5229709722696542e-05, "loss": 0.1105, "step": 15996 }, { "epoch": 0.2832994638437665, "grad_norm": 0.9986787438392639, "learning_rate": 2.5229080460825528e-05, "loss": 0.0926, "step": 15997 }, { "epoch": 0.2833171733807949, "grad_norm": 0.5942639112472534, "learning_rate": 2.5228451165301875e-05, "loss": 0.0976, "step": 15998 }, { "epoch": 0.2833348829178233, "grad_norm": 0.8711373209953308, "learning_rate": 2.5227821836127652e-05, "loss": 0.0902, "step": 15999 }, { "epoch": 0.2833525924548517, "grad_norm": 0.7353549003601074, "learning_rate": 2.5227192473304927e-05, "loss": 0.0978, "step": 16000 }, { "epoch": 0.2833703019918802, "grad_norm": 0.8489319086074829, "learning_rate": 2.5226563076835777e-05, "loss": 0.0997, "step": 16001 }, { "epoch": 0.2833880115289086, "grad_norm": 0.761333703994751, "learning_rate": 2.5225933646722265e-05, "loss": 0.0998, "step": 16002 }, { "epoch": 0.28340572106593703, "grad_norm": 0.7163560390472412, "learning_rate": 2.522530418296647e-05, "loss": 0.0756, "step": 16003 }, { "epoch": 0.2834234306029655, "grad_norm": 0.9393923282623291, "learning_rate": 2.522467468557045e-05, "loss": 0.0989, "step": 16004 }, { "epoch": 0.2834411401399939, "grad_norm": 1.33279550075531, "learning_rate": 2.522404515453629e-05, "loss": 0.1024, "step": 16005 }, { "epoch": 0.28345884967702234, "grad_norm": 0.5400334596633911, "learning_rate": 2.522341558986606e-05, "loss": 0.105, "step": 16006 }, { "epoch": 0.28347655921405074, "grad_norm": 0.6716694831848145, "learning_rate": 2.5222785991561824e-05, "loss": 0.0764, "step": 16007 }, { "epoch": 0.2834942687510792, "grad_norm": 0.7773280739784241, "learning_rate": 2.5222156359625653e-05, "loss": 0.0826, "step": 16008 }, { "epoch": 0.2835119782881076, "grad_norm": 0.8120128512382507, "learning_rate": 2.522152669405962e-05, "loss": 0.0911, "step": 16009 }, { "epoch": 0.28352968782513605, "grad_norm": 0.9671022295951843, "learning_rate": 2.5220896994865804e-05, "loss": 0.0809, "step": 16010 }, { "epoch": 0.28354739736216444, "grad_norm": 1.0942543745040894, "learning_rate": 2.5220267262046267e-05, "loss": 0.1063, "step": 16011 }, { "epoch": 0.2835651068991929, "grad_norm": 1.2165794372558594, "learning_rate": 2.5219637495603087e-05, "loss": 0.0991, "step": 16012 }, { "epoch": 0.2835828164362213, "grad_norm": 0.9286420941352844, "learning_rate": 2.521900769553833e-05, "loss": 0.0816, "step": 16013 }, { "epoch": 0.28360052597324975, "grad_norm": 0.9518773555755615, "learning_rate": 2.5218377861854075e-05, "loss": 0.1103, "step": 16014 }, { "epoch": 0.28361823551027815, "grad_norm": 0.9065068960189819, "learning_rate": 2.5217747994552384e-05, "loss": 0.1093, "step": 16015 }, { "epoch": 0.2836359450473066, "grad_norm": 0.8444250226020813, "learning_rate": 2.521711809363534e-05, "loss": 0.1488, "step": 16016 }, { "epoch": 0.283653654584335, "grad_norm": 0.8946512937545776, "learning_rate": 2.5216488159105014e-05, "loss": 0.1062, "step": 16017 }, { "epoch": 0.28367136412136346, "grad_norm": 0.7853078246116638, "learning_rate": 2.5215858190963468e-05, "loss": 0.0716, "step": 16018 }, { "epoch": 0.2836890736583919, "grad_norm": 0.7175232172012329, "learning_rate": 2.5215228189212787e-05, "loss": 0.1064, "step": 16019 }, { "epoch": 0.2837067831954203, "grad_norm": 1.0853407382965088, "learning_rate": 2.5214598153855043e-05, "loss": 0.1154, "step": 16020 }, { "epoch": 0.28372449273244876, "grad_norm": 0.8148348331451416, "learning_rate": 2.521396808489229e-05, "loss": 0.0831, "step": 16021 }, { "epoch": 0.28374220226947716, "grad_norm": 1.011671543121338, "learning_rate": 2.521333798232663e-05, "loss": 0.134, "step": 16022 }, { "epoch": 0.2837599118065056, "grad_norm": 0.6298850774765015, "learning_rate": 2.5212707846160113e-05, "loss": 0.0697, "step": 16023 }, { "epoch": 0.283777621343534, "grad_norm": 0.6650575399398804, "learning_rate": 2.5212077676394822e-05, "loss": 0.1018, "step": 16024 }, { "epoch": 0.28379533088056247, "grad_norm": 0.9893573522567749, "learning_rate": 2.5211447473032826e-05, "loss": 0.0877, "step": 16025 }, { "epoch": 0.28381304041759087, "grad_norm": 0.548183023929596, "learning_rate": 2.52108172360762e-05, "loss": 0.0935, "step": 16026 }, { "epoch": 0.2838307499546193, "grad_norm": 0.609120786190033, "learning_rate": 2.5210186965527022e-05, "loss": 0.0747, "step": 16027 }, { "epoch": 0.2838484594916477, "grad_norm": 0.6164608597755432, "learning_rate": 2.520955666138736e-05, "loss": 0.0767, "step": 16028 }, { "epoch": 0.2838661690286762, "grad_norm": 0.791888952255249, "learning_rate": 2.5208926323659295e-05, "loss": 0.0895, "step": 16029 }, { "epoch": 0.2838838785657046, "grad_norm": 0.7069084048271179, "learning_rate": 2.5208295952344886e-05, "loss": 0.0863, "step": 16030 }, { "epoch": 0.283901588102733, "grad_norm": 0.6920512318611145, "learning_rate": 2.5207665547446217e-05, "loss": 0.0754, "step": 16031 }, { "epoch": 0.2839192976397614, "grad_norm": 0.7124485969543457, "learning_rate": 2.5207035108965364e-05, "loss": 0.0886, "step": 16032 }, { "epoch": 0.2839370071767899, "grad_norm": 0.9846322536468506, "learning_rate": 2.52064046369044e-05, "loss": 0.1126, "step": 16033 }, { "epoch": 0.28395471671381833, "grad_norm": 0.7266543507575989, "learning_rate": 2.5205774131265394e-05, "loss": 0.0905, "step": 16034 }, { "epoch": 0.28397242625084673, "grad_norm": 0.2859644293785095, "learning_rate": 2.5205143592050424e-05, "loss": 0.1168, "step": 16035 }, { "epoch": 0.2839901357878752, "grad_norm": 0.5647198557853699, "learning_rate": 2.520451301926156e-05, "loss": 0.1167, "step": 16036 }, { "epoch": 0.2840078453249036, "grad_norm": 0.6404585838317871, "learning_rate": 2.5203882412900886e-05, "loss": 0.1027, "step": 16037 }, { "epoch": 0.28402555486193204, "grad_norm": 0.692866861820221, "learning_rate": 2.520325177297047e-05, "loss": 0.0887, "step": 16038 }, { "epoch": 0.28404326439896044, "grad_norm": 0.5857863426208496, "learning_rate": 2.5202621099472384e-05, "loss": 0.0768, "step": 16039 }, { "epoch": 0.2840609739359889, "grad_norm": 0.6794676780700684, "learning_rate": 2.5201990392408707e-05, "loss": 0.1016, "step": 16040 }, { "epoch": 0.2840786834730173, "grad_norm": 0.647562563419342, "learning_rate": 2.5201359651781512e-05, "loss": 0.096, "step": 16041 }, { "epoch": 0.28409639301004574, "grad_norm": 0.7377432584762573, "learning_rate": 2.5200728877592884e-05, "loss": 0.1144, "step": 16042 }, { "epoch": 0.28411410254707414, "grad_norm": 0.7716846466064453, "learning_rate": 2.520009806984488e-05, "loss": 0.1242, "step": 16043 }, { "epoch": 0.2841318120841026, "grad_norm": 0.8087106347084045, "learning_rate": 2.519946722853959e-05, "loss": 0.0744, "step": 16044 }, { "epoch": 0.284149521621131, "grad_norm": 0.4808206558227539, "learning_rate": 2.5198836353679087e-05, "loss": 0.0595, "step": 16045 }, { "epoch": 0.28416723115815945, "grad_norm": 0.7634971141815186, "learning_rate": 2.5198205445265436e-05, "loss": 0.0667, "step": 16046 }, { "epoch": 0.28418494069518785, "grad_norm": 0.5578824281692505, "learning_rate": 2.519757450330073e-05, "loss": 0.1014, "step": 16047 }, { "epoch": 0.2842026502322163, "grad_norm": 0.993192732334137, "learning_rate": 2.5196943527787026e-05, "loss": 0.1347, "step": 16048 }, { "epoch": 0.28422035976924476, "grad_norm": 1.1601886749267578, "learning_rate": 2.5196312518726417e-05, "loss": 0.1295, "step": 16049 }, { "epoch": 0.28423806930627316, "grad_norm": 0.6439762115478516, "learning_rate": 2.519568147612097e-05, "loss": 0.1479, "step": 16050 }, { "epoch": 0.2842557788433016, "grad_norm": 0.5627419948577881, "learning_rate": 2.5195050399972758e-05, "loss": 0.0899, "step": 16051 }, { "epoch": 0.28427348838033, "grad_norm": 1.0312496423721313, "learning_rate": 2.5194419290283862e-05, "loss": 0.0775, "step": 16052 }, { "epoch": 0.28429119791735846, "grad_norm": 0.9524479508399963, "learning_rate": 2.5193788147056358e-05, "loss": 0.1023, "step": 16053 }, { "epoch": 0.28430890745438686, "grad_norm": 1.1800785064697266, "learning_rate": 2.519315697029233e-05, "loss": 0.102, "step": 16054 }, { "epoch": 0.2843266169914153, "grad_norm": 0.6760833263397217, "learning_rate": 2.5192525759993832e-05, "loss": 0.1488, "step": 16055 }, { "epoch": 0.2843443265284437, "grad_norm": 0.8416310548782349, "learning_rate": 2.5191894516162964e-05, "loss": 0.1237, "step": 16056 }, { "epoch": 0.28436203606547217, "grad_norm": 0.7098459005355835, "learning_rate": 2.5191263238801792e-05, "loss": 0.1257, "step": 16057 }, { "epoch": 0.28437974560250057, "grad_norm": 0.7688266634941101, "learning_rate": 2.5190631927912397e-05, "loss": 0.1186, "step": 16058 }, { "epoch": 0.284397455139529, "grad_norm": 1.0064575672149658, "learning_rate": 2.519000058349685e-05, "loss": 0.1004, "step": 16059 }, { "epoch": 0.2844151646765574, "grad_norm": 0.6919111609458923, "learning_rate": 2.518936920555724e-05, "loss": 0.1201, "step": 16060 }, { "epoch": 0.2844328742135859, "grad_norm": 0.5257853269577026, "learning_rate": 2.5188737794095627e-05, "loss": 0.0592, "step": 16061 }, { "epoch": 0.28445058375061427, "grad_norm": 0.7357519268989563, "learning_rate": 2.51881063491141e-05, "loss": 0.1108, "step": 16062 }, { "epoch": 0.2844682932876427, "grad_norm": 0.7627390623092651, "learning_rate": 2.5187474870614735e-05, "loss": 0.0803, "step": 16063 }, { "epoch": 0.2844860028246712, "grad_norm": 0.7499142289161682, "learning_rate": 2.5186843358599603e-05, "loss": 0.0854, "step": 16064 }, { "epoch": 0.2845037123616996, "grad_norm": 0.6381387114524841, "learning_rate": 2.518621181307079e-05, "loss": 0.1114, "step": 16065 }, { "epoch": 0.28452142189872803, "grad_norm": 0.530546247959137, "learning_rate": 2.5185580234030372e-05, "loss": 0.0714, "step": 16066 }, { "epoch": 0.28453913143575643, "grad_norm": 0.8904288411140442, "learning_rate": 2.518494862148042e-05, "loss": 0.0971, "step": 16067 }, { "epoch": 0.2845568409727849, "grad_norm": 0.7777676582336426, "learning_rate": 2.5184316975423026e-05, "loss": 0.0995, "step": 16068 }, { "epoch": 0.2845745505098133, "grad_norm": 0.6502843499183655, "learning_rate": 2.5183685295860253e-05, "loss": 0.0827, "step": 16069 }, { "epoch": 0.28459226004684174, "grad_norm": 0.9253166913986206, "learning_rate": 2.5183053582794186e-05, "loss": 0.0986, "step": 16070 }, { "epoch": 0.28460996958387014, "grad_norm": 0.602120578289032, "learning_rate": 2.51824218362269e-05, "loss": 0.0778, "step": 16071 }, { "epoch": 0.2846276791208986, "grad_norm": 0.6936535239219666, "learning_rate": 2.5181790056160486e-05, "loss": 0.1002, "step": 16072 }, { "epoch": 0.284645388657927, "grad_norm": 1.0042136907577515, "learning_rate": 2.5181158242597003e-05, "loss": 0.0738, "step": 16073 }, { "epoch": 0.28466309819495544, "grad_norm": 0.6469826698303223, "learning_rate": 2.5180526395538543e-05, "loss": 0.0852, "step": 16074 }, { "epoch": 0.28468080773198384, "grad_norm": 0.8580160737037659, "learning_rate": 2.5179894514987178e-05, "loss": 0.0768, "step": 16075 }, { "epoch": 0.2846985172690123, "grad_norm": 0.8592405915260315, "learning_rate": 2.5179262600944994e-05, "loss": 0.0943, "step": 16076 }, { "epoch": 0.2847162268060407, "grad_norm": 0.7597469687461853, "learning_rate": 2.5178630653414062e-05, "loss": 0.0915, "step": 16077 }, { "epoch": 0.28473393634306915, "grad_norm": 0.546674907207489, "learning_rate": 2.5177998672396464e-05, "loss": 0.0727, "step": 16078 }, { "epoch": 0.2847516458800976, "grad_norm": 0.8881522417068481, "learning_rate": 2.5177366657894286e-05, "loss": 0.1049, "step": 16079 }, { "epoch": 0.284769355417126, "grad_norm": 0.6199540495872498, "learning_rate": 2.5176734609909596e-05, "loss": 0.089, "step": 16080 }, { "epoch": 0.28478706495415446, "grad_norm": 0.5803489685058594, "learning_rate": 2.5176102528444476e-05, "loss": 0.0992, "step": 16081 }, { "epoch": 0.28480477449118285, "grad_norm": 0.703930675983429, "learning_rate": 2.517547041350101e-05, "loss": 0.0683, "step": 16082 }, { "epoch": 0.2848224840282113, "grad_norm": 0.5433782935142517, "learning_rate": 2.517483826508128e-05, "loss": 0.0904, "step": 16083 }, { "epoch": 0.2848401935652397, "grad_norm": 0.9085038900375366, "learning_rate": 2.5174206083187357e-05, "loss": 0.1268, "step": 16084 }, { "epoch": 0.28485790310226816, "grad_norm": 0.3679443299770355, "learning_rate": 2.5173573867821326e-05, "loss": 0.0734, "step": 16085 }, { "epoch": 0.28487561263929656, "grad_norm": 0.5501657724380493, "learning_rate": 2.5172941618985267e-05, "loss": 0.0846, "step": 16086 }, { "epoch": 0.284893322176325, "grad_norm": 0.7622559666633606, "learning_rate": 2.5172309336681255e-05, "loss": 0.0925, "step": 16087 }, { "epoch": 0.2849110317133534, "grad_norm": 1.0661945343017578, "learning_rate": 2.517167702091138e-05, "loss": 0.1017, "step": 16088 }, { "epoch": 0.28492874125038187, "grad_norm": 1.000023603439331, "learning_rate": 2.517104467167771e-05, "loss": 0.1225, "step": 16089 }, { "epoch": 0.28494645078741027, "grad_norm": 0.4188677966594696, "learning_rate": 2.517041228898234e-05, "loss": 0.0667, "step": 16090 }, { "epoch": 0.2849641603244387, "grad_norm": 0.6493629813194275, "learning_rate": 2.5169779872827337e-05, "loss": 0.1078, "step": 16091 }, { "epoch": 0.2849818698614672, "grad_norm": 0.7908461093902588, "learning_rate": 2.5169147423214785e-05, "loss": 0.0834, "step": 16092 }, { "epoch": 0.2849995793984956, "grad_norm": 0.6783996820449829, "learning_rate": 2.516851494014677e-05, "loss": 0.1412, "step": 16093 }, { "epoch": 0.285017288935524, "grad_norm": 0.8181930184364319, "learning_rate": 2.5167882423625366e-05, "loss": 0.0884, "step": 16094 }, { "epoch": 0.2850349984725524, "grad_norm": 0.8945848941802979, "learning_rate": 2.516724987365266e-05, "loss": 0.1075, "step": 16095 }, { "epoch": 0.2850527080095809, "grad_norm": 0.7516106367111206, "learning_rate": 2.516661729023073e-05, "loss": 0.0887, "step": 16096 }, { "epoch": 0.2850704175466093, "grad_norm": 0.49265730381011963, "learning_rate": 2.5165984673361658e-05, "loss": 0.0827, "step": 16097 }, { "epoch": 0.28508812708363773, "grad_norm": 1.2573541402816772, "learning_rate": 2.5165352023047526e-05, "loss": 0.1352, "step": 16098 }, { "epoch": 0.28510583662066613, "grad_norm": 1.0045760869979858, "learning_rate": 2.5164719339290406e-05, "loss": 0.1395, "step": 16099 }, { "epoch": 0.2851235461576946, "grad_norm": 0.4824232757091522, "learning_rate": 2.5164086622092398e-05, "loss": 0.111, "step": 16100 }, { "epoch": 0.285141255694723, "grad_norm": 0.6226267218589783, "learning_rate": 2.5163453871455565e-05, "loss": 0.0952, "step": 16101 }, { "epoch": 0.28515896523175144, "grad_norm": 0.7725018858909607, "learning_rate": 2.5162821087382002e-05, "loss": 0.0993, "step": 16102 }, { "epoch": 0.28517667476877984, "grad_norm": 0.7927052974700928, "learning_rate": 2.5162188269873786e-05, "loss": 0.1043, "step": 16103 }, { "epoch": 0.2851943843058083, "grad_norm": 0.6094503402709961, "learning_rate": 2.5161555418932992e-05, "loss": 0.089, "step": 16104 }, { "epoch": 0.2852120938428367, "grad_norm": 1.6491950750350952, "learning_rate": 2.5160922534561712e-05, "loss": 0.0548, "step": 16105 }, { "epoch": 0.28522980337986514, "grad_norm": 0.7574021816253662, "learning_rate": 2.5160289616762026e-05, "loss": 0.1209, "step": 16106 }, { "epoch": 0.2852475129168936, "grad_norm": 1.248522162437439, "learning_rate": 2.5159656665536015e-05, "loss": 0.1009, "step": 16107 }, { "epoch": 0.285265222453922, "grad_norm": 0.6658821105957031, "learning_rate": 2.515902368088576e-05, "loss": 0.0681, "step": 16108 }, { "epoch": 0.28528293199095045, "grad_norm": 0.8272596001625061, "learning_rate": 2.5158390662813346e-05, "loss": 0.0682, "step": 16109 }, { "epoch": 0.28530064152797885, "grad_norm": 0.6425071358680725, "learning_rate": 2.515775761132085e-05, "loss": 0.123, "step": 16110 }, { "epoch": 0.2853183510650073, "grad_norm": 0.9763413071632385, "learning_rate": 2.5157124526410366e-05, "loss": 0.1035, "step": 16111 }, { "epoch": 0.2853360606020357, "grad_norm": 0.8165475130081177, "learning_rate": 2.515649140808396e-05, "loss": 0.096, "step": 16112 }, { "epoch": 0.28535377013906416, "grad_norm": 1.0332398414611816, "learning_rate": 2.5155858256343733e-05, "loss": 0.0713, "step": 16113 }, { "epoch": 0.28537147967609255, "grad_norm": 1.4868091344833374, "learning_rate": 2.5155225071191755e-05, "loss": 0.1862, "step": 16114 }, { "epoch": 0.285389189213121, "grad_norm": 0.6925466656684875, "learning_rate": 2.5154591852630114e-05, "loss": 0.0943, "step": 16115 }, { "epoch": 0.2854068987501494, "grad_norm": 0.6906957030296326, "learning_rate": 2.5153958600660897e-05, "loss": 0.0747, "step": 16116 }, { "epoch": 0.28542460828717786, "grad_norm": 0.6393681168556213, "learning_rate": 2.515332531528618e-05, "loss": 0.1081, "step": 16117 }, { "epoch": 0.28544231782420626, "grad_norm": 0.7038633227348328, "learning_rate": 2.515269199650805e-05, "loss": 0.1196, "step": 16118 }, { "epoch": 0.2854600273612347, "grad_norm": 0.8158683180809021, "learning_rate": 2.515205864432859e-05, "loss": 0.0926, "step": 16119 }, { "epoch": 0.2854777368982631, "grad_norm": 1.0212985277175903, "learning_rate": 2.5151425258749883e-05, "loss": 0.0714, "step": 16120 }, { "epoch": 0.28549544643529157, "grad_norm": 0.622666597366333, "learning_rate": 2.5150791839774012e-05, "loss": 0.1057, "step": 16121 }, { "epoch": 0.28551315597232, "grad_norm": 0.7957166433334351, "learning_rate": 2.515015838740307e-05, "loss": 0.0809, "step": 16122 }, { "epoch": 0.2855308655093484, "grad_norm": 0.8590344786643982, "learning_rate": 2.5149524901639124e-05, "loss": 0.1168, "step": 16123 }, { "epoch": 0.2855485750463769, "grad_norm": 1.025643229484558, "learning_rate": 2.5148891382484274e-05, "loss": 0.0923, "step": 16124 }, { "epoch": 0.28556628458340527, "grad_norm": 0.7732301950454712, "learning_rate": 2.5148257829940595e-05, "loss": 0.0912, "step": 16125 }, { "epoch": 0.2855839941204337, "grad_norm": 0.5657405853271484, "learning_rate": 2.5147624244010177e-05, "loss": 0.0926, "step": 16126 }, { "epoch": 0.2856017036574621, "grad_norm": 1.1159846782684326, "learning_rate": 2.5146990624695097e-05, "loss": 0.0938, "step": 16127 }, { "epoch": 0.2856194131944906, "grad_norm": 0.9596078991889954, "learning_rate": 2.5146356971997446e-05, "loss": 0.0661, "step": 16128 }, { "epoch": 0.285637122731519, "grad_norm": 1.0669132471084595, "learning_rate": 2.514572328591931e-05, "loss": 0.1078, "step": 16129 }, { "epoch": 0.28565483226854743, "grad_norm": 0.5851234197616577, "learning_rate": 2.5145089566462768e-05, "loss": 0.0915, "step": 16130 }, { "epoch": 0.28567254180557583, "grad_norm": 1.1292977333068848, "learning_rate": 2.5144455813629908e-05, "loss": 0.1141, "step": 16131 }, { "epoch": 0.2856902513426043, "grad_norm": 0.5917806029319763, "learning_rate": 2.5143822027422815e-05, "loss": 0.0852, "step": 16132 }, { "epoch": 0.2857079608796327, "grad_norm": 0.6382826566696167, "learning_rate": 2.5143188207843573e-05, "loss": 0.0819, "step": 16133 }, { "epoch": 0.28572567041666114, "grad_norm": 0.7954479455947876, "learning_rate": 2.514255435489427e-05, "loss": 0.0954, "step": 16134 }, { "epoch": 0.28574337995368954, "grad_norm": 1.0731583833694458, "learning_rate": 2.5141920468576982e-05, "loss": 0.0938, "step": 16135 }, { "epoch": 0.285761089490718, "grad_norm": 1.1668028831481934, "learning_rate": 2.514128654889381e-05, "loss": 0.0795, "step": 16136 }, { "epoch": 0.28577879902774644, "grad_norm": 0.6698933839797974, "learning_rate": 2.5140652595846824e-05, "loss": 0.0858, "step": 16137 }, { "epoch": 0.28579650856477484, "grad_norm": 0.5878874659538269, "learning_rate": 2.514001860943812e-05, "loss": 0.0911, "step": 16138 }, { "epoch": 0.2858142181018033, "grad_norm": 0.6755440831184387, "learning_rate": 2.513938458966978e-05, "loss": 0.1065, "step": 16139 }, { "epoch": 0.2858319276388317, "grad_norm": 0.5345856547355652, "learning_rate": 2.513875053654389e-05, "loss": 0.094, "step": 16140 }, { "epoch": 0.28584963717586015, "grad_norm": 0.6227820515632629, "learning_rate": 2.5138116450062537e-05, "loss": 0.0927, "step": 16141 }, { "epoch": 0.28586734671288855, "grad_norm": 0.8343332409858704, "learning_rate": 2.5137482330227804e-05, "loss": 0.077, "step": 16142 }, { "epoch": 0.285885056249917, "grad_norm": 1.0335681438446045, "learning_rate": 2.5136848177041786e-05, "loss": 0.0923, "step": 16143 }, { "epoch": 0.2859027657869454, "grad_norm": 0.9887784123420715, "learning_rate": 2.5136213990506553e-05, "loss": 0.0641, "step": 16144 }, { "epoch": 0.28592047532397386, "grad_norm": 0.8845357298851013, "learning_rate": 2.5135579770624205e-05, "loss": 0.1065, "step": 16145 }, { "epoch": 0.28593818486100225, "grad_norm": 1.210037112236023, "learning_rate": 2.5134945517396827e-05, "loss": 0.0917, "step": 16146 }, { "epoch": 0.2859558943980307, "grad_norm": 0.6410766243934631, "learning_rate": 2.5134311230826505e-05, "loss": 0.0663, "step": 16147 }, { "epoch": 0.2859736039350591, "grad_norm": 0.6262305974960327, "learning_rate": 2.5133676910915322e-05, "loss": 0.0842, "step": 16148 }, { "epoch": 0.28599131347208756, "grad_norm": 1.0912995338439941, "learning_rate": 2.5133042557665364e-05, "loss": 0.1129, "step": 16149 }, { "epoch": 0.28600902300911596, "grad_norm": 0.9719469547271729, "learning_rate": 2.5132408171078723e-05, "loss": 0.0875, "step": 16150 }, { "epoch": 0.2860267325461444, "grad_norm": 0.9315400719642639, "learning_rate": 2.5131773751157483e-05, "loss": 0.0856, "step": 16151 }, { "epoch": 0.28604444208317287, "grad_norm": 0.39440828561782837, "learning_rate": 2.5131139297903735e-05, "loss": 0.0654, "step": 16152 }, { "epoch": 0.28606215162020127, "grad_norm": 0.8518913984298706, "learning_rate": 2.5130504811319562e-05, "loss": 0.1372, "step": 16153 }, { "epoch": 0.2860798611572297, "grad_norm": 0.4178136885166168, "learning_rate": 2.512987029140705e-05, "loss": 0.1722, "step": 16154 }, { "epoch": 0.2860975706942581, "grad_norm": 0.8668466210365295, "learning_rate": 2.5129235738168294e-05, "loss": 0.119, "step": 16155 }, { "epoch": 0.2861152802312866, "grad_norm": 1.3877816200256348, "learning_rate": 2.5128601151605373e-05, "loss": 0.0883, "step": 16156 }, { "epoch": 0.28613298976831497, "grad_norm": 0.7822781801223755, "learning_rate": 2.5127966531720383e-05, "loss": 0.1211, "step": 16157 }, { "epoch": 0.2861506993053434, "grad_norm": 1.0272185802459717, "learning_rate": 2.51273318785154e-05, "loss": 0.118, "step": 16158 }, { "epoch": 0.2861684088423718, "grad_norm": 0.5511778593063354, "learning_rate": 2.512669719199253e-05, "loss": 0.0672, "step": 16159 }, { "epoch": 0.2861861183794003, "grad_norm": 1.0246855020523071, "learning_rate": 2.512606247215384e-05, "loss": 0.1195, "step": 16160 }, { "epoch": 0.2862038279164287, "grad_norm": 0.7500260472297668, "learning_rate": 2.5125427719001436e-05, "loss": 0.1063, "step": 16161 }, { "epoch": 0.28622153745345713, "grad_norm": 0.9494159817695618, "learning_rate": 2.5124792932537395e-05, "loss": 0.1028, "step": 16162 }, { "epoch": 0.28623924699048553, "grad_norm": 0.6507579684257507, "learning_rate": 2.512415811276381e-05, "loss": 0.0996, "step": 16163 }, { "epoch": 0.286256956527514, "grad_norm": 0.8142730593681335, "learning_rate": 2.5123523259682772e-05, "loss": 0.0788, "step": 16164 }, { "epoch": 0.2862746660645424, "grad_norm": 0.9110943675041199, "learning_rate": 2.512288837329636e-05, "loss": 0.1023, "step": 16165 }, { "epoch": 0.28629237560157084, "grad_norm": 0.6523608565330505, "learning_rate": 2.5122253453606675e-05, "loss": 0.1293, "step": 16166 }, { "epoch": 0.2863100851385993, "grad_norm": 0.6775309443473816, "learning_rate": 2.5121618500615794e-05, "loss": 0.0878, "step": 16167 }, { "epoch": 0.2863277946756277, "grad_norm": 0.5506477952003479, "learning_rate": 2.5120983514325818e-05, "loss": 0.0675, "step": 16168 }, { "epoch": 0.28634550421265614, "grad_norm": 1.0515481233596802, "learning_rate": 2.5120348494738828e-05, "loss": 0.0932, "step": 16169 }, { "epoch": 0.28636321374968454, "grad_norm": 0.7400676012039185, "learning_rate": 2.5119713441856914e-05, "loss": 0.0879, "step": 16170 }, { "epoch": 0.286380923286713, "grad_norm": 0.812012791633606, "learning_rate": 2.5119078355682167e-05, "loss": 0.0943, "step": 16171 }, { "epoch": 0.2863986328237414, "grad_norm": 0.7134684324264526, "learning_rate": 2.5118443236216677e-05, "loss": 0.1348, "step": 16172 }, { "epoch": 0.28641634236076985, "grad_norm": 0.7720600366592407, "learning_rate": 2.511780808346253e-05, "loss": 0.106, "step": 16173 }, { "epoch": 0.28643405189779825, "grad_norm": 0.8060240745544434, "learning_rate": 2.511717289742182e-05, "loss": 0.1354, "step": 16174 }, { "epoch": 0.2864517614348267, "grad_norm": 0.7665859460830688, "learning_rate": 2.511653767809663e-05, "loss": 0.0796, "step": 16175 }, { "epoch": 0.2864694709718551, "grad_norm": 1.0094231367111206, "learning_rate": 2.511590242548906e-05, "loss": 0.0679, "step": 16176 }, { "epoch": 0.28648718050888355, "grad_norm": 0.7730621695518494, "learning_rate": 2.5115267139601194e-05, "loss": 0.0665, "step": 16177 }, { "epoch": 0.28650489004591195, "grad_norm": 0.7743422389030457, "learning_rate": 2.5114631820435116e-05, "loss": 0.0821, "step": 16178 }, { "epoch": 0.2865225995829404, "grad_norm": 0.7385982871055603, "learning_rate": 2.5113996467992926e-05, "loss": 0.1254, "step": 16179 }, { "epoch": 0.2865403091199688, "grad_norm": 0.9783926606178284, "learning_rate": 2.511336108227671e-05, "loss": 0.0653, "step": 16180 }, { "epoch": 0.28655801865699726, "grad_norm": 0.9273384213447571, "learning_rate": 2.5112725663288563e-05, "loss": 0.098, "step": 16181 }, { "epoch": 0.2865757281940257, "grad_norm": 1.046160340309143, "learning_rate": 2.5112090211030563e-05, "loss": 0.0842, "step": 16182 }, { "epoch": 0.2865934377310541, "grad_norm": 0.7790787816047668, "learning_rate": 2.5111454725504816e-05, "loss": 0.0846, "step": 16183 }, { "epoch": 0.28661114726808257, "grad_norm": 0.9165845513343811, "learning_rate": 2.5110819206713406e-05, "loss": 0.1032, "step": 16184 }, { "epoch": 0.28662885680511097, "grad_norm": 0.6064402461051941, "learning_rate": 2.511018365465842e-05, "loss": 0.0699, "step": 16185 }, { "epoch": 0.2866465663421394, "grad_norm": 0.8282721638679504, "learning_rate": 2.5109548069341958e-05, "loss": 0.1155, "step": 16186 }, { "epoch": 0.2866642758791678, "grad_norm": 0.7041895389556885, "learning_rate": 2.51089124507661e-05, "loss": 0.095, "step": 16187 }, { "epoch": 0.28668198541619627, "grad_norm": 1.037514328956604, "learning_rate": 2.510827679893294e-05, "loss": 0.1253, "step": 16188 }, { "epoch": 0.28669969495322467, "grad_norm": 0.9099291563034058, "learning_rate": 2.5107641113844578e-05, "loss": 0.1024, "step": 16189 }, { "epoch": 0.2867174044902531, "grad_norm": 0.4769657254219055, "learning_rate": 2.510700539550309e-05, "loss": 0.0768, "step": 16190 }, { "epoch": 0.2867351140272815, "grad_norm": 0.5308960676193237, "learning_rate": 2.5106369643910587e-05, "loss": 0.0671, "step": 16191 }, { "epoch": 0.28675282356431, "grad_norm": 1.9245904684066772, "learning_rate": 2.5105733859069146e-05, "loss": 0.0829, "step": 16192 }, { "epoch": 0.2867705331013384, "grad_norm": 0.9943339228630066, "learning_rate": 2.510509804098086e-05, "loss": 0.0997, "step": 16193 }, { "epoch": 0.28678824263836683, "grad_norm": 0.7325207591056824, "learning_rate": 2.5104462189647827e-05, "loss": 0.0903, "step": 16194 }, { "epoch": 0.28680595217539523, "grad_norm": 0.6171725988388062, "learning_rate": 2.5103826305072133e-05, "loss": 0.0753, "step": 16195 }, { "epoch": 0.2868236617124237, "grad_norm": 1.1049628257751465, "learning_rate": 2.5103190387255874e-05, "loss": 0.0667, "step": 16196 }, { "epoch": 0.28684137124945214, "grad_norm": 0.4955938160419464, "learning_rate": 2.5102554436201144e-05, "loss": 0.0894, "step": 16197 }, { "epoch": 0.28685908078648054, "grad_norm": 0.6803532838821411, "learning_rate": 2.5101918451910028e-05, "loss": 0.0758, "step": 16198 }, { "epoch": 0.286876790323509, "grad_norm": 1.5945134162902832, "learning_rate": 2.5101282434384622e-05, "loss": 0.1458, "step": 16199 }, { "epoch": 0.2868944998605374, "grad_norm": 0.6973593235015869, "learning_rate": 2.5100646383627016e-05, "loss": 0.1127, "step": 16200 }, { "epoch": 0.28691220939756584, "grad_norm": 1.2096259593963623, "learning_rate": 2.510001029963931e-05, "loss": 0.0665, "step": 16201 }, { "epoch": 0.28692991893459424, "grad_norm": 0.7949479222297668, "learning_rate": 2.5099374182423586e-05, "loss": 0.0851, "step": 16202 }, { "epoch": 0.2869476284716227, "grad_norm": 0.5621536374092102, "learning_rate": 2.509873803198195e-05, "loss": 0.072, "step": 16203 }, { "epoch": 0.2869653380086511, "grad_norm": 0.9453181028366089, "learning_rate": 2.5098101848316482e-05, "loss": 0.094, "step": 16204 }, { "epoch": 0.28698304754567955, "grad_norm": 0.9835395216941833, "learning_rate": 2.509746563142928e-05, "loss": 0.0936, "step": 16205 }, { "epoch": 0.28700075708270795, "grad_norm": 0.6028376221656799, "learning_rate": 2.5096829381322435e-05, "loss": 0.0856, "step": 16206 }, { "epoch": 0.2870184666197364, "grad_norm": 0.9319700002670288, "learning_rate": 2.509619309799805e-05, "loss": 0.0775, "step": 16207 }, { "epoch": 0.2870361761567648, "grad_norm": 0.7569637894630432, "learning_rate": 2.5095556781458208e-05, "loss": 0.0839, "step": 16208 }, { "epoch": 0.28705388569379325, "grad_norm": 0.6604136228561401, "learning_rate": 2.5094920431705003e-05, "loss": 0.0803, "step": 16209 }, { "epoch": 0.28707159523082165, "grad_norm": 0.9194954633712769, "learning_rate": 2.509428404874053e-05, "loss": 0.1431, "step": 16210 }, { "epoch": 0.2870893047678501, "grad_norm": 0.9842249155044556, "learning_rate": 2.5093647632566887e-05, "loss": 0.1178, "step": 16211 }, { "epoch": 0.28710701430487856, "grad_norm": 0.6153777241706848, "learning_rate": 2.5093011183186164e-05, "loss": 0.0965, "step": 16212 }, { "epoch": 0.28712472384190696, "grad_norm": 0.9645709991455078, "learning_rate": 2.5092374700600452e-05, "loss": 0.1384, "step": 16213 }, { "epoch": 0.2871424333789354, "grad_norm": 1.0934890508651733, "learning_rate": 2.5091738184811852e-05, "loss": 0.1028, "step": 16214 }, { "epoch": 0.2871601429159638, "grad_norm": 1.4871124029159546, "learning_rate": 2.5091101635822453e-05, "loss": 0.0923, "step": 16215 }, { "epoch": 0.28717785245299227, "grad_norm": 0.78383868932724, "learning_rate": 2.5090465053634347e-05, "loss": 0.0939, "step": 16216 }, { "epoch": 0.28719556199002066, "grad_norm": 0.6046488285064697, "learning_rate": 2.5089828438249634e-05, "loss": 0.1002, "step": 16217 }, { "epoch": 0.2872132715270491, "grad_norm": 0.9206478595733643, "learning_rate": 2.5089191789670408e-05, "loss": 0.1632, "step": 16218 }, { "epoch": 0.2872309810640775, "grad_norm": 0.9322947859764099, "learning_rate": 2.5088555107898756e-05, "loss": 0.1285, "step": 16219 }, { "epoch": 0.28724869060110597, "grad_norm": 0.9367945790290833, "learning_rate": 2.5087918392936782e-05, "loss": 0.1058, "step": 16220 }, { "epoch": 0.28726640013813437, "grad_norm": 1.2135134935379028, "learning_rate": 2.5087281644786576e-05, "loss": 0.1013, "step": 16221 }, { "epoch": 0.2872841096751628, "grad_norm": 1.1247351169586182, "learning_rate": 2.5086644863450233e-05, "loss": 0.1277, "step": 16222 }, { "epoch": 0.2873018192121912, "grad_norm": 0.4341478943824768, "learning_rate": 2.5086008048929853e-05, "loss": 0.0851, "step": 16223 }, { "epoch": 0.2873195287492197, "grad_norm": 1.059572458267212, "learning_rate": 2.508537120122752e-05, "loss": 0.1581, "step": 16224 }, { "epoch": 0.2873372382862481, "grad_norm": 0.7372915148735046, "learning_rate": 2.508473432034534e-05, "loss": 0.1218, "step": 16225 }, { "epoch": 0.28735494782327653, "grad_norm": 0.5745776295661926, "learning_rate": 2.5084097406285402e-05, "loss": 0.0655, "step": 16226 }, { "epoch": 0.287372657360305, "grad_norm": 0.9809247851371765, "learning_rate": 2.5083460459049806e-05, "loss": 0.0835, "step": 16227 }, { "epoch": 0.2873903668973334, "grad_norm": 0.882511556148529, "learning_rate": 2.5082823478640642e-05, "loss": 0.1127, "step": 16228 }, { "epoch": 0.28740807643436184, "grad_norm": 0.7523553371429443, "learning_rate": 2.5082186465060008e-05, "loss": 0.0757, "step": 16229 }, { "epoch": 0.28742578597139024, "grad_norm": 0.5206900238990784, "learning_rate": 2.5081549418310004e-05, "loss": 0.0749, "step": 16230 }, { "epoch": 0.2874434955084187, "grad_norm": 0.8891714811325073, "learning_rate": 2.5080912338392724e-05, "loss": 0.0947, "step": 16231 }, { "epoch": 0.2874612050454471, "grad_norm": 0.7824245095252991, "learning_rate": 2.5080275225310255e-05, "loss": 0.1217, "step": 16232 }, { "epoch": 0.28747891458247554, "grad_norm": 0.6672790050506592, "learning_rate": 2.50796380790647e-05, "loss": 0.0878, "step": 16233 }, { "epoch": 0.28749662411950394, "grad_norm": 0.8637861013412476, "learning_rate": 2.5079000899658164e-05, "loss": 0.1091, "step": 16234 }, { "epoch": 0.2875143336565324, "grad_norm": 0.8189089894294739, "learning_rate": 2.5078363687092733e-05, "loss": 0.1354, "step": 16235 }, { "epoch": 0.2875320431935608, "grad_norm": 0.6500492095947266, "learning_rate": 2.5077726441370495e-05, "loss": 0.0678, "step": 16236 }, { "epoch": 0.28754975273058925, "grad_norm": 0.6774049997329712, "learning_rate": 2.5077089162493565e-05, "loss": 0.1105, "step": 16237 }, { "epoch": 0.28756746226761765, "grad_norm": 0.7915927767753601, "learning_rate": 2.507645185046403e-05, "loss": 0.0685, "step": 16238 }, { "epoch": 0.2875851718046461, "grad_norm": 0.8766377568244934, "learning_rate": 2.5075814505283982e-05, "loss": 0.0652, "step": 16239 }, { "epoch": 0.2876028813416745, "grad_norm": 0.6029287576675415, "learning_rate": 2.5075177126955528e-05, "loss": 0.0646, "step": 16240 }, { "epoch": 0.28762059087870295, "grad_norm": 0.6126486659049988, "learning_rate": 2.5074539715480757e-05, "loss": 0.092, "step": 16241 }, { "epoch": 0.2876383004157314, "grad_norm": 0.9321139454841614, "learning_rate": 2.5073902270861776e-05, "loss": 0.0956, "step": 16242 }, { "epoch": 0.2876560099527598, "grad_norm": 0.7018503546714783, "learning_rate": 2.507326479310067e-05, "loss": 0.0771, "step": 16243 }, { "epoch": 0.28767371948978826, "grad_norm": 0.7747140526771545, "learning_rate": 2.507262728219954e-05, "loss": 0.095, "step": 16244 }, { "epoch": 0.28769142902681666, "grad_norm": 0.9632470011711121, "learning_rate": 2.507198973816049e-05, "loss": 0.0914, "step": 16245 }, { "epoch": 0.2877091385638451, "grad_norm": 0.6101571321487427, "learning_rate": 2.507135216098561e-05, "loss": 0.0603, "step": 16246 }, { "epoch": 0.2877268481008735, "grad_norm": 1.3427053689956665, "learning_rate": 2.5070714550677e-05, "loss": 0.1213, "step": 16247 }, { "epoch": 0.28774455763790197, "grad_norm": 0.6991331577301025, "learning_rate": 2.507007690723676e-05, "loss": 0.0653, "step": 16248 }, { "epoch": 0.28776226717493036, "grad_norm": 1.030620813369751, "learning_rate": 2.506943923066698e-05, "loss": 0.1552, "step": 16249 }, { "epoch": 0.2877799767119588, "grad_norm": 0.7235233783721924, "learning_rate": 2.5068801520969768e-05, "loss": 0.0832, "step": 16250 }, { "epoch": 0.2877976862489872, "grad_norm": 1.060388445854187, "learning_rate": 2.5068163778147217e-05, "loss": 0.1208, "step": 16251 }, { "epoch": 0.28781539578601567, "grad_norm": 0.8165155053138733, "learning_rate": 2.5067526002201426e-05, "loss": 0.1009, "step": 16252 }, { "epoch": 0.28783310532304407, "grad_norm": 1.0897852182388306, "learning_rate": 2.5066888193134492e-05, "loss": 0.1185, "step": 16253 }, { "epoch": 0.2878508148600725, "grad_norm": 0.9476909637451172, "learning_rate": 2.5066250350948512e-05, "loss": 0.1239, "step": 16254 }, { "epoch": 0.2878685243971009, "grad_norm": 1.1238172054290771, "learning_rate": 2.506561247564559e-05, "loss": 0.1346, "step": 16255 }, { "epoch": 0.2878862339341294, "grad_norm": 0.9118081331253052, "learning_rate": 2.5064974567227817e-05, "loss": 0.1338, "step": 16256 }, { "epoch": 0.28790394347115783, "grad_norm": 0.7723249793052673, "learning_rate": 2.50643366256973e-05, "loss": 0.096, "step": 16257 }, { "epoch": 0.28792165300818623, "grad_norm": 1.3733558654785156, "learning_rate": 2.5063698651056133e-05, "loss": 0.1068, "step": 16258 }, { "epoch": 0.2879393625452147, "grad_norm": 0.9347304701805115, "learning_rate": 2.506306064330641e-05, "loss": 0.1063, "step": 16259 }, { "epoch": 0.2879570720822431, "grad_norm": 0.7709836363792419, "learning_rate": 2.5062422602450238e-05, "loss": 0.0947, "step": 16260 }, { "epoch": 0.28797478161927154, "grad_norm": 0.6518771648406982, "learning_rate": 2.5061784528489718e-05, "loss": 0.0588, "step": 16261 }, { "epoch": 0.28799249115629993, "grad_norm": 0.789733350276947, "learning_rate": 2.506114642142694e-05, "loss": 0.1286, "step": 16262 }, { "epoch": 0.2880102006933284, "grad_norm": 0.45309990644454956, "learning_rate": 2.506050828126401e-05, "loss": 0.1126, "step": 16263 }, { "epoch": 0.2880279102303568, "grad_norm": 1.1749415397644043, "learning_rate": 2.5059870108003025e-05, "loss": 0.0772, "step": 16264 }, { "epoch": 0.28804561976738524, "grad_norm": 1.1174010038375854, "learning_rate": 2.5059231901646085e-05, "loss": 0.1452, "step": 16265 }, { "epoch": 0.28806332930441364, "grad_norm": 0.9154521822929382, "learning_rate": 2.505859366219529e-05, "loss": 0.0922, "step": 16266 }, { "epoch": 0.2880810388414421, "grad_norm": 0.8036441802978516, "learning_rate": 2.5057955389652737e-05, "loss": 0.0975, "step": 16267 }, { "epoch": 0.2880987483784705, "grad_norm": 0.7709242701530457, "learning_rate": 2.5057317084020526e-05, "loss": 0.0948, "step": 16268 }, { "epoch": 0.28811645791549895, "grad_norm": 1.224786639213562, "learning_rate": 2.5056678745300763e-05, "loss": 0.1338, "step": 16269 }, { "epoch": 0.28813416745252735, "grad_norm": 0.7918736338615417, "learning_rate": 2.505604037349554e-05, "loss": 0.1116, "step": 16270 }, { "epoch": 0.2881518769895558, "grad_norm": 1.0000450611114502, "learning_rate": 2.5055401968606966e-05, "loss": 0.1094, "step": 16271 }, { "epoch": 0.28816958652658425, "grad_norm": 0.5377984642982483, "learning_rate": 2.5054763530637134e-05, "loss": 0.0764, "step": 16272 }, { "epoch": 0.28818729606361265, "grad_norm": 0.4227994978427887, "learning_rate": 2.5054125059588147e-05, "loss": 0.0893, "step": 16273 }, { "epoch": 0.2882050056006411, "grad_norm": 0.6342710852622986, "learning_rate": 2.505348655546211e-05, "loss": 0.0783, "step": 16274 }, { "epoch": 0.2882227151376695, "grad_norm": 0.7061178684234619, "learning_rate": 2.5052848018261112e-05, "loss": 0.0725, "step": 16275 }, { "epoch": 0.28824042467469796, "grad_norm": 0.8371241092681885, "learning_rate": 2.5052209447987267e-05, "loss": 0.1238, "step": 16276 }, { "epoch": 0.28825813421172636, "grad_norm": 0.7535529732704163, "learning_rate": 2.5051570844642665e-05, "loss": 0.111, "step": 16277 }, { "epoch": 0.2882758437487548, "grad_norm": 0.5763735175132751, "learning_rate": 2.5050932208229415e-05, "loss": 0.0824, "step": 16278 }, { "epoch": 0.2882935532857832, "grad_norm": 0.8547413945198059, "learning_rate": 2.505029353874961e-05, "loss": 0.0801, "step": 16279 }, { "epoch": 0.28831126282281166, "grad_norm": 1.0466423034667969, "learning_rate": 2.504965483620536e-05, "loss": 0.0572, "step": 16280 }, { "epoch": 0.28832897235984006, "grad_norm": 0.9779149293899536, "learning_rate": 2.504901610059876e-05, "loss": 0.0939, "step": 16281 }, { "epoch": 0.2883466818968685, "grad_norm": 0.8583453893661499, "learning_rate": 2.5048377331931913e-05, "loss": 0.1308, "step": 16282 }, { "epoch": 0.2883643914338969, "grad_norm": 0.47085660696029663, "learning_rate": 2.5047738530206922e-05, "loss": 0.0712, "step": 16283 }, { "epoch": 0.28838210097092537, "grad_norm": 0.6267703175544739, "learning_rate": 2.5047099695425888e-05, "loss": 0.0916, "step": 16284 }, { "epoch": 0.28839981050795377, "grad_norm": 0.7595564126968384, "learning_rate": 2.5046460827590914e-05, "loss": 0.1445, "step": 16285 }, { "epoch": 0.2884175200449822, "grad_norm": 0.800751268863678, "learning_rate": 2.5045821926704098e-05, "loss": 0.1068, "step": 16286 }, { "epoch": 0.2884352295820107, "grad_norm": 0.9698659777641296, "learning_rate": 2.5045182992767544e-05, "loss": 0.1121, "step": 16287 }, { "epoch": 0.2884529391190391, "grad_norm": 0.6248121857643127, "learning_rate": 2.5044544025783352e-05, "loss": 0.094, "step": 16288 }, { "epoch": 0.28847064865606753, "grad_norm": 0.7738572359085083, "learning_rate": 2.5043905025753626e-05, "loss": 0.079, "step": 16289 }, { "epoch": 0.28848835819309593, "grad_norm": 0.633220911026001, "learning_rate": 2.5043265992680475e-05, "loss": 0.0891, "step": 16290 }, { "epoch": 0.2885060677301244, "grad_norm": 1.0351625680923462, "learning_rate": 2.5042626926565988e-05, "loss": 0.1226, "step": 16291 }, { "epoch": 0.2885237772671528, "grad_norm": 1.0024229288101196, "learning_rate": 2.5041987827412276e-05, "loss": 0.0943, "step": 16292 }, { "epoch": 0.28854148680418124, "grad_norm": 2.152054786682129, "learning_rate": 2.5041348695221436e-05, "loss": 0.1103, "step": 16293 }, { "epoch": 0.28855919634120963, "grad_norm": 0.9526220560073853, "learning_rate": 2.504070952999558e-05, "loss": 0.0901, "step": 16294 }, { "epoch": 0.2885769058782381, "grad_norm": 0.9999052882194519, "learning_rate": 2.50400703317368e-05, "loss": 0.1278, "step": 16295 }, { "epoch": 0.2885946154152665, "grad_norm": 1.1603114604949951, "learning_rate": 2.5039431100447216e-05, "loss": 0.0837, "step": 16296 }, { "epoch": 0.28861232495229494, "grad_norm": 0.6869332194328308, "learning_rate": 2.5038791836128906e-05, "loss": 0.0837, "step": 16297 }, { "epoch": 0.28863003448932334, "grad_norm": 1.1037147045135498, "learning_rate": 2.5038152538783992e-05, "loss": 0.1137, "step": 16298 }, { "epoch": 0.2886477440263518, "grad_norm": 1.2675440311431885, "learning_rate": 2.5037513208414572e-05, "loss": 0.0828, "step": 16299 }, { "epoch": 0.2886654535633802, "grad_norm": 0.8247741460800171, "learning_rate": 2.5036873845022742e-05, "loss": 0.102, "step": 16300 }, { "epoch": 0.28868316310040865, "grad_norm": 1.5703076124191284, "learning_rate": 2.503623444861062e-05, "loss": 0.1235, "step": 16301 }, { "epoch": 0.2887008726374371, "grad_norm": 0.9842851161956787, "learning_rate": 2.50355950191803e-05, "loss": 0.1143, "step": 16302 }, { "epoch": 0.2887185821744655, "grad_norm": 0.5269930362701416, "learning_rate": 2.5034955556733886e-05, "loss": 0.0729, "step": 16303 }, { "epoch": 0.28873629171149395, "grad_norm": 0.9373272061347961, "learning_rate": 2.503431606127348e-05, "loss": 0.1043, "step": 16304 }, { "epoch": 0.28875400124852235, "grad_norm": 0.8112289905548096, "learning_rate": 2.5033676532801195e-05, "loss": 0.0796, "step": 16305 }, { "epoch": 0.2887717107855508, "grad_norm": 0.7954074740409851, "learning_rate": 2.503303697131913e-05, "loss": 0.0969, "step": 16306 }, { "epoch": 0.2887894203225792, "grad_norm": 1.0065363645553589, "learning_rate": 2.5032397376829383e-05, "loss": 0.1095, "step": 16307 }, { "epoch": 0.28880712985960766, "grad_norm": 0.9934216737747192, "learning_rate": 2.503175774933407e-05, "loss": 0.0912, "step": 16308 }, { "epoch": 0.28882483939663606, "grad_norm": 1.102125883102417, "learning_rate": 2.5031118088835278e-05, "loss": 0.1186, "step": 16309 }, { "epoch": 0.2888425489336645, "grad_norm": 1.1260989904403687, "learning_rate": 2.503047839533513e-05, "loss": 0.1275, "step": 16310 }, { "epoch": 0.2888602584706929, "grad_norm": 1.0251597166061401, "learning_rate": 2.502983866883572e-05, "loss": 0.1175, "step": 16311 }, { "epoch": 0.28887796800772136, "grad_norm": 0.6485092639923096, "learning_rate": 2.5029198909339155e-05, "loss": 0.0752, "step": 16312 }, { "epoch": 0.28889567754474976, "grad_norm": 0.7926607131958008, "learning_rate": 2.5028559116847542e-05, "loss": 0.094, "step": 16313 }, { "epoch": 0.2889133870817782, "grad_norm": 1.160845160484314, "learning_rate": 2.5027919291362983e-05, "loss": 0.1044, "step": 16314 }, { "epoch": 0.2889310966188066, "grad_norm": 0.7788311839103699, "learning_rate": 2.5027279432887585e-05, "loss": 0.0962, "step": 16315 }, { "epoch": 0.28894880615583507, "grad_norm": 0.9064233303070068, "learning_rate": 2.5026639541423448e-05, "loss": 0.1183, "step": 16316 }, { "epoch": 0.2889665156928635, "grad_norm": 1.151475429534912, "learning_rate": 2.5025999616972684e-05, "loss": 0.0991, "step": 16317 }, { "epoch": 0.2889842252298919, "grad_norm": 1.0841377973556519, "learning_rate": 2.5025359659537394e-05, "loss": 0.1201, "step": 16318 }, { "epoch": 0.2890019347669204, "grad_norm": 0.7505732774734497, "learning_rate": 2.5024719669119687e-05, "loss": 0.0953, "step": 16319 }, { "epoch": 0.2890196443039488, "grad_norm": 0.8188832402229309, "learning_rate": 2.5024079645721665e-05, "loss": 0.085, "step": 16320 }, { "epoch": 0.28903735384097723, "grad_norm": 0.6820926070213318, "learning_rate": 2.5023439589345435e-05, "loss": 0.1269, "step": 16321 }, { "epoch": 0.28905506337800563, "grad_norm": 0.7507215738296509, "learning_rate": 2.5022799499993097e-05, "loss": 0.0919, "step": 16322 }, { "epoch": 0.2890727729150341, "grad_norm": 0.6616494655609131, "learning_rate": 2.502215937766677e-05, "loss": 0.0795, "step": 16323 }, { "epoch": 0.2890904824520625, "grad_norm": 0.8686075210571289, "learning_rate": 2.502151922236855e-05, "loss": 0.0931, "step": 16324 }, { "epoch": 0.28910819198909093, "grad_norm": 0.8777660727500916, "learning_rate": 2.5020879034100546e-05, "loss": 0.1066, "step": 16325 }, { "epoch": 0.28912590152611933, "grad_norm": 0.485685259103775, "learning_rate": 2.5020238812864863e-05, "loss": 0.0526, "step": 16326 }, { "epoch": 0.2891436110631478, "grad_norm": 0.40595248341560364, "learning_rate": 2.5019598558663607e-05, "loss": 0.0835, "step": 16327 }, { "epoch": 0.2891613206001762, "grad_norm": 0.7359151840209961, "learning_rate": 2.5018958271498885e-05, "loss": 0.0641, "step": 16328 }, { "epoch": 0.28917903013720464, "grad_norm": 0.8901845812797546, "learning_rate": 2.5018317951372807e-05, "loss": 0.1408, "step": 16329 }, { "epoch": 0.28919673967423304, "grad_norm": 0.6176233291625977, "learning_rate": 2.5017677598287468e-05, "loss": 0.0787, "step": 16330 }, { "epoch": 0.2892144492112615, "grad_norm": 0.7668746709823608, "learning_rate": 2.5017037212244992e-05, "loss": 0.053, "step": 16331 }, { "epoch": 0.28923215874828995, "grad_norm": 0.8867707848548889, "learning_rate": 2.5016396793247472e-05, "loss": 0.12, "step": 16332 }, { "epoch": 0.28924986828531835, "grad_norm": 0.8418675661087036, "learning_rate": 2.5015756341297023e-05, "loss": 0.0946, "step": 16333 }, { "epoch": 0.2892675778223468, "grad_norm": 0.9560034871101379, "learning_rate": 2.5015115856395743e-05, "loss": 0.1002, "step": 16334 }, { "epoch": 0.2892852873593752, "grad_norm": 0.4571652412414551, "learning_rate": 2.5014475338545748e-05, "loss": 0.0605, "step": 16335 }, { "epoch": 0.28930299689640365, "grad_norm": 1.2601567506790161, "learning_rate": 2.5013834787749142e-05, "loss": 0.1096, "step": 16336 }, { "epoch": 0.28932070643343205, "grad_norm": 0.5503294467926025, "learning_rate": 2.501319420400803e-05, "loss": 0.1277, "step": 16337 }, { "epoch": 0.2893384159704605, "grad_norm": 0.959486186504364, "learning_rate": 2.501255358732453e-05, "loss": 0.084, "step": 16338 }, { "epoch": 0.2893561255074889, "grad_norm": 0.4654480516910553, "learning_rate": 2.5011912937700733e-05, "loss": 0.0956, "step": 16339 }, { "epoch": 0.28937383504451736, "grad_norm": 0.6852009892463684, "learning_rate": 2.501127225513876e-05, "loss": 0.1131, "step": 16340 }, { "epoch": 0.28939154458154576, "grad_norm": 0.9015947580337524, "learning_rate": 2.5010631539640715e-05, "loss": 0.1064, "step": 16341 }, { "epoch": 0.2894092541185742, "grad_norm": 1.2545677423477173, "learning_rate": 2.50099907912087e-05, "loss": 0.1371, "step": 16342 }, { "epoch": 0.2894269636556026, "grad_norm": 0.8785799741744995, "learning_rate": 2.5009350009844828e-05, "loss": 0.1069, "step": 16343 }, { "epoch": 0.28944467319263106, "grad_norm": 0.6885555386543274, "learning_rate": 2.5008709195551208e-05, "loss": 0.1209, "step": 16344 }, { "epoch": 0.2894623827296595, "grad_norm": 0.7374235987663269, "learning_rate": 2.5008068348329948e-05, "loss": 0.1321, "step": 16345 }, { "epoch": 0.2894800922666879, "grad_norm": 0.6290664672851562, "learning_rate": 2.5007427468183155e-05, "loss": 0.0824, "step": 16346 }, { "epoch": 0.28949780180371637, "grad_norm": 1.2173913717269897, "learning_rate": 2.500678655511294e-05, "loss": 0.0949, "step": 16347 }, { "epoch": 0.28951551134074477, "grad_norm": 0.9255346059799194, "learning_rate": 2.5006145609121403e-05, "loss": 0.0919, "step": 16348 }, { "epoch": 0.2895332208777732, "grad_norm": 0.7443104982376099, "learning_rate": 2.5005504630210664e-05, "loss": 0.1079, "step": 16349 }, { "epoch": 0.2895509304148016, "grad_norm": 0.8529735207557678, "learning_rate": 2.5004863618382824e-05, "loss": 0.1201, "step": 16350 }, { "epoch": 0.2895686399518301, "grad_norm": 0.8757923245429993, "learning_rate": 2.5004222573639996e-05, "loss": 0.1176, "step": 16351 }, { "epoch": 0.2895863494888585, "grad_norm": 0.5091346502304077, "learning_rate": 2.500358149598429e-05, "loss": 0.0714, "step": 16352 }, { "epoch": 0.28960405902588693, "grad_norm": 1.0186487436294556, "learning_rate": 2.500294038541781e-05, "loss": 0.0713, "step": 16353 }, { "epoch": 0.2896217685629153, "grad_norm": 0.7115959525108337, "learning_rate": 2.500229924194267e-05, "loss": 0.1271, "step": 16354 }, { "epoch": 0.2896394780999438, "grad_norm": 0.6860688328742981, "learning_rate": 2.5001658065560973e-05, "loss": 0.0661, "step": 16355 }, { "epoch": 0.2896571876369722, "grad_norm": 0.9470465779304504, "learning_rate": 2.5001016856274833e-05, "loss": 0.0788, "step": 16356 }, { "epoch": 0.28967489717400063, "grad_norm": 0.3786492943763733, "learning_rate": 2.5000375614086365e-05, "loss": 0.084, "step": 16357 }, { "epoch": 0.28969260671102903, "grad_norm": 0.6767638921737671, "learning_rate": 2.499973433899767e-05, "loss": 0.0635, "step": 16358 }, { "epoch": 0.2897103162480575, "grad_norm": 0.8779987096786499, "learning_rate": 2.4999093031010855e-05, "loss": 0.0648, "step": 16359 }, { "epoch": 0.28972802578508594, "grad_norm": 0.8689431548118591, "learning_rate": 2.4998451690128043e-05, "loss": 0.1258, "step": 16360 }, { "epoch": 0.28974573532211434, "grad_norm": 0.6313885450363159, "learning_rate": 2.4997810316351333e-05, "loss": 0.0975, "step": 16361 }, { "epoch": 0.2897634448591428, "grad_norm": 0.834129810333252, "learning_rate": 2.499716890968284e-05, "loss": 0.1234, "step": 16362 }, { "epoch": 0.2897811543961712, "grad_norm": 0.8470026850700378, "learning_rate": 2.499652747012467e-05, "loss": 0.1153, "step": 16363 }, { "epoch": 0.28979886393319965, "grad_norm": 0.8033882975578308, "learning_rate": 2.4995885997678934e-05, "loss": 0.1146, "step": 16364 }, { "epoch": 0.28981657347022804, "grad_norm": 0.6180228590965271, "learning_rate": 2.4995244492347748e-05, "loss": 0.1135, "step": 16365 }, { "epoch": 0.2898342830072565, "grad_norm": 0.4909825325012207, "learning_rate": 2.4994602954133214e-05, "loss": 0.0947, "step": 16366 }, { "epoch": 0.2898519925442849, "grad_norm": 0.5360368490219116, "learning_rate": 2.499396138303745e-05, "loss": 0.0826, "step": 16367 }, { "epoch": 0.28986970208131335, "grad_norm": 0.636728048324585, "learning_rate": 2.499331977906257e-05, "loss": 0.0894, "step": 16368 }, { "epoch": 0.28988741161834175, "grad_norm": 0.8314254283905029, "learning_rate": 2.4992678142210667e-05, "loss": 0.0751, "step": 16369 }, { "epoch": 0.2899051211553702, "grad_norm": 0.8298219442367554, "learning_rate": 2.499203647248387e-05, "loss": 0.1345, "step": 16370 }, { "epoch": 0.2899228306923986, "grad_norm": 0.7208030819892883, "learning_rate": 2.499139476988428e-05, "loss": 0.066, "step": 16371 }, { "epoch": 0.28994054022942706, "grad_norm": 0.8291845321655273, "learning_rate": 2.4990753034414016e-05, "loss": 0.1129, "step": 16372 }, { "epoch": 0.28995824976645546, "grad_norm": 0.6755053997039795, "learning_rate": 2.4990111266075187e-05, "loss": 0.08, "step": 16373 }, { "epoch": 0.2899759593034839, "grad_norm": 0.8666710257530212, "learning_rate": 2.49894694648699e-05, "loss": 0.0959, "step": 16374 }, { "epoch": 0.28999366884051236, "grad_norm": 0.754547655582428, "learning_rate": 2.4988827630800267e-05, "loss": 0.102, "step": 16375 }, { "epoch": 0.29001137837754076, "grad_norm": 0.8347018957138062, "learning_rate": 2.49881857638684e-05, "loss": 0.0943, "step": 16376 }, { "epoch": 0.2900290879145692, "grad_norm": 0.7219460010528564, "learning_rate": 2.4987543864076418e-05, "loss": 0.1107, "step": 16377 }, { "epoch": 0.2900467974515976, "grad_norm": 0.5525215864181519, "learning_rate": 2.498690193142642e-05, "loss": 0.0905, "step": 16378 }, { "epoch": 0.29006450698862607, "grad_norm": 0.7000701427459717, "learning_rate": 2.498625996592053e-05, "loss": 0.0742, "step": 16379 }, { "epoch": 0.29008221652565447, "grad_norm": 0.9733440279960632, "learning_rate": 2.498561796756085e-05, "loss": 0.1078, "step": 16380 }, { "epoch": 0.2900999260626829, "grad_norm": 0.539318859577179, "learning_rate": 2.4984975936349502e-05, "loss": 0.0998, "step": 16381 }, { "epoch": 0.2901176355997113, "grad_norm": 0.6501055359840393, "learning_rate": 2.498433387228859e-05, "loss": 0.0879, "step": 16382 }, { "epoch": 0.2901353451367398, "grad_norm": 0.6430091857910156, "learning_rate": 2.4983691775380233e-05, "loss": 0.0787, "step": 16383 }, { "epoch": 0.2901530546737682, "grad_norm": 0.4733285903930664, "learning_rate": 2.4983049645626536e-05, "loss": 0.0766, "step": 16384 }, { "epoch": 0.29017076421079663, "grad_norm": 1.07100510597229, "learning_rate": 2.4982407483029614e-05, "loss": 0.0863, "step": 16385 }, { "epoch": 0.290188473747825, "grad_norm": 0.6150283217430115, "learning_rate": 2.4981765287591585e-05, "loss": 0.0916, "step": 16386 }, { "epoch": 0.2902061832848535, "grad_norm": 1.1300162076950073, "learning_rate": 2.4981123059314556e-05, "loss": 0.1084, "step": 16387 }, { "epoch": 0.2902238928218819, "grad_norm": 0.6239416599273682, "learning_rate": 2.4980480798200642e-05, "loss": 0.1219, "step": 16388 }, { "epoch": 0.29024160235891033, "grad_norm": 0.8349998593330383, "learning_rate": 2.4979838504251956e-05, "loss": 0.0805, "step": 16389 }, { "epoch": 0.2902593118959388, "grad_norm": 0.6820632219314575, "learning_rate": 2.497919617747061e-05, "loss": 0.0946, "step": 16390 }, { "epoch": 0.2902770214329672, "grad_norm": 0.3743182420730591, "learning_rate": 2.497855381785872e-05, "loss": 0.0535, "step": 16391 }, { "epoch": 0.29029473096999564, "grad_norm": 0.8422816395759583, "learning_rate": 2.4977911425418395e-05, "loss": 0.1547, "step": 16392 }, { "epoch": 0.29031244050702404, "grad_norm": 0.9024929404258728, "learning_rate": 2.4977269000151752e-05, "loss": 0.0835, "step": 16393 }, { "epoch": 0.2903301500440525, "grad_norm": 0.9587088823318481, "learning_rate": 2.49766265420609e-05, "loss": 0.0873, "step": 16394 }, { "epoch": 0.2903478595810809, "grad_norm": 0.7988584041595459, "learning_rate": 2.497598405114796e-05, "loss": 0.098, "step": 16395 }, { "epoch": 0.29036556911810935, "grad_norm": 0.8744660019874573, "learning_rate": 2.4975341527415036e-05, "loss": 0.084, "step": 16396 }, { "epoch": 0.29038327865513774, "grad_norm": 0.5292447805404663, "learning_rate": 2.4974698970864252e-05, "loss": 0.0699, "step": 16397 }, { "epoch": 0.2904009881921662, "grad_norm": 0.6700170040130615, "learning_rate": 2.497405638149772e-05, "loss": 0.0751, "step": 16398 }, { "epoch": 0.2904186977291946, "grad_norm": 0.3926297724246979, "learning_rate": 2.4973413759317542e-05, "loss": 0.0568, "step": 16399 }, { "epoch": 0.29043640726622305, "grad_norm": 0.6711686253547668, "learning_rate": 2.497277110432585e-05, "loss": 0.1006, "step": 16400 }, { "epoch": 0.29045411680325145, "grad_norm": 1.1059260368347168, "learning_rate": 2.497212841652475e-05, "loss": 0.1182, "step": 16401 }, { "epoch": 0.2904718263402799, "grad_norm": 0.8865326046943665, "learning_rate": 2.4971485695916346e-05, "loss": 0.1061, "step": 16402 }, { "epoch": 0.2904895358773083, "grad_norm": 0.8169355988502502, "learning_rate": 2.4970842942502772e-05, "loss": 0.0562, "step": 16403 }, { "epoch": 0.29050724541433676, "grad_norm": 0.8372573256492615, "learning_rate": 2.497020015628613e-05, "loss": 0.1066, "step": 16404 }, { "epoch": 0.2905249549513652, "grad_norm": 0.8195870518684387, "learning_rate": 2.4969557337268538e-05, "loss": 0.1031, "step": 16405 }, { "epoch": 0.2905426644883936, "grad_norm": 0.5773195028305054, "learning_rate": 2.496891448545211e-05, "loss": 0.0862, "step": 16406 }, { "epoch": 0.29056037402542206, "grad_norm": 0.7052227258682251, "learning_rate": 2.4968271600838966e-05, "loss": 0.1088, "step": 16407 }, { "epoch": 0.29057808356245046, "grad_norm": 0.6128412485122681, "learning_rate": 2.496762868343121e-05, "loss": 0.0829, "step": 16408 }, { "epoch": 0.2905957930994789, "grad_norm": 0.6946632862091064, "learning_rate": 2.496698573323097e-05, "loss": 0.0519, "step": 16409 }, { "epoch": 0.2906135026365073, "grad_norm": 0.5914168953895569, "learning_rate": 2.496634275024035e-05, "loss": 0.0582, "step": 16410 }, { "epoch": 0.29063121217353577, "grad_norm": 0.7488070130348206, "learning_rate": 2.4965699734461477e-05, "loss": 0.0768, "step": 16411 }, { "epoch": 0.29064892171056417, "grad_norm": 0.5337693691253662, "learning_rate": 2.496505668589645e-05, "loss": 0.066, "step": 16412 }, { "epoch": 0.2906666312475926, "grad_norm": 1.0526500940322876, "learning_rate": 2.4964413604547404e-05, "loss": 0.0911, "step": 16413 }, { "epoch": 0.290684340784621, "grad_norm": 0.6742384433746338, "learning_rate": 2.4963770490416438e-05, "loss": 0.1049, "step": 16414 }, { "epoch": 0.2907020503216495, "grad_norm": 0.8769058585166931, "learning_rate": 2.4963127343505683e-05, "loss": 0.0962, "step": 16415 }, { "epoch": 0.2907197598586779, "grad_norm": 1.2423580884933472, "learning_rate": 2.496248416381724e-05, "loss": 0.1438, "step": 16416 }, { "epoch": 0.2907374693957063, "grad_norm": 0.47544652223587036, "learning_rate": 2.496184095135323e-05, "loss": 0.0992, "step": 16417 }, { "epoch": 0.2907551789327347, "grad_norm": 0.6216408014297485, "learning_rate": 2.4961197706115775e-05, "loss": 0.1031, "step": 16418 }, { "epoch": 0.2907728884697632, "grad_norm": 1.1385443210601807, "learning_rate": 2.4960554428106986e-05, "loss": 0.1438, "step": 16419 }, { "epoch": 0.29079059800679163, "grad_norm": 0.5612266063690186, "learning_rate": 2.495991111732898e-05, "loss": 0.1114, "step": 16420 }, { "epoch": 0.29080830754382003, "grad_norm": 0.7273972630500793, "learning_rate": 2.4959267773783876e-05, "loss": 0.0683, "step": 16421 }, { "epoch": 0.2908260170808485, "grad_norm": 0.7549703121185303, "learning_rate": 2.495862439747379e-05, "loss": 0.1067, "step": 16422 }, { "epoch": 0.2908437266178769, "grad_norm": 0.5043936967849731, "learning_rate": 2.495798098840083e-05, "loss": 0.0981, "step": 16423 }, { "epoch": 0.29086143615490534, "grad_norm": 1.2456918954849243, "learning_rate": 2.495733754656713e-05, "loss": 0.0785, "step": 16424 }, { "epoch": 0.29087914569193374, "grad_norm": 0.8759942650794983, "learning_rate": 2.4956694071974783e-05, "loss": 0.081, "step": 16425 }, { "epoch": 0.2908968552289622, "grad_norm": 0.899553120136261, "learning_rate": 2.4956050564625928e-05, "loss": 0.1145, "step": 16426 }, { "epoch": 0.2909145647659906, "grad_norm": 0.6618881821632385, "learning_rate": 2.4955407024522675e-05, "loss": 0.0871, "step": 16427 }, { "epoch": 0.29093227430301905, "grad_norm": 0.42111796140670776, "learning_rate": 2.4954763451667135e-05, "loss": 0.0816, "step": 16428 }, { "epoch": 0.29094998384004744, "grad_norm": 0.5936740040779114, "learning_rate": 2.4954119846061432e-05, "loss": 0.0983, "step": 16429 }, { "epoch": 0.2909676933770759, "grad_norm": 1.873303771018982, "learning_rate": 2.4953476207707683e-05, "loss": 0.0766, "step": 16430 }, { "epoch": 0.2909854029141043, "grad_norm": 0.8280701637268066, "learning_rate": 2.4952832536608003e-05, "loss": 0.092, "step": 16431 }, { "epoch": 0.29100311245113275, "grad_norm": 0.6157441139221191, "learning_rate": 2.495218883276451e-05, "loss": 0.1149, "step": 16432 }, { "epoch": 0.29102082198816115, "grad_norm": 1.1906262636184692, "learning_rate": 2.495154509617932e-05, "loss": 0.1218, "step": 16433 }, { "epoch": 0.2910385315251896, "grad_norm": 0.6844795942306519, "learning_rate": 2.495090132685456e-05, "loss": 0.0559, "step": 16434 }, { "epoch": 0.29105624106221806, "grad_norm": 0.5392673015594482, "learning_rate": 2.4950257524792334e-05, "loss": 0.1006, "step": 16435 }, { "epoch": 0.29107395059924646, "grad_norm": 0.6684706807136536, "learning_rate": 2.494961368999477e-05, "loss": 0.1182, "step": 16436 }, { "epoch": 0.2910916601362749, "grad_norm": 0.8556526899337769, "learning_rate": 2.4948969822463984e-05, "loss": 0.1126, "step": 16437 }, { "epoch": 0.2911093696733033, "grad_norm": 0.4776639938354492, "learning_rate": 2.4948325922202095e-05, "loss": 0.087, "step": 16438 }, { "epoch": 0.29112707921033176, "grad_norm": 0.8143771886825562, "learning_rate": 2.4947681989211215e-05, "loss": 0.071, "step": 16439 }, { "epoch": 0.29114478874736016, "grad_norm": 0.5572056770324707, "learning_rate": 2.4947038023493473e-05, "loss": 0.0842, "step": 16440 }, { "epoch": 0.2911624982843886, "grad_norm": 0.8918533325195312, "learning_rate": 2.4946394025050978e-05, "loss": 0.0829, "step": 16441 }, { "epoch": 0.291180207821417, "grad_norm": 0.45738130807876587, "learning_rate": 2.4945749993885853e-05, "loss": 0.0844, "step": 16442 }, { "epoch": 0.29119791735844547, "grad_norm": 0.9368532299995422, "learning_rate": 2.494510593000022e-05, "loss": 0.1037, "step": 16443 }, { "epoch": 0.29121562689547387, "grad_norm": 0.7153545022010803, "learning_rate": 2.494446183339619e-05, "loss": 0.091, "step": 16444 }, { "epoch": 0.2912333364325023, "grad_norm": 0.8620760440826416, "learning_rate": 2.494381770407589e-05, "loss": 0.1006, "step": 16445 }, { "epoch": 0.2912510459695307, "grad_norm": 0.8283004760742188, "learning_rate": 2.4943173542041433e-05, "loss": 0.1145, "step": 16446 }, { "epoch": 0.2912687555065592, "grad_norm": 0.8700131177902222, "learning_rate": 2.4942529347294943e-05, "loss": 0.1003, "step": 16447 }, { "epoch": 0.2912864650435876, "grad_norm": 1.2118581533432007, "learning_rate": 2.4941885119838535e-05, "loss": 0.1134, "step": 16448 }, { "epoch": 0.291304174580616, "grad_norm": 0.5093675255775452, "learning_rate": 2.494124085967433e-05, "loss": 0.088, "step": 16449 }, { "epoch": 0.2913218841176445, "grad_norm": 1.121659755706787, "learning_rate": 2.4940596566804452e-05, "loss": 0.118, "step": 16450 }, { "epoch": 0.2913395936546729, "grad_norm": 1.1388951539993286, "learning_rate": 2.4939952241231013e-05, "loss": 0.0742, "step": 16451 }, { "epoch": 0.29135730319170133, "grad_norm": 1.0935938358306885, "learning_rate": 2.4939307882956136e-05, "loss": 0.1113, "step": 16452 }, { "epoch": 0.29137501272872973, "grad_norm": 0.8455023169517517, "learning_rate": 2.493866349198194e-05, "loss": 0.0982, "step": 16453 }, { "epoch": 0.2913927222657582, "grad_norm": 0.726844310760498, "learning_rate": 2.493801906831055e-05, "loss": 0.1039, "step": 16454 }, { "epoch": 0.2914104318027866, "grad_norm": 0.888715386390686, "learning_rate": 2.4937374611944084e-05, "loss": 0.1083, "step": 16455 }, { "epoch": 0.29142814133981504, "grad_norm": 0.9258120656013489, "learning_rate": 2.4936730122884657e-05, "loss": 0.0835, "step": 16456 }, { "epoch": 0.29144585087684344, "grad_norm": 0.53844153881073, "learning_rate": 2.4936085601134396e-05, "loss": 0.0716, "step": 16457 }, { "epoch": 0.2914635604138719, "grad_norm": 0.7997942566871643, "learning_rate": 2.4935441046695416e-05, "loss": 0.0805, "step": 16458 }, { "epoch": 0.2914812699509003, "grad_norm": 0.6262016296386719, "learning_rate": 2.493479645956984e-05, "loss": 0.0918, "step": 16459 }, { "epoch": 0.29149897948792874, "grad_norm": 0.7703151702880859, "learning_rate": 2.493415183975979e-05, "loss": 0.1066, "step": 16460 }, { "epoch": 0.29151668902495714, "grad_norm": 0.9169155359268188, "learning_rate": 2.4933507187267384e-05, "loss": 0.0839, "step": 16461 }, { "epoch": 0.2915343985619856, "grad_norm": 0.7967784404754639, "learning_rate": 2.493286250209474e-05, "loss": 0.0757, "step": 16462 }, { "epoch": 0.291552108099014, "grad_norm": 1.3056511878967285, "learning_rate": 2.4932217784243992e-05, "loss": 0.1152, "step": 16463 }, { "epoch": 0.29156981763604245, "grad_norm": 0.5598263144493103, "learning_rate": 2.4931573033717247e-05, "loss": 0.1013, "step": 16464 }, { "epoch": 0.2915875271730709, "grad_norm": 0.7738112807273865, "learning_rate": 2.493092825051663e-05, "loss": 0.105, "step": 16465 }, { "epoch": 0.2916052367100993, "grad_norm": 0.9145441055297852, "learning_rate": 2.4930283434644265e-05, "loss": 0.0944, "step": 16466 }, { "epoch": 0.29162294624712776, "grad_norm": 0.7371585369110107, "learning_rate": 2.492963858610227e-05, "loss": 0.138, "step": 16467 }, { "epoch": 0.29164065578415616, "grad_norm": 1.7474859952926636, "learning_rate": 2.492899370489277e-05, "loss": 0.0897, "step": 16468 }, { "epoch": 0.2916583653211846, "grad_norm": 0.8259074091911316, "learning_rate": 2.4928348791017887e-05, "loss": 0.1282, "step": 16469 }, { "epoch": 0.291676074858213, "grad_norm": 0.6230974793434143, "learning_rate": 2.492770384447974e-05, "loss": 0.0802, "step": 16470 }, { "epoch": 0.29169378439524146, "grad_norm": 0.344585120677948, "learning_rate": 2.4927058865280448e-05, "loss": 0.0708, "step": 16471 }, { "epoch": 0.29171149393226986, "grad_norm": 0.7836756110191345, "learning_rate": 2.492641385342214e-05, "loss": 0.0863, "step": 16472 }, { "epoch": 0.2917292034692983, "grad_norm": 1.0499590635299683, "learning_rate": 2.4925768808906934e-05, "loss": 0.0545, "step": 16473 }, { "epoch": 0.2917469130063267, "grad_norm": 1.0350151062011719, "learning_rate": 2.492512373173695e-05, "loss": 0.0826, "step": 16474 }, { "epoch": 0.29176462254335517, "grad_norm": 1.336861252784729, "learning_rate": 2.4924478621914316e-05, "loss": 0.0758, "step": 16475 }, { "epoch": 0.29178233208038357, "grad_norm": 0.841829776763916, "learning_rate": 2.4923833479441147e-05, "loss": 0.1011, "step": 16476 }, { "epoch": 0.291800041617412, "grad_norm": 0.5592455863952637, "learning_rate": 2.4923188304319574e-05, "loss": 0.1026, "step": 16477 }, { "epoch": 0.2918177511544404, "grad_norm": 0.8704729080200195, "learning_rate": 2.4922543096551712e-05, "loss": 0.1024, "step": 16478 }, { "epoch": 0.2918354606914689, "grad_norm": 0.48459646105766296, "learning_rate": 2.492189785613969e-05, "loss": 0.0888, "step": 16479 }, { "epoch": 0.2918531702284973, "grad_norm": 1.0533814430236816, "learning_rate": 2.4921252583085626e-05, "loss": 0.0722, "step": 16480 }, { "epoch": 0.2918708797655257, "grad_norm": 0.8007128238677979, "learning_rate": 2.492060727739164e-05, "loss": 0.1178, "step": 16481 }, { "epoch": 0.2918885893025542, "grad_norm": 0.7472453117370605, "learning_rate": 2.4919961939059867e-05, "loss": 0.074, "step": 16482 }, { "epoch": 0.2919062988395826, "grad_norm": 1.1432232856750488, "learning_rate": 2.4919316568092422e-05, "loss": 0.1123, "step": 16483 }, { "epoch": 0.29192400837661103, "grad_norm": 0.9281274080276489, "learning_rate": 2.4918671164491423e-05, "loss": 0.1148, "step": 16484 }, { "epoch": 0.29194171791363943, "grad_norm": 0.8672433495521545, "learning_rate": 2.4918025728259005e-05, "loss": 0.1263, "step": 16485 }, { "epoch": 0.2919594274506679, "grad_norm": 0.8504838943481445, "learning_rate": 2.4917380259397282e-05, "loss": 0.0778, "step": 16486 }, { "epoch": 0.2919771369876963, "grad_norm": 0.9121101498603821, "learning_rate": 2.491673475790838e-05, "loss": 0.082, "step": 16487 }, { "epoch": 0.29199484652472474, "grad_norm": 0.5668163299560547, "learning_rate": 2.4916089223794426e-05, "loss": 0.1222, "step": 16488 }, { "epoch": 0.29201255606175314, "grad_norm": 0.928808867931366, "learning_rate": 2.4915443657057538e-05, "loss": 0.0771, "step": 16489 }, { "epoch": 0.2920302655987816, "grad_norm": 1.1073089838027954, "learning_rate": 2.4914798057699846e-05, "loss": 0.1262, "step": 16490 }, { "epoch": 0.29204797513581, "grad_norm": 0.7889392971992493, "learning_rate": 2.4914152425723474e-05, "loss": 0.0994, "step": 16491 }, { "epoch": 0.29206568467283844, "grad_norm": 0.979742705821991, "learning_rate": 2.491350676113054e-05, "loss": 0.0923, "step": 16492 }, { "epoch": 0.29208339420986684, "grad_norm": 0.6604041457176208, "learning_rate": 2.491286106392317e-05, "loss": 0.0616, "step": 16493 }, { "epoch": 0.2921011037468953, "grad_norm": 0.7989192008972168, "learning_rate": 2.4912215334103492e-05, "loss": 0.0949, "step": 16494 }, { "epoch": 0.29211881328392375, "grad_norm": 1.0525550842285156, "learning_rate": 2.4911569571673628e-05, "loss": 0.1178, "step": 16495 }, { "epoch": 0.29213652282095215, "grad_norm": 1.1090891361236572, "learning_rate": 2.4910923776635705e-05, "loss": 0.1307, "step": 16496 }, { "epoch": 0.2921542323579806, "grad_norm": 0.9651985764503479, "learning_rate": 2.4910277948991836e-05, "loss": 0.0939, "step": 16497 }, { "epoch": 0.292171941895009, "grad_norm": 0.6763907670974731, "learning_rate": 2.4909632088744167e-05, "loss": 0.0832, "step": 16498 }, { "epoch": 0.29218965143203746, "grad_norm": 0.8108667731285095, "learning_rate": 2.4908986195894804e-05, "loss": 0.0917, "step": 16499 }, { "epoch": 0.29220736096906585, "grad_norm": 0.8801407814025879, "learning_rate": 2.4908340270445883e-05, "loss": 0.0589, "step": 16500 }, { "epoch": 0.2922250705060943, "grad_norm": 0.9415575861930847, "learning_rate": 2.4907694312399516e-05, "loss": 0.0968, "step": 16501 }, { "epoch": 0.2922427800431227, "grad_norm": 0.6189916133880615, "learning_rate": 2.4907048321757844e-05, "loss": 0.073, "step": 16502 }, { "epoch": 0.29226048958015116, "grad_norm": 1.048504114151001, "learning_rate": 2.4906402298522988e-05, "loss": 0.1188, "step": 16503 }, { "epoch": 0.29227819911717956, "grad_norm": 0.7098197340965271, "learning_rate": 2.4905756242697058e-05, "loss": 0.0828, "step": 16504 }, { "epoch": 0.292295908654208, "grad_norm": 0.8187053203582764, "learning_rate": 2.4905110154282203e-05, "loss": 0.106, "step": 16505 }, { "epoch": 0.2923136181912364, "grad_norm": 1.584957242012024, "learning_rate": 2.4904464033280538e-05, "loss": 0.0883, "step": 16506 }, { "epoch": 0.29233132772826487, "grad_norm": 0.5660377740859985, "learning_rate": 2.490381787969418e-05, "loss": 0.0909, "step": 16507 }, { "epoch": 0.29234903726529327, "grad_norm": 0.6866327524185181, "learning_rate": 2.4903171693525268e-05, "loss": 0.0624, "step": 16508 }, { "epoch": 0.2923667468023217, "grad_norm": 1.0448225736618042, "learning_rate": 2.4902525474775922e-05, "loss": 0.1202, "step": 16509 }, { "epoch": 0.2923844563393502, "grad_norm": 0.8311439156532288, "learning_rate": 2.490187922344827e-05, "loss": 0.0979, "step": 16510 }, { "epoch": 0.2924021658763786, "grad_norm": 0.8332817554473877, "learning_rate": 2.4901232939544435e-05, "loss": 0.093, "step": 16511 }, { "epoch": 0.292419875413407, "grad_norm": 0.8300650119781494, "learning_rate": 2.4900586623066545e-05, "loss": 0.096, "step": 16512 }, { "epoch": 0.2924375849504354, "grad_norm": 0.856410801410675, "learning_rate": 2.489994027401673e-05, "loss": 0.0778, "step": 16513 }, { "epoch": 0.2924552944874639, "grad_norm": 1.1072880029678345, "learning_rate": 2.489929389239711e-05, "loss": 0.0743, "step": 16514 }, { "epoch": 0.2924730040244923, "grad_norm": 0.8847995400428772, "learning_rate": 2.4898647478209812e-05, "loss": 0.0927, "step": 16515 }, { "epoch": 0.29249071356152073, "grad_norm": 1.0919135808944702, "learning_rate": 2.4898001031456968e-05, "loss": 0.1022, "step": 16516 }, { "epoch": 0.29250842309854913, "grad_norm": 0.8751809597015381, "learning_rate": 2.4897354552140697e-05, "loss": 0.1227, "step": 16517 }, { "epoch": 0.2925261326355776, "grad_norm": 0.5713136196136475, "learning_rate": 2.4896708040263135e-05, "loss": 0.0977, "step": 16518 }, { "epoch": 0.292543842172606, "grad_norm": 0.9695112109184265, "learning_rate": 2.4896061495826406e-05, "loss": 0.1393, "step": 16519 }, { "epoch": 0.29256155170963444, "grad_norm": 0.7553092241287231, "learning_rate": 2.4895414918832634e-05, "loss": 0.0824, "step": 16520 }, { "epoch": 0.29257926124666284, "grad_norm": 0.9350568652153015, "learning_rate": 2.4894768309283947e-05, "loss": 0.0911, "step": 16521 }, { "epoch": 0.2925969707836913, "grad_norm": 0.6245827078819275, "learning_rate": 2.4894121667182472e-05, "loss": 0.0867, "step": 16522 }, { "epoch": 0.2926146803207197, "grad_norm": 0.8127739429473877, "learning_rate": 2.4893474992530338e-05, "loss": 0.0879, "step": 16523 }, { "epoch": 0.29263238985774814, "grad_norm": 0.5907748937606812, "learning_rate": 2.4892828285329675e-05, "loss": 0.1067, "step": 16524 }, { "epoch": 0.2926500993947766, "grad_norm": 1.144239068031311, "learning_rate": 2.4892181545582605e-05, "loss": 0.1083, "step": 16525 }, { "epoch": 0.292667808931805, "grad_norm": 0.5003487467765808, "learning_rate": 2.489153477329126e-05, "loss": 0.077, "step": 16526 }, { "epoch": 0.29268551846883345, "grad_norm": 0.6362851858139038, "learning_rate": 2.4890887968457763e-05, "loss": 0.0917, "step": 16527 }, { "epoch": 0.29270322800586185, "grad_norm": 0.6546418070793152, "learning_rate": 2.489024113108425e-05, "loss": 0.089, "step": 16528 }, { "epoch": 0.2927209375428903, "grad_norm": 0.7308571338653564, "learning_rate": 2.4889594261172838e-05, "loss": 0.0949, "step": 16529 }, { "epoch": 0.2927386470799187, "grad_norm": 0.8308498859405518, "learning_rate": 2.4888947358725663e-05, "loss": 0.0703, "step": 16530 }, { "epoch": 0.29275635661694716, "grad_norm": 0.8319394588470459, "learning_rate": 2.488830042374485e-05, "loss": 0.1172, "step": 16531 }, { "epoch": 0.29277406615397555, "grad_norm": 0.36255231499671936, "learning_rate": 2.4887653456232535e-05, "loss": 0.0882, "step": 16532 }, { "epoch": 0.292791775691004, "grad_norm": 0.980587363243103, "learning_rate": 2.4887006456190834e-05, "loss": 0.0965, "step": 16533 }, { "epoch": 0.2928094852280324, "grad_norm": 0.8831157088279724, "learning_rate": 2.4886359423621886e-05, "loss": 0.1283, "step": 16534 }, { "epoch": 0.29282719476506086, "grad_norm": 0.9572278261184692, "learning_rate": 2.4885712358527814e-05, "loss": 0.0786, "step": 16535 }, { "epoch": 0.29284490430208926, "grad_norm": 0.8597052693367004, "learning_rate": 2.4885065260910747e-05, "loss": 0.1067, "step": 16536 }, { "epoch": 0.2928626138391177, "grad_norm": 0.9855288863182068, "learning_rate": 2.4884418130772818e-05, "loss": 0.1013, "step": 16537 }, { "epoch": 0.2928803233761461, "grad_norm": 0.910545289516449, "learning_rate": 2.4883770968116146e-05, "loss": 0.0816, "step": 16538 }, { "epoch": 0.29289803291317457, "grad_norm": 0.7779289484024048, "learning_rate": 2.4883123772942872e-05, "loss": 0.1075, "step": 16539 }, { "epoch": 0.292915742450203, "grad_norm": 0.6945046782493591, "learning_rate": 2.488247654525512e-05, "loss": 0.0824, "step": 16540 }, { "epoch": 0.2929334519872314, "grad_norm": 1.1521373987197876, "learning_rate": 2.4881829285055026e-05, "loss": 0.077, "step": 16541 }, { "epoch": 0.2929511615242599, "grad_norm": 0.7755136489868164, "learning_rate": 2.4881181992344705e-05, "loss": 0.0913, "step": 16542 }, { "epoch": 0.29296887106128827, "grad_norm": 0.7354781031608582, "learning_rate": 2.4880534667126297e-05, "loss": 0.1205, "step": 16543 }, { "epoch": 0.2929865805983167, "grad_norm": 1.0219602584838867, "learning_rate": 2.487988730940193e-05, "loss": 0.0974, "step": 16544 }, { "epoch": 0.2930042901353451, "grad_norm": 0.6278086304664612, "learning_rate": 2.487923991917373e-05, "loss": 0.1271, "step": 16545 }, { "epoch": 0.2930219996723736, "grad_norm": 1.0130637884140015, "learning_rate": 2.4878592496443833e-05, "loss": 0.0908, "step": 16546 }, { "epoch": 0.293039709209402, "grad_norm": 0.678129255771637, "learning_rate": 2.4877945041214364e-05, "loss": 0.0825, "step": 16547 }, { "epoch": 0.29305741874643043, "grad_norm": 0.9914253354072571, "learning_rate": 2.4877297553487454e-05, "loss": 0.0824, "step": 16548 }, { "epoch": 0.29307512828345883, "grad_norm": 0.42465198040008545, "learning_rate": 2.487665003326524e-05, "loss": 0.0998, "step": 16549 }, { "epoch": 0.2930928378204873, "grad_norm": 0.6580094695091248, "learning_rate": 2.487600248054984e-05, "loss": 0.071, "step": 16550 }, { "epoch": 0.2931105473575157, "grad_norm": 0.6505588889122009, "learning_rate": 2.4875354895343397e-05, "loss": 0.0846, "step": 16551 }, { "epoch": 0.29312825689454414, "grad_norm": 0.7427982687950134, "learning_rate": 2.487470727764803e-05, "loss": 0.0675, "step": 16552 }, { "epoch": 0.29314596643157254, "grad_norm": 0.5525614619255066, "learning_rate": 2.4874059627465877e-05, "loss": 0.0834, "step": 16553 }, { "epoch": 0.293163675968601, "grad_norm": 0.7161643505096436, "learning_rate": 2.4873411944799064e-05, "loss": 0.0781, "step": 16554 }, { "epoch": 0.29318138550562944, "grad_norm": 0.8483414053916931, "learning_rate": 2.4872764229649726e-05, "loss": 0.0688, "step": 16555 }, { "epoch": 0.29319909504265784, "grad_norm": 0.853036105632782, "learning_rate": 2.4872116482019988e-05, "loss": 0.0931, "step": 16556 }, { "epoch": 0.2932168045796863, "grad_norm": 1.041164755821228, "learning_rate": 2.4871468701911995e-05, "loss": 0.0828, "step": 16557 }, { "epoch": 0.2932345141167147, "grad_norm": 1.1540114879608154, "learning_rate": 2.487082088932786e-05, "loss": 0.0982, "step": 16558 }, { "epoch": 0.29325222365374315, "grad_norm": 0.5965383052825928, "learning_rate": 2.4870173044269727e-05, "loss": 0.0722, "step": 16559 }, { "epoch": 0.29326993319077155, "grad_norm": 0.9032995700836182, "learning_rate": 2.486952516673972e-05, "loss": 0.0911, "step": 16560 }, { "epoch": 0.2932876427278, "grad_norm": 0.8412985801696777, "learning_rate": 2.4868877256739978e-05, "loss": 0.0954, "step": 16561 }, { "epoch": 0.2933053522648284, "grad_norm": 0.7961260676383972, "learning_rate": 2.4868229314272625e-05, "loss": 0.0874, "step": 16562 }, { "epoch": 0.29332306180185685, "grad_norm": 1.6988067626953125, "learning_rate": 2.4867581339339793e-05, "loss": 0.0965, "step": 16563 }, { "epoch": 0.29334077133888525, "grad_norm": 1.2954037189483643, "learning_rate": 2.486693333194362e-05, "loss": 0.105, "step": 16564 }, { "epoch": 0.2933584808759137, "grad_norm": 1.1275612115859985, "learning_rate": 2.4866285292086235e-05, "loss": 0.0768, "step": 16565 }, { "epoch": 0.2933761904129421, "grad_norm": 1.4771965742111206, "learning_rate": 2.486563721976977e-05, "loss": 0.1289, "step": 16566 }, { "epoch": 0.29339389994997056, "grad_norm": 3.3129618167877197, "learning_rate": 2.486498911499635e-05, "loss": 0.1165, "step": 16567 }, { "epoch": 0.29341160948699896, "grad_norm": 0.9359400272369385, "learning_rate": 2.486434097776812e-05, "loss": 0.1045, "step": 16568 }, { "epoch": 0.2934293190240274, "grad_norm": 1.0031081438064575, "learning_rate": 2.4863692808087204e-05, "loss": 0.0927, "step": 16569 }, { "epoch": 0.29344702856105587, "grad_norm": 1.1889206171035767, "learning_rate": 2.4863044605955735e-05, "loss": 0.0781, "step": 16570 }, { "epoch": 0.29346473809808427, "grad_norm": 0.9272879958152771, "learning_rate": 2.486239637137585e-05, "loss": 0.091, "step": 16571 }, { "epoch": 0.2934824476351127, "grad_norm": 0.5628670454025269, "learning_rate": 2.4861748104349672e-05, "loss": 0.0884, "step": 16572 }, { "epoch": 0.2935001571721411, "grad_norm": 0.8978553414344788, "learning_rate": 2.486109980487935e-05, "loss": 0.1, "step": 16573 }, { "epoch": 0.2935178667091696, "grad_norm": 0.49029287695884705, "learning_rate": 2.4860451472967e-05, "loss": 0.0733, "step": 16574 }, { "epoch": 0.29353557624619797, "grad_norm": 0.8597450852394104, "learning_rate": 2.485980310861476e-05, "loss": 0.0755, "step": 16575 }, { "epoch": 0.2935532857832264, "grad_norm": 0.7198079824447632, "learning_rate": 2.4859154711824773e-05, "loss": 0.1016, "step": 16576 }, { "epoch": 0.2935709953202548, "grad_norm": 0.8002263307571411, "learning_rate": 2.485850628259916e-05, "loss": 0.0772, "step": 16577 }, { "epoch": 0.2935887048572833, "grad_norm": 0.996969997882843, "learning_rate": 2.4857857820940057e-05, "loss": 0.0838, "step": 16578 }, { "epoch": 0.2936064143943117, "grad_norm": 0.8998783826828003, "learning_rate": 2.4857209326849595e-05, "loss": 0.0712, "step": 16579 }, { "epoch": 0.29362412393134013, "grad_norm": 1.2761890888214111, "learning_rate": 2.485656080032992e-05, "loss": 0.1368, "step": 16580 }, { "epoch": 0.29364183346836853, "grad_norm": 0.865347683429718, "learning_rate": 2.485591224138315e-05, "loss": 0.0794, "step": 16581 }, { "epoch": 0.293659543005397, "grad_norm": 1.0140571594238281, "learning_rate": 2.485526365001143e-05, "loss": 0.0667, "step": 16582 }, { "epoch": 0.2936772525424254, "grad_norm": 1.35404372215271, "learning_rate": 2.4854615026216888e-05, "loss": 0.1388, "step": 16583 }, { "epoch": 0.29369496207945384, "grad_norm": 0.5724350810050964, "learning_rate": 2.485396637000166e-05, "loss": 0.0464, "step": 16584 }, { "epoch": 0.2937126716164823, "grad_norm": 0.7709205746650696, "learning_rate": 2.485331768136788e-05, "loss": 0.0799, "step": 16585 }, { "epoch": 0.2937303811535107, "grad_norm": 0.8400166034698486, "learning_rate": 2.4852668960317677e-05, "loss": 0.126, "step": 16586 }, { "epoch": 0.29374809069053914, "grad_norm": 3.1825573444366455, "learning_rate": 2.4852020206853194e-05, "loss": 0.1175, "step": 16587 }, { "epoch": 0.29376580022756754, "grad_norm": 1.152357816696167, "learning_rate": 2.485137142097656e-05, "loss": 0.1331, "step": 16588 }, { "epoch": 0.293783509764596, "grad_norm": 0.7219162583351135, "learning_rate": 2.485072260268991e-05, "loss": 0.1236, "step": 16589 }, { "epoch": 0.2938012193016244, "grad_norm": 1.0499157905578613, "learning_rate": 2.485007375199538e-05, "loss": 0.1161, "step": 16590 }, { "epoch": 0.29381892883865285, "grad_norm": 1.6101493835449219, "learning_rate": 2.4849424868895103e-05, "loss": 0.1154, "step": 16591 }, { "epoch": 0.29383663837568125, "grad_norm": 0.8292871117591858, "learning_rate": 2.4848775953391215e-05, "loss": 0.0842, "step": 16592 }, { "epoch": 0.2938543479127097, "grad_norm": 0.5770927667617798, "learning_rate": 2.484812700548585e-05, "loss": 0.0633, "step": 16593 }, { "epoch": 0.2938720574497381, "grad_norm": 1.8524099588394165, "learning_rate": 2.4847478025181143e-05, "loss": 0.1092, "step": 16594 }, { "epoch": 0.29388976698676655, "grad_norm": 0.7611445784568787, "learning_rate": 2.4846829012479228e-05, "loss": 0.085, "step": 16595 }, { "epoch": 0.29390747652379495, "grad_norm": 0.7865673899650574, "learning_rate": 2.4846179967382247e-05, "loss": 0.0884, "step": 16596 }, { "epoch": 0.2939251860608234, "grad_norm": 0.6596060991287231, "learning_rate": 2.4845530889892323e-05, "loss": 0.0917, "step": 16597 }, { "epoch": 0.2939428955978518, "grad_norm": 1.192232608795166, "learning_rate": 2.4844881780011595e-05, "loss": 0.106, "step": 16598 }, { "epoch": 0.29396060513488026, "grad_norm": 0.9432225823402405, "learning_rate": 2.484423263774221e-05, "loss": 0.1026, "step": 16599 }, { "epoch": 0.2939783146719087, "grad_norm": 1.362683892250061, "learning_rate": 2.4843583463086296e-05, "loss": 0.0904, "step": 16600 }, { "epoch": 0.2939960242089371, "grad_norm": 0.8898209929466248, "learning_rate": 2.4842934256045983e-05, "loss": 0.0906, "step": 16601 }, { "epoch": 0.29401373374596557, "grad_norm": 0.5881742835044861, "learning_rate": 2.484228501662341e-05, "loss": 0.0527, "step": 16602 }, { "epoch": 0.29403144328299397, "grad_norm": 0.7407026886940002, "learning_rate": 2.4841635744820724e-05, "loss": 0.1021, "step": 16603 }, { "epoch": 0.2940491528200224, "grad_norm": 0.9420667290687561, "learning_rate": 2.484098644064004e-05, "loss": 0.1172, "step": 16604 }, { "epoch": 0.2940668623570508, "grad_norm": 0.6259368062019348, "learning_rate": 2.4840337104083518e-05, "loss": 0.1177, "step": 16605 }, { "epoch": 0.29408457189407927, "grad_norm": 0.7623085975646973, "learning_rate": 2.4839687735153274e-05, "loss": 0.1095, "step": 16606 }, { "epoch": 0.29410228143110767, "grad_norm": 0.6443896293640137, "learning_rate": 2.4839038333851458e-05, "loss": 0.0848, "step": 16607 }, { "epoch": 0.2941199909681361, "grad_norm": 0.8048872351646423, "learning_rate": 2.4838388900180195e-05, "loss": 0.1147, "step": 16608 }, { "epoch": 0.2941377005051645, "grad_norm": 0.9971546530723572, "learning_rate": 2.483773943414163e-05, "loss": 0.1139, "step": 16609 }, { "epoch": 0.294155410042193, "grad_norm": 0.6190983653068542, "learning_rate": 2.48370899357379e-05, "loss": 0.0727, "step": 16610 }, { "epoch": 0.2941731195792214, "grad_norm": 0.8413740396499634, "learning_rate": 2.4836440404971136e-05, "loss": 0.0966, "step": 16611 }, { "epoch": 0.29419082911624983, "grad_norm": 0.7281327843666077, "learning_rate": 2.4835790841843474e-05, "loss": 0.1257, "step": 16612 }, { "epoch": 0.2942085386532783, "grad_norm": 0.6788150072097778, "learning_rate": 2.483514124635706e-05, "loss": 0.0805, "step": 16613 }, { "epoch": 0.2942262481903067, "grad_norm": 0.856404721736908, "learning_rate": 2.483449161851403e-05, "loss": 0.1261, "step": 16614 }, { "epoch": 0.29424395772733514, "grad_norm": 0.9728647470474243, "learning_rate": 2.4833841958316513e-05, "loss": 0.0915, "step": 16615 }, { "epoch": 0.29426166726436354, "grad_norm": 0.6830002069473267, "learning_rate": 2.483319226576665e-05, "loss": 0.0755, "step": 16616 }, { "epoch": 0.294279376801392, "grad_norm": 0.7103784084320068, "learning_rate": 2.483254254086658e-05, "loss": 0.0643, "step": 16617 }, { "epoch": 0.2942970863384204, "grad_norm": 1.3981695175170898, "learning_rate": 2.4831892783618437e-05, "loss": 0.1119, "step": 16618 }, { "epoch": 0.29431479587544884, "grad_norm": 0.8135954141616821, "learning_rate": 2.4831242994024367e-05, "loss": 0.0932, "step": 16619 }, { "epoch": 0.29433250541247724, "grad_norm": 1.1187840700149536, "learning_rate": 2.4830593172086497e-05, "loss": 0.0988, "step": 16620 }, { "epoch": 0.2943502149495057, "grad_norm": 0.6545528769493103, "learning_rate": 2.482994331780697e-05, "loss": 0.0736, "step": 16621 }, { "epoch": 0.2943679244865341, "grad_norm": 0.669624388217926, "learning_rate": 2.4829293431187926e-05, "loss": 0.0721, "step": 16622 }, { "epoch": 0.29438563402356255, "grad_norm": 0.5357493162155151, "learning_rate": 2.48286435122315e-05, "loss": 0.0786, "step": 16623 }, { "epoch": 0.29440334356059095, "grad_norm": 1.3495798110961914, "learning_rate": 2.4827993560939832e-05, "loss": 0.116, "step": 16624 }, { "epoch": 0.2944210530976194, "grad_norm": 0.8201170563697815, "learning_rate": 2.4827343577315054e-05, "loss": 0.0926, "step": 16625 }, { "epoch": 0.2944387626346478, "grad_norm": 0.6936874985694885, "learning_rate": 2.4826693561359314e-05, "loss": 0.0888, "step": 16626 }, { "epoch": 0.29445647217167625, "grad_norm": 0.9186453819274902, "learning_rate": 2.4826043513074748e-05, "loss": 0.0993, "step": 16627 }, { "epoch": 0.2944741817087047, "grad_norm": 0.48829272389411926, "learning_rate": 2.4825393432463488e-05, "loss": 0.067, "step": 16628 }, { "epoch": 0.2944918912457331, "grad_norm": 0.6750558018684387, "learning_rate": 2.482474331952768e-05, "loss": 0.0916, "step": 16629 }, { "epoch": 0.29450960078276156, "grad_norm": 1.079681158065796, "learning_rate": 2.4824093174269462e-05, "loss": 0.0867, "step": 16630 }, { "epoch": 0.29452731031978996, "grad_norm": 0.5161761045455933, "learning_rate": 2.482344299669097e-05, "loss": 0.0789, "step": 16631 }, { "epoch": 0.2945450198568184, "grad_norm": 0.912160336971283, "learning_rate": 2.482279278679434e-05, "loss": 0.0718, "step": 16632 }, { "epoch": 0.2945627293938468, "grad_norm": 1.0228676795959473, "learning_rate": 2.4822142544581716e-05, "loss": 0.0932, "step": 16633 }, { "epoch": 0.29458043893087527, "grad_norm": 0.6694746017456055, "learning_rate": 2.482149227005524e-05, "loss": 0.1112, "step": 16634 }, { "epoch": 0.29459814846790366, "grad_norm": 0.7967168688774109, "learning_rate": 2.4820841963217048e-05, "loss": 0.1072, "step": 16635 }, { "epoch": 0.2946158580049321, "grad_norm": 0.8929893970489502, "learning_rate": 2.4820191624069276e-05, "loss": 0.0628, "step": 16636 }, { "epoch": 0.2946335675419605, "grad_norm": 1.192967414855957, "learning_rate": 2.4819541252614072e-05, "loss": 0.0905, "step": 16637 }, { "epoch": 0.29465127707898897, "grad_norm": 0.7229781746864319, "learning_rate": 2.4818890848853563e-05, "loss": 0.1075, "step": 16638 }, { "epoch": 0.29466898661601737, "grad_norm": 0.740174412727356, "learning_rate": 2.48182404127899e-05, "loss": 0.0707, "step": 16639 }, { "epoch": 0.2946866961530458, "grad_norm": 0.6556029319763184, "learning_rate": 2.4817589944425223e-05, "loss": 0.0858, "step": 16640 }, { "epoch": 0.2947044056900742, "grad_norm": 0.7167291641235352, "learning_rate": 2.4816939443761662e-05, "loss": 0.122, "step": 16641 }, { "epoch": 0.2947221152271027, "grad_norm": 1.0793734788894653, "learning_rate": 2.4816288910801367e-05, "loss": 0.1196, "step": 16642 }, { "epoch": 0.29473982476413113, "grad_norm": 0.7887230515480042, "learning_rate": 2.4815638345546475e-05, "loss": 0.1072, "step": 16643 }, { "epoch": 0.29475753430115953, "grad_norm": 0.8898013830184937, "learning_rate": 2.4814987747999122e-05, "loss": 0.1415, "step": 16644 }, { "epoch": 0.294775243838188, "grad_norm": 0.40401023626327515, "learning_rate": 2.481433711816145e-05, "loss": 0.0805, "step": 16645 }, { "epoch": 0.2947929533752164, "grad_norm": 0.7451995611190796, "learning_rate": 2.4813686456035608e-05, "loss": 0.0802, "step": 16646 }, { "epoch": 0.29481066291224484, "grad_norm": 0.6987020373344421, "learning_rate": 2.481303576162373e-05, "loss": 0.0972, "step": 16647 }, { "epoch": 0.29482837244927323, "grad_norm": 0.9472419619560242, "learning_rate": 2.4812385034927955e-05, "loss": 0.1339, "step": 16648 }, { "epoch": 0.2948460819863017, "grad_norm": 0.5301985144615173, "learning_rate": 2.4811734275950423e-05, "loss": 0.1183, "step": 16649 }, { "epoch": 0.2948637915233301, "grad_norm": 1.315861701965332, "learning_rate": 2.4811083484693276e-05, "loss": 0.1112, "step": 16650 }, { "epoch": 0.29488150106035854, "grad_norm": 0.7829118967056274, "learning_rate": 2.481043266115866e-05, "loss": 0.1012, "step": 16651 }, { "epoch": 0.29489921059738694, "grad_norm": 0.9130457639694214, "learning_rate": 2.4809781805348717e-05, "loss": 0.1061, "step": 16652 }, { "epoch": 0.2949169201344154, "grad_norm": 0.794324517250061, "learning_rate": 2.480913091726558e-05, "loss": 0.0856, "step": 16653 }, { "epoch": 0.2949346296714438, "grad_norm": 0.8917646408081055, "learning_rate": 2.4808479996911393e-05, "loss": 0.0966, "step": 16654 }, { "epoch": 0.29495233920847225, "grad_norm": 1.5354875326156616, "learning_rate": 2.48078290442883e-05, "loss": 0.101, "step": 16655 }, { "epoch": 0.29497004874550065, "grad_norm": 0.8808669447898865, "learning_rate": 2.4807178059398445e-05, "loss": 0.0798, "step": 16656 }, { "epoch": 0.2949877582825291, "grad_norm": 0.6650671362876892, "learning_rate": 2.480652704224396e-05, "loss": 0.1199, "step": 16657 }, { "epoch": 0.29500546781955755, "grad_norm": 1.022515058517456, "learning_rate": 2.4805875992826996e-05, "loss": 0.1203, "step": 16658 }, { "epoch": 0.29502317735658595, "grad_norm": 0.7849681973457336, "learning_rate": 2.4805224911149692e-05, "loss": 0.1298, "step": 16659 }, { "epoch": 0.2950408868936144, "grad_norm": 0.7826765179634094, "learning_rate": 2.480457379721419e-05, "loss": 0.0702, "step": 16660 }, { "epoch": 0.2950585964306428, "grad_norm": 0.616297721862793, "learning_rate": 2.480392265102263e-05, "loss": 0.053, "step": 16661 }, { "epoch": 0.29507630596767126, "grad_norm": 0.6191213130950928, "learning_rate": 2.4803271472577158e-05, "loss": 0.0989, "step": 16662 }, { "epoch": 0.29509401550469966, "grad_norm": 0.6926201581954956, "learning_rate": 2.4802620261879914e-05, "loss": 0.1157, "step": 16663 }, { "epoch": 0.2951117250417281, "grad_norm": 1.0327260494232178, "learning_rate": 2.4801969018933042e-05, "loss": 0.1211, "step": 16664 }, { "epoch": 0.2951294345787565, "grad_norm": 0.8726533055305481, "learning_rate": 2.480131774373868e-05, "loss": 0.0988, "step": 16665 }, { "epoch": 0.29514714411578497, "grad_norm": 0.7625252604484558, "learning_rate": 2.4800666436298975e-05, "loss": 0.1061, "step": 16666 }, { "epoch": 0.29516485365281336, "grad_norm": 0.9385321736335754, "learning_rate": 2.4800015096616072e-05, "loss": 0.1054, "step": 16667 }, { "epoch": 0.2951825631898418, "grad_norm": 1.2709745168685913, "learning_rate": 2.4799363724692107e-05, "loss": 0.1393, "step": 16668 }, { "epoch": 0.2952002727268702, "grad_norm": 0.8758602142333984, "learning_rate": 2.4798712320529227e-05, "loss": 0.0983, "step": 16669 }, { "epoch": 0.29521798226389867, "grad_norm": 0.7765129804611206, "learning_rate": 2.4798060884129578e-05, "loss": 0.0942, "step": 16670 }, { "epoch": 0.29523569180092707, "grad_norm": 0.5742915868759155, "learning_rate": 2.4797409415495294e-05, "loss": 0.0678, "step": 16671 }, { "epoch": 0.2952534013379555, "grad_norm": 0.8619491457939148, "learning_rate": 2.479675791462853e-05, "loss": 0.0931, "step": 16672 }, { "epoch": 0.295271110874984, "grad_norm": 1.0850880146026611, "learning_rate": 2.479610638153142e-05, "loss": 0.0979, "step": 16673 }, { "epoch": 0.2952888204120124, "grad_norm": 1.0540106296539307, "learning_rate": 2.4795454816206112e-05, "loss": 0.098, "step": 16674 }, { "epoch": 0.29530652994904083, "grad_norm": 0.6268549561500549, "learning_rate": 2.4794803218654742e-05, "loss": 0.1304, "step": 16675 }, { "epoch": 0.29532423948606923, "grad_norm": 0.34981417655944824, "learning_rate": 2.479415158887947e-05, "loss": 0.1044, "step": 16676 }, { "epoch": 0.2953419490230977, "grad_norm": 0.6612764000892639, "learning_rate": 2.4793499926882424e-05, "loss": 0.0648, "step": 16677 }, { "epoch": 0.2953596585601261, "grad_norm": 0.8174286484718323, "learning_rate": 2.4792848232665753e-05, "loss": 0.0956, "step": 16678 }, { "epoch": 0.29537736809715454, "grad_norm": 0.7249032258987427, "learning_rate": 2.4792196506231608e-05, "loss": 0.0801, "step": 16679 }, { "epoch": 0.29539507763418293, "grad_norm": 0.7210264205932617, "learning_rate": 2.479154474758212e-05, "loss": 0.0938, "step": 16680 }, { "epoch": 0.2954127871712114, "grad_norm": 0.7932557463645935, "learning_rate": 2.4790892956719443e-05, "loss": 0.0961, "step": 16681 }, { "epoch": 0.2954304967082398, "grad_norm": 0.8238364458084106, "learning_rate": 2.479024113364572e-05, "loss": 0.0899, "step": 16682 }, { "epoch": 0.29544820624526824, "grad_norm": 0.8222406506538391, "learning_rate": 2.478958927836309e-05, "loss": 0.111, "step": 16683 }, { "epoch": 0.29546591578229664, "grad_norm": 1.2454630136489868, "learning_rate": 2.4788937390873705e-05, "loss": 0.1147, "step": 16684 }, { "epoch": 0.2954836253193251, "grad_norm": 0.6310842037200928, "learning_rate": 2.4788285471179702e-05, "loss": 0.096, "step": 16685 }, { "epoch": 0.2955013348563535, "grad_norm": 0.8114415407180786, "learning_rate": 2.4787633519283235e-05, "loss": 0.0731, "step": 16686 }, { "epoch": 0.29551904439338195, "grad_norm": 0.807617723941803, "learning_rate": 2.4786981535186444e-05, "loss": 0.127, "step": 16687 }, { "epoch": 0.2955367539304104, "grad_norm": 0.8497489094734192, "learning_rate": 2.4786329518891468e-05, "loss": 0.0728, "step": 16688 }, { "epoch": 0.2955544634674388, "grad_norm": 0.5405195951461792, "learning_rate": 2.478567747040046e-05, "loss": 0.1301, "step": 16689 }, { "epoch": 0.29557217300446725, "grad_norm": 0.9628820419311523, "learning_rate": 2.4785025389715565e-05, "loss": 0.0974, "step": 16690 }, { "epoch": 0.29558988254149565, "grad_norm": 1.0444649457931519, "learning_rate": 2.4784373276838925e-05, "loss": 0.1553, "step": 16691 }, { "epoch": 0.2956075920785241, "grad_norm": 0.8337594866752625, "learning_rate": 2.4783721131772687e-05, "loss": 0.0838, "step": 16692 }, { "epoch": 0.2956253016155525, "grad_norm": 1.0041396617889404, "learning_rate": 2.4783068954518998e-05, "loss": 0.1093, "step": 16693 }, { "epoch": 0.29564301115258096, "grad_norm": 0.6413031220436096, "learning_rate": 2.4782416745079998e-05, "loss": 0.0879, "step": 16694 }, { "epoch": 0.29566072068960936, "grad_norm": 1.0989482402801514, "learning_rate": 2.4781764503457836e-05, "loss": 0.1312, "step": 16695 }, { "epoch": 0.2956784302266378, "grad_norm": 0.9201775789260864, "learning_rate": 2.478111222965466e-05, "loss": 0.0883, "step": 16696 }, { "epoch": 0.2956961397636662, "grad_norm": 0.9543410539627075, "learning_rate": 2.4780459923672616e-05, "loss": 0.0898, "step": 16697 }, { "epoch": 0.29571384930069466, "grad_norm": 0.76337069272995, "learning_rate": 2.477980758551384e-05, "loss": 0.1032, "step": 16698 }, { "epoch": 0.29573155883772306, "grad_norm": 0.8978572487831116, "learning_rate": 2.47791552151805e-05, "loss": 0.0886, "step": 16699 }, { "epoch": 0.2957492683747515, "grad_norm": 1.0158923864364624, "learning_rate": 2.4778502812674715e-05, "loss": 0.1064, "step": 16700 }, { "epoch": 0.2957669779117799, "grad_norm": 0.8886321783065796, "learning_rate": 2.477785037799865e-05, "loss": 0.1044, "step": 16701 }, { "epoch": 0.29578468744880837, "grad_norm": 0.8909786939620972, "learning_rate": 2.4777197911154448e-05, "loss": 0.0705, "step": 16702 }, { "epoch": 0.2958023969858368, "grad_norm": 0.6349875926971436, "learning_rate": 2.4776545412144252e-05, "loss": 0.0923, "step": 16703 }, { "epoch": 0.2958201065228652, "grad_norm": 0.8551848530769348, "learning_rate": 2.477589288097021e-05, "loss": 0.098, "step": 16704 }, { "epoch": 0.2958378160598937, "grad_norm": 0.591144859790802, "learning_rate": 2.4775240317634467e-05, "loss": 0.0965, "step": 16705 }, { "epoch": 0.2958555255969221, "grad_norm": 0.7296690344810486, "learning_rate": 2.477458772213918e-05, "loss": 0.0952, "step": 16706 }, { "epoch": 0.29587323513395053, "grad_norm": 0.9363442659378052, "learning_rate": 2.477393509448648e-05, "loss": 0.0712, "step": 16707 }, { "epoch": 0.29589094467097893, "grad_norm": 2.028829336166382, "learning_rate": 2.4773282434678524e-05, "loss": 0.0672, "step": 16708 }, { "epoch": 0.2959086542080074, "grad_norm": 0.5516918301582336, "learning_rate": 2.477262974271746e-05, "loss": 0.0915, "step": 16709 }, { "epoch": 0.2959263637450358, "grad_norm": 0.9068558812141418, "learning_rate": 2.477197701860543e-05, "loss": 0.0961, "step": 16710 }, { "epoch": 0.29594407328206424, "grad_norm": 1.2869346141815186, "learning_rate": 2.4771324262344586e-05, "loss": 0.0976, "step": 16711 }, { "epoch": 0.29596178281909263, "grad_norm": 0.8619130253791809, "learning_rate": 2.477067147393707e-05, "loss": 0.0657, "step": 16712 }, { "epoch": 0.2959794923561211, "grad_norm": 1.1571757793426514, "learning_rate": 2.4770018653385038e-05, "loss": 0.1001, "step": 16713 }, { "epoch": 0.2959972018931495, "grad_norm": 0.5399033427238464, "learning_rate": 2.4769365800690627e-05, "loss": 0.1027, "step": 16714 }, { "epoch": 0.29601491143017794, "grad_norm": 0.7084410190582275, "learning_rate": 2.4768712915855995e-05, "loss": 0.0688, "step": 16715 }, { "epoch": 0.29603262096720634, "grad_norm": 0.9120205044746399, "learning_rate": 2.4768059998883287e-05, "loss": 0.0831, "step": 16716 }, { "epoch": 0.2960503305042348, "grad_norm": 0.6417859792709351, "learning_rate": 2.4767407049774643e-05, "loss": 0.0682, "step": 16717 }, { "epoch": 0.29606804004126325, "grad_norm": 1.2180453538894653, "learning_rate": 2.476675406853223e-05, "loss": 0.1467, "step": 16718 }, { "epoch": 0.29608574957829165, "grad_norm": 0.7144485712051392, "learning_rate": 2.4766101055158168e-05, "loss": 0.1016, "step": 16719 }, { "epoch": 0.2961034591153201, "grad_norm": 0.7280489802360535, "learning_rate": 2.4765448009654634e-05, "loss": 0.0778, "step": 16720 }, { "epoch": 0.2961211686523485, "grad_norm": 0.9046381115913391, "learning_rate": 2.4764794932023756e-05, "loss": 0.075, "step": 16721 }, { "epoch": 0.29613887818937695, "grad_norm": 0.8842294216156006, "learning_rate": 2.4764141822267694e-05, "loss": 0.0778, "step": 16722 }, { "epoch": 0.29615658772640535, "grad_norm": 0.9329274296760559, "learning_rate": 2.4763488680388588e-05, "loss": 0.0918, "step": 16723 }, { "epoch": 0.2961742972634338, "grad_norm": 0.6058604717254639, "learning_rate": 2.47628355063886e-05, "loss": 0.0868, "step": 16724 }, { "epoch": 0.2961920068004622, "grad_norm": 0.989045262336731, "learning_rate": 2.4762182300269865e-05, "loss": 0.093, "step": 16725 }, { "epoch": 0.29620971633749066, "grad_norm": 0.8500211834907532, "learning_rate": 2.476152906203454e-05, "loss": 0.1034, "step": 16726 }, { "epoch": 0.29622742587451906, "grad_norm": 0.8994197845458984, "learning_rate": 2.4760875791684772e-05, "loss": 0.1139, "step": 16727 }, { "epoch": 0.2962451354115475, "grad_norm": 0.7910695672035217, "learning_rate": 2.4760222489222707e-05, "loss": 0.106, "step": 16728 }, { "epoch": 0.2962628449485759, "grad_norm": 0.9634017944335938, "learning_rate": 2.47595691546505e-05, "loss": 0.1543, "step": 16729 }, { "epoch": 0.29628055448560436, "grad_norm": 0.6358320713043213, "learning_rate": 2.4758915787970294e-05, "loss": 0.073, "step": 16730 }, { "epoch": 0.29629826402263276, "grad_norm": 0.9986499547958374, "learning_rate": 2.4758262389184245e-05, "loss": 0.1106, "step": 16731 }, { "epoch": 0.2963159735596612, "grad_norm": 0.7999340891838074, "learning_rate": 2.4757608958294503e-05, "loss": 0.0675, "step": 16732 }, { "epoch": 0.29633368309668967, "grad_norm": 0.7328799962997437, "learning_rate": 2.475695549530321e-05, "loss": 0.1012, "step": 16733 }, { "epoch": 0.29635139263371807, "grad_norm": 0.7449361681938171, "learning_rate": 2.4756302000212518e-05, "loss": 0.0652, "step": 16734 }, { "epoch": 0.2963691021707465, "grad_norm": 0.7933021187782288, "learning_rate": 2.4755648473024582e-05, "loss": 0.0992, "step": 16735 }, { "epoch": 0.2963868117077749, "grad_norm": 0.5946691036224365, "learning_rate": 2.4754994913741552e-05, "loss": 0.0784, "step": 16736 }, { "epoch": 0.2964045212448034, "grad_norm": 0.8274561762809753, "learning_rate": 2.475434132236557e-05, "loss": 0.0954, "step": 16737 }, { "epoch": 0.2964222307818318, "grad_norm": 1.0276190042495728, "learning_rate": 2.4753687698898795e-05, "loss": 0.0992, "step": 16738 }, { "epoch": 0.29643994031886023, "grad_norm": 0.7183120250701904, "learning_rate": 2.4753034043343372e-05, "loss": 0.0746, "step": 16739 }, { "epoch": 0.2964576498558886, "grad_norm": 0.6506497263908386, "learning_rate": 2.475238035570146e-05, "loss": 0.1178, "step": 16740 }, { "epoch": 0.2964753593929171, "grad_norm": 0.4365912675857544, "learning_rate": 2.4751726635975197e-05, "loss": 0.0757, "step": 16741 }, { "epoch": 0.2964930689299455, "grad_norm": 1.2279350757598877, "learning_rate": 2.475107288416674e-05, "loss": 0.0879, "step": 16742 }, { "epoch": 0.29651077846697393, "grad_norm": 0.9869379997253418, "learning_rate": 2.475041910027824e-05, "loss": 0.0701, "step": 16743 }, { "epoch": 0.29652848800400233, "grad_norm": 0.6705707907676697, "learning_rate": 2.4749765284311846e-05, "loss": 0.0842, "step": 16744 }, { "epoch": 0.2965461975410308, "grad_norm": 0.8081321716308594, "learning_rate": 2.474911143626971e-05, "loss": 0.1167, "step": 16745 }, { "epoch": 0.2965639070780592, "grad_norm": 0.7203917503356934, "learning_rate": 2.4748457556153987e-05, "loss": 0.0772, "step": 16746 }, { "epoch": 0.29658161661508764, "grad_norm": 0.7082698941230774, "learning_rate": 2.4747803643966826e-05, "loss": 0.0638, "step": 16747 }, { "epoch": 0.2965993261521161, "grad_norm": 1.1861834526062012, "learning_rate": 2.4747149699710373e-05, "loss": 0.0939, "step": 16748 }, { "epoch": 0.2966170356891445, "grad_norm": 0.8311207294464111, "learning_rate": 2.4746495723386785e-05, "loss": 0.0819, "step": 16749 }, { "epoch": 0.29663474522617295, "grad_norm": 0.6203067898750305, "learning_rate": 2.4745841714998212e-05, "loss": 0.057, "step": 16750 }, { "epoch": 0.29665245476320135, "grad_norm": 0.8545131683349609, "learning_rate": 2.4745187674546803e-05, "loss": 0.0821, "step": 16751 }, { "epoch": 0.2966701643002298, "grad_norm": 1.6342506408691406, "learning_rate": 2.4744533602034715e-05, "loss": 0.0788, "step": 16752 }, { "epoch": 0.2966878738372582, "grad_norm": 0.7124131917953491, "learning_rate": 2.4743879497464094e-05, "loss": 0.0615, "step": 16753 }, { "epoch": 0.29670558337428665, "grad_norm": 0.9570850729942322, "learning_rate": 2.4743225360837098e-05, "loss": 0.0731, "step": 16754 }, { "epoch": 0.29672329291131505, "grad_norm": 0.573344349861145, "learning_rate": 2.4742571192155875e-05, "loss": 0.1309, "step": 16755 }, { "epoch": 0.2967410024483435, "grad_norm": 0.6282833218574524, "learning_rate": 2.4741916991422576e-05, "loss": 0.0843, "step": 16756 }, { "epoch": 0.2967587119853719, "grad_norm": 0.6986618041992188, "learning_rate": 2.474126275863936e-05, "loss": 0.0826, "step": 16757 }, { "epoch": 0.29677642152240036, "grad_norm": 0.5852739214897156, "learning_rate": 2.474060849380837e-05, "loss": 0.057, "step": 16758 }, { "epoch": 0.29679413105942876, "grad_norm": 0.5567772388458252, "learning_rate": 2.473995419693177e-05, "loss": 0.0839, "step": 16759 }, { "epoch": 0.2968118405964572, "grad_norm": 1.0303113460540771, "learning_rate": 2.4739299868011696e-05, "loss": 0.087, "step": 16760 }, { "epoch": 0.2968295501334856, "grad_norm": 0.6316903233528137, "learning_rate": 2.4738645507050318e-05, "loss": 0.1008, "step": 16761 }, { "epoch": 0.29684725967051406, "grad_norm": 0.7254623174667358, "learning_rate": 2.473799111404978e-05, "loss": 0.0952, "step": 16762 }, { "epoch": 0.2968649692075425, "grad_norm": 0.7298446297645569, "learning_rate": 2.4737336689012235e-05, "loss": 0.0831, "step": 16763 }, { "epoch": 0.2968826787445709, "grad_norm": 0.8470479846000671, "learning_rate": 2.4736682231939834e-05, "loss": 0.0906, "step": 16764 }, { "epoch": 0.29690038828159937, "grad_norm": 0.6319502592086792, "learning_rate": 2.473602774283474e-05, "loss": 0.0622, "step": 16765 }, { "epoch": 0.29691809781862777, "grad_norm": 0.715600848197937, "learning_rate": 2.47353732216991e-05, "loss": 0.1143, "step": 16766 }, { "epoch": 0.2969358073556562, "grad_norm": 0.41088807582855225, "learning_rate": 2.4734718668535056e-05, "loss": 0.0827, "step": 16767 }, { "epoch": 0.2969535168926846, "grad_norm": 0.8211648464202881, "learning_rate": 2.4734064083344785e-05, "loss": 0.0769, "step": 16768 }, { "epoch": 0.2969712264297131, "grad_norm": 0.7789930701255798, "learning_rate": 2.473340946613042e-05, "loss": 0.11, "step": 16769 }, { "epoch": 0.2969889359667415, "grad_norm": 0.8663400411605835, "learning_rate": 2.4732754816894125e-05, "loss": 0.0754, "step": 16770 }, { "epoch": 0.29700664550376993, "grad_norm": 1.6093789339065552, "learning_rate": 2.473210013563805e-05, "loss": 0.104, "step": 16771 }, { "epoch": 0.2970243550407983, "grad_norm": 0.9803196787834167, "learning_rate": 2.4731445422364348e-05, "loss": 0.069, "step": 16772 }, { "epoch": 0.2970420645778268, "grad_norm": 1.0850127935409546, "learning_rate": 2.473079067707518e-05, "loss": 0.096, "step": 16773 }, { "epoch": 0.2970597741148552, "grad_norm": 0.6678791642189026, "learning_rate": 2.473013589977269e-05, "loss": 0.079, "step": 16774 }, { "epoch": 0.29707748365188363, "grad_norm": 0.8929934501647949, "learning_rate": 2.472948109045904e-05, "loss": 0.1129, "step": 16775 }, { "epoch": 0.29709519318891203, "grad_norm": 0.708487868309021, "learning_rate": 2.472882624913638e-05, "loss": 0.137, "step": 16776 }, { "epoch": 0.2971129027259405, "grad_norm": 1.5987175703048706, "learning_rate": 2.4728171375806867e-05, "loss": 0.1, "step": 16777 }, { "epoch": 0.29713061226296894, "grad_norm": 0.4288784861564636, "learning_rate": 2.472751647047265e-05, "loss": 0.0865, "step": 16778 }, { "epoch": 0.29714832179999734, "grad_norm": 0.8446973562240601, "learning_rate": 2.4726861533135895e-05, "loss": 0.0973, "step": 16779 }, { "epoch": 0.2971660313370258, "grad_norm": 0.8707771301269531, "learning_rate": 2.4726206563798743e-05, "loss": 0.0972, "step": 16780 }, { "epoch": 0.2971837408740542, "grad_norm": 1.4262808561325073, "learning_rate": 2.4725551562463357e-05, "loss": 0.111, "step": 16781 }, { "epoch": 0.29720145041108265, "grad_norm": 0.6169357299804688, "learning_rate": 2.472489652913189e-05, "loss": 0.0667, "step": 16782 }, { "epoch": 0.29721915994811104, "grad_norm": 0.8543171286582947, "learning_rate": 2.47242414638065e-05, "loss": 0.0733, "step": 16783 }, { "epoch": 0.2972368694851395, "grad_norm": 0.6866121888160706, "learning_rate": 2.4723586366489333e-05, "loss": 0.1172, "step": 16784 }, { "epoch": 0.2972545790221679, "grad_norm": 0.5968263745307922, "learning_rate": 2.4722931237182556e-05, "loss": 0.0899, "step": 16785 }, { "epoch": 0.29727228855919635, "grad_norm": 0.7288272976875305, "learning_rate": 2.4722276075888318e-05, "loss": 0.0619, "step": 16786 }, { "epoch": 0.29728999809622475, "grad_norm": 1.0262911319732666, "learning_rate": 2.472162088260877e-05, "loss": 0.1177, "step": 16787 }, { "epoch": 0.2973077076332532, "grad_norm": 1.0875414609909058, "learning_rate": 2.472096565734608e-05, "loss": 0.1096, "step": 16788 }, { "epoch": 0.2973254171702816, "grad_norm": 0.7512707114219666, "learning_rate": 2.472031040010239e-05, "loss": 0.0872, "step": 16789 }, { "epoch": 0.29734312670731006, "grad_norm": 0.7006012797355652, "learning_rate": 2.4719655110879862e-05, "loss": 0.0777, "step": 16790 }, { "epoch": 0.29736083624433846, "grad_norm": 1.2947934865951538, "learning_rate": 2.4718999789680654e-05, "loss": 0.0823, "step": 16791 }, { "epoch": 0.2973785457813669, "grad_norm": 1.6828149557113647, "learning_rate": 2.471834443650692e-05, "loss": 0.109, "step": 16792 }, { "epoch": 0.29739625531839536, "grad_norm": 0.8475762009620667, "learning_rate": 2.4717689051360817e-05, "loss": 0.1034, "step": 16793 }, { "epoch": 0.29741396485542376, "grad_norm": 0.9340103268623352, "learning_rate": 2.47170336342445e-05, "loss": 0.0848, "step": 16794 }, { "epoch": 0.2974316743924522, "grad_norm": 1.3890432119369507, "learning_rate": 2.4716378185160124e-05, "loss": 0.0954, "step": 16795 }, { "epoch": 0.2974493839294806, "grad_norm": 0.8328518271446228, "learning_rate": 2.4715722704109847e-05, "loss": 0.0819, "step": 16796 }, { "epoch": 0.29746709346650907, "grad_norm": 0.9714750051498413, "learning_rate": 2.471506719109582e-05, "loss": 0.1051, "step": 16797 }, { "epoch": 0.29748480300353747, "grad_norm": 0.7498896718025208, "learning_rate": 2.471441164612021e-05, "loss": 0.0837, "step": 16798 }, { "epoch": 0.2975025125405659, "grad_norm": 0.735075831413269, "learning_rate": 2.4713756069185167e-05, "loss": 0.0828, "step": 16799 }, { "epoch": 0.2975202220775943, "grad_norm": 0.8567587733268738, "learning_rate": 2.4713100460292847e-05, "loss": 0.1212, "step": 16800 }, { "epoch": 0.2975379316146228, "grad_norm": 0.9461444020271301, "learning_rate": 2.471244481944541e-05, "loss": 0.0697, "step": 16801 }, { "epoch": 0.2975556411516512, "grad_norm": 0.7316159605979919, "learning_rate": 2.4711789146645014e-05, "loss": 0.103, "step": 16802 }, { "epoch": 0.2975733506886796, "grad_norm": 0.8619487881660461, "learning_rate": 2.4711133441893814e-05, "loss": 0.11, "step": 16803 }, { "epoch": 0.297591060225708, "grad_norm": 1.0424232482910156, "learning_rate": 2.4710477705193967e-05, "loss": 0.0898, "step": 16804 }, { "epoch": 0.2976087697627365, "grad_norm": 1.0594885349273682, "learning_rate": 2.470982193654763e-05, "loss": 0.1274, "step": 16805 }, { "epoch": 0.2976264792997649, "grad_norm": 0.8905976414680481, "learning_rate": 2.4709166135956954e-05, "loss": 0.1054, "step": 16806 }, { "epoch": 0.29764418883679333, "grad_norm": 1.5365036725997925, "learning_rate": 2.4708510303424115e-05, "loss": 0.1176, "step": 16807 }, { "epoch": 0.2976618983738218, "grad_norm": 0.7501898407936096, "learning_rate": 2.470785443895125e-05, "loss": 0.1026, "step": 16808 }, { "epoch": 0.2976796079108502, "grad_norm": 0.6792499423027039, "learning_rate": 2.470719854254053e-05, "loss": 0.0761, "step": 16809 }, { "epoch": 0.29769731744787864, "grad_norm": 0.4039510488510132, "learning_rate": 2.4706542614194106e-05, "loss": 0.099, "step": 16810 }, { "epoch": 0.29771502698490704, "grad_norm": 0.8203958868980408, "learning_rate": 2.4705886653914147e-05, "loss": 0.1102, "step": 16811 }, { "epoch": 0.2977327365219355, "grad_norm": 0.7747541069984436, "learning_rate": 2.4705230661702796e-05, "loss": 0.1166, "step": 16812 }, { "epoch": 0.2977504460589639, "grad_norm": 0.6652634739875793, "learning_rate": 2.4704574637562217e-05, "loss": 0.074, "step": 16813 }, { "epoch": 0.29776815559599235, "grad_norm": 0.9418542385101318, "learning_rate": 2.470391858149457e-05, "loss": 0.1032, "step": 16814 }, { "epoch": 0.29778586513302074, "grad_norm": 1.1242363452911377, "learning_rate": 2.4703262493502012e-05, "loss": 0.0914, "step": 16815 }, { "epoch": 0.2978035746700492, "grad_norm": 0.7424394488334656, "learning_rate": 2.4702606373586703e-05, "loss": 0.0701, "step": 16816 }, { "epoch": 0.2978212842070776, "grad_norm": 0.685462474822998, "learning_rate": 2.4701950221750798e-05, "loss": 0.082, "step": 16817 }, { "epoch": 0.29783899374410605, "grad_norm": 0.9865345358848572, "learning_rate": 2.4701294037996462e-05, "loss": 0.11, "step": 16818 }, { "epoch": 0.29785670328113445, "grad_norm": 0.9578484296798706, "learning_rate": 2.4700637822325846e-05, "loss": 0.0996, "step": 16819 }, { "epoch": 0.2978744128181629, "grad_norm": 0.4513239562511444, "learning_rate": 2.4699981574741113e-05, "loss": 0.068, "step": 16820 }, { "epoch": 0.2978921223551913, "grad_norm": 0.8194376230239868, "learning_rate": 2.4699325295244427e-05, "loss": 0.0925, "step": 16821 }, { "epoch": 0.29790983189221976, "grad_norm": 0.655481219291687, "learning_rate": 2.4698668983837933e-05, "loss": 0.0858, "step": 16822 }, { "epoch": 0.2979275414292482, "grad_norm": 0.41839730739593506, "learning_rate": 2.4698012640523807e-05, "loss": 0.1126, "step": 16823 }, { "epoch": 0.2979452509662766, "grad_norm": 0.8877285122871399, "learning_rate": 2.4697356265304195e-05, "loss": 0.0976, "step": 16824 }, { "epoch": 0.29796296050330506, "grad_norm": 0.6835724115371704, "learning_rate": 2.4696699858181267e-05, "loss": 0.0871, "step": 16825 }, { "epoch": 0.29798067004033346, "grad_norm": 0.6587901711463928, "learning_rate": 2.469604341915717e-05, "loss": 0.1116, "step": 16826 }, { "epoch": 0.2979983795773619, "grad_norm": 1.0587475299835205, "learning_rate": 2.4695386948234078e-05, "loss": 0.1431, "step": 16827 }, { "epoch": 0.2980160891143903, "grad_norm": 1.0068445205688477, "learning_rate": 2.469473044541414e-05, "loss": 0.1015, "step": 16828 }, { "epoch": 0.29803379865141877, "grad_norm": 0.6599639654159546, "learning_rate": 2.469407391069952e-05, "loss": 0.0943, "step": 16829 }, { "epoch": 0.29805150818844717, "grad_norm": 0.8041248917579651, "learning_rate": 2.4693417344092376e-05, "loss": 0.0981, "step": 16830 }, { "epoch": 0.2980692177254756, "grad_norm": 0.6429097652435303, "learning_rate": 2.4692760745594872e-05, "loss": 0.0766, "step": 16831 }, { "epoch": 0.298086927262504, "grad_norm": 0.6835101842880249, "learning_rate": 2.4692104115209167e-05, "loss": 0.0884, "step": 16832 }, { "epoch": 0.2981046367995325, "grad_norm": 0.9655414819717407, "learning_rate": 2.4691447452937415e-05, "loss": 0.1046, "step": 16833 }, { "epoch": 0.2981223463365609, "grad_norm": 0.561634361743927, "learning_rate": 2.469079075878178e-05, "loss": 0.0914, "step": 16834 }, { "epoch": 0.2981400558735893, "grad_norm": 0.8029463887214661, "learning_rate": 2.469013403274443e-05, "loss": 0.1097, "step": 16835 }, { "epoch": 0.2981577654106177, "grad_norm": 0.7950621247291565, "learning_rate": 2.4689477274827512e-05, "loss": 0.0863, "step": 16836 }, { "epoch": 0.2981754749476462, "grad_norm": 1.0450588464736938, "learning_rate": 2.46888204850332e-05, "loss": 0.0839, "step": 16837 }, { "epoch": 0.29819318448467463, "grad_norm": 0.42217057943344116, "learning_rate": 2.468816366336365e-05, "loss": 0.1025, "step": 16838 }, { "epoch": 0.29821089402170303, "grad_norm": 0.5833779573440552, "learning_rate": 2.4687506809821015e-05, "loss": 0.1071, "step": 16839 }, { "epoch": 0.2982286035587315, "grad_norm": 0.6763017177581787, "learning_rate": 2.468684992440746e-05, "loss": 0.1008, "step": 16840 }, { "epoch": 0.2982463130957599, "grad_norm": 0.761142373085022, "learning_rate": 2.4686193007125155e-05, "loss": 0.089, "step": 16841 }, { "epoch": 0.29826402263278834, "grad_norm": 0.8552858829498291, "learning_rate": 2.4685536057976255e-05, "loss": 0.0741, "step": 16842 }, { "epoch": 0.29828173216981674, "grad_norm": 0.5379459857940674, "learning_rate": 2.468487907696292e-05, "loss": 0.0889, "step": 16843 }, { "epoch": 0.2982994417068452, "grad_norm": 1.156330943107605, "learning_rate": 2.4684222064087312e-05, "loss": 0.1117, "step": 16844 }, { "epoch": 0.2983171512438736, "grad_norm": 0.9512873291969299, "learning_rate": 2.4683565019351592e-05, "loss": 0.0798, "step": 16845 }, { "epoch": 0.29833486078090204, "grad_norm": 0.9120621681213379, "learning_rate": 2.4682907942757922e-05, "loss": 0.1012, "step": 16846 }, { "epoch": 0.29835257031793044, "grad_norm": 0.8707973957061768, "learning_rate": 2.4682250834308465e-05, "loss": 0.0787, "step": 16847 }, { "epoch": 0.2983702798549589, "grad_norm": 0.603818953037262, "learning_rate": 2.468159369400538e-05, "loss": 0.0903, "step": 16848 }, { "epoch": 0.2983879893919873, "grad_norm": 0.7732045650482178, "learning_rate": 2.4680936521850837e-05, "loss": 0.1444, "step": 16849 }, { "epoch": 0.29840569892901575, "grad_norm": 0.7776392698287964, "learning_rate": 2.4680279317846986e-05, "loss": 0.1063, "step": 16850 }, { "epoch": 0.29842340846604415, "grad_norm": 1.2288343906402588, "learning_rate": 2.4679622081995998e-05, "loss": 0.1124, "step": 16851 }, { "epoch": 0.2984411180030726, "grad_norm": 0.7434032559394836, "learning_rate": 2.4678964814300033e-05, "loss": 0.0926, "step": 16852 }, { "epoch": 0.29845882754010106, "grad_norm": 0.663806140422821, "learning_rate": 2.4678307514761248e-05, "loss": 0.1263, "step": 16853 }, { "epoch": 0.29847653707712946, "grad_norm": 0.5938596129417419, "learning_rate": 2.4677650183381816e-05, "loss": 0.1096, "step": 16854 }, { "epoch": 0.2984942466141579, "grad_norm": 1.3922497034072876, "learning_rate": 2.4676992820163892e-05, "loss": 0.101, "step": 16855 }, { "epoch": 0.2985119561511863, "grad_norm": 1.2985682487487793, "learning_rate": 2.467633542510964e-05, "loss": 0.0777, "step": 16856 }, { "epoch": 0.29852966568821476, "grad_norm": 1.034728765487671, "learning_rate": 2.4675677998221222e-05, "loss": 0.1072, "step": 16857 }, { "epoch": 0.29854737522524316, "grad_norm": 0.507575511932373, "learning_rate": 2.46750205395008e-05, "loss": 0.0872, "step": 16858 }, { "epoch": 0.2985650847622716, "grad_norm": 0.7895803451538086, "learning_rate": 2.4674363048950543e-05, "loss": 0.0953, "step": 16859 }, { "epoch": 0.2985827942993, "grad_norm": 0.6552280783653259, "learning_rate": 2.4673705526572612e-05, "loss": 0.0997, "step": 16860 }, { "epoch": 0.29860050383632847, "grad_norm": 0.855956494808197, "learning_rate": 2.4673047972369166e-05, "loss": 0.1242, "step": 16861 }, { "epoch": 0.29861821337335687, "grad_norm": 1.0584313869476318, "learning_rate": 2.4672390386342366e-05, "loss": 0.1118, "step": 16862 }, { "epoch": 0.2986359229103853, "grad_norm": 0.8581506013870239, "learning_rate": 2.4671732768494383e-05, "loss": 0.1264, "step": 16863 }, { "epoch": 0.2986536324474137, "grad_norm": 0.6072593927383423, "learning_rate": 2.467107511882738e-05, "loss": 0.0734, "step": 16864 }, { "epoch": 0.2986713419844422, "grad_norm": 0.811546266078949, "learning_rate": 2.4670417437343517e-05, "loss": 0.0857, "step": 16865 }, { "epoch": 0.2986890515214706, "grad_norm": 0.6819331645965576, "learning_rate": 2.466975972404496e-05, "loss": 0.0627, "step": 16866 }, { "epoch": 0.298706761058499, "grad_norm": 0.6105886697769165, "learning_rate": 2.466910197893387e-05, "loss": 0.0878, "step": 16867 }, { "epoch": 0.2987244705955275, "grad_norm": 0.8022233247756958, "learning_rate": 2.4668444202012407e-05, "loss": 0.0862, "step": 16868 }, { "epoch": 0.2987421801325559, "grad_norm": 0.6530048251152039, "learning_rate": 2.4667786393282747e-05, "loss": 0.0998, "step": 16869 }, { "epoch": 0.29875988966958433, "grad_norm": 1.052525281906128, "learning_rate": 2.466712855274705e-05, "loss": 0.0944, "step": 16870 }, { "epoch": 0.29877759920661273, "grad_norm": 1.4409681558609009, "learning_rate": 2.466647068040747e-05, "loss": 0.0634, "step": 16871 }, { "epoch": 0.2987953087436412, "grad_norm": 0.7639958262443542, "learning_rate": 2.4665812776266187e-05, "loss": 0.1092, "step": 16872 }, { "epoch": 0.2988130182806696, "grad_norm": 0.7505613565444946, "learning_rate": 2.466515484032535e-05, "loss": 0.0776, "step": 16873 }, { "epoch": 0.29883072781769804, "grad_norm": 0.9039008021354675, "learning_rate": 2.4664496872587138e-05, "loss": 0.1091, "step": 16874 }, { "epoch": 0.29884843735472644, "grad_norm": 0.6037652492523193, "learning_rate": 2.4663838873053704e-05, "loss": 0.079, "step": 16875 }, { "epoch": 0.2988661468917549, "grad_norm": 0.8032640218734741, "learning_rate": 2.466318084172722e-05, "loss": 0.1147, "step": 16876 }, { "epoch": 0.2988838564287833, "grad_norm": 0.7982723712921143, "learning_rate": 2.4662522778609843e-05, "loss": 0.0885, "step": 16877 }, { "epoch": 0.29890156596581174, "grad_norm": 0.5338373780250549, "learning_rate": 2.4661864683703755e-05, "loss": 0.092, "step": 16878 }, { "epoch": 0.29891927550284014, "grad_norm": 0.7303940653800964, "learning_rate": 2.46612065570111e-05, "loss": 0.1, "step": 16879 }, { "epoch": 0.2989369850398686, "grad_norm": 0.6499305963516235, "learning_rate": 2.4660548398534053e-05, "loss": 0.0712, "step": 16880 }, { "epoch": 0.29895469457689705, "grad_norm": 1.2932978868484497, "learning_rate": 2.465989020827478e-05, "loss": 0.1062, "step": 16881 }, { "epoch": 0.29897240411392545, "grad_norm": 1.1645116806030273, "learning_rate": 2.4659231986235447e-05, "loss": 0.1039, "step": 16882 }, { "epoch": 0.2989901136509539, "grad_norm": 1.135936975479126, "learning_rate": 2.4658573732418216e-05, "loss": 0.0779, "step": 16883 }, { "epoch": 0.2990078231879823, "grad_norm": 0.7818959355354309, "learning_rate": 2.4657915446825256e-05, "loss": 0.0887, "step": 16884 }, { "epoch": 0.29902553272501076, "grad_norm": 1.3826804161071777, "learning_rate": 2.465725712945873e-05, "loss": 0.1394, "step": 16885 }, { "epoch": 0.29904324226203916, "grad_norm": 0.991935133934021, "learning_rate": 2.4656598780320807e-05, "loss": 0.1214, "step": 16886 }, { "epoch": 0.2990609517990676, "grad_norm": 0.6813939213752747, "learning_rate": 2.465594039941365e-05, "loss": 0.0745, "step": 16887 }, { "epoch": 0.299078661336096, "grad_norm": 0.6315475106239319, "learning_rate": 2.4655281986739423e-05, "loss": 0.09, "step": 16888 }, { "epoch": 0.29909637087312446, "grad_norm": 0.6436707973480225, "learning_rate": 2.4654623542300297e-05, "loss": 0.0939, "step": 16889 }, { "epoch": 0.29911408041015286, "grad_norm": 0.6218623518943787, "learning_rate": 2.4653965066098435e-05, "loss": 0.062, "step": 16890 }, { "epoch": 0.2991317899471813, "grad_norm": 0.828574538230896, "learning_rate": 2.4653306558136002e-05, "loss": 0.0878, "step": 16891 }, { "epoch": 0.2991494994842097, "grad_norm": 0.9656599164009094, "learning_rate": 2.465264801841517e-05, "loss": 0.0957, "step": 16892 }, { "epoch": 0.29916720902123817, "grad_norm": 0.7254977226257324, "learning_rate": 2.4651989446938104e-05, "loss": 0.1191, "step": 16893 }, { "epoch": 0.29918491855826657, "grad_norm": 0.5660725831985474, "learning_rate": 2.465133084370697e-05, "loss": 0.088, "step": 16894 }, { "epoch": 0.299202628095295, "grad_norm": 0.7281544208526611, "learning_rate": 2.465067220872393e-05, "loss": 0.1125, "step": 16895 }, { "epoch": 0.2992203376323235, "grad_norm": 1.0716489553451538, "learning_rate": 2.4650013541991153e-05, "loss": 0.1084, "step": 16896 }, { "epoch": 0.2992380471693519, "grad_norm": 0.6861010789871216, "learning_rate": 2.4649354843510804e-05, "loss": 0.0963, "step": 16897 }, { "epoch": 0.2992557567063803, "grad_norm": 0.7185343503952026, "learning_rate": 2.4648696113285062e-05, "loss": 0.0889, "step": 16898 }, { "epoch": 0.2992734662434087, "grad_norm": 0.7918407320976257, "learning_rate": 2.4648037351316083e-05, "loss": 0.13, "step": 16899 }, { "epoch": 0.2992911757804372, "grad_norm": 0.6007146835327148, "learning_rate": 2.4647378557606035e-05, "loss": 0.0825, "step": 16900 }, { "epoch": 0.2993088853174656, "grad_norm": 1.1939823627471924, "learning_rate": 2.464671973215709e-05, "loss": 0.0644, "step": 16901 }, { "epoch": 0.29932659485449403, "grad_norm": 0.6560108661651611, "learning_rate": 2.4646060874971416e-05, "loss": 0.0856, "step": 16902 }, { "epoch": 0.29934430439152243, "grad_norm": 1.0707520246505737, "learning_rate": 2.464540198605117e-05, "loss": 0.0987, "step": 16903 }, { "epoch": 0.2993620139285509, "grad_norm": 1.1036231517791748, "learning_rate": 2.464474306539853e-05, "loss": 0.1075, "step": 16904 }, { "epoch": 0.2993797234655793, "grad_norm": 0.865606963634491, "learning_rate": 2.4644084113015662e-05, "loss": 0.0986, "step": 16905 }, { "epoch": 0.29939743300260774, "grad_norm": 1.0470566749572754, "learning_rate": 2.464342512890473e-05, "loss": 0.0981, "step": 16906 }, { "epoch": 0.29941514253963614, "grad_norm": 0.5545916557312012, "learning_rate": 2.464276611306791e-05, "loss": 0.0509, "step": 16907 }, { "epoch": 0.2994328520766646, "grad_norm": 0.7950049042701721, "learning_rate": 2.4642107065507358e-05, "loss": 0.0958, "step": 16908 }, { "epoch": 0.299450561613693, "grad_norm": 1.1985266208648682, "learning_rate": 2.4641447986225254e-05, "loss": 0.1276, "step": 16909 }, { "epoch": 0.29946827115072144, "grad_norm": 0.6783439517021179, "learning_rate": 2.4640788875223756e-05, "loss": 0.0826, "step": 16910 }, { "epoch": 0.2994859806877499, "grad_norm": 0.6645179986953735, "learning_rate": 2.4640129732505044e-05, "loss": 0.0873, "step": 16911 }, { "epoch": 0.2995036902247783, "grad_norm": 1.1142635345458984, "learning_rate": 2.463947055807128e-05, "loss": 0.147, "step": 16912 }, { "epoch": 0.29952139976180675, "grad_norm": 0.7065452337265015, "learning_rate": 2.4638811351924624e-05, "loss": 0.0752, "step": 16913 }, { "epoch": 0.29953910929883515, "grad_norm": 1.2553417682647705, "learning_rate": 2.4638152114067263e-05, "loss": 0.0987, "step": 16914 }, { "epoch": 0.2995568188358636, "grad_norm": 0.746135413646698, "learning_rate": 2.4637492844501356e-05, "loss": 0.0977, "step": 16915 }, { "epoch": 0.299574528372892, "grad_norm": 0.4645518362522125, "learning_rate": 2.4636833543229064e-05, "loss": 0.0731, "step": 16916 }, { "epoch": 0.29959223790992046, "grad_norm": 0.7482659816741943, "learning_rate": 2.4636174210252573e-05, "loss": 0.1067, "step": 16917 }, { "epoch": 0.29960994744694885, "grad_norm": 1.0035558938980103, "learning_rate": 2.4635514845574038e-05, "loss": 0.1136, "step": 16918 }, { "epoch": 0.2996276569839773, "grad_norm": 0.9393836259841919, "learning_rate": 2.463485544919564e-05, "loss": 0.0922, "step": 16919 }, { "epoch": 0.2996453665210057, "grad_norm": 0.5751802921295166, "learning_rate": 2.4634196021119534e-05, "loss": 0.0556, "step": 16920 }, { "epoch": 0.29966307605803416, "grad_norm": 0.8567085266113281, "learning_rate": 2.4633536561347903e-05, "loss": 0.0726, "step": 16921 }, { "epoch": 0.29968078559506256, "grad_norm": 0.733565628528595, "learning_rate": 2.4632877069882915e-05, "loss": 0.1041, "step": 16922 }, { "epoch": 0.299698495132091, "grad_norm": 0.946732759475708, "learning_rate": 2.4632217546726724e-05, "loss": 0.1071, "step": 16923 }, { "epoch": 0.2997162046691194, "grad_norm": 0.9863656759262085, "learning_rate": 2.463155799188152e-05, "loss": 0.0713, "step": 16924 }, { "epoch": 0.29973391420614787, "grad_norm": 0.8894283771514893, "learning_rate": 2.463089840534946e-05, "loss": 0.0643, "step": 16925 }, { "epoch": 0.2997516237431763, "grad_norm": 0.7464848756790161, "learning_rate": 2.4630238787132726e-05, "loss": 0.0821, "step": 16926 }, { "epoch": 0.2997693332802047, "grad_norm": 0.6352330446243286, "learning_rate": 2.462957913723347e-05, "loss": 0.0696, "step": 16927 }, { "epoch": 0.2997870428172332, "grad_norm": 1.13495934009552, "learning_rate": 2.462891945565388e-05, "loss": 0.1711, "step": 16928 }, { "epoch": 0.29980475235426157, "grad_norm": 0.4934978783130646, "learning_rate": 2.462825974239612e-05, "loss": 0.0961, "step": 16929 }, { "epoch": 0.29982246189129, "grad_norm": 0.7916791439056396, "learning_rate": 2.4627599997462355e-05, "loss": 0.099, "step": 16930 }, { "epoch": 0.2998401714283184, "grad_norm": 0.6485906839370728, "learning_rate": 2.462694022085476e-05, "loss": 0.0917, "step": 16931 }, { "epoch": 0.2998578809653469, "grad_norm": 0.913557231426239, "learning_rate": 2.4626280412575505e-05, "loss": 0.0617, "step": 16932 }, { "epoch": 0.2998755905023753, "grad_norm": 0.9296627044677734, "learning_rate": 2.4625620572626766e-05, "loss": 0.0957, "step": 16933 }, { "epoch": 0.29989330003940373, "grad_norm": 0.6090419292449951, "learning_rate": 2.4624960701010704e-05, "loss": 0.0742, "step": 16934 }, { "epoch": 0.29991100957643213, "grad_norm": 0.9946900010108948, "learning_rate": 2.4624300797729495e-05, "loss": 0.1042, "step": 16935 }, { "epoch": 0.2999287191134606, "grad_norm": 0.6385928392410278, "learning_rate": 2.462364086278531e-05, "loss": 0.0942, "step": 16936 }, { "epoch": 0.299946428650489, "grad_norm": 1.1589093208312988, "learning_rate": 2.462298089618032e-05, "loss": 0.1042, "step": 16937 }, { "epoch": 0.29996413818751744, "grad_norm": 0.6075077652931213, "learning_rate": 2.4622320897916698e-05, "loss": 0.073, "step": 16938 }, { "epoch": 0.29998184772454584, "grad_norm": 0.5745444893836975, "learning_rate": 2.4621660867996615e-05, "loss": 0.0655, "step": 16939 }, { "epoch": 0.2999995572615743, "grad_norm": 1.3052048683166504, "learning_rate": 2.4621000806422237e-05, "loss": 0.1172, "step": 16940 }, { "epoch": 0.30001726679860274, "grad_norm": 0.6337313055992126, "learning_rate": 2.4620340713195736e-05, "loss": 0.0876, "step": 16941 }, { "epoch": 0.30003497633563114, "grad_norm": 0.9246070384979248, "learning_rate": 2.4619680588319296e-05, "loss": 0.0691, "step": 16942 }, { "epoch": 0.3000526858726596, "grad_norm": 0.6279768347740173, "learning_rate": 2.4619020431795076e-05, "loss": 0.0965, "step": 16943 }, { "epoch": 0.300070395409688, "grad_norm": 0.8471103310585022, "learning_rate": 2.461836024362525e-05, "loss": 0.1022, "step": 16944 }, { "epoch": 0.30008810494671645, "grad_norm": 0.7236282229423523, "learning_rate": 2.461770002381199e-05, "loss": 0.08, "step": 16945 }, { "epoch": 0.30010581448374485, "grad_norm": 0.9241182208061218, "learning_rate": 2.4617039772357474e-05, "loss": 0.0907, "step": 16946 }, { "epoch": 0.3001235240207733, "grad_norm": 0.6924760341644287, "learning_rate": 2.4616379489263875e-05, "loss": 0.0766, "step": 16947 }, { "epoch": 0.3001412335578017, "grad_norm": 0.6224448084831238, "learning_rate": 2.461571917453335e-05, "loss": 0.0756, "step": 16948 }, { "epoch": 0.30015894309483016, "grad_norm": 0.8196116089820862, "learning_rate": 2.4615058828168083e-05, "loss": 0.0624, "step": 16949 }, { "epoch": 0.30017665263185855, "grad_norm": 0.2954263687133789, "learning_rate": 2.4614398450170246e-05, "loss": 0.0906, "step": 16950 }, { "epoch": 0.300194362168887, "grad_norm": 0.9832664728164673, "learning_rate": 2.4613738040542014e-05, "loss": 0.0794, "step": 16951 }, { "epoch": 0.3002120717059154, "grad_norm": 0.534162700176239, "learning_rate": 2.461307759928555e-05, "loss": 0.1006, "step": 16952 }, { "epoch": 0.30022978124294386, "grad_norm": 0.9527263641357422, "learning_rate": 2.461241712640304e-05, "loss": 0.1192, "step": 16953 }, { "epoch": 0.30024749077997226, "grad_norm": 1.0351290702819824, "learning_rate": 2.4611756621896652e-05, "loss": 0.0551, "step": 16954 }, { "epoch": 0.3002652003170007, "grad_norm": 0.7895764708518982, "learning_rate": 2.461109608576855e-05, "loss": 0.1088, "step": 16955 }, { "epoch": 0.30028290985402917, "grad_norm": 0.7598890066146851, "learning_rate": 2.4610435518020915e-05, "loss": 0.0824, "step": 16956 }, { "epoch": 0.30030061939105757, "grad_norm": 0.7675661444664001, "learning_rate": 2.4609774918655917e-05, "loss": 0.0641, "step": 16957 }, { "epoch": 0.300318328928086, "grad_norm": 0.8691818118095398, "learning_rate": 2.4609114287675736e-05, "loss": 0.1197, "step": 16958 }, { "epoch": 0.3003360384651144, "grad_norm": 0.5749871134757996, "learning_rate": 2.4608453625082537e-05, "loss": 0.079, "step": 16959 }, { "epoch": 0.3003537480021429, "grad_norm": 0.6491918563842773, "learning_rate": 2.46077929308785e-05, "loss": 0.0918, "step": 16960 }, { "epoch": 0.30037145753917127, "grad_norm": 0.7970472574234009, "learning_rate": 2.4607132205065797e-05, "loss": 0.1437, "step": 16961 }, { "epoch": 0.3003891670761997, "grad_norm": 0.8168515563011169, "learning_rate": 2.4606471447646595e-05, "loss": 0.1183, "step": 16962 }, { "epoch": 0.3004068766132281, "grad_norm": 0.9676638841629028, "learning_rate": 2.460581065862308e-05, "loss": 0.1038, "step": 16963 }, { "epoch": 0.3004245861502566, "grad_norm": 0.9956635236740112, "learning_rate": 2.460514983799742e-05, "loss": 0.1103, "step": 16964 }, { "epoch": 0.300442295687285, "grad_norm": 0.5586684346199036, "learning_rate": 2.4604488985771782e-05, "loss": 0.0769, "step": 16965 }, { "epoch": 0.30046000522431343, "grad_norm": 0.7836746573448181, "learning_rate": 2.4603828101948353e-05, "loss": 0.0824, "step": 16966 }, { "epoch": 0.30047771476134183, "grad_norm": 0.6785818338394165, "learning_rate": 2.46031671865293e-05, "loss": 0.0939, "step": 16967 }, { "epoch": 0.3004954242983703, "grad_norm": 0.7922602295875549, "learning_rate": 2.4602506239516794e-05, "loss": 0.0899, "step": 16968 }, { "epoch": 0.3005131338353987, "grad_norm": 1.1148343086242676, "learning_rate": 2.4601845260913018e-05, "loss": 0.0858, "step": 16969 }, { "epoch": 0.30053084337242714, "grad_norm": 0.9043160080909729, "learning_rate": 2.4601184250720143e-05, "loss": 0.1383, "step": 16970 }, { "epoch": 0.3005485529094556, "grad_norm": 0.49477797746658325, "learning_rate": 2.460052320894034e-05, "loss": 0.0849, "step": 16971 }, { "epoch": 0.300566262446484, "grad_norm": 0.7715766429901123, "learning_rate": 2.4599862135575786e-05, "loss": 0.0745, "step": 16972 }, { "epoch": 0.30058397198351244, "grad_norm": 0.9862716197967529, "learning_rate": 2.459920103062866e-05, "loss": 0.0949, "step": 16973 }, { "epoch": 0.30060168152054084, "grad_norm": 0.5134018659591675, "learning_rate": 2.459853989410113e-05, "loss": 0.0766, "step": 16974 }, { "epoch": 0.3006193910575693, "grad_norm": 0.7127866744995117, "learning_rate": 2.4597878725995375e-05, "loss": 0.0876, "step": 16975 }, { "epoch": 0.3006371005945977, "grad_norm": 0.5680515766143799, "learning_rate": 2.459721752631357e-05, "loss": 0.0807, "step": 16976 }, { "epoch": 0.30065481013162615, "grad_norm": 0.6226948499679565, "learning_rate": 2.459655629505789e-05, "loss": 0.1042, "step": 16977 }, { "epoch": 0.30067251966865455, "grad_norm": 0.5754528641700745, "learning_rate": 2.4595895032230514e-05, "loss": 0.0849, "step": 16978 }, { "epoch": 0.300690229205683, "grad_norm": 0.8248561024665833, "learning_rate": 2.459523373783361e-05, "loss": 0.0717, "step": 16979 }, { "epoch": 0.3007079387427114, "grad_norm": 0.7844142317771912, "learning_rate": 2.459457241186936e-05, "loss": 0.0977, "step": 16980 }, { "epoch": 0.30072564827973985, "grad_norm": 0.8378962278366089, "learning_rate": 2.4593911054339937e-05, "loss": 0.1125, "step": 16981 }, { "epoch": 0.30074335781676825, "grad_norm": 0.9647140502929688, "learning_rate": 2.459324966524751e-05, "loss": 0.1108, "step": 16982 }, { "epoch": 0.3007610673537967, "grad_norm": 0.9701897501945496, "learning_rate": 2.459258824459427e-05, "loss": 0.0966, "step": 16983 }, { "epoch": 0.3007787768908251, "grad_norm": 0.6615763902664185, "learning_rate": 2.4591926792382383e-05, "loss": 0.0774, "step": 16984 }, { "epoch": 0.30079648642785356, "grad_norm": 0.9367160201072693, "learning_rate": 2.459126530861403e-05, "loss": 0.1225, "step": 16985 }, { "epoch": 0.300814195964882, "grad_norm": 0.5264294743537903, "learning_rate": 2.4590603793291383e-05, "loss": 0.0553, "step": 16986 }, { "epoch": 0.3008319055019104, "grad_norm": 0.6747719645500183, "learning_rate": 2.4589942246416614e-05, "loss": 0.0642, "step": 16987 }, { "epoch": 0.30084961503893887, "grad_norm": 0.7365988492965698, "learning_rate": 2.458928066799191e-05, "loss": 0.0836, "step": 16988 }, { "epoch": 0.30086732457596727, "grad_norm": 0.39555978775024414, "learning_rate": 2.458861905801944e-05, "loss": 0.1185, "step": 16989 }, { "epoch": 0.3008850341129957, "grad_norm": 0.7302737236022949, "learning_rate": 2.4587957416501382e-05, "loss": 0.0964, "step": 16990 }, { "epoch": 0.3009027436500241, "grad_norm": 1.0517076253890991, "learning_rate": 2.4587295743439914e-05, "loss": 0.0972, "step": 16991 }, { "epoch": 0.3009204531870526, "grad_norm": 1.4028512239456177, "learning_rate": 2.4586634038837215e-05, "loss": 0.1109, "step": 16992 }, { "epoch": 0.30093816272408097, "grad_norm": 1.2770735025405884, "learning_rate": 2.4585972302695465e-05, "loss": 0.0757, "step": 16993 }, { "epoch": 0.3009558722611094, "grad_norm": 1.0541592836380005, "learning_rate": 2.4585310535016825e-05, "loss": 0.0746, "step": 16994 }, { "epoch": 0.3009735817981378, "grad_norm": 0.7112905979156494, "learning_rate": 2.458464873580349e-05, "loss": 0.1298, "step": 16995 }, { "epoch": 0.3009912913351663, "grad_norm": 0.7786608338356018, "learning_rate": 2.4583986905057627e-05, "loss": 0.0919, "step": 16996 }, { "epoch": 0.3010090008721947, "grad_norm": 0.8013628125190735, "learning_rate": 2.458332504278142e-05, "loss": 0.1303, "step": 16997 }, { "epoch": 0.30102671040922313, "grad_norm": 0.9359055757522583, "learning_rate": 2.4582663148977035e-05, "loss": 0.0604, "step": 16998 }, { "epoch": 0.30104441994625153, "grad_norm": 0.871691107749939, "learning_rate": 2.458200122364667e-05, "loss": 0.0877, "step": 16999 }, { "epoch": 0.30106212948328, "grad_norm": 0.5823713541030884, "learning_rate": 2.4581339266792476e-05, "loss": 0.069, "step": 17000 }, { "epoch": 0.30107983902030844, "grad_norm": 0.6668957471847534, "learning_rate": 2.4580677278416654e-05, "loss": 0.0887, "step": 17001 }, { "epoch": 0.30109754855733684, "grad_norm": 0.8893439769744873, "learning_rate": 2.458001525852137e-05, "loss": 0.1116, "step": 17002 }, { "epoch": 0.3011152580943653, "grad_norm": 1.0865942239761353, "learning_rate": 2.4579353207108807e-05, "loss": 0.1294, "step": 17003 }, { "epoch": 0.3011329676313937, "grad_norm": 0.6976051926612854, "learning_rate": 2.457869112418114e-05, "loss": 0.124, "step": 17004 }, { "epoch": 0.30115067716842214, "grad_norm": 1.0716943740844727, "learning_rate": 2.4578029009740544e-05, "loss": 0.149, "step": 17005 }, { "epoch": 0.30116838670545054, "grad_norm": 0.9453533887863159, "learning_rate": 2.4577366863789207e-05, "loss": 0.0836, "step": 17006 }, { "epoch": 0.301186096242479, "grad_norm": 0.5956749320030212, "learning_rate": 2.45767046863293e-05, "loss": 0.0795, "step": 17007 }, { "epoch": 0.3012038057795074, "grad_norm": 1.0733399391174316, "learning_rate": 2.4576042477363e-05, "loss": 0.1191, "step": 17008 }, { "epoch": 0.30122151531653585, "grad_norm": 0.6770191192626953, "learning_rate": 2.457538023689249e-05, "loss": 0.0893, "step": 17009 }, { "epoch": 0.30123922485356425, "grad_norm": 0.9068747162818909, "learning_rate": 2.457471796491995e-05, "loss": 0.0984, "step": 17010 }, { "epoch": 0.3012569343905927, "grad_norm": 0.901780366897583, "learning_rate": 2.4574055661447557e-05, "loss": 0.0799, "step": 17011 }, { "epoch": 0.3012746439276211, "grad_norm": 0.6605696678161621, "learning_rate": 2.4573393326477488e-05, "loss": 0.0888, "step": 17012 }, { "epoch": 0.30129235346464955, "grad_norm": 0.6623247265815735, "learning_rate": 2.457273096001192e-05, "loss": 0.0755, "step": 17013 }, { "epoch": 0.30131006300167795, "grad_norm": 0.7281983494758606, "learning_rate": 2.4572068562053036e-05, "loss": 0.1114, "step": 17014 }, { "epoch": 0.3013277725387064, "grad_norm": 0.6616307497024536, "learning_rate": 2.457140613260302e-05, "loss": 0.0972, "step": 17015 }, { "epoch": 0.30134548207573486, "grad_norm": 0.43345871567726135, "learning_rate": 2.457074367166404e-05, "loss": 0.0965, "step": 17016 }, { "epoch": 0.30136319161276326, "grad_norm": 1.236430048942566, "learning_rate": 2.4570081179238286e-05, "loss": 0.0957, "step": 17017 }, { "epoch": 0.3013809011497917, "grad_norm": 0.4862203896045685, "learning_rate": 2.4569418655327932e-05, "loss": 0.0762, "step": 17018 }, { "epoch": 0.3013986106868201, "grad_norm": 0.7316309809684753, "learning_rate": 2.4568756099935153e-05, "loss": 0.049, "step": 17019 }, { "epoch": 0.30141632022384857, "grad_norm": 0.7929067015647888, "learning_rate": 2.456809351306214e-05, "loss": 0.0908, "step": 17020 }, { "epoch": 0.30143402976087696, "grad_norm": 1.0682580471038818, "learning_rate": 2.4567430894711066e-05, "loss": 0.0825, "step": 17021 }, { "epoch": 0.3014517392979054, "grad_norm": 1.781828761100769, "learning_rate": 2.4566768244884112e-05, "loss": 0.0969, "step": 17022 }, { "epoch": 0.3014694488349338, "grad_norm": 1.0330440998077393, "learning_rate": 2.4566105563583453e-05, "loss": 0.093, "step": 17023 }, { "epoch": 0.30148715837196227, "grad_norm": 1.0183578729629517, "learning_rate": 2.4565442850811282e-05, "loss": 0.0937, "step": 17024 }, { "epoch": 0.30150486790899067, "grad_norm": 0.8508448600769043, "learning_rate": 2.456478010656977e-05, "loss": 0.0933, "step": 17025 }, { "epoch": 0.3015225774460191, "grad_norm": 0.4751415550708771, "learning_rate": 2.4564117330861095e-05, "loss": 0.0658, "step": 17026 }, { "epoch": 0.3015402869830475, "grad_norm": 0.7215544581413269, "learning_rate": 2.4563454523687444e-05, "loss": 0.0535, "step": 17027 }, { "epoch": 0.301557996520076, "grad_norm": 0.6653730273246765, "learning_rate": 2.4562791685050995e-05, "loss": 0.1105, "step": 17028 }, { "epoch": 0.3015757060571044, "grad_norm": 0.9533371925354004, "learning_rate": 2.456212881495393e-05, "loss": 0.0678, "step": 17029 }, { "epoch": 0.30159341559413283, "grad_norm": 1.2038145065307617, "learning_rate": 2.4561465913398422e-05, "loss": 0.0969, "step": 17030 }, { "epoch": 0.3016111251311613, "grad_norm": 0.5531721711158752, "learning_rate": 2.4560802980386666e-05, "loss": 0.0897, "step": 17031 }, { "epoch": 0.3016288346681897, "grad_norm": 0.8005805015563965, "learning_rate": 2.456014001592083e-05, "loss": 0.1244, "step": 17032 }, { "epoch": 0.30164654420521814, "grad_norm": 0.7927770614624023, "learning_rate": 2.4559477020003105e-05, "loss": 0.105, "step": 17033 }, { "epoch": 0.30166425374224654, "grad_norm": 0.7698804140090942, "learning_rate": 2.4558813992635663e-05, "loss": 0.1617, "step": 17034 }, { "epoch": 0.301681963279275, "grad_norm": 0.8430996537208557, "learning_rate": 2.455815093382069e-05, "loss": 0.0857, "step": 17035 }, { "epoch": 0.3016996728163034, "grad_norm": 0.7681853175163269, "learning_rate": 2.4557487843560367e-05, "loss": 0.0742, "step": 17036 }, { "epoch": 0.30171738235333184, "grad_norm": 0.48602160811424255, "learning_rate": 2.455682472185688e-05, "loss": 0.0543, "step": 17037 }, { "epoch": 0.30173509189036024, "grad_norm": 0.6464071273803711, "learning_rate": 2.4556161568712402e-05, "loss": 0.0796, "step": 17038 }, { "epoch": 0.3017528014273887, "grad_norm": 1.1573578119277954, "learning_rate": 2.455549838412912e-05, "loss": 0.1061, "step": 17039 }, { "epoch": 0.3017705109644171, "grad_norm": 0.6729292273521423, "learning_rate": 2.4554835168109216e-05, "loss": 0.1267, "step": 17040 }, { "epoch": 0.30178822050144555, "grad_norm": 0.8752139210700989, "learning_rate": 2.4554171920654873e-05, "loss": 0.0841, "step": 17041 }, { "epoch": 0.30180593003847395, "grad_norm": 1.0745004415512085, "learning_rate": 2.4553508641768265e-05, "loss": 0.1304, "step": 17042 }, { "epoch": 0.3018236395755024, "grad_norm": 0.7846280336380005, "learning_rate": 2.4552845331451585e-05, "loss": 0.0895, "step": 17043 }, { "epoch": 0.3018413491125308, "grad_norm": 0.6925180554389954, "learning_rate": 2.4552181989707004e-05, "loss": 0.0602, "step": 17044 }, { "epoch": 0.30185905864955925, "grad_norm": 1.0252493619918823, "learning_rate": 2.4551518616536718e-05, "loss": 0.0697, "step": 17045 }, { "epoch": 0.3018767681865877, "grad_norm": 0.9155256152153015, "learning_rate": 2.4550855211942896e-05, "loss": 0.1038, "step": 17046 }, { "epoch": 0.3018944777236161, "grad_norm": 0.8617802858352661, "learning_rate": 2.4550191775927726e-05, "loss": 0.1005, "step": 17047 }, { "epoch": 0.30191218726064456, "grad_norm": 1.1435093879699707, "learning_rate": 2.4549528308493393e-05, "loss": 0.112, "step": 17048 }, { "epoch": 0.30192989679767296, "grad_norm": 0.65324467420578, "learning_rate": 2.454886480964208e-05, "loss": 0.1039, "step": 17049 }, { "epoch": 0.3019476063347014, "grad_norm": 0.6307874917984009, "learning_rate": 2.4548201279375968e-05, "loss": 0.0971, "step": 17050 }, { "epoch": 0.3019653158717298, "grad_norm": 0.6849192976951599, "learning_rate": 2.4547537717697236e-05, "loss": 0.0762, "step": 17051 }, { "epoch": 0.30198302540875827, "grad_norm": 0.7520576119422913, "learning_rate": 2.4546874124608073e-05, "loss": 0.0737, "step": 17052 }, { "epoch": 0.30200073494578666, "grad_norm": 0.6780928373336792, "learning_rate": 2.4546210500110657e-05, "loss": 0.071, "step": 17053 }, { "epoch": 0.3020184444828151, "grad_norm": 1.9381189346313477, "learning_rate": 2.4545546844207174e-05, "loss": 0.1106, "step": 17054 }, { "epoch": 0.3020361540198435, "grad_norm": 1.002485990524292, "learning_rate": 2.4544883156899805e-05, "loss": 0.1107, "step": 17055 }, { "epoch": 0.30205386355687197, "grad_norm": 0.8868548274040222, "learning_rate": 2.4544219438190743e-05, "loss": 0.0914, "step": 17056 }, { "epoch": 0.30207157309390037, "grad_norm": 0.7133104205131531, "learning_rate": 2.4543555688082163e-05, "loss": 0.1168, "step": 17057 }, { "epoch": 0.3020892826309288, "grad_norm": 1.2385259866714478, "learning_rate": 2.454289190657625e-05, "loss": 0.1155, "step": 17058 }, { "epoch": 0.3021069921679572, "grad_norm": 0.9595214128494263, "learning_rate": 2.4542228093675183e-05, "loss": 0.0978, "step": 17059 }, { "epoch": 0.3021247017049857, "grad_norm": 0.6038409471511841, "learning_rate": 2.4541564249381154e-05, "loss": 0.1202, "step": 17060 }, { "epoch": 0.30214241124201413, "grad_norm": 1.1865237951278687, "learning_rate": 2.454090037369634e-05, "loss": 0.1302, "step": 17061 }, { "epoch": 0.30216012077904253, "grad_norm": 0.7260255813598633, "learning_rate": 2.4540236466622933e-05, "loss": 0.096, "step": 17062 }, { "epoch": 0.302177830316071, "grad_norm": 0.8836692571640015, "learning_rate": 2.4539572528163113e-05, "loss": 0.0852, "step": 17063 }, { "epoch": 0.3021955398530994, "grad_norm": 1.0406018495559692, "learning_rate": 2.4538908558319062e-05, "loss": 0.1061, "step": 17064 }, { "epoch": 0.30221324939012784, "grad_norm": 0.8639060854911804, "learning_rate": 2.4538244557092972e-05, "loss": 0.0809, "step": 17065 }, { "epoch": 0.30223095892715623, "grad_norm": 0.7501336336135864, "learning_rate": 2.4537580524487018e-05, "loss": 0.0894, "step": 17066 }, { "epoch": 0.3022486684641847, "grad_norm": 0.8516223430633545, "learning_rate": 2.453691646050339e-05, "loss": 0.091, "step": 17067 }, { "epoch": 0.3022663780012131, "grad_norm": 0.8621813654899597, "learning_rate": 2.4536252365144268e-05, "loss": 0.0871, "step": 17068 }, { "epoch": 0.30228408753824154, "grad_norm": 0.9652262330055237, "learning_rate": 2.4535588238411843e-05, "loss": 0.092, "step": 17069 }, { "epoch": 0.30230179707526994, "grad_norm": 1.071012020111084, "learning_rate": 2.4534924080308295e-05, "loss": 0.0872, "step": 17070 }, { "epoch": 0.3023195066122984, "grad_norm": 0.7935333251953125, "learning_rate": 2.4534259890835814e-05, "loss": 0.1162, "step": 17071 }, { "epoch": 0.3023372161493268, "grad_norm": 0.8126487731933594, "learning_rate": 2.453359566999658e-05, "loss": 0.0786, "step": 17072 }, { "epoch": 0.30235492568635525, "grad_norm": 0.7135056257247925, "learning_rate": 2.4532931417792782e-05, "loss": 0.1129, "step": 17073 }, { "epoch": 0.30237263522338365, "grad_norm": 0.7542175054550171, "learning_rate": 2.4532267134226603e-05, "loss": 0.0743, "step": 17074 }, { "epoch": 0.3023903447604121, "grad_norm": 1.2198448181152344, "learning_rate": 2.453160281930023e-05, "loss": 0.1099, "step": 17075 }, { "epoch": 0.30240805429744055, "grad_norm": 0.8154441714286804, "learning_rate": 2.453093847301585e-05, "loss": 0.0831, "step": 17076 }, { "epoch": 0.30242576383446895, "grad_norm": 1.0137267112731934, "learning_rate": 2.453027409537564e-05, "loss": 0.1296, "step": 17077 }, { "epoch": 0.3024434733714974, "grad_norm": 0.9482935070991516, "learning_rate": 2.4529609686381796e-05, "loss": 0.1224, "step": 17078 }, { "epoch": 0.3024611829085258, "grad_norm": 0.7268438339233398, "learning_rate": 2.4528945246036503e-05, "loss": 0.0793, "step": 17079 }, { "epoch": 0.30247889244555426, "grad_norm": 0.985121488571167, "learning_rate": 2.452828077434194e-05, "loss": 0.1058, "step": 17080 }, { "epoch": 0.30249660198258266, "grad_norm": 0.8384871482849121, "learning_rate": 2.45276162713003e-05, "loss": 0.0904, "step": 17081 }, { "epoch": 0.3025143115196111, "grad_norm": 0.6757327318191528, "learning_rate": 2.452695173691376e-05, "loss": 0.0864, "step": 17082 }, { "epoch": 0.3025320210566395, "grad_norm": 1.1831859350204468, "learning_rate": 2.452628717118452e-05, "loss": 0.1036, "step": 17083 }, { "epoch": 0.30254973059366796, "grad_norm": 0.7230767011642456, "learning_rate": 2.4525622574114755e-05, "loss": 0.1372, "step": 17084 }, { "epoch": 0.30256744013069636, "grad_norm": 0.714989960193634, "learning_rate": 2.4524957945706657e-05, "loss": 0.0891, "step": 17085 }, { "epoch": 0.3025851496677248, "grad_norm": 0.6721889972686768, "learning_rate": 2.4524293285962408e-05, "loss": 0.0773, "step": 17086 }, { "epoch": 0.3026028592047532, "grad_norm": 0.8210732936859131, "learning_rate": 2.4523628594884195e-05, "loss": 0.0938, "step": 17087 }, { "epoch": 0.30262056874178167, "grad_norm": 0.578783392906189, "learning_rate": 2.4522963872474213e-05, "loss": 0.1173, "step": 17088 }, { "epoch": 0.30263827827881007, "grad_norm": 0.8716108798980713, "learning_rate": 2.4522299118734644e-05, "loss": 0.096, "step": 17089 }, { "epoch": 0.3026559878158385, "grad_norm": 0.7958414554595947, "learning_rate": 2.4521634333667666e-05, "loss": 0.1269, "step": 17090 }, { "epoch": 0.302673697352867, "grad_norm": 0.7501813769340515, "learning_rate": 2.4520969517275482e-05, "loss": 0.0577, "step": 17091 }, { "epoch": 0.3026914068898954, "grad_norm": 0.9856328964233398, "learning_rate": 2.4520304669560264e-05, "loss": 0.0974, "step": 17092 }, { "epoch": 0.30270911642692383, "grad_norm": 0.747674822807312, "learning_rate": 2.4519639790524216e-05, "loss": 0.0669, "step": 17093 }, { "epoch": 0.30272682596395223, "grad_norm": 0.5417386293411255, "learning_rate": 2.451897488016951e-05, "loss": 0.0831, "step": 17094 }, { "epoch": 0.3027445355009807, "grad_norm": 0.6132680773735046, "learning_rate": 2.4518309938498336e-05, "loss": 0.0717, "step": 17095 }, { "epoch": 0.3027622450380091, "grad_norm": 1.05588698387146, "learning_rate": 2.4517644965512895e-05, "loss": 0.1074, "step": 17096 }, { "epoch": 0.30277995457503754, "grad_norm": 0.7801482677459717, "learning_rate": 2.4516979961215355e-05, "loss": 0.0975, "step": 17097 }, { "epoch": 0.30279766411206593, "grad_norm": 1.0231035947799683, "learning_rate": 2.451631492560792e-05, "loss": 0.1086, "step": 17098 }, { "epoch": 0.3028153736490944, "grad_norm": 0.6847441792488098, "learning_rate": 2.4515649858692766e-05, "loss": 0.0939, "step": 17099 }, { "epoch": 0.3028330831861228, "grad_norm": 1.095710039138794, "learning_rate": 2.4514984760472093e-05, "loss": 0.1122, "step": 17100 }, { "epoch": 0.30285079272315124, "grad_norm": 0.8475447297096252, "learning_rate": 2.4514319630948075e-05, "loss": 0.0565, "step": 17101 }, { "epoch": 0.30286850226017964, "grad_norm": 0.5504086017608643, "learning_rate": 2.4513654470122916e-05, "loss": 0.0939, "step": 17102 }, { "epoch": 0.3028862117972081, "grad_norm": 0.841733455657959, "learning_rate": 2.451298927799879e-05, "loss": 0.1022, "step": 17103 }, { "epoch": 0.3029039213342365, "grad_norm": 0.9352737665176392, "learning_rate": 2.4512324054577892e-05, "loss": 0.0954, "step": 17104 }, { "epoch": 0.30292163087126495, "grad_norm": 0.7903913855552673, "learning_rate": 2.451165879986241e-05, "loss": 0.0861, "step": 17105 }, { "epoch": 0.3029393404082934, "grad_norm": 0.6366719603538513, "learning_rate": 2.451099351385453e-05, "loss": 0.1137, "step": 17106 }, { "epoch": 0.3029570499453218, "grad_norm": 0.7728797197341919, "learning_rate": 2.451032819655645e-05, "loss": 0.1265, "step": 17107 }, { "epoch": 0.30297475948235025, "grad_norm": 0.754555344581604, "learning_rate": 2.450966284797035e-05, "loss": 0.0968, "step": 17108 }, { "epoch": 0.30299246901937865, "grad_norm": 1.01461923122406, "learning_rate": 2.450899746809842e-05, "loss": 0.0998, "step": 17109 }, { "epoch": 0.3030101785564071, "grad_norm": 0.42587679624557495, "learning_rate": 2.450833205694285e-05, "loss": 0.0921, "step": 17110 }, { "epoch": 0.3030278880934355, "grad_norm": 1.0462607145309448, "learning_rate": 2.4507666614505828e-05, "loss": 0.0812, "step": 17111 }, { "epoch": 0.30304559763046396, "grad_norm": 0.9674702286720276, "learning_rate": 2.4507001140789548e-05, "loss": 0.0875, "step": 17112 }, { "epoch": 0.30306330716749236, "grad_norm": 0.6814413070678711, "learning_rate": 2.4506335635796197e-05, "loss": 0.0631, "step": 17113 }, { "epoch": 0.3030810167045208, "grad_norm": 0.9728447198867798, "learning_rate": 2.450567009952796e-05, "loss": 0.1392, "step": 17114 }, { "epoch": 0.3030987262415492, "grad_norm": 1.1685731410980225, "learning_rate": 2.4505004531987026e-05, "loss": 0.1093, "step": 17115 }, { "epoch": 0.30311643577857766, "grad_norm": 0.6907482743263245, "learning_rate": 2.450433893317559e-05, "loss": 0.0683, "step": 17116 }, { "epoch": 0.30313414531560606, "grad_norm": 0.4099592864513397, "learning_rate": 2.4503673303095843e-05, "loss": 0.0573, "step": 17117 }, { "epoch": 0.3031518548526345, "grad_norm": 1.1924527883529663, "learning_rate": 2.4503007641749974e-05, "loss": 0.1486, "step": 17118 }, { "epoch": 0.3031695643896629, "grad_norm": 0.5666170120239258, "learning_rate": 2.450234194914017e-05, "loss": 0.096, "step": 17119 }, { "epoch": 0.30318727392669137, "grad_norm": 1.375298261642456, "learning_rate": 2.450167622526862e-05, "loss": 0.1246, "step": 17120 }, { "epoch": 0.3032049834637198, "grad_norm": 0.4892345666885376, "learning_rate": 2.4501010470137518e-05, "loss": 0.0972, "step": 17121 }, { "epoch": 0.3032226930007482, "grad_norm": 0.9128822684288025, "learning_rate": 2.450034468374905e-05, "loss": 0.1304, "step": 17122 }, { "epoch": 0.3032404025377767, "grad_norm": 1.2062616348266602, "learning_rate": 2.449967886610541e-05, "loss": 0.0964, "step": 17123 }, { "epoch": 0.3032581120748051, "grad_norm": 0.3931314945220947, "learning_rate": 2.4499013017208788e-05, "loss": 0.1116, "step": 17124 }, { "epoch": 0.30327582161183353, "grad_norm": 0.8348355293273926, "learning_rate": 2.4498347137061373e-05, "loss": 0.0984, "step": 17125 }, { "epoch": 0.30329353114886193, "grad_norm": 0.8819567561149597, "learning_rate": 2.4497681225665358e-05, "loss": 0.089, "step": 17126 }, { "epoch": 0.3033112406858904, "grad_norm": 0.7290237545967102, "learning_rate": 2.4497015283022934e-05, "loss": 0.0968, "step": 17127 }, { "epoch": 0.3033289502229188, "grad_norm": 0.9369698762893677, "learning_rate": 2.4496349309136287e-05, "loss": 0.1441, "step": 17128 }, { "epoch": 0.30334665975994723, "grad_norm": 0.649495005607605, "learning_rate": 2.449568330400761e-05, "loss": 0.0898, "step": 17129 }, { "epoch": 0.30336436929697563, "grad_norm": 1.2917712926864624, "learning_rate": 2.44950172676391e-05, "loss": 0.1196, "step": 17130 }, { "epoch": 0.3033820788340041, "grad_norm": 1.1409296989440918, "learning_rate": 2.449435120003294e-05, "loss": 0.09, "step": 17131 }, { "epoch": 0.3033997883710325, "grad_norm": 0.6408012509346008, "learning_rate": 2.4493685101191324e-05, "loss": 0.076, "step": 17132 }, { "epoch": 0.30341749790806094, "grad_norm": 0.6891979575157166, "learning_rate": 2.4493018971116447e-05, "loss": 0.1085, "step": 17133 }, { "epoch": 0.3034352074450894, "grad_norm": 0.9376474022865295, "learning_rate": 2.44923528098105e-05, "loss": 0.1102, "step": 17134 }, { "epoch": 0.3034529169821178, "grad_norm": 0.5860794186592102, "learning_rate": 2.4491686617275667e-05, "loss": 0.081, "step": 17135 }, { "epoch": 0.30347062651914625, "grad_norm": 0.8135630488395691, "learning_rate": 2.4491020393514148e-05, "loss": 0.0593, "step": 17136 }, { "epoch": 0.30348833605617465, "grad_norm": 1.0027061700820923, "learning_rate": 2.449035413852813e-05, "loss": 0.0945, "step": 17137 }, { "epoch": 0.3035060455932031, "grad_norm": 1.3275647163391113, "learning_rate": 2.448968785231981e-05, "loss": 0.1152, "step": 17138 }, { "epoch": 0.3035237551302315, "grad_norm": 0.7785931825637817, "learning_rate": 2.4489021534891375e-05, "loss": 0.1019, "step": 17139 }, { "epoch": 0.30354146466725995, "grad_norm": 0.8638753890991211, "learning_rate": 2.4488355186245016e-05, "loss": 0.1092, "step": 17140 }, { "epoch": 0.30355917420428835, "grad_norm": 0.850935161113739, "learning_rate": 2.448768880638293e-05, "loss": 0.083, "step": 17141 }, { "epoch": 0.3035768837413168, "grad_norm": 0.6971922516822815, "learning_rate": 2.448702239530731e-05, "loss": 0.0958, "step": 17142 }, { "epoch": 0.3035945932783452, "grad_norm": 0.49086886644363403, "learning_rate": 2.4486355953020347e-05, "loss": 0.0854, "step": 17143 }, { "epoch": 0.30361230281537366, "grad_norm": 0.6478336453437805, "learning_rate": 2.448568947952423e-05, "loss": 0.0806, "step": 17144 }, { "epoch": 0.30363001235240206, "grad_norm": 0.7930381298065186, "learning_rate": 2.4485022974821153e-05, "loss": 0.1, "step": 17145 }, { "epoch": 0.3036477218894305, "grad_norm": 1.063760757446289, "learning_rate": 2.4484356438913312e-05, "loss": 0.0972, "step": 17146 }, { "epoch": 0.3036654314264589, "grad_norm": 0.3824215233325958, "learning_rate": 2.448368987180289e-05, "loss": 0.0431, "step": 17147 }, { "epoch": 0.30368314096348736, "grad_norm": 0.36557117104530334, "learning_rate": 2.4483023273492094e-05, "loss": 0.0881, "step": 17148 }, { "epoch": 0.3037008505005158, "grad_norm": 2.1228678226470947, "learning_rate": 2.4482356643983115e-05, "loss": 0.0886, "step": 17149 }, { "epoch": 0.3037185600375442, "grad_norm": 0.7344202995300293, "learning_rate": 2.4481689983278135e-05, "loss": 0.0692, "step": 17150 }, { "epoch": 0.30373626957457267, "grad_norm": 0.5824092030525208, "learning_rate": 2.4481023291379355e-05, "loss": 0.086, "step": 17151 }, { "epoch": 0.30375397911160107, "grad_norm": 0.7867928743362427, "learning_rate": 2.448035656828897e-05, "loss": 0.1194, "step": 17152 }, { "epoch": 0.3037716886486295, "grad_norm": 0.8044564127922058, "learning_rate": 2.447968981400917e-05, "loss": 0.0913, "step": 17153 }, { "epoch": 0.3037893981856579, "grad_norm": 0.681422233581543, "learning_rate": 2.4479023028542144e-05, "loss": 0.0932, "step": 17154 }, { "epoch": 0.3038071077226864, "grad_norm": 0.5341238975524902, "learning_rate": 2.4478356211890102e-05, "loss": 0.0948, "step": 17155 }, { "epoch": 0.3038248172597148, "grad_norm": 0.6489216685295105, "learning_rate": 2.4477689364055216e-05, "loss": 0.1047, "step": 17156 }, { "epoch": 0.30384252679674323, "grad_norm": 0.8897441625595093, "learning_rate": 2.4477022485039692e-05, "loss": 0.0846, "step": 17157 }, { "epoch": 0.3038602363337716, "grad_norm": 1.002253532409668, "learning_rate": 2.447635557484573e-05, "loss": 0.102, "step": 17158 }, { "epoch": 0.3038779458708001, "grad_norm": 0.8115721344947815, "learning_rate": 2.4475688633475508e-05, "loss": 0.0878, "step": 17159 }, { "epoch": 0.3038956554078285, "grad_norm": 0.6829407811164856, "learning_rate": 2.4475021660931236e-05, "loss": 0.0932, "step": 17160 }, { "epoch": 0.30391336494485693, "grad_norm": 0.7034497261047363, "learning_rate": 2.447435465721509e-05, "loss": 0.0647, "step": 17161 }, { "epoch": 0.30393107448188533, "grad_norm": 0.6973516345024109, "learning_rate": 2.447368762232929e-05, "loss": 0.13, "step": 17162 }, { "epoch": 0.3039487840189138, "grad_norm": 0.7434231638908386, "learning_rate": 2.447302055627601e-05, "loss": 0.1242, "step": 17163 }, { "epoch": 0.30396649355594224, "grad_norm": 0.7206902503967285, "learning_rate": 2.4472353459057445e-05, "loss": 0.0704, "step": 17164 }, { "epoch": 0.30398420309297064, "grad_norm": 0.890618622303009, "learning_rate": 2.44716863306758e-05, "loss": 0.0754, "step": 17165 }, { "epoch": 0.3040019126299991, "grad_norm": 0.5772426724433899, "learning_rate": 2.4471019171133265e-05, "loss": 0.0964, "step": 17166 }, { "epoch": 0.3040196221670275, "grad_norm": 0.7114921808242798, "learning_rate": 2.447035198043204e-05, "loss": 0.061, "step": 17167 }, { "epoch": 0.30403733170405595, "grad_norm": 0.45072734355926514, "learning_rate": 2.446968475857431e-05, "loss": 0.075, "step": 17168 }, { "epoch": 0.30405504124108435, "grad_norm": 0.5731795430183411, "learning_rate": 2.446901750556227e-05, "loss": 0.0811, "step": 17169 }, { "epoch": 0.3040727507781128, "grad_norm": 1.1836479902267456, "learning_rate": 2.4468350221398127e-05, "loss": 0.1289, "step": 17170 }, { "epoch": 0.3040904603151412, "grad_norm": 0.9100940227508545, "learning_rate": 2.4467682906084066e-05, "loss": 0.0924, "step": 17171 }, { "epoch": 0.30410816985216965, "grad_norm": 2.2580463886260986, "learning_rate": 2.4467015559622284e-05, "loss": 0.0661, "step": 17172 }, { "epoch": 0.30412587938919805, "grad_norm": 0.7962250113487244, "learning_rate": 2.4466348182014984e-05, "loss": 0.089, "step": 17173 }, { "epoch": 0.3041435889262265, "grad_norm": 0.7662795782089233, "learning_rate": 2.446568077326435e-05, "loss": 0.0754, "step": 17174 }, { "epoch": 0.3041612984632549, "grad_norm": 0.8885641694068909, "learning_rate": 2.446501333337259e-05, "loss": 0.0895, "step": 17175 }, { "epoch": 0.30417900800028336, "grad_norm": 0.8355216383934021, "learning_rate": 2.446434586234189e-05, "loss": 0.1143, "step": 17176 }, { "epoch": 0.30419671753731176, "grad_norm": 0.8786118030548096, "learning_rate": 2.4463678360174446e-05, "loss": 0.1213, "step": 17177 }, { "epoch": 0.3042144270743402, "grad_norm": 0.8609243035316467, "learning_rate": 2.4463010826872462e-05, "loss": 0.09, "step": 17178 }, { "epoch": 0.30423213661136866, "grad_norm": 1.2385934591293335, "learning_rate": 2.4462343262438128e-05, "loss": 0.1202, "step": 17179 }, { "epoch": 0.30424984614839706, "grad_norm": 1.1073781251907349, "learning_rate": 2.4461675666873642e-05, "loss": 0.1228, "step": 17180 }, { "epoch": 0.3042675556854255, "grad_norm": 1.0276732444763184, "learning_rate": 2.44610080401812e-05, "loss": 0.1161, "step": 17181 }, { "epoch": 0.3042852652224539, "grad_norm": 0.6815854907035828, "learning_rate": 2.4460340382363e-05, "loss": 0.0934, "step": 17182 }, { "epoch": 0.30430297475948237, "grad_norm": 1.0103058815002441, "learning_rate": 2.4459672693421238e-05, "loss": 0.1007, "step": 17183 }, { "epoch": 0.30432068429651077, "grad_norm": 0.6771447658538818, "learning_rate": 2.4459004973358104e-05, "loss": 0.0691, "step": 17184 }, { "epoch": 0.3043383938335392, "grad_norm": 0.7383326888084412, "learning_rate": 2.4458337222175804e-05, "loss": 0.1078, "step": 17185 }, { "epoch": 0.3043561033705676, "grad_norm": 1.1512101888656616, "learning_rate": 2.4457669439876532e-05, "loss": 0.088, "step": 17186 }, { "epoch": 0.3043738129075961, "grad_norm": 0.8342804312705994, "learning_rate": 2.4457001626462484e-05, "loss": 0.0831, "step": 17187 }, { "epoch": 0.3043915224446245, "grad_norm": 1.0357977151870728, "learning_rate": 2.4456333781935855e-05, "loss": 0.1181, "step": 17188 }, { "epoch": 0.30440923198165293, "grad_norm": 0.599380612373352, "learning_rate": 2.4455665906298848e-05, "loss": 0.0712, "step": 17189 }, { "epoch": 0.3044269415186813, "grad_norm": 0.9588901996612549, "learning_rate": 2.445499799955365e-05, "loss": 0.1108, "step": 17190 }, { "epoch": 0.3044446510557098, "grad_norm": 0.9302570819854736, "learning_rate": 2.445433006170247e-05, "loss": 0.111, "step": 17191 }, { "epoch": 0.3044623605927382, "grad_norm": 0.7092972993850708, "learning_rate": 2.44536620927475e-05, "loss": 0.0835, "step": 17192 }, { "epoch": 0.30448007012976663, "grad_norm": 0.9918453097343445, "learning_rate": 2.4452994092690935e-05, "loss": 0.0677, "step": 17193 }, { "epoch": 0.3044977796667951, "grad_norm": 0.9384862780570984, "learning_rate": 2.445232606153498e-05, "loss": 0.1062, "step": 17194 }, { "epoch": 0.3045154892038235, "grad_norm": 0.7327116131782532, "learning_rate": 2.4451657999281828e-05, "loss": 0.129, "step": 17195 }, { "epoch": 0.30453319874085194, "grad_norm": 0.37744101881980896, "learning_rate": 2.4450989905933677e-05, "loss": 0.0618, "step": 17196 }, { "epoch": 0.30455090827788034, "grad_norm": 0.7506256103515625, "learning_rate": 2.445032178149272e-05, "loss": 0.0968, "step": 17197 }, { "epoch": 0.3045686178149088, "grad_norm": 1.3419973850250244, "learning_rate": 2.4449653625961162e-05, "loss": 0.0839, "step": 17198 }, { "epoch": 0.3045863273519372, "grad_norm": 0.9454348683357239, "learning_rate": 2.4448985439341202e-05, "loss": 0.112, "step": 17199 }, { "epoch": 0.30460403688896565, "grad_norm": 0.6384375095367432, "learning_rate": 2.444831722163503e-05, "loss": 0.0829, "step": 17200 }, { "epoch": 0.30462174642599404, "grad_norm": 0.7960590124130249, "learning_rate": 2.444764897284486e-05, "loss": 0.0872, "step": 17201 }, { "epoch": 0.3046394559630225, "grad_norm": 1.0037329196929932, "learning_rate": 2.4446980692972874e-05, "loss": 0.096, "step": 17202 }, { "epoch": 0.3046571655000509, "grad_norm": 0.6433064937591553, "learning_rate": 2.4446312382021278e-05, "loss": 0.1193, "step": 17203 }, { "epoch": 0.30467487503707935, "grad_norm": 0.6061484813690186, "learning_rate": 2.444564403999227e-05, "loss": 0.0799, "step": 17204 }, { "epoch": 0.30469258457410775, "grad_norm": 0.7417743802070618, "learning_rate": 2.444497566688805e-05, "loss": 0.0957, "step": 17205 }, { "epoch": 0.3047102941111362, "grad_norm": 0.6557458639144897, "learning_rate": 2.4444307262710807e-05, "loss": 0.0937, "step": 17206 }, { "epoch": 0.3047280036481646, "grad_norm": 0.4726615846157074, "learning_rate": 2.4443638827462757e-05, "loss": 0.0562, "step": 17207 }, { "epoch": 0.30474571318519306, "grad_norm": 0.9927764534950256, "learning_rate": 2.4442970361146088e-05, "loss": 0.1254, "step": 17208 }, { "epoch": 0.3047634227222215, "grad_norm": 1.0108287334442139, "learning_rate": 2.4442301863763004e-05, "loss": 0.1075, "step": 17209 }, { "epoch": 0.3047811322592499, "grad_norm": 0.7150803208351135, "learning_rate": 2.44416333353157e-05, "loss": 0.1092, "step": 17210 }, { "epoch": 0.30479884179627836, "grad_norm": 0.8296600580215454, "learning_rate": 2.4440964775806377e-05, "loss": 0.0724, "step": 17211 }, { "epoch": 0.30481655133330676, "grad_norm": 0.5312799215316772, "learning_rate": 2.4440296185237235e-05, "loss": 0.0982, "step": 17212 }, { "epoch": 0.3048342608703352, "grad_norm": 0.8909763097763062, "learning_rate": 2.443962756361047e-05, "loss": 0.0909, "step": 17213 }, { "epoch": 0.3048519704073636, "grad_norm": 1.4683524370193481, "learning_rate": 2.4438958910928287e-05, "loss": 0.1541, "step": 17214 }, { "epoch": 0.30486967994439207, "grad_norm": 0.8618165254592896, "learning_rate": 2.4438290227192894e-05, "loss": 0.0719, "step": 17215 }, { "epoch": 0.30488738948142047, "grad_norm": 2.008645534515381, "learning_rate": 2.4437621512406468e-05, "loss": 0.0633, "step": 17216 }, { "epoch": 0.3049050990184489, "grad_norm": 0.8294517397880554, "learning_rate": 2.4436952766571226e-05, "loss": 0.0893, "step": 17217 }, { "epoch": 0.3049228085554773, "grad_norm": 0.9261902570724487, "learning_rate": 2.443628398968936e-05, "loss": 0.0946, "step": 17218 }, { "epoch": 0.3049405180925058, "grad_norm": 0.7215805649757385, "learning_rate": 2.4435615181763077e-05, "loss": 0.0806, "step": 17219 }, { "epoch": 0.3049582276295342, "grad_norm": 0.8308039307594299, "learning_rate": 2.4434946342794574e-05, "loss": 0.0992, "step": 17220 }, { "epoch": 0.3049759371665626, "grad_norm": 0.7113003730773926, "learning_rate": 2.4434277472786054e-05, "loss": 0.0681, "step": 17221 }, { "epoch": 0.304993646703591, "grad_norm": 0.6900041103363037, "learning_rate": 2.4433608571739716e-05, "loss": 0.0668, "step": 17222 }, { "epoch": 0.3050113562406195, "grad_norm": 1.4072306156158447, "learning_rate": 2.4432939639657758e-05, "loss": 0.0842, "step": 17223 }, { "epoch": 0.30502906577764793, "grad_norm": 0.7864885926246643, "learning_rate": 2.4432270676542384e-05, "loss": 0.0792, "step": 17224 }, { "epoch": 0.30504677531467633, "grad_norm": 0.4748286008834839, "learning_rate": 2.4431601682395795e-05, "loss": 0.0755, "step": 17225 }, { "epoch": 0.3050644848517048, "grad_norm": 0.695310115814209, "learning_rate": 2.4430932657220187e-05, "loss": 0.0842, "step": 17226 }, { "epoch": 0.3050821943887332, "grad_norm": 1.0450489521026611, "learning_rate": 2.443026360101776e-05, "loss": 0.1006, "step": 17227 }, { "epoch": 0.30509990392576164, "grad_norm": 0.9145596623420715, "learning_rate": 2.442959451379073e-05, "loss": 0.0975, "step": 17228 }, { "epoch": 0.30511761346279004, "grad_norm": 0.747978687286377, "learning_rate": 2.4428925395541277e-05, "loss": 0.0634, "step": 17229 }, { "epoch": 0.3051353229998185, "grad_norm": 0.6141019463539124, "learning_rate": 2.4428256246271622e-05, "loss": 0.053, "step": 17230 }, { "epoch": 0.3051530325368469, "grad_norm": 0.7363161444664001, "learning_rate": 2.4427587065983956e-05, "loss": 0.1007, "step": 17231 }, { "epoch": 0.30517074207387535, "grad_norm": 0.6746520400047302, "learning_rate": 2.4426917854680477e-05, "loss": 0.0725, "step": 17232 }, { "epoch": 0.30518845161090374, "grad_norm": 0.707756519317627, "learning_rate": 2.44262486123634e-05, "loss": 0.0837, "step": 17233 }, { "epoch": 0.3052061611479322, "grad_norm": 0.7928666472434998, "learning_rate": 2.442557933903491e-05, "loss": 0.1102, "step": 17234 }, { "epoch": 0.3052238706849606, "grad_norm": 1.0379184484481812, "learning_rate": 2.4424910034697222e-05, "loss": 0.1813, "step": 17235 }, { "epoch": 0.30524158022198905, "grad_norm": 0.9058732986450195, "learning_rate": 2.442424069935253e-05, "loss": 0.0844, "step": 17236 }, { "epoch": 0.30525928975901745, "grad_norm": 0.7589085698127747, "learning_rate": 2.4423571333003043e-05, "loss": 0.0686, "step": 17237 }, { "epoch": 0.3052769992960459, "grad_norm": 0.8395587205886841, "learning_rate": 2.4422901935650953e-05, "loss": 0.071, "step": 17238 }, { "epoch": 0.30529470883307436, "grad_norm": 1.07932448387146, "learning_rate": 2.4422232507298477e-05, "loss": 0.1428, "step": 17239 }, { "epoch": 0.30531241837010276, "grad_norm": 0.6825089454650879, "learning_rate": 2.4421563047947807e-05, "loss": 0.0693, "step": 17240 }, { "epoch": 0.3053301279071312, "grad_norm": 0.6192291975021362, "learning_rate": 2.4420893557601148e-05, "loss": 0.0781, "step": 17241 }, { "epoch": 0.3053478374441596, "grad_norm": 0.8716132044792175, "learning_rate": 2.4420224036260697e-05, "loss": 0.1087, "step": 17242 }, { "epoch": 0.30536554698118806, "grad_norm": 0.8099409937858582, "learning_rate": 2.4419554483928666e-05, "loss": 0.0974, "step": 17243 }, { "epoch": 0.30538325651821646, "grad_norm": 0.6278938055038452, "learning_rate": 2.4418884900607253e-05, "loss": 0.0979, "step": 17244 }, { "epoch": 0.3054009660552449, "grad_norm": 1.2733474969863892, "learning_rate": 2.4418215286298658e-05, "loss": 0.08, "step": 17245 }, { "epoch": 0.3054186755922733, "grad_norm": 1.264618992805481, "learning_rate": 2.4417545641005096e-05, "loss": 0.1067, "step": 17246 }, { "epoch": 0.30543638512930177, "grad_norm": 0.8268348574638367, "learning_rate": 2.4416875964728753e-05, "loss": 0.1319, "step": 17247 }, { "epoch": 0.30545409466633017, "grad_norm": 0.7024821639060974, "learning_rate": 2.441620625747184e-05, "loss": 0.0824, "step": 17248 }, { "epoch": 0.3054718042033586, "grad_norm": 0.7173861861228943, "learning_rate": 2.441553651923657e-05, "loss": 0.085, "step": 17249 }, { "epoch": 0.305489513740387, "grad_norm": 0.8910300135612488, "learning_rate": 2.4414866750025126e-05, "loss": 0.0934, "step": 17250 }, { "epoch": 0.3055072232774155, "grad_norm": 0.9625564217567444, "learning_rate": 2.4414196949839728e-05, "loss": 0.092, "step": 17251 }, { "epoch": 0.3055249328144439, "grad_norm": 0.7602808475494385, "learning_rate": 2.4413527118682573e-05, "loss": 0.1623, "step": 17252 }, { "epoch": 0.3055426423514723, "grad_norm": 0.7895975708961487, "learning_rate": 2.441285725655587e-05, "loss": 0.0804, "step": 17253 }, { "epoch": 0.3055603518885008, "grad_norm": 0.6959784030914307, "learning_rate": 2.4412187363461815e-05, "loss": 0.1238, "step": 17254 }, { "epoch": 0.3055780614255292, "grad_norm": 0.739456832408905, "learning_rate": 2.4411517439402615e-05, "loss": 0.1077, "step": 17255 }, { "epoch": 0.30559577096255763, "grad_norm": 0.6354466080665588, "learning_rate": 2.4410847484380475e-05, "loss": 0.0579, "step": 17256 }, { "epoch": 0.30561348049958603, "grad_norm": 0.735493540763855, "learning_rate": 2.4410177498397597e-05, "loss": 0.1156, "step": 17257 }, { "epoch": 0.3056311900366145, "grad_norm": 0.7264708876609802, "learning_rate": 2.440950748145619e-05, "loss": 0.119, "step": 17258 }, { "epoch": 0.3056488995736429, "grad_norm": 0.45474863052368164, "learning_rate": 2.4408837433558456e-05, "loss": 0.0716, "step": 17259 }, { "epoch": 0.30566660911067134, "grad_norm": 0.7850574851036072, "learning_rate": 2.4408167354706598e-05, "loss": 0.1039, "step": 17260 }, { "epoch": 0.30568431864769974, "grad_norm": 0.8961873650550842, "learning_rate": 2.4407497244902817e-05, "loss": 0.0771, "step": 17261 }, { "epoch": 0.3057020281847282, "grad_norm": 0.9501852989196777, "learning_rate": 2.440682710414932e-05, "loss": 0.1141, "step": 17262 }, { "epoch": 0.3057197377217566, "grad_norm": 0.8670428991317749, "learning_rate": 2.440615693244832e-05, "loss": 0.0901, "step": 17263 }, { "epoch": 0.30573744725878504, "grad_norm": 0.45990675687789917, "learning_rate": 2.4405486729802012e-05, "loss": 0.0945, "step": 17264 }, { "epoch": 0.30575515679581344, "grad_norm": 0.7439529895782471, "learning_rate": 2.4404816496212606e-05, "loss": 0.0904, "step": 17265 }, { "epoch": 0.3057728663328419, "grad_norm": 0.6741247177124023, "learning_rate": 2.44041462316823e-05, "loss": 0.1058, "step": 17266 }, { "epoch": 0.3057905758698703, "grad_norm": 0.8807603120803833, "learning_rate": 2.4403475936213312e-05, "loss": 0.0927, "step": 17267 }, { "epoch": 0.30580828540689875, "grad_norm": 0.5613399147987366, "learning_rate": 2.4402805609807835e-05, "loss": 0.0855, "step": 17268 }, { "epoch": 0.3058259949439272, "grad_norm": 0.5563967823982239, "learning_rate": 2.4402135252468077e-05, "loss": 0.0712, "step": 17269 }, { "epoch": 0.3058437044809556, "grad_norm": 0.6984299421310425, "learning_rate": 2.440146486419625e-05, "loss": 0.0777, "step": 17270 }, { "epoch": 0.30586141401798406, "grad_norm": 1.1227797269821167, "learning_rate": 2.4400794444994548e-05, "loss": 0.1146, "step": 17271 }, { "epoch": 0.30587912355501246, "grad_norm": 0.6280117034912109, "learning_rate": 2.4400123994865187e-05, "loss": 0.0906, "step": 17272 }, { "epoch": 0.3058968330920409, "grad_norm": 0.9497030973434448, "learning_rate": 2.4399453513810363e-05, "loss": 0.1014, "step": 17273 }, { "epoch": 0.3059145426290693, "grad_norm": 0.648114800453186, "learning_rate": 2.4398783001832296e-05, "loss": 0.0554, "step": 17274 }, { "epoch": 0.30593225216609776, "grad_norm": 0.7668083310127258, "learning_rate": 2.4398112458933177e-05, "loss": 0.1327, "step": 17275 }, { "epoch": 0.30594996170312616, "grad_norm": 1.0079652070999146, "learning_rate": 2.4397441885115223e-05, "loss": 0.1128, "step": 17276 }, { "epoch": 0.3059676712401546, "grad_norm": 1.0755809545516968, "learning_rate": 2.4396771280380635e-05, "loss": 0.0773, "step": 17277 }, { "epoch": 0.305985380777183, "grad_norm": 1.1454517841339111, "learning_rate": 2.4396100644731618e-05, "loss": 0.091, "step": 17278 }, { "epoch": 0.30600309031421147, "grad_norm": 1.0531588792800903, "learning_rate": 2.439542997817038e-05, "loss": 0.096, "step": 17279 }, { "epoch": 0.30602079985123987, "grad_norm": 0.7334063053131104, "learning_rate": 2.439475928069913e-05, "loss": 0.0963, "step": 17280 }, { "epoch": 0.3060385093882683, "grad_norm": 1.1882976293563843, "learning_rate": 2.4394088552320072e-05, "loss": 0.1183, "step": 17281 }, { "epoch": 0.3060562189252967, "grad_norm": 0.796149730682373, "learning_rate": 2.4393417793035412e-05, "loss": 0.0857, "step": 17282 }, { "epoch": 0.3060739284623252, "grad_norm": 0.9323287606239319, "learning_rate": 2.439274700284736e-05, "loss": 0.097, "step": 17283 }, { "epoch": 0.3060916379993536, "grad_norm": 1.1321660280227661, "learning_rate": 2.4392076181758113e-05, "loss": 0.0832, "step": 17284 }, { "epoch": 0.306109347536382, "grad_norm": 0.5791099667549133, "learning_rate": 2.4391405329769895e-05, "loss": 0.0582, "step": 17285 }, { "epoch": 0.3061270570734105, "grad_norm": 0.6800023913383484, "learning_rate": 2.4390734446884896e-05, "loss": 0.1028, "step": 17286 }, { "epoch": 0.3061447666104389, "grad_norm": 0.631609320640564, "learning_rate": 2.4390063533105333e-05, "loss": 0.0721, "step": 17287 }, { "epoch": 0.30616247614746733, "grad_norm": 0.8581016659736633, "learning_rate": 2.438939258843341e-05, "loss": 0.078, "step": 17288 }, { "epoch": 0.30618018568449573, "grad_norm": 1.1879055500030518, "learning_rate": 2.4388721612871336e-05, "loss": 0.124, "step": 17289 }, { "epoch": 0.3061978952215242, "grad_norm": 0.9489620923995972, "learning_rate": 2.438805060642132e-05, "loss": 0.0901, "step": 17290 }, { "epoch": 0.3062156047585526, "grad_norm": 0.6886789798736572, "learning_rate": 2.4387379569085563e-05, "loss": 0.1165, "step": 17291 }, { "epoch": 0.30623331429558104, "grad_norm": 0.8882352709770203, "learning_rate": 2.438670850086628e-05, "loss": 0.1168, "step": 17292 }, { "epoch": 0.30625102383260944, "grad_norm": 0.9842011332511902, "learning_rate": 2.438603740176567e-05, "loss": 0.1179, "step": 17293 }, { "epoch": 0.3062687333696379, "grad_norm": 0.5488728880882263, "learning_rate": 2.438536627178595e-05, "loss": 0.0775, "step": 17294 }, { "epoch": 0.3062864429066663, "grad_norm": 0.6390076279640198, "learning_rate": 2.4384695110929322e-05, "loss": 0.1086, "step": 17295 }, { "epoch": 0.30630415244369474, "grad_norm": 1.167372226715088, "learning_rate": 2.4384023919198e-05, "loss": 0.1208, "step": 17296 }, { "epoch": 0.30632186198072314, "grad_norm": 0.374983549118042, "learning_rate": 2.4383352696594187e-05, "loss": 0.0834, "step": 17297 }, { "epoch": 0.3063395715177516, "grad_norm": 0.4468395411968231, "learning_rate": 2.4382681443120093e-05, "loss": 0.0668, "step": 17298 }, { "epoch": 0.30635728105478005, "grad_norm": 0.4883268475532532, "learning_rate": 2.4382010158777924e-05, "loss": 0.1048, "step": 17299 }, { "epoch": 0.30637499059180845, "grad_norm": 0.9317607283592224, "learning_rate": 2.4381338843569894e-05, "loss": 0.1149, "step": 17300 }, { "epoch": 0.3063927001288369, "grad_norm": 0.8176994919776917, "learning_rate": 2.4380667497498206e-05, "loss": 0.1017, "step": 17301 }, { "epoch": 0.3064104096658653, "grad_norm": 0.6948168873786926, "learning_rate": 2.437999612056507e-05, "loss": 0.0873, "step": 17302 }, { "epoch": 0.30642811920289376, "grad_norm": 0.6524375677108765, "learning_rate": 2.4379324712772697e-05, "loss": 0.0884, "step": 17303 }, { "epoch": 0.30644582873992215, "grad_norm": 0.6970318555831909, "learning_rate": 2.4378653274123293e-05, "loss": 0.0821, "step": 17304 }, { "epoch": 0.3064635382769506, "grad_norm": 0.5571089386940002, "learning_rate": 2.4377981804619067e-05, "loss": 0.1187, "step": 17305 }, { "epoch": 0.306481247813979, "grad_norm": 0.40002191066741943, "learning_rate": 2.4377310304262233e-05, "loss": 0.069, "step": 17306 }, { "epoch": 0.30649895735100746, "grad_norm": 0.5963362455368042, "learning_rate": 2.4376638773054994e-05, "loss": 0.0873, "step": 17307 }, { "epoch": 0.30651666688803586, "grad_norm": 0.8331514596939087, "learning_rate": 2.4375967210999564e-05, "loss": 0.0674, "step": 17308 }, { "epoch": 0.3065343764250643, "grad_norm": 0.41901886463165283, "learning_rate": 2.437529561809815e-05, "loss": 0.0911, "step": 17309 }, { "epoch": 0.3065520859620927, "grad_norm": 0.8130340576171875, "learning_rate": 2.4374623994352958e-05, "loss": 0.0876, "step": 17310 }, { "epoch": 0.30656979549912117, "grad_norm": 0.7815334796905518, "learning_rate": 2.437395233976621e-05, "loss": 0.0702, "step": 17311 }, { "epoch": 0.30658750503614957, "grad_norm": 0.5196641087532043, "learning_rate": 2.4373280654340095e-05, "loss": 0.0802, "step": 17312 }, { "epoch": 0.306605214573178, "grad_norm": 0.7184193730354309, "learning_rate": 2.4372608938076842e-05, "loss": 0.109, "step": 17313 }, { "epoch": 0.3066229241102065, "grad_norm": 0.794136106967926, "learning_rate": 2.437193719097865e-05, "loss": 0.1152, "step": 17314 }, { "epoch": 0.3066406336472349, "grad_norm": 0.820273756980896, "learning_rate": 2.437126541304774e-05, "loss": 0.0876, "step": 17315 }, { "epoch": 0.3066583431842633, "grad_norm": 1.1468428373336792, "learning_rate": 2.4370593604286306e-05, "loss": 0.1068, "step": 17316 }, { "epoch": 0.3066760527212917, "grad_norm": 0.6596603989601135, "learning_rate": 2.4369921764696574e-05, "loss": 0.0885, "step": 17317 }, { "epoch": 0.3066937622583202, "grad_norm": 0.5730879902839661, "learning_rate": 2.436924989428074e-05, "loss": 0.0818, "step": 17318 }, { "epoch": 0.3067114717953486, "grad_norm": 0.7673627138137817, "learning_rate": 2.4368577993041024e-05, "loss": 0.0895, "step": 17319 }, { "epoch": 0.30672918133237703, "grad_norm": 0.8067256808280945, "learning_rate": 2.4367906060979637e-05, "loss": 0.0954, "step": 17320 }, { "epoch": 0.30674689086940543, "grad_norm": 0.6822635531425476, "learning_rate": 2.436723409809878e-05, "loss": 0.0983, "step": 17321 }, { "epoch": 0.3067646004064339, "grad_norm": 0.856120228767395, "learning_rate": 2.4366562104400677e-05, "loss": 0.0624, "step": 17322 }, { "epoch": 0.3067823099434623, "grad_norm": 0.5748131275177002, "learning_rate": 2.436589007988753e-05, "loss": 0.0813, "step": 17323 }, { "epoch": 0.30680001948049074, "grad_norm": 1.090396761894226, "learning_rate": 2.4365218024561554e-05, "loss": 0.1264, "step": 17324 }, { "epoch": 0.30681772901751914, "grad_norm": 0.8559938073158264, "learning_rate": 2.436454593842495e-05, "loss": 0.0877, "step": 17325 }, { "epoch": 0.3068354385545476, "grad_norm": 0.5919561386108398, "learning_rate": 2.4363873821479944e-05, "loss": 0.0687, "step": 17326 }, { "epoch": 0.306853148091576, "grad_norm": 0.9068358540534973, "learning_rate": 2.436320167372874e-05, "loss": 0.0967, "step": 17327 }, { "epoch": 0.30687085762860444, "grad_norm": 0.8124414682388306, "learning_rate": 2.436252949517355e-05, "loss": 0.1004, "step": 17328 }, { "epoch": 0.3068885671656329, "grad_norm": 0.8243727684020996, "learning_rate": 2.436185728581658e-05, "loss": 0.0794, "step": 17329 }, { "epoch": 0.3069062767026613, "grad_norm": 0.731080949306488, "learning_rate": 2.4361185045660052e-05, "loss": 0.0763, "step": 17330 }, { "epoch": 0.30692398623968975, "grad_norm": 0.5416074395179749, "learning_rate": 2.4360512774706167e-05, "loss": 0.0731, "step": 17331 }, { "epoch": 0.30694169577671815, "grad_norm": 0.709890604019165, "learning_rate": 2.4359840472957146e-05, "loss": 0.1109, "step": 17332 }, { "epoch": 0.3069594053137466, "grad_norm": 0.9454557299613953, "learning_rate": 2.4359168140415194e-05, "loss": 0.1037, "step": 17333 }, { "epoch": 0.306977114850775, "grad_norm": 0.5681268572807312, "learning_rate": 2.435849577708253e-05, "loss": 0.0956, "step": 17334 }, { "epoch": 0.30699482438780346, "grad_norm": 0.8478066325187683, "learning_rate": 2.4357823382961355e-05, "loss": 0.1099, "step": 17335 }, { "epoch": 0.30701253392483185, "grad_norm": 0.7034907341003418, "learning_rate": 2.435715095805389e-05, "loss": 0.0826, "step": 17336 }, { "epoch": 0.3070302434618603, "grad_norm": 0.6149781346321106, "learning_rate": 2.4356478502362342e-05, "loss": 0.1018, "step": 17337 }, { "epoch": 0.3070479529988887, "grad_norm": 0.49765247106552124, "learning_rate": 2.4355806015888933e-05, "loss": 0.0761, "step": 17338 }, { "epoch": 0.30706566253591716, "grad_norm": 0.7965837121009827, "learning_rate": 2.4355133498635862e-05, "loss": 0.0812, "step": 17339 }, { "epoch": 0.30708337207294556, "grad_norm": 0.7720762491226196, "learning_rate": 2.435446095060535e-05, "loss": 0.0796, "step": 17340 }, { "epoch": 0.307101081609974, "grad_norm": 0.9910210967063904, "learning_rate": 2.435378837179961e-05, "loss": 0.0755, "step": 17341 }, { "epoch": 0.3071187911470024, "grad_norm": 1.1650464534759521, "learning_rate": 2.4353115762220853e-05, "loss": 0.1196, "step": 17342 }, { "epoch": 0.30713650068403087, "grad_norm": 0.7045366764068604, "learning_rate": 2.4352443121871285e-05, "loss": 0.0799, "step": 17343 }, { "epoch": 0.3071542102210593, "grad_norm": 1.1739585399627686, "learning_rate": 2.4351770450753134e-05, "loss": 0.0871, "step": 17344 }, { "epoch": 0.3071719197580877, "grad_norm": 0.5684354901313782, "learning_rate": 2.4351097748868595e-05, "loss": 0.0569, "step": 17345 }, { "epoch": 0.3071896292951162, "grad_norm": 0.8289811611175537, "learning_rate": 2.4350425016219896e-05, "loss": 0.0979, "step": 17346 }, { "epoch": 0.30720733883214457, "grad_norm": 1.0409120321273804, "learning_rate": 2.4349752252809242e-05, "loss": 0.074, "step": 17347 }, { "epoch": 0.307225048369173, "grad_norm": 1.2679343223571777, "learning_rate": 2.4349079458638847e-05, "loss": 0.1359, "step": 17348 }, { "epoch": 0.3072427579062014, "grad_norm": 0.8850958347320557, "learning_rate": 2.434840663371093e-05, "loss": 0.1263, "step": 17349 }, { "epoch": 0.3072604674432299, "grad_norm": 0.8942436575889587, "learning_rate": 2.4347733778027698e-05, "loss": 0.0977, "step": 17350 }, { "epoch": 0.3072781769802583, "grad_norm": 0.5137008428573608, "learning_rate": 2.434706089159137e-05, "loss": 0.0844, "step": 17351 }, { "epoch": 0.30729588651728673, "grad_norm": 0.6520336866378784, "learning_rate": 2.4346387974404153e-05, "loss": 0.0598, "step": 17352 }, { "epoch": 0.30731359605431513, "grad_norm": 0.9484590291976929, "learning_rate": 2.434571502646827e-05, "loss": 0.0897, "step": 17353 }, { "epoch": 0.3073313055913436, "grad_norm": 0.7715070247650146, "learning_rate": 2.434504204778593e-05, "loss": 0.1038, "step": 17354 }, { "epoch": 0.307349015128372, "grad_norm": 0.8318029642105103, "learning_rate": 2.4344369038359343e-05, "loss": 0.0978, "step": 17355 }, { "epoch": 0.30736672466540044, "grad_norm": 1.0695866346359253, "learning_rate": 2.4343695998190725e-05, "loss": 0.1327, "step": 17356 }, { "epoch": 0.30738443420242884, "grad_norm": 0.5005025863647461, "learning_rate": 2.4343022927282296e-05, "loss": 0.0719, "step": 17357 }, { "epoch": 0.3074021437394573, "grad_norm": 0.9214311242103577, "learning_rate": 2.4342349825636268e-05, "loss": 0.1114, "step": 17358 }, { "epoch": 0.30741985327648574, "grad_norm": 0.6584210991859436, "learning_rate": 2.4341676693254853e-05, "loss": 0.0744, "step": 17359 }, { "epoch": 0.30743756281351414, "grad_norm": 0.6739766597747803, "learning_rate": 2.4341003530140264e-05, "loss": 0.1025, "step": 17360 }, { "epoch": 0.3074552723505426, "grad_norm": 1.1352633237838745, "learning_rate": 2.434033033629472e-05, "loss": 0.0745, "step": 17361 }, { "epoch": 0.307472981887571, "grad_norm": 0.9154380559921265, "learning_rate": 2.4339657111720433e-05, "loss": 0.1214, "step": 17362 }, { "epoch": 0.30749069142459945, "grad_norm": 0.8301962614059448, "learning_rate": 2.433898385641962e-05, "loss": 0.1439, "step": 17363 }, { "epoch": 0.30750840096162785, "grad_norm": 0.9033204317092896, "learning_rate": 2.433831057039449e-05, "loss": 0.1015, "step": 17364 }, { "epoch": 0.3075261104986563, "grad_norm": 0.8245831727981567, "learning_rate": 2.433763725364727e-05, "loss": 0.0956, "step": 17365 }, { "epoch": 0.3075438200356847, "grad_norm": 0.759833037853241, "learning_rate": 2.4336963906180167e-05, "loss": 0.0835, "step": 17366 }, { "epoch": 0.30756152957271315, "grad_norm": 0.6150420308113098, "learning_rate": 2.4336290527995394e-05, "loss": 0.0562, "step": 17367 }, { "epoch": 0.30757923910974155, "grad_norm": 0.541660726070404, "learning_rate": 2.433561711909517e-05, "loss": 0.1094, "step": 17368 }, { "epoch": 0.30759694864677, "grad_norm": 0.7584455013275146, "learning_rate": 2.433494367948171e-05, "loss": 0.0611, "step": 17369 }, { "epoch": 0.3076146581837984, "grad_norm": 0.8491677641868591, "learning_rate": 2.433427020915723e-05, "loss": 0.1, "step": 17370 }, { "epoch": 0.30763236772082686, "grad_norm": 0.740745484828949, "learning_rate": 2.4333596708123944e-05, "loss": 0.0909, "step": 17371 }, { "epoch": 0.30765007725785526, "grad_norm": 1.0563510656356812, "learning_rate": 2.433292317638407e-05, "loss": 0.0885, "step": 17372 }, { "epoch": 0.3076677867948837, "grad_norm": 0.7092090845108032, "learning_rate": 2.4332249613939822e-05, "loss": 0.0803, "step": 17373 }, { "epoch": 0.30768549633191217, "grad_norm": 1.2343841791152954, "learning_rate": 2.4331576020793416e-05, "loss": 0.0871, "step": 17374 }, { "epoch": 0.30770320586894057, "grad_norm": 0.650216281414032, "learning_rate": 2.4330902396947073e-05, "loss": 0.0969, "step": 17375 }, { "epoch": 0.307720915405969, "grad_norm": 0.9095790386199951, "learning_rate": 2.4330228742403e-05, "loss": 0.0937, "step": 17376 }, { "epoch": 0.3077386249429974, "grad_norm": 0.9863437414169312, "learning_rate": 2.4329555057163416e-05, "loss": 0.1092, "step": 17377 }, { "epoch": 0.3077563344800259, "grad_norm": 0.9167690873146057, "learning_rate": 2.4328881341230545e-05, "loss": 0.0951, "step": 17378 }, { "epoch": 0.30777404401705427, "grad_norm": 0.7948599457740784, "learning_rate": 2.4328207594606596e-05, "loss": 0.1062, "step": 17379 }, { "epoch": 0.3077917535540827, "grad_norm": 1.851775884628296, "learning_rate": 2.4327533817293787e-05, "loss": 0.0916, "step": 17380 }, { "epoch": 0.3078094630911111, "grad_norm": 1.1206482648849487, "learning_rate": 2.432686000929434e-05, "loss": 0.115, "step": 17381 }, { "epoch": 0.3078271726281396, "grad_norm": 0.6845078468322754, "learning_rate": 2.432618617061046e-05, "loss": 0.112, "step": 17382 }, { "epoch": 0.307844882165168, "grad_norm": 0.9017767310142517, "learning_rate": 2.4325512301244373e-05, "loss": 0.1005, "step": 17383 }, { "epoch": 0.30786259170219643, "grad_norm": 0.9377803802490234, "learning_rate": 2.4324838401198292e-05, "loss": 0.1024, "step": 17384 }, { "epoch": 0.30788030123922483, "grad_norm": 0.605657696723938, "learning_rate": 2.432416447047444e-05, "loss": 0.1132, "step": 17385 }, { "epoch": 0.3078980107762533, "grad_norm": 0.6663374304771423, "learning_rate": 2.432349050907503e-05, "loss": 0.0558, "step": 17386 }, { "epoch": 0.30791572031328174, "grad_norm": 1.108739972114563, "learning_rate": 2.4322816517002274e-05, "loss": 0.1634, "step": 17387 }, { "epoch": 0.30793342985031014, "grad_norm": 0.43087223172187805, "learning_rate": 2.43221424942584e-05, "loss": 0.1083, "step": 17388 }, { "epoch": 0.3079511393873386, "grad_norm": 0.8995749354362488, "learning_rate": 2.432146844084562e-05, "loss": 0.1106, "step": 17389 }, { "epoch": 0.307968848924367, "grad_norm": 0.7555855512619019, "learning_rate": 2.4320794356766147e-05, "loss": 0.0779, "step": 17390 }, { "epoch": 0.30798655846139544, "grad_norm": 0.6871697306632996, "learning_rate": 2.4320120242022205e-05, "loss": 0.0653, "step": 17391 }, { "epoch": 0.30800426799842384, "grad_norm": 0.5829521417617798, "learning_rate": 2.4319446096616012e-05, "loss": 0.0763, "step": 17392 }, { "epoch": 0.3080219775354523, "grad_norm": 0.8080639243125916, "learning_rate": 2.4318771920549785e-05, "loss": 0.1066, "step": 17393 }, { "epoch": 0.3080396870724807, "grad_norm": 0.5657134056091309, "learning_rate": 2.431809771382574e-05, "loss": 0.0785, "step": 17394 }, { "epoch": 0.30805739660950915, "grad_norm": 0.8676709532737732, "learning_rate": 2.431742347644609e-05, "loss": 0.088, "step": 17395 }, { "epoch": 0.30807510614653755, "grad_norm": 1.0084236860275269, "learning_rate": 2.4316749208413064e-05, "loss": 0.0848, "step": 17396 }, { "epoch": 0.308092815683566, "grad_norm": 0.7707618474960327, "learning_rate": 2.431607490972888e-05, "loss": 0.0689, "step": 17397 }, { "epoch": 0.3081105252205944, "grad_norm": 0.898074746131897, "learning_rate": 2.431540058039575e-05, "loss": 0.078, "step": 17398 }, { "epoch": 0.30812823475762285, "grad_norm": 0.7506610155105591, "learning_rate": 2.4314726220415887e-05, "loss": 0.0968, "step": 17399 }, { "epoch": 0.30814594429465125, "grad_norm": 0.5205780863761902, "learning_rate": 2.4314051829791518e-05, "loss": 0.08, "step": 17400 }, { "epoch": 0.3081636538316797, "grad_norm": 0.776710569858551, "learning_rate": 2.4313377408524865e-05, "loss": 0.0811, "step": 17401 }, { "epoch": 0.30818136336870816, "grad_norm": 0.7647714018821716, "learning_rate": 2.4312702956618144e-05, "loss": 0.0977, "step": 17402 }, { "epoch": 0.30819907290573656, "grad_norm": 0.9251651167869568, "learning_rate": 2.4312028474073567e-05, "loss": 0.0992, "step": 17403 }, { "epoch": 0.308216782442765, "grad_norm": 0.5319486856460571, "learning_rate": 2.4311353960893363e-05, "loss": 0.0684, "step": 17404 }, { "epoch": 0.3082344919797934, "grad_norm": 0.5482875108718872, "learning_rate": 2.4310679417079745e-05, "loss": 0.0553, "step": 17405 }, { "epoch": 0.30825220151682187, "grad_norm": 0.508455216884613, "learning_rate": 2.4310004842634932e-05, "loss": 0.1087, "step": 17406 }, { "epoch": 0.30826991105385027, "grad_norm": 0.5364785194396973, "learning_rate": 2.4309330237561144e-05, "loss": 0.109, "step": 17407 }, { "epoch": 0.3082876205908787, "grad_norm": 1.0129311084747314, "learning_rate": 2.4308655601860602e-05, "loss": 0.0972, "step": 17408 }, { "epoch": 0.3083053301279071, "grad_norm": 1.290892481803894, "learning_rate": 2.4307980935535524e-05, "loss": 0.1334, "step": 17409 }, { "epoch": 0.30832303966493557, "grad_norm": 0.8379893898963928, "learning_rate": 2.430730623858813e-05, "loss": 0.0866, "step": 17410 }, { "epoch": 0.30834074920196397, "grad_norm": 0.6808983087539673, "learning_rate": 2.4306631511020646e-05, "loss": 0.0644, "step": 17411 }, { "epoch": 0.3083584587389924, "grad_norm": 0.9545110464096069, "learning_rate": 2.4305956752835278e-05, "loss": 0.074, "step": 17412 }, { "epoch": 0.3083761682760208, "grad_norm": 0.6778848171234131, "learning_rate": 2.4305281964034256e-05, "loss": 0.0652, "step": 17413 }, { "epoch": 0.3083938778130493, "grad_norm": 1.1690211296081543, "learning_rate": 2.4304607144619793e-05, "loss": 0.1009, "step": 17414 }, { "epoch": 0.3084115873500777, "grad_norm": 0.8469699621200562, "learning_rate": 2.430393229459412e-05, "loss": 0.0923, "step": 17415 }, { "epoch": 0.30842929688710613, "grad_norm": 0.587591290473938, "learning_rate": 2.430325741395945e-05, "loss": 0.0682, "step": 17416 }, { "epoch": 0.3084470064241346, "grad_norm": 0.5332750678062439, "learning_rate": 2.4302582502718002e-05, "loss": 0.0891, "step": 17417 }, { "epoch": 0.308464715961163, "grad_norm": 0.8260359168052673, "learning_rate": 2.4301907560872005e-05, "loss": 0.1241, "step": 17418 }, { "epoch": 0.30848242549819144, "grad_norm": 0.8902124166488647, "learning_rate": 2.4301232588423663e-05, "loss": 0.0748, "step": 17419 }, { "epoch": 0.30850013503521984, "grad_norm": 1.1386070251464844, "learning_rate": 2.430055758537521e-05, "loss": 0.0889, "step": 17420 }, { "epoch": 0.3085178445722483, "grad_norm": 0.7713712453842163, "learning_rate": 2.4299882551728868e-05, "loss": 0.1009, "step": 17421 }, { "epoch": 0.3085355541092767, "grad_norm": 0.8407789468765259, "learning_rate": 2.4299207487486847e-05, "loss": 0.0698, "step": 17422 }, { "epoch": 0.30855326364630514, "grad_norm": 0.763948917388916, "learning_rate": 2.4298532392651378e-05, "loss": 0.0907, "step": 17423 }, { "epoch": 0.30857097318333354, "grad_norm": 0.5504772067070007, "learning_rate": 2.429785726722467e-05, "loss": 0.0771, "step": 17424 }, { "epoch": 0.308588682720362, "grad_norm": 0.9253607392311096, "learning_rate": 2.429718211120896e-05, "loss": 0.0839, "step": 17425 }, { "epoch": 0.3086063922573904, "grad_norm": 0.8977559804916382, "learning_rate": 2.429650692460646e-05, "loss": 0.0879, "step": 17426 }, { "epoch": 0.30862410179441885, "grad_norm": 1.0952975749969482, "learning_rate": 2.429583170741939e-05, "loss": 0.0926, "step": 17427 }, { "epoch": 0.30864181133144725, "grad_norm": 0.5004667639732361, "learning_rate": 2.4295156459649978e-05, "loss": 0.1034, "step": 17428 }, { "epoch": 0.3086595208684757, "grad_norm": 0.668712854385376, "learning_rate": 2.429448118130044e-05, "loss": 0.0997, "step": 17429 }, { "epoch": 0.3086772304055041, "grad_norm": 0.6844469904899597, "learning_rate": 2.4293805872372996e-05, "loss": 0.0695, "step": 17430 }, { "epoch": 0.30869493994253255, "grad_norm": 1.12758207321167, "learning_rate": 2.4293130532869872e-05, "loss": 0.0846, "step": 17431 }, { "epoch": 0.308712649479561, "grad_norm": 0.6628662943840027, "learning_rate": 2.4292455162793293e-05, "loss": 0.1109, "step": 17432 }, { "epoch": 0.3087303590165894, "grad_norm": 0.5142987370491028, "learning_rate": 2.429177976214547e-05, "loss": 0.0939, "step": 17433 }, { "epoch": 0.30874806855361786, "grad_norm": 0.8472671508789062, "learning_rate": 2.4291104330928633e-05, "loss": 0.0973, "step": 17434 }, { "epoch": 0.30876577809064626, "grad_norm": 1.5177505016326904, "learning_rate": 2.4290428869145004e-05, "loss": 0.0978, "step": 17435 }, { "epoch": 0.3087834876276747, "grad_norm": 0.59544438123703, "learning_rate": 2.4289753376796803e-05, "loss": 0.0819, "step": 17436 }, { "epoch": 0.3088011971647031, "grad_norm": 0.5047771334648132, "learning_rate": 2.4289077853886255e-05, "loss": 0.1021, "step": 17437 }, { "epoch": 0.30881890670173157, "grad_norm": 0.6444863677024841, "learning_rate": 2.4288402300415578e-05, "loss": 0.097, "step": 17438 }, { "epoch": 0.30883661623875996, "grad_norm": 1.137641429901123, "learning_rate": 2.4287726716386998e-05, "loss": 0.0688, "step": 17439 }, { "epoch": 0.3088543257757884, "grad_norm": 0.8184633255004883, "learning_rate": 2.4287051101802734e-05, "loss": 0.0967, "step": 17440 }, { "epoch": 0.3088720353128168, "grad_norm": 1.1940975189208984, "learning_rate": 2.4286375456665017e-05, "loss": 0.1252, "step": 17441 }, { "epoch": 0.30888974484984527, "grad_norm": 0.9778446555137634, "learning_rate": 2.4285699780976057e-05, "loss": 0.1125, "step": 17442 }, { "epoch": 0.30890745438687367, "grad_norm": 1.3278260231018066, "learning_rate": 2.4285024074738085e-05, "loss": 0.1563, "step": 17443 }, { "epoch": 0.3089251639239021, "grad_norm": 0.7683179378509521, "learning_rate": 2.428434833795333e-05, "loss": 0.1033, "step": 17444 }, { "epoch": 0.3089428734609305, "grad_norm": 0.5837980508804321, "learning_rate": 2.4283672570624002e-05, "loss": 0.0789, "step": 17445 }, { "epoch": 0.308960582997959, "grad_norm": 0.9270773530006409, "learning_rate": 2.428299677275233e-05, "loss": 0.1017, "step": 17446 }, { "epoch": 0.30897829253498743, "grad_norm": 0.650427520275116, "learning_rate": 2.4282320944340537e-05, "loss": 0.0839, "step": 17447 }, { "epoch": 0.30899600207201583, "grad_norm": 0.5200014710426331, "learning_rate": 2.428164508539085e-05, "loss": 0.0876, "step": 17448 }, { "epoch": 0.3090137116090443, "grad_norm": 1.0623924732208252, "learning_rate": 2.428096919590549e-05, "loss": 0.1211, "step": 17449 }, { "epoch": 0.3090314211460727, "grad_norm": 1.1483336687088013, "learning_rate": 2.4280293275886677e-05, "loss": 0.1125, "step": 17450 }, { "epoch": 0.30904913068310114, "grad_norm": 0.7642083168029785, "learning_rate": 2.4279617325336638e-05, "loss": 0.0834, "step": 17451 }, { "epoch": 0.30906684022012954, "grad_norm": 1.0913861989974976, "learning_rate": 2.4278941344257603e-05, "loss": 0.0856, "step": 17452 }, { "epoch": 0.309084549757158, "grad_norm": 0.5310171246528625, "learning_rate": 2.4278265332651782e-05, "loss": 0.0786, "step": 17453 }, { "epoch": 0.3091022592941864, "grad_norm": 0.609958291053772, "learning_rate": 2.4277589290521404e-05, "loss": 0.1018, "step": 17454 }, { "epoch": 0.30911996883121484, "grad_norm": 0.6367480158805847, "learning_rate": 2.4276913217868702e-05, "loss": 0.106, "step": 17455 }, { "epoch": 0.30913767836824324, "grad_norm": 0.701324462890625, "learning_rate": 2.427623711469589e-05, "loss": 0.1009, "step": 17456 }, { "epoch": 0.3091553879052717, "grad_norm": 0.7030653357505798, "learning_rate": 2.42755609810052e-05, "loss": 0.0737, "step": 17457 }, { "epoch": 0.3091730974423001, "grad_norm": 1.3980871438980103, "learning_rate": 2.4274884816798848e-05, "loss": 0.1232, "step": 17458 }, { "epoch": 0.30919080697932855, "grad_norm": 4.051508903503418, "learning_rate": 2.427420862207907e-05, "loss": 0.1525, "step": 17459 }, { "epoch": 0.30920851651635695, "grad_norm": 0.7844628691673279, "learning_rate": 2.427353239684808e-05, "loss": 0.1077, "step": 17460 }, { "epoch": 0.3092262260533854, "grad_norm": 0.9097232818603516, "learning_rate": 2.4272856141108105e-05, "loss": 0.0881, "step": 17461 }, { "epoch": 0.30924393559041385, "grad_norm": 0.9017152190208435, "learning_rate": 2.427217985486137e-05, "loss": 0.0786, "step": 17462 }, { "epoch": 0.30926164512744225, "grad_norm": 1.1632893085479736, "learning_rate": 2.4271503538110102e-05, "loss": 0.1061, "step": 17463 }, { "epoch": 0.3092793546644707, "grad_norm": 2.0687777996063232, "learning_rate": 2.427082719085653e-05, "loss": 0.1245, "step": 17464 }, { "epoch": 0.3092970642014991, "grad_norm": 0.8929980397224426, "learning_rate": 2.4270150813102864e-05, "loss": 0.12, "step": 17465 }, { "epoch": 0.30931477373852756, "grad_norm": 0.6439998149871826, "learning_rate": 2.4269474404851348e-05, "loss": 0.0837, "step": 17466 }, { "epoch": 0.30933248327555596, "grad_norm": 0.9390081763267517, "learning_rate": 2.42687979661042e-05, "loss": 0.0904, "step": 17467 }, { "epoch": 0.3093501928125844, "grad_norm": 0.8390102982521057, "learning_rate": 2.4268121496863638e-05, "loss": 0.0958, "step": 17468 }, { "epoch": 0.3093679023496128, "grad_norm": 0.8514935374259949, "learning_rate": 2.42674449971319e-05, "loss": 0.0761, "step": 17469 }, { "epoch": 0.30938561188664127, "grad_norm": 0.8578924536705017, "learning_rate": 2.4266768466911197e-05, "loss": 0.0664, "step": 17470 }, { "epoch": 0.30940332142366966, "grad_norm": 0.6937291026115417, "learning_rate": 2.4266091906203772e-05, "loss": 0.0872, "step": 17471 }, { "epoch": 0.3094210309606981, "grad_norm": 0.5919061899185181, "learning_rate": 2.4265415315011835e-05, "loss": 0.0813, "step": 17472 }, { "epoch": 0.3094387404977265, "grad_norm": 0.6310531497001648, "learning_rate": 2.4264738693337625e-05, "loss": 0.072, "step": 17473 }, { "epoch": 0.30945645003475497, "grad_norm": 0.8969325423240662, "learning_rate": 2.4264062041183358e-05, "loss": 0.0867, "step": 17474 }, { "epoch": 0.30947415957178337, "grad_norm": 0.7591704726219177, "learning_rate": 2.4263385358551264e-05, "loss": 0.0984, "step": 17475 }, { "epoch": 0.3094918691088118, "grad_norm": 1.0288257598876953, "learning_rate": 2.4262708645443564e-05, "loss": 0.1182, "step": 17476 }, { "epoch": 0.3095095786458403, "grad_norm": 1.0136997699737549, "learning_rate": 2.42620319018625e-05, "loss": 0.1234, "step": 17477 }, { "epoch": 0.3095272881828687, "grad_norm": 0.9605724215507507, "learning_rate": 2.4261355127810283e-05, "loss": 0.1102, "step": 17478 }, { "epoch": 0.30954499771989713, "grad_norm": 1.0594571828842163, "learning_rate": 2.4260678323289138e-05, "loss": 0.0868, "step": 17479 }, { "epoch": 0.30956270725692553, "grad_norm": 0.5562865734100342, "learning_rate": 2.426000148830131e-05, "loss": 0.0826, "step": 17480 }, { "epoch": 0.309580416793954, "grad_norm": 0.4282922148704529, "learning_rate": 2.4259324622849e-05, "loss": 0.0942, "step": 17481 }, { "epoch": 0.3095981263309824, "grad_norm": 0.5562996864318848, "learning_rate": 2.4258647726934464e-05, "loss": 0.0457, "step": 17482 }, { "epoch": 0.30961583586801084, "grad_norm": 0.5398950576782227, "learning_rate": 2.42579708005599e-05, "loss": 0.0835, "step": 17483 }, { "epoch": 0.30963354540503923, "grad_norm": 0.6161285042762756, "learning_rate": 2.4257293843727553e-05, "loss": 0.0819, "step": 17484 }, { "epoch": 0.3096512549420677, "grad_norm": 0.8933035731315613, "learning_rate": 2.425661685643965e-05, "loss": 0.0897, "step": 17485 }, { "epoch": 0.3096689644790961, "grad_norm": 0.7561283111572266, "learning_rate": 2.4255939838698407e-05, "loss": 0.1042, "step": 17486 }, { "epoch": 0.30968667401612454, "grad_norm": 0.5098099112510681, "learning_rate": 2.4255262790506062e-05, "loss": 0.0864, "step": 17487 }, { "epoch": 0.30970438355315294, "grad_norm": 0.4577386677265167, "learning_rate": 2.4254585711864838e-05, "loss": 0.083, "step": 17488 }, { "epoch": 0.3097220930901814, "grad_norm": 0.7429332137107849, "learning_rate": 2.4253908602776963e-05, "loss": 0.1192, "step": 17489 }, { "epoch": 0.3097398026272098, "grad_norm": 0.7644296288490295, "learning_rate": 2.4253231463244665e-05, "loss": 0.09, "step": 17490 }, { "epoch": 0.30975751216423825, "grad_norm": 0.7094605565071106, "learning_rate": 2.4252554293270172e-05, "loss": 0.0857, "step": 17491 }, { "epoch": 0.3097752217012667, "grad_norm": 0.7237260937690735, "learning_rate": 2.4251877092855714e-05, "loss": 0.1152, "step": 17492 }, { "epoch": 0.3097929312382951, "grad_norm": 1.0396795272827148, "learning_rate": 2.425119986200351e-05, "loss": 0.0899, "step": 17493 }, { "epoch": 0.30981064077532355, "grad_norm": 0.6396058201789856, "learning_rate": 2.4250522600715798e-05, "loss": 0.0868, "step": 17494 }, { "epoch": 0.30982835031235195, "grad_norm": 0.7492356896400452, "learning_rate": 2.4249845308994796e-05, "loss": 0.0873, "step": 17495 }, { "epoch": 0.3098460598493804, "grad_norm": 0.7965912222862244, "learning_rate": 2.4249167986842743e-05, "loss": 0.0923, "step": 17496 }, { "epoch": 0.3098637693864088, "grad_norm": 0.7779055237770081, "learning_rate": 2.424849063426186e-05, "loss": 0.084, "step": 17497 }, { "epoch": 0.30988147892343726, "grad_norm": 1.4121265411376953, "learning_rate": 2.4247813251254388e-05, "loss": 0.1332, "step": 17498 }, { "epoch": 0.30989918846046566, "grad_norm": 0.8539516925811768, "learning_rate": 2.4247135837822534e-05, "loss": 0.0738, "step": 17499 }, { "epoch": 0.3099168979974941, "grad_norm": 1.0293502807617188, "learning_rate": 2.4246458393968543e-05, "loss": 0.0987, "step": 17500 }, { "epoch": 0.3099346075345225, "grad_norm": 0.44280296564102173, "learning_rate": 2.4245780919694645e-05, "loss": 0.0839, "step": 17501 }, { "epoch": 0.30995231707155096, "grad_norm": 0.6283374428749084, "learning_rate": 2.4245103415003055e-05, "loss": 0.0909, "step": 17502 }, { "epoch": 0.30997002660857936, "grad_norm": 0.8710760474205017, "learning_rate": 2.424442587989601e-05, "loss": 0.107, "step": 17503 }, { "epoch": 0.3099877361456078, "grad_norm": 0.6623406410217285, "learning_rate": 2.4243748314375744e-05, "loss": 0.0418, "step": 17504 }, { "epoch": 0.3100054456826362, "grad_norm": 0.8720720410346985, "learning_rate": 2.4243070718444475e-05, "loss": 0.1066, "step": 17505 }, { "epoch": 0.31002315521966467, "grad_norm": 0.9512292742729187, "learning_rate": 2.4242393092104443e-05, "loss": 0.0677, "step": 17506 }, { "epoch": 0.3100408647566931, "grad_norm": 0.7431659698486328, "learning_rate": 2.4241715435357872e-05, "loss": 0.0919, "step": 17507 }, { "epoch": 0.3100585742937215, "grad_norm": 0.6369966864585876, "learning_rate": 2.4241037748206993e-05, "loss": 0.0598, "step": 17508 }, { "epoch": 0.31007628383075, "grad_norm": 0.6766425371170044, "learning_rate": 2.424036003065403e-05, "loss": 0.111, "step": 17509 }, { "epoch": 0.3100939933677784, "grad_norm": 0.836595892906189, "learning_rate": 2.423968228270122e-05, "loss": 0.0972, "step": 17510 }, { "epoch": 0.31011170290480683, "grad_norm": 0.8768680095672607, "learning_rate": 2.4239004504350787e-05, "loss": 0.0947, "step": 17511 }, { "epoch": 0.31012941244183523, "grad_norm": 0.7678354382514954, "learning_rate": 2.423832669560497e-05, "loss": 0.1172, "step": 17512 }, { "epoch": 0.3101471219788637, "grad_norm": 0.8695647716522217, "learning_rate": 2.4237648856465985e-05, "loss": 0.143, "step": 17513 }, { "epoch": 0.3101648315158921, "grad_norm": 0.6527107954025269, "learning_rate": 2.4236970986936078e-05, "loss": 0.053, "step": 17514 }, { "epoch": 0.31018254105292054, "grad_norm": 0.9881594181060791, "learning_rate": 2.4236293087017465e-05, "loss": 0.0796, "step": 17515 }, { "epoch": 0.31020025058994893, "grad_norm": 0.7998066544532776, "learning_rate": 2.423561515671238e-05, "loss": 0.1116, "step": 17516 }, { "epoch": 0.3102179601269774, "grad_norm": 0.9592970609664917, "learning_rate": 2.423493719602306e-05, "loss": 0.0872, "step": 17517 }, { "epoch": 0.3102356696640058, "grad_norm": 0.48225656151771545, "learning_rate": 2.4234259204951725e-05, "loss": 0.0714, "step": 17518 }, { "epoch": 0.31025337920103424, "grad_norm": 0.891573965549469, "learning_rate": 2.4233581183500617e-05, "loss": 0.0898, "step": 17519 }, { "epoch": 0.31027108873806264, "grad_norm": 0.8093754649162292, "learning_rate": 2.4232903131671957e-05, "loss": 0.0835, "step": 17520 }, { "epoch": 0.3102887982750911, "grad_norm": 0.9535284638404846, "learning_rate": 2.4232225049467984e-05, "loss": 0.1007, "step": 17521 }, { "epoch": 0.31030650781211955, "grad_norm": 0.7058425545692444, "learning_rate": 2.423154693689092e-05, "loss": 0.0665, "step": 17522 }, { "epoch": 0.31032421734914795, "grad_norm": 0.756949782371521, "learning_rate": 2.4230868793943004e-05, "loss": 0.1163, "step": 17523 }, { "epoch": 0.3103419268861764, "grad_norm": 0.9249597787857056, "learning_rate": 2.423019062062646e-05, "loss": 0.1127, "step": 17524 }, { "epoch": 0.3103596364232048, "grad_norm": 0.6385981440544128, "learning_rate": 2.4229512416943527e-05, "loss": 0.0904, "step": 17525 }, { "epoch": 0.31037734596023325, "grad_norm": 0.8323714137077332, "learning_rate": 2.4228834182896427e-05, "loss": 0.0799, "step": 17526 }, { "epoch": 0.31039505549726165, "grad_norm": 0.8772322535514832, "learning_rate": 2.42281559184874e-05, "loss": 0.1229, "step": 17527 }, { "epoch": 0.3104127650342901, "grad_norm": 0.8781065344810486, "learning_rate": 2.422747762371867e-05, "loss": 0.1078, "step": 17528 }, { "epoch": 0.3104304745713185, "grad_norm": 1.0852261781692505, "learning_rate": 2.422679929859247e-05, "loss": 0.0933, "step": 17529 }, { "epoch": 0.31044818410834696, "grad_norm": 0.483702689409256, "learning_rate": 2.4226120943111037e-05, "loss": 0.0639, "step": 17530 }, { "epoch": 0.31046589364537536, "grad_norm": 0.9136331677436829, "learning_rate": 2.4225442557276598e-05, "loss": 0.0643, "step": 17531 }, { "epoch": 0.3104836031824038, "grad_norm": 0.5687958598136902, "learning_rate": 2.4224764141091386e-05, "loss": 0.0772, "step": 17532 }, { "epoch": 0.3105013127194322, "grad_norm": 0.764845609664917, "learning_rate": 2.4224085694557635e-05, "loss": 0.0971, "step": 17533 }, { "epoch": 0.31051902225646066, "grad_norm": 0.6701663136482239, "learning_rate": 2.4223407217677572e-05, "loss": 0.0556, "step": 17534 }, { "epoch": 0.31053673179348906, "grad_norm": 0.5166144371032715, "learning_rate": 2.4222728710453436e-05, "loss": 0.0778, "step": 17535 }, { "epoch": 0.3105544413305175, "grad_norm": 1.0007367134094238, "learning_rate": 2.422205017288745e-05, "loss": 0.0933, "step": 17536 }, { "epoch": 0.31057215086754597, "grad_norm": 0.7279302477836609, "learning_rate": 2.4221371604981857e-05, "loss": 0.076, "step": 17537 }, { "epoch": 0.31058986040457437, "grad_norm": 0.6229591965675354, "learning_rate": 2.422069300673888e-05, "loss": 0.0982, "step": 17538 }, { "epoch": 0.3106075699416028, "grad_norm": 1.0654138326644897, "learning_rate": 2.422001437816076e-05, "loss": 0.1101, "step": 17539 }, { "epoch": 0.3106252794786312, "grad_norm": 0.9162411093711853, "learning_rate": 2.4219335719249726e-05, "loss": 0.1004, "step": 17540 }, { "epoch": 0.3106429890156597, "grad_norm": 0.9186009168624878, "learning_rate": 2.4218657030008e-05, "loss": 0.1365, "step": 17541 }, { "epoch": 0.3106606985526881, "grad_norm": 0.7980104088783264, "learning_rate": 2.4217978310437833e-05, "loss": 0.1211, "step": 17542 }, { "epoch": 0.31067840808971653, "grad_norm": 0.5281678438186646, "learning_rate": 2.421729956054145e-05, "loss": 0.0709, "step": 17543 }, { "epoch": 0.3106961176267449, "grad_norm": 0.8201416730880737, "learning_rate": 2.4216620780321082e-05, "loss": 0.0977, "step": 17544 }, { "epoch": 0.3107138271637734, "grad_norm": 0.6479671001434326, "learning_rate": 2.4215941969778963e-05, "loss": 0.1503, "step": 17545 }, { "epoch": 0.3107315367008018, "grad_norm": 0.8889597654342651, "learning_rate": 2.4215263128917327e-05, "loss": 0.1073, "step": 17546 }, { "epoch": 0.31074924623783023, "grad_norm": 0.8614855408668518, "learning_rate": 2.421458425773841e-05, "loss": 0.1348, "step": 17547 }, { "epoch": 0.31076695577485863, "grad_norm": 0.741062343120575, "learning_rate": 2.421390535624444e-05, "loss": 0.0719, "step": 17548 }, { "epoch": 0.3107846653118871, "grad_norm": 0.7729014158248901, "learning_rate": 2.4213226424437658e-05, "loss": 0.1044, "step": 17549 }, { "epoch": 0.3108023748489155, "grad_norm": 0.8089231848716736, "learning_rate": 2.4212547462320287e-05, "loss": 0.1003, "step": 17550 }, { "epoch": 0.31082008438594394, "grad_norm": 0.9675593376159668, "learning_rate": 2.4211868469894567e-05, "loss": 0.1118, "step": 17551 }, { "epoch": 0.3108377939229724, "grad_norm": 1.0232218503952026, "learning_rate": 2.4211189447162736e-05, "loss": 0.0931, "step": 17552 }, { "epoch": 0.3108555034600008, "grad_norm": 0.6254367828369141, "learning_rate": 2.421051039412702e-05, "loss": 0.0766, "step": 17553 }, { "epoch": 0.31087321299702925, "grad_norm": 1.0908294916152954, "learning_rate": 2.4209831310789654e-05, "loss": 0.0985, "step": 17554 }, { "epoch": 0.31089092253405765, "grad_norm": 0.7840650081634521, "learning_rate": 2.420915219715288e-05, "loss": 0.0705, "step": 17555 }, { "epoch": 0.3109086320710861, "grad_norm": 0.5399467945098877, "learning_rate": 2.4208473053218927e-05, "loss": 0.0534, "step": 17556 }, { "epoch": 0.3109263416081145, "grad_norm": 0.6754602193832397, "learning_rate": 2.4207793878990026e-05, "loss": 0.0947, "step": 17557 }, { "epoch": 0.31094405114514295, "grad_norm": 0.8529101014137268, "learning_rate": 2.4207114674468415e-05, "loss": 0.1188, "step": 17558 }, { "epoch": 0.31096176068217135, "grad_norm": 1.1944705247879028, "learning_rate": 2.4206435439656326e-05, "loss": 0.0614, "step": 17559 }, { "epoch": 0.3109794702191998, "grad_norm": 0.5969739556312561, "learning_rate": 2.4205756174556e-05, "loss": 0.1048, "step": 17560 }, { "epoch": 0.3109971797562282, "grad_norm": 0.5517269968986511, "learning_rate": 2.420507687916966e-05, "loss": 0.0836, "step": 17561 }, { "epoch": 0.31101488929325666, "grad_norm": 0.6812969446182251, "learning_rate": 2.4204397553499554e-05, "loss": 0.0607, "step": 17562 }, { "epoch": 0.31103259883028506, "grad_norm": 0.715762734413147, "learning_rate": 2.4203718197547913e-05, "loss": 0.0888, "step": 17563 }, { "epoch": 0.3110503083673135, "grad_norm": 0.5915889739990234, "learning_rate": 2.4203038811316965e-05, "loss": 0.0802, "step": 17564 }, { "epoch": 0.3110680179043419, "grad_norm": 1.1130905151367188, "learning_rate": 2.4202359394808953e-05, "loss": 0.1052, "step": 17565 }, { "epoch": 0.31108572744137036, "grad_norm": 0.7586818933486938, "learning_rate": 2.4201679948026107e-05, "loss": 0.0845, "step": 17566 }, { "epoch": 0.3111034369783988, "grad_norm": 0.6020655632019043, "learning_rate": 2.4201000470970665e-05, "loss": 0.0736, "step": 17567 }, { "epoch": 0.3111211465154272, "grad_norm": 0.9298285841941833, "learning_rate": 2.420032096364486e-05, "loss": 0.0997, "step": 17568 }, { "epoch": 0.31113885605245567, "grad_norm": 0.45146214962005615, "learning_rate": 2.419964142605094e-05, "loss": 0.0755, "step": 17569 }, { "epoch": 0.31115656558948407, "grad_norm": 0.7554914355278015, "learning_rate": 2.4198961858191115e-05, "loss": 0.0762, "step": 17570 }, { "epoch": 0.3111742751265125, "grad_norm": 0.6232385635375977, "learning_rate": 2.4198282260067645e-05, "loss": 0.0827, "step": 17571 }, { "epoch": 0.3111919846635409, "grad_norm": 0.7420811653137207, "learning_rate": 2.4197602631682758e-05, "loss": 0.1061, "step": 17572 }, { "epoch": 0.3112096942005694, "grad_norm": 0.9504631161689758, "learning_rate": 2.4196922973038685e-05, "loss": 0.11, "step": 17573 }, { "epoch": 0.3112274037375978, "grad_norm": 0.7012290954589844, "learning_rate": 2.4196243284137668e-05, "loss": 0.0917, "step": 17574 }, { "epoch": 0.31124511327462623, "grad_norm": 0.6856323480606079, "learning_rate": 2.4195563564981938e-05, "loss": 0.0762, "step": 17575 }, { "epoch": 0.3112628228116546, "grad_norm": 0.9318565726280212, "learning_rate": 2.4194883815573738e-05, "loss": 0.1268, "step": 17576 }, { "epoch": 0.3112805323486831, "grad_norm": 0.612030029296875, "learning_rate": 2.41942040359153e-05, "loss": 0.0785, "step": 17577 }, { "epoch": 0.3112982418857115, "grad_norm": 0.6411414742469788, "learning_rate": 2.4193524226008855e-05, "loss": 0.0889, "step": 17578 }, { "epoch": 0.31131595142273993, "grad_norm": 1.2077354192733765, "learning_rate": 2.419284438585665e-05, "loss": 0.1172, "step": 17579 }, { "epoch": 0.31133366095976833, "grad_norm": 0.4235270023345947, "learning_rate": 2.4192164515460914e-05, "loss": 0.081, "step": 17580 }, { "epoch": 0.3113513704967968, "grad_norm": 0.7239012122154236, "learning_rate": 2.419148461482389e-05, "loss": 0.0904, "step": 17581 }, { "epoch": 0.31136908003382524, "grad_norm": 0.7235378623008728, "learning_rate": 2.4190804683947805e-05, "loss": 0.0652, "step": 17582 }, { "epoch": 0.31138678957085364, "grad_norm": 0.5538811087608337, "learning_rate": 2.4190124722834908e-05, "loss": 0.0759, "step": 17583 }, { "epoch": 0.3114044991078821, "grad_norm": 3.1593456268310547, "learning_rate": 2.4189444731487424e-05, "loss": 0.1215, "step": 17584 }, { "epoch": 0.3114222086449105, "grad_norm": 1.1425056457519531, "learning_rate": 2.4188764709907606e-05, "loss": 0.1063, "step": 17585 }, { "epoch": 0.31143991818193895, "grad_norm": 4.7291741371154785, "learning_rate": 2.4188084658097674e-05, "loss": 0.0666, "step": 17586 }, { "epoch": 0.31145762771896734, "grad_norm": 1.3314802646636963, "learning_rate": 2.4187404576059876e-05, "loss": 0.1024, "step": 17587 }, { "epoch": 0.3114753372559958, "grad_norm": 1.1536123752593994, "learning_rate": 2.4186724463796443e-05, "loss": 0.1129, "step": 17588 }, { "epoch": 0.3114930467930242, "grad_norm": 0.538537859916687, "learning_rate": 2.4186044321309618e-05, "loss": 0.0606, "step": 17589 }, { "epoch": 0.31151075633005265, "grad_norm": 1.0426983833312988, "learning_rate": 2.418536414860164e-05, "loss": 0.0936, "step": 17590 }, { "epoch": 0.31152846586708105, "grad_norm": 0.807301938533783, "learning_rate": 2.4184683945674738e-05, "loss": 0.0962, "step": 17591 }, { "epoch": 0.3115461754041095, "grad_norm": 0.5609700679779053, "learning_rate": 2.4184003712531158e-05, "loss": 0.0764, "step": 17592 }, { "epoch": 0.3115638849411379, "grad_norm": 1.281509280204773, "learning_rate": 2.4183323449173128e-05, "loss": 0.0671, "step": 17593 }, { "epoch": 0.31158159447816636, "grad_norm": 0.5430867671966553, "learning_rate": 2.4182643155602895e-05, "loss": 0.0911, "step": 17594 }, { "epoch": 0.31159930401519476, "grad_norm": 0.8388929963111877, "learning_rate": 2.41819628318227e-05, "loss": 0.0533, "step": 17595 }, { "epoch": 0.3116170135522232, "grad_norm": 0.5445303320884705, "learning_rate": 2.418128247783477e-05, "loss": 0.0905, "step": 17596 }, { "epoch": 0.31163472308925166, "grad_norm": 0.6980975270271301, "learning_rate": 2.4180602093641358e-05, "loss": 0.086, "step": 17597 }, { "epoch": 0.31165243262628006, "grad_norm": 0.8384425044059753, "learning_rate": 2.4179921679244688e-05, "loss": 0.0931, "step": 17598 }, { "epoch": 0.3116701421633085, "grad_norm": 0.6758847832679749, "learning_rate": 2.4179241234647004e-05, "loss": 0.0678, "step": 17599 }, { "epoch": 0.3116878517003369, "grad_norm": 0.7256020903587341, "learning_rate": 2.4178560759850543e-05, "loss": 0.0649, "step": 17600 }, { "epoch": 0.31170556123736537, "grad_norm": 0.9748623371124268, "learning_rate": 2.4177880254857546e-05, "loss": 0.176, "step": 17601 }, { "epoch": 0.31172327077439377, "grad_norm": 0.5689506530761719, "learning_rate": 2.4177199719670258e-05, "loss": 0.089, "step": 17602 }, { "epoch": 0.3117409803114222, "grad_norm": 0.9920064210891724, "learning_rate": 2.4176519154290904e-05, "loss": 0.1063, "step": 17603 }, { "epoch": 0.3117586898484506, "grad_norm": 0.5963063836097717, "learning_rate": 2.417583855872173e-05, "loss": 0.0801, "step": 17604 }, { "epoch": 0.3117763993854791, "grad_norm": 0.6706804037094116, "learning_rate": 2.417515793296498e-05, "loss": 0.108, "step": 17605 }, { "epoch": 0.3117941089225075, "grad_norm": 1.3304393291473389, "learning_rate": 2.4174477277022885e-05, "loss": 0.0885, "step": 17606 }, { "epoch": 0.3118118184595359, "grad_norm": 0.7792558670043945, "learning_rate": 2.4173796590897687e-05, "loss": 0.1219, "step": 17607 }, { "epoch": 0.3118295279965643, "grad_norm": 0.6921331882476807, "learning_rate": 2.4173115874591627e-05, "loss": 0.093, "step": 17608 }, { "epoch": 0.3118472375335928, "grad_norm": 0.9131665825843811, "learning_rate": 2.417243512810694e-05, "loss": 0.1313, "step": 17609 }, { "epoch": 0.3118649470706212, "grad_norm": 0.8673830032348633, "learning_rate": 2.4171754351445878e-05, "loss": 0.1303, "step": 17610 }, { "epoch": 0.31188265660764963, "grad_norm": 0.8917141556739807, "learning_rate": 2.4171073544610668e-05, "loss": 0.1079, "step": 17611 }, { "epoch": 0.3119003661446781, "grad_norm": 0.7388052940368652, "learning_rate": 2.417039270760355e-05, "loss": 0.0722, "step": 17612 }, { "epoch": 0.3119180756817065, "grad_norm": 0.622042179107666, "learning_rate": 2.4169711840426775e-05, "loss": 0.0779, "step": 17613 }, { "epoch": 0.31193578521873494, "grad_norm": 0.9756434559822083, "learning_rate": 2.4169030943082567e-05, "loss": 0.0908, "step": 17614 }, { "epoch": 0.31195349475576334, "grad_norm": 0.9351192116737366, "learning_rate": 2.416835001557318e-05, "loss": 0.0922, "step": 17615 }, { "epoch": 0.3119712042927918, "grad_norm": 0.43701595067977905, "learning_rate": 2.4167669057900846e-05, "loss": 0.0799, "step": 17616 }, { "epoch": 0.3119889138298202, "grad_norm": 0.919410228729248, "learning_rate": 2.4166988070067807e-05, "loss": 0.0854, "step": 17617 }, { "epoch": 0.31200662336684865, "grad_norm": 1.0771636962890625, "learning_rate": 2.416630705207631e-05, "loss": 0.0863, "step": 17618 }, { "epoch": 0.31202433290387704, "grad_norm": 0.7133240103721619, "learning_rate": 2.4165626003928585e-05, "loss": 0.0708, "step": 17619 }, { "epoch": 0.3120420424409055, "grad_norm": 0.9841613173484802, "learning_rate": 2.4164944925626878e-05, "loss": 0.0653, "step": 17620 }, { "epoch": 0.3120597519779339, "grad_norm": 0.5172022581100464, "learning_rate": 2.416426381717343e-05, "loss": 0.0887, "step": 17621 }, { "epoch": 0.31207746151496235, "grad_norm": 0.7852078080177307, "learning_rate": 2.4163582678570485e-05, "loss": 0.112, "step": 17622 }, { "epoch": 0.31209517105199075, "grad_norm": 0.6907269954681396, "learning_rate": 2.4162901509820272e-05, "loss": 0.0561, "step": 17623 }, { "epoch": 0.3121128805890192, "grad_norm": 1.015112280845642, "learning_rate": 2.4162220310925048e-05, "loss": 0.0924, "step": 17624 }, { "epoch": 0.3121305901260476, "grad_norm": 0.6550273299217224, "learning_rate": 2.4161539081887035e-05, "loss": 0.1326, "step": 17625 }, { "epoch": 0.31214829966307606, "grad_norm": 0.7580224871635437, "learning_rate": 2.4160857822708496e-05, "loss": 0.0515, "step": 17626 }, { "epoch": 0.3121660092001045, "grad_norm": 0.841315507888794, "learning_rate": 2.4160176533391657e-05, "loss": 0.0952, "step": 17627 }, { "epoch": 0.3121837187371329, "grad_norm": 0.7235333919525146, "learning_rate": 2.415949521393876e-05, "loss": 0.112, "step": 17628 }, { "epoch": 0.31220142827416136, "grad_norm": 0.49878010153770447, "learning_rate": 2.4158813864352056e-05, "loss": 0.0677, "step": 17629 }, { "epoch": 0.31221913781118976, "grad_norm": 0.43570676445961, "learning_rate": 2.4158132484633776e-05, "loss": 0.0913, "step": 17630 }, { "epoch": 0.3122368473482182, "grad_norm": 0.6406897306442261, "learning_rate": 2.4157451074786175e-05, "loss": 0.0736, "step": 17631 }, { "epoch": 0.3122545568852466, "grad_norm": 0.9029538631439209, "learning_rate": 2.4156769634811472e-05, "loss": 0.0871, "step": 17632 }, { "epoch": 0.31227226642227507, "grad_norm": 0.9848624467849731, "learning_rate": 2.4156088164711934e-05, "loss": 0.1404, "step": 17633 }, { "epoch": 0.31228997595930347, "grad_norm": 0.8291462063789368, "learning_rate": 2.415540666448979e-05, "loss": 0.0652, "step": 17634 }, { "epoch": 0.3123076854963319, "grad_norm": 0.6305611729621887, "learning_rate": 2.4154725134147284e-05, "loss": 0.0515, "step": 17635 }, { "epoch": 0.3123253950333603, "grad_norm": 0.7377889156341553, "learning_rate": 2.4154043573686657e-05, "loss": 0.1106, "step": 17636 }, { "epoch": 0.3123431045703888, "grad_norm": 1.210768699645996, "learning_rate": 2.415336198311015e-05, "loss": 0.1016, "step": 17637 }, { "epoch": 0.3123608141074172, "grad_norm": 0.9307510852813721, "learning_rate": 2.4152680362420013e-05, "loss": 0.1131, "step": 17638 }, { "epoch": 0.3123785236444456, "grad_norm": 0.9233216643333435, "learning_rate": 2.4151998711618484e-05, "loss": 0.1186, "step": 17639 }, { "epoch": 0.312396233181474, "grad_norm": 1.3511358499526978, "learning_rate": 2.41513170307078e-05, "loss": 0.1192, "step": 17640 }, { "epoch": 0.3124139427185025, "grad_norm": 0.666476309299469, "learning_rate": 2.4150635319690212e-05, "loss": 0.1259, "step": 17641 }, { "epoch": 0.31243165225553093, "grad_norm": 1.237372875213623, "learning_rate": 2.4149953578567958e-05, "loss": 0.1021, "step": 17642 }, { "epoch": 0.31244936179255933, "grad_norm": 0.821825385093689, "learning_rate": 2.414927180734328e-05, "loss": 0.0696, "step": 17643 }, { "epoch": 0.3124670713295878, "grad_norm": 0.5223643183708191, "learning_rate": 2.4148590006018427e-05, "loss": 0.0512, "step": 17644 }, { "epoch": 0.3124847808666162, "grad_norm": 0.5834842324256897, "learning_rate": 2.414790817459564e-05, "loss": 0.0716, "step": 17645 }, { "epoch": 0.31250249040364464, "grad_norm": 1.1692231893539429, "learning_rate": 2.414722631307715e-05, "loss": 0.115, "step": 17646 }, { "epoch": 0.31252019994067304, "grad_norm": 0.5539718270301819, "learning_rate": 2.414654442146522e-05, "loss": 0.0818, "step": 17647 }, { "epoch": 0.3125379094777015, "grad_norm": 0.6593331098556519, "learning_rate": 2.4145862499762085e-05, "loss": 0.1045, "step": 17648 }, { "epoch": 0.3125556190147299, "grad_norm": 0.45556360483169556, "learning_rate": 2.414518054796998e-05, "loss": 0.0538, "step": 17649 }, { "epoch": 0.31257332855175834, "grad_norm": 0.9496064186096191, "learning_rate": 2.4144498566091164e-05, "loss": 0.097, "step": 17650 }, { "epoch": 0.31259103808878674, "grad_norm": 0.7428970336914062, "learning_rate": 2.4143816554127868e-05, "loss": 0.0633, "step": 17651 }, { "epoch": 0.3126087476258152, "grad_norm": 0.6012218594551086, "learning_rate": 2.4143134512082343e-05, "loss": 0.0951, "step": 17652 }, { "epoch": 0.3126264571628436, "grad_norm": 0.8118851780891418, "learning_rate": 2.4142452439956828e-05, "loss": 0.105, "step": 17653 }, { "epoch": 0.31264416669987205, "grad_norm": 0.6502910256385803, "learning_rate": 2.4141770337753566e-05, "loss": 0.0853, "step": 17654 }, { "epoch": 0.3126618762369005, "grad_norm": 1.0273606777191162, "learning_rate": 2.414108820547481e-05, "loss": 0.0744, "step": 17655 }, { "epoch": 0.3126795857739289, "grad_norm": 0.8947768807411194, "learning_rate": 2.4140406043122796e-05, "loss": 0.1152, "step": 17656 }, { "epoch": 0.31269729531095736, "grad_norm": 1.077944040298462, "learning_rate": 2.4139723850699768e-05, "loss": 0.1157, "step": 17657 }, { "epoch": 0.31271500484798576, "grad_norm": 0.8086604475975037, "learning_rate": 2.4139041628207975e-05, "loss": 0.0761, "step": 17658 }, { "epoch": 0.3127327143850142, "grad_norm": 0.9179626703262329, "learning_rate": 2.4138359375649663e-05, "loss": 0.1048, "step": 17659 }, { "epoch": 0.3127504239220426, "grad_norm": 0.6882070302963257, "learning_rate": 2.413767709302707e-05, "loss": 0.0792, "step": 17660 }, { "epoch": 0.31276813345907106, "grad_norm": 1.2694673538208008, "learning_rate": 2.413699478034244e-05, "loss": 0.0763, "step": 17661 }, { "epoch": 0.31278584299609946, "grad_norm": 0.7260180115699768, "learning_rate": 2.4136312437598028e-05, "loss": 0.0944, "step": 17662 }, { "epoch": 0.3128035525331279, "grad_norm": 0.8720447421073914, "learning_rate": 2.413563006479607e-05, "loss": 0.1256, "step": 17663 }, { "epoch": 0.3128212620701563, "grad_norm": 0.8327614665031433, "learning_rate": 2.413494766193881e-05, "loss": 0.0702, "step": 17664 }, { "epoch": 0.31283897160718477, "grad_norm": 1.9029464721679688, "learning_rate": 2.41342652290285e-05, "loss": 0.0904, "step": 17665 }, { "epoch": 0.31285668114421317, "grad_norm": 0.7024856209754944, "learning_rate": 2.4133582766067378e-05, "loss": 0.1015, "step": 17666 }, { "epoch": 0.3128743906812416, "grad_norm": 1.138576865196228, "learning_rate": 2.4132900273057697e-05, "loss": 0.1083, "step": 17667 }, { "epoch": 0.31289210021827, "grad_norm": 0.8342145681381226, "learning_rate": 2.413221775000169e-05, "loss": 0.0908, "step": 17668 }, { "epoch": 0.3129098097552985, "grad_norm": 1.6854358911514282, "learning_rate": 2.4131535196901618e-05, "loss": 0.1371, "step": 17669 }, { "epoch": 0.31292751929232693, "grad_norm": 1.092510461807251, "learning_rate": 2.4130852613759716e-05, "loss": 0.0982, "step": 17670 }, { "epoch": 0.3129452288293553, "grad_norm": 1.061900019645691, "learning_rate": 2.4130170000578228e-05, "loss": 0.1271, "step": 17671 }, { "epoch": 0.3129629383663838, "grad_norm": 0.8105944991111755, "learning_rate": 2.412948735735941e-05, "loss": 0.0966, "step": 17672 }, { "epoch": 0.3129806479034122, "grad_norm": 1.4844691753387451, "learning_rate": 2.4128804684105502e-05, "loss": 0.0841, "step": 17673 }, { "epoch": 0.31299835744044063, "grad_norm": 0.9063901901245117, "learning_rate": 2.4128121980818746e-05, "loss": 0.0769, "step": 17674 }, { "epoch": 0.31301606697746903, "grad_norm": 0.8748027086257935, "learning_rate": 2.41274392475014e-05, "loss": 0.0669, "step": 17675 }, { "epoch": 0.3130337765144975, "grad_norm": 0.8773415684700012, "learning_rate": 2.412675648415569e-05, "loss": 0.1062, "step": 17676 }, { "epoch": 0.3130514860515259, "grad_norm": 0.5446757078170776, "learning_rate": 2.4126073690783882e-05, "loss": 0.0586, "step": 17677 }, { "epoch": 0.31306919558855434, "grad_norm": 0.7492804527282715, "learning_rate": 2.4125390867388213e-05, "loss": 0.0729, "step": 17678 }, { "epoch": 0.31308690512558274, "grad_norm": 1.1736770868301392, "learning_rate": 2.4124708013970932e-05, "loss": 0.1246, "step": 17679 }, { "epoch": 0.3131046146626112, "grad_norm": 0.964078426361084, "learning_rate": 2.4124025130534286e-05, "loss": 0.1016, "step": 17680 }, { "epoch": 0.3131223241996396, "grad_norm": 0.5489985346794128, "learning_rate": 2.412334221708052e-05, "loss": 0.0946, "step": 17681 }, { "epoch": 0.31314003373666804, "grad_norm": 0.567506730556488, "learning_rate": 2.412265927361188e-05, "loss": 0.1132, "step": 17682 }, { "epoch": 0.31315774327369644, "grad_norm": 1.133853793144226, "learning_rate": 2.412197630013061e-05, "loss": 0.0776, "step": 17683 }, { "epoch": 0.3131754528107249, "grad_norm": 0.40284666419029236, "learning_rate": 2.4121293296638964e-05, "loss": 0.0688, "step": 17684 }, { "epoch": 0.31319316234775335, "grad_norm": 0.9553492069244385, "learning_rate": 2.4120610263139186e-05, "loss": 0.0763, "step": 17685 }, { "epoch": 0.31321087188478175, "grad_norm": 0.7661103010177612, "learning_rate": 2.411992719963352e-05, "loss": 0.1161, "step": 17686 }, { "epoch": 0.3132285814218102, "grad_norm": 0.7812032699584961, "learning_rate": 2.4119244106124217e-05, "loss": 0.0656, "step": 17687 }, { "epoch": 0.3132462909588386, "grad_norm": 0.8312039971351624, "learning_rate": 2.4118560982613524e-05, "loss": 0.0842, "step": 17688 }, { "epoch": 0.31326400049586706, "grad_norm": 0.7423357963562012, "learning_rate": 2.411787782910369e-05, "loss": 0.0743, "step": 17689 }, { "epoch": 0.31328171003289546, "grad_norm": 0.770114004611969, "learning_rate": 2.4117194645596956e-05, "loss": 0.0983, "step": 17690 }, { "epoch": 0.3132994195699239, "grad_norm": 0.6587023735046387, "learning_rate": 2.4116511432095574e-05, "loss": 0.0966, "step": 17691 }, { "epoch": 0.3133171291069523, "grad_norm": 0.6590749025344849, "learning_rate": 2.411582818860179e-05, "loss": 0.0715, "step": 17692 }, { "epoch": 0.31333483864398076, "grad_norm": 0.7157386541366577, "learning_rate": 2.411514491511786e-05, "loss": 0.0714, "step": 17693 }, { "epoch": 0.31335254818100916, "grad_norm": 0.9490751028060913, "learning_rate": 2.4114461611646016e-05, "loss": 0.0969, "step": 17694 }, { "epoch": 0.3133702577180376, "grad_norm": 0.846442699432373, "learning_rate": 2.4113778278188526e-05, "loss": 0.0997, "step": 17695 }, { "epoch": 0.313387967255066, "grad_norm": 0.9084263443946838, "learning_rate": 2.411309491474762e-05, "loss": 0.0925, "step": 17696 }, { "epoch": 0.31340567679209447, "grad_norm": 0.6986320614814758, "learning_rate": 2.4112411521325553e-05, "loss": 0.0907, "step": 17697 }, { "epoch": 0.31342338632912287, "grad_norm": 1.1405450105667114, "learning_rate": 2.411172809792458e-05, "loss": 0.1001, "step": 17698 }, { "epoch": 0.3134410958661513, "grad_norm": 0.5020057559013367, "learning_rate": 2.4111044644546937e-05, "loss": 0.087, "step": 17699 }, { "epoch": 0.3134588054031798, "grad_norm": 1.0573186874389648, "learning_rate": 2.411036116119488e-05, "loss": 0.1081, "step": 17700 }, { "epoch": 0.3134765149402082, "grad_norm": 0.943148136138916, "learning_rate": 2.4109677647870654e-05, "loss": 0.1156, "step": 17701 }, { "epoch": 0.3134942244772366, "grad_norm": 1.1633336544036865, "learning_rate": 2.4108994104576517e-05, "loss": 0.1189, "step": 17702 }, { "epoch": 0.313511934014265, "grad_norm": 0.9108474254608154, "learning_rate": 2.4108310531314706e-05, "loss": 0.0915, "step": 17703 }, { "epoch": 0.3135296435512935, "grad_norm": 0.7240228056907654, "learning_rate": 2.4107626928087475e-05, "loss": 0.0718, "step": 17704 }, { "epoch": 0.3135473530883219, "grad_norm": 0.8412255644798279, "learning_rate": 2.4106943294897074e-05, "loss": 0.0756, "step": 17705 }, { "epoch": 0.31356506262535033, "grad_norm": 0.6190308928489685, "learning_rate": 2.4106259631745745e-05, "loss": 0.0732, "step": 17706 }, { "epoch": 0.31358277216237873, "grad_norm": 0.5964778661727905, "learning_rate": 2.4105575938635752e-05, "loss": 0.0569, "step": 17707 }, { "epoch": 0.3136004816994072, "grad_norm": 1.1801786422729492, "learning_rate": 2.410489221556933e-05, "loss": 0.1237, "step": 17708 }, { "epoch": 0.3136181912364356, "grad_norm": 0.7423210144042969, "learning_rate": 2.4104208462548735e-05, "loss": 0.0809, "step": 17709 }, { "epoch": 0.31363590077346404, "grad_norm": 0.6830933690071106, "learning_rate": 2.410352467957621e-05, "loss": 0.0772, "step": 17710 }, { "epoch": 0.31365361031049244, "grad_norm": 0.942785382270813, "learning_rate": 2.4102840866654017e-05, "loss": 0.0912, "step": 17711 }, { "epoch": 0.3136713198475209, "grad_norm": 0.922720730304718, "learning_rate": 2.4102157023784394e-05, "loss": 0.0853, "step": 17712 }, { "epoch": 0.3136890293845493, "grad_norm": 0.9655106067657471, "learning_rate": 2.4101473150969595e-05, "loss": 0.0844, "step": 17713 }, { "epoch": 0.31370673892157774, "grad_norm": 0.7236549854278564, "learning_rate": 2.410078924821187e-05, "loss": 0.122, "step": 17714 }, { "epoch": 0.3137244484586062, "grad_norm": 1.1336737871170044, "learning_rate": 2.4100105315513468e-05, "loss": 0.1011, "step": 17715 }, { "epoch": 0.3137421579956346, "grad_norm": 0.9083662629127502, "learning_rate": 2.4099421352876642e-05, "loss": 0.0904, "step": 17716 }, { "epoch": 0.31375986753266305, "grad_norm": 0.7364532351493835, "learning_rate": 2.409873736030364e-05, "loss": 0.073, "step": 17717 }, { "epoch": 0.31377757706969145, "grad_norm": 0.4272406995296478, "learning_rate": 2.409805333779671e-05, "loss": 0.0848, "step": 17718 }, { "epoch": 0.3137952866067199, "grad_norm": 0.47420671582221985, "learning_rate": 2.409736928535811e-05, "loss": 0.0776, "step": 17719 }, { "epoch": 0.3138129961437483, "grad_norm": 0.6550644040107727, "learning_rate": 2.409668520299008e-05, "loss": 0.074, "step": 17720 }, { "epoch": 0.31383070568077676, "grad_norm": 0.7265585064888, "learning_rate": 2.4096001090694878e-05, "loss": 0.0846, "step": 17721 }, { "epoch": 0.31384841521780515, "grad_norm": 1.0700191259384155, "learning_rate": 2.4095316948474755e-05, "loss": 0.0998, "step": 17722 }, { "epoch": 0.3138661247548336, "grad_norm": 0.8306490182876587, "learning_rate": 2.4094632776331953e-05, "loss": 0.0798, "step": 17723 }, { "epoch": 0.313883834291862, "grad_norm": 0.9336680769920349, "learning_rate": 2.4093948574268732e-05, "loss": 0.0693, "step": 17724 }, { "epoch": 0.31390154382889046, "grad_norm": 0.5012087821960449, "learning_rate": 2.4093264342287342e-05, "loss": 0.0973, "step": 17725 }, { "epoch": 0.31391925336591886, "grad_norm": 0.6589315533638, "learning_rate": 2.409258008039003e-05, "loss": 0.0766, "step": 17726 }, { "epoch": 0.3139369629029473, "grad_norm": 0.7961333990097046, "learning_rate": 2.409189578857905e-05, "loss": 0.1206, "step": 17727 }, { "epoch": 0.3139546724399757, "grad_norm": 0.7934823632240295, "learning_rate": 2.409121146685665e-05, "loss": 0.0622, "step": 17728 }, { "epoch": 0.31397238197700417, "grad_norm": 0.9710506796836853, "learning_rate": 2.4090527115225087e-05, "loss": 0.1135, "step": 17729 }, { "epoch": 0.3139900915140326, "grad_norm": 0.7639445662498474, "learning_rate": 2.4089842733686606e-05, "loss": 0.1124, "step": 17730 }, { "epoch": 0.314007801051061, "grad_norm": 0.9384971261024475, "learning_rate": 2.4089158322243468e-05, "loss": 0.1131, "step": 17731 }, { "epoch": 0.3140255105880895, "grad_norm": 0.6307961940765381, "learning_rate": 2.408847388089791e-05, "loss": 0.0627, "step": 17732 }, { "epoch": 0.3140432201251179, "grad_norm": 0.8776851892471313, "learning_rate": 2.4087789409652198e-05, "loss": 0.0872, "step": 17733 }, { "epoch": 0.3140609296621463, "grad_norm": 0.7072957158088684, "learning_rate": 2.4087104908508575e-05, "loss": 0.098, "step": 17734 }, { "epoch": 0.3140786391991747, "grad_norm": 0.5594607591629028, "learning_rate": 2.4086420377469298e-05, "loss": 0.0767, "step": 17735 }, { "epoch": 0.3140963487362032, "grad_norm": 1.192031979560852, "learning_rate": 2.4085735816536618e-05, "loss": 0.0803, "step": 17736 }, { "epoch": 0.3141140582732316, "grad_norm": 0.6027469635009766, "learning_rate": 2.4085051225712783e-05, "loss": 0.0822, "step": 17737 }, { "epoch": 0.31413176781026003, "grad_norm": 0.5870519876480103, "learning_rate": 2.408436660500005e-05, "loss": 0.1256, "step": 17738 }, { "epoch": 0.31414947734728843, "grad_norm": 1.0611480474472046, "learning_rate": 2.4083681954400668e-05, "loss": 0.1034, "step": 17739 }, { "epoch": 0.3141671868843169, "grad_norm": 0.8808450698852539, "learning_rate": 2.4082997273916894e-05, "loss": 0.1108, "step": 17740 }, { "epoch": 0.3141848964213453, "grad_norm": 0.686008632183075, "learning_rate": 2.4082312563550977e-05, "loss": 0.0793, "step": 17741 }, { "epoch": 0.31420260595837374, "grad_norm": 0.9388607144355774, "learning_rate": 2.408162782330517e-05, "loss": 0.101, "step": 17742 }, { "epoch": 0.31422031549540214, "grad_norm": 0.5181083083152771, "learning_rate": 2.4080943053181725e-05, "loss": 0.0727, "step": 17743 }, { "epoch": 0.3142380250324306, "grad_norm": 0.7411150932312012, "learning_rate": 2.4080258253182894e-05, "loss": 0.0922, "step": 17744 }, { "epoch": 0.31425573456945904, "grad_norm": 0.6531732678413391, "learning_rate": 2.4079573423310934e-05, "loss": 0.0635, "step": 17745 }, { "epoch": 0.31427344410648744, "grad_norm": 0.9454626441001892, "learning_rate": 2.4078888563568097e-05, "loss": 0.0975, "step": 17746 }, { "epoch": 0.3142911536435159, "grad_norm": 0.7855163812637329, "learning_rate": 2.4078203673956632e-05, "loss": 0.0853, "step": 17747 }, { "epoch": 0.3143088631805443, "grad_norm": 0.7744629383087158, "learning_rate": 2.40775187544788e-05, "loss": 0.1003, "step": 17748 }, { "epoch": 0.31432657271757275, "grad_norm": 1.0304690599441528, "learning_rate": 2.4076833805136844e-05, "loss": 0.111, "step": 17749 }, { "epoch": 0.31434428225460115, "grad_norm": 0.419761598110199, "learning_rate": 2.407614882593303e-05, "loss": 0.1059, "step": 17750 }, { "epoch": 0.3143619917916296, "grad_norm": 0.8729283213615417, "learning_rate": 2.4075463816869598e-05, "loss": 0.0747, "step": 17751 }, { "epoch": 0.314379701328658, "grad_norm": 0.6155070662498474, "learning_rate": 2.407477877794881e-05, "loss": 0.0931, "step": 17752 }, { "epoch": 0.31439741086568646, "grad_norm": 1.2490023374557495, "learning_rate": 2.4074093709172915e-05, "loss": 0.1044, "step": 17753 }, { "epoch": 0.31441512040271485, "grad_norm": 1.2150567770004272, "learning_rate": 2.4073408610544175e-05, "loss": 0.0668, "step": 17754 }, { "epoch": 0.3144328299397433, "grad_norm": 0.795988917350769, "learning_rate": 2.4072723482064835e-05, "loss": 0.0898, "step": 17755 }, { "epoch": 0.3144505394767717, "grad_norm": 0.6306496262550354, "learning_rate": 2.407203832373715e-05, "loss": 0.1041, "step": 17756 }, { "epoch": 0.31446824901380016, "grad_norm": 0.8135966658592224, "learning_rate": 2.4071353135563377e-05, "loss": 0.0967, "step": 17757 }, { "epoch": 0.31448595855082856, "grad_norm": 0.767804741859436, "learning_rate": 2.4070667917545773e-05, "loss": 0.0976, "step": 17758 }, { "epoch": 0.314503668087857, "grad_norm": 0.7825274467468262, "learning_rate": 2.406998266968659e-05, "loss": 0.1163, "step": 17759 }, { "epoch": 0.31452137762488547, "grad_norm": 0.6707519292831421, "learning_rate": 2.4069297391988076e-05, "loss": 0.0932, "step": 17760 }, { "epoch": 0.31453908716191387, "grad_norm": 0.22645574808120728, "learning_rate": 2.4068612084452496e-05, "loss": 0.0835, "step": 17761 }, { "epoch": 0.3145567966989423, "grad_norm": 0.7382732629776001, "learning_rate": 2.40679267470821e-05, "loss": 0.1261, "step": 17762 }, { "epoch": 0.3145745062359707, "grad_norm": 0.9583964943885803, "learning_rate": 2.406724137987914e-05, "loss": 0.1017, "step": 17763 }, { "epoch": 0.3145922157729992, "grad_norm": 1.062381625175476, "learning_rate": 2.406655598284587e-05, "loss": 0.1049, "step": 17764 }, { "epoch": 0.31460992531002757, "grad_norm": 0.6979061961174011, "learning_rate": 2.406587055598455e-05, "loss": 0.0753, "step": 17765 }, { "epoch": 0.314627634847056, "grad_norm": 1.1275900602340698, "learning_rate": 2.4065185099297435e-05, "loss": 0.0721, "step": 17766 }, { "epoch": 0.3146453443840844, "grad_norm": 0.3535158634185791, "learning_rate": 2.4064499612786775e-05, "loss": 0.092, "step": 17767 }, { "epoch": 0.3146630539211129, "grad_norm": 0.7089625000953674, "learning_rate": 2.406381409645483e-05, "loss": 0.1305, "step": 17768 }, { "epoch": 0.3146807634581413, "grad_norm": 1.083885669708252, "learning_rate": 2.4063128550303855e-05, "loss": 0.0869, "step": 17769 }, { "epoch": 0.31469847299516973, "grad_norm": 0.7267585396766663, "learning_rate": 2.40624429743361e-05, "loss": 0.1094, "step": 17770 }, { "epoch": 0.31471618253219813, "grad_norm": 0.5701255798339844, "learning_rate": 2.4061757368553827e-05, "loss": 0.1022, "step": 17771 }, { "epoch": 0.3147338920692266, "grad_norm": 1.086098551750183, "learning_rate": 2.4061071732959283e-05, "loss": 0.0903, "step": 17772 }, { "epoch": 0.314751601606255, "grad_norm": 0.7194190621376038, "learning_rate": 2.4060386067554732e-05, "loss": 0.0897, "step": 17773 }, { "epoch": 0.31476931114328344, "grad_norm": 0.7123562693595886, "learning_rate": 2.405970037234243e-05, "loss": 0.1033, "step": 17774 }, { "epoch": 0.3147870206803119, "grad_norm": 0.9543618559837341, "learning_rate": 2.405901464732463e-05, "loss": 0.1285, "step": 17775 }, { "epoch": 0.3148047302173403, "grad_norm": 0.4192824959754944, "learning_rate": 2.405832889250359e-05, "loss": 0.0804, "step": 17776 }, { "epoch": 0.31482243975436874, "grad_norm": 0.8432044386863708, "learning_rate": 2.405764310788156e-05, "loss": 0.0944, "step": 17777 }, { "epoch": 0.31484014929139714, "grad_norm": 1.0149224996566772, "learning_rate": 2.4056957293460805e-05, "loss": 0.1332, "step": 17778 }, { "epoch": 0.3148578588284256, "grad_norm": 0.7138055562973022, "learning_rate": 2.405627144924357e-05, "loss": 0.0872, "step": 17779 }, { "epoch": 0.314875568365454, "grad_norm": 0.5462349653244019, "learning_rate": 2.4055585575232122e-05, "loss": 0.089, "step": 17780 }, { "epoch": 0.31489327790248245, "grad_norm": 0.618670642375946, "learning_rate": 2.4054899671428712e-05, "loss": 0.0837, "step": 17781 }, { "epoch": 0.31491098743951085, "grad_norm": 0.745678186416626, "learning_rate": 2.40542137378356e-05, "loss": 0.1191, "step": 17782 }, { "epoch": 0.3149286969765393, "grad_norm": 0.8224495649337769, "learning_rate": 2.4053527774455034e-05, "loss": 0.0896, "step": 17783 }, { "epoch": 0.3149464065135677, "grad_norm": 0.6002598404884338, "learning_rate": 2.405284178128929e-05, "loss": 0.0923, "step": 17784 }, { "epoch": 0.31496411605059615, "grad_norm": 0.33758288621902466, "learning_rate": 2.4052155758340602e-05, "loss": 0.0654, "step": 17785 }, { "epoch": 0.31498182558762455, "grad_norm": 0.5527463555335999, "learning_rate": 2.405146970561124e-05, "loss": 0.0658, "step": 17786 }, { "epoch": 0.314999535124653, "grad_norm": 0.820854127407074, "learning_rate": 2.4050783623103457e-05, "loss": 0.1004, "step": 17787 }, { "epoch": 0.3150172446616814, "grad_norm": 0.828490138053894, "learning_rate": 2.4050097510819513e-05, "loss": 0.0961, "step": 17788 }, { "epoch": 0.31503495419870986, "grad_norm": 0.8055847883224487, "learning_rate": 2.4049411368761666e-05, "loss": 0.0935, "step": 17789 }, { "epoch": 0.3150526637357383, "grad_norm": 0.8264631628990173, "learning_rate": 2.4048725196932162e-05, "loss": 0.0675, "step": 17790 }, { "epoch": 0.3150703732727667, "grad_norm": 0.9333062767982483, "learning_rate": 2.4048038995333276e-05, "loss": 0.1206, "step": 17791 }, { "epoch": 0.31508808280979517, "grad_norm": 1.1070634126663208, "learning_rate": 2.4047352763967253e-05, "loss": 0.0883, "step": 17792 }, { "epoch": 0.31510579234682357, "grad_norm": 0.49151185154914856, "learning_rate": 2.4046666502836354e-05, "loss": 0.0951, "step": 17793 }, { "epoch": 0.315123501883852, "grad_norm": 1.4069890975952148, "learning_rate": 2.404598021194284e-05, "loss": 0.1076, "step": 17794 }, { "epoch": 0.3151412114208804, "grad_norm": 0.6227445006370544, "learning_rate": 2.404529389128896e-05, "loss": 0.0925, "step": 17795 }, { "epoch": 0.3151589209579089, "grad_norm": 0.573788583278656, "learning_rate": 2.4044607540876987e-05, "loss": 0.1019, "step": 17796 }, { "epoch": 0.31517663049493727, "grad_norm": 0.682621955871582, "learning_rate": 2.4043921160709163e-05, "loss": 0.1027, "step": 17797 }, { "epoch": 0.3151943400319657, "grad_norm": 0.9081839323043823, "learning_rate": 2.4043234750787758e-05, "loss": 0.1074, "step": 17798 }, { "epoch": 0.3152120495689941, "grad_norm": 0.5425984859466553, "learning_rate": 2.4042548311115022e-05, "loss": 0.093, "step": 17799 }, { "epoch": 0.3152297591060226, "grad_norm": 1.253283143043518, "learning_rate": 2.404186184169322e-05, "loss": 0.1107, "step": 17800 }, { "epoch": 0.315247468643051, "grad_norm": 0.8646391034126282, "learning_rate": 2.4041175342524604e-05, "loss": 0.0839, "step": 17801 }, { "epoch": 0.31526517818007943, "grad_norm": 0.6356246471405029, "learning_rate": 2.4040488813611435e-05, "loss": 0.0822, "step": 17802 }, { "epoch": 0.31528288771710783, "grad_norm": 0.9293761849403381, "learning_rate": 2.4039802254955977e-05, "loss": 0.1155, "step": 17803 }, { "epoch": 0.3153005972541363, "grad_norm": 0.9180514216423035, "learning_rate": 2.4039115666560478e-05, "loss": 0.075, "step": 17804 }, { "epoch": 0.31531830679116474, "grad_norm": 0.4225640296936035, "learning_rate": 2.4038429048427204e-05, "loss": 0.0877, "step": 17805 }, { "epoch": 0.31533601632819314, "grad_norm": 0.6170282959938049, "learning_rate": 2.4037742400558415e-05, "loss": 0.0954, "step": 17806 }, { "epoch": 0.3153537258652216, "grad_norm": 0.7609187364578247, "learning_rate": 2.4037055722956367e-05, "loss": 0.0835, "step": 17807 }, { "epoch": 0.31537143540225, "grad_norm": 1.0184234380722046, "learning_rate": 2.403636901562332e-05, "loss": 0.1101, "step": 17808 }, { "epoch": 0.31538914493927844, "grad_norm": 1.020973563194275, "learning_rate": 2.403568227856153e-05, "loss": 0.0651, "step": 17809 }, { "epoch": 0.31540685447630684, "grad_norm": 0.9359765648841858, "learning_rate": 2.4034995511773264e-05, "loss": 0.1057, "step": 17810 }, { "epoch": 0.3154245640133353, "grad_norm": 0.5817531943321228, "learning_rate": 2.4034308715260773e-05, "loss": 0.0932, "step": 17811 }, { "epoch": 0.3154422735503637, "grad_norm": 0.7920562028884888, "learning_rate": 2.403362188902632e-05, "loss": 0.1098, "step": 17812 }, { "epoch": 0.31545998308739215, "grad_norm": 0.5856842994689941, "learning_rate": 2.4032935033072168e-05, "loss": 0.0497, "step": 17813 }, { "epoch": 0.31547769262442055, "grad_norm": 0.9288676381111145, "learning_rate": 2.403224814740057e-05, "loss": 0.1029, "step": 17814 }, { "epoch": 0.315495402161449, "grad_norm": 0.6533175706863403, "learning_rate": 2.403156123201379e-05, "loss": 0.0947, "step": 17815 }, { "epoch": 0.3155131116984774, "grad_norm": 1.424140214920044, "learning_rate": 2.4030874286914084e-05, "loss": 0.1126, "step": 17816 }, { "epoch": 0.31553082123550585, "grad_norm": 0.8205899596214294, "learning_rate": 2.403018731210372e-05, "loss": 0.0942, "step": 17817 }, { "epoch": 0.31554853077253425, "grad_norm": 0.5326775312423706, "learning_rate": 2.4029500307584952e-05, "loss": 0.1053, "step": 17818 }, { "epoch": 0.3155662403095627, "grad_norm": 1.0239882469177246, "learning_rate": 2.402881327336004e-05, "loss": 0.0884, "step": 17819 }, { "epoch": 0.31558394984659116, "grad_norm": 0.612135112285614, "learning_rate": 2.4028126209431243e-05, "loss": 0.0903, "step": 17820 }, { "epoch": 0.31560165938361956, "grad_norm": 1.2688127756118774, "learning_rate": 2.402743911580083e-05, "loss": 0.1598, "step": 17821 }, { "epoch": 0.315619368920648, "grad_norm": 0.6948555707931519, "learning_rate": 2.4026751992471053e-05, "loss": 0.0884, "step": 17822 }, { "epoch": 0.3156370784576764, "grad_norm": 1.4043409824371338, "learning_rate": 2.4026064839444174e-05, "loss": 0.1232, "step": 17823 }, { "epoch": 0.31565478799470487, "grad_norm": 0.6005054712295532, "learning_rate": 2.4025377656722458e-05, "loss": 0.1284, "step": 17824 }, { "epoch": 0.31567249753173326, "grad_norm": 1.00612473487854, "learning_rate": 2.4024690444308158e-05, "loss": 0.1242, "step": 17825 }, { "epoch": 0.3156902070687617, "grad_norm": 0.6039913296699524, "learning_rate": 2.4024003202203543e-05, "loss": 0.0873, "step": 17826 }, { "epoch": 0.3157079166057901, "grad_norm": 1.1080211400985718, "learning_rate": 2.4023315930410864e-05, "loss": 0.1254, "step": 17827 }, { "epoch": 0.31572562614281857, "grad_norm": 0.6669530868530273, "learning_rate": 2.402262862893239e-05, "loss": 0.0918, "step": 17828 }, { "epoch": 0.31574333567984697, "grad_norm": 0.826298177242279, "learning_rate": 2.4021941297770384e-05, "loss": 0.0899, "step": 17829 }, { "epoch": 0.3157610452168754, "grad_norm": 0.6390612125396729, "learning_rate": 2.4021253936927102e-05, "loss": 0.0727, "step": 17830 }, { "epoch": 0.3157787547539038, "grad_norm": 0.9867042303085327, "learning_rate": 2.4020566546404805e-05, "loss": 0.0604, "step": 17831 }, { "epoch": 0.3157964642909323, "grad_norm": 0.735697865486145, "learning_rate": 2.401987912620576e-05, "loss": 0.061, "step": 17832 }, { "epoch": 0.3158141738279607, "grad_norm": 0.7620007395744324, "learning_rate": 2.4019191676332228e-05, "loss": 0.0782, "step": 17833 }, { "epoch": 0.31583188336498913, "grad_norm": 1.2195509672164917, "learning_rate": 2.4018504196786457e-05, "loss": 0.1311, "step": 17834 }, { "epoch": 0.3158495929020176, "grad_norm": 0.5499755144119263, "learning_rate": 2.4017816687570727e-05, "loss": 0.0737, "step": 17835 }, { "epoch": 0.315867302439046, "grad_norm": 0.41878074407577515, "learning_rate": 2.401712914868729e-05, "loss": 0.0596, "step": 17836 }, { "epoch": 0.31588501197607444, "grad_norm": 0.7884352207183838, "learning_rate": 2.4016441580138413e-05, "loss": 0.1093, "step": 17837 }, { "epoch": 0.31590272151310284, "grad_norm": 0.4959055185317993, "learning_rate": 2.4015753981926352e-05, "loss": 0.1027, "step": 17838 }, { "epoch": 0.3159204310501313, "grad_norm": 0.7621615529060364, "learning_rate": 2.4015066354053372e-05, "loss": 0.0801, "step": 17839 }, { "epoch": 0.3159381405871597, "grad_norm": 0.9014572501182556, "learning_rate": 2.401437869652174e-05, "loss": 0.096, "step": 17840 }, { "epoch": 0.31595585012418814, "grad_norm": 0.7355020046234131, "learning_rate": 2.4013691009333706e-05, "loss": 0.0929, "step": 17841 }, { "epoch": 0.31597355966121654, "grad_norm": 0.6098725199699402, "learning_rate": 2.4013003292491546e-05, "loss": 0.0838, "step": 17842 }, { "epoch": 0.315991269198245, "grad_norm": 0.7996458411216736, "learning_rate": 2.4012315545997514e-05, "loss": 0.0623, "step": 17843 }, { "epoch": 0.3160089787352734, "grad_norm": 1.0384341478347778, "learning_rate": 2.401162776985388e-05, "loss": 0.1025, "step": 17844 }, { "epoch": 0.31602668827230185, "grad_norm": 0.6240960359573364, "learning_rate": 2.40109399640629e-05, "loss": 0.0763, "step": 17845 }, { "epoch": 0.31604439780933025, "grad_norm": 0.5947725176811218, "learning_rate": 2.4010252128626837e-05, "loss": 0.0772, "step": 17846 }, { "epoch": 0.3160621073463587, "grad_norm": 0.7919192314147949, "learning_rate": 2.4009564263547955e-05, "loss": 0.0784, "step": 17847 }, { "epoch": 0.3160798168833871, "grad_norm": 0.6747811436653137, "learning_rate": 2.400887636882852e-05, "loss": 0.1124, "step": 17848 }, { "epoch": 0.31609752642041555, "grad_norm": 0.8155792951583862, "learning_rate": 2.4008188444470797e-05, "loss": 0.1009, "step": 17849 }, { "epoch": 0.316115235957444, "grad_norm": 0.9263240098953247, "learning_rate": 2.4007500490477037e-05, "loss": 0.0911, "step": 17850 }, { "epoch": 0.3161329454944724, "grad_norm": 0.6864643096923828, "learning_rate": 2.4006812506849514e-05, "loss": 0.1075, "step": 17851 }, { "epoch": 0.31615065503150086, "grad_norm": 0.9236023426055908, "learning_rate": 2.4006124493590492e-05, "loss": 0.1052, "step": 17852 }, { "epoch": 0.31616836456852926, "grad_norm": 0.5408724546432495, "learning_rate": 2.4005436450702227e-05, "loss": 0.0702, "step": 17853 }, { "epoch": 0.3161860741055577, "grad_norm": 0.7552846074104309, "learning_rate": 2.400474837818699e-05, "loss": 0.0845, "step": 17854 }, { "epoch": 0.3162037836425861, "grad_norm": 1.2126531600952148, "learning_rate": 2.400406027604704e-05, "loss": 0.116, "step": 17855 }, { "epoch": 0.31622149317961457, "grad_norm": 0.45985147356987, "learning_rate": 2.4003372144284644e-05, "loss": 0.0623, "step": 17856 }, { "epoch": 0.31623920271664296, "grad_norm": 0.793230414390564, "learning_rate": 2.4002683982902067e-05, "loss": 0.1014, "step": 17857 }, { "epoch": 0.3162569122536714, "grad_norm": 0.42465630173683167, "learning_rate": 2.4001995791901566e-05, "loss": 0.0482, "step": 17858 }, { "epoch": 0.3162746217906998, "grad_norm": 1.5056841373443604, "learning_rate": 2.400130757128541e-05, "loss": 0.1228, "step": 17859 }, { "epoch": 0.31629233132772827, "grad_norm": 0.8534943461418152, "learning_rate": 2.4000619321055862e-05, "loss": 0.0709, "step": 17860 }, { "epoch": 0.31631004086475667, "grad_norm": 0.8897057771682739, "learning_rate": 2.3999931041215185e-05, "loss": 0.125, "step": 17861 }, { "epoch": 0.3163277504017851, "grad_norm": 0.6050039529800415, "learning_rate": 2.3999242731765648e-05, "loss": 0.0625, "step": 17862 }, { "epoch": 0.3163454599388135, "grad_norm": 0.5395557880401611, "learning_rate": 2.3998554392709514e-05, "loss": 0.0739, "step": 17863 }, { "epoch": 0.316363169475842, "grad_norm": 0.753719687461853, "learning_rate": 2.3997866024049044e-05, "loss": 0.0959, "step": 17864 }, { "epoch": 0.31638087901287043, "grad_norm": 0.7067922949790955, "learning_rate": 2.399717762578651e-05, "loss": 0.0835, "step": 17865 }, { "epoch": 0.31639858854989883, "grad_norm": 0.5339434146881104, "learning_rate": 2.3996489197924162e-05, "loss": 0.0875, "step": 17866 }, { "epoch": 0.3164162980869273, "grad_norm": 0.7097622156143188, "learning_rate": 2.399580074046428e-05, "loss": 0.0721, "step": 17867 }, { "epoch": 0.3164340076239557, "grad_norm": 0.9854418635368347, "learning_rate": 2.3995112253409123e-05, "loss": 0.1286, "step": 17868 }, { "epoch": 0.31645171716098414, "grad_norm": 0.698783278465271, "learning_rate": 2.3994423736760955e-05, "loss": 0.0774, "step": 17869 }, { "epoch": 0.31646942669801253, "grad_norm": 1.0777922868728638, "learning_rate": 2.3993735190522043e-05, "loss": 0.0957, "step": 17870 }, { "epoch": 0.316487136235041, "grad_norm": 1.683394432067871, "learning_rate": 2.3993046614694654e-05, "loss": 0.1373, "step": 17871 }, { "epoch": 0.3165048457720694, "grad_norm": 0.8926798701286316, "learning_rate": 2.399235800928105e-05, "loss": 0.085, "step": 17872 }, { "epoch": 0.31652255530909784, "grad_norm": 0.7819154858589172, "learning_rate": 2.3991669374283498e-05, "loss": 0.1158, "step": 17873 }, { "epoch": 0.31654026484612624, "grad_norm": 0.8666194677352905, "learning_rate": 2.3990980709704264e-05, "loss": 0.0897, "step": 17874 }, { "epoch": 0.3165579743831547, "grad_norm": 0.7158265113830566, "learning_rate": 2.3990292015545614e-05, "loss": 0.0883, "step": 17875 }, { "epoch": 0.3165756839201831, "grad_norm": 0.8826785683631897, "learning_rate": 2.398960329180981e-05, "loss": 0.0947, "step": 17876 }, { "epoch": 0.31659339345721155, "grad_norm": 0.7887400388717651, "learning_rate": 2.398891453849912e-05, "loss": 0.0943, "step": 17877 }, { "epoch": 0.31661110299423995, "grad_norm": 0.6879430413246155, "learning_rate": 2.3988225755615812e-05, "loss": 0.0718, "step": 17878 }, { "epoch": 0.3166288125312684, "grad_norm": 1.0388939380645752, "learning_rate": 2.398753694316215e-05, "loss": 0.0718, "step": 17879 }, { "epoch": 0.31664652206829685, "grad_norm": 0.674305260181427, "learning_rate": 2.3986848101140396e-05, "loss": 0.0943, "step": 17880 }, { "epoch": 0.31666423160532525, "grad_norm": 1.1948597431182861, "learning_rate": 2.3986159229552826e-05, "loss": 0.1548, "step": 17881 }, { "epoch": 0.3166819411423537, "grad_norm": 0.622441828250885, "learning_rate": 2.39854703284017e-05, "loss": 0.0774, "step": 17882 }, { "epoch": 0.3166996506793821, "grad_norm": 1.070844054222107, "learning_rate": 2.3984781397689283e-05, "loss": 0.0901, "step": 17883 }, { "epoch": 0.31671736021641056, "grad_norm": 0.8443483710289001, "learning_rate": 2.398409243741785e-05, "loss": 0.09, "step": 17884 }, { "epoch": 0.31673506975343896, "grad_norm": 0.8560139536857605, "learning_rate": 2.3983403447589658e-05, "loss": 0.0727, "step": 17885 }, { "epoch": 0.3167527792904674, "grad_norm": 0.8402602672576904, "learning_rate": 2.3982714428206974e-05, "loss": 0.0906, "step": 17886 }, { "epoch": 0.3167704888274958, "grad_norm": 0.7965186834335327, "learning_rate": 2.398202537927207e-05, "loss": 0.1072, "step": 17887 }, { "epoch": 0.31678819836452426, "grad_norm": 0.8069609999656677, "learning_rate": 2.3981336300787216e-05, "loss": 0.0981, "step": 17888 }, { "epoch": 0.31680590790155266, "grad_norm": 0.47695258259773254, "learning_rate": 2.398064719275467e-05, "loss": 0.0828, "step": 17889 }, { "epoch": 0.3168236174385811, "grad_norm": 1.0758981704711914, "learning_rate": 2.39799580551767e-05, "loss": 0.0893, "step": 17890 }, { "epoch": 0.3168413269756095, "grad_norm": 0.88093501329422, "learning_rate": 2.3979268888055578e-05, "loss": 0.106, "step": 17891 }, { "epoch": 0.31685903651263797, "grad_norm": 0.9045925736427307, "learning_rate": 2.397857969139357e-05, "loss": 0.0851, "step": 17892 }, { "epoch": 0.31687674604966637, "grad_norm": 0.7349390983581543, "learning_rate": 2.3977890465192947e-05, "loss": 0.0912, "step": 17893 }, { "epoch": 0.3168944555866948, "grad_norm": 0.7351741790771484, "learning_rate": 2.397720120945597e-05, "loss": 0.0747, "step": 17894 }, { "epoch": 0.3169121651237233, "grad_norm": 0.7002102732658386, "learning_rate": 2.3976511924184906e-05, "loss": 0.0845, "step": 17895 }, { "epoch": 0.3169298746607517, "grad_norm": 0.8125101327896118, "learning_rate": 2.397582260938203e-05, "loss": 0.1264, "step": 17896 }, { "epoch": 0.31694758419778013, "grad_norm": 0.6934695243835449, "learning_rate": 2.3975133265049607e-05, "loss": 0.0734, "step": 17897 }, { "epoch": 0.31696529373480853, "grad_norm": 0.9400862455368042, "learning_rate": 2.3974443891189896e-05, "loss": 0.1037, "step": 17898 }, { "epoch": 0.316983003271837, "grad_norm": 0.9697181582450867, "learning_rate": 2.3973754487805178e-05, "loss": 0.0948, "step": 17899 }, { "epoch": 0.3170007128088654, "grad_norm": 0.5525569319725037, "learning_rate": 2.3973065054897713e-05, "loss": 0.081, "step": 17900 }, { "epoch": 0.31701842234589384, "grad_norm": 0.7898280620574951, "learning_rate": 2.397237559246977e-05, "loss": 0.0835, "step": 17901 }, { "epoch": 0.31703613188292223, "grad_norm": 0.7563307881355286, "learning_rate": 2.397168610052362e-05, "loss": 0.1154, "step": 17902 }, { "epoch": 0.3170538414199507, "grad_norm": 0.7010406851768494, "learning_rate": 2.3970996579061532e-05, "loss": 0.0861, "step": 17903 }, { "epoch": 0.3170715509569791, "grad_norm": 0.9301489591598511, "learning_rate": 2.3970307028085775e-05, "loss": 0.0858, "step": 17904 }, { "epoch": 0.31708926049400754, "grad_norm": 0.5192678570747375, "learning_rate": 2.3969617447598608e-05, "loss": 0.0746, "step": 17905 }, { "epoch": 0.31710697003103594, "grad_norm": 0.6285043954849243, "learning_rate": 2.3968927837602314e-05, "loss": 0.0762, "step": 17906 }, { "epoch": 0.3171246795680644, "grad_norm": 0.6068471670150757, "learning_rate": 2.3968238198099152e-05, "loss": 0.089, "step": 17907 }, { "epoch": 0.31714238910509285, "grad_norm": 0.546072244644165, "learning_rate": 2.396754852909139e-05, "loss": 0.0685, "step": 17908 }, { "epoch": 0.31716009864212125, "grad_norm": 0.6294676661491394, "learning_rate": 2.3966858830581305e-05, "loss": 0.0615, "step": 17909 }, { "epoch": 0.3171778081791497, "grad_norm": 0.7097377181053162, "learning_rate": 2.396616910257116e-05, "loss": 0.0999, "step": 17910 }, { "epoch": 0.3171955177161781, "grad_norm": 0.8226660490036011, "learning_rate": 2.396547934506322e-05, "loss": 0.0814, "step": 17911 }, { "epoch": 0.31721322725320655, "grad_norm": 0.6006644368171692, "learning_rate": 2.3964789558059764e-05, "loss": 0.1083, "step": 17912 }, { "epoch": 0.31723093679023495, "grad_norm": 0.6933265924453735, "learning_rate": 2.396409974156306e-05, "loss": 0.0768, "step": 17913 }, { "epoch": 0.3172486463272634, "grad_norm": 0.5263631343841553, "learning_rate": 2.3963409895575375e-05, "loss": 0.0774, "step": 17914 }, { "epoch": 0.3172663558642918, "grad_norm": 0.6884946823120117, "learning_rate": 2.3962720020098975e-05, "loss": 0.0712, "step": 17915 }, { "epoch": 0.31728406540132026, "grad_norm": 0.670844316482544, "learning_rate": 2.3962030115136132e-05, "loss": 0.0808, "step": 17916 }, { "epoch": 0.31730177493834866, "grad_norm": 0.6937853693962097, "learning_rate": 2.396134018068912e-05, "loss": 0.0803, "step": 17917 }, { "epoch": 0.3173194844753771, "grad_norm": 1.0460386276245117, "learning_rate": 2.39606502167602e-05, "loss": 0.1018, "step": 17918 }, { "epoch": 0.3173371940124055, "grad_norm": 0.8261459469795227, "learning_rate": 2.395996022335165e-05, "loss": 0.085, "step": 17919 }, { "epoch": 0.31735490354943396, "grad_norm": 0.5910127758979797, "learning_rate": 2.3959270200465733e-05, "loss": 0.0654, "step": 17920 }, { "epoch": 0.31737261308646236, "grad_norm": 0.9390368461608887, "learning_rate": 2.395858014810473e-05, "loss": 0.1342, "step": 17921 }, { "epoch": 0.3173903226234908, "grad_norm": 0.8695755004882812, "learning_rate": 2.39578900662709e-05, "loss": 0.0955, "step": 17922 }, { "epoch": 0.31740803216051927, "grad_norm": 0.93021559715271, "learning_rate": 2.395719995496652e-05, "loss": 0.1012, "step": 17923 }, { "epoch": 0.31742574169754767, "grad_norm": 0.5421015620231628, "learning_rate": 2.3956509814193857e-05, "loss": 0.1131, "step": 17924 }, { "epoch": 0.3174434512345761, "grad_norm": 0.8740265369415283, "learning_rate": 2.3955819643955183e-05, "loss": 0.0772, "step": 17925 }, { "epoch": 0.3174611607716045, "grad_norm": 0.8941827416419983, "learning_rate": 2.395512944425277e-05, "loss": 0.0978, "step": 17926 }, { "epoch": 0.317478870308633, "grad_norm": 0.7722252607345581, "learning_rate": 2.3954439215088884e-05, "loss": 0.0744, "step": 17927 }, { "epoch": 0.3174965798456614, "grad_norm": 0.9939371943473816, "learning_rate": 2.39537489564658e-05, "loss": 0.1098, "step": 17928 }, { "epoch": 0.31751428938268983, "grad_norm": 0.4558134973049164, "learning_rate": 2.3953058668385785e-05, "loss": 0.0548, "step": 17929 }, { "epoch": 0.31753199891971823, "grad_norm": 0.618017315864563, "learning_rate": 2.3952368350851113e-05, "loss": 0.0667, "step": 17930 }, { "epoch": 0.3175497084567467, "grad_norm": 0.8502557277679443, "learning_rate": 2.395167800386406e-05, "loss": 0.0813, "step": 17931 }, { "epoch": 0.3175674179937751, "grad_norm": 1.000511646270752, "learning_rate": 2.395098762742689e-05, "loss": 0.1182, "step": 17932 }, { "epoch": 0.31758512753080353, "grad_norm": 0.7499309182167053, "learning_rate": 2.3950297221541872e-05, "loss": 0.114, "step": 17933 }, { "epoch": 0.31760283706783193, "grad_norm": 0.7918338775634766, "learning_rate": 2.394960678621128e-05, "loss": 0.1019, "step": 17934 }, { "epoch": 0.3176205466048604, "grad_norm": 0.7757755517959595, "learning_rate": 2.3948916321437393e-05, "loss": 0.1141, "step": 17935 }, { "epoch": 0.3176382561418888, "grad_norm": 0.5334535241127014, "learning_rate": 2.3948225827222477e-05, "loss": 0.0916, "step": 17936 }, { "epoch": 0.31765596567891724, "grad_norm": 0.7691330909729004, "learning_rate": 2.3947535303568795e-05, "loss": 0.0844, "step": 17937 }, { "epoch": 0.3176736752159457, "grad_norm": 0.7751168012619019, "learning_rate": 2.394684475047863e-05, "loss": 0.0973, "step": 17938 }, { "epoch": 0.3176913847529741, "grad_norm": 0.8251352310180664, "learning_rate": 2.3946154167954255e-05, "loss": 0.1356, "step": 17939 }, { "epoch": 0.31770909429000255, "grad_norm": 0.6771417260169983, "learning_rate": 2.3945463555997934e-05, "loss": 0.0809, "step": 17940 }, { "epoch": 0.31772680382703095, "grad_norm": 0.5996336340904236, "learning_rate": 2.394477291461194e-05, "loss": 0.1181, "step": 17941 }, { "epoch": 0.3177445133640594, "grad_norm": 1.1133767366409302, "learning_rate": 2.3944082243798553e-05, "loss": 0.0888, "step": 17942 }, { "epoch": 0.3177622229010878, "grad_norm": 0.49062690138816833, "learning_rate": 2.3943391543560043e-05, "loss": 0.0583, "step": 17943 }, { "epoch": 0.31777993243811625, "grad_norm": 1.2369945049285889, "learning_rate": 2.3942700813898675e-05, "loss": 0.1177, "step": 17944 }, { "epoch": 0.31779764197514465, "grad_norm": 0.5170107483863831, "learning_rate": 2.3942010054816723e-05, "loss": 0.0911, "step": 17945 }, { "epoch": 0.3178153515121731, "grad_norm": 0.8206095099449158, "learning_rate": 2.3941319266316465e-05, "loss": 0.1268, "step": 17946 }, { "epoch": 0.3178330610492015, "grad_norm": 0.4882974624633789, "learning_rate": 2.3940628448400176e-05, "loss": 0.0877, "step": 17947 }, { "epoch": 0.31785077058622996, "grad_norm": 0.9170410633087158, "learning_rate": 2.3939937601070113e-05, "loss": 0.0979, "step": 17948 }, { "epoch": 0.31786848012325836, "grad_norm": 0.7101050019264221, "learning_rate": 2.3939246724328568e-05, "loss": 0.1094, "step": 17949 }, { "epoch": 0.3178861896602868, "grad_norm": 0.6457890272140503, "learning_rate": 2.3938555818177806e-05, "loss": 0.1, "step": 17950 }, { "epoch": 0.3179038991973152, "grad_norm": 0.8879213333129883, "learning_rate": 2.393786488262009e-05, "loss": 0.0984, "step": 17951 }, { "epoch": 0.31792160873434366, "grad_norm": 0.6876395344734192, "learning_rate": 2.3937173917657712e-05, "loss": 0.0708, "step": 17952 }, { "epoch": 0.3179393182713721, "grad_norm": 0.6125332117080688, "learning_rate": 2.393648292329293e-05, "loss": 0.1119, "step": 17953 }, { "epoch": 0.3179570278084005, "grad_norm": 0.5723414421081543, "learning_rate": 2.393579189952803e-05, "loss": 0.0906, "step": 17954 }, { "epoch": 0.31797473734542897, "grad_norm": 0.49999895691871643, "learning_rate": 2.3935100846365272e-05, "loss": 0.0789, "step": 17955 }, { "epoch": 0.31799244688245737, "grad_norm": 0.6988149285316467, "learning_rate": 2.393440976380694e-05, "loss": 0.1083, "step": 17956 }, { "epoch": 0.3180101564194858, "grad_norm": 0.757605791091919, "learning_rate": 2.3933718651855296e-05, "loss": 0.1096, "step": 17957 }, { "epoch": 0.3180278659565142, "grad_norm": 0.8688122630119324, "learning_rate": 2.3933027510512632e-05, "loss": 0.0624, "step": 17958 }, { "epoch": 0.3180455754935427, "grad_norm": 0.6238734126091003, "learning_rate": 2.3932336339781202e-05, "loss": 0.0921, "step": 17959 }, { "epoch": 0.3180632850305711, "grad_norm": 0.5661106109619141, "learning_rate": 2.3931645139663295e-05, "loss": 0.0686, "step": 17960 }, { "epoch": 0.31808099456759953, "grad_norm": 0.793552041053772, "learning_rate": 2.3930953910161178e-05, "loss": 0.1159, "step": 17961 }, { "epoch": 0.3180987041046279, "grad_norm": 0.811790406703949, "learning_rate": 2.393026265127712e-05, "loss": 0.0916, "step": 17962 }, { "epoch": 0.3181164136416564, "grad_norm": 0.9420979619026184, "learning_rate": 2.3929571363013408e-05, "loss": 0.0861, "step": 17963 }, { "epoch": 0.3181341231786848, "grad_norm": 0.8748754262924194, "learning_rate": 2.3928880045372305e-05, "loss": 0.0824, "step": 17964 }, { "epoch": 0.31815183271571323, "grad_norm": 0.769127607345581, "learning_rate": 2.3928188698356093e-05, "loss": 0.1115, "step": 17965 }, { "epoch": 0.31816954225274163, "grad_norm": 0.825448215007782, "learning_rate": 2.3927497321967045e-05, "loss": 0.0758, "step": 17966 }, { "epoch": 0.3181872517897701, "grad_norm": 0.9653245210647583, "learning_rate": 2.3926805916207427e-05, "loss": 0.0806, "step": 17967 }, { "epoch": 0.31820496132679854, "grad_norm": 0.37482425570487976, "learning_rate": 2.3926114481079528e-05, "loss": 0.0881, "step": 17968 }, { "epoch": 0.31822267086382694, "grad_norm": 0.8096804022789001, "learning_rate": 2.3925423016585606e-05, "loss": 0.1301, "step": 17969 }, { "epoch": 0.3182403804008554, "grad_norm": 0.8643711805343628, "learning_rate": 2.392473152272795e-05, "loss": 0.0582, "step": 17970 }, { "epoch": 0.3182580899378838, "grad_norm": 0.6011104583740234, "learning_rate": 2.392403999950883e-05, "loss": 0.0874, "step": 17971 }, { "epoch": 0.31827579947491225, "grad_norm": 0.6612188816070557, "learning_rate": 2.3923348446930527e-05, "loss": 0.068, "step": 17972 }, { "epoch": 0.31829350901194065, "grad_norm": 0.7939309477806091, "learning_rate": 2.39226568649953e-05, "loss": 0.0812, "step": 17973 }, { "epoch": 0.3183112185489691, "grad_norm": 1.062852144241333, "learning_rate": 2.392196525370544e-05, "loss": 0.0961, "step": 17974 }, { "epoch": 0.3183289280859975, "grad_norm": 0.7755217552185059, "learning_rate": 2.3921273613063216e-05, "loss": 0.0722, "step": 17975 }, { "epoch": 0.31834663762302595, "grad_norm": 0.8067899346351624, "learning_rate": 2.3920581943070902e-05, "loss": 0.0978, "step": 17976 }, { "epoch": 0.31836434716005435, "grad_norm": 0.8621484637260437, "learning_rate": 2.3919890243730777e-05, "loss": 0.0779, "step": 17977 }, { "epoch": 0.3183820566970828, "grad_norm": 0.7651400566101074, "learning_rate": 2.3919198515045117e-05, "loss": 0.104, "step": 17978 }, { "epoch": 0.3183997662341112, "grad_norm": 0.4565042555332184, "learning_rate": 2.3918506757016196e-05, "loss": 0.1077, "step": 17979 }, { "epoch": 0.31841747577113966, "grad_norm": 0.4744095206260681, "learning_rate": 2.3917814969646284e-05, "loss": 0.0637, "step": 17980 }, { "epoch": 0.31843518530816806, "grad_norm": 0.8381475210189819, "learning_rate": 2.391712315293767e-05, "loss": 0.1361, "step": 17981 }, { "epoch": 0.3184528948451965, "grad_norm": 0.9621347784996033, "learning_rate": 2.3916431306892622e-05, "loss": 0.1011, "step": 17982 }, { "epoch": 0.31847060438222496, "grad_norm": 0.9725800156593323, "learning_rate": 2.391573943151341e-05, "loss": 0.0574, "step": 17983 }, { "epoch": 0.31848831391925336, "grad_norm": 0.9453374743461609, "learning_rate": 2.3915047526802325e-05, "loss": 0.0964, "step": 17984 }, { "epoch": 0.3185060234562818, "grad_norm": 0.786435067653656, "learning_rate": 2.3914355592761635e-05, "loss": 0.0964, "step": 17985 }, { "epoch": 0.3185237329933102, "grad_norm": 0.5538254976272583, "learning_rate": 2.3913663629393608e-05, "loss": 0.0884, "step": 17986 }, { "epoch": 0.31854144253033867, "grad_norm": 0.6549452543258667, "learning_rate": 2.3912971636700535e-05, "loss": 0.0646, "step": 17987 }, { "epoch": 0.31855915206736707, "grad_norm": 0.8364225029945374, "learning_rate": 2.3912279614684692e-05, "loss": 0.099, "step": 17988 }, { "epoch": 0.3185768616043955, "grad_norm": 1.3531852960586548, "learning_rate": 2.391158756334834e-05, "loss": 0.1099, "step": 17989 }, { "epoch": 0.3185945711414239, "grad_norm": 0.9662426710128784, "learning_rate": 2.3910895482693775e-05, "loss": 0.0948, "step": 17990 }, { "epoch": 0.3186122806784524, "grad_norm": 0.5230695605278015, "learning_rate": 2.3910203372723264e-05, "loss": 0.0533, "step": 17991 }, { "epoch": 0.3186299902154808, "grad_norm": 0.7448049783706665, "learning_rate": 2.390951123343908e-05, "loss": 0.0845, "step": 17992 }, { "epoch": 0.31864769975250923, "grad_norm": 0.8708814978599548, "learning_rate": 2.390881906484351e-05, "loss": 0.1082, "step": 17993 }, { "epoch": 0.3186654092895376, "grad_norm": 0.8135493993759155, "learning_rate": 2.3908126866938825e-05, "loss": 0.0873, "step": 17994 }, { "epoch": 0.3186831188265661, "grad_norm": 0.5317161083221436, "learning_rate": 2.3907434639727302e-05, "loss": 0.0682, "step": 17995 }, { "epoch": 0.3187008283635945, "grad_norm": 0.7475335001945496, "learning_rate": 2.390674238321122e-05, "loss": 0.1249, "step": 17996 }, { "epoch": 0.31871853790062293, "grad_norm": 0.793483555316925, "learning_rate": 2.390605009739286e-05, "loss": 0.1143, "step": 17997 }, { "epoch": 0.3187362474376514, "grad_norm": 0.8682699203491211, "learning_rate": 2.3905357782274492e-05, "loss": 0.0849, "step": 17998 }, { "epoch": 0.3187539569746798, "grad_norm": 0.7860563397407532, "learning_rate": 2.39046654378584e-05, "loss": 0.0816, "step": 17999 }, { "epoch": 0.31877166651170824, "grad_norm": 0.9258478879928589, "learning_rate": 2.390397306414686e-05, "loss": 0.0928, "step": 18000 }, { "epoch": 0.31878937604873664, "grad_norm": 0.7712126970291138, "learning_rate": 2.3903280661142145e-05, "loss": 0.1017, "step": 18001 }, { "epoch": 0.3188070855857651, "grad_norm": 0.7886852622032166, "learning_rate": 2.3902588228846538e-05, "loss": 0.1097, "step": 18002 }, { "epoch": 0.3188247951227935, "grad_norm": 0.5606104135513306, "learning_rate": 2.3901895767262317e-05, "loss": 0.0782, "step": 18003 }, { "epoch": 0.31884250465982195, "grad_norm": 0.6975069046020508, "learning_rate": 2.3901203276391762e-05, "loss": 0.0585, "step": 18004 }, { "epoch": 0.31886021419685034, "grad_norm": 0.9384016990661621, "learning_rate": 2.3900510756237148e-05, "loss": 0.0787, "step": 18005 }, { "epoch": 0.3188779237338788, "grad_norm": 1.157551884651184, "learning_rate": 2.3899818206800753e-05, "loss": 0.0961, "step": 18006 }, { "epoch": 0.3188956332709072, "grad_norm": 0.814210057258606, "learning_rate": 2.3899125628084856e-05, "loss": 0.0645, "step": 18007 }, { "epoch": 0.31891334280793565, "grad_norm": 0.6536222100257874, "learning_rate": 2.3898433020091735e-05, "loss": 0.101, "step": 18008 }, { "epoch": 0.31893105234496405, "grad_norm": 1.0804928541183472, "learning_rate": 2.389774038282367e-05, "loss": 0.0746, "step": 18009 }, { "epoch": 0.3189487618819925, "grad_norm": 0.6094516515731812, "learning_rate": 2.389704771628294e-05, "loss": 0.0741, "step": 18010 }, { "epoch": 0.3189664714190209, "grad_norm": 0.5858116745948792, "learning_rate": 2.389635502047182e-05, "loss": 0.0533, "step": 18011 }, { "epoch": 0.31898418095604936, "grad_norm": 0.6210225820541382, "learning_rate": 2.3895662295392595e-05, "loss": 0.0769, "step": 18012 }, { "epoch": 0.3190018904930778, "grad_norm": 0.493021160364151, "learning_rate": 2.389496954104754e-05, "loss": 0.1055, "step": 18013 }, { "epoch": 0.3190196000301062, "grad_norm": 0.7134986519813538, "learning_rate": 2.3894276757438932e-05, "loss": 0.1131, "step": 18014 }, { "epoch": 0.31903730956713466, "grad_norm": 0.7672281861305237, "learning_rate": 2.389358394456906e-05, "loss": 0.067, "step": 18015 }, { "epoch": 0.31905501910416306, "grad_norm": 0.9994046092033386, "learning_rate": 2.389289110244019e-05, "loss": 0.0936, "step": 18016 }, { "epoch": 0.3190727286411915, "grad_norm": 0.8028931021690369, "learning_rate": 2.389219823105461e-05, "loss": 0.128, "step": 18017 }, { "epoch": 0.3190904381782199, "grad_norm": 0.874661386013031, "learning_rate": 2.3891505330414597e-05, "loss": 0.0623, "step": 18018 }, { "epoch": 0.31910814771524837, "grad_norm": 0.3934500813484192, "learning_rate": 2.3890812400522427e-05, "loss": 0.0574, "step": 18019 }, { "epoch": 0.31912585725227677, "grad_norm": 0.7581154108047485, "learning_rate": 2.389011944138039e-05, "loss": 0.106, "step": 18020 }, { "epoch": 0.3191435667893052, "grad_norm": 0.8759164214134216, "learning_rate": 2.3889426452990753e-05, "loss": 0.081, "step": 18021 }, { "epoch": 0.3191612763263336, "grad_norm": 1.093676209449768, "learning_rate": 2.388873343535581e-05, "loss": 0.1021, "step": 18022 }, { "epoch": 0.3191789858633621, "grad_norm": 0.5778789520263672, "learning_rate": 2.388804038847783e-05, "loss": 0.0921, "step": 18023 }, { "epoch": 0.3191966954003905, "grad_norm": 0.648852527141571, "learning_rate": 2.3887347312359092e-05, "loss": 0.0772, "step": 18024 }, { "epoch": 0.3192144049374189, "grad_norm": 0.9335159063339233, "learning_rate": 2.3886654207001886e-05, "loss": 0.1048, "step": 18025 }, { "epoch": 0.3192321144744473, "grad_norm": 0.7616809010505676, "learning_rate": 2.3885961072408477e-05, "loss": 0.0812, "step": 18026 }, { "epoch": 0.3192498240114758, "grad_norm": 0.7381367683410645, "learning_rate": 2.3885267908581163e-05, "loss": 0.0738, "step": 18027 }, { "epoch": 0.31926753354850423, "grad_norm": 0.756152331829071, "learning_rate": 2.3884574715522213e-05, "loss": 0.0687, "step": 18028 }, { "epoch": 0.31928524308553263, "grad_norm": 0.9870827794075012, "learning_rate": 2.3883881493233914e-05, "loss": 0.0987, "step": 18029 }, { "epoch": 0.3193029526225611, "grad_norm": 0.7550113201141357, "learning_rate": 2.388318824171854e-05, "loss": 0.1027, "step": 18030 }, { "epoch": 0.3193206621595895, "grad_norm": 0.6343215703964233, "learning_rate": 2.3882494960978378e-05, "loss": 0.0916, "step": 18031 }, { "epoch": 0.31933837169661794, "grad_norm": 0.5573999285697937, "learning_rate": 2.3881801651015708e-05, "loss": 0.0974, "step": 18032 }, { "epoch": 0.31935608123364634, "grad_norm": 0.6463264226913452, "learning_rate": 2.3881108311832804e-05, "loss": 0.1076, "step": 18033 }, { "epoch": 0.3193737907706748, "grad_norm": 0.7910917401313782, "learning_rate": 2.3880414943431955e-05, "loss": 0.0989, "step": 18034 }, { "epoch": 0.3193915003077032, "grad_norm": 0.6469417214393616, "learning_rate": 2.387972154581544e-05, "loss": 0.0808, "step": 18035 }, { "epoch": 0.31940920984473165, "grad_norm": 0.5066125392913818, "learning_rate": 2.3879028118985537e-05, "loss": 0.0636, "step": 18036 }, { "epoch": 0.31942691938176004, "grad_norm": 1.3726619482040405, "learning_rate": 2.3878334662944527e-05, "loss": 0.0896, "step": 18037 }, { "epoch": 0.3194446289187885, "grad_norm": 0.6564796566963196, "learning_rate": 2.3877641177694694e-05, "loss": 0.08, "step": 18038 }, { "epoch": 0.3194623384558169, "grad_norm": 0.8610712289810181, "learning_rate": 2.3876947663238324e-05, "loss": 0.0944, "step": 18039 }, { "epoch": 0.31948004799284535, "grad_norm": 0.711367130279541, "learning_rate": 2.387625411957769e-05, "loss": 0.1021, "step": 18040 }, { "epoch": 0.31949775752987375, "grad_norm": 0.7144147753715515, "learning_rate": 2.387556054671508e-05, "loss": 0.0658, "step": 18041 }, { "epoch": 0.3195154670669022, "grad_norm": 1.0364255905151367, "learning_rate": 2.3874866944652777e-05, "loss": 0.0771, "step": 18042 }, { "epoch": 0.31953317660393066, "grad_norm": 1.0195109844207764, "learning_rate": 2.387417331339305e-05, "loss": 0.0615, "step": 18043 }, { "epoch": 0.31955088614095906, "grad_norm": 1.1189253330230713, "learning_rate": 2.3873479652938197e-05, "loss": 0.1551, "step": 18044 }, { "epoch": 0.3195685956779875, "grad_norm": 0.9776757955551147, "learning_rate": 2.3872785963290495e-05, "loss": 0.1019, "step": 18045 }, { "epoch": 0.3195863052150159, "grad_norm": 1.1580021381378174, "learning_rate": 2.387209224445222e-05, "loss": 0.1125, "step": 18046 }, { "epoch": 0.31960401475204436, "grad_norm": 0.8965001106262207, "learning_rate": 2.387139849642566e-05, "loss": 0.1037, "step": 18047 }, { "epoch": 0.31962172428907276, "grad_norm": 1.167611837387085, "learning_rate": 2.38707047192131e-05, "loss": 0.123, "step": 18048 }, { "epoch": 0.3196394338261012, "grad_norm": 0.7342422604560852, "learning_rate": 2.3870010912816813e-05, "loss": 0.0766, "step": 18049 }, { "epoch": 0.3196571433631296, "grad_norm": 0.8136693239212036, "learning_rate": 2.386931707723909e-05, "loss": 0.0574, "step": 18050 }, { "epoch": 0.31967485290015807, "grad_norm": 0.7631605267524719, "learning_rate": 2.386862321248221e-05, "loss": 0.0987, "step": 18051 }, { "epoch": 0.31969256243718647, "grad_norm": 0.9245545864105225, "learning_rate": 2.386792931854846e-05, "loss": 0.0901, "step": 18052 }, { "epoch": 0.3197102719742149, "grad_norm": 1.4464805126190186, "learning_rate": 2.3867235395440116e-05, "loss": 0.1161, "step": 18053 }, { "epoch": 0.3197279815112433, "grad_norm": 0.5814875960350037, "learning_rate": 2.3866541443159464e-05, "loss": 0.0931, "step": 18054 }, { "epoch": 0.3197456910482718, "grad_norm": 0.5952154994010925, "learning_rate": 2.3865847461708792e-05, "loss": 0.0826, "step": 18055 }, { "epoch": 0.3197634005853002, "grad_norm": 1.041515588760376, "learning_rate": 2.3865153451090372e-05, "loss": 0.0968, "step": 18056 }, { "epoch": 0.3197811101223286, "grad_norm": 0.7708975076675415, "learning_rate": 2.38644594113065e-05, "loss": 0.1073, "step": 18057 }, { "epoch": 0.3197988196593571, "grad_norm": 0.7492523193359375, "learning_rate": 2.386376534235945e-05, "loss": 0.0722, "step": 18058 }, { "epoch": 0.3198165291963855, "grad_norm": 1.1934019327163696, "learning_rate": 2.3863071244251508e-05, "loss": 0.0874, "step": 18059 }, { "epoch": 0.31983423873341393, "grad_norm": 1.013540506362915, "learning_rate": 2.3862377116984958e-05, "loss": 0.1341, "step": 18060 }, { "epoch": 0.31985194827044233, "grad_norm": 0.9216323494911194, "learning_rate": 2.3861682960562087e-05, "loss": 0.1006, "step": 18061 }, { "epoch": 0.3198696578074708, "grad_norm": 0.5494329333305359, "learning_rate": 2.3860988774985173e-05, "loss": 0.0976, "step": 18062 }, { "epoch": 0.3198873673444992, "grad_norm": 0.5365607142448425, "learning_rate": 2.3860294560256502e-05, "loss": 0.0493, "step": 18063 }, { "epoch": 0.31990507688152764, "grad_norm": 0.9746430516242981, "learning_rate": 2.3859600316378357e-05, "loss": 0.1395, "step": 18064 }, { "epoch": 0.31992278641855604, "grad_norm": 0.8180801272392273, "learning_rate": 2.3858906043353023e-05, "loss": 0.0762, "step": 18065 }, { "epoch": 0.3199404959555845, "grad_norm": 1.2103983163833618, "learning_rate": 2.3858211741182785e-05, "loss": 0.137, "step": 18066 }, { "epoch": 0.3199582054926129, "grad_norm": 1.1143360137939453, "learning_rate": 2.3857517409869926e-05, "loss": 0.0943, "step": 18067 }, { "epoch": 0.31997591502964134, "grad_norm": 1.9035080671310425, "learning_rate": 2.3856823049416734e-05, "loss": 0.0919, "step": 18068 }, { "epoch": 0.31999362456666974, "grad_norm": 0.743412435054779, "learning_rate": 2.385612865982549e-05, "loss": 0.0701, "step": 18069 }, { "epoch": 0.3200113341036982, "grad_norm": 0.7954809069633484, "learning_rate": 2.3855434241098474e-05, "loss": 0.1051, "step": 18070 }, { "epoch": 0.3200290436407266, "grad_norm": 0.8199231624603271, "learning_rate": 2.3854739793237973e-05, "loss": 0.1209, "step": 18071 }, { "epoch": 0.32004675317775505, "grad_norm": 0.5500145554542542, "learning_rate": 2.385404531624628e-05, "loss": 0.0938, "step": 18072 }, { "epoch": 0.3200644627147835, "grad_norm": 0.7039259672164917, "learning_rate": 2.3853350810125673e-05, "loss": 0.0756, "step": 18073 }, { "epoch": 0.3200821722518119, "grad_norm": 0.7792214155197144, "learning_rate": 2.3852656274878433e-05, "loss": 0.083, "step": 18074 }, { "epoch": 0.32009988178884036, "grad_norm": 0.44712379574775696, "learning_rate": 2.3851961710506853e-05, "loss": 0.0765, "step": 18075 }, { "epoch": 0.32011759132586876, "grad_norm": 0.867702066898346, "learning_rate": 2.3851267117013214e-05, "loss": 0.1419, "step": 18076 }, { "epoch": 0.3201353008628972, "grad_norm": 0.8359876871109009, "learning_rate": 2.3850572494399797e-05, "loss": 0.0805, "step": 18077 }, { "epoch": 0.3201530103999256, "grad_norm": 0.6995319724082947, "learning_rate": 2.3849877842668895e-05, "loss": 0.0941, "step": 18078 }, { "epoch": 0.32017071993695406, "grad_norm": 1.0025851726531982, "learning_rate": 2.384918316182279e-05, "loss": 0.0978, "step": 18079 }, { "epoch": 0.32018842947398246, "grad_norm": 1.224540114402771, "learning_rate": 2.3848488451863767e-05, "loss": 0.1165, "step": 18080 }, { "epoch": 0.3202061390110109, "grad_norm": 0.9272605776786804, "learning_rate": 2.3847793712794112e-05, "loss": 0.0632, "step": 18081 }, { "epoch": 0.3202238485480393, "grad_norm": 0.9214483499526978, "learning_rate": 2.3847098944616112e-05, "loss": 0.0783, "step": 18082 }, { "epoch": 0.32024155808506777, "grad_norm": 0.9478376507759094, "learning_rate": 2.384640414733205e-05, "loss": 0.0962, "step": 18083 }, { "epoch": 0.32025926762209617, "grad_norm": 0.9486128091812134, "learning_rate": 2.3845709320944215e-05, "loss": 0.1039, "step": 18084 }, { "epoch": 0.3202769771591246, "grad_norm": 0.9357857704162598, "learning_rate": 2.3845014465454885e-05, "loss": 0.094, "step": 18085 }, { "epoch": 0.320294686696153, "grad_norm": 0.5388160347938538, "learning_rate": 2.384431958086636e-05, "loss": 0.0892, "step": 18086 }, { "epoch": 0.3203123962331815, "grad_norm": 0.6892699003219604, "learning_rate": 2.3843624667180916e-05, "loss": 0.0572, "step": 18087 }, { "epoch": 0.3203301057702099, "grad_norm": 0.4892851710319519, "learning_rate": 2.3842929724400836e-05, "loss": 0.0853, "step": 18088 }, { "epoch": 0.3203478153072383, "grad_norm": 0.6614168882369995, "learning_rate": 2.3842234752528417e-05, "loss": 0.0717, "step": 18089 }, { "epoch": 0.3203655248442668, "grad_norm": 0.8238881230354309, "learning_rate": 2.384153975156594e-05, "loss": 0.0709, "step": 18090 }, { "epoch": 0.3203832343812952, "grad_norm": 0.5998144149780273, "learning_rate": 2.384084472151569e-05, "loss": 0.0713, "step": 18091 }, { "epoch": 0.32040094391832363, "grad_norm": 0.6607643365859985, "learning_rate": 2.384014966237995e-05, "loss": 0.0946, "step": 18092 }, { "epoch": 0.32041865345535203, "grad_norm": 1.2180237770080566, "learning_rate": 2.383945457416102e-05, "loss": 0.0983, "step": 18093 }, { "epoch": 0.3204363629923805, "grad_norm": 1.7271113395690918, "learning_rate": 2.3838759456861177e-05, "loss": 0.1326, "step": 18094 }, { "epoch": 0.3204540725294089, "grad_norm": 0.6188728213310242, "learning_rate": 2.3838064310482705e-05, "loss": 0.1024, "step": 18095 }, { "epoch": 0.32047178206643734, "grad_norm": 0.6871799826622009, "learning_rate": 2.38373691350279e-05, "loss": 0.0927, "step": 18096 }, { "epoch": 0.32048949160346574, "grad_norm": 0.69230717420578, "learning_rate": 2.383667393049904e-05, "loss": 0.0753, "step": 18097 }, { "epoch": 0.3205072011404942, "grad_norm": 0.7350403666496277, "learning_rate": 2.383597869689842e-05, "loss": 0.1101, "step": 18098 }, { "epoch": 0.3205249106775226, "grad_norm": 1.0359019041061401, "learning_rate": 2.3835283434228322e-05, "loss": 0.1145, "step": 18099 }, { "epoch": 0.32054262021455104, "grad_norm": 0.5309560894966125, "learning_rate": 2.3834588142491038e-05, "loss": 0.0648, "step": 18100 }, { "epoch": 0.32056032975157944, "grad_norm": 0.7287082076072693, "learning_rate": 2.3833892821688848e-05, "loss": 0.0803, "step": 18101 }, { "epoch": 0.3205780392886079, "grad_norm": 0.5744906067848206, "learning_rate": 2.3833197471824046e-05, "loss": 0.095, "step": 18102 }, { "epoch": 0.32059574882563635, "grad_norm": 0.8600823879241943, "learning_rate": 2.383250209289892e-05, "loss": 0.1085, "step": 18103 }, { "epoch": 0.32061345836266475, "grad_norm": 0.5729118585586548, "learning_rate": 2.3831806684915755e-05, "loss": 0.0937, "step": 18104 }, { "epoch": 0.3206311678996932, "grad_norm": 0.9701547622680664, "learning_rate": 2.383111124787684e-05, "loss": 0.0933, "step": 18105 }, { "epoch": 0.3206488774367216, "grad_norm": 0.8595721125602722, "learning_rate": 2.3830415781784457e-05, "loss": 0.0615, "step": 18106 }, { "epoch": 0.32066658697375006, "grad_norm": 0.9729540348052979, "learning_rate": 2.38297202866409e-05, "loss": 0.1216, "step": 18107 }, { "epoch": 0.32068429651077845, "grad_norm": 0.731144368648529, "learning_rate": 2.3829024762448458e-05, "loss": 0.1054, "step": 18108 }, { "epoch": 0.3207020060478069, "grad_norm": 0.7904123663902283, "learning_rate": 2.382832920920942e-05, "loss": 0.0681, "step": 18109 }, { "epoch": 0.3207197155848353, "grad_norm": 1.036063313484192, "learning_rate": 2.382763362692607e-05, "loss": 0.0938, "step": 18110 }, { "epoch": 0.32073742512186376, "grad_norm": 0.5147719383239746, "learning_rate": 2.3826938015600698e-05, "loss": 0.0778, "step": 18111 }, { "epoch": 0.32075513465889216, "grad_norm": 0.856696605682373, "learning_rate": 2.3826242375235596e-05, "loss": 0.0803, "step": 18112 }, { "epoch": 0.3207728441959206, "grad_norm": 0.6283304691314697, "learning_rate": 2.3825546705833046e-05, "loss": 0.1152, "step": 18113 }, { "epoch": 0.320790553732949, "grad_norm": 1.1249773502349854, "learning_rate": 2.382485100739534e-05, "loss": 0.1013, "step": 18114 }, { "epoch": 0.32080826326997747, "grad_norm": 1.3658937215805054, "learning_rate": 2.3824155279924767e-05, "loss": 0.0849, "step": 18115 }, { "epoch": 0.32082597280700587, "grad_norm": 1.268546462059021, "learning_rate": 2.3823459523423614e-05, "loss": 0.1205, "step": 18116 }, { "epoch": 0.3208436823440343, "grad_norm": 0.7811314463615417, "learning_rate": 2.3822763737894175e-05, "loss": 0.103, "step": 18117 }, { "epoch": 0.3208613918810628, "grad_norm": 0.886299192905426, "learning_rate": 2.382206792333873e-05, "loss": 0.1167, "step": 18118 }, { "epoch": 0.3208791014180912, "grad_norm": 1.1032285690307617, "learning_rate": 2.3821372079759582e-05, "loss": 0.0793, "step": 18119 }, { "epoch": 0.3208968109551196, "grad_norm": 0.5723621249198914, "learning_rate": 2.3820676207159005e-05, "loss": 0.0477, "step": 18120 }, { "epoch": 0.320914520492148, "grad_norm": 0.7676396369934082, "learning_rate": 2.38199803055393e-05, "loss": 0.1253, "step": 18121 }, { "epoch": 0.3209322300291765, "grad_norm": 0.6463615298271179, "learning_rate": 2.381928437490275e-05, "loss": 0.0791, "step": 18122 }, { "epoch": 0.3209499395662049, "grad_norm": 1.1145836114883423, "learning_rate": 2.3818588415251644e-05, "loss": 0.0869, "step": 18123 }, { "epoch": 0.32096764910323333, "grad_norm": 0.7593079805374146, "learning_rate": 2.381789242658828e-05, "loss": 0.078, "step": 18124 }, { "epoch": 0.32098535864026173, "grad_norm": 0.6529179215431213, "learning_rate": 2.381719640891494e-05, "loss": 0.0865, "step": 18125 }, { "epoch": 0.3210030681772902, "grad_norm": 0.5550765991210938, "learning_rate": 2.3816500362233912e-05, "loss": 0.0709, "step": 18126 }, { "epoch": 0.3210207777143186, "grad_norm": 0.9681974053382874, "learning_rate": 2.3815804286547493e-05, "loss": 0.0779, "step": 18127 }, { "epoch": 0.32103848725134704, "grad_norm": 0.5536705851554871, "learning_rate": 2.3815108181857967e-05, "loss": 0.0585, "step": 18128 }, { "epoch": 0.32105619678837544, "grad_norm": 1.1388651132583618, "learning_rate": 2.381441204816763e-05, "loss": 0.108, "step": 18129 }, { "epoch": 0.3210739063254039, "grad_norm": 0.7523413300514221, "learning_rate": 2.3813715885478766e-05, "loss": 0.0884, "step": 18130 }, { "epoch": 0.3210916158624323, "grad_norm": 0.8769616484642029, "learning_rate": 2.3813019693793665e-05, "loss": 0.0858, "step": 18131 }, { "epoch": 0.32110932539946074, "grad_norm": 0.6928542256355286, "learning_rate": 2.3812323473114625e-05, "loss": 0.062, "step": 18132 }, { "epoch": 0.3211270349364892, "grad_norm": 0.7398865818977356, "learning_rate": 2.3811627223443932e-05, "loss": 0.0546, "step": 18133 }, { "epoch": 0.3211447444735176, "grad_norm": 0.5184754729270935, "learning_rate": 2.3810930944783878e-05, "loss": 0.053, "step": 18134 }, { "epoch": 0.32116245401054605, "grad_norm": 0.6253125667572021, "learning_rate": 2.3810234637136754e-05, "loss": 0.0894, "step": 18135 }, { "epoch": 0.32118016354757445, "grad_norm": 0.48238304257392883, "learning_rate": 2.380953830050484e-05, "loss": 0.0704, "step": 18136 }, { "epoch": 0.3211978730846029, "grad_norm": 0.7023029327392578, "learning_rate": 2.3808841934890444e-05, "loss": 0.0633, "step": 18137 }, { "epoch": 0.3212155826216313, "grad_norm": 0.6272969245910645, "learning_rate": 2.380814554029585e-05, "loss": 0.0676, "step": 18138 }, { "epoch": 0.32123329215865976, "grad_norm": 0.7680333256721497, "learning_rate": 2.380744911672334e-05, "loss": 0.0702, "step": 18139 }, { "epoch": 0.32125100169568815, "grad_norm": 0.6628137826919556, "learning_rate": 2.3806752664175223e-05, "loss": 0.0644, "step": 18140 }, { "epoch": 0.3212687112327166, "grad_norm": 1.1063481569290161, "learning_rate": 2.3806056182653773e-05, "loss": 0.1132, "step": 18141 }, { "epoch": 0.321286420769745, "grad_norm": 0.7040275931358337, "learning_rate": 2.3805359672161293e-05, "loss": 0.0937, "step": 18142 }, { "epoch": 0.32130413030677346, "grad_norm": 0.6485496759414673, "learning_rate": 2.380466313270007e-05, "loss": 0.0841, "step": 18143 }, { "epoch": 0.32132183984380186, "grad_norm": 1.240822196006775, "learning_rate": 2.3803966564272392e-05, "loss": 0.1157, "step": 18144 }, { "epoch": 0.3213395493808303, "grad_norm": 0.8869866132736206, "learning_rate": 2.380326996688056e-05, "loss": 0.0861, "step": 18145 }, { "epoch": 0.3213572589178587, "grad_norm": 0.9783560037612915, "learning_rate": 2.3802573340526856e-05, "loss": 0.1125, "step": 18146 }, { "epoch": 0.32137496845488717, "grad_norm": 0.8248237371444702, "learning_rate": 2.3801876685213575e-05, "loss": 0.1153, "step": 18147 }, { "epoch": 0.3213926779919156, "grad_norm": 1.1452064514160156, "learning_rate": 2.3801180000943016e-05, "loss": 0.1407, "step": 18148 }, { "epoch": 0.321410387528944, "grad_norm": 0.647561252117157, "learning_rate": 2.3800483287717462e-05, "loss": 0.0927, "step": 18149 }, { "epoch": 0.3214280970659725, "grad_norm": 0.38089898228645325, "learning_rate": 2.3799786545539204e-05, "loss": 0.0913, "step": 18150 }, { "epoch": 0.32144580660300087, "grad_norm": 0.7954270243644714, "learning_rate": 2.3799089774410542e-05, "loss": 0.0958, "step": 18151 }, { "epoch": 0.3214635161400293, "grad_norm": 0.9837751388549805, "learning_rate": 2.3798392974333763e-05, "loss": 0.1029, "step": 18152 }, { "epoch": 0.3214812256770577, "grad_norm": 0.6909880638122559, "learning_rate": 2.3797696145311165e-05, "loss": 0.0581, "step": 18153 }, { "epoch": 0.3214989352140862, "grad_norm": 1.0479800701141357, "learning_rate": 2.3796999287345028e-05, "loss": 0.1083, "step": 18154 }, { "epoch": 0.3215166447511146, "grad_norm": 0.901674211025238, "learning_rate": 2.379630240043766e-05, "loss": 0.0698, "step": 18155 }, { "epoch": 0.32153435428814303, "grad_norm": 0.8633265495300293, "learning_rate": 2.379560548459135e-05, "loss": 0.0898, "step": 18156 }, { "epoch": 0.32155206382517143, "grad_norm": 0.7547114491462708, "learning_rate": 2.379490853980838e-05, "loss": 0.0923, "step": 18157 }, { "epoch": 0.3215697733621999, "grad_norm": 0.7086543440818787, "learning_rate": 2.379421156609106e-05, "loss": 0.0528, "step": 18158 }, { "epoch": 0.3215874828992283, "grad_norm": 0.8229072690010071, "learning_rate": 2.3793514563441665e-05, "loss": 0.0739, "step": 18159 }, { "epoch": 0.32160519243625674, "grad_norm": 0.5856227278709412, "learning_rate": 2.3792817531862498e-05, "loss": 0.0841, "step": 18160 }, { "epoch": 0.32162290197328514, "grad_norm": 1.1075843572616577, "learning_rate": 2.3792120471355848e-05, "loss": 0.1225, "step": 18161 }, { "epoch": 0.3216406115103136, "grad_norm": 0.7113922834396362, "learning_rate": 2.379142338192402e-05, "loss": 0.0785, "step": 18162 }, { "epoch": 0.32165832104734204, "grad_norm": 0.6305009126663208, "learning_rate": 2.3790726263569287e-05, "loss": 0.0679, "step": 18163 }, { "epoch": 0.32167603058437044, "grad_norm": 0.6557957530021667, "learning_rate": 2.379002911629396e-05, "loss": 0.0811, "step": 18164 }, { "epoch": 0.3216937401213989, "grad_norm": 0.9732286930084229, "learning_rate": 2.3789331940100327e-05, "loss": 0.0907, "step": 18165 }, { "epoch": 0.3217114496584273, "grad_norm": 0.7922136783599854, "learning_rate": 2.378863473499068e-05, "loss": 0.0845, "step": 18166 }, { "epoch": 0.32172915919545575, "grad_norm": 1.0340608358383179, "learning_rate": 2.3787937500967314e-05, "loss": 0.1131, "step": 18167 }, { "epoch": 0.32174686873248415, "grad_norm": 0.4276425540447235, "learning_rate": 2.378724023803252e-05, "loss": 0.0668, "step": 18168 }, { "epoch": 0.3217645782695126, "grad_norm": 0.9202277064323425, "learning_rate": 2.3786542946188594e-05, "loss": 0.0898, "step": 18169 }, { "epoch": 0.321782287806541, "grad_norm": 0.9963957667350769, "learning_rate": 2.378584562543783e-05, "loss": 0.1549, "step": 18170 }, { "epoch": 0.32179999734356945, "grad_norm": 0.657113254070282, "learning_rate": 2.378514827578253e-05, "loss": 0.0888, "step": 18171 }, { "epoch": 0.32181770688059785, "grad_norm": 0.6081112623214722, "learning_rate": 2.3784450897224978e-05, "loss": 0.0852, "step": 18172 }, { "epoch": 0.3218354164176263, "grad_norm": 1.0098445415496826, "learning_rate": 2.3783753489767466e-05, "loss": 0.104, "step": 18173 }, { "epoch": 0.3218531259546547, "grad_norm": 0.7550167441368103, "learning_rate": 2.3783056053412297e-05, "loss": 0.0842, "step": 18174 }, { "epoch": 0.32187083549168316, "grad_norm": 0.8397470712661743, "learning_rate": 2.378235858816176e-05, "loss": 0.0953, "step": 18175 }, { "epoch": 0.3218885450287116, "grad_norm": 0.6734909415245056, "learning_rate": 2.3781661094018153e-05, "loss": 0.0824, "step": 18176 }, { "epoch": 0.32190625456574, "grad_norm": 1.0734058618545532, "learning_rate": 2.3780963570983768e-05, "loss": 0.0723, "step": 18177 }, { "epoch": 0.32192396410276847, "grad_norm": 0.9533413052558899, "learning_rate": 2.37802660190609e-05, "loss": 0.0872, "step": 18178 }, { "epoch": 0.32194167363979687, "grad_norm": 0.8170492649078369, "learning_rate": 2.377956843825185e-05, "loss": 0.0775, "step": 18179 }, { "epoch": 0.3219593831768253, "grad_norm": 1.159584641456604, "learning_rate": 2.3778870828558903e-05, "loss": 0.0985, "step": 18180 }, { "epoch": 0.3219770927138537, "grad_norm": 0.8205990791320801, "learning_rate": 2.377817318998436e-05, "loss": 0.1139, "step": 18181 }, { "epoch": 0.3219948022508822, "grad_norm": 0.6990002989768982, "learning_rate": 2.3777475522530514e-05, "loss": 0.0752, "step": 18182 }, { "epoch": 0.32201251178791057, "grad_norm": 1.0739996433258057, "learning_rate": 2.3776777826199663e-05, "loss": 0.1062, "step": 18183 }, { "epoch": 0.322030221324939, "grad_norm": 0.8160891532897949, "learning_rate": 2.37760801009941e-05, "loss": 0.0483, "step": 18184 }, { "epoch": 0.3220479308619674, "grad_norm": 0.7396239042282104, "learning_rate": 2.3775382346916123e-05, "loss": 0.1023, "step": 18185 }, { "epoch": 0.3220656403989959, "grad_norm": 0.6326180696487427, "learning_rate": 2.3774684563968025e-05, "loss": 0.0723, "step": 18186 }, { "epoch": 0.3220833499360243, "grad_norm": 0.629051148891449, "learning_rate": 2.3773986752152102e-05, "loss": 0.0711, "step": 18187 }, { "epoch": 0.32210105947305273, "grad_norm": 0.8342304229736328, "learning_rate": 2.377328891147065e-05, "loss": 0.1194, "step": 18188 }, { "epoch": 0.32211876901008113, "grad_norm": 0.5871685743331909, "learning_rate": 2.377259104192596e-05, "loss": 0.0969, "step": 18189 }, { "epoch": 0.3221364785471096, "grad_norm": 0.7360912561416626, "learning_rate": 2.3771893143520344e-05, "loss": 0.1035, "step": 18190 }, { "epoch": 0.32215418808413804, "grad_norm": 0.7377802729606628, "learning_rate": 2.3771195216256073e-05, "loss": 0.1044, "step": 18191 }, { "epoch": 0.32217189762116644, "grad_norm": 0.8750842809677124, "learning_rate": 2.3770497260135467e-05, "loss": 0.105, "step": 18192 }, { "epoch": 0.3221896071581949, "grad_norm": 0.7944458723068237, "learning_rate": 2.376979927516081e-05, "loss": 0.0894, "step": 18193 }, { "epoch": 0.3222073166952233, "grad_norm": 0.9845200181007385, "learning_rate": 2.37691012613344e-05, "loss": 0.081, "step": 18194 }, { "epoch": 0.32222502623225174, "grad_norm": 0.6960806250572205, "learning_rate": 2.376840321865853e-05, "loss": 0.0845, "step": 18195 }, { "epoch": 0.32224273576928014, "grad_norm": 0.5699895620346069, "learning_rate": 2.3767705147135505e-05, "loss": 0.0815, "step": 18196 }, { "epoch": 0.3222604453063086, "grad_norm": 0.6997326016426086, "learning_rate": 2.3767007046767617e-05, "loss": 0.103, "step": 18197 }, { "epoch": 0.322278154843337, "grad_norm": 0.5864707231521606, "learning_rate": 2.3766308917557156e-05, "loss": 0.0724, "step": 18198 }, { "epoch": 0.32229586438036545, "grad_norm": 0.8372981548309326, "learning_rate": 2.3765610759506433e-05, "loss": 0.0921, "step": 18199 }, { "epoch": 0.32231357391739385, "grad_norm": 0.8162250518798828, "learning_rate": 2.3764912572617735e-05, "loss": 0.1249, "step": 18200 }, { "epoch": 0.3223312834544223, "grad_norm": 1.0965590476989746, "learning_rate": 2.376421435689336e-05, "loss": 0.0874, "step": 18201 }, { "epoch": 0.3223489929914507, "grad_norm": 0.8301029801368713, "learning_rate": 2.3763516112335604e-05, "loss": 0.0788, "step": 18202 }, { "epoch": 0.32236670252847915, "grad_norm": 0.6516448259353638, "learning_rate": 2.3762817838946773e-05, "loss": 0.0931, "step": 18203 }, { "epoch": 0.32238441206550755, "grad_norm": 0.8774427771568298, "learning_rate": 2.3762119536729152e-05, "loss": 0.0932, "step": 18204 }, { "epoch": 0.322402121602536, "grad_norm": 0.6377043128013611, "learning_rate": 2.3761421205685044e-05, "loss": 0.0733, "step": 18205 }, { "epoch": 0.32241983113956446, "grad_norm": 0.8968004584312439, "learning_rate": 2.376072284581675e-05, "loss": 0.0834, "step": 18206 }, { "epoch": 0.32243754067659286, "grad_norm": 1.2304209470748901, "learning_rate": 2.3760024457126563e-05, "loss": 0.0796, "step": 18207 }, { "epoch": 0.3224552502136213, "grad_norm": 1.0969301462173462, "learning_rate": 2.375932603961678e-05, "loss": 0.0683, "step": 18208 }, { "epoch": 0.3224729597506497, "grad_norm": 0.819293737411499, "learning_rate": 2.37586275932897e-05, "loss": 0.0945, "step": 18209 }, { "epoch": 0.32249066928767817, "grad_norm": 1.7422047853469849, "learning_rate": 2.3757929118147623e-05, "loss": 0.1063, "step": 18210 }, { "epoch": 0.32250837882470657, "grad_norm": 0.9437211751937866, "learning_rate": 2.3757230614192845e-05, "loss": 0.0916, "step": 18211 }, { "epoch": 0.322526088361735, "grad_norm": 1.01237154006958, "learning_rate": 2.375653208142766e-05, "loss": 0.0774, "step": 18212 }, { "epoch": 0.3225437978987634, "grad_norm": 0.6858612895011902, "learning_rate": 2.3755833519854373e-05, "loss": 0.0627, "step": 18213 }, { "epoch": 0.32256150743579187, "grad_norm": 0.47031500935554504, "learning_rate": 2.3755134929475283e-05, "loss": 0.0707, "step": 18214 }, { "epoch": 0.32257921697282027, "grad_norm": 0.7509259581565857, "learning_rate": 2.3754436310292678e-05, "loss": 0.1193, "step": 18215 }, { "epoch": 0.3225969265098487, "grad_norm": 0.6208603382110596, "learning_rate": 2.3753737662308865e-05, "loss": 0.0544, "step": 18216 }, { "epoch": 0.3226146360468771, "grad_norm": 1.039801001548767, "learning_rate": 2.3753038985526143e-05, "loss": 0.0802, "step": 18217 }, { "epoch": 0.3226323455839056, "grad_norm": 0.9086523652076721, "learning_rate": 2.3752340279946804e-05, "loss": 0.1031, "step": 18218 }, { "epoch": 0.322650055120934, "grad_norm": 0.8310830593109131, "learning_rate": 2.3751641545573156e-05, "loss": 0.0795, "step": 18219 }, { "epoch": 0.32266776465796243, "grad_norm": 0.4137410819530487, "learning_rate": 2.3750942782407493e-05, "loss": 0.0755, "step": 18220 }, { "epoch": 0.3226854741949909, "grad_norm": 0.8416698575019836, "learning_rate": 2.3750243990452104e-05, "loss": 0.1091, "step": 18221 }, { "epoch": 0.3227031837320193, "grad_norm": 0.9154465198516846, "learning_rate": 2.3749545169709305e-05, "loss": 0.107, "step": 18222 }, { "epoch": 0.32272089326904774, "grad_norm": 0.7738664746284485, "learning_rate": 2.3748846320181386e-05, "loss": 0.078, "step": 18223 }, { "epoch": 0.32273860280607614, "grad_norm": 0.6282504200935364, "learning_rate": 2.374814744187065e-05, "loss": 0.0951, "step": 18224 }, { "epoch": 0.3227563123431046, "grad_norm": 0.5739527940750122, "learning_rate": 2.3747448534779388e-05, "loss": 0.0666, "step": 18225 }, { "epoch": 0.322774021880133, "grad_norm": 1.0584535598754883, "learning_rate": 2.3746749598909907e-05, "loss": 0.1087, "step": 18226 }, { "epoch": 0.32279173141716144, "grad_norm": 0.9606537222862244, "learning_rate": 2.3746050634264508e-05, "loss": 0.0906, "step": 18227 }, { "epoch": 0.32280944095418984, "grad_norm": 0.8356102108955383, "learning_rate": 2.3745351640845484e-05, "loss": 0.0978, "step": 18228 }, { "epoch": 0.3228271504912183, "grad_norm": 1.0758063793182373, "learning_rate": 2.374465261865514e-05, "loss": 0.0806, "step": 18229 }, { "epoch": 0.3228448600282467, "grad_norm": 0.6864126920700073, "learning_rate": 2.374395356769577e-05, "loss": 0.095, "step": 18230 }, { "epoch": 0.32286256956527515, "grad_norm": 0.461932897567749, "learning_rate": 2.3743254487969678e-05, "loss": 0.0669, "step": 18231 }, { "epoch": 0.32288027910230355, "grad_norm": 0.537947416305542, "learning_rate": 2.3742555379479162e-05, "loss": 0.0678, "step": 18232 }, { "epoch": 0.322897988639332, "grad_norm": 0.66074538230896, "learning_rate": 2.3741856242226524e-05, "loss": 0.1103, "step": 18233 }, { "epoch": 0.3229156981763604, "grad_norm": 0.7193084955215454, "learning_rate": 2.3741157076214068e-05, "loss": 0.0707, "step": 18234 }, { "epoch": 0.32293340771338885, "grad_norm": 0.7884095907211304, "learning_rate": 2.3740457881444084e-05, "loss": 0.1064, "step": 18235 }, { "epoch": 0.3229511172504173, "grad_norm": 0.6231861710548401, "learning_rate": 2.3739758657918877e-05, "loss": 0.0935, "step": 18236 }, { "epoch": 0.3229688267874457, "grad_norm": 0.6972077488899231, "learning_rate": 2.373905940564075e-05, "loss": 0.1124, "step": 18237 }, { "epoch": 0.32298653632447416, "grad_norm": 0.5497759580612183, "learning_rate": 2.3738360124612e-05, "loss": 0.0794, "step": 18238 }, { "epoch": 0.32300424586150256, "grad_norm": 0.7838842868804932, "learning_rate": 2.3737660814834933e-05, "loss": 0.0586, "step": 18239 }, { "epoch": 0.323021955398531, "grad_norm": 0.48629653453826904, "learning_rate": 2.3736961476311844e-05, "loss": 0.0671, "step": 18240 }, { "epoch": 0.3230396649355594, "grad_norm": 0.7837795615196228, "learning_rate": 2.3736262109045034e-05, "loss": 0.0837, "step": 18241 }, { "epoch": 0.32305737447258787, "grad_norm": 0.39001208543777466, "learning_rate": 2.3735562713036804e-05, "loss": 0.0754, "step": 18242 }, { "epoch": 0.32307508400961626, "grad_norm": 0.7289948463439941, "learning_rate": 2.373486328828946e-05, "loss": 0.0713, "step": 18243 }, { "epoch": 0.3230927935466447, "grad_norm": 0.8377980589866638, "learning_rate": 2.3734163834805298e-05, "loss": 0.1021, "step": 18244 }, { "epoch": 0.3231105030836731, "grad_norm": 0.9227915406227112, "learning_rate": 2.373346435258662e-05, "loss": 0.0958, "step": 18245 }, { "epoch": 0.32312821262070157, "grad_norm": 0.5786755681037903, "learning_rate": 2.3732764841635723e-05, "loss": 0.0692, "step": 18246 }, { "epoch": 0.32314592215772997, "grad_norm": 1.1512219905853271, "learning_rate": 2.3732065301954923e-05, "loss": 0.1031, "step": 18247 }, { "epoch": 0.3231636316947584, "grad_norm": 0.29988840222358704, "learning_rate": 2.3731365733546503e-05, "loss": 0.0824, "step": 18248 }, { "epoch": 0.3231813412317868, "grad_norm": 0.9413899183273315, "learning_rate": 2.3730666136412776e-05, "loss": 0.087, "step": 18249 }, { "epoch": 0.3231990507688153, "grad_norm": 1.1211740970611572, "learning_rate": 2.372996651055604e-05, "loss": 0.0957, "step": 18250 }, { "epoch": 0.32321676030584373, "grad_norm": 0.9261248707771301, "learning_rate": 2.3729266855978598e-05, "loss": 0.1155, "step": 18251 }, { "epoch": 0.32323446984287213, "grad_norm": 1.005747675895691, "learning_rate": 2.3728567172682752e-05, "loss": 0.1149, "step": 18252 }, { "epoch": 0.3232521793799006, "grad_norm": 0.7589799761772156, "learning_rate": 2.3727867460670798e-05, "loss": 0.1308, "step": 18253 }, { "epoch": 0.323269888916929, "grad_norm": 0.7521978616714478, "learning_rate": 2.3727167719945044e-05, "loss": 0.1103, "step": 18254 }, { "epoch": 0.32328759845395744, "grad_norm": 0.8154234886169434, "learning_rate": 2.3726467950507797e-05, "loss": 0.089, "step": 18255 }, { "epoch": 0.32330530799098584, "grad_norm": 0.6358088850975037, "learning_rate": 2.3725768152361344e-05, "loss": 0.0841, "step": 18256 }, { "epoch": 0.3233230175280143, "grad_norm": 0.9105470180511475, "learning_rate": 2.3725068325508002e-05, "loss": 0.1372, "step": 18257 }, { "epoch": 0.3233407270650427, "grad_norm": 0.6827591061592102, "learning_rate": 2.3724368469950063e-05, "loss": 0.0706, "step": 18258 }, { "epoch": 0.32335843660207114, "grad_norm": 0.501258134841919, "learning_rate": 2.3723668585689843e-05, "loss": 0.0935, "step": 18259 }, { "epoch": 0.32337614613909954, "grad_norm": 0.34389251470565796, "learning_rate": 2.372296867272963e-05, "loss": 0.0694, "step": 18260 }, { "epoch": 0.323393855676128, "grad_norm": 0.5533784031867981, "learning_rate": 2.3722268731071735e-05, "loss": 0.0787, "step": 18261 }, { "epoch": 0.3234115652131564, "grad_norm": 0.6672326326370239, "learning_rate": 2.3721568760718456e-05, "loss": 0.0965, "step": 18262 }, { "epoch": 0.32342927475018485, "grad_norm": 0.7385595440864563, "learning_rate": 2.3720868761672095e-05, "loss": 0.1052, "step": 18263 }, { "epoch": 0.32344698428721325, "grad_norm": 0.6794900894165039, "learning_rate": 2.372016873393496e-05, "loss": 0.0891, "step": 18264 }, { "epoch": 0.3234646938242417, "grad_norm": 0.8770080208778381, "learning_rate": 2.3719468677509353e-05, "loss": 0.1005, "step": 18265 }, { "epoch": 0.32348240336127015, "grad_norm": 0.5755957961082458, "learning_rate": 2.3718768592397574e-05, "loss": 0.0533, "step": 18266 }, { "epoch": 0.32350011289829855, "grad_norm": 0.489772766828537, "learning_rate": 2.3718068478601928e-05, "loss": 0.0418, "step": 18267 }, { "epoch": 0.323517822435327, "grad_norm": 0.8950027227401733, "learning_rate": 2.3717368336124724e-05, "loss": 0.0761, "step": 18268 }, { "epoch": 0.3235355319723554, "grad_norm": 0.7602750658988953, "learning_rate": 2.3716668164968254e-05, "loss": 0.102, "step": 18269 }, { "epoch": 0.32355324150938386, "grad_norm": 0.9342440366744995, "learning_rate": 2.3715967965134833e-05, "loss": 0.0751, "step": 18270 }, { "epoch": 0.32357095104641226, "grad_norm": 0.6067834496498108, "learning_rate": 2.3715267736626752e-05, "loss": 0.0861, "step": 18271 }, { "epoch": 0.3235886605834407, "grad_norm": 0.7574297189712524, "learning_rate": 2.3714567479446328e-05, "loss": 0.0782, "step": 18272 }, { "epoch": 0.3236063701204691, "grad_norm": 0.9879876971244812, "learning_rate": 2.371386719359585e-05, "loss": 0.1111, "step": 18273 }, { "epoch": 0.32362407965749757, "grad_norm": 0.7880160212516785, "learning_rate": 2.3713166879077636e-05, "loss": 0.1192, "step": 18274 }, { "epoch": 0.32364178919452596, "grad_norm": 0.7062556147575378, "learning_rate": 2.3712466535893992e-05, "loss": 0.0726, "step": 18275 }, { "epoch": 0.3236594987315544, "grad_norm": 0.8578923940658569, "learning_rate": 2.37117661640472e-05, "loss": 0.0771, "step": 18276 }, { "epoch": 0.3236772082685828, "grad_norm": 0.5263417959213257, "learning_rate": 2.3711065763539587e-05, "loss": 0.094, "step": 18277 }, { "epoch": 0.32369491780561127, "grad_norm": 1.854150652885437, "learning_rate": 2.3710365334373446e-05, "loss": 0.0752, "step": 18278 }, { "epoch": 0.32371262734263967, "grad_norm": 0.8825897574424744, "learning_rate": 2.3709664876551086e-05, "loss": 0.0748, "step": 18279 }, { "epoch": 0.3237303368796681, "grad_norm": 0.5667031407356262, "learning_rate": 2.3708964390074804e-05, "loss": 0.1073, "step": 18280 }, { "epoch": 0.3237480464166966, "grad_norm": 0.8176568150520325, "learning_rate": 2.370826387494692e-05, "loss": 0.0981, "step": 18281 }, { "epoch": 0.323765755953725, "grad_norm": 0.7734203934669495, "learning_rate": 2.3707563331169718e-05, "loss": 0.0552, "step": 18282 }, { "epoch": 0.32378346549075343, "grad_norm": 2.5543408393859863, "learning_rate": 2.370686275874552e-05, "loss": 0.0854, "step": 18283 }, { "epoch": 0.32380117502778183, "grad_norm": 0.9987393021583557, "learning_rate": 2.370616215767662e-05, "loss": 0.109, "step": 18284 }, { "epoch": 0.3238188845648103, "grad_norm": 0.626654326915741, "learning_rate": 2.370546152796533e-05, "loss": 0.0814, "step": 18285 }, { "epoch": 0.3238365941018387, "grad_norm": 1.0974534749984741, "learning_rate": 2.3704760869613956e-05, "loss": 0.1175, "step": 18286 }, { "epoch": 0.32385430363886714, "grad_norm": 0.9466636180877686, "learning_rate": 2.3704060182624792e-05, "loss": 0.1407, "step": 18287 }, { "epoch": 0.32387201317589553, "grad_norm": 0.7139888405799866, "learning_rate": 2.3703359467000155e-05, "loss": 0.0817, "step": 18288 }, { "epoch": 0.323889722712924, "grad_norm": 1.0888803005218506, "learning_rate": 2.3702658722742342e-05, "loss": 0.085, "step": 18289 }, { "epoch": 0.3239074322499524, "grad_norm": 1.0439139604568481, "learning_rate": 2.3701957949853664e-05, "loss": 0.1024, "step": 18290 }, { "epoch": 0.32392514178698084, "grad_norm": 0.8507754802703857, "learning_rate": 2.3701257148336423e-05, "loss": 0.0888, "step": 18291 }, { "epoch": 0.32394285132400924, "grad_norm": 0.5406944155693054, "learning_rate": 2.370055631819293e-05, "loss": 0.0868, "step": 18292 }, { "epoch": 0.3239605608610377, "grad_norm": 0.8586603999137878, "learning_rate": 2.369985545942548e-05, "loss": 0.0879, "step": 18293 }, { "epoch": 0.3239782703980661, "grad_norm": 0.9460771679878235, "learning_rate": 2.369915457203639e-05, "loss": 0.0994, "step": 18294 }, { "epoch": 0.32399597993509455, "grad_norm": 1.0456358194351196, "learning_rate": 2.369845365602796e-05, "loss": 0.0826, "step": 18295 }, { "epoch": 0.324013689472123, "grad_norm": 0.6551015973091125, "learning_rate": 2.3697752711402495e-05, "loss": 0.0666, "step": 18296 }, { "epoch": 0.3240313990091514, "grad_norm": 1.0156753063201904, "learning_rate": 2.3697051738162308e-05, "loss": 0.0923, "step": 18297 }, { "epoch": 0.32404910854617985, "grad_norm": 0.37325114011764526, "learning_rate": 2.3696350736309697e-05, "loss": 0.0537, "step": 18298 }, { "epoch": 0.32406681808320825, "grad_norm": 0.9552344083786011, "learning_rate": 2.369564970584698e-05, "loss": 0.1067, "step": 18299 }, { "epoch": 0.3240845276202367, "grad_norm": 1.0270253419876099, "learning_rate": 2.3694948646776446e-05, "loss": 0.1266, "step": 18300 }, { "epoch": 0.3241022371572651, "grad_norm": 0.8545068502426147, "learning_rate": 2.369424755910041e-05, "loss": 0.1091, "step": 18301 }, { "epoch": 0.32411994669429356, "grad_norm": 0.9790577292442322, "learning_rate": 2.3693546442821183e-05, "loss": 0.0851, "step": 18302 }, { "epoch": 0.32413765623132196, "grad_norm": 0.7494553923606873, "learning_rate": 2.3692845297941065e-05, "loss": 0.0899, "step": 18303 }, { "epoch": 0.3241553657683504, "grad_norm": 0.9842589497566223, "learning_rate": 2.3692144124462367e-05, "loss": 0.0975, "step": 18304 }, { "epoch": 0.3241730753053788, "grad_norm": 0.39994704723358154, "learning_rate": 2.3691442922387397e-05, "loss": 0.079, "step": 18305 }, { "epoch": 0.32419078484240726, "grad_norm": 0.5594981908798218, "learning_rate": 2.369074169171845e-05, "loss": 0.0754, "step": 18306 }, { "epoch": 0.32420849437943566, "grad_norm": 0.5206003189086914, "learning_rate": 2.369004043245785e-05, "loss": 0.0711, "step": 18307 }, { "epoch": 0.3242262039164641, "grad_norm": 0.6428451538085938, "learning_rate": 2.3689339144607894e-05, "loss": 0.1224, "step": 18308 }, { "epoch": 0.3242439134534925, "grad_norm": 0.7738790512084961, "learning_rate": 2.368863782817089e-05, "loss": 0.1136, "step": 18309 }, { "epoch": 0.32426162299052097, "grad_norm": 1.0156532526016235, "learning_rate": 2.3687936483149144e-05, "loss": 0.1067, "step": 18310 }, { "epoch": 0.3242793325275494, "grad_norm": 1.1668827533721924, "learning_rate": 2.368723510954497e-05, "loss": 0.1147, "step": 18311 }, { "epoch": 0.3242970420645778, "grad_norm": 0.7051085233688354, "learning_rate": 2.3686533707360668e-05, "loss": 0.0738, "step": 18312 }, { "epoch": 0.3243147516016063, "grad_norm": 0.6013005375862122, "learning_rate": 2.3685832276598548e-05, "loss": 0.0664, "step": 18313 }, { "epoch": 0.3243324611386347, "grad_norm": 1.1153484582901, "learning_rate": 2.3685130817260925e-05, "loss": 0.1261, "step": 18314 }, { "epoch": 0.32435017067566313, "grad_norm": 0.8242712616920471, "learning_rate": 2.3684429329350092e-05, "loss": 0.1245, "step": 18315 }, { "epoch": 0.32436788021269153, "grad_norm": 0.8630778789520264, "learning_rate": 2.368372781286837e-05, "loss": 0.1132, "step": 18316 }, { "epoch": 0.32438558974972, "grad_norm": 0.6585264205932617, "learning_rate": 2.3683026267818057e-05, "loss": 0.077, "step": 18317 }, { "epoch": 0.3244032992867484, "grad_norm": 0.9692631959915161, "learning_rate": 2.368232469420147e-05, "loss": 0.1013, "step": 18318 }, { "epoch": 0.32442100882377684, "grad_norm": 0.5652663111686707, "learning_rate": 2.368162309202091e-05, "loss": 0.0573, "step": 18319 }, { "epoch": 0.32443871836080523, "grad_norm": 0.6026314496994019, "learning_rate": 2.368092146127869e-05, "loss": 0.1221, "step": 18320 }, { "epoch": 0.3244564278978337, "grad_norm": 0.6240838766098022, "learning_rate": 2.3680219801977116e-05, "loss": 0.0529, "step": 18321 }, { "epoch": 0.3244741374348621, "grad_norm": 0.9293899536132812, "learning_rate": 2.3679518114118498e-05, "loss": 0.1385, "step": 18322 }, { "epoch": 0.32449184697189054, "grad_norm": 0.8404783606529236, "learning_rate": 2.367881639770514e-05, "loss": 0.0728, "step": 18323 }, { "epoch": 0.32450955650891894, "grad_norm": 0.60882169008255, "learning_rate": 2.367811465273936e-05, "loss": 0.1004, "step": 18324 }, { "epoch": 0.3245272660459474, "grad_norm": 0.5972188115119934, "learning_rate": 2.367741287922346e-05, "loss": 0.0851, "step": 18325 }, { "epoch": 0.32454497558297585, "grad_norm": 0.7673765420913696, "learning_rate": 2.3676711077159745e-05, "loss": 0.1188, "step": 18326 }, { "epoch": 0.32456268512000425, "grad_norm": 1.1417673826217651, "learning_rate": 2.3676009246550528e-05, "loss": 0.1113, "step": 18327 }, { "epoch": 0.3245803946570327, "grad_norm": 0.810027003288269, "learning_rate": 2.367530738739812e-05, "loss": 0.0721, "step": 18328 }, { "epoch": 0.3245981041940611, "grad_norm": 0.8693475127220154, "learning_rate": 2.3674605499704826e-05, "loss": 0.1234, "step": 18329 }, { "epoch": 0.32461581373108955, "grad_norm": 1.0677602291107178, "learning_rate": 2.3673903583472962e-05, "loss": 0.0733, "step": 18330 }, { "epoch": 0.32463352326811795, "grad_norm": 0.5906199812889099, "learning_rate": 2.3673201638704823e-05, "loss": 0.0731, "step": 18331 }, { "epoch": 0.3246512328051464, "grad_norm": 0.6244823336601257, "learning_rate": 2.367249966540274e-05, "loss": 0.0883, "step": 18332 }, { "epoch": 0.3246689423421748, "grad_norm": 0.8963509202003479, "learning_rate": 2.3671797663569e-05, "loss": 0.1489, "step": 18333 }, { "epoch": 0.32468665187920326, "grad_norm": 0.7827874422073364, "learning_rate": 2.3671095633205928e-05, "loss": 0.0819, "step": 18334 }, { "epoch": 0.32470436141623166, "grad_norm": 0.8608472943305969, "learning_rate": 2.367039357431583e-05, "loss": 0.0896, "step": 18335 }, { "epoch": 0.3247220709532601, "grad_norm": 0.538655698299408, "learning_rate": 2.366969148690101e-05, "loss": 0.1037, "step": 18336 }, { "epoch": 0.3247397804902885, "grad_norm": 0.9464510083198547, "learning_rate": 2.366898937096378e-05, "loss": 0.0955, "step": 18337 }, { "epoch": 0.32475749002731696, "grad_norm": 0.6022655963897705, "learning_rate": 2.366828722650646e-05, "loss": 0.0901, "step": 18338 }, { "epoch": 0.32477519956434536, "grad_norm": 1.0023359060287476, "learning_rate": 2.3667585053531346e-05, "loss": 0.153, "step": 18339 }, { "epoch": 0.3247929091013738, "grad_norm": 0.8067276477813721, "learning_rate": 2.3666882852040755e-05, "loss": 0.1038, "step": 18340 }, { "epoch": 0.32481061863840227, "grad_norm": 0.6760597825050354, "learning_rate": 2.3666180622036997e-05, "loss": 0.0847, "step": 18341 }, { "epoch": 0.32482832817543067, "grad_norm": 0.8528137803077698, "learning_rate": 2.366547836352238e-05, "loss": 0.1211, "step": 18342 }, { "epoch": 0.3248460377124591, "grad_norm": 0.47247546911239624, "learning_rate": 2.3664776076499217e-05, "loss": 0.0643, "step": 18343 }, { "epoch": 0.3248637472494875, "grad_norm": 0.7196415662765503, "learning_rate": 2.366407376096981e-05, "loss": 0.0954, "step": 18344 }, { "epoch": 0.324881456786516, "grad_norm": 0.9554716944694519, "learning_rate": 2.3663371416936486e-05, "loss": 0.094, "step": 18345 }, { "epoch": 0.3248991663235444, "grad_norm": 0.7444411516189575, "learning_rate": 2.3662669044401547e-05, "loss": 0.1049, "step": 18346 }, { "epoch": 0.32491687586057283, "grad_norm": 0.6996881365776062, "learning_rate": 2.3661966643367296e-05, "loss": 0.0618, "step": 18347 }, { "epoch": 0.3249345853976012, "grad_norm": 0.7050393223762512, "learning_rate": 2.3661264213836054e-05, "loss": 0.0953, "step": 18348 }, { "epoch": 0.3249522949346297, "grad_norm": 0.5486283898353577, "learning_rate": 2.366056175581013e-05, "loss": 0.0834, "step": 18349 }, { "epoch": 0.3249700044716581, "grad_norm": 0.640310525894165, "learning_rate": 2.3659859269291833e-05, "loss": 0.0623, "step": 18350 }, { "epoch": 0.32498771400868653, "grad_norm": 0.7759705781936646, "learning_rate": 2.365915675428347e-05, "loss": 0.0697, "step": 18351 }, { "epoch": 0.32500542354571493, "grad_norm": 0.3851288855075836, "learning_rate": 2.3658454210787362e-05, "loss": 0.0652, "step": 18352 }, { "epoch": 0.3250231330827434, "grad_norm": 0.8316444754600525, "learning_rate": 2.3657751638805816e-05, "loss": 0.0747, "step": 18353 }, { "epoch": 0.3250408426197718, "grad_norm": 0.7457526922225952, "learning_rate": 2.365704903834114e-05, "loss": 0.1185, "step": 18354 }, { "epoch": 0.32505855215680024, "grad_norm": 0.9725096821784973, "learning_rate": 2.365634640939565e-05, "loss": 0.1194, "step": 18355 }, { "epoch": 0.3250762616938287, "grad_norm": 1.0402575731277466, "learning_rate": 2.3655643751971655e-05, "loss": 0.0889, "step": 18356 }, { "epoch": 0.3250939712308571, "grad_norm": 1.4420889616012573, "learning_rate": 2.365494106607147e-05, "loss": 0.0992, "step": 18357 }, { "epoch": 0.32511168076788555, "grad_norm": 0.8983940482139587, "learning_rate": 2.36542383516974e-05, "loss": 0.087, "step": 18358 }, { "epoch": 0.32512939030491395, "grad_norm": 0.9519171714782715, "learning_rate": 2.3653535608851767e-05, "loss": 0.086, "step": 18359 }, { "epoch": 0.3251470998419424, "grad_norm": 0.8045037388801575, "learning_rate": 2.3652832837536874e-05, "loss": 0.0981, "step": 18360 }, { "epoch": 0.3251648093789708, "grad_norm": 0.7282319068908691, "learning_rate": 2.3652130037755036e-05, "loss": 0.1073, "step": 18361 }, { "epoch": 0.32518251891599925, "grad_norm": 0.6796891689300537, "learning_rate": 2.3651427209508564e-05, "loss": 0.0763, "step": 18362 }, { "epoch": 0.32520022845302765, "grad_norm": 0.7130898833274841, "learning_rate": 2.3650724352799773e-05, "loss": 0.0657, "step": 18363 }, { "epoch": 0.3252179379900561, "grad_norm": 0.5796234011650085, "learning_rate": 2.3650021467630972e-05, "loss": 0.0399, "step": 18364 }, { "epoch": 0.3252356475270845, "grad_norm": 0.8444914817810059, "learning_rate": 2.3649318554004474e-05, "loss": 0.123, "step": 18365 }, { "epoch": 0.32525335706411296, "grad_norm": 0.9260479211807251, "learning_rate": 2.36486156119226e-05, "loss": 0.1186, "step": 18366 }, { "epoch": 0.32527106660114136, "grad_norm": 0.47059035301208496, "learning_rate": 2.364791264138765e-05, "loss": 0.0874, "step": 18367 }, { "epoch": 0.3252887761381698, "grad_norm": 0.6677044630050659, "learning_rate": 2.3647209642401945e-05, "loss": 0.0556, "step": 18368 }, { "epoch": 0.3253064856751982, "grad_norm": 0.9534856677055359, "learning_rate": 2.364650661496779e-05, "loss": 0.1043, "step": 18369 }, { "epoch": 0.32532419521222666, "grad_norm": 0.5667714476585388, "learning_rate": 2.3645803559087507e-05, "loss": 0.0902, "step": 18370 }, { "epoch": 0.3253419047492551, "grad_norm": 0.8925775289535522, "learning_rate": 2.3645100474763405e-05, "loss": 0.0952, "step": 18371 }, { "epoch": 0.3253596142862835, "grad_norm": 0.6436136960983276, "learning_rate": 2.3644397361997794e-05, "loss": 0.0766, "step": 18372 }, { "epoch": 0.32537732382331197, "grad_norm": 0.656916081905365, "learning_rate": 2.3643694220792994e-05, "loss": 0.1145, "step": 18373 }, { "epoch": 0.32539503336034037, "grad_norm": 0.8113610148429871, "learning_rate": 2.364299105115131e-05, "loss": 0.0736, "step": 18374 }, { "epoch": 0.3254127428973688, "grad_norm": 0.561495840549469, "learning_rate": 2.364228785307506e-05, "loss": 0.103, "step": 18375 }, { "epoch": 0.3254304524343972, "grad_norm": 0.7874147891998291, "learning_rate": 2.3641584626566558e-05, "loss": 0.0877, "step": 18376 }, { "epoch": 0.3254481619714257, "grad_norm": 0.48411503434181213, "learning_rate": 2.3640881371628117e-05, "loss": 0.0946, "step": 18377 }, { "epoch": 0.3254658715084541, "grad_norm": 0.6084145307540894, "learning_rate": 2.3640178088262054e-05, "loss": 0.0765, "step": 18378 }, { "epoch": 0.32548358104548253, "grad_norm": 0.6541017293930054, "learning_rate": 2.3639474776470673e-05, "loss": 0.1222, "step": 18379 }, { "epoch": 0.3255012905825109, "grad_norm": 0.5267530083656311, "learning_rate": 2.3638771436256295e-05, "loss": 0.1047, "step": 18380 }, { "epoch": 0.3255190001195394, "grad_norm": 0.7950040698051453, "learning_rate": 2.3638068067621234e-05, "loss": 0.0852, "step": 18381 }, { "epoch": 0.3255367096565678, "grad_norm": 0.7004420161247253, "learning_rate": 2.3637364670567805e-05, "loss": 0.1042, "step": 18382 }, { "epoch": 0.32555441919359623, "grad_norm": 0.636273980140686, "learning_rate": 2.363666124509831e-05, "loss": 0.0631, "step": 18383 }, { "epoch": 0.32557212873062463, "grad_norm": 0.6838135123252869, "learning_rate": 2.3635957791215086e-05, "loss": 0.0876, "step": 18384 }, { "epoch": 0.3255898382676531, "grad_norm": 0.7803669571876526, "learning_rate": 2.3635254308920425e-05, "loss": 0.096, "step": 18385 }, { "epoch": 0.32560754780468154, "grad_norm": 0.8091328740119934, "learning_rate": 2.363455079821666e-05, "loss": 0.0793, "step": 18386 }, { "epoch": 0.32562525734170994, "grad_norm": 0.7917863726615906, "learning_rate": 2.363384725910609e-05, "loss": 0.1372, "step": 18387 }, { "epoch": 0.3256429668787384, "grad_norm": 0.6976093649864197, "learning_rate": 2.3633143691591034e-05, "loss": 0.0847, "step": 18388 }, { "epoch": 0.3256606764157668, "grad_norm": 0.6622545123100281, "learning_rate": 2.3632440095673812e-05, "loss": 0.0917, "step": 18389 }, { "epoch": 0.32567838595279525, "grad_norm": 0.8526408672332764, "learning_rate": 2.3631736471356736e-05, "loss": 0.0804, "step": 18390 }, { "epoch": 0.32569609548982364, "grad_norm": 0.6504949927330017, "learning_rate": 2.3631032818642113e-05, "loss": 0.0618, "step": 18391 }, { "epoch": 0.3257138050268521, "grad_norm": 0.47838374972343445, "learning_rate": 2.363032913753227e-05, "loss": 0.088, "step": 18392 }, { "epoch": 0.3257315145638805, "grad_norm": 0.7743196487426758, "learning_rate": 2.3629625428029523e-05, "loss": 0.0676, "step": 18393 }, { "epoch": 0.32574922410090895, "grad_norm": 0.8268288969993591, "learning_rate": 2.362892169013617e-05, "loss": 0.1476, "step": 18394 }, { "epoch": 0.32576693363793735, "grad_norm": 0.5183914303779602, "learning_rate": 2.3628217923854545e-05, "loss": 0.0537, "step": 18395 }, { "epoch": 0.3257846431749658, "grad_norm": 0.8649522662162781, "learning_rate": 2.3627514129186953e-05, "loss": 0.1132, "step": 18396 }, { "epoch": 0.3258023527119942, "grad_norm": 0.7859053611755371, "learning_rate": 2.3626810306135715e-05, "loss": 0.0897, "step": 18397 }, { "epoch": 0.32582006224902266, "grad_norm": 1.1565842628479004, "learning_rate": 2.362610645470314e-05, "loss": 0.0837, "step": 18398 }, { "epoch": 0.32583777178605106, "grad_norm": 0.5137482285499573, "learning_rate": 2.3625402574891546e-05, "loss": 0.0944, "step": 18399 }, { "epoch": 0.3258554813230795, "grad_norm": 0.9312822222709656, "learning_rate": 2.3624698666703254e-05, "loss": 0.1044, "step": 18400 }, { "epoch": 0.32587319086010796, "grad_norm": 1.2582921981811523, "learning_rate": 2.3623994730140574e-05, "loss": 0.0896, "step": 18401 }, { "epoch": 0.32589090039713636, "grad_norm": 0.5342597365379333, "learning_rate": 2.362329076520582e-05, "loss": 0.1129, "step": 18402 }, { "epoch": 0.3259086099341648, "grad_norm": 0.7285088896751404, "learning_rate": 2.362258677190132e-05, "loss": 0.082, "step": 18403 }, { "epoch": 0.3259263194711932, "grad_norm": 0.4899722933769226, "learning_rate": 2.362188275022938e-05, "loss": 0.0762, "step": 18404 }, { "epoch": 0.32594402900822167, "grad_norm": 0.7343809008598328, "learning_rate": 2.3621178700192315e-05, "loss": 0.0722, "step": 18405 }, { "epoch": 0.32596173854525007, "grad_norm": 0.7444857954978943, "learning_rate": 2.3620474621792437e-05, "loss": 0.0959, "step": 18406 }, { "epoch": 0.3259794480822785, "grad_norm": 0.7335630059242249, "learning_rate": 2.3619770515032083e-05, "loss": 0.064, "step": 18407 }, { "epoch": 0.3259971576193069, "grad_norm": 0.5473909974098206, "learning_rate": 2.3619066379913544e-05, "loss": 0.0808, "step": 18408 }, { "epoch": 0.3260148671563354, "grad_norm": 1.6218976974487305, "learning_rate": 2.3618362216439154e-05, "loss": 0.115, "step": 18409 }, { "epoch": 0.3260325766933638, "grad_norm": 0.8509296178817749, "learning_rate": 2.3617658024611224e-05, "loss": 0.091, "step": 18410 }, { "epoch": 0.32605028623039223, "grad_norm": 0.6279837489128113, "learning_rate": 2.361695380443207e-05, "loss": 0.1071, "step": 18411 }, { "epoch": 0.3260679957674206, "grad_norm": 1.0394585132598877, "learning_rate": 2.3616249555904013e-05, "loss": 0.0817, "step": 18412 }, { "epoch": 0.3260857053044491, "grad_norm": 0.5938669443130493, "learning_rate": 2.361554527902936e-05, "loss": 0.0678, "step": 18413 }, { "epoch": 0.3261034148414775, "grad_norm": 0.8035743832588196, "learning_rate": 2.361484097381044e-05, "loss": 0.0763, "step": 18414 }, { "epoch": 0.32612112437850593, "grad_norm": 1.0373798608779907, "learning_rate": 2.361413664024956e-05, "loss": 0.0757, "step": 18415 }, { "epoch": 0.3261388339155344, "grad_norm": 0.7091127634048462, "learning_rate": 2.3613432278349044e-05, "loss": 0.0709, "step": 18416 }, { "epoch": 0.3261565434525628, "grad_norm": 0.6381905674934387, "learning_rate": 2.3612727888111208e-05, "loss": 0.0516, "step": 18417 }, { "epoch": 0.32617425298959124, "grad_norm": 0.6324994564056396, "learning_rate": 2.3612023469538365e-05, "loss": 0.0568, "step": 18418 }, { "epoch": 0.32619196252661964, "grad_norm": 0.48243337869644165, "learning_rate": 2.3611319022632843e-05, "loss": 0.0867, "step": 18419 }, { "epoch": 0.3262096720636481, "grad_norm": 0.8429363369941711, "learning_rate": 2.3610614547396942e-05, "loss": 0.1186, "step": 18420 }, { "epoch": 0.3262273816006765, "grad_norm": 0.6659532189369202, "learning_rate": 2.3609910043832996e-05, "loss": 0.0766, "step": 18421 }, { "epoch": 0.32624509113770495, "grad_norm": 0.589245617389679, "learning_rate": 2.360920551194332e-05, "loss": 0.1307, "step": 18422 }, { "epoch": 0.32626280067473334, "grad_norm": 0.7438409328460693, "learning_rate": 2.3608500951730224e-05, "loss": 0.0642, "step": 18423 }, { "epoch": 0.3262805102117618, "grad_norm": 0.928297758102417, "learning_rate": 2.3607796363196032e-05, "loss": 0.0801, "step": 18424 }, { "epoch": 0.3262982197487902, "grad_norm": 0.6575354337692261, "learning_rate": 2.360709174634306e-05, "loss": 0.0581, "step": 18425 }, { "epoch": 0.32631592928581865, "grad_norm": 0.8001993298530579, "learning_rate": 2.3606387101173626e-05, "loss": 0.0842, "step": 18426 }, { "epoch": 0.32633363882284705, "grad_norm": 0.8677672147750854, "learning_rate": 2.360568242769005e-05, "loss": 0.0964, "step": 18427 }, { "epoch": 0.3263513483598755, "grad_norm": 0.7175154089927673, "learning_rate": 2.3604977725894648e-05, "loss": 0.0589, "step": 18428 }, { "epoch": 0.3263690578969039, "grad_norm": 1.0715830326080322, "learning_rate": 2.360427299578974e-05, "loss": 0.0674, "step": 18429 }, { "epoch": 0.32638676743393236, "grad_norm": 0.47123879194259644, "learning_rate": 2.3603568237377644e-05, "loss": 0.0765, "step": 18430 }, { "epoch": 0.3264044769709608, "grad_norm": 0.5575540661811829, "learning_rate": 2.360286345066068e-05, "loss": 0.1103, "step": 18431 }, { "epoch": 0.3264221865079892, "grad_norm": 1.0864540338516235, "learning_rate": 2.3602158635641165e-05, "loss": 0.1086, "step": 18432 }, { "epoch": 0.32643989604501766, "grad_norm": 0.9225795865058899, "learning_rate": 2.3601453792321418e-05, "loss": 0.0958, "step": 18433 }, { "epoch": 0.32645760558204606, "grad_norm": 0.6126257181167603, "learning_rate": 2.3600748920703757e-05, "loss": 0.0628, "step": 18434 }, { "epoch": 0.3264753151190745, "grad_norm": 0.7106508612632751, "learning_rate": 2.3600044020790506e-05, "loss": 0.08, "step": 18435 }, { "epoch": 0.3264930246561029, "grad_norm": 0.8132098913192749, "learning_rate": 2.3599339092583977e-05, "loss": 0.0659, "step": 18436 }, { "epoch": 0.32651073419313137, "grad_norm": 0.6429843306541443, "learning_rate": 2.359863413608649e-05, "loss": 0.1337, "step": 18437 }, { "epoch": 0.32652844373015977, "grad_norm": 0.796506404876709, "learning_rate": 2.3597929151300368e-05, "loss": 0.0845, "step": 18438 }, { "epoch": 0.3265461532671882, "grad_norm": 0.6998439431190491, "learning_rate": 2.359722413822793e-05, "loss": 0.0691, "step": 18439 }, { "epoch": 0.3265638628042166, "grad_norm": 0.8035129308700562, "learning_rate": 2.3596519096871494e-05, "loss": 0.1159, "step": 18440 }, { "epoch": 0.3265815723412451, "grad_norm": 0.5361895561218262, "learning_rate": 2.359581402723338e-05, "loss": 0.0744, "step": 18441 }, { "epoch": 0.3265992818782735, "grad_norm": 0.5948096513748169, "learning_rate": 2.359510892931591e-05, "loss": 0.0825, "step": 18442 }, { "epoch": 0.3266169914153019, "grad_norm": 0.617941677570343, "learning_rate": 2.3594403803121394e-05, "loss": 0.0757, "step": 18443 }, { "epoch": 0.3266347009523304, "grad_norm": 0.6488475203514099, "learning_rate": 2.359369864865216e-05, "loss": 0.1461, "step": 18444 }, { "epoch": 0.3266524104893588, "grad_norm": 0.7246256470680237, "learning_rate": 2.359299346591053e-05, "loss": 0.0682, "step": 18445 }, { "epoch": 0.32667012002638723, "grad_norm": 0.8373600244522095, "learning_rate": 2.3592288254898816e-05, "loss": 0.1004, "step": 18446 }, { "epoch": 0.32668782956341563, "grad_norm": 0.7299478650093079, "learning_rate": 2.3591583015619348e-05, "loss": 0.0968, "step": 18447 }, { "epoch": 0.3267055391004441, "grad_norm": 0.7796523571014404, "learning_rate": 2.359087774807444e-05, "loss": 0.0912, "step": 18448 }, { "epoch": 0.3267232486374725, "grad_norm": 0.9610564708709717, "learning_rate": 2.3590172452266415e-05, "loss": 0.1004, "step": 18449 }, { "epoch": 0.32674095817450094, "grad_norm": 0.9804389476776123, "learning_rate": 2.3589467128197584e-05, "loss": 0.1061, "step": 18450 }, { "epoch": 0.32675866771152934, "grad_norm": 0.6823185086250305, "learning_rate": 2.358876177587028e-05, "loss": 0.0813, "step": 18451 }, { "epoch": 0.3267763772485578, "grad_norm": 0.9390901923179626, "learning_rate": 2.358805639528682e-05, "loss": 0.1185, "step": 18452 }, { "epoch": 0.3267940867855862, "grad_norm": 0.29508885741233826, "learning_rate": 2.358735098644952e-05, "loss": 0.0917, "step": 18453 }, { "epoch": 0.32681179632261464, "grad_norm": 0.9511811137199402, "learning_rate": 2.3586645549360703e-05, "loss": 0.0959, "step": 18454 }, { "epoch": 0.32682950585964304, "grad_norm": 0.7455751895904541, "learning_rate": 2.3585940084022694e-05, "loss": 0.0599, "step": 18455 }, { "epoch": 0.3268472153966715, "grad_norm": 0.5765321850776672, "learning_rate": 2.3585234590437806e-05, "loss": 0.0706, "step": 18456 }, { "epoch": 0.3268649249336999, "grad_norm": 0.8069567680358887, "learning_rate": 2.358452906860837e-05, "loss": 0.1012, "step": 18457 }, { "epoch": 0.32688263447072835, "grad_norm": 0.8697959184646606, "learning_rate": 2.3583823518536696e-05, "loss": 0.0842, "step": 18458 }, { "epoch": 0.3269003440077568, "grad_norm": 1.2302387952804565, "learning_rate": 2.3583117940225112e-05, "loss": 0.1073, "step": 18459 }, { "epoch": 0.3269180535447852, "grad_norm": 0.7189081907272339, "learning_rate": 2.3582412333675944e-05, "loss": 0.0601, "step": 18460 }, { "epoch": 0.32693576308181366, "grad_norm": 0.8756471276283264, "learning_rate": 2.35817066988915e-05, "loss": 0.1101, "step": 18461 }, { "epoch": 0.32695347261884206, "grad_norm": 0.7459902763366699, "learning_rate": 2.3581001035874115e-05, "loss": 0.0919, "step": 18462 }, { "epoch": 0.3269711821558705, "grad_norm": 0.9336832165718079, "learning_rate": 2.3580295344626103e-05, "loss": 0.1082, "step": 18463 }, { "epoch": 0.3269888916928989, "grad_norm": 0.6115849614143372, "learning_rate": 2.3579589625149787e-05, "loss": 0.1037, "step": 18464 }, { "epoch": 0.32700660122992736, "grad_norm": 0.49401578307151794, "learning_rate": 2.3578883877447487e-05, "loss": 0.0801, "step": 18465 }, { "epoch": 0.32702431076695576, "grad_norm": 0.7536622881889343, "learning_rate": 2.3578178101521527e-05, "loss": 0.0598, "step": 18466 }, { "epoch": 0.3270420203039842, "grad_norm": 0.6699895858764648, "learning_rate": 2.3577472297374232e-05, "loss": 0.0643, "step": 18467 }, { "epoch": 0.3270597298410126, "grad_norm": 0.6588349938392639, "learning_rate": 2.3576766465007916e-05, "loss": 0.0714, "step": 18468 }, { "epoch": 0.32707743937804107, "grad_norm": 1.1068429946899414, "learning_rate": 2.3576060604424907e-05, "loss": 0.0769, "step": 18469 }, { "epoch": 0.32709514891506947, "grad_norm": 0.9334536790847778, "learning_rate": 2.3575354715627528e-05, "loss": 0.1437, "step": 18470 }, { "epoch": 0.3271128584520979, "grad_norm": 1.6722790002822876, "learning_rate": 2.35746487986181e-05, "loss": 0.1241, "step": 18471 }, { "epoch": 0.3271305679891263, "grad_norm": 0.5498533844947815, "learning_rate": 2.357394285339894e-05, "loss": 0.1134, "step": 18472 }, { "epoch": 0.3271482775261548, "grad_norm": 0.7621027231216431, "learning_rate": 2.357323687997238e-05, "loss": 0.1051, "step": 18473 }, { "epoch": 0.32716598706318323, "grad_norm": 1.3216854333877563, "learning_rate": 2.3572530878340736e-05, "loss": 0.0668, "step": 18474 }, { "epoch": 0.3271836966002116, "grad_norm": 0.682427704334259, "learning_rate": 2.357182484850633e-05, "loss": 0.0905, "step": 18475 }, { "epoch": 0.3272014061372401, "grad_norm": 0.6182413697242737, "learning_rate": 2.3571118790471493e-05, "loss": 0.1268, "step": 18476 }, { "epoch": 0.3272191156742685, "grad_norm": 1.172483205795288, "learning_rate": 2.357041270423854e-05, "loss": 0.1125, "step": 18477 }, { "epoch": 0.32723682521129693, "grad_norm": 0.6637347340583801, "learning_rate": 2.3569706589809792e-05, "loss": 0.0762, "step": 18478 }, { "epoch": 0.32725453474832533, "grad_norm": 1.0397751331329346, "learning_rate": 2.3569000447187576e-05, "loss": 0.0976, "step": 18479 }, { "epoch": 0.3272722442853538, "grad_norm": 0.6221491694450378, "learning_rate": 2.3568294276374223e-05, "loss": 0.1222, "step": 18480 }, { "epoch": 0.3272899538223822, "grad_norm": 0.7084989547729492, "learning_rate": 2.3567588077372043e-05, "loss": 0.1092, "step": 18481 }, { "epoch": 0.32730766335941064, "grad_norm": 0.6529759168624878, "learning_rate": 2.3566881850183365e-05, "loss": 0.0684, "step": 18482 }, { "epoch": 0.32732537289643904, "grad_norm": 0.7279620170593262, "learning_rate": 2.3566175594810516e-05, "loss": 0.1077, "step": 18483 }, { "epoch": 0.3273430824334675, "grad_norm": 0.7371078133583069, "learning_rate": 2.3565469311255807e-05, "loss": 0.0981, "step": 18484 }, { "epoch": 0.3273607919704959, "grad_norm": 1.047761082649231, "learning_rate": 2.3564762999521576e-05, "loss": 0.0967, "step": 18485 }, { "epoch": 0.32737850150752434, "grad_norm": 1.063568115234375, "learning_rate": 2.356405665961014e-05, "loss": 0.1083, "step": 18486 }, { "epoch": 0.32739621104455274, "grad_norm": 0.6837515234947205, "learning_rate": 2.3563350291523823e-05, "loss": 0.1246, "step": 18487 }, { "epoch": 0.3274139205815812, "grad_norm": 0.563694953918457, "learning_rate": 2.3562643895264956e-05, "loss": 0.0763, "step": 18488 }, { "epoch": 0.32743163011860965, "grad_norm": 0.7733790874481201, "learning_rate": 2.356193747083585e-05, "loss": 0.1008, "step": 18489 }, { "epoch": 0.32744933965563805, "grad_norm": 1.2009921073913574, "learning_rate": 2.356123101823884e-05, "loss": 0.1317, "step": 18490 }, { "epoch": 0.3274670491926665, "grad_norm": 0.7263088822364807, "learning_rate": 2.356052453747624e-05, "loss": 0.1139, "step": 18491 }, { "epoch": 0.3274847587296949, "grad_norm": 0.6760522723197937, "learning_rate": 2.3559818028550384e-05, "loss": 0.0762, "step": 18492 }, { "epoch": 0.32750246826672336, "grad_norm": 0.9892285466194153, "learning_rate": 2.3559111491463593e-05, "loss": 0.0653, "step": 18493 }, { "epoch": 0.32752017780375176, "grad_norm": 0.6731255054473877, "learning_rate": 2.355840492621819e-05, "loss": 0.1185, "step": 18494 }, { "epoch": 0.3275378873407802, "grad_norm": 1.1422688961029053, "learning_rate": 2.35576983328165e-05, "loss": 0.1269, "step": 18495 }, { "epoch": 0.3275555968778086, "grad_norm": 0.542881965637207, "learning_rate": 2.355699171126085e-05, "loss": 0.0645, "step": 18496 }, { "epoch": 0.32757330641483706, "grad_norm": 0.41667813062667847, "learning_rate": 2.3556285061553558e-05, "loss": 0.0555, "step": 18497 }, { "epoch": 0.32759101595186546, "grad_norm": 0.5992022156715393, "learning_rate": 2.355557838369696e-05, "loss": 0.0798, "step": 18498 }, { "epoch": 0.3276087254888939, "grad_norm": 0.6235132217407227, "learning_rate": 2.355487167769337e-05, "loss": 0.0875, "step": 18499 }, { "epoch": 0.3276264350259223, "grad_norm": 0.6416141390800476, "learning_rate": 2.355416494354512e-05, "loss": 0.0889, "step": 18500 }, { "epoch": 0.32764414456295077, "grad_norm": 0.5286750197410583, "learning_rate": 2.3553458181254526e-05, "loss": 0.1109, "step": 18501 }, { "epoch": 0.32766185409997917, "grad_norm": 1.277791976928711, "learning_rate": 2.3552751390823924e-05, "loss": 0.1025, "step": 18502 }, { "epoch": 0.3276795636370076, "grad_norm": 0.6176666021347046, "learning_rate": 2.3552044572255637e-05, "loss": 0.1035, "step": 18503 }, { "epoch": 0.3276972731740361, "grad_norm": 0.5023122429847717, "learning_rate": 2.355133772555199e-05, "loss": 0.1046, "step": 18504 }, { "epoch": 0.3277149827110645, "grad_norm": 0.642061710357666, "learning_rate": 2.3550630850715304e-05, "loss": 0.1094, "step": 18505 }, { "epoch": 0.3277326922480929, "grad_norm": 0.5486158132553101, "learning_rate": 2.3549923947747908e-05, "loss": 0.0646, "step": 18506 }, { "epoch": 0.3277504017851213, "grad_norm": 0.5482416152954102, "learning_rate": 2.3549217016652124e-05, "loss": 0.0751, "step": 18507 }, { "epoch": 0.3277681113221498, "grad_norm": 0.6956694722175598, "learning_rate": 2.3548510057430285e-05, "loss": 0.0761, "step": 18508 }, { "epoch": 0.3277858208591782, "grad_norm": 0.9227741360664368, "learning_rate": 2.354780307008471e-05, "loss": 0.0846, "step": 18509 }, { "epoch": 0.32780353039620663, "grad_norm": 0.8772706985473633, "learning_rate": 2.3547096054617727e-05, "loss": 0.1113, "step": 18510 }, { "epoch": 0.32782123993323503, "grad_norm": 0.9954391717910767, "learning_rate": 2.3546389011031664e-05, "loss": 0.0844, "step": 18511 }, { "epoch": 0.3278389494702635, "grad_norm": 0.7152516841888428, "learning_rate": 2.3545681939328844e-05, "loss": 0.081, "step": 18512 }, { "epoch": 0.3278566590072919, "grad_norm": 1.3813154697418213, "learning_rate": 2.35449748395116e-05, "loss": 0.1191, "step": 18513 }, { "epoch": 0.32787436854432034, "grad_norm": 0.49210527539253235, "learning_rate": 2.3544267711582246e-05, "loss": 0.081, "step": 18514 }, { "epoch": 0.32789207808134874, "grad_norm": 0.5666748285293579, "learning_rate": 2.3543560555543116e-05, "loss": 0.094, "step": 18515 }, { "epoch": 0.3279097876183772, "grad_norm": 0.7400175929069519, "learning_rate": 2.354285337139654e-05, "loss": 0.106, "step": 18516 }, { "epoch": 0.3279274971554056, "grad_norm": 1.140555500984192, "learning_rate": 2.3542146159144838e-05, "loss": 0.0809, "step": 18517 }, { "epoch": 0.32794520669243404, "grad_norm": 1.0298651456832886, "learning_rate": 2.3541438918790338e-05, "loss": 0.1081, "step": 18518 }, { "epoch": 0.3279629162294625, "grad_norm": 0.5233180522918701, "learning_rate": 2.354073165033537e-05, "loss": 0.0715, "step": 18519 }, { "epoch": 0.3279806257664909, "grad_norm": 1.1253068447113037, "learning_rate": 2.354002435378226e-05, "loss": 0.1406, "step": 18520 }, { "epoch": 0.32799833530351935, "grad_norm": 0.740950345993042, "learning_rate": 2.3539317029133332e-05, "loss": 0.104, "step": 18521 }, { "epoch": 0.32801604484054775, "grad_norm": 0.7226127982139587, "learning_rate": 2.3538609676390916e-05, "loss": 0.0971, "step": 18522 }, { "epoch": 0.3280337543775762, "grad_norm": 0.5935165882110596, "learning_rate": 2.3537902295557337e-05, "loss": 0.0758, "step": 18523 }, { "epoch": 0.3280514639146046, "grad_norm": 0.8425658345222473, "learning_rate": 2.353719488663492e-05, "loss": 0.0614, "step": 18524 }, { "epoch": 0.32806917345163306, "grad_norm": 0.5741614103317261, "learning_rate": 2.3536487449625995e-05, "loss": 0.0807, "step": 18525 }, { "epoch": 0.32808688298866145, "grad_norm": 0.8772500157356262, "learning_rate": 2.3535779984532895e-05, "loss": 0.0898, "step": 18526 }, { "epoch": 0.3281045925256899, "grad_norm": 0.8545798659324646, "learning_rate": 2.353507249135794e-05, "loss": 0.0765, "step": 18527 }, { "epoch": 0.3281223020627183, "grad_norm": 0.7768099904060364, "learning_rate": 2.3534364970103458e-05, "loss": 0.0742, "step": 18528 }, { "epoch": 0.32814001159974676, "grad_norm": 0.5678882598876953, "learning_rate": 2.3533657420771782e-05, "loss": 0.1084, "step": 18529 }, { "epoch": 0.32815772113677516, "grad_norm": 0.771324098110199, "learning_rate": 2.3532949843365234e-05, "loss": 0.0981, "step": 18530 }, { "epoch": 0.3281754306738036, "grad_norm": 0.8525328636169434, "learning_rate": 2.3532242237886143e-05, "loss": 0.0693, "step": 18531 }, { "epoch": 0.328193140210832, "grad_norm": 0.796138346195221, "learning_rate": 2.353153460433684e-05, "loss": 0.0969, "step": 18532 }, { "epoch": 0.32821084974786047, "grad_norm": 0.7640347480773926, "learning_rate": 2.353082694271965e-05, "loss": 0.1135, "step": 18533 }, { "epoch": 0.3282285592848889, "grad_norm": 0.9031440019607544, "learning_rate": 2.3530119253036902e-05, "loss": 0.0711, "step": 18534 }, { "epoch": 0.3282462688219173, "grad_norm": 0.9516984224319458, "learning_rate": 2.3529411535290927e-05, "loss": 0.1331, "step": 18535 }, { "epoch": 0.3282639783589458, "grad_norm": 0.8567425608634949, "learning_rate": 2.3528703789484047e-05, "loss": 0.0965, "step": 18536 }, { "epoch": 0.3282816878959742, "grad_norm": 0.8003213405609131, "learning_rate": 2.3527996015618594e-05, "loss": 0.0897, "step": 18537 }, { "epoch": 0.3282993974330026, "grad_norm": 1.0905948877334595, "learning_rate": 2.3527288213696904e-05, "loss": 0.1132, "step": 18538 }, { "epoch": 0.328317106970031, "grad_norm": 0.6416653394699097, "learning_rate": 2.352658038372129e-05, "loss": 0.08, "step": 18539 }, { "epoch": 0.3283348165070595, "grad_norm": 0.8703566789627075, "learning_rate": 2.352587252569409e-05, "loss": 0.1085, "step": 18540 }, { "epoch": 0.3283525260440879, "grad_norm": 0.7693983316421509, "learning_rate": 2.3525164639617633e-05, "loss": 0.0937, "step": 18541 }, { "epoch": 0.32837023558111633, "grad_norm": 0.9402031898498535, "learning_rate": 2.3524456725494247e-05, "loss": 0.1014, "step": 18542 }, { "epoch": 0.32838794511814473, "grad_norm": 0.6683091521263123, "learning_rate": 2.3523748783326262e-05, "loss": 0.0708, "step": 18543 }, { "epoch": 0.3284056546551732, "grad_norm": 0.5861568450927734, "learning_rate": 2.3523040813116e-05, "loss": 0.0485, "step": 18544 }, { "epoch": 0.3284233641922016, "grad_norm": 1.1247764825820923, "learning_rate": 2.3522332814865805e-05, "loss": 0.13, "step": 18545 }, { "epoch": 0.32844107372923004, "grad_norm": 0.6885795593261719, "learning_rate": 2.3521624788577988e-05, "loss": 0.079, "step": 18546 }, { "epoch": 0.32845878326625844, "grad_norm": 1.112184762954712, "learning_rate": 2.3520916734254895e-05, "loss": 0.0954, "step": 18547 }, { "epoch": 0.3284764928032869, "grad_norm": 0.6474968194961548, "learning_rate": 2.3520208651898844e-05, "loss": 0.0627, "step": 18548 }, { "epoch": 0.32849420234031534, "grad_norm": 0.8361280560493469, "learning_rate": 2.3519500541512165e-05, "loss": 0.0896, "step": 18549 }, { "epoch": 0.32851191187734374, "grad_norm": 0.6802014112472534, "learning_rate": 2.3518792403097197e-05, "loss": 0.0637, "step": 18550 }, { "epoch": 0.3285296214143722, "grad_norm": 0.7122271656990051, "learning_rate": 2.3518084236656263e-05, "loss": 0.087, "step": 18551 }, { "epoch": 0.3285473309514006, "grad_norm": 0.5649420022964478, "learning_rate": 2.351737604219169e-05, "loss": 0.0867, "step": 18552 }, { "epoch": 0.32856504048842905, "grad_norm": 0.6225085854530334, "learning_rate": 2.3516667819705812e-05, "loss": 0.0815, "step": 18553 }, { "epoch": 0.32858275002545745, "grad_norm": 0.8861651420593262, "learning_rate": 2.351595956920096e-05, "loss": 0.1062, "step": 18554 }, { "epoch": 0.3286004595624859, "grad_norm": 1.0548479557037354, "learning_rate": 2.3515251290679462e-05, "loss": 0.1085, "step": 18555 }, { "epoch": 0.3286181690995143, "grad_norm": 0.9060632586479187, "learning_rate": 2.3514542984143646e-05, "loss": 0.0922, "step": 18556 }, { "epoch": 0.32863587863654276, "grad_norm": 0.9757000207901001, "learning_rate": 2.3513834649595847e-05, "loss": 0.1156, "step": 18557 }, { "epoch": 0.32865358817357115, "grad_norm": 0.6312735676765442, "learning_rate": 2.3513126287038396e-05, "loss": 0.0855, "step": 18558 }, { "epoch": 0.3286712977105996, "grad_norm": 0.9670249819755554, "learning_rate": 2.3512417896473615e-05, "loss": 0.093, "step": 18559 }, { "epoch": 0.328689007247628, "grad_norm": 0.6964777708053589, "learning_rate": 2.351170947790384e-05, "loss": 0.106, "step": 18560 }, { "epoch": 0.32870671678465646, "grad_norm": 1.0171500444412231, "learning_rate": 2.3511001031331405e-05, "loss": 0.0942, "step": 18561 }, { "epoch": 0.32872442632168486, "grad_norm": 0.8081861734390259, "learning_rate": 2.3510292556758635e-05, "loss": 0.0737, "step": 18562 }, { "epoch": 0.3287421358587133, "grad_norm": 0.5603337287902832, "learning_rate": 2.3509584054187868e-05, "loss": 0.0683, "step": 18563 }, { "epoch": 0.32875984539574177, "grad_norm": 0.5821303725242615, "learning_rate": 2.3508875523621425e-05, "loss": 0.1197, "step": 18564 }, { "epoch": 0.32877755493277017, "grad_norm": 0.6636070609092712, "learning_rate": 2.3508166965061645e-05, "loss": 0.0792, "step": 18565 }, { "epoch": 0.3287952644697986, "grad_norm": 0.9634894728660583, "learning_rate": 2.3507458378510855e-05, "loss": 0.0721, "step": 18566 }, { "epoch": 0.328812974006827, "grad_norm": 0.636511504650116, "learning_rate": 2.350674976397139e-05, "loss": 0.0795, "step": 18567 }, { "epoch": 0.3288306835438555, "grad_norm": 0.6253814101219177, "learning_rate": 2.3506041121445575e-05, "loss": 0.0777, "step": 18568 }, { "epoch": 0.32884839308088387, "grad_norm": 0.5826215147972107, "learning_rate": 2.3505332450935744e-05, "loss": 0.0923, "step": 18569 }, { "epoch": 0.3288661026179123, "grad_norm": 0.5637623071670532, "learning_rate": 2.350462375244423e-05, "loss": 0.0888, "step": 18570 }, { "epoch": 0.3288838121549407, "grad_norm": 1.3027187585830688, "learning_rate": 2.350391502597337e-05, "loss": 0.1088, "step": 18571 }, { "epoch": 0.3289015216919692, "grad_norm": 1.044331669807434, "learning_rate": 2.3503206271525487e-05, "loss": 0.1111, "step": 18572 }, { "epoch": 0.3289192312289976, "grad_norm": 1.0878721475601196, "learning_rate": 2.350249748910291e-05, "loss": 0.0781, "step": 18573 }, { "epoch": 0.32893694076602603, "grad_norm": 0.7512879967689514, "learning_rate": 2.3501788678707985e-05, "loss": 0.0856, "step": 18574 }, { "epoch": 0.32895465030305443, "grad_norm": 1.0572009086608887, "learning_rate": 2.3501079840343027e-05, "loss": 0.0886, "step": 18575 }, { "epoch": 0.3289723598400829, "grad_norm": 0.7921658158302307, "learning_rate": 2.350037097401038e-05, "loss": 0.0952, "step": 18576 }, { "epoch": 0.3289900693771113, "grad_norm": 0.8714838027954102, "learning_rate": 2.349966207971237e-05, "loss": 0.1001, "step": 18577 }, { "epoch": 0.32900777891413974, "grad_norm": 0.6863985061645508, "learning_rate": 2.3498953157451337e-05, "loss": 0.1189, "step": 18578 }, { "epoch": 0.3290254884511682, "grad_norm": 0.7781232595443726, "learning_rate": 2.3498244207229604e-05, "loss": 0.1156, "step": 18579 }, { "epoch": 0.3290431979881966, "grad_norm": 0.7393116354942322, "learning_rate": 2.3497535229049505e-05, "loss": 0.0714, "step": 18580 }, { "epoch": 0.32906090752522504, "grad_norm": 0.7287595868110657, "learning_rate": 2.349682622291338e-05, "loss": 0.093, "step": 18581 }, { "epoch": 0.32907861706225344, "grad_norm": 1.2641359567642212, "learning_rate": 2.349611718882355e-05, "loss": 0.12, "step": 18582 }, { "epoch": 0.3290963265992819, "grad_norm": 0.7143471240997314, "learning_rate": 2.349540812678236e-05, "loss": 0.083, "step": 18583 }, { "epoch": 0.3291140361363103, "grad_norm": 0.7368713021278381, "learning_rate": 2.3494699036792134e-05, "loss": 0.0919, "step": 18584 }, { "epoch": 0.32913174567333875, "grad_norm": 0.999732255935669, "learning_rate": 2.3493989918855204e-05, "loss": 0.0952, "step": 18585 }, { "epoch": 0.32914945521036715, "grad_norm": 0.8613649010658264, "learning_rate": 2.349328077297391e-05, "loss": 0.1141, "step": 18586 }, { "epoch": 0.3291671647473956, "grad_norm": 0.5883986949920654, "learning_rate": 2.3492571599150578e-05, "loss": 0.1168, "step": 18587 }, { "epoch": 0.329184874284424, "grad_norm": 0.9740463495254517, "learning_rate": 2.3491862397387547e-05, "loss": 0.1008, "step": 18588 }, { "epoch": 0.32920258382145245, "grad_norm": 0.7454268336296082, "learning_rate": 2.3491153167687148e-05, "loss": 0.0718, "step": 18589 }, { "epoch": 0.32922029335848085, "grad_norm": 0.5114043951034546, "learning_rate": 2.3490443910051716e-05, "loss": 0.08, "step": 18590 }, { "epoch": 0.3292380028955093, "grad_norm": 0.4945475161075592, "learning_rate": 2.3489734624483578e-05, "loss": 0.0418, "step": 18591 }, { "epoch": 0.3292557124325377, "grad_norm": 1.2937830686569214, "learning_rate": 2.3489025310985074e-05, "loss": 0.0993, "step": 18592 }, { "epoch": 0.32927342196956616, "grad_norm": 0.6935945749282837, "learning_rate": 2.3488315969558534e-05, "loss": 0.0735, "step": 18593 }, { "epoch": 0.3292911315065946, "grad_norm": 0.8021650314331055, "learning_rate": 2.348760660020629e-05, "loss": 0.0956, "step": 18594 }, { "epoch": 0.329308841043623, "grad_norm": 1.35861074924469, "learning_rate": 2.3486897202930685e-05, "loss": 0.097, "step": 18595 }, { "epoch": 0.32932655058065147, "grad_norm": 0.7915561199188232, "learning_rate": 2.348618777773404e-05, "loss": 0.109, "step": 18596 }, { "epoch": 0.32934426011767987, "grad_norm": 0.7921527624130249, "learning_rate": 2.3485478324618703e-05, "loss": 0.0786, "step": 18597 }, { "epoch": 0.3293619696547083, "grad_norm": 0.6520946025848389, "learning_rate": 2.3484768843586995e-05, "loss": 0.0859, "step": 18598 }, { "epoch": 0.3293796791917367, "grad_norm": 1.0073760747909546, "learning_rate": 2.348405933464126e-05, "loss": 0.1096, "step": 18599 }, { "epoch": 0.3293973887287652, "grad_norm": 0.9309085607528687, "learning_rate": 2.3483349797783822e-05, "loss": 0.1059, "step": 18600 }, { "epoch": 0.32941509826579357, "grad_norm": 0.9209330677986145, "learning_rate": 2.3482640233017026e-05, "loss": 0.0812, "step": 18601 }, { "epoch": 0.329432807802822, "grad_norm": 0.8290183544158936, "learning_rate": 2.34819306403432e-05, "loss": 0.0989, "step": 18602 }, { "epoch": 0.3294505173398504, "grad_norm": 0.6478266716003418, "learning_rate": 2.3481221019764683e-05, "loss": 0.0869, "step": 18603 }, { "epoch": 0.3294682268768789, "grad_norm": 0.9165462851524353, "learning_rate": 2.3480511371283804e-05, "loss": 0.1004, "step": 18604 }, { "epoch": 0.3294859364139073, "grad_norm": 0.8409436941146851, "learning_rate": 2.34798016949029e-05, "loss": 0.0901, "step": 18605 }, { "epoch": 0.32950364595093573, "grad_norm": 0.9242444634437561, "learning_rate": 2.3479091990624302e-05, "loss": 0.1061, "step": 18606 }, { "epoch": 0.32952135548796413, "grad_norm": 0.6554204821586609, "learning_rate": 2.3478382258450358e-05, "loss": 0.0677, "step": 18607 }, { "epoch": 0.3295390650249926, "grad_norm": 0.7232133746147156, "learning_rate": 2.347767249838339e-05, "loss": 0.0502, "step": 18608 }, { "epoch": 0.32955677456202104, "grad_norm": 0.6239949464797974, "learning_rate": 2.347696271042574e-05, "loss": 0.1007, "step": 18609 }, { "epoch": 0.32957448409904944, "grad_norm": 1.617599606513977, "learning_rate": 2.3476252894579732e-05, "loss": 0.1344, "step": 18610 }, { "epoch": 0.3295921936360779, "grad_norm": 0.5969352126121521, "learning_rate": 2.3475543050847713e-05, "loss": 0.0861, "step": 18611 }, { "epoch": 0.3296099031731063, "grad_norm": 0.5789291262626648, "learning_rate": 2.3474833179232014e-05, "loss": 0.076, "step": 18612 }, { "epoch": 0.32962761271013474, "grad_norm": 1.112822413444519, "learning_rate": 2.3474123279734974e-05, "loss": 0.0937, "step": 18613 }, { "epoch": 0.32964532224716314, "grad_norm": 0.7320646643638611, "learning_rate": 2.347341335235892e-05, "loss": 0.1004, "step": 18614 }, { "epoch": 0.3296630317841916, "grad_norm": 1.020920991897583, "learning_rate": 2.34727033971062e-05, "loss": 0.096, "step": 18615 }, { "epoch": 0.32968074132122, "grad_norm": 0.6212484836578369, "learning_rate": 2.3471993413979143e-05, "loss": 0.0911, "step": 18616 }, { "epoch": 0.32969845085824845, "grad_norm": 0.844576895236969, "learning_rate": 2.347128340298008e-05, "loss": 0.0958, "step": 18617 }, { "epoch": 0.32971616039527685, "grad_norm": 0.7196699380874634, "learning_rate": 2.3470573364111354e-05, "loss": 0.1065, "step": 18618 }, { "epoch": 0.3297338699323053, "grad_norm": 0.6273291707038879, "learning_rate": 2.3469863297375293e-05, "loss": 0.0719, "step": 18619 }, { "epoch": 0.3297515794693337, "grad_norm": 0.8472520709037781, "learning_rate": 2.3469153202774247e-05, "loss": 0.0513, "step": 18620 }, { "epoch": 0.32976928900636215, "grad_norm": 0.7131805419921875, "learning_rate": 2.3468443080310536e-05, "loss": 0.1124, "step": 18621 }, { "epoch": 0.32978699854339055, "grad_norm": 0.973034679889679, "learning_rate": 2.346773292998651e-05, "loss": 0.0807, "step": 18622 }, { "epoch": 0.329804708080419, "grad_norm": 1.055098295211792, "learning_rate": 2.3467022751804493e-05, "loss": 0.092, "step": 18623 }, { "epoch": 0.32982241761744746, "grad_norm": 0.8393222689628601, "learning_rate": 2.346631254576683e-05, "loss": 0.0598, "step": 18624 }, { "epoch": 0.32984012715447586, "grad_norm": 0.6199812889099121, "learning_rate": 2.3465602311875854e-05, "loss": 0.0844, "step": 18625 }, { "epoch": 0.3298578366915043, "grad_norm": 0.712035059928894, "learning_rate": 2.346489205013391e-05, "loss": 0.0584, "step": 18626 }, { "epoch": 0.3298755462285327, "grad_norm": 0.7750401496887207, "learning_rate": 2.346418176054332e-05, "loss": 0.0681, "step": 18627 }, { "epoch": 0.32989325576556117, "grad_norm": 0.6568481922149658, "learning_rate": 2.346347144310643e-05, "loss": 0.0611, "step": 18628 }, { "epoch": 0.32991096530258956, "grad_norm": 0.9453908205032349, "learning_rate": 2.3462761097825575e-05, "loss": 0.0988, "step": 18629 }, { "epoch": 0.329928674839618, "grad_norm": 0.9839059114456177, "learning_rate": 2.346205072470309e-05, "loss": 0.0851, "step": 18630 }, { "epoch": 0.3299463843766464, "grad_norm": 0.7610020041465759, "learning_rate": 2.3461340323741316e-05, "loss": 0.0825, "step": 18631 }, { "epoch": 0.32996409391367487, "grad_norm": 0.7815858721733093, "learning_rate": 2.3460629894942587e-05, "loss": 0.0787, "step": 18632 }, { "epoch": 0.32998180345070327, "grad_norm": 0.5802143216133118, "learning_rate": 2.3459919438309244e-05, "loss": 0.0722, "step": 18633 }, { "epoch": 0.3299995129877317, "grad_norm": 1.1655941009521484, "learning_rate": 2.3459208953843618e-05, "loss": 0.1272, "step": 18634 }, { "epoch": 0.3300172225247601, "grad_norm": 1.032857894897461, "learning_rate": 2.345849844154805e-05, "loss": 0.0727, "step": 18635 }, { "epoch": 0.3300349320617886, "grad_norm": 0.5466781854629517, "learning_rate": 2.3457787901424885e-05, "loss": 0.065, "step": 18636 }, { "epoch": 0.330052641598817, "grad_norm": 1.0125834941864014, "learning_rate": 2.3457077333476444e-05, "loss": 0.0959, "step": 18637 }, { "epoch": 0.33007035113584543, "grad_norm": 0.5173278450965881, "learning_rate": 2.3456366737705078e-05, "loss": 0.0363, "step": 18638 }, { "epoch": 0.3300880606728739, "grad_norm": 0.905970573425293, "learning_rate": 2.345565611411312e-05, "loss": 0.0929, "step": 18639 }, { "epoch": 0.3301057702099023, "grad_norm": 0.7835878729820251, "learning_rate": 2.345494546270291e-05, "loss": 0.0711, "step": 18640 }, { "epoch": 0.33012347974693074, "grad_norm": 1.0730998516082764, "learning_rate": 2.3454234783476785e-05, "loss": 0.1167, "step": 18641 }, { "epoch": 0.33014118928395914, "grad_norm": 0.7435671091079712, "learning_rate": 2.3453524076437084e-05, "loss": 0.0733, "step": 18642 }, { "epoch": 0.3301588988209876, "grad_norm": 0.8104825019836426, "learning_rate": 2.345281334158614e-05, "loss": 0.0694, "step": 18643 }, { "epoch": 0.330176608358016, "grad_norm": 0.86090087890625, "learning_rate": 2.3452102578926295e-05, "loss": 0.1137, "step": 18644 }, { "epoch": 0.33019431789504444, "grad_norm": 0.7399511933326721, "learning_rate": 2.345139178845989e-05, "loss": 0.0918, "step": 18645 }, { "epoch": 0.33021202743207284, "grad_norm": 0.8890818357467651, "learning_rate": 2.345068097018926e-05, "loss": 0.0883, "step": 18646 }, { "epoch": 0.3302297369691013, "grad_norm": 0.8322796821594238, "learning_rate": 2.3449970124116743e-05, "loss": 0.088, "step": 18647 }, { "epoch": 0.3302474465061297, "grad_norm": 0.7479783892631531, "learning_rate": 2.344925925024468e-05, "loss": 0.1193, "step": 18648 }, { "epoch": 0.33026515604315815, "grad_norm": 1.082133412361145, "learning_rate": 2.344854834857541e-05, "loss": 0.088, "step": 18649 }, { "epoch": 0.33028286558018655, "grad_norm": 0.8784544467926025, "learning_rate": 2.344783741911127e-05, "loss": 0.0829, "step": 18650 }, { "epoch": 0.330300575117215, "grad_norm": 0.6559897065162659, "learning_rate": 2.3447126461854593e-05, "loss": 0.0996, "step": 18651 }, { "epoch": 0.3303182846542434, "grad_norm": 1.0437909364700317, "learning_rate": 2.3446415476807735e-05, "loss": 0.0973, "step": 18652 }, { "epoch": 0.33033599419127185, "grad_norm": 0.4188101589679718, "learning_rate": 2.3445704463973016e-05, "loss": 0.0548, "step": 18653 }, { "epoch": 0.3303537037283003, "grad_norm": 0.7580372095108032, "learning_rate": 2.3444993423352787e-05, "loss": 0.1243, "step": 18654 }, { "epoch": 0.3303714132653287, "grad_norm": 1.271469235420227, "learning_rate": 2.3444282354949386e-05, "loss": 0.1393, "step": 18655 }, { "epoch": 0.33038912280235716, "grad_norm": 0.9674668908119202, "learning_rate": 2.3443571258765144e-05, "loss": 0.0914, "step": 18656 }, { "epoch": 0.33040683233938556, "grad_norm": 0.6766440868377686, "learning_rate": 2.3442860134802413e-05, "loss": 0.0893, "step": 18657 }, { "epoch": 0.330424541876414, "grad_norm": 0.4036797881126404, "learning_rate": 2.3442148983063523e-05, "loss": 0.0581, "step": 18658 }, { "epoch": 0.3304422514134424, "grad_norm": 0.6078481078147888, "learning_rate": 2.3441437803550816e-05, "loss": 0.0586, "step": 18659 }, { "epoch": 0.33045996095047087, "grad_norm": 0.9200885891914368, "learning_rate": 2.344072659626663e-05, "loss": 0.0749, "step": 18660 }, { "epoch": 0.33047767048749926, "grad_norm": 0.7488124370574951, "learning_rate": 2.344001536121331e-05, "loss": 0.0785, "step": 18661 }, { "epoch": 0.3304953800245277, "grad_norm": 1.1581135988235474, "learning_rate": 2.3439304098393196e-05, "loss": 0.0902, "step": 18662 }, { "epoch": 0.3305130895615561, "grad_norm": 0.347260445356369, "learning_rate": 2.3438592807808624e-05, "loss": 0.0587, "step": 18663 }, { "epoch": 0.33053079909858457, "grad_norm": 0.8733806014060974, "learning_rate": 2.3437881489461935e-05, "loss": 0.1188, "step": 18664 }, { "epoch": 0.33054850863561297, "grad_norm": 0.6631778478622437, "learning_rate": 2.343717014335547e-05, "loss": 0.0681, "step": 18665 }, { "epoch": 0.3305662181726414, "grad_norm": 0.9561883211135864, "learning_rate": 2.3436458769491564e-05, "loss": 0.0921, "step": 18666 }, { "epoch": 0.3305839277096698, "grad_norm": 1.051480770111084, "learning_rate": 2.3435747367872564e-05, "loss": 0.0973, "step": 18667 }, { "epoch": 0.3306016372466983, "grad_norm": 0.6159414649009705, "learning_rate": 2.343503593850081e-05, "loss": 0.0848, "step": 18668 }, { "epoch": 0.33061934678372673, "grad_norm": 0.43149319291114807, "learning_rate": 2.343432448137864e-05, "loss": 0.071, "step": 18669 }, { "epoch": 0.33063705632075513, "grad_norm": 0.8733137845993042, "learning_rate": 2.34336129965084e-05, "loss": 0.075, "step": 18670 }, { "epoch": 0.3306547658577836, "grad_norm": 1.180790662765503, "learning_rate": 2.343290148389242e-05, "loss": 0.0766, "step": 18671 }, { "epoch": 0.330672475394812, "grad_norm": 0.4815995395183563, "learning_rate": 2.343218994353305e-05, "loss": 0.0813, "step": 18672 }, { "epoch": 0.33069018493184044, "grad_norm": 0.8160610198974609, "learning_rate": 2.3431478375432623e-05, "loss": 0.0657, "step": 18673 }, { "epoch": 0.33070789446886883, "grad_norm": 0.819365918636322, "learning_rate": 2.3430766779593488e-05, "loss": 0.1, "step": 18674 }, { "epoch": 0.3307256040058973, "grad_norm": 1.0226616859436035, "learning_rate": 2.3430055156017985e-05, "loss": 0.1024, "step": 18675 }, { "epoch": 0.3307433135429257, "grad_norm": 0.48976385593414307, "learning_rate": 2.3429343504708448e-05, "loss": 0.0575, "step": 18676 }, { "epoch": 0.33076102307995414, "grad_norm": 0.8563974499702454, "learning_rate": 2.3428631825667232e-05, "loss": 0.1097, "step": 18677 }, { "epoch": 0.33077873261698254, "grad_norm": 0.3694077134132385, "learning_rate": 2.3427920118896667e-05, "loss": 0.0689, "step": 18678 }, { "epoch": 0.330796442154011, "grad_norm": 0.9166705012321472, "learning_rate": 2.3427208384399096e-05, "loss": 0.1139, "step": 18679 }, { "epoch": 0.3308141516910394, "grad_norm": 0.7027378678321838, "learning_rate": 2.342649662217686e-05, "loss": 0.1022, "step": 18680 }, { "epoch": 0.33083186122806785, "grad_norm": 0.5966646075248718, "learning_rate": 2.34257848322323e-05, "loss": 0.0624, "step": 18681 }, { "epoch": 0.33084957076509625, "grad_norm": 1.0880411863327026, "learning_rate": 2.3425073014567767e-05, "loss": 0.1044, "step": 18682 }, { "epoch": 0.3308672803021247, "grad_norm": 0.9726709127426147, "learning_rate": 2.342436116918559e-05, "loss": 0.0709, "step": 18683 }, { "epoch": 0.33088498983915315, "grad_norm": 0.7552331686019897, "learning_rate": 2.342364929608812e-05, "loss": 0.0925, "step": 18684 }, { "epoch": 0.33090269937618155, "grad_norm": 0.7586398124694824, "learning_rate": 2.34229373952777e-05, "loss": 0.0524, "step": 18685 }, { "epoch": 0.33092040891321, "grad_norm": 1.0160245895385742, "learning_rate": 2.342222546675666e-05, "loss": 0.1075, "step": 18686 }, { "epoch": 0.3309381184502384, "grad_norm": 1.2318437099456787, "learning_rate": 2.3421513510527352e-05, "loss": 0.1062, "step": 18687 }, { "epoch": 0.33095582798726686, "grad_norm": 0.7725245356559753, "learning_rate": 2.3420801526592117e-05, "loss": 0.0899, "step": 18688 }, { "epoch": 0.33097353752429526, "grad_norm": 0.515590488910675, "learning_rate": 2.3420089514953304e-05, "loss": 0.1085, "step": 18689 }, { "epoch": 0.3309912470613237, "grad_norm": 0.7033461332321167, "learning_rate": 2.3419377475613237e-05, "loss": 0.0702, "step": 18690 }, { "epoch": 0.3310089565983521, "grad_norm": 1.530142903327942, "learning_rate": 2.3418665408574275e-05, "loss": 0.0849, "step": 18691 }, { "epoch": 0.33102666613538057, "grad_norm": 0.8948557376861572, "learning_rate": 2.3417953313838754e-05, "loss": 0.1037, "step": 18692 }, { "epoch": 0.33104437567240896, "grad_norm": 0.647442638874054, "learning_rate": 2.3417241191409018e-05, "loss": 0.0795, "step": 18693 }, { "epoch": 0.3310620852094374, "grad_norm": 0.807756245136261, "learning_rate": 2.3416529041287413e-05, "loss": 0.1062, "step": 18694 }, { "epoch": 0.3310797947464658, "grad_norm": 0.9269899725914001, "learning_rate": 2.3415816863476273e-05, "loss": 0.094, "step": 18695 }, { "epoch": 0.33109750428349427, "grad_norm": 0.8893685936927795, "learning_rate": 2.341510465797795e-05, "loss": 0.0758, "step": 18696 }, { "epoch": 0.3311152138205227, "grad_norm": 0.48488107323646545, "learning_rate": 2.3414392424794787e-05, "loss": 0.1075, "step": 18697 }, { "epoch": 0.3311329233575511, "grad_norm": 0.6261841654777527, "learning_rate": 2.341368016392912e-05, "loss": 0.0836, "step": 18698 }, { "epoch": 0.3311506328945796, "grad_norm": 0.9707759618759155, "learning_rate": 2.3412967875383293e-05, "loss": 0.0916, "step": 18699 }, { "epoch": 0.331168342431608, "grad_norm": 0.6270633935928345, "learning_rate": 2.3412255559159662e-05, "loss": 0.1018, "step": 18700 }, { "epoch": 0.33118605196863643, "grad_norm": 1.0269556045532227, "learning_rate": 2.3411543215260554e-05, "loss": 0.0962, "step": 18701 }, { "epoch": 0.33120376150566483, "grad_norm": 0.4105364680290222, "learning_rate": 2.3410830843688317e-05, "loss": 0.0485, "step": 18702 }, { "epoch": 0.3312214710426933, "grad_norm": 0.9069678783416748, "learning_rate": 2.3410118444445307e-05, "loss": 0.1244, "step": 18703 }, { "epoch": 0.3312391805797217, "grad_norm": 0.638936460018158, "learning_rate": 2.340940601753385e-05, "loss": 0.0714, "step": 18704 }, { "epoch": 0.33125689011675014, "grad_norm": 0.6559277772903442, "learning_rate": 2.34086935629563e-05, "loss": 0.0941, "step": 18705 }, { "epoch": 0.33127459965377853, "grad_norm": 1.1505132913589478, "learning_rate": 2.3407981080714998e-05, "loss": 0.0648, "step": 18706 }, { "epoch": 0.331292309190807, "grad_norm": 1.1918832063674927, "learning_rate": 2.340726857081229e-05, "loss": 0.0977, "step": 18707 }, { "epoch": 0.3313100187278354, "grad_norm": 2.2235186100006104, "learning_rate": 2.3406556033250516e-05, "loss": 0.1298, "step": 18708 }, { "epoch": 0.33132772826486384, "grad_norm": 0.9523314833641052, "learning_rate": 2.3405843468032026e-05, "loss": 0.1226, "step": 18709 }, { "epoch": 0.33134543780189224, "grad_norm": 0.8573805093765259, "learning_rate": 2.3405130875159163e-05, "loss": 0.1119, "step": 18710 }, { "epoch": 0.3313631473389207, "grad_norm": 0.8800356388092041, "learning_rate": 2.3404418254634267e-05, "loss": 0.0664, "step": 18711 }, { "epoch": 0.33138085687594915, "grad_norm": 0.7797994613647461, "learning_rate": 2.340370560645968e-05, "loss": 0.1437, "step": 18712 }, { "epoch": 0.33139856641297755, "grad_norm": 0.9739986062049866, "learning_rate": 2.340299293063776e-05, "loss": 0.103, "step": 18713 }, { "epoch": 0.331416275950006, "grad_norm": 1.0516008138656616, "learning_rate": 2.3402280227170838e-05, "loss": 0.1194, "step": 18714 }, { "epoch": 0.3314339854870344, "grad_norm": 0.43340784311294556, "learning_rate": 2.3401567496061264e-05, "loss": 0.0937, "step": 18715 }, { "epoch": 0.33145169502406285, "grad_norm": 0.742603600025177, "learning_rate": 2.3400854737311388e-05, "loss": 0.092, "step": 18716 }, { "epoch": 0.33146940456109125, "grad_norm": 0.9431103467941284, "learning_rate": 2.3400141950923543e-05, "loss": 0.0816, "step": 18717 }, { "epoch": 0.3314871140981197, "grad_norm": 0.8780677318572998, "learning_rate": 2.3399429136900085e-05, "loss": 0.0898, "step": 18718 }, { "epoch": 0.3315048236351481, "grad_norm": 0.959484875202179, "learning_rate": 2.3398716295243353e-05, "loss": 0.0903, "step": 18719 }, { "epoch": 0.33152253317217656, "grad_norm": 1.6405091285705566, "learning_rate": 2.3398003425955695e-05, "loss": 0.0929, "step": 18720 }, { "epoch": 0.33154024270920496, "grad_norm": 0.7546898126602173, "learning_rate": 2.3397290529039455e-05, "loss": 0.0671, "step": 18721 }, { "epoch": 0.3315579522462334, "grad_norm": 0.6776033639907837, "learning_rate": 2.3396577604496975e-05, "loss": 0.0848, "step": 18722 }, { "epoch": 0.3315756617832618, "grad_norm": 0.8137174248695374, "learning_rate": 2.339586465233061e-05, "loss": 0.0775, "step": 18723 }, { "epoch": 0.33159337132029026, "grad_norm": 0.7059513926506042, "learning_rate": 2.3395151672542695e-05, "loss": 0.0996, "step": 18724 }, { "epoch": 0.33161108085731866, "grad_norm": 0.7375737428665161, "learning_rate": 2.3394438665135578e-05, "loss": 0.0567, "step": 18725 }, { "epoch": 0.3316287903943471, "grad_norm": 0.8130665421485901, "learning_rate": 2.3393725630111612e-05, "loss": 0.0919, "step": 18726 }, { "epoch": 0.33164649993137557, "grad_norm": 0.6102741956710815, "learning_rate": 2.3393012567473136e-05, "loss": 0.1091, "step": 18727 }, { "epoch": 0.33166420946840397, "grad_norm": 0.5624499917030334, "learning_rate": 2.3392299477222496e-05, "loss": 0.0675, "step": 18728 }, { "epoch": 0.3316819190054324, "grad_norm": 0.3493659794330597, "learning_rate": 2.3391586359362042e-05, "loss": 0.1529, "step": 18729 }, { "epoch": 0.3316996285424608, "grad_norm": 0.6593996286392212, "learning_rate": 2.3390873213894115e-05, "loss": 0.0845, "step": 18730 }, { "epoch": 0.3317173380794893, "grad_norm": 0.9675592184066772, "learning_rate": 2.3390160040821065e-05, "loss": 0.0708, "step": 18731 }, { "epoch": 0.3317350476165177, "grad_norm": 0.7173416018486023, "learning_rate": 2.3389446840145237e-05, "loss": 0.1063, "step": 18732 }, { "epoch": 0.33175275715354613, "grad_norm": 0.4448798596858978, "learning_rate": 2.338873361186898e-05, "loss": 0.1013, "step": 18733 }, { "epoch": 0.33177046669057453, "grad_norm": 0.7146708369255066, "learning_rate": 2.3388020355994634e-05, "loss": 0.1008, "step": 18734 }, { "epoch": 0.331788176227603, "grad_norm": 0.3690592646598816, "learning_rate": 2.338730707252455e-05, "loss": 0.053, "step": 18735 }, { "epoch": 0.3318058857646314, "grad_norm": 0.5394780039787292, "learning_rate": 2.3386593761461076e-05, "loss": 0.0772, "step": 18736 }, { "epoch": 0.33182359530165983, "grad_norm": 1.0605511665344238, "learning_rate": 2.3385880422806556e-05, "loss": 0.083, "step": 18737 }, { "epoch": 0.33184130483868823, "grad_norm": 1.0829153060913086, "learning_rate": 2.3385167056563336e-05, "loss": 0.0796, "step": 18738 }, { "epoch": 0.3318590143757167, "grad_norm": 0.3849100172519684, "learning_rate": 2.3384453662733768e-05, "loss": 0.083, "step": 18739 }, { "epoch": 0.3318767239127451, "grad_norm": 0.8314078450202942, "learning_rate": 2.3383740241320192e-05, "loss": 0.1009, "step": 18740 }, { "epoch": 0.33189443344977354, "grad_norm": 0.8260047435760498, "learning_rate": 2.3383026792324964e-05, "loss": 0.0641, "step": 18741 }, { "epoch": 0.331912142986802, "grad_norm": 0.7532780170440674, "learning_rate": 2.338231331575042e-05, "loss": 0.1275, "step": 18742 }, { "epoch": 0.3319298525238304, "grad_norm": 0.7626235485076904, "learning_rate": 2.3381599811598913e-05, "loss": 0.0598, "step": 18743 }, { "epoch": 0.33194756206085885, "grad_norm": 0.6486347317695618, "learning_rate": 2.3380886279872795e-05, "loss": 0.0788, "step": 18744 }, { "epoch": 0.33196527159788725, "grad_norm": 0.633288562297821, "learning_rate": 2.33801727205744e-05, "loss": 0.0576, "step": 18745 }, { "epoch": 0.3319829811349157, "grad_norm": 0.7366352081298828, "learning_rate": 2.3379459133706095e-05, "loss": 0.1198, "step": 18746 }, { "epoch": 0.3320006906719441, "grad_norm": 0.782281219959259, "learning_rate": 2.3378745519270208e-05, "loss": 0.0914, "step": 18747 }, { "epoch": 0.33201840020897255, "grad_norm": 0.9028252959251404, "learning_rate": 2.3378031877269103e-05, "loss": 0.0967, "step": 18748 }, { "epoch": 0.33203610974600095, "grad_norm": 0.5978951454162598, "learning_rate": 2.3377318207705116e-05, "loss": 0.0767, "step": 18749 }, { "epoch": 0.3320538192830294, "grad_norm": 0.9436225295066833, "learning_rate": 2.3376604510580605e-05, "loss": 0.0633, "step": 18750 }, { "epoch": 0.3320715288200578, "grad_norm": 0.8021388649940491, "learning_rate": 2.3375890785897907e-05, "loss": 0.1015, "step": 18751 }, { "epoch": 0.33208923835708626, "grad_norm": 0.5081961154937744, "learning_rate": 2.3375177033659375e-05, "loss": 0.0957, "step": 18752 }, { "epoch": 0.33210694789411466, "grad_norm": 0.5213006734848022, "learning_rate": 2.337446325386736e-05, "loss": 0.0406, "step": 18753 }, { "epoch": 0.3321246574311431, "grad_norm": 1.0951809883117676, "learning_rate": 2.3373749446524203e-05, "loss": 0.1066, "step": 18754 }, { "epoch": 0.3321423669681715, "grad_norm": 0.7381962537765503, "learning_rate": 2.3373035611632266e-05, "loss": 0.0839, "step": 18755 }, { "epoch": 0.33216007650519996, "grad_norm": 0.6472240090370178, "learning_rate": 2.3372321749193883e-05, "loss": 0.0916, "step": 18756 }, { "epoch": 0.3321777860422284, "grad_norm": 0.7842134237289429, "learning_rate": 2.337160785921141e-05, "loss": 0.0669, "step": 18757 }, { "epoch": 0.3321954955792568, "grad_norm": 0.47867727279663086, "learning_rate": 2.3370893941687193e-05, "loss": 0.1038, "step": 18758 }, { "epoch": 0.33221320511628527, "grad_norm": 0.6455080509185791, "learning_rate": 2.3370179996623585e-05, "loss": 0.0515, "step": 18759 }, { "epoch": 0.33223091465331367, "grad_norm": 0.7622125148773193, "learning_rate": 2.3369466024022926e-05, "loss": 0.0725, "step": 18760 }, { "epoch": 0.3322486241903421, "grad_norm": 1.1244425773620605, "learning_rate": 2.3368752023887574e-05, "loss": 0.1062, "step": 18761 }, { "epoch": 0.3322663337273705, "grad_norm": 0.544819712638855, "learning_rate": 2.3368037996219873e-05, "loss": 0.0721, "step": 18762 }, { "epoch": 0.332284043264399, "grad_norm": 0.4513721168041229, "learning_rate": 2.336732394102217e-05, "loss": 0.0812, "step": 18763 }, { "epoch": 0.3323017528014274, "grad_norm": 0.682381272315979, "learning_rate": 2.336660985829682e-05, "loss": 0.0875, "step": 18764 }, { "epoch": 0.33231946233845583, "grad_norm": 0.42291662096977234, "learning_rate": 2.3365895748046176e-05, "loss": 0.0877, "step": 18765 }, { "epoch": 0.3323371718754842, "grad_norm": 0.573954164981842, "learning_rate": 2.3365181610272574e-05, "loss": 0.0854, "step": 18766 }, { "epoch": 0.3323548814125127, "grad_norm": 0.7595160007476807, "learning_rate": 2.3364467444978374e-05, "loss": 0.1134, "step": 18767 }, { "epoch": 0.3323725909495411, "grad_norm": 0.7404164671897888, "learning_rate": 2.336375325216592e-05, "loss": 0.0632, "step": 18768 }, { "epoch": 0.33239030048656953, "grad_norm": 1.0481878519058228, "learning_rate": 2.3363039031837563e-05, "loss": 0.0811, "step": 18769 }, { "epoch": 0.33240801002359793, "grad_norm": 0.8355897068977356, "learning_rate": 2.3362324783995658e-05, "loss": 0.0935, "step": 18770 }, { "epoch": 0.3324257195606264, "grad_norm": 0.7684923410415649, "learning_rate": 2.3361610508642547e-05, "loss": 0.0758, "step": 18771 }, { "epoch": 0.33244342909765484, "grad_norm": 0.5049031376838684, "learning_rate": 2.336089620578058e-05, "loss": 0.0724, "step": 18772 }, { "epoch": 0.33246113863468324, "grad_norm": 0.528478741645813, "learning_rate": 2.3360181875412117e-05, "loss": 0.0702, "step": 18773 }, { "epoch": 0.3324788481717117, "grad_norm": 0.7965747117996216, "learning_rate": 2.3359467517539494e-05, "loss": 0.117, "step": 18774 }, { "epoch": 0.3324965577087401, "grad_norm": 0.75812828540802, "learning_rate": 2.335875313216507e-05, "loss": 0.1053, "step": 18775 }, { "epoch": 0.33251426724576855, "grad_norm": 0.8520965576171875, "learning_rate": 2.33580387192912e-05, "loss": 0.0865, "step": 18776 }, { "epoch": 0.33253197678279695, "grad_norm": 0.5771017074584961, "learning_rate": 2.335732427892022e-05, "loss": 0.0654, "step": 18777 }, { "epoch": 0.3325496863198254, "grad_norm": 0.7609277963638306, "learning_rate": 2.335660981105449e-05, "loss": 0.0891, "step": 18778 }, { "epoch": 0.3325673958568538, "grad_norm": 0.5064910650253296, "learning_rate": 2.335589531569636e-05, "loss": 0.0505, "step": 18779 }, { "epoch": 0.33258510539388225, "grad_norm": 0.7449690699577332, "learning_rate": 2.3355180792848187e-05, "loss": 0.0481, "step": 18780 }, { "epoch": 0.33260281493091065, "grad_norm": 0.8810113668441772, "learning_rate": 2.3354466242512304e-05, "loss": 0.1088, "step": 18781 }, { "epoch": 0.3326205244679391, "grad_norm": 0.7866032719612122, "learning_rate": 2.3353751664691076e-05, "loss": 0.1244, "step": 18782 }, { "epoch": 0.3326382340049675, "grad_norm": 0.5975719690322876, "learning_rate": 2.335303705938685e-05, "loss": 0.0698, "step": 18783 }, { "epoch": 0.33265594354199596, "grad_norm": 0.47690480947494507, "learning_rate": 2.3352322426601978e-05, "loss": 0.0908, "step": 18784 }, { "epoch": 0.33267365307902436, "grad_norm": 0.840915858745575, "learning_rate": 2.335160776633881e-05, "loss": 0.0992, "step": 18785 }, { "epoch": 0.3326913626160528, "grad_norm": 1.0593557357788086, "learning_rate": 2.3350893078599692e-05, "loss": 0.1213, "step": 18786 }, { "epoch": 0.33270907215308126, "grad_norm": 0.698976993560791, "learning_rate": 2.3350178363386986e-05, "loss": 0.083, "step": 18787 }, { "epoch": 0.33272678169010966, "grad_norm": 0.9443967342376709, "learning_rate": 2.3349463620703036e-05, "loss": 0.116, "step": 18788 }, { "epoch": 0.3327444912271381, "grad_norm": 0.953449010848999, "learning_rate": 2.3348748850550198e-05, "loss": 0.0972, "step": 18789 }, { "epoch": 0.3327622007641665, "grad_norm": 0.6518909335136414, "learning_rate": 2.3348034052930815e-05, "loss": 0.113, "step": 18790 }, { "epoch": 0.33277991030119497, "grad_norm": 0.7186135053634644, "learning_rate": 2.3347319227847246e-05, "loss": 0.0767, "step": 18791 }, { "epoch": 0.33279761983822337, "grad_norm": 0.5292288661003113, "learning_rate": 2.3346604375301846e-05, "loss": 0.0647, "step": 18792 }, { "epoch": 0.3328153293752518, "grad_norm": 0.8895583748817444, "learning_rate": 2.3345889495296955e-05, "loss": 0.0667, "step": 18793 }, { "epoch": 0.3328330389122802, "grad_norm": 0.7390729784965515, "learning_rate": 2.3345174587834936e-05, "loss": 0.0732, "step": 18794 }, { "epoch": 0.3328507484493087, "grad_norm": 0.647530734539032, "learning_rate": 2.3344459652918137e-05, "loss": 0.1295, "step": 18795 }, { "epoch": 0.3328684579863371, "grad_norm": 0.7357009053230286, "learning_rate": 2.3343744690548908e-05, "loss": 0.0617, "step": 18796 }, { "epoch": 0.33288616752336553, "grad_norm": 1.5255810022354126, "learning_rate": 2.3343029700729604e-05, "loss": 0.0905, "step": 18797 }, { "epoch": 0.3329038770603939, "grad_norm": 0.2869115471839905, "learning_rate": 2.334231468346257e-05, "loss": 0.0583, "step": 18798 }, { "epoch": 0.3329215865974224, "grad_norm": 0.7601824998855591, "learning_rate": 2.3341599638750176e-05, "loss": 0.0991, "step": 18799 }, { "epoch": 0.3329392961344508, "grad_norm": 0.6497971415519714, "learning_rate": 2.3340884566594753e-05, "loss": 0.1034, "step": 18800 }, { "epoch": 0.33295700567147923, "grad_norm": 0.9762676954269409, "learning_rate": 2.334016946699867e-05, "loss": 0.072, "step": 18801 }, { "epoch": 0.3329747152085077, "grad_norm": 0.7436665892601013, "learning_rate": 2.3339454339964266e-05, "loss": 0.1, "step": 18802 }, { "epoch": 0.3329924247455361, "grad_norm": 0.6622462868690491, "learning_rate": 2.3338739185493907e-05, "loss": 0.1414, "step": 18803 }, { "epoch": 0.33301013428256454, "grad_norm": 0.8276193737983704, "learning_rate": 2.3338024003589937e-05, "loss": 0.1098, "step": 18804 }, { "epoch": 0.33302784381959294, "grad_norm": 0.7889201045036316, "learning_rate": 2.3337308794254707e-05, "loss": 0.07, "step": 18805 }, { "epoch": 0.3330455533566214, "grad_norm": 0.7290891408920288, "learning_rate": 2.3336593557490584e-05, "loss": 0.0953, "step": 18806 }, { "epoch": 0.3330632628936498, "grad_norm": 0.9206566214561462, "learning_rate": 2.3335878293299904e-05, "loss": 0.0628, "step": 18807 }, { "epoch": 0.33308097243067825, "grad_norm": 0.7410904765129089, "learning_rate": 2.333516300168503e-05, "loss": 0.0647, "step": 18808 }, { "epoch": 0.33309868196770664, "grad_norm": 0.569965660572052, "learning_rate": 2.3334447682648314e-05, "loss": 0.0978, "step": 18809 }, { "epoch": 0.3331163915047351, "grad_norm": 0.6651450395584106, "learning_rate": 2.3333732336192104e-05, "loss": 0.0941, "step": 18810 }, { "epoch": 0.3331341010417635, "grad_norm": 0.5419864058494568, "learning_rate": 2.3333016962318762e-05, "loss": 0.0732, "step": 18811 }, { "epoch": 0.33315181057879195, "grad_norm": 0.8241457343101501, "learning_rate": 2.3332301561030636e-05, "loss": 0.1324, "step": 18812 }, { "epoch": 0.33316952011582035, "grad_norm": 1.0504807233810425, "learning_rate": 2.333158613233008e-05, "loss": 0.0696, "step": 18813 }, { "epoch": 0.3331872296528488, "grad_norm": 0.8377729058265686, "learning_rate": 2.3330870676219445e-05, "loss": 0.0998, "step": 18814 }, { "epoch": 0.3332049391898772, "grad_norm": 0.9260665774345398, "learning_rate": 2.3330155192701092e-05, "loss": 0.1035, "step": 18815 }, { "epoch": 0.33322264872690566, "grad_norm": 0.5050250291824341, "learning_rate": 2.332943968177737e-05, "loss": 0.0791, "step": 18816 }, { "epoch": 0.3332403582639341, "grad_norm": 0.3739300072193146, "learning_rate": 2.3328724143450635e-05, "loss": 0.0693, "step": 18817 }, { "epoch": 0.3332580678009625, "grad_norm": 0.9018915295600891, "learning_rate": 2.3328008577723243e-05, "loss": 0.0822, "step": 18818 }, { "epoch": 0.33327577733799096, "grad_norm": 0.697141170501709, "learning_rate": 2.332729298459754e-05, "loss": 0.102, "step": 18819 }, { "epoch": 0.33329348687501936, "grad_norm": 0.6775583624839783, "learning_rate": 2.3326577364075884e-05, "loss": 0.0532, "step": 18820 }, { "epoch": 0.3333111964120478, "grad_norm": 0.6661968231201172, "learning_rate": 2.3325861716160637e-05, "loss": 0.0877, "step": 18821 }, { "epoch": 0.3333289059490762, "grad_norm": 0.5987954139709473, "learning_rate": 2.332514604085414e-05, "loss": 0.0839, "step": 18822 }, { "epoch": 0.33334661548610467, "grad_norm": 0.8030975461006165, "learning_rate": 2.3324430338158756e-05, "loss": 0.0601, "step": 18823 }, { "epoch": 0.33336432502313307, "grad_norm": 0.9301969408988953, "learning_rate": 2.332371460807684e-05, "loss": 0.1059, "step": 18824 }, { "epoch": 0.3333820345601615, "grad_norm": 0.880368173122406, "learning_rate": 2.3322998850610745e-05, "loss": 0.0753, "step": 18825 }, { "epoch": 0.3333997440971899, "grad_norm": 1.1137768030166626, "learning_rate": 2.3322283065762827e-05, "loss": 0.0911, "step": 18826 }, { "epoch": 0.3334174536342184, "grad_norm": 0.8347527384757996, "learning_rate": 2.3321567253535437e-05, "loss": 0.1085, "step": 18827 }, { "epoch": 0.3334351631712468, "grad_norm": 0.5894887447357178, "learning_rate": 2.3320851413930935e-05, "loss": 0.1006, "step": 18828 }, { "epoch": 0.3334528727082752, "grad_norm": 0.7723181843757629, "learning_rate": 2.3320135546951674e-05, "loss": 0.108, "step": 18829 }, { "epoch": 0.3334705822453036, "grad_norm": 1.102820634841919, "learning_rate": 2.3319419652600005e-05, "loss": 0.1263, "step": 18830 }, { "epoch": 0.3334882917823321, "grad_norm": 0.7809327840805054, "learning_rate": 2.3318703730878288e-05, "loss": 0.1041, "step": 18831 }, { "epoch": 0.33350600131936053, "grad_norm": 0.4874153435230255, "learning_rate": 2.3317987781788873e-05, "loss": 0.0774, "step": 18832 }, { "epoch": 0.33352371085638893, "grad_norm": 0.6307083964347839, "learning_rate": 2.3317271805334127e-05, "loss": 0.0745, "step": 18833 }, { "epoch": 0.3335414203934174, "grad_norm": 1.2791872024536133, "learning_rate": 2.3316555801516395e-05, "loss": 0.0988, "step": 18834 }, { "epoch": 0.3335591299304458, "grad_norm": 0.8118139505386353, "learning_rate": 2.3315839770338035e-05, "loss": 0.0887, "step": 18835 }, { "epoch": 0.33357683946747424, "grad_norm": 1.8659476041793823, "learning_rate": 2.3315123711801402e-05, "loss": 0.0953, "step": 18836 }, { "epoch": 0.33359454900450264, "grad_norm": 0.6434708833694458, "learning_rate": 2.3314407625908858e-05, "loss": 0.0972, "step": 18837 }, { "epoch": 0.3336122585415311, "grad_norm": 0.5369188189506531, "learning_rate": 2.331369151266275e-05, "loss": 0.0985, "step": 18838 }, { "epoch": 0.3336299680785595, "grad_norm": 0.6580930948257446, "learning_rate": 2.3312975372065436e-05, "loss": 0.102, "step": 18839 }, { "epoch": 0.33364767761558795, "grad_norm": 1.3562871217727661, "learning_rate": 2.3312259204119275e-05, "loss": 0.1045, "step": 18840 }, { "epoch": 0.33366538715261634, "grad_norm": 0.975896954536438, "learning_rate": 2.3311543008826625e-05, "loss": 0.0922, "step": 18841 }, { "epoch": 0.3336830966896448, "grad_norm": 0.44175049662590027, "learning_rate": 2.3310826786189837e-05, "loss": 0.1033, "step": 18842 }, { "epoch": 0.3337008062266732, "grad_norm": 1.3455429077148438, "learning_rate": 2.331011053621127e-05, "loss": 0.0829, "step": 18843 }, { "epoch": 0.33371851576370165, "grad_norm": 0.5351725220680237, "learning_rate": 2.3309394258893277e-05, "loss": 0.0715, "step": 18844 }, { "epoch": 0.33373622530073005, "grad_norm": 1.28376042842865, "learning_rate": 2.3308677954238224e-05, "loss": 0.1111, "step": 18845 }, { "epoch": 0.3337539348377585, "grad_norm": 0.8973077535629272, "learning_rate": 2.3307961622248456e-05, "loss": 0.0794, "step": 18846 }, { "epoch": 0.33377164437478696, "grad_norm": 1.4613518714904785, "learning_rate": 2.3307245262926334e-05, "loss": 0.0809, "step": 18847 }, { "epoch": 0.33378935391181536, "grad_norm": 0.8651803731918335, "learning_rate": 2.3306528876274216e-05, "loss": 0.0881, "step": 18848 }, { "epoch": 0.3338070634488438, "grad_norm": 0.5762215256690979, "learning_rate": 2.330581246229446e-05, "loss": 0.0518, "step": 18849 }, { "epoch": 0.3338247729858722, "grad_norm": 0.9276733994483948, "learning_rate": 2.330509602098942e-05, "loss": 0.1008, "step": 18850 }, { "epoch": 0.33384248252290066, "grad_norm": 0.977685272693634, "learning_rate": 2.3304379552361453e-05, "loss": 0.1067, "step": 18851 }, { "epoch": 0.33386019205992906, "grad_norm": 0.8566457629203796, "learning_rate": 2.330366305641292e-05, "loss": 0.1059, "step": 18852 }, { "epoch": 0.3338779015969575, "grad_norm": 0.9456350207328796, "learning_rate": 2.3302946533146175e-05, "loss": 0.0893, "step": 18853 }, { "epoch": 0.3338956111339859, "grad_norm": 0.6320602893829346, "learning_rate": 2.3302229982563573e-05, "loss": 0.0915, "step": 18854 }, { "epoch": 0.33391332067101437, "grad_norm": 0.9860714077949524, "learning_rate": 2.3301513404667472e-05, "loss": 0.0805, "step": 18855 }, { "epoch": 0.33393103020804277, "grad_norm": 1.0092973709106445, "learning_rate": 2.3300796799460238e-05, "loss": 0.1188, "step": 18856 }, { "epoch": 0.3339487397450712, "grad_norm": 0.8809400200843811, "learning_rate": 2.330008016694422e-05, "loss": 0.1069, "step": 18857 }, { "epoch": 0.3339664492820996, "grad_norm": 1.0369315147399902, "learning_rate": 2.3299363507121774e-05, "loss": 0.1336, "step": 18858 }, { "epoch": 0.3339841588191281, "grad_norm": 0.4868258535861969, "learning_rate": 2.3298646819995263e-05, "loss": 0.0916, "step": 18859 }, { "epoch": 0.3340018683561565, "grad_norm": 0.5955396294593811, "learning_rate": 2.3297930105567046e-05, "loss": 0.0723, "step": 18860 }, { "epoch": 0.3340195778931849, "grad_norm": 0.8903903365135193, "learning_rate": 2.329721336383948e-05, "loss": 0.072, "step": 18861 }, { "epoch": 0.3340372874302134, "grad_norm": 1.0143312215805054, "learning_rate": 2.3296496594814917e-05, "loss": 0.0886, "step": 18862 }, { "epoch": 0.3340549969672418, "grad_norm": 0.7713420391082764, "learning_rate": 2.329577979849572e-05, "loss": 0.1124, "step": 18863 }, { "epoch": 0.33407270650427023, "grad_norm": 0.8676716089248657, "learning_rate": 2.3295062974884245e-05, "loss": 0.0857, "step": 18864 }, { "epoch": 0.33409041604129863, "grad_norm": 0.6249747276306152, "learning_rate": 2.3294346123982856e-05, "loss": 0.0776, "step": 18865 }, { "epoch": 0.3341081255783271, "grad_norm": 0.743854284286499, "learning_rate": 2.3293629245793904e-05, "loss": 0.0812, "step": 18866 }, { "epoch": 0.3341258351153555, "grad_norm": 0.6799715161323547, "learning_rate": 2.329291234031975e-05, "loss": 0.0835, "step": 18867 }, { "epoch": 0.33414354465238394, "grad_norm": 0.8748595118522644, "learning_rate": 2.3292195407562757e-05, "loss": 0.1047, "step": 18868 }, { "epoch": 0.33416125418941234, "grad_norm": 0.7584736347198486, "learning_rate": 2.3291478447525276e-05, "loss": 0.0884, "step": 18869 }, { "epoch": 0.3341789637264408, "grad_norm": 0.8730657696723938, "learning_rate": 2.3290761460209674e-05, "loss": 0.0765, "step": 18870 }, { "epoch": 0.3341966732634692, "grad_norm": 0.7648466229438782, "learning_rate": 2.32900444456183e-05, "loss": 0.0915, "step": 18871 }, { "epoch": 0.33421438280049764, "grad_norm": 0.8361303806304932, "learning_rate": 2.3289327403753523e-05, "loss": 0.0965, "step": 18872 }, { "epoch": 0.33423209233752604, "grad_norm": 0.71394282579422, "learning_rate": 2.3288610334617693e-05, "loss": 0.1126, "step": 18873 }, { "epoch": 0.3342498018745545, "grad_norm": 0.9422741532325745, "learning_rate": 2.3287893238213175e-05, "loss": 0.0991, "step": 18874 }, { "epoch": 0.3342675114115829, "grad_norm": 0.43032196164131165, "learning_rate": 2.3287176114542328e-05, "loss": 0.0547, "step": 18875 }, { "epoch": 0.33428522094861135, "grad_norm": 1.062524676322937, "learning_rate": 2.3286458963607506e-05, "loss": 0.0584, "step": 18876 }, { "epoch": 0.3343029304856398, "grad_norm": 0.5758786797523499, "learning_rate": 2.3285741785411075e-05, "loss": 0.0569, "step": 18877 }, { "epoch": 0.3343206400226682, "grad_norm": 0.8851287364959717, "learning_rate": 2.3285024579955392e-05, "loss": 0.0777, "step": 18878 }, { "epoch": 0.33433834955969666, "grad_norm": 0.7805832624435425, "learning_rate": 2.3284307347242816e-05, "loss": 0.0525, "step": 18879 }, { "epoch": 0.33435605909672506, "grad_norm": 0.6202988028526306, "learning_rate": 2.3283590087275704e-05, "loss": 0.0802, "step": 18880 }, { "epoch": 0.3343737686337535, "grad_norm": 0.7166119813919067, "learning_rate": 2.3282872800056422e-05, "loss": 0.1065, "step": 18881 }, { "epoch": 0.3343914781707819, "grad_norm": 0.8134261965751648, "learning_rate": 2.3282155485587325e-05, "loss": 0.0798, "step": 18882 }, { "epoch": 0.33440918770781036, "grad_norm": 0.5564413070678711, "learning_rate": 2.3281438143870777e-05, "loss": 0.0569, "step": 18883 }, { "epoch": 0.33442689724483876, "grad_norm": 0.6766692996025085, "learning_rate": 2.3280720774909132e-05, "loss": 0.085, "step": 18884 }, { "epoch": 0.3344446067818672, "grad_norm": 0.8870525360107422, "learning_rate": 2.328000337870475e-05, "loss": 0.0663, "step": 18885 }, { "epoch": 0.3344623163188956, "grad_norm": 0.7915005683898926, "learning_rate": 2.327928595526e-05, "loss": 0.0801, "step": 18886 }, { "epoch": 0.33448002585592407, "grad_norm": 0.7027496099472046, "learning_rate": 2.3278568504577232e-05, "loss": 0.0824, "step": 18887 }, { "epoch": 0.33449773539295247, "grad_norm": 0.8448268175125122, "learning_rate": 2.3277851026658816e-05, "loss": 0.0892, "step": 18888 }, { "epoch": 0.3345154449299809, "grad_norm": 1.0586730241775513, "learning_rate": 2.32771335215071e-05, "loss": 0.066, "step": 18889 }, { "epoch": 0.3345331544670093, "grad_norm": 0.3619387149810791, "learning_rate": 2.3276415989124463e-05, "loss": 0.0647, "step": 18890 }, { "epoch": 0.3345508640040378, "grad_norm": 0.8323861360549927, "learning_rate": 2.3275698429513243e-05, "loss": 0.0773, "step": 18891 }, { "epoch": 0.3345685735410662, "grad_norm": 0.655540943145752, "learning_rate": 2.3274980842675822e-05, "loss": 0.0698, "step": 18892 }, { "epoch": 0.3345862830780946, "grad_norm": 0.8115816116333008, "learning_rate": 2.3274263228614545e-05, "loss": 0.1037, "step": 18893 }, { "epoch": 0.3346039926151231, "grad_norm": 0.6839122772216797, "learning_rate": 2.3273545587331778e-05, "loss": 0.1082, "step": 18894 }, { "epoch": 0.3346217021521515, "grad_norm": 0.9985058307647705, "learning_rate": 2.3272827918829885e-05, "loss": 0.0729, "step": 18895 }, { "epoch": 0.33463941168917993, "grad_norm": 0.7956794500350952, "learning_rate": 2.3272110223111224e-05, "loss": 0.0935, "step": 18896 }, { "epoch": 0.33465712122620833, "grad_norm": 0.6996661424636841, "learning_rate": 2.327139250017816e-05, "loss": 0.0788, "step": 18897 }, { "epoch": 0.3346748307632368, "grad_norm": 0.9803223609924316, "learning_rate": 2.3270674750033048e-05, "loss": 0.1121, "step": 18898 }, { "epoch": 0.3346925403002652, "grad_norm": 0.6627029776573181, "learning_rate": 2.3269956972678252e-05, "loss": 0.0622, "step": 18899 }, { "epoch": 0.33471024983729364, "grad_norm": 1.0946295261383057, "learning_rate": 2.326923916811614e-05, "loss": 0.075, "step": 18900 }, { "epoch": 0.33472795937432204, "grad_norm": 0.6804167628288269, "learning_rate": 2.326852133634906e-05, "loss": 0.1034, "step": 18901 }, { "epoch": 0.3347456689113505, "grad_norm": 0.4788772761821747, "learning_rate": 2.3267803477379386e-05, "loss": 0.0843, "step": 18902 }, { "epoch": 0.3347633784483789, "grad_norm": 0.7004315257072449, "learning_rate": 2.326708559120947e-05, "loss": 0.0735, "step": 18903 }, { "epoch": 0.33478108798540734, "grad_norm": 1.0144048929214478, "learning_rate": 2.3266367677841685e-05, "loss": 0.13, "step": 18904 }, { "epoch": 0.33479879752243574, "grad_norm": 0.7403835654258728, "learning_rate": 2.3265649737278382e-05, "loss": 0.0653, "step": 18905 }, { "epoch": 0.3348165070594642, "grad_norm": 0.926667332649231, "learning_rate": 2.3264931769521927e-05, "loss": 0.0818, "step": 18906 }, { "epoch": 0.33483421659649265, "grad_norm": 0.8668155670166016, "learning_rate": 2.3264213774574685e-05, "loss": 0.0893, "step": 18907 }, { "epoch": 0.33485192613352105, "grad_norm": 1.1909894943237305, "learning_rate": 2.3263495752439016e-05, "loss": 0.1211, "step": 18908 }, { "epoch": 0.3348696356705495, "grad_norm": 0.8662069439888, "learning_rate": 2.3262777703117283e-05, "loss": 0.0904, "step": 18909 }, { "epoch": 0.3348873452075779, "grad_norm": 0.7430135607719421, "learning_rate": 2.3262059626611837e-05, "loss": 0.0522, "step": 18910 }, { "epoch": 0.33490505474460636, "grad_norm": 0.5850890278816223, "learning_rate": 2.3261341522925062e-05, "loss": 0.0928, "step": 18911 }, { "epoch": 0.33492276428163475, "grad_norm": 0.6966133117675781, "learning_rate": 2.3260623392059305e-05, "loss": 0.0766, "step": 18912 }, { "epoch": 0.3349404738186632, "grad_norm": 0.7778268456459045, "learning_rate": 2.3259905234016932e-05, "loss": 0.1076, "step": 18913 }, { "epoch": 0.3349581833556916, "grad_norm": 0.6537147760391235, "learning_rate": 2.3259187048800306e-05, "loss": 0.1174, "step": 18914 }, { "epoch": 0.33497589289272006, "grad_norm": 0.8804243803024292, "learning_rate": 2.325846883641179e-05, "loss": 0.0755, "step": 18915 }, { "epoch": 0.33499360242974846, "grad_norm": 1.2756223678588867, "learning_rate": 2.325775059685375e-05, "loss": 0.1096, "step": 18916 }, { "epoch": 0.3350113119667769, "grad_norm": 1.066866397857666, "learning_rate": 2.3257032330128542e-05, "loss": 0.0749, "step": 18917 }, { "epoch": 0.3350290215038053, "grad_norm": 0.8987789750099182, "learning_rate": 2.3256314036238537e-05, "loss": 0.0812, "step": 18918 }, { "epoch": 0.33504673104083377, "grad_norm": 0.9542714357376099, "learning_rate": 2.3255595715186087e-05, "loss": 0.0781, "step": 18919 }, { "epoch": 0.33506444057786217, "grad_norm": 0.95433109998703, "learning_rate": 2.3254877366973565e-05, "loss": 0.0986, "step": 18920 }, { "epoch": 0.3350821501148906, "grad_norm": 0.8563648462295532, "learning_rate": 2.3254158991603335e-05, "loss": 0.1231, "step": 18921 }, { "epoch": 0.3350998596519191, "grad_norm": 1.1607539653778076, "learning_rate": 2.3253440589077753e-05, "loss": 0.117, "step": 18922 }, { "epoch": 0.3351175691889475, "grad_norm": 0.7157827615737915, "learning_rate": 2.3252722159399192e-05, "loss": 0.0666, "step": 18923 }, { "epoch": 0.3351352787259759, "grad_norm": 0.718392014503479, "learning_rate": 2.3252003702570003e-05, "loss": 0.0776, "step": 18924 }, { "epoch": 0.3351529882630043, "grad_norm": 1.0133605003356934, "learning_rate": 2.3251285218592557e-05, "loss": 0.1064, "step": 18925 }, { "epoch": 0.3351706978000328, "grad_norm": 0.4699995219707489, "learning_rate": 2.325056670746922e-05, "loss": 0.0866, "step": 18926 }, { "epoch": 0.3351884073370612, "grad_norm": 0.8812299370765686, "learning_rate": 2.324984816920235e-05, "loss": 0.1073, "step": 18927 }, { "epoch": 0.33520611687408963, "grad_norm": 0.544198215007782, "learning_rate": 2.3249129603794314e-05, "loss": 0.0967, "step": 18928 }, { "epoch": 0.33522382641111803, "grad_norm": 0.5694849491119385, "learning_rate": 2.3248411011247482e-05, "loss": 0.0877, "step": 18929 }, { "epoch": 0.3352415359481465, "grad_norm": 0.48650234937667847, "learning_rate": 2.3247692391564205e-05, "loss": 0.0716, "step": 18930 }, { "epoch": 0.3352592454851749, "grad_norm": 0.8598876595497131, "learning_rate": 2.324697374474686e-05, "loss": 0.069, "step": 18931 }, { "epoch": 0.33527695502220334, "grad_norm": 0.6288015246391296, "learning_rate": 2.32462550707978e-05, "loss": 0.086, "step": 18932 }, { "epoch": 0.33529466455923174, "grad_norm": 1.9633772373199463, "learning_rate": 2.3245536369719397e-05, "loss": 0.1067, "step": 18933 }, { "epoch": 0.3353123740962602, "grad_norm": 0.5771732926368713, "learning_rate": 2.3244817641514014e-05, "loss": 0.0622, "step": 18934 }, { "epoch": 0.3353300836332886, "grad_norm": 1.0417660474777222, "learning_rate": 2.3244098886184014e-05, "loss": 0.1136, "step": 18935 }, { "epoch": 0.33534779317031704, "grad_norm": 0.9586049318313599, "learning_rate": 2.3243380103731765e-05, "loss": 0.0864, "step": 18936 }, { "epoch": 0.3353655027073455, "grad_norm": 0.9071679711341858, "learning_rate": 2.3242661294159628e-05, "loss": 0.0922, "step": 18937 }, { "epoch": 0.3353832122443739, "grad_norm": 0.7707067728042603, "learning_rate": 2.324194245746997e-05, "loss": 0.0722, "step": 18938 }, { "epoch": 0.33540092178140235, "grad_norm": 0.8346729278564453, "learning_rate": 2.3241223593665152e-05, "loss": 0.0975, "step": 18939 }, { "epoch": 0.33541863131843075, "grad_norm": 0.6258634328842163, "learning_rate": 2.324050470274754e-05, "loss": 0.0902, "step": 18940 }, { "epoch": 0.3354363408554592, "grad_norm": 0.9547888040542603, "learning_rate": 2.3239785784719508e-05, "loss": 0.1142, "step": 18941 }, { "epoch": 0.3354540503924876, "grad_norm": 2.4805972576141357, "learning_rate": 2.3239066839583407e-05, "loss": 0.0978, "step": 18942 }, { "epoch": 0.33547175992951606, "grad_norm": 0.9499503970146179, "learning_rate": 2.3238347867341615e-05, "loss": 0.1011, "step": 18943 }, { "epoch": 0.33548946946654445, "grad_norm": 0.45363765954971313, "learning_rate": 2.323762886799649e-05, "loss": 0.0603, "step": 18944 }, { "epoch": 0.3355071790035729, "grad_norm": 0.6194754838943481, "learning_rate": 2.32369098415504e-05, "loss": 0.0706, "step": 18945 }, { "epoch": 0.3355248885406013, "grad_norm": 1.246428370475769, "learning_rate": 2.3236190788005707e-05, "loss": 0.0897, "step": 18946 }, { "epoch": 0.33554259807762976, "grad_norm": 0.7265975475311279, "learning_rate": 2.3235471707364782e-05, "loss": 0.0679, "step": 18947 }, { "epoch": 0.33556030761465816, "grad_norm": 0.927815854549408, "learning_rate": 2.323475259962999e-05, "loss": 0.0894, "step": 18948 }, { "epoch": 0.3355780171516866, "grad_norm": 0.28119659423828125, "learning_rate": 2.3234033464803687e-05, "loss": 0.0854, "step": 18949 }, { "epoch": 0.335595726688715, "grad_norm": 0.9498661756515503, "learning_rate": 2.3233314302888257e-05, "loss": 0.0851, "step": 18950 }, { "epoch": 0.33561343622574347, "grad_norm": 0.5326437950134277, "learning_rate": 2.3232595113886044e-05, "loss": 0.1202, "step": 18951 }, { "epoch": 0.3356311457627719, "grad_norm": 0.7995855808258057, "learning_rate": 2.323187589779944e-05, "loss": 0.055, "step": 18952 }, { "epoch": 0.3356488552998003, "grad_norm": 1.078271508216858, "learning_rate": 2.323115665463078e-05, "loss": 0.0942, "step": 18953 }, { "epoch": 0.3356665648368288, "grad_norm": 0.420436292886734, "learning_rate": 2.323043738438246e-05, "loss": 0.0667, "step": 18954 }, { "epoch": 0.33568427437385717, "grad_norm": 0.4859163165092468, "learning_rate": 2.322971808705683e-05, "loss": 0.109, "step": 18955 }, { "epoch": 0.3357019839108856, "grad_norm": 0.6286353468894958, "learning_rate": 2.322899876265626e-05, "loss": 0.0738, "step": 18956 }, { "epoch": 0.335719693447914, "grad_norm": 1.0174108743667603, "learning_rate": 2.3228279411183113e-05, "loss": 0.1275, "step": 18957 }, { "epoch": 0.3357374029849425, "grad_norm": 0.8635909557342529, "learning_rate": 2.3227560032639762e-05, "loss": 0.0921, "step": 18958 }, { "epoch": 0.3357551125219709, "grad_norm": 0.7409020066261292, "learning_rate": 2.3226840627028572e-05, "loss": 0.0739, "step": 18959 }, { "epoch": 0.33577282205899933, "grad_norm": 0.7900704741477966, "learning_rate": 2.3226121194351905e-05, "loss": 0.0823, "step": 18960 }, { "epoch": 0.33579053159602773, "grad_norm": 0.6749408841133118, "learning_rate": 2.3225401734612138e-05, "loss": 0.0378, "step": 18961 }, { "epoch": 0.3358082411330562, "grad_norm": 1.0904793739318848, "learning_rate": 2.3224682247811624e-05, "loss": 0.0585, "step": 18962 }, { "epoch": 0.3358259506700846, "grad_norm": 0.7559425830841064, "learning_rate": 2.322396273395274e-05, "loss": 0.0855, "step": 18963 }, { "epoch": 0.33584366020711304, "grad_norm": 0.8008408546447754, "learning_rate": 2.322324319303785e-05, "loss": 0.0745, "step": 18964 }, { "epoch": 0.3358613697441415, "grad_norm": 0.7446514964103699, "learning_rate": 2.3222523625069318e-05, "loss": 0.09, "step": 18965 }, { "epoch": 0.3358790792811699, "grad_norm": 1.2186270952224731, "learning_rate": 2.322180403004952e-05, "loss": 0.1126, "step": 18966 }, { "epoch": 0.33589678881819834, "grad_norm": 0.9144564270973206, "learning_rate": 2.322108440798082e-05, "loss": 0.0907, "step": 18967 }, { "epoch": 0.33591449835522674, "grad_norm": 0.536359429359436, "learning_rate": 2.322036475886558e-05, "loss": 0.0839, "step": 18968 }, { "epoch": 0.3359322078922552, "grad_norm": 0.6448113322257996, "learning_rate": 2.321964508270617e-05, "loss": 0.0532, "step": 18969 }, { "epoch": 0.3359499174292836, "grad_norm": 0.5934745669364929, "learning_rate": 2.321892537950496e-05, "loss": 0.084, "step": 18970 }, { "epoch": 0.33596762696631205, "grad_norm": 0.5141407251358032, "learning_rate": 2.321820564926432e-05, "loss": 0.0908, "step": 18971 }, { "epoch": 0.33598533650334045, "grad_norm": 0.6790372133255005, "learning_rate": 2.3217485891986614e-05, "loss": 0.0564, "step": 18972 }, { "epoch": 0.3360030460403689, "grad_norm": 0.9359658360481262, "learning_rate": 2.3216766107674206e-05, "loss": 0.1025, "step": 18973 }, { "epoch": 0.3360207555773973, "grad_norm": 1.0200470685958862, "learning_rate": 2.321604629632947e-05, "loss": 0.095, "step": 18974 }, { "epoch": 0.33603846511442576, "grad_norm": 0.7834885120391846, "learning_rate": 2.321532645795478e-05, "loss": 0.1263, "step": 18975 }, { "epoch": 0.33605617465145415, "grad_norm": 0.9348554015159607, "learning_rate": 2.321460659255249e-05, "loss": 0.0951, "step": 18976 }, { "epoch": 0.3360738841884826, "grad_norm": 0.8502254486083984, "learning_rate": 2.3213886700124976e-05, "loss": 0.0613, "step": 18977 }, { "epoch": 0.336091593725511, "grad_norm": 0.9150997400283813, "learning_rate": 2.3213166780674608e-05, "loss": 0.1009, "step": 18978 }, { "epoch": 0.33610930326253946, "grad_norm": 0.712360143661499, "learning_rate": 2.3212446834203746e-05, "loss": 0.0796, "step": 18979 }, { "epoch": 0.3361270127995679, "grad_norm": 0.7832396030426025, "learning_rate": 2.3211726860714773e-05, "loss": 0.1537, "step": 18980 }, { "epoch": 0.3361447223365963, "grad_norm": 1.098426342010498, "learning_rate": 2.3211006860210044e-05, "loss": 0.0776, "step": 18981 }, { "epoch": 0.33616243187362477, "grad_norm": 1.1516083478927612, "learning_rate": 2.3210286832691933e-05, "loss": 0.1208, "step": 18982 }, { "epoch": 0.33618014141065317, "grad_norm": 0.6857410669326782, "learning_rate": 2.3209566778162808e-05, "loss": 0.0735, "step": 18983 }, { "epoch": 0.3361978509476816, "grad_norm": 0.7824838161468506, "learning_rate": 2.3208846696625042e-05, "loss": 0.107, "step": 18984 }, { "epoch": 0.33621556048471, "grad_norm": 0.7118711471557617, "learning_rate": 2.3208126588080996e-05, "loss": 0.1199, "step": 18985 }, { "epoch": 0.3362332700217385, "grad_norm": 0.7426016926765442, "learning_rate": 2.3207406452533048e-05, "loss": 0.0961, "step": 18986 }, { "epoch": 0.33625097955876687, "grad_norm": 0.6850648522377014, "learning_rate": 2.3206686289983567e-05, "loss": 0.0857, "step": 18987 }, { "epoch": 0.3362686890957953, "grad_norm": 1.179351806640625, "learning_rate": 2.3205966100434907e-05, "loss": 0.0989, "step": 18988 }, { "epoch": 0.3362863986328237, "grad_norm": 0.9115477204322815, "learning_rate": 2.3205245883889454e-05, "loss": 0.0528, "step": 18989 }, { "epoch": 0.3363041081698522, "grad_norm": 0.8538554906845093, "learning_rate": 2.3204525640349572e-05, "loss": 0.0909, "step": 18990 }, { "epoch": 0.3363218177068806, "grad_norm": 0.8127942085266113, "learning_rate": 2.320380536981763e-05, "loss": 0.0766, "step": 18991 }, { "epoch": 0.33633952724390903, "grad_norm": 0.7766463160514832, "learning_rate": 2.3203085072295997e-05, "loss": 0.118, "step": 18992 }, { "epoch": 0.33635723678093743, "grad_norm": 0.4364044964313507, "learning_rate": 2.320236474778705e-05, "loss": 0.0682, "step": 18993 }, { "epoch": 0.3363749463179659, "grad_norm": 0.5383670330047607, "learning_rate": 2.3201644396293145e-05, "loss": 0.0818, "step": 18994 }, { "epoch": 0.33639265585499434, "grad_norm": 0.8793129324913025, "learning_rate": 2.3200924017816666e-05, "loss": 0.0993, "step": 18995 }, { "epoch": 0.33641036539202274, "grad_norm": 0.9984785914421082, "learning_rate": 2.320020361235997e-05, "loss": 0.1131, "step": 18996 }, { "epoch": 0.3364280749290512, "grad_norm": 0.5419847965240479, "learning_rate": 2.3199483179925438e-05, "loss": 0.0945, "step": 18997 }, { "epoch": 0.3364457844660796, "grad_norm": 0.3643720746040344, "learning_rate": 2.3198762720515432e-05, "loss": 0.0779, "step": 18998 }, { "epoch": 0.33646349400310804, "grad_norm": 0.7386918067932129, "learning_rate": 2.319804223413233e-05, "loss": 0.0849, "step": 18999 }, { "epoch": 0.33648120354013644, "grad_norm": 0.6117910742759705, "learning_rate": 2.3197321720778495e-05, "loss": 0.0605, "step": 19000 }, { "epoch": 0.3364989130771649, "grad_norm": 0.5173048377037048, "learning_rate": 2.31966011804563e-05, "loss": 0.0628, "step": 19001 }, { "epoch": 0.3365166226141933, "grad_norm": 0.7137377858161926, "learning_rate": 2.3195880613168124e-05, "loss": 0.0754, "step": 19002 }, { "epoch": 0.33653433215122175, "grad_norm": 0.6711768507957458, "learning_rate": 2.319516001891632e-05, "loss": 0.0697, "step": 19003 }, { "epoch": 0.33655204168825015, "grad_norm": 1.133744716644287, "learning_rate": 2.3194439397703273e-05, "loss": 0.0823, "step": 19004 }, { "epoch": 0.3365697512252786, "grad_norm": 0.6342160701751709, "learning_rate": 2.319371874953135e-05, "loss": 0.0667, "step": 19005 }, { "epoch": 0.336587460762307, "grad_norm": 0.8250131011009216, "learning_rate": 2.3192998074402914e-05, "loss": 0.1027, "step": 19006 }, { "epoch": 0.33660517029933545, "grad_norm": 0.6797378063201904, "learning_rate": 2.3192277372320354e-05, "loss": 0.0938, "step": 19007 }, { "epoch": 0.33662287983636385, "grad_norm": 0.49238234758377075, "learning_rate": 2.3191556643286022e-05, "loss": 0.1144, "step": 19008 }, { "epoch": 0.3366405893733923, "grad_norm": 0.9142003655433655, "learning_rate": 2.31908358873023e-05, "loss": 0.1071, "step": 19009 }, { "epoch": 0.33665829891042076, "grad_norm": 0.45646578073501587, "learning_rate": 2.319011510437156e-05, "loss": 0.0505, "step": 19010 }, { "epoch": 0.33667600844744916, "grad_norm": 0.9753214716911316, "learning_rate": 2.3189394294496163e-05, "loss": 0.1265, "step": 19011 }, { "epoch": 0.3366937179844776, "grad_norm": 0.5630425810813904, "learning_rate": 2.3188673457678488e-05, "loss": 0.0688, "step": 19012 }, { "epoch": 0.336711427521506, "grad_norm": 0.618368923664093, "learning_rate": 2.318795259392091e-05, "loss": 0.0823, "step": 19013 }, { "epoch": 0.33672913705853447, "grad_norm": 0.3298785090446472, "learning_rate": 2.3187231703225793e-05, "loss": 0.0666, "step": 19014 }, { "epoch": 0.33674684659556287, "grad_norm": 0.6129916310310364, "learning_rate": 2.3186510785595513e-05, "loss": 0.0717, "step": 19015 }, { "epoch": 0.3367645561325913, "grad_norm": 2.3246219158172607, "learning_rate": 2.318578984103244e-05, "loss": 0.1106, "step": 19016 }, { "epoch": 0.3367822656696197, "grad_norm": 0.6532950401306152, "learning_rate": 2.3185068869538943e-05, "loss": 0.073, "step": 19017 }, { "epoch": 0.33679997520664817, "grad_norm": 0.6821510195732117, "learning_rate": 2.3184347871117404e-05, "loss": 0.1212, "step": 19018 }, { "epoch": 0.33681768474367657, "grad_norm": 0.6793180704116821, "learning_rate": 2.3183626845770184e-05, "loss": 0.088, "step": 19019 }, { "epoch": 0.336835394280705, "grad_norm": 0.9356226921081543, "learning_rate": 2.3182905793499657e-05, "loss": 0.1159, "step": 19020 }, { "epoch": 0.3368531038177334, "grad_norm": 0.5895965695381165, "learning_rate": 2.3182184714308203e-05, "loss": 0.075, "step": 19021 }, { "epoch": 0.3368708133547619, "grad_norm": 0.9689865112304688, "learning_rate": 2.3181463608198188e-05, "loss": 0.1159, "step": 19022 }, { "epoch": 0.3368885228917903, "grad_norm": 0.7798404693603516, "learning_rate": 2.3180742475171983e-05, "loss": 0.0781, "step": 19023 }, { "epoch": 0.33690623242881873, "grad_norm": 0.4539802372455597, "learning_rate": 2.3180021315231964e-05, "loss": 0.0697, "step": 19024 }, { "epoch": 0.3369239419658472, "grad_norm": 0.796633243560791, "learning_rate": 2.31793001283805e-05, "loss": 0.0777, "step": 19025 }, { "epoch": 0.3369416515028756, "grad_norm": 0.5695853233337402, "learning_rate": 2.3178578914619968e-05, "loss": 0.0895, "step": 19026 }, { "epoch": 0.33695936103990404, "grad_norm": 1.1119885444641113, "learning_rate": 2.3177857673952738e-05, "loss": 0.1071, "step": 19027 }, { "epoch": 0.33697707057693244, "grad_norm": 0.943617045879364, "learning_rate": 2.3177136406381187e-05, "loss": 0.0938, "step": 19028 }, { "epoch": 0.3369947801139609, "grad_norm": 0.38738206028938293, "learning_rate": 2.317641511190768e-05, "loss": 0.1033, "step": 19029 }, { "epoch": 0.3370124896509893, "grad_norm": 0.5476847290992737, "learning_rate": 2.3175693790534596e-05, "loss": 0.0799, "step": 19030 }, { "epoch": 0.33703019918801774, "grad_norm": 0.7636203765869141, "learning_rate": 2.3174972442264305e-05, "loss": 0.1037, "step": 19031 }, { "epoch": 0.33704790872504614, "grad_norm": 0.7699692249298096, "learning_rate": 2.3174251067099184e-05, "loss": 0.0718, "step": 19032 }, { "epoch": 0.3370656182620746, "grad_norm": 0.8523805141448975, "learning_rate": 2.31735296650416e-05, "loss": 0.0864, "step": 19033 }, { "epoch": 0.337083327799103, "grad_norm": 1.030083179473877, "learning_rate": 2.3172808236093932e-05, "loss": 0.0723, "step": 19034 }, { "epoch": 0.33710103733613145, "grad_norm": 0.685072124004364, "learning_rate": 2.317208678025855e-05, "loss": 0.1264, "step": 19035 }, { "epoch": 0.33711874687315985, "grad_norm": 0.640733003616333, "learning_rate": 2.3171365297537833e-05, "loss": 0.0924, "step": 19036 }, { "epoch": 0.3371364564101883, "grad_norm": 0.9947891235351562, "learning_rate": 2.317064378793415e-05, "loss": 0.1176, "step": 19037 }, { "epoch": 0.3371541659472167, "grad_norm": 0.6813101768493652, "learning_rate": 2.316992225144987e-05, "loss": 0.044, "step": 19038 }, { "epoch": 0.33717187548424515, "grad_norm": 0.8987317085266113, "learning_rate": 2.3169200688087376e-05, "loss": 0.0887, "step": 19039 }, { "epoch": 0.3371895850212736, "grad_norm": 1.0589674711227417, "learning_rate": 2.3168479097849035e-05, "loss": 0.0884, "step": 19040 }, { "epoch": 0.337207294558302, "grad_norm": 0.9414179921150208, "learning_rate": 2.316775748073723e-05, "loss": 0.1323, "step": 19041 }, { "epoch": 0.33722500409533046, "grad_norm": 1.088903546333313, "learning_rate": 2.3167035836754327e-05, "loss": 0.0615, "step": 19042 }, { "epoch": 0.33724271363235886, "grad_norm": 0.6173678040504456, "learning_rate": 2.31663141659027e-05, "loss": 0.0969, "step": 19043 }, { "epoch": 0.3372604231693873, "grad_norm": 0.7573906183242798, "learning_rate": 2.3165592468184728e-05, "loss": 0.0699, "step": 19044 }, { "epoch": 0.3372781327064157, "grad_norm": 0.8701938986778259, "learning_rate": 2.3164870743602778e-05, "loss": 0.1241, "step": 19045 }, { "epoch": 0.33729584224344417, "grad_norm": 0.5781208276748657, "learning_rate": 2.316414899215923e-05, "loss": 0.0763, "step": 19046 }, { "epoch": 0.33731355178047256, "grad_norm": 0.6785714626312256, "learning_rate": 2.3163427213856462e-05, "loss": 0.1072, "step": 19047 }, { "epoch": 0.337331261317501, "grad_norm": 0.5790959000587463, "learning_rate": 2.3162705408696844e-05, "loss": 0.0865, "step": 19048 }, { "epoch": 0.3373489708545294, "grad_norm": 1.0219663381576538, "learning_rate": 2.3161983576682744e-05, "loss": 0.0867, "step": 19049 }, { "epoch": 0.33736668039155787, "grad_norm": 0.567954421043396, "learning_rate": 2.3161261717816555e-05, "loss": 0.1302, "step": 19050 }, { "epoch": 0.33738438992858627, "grad_norm": 0.7599822878837585, "learning_rate": 2.316053983210063e-05, "loss": 0.101, "step": 19051 }, { "epoch": 0.3374020994656147, "grad_norm": 0.5350169539451599, "learning_rate": 2.3159817919537356e-05, "loss": 0.0978, "step": 19052 }, { "epoch": 0.3374198090026431, "grad_norm": 0.5545888543128967, "learning_rate": 2.3159095980129107e-05, "loss": 0.0884, "step": 19053 }, { "epoch": 0.3374375185396716, "grad_norm": 1.2427762746810913, "learning_rate": 2.315837401387826e-05, "loss": 0.1198, "step": 19054 }, { "epoch": 0.33745522807670003, "grad_norm": 0.9340857863426208, "learning_rate": 2.3157652020787187e-05, "loss": 0.0965, "step": 19055 }, { "epoch": 0.33747293761372843, "grad_norm": 1.4188019037246704, "learning_rate": 2.3156930000858262e-05, "loss": 0.0844, "step": 19056 }, { "epoch": 0.3374906471507569, "grad_norm": 0.9993862509727478, "learning_rate": 2.3156207954093866e-05, "loss": 0.0709, "step": 19057 }, { "epoch": 0.3375083566877853, "grad_norm": 1.154300570487976, "learning_rate": 2.3155485880496367e-05, "loss": 0.1116, "step": 19058 }, { "epoch": 0.33752606622481374, "grad_norm": 1.049134373664856, "learning_rate": 2.3154763780068148e-05, "loss": 0.1093, "step": 19059 }, { "epoch": 0.33754377576184214, "grad_norm": 0.5989999175071716, "learning_rate": 2.3154041652811577e-05, "loss": 0.0554, "step": 19060 }, { "epoch": 0.3375614852988706, "grad_norm": 0.7694869637489319, "learning_rate": 2.3153319498729035e-05, "loss": 0.1138, "step": 19061 }, { "epoch": 0.337579194835899, "grad_norm": 0.5325203537940979, "learning_rate": 2.31525973178229e-05, "loss": 0.103, "step": 19062 }, { "epoch": 0.33759690437292744, "grad_norm": 0.6308879852294922, "learning_rate": 2.3151875110095536e-05, "loss": 0.1026, "step": 19063 }, { "epoch": 0.33761461390995584, "grad_norm": 0.6317876577377319, "learning_rate": 2.3151152875549336e-05, "loss": 0.0963, "step": 19064 }, { "epoch": 0.3376323234469843, "grad_norm": 0.6639106869697571, "learning_rate": 2.3150430614186663e-05, "loss": 0.0818, "step": 19065 }, { "epoch": 0.3376500329840127, "grad_norm": 0.6636464595794678, "learning_rate": 2.3149708326009898e-05, "loss": 0.0928, "step": 19066 }, { "epoch": 0.33766774252104115, "grad_norm": 0.6387166380882263, "learning_rate": 2.314898601102142e-05, "loss": 0.0692, "step": 19067 }, { "epoch": 0.33768545205806955, "grad_norm": 0.6858764886856079, "learning_rate": 2.3148263669223597e-05, "loss": 0.0988, "step": 19068 }, { "epoch": 0.337703161595098, "grad_norm": 0.8113540410995483, "learning_rate": 2.3147541300618817e-05, "loss": 0.0726, "step": 19069 }, { "epoch": 0.33772087113212645, "grad_norm": 0.8644052743911743, "learning_rate": 2.3146818905209446e-05, "loss": 0.0713, "step": 19070 }, { "epoch": 0.33773858066915485, "grad_norm": 0.7809106707572937, "learning_rate": 2.3146096482997866e-05, "loss": 0.086, "step": 19071 }, { "epoch": 0.3377562902061833, "grad_norm": 0.48021399974823, "learning_rate": 2.314537403398645e-05, "loss": 0.0598, "step": 19072 }, { "epoch": 0.3377739997432117, "grad_norm": 0.5333887934684753, "learning_rate": 2.314465155817758e-05, "loss": 0.0905, "step": 19073 }, { "epoch": 0.33779170928024016, "grad_norm": 0.45220711827278137, "learning_rate": 2.3143929055573628e-05, "loss": 0.0691, "step": 19074 }, { "epoch": 0.33780941881726856, "grad_norm": 0.9059886932373047, "learning_rate": 2.3143206526176977e-05, "loss": 0.0853, "step": 19075 }, { "epoch": 0.337827128354297, "grad_norm": 0.4838781952857971, "learning_rate": 2.314248396999e-05, "loss": 0.0722, "step": 19076 }, { "epoch": 0.3378448378913254, "grad_norm": 0.5776859521865845, "learning_rate": 2.3141761387015065e-05, "loss": 0.0713, "step": 19077 }, { "epoch": 0.33786254742835387, "grad_norm": 0.5289576649665833, "learning_rate": 2.314103877725457e-05, "loss": 0.0589, "step": 19078 }, { "epoch": 0.33788025696538226, "grad_norm": 0.8581174612045288, "learning_rate": 2.3140316140710878e-05, "loss": 0.0882, "step": 19079 }, { "epoch": 0.3378979665024107, "grad_norm": 0.46506473422050476, "learning_rate": 2.3139593477386365e-05, "loss": 0.0436, "step": 19080 }, { "epoch": 0.3379156760394391, "grad_norm": 0.4729664921760559, "learning_rate": 2.3138870787283417e-05, "loss": 0.0798, "step": 19081 }, { "epoch": 0.33793338557646757, "grad_norm": 1.3206441402435303, "learning_rate": 2.3138148070404405e-05, "loss": 0.129, "step": 19082 }, { "epoch": 0.33795109511349597, "grad_norm": 0.6194357872009277, "learning_rate": 2.3137425326751712e-05, "loss": 0.0667, "step": 19083 }, { "epoch": 0.3379688046505244, "grad_norm": 0.5987380146980286, "learning_rate": 2.3136702556327706e-05, "loss": 0.1092, "step": 19084 }, { "epoch": 0.3379865141875529, "grad_norm": 0.9113197922706604, "learning_rate": 2.313597975913478e-05, "loss": 0.1049, "step": 19085 }, { "epoch": 0.3380042237245813, "grad_norm": 0.737296462059021, "learning_rate": 2.31352569351753e-05, "loss": 0.0712, "step": 19086 }, { "epoch": 0.33802193326160973, "grad_norm": 1.0250146389007568, "learning_rate": 2.3134534084451644e-05, "loss": 0.1289, "step": 19087 }, { "epoch": 0.33803964279863813, "grad_norm": 0.9887124300003052, "learning_rate": 2.3133811206966197e-05, "loss": 0.0849, "step": 19088 }, { "epoch": 0.3380573523356666, "grad_norm": 0.8653874397277832, "learning_rate": 2.3133088302721333e-05, "loss": 0.1233, "step": 19089 }, { "epoch": 0.338075061872695, "grad_norm": 0.7986204028129578, "learning_rate": 2.3132365371719433e-05, "loss": 0.0882, "step": 19090 }, { "epoch": 0.33809277140972344, "grad_norm": 0.9570438861846924, "learning_rate": 2.3131642413962873e-05, "loss": 0.0976, "step": 19091 }, { "epoch": 0.33811048094675183, "grad_norm": 0.7717917561531067, "learning_rate": 2.313091942945403e-05, "loss": 0.0869, "step": 19092 }, { "epoch": 0.3381281904837803, "grad_norm": 0.539852499961853, "learning_rate": 2.3130196418195287e-05, "loss": 0.0816, "step": 19093 }, { "epoch": 0.3381459000208087, "grad_norm": 0.6653847098350525, "learning_rate": 2.312947338018902e-05, "loss": 0.1093, "step": 19094 }, { "epoch": 0.33816360955783714, "grad_norm": 1.4372193813323975, "learning_rate": 2.31287503154376e-05, "loss": 0.1052, "step": 19095 }, { "epoch": 0.33818131909486554, "grad_norm": 0.6163147687911987, "learning_rate": 2.3128027223943426e-05, "loss": 0.0595, "step": 19096 }, { "epoch": 0.338199028631894, "grad_norm": 0.6674394011497498, "learning_rate": 2.3127304105708854e-05, "loss": 0.0921, "step": 19097 }, { "epoch": 0.3382167381689224, "grad_norm": 0.6027278900146484, "learning_rate": 2.3126580960736284e-05, "loss": 0.0965, "step": 19098 }, { "epoch": 0.33823444770595085, "grad_norm": 0.7904506325721741, "learning_rate": 2.3125857789028077e-05, "loss": 0.0998, "step": 19099 }, { "epoch": 0.3382521572429793, "grad_norm": 0.5500230193138123, "learning_rate": 2.312513459058662e-05, "loss": 0.0589, "step": 19100 }, { "epoch": 0.3382698667800077, "grad_norm": 1.035394549369812, "learning_rate": 2.3124411365414294e-05, "loss": 0.0897, "step": 19101 }, { "epoch": 0.33828757631703615, "grad_norm": 0.55576092004776, "learning_rate": 2.3123688113513475e-05, "loss": 0.1009, "step": 19102 }, { "epoch": 0.33830528585406455, "grad_norm": 1.112777590751648, "learning_rate": 2.3122964834886542e-05, "loss": 0.0953, "step": 19103 }, { "epoch": 0.338322995391093, "grad_norm": 0.9666395783424377, "learning_rate": 2.312224152953588e-05, "loss": 0.1117, "step": 19104 }, { "epoch": 0.3383407049281214, "grad_norm": 1.1435012817382812, "learning_rate": 2.3121518197463866e-05, "loss": 0.0893, "step": 19105 }, { "epoch": 0.33835841446514986, "grad_norm": 1.2755227088928223, "learning_rate": 2.312079483867288e-05, "loss": 0.0968, "step": 19106 }, { "epoch": 0.33837612400217826, "grad_norm": 0.6775720715522766, "learning_rate": 2.312007145316529e-05, "loss": 0.0953, "step": 19107 }, { "epoch": 0.3383938335392067, "grad_norm": 0.9137828350067139, "learning_rate": 2.3119348040943494e-05, "loss": 0.1315, "step": 19108 }, { "epoch": 0.3384115430762351, "grad_norm": 0.6975823640823364, "learning_rate": 2.311862460200986e-05, "loss": 0.0675, "step": 19109 }, { "epoch": 0.33842925261326356, "grad_norm": 1.155412197113037, "learning_rate": 2.311790113636678e-05, "loss": 0.096, "step": 19110 }, { "epoch": 0.33844696215029196, "grad_norm": 0.32342711091041565, "learning_rate": 2.311717764401662e-05, "loss": 0.049, "step": 19111 }, { "epoch": 0.3384646716873204, "grad_norm": 0.7359052896499634, "learning_rate": 2.3116454124961767e-05, "loss": 0.0516, "step": 19112 }, { "epoch": 0.3384823812243488, "grad_norm": 0.5024667978286743, "learning_rate": 2.3115730579204607e-05, "loss": 0.0684, "step": 19113 }, { "epoch": 0.33850009076137727, "grad_norm": 0.7929632067680359, "learning_rate": 2.311500700674751e-05, "loss": 0.0858, "step": 19114 }, { "epoch": 0.3385178002984057, "grad_norm": 0.9035959243774414, "learning_rate": 2.3114283407592857e-05, "loss": 0.0588, "step": 19115 }, { "epoch": 0.3385355098354341, "grad_norm": 0.6982946991920471, "learning_rate": 2.311355978174303e-05, "loss": 0.081, "step": 19116 }, { "epoch": 0.3385532193724626, "grad_norm": 0.751790463924408, "learning_rate": 2.3112836129200424e-05, "loss": 0.0847, "step": 19117 }, { "epoch": 0.338570928909491, "grad_norm": 0.8330448269844055, "learning_rate": 2.3112112449967396e-05, "loss": 0.0853, "step": 19118 }, { "epoch": 0.33858863844651943, "grad_norm": 0.6055347323417664, "learning_rate": 2.3111388744046346e-05, "loss": 0.0869, "step": 19119 }, { "epoch": 0.33860634798354783, "grad_norm": 0.795702338218689, "learning_rate": 2.3110665011439644e-05, "loss": 0.0856, "step": 19120 }, { "epoch": 0.3386240575205763, "grad_norm": 1.0849189758300781, "learning_rate": 2.3109941252149676e-05, "loss": 0.1162, "step": 19121 }, { "epoch": 0.3386417670576047, "grad_norm": 0.8438953757286072, "learning_rate": 2.310921746617882e-05, "loss": 0.0832, "step": 19122 }, { "epoch": 0.33865947659463314, "grad_norm": 1.074741244316101, "learning_rate": 2.310849365352946e-05, "loss": 0.0885, "step": 19123 }, { "epoch": 0.33867718613166153, "grad_norm": 0.7463719248771667, "learning_rate": 2.3107769814203977e-05, "loss": 0.0774, "step": 19124 }, { "epoch": 0.33869489566869, "grad_norm": 0.8171775937080383, "learning_rate": 2.310704594820475e-05, "loss": 0.0955, "step": 19125 }, { "epoch": 0.3387126052057184, "grad_norm": 0.9293091297149658, "learning_rate": 2.3106322055534163e-05, "loss": 0.0927, "step": 19126 }, { "epoch": 0.33873031474274684, "grad_norm": 0.5934111475944519, "learning_rate": 2.3105598136194596e-05, "loss": 0.0682, "step": 19127 }, { "epoch": 0.33874802427977524, "grad_norm": 0.9470354914665222, "learning_rate": 2.3104874190188426e-05, "loss": 0.0762, "step": 19128 }, { "epoch": 0.3387657338168037, "grad_norm": 0.7815018892288208, "learning_rate": 2.3104150217518045e-05, "loss": 0.0622, "step": 19129 }, { "epoch": 0.33878344335383215, "grad_norm": 1.0990568399429321, "learning_rate": 2.310342621818583e-05, "loss": 0.0997, "step": 19130 }, { "epoch": 0.33880115289086055, "grad_norm": 0.6591281890869141, "learning_rate": 2.3102702192194158e-05, "loss": 0.0998, "step": 19131 }, { "epoch": 0.338818862427889, "grad_norm": 0.8014640808105469, "learning_rate": 2.310197813954542e-05, "loss": 0.0846, "step": 19132 }, { "epoch": 0.3388365719649174, "grad_norm": 0.7756562232971191, "learning_rate": 2.3101254060241992e-05, "loss": 0.0698, "step": 19133 }, { "epoch": 0.33885428150194585, "grad_norm": 0.5487890839576721, "learning_rate": 2.3100529954286254e-05, "loss": 0.087, "step": 19134 }, { "epoch": 0.33887199103897425, "grad_norm": 0.8290695548057556, "learning_rate": 2.3099805821680596e-05, "loss": 0.1023, "step": 19135 }, { "epoch": 0.3388897005760027, "grad_norm": 0.49947887659072876, "learning_rate": 2.3099081662427394e-05, "loss": 0.0671, "step": 19136 }, { "epoch": 0.3389074101130311, "grad_norm": 0.6426383852958679, "learning_rate": 2.309835747652903e-05, "loss": 0.1036, "step": 19137 }, { "epoch": 0.33892511965005956, "grad_norm": 0.730614960193634, "learning_rate": 2.3097633263987895e-05, "loss": 0.0814, "step": 19138 }, { "epoch": 0.33894282918708796, "grad_norm": 0.9731719493865967, "learning_rate": 2.3096909024806363e-05, "loss": 0.0842, "step": 19139 }, { "epoch": 0.3389605387241164, "grad_norm": 1.1641874313354492, "learning_rate": 2.3096184758986817e-05, "loss": 0.0839, "step": 19140 }, { "epoch": 0.3389782482611448, "grad_norm": 0.7254600524902344, "learning_rate": 2.3095460466531646e-05, "loss": 0.0859, "step": 19141 }, { "epoch": 0.33899595779817326, "grad_norm": 0.4954933226108551, "learning_rate": 2.3094736147443228e-05, "loss": 0.0745, "step": 19142 }, { "epoch": 0.33901366733520166, "grad_norm": 0.3127875328063965, "learning_rate": 2.3094011801723945e-05, "loss": 0.08, "step": 19143 }, { "epoch": 0.3390313768722301, "grad_norm": 1.0724081993103027, "learning_rate": 2.3093287429376186e-05, "loss": 0.1192, "step": 19144 }, { "epoch": 0.33904908640925857, "grad_norm": 0.8469009399414062, "learning_rate": 2.3092563030402322e-05, "loss": 0.1112, "step": 19145 }, { "epoch": 0.33906679594628697, "grad_norm": 0.7883480787277222, "learning_rate": 2.309183860480475e-05, "loss": 0.1124, "step": 19146 }, { "epoch": 0.3390845054833154, "grad_norm": 0.721109926700592, "learning_rate": 2.3091114152585845e-05, "loss": 0.0875, "step": 19147 }, { "epoch": 0.3391022150203438, "grad_norm": 1.1030184030532837, "learning_rate": 2.3090389673748e-05, "loss": 0.1343, "step": 19148 }, { "epoch": 0.3391199245573723, "grad_norm": 1.158357858657837, "learning_rate": 2.3089665168293583e-05, "loss": 0.0963, "step": 19149 }, { "epoch": 0.3391376340944007, "grad_norm": 0.6957146525382996, "learning_rate": 2.3088940636224984e-05, "loss": 0.0898, "step": 19150 }, { "epoch": 0.33915534363142913, "grad_norm": 0.8353849649429321, "learning_rate": 2.3088216077544593e-05, "loss": 0.0771, "step": 19151 }, { "epoch": 0.33917305316845753, "grad_norm": 1.110024333000183, "learning_rate": 2.308749149225479e-05, "loss": 0.139, "step": 19152 }, { "epoch": 0.339190762705486, "grad_norm": 0.5763425827026367, "learning_rate": 2.308676688035796e-05, "loss": 0.0738, "step": 19153 }, { "epoch": 0.3392084722425144, "grad_norm": 0.608454704284668, "learning_rate": 2.308604224185648e-05, "loss": 0.0847, "step": 19154 }, { "epoch": 0.33922618177954283, "grad_norm": 0.7913001179695129, "learning_rate": 2.3085317576752737e-05, "loss": 0.069, "step": 19155 }, { "epoch": 0.33924389131657123, "grad_norm": 0.68561190366745, "learning_rate": 2.3084592885049125e-05, "loss": 0.0978, "step": 19156 }, { "epoch": 0.3392616008535997, "grad_norm": 0.9417885541915894, "learning_rate": 2.3083868166748014e-05, "loss": 0.09, "step": 19157 }, { "epoch": 0.3392793103906281, "grad_norm": 0.499815434217453, "learning_rate": 2.3083143421851797e-05, "loss": 0.0818, "step": 19158 }, { "epoch": 0.33929701992765654, "grad_norm": 0.6891815066337585, "learning_rate": 2.3082418650362854e-05, "loss": 0.0612, "step": 19159 }, { "epoch": 0.339314729464685, "grad_norm": 0.64341139793396, "learning_rate": 2.308169385228357e-05, "loss": 0.1209, "step": 19160 }, { "epoch": 0.3393324390017134, "grad_norm": 0.5859222412109375, "learning_rate": 2.3080969027616334e-05, "loss": 0.0897, "step": 19161 }, { "epoch": 0.33935014853874185, "grad_norm": 0.7436326146125793, "learning_rate": 2.3080244176363525e-05, "loss": 0.0672, "step": 19162 }, { "epoch": 0.33936785807577025, "grad_norm": 0.8997447490692139, "learning_rate": 2.307951929852753e-05, "loss": 0.1078, "step": 19163 }, { "epoch": 0.3393855676127987, "grad_norm": 0.6599385738372803, "learning_rate": 2.3078794394110736e-05, "loss": 0.0821, "step": 19164 }, { "epoch": 0.3394032771498271, "grad_norm": 1.0016270875930786, "learning_rate": 2.3078069463115522e-05, "loss": 0.1114, "step": 19165 }, { "epoch": 0.33942098668685555, "grad_norm": 0.5533618927001953, "learning_rate": 2.3077344505544276e-05, "loss": 0.0896, "step": 19166 }, { "epoch": 0.33943869622388395, "grad_norm": 0.45542219281196594, "learning_rate": 2.3076619521399388e-05, "loss": 0.0775, "step": 19167 }, { "epoch": 0.3394564057609124, "grad_norm": 0.6764529347419739, "learning_rate": 2.3075894510683236e-05, "loss": 0.1071, "step": 19168 }, { "epoch": 0.3394741152979408, "grad_norm": 0.5668690204620361, "learning_rate": 2.3075169473398204e-05, "loss": 0.0587, "step": 19169 }, { "epoch": 0.33949182483496926, "grad_norm": 0.48954862356185913, "learning_rate": 2.307444440954669e-05, "loss": 0.0693, "step": 19170 }, { "epoch": 0.33950953437199766, "grad_norm": 0.8865411281585693, "learning_rate": 2.3073719319131065e-05, "loss": 0.1051, "step": 19171 }, { "epoch": 0.3395272439090261, "grad_norm": 0.8976085782051086, "learning_rate": 2.3072994202153725e-05, "loss": 0.0831, "step": 19172 }, { "epoch": 0.3395449534460545, "grad_norm": 0.998832106590271, "learning_rate": 2.3072269058617043e-05, "loss": 0.0943, "step": 19173 }, { "epoch": 0.33956266298308296, "grad_norm": 0.8259385228157043, "learning_rate": 2.3071543888523418e-05, "loss": 0.0882, "step": 19174 }, { "epoch": 0.3395803725201114, "grad_norm": 0.8354207277297974, "learning_rate": 2.3070818691875223e-05, "loss": 0.0692, "step": 19175 }, { "epoch": 0.3395980820571398, "grad_norm": 0.5528804063796997, "learning_rate": 2.307009346867486e-05, "loss": 0.072, "step": 19176 }, { "epoch": 0.33961579159416827, "grad_norm": 1.021331787109375, "learning_rate": 2.3069368218924704e-05, "loss": 0.0467, "step": 19177 }, { "epoch": 0.33963350113119667, "grad_norm": 0.591366171836853, "learning_rate": 2.306864294262714e-05, "loss": 0.0733, "step": 19178 }, { "epoch": 0.3396512106682251, "grad_norm": 0.6275858283042908, "learning_rate": 2.3067917639784557e-05, "loss": 0.0543, "step": 19179 }, { "epoch": 0.3396689202052535, "grad_norm": 1.0198581218719482, "learning_rate": 2.3067192310399336e-05, "loss": 0.0962, "step": 19180 }, { "epoch": 0.339686629742282, "grad_norm": 0.9510269165039062, "learning_rate": 2.306646695447388e-05, "loss": 0.0702, "step": 19181 }, { "epoch": 0.3397043392793104, "grad_norm": 0.7471274137496948, "learning_rate": 2.3065741572010552e-05, "loss": 0.0817, "step": 19182 }, { "epoch": 0.33972204881633883, "grad_norm": 0.49954432249069214, "learning_rate": 2.3065016163011756e-05, "loss": 0.0616, "step": 19183 }, { "epoch": 0.3397397583533672, "grad_norm": 0.5715442299842834, "learning_rate": 2.306429072747987e-05, "loss": 0.0709, "step": 19184 }, { "epoch": 0.3397574678903957, "grad_norm": 1.000788927078247, "learning_rate": 2.3063565265417284e-05, "loss": 0.1211, "step": 19185 }, { "epoch": 0.3397751774274241, "grad_norm": 0.44558465480804443, "learning_rate": 2.306283977682639e-05, "loss": 0.0891, "step": 19186 }, { "epoch": 0.33979288696445253, "grad_norm": 0.5026042461395264, "learning_rate": 2.306211426170956e-05, "loss": 0.0566, "step": 19187 }, { "epoch": 0.33981059650148093, "grad_norm": 0.6192898154258728, "learning_rate": 2.3061388720069193e-05, "loss": 0.0598, "step": 19188 }, { "epoch": 0.3398283060385094, "grad_norm": 0.5859554409980774, "learning_rate": 2.306066315190767e-05, "loss": 0.0764, "step": 19189 }, { "epoch": 0.33984601557553784, "grad_norm": 0.6862713098526001, "learning_rate": 2.3059937557227388e-05, "loss": 0.1025, "step": 19190 }, { "epoch": 0.33986372511256624, "grad_norm": 0.5675140023231506, "learning_rate": 2.3059211936030717e-05, "loss": 0.0712, "step": 19191 }, { "epoch": 0.3398814346495947, "grad_norm": 0.7015743851661682, "learning_rate": 2.3058486288320062e-05, "loss": 0.0806, "step": 19192 }, { "epoch": 0.3398991441866231, "grad_norm": 0.5483357310295105, "learning_rate": 2.3057760614097798e-05, "loss": 0.082, "step": 19193 }, { "epoch": 0.33991685372365155, "grad_norm": 0.6413722038269043, "learning_rate": 2.3057034913366316e-05, "loss": 0.0978, "step": 19194 }, { "epoch": 0.33993456326067994, "grad_norm": 0.757282018661499, "learning_rate": 2.3056309186128005e-05, "loss": 0.0684, "step": 19195 }, { "epoch": 0.3399522727977084, "grad_norm": 0.976315975189209, "learning_rate": 2.305558343238525e-05, "loss": 0.0691, "step": 19196 }, { "epoch": 0.3399699823347368, "grad_norm": 1.1093180179595947, "learning_rate": 2.3054857652140445e-05, "loss": 0.0581, "step": 19197 }, { "epoch": 0.33998769187176525, "grad_norm": 0.583117663860321, "learning_rate": 2.3054131845395966e-05, "loss": 0.0773, "step": 19198 }, { "epoch": 0.34000540140879365, "grad_norm": 0.7732506990432739, "learning_rate": 2.3053406012154213e-05, "loss": 0.0926, "step": 19199 }, { "epoch": 0.3400231109458221, "grad_norm": 0.5245029926300049, "learning_rate": 2.305268015241757e-05, "loss": 0.0669, "step": 19200 }, { "epoch": 0.3400408204828505, "grad_norm": 0.8774025440216064, "learning_rate": 2.305195426618842e-05, "loss": 0.0988, "step": 19201 }, { "epoch": 0.34005853001987896, "grad_norm": 0.794871985912323, "learning_rate": 2.3051228353469153e-05, "loss": 0.0813, "step": 19202 }, { "epoch": 0.34007623955690736, "grad_norm": 0.7624329328536987, "learning_rate": 2.3050502414262165e-05, "loss": 0.0752, "step": 19203 }, { "epoch": 0.3400939490939358, "grad_norm": 0.6737438440322876, "learning_rate": 2.3049776448569836e-05, "loss": 0.1145, "step": 19204 }, { "epoch": 0.34011165863096426, "grad_norm": 0.5413034558296204, "learning_rate": 2.3049050456394555e-05, "loss": 0.0676, "step": 19205 }, { "epoch": 0.34012936816799266, "grad_norm": 0.6183378100395203, "learning_rate": 2.3048324437738716e-05, "loss": 0.0959, "step": 19206 }, { "epoch": 0.3401470777050211, "grad_norm": 0.6143801212310791, "learning_rate": 2.30475983926047e-05, "loss": 0.0685, "step": 19207 }, { "epoch": 0.3401647872420495, "grad_norm": 0.7076938152313232, "learning_rate": 2.3046872320994897e-05, "loss": 0.1001, "step": 19208 }, { "epoch": 0.34018249677907797, "grad_norm": 1.0725560188293457, "learning_rate": 2.3046146222911704e-05, "loss": 0.0834, "step": 19209 }, { "epoch": 0.34020020631610637, "grad_norm": 0.9764721393585205, "learning_rate": 2.3045420098357502e-05, "loss": 0.103, "step": 19210 }, { "epoch": 0.3402179158531348, "grad_norm": 0.721818208694458, "learning_rate": 2.3044693947334683e-05, "loss": 0.063, "step": 19211 }, { "epoch": 0.3402356253901632, "grad_norm": 0.7023571729660034, "learning_rate": 2.3043967769845628e-05, "loss": 0.0646, "step": 19212 }, { "epoch": 0.3402533349271917, "grad_norm": 0.4460199773311615, "learning_rate": 2.3043241565892736e-05, "loss": 0.0405, "step": 19213 }, { "epoch": 0.3402710444642201, "grad_norm": 0.6726110577583313, "learning_rate": 2.3042515335478396e-05, "loss": 0.1049, "step": 19214 }, { "epoch": 0.34028875400124853, "grad_norm": 0.7878656387329102, "learning_rate": 2.304178907860499e-05, "loss": 0.0979, "step": 19215 }, { "epoch": 0.3403064635382769, "grad_norm": 0.6370846033096313, "learning_rate": 2.3041062795274916e-05, "loss": 0.0956, "step": 19216 }, { "epoch": 0.3403241730753054, "grad_norm": 0.5632316470146179, "learning_rate": 2.3040336485490553e-05, "loss": 0.0904, "step": 19217 }, { "epoch": 0.34034188261233383, "grad_norm": 0.919699490070343, "learning_rate": 2.3039610149254303e-05, "loss": 0.0786, "step": 19218 }, { "epoch": 0.34035959214936223, "grad_norm": 0.9346705675125122, "learning_rate": 2.3038883786568545e-05, "loss": 0.1067, "step": 19219 }, { "epoch": 0.3403773016863907, "grad_norm": 0.8987654447555542, "learning_rate": 2.303815739743567e-05, "loss": 0.0772, "step": 19220 }, { "epoch": 0.3403950112234191, "grad_norm": 1.1983733177185059, "learning_rate": 2.303743098185807e-05, "loss": 0.1, "step": 19221 }, { "epoch": 0.34041272076044754, "grad_norm": 1.0056835412979126, "learning_rate": 2.303670453983814e-05, "loss": 0.114, "step": 19222 }, { "epoch": 0.34043043029747594, "grad_norm": 0.5129923820495605, "learning_rate": 2.3035978071378258e-05, "loss": 0.0769, "step": 19223 }, { "epoch": 0.3404481398345044, "grad_norm": 0.8596642017364502, "learning_rate": 2.3035251576480828e-05, "loss": 0.0584, "step": 19224 }, { "epoch": 0.3404658493715328, "grad_norm": 0.8849990367889404, "learning_rate": 2.303452505514823e-05, "loss": 0.0877, "step": 19225 }, { "epoch": 0.34048355890856125, "grad_norm": 1.0783066749572754, "learning_rate": 2.3033798507382857e-05, "loss": 0.1064, "step": 19226 }, { "epoch": 0.34050126844558964, "grad_norm": 1.1356452703475952, "learning_rate": 2.30330719331871e-05, "loss": 0.0693, "step": 19227 }, { "epoch": 0.3405189779826181, "grad_norm": 0.7540509700775146, "learning_rate": 2.3032345332563346e-05, "loss": 0.0637, "step": 19228 }, { "epoch": 0.3405366875196465, "grad_norm": 1.3048189878463745, "learning_rate": 2.3031618705513993e-05, "loss": 0.1039, "step": 19229 }, { "epoch": 0.34055439705667495, "grad_norm": 0.8840200304985046, "learning_rate": 2.3030892052041425e-05, "loss": 0.0638, "step": 19230 }, { "epoch": 0.34057210659370335, "grad_norm": 0.9690351486206055, "learning_rate": 2.303016537214803e-05, "loss": 0.1095, "step": 19231 }, { "epoch": 0.3405898161307318, "grad_norm": 0.6643182039260864, "learning_rate": 2.3029438665836212e-05, "loss": 0.0528, "step": 19232 }, { "epoch": 0.34060752566776026, "grad_norm": 0.8811643719673157, "learning_rate": 2.302871193310835e-05, "loss": 0.0847, "step": 19233 }, { "epoch": 0.34062523520478866, "grad_norm": 0.7353214621543884, "learning_rate": 2.3027985173966835e-05, "loss": 0.1033, "step": 19234 }, { "epoch": 0.3406429447418171, "grad_norm": 1.0538501739501953, "learning_rate": 2.302725838841406e-05, "loss": 0.1011, "step": 19235 }, { "epoch": 0.3406606542788455, "grad_norm": 0.5984762907028198, "learning_rate": 2.3026531576452418e-05, "loss": 0.0879, "step": 19236 }, { "epoch": 0.34067836381587396, "grad_norm": 0.6282381415367126, "learning_rate": 2.30258047380843e-05, "loss": 0.087, "step": 19237 }, { "epoch": 0.34069607335290236, "grad_norm": 0.7180334329605103, "learning_rate": 2.3025077873312095e-05, "loss": 0.0843, "step": 19238 }, { "epoch": 0.3407137828899308, "grad_norm": 0.9590132832527161, "learning_rate": 2.3024350982138195e-05, "loss": 0.0879, "step": 19239 }, { "epoch": 0.3407314924269592, "grad_norm": 0.8909767270088196, "learning_rate": 2.3023624064564995e-05, "loss": 0.1165, "step": 19240 }, { "epoch": 0.34074920196398767, "grad_norm": 1.0946520566940308, "learning_rate": 2.302289712059488e-05, "loss": 0.0894, "step": 19241 }, { "epoch": 0.34076691150101607, "grad_norm": 0.5926083326339722, "learning_rate": 2.3022170150230247e-05, "loss": 0.1004, "step": 19242 }, { "epoch": 0.3407846210380445, "grad_norm": 0.8971878290176392, "learning_rate": 2.3021443153473487e-05, "loss": 0.1336, "step": 19243 }, { "epoch": 0.3408023305750729, "grad_norm": 0.6436383724212646, "learning_rate": 2.3020716130326988e-05, "loss": 0.0656, "step": 19244 }, { "epoch": 0.3408200401121014, "grad_norm": 0.9882275462150574, "learning_rate": 2.3019989080793143e-05, "loss": 0.073, "step": 19245 }, { "epoch": 0.3408377496491298, "grad_norm": 0.7548261284828186, "learning_rate": 2.3019262004874347e-05, "loss": 0.0781, "step": 19246 }, { "epoch": 0.3408554591861582, "grad_norm": 0.6187759637832642, "learning_rate": 2.301853490257299e-05, "loss": 0.058, "step": 19247 }, { "epoch": 0.3408731687231867, "grad_norm": 0.7848917841911316, "learning_rate": 2.3017807773891467e-05, "loss": 0.1071, "step": 19248 }, { "epoch": 0.3408908782602151, "grad_norm": 1.0285191535949707, "learning_rate": 2.301708061883216e-05, "loss": 0.0806, "step": 19249 }, { "epoch": 0.34090858779724353, "grad_norm": 0.7112935781478882, "learning_rate": 2.3016353437397474e-05, "loss": 0.0793, "step": 19250 }, { "epoch": 0.34092629733427193, "grad_norm": 0.9477512240409851, "learning_rate": 2.3015626229589793e-05, "loss": 0.1191, "step": 19251 }, { "epoch": 0.3409440068713004, "grad_norm": 0.7386387586593628, "learning_rate": 2.3014898995411517e-05, "loss": 0.1064, "step": 19252 }, { "epoch": 0.3409617164083288, "grad_norm": 0.8006144165992737, "learning_rate": 2.301417173486503e-05, "loss": 0.0879, "step": 19253 }, { "epoch": 0.34097942594535724, "grad_norm": 0.49199792742729187, "learning_rate": 2.3013444447952733e-05, "loss": 0.0754, "step": 19254 }, { "epoch": 0.34099713548238564, "grad_norm": 1.0985424518585205, "learning_rate": 2.3012717134677006e-05, "loss": 0.0567, "step": 19255 }, { "epoch": 0.3410148450194141, "grad_norm": 0.6097420454025269, "learning_rate": 2.3011989795040258e-05, "loss": 0.0906, "step": 19256 }, { "epoch": 0.3410325545564425, "grad_norm": 0.9487916827201843, "learning_rate": 2.3011262429044872e-05, "loss": 0.0818, "step": 19257 }, { "epoch": 0.34105026409347095, "grad_norm": 1.069211721420288, "learning_rate": 2.3010535036693242e-05, "loss": 0.0931, "step": 19258 }, { "epoch": 0.34106797363049934, "grad_norm": 1.454627275466919, "learning_rate": 2.3009807617987764e-05, "loss": 0.0833, "step": 19259 }, { "epoch": 0.3410856831675278, "grad_norm": 0.795647919178009, "learning_rate": 2.300908017293082e-05, "loss": 0.1008, "step": 19260 }, { "epoch": 0.3411033927045562, "grad_norm": 0.5694802403450012, "learning_rate": 2.3008352701524823e-05, "loss": 0.1002, "step": 19261 }, { "epoch": 0.34112110224158465, "grad_norm": 0.8200595378875732, "learning_rate": 2.300762520377215e-05, "loss": 0.091, "step": 19262 }, { "epoch": 0.3411388117786131, "grad_norm": 0.9546707272529602, "learning_rate": 2.3006897679675205e-05, "loss": 0.0808, "step": 19263 }, { "epoch": 0.3411565213156415, "grad_norm": 0.9295961856842041, "learning_rate": 2.3006170129236372e-05, "loss": 0.1105, "step": 19264 }, { "epoch": 0.34117423085266996, "grad_norm": 0.4191983640193939, "learning_rate": 2.3005442552458053e-05, "loss": 0.0817, "step": 19265 }, { "epoch": 0.34119194038969836, "grad_norm": 1.0271475315093994, "learning_rate": 2.3004714949342633e-05, "loss": 0.1024, "step": 19266 }, { "epoch": 0.3412096499267268, "grad_norm": 0.5454553961753845, "learning_rate": 2.3003987319892516e-05, "loss": 0.0985, "step": 19267 }, { "epoch": 0.3412273594637552, "grad_norm": 0.9816539883613586, "learning_rate": 2.3003259664110085e-05, "loss": 0.0543, "step": 19268 }, { "epoch": 0.34124506900078366, "grad_norm": 0.7472932934761047, "learning_rate": 2.3002531981997737e-05, "loss": 0.0802, "step": 19269 }, { "epoch": 0.34126277853781206, "grad_norm": 0.719488263130188, "learning_rate": 2.3001804273557874e-05, "loss": 0.0872, "step": 19270 }, { "epoch": 0.3412804880748405, "grad_norm": 1.2103066444396973, "learning_rate": 2.3001076538792884e-05, "loss": 0.118, "step": 19271 }, { "epoch": 0.3412981976118689, "grad_norm": 0.3719440698623657, "learning_rate": 2.300034877770516e-05, "loss": 0.0626, "step": 19272 }, { "epoch": 0.34131590714889737, "grad_norm": 0.8630995154380798, "learning_rate": 2.29996209902971e-05, "loss": 0.093, "step": 19273 }, { "epoch": 0.34133361668592577, "grad_norm": 0.7287248969078064, "learning_rate": 2.299889317657109e-05, "loss": 0.078, "step": 19274 }, { "epoch": 0.3413513262229542, "grad_norm": 0.5522382855415344, "learning_rate": 2.2998165336529538e-05, "loss": 0.0995, "step": 19275 }, { "epoch": 0.3413690357599826, "grad_norm": 0.5703903436660767, "learning_rate": 2.2997437470174825e-05, "loss": 0.1164, "step": 19276 }, { "epoch": 0.3413867452970111, "grad_norm": 1.0062166452407837, "learning_rate": 2.2996709577509355e-05, "loss": 0.0831, "step": 19277 }, { "epoch": 0.34140445483403953, "grad_norm": 0.5622611045837402, "learning_rate": 2.299598165853552e-05, "loss": 0.0895, "step": 19278 }, { "epoch": 0.3414221643710679, "grad_norm": 0.7616016864776611, "learning_rate": 2.299525371325571e-05, "loss": 0.1047, "step": 19279 }, { "epoch": 0.3414398739080964, "grad_norm": 0.8174049258232117, "learning_rate": 2.2994525741672328e-05, "loss": 0.1033, "step": 19280 }, { "epoch": 0.3414575834451248, "grad_norm": 0.4442141652107239, "learning_rate": 2.299379774378776e-05, "loss": 0.0789, "step": 19281 }, { "epoch": 0.34147529298215323, "grad_norm": 0.604698896408081, "learning_rate": 2.299306971960441e-05, "loss": 0.0746, "step": 19282 }, { "epoch": 0.34149300251918163, "grad_norm": 0.536361575126648, "learning_rate": 2.299234166912467e-05, "loss": 0.0867, "step": 19283 }, { "epoch": 0.3415107120562101, "grad_norm": 0.849248468875885, "learning_rate": 2.2991613592350927e-05, "loss": 0.0845, "step": 19284 }, { "epoch": 0.3415284215932385, "grad_norm": 0.8065855503082275, "learning_rate": 2.299088548928559e-05, "loss": 0.1069, "step": 19285 }, { "epoch": 0.34154613113026694, "grad_norm": 0.6970089673995972, "learning_rate": 2.2990157359931053e-05, "loss": 0.112, "step": 19286 }, { "epoch": 0.34156384066729534, "grad_norm": 0.725511372089386, "learning_rate": 2.29894292042897e-05, "loss": 0.077, "step": 19287 }, { "epoch": 0.3415815502043238, "grad_norm": 0.6654328107833862, "learning_rate": 2.298870102236393e-05, "loss": 0.1143, "step": 19288 }, { "epoch": 0.3415992597413522, "grad_norm": 0.645110011100769, "learning_rate": 2.2987972814156144e-05, "loss": 0.0956, "step": 19289 }, { "epoch": 0.34161696927838064, "grad_norm": 0.6514961123466492, "learning_rate": 2.298724457966874e-05, "loss": 0.1124, "step": 19290 }, { "epoch": 0.34163467881540904, "grad_norm": 0.6233871579170227, "learning_rate": 2.2986516318904105e-05, "loss": 0.1324, "step": 19291 }, { "epoch": 0.3416523883524375, "grad_norm": 0.8552525639533997, "learning_rate": 2.2985788031864637e-05, "loss": 0.0991, "step": 19292 }, { "epoch": 0.34167009788946595, "grad_norm": 0.47729432582855225, "learning_rate": 2.2985059718552736e-05, "loss": 0.0796, "step": 19293 }, { "epoch": 0.34168780742649435, "grad_norm": 0.4397581219673157, "learning_rate": 2.29843313789708e-05, "loss": 0.0574, "step": 19294 }, { "epoch": 0.3417055169635228, "grad_norm": 0.8723825812339783, "learning_rate": 2.2983603013121217e-05, "loss": 0.0884, "step": 19295 }, { "epoch": 0.3417232265005512, "grad_norm": 0.4891658127307892, "learning_rate": 2.2982874621006395e-05, "loss": 0.0925, "step": 19296 }, { "epoch": 0.34174093603757966, "grad_norm": 1.0967894792556763, "learning_rate": 2.298214620262871e-05, "loss": 0.0843, "step": 19297 }, { "epoch": 0.34175864557460806, "grad_norm": 0.5669428706169128, "learning_rate": 2.298141775799058e-05, "loss": 0.0605, "step": 19298 }, { "epoch": 0.3417763551116365, "grad_norm": 1.1855876445770264, "learning_rate": 2.298068928709439e-05, "loss": 0.1095, "step": 19299 }, { "epoch": 0.3417940646486649, "grad_norm": 0.6551434397697449, "learning_rate": 2.297996078994254e-05, "loss": 0.1001, "step": 19300 }, { "epoch": 0.34181177418569336, "grad_norm": 0.5225227475166321, "learning_rate": 2.297923226653743e-05, "loss": 0.0986, "step": 19301 }, { "epoch": 0.34182948372272176, "grad_norm": 0.8669939041137695, "learning_rate": 2.2978503716881448e-05, "loss": 0.0706, "step": 19302 }, { "epoch": 0.3418471932597502, "grad_norm": 0.5913954377174377, "learning_rate": 2.2977775140976996e-05, "loss": 0.0799, "step": 19303 }, { "epoch": 0.3418649027967786, "grad_norm": 0.755159854888916, "learning_rate": 2.2977046538826473e-05, "loss": 0.0845, "step": 19304 }, { "epoch": 0.34188261233380707, "grad_norm": 0.7682356834411621, "learning_rate": 2.2976317910432274e-05, "loss": 0.0751, "step": 19305 }, { "epoch": 0.34190032187083547, "grad_norm": 1.23225998878479, "learning_rate": 2.297558925579679e-05, "loss": 0.0627, "step": 19306 }, { "epoch": 0.3419180314078639, "grad_norm": 0.7904123663902283, "learning_rate": 2.297486057492243e-05, "loss": 0.0929, "step": 19307 }, { "epoch": 0.3419357409448924, "grad_norm": 0.5809007883071899, "learning_rate": 2.2974131867811586e-05, "loss": 0.0708, "step": 19308 }, { "epoch": 0.3419534504819208, "grad_norm": 0.7931375503540039, "learning_rate": 2.297340313446665e-05, "loss": 0.1006, "step": 19309 }, { "epoch": 0.3419711600189492, "grad_norm": 0.6963357925415039, "learning_rate": 2.2972674374890027e-05, "loss": 0.0824, "step": 19310 }, { "epoch": 0.3419888695559776, "grad_norm": 1.0112992525100708, "learning_rate": 2.2971945589084116e-05, "loss": 0.0978, "step": 19311 }, { "epoch": 0.3420065790930061, "grad_norm": 1.081162452697754, "learning_rate": 2.2971216777051304e-05, "loss": 0.11, "step": 19312 }, { "epoch": 0.3420242886300345, "grad_norm": 0.6302021741867065, "learning_rate": 2.2970487938793996e-05, "loss": 0.0771, "step": 19313 }, { "epoch": 0.34204199816706293, "grad_norm": 0.9155311584472656, "learning_rate": 2.296975907431459e-05, "loss": 0.0763, "step": 19314 }, { "epoch": 0.34205970770409133, "grad_norm": 0.6082040667533875, "learning_rate": 2.296903018361548e-05, "loss": 0.0931, "step": 19315 }, { "epoch": 0.3420774172411198, "grad_norm": 0.973042368888855, "learning_rate": 2.2968301266699068e-05, "loss": 0.0852, "step": 19316 }, { "epoch": 0.3420951267781482, "grad_norm": 0.8674502372741699, "learning_rate": 2.2967572323567755e-05, "loss": 0.1023, "step": 19317 }, { "epoch": 0.34211283631517664, "grad_norm": 0.5905106067657471, "learning_rate": 2.2966843354223933e-05, "loss": 0.0859, "step": 19318 }, { "epoch": 0.34213054585220504, "grad_norm": 0.915535032749176, "learning_rate": 2.2966114358669998e-05, "loss": 0.0708, "step": 19319 }, { "epoch": 0.3421482553892335, "grad_norm": 0.984378457069397, "learning_rate": 2.296538533690836e-05, "loss": 0.1031, "step": 19320 }, { "epoch": 0.3421659649262619, "grad_norm": 0.8656737208366394, "learning_rate": 2.29646562889414e-05, "loss": 0.0946, "step": 19321 }, { "epoch": 0.34218367446329034, "grad_norm": 0.7879158854484558, "learning_rate": 2.2963927214771532e-05, "loss": 0.0813, "step": 19322 }, { "epoch": 0.3422013840003188, "grad_norm": 1.3934564590454102, "learning_rate": 2.296319811440115e-05, "loss": 0.0757, "step": 19323 }, { "epoch": 0.3422190935373472, "grad_norm": 0.7850646376609802, "learning_rate": 2.2962468987832647e-05, "loss": 0.0807, "step": 19324 }, { "epoch": 0.34223680307437565, "grad_norm": 0.969114363193512, "learning_rate": 2.296173983506843e-05, "loss": 0.0775, "step": 19325 }, { "epoch": 0.34225451261140405, "grad_norm": 1.168151617050171, "learning_rate": 2.2961010656110898e-05, "loss": 0.096, "step": 19326 }, { "epoch": 0.3422722221484325, "grad_norm": 0.5525662899017334, "learning_rate": 2.296028145096244e-05, "loss": 0.0675, "step": 19327 }, { "epoch": 0.3422899316854609, "grad_norm": 0.6760125756263733, "learning_rate": 2.2959552219625463e-05, "loss": 0.0941, "step": 19328 }, { "epoch": 0.34230764122248936, "grad_norm": 0.7267649173736572, "learning_rate": 2.2958822962102364e-05, "loss": 0.0878, "step": 19329 }, { "epoch": 0.34232535075951775, "grad_norm": 0.8736675977706909, "learning_rate": 2.2958093678395546e-05, "loss": 0.1011, "step": 19330 }, { "epoch": 0.3423430602965462, "grad_norm": 1.1797175407409668, "learning_rate": 2.2957364368507403e-05, "loss": 0.0914, "step": 19331 }, { "epoch": 0.3423607698335746, "grad_norm": 0.49479568004608154, "learning_rate": 2.295663503244033e-05, "loss": 0.0758, "step": 19332 }, { "epoch": 0.34237847937060306, "grad_norm": 0.9404975771903992, "learning_rate": 2.2955905670196736e-05, "loss": 0.0972, "step": 19333 }, { "epoch": 0.34239618890763146, "grad_norm": 0.44352784752845764, "learning_rate": 2.2955176281779023e-05, "loss": 0.0792, "step": 19334 }, { "epoch": 0.3424138984446599, "grad_norm": 0.9486892819404602, "learning_rate": 2.295444686718958e-05, "loss": 0.0912, "step": 19335 }, { "epoch": 0.3424316079816883, "grad_norm": 0.6857473850250244, "learning_rate": 2.2953717426430806e-05, "loss": 0.0765, "step": 19336 }, { "epoch": 0.34244931751871677, "grad_norm": 1.5004276037216187, "learning_rate": 2.2952987959505116e-05, "loss": 0.1002, "step": 19337 }, { "epoch": 0.3424670270557452, "grad_norm": 1.207900047302246, "learning_rate": 2.2952258466414897e-05, "loss": 0.0995, "step": 19338 }, { "epoch": 0.3424847365927736, "grad_norm": 0.9973893761634827, "learning_rate": 2.2951528947162547e-05, "loss": 0.0681, "step": 19339 }, { "epoch": 0.3425024461298021, "grad_norm": 0.6018047332763672, "learning_rate": 2.2950799401750472e-05, "loss": 0.0917, "step": 19340 }, { "epoch": 0.3425201556668305, "grad_norm": 1.505294680595398, "learning_rate": 2.2950069830181075e-05, "loss": 0.0696, "step": 19341 }, { "epoch": 0.3425378652038589, "grad_norm": 0.8637993931770325, "learning_rate": 2.2949340232456754e-05, "loss": 0.0738, "step": 19342 }, { "epoch": 0.3425555747408873, "grad_norm": 0.8313557505607605, "learning_rate": 2.2948610608579904e-05, "loss": 0.115, "step": 19343 }, { "epoch": 0.3425732842779158, "grad_norm": 0.9568238854408264, "learning_rate": 2.2947880958552932e-05, "loss": 0.1061, "step": 19344 }, { "epoch": 0.3425909938149442, "grad_norm": 1.1722121238708496, "learning_rate": 2.294715128237823e-05, "loss": 0.0784, "step": 19345 }, { "epoch": 0.34260870335197263, "grad_norm": 0.9704814553260803, "learning_rate": 2.2946421580058207e-05, "loss": 0.1055, "step": 19346 }, { "epoch": 0.34262641288900103, "grad_norm": 1.015518069267273, "learning_rate": 2.2945691851595258e-05, "loss": 0.1311, "step": 19347 }, { "epoch": 0.3426441224260295, "grad_norm": 0.821803092956543, "learning_rate": 2.2944962096991792e-05, "loss": 0.0717, "step": 19348 }, { "epoch": 0.3426618319630579, "grad_norm": 0.8653127551078796, "learning_rate": 2.2944232316250204e-05, "loss": 0.0975, "step": 19349 }, { "epoch": 0.34267954150008634, "grad_norm": 0.7301003932952881, "learning_rate": 2.2943502509372892e-05, "loss": 0.0607, "step": 19350 }, { "epoch": 0.34269725103711474, "grad_norm": 0.8850619196891785, "learning_rate": 2.294277267636226e-05, "loss": 0.0938, "step": 19351 }, { "epoch": 0.3427149605741432, "grad_norm": 0.6694617867469788, "learning_rate": 2.2942042817220708e-05, "loss": 0.1051, "step": 19352 }, { "epoch": 0.34273267011117164, "grad_norm": 0.8364896774291992, "learning_rate": 2.2941312931950646e-05, "loss": 0.0697, "step": 19353 }, { "epoch": 0.34275037964820004, "grad_norm": 0.6324149370193481, "learning_rate": 2.294058302055446e-05, "loss": 0.0715, "step": 19354 }, { "epoch": 0.3427680891852285, "grad_norm": 0.3838488459587097, "learning_rate": 2.293985308303456e-05, "loss": 0.0879, "step": 19355 }, { "epoch": 0.3427857987222569, "grad_norm": 0.6324499249458313, "learning_rate": 2.2939123119393344e-05, "loss": 0.1021, "step": 19356 }, { "epoch": 0.34280350825928535, "grad_norm": 0.9317193627357483, "learning_rate": 2.2938393129633227e-05, "loss": 0.1179, "step": 19357 }, { "epoch": 0.34282121779631375, "grad_norm": 0.6706636548042297, "learning_rate": 2.2937663113756587e-05, "loss": 0.1106, "step": 19358 }, { "epoch": 0.3428389273333422, "grad_norm": 1.18869948387146, "learning_rate": 2.2936933071765843e-05, "loss": 0.1244, "step": 19359 }, { "epoch": 0.3428566368703706, "grad_norm": 0.5705685615539551, "learning_rate": 2.2936203003663394e-05, "loss": 0.0749, "step": 19360 }, { "epoch": 0.34287434640739906, "grad_norm": 0.9709572196006775, "learning_rate": 2.293547290945164e-05, "loss": 0.0722, "step": 19361 }, { "epoch": 0.34289205594442745, "grad_norm": 0.48834851384162903, "learning_rate": 2.293474278913298e-05, "loss": 0.0497, "step": 19362 }, { "epoch": 0.3429097654814559, "grad_norm": 0.8101651668548584, "learning_rate": 2.2934012642709817e-05, "loss": 0.0623, "step": 19363 }, { "epoch": 0.3429274750184843, "grad_norm": 0.497877836227417, "learning_rate": 2.2933282470184564e-05, "loss": 0.0873, "step": 19364 }, { "epoch": 0.34294518455551276, "grad_norm": 0.7547133564949036, "learning_rate": 2.2932552271559603e-05, "loss": 0.0735, "step": 19365 }, { "epoch": 0.34296289409254116, "grad_norm": 0.7108445763587952, "learning_rate": 2.2931822046837354e-05, "loss": 0.1054, "step": 19366 }, { "epoch": 0.3429806036295696, "grad_norm": 0.6758571267127991, "learning_rate": 2.2931091796020212e-05, "loss": 0.0888, "step": 19367 }, { "epoch": 0.34299831316659807, "grad_norm": 0.6186235547065735, "learning_rate": 2.293036151911058e-05, "loss": 0.0818, "step": 19368 }, { "epoch": 0.34301602270362647, "grad_norm": 0.5005651116371155, "learning_rate": 2.2929631216110858e-05, "loss": 0.1091, "step": 19369 }, { "epoch": 0.3430337322406549, "grad_norm": 0.7610484957695007, "learning_rate": 2.292890088702345e-05, "loss": 0.0804, "step": 19370 }, { "epoch": 0.3430514417776833, "grad_norm": 0.8837396502494812, "learning_rate": 2.2928170531850768e-05, "loss": 0.0553, "step": 19371 }, { "epoch": 0.3430691513147118, "grad_norm": 0.6074936389923096, "learning_rate": 2.2927440150595197e-05, "loss": 0.1051, "step": 19372 }, { "epoch": 0.34308686085174017, "grad_norm": 0.8697656393051147, "learning_rate": 2.2926709743259156e-05, "loss": 0.0591, "step": 19373 }, { "epoch": 0.3431045703887686, "grad_norm": 0.7688180804252625, "learning_rate": 2.292597930984504e-05, "loss": 0.1033, "step": 19374 }, { "epoch": 0.343122279925797, "grad_norm": 0.8057520389556885, "learning_rate": 2.2925248850355254e-05, "loss": 0.067, "step": 19375 }, { "epoch": 0.3431399894628255, "grad_norm": 0.8564489483833313, "learning_rate": 2.2924518364792204e-05, "loss": 0.099, "step": 19376 }, { "epoch": 0.3431576989998539, "grad_norm": 0.9651041626930237, "learning_rate": 2.2923787853158286e-05, "loss": 0.0736, "step": 19377 }, { "epoch": 0.34317540853688233, "grad_norm": 0.9310013651847839, "learning_rate": 2.292305731545591e-05, "loss": 0.1049, "step": 19378 }, { "epoch": 0.34319311807391073, "grad_norm": 0.8564367294311523, "learning_rate": 2.2922326751687473e-05, "loss": 0.093, "step": 19379 }, { "epoch": 0.3432108276109392, "grad_norm": 0.8327797651290894, "learning_rate": 2.2921596161855382e-05, "loss": 0.1033, "step": 19380 }, { "epoch": 0.3432285371479676, "grad_norm": 0.641800045967102, "learning_rate": 2.2920865545962046e-05, "loss": 0.0799, "step": 19381 }, { "epoch": 0.34324624668499604, "grad_norm": 0.6439945101737976, "learning_rate": 2.292013490400986e-05, "loss": 0.0895, "step": 19382 }, { "epoch": 0.3432639562220245, "grad_norm": 0.7272714972496033, "learning_rate": 2.2919404236001235e-05, "loss": 0.0855, "step": 19383 }, { "epoch": 0.3432816657590529, "grad_norm": 2.675952196121216, "learning_rate": 2.2918673541938565e-05, "loss": 0.1202, "step": 19384 }, { "epoch": 0.34329937529608134, "grad_norm": 0.90264892578125, "learning_rate": 2.2917942821824265e-05, "loss": 0.0872, "step": 19385 }, { "epoch": 0.34331708483310974, "grad_norm": 0.3502102494239807, "learning_rate": 2.291721207566073e-05, "loss": 0.0569, "step": 19386 }, { "epoch": 0.3433347943701382, "grad_norm": 1.4444739818572998, "learning_rate": 2.291648130345037e-05, "loss": 0.1094, "step": 19387 }, { "epoch": 0.3433525039071666, "grad_norm": 0.9879419803619385, "learning_rate": 2.2915750505195588e-05, "loss": 0.1294, "step": 19388 }, { "epoch": 0.34337021344419505, "grad_norm": 0.6239575743675232, "learning_rate": 2.2915019680898787e-05, "loss": 0.0998, "step": 19389 }, { "epoch": 0.34338792298122345, "grad_norm": 0.6351888179779053, "learning_rate": 2.2914288830562377e-05, "loss": 0.0819, "step": 19390 }, { "epoch": 0.3434056325182519, "grad_norm": 0.46868106722831726, "learning_rate": 2.2913557954188747e-05, "loss": 0.092, "step": 19391 }, { "epoch": 0.3434233420552803, "grad_norm": 0.8207928538322449, "learning_rate": 2.2912827051780316e-05, "loss": 0.0631, "step": 19392 }, { "epoch": 0.34344105159230875, "grad_norm": 0.7252943515777588, "learning_rate": 2.2912096123339486e-05, "loss": 0.0994, "step": 19393 }, { "epoch": 0.34345876112933715, "grad_norm": 1.0306880474090576, "learning_rate": 2.291136516886866e-05, "loss": 0.102, "step": 19394 }, { "epoch": 0.3434764706663656, "grad_norm": 0.48823338747024536, "learning_rate": 2.291063418837024e-05, "loss": 0.0907, "step": 19395 }, { "epoch": 0.343494180203394, "grad_norm": 1.068613052368164, "learning_rate": 2.2909903181846634e-05, "loss": 0.1044, "step": 19396 }, { "epoch": 0.34351188974042246, "grad_norm": 0.7787981033325195, "learning_rate": 2.290917214930025e-05, "loss": 0.0593, "step": 19397 }, { "epoch": 0.3435295992774509, "grad_norm": 1.1273586750030518, "learning_rate": 2.2908441090733486e-05, "loss": 0.1069, "step": 19398 }, { "epoch": 0.3435473088144793, "grad_norm": 0.44845259189605713, "learning_rate": 2.2907710006148753e-05, "loss": 0.0797, "step": 19399 }, { "epoch": 0.34356501835150777, "grad_norm": 0.714279055595398, "learning_rate": 2.2906978895548453e-05, "loss": 0.098, "step": 19400 }, { "epoch": 0.34358272788853617, "grad_norm": 0.7375603318214417, "learning_rate": 2.2906247758934995e-05, "loss": 0.0873, "step": 19401 }, { "epoch": 0.3436004374255646, "grad_norm": 0.5690394043922424, "learning_rate": 2.2905516596310778e-05, "loss": 0.0797, "step": 19402 }, { "epoch": 0.343618146962593, "grad_norm": 0.6366997957229614, "learning_rate": 2.290478540767821e-05, "loss": 0.0578, "step": 19403 }, { "epoch": 0.3436358564996215, "grad_norm": 0.9072597622871399, "learning_rate": 2.2904054193039697e-05, "loss": 0.1114, "step": 19404 }, { "epoch": 0.34365356603664987, "grad_norm": 0.7364047169685364, "learning_rate": 2.290332295239765e-05, "loss": 0.0972, "step": 19405 }, { "epoch": 0.3436712755736783, "grad_norm": 0.8390801548957825, "learning_rate": 2.2902591685754468e-05, "loss": 0.0911, "step": 19406 }, { "epoch": 0.3436889851107067, "grad_norm": 0.5359276533126831, "learning_rate": 2.290186039311256e-05, "loss": 0.0614, "step": 19407 }, { "epoch": 0.3437066946477352, "grad_norm": 0.637997031211853, "learning_rate": 2.2901129074474324e-05, "loss": 0.0584, "step": 19408 }, { "epoch": 0.3437244041847636, "grad_norm": 0.5416201949119568, "learning_rate": 2.290039772984218e-05, "loss": 0.0628, "step": 19409 }, { "epoch": 0.34374211372179203, "grad_norm": 0.5514030456542969, "learning_rate": 2.2899666359218525e-05, "loss": 0.0844, "step": 19410 }, { "epoch": 0.34375982325882043, "grad_norm": 0.9981204867362976, "learning_rate": 2.2898934962605763e-05, "loss": 0.0734, "step": 19411 }, { "epoch": 0.3437775327958489, "grad_norm": 0.8479217290878296, "learning_rate": 2.2898203540006305e-05, "loss": 0.0827, "step": 19412 }, { "epoch": 0.34379524233287734, "grad_norm": 0.9699810147285461, "learning_rate": 2.289747209142256e-05, "loss": 0.0789, "step": 19413 }, { "epoch": 0.34381295186990574, "grad_norm": 0.7555923461914062, "learning_rate": 2.2896740616856928e-05, "loss": 0.0924, "step": 19414 }, { "epoch": 0.3438306614069342, "grad_norm": 0.3889545500278473, "learning_rate": 2.289600911631182e-05, "loss": 0.096, "step": 19415 }, { "epoch": 0.3438483709439626, "grad_norm": 0.5974962115287781, "learning_rate": 2.289527758978964e-05, "loss": 0.123, "step": 19416 }, { "epoch": 0.34386608048099104, "grad_norm": 0.750688374042511, "learning_rate": 2.2894546037292792e-05, "loss": 0.1075, "step": 19417 }, { "epoch": 0.34388379001801944, "grad_norm": 0.7320859432220459, "learning_rate": 2.2893814458823683e-05, "loss": 0.0912, "step": 19418 }, { "epoch": 0.3439014995550479, "grad_norm": 0.4457037150859833, "learning_rate": 2.289308285438473e-05, "loss": 0.0852, "step": 19419 }, { "epoch": 0.3439192090920763, "grad_norm": 1.3244811296463013, "learning_rate": 2.289235122397833e-05, "loss": 0.057, "step": 19420 }, { "epoch": 0.34393691862910475, "grad_norm": 0.7920066118240356, "learning_rate": 2.2891619567606896e-05, "loss": 0.0805, "step": 19421 }, { "epoch": 0.34395462816613315, "grad_norm": 1.0430489778518677, "learning_rate": 2.2890887885272828e-05, "loss": 0.1353, "step": 19422 }, { "epoch": 0.3439723377031616, "grad_norm": 0.6739312410354614, "learning_rate": 2.2890156176978538e-05, "loss": 0.0741, "step": 19423 }, { "epoch": 0.34399004724019, "grad_norm": 0.6133019328117371, "learning_rate": 2.2889424442726433e-05, "loss": 0.1043, "step": 19424 }, { "epoch": 0.34400775677721845, "grad_norm": 0.6960564851760864, "learning_rate": 2.2888692682518916e-05, "loss": 0.0726, "step": 19425 }, { "epoch": 0.34402546631424685, "grad_norm": 0.661462128162384, "learning_rate": 2.2887960896358406e-05, "loss": 0.063, "step": 19426 }, { "epoch": 0.3440431758512753, "grad_norm": 0.7315486073493958, "learning_rate": 2.2887229084247292e-05, "loss": 0.0895, "step": 19427 }, { "epoch": 0.34406088538830376, "grad_norm": 0.6104229688644409, "learning_rate": 2.2886497246188e-05, "loss": 0.0949, "step": 19428 }, { "epoch": 0.34407859492533216, "grad_norm": 0.816916286945343, "learning_rate": 2.2885765382182924e-05, "loss": 0.1134, "step": 19429 }, { "epoch": 0.3440963044623606, "grad_norm": 0.7750239968299866, "learning_rate": 2.2885033492234483e-05, "loss": 0.0899, "step": 19430 }, { "epoch": 0.344114013999389, "grad_norm": 1.2221715450286865, "learning_rate": 2.2884301576345076e-05, "loss": 0.1055, "step": 19431 }, { "epoch": 0.34413172353641747, "grad_norm": 0.8839070796966553, "learning_rate": 2.2883569634517115e-05, "loss": 0.0832, "step": 19432 }, { "epoch": 0.34414943307344587, "grad_norm": 1.039493441581726, "learning_rate": 2.288283766675301e-05, "loss": 0.1002, "step": 19433 }, { "epoch": 0.3441671426104743, "grad_norm": 0.8812858462333679, "learning_rate": 2.2882105673055164e-05, "loss": 0.0683, "step": 19434 }, { "epoch": 0.3441848521475027, "grad_norm": 0.5144078731536865, "learning_rate": 2.288137365342599e-05, "loss": 0.0635, "step": 19435 }, { "epoch": 0.34420256168453117, "grad_norm": 0.7731218338012695, "learning_rate": 2.2880641607867887e-05, "loss": 0.0807, "step": 19436 }, { "epoch": 0.34422027122155957, "grad_norm": 0.5653098821640015, "learning_rate": 2.2879909536383274e-05, "loss": 0.0871, "step": 19437 }, { "epoch": 0.344237980758588, "grad_norm": 0.8441193103790283, "learning_rate": 2.287917743897456e-05, "loss": 0.0837, "step": 19438 }, { "epoch": 0.3442556902956164, "grad_norm": 0.9630731344223022, "learning_rate": 2.2878445315644145e-05, "loss": 0.1228, "step": 19439 }, { "epoch": 0.3442733998326449, "grad_norm": 0.6374037861824036, "learning_rate": 2.287771316639444e-05, "loss": 0.1027, "step": 19440 }, { "epoch": 0.3442911093696733, "grad_norm": 0.8206228017807007, "learning_rate": 2.287698099122786e-05, "loss": 0.0883, "step": 19441 }, { "epoch": 0.34430881890670173, "grad_norm": 0.6791309118270874, "learning_rate": 2.28762487901468e-05, "loss": 0.0603, "step": 19442 }, { "epoch": 0.3443265284437302, "grad_norm": 0.578323245048523, "learning_rate": 2.287551656315369e-05, "loss": 0.0715, "step": 19443 }, { "epoch": 0.3443442379807586, "grad_norm": 1.0662710666656494, "learning_rate": 2.2874784310250926e-05, "loss": 0.0566, "step": 19444 }, { "epoch": 0.34436194751778704, "grad_norm": 0.33800140023231506, "learning_rate": 2.2874052031440908e-05, "loss": 0.0667, "step": 19445 }, { "epoch": 0.34437965705481544, "grad_norm": 0.7082786560058594, "learning_rate": 2.287331972672606e-05, "loss": 0.0541, "step": 19446 }, { "epoch": 0.3443973665918439, "grad_norm": 0.6369403004646301, "learning_rate": 2.2872587396108794e-05, "loss": 0.0629, "step": 19447 }, { "epoch": 0.3444150761288723, "grad_norm": 0.8651426434516907, "learning_rate": 2.2871855039591503e-05, "loss": 0.0704, "step": 19448 }, { "epoch": 0.34443278566590074, "grad_norm": 0.7859649062156677, "learning_rate": 2.287112265717661e-05, "loss": 0.1018, "step": 19449 }, { "epoch": 0.34445049520292914, "grad_norm": 0.8935807347297668, "learning_rate": 2.2870390248866516e-05, "loss": 0.1138, "step": 19450 }, { "epoch": 0.3444682047399576, "grad_norm": 1.0971698760986328, "learning_rate": 2.2869657814663635e-05, "loss": 0.1051, "step": 19451 }, { "epoch": 0.344485914276986, "grad_norm": 0.9250649809837341, "learning_rate": 2.2868925354570372e-05, "loss": 0.0694, "step": 19452 }, { "epoch": 0.34450362381401445, "grad_norm": 0.6723858714103699, "learning_rate": 2.2868192868589147e-05, "loss": 0.0997, "step": 19453 }, { "epoch": 0.34452133335104285, "grad_norm": 1.079681634902954, "learning_rate": 2.2867460356722364e-05, "loss": 0.0927, "step": 19454 }, { "epoch": 0.3445390428880713, "grad_norm": 0.8102827072143555, "learning_rate": 2.2866727818972422e-05, "loss": 0.1131, "step": 19455 }, { "epoch": 0.3445567524250997, "grad_norm": 0.43338239192962646, "learning_rate": 2.2865995255341752e-05, "loss": 0.1004, "step": 19456 }, { "epoch": 0.34457446196212815, "grad_norm": 0.7042093276977539, "learning_rate": 2.2865262665832748e-05, "loss": 0.079, "step": 19457 }, { "epoch": 0.3445921714991566, "grad_norm": 0.6841626167297363, "learning_rate": 2.286453005044783e-05, "loss": 0.0843, "step": 19458 }, { "epoch": 0.344609881036185, "grad_norm": 0.9429207444190979, "learning_rate": 2.2863797409189397e-05, "loss": 0.0703, "step": 19459 }, { "epoch": 0.34462759057321346, "grad_norm": 1.0057979822158813, "learning_rate": 2.2863064742059866e-05, "loss": 0.0878, "step": 19460 }, { "epoch": 0.34464530011024186, "grad_norm": 0.8609870672225952, "learning_rate": 2.2862332049061653e-05, "loss": 0.0956, "step": 19461 }, { "epoch": 0.3446630096472703, "grad_norm": 0.7138398289680481, "learning_rate": 2.2861599330197162e-05, "loss": 0.088, "step": 19462 }, { "epoch": 0.3446807191842987, "grad_norm": 1.1126803159713745, "learning_rate": 2.2860866585468804e-05, "loss": 0.1042, "step": 19463 }, { "epoch": 0.34469842872132717, "grad_norm": 1.2590423822402954, "learning_rate": 2.2860133814878986e-05, "loss": 0.145, "step": 19464 }, { "epoch": 0.34471613825835556, "grad_norm": 0.5031063556671143, "learning_rate": 2.2859401018430125e-05, "loss": 0.0995, "step": 19465 }, { "epoch": 0.344733847795384, "grad_norm": 0.6084972023963928, "learning_rate": 2.285866819612463e-05, "loss": 0.1028, "step": 19466 }, { "epoch": 0.3447515573324124, "grad_norm": 0.7235181331634521, "learning_rate": 2.2857935347964912e-05, "loss": 0.0652, "step": 19467 }, { "epoch": 0.34476926686944087, "grad_norm": 0.7297136187553406, "learning_rate": 2.2857202473953377e-05, "loss": 0.0903, "step": 19468 }, { "epoch": 0.34478697640646927, "grad_norm": 1.1228718757629395, "learning_rate": 2.2856469574092452e-05, "loss": 0.0771, "step": 19469 }, { "epoch": 0.3448046859434977, "grad_norm": 0.6027887463569641, "learning_rate": 2.2855736648384528e-05, "loss": 0.0533, "step": 19470 }, { "epoch": 0.3448223954805261, "grad_norm": 0.40720418095588684, "learning_rate": 2.2855003696832022e-05, "loss": 0.0732, "step": 19471 }, { "epoch": 0.3448401050175546, "grad_norm": 0.4699942469596863, "learning_rate": 2.2854270719437355e-05, "loss": 0.0869, "step": 19472 }, { "epoch": 0.34485781455458303, "grad_norm": 0.7958633303642273, "learning_rate": 2.285353771620293e-05, "loss": 0.0638, "step": 19473 }, { "epoch": 0.34487552409161143, "grad_norm": 0.8952435851097107, "learning_rate": 2.285280468713116e-05, "loss": 0.1005, "step": 19474 }, { "epoch": 0.3448932336286399, "grad_norm": 0.41597139835357666, "learning_rate": 2.2852071632224455e-05, "loss": 0.0955, "step": 19475 }, { "epoch": 0.3449109431656683, "grad_norm": 0.7069836854934692, "learning_rate": 2.285133855148523e-05, "loss": 0.0587, "step": 19476 }, { "epoch": 0.34492865270269674, "grad_norm": 0.797615110874176, "learning_rate": 2.28506054449159e-05, "loss": 0.07, "step": 19477 }, { "epoch": 0.34494636223972514, "grad_norm": 0.8946006298065186, "learning_rate": 2.284987231251886e-05, "loss": 0.0797, "step": 19478 }, { "epoch": 0.3449640717767536, "grad_norm": 0.8587843179702759, "learning_rate": 2.2849139154296543e-05, "loss": 0.1148, "step": 19479 }, { "epoch": 0.344981781313782, "grad_norm": 0.6334540247917175, "learning_rate": 2.2848405970251352e-05, "loss": 0.0974, "step": 19480 }, { "epoch": 0.34499949085081044, "grad_norm": 0.5181978940963745, "learning_rate": 2.2847672760385697e-05, "loss": 0.0524, "step": 19481 }, { "epoch": 0.34501720038783884, "grad_norm": 1.3635815382003784, "learning_rate": 2.2846939524701992e-05, "loss": 0.1567, "step": 19482 }, { "epoch": 0.3450349099248673, "grad_norm": 0.700671374797821, "learning_rate": 2.2846206263202652e-05, "loss": 0.0608, "step": 19483 }, { "epoch": 0.3450526194618957, "grad_norm": 0.5477708578109741, "learning_rate": 2.2845472975890084e-05, "loss": 0.1011, "step": 19484 }, { "epoch": 0.34507032899892415, "grad_norm": 0.810906708240509, "learning_rate": 2.2844739662766705e-05, "loss": 0.1167, "step": 19485 }, { "epoch": 0.3450880385359526, "grad_norm": 1.03071928024292, "learning_rate": 2.2844006323834926e-05, "loss": 0.0862, "step": 19486 }, { "epoch": 0.345105748072981, "grad_norm": 0.5680906772613525, "learning_rate": 2.284327295909716e-05, "loss": 0.0829, "step": 19487 }, { "epoch": 0.34512345761000945, "grad_norm": 0.6841003894805908, "learning_rate": 2.284253956855582e-05, "loss": 0.0663, "step": 19488 }, { "epoch": 0.34514116714703785, "grad_norm": 0.5976450443267822, "learning_rate": 2.2841806152213313e-05, "loss": 0.0786, "step": 19489 }, { "epoch": 0.3451588766840663, "grad_norm": 0.8818366527557373, "learning_rate": 2.2841072710072064e-05, "loss": 0.143, "step": 19490 }, { "epoch": 0.3451765862210947, "grad_norm": 1.1168311834335327, "learning_rate": 2.2840339242134474e-05, "loss": 0.0738, "step": 19491 }, { "epoch": 0.34519429575812316, "grad_norm": 0.7084240913391113, "learning_rate": 2.2839605748402965e-05, "loss": 0.071, "step": 19492 }, { "epoch": 0.34521200529515156, "grad_norm": 0.725182831287384, "learning_rate": 2.2838872228879943e-05, "loss": 0.1029, "step": 19493 }, { "epoch": 0.34522971483218, "grad_norm": 0.46759697794914246, "learning_rate": 2.2838138683567824e-05, "loss": 0.0816, "step": 19494 }, { "epoch": 0.3452474243692084, "grad_norm": 0.8925492167472839, "learning_rate": 2.283740511246902e-05, "loss": 0.0621, "step": 19495 }, { "epoch": 0.34526513390623687, "grad_norm": 0.7236244082450867, "learning_rate": 2.2836671515585948e-05, "loss": 0.0524, "step": 19496 }, { "epoch": 0.34528284344326526, "grad_norm": 0.5948356986045837, "learning_rate": 2.2835937892921026e-05, "loss": 0.0545, "step": 19497 }, { "epoch": 0.3453005529802937, "grad_norm": 0.5091882348060608, "learning_rate": 2.283520424447665e-05, "loss": 0.0797, "step": 19498 }, { "epoch": 0.3453182625173221, "grad_norm": 0.7590027451515198, "learning_rate": 2.2834470570255246e-05, "loss": 0.0904, "step": 19499 }, { "epoch": 0.34533597205435057, "grad_norm": 0.8268424272537231, "learning_rate": 2.283373687025923e-05, "loss": 0.101, "step": 19500 }, { "epoch": 0.345353681591379, "grad_norm": 0.7863345146179199, "learning_rate": 2.283300314449101e-05, "loss": 0.095, "step": 19501 }, { "epoch": 0.3453713911284074, "grad_norm": 0.7310522198677063, "learning_rate": 2.2832269392953008e-05, "loss": 0.074, "step": 19502 }, { "epoch": 0.3453891006654359, "grad_norm": 0.6161490082740784, "learning_rate": 2.2831535615647624e-05, "loss": 0.0627, "step": 19503 }, { "epoch": 0.3454068102024643, "grad_norm": 0.441821813583374, "learning_rate": 2.2830801812577285e-05, "loss": 0.0945, "step": 19504 }, { "epoch": 0.34542451973949273, "grad_norm": 2.5228536128997803, "learning_rate": 2.2830067983744397e-05, "loss": 0.1046, "step": 19505 }, { "epoch": 0.34544222927652113, "grad_norm": 1.769500494003296, "learning_rate": 2.2829334129151377e-05, "loss": 0.1091, "step": 19506 }, { "epoch": 0.3454599388135496, "grad_norm": 0.7479065656661987, "learning_rate": 2.282860024880064e-05, "loss": 0.1332, "step": 19507 }, { "epoch": 0.345477648350578, "grad_norm": 0.6131753921508789, "learning_rate": 2.28278663426946e-05, "loss": 0.0687, "step": 19508 }, { "epoch": 0.34549535788760644, "grad_norm": 0.6409251093864441, "learning_rate": 2.2827132410835673e-05, "loss": 0.0802, "step": 19509 }, { "epoch": 0.34551306742463483, "grad_norm": 0.8029223084449768, "learning_rate": 2.2826398453226273e-05, "loss": 0.0915, "step": 19510 }, { "epoch": 0.3455307769616633, "grad_norm": 0.8711277842521667, "learning_rate": 2.2825664469868812e-05, "loss": 0.0738, "step": 19511 }, { "epoch": 0.3455484864986917, "grad_norm": 0.5561936497688293, "learning_rate": 2.2824930460765707e-05, "loss": 0.0495, "step": 19512 }, { "epoch": 0.34556619603572014, "grad_norm": 0.7718735337257385, "learning_rate": 2.282419642591937e-05, "loss": 0.0605, "step": 19513 }, { "epoch": 0.34558390557274854, "grad_norm": 0.7148027420043945, "learning_rate": 2.282346236533222e-05, "loss": 0.1053, "step": 19514 }, { "epoch": 0.345601615109777, "grad_norm": 0.8934112191200256, "learning_rate": 2.2822728279006674e-05, "loss": 0.0859, "step": 19515 }, { "epoch": 0.34561932464680545, "grad_norm": 0.9492746591567993, "learning_rate": 2.2821994166945135e-05, "loss": 0.0791, "step": 19516 }, { "epoch": 0.34563703418383385, "grad_norm": 0.7366624474525452, "learning_rate": 2.2821260029150034e-05, "loss": 0.0812, "step": 19517 }, { "epoch": 0.3456547437208623, "grad_norm": 0.9946025609970093, "learning_rate": 2.282052586562377e-05, "loss": 0.1284, "step": 19518 }, { "epoch": 0.3456724532578907, "grad_norm": 0.6775732040405273, "learning_rate": 2.281979167636877e-05, "loss": 0.0864, "step": 19519 }, { "epoch": 0.34569016279491915, "grad_norm": 0.5668143033981323, "learning_rate": 2.2819057461387454e-05, "loss": 0.0791, "step": 19520 }, { "epoch": 0.34570787233194755, "grad_norm": 0.7897281646728516, "learning_rate": 2.2818323220682225e-05, "loss": 0.1016, "step": 19521 }, { "epoch": 0.345725581868976, "grad_norm": 0.5705676674842834, "learning_rate": 2.28175889542555e-05, "loss": 0.0563, "step": 19522 }, { "epoch": 0.3457432914060044, "grad_norm": 0.7913821935653687, "learning_rate": 2.28168546621097e-05, "loss": 0.0809, "step": 19523 }, { "epoch": 0.34576100094303286, "grad_norm": 0.6420299410820007, "learning_rate": 2.281612034424724e-05, "loss": 0.0622, "step": 19524 }, { "epoch": 0.34577871048006126, "grad_norm": 0.7949842810630798, "learning_rate": 2.2815386000670535e-05, "loss": 0.0982, "step": 19525 }, { "epoch": 0.3457964200170897, "grad_norm": 0.7041166424751282, "learning_rate": 2.2814651631382e-05, "loss": 0.0664, "step": 19526 }, { "epoch": 0.3458141295541181, "grad_norm": 0.990670382976532, "learning_rate": 2.281391723638405e-05, "loss": 0.1193, "step": 19527 }, { "epoch": 0.34583183909114656, "grad_norm": 0.9902945160865784, "learning_rate": 2.2813182815679102e-05, "loss": 0.1066, "step": 19528 }, { "epoch": 0.34584954862817496, "grad_norm": 0.7004660367965698, "learning_rate": 2.281244836926958e-05, "loss": 0.0991, "step": 19529 }, { "epoch": 0.3458672581652034, "grad_norm": 0.8286938667297363, "learning_rate": 2.2811713897157883e-05, "loss": 0.0698, "step": 19530 }, { "epoch": 0.34588496770223187, "grad_norm": 0.6743692755699158, "learning_rate": 2.2810979399346444e-05, "loss": 0.0696, "step": 19531 }, { "epoch": 0.34590267723926027, "grad_norm": 0.6268444061279297, "learning_rate": 2.2810244875837666e-05, "loss": 0.0879, "step": 19532 }, { "epoch": 0.3459203867762887, "grad_norm": 0.3968878984451294, "learning_rate": 2.2809510326633977e-05, "loss": 0.0635, "step": 19533 }, { "epoch": 0.3459380963133171, "grad_norm": 0.6678574681282043, "learning_rate": 2.2808775751737793e-05, "loss": 0.064, "step": 19534 }, { "epoch": 0.3459558058503456, "grad_norm": 0.5183407068252563, "learning_rate": 2.280804115115152e-05, "loss": 0.083, "step": 19535 }, { "epoch": 0.345973515387374, "grad_norm": 0.7594509124755859, "learning_rate": 2.2807306524877587e-05, "loss": 0.0848, "step": 19536 }, { "epoch": 0.34599122492440243, "grad_norm": 0.6998863816261292, "learning_rate": 2.28065718729184e-05, "loss": 0.0874, "step": 19537 }, { "epoch": 0.34600893446143083, "grad_norm": 0.870624303817749, "learning_rate": 2.2805837195276384e-05, "loss": 0.1103, "step": 19538 }, { "epoch": 0.3460266439984593, "grad_norm": 0.8981545567512512, "learning_rate": 2.2805102491953952e-05, "loss": 0.0686, "step": 19539 }, { "epoch": 0.3460443535354877, "grad_norm": 0.7688770294189453, "learning_rate": 2.2804367762953524e-05, "loss": 0.0765, "step": 19540 }, { "epoch": 0.34606206307251614, "grad_norm": 0.5981771349906921, "learning_rate": 2.280363300827751e-05, "loss": 0.1113, "step": 19541 }, { "epoch": 0.34607977260954453, "grad_norm": 0.9771832227706909, "learning_rate": 2.2802898227928336e-05, "loss": 0.0846, "step": 19542 }, { "epoch": 0.346097482146573, "grad_norm": 0.6800999045372009, "learning_rate": 2.280216342190842e-05, "loss": 0.0725, "step": 19543 }, { "epoch": 0.3461151916836014, "grad_norm": 0.66792231798172, "learning_rate": 2.2801428590220172e-05, "loss": 0.084, "step": 19544 }, { "epoch": 0.34613290122062984, "grad_norm": 0.6225261688232422, "learning_rate": 2.2800693732866012e-05, "loss": 0.0652, "step": 19545 }, { "epoch": 0.3461506107576583, "grad_norm": 0.9661259055137634, "learning_rate": 2.279995884984836e-05, "loss": 0.1091, "step": 19546 }, { "epoch": 0.3461683202946867, "grad_norm": 0.7425170540809631, "learning_rate": 2.279922394116963e-05, "loss": 0.0563, "step": 19547 }, { "epoch": 0.34618602983171515, "grad_norm": 0.9773942828178406, "learning_rate": 2.2798489006832248e-05, "loss": 0.0672, "step": 19548 }, { "epoch": 0.34620373936874355, "grad_norm": 0.6515718698501587, "learning_rate": 2.279775404683862e-05, "loss": 0.0613, "step": 19549 }, { "epoch": 0.346221448905772, "grad_norm": 0.7170665264129639, "learning_rate": 2.2797019061191172e-05, "loss": 0.0944, "step": 19550 }, { "epoch": 0.3462391584428004, "grad_norm": 0.9506268501281738, "learning_rate": 2.279628404989232e-05, "loss": 0.1119, "step": 19551 }, { "epoch": 0.34625686797982885, "grad_norm": 0.5788158774375916, "learning_rate": 2.2795549012944482e-05, "loss": 0.0682, "step": 19552 }, { "epoch": 0.34627457751685725, "grad_norm": 0.5119174122810364, "learning_rate": 2.2794813950350075e-05, "loss": 0.075, "step": 19553 }, { "epoch": 0.3462922870538857, "grad_norm": 0.6837896704673767, "learning_rate": 2.2794078862111522e-05, "loss": 0.0793, "step": 19554 }, { "epoch": 0.3463099965909141, "grad_norm": 0.7927871942520142, "learning_rate": 2.2793343748231235e-05, "loss": 0.0706, "step": 19555 }, { "epoch": 0.34632770612794256, "grad_norm": 0.6346527338027954, "learning_rate": 2.2792608608711636e-05, "loss": 0.1056, "step": 19556 }, { "epoch": 0.34634541566497096, "grad_norm": 0.2539829611778259, "learning_rate": 2.2791873443555146e-05, "loss": 0.0475, "step": 19557 }, { "epoch": 0.3463631252019994, "grad_norm": 0.6413658857345581, "learning_rate": 2.2791138252764172e-05, "loss": 0.0762, "step": 19558 }, { "epoch": 0.3463808347390278, "grad_norm": 0.3968944847583771, "learning_rate": 2.279040303634115e-05, "loss": 0.0997, "step": 19559 }, { "epoch": 0.34639854427605626, "grad_norm": 0.6708562970161438, "learning_rate": 2.2789667794288483e-05, "loss": 0.0815, "step": 19560 }, { "epoch": 0.3464162538130847, "grad_norm": 0.6005436182022095, "learning_rate": 2.2788932526608602e-05, "loss": 0.0767, "step": 19561 }, { "epoch": 0.3464339633501131, "grad_norm": 0.5073149800300598, "learning_rate": 2.2788197233303917e-05, "loss": 0.0694, "step": 19562 }, { "epoch": 0.34645167288714157, "grad_norm": 0.7409650087356567, "learning_rate": 2.2787461914376852e-05, "loss": 0.0699, "step": 19563 }, { "epoch": 0.34646938242416997, "grad_norm": 0.5318405628204346, "learning_rate": 2.278672656982983e-05, "loss": 0.0555, "step": 19564 }, { "epoch": 0.3464870919611984, "grad_norm": 0.6145297884941101, "learning_rate": 2.278599119966526e-05, "loss": 0.0735, "step": 19565 }, { "epoch": 0.3465048014982268, "grad_norm": 0.9996213912963867, "learning_rate": 2.2785255803885564e-05, "loss": 0.1033, "step": 19566 }, { "epoch": 0.3465225110352553, "grad_norm": 0.9380239844322205, "learning_rate": 2.2784520382493167e-05, "loss": 0.0923, "step": 19567 }, { "epoch": 0.3465402205722837, "grad_norm": 0.6853278875350952, "learning_rate": 2.278378493549049e-05, "loss": 0.0905, "step": 19568 }, { "epoch": 0.34655793010931213, "grad_norm": 0.5599946975708008, "learning_rate": 2.278304946287994e-05, "loss": 0.0835, "step": 19569 }, { "epoch": 0.3465756396463405, "grad_norm": 0.6986495852470398, "learning_rate": 2.2782313964663946e-05, "loss": 0.1114, "step": 19570 }, { "epoch": 0.346593349183369, "grad_norm": 1.148494005203247, "learning_rate": 2.2781578440844927e-05, "loss": 0.1007, "step": 19571 }, { "epoch": 0.3466110587203974, "grad_norm": 0.6280460953712463, "learning_rate": 2.2780842891425306e-05, "loss": 0.0779, "step": 19572 }, { "epoch": 0.34662876825742583, "grad_norm": 1.1373131275177002, "learning_rate": 2.2780107316407495e-05, "loss": 0.0821, "step": 19573 }, { "epoch": 0.34664647779445423, "grad_norm": 0.7870233654975891, "learning_rate": 2.277937171579392e-05, "loss": 0.104, "step": 19574 }, { "epoch": 0.3466641873314827, "grad_norm": 0.6317591667175293, "learning_rate": 2.2778636089586996e-05, "loss": 0.0702, "step": 19575 }, { "epoch": 0.34668189686851114, "grad_norm": 0.6724275946617126, "learning_rate": 2.277790043778915e-05, "loss": 0.0541, "step": 19576 }, { "epoch": 0.34669960640553954, "grad_norm": 0.7635596990585327, "learning_rate": 2.2777164760402796e-05, "loss": 0.0991, "step": 19577 }, { "epoch": 0.346717315942568, "grad_norm": 0.9022954702377319, "learning_rate": 2.2776429057430355e-05, "loss": 0.07, "step": 19578 }, { "epoch": 0.3467350254795964, "grad_norm": 0.9509633183479309, "learning_rate": 2.277569332887425e-05, "loss": 0.1244, "step": 19579 }, { "epoch": 0.34675273501662485, "grad_norm": 0.8744239807128906, "learning_rate": 2.27749575747369e-05, "loss": 0.1042, "step": 19580 }, { "epoch": 0.34677044455365325, "grad_norm": 0.7445957064628601, "learning_rate": 2.2774221795020727e-05, "loss": 0.085, "step": 19581 }, { "epoch": 0.3467881540906817, "grad_norm": 0.7214798927307129, "learning_rate": 2.277348598972815e-05, "loss": 0.1323, "step": 19582 }, { "epoch": 0.3468058636277101, "grad_norm": 0.8711704015731812, "learning_rate": 2.277275015886159e-05, "loss": 0.084, "step": 19583 }, { "epoch": 0.34682357316473855, "grad_norm": 0.6568447947502136, "learning_rate": 2.277201430242347e-05, "loss": 0.0926, "step": 19584 }, { "epoch": 0.34684128270176695, "grad_norm": 0.6629382967948914, "learning_rate": 2.2771278420416207e-05, "loss": 0.1003, "step": 19585 }, { "epoch": 0.3468589922387954, "grad_norm": 1.0245335102081299, "learning_rate": 2.2770542512842227e-05, "loss": 0.111, "step": 19586 }, { "epoch": 0.3468767017758238, "grad_norm": 0.6563927531242371, "learning_rate": 2.2769806579703945e-05, "loss": 0.0873, "step": 19587 }, { "epoch": 0.34689441131285226, "grad_norm": 0.70017409324646, "learning_rate": 2.2769070621003787e-05, "loss": 0.1165, "step": 19588 }, { "epoch": 0.34691212084988066, "grad_norm": 0.5314007997512817, "learning_rate": 2.276833463674417e-05, "loss": 0.054, "step": 19589 }, { "epoch": 0.3469298303869091, "grad_norm": 0.8448439836502075, "learning_rate": 2.276759862692752e-05, "loss": 0.0903, "step": 19590 }, { "epoch": 0.34694753992393756, "grad_norm": 1.06842041015625, "learning_rate": 2.2766862591556253e-05, "loss": 0.1136, "step": 19591 }, { "epoch": 0.34696524946096596, "grad_norm": 0.4596618711948395, "learning_rate": 2.2766126530632795e-05, "loss": 0.074, "step": 19592 }, { "epoch": 0.3469829589979944, "grad_norm": 0.6798549294471741, "learning_rate": 2.276539044415957e-05, "loss": 0.0871, "step": 19593 }, { "epoch": 0.3470006685350228, "grad_norm": 0.7157920598983765, "learning_rate": 2.2764654332138993e-05, "loss": 0.0794, "step": 19594 }, { "epoch": 0.34701837807205127, "grad_norm": 0.7632425427436829, "learning_rate": 2.276391819457349e-05, "loss": 0.1104, "step": 19595 }, { "epoch": 0.34703608760907967, "grad_norm": 0.6601311564445496, "learning_rate": 2.2763182031465478e-05, "loss": 0.0724, "step": 19596 }, { "epoch": 0.3470537971461081, "grad_norm": 0.9237881302833557, "learning_rate": 2.2762445842817387e-05, "loss": 0.1239, "step": 19597 }, { "epoch": 0.3470715066831365, "grad_norm": 1.0059711933135986, "learning_rate": 2.2761709628631635e-05, "loss": 0.0897, "step": 19598 }, { "epoch": 0.347089216220165, "grad_norm": 1.0745904445648193, "learning_rate": 2.276097338891063e-05, "loss": 0.0844, "step": 19599 }, { "epoch": 0.3471069257571934, "grad_norm": 0.98505699634552, "learning_rate": 2.2760237123656823e-05, "loss": 0.1094, "step": 19600 }, { "epoch": 0.34712463529422183, "grad_norm": 0.7206254005432129, "learning_rate": 2.2759500832872614e-05, "loss": 0.096, "step": 19601 }, { "epoch": 0.3471423448312502, "grad_norm": 0.5763750672340393, "learning_rate": 2.275876451656044e-05, "loss": 0.0702, "step": 19602 }, { "epoch": 0.3471600543682787, "grad_norm": 0.7302036881446838, "learning_rate": 2.2758028174722705e-05, "loss": 0.108, "step": 19603 }, { "epoch": 0.3471777639053071, "grad_norm": 0.7767532467842102, "learning_rate": 2.2757291807361846e-05, "loss": 0.0578, "step": 19604 }, { "epoch": 0.34719547344233553, "grad_norm": 0.7150493264198303, "learning_rate": 2.275655541448028e-05, "loss": 0.1272, "step": 19605 }, { "epoch": 0.347213182979364, "grad_norm": 1.370057225227356, "learning_rate": 2.275581899608043e-05, "loss": 0.0871, "step": 19606 }, { "epoch": 0.3472308925163924, "grad_norm": 0.6828815937042236, "learning_rate": 2.275508255216473e-05, "loss": 0.0789, "step": 19607 }, { "epoch": 0.34724860205342084, "grad_norm": 0.9650434851646423, "learning_rate": 2.2754346082735582e-05, "loss": 0.0978, "step": 19608 }, { "epoch": 0.34726631159044924, "grad_norm": 0.7141030430793762, "learning_rate": 2.275360958779542e-05, "loss": 0.101, "step": 19609 }, { "epoch": 0.3472840211274777, "grad_norm": 0.8566843867301941, "learning_rate": 2.275287306734667e-05, "loss": 0.0944, "step": 19610 }, { "epoch": 0.3473017306645061, "grad_norm": 1.01913583278656, "learning_rate": 2.2752136521391758e-05, "loss": 0.0996, "step": 19611 }, { "epoch": 0.34731944020153455, "grad_norm": 0.5640362501144409, "learning_rate": 2.2751399949933096e-05, "loss": 0.0874, "step": 19612 }, { "epoch": 0.34733714973856294, "grad_norm": 0.7397812008857727, "learning_rate": 2.2750663352973108e-05, "loss": 0.0983, "step": 19613 }, { "epoch": 0.3473548592755914, "grad_norm": 0.5235331058502197, "learning_rate": 2.2749926730514223e-05, "loss": 0.069, "step": 19614 }, { "epoch": 0.3473725688126198, "grad_norm": 0.8663363456726074, "learning_rate": 2.2749190082558863e-05, "loss": 0.1188, "step": 19615 }, { "epoch": 0.34739027834964825, "grad_norm": 0.8688623309135437, "learning_rate": 2.2748453409109455e-05, "loss": 0.1009, "step": 19616 }, { "epoch": 0.34740798788667665, "grad_norm": 1.018877625465393, "learning_rate": 2.2747716710168414e-05, "loss": 0.0984, "step": 19617 }, { "epoch": 0.3474256974237051, "grad_norm": 0.628506600856781, "learning_rate": 2.2746979985738174e-05, "loss": 0.0569, "step": 19618 }, { "epoch": 0.3474434069607335, "grad_norm": 0.8260759711265564, "learning_rate": 2.274624323582115e-05, "loss": 0.0844, "step": 19619 }, { "epoch": 0.34746111649776196, "grad_norm": 0.5869320631027222, "learning_rate": 2.274550646041977e-05, "loss": 0.0575, "step": 19620 }, { "epoch": 0.3474788260347904, "grad_norm": 0.43345341086387634, "learning_rate": 2.2744769659536456e-05, "loss": 0.0674, "step": 19621 }, { "epoch": 0.3474965355718188, "grad_norm": 0.7657961249351501, "learning_rate": 2.2744032833173634e-05, "loss": 0.0778, "step": 19622 }, { "epoch": 0.34751424510884726, "grad_norm": 1.3394099473953247, "learning_rate": 2.274329598133373e-05, "loss": 0.1222, "step": 19623 }, { "epoch": 0.34753195464587566, "grad_norm": 1.0735132694244385, "learning_rate": 2.274255910401916e-05, "loss": 0.0999, "step": 19624 }, { "epoch": 0.3475496641829041, "grad_norm": 0.5724836587905884, "learning_rate": 2.2741822201232358e-05, "loss": 0.0859, "step": 19625 }, { "epoch": 0.3475673737199325, "grad_norm": 1.0755764245986938, "learning_rate": 2.274108527297574e-05, "loss": 0.0822, "step": 19626 }, { "epoch": 0.34758508325696097, "grad_norm": 0.7543674111366272, "learning_rate": 2.2740348319251744e-05, "loss": 0.0825, "step": 19627 }, { "epoch": 0.34760279279398937, "grad_norm": 0.4998715817928314, "learning_rate": 2.2739611340062774e-05, "loss": 0.0946, "step": 19628 }, { "epoch": 0.3476205023310178, "grad_norm": 0.5444267392158508, "learning_rate": 2.2738874335411267e-05, "loss": 0.0962, "step": 19629 }, { "epoch": 0.3476382118680462, "grad_norm": 0.6604218482971191, "learning_rate": 2.2738137305299652e-05, "loss": 0.0734, "step": 19630 }, { "epoch": 0.3476559214050747, "grad_norm": 0.8230400681495667, "learning_rate": 2.2737400249730345e-05, "loss": 0.0757, "step": 19631 }, { "epoch": 0.3476736309421031, "grad_norm": 0.5823553800582886, "learning_rate": 2.2736663168705772e-05, "loss": 0.0802, "step": 19632 }, { "epoch": 0.3476913404791315, "grad_norm": 0.5925630927085876, "learning_rate": 2.2735926062228354e-05, "loss": 0.1056, "step": 19633 }, { "epoch": 0.3477090500161599, "grad_norm": 1.040726661682129, "learning_rate": 2.273518893030053e-05, "loss": 0.1127, "step": 19634 }, { "epoch": 0.3477267595531884, "grad_norm": 0.5900188684463501, "learning_rate": 2.273445177292472e-05, "loss": 0.0967, "step": 19635 }, { "epoch": 0.34774446909021683, "grad_norm": 0.7137134075164795, "learning_rate": 2.2733714590103342e-05, "loss": 0.1128, "step": 19636 }, { "epoch": 0.34776217862724523, "grad_norm": 0.7998580932617188, "learning_rate": 2.2732977381838825e-05, "loss": 0.094, "step": 19637 }, { "epoch": 0.3477798881642737, "grad_norm": 0.831655740737915, "learning_rate": 2.2732240148133593e-05, "loss": 0.0955, "step": 19638 }, { "epoch": 0.3477975977013021, "grad_norm": 0.6197009086608887, "learning_rate": 2.2731502888990077e-05, "loss": 0.07, "step": 19639 }, { "epoch": 0.34781530723833054, "grad_norm": 0.6321871876716614, "learning_rate": 2.273076560441069e-05, "loss": 0.0979, "step": 19640 }, { "epoch": 0.34783301677535894, "grad_norm": 0.9268985986709595, "learning_rate": 2.2730028294397876e-05, "loss": 0.0828, "step": 19641 }, { "epoch": 0.3478507263123874, "grad_norm": 0.8028342127799988, "learning_rate": 2.2729290958954045e-05, "loss": 0.08, "step": 19642 }, { "epoch": 0.3478684358494158, "grad_norm": 0.37567996978759766, "learning_rate": 2.2728553598081632e-05, "loss": 0.0583, "step": 19643 }, { "epoch": 0.34788614538644425, "grad_norm": 0.5312213897705078, "learning_rate": 2.2727816211783058e-05, "loss": 0.064, "step": 19644 }, { "epoch": 0.34790385492347264, "grad_norm": 0.8269767165184021, "learning_rate": 2.2727078800060746e-05, "loss": 0.0681, "step": 19645 }, { "epoch": 0.3479215644605011, "grad_norm": 0.6460903286933899, "learning_rate": 2.2726341362917136e-05, "loss": 0.0952, "step": 19646 }, { "epoch": 0.3479392739975295, "grad_norm": 1.051072359085083, "learning_rate": 2.2725603900354633e-05, "loss": 0.1, "step": 19647 }, { "epoch": 0.34795698353455795, "grad_norm": 0.6003922820091248, "learning_rate": 2.272486641237568e-05, "loss": 0.065, "step": 19648 }, { "epoch": 0.34797469307158635, "grad_norm": 0.7115729451179504, "learning_rate": 2.2724128898982696e-05, "loss": 0.0703, "step": 19649 }, { "epoch": 0.3479924026086148, "grad_norm": 0.7899292707443237, "learning_rate": 2.2723391360178113e-05, "loss": 0.0692, "step": 19650 }, { "epoch": 0.34801011214564326, "grad_norm": 0.6108189821243286, "learning_rate": 2.2722653795964348e-05, "loss": 0.062, "step": 19651 }, { "epoch": 0.34802782168267166, "grad_norm": 0.616909921169281, "learning_rate": 2.272191620634384e-05, "loss": 0.0629, "step": 19652 }, { "epoch": 0.3480455312197001, "grad_norm": 0.7522408366203308, "learning_rate": 2.2721178591319005e-05, "loss": 0.0717, "step": 19653 }, { "epoch": 0.3480632407567285, "grad_norm": 1.025662899017334, "learning_rate": 2.2720440950892275e-05, "loss": 0.0696, "step": 19654 }, { "epoch": 0.34808095029375696, "grad_norm": 0.7102144956588745, "learning_rate": 2.2719703285066072e-05, "loss": 0.0852, "step": 19655 }, { "epoch": 0.34809865983078536, "grad_norm": 0.6609695553779602, "learning_rate": 2.271896559384283e-05, "loss": 0.065, "step": 19656 }, { "epoch": 0.3481163693678138, "grad_norm": 0.9910834431648254, "learning_rate": 2.2718227877224965e-05, "loss": 0.0622, "step": 19657 }, { "epoch": 0.3481340789048422, "grad_norm": 0.7159879803657532, "learning_rate": 2.271749013521492e-05, "loss": 0.0986, "step": 19658 }, { "epoch": 0.34815178844187067, "grad_norm": 0.9454904794692993, "learning_rate": 2.271675236781511e-05, "loss": 0.0639, "step": 19659 }, { "epoch": 0.34816949797889907, "grad_norm": 1.0427647829055786, "learning_rate": 2.2716014575027963e-05, "loss": 0.1015, "step": 19660 }, { "epoch": 0.3481872075159275, "grad_norm": 0.4027200937271118, "learning_rate": 2.271527675685591e-05, "loss": 0.0913, "step": 19661 }, { "epoch": 0.3482049170529559, "grad_norm": 0.6564708948135376, "learning_rate": 2.2714538913301374e-05, "loss": 0.0996, "step": 19662 }, { "epoch": 0.3482226265899844, "grad_norm": 0.9050922393798828, "learning_rate": 2.2713801044366788e-05, "loss": 0.0987, "step": 19663 }, { "epoch": 0.3482403361270128, "grad_norm": 0.7467647790908813, "learning_rate": 2.2713063150054577e-05, "loss": 0.069, "step": 19664 }, { "epoch": 0.3482580456640412, "grad_norm": 1.0599827766418457, "learning_rate": 2.271232523036717e-05, "loss": 0.0933, "step": 19665 }, { "epoch": 0.3482757552010697, "grad_norm": 0.45144665241241455, "learning_rate": 2.271158728530699e-05, "loss": 0.0413, "step": 19666 }, { "epoch": 0.3482934647380981, "grad_norm": 0.8638441562652588, "learning_rate": 2.2710849314876465e-05, "loss": 0.0973, "step": 19667 }, { "epoch": 0.34831117427512653, "grad_norm": 0.7430962920188904, "learning_rate": 2.2710111319078034e-05, "loss": 0.0783, "step": 19668 }, { "epoch": 0.34832888381215493, "grad_norm": 0.7181841731071472, "learning_rate": 2.270937329791411e-05, "loss": 0.0982, "step": 19669 }, { "epoch": 0.3483465933491834, "grad_norm": 0.7054138779640198, "learning_rate": 2.270863525138713e-05, "loss": 0.0834, "step": 19670 }, { "epoch": 0.3483643028862118, "grad_norm": 0.6986668705940247, "learning_rate": 2.2707897179499517e-05, "loss": 0.0725, "step": 19671 }, { "epoch": 0.34838201242324024, "grad_norm": 0.9450697898864746, "learning_rate": 2.2707159082253702e-05, "loss": 0.1086, "step": 19672 }, { "epoch": 0.34839972196026864, "grad_norm": 0.6664628982543945, "learning_rate": 2.2706420959652118e-05, "loss": 0.082, "step": 19673 }, { "epoch": 0.3484174314972971, "grad_norm": 0.6837923526763916, "learning_rate": 2.270568281169718e-05, "loss": 0.1435, "step": 19674 }, { "epoch": 0.3484351410343255, "grad_norm": 0.6710053086280823, "learning_rate": 2.2704944638391335e-05, "loss": 0.0868, "step": 19675 }, { "epoch": 0.34845285057135394, "grad_norm": 0.5668522119522095, "learning_rate": 2.2704206439736997e-05, "loss": 0.0995, "step": 19676 }, { "epoch": 0.34847056010838234, "grad_norm": 0.7559475898742676, "learning_rate": 2.27034682157366e-05, "loss": 0.1017, "step": 19677 }, { "epoch": 0.3484882696454108, "grad_norm": 0.474656343460083, "learning_rate": 2.270272996639257e-05, "loss": 0.0882, "step": 19678 }, { "epoch": 0.3485059791824392, "grad_norm": 0.9040674567222595, "learning_rate": 2.2701991691707338e-05, "loss": 0.11, "step": 19679 }, { "epoch": 0.34852368871946765, "grad_norm": 0.8654758930206299, "learning_rate": 2.270125339168333e-05, "loss": 0.0929, "step": 19680 }, { "epoch": 0.3485413982564961, "grad_norm": 0.5092204213142395, "learning_rate": 2.270051506632298e-05, "loss": 0.0424, "step": 19681 }, { "epoch": 0.3485591077935245, "grad_norm": 1.006230354309082, "learning_rate": 2.2699776715628715e-05, "loss": 0.0798, "step": 19682 }, { "epoch": 0.34857681733055296, "grad_norm": 1.1644647121429443, "learning_rate": 2.2699038339602964e-05, "loss": 0.1291, "step": 19683 }, { "epoch": 0.34859452686758136, "grad_norm": 0.7346897125244141, "learning_rate": 2.2698299938248157e-05, "loss": 0.1038, "step": 19684 }, { "epoch": 0.3486122364046098, "grad_norm": 0.6257244348526001, "learning_rate": 2.2697561511566715e-05, "loss": 0.1072, "step": 19685 }, { "epoch": 0.3486299459416382, "grad_norm": 0.5211819410324097, "learning_rate": 2.269682305956108e-05, "loss": 0.0763, "step": 19686 }, { "epoch": 0.34864765547866666, "grad_norm": 0.6326072812080383, "learning_rate": 2.2696084582233677e-05, "loss": 0.0822, "step": 19687 }, { "epoch": 0.34866536501569506, "grad_norm": 0.718356728553772, "learning_rate": 2.2695346079586926e-05, "loss": 0.1196, "step": 19688 }, { "epoch": 0.3486830745527235, "grad_norm": 0.7331499457359314, "learning_rate": 2.269460755162327e-05, "loss": 0.0665, "step": 19689 }, { "epoch": 0.3487007840897519, "grad_norm": 0.48248228430747986, "learning_rate": 2.2693868998345133e-05, "loss": 0.0971, "step": 19690 }, { "epoch": 0.34871849362678037, "grad_norm": 0.6856340169906616, "learning_rate": 2.2693130419754946e-05, "loss": 0.0919, "step": 19691 }, { "epoch": 0.34873620316380877, "grad_norm": 0.9760563373565674, "learning_rate": 2.269239181585514e-05, "loss": 0.0896, "step": 19692 }, { "epoch": 0.3487539127008372, "grad_norm": 0.5987711548805237, "learning_rate": 2.2691653186648135e-05, "loss": 0.0698, "step": 19693 }, { "epoch": 0.3487716222378656, "grad_norm": 0.6368376612663269, "learning_rate": 2.2690914532136375e-05, "loss": 0.0737, "step": 19694 }, { "epoch": 0.3487893317748941, "grad_norm": 1.2163305282592773, "learning_rate": 2.2690175852322285e-05, "loss": 0.1097, "step": 19695 }, { "epoch": 0.3488070413119225, "grad_norm": 0.5234284400939941, "learning_rate": 2.2689437147208293e-05, "loss": 0.074, "step": 19696 }, { "epoch": 0.3488247508489509, "grad_norm": 0.7745230197906494, "learning_rate": 2.268869841679683e-05, "loss": 0.09, "step": 19697 }, { "epoch": 0.3488424603859794, "grad_norm": 1.5460458993911743, "learning_rate": 2.2687959661090328e-05, "loss": 0.1104, "step": 19698 }, { "epoch": 0.3488601699230078, "grad_norm": 1.0904662609100342, "learning_rate": 2.2687220880091213e-05, "loss": 0.1015, "step": 19699 }, { "epoch": 0.34887787946003623, "grad_norm": 1.3630223274230957, "learning_rate": 2.268648207380192e-05, "loss": 0.0922, "step": 19700 }, { "epoch": 0.34889558899706463, "grad_norm": 1.0718380212783813, "learning_rate": 2.2685743242224883e-05, "loss": 0.0829, "step": 19701 }, { "epoch": 0.3489132985340931, "grad_norm": 1.0091646909713745, "learning_rate": 2.2685004385362523e-05, "loss": 0.0959, "step": 19702 }, { "epoch": 0.3489310080711215, "grad_norm": 0.8463037610054016, "learning_rate": 2.268426550321728e-05, "loss": 0.1082, "step": 19703 }, { "epoch": 0.34894871760814994, "grad_norm": 0.9299449920654297, "learning_rate": 2.2683526595791576e-05, "loss": 0.0841, "step": 19704 }, { "epoch": 0.34896642714517834, "grad_norm": 1.048636555671692, "learning_rate": 2.2682787663087847e-05, "loss": 0.0781, "step": 19705 }, { "epoch": 0.3489841366822068, "grad_norm": 0.8318553566932678, "learning_rate": 2.2682048705108527e-05, "loss": 0.0589, "step": 19706 }, { "epoch": 0.3490018462192352, "grad_norm": 0.8339470028877258, "learning_rate": 2.2681309721856044e-05, "loss": 0.0786, "step": 19707 }, { "epoch": 0.34901955575626364, "grad_norm": 1.7481608390808105, "learning_rate": 2.2680570713332827e-05, "loss": 0.0831, "step": 19708 }, { "epoch": 0.34903726529329204, "grad_norm": 0.7771185040473938, "learning_rate": 2.2679831679541307e-05, "loss": 0.1254, "step": 19709 }, { "epoch": 0.3490549748303205, "grad_norm": 0.6628201603889465, "learning_rate": 2.2679092620483922e-05, "loss": 0.0817, "step": 19710 }, { "epoch": 0.34907268436734895, "grad_norm": 0.7867429852485657, "learning_rate": 2.2678353536163095e-05, "loss": 0.1302, "step": 19711 }, { "epoch": 0.34909039390437735, "grad_norm": 0.8689091205596924, "learning_rate": 2.2677614426581265e-05, "loss": 0.0925, "step": 19712 }, { "epoch": 0.3491081034414058, "grad_norm": 0.7569827437400818, "learning_rate": 2.2676875291740856e-05, "loss": 0.1059, "step": 19713 }, { "epoch": 0.3491258129784342, "grad_norm": 0.7877883315086365, "learning_rate": 2.2676136131644305e-05, "loss": 0.0848, "step": 19714 }, { "epoch": 0.34914352251546266, "grad_norm": 0.7752991914749146, "learning_rate": 2.267539694629405e-05, "loss": 0.0785, "step": 19715 }, { "epoch": 0.34916123205249106, "grad_norm": 1.201208233833313, "learning_rate": 2.2674657735692507e-05, "loss": 0.0909, "step": 19716 }, { "epoch": 0.3491789415895195, "grad_norm": 0.816935658454895, "learning_rate": 2.267391849984212e-05, "loss": 0.0803, "step": 19717 }, { "epoch": 0.3491966511265479, "grad_norm": 0.6247076392173767, "learning_rate": 2.2673179238745314e-05, "loss": 0.081, "step": 19718 }, { "epoch": 0.34921436066357636, "grad_norm": 0.8263108730316162, "learning_rate": 2.2672439952404524e-05, "loss": 0.0843, "step": 19719 }, { "epoch": 0.34923207020060476, "grad_norm": 0.6078947186470032, "learning_rate": 2.2671700640822182e-05, "loss": 0.0874, "step": 19720 }, { "epoch": 0.3492497797376332, "grad_norm": 0.7617987990379333, "learning_rate": 2.2670961304000728e-05, "loss": 0.0792, "step": 19721 }, { "epoch": 0.3492674892746616, "grad_norm": 0.819017767906189, "learning_rate": 2.2670221941942585e-05, "loss": 0.1292, "step": 19722 }, { "epoch": 0.34928519881169007, "grad_norm": 0.6676615476608276, "learning_rate": 2.2669482554650182e-05, "loss": 0.0721, "step": 19723 }, { "epoch": 0.34930290834871847, "grad_norm": 0.7882854342460632, "learning_rate": 2.266874314212596e-05, "loss": 0.0943, "step": 19724 }, { "epoch": 0.3493206178857469, "grad_norm": 0.5223100781440735, "learning_rate": 2.266800370437235e-05, "loss": 0.0626, "step": 19725 }, { "epoch": 0.3493383274227754, "grad_norm": 0.9657315015792847, "learning_rate": 2.2667264241391783e-05, "loss": 0.0784, "step": 19726 }, { "epoch": 0.3493560369598038, "grad_norm": 0.6135013103485107, "learning_rate": 2.2666524753186694e-05, "loss": 0.062, "step": 19727 }, { "epoch": 0.3493737464968322, "grad_norm": 0.25044336915016174, "learning_rate": 2.2665785239759508e-05, "loss": 0.0744, "step": 19728 }, { "epoch": 0.3493914560338606, "grad_norm": 0.7764254212379456, "learning_rate": 2.2665045701112668e-05, "loss": 0.0953, "step": 19729 }, { "epoch": 0.3494091655708891, "grad_norm": 0.5964257717132568, "learning_rate": 2.26643061372486e-05, "loss": 0.1003, "step": 19730 }, { "epoch": 0.3494268751079175, "grad_norm": 1.0868909358978271, "learning_rate": 2.2663566548169747e-05, "loss": 0.1034, "step": 19731 }, { "epoch": 0.34944458464494593, "grad_norm": 0.511808454990387, "learning_rate": 2.2662826933878528e-05, "loss": 0.0766, "step": 19732 }, { "epoch": 0.34946229418197433, "grad_norm": 0.8494970202445984, "learning_rate": 2.2662087294377385e-05, "loss": 0.0686, "step": 19733 }, { "epoch": 0.3494800037190028, "grad_norm": 0.8357285857200623, "learning_rate": 2.266134762966875e-05, "loss": 0.0778, "step": 19734 }, { "epoch": 0.3494977132560312, "grad_norm": 1.1706022024154663, "learning_rate": 2.266060793975506e-05, "loss": 0.0996, "step": 19735 }, { "epoch": 0.34951542279305964, "grad_norm": 0.6862501502037048, "learning_rate": 2.265986822463874e-05, "loss": 0.0812, "step": 19736 }, { "epoch": 0.34953313233008804, "grad_norm": 0.842147946357727, "learning_rate": 2.2659128484322233e-05, "loss": 0.087, "step": 19737 }, { "epoch": 0.3495508418671165, "grad_norm": 0.8174816370010376, "learning_rate": 2.2658388718807965e-05, "loss": 0.0887, "step": 19738 }, { "epoch": 0.34956855140414494, "grad_norm": 0.9953048825263977, "learning_rate": 2.265764892809837e-05, "loss": 0.0903, "step": 19739 }, { "epoch": 0.34958626094117334, "grad_norm": 0.8541080951690674, "learning_rate": 2.2656909112195888e-05, "loss": 0.0935, "step": 19740 }, { "epoch": 0.3496039704782018, "grad_norm": 0.9934766292572021, "learning_rate": 2.265616927110295e-05, "loss": 0.0974, "step": 19741 }, { "epoch": 0.3496216800152302, "grad_norm": 0.780241072177887, "learning_rate": 2.2655429404821988e-05, "loss": 0.0807, "step": 19742 }, { "epoch": 0.34963938955225865, "grad_norm": 1.470519781112671, "learning_rate": 2.2654689513355436e-05, "loss": 0.111, "step": 19743 }, { "epoch": 0.34965709908928705, "grad_norm": 0.8934949636459351, "learning_rate": 2.2653949596705733e-05, "loss": 0.1027, "step": 19744 }, { "epoch": 0.3496748086263155, "grad_norm": 0.7743428945541382, "learning_rate": 2.2653209654875307e-05, "loss": 0.1062, "step": 19745 }, { "epoch": 0.3496925181633439, "grad_norm": 0.7285844087600708, "learning_rate": 2.26524696878666e-05, "loss": 0.0912, "step": 19746 }, { "epoch": 0.34971022770037236, "grad_norm": 0.9687679409980774, "learning_rate": 2.2651729695682035e-05, "loss": 0.0939, "step": 19747 }, { "epoch": 0.34972793723740075, "grad_norm": 1.092982292175293, "learning_rate": 2.2650989678324056e-05, "loss": 0.1034, "step": 19748 }, { "epoch": 0.3497456467744292, "grad_norm": 0.7490842342376709, "learning_rate": 2.26502496357951e-05, "loss": 0.09, "step": 19749 }, { "epoch": 0.3497633563114576, "grad_norm": 0.579081118106842, "learning_rate": 2.264950956809759e-05, "loss": 0.0876, "step": 19750 }, { "epoch": 0.34978106584848606, "grad_norm": 0.6452624201774597, "learning_rate": 2.264876947523397e-05, "loss": 0.1293, "step": 19751 }, { "epoch": 0.34979877538551446, "grad_norm": 0.561589777469635, "learning_rate": 2.264802935720667e-05, "loss": 0.0964, "step": 19752 }, { "epoch": 0.3498164849225429, "grad_norm": 0.8616542220115662, "learning_rate": 2.2647289214018127e-05, "loss": 0.0812, "step": 19753 }, { "epoch": 0.34983419445957137, "grad_norm": 0.5245124101638794, "learning_rate": 2.2646549045670778e-05, "loss": 0.0917, "step": 19754 }, { "epoch": 0.34985190399659977, "grad_norm": 0.5792520642280579, "learning_rate": 2.2645808852167056e-05, "loss": 0.0552, "step": 19755 }, { "epoch": 0.3498696135336282, "grad_norm": 0.9605879783630371, "learning_rate": 2.2645068633509392e-05, "loss": 0.1103, "step": 19756 }, { "epoch": 0.3498873230706566, "grad_norm": 0.7033628821372986, "learning_rate": 2.264432838970023e-05, "loss": 0.0719, "step": 19757 }, { "epoch": 0.3499050326076851, "grad_norm": 0.802014946937561, "learning_rate": 2.2643588120742e-05, "loss": 0.1071, "step": 19758 }, { "epoch": 0.3499227421447135, "grad_norm": 0.7536330223083496, "learning_rate": 2.264284782663714e-05, "loss": 0.0987, "step": 19759 }, { "epoch": 0.3499404516817419, "grad_norm": 0.6666642427444458, "learning_rate": 2.2642107507388084e-05, "loss": 0.0716, "step": 19760 }, { "epoch": 0.3499581612187703, "grad_norm": 0.6874940395355225, "learning_rate": 2.2641367162997263e-05, "loss": 0.0896, "step": 19761 }, { "epoch": 0.3499758707557988, "grad_norm": 0.8642041683197021, "learning_rate": 2.2640626793467118e-05, "loss": 0.0596, "step": 19762 }, { "epoch": 0.3499935802928272, "grad_norm": 0.7313908338546753, "learning_rate": 2.2639886398800084e-05, "loss": 0.059, "step": 19763 }, { "epoch": 0.35001128982985563, "grad_norm": 0.606734037399292, "learning_rate": 2.2639145978998598e-05, "loss": 0.059, "step": 19764 }, { "epoch": 0.35002899936688403, "grad_norm": 0.8011584281921387, "learning_rate": 2.2638405534065095e-05, "loss": 0.086, "step": 19765 }, { "epoch": 0.3500467089039125, "grad_norm": 0.8130154013633728, "learning_rate": 2.263766506400201e-05, "loss": 0.0734, "step": 19766 }, { "epoch": 0.3500644184409409, "grad_norm": 0.9344800114631653, "learning_rate": 2.2636924568811775e-05, "loss": 0.0766, "step": 19767 }, { "epoch": 0.35008212797796934, "grad_norm": 0.5630698204040527, "learning_rate": 2.2636184048496832e-05, "loss": 0.1004, "step": 19768 }, { "epoch": 0.3500998375149978, "grad_norm": 0.7453184723854065, "learning_rate": 2.2635443503059618e-05, "loss": 0.0954, "step": 19769 }, { "epoch": 0.3501175470520262, "grad_norm": 1.0624494552612305, "learning_rate": 2.263470293250257e-05, "loss": 0.1103, "step": 19770 }, { "epoch": 0.35013525658905464, "grad_norm": 0.7173255085945129, "learning_rate": 2.263396233682811e-05, "loss": 0.0789, "step": 19771 }, { "epoch": 0.35015296612608304, "grad_norm": 0.9005471467971802, "learning_rate": 2.2633221716038702e-05, "loss": 0.0799, "step": 19772 }, { "epoch": 0.3501706756631115, "grad_norm": 0.7321834564208984, "learning_rate": 2.2632481070136756e-05, "loss": 0.0808, "step": 19773 }, { "epoch": 0.3501883852001399, "grad_norm": 0.7263750433921814, "learning_rate": 2.263174039912472e-05, "loss": 0.0742, "step": 19774 }, { "epoch": 0.35020609473716835, "grad_norm": 0.6505452990531921, "learning_rate": 2.2630999703005035e-05, "loss": 0.1135, "step": 19775 }, { "epoch": 0.35022380427419675, "grad_norm": 0.6713445782661438, "learning_rate": 2.263025898178013e-05, "loss": 0.078, "step": 19776 }, { "epoch": 0.3502415138112252, "grad_norm": 0.40243691205978394, "learning_rate": 2.2629518235452445e-05, "loss": 0.0709, "step": 19777 }, { "epoch": 0.3502592233482536, "grad_norm": 1.1225823163986206, "learning_rate": 2.262877746402441e-05, "loss": 0.0954, "step": 19778 }, { "epoch": 0.35027693288528206, "grad_norm": 0.9299498796463013, "learning_rate": 2.2628036667498482e-05, "loss": 0.0788, "step": 19779 }, { "epoch": 0.35029464242231045, "grad_norm": 1.0125806331634521, "learning_rate": 2.2627295845877075e-05, "loss": 0.0741, "step": 19780 }, { "epoch": 0.3503123519593389, "grad_norm": 0.7141273617744446, "learning_rate": 2.262655499916264e-05, "loss": 0.0667, "step": 19781 }, { "epoch": 0.3503300614963673, "grad_norm": 0.791445791721344, "learning_rate": 2.262581412735761e-05, "loss": 0.1005, "step": 19782 }, { "epoch": 0.35034777103339576, "grad_norm": 0.5166076421737671, "learning_rate": 2.262507323046442e-05, "loss": 0.0451, "step": 19783 }, { "epoch": 0.3503654805704242, "grad_norm": 0.4414645731449127, "learning_rate": 2.2624332308485517e-05, "loss": 0.0669, "step": 19784 }, { "epoch": 0.3503831901074526, "grad_norm": 0.6420091986656189, "learning_rate": 2.2623591361423324e-05, "loss": 0.092, "step": 19785 }, { "epoch": 0.35040089964448107, "grad_norm": 0.6665942668914795, "learning_rate": 2.262285038928029e-05, "loss": 0.0894, "step": 19786 }, { "epoch": 0.35041860918150947, "grad_norm": 0.888555645942688, "learning_rate": 2.2622109392058852e-05, "loss": 0.0808, "step": 19787 }, { "epoch": 0.3504363187185379, "grad_norm": 1.0341719388961792, "learning_rate": 2.262136836976144e-05, "loss": 0.1228, "step": 19788 }, { "epoch": 0.3504540282555663, "grad_norm": 0.5169954299926758, "learning_rate": 2.26206273223905e-05, "loss": 0.0988, "step": 19789 }, { "epoch": 0.3504717377925948, "grad_norm": 0.8024740815162659, "learning_rate": 2.2619886249948464e-05, "loss": 0.1222, "step": 19790 }, { "epoch": 0.35048944732962317, "grad_norm": 0.6627005934715271, "learning_rate": 2.2619145152437777e-05, "loss": 0.0843, "step": 19791 }, { "epoch": 0.3505071568666516, "grad_norm": 0.9183345437049866, "learning_rate": 2.2618404029860867e-05, "loss": 0.0771, "step": 19792 }, { "epoch": 0.35052486640368, "grad_norm": 1.0931873321533203, "learning_rate": 2.2617662882220185e-05, "loss": 0.0874, "step": 19793 }, { "epoch": 0.3505425759407085, "grad_norm": 0.5863537788391113, "learning_rate": 2.2616921709518162e-05, "loss": 0.0963, "step": 19794 }, { "epoch": 0.3505602854777369, "grad_norm": 0.8160828948020935, "learning_rate": 2.2616180511757236e-05, "loss": 0.0913, "step": 19795 }, { "epoch": 0.35057799501476533, "grad_norm": 1.3738638162612915, "learning_rate": 2.2615439288939842e-05, "loss": 0.0892, "step": 19796 }, { "epoch": 0.35059570455179373, "grad_norm": 1.1590995788574219, "learning_rate": 2.261469804106843e-05, "loss": 0.1194, "step": 19797 }, { "epoch": 0.3506134140888222, "grad_norm": 0.6664711236953735, "learning_rate": 2.2613956768145425e-05, "loss": 0.0899, "step": 19798 }, { "epoch": 0.35063112362585064, "grad_norm": 0.5685617923736572, "learning_rate": 2.2613215470173278e-05, "loss": 0.0877, "step": 19799 }, { "epoch": 0.35064883316287904, "grad_norm": 1.187692642211914, "learning_rate": 2.2612474147154414e-05, "loss": 0.0897, "step": 19800 }, { "epoch": 0.3506665426999075, "grad_norm": 0.8581185936927795, "learning_rate": 2.261173279909129e-05, "loss": 0.0902, "step": 19801 }, { "epoch": 0.3506842522369359, "grad_norm": 1.04233717918396, "learning_rate": 2.261099142598633e-05, "loss": 0.0814, "step": 19802 }, { "epoch": 0.35070196177396434, "grad_norm": 0.5919515490531921, "learning_rate": 2.261025002784198e-05, "loss": 0.0896, "step": 19803 }, { "epoch": 0.35071967131099274, "grad_norm": 0.6005095839500427, "learning_rate": 2.2609508604660673e-05, "loss": 0.068, "step": 19804 }, { "epoch": 0.3507373808480212, "grad_norm": 0.8996656537055969, "learning_rate": 2.2608767156444853e-05, "loss": 0.0857, "step": 19805 }, { "epoch": 0.3507550903850496, "grad_norm": 0.9180951118469238, "learning_rate": 2.2608025683196964e-05, "loss": 0.0893, "step": 19806 }, { "epoch": 0.35077279992207805, "grad_norm": 0.6106439232826233, "learning_rate": 2.2607284184919435e-05, "loss": 0.0605, "step": 19807 }, { "epoch": 0.35079050945910645, "grad_norm": 0.5896950960159302, "learning_rate": 2.2606542661614714e-05, "loss": 0.1137, "step": 19808 }, { "epoch": 0.3508082189961349, "grad_norm": 1.23110830783844, "learning_rate": 2.260580111328523e-05, "loss": 0.1114, "step": 19809 }, { "epoch": 0.3508259285331633, "grad_norm": 0.8916764259338379, "learning_rate": 2.260505953993344e-05, "loss": 0.1031, "step": 19810 }, { "epoch": 0.35084363807019175, "grad_norm": 0.9450615048408508, "learning_rate": 2.2604317941561766e-05, "loss": 0.0719, "step": 19811 }, { "epoch": 0.35086134760722015, "grad_norm": 0.5644630193710327, "learning_rate": 2.260357631817266e-05, "loss": 0.081, "step": 19812 }, { "epoch": 0.3508790571442486, "grad_norm": 0.600643515586853, "learning_rate": 2.2602834669768555e-05, "loss": 0.0706, "step": 19813 }, { "epoch": 0.35089676668127706, "grad_norm": 0.6885314583778381, "learning_rate": 2.260209299635189e-05, "loss": 0.0786, "step": 19814 }, { "epoch": 0.35091447621830546, "grad_norm": 0.7150135636329651, "learning_rate": 2.260135129792511e-05, "loss": 0.064, "step": 19815 }, { "epoch": 0.3509321857553339, "grad_norm": 0.6155622601509094, "learning_rate": 2.260060957449065e-05, "loss": 0.1016, "step": 19816 }, { "epoch": 0.3509498952923623, "grad_norm": 0.4707243740558624, "learning_rate": 2.259986782605096e-05, "loss": 0.1261, "step": 19817 }, { "epoch": 0.35096760482939077, "grad_norm": 0.48732563853263855, "learning_rate": 2.2599126052608467e-05, "loss": 0.1014, "step": 19818 }, { "epoch": 0.35098531436641917, "grad_norm": 0.3828409016132355, "learning_rate": 2.259838425416562e-05, "loss": 0.0673, "step": 19819 }, { "epoch": 0.3510030239034476, "grad_norm": 0.8324070572853088, "learning_rate": 2.259764243072486e-05, "loss": 0.0859, "step": 19820 }, { "epoch": 0.351020733440476, "grad_norm": 0.8569883108139038, "learning_rate": 2.259690058228862e-05, "loss": 0.1076, "step": 19821 }, { "epoch": 0.3510384429775045, "grad_norm": 0.5705912709236145, "learning_rate": 2.259615870885935e-05, "loss": 0.051, "step": 19822 }, { "epoch": 0.35105615251453287, "grad_norm": 0.9929541349411011, "learning_rate": 2.2595416810439485e-05, "loss": 0.0498, "step": 19823 }, { "epoch": 0.3510738620515613, "grad_norm": 0.4718952775001526, "learning_rate": 2.2594674887031464e-05, "loss": 0.0866, "step": 19824 }, { "epoch": 0.3510915715885897, "grad_norm": 0.8871350288391113, "learning_rate": 2.2593932938637736e-05, "loss": 0.0811, "step": 19825 }, { "epoch": 0.3511092811256182, "grad_norm": 0.7755895256996155, "learning_rate": 2.2593190965260734e-05, "loss": 0.0567, "step": 19826 }, { "epoch": 0.3511269906626466, "grad_norm": 0.8764606714248657, "learning_rate": 2.2592448966902904e-05, "loss": 0.1038, "step": 19827 }, { "epoch": 0.35114470019967503, "grad_norm": 0.7758114337921143, "learning_rate": 2.259170694356668e-05, "loss": 0.0833, "step": 19828 }, { "epoch": 0.3511624097367035, "grad_norm": 1.046815037727356, "learning_rate": 2.259096489525451e-05, "loss": 0.0842, "step": 19829 }, { "epoch": 0.3511801192737319, "grad_norm": 0.716118574142456, "learning_rate": 2.2590222821968832e-05, "loss": 0.0899, "step": 19830 }, { "epoch": 0.35119782881076034, "grad_norm": 0.681286096572876, "learning_rate": 2.2589480723712095e-05, "loss": 0.1107, "step": 19831 }, { "epoch": 0.35121553834778874, "grad_norm": 0.5402090549468994, "learning_rate": 2.2588738600486725e-05, "loss": 0.0811, "step": 19832 }, { "epoch": 0.3512332478848172, "grad_norm": 0.9234578609466553, "learning_rate": 2.2587996452295177e-05, "loss": 0.0777, "step": 19833 }, { "epoch": 0.3512509574218456, "grad_norm": 0.7552731037139893, "learning_rate": 2.2587254279139888e-05, "loss": 0.1063, "step": 19834 }, { "epoch": 0.35126866695887404, "grad_norm": 1.139454960823059, "learning_rate": 2.2586512081023303e-05, "loss": 0.0856, "step": 19835 }, { "epoch": 0.35128637649590244, "grad_norm": 0.8508246541023254, "learning_rate": 2.2585769857947856e-05, "loss": 0.0839, "step": 19836 }, { "epoch": 0.3513040860329309, "grad_norm": 0.7667396664619446, "learning_rate": 2.2585027609916e-05, "loss": 0.1014, "step": 19837 }, { "epoch": 0.3513217955699593, "grad_norm": 0.7072432637214661, "learning_rate": 2.2584285336930163e-05, "loss": 0.1018, "step": 19838 }, { "epoch": 0.35133950510698775, "grad_norm": 0.5829533934593201, "learning_rate": 2.258354303899279e-05, "loss": 0.0759, "step": 19839 }, { "epoch": 0.35135721464401615, "grad_norm": 1.1053025722503662, "learning_rate": 2.2582800716106336e-05, "loss": 0.1031, "step": 19840 }, { "epoch": 0.3513749241810446, "grad_norm": 0.9454658031463623, "learning_rate": 2.2582058368273236e-05, "loss": 0.1334, "step": 19841 }, { "epoch": 0.351392633718073, "grad_norm": 0.44995254278182983, "learning_rate": 2.2581315995495927e-05, "loss": 0.0653, "step": 19842 }, { "epoch": 0.35141034325510145, "grad_norm": 0.8367682099342346, "learning_rate": 2.2580573597776855e-05, "loss": 0.0892, "step": 19843 }, { "epoch": 0.3514280527921299, "grad_norm": 0.7726767063140869, "learning_rate": 2.2579831175118463e-05, "loss": 0.075, "step": 19844 }, { "epoch": 0.3514457623291583, "grad_norm": 0.9274719953536987, "learning_rate": 2.2579088727523194e-05, "loss": 0.0854, "step": 19845 }, { "epoch": 0.35146347186618676, "grad_norm": 0.6069661974906921, "learning_rate": 2.2578346254993488e-05, "loss": 0.0906, "step": 19846 }, { "epoch": 0.35148118140321516, "grad_norm": 0.7144900560379028, "learning_rate": 2.2577603757531787e-05, "loss": 0.1035, "step": 19847 }, { "epoch": 0.3514988909402436, "grad_norm": 0.557079017162323, "learning_rate": 2.257686123514054e-05, "loss": 0.1, "step": 19848 }, { "epoch": 0.351516600477272, "grad_norm": 0.7492938041687012, "learning_rate": 2.257611868782219e-05, "loss": 0.0661, "step": 19849 }, { "epoch": 0.35153431001430047, "grad_norm": 0.5822630524635315, "learning_rate": 2.2575376115579175e-05, "loss": 0.0837, "step": 19850 }, { "epoch": 0.35155201955132886, "grad_norm": 0.7196052074432373, "learning_rate": 2.257463351841393e-05, "loss": 0.0566, "step": 19851 }, { "epoch": 0.3515697290883573, "grad_norm": 0.5415059328079224, "learning_rate": 2.2573890896328913e-05, "loss": 0.0938, "step": 19852 }, { "epoch": 0.3515874386253857, "grad_norm": 0.7333996295928955, "learning_rate": 2.2573148249326557e-05, "loss": 0.1011, "step": 19853 }, { "epoch": 0.35160514816241417, "grad_norm": 1.1581220626831055, "learning_rate": 2.2572405577409317e-05, "loss": 0.0885, "step": 19854 }, { "epoch": 0.35162285769944257, "grad_norm": 1.0347365140914917, "learning_rate": 2.257166288057962e-05, "loss": 0.0845, "step": 19855 }, { "epoch": 0.351640567236471, "grad_norm": 0.4153687059879303, "learning_rate": 2.2570920158839924e-05, "loss": 0.0661, "step": 19856 }, { "epoch": 0.3516582767734994, "grad_norm": 0.7677245140075684, "learning_rate": 2.2570177412192663e-05, "loss": 0.0737, "step": 19857 }, { "epoch": 0.3516759863105279, "grad_norm": 0.8253180980682373, "learning_rate": 2.2569434640640285e-05, "loss": 0.0662, "step": 19858 }, { "epoch": 0.35169369584755633, "grad_norm": 1.0188957452774048, "learning_rate": 2.2568691844185234e-05, "loss": 0.1001, "step": 19859 }, { "epoch": 0.35171140538458473, "grad_norm": 0.7786002159118652, "learning_rate": 2.256794902282995e-05, "loss": 0.1099, "step": 19860 }, { "epoch": 0.3517291149216132, "grad_norm": 1.013124942779541, "learning_rate": 2.256720617657688e-05, "loss": 0.1058, "step": 19861 }, { "epoch": 0.3517468244586416, "grad_norm": 0.7086518406867981, "learning_rate": 2.2566463305428463e-05, "loss": 0.0741, "step": 19862 }, { "epoch": 0.35176453399567004, "grad_norm": 0.6786940097808838, "learning_rate": 2.2565720409387152e-05, "loss": 0.1062, "step": 19863 }, { "epoch": 0.35178224353269844, "grad_norm": 0.7016312479972839, "learning_rate": 2.2564977488455385e-05, "loss": 0.0899, "step": 19864 }, { "epoch": 0.3517999530697269, "grad_norm": 0.8283941745758057, "learning_rate": 2.256423454263561e-05, "loss": 0.0865, "step": 19865 }, { "epoch": 0.3518176626067553, "grad_norm": 0.9299160838127136, "learning_rate": 2.2563491571930263e-05, "loss": 0.0722, "step": 19866 }, { "epoch": 0.35183537214378374, "grad_norm": 0.6480887532234192, "learning_rate": 2.2562748576341798e-05, "loss": 0.1132, "step": 19867 }, { "epoch": 0.35185308168081214, "grad_norm": 0.7542708516120911, "learning_rate": 2.2562005555872655e-05, "loss": 0.0645, "step": 19868 }, { "epoch": 0.3518707912178406, "grad_norm": 0.8891477584838867, "learning_rate": 2.2561262510525277e-05, "loss": 0.0736, "step": 19869 }, { "epoch": 0.351888500754869, "grad_norm": 0.78399258852005, "learning_rate": 2.2560519440302113e-05, "loss": 0.0988, "step": 19870 }, { "epoch": 0.35190621029189745, "grad_norm": 0.5189133286476135, "learning_rate": 2.25597763452056e-05, "loss": 0.0914, "step": 19871 }, { "epoch": 0.35192391982892585, "grad_norm": 1.3656729459762573, "learning_rate": 2.255903322523819e-05, "loss": 0.1022, "step": 19872 }, { "epoch": 0.3519416293659543, "grad_norm": 0.7315338253974915, "learning_rate": 2.2558290080402325e-05, "loss": 0.0646, "step": 19873 }, { "epoch": 0.35195933890298275, "grad_norm": 0.9054439663887024, "learning_rate": 2.2557546910700448e-05, "loss": 0.0981, "step": 19874 }, { "epoch": 0.35197704844001115, "grad_norm": 0.661024808883667, "learning_rate": 2.2556803716135012e-05, "loss": 0.069, "step": 19875 }, { "epoch": 0.3519947579770396, "grad_norm": 1.2375237941741943, "learning_rate": 2.2556060496708448e-05, "loss": 0.1249, "step": 19876 }, { "epoch": 0.352012467514068, "grad_norm": 0.6982804536819458, "learning_rate": 2.2555317252423212e-05, "loss": 0.0859, "step": 19877 }, { "epoch": 0.35203017705109646, "grad_norm": 0.8555314540863037, "learning_rate": 2.255457398328175e-05, "loss": 0.068, "step": 19878 }, { "epoch": 0.35204788658812486, "grad_norm": 0.5545510649681091, "learning_rate": 2.25538306892865e-05, "loss": 0.1212, "step": 19879 }, { "epoch": 0.3520655961251533, "grad_norm": 0.6307228207588196, "learning_rate": 2.2553087370439916e-05, "loss": 0.0762, "step": 19880 }, { "epoch": 0.3520833056621817, "grad_norm": 1.1817294359207153, "learning_rate": 2.2552344026744433e-05, "loss": 0.0669, "step": 19881 }, { "epoch": 0.35210101519921017, "grad_norm": 0.7653728127479553, "learning_rate": 2.2551600658202506e-05, "loss": 0.11, "step": 19882 }, { "epoch": 0.35211872473623856, "grad_norm": 0.7878190875053406, "learning_rate": 2.255085726481657e-05, "loss": 0.0709, "step": 19883 }, { "epoch": 0.352136434273267, "grad_norm": 0.7315497398376465, "learning_rate": 2.2550113846589083e-05, "loss": 0.1036, "step": 19884 }, { "epoch": 0.3521541438102954, "grad_norm": 0.7967115640640259, "learning_rate": 2.2549370403522487e-05, "loss": 0.0519, "step": 19885 }, { "epoch": 0.35217185334732387, "grad_norm": 0.6081819534301758, "learning_rate": 2.254862693561922e-05, "loss": 0.0887, "step": 19886 }, { "epoch": 0.35218956288435227, "grad_norm": 0.916675329208374, "learning_rate": 2.254788344288174e-05, "loss": 0.1169, "step": 19887 }, { "epoch": 0.3522072724213807, "grad_norm": 0.6326022148132324, "learning_rate": 2.2547139925312483e-05, "loss": 0.0891, "step": 19888 }, { "epoch": 0.3522249819584092, "grad_norm": 1.2927398681640625, "learning_rate": 2.2546396382913902e-05, "loss": 0.0809, "step": 19889 }, { "epoch": 0.3522426914954376, "grad_norm": 0.6525794863700867, "learning_rate": 2.2545652815688434e-05, "loss": 0.0607, "step": 19890 }, { "epoch": 0.35226040103246603, "grad_norm": 0.3551684021949768, "learning_rate": 2.2544909223638534e-05, "loss": 0.0438, "step": 19891 }, { "epoch": 0.35227811056949443, "grad_norm": 0.7597788572311401, "learning_rate": 2.254416560676665e-05, "loss": 0.1112, "step": 19892 }, { "epoch": 0.3522958201065229, "grad_norm": 0.6835069060325623, "learning_rate": 2.2543421965075224e-05, "loss": 0.0609, "step": 19893 }, { "epoch": 0.3523135296435513, "grad_norm": 0.5629104971885681, "learning_rate": 2.2542678298566696e-05, "loss": 0.0767, "step": 19894 }, { "epoch": 0.35233123918057974, "grad_norm": 0.49949583411216736, "learning_rate": 2.2541934607243525e-05, "loss": 0.0749, "step": 19895 }, { "epoch": 0.35234894871760813, "grad_norm": 0.4790027439594269, "learning_rate": 2.254119089110815e-05, "loss": 0.0532, "step": 19896 }, { "epoch": 0.3523666582546366, "grad_norm": 0.5915570259094238, "learning_rate": 2.2540447150163018e-05, "loss": 0.1199, "step": 19897 }, { "epoch": 0.352384367791665, "grad_norm": 0.29091116786003113, "learning_rate": 2.2539703384410584e-05, "loss": 0.0753, "step": 19898 }, { "epoch": 0.35240207732869344, "grad_norm": 1.1067681312561035, "learning_rate": 2.253895959385328e-05, "loss": 0.1159, "step": 19899 }, { "epoch": 0.35241978686572184, "grad_norm": 1.1109602451324463, "learning_rate": 2.253821577849357e-05, "loss": 0.0785, "step": 19900 }, { "epoch": 0.3524374964027503, "grad_norm": 0.9404411315917969, "learning_rate": 2.2537471938333883e-05, "loss": 0.088, "step": 19901 }, { "epoch": 0.3524552059397787, "grad_norm": 0.6450722813606262, "learning_rate": 2.2536728073376686e-05, "loss": 0.0925, "step": 19902 }, { "epoch": 0.35247291547680715, "grad_norm": 0.4753733277320862, "learning_rate": 2.253598418362441e-05, "loss": 0.1013, "step": 19903 }, { "epoch": 0.3524906250138356, "grad_norm": 0.8530378937721252, "learning_rate": 2.2535240269079508e-05, "loss": 0.0665, "step": 19904 }, { "epoch": 0.352508334550864, "grad_norm": 1.160030722618103, "learning_rate": 2.253449632974443e-05, "loss": 0.0807, "step": 19905 }, { "epoch": 0.35252604408789245, "grad_norm": 0.8647083044052124, "learning_rate": 2.2533752365621615e-05, "loss": 0.0677, "step": 19906 }, { "epoch": 0.35254375362492085, "grad_norm": 0.933641254901886, "learning_rate": 2.2533008376713527e-05, "loss": 0.0791, "step": 19907 }, { "epoch": 0.3525614631619493, "grad_norm": 0.8541505932807922, "learning_rate": 2.2532264363022596e-05, "loss": 0.0857, "step": 19908 }, { "epoch": 0.3525791726989777, "grad_norm": 1.0684545040130615, "learning_rate": 2.2531520324551276e-05, "loss": 0.0869, "step": 19909 }, { "epoch": 0.35259688223600616, "grad_norm": 0.9506666660308838, "learning_rate": 2.253077626130202e-05, "loss": 0.1011, "step": 19910 }, { "epoch": 0.35261459177303456, "grad_norm": 0.6884104013442993, "learning_rate": 2.2530032173277273e-05, "loss": 0.1033, "step": 19911 }, { "epoch": 0.352632301310063, "grad_norm": 0.6379069685935974, "learning_rate": 2.252928806047948e-05, "loss": 0.0976, "step": 19912 }, { "epoch": 0.3526500108470914, "grad_norm": 1.0098881721496582, "learning_rate": 2.2528543922911088e-05, "loss": 0.1067, "step": 19913 }, { "epoch": 0.35266772038411986, "grad_norm": 0.9375150203704834, "learning_rate": 2.252779976057455e-05, "loss": 0.0903, "step": 19914 }, { "epoch": 0.35268542992114826, "grad_norm": 0.7680268287658691, "learning_rate": 2.2527055573472307e-05, "loss": 0.08, "step": 19915 }, { "epoch": 0.3527031394581767, "grad_norm": 0.7895272374153137, "learning_rate": 2.252631136160682e-05, "loss": 0.0935, "step": 19916 }, { "epoch": 0.3527208489952051, "grad_norm": 0.6132316589355469, "learning_rate": 2.2525567124980528e-05, "loss": 0.0477, "step": 19917 }, { "epoch": 0.35273855853223357, "grad_norm": 0.6742643117904663, "learning_rate": 2.2524822863595885e-05, "loss": 0.1013, "step": 19918 }, { "epoch": 0.352756268069262, "grad_norm": 0.6467061042785645, "learning_rate": 2.252407857745533e-05, "loss": 0.0493, "step": 19919 }, { "epoch": 0.3527739776062904, "grad_norm": 1.0981577634811401, "learning_rate": 2.2523334266561317e-05, "loss": 0.1406, "step": 19920 }, { "epoch": 0.3527916871433189, "grad_norm": 0.7835054993629456, "learning_rate": 2.25225899309163e-05, "loss": 0.0961, "step": 19921 }, { "epoch": 0.3528093966803473, "grad_norm": 1.153745412826538, "learning_rate": 2.252184557052272e-05, "loss": 0.1169, "step": 19922 }, { "epoch": 0.35282710621737573, "grad_norm": 0.5209277868270874, "learning_rate": 2.252110118538303e-05, "loss": 0.0776, "step": 19923 }, { "epoch": 0.35284481575440413, "grad_norm": 0.6748144030570984, "learning_rate": 2.2520356775499675e-05, "loss": 0.1142, "step": 19924 }, { "epoch": 0.3528625252914326, "grad_norm": 0.6633995175361633, "learning_rate": 2.251961234087511e-05, "loss": 0.096, "step": 19925 }, { "epoch": 0.352880234828461, "grad_norm": 0.6259221434593201, "learning_rate": 2.251886788151178e-05, "loss": 0.109, "step": 19926 }, { "epoch": 0.35289794436548944, "grad_norm": 0.7399572730064392, "learning_rate": 2.2518123397412138e-05, "loss": 0.0865, "step": 19927 }, { "epoch": 0.35291565390251783, "grad_norm": 0.899172842502594, "learning_rate": 2.2517378888578625e-05, "loss": 0.1158, "step": 19928 }, { "epoch": 0.3529333634395463, "grad_norm": 0.8145931363105774, "learning_rate": 2.2516634355013697e-05, "loss": 0.105, "step": 19929 }, { "epoch": 0.3529510729765747, "grad_norm": 0.8307584524154663, "learning_rate": 2.2515889796719806e-05, "loss": 0.1036, "step": 19930 }, { "epoch": 0.35296878251360314, "grad_norm": 0.5202776789665222, "learning_rate": 2.2515145213699396e-05, "loss": 0.0608, "step": 19931 }, { "epoch": 0.35298649205063154, "grad_norm": 0.7089666128158569, "learning_rate": 2.251440060595492e-05, "loss": 0.0946, "step": 19932 }, { "epoch": 0.35300420158766, "grad_norm": 0.7439208626747131, "learning_rate": 2.251365597348882e-05, "loss": 0.0705, "step": 19933 }, { "epoch": 0.35302191112468845, "grad_norm": 0.6818082928657532, "learning_rate": 2.2512911316303555e-05, "loss": 0.1011, "step": 19934 }, { "epoch": 0.35303962066171685, "grad_norm": 0.7735380530357361, "learning_rate": 2.2512166634401573e-05, "loss": 0.1072, "step": 19935 }, { "epoch": 0.3530573301987453, "grad_norm": 0.7830495238304138, "learning_rate": 2.251142192778532e-05, "loss": 0.0724, "step": 19936 }, { "epoch": 0.3530750397357737, "grad_norm": 0.5598623156547546, "learning_rate": 2.2510677196457256e-05, "loss": 0.0851, "step": 19937 }, { "epoch": 0.35309274927280215, "grad_norm": 0.7960566878318787, "learning_rate": 2.2509932440419816e-05, "loss": 0.0619, "step": 19938 }, { "epoch": 0.35311045880983055, "grad_norm": 0.6311371326446533, "learning_rate": 2.250918765967546e-05, "loss": 0.0902, "step": 19939 }, { "epoch": 0.353128168346859, "grad_norm": 0.9406317472457886, "learning_rate": 2.2508442854226637e-05, "loss": 0.0715, "step": 19940 }, { "epoch": 0.3531458778838874, "grad_norm": 0.8858824372291565, "learning_rate": 2.2507698024075793e-05, "loss": 0.09, "step": 19941 }, { "epoch": 0.35316358742091586, "grad_norm": 0.45964041352272034, "learning_rate": 2.2506953169225383e-05, "loss": 0.0641, "step": 19942 }, { "epoch": 0.35318129695794426, "grad_norm": 0.7712646126747131, "learning_rate": 2.2506208289677858e-05, "loss": 0.1173, "step": 19943 }, { "epoch": 0.3531990064949727, "grad_norm": 0.8632453680038452, "learning_rate": 2.250546338543567e-05, "loss": 0.1174, "step": 19944 }, { "epoch": 0.3532167160320011, "grad_norm": 0.550828754901886, "learning_rate": 2.2504718456501262e-05, "loss": 0.0883, "step": 19945 }, { "epoch": 0.35323442556902956, "grad_norm": 0.8988943696022034, "learning_rate": 2.250397350287709e-05, "loss": 0.0957, "step": 19946 }, { "epoch": 0.35325213510605796, "grad_norm": 0.49464333057403564, "learning_rate": 2.2503228524565605e-05, "loss": 0.1103, "step": 19947 }, { "epoch": 0.3532698446430864, "grad_norm": 0.5995764136314392, "learning_rate": 2.250248352156926e-05, "loss": 0.0702, "step": 19948 }, { "epoch": 0.35328755418011487, "grad_norm": 0.8255921006202698, "learning_rate": 2.2501738493890494e-05, "loss": 0.0937, "step": 19949 }, { "epoch": 0.35330526371714327, "grad_norm": 0.5091360807418823, "learning_rate": 2.2500993441531778e-05, "loss": 0.0809, "step": 19950 }, { "epoch": 0.3533229732541717, "grad_norm": 0.5954621434211731, "learning_rate": 2.2500248364495543e-05, "loss": 0.1072, "step": 19951 }, { "epoch": 0.3533406827912001, "grad_norm": 0.9606600999832153, "learning_rate": 2.2499503262784256e-05, "loss": 0.0909, "step": 19952 }, { "epoch": 0.3533583923282286, "grad_norm": 0.6378259062767029, "learning_rate": 2.249875813640036e-05, "loss": 0.0976, "step": 19953 }, { "epoch": 0.353376101865257, "grad_norm": 0.7870674729347229, "learning_rate": 2.2498012985346306e-05, "loss": 0.092, "step": 19954 }, { "epoch": 0.35339381140228543, "grad_norm": 0.79487544298172, "learning_rate": 2.2497267809624552e-05, "loss": 0.1235, "step": 19955 }, { "epoch": 0.35341152093931383, "grad_norm": 0.6943023204803467, "learning_rate": 2.249652260923754e-05, "loss": 0.1038, "step": 19956 }, { "epoch": 0.3534292304763423, "grad_norm": 0.8838053941726685, "learning_rate": 2.249577738418773e-05, "loss": 0.0792, "step": 19957 }, { "epoch": 0.3534469400133707, "grad_norm": 0.9173875451087952, "learning_rate": 2.2495032134477568e-05, "loss": 0.0919, "step": 19958 }, { "epoch": 0.35346464955039913, "grad_norm": 0.606975793838501, "learning_rate": 2.2494286860109514e-05, "loss": 0.0678, "step": 19959 }, { "epoch": 0.35348235908742753, "grad_norm": 0.565556526184082, "learning_rate": 2.249354156108601e-05, "loss": 0.0838, "step": 19960 }, { "epoch": 0.353500068624456, "grad_norm": 0.39754849672317505, "learning_rate": 2.2492796237409513e-05, "loss": 0.0714, "step": 19961 }, { "epoch": 0.3535177781614844, "grad_norm": 0.8801740407943726, "learning_rate": 2.2492050889082472e-05, "loss": 0.0663, "step": 19962 }, { "epoch": 0.35353548769851284, "grad_norm": 1.098863959312439, "learning_rate": 2.2491305516107346e-05, "loss": 0.0868, "step": 19963 }, { "epoch": 0.3535531972355413, "grad_norm": 0.540922999382019, "learning_rate": 2.2490560118486582e-05, "loss": 0.0958, "step": 19964 }, { "epoch": 0.3535709067725697, "grad_norm": 1.1297402381896973, "learning_rate": 2.2489814696222625e-05, "loss": 0.1035, "step": 19965 }, { "epoch": 0.35358861630959815, "grad_norm": 0.6699896454811096, "learning_rate": 2.2489069249317944e-05, "loss": 0.087, "step": 19966 }, { "epoch": 0.35360632584662655, "grad_norm": 0.9700076580047607, "learning_rate": 2.2488323777774976e-05, "loss": 0.0764, "step": 19967 }, { "epoch": 0.353624035383655, "grad_norm": 0.7346985936164856, "learning_rate": 2.2487578281596182e-05, "loss": 0.0614, "step": 19968 }, { "epoch": 0.3536417449206834, "grad_norm": 0.7137460708618164, "learning_rate": 2.248683276078402e-05, "loss": 0.1004, "step": 19969 }, { "epoch": 0.35365945445771185, "grad_norm": 0.8440886735916138, "learning_rate": 2.2486087215340928e-05, "loss": 0.1433, "step": 19970 }, { "epoch": 0.35367716399474025, "grad_norm": 0.6396999955177307, "learning_rate": 2.2485341645269366e-05, "loss": 0.1118, "step": 19971 }, { "epoch": 0.3536948735317687, "grad_norm": 0.5233638286590576, "learning_rate": 2.248459605057179e-05, "loss": 0.0847, "step": 19972 }, { "epoch": 0.3537125830687971, "grad_norm": 0.826964795589447, "learning_rate": 2.248385043125065e-05, "loss": 0.0897, "step": 19973 }, { "epoch": 0.35373029260582556, "grad_norm": 0.6264805197715759, "learning_rate": 2.24831047873084e-05, "loss": 0.0582, "step": 19974 }, { "epoch": 0.35374800214285396, "grad_norm": 0.8990551829338074, "learning_rate": 2.248235911874749e-05, "loss": 0.1001, "step": 19975 }, { "epoch": 0.3537657116798824, "grad_norm": 0.7024874091148376, "learning_rate": 2.2481613425570377e-05, "loss": 0.0553, "step": 19976 }, { "epoch": 0.3537834212169108, "grad_norm": 0.40165501832962036, "learning_rate": 2.248086770777951e-05, "loss": 0.0616, "step": 19977 }, { "epoch": 0.35380113075393926, "grad_norm": 0.719794750213623, "learning_rate": 2.2480121965377347e-05, "loss": 0.0834, "step": 19978 }, { "epoch": 0.3538188402909677, "grad_norm": 0.8037275671958923, "learning_rate": 2.247937619836634e-05, "loss": 0.0802, "step": 19979 }, { "epoch": 0.3538365498279961, "grad_norm": 0.7540953159332275, "learning_rate": 2.247863040674894e-05, "loss": 0.0591, "step": 19980 }, { "epoch": 0.35385425936502457, "grad_norm": 0.8906194567680359, "learning_rate": 2.24778845905276e-05, "loss": 0.0666, "step": 19981 }, { "epoch": 0.35387196890205297, "grad_norm": 1.093827486038208, "learning_rate": 2.2477138749704778e-05, "loss": 0.0883, "step": 19982 }, { "epoch": 0.3538896784390814, "grad_norm": 0.38726216554641724, "learning_rate": 2.247639288428293e-05, "loss": 0.0605, "step": 19983 }, { "epoch": 0.3539073879761098, "grad_norm": 0.8942539095878601, "learning_rate": 2.24756469942645e-05, "loss": 0.0875, "step": 19984 }, { "epoch": 0.3539250975131383, "grad_norm": 0.5495865941047668, "learning_rate": 2.247490107965195e-05, "loss": 0.0656, "step": 19985 }, { "epoch": 0.3539428070501667, "grad_norm": 0.6343011260032654, "learning_rate": 2.2474155140447728e-05, "loss": 0.0665, "step": 19986 }, { "epoch": 0.35396051658719513, "grad_norm": 0.48538774251937866, "learning_rate": 2.2473409176654297e-05, "loss": 0.0902, "step": 19987 }, { "epoch": 0.3539782261242235, "grad_norm": 0.4535188376903534, "learning_rate": 2.2472663188274104e-05, "loss": 0.0825, "step": 19988 }, { "epoch": 0.353995935661252, "grad_norm": 0.5420812368392944, "learning_rate": 2.2471917175309603e-05, "loss": 0.08, "step": 19989 }, { "epoch": 0.3540136451982804, "grad_norm": 0.9918664693832397, "learning_rate": 2.247117113776325e-05, "loss": 0.1235, "step": 19990 }, { "epoch": 0.35403135473530883, "grad_norm": 0.49652808904647827, "learning_rate": 2.2470425075637504e-05, "loss": 0.0871, "step": 19991 }, { "epoch": 0.35404906427233723, "grad_norm": 0.7172627449035645, "learning_rate": 2.2469678988934814e-05, "loss": 0.0834, "step": 19992 }, { "epoch": 0.3540667738093657, "grad_norm": 0.7273141145706177, "learning_rate": 2.2468932877657632e-05, "loss": 0.0596, "step": 19993 }, { "epoch": 0.35408448334639414, "grad_norm": 0.6750150918960571, "learning_rate": 2.246818674180842e-05, "loss": 0.1082, "step": 19994 }, { "epoch": 0.35410219288342254, "grad_norm": 0.7735642194747925, "learning_rate": 2.2467440581389626e-05, "loss": 0.0834, "step": 19995 }, { "epoch": 0.354119902420451, "grad_norm": 0.5309274196624756, "learning_rate": 2.2466694396403708e-05, "loss": 0.0773, "step": 19996 }, { "epoch": 0.3541376119574794, "grad_norm": 0.6300299167633057, "learning_rate": 2.2465948186853123e-05, "loss": 0.0671, "step": 19997 }, { "epoch": 0.35415532149450785, "grad_norm": 0.616680920124054, "learning_rate": 2.246520195274032e-05, "loss": 0.0817, "step": 19998 }, { "epoch": 0.35417303103153625, "grad_norm": 0.44829320907592773, "learning_rate": 2.2464455694067764e-05, "loss": 0.0525, "step": 19999 }, { "epoch": 0.3541907405685647, "grad_norm": 0.49091628193855286, "learning_rate": 2.2463709410837896e-05, "loss": 0.0682, "step": 20000 }, { "epoch": 0.3542084501055931, "grad_norm": 0.5147367715835571, "learning_rate": 2.2462963103053183e-05, "loss": 0.0758, "step": 20001 }, { "epoch": 0.35422615964262155, "grad_norm": 0.5712031722068787, "learning_rate": 2.2462216770716075e-05, "loss": 0.087, "step": 20002 }, { "epoch": 0.35424386917964995, "grad_norm": 0.9836156368255615, "learning_rate": 2.2461470413829032e-05, "loss": 0.1063, "step": 20003 }, { "epoch": 0.3542615787166784, "grad_norm": 0.7321233153343201, "learning_rate": 2.24607240323945e-05, "loss": 0.0644, "step": 20004 }, { "epoch": 0.3542792882537068, "grad_norm": 1.0695639848709106, "learning_rate": 2.2459977626414945e-05, "loss": 0.075, "step": 20005 }, { "epoch": 0.35429699779073526, "grad_norm": 0.8922237157821655, "learning_rate": 2.2459231195892817e-05, "loss": 0.0748, "step": 20006 }, { "epoch": 0.3543147073277637, "grad_norm": 0.7669535279273987, "learning_rate": 2.2458484740830575e-05, "loss": 0.0954, "step": 20007 }, { "epoch": 0.3543324168647921, "grad_norm": 0.8491383194923401, "learning_rate": 2.245773826123067e-05, "loss": 0.0716, "step": 20008 }, { "epoch": 0.35435012640182056, "grad_norm": 0.8786047101020813, "learning_rate": 2.245699175709556e-05, "loss": 0.0746, "step": 20009 }, { "epoch": 0.35436783593884896, "grad_norm": 1.266603708267212, "learning_rate": 2.24562452284277e-05, "loss": 0.0853, "step": 20010 }, { "epoch": 0.3543855454758774, "grad_norm": 0.7569361329078674, "learning_rate": 2.245549867522955e-05, "loss": 0.0902, "step": 20011 }, { "epoch": 0.3544032550129058, "grad_norm": 0.812079668045044, "learning_rate": 2.245475209750356e-05, "loss": 0.0974, "step": 20012 }, { "epoch": 0.35442096454993427, "grad_norm": 0.8045380711555481, "learning_rate": 2.2454005495252195e-05, "loss": 0.0755, "step": 20013 }, { "epoch": 0.35443867408696267, "grad_norm": 0.892906665802002, "learning_rate": 2.2453258868477906e-05, "loss": 0.0662, "step": 20014 }, { "epoch": 0.3544563836239911, "grad_norm": 0.9959000945091248, "learning_rate": 2.2452512217183146e-05, "loss": 0.0828, "step": 20015 }, { "epoch": 0.3544740931610195, "grad_norm": 0.6831870675086975, "learning_rate": 2.2451765541370376e-05, "loss": 0.0731, "step": 20016 }, { "epoch": 0.354491802698048, "grad_norm": 0.7760937213897705, "learning_rate": 2.2451018841042047e-05, "loss": 0.081, "step": 20017 }, { "epoch": 0.3545095122350764, "grad_norm": 0.7002512812614441, "learning_rate": 2.2450272116200626e-05, "loss": 0.0938, "step": 20018 }, { "epoch": 0.35452722177210483, "grad_norm": 0.6814894676208496, "learning_rate": 2.2449525366848557e-05, "loss": 0.0991, "step": 20019 }, { "epoch": 0.3545449313091332, "grad_norm": 0.5808302760124207, "learning_rate": 2.2448778592988306e-05, "loss": 0.0952, "step": 20020 }, { "epoch": 0.3545626408461617, "grad_norm": 0.9539472460746765, "learning_rate": 2.2448031794622327e-05, "loss": 0.0581, "step": 20021 }, { "epoch": 0.35458035038319013, "grad_norm": 0.6826077103614807, "learning_rate": 2.2447284971753074e-05, "loss": 0.0752, "step": 20022 }, { "epoch": 0.35459805992021853, "grad_norm": 0.6512519121170044, "learning_rate": 2.2446538124383014e-05, "loss": 0.0625, "step": 20023 }, { "epoch": 0.354615769457247, "grad_norm": 0.8012557029724121, "learning_rate": 2.244579125251459e-05, "loss": 0.0684, "step": 20024 }, { "epoch": 0.3546334789942754, "grad_norm": 1.1555957794189453, "learning_rate": 2.2445044356150266e-05, "loss": 0.0793, "step": 20025 }, { "epoch": 0.35465118853130384, "grad_norm": 1.0354427099227905, "learning_rate": 2.2444297435292505e-05, "loss": 0.1132, "step": 20026 }, { "epoch": 0.35466889806833224, "grad_norm": 0.42619311809539795, "learning_rate": 2.244355048994375e-05, "loss": 0.0639, "step": 20027 }, { "epoch": 0.3546866076053607, "grad_norm": 0.5940278768539429, "learning_rate": 2.2442803520106476e-05, "loss": 0.0786, "step": 20028 }, { "epoch": 0.3547043171423891, "grad_norm": 0.9633881449699402, "learning_rate": 2.244205652578312e-05, "loss": 0.0758, "step": 20029 }, { "epoch": 0.35472202667941755, "grad_norm": 0.7446793913841248, "learning_rate": 2.244130950697616e-05, "loss": 0.0993, "step": 20030 }, { "epoch": 0.35473973621644594, "grad_norm": 0.9164326786994934, "learning_rate": 2.2440562463688047e-05, "loss": 0.092, "step": 20031 }, { "epoch": 0.3547574457534744, "grad_norm": 0.7477850914001465, "learning_rate": 2.2439815395921227e-05, "loss": 0.1009, "step": 20032 }, { "epoch": 0.3547751552905028, "grad_norm": 0.999535322189331, "learning_rate": 2.2439068303678173e-05, "loss": 0.1066, "step": 20033 }, { "epoch": 0.35479286482753125, "grad_norm": 0.501720130443573, "learning_rate": 2.2438321186961333e-05, "loss": 0.1027, "step": 20034 }, { "epoch": 0.35481057436455965, "grad_norm": 0.9355255961418152, "learning_rate": 2.243757404577317e-05, "loss": 0.1005, "step": 20035 }, { "epoch": 0.3548282839015881, "grad_norm": 0.8776429295539856, "learning_rate": 2.243682688011614e-05, "loss": 0.0787, "step": 20036 }, { "epoch": 0.35484599343861656, "grad_norm": 1.1321052312850952, "learning_rate": 2.2436079689992703e-05, "loss": 0.1255, "step": 20037 }, { "epoch": 0.35486370297564496, "grad_norm": 0.5983048677444458, "learning_rate": 2.2435332475405315e-05, "loss": 0.0996, "step": 20038 }, { "epoch": 0.3548814125126734, "grad_norm": 0.6371934413909912, "learning_rate": 2.2434585236356435e-05, "loss": 0.0833, "step": 20039 }, { "epoch": 0.3548991220497018, "grad_norm": 1.345861554145813, "learning_rate": 2.243383797284852e-05, "loss": 0.096, "step": 20040 }, { "epoch": 0.35491683158673026, "grad_norm": 0.5130590796470642, "learning_rate": 2.2433090684884034e-05, "loss": 0.0871, "step": 20041 }, { "epoch": 0.35493454112375866, "grad_norm": 0.6884109973907471, "learning_rate": 2.2432343372465428e-05, "loss": 0.0671, "step": 20042 }, { "epoch": 0.3549522506607871, "grad_norm": 0.7841744422912598, "learning_rate": 2.2431596035595167e-05, "loss": 0.0887, "step": 20043 }, { "epoch": 0.3549699601978155, "grad_norm": 0.7676784992218018, "learning_rate": 2.2430848674275702e-05, "loss": 0.0892, "step": 20044 }, { "epoch": 0.35498766973484397, "grad_norm": 0.770935595035553, "learning_rate": 2.24301012885095e-05, "loss": 0.0489, "step": 20045 }, { "epoch": 0.35500537927187237, "grad_norm": 0.4789488613605499, "learning_rate": 2.2429353878299018e-05, "loss": 0.0814, "step": 20046 }, { "epoch": 0.3550230888089008, "grad_norm": 1.2463995218276978, "learning_rate": 2.2428606443646714e-05, "loss": 0.0925, "step": 20047 }, { "epoch": 0.3550407983459292, "grad_norm": 0.7114673852920532, "learning_rate": 2.242785898455504e-05, "loss": 0.1231, "step": 20048 }, { "epoch": 0.3550585078829577, "grad_norm": 0.5820220708847046, "learning_rate": 2.2427111501026464e-05, "loss": 0.0897, "step": 20049 }, { "epoch": 0.3550762174199861, "grad_norm": 0.7125759124755859, "learning_rate": 2.2426363993063438e-05, "loss": 0.0736, "step": 20050 }, { "epoch": 0.3550939269570145, "grad_norm": 0.3254309594631195, "learning_rate": 2.2425616460668434e-05, "loss": 0.0878, "step": 20051 }, { "epoch": 0.355111636494043, "grad_norm": 0.9177427887916565, "learning_rate": 2.24248689038439e-05, "loss": 0.0892, "step": 20052 }, { "epoch": 0.3551293460310714, "grad_norm": 0.7042199373245239, "learning_rate": 2.2424121322592293e-05, "loss": 0.0394, "step": 20053 }, { "epoch": 0.35514705556809983, "grad_norm": 0.9446136951446533, "learning_rate": 2.242337371691608e-05, "loss": 0.1334, "step": 20054 }, { "epoch": 0.35516476510512823, "grad_norm": 0.9156063795089722, "learning_rate": 2.2422626086817725e-05, "loss": 0.1054, "step": 20055 }, { "epoch": 0.3551824746421567, "grad_norm": 0.7311241626739502, "learning_rate": 2.242187843229968e-05, "loss": 0.109, "step": 20056 }, { "epoch": 0.3552001841791851, "grad_norm": 0.7114881277084351, "learning_rate": 2.24211307533644e-05, "loss": 0.0655, "step": 20057 }, { "epoch": 0.35521789371621354, "grad_norm": 0.6000383496284485, "learning_rate": 2.242038305001435e-05, "loss": 0.0725, "step": 20058 }, { "epoch": 0.35523560325324194, "grad_norm": 0.9901944994926453, "learning_rate": 2.2419635322251994e-05, "loss": 0.0875, "step": 20059 }, { "epoch": 0.3552533127902704, "grad_norm": 0.8142470121383667, "learning_rate": 2.2418887570079788e-05, "loss": 0.074, "step": 20060 }, { "epoch": 0.3552710223272988, "grad_norm": 0.734512209892273, "learning_rate": 2.2418139793500192e-05, "loss": 0.0888, "step": 20061 }, { "epoch": 0.35528873186432725, "grad_norm": 0.8550796508789062, "learning_rate": 2.2417391992515666e-05, "loss": 0.0798, "step": 20062 }, { "epoch": 0.35530644140135564, "grad_norm": 0.9980133175849915, "learning_rate": 2.241664416712867e-05, "loss": 0.0797, "step": 20063 }, { "epoch": 0.3553241509383841, "grad_norm": 0.8649949431419373, "learning_rate": 2.2415896317341665e-05, "loss": 0.1049, "step": 20064 }, { "epoch": 0.3553418604754125, "grad_norm": 0.624692440032959, "learning_rate": 2.2415148443157114e-05, "loss": 0.0887, "step": 20065 }, { "epoch": 0.35535957001244095, "grad_norm": 2.181969404220581, "learning_rate": 2.241440054457747e-05, "loss": 0.0894, "step": 20066 }, { "epoch": 0.3553772795494694, "grad_norm": 0.4860278069972992, "learning_rate": 2.24136526216052e-05, "loss": 0.0495, "step": 20067 }, { "epoch": 0.3553949890864978, "grad_norm": 0.6866669654846191, "learning_rate": 2.241290467424276e-05, "loss": 0.0745, "step": 20068 }, { "epoch": 0.35541269862352626, "grad_norm": 1.1226998567581177, "learning_rate": 2.2412156702492618e-05, "loss": 0.0881, "step": 20069 }, { "epoch": 0.35543040816055466, "grad_norm": 0.7538167238235474, "learning_rate": 2.241140870635723e-05, "loss": 0.069, "step": 20070 }, { "epoch": 0.3554481176975831, "grad_norm": 0.7341498732566833, "learning_rate": 2.2410660685839056e-05, "loss": 0.1114, "step": 20071 }, { "epoch": 0.3554658272346115, "grad_norm": 0.637391984462738, "learning_rate": 2.2409912640940554e-05, "loss": 0.0973, "step": 20072 }, { "epoch": 0.35548353677163996, "grad_norm": 0.899459719657898, "learning_rate": 2.240916457166419e-05, "loss": 0.0875, "step": 20073 }, { "epoch": 0.35550124630866836, "grad_norm": 0.6111410856246948, "learning_rate": 2.2408416478012426e-05, "loss": 0.0829, "step": 20074 }, { "epoch": 0.3555189558456968, "grad_norm": 0.5241513848304749, "learning_rate": 2.2407668359987723e-05, "loss": 0.0819, "step": 20075 }, { "epoch": 0.3555366653827252, "grad_norm": 0.7740716934204102, "learning_rate": 2.240692021759254e-05, "loss": 0.0707, "step": 20076 }, { "epoch": 0.35555437491975367, "grad_norm": 0.8748255968093872, "learning_rate": 2.2406172050829335e-05, "loss": 0.1006, "step": 20077 }, { "epoch": 0.35557208445678207, "grad_norm": 1.1870479583740234, "learning_rate": 2.2405423859700576e-05, "loss": 0.133, "step": 20078 }, { "epoch": 0.3555897939938105, "grad_norm": 0.6808558106422424, "learning_rate": 2.2404675644208723e-05, "loss": 0.0779, "step": 20079 }, { "epoch": 0.3556075035308389, "grad_norm": 0.518535315990448, "learning_rate": 2.240392740435623e-05, "loss": 0.0942, "step": 20080 }, { "epoch": 0.3556252130678674, "grad_norm": 0.7916868925094604, "learning_rate": 2.240317914014557e-05, "loss": 0.0753, "step": 20081 }, { "epoch": 0.35564292260489583, "grad_norm": 0.6626786589622498, "learning_rate": 2.2402430851579196e-05, "loss": 0.082, "step": 20082 }, { "epoch": 0.3556606321419242, "grad_norm": 0.6068603992462158, "learning_rate": 2.2401682538659575e-05, "loss": 0.0778, "step": 20083 }, { "epoch": 0.3556783416789527, "grad_norm": 0.598539412021637, "learning_rate": 2.240093420138917e-05, "loss": 0.0662, "step": 20084 }, { "epoch": 0.3556960512159811, "grad_norm": 0.7726559042930603, "learning_rate": 2.2400185839770438e-05, "loss": 0.078, "step": 20085 }, { "epoch": 0.35571376075300953, "grad_norm": 0.3647960424423218, "learning_rate": 2.2399437453805842e-05, "loss": 0.074, "step": 20086 }, { "epoch": 0.35573147029003793, "grad_norm": 0.5781664848327637, "learning_rate": 2.2398689043497845e-05, "loss": 0.0638, "step": 20087 }, { "epoch": 0.3557491798270664, "grad_norm": 0.7969908118247986, "learning_rate": 2.239794060884891e-05, "loss": 0.0826, "step": 20088 }, { "epoch": 0.3557668893640948, "grad_norm": 0.4955231845378876, "learning_rate": 2.23971921498615e-05, "loss": 0.0772, "step": 20089 }, { "epoch": 0.35578459890112324, "grad_norm": 0.5655863881111145, "learning_rate": 2.2396443666538072e-05, "loss": 0.0498, "step": 20090 }, { "epoch": 0.35580230843815164, "grad_norm": 0.4521823227405548, "learning_rate": 2.2395695158881093e-05, "loss": 0.0603, "step": 20091 }, { "epoch": 0.3558200179751801, "grad_norm": 0.9193599820137024, "learning_rate": 2.239494662689303e-05, "loss": 0.0996, "step": 20092 }, { "epoch": 0.3558377275122085, "grad_norm": 0.37835121154785156, "learning_rate": 2.239419807057633e-05, "loss": 0.0745, "step": 20093 }, { "epoch": 0.35585543704923694, "grad_norm": 0.9849780201911926, "learning_rate": 2.239344948993348e-05, "loss": 0.1119, "step": 20094 }, { "epoch": 0.35587314658626534, "grad_norm": 0.7016724348068237, "learning_rate": 2.2392700884966925e-05, "loss": 0.0912, "step": 20095 }, { "epoch": 0.3558908561232938, "grad_norm": 0.70354163646698, "learning_rate": 2.239195225567912e-05, "loss": 0.0981, "step": 20096 }, { "epoch": 0.35590856566032225, "grad_norm": 0.5812711119651794, "learning_rate": 2.2391203602072555e-05, "loss": 0.0604, "step": 20097 }, { "epoch": 0.35592627519735065, "grad_norm": 0.948695182800293, "learning_rate": 2.239045492414967e-05, "loss": 0.096, "step": 20098 }, { "epoch": 0.3559439847343791, "grad_norm": 0.741199791431427, "learning_rate": 2.2389706221912942e-05, "loss": 0.0804, "step": 20099 }, { "epoch": 0.3559616942714075, "grad_norm": 1.0012731552124023, "learning_rate": 2.238895749536482e-05, "loss": 0.0636, "step": 20100 }, { "epoch": 0.35597940380843596, "grad_norm": 0.5626772046089172, "learning_rate": 2.238820874450778e-05, "loss": 0.0665, "step": 20101 }, { "epoch": 0.35599711334546436, "grad_norm": 1.099163293838501, "learning_rate": 2.238745996934428e-05, "loss": 0.1118, "step": 20102 }, { "epoch": 0.3560148228824928, "grad_norm": 0.4872483015060425, "learning_rate": 2.238671116987678e-05, "loss": 0.0513, "step": 20103 }, { "epoch": 0.3560325324195212, "grad_norm": 0.7921134829521179, "learning_rate": 2.2385962346107757e-05, "loss": 0.0612, "step": 20104 }, { "epoch": 0.35605024195654966, "grad_norm": 0.6484619379043579, "learning_rate": 2.2385213498039655e-05, "loss": 0.0712, "step": 20105 }, { "epoch": 0.35606795149357806, "grad_norm": 0.8160871863365173, "learning_rate": 2.238446462567495e-05, "loss": 0.0921, "step": 20106 }, { "epoch": 0.3560856610306065, "grad_norm": 0.6518972516059875, "learning_rate": 2.2383715729016106e-05, "loss": 0.0824, "step": 20107 }, { "epoch": 0.3561033705676349, "grad_norm": 0.8533117175102234, "learning_rate": 2.2382966808065582e-05, "loss": 0.1055, "step": 20108 }, { "epoch": 0.35612108010466337, "grad_norm": 0.8606178164482117, "learning_rate": 2.2382217862825843e-05, "loss": 0.1021, "step": 20109 }, { "epoch": 0.35613878964169177, "grad_norm": 0.5123883485794067, "learning_rate": 2.2381468893299355e-05, "loss": 0.0917, "step": 20110 }, { "epoch": 0.3561564991787202, "grad_norm": 0.8678102493286133, "learning_rate": 2.2380719899488582e-05, "loss": 0.1111, "step": 20111 }, { "epoch": 0.3561742087157487, "grad_norm": 0.6655036211013794, "learning_rate": 2.2379970881395987e-05, "loss": 0.0916, "step": 20112 }, { "epoch": 0.3561919182527771, "grad_norm": 0.8612245321273804, "learning_rate": 2.2379221839024036e-05, "loss": 0.1064, "step": 20113 }, { "epoch": 0.3562096277898055, "grad_norm": 0.5951240062713623, "learning_rate": 2.2378472772375185e-05, "loss": 0.0597, "step": 20114 }, { "epoch": 0.3562273373268339, "grad_norm": 0.7652382850646973, "learning_rate": 2.2377723681451908e-05, "loss": 0.0725, "step": 20115 }, { "epoch": 0.3562450468638624, "grad_norm": 1.2362996339797974, "learning_rate": 2.2376974566256665e-05, "loss": 0.0984, "step": 20116 }, { "epoch": 0.3562627564008908, "grad_norm": 0.5293232798576355, "learning_rate": 2.237622542679193e-05, "loss": 0.1073, "step": 20117 }, { "epoch": 0.35628046593791923, "grad_norm": 1.3497724533081055, "learning_rate": 2.2375476263060153e-05, "loss": 0.094, "step": 20118 }, { "epoch": 0.35629817547494763, "grad_norm": 0.7326267957687378, "learning_rate": 2.2374727075063804e-05, "loss": 0.097, "step": 20119 }, { "epoch": 0.3563158850119761, "grad_norm": 1.0040309429168701, "learning_rate": 2.237397786280535e-05, "loss": 0.0824, "step": 20120 }, { "epoch": 0.3563335945490045, "grad_norm": 0.6191657185554504, "learning_rate": 2.237322862628725e-05, "loss": 0.0991, "step": 20121 }, { "epoch": 0.35635130408603294, "grad_norm": 0.42865246534347534, "learning_rate": 2.237247936551198e-05, "loss": 0.0813, "step": 20122 }, { "epoch": 0.35636901362306134, "grad_norm": 0.412503182888031, "learning_rate": 2.2371730080481998e-05, "loss": 0.1021, "step": 20123 }, { "epoch": 0.3563867231600898, "grad_norm": 1.4424721002578735, "learning_rate": 2.2370980771199773e-05, "loss": 0.127, "step": 20124 }, { "epoch": 0.3564044326971182, "grad_norm": 1.034324288368225, "learning_rate": 2.2370231437667758e-05, "loss": 0.1095, "step": 20125 }, { "epoch": 0.35642214223414664, "grad_norm": 0.8878680467605591, "learning_rate": 2.2369482079888435e-05, "loss": 0.0743, "step": 20126 }, { "epoch": 0.3564398517711751, "grad_norm": 0.3888358473777771, "learning_rate": 2.236873269786426e-05, "loss": 0.0588, "step": 20127 }, { "epoch": 0.3564575613082035, "grad_norm": 0.7128797173500061, "learning_rate": 2.2367983291597692e-05, "loss": 0.078, "step": 20128 }, { "epoch": 0.35647527084523195, "grad_norm": 0.938458263874054, "learning_rate": 2.236723386109121e-05, "loss": 0.1147, "step": 20129 }, { "epoch": 0.35649298038226035, "grad_norm": 0.746803879737854, "learning_rate": 2.2366484406347275e-05, "loss": 0.0829, "step": 20130 }, { "epoch": 0.3565106899192888, "grad_norm": 0.512553334236145, "learning_rate": 2.2365734927368352e-05, "loss": 0.083, "step": 20131 }, { "epoch": 0.3565283994563172, "grad_norm": 0.6333342790603638, "learning_rate": 2.2364985424156904e-05, "loss": 0.0641, "step": 20132 }, { "epoch": 0.35654610899334566, "grad_norm": 1.141028881072998, "learning_rate": 2.2364235896715398e-05, "loss": 0.0942, "step": 20133 }, { "epoch": 0.35656381853037405, "grad_norm": 0.5759899020195007, "learning_rate": 2.23634863450463e-05, "loss": 0.0801, "step": 20134 }, { "epoch": 0.3565815280674025, "grad_norm": 0.6256591081619263, "learning_rate": 2.2362736769152076e-05, "loss": 0.056, "step": 20135 }, { "epoch": 0.3565992376044309, "grad_norm": 0.7065256237983704, "learning_rate": 2.2361987169035197e-05, "loss": 0.0765, "step": 20136 }, { "epoch": 0.35661694714145936, "grad_norm": 0.6077278852462769, "learning_rate": 2.2361237544698123e-05, "loss": 0.0882, "step": 20137 }, { "epoch": 0.35663465667848776, "grad_norm": 0.5824951529502869, "learning_rate": 2.2360487896143326e-05, "loss": 0.0957, "step": 20138 }, { "epoch": 0.3566523662155162, "grad_norm": 0.4844352900981903, "learning_rate": 2.235973822337326e-05, "loss": 0.0576, "step": 20139 }, { "epoch": 0.3566700757525446, "grad_norm": 1.0479710102081299, "learning_rate": 2.235898852639041e-05, "loss": 0.1048, "step": 20140 }, { "epoch": 0.35668778528957307, "grad_norm": 0.8892455697059631, "learning_rate": 2.235823880519722e-05, "loss": 0.0975, "step": 20141 }, { "epoch": 0.3567054948266015, "grad_norm": 0.7863995432853699, "learning_rate": 2.2357489059796176e-05, "loss": 0.1113, "step": 20142 }, { "epoch": 0.3567232043636299, "grad_norm": 0.6195798516273499, "learning_rate": 2.2356739290189736e-05, "loss": 0.0906, "step": 20143 }, { "epoch": 0.3567409139006584, "grad_norm": 0.9534876346588135, "learning_rate": 2.2355989496380366e-05, "loss": 0.0883, "step": 20144 }, { "epoch": 0.3567586234376868, "grad_norm": 0.8329723477363586, "learning_rate": 2.2355239678370536e-05, "loss": 0.1276, "step": 20145 }, { "epoch": 0.3567763329747152, "grad_norm": 0.6701673269271851, "learning_rate": 2.235448983616271e-05, "loss": 0.0823, "step": 20146 }, { "epoch": 0.3567940425117436, "grad_norm": 0.6996746063232422, "learning_rate": 2.2353739969759364e-05, "loss": 0.0986, "step": 20147 }, { "epoch": 0.3568117520487721, "grad_norm": 0.956762433052063, "learning_rate": 2.2352990079162947e-05, "loss": 0.1062, "step": 20148 }, { "epoch": 0.3568294615858005, "grad_norm": 0.8213098049163818, "learning_rate": 2.235224016437594e-05, "loss": 0.0978, "step": 20149 }, { "epoch": 0.35684717112282893, "grad_norm": 0.6885930299758911, "learning_rate": 2.235149022540081e-05, "loss": 0.1137, "step": 20150 }, { "epoch": 0.35686488065985733, "grad_norm": 0.949981153011322, "learning_rate": 2.2350740262240017e-05, "loss": 0.0693, "step": 20151 }, { "epoch": 0.3568825901968858, "grad_norm": 0.6209002137184143, "learning_rate": 2.2349990274896033e-05, "loss": 0.0527, "step": 20152 }, { "epoch": 0.3569002997339142, "grad_norm": 0.6935112476348877, "learning_rate": 2.234924026337132e-05, "loss": 0.0788, "step": 20153 }, { "epoch": 0.35691800927094264, "grad_norm": 0.9482631087303162, "learning_rate": 2.2348490227668355e-05, "loss": 0.0786, "step": 20154 }, { "epoch": 0.35693571880797104, "grad_norm": 0.49101683497428894, "learning_rate": 2.23477401677896e-05, "loss": 0.0937, "step": 20155 }, { "epoch": 0.3569534283449995, "grad_norm": 1.0565885305404663, "learning_rate": 2.2346990083737522e-05, "loss": 0.103, "step": 20156 }, { "epoch": 0.35697113788202794, "grad_norm": 0.42022621631622314, "learning_rate": 2.234623997551459e-05, "loss": 0.0705, "step": 20157 }, { "epoch": 0.35698884741905634, "grad_norm": 0.8839125037193298, "learning_rate": 2.234548984312327e-05, "loss": 0.0483, "step": 20158 }, { "epoch": 0.3570065569560848, "grad_norm": 0.7529700994491577, "learning_rate": 2.2344739686566035e-05, "loss": 0.0883, "step": 20159 }, { "epoch": 0.3570242664931132, "grad_norm": 0.8858222961425781, "learning_rate": 2.2343989505845347e-05, "loss": 0.0788, "step": 20160 }, { "epoch": 0.35704197603014165, "grad_norm": 1.1018160581588745, "learning_rate": 2.2343239300963677e-05, "loss": 0.1112, "step": 20161 }, { "epoch": 0.35705968556717005, "grad_norm": 1.0615265369415283, "learning_rate": 2.234248907192349e-05, "loss": 0.0739, "step": 20162 }, { "epoch": 0.3570773951041985, "grad_norm": 0.8143443465232849, "learning_rate": 2.2341738818727258e-05, "loss": 0.0796, "step": 20163 }, { "epoch": 0.3570951046412269, "grad_norm": 0.6277787089347839, "learning_rate": 2.2340988541377448e-05, "loss": 0.0882, "step": 20164 }, { "epoch": 0.35711281417825536, "grad_norm": 0.9523302316665649, "learning_rate": 2.2340238239876533e-05, "loss": 0.0866, "step": 20165 }, { "epoch": 0.35713052371528375, "grad_norm": 0.6955156326293945, "learning_rate": 2.2339487914226973e-05, "loss": 0.0892, "step": 20166 }, { "epoch": 0.3571482332523122, "grad_norm": 0.867311954498291, "learning_rate": 2.2338737564431238e-05, "loss": 0.0895, "step": 20167 }, { "epoch": 0.3571659427893406, "grad_norm": 0.8910415172576904, "learning_rate": 2.2337987190491797e-05, "loss": 0.0598, "step": 20168 }, { "epoch": 0.35718365232636906, "grad_norm": 0.5712956786155701, "learning_rate": 2.2337236792411125e-05, "loss": 0.1123, "step": 20169 }, { "epoch": 0.35720136186339746, "grad_norm": 0.887968897819519, "learning_rate": 2.2336486370191688e-05, "loss": 0.0606, "step": 20170 }, { "epoch": 0.3572190714004259, "grad_norm": 0.7355058193206787, "learning_rate": 2.2335735923835945e-05, "loss": 0.0922, "step": 20171 }, { "epoch": 0.35723678093745437, "grad_norm": 0.7175925374031067, "learning_rate": 2.2334985453346376e-05, "loss": 0.0899, "step": 20172 }, { "epoch": 0.35725449047448277, "grad_norm": 1.3250069618225098, "learning_rate": 2.2334234958725453e-05, "loss": 0.1179, "step": 20173 }, { "epoch": 0.3572722000115112, "grad_norm": 0.6558870077133179, "learning_rate": 2.233348443997563e-05, "loss": 0.029, "step": 20174 }, { "epoch": 0.3572899095485396, "grad_norm": 0.9422006607055664, "learning_rate": 2.2332733897099392e-05, "loss": 0.0846, "step": 20175 }, { "epoch": 0.3573076190855681, "grad_norm": 0.6971361041069031, "learning_rate": 2.2331983330099194e-05, "loss": 0.0803, "step": 20176 }, { "epoch": 0.35732532862259647, "grad_norm": 0.6014885902404785, "learning_rate": 2.2331232738977515e-05, "loss": 0.0949, "step": 20177 }, { "epoch": 0.3573430381596249, "grad_norm": 0.9843035340309143, "learning_rate": 2.2330482123736822e-05, "loss": 0.0733, "step": 20178 }, { "epoch": 0.3573607476966533, "grad_norm": 0.8359946012496948, "learning_rate": 2.2329731484379588e-05, "loss": 0.116, "step": 20179 }, { "epoch": 0.3573784572336818, "grad_norm": 0.4551456868648529, "learning_rate": 2.2328980820908274e-05, "loss": 0.0506, "step": 20180 }, { "epoch": 0.3573961667707102, "grad_norm": 0.6505075693130493, "learning_rate": 2.232823013332536e-05, "loss": 0.0775, "step": 20181 }, { "epoch": 0.35741387630773863, "grad_norm": 0.7089636325836182, "learning_rate": 2.2327479421633304e-05, "loss": 0.0828, "step": 20182 }, { "epoch": 0.35743158584476703, "grad_norm": 0.7865769267082214, "learning_rate": 2.2326728685834586e-05, "loss": 0.1026, "step": 20183 }, { "epoch": 0.3574492953817955, "grad_norm": 0.5974690318107605, "learning_rate": 2.232597792593167e-05, "loss": 0.0958, "step": 20184 }, { "epoch": 0.3574670049188239, "grad_norm": 0.9680486917495728, "learning_rate": 2.2325227141927026e-05, "loss": 0.1068, "step": 20185 }, { "epoch": 0.35748471445585234, "grad_norm": 0.5969219207763672, "learning_rate": 2.232447633382313e-05, "loss": 0.076, "step": 20186 }, { "epoch": 0.3575024239928808, "grad_norm": 0.5036848187446594, "learning_rate": 2.2323725501622438e-05, "loss": 0.0873, "step": 20187 }, { "epoch": 0.3575201335299092, "grad_norm": 0.7471645474433899, "learning_rate": 2.232297464532744e-05, "loss": 0.0679, "step": 20188 }, { "epoch": 0.35753784306693764, "grad_norm": 0.7448589205741882, "learning_rate": 2.23222237649406e-05, "loss": 0.1013, "step": 20189 }, { "epoch": 0.35755555260396604, "grad_norm": 0.7167758345603943, "learning_rate": 2.2321472860464372e-05, "loss": 0.123, "step": 20190 }, { "epoch": 0.3575732621409945, "grad_norm": 1.0988284349441528, "learning_rate": 2.2320721931901243e-05, "loss": 0.0919, "step": 20191 }, { "epoch": 0.3575909716780229, "grad_norm": 0.3618748188018799, "learning_rate": 2.2319970979253677e-05, "loss": 0.069, "step": 20192 }, { "epoch": 0.35760868121505135, "grad_norm": 0.6225923895835876, "learning_rate": 2.231922000252415e-05, "loss": 0.1094, "step": 20193 }, { "epoch": 0.35762639075207975, "grad_norm": 0.662275493144989, "learning_rate": 2.231846900171513e-05, "loss": 0.0819, "step": 20194 }, { "epoch": 0.3576441002891082, "grad_norm": 0.6676918268203735, "learning_rate": 2.231771797682909e-05, "loss": 0.0799, "step": 20195 }, { "epoch": 0.3576618098261366, "grad_norm": 0.6476819515228271, "learning_rate": 2.2316966927868493e-05, "loss": 0.1058, "step": 20196 }, { "epoch": 0.35767951936316505, "grad_norm": 0.5552640557289124, "learning_rate": 2.2316215854835817e-05, "loss": 0.0824, "step": 20197 }, { "epoch": 0.35769722890019345, "grad_norm": 0.7553377747535706, "learning_rate": 2.231546475773353e-05, "loss": 0.0656, "step": 20198 }, { "epoch": 0.3577149384372219, "grad_norm": 0.8972952961921692, "learning_rate": 2.2314713636564105e-05, "loss": 0.0792, "step": 20199 }, { "epoch": 0.3577326479742503, "grad_norm": 0.47059205174446106, "learning_rate": 2.2313962491330016e-05, "loss": 0.1105, "step": 20200 }, { "epoch": 0.35775035751127876, "grad_norm": 0.47789281606674194, "learning_rate": 2.231321132203372e-05, "loss": 0.0641, "step": 20201 }, { "epoch": 0.3577680670483072, "grad_norm": 0.6815040707588196, "learning_rate": 2.231246012867771e-05, "loss": 0.0791, "step": 20202 }, { "epoch": 0.3577857765853356, "grad_norm": 0.5811102390289307, "learning_rate": 2.2311708911264435e-05, "loss": 0.0466, "step": 20203 }, { "epoch": 0.35780348612236407, "grad_norm": 1.0689687728881836, "learning_rate": 2.231095766979638e-05, "loss": 0.0982, "step": 20204 }, { "epoch": 0.35782119565939247, "grad_norm": 0.8798937797546387, "learning_rate": 2.231020640427602e-05, "loss": 0.0889, "step": 20205 }, { "epoch": 0.3578389051964209, "grad_norm": 0.7207009792327881, "learning_rate": 2.2309455114705812e-05, "loss": 0.0644, "step": 20206 }, { "epoch": 0.3578566147334493, "grad_norm": 0.9245839715003967, "learning_rate": 2.2308703801088244e-05, "loss": 0.114, "step": 20207 }, { "epoch": 0.3578743242704778, "grad_norm": 0.7866207361221313, "learning_rate": 2.2307952463425772e-05, "loss": 0.0722, "step": 20208 }, { "epoch": 0.35789203380750617, "grad_norm": 0.6917526125907898, "learning_rate": 2.2307201101720883e-05, "loss": 0.0826, "step": 20209 }, { "epoch": 0.3579097433445346, "grad_norm": 0.829020619392395, "learning_rate": 2.2306449715976037e-05, "loss": 0.087, "step": 20210 }, { "epoch": 0.357927452881563, "grad_norm": 0.7346882224082947, "learning_rate": 2.230569830619371e-05, "loss": 0.0858, "step": 20211 }, { "epoch": 0.3579451624185915, "grad_norm": 1.0851242542266846, "learning_rate": 2.2304946872376376e-05, "loss": 0.0942, "step": 20212 }, { "epoch": 0.3579628719556199, "grad_norm": 0.7772360444068909, "learning_rate": 2.2304195414526505e-05, "loss": 0.0894, "step": 20213 }, { "epoch": 0.35798058149264833, "grad_norm": 0.7282352447509766, "learning_rate": 2.2303443932646574e-05, "loss": 0.0794, "step": 20214 }, { "epoch": 0.35799829102967673, "grad_norm": 1.4834966659545898, "learning_rate": 2.2302692426739045e-05, "loss": 0.1102, "step": 20215 }, { "epoch": 0.3580160005667052, "grad_norm": 0.8406233787536621, "learning_rate": 2.2301940896806396e-05, "loss": 0.0636, "step": 20216 }, { "epoch": 0.35803371010373364, "grad_norm": 0.8092544078826904, "learning_rate": 2.2301189342851103e-05, "loss": 0.0803, "step": 20217 }, { "epoch": 0.35805141964076204, "grad_norm": 0.7449887990951538, "learning_rate": 2.230043776487564e-05, "loss": 0.1003, "step": 20218 }, { "epoch": 0.3580691291777905, "grad_norm": 0.9521604180335999, "learning_rate": 2.2299686162882468e-05, "loss": 0.0861, "step": 20219 }, { "epoch": 0.3580868387148189, "grad_norm": 0.8342021703720093, "learning_rate": 2.2298934536874067e-05, "loss": 0.0763, "step": 20220 }, { "epoch": 0.35810454825184734, "grad_norm": 0.8486059308052063, "learning_rate": 2.2298182886852915e-05, "loss": 0.1077, "step": 20221 }, { "epoch": 0.35812225778887574, "grad_norm": 0.48450157046318054, "learning_rate": 2.2297431212821472e-05, "loss": 0.0893, "step": 20222 }, { "epoch": 0.3581399673259042, "grad_norm": 0.916290819644928, "learning_rate": 2.2296679514782226e-05, "loss": 0.1247, "step": 20223 }, { "epoch": 0.3581576768629326, "grad_norm": 0.5994049906730652, "learning_rate": 2.229592779273764e-05, "loss": 0.0856, "step": 20224 }, { "epoch": 0.35817538639996105, "grad_norm": 0.7794917225837708, "learning_rate": 2.2295176046690188e-05, "loss": 0.0872, "step": 20225 }, { "epoch": 0.35819309593698945, "grad_norm": 0.7731468081474304, "learning_rate": 2.2294424276642343e-05, "loss": 0.0838, "step": 20226 }, { "epoch": 0.3582108054740179, "grad_norm": 1.0424457788467407, "learning_rate": 2.2293672482596584e-05, "loss": 0.0789, "step": 20227 }, { "epoch": 0.3582285150110463, "grad_norm": 0.9259428977966309, "learning_rate": 2.2292920664555375e-05, "loss": 0.0961, "step": 20228 }, { "epoch": 0.35824622454807475, "grad_norm": 0.5723820328712463, "learning_rate": 2.2292168822521202e-05, "loss": 0.0728, "step": 20229 }, { "epoch": 0.35826393408510315, "grad_norm": 0.6290487051010132, "learning_rate": 2.2291416956496528e-05, "loss": 0.0623, "step": 20230 }, { "epoch": 0.3582816436221316, "grad_norm": 0.34583544731140137, "learning_rate": 2.2290665066483827e-05, "loss": 0.0878, "step": 20231 }, { "epoch": 0.35829935315916006, "grad_norm": 2.5146114826202393, "learning_rate": 2.2289913152485582e-05, "loss": 0.0944, "step": 20232 }, { "epoch": 0.35831706269618846, "grad_norm": 0.6607961058616638, "learning_rate": 2.2289161214504253e-05, "loss": 0.1011, "step": 20233 }, { "epoch": 0.3583347722332169, "grad_norm": 0.7518116235733032, "learning_rate": 2.2288409252542324e-05, "loss": 0.0605, "step": 20234 }, { "epoch": 0.3583524817702453, "grad_norm": 1.3993357419967651, "learning_rate": 2.228765726660226e-05, "loss": 0.0782, "step": 20235 }, { "epoch": 0.35837019130727377, "grad_norm": 1.1277602910995483, "learning_rate": 2.228690525668655e-05, "loss": 0.1241, "step": 20236 }, { "epoch": 0.35838790084430217, "grad_norm": 1.3541215658187866, "learning_rate": 2.2286153222797653e-05, "loss": 0.1079, "step": 20237 }, { "epoch": 0.3584056103813306, "grad_norm": 1.1516717672348022, "learning_rate": 2.228540116493805e-05, "loss": 0.0916, "step": 20238 }, { "epoch": 0.358423319918359, "grad_norm": 0.7457351684570312, "learning_rate": 2.2284649083110213e-05, "loss": 0.1306, "step": 20239 }, { "epoch": 0.35844102945538747, "grad_norm": 0.809755265712738, "learning_rate": 2.228389697731662e-05, "loss": 0.0611, "step": 20240 }, { "epoch": 0.35845873899241587, "grad_norm": 1.1928004026412964, "learning_rate": 2.2283144847559745e-05, "loss": 0.1239, "step": 20241 }, { "epoch": 0.3584764485294443, "grad_norm": 1.1247004270553589, "learning_rate": 2.2282392693842055e-05, "loss": 0.113, "step": 20242 }, { "epoch": 0.3584941580664727, "grad_norm": 0.6854231953620911, "learning_rate": 2.228164051616603e-05, "loss": 0.0576, "step": 20243 }, { "epoch": 0.3585118676035012, "grad_norm": 0.8880309462547302, "learning_rate": 2.2280888314534144e-05, "loss": 0.0914, "step": 20244 }, { "epoch": 0.3585295771405296, "grad_norm": 0.7169229984283447, "learning_rate": 2.2280136088948873e-05, "loss": 0.0966, "step": 20245 }, { "epoch": 0.35854728667755803, "grad_norm": 0.4933592677116394, "learning_rate": 2.2279383839412692e-05, "loss": 0.0612, "step": 20246 }, { "epoch": 0.3585649962145865, "grad_norm": 0.9978182315826416, "learning_rate": 2.2278631565928074e-05, "loss": 0.094, "step": 20247 }, { "epoch": 0.3585827057516149, "grad_norm": 0.7494803667068481, "learning_rate": 2.2277879268497496e-05, "loss": 0.0797, "step": 20248 }, { "epoch": 0.35860041528864334, "grad_norm": 0.7121649384498596, "learning_rate": 2.2277126947123424e-05, "loss": 0.0725, "step": 20249 }, { "epoch": 0.35861812482567174, "grad_norm": 0.5853297710418701, "learning_rate": 2.227637460180835e-05, "loss": 0.1001, "step": 20250 }, { "epoch": 0.3586358343627002, "grad_norm": 0.8178926110267639, "learning_rate": 2.227562223255473e-05, "loss": 0.0932, "step": 20251 }, { "epoch": 0.3586535438997286, "grad_norm": 0.38506999611854553, "learning_rate": 2.2274869839365054e-05, "loss": 0.0607, "step": 20252 }, { "epoch": 0.35867125343675704, "grad_norm": 0.838733434677124, "learning_rate": 2.227411742224179e-05, "loss": 0.0999, "step": 20253 }, { "epoch": 0.35868896297378544, "grad_norm": 0.7068874835968018, "learning_rate": 2.227336498118742e-05, "loss": 0.0958, "step": 20254 }, { "epoch": 0.3587066725108139, "grad_norm": 0.9184263348579407, "learning_rate": 2.2272612516204413e-05, "loss": 0.1019, "step": 20255 }, { "epoch": 0.3587243820478423, "grad_norm": 0.8018152117729187, "learning_rate": 2.2271860027295244e-05, "loss": 0.0844, "step": 20256 }, { "epoch": 0.35874209158487075, "grad_norm": 0.8092877268791199, "learning_rate": 2.227110751446239e-05, "loss": 0.1057, "step": 20257 }, { "epoch": 0.35875980112189915, "grad_norm": 0.6067981719970703, "learning_rate": 2.2270354977708332e-05, "loss": 0.0582, "step": 20258 }, { "epoch": 0.3587775106589276, "grad_norm": 0.6710186004638672, "learning_rate": 2.226960241703554e-05, "loss": 0.0627, "step": 20259 }, { "epoch": 0.358795220195956, "grad_norm": 0.630060076713562, "learning_rate": 2.226884983244649e-05, "loss": 0.0735, "step": 20260 }, { "epoch": 0.35881292973298445, "grad_norm": 0.6700161099433899, "learning_rate": 2.2268097223943663e-05, "loss": 0.0501, "step": 20261 }, { "epoch": 0.3588306392700129, "grad_norm": 0.9156854152679443, "learning_rate": 2.2267344591529528e-05, "loss": 0.0804, "step": 20262 }, { "epoch": 0.3588483488070413, "grad_norm": 0.5500367879867554, "learning_rate": 2.226659193520656e-05, "loss": 0.0738, "step": 20263 }, { "epoch": 0.35886605834406976, "grad_norm": 0.4681081771850586, "learning_rate": 2.226583925497725e-05, "loss": 0.0764, "step": 20264 }, { "epoch": 0.35888376788109816, "grad_norm": 0.49474549293518066, "learning_rate": 2.226508655084405e-05, "loss": 0.0688, "step": 20265 }, { "epoch": 0.3589014774181266, "grad_norm": 0.532995879650116, "learning_rate": 2.226433382280946e-05, "loss": 0.0753, "step": 20266 }, { "epoch": 0.358919186955155, "grad_norm": 0.7980814576148987, "learning_rate": 2.2263581070875945e-05, "loss": 0.0604, "step": 20267 }, { "epoch": 0.35893689649218347, "grad_norm": 0.7369817495346069, "learning_rate": 2.2262828295045983e-05, "loss": 0.0869, "step": 20268 }, { "epoch": 0.35895460602921186, "grad_norm": 0.543509304523468, "learning_rate": 2.226207549532205e-05, "loss": 0.0923, "step": 20269 }, { "epoch": 0.3589723155662403, "grad_norm": 0.8235028386116028, "learning_rate": 2.226132267170662e-05, "loss": 0.0995, "step": 20270 }, { "epoch": 0.3589900251032687, "grad_norm": 0.9540897607803345, "learning_rate": 2.226056982420218e-05, "loss": 0.0784, "step": 20271 }, { "epoch": 0.35900773464029717, "grad_norm": 0.4090717136859894, "learning_rate": 2.2259816952811195e-05, "loss": 0.0642, "step": 20272 }, { "epoch": 0.35902544417732557, "grad_norm": 0.5068007707595825, "learning_rate": 2.2259064057536146e-05, "loss": 0.0745, "step": 20273 }, { "epoch": 0.359043153714354, "grad_norm": 1.2490094900131226, "learning_rate": 2.225831113837951e-05, "loss": 0.1512, "step": 20274 }, { "epoch": 0.3590608632513825, "grad_norm": 1.2078263759613037, "learning_rate": 2.225755819534377e-05, "loss": 0.0913, "step": 20275 }, { "epoch": 0.3590785727884109, "grad_norm": 0.6806946992874146, "learning_rate": 2.2256805228431393e-05, "loss": 0.0755, "step": 20276 }, { "epoch": 0.35909628232543933, "grad_norm": 0.7980968356132507, "learning_rate": 2.2256052237644862e-05, "loss": 0.0635, "step": 20277 }, { "epoch": 0.35911399186246773, "grad_norm": 0.9042552709579468, "learning_rate": 2.225529922298665e-05, "loss": 0.0738, "step": 20278 }, { "epoch": 0.3591317013994962, "grad_norm": 0.7519900798797607, "learning_rate": 2.2254546184459237e-05, "loss": 0.0844, "step": 20279 }, { "epoch": 0.3591494109365246, "grad_norm": 0.748235285282135, "learning_rate": 2.2253793122065106e-05, "loss": 0.054, "step": 20280 }, { "epoch": 0.35916712047355304, "grad_norm": 0.9124942421913147, "learning_rate": 2.2253040035806723e-05, "loss": 0.0822, "step": 20281 }, { "epoch": 0.35918483001058144, "grad_norm": 0.5164172649383545, "learning_rate": 2.225228692568658e-05, "loss": 0.0786, "step": 20282 }, { "epoch": 0.3592025395476099, "grad_norm": 0.7641187310218811, "learning_rate": 2.2251533791707136e-05, "loss": 0.0911, "step": 20283 }, { "epoch": 0.3592202490846383, "grad_norm": 0.7102837562561035, "learning_rate": 2.225078063387089e-05, "loss": 0.0701, "step": 20284 }, { "epoch": 0.35923795862166674, "grad_norm": 0.815055787563324, "learning_rate": 2.225002745218031e-05, "loss": 0.0884, "step": 20285 }, { "epoch": 0.35925566815869514, "grad_norm": 0.5720270872116089, "learning_rate": 2.2249274246637864e-05, "loss": 0.0773, "step": 20286 }, { "epoch": 0.3592733776957236, "grad_norm": 0.8316524028778076, "learning_rate": 2.224852101724604e-05, "loss": 0.0924, "step": 20287 }, { "epoch": 0.359291087232752, "grad_norm": 0.6189911365509033, "learning_rate": 2.2247767764007317e-05, "loss": 0.0574, "step": 20288 }, { "epoch": 0.35930879676978045, "grad_norm": 1.2865420579910278, "learning_rate": 2.2247014486924175e-05, "loss": 0.1095, "step": 20289 }, { "epoch": 0.3593265063068089, "grad_norm": 0.23572848737239838, "learning_rate": 2.2246261185999085e-05, "loss": 0.0467, "step": 20290 }, { "epoch": 0.3593442158438373, "grad_norm": 0.7038211226463318, "learning_rate": 2.224550786123453e-05, "loss": 0.0774, "step": 20291 }, { "epoch": 0.35936192538086575, "grad_norm": 0.6263458728790283, "learning_rate": 2.224475451263298e-05, "loss": 0.1203, "step": 20292 }, { "epoch": 0.35937963491789415, "grad_norm": 0.8877490758895874, "learning_rate": 2.2244001140196924e-05, "loss": 0.0968, "step": 20293 }, { "epoch": 0.3593973444549226, "grad_norm": 0.8414270281791687, "learning_rate": 2.2243247743928844e-05, "loss": 0.1067, "step": 20294 }, { "epoch": 0.359415053991951, "grad_norm": 0.676330029964447, "learning_rate": 2.2242494323831205e-05, "loss": 0.0744, "step": 20295 }, { "epoch": 0.35943276352897946, "grad_norm": 0.5665915608406067, "learning_rate": 2.2241740879906493e-05, "loss": 0.048, "step": 20296 }, { "epoch": 0.35945047306600786, "grad_norm": 0.8390756845474243, "learning_rate": 2.2240987412157186e-05, "loss": 0.0527, "step": 20297 }, { "epoch": 0.3594681826030363, "grad_norm": 0.8638498187065125, "learning_rate": 2.2240233920585763e-05, "loss": 0.0808, "step": 20298 }, { "epoch": 0.3594858921400647, "grad_norm": 0.7221866250038147, "learning_rate": 2.2239480405194705e-05, "loss": 0.0934, "step": 20299 }, { "epoch": 0.35950360167709317, "grad_norm": 0.6658340096473694, "learning_rate": 2.223872686598649e-05, "loss": 0.0768, "step": 20300 }, { "epoch": 0.35952131121412156, "grad_norm": 0.647262692451477, "learning_rate": 2.223797330296359e-05, "loss": 0.0782, "step": 20301 }, { "epoch": 0.35953902075115, "grad_norm": 0.7732744812965393, "learning_rate": 2.223721971612849e-05, "loss": 0.0685, "step": 20302 }, { "epoch": 0.3595567302881784, "grad_norm": 1.3022658824920654, "learning_rate": 2.2236466105483673e-05, "loss": 0.1274, "step": 20303 }, { "epoch": 0.35957443982520687, "grad_norm": 0.5402271151542664, "learning_rate": 2.2235712471031614e-05, "loss": 0.0746, "step": 20304 }, { "epoch": 0.3595921493622353, "grad_norm": 0.7714583873748779, "learning_rate": 2.2234958812774793e-05, "loss": 0.0756, "step": 20305 }, { "epoch": 0.3596098588992637, "grad_norm": 0.7088914513587952, "learning_rate": 2.2234205130715688e-05, "loss": 0.0782, "step": 20306 }, { "epoch": 0.3596275684362922, "grad_norm": 0.5944632291793823, "learning_rate": 2.223345142485678e-05, "loss": 0.0921, "step": 20307 }, { "epoch": 0.3596452779733206, "grad_norm": 0.8006603121757507, "learning_rate": 2.223269769520055e-05, "loss": 0.1001, "step": 20308 }, { "epoch": 0.35966298751034903, "grad_norm": 0.6236562132835388, "learning_rate": 2.2231943941749473e-05, "loss": 0.0616, "step": 20309 }, { "epoch": 0.35968069704737743, "grad_norm": 1.1401078701019287, "learning_rate": 2.2231190164506034e-05, "loss": 0.1283, "step": 20310 }, { "epoch": 0.3596984065844059, "grad_norm": 0.6837193965911865, "learning_rate": 2.2230436363472705e-05, "loss": 0.0903, "step": 20311 }, { "epoch": 0.3597161161214343, "grad_norm": 0.7922824025154114, "learning_rate": 2.222968253865198e-05, "loss": 0.0915, "step": 20312 }, { "epoch": 0.35973382565846274, "grad_norm": 1.0431333780288696, "learning_rate": 2.2228928690046327e-05, "loss": 0.0894, "step": 20313 }, { "epoch": 0.35975153519549113, "grad_norm": 0.706487238407135, "learning_rate": 2.2228174817658235e-05, "loss": 0.0557, "step": 20314 }, { "epoch": 0.3597692447325196, "grad_norm": 1.1280806064605713, "learning_rate": 2.222742092149017e-05, "loss": 0.1153, "step": 20315 }, { "epoch": 0.359786954269548, "grad_norm": 0.646130383014679, "learning_rate": 2.2226667001544624e-05, "loss": 0.071, "step": 20316 }, { "epoch": 0.35980466380657644, "grad_norm": 0.5776408314704895, "learning_rate": 2.222591305782408e-05, "loss": 0.1074, "step": 20317 }, { "epoch": 0.35982237334360484, "grad_norm": 1.1804367303848267, "learning_rate": 2.2225159090331005e-05, "loss": 0.106, "step": 20318 }, { "epoch": 0.3598400828806333, "grad_norm": 0.7551382184028625, "learning_rate": 2.2224405099067893e-05, "loss": 0.0951, "step": 20319 }, { "epoch": 0.35985779241766175, "grad_norm": 1.3503044843673706, "learning_rate": 2.2223651084037216e-05, "loss": 0.0694, "step": 20320 }, { "epoch": 0.35987550195469015, "grad_norm": 0.9226003885269165, "learning_rate": 2.2222897045241458e-05, "loss": 0.0822, "step": 20321 }, { "epoch": 0.3598932114917186, "grad_norm": 0.8478797674179077, "learning_rate": 2.22221429826831e-05, "loss": 0.0808, "step": 20322 }, { "epoch": 0.359910921028747, "grad_norm": 0.7591699361801147, "learning_rate": 2.222138889636462e-05, "loss": 0.0667, "step": 20323 }, { "epoch": 0.35992863056577545, "grad_norm": 0.9059216380119324, "learning_rate": 2.222063478628851e-05, "loss": 0.0854, "step": 20324 }, { "epoch": 0.35994634010280385, "grad_norm": 0.6013749241828918, "learning_rate": 2.221988065245723e-05, "loss": 0.0663, "step": 20325 }, { "epoch": 0.3599640496398323, "grad_norm": 0.42732638120651245, "learning_rate": 2.221912649487328e-05, "loss": 0.0748, "step": 20326 }, { "epoch": 0.3599817591768607, "grad_norm": 0.6701399087905884, "learning_rate": 2.221837231353913e-05, "loss": 0.076, "step": 20327 }, { "epoch": 0.35999946871388916, "grad_norm": 0.8021410703659058, "learning_rate": 2.2217618108457268e-05, "loss": 0.0756, "step": 20328 }, { "epoch": 0.36001717825091756, "grad_norm": 1.201183557510376, "learning_rate": 2.221686387963017e-05, "loss": 0.1209, "step": 20329 }, { "epoch": 0.360034887787946, "grad_norm": 0.4718099534511566, "learning_rate": 2.221610962706032e-05, "loss": 0.0903, "step": 20330 }, { "epoch": 0.3600525973249744, "grad_norm": 0.8145620226860046, "learning_rate": 2.2215355350750202e-05, "loss": 0.108, "step": 20331 }, { "epoch": 0.36007030686200286, "grad_norm": 0.7249778509140015, "learning_rate": 2.2214601050702293e-05, "loss": 0.0891, "step": 20332 }, { "epoch": 0.36008801639903126, "grad_norm": 0.7916348576545715, "learning_rate": 2.2213846726919077e-05, "loss": 0.0979, "step": 20333 }, { "epoch": 0.3601057259360597, "grad_norm": 0.8209920525550842, "learning_rate": 2.2213092379403032e-05, "loss": 0.1051, "step": 20334 }, { "epoch": 0.36012343547308817, "grad_norm": 0.9306464791297913, "learning_rate": 2.2212338008156647e-05, "loss": 0.0866, "step": 20335 }, { "epoch": 0.36014114501011657, "grad_norm": 0.7040552496910095, "learning_rate": 2.2211583613182393e-05, "loss": 0.0898, "step": 20336 }, { "epoch": 0.360158854547145, "grad_norm": 0.8525454998016357, "learning_rate": 2.2210829194482766e-05, "loss": 0.1397, "step": 20337 }, { "epoch": 0.3601765640841734, "grad_norm": 1.016325831413269, "learning_rate": 2.2210074752060237e-05, "loss": 0.0587, "step": 20338 }, { "epoch": 0.3601942736212019, "grad_norm": 0.7447726726531982, "learning_rate": 2.2209320285917295e-05, "loss": 0.1057, "step": 20339 }, { "epoch": 0.3602119831582303, "grad_norm": 0.5057799816131592, "learning_rate": 2.220856579605641e-05, "loss": 0.0977, "step": 20340 }, { "epoch": 0.36022969269525873, "grad_norm": 1.2597520351409912, "learning_rate": 2.2207811282480078e-05, "loss": 0.0817, "step": 20341 }, { "epoch": 0.36024740223228713, "grad_norm": 0.5163666009902954, "learning_rate": 2.2207056745190777e-05, "loss": 0.092, "step": 20342 }, { "epoch": 0.3602651117693156, "grad_norm": 1.0701817274093628, "learning_rate": 2.2206302184190984e-05, "loss": 0.0773, "step": 20343 }, { "epoch": 0.360282821306344, "grad_norm": 0.5544103980064392, "learning_rate": 2.220554759948319e-05, "loss": 0.0843, "step": 20344 }, { "epoch": 0.36030053084337244, "grad_norm": 0.6750965118408203, "learning_rate": 2.220479299106987e-05, "loss": 0.0699, "step": 20345 }, { "epoch": 0.36031824038040083, "grad_norm": 1.259697437286377, "learning_rate": 2.2204038358953512e-05, "loss": 0.1294, "step": 20346 }, { "epoch": 0.3603359499174293, "grad_norm": 0.85710608959198, "learning_rate": 2.2203283703136596e-05, "loss": 0.0716, "step": 20347 }, { "epoch": 0.3603536594544577, "grad_norm": 0.4740026295185089, "learning_rate": 2.2202529023621603e-05, "loss": 0.0492, "step": 20348 }, { "epoch": 0.36037136899148614, "grad_norm": 0.8280009627342224, "learning_rate": 2.220177432041102e-05, "loss": 0.0691, "step": 20349 }, { "epoch": 0.3603890785285146, "grad_norm": 0.6337137222290039, "learning_rate": 2.2201019593507328e-05, "loss": 0.0678, "step": 20350 }, { "epoch": 0.360406788065543, "grad_norm": 0.7719396352767944, "learning_rate": 2.220026484291301e-05, "loss": 0.1007, "step": 20351 }, { "epoch": 0.36042449760257145, "grad_norm": 1.0500956773757935, "learning_rate": 2.2199510068630547e-05, "loss": 0.0923, "step": 20352 }, { "epoch": 0.36044220713959985, "grad_norm": 0.5578780174255371, "learning_rate": 2.2198755270662424e-05, "loss": 0.0615, "step": 20353 }, { "epoch": 0.3604599166766283, "grad_norm": 0.6782006621360779, "learning_rate": 2.2198000449011125e-05, "loss": 0.0687, "step": 20354 }, { "epoch": 0.3604776262136567, "grad_norm": 0.8880075216293335, "learning_rate": 2.2197245603679136e-05, "loss": 0.0697, "step": 20355 }, { "epoch": 0.36049533575068515, "grad_norm": 0.7826733589172363, "learning_rate": 2.2196490734668934e-05, "loss": 0.1242, "step": 20356 }, { "epoch": 0.36051304528771355, "grad_norm": 0.9306946396827698, "learning_rate": 2.219573584198301e-05, "loss": 0.0873, "step": 20357 }, { "epoch": 0.360530754824742, "grad_norm": 0.49530187249183655, "learning_rate": 2.219498092562383e-05, "loss": 0.0932, "step": 20358 }, { "epoch": 0.3605484643617704, "grad_norm": 0.5116530060768127, "learning_rate": 2.2194225985593904e-05, "loss": 0.0494, "step": 20359 }, { "epoch": 0.36056617389879886, "grad_norm": 0.8327584266662598, "learning_rate": 2.2193471021895694e-05, "loss": 0.1104, "step": 20360 }, { "epoch": 0.36058388343582726, "grad_norm": 1.1682100296020508, "learning_rate": 2.2192716034531696e-05, "loss": 0.1107, "step": 20361 }, { "epoch": 0.3606015929728557, "grad_norm": 0.596488893032074, "learning_rate": 2.219196102350439e-05, "loss": 0.0493, "step": 20362 }, { "epoch": 0.3606193025098841, "grad_norm": 0.80987548828125, "learning_rate": 2.219120598881626e-05, "loss": 0.1068, "step": 20363 }, { "epoch": 0.36063701204691256, "grad_norm": 0.6146858930587769, "learning_rate": 2.2190450930469786e-05, "loss": 0.0833, "step": 20364 }, { "epoch": 0.360654721583941, "grad_norm": 0.8453536033630371, "learning_rate": 2.2189695848467457e-05, "loss": 0.1311, "step": 20365 }, { "epoch": 0.3606724311209694, "grad_norm": 0.6853557229042053, "learning_rate": 2.218894074281176e-05, "loss": 0.106, "step": 20366 }, { "epoch": 0.36069014065799787, "grad_norm": 0.34676703810691833, "learning_rate": 2.2188185613505172e-05, "loss": 0.0966, "step": 20367 }, { "epoch": 0.36070785019502627, "grad_norm": 0.7175156474113464, "learning_rate": 2.218743046055018e-05, "loss": 0.0859, "step": 20368 }, { "epoch": 0.3607255597320547, "grad_norm": 0.6892616152763367, "learning_rate": 2.2186675283949266e-05, "loss": 0.1037, "step": 20369 }, { "epoch": 0.3607432692690831, "grad_norm": 0.5447865128517151, "learning_rate": 2.218592008370492e-05, "loss": 0.0595, "step": 20370 }, { "epoch": 0.3607609788061116, "grad_norm": 0.7100496292114258, "learning_rate": 2.2185164859819625e-05, "loss": 0.1164, "step": 20371 }, { "epoch": 0.36077868834314, "grad_norm": 0.7975388765335083, "learning_rate": 2.2184409612295864e-05, "loss": 0.0833, "step": 20372 }, { "epoch": 0.36079639788016843, "grad_norm": 1.2384065389633179, "learning_rate": 2.2183654341136116e-05, "loss": 0.1425, "step": 20373 }, { "epoch": 0.3608141074171968, "grad_norm": 0.8933987021446228, "learning_rate": 2.218289904634288e-05, "loss": 0.0891, "step": 20374 }, { "epoch": 0.3608318169542253, "grad_norm": 0.4834224581718445, "learning_rate": 2.218214372791863e-05, "loss": 0.071, "step": 20375 }, { "epoch": 0.3608495264912537, "grad_norm": 0.6925999522209167, "learning_rate": 2.2181388385865854e-05, "loss": 0.0949, "step": 20376 }, { "epoch": 0.36086723602828213, "grad_norm": 0.890681803226471, "learning_rate": 2.2180633020187034e-05, "loss": 0.0613, "step": 20377 }, { "epoch": 0.36088494556531053, "grad_norm": 0.9538843035697937, "learning_rate": 2.217987763088466e-05, "loss": 0.0878, "step": 20378 }, { "epoch": 0.360902655102339, "grad_norm": 0.6291635036468506, "learning_rate": 2.2179122217961214e-05, "loss": 0.0896, "step": 20379 }, { "epoch": 0.36092036463936744, "grad_norm": 0.8399226069450378, "learning_rate": 2.217836678141918e-05, "loss": 0.083, "step": 20380 }, { "epoch": 0.36093807417639584, "grad_norm": 0.8521855473518372, "learning_rate": 2.2177611321261053e-05, "loss": 0.0926, "step": 20381 }, { "epoch": 0.3609557837134243, "grad_norm": 0.6025869846343994, "learning_rate": 2.2176855837489303e-05, "loss": 0.1143, "step": 20382 }, { "epoch": 0.3609734932504527, "grad_norm": 0.6535314917564392, "learning_rate": 2.2176100330106425e-05, "loss": 0.0559, "step": 20383 }, { "epoch": 0.36099120278748115, "grad_norm": 0.6408942341804504, "learning_rate": 2.2175344799114902e-05, "loss": 0.1025, "step": 20384 }, { "epoch": 0.36100891232450955, "grad_norm": 0.6245598196983337, "learning_rate": 2.2174589244517222e-05, "loss": 0.0868, "step": 20385 }, { "epoch": 0.361026621861538, "grad_norm": 1.0283918380737305, "learning_rate": 2.217383366631587e-05, "loss": 0.123, "step": 20386 }, { "epoch": 0.3610443313985664, "grad_norm": 1.0397484302520752, "learning_rate": 2.2173078064513325e-05, "loss": 0.1149, "step": 20387 }, { "epoch": 0.36106204093559485, "grad_norm": 1.0356171131134033, "learning_rate": 2.2172322439112086e-05, "loss": 0.1148, "step": 20388 }, { "epoch": 0.36107975047262325, "grad_norm": 0.5374513864517212, "learning_rate": 2.217156679011463e-05, "loss": 0.1072, "step": 20389 }, { "epoch": 0.3610974600096517, "grad_norm": 0.8403540253639221, "learning_rate": 2.2170811117523444e-05, "loss": 0.1361, "step": 20390 }, { "epoch": 0.3611151695466801, "grad_norm": 0.7802092432975769, "learning_rate": 2.2170055421341012e-05, "loss": 0.0639, "step": 20391 }, { "epoch": 0.36113287908370856, "grad_norm": 0.5565109848976135, "learning_rate": 2.2169299701569826e-05, "loss": 0.0988, "step": 20392 }, { "epoch": 0.36115058862073696, "grad_norm": 0.693101167678833, "learning_rate": 2.2168543958212365e-05, "loss": 0.0758, "step": 20393 }, { "epoch": 0.3611682981577654, "grad_norm": 0.7967349290847778, "learning_rate": 2.2167788191271124e-05, "loss": 0.1078, "step": 20394 }, { "epoch": 0.36118600769479386, "grad_norm": 1.1556780338287354, "learning_rate": 2.2167032400748588e-05, "loss": 0.0974, "step": 20395 }, { "epoch": 0.36120371723182226, "grad_norm": 0.6881783604621887, "learning_rate": 2.2166276586647233e-05, "loss": 0.0888, "step": 20396 }, { "epoch": 0.3612214267688507, "grad_norm": 0.6435421705245972, "learning_rate": 2.2165520748969552e-05, "loss": 0.0794, "step": 20397 }, { "epoch": 0.3612391363058791, "grad_norm": 0.6457093358039856, "learning_rate": 2.2164764887718037e-05, "loss": 0.0926, "step": 20398 }, { "epoch": 0.36125684584290757, "grad_norm": 0.7119149565696716, "learning_rate": 2.216400900289517e-05, "loss": 0.1014, "step": 20399 }, { "epoch": 0.36127455537993597, "grad_norm": 0.5089550614356995, "learning_rate": 2.2163253094503435e-05, "loss": 0.0858, "step": 20400 }, { "epoch": 0.3612922649169644, "grad_norm": 0.7100344300270081, "learning_rate": 2.2162497162545326e-05, "loss": 0.0965, "step": 20401 }, { "epoch": 0.3613099744539928, "grad_norm": 1.2617435455322266, "learning_rate": 2.2161741207023323e-05, "loss": 0.1329, "step": 20402 }, { "epoch": 0.3613276839910213, "grad_norm": 0.9338803887367249, "learning_rate": 2.2160985227939916e-05, "loss": 0.0629, "step": 20403 }, { "epoch": 0.3613453935280497, "grad_norm": 0.631824254989624, "learning_rate": 2.2160229225297595e-05, "loss": 0.1046, "step": 20404 }, { "epoch": 0.36136310306507813, "grad_norm": 0.7755141258239746, "learning_rate": 2.215947319909884e-05, "loss": 0.0692, "step": 20405 }, { "epoch": 0.3613808126021065, "grad_norm": 0.8288351893424988, "learning_rate": 2.2158717149346144e-05, "loss": 0.0939, "step": 20406 }, { "epoch": 0.361398522139135, "grad_norm": 0.9806278347969055, "learning_rate": 2.215796107604199e-05, "loss": 0.0968, "step": 20407 }, { "epoch": 0.3614162316761634, "grad_norm": 0.35341909527778625, "learning_rate": 2.2157204979188874e-05, "loss": 0.0662, "step": 20408 }, { "epoch": 0.36143394121319183, "grad_norm": 0.8056822419166565, "learning_rate": 2.215644885878927e-05, "loss": 0.073, "step": 20409 }, { "epoch": 0.3614516507502203, "grad_norm": 0.6870368719100952, "learning_rate": 2.215569271484568e-05, "loss": 0.1035, "step": 20410 }, { "epoch": 0.3614693602872487, "grad_norm": 0.4976816773414612, "learning_rate": 2.2154936547360584e-05, "loss": 0.0802, "step": 20411 }, { "epoch": 0.36148706982427714, "grad_norm": 0.7993669509887695, "learning_rate": 2.2154180356336465e-05, "loss": 0.0875, "step": 20412 }, { "epoch": 0.36150477936130554, "grad_norm": 0.8193355202674866, "learning_rate": 2.215342414177582e-05, "loss": 0.0645, "step": 20413 }, { "epoch": 0.361522488898334, "grad_norm": 0.7716719508171082, "learning_rate": 2.2152667903681134e-05, "loss": 0.0838, "step": 20414 }, { "epoch": 0.3615401984353624, "grad_norm": 0.6510171294212341, "learning_rate": 2.215191164205489e-05, "loss": 0.0849, "step": 20415 }, { "epoch": 0.36155790797239085, "grad_norm": 0.7054084539413452, "learning_rate": 2.215115535689958e-05, "loss": 0.0829, "step": 20416 }, { "epoch": 0.36157561750941924, "grad_norm": 0.6746551990509033, "learning_rate": 2.2150399048217697e-05, "loss": 0.079, "step": 20417 }, { "epoch": 0.3615933270464477, "grad_norm": 0.7989205121994019, "learning_rate": 2.2149642716011723e-05, "loss": 0.0964, "step": 20418 }, { "epoch": 0.3616110365834761, "grad_norm": 0.8510545492172241, "learning_rate": 2.214888636028415e-05, "loss": 0.0759, "step": 20419 }, { "epoch": 0.36162874612050455, "grad_norm": 0.6555280089378357, "learning_rate": 2.214812998103746e-05, "loss": 0.0639, "step": 20420 }, { "epoch": 0.36164645565753295, "grad_norm": 0.7132968902587891, "learning_rate": 2.214737357827414e-05, "loss": 0.1024, "step": 20421 }, { "epoch": 0.3616641651945614, "grad_norm": 1.027931571006775, "learning_rate": 2.2146617151996693e-05, "loss": 0.0869, "step": 20422 }, { "epoch": 0.3616818747315898, "grad_norm": 0.860202968120575, "learning_rate": 2.2145860702207594e-05, "loss": 0.0727, "step": 20423 }, { "epoch": 0.36169958426861826, "grad_norm": 0.7914077043533325, "learning_rate": 2.2145104228909343e-05, "loss": 0.1102, "step": 20424 }, { "epoch": 0.3617172938056467, "grad_norm": 0.796532928943634, "learning_rate": 2.2144347732104413e-05, "loss": 0.0637, "step": 20425 }, { "epoch": 0.3617350033426751, "grad_norm": 0.5528208613395691, "learning_rate": 2.2143591211795304e-05, "loss": 0.092, "step": 20426 }, { "epoch": 0.36175271287970356, "grad_norm": 0.5276424884796143, "learning_rate": 2.2142834667984504e-05, "loss": 0.1048, "step": 20427 }, { "epoch": 0.36177042241673196, "grad_norm": 0.9725465178489685, "learning_rate": 2.21420781006745e-05, "loss": 0.0633, "step": 20428 }, { "epoch": 0.3617881319537604, "grad_norm": 0.9996998310089111, "learning_rate": 2.214132150986778e-05, "loss": 0.1209, "step": 20429 }, { "epoch": 0.3618058414907888, "grad_norm": 0.7451820373535156, "learning_rate": 2.214056489556684e-05, "loss": 0.1181, "step": 20430 }, { "epoch": 0.36182355102781727, "grad_norm": 0.7525271773338318, "learning_rate": 2.2139808257774155e-05, "loss": 0.0722, "step": 20431 }, { "epoch": 0.36184126056484567, "grad_norm": 0.7061256766319275, "learning_rate": 2.2139051596492225e-05, "loss": 0.0812, "step": 20432 }, { "epoch": 0.3618589701018741, "grad_norm": 0.8248368501663208, "learning_rate": 2.213829491172354e-05, "loss": 0.094, "step": 20433 }, { "epoch": 0.3618766796389025, "grad_norm": 0.4623115360736847, "learning_rate": 2.2137538203470592e-05, "loss": 0.0656, "step": 20434 }, { "epoch": 0.361894389175931, "grad_norm": 0.7648775577545166, "learning_rate": 2.2136781471735856e-05, "loss": 0.0726, "step": 20435 }, { "epoch": 0.3619120987129594, "grad_norm": 0.6087368130683899, "learning_rate": 2.2136024716521838e-05, "loss": 0.0803, "step": 20436 }, { "epoch": 0.3619298082499878, "grad_norm": 0.7784574031829834, "learning_rate": 2.2135267937831014e-05, "loss": 0.0829, "step": 20437 }, { "epoch": 0.3619475177870162, "grad_norm": 0.5183783769607544, "learning_rate": 2.2134511135665882e-05, "loss": 0.0464, "step": 20438 }, { "epoch": 0.3619652273240447, "grad_norm": 0.5467854142189026, "learning_rate": 2.213375431002893e-05, "loss": 0.0638, "step": 20439 }, { "epoch": 0.36198293686107313, "grad_norm": 0.8593279123306274, "learning_rate": 2.213299746092265e-05, "loss": 0.0944, "step": 20440 }, { "epoch": 0.36200064639810153, "grad_norm": 0.8867830038070679, "learning_rate": 2.2132240588349526e-05, "loss": 0.093, "step": 20441 }, { "epoch": 0.36201835593513, "grad_norm": 0.7676078081130981, "learning_rate": 2.213148369231206e-05, "loss": 0.1506, "step": 20442 }, { "epoch": 0.3620360654721584, "grad_norm": 0.7820625305175781, "learning_rate": 2.2130726772812725e-05, "loss": 0.0667, "step": 20443 }, { "epoch": 0.36205377500918684, "grad_norm": 0.9999024271965027, "learning_rate": 2.2129969829854026e-05, "loss": 0.1176, "step": 20444 }, { "epoch": 0.36207148454621524, "grad_norm": 0.6400394439697266, "learning_rate": 2.2129212863438445e-05, "loss": 0.085, "step": 20445 }, { "epoch": 0.3620891940832437, "grad_norm": 0.6020144820213318, "learning_rate": 2.212845587356847e-05, "loss": 0.0989, "step": 20446 }, { "epoch": 0.3621069036202721, "grad_norm": 0.9739484786987305, "learning_rate": 2.2127698860246603e-05, "loss": 0.076, "step": 20447 }, { "epoch": 0.36212461315730055, "grad_norm": 1.0509648323059082, "learning_rate": 2.2126941823475323e-05, "loss": 0.0976, "step": 20448 }, { "epoch": 0.36214232269432894, "grad_norm": 0.6469429135322571, "learning_rate": 2.2126184763257128e-05, "loss": 0.1131, "step": 20449 }, { "epoch": 0.3621600322313574, "grad_norm": 0.8626314997673035, "learning_rate": 2.2125427679594507e-05, "loss": 0.1325, "step": 20450 }, { "epoch": 0.3621777417683858, "grad_norm": 0.610164225101471, "learning_rate": 2.212467057248995e-05, "loss": 0.0712, "step": 20451 }, { "epoch": 0.36219545130541425, "grad_norm": 1.4264891147613525, "learning_rate": 2.2123913441945948e-05, "loss": 0.0918, "step": 20452 }, { "epoch": 0.36221316084244265, "grad_norm": 0.3212711811065674, "learning_rate": 2.2123156287964986e-05, "loss": 0.0735, "step": 20453 }, { "epoch": 0.3622308703794711, "grad_norm": 1.0368369817733765, "learning_rate": 2.2122399110549564e-05, "loss": 0.0969, "step": 20454 }, { "epoch": 0.36224857991649956, "grad_norm": 1.4494855403900146, "learning_rate": 2.2121641909702168e-05, "loss": 0.1137, "step": 20455 }, { "epoch": 0.36226628945352796, "grad_norm": 0.47992897033691406, "learning_rate": 2.2120884685425295e-05, "loss": 0.1009, "step": 20456 }, { "epoch": 0.3622839989905564, "grad_norm": 0.3376636207103729, "learning_rate": 2.2120127437721425e-05, "loss": 0.0553, "step": 20457 }, { "epoch": 0.3623017085275848, "grad_norm": 0.6271530985832214, "learning_rate": 2.211937016659306e-05, "loss": 0.0816, "step": 20458 }, { "epoch": 0.36231941806461326, "grad_norm": 0.8465663194656372, "learning_rate": 2.2118612872042684e-05, "loss": 0.1035, "step": 20459 }, { "epoch": 0.36233712760164166, "grad_norm": 0.7493521571159363, "learning_rate": 2.2117855554072793e-05, "loss": 0.0887, "step": 20460 }, { "epoch": 0.3623548371386701, "grad_norm": 0.7287071347236633, "learning_rate": 2.211709821268588e-05, "loss": 0.144, "step": 20461 }, { "epoch": 0.3623725466756985, "grad_norm": 0.9995431900024414, "learning_rate": 2.211634084788443e-05, "loss": 0.0759, "step": 20462 }, { "epoch": 0.36239025621272697, "grad_norm": 0.8920299410820007, "learning_rate": 2.211558345967094e-05, "loss": 0.1335, "step": 20463 }, { "epoch": 0.36240796574975537, "grad_norm": 1.2674540281295776, "learning_rate": 2.2114826048047898e-05, "loss": 0.1463, "step": 20464 }, { "epoch": 0.3624256752867838, "grad_norm": 0.6577025651931763, "learning_rate": 2.21140686130178e-05, "loss": 0.0722, "step": 20465 }, { "epoch": 0.3624433848238122, "grad_norm": 0.8905543088912964, "learning_rate": 2.2113311154583134e-05, "loss": 0.088, "step": 20466 }, { "epoch": 0.3624610943608407, "grad_norm": 0.5211727023124695, "learning_rate": 2.2112553672746398e-05, "loss": 0.084, "step": 20467 }, { "epoch": 0.3624788038978691, "grad_norm": 2.517427921295166, "learning_rate": 2.2111796167510076e-05, "loss": 0.0724, "step": 20468 }, { "epoch": 0.3624965134348975, "grad_norm": 0.6038795709609985, "learning_rate": 2.2111038638876663e-05, "loss": 0.1141, "step": 20469 }, { "epoch": 0.362514222971926, "grad_norm": 1.118303656578064, "learning_rate": 2.211028108684866e-05, "loss": 0.0758, "step": 20470 }, { "epoch": 0.3625319325089544, "grad_norm": 0.9407668709754944, "learning_rate": 2.210952351142854e-05, "loss": 0.0715, "step": 20471 }, { "epoch": 0.36254964204598283, "grad_norm": 0.6206469535827637, "learning_rate": 2.210876591261882e-05, "loss": 0.0962, "step": 20472 }, { "epoch": 0.36256735158301123, "grad_norm": 1.1792144775390625, "learning_rate": 2.2108008290421967e-05, "loss": 0.0973, "step": 20473 }, { "epoch": 0.3625850611200397, "grad_norm": 0.7178950309753418, "learning_rate": 2.210725064484049e-05, "loss": 0.096, "step": 20474 }, { "epoch": 0.3626027706570681, "grad_norm": 0.7660735249519348, "learning_rate": 2.2106492975876878e-05, "loss": 0.0838, "step": 20475 }, { "epoch": 0.36262048019409654, "grad_norm": 0.4065074026584625, "learning_rate": 2.210573528353362e-05, "loss": 0.0925, "step": 20476 }, { "epoch": 0.36263818973112494, "grad_norm": 1.1814985275268555, "learning_rate": 2.2104977567813215e-05, "loss": 0.106, "step": 20477 }, { "epoch": 0.3626558992681534, "grad_norm": 0.8171623945236206, "learning_rate": 2.210421982871815e-05, "loss": 0.1154, "step": 20478 }, { "epoch": 0.3626736088051818, "grad_norm": 0.44106602668762207, "learning_rate": 2.210346206625092e-05, "loss": 0.0527, "step": 20479 }, { "epoch": 0.36269131834221024, "grad_norm": 0.7179314494132996, "learning_rate": 2.2102704280414023e-05, "loss": 0.0864, "step": 20480 }, { "epoch": 0.36270902787923864, "grad_norm": 0.9965715408325195, "learning_rate": 2.2101946471209943e-05, "loss": 0.0884, "step": 20481 }, { "epoch": 0.3627267374162671, "grad_norm": 0.6473599672317505, "learning_rate": 2.210118863864118e-05, "loss": 0.0598, "step": 20482 }, { "epoch": 0.3627444469532955, "grad_norm": 0.7941656112670898, "learning_rate": 2.2100430782710222e-05, "loss": 0.1236, "step": 20483 }, { "epoch": 0.36276215649032395, "grad_norm": 1.053956151008606, "learning_rate": 2.209967290341957e-05, "loss": 0.091, "step": 20484 }, { "epoch": 0.3627798660273524, "grad_norm": 0.8772370219230652, "learning_rate": 2.2098915000771705e-05, "loss": 0.0964, "step": 20485 }, { "epoch": 0.3627975755643808, "grad_norm": 1.138700008392334, "learning_rate": 2.2098157074769133e-05, "loss": 0.1151, "step": 20486 }, { "epoch": 0.36281528510140926, "grad_norm": 0.893129825592041, "learning_rate": 2.209739912541434e-05, "loss": 0.0968, "step": 20487 }, { "epoch": 0.36283299463843766, "grad_norm": 0.7793967127799988, "learning_rate": 2.2096641152709822e-05, "loss": 0.0713, "step": 20488 }, { "epoch": 0.3628507041754661, "grad_norm": 0.7101898193359375, "learning_rate": 2.209588315665807e-05, "loss": 0.0816, "step": 20489 }, { "epoch": 0.3628684137124945, "grad_norm": 0.8223959803581238, "learning_rate": 2.2095125137261584e-05, "loss": 0.0816, "step": 20490 }, { "epoch": 0.36288612324952296, "grad_norm": 0.9509740471839905, "learning_rate": 2.2094367094522856e-05, "loss": 0.086, "step": 20491 }, { "epoch": 0.36290383278655136, "grad_norm": 0.638199508190155, "learning_rate": 2.2093609028444372e-05, "loss": 0.0809, "step": 20492 }, { "epoch": 0.3629215423235798, "grad_norm": 1.0619264841079712, "learning_rate": 2.2092850939028633e-05, "loss": 0.0804, "step": 20493 }, { "epoch": 0.3629392518606082, "grad_norm": 0.5158715844154358, "learning_rate": 2.209209282627813e-05, "loss": 0.0888, "step": 20494 }, { "epoch": 0.36295696139763667, "grad_norm": 0.5642863512039185, "learning_rate": 2.2091334690195364e-05, "loss": 0.0794, "step": 20495 }, { "epoch": 0.36297467093466507, "grad_norm": 0.6016080975532532, "learning_rate": 2.209057653078282e-05, "loss": 0.0663, "step": 20496 }, { "epoch": 0.3629923804716935, "grad_norm": 0.694192111492157, "learning_rate": 2.2089818348042998e-05, "loss": 0.1083, "step": 20497 }, { "epoch": 0.3630100900087219, "grad_norm": 0.9433215260505676, "learning_rate": 2.2089060141978394e-05, "loss": 0.0841, "step": 20498 }, { "epoch": 0.3630277995457504, "grad_norm": 0.7216913104057312, "learning_rate": 2.2088301912591494e-05, "loss": 0.0866, "step": 20499 }, { "epoch": 0.36304550908277883, "grad_norm": 0.7811867594718933, "learning_rate": 2.2087543659884805e-05, "loss": 0.0613, "step": 20500 }, { "epoch": 0.3630632186198072, "grad_norm": 0.6637930870056152, "learning_rate": 2.2086785383860806e-05, "loss": 0.0922, "step": 20501 }, { "epoch": 0.3630809281568357, "grad_norm": 0.7799822092056274, "learning_rate": 2.2086027084522e-05, "loss": 0.0783, "step": 20502 }, { "epoch": 0.3630986376938641, "grad_norm": 0.6227748394012451, "learning_rate": 2.2085268761870885e-05, "loss": 0.0698, "step": 20503 }, { "epoch": 0.36311634723089253, "grad_norm": 0.6699866056442261, "learning_rate": 2.208451041590995e-05, "loss": 0.1078, "step": 20504 }, { "epoch": 0.36313405676792093, "grad_norm": 0.6516090631484985, "learning_rate": 2.2083752046641695e-05, "loss": 0.0782, "step": 20505 }, { "epoch": 0.3631517663049494, "grad_norm": 0.9504691958427429, "learning_rate": 2.2082993654068612e-05, "loss": 0.0561, "step": 20506 }, { "epoch": 0.3631694758419778, "grad_norm": 0.7840246558189392, "learning_rate": 2.20822352381932e-05, "loss": 0.0749, "step": 20507 }, { "epoch": 0.36318718537900624, "grad_norm": 0.7502316832542419, "learning_rate": 2.2081476799017942e-05, "loss": 0.0624, "step": 20508 }, { "epoch": 0.36320489491603464, "grad_norm": 0.6015203595161438, "learning_rate": 2.2080718336545346e-05, "loss": 0.0869, "step": 20509 }, { "epoch": 0.3632226044530631, "grad_norm": 0.4173950254917145, "learning_rate": 2.2079959850777903e-05, "loss": 0.0683, "step": 20510 }, { "epoch": 0.3632403139900915, "grad_norm": 0.7874374389648438, "learning_rate": 2.2079201341718105e-05, "loss": 0.0922, "step": 20511 }, { "epoch": 0.36325802352711994, "grad_norm": 0.9190818667411804, "learning_rate": 2.2078442809368452e-05, "loss": 0.1083, "step": 20512 }, { "epoch": 0.36327573306414834, "grad_norm": 0.6957783102989197, "learning_rate": 2.2077684253731443e-05, "loss": 0.0856, "step": 20513 }, { "epoch": 0.3632934426011768, "grad_norm": 0.8500660061836243, "learning_rate": 2.2076925674809564e-05, "loss": 0.1133, "step": 20514 }, { "epoch": 0.36331115213820525, "grad_norm": 0.6385482549667358, "learning_rate": 2.2076167072605316e-05, "loss": 0.0647, "step": 20515 }, { "epoch": 0.36332886167523365, "grad_norm": 0.48519620299339294, "learning_rate": 2.2075408447121195e-05, "loss": 0.0673, "step": 20516 }, { "epoch": 0.3633465712122621, "grad_norm": 0.9573991298675537, "learning_rate": 2.2074649798359694e-05, "loss": 0.0983, "step": 20517 }, { "epoch": 0.3633642807492905, "grad_norm": 0.8324665427207947, "learning_rate": 2.2073891126323313e-05, "loss": 0.0707, "step": 20518 }, { "epoch": 0.36338199028631896, "grad_norm": 0.8054936528205872, "learning_rate": 2.2073132431014546e-05, "loss": 0.1039, "step": 20519 }, { "epoch": 0.36339969982334736, "grad_norm": 0.8379967212677002, "learning_rate": 2.2072373712435887e-05, "loss": 0.081, "step": 20520 }, { "epoch": 0.3634174093603758, "grad_norm": 0.8905547261238098, "learning_rate": 2.2071614970589832e-05, "loss": 0.0891, "step": 20521 }, { "epoch": 0.3634351188974042, "grad_norm": 0.9394789338111877, "learning_rate": 2.2070856205478885e-05, "loss": 0.0813, "step": 20522 }, { "epoch": 0.36345282843443266, "grad_norm": 0.7898290157318115, "learning_rate": 2.207009741710553e-05, "loss": 0.0761, "step": 20523 }, { "epoch": 0.36347053797146106, "grad_norm": 0.7135241031646729, "learning_rate": 2.2069338605472272e-05, "loss": 0.1105, "step": 20524 }, { "epoch": 0.3634882475084895, "grad_norm": 0.5765112638473511, "learning_rate": 2.2068579770581607e-05, "loss": 0.1142, "step": 20525 }, { "epoch": 0.3635059570455179, "grad_norm": 0.6143401265144348, "learning_rate": 2.2067820912436024e-05, "loss": 0.1155, "step": 20526 }, { "epoch": 0.36352366658254637, "grad_norm": 0.7453172206878662, "learning_rate": 2.2067062031038032e-05, "loss": 0.077, "step": 20527 }, { "epoch": 0.3635413761195748, "grad_norm": 0.7851638197898865, "learning_rate": 2.2066303126390113e-05, "loss": 0.1015, "step": 20528 }, { "epoch": 0.3635590856566032, "grad_norm": 0.7152829766273499, "learning_rate": 2.206554419849478e-05, "loss": 0.0893, "step": 20529 }, { "epoch": 0.3635767951936317, "grad_norm": 0.6894380450248718, "learning_rate": 2.2064785247354517e-05, "loss": 0.0905, "step": 20530 }, { "epoch": 0.3635945047306601, "grad_norm": 0.6163634061813354, "learning_rate": 2.206402627297182e-05, "loss": 0.0537, "step": 20531 }, { "epoch": 0.3636122142676885, "grad_norm": 0.6898707747459412, "learning_rate": 2.20632672753492e-05, "loss": 0.0808, "step": 20532 }, { "epoch": 0.3636299238047169, "grad_norm": 0.7006522417068481, "learning_rate": 2.206250825448914e-05, "loss": 0.0817, "step": 20533 }, { "epoch": 0.3636476333417454, "grad_norm": 0.6118045449256897, "learning_rate": 2.2061749210394148e-05, "loss": 0.1034, "step": 20534 }, { "epoch": 0.3636653428787738, "grad_norm": 0.7508500814437866, "learning_rate": 2.2060990143066704e-05, "loss": 0.1156, "step": 20535 }, { "epoch": 0.36368305241580223, "grad_norm": 0.7804729342460632, "learning_rate": 2.2060231052509325e-05, "loss": 0.0686, "step": 20536 }, { "epoch": 0.36370076195283063, "grad_norm": 1.0008331537246704, "learning_rate": 2.20594719387245e-05, "loss": 0.0762, "step": 20537 }, { "epoch": 0.3637184714898591, "grad_norm": 0.9251857995986938, "learning_rate": 2.2058712801714722e-05, "loss": 0.0914, "step": 20538 }, { "epoch": 0.3637361810268875, "grad_norm": 0.699527382850647, "learning_rate": 2.20579536414825e-05, "loss": 0.0509, "step": 20539 }, { "epoch": 0.36375389056391594, "grad_norm": 0.5309996604919434, "learning_rate": 2.2057194458030318e-05, "loss": 0.0711, "step": 20540 }, { "epoch": 0.36377160010094434, "grad_norm": 0.658140242099762, "learning_rate": 2.205643525136068e-05, "loss": 0.0588, "step": 20541 }, { "epoch": 0.3637893096379728, "grad_norm": 1.1000763177871704, "learning_rate": 2.2055676021476084e-05, "loss": 0.1157, "step": 20542 }, { "epoch": 0.36380701917500124, "grad_norm": 0.6937608122825623, "learning_rate": 2.205491676837903e-05, "loss": 0.1117, "step": 20543 }, { "epoch": 0.36382472871202964, "grad_norm": 0.9747910499572754, "learning_rate": 2.2054157492072012e-05, "loss": 0.0642, "step": 20544 }, { "epoch": 0.3638424382490581, "grad_norm": 0.46129605174064636, "learning_rate": 2.205339819255753e-05, "loss": 0.0629, "step": 20545 }, { "epoch": 0.3638601477860865, "grad_norm": 0.8663727641105652, "learning_rate": 2.2052638869838083e-05, "loss": 0.0803, "step": 20546 }, { "epoch": 0.36387785732311495, "grad_norm": 0.5640580654144287, "learning_rate": 2.2051879523916164e-05, "loss": 0.0724, "step": 20547 }, { "epoch": 0.36389556686014335, "grad_norm": 0.7049669623374939, "learning_rate": 2.2051120154794276e-05, "loss": 0.0684, "step": 20548 }, { "epoch": 0.3639132763971718, "grad_norm": 0.8947818279266357, "learning_rate": 2.2050360762474917e-05, "loss": 0.1052, "step": 20549 }, { "epoch": 0.3639309859342002, "grad_norm": 0.41823121905326843, "learning_rate": 2.2049601346960585e-05, "loss": 0.0695, "step": 20550 }, { "epoch": 0.36394869547122866, "grad_norm": 0.8165623545646667, "learning_rate": 2.204884190825377e-05, "loss": 0.0945, "step": 20551 }, { "epoch": 0.36396640500825705, "grad_norm": 0.5256097912788391, "learning_rate": 2.204808244635699e-05, "loss": 0.0899, "step": 20552 }, { "epoch": 0.3639841145452855, "grad_norm": 0.6945488452911377, "learning_rate": 2.2047322961272726e-05, "loss": 0.0827, "step": 20553 }, { "epoch": 0.3640018240823139, "grad_norm": 0.7528715133666992, "learning_rate": 2.204656345300348e-05, "loss": 0.0832, "step": 20554 }, { "epoch": 0.36401953361934236, "grad_norm": 0.5109801888465881, "learning_rate": 2.204580392155175e-05, "loss": 0.0636, "step": 20555 }, { "epoch": 0.36403724315637076, "grad_norm": 0.97170490026474, "learning_rate": 2.2045044366920046e-05, "loss": 0.073, "step": 20556 }, { "epoch": 0.3640549526933992, "grad_norm": 0.6788581013679504, "learning_rate": 2.2044284789110857e-05, "loss": 0.0701, "step": 20557 }, { "epoch": 0.36407266223042767, "grad_norm": 0.8887023329734802, "learning_rate": 2.204352518812668e-05, "loss": 0.0949, "step": 20558 }, { "epoch": 0.36409037176745607, "grad_norm": 0.7865967750549316, "learning_rate": 2.204276556397002e-05, "loss": 0.0685, "step": 20559 }, { "epoch": 0.3641080813044845, "grad_norm": 1.5169240236282349, "learning_rate": 2.204200591664337e-05, "loss": 0.1091, "step": 20560 }, { "epoch": 0.3641257908415129, "grad_norm": 0.7332507967948914, "learning_rate": 2.204124624614924e-05, "loss": 0.0797, "step": 20561 }, { "epoch": 0.3641435003785414, "grad_norm": 0.8636038899421692, "learning_rate": 2.2040486552490117e-05, "loss": 0.1046, "step": 20562 }, { "epoch": 0.3641612099155698, "grad_norm": 0.9824678897857666, "learning_rate": 2.2039726835668506e-05, "loss": 0.0682, "step": 20563 }, { "epoch": 0.3641789194525982, "grad_norm": 0.6286963224411011, "learning_rate": 2.2038967095686903e-05, "loss": 0.0682, "step": 20564 }, { "epoch": 0.3641966289896266, "grad_norm": 0.4997776448726654, "learning_rate": 2.2038207332547813e-05, "loss": 0.0584, "step": 20565 }, { "epoch": 0.3642143385266551, "grad_norm": 0.9262570142745972, "learning_rate": 2.2037447546253735e-05, "loss": 0.0916, "step": 20566 }, { "epoch": 0.3642320480636835, "grad_norm": 0.597027063369751, "learning_rate": 2.203668773680716e-05, "loss": 0.0913, "step": 20567 }, { "epoch": 0.36424975760071193, "grad_norm": 0.556550145149231, "learning_rate": 2.2035927904210603e-05, "loss": 0.0703, "step": 20568 }, { "epoch": 0.36426746713774033, "grad_norm": 0.898347020149231, "learning_rate": 2.203516804846655e-05, "loss": 0.0872, "step": 20569 }, { "epoch": 0.3642851766747688, "grad_norm": 0.6676909327507019, "learning_rate": 2.2034408169577503e-05, "loss": 0.0587, "step": 20570 }, { "epoch": 0.3643028862117972, "grad_norm": 0.899039626121521, "learning_rate": 2.2033648267545968e-05, "loss": 0.067, "step": 20571 }, { "epoch": 0.36432059574882564, "grad_norm": 1.2471446990966797, "learning_rate": 2.203288834237444e-05, "loss": 0.0941, "step": 20572 }, { "epoch": 0.3643383052858541, "grad_norm": 0.6738815307617188, "learning_rate": 2.2032128394065425e-05, "loss": 0.0992, "step": 20573 }, { "epoch": 0.3643560148228825, "grad_norm": 0.8146885633468628, "learning_rate": 2.203136842262141e-05, "loss": 0.0778, "step": 20574 }, { "epoch": 0.36437372435991094, "grad_norm": 0.4328047037124634, "learning_rate": 2.2030608428044912e-05, "loss": 0.0773, "step": 20575 }, { "epoch": 0.36439143389693934, "grad_norm": 0.6427839398384094, "learning_rate": 2.2029848410338418e-05, "loss": 0.0646, "step": 20576 }, { "epoch": 0.3644091434339678, "grad_norm": 0.7447568774223328, "learning_rate": 2.202908836950444e-05, "loss": 0.0884, "step": 20577 }, { "epoch": 0.3644268529709962, "grad_norm": 0.5942512154579163, "learning_rate": 2.202832830554547e-05, "loss": 0.0899, "step": 20578 }, { "epoch": 0.36444456250802465, "grad_norm": 0.46956369280815125, "learning_rate": 2.202756821846401e-05, "loss": 0.1251, "step": 20579 }, { "epoch": 0.36446227204505305, "grad_norm": 0.9433608651161194, "learning_rate": 2.202680810826256e-05, "loss": 0.0791, "step": 20580 }, { "epoch": 0.3644799815820815, "grad_norm": 0.8338597416877747, "learning_rate": 2.202604797494362e-05, "loss": 0.1107, "step": 20581 }, { "epoch": 0.3644976911191099, "grad_norm": 1.094089150428772, "learning_rate": 2.2025287818509697e-05, "loss": 0.1304, "step": 20582 }, { "epoch": 0.36451540065613836, "grad_norm": 0.7233787178993225, "learning_rate": 2.2024527638963288e-05, "loss": 0.0929, "step": 20583 }, { "epoch": 0.36453311019316675, "grad_norm": 0.7119807004928589, "learning_rate": 2.202376743630689e-05, "loss": 0.0831, "step": 20584 }, { "epoch": 0.3645508197301952, "grad_norm": 0.7814031839370728, "learning_rate": 2.202300721054301e-05, "loss": 0.0692, "step": 20585 }, { "epoch": 0.3645685292672236, "grad_norm": 1.0618139505386353, "learning_rate": 2.2022246961674147e-05, "loss": 0.0602, "step": 20586 }, { "epoch": 0.36458623880425206, "grad_norm": 0.8188583850860596, "learning_rate": 2.2021486689702802e-05, "loss": 0.0803, "step": 20587 }, { "epoch": 0.3646039483412805, "grad_norm": 0.9632185101509094, "learning_rate": 2.2020726394631473e-05, "loss": 0.1186, "step": 20588 }, { "epoch": 0.3646216578783089, "grad_norm": 0.7012246251106262, "learning_rate": 2.2019966076462667e-05, "loss": 0.0697, "step": 20589 }, { "epoch": 0.36463936741533737, "grad_norm": 0.796058177947998, "learning_rate": 2.2019205735198877e-05, "loss": 0.0958, "step": 20590 }, { "epoch": 0.36465707695236577, "grad_norm": 0.3582543730735779, "learning_rate": 2.201844537084262e-05, "loss": 0.09, "step": 20591 }, { "epoch": 0.3646747864893942, "grad_norm": 0.6226882338523865, "learning_rate": 2.2017684983396382e-05, "loss": 0.0644, "step": 20592 }, { "epoch": 0.3646924960264226, "grad_norm": 0.6933177709579468, "learning_rate": 2.2016924572862668e-05, "loss": 0.0753, "step": 20593 }, { "epoch": 0.3647102055634511, "grad_norm": 1.097679853439331, "learning_rate": 2.2016164139243986e-05, "loss": 0.0773, "step": 20594 }, { "epoch": 0.36472791510047947, "grad_norm": 0.6553634405136108, "learning_rate": 2.201540368254283e-05, "loss": 0.0769, "step": 20595 }, { "epoch": 0.3647456246375079, "grad_norm": 0.4722885191440582, "learning_rate": 2.201464320276171e-05, "loss": 0.0723, "step": 20596 }, { "epoch": 0.3647633341745363, "grad_norm": 0.6957443952560425, "learning_rate": 2.201388269990312e-05, "loss": 0.0857, "step": 20597 }, { "epoch": 0.3647810437115648, "grad_norm": 0.3464624285697937, "learning_rate": 2.2013122173969564e-05, "loss": 0.0635, "step": 20598 }, { "epoch": 0.3647987532485932, "grad_norm": 0.6909922957420349, "learning_rate": 2.2012361624963547e-05, "loss": 0.0727, "step": 20599 }, { "epoch": 0.36481646278562163, "grad_norm": 0.8027104139328003, "learning_rate": 2.2011601052887572e-05, "loss": 0.1266, "step": 20600 }, { "epoch": 0.36483417232265003, "grad_norm": 0.624283492565155, "learning_rate": 2.2010840457744142e-05, "loss": 0.0714, "step": 20601 }, { "epoch": 0.3648518818596785, "grad_norm": 1.0753620862960815, "learning_rate": 2.2010079839535747e-05, "loss": 0.0726, "step": 20602 }, { "epoch": 0.36486959139670694, "grad_norm": 0.5744055509567261, "learning_rate": 2.2009319198264902e-05, "loss": 0.1252, "step": 20603 }, { "epoch": 0.36488730093373534, "grad_norm": 0.347445547580719, "learning_rate": 2.2008558533934106e-05, "loss": 0.0766, "step": 20604 }, { "epoch": 0.3649050104707638, "grad_norm": 0.846172571182251, "learning_rate": 2.200779784654586e-05, "loss": 0.0847, "step": 20605 }, { "epoch": 0.3649227200077922, "grad_norm": 0.6508521437644958, "learning_rate": 2.2007037136102673e-05, "loss": 0.0813, "step": 20606 }, { "epoch": 0.36494042954482064, "grad_norm": 0.6685647964477539, "learning_rate": 2.2006276402607032e-05, "loss": 0.1246, "step": 20607 }, { "epoch": 0.36495813908184904, "grad_norm": 0.8117619156837463, "learning_rate": 2.200551564606146e-05, "loss": 0.128, "step": 20608 }, { "epoch": 0.3649758486188775, "grad_norm": 0.48198065161705017, "learning_rate": 2.2004754866468448e-05, "loss": 0.0747, "step": 20609 }, { "epoch": 0.3649935581559059, "grad_norm": 0.7308517694473267, "learning_rate": 2.2003994063830504e-05, "loss": 0.0977, "step": 20610 }, { "epoch": 0.36501126769293435, "grad_norm": 0.7707126140594482, "learning_rate": 2.2003233238150123e-05, "loss": 0.0642, "step": 20611 }, { "epoch": 0.36502897722996275, "grad_norm": 1.0045236349105835, "learning_rate": 2.200247238942981e-05, "loss": 0.0815, "step": 20612 }, { "epoch": 0.3650466867669912, "grad_norm": 0.5568336248397827, "learning_rate": 2.2001711517672077e-05, "loss": 0.1053, "step": 20613 }, { "epoch": 0.3650643963040196, "grad_norm": 0.8647719621658325, "learning_rate": 2.200095062287942e-05, "loss": 0.0655, "step": 20614 }, { "epoch": 0.36508210584104805, "grad_norm": 0.5149389505386353, "learning_rate": 2.200018970505434e-05, "loss": 0.0805, "step": 20615 }, { "epoch": 0.36509981537807645, "grad_norm": 1.0927773714065552, "learning_rate": 2.1999428764199347e-05, "loss": 0.0999, "step": 20616 }, { "epoch": 0.3651175249151049, "grad_norm": 0.5340209007263184, "learning_rate": 2.1998667800316942e-05, "loss": 0.1023, "step": 20617 }, { "epoch": 0.36513523445213336, "grad_norm": 0.6714805960655212, "learning_rate": 2.1997906813409623e-05, "loss": 0.0953, "step": 20618 }, { "epoch": 0.36515294398916176, "grad_norm": 0.7628957629203796, "learning_rate": 2.1997145803479905e-05, "loss": 0.0747, "step": 20619 }, { "epoch": 0.3651706535261902, "grad_norm": 0.5699309706687927, "learning_rate": 2.1996384770530285e-05, "loss": 0.0664, "step": 20620 }, { "epoch": 0.3651883630632186, "grad_norm": 0.84215247631073, "learning_rate": 2.199562371456326e-05, "loss": 0.0832, "step": 20621 }, { "epoch": 0.36520607260024707, "grad_norm": 0.883948802947998, "learning_rate": 2.1994862635581348e-05, "loss": 0.0704, "step": 20622 }, { "epoch": 0.36522378213727547, "grad_norm": 0.762067437171936, "learning_rate": 2.1994101533587043e-05, "loss": 0.0851, "step": 20623 }, { "epoch": 0.3652414916743039, "grad_norm": 0.6671761870384216, "learning_rate": 2.199334040858285e-05, "loss": 0.1026, "step": 20624 }, { "epoch": 0.3652592012113323, "grad_norm": 0.5924562811851501, "learning_rate": 2.1992579260571274e-05, "loss": 0.0609, "step": 20625 }, { "epoch": 0.3652769107483608, "grad_norm": 0.8026044964790344, "learning_rate": 2.199181808955482e-05, "loss": 0.0996, "step": 20626 }, { "epoch": 0.36529462028538917, "grad_norm": 0.622039258480072, "learning_rate": 2.199105689553599e-05, "loss": 0.0771, "step": 20627 }, { "epoch": 0.3653123298224176, "grad_norm": 0.7561509013175964, "learning_rate": 2.1990295678517292e-05, "loss": 0.0979, "step": 20628 }, { "epoch": 0.365330039359446, "grad_norm": 0.8177732825279236, "learning_rate": 2.198953443850123e-05, "loss": 0.101, "step": 20629 }, { "epoch": 0.3653477488964745, "grad_norm": 0.6649754643440247, "learning_rate": 2.1988773175490307e-05, "loss": 0.0708, "step": 20630 }, { "epoch": 0.3653654584335029, "grad_norm": 0.7833030223846436, "learning_rate": 2.198801188948702e-05, "loss": 0.1095, "step": 20631 }, { "epoch": 0.36538316797053133, "grad_norm": 0.5585266947746277, "learning_rate": 2.198725058049389e-05, "loss": 0.0739, "step": 20632 }, { "epoch": 0.3654008775075598, "grad_norm": 0.738446831703186, "learning_rate": 2.1986489248513406e-05, "loss": 0.0794, "step": 20633 }, { "epoch": 0.3654185870445882, "grad_norm": 0.5782507061958313, "learning_rate": 2.198572789354808e-05, "loss": 0.0435, "step": 20634 }, { "epoch": 0.36543629658161664, "grad_norm": 0.8072814345359802, "learning_rate": 2.1984966515600418e-05, "loss": 0.0855, "step": 20635 }, { "epoch": 0.36545400611864504, "grad_norm": 0.8472094535827637, "learning_rate": 2.198420511467292e-05, "loss": 0.0886, "step": 20636 }, { "epoch": 0.3654717156556735, "grad_norm": 0.6768605709075928, "learning_rate": 2.1983443690768096e-05, "loss": 0.0918, "step": 20637 }, { "epoch": 0.3654894251927019, "grad_norm": 0.6277567744255066, "learning_rate": 2.198268224388845e-05, "loss": 0.0644, "step": 20638 }, { "epoch": 0.36550713472973034, "grad_norm": 0.6324995756149292, "learning_rate": 2.198192077403648e-05, "loss": 0.0931, "step": 20639 }, { "epoch": 0.36552484426675874, "grad_norm": 0.7625487446784973, "learning_rate": 2.1981159281214698e-05, "loss": 0.0661, "step": 20640 }, { "epoch": 0.3655425538037872, "grad_norm": 0.6252650022506714, "learning_rate": 2.198039776542561e-05, "loss": 0.0607, "step": 20641 }, { "epoch": 0.3655602633408156, "grad_norm": 1.0058834552764893, "learning_rate": 2.1979636226671723e-05, "loss": 0.0546, "step": 20642 }, { "epoch": 0.36557797287784405, "grad_norm": 0.8361390233039856, "learning_rate": 2.1978874664955534e-05, "loss": 0.0907, "step": 20643 }, { "epoch": 0.36559568241487245, "grad_norm": 0.5471735000610352, "learning_rate": 2.1978113080279554e-05, "loss": 0.0654, "step": 20644 }, { "epoch": 0.3656133919519009, "grad_norm": 1.085180640220642, "learning_rate": 2.197735147264629e-05, "loss": 0.0747, "step": 20645 }, { "epoch": 0.3656311014889293, "grad_norm": 0.5830361843109131, "learning_rate": 2.197658984205824e-05, "loss": 0.0405, "step": 20646 }, { "epoch": 0.36564881102595775, "grad_norm": 0.817389726638794, "learning_rate": 2.197582818851792e-05, "loss": 0.1218, "step": 20647 }, { "epoch": 0.3656665205629862, "grad_norm": 1.2718284130096436, "learning_rate": 2.197506651202783e-05, "loss": 0.1195, "step": 20648 }, { "epoch": 0.3656842301000146, "grad_norm": 0.5339906215667725, "learning_rate": 2.197430481259048e-05, "loss": 0.0659, "step": 20649 }, { "epoch": 0.36570193963704306, "grad_norm": 0.5117392539978027, "learning_rate": 2.1973543090208365e-05, "loss": 0.0758, "step": 20650 }, { "epoch": 0.36571964917407146, "grad_norm": 0.8055394291877747, "learning_rate": 2.1972781344884002e-05, "loss": 0.0995, "step": 20651 }, { "epoch": 0.3657373587110999, "grad_norm": 0.4671858847141266, "learning_rate": 2.1972019576619892e-05, "loss": 0.0722, "step": 20652 }, { "epoch": 0.3657550682481283, "grad_norm": 0.8641403317451477, "learning_rate": 2.1971257785418547e-05, "loss": 0.1102, "step": 20653 }, { "epoch": 0.36577277778515677, "grad_norm": 0.6451671123504639, "learning_rate": 2.1970495971282468e-05, "loss": 0.0897, "step": 20654 }, { "epoch": 0.36579048732218516, "grad_norm": 1.0358878374099731, "learning_rate": 2.196973413421416e-05, "loss": 0.0992, "step": 20655 }, { "epoch": 0.3658081968592136, "grad_norm": 0.4731616973876953, "learning_rate": 2.1968972274216135e-05, "loss": 0.0774, "step": 20656 }, { "epoch": 0.365825906396242, "grad_norm": 0.7736790776252747, "learning_rate": 2.196821039129089e-05, "loss": 0.1179, "step": 20657 }, { "epoch": 0.36584361593327047, "grad_norm": 0.8097290396690369, "learning_rate": 2.1967448485440945e-05, "loss": 0.1172, "step": 20658 }, { "epoch": 0.36586132547029887, "grad_norm": 1.2150853872299194, "learning_rate": 2.1966686556668792e-05, "loss": 0.0927, "step": 20659 }, { "epoch": 0.3658790350073273, "grad_norm": 1.0113670825958252, "learning_rate": 2.1965924604976947e-05, "loss": 0.1051, "step": 20660 }, { "epoch": 0.3658967445443557, "grad_norm": 1.112399697303772, "learning_rate": 2.1965162630367917e-05, "loss": 0.0786, "step": 20661 }, { "epoch": 0.3659144540813842, "grad_norm": 0.577256441116333, "learning_rate": 2.1964400632844206e-05, "loss": 0.0834, "step": 20662 }, { "epoch": 0.36593216361841263, "grad_norm": 0.6962343454360962, "learning_rate": 2.1963638612408322e-05, "loss": 0.0713, "step": 20663 }, { "epoch": 0.36594987315544103, "grad_norm": 0.6739094853401184, "learning_rate": 2.196287656906277e-05, "loss": 0.0678, "step": 20664 }, { "epoch": 0.3659675826924695, "grad_norm": 0.8702988624572754, "learning_rate": 2.1962114502810056e-05, "loss": 0.0935, "step": 20665 }, { "epoch": 0.3659852922294979, "grad_norm": 0.7479133009910583, "learning_rate": 2.1961352413652692e-05, "loss": 0.0866, "step": 20666 }, { "epoch": 0.36600300176652634, "grad_norm": 0.7805483937263489, "learning_rate": 2.1960590301593183e-05, "loss": 0.0725, "step": 20667 }, { "epoch": 0.36602071130355474, "grad_norm": 0.5213544964790344, "learning_rate": 2.1959828166634033e-05, "loss": 0.075, "step": 20668 }, { "epoch": 0.3660384208405832, "grad_norm": 0.7332226037979126, "learning_rate": 2.1959066008777757e-05, "loss": 0.0896, "step": 20669 }, { "epoch": 0.3660561303776116, "grad_norm": 0.695850133895874, "learning_rate": 2.195830382802685e-05, "loss": 0.0808, "step": 20670 }, { "epoch": 0.36607383991464004, "grad_norm": 0.9371636509895325, "learning_rate": 2.1957541624383835e-05, "loss": 0.0654, "step": 20671 }, { "epoch": 0.36609154945166844, "grad_norm": 0.5261837840080261, "learning_rate": 2.195677939785121e-05, "loss": 0.083, "step": 20672 }, { "epoch": 0.3661092589886969, "grad_norm": 0.9248366355895996, "learning_rate": 2.1956017148431487e-05, "loss": 0.1347, "step": 20673 }, { "epoch": 0.3661269685257253, "grad_norm": 0.6757625341415405, "learning_rate": 2.1955254876127164e-05, "loss": 0.1004, "step": 20674 }, { "epoch": 0.36614467806275375, "grad_norm": 0.9041299223899841, "learning_rate": 2.195449258094076e-05, "loss": 0.0848, "step": 20675 }, { "epoch": 0.36616238759978215, "grad_norm": 0.7546404004096985, "learning_rate": 2.1953730262874783e-05, "loss": 0.0926, "step": 20676 }, { "epoch": 0.3661800971368106, "grad_norm": 0.72312992811203, "learning_rate": 2.195296792193173e-05, "loss": 0.1024, "step": 20677 }, { "epoch": 0.36619780667383905, "grad_norm": 0.680774986743927, "learning_rate": 2.195220555811412e-05, "loss": 0.1454, "step": 20678 }, { "epoch": 0.36621551621086745, "grad_norm": 0.7874593138694763, "learning_rate": 2.195144317142445e-05, "loss": 0.1036, "step": 20679 }, { "epoch": 0.3662332257478959, "grad_norm": 0.6561384797096252, "learning_rate": 2.1950680761865243e-05, "loss": 0.0565, "step": 20680 }, { "epoch": 0.3662509352849243, "grad_norm": 0.7762807607650757, "learning_rate": 2.1949918329438998e-05, "loss": 0.111, "step": 20681 }, { "epoch": 0.36626864482195276, "grad_norm": 0.6251516342163086, "learning_rate": 2.1949155874148224e-05, "loss": 0.0806, "step": 20682 }, { "epoch": 0.36628635435898116, "grad_norm": 1.3363174200057983, "learning_rate": 2.1948393395995427e-05, "loss": 0.0914, "step": 20683 }, { "epoch": 0.3663040638960096, "grad_norm": 0.6835139393806458, "learning_rate": 2.1947630894983118e-05, "loss": 0.1428, "step": 20684 }, { "epoch": 0.366321773433038, "grad_norm": 1.0344367027282715, "learning_rate": 2.1946868371113812e-05, "loss": 0.0654, "step": 20685 }, { "epoch": 0.36633948297006647, "grad_norm": 1.02046799659729, "learning_rate": 2.1946105824390005e-05, "loss": 0.1025, "step": 20686 }, { "epoch": 0.36635719250709486, "grad_norm": 0.9495967626571655, "learning_rate": 2.194534325481422e-05, "loss": 0.1098, "step": 20687 }, { "epoch": 0.3663749020441233, "grad_norm": 0.8886353373527527, "learning_rate": 2.1944580662388952e-05, "loss": 0.0739, "step": 20688 }, { "epoch": 0.3663926115811517, "grad_norm": 0.664557158946991, "learning_rate": 2.1943818047116717e-05, "loss": 0.0759, "step": 20689 }, { "epoch": 0.36641032111818017, "grad_norm": 0.5670295357704163, "learning_rate": 2.1943055409000027e-05, "loss": 0.0518, "step": 20690 }, { "epoch": 0.36642803065520857, "grad_norm": 0.6013091802597046, "learning_rate": 2.1942292748041382e-05, "loss": 0.1066, "step": 20691 }, { "epoch": 0.366445740192237, "grad_norm": 0.539071798324585, "learning_rate": 2.19415300642433e-05, "loss": 0.0769, "step": 20692 }, { "epoch": 0.3664634497292655, "grad_norm": 0.4892798662185669, "learning_rate": 2.1940767357608286e-05, "loss": 0.0818, "step": 20693 }, { "epoch": 0.3664811592662939, "grad_norm": 0.7163825631141663, "learning_rate": 2.194000462813884e-05, "loss": 0.0972, "step": 20694 }, { "epoch": 0.36649886880332233, "grad_norm": 0.6423463225364685, "learning_rate": 2.1939241875837496e-05, "loss": 0.0716, "step": 20695 }, { "epoch": 0.36651657834035073, "grad_norm": 0.6592357158660889, "learning_rate": 2.1938479100706738e-05, "loss": 0.1011, "step": 20696 }, { "epoch": 0.3665342878773792, "grad_norm": 0.9943634867668152, "learning_rate": 2.1937716302749086e-05, "loss": 0.0744, "step": 20697 }, { "epoch": 0.3665519974144076, "grad_norm": 0.6731588244438171, "learning_rate": 2.193695348196705e-05, "loss": 0.0557, "step": 20698 }, { "epoch": 0.36656970695143604, "grad_norm": 0.5044351816177368, "learning_rate": 2.1936190638363143e-05, "loss": 0.0577, "step": 20699 }, { "epoch": 0.36658741648846443, "grad_norm": 0.657564640045166, "learning_rate": 2.1935427771939862e-05, "loss": 0.075, "step": 20700 }, { "epoch": 0.3666051260254929, "grad_norm": 0.8054569959640503, "learning_rate": 2.1934664882699733e-05, "loss": 0.0868, "step": 20701 }, { "epoch": 0.3666228355625213, "grad_norm": 0.7472425699234009, "learning_rate": 2.1933901970645256e-05, "loss": 0.1142, "step": 20702 }, { "epoch": 0.36664054509954974, "grad_norm": 0.5166794657707214, "learning_rate": 2.193313903577894e-05, "loss": 0.1022, "step": 20703 }, { "epoch": 0.36665825463657814, "grad_norm": 0.7243724465370178, "learning_rate": 2.1932376078103303e-05, "loss": 0.0679, "step": 20704 }, { "epoch": 0.3666759641736066, "grad_norm": 0.8353014588356018, "learning_rate": 2.1931613097620843e-05, "loss": 0.1012, "step": 20705 }, { "epoch": 0.366693673710635, "grad_norm": 0.829186201095581, "learning_rate": 2.1930850094334083e-05, "loss": 0.0656, "step": 20706 }, { "epoch": 0.36671138324766345, "grad_norm": 0.4593653082847595, "learning_rate": 2.1930087068245522e-05, "loss": 0.079, "step": 20707 }, { "epoch": 0.3667290927846919, "grad_norm": 0.47896838188171387, "learning_rate": 2.1929324019357677e-05, "loss": 0.0782, "step": 20708 }, { "epoch": 0.3667468023217203, "grad_norm": 0.7317845821380615, "learning_rate": 2.1928560947673058e-05, "loss": 0.0766, "step": 20709 }, { "epoch": 0.36676451185874875, "grad_norm": 0.9030693173408508, "learning_rate": 2.1927797853194176e-05, "loss": 0.0998, "step": 20710 }, { "epoch": 0.36678222139577715, "grad_norm": 0.3870898187160492, "learning_rate": 2.192703473592354e-05, "loss": 0.076, "step": 20711 }, { "epoch": 0.3667999309328056, "grad_norm": 0.7158815264701843, "learning_rate": 2.1926271595863658e-05, "loss": 0.0926, "step": 20712 }, { "epoch": 0.366817640469834, "grad_norm": 0.655421257019043, "learning_rate": 2.1925508433017043e-05, "loss": 0.0619, "step": 20713 }, { "epoch": 0.36683535000686246, "grad_norm": 0.678286612033844, "learning_rate": 2.1924745247386204e-05, "loss": 0.0733, "step": 20714 }, { "epoch": 0.36685305954389086, "grad_norm": 0.6822670698165894, "learning_rate": 2.192398203897366e-05, "loss": 0.0923, "step": 20715 }, { "epoch": 0.3668707690809193, "grad_norm": 1.1016266345977783, "learning_rate": 2.1923218807781908e-05, "loss": 0.1016, "step": 20716 }, { "epoch": 0.3668884786179477, "grad_norm": 0.5877670645713806, "learning_rate": 2.192245555381347e-05, "loss": 0.0784, "step": 20717 }, { "epoch": 0.36690618815497617, "grad_norm": 0.984634518623352, "learning_rate": 2.192169227707085e-05, "loss": 0.0831, "step": 20718 }, { "epoch": 0.36692389769200456, "grad_norm": 0.5667169094085693, "learning_rate": 2.192092897755657e-05, "loss": 0.0765, "step": 20719 }, { "epoch": 0.366941607229033, "grad_norm": 0.6188066601753235, "learning_rate": 2.1920165655273126e-05, "loss": 0.0574, "step": 20720 }, { "epoch": 0.3669593167660614, "grad_norm": 0.7109246850013733, "learning_rate": 2.191940231022304e-05, "loss": 0.0857, "step": 20721 }, { "epoch": 0.36697702630308987, "grad_norm": 0.7594593167304993, "learning_rate": 2.191863894240882e-05, "loss": 0.1019, "step": 20722 }, { "epoch": 0.3669947358401183, "grad_norm": 0.6605633497238159, "learning_rate": 2.1917875551832977e-05, "loss": 0.0844, "step": 20723 }, { "epoch": 0.3670124453771467, "grad_norm": 0.7432799339294434, "learning_rate": 2.1917112138498025e-05, "loss": 0.0878, "step": 20724 }, { "epoch": 0.3670301549141752, "grad_norm": 0.5557863712310791, "learning_rate": 2.1916348702406473e-05, "loss": 0.078, "step": 20725 }, { "epoch": 0.3670478644512036, "grad_norm": 0.686174213886261, "learning_rate": 2.1915585243560834e-05, "loss": 0.0812, "step": 20726 }, { "epoch": 0.36706557398823203, "grad_norm": 0.48235902190208435, "learning_rate": 2.191482176196362e-05, "loss": 0.0785, "step": 20727 }, { "epoch": 0.36708328352526043, "grad_norm": 0.7442923784255981, "learning_rate": 2.1914058257617335e-05, "loss": 0.0872, "step": 20728 }, { "epoch": 0.3671009930622889, "grad_norm": 0.5815911293029785, "learning_rate": 2.1913294730524503e-05, "loss": 0.1063, "step": 20729 }, { "epoch": 0.3671187025993173, "grad_norm": 0.431013286113739, "learning_rate": 2.191253118068763e-05, "loss": 0.0469, "step": 20730 }, { "epoch": 0.36713641213634574, "grad_norm": 0.6195128560066223, "learning_rate": 2.1911767608109228e-05, "loss": 0.0747, "step": 20731 }, { "epoch": 0.36715412167337413, "grad_norm": 0.5054374933242798, "learning_rate": 2.1911004012791812e-05, "loss": 0.0329, "step": 20732 }, { "epoch": 0.3671718312104026, "grad_norm": 0.4718433916568756, "learning_rate": 2.1910240394737887e-05, "loss": 0.0498, "step": 20733 }, { "epoch": 0.367189540747431, "grad_norm": 0.7725617289543152, "learning_rate": 2.1909476753949974e-05, "loss": 0.0885, "step": 20734 }, { "epoch": 0.36720725028445944, "grad_norm": 0.5957948565483093, "learning_rate": 2.1908713090430584e-05, "loss": 0.0725, "step": 20735 }, { "epoch": 0.36722495982148784, "grad_norm": 0.8461492657661438, "learning_rate": 2.190794940418222e-05, "loss": 0.0665, "step": 20736 }, { "epoch": 0.3672426693585163, "grad_norm": 0.43731966614723206, "learning_rate": 2.1907185695207402e-05, "loss": 0.0659, "step": 20737 }, { "epoch": 0.36726037889554475, "grad_norm": 0.6111915111541748, "learning_rate": 2.1906421963508646e-05, "loss": 0.0459, "step": 20738 }, { "epoch": 0.36727808843257315, "grad_norm": 0.744138777256012, "learning_rate": 2.1905658209088458e-05, "loss": 0.1223, "step": 20739 }, { "epoch": 0.3672957979696016, "grad_norm": 0.8404391407966614, "learning_rate": 2.190489443194936e-05, "loss": 0.0873, "step": 20740 }, { "epoch": 0.36731350750663, "grad_norm": 0.613681972026825, "learning_rate": 2.1904130632093842e-05, "loss": 0.0912, "step": 20741 }, { "epoch": 0.36733121704365845, "grad_norm": 0.39350050687789917, "learning_rate": 2.1903366809524446e-05, "loss": 0.0766, "step": 20742 }, { "epoch": 0.36734892658068685, "grad_norm": 0.6354733109474182, "learning_rate": 2.1902602964243667e-05, "loss": 0.0744, "step": 20743 }, { "epoch": 0.3673666361177153, "grad_norm": 0.6170526146888733, "learning_rate": 2.1901839096254023e-05, "loss": 0.0739, "step": 20744 }, { "epoch": 0.3673843456547437, "grad_norm": 0.7709797024726868, "learning_rate": 2.1901075205558024e-05, "loss": 0.1091, "step": 20745 }, { "epoch": 0.36740205519177216, "grad_norm": 0.3070012927055359, "learning_rate": 2.1900311292158185e-05, "loss": 0.0669, "step": 20746 }, { "epoch": 0.36741976472880056, "grad_norm": 0.9365510940551758, "learning_rate": 2.1899547356057026e-05, "loss": 0.1122, "step": 20747 }, { "epoch": 0.367437474265829, "grad_norm": 0.8802052736282349, "learning_rate": 2.1898783397257047e-05, "loss": 0.1033, "step": 20748 }, { "epoch": 0.3674551838028574, "grad_norm": 1.718247652053833, "learning_rate": 2.1898019415760774e-05, "loss": 0.0853, "step": 20749 }, { "epoch": 0.36747289333988586, "grad_norm": 0.5926626920700073, "learning_rate": 2.1897255411570712e-05, "loss": 0.0725, "step": 20750 }, { "epoch": 0.36749060287691426, "grad_norm": 0.6981438994407654, "learning_rate": 2.1896491384689376e-05, "loss": 0.0807, "step": 20751 }, { "epoch": 0.3675083124139427, "grad_norm": 0.6271365880966187, "learning_rate": 2.1895727335119283e-05, "loss": 0.0718, "step": 20752 }, { "epoch": 0.36752602195097117, "grad_norm": 0.7348807454109192, "learning_rate": 2.1894963262862945e-05, "loss": 0.0602, "step": 20753 }, { "epoch": 0.36754373148799957, "grad_norm": 0.5241868495941162, "learning_rate": 2.1894199167922874e-05, "loss": 0.0807, "step": 20754 }, { "epoch": 0.367561441025028, "grad_norm": 0.6945480108261108, "learning_rate": 2.1893435050301585e-05, "loss": 0.0837, "step": 20755 }, { "epoch": 0.3675791505620564, "grad_norm": 0.7149350643157959, "learning_rate": 2.1892670910001593e-05, "loss": 0.0922, "step": 20756 }, { "epoch": 0.3675968600990849, "grad_norm": 1.0229686498641968, "learning_rate": 2.189190674702541e-05, "loss": 0.083, "step": 20757 }, { "epoch": 0.3676145696361133, "grad_norm": 0.63901686668396, "learning_rate": 2.189114256137555e-05, "loss": 0.1098, "step": 20758 }, { "epoch": 0.36763227917314173, "grad_norm": 1.1153085231781006, "learning_rate": 2.189037835305453e-05, "loss": 0.1018, "step": 20759 }, { "epoch": 0.36764998871017013, "grad_norm": 0.5563086867332458, "learning_rate": 2.188961412206486e-05, "loss": 0.0561, "step": 20760 }, { "epoch": 0.3676676982471986, "grad_norm": 0.5282204151153564, "learning_rate": 2.1888849868409057e-05, "loss": 0.1032, "step": 20761 }, { "epoch": 0.367685407784227, "grad_norm": 0.8108397722244263, "learning_rate": 2.1888085592089637e-05, "loss": 0.0913, "step": 20762 }, { "epoch": 0.36770311732125543, "grad_norm": 0.5171456933021545, "learning_rate": 2.1887321293109108e-05, "loss": 0.0714, "step": 20763 }, { "epoch": 0.36772082685828383, "grad_norm": 0.5351126194000244, "learning_rate": 2.1886556971469993e-05, "loss": 0.0691, "step": 20764 }, { "epoch": 0.3677385363953123, "grad_norm": 0.6725524067878723, "learning_rate": 2.1885792627174796e-05, "loss": 0.0739, "step": 20765 }, { "epoch": 0.3677562459323407, "grad_norm": 0.7296939492225647, "learning_rate": 2.188502826022604e-05, "loss": 0.0654, "step": 20766 }, { "epoch": 0.36777395546936914, "grad_norm": 0.8353939056396484, "learning_rate": 2.1884263870626246e-05, "loss": 0.0835, "step": 20767 }, { "epoch": 0.3677916650063976, "grad_norm": 0.8465070724487305, "learning_rate": 2.1883499458377912e-05, "loss": 0.0965, "step": 20768 }, { "epoch": 0.367809374543426, "grad_norm": 0.7089707851409912, "learning_rate": 2.188273502348356e-05, "loss": 0.091, "step": 20769 }, { "epoch": 0.36782708408045445, "grad_norm": 0.628367006778717, "learning_rate": 2.1881970565945707e-05, "loss": 0.094, "step": 20770 }, { "epoch": 0.36784479361748285, "grad_norm": 0.6403366923332214, "learning_rate": 2.1881206085766866e-05, "loss": 0.0642, "step": 20771 }, { "epoch": 0.3678625031545113, "grad_norm": 0.8489040732383728, "learning_rate": 2.1880441582949562e-05, "loss": 0.1002, "step": 20772 }, { "epoch": 0.3678802126915397, "grad_norm": 0.7705403566360474, "learning_rate": 2.187967705749629e-05, "loss": 0.0917, "step": 20773 }, { "epoch": 0.36789792222856815, "grad_norm": 0.5114709734916687, "learning_rate": 2.187891250940958e-05, "loss": 0.0445, "step": 20774 }, { "epoch": 0.36791563176559655, "grad_norm": 1.0061819553375244, "learning_rate": 2.1878147938691946e-05, "loss": 0.061, "step": 20775 }, { "epoch": 0.367933341302625, "grad_norm": 0.9399732351303101, "learning_rate": 2.18773833453459e-05, "loss": 0.0838, "step": 20776 }, { "epoch": 0.3679510508396534, "grad_norm": 0.6258115172386169, "learning_rate": 2.1876618729373958e-05, "loss": 0.0753, "step": 20777 }, { "epoch": 0.36796876037668186, "grad_norm": 0.621585488319397, "learning_rate": 2.1875854090778634e-05, "loss": 0.1, "step": 20778 }, { "epoch": 0.36798646991371026, "grad_norm": 0.787564218044281, "learning_rate": 2.1875089429562446e-05, "loss": 0.0906, "step": 20779 }, { "epoch": 0.3680041794507387, "grad_norm": 0.6613154411315918, "learning_rate": 2.187432474572791e-05, "loss": 0.0658, "step": 20780 }, { "epoch": 0.3680218889877671, "grad_norm": 0.5502078533172607, "learning_rate": 2.1873560039277542e-05, "loss": 0.0576, "step": 20781 }, { "epoch": 0.36803959852479556, "grad_norm": 0.6100515127182007, "learning_rate": 2.1872795310213855e-05, "loss": 0.0925, "step": 20782 }, { "epoch": 0.368057308061824, "grad_norm": 1.1015490293502808, "learning_rate": 2.187203055853937e-05, "loss": 0.1008, "step": 20783 }, { "epoch": 0.3680750175988524, "grad_norm": 0.8379080295562744, "learning_rate": 2.1871265784256594e-05, "loss": 0.1136, "step": 20784 }, { "epoch": 0.36809272713588087, "grad_norm": 0.8573141694068909, "learning_rate": 2.187050098736805e-05, "loss": 0.1102, "step": 20785 }, { "epoch": 0.36811043667290927, "grad_norm": 0.6423177123069763, "learning_rate": 2.1869736167876255e-05, "loss": 0.0722, "step": 20786 }, { "epoch": 0.3681281462099377, "grad_norm": 0.7389219403266907, "learning_rate": 2.1868971325783723e-05, "loss": 0.1064, "step": 20787 }, { "epoch": 0.3681458557469661, "grad_norm": 0.6521042585372925, "learning_rate": 2.1868206461092968e-05, "loss": 0.0536, "step": 20788 }, { "epoch": 0.3681635652839946, "grad_norm": 0.7310061454772949, "learning_rate": 2.1867441573806504e-05, "loss": 0.0906, "step": 20789 }, { "epoch": 0.368181274821023, "grad_norm": 0.7336448431015015, "learning_rate": 2.186667666392686e-05, "loss": 0.0997, "step": 20790 }, { "epoch": 0.36819898435805143, "grad_norm": 0.7983915209770203, "learning_rate": 2.1865911731456542e-05, "loss": 0.1003, "step": 20791 }, { "epoch": 0.3682166938950798, "grad_norm": 0.6017410755157471, "learning_rate": 2.186514677639807e-05, "loss": 0.0908, "step": 20792 }, { "epoch": 0.3682344034321083, "grad_norm": 0.7577215433120728, "learning_rate": 2.1864381798753955e-05, "loss": 0.0683, "step": 20793 }, { "epoch": 0.3682521129691367, "grad_norm": 0.8914555311203003, "learning_rate": 2.186361679852672e-05, "loss": 0.0829, "step": 20794 }, { "epoch": 0.36826982250616513, "grad_norm": 0.6972089409828186, "learning_rate": 2.186285177571888e-05, "loss": 0.052, "step": 20795 }, { "epoch": 0.3682875320431936, "grad_norm": 0.8293882608413696, "learning_rate": 2.1862086730332953e-05, "loss": 0.1042, "step": 20796 }, { "epoch": 0.368305241580222, "grad_norm": 0.7185078859329224, "learning_rate": 2.1861321662371455e-05, "loss": 0.0886, "step": 20797 }, { "epoch": 0.36832295111725044, "grad_norm": 0.36499837040901184, "learning_rate": 2.18605565718369e-05, "loss": 0.0471, "step": 20798 }, { "epoch": 0.36834066065427884, "grad_norm": 0.6159648895263672, "learning_rate": 2.185979145873181e-05, "loss": 0.0692, "step": 20799 }, { "epoch": 0.3683583701913073, "grad_norm": 1.0414215326309204, "learning_rate": 2.1859026323058705e-05, "loss": 0.0677, "step": 20800 }, { "epoch": 0.3683760797283357, "grad_norm": 0.8074551224708557, "learning_rate": 2.185826116482009e-05, "loss": 0.0813, "step": 20801 }, { "epoch": 0.36839378926536415, "grad_norm": 0.7090462446212769, "learning_rate": 2.1857495984018495e-05, "loss": 0.0819, "step": 20802 }, { "epoch": 0.36841149880239255, "grad_norm": 0.7876393795013428, "learning_rate": 2.185673078065642e-05, "loss": 0.0667, "step": 20803 }, { "epoch": 0.368429208339421, "grad_norm": 0.6567758321762085, "learning_rate": 2.1855965554736405e-05, "loss": 0.1384, "step": 20804 }, { "epoch": 0.3684469178764494, "grad_norm": 0.6933006048202515, "learning_rate": 2.1855200306260954e-05, "loss": 0.0814, "step": 20805 }, { "epoch": 0.36846462741347785, "grad_norm": 0.7332701683044434, "learning_rate": 2.185443503523259e-05, "loss": 0.0909, "step": 20806 }, { "epoch": 0.36848233695050625, "grad_norm": 0.6168791055679321, "learning_rate": 2.1853669741653825e-05, "loss": 0.0674, "step": 20807 }, { "epoch": 0.3685000464875347, "grad_norm": 0.7197839021682739, "learning_rate": 2.1852904425527177e-05, "loss": 0.1191, "step": 20808 }, { "epoch": 0.3685177560245631, "grad_norm": 1.0809189081192017, "learning_rate": 2.1852139086855172e-05, "loss": 0.0655, "step": 20809 }, { "epoch": 0.36853546556159156, "grad_norm": 0.6394221782684326, "learning_rate": 2.185137372564032e-05, "loss": 0.0751, "step": 20810 }, { "epoch": 0.36855317509862, "grad_norm": 0.32286515831947327, "learning_rate": 2.1850608341885146e-05, "loss": 0.0855, "step": 20811 }, { "epoch": 0.3685708846356484, "grad_norm": 0.5460566878318787, "learning_rate": 2.1849842935592156e-05, "loss": 0.1131, "step": 20812 }, { "epoch": 0.36858859417267686, "grad_norm": 0.6762939691543579, "learning_rate": 2.1849077506763876e-05, "loss": 0.0642, "step": 20813 }, { "epoch": 0.36860630370970526, "grad_norm": 0.6622077822685242, "learning_rate": 2.184831205540283e-05, "loss": 0.0719, "step": 20814 }, { "epoch": 0.3686240132467337, "grad_norm": 0.6955482363700867, "learning_rate": 2.1847546581511523e-05, "loss": 0.064, "step": 20815 }, { "epoch": 0.3686417227837621, "grad_norm": 0.8453178405761719, "learning_rate": 2.1846781085092486e-05, "loss": 0.0938, "step": 20816 }, { "epoch": 0.36865943232079057, "grad_norm": 0.734804093837738, "learning_rate": 2.184601556614823e-05, "loss": 0.0828, "step": 20817 }, { "epoch": 0.36867714185781897, "grad_norm": 0.6422637104988098, "learning_rate": 2.1845250024681275e-05, "loss": 0.0514, "step": 20818 }, { "epoch": 0.3686948513948474, "grad_norm": 0.8512269258499146, "learning_rate": 2.184448446069414e-05, "loss": 0.0619, "step": 20819 }, { "epoch": 0.3687125609318758, "grad_norm": 0.7147124409675598, "learning_rate": 2.1843718874189343e-05, "loss": 0.078, "step": 20820 }, { "epoch": 0.3687302704689043, "grad_norm": 0.7269713282585144, "learning_rate": 2.1842953265169402e-05, "loss": 0.0875, "step": 20821 }, { "epoch": 0.3687479800059327, "grad_norm": 0.934532880783081, "learning_rate": 2.1842187633636833e-05, "loss": 0.0981, "step": 20822 }, { "epoch": 0.36876568954296113, "grad_norm": 0.7612414956092834, "learning_rate": 2.1841421979594167e-05, "loss": 0.058, "step": 20823 }, { "epoch": 0.3687833990799895, "grad_norm": 1.1535708904266357, "learning_rate": 2.184065630304391e-05, "loss": 0.0717, "step": 20824 }, { "epoch": 0.368801108617018, "grad_norm": 1.181867241859436, "learning_rate": 2.183989060398859e-05, "loss": 0.0774, "step": 20825 }, { "epoch": 0.36881881815404643, "grad_norm": 0.7504757642745972, "learning_rate": 2.1839124882430716e-05, "loss": 0.0697, "step": 20826 }, { "epoch": 0.36883652769107483, "grad_norm": 0.9349303841590881, "learning_rate": 2.1838359138372815e-05, "loss": 0.0839, "step": 20827 }, { "epoch": 0.3688542372281033, "grad_norm": 0.9022061228752136, "learning_rate": 2.1837593371817404e-05, "loss": 0.0688, "step": 20828 }, { "epoch": 0.3688719467651317, "grad_norm": 0.7407123446464539, "learning_rate": 2.1836827582767007e-05, "loss": 0.1032, "step": 20829 }, { "epoch": 0.36888965630216014, "grad_norm": 0.47327786684036255, "learning_rate": 2.1836061771224135e-05, "loss": 0.0752, "step": 20830 }, { "epoch": 0.36890736583918854, "grad_norm": 0.5140311121940613, "learning_rate": 2.183529593719131e-05, "loss": 0.0712, "step": 20831 }, { "epoch": 0.368925075376217, "grad_norm": 0.7632405757904053, "learning_rate": 2.183453008067105e-05, "loss": 0.1128, "step": 20832 }, { "epoch": 0.3689427849132454, "grad_norm": 0.7030142545700073, "learning_rate": 2.1833764201665882e-05, "loss": 0.1, "step": 20833 }, { "epoch": 0.36896049445027385, "grad_norm": 0.9045218825340271, "learning_rate": 2.1832998300178323e-05, "loss": 0.0861, "step": 20834 }, { "epoch": 0.36897820398730224, "grad_norm": 1.3413567543029785, "learning_rate": 2.1832232376210888e-05, "loss": 0.0805, "step": 20835 }, { "epoch": 0.3689959135243307, "grad_norm": 0.8591947555541992, "learning_rate": 2.18314664297661e-05, "loss": 0.0754, "step": 20836 }, { "epoch": 0.3690136230613591, "grad_norm": 0.7697131037712097, "learning_rate": 2.1830700460846474e-05, "loss": 0.0976, "step": 20837 }, { "epoch": 0.36903133259838755, "grad_norm": 0.6872439384460449, "learning_rate": 2.182993446945454e-05, "loss": 0.1133, "step": 20838 }, { "epoch": 0.36904904213541595, "grad_norm": 1.0450425148010254, "learning_rate": 2.182916845559281e-05, "loss": 0.0878, "step": 20839 }, { "epoch": 0.3690667516724444, "grad_norm": 0.5504192113876343, "learning_rate": 2.1828402419263807e-05, "loss": 0.0685, "step": 20840 }, { "epoch": 0.36908446120947286, "grad_norm": 0.6984079480171204, "learning_rate": 2.182763636047005e-05, "loss": 0.0729, "step": 20841 }, { "epoch": 0.36910217074650126, "grad_norm": 0.5853216648101807, "learning_rate": 2.1826870279214058e-05, "loss": 0.0712, "step": 20842 }, { "epoch": 0.3691198802835297, "grad_norm": 0.6931045055389404, "learning_rate": 2.182610417549836e-05, "loss": 0.1116, "step": 20843 }, { "epoch": 0.3691375898205581, "grad_norm": 0.8992568850517273, "learning_rate": 2.1825338049325463e-05, "loss": 0.1086, "step": 20844 }, { "epoch": 0.36915529935758656, "grad_norm": 0.7928709983825684, "learning_rate": 2.18245719006979e-05, "loss": 0.0869, "step": 20845 }, { "epoch": 0.36917300889461496, "grad_norm": 0.8479301929473877, "learning_rate": 2.182380572961818e-05, "loss": 0.0732, "step": 20846 }, { "epoch": 0.3691907184316434, "grad_norm": 0.8226930499076843, "learning_rate": 2.182303953608883e-05, "loss": 0.0859, "step": 20847 }, { "epoch": 0.3692084279686718, "grad_norm": 1.7054345607757568, "learning_rate": 2.1822273320112374e-05, "loss": 0.0915, "step": 20848 }, { "epoch": 0.36922613750570027, "grad_norm": 1.0333409309387207, "learning_rate": 2.1821507081691324e-05, "loss": 0.0914, "step": 20849 }, { "epoch": 0.36924384704272867, "grad_norm": 2.127363681793213, "learning_rate": 2.182074082082821e-05, "loss": 0.0779, "step": 20850 }, { "epoch": 0.3692615565797571, "grad_norm": 0.5323784351348877, "learning_rate": 2.1819974537525543e-05, "loss": 0.1023, "step": 20851 }, { "epoch": 0.3692792661167855, "grad_norm": 0.8953747749328613, "learning_rate": 2.1819208231785858e-05, "loss": 0.0999, "step": 20852 }, { "epoch": 0.369296975653814, "grad_norm": 0.7825260758399963, "learning_rate": 2.1818441903611658e-05, "loss": 0.0721, "step": 20853 }, { "epoch": 0.3693146851908424, "grad_norm": 0.48045337200164795, "learning_rate": 2.1817675553005485e-05, "loss": 0.1021, "step": 20854 }, { "epoch": 0.3693323947278708, "grad_norm": 0.6049835681915283, "learning_rate": 2.1816909179969838e-05, "loss": 0.0589, "step": 20855 }, { "epoch": 0.3693501042648993, "grad_norm": 0.7790864706039429, "learning_rate": 2.1816142784507253e-05, "loss": 0.0966, "step": 20856 }, { "epoch": 0.3693678138019277, "grad_norm": 0.4430074095726013, "learning_rate": 2.1815376366620253e-05, "loss": 0.0877, "step": 20857 }, { "epoch": 0.36938552333895613, "grad_norm": 1.112505555152893, "learning_rate": 2.1814609926311346e-05, "loss": 0.0854, "step": 20858 }, { "epoch": 0.36940323287598453, "grad_norm": 0.9959597587585449, "learning_rate": 2.181384346358307e-05, "loss": 0.1015, "step": 20859 }, { "epoch": 0.369420942413013, "grad_norm": 0.666205108165741, "learning_rate": 2.1813076978437933e-05, "loss": 0.0625, "step": 20860 }, { "epoch": 0.3694386519500414, "grad_norm": 0.9620750546455383, "learning_rate": 2.1812310470878458e-05, "loss": 0.1234, "step": 20861 }, { "epoch": 0.36945636148706984, "grad_norm": 1.0442278385162354, "learning_rate": 2.181154394090718e-05, "loss": 0.0778, "step": 20862 }, { "epoch": 0.36947407102409824, "grad_norm": 1.3593851327896118, "learning_rate": 2.1810777388526605e-05, "loss": 0.1035, "step": 20863 }, { "epoch": 0.3694917805611267, "grad_norm": 0.8165574073791504, "learning_rate": 2.1810010813739264e-05, "loss": 0.1005, "step": 20864 }, { "epoch": 0.3695094900981551, "grad_norm": 0.521826446056366, "learning_rate": 2.180924421654767e-05, "loss": 0.0638, "step": 20865 }, { "epoch": 0.36952719963518355, "grad_norm": 0.7480950951576233, "learning_rate": 2.1808477596954357e-05, "loss": 0.0622, "step": 20866 }, { "epoch": 0.36954490917221194, "grad_norm": 1.4183073043823242, "learning_rate": 2.180771095496184e-05, "loss": 0.1142, "step": 20867 }, { "epoch": 0.3695626187092404, "grad_norm": 0.560253918170929, "learning_rate": 2.1806944290572647e-05, "loss": 0.0987, "step": 20868 }, { "epoch": 0.3695803282462688, "grad_norm": 0.8219050168991089, "learning_rate": 2.1806177603789288e-05, "loss": 0.0969, "step": 20869 }, { "epoch": 0.36959803778329725, "grad_norm": 0.6243727207183838, "learning_rate": 2.18054108946143e-05, "loss": 0.0892, "step": 20870 }, { "epoch": 0.3696157473203257, "grad_norm": 0.9831513166427612, "learning_rate": 2.1804644163050195e-05, "loss": 0.104, "step": 20871 }, { "epoch": 0.3696334568573541, "grad_norm": 0.639102578163147, "learning_rate": 2.1803877409099495e-05, "loss": 0.0942, "step": 20872 }, { "epoch": 0.36965116639438256, "grad_norm": 0.9029622077941895, "learning_rate": 2.1803110632764734e-05, "loss": 0.0867, "step": 20873 }, { "epoch": 0.36966887593141096, "grad_norm": 0.6755476593971252, "learning_rate": 2.1802343834048424e-05, "loss": 0.0967, "step": 20874 }, { "epoch": 0.3696865854684394, "grad_norm": 0.6154879927635193, "learning_rate": 2.180157701295309e-05, "loss": 0.1062, "step": 20875 }, { "epoch": 0.3697042950054678, "grad_norm": 0.8770123720169067, "learning_rate": 2.1800810169481254e-05, "loss": 0.0683, "step": 20876 }, { "epoch": 0.36972200454249626, "grad_norm": 0.8597204089164734, "learning_rate": 2.1800043303635446e-05, "loss": 0.0709, "step": 20877 }, { "epoch": 0.36973971407952466, "grad_norm": 0.5776512622833252, "learning_rate": 2.179927641541818e-05, "loss": 0.0816, "step": 20878 }, { "epoch": 0.3697574236165531, "grad_norm": 0.5347191095352173, "learning_rate": 2.179850950483198e-05, "loss": 0.0747, "step": 20879 }, { "epoch": 0.3697751331535815, "grad_norm": 0.5590403079986572, "learning_rate": 2.1797742571879374e-05, "loss": 0.053, "step": 20880 }, { "epoch": 0.36979284269060997, "grad_norm": 0.6166929006576538, "learning_rate": 2.179697561656288e-05, "loss": 0.0366, "step": 20881 }, { "epoch": 0.36981055222763837, "grad_norm": 0.7609911561012268, "learning_rate": 2.179620863888503e-05, "loss": 0.0626, "step": 20882 }, { "epoch": 0.3698282617646668, "grad_norm": 0.9248408675193787, "learning_rate": 2.1795441638848333e-05, "loss": 0.1143, "step": 20883 }, { "epoch": 0.3698459713016952, "grad_norm": 0.7829274535179138, "learning_rate": 2.1794674616455324e-05, "loss": 0.06, "step": 20884 }, { "epoch": 0.3698636808387237, "grad_norm": 0.9210405349731445, "learning_rate": 2.1793907571708524e-05, "loss": 0.0883, "step": 20885 }, { "epoch": 0.36988139037575213, "grad_norm": 0.8016358017921448, "learning_rate": 2.1793140504610455e-05, "loss": 0.0768, "step": 20886 }, { "epoch": 0.3698990999127805, "grad_norm": 0.8109229207038879, "learning_rate": 2.1792373415163642e-05, "loss": 0.1032, "step": 20887 }, { "epoch": 0.369916809449809, "grad_norm": 0.30431070923805237, "learning_rate": 2.1791606303370603e-05, "loss": 0.0933, "step": 20888 }, { "epoch": 0.3699345189868374, "grad_norm": 0.8420714735984802, "learning_rate": 2.1790839169233867e-05, "loss": 0.0945, "step": 20889 }, { "epoch": 0.36995222852386583, "grad_norm": 1.0527348518371582, "learning_rate": 2.1790072012755957e-05, "loss": 0.0934, "step": 20890 }, { "epoch": 0.36996993806089423, "grad_norm": 0.9840424656867981, "learning_rate": 2.1789304833939403e-05, "loss": 0.105, "step": 20891 }, { "epoch": 0.3699876475979227, "grad_norm": 0.6994835138320923, "learning_rate": 2.1788537632786715e-05, "loss": 0.0542, "step": 20892 }, { "epoch": 0.3700053571349511, "grad_norm": 0.7806388735771179, "learning_rate": 2.178777040930043e-05, "loss": 0.082, "step": 20893 }, { "epoch": 0.37002306667197954, "grad_norm": 0.5402217507362366, "learning_rate": 2.1787003163483066e-05, "loss": 0.103, "step": 20894 }, { "epoch": 0.37004077620900794, "grad_norm": 1.0412015914916992, "learning_rate": 2.1786235895337143e-05, "loss": 0.1009, "step": 20895 }, { "epoch": 0.3700584857460364, "grad_norm": 0.6357706785202026, "learning_rate": 2.1785468604865197e-05, "loss": 0.0915, "step": 20896 }, { "epoch": 0.3700761952830648, "grad_norm": 0.2846021354198456, "learning_rate": 2.178470129206974e-05, "loss": 0.0563, "step": 20897 }, { "epoch": 0.37009390482009324, "grad_norm": 0.9623416066169739, "learning_rate": 2.1783933956953304e-05, "loss": 0.0839, "step": 20898 }, { "epoch": 0.37011161435712164, "grad_norm": 0.8307793140411377, "learning_rate": 2.1783166599518413e-05, "loss": 0.111, "step": 20899 }, { "epoch": 0.3701293238941501, "grad_norm": 1.0051326751708984, "learning_rate": 2.178239921976759e-05, "loss": 0.0991, "step": 20900 }, { "epoch": 0.37014703343117855, "grad_norm": 0.7018000483512878, "learning_rate": 2.1781631817703364e-05, "loss": 0.0602, "step": 20901 }, { "epoch": 0.37016474296820695, "grad_norm": 0.8479088544845581, "learning_rate": 2.1780864393328248e-05, "loss": 0.118, "step": 20902 }, { "epoch": 0.3701824525052354, "grad_norm": 0.9091206789016724, "learning_rate": 2.178009694664477e-05, "loss": 0.1, "step": 20903 }, { "epoch": 0.3702001620422638, "grad_norm": 1.3683969974517822, "learning_rate": 2.1779329477655468e-05, "loss": 0.0843, "step": 20904 }, { "epoch": 0.37021787157929226, "grad_norm": 0.8223798871040344, "learning_rate": 2.1778561986362855e-05, "loss": 0.0704, "step": 20905 }, { "epoch": 0.37023558111632066, "grad_norm": 0.7794755101203918, "learning_rate": 2.1777794472769458e-05, "loss": 0.0677, "step": 20906 }, { "epoch": 0.3702532906533491, "grad_norm": 0.9128495454788208, "learning_rate": 2.1777026936877806e-05, "loss": 0.0905, "step": 20907 }, { "epoch": 0.3702710001903775, "grad_norm": 0.6384204030036926, "learning_rate": 2.1776259378690417e-05, "loss": 0.1, "step": 20908 }, { "epoch": 0.37028870972740596, "grad_norm": 0.6684533357620239, "learning_rate": 2.1775491798209816e-05, "loss": 0.0592, "step": 20909 }, { "epoch": 0.37030641926443436, "grad_norm": 0.8141049146652222, "learning_rate": 2.177472419543854e-05, "loss": 0.0999, "step": 20910 }, { "epoch": 0.3703241288014628, "grad_norm": 0.9406232833862305, "learning_rate": 2.1773956570379103e-05, "loss": 0.0808, "step": 20911 }, { "epoch": 0.3703418383384912, "grad_norm": 0.9079115986824036, "learning_rate": 2.1773188923034036e-05, "loss": 0.1347, "step": 20912 }, { "epoch": 0.37035954787551967, "grad_norm": 0.5285917520523071, "learning_rate": 2.177242125340586e-05, "loss": 0.111, "step": 20913 }, { "epoch": 0.37037725741254807, "grad_norm": 0.4494575262069702, "learning_rate": 2.1771653561497106e-05, "loss": 0.0594, "step": 20914 }, { "epoch": 0.3703949669495765, "grad_norm": 0.4980153441429138, "learning_rate": 2.1770885847310294e-05, "loss": 0.0563, "step": 20915 }, { "epoch": 0.370412676486605, "grad_norm": 0.6927345395088196, "learning_rate": 2.1770118110847952e-05, "loss": 0.0446, "step": 20916 }, { "epoch": 0.3704303860236334, "grad_norm": 0.7369680404663086, "learning_rate": 2.1769350352112608e-05, "loss": 0.0654, "step": 20917 }, { "epoch": 0.3704480955606618, "grad_norm": 0.5498665571212769, "learning_rate": 2.1768582571106782e-05, "loss": 0.0758, "step": 20918 }, { "epoch": 0.3704658050976902, "grad_norm": 0.4888564348220825, "learning_rate": 2.1767814767833012e-05, "loss": 0.0663, "step": 20919 }, { "epoch": 0.3704835146347187, "grad_norm": 0.8153409957885742, "learning_rate": 2.176704694229381e-05, "loss": 0.0704, "step": 20920 }, { "epoch": 0.3705012241717471, "grad_norm": 0.6163589954376221, "learning_rate": 2.1766279094491714e-05, "loss": 0.0986, "step": 20921 }, { "epoch": 0.37051893370877553, "grad_norm": 0.8918638229370117, "learning_rate": 2.1765511224429237e-05, "loss": 0.0887, "step": 20922 }, { "epoch": 0.37053664324580393, "grad_norm": 0.7998664975166321, "learning_rate": 2.1764743332108913e-05, "loss": 0.0885, "step": 20923 }, { "epoch": 0.3705543527828324, "grad_norm": 0.8895566463470459, "learning_rate": 2.176397541753327e-05, "loss": 0.0659, "step": 20924 }, { "epoch": 0.3705720623198608, "grad_norm": 0.6843035817146301, "learning_rate": 2.1763207480704833e-05, "loss": 0.0797, "step": 20925 }, { "epoch": 0.37058977185688924, "grad_norm": 0.929336667060852, "learning_rate": 2.1762439521626125e-05, "loss": 0.1044, "step": 20926 }, { "epoch": 0.37060748139391764, "grad_norm": 1.2259751558303833, "learning_rate": 2.1761671540299674e-05, "loss": 0.1029, "step": 20927 }, { "epoch": 0.3706251909309461, "grad_norm": 0.9295229315757751, "learning_rate": 2.1760903536728013e-05, "loss": 0.0755, "step": 20928 }, { "epoch": 0.3706429004679745, "grad_norm": 0.6124506592750549, "learning_rate": 2.176013551091366e-05, "loss": 0.0652, "step": 20929 }, { "epoch": 0.37066061000500294, "grad_norm": 0.8184592723846436, "learning_rate": 2.175936746285914e-05, "loss": 0.0895, "step": 20930 }, { "epoch": 0.3706783195420314, "grad_norm": 0.6196765899658203, "learning_rate": 2.1758599392566994e-05, "loss": 0.0774, "step": 20931 }, { "epoch": 0.3706960290790598, "grad_norm": 0.5224001407623291, "learning_rate": 2.1757831300039733e-05, "loss": 0.0645, "step": 20932 }, { "epoch": 0.37071373861608825, "grad_norm": 0.6678071022033691, "learning_rate": 2.1757063185279893e-05, "loss": 0.0681, "step": 20933 }, { "epoch": 0.37073144815311665, "grad_norm": 0.6915563344955444, "learning_rate": 2.1756295048289995e-05, "loss": 0.0955, "step": 20934 }, { "epoch": 0.3707491576901451, "grad_norm": 0.7350720167160034, "learning_rate": 2.1755526889072578e-05, "loss": 0.0586, "step": 20935 }, { "epoch": 0.3707668672271735, "grad_norm": 0.893342137336731, "learning_rate": 2.175475870763015e-05, "loss": 0.1233, "step": 20936 }, { "epoch": 0.37078457676420196, "grad_norm": 1.9197216033935547, "learning_rate": 2.175399050396526e-05, "loss": 0.1233, "step": 20937 }, { "epoch": 0.37080228630123035, "grad_norm": 0.7241634130477905, "learning_rate": 2.1753222278080416e-05, "loss": 0.085, "step": 20938 }, { "epoch": 0.3708199958382588, "grad_norm": 1.11920964717865, "learning_rate": 2.1752454029978156e-05, "loss": 0.0985, "step": 20939 }, { "epoch": 0.3708377053752872, "grad_norm": 0.8034821152687073, "learning_rate": 2.1751685759661007e-05, "loss": 0.0705, "step": 20940 }, { "epoch": 0.37085541491231566, "grad_norm": 0.5054733157157898, "learning_rate": 2.1750917467131493e-05, "loss": 0.1083, "step": 20941 }, { "epoch": 0.37087312444934406, "grad_norm": 0.6098506450653076, "learning_rate": 2.1750149152392145e-05, "loss": 0.0811, "step": 20942 }, { "epoch": 0.3708908339863725, "grad_norm": 0.9778470396995544, "learning_rate": 2.1749380815445487e-05, "loss": 0.0989, "step": 20943 }, { "epoch": 0.3709085435234009, "grad_norm": 1.0890378952026367, "learning_rate": 2.174861245629405e-05, "loss": 0.0849, "step": 20944 }, { "epoch": 0.37092625306042937, "grad_norm": 0.6476827263832092, "learning_rate": 2.1747844074940358e-05, "loss": 0.0953, "step": 20945 }, { "epoch": 0.3709439625974578, "grad_norm": 0.6370628476142883, "learning_rate": 2.1747075671386946e-05, "loss": 0.0823, "step": 20946 }, { "epoch": 0.3709616721344862, "grad_norm": 1.0519566535949707, "learning_rate": 2.1746307245636334e-05, "loss": 0.1088, "step": 20947 }, { "epoch": 0.3709793816715147, "grad_norm": 0.7647165656089783, "learning_rate": 2.1745538797691057e-05, "loss": 0.1044, "step": 20948 }, { "epoch": 0.3709970912085431, "grad_norm": 0.5990086793899536, "learning_rate": 2.1744770327553638e-05, "loss": 0.0828, "step": 20949 }, { "epoch": 0.3710148007455715, "grad_norm": 0.41312652826309204, "learning_rate": 2.174400183522661e-05, "loss": 0.073, "step": 20950 }, { "epoch": 0.3710325102825999, "grad_norm": 0.7156381607055664, "learning_rate": 2.1743233320712487e-05, "loss": 0.0868, "step": 20951 }, { "epoch": 0.3710502198196284, "grad_norm": 0.9343305230140686, "learning_rate": 2.1742464784013817e-05, "loss": 0.0903, "step": 20952 }, { "epoch": 0.3710679293566568, "grad_norm": 0.6835010647773743, "learning_rate": 2.174169622513312e-05, "loss": 0.0973, "step": 20953 }, { "epoch": 0.37108563889368523, "grad_norm": 0.5211729407310486, "learning_rate": 2.174092764407292e-05, "loss": 0.1033, "step": 20954 }, { "epoch": 0.37110334843071363, "grad_norm": 1.0340676307678223, "learning_rate": 2.174015904083576e-05, "loss": 0.0992, "step": 20955 }, { "epoch": 0.3711210579677421, "grad_norm": 0.45237645506858826, "learning_rate": 2.1739390415424148e-05, "loss": 0.0485, "step": 20956 }, { "epoch": 0.3711387675047705, "grad_norm": 0.49920353293418884, "learning_rate": 2.1738621767840625e-05, "loss": 0.0644, "step": 20957 }, { "epoch": 0.37115647704179894, "grad_norm": 1.2131916284561157, "learning_rate": 2.173785309808772e-05, "loss": 0.0987, "step": 20958 }, { "epoch": 0.37117418657882734, "grad_norm": 0.5788254737854004, "learning_rate": 2.173708440616796e-05, "loss": 0.1131, "step": 20959 }, { "epoch": 0.3711918961158558, "grad_norm": 0.6696871519088745, "learning_rate": 2.1736315692083874e-05, "loss": 0.1156, "step": 20960 }, { "epoch": 0.37120960565288424, "grad_norm": 0.7622942924499512, "learning_rate": 2.1735546955837986e-05, "loss": 0.0804, "step": 20961 }, { "epoch": 0.37122731518991264, "grad_norm": 0.8858000040054321, "learning_rate": 2.1734778197432837e-05, "loss": 0.0755, "step": 20962 }, { "epoch": 0.3712450247269411, "grad_norm": 0.5685842037200928, "learning_rate": 2.173400941687094e-05, "loss": 0.0916, "step": 20963 }, { "epoch": 0.3712627342639695, "grad_norm": 0.7893264293670654, "learning_rate": 2.1733240614154842e-05, "loss": 0.091, "step": 20964 }, { "epoch": 0.37128044380099795, "grad_norm": 0.5252881646156311, "learning_rate": 2.173247178928706e-05, "loss": 0.0659, "step": 20965 }, { "epoch": 0.37129815333802635, "grad_norm": 0.49454328417778015, "learning_rate": 2.1731702942270123e-05, "loss": 0.0755, "step": 20966 }, { "epoch": 0.3713158628750548, "grad_norm": 0.7370595335960388, "learning_rate": 2.1730934073106572e-05, "loss": 0.1247, "step": 20967 }, { "epoch": 0.3713335724120832, "grad_norm": 0.8704926371574402, "learning_rate": 2.1730165181798924e-05, "loss": 0.0672, "step": 20968 }, { "epoch": 0.37135128194911166, "grad_norm": 0.7490140199661255, "learning_rate": 2.1729396268349712e-05, "loss": 0.079, "step": 20969 }, { "epoch": 0.37136899148614005, "grad_norm": 0.8330068588256836, "learning_rate": 2.1728627332761472e-05, "loss": 0.0621, "step": 20970 }, { "epoch": 0.3713867010231685, "grad_norm": 1.0175995826721191, "learning_rate": 2.1727858375036723e-05, "loss": 0.0681, "step": 20971 }, { "epoch": 0.3714044105601969, "grad_norm": 0.7401142120361328, "learning_rate": 2.1727089395178e-05, "loss": 0.0834, "step": 20972 }, { "epoch": 0.37142212009722536, "grad_norm": 0.7737642526626587, "learning_rate": 2.172632039318784e-05, "loss": 0.0988, "step": 20973 }, { "epoch": 0.37143982963425376, "grad_norm": 0.76678466796875, "learning_rate": 2.1725551369068762e-05, "loss": 0.0892, "step": 20974 }, { "epoch": 0.3714575391712822, "grad_norm": 0.7210634350776672, "learning_rate": 2.1724782322823294e-05, "loss": 0.0697, "step": 20975 }, { "epoch": 0.37147524870831067, "grad_norm": 0.46691229939460754, "learning_rate": 2.1724013254453982e-05, "loss": 0.083, "step": 20976 }, { "epoch": 0.37149295824533907, "grad_norm": 0.8241317868232727, "learning_rate": 2.1723244163963345e-05, "loss": 0.079, "step": 20977 }, { "epoch": 0.3715106677823675, "grad_norm": 0.6876785159111023, "learning_rate": 2.1722475051353913e-05, "loss": 0.0837, "step": 20978 }, { "epoch": 0.3715283773193959, "grad_norm": 0.9702771306037903, "learning_rate": 2.172170591662822e-05, "loss": 0.0958, "step": 20979 }, { "epoch": 0.3715460868564244, "grad_norm": 0.4546564817428589, "learning_rate": 2.172093675978879e-05, "loss": 0.079, "step": 20980 }, { "epoch": 0.37156379639345277, "grad_norm": 0.6978133916854858, "learning_rate": 2.1720167580838158e-05, "loss": 0.0723, "step": 20981 }, { "epoch": 0.3715815059304812, "grad_norm": 1.0343605279922485, "learning_rate": 2.171939837977886e-05, "loss": 0.0834, "step": 20982 }, { "epoch": 0.3715992154675096, "grad_norm": 0.5724114179611206, "learning_rate": 2.171862915661342e-05, "loss": 0.0974, "step": 20983 }, { "epoch": 0.3716169250045381, "grad_norm": 0.4161813259124756, "learning_rate": 2.1717859911344363e-05, "loss": 0.0914, "step": 20984 }, { "epoch": 0.3716346345415665, "grad_norm": 0.43822166323661804, "learning_rate": 2.171709064397423e-05, "loss": 0.0826, "step": 20985 }, { "epoch": 0.37165234407859493, "grad_norm": 0.7962623834609985, "learning_rate": 2.171632135450555e-05, "loss": 0.0799, "step": 20986 }, { "epoch": 0.37167005361562333, "grad_norm": 0.7568520307540894, "learning_rate": 2.171555204294085e-05, "loss": 0.1007, "step": 20987 }, { "epoch": 0.3716877631526518, "grad_norm": 0.4457346796989441, "learning_rate": 2.1714782709282667e-05, "loss": 0.0785, "step": 20988 }, { "epoch": 0.3717054726896802, "grad_norm": 0.8148764371871948, "learning_rate": 2.171401335353352e-05, "loss": 0.0802, "step": 20989 }, { "epoch": 0.37172318222670864, "grad_norm": 0.9168818593025208, "learning_rate": 2.1713243975695956e-05, "loss": 0.0888, "step": 20990 }, { "epoch": 0.3717408917637371, "grad_norm": 0.6568390130996704, "learning_rate": 2.1712474575772494e-05, "loss": 0.062, "step": 20991 }, { "epoch": 0.3717586013007655, "grad_norm": 0.8014701008796692, "learning_rate": 2.1711705153765673e-05, "loss": 0.091, "step": 20992 }, { "epoch": 0.37177631083779394, "grad_norm": 0.57486492395401, "learning_rate": 2.171093570967802e-05, "loss": 0.0655, "step": 20993 }, { "epoch": 0.37179402037482234, "grad_norm": 0.7106257081031799, "learning_rate": 2.1710166243512063e-05, "loss": 0.0623, "step": 20994 }, { "epoch": 0.3718117299118508, "grad_norm": 0.9496456980705261, "learning_rate": 2.170939675527034e-05, "loss": 0.1147, "step": 20995 }, { "epoch": 0.3718294394488792, "grad_norm": 1.0347453355789185, "learning_rate": 2.1708627244955384e-05, "loss": 0.0945, "step": 20996 }, { "epoch": 0.37184714898590765, "grad_norm": 0.8582056164741516, "learning_rate": 2.170785771256972e-05, "loss": 0.0798, "step": 20997 }, { "epoch": 0.37186485852293605, "grad_norm": 0.964680016040802, "learning_rate": 2.1707088158115882e-05, "loss": 0.0898, "step": 20998 }, { "epoch": 0.3718825680599645, "grad_norm": 0.85201096534729, "learning_rate": 2.17063185815964e-05, "loss": 0.0814, "step": 20999 }, { "epoch": 0.3719002775969929, "grad_norm": 0.43901604413986206, "learning_rate": 2.1705548983013813e-05, "loss": 0.0672, "step": 21000 }, { "epoch": 0.37191798713402136, "grad_norm": 0.5348149538040161, "learning_rate": 2.1704779362370648e-05, "loss": 0.0688, "step": 21001 }, { "epoch": 0.37193569667104975, "grad_norm": 0.7140443325042725, "learning_rate": 2.1704009719669433e-05, "loss": 0.0914, "step": 21002 }, { "epoch": 0.3719534062080782, "grad_norm": 0.5604316592216492, "learning_rate": 2.170324005491271e-05, "loss": 0.079, "step": 21003 }, { "epoch": 0.3719711157451066, "grad_norm": 1.0052582025527954, "learning_rate": 2.1702470368102998e-05, "loss": 0.0795, "step": 21004 }, { "epoch": 0.37198882528213506, "grad_norm": 0.5758146643638611, "learning_rate": 2.1701700659242842e-05, "loss": 0.0793, "step": 21005 }, { "epoch": 0.3720065348191635, "grad_norm": 1.0073139667510986, "learning_rate": 2.1700930928334767e-05, "loss": 0.1037, "step": 21006 }, { "epoch": 0.3720242443561919, "grad_norm": 0.7958300113677979, "learning_rate": 2.1700161175381305e-05, "loss": 0.086, "step": 21007 }, { "epoch": 0.37204195389322037, "grad_norm": 0.8101286292076111, "learning_rate": 2.1699391400384993e-05, "loss": 0.0961, "step": 21008 }, { "epoch": 0.37205966343024877, "grad_norm": 0.8219402432441711, "learning_rate": 2.1698621603348355e-05, "loss": 0.0996, "step": 21009 }, { "epoch": 0.3720773729672772, "grad_norm": 0.7373923063278198, "learning_rate": 2.1697851784273935e-05, "loss": 0.0565, "step": 21010 }, { "epoch": 0.3720950825043056, "grad_norm": 0.6059675216674805, "learning_rate": 2.1697081943164262e-05, "loss": 0.1294, "step": 21011 }, { "epoch": 0.3721127920413341, "grad_norm": 0.38397347927093506, "learning_rate": 2.1696312080021866e-05, "loss": 0.071, "step": 21012 }, { "epoch": 0.37213050157836247, "grad_norm": 0.8640123605728149, "learning_rate": 2.1695542194849278e-05, "loss": 0.0863, "step": 21013 }, { "epoch": 0.3721482111153909, "grad_norm": 0.47386634349823, "learning_rate": 2.1694772287649032e-05, "loss": 0.0569, "step": 21014 }, { "epoch": 0.3721659206524193, "grad_norm": 1.0248874425888062, "learning_rate": 2.1694002358423667e-05, "loss": 0.0925, "step": 21015 }, { "epoch": 0.3721836301894478, "grad_norm": 1.2440022230148315, "learning_rate": 2.1693232407175706e-05, "loss": 0.1347, "step": 21016 }, { "epoch": 0.3722013397264762, "grad_norm": 0.7513045072555542, "learning_rate": 2.1692462433907696e-05, "loss": 0.067, "step": 21017 }, { "epoch": 0.37221904926350463, "grad_norm": 0.7803346514701843, "learning_rate": 2.169169243862215e-05, "loss": 0.0877, "step": 21018 }, { "epoch": 0.37223675880053303, "grad_norm": 0.8102918267250061, "learning_rate": 2.169092242132162e-05, "loss": 0.1437, "step": 21019 }, { "epoch": 0.3722544683375615, "grad_norm": 0.8339950442314148, "learning_rate": 2.169015238200863e-05, "loss": 0.0937, "step": 21020 }, { "epoch": 0.37227217787458994, "grad_norm": 0.8243429064750671, "learning_rate": 2.1689382320685715e-05, "loss": 0.1174, "step": 21021 }, { "epoch": 0.37228988741161834, "grad_norm": 0.30009809136390686, "learning_rate": 2.1688612237355414e-05, "loss": 0.0929, "step": 21022 }, { "epoch": 0.3723075969486468, "grad_norm": 0.7892488837242126, "learning_rate": 2.1687842132020243e-05, "loss": 0.0741, "step": 21023 }, { "epoch": 0.3723253064856752, "grad_norm": 0.8856209516525269, "learning_rate": 2.168707200468276e-05, "loss": 0.0733, "step": 21024 }, { "epoch": 0.37234301602270364, "grad_norm": 1.1039150953292847, "learning_rate": 2.1686301855345483e-05, "loss": 0.0795, "step": 21025 }, { "epoch": 0.37236072555973204, "grad_norm": 1.0738693475723267, "learning_rate": 2.168553168401095e-05, "loss": 0.1268, "step": 21026 }, { "epoch": 0.3723784350967605, "grad_norm": 0.7291595935821533, "learning_rate": 2.168476149068169e-05, "loss": 0.0974, "step": 21027 }, { "epoch": 0.3723961446337889, "grad_norm": 0.6533539891242981, "learning_rate": 2.168399127536024e-05, "loss": 0.0806, "step": 21028 }, { "epoch": 0.37241385417081735, "grad_norm": 0.553142249584198, "learning_rate": 2.168322103804914e-05, "loss": 0.0979, "step": 21029 }, { "epoch": 0.37243156370784575, "grad_norm": 0.7064307332038879, "learning_rate": 2.168245077875092e-05, "loss": 0.0651, "step": 21030 }, { "epoch": 0.3724492732448742, "grad_norm": 0.6711593270301819, "learning_rate": 2.1681680497468106e-05, "loss": 0.0901, "step": 21031 }, { "epoch": 0.3724669827819026, "grad_norm": 0.7231063842773438, "learning_rate": 2.168091019420324e-05, "loss": 0.0885, "step": 21032 }, { "epoch": 0.37248469231893105, "grad_norm": 0.7682909369468689, "learning_rate": 2.168013986895886e-05, "loss": 0.0649, "step": 21033 }, { "epoch": 0.37250240185595945, "grad_norm": 0.9545804858207703, "learning_rate": 2.167936952173749e-05, "loss": 0.1133, "step": 21034 }, { "epoch": 0.3725201113929879, "grad_norm": 0.9813287258148193, "learning_rate": 2.1678599152541676e-05, "loss": 0.103, "step": 21035 }, { "epoch": 0.37253782093001636, "grad_norm": 0.5617026090621948, "learning_rate": 2.1677828761373946e-05, "loss": 0.1157, "step": 21036 }, { "epoch": 0.37255553046704476, "grad_norm": 0.9126536846160889, "learning_rate": 2.167705834823683e-05, "loss": 0.0811, "step": 21037 }, { "epoch": 0.3725732400040732, "grad_norm": 0.6644758582115173, "learning_rate": 2.1676287913132866e-05, "loss": 0.0962, "step": 21038 }, { "epoch": 0.3725909495411016, "grad_norm": 0.6129061579704285, "learning_rate": 2.167551745606459e-05, "loss": 0.096, "step": 21039 }, { "epoch": 0.37260865907813007, "grad_norm": 0.5344071984291077, "learning_rate": 2.1674746977034544e-05, "loss": 0.0928, "step": 21040 }, { "epoch": 0.37262636861515847, "grad_norm": 0.8569494485855103, "learning_rate": 2.167397647604525e-05, "loss": 0.072, "step": 21041 }, { "epoch": 0.3726440781521869, "grad_norm": 0.7939602732658386, "learning_rate": 2.167320595309925e-05, "loss": 0.0729, "step": 21042 }, { "epoch": 0.3726617876892153, "grad_norm": 0.8942878842353821, "learning_rate": 2.167243540819908e-05, "loss": 0.1094, "step": 21043 }, { "epoch": 0.37267949722624377, "grad_norm": 0.3265765905380249, "learning_rate": 2.167166484134727e-05, "loss": 0.0624, "step": 21044 }, { "epoch": 0.37269720676327217, "grad_norm": 0.8259015083312988, "learning_rate": 2.167089425254636e-05, "loss": 0.0877, "step": 21045 }, { "epoch": 0.3727149163003006, "grad_norm": 0.5656080842018127, "learning_rate": 2.1670123641798877e-05, "loss": 0.0788, "step": 21046 }, { "epoch": 0.372732625837329, "grad_norm": 0.5932666063308716, "learning_rate": 2.1669353009107364e-05, "loss": 0.0649, "step": 21047 }, { "epoch": 0.3727503353743575, "grad_norm": 0.4525701403617859, "learning_rate": 2.1668582354474353e-05, "loss": 0.0591, "step": 21048 }, { "epoch": 0.37276804491138593, "grad_norm": 0.757317841053009, "learning_rate": 2.166781167790238e-05, "loss": 0.0924, "step": 21049 }, { "epoch": 0.37278575444841433, "grad_norm": 1.02993643283844, "learning_rate": 2.1667040979393985e-05, "loss": 0.0888, "step": 21050 }, { "epoch": 0.3728034639854428, "grad_norm": 0.9802964925765991, "learning_rate": 2.16662702589517e-05, "loss": 0.0783, "step": 21051 }, { "epoch": 0.3728211735224712, "grad_norm": 0.9723767638206482, "learning_rate": 2.1665499516578057e-05, "loss": 0.0805, "step": 21052 }, { "epoch": 0.37283888305949964, "grad_norm": 0.5642349720001221, "learning_rate": 2.1664728752275594e-05, "loss": 0.0723, "step": 21053 }, { "epoch": 0.37285659259652804, "grad_norm": 0.6946759223937988, "learning_rate": 2.1663957966046853e-05, "loss": 0.0778, "step": 21054 }, { "epoch": 0.3728743021335565, "grad_norm": 0.39446157217025757, "learning_rate": 2.166318715789436e-05, "loss": 0.0576, "step": 21055 }, { "epoch": 0.3728920116705849, "grad_norm": 0.7387140393257141, "learning_rate": 2.1662416327820655e-05, "loss": 0.0643, "step": 21056 }, { "epoch": 0.37290972120761334, "grad_norm": 0.7700356841087341, "learning_rate": 2.1661645475828272e-05, "loss": 0.0861, "step": 21057 }, { "epoch": 0.37292743074464174, "grad_norm": 0.7326209545135498, "learning_rate": 2.1660874601919753e-05, "loss": 0.0907, "step": 21058 }, { "epoch": 0.3729451402816702, "grad_norm": 0.6964368224143982, "learning_rate": 2.1660103706097633e-05, "loss": 0.0793, "step": 21059 }, { "epoch": 0.3729628498186986, "grad_norm": 0.766129732131958, "learning_rate": 2.165933278836444e-05, "loss": 0.0703, "step": 21060 }, { "epoch": 0.37298055935572705, "grad_norm": 0.48279914259910583, "learning_rate": 2.1658561848722717e-05, "loss": 0.0818, "step": 21061 }, { "epoch": 0.37299826889275545, "grad_norm": 0.7839556336402893, "learning_rate": 2.1657790887174998e-05, "loss": 0.0865, "step": 21062 }, { "epoch": 0.3730159784297839, "grad_norm": 0.8396536111831665, "learning_rate": 2.1657019903723824e-05, "loss": 0.097, "step": 21063 }, { "epoch": 0.37303368796681236, "grad_norm": 0.7271554470062256, "learning_rate": 2.165624889837172e-05, "loss": 0.0623, "step": 21064 }, { "epoch": 0.37305139750384075, "grad_norm": 0.35720640420913696, "learning_rate": 2.165547787112124e-05, "loss": 0.0667, "step": 21065 }, { "epoch": 0.3730691070408692, "grad_norm": 0.7284597158432007, "learning_rate": 2.1654706821974905e-05, "loss": 0.0787, "step": 21066 }, { "epoch": 0.3730868165778976, "grad_norm": 0.5186518430709839, "learning_rate": 2.1653935750935262e-05, "loss": 0.0529, "step": 21067 }, { "epoch": 0.37310452611492606, "grad_norm": 0.7246329188346863, "learning_rate": 2.1653164658004846e-05, "loss": 0.0672, "step": 21068 }, { "epoch": 0.37312223565195446, "grad_norm": 0.6943327188491821, "learning_rate": 2.1652393543186183e-05, "loss": 0.065, "step": 21069 }, { "epoch": 0.3731399451889829, "grad_norm": 0.7323505878448486, "learning_rate": 2.165162240648182e-05, "loss": 0.0872, "step": 21070 }, { "epoch": 0.3731576547260113, "grad_norm": 0.7264659404754639, "learning_rate": 2.165085124789429e-05, "loss": 0.0706, "step": 21071 }, { "epoch": 0.37317536426303977, "grad_norm": 1.0952119827270508, "learning_rate": 2.1650080067426137e-05, "loss": 0.1091, "step": 21072 }, { "epoch": 0.37319307380006816, "grad_norm": 0.9166430234909058, "learning_rate": 2.1649308865079894e-05, "loss": 0.0762, "step": 21073 }, { "epoch": 0.3732107833370966, "grad_norm": 0.5400385856628418, "learning_rate": 2.1648537640858093e-05, "loss": 0.0703, "step": 21074 }, { "epoch": 0.373228492874125, "grad_norm": 1.4735440015792847, "learning_rate": 2.164776639476328e-05, "loss": 0.0946, "step": 21075 }, { "epoch": 0.37324620241115347, "grad_norm": 0.9125961065292358, "learning_rate": 2.1646995126797984e-05, "loss": 0.0625, "step": 21076 }, { "epoch": 0.37326391194818187, "grad_norm": 0.5876924991607666, "learning_rate": 2.1646223836964747e-05, "loss": 0.089, "step": 21077 }, { "epoch": 0.3732816214852103, "grad_norm": 0.6164084672927856, "learning_rate": 2.1645452525266107e-05, "loss": 0.0925, "step": 21078 }, { "epoch": 0.3732993310222388, "grad_norm": 0.749619722366333, "learning_rate": 2.16446811917046e-05, "loss": 0.0856, "step": 21079 }, { "epoch": 0.3733170405592672, "grad_norm": 0.6874075531959534, "learning_rate": 2.1643909836282767e-05, "loss": 0.0775, "step": 21080 }, { "epoch": 0.37333475009629563, "grad_norm": 0.3675301671028137, "learning_rate": 2.164313845900314e-05, "loss": 0.0605, "step": 21081 }, { "epoch": 0.37335245963332403, "grad_norm": 1.3223159313201904, "learning_rate": 2.1642367059868256e-05, "loss": 0.1245, "step": 21082 }, { "epoch": 0.3733701691703525, "grad_norm": 0.6471943855285645, "learning_rate": 2.164159563888066e-05, "loss": 0.0855, "step": 21083 }, { "epoch": 0.3733878787073809, "grad_norm": 0.9975151419639587, "learning_rate": 2.1640824196042886e-05, "loss": 0.0901, "step": 21084 }, { "epoch": 0.37340558824440934, "grad_norm": 1.0884448289871216, "learning_rate": 2.1640052731357468e-05, "loss": 0.1008, "step": 21085 }, { "epoch": 0.37342329778143774, "grad_norm": 0.7032083868980408, "learning_rate": 2.1639281244826955e-05, "loss": 0.1209, "step": 21086 }, { "epoch": 0.3734410073184662, "grad_norm": 0.6448361277580261, "learning_rate": 2.163850973645387e-05, "loss": 0.0615, "step": 21087 }, { "epoch": 0.3734587168554946, "grad_norm": 1.0797655582427979, "learning_rate": 2.1637738206240768e-05, "loss": 0.0888, "step": 21088 }, { "epoch": 0.37347642639252304, "grad_norm": 0.8546964526176453, "learning_rate": 2.1636966654190176e-05, "loss": 0.102, "step": 21089 }, { "epoch": 0.37349413592955144, "grad_norm": 0.6845414638519287, "learning_rate": 2.1636195080304633e-05, "loss": 0.0917, "step": 21090 }, { "epoch": 0.3735118454665799, "grad_norm": 0.5316368341445923, "learning_rate": 2.163542348458668e-05, "loss": 0.076, "step": 21091 }, { "epoch": 0.3735295550036083, "grad_norm": 0.9374380707740784, "learning_rate": 2.1634651867038853e-05, "loss": 0.1144, "step": 21092 }, { "epoch": 0.37354726454063675, "grad_norm": 0.7480871081352234, "learning_rate": 2.1633880227663698e-05, "loss": 0.0982, "step": 21093 }, { "epoch": 0.3735649740776652, "grad_norm": 0.474041610956192, "learning_rate": 2.1633108566463742e-05, "loss": 0.0899, "step": 21094 }, { "epoch": 0.3735826836146936, "grad_norm": 0.6863364577293396, "learning_rate": 2.163233688344153e-05, "loss": 0.0762, "step": 21095 }, { "epoch": 0.37360039315172205, "grad_norm": 0.4015797972679138, "learning_rate": 2.1631565178599603e-05, "loss": 0.062, "step": 21096 }, { "epoch": 0.37361810268875045, "grad_norm": 0.7581515312194824, "learning_rate": 2.16307934519405e-05, "loss": 0.0765, "step": 21097 }, { "epoch": 0.3736358122257789, "grad_norm": 1.0349361896514893, "learning_rate": 2.1630021703466753e-05, "loss": 0.1153, "step": 21098 }, { "epoch": 0.3736535217628073, "grad_norm": 0.9280045032501221, "learning_rate": 2.1629249933180904e-05, "loss": 0.1084, "step": 21099 }, { "epoch": 0.37367123129983576, "grad_norm": 0.41668057441711426, "learning_rate": 2.16284781410855e-05, "loss": 0.0706, "step": 21100 }, { "epoch": 0.37368894083686416, "grad_norm": 1.1586755514144897, "learning_rate": 2.1627706327183067e-05, "loss": 0.0971, "step": 21101 }, { "epoch": 0.3737066503738926, "grad_norm": 0.6432660222053528, "learning_rate": 2.1626934491476154e-05, "loss": 0.0571, "step": 21102 }, { "epoch": 0.373724359910921, "grad_norm": 0.7094681859016418, "learning_rate": 2.1626162633967297e-05, "loss": 0.1159, "step": 21103 }, { "epoch": 0.37374206944794947, "grad_norm": 0.5086770057678223, "learning_rate": 2.162539075465903e-05, "loss": 0.0619, "step": 21104 }, { "epoch": 0.37375977898497786, "grad_norm": 0.6739053726196289, "learning_rate": 2.16246188535539e-05, "loss": 0.0837, "step": 21105 }, { "epoch": 0.3737774885220063, "grad_norm": 0.7253195643424988, "learning_rate": 2.1623846930654446e-05, "loss": 0.0954, "step": 21106 }, { "epoch": 0.3737951980590347, "grad_norm": 0.6856134533882141, "learning_rate": 2.162307498596321e-05, "loss": 0.0863, "step": 21107 }, { "epoch": 0.37381290759606317, "grad_norm": 0.6856017112731934, "learning_rate": 2.1622303019482716e-05, "loss": 0.074, "step": 21108 }, { "epoch": 0.3738306171330916, "grad_norm": 0.44502344727516174, "learning_rate": 2.162153103121552e-05, "loss": 0.0497, "step": 21109 }, { "epoch": 0.37384832667012, "grad_norm": 0.6096222996711731, "learning_rate": 2.1620759021164158e-05, "loss": 0.0556, "step": 21110 }, { "epoch": 0.3738660362071485, "grad_norm": 0.608140766620636, "learning_rate": 2.161998698933117e-05, "loss": 0.0559, "step": 21111 }, { "epoch": 0.3738837457441769, "grad_norm": 0.6140830516815186, "learning_rate": 2.1619214935719092e-05, "loss": 0.0905, "step": 21112 }, { "epoch": 0.37390145528120533, "grad_norm": 0.689419686794281, "learning_rate": 2.1618442860330464e-05, "loss": 0.0602, "step": 21113 }, { "epoch": 0.37391916481823373, "grad_norm": 0.5259314775466919, "learning_rate": 2.161767076316783e-05, "loss": 0.1209, "step": 21114 }, { "epoch": 0.3739368743552622, "grad_norm": 0.9592459201812744, "learning_rate": 2.161689864423373e-05, "loss": 0.0727, "step": 21115 }, { "epoch": 0.3739545838922906, "grad_norm": 0.888365626335144, "learning_rate": 2.1616126503530705e-05, "loss": 0.082, "step": 21116 }, { "epoch": 0.37397229342931904, "grad_norm": 0.9854471683502197, "learning_rate": 2.161535434106129e-05, "loss": 0.089, "step": 21117 }, { "epoch": 0.37399000296634743, "grad_norm": 0.5135677456855774, "learning_rate": 2.1614582156828027e-05, "loss": 0.0747, "step": 21118 }, { "epoch": 0.3740077125033759, "grad_norm": 1.1466865539550781, "learning_rate": 2.1613809950833458e-05, "loss": 0.0823, "step": 21119 }, { "epoch": 0.3740254220404043, "grad_norm": 0.8646061420440674, "learning_rate": 2.1613037723080124e-05, "loss": 0.0819, "step": 21120 }, { "epoch": 0.37404313157743274, "grad_norm": 0.9326533079147339, "learning_rate": 2.1612265473570565e-05, "loss": 0.0662, "step": 21121 }, { "epoch": 0.37406084111446114, "grad_norm": 0.6056633591651917, "learning_rate": 2.1611493202307323e-05, "loss": 0.0645, "step": 21122 }, { "epoch": 0.3740785506514896, "grad_norm": 0.5893921852111816, "learning_rate": 2.1610720909292933e-05, "loss": 0.0728, "step": 21123 }, { "epoch": 0.37409626018851805, "grad_norm": 0.6848343014717102, "learning_rate": 2.1609948594529945e-05, "loss": 0.064, "step": 21124 }, { "epoch": 0.37411396972554645, "grad_norm": 1.0706902742385864, "learning_rate": 2.1609176258020894e-05, "loss": 0.0908, "step": 21125 }, { "epoch": 0.3741316792625749, "grad_norm": 0.5502674579620361, "learning_rate": 2.1608403899768317e-05, "loss": 0.0607, "step": 21126 }, { "epoch": 0.3741493887996033, "grad_norm": 0.6796875596046448, "learning_rate": 2.1607631519774766e-05, "loss": 0.0883, "step": 21127 }, { "epoch": 0.37416709833663175, "grad_norm": 0.8598068356513977, "learning_rate": 2.1606859118042765e-05, "loss": 0.0964, "step": 21128 }, { "epoch": 0.37418480787366015, "grad_norm": 0.7498263120651245, "learning_rate": 2.1606086694574875e-05, "loss": 0.1051, "step": 21129 }, { "epoch": 0.3742025174106886, "grad_norm": 0.5555641651153564, "learning_rate": 2.1605314249373628e-05, "loss": 0.0597, "step": 21130 }, { "epoch": 0.374220226947717, "grad_norm": 0.38227760791778564, "learning_rate": 2.1604541782441564e-05, "loss": 0.074, "step": 21131 }, { "epoch": 0.37423793648474546, "grad_norm": 1.1491706371307373, "learning_rate": 2.160376929378122e-05, "loss": 0.062, "step": 21132 }, { "epoch": 0.37425564602177386, "grad_norm": 0.7547751665115356, "learning_rate": 2.1602996783395147e-05, "loss": 0.0789, "step": 21133 }, { "epoch": 0.3742733555588023, "grad_norm": 0.7200036644935608, "learning_rate": 2.1602224251285887e-05, "loss": 0.0742, "step": 21134 }, { "epoch": 0.3742910650958307, "grad_norm": 0.8769203424453735, "learning_rate": 2.1601451697455967e-05, "loss": 0.0915, "step": 21135 }, { "epoch": 0.37430877463285916, "grad_norm": 0.8293478488922119, "learning_rate": 2.160067912190795e-05, "loss": 0.0765, "step": 21136 }, { "epoch": 0.37432648416988756, "grad_norm": 0.6595039367675781, "learning_rate": 2.1599906524644357e-05, "loss": 0.0801, "step": 21137 }, { "epoch": 0.374344193706916, "grad_norm": 1.0383187532424927, "learning_rate": 2.1599133905667744e-05, "loss": 0.1048, "step": 21138 }, { "epoch": 0.37436190324394447, "grad_norm": 0.671323835849762, "learning_rate": 2.1598361264980647e-05, "loss": 0.0745, "step": 21139 }, { "epoch": 0.37437961278097287, "grad_norm": 1.555533766746521, "learning_rate": 2.1597588602585607e-05, "loss": 0.1073, "step": 21140 }, { "epoch": 0.3743973223180013, "grad_norm": 0.7684475183486938, "learning_rate": 2.159681591848517e-05, "loss": 0.0441, "step": 21141 }, { "epoch": 0.3744150318550297, "grad_norm": 0.8084821701049805, "learning_rate": 2.1596043212681878e-05, "loss": 0.0893, "step": 21142 }, { "epoch": 0.3744327413920582, "grad_norm": 0.6547553539276123, "learning_rate": 2.1595270485178264e-05, "loss": 0.072, "step": 21143 }, { "epoch": 0.3744504509290866, "grad_norm": 1.0158963203430176, "learning_rate": 2.1594497735976882e-05, "loss": 0.0773, "step": 21144 }, { "epoch": 0.37446816046611503, "grad_norm": 0.7374564409255981, "learning_rate": 2.159372496508027e-05, "loss": 0.0908, "step": 21145 }, { "epoch": 0.37448587000314343, "grad_norm": 0.6292663216590881, "learning_rate": 2.1592952172490972e-05, "loss": 0.0629, "step": 21146 }, { "epoch": 0.3745035795401719, "grad_norm": 0.6462604999542236, "learning_rate": 2.1592179358211516e-05, "loss": 0.0997, "step": 21147 }, { "epoch": 0.3745212890772003, "grad_norm": 1.2572910785675049, "learning_rate": 2.1591406522244467e-05, "loss": 0.0642, "step": 21148 }, { "epoch": 0.37453899861422874, "grad_norm": 1.9980946779251099, "learning_rate": 2.1590633664592357e-05, "loss": 0.1231, "step": 21149 }, { "epoch": 0.37455670815125713, "grad_norm": 0.46783626079559326, "learning_rate": 2.158986078525773e-05, "loss": 0.1061, "step": 21150 }, { "epoch": 0.3745744176882856, "grad_norm": 0.7184494733810425, "learning_rate": 2.1589087884243122e-05, "loss": 0.0812, "step": 21151 }, { "epoch": 0.374592127225314, "grad_norm": 1.7198227643966675, "learning_rate": 2.1588314961551083e-05, "loss": 0.091, "step": 21152 }, { "epoch": 0.37460983676234244, "grad_norm": 0.9704546928405762, "learning_rate": 2.1587542017184155e-05, "loss": 0.0935, "step": 21153 }, { "epoch": 0.3746275462993709, "grad_norm": 0.6561893820762634, "learning_rate": 2.1586769051144883e-05, "loss": 0.0758, "step": 21154 }, { "epoch": 0.3746452558363993, "grad_norm": 0.7255266904830933, "learning_rate": 2.1585996063435807e-05, "loss": 0.0667, "step": 21155 }, { "epoch": 0.37466296537342775, "grad_norm": 0.8031526803970337, "learning_rate": 2.1585223054059463e-05, "loss": 0.0916, "step": 21156 }, { "epoch": 0.37468067491045615, "grad_norm": 0.7887926697731018, "learning_rate": 2.1584450023018408e-05, "loss": 0.0844, "step": 21157 }, { "epoch": 0.3746983844474846, "grad_norm": 0.5163352489471436, "learning_rate": 2.1583676970315173e-05, "loss": 0.0502, "step": 21158 }, { "epoch": 0.374716093984513, "grad_norm": 0.8977475166320801, "learning_rate": 2.1582903895952317e-05, "loss": 0.1159, "step": 21159 }, { "epoch": 0.37473380352154145, "grad_norm": 0.7538403868675232, "learning_rate": 2.158213079993236e-05, "loss": 0.1026, "step": 21160 }, { "epoch": 0.37475151305856985, "grad_norm": 0.7926469445228577, "learning_rate": 2.1581357682257865e-05, "loss": 0.0773, "step": 21161 }, { "epoch": 0.3747692225955983, "grad_norm": 0.8039175868034363, "learning_rate": 2.158058454293137e-05, "loss": 0.0908, "step": 21162 }, { "epoch": 0.3747869321326267, "grad_norm": 0.7146045565605164, "learning_rate": 2.1579811381955413e-05, "loss": 0.0755, "step": 21163 }, { "epoch": 0.37480464166965516, "grad_norm": 0.7266887426376343, "learning_rate": 2.1579038199332547e-05, "loss": 0.0606, "step": 21164 }, { "epoch": 0.37482235120668356, "grad_norm": 0.7171911001205444, "learning_rate": 2.1578264995065305e-05, "loss": 0.0752, "step": 21165 }, { "epoch": 0.374840060743712, "grad_norm": 0.9690918922424316, "learning_rate": 2.1577491769156243e-05, "loss": 0.1094, "step": 21166 }, { "epoch": 0.3748577702807404, "grad_norm": 0.9018309116363525, "learning_rate": 2.1576718521607895e-05, "loss": 0.1015, "step": 21167 }, { "epoch": 0.37487547981776886, "grad_norm": 0.9040161371231079, "learning_rate": 2.157594525242281e-05, "loss": 0.07, "step": 21168 }, { "epoch": 0.3748931893547973, "grad_norm": 0.9230950474739075, "learning_rate": 2.1575171961603525e-05, "loss": 0.094, "step": 21169 }, { "epoch": 0.3749108988918257, "grad_norm": 0.7560517191886902, "learning_rate": 2.1574398649152596e-05, "loss": 0.1019, "step": 21170 }, { "epoch": 0.37492860842885417, "grad_norm": 0.66535884141922, "learning_rate": 2.1573625315072554e-05, "loss": 0.0616, "step": 21171 }, { "epoch": 0.37494631796588257, "grad_norm": 0.6671314835548401, "learning_rate": 2.1572851959365956e-05, "loss": 0.1147, "step": 21172 }, { "epoch": 0.374964027502911, "grad_norm": 0.6511019468307495, "learning_rate": 2.1572078582035335e-05, "loss": 0.0617, "step": 21173 }, { "epoch": 0.3749817370399394, "grad_norm": 0.772153377532959, "learning_rate": 2.1571305183083242e-05, "loss": 0.0896, "step": 21174 }, { "epoch": 0.3749994465769679, "grad_norm": 0.5802702307701111, "learning_rate": 2.1570531762512223e-05, "loss": 0.0869, "step": 21175 }, { "epoch": 0.3750171561139963, "grad_norm": 0.6906421184539795, "learning_rate": 2.156975832032481e-05, "loss": 0.0675, "step": 21176 }, { "epoch": 0.37503486565102473, "grad_norm": 0.8771696090698242, "learning_rate": 2.1568984856523564e-05, "loss": 0.0873, "step": 21177 }, { "epoch": 0.37505257518805313, "grad_norm": 0.9224531054496765, "learning_rate": 2.1568211371111024e-05, "loss": 0.0989, "step": 21178 }, { "epoch": 0.3750702847250816, "grad_norm": 0.8934210538864136, "learning_rate": 2.156743786408973e-05, "loss": 0.0894, "step": 21179 }, { "epoch": 0.37508799426211, "grad_norm": 0.654066801071167, "learning_rate": 2.1566664335462225e-05, "loss": 0.0741, "step": 21180 }, { "epoch": 0.37510570379913843, "grad_norm": 1.3684501647949219, "learning_rate": 2.156589078523106e-05, "loss": 0.1366, "step": 21181 }, { "epoch": 0.37512341333616683, "grad_norm": 0.5494108200073242, "learning_rate": 2.1565117213398785e-05, "loss": 0.0955, "step": 21182 }, { "epoch": 0.3751411228731953, "grad_norm": 0.7259804010391235, "learning_rate": 2.1564343619967934e-05, "loss": 0.1044, "step": 21183 }, { "epoch": 0.37515883241022374, "grad_norm": 0.7558956146240234, "learning_rate": 2.1563570004941058e-05, "loss": 0.0687, "step": 21184 }, { "epoch": 0.37517654194725214, "grad_norm": 1.132266879081726, "learning_rate": 2.15627963683207e-05, "loss": 0.0956, "step": 21185 }, { "epoch": 0.3751942514842806, "grad_norm": 1.0363937616348267, "learning_rate": 2.1562022710109405e-05, "loss": 0.0804, "step": 21186 }, { "epoch": 0.375211961021309, "grad_norm": 0.8232042193412781, "learning_rate": 2.1561249030309722e-05, "loss": 0.0933, "step": 21187 }, { "epoch": 0.37522967055833745, "grad_norm": 0.830522894859314, "learning_rate": 2.1560475328924187e-05, "loss": 0.1057, "step": 21188 }, { "epoch": 0.37524738009536585, "grad_norm": 0.6107243299484253, "learning_rate": 2.155970160595536e-05, "loss": 0.0738, "step": 21189 }, { "epoch": 0.3752650896323943, "grad_norm": 0.6158187985420227, "learning_rate": 2.1558927861405767e-05, "loss": 0.0617, "step": 21190 }, { "epoch": 0.3752827991694227, "grad_norm": 0.8592634201049805, "learning_rate": 2.1558154095277973e-05, "loss": 0.1183, "step": 21191 }, { "epoch": 0.37530050870645115, "grad_norm": 0.6385435461997986, "learning_rate": 2.1557380307574512e-05, "loss": 0.0588, "step": 21192 }, { "epoch": 0.37531821824347955, "grad_norm": 0.6957159638404846, "learning_rate": 2.1556606498297935e-05, "loss": 0.0946, "step": 21193 }, { "epoch": 0.375335927780508, "grad_norm": 0.8034921884536743, "learning_rate": 2.1555832667450787e-05, "loss": 0.0677, "step": 21194 }, { "epoch": 0.3753536373175364, "grad_norm": 0.7566574811935425, "learning_rate": 2.1555058815035608e-05, "loss": 0.0864, "step": 21195 }, { "epoch": 0.37537134685456486, "grad_norm": 0.6018345952033997, "learning_rate": 2.1554284941054957e-05, "loss": 0.0631, "step": 21196 }, { "epoch": 0.37538905639159326, "grad_norm": 0.6980656981468201, "learning_rate": 2.155351104551136e-05, "loss": 0.0697, "step": 21197 }, { "epoch": 0.3754067659286217, "grad_norm": 0.5605973601341248, "learning_rate": 2.1552737128407385e-05, "loss": 0.0628, "step": 21198 }, { "epoch": 0.37542447546565016, "grad_norm": 1.165968656539917, "learning_rate": 2.155196318974556e-05, "loss": 0.1177, "step": 21199 }, { "epoch": 0.37544218500267856, "grad_norm": 0.9599081873893738, "learning_rate": 2.155118922952844e-05, "loss": 0.0952, "step": 21200 }, { "epoch": 0.375459894539707, "grad_norm": 0.927522599697113, "learning_rate": 2.1550415247758574e-05, "loss": 0.0742, "step": 21201 }, { "epoch": 0.3754776040767354, "grad_norm": 1.343461275100708, "learning_rate": 2.1549641244438503e-05, "loss": 0.0599, "step": 21202 }, { "epoch": 0.37549531361376387, "grad_norm": 0.7285616993904114, "learning_rate": 2.1548867219570777e-05, "loss": 0.0912, "step": 21203 }, { "epoch": 0.37551302315079227, "grad_norm": 0.7607535123825073, "learning_rate": 2.1548093173157934e-05, "loss": 0.0867, "step": 21204 }, { "epoch": 0.3755307326878207, "grad_norm": 0.6817860007286072, "learning_rate": 2.154731910520253e-05, "loss": 0.1263, "step": 21205 }, { "epoch": 0.3755484422248491, "grad_norm": 0.8758338093757629, "learning_rate": 2.154654501570711e-05, "loss": 0.0724, "step": 21206 }, { "epoch": 0.3755661517618776, "grad_norm": 1.1227680444717407, "learning_rate": 2.1545770904674214e-05, "loss": 0.1041, "step": 21207 }, { "epoch": 0.375583861298906, "grad_norm": 1.0746994018554688, "learning_rate": 2.15449967721064e-05, "loss": 0.1036, "step": 21208 }, { "epoch": 0.37560157083593443, "grad_norm": 0.7219234108924866, "learning_rate": 2.1544222618006205e-05, "loss": 0.0862, "step": 21209 }, { "epoch": 0.3756192803729628, "grad_norm": 0.8185597658157349, "learning_rate": 2.1543448442376183e-05, "loss": 0.1164, "step": 21210 }, { "epoch": 0.3756369899099913, "grad_norm": 1.0309832096099854, "learning_rate": 2.1542674245218873e-05, "loss": 0.1176, "step": 21211 }, { "epoch": 0.3756546994470197, "grad_norm": 0.4803389608860016, "learning_rate": 2.154190002653683e-05, "loss": 0.0837, "step": 21212 }, { "epoch": 0.37567240898404813, "grad_norm": 0.4616715610027313, "learning_rate": 2.1541125786332592e-05, "loss": 0.0739, "step": 21213 }, { "epoch": 0.3756901185210766, "grad_norm": 0.5473361015319824, "learning_rate": 2.1540351524608715e-05, "loss": 0.0648, "step": 21214 }, { "epoch": 0.375707828058105, "grad_norm": 0.40150871872901917, "learning_rate": 2.1539577241367744e-05, "loss": 0.104, "step": 21215 }, { "epoch": 0.37572553759513344, "grad_norm": 0.7016949653625488, "learning_rate": 2.1538802936612224e-05, "loss": 0.0763, "step": 21216 }, { "epoch": 0.37574324713216184, "grad_norm": 0.7633509635925293, "learning_rate": 2.1538028610344707e-05, "loss": 0.1036, "step": 21217 }, { "epoch": 0.3757609566691903, "grad_norm": 0.7744438648223877, "learning_rate": 2.1537254262567737e-05, "loss": 0.0926, "step": 21218 }, { "epoch": 0.3757786662062187, "grad_norm": 0.453958660364151, "learning_rate": 2.153647989328386e-05, "loss": 0.0794, "step": 21219 }, { "epoch": 0.37579637574324715, "grad_norm": 0.37390705943107605, "learning_rate": 2.153570550249562e-05, "loss": 0.084, "step": 21220 }, { "epoch": 0.37581408528027554, "grad_norm": 0.4651065766811371, "learning_rate": 2.1534931090205578e-05, "loss": 0.1115, "step": 21221 }, { "epoch": 0.375831794817304, "grad_norm": 0.8345924019813538, "learning_rate": 2.153415665641627e-05, "loss": 0.0541, "step": 21222 }, { "epoch": 0.3758495043543324, "grad_norm": 0.9936767816543579, "learning_rate": 2.1533382201130244e-05, "loss": 0.0862, "step": 21223 }, { "epoch": 0.37586721389136085, "grad_norm": 0.928602933883667, "learning_rate": 2.1532607724350057e-05, "loss": 0.1333, "step": 21224 }, { "epoch": 0.37588492342838925, "grad_norm": 0.6927741765975952, "learning_rate": 2.153183322607825e-05, "loss": 0.0708, "step": 21225 }, { "epoch": 0.3759026329654177, "grad_norm": 0.8880906701087952, "learning_rate": 2.1531058706317376e-05, "loss": 0.1116, "step": 21226 }, { "epoch": 0.3759203425024461, "grad_norm": 0.5312791466712952, "learning_rate": 2.1530284165069973e-05, "loss": 0.0936, "step": 21227 }, { "epoch": 0.37593805203947456, "grad_norm": 3.0531809329986572, "learning_rate": 2.1529509602338596e-05, "loss": 0.0824, "step": 21228 }, { "epoch": 0.375955761576503, "grad_norm": 0.5987738966941833, "learning_rate": 2.1528735018125795e-05, "loss": 0.081, "step": 21229 }, { "epoch": 0.3759734711135314, "grad_norm": 0.5501540899276733, "learning_rate": 2.1527960412434117e-05, "loss": 0.0821, "step": 21230 }, { "epoch": 0.37599118065055986, "grad_norm": 1.0103275775909424, "learning_rate": 2.1527185785266107e-05, "loss": 0.0858, "step": 21231 }, { "epoch": 0.37600889018758826, "grad_norm": 0.9996145963668823, "learning_rate": 2.152641113662432e-05, "loss": 0.0848, "step": 21232 }, { "epoch": 0.3760265997246167, "grad_norm": 0.6823638081550598, "learning_rate": 2.1525636466511295e-05, "loss": 0.0724, "step": 21233 }, { "epoch": 0.3760443092616451, "grad_norm": 0.795820951461792, "learning_rate": 2.1524861774929587e-05, "loss": 0.0844, "step": 21234 }, { "epoch": 0.37606201879867357, "grad_norm": 0.6719580888748169, "learning_rate": 2.1524087061881748e-05, "loss": 0.0883, "step": 21235 }, { "epoch": 0.37607972833570197, "grad_norm": 0.9297149777412415, "learning_rate": 2.152331232737032e-05, "loss": 0.0762, "step": 21236 }, { "epoch": 0.3760974378727304, "grad_norm": 1.1182399988174438, "learning_rate": 2.152253757139786e-05, "loss": 0.0784, "step": 21237 }, { "epoch": 0.3761151474097588, "grad_norm": 0.7376518845558167, "learning_rate": 2.1521762793966896e-05, "loss": 0.0623, "step": 21238 }, { "epoch": 0.3761328569467873, "grad_norm": 0.6217880249023438, "learning_rate": 2.1520987995080002e-05, "loss": 0.0472, "step": 21239 }, { "epoch": 0.3761505664838157, "grad_norm": 0.6467828750610352, "learning_rate": 2.1520213174739716e-05, "loss": 0.0565, "step": 21240 }, { "epoch": 0.37616827602084413, "grad_norm": 1.3747888803482056, "learning_rate": 2.1519438332948594e-05, "loss": 0.1053, "step": 21241 }, { "epoch": 0.3761859855578725, "grad_norm": 0.7047738432884216, "learning_rate": 2.151866346970917e-05, "loss": 0.0879, "step": 21242 }, { "epoch": 0.376203695094901, "grad_norm": 0.8604934811592102, "learning_rate": 2.1517888585024006e-05, "loss": 0.1174, "step": 21243 }, { "epoch": 0.37622140463192943, "grad_norm": 0.9160570502281189, "learning_rate": 2.1517113678895654e-05, "loss": 0.0739, "step": 21244 }, { "epoch": 0.37623911416895783, "grad_norm": 0.8548198342323303, "learning_rate": 2.1516338751326644e-05, "loss": 0.1026, "step": 21245 }, { "epoch": 0.3762568237059863, "grad_norm": 0.8045499324798584, "learning_rate": 2.151556380231955e-05, "loss": 0.0646, "step": 21246 }, { "epoch": 0.3762745332430147, "grad_norm": 0.534817099571228, "learning_rate": 2.1514788831876905e-05, "loss": 0.0876, "step": 21247 }, { "epoch": 0.37629224278004314, "grad_norm": 0.7865324020385742, "learning_rate": 2.1514013840001266e-05, "loss": 0.0903, "step": 21248 }, { "epoch": 0.37630995231707154, "grad_norm": 0.45739680528640747, "learning_rate": 2.151323882669518e-05, "loss": 0.118, "step": 21249 }, { "epoch": 0.3763276618541, "grad_norm": 0.7038936614990234, "learning_rate": 2.1512463791961193e-05, "loss": 0.077, "step": 21250 }, { "epoch": 0.3763453713911284, "grad_norm": 0.6932259798049927, "learning_rate": 2.1511688735801868e-05, "loss": 0.091, "step": 21251 }, { "epoch": 0.37636308092815685, "grad_norm": 0.6563907265663147, "learning_rate": 2.1510913658219732e-05, "loss": 0.0759, "step": 21252 }, { "epoch": 0.37638079046518524, "grad_norm": 0.6131801605224609, "learning_rate": 2.151013855921736e-05, "loss": 0.0793, "step": 21253 }, { "epoch": 0.3763985000022137, "grad_norm": 0.7313022613525391, "learning_rate": 2.1509363438797283e-05, "loss": 0.0786, "step": 21254 }, { "epoch": 0.3764162095392421, "grad_norm": 0.5654155015945435, "learning_rate": 2.1508588296962064e-05, "loss": 0.0771, "step": 21255 }, { "epoch": 0.37643391907627055, "grad_norm": 0.7648240327835083, "learning_rate": 2.1507813133714244e-05, "loss": 0.0908, "step": 21256 }, { "epoch": 0.37645162861329895, "grad_norm": 0.9462639093399048, "learning_rate": 2.1507037949056382e-05, "loss": 0.1131, "step": 21257 }, { "epoch": 0.3764693381503274, "grad_norm": 0.5365093350410461, "learning_rate": 2.150626274299102e-05, "loss": 0.0762, "step": 21258 }, { "epoch": 0.37648704768735586, "grad_norm": 0.45740339159965515, "learning_rate": 2.1505487515520712e-05, "loss": 0.1039, "step": 21259 }, { "epoch": 0.37650475722438426, "grad_norm": 0.502655029296875, "learning_rate": 2.1504712266648012e-05, "loss": 0.0622, "step": 21260 }, { "epoch": 0.3765224667614127, "grad_norm": 0.8365009427070618, "learning_rate": 2.150393699637546e-05, "loss": 0.0762, "step": 21261 }, { "epoch": 0.3765401762984411, "grad_norm": 0.6795560717582703, "learning_rate": 2.1503161704705616e-05, "loss": 0.0868, "step": 21262 }, { "epoch": 0.37655788583546956, "grad_norm": 0.8339605927467346, "learning_rate": 2.1502386391641026e-05, "loss": 0.0774, "step": 21263 }, { "epoch": 0.37657559537249796, "grad_norm": 0.8368531465530396, "learning_rate": 2.1501611057184246e-05, "loss": 0.1359, "step": 21264 }, { "epoch": 0.3765933049095264, "grad_norm": 0.5782878994941711, "learning_rate": 2.1500835701337824e-05, "loss": 0.054, "step": 21265 }, { "epoch": 0.3766110144465548, "grad_norm": 0.7142383456230164, "learning_rate": 2.1500060324104304e-05, "loss": 0.0828, "step": 21266 }, { "epoch": 0.37662872398358327, "grad_norm": 0.5619484782218933, "learning_rate": 2.1499284925486246e-05, "loss": 0.0909, "step": 21267 }, { "epoch": 0.37664643352061167, "grad_norm": 0.8936901092529297, "learning_rate": 2.14985095054862e-05, "loss": 0.0762, "step": 21268 }, { "epoch": 0.3766641430576401, "grad_norm": 0.7383111715316772, "learning_rate": 2.149773406410671e-05, "loss": 0.0799, "step": 21269 }, { "epoch": 0.3766818525946685, "grad_norm": 0.8188033103942871, "learning_rate": 2.1496958601350336e-05, "loss": 0.074, "step": 21270 }, { "epoch": 0.376699562131697, "grad_norm": 0.5492680668830872, "learning_rate": 2.1496183117219625e-05, "loss": 0.0375, "step": 21271 }, { "epoch": 0.3767172716687254, "grad_norm": 0.6310580372810364, "learning_rate": 2.149540761171713e-05, "loss": 0.1097, "step": 21272 }, { "epoch": 0.3767349812057538, "grad_norm": 0.5521525144577026, "learning_rate": 2.14946320848454e-05, "loss": 0.0705, "step": 21273 }, { "epoch": 0.3767526907427823, "grad_norm": 0.607586681842804, "learning_rate": 2.1493856536606987e-05, "loss": 0.0866, "step": 21274 }, { "epoch": 0.3767704002798107, "grad_norm": 0.4866723418235779, "learning_rate": 2.1493080967004442e-05, "loss": 0.0725, "step": 21275 }, { "epoch": 0.37678810981683913, "grad_norm": 0.18999701738357544, "learning_rate": 2.1492305376040315e-05, "loss": 0.0993, "step": 21276 }, { "epoch": 0.37680581935386753, "grad_norm": 0.81760174036026, "learning_rate": 2.1491529763717162e-05, "loss": 0.0813, "step": 21277 }, { "epoch": 0.376823528890896, "grad_norm": 0.5837575793266296, "learning_rate": 2.1490754130037537e-05, "loss": 0.0839, "step": 21278 }, { "epoch": 0.3768412384279244, "grad_norm": 0.6778815984725952, "learning_rate": 2.1489978475003984e-05, "loss": 0.1363, "step": 21279 }, { "epoch": 0.37685894796495284, "grad_norm": 0.4858318567276001, "learning_rate": 2.1489202798619058e-05, "loss": 0.0774, "step": 21280 }, { "epoch": 0.37687665750198124, "grad_norm": 0.6666743159294128, "learning_rate": 2.148842710088531e-05, "loss": 0.0795, "step": 21281 }, { "epoch": 0.3768943670390097, "grad_norm": 0.5230711102485657, "learning_rate": 2.1487651381805292e-05, "loss": 0.0562, "step": 21282 }, { "epoch": 0.3769120765760381, "grad_norm": 1.034220814704895, "learning_rate": 2.148687564138156e-05, "loss": 0.0773, "step": 21283 }, { "epoch": 0.37692978611306655, "grad_norm": 0.857871949672699, "learning_rate": 2.1486099879616663e-05, "loss": 0.0914, "step": 21284 }, { "epoch": 0.37694749565009494, "grad_norm": 0.8536893725395203, "learning_rate": 2.148532409651315e-05, "loss": 0.0847, "step": 21285 }, { "epoch": 0.3769652051871234, "grad_norm": 0.682207465171814, "learning_rate": 2.148454829207358e-05, "loss": 0.1185, "step": 21286 }, { "epoch": 0.3769829147241518, "grad_norm": 0.6520200371742249, "learning_rate": 2.1483772466300503e-05, "loss": 0.0623, "step": 21287 }, { "epoch": 0.37700062426118025, "grad_norm": 0.873656690120697, "learning_rate": 2.1482996619196473e-05, "loss": 0.0843, "step": 21288 }, { "epoch": 0.3770183337982087, "grad_norm": 0.7405787110328674, "learning_rate": 2.1482220750764035e-05, "loss": 0.0804, "step": 21289 }, { "epoch": 0.3770360433352371, "grad_norm": 0.6117566823959351, "learning_rate": 2.1481444861005746e-05, "loss": 0.0882, "step": 21290 }, { "epoch": 0.37705375287226556, "grad_norm": 0.5582040548324585, "learning_rate": 2.1480668949924162e-05, "loss": 0.0885, "step": 21291 }, { "epoch": 0.37707146240929396, "grad_norm": 1.0954036712646484, "learning_rate": 2.147989301752183e-05, "loss": 0.1087, "step": 21292 }, { "epoch": 0.3770891719463224, "grad_norm": 0.5909628868103027, "learning_rate": 2.1479117063801306e-05, "loss": 0.0653, "step": 21293 }, { "epoch": 0.3771068814833508, "grad_norm": 0.8482322096824646, "learning_rate": 2.1478341088765146e-05, "loss": 0.0808, "step": 21294 }, { "epoch": 0.37712459102037926, "grad_norm": 0.7086260914802551, "learning_rate": 2.1477565092415892e-05, "loss": 0.0849, "step": 21295 }, { "epoch": 0.37714230055740766, "grad_norm": 0.46926113963127136, "learning_rate": 2.147678907475611e-05, "loss": 0.0843, "step": 21296 }, { "epoch": 0.3771600100944361, "grad_norm": 0.7283793687820435, "learning_rate": 2.1476013035788346e-05, "loss": 0.0776, "step": 21297 }, { "epoch": 0.3771777196314645, "grad_norm": 0.8216617107391357, "learning_rate": 2.1475236975515153e-05, "loss": 0.0891, "step": 21298 }, { "epoch": 0.37719542916849297, "grad_norm": 0.7469027638435364, "learning_rate": 2.1474460893939084e-05, "loss": 0.0721, "step": 21299 }, { "epoch": 0.37721313870552137, "grad_norm": 0.9752880334854126, "learning_rate": 2.147368479106269e-05, "loss": 0.0916, "step": 21300 }, { "epoch": 0.3772308482425498, "grad_norm": 0.6915171146392822, "learning_rate": 2.1472908666888536e-05, "loss": 0.1047, "step": 21301 }, { "epoch": 0.3772485577795782, "grad_norm": 0.7711033821105957, "learning_rate": 2.1472132521419162e-05, "loss": 0.0879, "step": 21302 }, { "epoch": 0.3772662673166067, "grad_norm": 0.695536196231842, "learning_rate": 2.147135635465713e-05, "loss": 0.1284, "step": 21303 }, { "epoch": 0.37728397685363513, "grad_norm": 0.9527027010917664, "learning_rate": 2.1470580166604987e-05, "loss": 0.116, "step": 21304 }, { "epoch": 0.3773016863906635, "grad_norm": 0.9048089981079102, "learning_rate": 2.146980395726529e-05, "loss": 0.1191, "step": 21305 }, { "epoch": 0.377319395927692, "grad_norm": 0.7660486102104187, "learning_rate": 2.1469027726640598e-05, "loss": 0.1106, "step": 21306 }, { "epoch": 0.3773371054647204, "grad_norm": 0.5234358310699463, "learning_rate": 2.146825147473345e-05, "loss": 0.1081, "step": 21307 }, { "epoch": 0.37735481500174883, "grad_norm": 0.9245859980583191, "learning_rate": 2.1467475201546417e-05, "loss": 0.1123, "step": 21308 }, { "epoch": 0.37737252453877723, "grad_norm": 0.9475942850112915, "learning_rate": 2.146669890708204e-05, "loss": 0.0897, "step": 21309 }, { "epoch": 0.3773902340758057, "grad_norm": 0.5169289708137512, "learning_rate": 2.1465922591342876e-05, "loss": 0.0646, "step": 21310 }, { "epoch": 0.3774079436128341, "grad_norm": 0.4755846858024597, "learning_rate": 2.1465146254331483e-05, "loss": 0.0913, "step": 21311 }, { "epoch": 0.37742565314986254, "grad_norm": 1.0942522287368774, "learning_rate": 2.1464369896050418e-05, "loss": 0.0811, "step": 21312 }, { "epoch": 0.37744336268689094, "grad_norm": 0.47013986110687256, "learning_rate": 2.1463593516502222e-05, "loss": 0.087, "step": 21313 }, { "epoch": 0.3774610722239194, "grad_norm": 0.6029039621353149, "learning_rate": 2.1462817115689458e-05, "loss": 0.0944, "step": 21314 }, { "epoch": 0.3774787817609478, "grad_norm": 0.6705079078674316, "learning_rate": 2.1462040693614682e-05, "loss": 0.0686, "step": 21315 }, { "epoch": 0.37749649129797624, "grad_norm": 0.8520189523696899, "learning_rate": 2.1461264250280447e-05, "loss": 0.0814, "step": 21316 }, { "epoch": 0.3775142008350047, "grad_norm": 0.8313719034194946, "learning_rate": 2.1460487785689307e-05, "loss": 0.094, "step": 21317 }, { "epoch": 0.3775319103720331, "grad_norm": 1.0863425731658936, "learning_rate": 2.145971129984381e-05, "loss": 0.0749, "step": 21318 }, { "epoch": 0.37754961990906155, "grad_norm": 0.5550942420959473, "learning_rate": 2.145893479274652e-05, "loss": 0.0941, "step": 21319 }, { "epoch": 0.37756732944608995, "grad_norm": 1.0011860132217407, "learning_rate": 2.145815826439999e-05, "loss": 0.0877, "step": 21320 }, { "epoch": 0.3775850389831184, "grad_norm": 0.6966270208358765, "learning_rate": 2.1457381714806764e-05, "loss": 0.0544, "step": 21321 }, { "epoch": 0.3776027485201468, "grad_norm": 0.7803908586502075, "learning_rate": 2.145660514396941e-05, "loss": 0.1277, "step": 21322 }, { "epoch": 0.37762045805717526, "grad_norm": 0.6318302154541016, "learning_rate": 2.1455828551890482e-05, "loss": 0.0852, "step": 21323 }, { "epoch": 0.37763816759420366, "grad_norm": 0.7357754707336426, "learning_rate": 2.1455051938572526e-05, "loss": 0.1078, "step": 21324 }, { "epoch": 0.3776558771312321, "grad_norm": 0.6828832030296326, "learning_rate": 2.1454275304018106e-05, "loss": 0.075, "step": 21325 }, { "epoch": 0.3776735866682605, "grad_norm": 0.42581838369369507, "learning_rate": 2.145349864822977e-05, "loss": 0.0743, "step": 21326 }, { "epoch": 0.37769129620528896, "grad_norm": 0.5871667265892029, "learning_rate": 2.1452721971210078e-05, "loss": 0.0444, "step": 21327 }, { "epoch": 0.37770900574231736, "grad_norm": 0.8631728291511536, "learning_rate": 2.1451945272961582e-05, "loss": 0.0872, "step": 21328 }, { "epoch": 0.3777267152793458, "grad_norm": 0.6730014085769653, "learning_rate": 2.1451168553486837e-05, "loss": 0.0943, "step": 21329 }, { "epoch": 0.3777444248163742, "grad_norm": 0.6203665137290955, "learning_rate": 2.1450391812788404e-05, "loss": 0.0897, "step": 21330 }, { "epoch": 0.37776213435340267, "grad_norm": 0.6006897687911987, "learning_rate": 2.144961505086883e-05, "loss": 0.0794, "step": 21331 }, { "epoch": 0.3777798438904311, "grad_norm": 0.5490039587020874, "learning_rate": 2.144883826773068e-05, "loss": 0.0758, "step": 21332 }, { "epoch": 0.3777975534274595, "grad_norm": 0.8412954211235046, "learning_rate": 2.14480614633765e-05, "loss": 0.1165, "step": 21333 }, { "epoch": 0.377815262964488, "grad_norm": 0.5297084450721741, "learning_rate": 2.1447284637808853e-05, "loss": 0.0896, "step": 21334 }, { "epoch": 0.3778329725015164, "grad_norm": 0.6703323125839233, "learning_rate": 2.144650779103029e-05, "loss": 0.0941, "step": 21335 }, { "epoch": 0.3778506820385448, "grad_norm": 0.7379097938537598, "learning_rate": 2.1445730923043372e-05, "loss": 0.0895, "step": 21336 }, { "epoch": 0.3778683915755732, "grad_norm": 0.8719224333763123, "learning_rate": 2.1444954033850645e-05, "loss": 0.0645, "step": 21337 }, { "epoch": 0.3778861011126017, "grad_norm": 0.7839393615722656, "learning_rate": 2.1444177123454673e-05, "loss": 0.0822, "step": 21338 }, { "epoch": 0.3779038106496301, "grad_norm": 0.9598457217216492, "learning_rate": 2.144340019185801e-05, "loss": 0.1049, "step": 21339 }, { "epoch": 0.37792152018665853, "grad_norm": 0.7315807938575745, "learning_rate": 2.1442623239063215e-05, "loss": 0.0923, "step": 21340 }, { "epoch": 0.37793922972368693, "grad_norm": 1.0406136512756348, "learning_rate": 2.144184626507284e-05, "loss": 0.0816, "step": 21341 }, { "epoch": 0.3779569392607154, "grad_norm": 0.5529322624206543, "learning_rate": 2.1441069269889443e-05, "loss": 0.0697, "step": 21342 }, { "epoch": 0.3779746487977438, "grad_norm": 0.5234700441360474, "learning_rate": 2.1440292253515572e-05, "loss": 0.0854, "step": 21343 }, { "epoch": 0.37799235833477224, "grad_norm": 0.5592925548553467, "learning_rate": 2.1439515215953802e-05, "loss": 0.0828, "step": 21344 }, { "epoch": 0.37801006787180064, "grad_norm": 1.022564172744751, "learning_rate": 2.1438738157206675e-05, "loss": 0.0745, "step": 21345 }, { "epoch": 0.3780277774088291, "grad_norm": 0.7903600335121155, "learning_rate": 2.1437961077276747e-05, "loss": 0.0738, "step": 21346 }, { "epoch": 0.37804548694585755, "grad_norm": 0.92951500415802, "learning_rate": 2.1437183976166578e-05, "loss": 0.1099, "step": 21347 }, { "epoch": 0.37806319648288594, "grad_norm": 0.7880159616470337, "learning_rate": 2.1436406853878725e-05, "loss": 0.0784, "step": 21348 }, { "epoch": 0.3780809060199144, "grad_norm": 0.8794720768928528, "learning_rate": 2.143562971041575e-05, "loss": 0.0624, "step": 21349 }, { "epoch": 0.3780986155569428, "grad_norm": 0.8139309287071228, "learning_rate": 2.14348525457802e-05, "loss": 0.0847, "step": 21350 }, { "epoch": 0.37811632509397125, "grad_norm": 0.6708818078041077, "learning_rate": 2.1434075359974637e-05, "loss": 0.1163, "step": 21351 }, { "epoch": 0.37813403463099965, "grad_norm": 0.6532303094863892, "learning_rate": 2.1433298153001616e-05, "loss": 0.1148, "step": 21352 }, { "epoch": 0.3781517441680281, "grad_norm": 0.7692868113517761, "learning_rate": 2.1432520924863694e-05, "loss": 0.0949, "step": 21353 }, { "epoch": 0.3781694537050565, "grad_norm": 0.6594412922859192, "learning_rate": 2.143174367556343e-05, "loss": 0.0795, "step": 21354 }, { "epoch": 0.37818716324208496, "grad_norm": 0.5131353735923767, "learning_rate": 2.143096640510338e-05, "loss": 0.058, "step": 21355 }, { "epoch": 0.37820487277911335, "grad_norm": 0.6921869516372681, "learning_rate": 2.14301891134861e-05, "loss": 0.0607, "step": 21356 }, { "epoch": 0.3782225823161418, "grad_norm": 0.689766526222229, "learning_rate": 2.1429411800714155e-05, "loss": 0.0765, "step": 21357 }, { "epoch": 0.3782402918531702, "grad_norm": 0.6352130174636841, "learning_rate": 2.1428634466790086e-05, "loss": 0.0762, "step": 21358 }, { "epoch": 0.37825800139019866, "grad_norm": 1.0550041198730469, "learning_rate": 2.142785711171646e-05, "loss": 0.0796, "step": 21359 }, { "epoch": 0.37827571092722706, "grad_norm": 0.7871925830841064, "learning_rate": 2.142707973549584e-05, "loss": 0.123, "step": 21360 }, { "epoch": 0.3782934204642555, "grad_norm": 0.710058331489563, "learning_rate": 2.1426302338130776e-05, "loss": 0.0839, "step": 21361 }, { "epoch": 0.37831113000128397, "grad_norm": 0.7077524662017822, "learning_rate": 2.1425524919623822e-05, "loss": 0.1184, "step": 21362 }, { "epoch": 0.37832883953831237, "grad_norm": 0.695293664932251, "learning_rate": 2.142474747997755e-05, "loss": 0.0721, "step": 21363 }, { "epoch": 0.3783465490753408, "grad_norm": 0.6969985365867615, "learning_rate": 2.14239700191945e-05, "loss": 0.1022, "step": 21364 }, { "epoch": 0.3783642586123692, "grad_norm": 0.7665047645568848, "learning_rate": 2.1423192537277247e-05, "loss": 0.0828, "step": 21365 }, { "epoch": 0.3783819681493977, "grad_norm": 1.044055461883545, "learning_rate": 2.1422415034228335e-05, "loss": 0.0978, "step": 21366 }, { "epoch": 0.3783996776864261, "grad_norm": 0.5468665361404419, "learning_rate": 2.1421637510050325e-05, "loss": 0.1029, "step": 21367 }, { "epoch": 0.3784173872234545, "grad_norm": 0.8709893226623535, "learning_rate": 2.1420859964745785e-05, "loss": 0.0931, "step": 21368 }, { "epoch": 0.3784350967604829, "grad_norm": 0.6883227229118347, "learning_rate": 2.1420082398317256e-05, "loss": 0.1049, "step": 21369 }, { "epoch": 0.3784528062975114, "grad_norm": 0.6135469675064087, "learning_rate": 2.1419304810767314e-05, "loss": 0.0962, "step": 21370 }, { "epoch": 0.3784705158345398, "grad_norm": 0.5351791977882385, "learning_rate": 2.1418527202098503e-05, "loss": 0.0777, "step": 21371 }, { "epoch": 0.37848822537156823, "grad_norm": 0.5865383148193359, "learning_rate": 2.1417749572313387e-05, "loss": 0.0574, "step": 21372 }, { "epoch": 0.37850593490859663, "grad_norm": 0.5821980237960815, "learning_rate": 2.1416971921414525e-05, "loss": 0.0881, "step": 21373 }, { "epoch": 0.3785236444456251, "grad_norm": 0.4603726863861084, "learning_rate": 2.141619424940447e-05, "loss": 0.072, "step": 21374 }, { "epoch": 0.3785413539826535, "grad_norm": 0.7078744173049927, "learning_rate": 2.141541655628579e-05, "loss": 0.0977, "step": 21375 }, { "epoch": 0.37855906351968194, "grad_norm": 0.8596468567848206, "learning_rate": 2.1414638842061034e-05, "loss": 0.0826, "step": 21376 }, { "epoch": 0.3785767730567104, "grad_norm": 0.8432907462120056, "learning_rate": 2.1413861106732774e-05, "loss": 0.1123, "step": 21377 }, { "epoch": 0.3785944825937388, "grad_norm": 0.576134204864502, "learning_rate": 2.1413083350303554e-05, "loss": 0.0659, "step": 21378 }, { "epoch": 0.37861219213076724, "grad_norm": 0.8368847966194153, "learning_rate": 2.1412305572775936e-05, "loss": 0.0773, "step": 21379 }, { "epoch": 0.37862990166779564, "grad_norm": 0.6348788142204285, "learning_rate": 2.1411527774152485e-05, "loss": 0.0683, "step": 21380 }, { "epoch": 0.3786476112048241, "grad_norm": 1.3718421459197998, "learning_rate": 2.1410749954435755e-05, "loss": 0.0617, "step": 21381 }, { "epoch": 0.3786653207418525, "grad_norm": 0.6821743845939636, "learning_rate": 2.14099721136283e-05, "loss": 0.0582, "step": 21382 }, { "epoch": 0.37868303027888095, "grad_norm": 0.6830626726150513, "learning_rate": 2.1409194251732695e-05, "loss": 0.0653, "step": 21383 }, { "epoch": 0.37870073981590935, "grad_norm": 0.9801463484764099, "learning_rate": 2.1408416368751488e-05, "loss": 0.0836, "step": 21384 }, { "epoch": 0.3787184493529378, "grad_norm": 0.9014316201210022, "learning_rate": 2.1407638464687234e-05, "loss": 0.0948, "step": 21385 }, { "epoch": 0.3787361588899662, "grad_norm": 0.4772458076477051, "learning_rate": 2.14068605395425e-05, "loss": 0.0725, "step": 21386 }, { "epoch": 0.37875386842699466, "grad_norm": 0.9000560641288757, "learning_rate": 2.1406082593319843e-05, "loss": 0.0813, "step": 21387 }, { "epoch": 0.37877157796402305, "grad_norm": 0.726845920085907, "learning_rate": 2.140530462602182e-05, "loss": 0.0815, "step": 21388 }, { "epoch": 0.3787892875010515, "grad_norm": 0.8672218918800354, "learning_rate": 2.1404526637650996e-05, "loss": 0.097, "step": 21389 }, { "epoch": 0.3788069970380799, "grad_norm": 0.8502057194709778, "learning_rate": 2.1403748628209925e-05, "loss": 0.0741, "step": 21390 }, { "epoch": 0.37882470657510836, "grad_norm": 0.8418210744857788, "learning_rate": 2.1402970597701165e-05, "loss": 0.0685, "step": 21391 }, { "epoch": 0.3788424161121368, "grad_norm": 1.1776220798492432, "learning_rate": 2.1402192546127286e-05, "loss": 0.1068, "step": 21392 }, { "epoch": 0.3788601256491652, "grad_norm": 0.530951976776123, "learning_rate": 2.1401414473490845e-05, "loss": 0.0675, "step": 21393 }, { "epoch": 0.37887783518619367, "grad_norm": 1.0176875591278076, "learning_rate": 2.140063637979439e-05, "loss": 0.0793, "step": 21394 }, { "epoch": 0.37889554472322207, "grad_norm": 0.4900515675544739, "learning_rate": 2.1399858265040485e-05, "loss": 0.0994, "step": 21395 }, { "epoch": 0.3789132542602505, "grad_norm": 1.1617521047592163, "learning_rate": 2.13990801292317e-05, "loss": 0.0701, "step": 21396 }, { "epoch": 0.3789309637972789, "grad_norm": 0.5224190354347229, "learning_rate": 2.1398301972370592e-05, "loss": 0.089, "step": 21397 }, { "epoch": 0.3789486733343074, "grad_norm": 0.4721217453479767, "learning_rate": 2.139752379445971e-05, "loss": 0.0714, "step": 21398 }, { "epoch": 0.37896638287133577, "grad_norm": 0.631907045841217, "learning_rate": 2.139674559550163e-05, "loss": 0.074, "step": 21399 }, { "epoch": 0.3789840924083642, "grad_norm": 1.1431186199188232, "learning_rate": 2.1395967375498894e-05, "loss": 0.0934, "step": 21400 }, { "epoch": 0.3790018019453926, "grad_norm": 0.8521278500556946, "learning_rate": 2.1395189134454074e-05, "loss": 0.0956, "step": 21401 }, { "epoch": 0.3790195114824211, "grad_norm": 0.717415452003479, "learning_rate": 2.1394410872369736e-05, "loss": 0.059, "step": 21402 }, { "epoch": 0.3790372210194495, "grad_norm": 0.5369395613670349, "learning_rate": 2.1393632589248426e-05, "loss": 0.078, "step": 21403 }, { "epoch": 0.37905493055647793, "grad_norm": 1.28193998336792, "learning_rate": 2.139285428509272e-05, "loss": 0.0777, "step": 21404 }, { "epoch": 0.37907264009350633, "grad_norm": 0.4639691114425659, "learning_rate": 2.139207595990516e-05, "loss": 0.0966, "step": 21405 }, { "epoch": 0.3790903496305348, "grad_norm": 0.5118880271911621, "learning_rate": 2.139129761368832e-05, "loss": 0.0557, "step": 21406 }, { "epoch": 0.37910805916756324, "grad_norm": 0.9806897640228271, "learning_rate": 2.139051924644476e-05, "loss": 0.0735, "step": 21407 }, { "epoch": 0.37912576870459164, "grad_norm": 0.35667502880096436, "learning_rate": 2.1389740858177037e-05, "loss": 0.0856, "step": 21408 }, { "epoch": 0.3791434782416201, "grad_norm": 0.6258523464202881, "learning_rate": 2.138896244888771e-05, "loss": 0.0772, "step": 21409 }, { "epoch": 0.3791611877786485, "grad_norm": 0.9010928273200989, "learning_rate": 2.1388184018579345e-05, "loss": 0.0554, "step": 21410 }, { "epoch": 0.37917889731567694, "grad_norm": 0.9332318902015686, "learning_rate": 2.13874055672545e-05, "loss": 0.0883, "step": 21411 }, { "epoch": 0.37919660685270534, "grad_norm": 1.0911856889724731, "learning_rate": 2.1386627094915737e-05, "loss": 0.0999, "step": 21412 }, { "epoch": 0.3792143163897338, "grad_norm": 1.6269725561141968, "learning_rate": 2.138584860156562e-05, "loss": 0.1177, "step": 21413 }, { "epoch": 0.3792320259267622, "grad_norm": 0.8283188343048096, "learning_rate": 2.13850700872067e-05, "loss": 0.0827, "step": 21414 }, { "epoch": 0.37924973546379065, "grad_norm": 1.0562734603881836, "learning_rate": 2.138429155184155e-05, "loss": 0.1015, "step": 21415 }, { "epoch": 0.37926744500081905, "grad_norm": 0.6682151556015015, "learning_rate": 2.138351299547273e-05, "loss": 0.1177, "step": 21416 }, { "epoch": 0.3792851545378475, "grad_norm": 0.8330860137939453, "learning_rate": 2.138273441810279e-05, "loss": 0.1062, "step": 21417 }, { "epoch": 0.3793028640748759, "grad_norm": 0.5571048259735107, "learning_rate": 2.1381955819734305e-05, "loss": 0.0826, "step": 21418 }, { "epoch": 0.37932057361190435, "grad_norm": 0.6664708852767944, "learning_rate": 2.138117720036983e-05, "loss": 0.0724, "step": 21419 }, { "epoch": 0.37933828314893275, "grad_norm": 0.628675639629364, "learning_rate": 2.1380398560011924e-05, "loss": 0.0794, "step": 21420 }, { "epoch": 0.3793559926859612, "grad_norm": 0.5903208255767822, "learning_rate": 2.1379619898663152e-05, "loss": 0.0737, "step": 21421 }, { "epoch": 0.37937370222298966, "grad_norm": 0.7558552026748657, "learning_rate": 2.137884121632608e-05, "loss": 0.105, "step": 21422 }, { "epoch": 0.37939141176001806, "grad_norm": 0.6270383596420288, "learning_rate": 2.137806251300327e-05, "loss": 0.1081, "step": 21423 }, { "epoch": 0.3794091212970465, "grad_norm": 0.633506178855896, "learning_rate": 2.1377283788697266e-05, "loss": 0.1074, "step": 21424 }, { "epoch": 0.3794268308340749, "grad_norm": 0.7460591793060303, "learning_rate": 2.1376505043410653e-05, "loss": 0.0853, "step": 21425 }, { "epoch": 0.37944454037110337, "grad_norm": 0.8627418279647827, "learning_rate": 2.137572627714598e-05, "loss": 0.0932, "step": 21426 }, { "epoch": 0.37946224990813177, "grad_norm": 0.9765567779541016, "learning_rate": 2.1374947489905814e-05, "loss": 0.0963, "step": 21427 }, { "epoch": 0.3794799594451602, "grad_norm": 0.7287486791610718, "learning_rate": 2.1374168681692714e-05, "loss": 0.0782, "step": 21428 }, { "epoch": 0.3794976689821886, "grad_norm": 0.5801445245742798, "learning_rate": 2.1373389852509243e-05, "loss": 0.0641, "step": 21429 }, { "epoch": 0.3795153785192171, "grad_norm": 0.7730566263198853, "learning_rate": 2.1372611002357966e-05, "loss": 0.0978, "step": 21430 }, { "epoch": 0.37953308805624547, "grad_norm": 0.8201887607574463, "learning_rate": 2.1371832131241445e-05, "loss": 0.0803, "step": 21431 }, { "epoch": 0.3795507975932739, "grad_norm": 0.4132179021835327, "learning_rate": 2.137105323916224e-05, "loss": 0.0718, "step": 21432 }, { "epoch": 0.3795685071303023, "grad_norm": 1.1506612300872803, "learning_rate": 2.1370274326122914e-05, "loss": 0.0933, "step": 21433 }, { "epoch": 0.3795862166673308, "grad_norm": 0.8874242305755615, "learning_rate": 2.1369495392126024e-05, "loss": 0.0723, "step": 21434 }, { "epoch": 0.3796039262043592, "grad_norm": 0.9541869163513184, "learning_rate": 2.1368716437174146e-05, "loss": 0.1005, "step": 21435 }, { "epoch": 0.37962163574138763, "grad_norm": 0.8280876278877258, "learning_rate": 2.136793746126983e-05, "loss": 0.0847, "step": 21436 }, { "epoch": 0.3796393452784161, "grad_norm": 0.5919387340545654, "learning_rate": 2.136715846441565e-05, "loss": 0.0503, "step": 21437 }, { "epoch": 0.3796570548154445, "grad_norm": 0.6545982956886292, "learning_rate": 2.1366379446614156e-05, "loss": 0.0848, "step": 21438 }, { "epoch": 0.37967476435247294, "grad_norm": 0.8505011200904846, "learning_rate": 2.1365600407867924e-05, "loss": 0.0967, "step": 21439 }, { "epoch": 0.37969247388950134, "grad_norm": 0.6762498617172241, "learning_rate": 2.1364821348179503e-05, "loss": 0.0854, "step": 21440 }, { "epoch": 0.3797101834265298, "grad_norm": 0.5047481060028076, "learning_rate": 2.136404226755147e-05, "loss": 0.059, "step": 21441 }, { "epoch": 0.3797278929635582, "grad_norm": 0.43045368790626526, "learning_rate": 2.1363263165986377e-05, "loss": 0.0772, "step": 21442 }, { "epoch": 0.37974560250058664, "grad_norm": 0.6311982274055481, "learning_rate": 2.136248404348679e-05, "loss": 0.1029, "step": 21443 }, { "epoch": 0.37976331203761504, "grad_norm": 0.731651246547699, "learning_rate": 2.136170490005528e-05, "loss": 0.1215, "step": 21444 }, { "epoch": 0.3797810215746435, "grad_norm": 0.5910623073577881, "learning_rate": 2.1360925735694402e-05, "loss": 0.0978, "step": 21445 }, { "epoch": 0.3797987311116719, "grad_norm": 1.0899194478988647, "learning_rate": 2.1360146550406724e-05, "loss": 0.1086, "step": 21446 }, { "epoch": 0.37981644064870035, "grad_norm": 0.5558837652206421, "learning_rate": 2.1359367344194807e-05, "loss": 0.0733, "step": 21447 }, { "epoch": 0.37983415018572875, "grad_norm": 0.5982499718666077, "learning_rate": 2.135858811706121e-05, "loss": 0.0575, "step": 21448 }, { "epoch": 0.3798518597227572, "grad_norm": 3.559748411178589, "learning_rate": 2.13578088690085e-05, "loss": 0.0568, "step": 21449 }, { "epoch": 0.3798695692597856, "grad_norm": 1.1099733114242554, "learning_rate": 2.135702960003925e-05, "loss": 0.1278, "step": 21450 }, { "epoch": 0.37988727879681405, "grad_norm": 0.7736650705337524, "learning_rate": 2.135625031015601e-05, "loss": 0.0975, "step": 21451 }, { "epoch": 0.3799049883338425, "grad_norm": 0.6286906599998474, "learning_rate": 2.1355470999361354e-05, "loss": 0.0738, "step": 21452 }, { "epoch": 0.3799226978708709, "grad_norm": 0.826680064201355, "learning_rate": 2.1354691667657833e-05, "loss": 0.0609, "step": 21453 }, { "epoch": 0.37994040740789936, "grad_norm": 0.8140884637832642, "learning_rate": 2.1353912315048025e-05, "loss": 0.1531, "step": 21454 }, { "epoch": 0.37995811694492776, "grad_norm": 0.914396345615387, "learning_rate": 2.1353132941534487e-05, "loss": 0.1069, "step": 21455 }, { "epoch": 0.3799758264819562, "grad_norm": 0.7014028429985046, "learning_rate": 2.1352353547119784e-05, "loss": 0.1015, "step": 21456 }, { "epoch": 0.3799935360189846, "grad_norm": 0.42982906103134155, "learning_rate": 2.135157413180648e-05, "loss": 0.0815, "step": 21457 }, { "epoch": 0.38001124555601307, "grad_norm": 0.701796293258667, "learning_rate": 2.135079469559714e-05, "loss": 0.0819, "step": 21458 }, { "epoch": 0.38002895509304147, "grad_norm": 0.6302323937416077, "learning_rate": 2.1350015238494332e-05, "loss": 0.0749, "step": 21459 }, { "epoch": 0.3800466646300699, "grad_norm": 0.7356839179992676, "learning_rate": 2.1349235760500608e-05, "loss": 0.0794, "step": 21460 }, { "epoch": 0.3800643741670983, "grad_norm": 0.9087691903114319, "learning_rate": 2.1348456261618547e-05, "loss": 0.0671, "step": 21461 }, { "epoch": 0.38008208370412677, "grad_norm": 0.5141380429267883, "learning_rate": 2.1347676741850707e-05, "loss": 0.0942, "step": 21462 }, { "epoch": 0.38009979324115517, "grad_norm": 0.7694185376167297, "learning_rate": 2.1346897201199645e-05, "loss": 0.1105, "step": 21463 }, { "epoch": 0.3801175027781836, "grad_norm": 0.6554431915283203, "learning_rate": 2.134611763966794e-05, "loss": 0.0769, "step": 21464 }, { "epoch": 0.380135212315212, "grad_norm": 0.9316045045852661, "learning_rate": 2.1345338057258148e-05, "loss": 0.0905, "step": 21465 }, { "epoch": 0.3801529218522405, "grad_norm": 0.6192694902420044, "learning_rate": 2.134455845397284e-05, "loss": 0.095, "step": 21466 }, { "epoch": 0.38017063138926893, "grad_norm": 0.4180329740047455, "learning_rate": 2.1343778829814567e-05, "loss": 0.1053, "step": 21467 }, { "epoch": 0.38018834092629733, "grad_norm": 1.2061012983322144, "learning_rate": 2.1342999184785912e-05, "loss": 0.0829, "step": 21468 }, { "epoch": 0.3802060504633258, "grad_norm": 1.0240108966827393, "learning_rate": 2.1342219518889425e-05, "loss": 0.0805, "step": 21469 }, { "epoch": 0.3802237600003542, "grad_norm": 0.8714291453361511, "learning_rate": 2.134143983212768e-05, "loss": 0.0868, "step": 21470 }, { "epoch": 0.38024146953738264, "grad_norm": 0.6566405892372131, "learning_rate": 2.134066012450324e-05, "loss": 0.088, "step": 21471 }, { "epoch": 0.38025917907441104, "grad_norm": 0.5241129994392395, "learning_rate": 2.1339880396018668e-05, "loss": 0.0714, "step": 21472 }, { "epoch": 0.3802768886114395, "grad_norm": 0.7816857099533081, "learning_rate": 2.1339100646676533e-05, "loss": 0.1137, "step": 21473 }, { "epoch": 0.3802945981484679, "grad_norm": 0.2213497906923294, "learning_rate": 2.1338320876479397e-05, "loss": 0.0515, "step": 21474 }, { "epoch": 0.38031230768549634, "grad_norm": 0.35720294713974, "learning_rate": 2.1337541085429828e-05, "loss": 0.0736, "step": 21475 }, { "epoch": 0.38033001722252474, "grad_norm": 0.5721759796142578, "learning_rate": 2.1336761273530387e-05, "loss": 0.0593, "step": 21476 }, { "epoch": 0.3803477267595532, "grad_norm": 0.7923288941383362, "learning_rate": 2.133598144078364e-05, "loss": 0.0992, "step": 21477 }, { "epoch": 0.3803654362965816, "grad_norm": 1.1859327554702759, "learning_rate": 2.1335201587192158e-05, "loss": 0.0786, "step": 21478 }, { "epoch": 0.38038314583361005, "grad_norm": 1.3194358348846436, "learning_rate": 2.1334421712758507e-05, "loss": 0.0799, "step": 21479 }, { "epoch": 0.38040085537063845, "grad_norm": 0.7157415747642517, "learning_rate": 2.1333641817485247e-05, "loss": 0.0798, "step": 21480 }, { "epoch": 0.3804185649076669, "grad_norm": 1.085174560546875, "learning_rate": 2.1332861901374943e-05, "loss": 0.1029, "step": 21481 }, { "epoch": 0.38043627444469535, "grad_norm": 0.596930742263794, "learning_rate": 2.1332081964430165e-05, "loss": 0.0914, "step": 21482 }, { "epoch": 0.38045398398172375, "grad_norm": 0.8627491593360901, "learning_rate": 2.1331302006653473e-05, "loss": 0.0828, "step": 21483 }, { "epoch": 0.3804716935187522, "grad_norm": 0.8431840538978577, "learning_rate": 2.1330522028047448e-05, "loss": 0.0762, "step": 21484 }, { "epoch": 0.3804894030557806, "grad_norm": 0.6684840321540833, "learning_rate": 2.132974202861464e-05, "loss": 0.1006, "step": 21485 }, { "epoch": 0.38050711259280906, "grad_norm": 0.6177348494529724, "learning_rate": 2.1328962008357625e-05, "loss": 0.0822, "step": 21486 }, { "epoch": 0.38052482212983746, "grad_norm": 0.7365365624427795, "learning_rate": 2.1328181967278962e-05, "loss": 0.1009, "step": 21487 }, { "epoch": 0.3805425316668659, "grad_norm": 0.9781801104545593, "learning_rate": 2.132740190538122e-05, "loss": 0.119, "step": 21488 }, { "epoch": 0.3805602412038943, "grad_norm": 1.2133756875991821, "learning_rate": 2.1326621822666968e-05, "loss": 0.091, "step": 21489 }, { "epoch": 0.38057795074092277, "grad_norm": 0.7860046029090881, "learning_rate": 2.1325841719138765e-05, "loss": 0.0532, "step": 21490 }, { "epoch": 0.38059566027795116, "grad_norm": 0.9377838373184204, "learning_rate": 2.1325061594799184e-05, "loss": 0.0872, "step": 21491 }, { "epoch": 0.3806133698149796, "grad_norm": 0.42847874760627747, "learning_rate": 2.1324281449650794e-05, "loss": 0.0799, "step": 21492 }, { "epoch": 0.380631079352008, "grad_norm": 0.9962118268013, "learning_rate": 2.1323501283696155e-05, "loss": 0.0689, "step": 21493 }, { "epoch": 0.38064878888903647, "grad_norm": 0.6490833759307861, "learning_rate": 2.132272109693784e-05, "loss": 0.0976, "step": 21494 }, { "epoch": 0.38066649842606487, "grad_norm": 0.37855371832847595, "learning_rate": 2.132194088937841e-05, "loss": 0.0615, "step": 21495 }, { "epoch": 0.3806842079630933, "grad_norm": 0.5271138548851013, "learning_rate": 2.1321160661020432e-05, "loss": 0.0731, "step": 21496 }, { "epoch": 0.3807019175001218, "grad_norm": 0.6113178730010986, "learning_rate": 2.1320380411866476e-05, "loss": 0.0807, "step": 21497 }, { "epoch": 0.3807196270371502, "grad_norm": 0.8789593577384949, "learning_rate": 2.1319600141919108e-05, "loss": 0.1148, "step": 21498 }, { "epoch": 0.38073733657417863, "grad_norm": 0.3450040817260742, "learning_rate": 2.1318819851180897e-05, "loss": 0.0494, "step": 21499 }, { "epoch": 0.38075504611120703, "grad_norm": 0.7674987316131592, "learning_rate": 2.1318039539654403e-05, "loss": 0.1196, "step": 21500 }, { "epoch": 0.3807727556482355, "grad_norm": 0.741650402545929, "learning_rate": 2.1317259207342194e-05, "loss": 0.0748, "step": 21501 }, { "epoch": 0.3807904651852639, "grad_norm": 0.7939332723617554, "learning_rate": 2.1316478854246854e-05, "loss": 0.1547, "step": 21502 }, { "epoch": 0.38080817472229234, "grad_norm": 0.688729465007782, "learning_rate": 2.131569848037093e-05, "loss": 0.0897, "step": 21503 }, { "epoch": 0.38082588425932073, "grad_norm": 0.6234003305435181, "learning_rate": 2.1314918085716997e-05, "loss": 0.069, "step": 21504 }, { "epoch": 0.3808435937963492, "grad_norm": 0.7026490569114685, "learning_rate": 2.131413767028762e-05, "loss": 0.0532, "step": 21505 }, { "epoch": 0.3808613033333776, "grad_norm": 0.8597815632820129, "learning_rate": 2.1313357234085372e-05, "loss": 0.0959, "step": 21506 }, { "epoch": 0.38087901287040604, "grad_norm": 0.6451571583747864, "learning_rate": 2.1312576777112818e-05, "loss": 0.0932, "step": 21507 }, { "epoch": 0.38089672240743444, "grad_norm": 0.7631529569625854, "learning_rate": 2.1311796299372523e-05, "loss": 0.0444, "step": 21508 }, { "epoch": 0.3809144319444629, "grad_norm": 0.47477373480796814, "learning_rate": 2.131101580086706e-05, "loss": 0.0587, "step": 21509 }, { "epoch": 0.3809321414814913, "grad_norm": 0.4598764479160309, "learning_rate": 2.1310235281598987e-05, "loss": 0.0767, "step": 21510 }, { "epoch": 0.38094985101851975, "grad_norm": 0.799115002155304, "learning_rate": 2.130945474157088e-05, "loss": 0.1286, "step": 21511 }, { "epoch": 0.3809675605555482, "grad_norm": 0.5183797478675842, "learning_rate": 2.1308674180785312e-05, "loss": 0.0704, "step": 21512 }, { "epoch": 0.3809852700925766, "grad_norm": 0.48710334300994873, "learning_rate": 2.1307893599244836e-05, "loss": 0.0847, "step": 21513 }, { "epoch": 0.38100297962960505, "grad_norm": 0.7932860851287842, "learning_rate": 2.1307112996952034e-05, "loss": 0.1023, "step": 21514 }, { "epoch": 0.38102068916663345, "grad_norm": 0.6790327429771423, "learning_rate": 2.130633237390946e-05, "loss": 0.0861, "step": 21515 }, { "epoch": 0.3810383987036619, "grad_norm": 0.7487481832504272, "learning_rate": 2.1305551730119697e-05, "loss": 0.0652, "step": 21516 }, { "epoch": 0.3810561082406903, "grad_norm": 0.6013093590736389, "learning_rate": 2.1304771065585304e-05, "loss": 0.0661, "step": 21517 }, { "epoch": 0.38107381777771876, "grad_norm": 0.8026271462440491, "learning_rate": 2.1303990380308856e-05, "loss": 0.0794, "step": 21518 }, { "epoch": 0.38109152731474716, "grad_norm": 0.671210765838623, "learning_rate": 2.130320967429291e-05, "loss": 0.1058, "step": 21519 }, { "epoch": 0.3811092368517756, "grad_norm": 0.7476171851158142, "learning_rate": 2.130242894754005e-05, "loss": 0.1031, "step": 21520 }, { "epoch": 0.381126946388804, "grad_norm": 0.796953558921814, "learning_rate": 2.130164820005283e-05, "loss": 0.089, "step": 21521 }, { "epoch": 0.38114465592583247, "grad_norm": 0.8535159230232239, "learning_rate": 2.1300867431833823e-05, "loss": 0.0786, "step": 21522 }, { "epoch": 0.38116236546286086, "grad_norm": 0.826281726360321, "learning_rate": 2.1300086642885604e-05, "loss": 0.0747, "step": 21523 }, { "epoch": 0.3811800749998893, "grad_norm": 0.5365136861801147, "learning_rate": 2.129930583321074e-05, "loss": 0.0526, "step": 21524 }, { "epoch": 0.3811977845369177, "grad_norm": 1.041577935218811, "learning_rate": 2.129852500281179e-05, "loss": 0.1081, "step": 21525 }, { "epoch": 0.38121549407394617, "grad_norm": 1.0635334253311157, "learning_rate": 2.1297744151691334e-05, "loss": 0.0953, "step": 21526 }, { "epoch": 0.3812332036109746, "grad_norm": 0.8213537335395813, "learning_rate": 2.129696327985194e-05, "loss": 0.11, "step": 21527 }, { "epoch": 0.381250913148003, "grad_norm": 0.6960511803627014, "learning_rate": 2.1296182387296168e-05, "loss": 0.0847, "step": 21528 }, { "epoch": 0.3812686226850315, "grad_norm": 0.607181191444397, "learning_rate": 2.1295401474026592e-05, "loss": 0.0527, "step": 21529 }, { "epoch": 0.3812863322220599, "grad_norm": 0.7912932634353638, "learning_rate": 2.1294620540045784e-05, "loss": 0.0566, "step": 21530 }, { "epoch": 0.38130404175908833, "grad_norm": 0.6093305349349976, "learning_rate": 2.129383958535631e-05, "loss": 0.0859, "step": 21531 }, { "epoch": 0.38132175129611673, "grad_norm": 0.6469433903694153, "learning_rate": 2.1293058609960744e-05, "loss": 0.0864, "step": 21532 }, { "epoch": 0.3813394608331452, "grad_norm": 0.97046959400177, "learning_rate": 2.129227761386165e-05, "loss": 0.0966, "step": 21533 }, { "epoch": 0.3813571703701736, "grad_norm": 0.6055330038070679, "learning_rate": 2.1291496597061595e-05, "loss": 0.0809, "step": 21534 }, { "epoch": 0.38137487990720204, "grad_norm": 1.6089304685592651, "learning_rate": 2.1290715559563155e-05, "loss": 0.0699, "step": 21535 }, { "epoch": 0.38139258944423043, "grad_norm": 0.45301884412765503, "learning_rate": 2.1289934501368902e-05, "loss": 0.0903, "step": 21536 }, { "epoch": 0.3814102989812589, "grad_norm": 0.6390342116355896, "learning_rate": 2.1289153422481397e-05, "loss": 0.0733, "step": 21537 }, { "epoch": 0.3814280085182873, "grad_norm": 0.7301613688468933, "learning_rate": 2.128837232290321e-05, "loss": 0.0581, "step": 21538 }, { "epoch": 0.38144571805531574, "grad_norm": 0.8585165143013, "learning_rate": 2.1287591202636916e-05, "loss": 0.0817, "step": 21539 }, { "epoch": 0.38146342759234414, "grad_norm": 1.257467269897461, "learning_rate": 2.1286810061685085e-05, "loss": 0.1342, "step": 21540 }, { "epoch": 0.3814811371293726, "grad_norm": 0.6136038303375244, "learning_rate": 2.1286028900050287e-05, "loss": 0.1224, "step": 21541 }, { "epoch": 0.38149884666640105, "grad_norm": 0.6009058952331543, "learning_rate": 2.1285247717735085e-05, "loss": 0.0803, "step": 21542 }, { "epoch": 0.38151655620342945, "grad_norm": 0.7180688977241516, "learning_rate": 2.1284466514742054e-05, "loss": 0.0887, "step": 21543 }, { "epoch": 0.3815342657404579, "grad_norm": 0.7499793171882629, "learning_rate": 2.1283685291073765e-05, "loss": 0.0687, "step": 21544 }, { "epoch": 0.3815519752774863, "grad_norm": 0.9058381915092468, "learning_rate": 2.1282904046732786e-05, "loss": 0.1068, "step": 21545 }, { "epoch": 0.38156968481451475, "grad_norm": 0.9490220546722412, "learning_rate": 2.128212278172169e-05, "loss": 0.0888, "step": 21546 }, { "epoch": 0.38158739435154315, "grad_norm": 0.902040958404541, "learning_rate": 2.1281341496043044e-05, "loss": 0.0788, "step": 21547 }, { "epoch": 0.3816051038885716, "grad_norm": 0.6213720440864563, "learning_rate": 2.128056018969942e-05, "loss": 0.1176, "step": 21548 }, { "epoch": 0.3816228134256, "grad_norm": 0.6486466526985168, "learning_rate": 2.127977886269339e-05, "loss": 0.0978, "step": 21549 }, { "epoch": 0.38164052296262846, "grad_norm": 0.7189167737960815, "learning_rate": 2.127899751502752e-05, "loss": 0.0754, "step": 21550 }, { "epoch": 0.38165823249965686, "grad_norm": 0.7977759838104248, "learning_rate": 2.1278216146704388e-05, "loss": 0.1223, "step": 21551 }, { "epoch": 0.3816759420366853, "grad_norm": 0.8668656945228577, "learning_rate": 2.1277434757726554e-05, "loss": 0.0882, "step": 21552 }, { "epoch": 0.3816936515737137, "grad_norm": 0.6252523064613342, "learning_rate": 2.1276653348096596e-05, "loss": 0.0657, "step": 21553 }, { "epoch": 0.38171136111074216, "grad_norm": 0.8598272204399109, "learning_rate": 2.1275871917817083e-05, "loss": 0.0969, "step": 21554 }, { "epoch": 0.38172907064777056, "grad_norm": 0.8566762804985046, "learning_rate": 2.127509046689059e-05, "loss": 0.0935, "step": 21555 }, { "epoch": 0.381746780184799, "grad_norm": 1.4885354042053223, "learning_rate": 2.127430899531968e-05, "loss": 0.0682, "step": 21556 }, { "epoch": 0.38176448972182747, "grad_norm": 0.6551862955093384, "learning_rate": 2.127352750310693e-05, "loss": 0.1041, "step": 21557 }, { "epoch": 0.38178219925885587, "grad_norm": 0.6441527009010315, "learning_rate": 2.1272745990254913e-05, "loss": 0.0764, "step": 21558 }, { "epoch": 0.3817999087958843, "grad_norm": 0.5258123874664307, "learning_rate": 2.1271964456766187e-05, "loss": 0.0992, "step": 21559 }, { "epoch": 0.3818176183329127, "grad_norm": 0.44373467564582825, "learning_rate": 2.127118290264334e-05, "loss": 0.089, "step": 21560 }, { "epoch": 0.3818353278699412, "grad_norm": 0.82485032081604, "learning_rate": 2.1270401327888932e-05, "loss": 0.0736, "step": 21561 }, { "epoch": 0.3818530374069696, "grad_norm": 0.958958089351654, "learning_rate": 2.126961973250554e-05, "loss": 0.1212, "step": 21562 }, { "epoch": 0.38187074694399803, "grad_norm": 1.0334856510162354, "learning_rate": 2.1268838116495727e-05, "loss": 0.0809, "step": 21563 }, { "epoch": 0.38188845648102643, "grad_norm": 0.4571077227592468, "learning_rate": 2.1268056479862078e-05, "loss": 0.0831, "step": 21564 }, { "epoch": 0.3819061660180549, "grad_norm": 0.6111301779747009, "learning_rate": 2.1267274822607154e-05, "loss": 0.07, "step": 21565 }, { "epoch": 0.3819238755550833, "grad_norm": 0.8061014413833618, "learning_rate": 2.126649314473353e-05, "loss": 0.1003, "step": 21566 }, { "epoch": 0.38194158509211174, "grad_norm": 0.6834048628807068, "learning_rate": 2.1265711446243776e-05, "loss": 0.0872, "step": 21567 }, { "epoch": 0.38195929462914013, "grad_norm": 0.959622859954834, "learning_rate": 2.1264929727140465e-05, "loss": 0.1265, "step": 21568 }, { "epoch": 0.3819770041661686, "grad_norm": 0.661113977432251, "learning_rate": 2.1264147987426172e-05, "loss": 0.0582, "step": 21569 }, { "epoch": 0.38199471370319704, "grad_norm": 1.0651466846466064, "learning_rate": 2.1263366227103466e-05, "loss": 0.1036, "step": 21570 }, { "epoch": 0.38201242324022544, "grad_norm": 0.8558667302131653, "learning_rate": 2.1262584446174918e-05, "loss": 0.098, "step": 21571 }, { "epoch": 0.3820301327772539, "grad_norm": 0.2898825705051422, "learning_rate": 2.1261802644643097e-05, "loss": 0.0988, "step": 21572 }, { "epoch": 0.3820478423142823, "grad_norm": 0.63345867395401, "learning_rate": 2.126102082251058e-05, "loss": 0.0788, "step": 21573 }, { "epoch": 0.38206555185131075, "grad_norm": 0.6417220830917358, "learning_rate": 2.126023897977994e-05, "loss": 0.0483, "step": 21574 }, { "epoch": 0.38208326138833915, "grad_norm": 0.6338311433792114, "learning_rate": 2.1259457116453746e-05, "loss": 0.0795, "step": 21575 }, { "epoch": 0.3821009709253676, "grad_norm": 0.49765682220458984, "learning_rate": 2.1258675232534575e-05, "loss": 0.056, "step": 21576 }, { "epoch": 0.382118680462396, "grad_norm": 1.204379916191101, "learning_rate": 2.1257893328024984e-05, "loss": 0.0582, "step": 21577 }, { "epoch": 0.38213638999942445, "grad_norm": 0.8060033321380615, "learning_rate": 2.1257111402927566e-05, "loss": 0.0789, "step": 21578 }, { "epoch": 0.38215409953645285, "grad_norm": 0.5153332948684692, "learning_rate": 2.1256329457244883e-05, "loss": 0.1088, "step": 21579 }, { "epoch": 0.3821718090734813, "grad_norm": 0.5346264243125916, "learning_rate": 2.125554749097951e-05, "loss": 0.048, "step": 21580 }, { "epoch": 0.3821895186105097, "grad_norm": 0.7426918148994446, "learning_rate": 2.1254765504134016e-05, "loss": 0.0558, "step": 21581 }, { "epoch": 0.38220722814753816, "grad_norm": 0.547727108001709, "learning_rate": 2.1253983496710976e-05, "loss": 0.0613, "step": 21582 }, { "epoch": 0.38222493768456656, "grad_norm": 0.5468162298202515, "learning_rate": 2.1253201468712967e-05, "loss": 0.0756, "step": 21583 }, { "epoch": 0.382242647221595, "grad_norm": 0.38374966382980347, "learning_rate": 2.1252419420142554e-05, "loss": 0.0456, "step": 21584 }, { "epoch": 0.38226035675862347, "grad_norm": 1.4819518327713013, "learning_rate": 2.1251637351002318e-05, "loss": 0.0573, "step": 21585 }, { "epoch": 0.38227806629565186, "grad_norm": 0.6641697883605957, "learning_rate": 2.125085526129482e-05, "loss": 0.121, "step": 21586 }, { "epoch": 0.3822957758326803, "grad_norm": 0.6940056085586548, "learning_rate": 2.1250073151022645e-05, "loss": 0.0764, "step": 21587 }, { "epoch": 0.3823134853697087, "grad_norm": 0.8648322820663452, "learning_rate": 2.1249291020188355e-05, "loss": 0.0766, "step": 21588 }, { "epoch": 0.38233119490673717, "grad_norm": 0.6195012927055359, "learning_rate": 2.1248508868794542e-05, "loss": 0.0788, "step": 21589 }, { "epoch": 0.38234890444376557, "grad_norm": 0.8464404940605164, "learning_rate": 2.1247726696843758e-05, "loss": 0.068, "step": 21590 }, { "epoch": 0.382366613980794, "grad_norm": 1.302014946937561, "learning_rate": 2.1246944504338587e-05, "loss": 0.1016, "step": 21591 }, { "epoch": 0.3823843235178224, "grad_norm": 0.797479510307312, "learning_rate": 2.12461622912816e-05, "loss": 0.0779, "step": 21592 }, { "epoch": 0.3824020330548509, "grad_norm": 0.7076509594917297, "learning_rate": 2.124538005767537e-05, "loss": 0.0833, "step": 21593 }, { "epoch": 0.3824197425918793, "grad_norm": 0.7146515846252441, "learning_rate": 2.1244597803522472e-05, "loss": 0.088, "step": 21594 }, { "epoch": 0.38243745212890773, "grad_norm": 0.5010578036308289, "learning_rate": 2.124381552882548e-05, "loss": 0.0818, "step": 21595 }, { "epoch": 0.3824551616659361, "grad_norm": 0.6996718049049377, "learning_rate": 2.1243033233586964e-05, "loss": 0.1317, "step": 21596 }, { "epoch": 0.3824728712029646, "grad_norm": 0.44264575839042664, "learning_rate": 2.1242250917809507e-05, "loss": 0.1001, "step": 21597 }, { "epoch": 0.382490580739993, "grad_norm": 0.7451748847961426, "learning_rate": 2.124146858149567e-05, "loss": 0.0848, "step": 21598 }, { "epoch": 0.38250829027702143, "grad_norm": 0.6859145760536194, "learning_rate": 2.124068622464803e-05, "loss": 0.0864, "step": 21599 }, { "epoch": 0.3825259998140499, "grad_norm": 0.43996304273605347, "learning_rate": 2.123990384726917e-05, "loss": 0.0821, "step": 21600 }, { "epoch": 0.3825437093510783, "grad_norm": 0.5940400958061218, "learning_rate": 2.1239121449361653e-05, "loss": 0.0657, "step": 21601 }, { "epoch": 0.38256141888810674, "grad_norm": 0.5781711339950562, "learning_rate": 2.123833903092806e-05, "loss": 0.092, "step": 21602 }, { "epoch": 0.38257912842513514, "grad_norm": 1.0484764575958252, "learning_rate": 2.123755659197096e-05, "loss": 0.111, "step": 21603 }, { "epoch": 0.3825968379621636, "grad_norm": 0.8511720895767212, "learning_rate": 2.123677413249293e-05, "loss": 0.0881, "step": 21604 }, { "epoch": 0.382614547499192, "grad_norm": 0.5715275406837463, "learning_rate": 2.1235991652496547e-05, "loss": 0.1002, "step": 21605 }, { "epoch": 0.38263225703622045, "grad_norm": 0.7207967042922974, "learning_rate": 2.123520915198438e-05, "loss": 0.0848, "step": 21606 }, { "epoch": 0.38264996657324885, "grad_norm": 0.600411593914032, "learning_rate": 2.1234426630959005e-05, "loss": 0.079, "step": 21607 }, { "epoch": 0.3826676761102773, "grad_norm": 0.7458668947219849, "learning_rate": 2.1233644089423e-05, "loss": 0.0665, "step": 21608 }, { "epoch": 0.3826853856473057, "grad_norm": 0.8942650556564331, "learning_rate": 2.123286152737893e-05, "loss": 0.0756, "step": 21609 }, { "epoch": 0.38270309518433415, "grad_norm": 0.7851637005805969, "learning_rate": 2.1232078944829382e-05, "loss": 0.0784, "step": 21610 }, { "epoch": 0.38272080472136255, "grad_norm": 0.5104387402534485, "learning_rate": 2.123129634177692e-05, "loss": 0.0699, "step": 21611 }, { "epoch": 0.382738514258391, "grad_norm": 0.8515573143959045, "learning_rate": 2.123051371822413e-05, "loss": 0.1029, "step": 21612 }, { "epoch": 0.3827562237954194, "grad_norm": 0.5186964869499207, "learning_rate": 2.122973107417358e-05, "loss": 0.0412, "step": 21613 }, { "epoch": 0.38277393333244786, "grad_norm": 0.7548505663871765, "learning_rate": 2.1228948409627838e-05, "loss": 0.0724, "step": 21614 }, { "epoch": 0.3827916428694763, "grad_norm": 0.6626559495925903, "learning_rate": 2.1228165724589485e-05, "loss": 0.0802, "step": 21615 }, { "epoch": 0.3828093524065047, "grad_norm": 0.5987811088562012, "learning_rate": 2.12273830190611e-05, "loss": 0.0917, "step": 21616 }, { "epoch": 0.38282706194353316, "grad_norm": 1.0395901203155518, "learning_rate": 2.122660029304526e-05, "loss": 0.0776, "step": 21617 }, { "epoch": 0.38284477148056156, "grad_norm": 0.5913302302360535, "learning_rate": 2.1225817546544525e-05, "loss": 0.1038, "step": 21618 }, { "epoch": 0.38286248101759, "grad_norm": 0.7103778123855591, "learning_rate": 2.1225034779561488e-05, "loss": 0.116, "step": 21619 }, { "epoch": 0.3828801905546184, "grad_norm": 0.6543284058570862, "learning_rate": 2.1224251992098715e-05, "loss": 0.0701, "step": 21620 }, { "epoch": 0.38289790009164687, "grad_norm": 0.707438051700592, "learning_rate": 2.122346918415878e-05, "loss": 0.0641, "step": 21621 }, { "epoch": 0.38291560962867527, "grad_norm": 0.7082772254943848, "learning_rate": 2.122268635574426e-05, "loss": 0.0773, "step": 21622 }, { "epoch": 0.3829333191657037, "grad_norm": 0.8887909054756165, "learning_rate": 2.122190350685773e-05, "loss": 0.068, "step": 21623 }, { "epoch": 0.3829510287027321, "grad_norm": 0.4308134913444519, "learning_rate": 2.1221120637501777e-05, "loss": 0.0602, "step": 21624 }, { "epoch": 0.3829687382397606, "grad_norm": 0.8375872373580933, "learning_rate": 2.1220337747678953e-05, "loss": 0.0936, "step": 21625 }, { "epoch": 0.382986447776789, "grad_norm": 0.8670256733894348, "learning_rate": 2.1219554837391856e-05, "loss": 0.0811, "step": 21626 }, { "epoch": 0.38300415731381743, "grad_norm": 0.5899962186813354, "learning_rate": 2.1218771906643045e-05, "loss": 0.0969, "step": 21627 }, { "epoch": 0.3830218668508458, "grad_norm": 0.9960896372795105, "learning_rate": 2.1217988955435112e-05, "loss": 0.1415, "step": 21628 }, { "epoch": 0.3830395763878743, "grad_norm": 1.1479220390319824, "learning_rate": 2.121720598377062e-05, "loss": 0.09, "step": 21629 }, { "epoch": 0.38305728592490274, "grad_norm": 0.9344800114631653, "learning_rate": 2.1216422991652147e-05, "loss": 0.0769, "step": 21630 }, { "epoch": 0.38307499546193113, "grad_norm": 0.8254603147506714, "learning_rate": 2.1215639979082277e-05, "loss": 0.069, "step": 21631 }, { "epoch": 0.3830927049989596, "grad_norm": 0.5930342078208923, "learning_rate": 2.1214856946063576e-05, "loss": 0.079, "step": 21632 }, { "epoch": 0.383110414535988, "grad_norm": 1.2233847379684448, "learning_rate": 2.1214073892598624e-05, "loss": 0.1069, "step": 21633 }, { "epoch": 0.38312812407301644, "grad_norm": 1.0151772499084473, "learning_rate": 2.121329081869e-05, "loss": 0.0981, "step": 21634 }, { "epoch": 0.38314583361004484, "grad_norm": 0.8202821612358093, "learning_rate": 2.1212507724340272e-05, "loss": 0.1061, "step": 21635 }, { "epoch": 0.3831635431470733, "grad_norm": 0.5176152586936951, "learning_rate": 2.1211724609552025e-05, "loss": 0.0862, "step": 21636 }, { "epoch": 0.3831812526841017, "grad_norm": 0.9528597593307495, "learning_rate": 2.1210941474327838e-05, "loss": 0.0907, "step": 21637 }, { "epoch": 0.38319896222113015, "grad_norm": 0.5088796615600586, "learning_rate": 2.1210158318670275e-05, "loss": 0.0513, "step": 21638 }, { "epoch": 0.38321667175815854, "grad_norm": 0.6215513944625854, "learning_rate": 2.120937514258192e-05, "loss": 0.0881, "step": 21639 }, { "epoch": 0.383234381295187, "grad_norm": 0.6805882453918457, "learning_rate": 2.120859194606535e-05, "loss": 0.0911, "step": 21640 }, { "epoch": 0.3832520908322154, "grad_norm": 0.8163148164749146, "learning_rate": 2.1207808729123138e-05, "loss": 0.1113, "step": 21641 }, { "epoch": 0.38326980036924385, "grad_norm": 0.5725514888763428, "learning_rate": 2.120702549175787e-05, "loss": 0.0993, "step": 21642 }, { "epoch": 0.38328750990627225, "grad_norm": 0.6265844702720642, "learning_rate": 2.120624223397211e-05, "loss": 0.0882, "step": 21643 }, { "epoch": 0.3833052194433007, "grad_norm": 0.6844419240951538, "learning_rate": 2.120545895576844e-05, "loss": 0.0617, "step": 21644 }, { "epoch": 0.38332292898032916, "grad_norm": 0.5707069039344788, "learning_rate": 2.1204675657149443e-05, "loss": 0.0751, "step": 21645 }, { "epoch": 0.38334063851735756, "grad_norm": 0.5949103236198425, "learning_rate": 2.1203892338117687e-05, "loss": 0.085, "step": 21646 }, { "epoch": 0.383358348054386, "grad_norm": 0.6128636002540588, "learning_rate": 2.1203108998675754e-05, "loss": 0.0859, "step": 21647 }, { "epoch": 0.3833760575914144, "grad_norm": 0.5487093329429626, "learning_rate": 2.120232563882622e-05, "loss": 0.087, "step": 21648 }, { "epoch": 0.38339376712844286, "grad_norm": 0.7996604442596436, "learning_rate": 2.120154225857166e-05, "loss": 0.0706, "step": 21649 }, { "epoch": 0.38341147666547126, "grad_norm": 0.6864727139472961, "learning_rate": 2.1200758857914655e-05, "loss": 0.0886, "step": 21650 }, { "epoch": 0.3834291862024997, "grad_norm": 0.9185165762901306, "learning_rate": 2.119997543685778e-05, "loss": 0.0743, "step": 21651 }, { "epoch": 0.3834468957395281, "grad_norm": 0.7878047823905945, "learning_rate": 2.1199191995403613e-05, "loss": 0.0867, "step": 21652 }, { "epoch": 0.38346460527655657, "grad_norm": 0.9054370522499084, "learning_rate": 2.119840853355473e-05, "loss": 0.0856, "step": 21653 }, { "epoch": 0.38348231481358497, "grad_norm": 0.9656383395195007, "learning_rate": 2.119762505131371e-05, "loss": 0.1054, "step": 21654 }, { "epoch": 0.3835000243506134, "grad_norm": 0.6466490030288696, "learning_rate": 2.119684154868313e-05, "loss": 0.0648, "step": 21655 }, { "epoch": 0.3835177338876418, "grad_norm": 0.33847692608833313, "learning_rate": 2.1196058025665573e-05, "loss": 0.0571, "step": 21656 }, { "epoch": 0.3835354434246703, "grad_norm": 0.6919435858726501, "learning_rate": 2.1195274482263608e-05, "loss": 0.0871, "step": 21657 }, { "epoch": 0.3835531529616987, "grad_norm": 0.6147472858428955, "learning_rate": 2.1194490918479815e-05, "loss": 0.1163, "step": 21658 }, { "epoch": 0.3835708624987271, "grad_norm": 0.7316898703575134, "learning_rate": 2.119370733431677e-05, "loss": 0.0616, "step": 21659 }, { "epoch": 0.3835885720357556, "grad_norm": 0.5969298481941223, "learning_rate": 2.1192923729777066e-05, "loss": 0.0607, "step": 21660 }, { "epoch": 0.383606281572784, "grad_norm": 0.5657297372817993, "learning_rate": 2.119214010486326e-05, "loss": 0.0681, "step": 21661 }, { "epoch": 0.38362399110981243, "grad_norm": 0.6143116354942322, "learning_rate": 2.1191356459577947e-05, "loss": 0.0759, "step": 21662 }, { "epoch": 0.38364170064684083, "grad_norm": 0.6942512392997742, "learning_rate": 2.119057279392369e-05, "loss": 0.1078, "step": 21663 }, { "epoch": 0.3836594101838693, "grad_norm": 1.1241251230239868, "learning_rate": 2.1189789107903075e-05, "loss": 0.0509, "step": 21664 }, { "epoch": 0.3836771197208977, "grad_norm": 0.879916250705719, "learning_rate": 2.1189005401518683e-05, "loss": 0.1153, "step": 21665 }, { "epoch": 0.38369482925792614, "grad_norm": 0.4885651469230652, "learning_rate": 2.118822167477309e-05, "loss": 0.0913, "step": 21666 }, { "epoch": 0.38371253879495454, "grad_norm": 0.5971298813819885, "learning_rate": 2.1187437927668872e-05, "loss": 0.0804, "step": 21667 }, { "epoch": 0.383730248331983, "grad_norm": 0.7174977660179138, "learning_rate": 2.118665416020861e-05, "loss": 0.0548, "step": 21668 }, { "epoch": 0.3837479578690114, "grad_norm": 0.8465758562088013, "learning_rate": 2.118587037239488e-05, "loss": 0.091, "step": 21669 }, { "epoch": 0.38376566740603985, "grad_norm": 0.505531370639801, "learning_rate": 2.1185086564230262e-05, "loss": 0.0594, "step": 21670 }, { "epoch": 0.38378337694306824, "grad_norm": 1.0111006498336792, "learning_rate": 2.1184302735717336e-05, "loss": 0.0593, "step": 21671 }, { "epoch": 0.3838010864800967, "grad_norm": 0.7886232137680054, "learning_rate": 2.118351888685868e-05, "loss": 0.0917, "step": 21672 }, { "epoch": 0.3838187960171251, "grad_norm": 0.5786513090133667, "learning_rate": 2.1182735017656868e-05, "loss": 0.0837, "step": 21673 }, { "epoch": 0.38383650555415355, "grad_norm": 0.8549252152442932, "learning_rate": 2.1181951128114486e-05, "loss": 0.1361, "step": 21674 }, { "epoch": 0.383854215091182, "grad_norm": 0.8201506733894348, "learning_rate": 2.118116721823411e-05, "loss": 0.1112, "step": 21675 }, { "epoch": 0.3838719246282104, "grad_norm": 0.7340771555900574, "learning_rate": 2.1180383288018322e-05, "loss": 0.0741, "step": 21676 }, { "epoch": 0.38388963416523886, "grad_norm": 0.8862686157226562, "learning_rate": 2.1179599337469697e-05, "loss": 0.1065, "step": 21677 }, { "epoch": 0.38390734370226726, "grad_norm": 0.8385319709777832, "learning_rate": 2.117881536659081e-05, "loss": 0.0945, "step": 21678 }, { "epoch": 0.3839250532392957, "grad_norm": 0.9238569736480713, "learning_rate": 2.1178031375384254e-05, "loss": 0.1103, "step": 21679 }, { "epoch": 0.3839427627763241, "grad_norm": 0.7368589043617249, "learning_rate": 2.1177247363852594e-05, "loss": 0.0838, "step": 21680 }, { "epoch": 0.38396047231335256, "grad_norm": 0.7019298076629639, "learning_rate": 2.1176463331998418e-05, "loss": 0.1136, "step": 21681 }, { "epoch": 0.38397818185038096, "grad_norm": 0.8015645742416382, "learning_rate": 2.11756792798243e-05, "loss": 0.0452, "step": 21682 }, { "epoch": 0.3839958913874094, "grad_norm": 0.5124119520187378, "learning_rate": 2.117489520733282e-05, "loss": 0.0944, "step": 21683 }, { "epoch": 0.3840136009244378, "grad_norm": 1.1055467128753662, "learning_rate": 2.1174111114526564e-05, "loss": 0.1323, "step": 21684 }, { "epoch": 0.38403131046146627, "grad_norm": 0.5770251750946045, "learning_rate": 2.11733270014081e-05, "loss": 0.0614, "step": 21685 }, { "epoch": 0.38404901999849467, "grad_norm": 0.8573850989341736, "learning_rate": 2.1172542867980024e-05, "loss": 0.0819, "step": 21686 }, { "epoch": 0.3840667295355231, "grad_norm": 0.7556328773498535, "learning_rate": 2.1171758714244898e-05, "loss": 0.0674, "step": 21687 }, { "epoch": 0.3840844390725515, "grad_norm": 0.542762279510498, "learning_rate": 2.1170974540205315e-05, "loss": 0.0594, "step": 21688 }, { "epoch": 0.38410214860958, "grad_norm": 0.9833683967590332, "learning_rate": 2.1170190345863846e-05, "loss": 0.056, "step": 21689 }, { "epoch": 0.38411985814660843, "grad_norm": 0.7354870438575745, "learning_rate": 2.116940613122308e-05, "loss": 0.0701, "step": 21690 }, { "epoch": 0.3841375676836368, "grad_norm": 0.6822860240936279, "learning_rate": 2.1168621896285586e-05, "loss": 0.0613, "step": 21691 }, { "epoch": 0.3841552772206653, "grad_norm": 0.9009267687797546, "learning_rate": 2.1167837641053954e-05, "loss": 0.0814, "step": 21692 }, { "epoch": 0.3841729867576937, "grad_norm": 1.0880573987960815, "learning_rate": 2.1167053365530757e-05, "loss": 0.099, "step": 21693 }, { "epoch": 0.38419069629472213, "grad_norm": 0.927873432636261, "learning_rate": 2.116626906971858e-05, "loss": 0.1084, "step": 21694 }, { "epoch": 0.38420840583175053, "grad_norm": 0.7345330715179443, "learning_rate": 2.116548475362e-05, "loss": 0.0794, "step": 21695 }, { "epoch": 0.384226115368779, "grad_norm": 1.1289139986038208, "learning_rate": 2.1164700417237602e-05, "loss": 0.1168, "step": 21696 }, { "epoch": 0.3842438249058074, "grad_norm": 0.5802537798881531, "learning_rate": 2.1163916060573958e-05, "loss": 0.1074, "step": 21697 }, { "epoch": 0.38426153444283584, "grad_norm": 0.6801271438598633, "learning_rate": 2.1163131683631658e-05, "loss": 0.1007, "step": 21698 }, { "epoch": 0.38427924397986424, "grad_norm": 1.0686897039413452, "learning_rate": 2.116234728641328e-05, "loss": 0.094, "step": 21699 }, { "epoch": 0.3842969535168927, "grad_norm": 1.2282938957214355, "learning_rate": 2.11615628689214e-05, "loss": 0.1015, "step": 21700 }, { "epoch": 0.3843146630539211, "grad_norm": 0.376219242811203, "learning_rate": 2.1160778431158597e-05, "loss": 0.0669, "step": 21701 }, { "epoch": 0.38433237259094954, "grad_norm": 0.7416589260101318, "learning_rate": 2.115999397312746e-05, "loss": 0.0876, "step": 21702 }, { "epoch": 0.38435008212797794, "grad_norm": 1.024900197982788, "learning_rate": 2.1159209494830566e-05, "loss": 0.0629, "step": 21703 }, { "epoch": 0.3843677916650064, "grad_norm": 0.7609686851501465, "learning_rate": 2.11584249962705e-05, "loss": 0.0879, "step": 21704 }, { "epoch": 0.38438550120203485, "grad_norm": 0.7106240391731262, "learning_rate": 2.115764047744983e-05, "loss": 0.1006, "step": 21705 }, { "epoch": 0.38440321073906325, "grad_norm": 1.0581779479980469, "learning_rate": 2.115685593837115e-05, "loss": 0.1372, "step": 21706 }, { "epoch": 0.3844209202760917, "grad_norm": 0.5616019368171692, "learning_rate": 2.1156071379037035e-05, "loss": 0.1051, "step": 21707 }, { "epoch": 0.3844386298131201, "grad_norm": 0.878267228603363, "learning_rate": 2.115528679945007e-05, "loss": 0.0912, "step": 21708 }, { "epoch": 0.38445633935014856, "grad_norm": 1.2907581329345703, "learning_rate": 2.115450219961284e-05, "loss": 0.0887, "step": 21709 }, { "epoch": 0.38447404888717696, "grad_norm": 0.8470105528831482, "learning_rate": 2.115371757952791e-05, "loss": 0.0922, "step": 21710 }, { "epoch": 0.3844917584242054, "grad_norm": 0.9005894064903259, "learning_rate": 2.1152932939197873e-05, "loss": 0.0646, "step": 21711 }, { "epoch": 0.3845094679612338, "grad_norm": 0.548585057258606, "learning_rate": 2.115214827862531e-05, "loss": 0.0969, "step": 21712 }, { "epoch": 0.38452717749826226, "grad_norm": 0.6479794383049011, "learning_rate": 2.1151363597812806e-05, "loss": 0.0901, "step": 21713 }, { "epoch": 0.38454488703529066, "grad_norm": 0.6747848391532898, "learning_rate": 2.1150578896762936e-05, "loss": 0.0798, "step": 21714 }, { "epoch": 0.3845625965723191, "grad_norm": 0.28103938698768616, "learning_rate": 2.1149794175478283e-05, "loss": 0.084, "step": 21715 }, { "epoch": 0.3845803061093475, "grad_norm": 1.6122794151306152, "learning_rate": 2.1149009433961428e-05, "loss": 0.1094, "step": 21716 }, { "epoch": 0.38459801564637597, "grad_norm": 0.5464508533477783, "learning_rate": 2.1148224672214953e-05, "loss": 0.0716, "step": 21717 }, { "epoch": 0.38461572518340437, "grad_norm": 0.6208986043930054, "learning_rate": 2.1147439890241443e-05, "loss": 0.0817, "step": 21718 }, { "epoch": 0.3846334347204328, "grad_norm": 0.6282994151115417, "learning_rate": 2.1146655088043473e-05, "loss": 0.0874, "step": 21719 }, { "epoch": 0.3846511442574613, "grad_norm": 0.6904319524765015, "learning_rate": 2.1145870265623635e-05, "loss": 0.0828, "step": 21720 }, { "epoch": 0.3846688537944897, "grad_norm": 1.0558222532272339, "learning_rate": 2.1145085422984504e-05, "loss": 0.0785, "step": 21721 }, { "epoch": 0.3846865633315181, "grad_norm": 1.3806242942810059, "learning_rate": 2.114430056012866e-05, "loss": 0.0686, "step": 21722 }, { "epoch": 0.3847042728685465, "grad_norm": 0.7283380031585693, "learning_rate": 2.1143515677058693e-05, "loss": 0.0803, "step": 21723 }, { "epoch": 0.384721982405575, "grad_norm": 6.0474371910095215, "learning_rate": 2.1142730773777183e-05, "loss": 0.1084, "step": 21724 }, { "epoch": 0.3847396919426034, "grad_norm": 0.971164882183075, "learning_rate": 2.1141945850286704e-05, "loss": 0.094, "step": 21725 }, { "epoch": 0.38475740147963183, "grad_norm": 0.7524240016937256, "learning_rate": 2.1141160906589844e-05, "loss": 0.1117, "step": 21726 }, { "epoch": 0.38477511101666023, "grad_norm": 0.7558104395866394, "learning_rate": 2.114037594268919e-05, "loss": 0.0965, "step": 21727 }, { "epoch": 0.3847928205536887, "grad_norm": 0.7275876402854919, "learning_rate": 2.1139590958587314e-05, "loss": 0.0929, "step": 21728 }, { "epoch": 0.3848105300907171, "grad_norm": 1.009968638420105, "learning_rate": 2.1138805954286816e-05, "loss": 0.0731, "step": 21729 }, { "epoch": 0.38482823962774554, "grad_norm": 0.9423418641090393, "learning_rate": 2.1138020929790256e-05, "loss": 0.0775, "step": 21730 }, { "epoch": 0.38484594916477394, "grad_norm": 0.6086273789405823, "learning_rate": 2.1137235885100235e-05, "loss": 0.0887, "step": 21731 }, { "epoch": 0.3848636587018024, "grad_norm": 1.10276198387146, "learning_rate": 2.1136450820219326e-05, "loss": 0.0897, "step": 21732 }, { "epoch": 0.3848813682388308, "grad_norm": 0.7572353482246399, "learning_rate": 2.1135665735150116e-05, "loss": 0.1081, "step": 21733 }, { "epoch": 0.38489907777585924, "grad_norm": 0.7759023904800415, "learning_rate": 2.1134880629895182e-05, "loss": 0.0935, "step": 21734 }, { "epoch": 0.3849167873128877, "grad_norm": 0.7821515202522278, "learning_rate": 2.1134095504457114e-05, "loss": 0.0802, "step": 21735 }, { "epoch": 0.3849344968499161, "grad_norm": 0.5818333029747009, "learning_rate": 2.1133310358838495e-05, "loss": 0.0664, "step": 21736 }, { "epoch": 0.38495220638694455, "grad_norm": 0.8530287742614746, "learning_rate": 2.1132525193041902e-05, "loss": 0.102, "step": 21737 }, { "epoch": 0.38496991592397295, "grad_norm": 0.5640244483947754, "learning_rate": 2.1131740007069922e-05, "loss": 0.0715, "step": 21738 }, { "epoch": 0.3849876254610014, "grad_norm": 0.6326295733451843, "learning_rate": 2.1130954800925138e-05, "loss": 0.0799, "step": 21739 }, { "epoch": 0.3850053349980298, "grad_norm": 0.9422451257705688, "learning_rate": 2.113016957461013e-05, "loss": 0.0984, "step": 21740 }, { "epoch": 0.38502304453505826, "grad_norm": 0.6915056109428406, "learning_rate": 2.112938432812749e-05, "loss": 0.1197, "step": 21741 }, { "epoch": 0.38504075407208666, "grad_norm": 0.8308019638061523, "learning_rate": 2.112859906147979e-05, "loss": 0.0646, "step": 21742 }, { "epoch": 0.3850584636091151, "grad_norm": 0.7499498724937439, "learning_rate": 2.1127813774669625e-05, "loss": 0.0771, "step": 21743 }, { "epoch": 0.3850761731461435, "grad_norm": 0.7506861090660095, "learning_rate": 2.1127028467699568e-05, "loss": 0.0685, "step": 21744 }, { "epoch": 0.38509388268317196, "grad_norm": 0.8231192827224731, "learning_rate": 2.1126243140572208e-05, "loss": 0.0762, "step": 21745 }, { "epoch": 0.38511159222020036, "grad_norm": 0.6139288544654846, "learning_rate": 2.1125457793290125e-05, "loss": 0.0757, "step": 21746 }, { "epoch": 0.3851293017572288, "grad_norm": 0.39984560012817383, "learning_rate": 2.112467242585591e-05, "loss": 0.0765, "step": 21747 }, { "epoch": 0.3851470112942572, "grad_norm": 0.5795812606811523, "learning_rate": 2.112388703827214e-05, "loss": 0.0315, "step": 21748 }, { "epoch": 0.38516472083128567, "grad_norm": 0.5390257239341736, "learning_rate": 2.11231016305414e-05, "loss": 0.0964, "step": 21749 }, { "epoch": 0.3851824303683141, "grad_norm": 0.8027664422988892, "learning_rate": 2.1122316202666276e-05, "loss": 0.1008, "step": 21750 }, { "epoch": 0.3852001399053425, "grad_norm": 0.8775618672370911, "learning_rate": 2.112153075464935e-05, "loss": 0.0709, "step": 21751 }, { "epoch": 0.385217849442371, "grad_norm": 0.7862491607666016, "learning_rate": 2.112074528649321e-05, "loss": 0.0882, "step": 21752 }, { "epoch": 0.3852355589793994, "grad_norm": 0.5459315180778503, "learning_rate": 2.1119959798200433e-05, "loss": 0.0357, "step": 21753 }, { "epoch": 0.3852532685164278, "grad_norm": 0.5632675886154175, "learning_rate": 2.1119174289773608e-05, "loss": 0.0812, "step": 21754 }, { "epoch": 0.3852709780534562, "grad_norm": 0.9934225678443909, "learning_rate": 2.111838876121532e-05, "loss": 0.097, "step": 21755 }, { "epoch": 0.3852886875904847, "grad_norm": 0.5979912281036377, "learning_rate": 2.1117603212528153e-05, "loss": 0.1076, "step": 21756 }, { "epoch": 0.3853063971275131, "grad_norm": 0.7398096919059753, "learning_rate": 2.111681764371469e-05, "loss": 0.0976, "step": 21757 }, { "epoch": 0.38532410666454153, "grad_norm": 0.5974196195602417, "learning_rate": 2.1116032054777512e-05, "loss": 0.0712, "step": 21758 }, { "epoch": 0.38534181620156993, "grad_norm": 0.6626681685447693, "learning_rate": 2.1115246445719213e-05, "loss": 0.0678, "step": 21759 }, { "epoch": 0.3853595257385984, "grad_norm": 0.7258930802345276, "learning_rate": 2.1114460816542366e-05, "loss": 0.1181, "step": 21760 }, { "epoch": 0.3853772352756268, "grad_norm": 0.7817389965057373, "learning_rate": 2.1113675167249565e-05, "loss": 0.0667, "step": 21761 }, { "epoch": 0.38539494481265524, "grad_norm": 0.7864957451820374, "learning_rate": 2.1112889497843387e-05, "loss": 0.092, "step": 21762 }, { "epoch": 0.38541265434968364, "grad_norm": 0.6956064701080322, "learning_rate": 2.111210380832642e-05, "loss": 0.0754, "step": 21763 }, { "epoch": 0.3854303638867121, "grad_norm": 0.6676415205001831, "learning_rate": 2.111131809870126e-05, "loss": 0.0897, "step": 21764 }, { "epoch": 0.38544807342374054, "grad_norm": 1.0297515392303467, "learning_rate": 2.111053236897047e-05, "loss": 0.0617, "step": 21765 }, { "epoch": 0.38546578296076894, "grad_norm": 0.9136772155761719, "learning_rate": 2.1109746619136657e-05, "loss": 0.0861, "step": 21766 }, { "epoch": 0.3854834924977974, "grad_norm": 0.48982709646224976, "learning_rate": 2.1108960849202386e-05, "loss": 0.0972, "step": 21767 }, { "epoch": 0.3855012020348258, "grad_norm": 0.7955165505409241, "learning_rate": 2.1108175059170253e-05, "loss": 0.0722, "step": 21768 }, { "epoch": 0.38551891157185425, "grad_norm": 0.8931949138641357, "learning_rate": 2.1107389249042845e-05, "loss": 0.1161, "step": 21769 }, { "epoch": 0.38553662110888265, "grad_norm": 0.8766112327575684, "learning_rate": 2.1106603418822746e-05, "loss": 0.1068, "step": 21770 }, { "epoch": 0.3855543306459111, "grad_norm": 0.7663982510566711, "learning_rate": 2.1105817568512536e-05, "loss": 0.1032, "step": 21771 }, { "epoch": 0.3855720401829395, "grad_norm": 0.4668987989425659, "learning_rate": 2.1105031698114808e-05, "loss": 0.0616, "step": 21772 }, { "epoch": 0.38558974971996796, "grad_norm": 0.591960072517395, "learning_rate": 2.1104245807632136e-05, "loss": 0.0701, "step": 21773 }, { "epoch": 0.38560745925699635, "grad_norm": 0.6609048843383789, "learning_rate": 2.1103459897067116e-05, "loss": 0.0873, "step": 21774 }, { "epoch": 0.3856251687940248, "grad_norm": 1.041226863861084, "learning_rate": 2.1102673966422333e-05, "loss": 0.1018, "step": 21775 }, { "epoch": 0.3856428783310532, "grad_norm": 0.6775611042976379, "learning_rate": 2.1101888015700368e-05, "loss": 0.0718, "step": 21776 }, { "epoch": 0.38566058786808166, "grad_norm": 1.43780517578125, "learning_rate": 2.1101102044903808e-05, "loss": 0.1079, "step": 21777 }, { "epoch": 0.38567829740511006, "grad_norm": 0.8430872559547424, "learning_rate": 2.110031605403524e-05, "loss": 0.1134, "step": 21778 }, { "epoch": 0.3856960069421385, "grad_norm": 1.1264922618865967, "learning_rate": 2.109953004309725e-05, "loss": 0.1017, "step": 21779 }, { "epoch": 0.38571371647916697, "grad_norm": 0.84553062915802, "learning_rate": 2.1098744012092424e-05, "loss": 0.0572, "step": 21780 }, { "epoch": 0.38573142601619537, "grad_norm": 0.7866904139518738, "learning_rate": 2.109795796102334e-05, "loss": 0.0784, "step": 21781 }, { "epoch": 0.3857491355532238, "grad_norm": 0.7612746953964233, "learning_rate": 2.1097171889892597e-05, "loss": 0.0681, "step": 21782 }, { "epoch": 0.3857668450902522, "grad_norm": 1.349330186843872, "learning_rate": 2.1096385798702774e-05, "loss": 0.098, "step": 21783 }, { "epoch": 0.3857845546272807, "grad_norm": 0.7516138553619385, "learning_rate": 2.1095599687456462e-05, "loss": 0.1031, "step": 21784 }, { "epoch": 0.38580226416430907, "grad_norm": 0.7266398668289185, "learning_rate": 2.1094813556156238e-05, "loss": 0.0737, "step": 21785 }, { "epoch": 0.3858199737013375, "grad_norm": 0.4530080556869507, "learning_rate": 2.1094027404804698e-05, "loss": 0.0749, "step": 21786 }, { "epoch": 0.3858376832383659, "grad_norm": 0.7312238216400146, "learning_rate": 2.109324123340442e-05, "loss": 0.0893, "step": 21787 }, { "epoch": 0.3858553927753944, "grad_norm": 0.4635394215583801, "learning_rate": 2.1092455041957996e-05, "loss": 0.1017, "step": 21788 }, { "epoch": 0.3858731023124228, "grad_norm": 1.177108883857727, "learning_rate": 2.1091668830468014e-05, "loss": 0.0623, "step": 21789 }, { "epoch": 0.38589081184945123, "grad_norm": 0.3542186915874481, "learning_rate": 2.1090882598937054e-05, "loss": 0.0725, "step": 21790 }, { "epoch": 0.38590852138647963, "grad_norm": 0.5673930048942566, "learning_rate": 2.109009634736771e-05, "loss": 0.0716, "step": 21791 }, { "epoch": 0.3859262309235081, "grad_norm": 0.5298947691917419, "learning_rate": 2.108931007576256e-05, "loss": 0.0666, "step": 21792 }, { "epoch": 0.3859439404605365, "grad_norm": 0.6992558836936951, "learning_rate": 2.10885237841242e-05, "loss": 0.0991, "step": 21793 }, { "epoch": 0.38596164999756494, "grad_norm": 0.6118788719177246, "learning_rate": 2.108773747245521e-05, "loss": 0.0574, "step": 21794 }, { "epoch": 0.3859793595345934, "grad_norm": 1.143833875656128, "learning_rate": 2.1086951140758183e-05, "loss": 0.0927, "step": 21795 }, { "epoch": 0.3859970690716218, "grad_norm": 0.6000986695289612, "learning_rate": 2.10861647890357e-05, "loss": 0.0699, "step": 21796 }, { "epoch": 0.38601477860865024, "grad_norm": 0.7571879625320435, "learning_rate": 2.108537841729035e-05, "loss": 0.0763, "step": 21797 }, { "epoch": 0.38603248814567864, "grad_norm": 1.041656494140625, "learning_rate": 2.108459202552472e-05, "loss": 0.0562, "step": 21798 }, { "epoch": 0.3860501976827071, "grad_norm": 0.5301716327667236, "learning_rate": 2.10838056137414e-05, "loss": 0.0583, "step": 21799 }, { "epoch": 0.3860679072197355, "grad_norm": 0.5413751602172852, "learning_rate": 2.108301918194298e-05, "loss": 0.0863, "step": 21800 }, { "epoch": 0.38608561675676395, "grad_norm": 0.4618072211742401, "learning_rate": 2.108223273013203e-05, "loss": 0.0898, "step": 21801 }, { "epoch": 0.38610332629379235, "grad_norm": 0.7774667143821716, "learning_rate": 2.1081446258311154e-05, "loss": 0.0546, "step": 21802 }, { "epoch": 0.3861210358308208, "grad_norm": 0.5780982971191406, "learning_rate": 2.1080659766482937e-05, "loss": 0.0815, "step": 21803 }, { "epoch": 0.3861387453678492, "grad_norm": 0.5698358416557312, "learning_rate": 2.1079873254649967e-05, "loss": 0.0771, "step": 21804 }, { "epoch": 0.38615645490487766, "grad_norm": 0.8491549491882324, "learning_rate": 2.1079086722814825e-05, "loss": 0.1299, "step": 21805 }, { "epoch": 0.38617416444190605, "grad_norm": 0.7192665934562683, "learning_rate": 2.1078300170980103e-05, "loss": 0.0584, "step": 21806 }, { "epoch": 0.3861918739789345, "grad_norm": 0.6098106503486633, "learning_rate": 2.1077513599148384e-05, "loss": 0.0667, "step": 21807 }, { "epoch": 0.3862095835159629, "grad_norm": 0.4652130603790283, "learning_rate": 2.1076727007322263e-05, "loss": 0.0565, "step": 21808 }, { "epoch": 0.38622729305299136, "grad_norm": 0.7503346800804138, "learning_rate": 2.107594039550433e-05, "loss": 0.0746, "step": 21809 }, { "epoch": 0.3862450025900198, "grad_norm": 0.6894869804382324, "learning_rate": 2.1075153763697163e-05, "loss": 0.0965, "step": 21810 }, { "epoch": 0.3862627121270482, "grad_norm": 0.4459116756916046, "learning_rate": 2.1074367111903355e-05, "loss": 0.0939, "step": 21811 }, { "epoch": 0.38628042166407667, "grad_norm": 0.6666737794876099, "learning_rate": 2.1073580440125495e-05, "loss": 0.0637, "step": 21812 }, { "epoch": 0.38629813120110507, "grad_norm": 0.8117415904998779, "learning_rate": 2.1072793748366165e-05, "loss": 0.1208, "step": 21813 }, { "epoch": 0.3863158407381335, "grad_norm": 0.8438935279846191, "learning_rate": 2.1072007036627965e-05, "loss": 0.1102, "step": 21814 }, { "epoch": 0.3863335502751619, "grad_norm": 0.8003619909286499, "learning_rate": 2.107122030491347e-05, "loss": 0.0712, "step": 21815 }, { "epoch": 0.3863512598121904, "grad_norm": 0.6333029866218567, "learning_rate": 2.1070433553225276e-05, "loss": 0.0694, "step": 21816 }, { "epoch": 0.38636896934921877, "grad_norm": 0.7935822606086731, "learning_rate": 2.106964678156597e-05, "loss": 0.0761, "step": 21817 }, { "epoch": 0.3863866788862472, "grad_norm": 1.0399798154830933, "learning_rate": 2.106885998993814e-05, "loss": 0.0572, "step": 21818 }, { "epoch": 0.3864043884232756, "grad_norm": 0.6410512328147888, "learning_rate": 2.1068073178344378e-05, "loss": 0.1133, "step": 21819 }, { "epoch": 0.3864220979603041, "grad_norm": 0.8270901441574097, "learning_rate": 2.1067286346787268e-05, "loss": 0.0947, "step": 21820 }, { "epoch": 0.3864398074973325, "grad_norm": 0.7620866894721985, "learning_rate": 2.106649949526939e-05, "loss": 0.1087, "step": 21821 }, { "epoch": 0.38645751703436093, "grad_norm": 0.9999731779098511, "learning_rate": 2.1065712623793352e-05, "loss": 0.0992, "step": 21822 }, { "epoch": 0.38647522657138933, "grad_norm": 0.8002409338951111, "learning_rate": 2.1064925732361732e-05, "loss": 0.1021, "step": 21823 }, { "epoch": 0.3864929361084178, "grad_norm": 0.766974151134491, "learning_rate": 2.1064138820977117e-05, "loss": 0.0699, "step": 21824 }, { "epoch": 0.38651064564544624, "grad_norm": 0.6791236400604248, "learning_rate": 2.1063351889642098e-05, "loss": 0.0893, "step": 21825 }, { "epoch": 0.38652835518247464, "grad_norm": 0.546906054019928, "learning_rate": 2.1062564938359266e-05, "loss": 0.071, "step": 21826 }, { "epoch": 0.3865460647195031, "grad_norm": 0.6472758650779724, "learning_rate": 2.106177796713121e-05, "loss": 0.0765, "step": 21827 }, { "epoch": 0.3865637742565315, "grad_norm": 0.8660130500793457, "learning_rate": 2.106099097596052e-05, "loss": 0.1236, "step": 21828 }, { "epoch": 0.38658148379355994, "grad_norm": 0.6089116334915161, "learning_rate": 2.1060203964849776e-05, "loss": 0.0832, "step": 21829 }, { "epoch": 0.38659919333058834, "grad_norm": 0.9206540584564209, "learning_rate": 2.1059416933801577e-05, "loss": 0.0843, "step": 21830 }, { "epoch": 0.3866169028676168, "grad_norm": 0.7580124139785767, "learning_rate": 2.105862988281851e-05, "loss": 0.0659, "step": 21831 }, { "epoch": 0.3866346124046452, "grad_norm": 0.5190394520759583, "learning_rate": 2.1057842811903166e-05, "loss": 0.1069, "step": 21832 }, { "epoch": 0.38665232194167365, "grad_norm": 0.6282260417938232, "learning_rate": 2.1057055721058124e-05, "loss": 0.0872, "step": 21833 }, { "epoch": 0.38667003147870205, "grad_norm": 0.5700573921203613, "learning_rate": 2.1056268610285988e-05, "loss": 0.0826, "step": 21834 }, { "epoch": 0.3866877410157305, "grad_norm": 0.7990343570709229, "learning_rate": 2.1055481479589336e-05, "loss": 0.0712, "step": 21835 }, { "epoch": 0.3867054505527589, "grad_norm": 1.1025820970535278, "learning_rate": 2.1054694328970765e-05, "loss": 0.0669, "step": 21836 }, { "epoch": 0.38672316008978735, "grad_norm": 0.8616716861724854, "learning_rate": 2.1053907158432865e-05, "loss": 0.0833, "step": 21837 }, { "epoch": 0.3867408696268158, "grad_norm": 1.0107005834579468, "learning_rate": 2.105311996797822e-05, "loss": 0.079, "step": 21838 }, { "epoch": 0.3867585791638442, "grad_norm": 0.6569020748138428, "learning_rate": 2.1052332757609425e-05, "loss": 0.1027, "step": 21839 }, { "epoch": 0.38677628870087266, "grad_norm": 0.9661634564399719, "learning_rate": 2.1051545527329062e-05, "loss": 0.0818, "step": 21840 }, { "epoch": 0.38679399823790106, "grad_norm": 0.38169753551483154, "learning_rate": 2.1050758277139728e-05, "loss": 0.0926, "step": 21841 }, { "epoch": 0.3868117077749295, "grad_norm": 0.688092052936554, "learning_rate": 2.1049971007044015e-05, "loss": 0.0769, "step": 21842 }, { "epoch": 0.3868294173119579, "grad_norm": 0.8498534560203552, "learning_rate": 2.1049183717044505e-05, "loss": 0.0696, "step": 21843 }, { "epoch": 0.38684712684898637, "grad_norm": 0.8622381091117859, "learning_rate": 2.1048396407143793e-05, "loss": 0.0931, "step": 21844 }, { "epoch": 0.38686483638601477, "grad_norm": 0.3844375014305115, "learning_rate": 2.1047609077344472e-05, "loss": 0.0507, "step": 21845 }, { "epoch": 0.3868825459230432, "grad_norm": 0.7662315964698792, "learning_rate": 2.1046821727649126e-05, "loss": 0.0982, "step": 21846 }, { "epoch": 0.3869002554600716, "grad_norm": 0.6853424906730652, "learning_rate": 2.1046034358060347e-05, "loss": 0.0928, "step": 21847 }, { "epoch": 0.3869179649971001, "grad_norm": 0.4814067482948303, "learning_rate": 2.104524696858073e-05, "loss": 0.0827, "step": 21848 }, { "epoch": 0.38693567453412847, "grad_norm": 0.6164460778236389, "learning_rate": 2.104445955921286e-05, "loss": 0.1062, "step": 21849 }, { "epoch": 0.3869533840711569, "grad_norm": 0.5735241770744324, "learning_rate": 2.104367212995933e-05, "loss": 0.0845, "step": 21850 }, { "epoch": 0.3869710936081853, "grad_norm": 0.6184836626052856, "learning_rate": 2.104288468082273e-05, "loss": 0.1041, "step": 21851 }, { "epoch": 0.3869888031452138, "grad_norm": 0.7788957953453064, "learning_rate": 2.104209721180565e-05, "loss": 0.0595, "step": 21852 }, { "epoch": 0.38700651268224223, "grad_norm": 0.7095044255256653, "learning_rate": 2.1041309722910683e-05, "loss": 0.0773, "step": 21853 }, { "epoch": 0.38702422221927063, "grad_norm": 0.736525297164917, "learning_rate": 2.104052221414041e-05, "loss": 0.0869, "step": 21854 }, { "epoch": 0.3870419317562991, "grad_norm": 0.9089909791946411, "learning_rate": 2.103973468549744e-05, "loss": 0.1031, "step": 21855 }, { "epoch": 0.3870596412933275, "grad_norm": 0.7797393798828125, "learning_rate": 2.103894713698435e-05, "loss": 0.0768, "step": 21856 }, { "epoch": 0.38707735083035594, "grad_norm": 0.5810315012931824, "learning_rate": 2.1038159568603737e-05, "loss": 0.0867, "step": 21857 }, { "epoch": 0.38709506036738434, "grad_norm": 0.7143328785896301, "learning_rate": 2.1037371980358183e-05, "loss": 0.0885, "step": 21858 }, { "epoch": 0.3871127699044128, "grad_norm": 0.6321197748184204, "learning_rate": 2.1036584372250293e-05, "loss": 0.0865, "step": 21859 }, { "epoch": 0.3871304794414412, "grad_norm": 0.6547191739082336, "learning_rate": 2.103579674428265e-05, "loss": 0.0721, "step": 21860 }, { "epoch": 0.38714818897846964, "grad_norm": 0.5289754867553711, "learning_rate": 2.103500909645784e-05, "loss": 0.0881, "step": 21861 }, { "epoch": 0.38716589851549804, "grad_norm": 0.6757630705833435, "learning_rate": 2.103422142877847e-05, "loss": 0.0888, "step": 21862 }, { "epoch": 0.3871836080525265, "grad_norm": 0.7783929109573364, "learning_rate": 2.1033433741247115e-05, "loss": 0.0934, "step": 21863 }, { "epoch": 0.3872013175895549, "grad_norm": 0.7889645099639893, "learning_rate": 2.1032646033866375e-05, "loss": 0.0738, "step": 21864 }, { "epoch": 0.38721902712658335, "grad_norm": 0.6346659064292908, "learning_rate": 2.103185830663884e-05, "loss": 0.0578, "step": 21865 }, { "epoch": 0.38723673666361175, "grad_norm": 0.5510355234146118, "learning_rate": 2.1031070559567103e-05, "loss": 0.0698, "step": 21866 }, { "epoch": 0.3872544462006402, "grad_norm": 1.0217252969741821, "learning_rate": 2.103028279265375e-05, "loss": 0.081, "step": 21867 }, { "epoch": 0.38727215573766866, "grad_norm": 0.6894917488098145, "learning_rate": 2.102949500590138e-05, "loss": 0.089, "step": 21868 }, { "epoch": 0.38728986527469705, "grad_norm": 0.44340506196022034, "learning_rate": 2.102870719931258e-05, "loss": 0.0539, "step": 21869 }, { "epoch": 0.3873075748117255, "grad_norm": 0.7216784358024597, "learning_rate": 2.1027919372889942e-05, "loss": 0.0544, "step": 21870 }, { "epoch": 0.3873252843487539, "grad_norm": 0.673149824142456, "learning_rate": 2.102713152663606e-05, "loss": 0.0894, "step": 21871 }, { "epoch": 0.38734299388578236, "grad_norm": 0.6854825615882874, "learning_rate": 2.1026343660553524e-05, "loss": 0.0825, "step": 21872 }, { "epoch": 0.38736070342281076, "grad_norm": 0.4388713836669922, "learning_rate": 2.1025555774644926e-05, "loss": 0.0637, "step": 21873 }, { "epoch": 0.3873784129598392, "grad_norm": 0.45267078280448914, "learning_rate": 2.102476786891286e-05, "loss": 0.093, "step": 21874 }, { "epoch": 0.3873961224968676, "grad_norm": 0.8535915017127991, "learning_rate": 2.102397994335992e-05, "loss": 0.097, "step": 21875 }, { "epoch": 0.38741383203389607, "grad_norm": 0.787548303604126, "learning_rate": 2.102319199798869e-05, "loss": 0.0713, "step": 21876 }, { "epoch": 0.38743154157092446, "grad_norm": 0.690902829170227, "learning_rate": 2.1022404032801772e-05, "loss": 0.0809, "step": 21877 }, { "epoch": 0.3874492511079529, "grad_norm": 0.477708101272583, "learning_rate": 2.102161604780175e-05, "loss": 0.1042, "step": 21878 }, { "epoch": 0.3874669606449813, "grad_norm": 0.509844958782196, "learning_rate": 2.1020828042991225e-05, "loss": 0.0508, "step": 21879 }, { "epoch": 0.38748467018200977, "grad_norm": 0.4777199625968933, "learning_rate": 2.1020040018372783e-05, "loss": 0.0678, "step": 21880 }, { "epoch": 0.38750237971903817, "grad_norm": 0.6961154341697693, "learning_rate": 2.1019251973949016e-05, "loss": 0.0595, "step": 21881 }, { "epoch": 0.3875200892560666, "grad_norm": 0.9425904154777527, "learning_rate": 2.101846390972252e-05, "loss": 0.0746, "step": 21882 }, { "epoch": 0.3875377987930951, "grad_norm": 0.7911259531974792, "learning_rate": 2.1017675825695888e-05, "loss": 0.1083, "step": 21883 }, { "epoch": 0.3875555083301235, "grad_norm": 0.5609250068664551, "learning_rate": 2.101688772187171e-05, "loss": 0.0602, "step": 21884 }, { "epoch": 0.38757321786715193, "grad_norm": 0.5537386536598206, "learning_rate": 2.101609959825258e-05, "loss": 0.0603, "step": 21885 }, { "epoch": 0.38759092740418033, "grad_norm": 0.833918571472168, "learning_rate": 2.101531145484109e-05, "loss": 0.0731, "step": 21886 }, { "epoch": 0.3876086369412088, "grad_norm": 0.8783143758773804, "learning_rate": 2.101452329163983e-05, "loss": 0.0951, "step": 21887 }, { "epoch": 0.3876263464782372, "grad_norm": 1.0300906896591187, "learning_rate": 2.10137351086514e-05, "loss": 0.0932, "step": 21888 }, { "epoch": 0.38764405601526564, "grad_norm": 0.8726394176483154, "learning_rate": 2.1012946905878395e-05, "loss": 0.0772, "step": 21889 }, { "epoch": 0.38766176555229404, "grad_norm": 0.579440176486969, "learning_rate": 2.1012158683323403e-05, "loss": 0.0706, "step": 21890 }, { "epoch": 0.3876794750893225, "grad_norm": 0.5685347318649292, "learning_rate": 2.101137044098901e-05, "loss": 0.0905, "step": 21891 }, { "epoch": 0.3876971846263509, "grad_norm": 0.9747193455696106, "learning_rate": 2.1010582178877816e-05, "loss": 0.0961, "step": 21892 }, { "epoch": 0.38771489416337934, "grad_norm": 0.5621901154518127, "learning_rate": 2.100979389699242e-05, "loss": 0.0809, "step": 21893 }, { "epoch": 0.38773260370040774, "grad_norm": 0.9561659693717957, "learning_rate": 2.1009005595335408e-05, "loss": 0.0821, "step": 21894 }, { "epoch": 0.3877503132374362, "grad_norm": 0.7202829122543335, "learning_rate": 2.1008217273909377e-05, "loss": 0.0899, "step": 21895 }, { "epoch": 0.3877680227744646, "grad_norm": 0.5876312255859375, "learning_rate": 2.1007428932716918e-05, "loss": 0.0718, "step": 21896 }, { "epoch": 0.38778573231149305, "grad_norm": 0.5243270993232727, "learning_rate": 2.100664057176062e-05, "loss": 0.0796, "step": 21897 }, { "epoch": 0.3878034418485215, "grad_norm": 1.0666956901550293, "learning_rate": 2.1005852191043084e-05, "loss": 0.1104, "step": 21898 }, { "epoch": 0.3878211513855499, "grad_norm": 0.5964990258216858, "learning_rate": 2.1005063790566907e-05, "loss": 0.0523, "step": 21899 }, { "epoch": 0.38783886092257835, "grad_norm": 0.5774668455123901, "learning_rate": 2.1004275370334673e-05, "loss": 0.0857, "step": 21900 }, { "epoch": 0.38785657045960675, "grad_norm": 0.7495612502098083, "learning_rate": 2.1003486930348983e-05, "loss": 0.0831, "step": 21901 }, { "epoch": 0.3878742799966352, "grad_norm": 0.7638180255889893, "learning_rate": 2.1002698470612423e-05, "loss": 0.1109, "step": 21902 }, { "epoch": 0.3878919895336636, "grad_norm": 0.3975537121295929, "learning_rate": 2.10019099911276e-05, "loss": 0.073, "step": 21903 }, { "epoch": 0.38790969907069206, "grad_norm": 0.5833870768547058, "learning_rate": 2.1001121491897097e-05, "loss": 0.0702, "step": 21904 }, { "epoch": 0.38792740860772046, "grad_norm": 0.7519543766975403, "learning_rate": 2.1000332972923514e-05, "loss": 0.1024, "step": 21905 }, { "epoch": 0.3879451181447489, "grad_norm": 0.9157853722572327, "learning_rate": 2.0999544434209436e-05, "loss": 0.077, "step": 21906 }, { "epoch": 0.3879628276817773, "grad_norm": 0.3860486149787903, "learning_rate": 2.0998755875757464e-05, "loss": 0.0925, "step": 21907 }, { "epoch": 0.38798053721880577, "grad_norm": 0.7615447640419006, "learning_rate": 2.0997967297570197e-05, "loss": 0.0911, "step": 21908 }, { "epoch": 0.38799824675583416, "grad_norm": 0.8733407855033875, "learning_rate": 2.099717869965022e-05, "loss": 0.0747, "step": 21909 }, { "epoch": 0.3880159562928626, "grad_norm": 0.6133973598480225, "learning_rate": 2.0996390082000135e-05, "loss": 0.0502, "step": 21910 }, { "epoch": 0.388033665829891, "grad_norm": 0.717430591583252, "learning_rate": 2.099560144462253e-05, "loss": 0.0986, "step": 21911 }, { "epoch": 0.38805137536691947, "grad_norm": 0.7459990382194519, "learning_rate": 2.0994812787520006e-05, "loss": 0.1041, "step": 21912 }, { "epoch": 0.3880690849039479, "grad_norm": 1.3191111087799072, "learning_rate": 2.099402411069515e-05, "loss": 0.0642, "step": 21913 }, { "epoch": 0.3880867944409763, "grad_norm": 0.48040279746055603, "learning_rate": 2.0993235414150566e-05, "loss": 0.0906, "step": 21914 }, { "epoch": 0.3881045039780048, "grad_norm": 0.6104070544242859, "learning_rate": 2.099244669788884e-05, "loss": 0.0753, "step": 21915 }, { "epoch": 0.3881222135150332, "grad_norm": 1.069764256477356, "learning_rate": 2.099165796191257e-05, "loss": 0.1056, "step": 21916 }, { "epoch": 0.38813992305206163, "grad_norm": 0.6122235059738159, "learning_rate": 2.0990869206224358e-05, "loss": 0.085, "step": 21917 }, { "epoch": 0.38815763258909003, "grad_norm": 1.0289772748947144, "learning_rate": 2.099008043082678e-05, "loss": 0.0949, "step": 21918 }, { "epoch": 0.3881753421261185, "grad_norm": 0.6257615685462952, "learning_rate": 2.0989291635722457e-05, "loss": 0.0686, "step": 21919 }, { "epoch": 0.3881930516631469, "grad_norm": 0.8157433867454529, "learning_rate": 2.098850282091396e-05, "loss": 0.0912, "step": 21920 }, { "epoch": 0.38821076120017534, "grad_norm": 0.6573559045791626, "learning_rate": 2.09877139864039e-05, "loss": 0.097, "step": 21921 }, { "epoch": 0.38822847073720373, "grad_norm": 0.5126941204071045, "learning_rate": 2.0986925132194866e-05, "loss": 0.0787, "step": 21922 }, { "epoch": 0.3882461802742322, "grad_norm": 0.5793595314025879, "learning_rate": 2.0986136258289454e-05, "loss": 0.0923, "step": 21923 }, { "epoch": 0.3882638898112606, "grad_norm": 0.8669062852859497, "learning_rate": 2.0985347364690255e-05, "loss": 0.0795, "step": 21924 }, { "epoch": 0.38828159934828904, "grad_norm": 0.5239159464836121, "learning_rate": 2.098455845139987e-05, "loss": 0.0634, "step": 21925 }, { "epoch": 0.38829930888531744, "grad_norm": 0.6333598494529724, "learning_rate": 2.0983769518420897e-05, "loss": 0.0605, "step": 21926 }, { "epoch": 0.3883170184223459, "grad_norm": 0.6244233846664429, "learning_rate": 2.0982980565755922e-05, "loss": 0.0844, "step": 21927 }, { "epoch": 0.38833472795937435, "grad_norm": 0.6527164578437805, "learning_rate": 2.098219159340755e-05, "loss": 0.112, "step": 21928 }, { "epoch": 0.38835243749640275, "grad_norm": 0.6962594389915466, "learning_rate": 2.0981402601378374e-05, "loss": 0.1178, "step": 21929 }, { "epoch": 0.3883701470334312, "grad_norm": 0.6683825254440308, "learning_rate": 2.0980613589670984e-05, "loss": 0.0486, "step": 21930 }, { "epoch": 0.3883878565704596, "grad_norm": 0.7325620651245117, "learning_rate": 2.097982455828798e-05, "loss": 0.0918, "step": 21931 }, { "epoch": 0.38840556610748805, "grad_norm": 0.6105490326881409, "learning_rate": 2.097903550723196e-05, "loss": 0.1076, "step": 21932 }, { "epoch": 0.38842327564451645, "grad_norm": 0.6608289480209351, "learning_rate": 2.097824643650552e-05, "loss": 0.0665, "step": 21933 }, { "epoch": 0.3884409851815449, "grad_norm": 0.5734466910362244, "learning_rate": 2.097745734611125e-05, "loss": 0.0758, "step": 21934 }, { "epoch": 0.3884586947185733, "grad_norm": 0.8870583176612854, "learning_rate": 2.0976668236051756e-05, "loss": 0.0868, "step": 21935 }, { "epoch": 0.38847640425560176, "grad_norm": 0.6558462977409363, "learning_rate": 2.097587910632962e-05, "loss": 0.0852, "step": 21936 }, { "epoch": 0.38849411379263016, "grad_norm": 1.0876753330230713, "learning_rate": 2.0975089956947454e-05, "loss": 0.0972, "step": 21937 }, { "epoch": 0.3885118233296586, "grad_norm": 0.8222971558570862, "learning_rate": 2.0974300787907842e-05, "loss": 0.0794, "step": 21938 }, { "epoch": 0.388529532866687, "grad_norm": 1.2099244594573975, "learning_rate": 2.0973511599213386e-05, "loss": 0.0804, "step": 21939 }, { "epoch": 0.38854724240371546, "grad_norm": 1.1730976104736328, "learning_rate": 2.0972722390866677e-05, "loss": 0.1147, "step": 21940 }, { "epoch": 0.38856495194074386, "grad_norm": 0.6695750951766968, "learning_rate": 2.097193316287032e-05, "loss": 0.0864, "step": 21941 }, { "epoch": 0.3885826614777723, "grad_norm": 0.9136196374893188, "learning_rate": 2.0971143915226903e-05, "loss": 0.0734, "step": 21942 }, { "epoch": 0.38860037101480077, "grad_norm": 0.8276719450950623, "learning_rate": 2.097035464793903e-05, "loss": 0.0967, "step": 21943 }, { "epoch": 0.38861808055182917, "grad_norm": 0.7750648856163025, "learning_rate": 2.09695653610093e-05, "loss": 0.1017, "step": 21944 }, { "epoch": 0.3886357900888576, "grad_norm": 0.4351789653301239, "learning_rate": 2.0968776054440294e-05, "loss": 0.0634, "step": 21945 }, { "epoch": 0.388653499625886, "grad_norm": 0.5680214762687683, "learning_rate": 2.096798672823462e-05, "loss": 0.0796, "step": 21946 }, { "epoch": 0.3886712091629145, "grad_norm": 0.7506940364837646, "learning_rate": 2.0967197382394874e-05, "loss": 0.0838, "step": 21947 }, { "epoch": 0.3886889186999429, "grad_norm": 0.9693347215652466, "learning_rate": 2.0966408016923652e-05, "loss": 0.0962, "step": 21948 }, { "epoch": 0.38870662823697133, "grad_norm": 1.2268695831298828, "learning_rate": 2.096561863182355e-05, "loss": 0.1469, "step": 21949 }, { "epoch": 0.38872433777399973, "grad_norm": 0.4106277823448181, "learning_rate": 2.0964829227097172e-05, "loss": 0.0972, "step": 21950 }, { "epoch": 0.3887420473110282, "grad_norm": 0.970071017742157, "learning_rate": 2.0964039802747105e-05, "loss": 0.0857, "step": 21951 }, { "epoch": 0.3887597568480566, "grad_norm": 0.7207602858543396, "learning_rate": 2.0963250358775946e-05, "loss": 0.0862, "step": 21952 }, { "epoch": 0.38877746638508504, "grad_norm": 0.7110415101051331, "learning_rate": 2.0962460895186305e-05, "loss": 0.0954, "step": 21953 }, { "epoch": 0.38879517592211343, "grad_norm": 0.5319266319274902, "learning_rate": 2.0961671411980766e-05, "loss": 0.0896, "step": 21954 }, { "epoch": 0.3888128854591419, "grad_norm": 1.225197196006775, "learning_rate": 2.096088190916193e-05, "loss": 0.0839, "step": 21955 }, { "epoch": 0.3888305949961703, "grad_norm": 0.7047346830368042, "learning_rate": 2.0960092386732402e-05, "loss": 0.0931, "step": 21956 }, { "epoch": 0.38884830453319874, "grad_norm": 0.9417374134063721, "learning_rate": 2.0959302844694767e-05, "loss": 0.0874, "step": 21957 }, { "epoch": 0.3888660140702272, "grad_norm": 0.7111855149269104, "learning_rate": 2.095851328305163e-05, "loss": 0.0941, "step": 21958 }, { "epoch": 0.3888837236072556, "grad_norm": 0.6328814029693604, "learning_rate": 2.0957723701805585e-05, "loss": 0.1132, "step": 21959 }, { "epoch": 0.38890143314428405, "grad_norm": 0.499117910861969, "learning_rate": 2.095693410095923e-05, "loss": 0.0605, "step": 21960 }, { "epoch": 0.38891914268131245, "grad_norm": 0.48651573061943054, "learning_rate": 2.095614448051517e-05, "loss": 0.0735, "step": 21961 }, { "epoch": 0.3889368522183409, "grad_norm": 0.27470457553863525, "learning_rate": 2.0955354840475994e-05, "loss": 0.0497, "step": 21962 }, { "epoch": 0.3889545617553693, "grad_norm": 0.8529990911483765, "learning_rate": 2.0954565180844304e-05, "loss": 0.0667, "step": 21963 }, { "epoch": 0.38897227129239775, "grad_norm": 0.8645404577255249, "learning_rate": 2.0953775501622694e-05, "loss": 0.0843, "step": 21964 }, { "epoch": 0.38898998082942615, "grad_norm": 0.7054041028022766, "learning_rate": 2.0952985802813768e-05, "loss": 0.0798, "step": 21965 }, { "epoch": 0.3890076903664546, "grad_norm": 0.723092257976532, "learning_rate": 2.095219608442012e-05, "loss": 0.0763, "step": 21966 }, { "epoch": 0.389025399903483, "grad_norm": 0.7544469833374023, "learning_rate": 2.095140634644435e-05, "loss": 0.0749, "step": 21967 }, { "epoch": 0.38904310944051146, "grad_norm": 0.43257519602775574, "learning_rate": 2.095061658888905e-05, "loss": 0.0657, "step": 21968 }, { "epoch": 0.38906081897753986, "grad_norm": 0.9643524289131165, "learning_rate": 2.094982681175683e-05, "loss": 0.0926, "step": 21969 }, { "epoch": 0.3890785285145683, "grad_norm": 0.5673800706863403, "learning_rate": 2.0949037015050278e-05, "loss": 0.0707, "step": 21970 }, { "epoch": 0.3890962380515967, "grad_norm": 0.7903921008110046, "learning_rate": 2.0948247198771998e-05, "loss": 0.115, "step": 21971 }, { "epoch": 0.38911394758862516, "grad_norm": 0.9438861608505249, "learning_rate": 2.0947457362924586e-05, "loss": 0.1042, "step": 21972 }, { "epoch": 0.3891316571256536, "grad_norm": 0.822054922580719, "learning_rate": 2.0946667507510636e-05, "loss": 0.0722, "step": 21973 }, { "epoch": 0.389149366662682, "grad_norm": 0.6146988272666931, "learning_rate": 2.0945877632532752e-05, "loss": 0.0554, "step": 21974 }, { "epoch": 0.38916707619971047, "grad_norm": 0.5691305994987488, "learning_rate": 2.0945087737993537e-05, "loss": 0.0635, "step": 21975 }, { "epoch": 0.38918478573673887, "grad_norm": 0.8178492784500122, "learning_rate": 2.0944297823895586e-05, "loss": 0.1159, "step": 21976 }, { "epoch": 0.3892024952737673, "grad_norm": 0.8704147338867188, "learning_rate": 2.0943507890241493e-05, "loss": 0.0923, "step": 21977 }, { "epoch": 0.3892202048107957, "grad_norm": 0.9317101836204529, "learning_rate": 2.0942717937033855e-05, "loss": 0.0854, "step": 21978 }, { "epoch": 0.3892379143478242, "grad_norm": 0.5786729454994202, "learning_rate": 2.0941927964275284e-05, "loss": 0.0876, "step": 21979 }, { "epoch": 0.3892556238848526, "grad_norm": 0.772739052772522, "learning_rate": 2.094113797196837e-05, "loss": 0.0899, "step": 21980 }, { "epoch": 0.38927333342188103, "grad_norm": 0.7258821129798889, "learning_rate": 2.094034796011571e-05, "loss": 0.0725, "step": 21981 }, { "epoch": 0.38929104295890943, "grad_norm": 0.7399624586105347, "learning_rate": 2.0939557928719908e-05, "loss": 0.0821, "step": 21982 }, { "epoch": 0.3893087524959379, "grad_norm": 0.7858448624610901, "learning_rate": 2.0938767877783557e-05, "loss": 0.0585, "step": 21983 }, { "epoch": 0.3893264620329663, "grad_norm": 0.7222053408622742, "learning_rate": 2.0937977807309266e-05, "loss": 0.0724, "step": 21984 }, { "epoch": 0.38934417156999473, "grad_norm": 1.0405091047286987, "learning_rate": 2.0937187717299627e-05, "loss": 0.1308, "step": 21985 }, { "epoch": 0.38936188110702313, "grad_norm": 1.031705617904663, "learning_rate": 2.0936397607757243e-05, "loss": 0.1006, "step": 21986 }, { "epoch": 0.3893795906440516, "grad_norm": 0.8889355659484863, "learning_rate": 2.0935607478684704e-05, "loss": 0.0896, "step": 21987 }, { "epoch": 0.38939730018108004, "grad_norm": 0.3665701448917389, "learning_rate": 2.093481733008462e-05, "loss": 0.0806, "step": 21988 }, { "epoch": 0.38941500971810844, "grad_norm": 0.8844444155693054, "learning_rate": 2.0934027161959588e-05, "loss": 0.0856, "step": 21989 }, { "epoch": 0.3894327192551369, "grad_norm": 0.9996846318244934, "learning_rate": 2.0933236974312214e-05, "loss": 0.1203, "step": 21990 }, { "epoch": 0.3894504287921653, "grad_norm": 0.4820076525211334, "learning_rate": 2.0932446767145078e-05, "loss": 0.071, "step": 21991 }, { "epoch": 0.38946813832919375, "grad_norm": 0.7645300626754761, "learning_rate": 2.0931656540460805e-05, "loss": 0.0867, "step": 21992 }, { "epoch": 0.38948584786622215, "grad_norm": 0.545539915561676, "learning_rate": 2.0930866294261973e-05, "loss": 0.0843, "step": 21993 }, { "epoch": 0.3895035574032506, "grad_norm": 0.427306205034256, "learning_rate": 2.093007602855119e-05, "loss": 0.073, "step": 21994 }, { "epoch": 0.389521266940279, "grad_norm": 0.8720887899398804, "learning_rate": 2.092928574333106e-05, "loss": 0.0807, "step": 21995 }, { "epoch": 0.38953897647730745, "grad_norm": 0.556749701499939, "learning_rate": 2.0928495438604178e-05, "loss": 0.0692, "step": 21996 }, { "epoch": 0.38955668601433585, "grad_norm": 0.8532317280769348, "learning_rate": 2.0927705114373148e-05, "loss": 0.0796, "step": 21997 }, { "epoch": 0.3895743955513643, "grad_norm": 0.8393340110778809, "learning_rate": 2.0926914770640563e-05, "loss": 0.0762, "step": 21998 }, { "epoch": 0.3895921050883927, "grad_norm": 1.027784824371338, "learning_rate": 2.0926124407409036e-05, "loss": 0.0715, "step": 21999 }, { "epoch": 0.38960981462542116, "grad_norm": 0.5250788331031799, "learning_rate": 2.092533402468115e-05, "loss": 0.055, "step": 22000 }, { "epoch": 0.38962752416244956, "grad_norm": 0.6518666744232178, "learning_rate": 2.0924543622459522e-05, "loss": 0.1044, "step": 22001 }, { "epoch": 0.389645233699478, "grad_norm": 0.7396172285079956, "learning_rate": 2.092375320074674e-05, "loss": 0.0803, "step": 22002 }, { "epoch": 0.38966294323650646, "grad_norm": 0.8113366365432739, "learning_rate": 2.092296275954541e-05, "loss": 0.0734, "step": 22003 }, { "epoch": 0.38968065277353486, "grad_norm": 1.1222602128982544, "learning_rate": 2.0922172298858134e-05, "loss": 0.1361, "step": 22004 }, { "epoch": 0.3896983623105633, "grad_norm": 0.6752293109893799, "learning_rate": 2.092138181868751e-05, "loss": 0.0986, "step": 22005 }, { "epoch": 0.3897160718475917, "grad_norm": 0.5984991788864136, "learning_rate": 2.0920591319036138e-05, "loss": 0.0913, "step": 22006 }, { "epoch": 0.38973378138462017, "grad_norm": 0.9215400218963623, "learning_rate": 2.0919800799906615e-05, "loss": 0.0802, "step": 22007 }, { "epoch": 0.38975149092164857, "grad_norm": 0.5228732228279114, "learning_rate": 2.091901026130155e-05, "loss": 0.0615, "step": 22008 }, { "epoch": 0.389769200458677, "grad_norm": 0.774409830570221, "learning_rate": 2.0918219703223542e-05, "loss": 0.0958, "step": 22009 }, { "epoch": 0.3897869099957054, "grad_norm": 0.6463958024978638, "learning_rate": 2.0917429125675186e-05, "loss": 0.0812, "step": 22010 }, { "epoch": 0.3898046195327339, "grad_norm": 0.589859664440155, "learning_rate": 2.091663852865909e-05, "loss": 0.0831, "step": 22011 }, { "epoch": 0.3898223290697623, "grad_norm": 0.658299446105957, "learning_rate": 2.0915847912177847e-05, "loss": 0.1163, "step": 22012 }, { "epoch": 0.38984003860679073, "grad_norm": 0.48245686292648315, "learning_rate": 2.0915057276234064e-05, "loss": 0.0412, "step": 22013 }, { "epoch": 0.3898577481438191, "grad_norm": 0.877744734287262, "learning_rate": 2.0914266620830343e-05, "loss": 0.1054, "step": 22014 }, { "epoch": 0.3898754576808476, "grad_norm": 0.6336301565170288, "learning_rate": 2.0913475945969286e-05, "loss": 0.0708, "step": 22015 }, { "epoch": 0.389893167217876, "grad_norm": 0.9110630750656128, "learning_rate": 2.091268525165348e-05, "loss": 0.1008, "step": 22016 }, { "epoch": 0.38991087675490443, "grad_norm": 0.6572451591491699, "learning_rate": 2.0911894537885548e-05, "loss": 0.0697, "step": 22017 }, { "epoch": 0.3899285862919329, "grad_norm": 0.6684641242027283, "learning_rate": 2.091110380466808e-05, "loss": 0.0724, "step": 22018 }, { "epoch": 0.3899462958289613, "grad_norm": 0.5078213810920715, "learning_rate": 2.0910313052003677e-05, "loss": 0.0523, "step": 22019 }, { "epoch": 0.38996400536598974, "grad_norm": 0.7840036153793335, "learning_rate": 2.0909522279894943e-05, "loss": 0.0818, "step": 22020 }, { "epoch": 0.38998171490301814, "grad_norm": 0.36704009771347046, "learning_rate": 2.0908731488344472e-05, "loss": 0.0432, "step": 22021 }, { "epoch": 0.3899994244400466, "grad_norm": 0.7903964519500732, "learning_rate": 2.0907940677354877e-05, "loss": 0.0751, "step": 22022 }, { "epoch": 0.390017133977075, "grad_norm": 0.6267823576927185, "learning_rate": 2.090714984692875e-05, "loss": 0.0623, "step": 22023 }, { "epoch": 0.39003484351410345, "grad_norm": 0.5520806312561035, "learning_rate": 2.0906358997068705e-05, "loss": 0.0897, "step": 22024 }, { "epoch": 0.39005255305113185, "grad_norm": 1.600913643836975, "learning_rate": 2.0905568127777337e-05, "loss": 0.0951, "step": 22025 }, { "epoch": 0.3900702625881603, "grad_norm": 0.6764944195747375, "learning_rate": 2.090477723905724e-05, "loss": 0.0855, "step": 22026 }, { "epoch": 0.3900879721251887, "grad_norm": 0.6891301870346069, "learning_rate": 2.0903986330911026e-05, "loss": 0.0729, "step": 22027 }, { "epoch": 0.39010568166221715, "grad_norm": 0.562991738319397, "learning_rate": 2.0903195403341294e-05, "loss": 0.0693, "step": 22028 }, { "epoch": 0.39012339119924555, "grad_norm": 0.6980622410774231, "learning_rate": 2.0902404456350648e-05, "loss": 0.0731, "step": 22029 }, { "epoch": 0.390141100736274, "grad_norm": 1.1671862602233887, "learning_rate": 2.0901613489941684e-05, "loss": 0.0759, "step": 22030 }, { "epoch": 0.3901588102733024, "grad_norm": 0.80085289478302, "learning_rate": 2.0900822504117014e-05, "loss": 0.1284, "step": 22031 }, { "epoch": 0.39017651981033086, "grad_norm": 1.04904043674469, "learning_rate": 2.0900031498879236e-05, "loss": 0.0929, "step": 22032 }, { "epoch": 0.3901942293473593, "grad_norm": 0.8330109715461731, "learning_rate": 2.0899240474230946e-05, "loss": 0.0633, "step": 22033 }, { "epoch": 0.3902119388843877, "grad_norm": 0.7717811465263367, "learning_rate": 2.0898449430174753e-05, "loss": 0.0762, "step": 22034 }, { "epoch": 0.39022964842141616, "grad_norm": 0.6367571353912354, "learning_rate": 2.0897658366713258e-05, "loss": 0.0699, "step": 22035 }, { "epoch": 0.39024735795844456, "grad_norm": 0.5008438229560852, "learning_rate": 2.089686728384906e-05, "loss": 0.0664, "step": 22036 }, { "epoch": 0.390265067495473, "grad_norm": 0.641832172870636, "learning_rate": 2.0896076181584767e-05, "loss": 0.0541, "step": 22037 }, { "epoch": 0.3902827770325014, "grad_norm": 0.7570413947105408, "learning_rate": 2.0895285059922985e-05, "loss": 0.1063, "step": 22038 }, { "epoch": 0.39030048656952987, "grad_norm": 0.7052991390228271, "learning_rate": 2.0894493918866304e-05, "loss": 0.0664, "step": 22039 }, { "epoch": 0.39031819610655827, "grad_norm": 0.7696933746337891, "learning_rate": 2.089370275841734e-05, "loss": 0.0996, "step": 22040 }, { "epoch": 0.3903359056435867, "grad_norm": 0.7070717215538025, "learning_rate": 2.0892911578578687e-05, "loss": 0.0779, "step": 22041 }, { "epoch": 0.3903536151806151, "grad_norm": 0.7948499917984009, "learning_rate": 2.089212037935295e-05, "loss": 0.0604, "step": 22042 }, { "epoch": 0.3903713247176436, "grad_norm": 0.497877836227417, "learning_rate": 2.089132916074274e-05, "loss": 0.0902, "step": 22043 }, { "epoch": 0.390389034254672, "grad_norm": 0.6740993857383728, "learning_rate": 2.0890537922750646e-05, "loss": 0.0824, "step": 22044 }, { "epoch": 0.39040674379170043, "grad_norm": 0.5464945435523987, "learning_rate": 2.0889746665379278e-05, "loss": 0.0448, "step": 22045 }, { "epoch": 0.3904244533287288, "grad_norm": 1.0143059492111206, "learning_rate": 2.088895538863124e-05, "loss": 0.093, "step": 22046 }, { "epoch": 0.3904421628657573, "grad_norm": 0.4347708225250244, "learning_rate": 2.0888164092509136e-05, "loss": 0.0734, "step": 22047 }, { "epoch": 0.39045987240278573, "grad_norm": 0.8060929775238037, "learning_rate": 2.0887372777015567e-05, "loss": 0.1097, "step": 22048 }, { "epoch": 0.39047758193981413, "grad_norm": 0.9311954379081726, "learning_rate": 2.0886581442153135e-05, "loss": 0.0683, "step": 22049 }, { "epoch": 0.3904952914768426, "grad_norm": 0.932500958442688, "learning_rate": 2.088579008792445e-05, "loss": 0.0621, "step": 22050 }, { "epoch": 0.390513001013871, "grad_norm": 0.9226129651069641, "learning_rate": 2.0884998714332106e-05, "loss": 0.0856, "step": 22051 }, { "epoch": 0.39053071055089944, "grad_norm": 0.6640241742134094, "learning_rate": 2.0884207321378718e-05, "loss": 0.089, "step": 22052 }, { "epoch": 0.39054842008792784, "grad_norm": 0.7160239815711975, "learning_rate": 2.0883415909066874e-05, "loss": 0.0787, "step": 22053 }, { "epoch": 0.3905661296249563, "grad_norm": 0.8841724395751953, "learning_rate": 2.08826244773992e-05, "loss": 0.08, "step": 22054 }, { "epoch": 0.3905838391619847, "grad_norm": 1.5354115962982178, "learning_rate": 2.088183302637827e-05, "loss": 0.0884, "step": 22055 }, { "epoch": 0.39060154869901315, "grad_norm": 0.5693111419677734, "learning_rate": 2.0881041556006715e-05, "loss": 0.0821, "step": 22056 }, { "epoch": 0.39061925823604154, "grad_norm": 0.7331165075302124, "learning_rate": 2.0880250066287125e-05, "loss": 0.0856, "step": 22057 }, { "epoch": 0.39063696777307, "grad_norm": 0.3741103708744049, "learning_rate": 2.0879458557222106e-05, "loss": 0.0563, "step": 22058 }, { "epoch": 0.3906546773100984, "grad_norm": 1.1367249488830566, "learning_rate": 2.0878667028814265e-05, "loss": 0.1073, "step": 22059 }, { "epoch": 0.39067238684712685, "grad_norm": 0.823785126209259, "learning_rate": 2.0877875481066203e-05, "loss": 0.0787, "step": 22060 }, { "epoch": 0.39069009638415525, "grad_norm": 0.540423572063446, "learning_rate": 2.0877083913980528e-05, "loss": 0.0598, "step": 22061 }, { "epoch": 0.3907078059211837, "grad_norm": 0.6020651459693909, "learning_rate": 2.0876292327559838e-05, "loss": 0.0881, "step": 22062 }, { "epoch": 0.39072551545821216, "grad_norm": 1.1224992275238037, "learning_rate": 2.0875500721806743e-05, "loss": 0.104, "step": 22063 }, { "epoch": 0.39074322499524056, "grad_norm": 0.318524569272995, "learning_rate": 2.087470909672384e-05, "loss": 0.0774, "step": 22064 }, { "epoch": 0.390760934532269, "grad_norm": 0.9787919521331787, "learning_rate": 2.0873917452313744e-05, "loss": 0.1124, "step": 22065 }, { "epoch": 0.3907786440692974, "grad_norm": 0.8048123121261597, "learning_rate": 2.087312578857905e-05, "loss": 0.065, "step": 22066 }, { "epoch": 0.39079635360632586, "grad_norm": 0.9668335318565369, "learning_rate": 2.0872334105522368e-05, "loss": 0.0951, "step": 22067 }, { "epoch": 0.39081406314335426, "grad_norm": 0.7511411905288696, "learning_rate": 2.0871542403146304e-05, "loss": 0.0698, "step": 22068 }, { "epoch": 0.3908317726803827, "grad_norm": 0.9277697205543518, "learning_rate": 2.0870750681453453e-05, "loss": 0.0989, "step": 22069 }, { "epoch": 0.3908494822174111, "grad_norm": 0.5562634468078613, "learning_rate": 2.086995894044643e-05, "loss": 0.0615, "step": 22070 }, { "epoch": 0.39086719175443957, "grad_norm": 0.48428091406822205, "learning_rate": 2.086916718012783e-05, "loss": 0.0806, "step": 22071 }, { "epoch": 0.39088490129146797, "grad_norm": 0.44891518354415894, "learning_rate": 2.086837540050027e-05, "loss": 0.0641, "step": 22072 }, { "epoch": 0.3909026108284964, "grad_norm": 0.518246591091156, "learning_rate": 2.086758360156635e-05, "loss": 0.0746, "step": 22073 }, { "epoch": 0.3909203203655248, "grad_norm": 0.4159037172794342, "learning_rate": 2.0866791783328667e-05, "loss": 0.0725, "step": 22074 }, { "epoch": 0.3909380299025533, "grad_norm": 0.7514236569404602, "learning_rate": 2.0865999945789837e-05, "loss": 0.0813, "step": 22075 }, { "epoch": 0.3909557394395817, "grad_norm": 0.6390936970710754, "learning_rate": 2.0865208088952456e-05, "loss": 0.1239, "step": 22076 }, { "epoch": 0.3909734489766101, "grad_norm": 0.4894077181816101, "learning_rate": 2.0864416212819142e-05, "loss": 0.0835, "step": 22077 }, { "epoch": 0.3909911585136386, "grad_norm": 1.065773367881775, "learning_rate": 2.0863624317392485e-05, "loss": 0.1026, "step": 22078 }, { "epoch": 0.391008868050667, "grad_norm": 0.633329451084137, "learning_rate": 2.0862832402675096e-05, "loss": 0.0487, "step": 22079 }, { "epoch": 0.39102657758769543, "grad_norm": 1.0756523609161377, "learning_rate": 2.0862040468669585e-05, "loss": 0.0839, "step": 22080 }, { "epoch": 0.39104428712472383, "grad_norm": 0.7173762917518616, "learning_rate": 2.0861248515378553e-05, "loss": 0.1153, "step": 22081 }, { "epoch": 0.3910619966617523, "grad_norm": 0.5402106046676636, "learning_rate": 2.086045654280461e-05, "loss": 0.1021, "step": 22082 }, { "epoch": 0.3910797061987807, "grad_norm": 0.7294623255729675, "learning_rate": 2.085966455095035e-05, "loss": 0.0916, "step": 22083 }, { "epoch": 0.39109741573580914, "grad_norm": 0.8831422328948975, "learning_rate": 2.0858872539818388e-05, "loss": 0.1122, "step": 22084 }, { "epoch": 0.39111512527283754, "grad_norm": 0.6132441759109497, "learning_rate": 2.085808050941133e-05, "loss": 0.1155, "step": 22085 }, { "epoch": 0.391132834809866, "grad_norm": 0.5656866431236267, "learning_rate": 2.0857288459731782e-05, "loss": 0.0734, "step": 22086 }, { "epoch": 0.3911505443468944, "grad_norm": 0.9215893745422363, "learning_rate": 2.0856496390782345e-05, "loss": 0.1113, "step": 22087 }, { "epoch": 0.39116825388392285, "grad_norm": 0.7780059576034546, "learning_rate": 2.0855704302565622e-05, "loss": 0.1069, "step": 22088 }, { "epoch": 0.39118596342095124, "grad_norm": 0.5941486954689026, "learning_rate": 2.0854912195084233e-05, "loss": 0.1015, "step": 22089 }, { "epoch": 0.3912036729579797, "grad_norm": 0.891638994216919, "learning_rate": 2.0854120068340772e-05, "loss": 0.0916, "step": 22090 }, { "epoch": 0.39122138249500815, "grad_norm": 1.3261916637420654, "learning_rate": 2.085332792233785e-05, "loss": 0.1134, "step": 22091 }, { "epoch": 0.39123909203203655, "grad_norm": 0.8460533618927002, "learning_rate": 2.085253575707807e-05, "loss": 0.0937, "step": 22092 }, { "epoch": 0.391256801569065, "grad_norm": 1.2156485319137573, "learning_rate": 2.0851743572564033e-05, "loss": 0.0575, "step": 22093 }, { "epoch": 0.3912745111060934, "grad_norm": 0.7950559854507446, "learning_rate": 2.0850951368798355e-05, "loss": 0.0998, "step": 22094 }, { "epoch": 0.39129222064312186, "grad_norm": 0.7243471145629883, "learning_rate": 2.0850159145783644e-05, "loss": 0.0946, "step": 22095 }, { "epoch": 0.39130993018015026, "grad_norm": 0.7344530820846558, "learning_rate": 2.08493669035225e-05, "loss": 0.0835, "step": 22096 }, { "epoch": 0.3913276397171787, "grad_norm": 0.7908767461776733, "learning_rate": 2.0848574642017528e-05, "loss": 0.0724, "step": 22097 }, { "epoch": 0.3913453492542071, "grad_norm": 0.8690815567970276, "learning_rate": 2.084778236127133e-05, "loss": 0.1069, "step": 22098 }, { "epoch": 0.39136305879123556, "grad_norm": 0.648810625076294, "learning_rate": 2.0846990061286528e-05, "loss": 0.0764, "step": 22099 }, { "epoch": 0.39138076832826396, "grad_norm": 0.48725971579551697, "learning_rate": 2.0846197742065716e-05, "loss": 0.0731, "step": 22100 }, { "epoch": 0.3913984778652924, "grad_norm": 0.8312864303588867, "learning_rate": 2.084540540361151e-05, "loss": 0.084, "step": 22101 }, { "epoch": 0.3914161874023208, "grad_norm": 0.6505860090255737, "learning_rate": 2.084461304592651e-05, "loss": 0.0531, "step": 22102 }, { "epoch": 0.39143389693934927, "grad_norm": 0.7664639949798584, "learning_rate": 2.0843820669013324e-05, "loss": 0.0872, "step": 22103 }, { "epoch": 0.39145160647637767, "grad_norm": 1.2780277729034424, "learning_rate": 2.0843028272874554e-05, "loss": 0.1245, "step": 22104 }, { "epoch": 0.3914693160134061, "grad_norm": 0.6156843900680542, "learning_rate": 2.0842235857512816e-05, "loss": 0.0994, "step": 22105 }, { "epoch": 0.3914870255504346, "grad_norm": 0.6623408794403076, "learning_rate": 2.0841443422930714e-05, "loss": 0.0746, "step": 22106 }, { "epoch": 0.391504735087463, "grad_norm": 0.8754978179931641, "learning_rate": 2.084065096913085e-05, "loss": 0.0953, "step": 22107 }, { "epoch": 0.39152244462449143, "grad_norm": 0.8294057250022888, "learning_rate": 2.083985849611584e-05, "loss": 0.064, "step": 22108 }, { "epoch": 0.3915401541615198, "grad_norm": 0.6680780053138733, "learning_rate": 2.0839066003888287e-05, "loss": 0.0922, "step": 22109 }, { "epoch": 0.3915578636985483, "grad_norm": 0.5616486668586731, "learning_rate": 2.083827349245079e-05, "loss": 0.0727, "step": 22110 }, { "epoch": 0.3915755732355767, "grad_norm": 0.7257292866706848, "learning_rate": 2.0837480961805973e-05, "loss": 0.072, "step": 22111 }, { "epoch": 0.39159328277260513, "grad_norm": 0.6723896861076355, "learning_rate": 2.0836688411956426e-05, "loss": 0.0703, "step": 22112 }, { "epoch": 0.39161099230963353, "grad_norm": 0.5514652729034424, "learning_rate": 2.0835895842904767e-05, "loss": 0.0831, "step": 22113 }, { "epoch": 0.391628701846662, "grad_norm": 0.5450780987739563, "learning_rate": 2.0835103254653606e-05, "loss": 0.0952, "step": 22114 }, { "epoch": 0.3916464113836904, "grad_norm": 0.592041015625, "learning_rate": 2.0834310647205538e-05, "loss": 0.102, "step": 22115 }, { "epoch": 0.39166412092071884, "grad_norm": 0.7034430503845215, "learning_rate": 2.0833518020563184e-05, "loss": 0.0659, "step": 22116 }, { "epoch": 0.39168183045774724, "grad_norm": 0.7398791313171387, "learning_rate": 2.0832725374729137e-05, "loss": 0.0766, "step": 22117 }, { "epoch": 0.3916995399947757, "grad_norm": 0.4991624653339386, "learning_rate": 2.083193270970602e-05, "loss": 0.0578, "step": 22118 }, { "epoch": 0.3917172495318041, "grad_norm": 0.8202318549156189, "learning_rate": 2.0831140025496434e-05, "loss": 0.0899, "step": 22119 }, { "epoch": 0.39173495906883254, "grad_norm": 0.7116737961769104, "learning_rate": 2.083034732210299e-05, "loss": 0.0751, "step": 22120 }, { "epoch": 0.391752668605861, "grad_norm": 0.578580915927887, "learning_rate": 2.0829554599528286e-05, "loss": 0.093, "step": 22121 }, { "epoch": 0.3917703781428894, "grad_norm": 0.7297441959381104, "learning_rate": 2.082876185777494e-05, "loss": 0.0858, "step": 22122 }, { "epoch": 0.39178808767991785, "grad_norm": 0.734613299369812, "learning_rate": 2.082796909684556e-05, "loss": 0.0939, "step": 22123 }, { "epoch": 0.39180579721694625, "grad_norm": 0.8010517358779907, "learning_rate": 2.0827176316742747e-05, "loss": 0.064, "step": 22124 }, { "epoch": 0.3918235067539747, "grad_norm": 0.6580571532249451, "learning_rate": 2.082638351746912e-05, "loss": 0.1051, "step": 22125 }, { "epoch": 0.3918412162910031, "grad_norm": 0.5123972296714783, "learning_rate": 2.0825590699027274e-05, "loss": 0.0715, "step": 22126 }, { "epoch": 0.39185892582803156, "grad_norm": 0.67863929271698, "learning_rate": 2.0824797861419825e-05, "loss": 0.0538, "step": 22127 }, { "epoch": 0.39187663536505996, "grad_norm": 0.7422481775283813, "learning_rate": 2.082400500464938e-05, "loss": 0.0752, "step": 22128 }, { "epoch": 0.3918943449020884, "grad_norm": 0.8099458813667297, "learning_rate": 2.082321212871855e-05, "loss": 0.0631, "step": 22129 }, { "epoch": 0.3919120544391168, "grad_norm": 0.6346912980079651, "learning_rate": 2.082241923362994e-05, "loss": 0.0904, "step": 22130 }, { "epoch": 0.39192976397614526, "grad_norm": 0.4107922613620758, "learning_rate": 2.0821626319386158e-05, "loss": 0.0973, "step": 22131 }, { "epoch": 0.39194747351317366, "grad_norm": 0.8530217409133911, "learning_rate": 2.0820833385989813e-05, "loss": 0.0922, "step": 22132 }, { "epoch": 0.3919651830502021, "grad_norm": 0.7338706254959106, "learning_rate": 2.082004043344352e-05, "loss": 0.0512, "step": 22133 }, { "epoch": 0.3919828925872305, "grad_norm": 0.9509896636009216, "learning_rate": 2.0819247461749878e-05, "loss": 0.0748, "step": 22134 }, { "epoch": 0.39200060212425897, "grad_norm": 0.8200719356536865, "learning_rate": 2.0818454470911502e-05, "loss": 0.0876, "step": 22135 }, { "epoch": 0.3920183116612874, "grad_norm": 0.704914391040802, "learning_rate": 2.0817661460931e-05, "loss": 0.0772, "step": 22136 }, { "epoch": 0.3920360211983158, "grad_norm": 0.9970102906227112, "learning_rate": 2.081686843181098e-05, "loss": 0.0996, "step": 22137 }, { "epoch": 0.3920537307353443, "grad_norm": 0.6049189567565918, "learning_rate": 2.0816075383554053e-05, "loss": 0.0755, "step": 22138 }, { "epoch": 0.3920714402723727, "grad_norm": 0.5770494937896729, "learning_rate": 2.0815282316162825e-05, "loss": 0.0716, "step": 22139 }, { "epoch": 0.3920891498094011, "grad_norm": 0.5959963798522949, "learning_rate": 2.0814489229639904e-05, "loss": 0.109, "step": 22140 }, { "epoch": 0.3921068593464295, "grad_norm": 0.7008476853370667, "learning_rate": 2.08136961239879e-05, "loss": 0.0761, "step": 22141 }, { "epoch": 0.392124568883458, "grad_norm": 0.6285748481750488, "learning_rate": 2.0812902999209427e-05, "loss": 0.074, "step": 22142 }, { "epoch": 0.3921422784204864, "grad_norm": 0.554731011390686, "learning_rate": 2.0812109855307092e-05, "loss": 0.0859, "step": 22143 }, { "epoch": 0.39215998795751483, "grad_norm": 1.1176888942718506, "learning_rate": 2.0811316692283503e-05, "loss": 0.0961, "step": 22144 }, { "epoch": 0.39217769749454323, "grad_norm": 0.5599515438079834, "learning_rate": 2.081052351014127e-05, "loss": 0.0767, "step": 22145 }, { "epoch": 0.3921954070315717, "grad_norm": 0.9492665529251099, "learning_rate": 2.0809730308883002e-05, "loss": 0.075, "step": 22146 }, { "epoch": 0.3922131165686001, "grad_norm": 0.6432497501373291, "learning_rate": 2.0808937088511305e-05, "loss": 0.1153, "step": 22147 }, { "epoch": 0.39223082610562854, "grad_norm": 0.6327574849128723, "learning_rate": 2.08081438490288e-05, "loss": 0.0577, "step": 22148 }, { "epoch": 0.39224853564265694, "grad_norm": 0.5756396651268005, "learning_rate": 2.0807350590438082e-05, "loss": 0.0674, "step": 22149 }, { "epoch": 0.3922662451796854, "grad_norm": 0.5093618631362915, "learning_rate": 2.080655731274177e-05, "loss": 0.0821, "step": 22150 }, { "epoch": 0.39228395471671385, "grad_norm": 0.45671698451042175, "learning_rate": 2.0805764015942476e-05, "loss": 0.0818, "step": 22151 }, { "epoch": 0.39230166425374224, "grad_norm": 0.5479037165641785, "learning_rate": 2.08049707000428e-05, "loss": 0.0934, "step": 22152 }, { "epoch": 0.3923193737907707, "grad_norm": 0.5953389406204224, "learning_rate": 2.0804177365045363e-05, "loss": 0.0841, "step": 22153 }, { "epoch": 0.3923370833277991, "grad_norm": 0.8671945929527283, "learning_rate": 2.0803384010952764e-05, "loss": 0.0298, "step": 22154 }, { "epoch": 0.39235479286482755, "grad_norm": 0.9979780912399292, "learning_rate": 2.0802590637767622e-05, "loss": 0.0698, "step": 22155 }, { "epoch": 0.39237250240185595, "grad_norm": 0.7100638151168823, "learning_rate": 2.080179724549254e-05, "loss": 0.0836, "step": 22156 }, { "epoch": 0.3923902119388844, "grad_norm": 0.6273651719093323, "learning_rate": 2.0801003834130136e-05, "loss": 0.0612, "step": 22157 }, { "epoch": 0.3924079214759128, "grad_norm": 0.6725337505340576, "learning_rate": 2.0800210403683013e-05, "loss": 0.1064, "step": 22158 }, { "epoch": 0.39242563101294126, "grad_norm": 0.964922308921814, "learning_rate": 2.0799416954153792e-05, "loss": 0.1413, "step": 22159 }, { "epoch": 0.39244334054996965, "grad_norm": 0.524914026260376, "learning_rate": 2.0798623485545066e-05, "loss": 0.0708, "step": 22160 }, { "epoch": 0.3924610500869981, "grad_norm": 0.5649158954620361, "learning_rate": 2.079782999785946e-05, "loss": 0.0751, "step": 22161 }, { "epoch": 0.3924787596240265, "grad_norm": 0.8276411294937134, "learning_rate": 2.0797036491099582e-05, "loss": 0.0723, "step": 22162 }, { "epoch": 0.39249646916105496, "grad_norm": 0.8145735263824463, "learning_rate": 2.0796242965268034e-05, "loss": 0.0857, "step": 22163 }, { "epoch": 0.39251417869808336, "grad_norm": 0.5211979746818542, "learning_rate": 2.079544942036744e-05, "loss": 0.0776, "step": 22164 }, { "epoch": 0.3925318882351118, "grad_norm": 0.27780455350875854, "learning_rate": 2.0794655856400393e-05, "loss": 0.0845, "step": 22165 }, { "epoch": 0.39254959777214027, "grad_norm": 0.7347331047058105, "learning_rate": 2.0793862273369527e-05, "loss": 0.1098, "step": 22166 }, { "epoch": 0.39256730730916867, "grad_norm": 0.5597811937332153, "learning_rate": 2.0793068671277434e-05, "loss": 0.0798, "step": 22167 }, { "epoch": 0.3925850168461971, "grad_norm": 0.9523370862007141, "learning_rate": 2.079227505012673e-05, "loss": 0.09, "step": 22168 }, { "epoch": 0.3926027263832255, "grad_norm": 0.5744799971580505, "learning_rate": 2.0791481409920027e-05, "loss": 0.0718, "step": 22169 }, { "epoch": 0.392620435920254, "grad_norm": 0.8971601724624634, "learning_rate": 2.0790687750659936e-05, "loss": 0.0686, "step": 22170 }, { "epoch": 0.3926381454572824, "grad_norm": 0.6050364971160889, "learning_rate": 2.078989407234907e-05, "loss": 0.0503, "step": 22171 }, { "epoch": 0.3926558549943108, "grad_norm": 0.6216321587562561, "learning_rate": 2.0789100374990035e-05, "loss": 0.0755, "step": 22172 }, { "epoch": 0.3926735645313392, "grad_norm": 0.8792409896850586, "learning_rate": 2.0788306658585452e-05, "loss": 0.0806, "step": 22173 }, { "epoch": 0.3926912740683677, "grad_norm": 0.8332783579826355, "learning_rate": 2.078751292313792e-05, "loss": 0.0841, "step": 22174 }, { "epoch": 0.3927089836053961, "grad_norm": 0.8413549065589905, "learning_rate": 2.0786719168650054e-05, "loss": 0.0895, "step": 22175 }, { "epoch": 0.39272669314242453, "grad_norm": 0.6854770183563232, "learning_rate": 2.0785925395124473e-05, "loss": 0.0844, "step": 22176 }, { "epoch": 0.39274440267945293, "grad_norm": 0.8345938920974731, "learning_rate": 2.078513160256378e-05, "loss": 0.052, "step": 22177 }, { "epoch": 0.3927621122164814, "grad_norm": 0.5173788666725159, "learning_rate": 2.0784337790970592e-05, "loss": 0.0681, "step": 22178 }, { "epoch": 0.3927798217535098, "grad_norm": 0.5277050137519836, "learning_rate": 2.078354396034751e-05, "loss": 0.0679, "step": 22179 }, { "epoch": 0.39279753129053824, "grad_norm": 0.6970950365066528, "learning_rate": 2.078275011069716e-05, "loss": 0.0672, "step": 22180 }, { "epoch": 0.3928152408275667, "grad_norm": 0.5175102353096008, "learning_rate": 2.0781956242022143e-05, "loss": 0.0637, "step": 22181 }, { "epoch": 0.3928329503645951, "grad_norm": 1.0316799879074097, "learning_rate": 2.0781162354325076e-05, "loss": 0.0627, "step": 22182 }, { "epoch": 0.39285065990162354, "grad_norm": 0.48890572786331177, "learning_rate": 2.078036844760857e-05, "loss": 0.0488, "step": 22183 }, { "epoch": 0.39286836943865194, "grad_norm": 0.6575378179550171, "learning_rate": 2.0779574521875236e-05, "loss": 0.0553, "step": 22184 }, { "epoch": 0.3928860789756804, "grad_norm": 0.4531404376029968, "learning_rate": 2.077878057712769e-05, "loss": 0.0657, "step": 22185 }, { "epoch": 0.3929037885127088, "grad_norm": 0.8578850030899048, "learning_rate": 2.0777986613368536e-05, "loss": 0.0982, "step": 22186 }, { "epoch": 0.39292149804973725, "grad_norm": 0.6040427088737488, "learning_rate": 2.0777192630600396e-05, "loss": 0.0795, "step": 22187 }, { "epoch": 0.39293920758676565, "grad_norm": 0.6119489073753357, "learning_rate": 2.077639862882587e-05, "loss": 0.0654, "step": 22188 }, { "epoch": 0.3929569171237941, "grad_norm": 0.6757015585899353, "learning_rate": 2.0775604608047577e-05, "loss": 0.0912, "step": 22189 }, { "epoch": 0.3929746266608225, "grad_norm": 1.5764710903167725, "learning_rate": 2.0774810568268137e-05, "loss": 0.0473, "step": 22190 }, { "epoch": 0.39299233619785096, "grad_norm": 0.8214746713638306, "learning_rate": 2.0774016509490147e-05, "loss": 0.1023, "step": 22191 }, { "epoch": 0.39301004573487935, "grad_norm": 0.7729054689407349, "learning_rate": 2.077322243171623e-05, "loss": 0.1098, "step": 22192 }, { "epoch": 0.3930277552719078, "grad_norm": 0.6725289225578308, "learning_rate": 2.0772428334948992e-05, "loss": 0.0828, "step": 22193 }, { "epoch": 0.3930454648089362, "grad_norm": 0.6241804361343384, "learning_rate": 2.077163421919105e-05, "loss": 0.0715, "step": 22194 }, { "epoch": 0.39306317434596466, "grad_norm": 0.48179104924201965, "learning_rate": 2.0770840084445015e-05, "loss": 0.0944, "step": 22195 }, { "epoch": 0.3930808838829931, "grad_norm": 0.6030277013778687, "learning_rate": 2.07700459307135e-05, "loss": 0.0685, "step": 22196 }, { "epoch": 0.3930985934200215, "grad_norm": 0.7289325594902039, "learning_rate": 2.0769251757999116e-05, "loss": 0.0672, "step": 22197 }, { "epoch": 0.39311630295704997, "grad_norm": 0.9415627121925354, "learning_rate": 2.076845756630448e-05, "loss": 0.1176, "step": 22198 }, { "epoch": 0.39313401249407837, "grad_norm": 0.6087040305137634, "learning_rate": 2.0767663355632204e-05, "loss": 0.108, "step": 22199 }, { "epoch": 0.3931517220311068, "grad_norm": 0.7752184271812439, "learning_rate": 2.0766869125984893e-05, "loss": 0.063, "step": 22200 }, { "epoch": 0.3931694315681352, "grad_norm": 0.69437175989151, "learning_rate": 2.076607487736517e-05, "loss": 0.0896, "step": 22201 }, { "epoch": 0.3931871411051637, "grad_norm": 0.5808667540550232, "learning_rate": 2.0765280609775642e-05, "loss": 0.0667, "step": 22202 }, { "epoch": 0.39320485064219207, "grad_norm": 0.62211674451828, "learning_rate": 2.0764486323218922e-05, "loss": 0.0758, "step": 22203 }, { "epoch": 0.3932225601792205, "grad_norm": 0.6214311718940735, "learning_rate": 2.076369201769763e-05, "loss": 0.0926, "step": 22204 }, { "epoch": 0.3932402697162489, "grad_norm": 0.6261385679244995, "learning_rate": 2.076289769321437e-05, "loss": 0.0593, "step": 22205 }, { "epoch": 0.3932579792532774, "grad_norm": 0.43709343671798706, "learning_rate": 2.0762103349771763e-05, "loss": 0.0582, "step": 22206 }, { "epoch": 0.3932756887903058, "grad_norm": 0.4249952435493469, "learning_rate": 2.0761308987372418e-05, "loss": 0.0755, "step": 22207 }, { "epoch": 0.39329339832733423, "grad_norm": 0.7763804197311401, "learning_rate": 2.0760514606018945e-05, "loss": 0.0943, "step": 22208 }, { "epoch": 0.39331110786436263, "grad_norm": 0.8189700245857239, "learning_rate": 2.0759720205713964e-05, "loss": 0.0689, "step": 22209 }, { "epoch": 0.3933288174013911, "grad_norm": 0.9639089703559875, "learning_rate": 2.075892578646009e-05, "loss": 0.0631, "step": 22210 }, { "epoch": 0.39334652693841954, "grad_norm": 0.4298561215400696, "learning_rate": 2.075813134825993e-05, "loss": 0.0778, "step": 22211 }, { "epoch": 0.39336423647544794, "grad_norm": 0.5767417550086975, "learning_rate": 2.0757336891116096e-05, "loss": 0.0814, "step": 22212 }, { "epoch": 0.3933819460124764, "grad_norm": 0.7445710897445679, "learning_rate": 2.075654241503121e-05, "loss": 0.0965, "step": 22213 }, { "epoch": 0.3933996555495048, "grad_norm": 0.5092534422874451, "learning_rate": 2.0755747920007882e-05, "loss": 0.0735, "step": 22214 }, { "epoch": 0.39341736508653324, "grad_norm": 0.41389837861061096, "learning_rate": 2.0754953406048728e-05, "loss": 0.0693, "step": 22215 }, { "epoch": 0.39343507462356164, "grad_norm": 0.7992584109306335, "learning_rate": 2.0754158873156355e-05, "loss": 0.1003, "step": 22216 }, { "epoch": 0.3934527841605901, "grad_norm": 1.3003885746002197, "learning_rate": 2.0753364321333382e-05, "loss": 0.0979, "step": 22217 }, { "epoch": 0.3934704936976185, "grad_norm": 0.42833635210990906, "learning_rate": 2.075256975058242e-05, "loss": 0.0511, "step": 22218 }, { "epoch": 0.39348820323464695, "grad_norm": 0.8199974298477173, "learning_rate": 2.075177516090609e-05, "loss": 0.0587, "step": 22219 }, { "epoch": 0.39350591277167535, "grad_norm": 1.2128876447677612, "learning_rate": 2.0750980552306997e-05, "loss": 0.0891, "step": 22220 }, { "epoch": 0.3935236223087038, "grad_norm": 0.7080788016319275, "learning_rate": 2.0750185924787765e-05, "loss": 0.0702, "step": 22221 }, { "epoch": 0.3935413318457322, "grad_norm": 1.1502262353897095, "learning_rate": 2.0749391278351e-05, "loss": 0.0938, "step": 22222 }, { "epoch": 0.39355904138276065, "grad_norm": 0.4028005003929138, "learning_rate": 2.074859661299932e-05, "loss": 0.0664, "step": 22223 }, { "epoch": 0.39357675091978905, "grad_norm": 0.6498987674713135, "learning_rate": 2.0747801928735337e-05, "loss": 0.0646, "step": 22224 }, { "epoch": 0.3935944604568175, "grad_norm": 0.7524843215942383, "learning_rate": 2.074700722556167e-05, "loss": 0.0777, "step": 22225 }, { "epoch": 0.39361216999384596, "grad_norm": 1.073261022567749, "learning_rate": 2.0746212503480928e-05, "loss": 0.0988, "step": 22226 }, { "epoch": 0.39362987953087436, "grad_norm": 0.41409146785736084, "learning_rate": 2.0745417762495725e-05, "loss": 0.0467, "step": 22227 }, { "epoch": 0.3936475890679028, "grad_norm": 0.9638151526451111, "learning_rate": 2.0744623002608686e-05, "loss": 0.0774, "step": 22228 }, { "epoch": 0.3936652986049312, "grad_norm": 0.8561638593673706, "learning_rate": 2.074382822382241e-05, "loss": 0.0774, "step": 22229 }, { "epoch": 0.39368300814195967, "grad_norm": 1.166438341140747, "learning_rate": 2.074303342613953e-05, "loss": 0.1191, "step": 22230 }, { "epoch": 0.39370071767898807, "grad_norm": 0.9457311630249023, "learning_rate": 2.0742238609562642e-05, "loss": 0.0786, "step": 22231 }, { "epoch": 0.3937184272160165, "grad_norm": 0.49621203541755676, "learning_rate": 2.0741443774094373e-05, "loss": 0.0689, "step": 22232 }, { "epoch": 0.3937361367530449, "grad_norm": 0.7258021831512451, "learning_rate": 2.074064891973734e-05, "loss": 0.1294, "step": 22233 }, { "epoch": 0.3937538462900734, "grad_norm": 0.9032511711120605, "learning_rate": 2.0739854046494145e-05, "loss": 0.0783, "step": 22234 }, { "epoch": 0.39377155582710177, "grad_norm": 0.3993060290813446, "learning_rate": 2.0739059154367415e-05, "loss": 0.0687, "step": 22235 }, { "epoch": 0.3937892653641302, "grad_norm": 0.5741679072380066, "learning_rate": 2.0738264243359758e-05, "loss": 0.0432, "step": 22236 }, { "epoch": 0.3938069749011586, "grad_norm": 0.5429668426513672, "learning_rate": 2.0737469313473793e-05, "loss": 0.0676, "step": 22237 }, { "epoch": 0.3938246844381871, "grad_norm": 0.5757718682289124, "learning_rate": 2.0736674364712136e-05, "loss": 0.0869, "step": 22238 }, { "epoch": 0.3938423939752155, "grad_norm": 1.2122704982757568, "learning_rate": 2.07358793970774e-05, "loss": 0.1212, "step": 22239 }, { "epoch": 0.39386010351224393, "grad_norm": 0.7320849299430847, "learning_rate": 2.07350844105722e-05, "loss": 0.0751, "step": 22240 }, { "epoch": 0.3938778130492724, "grad_norm": 0.7128779292106628, "learning_rate": 2.073428940519915e-05, "loss": 0.0943, "step": 22241 }, { "epoch": 0.3938955225863008, "grad_norm": 0.9339314699172974, "learning_rate": 2.0733494380960872e-05, "loss": 0.0775, "step": 22242 }, { "epoch": 0.39391323212332924, "grad_norm": 0.8996244072914124, "learning_rate": 2.0732699337859975e-05, "loss": 0.0946, "step": 22243 }, { "epoch": 0.39393094166035764, "grad_norm": 0.8223494291305542, "learning_rate": 2.073190427589908e-05, "loss": 0.1013, "step": 22244 }, { "epoch": 0.3939486511973861, "grad_norm": 0.9454244375228882, "learning_rate": 2.0731109195080795e-05, "loss": 0.0819, "step": 22245 }, { "epoch": 0.3939663607344145, "grad_norm": 0.7186179757118225, "learning_rate": 2.0730314095407743e-05, "loss": 0.0985, "step": 22246 }, { "epoch": 0.39398407027144294, "grad_norm": 0.733788251876831, "learning_rate": 2.0729518976882537e-05, "loss": 0.0798, "step": 22247 }, { "epoch": 0.39400177980847134, "grad_norm": 0.8102348446846008, "learning_rate": 2.0728723839507795e-05, "loss": 0.0809, "step": 22248 }, { "epoch": 0.3940194893454998, "grad_norm": 0.49957355856895447, "learning_rate": 2.072792868328613e-05, "loss": 0.087, "step": 22249 }, { "epoch": 0.3940371988825282, "grad_norm": 0.5441418886184692, "learning_rate": 2.0727133508220155e-05, "loss": 0.0897, "step": 22250 }, { "epoch": 0.39405490841955665, "grad_norm": 0.8177305459976196, "learning_rate": 2.0726338314312492e-05, "loss": 0.0823, "step": 22251 }, { "epoch": 0.39407261795658505, "grad_norm": 0.7814978361129761, "learning_rate": 2.0725543101565757e-05, "loss": 0.0732, "step": 22252 }, { "epoch": 0.3940903274936135, "grad_norm": 0.6710196137428284, "learning_rate": 2.0724747869982567e-05, "loss": 0.1007, "step": 22253 }, { "epoch": 0.3941080370306419, "grad_norm": 0.7881436944007874, "learning_rate": 2.0723952619565535e-05, "loss": 0.1029, "step": 22254 }, { "epoch": 0.39412574656767035, "grad_norm": 1.0440993309020996, "learning_rate": 2.0723157350317273e-05, "loss": 0.0757, "step": 22255 }, { "epoch": 0.3941434561046988, "grad_norm": 0.8131648302078247, "learning_rate": 2.07223620622404e-05, "loss": 0.0847, "step": 22256 }, { "epoch": 0.3941611656417272, "grad_norm": 0.6595450639724731, "learning_rate": 2.072156675533754e-05, "loss": 0.0925, "step": 22257 }, { "epoch": 0.39417887517875566, "grad_norm": 0.6729006767272949, "learning_rate": 2.0720771429611303e-05, "loss": 0.0717, "step": 22258 }, { "epoch": 0.39419658471578406, "grad_norm": 0.43103501200675964, "learning_rate": 2.0719976085064306e-05, "loss": 0.0898, "step": 22259 }, { "epoch": 0.3942142942528125, "grad_norm": 0.7914212346076965, "learning_rate": 2.0719180721699164e-05, "loss": 0.0519, "step": 22260 }, { "epoch": 0.3942320037898409, "grad_norm": 0.7872007489204407, "learning_rate": 2.07183853395185e-05, "loss": 0.0774, "step": 22261 }, { "epoch": 0.39424971332686937, "grad_norm": 0.6052120327949524, "learning_rate": 2.071758993852493e-05, "loss": 0.0693, "step": 22262 }, { "epoch": 0.39426742286389777, "grad_norm": 0.9024789333343506, "learning_rate": 2.071679451872106e-05, "loss": 0.112, "step": 22263 }, { "epoch": 0.3942851324009262, "grad_norm": 0.5803954601287842, "learning_rate": 2.0715999080109517e-05, "loss": 0.0768, "step": 22264 }, { "epoch": 0.3943028419379546, "grad_norm": 1.0084723234176636, "learning_rate": 2.0715203622692913e-05, "loss": 0.1412, "step": 22265 }, { "epoch": 0.39432055147498307, "grad_norm": 0.4233153164386749, "learning_rate": 2.0714408146473868e-05, "loss": 0.0842, "step": 22266 }, { "epoch": 0.39433826101201147, "grad_norm": 0.5509198904037476, "learning_rate": 2.0713612651455002e-05, "loss": 0.081, "step": 22267 }, { "epoch": 0.3943559705490399, "grad_norm": 0.9162866473197937, "learning_rate": 2.0712817137638923e-05, "loss": 0.0871, "step": 22268 }, { "epoch": 0.3943736800860683, "grad_norm": 1.2872065305709839, "learning_rate": 2.071202160502826e-05, "loss": 0.1047, "step": 22269 }, { "epoch": 0.3943913896230968, "grad_norm": 0.8662809729576111, "learning_rate": 2.071122605362562e-05, "loss": 0.0991, "step": 22270 }, { "epoch": 0.39440909916012523, "grad_norm": 0.8007125854492188, "learning_rate": 2.0710430483433622e-05, "loss": 0.1019, "step": 22271 }, { "epoch": 0.39442680869715363, "grad_norm": 0.4197680652141571, "learning_rate": 2.070963489445489e-05, "loss": 0.0363, "step": 22272 }, { "epoch": 0.3944445182341821, "grad_norm": 0.7130893468856812, "learning_rate": 2.070883928669203e-05, "loss": 0.0721, "step": 22273 }, { "epoch": 0.3944622277712105, "grad_norm": 1.1647757291793823, "learning_rate": 2.070804366014767e-05, "loss": 0.101, "step": 22274 }, { "epoch": 0.39447993730823894, "grad_norm": 0.9294419884681702, "learning_rate": 2.0707248014824424e-05, "loss": 0.0817, "step": 22275 }, { "epoch": 0.39449764684526734, "grad_norm": 0.5743036270141602, "learning_rate": 2.070645235072491e-05, "loss": 0.0791, "step": 22276 }, { "epoch": 0.3945153563822958, "grad_norm": 0.6450696587562561, "learning_rate": 2.070565666785174e-05, "loss": 0.0852, "step": 22277 }, { "epoch": 0.3945330659193242, "grad_norm": 1.2340357303619385, "learning_rate": 2.070486096620754e-05, "loss": 0.1037, "step": 22278 }, { "epoch": 0.39455077545635264, "grad_norm": 0.7243004441261292, "learning_rate": 2.0704065245794926e-05, "loss": 0.0546, "step": 22279 }, { "epoch": 0.39456848499338104, "grad_norm": 0.7066050171852112, "learning_rate": 2.0703269506616512e-05, "loss": 0.0823, "step": 22280 }, { "epoch": 0.3945861945304095, "grad_norm": 0.5193068981170654, "learning_rate": 2.0702473748674922e-05, "loss": 0.0801, "step": 22281 }, { "epoch": 0.3946039040674379, "grad_norm": 0.5052165985107422, "learning_rate": 2.0701677971972766e-05, "loss": 0.0673, "step": 22282 }, { "epoch": 0.39462161360446635, "grad_norm": 0.7017740607261658, "learning_rate": 2.070088217651267e-05, "loss": 0.0769, "step": 22283 }, { "epoch": 0.39463932314149475, "grad_norm": 0.23311938345432281, "learning_rate": 2.0700086362297242e-05, "loss": 0.0909, "step": 22284 }, { "epoch": 0.3946570326785232, "grad_norm": 0.864737331867218, "learning_rate": 2.069929052932911e-05, "loss": 0.0522, "step": 22285 }, { "epoch": 0.39467474221555165, "grad_norm": 0.6490874290466309, "learning_rate": 2.0698494677610893e-05, "loss": 0.0469, "step": 22286 }, { "epoch": 0.39469245175258005, "grad_norm": 0.6049473285675049, "learning_rate": 2.0697698807145198e-05, "loss": 0.0525, "step": 22287 }, { "epoch": 0.3947101612896085, "grad_norm": 0.9270645380020142, "learning_rate": 2.0696902917934657e-05, "loss": 0.088, "step": 22288 }, { "epoch": 0.3947278708266369, "grad_norm": 0.731309711933136, "learning_rate": 2.0696107009981872e-05, "loss": 0.1232, "step": 22289 }, { "epoch": 0.39474558036366536, "grad_norm": 0.7276571393013, "learning_rate": 2.0695311083289476e-05, "loss": 0.0842, "step": 22290 }, { "epoch": 0.39476328990069376, "grad_norm": 0.5108351111412048, "learning_rate": 2.069451513786008e-05, "loss": 0.0607, "step": 22291 }, { "epoch": 0.3947809994377222, "grad_norm": 0.5720744729042053, "learning_rate": 2.0693719173696313e-05, "loss": 0.0781, "step": 22292 }, { "epoch": 0.3947987089747506, "grad_norm": 0.709046483039856, "learning_rate": 2.0692923190800777e-05, "loss": 0.0847, "step": 22293 }, { "epoch": 0.39481641851177907, "grad_norm": 0.5193805694580078, "learning_rate": 2.0692127189176103e-05, "loss": 0.0685, "step": 22294 }, { "epoch": 0.39483412804880746, "grad_norm": 0.4804653823375702, "learning_rate": 2.0691331168824913e-05, "loss": 0.054, "step": 22295 }, { "epoch": 0.3948518375858359, "grad_norm": 0.6356785297393799, "learning_rate": 2.069053512974981e-05, "loss": 0.0819, "step": 22296 }, { "epoch": 0.3948695471228643, "grad_norm": 1.1670781373977661, "learning_rate": 2.068973907195343e-05, "loss": 0.1281, "step": 22297 }, { "epoch": 0.39488725665989277, "grad_norm": 0.5479735732078552, "learning_rate": 2.0688942995438377e-05, "loss": 0.083, "step": 22298 }, { "epoch": 0.39490496619692117, "grad_norm": 0.6908315420150757, "learning_rate": 2.0688146900207275e-05, "loss": 0.1053, "step": 22299 }, { "epoch": 0.3949226757339496, "grad_norm": 0.8570178151130676, "learning_rate": 2.068735078626275e-05, "loss": 0.0913, "step": 22300 }, { "epoch": 0.3949403852709781, "grad_norm": 0.9934573173522949, "learning_rate": 2.0686554653607417e-05, "loss": 0.0955, "step": 22301 }, { "epoch": 0.3949580948080065, "grad_norm": 1.0033371448516846, "learning_rate": 2.0685758502243893e-05, "loss": 0.0663, "step": 22302 }, { "epoch": 0.39497580434503493, "grad_norm": 0.7729472517967224, "learning_rate": 2.0684962332174794e-05, "loss": 0.0997, "step": 22303 }, { "epoch": 0.39499351388206333, "grad_norm": 0.8117713928222656, "learning_rate": 2.068416614340275e-05, "loss": 0.059, "step": 22304 }, { "epoch": 0.3950112234190918, "grad_norm": 1.063141942024231, "learning_rate": 2.068336993593037e-05, "loss": 0.1079, "step": 22305 }, { "epoch": 0.3950289329561202, "grad_norm": 0.64264976978302, "learning_rate": 2.0682573709760284e-05, "loss": 0.0934, "step": 22306 }, { "epoch": 0.39504664249314864, "grad_norm": 0.6178790926933289, "learning_rate": 2.0681777464895096e-05, "loss": 0.1076, "step": 22307 }, { "epoch": 0.39506435203017704, "grad_norm": 0.8719605803489685, "learning_rate": 2.068098120133744e-05, "loss": 0.0838, "step": 22308 }, { "epoch": 0.3950820615672055, "grad_norm": 0.7442482113838196, "learning_rate": 2.0680184919089934e-05, "loss": 0.1125, "step": 22309 }, { "epoch": 0.3950997711042339, "grad_norm": 0.7078790664672852, "learning_rate": 2.067938861815519e-05, "loss": 0.069, "step": 22310 }, { "epoch": 0.39511748064126234, "grad_norm": 0.5256430506706238, "learning_rate": 2.067859229853583e-05, "loss": 0.0663, "step": 22311 }, { "epoch": 0.39513519017829074, "grad_norm": 0.5543398261070251, "learning_rate": 2.0677795960234483e-05, "loss": 0.0775, "step": 22312 }, { "epoch": 0.3951528997153192, "grad_norm": 0.46049752831459045, "learning_rate": 2.067699960325375e-05, "loss": 0.0705, "step": 22313 }, { "epoch": 0.3951706092523476, "grad_norm": 0.5790908932685852, "learning_rate": 2.0676203227596274e-05, "loss": 0.0711, "step": 22314 }, { "epoch": 0.39518831878937605, "grad_norm": 0.6535521149635315, "learning_rate": 2.067540683326466e-05, "loss": 0.0653, "step": 22315 }, { "epoch": 0.3952060283264045, "grad_norm": 0.6625264286994934, "learning_rate": 2.0674610420261527e-05, "loss": 0.0735, "step": 22316 }, { "epoch": 0.3952237378634329, "grad_norm": 0.622133731842041, "learning_rate": 2.0673813988589507e-05, "loss": 0.0844, "step": 22317 }, { "epoch": 0.39524144740046135, "grad_norm": 0.6663814187049866, "learning_rate": 2.0673017538251206e-05, "loss": 0.1023, "step": 22318 }, { "epoch": 0.39525915693748975, "grad_norm": 1.1404247283935547, "learning_rate": 2.0672221069249252e-05, "loss": 0.07, "step": 22319 }, { "epoch": 0.3952768664745182, "grad_norm": 0.657558023929596, "learning_rate": 2.0671424581586268e-05, "loss": 0.0723, "step": 22320 }, { "epoch": 0.3952945760115466, "grad_norm": 0.570959746837616, "learning_rate": 2.067062807526487e-05, "loss": 0.0732, "step": 22321 }, { "epoch": 0.39531228554857506, "grad_norm": 0.6613854765892029, "learning_rate": 2.0669831550287678e-05, "loss": 0.0864, "step": 22322 }, { "epoch": 0.39532999508560346, "grad_norm": 0.7551388740539551, "learning_rate": 2.066903500665731e-05, "loss": 0.1216, "step": 22323 }, { "epoch": 0.3953477046226319, "grad_norm": 0.6035403609275818, "learning_rate": 2.0668238444376395e-05, "loss": 0.057, "step": 22324 }, { "epoch": 0.3953654141596603, "grad_norm": 0.5043212175369263, "learning_rate": 2.066744186344755e-05, "loss": 0.0937, "step": 22325 }, { "epoch": 0.39538312369668877, "grad_norm": 0.5416833162307739, "learning_rate": 2.0666645263873392e-05, "loss": 0.0581, "step": 22326 }, { "epoch": 0.39540083323371716, "grad_norm": 0.8392754793167114, "learning_rate": 2.0665848645656545e-05, "loss": 0.0896, "step": 22327 }, { "epoch": 0.3954185427707456, "grad_norm": 1.025862216949463, "learning_rate": 2.0665052008799627e-05, "loss": 0.0752, "step": 22328 }, { "epoch": 0.395436252307774, "grad_norm": 0.355272501707077, "learning_rate": 2.0664255353305264e-05, "loss": 0.0851, "step": 22329 }, { "epoch": 0.39545396184480247, "grad_norm": 0.8515479564666748, "learning_rate": 2.066345867917607e-05, "loss": 0.0739, "step": 22330 }, { "epoch": 0.3954716713818309, "grad_norm": 1.0639879703521729, "learning_rate": 2.0662661986414676e-05, "loss": 0.1082, "step": 22331 }, { "epoch": 0.3954893809188593, "grad_norm": 0.8055950403213501, "learning_rate": 2.0661865275023687e-05, "loss": 0.0825, "step": 22332 }, { "epoch": 0.3955070904558878, "grad_norm": 0.6139385104179382, "learning_rate": 2.0661068545005742e-05, "loss": 0.0723, "step": 22333 }, { "epoch": 0.3955247999929162, "grad_norm": 0.8207120299339294, "learning_rate": 2.0660271796363457e-05, "loss": 0.1026, "step": 22334 }, { "epoch": 0.39554250952994463, "grad_norm": 1.3018845319747925, "learning_rate": 2.0659475029099443e-05, "loss": 0.1171, "step": 22335 }, { "epoch": 0.39556021906697303, "grad_norm": 0.6728647351264954, "learning_rate": 2.0658678243216328e-05, "loss": 0.0587, "step": 22336 }, { "epoch": 0.3955779286040015, "grad_norm": 0.9674417972564697, "learning_rate": 2.0657881438716736e-05, "loss": 0.0946, "step": 22337 }, { "epoch": 0.3955956381410299, "grad_norm": 0.6972084045410156, "learning_rate": 2.0657084615603287e-05, "loss": 0.0616, "step": 22338 }, { "epoch": 0.39561334767805834, "grad_norm": 0.7730160355567932, "learning_rate": 2.0656287773878603e-05, "loss": 0.1277, "step": 22339 }, { "epoch": 0.39563105721508673, "grad_norm": 0.5916057229042053, "learning_rate": 2.0655490913545306e-05, "loss": 0.0683, "step": 22340 }, { "epoch": 0.3956487667521152, "grad_norm": 0.8077991008758545, "learning_rate": 2.065469403460601e-05, "loss": 0.076, "step": 22341 }, { "epoch": 0.3956664762891436, "grad_norm": 0.6789605617523193, "learning_rate": 2.0653897137063345e-05, "loss": 0.0519, "step": 22342 }, { "epoch": 0.39568418582617204, "grad_norm": 0.899102509021759, "learning_rate": 2.0653100220919935e-05, "loss": 0.1269, "step": 22343 }, { "epoch": 0.39570189536320044, "grad_norm": 0.7298356890678406, "learning_rate": 2.065230328617839e-05, "loss": 0.0852, "step": 22344 }, { "epoch": 0.3957196049002289, "grad_norm": 0.8603744506835938, "learning_rate": 2.0651506332841346e-05, "loss": 0.1041, "step": 22345 }, { "epoch": 0.39573731443725735, "grad_norm": 0.5363691449165344, "learning_rate": 2.0650709360911408e-05, "loss": 0.0813, "step": 22346 }, { "epoch": 0.39575502397428575, "grad_norm": 1.2383770942687988, "learning_rate": 2.0649912370391214e-05, "loss": 0.123, "step": 22347 }, { "epoch": 0.3957727335113142, "grad_norm": 0.588360607624054, "learning_rate": 2.0649115361283377e-05, "loss": 0.0768, "step": 22348 }, { "epoch": 0.3957904430483426, "grad_norm": 1.0146045684814453, "learning_rate": 2.0648318333590526e-05, "loss": 0.1044, "step": 22349 }, { "epoch": 0.39580815258537105, "grad_norm": 0.8521865010261536, "learning_rate": 2.064752128731528e-05, "loss": 0.0926, "step": 22350 }, { "epoch": 0.39582586212239945, "grad_norm": 0.6897615790367126, "learning_rate": 2.0646724222460253e-05, "loss": 0.0795, "step": 22351 }, { "epoch": 0.3958435716594279, "grad_norm": 0.7705371975898743, "learning_rate": 2.0645927139028084e-05, "loss": 0.1057, "step": 22352 }, { "epoch": 0.3958612811964563, "grad_norm": 0.5598341822624207, "learning_rate": 2.0645130037021376e-05, "loss": 0.1036, "step": 22353 }, { "epoch": 0.39587899073348476, "grad_norm": 0.7884498834609985, "learning_rate": 2.0644332916442766e-05, "loss": 0.109, "step": 22354 }, { "epoch": 0.39589670027051316, "grad_norm": 0.9636673331260681, "learning_rate": 2.064353577729487e-05, "loss": 0.1098, "step": 22355 }, { "epoch": 0.3959144098075416, "grad_norm": 0.6610617637634277, "learning_rate": 2.0642738619580313e-05, "loss": 0.0881, "step": 22356 }, { "epoch": 0.39593211934457, "grad_norm": 0.8259115219116211, "learning_rate": 2.064194144330172e-05, "loss": 0.0589, "step": 22357 }, { "epoch": 0.39594982888159846, "grad_norm": 0.7569223642349243, "learning_rate": 2.0641144248461704e-05, "loss": 0.0842, "step": 22358 }, { "epoch": 0.3959675384186269, "grad_norm": 0.7223590612411499, "learning_rate": 2.06403470350629e-05, "loss": 0.1056, "step": 22359 }, { "epoch": 0.3959852479556553, "grad_norm": 0.5454311370849609, "learning_rate": 2.063954980310792e-05, "loss": 0.1135, "step": 22360 }, { "epoch": 0.39600295749268377, "grad_norm": 0.728864848613739, "learning_rate": 2.0638752552599394e-05, "loss": 0.097, "step": 22361 }, { "epoch": 0.39602066702971217, "grad_norm": 0.8776479363441467, "learning_rate": 2.0637955283539937e-05, "loss": 0.0891, "step": 22362 }, { "epoch": 0.3960383765667406, "grad_norm": 0.7656279802322388, "learning_rate": 2.0637157995932187e-05, "loss": 0.0932, "step": 22363 }, { "epoch": 0.396056086103769, "grad_norm": 0.5835768580436707, "learning_rate": 2.063636068977875e-05, "loss": 0.0816, "step": 22364 }, { "epoch": 0.3960737956407975, "grad_norm": 0.6467065811157227, "learning_rate": 2.0635563365082257e-05, "loss": 0.0868, "step": 22365 }, { "epoch": 0.3960915051778259, "grad_norm": 0.6073245406150818, "learning_rate": 2.0634766021845333e-05, "loss": 0.0854, "step": 22366 }, { "epoch": 0.39610921471485433, "grad_norm": 0.857012152671814, "learning_rate": 2.0633968660070602e-05, "loss": 0.1102, "step": 22367 }, { "epoch": 0.39612692425188273, "grad_norm": 0.8493871688842773, "learning_rate": 2.063317127976068e-05, "loss": 0.0975, "step": 22368 }, { "epoch": 0.3961446337889112, "grad_norm": 0.9568703770637512, "learning_rate": 2.063237388091819e-05, "loss": 0.097, "step": 22369 }, { "epoch": 0.3961623433259396, "grad_norm": 1.0902019739151, "learning_rate": 2.0631576463545765e-05, "loss": 0.0884, "step": 22370 }, { "epoch": 0.39618005286296804, "grad_norm": 0.6567729711532593, "learning_rate": 2.0630779027646022e-05, "loss": 0.0786, "step": 22371 }, { "epoch": 0.39619776239999643, "grad_norm": 0.5666654706001282, "learning_rate": 2.0629981573221587e-05, "loss": 0.0704, "step": 22372 }, { "epoch": 0.3962154719370249, "grad_norm": 0.6340126395225525, "learning_rate": 2.0629184100275086e-05, "loss": 0.0917, "step": 22373 }, { "epoch": 0.39623318147405334, "grad_norm": 0.5117418169975281, "learning_rate": 2.062838660880913e-05, "loss": 0.0529, "step": 22374 }, { "epoch": 0.39625089101108174, "grad_norm": 0.6973430514335632, "learning_rate": 2.0627589098826355e-05, "loss": 0.075, "step": 22375 }, { "epoch": 0.3962686005481102, "grad_norm": 0.5063250064849854, "learning_rate": 2.0626791570329377e-05, "loss": 0.0625, "step": 22376 }, { "epoch": 0.3962863100851386, "grad_norm": 0.6674004197120667, "learning_rate": 2.062599402332083e-05, "loss": 0.0933, "step": 22377 }, { "epoch": 0.39630401962216705, "grad_norm": 1.154977560043335, "learning_rate": 2.062519645780333e-05, "loss": 0.1036, "step": 22378 }, { "epoch": 0.39632172915919545, "grad_norm": 0.7462401986122131, "learning_rate": 2.0624398873779503e-05, "loss": 0.0883, "step": 22379 }, { "epoch": 0.3963394386962239, "grad_norm": 1.3425837755203247, "learning_rate": 2.0623601271251967e-05, "loss": 0.0746, "step": 22380 }, { "epoch": 0.3963571482332523, "grad_norm": 0.8498780131340027, "learning_rate": 2.0622803650223358e-05, "loss": 0.1026, "step": 22381 }, { "epoch": 0.39637485777028075, "grad_norm": 0.46718019247055054, "learning_rate": 2.062200601069629e-05, "loss": 0.0809, "step": 22382 }, { "epoch": 0.39639256730730915, "grad_norm": 1.186360239982605, "learning_rate": 2.0621208352673396e-05, "loss": 0.0723, "step": 22383 }, { "epoch": 0.3964102768443376, "grad_norm": 0.8215749859809875, "learning_rate": 2.0620410676157288e-05, "loss": 0.0844, "step": 22384 }, { "epoch": 0.396427986381366, "grad_norm": 0.7969964146614075, "learning_rate": 2.06196129811506e-05, "loss": 0.0964, "step": 22385 }, { "epoch": 0.39644569591839446, "grad_norm": 0.42903417348861694, "learning_rate": 2.0618815267655954e-05, "loss": 0.0689, "step": 22386 }, { "epoch": 0.39646340545542286, "grad_norm": 0.6777797341346741, "learning_rate": 2.0618017535675977e-05, "loss": 0.0659, "step": 22387 }, { "epoch": 0.3964811149924513, "grad_norm": 0.6160513162612915, "learning_rate": 2.061721978521329e-05, "loss": 0.0828, "step": 22388 }, { "epoch": 0.39649882452947977, "grad_norm": 0.8718293905258179, "learning_rate": 2.0616422016270515e-05, "loss": 0.0852, "step": 22389 }, { "epoch": 0.39651653406650816, "grad_norm": 0.5848712921142578, "learning_rate": 2.061562422885028e-05, "loss": 0.0973, "step": 22390 }, { "epoch": 0.3965342436035366, "grad_norm": 0.9353058338165283, "learning_rate": 2.061482642295521e-05, "loss": 0.1278, "step": 22391 }, { "epoch": 0.396551953140565, "grad_norm": 0.5413122177124023, "learning_rate": 2.061402859858793e-05, "loss": 0.0822, "step": 22392 }, { "epoch": 0.39656966267759347, "grad_norm": 0.7930799126625061, "learning_rate": 2.0613230755751066e-05, "loss": 0.1123, "step": 22393 }, { "epoch": 0.39658737221462187, "grad_norm": 0.43990594148635864, "learning_rate": 2.0612432894447233e-05, "loss": 0.0634, "step": 22394 }, { "epoch": 0.3966050817516503, "grad_norm": 0.7926904559135437, "learning_rate": 2.0611635014679067e-05, "loss": 0.0789, "step": 22395 }, { "epoch": 0.3966227912886787, "grad_norm": 0.8324015140533447, "learning_rate": 2.0610837116449196e-05, "loss": 0.0851, "step": 22396 }, { "epoch": 0.3966405008257072, "grad_norm": 0.759668231010437, "learning_rate": 2.061003919976023e-05, "loss": 0.0904, "step": 22397 }, { "epoch": 0.3966582103627356, "grad_norm": 0.8645749092102051, "learning_rate": 2.0609241264614805e-05, "loss": 0.1061, "step": 22398 }, { "epoch": 0.39667591989976403, "grad_norm": 0.6650944352149963, "learning_rate": 2.060844331101554e-05, "loss": 0.1037, "step": 22399 }, { "epoch": 0.3966936294367924, "grad_norm": 0.6061391830444336, "learning_rate": 2.060764533896507e-05, "loss": 0.0691, "step": 22400 }, { "epoch": 0.3967113389738209, "grad_norm": 0.7734699249267578, "learning_rate": 2.0606847348466013e-05, "loss": 0.0753, "step": 22401 }, { "epoch": 0.3967290485108493, "grad_norm": 0.5575833916664124, "learning_rate": 2.0606049339520993e-05, "loss": 0.0808, "step": 22402 }, { "epoch": 0.39674675804787773, "grad_norm": 1.0691771507263184, "learning_rate": 2.060525131213264e-05, "loss": 0.1235, "step": 22403 }, { "epoch": 0.3967644675849062, "grad_norm": 0.5290544629096985, "learning_rate": 2.0604453266303574e-05, "loss": 0.0843, "step": 22404 }, { "epoch": 0.3967821771219346, "grad_norm": 0.27378320693969727, "learning_rate": 2.0603655202036427e-05, "loss": 0.0912, "step": 22405 }, { "epoch": 0.39679988665896304, "grad_norm": 0.7562194466590881, "learning_rate": 2.0602857119333818e-05, "loss": 0.0815, "step": 22406 }, { "epoch": 0.39681759619599144, "grad_norm": 0.5191016793251038, "learning_rate": 2.060205901819838e-05, "loss": 0.0701, "step": 22407 }, { "epoch": 0.3968353057330199, "grad_norm": 0.3682962954044342, "learning_rate": 2.060126089863273e-05, "loss": 0.0597, "step": 22408 }, { "epoch": 0.3968530152700483, "grad_norm": 0.9129227995872498, "learning_rate": 2.0600462760639496e-05, "loss": 0.0548, "step": 22409 }, { "epoch": 0.39687072480707675, "grad_norm": 0.6545965671539307, "learning_rate": 2.0599664604221314e-05, "loss": 0.0907, "step": 22410 }, { "epoch": 0.39688843434410515, "grad_norm": 1.016356110572815, "learning_rate": 2.05988664293808e-05, "loss": 0.0857, "step": 22411 }, { "epoch": 0.3969061438811336, "grad_norm": 0.55170738697052, "learning_rate": 2.059806823612058e-05, "loss": 0.0692, "step": 22412 }, { "epoch": 0.396923853418162, "grad_norm": 0.9006198048591614, "learning_rate": 2.0597270024443275e-05, "loss": 0.0846, "step": 22413 }, { "epoch": 0.39694156295519045, "grad_norm": 0.7838729023933411, "learning_rate": 2.0596471794351525e-05, "loss": 0.0988, "step": 22414 }, { "epoch": 0.39695927249221885, "grad_norm": 0.9578604102134705, "learning_rate": 2.0595673545847946e-05, "loss": 0.0851, "step": 22415 }, { "epoch": 0.3969769820292473, "grad_norm": 0.7395488023757935, "learning_rate": 2.0594875278935176e-05, "loss": 0.1268, "step": 22416 }, { "epoch": 0.3969946915662757, "grad_norm": 0.9741729497909546, "learning_rate": 2.059407699361582e-05, "loss": 0.1129, "step": 22417 }, { "epoch": 0.39701240110330416, "grad_norm": 0.8211990594863892, "learning_rate": 2.059327868989252e-05, "loss": 0.0734, "step": 22418 }, { "epoch": 0.3970301106403326, "grad_norm": 0.8432009220123291, "learning_rate": 2.05924803677679e-05, "loss": 0.0715, "step": 22419 }, { "epoch": 0.397047820177361, "grad_norm": 0.8215579390525818, "learning_rate": 2.059168202724459e-05, "loss": 0.0852, "step": 22420 }, { "epoch": 0.39706552971438946, "grad_norm": 1.0377094745635986, "learning_rate": 2.0590883668325207e-05, "loss": 0.0924, "step": 22421 }, { "epoch": 0.39708323925141786, "grad_norm": 0.7351133227348328, "learning_rate": 2.059008529101238e-05, "loss": 0.0841, "step": 22422 }, { "epoch": 0.3971009487884463, "grad_norm": 0.4691277742385864, "learning_rate": 2.058928689530874e-05, "loss": 0.0459, "step": 22423 }, { "epoch": 0.3971186583254747, "grad_norm": 0.7701582908630371, "learning_rate": 2.0588488481216912e-05, "loss": 0.0554, "step": 22424 }, { "epoch": 0.39713636786250317, "grad_norm": 0.6639388799667358, "learning_rate": 2.058769004873952e-05, "loss": 0.0883, "step": 22425 }, { "epoch": 0.39715407739953157, "grad_norm": 0.4120662212371826, "learning_rate": 2.0586891597879195e-05, "loss": 0.0832, "step": 22426 }, { "epoch": 0.39717178693656, "grad_norm": 0.5511384010314941, "learning_rate": 2.058609312863856e-05, "loss": 0.0971, "step": 22427 }, { "epoch": 0.3971894964735884, "grad_norm": 0.6929892897605896, "learning_rate": 2.0585294641020248e-05, "loss": 0.0985, "step": 22428 }, { "epoch": 0.3972072060106169, "grad_norm": 0.6194030046463013, "learning_rate": 2.0584496135026876e-05, "loss": 0.0756, "step": 22429 }, { "epoch": 0.3972249155476453, "grad_norm": 0.578145444393158, "learning_rate": 2.0583697610661084e-05, "loss": 0.0747, "step": 22430 }, { "epoch": 0.39724262508467373, "grad_norm": 0.7724847197532654, "learning_rate": 2.0582899067925483e-05, "loss": 0.074, "step": 22431 }, { "epoch": 0.3972603346217021, "grad_norm": 0.7510423064231873, "learning_rate": 2.0582100506822714e-05, "loss": 0.1031, "step": 22432 }, { "epoch": 0.3972780441587306, "grad_norm": 0.4737144410610199, "learning_rate": 2.0581301927355396e-05, "loss": 0.055, "step": 22433 }, { "epoch": 0.39729575369575904, "grad_norm": 1.301694393157959, "learning_rate": 2.058050332952616e-05, "loss": 0.0966, "step": 22434 }, { "epoch": 0.39731346323278743, "grad_norm": 0.5973285436630249, "learning_rate": 2.0579704713337633e-05, "loss": 0.0773, "step": 22435 }, { "epoch": 0.3973311727698159, "grad_norm": 0.6567854285240173, "learning_rate": 2.0578906078792445e-05, "loss": 0.0967, "step": 22436 }, { "epoch": 0.3973488823068443, "grad_norm": 0.5331141352653503, "learning_rate": 2.0578107425893212e-05, "loss": 0.0931, "step": 22437 }, { "epoch": 0.39736659184387274, "grad_norm": 0.9154861569404602, "learning_rate": 2.057730875464258e-05, "loss": 0.1074, "step": 22438 }, { "epoch": 0.39738430138090114, "grad_norm": 0.8271469473838806, "learning_rate": 2.057651006504316e-05, "loss": 0.0944, "step": 22439 }, { "epoch": 0.3974020109179296, "grad_norm": 0.701625406742096, "learning_rate": 2.0575711357097585e-05, "loss": 0.0776, "step": 22440 }, { "epoch": 0.397419720454958, "grad_norm": 1.143112301826477, "learning_rate": 2.0574912630808487e-05, "loss": 0.0751, "step": 22441 }, { "epoch": 0.39743742999198645, "grad_norm": 0.6196819543838501, "learning_rate": 2.0574113886178486e-05, "loss": 0.0751, "step": 22442 }, { "epoch": 0.39745513952901484, "grad_norm": 0.6936127543449402, "learning_rate": 2.0573315123210218e-05, "loss": 0.0638, "step": 22443 }, { "epoch": 0.3974728490660433, "grad_norm": 0.9931745529174805, "learning_rate": 2.057251634190631e-05, "loss": 0.0611, "step": 22444 }, { "epoch": 0.3974905586030717, "grad_norm": 0.7335616946220398, "learning_rate": 2.0571717542269383e-05, "loss": 0.0599, "step": 22445 }, { "epoch": 0.39750826814010015, "grad_norm": 0.9340468645095825, "learning_rate": 2.0570918724302067e-05, "loss": 0.0968, "step": 22446 }, { "epoch": 0.39752597767712855, "grad_norm": 0.7859484553337097, "learning_rate": 2.057011988800699e-05, "loss": 0.0751, "step": 22447 }, { "epoch": 0.397543687214157, "grad_norm": 0.49707338213920593, "learning_rate": 2.0569321033386787e-05, "loss": 0.0794, "step": 22448 }, { "epoch": 0.39756139675118546, "grad_norm": 0.7618638277053833, "learning_rate": 2.056852216044408e-05, "loss": 0.0845, "step": 22449 }, { "epoch": 0.39757910628821386, "grad_norm": 0.8288366198539734, "learning_rate": 2.05677232691815e-05, "loss": 0.0883, "step": 22450 }, { "epoch": 0.3975968158252423, "grad_norm": 0.8433703780174255, "learning_rate": 2.056692435960167e-05, "loss": 0.0722, "step": 22451 }, { "epoch": 0.3976145253622707, "grad_norm": 0.6204336285591125, "learning_rate": 2.0566125431707226e-05, "loss": 0.0562, "step": 22452 }, { "epoch": 0.39763223489929916, "grad_norm": 0.5590028762817383, "learning_rate": 2.056532648550079e-05, "loss": 0.0728, "step": 22453 }, { "epoch": 0.39764994443632756, "grad_norm": 0.7012936472892761, "learning_rate": 2.0564527520984992e-05, "loss": 0.0571, "step": 22454 }, { "epoch": 0.397667653973356, "grad_norm": 0.7604981064796448, "learning_rate": 2.0563728538162462e-05, "loss": 0.0922, "step": 22455 }, { "epoch": 0.3976853635103844, "grad_norm": 0.5212352275848389, "learning_rate": 2.0562929537035824e-05, "loss": 0.0557, "step": 22456 }, { "epoch": 0.39770307304741287, "grad_norm": 0.5726014375686646, "learning_rate": 2.0562130517607715e-05, "loss": 0.0857, "step": 22457 }, { "epoch": 0.39772078258444127, "grad_norm": 0.5793603658676147, "learning_rate": 2.0561331479880757e-05, "loss": 0.0885, "step": 22458 }, { "epoch": 0.3977384921214697, "grad_norm": 1.3403328657150269, "learning_rate": 2.0560532423857586e-05, "loss": 0.0681, "step": 22459 }, { "epoch": 0.3977562016584981, "grad_norm": 0.56779944896698, "learning_rate": 2.055973334954082e-05, "loss": 0.0467, "step": 22460 }, { "epoch": 0.3977739111955266, "grad_norm": 0.6802392601966858, "learning_rate": 2.0558934256933096e-05, "loss": 0.0869, "step": 22461 }, { "epoch": 0.397791620732555, "grad_norm": 0.8416557908058167, "learning_rate": 2.0558135146037043e-05, "loss": 0.0837, "step": 22462 }, { "epoch": 0.3978093302695834, "grad_norm": 0.9023711085319519, "learning_rate": 2.055733601685528e-05, "loss": 0.1113, "step": 22463 }, { "epoch": 0.3978270398066119, "grad_norm": 0.5244098901748657, "learning_rate": 2.0556536869390454e-05, "loss": 0.0481, "step": 22464 }, { "epoch": 0.3978447493436403, "grad_norm": 0.9454758763313293, "learning_rate": 2.0555737703645175e-05, "loss": 0.1035, "step": 22465 }, { "epoch": 0.39786245888066873, "grad_norm": 0.7239236235618591, "learning_rate": 2.055493851962208e-05, "loss": 0.0629, "step": 22466 }, { "epoch": 0.39788016841769713, "grad_norm": 0.9214420318603516, "learning_rate": 2.0554139317323808e-05, "loss": 0.1136, "step": 22467 }, { "epoch": 0.3978978779547256, "grad_norm": 0.5476646423339844, "learning_rate": 2.0553340096752973e-05, "loss": 0.0492, "step": 22468 }, { "epoch": 0.397915587491754, "grad_norm": 0.6557180881500244, "learning_rate": 2.0552540857912214e-05, "loss": 0.0894, "step": 22469 }, { "epoch": 0.39793329702878244, "grad_norm": 0.652987539768219, "learning_rate": 2.0551741600804156e-05, "loss": 0.072, "step": 22470 }, { "epoch": 0.39795100656581084, "grad_norm": 0.5805691480636597, "learning_rate": 2.055094232543143e-05, "loss": 0.0941, "step": 22471 }, { "epoch": 0.3979687161028393, "grad_norm": 0.8119324445724487, "learning_rate": 2.055014303179666e-05, "loss": 0.1011, "step": 22472 }, { "epoch": 0.3979864256398677, "grad_norm": 0.6419578790664673, "learning_rate": 2.0549343719902488e-05, "loss": 0.0762, "step": 22473 }, { "epoch": 0.39800413517689615, "grad_norm": 0.3194752633571625, "learning_rate": 2.0548544389751533e-05, "loss": 0.0646, "step": 22474 }, { "epoch": 0.39802184471392454, "grad_norm": 0.24942198395729065, "learning_rate": 2.0547745041346425e-05, "loss": 0.0825, "step": 22475 }, { "epoch": 0.398039554250953, "grad_norm": 0.8430830240249634, "learning_rate": 2.0546945674689804e-05, "loss": 0.0802, "step": 22476 }, { "epoch": 0.3980572637879814, "grad_norm": 0.47736307978630066, "learning_rate": 2.054614628978429e-05, "loss": 0.0913, "step": 22477 }, { "epoch": 0.39807497332500985, "grad_norm": 0.631952166557312, "learning_rate": 2.0545346886632514e-05, "loss": 0.1309, "step": 22478 }, { "epoch": 0.3980926828620383, "grad_norm": 0.5216507911682129, "learning_rate": 2.0544547465237107e-05, "loss": 0.0691, "step": 22479 }, { "epoch": 0.3981103923990667, "grad_norm": 0.5077417492866516, "learning_rate": 2.05437480256007e-05, "loss": 0.074, "step": 22480 }, { "epoch": 0.39812810193609516, "grad_norm": 0.8664531707763672, "learning_rate": 2.0542948567725924e-05, "loss": 0.0798, "step": 22481 }, { "epoch": 0.39814581147312356, "grad_norm": 0.724567174911499, "learning_rate": 2.054214909161541e-05, "loss": 0.0769, "step": 22482 }, { "epoch": 0.398163521010152, "grad_norm": 0.5934800505638123, "learning_rate": 2.0541349597271788e-05, "loss": 0.0814, "step": 22483 }, { "epoch": 0.3981812305471804, "grad_norm": 0.4430384933948517, "learning_rate": 2.0540550084697678e-05, "loss": 0.0519, "step": 22484 }, { "epoch": 0.39819894008420886, "grad_norm": 1.9157816171646118, "learning_rate": 2.0539750553895725e-05, "loss": 0.077, "step": 22485 }, { "epoch": 0.39821664962123726, "grad_norm": 0.5883981585502625, "learning_rate": 2.053895100486855e-05, "loss": 0.0626, "step": 22486 }, { "epoch": 0.3982343591582657, "grad_norm": 0.5102099180221558, "learning_rate": 2.0538151437618788e-05, "loss": 0.0622, "step": 22487 }, { "epoch": 0.3982520686952941, "grad_norm": 0.769161581993103, "learning_rate": 2.0537351852149067e-05, "loss": 0.0812, "step": 22488 }, { "epoch": 0.39826977823232257, "grad_norm": 0.5921483039855957, "learning_rate": 2.053655224846202e-05, "loss": 0.0598, "step": 22489 }, { "epoch": 0.39828748776935097, "grad_norm": 0.8593654036521912, "learning_rate": 2.053575262656027e-05, "loss": 0.0886, "step": 22490 }, { "epoch": 0.3983051973063794, "grad_norm": 0.6910449862480164, "learning_rate": 2.053495298644646e-05, "loss": 0.0611, "step": 22491 }, { "epoch": 0.3983229068434078, "grad_norm": 0.511385440826416, "learning_rate": 2.0534153328123215e-05, "loss": 0.0856, "step": 22492 }, { "epoch": 0.3983406163804363, "grad_norm": 0.6577861309051514, "learning_rate": 2.053335365159316e-05, "loss": 0.0844, "step": 22493 }, { "epoch": 0.39835832591746473, "grad_norm": 0.5800166726112366, "learning_rate": 2.0532553956858936e-05, "loss": 0.0619, "step": 22494 }, { "epoch": 0.3983760354544931, "grad_norm": 0.714309811592102, "learning_rate": 2.0531754243923164e-05, "loss": 0.1055, "step": 22495 }, { "epoch": 0.3983937449915216, "grad_norm": 0.5461762547492981, "learning_rate": 2.0530954512788486e-05, "loss": 0.056, "step": 22496 }, { "epoch": 0.39841145452855, "grad_norm": 0.5170645713806152, "learning_rate": 2.0530154763457524e-05, "loss": 0.0722, "step": 22497 }, { "epoch": 0.39842916406557843, "grad_norm": 1.342166543006897, "learning_rate": 2.0529354995932914e-05, "loss": 0.0835, "step": 22498 }, { "epoch": 0.39844687360260683, "grad_norm": 0.7315854430198669, "learning_rate": 2.0528555210217283e-05, "loss": 0.1102, "step": 22499 }, { "epoch": 0.3984645831396353, "grad_norm": 1.2892831563949585, "learning_rate": 2.0527755406313265e-05, "loss": 0.0866, "step": 22500 }, { "epoch": 0.3984822926766637, "grad_norm": 0.3754717707633972, "learning_rate": 2.0526955584223495e-05, "loss": 0.0583, "step": 22501 }, { "epoch": 0.39850000221369214, "grad_norm": 0.6466554403305054, "learning_rate": 2.0526155743950593e-05, "loss": 0.0659, "step": 22502 }, { "epoch": 0.39851771175072054, "grad_norm": 0.552636981010437, "learning_rate": 2.0525355885497203e-05, "loss": 0.0764, "step": 22503 }, { "epoch": 0.398535421287749, "grad_norm": 0.9310035109519958, "learning_rate": 2.0524556008865948e-05, "loss": 0.0661, "step": 22504 }, { "epoch": 0.3985531308247774, "grad_norm": 0.5619072318077087, "learning_rate": 2.0523756114059462e-05, "loss": 0.0802, "step": 22505 }, { "epoch": 0.39857084036180584, "grad_norm": 0.741547167301178, "learning_rate": 2.052295620108038e-05, "loss": 0.0798, "step": 22506 }, { "epoch": 0.39858854989883424, "grad_norm": 0.7857577800750732, "learning_rate": 2.052215626993133e-05, "loss": 0.05, "step": 22507 }, { "epoch": 0.3986062594358627, "grad_norm": 0.6161087155342102, "learning_rate": 2.0521356320614943e-05, "loss": 0.0846, "step": 22508 }, { "epoch": 0.39862396897289115, "grad_norm": 0.6994085907936096, "learning_rate": 2.052055635313385e-05, "loss": 0.1111, "step": 22509 }, { "epoch": 0.39864167850991955, "grad_norm": 0.967191219329834, "learning_rate": 2.051975636749069e-05, "loss": 0.0816, "step": 22510 }, { "epoch": 0.398659388046948, "grad_norm": 0.84612637758255, "learning_rate": 2.0518956363688088e-05, "loss": 0.1172, "step": 22511 }, { "epoch": 0.3986770975839764, "grad_norm": 0.4834648668766022, "learning_rate": 2.0518156341728682e-05, "loss": 0.0587, "step": 22512 }, { "epoch": 0.39869480712100486, "grad_norm": 0.6952138543128967, "learning_rate": 2.051735630161509e-05, "loss": 0.0868, "step": 22513 }, { "epoch": 0.39871251665803326, "grad_norm": 0.6002312898635864, "learning_rate": 2.051655624334996e-05, "loss": 0.0906, "step": 22514 }, { "epoch": 0.3987302261950617, "grad_norm": 0.7260414958000183, "learning_rate": 2.051575616693592e-05, "loss": 0.0763, "step": 22515 }, { "epoch": 0.3987479357320901, "grad_norm": 0.6877250671386719, "learning_rate": 2.0514956072375593e-05, "loss": 0.0775, "step": 22516 }, { "epoch": 0.39876564526911856, "grad_norm": 0.609024703502655, "learning_rate": 2.0514155959671627e-05, "loss": 0.0798, "step": 22517 }, { "epoch": 0.39878335480614696, "grad_norm": 0.42358508706092834, "learning_rate": 2.051335582882664e-05, "loss": 0.0574, "step": 22518 }, { "epoch": 0.3988010643431754, "grad_norm": 0.8805298209190369, "learning_rate": 2.0512555679843268e-05, "loss": 0.1026, "step": 22519 }, { "epoch": 0.3988187738802038, "grad_norm": 0.8423239588737488, "learning_rate": 2.051175551272415e-05, "loss": 0.0773, "step": 22520 }, { "epoch": 0.39883648341723227, "grad_norm": 0.40373843908309937, "learning_rate": 2.0510955327471913e-05, "loss": 0.0624, "step": 22521 }, { "epoch": 0.39885419295426067, "grad_norm": 0.5225907564163208, "learning_rate": 2.0510155124089187e-05, "loss": 0.0927, "step": 22522 }, { "epoch": 0.3988719024912891, "grad_norm": 0.6063756942749023, "learning_rate": 2.050935490257861e-05, "loss": 0.1065, "step": 22523 }, { "epoch": 0.3988896120283176, "grad_norm": 0.7558119297027588, "learning_rate": 2.0508554662942814e-05, "loss": 0.114, "step": 22524 }, { "epoch": 0.398907321565346, "grad_norm": 0.3590787649154663, "learning_rate": 2.0507754405184432e-05, "loss": 0.071, "step": 22525 }, { "epoch": 0.39892503110237443, "grad_norm": 1.378920078277588, "learning_rate": 2.0506954129306094e-05, "loss": 0.09, "step": 22526 }, { "epoch": 0.3989427406394028, "grad_norm": 0.45691078901290894, "learning_rate": 2.050615383531043e-05, "loss": 0.0527, "step": 22527 }, { "epoch": 0.3989604501764313, "grad_norm": 0.7415266036987305, "learning_rate": 2.050535352320008e-05, "loss": 0.0651, "step": 22528 }, { "epoch": 0.3989781597134597, "grad_norm": 0.5754692554473877, "learning_rate": 2.050455319297767e-05, "loss": 0.0782, "step": 22529 }, { "epoch": 0.39899586925048813, "grad_norm": 0.659762978553772, "learning_rate": 2.0503752844645843e-05, "loss": 0.1026, "step": 22530 }, { "epoch": 0.39901357878751653, "grad_norm": 1.1325299739837646, "learning_rate": 2.0502952478207225e-05, "loss": 0.0764, "step": 22531 }, { "epoch": 0.399031288324545, "grad_norm": 0.7033260464668274, "learning_rate": 2.0502152093664446e-05, "loss": 0.0768, "step": 22532 }, { "epoch": 0.3990489978615734, "grad_norm": 0.7038958072662354, "learning_rate": 2.0501351691020143e-05, "loss": 0.072, "step": 22533 }, { "epoch": 0.39906670739860184, "grad_norm": 0.8371983170509338, "learning_rate": 2.0500551270276952e-05, "loss": 0.1176, "step": 22534 }, { "epoch": 0.39908441693563024, "grad_norm": 0.7475382685661316, "learning_rate": 2.0499750831437505e-05, "loss": 0.0676, "step": 22535 }, { "epoch": 0.3991021264726587, "grad_norm": 0.8874193429946899, "learning_rate": 2.0498950374504428e-05, "loss": 0.0879, "step": 22536 }, { "epoch": 0.3991198360096871, "grad_norm": 0.674537718296051, "learning_rate": 2.0498149899480364e-05, "loss": 0.1008, "step": 22537 }, { "epoch": 0.39913754554671554, "grad_norm": 0.6070966720581055, "learning_rate": 2.0497349406367944e-05, "loss": 0.0698, "step": 22538 }, { "epoch": 0.399155255083744, "grad_norm": 0.9097142815589905, "learning_rate": 2.04965488951698e-05, "loss": 0.1211, "step": 22539 }, { "epoch": 0.3991729646207724, "grad_norm": 0.849520206451416, "learning_rate": 2.0495748365888566e-05, "loss": 0.093, "step": 22540 }, { "epoch": 0.39919067415780085, "grad_norm": 0.8688774704933167, "learning_rate": 2.0494947818526874e-05, "loss": 0.072, "step": 22541 }, { "epoch": 0.39920838369482925, "grad_norm": 0.5110467672348022, "learning_rate": 2.0494147253087362e-05, "loss": 0.0532, "step": 22542 }, { "epoch": 0.3992260932318577, "grad_norm": 0.990472674369812, "learning_rate": 2.049334666957266e-05, "loss": 0.0826, "step": 22543 }, { "epoch": 0.3992438027688861, "grad_norm": 0.8149856328964233, "learning_rate": 2.0492546067985405e-05, "loss": 0.104, "step": 22544 }, { "epoch": 0.39926151230591456, "grad_norm": 0.669133186340332, "learning_rate": 2.0491745448328226e-05, "loss": 0.0776, "step": 22545 }, { "epoch": 0.39927922184294296, "grad_norm": 0.6786996722221375, "learning_rate": 2.0490944810603762e-05, "loss": 0.0921, "step": 22546 }, { "epoch": 0.3992969313799714, "grad_norm": 0.6377934217453003, "learning_rate": 2.0490144154814644e-05, "loss": 0.0501, "step": 22547 }, { "epoch": 0.3993146409169998, "grad_norm": 0.9679386615753174, "learning_rate": 2.0489343480963503e-05, "loss": 0.1053, "step": 22548 }, { "epoch": 0.39933235045402826, "grad_norm": 0.5200783014297485, "learning_rate": 2.0488542789052987e-05, "loss": 0.0619, "step": 22549 }, { "epoch": 0.39935005999105666, "grad_norm": 0.5578212141990662, "learning_rate": 2.0487742079085713e-05, "loss": 0.0496, "step": 22550 }, { "epoch": 0.3993677695280851, "grad_norm": 0.7197826504707336, "learning_rate": 2.0486941351064325e-05, "loss": 0.1126, "step": 22551 }, { "epoch": 0.3993854790651135, "grad_norm": 0.8164398670196533, "learning_rate": 2.048614060499145e-05, "loss": 0.0985, "step": 22552 }, { "epoch": 0.39940318860214197, "grad_norm": 0.6709662675857544, "learning_rate": 2.0485339840869734e-05, "loss": 0.0822, "step": 22553 }, { "epoch": 0.3994208981391704, "grad_norm": 0.4589053988456726, "learning_rate": 2.0484539058701803e-05, "loss": 0.066, "step": 22554 }, { "epoch": 0.3994386076761988, "grad_norm": 0.7352138161659241, "learning_rate": 2.048373825849029e-05, "loss": 0.0637, "step": 22555 }, { "epoch": 0.3994563172132273, "grad_norm": 0.5782555937767029, "learning_rate": 2.0482937440237837e-05, "loss": 0.078, "step": 22556 }, { "epoch": 0.3994740267502557, "grad_norm": 0.7346488237380981, "learning_rate": 2.048213660394707e-05, "loss": 0.0748, "step": 22557 }, { "epoch": 0.3994917362872841, "grad_norm": 0.7072421312332153, "learning_rate": 2.0481335749620634e-05, "loss": 0.0798, "step": 22558 }, { "epoch": 0.3995094458243125, "grad_norm": 0.6696862578392029, "learning_rate": 2.0480534877261152e-05, "loss": 0.0942, "step": 22559 }, { "epoch": 0.399527155361341, "grad_norm": 0.6761930584907532, "learning_rate": 2.047973398687127e-05, "loss": 0.0604, "step": 22560 }, { "epoch": 0.3995448648983694, "grad_norm": 0.7242625951766968, "learning_rate": 2.0478933078453613e-05, "loss": 0.1013, "step": 22561 }, { "epoch": 0.39956257443539783, "grad_norm": 0.8378974795341492, "learning_rate": 2.0478132152010817e-05, "loss": 0.0724, "step": 22562 }, { "epoch": 0.39958028397242623, "grad_norm": 0.6684213876724243, "learning_rate": 2.047733120754553e-05, "loss": 0.0954, "step": 22563 }, { "epoch": 0.3995979935094547, "grad_norm": 0.6131780743598938, "learning_rate": 2.047653024506037e-05, "loss": 0.0591, "step": 22564 }, { "epoch": 0.3996157030464831, "grad_norm": 0.7636157274246216, "learning_rate": 2.0475729264557985e-05, "loss": 0.0901, "step": 22565 }, { "epoch": 0.39963341258351154, "grad_norm": 0.6818345785140991, "learning_rate": 2.0474928266040996e-05, "loss": 0.0893, "step": 22566 }, { "epoch": 0.39965112212053994, "grad_norm": 0.6990582346916199, "learning_rate": 2.0474127249512055e-05, "loss": 0.0877, "step": 22567 }, { "epoch": 0.3996688316575684, "grad_norm": 0.5788012742996216, "learning_rate": 2.0473326214973784e-05, "loss": 0.0845, "step": 22568 }, { "epoch": 0.39968654119459684, "grad_norm": 0.9873561859130859, "learning_rate": 2.047252516242883e-05, "loss": 0.0737, "step": 22569 }, { "epoch": 0.39970425073162524, "grad_norm": 0.7992565631866455, "learning_rate": 2.0471724091879813e-05, "loss": 0.0709, "step": 22570 }, { "epoch": 0.3997219602686537, "grad_norm": 0.7794461846351624, "learning_rate": 2.0470923003329382e-05, "loss": 0.0869, "step": 22571 }, { "epoch": 0.3997396698056821, "grad_norm": 0.7104654908180237, "learning_rate": 2.0470121896780168e-05, "loss": 0.1038, "step": 22572 }, { "epoch": 0.39975737934271055, "grad_norm": 0.7441607713699341, "learning_rate": 2.0469320772234804e-05, "loss": 0.1036, "step": 22573 }, { "epoch": 0.39977508887973895, "grad_norm": 0.4396601617336273, "learning_rate": 2.0468519629695932e-05, "loss": 0.0637, "step": 22574 }, { "epoch": 0.3997927984167674, "grad_norm": 0.6081957817077637, "learning_rate": 2.046771846916618e-05, "loss": 0.0613, "step": 22575 }, { "epoch": 0.3998105079537958, "grad_norm": 0.6465696096420288, "learning_rate": 2.0466917290648187e-05, "loss": 0.0579, "step": 22576 }, { "epoch": 0.39982821749082426, "grad_norm": 0.7394546270370483, "learning_rate": 2.046611609414459e-05, "loss": 0.0724, "step": 22577 }, { "epoch": 0.39984592702785265, "grad_norm": 0.6911370754241943, "learning_rate": 2.0465314879658024e-05, "loss": 0.0833, "step": 22578 }, { "epoch": 0.3998636365648811, "grad_norm": 0.5890784859657288, "learning_rate": 2.0464513647191125e-05, "loss": 0.0961, "step": 22579 }, { "epoch": 0.3998813461019095, "grad_norm": 0.4883890748023987, "learning_rate": 2.0463712396746527e-05, "loss": 0.1009, "step": 22580 }, { "epoch": 0.39989905563893796, "grad_norm": 0.8417417407035828, "learning_rate": 2.046291112832687e-05, "loss": 0.0971, "step": 22581 }, { "epoch": 0.39991676517596636, "grad_norm": 0.696938157081604, "learning_rate": 2.0462109841934788e-05, "loss": 0.0673, "step": 22582 }, { "epoch": 0.3999344747129948, "grad_norm": 0.5309481620788574, "learning_rate": 2.0461308537572914e-05, "loss": 0.084, "step": 22583 }, { "epoch": 0.39995218425002327, "grad_norm": 0.6568340063095093, "learning_rate": 2.046050721524389e-05, "loss": 0.0841, "step": 22584 }, { "epoch": 0.39996989378705167, "grad_norm": 0.4602547585964203, "learning_rate": 2.045970587495035e-05, "loss": 0.0329, "step": 22585 }, { "epoch": 0.3999876033240801, "grad_norm": 0.5167988538742065, "learning_rate": 2.045890451669493e-05, "loss": 0.0829, "step": 22586 }, { "epoch": 0.4000053128611085, "grad_norm": 0.707917332649231, "learning_rate": 2.0458103140480267e-05, "loss": 0.0828, "step": 22587 }, { "epoch": 0.400023022398137, "grad_norm": 1.0469201803207397, "learning_rate": 2.0457301746308994e-05, "loss": 0.1016, "step": 22588 }, { "epoch": 0.4000407319351654, "grad_norm": 0.6802067756652832, "learning_rate": 2.045650033418375e-05, "loss": 0.0644, "step": 22589 }, { "epoch": 0.4000584414721938, "grad_norm": 0.7141675353050232, "learning_rate": 2.0455698904107175e-05, "loss": 0.0687, "step": 22590 }, { "epoch": 0.4000761510092222, "grad_norm": 1.0850383043289185, "learning_rate": 2.0454897456081897e-05, "loss": 0.0763, "step": 22591 }, { "epoch": 0.4000938605462507, "grad_norm": 0.6830509305000305, "learning_rate": 2.0454095990110562e-05, "loss": 0.0961, "step": 22592 }, { "epoch": 0.4001115700832791, "grad_norm": 0.776759922504425, "learning_rate": 2.0453294506195803e-05, "loss": 0.0888, "step": 22593 }, { "epoch": 0.40012927962030753, "grad_norm": 0.5568399429321289, "learning_rate": 2.0452493004340256e-05, "loss": 0.0681, "step": 22594 }, { "epoch": 0.40014698915733593, "grad_norm": 0.7182331085205078, "learning_rate": 2.045169148454656e-05, "loss": 0.0991, "step": 22595 }, { "epoch": 0.4001646986943644, "grad_norm": 0.6645980477333069, "learning_rate": 2.0450889946817346e-05, "loss": 0.0932, "step": 22596 }, { "epoch": 0.4001824082313928, "grad_norm": 0.5972999334335327, "learning_rate": 2.045008839115526e-05, "loss": 0.0637, "step": 22597 }, { "epoch": 0.40020011776842124, "grad_norm": 0.8692396879196167, "learning_rate": 2.0449286817562932e-05, "loss": 0.0753, "step": 22598 }, { "epoch": 0.4002178273054497, "grad_norm": 1.186005711555481, "learning_rate": 2.0448485226043002e-05, "loss": 0.0787, "step": 22599 }, { "epoch": 0.4002355368424781, "grad_norm": 0.6677235960960388, "learning_rate": 2.0447683616598106e-05, "loss": 0.088, "step": 22600 }, { "epoch": 0.40025324637950654, "grad_norm": 0.6613878607749939, "learning_rate": 2.0446881989230887e-05, "loss": 0.0597, "step": 22601 }, { "epoch": 0.40027095591653494, "grad_norm": 0.8459815979003906, "learning_rate": 2.0446080343943975e-05, "loss": 0.0917, "step": 22602 }, { "epoch": 0.4002886654535634, "grad_norm": 0.7771697640419006, "learning_rate": 2.0445278680740006e-05, "loss": 0.0824, "step": 22603 }, { "epoch": 0.4003063749905918, "grad_norm": 0.8607004880905151, "learning_rate": 2.044447699962162e-05, "loss": 0.0896, "step": 22604 }, { "epoch": 0.40032408452762025, "grad_norm": 0.542561411857605, "learning_rate": 2.044367530059146e-05, "loss": 0.0654, "step": 22605 }, { "epoch": 0.40034179406464865, "grad_norm": 0.6813545823097229, "learning_rate": 2.044287358365216e-05, "loss": 0.0839, "step": 22606 }, { "epoch": 0.4003595036016771, "grad_norm": 0.5473136305809021, "learning_rate": 2.044207184880635e-05, "loss": 0.0647, "step": 22607 }, { "epoch": 0.4003772131387055, "grad_norm": 0.8132180571556091, "learning_rate": 2.044127009605668e-05, "loss": 0.1, "step": 22608 }, { "epoch": 0.40039492267573396, "grad_norm": 0.8187912106513977, "learning_rate": 2.0440468325405777e-05, "loss": 0.0905, "step": 22609 }, { "epoch": 0.40041263221276235, "grad_norm": 0.7409238219261169, "learning_rate": 2.0439666536856282e-05, "loss": 0.0664, "step": 22610 }, { "epoch": 0.4004303417497908, "grad_norm": 0.9809882640838623, "learning_rate": 2.0438864730410842e-05, "loss": 0.1005, "step": 22611 }, { "epoch": 0.4004480512868192, "grad_norm": 1.037016749382019, "learning_rate": 2.0438062906072078e-05, "loss": 0.0886, "step": 22612 }, { "epoch": 0.40046576082384766, "grad_norm": 0.4636889398097992, "learning_rate": 2.0437261063842643e-05, "loss": 0.0515, "step": 22613 }, { "epoch": 0.4004834703608761, "grad_norm": 0.5248458981513977, "learning_rate": 2.0436459203725167e-05, "loss": 0.0847, "step": 22614 }, { "epoch": 0.4005011798979045, "grad_norm": 0.5752886533737183, "learning_rate": 2.0435657325722295e-05, "loss": 0.0649, "step": 22615 }, { "epoch": 0.40051888943493297, "grad_norm": 0.741244375705719, "learning_rate": 2.0434855429836654e-05, "loss": 0.0828, "step": 22616 }, { "epoch": 0.40053659897196137, "grad_norm": 0.7293193936347961, "learning_rate": 2.0434053516070893e-05, "loss": 0.0644, "step": 22617 }, { "epoch": 0.4005543085089898, "grad_norm": 0.6647514700889587, "learning_rate": 2.0433251584427643e-05, "loss": 0.061, "step": 22618 }, { "epoch": 0.4005720180460182, "grad_norm": 0.5420827865600586, "learning_rate": 2.043244963490954e-05, "loss": 0.0689, "step": 22619 }, { "epoch": 0.4005897275830467, "grad_norm": 0.7289344668388367, "learning_rate": 2.0431647667519237e-05, "loss": 0.0836, "step": 22620 }, { "epoch": 0.40060743712007507, "grad_norm": 0.6475109457969666, "learning_rate": 2.0430845682259354e-05, "loss": 0.0922, "step": 22621 }, { "epoch": 0.4006251466571035, "grad_norm": 0.4647665023803711, "learning_rate": 2.0430043679132545e-05, "loss": 0.0827, "step": 22622 }, { "epoch": 0.4006428561941319, "grad_norm": 0.8615585565567017, "learning_rate": 2.0429241658141435e-05, "loss": 0.0771, "step": 22623 }, { "epoch": 0.4006605657311604, "grad_norm": 0.9756463766098022, "learning_rate": 2.0428439619288674e-05, "loss": 0.1092, "step": 22624 }, { "epoch": 0.4006782752681888, "grad_norm": 0.6428642272949219, "learning_rate": 2.0427637562576893e-05, "loss": 0.0626, "step": 22625 }, { "epoch": 0.40069598480521723, "grad_norm": 0.5237982869148254, "learning_rate": 2.0426835488008737e-05, "loss": 0.0904, "step": 22626 }, { "epoch": 0.4007136943422457, "grad_norm": 0.5184674263000488, "learning_rate": 2.0426033395586837e-05, "loss": 0.088, "step": 22627 }, { "epoch": 0.4007314038792741, "grad_norm": 0.6106225848197937, "learning_rate": 2.0425231285313836e-05, "loss": 0.0661, "step": 22628 }, { "epoch": 0.40074911341630254, "grad_norm": 0.9937909245491028, "learning_rate": 2.0424429157192374e-05, "loss": 0.0783, "step": 22629 }, { "epoch": 0.40076682295333094, "grad_norm": 0.6487653255462646, "learning_rate": 2.042362701122509e-05, "loss": 0.0826, "step": 22630 }, { "epoch": 0.4007845324903594, "grad_norm": 0.6123434901237488, "learning_rate": 2.042282484741462e-05, "loss": 0.0901, "step": 22631 }, { "epoch": 0.4008022420273878, "grad_norm": 1.2602139711380005, "learning_rate": 2.042202266576361e-05, "loss": 0.0909, "step": 22632 }, { "epoch": 0.40081995156441624, "grad_norm": 0.37022215127944946, "learning_rate": 2.0421220466274685e-05, "loss": 0.0611, "step": 22633 }, { "epoch": 0.40083766110144464, "grad_norm": 0.702660083770752, "learning_rate": 2.04204182489505e-05, "loss": 0.0544, "step": 22634 }, { "epoch": 0.4008553706384731, "grad_norm": 0.7550207376480103, "learning_rate": 2.041961601379368e-05, "loss": 0.0635, "step": 22635 }, { "epoch": 0.4008730801755015, "grad_norm": 0.9484726190567017, "learning_rate": 2.041881376080688e-05, "loss": 0.0775, "step": 22636 }, { "epoch": 0.40089078971252995, "grad_norm": 0.8560688495635986, "learning_rate": 2.0418011489992727e-05, "loss": 0.1154, "step": 22637 }, { "epoch": 0.40090849924955835, "grad_norm": 0.5837262272834778, "learning_rate": 2.0417209201353863e-05, "loss": 0.0592, "step": 22638 }, { "epoch": 0.4009262087865868, "grad_norm": 0.9810382127761841, "learning_rate": 2.041640689489293e-05, "loss": 0.0894, "step": 22639 }, { "epoch": 0.4009439183236152, "grad_norm": 0.7608053088188171, "learning_rate": 2.0415604570612567e-05, "loss": 0.1132, "step": 22640 }, { "epoch": 0.40096162786064365, "grad_norm": 0.8772740364074707, "learning_rate": 2.041480222851541e-05, "loss": 0.0887, "step": 22641 }, { "epoch": 0.4009793373976721, "grad_norm": 0.5650457143783569, "learning_rate": 2.0413999868604104e-05, "loss": 0.0792, "step": 22642 }, { "epoch": 0.4009970469347005, "grad_norm": 0.5623883008956909, "learning_rate": 2.0413197490881283e-05, "loss": 0.0874, "step": 22643 }, { "epoch": 0.40101475647172896, "grad_norm": 0.5450530052185059, "learning_rate": 2.0412395095349592e-05, "loss": 0.082, "step": 22644 }, { "epoch": 0.40103246600875736, "grad_norm": 0.5154597163200378, "learning_rate": 2.041159268201167e-05, "loss": 0.08, "step": 22645 }, { "epoch": 0.4010501755457858, "grad_norm": 0.42527031898498535, "learning_rate": 2.0410790250870153e-05, "loss": 0.0564, "step": 22646 }, { "epoch": 0.4010678850828142, "grad_norm": 0.9191917777061462, "learning_rate": 2.0409987801927678e-05, "loss": 0.0963, "step": 22647 }, { "epoch": 0.40108559461984267, "grad_norm": 0.5099489092826843, "learning_rate": 2.04091853351869e-05, "loss": 0.073, "step": 22648 }, { "epoch": 0.40110330415687107, "grad_norm": 0.5774407982826233, "learning_rate": 2.0408382850650445e-05, "loss": 0.0907, "step": 22649 }, { "epoch": 0.4011210136938995, "grad_norm": 0.4674478769302368, "learning_rate": 2.040758034832096e-05, "loss": 0.0819, "step": 22650 }, { "epoch": 0.4011387232309279, "grad_norm": 0.5182493925094604, "learning_rate": 2.040677782820108e-05, "loss": 0.0555, "step": 22651 }, { "epoch": 0.4011564327679564, "grad_norm": 0.7853979468345642, "learning_rate": 2.0405975290293447e-05, "loss": 0.1157, "step": 22652 }, { "epoch": 0.40117414230498477, "grad_norm": 0.4414048194885254, "learning_rate": 2.04051727346007e-05, "loss": 0.0722, "step": 22653 }, { "epoch": 0.4011918518420132, "grad_norm": 0.7644044756889343, "learning_rate": 2.040437016112549e-05, "loss": 0.1023, "step": 22654 }, { "epoch": 0.4012095613790416, "grad_norm": 0.9258975386619568, "learning_rate": 2.040356756987044e-05, "loss": 0.0883, "step": 22655 }, { "epoch": 0.4012272709160701, "grad_norm": 0.7250853180885315, "learning_rate": 2.040276496083821e-05, "loss": 0.0777, "step": 22656 }, { "epoch": 0.40124498045309853, "grad_norm": 1.1815712451934814, "learning_rate": 2.040196233403142e-05, "loss": 0.132, "step": 22657 }, { "epoch": 0.40126268999012693, "grad_norm": 0.6651766896247864, "learning_rate": 2.0401159689452718e-05, "loss": 0.0687, "step": 22658 }, { "epoch": 0.4012803995271554, "grad_norm": 0.8036478757858276, "learning_rate": 2.0400357027104757e-05, "loss": 0.068, "step": 22659 }, { "epoch": 0.4012981090641838, "grad_norm": 0.46469977498054504, "learning_rate": 2.0399554346990162e-05, "loss": 0.0615, "step": 22660 }, { "epoch": 0.40131581860121224, "grad_norm": 0.6992495059967041, "learning_rate": 2.0398751649111578e-05, "loss": 0.058, "step": 22661 }, { "epoch": 0.40133352813824064, "grad_norm": 0.5007750988006592, "learning_rate": 2.0397948933471648e-05, "loss": 0.0562, "step": 22662 }, { "epoch": 0.4013512376752691, "grad_norm": 0.6859326958656311, "learning_rate": 2.0397146200073015e-05, "loss": 0.0765, "step": 22663 }, { "epoch": 0.4013689472122975, "grad_norm": 0.9727010726928711, "learning_rate": 2.039634344891831e-05, "loss": 0.0818, "step": 22664 }, { "epoch": 0.40138665674932594, "grad_norm": 0.6951802372932434, "learning_rate": 2.039554068001019e-05, "loss": 0.0976, "step": 22665 }, { "epoch": 0.40140436628635434, "grad_norm": 0.6090533137321472, "learning_rate": 2.039473789335128e-05, "loss": 0.0852, "step": 22666 }, { "epoch": 0.4014220758233828, "grad_norm": 0.5489889979362488, "learning_rate": 2.0393935088944233e-05, "loss": 0.0326, "step": 22667 }, { "epoch": 0.4014397853604112, "grad_norm": 0.7121413946151733, "learning_rate": 2.0393132266791682e-05, "loss": 0.0776, "step": 22668 }, { "epoch": 0.40145749489743965, "grad_norm": 0.2282177358865738, "learning_rate": 2.039232942689627e-05, "loss": 0.043, "step": 22669 }, { "epoch": 0.40147520443446805, "grad_norm": 0.5745954513549805, "learning_rate": 2.0391526569260646e-05, "loss": 0.0457, "step": 22670 }, { "epoch": 0.4014929139714965, "grad_norm": 1.0076862573623657, "learning_rate": 2.039072369388744e-05, "loss": 0.0843, "step": 22671 }, { "epoch": 0.40151062350852496, "grad_norm": 0.747611403465271, "learning_rate": 2.0389920800779298e-05, "loss": 0.0633, "step": 22672 }, { "epoch": 0.40152833304555335, "grad_norm": 0.47201356291770935, "learning_rate": 2.0389117889938863e-05, "loss": 0.0729, "step": 22673 }, { "epoch": 0.4015460425825818, "grad_norm": 0.5201798677444458, "learning_rate": 2.0388314961368772e-05, "loss": 0.0467, "step": 22674 }, { "epoch": 0.4015637521196102, "grad_norm": 0.6337958574295044, "learning_rate": 2.0387512015071674e-05, "loss": 0.0977, "step": 22675 }, { "epoch": 0.40158146165663866, "grad_norm": 0.9124394655227661, "learning_rate": 2.0386709051050202e-05, "loss": 0.0858, "step": 22676 }, { "epoch": 0.40159917119366706, "grad_norm": 1.1183561086654663, "learning_rate": 2.0385906069307008e-05, "loss": 0.1123, "step": 22677 }, { "epoch": 0.4016168807306955, "grad_norm": 0.6560637950897217, "learning_rate": 2.038510306984472e-05, "loss": 0.0909, "step": 22678 }, { "epoch": 0.4016345902677239, "grad_norm": 0.9316399693489075, "learning_rate": 2.0384300052666e-05, "loss": 0.08, "step": 22679 }, { "epoch": 0.40165229980475237, "grad_norm": 1.0820329189300537, "learning_rate": 2.0383497017773466e-05, "loss": 0.0924, "step": 22680 }, { "epoch": 0.40167000934178076, "grad_norm": 0.5304457545280457, "learning_rate": 2.0382693965169772e-05, "loss": 0.0773, "step": 22681 }, { "epoch": 0.4016877188788092, "grad_norm": 1.0568801164627075, "learning_rate": 2.0381890894857567e-05, "loss": 0.067, "step": 22682 }, { "epoch": 0.4017054284158376, "grad_norm": 0.5594400763511658, "learning_rate": 2.038108780683948e-05, "loss": 0.0625, "step": 22683 }, { "epoch": 0.40172313795286607, "grad_norm": 0.48499631881713867, "learning_rate": 2.038028470111816e-05, "loss": 0.0832, "step": 22684 }, { "epoch": 0.40174084748989447, "grad_norm": 0.643085241317749, "learning_rate": 2.0379481577696245e-05, "loss": 0.0964, "step": 22685 }, { "epoch": 0.4017585570269229, "grad_norm": 0.4655340909957886, "learning_rate": 2.037867843657638e-05, "loss": 0.095, "step": 22686 }, { "epoch": 0.4017762665639514, "grad_norm": 0.4996539354324341, "learning_rate": 2.0377875277761208e-05, "loss": 0.0938, "step": 22687 }, { "epoch": 0.4017939761009798, "grad_norm": 0.698737621307373, "learning_rate": 2.0377072101253372e-05, "loss": 0.1063, "step": 22688 }, { "epoch": 0.40181168563800823, "grad_norm": 0.5309618711471558, "learning_rate": 2.037626890705551e-05, "loss": 0.0471, "step": 22689 }, { "epoch": 0.40182939517503663, "grad_norm": 0.7089031934738159, "learning_rate": 2.037546569517027e-05, "loss": 0.074, "step": 22690 }, { "epoch": 0.4018471047120651, "grad_norm": 0.35650214552879333, "learning_rate": 2.037466246560029e-05, "loss": 0.0673, "step": 22691 }, { "epoch": 0.4018648142490935, "grad_norm": 0.5645544528961182, "learning_rate": 2.0373859218348214e-05, "loss": 0.0598, "step": 22692 }, { "epoch": 0.40188252378612194, "grad_norm": 0.7479618787765503, "learning_rate": 2.037305595341668e-05, "loss": 0.0942, "step": 22693 }, { "epoch": 0.40190023332315034, "grad_norm": 0.9167914390563965, "learning_rate": 2.0372252670808342e-05, "loss": 0.1131, "step": 22694 }, { "epoch": 0.4019179428601788, "grad_norm": 1.5119740962982178, "learning_rate": 2.0371449370525832e-05, "loss": 0.1106, "step": 22695 }, { "epoch": 0.4019356523972072, "grad_norm": 0.7680555582046509, "learning_rate": 2.0370646052571798e-05, "loss": 0.0698, "step": 22696 }, { "epoch": 0.40195336193423564, "grad_norm": 0.6001074910163879, "learning_rate": 2.0369842716948884e-05, "loss": 0.0816, "step": 22697 }, { "epoch": 0.40197107147126404, "grad_norm": 0.6095249056816101, "learning_rate": 2.0369039363659733e-05, "loss": 0.107, "step": 22698 }, { "epoch": 0.4019887810082925, "grad_norm": 0.7701290249824524, "learning_rate": 2.036823599270698e-05, "loss": 0.0625, "step": 22699 }, { "epoch": 0.4020064905453209, "grad_norm": 0.8642642498016357, "learning_rate": 2.0367432604093275e-05, "loss": 0.0973, "step": 22700 }, { "epoch": 0.40202420008234935, "grad_norm": 0.7084600329399109, "learning_rate": 2.0366629197821257e-05, "loss": 0.0953, "step": 22701 }, { "epoch": 0.4020419096193778, "grad_norm": 0.8622750639915466, "learning_rate": 2.0365825773893576e-05, "loss": 0.1016, "step": 22702 }, { "epoch": 0.4020596191564062, "grad_norm": 0.5642402172088623, "learning_rate": 2.0365022332312868e-05, "loss": 0.0652, "step": 22703 }, { "epoch": 0.40207732869343465, "grad_norm": 0.6049978137016296, "learning_rate": 2.0364218873081785e-05, "loss": 0.0744, "step": 22704 }, { "epoch": 0.40209503823046305, "grad_norm": 0.5899279713630676, "learning_rate": 2.0363415396202958e-05, "loss": 0.0828, "step": 22705 }, { "epoch": 0.4021127477674915, "grad_norm": 0.8819916248321533, "learning_rate": 2.0362611901679038e-05, "loss": 0.0614, "step": 22706 }, { "epoch": 0.4021304573045199, "grad_norm": 0.8301828503608704, "learning_rate": 2.036180838951267e-05, "loss": 0.0781, "step": 22707 }, { "epoch": 0.40214816684154836, "grad_norm": 0.6705003976821899, "learning_rate": 2.0361004859706493e-05, "loss": 0.0683, "step": 22708 }, { "epoch": 0.40216587637857676, "grad_norm": 0.9185181260108948, "learning_rate": 2.036020131226315e-05, "loss": 0.055, "step": 22709 }, { "epoch": 0.4021835859156052, "grad_norm": 1.1240001916885376, "learning_rate": 2.0359397747185288e-05, "loss": 0.108, "step": 22710 }, { "epoch": 0.4022012954526336, "grad_norm": 0.8262141942977905, "learning_rate": 2.035859416447555e-05, "loss": 0.0991, "step": 22711 }, { "epoch": 0.40221900498966207, "grad_norm": 0.5513061285018921, "learning_rate": 2.035779056413658e-05, "loss": 0.0996, "step": 22712 }, { "epoch": 0.40223671452669046, "grad_norm": 0.5285237431526184, "learning_rate": 2.0356986946171022e-05, "loss": 0.0632, "step": 22713 }, { "epoch": 0.4022544240637189, "grad_norm": 0.7471111416816711, "learning_rate": 2.0356183310581517e-05, "loss": 0.0749, "step": 22714 }, { "epoch": 0.4022721336007473, "grad_norm": 0.9104821681976318, "learning_rate": 2.035537965737071e-05, "loss": 0.0976, "step": 22715 }, { "epoch": 0.40228984313777577, "grad_norm": 0.4507869482040405, "learning_rate": 2.035457598654125e-05, "loss": 0.0695, "step": 22716 }, { "epoch": 0.4023075526748042, "grad_norm": 0.5796130299568176, "learning_rate": 2.035377229809577e-05, "loss": 0.0515, "step": 22717 }, { "epoch": 0.4023252622118326, "grad_norm": 0.8236445784568787, "learning_rate": 2.0352968592036927e-05, "loss": 0.0689, "step": 22718 }, { "epoch": 0.4023429717488611, "grad_norm": 0.5312992930412292, "learning_rate": 2.0352164868367352e-05, "loss": 0.0771, "step": 22719 }, { "epoch": 0.4023606812858895, "grad_norm": 0.6183386445045471, "learning_rate": 2.0351361127089704e-05, "loss": 0.0645, "step": 22720 }, { "epoch": 0.40237839082291793, "grad_norm": 0.9030572175979614, "learning_rate": 2.035055736820661e-05, "loss": 0.098, "step": 22721 }, { "epoch": 0.40239610035994633, "grad_norm": 1.008280873298645, "learning_rate": 2.034975359172073e-05, "loss": 0.0705, "step": 22722 }, { "epoch": 0.4024138098969748, "grad_norm": 0.7893079519271851, "learning_rate": 2.03489497976347e-05, "loss": 0.0667, "step": 22723 }, { "epoch": 0.4024315194340032, "grad_norm": 0.8107609748840332, "learning_rate": 2.0348145985951164e-05, "loss": 0.0762, "step": 22724 }, { "epoch": 0.40244922897103164, "grad_norm": 0.6925731301307678, "learning_rate": 2.0347342156672773e-05, "loss": 0.054, "step": 22725 }, { "epoch": 0.40246693850806003, "grad_norm": 0.814738392829895, "learning_rate": 2.0346538309802165e-05, "loss": 0.0963, "step": 22726 }, { "epoch": 0.4024846480450885, "grad_norm": 0.6890774965286255, "learning_rate": 2.034573444534199e-05, "loss": 0.1096, "step": 22727 }, { "epoch": 0.4025023575821169, "grad_norm": 0.6066020131111145, "learning_rate": 2.034493056329488e-05, "loss": 0.0683, "step": 22728 }, { "epoch": 0.40252006711914534, "grad_norm": 1.3673938512802124, "learning_rate": 2.0344126663663496e-05, "loss": 0.0852, "step": 22729 }, { "epoch": 0.40253777665617374, "grad_norm": 0.9666997194290161, "learning_rate": 2.0343322746450477e-05, "loss": 0.1345, "step": 22730 }, { "epoch": 0.4025554861932022, "grad_norm": 0.6634582281112671, "learning_rate": 2.0342518811658462e-05, "loss": 0.0776, "step": 22731 }, { "epoch": 0.40257319573023065, "grad_norm": 0.8979077339172363, "learning_rate": 2.0341714859290104e-05, "loss": 0.0775, "step": 22732 }, { "epoch": 0.40259090526725905, "grad_norm": 1.2189114093780518, "learning_rate": 2.0340910889348045e-05, "loss": 0.1414, "step": 22733 }, { "epoch": 0.4026086148042875, "grad_norm": 0.5352331399917603, "learning_rate": 2.0340106901834927e-05, "loss": 0.0775, "step": 22734 }, { "epoch": 0.4026263243413159, "grad_norm": 0.5823455452919006, "learning_rate": 2.0339302896753394e-05, "loss": 0.0664, "step": 22735 }, { "epoch": 0.40264403387834435, "grad_norm": 0.563156008720398, "learning_rate": 2.0338498874106102e-05, "loss": 0.0777, "step": 22736 }, { "epoch": 0.40266174341537275, "grad_norm": 0.7668667435646057, "learning_rate": 2.0337694833895687e-05, "loss": 0.0733, "step": 22737 }, { "epoch": 0.4026794529524012, "grad_norm": 0.6873134970664978, "learning_rate": 2.0336890776124788e-05, "loss": 0.0738, "step": 22738 }, { "epoch": 0.4026971624894296, "grad_norm": 0.7092819809913635, "learning_rate": 2.0336086700796068e-05, "loss": 0.063, "step": 22739 }, { "epoch": 0.40271487202645806, "grad_norm": 0.48591986298561096, "learning_rate": 2.0335282607912157e-05, "loss": 0.0781, "step": 22740 }, { "epoch": 0.40273258156348646, "grad_norm": 0.6812049150466919, "learning_rate": 2.033447849747571e-05, "loss": 0.1163, "step": 22741 }, { "epoch": 0.4027502911005149, "grad_norm": 0.7412100434303284, "learning_rate": 2.0333674369489362e-05, "loss": 0.0763, "step": 22742 }, { "epoch": 0.4027680006375433, "grad_norm": 0.7207057476043701, "learning_rate": 2.0332870223955773e-05, "loss": 0.0802, "step": 22743 }, { "epoch": 0.40278571017457176, "grad_norm": 0.8498383164405823, "learning_rate": 2.0332066060877574e-05, "loss": 0.0675, "step": 22744 }, { "epoch": 0.40280341971160016, "grad_norm": 0.5619754195213318, "learning_rate": 2.0331261880257418e-05, "loss": 0.0609, "step": 22745 }, { "epoch": 0.4028211292486286, "grad_norm": 0.47317075729370117, "learning_rate": 2.0330457682097953e-05, "loss": 0.0667, "step": 22746 }, { "epoch": 0.40283883878565707, "grad_norm": 0.7144535183906555, "learning_rate": 2.032965346640182e-05, "loss": 0.0632, "step": 22747 }, { "epoch": 0.40285654832268547, "grad_norm": 0.38598552346229553, "learning_rate": 2.0328849233171665e-05, "loss": 0.0689, "step": 22748 }, { "epoch": 0.4028742578597139, "grad_norm": 0.8150320649147034, "learning_rate": 2.032804498241013e-05, "loss": 0.0931, "step": 22749 }, { "epoch": 0.4028919673967423, "grad_norm": 0.6078513264656067, "learning_rate": 2.0327240714119878e-05, "loss": 0.0989, "step": 22750 }, { "epoch": 0.4029096769337708, "grad_norm": 0.7536861300468445, "learning_rate": 2.032643642830353e-05, "loss": 0.072, "step": 22751 }, { "epoch": 0.4029273864707992, "grad_norm": 0.5702897906303406, "learning_rate": 2.0325632124963755e-05, "loss": 0.0822, "step": 22752 }, { "epoch": 0.40294509600782763, "grad_norm": 0.9455785155296326, "learning_rate": 2.0324827804103182e-05, "loss": 0.0938, "step": 22753 }, { "epoch": 0.40296280554485603, "grad_norm": 0.49182382225990295, "learning_rate": 2.0324023465724468e-05, "loss": 0.0623, "step": 22754 }, { "epoch": 0.4029805150818845, "grad_norm": 1.0388609170913696, "learning_rate": 2.0323219109830253e-05, "loss": 0.0788, "step": 22755 }, { "epoch": 0.4029982246189129, "grad_norm": 0.7692945599555969, "learning_rate": 2.032241473642319e-05, "loss": 0.0637, "step": 22756 }, { "epoch": 0.40301593415594134, "grad_norm": 0.609171986579895, "learning_rate": 2.0321610345505914e-05, "loss": 0.0889, "step": 22757 }, { "epoch": 0.40303364369296973, "grad_norm": 0.7334933280944824, "learning_rate": 2.0320805937081078e-05, "loss": 0.0998, "step": 22758 }, { "epoch": 0.4030513532299982, "grad_norm": 0.4298197031021118, "learning_rate": 2.0320001511151337e-05, "loss": 0.06, "step": 22759 }, { "epoch": 0.4030690627670266, "grad_norm": 0.6332176923751831, "learning_rate": 2.0319197067719327e-05, "loss": 0.07, "step": 22760 }, { "epoch": 0.40308677230405504, "grad_norm": 1.1706149578094482, "learning_rate": 2.0318392606787695e-05, "loss": 0.0716, "step": 22761 }, { "epoch": 0.4031044818410835, "grad_norm": 0.655210018157959, "learning_rate": 2.0317588128359085e-05, "loss": 0.0673, "step": 22762 }, { "epoch": 0.4031221913781119, "grad_norm": 0.7902431488037109, "learning_rate": 2.031678363243615e-05, "loss": 0.0747, "step": 22763 }, { "epoch": 0.40313990091514035, "grad_norm": 0.7596185803413391, "learning_rate": 2.031597911902154e-05, "loss": 0.1184, "step": 22764 }, { "epoch": 0.40315761045216875, "grad_norm": 0.47815367579460144, "learning_rate": 2.031517458811789e-05, "loss": 0.087, "step": 22765 }, { "epoch": 0.4031753199891972, "grad_norm": 0.8302655220031738, "learning_rate": 2.031437003972786e-05, "loss": 0.0688, "step": 22766 }, { "epoch": 0.4031930295262256, "grad_norm": 0.55135577917099, "learning_rate": 2.0313565473854075e-05, "loss": 0.0787, "step": 22767 }, { "epoch": 0.40321073906325405, "grad_norm": 0.5679128766059875, "learning_rate": 2.0312760890499212e-05, "loss": 0.0777, "step": 22768 }, { "epoch": 0.40322844860028245, "grad_norm": 0.42167073488235474, "learning_rate": 2.0311956289665902e-05, "loss": 0.0776, "step": 22769 }, { "epoch": 0.4032461581373109, "grad_norm": 0.7578828930854797, "learning_rate": 2.0311151671356788e-05, "loss": 0.081, "step": 22770 }, { "epoch": 0.4032638676743393, "grad_norm": 0.5683820843696594, "learning_rate": 2.0310347035574523e-05, "loss": 0.1004, "step": 22771 }, { "epoch": 0.40328157721136776, "grad_norm": 0.6071069836616516, "learning_rate": 2.0309542382321752e-05, "loss": 0.0651, "step": 22772 }, { "epoch": 0.40329928674839616, "grad_norm": 0.5152893662452698, "learning_rate": 2.030873771160113e-05, "loss": 0.09, "step": 22773 }, { "epoch": 0.4033169962854246, "grad_norm": 0.7001855373382568, "learning_rate": 2.030793302341529e-05, "loss": 0.0769, "step": 22774 }, { "epoch": 0.403334705822453, "grad_norm": 1.1240417957305908, "learning_rate": 2.0307128317766894e-05, "loss": 0.1169, "step": 22775 }, { "epoch": 0.40335241535948146, "grad_norm": 0.7400156855583191, "learning_rate": 2.0306323594658578e-05, "loss": 0.109, "step": 22776 }, { "epoch": 0.4033701248965099, "grad_norm": 0.770346999168396, "learning_rate": 2.0305518854092997e-05, "loss": 0.0982, "step": 22777 }, { "epoch": 0.4033878344335383, "grad_norm": 0.7580846548080444, "learning_rate": 2.0304714096072795e-05, "loss": 0.0828, "step": 22778 }, { "epoch": 0.40340554397056677, "grad_norm": 0.5434731841087341, "learning_rate": 2.0303909320600617e-05, "loss": 0.0545, "step": 22779 }, { "epoch": 0.40342325350759517, "grad_norm": 0.6933498382568359, "learning_rate": 2.0303104527679117e-05, "loss": 0.0624, "step": 22780 }, { "epoch": 0.4034409630446236, "grad_norm": 0.6722903847694397, "learning_rate": 2.0302299717310937e-05, "loss": 0.0643, "step": 22781 }, { "epoch": 0.403458672581652, "grad_norm": 0.47527626156806946, "learning_rate": 2.030149488949873e-05, "loss": 0.1029, "step": 22782 }, { "epoch": 0.4034763821186805, "grad_norm": 0.8821532726287842, "learning_rate": 2.030069004424514e-05, "loss": 0.0742, "step": 22783 }, { "epoch": 0.4034940916557089, "grad_norm": 0.7732935547828674, "learning_rate": 2.0299885181552816e-05, "loss": 0.0922, "step": 22784 }, { "epoch": 0.40351180119273733, "grad_norm": 0.5870466232299805, "learning_rate": 2.0299080301424405e-05, "loss": 0.0632, "step": 22785 }, { "epoch": 0.40352951072976573, "grad_norm": 0.4665567874908447, "learning_rate": 2.0298275403862555e-05, "loss": 0.0913, "step": 22786 }, { "epoch": 0.4035472202667942, "grad_norm": 0.5154774785041809, "learning_rate": 2.029747048886992e-05, "loss": 0.0656, "step": 22787 }, { "epoch": 0.4035649298038226, "grad_norm": 0.934737503528595, "learning_rate": 2.0296665556449136e-05, "loss": 0.0686, "step": 22788 }, { "epoch": 0.40358263934085103, "grad_norm": 0.5284652709960938, "learning_rate": 2.0295860606602866e-05, "loss": 0.1062, "step": 22789 }, { "epoch": 0.40360034887787943, "grad_norm": 0.7796885967254639, "learning_rate": 2.0295055639333745e-05, "loss": 0.0778, "step": 22790 }, { "epoch": 0.4036180584149079, "grad_norm": 0.5573655962944031, "learning_rate": 2.0294250654644422e-05, "loss": 0.0425, "step": 22791 }, { "epoch": 0.40363576795193634, "grad_norm": 0.9828760027885437, "learning_rate": 2.0293445652537558e-05, "loss": 0.0574, "step": 22792 }, { "epoch": 0.40365347748896474, "grad_norm": 0.6121768951416016, "learning_rate": 2.0292640633015788e-05, "loss": 0.0654, "step": 22793 }, { "epoch": 0.4036711870259932, "grad_norm": 0.639251172542572, "learning_rate": 2.029183559608177e-05, "loss": 0.0803, "step": 22794 }, { "epoch": 0.4036888965630216, "grad_norm": 1.1076411008834839, "learning_rate": 2.029103054173814e-05, "loss": 0.0579, "step": 22795 }, { "epoch": 0.40370660610005005, "grad_norm": 0.6202138066291809, "learning_rate": 2.0290225469987563e-05, "loss": 0.0627, "step": 22796 }, { "epoch": 0.40372431563707845, "grad_norm": 0.48494499921798706, "learning_rate": 2.0289420380832675e-05, "loss": 0.0588, "step": 22797 }, { "epoch": 0.4037420251741069, "grad_norm": 1.1154402494430542, "learning_rate": 2.028861527427613e-05, "loss": 0.074, "step": 22798 }, { "epoch": 0.4037597347111353, "grad_norm": 0.4631176292896271, "learning_rate": 2.028781015032058e-05, "loss": 0.0644, "step": 22799 }, { "epoch": 0.40377744424816375, "grad_norm": 0.8979944586753845, "learning_rate": 2.0287005008968664e-05, "loss": 0.0899, "step": 22800 }, { "epoch": 0.40379515378519215, "grad_norm": 0.8073111176490784, "learning_rate": 2.028619985022304e-05, "loss": 0.1, "step": 22801 }, { "epoch": 0.4038128633222206, "grad_norm": 0.8015003204345703, "learning_rate": 2.028539467408635e-05, "loss": 0.0706, "step": 22802 }, { "epoch": 0.403830572859249, "grad_norm": 0.9057607054710388, "learning_rate": 2.0284589480561248e-05, "loss": 0.0616, "step": 22803 }, { "epoch": 0.40384828239627746, "grad_norm": 0.4491930603981018, "learning_rate": 2.028378426965038e-05, "loss": 0.0735, "step": 22804 }, { "epoch": 0.40386599193330586, "grad_norm": 0.6095542311668396, "learning_rate": 2.0282979041356395e-05, "loss": 0.0822, "step": 22805 }, { "epoch": 0.4038837014703343, "grad_norm": 0.5886566638946533, "learning_rate": 2.0282173795681944e-05, "loss": 0.091, "step": 22806 }, { "epoch": 0.40390141100736277, "grad_norm": 1.1082117557525635, "learning_rate": 2.0281368532629677e-05, "loss": 0.0799, "step": 22807 }, { "epoch": 0.40391912054439116, "grad_norm": 0.7569085359573364, "learning_rate": 2.0280563252202243e-05, "loss": 0.0718, "step": 22808 }, { "epoch": 0.4039368300814196, "grad_norm": 1.4436266422271729, "learning_rate": 2.0279757954402286e-05, "loss": 0.0836, "step": 22809 }, { "epoch": 0.403954539618448, "grad_norm": 0.8473215103149414, "learning_rate": 2.027895263923246e-05, "loss": 0.1191, "step": 22810 }, { "epoch": 0.40397224915547647, "grad_norm": 0.492608904838562, "learning_rate": 2.0278147306695418e-05, "loss": 0.0595, "step": 22811 }, { "epoch": 0.40398995869250487, "grad_norm": 0.590108335018158, "learning_rate": 2.0277341956793803e-05, "loss": 0.0908, "step": 22812 }, { "epoch": 0.4040076682295333, "grad_norm": 0.6563659906387329, "learning_rate": 2.0276536589530266e-05, "loss": 0.096, "step": 22813 }, { "epoch": 0.4040253777665617, "grad_norm": 0.6228612661361694, "learning_rate": 2.0275731204907456e-05, "loss": 0.0717, "step": 22814 }, { "epoch": 0.4040430873035902, "grad_norm": 0.4519713222980499, "learning_rate": 2.0274925802928026e-05, "loss": 0.0951, "step": 22815 }, { "epoch": 0.4040607968406186, "grad_norm": 0.8293176293373108, "learning_rate": 2.0274120383594623e-05, "loss": 0.0943, "step": 22816 }, { "epoch": 0.40407850637764703, "grad_norm": 0.4024594724178314, "learning_rate": 2.0273314946909898e-05, "loss": 0.0481, "step": 22817 }, { "epoch": 0.4040962159146754, "grad_norm": 0.9540272355079651, "learning_rate": 2.02725094928765e-05, "loss": 0.0907, "step": 22818 }, { "epoch": 0.4041139254517039, "grad_norm": 0.9445818662643433, "learning_rate": 2.0271704021497075e-05, "loss": 0.105, "step": 22819 }, { "epoch": 0.4041316349887323, "grad_norm": 1.0009922981262207, "learning_rate": 2.0270898532774278e-05, "loss": 0.0957, "step": 22820 }, { "epoch": 0.40414934452576073, "grad_norm": 0.6014627814292908, "learning_rate": 2.0270093026710766e-05, "loss": 0.0568, "step": 22821 }, { "epoch": 0.4041670540627892, "grad_norm": 0.8324629664421082, "learning_rate": 2.0269287503309173e-05, "loss": 0.0813, "step": 22822 }, { "epoch": 0.4041847635998176, "grad_norm": 0.389934241771698, "learning_rate": 2.026848196257216e-05, "loss": 0.0737, "step": 22823 }, { "epoch": 0.40420247313684604, "grad_norm": 0.8291733860969543, "learning_rate": 2.0267676404502376e-05, "loss": 0.0611, "step": 22824 }, { "epoch": 0.40422018267387444, "grad_norm": 0.7539059519767761, "learning_rate": 2.026687082910246e-05, "loss": 0.0841, "step": 22825 }, { "epoch": 0.4042378922109029, "grad_norm": 0.9129008650779724, "learning_rate": 2.0266065236375084e-05, "loss": 0.106, "step": 22826 }, { "epoch": 0.4042556017479313, "grad_norm": 0.7516035437583923, "learning_rate": 2.0265259626322882e-05, "loss": 0.0605, "step": 22827 }, { "epoch": 0.40427331128495975, "grad_norm": 0.5018874406814575, "learning_rate": 2.026445399894851e-05, "loss": 0.0786, "step": 22828 }, { "epoch": 0.40429102082198815, "grad_norm": 0.9609583616256714, "learning_rate": 2.0263648354254608e-05, "loss": 0.0801, "step": 22829 }, { "epoch": 0.4043087303590166, "grad_norm": 0.6956406831741333, "learning_rate": 2.0262842692243843e-05, "loss": 0.0896, "step": 22830 }, { "epoch": 0.404326439896045, "grad_norm": 0.7547840476036072, "learning_rate": 2.0262037012918856e-05, "loss": 0.0717, "step": 22831 }, { "epoch": 0.40434414943307345, "grad_norm": 0.7790485620498657, "learning_rate": 2.02612313162823e-05, "loss": 0.0769, "step": 22832 }, { "epoch": 0.40436185897010185, "grad_norm": 1.499768614768982, "learning_rate": 2.0260425602336823e-05, "loss": 0.0736, "step": 22833 }, { "epoch": 0.4043795685071303, "grad_norm": 0.6374626755714417, "learning_rate": 2.0259619871085078e-05, "loss": 0.0642, "step": 22834 }, { "epoch": 0.4043972780441587, "grad_norm": 0.5996456742286682, "learning_rate": 2.0258814122529718e-05, "loss": 0.0685, "step": 22835 }, { "epoch": 0.40441498758118716, "grad_norm": 0.48505645990371704, "learning_rate": 2.025800835667339e-05, "loss": 0.08, "step": 22836 }, { "epoch": 0.4044326971182156, "grad_norm": 0.5413348078727722, "learning_rate": 2.025720257351875e-05, "loss": 0.0699, "step": 22837 }, { "epoch": 0.404450406655244, "grad_norm": 0.6562858819961548, "learning_rate": 2.0256396773068442e-05, "loss": 0.0699, "step": 22838 }, { "epoch": 0.40446811619227246, "grad_norm": 1.0802003145217896, "learning_rate": 2.025559095532512e-05, "loss": 0.0963, "step": 22839 }, { "epoch": 0.40448582572930086, "grad_norm": 0.8211511373519897, "learning_rate": 2.025478512029144e-05, "loss": 0.0882, "step": 22840 }, { "epoch": 0.4045035352663293, "grad_norm": 0.5265985131263733, "learning_rate": 2.0253979267970042e-05, "loss": 0.0664, "step": 22841 }, { "epoch": 0.4045212448033577, "grad_norm": 0.6808794736862183, "learning_rate": 2.0253173398363587e-05, "loss": 0.0918, "step": 22842 }, { "epoch": 0.40453895434038617, "grad_norm": 1.0338975191116333, "learning_rate": 2.0252367511474715e-05, "loss": 0.0797, "step": 22843 }, { "epoch": 0.40455666387741457, "grad_norm": 0.8512800335884094, "learning_rate": 2.0251561607306094e-05, "loss": 0.0923, "step": 22844 }, { "epoch": 0.404574373414443, "grad_norm": 0.7228925228118896, "learning_rate": 2.025075568586036e-05, "loss": 0.1072, "step": 22845 }, { "epoch": 0.4045920829514714, "grad_norm": 0.7253751754760742, "learning_rate": 2.024994974714018e-05, "loss": 0.1051, "step": 22846 }, { "epoch": 0.4046097924884999, "grad_norm": 0.9928785562515259, "learning_rate": 2.024914379114819e-05, "loss": 0.1027, "step": 22847 }, { "epoch": 0.4046275020255283, "grad_norm": 0.48026925325393677, "learning_rate": 2.024833781788705e-05, "loss": 0.0788, "step": 22848 }, { "epoch": 0.40464521156255673, "grad_norm": 0.6988264322280884, "learning_rate": 2.024753182735941e-05, "loss": 0.1123, "step": 22849 }, { "epoch": 0.4046629210995851, "grad_norm": 1.1030319929122925, "learning_rate": 2.024672581956792e-05, "loss": 0.0843, "step": 22850 }, { "epoch": 0.4046806306366136, "grad_norm": 0.9306052923202515, "learning_rate": 2.0245919794515235e-05, "loss": 0.1036, "step": 22851 }, { "epoch": 0.40469834017364203, "grad_norm": 1.0246754884719849, "learning_rate": 2.0245113752204e-05, "loss": 0.104, "step": 22852 }, { "epoch": 0.40471604971067043, "grad_norm": 0.5155499577522278, "learning_rate": 2.024430769263687e-05, "loss": 0.078, "step": 22853 }, { "epoch": 0.4047337592476989, "grad_norm": 0.5347212553024292, "learning_rate": 2.0243501615816503e-05, "loss": 0.0589, "step": 22854 }, { "epoch": 0.4047514687847273, "grad_norm": 0.6409276723861694, "learning_rate": 2.0242695521745548e-05, "loss": 0.0609, "step": 22855 }, { "epoch": 0.40476917832175574, "grad_norm": 0.7116920948028564, "learning_rate": 2.024188941042665e-05, "loss": 0.0728, "step": 22856 }, { "epoch": 0.40478688785878414, "grad_norm": 0.9320119619369507, "learning_rate": 2.024108328186247e-05, "loss": 0.0749, "step": 22857 }, { "epoch": 0.4048045973958126, "grad_norm": 0.7354645729064941, "learning_rate": 2.024027713605565e-05, "loss": 0.0878, "step": 22858 }, { "epoch": 0.404822306932841, "grad_norm": 0.4685327112674713, "learning_rate": 2.023947097300885e-05, "loss": 0.0557, "step": 22859 }, { "epoch": 0.40484001646986945, "grad_norm": 0.7566090226173401, "learning_rate": 2.0238664792724727e-05, "loss": 0.0887, "step": 22860 }, { "epoch": 0.40485772600689784, "grad_norm": 0.9609932899475098, "learning_rate": 2.023785859520592e-05, "loss": 0.0849, "step": 22861 }, { "epoch": 0.4048754355439263, "grad_norm": 0.970492959022522, "learning_rate": 2.0237052380455086e-05, "loss": 0.0985, "step": 22862 }, { "epoch": 0.4048931450809547, "grad_norm": 0.6655212044715881, "learning_rate": 2.0236246148474888e-05, "loss": 0.0682, "step": 22863 }, { "epoch": 0.40491085461798315, "grad_norm": 0.7196593880653381, "learning_rate": 2.023543989926796e-05, "loss": 0.0864, "step": 22864 }, { "epoch": 0.40492856415501155, "grad_norm": 0.5990925431251526, "learning_rate": 2.0234633632836975e-05, "loss": 0.064, "step": 22865 }, { "epoch": 0.40494627369204, "grad_norm": 0.7451491951942444, "learning_rate": 2.0233827349184565e-05, "loss": 0.0707, "step": 22866 }, { "epoch": 0.40496398322906846, "grad_norm": 0.6145603656768799, "learning_rate": 2.0233021048313396e-05, "loss": 0.0784, "step": 22867 }, { "epoch": 0.40498169276609686, "grad_norm": 0.7409718632698059, "learning_rate": 2.023221473022612e-05, "loss": 0.0638, "step": 22868 }, { "epoch": 0.4049994023031253, "grad_norm": 1.0641802549362183, "learning_rate": 2.0231408394925384e-05, "loss": 0.0758, "step": 22869 }, { "epoch": 0.4050171118401537, "grad_norm": 0.3975578248500824, "learning_rate": 2.023060204241384e-05, "loss": 0.0601, "step": 22870 }, { "epoch": 0.40503482137718216, "grad_norm": 0.6365723013877869, "learning_rate": 2.0229795672694153e-05, "loss": 0.1072, "step": 22871 }, { "epoch": 0.40505253091421056, "grad_norm": 0.7080401182174683, "learning_rate": 2.0228989285768963e-05, "loss": 0.1041, "step": 22872 }, { "epoch": 0.405070240451239, "grad_norm": 0.6547380685806274, "learning_rate": 2.0228182881640923e-05, "loss": 0.0709, "step": 22873 }, { "epoch": 0.4050879499882674, "grad_norm": 0.8953737616539001, "learning_rate": 2.0227376460312693e-05, "loss": 0.0885, "step": 22874 }, { "epoch": 0.40510565952529587, "grad_norm": 0.9848681688308716, "learning_rate": 2.0226570021786926e-05, "loss": 0.0642, "step": 22875 }, { "epoch": 0.40512336906232427, "grad_norm": 0.6070558428764343, "learning_rate": 2.0225763566066268e-05, "loss": 0.0997, "step": 22876 }, { "epoch": 0.4051410785993527, "grad_norm": 1.0949490070343018, "learning_rate": 2.022495709315338e-05, "loss": 0.0971, "step": 22877 }, { "epoch": 0.4051587881363811, "grad_norm": 1.033082127571106, "learning_rate": 2.022415060305091e-05, "loss": 0.0979, "step": 22878 }, { "epoch": 0.4051764976734096, "grad_norm": 0.7654853463172913, "learning_rate": 2.0223344095761517e-05, "loss": 0.0801, "step": 22879 }, { "epoch": 0.40519420721043803, "grad_norm": 0.7209203839302063, "learning_rate": 2.0222537571287842e-05, "loss": 0.086, "step": 22880 }, { "epoch": 0.4052119167474664, "grad_norm": 0.6820583343505859, "learning_rate": 2.022173102963255e-05, "loss": 0.0894, "step": 22881 }, { "epoch": 0.4052296262844949, "grad_norm": 0.6347004771232605, "learning_rate": 2.0220924470798294e-05, "loss": 0.069, "step": 22882 }, { "epoch": 0.4052473358215233, "grad_norm": 0.7534987330436707, "learning_rate": 2.0220117894787723e-05, "loss": 0.0957, "step": 22883 }, { "epoch": 0.40526504535855173, "grad_norm": 0.6537235379219055, "learning_rate": 2.021931130160349e-05, "loss": 0.085, "step": 22884 }, { "epoch": 0.40528275489558013, "grad_norm": 1.1585166454315186, "learning_rate": 2.021850469124825e-05, "loss": 0.0698, "step": 22885 }, { "epoch": 0.4053004644326086, "grad_norm": 0.4519543945789337, "learning_rate": 2.0217698063724663e-05, "loss": 0.0961, "step": 22886 }, { "epoch": 0.405318173969637, "grad_norm": 0.7397206425666809, "learning_rate": 2.0216891419035373e-05, "loss": 0.0499, "step": 22887 }, { "epoch": 0.40533588350666544, "grad_norm": 0.6638358235359192, "learning_rate": 2.0216084757183037e-05, "loss": 0.0686, "step": 22888 }, { "epoch": 0.40535359304369384, "grad_norm": 1.191419005393982, "learning_rate": 2.0215278078170312e-05, "loss": 0.0814, "step": 22889 }, { "epoch": 0.4053713025807223, "grad_norm": 0.7237991094589233, "learning_rate": 2.0214471381999853e-05, "loss": 0.0978, "step": 22890 }, { "epoch": 0.4053890121177507, "grad_norm": 0.8288202881813049, "learning_rate": 2.02136646686743e-05, "loss": 0.1278, "step": 22891 }, { "epoch": 0.40540672165477915, "grad_norm": 0.702528715133667, "learning_rate": 2.021285793819633e-05, "loss": 0.0871, "step": 22892 }, { "epoch": 0.40542443119180754, "grad_norm": 0.864014208316803, "learning_rate": 2.0212051190568578e-05, "loss": 0.1122, "step": 22893 }, { "epoch": 0.405442140728836, "grad_norm": 0.847841739654541, "learning_rate": 2.0211244425793706e-05, "loss": 0.0921, "step": 22894 }, { "epoch": 0.40545985026586445, "grad_norm": 0.7428502440452576, "learning_rate": 2.021043764387436e-05, "loss": 0.045, "step": 22895 }, { "epoch": 0.40547755980289285, "grad_norm": 0.5547819137573242, "learning_rate": 2.0209630844813212e-05, "loss": 0.0731, "step": 22896 }, { "epoch": 0.4054952693399213, "grad_norm": 0.6760928630828857, "learning_rate": 2.0208824028612903e-05, "loss": 0.1121, "step": 22897 }, { "epoch": 0.4055129788769497, "grad_norm": 0.7602996230125427, "learning_rate": 2.0208017195276087e-05, "loss": 0.1203, "step": 22898 }, { "epoch": 0.40553068841397816, "grad_norm": 0.7866230607032776, "learning_rate": 2.020721034480542e-05, "loss": 0.0904, "step": 22899 }, { "epoch": 0.40554839795100656, "grad_norm": 0.360725462436676, "learning_rate": 2.020640347720356e-05, "loss": 0.0678, "step": 22900 }, { "epoch": 0.405566107488035, "grad_norm": 1.1175527572631836, "learning_rate": 2.0205596592473157e-05, "loss": 0.091, "step": 22901 }, { "epoch": 0.4055838170250634, "grad_norm": 1.1104800701141357, "learning_rate": 2.0204789690616872e-05, "loss": 0.0958, "step": 22902 }, { "epoch": 0.40560152656209186, "grad_norm": 0.6624431610107422, "learning_rate": 2.0203982771637354e-05, "loss": 0.0869, "step": 22903 }, { "epoch": 0.40561923609912026, "grad_norm": 0.4444580376148224, "learning_rate": 2.0203175835537258e-05, "loss": 0.0519, "step": 22904 }, { "epoch": 0.4056369456361487, "grad_norm": 0.6605021953582764, "learning_rate": 2.0202368882319236e-05, "loss": 0.0855, "step": 22905 }, { "epoch": 0.4056546551731771, "grad_norm": 1.0424290895462036, "learning_rate": 2.020156191198595e-05, "loss": 0.0887, "step": 22906 }, { "epoch": 0.40567236471020557, "grad_norm": 1.0945171117782593, "learning_rate": 2.0200754924540056e-05, "loss": 0.0845, "step": 22907 }, { "epoch": 0.40569007424723397, "grad_norm": 0.7450660467147827, "learning_rate": 2.01999479199842e-05, "loss": 0.1052, "step": 22908 }, { "epoch": 0.4057077837842624, "grad_norm": 0.6182152032852173, "learning_rate": 2.0199140898321037e-05, "loss": 0.079, "step": 22909 }, { "epoch": 0.4057254933212909, "grad_norm": 0.9834427833557129, "learning_rate": 2.0198333859553232e-05, "loss": 0.0811, "step": 22910 }, { "epoch": 0.4057432028583193, "grad_norm": 0.39643394947052, "learning_rate": 2.0197526803683433e-05, "loss": 0.0672, "step": 22911 }, { "epoch": 0.40576091239534773, "grad_norm": 0.7501589059829712, "learning_rate": 2.0196719730714298e-05, "loss": 0.1002, "step": 22912 }, { "epoch": 0.4057786219323761, "grad_norm": 0.48425397276878357, "learning_rate": 2.019591264064848e-05, "loss": 0.0532, "step": 22913 }, { "epoch": 0.4057963314694046, "grad_norm": 0.6439971923828125, "learning_rate": 2.019510553348863e-05, "loss": 0.0496, "step": 22914 }, { "epoch": 0.405814041006433, "grad_norm": 0.541983425617218, "learning_rate": 2.0194298409237414e-05, "loss": 0.0543, "step": 22915 }, { "epoch": 0.40583175054346143, "grad_norm": 0.5997874140739441, "learning_rate": 2.019349126789748e-05, "loss": 0.0634, "step": 22916 }, { "epoch": 0.40584946008048983, "grad_norm": 0.6919761896133423, "learning_rate": 2.0192684109471487e-05, "loss": 0.0809, "step": 22917 }, { "epoch": 0.4058671696175183, "grad_norm": 0.7610214352607727, "learning_rate": 2.019187693396209e-05, "loss": 0.1225, "step": 22918 }, { "epoch": 0.4058848791545467, "grad_norm": 0.6815895438194275, "learning_rate": 2.0191069741371935e-05, "loss": 0.083, "step": 22919 }, { "epoch": 0.40590258869157514, "grad_norm": 0.4590054154396057, "learning_rate": 2.019026253170369e-05, "loss": 0.1002, "step": 22920 }, { "epoch": 0.40592029822860354, "grad_norm": 0.4576612114906311, "learning_rate": 2.0189455304960007e-05, "loss": 0.0771, "step": 22921 }, { "epoch": 0.405938007765632, "grad_norm": 0.5977521538734436, "learning_rate": 2.018864806114354e-05, "loss": 0.0753, "step": 22922 }, { "epoch": 0.4059557173026604, "grad_norm": 1.039033055305481, "learning_rate": 2.0187840800256948e-05, "loss": 0.0791, "step": 22923 }, { "epoch": 0.40597342683968884, "grad_norm": 0.8681557774543762, "learning_rate": 2.018703352230288e-05, "loss": 0.0864, "step": 22924 }, { "epoch": 0.4059911363767173, "grad_norm": 0.4102647602558136, "learning_rate": 2.0186226227283996e-05, "loss": 0.0792, "step": 22925 }, { "epoch": 0.4060088459137457, "grad_norm": 0.6119033694267273, "learning_rate": 2.0185418915202956e-05, "loss": 0.0767, "step": 22926 }, { "epoch": 0.40602655545077415, "grad_norm": 0.4023951590061188, "learning_rate": 2.0184611586062415e-05, "loss": 0.06, "step": 22927 }, { "epoch": 0.40604426498780255, "grad_norm": 0.3131586015224457, "learning_rate": 2.0183804239865022e-05, "loss": 0.0717, "step": 22928 }, { "epoch": 0.406061974524831, "grad_norm": 0.6688824892044067, "learning_rate": 2.0182996876613437e-05, "loss": 0.093, "step": 22929 }, { "epoch": 0.4060796840618594, "grad_norm": 0.9568623304367065, "learning_rate": 2.0182189496310317e-05, "loss": 0.1006, "step": 22930 }, { "epoch": 0.40609739359888786, "grad_norm": 0.668003499507904, "learning_rate": 2.018138209895832e-05, "loss": 0.0636, "step": 22931 }, { "epoch": 0.40611510313591626, "grad_norm": 0.7167365550994873, "learning_rate": 2.0180574684560095e-05, "loss": 0.0994, "step": 22932 }, { "epoch": 0.4061328126729447, "grad_norm": 0.7430147528648376, "learning_rate": 2.017976725311831e-05, "loss": 0.0967, "step": 22933 }, { "epoch": 0.4061505222099731, "grad_norm": 0.7228319644927979, "learning_rate": 2.017895980463561e-05, "loss": 0.074, "step": 22934 }, { "epoch": 0.40616823174700156, "grad_norm": 0.8117314577102661, "learning_rate": 2.0178152339114656e-05, "loss": 0.0983, "step": 22935 }, { "epoch": 0.40618594128402996, "grad_norm": 0.4434225261211395, "learning_rate": 2.0177344856558105e-05, "loss": 0.0558, "step": 22936 }, { "epoch": 0.4062036508210584, "grad_norm": 0.6539587378501892, "learning_rate": 2.0176537356968615e-05, "loss": 0.0716, "step": 22937 }, { "epoch": 0.4062213603580868, "grad_norm": 0.8027775287628174, "learning_rate": 2.0175729840348837e-05, "loss": 0.1045, "step": 22938 }, { "epoch": 0.40623906989511527, "grad_norm": 1.0022380352020264, "learning_rate": 2.017492230670143e-05, "loss": 0.1309, "step": 22939 }, { "epoch": 0.4062567794321437, "grad_norm": 0.9906224012374878, "learning_rate": 2.017411475602906e-05, "loss": 0.0805, "step": 22940 }, { "epoch": 0.4062744889691721, "grad_norm": 0.6505491137504578, "learning_rate": 2.017330718833437e-05, "loss": 0.0691, "step": 22941 }, { "epoch": 0.4062921985062006, "grad_norm": 0.612311601638794, "learning_rate": 2.017249960362003e-05, "loss": 0.0666, "step": 22942 }, { "epoch": 0.406309908043229, "grad_norm": 0.6361929178237915, "learning_rate": 2.0171692001888677e-05, "loss": 0.0755, "step": 22943 }, { "epoch": 0.4063276175802574, "grad_norm": 0.7668375372886658, "learning_rate": 2.017088438314299e-05, "loss": 0.09, "step": 22944 }, { "epoch": 0.4063453271172858, "grad_norm": 0.6730066537857056, "learning_rate": 2.0170076747385612e-05, "loss": 0.0726, "step": 22945 }, { "epoch": 0.4063630366543143, "grad_norm": 0.6548226475715637, "learning_rate": 2.0169269094619203e-05, "loss": 0.1142, "step": 22946 }, { "epoch": 0.4063807461913427, "grad_norm": 0.7377318739891052, "learning_rate": 2.016846142484643e-05, "loss": 0.0437, "step": 22947 }, { "epoch": 0.40639845572837113, "grad_norm": 1.0563442707061768, "learning_rate": 2.0167653738069936e-05, "loss": 0.0754, "step": 22948 }, { "epoch": 0.40641616526539953, "grad_norm": 0.6110846996307373, "learning_rate": 2.016684603429238e-05, "loss": 0.0949, "step": 22949 }, { "epoch": 0.406433874802428, "grad_norm": 0.8983539342880249, "learning_rate": 2.0166038313516432e-05, "loss": 0.1136, "step": 22950 }, { "epoch": 0.4064515843394564, "grad_norm": 0.6281050443649292, "learning_rate": 2.0165230575744734e-05, "loss": 0.0598, "step": 22951 }, { "epoch": 0.40646929387648484, "grad_norm": 0.4318118989467621, "learning_rate": 2.016442282097995e-05, "loss": 0.0786, "step": 22952 }, { "epoch": 0.40648700341351324, "grad_norm": 0.6505727171897888, "learning_rate": 2.0163615049224736e-05, "loss": 0.0528, "step": 22953 }, { "epoch": 0.4065047129505417, "grad_norm": 0.7445944547653198, "learning_rate": 2.0162807260481756e-05, "loss": 0.0896, "step": 22954 }, { "epoch": 0.40652242248757015, "grad_norm": 0.4204566478729248, "learning_rate": 2.0161999454753658e-05, "loss": 0.1014, "step": 22955 }, { "epoch": 0.40654013202459854, "grad_norm": 0.7641453146934509, "learning_rate": 2.0161191632043105e-05, "loss": 0.0643, "step": 22956 }, { "epoch": 0.406557841561627, "grad_norm": 0.9536505341529846, "learning_rate": 2.0160383792352757e-05, "loss": 0.0736, "step": 22957 }, { "epoch": 0.4065755510986554, "grad_norm": 0.7052583694458008, "learning_rate": 2.0159575935685263e-05, "loss": 0.1054, "step": 22958 }, { "epoch": 0.40659326063568385, "grad_norm": 1.023952603340149, "learning_rate": 2.015876806204329e-05, "loss": 0.1041, "step": 22959 }, { "epoch": 0.40661097017271225, "grad_norm": 0.8315969705581665, "learning_rate": 2.015796017142949e-05, "loss": 0.0718, "step": 22960 }, { "epoch": 0.4066286797097407, "grad_norm": 0.4468231797218323, "learning_rate": 2.0157152263846524e-05, "loss": 0.0659, "step": 22961 }, { "epoch": 0.4066463892467691, "grad_norm": 0.5642144680023193, "learning_rate": 2.0156344339297047e-05, "loss": 0.0683, "step": 22962 }, { "epoch": 0.40666409878379756, "grad_norm": 0.7321026921272278, "learning_rate": 2.0155536397783717e-05, "loss": 0.0728, "step": 22963 }, { "epoch": 0.40668180832082595, "grad_norm": 0.9069750308990479, "learning_rate": 2.0154728439309195e-05, "loss": 0.0803, "step": 22964 }, { "epoch": 0.4066995178578544, "grad_norm": 0.7955936789512634, "learning_rate": 2.0153920463876142e-05, "loss": 0.0676, "step": 22965 }, { "epoch": 0.4067172273948828, "grad_norm": 0.6736449599266052, "learning_rate": 2.015311247148721e-05, "loss": 0.0834, "step": 22966 }, { "epoch": 0.40673493693191126, "grad_norm": 0.48031771183013916, "learning_rate": 2.0152304462145053e-05, "loss": 0.0782, "step": 22967 }, { "epoch": 0.40675264646893966, "grad_norm": 0.7507286667823792, "learning_rate": 2.0151496435852344e-05, "loss": 0.0725, "step": 22968 }, { "epoch": 0.4067703560059681, "grad_norm": 0.6691054105758667, "learning_rate": 2.015068839261173e-05, "loss": 0.0678, "step": 22969 }, { "epoch": 0.40678806554299657, "grad_norm": 0.37374645471572876, "learning_rate": 2.014988033242587e-05, "loss": 0.0492, "step": 22970 }, { "epoch": 0.40680577508002497, "grad_norm": 0.9001564979553223, "learning_rate": 2.0149072255297424e-05, "loss": 0.0826, "step": 22971 }, { "epoch": 0.4068234846170534, "grad_norm": 0.9176821112632751, "learning_rate": 2.0148264161229052e-05, "loss": 0.0936, "step": 22972 }, { "epoch": 0.4068411941540818, "grad_norm": 0.7366674542427063, "learning_rate": 2.0147456050223412e-05, "loss": 0.0968, "step": 22973 }, { "epoch": 0.4068589036911103, "grad_norm": 0.6971983909606934, "learning_rate": 2.0146647922283164e-05, "loss": 0.0968, "step": 22974 }, { "epoch": 0.4068766132281387, "grad_norm": 1.0444601774215698, "learning_rate": 2.014583977741097e-05, "loss": 0.1257, "step": 22975 }, { "epoch": 0.4068943227651671, "grad_norm": 1.0004299879074097, "learning_rate": 2.0145031615609474e-05, "loss": 0.082, "step": 22976 }, { "epoch": 0.4069120323021955, "grad_norm": 0.6247841715812683, "learning_rate": 2.0144223436881345e-05, "loss": 0.0911, "step": 22977 }, { "epoch": 0.406929741839224, "grad_norm": 1.254958987236023, "learning_rate": 2.0143415241229243e-05, "loss": 0.1122, "step": 22978 }, { "epoch": 0.4069474513762524, "grad_norm": 0.48931920528411865, "learning_rate": 2.0142607028655828e-05, "loss": 0.0667, "step": 22979 }, { "epoch": 0.40696516091328083, "grad_norm": 0.6008662581443787, "learning_rate": 2.0141798799163753e-05, "loss": 0.0552, "step": 22980 }, { "epoch": 0.40698287045030923, "grad_norm": 0.7818153500556946, "learning_rate": 2.0140990552755685e-05, "loss": 0.0838, "step": 22981 }, { "epoch": 0.4070005799873377, "grad_norm": 0.5394988059997559, "learning_rate": 2.014018228943427e-05, "loss": 0.071, "step": 22982 }, { "epoch": 0.4070182895243661, "grad_norm": 0.8759750723838806, "learning_rate": 2.013937400920218e-05, "loss": 0.0829, "step": 22983 }, { "epoch": 0.40703599906139454, "grad_norm": 0.711825430393219, "learning_rate": 2.013856571206207e-05, "loss": 0.0544, "step": 22984 }, { "epoch": 0.407053708598423, "grad_norm": 0.9781798124313354, "learning_rate": 2.0137757398016595e-05, "loss": 0.0936, "step": 22985 }, { "epoch": 0.4070714181354514, "grad_norm": 0.4809337854385376, "learning_rate": 2.013694906706842e-05, "loss": 0.0934, "step": 22986 }, { "epoch": 0.40708912767247984, "grad_norm": 0.6672983765602112, "learning_rate": 2.01361407192202e-05, "loss": 0.0802, "step": 22987 }, { "epoch": 0.40710683720950824, "grad_norm": 0.7433575391769409, "learning_rate": 2.01353323544746e-05, "loss": 0.0917, "step": 22988 }, { "epoch": 0.4071245467465367, "grad_norm": 0.301736444234848, "learning_rate": 2.0134523972834275e-05, "loss": 0.0598, "step": 22989 }, { "epoch": 0.4071422562835651, "grad_norm": 0.7914435863494873, "learning_rate": 2.013371557430189e-05, "loss": 0.0903, "step": 22990 }, { "epoch": 0.40715996582059355, "grad_norm": 0.5542269945144653, "learning_rate": 2.0132907158880088e-05, "loss": 0.0692, "step": 22991 }, { "epoch": 0.40717767535762195, "grad_norm": 0.7502711415290833, "learning_rate": 2.0132098726571545e-05, "loss": 0.0952, "step": 22992 }, { "epoch": 0.4071953848946504, "grad_norm": 0.6749892830848694, "learning_rate": 2.0131290277378924e-05, "loss": 0.0892, "step": 22993 }, { "epoch": 0.4072130944316788, "grad_norm": 0.46368223428726196, "learning_rate": 2.0130481811304872e-05, "loss": 0.0971, "step": 22994 }, { "epoch": 0.40723080396870726, "grad_norm": 0.5581966638565063, "learning_rate": 2.0129673328352055e-05, "loss": 0.0534, "step": 22995 }, { "epoch": 0.40724851350573565, "grad_norm": 0.6551519632339478, "learning_rate": 2.012886482852313e-05, "loss": 0.0831, "step": 22996 }, { "epoch": 0.4072662230427641, "grad_norm": 0.7254890203475952, "learning_rate": 2.0128056311820754e-05, "loss": 0.0777, "step": 22997 }, { "epoch": 0.4072839325797925, "grad_norm": 0.9012649059295654, "learning_rate": 2.0127247778247602e-05, "loss": 0.0869, "step": 22998 }, { "epoch": 0.40730164211682096, "grad_norm": 0.6538971662521362, "learning_rate": 2.0126439227806314e-05, "loss": 0.0982, "step": 22999 }, { "epoch": 0.4073193516538494, "grad_norm": 0.9933077096939087, "learning_rate": 2.0125630660499566e-05, "loss": 0.0862, "step": 23000 }, { "epoch": 0.4073370611908778, "grad_norm": 0.755977213382721, "learning_rate": 2.0124822076330006e-05, "loss": 0.0851, "step": 23001 }, { "epoch": 0.40735477072790627, "grad_norm": 0.6978679299354553, "learning_rate": 2.0124013475300302e-05, "loss": 0.0739, "step": 23002 }, { "epoch": 0.40737248026493467, "grad_norm": 0.6648557186126709, "learning_rate": 2.0123204857413107e-05, "loss": 0.0787, "step": 23003 }, { "epoch": 0.4073901898019631, "grad_norm": 0.6093356609344482, "learning_rate": 2.0122396222671096e-05, "loss": 0.0814, "step": 23004 }, { "epoch": 0.4074078993389915, "grad_norm": 1.1186964511871338, "learning_rate": 2.0121587571076916e-05, "loss": 0.12, "step": 23005 }, { "epoch": 0.40742560887602, "grad_norm": 0.6425116658210754, "learning_rate": 2.012077890263323e-05, "loss": 0.0909, "step": 23006 }, { "epoch": 0.40744331841304837, "grad_norm": 0.4609769582748413, "learning_rate": 2.01199702173427e-05, "loss": 0.0645, "step": 23007 }, { "epoch": 0.4074610279500768, "grad_norm": 0.7120566368103027, "learning_rate": 2.0119161515207982e-05, "loss": 0.0767, "step": 23008 }, { "epoch": 0.4074787374871052, "grad_norm": 1.149811863899231, "learning_rate": 2.0118352796231745e-05, "loss": 0.128, "step": 23009 }, { "epoch": 0.4074964470241337, "grad_norm": 0.859058678150177, "learning_rate": 2.0117544060416643e-05, "loss": 0.0851, "step": 23010 }, { "epoch": 0.4075141565611621, "grad_norm": 0.5992390513420105, "learning_rate": 2.0116735307765334e-05, "loss": 0.0826, "step": 23011 }, { "epoch": 0.40753186609819053, "grad_norm": 0.7431758642196655, "learning_rate": 2.011592653828049e-05, "loss": 0.0761, "step": 23012 }, { "epoch": 0.40754957563521893, "grad_norm": 0.6894372701644897, "learning_rate": 2.0115117751964764e-05, "loss": 0.0784, "step": 23013 }, { "epoch": 0.4075672851722474, "grad_norm": 0.48851218819618225, "learning_rate": 2.0114308948820822e-05, "loss": 0.0644, "step": 23014 }, { "epoch": 0.40758499470927584, "grad_norm": 0.4459410607814789, "learning_rate": 2.011350012885131e-05, "loss": 0.08, "step": 23015 }, { "epoch": 0.40760270424630424, "grad_norm": 0.725519061088562, "learning_rate": 2.011269129205891e-05, "loss": 0.051, "step": 23016 }, { "epoch": 0.4076204137833327, "grad_norm": 0.9679292440414429, "learning_rate": 2.0111882438446267e-05, "loss": 0.0887, "step": 23017 }, { "epoch": 0.4076381233203611, "grad_norm": 1.2594000101089478, "learning_rate": 2.011107356801605e-05, "loss": 0.0615, "step": 23018 }, { "epoch": 0.40765583285738954, "grad_norm": 0.45296305418014526, "learning_rate": 2.0110264680770918e-05, "loss": 0.0532, "step": 23019 }, { "epoch": 0.40767354239441794, "grad_norm": 0.7262109518051147, "learning_rate": 2.010945577671353e-05, "loss": 0.0716, "step": 23020 }, { "epoch": 0.4076912519314464, "grad_norm": 1.1150656938552856, "learning_rate": 2.0108646855846554e-05, "loss": 0.1041, "step": 23021 }, { "epoch": 0.4077089614684748, "grad_norm": 0.8878383040428162, "learning_rate": 2.010783791817264e-05, "loss": 0.1005, "step": 23022 }, { "epoch": 0.40772667100550325, "grad_norm": 0.9762131571769714, "learning_rate": 2.0107028963694465e-05, "loss": 0.0895, "step": 23023 }, { "epoch": 0.40774438054253165, "grad_norm": 0.9807379841804504, "learning_rate": 2.0106219992414673e-05, "loss": 0.0956, "step": 23024 }, { "epoch": 0.4077620900795601, "grad_norm": 0.898948073387146, "learning_rate": 2.0105411004335937e-05, "loss": 0.1077, "step": 23025 }, { "epoch": 0.4077797996165885, "grad_norm": 0.45808538794517517, "learning_rate": 2.010460199946091e-05, "loss": 0.051, "step": 23026 }, { "epoch": 0.40779750915361695, "grad_norm": 0.6086233854293823, "learning_rate": 2.0103792977792266e-05, "loss": 0.0725, "step": 23027 }, { "epoch": 0.40781521869064535, "grad_norm": 0.5784538984298706, "learning_rate": 2.0102983939332654e-05, "loss": 0.079, "step": 23028 }, { "epoch": 0.4078329282276738, "grad_norm": 0.7385066151618958, "learning_rate": 2.0102174884084744e-05, "loss": 0.0815, "step": 23029 }, { "epoch": 0.40785063776470226, "grad_norm": 0.5690834522247314, "learning_rate": 2.0101365812051196e-05, "loss": 0.0639, "step": 23030 }, { "epoch": 0.40786834730173066, "grad_norm": 0.6984382271766663, "learning_rate": 2.010055672323467e-05, "loss": 0.0783, "step": 23031 }, { "epoch": 0.4078860568387591, "grad_norm": 0.8637034296989441, "learning_rate": 2.0099747617637826e-05, "loss": 0.1225, "step": 23032 }, { "epoch": 0.4079037663757875, "grad_norm": 0.7930966019630432, "learning_rate": 2.0098938495263328e-05, "loss": 0.0747, "step": 23033 }, { "epoch": 0.40792147591281597, "grad_norm": 0.8590693473815918, "learning_rate": 2.009812935611384e-05, "loss": 0.0581, "step": 23034 }, { "epoch": 0.40793918544984437, "grad_norm": 0.998278021812439, "learning_rate": 2.0097320200192015e-05, "loss": 0.0739, "step": 23035 }, { "epoch": 0.4079568949868728, "grad_norm": 0.5046176910400391, "learning_rate": 2.009651102750053e-05, "loss": 0.0821, "step": 23036 }, { "epoch": 0.4079746045239012, "grad_norm": 0.5806110501289368, "learning_rate": 2.0095701838042042e-05, "loss": 0.0607, "step": 23037 }, { "epoch": 0.4079923140609297, "grad_norm": 0.7430938482284546, "learning_rate": 2.0094892631819207e-05, "loss": 0.0641, "step": 23038 }, { "epoch": 0.40801002359795807, "grad_norm": 0.6736186146736145, "learning_rate": 2.0094083408834684e-05, "loss": 0.0698, "step": 23039 }, { "epoch": 0.4080277331349865, "grad_norm": 0.5814420580863953, "learning_rate": 2.009327416909115e-05, "loss": 0.0632, "step": 23040 }, { "epoch": 0.4080454426720149, "grad_norm": 0.4204719066619873, "learning_rate": 2.0092464912591257e-05, "loss": 0.0845, "step": 23041 }, { "epoch": 0.4080631522090434, "grad_norm": 0.618980348110199, "learning_rate": 2.0091655639337666e-05, "loss": 0.078, "step": 23042 }, { "epoch": 0.4080808617460718, "grad_norm": 0.8128350377082825, "learning_rate": 2.009084634933305e-05, "loss": 0.0937, "step": 23043 }, { "epoch": 0.40809857128310023, "grad_norm": 0.9121240377426147, "learning_rate": 2.0090037042580055e-05, "loss": 0.0728, "step": 23044 }, { "epoch": 0.4081162808201287, "grad_norm": 0.7916960120201111, "learning_rate": 2.0089227719081358e-05, "loss": 0.0627, "step": 23045 }, { "epoch": 0.4081339903571571, "grad_norm": 0.69870525598526, "learning_rate": 2.008841837883962e-05, "loss": 0.0475, "step": 23046 }, { "epoch": 0.40815169989418554, "grad_norm": 0.7344028949737549, "learning_rate": 2.0087609021857497e-05, "loss": 0.0838, "step": 23047 }, { "epoch": 0.40816940943121394, "grad_norm": 0.9184672832489014, "learning_rate": 2.0086799648137654e-05, "loss": 0.0806, "step": 23048 }, { "epoch": 0.4081871189682424, "grad_norm": 0.7690860033035278, "learning_rate": 2.0085990257682754e-05, "loss": 0.0654, "step": 23049 }, { "epoch": 0.4082048285052708, "grad_norm": 0.7731207609176636, "learning_rate": 2.0085180850495463e-05, "loss": 0.0732, "step": 23050 }, { "epoch": 0.40822253804229924, "grad_norm": 0.4074666500091553, "learning_rate": 2.008437142657844e-05, "loss": 0.0456, "step": 23051 }, { "epoch": 0.40824024757932764, "grad_norm": 0.38856935501098633, "learning_rate": 2.008356198593435e-05, "loss": 0.0617, "step": 23052 }, { "epoch": 0.4082579571163561, "grad_norm": 0.6425116658210754, "learning_rate": 2.0082752528565854e-05, "loss": 0.0789, "step": 23053 }, { "epoch": 0.4082756666533845, "grad_norm": 1.1423275470733643, "learning_rate": 2.008194305447562e-05, "loss": 0.067, "step": 23054 }, { "epoch": 0.40829337619041295, "grad_norm": 0.5072335600852966, "learning_rate": 2.00811335636663e-05, "loss": 0.1075, "step": 23055 }, { "epoch": 0.40831108572744135, "grad_norm": 0.6199526190757751, "learning_rate": 2.008032405614057e-05, "loss": 0.0849, "step": 23056 }, { "epoch": 0.4083287952644698, "grad_norm": 0.8547672033309937, "learning_rate": 2.007951453190109e-05, "loss": 0.0895, "step": 23057 }, { "epoch": 0.4083465048014982, "grad_norm": 0.6541351675987244, "learning_rate": 2.0078704990950515e-05, "loss": 0.0838, "step": 23058 }, { "epoch": 0.40836421433852665, "grad_norm": 1.0438992977142334, "learning_rate": 2.0077895433291514e-05, "loss": 0.0804, "step": 23059 }, { "epoch": 0.4083819238755551, "grad_norm": 1.2817351818084717, "learning_rate": 2.0077085858926757e-05, "loss": 0.0781, "step": 23060 }, { "epoch": 0.4083996334125835, "grad_norm": 0.8455130457878113, "learning_rate": 2.0076276267858896e-05, "loss": 0.0744, "step": 23061 }, { "epoch": 0.40841734294961196, "grad_norm": 0.587702214717865, "learning_rate": 2.0075466660090606e-05, "loss": 0.0696, "step": 23062 }, { "epoch": 0.40843505248664036, "grad_norm": 0.6683230996131897, "learning_rate": 2.007465703562453e-05, "loss": 0.0614, "step": 23063 }, { "epoch": 0.4084527620236688, "grad_norm": 0.6076823472976685, "learning_rate": 2.007384739446336e-05, "loss": 0.0755, "step": 23064 }, { "epoch": 0.4084704715606972, "grad_norm": 0.7508801221847534, "learning_rate": 2.0073037736609738e-05, "loss": 0.0409, "step": 23065 }, { "epoch": 0.40848818109772567, "grad_norm": 0.6771113276481628, "learning_rate": 2.007222806206634e-05, "loss": 0.0721, "step": 23066 }, { "epoch": 0.40850589063475407, "grad_norm": 0.781663179397583, "learning_rate": 2.0071418370835817e-05, "loss": 0.0951, "step": 23067 }, { "epoch": 0.4085236001717825, "grad_norm": 0.5068305730819702, "learning_rate": 2.0070608662920845e-05, "loss": 0.079, "step": 23068 }, { "epoch": 0.4085413097088109, "grad_norm": 0.6460627317428589, "learning_rate": 2.0069798938324088e-05, "loss": 0.091, "step": 23069 }, { "epoch": 0.40855901924583937, "grad_norm": 0.5077702403068542, "learning_rate": 2.00689891970482e-05, "loss": 0.0644, "step": 23070 }, { "epoch": 0.40857672878286777, "grad_norm": 0.45273417234420776, "learning_rate": 2.006817943909585e-05, "loss": 0.0736, "step": 23071 }, { "epoch": 0.4085944383198962, "grad_norm": 0.9522786140441895, "learning_rate": 2.0067369664469702e-05, "loss": 0.108, "step": 23072 }, { "epoch": 0.4086121478569246, "grad_norm": 1.1264911890029907, "learning_rate": 2.006655987317242e-05, "loss": 0.0864, "step": 23073 }, { "epoch": 0.4086298573939531, "grad_norm": 0.8231020569801331, "learning_rate": 2.006575006520667e-05, "loss": 0.0825, "step": 23074 }, { "epoch": 0.40864756693098153, "grad_norm": 0.5384263396263123, "learning_rate": 2.006494024057512e-05, "loss": 0.0662, "step": 23075 }, { "epoch": 0.40866527646800993, "grad_norm": 0.9496863484382629, "learning_rate": 2.0064130399280424e-05, "loss": 0.091, "step": 23076 }, { "epoch": 0.4086829860050384, "grad_norm": 1.2416468858718872, "learning_rate": 2.0063320541325244e-05, "loss": 0.1189, "step": 23077 }, { "epoch": 0.4087006955420668, "grad_norm": 0.9821388125419617, "learning_rate": 2.006251066671226e-05, "loss": 0.0953, "step": 23078 }, { "epoch": 0.40871840507909524, "grad_norm": 0.9469196200370789, "learning_rate": 2.0061700775444126e-05, "loss": 0.0561, "step": 23079 }, { "epoch": 0.40873611461612364, "grad_norm": 0.7298834323883057, "learning_rate": 2.006089086752351e-05, "loss": 0.0903, "step": 23080 }, { "epoch": 0.4087538241531521, "grad_norm": 1.0388801097869873, "learning_rate": 2.0060080942953072e-05, "loss": 0.1141, "step": 23081 }, { "epoch": 0.4087715336901805, "grad_norm": 0.6635755896568298, "learning_rate": 2.0059271001735483e-05, "loss": 0.0807, "step": 23082 }, { "epoch": 0.40878924322720894, "grad_norm": 1.0814495086669922, "learning_rate": 2.0058461043873397e-05, "loss": 0.1091, "step": 23083 }, { "epoch": 0.40880695276423734, "grad_norm": 0.6642794013023376, "learning_rate": 2.0057651069369493e-05, "loss": 0.0854, "step": 23084 }, { "epoch": 0.4088246623012658, "grad_norm": 0.9494882225990295, "learning_rate": 2.005684107822643e-05, "loss": 0.1072, "step": 23085 }, { "epoch": 0.4088423718382942, "grad_norm": 0.6896620988845825, "learning_rate": 2.0056031070446867e-05, "loss": 0.0671, "step": 23086 }, { "epoch": 0.40886008137532265, "grad_norm": 1.2005547285079956, "learning_rate": 2.0055221046033474e-05, "loss": 0.1015, "step": 23087 }, { "epoch": 0.40887779091235105, "grad_norm": 0.8973137736320496, "learning_rate": 2.005441100498891e-05, "loss": 0.0729, "step": 23088 }, { "epoch": 0.4088955004493795, "grad_norm": 0.7616205811500549, "learning_rate": 2.0053600947315852e-05, "loss": 0.0663, "step": 23089 }, { "epoch": 0.40891320998640796, "grad_norm": 1.0846662521362305, "learning_rate": 2.0052790873016955e-05, "loss": 0.0662, "step": 23090 }, { "epoch": 0.40893091952343635, "grad_norm": 0.7656596302986145, "learning_rate": 2.0051980782094894e-05, "loss": 0.0692, "step": 23091 }, { "epoch": 0.4089486290604648, "grad_norm": 0.7163341641426086, "learning_rate": 2.005117067455232e-05, "loss": 0.0693, "step": 23092 }, { "epoch": 0.4089663385974932, "grad_norm": 0.8086057901382446, "learning_rate": 2.0050360550391907e-05, "loss": 0.0778, "step": 23093 }, { "epoch": 0.40898404813452166, "grad_norm": 0.4472181797027588, "learning_rate": 2.004955040961632e-05, "loss": 0.042, "step": 23094 }, { "epoch": 0.40900175767155006, "grad_norm": 0.5920597314834595, "learning_rate": 2.004874025222822e-05, "loss": 0.0671, "step": 23095 }, { "epoch": 0.4090194672085785, "grad_norm": 0.7045744061470032, "learning_rate": 2.0047930078230273e-05, "loss": 0.0906, "step": 23096 }, { "epoch": 0.4090371767456069, "grad_norm": 0.5628425478935242, "learning_rate": 2.004711988762515e-05, "loss": 0.0954, "step": 23097 }, { "epoch": 0.40905488628263537, "grad_norm": 0.7250984907150269, "learning_rate": 2.0046309680415515e-05, "loss": 0.0807, "step": 23098 }, { "epoch": 0.40907259581966376, "grad_norm": 0.7741914987564087, "learning_rate": 2.0045499456604027e-05, "loss": 0.0947, "step": 23099 }, { "epoch": 0.4090903053566922, "grad_norm": 1.08157217502594, "learning_rate": 2.004468921619336e-05, "loss": 0.0874, "step": 23100 }, { "epoch": 0.4091080148937206, "grad_norm": 0.9584625959396362, "learning_rate": 2.0043878959186177e-05, "loss": 0.1032, "step": 23101 }, { "epoch": 0.40912572443074907, "grad_norm": 1.0410962104797363, "learning_rate": 2.0043068685585137e-05, "loss": 0.0966, "step": 23102 }, { "epoch": 0.40914343396777747, "grad_norm": 0.7322121262550354, "learning_rate": 2.0042258395392914e-05, "loss": 0.0708, "step": 23103 }, { "epoch": 0.4091611435048059, "grad_norm": 0.9072728157043457, "learning_rate": 2.0041448088612174e-05, "loss": 0.0702, "step": 23104 }, { "epoch": 0.4091788530418344, "grad_norm": 0.617889940738678, "learning_rate": 2.0040637765245577e-05, "loss": 0.0829, "step": 23105 }, { "epoch": 0.4091965625788628, "grad_norm": 0.4471270442008972, "learning_rate": 2.0039827425295786e-05, "loss": 0.0759, "step": 23106 }, { "epoch": 0.40921427211589123, "grad_norm": 1.0042123794555664, "learning_rate": 2.003901706876548e-05, "loss": 0.1149, "step": 23107 }, { "epoch": 0.40923198165291963, "grad_norm": 1.1977430582046509, "learning_rate": 2.0038206695657315e-05, "loss": 0.0631, "step": 23108 }, { "epoch": 0.4092496911899481, "grad_norm": 0.6943140029907227, "learning_rate": 2.003739630597396e-05, "loss": 0.0714, "step": 23109 }, { "epoch": 0.4092674007269765, "grad_norm": 0.700445294380188, "learning_rate": 2.003658589971808e-05, "loss": 0.0893, "step": 23110 }, { "epoch": 0.40928511026400494, "grad_norm": 0.9837494492530823, "learning_rate": 2.003577547689234e-05, "loss": 0.1051, "step": 23111 }, { "epoch": 0.40930281980103334, "grad_norm": 0.7966005206108093, "learning_rate": 2.003496503749941e-05, "loss": 0.0626, "step": 23112 }, { "epoch": 0.4093205293380618, "grad_norm": 0.7505972981452942, "learning_rate": 2.0034154581541955e-05, "loss": 0.1098, "step": 23113 }, { "epoch": 0.4093382388750902, "grad_norm": 0.8047946691513062, "learning_rate": 2.003334410902264e-05, "loss": 0.0729, "step": 23114 }, { "epoch": 0.40935594841211864, "grad_norm": 0.8193312287330627, "learning_rate": 2.0032533619944134e-05, "loss": 0.0605, "step": 23115 }, { "epoch": 0.40937365794914704, "grad_norm": 0.4552202522754669, "learning_rate": 2.0031723114309098e-05, "loss": 0.0596, "step": 23116 }, { "epoch": 0.4093913674861755, "grad_norm": 0.6864802241325378, "learning_rate": 2.0030912592120206e-05, "loss": 0.0728, "step": 23117 }, { "epoch": 0.4094090770232039, "grad_norm": 0.9124594330787659, "learning_rate": 2.0030102053380117e-05, "loss": 0.0784, "step": 23118 }, { "epoch": 0.40942678656023235, "grad_norm": 0.7124873399734497, "learning_rate": 2.0029291498091504e-05, "loss": 0.0739, "step": 23119 }, { "epoch": 0.4094444960972608, "grad_norm": 0.4610695242881775, "learning_rate": 2.0028480926257025e-05, "loss": 0.0703, "step": 23120 }, { "epoch": 0.4094622056342892, "grad_norm": 1.1255048513412476, "learning_rate": 2.0027670337879358e-05, "loss": 0.0967, "step": 23121 }, { "epoch": 0.40947991517131765, "grad_norm": 0.9730676412582397, "learning_rate": 2.0026859732961158e-05, "loss": 0.0908, "step": 23122 }, { "epoch": 0.40949762470834605, "grad_norm": 0.8177198767662048, "learning_rate": 2.0026049111505103e-05, "loss": 0.085, "step": 23123 }, { "epoch": 0.4095153342453745, "grad_norm": 0.5866222381591797, "learning_rate": 2.0025238473513857e-05, "loss": 0.0833, "step": 23124 }, { "epoch": 0.4095330437824029, "grad_norm": 0.5433495044708252, "learning_rate": 2.0024427818990076e-05, "loss": 0.064, "step": 23125 }, { "epoch": 0.40955075331943136, "grad_norm": 0.46149900555610657, "learning_rate": 2.0023617147936445e-05, "loss": 0.0777, "step": 23126 }, { "epoch": 0.40956846285645976, "grad_norm": 0.6150116324424744, "learning_rate": 2.0022806460355618e-05, "loss": 0.1007, "step": 23127 }, { "epoch": 0.4095861723934882, "grad_norm": 0.6359500288963318, "learning_rate": 2.0021995756250265e-05, "loss": 0.0788, "step": 23128 }, { "epoch": 0.4096038819305166, "grad_norm": 0.55332350730896, "learning_rate": 2.0021185035623054e-05, "loss": 0.0723, "step": 23129 }, { "epoch": 0.40962159146754507, "grad_norm": 0.7214668393135071, "learning_rate": 2.002037429847665e-05, "loss": 0.0993, "step": 23130 }, { "epoch": 0.40963930100457346, "grad_norm": 1.1183379888534546, "learning_rate": 2.0019563544813726e-05, "loss": 0.1092, "step": 23131 }, { "epoch": 0.4096570105416019, "grad_norm": 0.5959768891334534, "learning_rate": 2.0018752774636947e-05, "loss": 0.1162, "step": 23132 }, { "epoch": 0.4096747200786303, "grad_norm": 0.6185164451599121, "learning_rate": 2.001794198794898e-05, "loss": 0.0494, "step": 23133 }, { "epoch": 0.40969242961565877, "grad_norm": 0.74452805519104, "learning_rate": 2.0017131184752482e-05, "loss": 0.1104, "step": 23134 }, { "epoch": 0.4097101391526872, "grad_norm": 0.744996964931488, "learning_rate": 2.0016320365050134e-05, "loss": 0.109, "step": 23135 }, { "epoch": 0.4097278486897156, "grad_norm": 0.6296323537826538, "learning_rate": 2.00155095288446e-05, "loss": 0.0982, "step": 23136 }, { "epoch": 0.4097455582267441, "grad_norm": 0.7341364026069641, "learning_rate": 2.0014698676138552e-05, "loss": 0.0638, "step": 23137 }, { "epoch": 0.4097632677637725, "grad_norm": 0.47727787494659424, "learning_rate": 2.0013887806934647e-05, "loss": 0.0442, "step": 23138 }, { "epoch": 0.40978097730080093, "grad_norm": 0.8331189751625061, "learning_rate": 2.0013076921235555e-05, "loss": 0.0697, "step": 23139 }, { "epoch": 0.40979868683782933, "grad_norm": 0.2200002521276474, "learning_rate": 2.0012266019043955e-05, "loss": 0.1075, "step": 23140 }, { "epoch": 0.4098163963748578, "grad_norm": 0.4146588444709778, "learning_rate": 2.0011455100362505e-05, "loss": 0.0371, "step": 23141 }, { "epoch": 0.4098341059118862, "grad_norm": 0.7277596592903137, "learning_rate": 2.0010644165193873e-05, "loss": 0.0707, "step": 23142 }, { "epoch": 0.40985181544891464, "grad_norm": 1.1522784233093262, "learning_rate": 2.000983321354073e-05, "loss": 0.11, "step": 23143 }, { "epoch": 0.40986952498594303, "grad_norm": 0.5220716595649719, "learning_rate": 2.0009022245405738e-05, "loss": 0.0807, "step": 23144 }, { "epoch": 0.4098872345229715, "grad_norm": 0.5653984546661377, "learning_rate": 2.000821126079157e-05, "loss": 0.1122, "step": 23145 }, { "epoch": 0.4099049440599999, "grad_norm": 0.8062781691551208, "learning_rate": 2.00074002597009e-05, "loss": 0.0924, "step": 23146 }, { "epoch": 0.40992265359702834, "grad_norm": 0.5509577989578247, "learning_rate": 2.000658924213638e-05, "loss": 0.083, "step": 23147 }, { "epoch": 0.4099403631340568, "grad_norm": 0.8586263656616211, "learning_rate": 2.00057782081007e-05, "loss": 0.1158, "step": 23148 }, { "epoch": 0.4099580726710852, "grad_norm": 0.7095521092414856, "learning_rate": 2.0004967157596506e-05, "loss": 0.0997, "step": 23149 }, { "epoch": 0.40997578220811365, "grad_norm": 0.3527519404888153, "learning_rate": 2.000415609062648e-05, "loss": 0.0805, "step": 23150 }, { "epoch": 0.40999349174514205, "grad_norm": 0.5123306512832642, "learning_rate": 2.0003345007193287e-05, "loss": 0.0741, "step": 23151 }, { "epoch": 0.4100112012821705, "grad_norm": 0.7651352286338806, "learning_rate": 2.0002533907299593e-05, "loss": 0.095, "step": 23152 }, { "epoch": 0.4100289108191989, "grad_norm": 0.3627704679965973, "learning_rate": 2.0001722790948072e-05, "loss": 0.1014, "step": 23153 }, { "epoch": 0.41004662035622735, "grad_norm": 0.6751750111579895, "learning_rate": 2.000091165814138e-05, "loss": 0.0965, "step": 23154 }, { "epoch": 0.41006432989325575, "grad_norm": 0.7393704652786255, "learning_rate": 2.0000100508882208e-05, "loss": 0.0763, "step": 23155 }, { "epoch": 0.4100820394302842, "grad_norm": 0.7113251686096191, "learning_rate": 1.9999289343173203e-05, "loss": 0.0845, "step": 23156 }, { "epoch": 0.4100997489673126, "grad_norm": 0.791881263256073, "learning_rate": 1.9998478161017045e-05, "loss": 0.1075, "step": 23157 }, { "epoch": 0.41011745850434106, "grad_norm": 0.559004545211792, "learning_rate": 1.9997666962416392e-05, "loss": 0.0886, "step": 23158 }, { "epoch": 0.41013516804136946, "grad_norm": 0.8571139574050903, "learning_rate": 1.9996855747373926e-05, "loss": 0.0587, "step": 23159 }, { "epoch": 0.4101528775783979, "grad_norm": 0.671245276927948, "learning_rate": 1.9996044515892308e-05, "loss": 0.0717, "step": 23160 }, { "epoch": 0.4101705871154263, "grad_norm": 1.0387591123580933, "learning_rate": 1.9995233267974212e-05, "loss": 0.0944, "step": 23161 }, { "epoch": 0.41018829665245476, "grad_norm": 1.19802987575531, "learning_rate": 1.9994422003622302e-05, "loss": 0.0826, "step": 23162 }, { "epoch": 0.4102060061894832, "grad_norm": 0.9255942702293396, "learning_rate": 1.999361072283925e-05, "loss": 0.1144, "step": 23163 }, { "epoch": 0.4102237157265116, "grad_norm": 1.458331823348999, "learning_rate": 1.999279942562772e-05, "loss": 0.1001, "step": 23164 }, { "epoch": 0.41024142526354007, "grad_norm": 0.7237004637718201, "learning_rate": 1.999198811199039e-05, "loss": 0.0862, "step": 23165 }, { "epoch": 0.41025913480056847, "grad_norm": 0.7375348210334778, "learning_rate": 1.9991176781929917e-05, "loss": 0.0993, "step": 23166 }, { "epoch": 0.4102768443375969, "grad_norm": 0.8154007792472839, "learning_rate": 1.9990365435448988e-05, "loss": 0.0817, "step": 23167 }, { "epoch": 0.4102945538746253, "grad_norm": 0.7490528225898743, "learning_rate": 1.9989554072550252e-05, "loss": 0.1131, "step": 23168 }, { "epoch": 0.4103122634116538, "grad_norm": 0.7461866736412048, "learning_rate": 1.998874269323639e-05, "loss": 0.0835, "step": 23169 }, { "epoch": 0.4103299729486822, "grad_norm": 0.9540725350379944, "learning_rate": 1.998793129751007e-05, "loss": 0.1158, "step": 23170 }, { "epoch": 0.41034768248571063, "grad_norm": 0.3801586627960205, "learning_rate": 1.9987119885373964e-05, "loss": 0.0714, "step": 23171 }, { "epoch": 0.41036539202273903, "grad_norm": 0.9541348814964294, "learning_rate": 1.998630845683073e-05, "loss": 0.1007, "step": 23172 }, { "epoch": 0.4103831015597675, "grad_norm": 0.46086397767066956, "learning_rate": 1.998549701188305e-05, "loss": 0.068, "step": 23173 }, { "epoch": 0.4104008110967959, "grad_norm": 0.8051877617835999, "learning_rate": 1.998468555053359e-05, "loss": 0.0885, "step": 23174 }, { "epoch": 0.41041852063382434, "grad_norm": 0.46007490158081055, "learning_rate": 1.998387407278502e-05, "loss": 0.0702, "step": 23175 }, { "epoch": 0.41043623017085273, "grad_norm": 0.4433250427246094, "learning_rate": 1.998306257864001e-05, "loss": 0.0643, "step": 23176 }, { "epoch": 0.4104539397078812, "grad_norm": 0.9216062426567078, "learning_rate": 1.998225106810122e-05, "loss": 0.0874, "step": 23177 }, { "epoch": 0.41047164924490964, "grad_norm": 0.8101752400398254, "learning_rate": 1.998143954117133e-05, "loss": 0.1067, "step": 23178 }, { "epoch": 0.41048935878193804, "grad_norm": 0.5890591144561768, "learning_rate": 1.9980627997853012e-05, "loss": 0.0931, "step": 23179 }, { "epoch": 0.4105070683189665, "grad_norm": 0.7000582218170166, "learning_rate": 1.997981643814893e-05, "loss": 0.0728, "step": 23180 }, { "epoch": 0.4105247778559949, "grad_norm": 1.1887128353118896, "learning_rate": 1.997900486206176e-05, "loss": 0.1276, "step": 23181 }, { "epoch": 0.41054248739302335, "grad_norm": 0.7091704607009888, "learning_rate": 1.997819326959416e-05, "loss": 0.0821, "step": 23182 }, { "epoch": 0.41056019693005175, "grad_norm": 0.8275741934776306, "learning_rate": 1.9977381660748807e-05, "loss": 0.0864, "step": 23183 }, { "epoch": 0.4105779064670802, "grad_norm": 0.5208624601364136, "learning_rate": 1.9976570035528376e-05, "loss": 0.082, "step": 23184 }, { "epoch": 0.4105956160041086, "grad_norm": 0.4191149175167084, "learning_rate": 1.9975758393935533e-05, "loss": 0.0519, "step": 23185 }, { "epoch": 0.41061332554113705, "grad_norm": 0.783248245716095, "learning_rate": 1.9974946735972945e-05, "loss": 0.0486, "step": 23186 }, { "epoch": 0.41063103507816545, "grad_norm": 0.5898186564445496, "learning_rate": 1.997413506164329e-05, "loss": 0.0883, "step": 23187 }, { "epoch": 0.4106487446151939, "grad_norm": 0.8192641139030457, "learning_rate": 1.997332337094923e-05, "loss": 0.114, "step": 23188 }, { "epoch": 0.4106664541522223, "grad_norm": 0.6461530923843384, "learning_rate": 1.997251166389344e-05, "loss": 0.0847, "step": 23189 }, { "epoch": 0.41068416368925076, "grad_norm": 2.4981942176818848, "learning_rate": 1.9971699940478592e-05, "loss": 0.0854, "step": 23190 }, { "epoch": 0.41070187322627916, "grad_norm": 0.5309120416641235, "learning_rate": 1.9970888200707353e-05, "loss": 0.1026, "step": 23191 }, { "epoch": 0.4107195827633076, "grad_norm": 1.2996152639389038, "learning_rate": 1.997007644458239e-05, "loss": 0.1045, "step": 23192 }, { "epoch": 0.41073729230033607, "grad_norm": 0.9219275116920471, "learning_rate": 1.9969264672106383e-05, "loss": 0.1324, "step": 23193 }, { "epoch": 0.41075500183736446, "grad_norm": 0.5492362976074219, "learning_rate": 1.9968452883282e-05, "loss": 0.0908, "step": 23194 }, { "epoch": 0.4107727113743929, "grad_norm": 0.6179863810539246, "learning_rate": 1.996764107811191e-05, "loss": 0.0658, "step": 23195 }, { "epoch": 0.4107904209114213, "grad_norm": 0.4893108010292053, "learning_rate": 1.9966829256598775e-05, "loss": 0.0902, "step": 23196 }, { "epoch": 0.41080813044844977, "grad_norm": 0.6749781966209412, "learning_rate": 1.996601741874528e-05, "loss": 0.0842, "step": 23197 }, { "epoch": 0.41082583998547817, "grad_norm": 1.150749683380127, "learning_rate": 1.9965205564554087e-05, "loss": 0.1181, "step": 23198 }, { "epoch": 0.4108435495225066, "grad_norm": 0.7350566387176514, "learning_rate": 1.9964393694027872e-05, "loss": 0.0613, "step": 23199 }, { "epoch": 0.410861259059535, "grad_norm": 0.7624791860580444, "learning_rate": 1.99635818071693e-05, "loss": 0.1014, "step": 23200 }, { "epoch": 0.4108789685965635, "grad_norm": 0.5088481903076172, "learning_rate": 1.9962769903981053e-05, "loss": 0.0879, "step": 23201 }, { "epoch": 0.4108966781335919, "grad_norm": 0.5871715545654297, "learning_rate": 1.996195798446579e-05, "loss": 0.0537, "step": 23202 }, { "epoch": 0.41091438767062033, "grad_norm": 1.2092210054397583, "learning_rate": 1.996114604862619e-05, "loss": 0.1023, "step": 23203 }, { "epoch": 0.4109320972076487, "grad_norm": 1.045738935470581, "learning_rate": 1.9960334096464922e-05, "loss": 0.0936, "step": 23204 }, { "epoch": 0.4109498067446772, "grad_norm": 0.4881857931613922, "learning_rate": 1.9959522127984653e-05, "loss": 0.1023, "step": 23205 }, { "epoch": 0.4109675162817056, "grad_norm": 0.6576319336891174, "learning_rate": 1.9958710143188057e-05, "loss": 0.0575, "step": 23206 }, { "epoch": 0.41098522581873403, "grad_norm": 0.9115997552871704, "learning_rate": 1.995789814207781e-05, "loss": 0.0905, "step": 23207 }, { "epoch": 0.4110029353557625, "grad_norm": 0.4999978244304657, "learning_rate": 1.9957086124656576e-05, "loss": 0.0825, "step": 23208 }, { "epoch": 0.4110206448927909, "grad_norm": 0.34957364201545715, "learning_rate": 1.995627409092703e-05, "loss": 0.0946, "step": 23209 }, { "epoch": 0.41103835442981934, "grad_norm": 0.933971107006073, "learning_rate": 1.9955462040891852e-05, "loss": 0.0931, "step": 23210 }, { "epoch": 0.41105606396684774, "grad_norm": 0.5161330699920654, "learning_rate": 1.9954649974553698e-05, "loss": 0.066, "step": 23211 }, { "epoch": 0.4110737735038762, "grad_norm": 0.6021808981895447, "learning_rate": 1.9953837891915244e-05, "loss": 0.069, "step": 23212 }, { "epoch": 0.4110914830409046, "grad_norm": 0.5062015652656555, "learning_rate": 1.995302579297917e-05, "loss": 0.075, "step": 23213 }, { "epoch": 0.41110919257793305, "grad_norm": 0.7714529037475586, "learning_rate": 1.9952213677748138e-05, "loss": 0.0615, "step": 23214 }, { "epoch": 0.41112690211496145, "grad_norm": 0.44280025362968445, "learning_rate": 1.995140154622483e-05, "loss": 0.0401, "step": 23215 }, { "epoch": 0.4111446116519899, "grad_norm": 0.7567553520202637, "learning_rate": 1.9950589398411903e-05, "loss": 0.0719, "step": 23216 }, { "epoch": 0.4111623211890183, "grad_norm": 0.8741481304168701, "learning_rate": 1.994977723431204e-05, "loss": 0.1067, "step": 23217 }, { "epoch": 0.41118003072604675, "grad_norm": 0.7247104048728943, "learning_rate": 1.9948965053927914e-05, "loss": 0.0785, "step": 23218 }, { "epoch": 0.41119774026307515, "grad_norm": 0.8246129155158997, "learning_rate": 1.9948152857262193e-05, "loss": 0.0984, "step": 23219 }, { "epoch": 0.4112154498001036, "grad_norm": 0.7070779800415039, "learning_rate": 1.994734064431755e-05, "loss": 0.0608, "step": 23220 }, { "epoch": 0.411233159337132, "grad_norm": 0.4914900064468384, "learning_rate": 1.9946528415096654e-05, "loss": 0.102, "step": 23221 }, { "epoch": 0.41125086887416046, "grad_norm": 0.5160431265830994, "learning_rate": 1.9945716169602185e-05, "loss": 0.085, "step": 23222 }, { "epoch": 0.4112685784111889, "grad_norm": 0.7508330345153809, "learning_rate": 1.9944903907836802e-05, "loss": 0.0749, "step": 23223 }, { "epoch": 0.4112862879482173, "grad_norm": 0.8812789916992188, "learning_rate": 1.994409162980319e-05, "loss": 0.0975, "step": 23224 }, { "epoch": 0.41130399748524576, "grad_norm": 0.4967309236526489, "learning_rate": 1.9943279335504016e-05, "loss": 0.0767, "step": 23225 }, { "epoch": 0.41132170702227416, "grad_norm": 1.1484215259552002, "learning_rate": 1.9942467024941953e-05, "loss": 0.0916, "step": 23226 }, { "epoch": 0.4113394165593026, "grad_norm": 0.7073158621788025, "learning_rate": 1.9941654698119673e-05, "loss": 0.0885, "step": 23227 }, { "epoch": 0.411357126096331, "grad_norm": 0.5808917284011841, "learning_rate": 1.994084235503985e-05, "loss": 0.0498, "step": 23228 }, { "epoch": 0.41137483563335947, "grad_norm": 0.8496379852294922, "learning_rate": 1.9940029995705154e-05, "loss": 0.1133, "step": 23229 }, { "epoch": 0.41139254517038787, "grad_norm": 0.6562903523445129, "learning_rate": 1.993921762011826e-05, "loss": 0.0952, "step": 23230 }, { "epoch": 0.4114102547074163, "grad_norm": 0.6222124099731445, "learning_rate": 1.993840522828184e-05, "loss": 0.0573, "step": 23231 }, { "epoch": 0.4114279642444447, "grad_norm": 0.5095108151435852, "learning_rate": 1.9937592820198562e-05, "loss": 0.0702, "step": 23232 }, { "epoch": 0.4114456737814732, "grad_norm": 0.6670632362365723, "learning_rate": 1.993678039587111e-05, "loss": 0.0778, "step": 23233 }, { "epoch": 0.4114633833185016, "grad_norm": 0.8913232088088989, "learning_rate": 1.9935967955302145e-05, "loss": 0.0697, "step": 23234 }, { "epoch": 0.41148109285553003, "grad_norm": 0.8125097751617432, "learning_rate": 1.9935155498494345e-05, "loss": 0.098, "step": 23235 }, { "epoch": 0.4114988023925584, "grad_norm": 0.6219647526741028, "learning_rate": 1.9934343025450386e-05, "loss": 0.0797, "step": 23236 }, { "epoch": 0.4115165119295869, "grad_norm": 0.43837204575538635, "learning_rate": 1.9933530536172933e-05, "loss": 0.1106, "step": 23237 }, { "epoch": 0.41153422146661534, "grad_norm": 0.8421443104743958, "learning_rate": 1.993271803066467e-05, "loss": 0.0806, "step": 23238 }, { "epoch": 0.41155193100364373, "grad_norm": 0.5322011113166809, "learning_rate": 1.9931905508928256e-05, "loss": 0.0722, "step": 23239 }, { "epoch": 0.4115696405406722, "grad_norm": 0.6558750867843628, "learning_rate": 1.9931092970966378e-05, "loss": 0.0912, "step": 23240 }, { "epoch": 0.4115873500777006, "grad_norm": 0.6221596598625183, "learning_rate": 1.9930280416781694e-05, "loss": 0.0625, "step": 23241 }, { "epoch": 0.41160505961472904, "grad_norm": 0.45518743991851807, "learning_rate": 1.9929467846376896e-05, "loss": 0.0644, "step": 23242 }, { "epoch": 0.41162276915175744, "grad_norm": 0.5491266846656799, "learning_rate": 1.9928655259754645e-05, "loss": 0.0858, "step": 23243 }, { "epoch": 0.4116404786887859, "grad_norm": 0.6247519254684448, "learning_rate": 1.9927842656917613e-05, "loss": 0.0914, "step": 23244 }, { "epoch": 0.4116581882258143, "grad_norm": 0.6275997161865234, "learning_rate": 1.992703003786848e-05, "loss": 0.0674, "step": 23245 }, { "epoch": 0.41167589776284275, "grad_norm": 0.6503551602363586, "learning_rate": 1.992621740260991e-05, "loss": 0.0555, "step": 23246 }, { "epoch": 0.41169360729987114, "grad_norm": 0.6341824531555176, "learning_rate": 1.9925404751144594e-05, "loss": 0.0632, "step": 23247 }, { "epoch": 0.4117113168368996, "grad_norm": 0.4932858943939209, "learning_rate": 1.992459208347519e-05, "loss": 0.0868, "step": 23248 }, { "epoch": 0.411729026373928, "grad_norm": 0.6047669053077698, "learning_rate": 1.9923779399604374e-05, "loss": 0.0582, "step": 23249 }, { "epoch": 0.41174673591095645, "grad_norm": 0.8535919785499573, "learning_rate": 1.9922966699534827e-05, "loss": 0.0811, "step": 23250 }, { "epoch": 0.41176444544798485, "grad_norm": 0.8065668344497681, "learning_rate": 1.992215398326921e-05, "loss": 0.1042, "step": 23251 }, { "epoch": 0.4117821549850133, "grad_norm": 0.3867082893848419, "learning_rate": 1.9921341250810207e-05, "loss": 0.1043, "step": 23252 }, { "epoch": 0.41179986452204176, "grad_norm": 0.6575846076011658, "learning_rate": 1.992052850216049e-05, "loss": 0.0629, "step": 23253 }, { "epoch": 0.41181757405907016, "grad_norm": 0.5607206225395203, "learning_rate": 1.9919715737322732e-05, "loss": 0.0696, "step": 23254 }, { "epoch": 0.4118352835960986, "grad_norm": 0.7584275007247925, "learning_rate": 1.9918902956299607e-05, "loss": 0.0877, "step": 23255 }, { "epoch": 0.411852993133127, "grad_norm": 0.4926973581314087, "learning_rate": 1.9918090159093788e-05, "loss": 0.0968, "step": 23256 }, { "epoch": 0.41187070267015546, "grad_norm": 0.4904899299144745, "learning_rate": 1.9917277345707953e-05, "loss": 0.063, "step": 23257 }, { "epoch": 0.41188841220718386, "grad_norm": 0.6856705546379089, "learning_rate": 1.9916464516144768e-05, "loss": 0.0639, "step": 23258 }, { "epoch": 0.4119061217442123, "grad_norm": 0.5453476905822754, "learning_rate": 1.9915651670406912e-05, "loss": 0.0741, "step": 23259 }, { "epoch": 0.4119238312812407, "grad_norm": 0.5979713797569275, "learning_rate": 1.991483880849706e-05, "loss": 0.0838, "step": 23260 }, { "epoch": 0.41194154081826917, "grad_norm": 0.434431791305542, "learning_rate": 1.991402593041789e-05, "loss": 0.0595, "step": 23261 }, { "epoch": 0.41195925035529757, "grad_norm": 0.7982280254364014, "learning_rate": 1.9913213036172068e-05, "loss": 0.0688, "step": 23262 }, { "epoch": 0.411976959892326, "grad_norm": 0.9648523330688477, "learning_rate": 1.991240012576227e-05, "loss": 0.0714, "step": 23263 }, { "epoch": 0.4119946694293544, "grad_norm": 0.3912932276725769, "learning_rate": 1.9911587199191177e-05, "loss": 0.0646, "step": 23264 }, { "epoch": 0.4120123789663829, "grad_norm": 0.6405133605003357, "learning_rate": 1.9910774256461456e-05, "loss": 0.0515, "step": 23265 }, { "epoch": 0.4120300885034113, "grad_norm": 0.586516261100769, "learning_rate": 1.9909961297575787e-05, "loss": 0.0697, "step": 23266 }, { "epoch": 0.41204779804043973, "grad_norm": 0.8702840209007263, "learning_rate": 1.9909148322536838e-05, "loss": 0.111, "step": 23267 }, { "epoch": 0.4120655075774682, "grad_norm": 0.6653152704238892, "learning_rate": 1.9908335331347288e-05, "loss": 0.0737, "step": 23268 }, { "epoch": 0.4120832171144966, "grad_norm": 0.5580484867095947, "learning_rate": 1.9907522324009813e-05, "loss": 0.1116, "step": 23269 }, { "epoch": 0.41210092665152503, "grad_norm": 0.8155533075332642, "learning_rate": 1.9906709300527088e-05, "loss": 0.108, "step": 23270 }, { "epoch": 0.41211863618855343, "grad_norm": 0.6299554705619812, "learning_rate": 1.9905896260901785e-05, "loss": 0.0831, "step": 23271 }, { "epoch": 0.4121363457255819, "grad_norm": 0.6374672055244446, "learning_rate": 1.990508320513658e-05, "loss": 0.1026, "step": 23272 }, { "epoch": 0.4121540552626103, "grad_norm": 0.665916919708252, "learning_rate": 1.9904270133234142e-05, "loss": 0.0839, "step": 23273 }, { "epoch": 0.41217176479963874, "grad_norm": 0.6130699515342712, "learning_rate": 1.9903457045197155e-05, "loss": 0.0914, "step": 23274 }, { "epoch": 0.41218947433666714, "grad_norm": 0.7474392056465149, "learning_rate": 1.9902643941028294e-05, "loss": 0.0857, "step": 23275 }, { "epoch": 0.4122071838736956, "grad_norm": 0.79267418384552, "learning_rate": 1.9901830820730225e-05, "loss": 0.052, "step": 23276 }, { "epoch": 0.412224893410724, "grad_norm": 0.6281483769416809, "learning_rate": 1.990101768430563e-05, "loss": 0.1162, "step": 23277 }, { "epoch": 0.41224260294775245, "grad_norm": 0.8425053358078003, "learning_rate": 1.9900204531757178e-05, "loss": 0.0898, "step": 23278 }, { "epoch": 0.41226031248478084, "grad_norm": 0.6372822523117065, "learning_rate": 1.9899391363087554e-05, "loss": 0.105, "step": 23279 }, { "epoch": 0.4122780220218093, "grad_norm": 0.31956738233566284, "learning_rate": 1.9898578178299425e-05, "loss": 0.0932, "step": 23280 }, { "epoch": 0.4122957315588377, "grad_norm": 0.5186510682106018, "learning_rate": 1.9897764977395478e-05, "loss": 0.075, "step": 23281 }, { "epoch": 0.41231344109586615, "grad_norm": 0.5027865767478943, "learning_rate": 1.989695176037837e-05, "loss": 0.0585, "step": 23282 }, { "epoch": 0.4123311506328946, "grad_norm": 0.9186686873435974, "learning_rate": 1.9896138527250784e-05, "loss": 0.0633, "step": 23283 }, { "epoch": 0.412348860169923, "grad_norm": 0.5955052375793457, "learning_rate": 1.9895325278015408e-05, "loss": 0.0856, "step": 23284 }, { "epoch": 0.41236656970695146, "grad_norm": 0.8585156798362732, "learning_rate": 1.98945120126749e-05, "loss": 0.0795, "step": 23285 }, { "epoch": 0.41238427924397986, "grad_norm": 0.9179673790931702, "learning_rate": 1.9893698731231946e-05, "loss": 0.1105, "step": 23286 }, { "epoch": 0.4124019887810083, "grad_norm": 0.825598418712616, "learning_rate": 1.9892885433689213e-05, "loss": 0.1018, "step": 23287 }, { "epoch": 0.4124196983180367, "grad_norm": 0.32678401470184326, "learning_rate": 1.9892072120049382e-05, "loss": 0.059, "step": 23288 }, { "epoch": 0.41243740785506516, "grad_norm": 0.6617121696472168, "learning_rate": 1.9891258790315132e-05, "loss": 0.0807, "step": 23289 }, { "epoch": 0.41245511739209356, "grad_norm": 0.658411979675293, "learning_rate": 1.9890445444489136e-05, "loss": 0.0943, "step": 23290 }, { "epoch": 0.412472826929122, "grad_norm": 0.5074251294136047, "learning_rate": 1.988963208257407e-05, "loss": 0.094, "step": 23291 }, { "epoch": 0.4124905364661504, "grad_norm": 1.9893721342086792, "learning_rate": 1.98888187045726e-05, "loss": 0.0977, "step": 23292 }, { "epoch": 0.41250824600317887, "grad_norm": 0.8560801148414612, "learning_rate": 1.9888005310487422e-05, "loss": 0.0592, "step": 23293 }, { "epoch": 0.41252595554020727, "grad_norm": 0.9577414393424988, "learning_rate": 1.9887191900321196e-05, "loss": 0.0593, "step": 23294 }, { "epoch": 0.4125436650772357, "grad_norm": 0.5543267130851746, "learning_rate": 1.9886378474076605e-05, "loss": 0.0952, "step": 23295 }, { "epoch": 0.4125613746142641, "grad_norm": 0.8220435380935669, "learning_rate": 1.9885565031756318e-05, "loss": 0.0904, "step": 23296 }, { "epoch": 0.4125790841512926, "grad_norm": 0.5996899604797363, "learning_rate": 1.988475157336302e-05, "loss": 0.0797, "step": 23297 }, { "epoch": 0.41259679368832103, "grad_norm": 1.0610429048538208, "learning_rate": 1.9883938098899382e-05, "loss": 0.0987, "step": 23298 }, { "epoch": 0.4126145032253494, "grad_norm": 0.693757176399231, "learning_rate": 1.9883124608368087e-05, "loss": 0.0912, "step": 23299 }, { "epoch": 0.4126322127623779, "grad_norm": 0.46602606773376465, "learning_rate": 1.98823111017718e-05, "loss": 0.0641, "step": 23300 }, { "epoch": 0.4126499222994063, "grad_norm": 1.3772872686386108, "learning_rate": 1.9881497579113205e-05, "loss": 0.0857, "step": 23301 }, { "epoch": 0.41266763183643473, "grad_norm": 0.5958353877067566, "learning_rate": 1.9880684040394978e-05, "loss": 0.0661, "step": 23302 }, { "epoch": 0.41268534137346313, "grad_norm": 0.7230838537216187, "learning_rate": 1.9879870485619788e-05, "loss": 0.0662, "step": 23303 }, { "epoch": 0.4127030509104916, "grad_norm": 0.761394202709198, "learning_rate": 1.9879056914790325e-05, "loss": 0.0945, "step": 23304 }, { "epoch": 0.41272076044752, "grad_norm": 0.9043979644775391, "learning_rate": 1.9878243327909256e-05, "loss": 0.1109, "step": 23305 }, { "epoch": 0.41273846998454844, "grad_norm": 0.34215834736824036, "learning_rate": 1.987742972497926e-05, "loss": 0.0681, "step": 23306 }, { "epoch": 0.41275617952157684, "grad_norm": 0.3971540331840515, "learning_rate": 1.987661610600301e-05, "loss": 0.0733, "step": 23307 }, { "epoch": 0.4127738890586053, "grad_norm": 0.5986325144767761, "learning_rate": 1.9875802470983193e-05, "loss": 0.0516, "step": 23308 }, { "epoch": 0.4127915985956337, "grad_norm": 0.8814803957939148, "learning_rate": 1.9874988819922474e-05, "loss": 0.0885, "step": 23309 }, { "epoch": 0.41280930813266214, "grad_norm": 0.863576352596283, "learning_rate": 1.9874175152823536e-05, "loss": 0.0631, "step": 23310 }, { "epoch": 0.41282701766969054, "grad_norm": 0.6785200834274292, "learning_rate": 1.9873361469689055e-05, "loss": 0.0806, "step": 23311 }, { "epoch": 0.412844727206719, "grad_norm": 0.8489505648612976, "learning_rate": 1.9872547770521706e-05, "loss": 0.1182, "step": 23312 }, { "epoch": 0.41286243674374745, "grad_norm": 0.9267069697380066, "learning_rate": 1.987173405532417e-05, "loss": 0.1018, "step": 23313 }, { "epoch": 0.41288014628077585, "grad_norm": 0.5658779144287109, "learning_rate": 1.9870920324099123e-05, "loss": 0.0842, "step": 23314 }, { "epoch": 0.4128978558178043, "grad_norm": 0.6751581430435181, "learning_rate": 1.987010657684924e-05, "loss": 0.0697, "step": 23315 }, { "epoch": 0.4129155653548327, "grad_norm": 0.5788858532905579, "learning_rate": 1.9869292813577197e-05, "loss": 0.1051, "step": 23316 }, { "epoch": 0.41293327489186116, "grad_norm": 0.3559616804122925, "learning_rate": 1.986847903428567e-05, "loss": 0.0559, "step": 23317 }, { "epoch": 0.41295098442888956, "grad_norm": 0.8723931908607483, "learning_rate": 1.986766523897735e-05, "loss": 0.0815, "step": 23318 }, { "epoch": 0.412968693965918, "grad_norm": 0.5912629961967468, "learning_rate": 1.986685142765489e-05, "loss": 0.0999, "step": 23319 }, { "epoch": 0.4129864035029464, "grad_norm": 0.8909170031547546, "learning_rate": 1.9866037600320995e-05, "loss": 0.1095, "step": 23320 }, { "epoch": 0.41300411303997486, "grad_norm": 1.670767068862915, "learning_rate": 1.9865223756978316e-05, "loss": 0.1134, "step": 23321 }, { "epoch": 0.41302182257700326, "grad_norm": 0.5869160294532776, "learning_rate": 1.986440989762955e-05, "loss": 0.0696, "step": 23322 }, { "epoch": 0.4130395321140317, "grad_norm": 0.7966539263725281, "learning_rate": 1.9863596022277366e-05, "loss": 0.0803, "step": 23323 }, { "epoch": 0.4130572416510601, "grad_norm": 0.7470903992652893, "learning_rate": 1.9862782130924442e-05, "loss": 0.0632, "step": 23324 }, { "epoch": 0.41307495118808857, "grad_norm": 1.0888915061950684, "learning_rate": 1.986196822357346e-05, "loss": 0.0794, "step": 23325 }, { "epoch": 0.41309266072511697, "grad_norm": 1.0775171518325806, "learning_rate": 1.9861154300227087e-05, "loss": 0.0669, "step": 23326 }, { "epoch": 0.4131103702621454, "grad_norm": 0.4920596778392792, "learning_rate": 1.9860340360888016e-05, "loss": 0.0529, "step": 23327 }, { "epoch": 0.4131280797991739, "grad_norm": 0.6681001782417297, "learning_rate": 1.9859526405558914e-05, "loss": 0.1016, "step": 23328 }, { "epoch": 0.4131457893362023, "grad_norm": 0.8366275429725647, "learning_rate": 1.985871243424246e-05, "loss": 0.0725, "step": 23329 }, { "epoch": 0.41316349887323073, "grad_norm": 0.7077487111091614, "learning_rate": 1.9857898446941335e-05, "loss": 0.0643, "step": 23330 }, { "epoch": 0.4131812084102591, "grad_norm": 0.8692668080329895, "learning_rate": 1.9857084443658215e-05, "loss": 0.0688, "step": 23331 }, { "epoch": 0.4131989179472876, "grad_norm": 0.8630952835083008, "learning_rate": 1.985627042439578e-05, "loss": 0.059, "step": 23332 }, { "epoch": 0.413216627484316, "grad_norm": 0.8357576131820679, "learning_rate": 1.9855456389156707e-05, "loss": 0.0877, "step": 23333 }, { "epoch": 0.41323433702134443, "grad_norm": 0.6331818699836731, "learning_rate": 1.9854642337943672e-05, "loss": 0.0786, "step": 23334 }, { "epoch": 0.41325204655837283, "grad_norm": 0.7917622923851013, "learning_rate": 1.985382827075935e-05, "loss": 0.0982, "step": 23335 }, { "epoch": 0.4132697560954013, "grad_norm": 0.6430091857910156, "learning_rate": 1.985301418760643e-05, "loss": 0.0887, "step": 23336 }, { "epoch": 0.4132874656324297, "grad_norm": 0.4056061804294586, "learning_rate": 1.985220008848758e-05, "loss": 0.063, "step": 23337 }, { "epoch": 0.41330517516945814, "grad_norm": 0.8946950435638428, "learning_rate": 1.985138597340549e-05, "loss": 0.1109, "step": 23338 }, { "epoch": 0.41332288470648654, "grad_norm": 0.6424611210823059, "learning_rate": 1.9850571842362828e-05, "loss": 0.061, "step": 23339 }, { "epoch": 0.413340594243515, "grad_norm": 0.7875023484230042, "learning_rate": 1.984975769536227e-05, "loss": 0.0707, "step": 23340 }, { "epoch": 0.4133583037805434, "grad_norm": 0.7897403240203857, "learning_rate": 1.9848943532406503e-05, "loss": 0.074, "step": 23341 }, { "epoch": 0.41337601331757184, "grad_norm": 0.7753856778144836, "learning_rate": 1.98481293534982e-05, "loss": 0.1169, "step": 23342 }, { "epoch": 0.4133937228546003, "grad_norm": 0.856570303440094, "learning_rate": 1.9847315158640045e-05, "loss": 0.074, "step": 23343 }, { "epoch": 0.4134114323916287, "grad_norm": 0.6778311729431152, "learning_rate": 1.9846500947834713e-05, "loss": 0.0707, "step": 23344 }, { "epoch": 0.41342914192865715, "grad_norm": 2.7307820320129395, "learning_rate": 1.984568672108488e-05, "loss": 0.1076, "step": 23345 }, { "epoch": 0.41344685146568555, "grad_norm": 0.6545647382736206, "learning_rate": 1.984487247839323e-05, "loss": 0.0767, "step": 23346 }, { "epoch": 0.413464561002714, "grad_norm": 0.7494730949401855, "learning_rate": 1.984405821976244e-05, "loss": 0.0719, "step": 23347 }, { "epoch": 0.4134822705397424, "grad_norm": 0.7023877501487732, "learning_rate": 1.984324394519519e-05, "loss": 0.0916, "step": 23348 }, { "epoch": 0.41349998007677086, "grad_norm": 0.8090680241584778, "learning_rate": 1.9842429654694158e-05, "loss": 0.0783, "step": 23349 }, { "epoch": 0.41351768961379926, "grad_norm": 0.753007173538208, "learning_rate": 1.9841615348262015e-05, "loss": 0.0584, "step": 23350 }, { "epoch": 0.4135353991508277, "grad_norm": 0.5282881855964661, "learning_rate": 1.9840801025901452e-05, "loss": 0.0714, "step": 23351 }, { "epoch": 0.4135531086878561, "grad_norm": 0.6587798595428467, "learning_rate": 1.9839986687615144e-05, "loss": 0.0771, "step": 23352 }, { "epoch": 0.41357081822488456, "grad_norm": 0.4885038733482361, "learning_rate": 1.9839172333405773e-05, "loss": 0.1198, "step": 23353 }, { "epoch": 0.41358852776191296, "grad_norm": 0.8280075788497925, "learning_rate": 1.9838357963276004e-05, "loss": 0.0729, "step": 23354 }, { "epoch": 0.4136062372989414, "grad_norm": 1.7223092317581177, "learning_rate": 1.9837543577228537e-05, "loss": 0.0901, "step": 23355 }, { "epoch": 0.4136239468359698, "grad_norm": 0.47521650791168213, "learning_rate": 1.9836729175266033e-05, "loss": 0.0791, "step": 23356 }, { "epoch": 0.41364165637299827, "grad_norm": 0.8132875561714172, "learning_rate": 1.9835914757391182e-05, "loss": 0.1202, "step": 23357 }, { "epoch": 0.4136593659100267, "grad_norm": 1.1978873014450073, "learning_rate": 1.9835100323606663e-05, "loss": 0.1043, "step": 23358 }, { "epoch": 0.4136770754470551, "grad_norm": 0.7507041692733765, "learning_rate": 1.9834285873915147e-05, "loss": 0.0969, "step": 23359 }, { "epoch": 0.4136947849840836, "grad_norm": 0.7517043948173523, "learning_rate": 1.9833471408319323e-05, "loss": 0.0663, "step": 23360 }, { "epoch": 0.413712494521112, "grad_norm": 1.0885566473007202, "learning_rate": 1.9832656926821872e-05, "loss": 0.0776, "step": 23361 }, { "epoch": 0.4137302040581404, "grad_norm": 0.7872394323348999, "learning_rate": 1.9831842429425467e-05, "loss": 0.0869, "step": 23362 }, { "epoch": 0.4137479135951688, "grad_norm": 0.9692599773406982, "learning_rate": 1.9831027916132785e-05, "loss": 0.1179, "step": 23363 }, { "epoch": 0.4137656231321973, "grad_norm": 0.43356138467788696, "learning_rate": 1.9830213386946508e-05, "loss": 0.0694, "step": 23364 }, { "epoch": 0.4137833326692257, "grad_norm": 0.5587649941444397, "learning_rate": 1.982939884186932e-05, "loss": 0.0843, "step": 23365 }, { "epoch": 0.41380104220625413, "grad_norm": 1.1513724327087402, "learning_rate": 1.98285842809039e-05, "loss": 0.0779, "step": 23366 }, { "epoch": 0.41381875174328253, "grad_norm": 0.28013262152671814, "learning_rate": 1.9827769704052927e-05, "loss": 0.0992, "step": 23367 }, { "epoch": 0.413836461280311, "grad_norm": 0.440809965133667, "learning_rate": 1.9826955111319078e-05, "loss": 0.0603, "step": 23368 }, { "epoch": 0.4138541708173394, "grad_norm": 0.9008979797363281, "learning_rate": 1.982614050270503e-05, "loss": 0.0824, "step": 23369 }, { "epoch": 0.41387188035436784, "grad_norm": 0.49070096015930176, "learning_rate": 1.9825325878213477e-05, "loss": 0.093, "step": 23370 }, { "epoch": 0.41388958989139624, "grad_norm": 1.6234965324401855, "learning_rate": 1.9824511237847085e-05, "loss": 0.1076, "step": 23371 }, { "epoch": 0.4139072994284247, "grad_norm": 1.0090301036834717, "learning_rate": 1.982369658160854e-05, "loss": 0.0809, "step": 23372 }, { "epoch": 0.41392500896545315, "grad_norm": 0.5909960269927979, "learning_rate": 1.982288190950052e-05, "loss": 0.0793, "step": 23373 }, { "epoch": 0.41394271850248154, "grad_norm": 0.690125048160553, "learning_rate": 1.9822067221525703e-05, "loss": 0.0889, "step": 23374 }, { "epoch": 0.41396042803951, "grad_norm": 0.5388031601905823, "learning_rate": 1.982125251768678e-05, "loss": 0.111, "step": 23375 }, { "epoch": 0.4139781375765384, "grad_norm": 0.8047170042991638, "learning_rate": 1.9820437797986418e-05, "loss": 0.1168, "step": 23376 }, { "epoch": 0.41399584711356685, "grad_norm": 0.44173261523246765, "learning_rate": 1.981962306242731e-05, "loss": 0.0509, "step": 23377 }, { "epoch": 0.41401355665059525, "grad_norm": 1.256948709487915, "learning_rate": 1.9818808311012124e-05, "loss": 0.0981, "step": 23378 }, { "epoch": 0.4140312661876237, "grad_norm": 0.540081262588501, "learning_rate": 1.9817993543743547e-05, "loss": 0.0791, "step": 23379 }, { "epoch": 0.4140489757246521, "grad_norm": 0.5571532845497131, "learning_rate": 1.981717876062426e-05, "loss": 0.058, "step": 23380 }, { "epoch": 0.41406668526168056, "grad_norm": 0.5760067105293274, "learning_rate": 1.981636396165694e-05, "loss": 0.0755, "step": 23381 }, { "epoch": 0.41408439479870895, "grad_norm": 0.9278075695037842, "learning_rate": 1.9815549146844275e-05, "loss": 0.0932, "step": 23382 }, { "epoch": 0.4141021043357374, "grad_norm": 0.4643652141094208, "learning_rate": 1.9814734316188927e-05, "loss": 0.0481, "step": 23383 }, { "epoch": 0.4141198138727658, "grad_norm": 0.879317581653595, "learning_rate": 1.9813919469693602e-05, "loss": 0.0657, "step": 23384 }, { "epoch": 0.41413752340979426, "grad_norm": 0.7359594106674194, "learning_rate": 1.981310460736097e-05, "loss": 0.0851, "step": 23385 }, { "epoch": 0.41415523294682266, "grad_norm": 0.48836714029312134, "learning_rate": 1.9812289729193707e-05, "loss": 0.0667, "step": 23386 }, { "epoch": 0.4141729424838511, "grad_norm": 0.9167528748512268, "learning_rate": 1.9811474835194493e-05, "loss": 0.0832, "step": 23387 }, { "epoch": 0.41419065202087957, "grad_norm": 1.0737731456756592, "learning_rate": 1.9810659925366017e-05, "loss": 0.0751, "step": 23388 }, { "epoch": 0.41420836155790797, "grad_norm": 0.9712691903114319, "learning_rate": 1.980984499971096e-05, "loss": 0.0876, "step": 23389 }, { "epoch": 0.4142260710949364, "grad_norm": 0.6758160591125488, "learning_rate": 1.9809030058231996e-05, "loss": 0.0551, "step": 23390 }, { "epoch": 0.4142437806319648, "grad_norm": 0.40936556458473206, "learning_rate": 1.980821510093181e-05, "loss": 0.0695, "step": 23391 }, { "epoch": 0.4142614901689933, "grad_norm": 0.852477490901947, "learning_rate": 1.980740012781308e-05, "loss": 0.1108, "step": 23392 }, { "epoch": 0.4142791997060217, "grad_norm": 0.7938307523727417, "learning_rate": 1.9806585138878495e-05, "loss": 0.0933, "step": 23393 }, { "epoch": 0.4142969092430501, "grad_norm": 0.738990306854248, "learning_rate": 1.9805770134130727e-05, "loss": 0.0743, "step": 23394 }, { "epoch": 0.4143146187800785, "grad_norm": 1.2360819578170776, "learning_rate": 1.9804955113572462e-05, "loss": 0.102, "step": 23395 }, { "epoch": 0.414332328317107, "grad_norm": 0.6923536062240601, "learning_rate": 1.9804140077206384e-05, "loss": 0.0821, "step": 23396 }, { "epoch": 0.4143500378541354, "grad_norm": 0.8537057042121887, "learning_rate": 1.9803325025035166e-05, "loss": 0.1166, "step": 23397 }, { "epoch": 0.41436774739116383, "grad_norm": 0.408351331949234, "learning_rate": 1.9802509957061497e-05, "loss": 0.0634, "step": 23398 }, { "epoch": 0.41438545692819223, "grad_norm": 0.8136550188064575, "learning_rate": 1.9801694873288056e-05, "loss": 0.0569, "step": 23399 }, { "epoch": 0.4144031664652207, "grad_norm": 0.37009942531585693, "learning_rate": 1.9800879773717527e-05, "loss": 0.0716, "step": 23400 }, { "epoch": 0.41442087600224914, "grad_norm": 1.1457486152648926, "learning_rate": 1.980006465835259e-05, "loss": 0.0846, "step": 23401 }, { "epoch": 0.41443858553927754, "grad_norm": 0.40524500608444214, "learning_rate": 1.9799249527195918e-05, "loss": 0.0638, "step": 23402 }, { "epoch": 0.414456295076306, "grad_norm": 0.8087210655212402, "learning_rate": 1.9798434380250203e-05, "loss": 0.0702, "step": 23403 }, { "epoch": 0.4144740046133344, "grad_norm": 0.6295615434646606, "learning_rate": 1.9797619217518126e-05, "loss": 0.0486, "step": 23404 }, { "epoch": 0.41449171415036284, "grad_norm": 0.668446958065033, "learning_rate": 1.9796804039002368e-05, "loss": 0.0874, "step": 23405 }, { "epoch": 0.41450942368739124, "grad_norm": 0.86067134141922, "learning_rate": 1.979598884470561e-05, "loss": 0.1161, "step": 23406 }, { "epoch": 0.4145271332244197, "grad_norm": 0.893539309501648, "learning_rate": 1.979517363463053e-05, "loss": 0.0626, "step": 23407 }, { "epoch": 0.4145448427614481, "grad_norm": 0.9270548820495605, "learning_rate": 1.9794358408779818e-05, "loss": 0.0799, "step": 23408 }, { "epoch": 0.41456255229847655, "grad_norm": 0.9721794724464417, "learning_rate": 1.9793543167156152e-05, "loss": 0.1264, "step": 23409 }, { "epoch": 0.41458026183550495, "grad_norm": 0.8724388480186462, "learning_rate": 1.9792727909762213e-05, "loss": 0.0762, "step": 23410 }, { "epoch": 0.4145979713725334, "grad_norm": 0.6328602433204651, "learning_rate": 1.9791912636600685e-05, "loss": 0.0696, "step": 23411 }, { "epoch": 0.4146156809095618, "grad_norm": 0.6738648414611816, "learning_rate": 1.9791097347674244e-05, "loss": 0.0805, "step": 23412 }, { "epoch": 0.41463339044659026, "grad_norm": 1.0170929431915283, "learning_rate": 1.979028204298558e-05, "loss": 0.0889, "step": 23413 }, { "epoch": 0.41465109998361865, "grad_norm": 0.5701143145561218, "learning_rate": 1.9789466722537374e-05, "loss": 0.0622, "step": 23414 }, { "epoch": 0.4146688095206471, "grad_norm": 0.8339636325836182, "learning_rate": 1.9788651386332305e-05, "loss": 0.1028, "step": 23415 }, { "epoch": 0.41468651905767556, "grad_norm": 0.7466618418693542, "learning_rate": 1.9787836034373057e-05, "loss": 0.076, "step": 23416 }, { "epoch": 0.41470422859470396, "grad_norm": 0.6127910614013672, "learning_rate": 1.9787020666662313e-05, "loss": 0.0608, "step": 23417 }, { "epoch": 0.4147219381317324, "grad_norm": 0.456277996301651, "learning_rate": 1.978620528320276e-05, "loss": 0.0664, "step": 23418 }, { "epoch": 0.4147396476687608, "grad_norm": 0.6518248319625854, "learning_rate": 1.9785389883997074e-05, "loss": 0.073, "step": 23419 }, { "epoch": 0.41475735720578927, "grad_norm": 1.0831701755523682, "learning_rate": 1.9784574469047936e-05, "loss": 0.1232, "step": 23420 }, { "epoch": 0.41477506674281767, "grad_norm": 0.33267077803611755, "learning_rate": 1.978375903835803e-05, "loss": 0.0744, "step": 23421 }, { "epoch": 0.4147927762798461, "grad_norm": 0.5376983284950256, "learning_rate": 1.9782943591930043e-05, "loss": 0.0971, "step": 23422 }, { "epoch": 0.4148104858168745, "grad_norm": 0.8432180881500244, "learning_rate": 1.9782128129766655e-05, "loss": 0.0798, "step": 23423 }, { "epoch": 0.414828195353903, "grad_norm": 0.4700354337692261, "learning_rate": 1.9781312651870552e-05, "loss": 0.0765, "step": 23424 }, { "epoch": 0.41484590489093137, "grad_norm": 0.6010125875473022, "learning_rate": 1.9780497158244413e-05, "loss": 0.0765, "step": 23425 }, { "epoch": 0.4148636144279598, "grad_norm": 0.6982454657554626, "learning_rate": 1.977968164889092e-05, "loss": 0.0991, "step": 23426 }, { "epoch": 0.4148813239649882, "grad_norm": 0.626167356967926, "learning_rate": 1.977886612381276e-05, "loss": 0.0762, "step": 23427 }, { "epoch": 0.4148990335020167, "grad_norm": 0.911068320274353, "learning_rate": 1.977805058301261e-05, "loss": 0.0865, "step": 23428 }, { "epoch": 0.4149167430390451, "grad_norm": 0.6613808274269104, "learning_rate": 1.977723502649316e-05, "loss": 0.072, "step": 23429 }, { "epoch": 0.41493445257607353, "grad_norm": 0.5081256031990051, "learning_rate": 1.9776419454257096e-05, "loss": 0.0775, "step": 23430 }, { "epoch": 0.414952162113102, "grad_norm": 0.7695619463920593, "learning_rate": 1.9775603866307083e-05, "loss": 0.0706, "step": 23431 }, { "epoch": 0.4149698716501304, "grad_norm": 1.335109829902649, "learning_rate": 1.9774788262645824e-05, "loss": 0.0899, "step": 23432 }, { "epoch": 0.41498758118715884, "grad_norm": 0.524677574634552, "learning_rate": 1.9773972643275995e-05, "loss": 0.1094, "step": 23433 }, { "epoch": 0.41500529072418724, "grad_norm": 0.5744791626930237, "learning_rate": 1.9773157008200273e-05, "loss": 0.0871, "step": 23434 }, { "epoch": 0.4150230002612157, "grad_norm": 0.700526237487793, "learning_rate": 1.977234135742135e-05, "loss": 0.0901, "step": 23435 }, { "epoch": 0.4150407097982441, "grad_norm": 0.752237856388092, "learning_rate": 1.9771525690941906e-05, "loss": 0.0691, "step": 23436 }, { "epoch": 0.41505841933527254, "grad_norm": 0.6931776404380798, "learning_rate": 1.9770710008764627e-05, "loss": 0.1029, "step": 23437 }, { "epoch": 0.41507612887230094, "grad_norm": 0.5521351099014282, "learning_rate": 1.9769894310892193e-05, "loss": 0.0878, "step": 23438 }, { "epoch": 0.4150938384093294, "grad_norm": 0.506027340888977, "learning_rate": 1.9769078597327292e-05, "loss": 0.1103, "step": 23439 }, { "epoch": 0.4151115479463578, "grad_norm": 0.7015608549118042, "learning_rate": 1.97682628680726e-05, "loss": 0.1148, "step": 23440 }, { "epoch": 0.41512925748338625, "grad_norm": 0.5613675713539124, "learning_rate": 1.976744712313081e-05, "loss": 0.072, "step": 23441 }, { "epoch": 0.41514696702041465, "grad_norm": 0.9635123610496521, "learning_rate": 1.97666313625046e-05, "loss": 0.073, "step": 23442 }, { "epoch": 0.4151646765574431, "grad_norm": 0.4969722032546997, "learning_rate": 1.9765815586196653e-05, "loss": 0.0662, "step": 23443 }, { "epoch": 0.4151823860944715, "grad_norm": 0.7196369767189026, "learning_rate": 1.9764999794209657e-05, "loss": 0.0953, "step": 23444 }, { "epoch": 0.41520009563149995, "grad_norm": 0.6505299210548401, "learning_rate": 1.976418398654629e-05, "loss": 0.0812, "step": 23445 }, { "epoch": 0.4152178051685284, "grad_norm": 0.7181956768035889, "learning_rate": 1.9763368163209246e-05, "loss": 0.1009, "step": 23446 }, { "epoch": 0.4152355147055568, "grad_norm": 0.6307757496833801, "learning_rate": 1.9762552324201198e-05, "loss": 0.0826, "step": 23447 }, { "epoch": 0.41525322424258526, "grad_norm": 0.537605345249176, "learning_rate": 1.9761736469524833e-05, "loss": 0.068, "step": 23448 }, { "epoch": 0.41527093377961366, "grad_norm": 0.6651743650436401, "learning_rate": 1.976092059918284e-05, "loss": 0.0826, "step": 23449 }, { "epoch": 0.4152886433166421, "grad_norm": 0.8601866364479065, "learning_rate": 1.9760104713177898e-05, "loss": 0.0805, "step": 23450 }, { "epoch": 0.4153063528536705, "grad_norm": 0.9295660853385925, "learning_rate": 1.9759288811512695e-05, "loss": 0.1454, "step": 23451 }, { "epoch": 0.41532406239069897, "grad_norm": 0.8279285430908203, "learning_rate": 1.9758472894189908e-05, "loss": 0.0855, "step": 23452 }, { "epoch": 0.41534177192772737, "grad_norm": 0.6648648977279663, "learning_rate": 1.9757656961212235e-05, "loss": 0.0684, "step": 23453 }, { "epoch": 0.4153594814647558, "grad_norm": 1.0987145900726318, "learning_rate": 1.9756841012582345e-05, "loss": 0.0893, "step": 23454 }, { "epoch": 0.4153771910017842, "grad_norm": 1.3164712190628052, "learning_rate": 1.975602504830293e-05, "loss": 0.0757, "step": 23455 }, { "epoch": 0.4153949005388127, "grad_norm": 0.6502642631530762, "learning_rate": 1.975520906837668e-05, "loss": 0.0931, "step": 23456 }, { "epoch": 0.41541261007584107, "grad_norm": 0.645929753780365, "learning_rate": 1.9754393072806266e-05, "loss": 0.0625, "step": 23457 }, { "epoch": 0.4154303196128695, "grad_norm": 0.7647984623908997, "learning_rate": 1.9753577061594383e-05, "loss": 0.0686, "step": 23458 }, { "epoch": 0.4154480291498979, "grad_norm": 0.2505376636981964, "learning_rate": 1.975276103474371e-05, "loss": 0.0591, "step": 23459 }, { "epoch": 0.4154657386869264, "grad_norm": 0.8949318528175354, "learning_rate": 1.9751944992256933e-05, "loss": 0.0874, "step": 23460 }, { "epoch": 0.41548344822395483, "grad_norm": 0.43506932258605957, "learning_rate": 1.9751128934136738e-05, "loss": 0.0745, "step": 23461 }, { "epoch": 0.41550115776098323, "grad_norm": 0.7224005460739136, "learning_rate": 1.9750312860385814e-05, "loss": 0.068, "step": 23462 }, { "epoch": 0.4155188672980117, "grad_norm": 0.8698359131813049, "learning_rate": 1.9749496771006838e-05, "loss": 0.0773, "step": 23463 }, { "epoch": 0.4155365768350401, "grad_norm": 0.4988207519054413, "learning_rate": 1.9748680666002496e-05, "loss": 0.0586, "step": 23464 }, { "epoch": 0.41555428637206854, "grad_norm": 0.5382190942764282, "learning_rate": 1.974786454537548e-05, "loss": 0.0702, "step": 23465 }, { "epoch": 0.41557199590909694, "grad_norm": 1.0117777585983276, "learning_rate": 1.9747048409128464e-05, "loss": 0.1061, "step": 23466 }, { "epoch": 0.4155897054461254, "grad_norm": 0.6402666568756104, "learning_rate": 1.9746232257264145e-05, "loss": 0.0719, "step": 23467 }, { "epoch": 0.4156074149831538, "grad_norm": 0.5543620586395264, "learning_rate": 1.9745416089785197e-05, "loss": 0.0866, "step": 23468 }, { "epoch": 0.41562512452018224, "grad_norm": 0.4707241654396057, "learning_rate": 1.974459990669431e-05, "loss": 0.0762, "step": 23469 }, { "epoch": 0.41564283405721064, "grad_norm": 0.6838123798370361, "learning_rate": 1.9743783707994172e-05, "loss": 0.0886, "step": 23470 }, { "epoch": 0.4156605435942391, "grad_norm": 0.8387578725814819, "learning_rate": 1.974296749368746e-05, "loss": 0.0893, "step": 23471 }, { "epoch": 0.4156782531312675, "grad_norm": 0.5120759606361389, "learning_rate": 1.9742151263776876e-05, "loss": 0.0729, "step": 23472 }, { "epoch": 0.41569596266829595, "grad_norm": 0.8249927759170532, "learning_rate": 1.9741335018265086e-05, "loss": 0.0962, "step": 23473 }, { "epoch": 0.41571367220532435, "grad_norm": 0.4543670415878296, "learning_rate": 1.974051875715478e-05, "loss": 0.0398, "step": 23474 }, { "epoch": 0.4157313817423528, "grad_norm": 0.9738665819168091, "learning_rate": 1.9739702480448654e-05, "loss": 0.1166, "step": 23475 }, { "epoch": 0.41574909127938126, "grad_norm": 0.6656720042228699, "learning_rate": 1.9738886188149386e-05, "loss": 0.0619, "step": 23476 }, { "epoch": 0.41576680081640965, "grad_norm": 0.7655359506607056, "learning_rate": 1.9738069880259653e-05, "loss": 0.0889, "step": 23477 }, { "epoch": 0.4157845103534381, "grad_norm": 0.8189947605133057, "learning_rate": 1.973725355678216e-05, "loss": 0.1041, "step": 23478 }, { "epoch": 0.4158022198904665, "grad_norm": 0.1999581903219223, "learning_rate": 1.9736437217719574e-05, "loss": 0.0654, "step": 23479 }, { "epoch": 0.41581992942749496, "grad_norm": 1.351910948753357, "learning_rate": 1.9735620863074592e-05, "loss": 0.0855, "step": 23480 }, { "epoch": 0.41583763896452336, "grad_norm": 0.8189428448677063, "learning_rate": 1.97348044928499e-05, "loss": 0.0672, "step": 23481 }, { "epoch": 0.4158553485015518, "grad_norm": 0.5305885076522827, "learning_rate": 1.9733988107048175e-05, "loss": 0.076, "step": 23482 }, { "epoch": 0.4158730580385802, "grad_norm": 0.7119004726409912, "learning_rate": 1.9733171705672107e-05, "loss": 0.0604, "step": 23483 }, { "epoch": 0.41589076757560867, "grad_norm": 0.6049078106880188, "learning_rate": 1.973235528872438e-05, "loss": 0.0929, "step": 23484 }, { "epoch": 0.41590847711263706, "grad_norm": 0.6259426474571228, "learning_rate": 1.9731538856207693e-05, "loss": 0.0795, "step": 23485 }, { "epoch": 0.4159261866496655, "grad_norm": 0.43627023696899414, "learning_rate": 1.9730722408124717e-05, "loss": 0.0685, "step": 23486 }, { "epoch": 0.4159438961866939, "grad_norm": 0.5384159088134766, "learning_rate": 1.972990594447814e-05, "loss": 0.0568, "step": 23487 }, { "epoch": 0.41596160572372237, "grad_norm": 0.6487655639648438, "learning_rate": 1.9729089465270655e-05, "loss": 0.1009, "step": 23488 }, { "epoch": 0.41597931526075077, "grad_norm": 0.6754744648933411, "learning_rate": 1.972827297050494e-05, "loss": 0.0768, "step": 23489 }, { "epoch": 0.4159970247977792, "grad_norm": 0.5277718305587769, "learning_rate": 1.972745646018369e-05, "loss": 0.0742, "step": 23490 }, { "epoch": 0.4160147343348077, "grad_norm": 0.2692733705043793, "learning_rate": 1.9726639934309584e-05, "loss": 0.072, "step": 23491 }, { "epoch": 0.4160324438718361, "grad_norm": 0.6197364926338196, "learning_rate": 1.972582339288531e-05, "loss": 0.052, "step": 23492 }, { "epoch": 0.41605015340886453, "grad_norm": 0.7993684411048889, "learning_rate": 1.972500683591355e-05, "loss": 0.0698, "step": 23493 }, { "epoch": 0.41606786294589293, "grad_norm": 0.5824460983276367, "learning_rate": 1.9724190263397003e-05, "loss": 0.0906, "step": 23494 }, { "epoch": 0.4160855724829214, "grad_norm": 0.6798214316368103, "learning_rate": 1.9723373675338347e-05, "loss": 0.0949, "step": 23495 }, { "epoch": 0.4161032820199498, "grad_norm": 0.4905812442302704, "learning_rate": 1.972255707174027e-05, "loss": 0.0482, "step": 23496 }, { "epoch": 0.41612099155697824, "grad_norm": 0.6052994728088379, "learning_rate": 1.9721740452605456e-05, "loss": 0.0912, "step": 23497 }, { "epoch": 0.41613870109400664, "grad_norm": 0.6906474828720093, "learning_rate": 1.972092381793659e-05, "loss": 0.0653, "step": 23498 }, { "epoch": 0.4161564106310351, "grad_norm": 0.7093395590782166, "learning_rate": 1.972010716773637e-05, "loss": 0.0653, "step": 23499 }, { "epoch": 0.4161741201680635, "grad_norm": 0.5674675107002258, "learning_rate": 1.9719290502007468e-05, "loss": 0.0579, "step": 23500 }, { "epoch": 0.41619182970509194, "grad_norm": 0.8877120018005371, "learning_rate": 1.971847382075258e-05, "loss": 0.0709, "step": 23501 }, { "epoch": 0.41620953924212034, "grad_norm": 0.7000198364257812, "learning_rate": 1.9717657123974393e-05, "loss": 0.0578, "step": 23502 }, { "epoch": 0.4162272487791488, "grad_norm": 0.5297505855560303, "learning_rate": 1.9716840411675587e-05, "loss": 0.0531, "step": 23503 }, { "epoch": 0.4162449583161772, "grad_norm": 0.8070759773254395, "learning_rate": 1.9716023683858855e-05, "loss": 0.0583, "step": 23504 }, { "epoch": 0.41626266785320565, "grad_norm": 0.8833472728729248, "learning_rate": 1.9715206940526882e-05, "loss": 0.0483, "step": 23505 }, { "epoch": 0.4162803773902341, "grad_norm": 0.294657438993454, "learning_rate": 1.9714390181682355e-05, "loss": 0.0609, "step": 23506 }, { "epoch": 0.4162980869272625, "grad_norm": 0.5642068982124329, "learning_rate": 1.971357340732796e-05, "loss": 0.0757, "step": 23507 }, { "epoch": 0.41631579646429095, "grad_norm": 0.7292291522026062, "learning_rate": 1.9712756617466387e-05, "loss": 0.0563, "step": 23508 }, { "epoch": 0.41633350600131935, "grad_norm": 0.9814126491546631, "learning_rate": 1.9711939812100316e-05, "loss": 0.0636, "step": 23509 }, { "epoch": 0.4163512155383478, "grad_norm": 0.5689730048179626, "learning_rate": 1.9711122991232448e-05, "loss": 0.072, "step": 23510 }, { "epoch": 0.4163689250753762, "grad_norm": 0.886279821395874, "learning_rate": 1.9710306154865456e-05, "loss": 0.0568, "step": 23511 }, { "epoch": 0.41638663461240466, "grad_norm": 0.9069172739982605, "learning_rate": 1.9709489303002037e-05, "loss": 0.1027, "step": 23512 }, { "epoch": 0.41640434414943306, "grad_norm": 0.8354417085647583, "learning_rate": 1.9708672435644874e-05, "loss": 0.1009, "step": 23513 }, { "epoch": 0.4164220536864615, "grad_norm": 0.7514670491218567, "learning_rate": 1.970785555279665e-05, "loss": 0.0603, "step": 23514 }, { "epoch": 0.4164397632234899, "grad_norm": 0.8339427709579468, "learning_rate": 1.9707038654460064e-05, "loss": 0.0971, "step": 23515 }, { "epoch": 0.41645747276051837, "grad_norm": 0.7558082938194275, "learning_rate": 1.9706221740637795e-05, "loss": 0.0944, "step": 23516 }, { "epoch": 0.41647518229754676, "grad_norm": 0.5763340592384338, "learning_rate": 1.970540481133253e-05, "loss": 0.0734, "step": 23517 }, { "epoch": 0.4164928918345752, "grad_norm": 0.9835328459739685, "learning_rate": 1.9704587866546955e-05, "loss": 0.0673, "step": 23518 }, { "epoch": 0.4165106013716036, "grad_norm": 0.7650729417800903, "learning_rate": 1.970377090628377e-05, "loss": 0.0916, "step": 23519 }, { "epoch": 0.41652831090863207, "grad_norm": 0.5044551491737366, "learning_rate": 1.9702953930545658e-05, "loss": 0.0761, "step": 23520 }, { "epoch": 0.4165460204456605, "grad_norm": 0.6118185520172119, "learning_rate": 1.9702136939335294e-05, "loss": 0.0816, "step": 23521 }, { "epoch": 0.4165637299826889, "grad_norm": 0.7346500158309937, "learning_rate": 1.9701319932655377e-05, "loss": 0.0947, "step": 23522 }, { "epoch": 0.4165814395197174, "grad_norm": 0.6488130688667297, "learning_rate": 1.9700502910508594e-05, "loss": 0.072, "step": 23523 }, { "epoch": 0.4165991490567458, "grad_norm": 0.5333350300788879, "learning_rate": 1.9699685872897633e-05, "loss": 0.1048, "step": 23524 }, { "epoch": 0.41661685859377423, "grad_norm": 0.31547248363494873, "learning_rate": 1.969886881982518e-05, "loss": 0.0888, "step": 23525 }, { "epoch": 0.41663456813080263, "grad_norm": 1.0316240787506104, "learning_rate": 1.9698051751293924e-05, "loss": 0.0788, "step": 23526 }, { "epoch": 0.4166522776678311, "grad_norm": 0.44905561208724976, "learning_rate": 1.9697234667306554e-05, "loss": 0.0761, "step": 23527 }, { "epoch": 0.4166699872048595, "grad_norm": 1.1222196817398071, "learning_rate": 1.9696417567865755e-05, "loss": 0.1036, "step": 23528 }, { "epoch": 0.41668769674188794, "grad_norm": 0.6571280360221863, "learning_rate": 1.9695600452974218e-05, "loss": 0.0891, "step": 23529 }, { "epoch": 0.41670540627891633, "grad_norm": 0.861589252948761, "learning_rate": 1.969478332263463e-05, "loss": 0.0805, "step": 23530 }, { "epoch": 0.4167231158159448, "grad_norm": 0.6743046641349792, "learning_rate": 1.9693966176849676e-05, "loss": 0.0737, "step": 23531 }, { "epoch": 0.4167408253529732, "grad_norm": 0.8896918892860413, "learning_rate": 1.9693149015622053e-05, "loss": 0.1307, "step": 23532 }, { "epoch": 0.41675853489000164, "grad_norm": 0.6180474162101746, "learning_rate": 1.9692331838954444e-05, "loss": 0.0875, "step": 23533 }, { "epoch": 0.41677624442703004, "grad_norm": 0.5786805152893066, "learning_rate": 1.9691514646849535e-05, "loss": 0.0772, "step": 23534 }, { "epoch": 0.4167939539640585, "grad_norm": 0.7653472423553467, "learning_rate": 1.9690697439310022e-05, "loss": 0.098, "step": 23535 }, { "epoch": 0.41681166350108695, "grad_norm": 0.6557155847549438, "learning_rate": 1.9689880216338586e-05, "loss": 0.0589, "step": 23536 }, { "epoch": 0.41682937303811535, "grad_norm": 0.544685423374176, "learning_rate": 1.968906297793792e-05, "loss": 0.0677, "step": 23537 }, { "epoch": 0.4168470825751438, "grad_norm": 0.49869778752326965, "learning_rate": 1.9688245724110708e-05, "loss": 0.0729, "step": 23538 }, { "epoch": 0.4168647921121722, "grad_norm": 0.7481068968772888, "learning_rate": 1.9687428454859644e-05, "loss": 0.0887, "step": 23539 }, { "epoch": 0.41688250164920065, "grad_norm": 0.8176814913749695, "learning_rate": 1.9686611170187417e-05, "loss": 0.1216, "step": 23540 }, { "epoch": 0.41690021118622905, "grad_norm": 0.8127906322479248, "learning_rate": 1.9685793870096704e-05, "loss": 0.1186, "step": 23541 }, { "epoch": 0.4169179207232575, "grad_norm": 0.803865909576416, "learning_rate": 1.9684976554590214e-05, "loss": 0.1342, "step": 23542 }, { "epoch": 0.4169356302602859, "grad_norm": 0.6202648282051086, "learning_rate": 1.9684159223670622e-05, "loss": 0.0495, "step": 23543 }, { "epoch": 0.41695333979731436, "grad_norm": 0.8055647015571594, "learning_rate": 1.9683341877340617e-05, "loss": 0.0888, "step": 23544 }, { "epoch": 0.41697104933434276, "grad_norm": 0.7595555186271667, "learning_rate": 1.9682524515602893e-05, "loss": 0.0593, "step": 23545 }, { "epoch": 0.4169887588713712, "grad_norm": 0.8385366797447205, "learning_rate": 1.9681707138460136e-05, "loss": 0.0857, "step": 23546 }, { "epoch": 0.4170064684083996, "grad_norm": 0.7516914010047913, "learning_rate": 1.9680889745915035e-05, "loss": 0.0935, "step": 23547 }, { "epoch": 0.41702417794542807, "grad_norm": 1.217703938484192, "learning_rate": 1.968007233797028e-05, "loss": 0.0917, "step": 23548 }, { "epoch": 0.41704188748245646, "grad_norm": 0.5560723543167114, "learning_rate": 1.9679254914628566e-05, "loss": 0.0858, "step": 23549 }, { "epoch": 0.4170595970194849, "grad_norm": 0.539446234703064, "learning_rate": 1.967843747589257e-05, "loss": 0.0523, "step": 23550 }, { "epoch": 0.41707730655651337, "grad_norm": 0.6276129484176636, "learning_rate": 1.967762002176499e-05, "loss": 0.0883, "step": 23551 }, { "epoch": 0.41709501609354177, "grad_norm": 0.5793816447257996, "learning_rate": 1.9676802552248517e-05, "loss": 0.0854, "step": 23552 }, { "epoch": 0.4171127256305702, "grad_norm": 0.5102821588516235, "learning_rate": 1.9675985067345833e-05, "loss": 0.0598, "step": 23553 }, { "epoch": 0.4171304351675986, "grad_norm": 0.6552644968032837, "learning_rate": 1.9675167567059634e-05, "loss": 0.0956, "step": 23554 }, { "epoch": 0.4171481447046271, "grad_norm": 0.6293771266937256, "learning_rate": 1.9674350051392598e-05, "loss": 0.1337, "step": 23555 }, { "epoch": 0.4171658542416555, "grad_norm": 0.7567682862281799, "learning_rate": 1.9673532520347432e-05, "loss": 0.0777, "step": 23556 }, { "epoch": 0.41718356377868393, "grad_norm": 0.7683385610580444, "learning_rate": 1.967271497392681e-05, "loss": 0.0835, "step": 23557 }, { "epoch": 0.41720127331571233, "grad_norm": 0.4261552095413208, "learning_rate": 1.9671897412133437e-05, "loss": 0.0589, "step": 23558 }, { "epoch": 0.4172189828527408, "grad_norm": 0.7821711301803589, "learning_rate": 1.9671079834969986e-05, "loss": 0.0912, "step": 23559 }, { "epoch": 0.4172366923897692, "grad_norm": 0.8455270528793335, "learning_rate": 1.9670262242439156e-05, "loss": 0.0895, "step": 23560 }, { "epoch": 0.41725440192679764, "grad_norm": 0.4008678197860718, "learning_rate": 1.9669444634543642e-05, "loss": 0.0509, "step": 23561 }, { "epoch": 0.41727211146382603, "grad_norm": 0.740166962146759, "learning_rate": 1.966862701128612e-05, "loss": 0.0632, "step": 23562 }, { "epoch": 0.4172898210008545, "grad_norm": 0.8011607527732849, "learning_rate": 1.9667809372669292e-05, "loss": 0.0728, "step": 23563 }, { "epoch": 0.4173075305378829, "grad_norm": 0.7925419807434082, "learning_rate": 1.9666991718695843e-05, "loss": 0.0713, "step": 23564 }, { "epoch": 0.41732524007491134, "grad_norm": 0.6815643906593323, "learning_rate": 1.9666174049368462e-05, "loss": 0.0608, "step": 23565 }, { "epoch": 0.4173429496119398, "grad_norm": 0.7800499796867371, "learning_rate": 1.966535636468984e-05, "loss": 0.0747, "step": 23566 }, { "epoch": 0.4173606591489682, "grad_norm": 0.44418856501579285, "learning_rate": 1.966453866466267e-05, "loss": 0.0995, "step": 23567 }, { "epoch": 0.41737836868599665, "grad_norm": 0.7586656808853149, "learning_rate": 1.9663720949289635e-05, "loss": 0.0709, "step": 23568 }, { "epoch": 0.41739607822302505, "grad_norm": 0.6680418848991394, "learning_rate": 1.9662903218573433e-05, "loss": 0.0827, "step": 23569 }, { "epoch": 0.4174137877600535, "grad_norm": 0.9681183695793152, "learning_rate": 1.966208547251675e-05, "loss": 0.063, "step": 23570 }, { "epoch": 0.4174314972970819, "grad_norm": 0.7823484539985657, "learning_rate": 1.9661267711122273e-05, "loss": 0.0979, "step": 23571 }, { "epoch": 0.41744920683411035, "grad_norm": 0.6685460805892944, "learning_rate": 1.9660449934392703e-05, "loss": 0.108, "step": 23572 }, { "epoch": 0.41746691637113875, "grad_norm": 0.5761573314666748, "learning_rate": 1.9659632142330722e-05, "loss": 0.0765, "step": 23573 }, { "epoch": 0.4174846259081672, "grad_norm": 0.4155987501144409, "learning_rate": 1.965881433493902e-05, "loss": 0.0613, "step": 23574 }, { "epoch": 0.4175023354451956, "grad_norm": 0.37237849831581116, "learning_rate": 1.9657996512220294e-05, "loss": 0.0614, "step": 23575 }, { "epoch": 0.41752004498222406, "grad_norm": 0.4206502437591553, "learning_rate": 1.965717867417723e-05, "loss": 0.0793, "step": 23576 }, { "epoch": 0.41753775451925246, "grad_norm": 0.8764514327049255, "learning_rate": 1.965636082081252e-05, "loss": 0.0785, "step": 23577 }, { "epoch": 0.4175554640562809, "grad_norm": 0.46538373827934265, "learning_rate": 1.9655542952128855e-05, "loss": 0.0672, "step": 23578 }, { "epoch": 0.4175731735933093, "grad_norm": 1.0603482723236084, "learning_rate": 1.965472506812892e-05, "loss": 0.0986, "step": 23579 }, { "epoch": 0.41759088313033776, "grad_norm": 0.5144228935241699, "learning_rate": 1.965390716881541e-05, "loss": 0.0474, "step": 23580 }, { "epoch": 0.4176085926673662, "grad_norm": 0.9647369384765625, "learning_rate": 1.965308925419102e-05, "loss": 0.1026, "step": 23581 }, { "epoch": 0.4176263022043946, "grad_norm": 0.36776286363601685, "learning_rate": 1.9652271324258434e-05, "loss": 0.0603, "step": 23582 }, { "epoch": 0.41764401174142307, "grad_norm": 0.49822086095809937, "learning_rate": 1.965145337902035e-05, "loss": 0.0644, "step": 23583 }, { "epoch": 0.41766172127845147, "grad_norm": 0.39401403069496155, "learning_rate": 1.965063541847945e-05, "loss": 0.0645, "step": 23584 }, { "epoch": 0.4176794308154799, "grad_norm": 0.9002789855003357, "learning_rate": 1.9649817442638435e-05, "loss": 0.065, "step": 23585 }, { "epoch": 0.4176971403525083, "grad_norm": 0.7126389741897583, "learning_rate": 1.964899945149999e-05, "loss": 0.0832, "step": 23586 }, { "epoch": 0.4177148498895368, "grad_norm": 0.5388398766517639, "learning_rate": 1.96481814450668e-05, "loss": 0.0859, "step": 23587 }, { "epoch": 0.4177325594265652, "grad_norm": 0.6033867001533508, "learning_rate": 1.9647363423341568e-05, "loss": 0.0778, "step": 23588 }, { "epoch": 0.41775026896359363, "grad_norm": 0.37590909004211426, "learning_rate": 1.9646545386326983e-05, "loss": 0.0462, "step": 23589 }, { "epoch": 0.41776797850062203, "grad_norm": 0.5777350664138794, "learning_rate": 1.964572733402573e-05, "loss": 0.0645, "step": 23590 }, { "epoch": 0.4177856880376505, "grad_norm": 0.7633907198905945, "learning_rate": 1.964490926644051e-05, "loss": 0.0784, "step": 23591 }, { "epoch": 0.4178033975746789, "grad_norm": 0.5593807101249695, "learning_rate": 1.9644091183574e-05, "loss": 0.0931, "step": 23592 }, { "epoch": 0.41782110711170733, "grad_norm": 0.6632445454597473, "learning_rate": 1.9643273085428902e-05, "loss": 0.0792, "step": 23593 }, { "epoch": 0.41783881664873573, "grad_norm": 0.5316735506057739, "learning_rate": 1.9642454972007903e-05, "loss": 0.0591, "step": 23594 }, { "epoch": 0.4178565261857642, "grad_norm": 1.018251657485962, "learning_rate": 1.9641636843313707e-05, "loss": 0.0872, "step": 23595 }, { "epoch": 0.41787423572279264, "grad_norm": 0.5829581022262573, "learning_rate": 1.9640818699348985e-05, "loss": 0.0454, "step": 23596 }, { "epoch": 0.41789194525982104, "grad_norm": 0.6558313965797424, "learning_rate": 1.9640000540116442e-05, "loss": 0.0918, "step": 23597 }, { "epoch": 0.4179096547968495, "grad_norm": 0.7648471593856812, "learning_rate": 1.9639182365618767e-05, "loss": 0.0577, "step": 23598 }, { "epoch": 0.4179273643338779, "grad_norm": 0.49795201420783997, "learning_rate": 1.9638364175858653e-05, "loss": 0.1005, "step": 23599 }, { "epoch": 0.41794507387090635, "grad_norm": 0.5303980708122253, "learning_rate": 1.963754597083879e-05, "loss": 0.0929, "step": 23600 }, { "epoch": 0.41796278340793475, "grad_norm": 0.6104173064231873, "learning_rate": 1.9636727750561863e-05, "loss": 0.0694, "step": 23601 }, { "epoch": 0.4179804929449632, "grad_norm": 0.35857415199279785, "learning_rate": 1.9635909515030578e-05, "loss": 0.0698, "step": 23602 }, { "epoch": 0.4179982024819916, "grad_norm": 0.612730085849762, "learning_rate": 1.9635091264247615e-05, "loss": 0.0745, "step": 23603 }, { "epoch": 0.41801591201902005, "grad_norm": 0.693042516708374, "learning_rate": 1.9634272998215673e-05, "loss": 0.1068, "step": 23604 }, { "epoch": 0.41803362155604845, "grad_norm": 0.6580432057380676, "learning_rate": 1.963345471693744e-05, "loss": 0.0618, "step": 23605 }, { "epoch": 0.4180513310930769, "grad_norm": 0.7457895278930664, "learning_rate": 1.9632636420415612e-05, "loss": 0.0436, "step": 23606 }, { "epoch": 0.4180690406301053, "grad_norm": 0.20969223976135254, "learning_rate": 1.963181810865288e-05, "loss": 0.0972, "step": 23607 }, { "epoch": 0.41808675016713376, "grad_norm": 0.7421050667762756, "learning_rate": 1.963099978165193e-05, "loss": 0.0753, "step": 23608 }, { "epoch": 0.41810445970416216, "grad_norm": 0.6737250089645386, "learning_rate": 1.9630181439415464e-05, "loss": 0.065, "step": 23609 }, { "epoch": 0.4181221692411906, "grad_norm": 0.6480224132537842, "learning_rate": 1.9629363081946166e-05, "loss": 0.1059, "step": 23610 }, { "epoch": 0.41813987877821907, "grad_norm": 0.7149601578712463, "learning_rate": 1.9628544709246732e-05, "loss": 0.0991, "step": 23611 }, { "epoch": 0.41815758831524746, "grad_norm": 0.5496078133583069, "learning_rate": 1.9627726321319854e-05, "loss": 0.0566, "step": 23612 }, { "epoch": 0.4181752978522759, "grad_norm": 0.6764562726020813, "learning_rate": 1.9626907918168223e-05, "loss": 0.0542, "step": 23613 }, { "epoch": 0.4181930073893043, "grad_norm": 1.9141380786895752, "learning_rate": 1.962608949979454e-05, "loss": 0.0754, "step": 23614 }, { "epoch": 0.41821071692633277, "grad_norm": 0.8298581838607788, "learning_rate": 1.9625271066201484e-05, "loss": 0.0457, "step": 23615 }, { "epoch": 0.41822842646336117, "grad_norm": 0.7453315854072571, "learning_rate": 1.9624452617391758e-05, "loss": 0.0965, "step": 23616 }, { "epoch": 0.4182461360003896, "grad_norm": 0.586147129535675, "learning_rate": 1.962363415336804e-05, "loss": 0.1035, "step": 23617 }, { "epoch": 0.418263845537418, "grad_norm": 0.6555935144424438, "learning_rate": 1.9622815674133047e-05, "loss": 0.0905, "step": 23618 }, { "epoch": 0.4182815550744465, "grad_norm": 0.728205144405365, "learning_rate": 1.9621997179689452e-05, "loss": 0.0744, "step": 23619 }, { "epoch": 0.4182992646114749, "grad_norm": 0.5865186452865601, "learning_rate": 1.9621178670039956e-05, "loss": 0.0395, "step": 23620 }, { "epoch": 0.41831697414850333, "grad_norm": 0.5899046659469604, "learning_rate": 1.9620360145187248e-05, "loss": 0.0426, "step": 23621 }, { "epoch": 0.4183346836855317, "grad_norm": 1.015644907951355, "learning_rate": 1.961954160513402e-05, "loss": 0.0585, "step": 23622 }, { "epoch": 0.4183523932225602, "grad_norm": 1.0151374340057373, "learning_rate": 1.961872304988297e-05, "loss": 0.0848, "step": 23623 }, { "epoch": 0.4183701027595886, "grad_norm": 0.5842286348342896, "learning_rate": 1.9617904479436787e-05, "loss": 0.0724, "step": 23624 }, { "epoch": 0.41838781229661703, "grad_norm": 0.5202152729034424, "learning_rate": 1.961708589379817e-05, "loss": 0.0582, "step": 23625 }, { "epoch": 0.4184055218336455, "grad_norm": 0.3817427158355713, "learning_rate": 1.9616267292969805e-05, "loss": 0.0527, "step": 23626 }, { "epoch": 0.4184232313706739, "grad_norm": 0.636379599571228, "learning_rate": 1.9615448676954383e-05, "loss": 0.1069, "step": 23627 }, { "epoch": 0.41844094090770234, "grad_norm": 1.0239614248275757, "learning_rate": 1.9614630045754605e-05, "loss": 0.0856, "step": 23628 }, { "epoch": 0.41845865044473074, "grad_norm": 0.6266937255859375, "learning_rate": 1.961381139937316e-05, "loss": 0.0978, "step": 23629 }, { "epoch": 0.4184763599817592, "grad_norm": 0.9981674551963806, "learning_rate": 1.9612992737812746e-05, "loss": 0.1052, "step": 23630 }, { "epoch": 0.4184940695187876, "grad_norm": 0.5256761312484741, "learning_rate": 1.9612174061076046e-05, "loss": 0.0677, "step": 23631 }, { "epoch": 0.41851177905581605, "grad_norm": 0.8878315687179565, "learning_rate": 1.9611355369165765e-05, "loss": 0.1, "step": 23632 }, { "epoch": 0.41852948859284445, "grad_norm": 0.7941734194755554, "learning_rate": 1.9610536662084587e-05, "loss": 0.0745, "step": 23633 }, { "epoch": 0.4185471981298729, "grad_norm": 0.6143015623092651, "learning_rate": 1.9609717939835215e-05, "loss": 0.0612, "step": 23634 }, { "epoch": 0.4185649076669013, "grad_norm": 0.8772131204605103, "learning_rate": 1.9608899202420334e-05, "loss": 0.0981, "step": 23635 }, { "epoch": 0.41858261720392975, "grad_norm": 0.8132858276367188, "learning_rate": 1.960808044984264e-05, "loss": 0.0716, "step": 23636 }, { "epoch": 0.41860032674095815, "grad_norm": 0.864558219909668, "learning_rate": 1.9607261682104825e-05, "loss": 0.0717, "step": 23637 }, { "epoch": 0.4186180362779866, "grad_norm": 0.8935508131980896, "learning_rate": 1.9606442899209595e-05, "loss": 0.0826, "step": 23638 }, { "epoch": 0.418635745815015, "grad_norm": 1.0917564630508423, "learning_rate": 1.9605624101159623e-05, "loss": 0.1333, "step": 23639 }, { "epoch": 0.41865345535204346, "grad_norm": 1.2416112422943115, "learning_rate": 1.960480528795762e-05, "loss": 0.0699, "step": 23640 }, { "epoch": 0.4186711648890719, "grad_norm": 0.694379985332489, "learning_rate": 1.960398645960627e-05, "loss": 0.0596, "step": 23641 }, { "epoch": 0.4186888744261003, "grad_norm": 0.7985854744911194, "learning_rate": 1.9603167616108275e-05, "loss": 0.1011, "step": 23642 }, { "epoch": 0.41870658396312876, "grad_norm": 0.6194629073143005, "learning_rate": 1.960234875746632e-05, "loss": 0.058, "step": 23643 }, { "epoch": 0.41872429350015716, "grad_norm": 0.5575700998306274, "learning_rate": 1.9601529883683102e-05, "loss": 0.0583, "step": 23644 }, { "epoch": 0.4187420030371856, "grad_norm": 0.7245814204216003, "learning_rate": 1.960071099476132e-05, "loss": 0.0843, "step": 23645 }, { "epoch": 0.418759712574214, "grad_norm": 0.6970334649085999, "learning_rate": 1.9599892090703663e-05, "loss": 0.1206, "step": 23646 }, { "epoch": 0.41877742211124247, "grad_norm": 0.7346437573432922, "learning_rate": 1.959907317151282e-05, "loss": 0.1152, "step": 23647 }, { "epoch": 0.41879513164827087, "grad_norm": 0.28341609239578247, "learning_rate": 1.9598254237191498e-05, "loss": 0.0374, "step": 23648 }, { "epoch": 0.4188128411852993, "grad_norm": 0.9707556366920471, "learning_rate": 1.9597435287742383e-05, "loss": 0.0695, "step": 23649 }, { "epoch": 0.4188305507223277, "grad_norm": 0.8793022632598877, "learning_rate": 1.959661632316817e-05, "loss": 0.0975, "step": 23650 }, { "epoch": 0.4188482602593562, "grad_norm": 0.5879509449005127, "learning_rate": 1.9595797343471557e-05, "loss": 0.0726, "step": 23651 }, { "epoch": 0.4188659697963846, "grad_norm": 0.8591073155403137, "learning_rate": 1.9594978348655235e-05, "loss": 0.0635, "step": 23652 }, { "epoch": 0.41888367933341303, "grad_norm": 0.6729079484939575, "learning_rate": 1.9594159338721897e-05, "loss": 0.0668, "step": 23653 }, { "epoch": 0.4189013888704414, "grad_norm": 0.5954091548919678, "learning_rate": 1.959334031367424e-05, "loss": 0.07, "step": 23654 }, { "epoch": 0.4189190984074699, "grad_norm": 0.5131877064704895, "learning_rate": 1.9592521273514962e-05, "loss": 0.0621, "step": 23655 }, { "epoch": 0.41893680794449834, "grad_norm": 0.6618404984474182, "learning_rate": 1.9591702218246746e-05, "loss": 0.0776, "step": 23656 }, { "epoch": 0.41895451748152673, "grad_norm": 0.4796162247657776, "learning_rate": 1.95908831478723e-05, "loss": 0.0669, "step": 23657 }, { "epoch": 0.4189722270185552, "grad_norm": 0.8144027590751648, "learning_rate": 1.959006406239431e-05, "loss": 0.103, "step": 23658 }, { "epoch": 0.4189899365555836, "grad_norm": 1.2131043672561646, "learning_rate": 1.9589244961815475e-05, "loss": 0.0936, "step": 23659 }, { "epoch": 0.41900764609261204, "grad_norm": 0.5874926447868347, "learning_rate": 1.9588425846138486e-05, "loss": 0.0849, "step": 23660 }, { "epoch": 0.41902535562964044, "grad_norm": 0.47233307361602783, "learning_rate": 1.958760671536604e-05, "loss": 0.0523, "step": 23661 }, { "epoch": 0.4190430651666689, "grad_norm": 0.44353699684143066, "learning_rate": 1.9586787569500838e-05, "loss": 0.0608, "step": 23662 }, { "epoch": 0.4190607747036973, "grad_norm": 0.5284425616264343, "learning_rate": 1.9585968408545564e-05, "loss": 0.0928, "step": 23663 }, { "epoch": 0.41907848424072575, "grad_norm": 0.5108414888381958, "learning_rate": 1.958514923250292e-05, "loss": 0.0539, "step": 23664 }, { "epoch": 0.41909619377775414, "grad_norm": 0.6391441822052002, "learning_rate": 1.9584330041375595e-05, "loss": 0.0743, "step": 23665 }, { "epoch": 0.4191139033147826, "grad_norm": 0.8222335577011108, "learning_rate": 1.958351083516629e-05, "loss": 0.0907, "step": 23666 }, { "epoch": 0.419131612851811, "grad_norm": 1.000919222831726, "learning_rate": 1.9582691613877696e-05, "loss": 0.123, "step": 23667 }, { "epoch": 0.41914932238883945, "grad_norm": 0.42755192518234253, "learning_rate": 1.9581872377512515e-05, "loss": 0.0635, "step": 23668 }, { "epoch": 0.4191670319258679, "grad_norm": 0.5108127593994141, "learning_rate": 1.9581053126073434e-05, "loss": 0.0763, "step": 23669 }, { "epoch": 0.4191847414628963, "grad_norm": 0.4062514007091522, "learning_rate": 1.958023385956315e-05, "loss": 0.0819, "step": 23670 }, { "epoch": 0.41920245099992476, "grad_norm": 0.6886229515075684, "learning_rate": 1.9579414577984366e-05, "loss": 0.0914, "step": 23671 }, { "epoch": 0.41922016053695316, "grad_norm": 0.9569984674453735, "learning_rate": 1.9578595281339763e-05, "loss": 0.0605, "step": 23672 }, { "epoch": 0.4192378700739816, "grad_norm": 0.43706512451171875, "learning_rate": 1.9577775969632048e-05, "loss": 0.0906, "step": 23673 }, { "epoch": 0.41925557961101, "grad_norm": 2.0934150218963623, "learning_rate": 1.9576956642863914e-05, "loss": 0.0711, "step": 23674 }, { "epoch": 0.41927328914803846, "grad_norm": 0.7162663340568542, "learning_rate": 1.9576137301038056e-05, "loss": 0.0674, "step": 23675 }, { "epoch": 0.41929099868506686, "grad_norm": 0.5537905693054199, "learning_rate": 1.9575317944157166e-05, "loss": 0.0789, "step": 23676 }, { "epoch": 0.4193087082220953, "grad_norm": 0.6815620064735413, "learning_rate": 1.9574498572223944e-05, "loss": 0.0735, "step": 23677 }, { "epoch": 0.4193264177591237, "grad_norm": 0.8727263808250427, "learning_rate": 1.9573679185241092e-05, "loss": 0.1094, "step": 23678 }, { "epoch": 0.41934412729615217, "grad_norm": 0.9116560220718384, "learning_rate": 1.9572859783211283e-05, "loss": 0.1307, "step": 23679 }, { "epoch": 0.41936183683318057, "grad_norm": 0.6605243682861328, "learning_rate": 1.957204036613724e-05, "loss": 0.0771, "step": 23680 }, { "epoch": 0.419379546370209, "grad_norm": 0.9386342167854309, "learning_rate": 1.957122093402164e-05, "loss": 0.0577, "step": 23681 }, { "epoch": 0.4193972559072374, "grad_norm": 0.9511741995811462, "learning_rate": 1.9570401486867194e-05, "loss": 0.0493, "step": 23682 }, { "epoch": 0.4194149654442659, "grad_norm": 0.8741879463195801, "learning_rate": 1.956958202467658e-05, "loss": 0.0856, "step": 23683 }, { "epoch": 0.41943267498129433, "grad_norm": 0.5580829381942749, "learning_rate": 1.9568762547452506e-05, "loss": 0.0524, "step": 23684 }, { "epoch": 0.4194503845183227, "grad_norm": 0.5895513892173767, "learning_rate": 1.9567943055197668e-05, "loss": 0.1049, "step": 23685 }, { "epoch": 0.4194680940553512, "grad_norm": 0.6301935911178589, "learning_rate": 1.956712354791476e-05, "loss": 0.0613, "step": 23686 }, { "epoch": 0.4194858035923796, "grad_norm": 0.6746100187301636, "learning_rate": 1.9566304025606474e-05, "loss": 0.0766, "step": 23687 }, { "epoch": 0.41950351312940803, "grad_norm": 0.5420066714286804, "learning_rate": 1.956548448827551e-05, "loss": 0.075, "step": 23688 }, { "epoch": 0.41952122266643643, "grad_norm": 0.7057170271873474, "learning_rate": 1.9564664935924567e-05, "loss": 0.0841, "step": 23689 }, { "epoch": 0.4195389322034649, "grad_norm": 0.9757918119430542, "learning_rate": 1.956384536855633e-05, "loss": 0.0882, "step": 23690 }, { "epoch": 0.4195566417404933, "grad_norm": 0.46749716997146606, "learning_rate": 1.9563025786173515e-05, "loss": 0.0741, "step": 23691 }, { "epoch": 0.41957435127752174, "grad_norm": 0.4294587969779968, "learning_rate": 1.95622061887788e-05, "loss": 0.0917, "step": 23692 }, { "epoch": 0.41959206081455014, "grad_norm": 1.119844675064087, "learning_rate": 1.956138657637489e-05, "loss": 0.0774, "step": 23693 }, { "epoch": 0.4196097703515786, "grad_norm": 0.4055284857749939, "learning_rate": 1.9560566948964476e-05, "loss": 0.0699, "step": 23694 }, { "epoch": 0.419627479888607, "grad_norm": 0.8945107460021973, "learning_rate": 1.955974730655026e-05, "loss": 0.1111, "step": 23695 }, { "epoch": 0.41964518942563545, "grad_norm": 0.5939079523086548, "learning_rate": 1.9558927649134938e-05, "loss": 0.0794, "step": 23696 }, { "epoch": 0.41966289896266384, "grad_norm": 0.4169315993785858, "learning_rate": 1.9558107976721204e-05, "loss": 0.1057, "step": 23697 }, { "epoch": 0.4196806084996923, "grad_norm": 1.0678383111953735, "learning_rate": 1.9557288289311754e-05, "loss": 0.0633, "step": 23698 }, { "epoch": 0.41969831803672075, "grad_norm": 0.4834181070327759, "learning_rate": 1.9556468586909286e-05, "loss": 0.0799, "step": 23699 }, { "epoch": 0.41971602757374915, "grad_norm": 0.8220914006233215, "learning_rate": 1.9555648869516503e-05, "loss": 0.0977, "step": 23700 }, { "epoch": 0.4197337371107776, "grad_norm": 0.5606575608253479, "learning_rate": 1.9554829137136095e-05, "loss": 0.0919, "step": 23701 }, { "epoch": 0.419751446647806, "grad_norm": 0.571502685546875, "learning_rate": 1.9554009389770756e-05, "loss": 0.0914, "step": 23702 }, { "epoch": 0.41976915618483446, "grad_norm": 0.5881478190422058, "learning_rate": 1.9553189627423187e-05, "loss": 0.0677, "step": 23703 }, { "epoch": 0.41978686572186286, "grad_norm": 0.7112909555435181, "learning_rate": 1.9552369850096084e-05, "loss": 0.0842, "step": 23704 }, { "epoch": 0.4198045752588913, "grad_norm": 0.8397397398948669, "learning_rate": 1.9551550057792147e-05, "loss": 0.0957, "step": 23705 }, { "epoch": 0.4198222847959197, "grad_norm": 0.9198829531669617, "learning_rate": 1.955073025051407e-05, "loss": 0.0863, "step": 23706 }, { "epoch": 0.41983999433294816, "grad_norm": 0.54628586769104, "learning_rate": 1.9549910428264552e-05, "loss": 0.0525, "step": 23707 }, { "epoch": 0.41985770386997656, "grad_norm": 0.4188808500766754, "learning_rate": 1.9549090591046284e-05, "loss": 0.0595, "step": 23708 }, { "epoch": 0.419875413407005, "grad_norm": 0.7244288921356201, "learning_rate": 1.9548270738861976e-05, "loss": 0.0839, "step": 23709 }, { "epoch": 0.4198931229440334, "grad_norm": 0.5193143486976624, "learning_rate": 1.9547450871714314e-05, "loss": 0.0581, "step": 23710 }, { "epoch": 0.41991083248106187, "grad_norm": 0.8215347528457642, "learning_rate": 1.9546630989605996e-05, "loss": 0.0784, "step": 23711 }, { "epoch": 0.41992854201809027, "grad_norm": 1.1302521228790283, "learning_rate": 1.954581109253972e-05, "loss": 0.0742, "step": 23712 }, { "epoch": 0.4199462515551187, "grad_norm": 0.6936098337173462, "learning_rate": 1.9544991180518187e-05, "loss": 0.0689, "step": 23713 }, { "epoch": 0.4199639610921472, "grad_norm": 0.5960809588432312, "learning_rate": 1.95441712535441e-05, "loss": 0.0823, "step": 23714 }, { "epoch": 0.4199816706291756, "grad_norm": 0.6413289308547974, "learning_rate": 1.954335131162014e-05, "loss": 0.0633, "step": 23715 }, { "epoch": 0.41999938016620403, "grad_norm": 0.5983454585075378, "learning_rate": 1.954253135474902e-05, "loss": 0.0823, "step": 23716 }, { "epoch": 0.4200170897032324, "grad_norm": 0.6897104382514954, "learning_rate": 1.9541711382933427e-05, "loss": 0.1, "step": 23717 }, { "epoch": 0.4200347992402609, "grad_norm": 0.37563127279281616, "learning_rate": 1.9540891396176066e-05, "loss": 0.0773, "step": 23718 }, { "epoch": 0.4200525087772893, "grad_norm": 0.7071501016616821, "learning_rate": 1.9540071394479632e-05, "loss": 0.0535, "step": 23719 }, { "epoch": 0.42007021831431773, "grad_norm": 1.0076375007629395, "learning_rate": 1.953925137784682e-05, "loss": 0.0987, "step": 23720 }, { "epoch": 0.42008792785134613, "grad_norm": 0.3370887339115143, "learning_rate": 1.953843134628033e-05, "loss": 0.0701, "step": 23721 }, { "epoch": 0.4201056373883746, "grad_norm": 0.9190014600753784, "learning_rate": 1.953761129978286e-05, "loss": 0.0951, "step": 23722 }, { "epoch": 0.420123346925403, "grad_norm": 0.6862201690673828, "learning_rate": 1.953679123835711e-05, "loss": 0.0914, "step": 23723 }, { "epoch": 0.42014105646243144, "grad_norm": 1.000881552696228, "learning_rate": 1.9535971162005773e-05, "loss": 0.0731, "step": 23724 }, { "epoch": 0.42015876599945984, "grad_norm": 0.23065023124217987, "learning_rate": 1.9535151070731552e-05, "loss": 0.0833, "step": 23725 }, { "epoch": 0.4201764755364883, "grad_norm": 0.68197101354599, "learning_rate": 1.953433096453714e-05, "loss": 0.0893, "step": 23726 }, { "epoch": 0.4201941850735167, "grad_norm": 0.5937983989715576, "learning_rate": 1.9533510843425237e-05, "loss": 0.0583, "step": 23727 }, { "epoch": 0.42021189461054514, "grad_norm": 0.5756849646568298, "learning_rate": 1.9532690707398546e-05, "loss": 0.0923, "step": 23728 }, { "epoch": 0.4202296041475736, "grad_norm": 0.6453230977058411, "learning_rate": 1.9531870556459758e-05, "loss": 0.0894, "step": 23729 }, { "epoch": 0.420247313684602, "grad_norm": 0.56158447265625, "learning_rate": 1.9531050390611578e-05, "loss": 0.048, "step": 23730 }, { "epoch": 0.42026502322163045, "grad_norm": 0.6147857904434204, "learning_rate": 1.9530230209856695e-05, "loss": 0.081, "step": 23731 }, { "epoch": 0.42028273275865885, "grad_norm": 0.9185388088226318, "learning_rate": 1.9529410014197814e-05, "loss": 0.0817, "step": 23732 }, { "epoch": 0.4203004422956873, "grad_norm": 0.7973214387893677, "learning_rate": 1.9528589803637638e-05, "loss": 0.094, "step": 23733 }, { "epoch": 0.4203181518327157, "grad_norm": 0.5658504366874695, "learning_rate": 1.952776957817885e-05, "loss": 0.0775, "step": 23734 }, { "epoch": 0.42033586136974416, "grad_norm": 0.7100446224212646, "learning_rate": 1.9526949337824168e-05, "loss": 0.0706, "step": 23735 }, { "epoch": 0.42035357090677256, "grad_norm": 0.6396911144256592, "learning_rate": 1.952612908257627e-05, "loss": 0.0669, "step": 23736 }, { "epoch": 0.420371280443801, "grad_norm": 0.5636816620826721, "learning_rate": 1.9525308812437875e-05, "loss": 0.0782, "step": 23737 }, { "epoch": 0.4203889899808294, "grad_norm": 0.519257128238678, "learning_rate": 1.9524488527411664e-05, "loss": 0.1099, "step": 23738 }, { "epoch": 0.42040669951785786, "grad_norm": 0.9909467101097107, "learning_rate": 1.9523668227500347e-05, "loss": 0.1029, "step": 23739 }, { "epoch": 0.42042440905488626, "grad_norm": 1.0020465850830078, "learning_rate": 1.9522847912706624e-05, "loss": 0.0797, "step": 23740 }, { "epoch": 0.4204421185919147, "grad_norm": 0.989504337310791, "learning_rate": 1.952202758303318e-05, "loss": 0.0938, "step": 23741 }, { "epoch": 0.4204598281289431, "grad_norm": 0.9714129567146301, "learning_rate": 1.952120723848273e-05, "loss": 0.0757, "step": 23742 }, { "epoch": 0.42047753766597157, "grad_norm": 0.5604074001312256, "learning_rate": 1.9520386879057957e-05, "loss": 0.0633, "step": 23743 }, { "epoch": 0.420495247203, "grad_norm": 1.0132874250411987, "learning_rate": 1.9519566504761576e-05, "loss": 0.0857, "step": 23744 }, { "epoch": 0.4205129567400284, "grad_norm": 0.5294745564460754, "learning_rate": 1.9518746115596276e-05, "loss": 0.0595, "step": 23745 }, { "epoch": 0.4205306662770569, "grad_norm": 0.8064154982566833, "learning_rate": 1.951792571156476e-05, "loss": 0.0828, "step": 23746 }, { "epoch": 0.4205483758140853, "grad_norm": 0.6413056254386902, "learning_rate": 1.9517105292669718e-05, "loss": 0.0513, "step": 23747 }, { "epoch": 0.4205660853511137, "grad_norm": 0.7698249220848083, "learning_rate": 1.9516284858913866e-05, "loss": 0.0765, "step": 23748 }, { "epoch": 0.4205837948881421, "grad_norm": 0.5469348430633545, "learning_rate": 1.951546441029989e-05, "loss": 0.0704, "step": 23749 }, { "epoch": 0.4206015044251706, "grad_norm": 0.47719311714172363, "learning_rate": 1.9514643946830493e-05, "loss": 0.075, "step": 23750 }, { "epoch": 0.420619213962199, "grad_norm": 0.7238662242889404, "learning_rate": 1.9513823468508372e-05, "loss": 0.083, "step": 23751 }, { "epoch": 0.42063692349922743, "grad_norm": 1.067790150642395, "learning_rate": 1.9513002975336226e-05, "loss": 0.099, "step": 23752 }, { "epoch": 0.42065463303625583, "grad_norm": 0.575264573097229, "learning_rate": 1.9512182467316764e-05, "loss": 0.0932, "step": 23753 }, { "epoch": 0.4206723425732843, "grad_norm": 0.6717340350151062, "learning_rate": 1.9511361944452674e-05, "loss": 0.0524, "step": 23754 }, { "epoch": 0.4206900521103127, "grad_norm": 0.8460087180137634, "learning_rate": 1.9510541406746662e-05, "loss": 0.1102, "step": 23755 }, { "epoch": 0.42070776164734114, "grad_norm": 0.42254531383514404, "learning_rate": 1.950972085420142e-05, "loss": 0.0627, "step": 23756 }, { "epoch": 0.42072547118436954, "grad_norm": 0.32469359040260315, "learning_rate": 1.9508900286819656e-05, "loss": 0.0652, "step": 23757 }, { "epoch": 0.420743180721398, "grad_norm": 0.5203328132629395, "learning_rate": 1.9508079704604065e-05, "loss": 0.0627, "step": 23758 }, { "epoch": 0.42076089025842645, "grad_norm": 0.8962687849998474, "learning_rate": 1.950725910755735e-05, "loss": 0.0904, "step": 23759 }, { "epoch": 0.42077859979545484, "grad_norm": 0.7548952102661133, "learning_rate": 1.9506438495682204e-05, "loss": 0.0859, "step": 23760 }, { "epoch": 0.4207963093324833, "grad_norm": 0.7282292246818542, "learning_rate": 1.9505617868981334e-05, "loss": 0.0683, "step": 23761 }, { "epoch": 0.4208140188695117, "grad_norm": 0.7679247856140137, "learning_rate": 1.9504797227457442e-05, "loss": 0.0967, "step": 23762 }, { "epoch": 0.42083172840654015, "grad_norm": 0.8710014224052429, "learning_rate": 1.9503976571113214e-05, "loss": 0.1066, "step": 23763 }, { "epoch": 0.42084943794356855, "grad_norm": 0.9777283072471619, "learning_rate": 1.9503155899951365e-05, "loss": 0.0879, "step": 23764 }, { "epoch": 0.420867147480597, "grad_norm": 0.6112900376319885, "learning_rate": 1.9502335213974584e-05, "loss": 0.1155, "step": 23765 }, { "epoch": 0.4208848570176254, "grad_norm": 0.8701276779174805, "learning_rate": 1.9501514513185575e-05, "loss": 0.0903, "step": 23766 }, { "epoch": 0.42090256655465386, "grad_norm": 0.5581190586090088, "learning_rate": 1.9500693797587042e-05, "loss": 0.0746, "step": 23767 }, { "epoch": 0.42092027609168225, "grad_norm": 0.5813480615615845, "learning_rate": 1.9499873067181682e-05, "loss": 0.0718, "step": 23768 }, { "epoch": 0.4209379856287107, "grad_norm": 0.48113858699798584, "learning_rate": 1.9499052321972192e-05, "loss": 0.1136, "step": 23769 }, { "epoch": 0.4209556951657391, "grad_norm": 0.5098552703857422, "learning_rate": 1.9498231561961275e-05, "loss": 0.1, "step": 23770 }, { "epoch": 0.42097340470276756, "grad_norm": 0.7452232837677002, "learning_rate": 1.9497410787151633e-05, "loss": 0.0575, "step": 23771 }, { "epoch": 0.42099111423979596, "grad_norm": 0.8096737265586853, "learning_rate": 1.9496589997545964e-05, "loss": 0.077, "step": 23772 }, { "epoch": 0.4210088237768244, "grad_norm": 0.5178524255752563, "learning_rate": 1.9495769193146967e-05, "loss": 0.0794, "step": 23773 }, { "epoch": 0.42102653331385287, "grad_norm": 0.35645535588264465, "learning_rate": 1.9494948373957345e-05, "loss": 0.1068, "step": 23774 }, { "epoch": 0.42104424285088127, "grad_norm": 0.7161682844161987, "learning_rate": 1.9494127539979798e-05, "loss": 0.0944, "step": 23775 }, { "epoch": 0.4210619523879097, "grad_norm": 0.8681582808494568, "learning_rate": 1.9493306691217027e-05, "loss": 0.1348, "step": 23776 }, { "epoch": 0.4210796619249381, "grad_norm": 0.5678898692131042, "learning_rate": 1.949248582767173e-05, "loss": 0.063, "step": 23777 }, { "epoch": 0.4210973714619666, "grad_norm": 0.5864291787147522, "learning_rate": 1.949166494934661e-05, "loss": 0.0824, "step": 23778 }, { "epoch": 0.421115080998995, "grad_norm": 0.723953366279602, "learning_rate": 1.9490844056244367e-05, "loss": 0.0877, "step": 23779 }, { "epoch": 0.4211327905360234, "grad_norm": 0.6097089648246765, "learning_rate": 1.9490023148367698e-05, "loss": 0.0604, "step": 23780 }, { "epoch": 0.4211505000730518, "grad_norm": 0.8156062364578247, "learning_rate": 1.9489202225719314e-05, "loss": 0.0488, "step": 23781 }, { "epoch": 0.4211682096100803, "grad_norm": 0.45425501465797424, "learning_rate": 1.94883812883019e-05, "loss": 0.0765, "step": 23782 }, { "epoch": 0.4211859191471087, "grad_norm": 0.776626467704773, "learning_rate": 1.948756033611817e-05, "loss": 0.0706, "step": 23783 }, { "epoch": 0.42120362868413713, "grad_norm": 0.7886282801628113, "learning_rate": 1.948673936917082e-05, "loss": 0.0653, "step": 23784 }, { "epoch": 0.42122133822116553, "grad_norm": 0.6096362471580505, "learning_rate": 1.9485918387462552e-05, "loss": 0.0898, "step": 23785 }, { "epoch": 0.421239047758194, "grad_norm": 0.6229512691497803, "learning_rate": 1.948509739099606e-05, "loss": 0.0937, "step": 23786 }, { "epoch": 0.4212567572952224, "grad_norm": 0.5932336449623108, "learning_rate": 1.948427637977406e-05, "loss": 0.0571, "step": 23787 }, { "epoch": 0.42127446683225084, "grad_norm": 0.9356570839881897, "learning_rate": 1.9483455353799243e-05, "loss": 0.0963, "step": 23788 }, { "epoch": 0.4212921763692793, "grad_norm": 0.710150420665741, "learning_rate": 1.9482634313074306e-05, "loss": 0.1081, "step": 23789 }, { "epoch": 0.4213098859063077, "grad_norm": 0.859775722026825, "learning_rate": 1.948181325760196e-05, "loss": 0.0644, "step": 23790 }, { "epoch": 0.42132759544333614, "grad_norm": 0.974413275718689, "learning_rate": 1.9480992187384898e-05, "loss": 0.1179, "step": 23791 }, { "epoch": 0.42134530498036454, "grad_norm": 0.6435611844062805, "learning_rate": 1.948017110242583e-05, "loss": 0.0906, "step": 23792 }, { "epoch": 0.421363014517393, "grad_norm": 0.5807059407234192, "learning_rate": 1.947935000272745e-05, "loss": 0.0583, "step": 23793 }, { "epoch": 0.4213807240544214, "grad_norm": 0.525818943977356, "learning_rate": 1.947852888829246e-05, "loss": 0.0812, "step": 23794 }, { "epoch": 0.42139843359144985, "grad_norm": 1.0509867668151855, "learning_rate": 1.9477707759123562e-05, "loss": 0.076, "step": 23795 }, { "epoch": 0.42141614312847825, "grad_norm": 0.8810983300209045, "learning_rate": 1.9476886615223463e-05, "loss": 0.1105, "step": 23796 }, { "epoch": 0.4214338526655067, "grad_norm": 0.6855149865150452, "learning_rate": 1.9476065456594857e-05, "loss": 0.0959, "step": 23797 }, { "epoch": 0.4214515622025351, "grad_norm": 0.6700144410133362, "learning_rate": 1.9475244283240448e-05, "loss": 0.0667, "step": 23798 }, { "epoch": 0.42146927173956356, "grad_norm": 0.503399670124054, "learning_rate": 1.947442309516294e-05, "loss": 0.1233, "step": 23799 }, { "epoch": 0.42148698127659195, "grad_norm": 0.4186426103115082, "learning_rate": 1.9473601892365028e-05, "loss": 0.0805, "step": 23800 }, { "epoch": 0.4215046908136204, "grad_norm": 0.9717091917991638, "learning_rate": 1.9472780674849428e-05, "loss": 0.0932, "step": 23801 }, { "epoch": 0.4215224003506488, "grad_norm": 0.7807286381721497, "learning_rate": 1.9471959442618822e-05, "loss": 0.1388, "step": 23802 }, { "epoch": 0.42154010988767726, "grad_norm": 0.6023477911949158, "learning_rate": 1.9471138195675925e-05, "loss": 0.0787, "step": 23803 }, { "epoch": 0.4215578194247057, "grad_norm": 0.6841499209403992, "learning_rate": 1.9470316934023437e-05, "loss": 0.0857, "step": 23804 }, { "epoch": 0.4215755289617341, "grad_norm": 0.480277955532074, "learning_rate": 1.9469495657664055e-05, "loss": 0.0621, "step": 23805 }, { "epoch": 0.42159323849876257, "grad_norm": 0.5353644490242004, "learning_rate": 1.946867436660049e-05, "loss": 0.0477, "step": 23806 }, { "epoch": 0.42161094803579097, "grad_norm": 1.0033222436904907, "learning_rate": 1.9467853060835433e-05, "loss": 0.0818, "step": 23807 }, { "epoch": 0.4216286575728194, "grad_norm": 0.5731155276298523, "learning_rate": 1.9467031740371594e-05, "loss": 0.0879, "step": 23808 }, { "epoch": 0.4216463671098478, "grad_norm": 0.7029039859771729, "learning_rate": 1.946621040521167e-05, "loss": 0.0762, "step": 23809 }, { "epoch": 0.4216640766468763, "grad_norm": 0.7009975910186768, "learning_rate": 1.9465389055358365e-05, "loss": 0.0934, "step": 23810 }, { "epoch": 0.42168178618390467, "grad_norm": 0.484842449426651, "learning_rate": 1.9464567690814386e-05, "loss": 0.0888, "step": 23811 }, { "epoch": 0.4216994957209331, "grad_norm": 0.8633050322532654, "learning_rate": 1.946374631158243e-05, "loss": 0.1215, "step": 23812 }, { "epoch": 0.4217172052579615, "grad_norm": 0.6478766798973083, "learning_rate": 1.9462924917665197e-05, "loss": 0.0767, "step": 23813 }, { "epoch": 0.42173491479499, "grad_norm": 0.9976066946983337, "learning_rate": 1.946210350906539e-05, "loss": 0.088, "step": 23814 }, { "epoch": 0.4217526243320184, "grad_norm": 0.45641931891441345, "learning_rate": 1.9461282085785722e-05, "loss": 0.0565, "step": 23815 }, { "epoch": 0.42177033386904683, "grad_norm": 0.6535177230834961, "learning_rate": 1.946046064782888e-05, "loss": 0.061, "step": 23816 }, { "epoch": 0.42178804340607523, "grad_norm": 0.479844331741333, "learning_rate": 1.945963919519758e-05, "loss": 0.0651, "step": 23817 }, { "epoch": 0.4218057529431037, "grad_norm": 0.6260796785354614, "learning_rate": 1.945881772789451e-05, "loss": 0.0884, "step": 23818 }, { "epoch": 0.42182346248013214, "grad_norm": 0.5500917434692383, "learning_rate": 1.9457996245922387e-05, "loss": 0.0531, "step": 23819 }, { "epoch": 0.42184117201716054, "grad_norm": 0.8278864026069641, "learning_rate": 1.9457174749283908e-05, "loss": 0.0756, "step": 23820 }, { "epoch": 0.421858881554189, "grad_norm": 0.45644375681877136, "learning_rate": 1.945635323798177e-05, "loss": 0.0588, "step": 23821 }, { "epoch": 0.4218765910912174, "grad_norm": 0.675864040851593, "learning_rate": 1.9455531712018682e-05, "loss": 0.0651, "step": 23822 }, { "epoch": 0.42189430062824584, "grad_norm": 0.8710344433784485, "learning_rate": 1.9454710171397347e-05, "loss": 0.0949, "step": 23823 }, { "epoch": 0.42191201016527424, "grad_norm": 0.6671717762947083, "learning_rate": 1.9453888616120467e-05, "loss": 0.0799, "step": 23824 }, { "epoch": 0.4219297197023027, "grad_norm": 0.5478869080543518, "learning_rate": 1.945306704619074e-05, "loss": 0.0521, "step": 23825 }, { "epoch": 0.4219474292393311, "grad_norm": 0.7309076189994812, "learning_rate": 1.945224546161088e-05, "loss": 0.0881, "step": 23826 }, { "epoch": 0.42196513877635955, "grad_norm": 0.47753438353538513, "learning_rate": 1.9451423862383574e-05, "loss": 0.0521, "step": 23827 }, { "epoch": 0.42198284831338795, "grad_norm": 0.7794350981712341, "learning_rate": 1.9450602248511537e-05, "loss": 0.0894, "step": 23828 }, { "epoch": 0.4220005578504164, "grad_norm": 0.8373096585273743, "learning_rate": 1.9449780619997472e-05, "loss": 0.0957, "step": 23829 }, { "epoch": 0.4220182673874448, "grad_norm": 1.0939295291900635, "learning_rate": 1.944895897684408e-05, "loss": 0.1189, "step": 23830 }, { "epoch": 0.42203597692447326, "grad_norm": 0.3374170958995819, "learning_rate": 1.9448137319054057e-05, "loss": 0.1329, "step": 23831 }, { "epoch": 0.42205368646150165, "grad_norm": 0.8750079274177551, "learning_rate": 1.944731564663011e-05, "loss": 0.0855, "step": 23832 }, { "epoch": 0.4220713959985301, "grad_norm": 0.9051545262336731, "learning_rate": 1.944649395957495e-05, "loss": 0.0649, "step": 23833 }, { "epoch": 0.42208910553555856, "grad_norm": 0.269014835357666, "learning_rate": 1.9445672257891272e-05, "loss": 0.0625, "step": 23834 }, { "epoch": 0.42210681507258696, "grad_norm": 0.5870257616043091, "learning_rate": 1.9444850541581785e-05, "loss": 0.0614, "step": 23835 }, { "epoch": 0.4221245246096154, "grad_norm": 0.6001025438308716, "learning_rate": 1.9444028810649188e-05, "loss": 0.0899, "step": 23836 }, { "epoch": 0.4221422341466438, "grad_norm": 0.6403553485870361, "learning_rate": 1.9443207065096182e-05, "loss": 0.0658, "step": 23837 }, { "epoch": 0.42215994368367227, "grad_norm": 0.9183845520019531, "learning_rate": 1.9442385304925482e-05, "loss": 0.1254, "step": 23838 }, { "epoch": 0.42217765322070067, "grad_norm": 0.5786135196685791, "learning_rate": 1.9441563530139777e-05, "loss": 0.053, "step": 23839 }, { "epoch": 0.4221953627577291, "grad_norm": 0.7495977878570557, "learning_rate": 1.9440741740741782e-05, "loss": 0.0907, "step": 23840 }, { "epoch": 0.4222130722947575, "grad_norm": 0.7630795836448669, "learning_rate": 1.943991993673419e-05, "loss": 0.085, "step": 23841 }, { "epoch": 0.422230781831786, "grad_norm": 0.7032306790351868, "learning_rate": 1.9439098118119713e-05, "loss": 0.0956, "step": 23842 }, { "epoch": 0.42224849136881437, "grad_norm": 1.0840023756027222, "learning_rate": 1.943827628490105e-05, "loss": 0.0527, "step": 23843 }, { "epoch": 0.4222662009058428, "grad_norm": 0.5469158291816711, "learning_rate": 1.9437454437080912e-05, "loss": 0.0648, "step": 23844 }, { "epoch": 0.4222839104428712, "grad_norm": 0.6258965730667114, "learning_rate": 1.9436632574662e-05, "loss": 0.103, "step": 23845 }, { "epoch": 0.4223016199798997, "grad_norm": 0.8248336315155029, "learning_rate": 1.9435810697647013e-05, "loss": 0.0924, "step": 23846 }, { "epoch": 0.4223193295169281, "grad_norm": 0.6531941294670105, "learning_rate": 1.943498880603865e-05, "loss": 0.0603, "step": 23847 }, { "epoch": 0.42233703905395653, "grad_norm": 0.48462367057800293, "learning_rate": 1.943416689983963e-05, "loss": 0.0527, "step": 23848 }, { "epoch": 0.422354748590985, "grad_norm": 0.9347237944602966, "learning_rate": 1.9433344979052647e-05, "loss": 0.0844, "step": 23849 }, { "epoch": 0.4223724581280134, "grad_norm": 0.9400871396064758, "learning_rate": 1.9432523043680405e-05, "loss": 0.0835, "step": 23850 }, { "epoch": 0.42239016766504184, "grad_norm": 0.7681233882904053, "learning_rate": 1.9431701093725614e-05, "loss": 0.077, "step": 23851 }, { "epoch": 0.42240787720207024, "grad_norm": 0.5134989619255066, "learning_rate": 1.9430879129190976e-05, "loss": 0.0869, "step": 23852 }, { "epoch": 0.4224255867390987, "grad_norm": 0.8661432862281799, "learning_rate": 1.9430057150079193e-05, "loss": 0.0826, "step": 23853 }, { "epoch": 0.4224432962761271, "grad_norm": 0.9966904520988464, "learning_rate": 1.9429235156392968e-05, "loss": 0.0862, "step": 23854 }, { "epoch": 0.42246100581315554, "grad_norm": 0.7246181964874268, "learning_rate": 1.9428413148135004e-05, "loss": 0.098, "step": 23855 }, { "epoch": 0.42247871535018394, "grad_norm": 0.9381176829338074, "learning_rate": 1.9427591125308017e-05, "loss": 0.0461, "step": 23856 }, { "epoch": 0.4224964248872124, "grad_norm": 0.3207843005657196, "learning_rate": 1.942676908791469e-05, "loss": 0.0583, "step": 23857 }, { "epoch": 0.4225141344242408, "grad_norm": 0.6963847279548645, "learning_rate": 1.9425947035957757e-05, "loss": 0.057, "step": 23858 }, { "epoch": 0.42253184396126925, "grad_norm": 0.9344499707221985, "learning_rate": 1.9425124969439895e-05, "loss": 0.054, "step": 23859 }, { "epoch": 0.42254955349829765, "grad_norm": 0.5204786062240601, "learning_rate": 1.9424302888363827e-05, "loss": 0.0902, "step": 23860 }, { "epoch": 0.4225672630353261, "grad_norm": 0.5592592358589172, "learning_rate": 1.9423480792732243e-05, "loss": 0.0836, "step": 23861 }, { "epoch": 0.4225849725723545, "grad_norm": 0.7567208409309387, "learning_rate": 1.9422658682547854e-05, "loss": 0.1007, "step": 23862 }, { "epoch": 0.42260268210938295, "grad_norm": 0.9173900485038757, "learning_rate": 1.942183655781337e-05, "loss": 0.1446, "step": 23863 }, { "epoch": 0.4226203916464114, "grad_norm": 0.8128861784934998, "learning_rate": 1.942101441853149e-05, "loss": 0.0819, "step": 23864 }, { "epoch": 0.4226381011834398, "grad_norm": 0.9818291068077087, "learning_rate": 1.9420192264704917e-05, "loss": 0.0956, "step": 23865 }, { "epoch": 0.42265581072046826, "grad_norm": 0.6070456504821777, "learning_rate": 1.941937009633636e-05, "loss": 0.081, "step": 23866 }, { "epoch": 0.42267352025749666, "grad_norm": 0.47652631998062134, "learning_rate": 1.9418547913428525e-05, "loss": 0.0597, "step": 23867 }, { "epoch": 0.4226912297945251, "grad_norm": 0.7350487112998962, "learning_rate": 1.9417725715984113e-05, "loss": 0.0783, "step": 23868 }, { "epoch": 0.4227089393315535, "grad_norm": 0.7157383561134338, "learning_rate": 1.9416903504005832e-05, "loss": 0.0883, "step": 23869 }, { "epoch": 0.42272664886858197, "grad_norm": 0.7119336724281311, "learning_rate": 1.941608127749638e-05, "loss": 0.0752, "step": 23870 }, { "epoch": 0.42274435840561037, "grad_norm": 0.7864252328872681, "learning_rate": 1.941525903645847e-05, "loss": 0.1082, "step": 23871 }, { "epoch": 0.4227620679426388, "grad_norm": 0.6706308722496033, "learning_rate": 1.9414436780894806e-05, "loss": 0.1123, "step": 23872 }, { "epoch": 0.4227797774796672, "grad_norm": 0.6750450134277344, "learning_rate": 1.9413614510808086e-05, "loss": 0.065, "step": 23873 }, { "epoch": 0.42279748701669567, "grad_norm": 1.0336377620697021, "learning_rate": 1.941279222620103e-05, "loss": 0.1043, "step": 23874 }, { "epoch": 0.42281519655372407, "grad_norm": 0.6834905743598938, "learning_rate": 1.9411969927076322e-05, "loss": 0.0769, "step": 23875 }, { "epoch": 0.4228329060907525, "grad_norm": 0.905848503112793, "learning_rate": 1.941114761343669e-05, "loss": 0.0646, "step": 23876 }, { "epoch": 0.4228506156277809, "grad_norm": 0.5486182570457458, "learning_rate": 1.941032528528482e-05, "loss": 0.0417, "step": 23877 }, { "epoch": 0.4228683251648094, "grad_norm": 0.6647774577140808, "learning_rate": 1.9409502942623434e-05, "loss": 0.0892, "step": 23878 }, { "epoch": 0.42288603470183783, "grad_norm": 0.6414440274238586, "learning_rate": 1.9408680585455226e-05, "loss": 0.0755, "step": 23879 }, { "epoch": 0.42290374423886623, "grad_norm": 0.7634661197662354, "learning_rate": 1.94078582137829e-05, "loss": 0.0993, "step": 23880 }, { "epoch": 0.4229214537758947, "grad_norm": 1.0032994747161865, "learning_rate": 1.9407035827609172e-05, "loss": 0.0894, "step": 23881 }, { "epoch": 0.4229391633129231, "grad_norm": 0.726354718208313, "learning_rate": 1.9406213426936738e-05, "loss": 0.0919, "step": 23882 }, { "epoch": 0.42295687284995154, "grad_norm": 0.617233395576477, "learning_rate": 1.940539101176831e-05, "loss": 0.0682, "step": 23883 }, { "epoch": 0.42297458238697994, "grad_norm": 0.6131599545478821, "learning_rate": 1.940456858210659e-05, "loss": 0.0726, "step": 23884 }, { "epoch": 0.4229922919240084, "grad_norm": 0.7905923128128052, "learning_rate": 1.9403746137954285e-05, "loss": 0.0763, "step": 23885 }, { "epoch": 0.4230100014610368, "grad_norm": 0.6643926501274109, "learning_rate": 1.9402923679314105e-05, "loss": 0.0802, "step": 23886 }, { "epoch": 0.42302771099806524, "grad_norm": 0.6313215494155884, "learning_rate": 1.9402101206188746e-05, "loss": 0.073, "step": 23887 }, { "epoch": 0.42304542053509364, "grad_norm": 1.1017969846725464, "learning_rate": 1.9401278718580922e-05, "loss": 0.1239, "step": 23888 }, { "epoch": 0.4230631300721221, "grad_norm": 0.6957409381866455, "learning_rate": 1.9400456216493335e-05, "loss": 0.049, "step": 23889 }, { "epoch": 0.4230808396091505, "grad_norm": 0.6547967195510864, "learning_rate": 1.939963369992869e-05, "loss": 0.0956, "step": 23890 }, { "epoch": 0.42309854914617895, "grad_norm": 1.0913872718811035, "learning_rate": 1.93988111688897e-05, "loss": 0.1025, "step": 23891 }, { "epoch": 0.42311625868320735, "grad_norm": 0.8447481989860535, "learning_rate": 1.9397988623379065e-05, "loss": 0.0592, "step": 23892 }, { "epoch": 0.4231339682202358, "grad_norm": 0.7362576127052307, "learning_rate": 1.939716606339949e-05, "loss": 0.084, "step": 23893 }, { "epoch": 0.42315167775726426, "grad_norm": 0.7181573510169983, "learning_rate": 1.939634348895368e-05, "loss": 0.062, "step": 23894 }, { "epoch": 0.42316938729429265, "grad_norm": 0.6041368246078491, "learning_rate": 1.939552090004435e-05, "loss": 0.0676, "step": 23895 }, { "epoch": 0.4231870968313211, "grad_norm": 0.6500610113143921, "learning_rate": 1.93946982966742e-05, "loss": 0.0585, "step": 23896 }, { "epoch": 0.4232048063683495, "grad_norm": 0.8356122374534607, "learning_rate": 1.939387567884594e-05, "loss": 0.0779, "step": 23897 }, { "epoch": 0.42322251590537796, "grad_norm": 0.6081557273864746, "learning_rate": 1.9393053046562266e-05, "loss": 0.067, "step": 23898 }, { "epoch": 0.42324022544240636, "grad_norm": 0.5025218725204468, "learning_rate": 1.9392230399825893e-05, "loss": 0.0843, "step": 23899 }, { "epoch": 0.4232579349794348, "grad_norm": 0.7006725072860718, "learning_rate": 1.9391407738639535e-05, "loss": 0.0739, "step": 23900 }, { "epoch": 0.4232756445164632, "grad_norm": 0.5734977722167969, "learning_rate": 1.939058506300588e-05, "loss": 0.0552, "step": 23901 }, { "epoch": 0.42329335405349167, "grad_norm": 0.7281814813613892, "learning_rate": 1.9389762372927652e-05, "loss": 0.0659, "step": 23902 }, { "epoch": 0.42331106359052006, "grad_norm": 0.2649520933628082, "learning_rate": 1.9388939668407546e-05, "loss": 0.0535, "step": 23903 }, { "epoch": 0.4233287731275485, "grad_norm": 0.4379211962223053, "learning_rate": 1.938811694944827e-05, "loss": 0.0483, "step": 23904 }, { "epoch": 0.4233464826645769, "grad_norm": 0.9870124459266663, "learning_rate": 1.9387294216052535e-05, "loss": 0.1115, "step": 23905 }, { "epoch": 0.42336419220160537, "grad_norm": 0.7718234062194824, "learning_rate": 1.9386471468223048e-05, "loss": 0.0871, "step": 23906 }, { "epoch": 0.42338190173863377, "grad_norm": 0.8006581664085388, "learning_rate": 1.9385648705962514e-05, "loss": 0.0571, "step": 23907 }, { "epoch": 0.4233996112756622, "grad_norm": 0.6539360284805298, "learning_rate": 1.938482592927364e-05, "loss": 0.0774, "step": 23908 }, { "epoch": 0.4234173208126907, "grad_norm": 0.632744312286377, "learning_rate": 1.9384003138159125e-05, "loss": 0.0677, "step": 23909 }, { "epoch": 0.4234350303497191, "grad_norm": 0.9548075199127197, "learning_rate": 1.9383180332621686e-05, "loss": 0.091, "step": 23910 }, { "epoch": 0.42345273988674753, "grad_norm": 0.7611828446388245, "learning_rate": 1.938235751266403e-05, "loss": 0.0712, "step": 23911 }, { "epoch": 0.42347044942377593, "grad_norm": 0.8129559755325317, "learning_rate": 1.9381534678288857e-05, "loss": 0.101, "step": 23912 }, { "epoch": 0.4234881589608044, "grad_norm": 0.49541616439819336, "learning_rate": 1.938071182949888e-05, "loss": 0.0658, "step": 23913 }, { "epoch": 0.4235058684978328, "grad_norm": 0.8373169898986816, "learning_rate": 1.9379888966296803e-05, "loss": 0.0984, "step": 23914 }, { "epoch": 0.42352357803486124, "grad_norm": 0.7303729057312012, "learning_rate": 1.9379066088685338e-05, "loss": 0.0634, "step": 23915 }, { "epoch": 0.42354128757188964, "grad_norm": 0.43338581919670105, "learning_rate": 1.937824319666719e-05, "loss": 0.0532, "step": 23916 }, { "epoch": 0.4235589971089181, "grad_norm": 0.4586799442768097, "learning_rate": 1.937742029024506e-05, "loss": 0.0665, "step": 23917 }, { "epoch": 0.4235767066459465, "grad_norm": 0.825148344039917, "learning_rate": 1.937659736942166e-05, "loss": 0.0728, "step": 23918 }, { "epoch": 0.42359441618297494, "grad_norm": 0.7724465131759644, "learning_rate": 1.9375774434199698e-05, "loss": 0.0501, "step": 23919 }, { "epoch": 0.42361212572000334, "grad_norm": 0.5378347039222717, "learning_rate": 1.9374951484581883e-05, "loss": 0.0814, "step": 23920 }, { "epoch": 0.4236298352570318, "grad_norm": 0.7443909049034119, "learning_rate": 1.9374128520570913e-05, "loss": 0.1128, "step": 23921 }, { "epoch": 0.42364754479406025, "grad_norm": 0.5686814188957214, "learning_rate": 1.937330554216951e-05, "loss": 0.0826, "step": 23922 }, { "epoch": 0.42366525433108865, "grad_norm": 0.9037157893180847, "learning_rate": 1.937248254938037e-05, "loss": 0.0698, "step": 23923 }, { "epoch": 0.4236829638681171, "grad_norm": 0.9038273692131042, "learning_rate": 1.9371659542206207e-05, "loss": 0.1123, "step": 23924 }, { "epoch": 0.4237006734051455, "grad_norm": 1.1442509889602661, "learning_rate": 1.937083652064973e-05, "loss": 0.0846, "step": 23925 }, { "epoch": 0.42371838294217395, "grad_norm": 0.4507974088191986, "learning_rate": 1.9370013484713637e-05, "loss": 0.0733, "step": 23926 }, { "epoch": 0.42373609247920235, "grad_norm": 0.5903588533401489, "learning_rate": 1.936919043440064e-05, "loss": 0.0889, "step": 23927 }, { "epoch": 0.4237538020162308, "grad_norm": 1.0085629224777222, "learning_rate": 1.936836736971345e-05, "loss": 0.0798, "step": 23928 }, { "epoch": 0.4237715115532592, "grad_norm": 0.6574936509132385, "learning_rate": 1.936754429065478e-05, "loss": 0.0606, "step": 23929 }, { "epoch": 0.42378922109028766, "grad_norm": 0.9857378005981445, "learning_rate": 1.9366721197227325e-05, "loss": 0.1234, "step": 23930 }, { "epoch": 0.42380693062731606, "grad_norm": 0.7461755275726318, "learning_rate": 1.93658980894338e-05, "loss": 0.0507, "step": 23931 }, { "epoch": 0.4238246401643445, "grad_norm": 0.43738943338394165, "learning_rate": 1.9365074967276912e-05, "loss": 0.1019, "step": 23932 }, { "epoch": 0.4238423497013729, "grad_norm": 0.7943410873413086, "learning_rate": 1.9364251830759367e-05, "loss": 0.0958, "step": 23933 }, { "epoch": 0.42386005923840137, "grad_norm": 0.26709121465682983, "learning_rate": 1.936342867988388e-05, "loss": 0.0697, "step": 23934 }, { "epoch": 0.42387776877542976, "grad_norm": 3.052215814590454, "learning_rate": 1.9362605514653146e-05, "loss": 0.0921, "step": 23935 }, { "epoch": 0.4238954783124582, "grad_norm": 0.6239018440246582, "learning_rate": 1.936178233506989e-05, "loss": 0.0518, "step": 23936 }, { "epoch": 0.4239131878494867, "grad_norm": 0.6506986021995544, "learning_rate": 1.9360959141136808e-05, "loss": 0.0945, "step": 23937 }, { "epoch": 0.42393089738651507, "grad_norm": 1.5588781833648682, "learning_rate": 1.9360135932856606e-05, "loss": 0.0575, "step": 23938 }, { "epoch": 0.4239486069235435, "grad_norm": 0.9046158790588379, "learning_rate": 1.9359312710232006e-05, "loss": 0.1223, "step": 23939 }, { "epoch": 0.4239663164605719, "grad_norm": 0.7688039541244507, "learning_rate": 1.93584894732657e-05, "loss": 0.0762, "step": 23940 }, { "epoch": 0.4239840259976004, "grad_norm": 0.6766766309738159, "learning_rate": 1.9357666221960413e-05, "loss": 0.0805, "step": 23941 }, { "epoch": 0.4240017355346288, "grad_norm": 1.0633383989334106, "learning_rate": 1.9356842956318838e-05, "loss": 0.0848, "step": 23942 }, { "epoch": 0.42401944507165723, "grad_norm": 0.8740554451942444, "learning_rate": 1.9356019676343693e-05, "loss": 0.078, "step": 23943 }, { "epoch": 0.42403715460868563, "grad_norm": 0.5343936681747437, "learning_rate": 1.9355196382037687e-05, "loss": 0.0476, "step": 23944 }, { "epoch": 0.4240548641457141, "grad_norm": 1.9861756563186646, "learning_rate": 1.935437307340352e-05, "loss": 0.1243, "step": 23945 }, { "epoch": 0.4240725736827425, "grad_norm": 0.9187873005867004, "learning_rate": 1.9353549750443908e-05, "loss": 0.0765, "step": 23946 }, { "epoch": 0.42409028321977094, "grad_norm": 0.6461310982704163, "learning_rate": 1.9352726413161557e-05, "loss": 0.0643, "step": 23947 }, { "epoch": 0.42410799275679933, "grad_norm": 0.9363739490509033, "learning_rate": 1.9351903061559178e-05, "loss": 0.1364, "step": 23948 }, { "epoch": 0.4241257022938278, "grad_norm": 0.618665337562561, "learning_rate": 1.935107969563948e-05, "loss": 0.0969, "step": 23949 }, { "epoch": 0.4241434118308562, "grad_norm": 0.45839664340019226, "learning_rate": 1.935025631540517e-05, "loss": 0.0483, "step": 23950 }, { "epoch": 0.42416112136788464, "grad_norm": 0.6519973874092102, "learning_rate": 1.934943292085895e-05, "loss": 0.0956, "step": 23951 }, { "epoch": 0.4241788309049131, "grad_norm": 0.8150706887245178, "learning_rate": 1.934860951200354e-05, "loss": 0.0739, "step": 23952 }, { "epoch": 0.4241965404419415, "grad_norm": 0.7847519516944885, "learning_rate": 1.934778608884164e-05, "loss": 0.0655, "step": 23953 }, { "epoch": 0.42421424997896995, "grad_norm": 0.5740929841995239, "learning_rate": 1.9346962651375972e-05, "loss": 0.0607, "step": 23954 }, { "epoch": 0.42423195951599835, "grad_norm": 0.9561858177185059, "learning_rate": 1.9346139199609234e-05, "loss": 0.0923, "step": 23955 }, { "epoch": 0.4242496690530268, "grad_norm": 0.7791129946708679, "learning_rate": 1.9345315733544136e-05, "loss": 0.0652, "step": 23956 }, { "epoch": 0.4242673785900552, "grad_norm": 0.7133737802505493, "learning_rate": 1.934449225318339e-05, "loss": 0.0843, "step": 23957 }, { "epoch": 0.42428508812708365, "grad_norm": 0.5170652270317078, "learning_rate": 1.93436687585297e-05, "loss": 0.0596, "step": 23958 }, { "epoch": 0.42430279766411205, "grad_norm": 0.41413286328315735, "learning_rate": 1.9342845249585783e-05, "loss": 0.0868, "step": 23959 }, { "epoch": 0.4243205072011405, "grad_norm": 0.5047609210014343, "learning_rate": 1.934202172635434e-05, "loss": 0.0569, "step": 23960 }, { "epoch": 0.4243382167381689, "grad_norm": 0.6563296914100647, "learning_rate": 1.9341198188838086e-05, "loss": 0.0717, "step": 23961 }, { "epoch": 0.42435592627519736, "grad_norm": 0.5715828537940979, "learning_rate": 1.934037463703973e-05, "loss": 0.068, "step": 23962 }, { "epoch": 0.42437363581222576, "grad_norm": 0.8221351504325867, "learning_rate": 1.9339551070961984e-05, "loss": 0.0823, "step": 23963 }, { "epoch": 0.4243913453492542, "grad_norm": 1.0103075504302979, "learning_rate": 1.9338727490607553e-05, "loss": 0.0909, "step": 23964 }, { "epoch": 0.4244090548862826, "grad_norm": 0.5877995491027832, "learning_rate": 1.9337903895979144e-05, "loss": 0.1107, "step": 23965 }, { "epoch": 0.42442676442331106, "grad_norm": 0.8121212124824524, "learning_rate": 1.933708028707947e-05, "loss": 0.0966, "step": 23966 }, { "epoch": 0.4244444739603395, "grad_norm": 0.5426387190818787, "learning_rate": 1.933625666391124e-05, "loss": 0.065, "step": 23967 }, { "epoch": 0.4244621834973679, "grad_norm": 0.6162817478179932, "learning_rate": 1.9335433026477167e-05, "loss": 0.0984, "step": 23968 }, { "epoch": 0.42447989303439637, "grad_norm": 0.9516119956970215, "learning_rate": 1.933460937477996e-05, "loss": 0.0771, "step": 23969 }, { "epoch": 0.42449760257142477, "grad_norm": 0.7887403964996338, "learning_rate": 1.9333785708822324e-05, "loss": 0.061, "step": 23970 }, { "epoch": 0.4245153121084532, "grad_norm": 0.9454470872879028, "learning_rate": 1.933296202860697e-05, "loss": 0.0789, "step": 23971 }, { "epoch": 0.4245330216454816, "grad_norm": 0.8392911553382874, "learning_rate": 1.933213833413661e-05, "loss": 0.0669, "step": 23972 }, { "epoch": 0.4245507311825101, "grad_norm": 0.7816632390022278, "learning_rate": 1.933131462541395e-05, "loss": 0.1479, "step": 23973 }, { "epoch": 0.4245684407195385, "grad_norm": 0.750981867313385, "learning_rate": 1.9330490902441706e-05, "loss": 0.1096, "step": 23974 }, { "epoch": 0.42458615025656693, "grad_norm": 0.8095316290855408, "learning_rate": 1.9329667165222584e-05, "loss": 0.1271, "step": 23975 }, { "epoch": 0.42460385979359533, "grad_norm": 0.3511427938938141, "learning_rate": 1.9328843413759296e-05, "loss": 0.0661, "step": 23976 }, { "epoch": 0.4246215693306238, "grad_norm": 0.8148764371871948, "learning_rate": 1.9328019648054554e-05, "loss": 0.0827, "step": 23977 }, { "epoch": 0.4246392788676522, "grad_norm": 0.5192356109619141, "learning_rate": 1.9327195868111064e-05, "loss": 0.0628, "step": 23978 }, { "epoch": 0.42465698840468064, "grad_norm": 0.549281895160675, "learning_rate": 1.9326372073931535e-05, "loss": 0.0978, "step": 23979 }, { "epoch": 0.42467469794170903, "grad_norm": 0.7323254346847534, "learning_rate": 1.9325548265518677e-05, "loss": 0.0918, "step": 23980 }, { "epoch": 0.4246924074787375, "grad_norm": 0.5893282294273376, "learning_rate": 1.9324724442875205e-05, "loss": 0.0987, "step": 23981 }, { "epoch": 0.42471011701576594, "grad_norm": 0.9410910606384277, "learning_rate": 1.932390060600383e-05, "loss": 0.0749, "step": 23982 }, { "epoch": 0.42472782655279434, "grad_norm": 0.7312895059585571, "learning_rate": 1.9323076754907254e-05, "loss": 0.0634, "step": 23983 }, { "epoch": 0.4247455360898228, "grad_norm": 0.5542731285095215, "learning_rate": 1.9322252889588203e-05, "loss": 0.0765, "step": 23984 }, { "epoch": 0.4247632456268512, "grad_norm": 0.8259862661361694, "learning_rate": 1.9321429010049362e-05, "loss": 0.0839, "step": 23985 }, { "epoch": 0.42478095516387965, "grad_norm": 0.9524046778678894, "learning_rate": 1.9320605116293466e-05, "loss": 0.0883, "step": 23986 }, { "epoch": 0.42479866470090805, "grad_norm": 1.3169206380844116, "learning_rate": 1.9319781208323216e-05, "loss": 0.1012, "step": 23987 }, { "epoch": 0.4248163742379365, "grad_norm": 0.7930140495300293, "learning_rate": 1.931895728614132e-05, "loss": 0.0756, "step": 23988 }, { "epoch": 0.4248340837749649, "grad_norm": 0.5744486451148987, "learning_rate": 1.9318133349750496e-05, "loss": 0.0851, "step": 23989 }, { "epoch": 0.42485179331199335, "grad_norm": 0.6694108247756958, "learning_rate": 1.931730939915344e-05, "loss": 0.0794, "step": 23990 }, { "epoch": 0.42486950284902175, "grad_norm": 0.5838760733604431, "learning_rate": 1.9316485434352884e-05, "loss": 0.0508, "step": 23991 }, { "epoch": 0.4248872123860502, "grad_norm": 0.6424716114997864, "learning_rate": 1.9315661455351525e-05, "loss": 0.0946, "step": 23992 }, { "epoch": 0.4249049219230786, "grad_norm": 1.3654183149337769, "learning_rate": 1.931483746215207e-05, "loss": 0.0919, "step": 23993 }, { "epoch": 0.42492263146010706, "grad_norm": 0.9640253186225891, "learning_rate": 1.931401345475724e-05, "loss": 0.0663, "step": 23994 }, { "epoch": 0.42494034099713546, "grad_norm": 0.7521533370018005, "learning_rate": 1.9313189433169742e-05, "loss": 0.0978, "step": 23995 }, { "epoch": 0.4249580505341639, "grad_norm": 0.37658968567848206, "learning_rate": 1.9312365397392293e-05, "loss": 0.0402, "step": 23996 }, { "epoch": 0.42497576007119237, "grad_norm": 0.5870036482810974, "learning_rate": 1.931154134742759e-05, "loss": 0.0774, "step": 23997 }, { "epoch": 0.42499346960822076, "grad_norm": 0.8203322887420654, "learning_rate": 1.9310717283278356e-05, "loss": 0.0971, "step": 23998 }, { "epoch": 0.4250111791452492, "grad_norm": 0.5296125411987305, "learning_rate": 1.9309893204947293e-05, "loss": 0.0757, "step": 23999 }, { "epoch": 0.4250288886822776, "grad_norm": 0.5082348585128784, "learning_rate": 1.930906911243712e-05, "loss": 0.11, "step": 24000 }, { "epoch": 0.42504659821930607, "grad_norm": 0.5244293808937073, "learning_rate": 1.9308245005750546e-05, "loss": 0.0805, "step": 24001 }, { "epoch": 0.42506430775633447, "grad_norm": 0.4337126612663269, "learning_rate": 1.9307420884890284e-05, "loss": 0.067, "step": 24002 }, { "epoch": 0.4250820172933629, "grad_norm": 0.633798360824585, "learning_rate": 1.9306596749859045e-05, "loss": 0.0669, "step": 24003 }, { "epoch": 0.4250997268303913, "grad_norm": 0.7305153608322144, "learning_rate": 1.9305772600659527e-05, "loss": 0.0771, "step": 24004 }, { "epoch": 0.4251174363674198, "grad_norm": 1.0119755268096924, "learning_rate": 1.9304948437294464e-05, "loss": 0.0863, "step": 24005 }, { "epoch": 0.4251351459044482, "grad_norm": 0.37392085790634155, "learning_rate": 1.9304124259766553e-05, "loss": 0.0578, "step": 24006 }, { "epoch": 0.42515285544147663, "grad_norm": 0.5277126431465149, "learning_rate": 1.9303300068078507e-05, "loss": 0.0858, "step": 24007 }, { "epoch": 0.42517056497850503, "grad_norm": 0.7887082099914551, "learning_rate": 1.930247586223304e-05, "loss": 0.0776, "step": 24008 }, { "epoch": 0.4251882745155335, "grad_norm": 0.5118269920349121, "learning_rate": 1.930165164223286e-05, "loss": 0.0668, "step": 24009 }, { "epoch": 0.4252059840525619, "grad_norm": 0.7256459593772888, "learning_rate": 1.9300827408080688e-05, "loss": 0.1034, "step": 24010 }, { "epoch": 0.42522369358959033, "grad_norm": 0.9992806315422058, "learning_rate": 1.9300003159779223e-05, "loss": 0.0737, "step": 24011 }, { "epoch": 0.4252414031266188, "grad_norm": 0.5131428241729736, "learning_rate": 1.9299178897331186e-05, "loss": 0.0764, "step": 24012 }, { "epoch": 0.4252591126636472, "grad_norm": 0.6260712742805481, "learning_rate": 1.9298354620739282e-05, "loss": 0.0964, "step": 24013 }, { "epoch": 0.42527682220067564, "grad_norm": 0.5880693197250366, "learning_rate": 1.9297530330006223e-05, "loss": 0.0615, "step": 24014 }, { "epoch": 0.42529453173770404, "grad_norm": 0.5620965361595154, "learning_rate": 1.9296706025134728e-05, "loss": 0.0965, "step": 24015 }, { "epoch": 0.4253122412747325, "grad_norm": 0.6277493834495544, "learning_rate": 1.9295881706127506e-05, "loss": 0.0711, "step": 24016 }, { "epoch": 0.4253299508117609, "grad_norm": 0.5514084100723267, "learning_rate": 1.929505737298727e-05, "loss": 0.0714, "step": 24017 }, { "epoch": 0.42534766034878935, "grad_norm": 0.5891886949539185, "learning_rate": 1.9294233025716723e-05, "loss": 0.082, "step": 24018 }, { "epoch": 0.42536536988581775, "grad_norm": 0.6377490162849426, "learning_rate": 1.9293408664318585e-05, "loss": 0.0675, "step": 24019 }, { "epoch": 0.4253830794228462, "grad_norm": 0.5903027057647705, "learning_rate": 1.9292584288795567e-05, "loss": 0.0692, "step": 24020 }, { "epoch": 0.4254007889598746, "grad_norm": 0.5875881910324097, "learning_rate": 1.9291759899150385e-05, "loss": 0.0792, "step": 24021 }, { "epoch": 0.42541849849690305, "grad_norm": 0.3734111785888672, "learning_rate": 1.9290935495385743e-05, "loss": 0.0631, "step": 24022 }, { "epoch": 0.42543620803393145, "grad_norm": 0.7833982110023499, "learning_rate": 1.929011107750436e-05, "loss": 0.1122, "step": 24023 }, { "epoch": 0.4254539175709599, "grad_norm": 0.5712379813194275, "learning_rate": 1.9289286645508942e-05, "loss": 0.072, "step": 24024 }, { "epoch": 0.4254716271079883, "grad_norm": 1.0034668445587158, "learning_rate": 1.928846219940221e-05, "loss": 0.0763, "step": 24025 }, { "epoch": 0.42548933664501676, "grad_norm": 0.4729742407798767, "learning_rate": 1.928763773918687e-05, "loss": 0.0713, "step": 24026 }, { "epoch": 0.4255070461820452, "grad_norm": 0.9521291851997375, "learning_rate": 1.9286813264865633e-05, "loss": 0.0741, "step": 24027 }, { "epoch": 0.4255247557190736, "grad_norm": 0.5208005309104919, "learning_rate": 1.9285988776441213e-05, "loss": 0.0918, "step": 24028 }, { "epoch": 0.42554246525610206, "grad_norm": 0.9027261734008789, "learning_rate": 1.928516427391632e-05, "loss": 0.0636, "step": 24029 }, { "epoch": 0.42556017479313046, "grad_norm": 0.1960829347372055, "learning_rate": 1.928433975729368e-05, "loss": 0.0555, "step": 24030 }, { "epoch": 0.4255778843301589, "grad_norm": 0.7253435850143433, "learning_rate": 1.9283515226575988e-05, "loss": 0.0471, "step": 24031 }, { "epoch": 0.4255955938671873, "grad_norm": 0.648930013179779, "learning_rate": 1.9282690681765967e-05, "loss": 0.1087, "step": 24032 }, { "epoch": 0.42561330340421577, "grad_norm": 0.4509566128253937, "learning_rate": 1.9281866122866323e-05, "loss": 0.0622, "step": 24033 }, { "epoch": 0.42563101294124417, "grad_norm": 1.1367108821868896, "learning_rate": 1.928104154987978e-05, "loss": 0.0982, "step": 24034 }, { "epoch": 0.4256487224782726, "grad_norm": 0.8317202925682068, "learning_rate": 1.9280216962809042e-05, "loss": 0.0847, "step": 24035 }, { "epoch": 0.425666432015301, "grad_norm": 0.8299692869186401, "learning_rate": 1.9279392361656816e-05, "loss": 0.0809, "step": 24036 }, { "epoch": 0.4256841415523295, "grad_norm": 0.48133954405784607, "learning_rate": 1.9278567746425828e-05, "loss": 0.0683, "step": 24037 }, { "epoch": 0.4257018510893579, "grad_norm": 0.4259180724620819, "learning_rate": 1.927774311711878e-05, "loss": 0.0528, "step": 24038 }, { "epoch": 0.42571956062638633, "grad_norm": 0.9789159297943115, "learning_rate": 1.9276918473738393e-05, "loss": 0.1238, "step": 24039 }, { "epoch": 0.4257372701634147, "grad_norm": 0.45972979068756104, "learning_rate": 1.9276093816287377e-05, "loss": 0.0736, "step": 24040 }, { "epoch": 0.4257549797004432, "grad_norm": 0.7577955722808838, "learning_rate": 1.9275269144768448e-05, "loss": 0.0983, "step": 24041 }, { "epoch": 0.42577268923747164, "grad_norm": 0.9159942865371704, "learning_rate": 1.927444445918431e-05, "loss": 0.0819, "step": 24042 }, { "epoch": 0.42579039877450003, "grad_norm": 1.188051462173462, "learning_rate": 1.9273619759537683e-05, "loss": 0.0993, "step": 24043 }, { "epoch": 0.4258081083115285, "grad_norm": 0.9822323322296143, "learning_rate": 1.927279504583128e-05, "loss": 0.0839, "step": 24044 }, { "epoch": 0.4258258178485569, "grad_norm": 0.5547163486480713, "learning_rate": 1.9271970318067816e-05, "loss": 0.0791, "step": 24045 }, { "epoch": 0.42584352738558534, "grad_norm": 0.80791836977005, "learning_rate": 1.9271145576250004e-05, "loss": 0.0853, "step": 24046 }, { "epoch": 0.42586123692261374, "grad_norm": 0.6094925999641418, "learning_rate": 1.927032082038055e-05, "loss": 0.0737, "step": 24047 }, { "epoch": 0.4258789464596422, "grad_norm": 0.9288151264190674, "learning_rate": 1.926949605046217e-05, "loss": 0.0858, "step": 24048 }, { "epoch": 0.4258966559966706, "grad_norm": 0.5827811360359192, "learning_rate": 1.9268671266497587e-05, "loss": 0.0598, "step": 24049 }, { "epoch": 0.42591436553369905, "grad_norm": 0.7915801405906677, "learning_rate": 1.9267846468489503e-05, "loss": 0.0486, "step": 24050 }, { "epoch": 0.42593207507072744, "grad_norm": 0.7756912708282471, "learning_rate": 1.926702165644064e-05, "loss": 0.1125, "step": 24051 }, { "epoch": 0.4259497846077559, "grad_norm": 0.7239353060722351, "learning_rate": 1.9266196830353697e-05, "loss": 0.0952, "step": 24052 }, { "epoch": 0.4259674941447843, "grad_norm": 0.6388350129127502, "learning_rate": 1.926537199023141e-05, "loss": 0.062, "step": 24053 }, { "epoch": 0.42598520368181275, "grad_norm": 0.7736408114433289, "learning_rate": 1.926454713607647e-05, "loss": 0.1002, "step": 24054 }, { "epoch": 0.42600291321884115, "grad_norm": 0.813870370388031, "learning_rate": 1.926372226789161e-05, "loss": 0.075, "step": 24055 }, { "epoch": 0.4260206227558696, "grad_norm": 1.2525604963302612, "learning_rate": 1.926289738567953e-05, "loss": 0.0814, "step": 24056 }, { "epoch": 0.42603833229289806, "grad_norm": 0.8606574535369873, "learning_rate": 1.926207248944295e-05, "loss": 0.0968, "step": 24057 }, { "epoch": 0.42605604182992646, "grad_norm": 0.656067430973053, "learning_rate": 1.9261247579184585e-05, "loss": 0.1, "step": 24058 }, { "epoch": 0.4260737513669549, "grad_norm": 0.981010913848877, "learning_rate": 1.926042265490714e-05, "loss": 0.0922, "step": 24059 }, { "epoch": 0.4260914609039833, "grad_norm": 0.9591295123100281, "learning_rate": 1.9259597716613342e-05, "loss": 0.0967, "step": 24060 }, { "epoch": 0.42610917044101176, "grad_norm": 1.2414779663085938, "learning_rate": 1.9258772764305895e-05, "loss": 0.0657, "step": 24061 }, { "epoch": 0.42612687997804016, "grad_norm": 0.5861192345619202, "learning_rate": 1.925794779798752e-05, "loss": 0.0647, "step": 24062 }, { "epoch": 0.4261445895150686, "grad_norm": 0.949173629283905, "learning_rate": 1.925712281766092e-05, "loss": 0.0938, "step": 24063 }, { "epoch": 0.426162299052097, "grad_norm": 0.9075074791908264, "learning_rate": 1.9256297823328824e-05, "loss": 0.0654, "step": 24064 }, { "epoch": 0.42618000858912547, "grad_norm": 0.9487868547439575, "learning_rate": 1.925547281499394e-05, "loss": 0.1173, "step": 24065 }, { "epoch": 0.42619771812615387, "grad_norm": 0.6093606352806091, "learning_rate": 1.925464779265897e-05, "loss": 0.0732, "step": 24066 }, { "epoch": 0.4262154276631823, "grad_norm": 0.7837016582489014, "learning_rate": 1.9253822756326647e-05, "loss": 0.0786, "step": 24067 }, { "epoch": 0.4262331372002107, "grad_norm": 0.5550825595855713, "learning_rate": 1.9252997705999677e-05, "loss": 0.0769, "step": 24068 }, { "epoch": 0.4262508467372392, "grad_norm": 0.4667077362537384, "learning_rate": 1.9252172641680775e-05, "loss": 0.0647, "step": 24069 }, { "epoch": 0.4262685562742676, "grad_norm": 1.0564370155334473, "learning_rate": 1.9251347563372653e-05, "loss": 0.0948, "step": 24070 }, { "epoch": 0.42628626581129603, "grad_norm": 0.9546770453453064, "learning_rate": 1.9250522471078026e-05, "loss": 0.0972, "step": 24071 }, { "epoch": 0.4263039753483245, "grad_norm": 0.48369741439819336, "learning_rate": 1.9249697364799613e-05, "loss": 0.0685, "step": 24072 }, { "epoch": 0.4263216848853529, "grad_norm": 0.6543063521385193, "learning_rate": 1.9248872244540128e-05, "loss": 0.0737, "step": 24073 }, { "epoch": 0.42633939442238133, "grad_norm": 0.4732855260372162, "learning_rate": 1.924804711030228e-05, "loss": 0.0595, "step": 24074 }, { "epoch": 0.42635710395940973, "grad_norm": 1.0055975914001465, "learning_rate": 1.924722196208879e-05, "loss": 0.1297, "step": 24075 }, { "epoch": 0.4263748134964382, "grad_norm": 0.6133977174758911, "learning_rate": 1.9246396799902362e-05, "loss": 0.0851, "step": 24076 }, { "epoch": 0.4263925230334666, "grad_norm": 0.7265166640281677, "learning_rate": 1.924557162374572e-05, "loss": 0.1018, "step": 24077 }, { "epoch": 0.42641023257049504, "grad_norm": 0.6984639167785645, "learning_rate": 1.924474643362158e-05, "loss": 0.0862, "step": 24078 }, { "epoch": 0.42642794210752344, "grad_norm": 0.7014684677124023, "learning_rate": 1.9243921229532655e-05, "loss": 0.0847, "step": 24079 }, { "epoch": 0.4264456516445519, "grad_norm": 0.8446346521377563, "learning_rate": 1.9243096011481658e-05, "loss": 0.106, "step": 24080 }, { "epoch": 0.4264633611815803, "grad_norm": 0.8427159190177917, "learning_rate": 1.9242270779471297e-05, "loss": 0.0461, "step": 24081 }, { "epoch": 0.42648107071860875, "grad_norm": 0.9983760714530945, "learning_rate": 1.92414455335043e-05, "loss": 0.0865, "step": 24082 }, { "epoch": 0.42649878025563714, "grad_norm": 0.5280508995056152, "learning_rate": 1.924062027358338e-05, "loss": 0.0831, "step": 24083 }, { "epoch": 0.4265164897926656, "grad_norm": 0.7028874754905701, "learning_rate": 1.923979499971124e-05, "loss": 0.0699, "step": 24084 }, { "epoch": 0.426534199329694, "grad_norm": 0.7706685662269592, "learning_rate": 1.9238969711890608e-05, "loss": 0.0886, "step": 24085 }, { "epoch": 0.42655190886672245, "grad_norm": 0.6287580728530884, "learning_rate": 1.9238144410124193e-05, "loss": 0.0588, "step": 24086 }, { "epoch": 0.4265696184037509, "grad_norm": 0.6747866272926331, "learning_rate": 1.9237319094414712e-05, "loss": 0.1118, "step": 24087 }, { "epoch": 0.4265873279407793, "grad_norm": 0.5831820368766785, "learning_rate": 1.9236493764764878e-05, "loss": 0.0672, "step": 24088 }, { "epoch": 0.42660503747780776, "grad_norm": 0.746669590473175, "learning_rate": 1.923566842117741e-05, "loss": 0.0611, "step": 24089 }, { "epoch": 0.42662274701483616, "grad_norm": 0.5094746351242065, "learning_rate": 1.9234843063655023e-05, "loss": 0.0581, "step": 24090 }, { "epoch": 0.4266404565518646, "grad_norm": 0.6689118146896362, "learning_rate": 1.9234017692200426e-05, "loss": 0.096, "step": 24091 }, { "epoch": 0.426658166088893, "grad_norm": 0.45589807629585266, "learning_rate": 1.9233192306816346e-05, "loss": 0.064, "step": 24092 }, { "epoch": 0.42667587562592146, "grad_norm": 1.128354787826538, "learning_rate": 1.9232366907505485e-05, "loss": 0.0985, "step": 24093 }, { "epoch": 0.42669358516294986, "grad_norm": 0.795936644077301, "learning_rate": 1.923154149427057e-05, "loss": 0.0842, "step": 24094 }, { "epoch": 0.4267112946999783, "grad_norm": 0.8076367974281311, "learning_rate": 1.9230716067114306e-05, "loss": 0.0864, "step": 24095 }, { "epoch": 0.4267290042370067, "grad_norm": 0.775526225566864, "learning_rate": 1.9229890626039415e-05, "loss": 0.0722, "step": 24096 }, { "epoch": 0.42674671377403517, "grad_norm": 0.39655059576034546, "learning_rate": 1.922906517104862e-05, "loss": 0.0705, "step": 24097 }, { "epoch": 0.42676442331106357, "grad_norm": 0.765375554561615, "learning_rate": 1.9228239702144618e-05, "loss": 0.0933, "step": 24098 }, { "epoch": 0.426782132848092, "grad_norm": 0.8673003911972046, "learning_rate": 1.922741421933014e-05, "loss": 0.0825, "step": 24099 }, { "epoch": 0.4267998423851204, "grad_norm": 0.9321007132530212, "learning_rate": 1.922658872260789e-05, "loss": 0.0705, "step": 24100 }, { "epoch": 0.4268175519221489, "grad_norm": 0.626207709312439, "learning_rate": 1.92257632119806e-05, "loss": 0.1067, "step": 24101 }, { "epoch": 0.42683526145917733, "grad_norm": 0.6972181797027588, "learning_rate": 1.9224937687450972e-05, "loss": 0.0501, "step": 24102 }, { "epoch": 0.4268529709962057, "grad_norm": 0.5251414179801941, "learning_rate": 1.922411214902173e-05, "loss": 0.0894, "step": 24103 }, { "epoch": 0.4268706805332342, "grad_norm": 0.7251669764518738, "learning_rate": 1.922328659669558e-05, "loss": 0.0723, "step": 24104 }, { "epoch": 0.4268883900702626, "grad_norm": 0.6153379082679749, "learning_rate": 1.9222461030475244e-05, "loss": 0.0534, "step": 24105 }, { "epoch": 0.42690609960729103, "grad_norm": 0.6659212112426758, "learning_rate": 1.922163545036345e-05, "loss": 0.0823, "step": 24106 }, { "epoch": 0.42692380914431943, "grad_norm": 0.6370896697044373, "learning_rate": 1.922080985636289e-05, "loss": 0.074, "step": 24107 }, { "epoch": 0.4269415186813479, "grad_norm": 0.5265683531761169, "learning_rate": 1.9219984248476297e-05, "loss": 0.0546, "step": 24108 }, { "epoch": 0.4269592282183763, "grad_norm": 0.7792828679084778, "learning_rate": 1.921915862670638e-05, "loss": 0.1057, "step": 24109 }, { "epoch": 0.42697693775540474, "grad_norm": 1.0088632106781006, "learning_rate": 1.9218332991055863e-05, "loss": 0.0628, "step": 24110 }, { "epoch": 0.42699464729243314, "grad_norm": 0.6780046224594116, "learning_rate": 1.921750734152745e-05, "loss": 0.0803, "step": 24111 }, { "epoch": 0.4270123568294616, "grad_norm": 0.4467274844646454, "learning_rate": 1.921668167812387e-05, "loss": 0.059, "step": 24112 }, { "epoch": 0.42703006636649, "grad_norm": 0.3937254548072815, "learning_rate": 1.9215856000847836e-05, "loss": 0.0547, "step": 24113 }, { "epoch": 0.42704777590351845, "grad_norm": 0.7219886779785156, "learning_rate": 1.9215030309702053e-05, "loss": 0.0648, "step": 24114 }, { "epoch": 0.42706548544054684, "grad_norm": 0.6771048307418823, "learning_rate": 1.9214204604689253e-05, "loss": 0.0711, "step": 24115 }, { "epoch": 0.4270831949775753, "grad_norm": 0.4543551206588745, "learning_rate": 1.9213378885812146e-05, "loss": 0.054, "step": 24116 }, { "epoch": 0.42710090451460375, "grad_norm": 0.8043020367622375, "learning_rate": 1.9212553153073447e-05, "loss": 0.0804, "step": 24117 }, { "epoch": 0.42711861405163215, "grad_norm": 0.9339209794998169, "learning_rate": 1.9211727406475877e-05, "loss": 0.0782, "step": 24118 }, { "epoch": 0.4271363235886606, "grad_norm": 0.4690764546394348, "learning_rate": 1.9210901646022148e-05, "loss": 0.0895, "step": 24119 }, { "epoch": 0.427154033125689, "grad_norm": 0.730685830116272, "learning_rate": 1.921007587171498e-05, "loss": 0.0841, "step": 24120 }, { "epoch": 0.42717174266271746, "grad_norm": 1.064043402671814, "learning_rate": 1.9209250083557085e-05, "loss": 0.0982, "step": 24121 }, { "epoch": 0.42718945219974586, "grad_norm": 0.8919116854667664, "learning_rate": 1.920842428155119e-05, "loss": 0.0534, "step": 24122 }, { "epoch": 0.4272071617367743, "grad_norm": 0.6928452253341675, "learning_rate": 1.9207598465699995e-05, "loss": 0.0643, "step": 24123 }, { "epoch": 0.4272248712738027, "grad_norm": 0.9781774878501892, "learning_rate": 1.9206772636006232e-05, "loss": 0.0858, "step": 24124 }, { "epoch": 0.42724258081083116, "grad_norm": 1.2895177602767944, "learning_rate": 1.920594679247261e-05, "loss": 0.0642, "step": 24125 }, { "epoch": 0.42726029034785956, "grad_norm": 0.8578669428825378, "learning_rate": 1.920512093510185e-05, "loss": 0.0934, "step": 24126 }, { "epoch": 0.427277999884888, "grad_norm": 0.42112022638320923, "learning_rate": 1.9204295063896667e-05, "loss": 0.0332, "step": 24127 }, { "epoch": 0.4272957094219164, "grad_norm": 0.8153700828552246, "learning_rate": 1.9203469178859777e-05, "loss": 0.0843, "step": 24128 }, { "epoch": 0.42731341895894487, "grad_norm": 0.9577718377113342, "learning_rate": 1.9202643279993902e-05, "loss": 0.0811, "step": 24129 }, { "epoch": 0.42733112849597327, "grad_norm": 0.8430384397506714, "learning_rate": 1.9201817367301753e-05, "loss": 0.0623, "step": 24130 }, { "epoch": 0.4273488380330017, "grad_norm": 0.6064261794090271, "learning_rate": 1.9200991440786052e-05, "loss": 0.0816, "step": 24131 }, { "epoch": 0.4273665475700302, "grad_norm": 1.012420654296875, "learning_rate": 1.9200165500449516e-05, "loss": 0.0796, "step": 24132 }, { "epoch": 0.4273842571070586, "grad_norm": 0.5289326906204224, "learning_rate": 1.9199339546294856e-05, "loss": 0.1011, "step": 24133 }, { "epoch": 0.42740196664408703, "grad_norm": 0.808277428150177, "learning_rate": 1.9198513578324794e-05, "loss": 0.0856, "step": 24134 }, { "epoch": 0.4274196761811154, "grad_norm": 0.8452203273773193, "learning_rate": 1.9197687596542047e-05, "loss": 0.0951, "step": 24135 }, { "epoch": 0.4274373857181439, "grad_norm": 0.5801777839660645, "learning_rate": 1.919686160094934e-05, "loss": 0.0506, "step": 24136 }, { "epoch": 0.4274550952551723, "grad_norm": 0.28272226452827454, "learning_rate": 1.9196035591549372e-05, "loss": 0.0416, "step": 24137 }, { "epoch": 0.42747280479220073, "grad_norm": 0.46812182664871216, "learning_rate": 1.9195209568344874e-05, "loss": 0.0717, "step": 24138 }, { "epoch": 0.42749051432922913, "grad_norm": 0.5729511976242065, "learning_rate": 1.919438353133856e-05, "loss": 0.0892, "step": 24139 }, { "epoch": 0.4275082238662576, "grad_norm": 0.5758105516433716, "learning_rate": 1.9193557480533156e-05, "loss": 0.0677, "step": 24140 }, { "epoch": 0.427525933403286, "grad_norm": 0.619904100894928, "learning_rate": 1.9192731415931363e-05, "loss": 0.0818, "step": 24141 }, { "epoch": 0.42754364294031444, "grad_norm": 0.5562300682067871, "learning_rate": 1.9191905337535913e-05, "loss": 0.073, "step": 24142 }, { "epoch": 0.42756135247734284, "grad_norm": 0.7824470400810242, "learning_rate": 1.9191079245349513e-05, "loss": 0.0924, "step": 24143 }, { "epoch": 0.4275790620143713, "grad_norm": 0.7764997482299805, "learning_rate": 1.9190253139374894e-05, "loss": 0.057, "step": 24144 }, { "epoch": 0.4275967715513997, "grad_norm": 0.5325989127159119, "learning_rate": 1.9189427019614762e-05, "loss": 0.0475, "step": 24145 }, { "epoch": 0.42761448108842814, "grad_norm": 0.817337691783905, "learning_rate": 1.918860088607184e-05, "loss": 0.0646, "step": 24146 }, { "epoch": 0.4276321906254566, "grad_norm": 0.7969205975532532, "learning_rate": 1.918777473874884e-05, "loss": 0.109, "step": 24147 }, { "epoch": 0.427649900162485, "grad_norm": 0.8047003149986267, "learning_rate": 1.918694857764849e-05, "loss": 0.1052, "step": 24148 }, { "epoch": 0.42766760969951345, "grad_norm": 0.6084837913513184, "learning_rate": 1.9186122402773503e-05, "loss": 0.1062, "step": 24149 }, { "epoch": 0.42768531923654185, "grad_norm": 0.7067281007766724, "learning_rate": 1.918529621412659e-05, "loss": 0.0774, "step": 24150 }, { "epoch": 0.4277030287735703, "grad_norm": 0.4781375229358673, "learning_rate": 1.9184470011710485e-05, "loss": 0.0785, "step": 24151 }, { "epoch": 0.4277207383105987, "grad_norm": 0.8559266924858093, "learning_rate": 1.918364379552789e-05, "loss": 0.1087, "step": 24152 }, { "epoch": 0.42773844784762716, "grad_norm": 0.8582841157913208, "learning_rate": 1.918281756558153e-05, "loss": 0.0895, "step": 24153 }, { "epoch": 0.42775615738465556, "grad_norm": 0.8180118203163147, "learning_rate": 1.918199132187413e-05, "loss": 0.0729, "step": 24154 }, { "epoch": 0.427773866921684, "grad_norm": 0.6263266801834106, "learning_rate": 1.9181165064408395e-05, "loss": 0.0627, "step": 24155 }, { "epoch": 0.4277915764587124, "grad_norm": 0.8262796401977539, "learning_rate": 1.9180338793187053e-05, "loss": 0.0998, "step": 24156 }, { "epoch": 0.42780928599574086, "grad_norm": 0.7697561979293823, "learning_rate": 1.9179512508212817e-05, "loss": 0.0861, "step": 24157 }, { "epoch": 0.42782699553276926, "grad_norm": 0.6651659607887268, "learning_rate": 1.9178686209488407e-05, "loss": 0.0634, "step": 24158 }, { "epoch": 0.4278447050697977, "grad_norm": 0.4469548165798187, "learning_rate": 1.9177859897016544e-05, "loss": 0.0507, "step": 24159 }, { "epoch": 0.4278624146068261, "grad_norm": 1.948529601097107, "learning_rate": 1.9177033570799948e-05, "loss": 0.1318, "step": 24160 }, { "epoch": 0.42788012414385457, "grad_norm": 0.8619458079338074, "learning_rate": 1.9176207230841327e-05, "loss": 0.0974, "step": 24161 }, { "epoch": 0.427897833680883, "grad_norm": 0.6500519514083862, "learning_rate": 1.917538087714341e-05, "loss": 0.0943, "step": 24162 }, { "epoch": 0.4279155432179114, "grad_norm": 0.8054329752922058, "learning_rate": 1.917455450970892e-05, "loss": 0.118, "step": 24163 }, { "epoch": 0.4279332527549399, "grad_norm": 0.8261699080467224, "learning_rate": 1.9173728128540556e-05, "loss": 0.0847, "step": 24164 }, { "epoch": 0.4279509622919683, "grad_norm": 0.8395956754684448, "learning_rate": 1.9172901733641053e-05, "loss": 0.0449, "step": 24165 }, { "epoch": 0.4279686718289967, "grad_norm": 0.6265168786048889, "learning_rate": 1.9172075325013126e-05, "loss": 0.0997, "step": 24166 }, { "epoch": 0.4279863813660251, "grad_norm": 0.49865037202835083, "learning_rate": 1.917124890265949e-05, "loss": 0.1087, "step": 24167 }, { "epoch": 0.4280040909030536, "grad_norm": 0.5386716723442078, "learning_rate": 1.917042246658287e-05, "loss": 0.0747, "step": 24168 }, { "epoch": 0.428021800440082, "grad_norm": 0.27994516491889954, "learning_rate": 1.916959601678598e-05, "loss": 0.0548, "step": 24169 }, { "epoch": 0.42803950997711043, "grad_norm": 0.7861675024032593, "learning_rate": 1.9168769553271544e-05, "loss": 0.078, "step": 24170 }, { "epoch": 0.42805721951413883, "grad_norm": 0.8768112063407898, "learning_rate": 1.9167943076042272e-05, "loss": 0.0903, "step": 24171 }, { "epoch": 0.4280749290511673, "grad_norm": 0.7487947344779968, "learning_rate": 1.9167116585100895e-05, "loss": 0.0924, "step": 24172 }, { "epoch": 0.4280926385881957, "grad_norm": 0.5969479084014893, "learning_rate": 1.916629008045012e-05, "loss": 0.0735, "step": 24173 }, { "epoch": 0.42811034812522414, "grad_norm": 0.5540899038314819, "learning_rate": 1.916546356209268e-05, "loss": 0.0556, "step": 24174 }, { "epoch": 0.42812805766225254, "grad_norm": 0.5554087162017822, "learning_rate": 1.916463703003128e-05, "loss": 0.0829, "step": 24175 }, { "epoch": 0.428145767199281, "grad_norm": 0.40929746627807617, "learning_rate": 1.9163810484268646e-05, "loss": 0.0428, "step": 24176 }, { "epoch": 0.42816347673630945, "grad_norm": 0.9351001977920532, "learning_rate": 1.9162983924807502e-05, "loss": 0.1331, "step": 24177 }, { "epoch": 0.42818118627333784, "grad_norm": 0.9166727662086487, "learning_rate": 1.9162157351650553e-05, "loss": 0.0895, "step": 24178 }, { "epoch": 0.4281988958103663, "grad_norm": 0.8272896409034729, "learning_rate": 1.9161330764800537e-05, "loss": 0.0743, "step": 24179 }, { "epoch": 0.4282166053473947, "grad_norm": 0.534943699836731, "learning_rate": 1.9160504164260154e-05, "loss": 0.064, "step": 24180 }, { "epoch": 0.42823431488442315, "grad_norm": 0.661385715007782, "learning_rate": 1.915967755003214e-05, "loss": 0.0847, "step": 24181 }, { "epoch": 0.42825202442145155, "grad_norm": 0.7669121623039246, "learning_rate": 1.9158850922119203e-05, "loss": 0.0754, "step": 24182 }, { "epoch": 0.42826973395848, "grad_norm": 1.0851038694381714, "learning_rate": 1.9158024280524072e-05, "loss": 0.1049, "step": 24183 }, { "epoch": 0.4282874434955084, "grad_norm": 0.9758118987083435, "learning_rate": 1.915719762524946e-05, "loss": 0.0736, "step": 24184 }, { "epoch": 0.42830515303253686, "grad_norm": 1.0497455596923828, "learning_rate": 1.915637095629809e-05, "loss": 0.1196, "step": 24185 }, { "epoch": 0.42832286256956525, "grad_norm": 1.1067811250686646, "learning_rate": 1.9155544273672673e-05, "loss": 0.1019, "step": 24186 }, { "epoch": 0.4283405721065937, "grad_norm": 0.8787216544151306, "learning_rate": 1.915471757737594e-05, "loss": 0.068, "step": 24187 }, { "epoch": 0.4283582816436221, "grad_norm": 0.615357518196106, "learning_rate": 1.915389086741061e-05, "loss": 0.0652, "step": 24188 }, { "epoch": 0.42837599118065056, "grad_norm": 0.5849943161010742, "learning_rate": 1.9153064143779394e-05, "loss": 0.0874, "step": 24189 }, { "epoch": 0.428393700717679, "grad_norm": 0.5855765342712402, "learning_rate": 1.9152237406485017e-05, "loss": 0.0593, "step": 24190 }, { "epoch": 0.4284114102547074, "grad_norm": 0.6684132218360901, "learning_rate": 1.91514106555302e-05, "loss": 0.0778, "step": 24191 }, { "epoch": 0.42842911979173587, "grad_norm": 0.5207391977310181, "learning_rate": 1.9150583890917666e-05, "loss": 0.0794, "step": 24192 }, { "epoch": 0.42844682932876427, "grad_norm": 0.6359266042709351, "learning_rate": 1.9149757112650127e-05, "loss": 0.0889, "step": 24193 }, { "epoch": 0.4284645388657927, "grad_norm": 0.6978774666786194, "learning_rate": 1.9148930320730307e-05, "loss": 0.0727, "step": 24194 }, { "epoch": 0.4284822484028211, "grad_norm": 1.1767760515213013, "learning_rate": 1.9148103515160924e-05, "loss": 0.0888, "step": 24195 }, { "epoch": 0.4284999579398496, "grad_norm": 0.5949190855026245, "learning_rate": 1.91472766959447e-05, "loss": 0.0644, "step": 24196 }, { "epoch": 0.428517667476878, "grad_norm": 0.9566816091537476, "learning_rate": 1.914644986308436e-05, "loss": 0.0732, "step": 24197 }, { "epoch": 0.4285353770139064, "grad_norm": 0.754703938961029, "learning_rate": 1.9145623016582613e-05, "loss": 0.064, "step": 24198 }, { "epoch": 0.4285530865509348, "grad_norm": 0.7119109630584717, "learning_rate": 1.9144796156442194e-05, "loss": 0.0862, "step": 24199 }, { "epoch": 0.4285707960879633, "grad_norm": 0.7744840979576111, "learning_rate": 1.9143969282665808e-05, "loss": 0.0921, "step": 24200 }, { "epoch": 0.4285885056249917, "grad_norm": 0.5511783957481384, "learning_rate": 1.9143142395256183e-05, "loss": 0.0698, "step": 24201 }, { "epoch": 0.42860621516202013, "grad_norm": 0.44321006536483765, "learning_rate": 1.9142315494216043e-05, "loss": 0.0713, "step": 24202 }, { "epoch": 0.42862392469904853, "grad_norm": 0.9866582751274109, "learning_rate": 1.9141488579548098e-05, "loss": 0.1147, "step": 24203 }, { "epoch": 0.428641634236077, "grad_norm": 0.9930644631385803, "learning_rate": 1.914066165125508e-05, "loss": 0.118, "step": 24204 }, { "epoch": 0.42865934377310544, "grad_norm": 0.5700473785400391, "learning_rate": 1.9139834709339694e-05, "loss": 0.0789, "step": 24205 }, { "epoch": 0.42867705331013384, "grad_norm": 0.7049615383148193, "learning_rate": 1.9139007753804683e-05, "loss": 0.0805, "step": 24206 }, { "epoch": 0.4286947628471623, "grad_norm": 0.5095669627189636, "learning_rate": 1.9138180784652746e-05, "loss": 0.0683, "step": 24207 }, { "epoch": 0.4287124723841907, "grad_norm": 0.6576018929481506, "learning_rate": 1.913735380188662e-05, "loss": 0.0772, "step": 24208 }, { "epoch": 0.42873018192121914, "grad_norm": 0.701341450214386, "learning_rate": 1.9136526805509014e-05, "loss": 0.085, "step": 24209 }, { "epoch": 0.42874789145824754, "grad_norm": 0.8736091256141663, "learning_rate": 1.9135699795522656e-05, "loss": 0.071, "step": 24210 }, { "epoch": 0.428765600995276, "grad_norm": 0.9336289167404175, "learning_rate": 1.913487277193026e-05, "loss": 0.0582, "step": 24211 }, { "epoch": 0.4287833105323044, "grad_norm": 0.6178492307662964, "learning_rate": 1.9134045734734552e-05, "loss": 0.0915, "step": 24212 }, { "epoch": 0.42880102006933285, "grad_norm": 0.7250799536705017, "learning_rate": 1.9133218683938255e-05, "loss": 0.0844, "step": 24213 }, { "epoch": 0.42881872960636125, "grad_norm": 0.8808297514915466, "learning_rate": 1.9132391619544084e-05, "loss": 0.1118, "step": 24214 }, { "epoch": 0.4288364391433897, "grad_norm": 0.8936545848846436, "learning_rate": 1.9131564541554763e-05, "loss": 0.0853, "step": 24215 }, { "epoch": 0.4288541486804181, "grad_norm": 0.5370464324951172, "learning_rate": 1.9130737449973013e-05, "loss": 0.0691, "step": 24216 }, { "epoch": 0.42887185821744656, "grad_norm": 0.47355225682258606, "learning_rate": 1.9129910344801555e-05, "loss": 0.078, "step": 24217 }, { "epoch": 0.42888956775447495, "grad_norm": 0.539527177810669, "learning_rate": 1.912908322604311e-05, "loss": 0.0595, "step": 24218 }, { "epoch": 0.4289072772915034, "grad_norm": 0.8777919411659241, "learning_rate": 1.912825609370039e-05, "loss": 0.0738, "step": 24219 }, { "epoch": 0.42892498682853186, "grad_norm": 0.9932183623313904, "learning_rate": 1.9127428947776135e-05, "loss": 0.1198, "step": 24220 }, { "epoch": 0.42894269636556026, "grad_norm": 1.1210612058639526, "learning_rate": 1.9126601788273052e-05, "loss": 0.0723, "step": 24221 }, { "epoch": 0.4289604059025887, "grad_norm": 0.8492101430892944, "learning_rate": 1.912577461519387e-05, "loss": 0.0608, "step": 24222 }, { "epoch": 0.4289781154396171, "grad_norm": 0.9204462766647339, "learning_rate": 1.9124947428541306e-05, "loss": 0.0876, "step": 24223 }, { "epoch": 0.42899582497664557, "grad_norm": 0.320261150598526, "learning_rate": 1.9124120228318077e-05, "loss": 0.0531, "step": 24224 }, { "epoch": 0.42901353451367397, "grad_norm": 0.6775135397911072, "learning_rate": 1.9123293014526918e-05, "loss": 0.0644, "step": 24225 }, { "epoch": 0.4290312440507024, "grad_norm": 1.0373034477233887, "learning_rate": 1.9122465787170536e-05, "loss": 0.0701, "step": 24226 }, { "epoch": 0.4290489535877308, "grad_norm": 0.41427651047706604, "learning_rate": 1.912163854625166e-05, "loss": 0.0915, "step": 24227 }, { "epoch": 0.4290666631247593, "grad_norm": 0.8354513049125671, "learning_rate": 1.9120811291773013e-05, "loss": 0.0655, "step": 24228 }, { "epoch": 0.42908437266178767, "grad_norm": 0.38829368352890015, "learning_rate": 1.911998402373731e-05, "loss": 0.0736, "step": 24229 }, { "epoch": 0.4291020821988161, "grad_norm": 0.6244722604751587, "learning_rate": 1.9119156742147274e-05, "loss": 0.0721, "step": 24230 }, { "epoch": 0.4291197917358445, "grad_norm": 0.6667800545692444, "learning_rate": 1.9118329447005635e-05, "loss": 0.0801, "step": 24231 }, { "epoch": 0.429137501272873, "grad_norm": 0.7708134055137634, "learning_rate": 1.9117502138315107e-05, "loss": 0.0727, "step": 24232 }, { "epoch": 0.4291552108099014, "grad_norm": 0.6806396245956421, "learning_rate": 1.911667481607841e-05, "loss": 0.0684, "step": 24233 }, { "epoch": 0.42917292034692983, "grad_norm": 0.5882434844970703, "learning_rate": 1.9115847480298272e-05, "loss": 0.06, "step": 24234 }, { "epoch": 0.4291906298839583, "grad_norm": 0.6899294853210449, "learning_rate": 1.911502013097741e-05, "loss": 0.0519, "step": 24235 }, { "epoch": 0.4292083394209867, "grad_norm": 0.6206304430961609, "learning_rate": 1.9114192768118552e-05, "loss": 0.0612, "step": 24236 }, { "epoch": 0.42922604895801514, "grad_norm": 1.0456174612045288, "learning_rate": 1.911336539172441e-05, "loss": 0.0964, "step": 24237 }, { "epoch": 0.42924375849504354, "grad_norm": 0.8873047828674316, "learning_rate": 1.9112538001797717e-05, "loss": 0.1037, "step": 24238 }, { "epoch": 0.429261468032072, "grad_norm": 0.8288788795471191, "learning_rate": 1.911171059834119e-05, "loss": 0.07, "step": 24239 }, { "epoch": 0.4292791775691004, "grad_norm": 0.8018420338630676, "learning_rate": 1.9110883181357547e-05, "loss": 0.0819, "step": 24240 }, { "epoch": 0.42929688710612884, "grad_norm": 0.9251218438148499, "learning_rate": 1.911005575084952e-05, "loss": 0.0832, "step": 24241 }, { "epoch": 0.42931459664315724, "grad_norm": 0.7426689863204956, "learning_rate": 1.9109228306819816e-05, "loss": 0.0715, "step": 24242 }, { "epoch": 0.4293323061801857, "grad_norm": 0.5914452075958252, "learning_rate": 1.9108400849271172e-05, "loss": 0.0633, "step": 24243 }, { "epoch": 0.4293500157172141, "grad_norm": 0.5543091893196106, "learning_rate": 1.9107573378206305e-05, "loss": 0.0769, "step": 24244 }, { "epoch": 0.42936772525424255, "grad_norm": 0.49876293540000916, "learning_rate": 1.910674589362794e-05, "loss": 0.067, "step": 24245 }, { "epoch": 0.42938543479127095, "grad_norm": 1.3926066160202026, "learning_rate": 1.9105918395538795e-05, "loss": 0.0803, "step": 24246 }, { "epoch": 0.4294031443282994, "grad_norm": 0.9164790511131287, "learning_rate": 1.9105090883941592e-05, "loss": 0.0945, "step": 24247 }, { "epoch": 0.4294208538653278, "grad_norm": 1.083205223083496, "learning_rate": 1.9104263358839052e-05, "loss": 0.1242, "step": 24248 }, { "epoch": 0.42943856340235625, "grad_norm": 0.4932175576686859, "learning_rate": 1.9103435820233902e-05, "loss": 0.0924, "step": 24249 }, { "epoch": 0.4294562729393847, "grad_norm": 0.6494930982589722, "learning_rate": 1.9102608268128867e-05, "loss": 0.1071, "step": 24250 }, { "epoch": 0.4294739824764131, "grad_norm": 0.7299734354019165, "learning_rate": 1.9101780702526666e-05, "loss": 0.0616, "step": 24251 }, { "epoch": 0.42949169201344156, "grad_norm": 0.708605170249939, "learning_rate": 1.9100953123430018e-05, "loss": 0.0758, "step": 24252 }, { "epoch": 0.42950940155046996, "grad_norm": 0.4838123321533203, "learning_rate": 1.910012553084165e-05, "loss": 0.078, "step": 24253 }, { "epoch": 0.4295271110874984, "grad_norm": 0.84138423204422, "learning_rate": 1.9099297924764284e-05, "loss": 0.0815, "step": 24254 }, { "epoch": 0.4295448206245268, "grad_norm": 0.9742992520332336, "learning_rate": 1.9098470305200646e-05, "loss": 0.0676, "step": 24255 }, { "epoch": 0.42956253016155527, "grad_norm": 0.6766819953918457, "learning_rate": 1.909764267215345e-05, "loss": 0.0567, "step": 24256 }, { "epoch": 0.42958023969858367, "grad_norm": 1.0540809631347656, "learning_rate": 1.9096815025625424e-05, "loss": 0.0729, "step": 24257 }, { "epoch": 0.4295979492356121, "grad_norm": 0.5251173973083496, "learning_rate": 1.909598736561929e-05, "loss": 0.0648, "step": 24258 }, { "epoch": 0.4296156587726405, "grad_norm": 0.7871448993682861, "learning_rate": 1.9095159692137775e-05, "loss": 0.0663, "step": 24259 }, { "epoch": 0.429633368309669, "grad_norm": 1.6225711107254028, "learning_rate": 1.90943320051836e-05, "loss": 0.0609, "step": 24260 }, { "epoch": 0.42965107784669737, "grad_norm": 0.773720383644104, "learning_rate": 1.9093504304759487e-05, "loss": 0.0928, "step": 24261 }, { "epoch": 0.4296687873837258, "grad_norm": 0.639438807964325, "learning_rate": 1.9092676590868153e-05, "loss": 0.0567, "step": 24262 }, { "epoch": 0.4296864969207542, "grad_norm": 0.46482840180397034, "learning_rate": 1.9091848863512333e-05, "loss": 0.0762, "step": 24263 }, { "epoch": 0.4297042064577827, "grad_norm": 1.1413373947143555, "learning_rate": 1.9091021122694743e-05, "loss": 0.0901, "step": 24264 }, { "epoch": 0.42972191599481113, "grad_norm": 0.7156425714492798, "learning_rate": 1.909019336841811e-05, "loss": 0.0588, "step": 24265 }, { "epoch": 0.42973962553183953, "grad_norm": 0.49342483282089233, "learning_rate": 1.908936560068515e-05, "loss": 0.1054, "step": 24266 }, { "epoch": 0.429757335068868, "grad_norm": 0.48427602648735046, "learning_rate": 1.9088537819498586e-05, "loss": 0.0772, "step": 24267 }, { "epoch": 0.4297750446058964, "grad_norm": 0.6193720698356628, "learning_rate": 1.9087710024861157e-05, "loss": 0.0611, "step": 24268 }, { "epoch": 0.42979275414292484, "grad_norm": 0.5176944732666016, "learning_rate": 1.9086882216775568e-05, "loss": 0.0922, "step": 24269 }, { "epoch": 0.42981046367995324, "grad_norm": 0.6620091199874878, "learning_rate": 1.908605439524455e-05, "loss": 0.101, "step": 24270 }, { "epoch": 0.4298281732169817, "grad_norm": 0.821425199508667, "learning_rate": 1.908522656027083e-05, "loss": 0.0794, "step": 24271 }, { "epoch": 0.4298458827540101, "grad_norm": 1.5293850898742676, "learning_rate": 1.9084398711857125e-05, "loss": 0.0944, "step": 24272 }, { "epoch": 0.42986359229103854, "grad_norm": 0.4968221187591553, "learning_rate": 1.9083570850006164e-05, "loss": 0.0658, "step": 24273 }, { "epoch": 0.42988130182806694, "grad_norm": 1.0907655954360962, "learning_rate": 1.9082742974720665e-05, "loss": 0.0987, "step": 24274 }, { "epoch": 0.4298990113650954, "grad_norm": 0.8579274415969849, "learning_rate": 1.908191508600336e-05, "loss": 0.0908, "step": 24275 }, { "epoch": 0.4299167209021238, "grad_norm": 0.7382381558418274, "learning_rate": 1.9081087183856962e-05, "loss": 0.0708, "step": 24276 }, { "epoch": 0.42993443043915225, "grad_norm": 1.2008527517318726, "learning_rate": 1.90802592682842e-05, "loss": 0.0863, "step": 24277 }, { "epoch": 0.42995213997618065, "grad_norm": 0.5024915933609009, "learning_rate": 1.90794313392878e-05, "loss": 0.0457, "step": 24278 }, { "epoch": 0.4299698495132091, "grad_norm": 0.8824151754379272, "learning_rate": 1.907860339687048e-05, "loss": 0.0813, "step": 24279 }, { "epoch": 0.42998755905023756, "grad_norm": 0.5082520246505737, "learning_rate": 1.9077775441034976e-05, "loss": 0.0821, "step": 24280 }, { "epoch": 0.43000526858726595, "grad_norm": 0.3508763909339905, "learning_rate": 1.907694747178399e-05, "loss": 0.0627, "step": 24281 }, { "epoch": 0.4300229781242944, "grad_norm": 0.4322890043258667, "learning_rate": 1.907611948912027e-05, "loss": 0.0632, "step": 24282 }, { "epoch": 0.4300406876613228, "grad_norm": 0.6135969161987305, "learning_rate": 1.907529149304652e-05, "loss": 0.0615, "step": 24283 }, { "epoch": 0.43005839719835126, "grad_norm": 0.877465546131134, "learning_rate": 1.9074463483565486e-05, "loss": 0.0797, "step": 24284 }, { "epoch": 0.43007610673537966, "grad_norm": 0.9440138339996338, "learning_rate": 1.9073635460679867e-05, "loss": 0.114, "step": 24285 }, { "epoch": 0.4300938162724081, "grad_norm": 0.7039619088172913, "learning_rate": 1.9072807424392405e-05, "loss": 0.0795, "step": 24286 }, { "epoch": 0.4301115258094365, "grad_norm": 0.3048536777496338, "learning_rate": 1.907197937470582e-05, "loss": 0.0335, "step": 24287 }, { "epoch": 0.43012923534646497, "grad_norm": 0.6734210252761841, "learning_rate": 1.907115131162283e-05, "loss": 0.0825, "step": 24288 }, { "epoch": 0.43014694488349337, "grad_norm": 0.8890876173973083, "learning_rate": 1.9070323235146167e-05, "loss": 0.1049, "step": 24289 }, { "epoch": 0.4301646544205218, "grad_norm": 0.4907750189304352, "learning_rate": 1.906949514527855e-05, "loss": 0.0736, "step": 24290 }, { "epoch": 0.4301823639575502, "grad_norm": 0.5021981596946716, "learning_rate": 1.906866704202271e-05, "loss": 0.0566, "step": 24291 }, { "epoch": 0.43020007349457867, "grad_norm": 0.954070508480072, "learning_rate": 1.906783892538136e-05, "loss": 0.1222, "step": 24292 }, { "epoch": 0.43021778303160707, "grad_norm": 0.6133285760879517, "learning_rate": 1.906701079535724e-05, "loss": 0.0797, "step": 24293 }, { "epoch": 0.4302354925686355, "grad_norm": 0.44956907629966736, "learning_rate": 1.9066182651953058e-05, "loss": 0.0807, "step": 24294 }, { "epoch": 0.430253202105664, "grad_norm": 0.6064429879188538, "learning_rate": 1.9065354495171554e-05, "loss": 0.0873, "step": 24295 }, { "epoch": 0.4302709116426924, "grad_norm": 0.46146777272224426, "learning_rate": 1.9064526325015437e-05, "loss": 0.0964, "step": 24296 }, { "epoch": 0.43028862117972083, "grad_norm": 0.8463471531867981, "learning_rate": 1.9063698141487445e-05, "loss": 0.0999, "step": 24297 }, { "epoch": 0.43030633071674923, "grad_norm": 0.426753968000412, "learning_rate": 1.9062869944590294e-05, "loss": 0.0751, "step": 24298 }, { "epoch": 0.4303240402537777, "grad_norm": 0.552166223526001, "learning_rate": 1.9062041734326713e-05, "loss": 0.069, "step": 24299 }, { "epoch": 0.4303417497908061, "grad_norm": 0.8722464442253113, "learning_rate": 1.9061213510699422e-05, "loss": 0.102, "step": 24300 }, { "epoch": 0.43035945932783454, "grad_norm": 0.7559674382209778, "learning_rate": 1.9060385273711155e-05, "loss": 0.0977, "step": 24301 }, { "epoch": 0.43037716886486294, "grad_norm": 0.6991231441497803, "learning_rate": 1.905955702336463e-05, "loss": 0.0739, "step": 24302 }, { "epoch": 0.4303948784018914, "grad_norm": 0.422695130109787, "learning_rate": 1.9058728759662577e-05, "loss": 0.0544, "step": 24303 }, { "epoch": 0.4304125879389198, "grad_norm": 0.7648485898971558, "learning_rate": 1.9057900482607707e-05, "loss": 0.0786, "step": 24304 }, { "epoch": 0.43043029747594824, "grad_norm": 0.7897231578826904, "learning_rate": 1.9057072192202758e-05, "loss": 0.0614, "step": 24305 }, { "epoch": 0.43044800701297664, "grad_norm": 0.5616443157196045, "learning_rate": 1.9056243888450456e-05, "loss": 0.058, "step": 24306 }, { "epoch": 0.4304657165500051, "grad_norm": 0.9482662677764893, "learning_rate": 1.905541557135352e-05, "loss": 0.0504, "step": 24307 }, { "epoch": 0.4304834260870335, "grad_norm": 0.47361820936203003, "learning_rate": 1.9054587240914676e-05, "loss": 0.072, "step": 24308 }, { "epoch": 0.43050113562406195, "grad_norm": 0.34518563747406006, "learning_rate": 1.9053758897136653e-05, "loss": 0.0803, "step": 24309 }, { "epoch": 0.4305188451610904, "grad_norm": 0.8988879919052124, "learning_rate": 1.9052930540022166e-05, "loss": 0.0547, "step": 24310 }, { "epoch": 0.4305365546981188, "grad_norm": 0.6365110874176025, "learning_rate": 1.9052102169573952e-05, "loss": 0.0887, "step": 24311 }, { "epoch": 0.43055426423514725, "grad_norm": 0.7498816847801208, "learning_rate": 1.9051273785794736e-05, "loss": 0.0752, "step": 24312 }, { "epoch": 0.43057197377217565, "grad_norm": 0.6656030416488647, "learning_rate": 1.905044538868723e-05, "loss": 0.0535, "step": 24313 }, { "epoch": 0.4305896833092041, "grad_norm": 0.8041996955871582, "learning_rate": 1.9049616978254175e-05, "loss": 0.0785, "step": 24314 }, { "epoch": 0.4306073928462325, "grad_norm": 0.7259342670440674, "learning_rate": 1.9048788554498288e-05, "loss": 0.1256, "step": 24315 }, { "epoch": 0.43062510238326096, "grad_norm": 0.5479152202606201, "learning_rate": 1.90479601174223e-05, "loss": 0.0744, "step": 24316 }, { "epoch": 0.43064281192028936, "grad_norm": 0.803026556968689, "learning_rate": 1.9047131667028923e-05, "loss": 0.0783, "step": 24317 }, { "epoch": 0.4306605214573178, "grad_norm": 0.6752461194992065, "learning_rate": 1.90463032033209e-05, "loss": 0.0726, "step": 24318 }, { "epoch": 0.4306782309943462, "grad_norm": 0.6963403820991516, "learning_rate": 1.9045474726300948e-05, "loss": 0.1003, "step": 24319 }, { "epoch": 0.43069594053137467, "grad_norm": 0.6778883934020996, "learning_rate": 1.904464623597179e-05, "loss": 0.0876, "step": 24320 }, { "epoch": 0.43071365006840306, "grad_norm": 1.1370463371276855, "learning_rate": 1.904381773233616e-05, "loss": 0.0717, "step": 24321 }, { "epoch": 0.4307313596054315, "grad_norm": 0.5523015856742859, "learning_rate": 1.9042989215396772e-05, "loss": 0.0658, "step": 24322 }, { "epoch": 0.4307490691424599, "grad_norm": 0.7154659032821655, "learning_rate": 1.9042160685156365e-05, "loss": 0.0727, "step": 24323 }, { "epoch": 0.43076677867948837, "grad_norm": 0.3748617172241211, "learning_rate": 1.9041332141617655e-05, "loss": 0.0621, "step": 24324 }, { "epoch": 0.4307844882165168, "grad_norm": 0.8150674700737, "learning_rate": 1.9040503584783368e-05, "loss": 0.0989, "step": 24325 }, { "epoch": 0.4308021977535452, "grad_norm": 0.4928216338157654, "learning_rate": 1.903967501465624e-05, "loss": 0.0759, "step": 24326 }, { "epoch": 0.4308199072905737, "grad_norm": 0.6409309506416321, "learning_rate": 1.9038846431238985e-05, "loss": 0.0736, "step": 24327 }, { "epoch": 0.4308376168276021, "grad_norm": 0.4711609184741974, "learning_rate": 1.9038017834534335e-05, "loss": 0.0643, "step": 24328 }, { "epoch": 0.43085532636463053, "grad_norm": 0.5776541829109192, "learning_rate": 1.9037189224545014e-05, "loss": 0.094, "step": 24329 }, { "epoch": 0.43087303590165893, "grad_norm": 0.9965726137161255, "learning_rate": 1.903636060127375e-05, "loss": 0.0765, "step": 24330 }, { "epoch": 0.4308907454386874, "grad_norm": 0.32056909799575806, "learning_rate": 1.9035531964723265e-05, "loss": 0.0497, "step": 24331 }, { "epoch": 0.4309084549757158, "grad_norm": 0.7199656963348389, "learning_rate": 1.903470331489629e-05, "loss": 0.0755, "step": 24332 }, { "epoch": 0.43092616451274424, "grad_norm": 0.5684866905212402, "learning_rate": 1.903387465179555e-05, "loss": 0.0564, "step": 24333 }, { "epoch": 0.43094387404977264, "grad_norm": 0.2195328325033188, "learning_rate": 1.9033045975423767e-05, "loss": 0.0513, "step": 24334 }, { "epoch": 0.4309615835868011, "grad_norm": 0.5785518288612366, "learning_rate": 1.9032217285783675e-05, "loss": 0.047, "step": 24335 }, { "epoch": 0.4309792931238295, "grad_norm": 0.6415882706642151, "learning_rate": 1.9031388582877994e-05, "loss": 0.0643, "step": 24336 }, { "epoch": 0.43099700266085794, "grad_norm": 0.8632696270942688, "learning_rate": 1.9030559866709456e-05, "loss": 0.1056, "step": 24337 }, { "epoch": 0.43101471219788634, "grad_norm": 0.6893792748451233, "learning_rate": 1.9029731137280778e-05, "loss": 0.0858, "step": 24338 }, { "epoch": 0.4310324217349148, "grad_norm": 0.7227798700332642, "learning_rate": 1.9028902394594692e-05, "loss": 0.0557, "step": 24339 }, { "epoch": 0.43105013127194325, "grad_norm": 0.5717962384223938, "learning_rate": 1.902807363865393e-05, "loss": 0.0819, "step": 24340 }, { "epoch": 0.43106784080897165, "grad_norm": 0.861819863319397, "learning_rate": 1.902724486946121e-05, "loss": 0.0886, "step": 24341 }, { "epoch": 0.4310855503460001, "grad_norm": 0.5263697504997253, "learning_rate": 1.9026416087019264e-05, "loss": 0.0809, "step": 24342 }, { "epoch": 0.4311032598830285, "grad_norm": 0.6905173659324646, "learning_rate": 1.902558729133081e-05, "loss": 0.0606, "step": 24343 }, { "epoch": 0.43112096942005695, "grad_norm": 0.4206681251525879, "learning_rate": 1.9024758482398585e-05, "loss": 0.0302, "step": 24344 }, { "epoch": 0.43113867895708535, "grad_norm": 0.8712735176086426, "learning_rate": 1.902392966022531e-05, "loss": 0.0999, "step": 24345 }, { "epoch": 0.4311563884941138, "grad_norm": 0.8099102973937988, "learning_rate": 1.902310082481372e-05, "loss": 0.0911, "step": 24346 }, { "epoch": 0.4311740980311422, "grad_norm": 0.45975005626678467, "learning_rate": 1.902227197616653e-05, "loss": 0.079, "step": 24347 }, { "epoch": 0.43119180756817066, "grad_norm": 0.7599451541900635, "learning_rate": 1.902144311428647e-05, "loss": 0.0567, "step": 24348 }, { "epoch": 0.43120951710519906, "grad_norm": 0.721228837966919, "learning_rate": 1.902061423917627e-05, "loss": 0.0893, "step": 24349 }, { "epoch": 0.4312272266422275, "grad_norm": 0.33637773990631104, "learning_rate": 1.9019785350838657e-05, "loss": 0.0623, "step": 24350 }, { "epoch": 0.4312449361792559, "grad_norm": 0.6974172592163086, "learning_rate": 1.9018956449276363e-05, "loss": 0.0666, "step": 24351 }, { "epoch": 0.43126264571628437, "grad_norm": 0.6232931613922119, "learning_rate": 1.9018127534492102e-05, "loss": 0.0809, "step": 24352 }, { "epoch": 0.43128035525331276, "grad_norm": 0.6389428973197937, "learning_rate": 1.9017298606488607e-05, "loss": 0.0925, "step": 24353 }, { "epoch": 0.4312980647903412, "grad_norm": 0.9082134366035461, "learning_rate": 1.9016469665268603e-05, "loss": 0.0778, "step": 24354 }, { "epoch": 0.43131577432736967, "grad_norm": 0.7977423071861267, "learning_rate": 1.9015640710834827e-05, "loss": 0.1084, "step": 24355 }, { "epoch": 0.43133348386439807, "grad_norm": 0.5779871940612793, "learning_rate": 1.9014811743189998e-05, "loss": 0.0542, "step": 24356 }, { "epoch": 0.4313511934014265, "grad_norm": 0.5684525370597839, "learning_rate": 1.9013982762336844e-05, "loss": 0.046, "step": 24357 }, { "epoch": 0.4313689029384549, "grad_norm": 0.5604076981544495, "learning_rate": 1.901315376827809e-05, "loss": 0.0591, "step": 24358 }, { "epoch": 0.4313866124754834, "grad_norm": 0.9311964511871338, "learning_rate": 1.9012324761016467e-05, "loss": 0.1083, "step": 24359 }, { "epoch": 0.4314043220125118, "grad_norm": 0.4871034622192383, "learning_rate": 1.9011495740554704e-05, "loss": 0.0581, "step": 24360 }, { "epoch": 0.43142203154954023, "grad_norm": 0.5845499038696289, "learning_rate": 1.901066670689552e-05, "loss": 0.0953, "step": 24361 }, { "epoch": 0.43143974108656863, "grad_norm": 0.5354467034339905, "learning_rate": 1.9009837660041656e-05, "loss": 0.051, "step": 24362 }, { "epoch": 0.4314574506235971, "grad_norm": 0.43670567870140076, "learning_rate": 1.9009008599995828e-05, "loss": 0.055, "step": 24363 }, { "epoch": 0.4314751601606255, "grad_norm": 0.6225027441978455, "learning_rate": 1.9008179526760765e-05, "loss": 0.0808, "step": 24364 }, { "epoch": 0.43149286969765394, "grad_norm": 0.47911956906318665, "learning_rate": 1.90073504403392e-05, "loss": 0.0753, "step": 24365 }, { "epoch": 0.43151057923468233, "grad_norm": 0.5529442429542542, "learning_rate": 1.900652134073386e-05, "loss": 0.0862, "step": 24366 }, { "epoch": 0.4315282887717108, "grad_norm": 0.7501804232597351, "learning_rate": 1.900569222794746e-05, "loss": 0.0913, "step": 24367 }, { "epoch": 0.4315459983087392, "grad_norm": 0.4558701813220978, "learning_rate": 1.9004863101982747e-05, "loss": 0.1025, "step": 24368 }, { "epoch": 0.43156370784576764, "grad_norm": 0.6358678936958313, "learning_rate": 1.9004033962842436e-05, "loss": 0.0851, "step": 24369 }, { "epoch": 0.4315814173827961, "grad_norm": 0.8237822651863098, "learning_rate": 1.9003204810529258e-05, "loss": 0.0629, "step": 24370 }, { "epoch": 0.4315991269198245, "grad_norm": 0.7521084547042847, "learning_rate": 1.9002375645045945e-05, "loss": 0.0817, "step": 24371 }, { "epoch": 0.43161683645685295, "grad_norm": 0.6257664561271667, "learning_rate": 1.9001546466395213e-05, "loss": 0.0687, "step": 24372 }, { "epoch": 0.43163454599388135, "grad_norm": 0.7446724772453308, "learning_rate": 1.9000717274579803e-05, "loss": 0.0702, "step": 24373 }, { "epoch": 0.4316522555309098, "grad_norm": 1.0235354900360107, "learning_rate": 1.8999888069602444e-05, "loss": 0.1219, "step": 24374 }, { "epoch": 0.4316699650679382, "grad_norm": 0.7959911227226257, "learning_rate": 1.8999058851465848e-05, "loss": 0.0652, "step": 24375 }, { "epoch": 0.43168767460496665, "grad_norm": 0.5808579325675964, "learning_rate": 1.8998229620172757e-05, "loss": 0.0728, "step": 24376 }, { "epoch": 0.43170538414199505, "grad_norm": 0.7242923974990845, "learning_rate": 1.899740037572589e-05, "loss": 0.0632, "step": 24377 }, { "epoch": 0.4317230936790235, "grad_norm": 0.7867627143859863, "learning_rate": 1.8996571118127985e-05, "loss": 0.0786, "step": 24378 }, { "epoch": 0.4317408032160519, "grad_norm": 0.9962023496627808, "learning_rate": 1.8995741847381766e-05, "loss": 0.0991, "step": 24379 }, { "epoch": 0.43175851275308036, "grad_norm": 0.6278130412101746, "learning_rate": 1.899491256348996e-05, "loss": 0.1061, "step": 24380 }, { "epoch": 0.43177622229010876, "grad_norm": 0.8236081600189209, "learning_rate": 1.899408326645529e-05, "loss": 0.0831, "step": 24381 }, { "epoch": 0.4317939318271372, "grad_norm": 0.6742140650749207, "learning_rate": 1.8993253956280497e-05, "loss": 0.0626, "step": 24382 }, { "epoch": 0.4318116413641656, "grad_norm": 1.0183911323547363, "learning_rate": 1.89924246329683e-05, "loss": 0.1232, "step": 24383 }, { "epoch": 0.43182935090119406, "grad_norm": 0.7191607356071472, "learning_rate": 1.899159529652143e-05, "loss": 0.0809, "step": 24384 }, { "epoch": 0.4318470604382225, "grad_norm": 0.8620215654373169, "learning_rate": 1.8990765946942617e-05, "loss": 0.0723, "step": 24385 }, { "epoch": 0.4318647699752509, "grad_norm": 0.6219186186790466, "learning_rate": 1.8989936584234583e-05, "loss": 0.102, "step": 24386 }, { "epoch": 0.43188247951227937, "grad_norm": 0.639789342880249, "learning_rate": 1.8989107208400063e-05, "loss": 0.0645, "step": 24387 }, { "epoch": 0.43190018904930777, "grad_norm": 0.7009145021438599, "learning_rate": 1.8988277819441784e-05, "loss": 0.0726, "step": 24388 }, { "epoch": 0.4319178985863362, "grad_norm": 0.4816814064979553, "learning_rate": 1.8987448417362476e-05, "loss": 0.0519, "step": 24389 }, { "epoch": 0.4319356081233646, "grad_norm": 0.3832266926765442, "learning_rate": 1.8986619002164868e-05, "loss": 0.0776, "step": 24390 }, { "epoch": 0.4319533176603931, "grad_norm": 0.9150787591934204, "learning_rate": 1.898578957385168e-05, "loss": 0.0938, "step": 24391 }, { "epoch": 0.4319710271974215, "grad_norm": 0.5996606349945068, "learning_rate": 1.8984960132425652e-05, "loss": 0.075, "step": 24392 }, { "epoch": 0.43198873673444993, "grad_norm": 0.789269208908081, "learning_rate": 1.8984130677889503e-05, "loss": 0.0802, "step": 24393 }, { "epoch": 0.43200644627147833, "grad_norm": 0.46197643876075745, "learning_rate": 1.8983301210245972e-05, "loss": 0.0614, "step": 24394 }, { "epoch": 0.4320241558085068, "grad_norm": 0.5684311389923096, "learning_rate": 1.8982471729497785e-05, "loss": 0.0777, "step": 24395 }, { "epoch": 0.4320418653455352, "grad_norm": 0.9068282246589661, "learning_rate": 1.8981642235647666e-05, "loss": 0.0798, "step": 24396 }, { "epoch": 0.43205957488256364, "grad_norm": 1.1645970344543457, "learning_rate": 1.8980812728698344e-05, "loss": 0.087, "step": 24397 }, { "epoch": 0.43207728441959203, "grad_norm": 0.5657825469970703, "learning_rate": 1.8979983208652555e-05, "loss": 0.0627, "step": 24398 }, { "epoch": 0.4320949939566205, "grad_norm": 0.9313731789588928, "learning_rate": 1.8979153675513025e-05, "loss": 0.0979, "step": 24399 }, { "epoch": 0.43211270349364894, "grad_norm": 0.6907685399055481, "learning_rate": 1.8978324129282476e-05, "loss": 0.0642, "step": 24400 }, { "epoch": 0.43213041303067734, "grad_norm": 0.4414728581905365, "learning_rate": 1.8977494569963648e-05, "loss": 0.0643, "step": 24401 }, { "epoch": 0.4321481225677058, "grad_norm": 0.5207279920578003, "learning_rate": 1.897666499755926e-05, "loss": 0.0555, "step": 24402 }, { "epoch": 0.4321658321047342, "grad_norm": 1.120005488395691, "learning_rate": 1.8975835412072053e-05, "loss": 0.0876, "step": 24403 }, { "epoch": 0.43218354164176265, "grad_norm": 0.27220097184181213, "learning_rate": 1.8975005813504748e-05, "loss": 0.0693, "step": 24404 }, { "epoch": 0.43220125117879105, "grad_norm": 0.38261449337005615, "learning_rate": 1.897417620186007e-05, "loss": 0.0753, "step": 24405 }, { "epoch": 0.4322189607158195, "grad_norm": 0.7973933815956116, "learning_rate": 1.897334657714076e-05, "loss": 0.0728, "step": 24406 }, { "epoch": 0.4322366702528479, "grad_norm": 0.5997953414916992, "learning_rate": 1.897251693934954e-05, "loss": 0.0972, "step": 24407 }, { "epoch": 0.43225437978987635, "grad_norm": 0.8295911550521851, "learning_rate": 1.8971687288489143e-05, "loss": 0.0703, "step": 24408 }, { "epoch": 0.43227208932690475, "grad_norm": 0.41270536184310913, "learning_rate": 1.8970857624562292e-05, "loss": 0.0553, "step": 24409 }, { "epoch": 0.4322897988639332, "grad_norm": 0.6358349919319153, "learning_rate": 1.8970027947571726e-05, "loss": 0.0754, "step": 24410 }, { "epoch": 0.4323075084009616, "grad_norm": 0.7499387860298157, "learning_rate": 1.8969198257520163e-05, "loss": 0.0699, "step": 24411 }, { "epoch": 0.43232521793799006, "grad_norm": 0.4037408232688904, "learning_rate": 1.8968368554410347e-05, "loss": 0.0924, "step": 24412 }, { "epoch": 0.43234292747501846, "grad_norm": 0.5613142251968384, "learning_rate": 1.8967538838245e-05, "loss": 0.0641, "step": 24413 }, { "epoch": 0.4323606370120469, "grad_norm": 0.6187917590141296, "learning_rate": 1.8966709109026846e-05, "loss": 0.0567, "step": 24414 }, { "epoch": 0.43237834654907537, "grad_norm": 0.7042884230613708, "learning_rate": 1.8965879366758622e-05, "loss": 0.0708, "step": 24415 }, { "epoch": 0.43239605608610376, "grad_norm": 0.5734136700630188, "learning_rate": 1.8965049611443055e-05, "loss": 0.0716, "step": 24416 }, { "epoch": 0.4324137656231322, "grad_norm": 0.7503379583358765, "learning_rate": 1.896421984308288e-05, "loss": 0.066, "step": 24417 }, { "epoch": 0.4324314751601606, "grad_norm": 0.6672741770744324, "learning_rate": 1.8963390061680815e-05, "loss": 0.0916, "step": 24418 }, { "epoch": 0.43244918469718907, "grad_norm": 0.819521427154541, "learning_rate": 1.8962560267239607e-05, "loss": 0.1077, "step": 24419 }, { "epoch": 0.43246689423421747, "grad_norm": 0.4469963014125824, "learning_rate": 1.896173045976197e-05, "loss": 0.0805, "step": 24420 }, { "epoch": 0.4324846037712459, "grad_norm": 0.5979803800582886, "learning_rate": 1.8960900639250642e-05, "loss": 0.0413, "step": 24421 }, { "epoch": 0.4325023133082743, "grad_norm": 0.7535507678985596, "learning_rate": 1.8960070805708354e-05, "loss": 0.068, "step": 24422 }, { "epoch": 0.4325200228453028, "grad_norm": 1.0119067430496216, "learning_rate": 1.8959240959137828e-05, "loss": 0.0697, "step": 24423 }, { "epoch": 0.4325377323823312, "grad_norm": 1.2109276056289673, "learning_rate": 1.8958411099541802e-05, "loss": 0.091, "step": 24424 }, { "epoch": 0.43255544191935963, "grad_norm": 0.607941746711731, "learning_rate": 1.8957581226923e-05, "loss": 0.0619, "step": 24425 }, { "epoch": 0.432573151456388, "grad_norm": 0.7716004252433777, "learning_rate": 1.8956751341284168e-05, "loss": 0.0724, "step": 24426 }, { "epoch": 0.4325908609934165, "grad_norm": 0.461906373500824, "learning_rate": 1.8955921442628017e-05, "loss": 0.0485, "step": 24427 }, { "epoch": 0.4326085705304449, "grad_norm": 0.6067945957183838, "learning_rate": 1.8955091530957283e-05, "loss": 0.0985, "step": 24428 }, { "epoch": 0.43262628006747333, "grad_norm": 0.7956933379173279, "learning_rate": 1.89542616062747e-05, "loss": 0.0502, "step": 24429 }, { "epoch": 0.4326439896045018, "grad_norm": 0.9250121712684631, "learning_rate": 1.8953431668582996e-05, "loss": 0.0821, "step": 24430 }, { "epoch": 0.4326616991415302, "grad_norm": 0.6801571846008301, "learning_rate": 1.8952601717884904e-05, "loss": 0.0948, "step": 24431 }, { "epoch": 0.43267940867855864, "grad_norm": 0.5457557439804077, "learning_rate": 1.8951771754183148e-05, "loss": 0.0637, "step": 24432 }, { "epoch": 0.43269711821558704, "grad_norm": 0.7326564788818359, "learning_rate": 1.8950941777480467e-05, "loss": 0.0908, "step": 24433 }, { "epoch": 0.4327148277526155, "grad_norm": 0.4524065852165222, "learning_rate": 1.8950111787779577e-05, "loss": 0.0841, "step": 24434 }, { "epoch": 0.4327325372896439, "grad_norm": 1.3345847129821777, "learning_rate": 1.894928178508323e-05, "loss": 0.1159, "step": 24435 }, { "epoch": 0.43275024682667235, "grad_norm": 1.0789225101470947, "learning_rate": 1.894845176939414e-05, "loss": 0.0968, "step": 24436 }, { "epoch": 0.43276795636370075, "grad_norm": 0.6839389801025391, "learning_rate": 1.8947621740715048e-05, "loss": 0.0777, "step": 24437 }, { "epoch": 0.4327856659007292, "grad_norm": 0.6345551013946533, "learning_rate": 1.8946791699048677e-05, "loss": 0.074, "step": 24438 }, { "epoch": 0.4328033754377576, "grad_norm": 0.5556069016456604, "learning_rate": 1.8945961644397758e-05, "loss": 0.0849, "step": 24439 }, { "epoch": 0.43282108497478605, "grad_norm": 0.8724642395973206, "learning_rate": 1.894513157676503e-05, "loss": 0.063, "step": 24440 }, { "epoch": 0.43283879451181445, "grad_norm": 0.749610424041748, "learning_rate": 1.894430149615321e-05, "loss": 0.0635, "step": 24441 }, { "epoch": 0.4328565040488429, "grad_norm": 0.5518561005592346, "learning_rate": 1.8943471402565042e-05, "loss": 0.039, "step": 24442 }, { "epoch": 0.43287421358587136, "grad_norm": 0.8854645490646362, "learning_rate": 1.8942641296003252e-05, "loss": 0.0802, "step": 24443 }, { "epoch": 0.43289192312289976, "grad_norm": 0.5772579908370972, "learning_rate": 1.894181117647057e-05, "loss": 0.0797, "step": 24444 }, { "epoch": 0.4329096326599282, "grad_norm": 0.7072456479072571, "learning_rate": 1.894098104396973e-05, "loss": 0.1085, "step": 24445 }, { "epoch": 0.4329273421969566, "grad_norm": 1.0124038457870483, "learning_rate": 1.894015089850346e-05, "loss": 0.0675, "step": 24446 }, { "epoch": 0.43294505173398506, "grad_norm": 0.8121325969696045, "learning_rate": 1.8939320740074494e-05, "loss": 0.0791, "step": 24447 }, { "epoch": 0.43296276127101346, "grad_norm": 0.8755220770835876, "learning_rate": 1.8938490568685555e-05, "loss": 0.1094, "step": 24448 }, { "epoch": 0.4329804708080419, "grad_norm": 0.7060143947601318, "learning_rate": 1.893766038433938e-05, "loss": 0.0949, "step": 24449 }, { "epoch": 0.4329981803450703, "grad_norm": 0.6814170479774475, "learning_rate": 1.893683018703871e-05, "loss": 0.1012, "step": 24450 }, { "epoch": 0.43301588988209877, "grad_norm": 0.593726396560669, "learning_rate": 1.893599997678626e-05, "loss": 0.0625, "step": 24451 }, { "epoch": 0.43303359941912717, "grad_norm": 0.71371990442276, "learning_rate": 1.893516975358477e-05, "loss": 0.0925, "step": 24452 }, { "epoch": 0.4330513089561556, "grad_norm": 0.5429752469062805, "learning_rate": 1.8934339517436963e-05, "loss": 0.0722, "step": 24453 }, { "epoch": 0.433069018493184, "grad_norm": 0.7409193515777588, "learning_rate": 1.8933509268345586e-05, "loss": 0.0773, "step": 24454 }, { "epoch": 0.4330867280302125, "grad_norm": 0.5083459615707397, "learning_rate": 1.893267900631336e-05, "loss": 0.0792, "step": 24455 }, { "epoch": 0.4331044375672409, "grad_norm": 0.7800517082214355, "learning_rate": 1.8931848731343017e-05, "loss": 0.1022, "step": 24456 }, { "epoch": 0.43312214710426933, "grad_norm": 0.7596278786659241, "learning_rate": 1.8931018443437287e-05, "loss": 0.0851, "step": 24457 }, { "epoch": 0.4331398566412978, "grad_norm": 0.8466562032699585, "learning_rate": 1.8930188142598906e-05, "loss": 0.1082, "step": 24458 }, { "epoch": 0.4331575661783262, "grad_norm": 0.5713055729866028, "learning_rate": 1.89293578288306e-05, "loss": 0.1069, "step": 24459 }, { "epoch": 0.43317527571535464, "grad_norm": 0.6219578981399536, "learning_rate": 1.892852750213511e-05, "loss": 0.0701, "step": 24460 }, { "epoch": 0.43319298525238303, "grad_norm": 0.6773897409439087, "learning_rate": 1.892769716251516e-05, "loss": 0.0794, "step": 24461 }, { "epoch": 0.4332106947894115, "grad_norm": 0.8251965641975403, "learning_rate": 1.8926866809973484e-05, "loss": 0.0739, "step": 24462 }, { "epoch": 0.4332284043264399, "grad_norm": 0.5693920254707336, "learning_rate": 1.8926036444512808e-05, "loss": 0.0733, "step": 24463 }, { "epoch": 0.43324611386346834, "grad_norm": 1.0012344121932983, "learning_rate": 1.892520606613587e-05, "loss": 0.0702, "step": 24464 }, { "epoch": 0.43326382340049674, "grad_norm": 1.0697141885757446, "learning_rate": 1.892437567484541e-05, "loss": 0.1085, "step": 24465 }, { "epoch": 0.4332815329375252, "grad_norm": 0.7085883617401123, "learning_rate": 1.8923545270644146e-05, "loss": 0.0592, "step": 24466 }, { "epoch": 0.4332992424745536, "grad_norm": 0.5325191020965576, "learning_rate": 1.8922714853534815e-05, "loss": 0.0643, "step": 24467 }, { "epoch": 0.43331695201158205, "grad_norm": 0.5426819920539856, "learning_rate": 1.8921884423520142e-05, "loss": 0.0578, "step": 24468 }, { "epoch": 0.43333466154861044, "grad_norm": 0.7850807905197144, "learning_rate": 1.8921053980602873e-05, "loss": 0.0743, "step": 24469 }, { "epoch": 0.4333523710856389, "grad_norm": 1.0391106605529785, "learning_rate": 1.8920223524785736e-05, "loss": 0.0667, "step": 24470 }, { "epoch": 0.4333700806226673, "grad_norm": 0.6770734190940857, "learning_rate": 1.891939305607145e-05, "loss": 0.0725, "step": 24471 }, { "epoch": 0.43338779015969575, "grad_norm": 0.6889360547065735, "learning_rate": 1.8918562574462768e-05, "loss": 0.0976, "step": 24472 }, { "epoch": 0.4334054996967242, "grad_norm": 0.9773955345153809, "learning_rate": 1.89177320799624e-05, "loss": 0.0888, "step": 24473 }, { "epoch": 0.4334232092337526, "grad_norm": 0.5963610410690308, "learning_rate": 1.89169015725731e-05, "loss": 0.0631, "step": 24474 }, { "epoch": 0.43344091877078106, "grad_norm": 0.7946102023124695, "learning_rate": 1.8916071052297587e-05, "loss": 0.0714, "step": 24475 }, { "epoch": 0.43345862830780946, "grad_norm": 0.6183902025222778, "learning_rate": 1.8915240519138596e-05, "loss": 0.0695, "step": 24476 }, { "epoch": 0.4334763378448379, "grad_norm": 0.8382107019424438, "learning_rate": 1.8914409973098857e-05, "loss": 0.0928, "step": 24477 }, { "epoch": 0.4334940473818663, "grad_norm": 0.637190580368042, "learning_rate": 1.891357941418111e-05, "loss": 0.0667, "step": 24478 }, { "epoch": 0.43351175691889476, "grad_norm": 1.205450177192688, "learning_rate": 1.891274884238808e-05, "loss": 0.0646, "step": 24479 }, { "epoch": 0.43352946645592316, "grad_norm": 2.025714159011841, "learning_rate": 1.8911918257722504e-05, "loss": 0.1188, "step": 24480 }, { "epoch": 0.4335471759929516, "grad_norm": 0.4585520625114441, "learning_rate": 1.8911087660187112e-05, "loss": 0.0717, "step": 24481 }, { "epoch": 0.43356488552998, "grad_norm": 0.8700956106185913, "learning_rate": 1.891025704978463e-05, "loss": 0.1035, "step": 24482 }, { "epoch": 0.43358259506700847, "grad_norm": 0.5860534310340881, "learning_rate": 1.8909426426517806e-05, "loss": 0.0749, "step": 24483 }, { "epoch": 0.43360030460403687, "grad_norm": 1.2055987119674683, "learning_rate": 1.890859579038936e-05, "loss": 0.0778, "step": 24484 }, { "epoch": 0.4336180141410653, "grad_norm": 0.8432331085205078, "learning_rate": 1.8907765141402033e-05, "loss": 0.0794, "step": 24485 }, { "epoch": 0.4336357236780937, "grad_norm": 0.608924925327301, "learning_rate": 1.8906934479558555e-05, "loss": 0.0876, "step": 24486 }, { "epoch": 0.4336534332151222, "grad_norm": 0.3663519620895386, "learning_rate": 1.8906103804861654e-05, "loss": 0.0905, "step": 24487 }, { "epoch": 0.43367114275215063, "grad_norm": 0.5186470746994019, "learning_rate": 1.890527311731407e-05, "loss": 0.0595, "step": 24488 }, { "epoch": 0.433688852289179, "grad_norm": 0.7356992363929749, "learning_rate": 1.8904442416918527e-05, "loss": 0.0838, "step": 24489 }, { "epoch": 0.4337065618262075, "grad_norm": 0.45461082458496094, "learning_rate": 1.8903611703677767e-05, "loss": 0.0691, "step": 24490 }, { "epoch": 0.4337242713632359, "grad_norm": 0.8160508275032043, "learning_rate": 1.8902780977594518e-05, "loss": 0.0681, "step": 24491 }, { "epoch": 0.43374198090026433, "grad_norm": 0.5571234226226807, "learning_rate": 1.8901950238671516e-05, "loss": 0.0521, "step": 24492 }, { "epoch": 0.43375969043729273, "grad_norm": 0.5093504190444946, "learning_rate": 1.8901119486911492e-05, "loss": 0.0737, "step": 24493 }, { "epoch": 0.4337773999743212, "grad_norm": 0.7798845767974854, "learning_rate": 1.8900288722317178e-05, "loss": 0.1012, "step": 24494 }, { "epoch": 0.4337951095113496, "grad_norm": 0.609644889831543, "learning_rate": 1.889945794489131e-05, "loss": 0.0819, "step": 24495 }, { "epoch": 0.43381281904837804, "grad_norm": 0.49115699529647827, "learning_rate": 1.889862715463662e-05, "loss": 0.0675, "step": 24496 }, { "epoch": 0.43383052858540644, "grad_norm": 0.845152735710144, "learning_rate": 1.889779635155584e-05, "loss": 0.0787, "step": 24497 }, { "epoch": 0.4338482381224349, "grad_norm": 0.8943266272544861, "learning_rate": 1.8896965535651703e-05, "loss": 0.0775, "step": 24498 }, { "epoch": 0.4338659476594633, "grad_norm": 0.634781539440155, "learning_rate": 1.889613470692695e-05, "loss": 0.0877, "step": 24499 }, { "epoch": 0.43388365719649175, "grad_norm": 0.6111553907394409, "learning_rate": 1.8895303865384302e-05, "loss": 0.0672, "step": 24500 }, { "epoch": 0.43390136673352014, "grad_norm": 0.8036760091781616, "learning_rate": 1.8894473011026495e-05, "loss": 0.1011, "step": 24501 }, { "epoch": 0.4339190762705486, "grad_norm": 0.7412338256835938, "learning_rate": 1.8893642143856274e-05, "loss": 0.0832, "step": 24502 }, { "epoch": 0.43393678580757705, "grad_norm": 0.5921657681465149, "learning_rate": 1.8892811263876353e-05, "loss": 0.0897, "step": 24503 }, { "epoch": 0.43395449534460545, "grad_norm": 0.8872487545013428, "learning_rate": 1.8891980371089488e-05, "loss": 0.0828, "step": 24504 }, { "epoch": 0.4339722048816339, "grad_norm": 0.6388253569602966, "learning_rate": 1.8891149465498395e-05, "loss": 0.0828, "step": 24505 }, { "epoch": 0.4339899144186623, "grad_norm": 0.8280143141746521, "learning_rate": 1.8890318547105818e-05, "loss": 0.0519, "step": 24506 }, { "epoch": 0.43400762395569076, "grad_norm": 0.8158607482910156, "learning_rate": 1.8889487615914482e-05, "loss": 0.1005, "step": 24507 }, { "epoch": 0.43402533349271916, "grad_norm": 0.5937716960906982, "learning_rate": 1.888865667192713e-05, "loss": 0.0675, "step": 24508 }, { "epoch": 0.4340430430297476, "grad_norm": 0.5932141542434692, "learning_rate": 1.888782571514649e-05, "loss": 0.0886, "step": 24509 }, { "epoch": 0.434060752566776, "grad_norm": 0.6687312722206116, "learning_rate": 1.8886994745575294e-05, "loss": 0.0551, "step": 24510 }, { "epoch": 0.43407846210380446, "grad_norm": 0.9443049430847168, "learning_rate": 1.8886163763216277e-05, "loss": 0.0815, "step": 24511 }, { "epoch": 0.43409617164083286, "grad_norm": 0.8405393958091736, "learning_rate": 1.8885332768072178e-05, "loss": 0.0888, "step": 24512 }, { "epoch": 0.4341138811778613, "grad_norm": 0.9299356937408447, "learning_rate": 1.8884501760145725e-05, "loss": 0.0696, "step": 24513 }, { "epoch": 0.4341315907148897, "grad_norm": 0.5183059573173523, "learning_rate": 1.8883670739439656e-05, "loss": 0.0806, "step": 24514 }, { "epoch": 0.43414930025191817, "grad_norm": 0.7967883348464966, "learning_rate": 1.88828397059567e-05, "loss": 0.0834, "step": 24515 }, { "epoch": 0.43416700978894657, "grad_norm": 0.6226022243499756, "learning_rate": 1.8882008659699595e-05, "loss": 0.0597, "step": 24516 }, { "epoch": 0.434184719325975, "grad_norm": 0.6132243275642395, "learning_rate": 1.8881177600671075e-05, "loss": 0.0818, "step": 24517 }, { "epoch": 0.4342024288630035, "grad_norm": 0.5516335368156433, "learning_rate": 1.8880346528873876e-05, "loss": 0.076, "step": 24518 }, { "epoch": 0.4342201384000319, "grad_norm": 1.0155575275421143, "learning_rate": 1.8879515444310728e-05, "loss": 0.1206, "step": 24519 }, { "epoch": 0.43423784793706033, "grad_norm": 0.7759342193603516, "learning_rate": 1.8878684346984362e-05, "loss": 0.118, "step": 24520 }, { "epoch": 0.4342555574740887, "grad_norm": 0.6511762142181396, "learning_rate": 1.8877853236897523e-05, "loss": 0.0915, "step": 24521 }, { "epoch": 0.4342732670111172, "grad_norm": 0.5063493847846985, "learning_rate": 1.8877022114052935e-05, "loss": 0.0566, "step": 24522 }, { "epoch": 0.4342909765481456, "grad_norm": 0.6656315922737122, "learning_rate": 1.8876190978453338e-05, "loss": 0.0774, "step": 24523 }, { "epoch": 0.43430868608517403, "grad_norm": 0.5839117765426636, "learning_rate": 1.8875359830101467e-05, "loss": 0.0738, "step": 24524 }, { "epoch": 0.43432639562220243, "grad_norm": 0.5069788098335266, "learning_rate": 1.887452866900005e-05, "loss": 0.0433, "step": 24525 }, { "epoch": 0.4343441051592309, "grad_norm": 1.203351378440857, "learning_rate": 1.8873697495151822e-05, "loss": 0.1011, "step": 24526 }, { "epoch": 0.4343618146962593, "grad_norm": 0.4633219540119171, "learning_rate": 1.887286630855953e-05, "loss": 0.0546, "step": 24527 }, { "epoch": 0.43437952423328774, "grad_norm": 0.6139234900474548, "learning_rate": 1.8872035109225892e-05, "loss": 0.0723, "step": 24528 }, { "epoch": 0.43439723377031614, "grad_norm": 0.785327136516571, "learning_rate": 1.8871203897153657e-05, "loss": 0.0698, "step": 24529 }, { "epoch": 0.4344149433073446, "grad_norm": 0.875396192073822, "learning_rate": 1.8870372672345546e-05, "loss": 0.0665, "step": 24530 }, { "epoch": 0.434432652844373, "grad_norm": 0.8977999091148376, "learning_rate": 1.8869541434804307e-05, "loss": 0.0778, "step": 24531 }, { "epoch": 0.43445036238140144, "grad_norm": 0.7143203616142273, "learning_rate": 1.886871018453267e-05, "loss": 0.1028, "step": 24532 }, { "epoch": 0.4344680719184299, "grad_norm": 0.5818031430244446, "learning_rate": 1.8867878921533357e-05, "loss": 0.0564, "step": 24533 }, { "epoch": 0.4344857814554583, "grad_norm": 0.47788625955581665, "learning_rate": 1.886704764580912e-05, "loss": 0.0669, "step": 24534 }, { "epoch": 0.43450349099248675, "grad_norm": 0.7611202001571655, "learning_rate": 1.886621635736269e-05, "loss": 0.057, "step": 24535 }, { "epoch": 0.43452120052951515, "grad_norm": 1.3240324258804321, "learning_rate": 1.8865385056196797e-05, "loss": 0.1107, "step": 24536 }, { "epoch": 0.4345389100665436, "grad_norm": 0.9545243978500366, "learning_rate": 1.8864553742314173e-05, "loss": 0.1026, "step": 24537 }, { "epoch": 0.434556619603572, "grad_norm": 0.7263143658638, "learning_rate": 1.8863722415717564e-05, "loss": 0.0963, "step": 24538 }, { "epoch": 0.43457432914060046, "grad_norm": 0.9666990041732788, "learning_rate": 1.8862891076409698e-05, "loss": 0.0921, "step": 24539 }, { "epoch": 0.43459203867762886, "grad_norm": 0.7000693082809448, "learning_rate": 1.8862059724393305e-05, "loss": 0.087, "step": 24540 }, { "epoch": 0.4346097482146573, "grad_norm": 0.5954986214637756, "learning_rate": 1.8861228359671133e-05, "loss": 0.0467, "step": 24541 }, { "epoch": 0.4346274577516857, "grad_norm": 0.7301366329193115, "learning_rate": 1.886039698224591e-05, "loss": 0.0705, "step": 24542 }, { "epoch": 0.43464516728871416, "grad_norm": 0.5676435232162476, "learning_rate": 1.8859565592120373e-05, "loss": 0.1063, "step": 24543 }, { "epoch": 0.43466287682574256, "grad_norm": 0.4932704567909241, "learning_rate": 1.8858734189297247e-05, "loss": 0.0718, "step": 24544 }, { "epoch": 0.434680586362771, "grad_norm": 0.5431851744651794, "learning_rate": 1.8857902773779285e-05, "loss": 0.053, "step": 24545 }, { "epoch": 0.4346982958997994, "grad_norm": 0.61458420753479, "learning_rate": 1.885707134556921e-05, "loss": 0.076, "step": 24546 }, { "epoch": 0.43471600543682787, "grad_norm": 0.45973870158195496, "learning_rate": 1.885623990466976e-05, "loss": 0.0445, "step": 24547 }, { "epoch": 0.4347337149738563, "grad_norm": 0.6611375212669373, "learning_rate": 1.8855408451083667e-05, "loss": 0.0544, "step": 24548 }, { "epoch": 0.4347514245108847, "grad_norm": 0.7456547617912292, "learning_rate": 1.8854576984813673e-05, "loss": 0.0606, "step": 24549 }, { "epoch": 0.4347691340479132, "grad_norm": 0.6097344160079956, "learning_rate": 1.8853745505862515e-05, "loss": 0.074, "step": 24550 }, { "epoch": 0.4347868435849416, "grad_norm": 1.1265637874603271, "learning_rate": 1.885291401423292e-05, "loss": 0.1079, "step": 24551 }, { "epoch": 0.43480455312197, "grad_norm": 0.7660226225852966, "learning_rate": 1.885208250992763e-05, "loss": 0.0752, "step": 24552 }, { "epoch": 0.4348222626589984, "grad_norm": 0.434676855802536, "learning_rate": 1.8851250992949375e-05, "loss": 0.0718, "step": 24553 }, { "epoch": 0.4348399721960269, "grad_norm": 0.46687036752700806, "learning_rate": 1.8850419463300893e-05, "loss": 0.063, "step": 24554 }, { "epoch": 0.4348576817330553, "grad_norm": 0.6252298951148987, "learning_rate": 1.8849587920984918e-05, "loss": 0.08, "step": 24555 }, { "epoch": 0.43487539127008373, "grad_norm": 0.8066558241844177, "learning_rate": 1.8848756366004198e-05, "loss": 0.1034, "step": 24556 }, { "epoch": 0.43489310080711213, "grad_norm": 0.8642601370811462, "learning_rate": 1.8847924798361453e-05, "loss": 0.0912, "step": 24557 }, { "epoch": 0.4349108103441406, "grad_norm": 0.6685806512832642, "learning_rate": 1.8847093218059425e-05, "loss": 0.0761, "step": 24558 }, { "epoch": 0.434928519881169, "grad_norm": 0.773901641368866, "learning_rate": 1.8846261625100848e-05, "loss": 0.0591, "step": 24559 }, { "epoch": 0.43494622941819744, "grad_norm": 0.7995530962944031, "learning_rate": 1.8845430019488462e-05, "loss": 0.1183, "step": 24560 }, { "epoch": 0.43496393895522584, "grad_norm": 0.5361754298210144, "learning_rate": 1.8844598401225e-05, "loss": 0.0744, "step": 24561 }, { "epoch": 0.4349816484922543, "grad_norm": 0.7955485582351685, "learning_rate": 1.8843766770313193e-05, "loss": 0.0748, "step": 24562 }, { "epoch": 0.43499935802928275, "grad_norm": 0.6097567081451416, "learning_rate": 1.8842935126755788e-05, "loss": 0.0802, "step": 24563 }, { "epoch": 0.43501706756631114, "grad_norm": 0.5365581512451172, "learning_rate": 1.8842103470555515e-05, "loss": 0.0687, "step": 24564 }, { "epoch": 0.4350347771033396, "grad_norm": 0.48313072323799133, "learning_rate": 1.8841271801715105e-05, "loss": 0.0554, "step": 24565 }, { "epoch": 0.435052486640368, "grad_norm": 0.6023468971252441, "learning_rate": 1.8840440120237306e-05, "loss": 0.0839, "step": 24566 }, { "epoch": 0.43507019617739645, "grad_norm": 0.8336750864982605, "learning_rate": 1.8839608426124844e-05, "loss": 0.0758, "step": 24567 }, { "epoch": 0.43508790571442485, "grad_norm": 0.8222293853759766, "learning_rate": 1.8838776719380458e-05, "loss": 0.0971, "step": 24568 }, { "epoch": 0.4351056152514533, "grad_norm": 0.541276752948761, "learning_rate": 1.8837945000006882e-05, "loss": 0.0739, "step": 24569 }, { "epoch": 0.4351233247884817, "grad_norm": 0.7027677297592163, "learning_rate": 1.883711326800686e-05, "loss": 0.0556, "step": 24570 }, { "epoch": 0.43514103432551016, "grad_norm": 0.592714250087738, "learning_rate": 1.8836281523383124e-05, "loss": 0.0485, "step": 24571 }, { "epoch": 0.43515874386253856, "grad_norm": 0.41694048047065735, "learning_rate": 1.8835449766138407e-05, "loss": 0.0646, "step": 24572 }, { "epoch": 0.435176453399567, "grad_norm": 0.7183693647384644, "learning_rate": 1.8834617996275447e-05, "loss": 0.0593, "step": 24573 }, { "epoch": 0.4351941629365954, "grad_norm": 0.6364453434944153, "learning_rate": 1.8833786213796985e-05, "loss": 0.0457, "step": 24574 }, { "epoch": 0.43521187247362386, "grad_norm": 0.8364648222923279, "learning_rate": 1.883295441870575e-05, "loss": 0.0616, "step": 24575 }, { "epoch": 0.43522958201065226, "grad_norm": 0.6777885556221008, "learning_rate": 1.8832122611004488e-05, "loss": 0.0597, "step": 24576 }, { "epoch": 0.4352472915476807, "grad_norm": 0.9390044808387756, "learning_rate": 1.883129079069593e-05, "loss": 0.1073, "step": 24577 }, { "epoch": 0.43526500108470917, "grad_norm": 0.6683016419410706, "learning_rate": 1.8830458957782807e-05, "loss": 0.0925, "step": 24578 }, { "epoch": 0.43528271062173757, "grad_norm": 0.565514087677002, "learning_rate": 1.8829627112267865e-05, "loss": 0.0498, "step": 24579 }, { "epoch": 0.435300420158766, "grad_norm": 0.6020559072494507, "learning_rate": 1.882879525415384e-05, "loss": 0.108, "step": 24580 }, { "epoch": 0.4353181296957944, "grad_norm": 0.8162083029747009, "learning_rate": 1.8827963383443462e-05, "loss": 0.0915, "step": 24581 }, { "epoch": 0.4353358392328229, "grad_norm": 0.7507932782173157, "learning_rate": 1.882713150013947e-05, "loss": 0.0967, "step": 24582 }, { "epoch": 0.4353535487698513, "grad_norm": 0.842430055141449, "learning_rate": 1.8826299604244604e-05, "loss": 0.1147, "step": 24583 }, { "epoch": 0.4353712583068797, "grad_norm": 0.9160561561584473, "learning_rate": 1.8825467695761605e-05, "loss": 0.0683, "step": 24584 }, { "epoch": 0.4353889678439081, "grad_norm": 0.587435245513916, "learning_rate": 1.8824635774693198e-05, "loss": 0.0647, "step": 24585 }, { "epoch": 0.4354066773809366, "grad_norm": 0.8140060901641846, "learning_rate": 1.882380384104213e-05, "loss": 0.1487, "step": 24586 }, { "epoch": 0.435424386917965, "grad_norm": 0.6284701824188232, "learning_rate": 1.8822971894811128e-05, "loss": 0.0629, "step": 24587 }, { "epoch": 0.43544209645499343, "grad_norm": 0.6311659812927246, "learning_rate": 1.8822139936002937e-05, "loss": 0.0808, "step": 24588 }, { "epoch": 0.43545980599202183, "grad_norm": 0.7662092447280884, "learning_rate": 1.88213079646203e-05, "loss": 0.066, "step": 24589 }, { "epoch": 0.4354775155290503, "grad_norm": 0.6939205527305603, "learning_rate": 1.882047598066594e-05, "loss": 0.0969, "step": 24590 }, { "epoch": 0.4354952250660787, "grad_norm": 0.5340505242347717, "learning_rate": 1.88196439841426e-05, "loss": 0.0786, "step": 24591 }, { "epoch": 0.43551293460310714, "grad_norm": 0.7817367315292358, "learning_rate": 1.8818811975053012e-05, "loss": 0.0923, "step": 24592 }, { "epoch": 0.4355306441401356, "grad_norm": 0.5755372643470764, "learning_rate": 1.881797995339993e-05, "loss": 0.093, "step": 24593 }, { "epoch": 0.435548353677164, "grad_norm": 0.8364596366882324, "learning_rate": 1.8817147919186075e-05, "loss": 0.0732, "step": 24594 }, { "epoch": 0.43556606321419244, "grad_norm": 0.437050461769104, "learning_rate": 1.881631587241419e-05, "loss": 0.0563, "step": 24595 }, { "epoch": 0.43558377275122084, "grad_norm": 0.798275351524353, "learning_rate": 1.8815483813087014e-05, "loss": 0.1005, "step": 24596 }, { "epoch": 0.4356014822882493, "grad_norm": 0.38769951462745667, "learning_rate": 1.8814651741207277e-05, "loss": 0.0684, "step": 24597 }, { "epoch": 0.4356191918252777, "grad_norm": 0.7197138071060181, "learning_rate": 1.8813819656777727e-05, "loss": 0.0785, "step": 24598 }, { "epoch": 0.43563690136230615, "grad_norm": 0.5485959053039551, "learning_rate": 1.8812987559801092e-05, "loss": 0.0932, "step": 24599 }, { "epoch": 0.43565461089933455, "grad_norm": 1.6069364547729492, "learning_rate": 1.881215545028012e-05, "loss": 0.088, "step": 24600 }, { "epoch": 0.435672320436363, "grad_norm": 0.8599053025245667, "learning_rate": 1.8811323328217532e-05, "loss": 0.1139, "step": 24601 }, { "epoch": 0.4356900299733914, "grad_norm": 0.6379039883613586, "learning_rate": 1.8810491193616083e-05, "loss": 0.0687, "step": 24602 }, { "epoch": 0.43570773951041986, "grad_norm": 0.6370434165000916, "learning_rate": 1.8809659046478502e-05, "loss": 0.1003, "step": 24603 }, { "epoch": 0.43572544904744825, "grad_norm": 0.6407108902931213, "learning_rate": 1.8808826886807527e-05, "loss": 0.0834, "step": 24604 }, { "epoch": 0.4357431585844767, "grad_norm": 0.8044488430023193, "learning_rate": 1.88079947146059e-05, "loss": 0.0753, "step": 24605 }, { "epoch": 0.4357608681215051, "grad_norm": 0.5469511151313782, "learning_rate": 1.880716252987635e-05, "loss": 0.0485, "step": 24606 }, { "epoch": 0.43577857765853356, "grad_norm": 0.7590671181678772, "learning_rate": 1.8806330332621624e-05, "loss": 0.0638, "step": 24607 }, { "epoch": 0.435796287195562, "grad_norm": 1.2466871738433838, "learning_rate": 1.8805498122844452e-05, "loss": 0.0939, "step": 24608 }, { "epoch": 0.4358139967325904, "grad_norm": 0.5563588738441467, "learning_rate": 1.8804665900547585e-05, "loss": 0.0886, "step": 24609 }, { "epoch": 0.43583170626961887, "grad_norm": 0.8804448843002319, "learning_rate": 1.8803833665733742e-05, "loss": 0.0771, "step": 24610 }, { "epoch": 0.43584941580664727, "grad_norm": 0.5858412384986877, "learning_rate": 1.8803001418405675e-05, "loss": 0.0742, "step": 24611 }, { "epoch": 0.4358671253436757, "grad_norm": 1.462314248085022, "learning_rate": 1.880216915856612e-05, "loss": 0.0928, "step": 24612 }, { "epoch": 0.4358848348807041, "grad_norm": 0.4828566312789917, "learning_rate": 1.880133688621781e-05, "loss": 0.0629, "step": 24613 }, { "epoch": 0.4359025444177326, "grad_norm": 0.6947767734527588, "learning_rate": 1.8800504601363487e-05, "loss": 0.0826, "step": 24614 }, { "epoch": 0.435920253954761, "grad_norm": 0.935832679271698, "learning_rate": 1.879967230400589e-05, "loss": 0.063, "step": 24615 }, { "epoch": 0.4359379634917894, "grad_norm": 0.5748955607414246, "learning_rate": 1.8798839994147754e-05, "loss": 0.0763, "step": 24616 }, { "epoch": 0.4359556730288178, "grad_norm": 0.5830109715461731, "learning_rate": 1.8798007671791816e-05, "loss": 0.0669, "step": 24617 }, { "epoch": 0.4359733825658463, "grad_norm": 0.9687357544898987, "learning_rate": 1.879717533694082e-05, "loss": 0.0857, "step": 24618 }, { "epoch": 0.4359910921028747, "grad_norm": 0.7876817584037781, "learning_rate": 1.8796342989597506e-05, "loss": 0.0836, "step": 24619 }, { "epoch": 0.43600880163990313, "grad_norm": 1.047867774963379, "learning_rate": 1.87955106297646e-05, "loss": 0.1241, "step": 24620 }, { "epoch": 0.43602651117693153, "grad_norm": 0.679247260093689, "learning_rate": 1.879467825744485e-05, "loss": 0.0605, "step": 24621 }, { "epoch": 0.43604422071396, "grad_norm": 0.44870638847351074, "learning_rate": 1.8793845872640992e-05, "loss": 0.069, "step": 24622 }, { "epoch": 0.43606193025098844, "grad_norm": 0.495829701423645, "learning_rate": 1.8793013475355768e-05, "loss": 0.057, "step": 24623 }, { "epoch": 0.43607963978801684, "grad_norm": 0.6074089407920837, "learning_rate": 1.8792181065591908e-05, "loss": 0.0517, "step": 24624 }, { "epoch": 0.4360973493250453, "grad_norm": 0.77145916223526, "learning_rate": 1.879134864335216e-05, "loss": 0.1001, "step": 24625 }, { "epoch": 0.4361150588620737, "grad_norm": 0.7402048110961914, "learning_rate": 1.8790516208639258e-05, "loss": 0.0752, "step": 24626 }, { "epoch": 0.43613276839910214, "grad_norm": 0.5586828589439392, "learning_rate": 1.8789683761455942e-05, "loss": 0.0825, "step": 24627 }, { "epoch": 0.43615047793613054, "grad_norm": 0.8004738688468933, "learning_rate": 1.878885130180495e-05, "loss": 0.069, "step": 24628 }, { "epoch": 0.436168187473159, "grad_norm": 0.5768329501152039, "learning_rate": 1.878801882968902e-05, "loss": 0.0636, "step": 24629 }, { "epoch": 0.4361858970101874, "grad_norm": 0.7308055758476257, "learning_rate": 1.8787186345110886e-05, "loss": 0.0732, "step": 24630 }, { "epoch": 0.43620360654721585, "grad_norm": 0.6152181029319763, "learning_rate": 1.8786353848073292e-05, "loss": 0.0847, "step": 24631 }, { "epoch": 0.43622131608424425, "grad_norm": 0.753940224647522, "learning_rate": 1.8785521338578984e-05, "loss": 0.0824, "step": 24632 }, { "epoch": 0.4362390256212727, "grad_norm": 0.9093393683433533, "learning_rate": 1.878468881663069e-05, "loss": 0.0518, "step": 24633 }, { "epoch": 0.4362567351583011, "grad_norm": 0.5704606175422668, "learning_rate": 1.8783856282231152e-05, "loss": 0.0658, "step": 24634 }, { "epoch": 0.43627444469532956, "grad_norm": 0.5974199175834656, "learning_rate": 1.8783023735383108e-05, "loss": 0.0679, "step": 24635 }, { "epoch": 0.43629215423235795, "grad_norm": 0.720427930355072, "learning_rate": 1.8782191176089303e-05, "loss": 0.0885, "step": 24636 }, { "epoch": 0.4363098637693864, "grad_norm": 0.5097323060035706, "learning_rate": 1.878135860435247e-05, "loss": 0.0442, "step": 24637 }, { "epoch": 0.43632757330641486, "grad_norm": 0.45860281586647034, "learning_rate": 1.8780526020175348e-05, "loss": 0.0925, "step": 24638 }, { "epoch": 0.43634528284344326, "grad_norm": 0.9011431932449341, "learning_rate": 1.8779693423560675e-05, "loss": 0.0893, "step": 24639 }, { "epoch": 0.4363629923804717, "grad_norm": 0.9583885073661804, "learning_rate": 1.8778860814511197e-05, "loss": 0.0985, "step": 24640 }, { "epoch": 0.4363807019175001, "grad_norm": 0.6229115128517151, "learning_rate": 1.8778028193029653e-05, "loss": 0.0728, "step": 24641 }, { "epoch": 0.43639841145452857, "grad_norm": 0.5837810635566711, "learning_rate": 1.8777195559118774e-05, "loss": 0.081, "step": 24642 }, { "epoch": 0.43641612099155697, "grad_norm": 0.6189839243888855, "learning_rate": 1.8776362912781303e-05, "loss": 0.0847, "step": 24643 }, { "epoch": 0.4364338305285854, "grad_norm": 0.5493304133415222, "learning_rate": 1.877553025401998e-05, "loss": 0.0784, "step": 24644 }, { "epoch": 0.4364515400656138, "grad_norm": 0.38489073514938354, "learning_rate": 1.8774697582837547e-05, "loss": 0.0552, "step": 24645 }, { "epoch": 0.4364692496026423, "grad_norm": 0.5691249966621399, "learning_rate": 1.8773864899236738e-05, "loss": 0.0584, "step": 24646 }, { "epoch": 0.43648695913967067, "grad_norm": 0.6040869355201721, "learning_rate": 1.8773032203220296e-05, "loss": 0.1175, "step": 24647 }, { "epoch": 0.4365046686766991, "grad_norm": 0.6099078059196472, "learning_rate": 1.8772199494790962e-05, "loss": 0.0696, "step": 24648 }, { "epoch": 0.4365223782137275, "grad_norm": 0.5014967322349548, "learning_rate": 1.8771366773951468e-05, "loss": 0.0817, "step": 24649 }, { "epoch": 0.436540087750756, "grad_norm": 0.7921810150146484, "learning_rate": 1.8770534040704564e-05, "loss": 0.0467, "step": 24650 }, { "epoch": 0.4365577972877844, "grad_norm": 0.44626718759536743, "learning_rate": 1.876970129505298e-05, "loss": 0.0683, "step": 24651 }, { "epoch": 0.43657550682481283, "grad_norm": 0.6309026479721069, "learning_rate": 1.8768868536999463e-05, "loss": 0.0864, "step": 24652 }, { "epoch": 0.4365932163618413, "grad_norm": 0.5479763746261597, "learning_rate": 1.8768035766546748e-05, "loss": 0.0667, "step": 24653 }, { "epoch": 0.4366109258988697, "grad_norm": 0.7838695645332336, "learning_rate": 1.8767202983697574e-05, "loss": 0.0776, "step": 24654 }, { "epoch": 0.43662863543589814, "grad_norm": 1.152587652206421, "learning_rate": 1.876637018845469e-05, "loss": 0.071, "step": 24655 }, { "epoch": 0.43664634497292654, "grad_norm": 0.7530964016914368, "learning_rate": 1.8765537380820826e-05, "loss": 0.0544, "step": 24656 }, { "epoch": 0.436664054509955, "grad_norm": 0.889580249786377, "learning_rate": 1.8764704560798726e-05, "loss": 0.1045, "step": 24657 }, { "epoch": 0.4366817640469834, "grad_norm": 0.5066052079200745, "learning_rate": 1.8763871728391125e-05, "loss": 0.0632, "step": 24658 }, { "epoch": 0.43669947358401184, "grad_norm": 0.47865796089172363, "learning_rate": 1.876303888360077e-05, "loss": 0.0363, "step": 24659 }, { "epoch": 0.43671718312104024, "grad_norm": 0.4039372205734253, "learning_rate": 1.8762206026430394e-05, "loss": 0.0642, "step": 24660 }, { "epoch": 0.4367348926580687, "grad_norm": 0.9356088042259216, "learning_rate": 1.8761373156882742e-05, "loss": 0.0836, "step": 24661 }, { "epoch": 0.4367526021950971, "grad_norm": 0.839848518371582, "learning_rate": 1.8760540274960556e-05, "loss": 0.0868, "step": 24662 }, { "epoch": 0.43677031173212555, "grad_norm": 0.6545610427856445, "learning_rate": 1.8759707380666573e-05, "loss": 0.0832, "step": 24663 }, { "epoch": 0.43678802126915395, "grad_norm": 0.544086754322052, "learning_rate": 1.875887447400353e-05, "loss": 0.0831, "step": 24664 }, { "epoch": 0.4368057308061824, "grad_norm": 0.7438899278640747, "learning_rate": 1.875804155497417e-05, "loss": 0.073, "step": 24665 }, { "epoch": 0.4368234403432108, "grad_norm": 0.5540844798088074, "learning_rate": 1.8757208623581233e-05, "loss": 0.0744, "step": 24666 }, { "epoch": 0.43684114988023925, "grad_norm": 0.8052344918251038, "learning_rate": 1.8756375679827468e-05, "loss": 0.0911, "step": 24667 }, { "epoch": 0.4368588594172677, "grad_norm": 0.6656730771064758, "learning_rate": 1.875554272371559e-05, "loss": 0.08, "step": 24668 }, { "epoch": 0.4368765689542961, "grad_norm": 0.6714646220207214, "learning_rate": 1.875470975524837e-05, "loss": 0.0697, "step": 24669 }, { "epoch": 0.43689427849132456, "grad_norm": 0.8555816411972046, "learning_rate": 1.875387677442853e-05, "loss": 0.0863, "step": 24670 }, { "epoch": 0.43691198802835296, "grad_norm": 0.8089169263839722, "learning_rate": 1.875304378125882e-05, "loss": 0.0923, "step": 24671 }, { "epoch": 0.4369296975653814, "grad_norm": 0.48694369196891785, "learning_rate": 1.8752210775741966e-05, "loss": 0.0879, "step": 24672 }, { "epoch": 0.4369474071024098, "grad_norm": 0.6006914377212524, "learning_rate": 1.8751377757880722e-05, "loss": 0.0745, "step": 24673 }, { "epoch": 0.43696511663943827, "grad_norm": 1.0189403295516968, "learning_rate": 1.8750544727677833e-05, "loss": 0.0547, "step": 24674 }, { "epoch": 0.43698282617646667, "grad_norm": 0.8699588179588318, "learning_rate": 1.874971168513602e-05, "loss": 0.091, "step": 24675 }, { "epoch": 0.4370005357134951, "grad_norm": 0.8180283308029175, "learning_rate": 1.8748878630258045e-05, "loss": 0.1061, "step": 24676 }, { "epoch": 0.4370182452505235, "grad_norm": 0.5252681374549866, "learning_rate": 1.874804556304663e-05, "loss": 0.0759, "step": 24677 }, { "epoch": 0.437035954787552, "grad_norm": 0.5875066518783569, "learning_rate": 1.8747212483504524e-05, "loss": 0.045, "step": 24678 }, { "epoch": 0.43705366432458037, "grad_norm": 0.7228789329528809, "learning_rate": 1.874637939163447e-05, "loss": 0.0674, "step": 24679 }, { "epoch": 0.4370713738616088, "grad_norm": 0.48700249195098877, "learning_rate": 1.8745546287439212e-05, "loss": 0.0673, "step": 24680 }, { "epoch": 0.4370890833986372, "grad_norm": 0.5336323976516724, "learning_rate": 1.874471317092148e-05, "loss": 0.0813, "step": 24681 }, { "epoch": 0.4371067929356657, "grad_norm": 0.7046190500259399, "learning_rate": 1.8743880042084023e-05, "loss": 0.1019, "step": 24682 }, { "epoch": 0.43712450247269413, "grad_norm": 0.3096661865711212, "learning_rate": 1.8743046900929575e-05, "loss": 0.0459, "step": 24683 }, { "epoch": 0.43714221200972253, "grad_norm": 0.7703930735588074, "learning_rate": 1.8742213747460884e-05, "loss": 0.0721, "step": 24684 }, { "epoch": 0.437159921546751, "grad_norm": 0.5269794464111328, "learning_rate": 1.874138058168069e-05, "loss": 0.0701, "step": 24685 }, { "epoch": 0.4371776310837794, "grad_norm": 0.6691704392433167, "learning_rate": 1.874054740359173e-05, "loss": 0.1094, "step": 24686 }, { "epoch": 0.43719534062080784, "grad_norm": 0.6963305473327637, "learning_rate": 1.8739714213196747e-05, "loss": 0.0955, "step": 24687 }, { "epoch": 0.43721305015783624, "grad_norm": 0.46999645233154297, "learning_rate": 1.8738881010498485e-05, "loss": 0.074, "step": 24688 }, { "epoch": 0.4372307596948647, "grad_norm": 0.676051139831543, "learning_rate": 1.873804779549968e-05, "loss": 0.0916, "step": 24689 }, { "epoch": 0.4372484692318931, "grad_norm": 0.928425133228302, "learning_rate": 1.873721456820308e-05, "loss": 0.0851, "step": 24690 }, { "epoch": 0.43726617876892154, "grad_norm": 0.629015326499939, "learning_rate": 1.873638132861142e-05, "loss": 0.0757, "step": 24691 }, { "epoch": 0.43728388830594994, "grad_norm": 0.6779038906097412, "learning_rate": 1.873554807672744e-05, "loss": 0.0842, "step": 24692 }, { "epoch": 0.4373015978429784, "grad_norm": 0.6845101714134216, "learning_rate": 1.8734714812553885e-05, "loss": 0.058, "step": 24693 }, { "epoch": 0.4373193073800068, "grad_norm": 0.5229335427284241, "learning_rate": 1.87338815360935e-05, "loss": 0.0882, "step": 24694 }, { "epoch": 0.43733701691703525, "grad_norm": 0.4907912313938141, "learning_rate": 1.8733048247349023e-05, "loss": 0.0596, "step": 24695 }, { "epoch": 0.43735472645406365, "grad_norm": 1.9382041692733765, "learning_rate": 1.8732214946323195e-05, "loss": 0.0628, "step": 24696 }, { "epoch": 0.4373724359910921, "grad_norm": 0.8604539632797241, "learning_rate": 1.8731381633018746e-05, "loss": 0.0836, "step": 24697 }, { "epoch": 0.43739014552812056, "grad_norm": 0.6625440120697021, "learning_rate": 1.8730548307438443e-05, "loss": 0.0761, "step": 24698 }, { "epoch": 0.43740785506514895, "grad_norm": 0.6761645674705505, "learning_rate": 1.8729714969585008e-05, "loss": 0.0555, "step": 24699 }, { "epoch": 0.4374255646021774, "grad_norm": 0.629441499710083, "learning_rate": 1.872888161946119e-05, "loss": 0.0741, "step": 24700 }, { "epoch": 0.4374432741392058, "grad_norm": 0.5222582817077637, "learning_rate": 1.8728048257069724e-05, "loss": 0.0847, "step": 24701 }, { "epoch": 0.43746098367623426, "grad_norm": 0.5763585567474365, "learning_rate": 1.8727214882413355e-05, "loss": 0.0694, "step": 24702 }, { "epoch": 0.43747869321326266, "grad_norm": 0.40761488676071167, "learning_rate": 1.8726381495494835e-05, "loss": 0.0729, "step": 24703 }, { "epoch": 0.4374964027502911, "grad_norm": 0.8578798770904541, "learning_rate": 1.872554809631689e-05, "loss": 0.0996, "step": 24704 }, { "epoch": 0.4375141122873195, "grad_norm": 0.33744168281555176, "learning_rate": 1.872471468488227e-05, "loss": 0.0865, "step": 24705 }, { "epoch": 0.43753182182434797, "grad_norm": 0.6401466131210327, "learning_rate": 1.8723881261193716e-05, "loss": 0.0728, "step": 24706 }, { "epoch": 0.43754953136137636, "grad_norm": 0.5640740990638733, "learning_rate": 1.8723047825253967e-05, "loss": 0.0947, "step": 24707 }, { "epoch": 0.4375672408984048, "grad_norm": 0.6767343878746033, "learning_rate": 1.8722214377065773e-05, "loss": 0.0418, "step": 24708 }, { "epoch": 0.4375849504354332, "grad_norm": 0.6980544328689575, "learning_rate": 1.8721380916631864e-05, "loss": 0.1113, "step": 24709 }, { "epoch": 0.43760265997246167, "grad_norm": 0.7066517472267151, "learning_rate": 1.8720547443954992e-05, "loss": 0.1189, "step": 24710 }, { "epoch": 0.4376203695094901, "grad_norm": 0.5039361119270325, "learning_rate": 1.8719713959037895e-05, "loss": 0.0788, "step": 24711 }, { "epoch": 0.4376380790465185, "grad_norm": 0.7982649803161621, "learning_rate": 1.8718880461883312e-05, "loss": 0.1209, "step": 24712 }, { "epoch": 0.437655788583547, "grad_norm": 0.40280476212501526, "learning_rate": 1.8718046952493986e-05, "loss": 0.072, "step": 24713 }, { "epoch": 0.4376734981205754, "grad_norm": 0.6533535122871399, "learning_rate": 1.871721343087267e-05, "loss": 0.109, "step": 24714 }, { "epoch": 0.43769120765760383, "grad_norm": 0.7886252403259277, "learning_rate": 1.87163798970221e-05, "loss": 0.0814, "step": 24715 }, { "epoch": 0.43770891719463223, "grad_norm": 0.6650736331939697, "learning_rate": 1.8715546350945005e-05, "loss": 0.0478, "step": 24716 }, { "epoch": 0.4377266267316607, "grad_norm": 0.900471031665802, "learning_rate": 1.8714712792644144e-05, "loss": 0.0699, "step": 24717 }, { "epoch": 0.4377443362686891, "grad_norm": 0.6910430192947388, "learning_rate": 1.871387922212225e-05, "loss": 0.0729, "step": 24718 }, { "epoch": 0.43776204580571754, "grad_norm": 1.2452534437179565, "learning_rate": 1.8713045639382073e-05, "loss": 0.0976, "step": 24719 }, { "epoch": 0.43777975534274594, "grad_norm": 0.6288771629333496, "learning_rate": 1.871221204442635e-05, "loss": 0.0733, "step": 24720 }, { "epoch": 0.4377974648797744, "grad_norm": 0.640597403049469, "learning_rate": 1.8711378437257823e-05, "loss": 0.0595, "step": 24721 }, { "epoch": 0.4378151744168028, "grad_norm": 0.6036666631698608, "learning_rate": 1.8710544817879242e-05, "loss": 0.08, "step": 24722 }, { "epoch": 0.43783288395383124, "grad_norm": 0.7648670673370361, "learning_rate": 1.8709711186293338e-05, "loss": 0.0761, "step": 24723 }, { "epoch": 0.43785059349085964, "grad_norm": 0.9298645257949829, "learning_rate": 1.870887754250286e-05, "loss": 0.131, "step": 24724 }, { "epoch": 0.4378683030278881, "grad_norm": 0.45415598154067993, "learning_rate": 1.8708043886510553e-05, "loss": 0.0712, "step": 24725 }, { "epoch": 0.43788601256491655, "grad_norm": 0.5738952159881592, "learning_rate": 1.8707210218319158e-05, "loss": 0.0365, "step": 24726 }, { "epoch": 0.43790372210194495, "grad_norm": 0.608036458492279, "learning_rate": 1.870637653793141e-05, "loss": 0.0609, "step": 24727 }, { "epoch": 0.4379214316389734, "grad_norm": 0.6245426535606384, "learning_rate": 1.8705542845350063e-05, "loss": 0.0815, "step": 24728 }, { "epoch": 0.4379391411760018, "grad_norm": 0.8687123656272888, "learning_rate": 1.8704709140577856e-05, "loss": 0.0529, "step": 24729 }, { "epoch": 0.43795685071303025, "grad_norm": 0.5868659615516663, "learning_rate": 1.8703875423617525e-05, "loss": 0.0821, "step": 24730 }, { "epoch": 0.43797456025005865, "grad_norm": 0.7994272708892822, "learning_rate": 1.8703041694471823e-05, "loss": 0.0808, "step": 24731 }, { "epoch": 0.4379922697870871, "grad_norm": 0.8080021142959595, "learning_rate": 1.8702207953143486e-05, "loss": 0.0635, "step": 24732 }, { "epoch": 0.4380099793241155, "grad_norm": 0.6226429343223572, "learning_rate": 1.8701374199635263e-05, "loss": 0.079, "step": 24733 }, { "epoch": 0.43802768886114396, "grad_norm": 0.7992799878120422, "learning_rate": 1.870054043394989e-05, "loss": 0.0774, "step": 24734 }, { "epoch": 0.43804539839817236, "grad_norm": 0.6101059317588806, "learning_rate": 1.8699706656090114e-05, "loss": 0.0985, "step": 24735 }, { "epoch": 0.4380631079352008, "grad_norm": 0.7281714081764221, "learning_rate": 1.8698872866058674e-05, "loss": 0.0744, "step": 24736 }, { "epoch": 0.4380808174722292, "grad_norm": 1.1064693927764893, "learning_rate": 1.869803906385832e-05, "loss": 0.0959, "step": 24737 }, { "epoch": 0.43809852700925767, "grad_norm": 1.2296112775802612, "learning_rate": 1.8697205249491794e-05, "loss": 0.1034, "step": 24738 }, { "epoch": 0.43811623654628606, "grad_norm": 0.8757676482200623, "learning_rate": 1.8696371422961833e-05, "loss": 0.0857, "step": 24739 }, { "epoch": 0.4381339460833145, "grad_norm": 0.6807526350021362, "learning_rate": 1.8695537584271186e-05, "loss": 0.0713, "step": 24740 }, { "epoch": 0.438151655620343, "grad_norm": 0.6193358302116394, "learning_rate": 1.869470373342259e-05, "loss": 0.099, "step": 24741 }, { "epoch": 0.43816936515737137, "grad_norm": 0.7303386926651001, "learning_rate": 1.8693869870418798e-05, "loss": 0.0917, "step": 24742 }, { "epoch": 0.4381870746943998, "grad_norm": 1.8304622173309326, "learning_rate": 1.8693035995262545e-05, "loss": 0.0673, "step": 24743 }, { "epoch": 0.4382047842314282, "grad_norm": 0.5250474810600281, "learning_rate": 1.8692202107956578e-05, "loss": 0.0895, "step": 24744 }, { "epoch": 0.4382224937684567, "grad_norm": 0.5278113484382629, "learning_rate": 1.8691368208503636e-05, "loss": 0.0746, "step": 24745 }, { "epoch": 0.4382402033054851, "grad_norm": 0.5520843267440796, "learning_rate": 1.8690534296906467e-05, "loss": 0.0712, "step": 24746 }, { "epoch": 0.43825791284251353, "grad_norm": 0.6882482767105103, "learning_rate": 1.8689700373167817e-05, "loss": 0.0531, "step": 24747 }, { "epoch": 0.43827562237954193, "grad_norm": 0.6993667483329773, "learning_rate": 1.8688866437290422e-05, "loss": 0.0721, "step": 24748 }, { "epoch": 0.4382933319165704, "grad_norm": 0.4875580370426178, "learning_rate": 1.868803248927703e-05, "loss": 0.0638, "step": 24749 }, { "epoch": 0.4383110414535988, "grad_norm": 0.5684603452682495, "learning_rate": 1.8687198529130387e-05, "loss": 0.0573, "step": 24750 }, { "epoch": 0.43832875099062724, "grad_norm": 0.8678379654884338, "learning_rate": 1.868636455685323e-05, "loss": 0.0653, "step": 24751 }, { "epoch": 0.43834646052765563, "grad_norm": 0.739845335483551, "learning_rate": 1.868553057244831e-05, "loss": 0.0473, "step": 24752 }, { "epoch": 0.4383641700646841, "grad_norm": 0.6822121143341064, "learning_rate": 1.868469657591837e-05, "loss": 0.0929, "step": 24753 }, { "epoch": 0.4383818796017125, "grad_norm": 0.706684947013855, "learning_rate": 1.8683862567266146e-05, "loss": 0.0678, "step": 24754 }, { "epoch": 0.43839958913874094, "grad_norm": 0.7457706332206726, "learning_rate": 1.8683028546494386e-05, "loss": 0.0784, "step": 24755 }, { "epoch": 0.4384172986757694, "grad_norm": 0.8504682183265686, "learning_rate": 1.8682194513605836e-05, "loss": 0.0623, "step": 24756 }, { "epoch": 0.4384350082127978, "grad_norm": 0.894375205039978, "learning_rate": 1.868136046860324e-05, "loss": 0.0805, "step": 24757 }, { "epoch": 0.43845271774982625, "grad_norm": 0.4602794945240021, "learning_rate": 1.868052641148934e-05, "loss": 0.0657, "step": 24758 }, { "epoch": 0.43847042728685465, "grad_norm": 1.020032525062561, "learning_rate": 1.8679692342266876e-05, "loss": 0.0904, "step": 24759 }, { "epoch": 0.4384881368238831, "grad_norm": 0.8825402855873108, "learning_rate": 1.86788582609386e-05, "loss": 0.0925, "step": 24760 }, { "epoch": 0.4385058463609115, "grad_norm": 0.6549660563468933, "learning_rate": 1.8678024167507256e-05, "loss": 0.0666, "step": 24761 }, { "epoch": 0.43852355589793995, "grad_norm": 0.6359143853187561, "learning_rate": 1.867719006197558e-05, "loss": 0.0894, "step": 24762 }, { "epoch": 0.43854126543496835, "grad_norm": 0.6680671572685242, "learning_rate": 1.867635594434632e-05, "loss": 0.0701, "step": 24763 }, { "epoch": 0.4385589749719968, "grad_norm": 0.48110219836235046, "learning_rate": 1.867552181462222e-05, "loss": 0.0657, "step": 24764 }, { "epoch": 0.4385766845090252, "grad_norm": 0.9793986678123474, "learning_rate": 1.8674687672806033e-05, "loss": 0.0764, "step": 24765 }, { "epoch": 0.43859439404605366, "grad_norm": 0.6537436246871948, "learning_rate": 1.8673853518900487e-05, "loss": 0.0726, "step": 24766 }, { "epoch": 0.43861210358308206, "grad_norm": 0.34346556663513184, "learning_rate": 1.8673019352908337e-05, "loss": 0.0496, "step": 24767 }, { "epoch": 0.4386298131201105, "grad_norm": 0.3811620771884918, "learning_rate": 1.8672185174832323e-05, "loss": 0.0423, "step": 24768 }, { "epoch": 0.4386475226571389, "grad_norm": 0.7214168906211853, "learning_rate": 1.8671350984675193e-05, "loss": 0.0661, "step": 24769 }, { "epoch": 0.43866523219416736, "grad_norm": 0.5969458222389221, "learning_rate": 1.867051678243969e-05, "loss": 0.075, "step": 24770 }, { "epoch": 0.4386829417311958, "grad_norm": 0.8772894144058228, "learning_rate": 1.8669682568128558e-05, "loss": 0.0813, "step": 24771 }, { "epoch": 0.4387006512682242, "grad_norm": 0.7672324180603027, "learning_rate": 1.866884834174454e-05, "loss": 0.0716, "step": 24772 }, { "epoch": 0.43871836080525267, "grad_norm": 0.9038197994232178, "learning_rate": 1.8668014103290386e-05, "loss": 0.0452, "step": 24773 }, { "epoch": 0.43873607034228107, "grad_norm": 0.6603182554244995, "learning_rate": 1.866717985276883e-05, "loss": 0.0743, "step": 24774 }, { "epoch": 0.4387537798793095, "grad_norm": 1.0564013719558716, "learning_rate": 1.8666345590182628e-05, "loss": 0.0767, "step": 24775 }, { "epoch": 0.4387714894163379, "grad_norm": 0.6886322498321533, "learning_rate": 1.8665511315534518e-05, "loss": 0.0974, "step": 24776 }, { "epoch": 0.4387891989533664, "grad_norm": 1.0192387104034424, "learning_rate": 1.866467702882725e-05, "loss": 0.079, "step": 24777 }, { "epoch": 0.4388069084903948, "grad_norm": 1.076859712600708, "learning_rate": 1.866384273006356e-05, "loss": 0.1022, "step": 24778 }, { "epoch": 0.43882461802742323, "grad_norm": 0.4867289662361145, "learning_rate": 1.8663008419246204e-05, "loss": 0.0667, "step": 24779 }, { "epoch": 0.43884232756445163, "grad_norm": 0.5397515892982483, "learning_rate": 1.8662174096377914e-05, "loss": 0.0768, "step": 24780 }, { "epoch": 0.4388600371014801, "grad_norm": 0.8606603145599365, "learning_rate": 1.8661339761461448e-05, "loss": 0.0962, "step": 24781 }, { "epoch": 0.4388777466385085, "grad_norm": 0.5416051149368286, "learning_rate": 1.866050541449954e-05, "loss": 0.0894, "step": 24782 }, { "epoch": 0.43889545617553694, "grad_norm": 0.8998135924339294, "learning_rate": 1.865967105549494e-05, "loss": 0.0963, "step": 24783 }, { "epoch": 0.43891316571256533, "grad_norm": 0.7919527292251587, "learning_rate": 1.8658836684450394e-05, "loss": 0.1206, "step": 24784 }, { "epoch": 0.4389308752495938, "grad_norm": 0.6271438598632812, "learning_rate": 1.8658002301368646e-05, "loss": 0.0952, "step": 24785 }, { "epoch": 0.43894858478662224, "grad_norm": 0.7799205780029297, "learning_rate": 1.865716790625244e-05, "loss": 0.0917, "step": 24786 }, { "epoch": 0.43896629432365064, "grad_norm": 0.5440710186958313, "learning_rate": 1.865633349910452e-05, "loss": 0.0856, "step": 24787 }, { "epoch": 0.4389840038606791, "grad_norm": 0.6682812571525574, "learning_rate": 1.8655499079927633e-05, "loss": 0.1222, "step": 24788 }, { "epoch": 0.4390017133977075, "grad_norm": 1.016910433769226, "learning_rate": 1.865466464872452e-05, "loss": 0.1048, "step": 24789 }, { "epoch": 0.43901942293473595, "grad_norm": 0.8893163204193115, "learning_rate": 1.8653830205497934e-05, "loss": 0.0876, "step": 24790 }, { "epoch": 0.43903713247176435, "grad_norm": 0.7661311030387878, "learning_rate": 1.865299575025062e-05, "loss": 0.0698, "step": 24791 }, { "epoch": 0.4390548420087928, "grad_norm": 0.5914343595504761, "learning_rate": 1.8652161282985313e-05, "loss": 0.0885, "step": 24792 }, { "epoch": 0.4390725515458212, "grad_norm": 0.6674404144287109, "learning_rate": 1.865132680370477e-05, "loss": 0.0577, "step": 24793 }, { "epoch": 0.43909026108284965, "grad_norm": 0.5328570604324341, "learning_rate": 1.8650492312411726e-05, "loss": 0.0762, "step": 24794 }, { "epoch": 0.43910797061987805, "grad_norm": 0.7289875745773315, "learning_rate": 1.8649657809108935e-05, "loss": 0.0557, "step": 24795 }, { "epoch": 0.4391256801569065, "grad_norm": 0.8262423276901245, "learning_rate": 1.8648823293799137e-05, "loss": 0.0771, "step": 24796 }, { "epoch": 0.4391433896939349, "grad_norm": 0.8531630039215088, "learning_rate": 1.864798876648508e-05, "loss": 0.0886, "step": 24797 }, { "epoch": 0.43916109923096336, "grad_norm": 0.6447521448135376, "learning_rate": 1.8647154227169507e-05, "loss": 0.0694, "step": 24798 }, { "epoch": 0.43917880876799176, "grad_norm": 0.8975902795791626, "learning_rate": 1.8646319675855168e-05, "loss": 0.0786, "step": 24799 }, { "epoch": 0.4391965183050202, "grad_norm": 1.2029805183410645, "learning_rate": 1.8645485112544805e-05, "loss": 0.0663, "step": 24800 }, { "epoch": 0.43921422784204867, "grad_norm": 0.5290046334266663, "learning_rate": 1.8644650537241168e-05, "loss": 0.0916, "step": 24801 }, { "epoch": 0.43923193737907706, "grad_norm": 0.5791680216789246, "learning_rate": 1.8643815949946997e-05, "loss": 0.0533, "step": 24802 }, { "epoch": 0.4392496469161055, "grad_norm": 0.5818972587585449, "learning_rate": 1.864298135066504e-05, "loss": 0.0809, "step": 24803 }, { "epoch": 0.4392673564531339, "grad_norm": 0.8765523433685303, "learning_rate": 1.8642146739398046e-05, "loss": 0.0911, "step": 24804 }, { "epoch": 0.43928506599016237, "grad_norm": 1.0171502828598022, "learning_rate": 1.8641312116148753e-05, "loss": 0.0619, "step": 24805 }, { "epoch": 0.43930277552719077, "grad_norm": 0.9449426531791687, "learning_rate": 1.8640477480919913e-05, "loss": 0.0736, "step": 24806 }, { "epoch": 0.4393204850642192, "grad_norm": 0.7710474133491516, "learning_rate": 1.8639642833714264e-05, "loss": 0.0826, "step": 24807 }, { "epoch": 0.4393381946012476, "grad_norm": 0.8033083081245422, "learning_rate": 1.8638808174534568e-05, "loss": 0.0853, "step": 24808 }, { "epoch": 0.4393559041382761, "grad_norm": 0.5783545970916748, "learning_rate": 1.863797350338356e-05, "loss": 0.0584, "step": 24809 }, { "epoch": 0.4393736136753045, "grad_norm": 0.83375483751297, "learning_rate": 1.8637138820263984e-05, "loss": 0.083, "step": 24810 }, { "epoch": 0.43939132321233293, "grad_norm": 0.7307806611061096, "learning_rate": 1.863630412517859e-05, "loss": 0.1015, "step": 24811 }, { "epoch": 0.43940903274936133, "grad_norm": 0.7998982667922974, "learning_rate": 1.863546941813012e-05, "loss": 0.0716, "step": 24812 }, { "epoch": 0.4394267422863898, "grad_norm": 0.7689484357833862, "learning_rate": 1.8634634699121326e-05, "loss": 0.0718, "step": 24813 }, { "epoch": 0.4394444518234182, "grad_norm": 0.6371349096298218, "learning_rate": 1.863379996815495e-05, "loss": 0.0939, "step": 24814 }, { "epoch": 0.43946216136044663, "grad_norm": 0.7678428292274475, "learning_rate": 1.8632965225233745e-05, "loss": 0.0869, "step": 24815 }, { "epoch": 0.4394798708974751, "grad_norm": 0.40578967332839966, "learning_rate": 1.8632130470360448e-05, "loss": 0.0982, "step": 24816 }, { "epoch": 0.4394975804345035, "grad_norm": 0.8091587424278259, "learning_rate": 1.863129570353781e-05, "loss": 0.0946, "step": 24817 }, { "epoch": 0.43951528997153194, "grad_norm": 0.47940629720687866, "learning_rate": 1.8630460924768577e-05, "loss": 0.0575, "step": 24818 }, { "epoch": 0.43953299950856034, "grad_norm": 0.8897283673286438, "learning_rate": 1.862962613405549e-05, "loss": 0.1288, "step": 24819 }, { "epoch": 0.4395507090455888, "grad_norm": 0.8036736249923706, "learning_rate": 1.8628791331401308e-05, "loss": 0.0668, "step": 24820 }, { "epoch": 0.4395684185826172, "grad_norm": 0.503755509853363, "learning_rate": 1.862795651680876e-05, "loss": 0.0972, "step": 24821 }, { "epoch": 0.43958612811964565, "grad_norm": 0.4313490688800812, "learning_rate": 1.862712169028061e-05, "loss": 0.0509, "step": 24822 }, { "epoch": 0.43960383765667405, "grad_norm": 0.6898611187934875, "learning_rate": 1.862628685181959e-05, "loss": 0.0573, "step": 24823 }, { "epoch": 0.4396215471937025, "grad_norm": 0.656554102897644, "learning_rate": 1.862545200142846e-05, "loss": 0.0627, "step": 24824 }, { "epoch": 0.4396392567307309, "grad_norm": 0.9905987977981567, "learning_rate": 1.8624617139109957e-05, "loss": 0.0868, "step": 24825 }, { "epoch": 0.43965696626775935, "grad_norm": 0.5850760340690613, "learning_rate": 1.8623782264866827e-05, "loss": 0.0518, "step": 24826 }, { "epoch": 0.43967467580478775, "grad_norm": 0.5936408042907715, "learning_rate": 1.8622947378701827e-05, "loss": 0.0751, "step": 24827 }, { "epoch": 0.4396923853418162, "grad_norm": 0.43736255168914795, "learning_rate": 1.8622112480617687e-05, "loss": 0.0538, "step": 24828 }, { "epoch": 0.4397100948788446, "grad_norm": 0.6895685791969299, "learning_rate": 1.8621277570617173e-05, "loss": 0.0859, "step": 24829 }, { "epoch": 0.43972780441587306, "grad_norm": 0.9983595609664917, "learning_rate": 1.8620442648703012e-05, "loss": 0.1021, "step": 24830 }, { "epoch": 0.4397455139529015, "grad_norm": 0.6530117392539978, "learning_rate": 1.861960771487797e-05, "loss": 0.0926, "step": 24831 }, { "epoch": 0.4397632234899299, "grad_norm": 0.6168520450592041, "learning_rate": 1.8618772769144777e-05, "loss": 0.0683, "step": 24832 }, { "epoch": 0.43978093302695836, "grad_norm": 0.7665994167327881, "learning_rate": 1.8617937811506195e-05, "loss": 0.0971, "step": 24833 }, { "epoch": 0.43979864256398676, "grad_norm": 0.544374406337738, "learning_rate": 1.8617102841964963e-05, "loss": 0.0821, "step": 24834 }, { "epoch": 0.4398163521010152, "grad_norm": 0.2577345669269562, "learning_rate": 1.8616267860523824e-05, "loss": 0.0426, "step": 24835 }, { "epoch": 0.4398340616380436, "grad_norm": 0.9733120799064636, "learning_rate": 1.8615432867185527e-05, "loss": 0.0745, "step": 24836 }, { "epoch": 0.43985177117507207, "grad_norm": 0.3137953579425812, "learning_rate": 1.8614597861952828e-05, "loss": 0.0453, "step": 24837 }, { "epoch": 0.43986948071210047, "grad_norm": 0.5231456756591797, "learning_rate": 1.8613762844828463e-05, "loss": 0.0783, "step": 24838 }, { "epoch": 0.4398871902491289, "grad_norm": 0.578498363494873, "learning_rate": 1.8612927815815185e-05, "loss": 0.0917, "step": 24839 }, { "epoch": 0.4399048997861573, "grad_norm": 0.6616829633712769, "learning_rate": 1.861209277491574e-05, "loss": 0.072, "step": 24840 }, { "epoch": 0.4399226093231858, "grad_norm": 0.6344080567359924, "learning_rate": 1.861125772213288e-05, "loss": 0.0726, "step": 24841 }, { "epoch": 0.4399403188602142, "grad_norm": 0.7909145355224609, "learning_rate": 1.8610422657469343e-05, "loss": 0.0709, "step": 24842 }, { "epoch": 0.43995802839724263, "grad_norm": 0.4905681610107422, "learning_rate": 1.8609587580927884e-05, "loss": 0.0701, "step": 24843 }, { "epoch": 0.439975737934271, "grad_norm": 0.550710916519165, "learning_rate": 1.8608752492511243e-05, "loss": 0.055, "step": 24844 }, { "epoch": 0.4399934474712995, "grad_norm": 0.43754687905311584, "learning_rate": 1.8607917392222172e-05, "loss": 0.0394, "step": 24845 }, { "epoch": 0.44001115700832794, "grad_norm": 0.6806316375732422, "learning_rate": 1.8607082280063415e-05, "loss": 0.0774, "step": 24846 }, { "epoch": 0.44002886654535633, "grad_norm": 0.6358391642570496, "learning_rate": 1.8606247156037726e-05, "loss": 0.0858, "step": 24847 }, { "epoch": 0.4400465760823848, "grad_norm": 0.9432904124259949, "learning_rate": 1.860541202014785e-05, "loss": 0.1087, "step": 24848 }, { "epoch": 0.4400642856194132, "grad_norm": 0.41514351963996887, "learning_rate": 1.860457687239653e-05, "loss": 0.0873, "step": 24849 }, { "epoch": 0.44008199515644164, "grad_norm": 1.7062729597091675, "learning_rate": 1.8603741712786517e-05, "loss": 0.0713, "step": 24850 }, { "epoch": 0.44009970469347004, "grad_norm": 1.0163495540618896, "learning_rate": 1.8602906541320556e-05, "loss": 0.113, "step": 24851 }, { "epoch": 0.4401174142304985, "grad_norm": 1.3754359483718872, "learning_rate": 1.8602071358001406e-05, "loss": 0.0957, "step": 24852 }, { "epoch": 0.4401351237675269, "grad_norm": 0.733339786529541, "learning_rate": 1.8601236162831794e-05, "loss": 0.0751, "step": 24853 }, { "epoch": 0.44015283330455535, "grad_norm": 0.8282833695411682, "learning_rate": 1.8600400955814484e-05, "loss": 0.0583, "step": 24854 }, { "epoch": 0.44017054284158375, "grad_norm": 0.6490082144737244, "learning_rate": 1.859956573695222e-05, "loss": 0.0811, "step": 24855 }, { "epoch": 0.4401882523786122, "grad_norm": 0.701150119304657, "learning_rate": 1.859873050624775e-05, "loss": 0.1055, "step": 24856 }, { "epoch": 0.4402059619156406, "grad_norm": 0.7005876898765564, "learning_rate": 1.859789526370382e-05, "loss": 0.0492, "step": 24857 }, { "epoch": 0.44022367145266905, "grad_norm": 0.6533803939819336, "learning_rate": 1.859706000932318e-05, "loss": 0.073, "step": 24858 }, { "epoch": 0.44024138098969745, "grad_norm": 0.6949975490570068, "learning_rate": 1.8596224743108566e-05, "loss": 0.0948, "step": 24859 }, { "epoch": 0.4402590905267259, "grad_norm": 0.5473166704177856, "learning_rate": 1.8595389465062747e-05, "loss": 0.0899, "step": 24860 }, { "epoch": 0.44027680006375436, "grad_norm": 0.9332965612411499, "learning_rate": 1.859455417518846e-05, "loss": 0.075, "step": 24861 }, { "epoch": 0.44029450960078276, "grad_norm": 0.7977526187896729, "learning_rate": 1.8593718873488446e-05, "loss": 0.1007, "step": 24862 }, { "epoch": 0.4403122191378112, "grad_norm": 0.7720233201980591, "learning_rate": 1.859288355996547e-05, "loss": 0.1172, "step": 24863 }, { "epoch": 0.4403299286748396, "grad_norm": 0.6607546806335449, "learning_rate": 1.8592048234622264e-05, "loss": 0.0826, "step": 24864 }, { "epoch": 0.44034763821186806, "grad_norm": 0.7202917337417603, "learning_rate": 1.8591212897461584e-05, "loss": 0.1073, "step": 24865 }, { "epoch": 0.44036534774889646, "grad_norm": 0.5535990595817566, "learning_rate": 1.8590377548486178e-05, "loss": 0.0529, "step": 24866 }, { "epoch": 0.4403830572859249, "grad_norm": 0.6079830527305603, "learning_rate": 1.858954218769879e-05, "loss": 0.0574, "step": 24867 }, { "epoch": 0.4404007668229533, "grad_norm": 0.5437347888946533, "learning_rate": 1.8588706815102177e-05, "loss": 0.0522, "step": 24868 }, { "epoch": 0.44041847635998177, "grad_norm": 0.788317084312439, "learning_rate": 1.8587871430699072e-05, "loss": 0.0608, "step": 24869 }, { "epoch": 0.44043618589701017, "grad_norm": 0.8496711850166321, "learning_rate": 1.858703603449224e-05, "loss": 0.085, "step": 24870 }, { "epoch": 0.4404538954340386, "grad_norm": 0.9434928297996521, "learning_rate": 1.858620062648442e-05, "loss": 0.0582, "step": 24871 }, { "epoch": 0.440471604971067, "grad_norm": 0.6782633066177368, "learning_rate": 1.8585365206678368e-05, "loss": 0.089, "step": 24872 }, { "epoch": 0.4404893145080955, "grad_norm": 0.681940495967865, "learning_rate": 1.8584529775076818e-05, "loss": 0.0855, "step": 24873 }, { "epoch": 0.4405070240451239, "grad_norm": 0.6851154565811157, "learning_rate": 1.8583694331682534e-05, "loss": 0.0811, "step": 24874 }, { "epoch": 0.44052473358215233, "grad_norm": 0.28595060110092163, "learning_rate": 1.8582858876498262e-05, "loss": 0.0537, "step": 24875 }, { "epoch": 0.4405424431191808, "grad_norm": 0.5478953123092651, "learning_rate": 1.8582023409526737e-05, "loss": 0.0678, "step": 24876 }, { "epoch": 0.4405601526562092, "grad_norm": 0.5626556277275085, "learning_rate": 1.8581187930770725e-05, "loss": 0.1002, "step": 24877 }, { "epoch": 0.44057786219323763, "grad_norm": 0.9818259477615356, "learning_rate": 1.858035244023296e-05, "loss": 0.0477, "step": 24878 }, { "epoch": 0.44059557173026603, "grad_norm": 0.7759279012680054, "learning_rate": 1.8579516937916202e-05, "loss": 0.1114, "step": 24879 }, { "epoch": 0.4406132812672945, "grad_norm": 0.7882974147796631, "learning_rate": 1.8578681423823195e-05, "loss": 0.0641, "step": 24880 }, { "epoch": 0.4406309908043229, "grad_norm": 0.5377748012542725, "learning_rate": 1.8577845897956687e-05, "loss": 0.0773, "step": 24881 }, { "epoch": 0.44064870034135134, "grad_norm": 0.3934396505355835, "learning_rate": 1.857701036031943e-05, "loss": 0.0674, "step": 24882 }, { "epoch": 0.44066640987837974, "grad_norm": 0.6102115511894226, "learning_rate": 1.8576174810914164e-05, "loss": 0.0805, "step": 24883 }, { "epoch": 0.4406841194154082, "grad_norm": 0.7481312155723572, "learning_rate": 1.857533924974365e-05, "loss": 0.083, "step": 24884 }, { "epoch": 0.4407018289524366, "grad_norm": 0.9324361681938171, "learning_rate": 1.8574503676810633e-05, "loss": 0.077, "step": 24885 }, { "epoch": 0.44071953848946505, "grad_norm": 1.0539218187332153, "learning_rate": 1.8573668092117863e-05, "loss": 0.065, "step": 24886 }, { "epoch": 0.44073724802649344, "grad_norm": 0.4396795630455017, "learning_rate": 1.857283249566808e-05, "loss": 0.0537, "step": 24887 }, { "epoch": 0.4407549575635219, "grad_norm": 0.9334489703178406, "learning_rate": 1.857199688746404e-05, "loss": 0.0779, "step": 24888 }, { "epoch": 0.4407726671005503, "grad_norm": 0.758219838142395, "learning_rate": 1.8571161267508495e-05, "loss": 0.0521, "step": 24889 }, { "epoch": 0.44079037663757875, "grad_norm": 0.6290345191955566, "learning_rate": 1.8570325635804186e-05, "loss": 0.0553, "step": 24890 }, { "epoch": 0.4408080861746072, "grad_norm": 0.6715117692947388, "learning_rate": 1.8569489992353873e-05, "loss": 0.0511, "step": 24891 }, { "epoch": 0.4408257957116356, "grad_norm": 0.6901198029518127, "learning_rate": 1.8568654337160294e-05, "loss": 0.0993, "step": 24892 }, { "epoch": 0.44084350524866406, "grad_norm": 1.0486056804656982, "learning_rate": 1.85678186702262e-05, "loss": 0.0795, "step": 24893 }, { "epoch": 0.44086121478569246, "grad_norm": 0.6725221872329712, "learning_rate": 1.856698299155435e-05, "loss": 0.0944, "step": 24894 }, { "epoch": 0.4408789243227209, "grad_norm": 0.488899827003479, "learning_rate": 1.8566147301147484e-05, "loss": 0.0587, "step": 24895 }, { "epoch": 0.4408966338597493, "grad_norm": 0.5876697301864624, "learning_rate": 1.8565311599008358e-05, "loss": 0.0646, "step": 24896 }, { "epoch": 0.44091434339677776, "grad_norm": 0.8214800953865051, "learning_rate": 1.8564475885139713e-05, "loss": 0.0616, "step": 24897 }, { "epoch": 0.44093205293380616, "grad_norm": 0.5114035606384277, "learning_rate": 1.8563640159544303e-05, "loss": 0.0654, "step": 24898 }, { "epoch": 0.4409497624708346, "grad_norm": 0.7542101144790649, "learning_rate": 1.8562804422224876e-05, "loss": 0.0432, "step": 24899 }, { "epoch": 0.440967472007863, "grad_norm": 0.6888273358345032, "learning_rate": 1.8561968673184187e-05, "loss": 0.0612, "step": 24900 }, { "epoch": 0.44098518154489147, "grad_norm": 0.7438564896583557, "learning_rate": 1.8561132912424977e-05, "loss": 0.058, "step": 24901 }, { "epoch": 0.44100289108191987, "grad_norm": 0.4309166967868805, "learning_rate": 1.8560297139950003e-05, "loss": 0.064, "step": 24902 }, { "epoch": 0.4410206006189483, "grad_norm": 0.5720735788345337, "learning_rate": 1.8559461355762007e-05, "loss": 0.0698, "step": 24903 }, { "epoch": 0.4410383101559767, "grad_norm": 0.5233849883079529, "learning_rate": 1.855862555986375e-05, "loss": 0.0479, "step": 24904 }, { "epoch": 0.4410560196930052, "grad_norm": 0.7313069105148315, "learning_rate": 1.8557789752257974e-05, "loss": 0.0669, "step": 24905 }, { "epoch": 0.44107372923003363, "grad_norm": 0.5126940011978149, "learning_rate": 1.8556953932947428e-05, "loss": 0.0537, "step": 24906 }, { "epoch": 0.441091438767062, "grad_norm": 0.8498655557632446, "learning_rate": 1.855611810193486e-05, "loss": 0.0793, "step": 24907 }, { "epoch": 0.4411091483040905, "grad_norm": 0.5137119293212891, "learning_rate": 1.8555282259223025e-05, "loss": 0.0804, "step": 24908 }, { "epoch": 0.4411268578411189, "grad_norm": 0.6294877529144287, "learning_rate": 1.8554446404814676e-05, "loss": 0.0894, "step": 24909 }, { "epoch": 0.44114456737814733, "grad_norm": 1.0141196250915527, "learning_rate": 1.8553610538712553e-05, "loss": 0.0922, "step": 24910 }, { "epoch": 0.44116227691517573, "grad_norm": 0.8814092874526978, "learning_rate": 1.8552774660919414e-05, "loss": 0.1169, "step": 24911 }, { "epoch": 0.4411799864522042, "grad_norm": 0.6178092956542969, "learning_rate": 1.8551938771438003e-05, "loss": 0.0699, "step": 24912 }, { "epoch": 0.4411976959892326, "grad_norm": 0.6797279715538025, "learning_rate": 1.855110287027107e-05, "loss": 0.1024, "step": 24913 }, { "epoch": 0.44121540552626104, "grad_norm": 0.4399515390396118, "learning_rate": 1.8550266957421372e-05, "loss": 0.0586, "step": 24914 }, { "epoch": 0.44123311506328944, "grad_norm": 0.6515165567398071, "learning_rate": 1.8549431032891652e-05, "loss": 0.0559, "step": 24915 }, { "epoch": 0.4412508246003179, "grad_norm": 0.6376922130584717, "learning_rate": 1.8548595096684664e-05, "loss": 0.0923, "step": 24916 }, { "epoch": 0.4412685341373463, "grad_norm": 0.7018851041793823, "learning_rate": 1.8547759148803157e-05, "loss": 0.0939, "step": 24917 }, { "epoch": 0.44128624367437475, "grad_norm": 0.9734770059585571, "learning_rate": 1.854692318924988e-05, "loss": 0.0812, "step": 24918 }, { "epoch": 0.44130395321140314, "grad_norm": 0.8354418873786926, "learning_rate": 1.8546087218027588e-05, "loss": 0.0845, "step": 24919 }, { "epoch": 0.4413216627484316, "grad_norm": 0.5259357690811157, "learning_rate": 1.8545251235139026e-05, "loss": 0.0727, "step": 24920 }, { "epoch": 0.44133937228546005, "grad_norm": 0.8829948306083679, "learning_rate": 1.8544415240586945e-05, "loss": 0.0765, "step": 24921 }, { "epoch": 0.44135708182248845, "grad_norm": 0.5139832496643066, "learning_rate": 1.8543579234374093e-05, "loss": 0.053, "step": 24922 }, { "epoch": 0.4413747913595169, "grad_norm": 0.6102429628372192, "learning_rate": 1.8542743216503233e-05, "loss": 0.0762, "step": 24923 }, { "epoch": 0.4413925008965453, "grad_norm": 0.5336744785308838, "learning_rate": 1.85419071869771e-05, "loss": 0.0854, "step": 24924 }, { "epoch": 0.44141021043357376, "grad_norm": 0.6811589598655701, "learning_rate": 1.854107114579845e-05, "loss": 0.0648, "step": 24925 }, { "epoch": 0.44142791997060216, "grad_norm": 0.888618528842926, "learning_rate": 1.854023509297004e-05, "loss": 0.0823, "step": 24926 }, { "epoch": 0.4414456295076306, "grad_norm": 0.563414990901947, "learning_rate": 1.8539399028494606e-05, "loss": 0.0592, "step": 24927 }, { "epoch": 0.441463339044659, "grad_norm": 0.6969473958015442, "learning_rate": 1.8538562952374913e-05, "loss": 0.0945, "step": 24928 }, { "epoch": 0.44148104858168746, "grad_norm": 0.6421359777450562, "learning_rate": 1.8537726864613707e-05, "loss": 0.0694, "step": 24929 }, { "epoch": 0.44149875811871586, "grad_norm": 0.45396506786346436, "learning_rate": 1.8536890765213733e-05, "loss": 0.0996, "step": 24930 }, { "epoch": 0.4415164676557443, "grad_norm": 0.7729189395904541, "learning_rate": 1.8536054654177743e-05, "loss": 0.0798, "step": 24931 }, { "epoch": 0.4415341771927727, "grad_norm": 0.5275903940200806, "learning_rate": 1.85352185315085e-05, "loss": 0.1569, "step": 24932 }, { "epoch": 0.44155188672980117, "grad_norm": 0.4243955910205841, "learning_rate": 1.8534382397208742e-05, "loss": 0.0581, "step": 24933 }, { "epoch": 0.44156959626682957, "grad_norm": 1.3077771663665771, "learning_rate": 1.853354625128122e-05, "loss": 0.1017, "step": 24934 }, { "epoch": 0.441587305803858, "grad_norm": 1.0113680362701416, "learning_rate": 1.8532710093728692e-05, "loss": 0.0863, "step": 24935 }, { "epoch": 0.4416050153408865, "grad_norm": 0.8125731945037842, "learning_rate": 1.8531873924553905e-05, "loss": 0.0614, "step": 24936 }, { "epoch": 0.4416227248779149, "grad_norm": 0.9796385169029236, "learning_rate": 1.8531037743759608e-05, "loss": 0.0852, "step": 24937 }, { "epoch": 0.44164043441494333, "grad_norm": 0.6152409315109253, "learning_rate": 1.8530201551348555e-05, "loss": 0.0621, "step": 24938 }, { "epoch": 0.4416581439519717, "grad_norm": 0.7490182518959045, "learning_rate": 1.85293653473235e-05, "loss": 0.0725, "step": 24939 }, { "epoch": 0.4416758534890002, "grad_norm": 0.498305082321167, "learning_rate": 1.8528529131687186e-05, "loss": 0.0703, "step": 24940 }, { "epoch": 0.4416935630260286, "grad_norm": 0.6801393628120422, "learning_rate": 1.8527692904442364e-05, "loss": 0.0933, "step": 24941 }, { "epoch": 0.44171127256305703, "grad_norm": 0.5958465337753296, "learning_rate": 1.852685666559179e-05, "loss": 0.0964, "step": 24942 }, { "epoch": 0.44172898210008543, "grad_norm": 0.740281343460083, "learning_rate": 1.8526020415138223e-05, "loss": 0.054, "step": 24943 }, { "epoch": 0.4417466916371139, "grad_norm": 1.0462654829025269, "learning_rate": 1.85251841530844e-05, "loss": 0.0891, "step": 24944 }, { "epoch": 0.4417644011741423, "grad_norm": 0.6689282059669495, "learning_rate": 1.852434787943308e-05, "loss": 0.0685, "step": 24945 }, { "epoch": 0.44178211071117074, "grad_norm": 0.7136099338531494, "learning_rate": 1.8523511594187005e-05, "loss": 0.0794, "step": 24946 }, { "epoch": 0.44179982024819914, "grad_norm": 0.7451034784317017, "learning_rate": 1.852267529734894e-05, "loss": 0.0653, "step": 24947 }, { "epoch": 0.4418175297852276, "grad_norm": 0.8085832595825195, "learning_rate": 1.8521838988921625e-05, "loss": 0.0908, "step": 24948 }, { "epoch": 0.441835239322256, "grad_norm": 0.7669697999954224, "learning_rate": 1.8521002668907817e-05, "loss": 0.0796, "step": 24949 }, { "epoch": 0.44185294885928444, "grad_norm": 0.7974442839622498, "learning_rate": 1.8520166337310268e-05, "loss": 0.0842, "step": 24950 }, { "epoch": 0.4418706583963129, "grad_norm": 0.7710841298103333, "learning_rate": 1.8519329994131728e-05, "loss": 0.0719, "step": 24951 }, { "epoch": 0.4418883679333413, "grad_norm": 0.7429367899894714, "learning_rate": 1.851849363937495e-05, "loss": 0.0737, "step": 24952 }, { "epoch": 0.44190607747036975, "grad_norm": 0.5065791606903076, "learning_rate": 1.8517657273042683e-05, "loss": 0.0797, "step": 24953 }, { "epoch": 0.44192378700739815, "grad_norm": 0.5523396134376526, "learning_rate": 1.8516820895137676e-05, "loss": 0.0753, "step": 24954 }, { "epoch": 0.4419414965444266, "grad_norm": 0.666382372379303, "learning_rate": 1.8515984505662684e-05, "loss": 0.081, "step": 24955 }, { "epoch": 0.441959206081455, "grad_norm": 0.8040904402732849, "learning_rate": 1.851514810462046e-05, "loss": 0.0899, "step": 24956 }, { "epoch": 0.44197691561848346, "grad_norm": 0.6540672779083252, "learning_rate": 1.8514311692013755e-05, "loss": 0.0671, "step": 24957 }, { "epoch": 0.44199462515551186, "grad_norm": 0.7717395424842834, "learning_rate": 1.851347526784532e-05, "loss": 0.0587, "step": 24958 }, { "epoch": 0.4420123346925403, "grad_norm": 0.48442867398262024, "learning_rate": 1.851263883211791e-05, "loss": 0.0665, "step": 24959 }, { "epoch": 0.4420300442295687, "grad_norm": 0.49803853034973145, "learning_rate": 1.8511802384834264e-05, "loss": 0.0765, "step": 24960 }, { "epoch": 0.44204775376659716, "grad_norm": 0.9396370649337769, "learning_rate": 1.851096592599715e-05, "loss": 0.0969, "step": 24961 }, { "epoch": 0.44206546330362556, "grad_norm": 0.9023539423942566, "learning_rate": 1.8510129455609316e-05, "loss": 0.1158, "step": 24962 }, { "epoch": 0.442083172840654, "grad_norm": 1.158711314201355, "learning_rate": 1.8509292973673504e-05, "loss": 0.0733, "step": 24963 }, { "epoch": 0.4421008823776824, "grad_norm": 0.7668168544769287, "learning_rate": 1.8508456480192475e-05, "loss": 0.083, "step": 24964 }, { "epoch": 0.44211859191471087, "grad_norm": 0.7152203917503357, "learning_rate": 1.8507619975168982e-05, "loss": 0.0671, "step": 24965 }, { "epoch": 0.4421363014517393, "grad_norm": 0.5064740180969238, "learning_rate": 1.8506783458605776e-05, "loss": 0.07, "step": 24966 }, { "epoch": 0.4421540109887677, "grad_norm": 0.9548527598381042, "learning_rate": 1.8505946930505606e-05, "loss": 0.0789, "step": 24967 }, { "epoch": 0.4421717205257962, "grad_norm": 0.48393869400024414, "learning_rate": 1.850511039087122e-05, "loss": 0.0907, "step": 24968 }, { "epoch": 0.4421894300628246, "grad_norm": 0.5506570935249329, "learning_rate": 1.8504273839705378e-05, "loss": 0.1154, "step": 24969 }, { "epoch": 0.442207139599853, "grad_norm": 0.7927108407020569, "learning_rate": 1.8503437277010827e-05, "loss": 0.0826, "step": 24970 }, { "epoch": 0.4422248491368814, "grad_norm": 0.4370618760585785, "learning_rate": 1.850260070279033e-05, "loss": 0.0829, "step": 24971 }, { "epoch": 0.4422425586739099, "grad_norm": 0.6062835454940796, "learning_rate": 1.8501764117046622e-05, "loss": 0.0904, "step": 24972 }, { "epoch": 0.4422602682109383, "grad_norm": 0.6490353345870972, "learning_rate": 1.850092751978247e-05, "loss": 0.0889, "step": 24973 }, { "epoch": 0.44227797774796673, "grad_norm": 0.5119832754135132, "learning_rate": 1.850009091100062e-05, "loss": 0.0703, "step": 24974 }, { "epoch": 0.44229568728499513, "grad_norm": 0.7048665285110474, "learning_rate": 1.849925429070382e-05, "loss": 0.0953, "step": 24975 }, { "epoch": 0.4423133968220236, "grad_norm": 0.6993332505226135, "learning_rate": 1.849841765889483e-05, "loss": 0.0661, "step": 24976 }, { "epoch": 0.442331106359052, "grad_norm": 0.7130685448646545, "learning_rate": 1.8497581015576403e-05, "loss": 0.0814, "step": 24977 }, { "epoch": 0.44234881589608044, "grad_norm": 0.6382835507392883, "learning_rate": 1.8496744360751284e-05, "loss": 0.0861, "step": 24978 }, { "epoch": 0.4423665254331089, "grad_norm": 1.011849045753479, "learning_rate": 1.849590769442223e-05, "loss": 0.0726, "step": 24979 }, { "epoch": 0.4423842349701373, "grad_norm": 0.5238965749740601, "learning_rate": 1.8495071016591996e-05, "loss": 0.0572, "step": 24980 }, { "epoch": 0.44240194450716575, "grad_norm": 1.0469964742660522, "learning_rate": 1.849423432726333e-05, "loss": 0.0876, "step": 24981 }, { "epoch": 0.44241965404419414, "grad_norm": 0.6357438564300537, "learning_rate": 1.8493397626438988e-05, "loss": 0.0778, "step": 24982 }, { "epoch": 0.4424373635812226, "grad_norm": 1.0681959390640259, "learning_rate": 1.8492560914121718e-05, "loss": 0.1167, "step": 24983 }, { "epoch": 0.442455073118251, "grad_norm": 0.5022057890892029, "learning_rate": 1.849172419031428e-05, "loss": 0.0971, "step": 24984 }, { "epoch": 0.44247278265527945, "grad_norm": 0.5357736945152283, "learning_rate": 1.8490887455019422e-05, "loss": 0.0544, "step": 24985 }, { "epoch": 0.44249049219230785, "grad_norm": 0.6797893643379211, "learning_rate": 1.849005070823989e-05, "loss": 0.0552, "step": 24986 }, { "epoch": 0.4425082017293363, "grad_norm": 1.1609187126159668, "learning_rate": 1.8489213949978456e-05, "loss": 0.0809, "step": 24987 }, { "epoch": 0.4425259112663647, "grad_norm": 0.7332289814949036, "learning_rate": 1.848837718023785e-05, "loss": 0.079, "step": 24988 }, { "epoch": 0.44254362080339316, "grad_norm": 0.6454168558120728, "learning_rate": 1.848754039902084e-05, "loss": 0.0667, "step": 24989 }, { "epoch": 0.44256133034042155, "grad_norm": 1.5048911571502686, "learning_rate": 1.8486703606330177e-05, "loss": 0.0672, "step": 24990 }, { "epoch": 0.44257903987745, "grad_norm": 0.7090232372283936, "learning_rate": 1.8485866802168613e-05, "loss": 0.0642, "step": 24991 }, { "epoch": 0.4425967494144784, "grad_norm": 0.4886423945426941, "learning_rate": 1.8485029986538897e-05, "loss": 0.0808, "step": 24992 }, { "epoch": 0.44261445895150686, "grad_norm": 0.7705570459365845, "learning_rate": 1.8484193159443784e-05, "loss": 0.0813, "step": 24993 }, { "epoch": 0.4426321684885353, "grad_norm": 0.5782399773597717, "learning_rate": 1.848335632088603e-05, "loss": 0.0712, "step": 24994 }, { "epoch": 0.4426498780255637, "grad_norm": 0.5589566230773926, "learning_rate": 1.8482519470868383e-05, "loss": 0.0761, "step": 24995 }, { "epoch": 0.44266758756259217, "grad_norm": 0.8790232539176941, "learning_rate": 1.8481682609393602e-05, "loss": 0.0963, "step": 24996 }, { "epoch": 0.44268529709962057, "grad_norm": 0.8651593923568726, "learning_rate": 1.848084573646444e-05, "loss": 0.0813, "step": 24997 }, { "epoch": 0.442703006636649, "grad_norm": 0.5838635563850403, "learning_rate": 1.848000885208364e-05, "loss": 0.1006, "step": 24998 }, { "epoch": 0.4427207161736774, "grad_norm": 0.4132241904735565, "learning_rate": 1.8479171956253968e-05, "loss": 0.0829, "step": 24999 }, { "epoch": 0.4427384257107059, "grad_norm": 0.8456116318702698, "learning_rate": 1.8478335048978174e-05, "loss": 0.1079, "step": 25000 }, { "epoch": 0.4427561352477343, "grad_norm": 0.6700233221054077, "learning_rate": 1.847749813025901e-05, "loss": 0.1062, "step": 25001 }, { "epoch": 0.4427738447847627, "grad_norm": 0.4347732961177826, "learning_rate": 1.8476661200099223e-05, "loss": 0.0634, "step": 25002 }, { "epoch": 0.4427915543217911, "grad_norm": 0.924227774143219, "learning_rate": 1.8475824258501573e-05, "loss": 0.0719, "step": 25003 }, { "epoch": 0.4428092638588196, "grad_norm": 0.7132534384727478, "learning_rate": 1.8474987305468812e-05, "loss": 0.0844, "step": 25004 }, { "epoch": 0.442826973395848, "grad_norm": 1.058747410774231, "learning_rate": 1.84741503410037e-05, "loss": 0.0693, "step": 25005 }, { "epoch": 0.44284468293287643, "grad_norm": 0.6219318509101868, "learning_rate": 1.847331336510898e-05, "loss": 0.082, "step": 25006 }, { "epoch": 0.44286239246990483, "grad_norm": 0.7138341665267944, "learning_rate": 1.8472476377787415e-05, "loss": 0.0802, "step": 25007 }, { "epoch": 0.4428801020069333, "grad_norm": 0.8596370220184326, "learning_rate": 1.847163937904175e-05, "loss": 0.0737, "step": 25008 }, { "epoch": 0.44289781154396174, "grad_norm": 0.722063422203064, "learning_rate": 1.8470802368874742e-05, "loss": 0.0858, "step": 25009 }, { "epoch": 0.44291552108099014, "grad_norm": 1.0252420902252197, "learning_rate": 1.8469965347289148e-05, "loss": 0.0913, "step": 25010 }, { "epoch": 0.4429332306180186, "grad_norm": 0.8909247517585754, "learning_rate": 1.8469128314287715e-05, "loss": 0.0952, "step": 25011 }, { "epoch": 0.442950940155047, "grad_norm": 0.4684188663959503, "learning_rate": 1.8468291269873204e-05, "loss": 0.075, "step": 25012 }, { "epoch": 0.44296864969207544, "grad_norm": 0.5617192983627319, "learning_rate": 1.8467454214048366e-05, "loss": 0.0713, "step": 25013 }, { "epoch": 0.44298635922910384, "grad_norm": 0.3914923667907715, "learning_rate": 1.8466617146815953e-05, "loss": 0.0768, "step": 25014 }, { "epoch": 0.4430040687661323, "grad_norm": 0.8698685169219971, "learning_rate": 1.8465780068178723e-05, "loss": 0.0712, "step": 25015 }, { "epoch": 0.4430217783031607, "grad_norm": 0.9321116209030151, "learning_rate": 1.8464942978139426e-05, "loss": 0.0651, "step": 25016 }, { "epoch": 0.44303948784018915, "grad_norm": 0.83439701795578, "learning_rate": 1.8464105876700814e-05, "loss": 0.0976, "step": 25017 }, { "epoch": 0.44305719737721755, "grad_norm": 0.5732735395431519, "learning_rate": 1.846326876386564e-05, "loss": 0.0834, "step": 25018 }, { "epoch": 0.443074906914246, "grad_norm": 0.988085925579071, "learning_rate": 1.8462431639636672e-05, "loss": 0.1006, "step": 25019 }, { "epoch": 0.4430926164512744, "grad_norm": 0.3660283088684082, "learning_rate": 1.8461594504016652e-05, "loss": 0.0368, "step": 25020 }, { "epoch": 0.44311032598830286, "grad_norm": 0.5932416915893555, "learning_rate": 1.8460757357008335e-05, "loss": 0.0821, "step": 25021 }, { "epoch": 0.44312803552533125, "grad_norm": 0.6412020325660706, "learning_rate": 1.845992019861447e-05, "loss": 0.0968, "step": 25022 }, { "epoch": 0.4431457450623597, "grad_norm": 0.785953938961029, "learning_rate": 1.8459083028837824e-05, "loss": 0.0766, "step": 25023 }, { "epoch": 0.44316345459938816, "grad_norm": 0.7726574540138245, "learning_rate": 1.8458245847681146e-05, "loss": 0.0797, "step": 25024 }, { "epoch": 0.44318116413641656, "grad_norm": 0.540116012096405, "learning_rate": 1.8457408655147187e-05, "loss": 0.0767, "step": 25025 }, { "epoch": 0.443198873673445, "grad_norm": 0.47623977065086365, "learning_rate": 1.8456571451238703e-05, "loss": 0.0737, "step": 25026 }, { "epoch": 0.4432165832104734, "grad_norm": 0.5033631920814514, "learning_rate": 1.8455734235958446e-05, "loss": 0.0694, "step": 25027 }, { "epoch": 0.44323429274750187, "grad_norm": 0.9419111609458923, "learning_rate": 1.8454897009309177e-05, "loss": 0.0859, "step": 25028 }, { "epoch": 0.44325200228453027, "grad_norm": 0.8010034561157227, "learning_rate": 1.8454059771293644e-05, "loss": 0.0829, "step": 25029 }, { "epoch": 0.4432697118215587, "grad_norm": 0.37825337052345276, "learning_rate": 1.8453222521914605e-05, "loss": 0.0476, "step": 25030 }, { "epoch": 0.4432874213585871, "grad_norm": 0.6386605501174927, "learning_rate": 1.8452385261174812e-05, "loss": 0.0893, "step": 25031 }, { "epoch": 0.4433051308956156, "grad_norm": 0.6265493035316467, "learning_rate": 1.8451547989077018e-05, "loss": 0.0728, "step": 25032 }, { "epoch": 0.44332284043264397, "grad_norm": 0.497443288564682, "learning_rate": 1.845071070562399e-05, "loss": 0.0863, "step": 25033 }, { "epoch": 0.4433405499696724, "grad_norm": 0.536346971988678, "learning_rate": 1.8449873410818464e-05, "loss": 0.0744, "step": 25034 }, { "epoch": 0.4433582595067008, "grad_norm": 1.3405728340148926, "learning_rate": 1.8449036104663207e-05, "loss": 0.0646, "step": 25035 }, { "epoch": 0.4433759690437293, "grad_norm": 1.5411373376846313, "learning_rate": 1.8448198787160967e-05, "loss": 0.0708, "step": 25036 }, { "epoch": 0.4433936785807577, "grad_norm": 0.6632344126701355, "learning_rate": 1.84473614583145e-05, "loss": 0.1018, "step": 25037 }, { "epoch": 0.44341138811778613, "grad_norm": 0.5851048827171326, "learning_rate": 1.844652411812657e-05, "loss": 0.0533, "step": 25038 }, { "epoch": 0.4434290976548146, "grad_norm": 0.6274443864822388, "learning_rate": 1.844568676659992e-05, "loss": 0.0762, "step": 25039 }, { "epoch": 0.443446807191843, "grad_norm": 0.8070198893547058, "learning_rate": 1.844484940373731e-05, "loss": 0.0945, "step": 25040 }, { "epoch": 0.44346451672887144, "grad_norm": 1.0335830450057983, "learning_rate": 1.8444012029541488e-05, "loss": 0.081, "step": 25041 }, { "epoch": 0.44348222626589984, "grad_norm": 0.6852208375930786, "learning_rate": 1.8443174644015225e-05, "loss": 0.0707, "step": 25042 }, { "epoch": 0.4434999358029283, "grad_norm": 0.7318112850189209, "learning_rate": 1.844233724716126e-05, "loss": 0.0705, "step": 25043 }, { "epoch": 0.4435176453399567, "grad_norm": 0.43257349729537964, "learning_rate": 1.8441499838982353e-05, "loss": 0.0572, "step": 25044 }, { "epoch": 0.44353535487698514, "grad_norm": 0.8407908082008362, "learning_rate": 1.844066241948126e-05, "loss": 0.1095, "step": 25045 }, { "epoch": 0.44355306441401354, "grad_norm": 0.7659863829612732, "learning_rate": 1.8439824988660734e-05, "loss": 0.1032, "step": 25046 }, { "epoch": 0.443570773951042, "grad_norm": 0.7829921245574951, "learning_rate": 1.843898754652354e-05, "loss": 0.0897, "step": 25047 }, { "epoch": 0.4435884834880704, "grad_norm": 0.9092869758605957, "learning_rate": 1.8438150093072416e-05, "loss": 0.0963, "step": 25048 }, { "epoch": 0.44360619302509885, "grad_norm": 0.654901385307312, "learning_rate": 1.8437312628310128e-05, "loss": 0.0397, "step": 25049 }, { "epoch": 0.44362390256212725, "grad_norm": 0.5027210116386414, "learning_rate": 1.843647515223943e-05, "loss": 0.0932, "step": 25050 }, { "epoch": 0.4436416120991557, "grad_norm": 0.523051381111145, "learning_rate": 1.8435637664863074e-05, "loss": 0.1101, "step": 25051 }, { "epoch": 0.4436593216361841, "grad_norm": 0.9131274819374084, "learning_rate": 1.843480016618382e-05, "loss": 0.0806, "step": 25052 }, { "epoch": 0.44367703117321255, "grad_norm": 0.6525949239730835, "learning_rate": 1.8433962656204423e-05, "loss": 0.0828, "step": 25053 }, { "epoch": 0.443694740710241, "grad_norm": 0.5570206642150879, "learning_rate": 1.8433125134927635e-05, "loss": 0.1005, "step": 25054 }, { "epoch": 0.4437124502472694, "grad_norm": 0.9266394972801208, "learning_rate": 1.8432287602356206e-05, "loss": 0.0936, "step": 25055 }, { "epoch": 0.44373015978429786, "grad_norm": 0.6447847485542297, "learning_rate": 1.8431450058492905e-05, "loss": 0.0677, "step": 25056 }, { "epoch": 0.44374786932132626, "grad_norm": 0.5238540768623352, "learning_rate": 1.8430612503340478e-05, "loss": 0.0536, "step": 25057 }, { "epoch": 0.4437655788583547, "grad_norm": 0.640108585357666, "learning_rate": 1.842977493690169e-05, "loss": 0.0671, "step": 25058 }, { "epoch": 0.4437832883953831, "grad_norm": 0.6013476252555847, "learning_rate": 1.842893735917928e-05, "loss": 0.0635, "step": 25059 }, { "epoch": 0.44380099793241157, "grad_norm": 1.265446662902832, "learning_rate": 1.8428099770176015e-05, "loss": 0.0653, "step": 25060 }, { "epoch": 0.44381870746943997, "grad_norm": 0.5431205034255981, "learning_rate": 1.8427262169894648e-05, "loss": 0.0731, "step": 25061 }, { "epoch": 0.4438364170064684, "grad_norm": 0.4533165395259857, "learning_rate": 1.8426424558337938e-05, "loss": 0.0822, "step": 25062 }, { "epoch": 0.4438541265434968, "grad_norm": 0.8947665095329285, "learning_rate": 1.842558693550864e-05, "loss": 0.0745, "step": 25063 }, { "epoch": 0.4438718360805253, "grad_norm": 0.673258364200592, "learning_rate": 1.84247493014095e-05, "loss": 0.0768, "step": 25064 }, { "epoch": 0.44388954561755367, "grad_norm": 0.5422803163528442, "learning_rate": 1.8423911656043282e-05, "loss": 0.0714, "step": 25065 }, { "epoch": 0.4439072551545821, "grad_norm": 0.720122754573822, "learning_rate": 1.8423073999412744e-05, "loss": 0.062, "step": 25066 }, { "epoch": 0.4439249646916105, "grad_norm": 0.9929726123809814, "learning_rate": 1.842223633152064e-05, "loss": 0.0565, "step": 25067 }, { "epoch": 0.443942674228639, "grad_norm": 0.811877429485321, "learning_rate": 1.8421398652369727e-05, "loss": 0.1221, "step": 25068 }, { "epoch": 0.44396038376566743, "grad_norm": 0.7876613140106201, "learning_rate": 1.8420560961962753e-05, "loss": 0.0852, "step": 25069 }, { "epoch": 0.44397809330269583, "grad_norm": 0.6083772778511047, "learning_rate": 1.841972326030248e-05, "loss": 0.0661, "step": 25070 }, { "epoch": 0.4439958028397243, "grad_norm": 0.58327317237854, "learning_rate": 1.8418885547391664e-05, "loss": 0.055, "step": 25071 }, { "epoch": 0.4440135123767527, "grad_norm": 0.7218049764633179, "learning_rate": 1.8418047823233064e-05, "loss": 0.0852, "step": 25072 }, { "epoch": 0.44403122191378114, "grad_norm": 1.055747628211975, "learning_rate": 1.8417210087829427e-05, "loss": 0.0718, "step": 25073 }, { "epoch": 0.44404893145080954, "grad_norm": 0.7192106246948242, "learning_rate": 1.8416372341183515e-05, "loss": 0.0858, "step": 25074 }, { "epoch": 0.444066640987838, "grad_norm": 0.6582986116409302, "learning_rate": 1.8415534583298082e-05, "loss": 0.0816, "step": 25075 }, { "epoch": 0.4440843505248664, "grad_norm": 0.47320979833602905, "learning_rate": 1.8414696814175893e-05, "loss": 0.0615, "step": 25076 }, { "epoch": 0.44410206006189484, "grad_norm": 0.9501416683197021, "learning_rate": 1.8413859033819692e-05, "loss": 0.1309, "step": 25077 }, { "epoch": 0.44411976959892324, "grad_norm": 0.9037710428237915, "learning_rate": 1.841302124223224e-05, "loss": 0.079, "step": 25078 }, { "epoch": 0.4441374791359517, "grad_norm": 0.7384297251701355, "learning_rate": 1.8412183439416295e-05, "loss": 0.0647, "step": 25079 }, { "epoch": 0.4441551886729801, "grad_norm": 1.017478108406067, "learning_rate": 1.841134562537461e-05, "loss": 0.0703, "step": 25080 }, { "epoch": 0.44417289821000855, "grad_norm": 0.6341410279273987, "learning_rate": 1.8410507800109946e-05, "loss": 0.0779, "step": 25081 }, { "epoch": 0.44419060774703695, "grad_norm": 0.6010369658470154, "learning_rate": 1.8409669963625053e-05, "loss": 0.0902, "step": 25082 }, { "epoch": 0.4442083172840654, "grad_norm": 0.6279956102371216, "learning_rate": 1.8408832115922693e-05, "loss": 0.0719, "step": 25083 }, { "epoch": 0.44422602682109386, "grad_norm": 0.6133694648742676, "learning_rate": 1.8407994257005616e-05, "loss": 0.0544, "step": 25084 }, { "epoch": 0.44424373635812225, "grad_norm": 0.5438562631607056, "learning_rate": 1.8407156386876588e-05, "loss": 0.053, "step": 25085 }, { "epoch": 0.4442614458951507, "grad_norm": 0.5560295581817627, "learning_rate": 1.8406318505538356e-05, "loss": 0.0987, "step": 25086 }, { "epoch": 0.4442791554321791, "grad_norm": 1.0152325630187988, "learning_rate": 1.8405480612993682e-05, "loss": 0.0973, "step": 25087 }, { "epoch": 0.44429686496920756, "grad_norm": 0.6377537250518799, "learning_rate": 1.840464270924532e-05, "loss": 0.0733, "step": 25088 }, { "epoch": 0.44431457450623596, "grad_norm": 0.8655478358268738, "learning_rate": 1.8403804794296028e-05, "loss": 0.0819, "step": 25089 }, { "epoch": 0.4443322840432644, "grad_norm": 0.790873646736145, "learning_rate": 1.8402966868148567e-05, "loss": 0.0608, "step": 25090 }, { "epoch": 0.4443499935802928, "grad_norm": 0.6084504127502441, "learning_rate": 1.8402128930805683e-05, "loss": 0.0691, "step": 25091 }, { "epoch": 0.44436770311732127, "grad_norm": 1.1224149465560913, "learning_rate": 1.8401290982270145e-05, "loss": 0.0755, "step": 25092 }, { "epoch": 0.44438541265434967, "grad_norm": 0.6257543563842773, "learning_rate": 1.84004530225447e-05, "loss": 0.0628, "step": 25093 }, { "epoch": 0.4444031221913781, "grad_norm": 0.7085097432136536, "learning_rate": 1.8399615051632106e-05, "loss": 0.0793, "step": 25094 }, { "epoch": 0.4444208317284065, "grad_norm": 0.8521304726600647, "learning_rate": 1.8398777069535128e-05, "loss": 0.0768, "step": 25095 }, { "epoch": 0.44443854126543497, "grad_norm": 0.7351583242416382, "learning_rate": 1.839793907625651e-05, "loss": 0.0815, "step": 25096 }, { "epoch": 0.44445625080246337, "grad_norm": 0.7963641881942749, "learning_rate": 1.8397101071799025e-05, "loss": 0.0497, "step": 25097 }, { "epoch": 0.4444739603394918, "grad_norm": 0.9726858139038086, "learning_rate": 1.8396263056165417e-05, "loss": 0.0746, "step": 25098 }, { "epoch": 0.4444916698765203, "grad_norm": 0.9007193446159363, "learning_rate": 1.8395425029358445e-05, "loss": 0.1169, "step": 25099 }, { "epoch": 0.4445093794135487, "grad_norm": 0.66725754737854, "learning_rate": 1.839458699138087e-05, "loss": 0.1024, "step": 25100 }, { "epoch": 0.44452708895057713, "grad_norm": 0.5156370401382446, "learning_rate": 1.839374894223545e-05, "loss": 0.0626, "step": 25101 }, { "epoch": 0.44454479848760553, "grad_norm": 0.9107809662818909, "learning_rate": 1.8392910881924937e-05, "loss": 0.0729, "step": 25102 }, { "epoch": 0.444562508024634, "grad_norm": 1.0510742664337158, "learning_rate": 1.8392072810452084e-05, "loss": 0.0811, "step": 25103 }, { "epoch": 0.4445802175616624, "grad_norm": 0.9567421078681946, "learning_rate": 1.839123472781966e-05, "loss": 0.0994, "step": 25104 }, { "epoch": 0.44459792709869084, "grad_norm": 1.1858211755752563, "learning_rate": 1.8390396634030417e-05, "loss": 0.1014, "step": 25105 }, { "epoch": 0.44461563663571924, "grad_norm": 0.7913001775741577, "learning_rate": 1.8389558529087112e-05, "loss": 0.0678, "step": 25106 }, { "epoch": 0.4446333461727477, "grad_norm": 0.6900132298469543, "learning_rate": 1.8388720412992503e-05, "loss": 0.0671, "step": 25107 }, { "epoch": 0.4446510557097761, "grad_norm": 0.6542088985443115, "learning_rate": 1.8387882285749348e-05, "loss": 0.0537, "step": 25108 }, { "epoch": 0.44466876524680454, "grad_norm": 0.5329679250717163, "learning_rate": 1.83870441473604e-05, "loss": 0.0711, "step": 25109 }, { "epoch": 0.44468647478383294, "grad_norm": 0.8728355169296265, "learning_rate": 1.8386205997828418e-05, "loss": 0.09, "step": 25110 }, { "epoch": 0.4447041843208614, "grad_norm": 0.5801078081130981, "learning_rate": 1.8385367837156165e-05, "loss": 0.085, "step": 25111 }, { "epoch": 0.4447218938578898, "grad_norm": 0.8834083080291748, "learning_rate": 1.838452966534639e-05, "loss": 0.0687, "step": 25112 }, { "epoch": 0.44473960339491825, "grad_norm": 0.9032697677612305, "learning_rate": 1.8383691482401855e-05, "loss": 0.0975, "step": 25113 }, { "epoch": 0.4447573129319467, "grad_norm": 0.9684724807739258, "learning_rate": 1.8382853288325317e-05, "loss": 0.0796, "step": 25114 }, { "epoch": 0.4447750224689751, "grad_norm": 0.3517744243144989, "learning_rate": 1.8382015083119542e-05, "loss": 0.0529, "step": 25115 }, { "epoch": 0.44479273200600355, "grad_norm": 0.6631702780723572, "learning_rate": 1.838117686678727e-05, "loss": 0.0884, "step": 25116 }, { "epoch": 0.44481044154303195, "grad_norm": 0.5588436126708984, "learning_rate": 1.838033863933127e-05, "loss": 0.0983, "step": 25117 }, { "epoch": 0.4448281510800604, "grad_norm": 0.7701961398124695, "learning_rate": 1.8379500400754302e-05, "loss": 0.0717, "step": 25118 }, { "epoch": 0.4448458606170888, "grad_norm": 0.7248483896255493, "learning_rate": 1.8378662151059114e-05, "loss": 0.1014, "step": 25119 }, { "epoch": 0.44486357015411726, "grad_norm": 0.5978619456291199, "learning_rate": 1.8377823890248475e-05, "loss": 0.0569, "step": 25120 }, { "epoch": 0.44488127969114566, "grad_norm": 0.29789137840270996, "learning_rate": 1.8376985618325133e-05, "loss": 0.0958, "step": 25121 }, { "epoch": 0.4448989892281741, "grad_norm": 0.6833857893943787, "learning_rate": 1.837614733529185e-05, "loss": 0.0903, "step": 25122 }, { "epoch": 0.4449166987652025, "grad_norm": 0.8321934342384338, "learning_rate": 1.8375309041151386e-05, "loss": 0.0882, "step": 25123 }, { "epoch": 0.44493440830223097, "grad_norm": 1.07645845413208, "learning_rate": 1.8374470735906493e-05, "loss": 0.0857, "step": 25124 }, { "epoch": 0.44495211783925936, "grad_norm": 0.6174831986427307, "learning_rate": 1.837363241955994e-05, "loss": 0.084, "step": 25125 }, { "epoch": 0.4449698273762878, "grad_norm": 0.7927865386009216, "learning_rate": 1.837279409211447e-05, "loss": 0.0965, "step": 25126 }, { "epoch": 0.4449875369133162, "grad_norm": 0.5263915657997131, "learning_rate": 1.837195575357285e-05, "loss": 0.0723, "step": 25127 }, { "epoch": 0.44500524645034467, "grad_norm": 0.7579562067985535, "learning_rate": 1.8371117403937833e-05, "loss": 0.0549, "step": 25128 }, { "epoch": 0.4450229559873731, "grad_norm": 1.0330170392990112, "learning_rate": 1.837027904321219e-05, "loss": 0.0805, "step": 25129 }, { "epoch": 0.4450406655244015, "grad_norm": 0.6061210036277771, "learning_rate": 1.8369440671398663e-05, "loss": 0.0624, "step": 25130 }, { "epoch": 0.44505837506143, "grad_norm": 0.4907524585723877, "learning_rate": 1.8368602288500023e-05, "loss": 0.068, "step": 25131 }, { "epoch": 0.4450760845984584, "grad_norm": 0.6484152674674988, "learning_rate": 1.8367763894519013e-05, "loss": 0.0939, "step": 25132 }, { "epoch": 0.44509379413548683, "grad_norm": 0.919545590877533, "learning_rate": 1.8366925489458406e-05, "loss": 0.1056, "step": 25133 }, { "epoch": 0.44511150367251523, "grad_norm": 0.6431088447570801, "learning_rate": 1.8366087073320955e-05, "loss": 0.0865, "step": 25134 }, { "epoch": 0.4451292132095437, "grad_norm": 0.4927597641944885, "learning_rate": 1.8365248646109417e-05, "loss": 0.0982, "step": 25135 }, { "epoch": 0.4451469227465721, "grad_norm": 0.5187059640884399, "learning_rate": 1.8364410207826554e-05, "loss": 0.0921, "step": 25136 }, { "epoch": 0.44516463228360054, "grad_norm": 0.6210819482803345, "learning_rate": 1.8363571758475116e-05, "loss": 0.0969, "step": 25137 }, { "epoch": 0.44518234182062894, "grad_norm": 0.8380640149116516, "learning_rate": 1.8362733298057873e-05, "loss": 0.0841, "step": 25138 }, { "epoch": 0.4452000513576574, "grad_norm": 0.5474704504013062, "learning_rate": 1.836189482657757e-05, "loss": 0.0743, "step": 25139 }, { "epoch": 0.4452177608946858, "grad_norm": 0.4763500690460205, "learning_rate": 1.8361056344036983e-05, "loss": 0.0579, "step": 25140 }, { "epoch": 0.44523547043171424, "grad_norm": 0.7785052061080933, "learning_rate": 1.8360217850438854e-05, "loss": 0.0971, "step": 25141 }, { "epoch": 0.44525317996874264, "grad_norm": 0.4733698070049286, "learning_rate": 1.835937934578595e-05, "loss": 0.0615, "step": 25142 }, { "epoch": 0.4452708895057711, "grad_norm": 0.3697771430015564, "learning_rate": 1.835854083008103e-05, "loss": 0.0552, "step": 25143 }, { "epoch": 0.44528859904279955, "grad_norm": 0.5663526654243469, "learning_rate": 1.8357702303326846e-05, "loss": 0.121, "step": 25144 }, { "epoch": 0.44530630857982795, "grad_norm": 0.6468303799629211, "learning_rate": 1.8356863765526164e-05, "loss": 0.0592, "step": 25145 }, { "epoch": 0.4453240181168564, "grad_norm": 0.9541885852813721, "learning_rate": 1.8356025216681738e-05, "loss": 0.0948, "step": 25146 }, { "epoch": 0.4453417276538848, "grad_norm": 0.7292044162750244, "learning_rate": 1.835518665679633e-05, "loss": 0.0766, "step": 25147 }, { "epoch": 0.44535943719091325, "grad_norm": 0.5443375110626221, "learning_rate": 1.8354348085872694e-05, "loss": 0.0543, "step": 25148 }, { "epoch": 0.44537714672794165, "grad_norm": 0.5633822679519653, "learning_rate": 1.8353509503913596e-05, "loss": 0.0637, "step": 25149 }, { "epoch": 0.4453948562649701, "grad_norm": 0.6662988066673279, "learning_rate": 1.835267091092179e-05, "loss": 0.0813, "step": 25150 }, { "epoch": 0.4454125658019985, "grad_norm": 0.4158371090888977, "learning_rate": 1.835183230690003e-05, "loss": 0.0508, "step": 25151 }, { "epoch": 0.44543027533902696, "grad_norm": 0.44164276123046875, "learning_rate": 1.8350993691851092e-05, "loss": 0.0498, "step": 25152 }, { "epoch": 0.44544798487605536, "grad_norm": 0.669556736946106, "learning_rate": 1.8350155065777715e-05, "loss": 0.0647, "step": 25153 }, { "epoch": 0.4454656944130838, "grad_norm": 0.9790463447570801, "learning_rate": 1.834931642868267e-05, "loss": 0.0933, "step": 25154 }, { "epoch": 0.4454834039501122, "grad_norm": 0.8021271228790283, "learning_rate": 1.8348477780568714e-05, "loss": 0.0926, "step": 25155 }, { "epoch": 0.44550111348714067, "grad_norm": 0.8547641038894653, "learning_rate": 1.8347639121438602e-05, "loss": 0.085, "step": 25156 }, { "epoch": 0.44551882302416906, "grad_norm": 0.8656014204025269, "learning_rate": 1.8346800451295097e-05, "loss": 0.0765, "step": 25157 }, { "epoch": 0.4455365325611975, "grad_norm": 0.6990315318107605, "learning_rate": 1.8345961770140957e-05, "loss": 0.0574, "step": 25158 }, { "epoch": 0.44555424209822597, "grad_norm": 0.7441391348838806, "learning_rate": 1.8345123077978942e-05, "loss": 0.0799, "step": 25159 }, { "epoch": 0.44557195163525437, "grad_norm": 0.7251363396644592, "learning_rate": 1.8344284374811808e-05, "loss": 0.0914, "step": 25160 }, { "epoch": 0.4455896611722828, "grad_norm": 0.5289754867553711, "learning_rate": 1.8343445660642315e-05, "loss": 0.0658, "step": 25161 }, { "epoch": 0.4456073707093112, "grad_norm": 0.6817154288291931, "learning_rate": 1.834260693547323e-05, "loss": 0.0722, "step": 25162 }, { "epoch": 0.4456250802463397, "grad_norm": 0.9698989391326904, "learning_rate": 1.83417681993073e-05, "loss": 0.095, "step": 25163 }, { "epoch": 0.4456427897833681, "grad_norm": 0.6319032907485962, "learning_rate": 1.8340929452147297e-05, "loss": 0.0797, "step": 25164 }, { "epoch": 0.44566049932039653, "grad_norm": 0.3918684720993042, "learning_rate": 1.8340090693995968e-05, "loss": 0.0768, "step": 25165 }, { "epoch": 0.44567820885742493, "grad_norm": 0.5172426104545593, "learning_rate": 1.8339251924856083e-05, "loss": 0.0735, "step": 25166 }, { "epoch": 0.4456959183944534, "grad_norm": 0.3643542528152466, "learning_rate": 1.833841314473039e-05, "loss": 0.0846, "step": 25167 }, { "epoch": 0.4457136279314818, "grad_norm": 0.5870198607444763, "learning_rate": 1.833757435362167e-05, "loss": 0.0561, "step": 25168 }, { "epoch": 0.44573133746851024, "grad_norm": 0.840255856513977, "learning_rate": 1.8336735551532655e-05, "loss": 0.0533, "step": 25169 }, { "epoch": 0.44574904700553863, "grad_norm": 0.6991400718688965, "learning_rate": 1.833589673846612e-05, "loss": 0.0793, "step": 25170 }, { "epoch": 0.4457667565425671, "grad_norm": 0.9828779101371765, "learning_rate": 1.8335057914424822e-05, "loss": 0.0608, "step": 25171 }, { "epoch": 0.4457844660795955, "grad_norm": 0.7124803066253662, "learning_rate": 1.8334219079411524e-05, "loss": 0.0943, "step": 25172 }, { "epoch": 0.44580217561662394, "grad_norm": 0.554457426071167, "learning_rate": 1.8333380233428984e-05, "loss": 0.0665, "step": 25173 }, { "epoch": 0.4458198851536524, "grad_norm": 1.0353882312774658, "learning_rate": 1.833254137647995e-05, "loss": 0.0689, "step": 25174 }, { "epoch": 0.4458375946906808, "grad_norm": 0.48841592669487, "learning_rate": 1.8331702508567202e-05, "loss": 0.046, "step": 25175 }, { "epoch": 0.44585530422770925, "grad_norm": 0.8255824446678162, "learning_rate": 1.833086362969348e-05, "loss": 0.0724, "step": 25176 }, { "epoch": 0.44587301376473765, "grad_norm": 0.6905122995376587, "learning_rate": 1.8330024739861566e-05, "loss": 0.1093, "step": 25177 }, { "epoch": 0.4458907233017661, "grad_norm": 0.6398375630378723, "learning_rate": 1.8329185839074198e-05, "loss": 0.0693, "step": 25178 }, { "epoch": 0.4459084328387945, "grad_norm": 0.7573913335800171, "learning_rate": 1.8328346927334152e-05, "loss": 0.0782, "step": 25179 }, { "epoch": 0.44592614237582295, "grad_norm": 0.545272946357727, "learning_rate": 1.8327508004644175e-05, "loss": 0.0717, "step": 25180 }, { "epoch": 0.44594385191285135, "grad_norm": 0.7868880033493042, "learning_rate": 1.832666907100704e-05, "loss": 0.0785, "step": 25181 }, { "epoch": 0.4459615614498798, "grad_norm": 0.698377251625061, "learning_rate": 1.8325830126425497e-05, "loss": 0.0978, "step": 25182 }, { "epoch": 0.4459792709869082, "grad_norm": 0.7638397216796875, "learning_rate": 1.8324991170902308e-05, "loss": 0.0574, "step": 25183 }, { "epoch": 0.44599698052393666, "grad_norm": 0.730964720249176, "learning_rate": 1.8324152204440233e-05, "loss": 0.0521, "step": 25184 }, { "epoch": 0.44601469006096506, "grad_norm": 1.2571094036102295, "learning_rate": 1.8323313227042035e-05, "loss": 0.0882, "step": 25185 }, { "epoch": 0.4460323995979935, "grad_norm": 0.691237211227417, "learning_rate": 1.832247423871047e-05, "loss": 0.0711, "step": 25186 }, { "epoch": 0.4460501091350219, "grad_norm": 0.7192988395690918, "learning_rate": 1.8321635239448303e-05, "loss": 0.0838, "step": 25187 }, { "epoch": 0.44606781867205036, "grad_norm": 0.7757291793823242, "learning_rate": 1.8320796229258298e-05, "loss": 0.0994, "step": 25188 }, { "epoch": 0.4460855282090788, "grad_norm": 0.5614396929740906, "learning_rate": 1.83199572081432e-05, "loss": 0.0733, "step": 25189 }, { "epoch": 0.4461032377461072, "grad_norm": 0.42385753989219666, "learning_rate": 1.8319118176105785e-05, "loss": 0.0692, "step": 25190 }, { "epoch": 0.44612094728313567, "grad_norm": 1.003568172454834, "learning_rate": 1.8318279133148805e-05, "loss": 0.1001, "step": 25191 }, { "epoch": 0.44613865682016407, "grad_norm": 0.688235342502594, "learning_rate": 1.8317440079275024e-05, "loss": 0.1241, "step": 25192 }, { "epoch": 0.4461563663571925, "grad_norm": 0.5652799010276794, "learning_rate": 1.83166010144872e-05, "loss": 0.0838, "step": 25193 }, { "epoch": 0.4461740758942209, "grad_norm": 2.014021635055542, "learning_rate": 1.8315761938788088e-05, "loss": 0.0861, "step": 25194 }, { "epoch": 0.4461917854312494, "grad_norm": 0.5622841715812683, "learning_rate": 1.8314922852180458e-05, "loss": 0.0762, "step": 25195 }, { "epoch": 0.4462094949682778, "grad_norm": 0.7333006858825684, "learning_rate": 1.8314083754667072e-05, "loss": 0.0859, "step": 25196 }, { "epoch": 0.44622720450530623, "grad_norm": 0.44691526889801025, "learning_rate": 1.831324464625068e-05, "loss": 0.0938, "step": 25197 }, { "epoch": 0.44624491404233463, "grad_norm": 0.8176665902137756, "learning_rate": 1.831240552693405e-05, "loss": 0.0781, "step": 25198 }, { "epoch": 0.4462626235793631, "grad_norm": 0.6437193751335144, "learning_rate": 1.8311566396719942e-05, "loss": 0.0785, "step": 25199 }, { "epoch": 0.4462803331163915, "grad_norm": 0.635145902633667, "learning_rate": 1.8310727255611116e-05, "loss": 0.0899, "step": 25200 }, { "epoch": 0.44629804265341994, "grad_norm": 0.6658564805984497, "learning_rate": 1.830988810361033e-05, "loss": 0.054, "step": 25201 }, { "epoch": 0.44631575219044833, "grad_norm": 0.6210757493972778, "learning_rate": 1.830904894072035e-05, "loss": 0.064, "step": 25202 }, { "epoch": 0.4463334617274768, "grad_norm": 0.8620787262916565, "learning_rate": 1.830820976694393e-05, "loss": 0.0783, "step": 25203 }, { "epoch": 0.44635117126450524, "grad_norm": 0.8189516067504883, "learning_rate": 1.8307370582283833e-05, "loss": 0.0761, "step": 25204 }, { "epoch": 0.44636888080153364, "grad_norm": 0.8856334090232849, "learning_rate": 1.8306531386742826e-05, "loss": 0.0776, "step": 25205 }, { "epoch": 0.4463865903385621, "grad_norm": 0.9241918921470642, "learning_rate": 1.830569218032366e-05, "loss": 0.1027, "step": 25206 }, { "epoch": 0.4464042998755905, "grad_norm": 0.8302900195121765, "learning_rate": 1.8304852963029112e-05, "loss": 0.0935, "step": 25207 }, { "epoch": 0.44642200941261895, "grad_norm": 0.6390017867088318, "learning_rate": 1.8304013734861917e-05, "loss": 0.0828, "step": 25208 }, { "epoch": 0.44643971894964735, "grad_norm": 0.7113221883773804, "learning_rate": 1.830317449582486e-05, "loss": 0.0782, "step": 25209 }, { "epoch": 0.4464574284866758, "grad_norm": 1.0375488996505737, "learning_rate": 1.830233524592069e-05, "loss": 0.0811, "step": 25210 }, { "epoch": 0.4464751380237042, "grad_norm": 0.950631856918335, "learning_rate": 1.8301495985152176e-05, "loss": 0.0799, "step": 25211 }, { "epoch": 0.44649284756073265, "grad_norm": 0.9139246344566345, "learning_rate": 1.8300656713522068e-05, "loss": 0.0762, "step": 25212 }, { "epoch": 0.44651055709776105, "grad_norm": 0.7856965661048889, "learning_rate": 1.8299817431033136e-05, "loss": 0.1049, "step": 25213 }, { "epoch": 0.4465282666347895, "grad_norm": 0.5922976732254028, "learning_rate": 1.8298978137688138e-05, "loss": 0.0905, "step": 25214 }, { "epoch": 0.4465459761718179, "grad_norm": 0.5768937468528748, "learning_rate": 1.8298138833489837e-05, "loss": 0.1097, "step": 25215 }, { "epoch": 0.44656368570884636, "grad_norm": 0.5710002779960632, "learning_rate": 1.8297299518440992e-05, "loss": 0.0684, "step": 25216 }, { "epoch": 0.44658139524587476, "grad_norm": 0.4445195496082306, "learning_rate": 1.8296460192544366e-05, "loss": 0.0835, "step": 25217 }, { "epoch": 0.4465991047829032, "grad_norm": 0.9492349028587341, "learning_rate": 1.8295620855802716e-05, "loss": 0.0561, "step": 25218 }, { "epoch": 0.44661681431993167, "grad_norm": 0.9247875809669495, "learning_rate": 1.829478150821881e-05, "loss": 0.1192, "step": 25219 }, { "epoch": 0.44663452385696006, "grad_norm": 0.7328823804855347, "learning_rate": 1.8293942149795404e-05, "loss": 0.0928, "step": 25220 }, { "epoch": 0.4466522333939885, "grad_norm": 0.635376513004303, "learning_rate": 1.8293102780535265e-05, "loss": 0.0988, "step": 25221 }, { "epoch": 0.4466699429310169, "grad_norm": 0.8541421890258789, "learning_rate": 1.8292263400441148e-05, "loss": 0.0888, "step": 25222 }, { "epoch": 0.44668765246804537, "grad_norm": 1.5301152467727661, "learning_rate": 1.8291424009515817e-05, "loss": 0.1097, "step": 25223 }, { "epoch": 0.44670536200507377, "grad_norm": 0.8003528714179993, "learning_rate": 1.8290584607762034e-05, "loss": 0.0944, "step": 25224 }, { "epoch": 0.4467230715421022, "grad_norm": 0.5839651226997375, "learning_rate": 1.8289745195182563e-05, "loss": 0.0864, "step": 25225 }, { "epoch": 0.4467407810791306, "grad_norm": 0.6624751687049866, "learning_rate": 1.828890577178016e-05, "loss": 0.0565, "step": 25226 }, { "epoch": 0.4467584906161591, "grad_norm": 0.382144033908844, "learning_rate": 1.8288066337557587e-05, "loss": 0.0565, "step": 25227 }, { "epoch": 0.4467762001531875, "grad_norm": 0.7250545620918274, "learning_rate": 1.8287226892517615e-05, "loss": 0.0748, "step": 25228 }, { "epoch": 0.44679390969021593, "grad_norm": 0.5447733998298645, "learning_rate": 1.8286387436662997e-05, "loss": 0.1237, "step": 25229 }, { "epoch": 0.4468116192272443, "grad_norm": 1.0215733051300049, "learning_rate": 1.8285547969996498e-05, "loss": 0.1065, "step": 25230 }, { "epoch": 0.4468293287642728, "grad_norm": 0.5699619054794312, "learning_rate": 1.8284708492520873e-05, "loss": 0.0513, "step": 25231 }, { "epoch": 0.44684703830130124, "grad_norm": 0.8726946711540222, "learning_rate": 1.8283869004238893e-05, "loss": 0.0671, "step": 25232 }, { "epoch": 0.44686474783832963, "grad_norm": 0.6339903473854065, "learning_rate": 1.8283029505153316e-05, "loss": 0.098, "step": 25233 }, { "epoch": 0.4468824573753581, "grad_norm": 0.9162320494651794, "learning_rate": 1.8282189995266904e-05, "loss": 0.0973, "step": 25234 }, { "epoch": 0.4469001669123865, "grad_norm": 0.7661095261573792, "learning_rate": 1.8281350474582417e-05, "loss": 0.1013, "step": 25235 }, { "epoch": 0.44691787644941494, "grad_norm": 0.8188510537147522, "learning_rate": 1.8280510943102624e-05, "loss": 0.0663, "step": 25236 }, { "epoch": 0.44693558598644334, "grad_norm": 0.7436214089393616, "learning_rate": 1.8279671400830276e-05, "loss": 0.0726, "step": 25237 }, { "epoch": 0.4469532955234718, "grad_norm": 0.802143931388855, "learning_rate": 1.8278831847768145e-05, "loss": 0.0835, "step": 25238 }, { "epoch": 0.4469710050605002, "grad_norm": 0.5960385799407959, "learning_rate": 1.8277992283918984e-05, "loss": 0.0868, "step": 25239 }, { "epoch": 0.44698871459752865, "grad_norm": 0.7874083518981934, "learning_rate": 1.8277152709285563e-05, "loss": 0.073, "step": 25240 }, { "epoch": 0.44700642413455705, "grad_norm": 0.5918982028961182, "learning_rate": 1.8276313123870642e-05, "loss": 0.0626, "step": 25241 }, { "epoch": 0.4470241336715855, "grad_norm": 0.44416746497154236, "learning_rate": 1.827547352767698e-05, "loss": 0.0729, "step": 25242 }, { "epoch": 0.4470418432086139, "grad_norm": 0.7946100234985352, "learning_rate": 1.8274633920707344e-05, "loss": 0.0724, "step": 25243 }, { "epoch": 0.44705955274564235, "grad_norm": 0.7597132325172424, "learning_rate": 1.8273794302964496e-05, "loss": 0.1136, "step": 25244 }, { "epoch": 0.44707726228267075, "grad_norm": 0.7310827970504761, "learning_rate": 1.8272954674451187e-05, "loss": 0.0739, "step": 25245 }, { "epoch": 0.4470949718196992, "grad_norm": 0.5842251777648926, "learning_rate": 1.8272115035170196e-05, "loss": 0.0588, "step": 25246 }, { "epoch": 0.44711268135672766, "grad_norm": 1.0573451519012451, "learning_rate": 1.8271275385124272e-05, "loss": 0.1153, "step": 25247 }, { "epoch": 0.44713039089375606, "grad_norm": 0.7972801327705383, "learning_rate": 1.827043572431619e-05, "loss": 0.0766, "step": 25248 }, { "epoch": 0.4471481004307845, "grad_norm": 0.4805269241333008, "learning_rate": 1.8269596052748697e-05, "loss": 0.0767, "step": 25249 }, { "epoch": 0.4471658099678129, "grad_norm": 0.7390137314796448, "learning_rate": 1.826875637042457e-05, "loss": 0.0706, "step": 25250 }, { "epoch": 0.44718351950484136, "grad_norm": 0.6944484114646912, "learning_rate": 1.826791667734656e-05, "loss": 0.0527, "step": 25251 }, { "epoch": 0.44720122904186976, "grad_norm": 0.2834366261959076, "learning_rate": 1.8267076973517437e-05, "loss": 0.0404, "step": 25252 }, { "epoch": 0.4472189385788982, "grad_norm": 0.7042036056518555, "learning_rate": 1.8266237258939963e-05, "loss": 0.052, "step": 25253 }, { "epoch": 0.4472366481159266, "grad_norm": 0.3538850247859955, "learning_rate": 1.8265397533616897e-05, "loss": 0.0865, "step": 25254 }, { "epoch": 0.44725435765295507, "grad_norm": 0.7375470995903015, "learning_rate": 1.8264557797551007e-05, "loss": 0.059, "step": 25255 }, { "epoch": 0.44727206718998347, "grad_norm": 0.5271561145782471, "learning_rate": 1.8263718050745044e-05, "loss": 0.06, "step": 25256 }, { "epoch": 0.4472897767270119, "grad_norm": 0.7644521594047546, "learning_rate": 1.8262878293201784e-05, "loss": 0.111, "step": 25257 }, { "epoch": 0.4473074862640403, "grad_norm": 0.5033413171768188, "learning_rate": 1.8262038524923986e-05, "loss": 0.0474, "step": 25258 }, { "epoch": 0.4473251958010688, "grad_norm": 0.7474949955940247, "learning_rate": 1.8261198745914407e-05, "loss": 0.0644, "step": 25259 }, { "epoch": 0.4473429053380972, "grad_norm": 0.4090607166290283, "learning_rate": 1.8260358956175816e-05, "loss": 0.064, "step": 25260 }, { "epoch": 0.44736061487512563, "grad_norm": 0.7130523324012756, "learning_rate": 1.8259519155710974e-05, "loss": 0.0775, "step": 25261 }, { "epoch": 0.4473783244121541, "grad_norm": 0.495992511510849, "learning_rate": 1.8258679344522643e-05, "loss": 0.0736, "step": 25262 }, { "epoch": 0.4473960339491825, "grad_norm": 1.2990995645523071, "learning_rate": 1.825783952261359e-05, "loss": 0.0794, "step": 25263 }, { "epoch": 0.44741374348621094, "grad_norm": 0.4423438608646393, "learning_rate": 1.825699968998657e-05, "loss": 0.0644, "step": 25264 }, { "epoch": 0.44743145302323933, "grad_norm": 0.5984320640563965, "learning_rate": 1.8256159846644353e-05, "loss": 0.0664, "step": 25265 }, { "epoch": 0.4474491625602678, "grad_norm": 0.5481002926826477, "learning_rate": 1.8255319992589698e-05, "loss": 0.0586, "step": 25266 }, { "epoch": 0.4474668720972962, "grad_norm": 0.9523095488548279, "learning_rate": 1.8254480127825367e-05, "loss": 0.0827, "step": 25267 }, { "epoch": 0.44748458163432464, "grad_norm": 0.6256269812583923, "learning_rate": 1.8253640252354127e-05, "loss": 0.0837, "step": 25268 }, { "epoch": 0.44750229117135304, "grad_norm": 0.9258899092674255, "learning_rate": 1.825280036617875e-05, "loss": 0.1079, "step": 25269 }, { "epoch": 0.4475200007083815, "grad_norm": 0.7058687210083008, "learning_rate": 1.8251960469301974e-05, "loss": 0.1012, "step": 25270 }, { "epoch": 0.4475377102454099, "grad_norm": 0.8581541776657104, "learning_rate": 1.8251120561726586e-05, "loss": 0.0969, "step": 25271 }, { "epoch": 0.44755541978243835, "grad_norm": 0.5311824083328247, "learning_rate": 1.8250280643455333e-05, "loss": 0.0848, "step": 25272 }, { "epoch": 0.44757312931946674, "grad_norm": 0.6834692358970642, "learning_rate": 1.824944071449099e-05, "loss": 0.1074, "step": 25273 }, { "epoch": 0.4475908388564952, "grad_norm": 0.731860339641571, "learning_rate": 1.8248600774836314e-05, "loss": 0.0989, "step": 25274 }, { "epoch": 0.4476085483935236, "grad_norm": 0.7778656482696533, "learning_rate": 1.824776082449407e-05, "loss": 0.0576, "step": 25275 }, { "epoch": 0.44762625793055205, "grad_norm": 0.49186378717422485, "learning_rate": 1.8246920863467027e-05, "loss": 0.0627, "step": 25276 }, { "epoch": 0.4476439674675805, "grad_norm": 0.7631625533103943, "learning_rate": 1.8246080891757933e-05, "loss": 0.0694, "step": 25277 }, { "epoch": 0.4476616770046089, "grad_norm": 0.4090210199356079, "learning_rate": 1.8245240909369573e-05, "loss": 0.0455, "step": 25278 }, { "epoch": 0.44767938654163736, "grad_norm": 0.38104915618896484, "learning_rate": 1.8244400916304692e-05, "loss": 0.0594, "step": 25279 }, { "epoch": 0.44769709607866576, "grad_norm": 0.710049569606781, "learning_rate": 1.8243560912566057e-05, "loss": 0.1035, "step": 25280 }, { "epoch": 0.4477148056156942, "grad_norm": 0.6644420623779297, "learning_rate": 1.8242720898156436e-05, "loss": 0.0653, "step": 25281 }, { "epoch": 0.4477325151527226, "grad_norm": 0.6251183748245239, "learning_rate": 1.8241880873078595e-05, "loss": 0.069, "step": 25282 }, { "epoch": 0.44775022468975106, "grad_norm": 0.7959861755371094, "learning_rate": 1.8241040837335297e-05, "loss": 0.0968, "step": 25283 }, { "epoch": 0.44776793422677946, "grad_norm": 0.7294960618019104, "learning_rate": 1.8240200790929295e-05, "loss": 0.0935, "step": 25284 }, { "epoch": 0.4477856437638079, "grad_norm": 0.4283510148525238, "learning_rate": 1.823936073386336e-05, "loss": 0.043, "step": 25285 }, { "epoch": 0.4478033533008363, "grad_norm": 0.6457324624061584, "learning_rate": 1.823852066614026e-05, "loss": 0.0632, "step": 25286 }, { "epoch": 0.44782106283786477, "grad_norm": 0.8525435924530029, "learning_rate": 1.8237680587762754e-05, "loss": 0.0934, "step": 25287 }, { "epoch": 0.44783877237489317, "grad_norm": 0.4637197256088257, "learning_rate": 1.8236840498733603e-05, "loss": 0.0545, "step": 25288 }, { "epoch": 0.4478564819119216, "grad_norm": 0.34333735704421997, "learning_rate": 1.8236000399055575e-05, "loss": 0.0642, "step": 25289 }, { "epoch": 0.44787419144895, "grad_norm": 0.6041548252105713, "learning_rate": 1.8235160288731436e-05, "loss": 0.042, "step": 25290 }, { "epoch": 0.4478919009859785, "grad_norm": 0.6308373808860779, "learning_rate": 1.8234320167763946e-05, "loss": 0.0789, "step": 25291 }, { "epoch": 0.44790961052300693, "grad_norm": 0.8382940888404846, "learning_rate": 1.823348003615587e-05, "loss": 0.0902, "step": 25292 }, { "epoch": 0.4479273200600353, "grad_norm": 0.48039594292640686, "learning_rate": 1.823263989390997e-05, "loss": 0.047, "step": 25293 }, { "epoch": 0.4479450295970638, "grad_norm": 0.47625282406806946, "learning_rate": 1.823179974102901e-05, "loss": 0.074, "step": 25294 }, { "epoch": 0.4479627391340922, "grad_norm": 0.7683936953544617, "learning_rate": 1.8230959577515757e-05, "loss": 0.0647, "step": 25295 }, { "epoch": 0.44798044867112063, "grad_norm": 0.6219161748886108, "learning_rate": 1.823011940337298e-05, "loss": 0.0841, "step": 25296 }, { "epoch": 0.44799815820814903, "grad_norm": 0.6242360472679138, "learning_rate": 1.822927921860343e-05, "loss": 0.0942, "step": 25297 }, { "epoch": 0.4480158677451775, "grad_norm": 0.5102183818817139, "learning_rate": 1.8228439023209885e-05, "loss": 0.0896, "step": 25298 }, { "epoch": 0.4480335772822059, "grad_norm": 0.6609688401222229, "learning_rate": 1.8227598817195096e-05, "loss": 0.0827, "step": 25299 }, { "epoch": 0.44805128681923434, "grad_norm": 1.1164294481277466, "learning_rate": 1.822675860056183e-05, "loss": 0.0751, "step": 25300 }, { "epoch": 0.44806899635626274, "grad_norm": 1.1224697828292847, "learning_rate": 1.822591837331286e-05, "loss": 0.1236, "step": 25301 }, { "epoch": 0.4480867058932912, "grad_norm": 0.48195144534111023, "learning_rate": 1.8225078135450945e-05, "loss": 0.0715, "step": 25302 }, { "epoch": 0.4481044154303196, "grad_norm": 0.4364321529865265, "learning_rate": 1.8224237886978847e-05, "loss": 0.0532, "step": 25303 }, { "epoch": 0.44812212496734805, "grad_norm": 0.7333250641822815, "learning_rate": 1.822339762789933e-05, "loss": 0.0652, "step": 25304 }, { "epoch": 0.44813983450437644, "grad_norm": 0.7615711092948914, "learning_rate": 1.822255735821517e-05, "loss": 0.0676, "step": 25305 }, { "epoch": 0.4481575440414049, "grad_norm": 0.6403586268424988, "learning_rate": 1.8221717077929113e-05, "loss": 0.0872, "step": 25306 }, { "epoch": 0.44817525357843335, "grad_norm": 0.43913957476615906, "learning_rate": 1.8220876787043937e-05, "loss": 0.058, "step": 25307 }, { "epoch": 0.44819296311546175, "grad_norm": 0.5264655947685242, "learning_rate": 1.8220036485562403e-05, "loss": 0.0486, "step": 25308 }, { "epoch": 0.4482106726524902, "grad_norm": 0.6284526586532593, "learning_rate": 1.821919617348727e-05, "loss": 0.0751, "step": 25309 }, { "epoch": 0.4482283821895186, "grad_norm": 0.8681415915489197, "learning_rate": 1.821835585082131e-05, "loss": 0.091, "step": 25310 }, { "epoch": 0.44824609172654706, "grad_norm": 0.5013852715492249, "learning_rate": 1.8217515517567286e-05, "loss": 0.0581, "step": 25311 }, { "epoch": 0.44826380126357546, "grad_norm": 0.7441345453262329, "learning_rate": 1.821667517372796e-05, "loss": 0.0745, "step": 25312 }, { "epoch": 0.4482815108006039, "grad_norm": 0.6913418173789978, "learning_rate": 1.82158348193061e-05, "loss": 0.0779, "step": 25313 }, { "epoch": 0.4482992203376323, "grad_norm": 0.6213415861129761, "learning_rate": 1.8214994454304466e-05, "loss": 0.0817, "step": 25314 }, { "epoch": 0.44831692987466076, "grad_norm": 0.9980587959289551, "learning_rate": 1.8214154078725827e-05, "loss": 0.0994, "step": 25315 }, { "epoch": 0.44833463941168916, "grad_norm": 0.9692245721817017, "learning_rate": 1.8213313692572948e-05, "loss": 0.071, "step": 25316 }, { "epoch": 0.4483523489487176, "grad_norm": 0.7054484486579895, "learning_rate": 1.821247329584859e-05, "loss": 0.0383, "step": 25317 }, { "epoch": 0.448370058485746, "grad_norm": 0.5758234262466431, "learning_rate": 1.8211632888555517e-05, "loss": 0.0686, "step": 25318 }, { "epoch": 0.44838776802277447, "grad_norm": 0.738332986831665, "learning_rate": 1.8210792470696497e-05, "loss": 0.0658, "step": 25319 }, { "epoch": 0.44840547755980287, "grad_norm": 0.5530968904495239, "learning_rate": 1.8209952042274295e-05, "loss": 0.0562, "step": 25320 }, { "epoch": 0.4484231870968313, "grad_norm": 0.5234781503677368, "learning_rate": 1.8209111603291683e-05, "loss": 0.076, "step": 25321 }, { "epoch": 0.4484408966338598, "grad_norm": 0.5788922905921936, "learning_rate": 1.820827115375141e-05, "loss": 0.0392, "step": 25322 }, { "epoch": 0.4484586061708882, "grad_norm": 0.5761127471923828, "learning_rate": 1.820743069365625e-05, "loss": 0.0906, "step": 25323 }, { "epoch": 0.44847631570791663, "grad_norm": 0.8114427328109741, "learning_rate": 1.820659022300897e-05, "loss": 0.0711, "step": 25324 }, { "epoch": 0.448494025244945, "grad_norm": 0.20191581547260284, "learning_rate": 1.820574974181233e-05, "loss": 0.0364, "step": 25325 }, { "epoch": 0.4485117347819735, "grad_norm": 1.233905553817749, "learning_rate": 1.8204909250069104e-05, "loss": 0.095, "step": 25326 }, { "epoch": 0.4485294443190019, "grad_norm": 0.6798930168151855, "learning_rate": 1.8204068747782044e-05, "loss": 0.0419, "step": 25327 }, { "epoch": 0.44854715385603033, "grad_norm": 0.99443519115448, "learning_rate": 1.8203228234953924e-05, "loss": 0.0514, "step": 25328 }, { "epoch": 0.44856486339305873, "grad_norm": 0.8780547976493835, "learning_rate": 1.8202387711587505e-05, "loss": 0.0928, "step": 25329 }, { "epoch": 0.4485825729300872, "grad_norm": 0.6494779586791992, "learning_rate": 1.8201547177685558e-05, "loss": 0.0488, "step": 25330 }, { "epoch": 0.4486002824671156, "grad_norm": 0.6032947301864624, "learning_rate": 1.8200706633250844e-05, "loss": 0.0568, "step": 25331 }, { "epoch": 0.44861799200414404, "grad_norm": 0.7813467979431152, "learning_rate": 1.8199866078286124e-05, "loss": 0.0574, "step": 25332 }, { "epoch": 0.44863570154117244, "grad_norm": 0.7712026834487915, "learning_rate": 1.8199025512794175e-05, "loss": 0.1095, "step": 25333 }, { "epoch": 0.4486534110782009, "grad_norm": 0.6355705857276917, "learning_rate": 1.8198184936777753e-05, "loss": 0.0909, "step": 25334 }, { "epoch": 0.4486711206152293, "grad_norm": 0.5947937965393066, "learning_rate": 1.8197344350239626e-05, "loss": 0.0708, "step": 25335 }, { "epoch": 0.44868883015225774, "grad_norm": 0.24942989647388458, "learning_rate": 1.819650375318256e-05, "loss": 0.0549, "step": 25336 }, { "epoch": 0.4487065396892862, "grad_norm": 0.4225156605243683, "learning_rate": 1.8195663145609315e-05, "loss": 0.0912, "step": 25337 }, { "epoch": 0.4487242492263146, "grad_norm": 0.5815736055374146, "learning_rate": 1.8194822527522668e-05, "loss": 0.0456, "step": 25338 }, { "epoch": 0.44874195876334305, "grad_norm": 0.5714375972747803, "learning_rate": 1.8193981898925378e-05, "loss": 0.0762, "step": 25339 }, { "epoch": 0.44875966830037145, "grad_norm": 0.7285685539245605, "learning_rate": 1.819314125982021e-05, "loss": 0.0859, "step": 25340 }, { "epoch": 0.4487773778373999, "grad_norm": 0.5967934727668762, "learning_rate": 1.8192300610209927e-05, "loss": 0.1025, "step": 25341 }, { "epoch": 0.4487950873744283, "grad_norm": 0.5960914492607117, "learning_rate": 1.81914599500973e-05, "loss": 0.1058, "step": 25342 }, { "epoch": 0.44881279691145676, "grad_norm": 0.5018354058265686, "learning_rate": 1.8190619279485087e-05, "loss": 0.0818, "step": 25343 }, { "epoch": 0.44883050644848516, "grad_norm": 0.8215447068214417, "learning_rate": 1.8189778598376068e-05, "loss": 0.1012, "step": 25344 }, { "epoch": 0.4488482159855136, "grad_norm": 0.8862419128417969, "learning_rate": 1.8188937906772994e-05, "loss": 0.0507, "step": 25345 }, { "epoch": 0.448865925522542, "grad_norm": 0.606602132320404, "learning_rate": 1.8188097204678642e-05, "loss": 0.081, "step": 25346 }, { "epoch": 0.44888363505957046, "grad_norm": 0.7595340609550476, "learning_rate": 1.818725649209577e-05, "loss": 0.0669, "step": 25347 }, { "epoch": 0.44890134459659886, "grad_norm": 1.0037630796432495, "learning_rate": 1.8186415769027142e-05, "loss": 0.1094, "step": 25348 }, { "epoch": 0.4489190541336273, "grad_norm": 0.6941606402397156, "learning_rate": 1.8185575035475535e-05, "loss": 0.0807, "step": 25349 }, { "epoch": 0.4489367636706557, "grad_norm": 0.5292515754699707, "learning_rate": 1.8184734291443707e-05, "loss": 0.0889, "step": 25350 }, { "epoch": 0.44895447320768417, "grad_norm": 0.7401610016822815, "learning_rate": 1.8183893536934424e-05, "loss": 0.089, "step": 25351 }, { "epoch": 0.4489721827447126, "grad_norm": 0.6790474653244019, "learning_rate": 1.818305277195045e-05, "loss": 0.0847, "step": 25352 }, { "epoch": 0.448989892281741, "grad_norm": 0.6273956894874573, "learning_rate": 1.818221199649456e-05, "loss": 0.051, "step": 25353 }, { "epoch": 0.4490076018187695, "grad_norm": 0.6017305254936218, "learning_rate": 1.8181371210569513e-05, "loss": 0.0744, "step": 25354 }, { "epoch": 0.4490253113557979, "grad_norm": 0.838964581489563, "learning_rate": 1.8180530414178076e-05, "loss": 0.0949, "step": 25355 }, { "epoch": 0.44904302089282633, "grad_norm": 0.6500337719917297, "learning_rate": 1.8179689607323014e-05, "loss": 0.0486, "step": 25356 }, { "epoch": 0.4490607304298547, "grad_norm": 0.8972151279449463, "learning_rate": 1.8178848790007095e-05, "loss": 0.0756, "step": 25357 }, { "epoch": 0.4490784399668832, "grad_norm": 1.0072152614593506, "learning_rate": 1.817800796223309e-05, "loss": 0.1174, "step": 25358 }, { "epoch": 0.4490961495039116, "grad_norm": 0.5358304977416992, "learning_rate": 1.8177167124003752e-05, "loss": 0.0444, "step": 25359 }, { "epoch": 0.44911385904094003, "grad_norm": 0.7182134389877319, "learning_rate": 1.8176326275321865e-05, "loss": 0.091, "step": 25360 }, { "epoch": 0.44913156857796843, "grad_norm": 0.5122148990631104, "learning_rate": 1.817548541619018e-05, "loss": 0.0606, "step": 25361 }, { "epoch": 0.4491492781149969, "grad_norm": 1.0947953462600708, "learning_rate": 1.8174644546611466e-05, "loss": 0.0817, "step": 25362 }, { "epoch": 0.4491669876520253, "grad_norm": 0.5752490162849426, "learning_rate": 1.8173803666588502e-05, "loss": 0.0891, "step": 25363 }, { "epoch": 0.44918469718905374, "grad_norm": 0.7998279929161072, "learning_rate": 1.8172962776124034e-05, "loss": 0.0888, "step": 25364 }, { "epoch": 0.44920240672608214, "grad_norm": 0.5473750233650208, "learning_rate": 1.8172121875220848e-05, "loss": 0.0826, "step": 25365 }, { "epoch": 0.4492201162631106, "grad_norm": 0.5569798946380615, "learning_rate": 1.8171280963881697e-05, "loss": 0.0659, "step": 25366 }, { "epoch": 0.44923782580013905, "grad_norm": 0.7695556282997131, "learning_rate": 1.8170440042109356e-05, "loss": 0.0689, "step": 25367 }, { "epoch": 0.44925553533716744, "grad_norm": 0.36060312390327454, "learning_rate": 1.8169599109906584e-05, "loss": 0.0601, "step": 25368 }, { "epoch": 0.4492732448741959, "grad_norm": 0.6489096283912659, "learning_rate": 1.8168758167276156e-05, "loss": 0.0595, "step": 25369 }, { "epoch": 0.4492909544112243, "grad_norm": 1.3344635963439941, "learning_rate": 1.816791721422083e-05, "loss": 0.0698, "step": 25370 }, { "epoch": 0.44930866394825275, "grad_norm": 0.5577053427696228, "learning_rate": 1.8167076250743377e-05, "loss": 0.0875, "step": 25371 }, { "epoch": 0.44932637348528115, "grad_norm": 0.503040611743927, "learning_rate": 1.8166235276846567e-05, "loss": 0.0406, "step": 25372 }, { "epoch": 0.4493440830223096, "grad_norm": 1.9793469905853271, "learning_rate": 1.8165394292533163e-05, "loss": 0.0672, "step": 25373 }, { "epoch": 0.449361792559338, "grad_norm": 0.7214652299880981, "learning_rate": 1.816455329780593e-05, "loss": 0.0734, "step": 25374 }, { "epoch": 0.44937950209636646, "grad_norm": 1.3397037982940674, "learning_rate": 1.8163712292667637e-05, "loss": 0.0678, "step": 25375 }, { "epoch": 0.44939721163339486, "grad_norm": 0.8390594720840454, "learning_rate": 1.8162871277121048e-05, "loss": 0.069, "step": 25376 }, { "epoch": 0.4494149211704233, "grad_norm": 0.3684571385383606, "learning_rate": 1.8162030251168934e-05, "loss": 0.0846, "step": 25377 }, { "epoch": 0.4494326307074517, "grad_norm": 0.4503956437110901, "learning_rate": 1.8161189214814062e-05, "loss": 0.0595, "step": 25378 }, { "epoch": 0.44945034024448016, "grad_norm": 0.9251695871353149, "learning_rate": 1.8160348168059198e-05, "loss": 0.0799, "step": 25379 }, { "epoch": 0.44946804978150856, "grad_norm": 0.45156389474868774, "learning_rate": 1.8159507110907103e-05, "loss": 0.0525, "step": 25380 }, { "epoch": 0.449485759318537, "grad_norm": 0.7673372626304626, "learning_rate": 1.8158666043360555e-05, "loss": 0.0758, "step": 25381 }, { "epoch": 0.44950346885556547, "grad_norm": 0.4985158145427704, "learning_rate": 1.8157824965422312e-05, "loss": 0.0741, "step": 25382 }, { "epoch": 0.44952117839259387, "grad_norm": 0.5317308902740479, "learning_rate": 1.815698387709515e-05, "loss": 0.0596, "step": 25383 }, { "epoch": 0.4495388879296223, "grad_norm": 0.6212965250015259, "learning_rate": 1.815614277838182e-05, "loss": 0.0714, "step": 25384 }, { "epoch": 0.4495565974666507, "grad_norm": 0.43497470021247864, "learning_rate": 1.8155301669285104e-05, "loss": 0.0618, "step": 25385 }, { "epoch": 0.4495743070036792, "grad_norm": 0.7242089509963989, "learning_rate": 1.815446054980777e-05, "loss": 0.096, "step": 25386 }, { "epoch": 0.4495920165407076, "grad_norm": 0.5079663395881653, "learning_rate": 1.8153619419952573e-05, "loss": 0.0716, "step": 25387 }, { "epoch": 0.449609726077736, "grad_norm": 0.6275439262390137, "learning_rate": 1.8152778279722294e-05, "loss": 0.0768, "step": 25388 }, { "epoch": 0.4496274356147644, "grad_norm": 0.9959443211555481, "learning_rate": 1.8151937129119686e-05, "loss": 0.0691, "step": 25389 }, { "epoch": 0.4496451451517929, "grad_norm": 0.6043567657470703, "learning_rate": 1.8151095968147525e-05, "loss": 0.0687, "step": 25390 }, { "epoch": 0.4496628546888213, "grad_norm": 0.31232136487960815, "learning_rate": 1.8150254796808575e-05, "loss": 0.0715, "step": 25391 }, { "epoch": 0.44968056422584973, "grad_norm": 0.8289013504981995, "learning_rate": 1.814941361510561e-05, "loss": 0.066, "step": 25392 }, { "epoch": 0.44969827376287813, "grad_norm": 0.5502950549125671, "learning_rate": 1.8148572423041394e-05, "loss": 0.0686, "step": 25393 }, { "epoch": 0.4497159832999066, "grad_norm": 0.7381714582443237, "learning_rate": 1.814773122061869e-05, "loss": 0.0695, "step": 25394 }, { "epoch": 0.449733692836935, "grad_norm": 0.526109516620636, "learning_rate": 1.814689000784027e-05, "loss": 0.0583, "step": 25395 }, { "epoch": 0.44975140237396344, "grad_norm": 0.5849776864051819, "learning_rate": 1.81460487847089e-05, "loss": 0.0831, "step": 25396 }, { "epoch": 0.4497691119109919, "grad_norm": 1.0504626035690308, "learning_rate": 1.8145207551227347e-05, "loss": 0.1452, "step": 25397 }, { "epoch": 0.4497868214480203, "grad_norm": 0.5552718639373779, "learning_rate": 1.814436630739838e-05, "loss": 0.0848, "step": 25398 }, { "epoch": 0.44980453098504874, "grad_norm": 0.9855126142501831, "learning_rate": 1.8143525053224764e-05, "loss": 0.0828, "step": 25399 }, { "epoch": 0.44982224052207714, "grad_norm": 0.5491417646408081, "learning_rate": 1.814268378870927e-05, "loss": 0.0561, "step": 25400 }, { "epoch": 0.4498399500591056, "grad_norm": 0.6510876417160034, "learning_rate": 1.8141842513854663e-05, "loss": 0.0656, "step": 25401 }, { "epoch": 0.449857659596134, "grad_norm": 0.5004521012306213, "learning_rate": 1.8141001228663714e-05, "loss": 0.0541, "step": 25402 }, { "epoch": 0.44987536913316245, "grad_norm": 0.4962078034877777, "learning_rate": 1.8140159933139187e-05, "loss": 0.0837, "step": 25403 }, { "epoch": 0.44989307867019085, "grad_norm": 0.6380445957183838, "learning_rate": 1.8139318627283853e-05, "loss": 0.0763, "step": 25404 }, { "epoch": 0.4499107882072193, "grad_norm": 0.6291643381118774, "learning_rate": 1.8138477311100476e-05, "loss": 0.0959, "step": 25405 }, { "epoch": 0.4499284977442477, "grad_norm": 0.6632918119430542, "learning_rate": 1.813763598459183e-05, "loss": 0.0879, "step": 25406 }, { "epoch": 0.44994620728127616, "grad_norm": 0.7368097901344299, "learning_rate": 1.8136794647760677e-05, "loss": 0.0874, "step": 25407 }, { "epoch": 0.44996391681830455, "grad_norm": 0.6829839944839478, "learning_rate": 1.8135953300609786e-05, "loss": 0.0699, "step": 25408 }, { "epoch": 0.449981626355333, "grad_norm": 0.9352108836174011, "learning_rate": 1.8135111943141922e-05, "loss": 0.0583, "step": 25409 }, { "epoch": 0.4499993358923614, "grad_norm": 1.1677427291870117, "learning_rate": 1.8134270575359863e-05, "loss": 0.0827, "step": 25410 }, { "epoch": 0.45001704542938986, "grad_norm": 0.6486325860023499, "learning_rate": 1.8133429197266368e-05, "loss": 0.0636, "step": 25411 }, { "epoch": 0.4500347549664183, "grad_norm": 0.4032415449619293, "learning_rate": 1.8132587808864207e-05, "loss": 0.0672, "step": 25412 }, { "epoch": 0.4500524645034467, "grad_norm": 0.7065995335578918, "learning_rate": 1.8131746410156152e-05, "loss": 0.057, "step": 25413 }, { "epoch": 0.45007017404047517, "grad_norm": 0.7367731928825378, "learning_rate": 1.8130905001144965e-05, "loss": 0.0896, "step": 25414 }, { "epoch": 0.45008788357750357, "grad_norm": 0.7141711115837097, "learning_rate": 1.813006358183342e-05, "loss": 0.0578, "step": 25415 }, { "epoch": 0.450105593114532, "grad_norm": 0.6079898476600647, "learning_rate": 1.812922215222428e-05, "loss": 0.0614, "step": 25416 }, { "epoch": 0.4501233026515604, "grad_norm": 0.6212625503540039, "learning_rate": 1.8128380712320322e-05, "loss": 0.0735, "step": 25417 }, { "epoch": 0.4501410121885889, "grad_norm": 0.7475617527961731, "learning_rate": 1.8127539262124302e-05, "loss": 0.1089, "step": 25418 }, { "epoch": 0.4501587217256173, "grad_norm": 1.0371148586273193, "learning_rate": 1.812669780163899e-05, "loss": 0.1211, "step": 25419 }, { "epoch": 0.4501764312626457, "grad_norm": 0.2870621979236603, "learning_rate": 1.8125856330867165e-05, "loss": 0.0444, "step": 25420 }, { "epoch": 0.4501941407996741, "grad_norm": 0.7288498878479004, "learning_rate": 1.812501484981159e-05, "loss": 0.0673, "step": 25421 }, { "epoch": 0.4502118503367026, "grad_norm": 0.8762491345405579, "learning_rate": 1.812417335847503e-05, "loss": 0.0926, "step": 25422 }, { "epoch": 0.450229559873731, "grad_norm": 1.004276990890503, "learning_rate": 1.8123331856860252e-05, "loss": 0.0884, "step": 25423 }, { "epoch": 0.45024726941075943, "grad_norm": 0.8179444074630737, "learning_rate": 1.8122490344970032e-05, "loss": 0.0848, "step": 25424 }, { "epoch": 0.45026497894778783, "grad_norm": 0.8500927686691284, "learning_rate": 1.812164882280713e-05, "loss": 0.0901, "step": 25425 }, { "epoch": 0.4502826884848163, "grad_norm": 0.7849419713020325, "learning_rate": 1.8120807290374326e-05, "loss": 0.0801, "step": 25426 }, { "epoch": 0.45030039802184474, "grad_norm": 1.1686656475067139, "learning_rate": 1.811996574767438e-05, "loss": 0.0969, "step": 25427 }, { "epoch": 0.45031810755887314, "grad_norm": 0.7957327365875244, "learning_rate": 1.811912419471006e-05, "loss": 0.0591, "step": 25428 }, { "epoch": 0.4503358170959016, "grad_norm": 0.5559796094894409, "learning_rate": 1.8118282631484136e-05, "loss": 0.0814, "step": 25429 }, { "epoch": 0.45035352663293, "grad_norm": 0.7447205781936646, "learning_rate": 1.811744105799938e-05, "loss": 0.0927, "step": 25430 }, { "epoch": 0.45037123616995844, "grad_norm": 0.8146765828132629, "learning_rate": 1.811659947425856e-05, "loss": 0.0753, "step": 25431 }, { "epoch": 0.45038894570698684, "grad_norm": 0.555632472038269, "learning_rate": 1.811575788026444e-05, "loss": 0.0838, "step": 25432 }, { "epoch": 0.4504066552440153, "grad_norm": 0.7836970090866089, "learning_rate": 1.811491627601979e-05, "loss": 0.0976, "step": 25433 }, { "epoch": 0.4504243647810437, "grad_norm": 0.6818221807479858, "learning_rate": 1.8114074661527384e-05, "loss": 0.0772, "step": 25434 }, { "epoch": 0.45044207431807215, "grad_norm": 0.6732226610183716, "learning_rate": 1.8113233036789987e-05, "loss": 0.0993, "step": 25435 }, { "epoch": 0.45045978385510055, "grad_norm": 0.7467254400253296, "learning_rate": 1.8112391401810365e-05, "loss": 0.0787, "step": 25436 }, { "epoch": 0.450477493392129, "grad_norm": 0.6054794192314148, "learning_rate": 1.8111549756591294e-05, "loss": 0.0696, "step": 25437 }, { "epoch": 0.4504952029291574, "grad_norm": 0.48871874809265137, "learning_rate": 1.8110708101135535e-05, "loss": 0.1096, "step": 25438 }, { "epoch": 0.45051291246618586, "grad_norm": 0.8044130802154541, "learning_rate": 1.810986643544586e-05, "loss": 0.0677, "step": 25439 }, { "epoch": 0.45053062200321425, "grad_norm": 0.7078584432601929, "learning_rate": 1.8109024759525046e-05, "loss": 0.0781, "step": 25440 }, { "epoch": 0.4505483315402427, "grad_norm": 0.8125265836715698, "learning_rate": 1.810818307337585e-05, "loss": 0.0624, "step": 25441 }, { "epoch": 0.45056604107727116, "grad_norm": 0.7967872023582458, "learning_rate": 1.8107341377001047e-05, "loss": 0.0725, "step": 25442 }, { "epoch": 0.45058375061429956, "grad_norm": 1.0310769081115723, "learning_rate": 1.8106499670403407e-05, "loss": 0.0818, "step": 25443 }, { "epoch": 0.450601460151328, "grad_norm": 0.37172985076904297, "learning_rate": 1.8105657953585695e-05, "loss": 0.0809, "step": 25444 }, { "epoch": 0.4506191696883564, "grad_norm": 0.5084831714630127, "learning_rate": 1.810481622655069e-05, "loss": 0.0387, "step": 25445 }, { "epoch": 0.45063687922538487, "grad_norm": 0.4159599244594574, "learning_rate": 1.8103974489301142e-05, "loss": 0.0503, "step": 25446 }, { "epoch": 0.45065458876241327, "grad_norm": 0.9471002221107483, "learning_rate": 1.8103132741839834e-05, "loss": 0.0589, "step": 25447 }, { "epoch": 0.4506722982994417, "grad_norm": 0.6713544726371765, "learning_rate": 1.8102290984169533e-05, "loss": 0.0518, "step": 25448 }, { "epoch": 0.4506900078364701, "grad_norm": 0.5833098888397217, "learning_rate": 1.8101449216293014e-05, "loss": 0.0566, "step": 25449 }, { "epoch": 0.4507077173734986, "grad_norm": 0.6566795706748962, "learning_rate": 1.810060743821304e-05, "loss": 0.0997, "step": 25450 }, { "epoch": 0.45072542691052697, "grad_norm": 0.704314112663269, "learning_rate": 1.8099765649932382e-05, "loss": 0.0872, "step": 25451 }, { "epoch": 0.4507431364475554, "grad_norm": 0.7742372751235962, "learning_rate": 1.80989238514538e-05, "loss": 0.1086, "step": 25452 }, { "epoch": 0.4507608459845838, "grad_norm": 0.7239282727241516, "learning_rate": 1.809808204278008e-05, "loss": 0.0566, "step": 25453 }, { "epoch": 0.4507785555216123, "grad_norm": 0.7181404829025269, "learning_rate": 1.809724022391398e-05, "loss": 0.0481, "step": 25454 }, { "epoch": 0.4507962650586407, "grad_norm": 1.0282467603683472, "learning_rate": 1.8096398394858276e-05, "loss": 0.0862, "step": 25455 }, { "epoch": 0.45081397459566913, "grad_norm": 0.6721805930137634, "learning_rate": 1.8095556555615734e-05, "loss": 0.0776, "step": 25456 }, { "epoch": 0.4508316841326976, "grad_norm": 0.6149385571479797, "learning_rate": 1.8094714706189118e-05, "loss": 0.0549, "step": 25457 }, { "epoch": 0.450849393669726, "grad_norm": 0.46394315361976624, "learning_rate": 1.8093872846581207e-05, "loss": 0.0687, "step": 25458 }, { "epoch": 0.45086710320675444, "grad_norm": 0.5698086023330688, "learning_rate": 1.8093030976794773e-05, "loss": 0.0617, "step": 25459 }, { "epoch": 0.45088481274378284, "grad_norm": 0.6987398862838745, "learning_rate": 1.8092189096832572e-05, "loss": 0.0881, "step": 25460 }, { "epoch": 0.4509025222808113, "grad_norm": 0.5565692782402039, "learning_rate": 1.8091347206697387e-05, "loss": 0.0612, "step": 25461 }, { "epoch": 0.4509202318178397, "grad_norm": 0.5146426558494568, "learning_rate": 1.809050530639198e-05, "loss": 0.0654, "step": 25462 }, { "epoch": 0.45093794135486814, "grad_norm": 0.9555982351303101, "learning_rate": 1.8089663395919127e-05, "loss": 0.0825, "step": 25463 }, { "epoch": 0.45095565089189654, "grad_norm": 0.6121317148208618, "learning_rate": 1.8088821475281584e-05, "loss": 0.0743, "step": 25464 }, { "epoch": 0.450973360428925, "grad_norm": 0.8123134970664978, "learning_rate": 1.808797954448214e-05, "loss": 0.0716, "step": 25465 }, { "epoch": 0.4509910699659534, "grad_norm": 0.6684750318527222, "learning_rate": 1.808713760352355e-05, "loss": 0.078, "step": 25466 }, { "epoch": 0.45100877950298185, "grad_norm": 0.49863922595977783, "learning_rate": 1.808629565240859e-05, "loss": 0.0834, "step": 25467 }, { "epoch": 0.45102648904001025, "grad_norm": 0.5006290674209595, "learning_rate": 1.8085453691140036e-05, "loss": 0.065, "step": 25468 }, { "epoch": 0.4510441985770387, "grad_norm": 0.5774030685424805, "learning_rate": 1.808461171972065e-05, "loss": 0.0653, "step": 25469 }, { "epoch": 0.4510619081140671, "grad_norm": 1.1080830097198486, "learning_rate": 1.80837697381532e-05, "loss": 0.094, "step": 25470 }, { "epoch": 0.45107961765109555, "grad_norm": 0.6411064863204956, "learning_rate": 1.8082927746440456e-05, "loss": 0.0635, "step": 25471 }, { "epoch": 0.451097327188124, "grad_norm": 0.6444520354270935, "learning_rate": 1.8082085744585196e-05, "loss": 0.1041, "step": 25472 }, { "epoch": 0.4511150367251524, "grad_norm": 0.9390161037445068, "learning_rate": 1.8081243732590187e-05, "loss": 0.0888, "step": 25473 }, { "epoch": 0.45113274626218086, "grad_norm": 0.7188876271247864, "learning_rate": 1.8080401710458195e-05, "loss": 0.0826, "step": 25474 }, { "epoch": 0.45115045579920926, "grad_norm": 0.8503326773643494, "learning_rate": 1.8079559678191993e-05, "loss": 0.1041, "step": 25475 }, { "epoch": 0.4511681653362377, "grad_norm": 0.6833011507987976, "learning_rate": 1.807871763579435e-05, "loss": 0.0888, "step": 25476 }, { "epoch": 0.4511858748732661, "grad_norm": 0.9862270355224609, "learning_rate": 1.8077875583268043e-05, "loss": 0.0622, "step": 25477 }, { "epoch": 0.45120358441029457, "grad_norm": 0.48997658491134644, "learning_rate": 1.807703352061583e-05, "loss": 0.0534, "step": 25478 }, { "epoch": 0.45122129394732297, "grad_norm": 0.5505809187889099, "learning_rate": 1.8076191447840495e-05, "loss": 0.0971, "step": 25479 }, { "epoch": 0.4512390034843514, "grad_norm": 0.7369042038917542, "learning_rate": 1.8075349364944796e-05, "loss": 0.0561, "step": 25480 }, { "epoch": 0.4512567130213798, "grad_norm": 1.3776161670684814, "learning_rate": 1.807450727193151e-05, "loss": 0.1108, "step": 25481 }, { "epoch": 0.4512744225584083, "grad_norm": 1.361095905303955, "learning_rate": 1.8073665168803407e-05, "loss": 0.0604, "step": 25482 }, { "epoch": 0.45129213209543667, "grad_norm": 0.7053282856941223, "learning_rate": 1.8072823055563254e-05, "loss": 0.089, "step": 25483 }, { "epoch": 0.4513098416324651, "grad_norm": 0.45945894718170166, "learning_rate": 1.8071980932213827e-05, "loss": 0.0936, "step": 25484 }, { "epoch": 0.4513275511694935, "grad_norm": 0.7882913947105408, "learning_rate": 1.807113879875789e-05, "loss": 0.0911, "step": 25485 }, { "epoch": 0.451345260706522, "grad_norm": 0.4761616587638855, "learning_rate": 1.807029665519822e-05, "loss": 0.0856, "step": 25486 }, { "epoch": 0.45136297024355043, "grad_norm": 0.8745480179786682, "learning_rate": 1.806945450153758e-05, "loss": 0.066, "step": 25487 }, { "epoch": 0.45138067978057883, "grad_norm": 0.4742962121963501, "learning_rate": 1.8068612337778753e-05, "loss": 0.0804, "step": 25488 }, { "epoch": 0.4513983893176073, "grad_norm": 0.5776158571243286, "learning_rate": 1.80677701639245e-05, "loss": 0.0543, "step": 25489 }, { "epoch": 0.4514160988546357, "grad_norm": 0.5750523805618286, "learning_rate": 1.8066927979977586e-05, "loss": 0.0709, "step": 25490 }, { "epoch": 0.45143380839166414, "grad_norm": 1.0316585302352905, "learning_rate": 1.8066085785940796e-05, "loss": 0.101, "step": 25491 }, { "epoch": 0.45145151792869254, "grad_norm": 0.7851903438568115, "learning_rate": 1.8065243581816888e-05, "loss": 0.0951, "step": 25492 }, { "epoch": 0.451469227465721, "grad_norm": 0.7736641764640808, "learning_rate": 1.8064401367608645e-05, "loss": 0.0872, "step": 25493 }, { "epoch": 0.4514869370027494, "grad_norm": 0.9639396667480469, "learning_rate": 1.8063559143318827e-05, "loss": 0.0949, "step": 25494 }, { "epoch": 0.45150464653977784, "grad_norm": 0.5214598774909973, "learning_rate": 1.8062716908950215e-05, "loss": 0.0391, "step": 25495 }, { "epoch": 0.45152235607680624, "grad_norm": 0.6773228049278259, "learning_rate": 1.8061874664505566e-05, "loss": 0.0633, "step": 25496 }, { "epoch": 0.4515400656138347, "grad_norm": 0.5445188879966736, "learning_rate": 1.8061032409987666e-05, "loss": 0.0611, "step": 25497 }, { "epoch": 0.4515577751508631, "grad_norm": 0.8231793642044067, "learning_rate": 1.806019014539928e-05, "loss": 0.0681, "step": 25498 }, { "epoch": 0.45157548468789155, "grad_norm": 0.5426188707351685, "learning_rate": 1.8059347870743172e-05, "loss": 0.0727, "step": 25499 }, { "epoch": 0.45159319422492, "grad_norm": 0.542175829410553, "learning_rate": 1.8058505586022117e-05, "loss": 0.0517, "step": 25500 }, { "epoch": 0.4516109037619484, "grad_norm": 0.6489238142967224, "learning_rate": 1.8057663291238893e-05, "loss": 0.0952, "step": 25501 }, { "epoch": 0.45162861329897686, "grad_norm": 0.9041863679885864, "learning_rate": 1.805682098639627e-05, "loss": 0.0733, "step": 25502 }, { "epoch": 0.45164632283600525, "grad_norm": 0.769766628742218, "learning_rate": 1.8055978671497005e-05, "loss": 0.1217, "step": 25503 }, { "epoch": 0.4516640323730337, "grad_norm": 0.6401513814926147, "learning_rate": 1.8055136346543887e-05, "loss": 0.0648, "step": 25504 }, { "epoch": 0.4516817419100621, "grad_norm": 0.4498111605644226, "learning_rate": 1.8054294011539677e-05, "loss": 0.075, "step": 25505 }, { "epoch": 0.45169945144709056, "grad_norm": 0.6793095469474792, "learning_rate": 1.8053451666487148e-05, "loss": 0.0607, "step": 25506 }, { "epoch": 0.45171716098411896, "grad_norm": 0.6424110531806946, "learning_rate": 1.8052609311389073e-05, "loss": 0.0827, "step": 25507 }, { "epoch": 0.4517348705211474, "grad_norm": 0.6214944124221802, "learning_rate": 1.8051766946248223e-05, "loss": 0.0501, "step": 25508 }, { "epoch": 0.4517525800581758, "grad_norm": 0.501660168170929, "learning_rate": 1.8050924571067366e-05, "loss": 0.0907, "step": 25509 }, { "epoch": 0.45177028959520427, "grad_norm": 0.6564011573791504, "learning_rate": 1.8050082185849274e-05, "loss": 0.0889, "step": 25510 }, { "epoch": 0.45178799913223266, "grad_norm": 0.7485711574554443, "learning_rate": 1.8049239790596728e-05, "loss": 0.064, "step": 25511 }, { "epoch": 0.4518057086692611, "grad_norm": 0.6972712278366089, "learning_rate": 1.8048397385312482e-05, "loss": 0.0536, "step": 25512 }, { "epoch": 0.4518234182062895, "grad_norm": 0.7345569133758545, "learning_rate": 1.8047554969999323e-05, "loss": 0.1272, "step": 25513 }, { "epoch": 0.45184112774331797, "grad_norm": 0.534857451915741, "learning_rate": 1.8046712544660014e-05, "loss": 0.0589, "step": 25514 }, { "epoch": 0.4518588372803464, "grad_norm": 0.4808374345302582, "learning_rate": 1.804587010929733e-05, "loss": 0.0372, "step": 25515 }, { "epoch": 0.4518765468173748, "grad_norm": 0.7010623812675476, "learning_rate": 1.8045027663914044e-05, "loss": 0.0874, "step": 25516 }, { "epoch": 0.4518942563544033, "grad_norm": 0.5364171862602234, "learning_rate": 1.804418520851292e-05, "loss": 0.0607, "step": 25517 }, { "epoch": 0.4519119658914317, "grad_norm": 0.8724071979522705, "learning_rate": 1.8043342743096738e-05, "loss": 0.0849, "step": 25518 }, { "epoch": 0.45192967542846013, "grad_norm": 0.5974688529968262, "learning_rate": 1.8042500267668258e-05, "loss": 0.0635, "step": 25519 }, { "epoch": 0.45194738496548853, "grad_norm": 0.5434710383415222, "learning_rate": 1.804165778223027e-05, "loss": 0.0488, "step": 25520 }, { "epoch": 0.451965094502517, "grad_norm": 0.536454975605011, "learning_rate": 1.8040815286785535e-05, "loss": 0.0537, "step": 25521 }, { "epoch": 0.4519828040395454, "grad_norm": 0.6123595833778381, "learning_rate": 1.803997278133682e-05, "loss": 0.0745, "step": 25522 }, { "epoch": 0.45200051357657384, "grad_norm": 1.0897488594055176, "learning_rate": 1.8039130265886905e-05, "loss": 0.0564, "step": 25523 }, { "epoch": 0.45201822311360224, "grad_norm": 0.7748997211456299, "learning_rate": 1.8038287740438558e-05, "loss": 0.0862, "step": 25524 }, { "epoch": 0.4520359326506307, "grad_norm": 0.6253455877304077, "learning_rate": 1.8037445204994554e-05, "loss": 0.0688, "step": 25525 }, { "epoch": 0.4520536421876591, "grad_norm": 0.7754712104797363, "learning_rate": 1.8036602659557656e-05, "loss": 0.1052, "step": 25526 }, { "epoch": 0.45207135172468754, "grad_norm": 0.46539896726608276, "learning_rate": 1.803576010413065e-05, "loss": 0.0569, "step": 25527 }, { "epoch": 0.45208906126171594, "grad_norm": 0.3875292241573334, "learning_rate": 1.8034917538716297e-05, "loss": 0.0429, "step": 25528 }, { "epoch": 0.4521067707987444, "grad_norm": 0.6929813623428345, "learning_rate": 1.8034074963317374e-05, "loss": 0.0602, "step": 25529 }, { "epoch": 0.45212448033577285, "grad_norm": 0.576474130153656, "learning_rate": 1.8033232377936648e-05, "loss": 0.0707, "step": 25530 }, { "epoch": 0.45214218987280125, "grad_norm": 0.7061291933059692, "learning_rate": 1.8032389782576895e-05, "loss": 0.0968, "step": 25531 }, { "epoch": 0.4521598994098297, "grad_norm": 0.7928377389907837, "learning_rate": 1.803154717724089e-05, "loss": 0.0938, "step": 25532 }, { "epoch": 0.4521776089468581, "grad_norm": 0.5129731893539429, "learning_rate": 1.8030704561931392e-05, "loss": 0.0663, "step": 25533 }, { "epoch": 0.45219531848388655, "grad_norm": 0.597393274307251, "learning_rate": 1.8029861936651193e-05, "loss": 0.0664, "step": 25534 }, { "epoch": 0.45221302802091495, "grad_norm": 0.7851952314376831, "learning_rate": 1.802901930140305e-05, "loss": 0.1208, "step": 25535 }, { "epoch": 0.4522307375579434, "grad_norm": 0.6405734419822693, "learning_rate": 1.8028176656189742e-05, "loss": 0.0705, "step": 25536 }, { "epoch": 0.4522484470949718, "grad_norm": 0.6380513906478882, "learning_rate": 1.8027334001014036e-05, "loss": 0.0894, "step": 25537 }, { "epoch": 0.45226615663200026, "grad_norm": 0.5235670804977417, "learning_rate": 1.802649133587871e-05, "loss": 0.0662, "step": 25538 }, { "epoch": 0.45228386616902866, "grad_norm": 0.5522752404212952, "learning_rate": 1.8025648660786533e-05, "loss": 0.0616, "step": 25539 }, { "epoch": 0.4523015757060571, "grad_norm": 0.6966513991355896, "learning_rate": 1.802480597574028e-05, "loss": 0.1004, "step": 25540 }, { "epoch": 0.4523192852430855, "grad_norm": 0.4223296642303467, "learning_rate": 1.802396328074272e-05, "loss": 0.0575, "step": 25541 }, { "epoch": 0.45233699478011397, "grad_norm": 0.6676360964775085, "learning_rate": 1.802312057579662e-05, "loss": 0.0879, "step": 25542 }, { "epoch": 0.45235470431714236, "grad_norm": 0.5252627730369568, "learning_rate": 1.8022277860904764e-05, "loss": 0.0447, "step": 25543 }, { "epoch": 0.4523724138541708, "grad_norm": 0.6857199668884277, "learning_rate": 1.802143513606992e-05, "loss": 0.1065, "step": 25544 }, { "epoch": 0.4523901233911993, "grad_norm": 0.8787230253219604, "learning_rate": 1.8020592401294862e-05, "loss": 0.1007, "step": 25545 }, { "epoch": 0.45240783292822767, "grad_norm": 1.107110619544983, "learning_rate": 1.8019749656582363e-05, "loss": 0.1171, "step": 25546 }, { "epoch": 0.4524255424652561, "grad_norm": 0.5049080848693848, "learning_rate": 1.8018906901935188e-05, "loss": 0.0572, "step": 25547 }, { "epoch": 0.4524432520022845, "grad_norm": 0.6789930462837219, "learning_rate": 1.8018064137356114e-05, "loss": 0.0935, "step": 25548 }, { "epoch": 0.452460961539313, "grad_norm": 1.255265712738037, "learning_rate": 1.8017221362847913e-05, "loss": 0.0654, "step": 25549 }, { "epoch": 0.4524786710763414, "grad_norm": 0.7073007822036743, "learning_rate": 1.8016378578413367e-05, "loss": 0.0879, "step": 25550 }, { "epoch": 0.45249638061336983, "grad_norm": 0.7846774458885193, "learning_rate": 1.801553578405523e-05, "loss": 0.0706, "step": 25551 }, { "epoch": 0.45251409015039823, "grad_norm": 0.5765010118484497, "learning_rate": 1.8014692979776292e-05, "loss": 0.0568, "step": 25552 }, { "epoch": 0.4525317996874267, "grad_norm": 0.7653738260269165, "learning_rate": 1.801385016557932e-05, "loss": 0.0647, "step": 25553 }, { "epoch": 0.4525495092244551, "grad_norm": 0.9742051959037781, "learning_rate": 1.801300734146708e-05, "loss": 0.0681, "step": 25554 }, { "epoch": 0.45256721876148354, "grad_norm": 0.5419345498085022, "learning_rate": 1.8012164507442356e-05, "loss": 0.0727, "step": 25555 }, { "epoch": 0.45258492829851193, "grad_norm": 0.6356462240219116, "learning_rate": 1.801132166350791e-05, "loss": 0.0789, "step": 25556 }, { "epoch": 0.4526026378355404, "grad_norm": 0.7584607005119324, "learning_rate": 1.8010478809666525e-05, "loss": 0.056, "step": 25557 }, { "epoch": 0.4526203473725688, "grad_norm": 0.562775731086731, "learning_rate": 1.8009635945920964e-05, "loss": 0.079, "step": 25558 }, { "epoch": 0.45263805690959724, "grad_norm": 0.4741467833518982, "learning_rate": 1.800879307227401e-05, "loss": 0.049, "step": 25559 }, { "epoch": 0.4526557664466257, "grad_norm": 0.5786338448524475, "learning_rate": 1.8007950188728435e-05, "loss": 0.0498, "step": 25560 }, { "epoch": 0.4526734759836541, "grad_norm": 1.0763405561447144, "learning_rate": 1.8007107295287e-05, "loss": 0.0853, "step": 25561 }, { "epoch": 0.45269118552068255, "grad_norm": 0.47922930121421814, "learning_rate": 1.800626439195249e-05, "loss": 0.0748, "step": 25562 }, { "epoch": 0.45270889505771095, "grad_norm": 0.715269923210144, "learning_rate": 1.8005421478727673e-05, "loss": 0.0966, "step": 25563 }, { "epoch": 0.4527266045947394, "grad_norm": 0.5091957449913025, "learning_rate": 1.8004578555615324e-05, "loss": 0.0699, "step": 25564 }, { "epoch": 0.4527443141317678, "grad_norm": 0.9413061141967773, "learning_rate": 1.8003735622618216e-05, "loss": 0.0849, "step": 25565 }, { "epoch": 0.45276202366879625, "grad_norm": 0.6573640704154968, "learning_rate": 1.8002892679739117e-05, "loss": 0.0915, "step": 25566 }, { "epoch": 0.45277973320582465, "grad_norm": 0.9521732926368713, "learning_rate": 1.8002049726980808e-05, "loss": 0.0513, "step": 25567 }, { "epoch": 0.4527974427428531, "grad_norm": 0.5187568068504333, "learning_rate": 1.800120676434606e-05, "loss": 0.0556, "step": 25568 }, { "epoch": 0.4528151522798815, "grad_norm": 0.3380104601383209, "learning_rate": 1.800036379183765e-05, "loss": 0.0521, "step": 25569 }, { "epoch": 0.45283286181690996, "grad_norm": 0.7521780729293823, "learning_rate": 1.799952080945834e-05, "loss": 0.0722, "step": 25570 }, { "epoch": 0.45285057135393836, "grad_norm": 0.3426322937011719, "learning_rate": 1.7998677817210907e-05, "loss": 0.0785, "step": 25571 }, { "epoch": 0.4528682808909668, "grad_norm": 0.7138980627059937, "learning_rate": 1.7997834815098133e-05, "loss": 0.0679, "step": 25572 }, { "epoch": 0.4528859904279952, "grad_norm": 1.1110155582427979, "learning_rate": 1.7996991803122786e-05, "loss": 0.0985, "step": 25573 }, { "epoch": 0.45290369996502367, "grad_norm": 0.8960241079330444, "learning_rate": 1.7996148781287636e-05, "loss": 0.0908, "step": 25574 }, { "epoch": 0.4529214095020521, "grad_norm": 0.647311270236969, "learning_rate": 1.7995305749595465e-05, "loss": 0.0547, "step": 25575 }, { "epoch": 0.4529391190390805, "grad_norm": 0.4350755214691162, "learning_rate": 1.7994462708049035e-05, "loss": 0.0606, "step": 25576 }, { "epoch": 0.45295682857610897, "grad_norm": 0.9276055693626404, "learning_rate": 1.7993619656651127e-05, "loss": 0.0708, "step": 25577 }, { "epoch": 0.45297453811313737, "grad_norm": 0.4901144206523895, "learning_rate": 1.7992776595404517e-05, "loss": 0.0587, "step": 25578 }, { "epoch": 0.4529922476501658, "grad_norm": 0.5329846143722534, "learning_rate": 1.799193352431197e-05, "loss": 0.0783, "step": 25579 }, { "epoch": 0.4530099571871942, "grad_norm": 0.6198931932449341, "learning_rate": 1.7991090443376266e-05, "loss": 0.0721, "step": 25580 }, { "epoch": 0.4530276667242227, "grad_norm": 0.7490860223770142, "learning_rate": 1.7990247352600173e-05, "loss": 0.0748, "step": 25581 }, { "epoch": 0.4530453762612511, "grad_norm": 0.7001135945320129, "learning_rate": 1.7989404251986476e-05, "loss": 0.0761, "step": 25582 }, { "epoch": 0.45306308579827953, "grad_norm": 0.6124378442764282, "learning_rate": 1.7988561141537937e-05, "loss": 0.1136, "step": 25583 }, { "epoch": 0.45308079533530793, "grad_norm": 0.6844950318336487, "learning_rate": 1.7987718021257335e-05, "loss": 0.0653, "step": 25584 }, { "epoch": 0.4530985048723364, "grad_norm": 0.6955217123031616, "learning_rate": 1.7986874891147444e-05, "loss": 0.0923, "step": 25585 }, { "epoch": 0.4531162144093648, "grad_norm": 0.6659115552902222, "learning_rate": 1.7986031751211032e-05, "loss": 0.0656, "step": 25586 }, { "epoch": 0.45313392394639324, "grad_norm": 0.8801599740982056, "learning_rate": 1.7985188601450885e-05, "loss": 0.0983, "step": 25587 }, { "epoch": 0.45315163348342163, "grad_norm": 0.3591504991054535, "learning_rate": 1.7984345441869763e-05, "loss": 0.0849, "step": 25588 }, { "epoch": 0.4531693430204501, "grad_norm": 1.046940565109253, "learning_rate": 1.7983502272470452e-05, "loss": 0.1263, "step": 25589 }, { "epoch": 0.45318705255747854, "grad_norm": 0.6279469728469849, "learning_rate": 1.7982659093255713e-05, "loss": 0.0549, "step": 25590 }, { "epoch": 0.45320476209450694, "grad_norm": 0.4079444706439972, "learning_rate": 1.798181590422833e-05, "loss": 0.0794, "step": 25591 }, { "epoch": 0.4532224716315354, "grad_norm": 0.683923602104187, "learning_rate": 1.7980972705391078e-05, "loss": 0.0808, "step": 25592 }, { "epoch": 0.4532401811685638, "grad_norm": 0.9247773289680481, "learning_rate": 1.7980129496746728e-05, "loss": 0.1053, "step": 25593 }, { "epoch": 0.45325789070559225, "grad_norm": 0.7244678735733032, "learning_rate": 1.7979286278298053e-05, "loss": 0.0868, "step": 25594 }, { "epoch": 0.45327560024262065, "grad_norm": 0.5704255700111389, "learning_rate": 1.7978443050047817e-05, "loss": 0.0679, "step": 25595 }, { "epoch": 0.4532933097796491, "grad_norm": 0.8058216571807861, "learning_rate": 1.7977599811998814e-05, "loss": 0.0761, "step": 25596 }, { "epoch": 0.4533110193166775, "grad_norm": 0.8810042142868042, "learning_rate": 1.797675656415381e-05, "loss": 0.0773, "step": 25597 }, { "epoch": 0.45332872885370595, "grad_norm": 0.5879635214805603, "learning_rate": 1.7975913306515576e-05, "loss": 0.0768, "step": 25598 }, { "epoch": 0.45334643839073435, "grad_norm": 0.6415538191795349, "learning_rate": 1.7975070039086886e-05, "loss": 0.0571, "step": 25599 }, { "epoch": 0.4533641479277628, "grad_norm": 0.6251271963119507, "learning_rate": 1.7974226761870516e-05, "loss": 0.059, "step": 25600 }, { "epoch": 0.4533818574647912, "grad_norm": 0.532392144203186, "learning_rate": 1.7973383474869245e-05, "loss": 0.069, "step": 25601 }, { "epoch": 0.45339956700181966, "grad_norm": 0.5335893034934998, "learning_rate": 1.7972540178085842e-05, "loss": 0.1052, "step": 25602 }, { "epoch": 0.45341727653884806, "grad_norm": 0.7293741703033447, "learning_rate": 1.797169687152308e-05, "loss": 0.0725, "step": 25603 }, { "epoch": 0.4534349860758765, "grad_norm": 0.8639202117919922, "learning_rate": 1.7970853555183736e-05, "loss": 0.1151, "step": 25604 }, { "epoch": 0.45345269561290497, "grad_norm": 0.7166875600814819, "learning_rate": 1.7970010229070584e-05, "loss": 0.0968, "step": 25605 }, { "epoch": 0.45347040514993336, "grad_norm": 0.8051171898841858, "learning_rate": 1.79691668931864e-05, "loss": 0.075, "step": 25606 }, { "epoch": 0.4534881146869618, "grad_norm": 1.0511122941970825, "learning_rate": 1.796832354753396e-05, "loss": 0.1136, "step": 25607 }, { "epoch": 0.4535058242239902, "grad_norm": 0.6136294603347778, "learning_rate": 1.7967480192116036e-05, "loss": 0.0949, "step": 25608 }, { "epoch": 0.45352353376101867, "grad_norm": 1.00754976272583, "learning_rate": 1.79666368269354e-05, "loss": 0.0802, "step": 25609 }, { "epoch": 0.45354124329804707, "grad_norm": 0.8848549723625183, "learning_rate": 1.7965793451994823e-05, "loss": 0.1036, "step": 25610 }, { "epoch": 0.4535589528350755, "grad_norm": 0.7472349405288696, "learning_rate": 1.796495006729709e-05, "loss": 0.0824, "step": 25611 }, { "epoch": 0.4535766623721039, "grad_norm": 0.7351040840148926, "learning_rate": 1.7964106672844978e-05, "loss": 0.0705, "step": 25612 }, { "epoch": 0.4535943719091324, "grad_norm": 0.6952922940254211, "learning_rate": 1.7963263268641246e-05, "loss": 0.0761, "step": 25613 }, { "epoch": 0.4536120814461608, "grad_norm": 0.8746352195739746, "learning_rate": 1.796241985468868e-05, "loss": 0.0916, "step": 25614 }, { "epoch": 0.45362979098318923, "grad_norm": 0.7514142990112305, "learning_rate": 1.796157643099005e-05, "loss": 0.0858, "step": 25615 }, { "epoch": 0.45364750052021763, "grad_norm": 0.5476568937301636, "learning_rate": 1.796073299754814e-05, "loss": 0.0699, "step": 25616 }, { "epoch": 0.4536652100572461, "grad_norm": 0.997830331325531, "learning_rate": 1.7959889554365717e-05, "loss": 0.0544, "step": 25617 }, { "epoch": 0.4536829195942745, "grad_norm": 0.6403471231460571, "learning_rate": 1.795904610144555e-05, "loss": 0.0623, "step": 25618 }, { "epoch": 0.45370062913130293, "grad_norm": 0.6325637102127075, "learning_rate": 1.795820263879042e-05, "loss": 0.0708, "step": 25619 }, { "epoch": 0.4537183386683314, "grad_norm": 0.9527604579925537, "learning_rate": 1.7957359166403105e-05, "loss": 0.1253, "step": 25620 }, { "epoch": 0.4537360482053598, "grad_norm": 0.7328715324401855, "learning_rate": 1.7956515684286386e-05, "loss": 0.0992, "step": 25621 }, { "epoch": 0.45375375774238824, "grad_norm": 0.8866108655929565, "learning_rate": 1.7955672192443018e-05, "loss": 0.0717, "step": 25622 }, { "epoch": 0.45377146727941664, "grad_norm": 1.1201865673065186, "learning_rate": 1.7954828690875796e-05, "loss": 0.117, "step": 25623 }, { "epoch": 0.4537891768164451, "grad_norm": 0.6661069989204407, "learning_rate": 1.7953985179587482e-05, "loss": 0.0652, "step": 25624 }, { "epoch": 0.4538068863534735, "grad_norm": 0.8035378456115723, "learning_rate": 1.7953141658580854e-05, "loss": 0.0669, "step": 25625 }, { "epoch": 0.45382459589050195, "grad_norm": 0.6634271740913391, "learning_rate": 1.795229812785869e-05, "loss": 0.0678, "step": 25626 }, { "epoch": 0.45384230542753035, "grad_norm": 0.4197116792201996, "learning_rate": 1.7951454587423766e-05, "loss": 0.0466, "step": 25627 }, { "epoch": 0.4538600149645588, "grad_norm": 0.6965797543525696, "learning_rate": 1.795061103727885e-05, "loss": 0.073, "step": 25628 }, { "epoch": 0.4538777245015872, "grad_norm": 0.5745123624801636, "learning_rate": 1.7949767477426728e-05, "loss": 0.063, "step": 25629 }, { "epoch": 0.45389543403861565, "grad_norm": 0.5994323492050171, "learning_rate": 1.7948923907870167e-05, "loss": 0.0523, "step": 25630 }, { "epoch": 0.45391314357564405, "grad_norm": 0.646372377872467, "learning_rate": 1.7948080328611945e-05, "loss": 0.0651, "step": 25631 }, { "epoch": 0.4539308531126725, "grad_norm": 0.671218991279602, "learning_rate": 1.7947236739654836e-05, "loss": 0.066, "step": 25632 }, { "epoch": 0.4539485626497009, "grad_norm": 0.8837151527404785, "learning_rate": 1.7946393141001616e-05, "loss": 0.0927, "step": 25633 }, { "epoch": 0.45396627218672936, "grad_norm": 0.5665481686592102, "learning_rate": 1.794554953265506e-05, "loss": 0.0668, "step": 25634 }, { "epoch": 0.4539839817237578, "grad_norm": 0.5364488959312439, "learning_rate": 1.7944705914617945e-05, "loss": 0.0526, "step": 25635 }, { "epoch": 0.4540016912607862, "grad_norm": 0.4988410770893097, "learning_rate": 1.7943862286893043e-05, "loss": 0.0652, "step": 25636 }, { "epoch": 0.45401940079781467, "grad_norm": 0.6820729970932007, "learning_rate": 1.794301864948314e-05, "loss": 0.0892, "step": 25637 }, { "epoch": 0.45403711033484306, "grad_norm": 0.5391271114349365, "learning_rate": 1.7942175002390995e-05, "loss": 0.0737, "step": 25638 }, { "epoch": 0.4540548198718715, "grad_norm": 0.7209503054618835, "learning_rate": 1.7941331345619396e-05, "loss": 0.0637, "step": 25639 }, { "epoch": 0.4540725294088999, "grad_norm": 1.3017710447311401, "learning_rate": 1.7940487679171112e-05, "loss": 0.0824, "step": 25640 }, { "epoch": 0.45409023894592837, "grad_norm": 0.817654013633728, "learning_rate": 1.7939644003048923e-05, "loss": 0.0497, "step": 25641 }, { "epoch": 0.45410794848295677, "grad_norm": 0.5873557925224304, "learning_rate": 1.7938800317255603e-05, "loss": 0.0787, "step": 25642 }, { "epoch": 0.4541256580199852, "grad_norm": 0.9995366334915161, "learning_rate": 1.793795662179392e-05, "loss": 0.0841, "step": 25643 }, { "epoch": 0.4541433675570136, "grad_norm": 0.5435360074043274, "learning_rate": 1.7937112916666663e-05, "loss": 0.0699, "step": 25644 }, { "epoch": 0.4541610770940421, "grad_norm": 1.4466747045516968, "learning_rate": 1.79362692018766e-05, "loss": 0.0829, "step": 25645 }, { "epoch": 0.4541787866310705, "grad_norm": 0.8308596014976501, "learning_rate": 1.7935425477426515e-05, "loss": 0.0763, "step": 25646 }, { "epoch": 0.45419649616809893, "grad_norm": 1.4922304153442383, "learning_rate": 1.7934581743319168e-05, "loss": 0.0757, "step": 25647 }, { "epoch": 0.4542142057051273, "grad_norm": 0.672001838684082, "learning_rate": 1.7933737999557345e-05, "loss": 0.0686, "step": 25648 }, { "epoch": 0.4542319152421558, "grad_norm": 0.5234041810035706, "learning_rate": 1.7932894246143827e-05, "loss": 0.0853, "step": 25649 }, { "epoch": 0.45424962477918424, "grad_norm": 0.7138240337371826, "learning_rate": 1.793205048308138e-05, "loss": 0.0725, "step": 25650 }, { "epoch": 0.45426733431621263, "grad_norm": 0.7652578353881836, "learning_rate": 1.7931206710372782e-05, "loss": 0.0772, "step": 25651 }, { "epoch": 0.4542850438532411, "grad_norm": 0.6872710585594177, "learning_rate": 1.793036292802081e-05, "loss": 0.1024, "step": 25652 }, { "epoch": 0.4543027533902695, "grad_norm": 0.8117712736129761, "learning_rate": 1.7929519136028243e-05, "loss": 0.0656, "step": 25653 }, { "epoch": 0.45432046292729794, "grad_norm": 0.5504493713378906, "learning_rate": 1.792867533439785e-05, "loss": 0.0797, "step": 25654 }, { "epoch": 0.45433817246432634, "grad_norm": 0.8012197613716125, "learning_rate": 1.7927831523132416e-05, "loss": 0.07, "step": 25655 }, { "epoch": 0.4543558820013548, "grad_norm": 0.5574668049812317, "learning_rate": 1.7926987702234712e-05, "loss": 0.0695, "step": 25656 }, { "epoch": 0.4543735915383832, "grad_norm": 0.6332299113273621, "learning_rate": 1.7926143871707514e-05, "loss": 0.0705, "step": 25657 }, { "epoch": 0.45439130107541165, "grad_norm": 0.8410632610321045, "learning_rate": 1.7925300031553596e-05, "loss": 0.0763, "step": 25658 }, { "epoch": 0.45440901061244005, "grad_norm": 0.9337643384933472, "learning_rate": 1.7924456181775738e-05, "loss": 0.0741, "step": 25659 }, { "epoch": 0.4544267201494685, "grad_norm": 0.6795789003372192, "learning_rate": 1.7923612322376716e-05, "loss": 0.0544, "step": 25660 }, { "epoch": 0.4544444296864969, "grad_norm": 0.6842604279518127, "learning_rate": 1.7922768453359304e-05, "loss": 0.0443, "step": 25661 }, { "epoch": 0.45446213922352535, "grad_norm": 0.2435477077960968, "learning_rate": 1.7921924574726282e-05, "loss": 0.046, "step": 25662 }, { "epoch": 0.45447984876055375, "grad_norm": 0.9843154549598694, "learning_rate": 1.7921080686480423e-05, "loss": 0.0672, "step": 25663 }, { "epoch": 0.4544975582975822, "grad_norm": 0.517738401889801, "learning_rate": 1.79202367886245e-05, "loss": 0.0807, "step": 25664 }, { "epoch": 0.45451526783461066, "grad_norm": 0.49325990676879883, "learning_rate": 1.79193928811613e-05, "loss": 0.1058, "step": 25665 }, { "epoch": 0.45453297737163906, "grad_norm": 0.6836180686950684, "learning_rate": 1.7918548964093588e-05, "loss": 0.0772, "step": 25666 }, { "epoch": 0.4545506869086675, "grad_norm": 0.8447715044021606, "learning_rate": 1.7917705037424145e-05, "loss": 0.0903, "step": 25667 }, { "epoch": 0.4545683964456959, "grad_norm": 0.6041799187660217, "learning_rate": 1.7916861101155745e-05, "loss": 0.0666, "step": 25668 }, { "epoch": 0.45458610598272436, "grad_norm": 0.6311625242233276, "learning_rate": 1.7916017155291174e-05, "loss": 0.0917, "step": 25669 }, { "epoch": 0.45460381551975276, "grad_norm": 0.5501230359077454, "learning_rate": 1.79151731998332e-05, "loss": 0.0966, "step": 25670 }, { "epoch": 0.4546215250567812, "grad_norm": 0.8019275665283203, "learning_rate": 1.79143292347846e-05, "loss": 0.089, "step": 25671 }, { "epoch": 0.4546392345938096, "grad_norm": 0.6311920285224915, "learning_rate": 1.791348526014815e-05, "loss": 0.1012, "step": 25672 }, { "epoch": 0.45465694413083807, "grad_norm": 0.5899824500083923, "learning_rate": 1.7912641275926628e-05, "loss": 0.0975, "step": 25673 }, { "epoch": 0.45467465366786647, "grad_norm": 0.7351729869842529, "learning_rate": 1.791179728212281e-05, "loss": 0.0727, "step": 25674 }, { "epoch": 0.4546923632048949, "grad_norm": 0.5356541872024536, "learning_rate": 1.7910953278739474e-05, "loss": 0.0925, "step": 25675 }, { "epoch": 0.4547100727419233, "grad_norm": 0.38956260681152344, "learning_rate": 1.7910109265779397e-05, "loss": 0.0257, "step": 25676 }, { "epoch": 0.4547277822789518, "grad_norm": 1.020986557006836, "learning_rate": 1.7909265243245354e-05, "loss": 0.1129, "step": 25677 }, { "epoch": 0.4547454918159802, "grad_norm": 0.6425068378448486, "learning_rate": 1.7908421211140127e-05, "loss": 0.0942, "step": 25678 }, { "epoch": 0.45476320135300863, "grad_norm": 0.6883454322814941, "learning_rate": 1.7907577169466486e-05, "loss": 0.0628, "step": 25679 }, { "epoch": 0.4547809108900371, "grad_norm": 0.5319927334785461, "learning_rate": 1.7906733118227208e-05, "loss": 0.0703, "step": 25680 }, { "epoch": 0.4547986204270655, "grad_norm": 1.181584358215332, "learning_rate": 1.7905889057425074e-05, "loss": 0.0941, "step": 25681 }, { "epoch": 0.45481632996409393, "grad_norm": 0.9273459911346436, "learning_rate": 1.7905044987062854e-05, "loss": 0.0635, "step": 25682 }, { "epoch": 0.45483403950112233, "grad_norm": 0.45556098222732544, "learning_rate": 1.790420090714334e-05, "loss": 0.0693, "step": 25683 }, { "epoch": 0.4548517490381508, "grad_norm": 0.591033935546875, "learning_rate": 1.790335681766929e-05, "loss": 0.0752, "step": 25684 }, { "epoch": 0.4548694585751792, "grad_norm": 0.8084607124328613, "learning_rate": 1.7902512718643494e-05, "loss": 0.0808, "step": 25685 }, { "epoch": 0.45488716811220764, "grad_norm": 0.7962768077850342, "learning_rate": 1.7901668610068722e-05, "loss": 0.0803, "step": 25686 }, { "epoch": 0.45490487764923604, "grad_norm": 0.6214559674263, "learning_rate": 1.7900824491947756e-05, "loss": 0.0561, "step": 25687 }, { "epoch": 0.4549225871862645, "grad_norm": 0.7323096990585327, "learning_rate": 1.789998036428337e-05, "loss": 0.0854, "step": 25688 }, { "epoch": 0.4549402967232929, "grad_norm": 0.49959149956703186, "learning_rate": 1.7899136227078343e-05, "loss": 0.0906, "step": 25689 }, { "epoch": 0.45495800626032135, "grad_norm": 0.8308025598526001, "learning_rate": 1.789829208033545e-05, "loss": 0.0612, "step": 25690 }, { "epoch": 0.45497571579734974, "grad_norm": 0.4993303418159485, "learning_rate": 1.7897447924057466e-05, "loss": 0.0829, "step": 25691 }, { "epoch": 0.4549934253343782, "grad_norm": 0.7359415888786316, "learning_rate": 1.7896603758247176e-05, "loss": 0.0831, "step": 25692 }, { "epoch": 0.4550111348714066, "grad_norm": 0.7666200399398804, "learning_rate": 1.789575958290735e-05, "loss": 0.0849, "step": 25693 }, { "epoch": 0.45502884440843505, "grad_norm": 0.7672370672225952, "learning_rate": 1.789491539804077e-05, "loss": 0.0727, "step": 25694 }, { "epoch": 0.4550465539454635, "grad_norm": 0.5609625577926636, "learning_rate": 1.789407120365021e-05, "loss": 0.0882, "step": 25695 }, { "epoch": 0.4550642634824919, "grad_norm": 0.30009517073631287, "learning_rate": 1.7893226999738447e-05, "loss": 0.0718, "step": 25696 }, { "epoch": 0.45508197301952036, "grad_norm": 0.9188783168792725, "learning_rate": 1.7892382786308263e-05, "loss": 0.0954, "step": 25697 }, { "epoch": 0.45509968255654876, "grad_norm": 0.9053890109062195, "learning_rate": 1.789153856336243e-05, "loss": 0.096, "step": 25698 }, { "epoch": 0.4551173920935772, "grad_norm": 0.8483542203903198, "learning_rate": 1.789069433090373e-05, "loss": 0.1, "step": 25699 }, { "epoch": 0.4551351016306056, "grad_norm": 0.7045567631721497, "learning_rate": 1.7889850088934935e-05, "loss": 0.058, "step": 25700 }, { "epoch": 0.45515281116763406, "grad_norm": 0.7631939053535461, "learning_rate": 1.7889005837458823e-05, "loss": 0.0771, "step": 25701 }, { "epoch": 0.45517052070466246, "grad_norm": 0.7628574371337891, "learning_rate": 1.788816157647818e-05, "loss": 0.0855, "step": 25702 }, { "epoch": 0.4551882302416909, "grad_norm": 0.5324682593345642, "learning_rate": 1.788731730599578e-05, "loss": 0.0847, "step": 25703 }, { "epoch": 0.4552059397787193, "grad_norm": 0.7967996597290039, "learning_rate": 1.788647302601439e-05, "loss": 0.0865, "step": 25704 }, { "epoch": 0.45522364931574777, "grad_norm": 0.7494239211082458, "learning_rate": 1.7885628736536798e-05, "loss": 0.0669, "step": 25705 }, { "epoch": 0.45524135885277617, "grad_norm": 0.7986389994621277, "learning_rate": 1.788478443756578e-05, "loss": 0.0833, "step": 25706 }, { "epoch": 0.4552590683898046, "grad_norm": 0.5144603252410889, "learning_rate": 1.788394012910411e-05, "loss": 0.0951, "step": 25707 }, { "epoch": 0.455276777926833, "grad_norm": 0.6205397844314575, "learning_rate": 1.7883095811154578e-05, "loss": 0.0729, "step": 25708 }, { "epoch": 0.4552944874638615, "grad_norm": 0.7658795118331909, "learning_rate": 1.7882251483719946e-05, "loss": 0.0625, "step": 25709 }, { "epoch": 0.45531219700088993, "grad_norm": 0.9363434910774231, "learning_rate": 1.7881407146802994e-05, "loss": 0.1052, "step": 25710 }, { "epoch": 0.4553299065379183, "grad_norm": 0.741661787033081, "learning_rate": 1.788056280040651e-05, "loss": 0.083, "step": 25711 }, { "epoch": 0.4553476160749468, "grad_norm": 0.564089298248291, "learning_rate": 1.7879718444533265e-05, "loss": 0.1234, "step": 25712 }, { "epoch": 0.4553653256119752, "grad_norm": 0.7078623175621033, "learning_rate": 1.787887407918604e-05, "loss": 0.0803, "step": 25713 }, { "epoch": 0.45538303514900363, "grad_norm": 0.8808640837669373, "learning_rate": 1.78780297043676e-05, "loss": 0.0743, "step": 25714 }, { "epoch": 0.45540074468603203, "grad_norm": 0.8879724740982056, "learning_rate": 1.7877185320080745e-05, "loss": 0.0936, "step": 25715 }, { "epoch": 0.4554184542230605, "grad_norm": 0.7942731976509094, "learning_rate": 1.7876340926328234e-05, "loss": 0.0729, "step": 25716 }, { "epoch": 0.4554361637600889, "grad_norm": 0.6747115254402161, "learning_rate": 1.7875496523112855e-05, "loss": 0.0739, "step": 25717 }, { "epoch": 0.45545387329711734, "grad_norm": 1.106185793876648, "learning_rate": 1.7874652110437384e-05, "loss": 0.0937, "step": 25718 }, { "epoch": 0.45547158283414574, "grad_norm": 0.550780713558197, "learning_rate": 1.7873807688304595e-05, "loss": 0.0868, "step": 25719 }, { "epoch": 0.4554892923711742, "grad_norm": 0.8845083117485046, "learning_rate": 1.7872963256717274e-05, "loss": 0.0659, "step": 25720 }, { "epoch": 0.4555070019082026, "grad_norm": 1.3221136331558228, "learning_rate": 1.7872118815678194e-05, "loss": 0.0579, "step": 25721 }, { "epoch": 0.45552471144523105, "grad_norm": 0.6145464181900024, "learning_rate": 1.7871274365190136e-05, "loss": 0.0843, "step": 25722 }, { "epoch": 0.45554242098225944, "grad_norm": 0.7950524687767029, "learning_rate": 1.7870429905255868e-05, "loss": 0.0821, "step": 25723 }, { "epoch": 0.4555601305192879, "grad_norm": 0.5199078321456909, "learning_rate": 1.786958543587818e-05, "loss": 0.0785, "step": 25724 }, { "epoch": 0.45557784005631635, "grad_norm": 0.49166056513786316, "learning_rate": 1.7868740957059844e-05, "loss": 0.0668, "step": 25725 }, { "epoch": 0.45559554959334475, "grad_norm": 0.5444347858428955, "learning_rate": 1.7867896468803644e-05, "loss": 0.0738, "step": 25726 }, { "epoch": 0.4556132591303732, "grad_norm": 0.8781057000160217, "learning_rate": 1.7867051971112354e-05, "loss": 0.116, "step": 25727 }, { "epoch": 0.4556309686674016, "grad_norm": 1.2580251693725586, "learning_rate": 1.7866207463988752e-05, "loss": 0.1, "step": 25728 }, { "epoch": 0.45564867820443006, "grad_norm": 1.0014468431472778, "learning_rate": 1.7865362947435618e-05, "loss": 0.103, "step": 25729 }, { "epoch": 0.45566638774145846, "grad_norm": 0.8890479207038879, "learning_rate": 1.7864518421455725e-05, "loss": 0.0948, "step": 25730 }, { "epoch": 0.4556840972784869, "grad_norm": 0.7045716643333435, "learning_rate": 1.786367388605186e-05, "loss": 0.0768, "step": 25731 }, { "epoch": 0.4557018068155153, "grad_norm": 0.7019326090812683, "learning_rate": 1.78628293412268e-05, "loss": 0.0712, "step": 25732 }, { "epoch": 0.45571951635254376, "grad_norm": 0.4518376588821411, "learning_rate": 1.786198478698332e-05, "loss": 0.0797, "step": 25733 }, { "epoch": 0.45573722588957216, "grad_norm": 0.8698550462722778, "learning_rate": 1.7861140223324198e-05, "loss": 0.0798, "step": 25734 }, { "epoch": 0.4557549354266006, "grad_norm": 0.7791723608970642, "learning_rate": 1.7860295650252213e-05, "loss": 0.0937, "step": 25735 }, { "epoch": 0.455772644963629, "grad_norm": 0.6449646949768066, "learning_rate": 1.7859451067770147e-05, "loss": 0.0734, "step": 25736 }, { "epoch": 0.45579035450065747, "grad_norm": 0.4526538848876953, "learning_rate": 1.7858606475880774e-05, "loss": 0.0594, "step": 25737 }, { "epoch": 0.45580806403768587, "grad_norm": 1.3143423795700073, "learning_rate": 1.785776187458688e-05, "loss": 0.0741, "step": 25738 }, { "epoch": 0.4558257735747143, "grad_norm": 0.6317011117935181, "learning_rate": 1.785691726389123e-05, "loss": 0.0897, "step": 25739 }, { "epoch": 0.4558434831117428, "grad_norm": 0.6251835227012634, "learning_rate": 1.7856072643796618e-05, "loss": 0.0835, "step": 25740 }, { "epoch": 0.4558611926487712, "grad_norm": 0.6267066597938538, "learning_rate": 1.7855228014305812e-05, "loss": 0.0818, "step": 25741 }, { "epoch": 0.45587890218579963, "grad_norm": 0.46638667583465576, "learning_rate": 1.7854383375421597e-05, "loss": 0.0581, "step": 25742 }, { "epoch": 0.455896611722828, "grad_norm": 0.6590883731842041, "learning_rate": 1.7853538727146748e-05, "loss": 0.0986, "step": 25743 }, { "epoch": 0.4559143212598565, "grad_norm": 0.73089200258255, "learning_rate": 1.7852694069484047e-05, "loss": 0.0746, "step": 25744 }, { "epoch": 0.4559320307968849, "grad_norm": 1.0351468324661255, "learning_rate": 1.785184940243627e-05, "loss": 0.0864, "step": 25745 }, { "epoch": 0.45594974033391333, "grad_norm": 0.7506029009819031, "learning_rate": 1.7851004726006196e-05, "loss": 0.0953, "step": 25746 }, { "epoch": 0.45596744987094173, "grad_norm": 0.6962270736694336, "learning_rate": 1.7850160040196607e-05, "loss": 0.0639, "step": 25747 }, { "epoch": 0.4559851594079702, "grad_norm": 0.5867238640785217, "learning_rate": 1.7849315345010275e-05, "loss": 0.1052, "step": 25748 }, { "epoch": 0.4560028689449986, "grad_norm": 0.7724328637123108, "learning_rate": 1.784847064044999e-05, "loss": 0.0518, "step": 25749 }, { "epoch": 0.45602057848202704, "grad_norm": 1.1791059970855713, "learning_rate": 1.7847625926518522e-05, "loss": 0.1052, "step": 25750 }, { "epoch": 0.45603828801905544, "grad_norm": 0.5464195013046265, "learning_rate": 1.7846781203218652e-05, "loss": 0.0886, "step": 25751 }, { "epoch": 0.4560559975560839, "grad_norm": 0.6623222827911377, "learning_rate": 1.7845936470553158e-05, "loss": 0.1341, "step": 25752 }, { "epoch": 0.45607370709311235, "grad_norm": 0.5344114303588867, "learning_rate": 1.7845091728524823e-05, "loss": 0.058, "step": 25753 }, { "epoch": 0.45609141663014074, "grad_norm": 0.38914766907691956, "learning_rate": 1.7844246977136425e-05, "loss": 0.053, "step": 25754 }, { "epoch": 0.4561091261671692, "grad_norm": 0.6452866792678833, "learning_rate": 1.784340221639074e-05, "loss": 0.049, "step": 25755 }, { "epoch": 0.4561268357041976, "grad_norm": 0.8409972786903381, "learning_rate": 1.784255744629055e-05, "loss": 0.0544, "step": 25756 }, { "epoch": 0.45614454524122605, "grad_norm": 0.3612308204174042, "learning_rate": 1.7841712666838636e-05, "loss": 0.0626, "step": 25757 }, { "epoch": 0.45616225477825445, "grad_norm": 0.5410093069076538, "learning_rate": 1.784086787803777e-05, "loss": 0.0664, "step": 25758 }, { "epoch": 0.4561799643152829, "grad_norm": 0.7342100739479065, "learning_rate": 1.7840023079890743e-05, "loss": 0.0931, "step": 25759 }, { "epoch": 0.4561976738523113, "grad_norm": 0.5585290789604187, "learning_rate": 1.7839178272400324e-05, "loss": 0.0943, "step": 25760 }, { "epoch": 0.45621538338933976, "grad_norm": 0.8622803688049316, "learning_rate": 1.7838333455569293e-05, "loss": 0.101, "step": 25761 }, { "epoch": 0.45623309292636816, "grad_norm": 0.761606752872467, "learning_rate": 1.7837488629400432e-05, "loss": 0.0846, "step": 25762 }, { "epoch": 0.4562508024633966, "grad_norm": 0.5945202708244324, "learning_rate": 1.783664379389652e-05, "loss": 0.0685, "step": 25763 }, { "epoch": 0.456268512000425, "grad_norm": 0.59881192445755, "learning_rate": 1.783579894906034e-05, "loss": 0.0641, "step": 25764 }, { "epoch": 0.45628622153745346, "grad_norm": 0.40216633677482605, "learning_rate": 1.7834954094894672e-05, "loss": 0.0778, "step": 25765 }, { "epoch": 0.45630393107448186, "grad_norm": 0.6659069657325745, "learning_rate": 1.7834109231402283e-05, "loss": 0.1197, "step": 25766 }, { "epoch": 0.4563216406115103, "grad_norm": 0.6420918703079224, "learning_rate": 1.7833264358585963e-05, "loss": 0.0813, "step": 25767 }, { "epoch": 0.45633935014853877, "grad_norm": 0.4917606711387634, "learning_rate": 1.7832419476448495e-05, "loss": 0.0507, "step": 25768 }, { "epoch": 0.45635705968556717, "grad_norm": 0.7744791507720947, "learning_rate": 1.783157458499265e-05, "loss": 0.0551, "step": 25769 }, { "epoch": 0.4563747692225956, "grad_norm": 0.5532875061035156, "learning_rate": 1.7830729684221215e-05, "loss": 0.0591, "step": 25770 }, { "epoch": 0.456392478759624, "grad_norm": 0.5999799966812134, "learning_rate": 1.782988477413696e-05, "loss": 0.0817, "step": 25771 }, { "epoch": 0.4564101882966525, "grad_norm": 0.9359021186828613, "learning_rate": 1.782903985474267e-05, "loss": 0.0741, "step": 25772 }, { "epoch": 0.4564278978336809, "grad_norm": 0.5350659489631653, "learning_rate": 1.7828194926041128e-05, "loss": 0.0806, "step": 25773 }, { "epoch": 0.4564456073707093, "grad_norm": 0.8616090416908264, "learning_rate": 1.7827349988035112e-05, "loss": 0.1098, "step": 25774 }, { "epoch": 0.4564633169077377, "grad_norm": 0.6943286061286926, "learning_rate": 1.78265050407274e-05, "loss": 0.0615, "step": 25775 }, { "epoch": 0.4564810264447662, "grad_norm": 0.7079847455024719, "learning_rate": 1.782566008412077e-05, "loss": 0.0631, "step": 25776 }, { "epoch": 0.4564987359817946, "grad_norm": 0.4595019519329071, "learning_rate": 1.7824815118218007e-05, "loss": 0.0455, "step": 25777 }, { "epoch": 0.45651644551882303, "grad_norm": 0.6671257019042969, "learning_rate": 1.782397014302188e-05, "loss": 0.0809, "step": 25778 }, { "epoch": 0.45653415505585143, "grad_norm": 0.4736787974834442, "learning_rate": 1.782312515853519e-05, "loss": 0.0618, "step": 25779 }, { "epoch": 0.4565518645928799, "grad_norm": 0.6803755760192871, "learning_rate": 1.7822280164760694e-05, "loss": 0.0418, "step": 25780 }, { "epoch": 0.4565695741299083, "grad_norm": 0.5770308971405029, "learning_rate": 1.7821435161701182e-05, "loss": 0.0907, "step": 25781 }, { "epoch": 0.45658728366693674, "grad_norm": 0.5292230248451233, "learning_rate": 1.7820590149359437e-05, "loss": 0.082, "step": 25782 }, { "epoch": 0.4566049932039652, "grad_norm": 0.7189542055130005, "learning_rate": 1.7819745127738235e-05, "loss": 0.0907, "step": 25783 }, { "epoch": 0.4566227027409936, "grad_norm": 0.4199639558792114, "learning_rate": 1.781890009684036e-05, "loss": 0.0475, "step": 25784 }, { "epoch": 0.45664041227802205, "grad_norm": 0.8850342035293579, "learning_rate": 1.7818055056668583e-05, "loss": 0.1128, "step": 25785 }, { "epoch": 0.45665812181505044, "grad_norm": 0.7324659824371338, "learning_rate": 1.7817210007225693e-05, "loss": 0.0809, "step": 25786 }, { "epoch": 0.4566758313520789, "grad_norm": 0.8144761919975281, "learning_rate": 1.7816364948514466e-05, "loss": 0.108, "step": 25787 }, { "epoch": 0.4566935408891073, "grad_norm": 1.0996510982513428, "learning_rate": 1.7815519880537682e-05, "loss": 0.116, "step": 25788 }, { "epoch": 0.45671125042613575, "grad_norm": 0.7656295299530029, "learning_rate": 1.7814674803298122e-05, "loss": 0.0651, "step": 25789 }, { "epoch": 0.45672895996316415, "grad_norm": 0.601476788520813, "learning_rate": 1.781382971679857e-05, "loss": 0.0792, "step": 25790 }, { "epoch": 0.4567466695001926, "grad_norm": 0.5101580023765564, "learning_rate": 1.7812984621041802e-05, "loss": 0.0735, "step": 25791 }, { "epoch": 0.456764379037221, "grad_norm": 1.0055650472640991, "learning_rate": 1.78121395160306e-05, "loss": 0.106, "step": 25792 }, { "epoch": 0.45678208857424946, "grad_norm": 0.9765823483467102, "learning_rate": 1.781129440176774e-05, "loss": 0.0858, "step": 25793 }, { "epoch": 0.45679979811127785, "grad_norm": 0.6546497344970703, "learning_rate": 1.781044927825601e-05, "loss": 0.0884, "step": 25794 }, { "epoch": 0.4568175076483063, "grad_norm": 0.6531233787536621, "learning_rate": 1.7809604145498184e-05, "loss": 0.0592, "step": 25795 }, { "epoch": 0.4568352171853347, "grad_norm": 0.7520462870597839, "learning_rate": 1.780875900349704e-05, "loss": 0.0699, "step": 25796 }, { "epoch": 0.45685292672236316, "grad_norm": 0.6056843996047974, "learning_rate": 1.780791385225537e-05, "loss": 0.0717, "step": 25797 }, { "epoch": 0.4568706362593916, "grad_norm": 0.5987430214881897, "learning_rate": 1.7807068691775945e-05, "loss": 0.0717, "step": 25798 }, { "epoch": 0.45688834579642, "grad_norm": 0.7535312175750732, "learning_rate": 1.780622352206155e-05, "loss": 0.0532, "step": 25799 }, { "epoch": 0.45690605533344847, "grad_norm": 1.0341590642929077, "learning_rate": 1.7805378343114955e-05, "loss": 0.1239, "step": 25800 }, { "epoch": 0.45692376487047687, "grad_norm": 0.647649884223938, "learning_rate": 1.7804533154938954e-05, "loss": 0.0785, "step": 25801 }, { "epoch": 0.4569414744075053, "grad_norm": 0.7136515378952026, "learning_rate": 1.7803687957536325e-05, "loss": 0.048, "step": 25802 }, { "epoch": 0.4569591839445337, "grad_norm": 1.6305875778198242, "learning_rate": 1.7802842750909846e-05, "loss": 0.0566, "step": 25803 }, { "epoch": 0.4569768934815622, "grad_norm": 0.9891130924224854, "learning_rate": 1.78019975350623e-05, "loss": 0.1017, "step": 25804 }, { "epoch": 0.4569946030185906, "grad_norm": 0.6782421469688416, "learning_rate": 1.780115230999646e-05, "loss": 0.0925, "step": 25805 }, { "epoch": 0.457012312555619, "grad_norm": 0.5237930417060852, "learning_rate": 1.7800307075715115e-05, "loss": 0.0901, "step": 25806 }, { "epoch": 0.4570300220926474, "grad_norm": 0.4993993043899536, "learning_rate": 1.7799461832221043e-05, "loss": 0.0571, "step": 25807 }, { "epoch": 0.4570477316296759, "grad_norm": 0.8549890518188477, "learning_rate": 1.779861657951702e-05, "loss": 0.072, "step": 25808 }, { "epoch": 0.4570654411667043, "grad_norm": 0.5460250973701477, "learning_rate": 1.779777131760584e-05, "loss": 0.0593, "step": 25809 }, { "epoch": 0.45708315070373273, "grad_norm": 0.4913947284221649, "learning_rate": 1.7796926046490267e-05, "loss": 0.0709, "step": 25810 }, { "epoch": 0.45710086024076113, "grad_norm": 0.8401378989219666, "learning_rate": 1.7796080766173094e-05, "loss": 0.0791, "step": 25811 }, { "epoch": 0.4571185697777896, "grad_norm": 0.4862920045852661, "learning_rate": 1.77952354766571e-05, "loss": 0.0593, "step": 25812 }, { "epoch": 0.45713627931481804, "grad_norm": 0.731866717338562, "learning_rate": 1.7794390177945064e-05, "loss": 0.0736, "step": 25813 }, { "epoch": 0.45715398885184644, "grad_norm": 0.9832105040550232, "learning_rate": 1.7793544870039763e-05, "loss": 0.1281, "step": 25814 }, { "epoch": 0.4571716983888749, "grad_norm": 0.8232518434524536, "learning_rate": 1.7792699552943983e-05, "loss": 0.0788, "step": 25815 }, { "epoch": 0.4571894079259033, "grad_norm": 0.6415732502937317, "learning_rate": 1.7791854226660507e-05, "loss": 0.0543, "step": 25816 }, { "epoch": 0.45720711746293174, "grad_norm": 0.8193429112434387, "learning_rate": 1.779100889119211e-05, "loss": 0.0782, "step": 25817 }, { "epoch": 0.45722482699996014, "grad_norm": 0.5906147360801697, "learning_rate": 1.779016354654158e-05, "loss": 0.0762, "step": 25818 }, { "epoch": 0.4572425365369886, "grad_norm": 0.5659984946250916, "learning_rate": 1.778931819271169e-05, "loss": 0.0941, "step": 25819 }, { "epoch": 0.457260246074017, "grad_norm": 0.6370632648468018, "learning_rate": 1.7788472829705222e-05, "loss": 0.0642, "step": 25820 }, { "epoch": 0.45727795561104545, "grad_norm": 0.9226956963539124, "learning_rate": 1.7787627457524968e-05, "loss": 0.0661, "step": 25821 }, { "epoch": 0.45729566514807385, "grad_norm": 0.7436676025390625, "learning_rate": 1.7786782076173698e-05, "loss": 0.0684, "step": 25822 }, { "epoch": 0.4573133746851023, "grad_norm": 1.0898370742797852, "learning_rate": 1.77859366856542e-05, "loss": 0.0792, "step": 25823 }, { "epoch": 0.4573310842221307, "grad_norm": 0.6075113415718079, "learning_rate": 1.778509128596925e-05, "loss": 0.104, "step": 25824 }, { "epoch": 0.45734879375915916, "grad_norm": 0.5820805430412292, "learning_rate": 1.7784245877121626e-05, "loss": 0.0702, "step": 25825 }, { "epoch": 0.45736650329618755, "grad_norm": 0.649481475353241, "learning_rate": 1.778340045911412e-05, "loss": 0.0828, "step": 25826 }, { "epoch": 0.457384212833216, "grad_norm": 0.7217544913291931, "learning_rate": 1.778255503194951e-05, "loss": 0.0655, "step": 25827 }, { "epoch": 0.45740192237024446, "grad_norm": 0.8209952712059021, "learning_rate": 1.778170959563057e-05, "loss": 0.1001, "step": 25828 }, { "epoch": 0.45741963190727286, "grad_norm": 0.6628758907318115, "learning_rate": 1.778086415016009e-05, "loss": 0.0587, "step": 25829 }, { "epoch": 0.4574373414443013, "grad_norm": 1.075303077697754, "learning_rate": 1.778001869554085e-05, "loss": 0.0778, "step": 25830 }, { "epoch": 0.4574550509813297, "grad_norm": 0.6819196343421936, "learning_rate": 1.7779173231775625e-05, "loss": 0.0762, "step": 25831 }, { "epoch": 0.45747276051835817, "grad_norm": 0.6814588904380798, "learning_rate": 1.7778327758867208e-05, "loss": 0.0859, "step": 25832 }, { "epoch": 0.45749047005538657, "grad_norm": 0.8331215381622314, "learning_rate": 1.7777482276818364e-05, "loss": 0.0661, "step": 25833 }, { "epoch": 0.457508179592415, "grad_norm": 0.4969019293785095, "learning_rate": 1.7776636785631892e-05, "loss": 0.0402, "step": 25834 }, { "epoch": 0.4575258891294434, "grad_norm": 0.4953894019126892, "learning_rate": 1.777579128531056e-05, "loss": 0.075, "step": 25835 }, { "epoch": 0.4575435986664719, "grad_norm": 0.47432276606559753, "learning_rate": 1.7774945775857163e-05, "loss": 0.0736, "step": 25836 }, { "epoch": 0.45756130820350027, "grad_norm": 0.7219680547714233, "learning_rate": 1.777410025727447e-05, "loss": 0.0581, "step": 25837 }, { "epoch": 0.4575790177405287, "grad_norm": 0.29942983388900757, "learning_rate": 1.777325472956527e-05, "loss": 0.0843, "step": 25838 }, { "epoch": 0.4575967272775571, "grad_norm": 0.6363130807876587, "learning_rate": 1.777240919273234e-05, "loss": 0.0836, "step": 25839 }, { "epoch": 0.4576144368145856, "grad_norm": 0.775862991809845, "learning_rate": 1.777156364677846e-05, "loss": 0.0732, "step": 25840 }, { "epoch": 0.457632146351614, "grad_norm": 0.8297954797744751, "learning_rate": 1.7770718091706426e-05, "loss": 0.0718, "step": 25841 }, { "epoch": 0.45764985588864243, "grad_norm": 0.5722125768661499, "learning_rate": 1.7769872527519005e-05, "loss": 0.0969, "step": 25842 }, { "epoch": 0.4576675654256709, "grad_norm": 0.41621798276901245, "learning_rate": 1.7769026954218983e-05, "loss": 0.0423, "step": 25843 }, { "epoch": 0.4576852749626993, "grad_norm": 0.7261747121810913, "learning_rate": 1.776818137180914e-05, "loss": 0.0779, "step": 25844 }, { "epoch": 0.45770298449972774, "grad_norm": 1.029949426651001, "learning_rate": 1.7767335780292268e-05, "loss": 0.0823, "step": 25845 }, { "epoch": 0.45772069403675614, "grad_norm": 0.6541147232055664, "learning_rate": 1.7766490179671134e-05, "loss": 0.0832, "step": 25846 }, { "epoch": 0.4577384035737846, "grad_norm": 0.9303785562515259, "learning_rate": 1.7765644569948533e-05, "loss": 0.1088, "step": 25847 }, { "epoch": 0.457756113110813, "grad_norm": 0.45103010535240173, "learning_rate": 1.7764798951127233e-05, "loss": 0.0636, "step": 25848 }, { "epoch": 0.45777382264784144, "grad_norm": 0.5946276187896729, "learning_rate": 1.776395332321003e-05, "loss": 0.0739, "step": 25849 }, { "epoch": 0.45779153218486984, "grad_norm": 0.8068044781684875, "learning_rate": 1.77631076861997e-05, "loss": 0.0464, "step": 25850 }, { "epoch": 0.4578092417218983, "grad_norm": 0.8602330088615417, "learning_rate": 1.776226204009902e-05, "loss": 0.0583, "step": 25851 }, { "epoch": 0.4578269512589267, "grad_norm": 0.9665508270263672, "learning_rate": 1.7761416384910785e-05, "loss": 0.1041, "step": 25852 }, { "epoch": 0.45784466079595515, "grad_norm": 0.48567867279052734, "learning_rate": 1.7760570720637764e-05, "loss": 0.0708, "step": 25853 }, { "epoch": 0.45786237033298355, "grad_norm": 0.6180872321128845, "learning_rate": 1.7759725047282745e-05, "loss": 0.0741, "step": 25854 }, { "epoch": 0.457880079870012, "grad_norm": 0.8784725069999695, "learning_rate": 1.775887936484851e-05, "loss": 0.1046, "step": 25855 }, { "epoch": 0.4578977894070404, "grad_norm": 0.7131109237670898, "learning_rate": 1.7758033673337846e-05, "loss": 0.1036, "step": 25856 }, { "epoch": 0.45791549894406886, "grad_norm": 0.6419669985771179, "learning_rate": 1.7757187972753526e-05, "loss": 0.0743, "step": 25857 }, { "epoch": 0.4579332084810973, "grad_norm": 0.9692253470420837, "learning_rate": 1.7756342263098333e-05, "loss": 0.0891, "step": 25858 }, { "epoch": 0.4579509180181257, "grad_norm": 0.48888787627220154, "learning_rate": 1.7755496544375057e-05, "loss": 0.0803, "step": 25859 }, { "epoch": 0.45796862755515416, "grad_norm": 0.3503296673297882, "learning_rate": 1.7754650816586475e-05, "loss": 0.0705, "step": 25860 }, { "epoch": 0.45798633709218256, "grad_norm": 0.7023087739944458, "learning_rate": 1.775380507973537e-05, "loss": 0.0832, "step": 25861 }, { "epoch": 0.458004046629211, "grad_norm": 0.4354090392589569, "learning_rate": 1.7752959333824526e-05, "loss": 0.0702, "step": 25862 }, { "epoch": 0.4580217561662394, "grad_norm": 0.5286896824836731, "learning_rate": 1.7752113578856724e-05, "loss": 0.0615, "step": 25863 }, { "epoch": 0.45803946570326787, "grad_norm": 0.8336315751075745, "learning_rate": 1.7751267814834745e-05, "loss": 0.0838, "step": 25864 }, { "epoch": 0.45805717524029627, "grad_norm": 0.9365374445915222, "learning_rate": 1.7750422041761376e-05, "loss": 0.1166, "step": 25865 }, { "epoch": 0.4580748847773247, "grad_norm": 0.5524207353591919, "learning_rate": 1.7749576259639398e-05, "loss": 0.0731, "step": 25866 }, { "epoch": 0.4580925943143531, "grad_norm": 0.45626360177993774, "learning_rate": 1.774873046847159e-05, "loss": 0.084, "step": 25867 }, { "epoch": 0.4581103038513816, "grad_norm": 0.49158746004104614, "learning_rate": 1.774788466826073e-05, "loss": 0.0803, "step": 25868 }, { "epoch": 0.45812801338840997, "grad_norm": 0.854333758354187, "learning_rate": 1.774703885900962e-05, "loss": 0.0585, "step": 25869 }, { "epoch": 0.4581457229254384, "grad_norm": 0.6711097359657288, "learning_rate": 1.7746193040721018e-05, "loss": 0.1016, "step": 25870 }, { "epoch": 0.4581634324624668, "grad_norm": 0.5228300094604492, "learning_rate": 1.7745347213397726e-05, "loss": 0.0499, "step": 25871 }, { "epoch": 0.4581811419994953, "grad_norm": 0.4392693042755127, "learning_rate": 1.7744501377042515e-05, "loss": 0.0742, "step": 25872 }, { "epoch": 0.45819885153652373, "grad_norm": 0.29767733812332153, "learning_rate": 1.7743655531658177e-05, "loss": 0.0349, "step": 25873 }, { "epoch": 0.45821656107355213, "grad_norm": 0.7675876617431641, "learning_rate": 1.7742809677247487e-05, "loss": 0.0762, "step": 25874 }, { "epoch": 0.4582342706105806, "grad_norm": 0.8953840136528015, "learning_rate": 1.7741963813813233e-05, "loss": 0.1029, "step": 25875 }, { "epoch": 0.458251980147609, "grad_norm": 0.775685727596283, "learning_rate": 1.7741117941358192e-05, "loss": 0.0778, "step": 25876 }, { "epoch": 0.45826968968463744, "grad_norm": 0.4245501756668091, "learning_rate": 1.774027205988515e-05, "loss": 0.061, "step": 25877 }, { "epoch": 0.45828739922166584, "grad_norm": 0.8459892868995667, "learning_rate": 1.7739426169396895e-05, "loss": 0.0921, "step": 25878 }, { "epoch": 0.4583051087586943, "grad_norm": 0.7142456769943237, "learning_rate": 1.77385802698962e-05, "loss": 0.0532, "step": 25879 }, { "epoch": 0.4583228182957227, "grad_norm": 0.925428569316864, "learning_rate": 1.7737734361385852e-05, "loss": 0.0898, "step": 25880 }, { "epoch": 0.45834052783275114, "grad_norm": 0.6857360005378723, "learning_rate": 1.773688844386864e-05, "loss": 0.0827, "step": 25881 }, { "epoch": 0.45835823736977954, "grad_norm": 1.2825982570648193, "learning_rate": 1.7736042517347337e-05, "loss": 0.1017, "step": 25882 }, { "epoch": 0.458375946906808, "grad_norm": 0.6210827827453613, "learning_rate": 1.773519658182473e-05, "loss": 0.0701, "step": 25883 }, { "epoch": 0.4583936564438364, "grad_norm": 0.742876410484314, "learning_rate": 1.7734350637303613e-05, "loss": 0.0824, "step": 25884 }, { "epoch": 0.45841136598086485, "grad_norm": 0.854581892490387, "learning_rate": 1.773350468378675e-05, "loss": 0.1184, "step": 25885 }, { "epoch": 0.45842907551789325, "grad_norm": 0.6139144897460938, "learning_rate": 1.7732658721276936e-05, "loss": 0.0602, "step": 25886 }, { "epoch": 0.4584467850549217, "grad_norm": 0.8865346908569336, "learning_rate": 1.7731812749776948e-05, "loss": 0.0801, "step": 25887 }, { "epoch": 0.45846449459195016, "grad_norm": 0.5430954098701477, "learning_rate": 1.7730966769289574e-05, "loss": 0.078, "step": 25888 }, { "epoch": 0.45848220412897855, "grad_norm": 0.6504746079444885, "learning_rate": 1.7730120779817596e-05, "loss": 0.0952, "step": 25889 }, { "epoch": 0.458499913666007, "grad_norm": 0.570533812046051, "learning_rate": 1.77292747813638e-05, "loss": 0.0553, "step": 25890 }, { "epoch": 0.4585176232030354, "grad_norm": 0.5183939933776855, "learning_rate": 1.772842877393096e-05, "loss": 0.0664, "step": 25891 }, { "epoch": 0.45853533274006386, "grad_norm": 0.26029807329177856, "learning_rate": 1.7727582757521867e-05, "loss": 0.0682, "step": 25892 }, { "epoch": 0.45855304227709226, "grad_norm": 0.9182160496711731, "learning_rate": 1.7726736732139304e-05, "loss": 0.1033, "step": 25893 }, { "epoch": 0.4585707518141207, "grad_norm": 0.6922105550765991, "learning_rate": 1.7725890697786055e-05, "loss": 0.0763, "step": 25894 }, { "epoch": 0.4585884613511491, "grad_norm": 0.556281566619873, "learning_rate": 1.77250446544649e-05, "loss": 0.0582, "step": 25895 }, { "epoch": 0.45860617088817757, "grad_norm": 0.7021956443786621, "learning_rate": 1.772419860217862e-05, "loss": 0.0694, "step": 25896 }, { "epoch": 0.45862388042520597, "grad_norm": 0.5746757984161377, "learning_rate": 1.7723352540930005e-05, "loss": 0.0686, "step": 25897 }, { "epoch": 0.4586415899622344, "grad_norm": 0.6517502665519714, "learning_rate": 1.7722506470721838e-05, "loss": 0.0881, "step": 25898 }, { "epoch": 0.4586592994992628, "grad_norm": 0.7644951939582825, "learning_rate": 1.7721660391556898e-05, "loss": 0.0726, "step": 25899 }, { "epoch": 0.45867700903629127, "grad_norm": 1.0931791067123413, "learning_rate": 1.772081430343797e-05, "loss": 0.091, "step": 25900 }, { "epoch": 0.45869471857331967, "grad_norm": 0.9735203385353088, "learning_rate": 1.7719968206367837e-05, "loss": 0.105, "step": 25901 }, { "epoch": 0.4587124281103481, "grad_norm": 0.7713057398796082, "learning_rate": 1.7719122100349285e-05, "loss": 0.1013, "step": 25902 }, { "epoch": 0.4587301376473766, "grad_norm": 0.5793014764785767, "learning_rate": 1.7718275985385097e-05, "loss": 0.0629, "step": 25903 }, { "epoch": 0.458747847184405, "grad_norm": 0.9858889579772949, "learning_rate": 1.7717429861478054e-05, "loss": 0.0534, "step": 25904 }, { "epoch": 0.45876555672143343, "grad_norm": 0.625789225101471, "learning_rate": 1.7716583728630947e-05, "loss": 0.0743, "step": 25905 }, { "epoch": 0.45878326625846183, "grad_norm": 0.7465884685516357, "learning_rate": 1.7715737586846545e-05, "loss": 0.0792, "step": 25906 }, { "epoch": 0.4588009757954903, "grad_norm": 0.6174581050872803, "learning_rate": 1.771489143612765e-05, "loss": 0.0842, "step": 25907 }, { "epoch": 0.4588186853325187, "grad_norm": 0.9078802466392517, "learning_rate": 1.7714045276477032e-05, "loss": 0.0959, "step": 25908 }, { "epoch": 0.45883639486954714, "grad_norm": 0.42850446701049805, "learning_rate": 1.7713199107897482e-05, "loss": 0.087, "step": 25909 }, { "epoch": 0.45885410440657554, "grad_norm": 0.3734176456928253, "learning_rate": 1.7712352930391776e-05, "loss": 0.0727, "step": 25910 }, { "epoch": 0.458871813943604, "grad_norm": 0.8054925799369812, "learning_rate": 1.7711506743962706e-05, "loss": 0.0714, "step": 25911 }, { "epoch": 0.4588895234806324, "grad_norm": 0.8496475219726562, "learning_rate": 1.7710660548613055e-05, "loss": 0.0809, "step": 25912 }, { "epoch": 0.45890723301766084, "grad_norm": 0.7029772996902466, "learning_rate": 1.7709814344345604e-05, "loss": 0.0756, "step": 25913 }, { "epoch": 0.45892494255468924, "grad_norm": 1.1035130023956299, "learning_rate": 1.7708968131163142e-05, "loss": 0.1037, "step": 25914 }, { "epoch": 0.4589426520917177, "grad_norm": 0.324868768453598, "learning_rate": 1.7708121909068442e-05, "loss": 0.0687, "step": 25915 }, { "epoch": 0.4589603616287461, "grad_norm": 0.6183716654777527, "learning_rate": 1.7707275678064298e-05, "loss": 0.0672, "step": 25916 }, { "epoch": 0.45897807116577455, "grad_norm": 0.5142965912818909, "learning_rate": 1.7706429438153494e-05, "loss": 0.0725, "step": 25917 }, { "epoch": 0.458995780702803, "grad_norm": 0.7826803922653198, "learning_rate": 1.77055831893388e-05, "loss": 0.0859, "step": 25918 }, { "epoch": 0.4590134902398314, "grad_norm": 0.6170554757118225, "learning_rate": 1.7704736931623023e-05, "loss": 0.0856, "step": 25919 }, { "epoch": 0.45903119977685986, "grad_norm": 0.5967049598693848, "learning_rate": 1.770389066500893e-05, "loss": 0.0588, "step": 25920 }, { "epoch": 0.45904890931388825, "grad_norm": 0.7264901995658875, "learning_rate": 1.7703044389499312e-05, "loss": 0.0745, "step": 25921 }, { "epoch": 0.4590666188509167, "grad_norm": 0.5932773351669312, "learning_rate": 1.7702198105096947e-05, "loss": 0.0756, "step": 25922 }, { "epoch": 0.4590843283879451, "grad_norm": 0.34833836555480957, "learning_rate": 1.770135181180463e-05, "loss": 0.067, "step": 25923 }, { "epoch": 0.45910203792497356, "grad_norm": 0.607458770275116, "learning_rate": 1.7700505509625133e-05, "loss": 0.0666, "step": 25924 }, { "epoch": 0.45911974746200196, "grad_norm": 0.7624800801277161, "learning_rate": 1.7699659198561247e-05, "loss": 0.077, "step": 25925 }, { "epoch": 0.4591374569990304, "grad_norm": 0.5508293509483337, "learning_rate": 1.769881287861576e-05, "loss": 0.0546, "step": 25926 }, { "epoch": 0.4591551665360588, "grad_norm": 0.6603132486343384, "learning_rate": 1.7697966549791447e-05, "loss": 0.0591, "step": 25927 }, { "epoch": 0.45917287607308727, "grad_norm": 0.9590770602226257, "learning_rate": 1.76971202120911e-05, "loss": 0.0996, "step": 25928 }, { "epoch": 0.45919058561011566, "grad_norm": 0.6200108528137207, "learning_rate": 1.7696273865517498e-05, "loss": 0.0501, "step": 25929 }, { "epoch": 0.4592082951471441, "grad_norm": 0.9600335359573364, "learning_rate": 1.7695427510073426e-05, "loss": 0.0879, "step": 25930 }, { "epoch": 0.4592260046841725, "grad_norm": 0.7683345079421997, "learning_rate": 1.769458114576167e-05, "loss": 0.0664, "step": 25931 }, { "epoch": 0.45924371422120097, "grad_norm": 1.1495736837387085, "learning_rate": 1.769373477258502e-05, "loss": 0.0829, "step": 25932 }, { "epoch": 0.4592614237582294, "grad_norm": 0.36791062355041504, "learning_rate": 1.7692888390546253e-05, "loss": 0.0681, "step": 25933 }, { "epoch": 0.4592791332952578, "grad_norm": 0.8368340134620667, "learning_rate": 1.7692041999648154e-05, "loss": 0.1011, "step": 25934 }, { "epoch": 0.4592968428322863, "grad_norm": 0.6256344318389893, "learning_rate": 1.7691195599893508e-05, "loss": 0.0767, "step": 25935 }, { "epoch": 0.4593145523693147, "grad_norm": 1.2523187398910522, "learning_rate": 1.76903491912851e-05, "loss": 0.0855, "step": 25936 }, { "epoch": 0.45933226190634313, "grad_norm": 0.6671488881111145, "learning_rate": 1.768950277382572e-05, "loss": 0.0491, "step": 25937 }, { "epoch": 0.45934997144337153, "grad_norm": 0.8030498623847961, "learning_rate": 1.7688656347518145e-05, "loss": 0.0821, "step": 25938 }, { "epoch": 0.4593676809804, "grad_norm": 1.2966669797897339, "learning_rate": 1.7687809912365163e-05, "loss": 0.1023, "step": 25939 }, { "epoch": 0.4593853905174284, "grad_norm": 0.8058144450187683, "learning_rate": 1.768696346836956e-05, "loss": 0.1011, "step": 25940 }, { "epoch": 0.45940310005445684, "grad_norm": 0.989498496055603, "learning_rate": 1.7686117015534113e-05, "loss": 0.0767, "step": 25941 }, { "epoch": 0.45942080959148524, "grad_norm": 1.0759633779525757, "learning_rate": 1.7685270553861623e-05, "loss": 0.0865, "step": 25942 }, { "epoch": 0.4594385191285137, "grad_norm": 0.8392525911331177, "learning_rate": 1.7684424083354854e-05, "loss": 0.0862, "step": 25943 }, { "epoch": 0.4594562286655421, "grad_norm": 0.6644368767738342, "learning_rate": 1.7683577604016604e-05, "loss": 0.0702, "step": 25944 }, { "epoch": 0.45947393820257054, "grad_norm": 0.6589052081108093, "learning_rate": 1.7682731115849656e-05, "loss": 0.0967, "step": 25945 }, { "epoch": 0.45949164773959894, "grad_norm": 0.7134870290756226, "learning_rate": 1.76818846188568e-05, "loss": 0.0674, "step": 25946 }, { "epoch": 0.4595093572766274, "grad_norm": 1.0840588808059692, "learning_rate": 1.7681038113040805e-05, "loss": 0.0775, "step": 25947 }, { "epoch": 0.45952706681365585, "grad_norm": 0.6359695792198181, "learning_rate": 1.7680191598404473e-05, "loss": 0.0663, "step": 25948 }, { "epoch": 0.45954477635068425, "grad_norm": 0.5440465807914734, "learning_rate": 1.767934507495058e-05, "loss": 0.0723, "step": 25949 }, { "epoch": 0.4595624858877127, "grad_norm": 0.6308403611183167, "learning_rate": 1.767849854268191e-05, "loss": 0.0843, "step": 25950 }, { "epoch": 0.4595801954247411, "grad_norm": 0.8477228879928589, "learning_rate": 1.7677652001601255e-05, "loss": 0.0345, "step": 25951 }, { "epoch": 0.45959790496176955, "grad_norm": 0.897936224937439, "learning_rate": 1.767680545171139e-05, "loss": 0.0855, "step": 25952 }, { "epoch": 0.45961561449879795, "grad_norm": 0.6749721765518188, "learning_rate": 1.767595889301511e-05, "loss": 0.0429, "step": 25953 }, { "epoch": 0.4596333240358264, "grad_norm": 0.969599723815918, "learning_rate": 1.767511232551519e-05, "loss": 0.0874, "step": 25954 }, { "epoch": 0.4596510335728548, "grad_norm": 0.5752995014190674, "learning_rate": 1.767426574921443e-05, "loss": 0.0758, "step": 25955 }, { "epoch": 0.45966874310988326, "grad_norm": 0.6627720594406128, "learning_rate": 1.7673419164115603e-05, "loss": 0.0777, "step": 25956 }, { "epoch": 0.45968645264691166, "grad_norm": 0.6966500282287598, "learning_rate": 1.7672572570221497e-05, "loss": 0.0613, "step": 25957 }, { "epoch": 0.4597041621839401, "grad_norm": 0.9261645674705505, "learning_rate": 1.7671725967534897e-05, "loss": 0.0687, "step": 25958 }, { "epoch": 0.4597218717209685, "grad_norm": 0.7024922370910645, "learning_rate": 1.7670879356058586e-05, "loss": 0.0801, "step": 25959 }, { "epoch": 0.45973958125799697, "grad_norm": 0.7390472888946533, "learning_rate": 1.767003273579536e-05, "loss": 0.0752, "step": 25960 }, { "epoch": 0.45975729079502536, "grad_norm": 0.7121254205703735, "learning_rate": 1.7669186106747986e-05, "loss": 0.07, "step": 25961 }, { "epoch": 0.4597750003320538, "grad_norm": 0.9137840867042542, "learning_rate": 1.7668339468919267e-05, "loss": 0.0729, "step": 25962 }, { "epoch": 0.4597927098690823, "grad_norm": 0.3862372636795044, "learning_rate": 1.766749282231198e-05, "loss": 0.0621, "step": 25963 }, { "epoch": 0.45981041940611067, "grad_norm": 0.4790850579738617, "learning_rate": 1.7666646166928907e-05, "loss": 0.0675, "step": 25964 }, { "epoch": 0.4598281289431391, "grad_norm": 0.8597264885902405, "learning_rate": 1.7665799502772845e-05, "loss": 0.1129, "step": 25965 }, { "epoch": 0.4598458384801675, "grad_norm": 0.5773606896400452, "learning_rate": 1.7664952829846567e-05, "loss": 0.0637, "step": 25966 }, { "epoch": 0.459863548017196, "grad_norm": 0.7830582857131958, "learning_rate": 1.7664106148152868e-05, "loss": 0.1012, "step": 25967 }, { "epoch": 0.4598812575542244, "grad_norm": 0.750546395778656, "learning_rate": 1.7663259457694523e-05, "loss": 0.0672, "step": 25968 }, { "epoch": 0.45989896709125283, "grad_norm": 0.8040508031845093, "learning_rate": 1.7662412758474326e-05, "loss": 0.0807, "step": 25969 }, { "epoch": 0.45991667662828123, "grad_norm": 0.6962705254554749, "learning_rate": 1.766156605049506e-05, "loss": 0.0669, "step": 25970 }, { "epoch": 0.4599343861653097, "grad_norm": 0.9324262738227844, "learning_rate": 1.7660719333759517e-05, "loss": 0.0737, "step": 25971 }, { "epoch": 0.4599520957023381, "grad_norm": 0.6261630654335022, "learning_rate": 1.765987260827047e-05, "loss": 0.0878, "step": 25972 }, { "epoch": 0.45996980523936654, "grad_norm": 0.7383919358253479, "learning_rate": 1.7659025874030713e-05, "loss": 0.0799, "step": 25973 }, { "epoch": 0.45998751477639493, "grad_norm": 0.711495041847229, "learning_rate": 1.765817913104303e-05, "loss": 0.082, "step": 25974 }, { "epoch": 0.4600052243134234, "grad_norm": 0.5614613890647888, "learning_rate": 1.7657332379310206e-05, "loss": 0.045, "step": 25975 }, { "epoch": 0.4600229338504518, "grad_norm": 0.6532325148582458, "learning_rate": 1.765648561883503e-05, "loss": 0.0713, "step": 25976 }, { "epoch": 0.46004064338748024, "grad_norm": 0.44645971059799194, "learning_rate": 1.765563884962028e-05, "loss": 0.0728, "step": 25977 }, { "epoch": 0.4600583529245087, "grad_norm": 0.8025312423706055, "learning_rate": 1.765479207166875e-05, "loss": 0.0815, "step": 25978 }, { "epoch": 0.4600760624615371, "grad_norm": 0.3057026267051697, "learning_rate": 1.765394528498322e-05, "loss": 0.0473, "step": 25979 }, { "epoch": 0.46009377199856555, "grad_norm": 0.491940438747406, "learning_rate": 1.7653098489566483e-05, "loss": 0.0619, "step": 25980 }, { "epoch": 0.46011148153559395, "grad_norm": 1.069377064704895, "learning_rate": 1.765225168542132e-05, "loss": 0.0944, "step": 25981 }, { "epoch": 0.4601291910726224, "grad_norm": 0.6046686768531799, "learning_rate": 1.7651404872550512e-05, "loss": 0.1133, "step": 25982 }, { "epoch": 0.4601469006096508, "grad_norm": 0.6634771227836609, "learning_rate": 1.7650558050956853e-05, "loss": 0.0727, "step": 25983 }, { "epoch": 0.46016461014667925, "grad_norm": 0.46619096398353577, "learning_rate": 1.7649711220643123e-05, "loss": 0.0657, "step": 25984 }, { "epoch": 0.46018231968370765, "grad_norm": 0.592929482460022, "learning_rate": 1.764886438161212e-05, "loss": 0.0771, "step": 25985 }, { "epoch": 0.4602000292207361, "grad_norm": 0.846217691898346, "learning_rate": 1.7648017533866612e-05, "loss": 0.0612, "step": 25986 }, { "epoch": 0.4602177387577645, "grad_norm": 0.6773446798324585, "learning_rate": 1.7647170677409396e-05, "loss": 0.0683, "step": 25987 }, { "epoch": 0.46023544829479296, "grad_norm": 0.6277856230735779, "learning_rate": 1.764632381224326e-05, "loss": 0.0681, "step": 25988 }, { "epoch": 0.46025315783182136, "grad_norm": 0.6673673391342163, "learning_rate": 1.7645476938370987e-05, "loss": 0.0588, "step": 25989 }, { "epoch": 0.4602708673688498, "grad_norm": 0.790406346321106, "learning_rate": 1.764463005579536e-05, "loss": 0.1085, "step": 25990 }, { "epoch": 0.4602885769058782, "grad_norm": 0.9008472561836243, "learning_rate": 1.7643783164519164e-05, "loss": 0.0465, "step": 25991 }, { "epoch": 0.46030628644290666, "grad_norm": 0.33223074674606323, "learning_rate": 1.7642936264545193e-05, "loss": 0.0647, "step": 25992 }, { "epoch": 0.4603239959799351, "grad_norm": 0.6235498785972595, "learning_rate": 1.7642089355876226e-05, "loss": 0.0577, "step": 25993 }, { "epoch": 0.4603417055169635, "grad_norm": 1.0886731147766113, "learning_rate": 1.7641242438515057e-05, "loss": 0.0826, "step": 25994 }, { "epoch": 0.46035941505399197, "grad_norm": 0.7021464705467224, "learning_rate": 1.7640395512464465e-05, "loss": 0.0684, "step": 25995 }, { "epoch": 0.46037712459102037, "grad_norm": 0.3987496495246887, "learning_rate": 1.763954857772724e-05, "loss": 0.063, "step": 25996 }, { "epoch": 0.4603948341280488, "grad_norm": 0.7421777248382568, "learning_rate": 1.7638701634306163e-05, "loss": 0.0975, "step": 25997 }, { "epoch": 0.4604125436650772, "grad_norm": 0.55899578332901, "learning_rate": 1.7637854682204026e-05, "loss": 0.0709, "step": 25998 }, { "epoch": 0.4604302532021057, "grad_norm": 0.8090215921401978, "learning_rate": 1.7637007721423616e-05, "loss": 0.0502, "step": 25999 }, { "epoch": 0.4604479627391341, "grad_norm": 0.7286496162414551, "learning_rate": 1.7636160751967715e-05, "loss": 0.0641, "step": 26000 }, { "epoch": 0.46046567227616253, "grad_norm": 0.8982388973236084, "learning_rate": 1.7635313773839112e-05, "loss": 0.0941, "step": 26001 }, { "epoch": 0.46048338181319093, "grad_norm": 1.0283448696136475, "learning_rate": 1.7634466787040594e-05, "loss": 0.1091, "step": 26002 }, { "epoch": 0.4605010913502194, "grad_norm": 1.0340337753295898, "learning_rate": 1.763361979157495e-05, "loss": 0.1133, "step": 26003 }, { "epoch": 0.4605188008872478, "grad_norm": 0.5959938168525696, "learning_rate": 1.763277278744496e-05, "loss": 0.0757, "step": 26004 }, { "epoch": 0.46053651042427624, "grad_norm": 0.5549719929695129, "learning_rate": 1.7631925774653414e-05, "loss": 0.0847, "step": 26005 }, { "epoch": 0.46055421996130463, "grad_norm": 0.8193151354789734, "learning_rate": 1.7631078753203097e-05, "loss": 0.0813, "step": 26006 }, { "epoch": 0.4605719294983331, "grad_norm": 0.5775253772735596, "learning_rate": 1.7630231723096798e-05, "loss": 0.0918, "step": 26007 }, { "epoch": 0.46058963903536154, "grad_norm": 0.8715527057647705, "learning_rate": 1.7629384684337306e-05, "loss": 0.0731, "step": 26008 }, { "epoch": 0.46060734857238994, "grad_norm": 0.4151131212711334, "learning_rate": 1.7628537636927397e-05, "loss": 0.0736, "step": 26009 }, { "epoch": 0.4606250581094184, "grad_norm": 0.6404525637626648, "learning_rate": 1.7627690580869876e-05, "loss": 0.0579, "step": 26010 }, { "epoch": 0.4606427676464468, "grad_norm": 0.519688606262207, "learning_rate": 1.762684351616751e-05, "loss": 0.0579, "step": 26011 }, { "epoch": 0.46066047718347525, "grad_norm": 0.9330958127975464, "learning_rate": 1.7625996442823095e-05, "loss": 0.0707, "step": 26012 }, { "epoch": 0.46067818672050365, "grad_norm": 0.9242390394210815, "learning_rate": 1.762514936083942e-05, "loss": 0.0872, "step": 26013 }, { "epoch": 0.4606958962575321, "grad_norm": 0.9245863556861877, "learning_rate": 1.762430227021927e-05, "loss": 0.1035, "step": 26014 }, { "epoch": 0.4607136057945605, "grad_norm": 1.2326947450637817, "learning_rate": 1.7623455170965427e-05, "loss": 0.1187, "step": 26015 }, { "epoch": 0.46073131533158895, "grad_norm": 0.5693238973617554, "learning_rate": 1.7622608063080686e-05, "loss": 0.0631, "step": 26016 }, { "epoch": 0.46074902486861735, "grad_norm": 0.7127512693405151, "learning_rate": 1.7621760946567828e-05, "loss": 0.0692, "step": 26017 }, { "epoch": 0.4607667344056458, "grad_norm": 0.8143483996391296, "learning_rate": 1.762091382142964e-05, "loss": 0.1039, "step": 26018 }, { "epoch": 0.4607844439426742, "grad_norm": 0.4815535843372345, "learning_rate": 1.7620066687668914e-05, "loss": 0.0596, "step": 26019 }, { "epoch": 0.46080215347970266, "grad_norm": 0.8668257594108582, "learning_rate": 1.761921954528843e-05, "loss": 0.0696, "step": 26020 }, { "epoch": 0.4608198630167311, "grad_norm": 0.45708274841308594, "learning_rate": 1.7618372394290978e-05, "loss": 0.1006, "step": 26021 }, { "epoch": 0.4608375725537595, "grad_norm": 0.5124131441116333, "learning_rate": 1.7617525234679352e-05, "loss": 0.0721, "step": 26022 }, { "epoch": 0.46085528209078797, "grad_norm": 0.6947694420814514, "learning_rate": 1.7616678066456328e-05, "loss": 0.0538, "step": 26023 }, { "epoch": 0.46087299162781636, "grad_norm": 1.0029346942901611, "learning_rate": 1.7615830889624702e-05, "loss": 0.0761, "step": 26024 }, { "epoch": 0.4608907011648448, "grad_norm": 0.47798314690589905, "learning_rate": 1.761498370418725e-05, "loss": 0.0906, "step": 26025 }, { "epoch": 0.4609084107018732, "grad_norm": 0.612300455570221, "learning_rate": 1.7614136510146768e-05, "loss": 0.1448, "step": 26026 }, { "epoch": 0.46092612023890167, "grad_norm": 0.952204167842865, "learning_rate": 1.7613289307506048e-05, "loss": 0.0853, "step": 26027 }, { "epoch": 0.46094382977593007, "grad_norm": 0.8514330387115479, "learning_rate": 1.7612442096267862e-05, "loss": 0.0597, "step": 26028 }, { "epoch": 0.4609615393129585, "grad_norm": 0.6553100943565369, "learning_rate": 1.7611594876435012e-05, "loss": 0.0751, "step": 26029 }, { "epoch": 0.4609792488499869, "grad_norm": 0.6874003410339355, "learning_rate": 1.761074764801027e-05, "loss": 0.0862, "step": 26030 }, { "epoch": 0.4609969583870154, "grad_norm": 0.5117431879043579, "learning_rate": 1.760990041099644e-05, "loss": 0.1025, "step": 26031 }, { "epoch": 0.4610146679240438, "grad_norm": 0.8811970949172974, "learning_rate": 1.76090531653963e-05, "loss": 0.0905, "step": 26032 }, { "epoch": 0.46103237746107223, "grad_norm": 0.6003018617630005, "learning_rate": 1.7608205911212643e-05, "loss": 0.0855, "step": 26033 }, { "epoch": 0.46105008699810063, "grad_norm": 1.003915786743164, "learning_rate": 1.7607358648448247e-05, "loss": 0.106, "step": 26034 }, { "epoch": 0.4610677965351291, "grad_norm": 0.5951852798461914, "learning_rate": 1.7606511377105904e-05, "loss": 0.0607, "step": 26035 }, { "epoch": 0.46108550607215754, "grad_norm": 0.544061005115509, "learning_rate": 1.7605664097188406e-05, "loss": 0.0448, "step": 26036 }, { "epoch": 0.46110321560918593, "grad_norm": 0.6983654499053955, "learning_rate": 1.7604816808698535e-05, "loss": 0.0916, "step": 26037 }, { "epoch": 0.4611209251462144, "grad_norm": 0.6318293809890747, "learning_rate": 1.7603969511639083e-05, "loss": 0.0961, "step": 26038 }, { "epoch": 0.4611386346832428, "grad_norm": 1.3811739683151245, "learning_rate": 1.760312220601283e-05, "loss": 0.0532, "step": 26039 }, { "epoch": 0.46115634422027124, "grad_norm": 0.5122222900390625, "learning_rate": 1.7602274891822567e-05, "loss": 0.0759, "step": 26040 }, { "epoch": 0.46117405375729964, "grad_norm": 0.6909933090209961, "learning_rate": 1.7601427569071087e-05, "loss": 0.0519, "step": 26041 }, { "epoch": 0.4611917632943281, "grad_norm": 0.5860295295715332, "learning_rate": 1.7600580237761177e-05, "loss": 0.085, "step": 26042 }, { "epoch": 0.4612094728313565, "grad_norm": 0.6468679904937744, "learning_rate": 1.7599732897895617e-05, "loss": 0.0736, "step": 26043 }, { "epoch": 0.46122718236838495, "grad_norm": 0.7830149531364441, "learning_rate": 1.7598885549477192e-05, "loss": 0.0688, "step": 26044 }, { "epoch": 0.46124489190541335, "grad_norm": 0.48096397519111633, "learning_rate": 1.7598038192508705e-05, "loss": 0.0477, "step": 26045 }, { "epoch": 0.4612626014424418, "grad_norm": 1.3169454336166382, "learning_rate": 1.759719082699293e-05, "loss": 0.1003, "step": 26046 }, { "epoch": 0.4612803109794702, "grad_norm": 0.5918534994125366, "learning_rate": 1.7596343452932665e-05, "loss": 0.085, "step": 26047 }, { "epoch": 0.46129802051649865, "grad_norm": 0.548341691493988, "learning_rate": 1.759549607033069e-05, "loss": 0.082, "step": 26048 }, { "epoch": 0.46131573005352705, "grad_norm": 0.9835955500602722, "learning_rate": 1.7594648679189795e-05, "loss": 0.1093, "step": 26049 }, { "epoch": 0.4613334395905555, "grad_norm": 0.7766850590705872, "learning_rate": 1.759380127951277e-05, "loss": 0.1089, "step": 26050 }, { "epoch": 0.46135114912758396, "grad_norm": 0.5519850254058838, "learning_rate": 1.75929538713024e-05, "loss": 0.0455, "step": 26051 }, { "epoch": 0.46136885866461236, "grad_norm": 0.6994345188140869, "learning_rate": 1.7592106454561475e-05, "loss": 0.0573, "step": 26052 }, { "epoch": 0.4613865682016408, "grad_norm": 0.41038402915000916, "learning_rate": 1.759125902929278e-05, "loss": 0.0502, "step": 26053 }, { "epoch": 0.4614042777386692, "grad_norm": 0.41372376680374146, "learning_rate": 1.7590411595499104e-05, "loss": 0.0819, "step": 26054 }, { "epoch": 0.46142198727569766, "grad_norm": 0.7720922827720642, "learning_rate": 1.758956415318324e-05, "loss": 0.0779, "step": 26055 }, { "epoch": 0.46143969681272606, "grad_norm": 0.810383141040802, "learning_rate": 1.758871670234797e-05, "loss": 0.0797, "step": 26056 }, { "epoch": 0.4614574063497545, "grad_norm": 0.5653863549232483, "learning_rate": 1.758786924299608e-05, "loss": 0.0728, "step": 26057 }, { "epoch": 0.4614751158867829, "grad_norm": 0.9739656448364258, "learning_rate": 1.758702177513037e-05, "loss": 0.1026, "step": 26058 }, { "epoch": 0.46149282542381137, "grad_norm": 0.6233146786689758, "learning_rate": 1.7586174298753613e-05, "loss": 0.0641, "step": 26059 }, { "epoch": 0.46151053496083977, "grad_norm": 1.0920497179031372, "learning_rate": 1.7585326813868606e-05, "loss": 0.1197, "step": 26060 }, { "epoch": 0.4615282444978682, "grad_norm": 0.7621757388114929, "learning_rate": 1.7584479320478138e-05, "loss": 0.0909, "step": 26061 }, { "epoch": 0.4615459540348966, "grad_norm": 0.8404417037963867, "learning_rate": 1.758363181858499e-05, "loss": 0.091, "step": 26062 }, { "epoch": 0.4615636635719251, "grad_norm": 0.5283723473548889, "learning_rate": 1.758278430819196e-05, "loss": 0.0659, "step": 26063 }, { "epoch": 0.4615813731089535, "grad_norm": 0.7149242758750916, "learning_rate": 1.7581936789301825e-05, "loss": 0.1117, "step": 26064 }, { "epoch": 0.46159908264598193, "grad_norm": 0.9758396148681641, "learning_rate": 1.7581089261917383e-05, "loss": 0.0918, "step": 26065 }, { "epoch": 0.4616167921830104, "grad_norm": 0.4312394857406616, "learning_rate": 1.7580241726041416e-05, "loss": 0.0896, "step": 26066 }, { "epoch": 0.4616345017200388, "grad_norm": 0.6526073217391968, "learning_rate": 1.757939418167672e-05, "loss": 0.0628, "step": 26067 }, { "epoch": 0.46165221125706724, "grad_norm": 0.5998799204826355, "learning_rate": 1.757854662882607e-05, "loss": 0.0752, "step": 26068 }, { "epoch": 0.46166992079409563, "grad_norm": 0.787061333656311, "learning_rate": 1.7577699067492266e-05, "loss": 0.0986, "step": 26069 }, { "epoch": 0.4616876303311241, "grad_norm": 0.5785632729530334, "learning_rate": 1.7576851497678095e-05, "loss": 0.052, "step": 26070 }, { "epoch": 0.4617053398681525, "grad_norm": 0.2433759868144989, "learning_rate": 1.757600391938634e-05, "loss": 0.0701, "step": 26071 }, { "epoch": 0.46172304940518094, "grad_norm": 1.5868264436721802, "learning_rate": 1.7575156332619796e-05, "loss": 0.0928, "step": 26072 }, { "epoch": 0.46174075894220934, "grad_norm": 0.8626255393028259, "learning_rate": 1.7574308737381243e-05, "loss": 0.0868, "step": 26073 }, { "epoch": 0.4617584684792378, "grad_norm": 0.4004860520362854, "learning_rate": 1.757346113367348e-05, "loss": 0.0532, "step": 26074 }, { "epoch": 0.4617761780162662, "grad_norm": 0.5679993033409119, "learning_rate": 1.7572613521499287e-05, "loss": 0.0795, "step": 26075 }, { "epoch": 0.46179388755329465, "grad_norm": 0.6764345765113831, "learning_rate": 1.7571765900861455e-05, "loss": 0.0663, "step": 26076 }, { "epoch": 0.46181159709032304, "grad_norm": 0.7576676607131958, "learning_rate": 1.7570918271762777e-05, "loss": 0.0653, "step": 26077 }, { "epoch": 0.4618293066273515, "grad_norm": 0.5852075219154358, "learning_rate": 1.7570070634206034e-05, "loss": 0.0546, "step": 26078 }, { "epoch": 0.4618470161643799, "grad_norm": 0.8541864156723022, "learning_rate": 1.7569222988194025e-05, "loss": 0.0978, "step": 26079 }, { "epoch": 0.46186472570140835, "grad_norm": 0.47041332721710205, "learning_rate": 1.7568375333729525e-05, "loss": 0.0589, "step": 26080 }, { "epoch": 0.4618824352384368, "grad_norm": 0.7192929983139038, "learning_rate": 1.7567527670815333e-05, "loss": 0.0879, "step": 26081 }, { "epoch": 0.4619001447754652, "grad_norm": 0.981793224811554, "learning_rate": 1.7566679999454234e-05, "loss": 0.0855, "step": 26082 }, { "epoch": 0.46191785431249366, "grad_norm": 0.6828164458274841, "learning_rate": 1.7565832319649016e-05, "loss": 0.1096, "step": 26083 }, { "epoch": 0.46193556384952206, "grad_norm": 0.4777863621711731, "learning_rate": 1.7564984631402474e-05, "loss": 0.0504, "step": 26084 }, { "epoch": 0.4619532733865505, "grad_norm": 0.6381756067276001, "learning_rate": 1.7564136934717387e-05, "loss": 0.0687, "step": 26085 }, { "epoch": 0.4619709829235789, "grad_norm": 0.49004217982292175, "learning_rate": 1.7563289229596556e-05, "loss": 0.0527, "step": 26086 }, { "epoch": 0.46198869246060736, "grad_norm": 0.7992522120475769, "learning_rate": 1.7562441516042753e-05, "loss": 0.0923, "step": 26087 }, { "epoch": 0.46200640199763576, "grad_norm": 1.1383544206619263, "learning_rate": 1.756159379405878e-05, "loss": 0.0499, "step": 26088 }, { "epoch": 0.4620241115346642, "grad_norm": 0.4701741933822632, "learning_rate": 1.7560746063647423e-05, "loss": 0.0847, "step": 26089 }, { "epoch": 0.4620418210716926, "grad_norm": 0.4415719509124756, "learning_rate": 1.7559898324811472e-05, "loss": 0.0661, "step": 26090 }, { "epoch": 0.46205953060872107, "grad_norm": 0.6676641702651978, "learning_rate": 1.7559050577553717e-05, "loss": 0.0511, "step": 26091 }, { "epoch": 0.46207724014574947, "grad_norm": 0.6474044322967529, "learning_rate": 1.7558202821876936e-05, "loss": 0.0732, "step": 26092 }, { "epoch": 0.4620949496827779, "grad_norm": 0.6150479912757874, "learning_rate": 1.7557355057783933e-05, "loss": 0.069, "step": 26093 }, { "epoch": 0.4621126592198063, "grad_norm": 0.905784547328949, "learning_rate": 1.7556507285277488e-05, "loss": 0.0975, "step": 26094 }, { "epoch": 0.4621303687568348, "grad_norm": 0.4738767147064209, "learning_rate": 1.7555659504360395e-05, "loss": 0.0575, "step": 26095 }, { "epoch": 0.46214807829386323, "grad_norm": 0.5397933721542358, "learning_rate": 1.755481171503544e-05, "loss": 0.0331, "step": 26096 }, { "epoch": 0.46216578783089163, "grad_norm": 0.5488073229789734, "learning_rate": 1.7553963917305406e-05, "loss": 0.0684, "step": 26097 }, { "epoch": 0.4621834973679201, "grad_norm": 0.4566202759742737, "learning_rate": 1.7553116111173098e-05, "loss": 0.0961, "step": 26098 }, { "epoch": 0.4622012069049485, "grad_norm": 0.49950921535491943, "learning_rate": 1.7552268296641293e-05, "loss": 0.0704, "step": 26099 }, { "epoch": 0.46221891644197693, "grad_norm": 0.48950105905532837, "learning_rate": 1.755142047371279e-05, "loss": 0.0764, "step": 26100 }, { "epoch": 0.46223662597900533, "grad_norm": 0.6851563453674316, "learning_rate": 1.7550572642390357e-05, "loss": 0.0703, "step": 26101 }, { "epoch": 0.4622543355160338, "grad_norm": 0.7143093943595886, "learning_rate": 1.754972480267681e-05, "loss": 0.0748, "step": 26102 }, { "epoch": 0.4622720450530622, "grad_norm": 0.4992792308330536, "learning_rate": 1.7548876954574922e-05, "loss": 0.0747, "step": 26103 }, { "epoch": 0.46228975459009064, "grad_norm": 0.28762945532798767, "learning_rate": 1.7548029098087487e-05, "loss": 0.0699, "step": 26104 }, { "epoch": 0.46230746412711904, "grad_norm": 0.3809664249420166, "learning_rate": 1.7547181233217293e-05, "loss": 0.0573, "step": 26105 }, { "epoch": 0.4623251736641475, "grad_norm": 0.434540718793869, "learning_rate": 1.754633335996713e-05, "loss": 0.0668, "step": 26106 }, { "epoch": 0.4623428832011759, "grad_norm": 0.5201095938682556, "learning_rate": 1.7545485478339792e-05, "loss": 0.0775, "step": 26107 }, { "epoch": 0.46236059273820435, "grad_norm": 0.7316892743110657, "learning_rate": 1.754463758833806e-05, "loss": 0.0551, "step": 26108 }, { "epoch": 0.46237830227523274, "grad_norm": 0.7239928245544434, "learning_rate": 1.7543789689964734e-05, "loss": 0.0744, "step": 26109 }, { "epoch": 0.4623960118122612, "grad_norm": 0.6926124095916748, "learning_rate": 1.754294178322259e-05, "loss": 0.1066, "step": 26110 }, { "epoch": 0.46241372134928965, "grad_norm": 0.33478546142578125, "learning_rate": 1.7542093868114425e-05, "loss": 0.066, "step": 26111 }, { "epoch": 0.46243143088631805, "grad_norm": 1.0998822450637817, "learning_rate": 1.7541245944643032e-05, "loss": 0.0835, "step": 26112 }, { "epoch": 0.4624491404233465, "grad_norm": 1.084187626838684, "learning_rate": 1.7540398012811197e-05, "loss": 0.065, "step": 26113 }, { "epoch": 0.4624668499603749, "grad_norm": 0.9086903929710388, "learning_rate": 1.753955007262171e-05, "loss": 0.0913, "step": 26114 }, { "epoch": 0.46248455949740336, "grad_norm": 0.829964816570282, "learning_rate": 1.7538702124077354e-05, "loss": 0.0897, "step": 26115 }, { "epoch": 0.46250226903443176, "grad_norm": 0.7420592904090881, "learning_rate": 1.753785416718093e-05, "loss": 0.0471, "step": 26116 }, { "epoch": 0.4625199785714602, "grad_norm": 0.735634982585907, "learning_rate": 1.753700620193522e-05, "loss": 0.0621, "step": 26117 }, { "epoch": 0.4625376881084886, "grad_norm": 0.4194587469100952, "learning_rate": 1.7536158228343018e-05, "loss": 0.0663, "step": 26118 }, { "epoch": 0.46255539764551706, "grad_norm": 0.7502721548080444, "learning_rate": 1.753531024640711e-05, "loss": 0.0708, "step": 26119 }, { "epoch": 0.46257310718254546, "grad_norm": 0.6390182375907898, "learning_rate": 1.7534462256130294e-05, "loss": 0.0912, "step": 26120 }, { "epoch": 0.4625908167195739, "grad_norm": 0.6147060394287109, "learning_rate": 1.7533614257515342e-05, "loss": 0.0734, "step": 26121 }, { "epoch": 0.4626085262566023, "grad_norm": 0.6687071919441223, "learning_rate": 1.7532766250565068e-05, "loss": 0.0697, "step": 26122 }, { "epoch": 0.46262623579363077, "grad_norm": 0.40646106004714966, "learning_rate": 1.7531918235282246e-05, "loss": 0.0431, "step": 26123 }, { "epoch": 0.46264394533065917, "grad_norm": 0.9627699255943298, "learning_rate": 1.7531070211669664e-05, "loss": 0.0881, "step": 26124 }, { "epoch": 0.4626616548676876, "grad_norm": 0.6244025230407715, "learning_rate": 1.7530222179730116e-05, "loss": 0.0498, "step": 26125 }, { "epoch": 0.4626793644047161, "grad_norm": 1.7517166137695312, "learning_rate": 1.7529374139466398e-05, "loss": 0.0742, "step": 26126 }, { "epoch": 0.4626970739417445, "grad_norm": 0.8532721996307373, "learning_rate": 1.7528526090881293e-05, "loss": 0.0689, "step": 26127 }, { "epoch": 0.46271478347877293, "grad_norm": 0.4235754907131195, "learning_rate": 1.7527678033977593e-05, "loss": 0.0481, "step": 26128 }, { "epoch": 0.4627324930158013, "grad_norm": 0.6967424154281616, "learning_rate": 1.752682996875809e-05, "loss": 0.0526, "step": 26129 }, { "epoch": 0.4627502025528298, "grad_norm": 0.7432126998901367, "learning_rate": 1.752598189522557e-05, "loss": 0.0503, "step": 26130 }, { "epoch": 0.4627679120898582, "grad_norm": 0.837906002998352, "learning_rate": 1.752513381338282e-05, "loss": 0.0696, "step": 26131 }, { "epoch": 0.46278562162688663, "grad_norm": 0.8856038451194763, "learning_rate": 1.7524285723232644e-05, "loss": 0.0867, "step": 26132 }, { "epoch": 0.46280333116391503, "grad_norm": 0.7861909866333008, "learning_rate": 1.7523437624777818e-05, "loss": 0.0778, "step": 26133 }, { "epoch": 0.4628210407009435, "grad_norm": 0.545917809009552, "learning_rate": 1.7522589518021143e-05, "loss": 0.0506, "step": 26134 }, { "epoch": 0.4628387502379719, "grad_norm": 0.49578389525413513, "learning_rate": 1.7521741402965392e-05, "loss": 0.0741, "step": 26135 }, { "epoch": 0.46285645977500034, "grad_norm": 0.4661608934402466, "learning_rate": 1.7520893279613376e-05, "loss": 0.0466, "step": 26136 }, { "epoch": 0.46287416931202874, "grad_norm": 0.446735143661499, "learning_rate": 1.7520045147967875e-05, "loss": 0.0784, "step": 26137 }, { "epoch": 0.4628918788490572, "grad_norm": 1.6206530332565308, "learning_rate": 1.7519197008031683e-05, "loss": 0.0964, "step": 26138 }, { "epoch": 0.4629095883860856, "grad_norm": 0.4026810824871063, "learning_rate": 1.7518348859807586e-05, "loss": 0.0694, "step": 26139 }, { "epoch": 0.46292729792311405, "grad_norm": 0.7224300503730774, "learning_rate": 1.751750070329837e-05, "loss": 0.0772, "step": 26140 }, { "epoch": 0.4629450074601425, "grad_norm": 0.9252074956893921, "learning_rate": 1.7516652538506837e-05, "loss": 0.1134, "step": 26141 }, { "epoch": 0.4629627169971709, "grad_norm": 0.5282242298126221, "learning_rate": 1.751580436543577e-05, "loss": 0.0464, "step": 26142 }, { "epoch": 0.46298042653419935, "grad_norm": 1.3195347785949707, "learning_rate": 1.7514956184087965e-05, "loss": 0.0796, "step": 26143 }, { "epoch": 0.46299813607122775, "grad_norm": 1.3799927234649658, "learning_rate": 1.7514107994466202e-05, "loss": 0.067, "step": 26144 }, { "epoch": 0.4630158456082562, "grad_norm": 0.6854498982429504, "learning_rate": 1.7513259796573282e-05, "loss": 0.0536, "step": 26145 }, { "epoch": 0.4630335551452846, "grad_norm": 1.0448157787322998, "learning_rate": 1.7512411590411992e-05, "loss": 0.0742, "step": 26146 }, { "epoch": 0.46305126468231306, "grad_norm": 0.594990611076355, "learning_rate": 1.751156337598512e-05, "loss": 0.0666, "step": 26147 }, { "epoch": 0.46306897421934146, "grad_norm": 0.8493761420249939, "learning_rate": 1.751071515329546e-05, "loss": 0.0794, "step": 26148 }, { "epoch": 0.4630866837563699, "grad_norm": 1.016624093055725, "learning_rate": 1.75098669223458e-05, "loss": 0.116, "step": 26149 }, { "epoch": 0.4631043932933983, "grad_norm": 1.4147361516952515, "learning_rate": 1.750901868313893e-05, "loss": 0.0944, "step": 26150 }, { "epoch": 0.46312210283042676, "grad_norm": 1.1312168836593628, "learning_rate": 1.7508170435677645e-05, "loss": 0.0936, "step": 26151 }, { "epoch": 0.46313981236745516, "grad_norm": 0.7199474573135376, "learning_rate": 1.7507322179964734e-05, "loss": 0.087, "step": 26152 }, { "epoch": 0.4631575219044836, "grad_norm": 0.5832982659339905, "learning_rate": 1.7506473916002983e-05, "loss": 0.0784, "step": 26153 }, { "epoch": 0.463175231441512, "grad_norm": 1.0267541408538818, "learning_rate": 1.7505625643795187e-05, "loss": 0.0962, "step": 26154 }, { "epoch": 0.46319294097854047, "grad_norm": 0.5720481872558594, "learning_rate": 1.750477736334414e-05, "loss": 0.0801, "step": 26155 }, { "epoch": 0.4632106505155689, "grad_norm": 0.6779751777648926, "learning_rate": 1.750392907465262e-05, "loss": 0.0465, "step": 26156 }, { "epoch": 0.4632283600525973, "grad_norm": 0.8861123323440552, "learning_rate": 1.7503080777723437e-05, "loss": 0.0873, "step": 26157 }, { "epoch": 0.4632460695896258, "grad_norm": 0.9915681481361389, "learning_rate": 1.7502232472559367e-05, "loss": 0.0891, "step": 26158 }, { "epoch": 0.4632637791266542, "grad_norm": 0.9366528987884521, "learning_rate": 1.7501384159163204e-05, "loss": 0.1031, "step": 26159 }, { "epoch": 0.46328148866368263, "grad_norm": 0.7826785445213318, "learning_rate": 1.7500535837537744e-05, "loss": 0.064, "step": 26160 }, { "epoch": 0.463299198200711, "grad_norm": 1.1086609363555908, "learning_rate": 1.749968750768577e-05, "loss": 0.1063, "step": 26161 }, { "epoch": 0.4633169077377395, "grad_norm": 0.7873758673667908, "learning_rate": 1.749883916961008e-05, "loss": 0.0615, "step": 26162 }, { "epoch": 0.4633346172747679, "grad_norm": 0.5684881806373596, "learning_rate": 1.749799082331346e-05, "loss": 0.0441, "step": 26163 }, { "epoch": 0.46335232681179633, "grad_norm": 0.8993249535560608, "learning_rate": 1.74971424687987e-05, "loss": 0.0803, "step": 26164 }, { "epoch": 0.46337003634882473, "grad_norm": 0.5750896334648132, "learning_rate": 1.7496294106068597e-05, "loss": 0.1081, "step": 26165 }, { "epoch": 0.4633877458858532, "grad_norm": 0.6404993534088135, "learning_rate": 1.749544573512594e-05, "loss": 0.0576, "step": 26166 }, { "epoch": 0.4634054554228816, "grad_norm": 0.6511586904525757, "learning_rate": 1.749459735597352e-05, "loss": 0.0886, "step": 26167 }, { "epoch": 0.46342316495991004, "grad_norm": 0.5956019163131714, "learning_rate": 1.749374896861412e-05, "loss": 0.0942, "step": 26168 }, { "epoch": 0.46344087449693844, "grad_norm": 0.6771085262298584, "learning_rate": 1.7492900573050545e-05, "loss": 0.0349, "step": 26169 }, { "epoch": 0.4634585840339669, "grad_norm": 0.9110636115074158, "learning_rate": 1.7492052169285576e-05, "loss": 0.0465, "step": 26170 }, { "epoch": 0.46347629357099535, "grad_norm": 0.7102203965187073, "learning_rate": 1.7491203757322012e-05, "loss": 0.0578, "step": 26171 }, { "epoch": 0.46349400310802374, "grad_norm": 0.9235800504684448, "learning_rate": 1.7490355337162635e-05, "loss": 0.0969, "step": 26172 }, { "epoch": 0.4635117126450522, "grad_norm": 0.5115057229995728, "learning_rate": 1.748950690881024e-05, "loss": 0.0603, "step": 26173 }, { "epoch": 0.4635294221820806, "grad_norm": 0.8793895244598389, "learning_rate": 1.748865847226762e-05, "loss": 0.0604, "step": 26174 }, { "epoch": 0.46354713171910905, "grad_norm": 0.5435798168182373, "learning_rate": 1.748781002753757e-05, "loss": 0.0826, "step": 26175 }, { "epoch": 0.46356484125613745, "grad_norm": 0.875071108341217, "learning_rate": 1.748696157462287e-05, "loss": 0.0943, "step": 26176 }, { "epoch": 0.4635825507931659, "grad_norm": 0.843828022480011, "learning_rate": 1.7486113113526322e-05, "loss": 0.1014, "step": 26177 }, { "epoch": 0.4636002603301943, "grad_norm": 0.576512336730957, "learning_rate": 1.7485264644250713e-05, "loss": 0.0747, "step": 26178 }, { "epoch": 0.46361796986722276, "grad_norm": 0.4120144546031952, "learning_rate": 1.748441616679883e-05, "loss": 0.0563, "step": 26179 }, { "epoch": 0.46363567940425116, "grad_norm": 0.42279061675071716, "learning_rate": 1.7483567681173477e-05, "loss": 0.0881, "step": 26180 }, { "epoch": 0.4636533889412796, "grad_norm": 0.4339389503002167, "learning_rate": 1.748271918737743e-05, "loss": 0.0735, "step": 26181 }, { "epoch": 0.463671098478308, "grad_norm": 0.8667292594909668, "learning_rate": 1.7481870685413495e-05, "loss": 0.0691, "step": 26182 }, { "epoch": 0.46368880801533646, "grad_norm": 0.9449184536933899, "learning_rate": 1.748102217528445e-05, "loss": 0.0693, "step": 26183 }, { "epoch": 0.46370651755236486, "grad_norm": 1.001846432685852, "learning_rate": 1.7480173656993098e-05, "loss": 0.0836, "step": 26184 }, { "epoch": 0.4637242270893933, "grad_norm": 0.38029995560646057, "learning_rate": 1.7479325130542226e-05, "loss": 0.0731, "step": 26185 }, { "epoch": 0.46374193662642177, "grad_norm": 0.858664333820343, "learning_rate": 1.747847659593462e-05, "loss": 0.0912, "step": 26186 }, { "epoch": 0.46375964616345017, "grad_norm": 0.4456143379211426, "learning_rate": 1.747762805317308e-05, "loss": 0.0446, "step": 26187 }, { "epoch": 0.4637773557004786, "grad_norm": 0.5050563216209412, "learning_rate": 1.747677950226039e-05, "loss": 0.1106, "step": 26188 }, { "epoch": 0.463795065237507, "grad_norm": 0.7516251802444458, "learning_rate": 1.7475930943199354e-05, "loss": 0.0566, "step": 26189 }, { "epoch": 0.4638127747745355, "grad_norm": 0.7597537040710449, "learning_rate": 1.7475082375992747e-05, "loss": 0.0709, "step": 26190 }, { "epoch": 0.4638304843115639, "grad_norm": 0.7531347870826721, "learning_rate": 1.7474233800643375e-05, "loss": 0.0826, "step": 26191 }, { "epoch": 0.4638481938485923, "grad_norm": 0.5331343412399292, "learning_rate": 1.7473385217154023e-05, "loss": 0.0546, "step": 26192 }, { "epoch": 0.4638659033856207, "grad_norm": 0.8085845112800598, "learning_rate": 1.7472536625527483e-05, "loss": 0.0535, "step": 26193 }, { "epoch": 0.4638836129226492, "grad_norm": 1.0537937879562378, "learning_rate": 1.747168802576655e-05, "loss": 0.0808, "step": 26194 }, { "epoch": 0.4639013224596776, "grad_norm": 0.6427987217903137, "learning_rate": 1.7470839417874013e-05, "loss": 0.0778, "step": 26195 }, { "epoch": 0.46391903199670603, "grad_norm": 0.7616071701049805, "learning_rate": 1.746999080185266e-05, "loss": 0.0746, "step": 26196 }, { "epoch": 0.46393674153373443, "grad_norm": 0.38957297801971436, "learning_rate": 1.7469142177705286e-05, "loss": 0.0669, "step": 26197 }, { "epoch": 0.4639544510707629, "grad_norm": 0.675376296043396, "learning_rate": 1.746829354543469e-05, "loss": 0.0455, "step": 26198 }, { "epoch": 0.4639721606077913, "grad_norm": 0.4148422181606293, "learning_rate": 1.7467444905043654e-05, "loss": 0.0798, "step": 26199 }, { "epoch": 0.46398987014481974, "grad_norm": 0.6526930928230286, "learning_rate": 1.746659625653498e-05, "loss": 0.062, "step": 26200 }, { "epoch": 0.4640075796818482, "grad_norm": 0.8166449666023254, "learning_rate": 1.7465747599911447e-05, "loss": 0.0954, "step": 26201 }, { "epoch": 0.4640252892188766, "grad_norm": 0.6584718227386475, "learning_rate": 1.7464898935175854e-05, "loss": 0.0919, "step": 26202 }, { "epoch": 0.46404299875590505, "grad_norm": 0.8621351718902588, "learning_rate": 1.7464050262330997e-05, "loss": 0.08, "step": 26203 }, { "epoch": 0.46406070829293344, "grad_norm": 0.9561270475387573, "learning_rate": 1.746320158137966e-05, "loss": 0.0484, "step": 26204 }, { "epoch": 0.4640784178299619, "grad_norm": 0.6789866089820862, "learning_rate": 1.7462352892324645e-05, "loss": 0.0601, "step": 26205 }, { "epoch": 0.4640961273669903, "grad_norm": 0.5318709015846252, "learning_rate": 1.746150419516873e-05, "loss": 0.0588, "step": 26206 }, { "epoch": 0.46411383690401875, "grad_norm": 0.8691227436065674, "learning_rate": 1.7460655489914716e-05, "loss": 0.0897, "step": 26207 }, { "epoch": 0.46413154644104715, "grad_norm": 0.7949140071868896, "learning_rate": 1.7459806776565398e-05, "loss": 0.0627, "step": 26208 }, { "epoch": 0.4641492559780756, "grad_norm": 0.575259268283844, "learning_rate": 1.7458958055123564e-05, "loss": 0.0967, "step": 26209 }, { "epoch": 0.464166965515104, "grad_norm": 0.7726999521255493, "learning_rate": 1.745810932559201e-05, "loss": 0.0738, "step": 26210 }, { "epoch": 0.46418467505213246, "grad_norm": 0.6420838832855225, "learning_rate": 1.7457260587973518e-05, "loss": 0.0582, "step": 26211 }, { "epoch": 0.46420238458916085, "grad_norm": 0.47545909881591797, "learning_rate": 1.745641184227089e-05, "loss": 0.0448, "step": 26212 }, { "epoch": 0.4642200941261893, "grad_norm": 0.7781044840812683, "learning_rate": 1.7455563088486914e-05, "loss": 0.067, "step": 26213 }, { "epoch": 0.4642378036632177, "grad_norm": 0.9596928954124451, "learning_rate": 1.7454714326624387e-05, "loss": 0.0858, "step": 26214 }, { "epoch": 0.46425551320024616, "grad_norm": 0.7607917189598083, "learning_rate": 1.74538655566861e-05, "loss": 0.1055, "step": 26215 }, { "epoch": 0.4642732227372746, "grad_norm": 0.6141498684883118, "learning_rate": 1.7453016778674835e-05, "loss": 0.1171, "step": 26216 }, { "epoch": 0.464290932274303, "grad_norm": 0.6999301314353943, "learning_rate": 1.7452167992593403e-05, "loss": 0.0612, "step": 26217 }, { "epoch": 0.46430864181133147, "grad_norm": 0.7522624135017395, "learning_rate": 1.745131919844458e-05, "loss": 0.0763, "step": 26218 }, { "epoch": 0.46432635134835987, "grad_norm": 0.7021920680999756, "learning_rate": 1.7450470396231167e-05, "loss": 0.0698, "step": 26219 }, { "epoch": 0.4643440608853883, "grad_norm": 0.6249196529388428, "learning_rate": 1.7449621585955953e-05, "loss": 0.098, "step": 26220 }, { "epoch": 0.4643617704224167, "grad_norm": 0.4919811487197876, "learning_rate": 1.7448772767621733e-05, "loss": 0.0328, "step": 26221 }, { "epoch": 0.4643794799594452, "grad_norm": 0.7490333914756775, "learning_rate": 1.7447923941231294e-05, "loss": 0.0787, "step": 26222 }, { "epoch": 0.4643971894964736, "grad_norm": 0.46623221039772034, "learning_rate": 1.744707510678744e-05, "loss": 0.0719, "step": 26223 }, { "epoch": 0.464414899033502, "grad_norm": 0.5361999273300171, "learning_rate": 1.7446226264292952e-05, "loss": 0.0558, "step": 26224 }, { "epoch": 0.4644326085705304, "grad_norm": 0.7633761763572693, "learning_rate": 1.744537741375063e-05, "loss": 0.1018, "step": 26225 }, { "epoch": 0.4644503181075589, "grad_norm": 0.5847926735877991, "learning_rate": 1.744452855516326e-05, "loss": 0.066, "step": 26226 }, { "epoch": 0.4644680276445873, "grad_norm": 0.7171745896339417, "learning_rate": 1.744367968853364e-05, "loss": 0.1021, "step": 26227 }, { "epoch": 0.46448573718161573, "grad_norm": 0.6565280556678772, "learning_rate": 1.7442830813864564e-05, "loss": 0.0982, "step": 26228 }, { "epoch": 0.46450344671864413, "grad_norm": 0.6460938453674316, "learning_rate": 1.7441981931158814e-05, "loss": 0.0693, "step": 26229 }, { "epoch": 0.4645211562556726, "grad_norm": 0.6385923027992249, "learning_rate": 1.7441133040419198e-05, "loss": 0.0669, "step": 26230 }, { "epoch": 0.46453886579270104, "grad_norm": 0.8020142912864685, "learning_rate": 1.7440284141648496e-05, "loss": 0.1032, "step": 26231 }, { "epoch": 0.46455657532972944, "grad_norm": 0.8061662912368774, "learning_rate": 1.743943523484951e-05, "loss": 0.0524, "step": 26232 }, { "epoch": 0.4645742848667579, "grad_norm": 0.6722242832183838, "learning_rate": 1.7438586320025027e-05, "loss": 0.0835, "step": 26233 }, { "epoch": 0.4645919944037863, "grad_norm": 0.7793601155281067, "learning_rate": 1.743773739717784e-05, "loss": 0.0642, "step": 26234 }, { "epoch": 0.46460970394081474, "grad_norm": 0.31152138113975525, "learning_rate": 1.7436888466310743e-05, "loss": 0.0676, "step": 26235 }, { "epoch": 0.46462741347784314, "grad_norm": 0.6828815937042236, "learning_rate": 1.743603952742653e-05, "loss": 0.1088, "step": 26236 }, { "epoch": 0.4646451230148716, "grad_norm": 0.909769594669342, "learning_rate": 1.7435190580527998e-05, "loss": 0.0666, "step": 26237 }, { "epoch": 0.4646628325519, "grad_norm": 0.5951590538024902, "learning_rate": 1.7434341625617934e-05, "loss": 0.0616, "step": 26238 }, { "epoch": 0.46468054208892845, "grad_norm": 0.9053525328636169, "learning_rate": 1.743349266269913e-05, "loss": 0.0853, "step": 26239 }, { "epoch": 0.46469825162595685, "grad_norm": 0.5801853537559509, "learning_rate": 1.743264369177438e-05, "loss": 0.0908, "step": 26240 }, { "epoch": 0.4647159611629853, "grad_norm": 0.6959654092788696, "learning_rate": 1.743179471284648e-05, "loss": 0.0757, "step": 26241 }, { "epoch": 0.4647336707000137, "grad_norm": 0.5935226678848267, "learning_rate": 1.7430945725918223e-05, "loss": 0.0714, "step": 26242 }, { "epoch": 0.46475138023704216, "grad_norm": 0.501041829586029, "learning_rate": 1.7430096730992398e-05, "loss": 0.0705, "step": 26243 }, { "epoch": 0.46476908977407055, "grad_norm": 0.8340871930122375, "learning_rate": 1.7429247728071802e-05, "loss": 0.0821, "step": 26244 }, { "epoch": 0.464786799311099, "grad_norm": 0.7685747146606445, "learning_rate": 1.742839871715922e-05, "loss": 0.0665, "step": 26245 }, { "epoch": 0.46480450884812746, "grad_norm": 1.0567772388458252, "learning_rate": 1.742754969825746e-05, "loss": 0.0973, "step": 26246 }, { "epoch": 0.46482221838515586, "grad_norm": 0.616547703742981, "learning_rate": 1.7426700671369305e-05, "loss": 0.0691, "step": 26247 }, { "epoch": 0.4648399279221843, "grad_norm": 0.5863080024719238, "learning_rate": 1.7425851636497553e-05, "loss": 0.065, "step": 26248 }, { "epoch": 0.4648576374592127, "grad_norm": 0.691707968711853, "learning_rate": 1.742500259364499e-05, "loss": 0.066, "step": 26249 }, { "epoch": 0.46487534699624117, "grad_norm": 0.7305141091346741, "learning_rate": 1.7424153542814416e-05, "loss": 0.0654, "step": 26250 }, { "epoch": 0.46489305653326957, "grad_norm": 0.9173481464385986, "learning_rate": 1.7423304484008623e-05, "loss": 0.0855, "step": 26251 }, { "epoch": 0.464910766070298, "grad_norm": 0.5292474627494812, "learning_rate": 1.74224554172304e-05, "loss": 0.0436, "step": 26252 }, { "epoch": 0.4649284756073264, "grad_norm": 0.8918182253837585, "learning_rate": 1.7421606342482545e-05, "loss": 0.0771, "step": 26253 }, { "epoch": 0.4649461851443549, "grad_norm": 0.6829769611358643, "learning_rate": 1.742075725976785e-05, "loss": 0.0613, "step": 26254 }, { "epoch": 0.46496389468138327, "grad_norm": 0.4759274125099182, "learning_rate": 1.7419908169089112e-05, "loss": 0.0543, "step": 26255 }, { "epoch": 0.4649816042184117, "grad_norm": 0.4811229407787323, "learning_rate": 1.7419059070449118e-05, "loss": 0.05, "step": 26256 }, { "epoch": 0.4649993137554401, "grad_norm": 0.6394049525260925, "learning_rate": 1.7418209963850663e-05, "loss": 0.0551, "step": 26257 }, { "epoch": 0.4650170232924686, "grad_norm": 0.5970050096511841, "learning_rate": 1.7417360849296545e-05, "loss": 0.0464, "step": 26258 }, { "epoch": 0.465034732829497, "grad_norm": 0.7407974004745483, "learning_rate": 1.7416511726789548e-05, "loss": 0.0723, "step": 26259 }, { "epoch": 0.46505244236652543, "grad_norm": 0.697288990020752, "learning_rate": 1.741566259633248e-05, "loss": 0.0735, "step": 26260 }, { "epoch": 0.4650701519035539, "grad_norm": 1.014572262763977, "learning_rate": 1.741481345792812e-05, "loss": 0.0896, "step": 26261 }, { "epoch": 0.4650878614405823, "grad_norm": 0.3942214846611023, "learning_rate": 1.7413964311579272e-05, "loss": 0.0763, "step": 26262 }, { "epoch": 0.46510557097761074, "grad_norm": 0.7333115339279175, "learning_rate": 1.741311515728872e-05, "loss": 0.0964, "step": 26263 }, { "epoch": 0.46512328051463914, "grad_norm": 0.8078610897064209, "learning_rate": 1.741226599505927e-05, "loss": 0.0909, "step": 26264 }, { "epoch": 0.4651409900516676, "grad_norm": 0.5536862015724182, "learning_rate": 1.7411416824893706e-05, "loss": 0.0624, "step": 26265 }, { "epoch": 0.465158699588696, "grad_norm": 0.6478779315948486, "learning_rate": 1.7410567646794825e-05, "loss": 0.0985, "step": 26266 }, { "epoch": 0.46517640912572444, "grad_norm": 0.9767842292785645, "learning_rate": 1.740971846076542e-05, "loss": 0.0858, "step": 26267 }, { "epoch": 0.46519411866275284, "grad_norm": 0.5461781024932861, "learning_rate": 1.7408869266808288e-05, "loss": 0.0675, "step": 26268 }, { "epoch": 0.4652118281997813, "grad_norm": 0.7535248398780823, "learning_rate": 1.7408020064926212e-05, "loss": 0.0912, "step": 26269 }, { "epoch": 0.4652295377368097, "grad_norm": 0.9318046569824219, "learning_rate": 1.7407170855122e-05, "loss": 0.1105, "step": 26270 }, { "epoch": 0.46524724727383815, "grad_norm": 0.849466860294342, "learning_rate": 1.7406321637398436e-05, "loss": 0.0676, "step": 26271 }, { "epoch": 0.46526495681086655, "grad_norm": 0.5957752466201782, "learning_rate": 1.740547241175832e-05, "loss": 0.056, "step": 26272 }, { "epoch": 0.465282666347895, "grad_norm": 0.9675704836845398, "learning_rate": 1.7404623178204442e-05, "loss": 0.0923, "step": 26273 }, { "epoch": 0.46530037588492346, "grad_norm": 1.04668128490448, "learning_rate": 1.7403773936739598e-05, "loss": 0.0941, "step": 26274 }, { "epoch": 0.46531808542195185, "grad_norm": 0.8506333231925964, "learning_rate": 1.7402924687366577e-05, "loss": 0.0724, "step": 26275 }, { "epoch": 0.4653357949589803, "grad_norm": 0.6913495063781738, "learning_rate": 1.740207543008818e-05, "loss": 0.0849, "step": 26276 }, { "epoch": 0.4653535044960087, "grad_norm": 1.473197340965271, "learning_rate": 1.7401226164907198e-05, "loss": 0.1035, "step": 26277 }, { "epoch": 0.46537121403303716, "grad_norm": 0.5446269512176514, "learning_rate": 1.7400376891826423e-05, "loss": 0.0689, "step": 26278 }, { "epoch": 0.46538892357006556, "grad_norm": 0.6764471530914307, "learning_rate": 1.739952761084865e-05, "loss": 0.0848, "step": 26279 }, { "epoch": 0.465406633107094, "grad_norm": 0.5817613005638123, "learning_rate": 1.7398678321976678e-05, "loss": 0.0592, "step": 26280 }, { "epoch": 0.4654243426441224, "grad_norm": 0.6723836064338684, "learning_rate": 1.73978290252133e-05, "loss": 0.1015, "step": 26281 }, { "epoch": 0.46544205218115087, "grad_norm": 0.9547903537750244, "learning_rate": 1.7396979720561298e-05, "loss": 0.0682, "step": 26282 }, { "epoch": 0.46545976171817927, "grad_norm": 0.7966780066490173, "learning_rate": 1.7396130408023475e-05, "loss": 0.0982, "step": 26283 }, { "epoch": 0.4654774712552077, "grad_norm": 0.48722296953201294, "learning_rate": 1.739528108760263e-05, "loss": 0.0485, "step": 26284 }, { "epoch": 0.4654951807922361, "grad_norm": 1.1657648086547852, "learning_rate": 1.7394431759301556e-05, "loss": 0.0868, "step": 26285 }, { "epoch": 0.4655128903292646, "grad_norm": 0.6920551061630249, "learning_rate": 1.7393582423123037e-05, "loss": 0.07, "step": 26286 }, { "epoch": 0.46553059986629297, "grad_norm": 0.8218438625335693, "learning_rate": 1.739273307906988e-05, "loss": 0.0629, "step": 26287 }, { "epoch": 0.4655483094033214, "grad_norm": 0.6952908039093018, "learning_rate": 1.7391883727144865e-05, "loss": 0.0919, "step": 26288 }, { "epoch": 0.4655660189403499, "grad_norm": 0.7769994139671326, "learning_rate": 1.73910343673508e-05, "loss": 0.0695, "step": 26289 }, { "epoch": 0.4655837284773783, "grad_norm": 0.49101483821868896, "learning_rate": 1.7390184999690474e-05, "loss": 0.0644, "step": 26290 }, { "epoch": 0.46560143801440673, "grad_norm": 0.595041036605835, "learning_rate": 1.738933562416668e-05, "loss": 0.0825, "step": 26291 }, { "epoch": 0.46561914755143513, "grad_norm": 0.43354326486587524, "learning_rate": 1.738848624078221e-05, "loss": 0.0597, "step": 26292 }, { "epoch": 0.4656368570884636, "grad_norm": 0.3828199803829193, "learning_rate": 1.7387636849539864e-05, "loss": 0.0512, "step": 26293 }, { "epoch": 0.465654566625492, "grad_norm": 0.6778378486633301, "learning_rate": 1.7386787450442437e-05, "loss": 0.0873, "step": 26294 }, { "epoch": 0.46567227616252044, "grad_norm": 0.6534101963043213, "learning_rate": 1.738593804349272e-05, "loss": 0.044, "step": 26295 }, { "epoch": 0.46568998569954884, "grad_norm": 0.8181159496307373, "learning_rate": 1.738508862869351e-05, "loss": 0.1053, "step": 26296 }, { "epoch": 0.4657076952365773, "grad_norm": 0.6328641772270203, "learning_rate": 1.7384239206047594e-05, "loss": 0.0702, "step": 26297 }, { "epoch": 0.4657254047736057, "grad_norm": 0.8481858372688293, "learning_rate": 1.7383389775557773e-05, "loss": 0.0842, "step": 26298 }, { "epoch": 0.46574311431063414, "grad_norm": 0.6383642554283142, "learning_rate": 1.7382540337226843e-05, "loss": 0.0759, "step": 26299 }, { "epoch": 0.46576082384766254, "grad_norm": 1.1100085973739624, "learning_rate": 1.7381690891057596e-05, "loss": 0.1135, "step": 26300 }, { "epoch": 0.465778533384691, "grad_norm": 0.49200764298439026, "learning_rate": 1.738084143705283e-05, "loss": 0.0543, "step": 26301 }, { "epoch": 0.4657962429217194, "grad_norm": 0.9123039841651917, "learning_rate": 1.7379991975215333e-05, "loss": 0.0754, "step": 26302 }, { "epoch": 0.46581395245874785, "grad_norm": 0.7897251844406128, "learning_rate": 1.73791425055479e-05, "loss": 0.0712, "step": 26303 }, { "epoch": 0.4658316619957763, "grad_norm": 0.5982465147972107, "learning_rate": 1.7378293028053335e-05, "loss": 0.0747, "step": 26304 }, { "epoch": 0.4658493715328047, "grad_norm": 0.9340787529945374, "learning_rate": 1.737744354273442e-05, "loss": 0.1043, "step": 26305 }, { "epoch": 0.46586708106983316, "grad_norm": 0.4396582245826721, "learning_rate": 1.7376594049593965e-05, "loss": 0.0586, "step": 26306 }, { "epoch": 0.46588479060686155, "grad_norm": 0.676033079624176, "learning_rate": 1.7375744548634747e-05, "loss": 0.0896, "step": 26307 }, { "epoch": 0.46590250014389, "grad_norm": 0.4919794797897339, "learning_rate": 1.7374895039859576e-05, "loss": 0.0563, "step": 26308 }, { "epoch": 0.4659202096809184, "grad_norm": 0.9355981349945068, "learning_rate": 1.7374045523271235e-05, "loss": 0.1256, "step": 26309 }, { "epoch": 0.46593791921794686, "grad_norm": 0.7782089114189148, "learning_rate": 1.7373195998872532e-05, "loss": 0.0581, "step": 26310 }, { "epoch": 0.46595562875497526, "grad_norm": 0.6203729510307312, "learning_rate": 1.7372346466666245e-05, "loss": 0.0805, "step": 26311 }, { "epoch": 0.4659733382920037, "grad_norm": 0.4859638810157776, "learning_rate": 1.7371496926655183e-05, "loss": 0.0682, "step": 26312 }, { "epoch": 0.4659910478290321, "grad_norm": 0.5640116333961487, "learning_rate": 1.7370647378842138e-05, "loss": 0.0495, "step": 26313 }, { "epoch": 0.46600875736606057, "grad_norm": 0.8179793953895569, "learning_rate": 1.73697978232299e-05, "loss": 0.0945, "step": 26314 }, { "epoch": 0.46602646690308897, "grad_norm": 0.4578303396701813, "learning_rate": 1.7368948259821272e-05, "loss": 0.0661, "step": 26315 }, { "epoch": 0.4660441764401174, "grad_norm": 0.8158304691314697, "learning_rate": 1.7368098688619035e-05, "loss": 0.1125, "step": 26316 }, { "epoch": 0.4660618859771458, "grad_norm": 0.305862158536911, "learning_rate": 1.7367249109625995e-05, "loss": 0.0621, "step": 26317 }, { "epoch": 0.46607959551417427, "grad_norm": 0.8098322749137878, "learning_rate": 1.736639952284495e-05, "loss": 0.0801, "step": 26318 }, { "epoch": 0.4660973050512027, "grad_norm": 0.4718693494796753, "learning_rate": 1.7365549928278686e-05, "loss": 0.0786, "step": 26319 }, { "epoch": 0.4661150145882311, "grad_norm": 0.5937814116477966, "learning_rate": 1.7364700325930008e-05, "loss": 0.0753, "step": 26320 }, { "epoch": 0.4661327241252596, "grad_norm": 0.7979339361190796, "learning_rate": 1.7363850715801694e-05, "loss": 0.1227, "step": 26321 }, { "epoch": 0.466150433662288, "grad_norm": 0.8357315063476562, "learning_rate": 1.7363001097896557e-05, "loss": 0.0464, "step": 26322 }, { "epoch": 0.46616814319931643, "grad_norm": 0.6221742630004883, "learning_rate": 1.7362151472217382e-05, "loss": 0.0772, "step": 26323 }, { "epoch": 0.46618585273634483, "grad_norm": 0.7838717103004456, "learning_rate": 1.7361301838766975e-05, "loss": 0.0532, "step": 26324 }, { "epoch": 0.4662035622733733, "grad_norm": 1.0982805490493774, "learning_rate": 1.7360452197548117e-05, "loss": 0.056, "step": 26325 }, { "epoch": 0.4662212718104017, "grad_norm": 0.5584080815315247, "learning_rate": 1.735960254856361e-05, "loss": 0.0662, "step": 26326 }, { "epoch": 0.46623898134743014, "grad_norm": 0.713158369064331, "learning_rate": 1.7358752891816253e-05, "loss": 0.083, "step": 26327 }, { "epoch": 0.46625669088445854, "grad_norm": 0.5330527424812317, "learning_rate": 1.7357903227308837e-05, "loss": 0.0647, "step": 26328 }, { "epoch": 0.466274400421487, "grad_norm": 0.6958756446838379, "learning_rate": 1.735705355504416e-05, "loss": 0.1206, "step": 26329 }, { "epoch": 0.4662921099585154, "grad_norm": 0.4016999900341034, "learning_rate": 1.735620387502501e-05, "loss": 0.0394, "step": 26330 }, { "epoch": 0.46630981949554384, "grad_norm": 0.8636496663093567, "learning_rate": 1.7355354187254188e-05, "loss": 0.0762, "step": 26331 }, { "epoch": 0.46632752903257224, "grad_norm": 0.7262299060821533, "learning_rate": 1.735450449173449e-05, "loss": 0.0822, "step": 26332 }, { "epoch": 0.4663452385696007, "grad_norm": 0.6351091861724854, "learning_rate": 1.735365478846871e-05, "loss": 0.0538, "step": 26333 }, { "epoch": 0.46636294810662915, "grad_norm": 0.5225252509117126, "learning_rate": 1.7352805077459646e-05, "loss": 0.0722, "step": 26334 }, { "epoch": 0.46638065764365755, "grad_norm": 1.03704833984375, "learning_rate": 1.735195535871009e-05, "loss": 0.0842, "step": 26335 }, { "epoch": 0.466398367180686, "grad_norm": 0.5274146795272827, "learning_rate": 1.7351105632222838e-05, "loss": 0.0536, "step": 26336 }, { "epoch": 0.4664160767177144, "grad_norm": 0.37595880031585693, "learning_rate": 1.7350255898000687e-05, "loss": 0.0466, "step": 26337 }, { "epoch": 0.46643378625474285, "grad_norm": 0.4070683419704437, "learning_rate": 1.7349406156046434e-05, "loss": 0.0712, "step": 26338 }, { "epoch": 0.46645149579177125, "grad_norm": 0.5637058019638062, "learning_rate": 1.734855640636287e-05, "loss": 0.0415, "step": 26339 }, { "epoch": 0.4664692053287997, "grad_norm": 0.8539628982543945, "learning_rate": 1.734770664895279e-05, "loss": 0.0668, "step": 26340 }, { "epoch": 0.4664869148658281, "grad_norm": 0.5472758412361145, "learning_rate": 1.7346856883818996e-05, "loss": 0.0832, "step": 26341 }, { "epoch": 0.46650462440285656, "grad_norm": 0.61411052942276, "learning_rate": 1.734600711096428e-05, "loss": 0.0729, "step": 26342 }, { "epoch": 0.46652233393988496, "grad_norm": 0.6243573427200317, "learning_rate": 1.734515733039144e-05, "loss": 0.0627, "step": 26343 }, { "epoch": 0.4665400434769134, "grad_norm": 1.0595297813415527, "learning_rate": 1.7344307542103267e-05, "loss": 0.0753, "step": 26344 }, { "epoch": 0.4665577530139418, "grad_norm": 0.6426959037780762, "learning_rate": 1.7343457746102558e-05, "loss": 0.0852, "step": 26345 }, { "epoch": 0.46657546255097027, "grad_norm": 0.9525996446609497, "learning_rate": 1.734260794239211e-05, "loss": 0.1271, "step": 26346 }, { "epoch": 0.46659317208799866, "grad_norm": 0.5111520886421204, "learning_rate": 1.7341758130974725e-05, "loss": 0.0658, "step": 26347 }, { "epoch": 0.4666108816250271, "grad_norm": 0.5507128238677979, "learning_rate": 1.7340908311853185e-05, "loss": 0.0959, "step": 26348 }, { "epoch": 0.4666285911620556, "grad_norm": 0.6013649702072144, "learning_rate": 1.73400584850303e-05, "loss": 0.0423, "step": 26349 }, { "epoch": 0.46664630069908397, "grad_norm": 0.8081457614898682, "learning_rate": 1.733920865050885e-05, "loss": 0.1046, "step": 26350 }, { "epoch": 0.4666640102361124, "grad_norm": 0.7601976990699768, "learning_rate": 1.7338358808291647e-05, "loss": 0.0739, "step": 26351 }, { "epoch": 0.4666817197731408, "grad_norm": 0.722513735294342, "learning_rate": 1.7337508958381483e-05, "loss": 0.0717, "step": 26352 }, { "epoch": 0.4666994293101693, "grad_norm": 0.7020745873451233, "learning_rate": 1.7336659100781146e-05, "loss": 0.0704, "step": 26353 }, { "epoch": 0.4667171388471977, "grad_norm": 0.8688532710075378, "learning_rate": 1.7335809235493434e-05, "loss": 0.0658, "step": 26354 }, { "epoch": 0.46673484838422613, "grad_norm": 0.7639199495315552, "learning_rate": 1.733495936252115e-05, "loss": 0.0635, "step": 26355 }, { "epoch": 0.46675255792125453, "grad_norm": 0.533341109752655, "learning_rate": 1.7334109481867088e-05, "loss": 0.0749, "step": 26356 }, { "epoch": 0.466770267458283, "grad_norm": 0.6222018599510193, "learning_rate": 1.7333259593534038e-05, "loss": 0.0713, "step": 26357 }, { "epoch": 0.4667879769953114, "grad_norm": 0.7334274053573608, "learning_rate": 1.7332409697524803e-05, "loss": 0.1113, "step": 26358 }, { "epoch": 0.46680568653233984, "grad_norm": 0.6425809264183044, "learning_rate": 1.7331559793842175e-05, "loss": 0.0621, "step": 26359 }, { "epoch": 0.46682339606936823, "grad_norm": 0.629101574420929, "learning_rate": 1.7330709882488948e-05, "loss": 0.0509, "step": 26360 }, { "epoch": 0.4668411056063967, "grad_norm": 0.5479440093040466, "learning_rate": 1.7329859963467925e-05, "loss": 0.0855, "step": 26361 }, { "epoch": 0.4668588151434251, "grad_norm": 0.6423470973968506, "learning_rate": 1.7329010036781894e-05, "loss": 0.0762, "step": 26362 }, { "epoch": 0.46687652468045354, "grad_norm": 0.907814085483551, "learning_rate": 1.7328160102433665e-05, "loss": 0.092, "step": 26363 }, { "epoch": 0.466894234217482, "grad_norm": 0.49491870403289795, "learning_rate": 1.7327310160426017e-05, "loss": 0.0788, "step": 26364 }, { "epoch": 0.4669119437545104, "grad_norm": 0.8503424525260925, "learning_rate": 1.732646021076175e-05, "loss": 0.0677, "step": 26365 }, { "epoch": 0.46692965329153885, "grad_norm": 1.09843909740448, "learning_rate": 1.7325610253443672e-05, "loss": 0.077, "step": 26366 }, { "epoch": 0.46694736282856725, "grad_norm": 0.4567486047744751, "learning_rate": 1.732476028847457e-05, "loss": 0.0545, "step": 26367 }, { "epoch": 0.4669650723655957, "grad_norm": 0.5510638356208801, "learning_rate": 1.7323910315857244e-05, "loss": 0.0677, "step": 26368 }, { "epoch": 0.4669827819026241, "grad_norm": 0.5772054195404053, "learning_rate": 1.7323060335594478e-05, "loss": 0.0639, "step": 26369 }, { "epoch": 0.46700049143965255, "grad_norm": 0.6348274946212769, "learning_rate": 1.7322210347689087e-05, "loss": 0.085, "step": 26370 }, { "epoch": 0.46701820097668095, "grad_norm": 0.49063828587532043, "learning_rate": 1.732136035214386e-05, "loss": 0.0917, "step": 26371 }, { "epoch": 0.4670359105137094, "grad_norm": 0.9617887139320374, "learning_rate": 1.7320510348961588e-05, "loss": 0.0698, "step": 26372 }, { "epoch": 0.4670536200507378, "grad_norm": 0.6533712148666382, "learning_rate": 1.7319660338145073e-05, "loss": 0.0775, "step": 26373 }, { "epoch": 0.46707132958776626, "grad_norm": 0.5754501819610596, "learning_rate": 1.731881031969711e-05, "loss": 0.0816, "step": 26374 }, { "epoch": 0.46708903912479466, "grad_norm": 0.6494819521903992, "learning_rate": 1.7317960293620498e-05, "loss": 0.0868, "step": 26375 }, { "epoch": 0.4671067486618231, "grad_norm": 0.3348020613193512, "learning_rate": 1.7317110259918028e-05, "loss": 0.0536, "step": 26376 }, { "epoch": 0.4671244581988515, "grad_norm": 0.4935278296470642, "learning_rate": 1.73162602185925e-05, "loss": 0.0701, "step": 26377 }, { "epoch": 0.46714216773587997, "grad_norm": 0.6485896706581116, "learning_rate": 1.7315410169646714e-05, "loss": 0.0836, "step": 26378 }, { "epoch": 0.4671598772729084, "grad_norm": 0.548224925994873, "learning_rate": 1.7314560113083458e-05, "loss": 0.0667, "step": 26379 }, { "epoch": 0.4671775868099368, "grad_norm": 0.8224548101425171, "learning_rate": 1.731371004890553e-05, "loss": 0.0978, "step": 26380 }, { "epoch": 0.46719529634696527, "grad_norm": 0.6439711451530457, "learning_rate": 1.7312859977115737e-05, "loss": 0.1043, "step": 26381 }, { "epoch": 0.46721300588399367, "grad_norm": 0.5188706517219543, "learning_rate": 1.7312009897716865e-05, "loss": 0.0705, "step": 26382 }, { "epoch": 0.4672307154210221, "grad_norm": 1.5311713218688965, "learning_rate": 1.7311159810711716e-05, "loss": 0.0926, "step": 26383 }, { "epoch": 0.4672484249580505, "grad_norm": 0.40302368998527527, "learning_rate": 1.7310309716103087e-05, "loss": 0.0819, "step": 26384 }, { "epoch": 0.467266134495079, "grad_norm": 0.3965652883052826, "learning_rate": 1.7309459613893768e-05, "loss": 0.0647, "step": 26385 }, { "epoch": 0.4672838440321074, "grad_norm": 0.8577683568000793, "learning_rate": 1.7308609504086563e-05, "loss": 0.0637, "step": 26386 }, { "epoch": 0.46730155356913583, "grad_norm": 0.7162535786628723, "learning_rate": 1.7307759386684267e-05, "loss": 0.0982, "step": 26387 }, { "epoch": 0.46731926310616423, "grad_norm": 0.6699587106704712, "learning_rate": 1.7306909261689674e-05, "loss": 0.0924, "step": 26388 }, { "epoch": 0.4673369726431927, "grad_norm": 0.4496946632862091, "learning_rate": 1.730605912910558e-05, "loss": 0.0769, "step": 26389 }, { "epoch": 0.4673546821802211, "grad_norm": 0.6012686491012573, "learning_rate": 1.730520898893479e-05, "loss": 0.0529, "step": 26390 }, { "epoch": 0.46737239171724954, "grad_norm": 0.8091077208518982, "learning_rate": 1.73043588411801e-05, "loss": 0.0949, "step": 26391 }, { "epoch": 0.46739010125427793, "grad_norm": 0.965193510055542, "learning_rate": 1.7303508685844295e-05, "loss": 0.1148, "step": 26392 }, { "epoch": 0.4674078107913064, "grad_norm": 0.5721014142036438, "learning_rate": 1.730265852293018e-05, "loss": 0.0875, "step": 26393 }, { "epoch": 0.46742552032833484, "grad_norm": 0.6408958435058594, "learning_rate": 1.7301808352440553e-05, "loss": 0.0694, "step": 26394 }, { "epoch": 0.46744322986536324, "grad_norm": 0.7419156432151794, "learning_rate": 1.7300958174378206e-05, "loss": 0.1003, "step": 26395 }, { "epoch": 0.4674609394023917, "grad_norm": 1.1039947271347046, "learning_rate": 1.7300107988745943e-05, "loss": 0.0649, "step": 26396 }, { "epoch": 0.4674786489394201, "grad_norm": 1.0809321403503418, "learning_rate": 1.7299257795546556e-05, "loss": 0.0771, "step": 26397 }, { "epoch": 0.46749635847644855, "grad_norm": 0.4736548960208893, "learning_rate": 1.7298407594782842e-05, "loss": 0.0618, "step": 26398 }, { "epoch": 0.46751406801347695, "grad_norm": 0.8436186909675598, "learning_rate": 1.7297557386457604e-05, "loss": 0.069, "step": 26399 }, { "epoch": 0.4675317775505054, "grad_norm": 0.6556485891342163, "learning_rate": 1.7296707170573634e-05, "loss": 0.0756, "step": 26400 }, { "epoch": 0.4675494870875338, "grad_norm": 0.8190597295761108, "learning_rate": 1.7295856947133723e-05, "loss": 0.078, "step": 26401 }, { "epoch": 0.46756719662456225, "grad_norm": 0.4622569680213928, "learning_rate": 1.729500671614068e-05, "loss": 0.0776, "step": 26402 }, { "epoch": 0.46758490616159065, "grad_norm": 0.8194805979728699, "learning_rate": 1.72941564775973e-05, "loss": 0.0954, "step": 26403 }, { "epoch": 0.4676026156986191, "grad_norm": 0.5880872011184692, "learning_rate": 1.7293306231506372e-05, "loss": 0.0946, "step": 26404 }, { "epoch": 0.4676203252356475, "grad_norm": 0.6751596927642822, "learning_rate": 1.72924559778707e-05, "loss": 0.0827, "step": 26405 }, { "epoch": 0.46763803477267596, "grad_norm": 0.5464972853660583, "learning_rate": 1.7291605716693082e-05, "loss": 0.0438, "step": 26406 }, { "epoch": 0.46765574430970436, "grad_norm": 1.071842908859253, "learning_rate": 1.729075544797631e-05, "loss": 0.1143, "step": 26407 }, { "epoch": 0.4676734538467328, "grad_norm": 0.40947988629341125, "learning_rate": 1.7289905171723183e-05, "loss": 0.0394, "step": 26408 }, { "epoch": 0.46769116338376127, "grad_norm": 0.8499123454093933, "learning_rate": 1.7289054887936505e-05, "loss": 0.0983, "step": 26409 }, { "epoch": 0.46770887292078966, "grad_norm": 1.050378441810608, "learning_rate": 1.728820459661906e-05, "loss": 0.0587, "step": 26410 }, { "epoch": 0.4677265824578181, "grad_norm": 0.5969806909561157, "learning_rate": 1.728735429777366e-05, "loss": 0.0605, "step": 26411 }, { "epoch": 0.4677442919948465, "grad_norm": 0.8383982181549072, "learning_rate": 1.7286503991403092e-05, "loss": 0.1074, "step": 26412 }, { "epoch": 0.46776200153187497, "grad_norm": 0.3872240483760834, "learning_rate": 1.728565367751016e-05, "loss": 0.0426, "step": 26413 }, { "epoch": 0.46777971106890337, "grad_norm": 0.6989473104476929, "learning_rate": 1.7284803356097658e-05, "loss": 0.1109, "step": 26414 }, { "epoch": 0.4677974206059318, "grad_norm": 0.7477114796638489, "learning_rate": 1.7283953027168386e-05, "loss": 0.0714, "step": 26415 }, { "epoch": 0.4678151301429602, "grad_norm": 0.5756039023399353, "learning_rate": 1.7283102690725136e-05, "loss": 0.0585, "step": 26416 }, { "epoch": 0.4678328396799887, "grad_norm": 0.7619119882583618, "learning_rate": 1.7282252346770705e-05, "loss": 0.0709, "step": 26417 }, { "epoch": 0.4678505492170171, "grad_norm": 0.7801205515861511, "learning_rate": 1.7281401995307904e-05, "loss": 0.0998, "step": 26418 }, { "epoch": 0.46786825875404553, "grad_norm": 0.857575535774231, "learning_rate": 1.7280551636339513e-05, "loss": 0.0749, "step": 26419 }, { "epoch": 0.46788596829107393, "grad_norm": 0.8828892111778259, "learning_rate": 1.7279701269868345e-05, "loss": 0.1037, "step": 26420 }, { "epoch": 0.4679036778281024, "grad_norm": 0.3276183605194092, "learning_rate": 1.7278850895897185e-05, "loss": 0.0457, "step": 26421 }, { "epoch": 0.4679213873651308, "grad_norm": 0.6507564783096313, "learning_rate": 1.7278000514428838e-05, "loss": 0.0654, "step": 26422 }, { "epoch": 0.46793909690215924, "grad_norm": 0.6179741024971008, "learning_rate": 1.7277150125466103e-05, "loss": 0.0808, "step": 26423 }, { "epoch": 0.4679568064391877, "grad_norm": 0.4639166295528412, "learning_rate": 1.727629972901177e-05, "loss": 0.0709, "step": 26424 }, { "epoch": 0.4679745159762161, "grad_norm": 0.6684831380844116, "learning_rate": 1.727544932506864e-05, "loss": 0.0731, "step": 26425 }, { "epoch": 0.46799222551324454, "grad_norm": 0.7946586608886719, "learning_rate": 1.7274598913639515e-05, "loss": 0.0815, "step": 26426 }, { "epoch": 0.46800993505027294, "grad_norm": 0.5372064113616943, "learning_rate": 1.727374849472719e-05, "loss": 0.0307, "step": 26427 }, { "epoch": 0.4680276445873014, "grad_norm": 0.5242475867271423, "learning_rate": 1.7272898068334457e-05, "loss": 0.0889, "step": 26428 }, { "epoch": 0.4680453541243298, "grad_norm": 0.7035139203071594, "learning_rate": 1.7272047634464126e-05, "loss": 0.0983, "step": 26429 }, { "epoch": 0.46806306366135825, "grad_norm": 0.7301856279373169, "learning_rate": 1.727119719311899e-05, "loss": 0.0533, "step": 26430 }, { "epoch": 0.46808077319838665, "grad_norm": 1.1694331169128418, "learning_rate": 1.7270346744301836e-05, "loss": 0.0627, "step": 26431 }, { "epoch": 0.4680984827354151, "grad_norm": 0.4897821843624115, "learning_rate": 1.726949628801548e-05, "loss": 0.0788, "step": 26432 }, { "epoch": 0.4681161922724435, "grad_norm": 0.6959812641143799, "learning_rate": 1.72686458242627e-05, "loss": 0.1067, "step": 26433 }, { "epoch": 0.46813390180947195, "grad_norm": 0.6409515142440796, "learning_rate": 1.7267795353046315e-05, "loss": 0.0757, "step": 26434 }, { "epoch": 0.46815161134650035, "grad_norm": 0.963344395160675, "learning_rate": 1.726694487436911e-05, "loss": 0.1044, "step": 26435 }, { "epoch": 0.4681693208835288, "grad_norm": 0.9161853790283203, "learning_rate": 1.726609438823388e-05, "loss": 0.099, "step": 26436 }, { "epoch": 0.4681870304205572, "grad_norm": 0.6897559762001038, "learning_rate": 1.7265243894643432e-05, "loss": 0.0911, "step": 26437 }, { "epoch": 0.46820473995758566, "grad_norm": 0.5835633277893066, "learning_rate": 1.7264393393600562e-05, "loss": 0.0747, "step": 26438 }, { "epoch": 0.4682224494946141, "grad_norm": 0.5877161622047424, "learning_rate": 1.7263542885108067e-05, "loss": 0.0836, "step": 26439 }, { "epoch": 0.4682401590316425, "grad_norm": 0.7566601634025574, "learning_rate": 1.7262692369168744e-05, "loss": 0.0684, "step": 26440 }, { "epoch": 0.46825786856867097, "grad_norm": 0.43862301111221313, "learning_rate": 1.7261841845785393e-05, "loss": 0.072, "step": 26441 }, { "epoch": 0.46827557810569936, "grad_norm": 0.7730214595794678, "learning_rate": 1.726099131496081e-05, "loss": 0.079, "step": 26442 }, { "epoch": 0.4682932876427278, "grad_norm": 0.7859854698181152, "learning_rate": 1.7260140776697798e-05, "loss": 0.1084, "step": 26443 }, { "epoch": 0.4683109971797562, "grad_norm": 0.8663367033004761, "learning_rate": 1.7259290230999144e-05, "loss": 0.1121, "step": 26444 }, { "epoch": 0.46832870671678467, "grad_norm": 0.7242224812507629, "learning_rate": 1.7258439677867658e-05, "loss": 0.0611, "step": 26445 }, { "epoch": 0.46834641625381307, "grad_norm": 0.6501627564430237, "learning_rate": 1.7257589117306134e-05, "loss": 0.0513, "step": 26446 }, { "epoch": 0.4683641257908415, "grad_norm": 0.7696996927261353, "learning_rate": 1.7256738549317367e-05, "loss": 0.0786, "step": 26447 }, { "epoch": 0.4683818353278699, "grad_norm": 0.829868495464325, "learning_rate": 1.7255887973904164e-05, "loss": 0.0793, "step": 26448 }, { "epoch": 0.4683995448648984, "grad_norm": 1.0034023523330688, "learning_rate": 1.7255037391069312e-05, "loss": 0.1022, "step": 26449 }, { "epoch": 0.4684172544019268, "grad_norm": 0.535771369934082, "learning_rate": 1.7254186800815618e-05, "loss": 0.0568, "step": 26450 }, { "epoch": 0.46843496393895523, "grad_norm": 0.6896476745605469, "learning_rate": 1.7253336203145875e-05, "loss": 0.1027, "step": 26451 }, { "epoch": 0.4684526734759836, "grad_norm": 0.7015489339828491, "learning_rate": 1.725248559806289e-05, "loss": 0.0689, "step": 26452 }, { "epoch": 0.4684703830130121, "grad_norm": 0.6191500425338745, "learning_rate": 1.725163498556945e-05, "loss": 0.0621, "step": 26453 }, { "epoch": 0.46848809255004054, "grad_norm": 0.41789260506629944, "learning_rate": 1.7250784365668362e-05, "loss": 0.0777, "step": 26454 }, { "epoch": 0.46850580208706893, "grad_norm": 0.8026413917541504, "learning_rate": 1.7249933738362417e-05, "loss": 0.0905, "step": 26455 }, { "epoch": 0.4685235116240974, "grad_norm": 0.9039325714111328, "learning_rate": 1.7249083103654418e-05, "loss": 0.0801, "step": 26456 }, { "epoch": 0.4685412211611258, "grad_norm": 0.42544087767601013, "learning_rate": 1.7248232461547168e-05, "loss": 0.0963, "step": 26457 }, { "epoch": 0.46855893069815424, "grad_norm": 0.8102836608886719, "learning_rate": 1.7247381812043454e-05, "loss": 0.0886, "step": 26458 }, { "epoch": 0.46857664023518264, "grad_norm": 0.9289152026176453, "learning_rate": 1.7246531155146088e-05, "loss": 0.0746, "step": 26459 }, { "epoch": 0.4685943497722111, "grad_norm": 0.7240261435508728, "learning_rate": 1.724568049085785e-05, "loss": 0.1031, "step": 26460 }, { "epoch": 0.4686120593092395, "grad_norm": 0.5133612751960754, "learning_rate": 1.7244829819181566e-05, "loss": 0.0562, "step": 26461 }, { "epoch": 0.46862976884626795, "grad_norm": 0.9624108076095581, "learning_rate": 1.7243979140120014e-05, "loss": 0.0687, "step": 26462 }, { "epoch": 0.46864747838329635, "grad_norm": 0.7405878305435181, "learning_rate": 1.7243128453675994e-05, "loss": 0.0697, "step": 26463 }, { "epoch": 0.4686651879203248, "grad_norm": 0.8420228362083435, "learning_rate": 1.7242277759852304e-05, "loss": 0.0785, "step": 26464 }, { "epoch": 0.4686828974573532, "grad_norm": 0.7573959827423096, "learning_rate": 1.724142705865175e-05, "loss": 0.0929, "step": 26465 }, { "epoch": 0.46870060699438165, "grad_norm": 0.6786665916442871, "learning_rate": 1.7240576350077135e-05, "loss": 0.0538, "step": 26466 }, { "epoch": 0.46871831653141005, "grad_norm": 0.5833179354667664, "learning_rate": 1.7239725634131242e-05, "loss": 0.0832, "step": 26467 }, { "epoch": 0.4687360260684385, "grad_norm": 0.5616462826728821, "learning_rate": 1.7238874910816884e-05, "loss": 0.0568, "step": 26468 }, { "epoch": 0.46875373560546696, "grad_norm": 0.46541574597358704, "learning_rate": 1.7238024180136853e-05, "loss": 0.0687, "step": 26469 }, { "epoch": 0.46877144514249536, "grad_norm": 0.8153798580169678, "learning_rate": 1.7237173442093943e-05, "loss": 0.0841, "step": 26470 }, { "epoch": 0.4687891546795238, "grad_norm": 0.782573401927948, "learning_rate": 1.7236322696690965e-05, "loss": 0.0774, "step": 26471 }, { "epoch": 0.4688068642165522, "grad_norm": 0.912390410900116, "learning_rate": 1.723547194393071e-05, "loss": 0.062, "step": 26472 }, { "epoch": 0.46882457375358066, "grad_norm": 0.3769821524620056, "learning_rate": 1.723462118381598e-05, "loss": 0.0862, "step": 26473 }, { "epoch": 0.46884228329060906, "grad_norm": 0.5386148691177368, "learning_rate": 1.723377041634957e-05, "loss": 0.0888, "step": 26474 }, { "epoch": 0.4688599928276375, "grad_norm": 0.9221457839012146, "learning_rate": 1.7232919641534278e-05, "loss": 0.086, "step": 26475 }, { "epoch": 0.4688777023646659, "grad_norm": 0.5370649099349976, "learning_rate": 1.7232068859372913e-05, "loss": 0.0565, "step": 26476 }, { "epoch": 0.46889541190169437, "grad_norm": 0.8086903691291809, "learning_rate": 1.7231218069868263e-05, "loss": 0.0547, "step": 26477 }, { "epoch": 0.46891312143872277, "grad_norm": 0.496586412191391, "learning_rate": 1.7230367273023134e-05, "loss": 0.0517, "step": 26478 }, { "epoch": 0.4689308309757512, "grad_norm": 0.38607674837112427, "learning_rate": 1.7229516468840318e-05, "loss": 0.0636, "step": 26479 }, { "epoch": 0.4689485405127796, "grad_norm": 0.41173988580703735, "learning_rate": 1.722866565732262e-05, "loss": 0.0795, "step": 26480 }, { "epoch": 0.4689662500498081, "grad_norm": 0.829793393611908, "learning_rate": 1.722781483847284e-05, "loss": 0.0956, "step": 26481 }, { "epoch": 0.4689839595868365, "grad_norm": 1.0457764863967896, "learning_rate": 1.7226964012293773e-05, "loss": 0.0611, "step": 26482 }, { "epoch": 0.46900166912386493, "grad_norm": 0.7558538317680359, "learning_rate": 1.7226113178788218e-05, "loss": 0.0636, "step": 26483 }, { "epoch": 0.4690193786608934, "grad_norm": 0.4586865305900574, "learning_rate": 1.7225262337958975e-05, "loss": 0.0776, "step": 26484 }, { "epoch": 0.4690370881979218, "grad_norm": 0.7279952168464661, "learning_rate": 1.7224411489808847e-05, "loss": 0.0744, "step": 26485 }, { "epoch": 0.46905479773495024, "grad_norm": 0.36763787269592285, "learning_rate": 1.722356063434063e-05, "loss": 0.042, "step": 26486 }, { "epoch": 0.46907250727197863, "grad_norm": 0.8940551280975342, "learning_rate": 1.7222709771557123e-05, "loss": 0.0528, "step": 26487 }, { "epoch": 0.4690902168090071, "grad_norm": 0.9662317633628845, "learning_rate": 1.7221858901461127e-05, "loss": 0.0706, "step": 26488 }, { "epoch": 0.4691079263460355, "grad_norm": 0.7671000957489014, "learning_rate": 1.7221008024055436e-05, "loss": 0.117, "step": 26489 }, { "epoch": 0.46912563588306394, "grad_norm": 0.4697016775608063, "learning_rate": 1.7220157139342853e-05, "loss": 0.0562, "step": 26490 }, { "epoch": 0.46914334542009234, "grad_norm": 3.497529983520508, "learning_rate": 1.721930624732618e-05, "loss": 0.1414, "step": 26491 }, { "epoch": 0.4691610549571208, "grad_norm": 0.49313250184059143, "learning_rate": 1.7218455348008214e-05, "loss": 0.0701, "step": 26492 }, { "epoch": 0.4691787644941492, "grad_norm": 0.9541051387786865, "learning_rate": 1.7217604441391756e-05, "loss": 0.0798, "step": 26493 }, { "epoch": 0.46919647403117765, "grad_norm": 0.6157838106155396, "learning_rate": 1.7216753527479602e-05, "loss": 0.0702, "step": 26494 }, { "epoch": 0.46921418356820604, "grad_norm": 0.8959559202194214, "learning_rate": 1.7215902606274553e-05, "loss": 0.0805, "step": 26495 }, { "epoch": 0.4692318931052345, "grad_norm": 0.5452831387519836, "learning_rate": 1.721505167777941e-05, "loss": 0.0767, "step": 26496 }, { "epoch": 0.4692496026422629, "grad_norm": 0.8704339861869812, "learning_rate": 1.7214200741996967e-05, "loss": 0.0984, "step": 26497 }, { "epoch": 0.46926731217929135, "grad_norm": 0.5731263756752014, "learning_rate": 1.721334979893003e-05, "loss": 0.0719, "step": 26498 }, { "epoch": 0.4692850217163198, "grad_norm": 0.46151331067085266, "learning_rate": 1.7212498848581397e-05, "loss": 0.0799, "step": 26499 }, { "epoch": 0.4693027312533482, "grad_norm": 0.6497999429702759, "learning_rate": 1.7211647890953867e-05, "loss": 0.1189, "step": 26500 }, { "epoch": 0.46932044079037666, "grad_norm": 0.4995582699775696, "learning_rate": 1.7210796926050236e-05, "loss": 0.0595, "step": 26501 }, { "epoch": 0.46933815032740506, "grad_norm": 0.6107082962989807, "learning_rate": 1.7209945953873308e-05, "loss": 0.0858, "step": 26502 }, { "epoch": 0.4693558598644335, "grad_norm": 0.623211681842804, "learning_rate": 1.7209094974425884e-05, "loss": 0.0631, "step": 26503 }, { "epoch": 0.4693735694014619, "grad_norm": 0.7443751096725464, "learning_rate": 1.7208243987710754e-05, "loss": 0.0994, "step": 26504 }, { "epoch": 0.46939127893849036, "grad_norm": 0.4905649423599243, "learning_rate": 1.7207392993730735e-05, "loss": 0.0673, "step": 26505 }, { "epoch": 0.46940898847551876, "grad_norm": 0.4019216299057007, "learning_rate": 1.720654199248861e-05, "loss": 0.0724, "step": 26506 }, { "epoch": 0.4694266980125472, "grad_norm": 0.5312755107879639, "learning_rate": 1.7205690983987187e-05, "loss": 0.0591, "step": 26507 }, { "epoch": 0.4694444075495756, "grad_norm": 0.5022310614585876, "learning_rate": 1.720483996822926e-05, "loss": 0.044, "step": 26508 }, { "epoch": 0.46946211708660407, "grad_norm": 0.4312475919723511, "learning_rate": 1.7203988945217636e-05, "loss": 0.0885, "step": 26509 }, { "epoch": 0.46947982662363247, "grad_norm": 0.40726587176322937, "learning_rate": 1.7203137914955112e-05, "loss": 0.0641, "step": 26510 }, { "epoch": 0.4694975361606609, "grad_norm": 0.5216572880744934, "learning_rate": 1.7202286877444484e-05, "loss": 0.0674, "step": 26511 }, { "epoch": 0.4695152456976893, "grad_norm": 0.5022714138031006, "learning_rate": 1.7201435832688556e-05, "loss": 0.0498, "step": 26512 }, { "epoch": 0.4695329552347178, "grad_norm": 0.6143733263015747, "learning_rate": 1.7200584780690127e-05, "loss": 0.0807, "step": 26513 }, { "epoch": 0.46955066477174623, "grad_norm": 0.8384096622467041, "learning_rate": 1.7199733721451996e-05, "loss": 0.089, "step": 26514 }, { "epoch": 0.4695683743087746, "grad_norm": 0.5503332018852234, "learning_rate": 1.7198882654976968e-05, "loss": 0.0558, "step": 26515 }, { "epoch": 0.4695860838458031, "grad_norm": 1.076468825340271, "learning_rate": 1.7198031581267834e-05, "loss": 0.0901, "step": 26516 }, { "epoch": 0.4696037933828315, "grad_norm": 0.8244922161102295, "learning_rate": 1.7197180500327397e-05, "loss": 0.11, "step": 26517 }, { "epoch": 0.46962150291985993, "grad_norm": 0.6238043904304504, "learning_rate": 1.719632941215846e-05, "loss": 0.0765, "step": 26518 }, { "epoch": 0.46963921245688833, "grad_norm": 0.638717532157898, "learning_rate": 1.7195478316763825e-05, "loss": 0.0799, "step": 26519 }, { "epoch": 0.4696569219939168, "grad_norm": 0.6925754547119141, "learning_rate": 1.7194627214146284e-05, "loss": 0.0772, "step": 26520 }, { "epoch": 0.4696746315309452, "grad_norm": 0.7044622302055359, "learning_rate": 1.7193776104308644e-05, "loss": 0.1006, "step": 26521 }, { "epoch": 0.46969234106797364, "grad_norm": 0.8038445711135864, "learning_rate": 1.7192924987253697e-05, "loss": 0.0638, "step": 26522 }, { "epoch": 0.46971005060500204, "grad_norm": 0.6664558053016663, "learning_rate": 1.7192073862984255e-05, "loss": 0.0812, "step": 26523 }, { "epoch": 0.4697277601420305, "grad_norm": 0.601999819278717, "learning_rate": 1.719122273150311e-05, "loss": 0.0917, "step": 26524 }, { "epoch": 0.4697454696790589, "grad_norm": 0.6077027320861816, "learning_rate": 1.7190371592813065e-05, "loss": 0.0622, "step": 26525 }, { "epoch": 0.46976317921608735, "grad_norm": 0.8361162543296814, "learning_rate": 1.7189520446916914e-05, "loss": 0.0665, "step": 26526 }, { "epoch": 0.46978088875311574, "grad_norm": 0.5087446570396423, "learning_rate": 1.7188669293817466e-05, "loss": 0.0691, "step": 26527 }, { "epoch": 0.4697985982901442, "grad_norm": 0.6655909419059753, "learning_rate": 1.718781813351752e-05, "loss": 0.0656, "step": 26528 }, { "epoch": 0.46981630782717265, "grad_norm": 0.45112520456314087, "learning_rate": 1.7186966966019867e-05, "loss": 0.0759, "step": 26529 }, { "epoch": 0.46983401736420105, "grad_norm": 0.5625488758087158, "learning_rate": 1.7186115791327323e-05, "loss": 0.0785, "step": 26530 }, { "epoch": 0.4698517269012295, "grad_norm": 0.8515849709510803, "learning_rate": 1.718526460944267e-05, "loss": 0.0687, "step": 26531 }, { "epoch": 0.4698694364382579, "grad_norm": 0.780392587184906, "learning_rate": 1.7184413420368723e-05, "loss": 0.0697, "step": 26532 }, { "epoch": 0.46988714597528636, "grad_norm": 0.7799062132835388, "learning_rate": 1.7183562224108277e-05, "loss": 0.0691, "step": 26533 }, { "epoch": 0.46990485551231476, "grad_norm": 0.5341660976409912, "learning_rate": 1.7182711020664125e-05, "loss": 0.07, "step": 26534 }, { "epoch": 0.4699225650493432, "grad_norm": 0.538580596446991, "learning_rate": 1.7181859810039085e-05, "loss": 0.0569, "step": 26535 }, { "epoch": 0.4699402745863716, "grad_norm": 0.5241564512252808, "learning_rate": 1.718100859223594e-05, "loss": 0.0418, "step": 26536 }, { "epoch": 0.46995798412340006, "grad_norm": 0.5688135027885437, "learning_rate": 1.71801573672575e-05, "loss": 0.0867, "step": 26537 }, { "epoch": 0.46997569366042846, "grad_norm": 0.9416400194168091, "learning_rate": 1.717930613510656e-05, "loss": 0.1035, "step": 26538 }, { "epoch": 0.4699934031974569, "grad_norm": 0.5307660698890686, "learning_rate": 1.7178454895785927e-05, "loss": 0.0767, "step": 26539 }, { "epoch": 0.4700111127344853, "grad_norm": 0.6340019106864929, "learning_rate": 1.7177603649298392e-05, "loss": 0.0695, "step": 26540 }, { "epoch": 0.47002882227151377, "grad_norm": 0.8838563561439514, "learning_rate": 1.7176752395646762e-05, "loss": 0.0938, "step": 26541 }, { "epoch": 0.4700465318085422, "grad_norm": 0.3085387051105499, "learning_rate": 1.7175901134833844e-05, "loss": 0.0491, "step": 26542 }, { "epoch": 0.4700642413455706, "grad_norm": 0.6271229982376099, "learning_rate": 1.7175049866862427e-05, "loss": 0.0821, "step": 26543 }, { "epoch": 0.4700819508825991, "grad_norm": 0.7139864563941956, "learning_rate": 1.7174198591735316e-05, "loss": 0.062, "step": 26544 }, { "epoch": 0.4700996604196275, "grad_norm": 0.9350887537002563, "learning_rate": 1.717334730945531e-05, "loss": 0.0838, "step": 26545 }, { "epoch": 0.47011736995665593, "grad_norm": 0.5747281908988953, "learning_rate": 1.7172496020025212e-05, "loss": 0.0588, "step": 26546 }, { "epoch": 0.4701350794936843, "grad_norm": 0.2819553017616272, "learning_rate": 1.7171644723447822e-05, "loss": 0.0819, "step": 26547 }, { "epoch": 0.4701527890307128, "grad_norm": 0.5927582383155823, "learning_rate": 1.717079341972594e-05, "loss": 0.0598, "step": 26548 }, { "epoch": 0.4701704985677412, "grad_norm": 0.25664547085762024, "learning_rate": 1.7169942108862373e-05, "loss": 0.0558, "step": 26549 }, { "epoch": 0.47018820810476963, "grad_norm": 0.7682616710662842, "learning_rate": 1.716909079085991e-05, "loss": 0.0749, "step": 26550 }, { "epoch": 0.47020591764179803, "grad_norm": 0.965788722038269, "learning_rate": 1.7168239465721357e-05, "loss": 0.0721, "step": 26551 }, { "epoch": 0.4702236271788265, "grad_norm": 0.5536038875579834, "learning_rate": 1.7167388133449514e-05, "loss": 0.1058, "step": 26552 }, { "epoch": 0.4702413367158549, "grad_norm": 0.3815792500972748, "learning_rate": 1.7166536794047192e-05, "loss": 0.0622, "step": 26553 }, { "epoch": 0.47025904625288334, "grad_norm": 0.4984458088874817, "learning_rate": 1.7165685447517176e-05, "loss": 0.0952, "step": 26554 }, { "epoch": 0.47027675578991174, "grad_norm": 0.5136953592300415, "learning_rate": 1.7164834093862274e-05, "loss": 0.0643, "step": 26555 }, { "epoch": 0.4702944653269402, "grad_norm": 0.7102324962615967, "learning_rate": 1.716398273308529e-05, "loss": 0.0819, "step": 26556 }, { "epoch": 0.47031217486396865, "grad_norm": 0.6901133060455322, "learning_rate": 1.716313136518902e-05, "loss": 0.0848, "step": 26557 }, { "epoch": 0.47032988440099704, "grad_norm": 1.0411502122879028, "learning_rate": 1.716227999017627e-05, "loss": 0.1067, "step": 26558 }, { "epoch": 0.4703475939380255, "grad_norm": 0.9004725813865662, "learning_rate": 1.716142860804983e-05, "loss": 0.0883, "step": 26559 }, { "epoch": 0.4703653034750539, "grad_norm": 0.5607749819755554, "learning_rate": 1.7160577218812514e-05, "loss": 0.0691, "step": 26560 }, { "epoch": 0.47038301301208235, "grad_norm": 0.6712116003036499, "learning_rate": 1.7159725822467114e-05, "loss": 0.0575, "step": 26561 }, { "epoch": 0.47040072254911075, "grad_norm": 1.299407720565796, "learning_rate": 1.7158874419016436e-05, "loss": 0.096, "step": 26562 }, { "epoch": 0.4704184320861392, "grad_norm": 0.8590912222862244, "learning_rate": 1.715802300846328e-05, "loss": 0.1064, "step": 26563 }, { "epoch": 0.4704361416231676, "grad_norm": 0.44437554478645325, "learning_rate": 1.715717159081045e-05, "loss": 0.0643, "step": 26564 }, { "epoch": 0.47045385116019606, "grad_norm": 0.8728269338607788, "learning_rate": 1.715632016606074e-05, "loss": 0.0946, "step": 26565 }, { "epoch": 0.47047156069722446, "grad_norm": 0.579751193523407, "learning_rate": 1.7155468734216952e-05, "loss": 0.0663, "step": 26566 }, { "epoch": 0.4704892702342529, "grad_norm": 0.8028656244277954, "learning_rate": 1.7154617295281896e-05, "loss": 0.1043, "step": 26567 }, { "epoch": 0.4705069797712813, "grad_norm": 0.7248623967170715, "learning_rate": 1.7153765849258363e-05, "loss": 0.0971, "step": 26568 }, { "epoch": 0.47052468930830976, "grad_norm": 1.1217106580734253, "learning_rate": 1.7152914396149162e-05, "loss": 0.093, "step": 26569 }, { "epoch": 0.47054239884533816, "grad_norm": 0.40149247646331787, "learning_rate": 1.7152062935957082e-05, "loss": 0.0821, "step": 26570 }, { "epoch": 0.4705601083823666, "grad_norm": 0.5574053525924683, "learning_rate": 1.715121146868494e-05, "loss": 0.0633, "step": 26571 }, { "epoch": 0.47057781791939507, "grad_norm": 0.4824869632720947, "learning_rate": 1.715035999433553e-05, "loss": 0.0672, "step": 26572 }, { "epoch": 0.47059552745642347, "grad_norm": 0.6956035494804382, "learning_rate": 1.714950851291165e-05, "loss": 0.0698, "step": 26573 }, { "epoch": 0.4706132369934519, "grad_norm": 0.7212387919425964, "learning_rate": 1.7148657024416107e-05, "loss": 0.0888, "step": 26574 }, { "epoch": 0.4706309465304803, "grad_norm": 0.6851460933685303, "learning_rate": 1.7147805528851694e-05, "loss": 0.0799, "step": 26575 }, { "epoch": 0.4706486560675088, "grad_norm": 0.4648232161998749, "learning_rate": 1.7146954026221227e-05, "loss": 0.0701, "step": 26576 }, { "epoch": 0.4706663656045372, "grad_norm": 1.0084069967269897, "learning_rate": 1.7146102516527492e-05, "loss": 0.076, "step": 26577 }, { "epoch": 0.4706840751415656, "grad_norm": 0.5480095148086548, "learning_rate": 1.71452509997733e-05, "loss": 0.0614, "step": 26578 }, { "epoch": 0.470701784678594, "grad_norm": 0.462799996137619, "learning_rate": 1.714439947596145e-05, "loss": 0.0742, "step": 26579 }, { "epoch": 0.4707194942156225, "grad_norm": 0.605128288269043, "learning_rate": 1.7143547945094736e-05, "loss": 0.095, "step": 26580 }, { "epoch": 0.4707372037526509, "grad_norm": 0.7100392580032349, "learning_rate": 1.714269640717597e-05, "loss": 0.0661, "step": 26581 }, { "epoch": 0.47075491328967933, "grad_norm": 0.47881823778152466, "learning_rate": 1.7141844862207952e-05, "loss": 0.0798, "step": 26582 }, { "epoch": 0.47077262282670773, "grad_norm": 0.6721704602241516, "learning_rate": 1.714099331019348e-05, "loss": 0.094, "step": 26583 }, { "epoch": 0.4707903323637362, "grad_norm": 0.551870584487915, "learning_rate": 1.714014175113535e-05, "loss": 0.102, "step": 26584 }, { "epoch": 0.4708080419007646, "grad_norm": 0.8310275673866272, "learning_rate": 1.7139290185036376e-05, "loss": 0.0903, "step": 26585 }, { "epoch": 0.47082575143779304, "grad_norm": 0.7618282437324524, "learning_rate": 1.7138438611899355e-05, "loss": 0.0844, "step": 26586 }, { "epoch": 0.4708434609748215, "grad_norm": 0.5901480913162231, "learning_rate": 1.7137587031727087e-05, "loss": 0.0823, "step": 26587 }, { "epoch": 0.4708611705118499, "grad_norm": 0.6427823305130005, "learning_rate": 1.7136735444522373e-05, "loss": 0.0661, "step": 26588 }, { "epoch": 0.47087888004887835, "grad_norm": 0.7476740479469299, "learning_rate": 1.7135883850288013e-05, "loss": 0.0687, "step": 26589 }, { "epoch": 0.47089658958590674, "grad_norm": 0.6076863408088684, "learning_rate": 1.7135032249026814e-05, "loss": 0.0661, "step": 26590 }, { "epoch": 0.4709142991229352, "grad_norm": 0.8522083759307861, "learning_rate": 1.7134180640741574e-05, "loss": 0.0886, "step": 26591 }, { "epoch": 0.4709320086599636, "grad_norm": 0.8337290287017822, "learning_rate": 1.7133329025435095e-05, "loss": 0.0814, "step": 26592 }, { "epoch": 0.47094971819699205, "grad_norm": 0.4813188314437866, "learning_rate": 1.713247740311018e-05, "loss": 0.0519, "step": 26593 }, { "epoch": 0.47096742773402045, "grad_norm": 0.7635636329650879, "learning_rate": 1.7131625773769627e-05, "loss": 0.0792, "step": 26594 }, { "epoch": 0.4709851372710489, "grad_norm": 2.050549030303955, "learning_rate": 1.7130774137416246e-05, "loss": 0.1107, "step": 26595 }, { "epoch": 0.4710028468080773, "grad_norm": 0.4918237626552582, "learning_rate": 1.712992249405283e-05, "loss": 0.0785, "step": 26596 }, { "epoch": 0.47102055634510576, "grad_norm": 0.6128193736076355, "learning_rate": 1.712907084368219e-05, "loss": 0.0516, "step": 26597 }, { "epoch": 0.47103826588213416, "grad_norm": 0.8438754081726074, "learning_rate": 1.7128219186307118e-05, "loss": 0.086, "step": 26598 }, { "epoch": 0.4710559754191626, "grad_norm": 0.676755428314209, "learning_rate": 1.7127367521930417e-05, "loss": 0.0726, "step": 26599 }, { "epoch": 0.471073684956191, "grad_norm": 0.6500277519226074, "learning_rate": 1.7126515850554894e-05, "loss": 0.0981, "step": 26600 }, { "epoch": 0.47109139449321946, "grad_norm": 0.9075325727462769, "learning_rate": 1.712566417218335e-05, "loss": 0.0702, "step": 26601 }, { "epoch": 0.4711091040302479, "grad_norm": 0.5970176458358765, "learning_rate": 1.712481248681859e-05, "loss": 0.0699, "step": 26602 }, { "epoch": 0.4711268135672763, "grad_norm": 0.9876694679260254, "learning_rate": 1.7123960794463404e-05, "loss": 0.1139, "step": 26603 }, { "epoch": 0.47114452310430477, "grad_norm": 0.4294356107711792, "learning_rate": 1.7123109095120604e-05, "loss": 0.0746, "step": 26604 }, { "epoch": 0.47116223264133317, "grad_norm": 0.7627363204956055, "learning_rate": 1.712225738879299e-05, "loss": 0.0999, "step": 26605 }, { "epoch": 0.4711799421783616, "grad_norm": 0.8432000875473022, "learning_rate": 1.7121405675483373e-05, "loss": 0.0854, "step": 26606 }, { "epoch": 0.47119765171539, "grad_norm": 0.5810791850090027, "learning_rate": 1.7120553955194536e-05, "loss": 0.0305, "step": 26607 }, { "epoch": 0.4712153612524185, "grad_norm": 0.7731204032897949, "learning_rate": 1.711970222792929e-05, "loss": 0.0798, "step": 26608 }, { "epoch": 0.4712330707894469, "grad_norm": 0.8033575415611267, "learning_rate": 1.7118850493690442e-05, "loss": 0.0574, "step": 26609 }, { "epoch": 0.4712507803264753, "grad_norm": 0.7429781556129456, "learning_rate": 1.711799875248079e-05, "loss": 0.1014, "step": 26610 }, { "epoch": 0.4712684898635037, "grad_norm": 0.265521377325058, "learning_rate": 1.7117147004303135e-05, "loss": 0.0496, "step": 26611 }, { "epoch": 0.4712861994005322, "grad_norm": 0.43148526549339294, "learning_rate": 1.711629524916028e-05, "loss": 0.0916, "step": 26612 }, { "epoch": 0.4713039089375606, "grad_norm": 0.8997297286987305, "learning_rate": 1.711544348705503e-05, "loss": 0.0824, "step": 26613 }, { "epoch": 0.47132161847458903, "grad_norm": 0.5796127915382385, "learning_rate": 1.7114591717990184e-05, "loss": 0.0709, "step": 26614 }, { "epoch": 0.47133932801161743, "grad_norm": 0.6589413285255432, "learning_rate": 1.7113739941968544e-05, "loss": 0.0837, "step": 26615 }, { "epoch": 0.4713570375486459, "grad_norm": 1.1737409830093384, "learning_rate": 1.7112888158992916e-05, "loss": 0.0562, "step": 26616 }, { "epoch": 0.47137474708567434, "grad_norm": 0.8341108560562134, "learning_rate": 1.7112036369066096e-05, "loss": 0.065, "step": 26617 }, { "epoch": 0.47139245662270274, "grad_norm": 0.3266594111919403, "learning_rate": 1.7111184572190887e-05, "loss": 0.0543, "step": 26618 }, { "epoch": 0.4714101661597312, "grad_norm": 0.8191812038421631, "learning_rate": 1.7110332768370104e-05, "loss": 0.0799, "step": 26619 }, { "epoch": 0.4714278756967596, "grad_norm": 0.6928758025169373, "learning_rate": 1.7109480957606537e-05, "loss": 0.0844, "step": 26620 }, { "epoch": 0.47144558523378804, "grad_norm": 0.8431764841079712, "learning_rate": 1.710862913990299e-05, "loss": 0.0713, "step": 26621 }, { "epoch": 0.47146329477081644, "grad_norm": 0.7351939082145691, "learning_rate": 1.710777731526226e-05, "loss": 0.0889, "step": 26622 }, { "epoch": 0.4714810043078449, "grad_norm": 0.517809271812439, "learning_rate": 1.7106925483687162e-05, "loss": 0.047, "step": 26623 }, { "epoch": 0.4714987138448733, "grad_norm": 0.30200493335723877, "learning_rate": 1.710607364518049e-05, "loss": 0.0549, "step": 26624 }, { "epoch": 0.47151642338190175, "grad_norm": 0.49832862615585327, "learning_rate": 1.7105221799745054e-05, "loss": 0.0956, "step": 26625 }, { "epoch": 0.47153413291893015, "grad_norm": 0.6119764447212219, "learning_rate": 1.7104369947383645e-05, "loss": 0.0791, "step": 26626 }, { "epoch": 0.4715518424559586, "grad_norm": 0.4034304618835449, "learning_rate": 1.7103518088099073e-05, "loss": 0.0669, "step": 26627 }, { "epoch": 0.471569551992987, "grad_norm": 0.5389631390571594, "learning_rate": 1.710266622189414e-05, "loss": 0.0734, "step": 26628 }, { "epoch": 0.47158726153001546, "grad_norm": 0.6575652360916138, "learning_rate": 1.710181434877165e-05, "loss": 0.0446, "step": 26629 }, { "epoch": 0.47160497106704385, "grad_norm": 0.5813018083572388, "learning_rate": 1.71009624687344e-05, "loss": 0.1037, "step": 26630 }, { "epoch": 0.4716226806040723, "grad_norm": 0.6065701246261597, "learning_rate": 1.71001105817852e-05, "loss": 0.0643, "step": 26631 }, { "epoch": 0.47164039014110076, "grad_norm": 0.9227182269096375, "learning_rate": 1.7099258687926843e-05, "loss": 0.0889, "step": 26632 }, { "epoch": 0.47165809967812916, "grad_norm": 0.9913935661315918, "learning_rate": 1.709840678716214e-05, "loss": 0.0852, "step": 26633 }, { "epoch": 0.4716758092151576, "grad_norm": 0.6590315103530884, "learning_rate": 1.709755487949389e-05, "loss": 0.082, "step": 26634 }, { "epoch": 0.471693518752186, "grad_norm": 0.8539062142372131, "learning_rate": 1.70967029649249e-05, "loss": 0.0656, "step": 26635 }, { "epoch": 0.47171122828921447, "grad_norm": 0.4676600396633148, "learning_rate": 1.7095851043457967e-05, "loss": 0.0679, "step": 26636 }, { "epoch": 0.47172893782624287, "grad_norm": 0.4136258065700531, "learning_rate": 1.7094999115095893e-05, "loss": 0.0585, "step": 26637 }, { "epoch": 0.4717466473632713, "grad_norm": 0.869662880897522, "learning_rate": 1.709414717984149e-05, "loss": 0.0888, "step": 26638 }, { "epoch": 0.4717643569002997, "grad_norm": 0.7654761672019958, "learning_rate": 1.709329523769755e-05, "loss": 0.0893, "step": 26639 }, { "epoch": 0.4717820664373282, "grad_norm": 0.28940823674201965, "learning_rate": 1.7092443288666884e-05, "loss": 0.0609, "step": 26640 }, { "epoch": 0.47179977597435657, "grad_norm": 0.3816315233707428, "learning_rate": 1.709159133275229e-05, "loss": 0.0517, "step": 26641 }, { "epoch": 0.471817485511385, "grad_norm": 0.6212351322174072, "learning_rate": 1.709073936995657e-05, "loss": 0.0779, "step": 26642 }, { "epoch": 0.4718351950484134, "grad_norm": 0.6769042611122131, "learning_rate": 1.7089887400282528e-05, "loss": 0.0655, "step": 26643 }, { "epoch": 0.4718529045854419, "grad_norm": 0.8002088665962219, "learning_rate": 1.708903542373297e-05, "loss": 0.0848, "step": 26644 }, { "epoch": 0.4718706141224703, "grad_norm": 0.7797274589538574, "learning_rate": 1.7088183440310698e-05, "loss": 0.0715, "step": 26645 }, { "epoch": 0.47188832365949873, "grad_norm": 0.6509760022163391, "learning_rate": 1.7087331450018508e-05, "loss": 0.0687, "step": 26646 }, { "epoch": 0.4719060331965272, "grad_norm": 0.6656322479248047, "learning_rate": 1.7086479452859215e-05, "loss": 0.0667, "step": 26647 }, { "epoch": 0.4719237427335556, "grad_norm": 0.6840328574180603, "learning_rate": 1.708562744883561e-05, "loss": 0.0758, "step": 26648 }, { "epoch": 0.47194145227058404, "grad_norm": 0.7907149791717529, "learning_rate": 1.7084775437950512e-05, "loss": 0.0582, "step": 26649 }, { "epoch": 0.47195916180761244, "grad_norm": 0.6819541454315186, "learning_rate": 1.7083923420206703e-05, "loss": 0.0634, "step": 26650 }, { "epoch": 0.4719768713446409, "grad_norm": 0.8872793316841125, "learning_rate": 1.7083071395607002e-05, "loss": 0.0616, "step": 26651 }, { "epoch": 0.4719945808816693, "grad_norm": 0.3571566939353943, "learning_rate": 1.7082219364154205e-05, "loss": 0.0453, "step": 26652 }, { "epoch": 0.47201229041869774, "grad_norm": 0.8042591214179993, "learning_rate": 1.7081367325851117e-05, "loss": 0.0597, "step": 26653 }, { "epoch": 0.47202999995572614, "grad_norm": 0.597953200340271, "learning_rate": 1.7080515280700547e-05, "loss": 0.0772, "step": 26654 }, { "epoch": 0.4720477094927546, "grad_norm": 0.8454446792602539, "learning_rate": 1.7079663228705283e-05, "loss": 0.0933, "step": 26655 }, { "epoch": 0.472065419029783, "grad_norm": 0.5489445924758911, "learning_rate": 1.707881116986814e-05, "loss": 0.078, "step": 26656 }, { "epoch": 0.47208312856681145, "grad_norm": 0.6701823472976685, "learning_rate": 1.707795910419192e-05, "loss": 0.0642, "step": 26657 }, { "epoch": 0.47210083810383985, "grad_norm": 0.8838431239128113, "learning_rate": 1.7077107031679424e-05, "loss": 0.0683, "step": 26658 }, { "epoch": 0.4721185476408683, "grad_norm": 0.506312370300293, "learning_rate": 1.7076254952333453e-05, "loss": 0.0705, "step": 26659 }, { "epoch": 0.4721362571778967, "grad_norm": 0.5886541604995728, "learning_rate": 1.7075402866156818e-05, "loss": 0.0824, "step": 26660 }, { "epoch": 0.47215396671492516, "grad_norm": 0.9915239214897156, "learning_rate": 1.707455077315232e-05, "loss": 0.089, "step": 26661 }, { "epoch": 0.4721716762519536, "grad_norm": 0.94798344373703, "learning_rate": 1.707369867332275e-05, "loss": 0.0817, "step": 26662 }, { "epoch": 0.472189385788982, "grad_norm": 0.5818092823028564, "learning_rate": 1.707284656667093e-05, "loss": 0.0793, "step": 26663 }, { "epoch": 0.47220709532601046, "grad_norm": 0.5913466811180115, "learning_rate": 1.707199445319965e-05, "loss": 0.0589, "step": 26664 }, { "epoch": 0.47222480486303886, "grad_norm": 0.6784542798995972, "learning_rate": 1.7071142332911716e-05, "loss": 0.0872, "step": 26665 }, { "epoch": 0.4722425144000673, "grad_norm": 0.44822898507118225, "learning_rate": 1.7070290205809935e-05, "loss": 0.0803, "step": 26666 }, { "epoch": 0.4722602239370957, "grad_norm": 0.7175621390342712, "learning_rate": 1.7069438071897112e-05, "loss": 0.0671, "step": 26667 }, { "epoch": 0.47227793347412417, "grad_norm": 0.7214211821556091, "learning_rate": 1.706858593117605e-05, "loss": 0.0576, "step": 26668 }, { "epoch": 0.47229564301115257, "grad_norm": 0.6828007102012634, "learning_rate": 1.7067733783649542e-05, "loss": 0.0679, "step": 26669 }, { "epoch": 0.472313352548181, "grad_norm": 0.7728890776634216, "learning_rate": 1.7066881629320396e-05, "loss": 0.0868, "step": 26670 }, { "epoch": 0.4723310620852094, "grad_norm": 0.9640800952911377, "learning_rate": 1.7066029468191426e-05, "loss": 0.081, "step": 26671 }, { "epoch": 0.4723487716222379, "grad_norm": 1.1740974187850952, "learning_rate": 1.7065177300265425e-05, "loss": 0.1102, "step": 26672 }, { "epoch": 0.47236648115926627, "grad_norm": 0.5954418182373047, "learning_rate": 1.7064325125545198e-05, "loss": 0.0579, "step": 26673 }, { "epoch": 0.4723841906962947, "grad_norm": 0.7497156858444214, "learning_rate": 1.7063472944033555e-05, "loss": 0.0665, "step": 26674 }, { "epoch": 0.4724019002333231, "grad_norm": 0.6937122344970703, "learning_rate": 1.7062620755733288e-05, "loss": 0.0674, "step": 26675 }, { "epoch": 0.4724196097703516, "grad_norm": 0.944761335849762, "learning_rate": 1.7061768560647217e-05, "loss": 0.08, "step": 26676 }, { "epoch": 0.47243731930738003, "grad_norm": 0.7782578468322754, "learning_rate": 1.706091635877813e-05, "loss": 0.0916, "step": 26677 }, { "epoch": 0.47245502884440843, "grad_norm": 0.6381303668022156, "learning_rate": 1.7060064150128836e-05, "loss": 0.0619, "step": 26678 }, { "epoch": 0.4724727383814369, "grad_norm": 0.8446165323257446, "learning_rate": 1.7059211934702142e-05, "loss": 0.0561, "step": 26679 }, { "epoch": 0.4724904479184653, "grad_norm": 0.6362038850784302, "learning_rate": 1.7058359712500846e-05, "loss": 0.0622, "step": 26680 }, { "epoch": 0.47250815745549374, "grad_norm": 0.47955742478370667, "learning_rate": 1.7057507483527758e-05, "loss": 0.0814, "step": 26681 }, { "epoch": 0.47252586699252214, "grad_norm": 1.0207165479660034, "learning_rate": 1.7056655247785674e-05, "loss": 0.1047, "step": 26682 }, { "epoch": 0.4725435765295506, "grad_norm": 0.7173260450363159, "learning_rate": 1.7055803005277407e-05, "loss": 0.1118, "step": 26683 }, { "epoch": 0.472561286066579, "grad_norm": 0.967584490776062, "learning_rate": 1.705495075600575e-05, "loss": 0.081, "step": 26684 }, { "epoch": 0.47257899560360744, "grad_norm": 0.7368069887161255, "learning_rate": 1.7054098499973517e-05, "loss": 0.0845, "step": 26685 }, { "epoch": 0.47259670514063584, "grad_norm": 0.46261242032051086, "learning_rate": 1.705324623718351e-05, "loss": 0.0335, "step": 26686 }, { "epoch": 0.4726144146776643, "grad_norm": 1.0303711891174316, "learning_rate": 1.7052393967638527e-05, "loss": 0.1199, "step": 26687 }, { "epoch": 0.4726321242146927, "grad_norm": 0.582502543926239, "learning_rate": 1.705154169134138e-05, "loss": 0.0719, "step": 26688 }, { "epoch": 0.47264983375172115, "grad_norm": 0.560295581817627, "learning_rate": 1.7050689408294864e-05, "loss": 0.0952, "step": 26689 }, { "epoch": 0.47266754328874955, "grad_norm": 0.5877725481987, "learning_rate": 1.7049837118501785e-05, "loss": 0.0594, "step": 26690 }, { "epoch": 0.472685252825778, "grad_norm": 0.709559440612793, "learning_rate": 1.704898482196495e-05, "loss": 0.0582, "step": 26691 }, { "epoch": 0.47270296236280646, "grad_norm": 0.4168396294116974, "learning_rate": 1.704813251868717e-05, "loss": 0.0458, "step": 26692 }, { "epoch": 0.47272067189983485, "grad_norm": 1.0546174049377441, "learning_rate": 1.7047280208671236e-05, "loss": 0.0752, "step": 26693 }, { "epoch": 0.4727383814368633, "grad_norm": 0.670077919960022, "learning_rate": 1.704642789191995e-05, "loss": 0.0843, "step": 26694 }, { "epoch": 0.4727560909738917, "grad_norm": 0.3467196226119995, "learning_rate": 1.7045575568436133e-05, "loss": 0.0964, "step": 26695 }, { "epoch": 0.47277380051092016, "grad_norm": 1.1725198030471802, "learning_rate": 1.7044723238222574e-05, "loss": 0.0851, "step": 26696 }, { "epoch": 0.47279151004794856, "grad_norm": 0.5346852540969849, "learning_rate": 1.704387090128209e-05, "loss": 0.0575, "step": 26697 }, { "epoch": 0.472809219584977, "grad_norm": 0.8167245388031006, "learning_rate": 1.704301855761747e-05, "loss": 0.0713, "step": 26698 }, { "epoch": 0.4728269291220054, "grad_norm": 0.6851617097854614, "learning_rate": 1.7042166207231528e-05, "loss": 0.0677, "step": 26699 }, { "epoch": 0.47284463865903387, "grad_norm": 0.4851799011230469, "learning_rate": 1.7041313850127065e-05, "loss": 0.0658, "step": 26700 }, { "epoch": 0.47286234819606227, "grad_norm": 0.8267085552215576, "learning_rate": 1.7040461486306888e-05, "loss": 0.0514, "step": 26701 }, { "epoch": 0.4728800577330907, "grad_norm": 1.0308371782302856, "learning_rate": 1.7039609115773798e-05, "loss": 0.0726, "step": 26702 }, { "epoch": 0.4728977672701191, "grad_norm": 0.4408184289932251, "learning_rate": 1.70387567385306e-05, "loss": 0.0636, "step": 26703 }, { "epoch": 0.4729154768071476, "grad_norm": 0.5970065593719482, "learning_rate": 1.7037904354580093e-05, "loss": 0.0687, "step": 26704 }, { "epoch": 0.47293318634417597, "grad_norm": 0.8780743479728699, "learning_rate": 1.7037051963925096e-05, "loss": 0.0694, "step": 26705 }, { "epoch": 0.4729508958812044, "grad_norm": 1.0399401187896729, "learning_rate": 1.70361995665684e-05, "loss": 0.0888, "step": 26706 }, { "epoch": 0.4729686054182329, "grad_norm": 0.7275312542915344, "learning_rate": 1.7035347162512817e-05, "loss": 0.0781, "step": 26707 }, { "epoch": 0.4729863149552613, "grad_norm": 0.6656326651573181, "learning_rate": 1.703449475176114e-05, "loss": 0.0917, "step": 26708 }, { "epoch": 0.47300402449228973, "grad_norm": 1.0481032133102417, "learning_rate": 1.7033642334316187e-05, "loss": 0.1023, "step": 26709 }, { "epoch": 0.47302173402931813, "grad_norm": 0.7197338342666626, "learning_rate": 1.703278991018075e-05, "loss": 0.0611, "step": 26710 }, { "epoch": 0.4730394435663466, "grad_norm": 0.7998692393302917, "learning_rate": 1.703193747935765e-05, "loss": 0.0747, "step": 26711 }, { "epoch": 0.473057153103375, "grad_norm": 0.5349118709564209, "learning_rate": 1.703108504184967e-05, "loss": 0.0467, "step": 26712 }, { "epoch": 0.47307486264040344, "grad_norm": 0.6206893920898438, "learning_rate": 1.7030232597659633e-05, "loss": 0.0605, "step": 26713 }, { "epoch": 0.47309257217743184, "grad_norm": 0.4163625240325928, "learning_rate": 1.7029380146790334e-05, "loss": 0.0653, "step": 26714 }, { "epoch": 0.4731102817144603, "grad_norm": 0.4019698202610016, "learning_rate": 1.7028527689244585e-05, "loss": 0.0803, "step": 26715 }, { "epoch": 0.4731279912514887, "grad_norm": 0.8247199058532715, "learning_rate": 1.702767522502518e-05, "loss": 0.0695, "step": 26716 }, { "epoch": 0.47314570078851714, "grad_norm": 0.7345554828643799, "learning_rate": 1.7026822754134925e-05, "loss": 0.0842, "step": 26717 }, { "epoch": 0.47316341032554554, "grad_norm": 0.6143868565559387, "learning_rate": 1.702597027657663e-05, "loss": 0.0634, "step": 26718 }, { "epoch": 0.473181119862574, "grad_norm": 1.0760759115219116, "learning_rate": 1.7025117792353102e-05, "loss": 0.1212, "step": 26719 }, { "epoch": 0.4731988293996024, "grad_norm": 0.7685323357582092, "learning_rate": 1.702426530146714e-05, "loss": 0.0611, "step": 26720 }, { "epoch": 0.47321653893663085, "grad_norm": 0.7284489274024963, "learning_rate": 1.7023412803921545e-05, "loss": 0.0612, "step": 26721 }, { "epoch": 0.4732342484736593, "grad_norm": 0.539334237575531, "learning_rate": 1.7022560299719135e-05, "loss": 0.1009, "step": 26722 }, { "epoch": 0.4732519580106877, "grad_norm": 0.3887122571468353, "learning_rate": 1.70217077888627e-05, "loss": 0.0698, "step": 26723 }, { "epoch": 0.47326966754771616, "grad_norm": 0.532334566116333, "learning_rate": 1.702085527135505e-05, "loss": 0.0675, "step": 26724 }, { "epoch": 0.47328737708474455, "grad_norm": 0.6998949646949768, "learning_rate": 1.7020002747198996e-05, "loss": 0.1111, "step": 26725 }, { "epoch": 0.473305086621773, "grad_norm": 0.5297340154647827, "learning_rate": 1.7019150216397332e-05, "loss": 0.0766, "step": 26726 }, { "epoch": 0.4733227961588014, "grad_norm": 0.5658103823661804, "learning_rate": 1.701829767895287e-05, "loss": 0.063, "step": 26727 }, { "epoch": 0.47334050569582986, "grad_norm": 0.6428636908531189, "learning_rate": 1.701744513486841e-05, "loss": 0.0938, "step": 26728 }, { "epoch": 0.47335821523285826, "grad_norm": 0.3664032816886902, "learning_rate": 1.701659258414677e-05, "loss": 0.0673, "step": 26729 }, { "epoch": 0.4733759247698867, "grad_norm": 0.43363893032073975, "learning_rate": 1.701574002679073e-05, "loss": 0.0722, "step": 26730 }, { "epoch": 0.4733936343069151, "grad_norm": 0.6528198719024658, "learning_rate": 1.7014887462803122e-05, "loss": 0.088, "step": 26731 }, { "epoch": 0.47341134384394357, "grad_norm": 0.47563087940216064, "learning_rate": 1.701403489218673e-05, "loss": 0.0701, "step": 26732 }, { "epoch": 0.47342905338097196, "grad_norm": 0.524013876914978, "learning_rate": 1.7013182314944366e-05, "loss": 0.0854, "step": 26733 }, { "epoch": 0.4734467629180004, "grad_norm": 1.2210631370544434, "learning_rate": 1.7012329731078842e-05, "loss": 0.0839, "step": 26734 }, { "epoch": 0.4734644724550288, "grad_norm": 0.5262378454208374, "learning_rate": 1.7011477140592953e-05, "loss": 0.0837, "step": 26735 }, { "epoch": 0.47348218199205727, "grad_norm": 0.7080176472663879, "learning_rate": 1.701062454348951e-05, "loss": 0.0741, "step": 26736 }, { "epoch": 0.4734998915290857, "grad_norm": 0.5756261348724365, "learning_rate": 1.7009771939771307e-05, "loss": 0.0695, "step": 26737 }, { "epoch": 0.4735176010661141, "grad_norm": 0.6771136522293091, "learning_rate": 1.7008919329441167e-05, "loss": 0.0871, "step": 26738 }, { "epoch": 0.4735353106031426, "grad_norm": 1.0637216567993164, "learning_rate": 1.7008066712501887e-05, "loss": 0.0962, "step": 26739 }, { "epoch": 0.473553020140171, "grad_norm": 0.7366620302200317, "learning_rate": 1.7007214088956264e-05, "loss": 0.0568, "step": 26740 }, { "epoch": 0.47357072967719943, "grad_norm": 0.4534119963645935, "learning_rate": 1.7006361458807112e-05, "loss": 0.0592, "step": 26741 }, { "epoch": 0.47358843921422783, "grad_norm": 0.48351815342903137, "learning_rate": 1.7005508822057232e-05, "loss": 0.0833, "step": 26742 }, { "epoch": 0.4736061487512563, "grad_norm": 0.8646671175956726, "learning_rate": 1.7004656178709433e-05, "loss": 0.0607, "step": 26743 }, { "epoch": 0.4736238582882847, "grad_norm": 0.5960408449172974, "learning_rate": 1.7003803528766517e-05, "loss": 0.0575, "step": 26744 }, { "epoch": 0.47364156782531314, "grad_norm": 0.9894034266471863, "learning_rate": 1.7002950872231293e-05, "loss": 0.0794, "step": 26745 }, { "epoch": 0.47365927736234154, "grad_norm": 0.6051294803619385, "learning_rate": 1.7002098209106562e-05, "loss": 0.0486, "step": 26746 }, { "epoch": 0.47367698689937, "grad_norm": 0.6398322582244873, "learning_rate": 1.7001245539395127e-05, "loss": 0.1147, "step": 26747 }, { "epoch": 0.4736946964363984, "grad_norm": 0.6318168044090271, "learning_rate": 1.7000392863099802e-05, "loss": 0.1294, "step": 26748 }, { "epoch": 0.47371240597342684, "grad_norm": 0.4215238690376282, "learning_rate": 1.699954018022338e-05, "loss": 0.0652, "step": 26749 }, { "epoch": 0.47373011551045524, "grad_norm": 0.7768449783325195, "learning_rate": 1.6998687490768682e-05, "loss": 0.0605, "step": 26750 }, { "epoch": 0.4737478250474837, "grad_norm": 0.5711542367935181, "learning_rate": 1.6997834794738497e-05, "loss": 0.0515, "step": 26751 }, { "epoch": 0.47376553458451215, "grad_norm": 0.9776968359947205, "learning_rate": 1.699698209213564e-05, "loss": 0.1131, "step": 26752 }, { "epoch": 0.47378324412154055, "grad_norm": 0.7933584451675415, "learning_rate": 1.6996129382962912e-05, "loss": 0.0696, "step": 26753 }, { "epoch": 0.473800953658569, "grad_norm": 0.3521077632904053, "learning_rate": 1.699527666722312e-05, "loss": 0.0522, "step": 26754 }, { "epoch": 0.4738186631955974, "grad_norm": 0.7518221139907837, "learning_rate": 1.6994423944919077e-05, "loss": 0.0731, "step": 26755 }, { "epoch": 0.47383637273262585, "grad_norm": 0.8433693647384644, "learning_rate": 1.699357121605357e-05, "loss": 0.078, "step": 26756 }, { "epoch": 0.47385408226965425, "grad_norm": 0.7644814252853394, "learning_rate": 1.6992718480629422e-05, "loss": 0.0765, "step": 26757 }, { "epoch": 0.4738717918066827, "grad_norm": 0.6180313229560852, "learning_rate": 1.699186573864943e-05, "loss": 0.0655, "step": 26758 }, { "epoch": 0.4738895013437111, "grad_norm": 1.338590145111084, "learning_rate": 1.6991012990116402e-05, "loss": 0.0861, "step": 26759 }, { "epoch": 0.47390721088073956, "grad_norm": 0.5635085701942444, "learning_rate": 1.6990160235033143e-05, "loss": 0.0442, "step": 26760 }, { "epoch": 0.47392492041776796, "grad_norm": 0.9319465160369873, "learning_rate": 1.6989307473402458e-05, "loss": 0.1053, "step": 26761 }, { "epoch": 0.4739426299547964, "grad_norm": 0.5964182019233704, "learning_rate": 1.698845470522715e-05, "loss": 0.051, "step": 26762 }, { "epoch": 0.4739603394918248, "grad_norm": 0.5556520819664001, "learning_rate": 1.6987601930510032e-05, "loss": 0.0667, "step": 26763 }, { "epoch": 0.47397804902885327, "grad_norm": 0.5091248154640198, "learning_rate": 1.6986749149253902e-05, "loss": 0.0553, "step": 26764 }, { "epoch": 0.47399575856588166, "grad_norm": 0.7014707326889038, "learning_rate": 1.6985896361461568e-05, "loss": 0.0739, "step": 26765 }, { "epoch": 0.4740134681029101, "grad_norm": 0.4812801480293274, "learning_rate": 1.6985043567135835e-05, "loss": 0.0523, "step": 26766 }, { "epoch": 0.4740311776399386, "grad_norm": 0.4858182668685913, "learning_rate": 1.6984190766279508e-05, "loss": 0.0626, "step": 26767 }, { "epoch": 0.47404888717696697, "grad_norm": 0.8781874775886536, "learning_rate": 1.6983337958895396e-05, "loss": 0.0806, "step": 26768 }, { "epoch": 0.4740665967139954, "grad_norm": 0.6963638067245483, "learning_rate": 1.6982485144986305e-05, "loss": 0.0886, "step": 26769 }, { "epoch": 0.4740843062510238, "grad_norm": 0.3149701952934265, "learning_rate": 1.6981632324555034e-05, "loss": 0.0712, "step": 26770 }, { "epoch": 0.4741020157880523, "grad_norm": 0.7221413850784302, "learning_rate": 1.69807794976044e-05, "loss": 0.0468, "step": 26771 }, { "epoch": 0.4741197253250807, "grad_norm": 1.2764676809310913, "learning_rate": 1.6979926664137196e-05, "loss": 0.092, "step": 26772 }, { "epoch": 0.47413743486210913, "grad_norm": 0.8247042894363403, "learning_rate": 1.697907382415624e-05, "loss": 0.0827, "step": 26773 }, { "epoch": 0.47415514439913753, "grad_norm": 0.6085209846496582, "learning_rate": 1.6978220977664324e-05, "loss": 0.0786, "step": 26774 }, { "epoch": 0.474172853936166, "grad_norm": 0.679920494556427, "learning_rate": 1.697736812466426e-05, "loss": 0.0693, "step": 26775 }, { "epoch": 0.4741905634731944, "grad_norm": 0.9107692241668701, "learning_rate": 1.6976515265158857e-05, "loss": 0.0554, "step": 26776 }, { "epoch": 0.47420827301022284, "grad_norm": 0.2883157730102539, "learning_rate": 1.6975662399150924e-05, "loss": 0.0554, "step": 26777 }, { "epoch": 0.47422598254725123, "grad_norm": 0.5499504804611206, "learning_rate": 1.6974809526643257e-05, "loss": 0.0876, "step": 26778 }, { "epoch": 0.4742436920842797, "grad_norm": 0.82182377576828, "learning_rate": 1.6973956647638664e-05, "loss": 0.104, "step": 26779 }, { "epoch": 0.4742614016213081, "grad_norm": 0.8102405071258545, "learning_rate": 1.6973103762139958e-05, "loss": 0.04, "step": 26780 }, { "epoch": 0.47427911115833654, "grad_norm": 0.8707457780838013, "learning_rate": 1.6972250870149937e-05, "loss": 0.0497, "step": 26781 }, { "epoch": 0.474296820695365, "grad_norm": 0.4341297745704651, "learning_rate": 1.6971397971671413e-05, "loss": 0.0548, "step": 26782 }, { "epoch": 0.4743145302323934, "grad_norm": 0.6729600429534912, "learning_rate": 1.6970545066707185e-05, "loss": 0.0964, "step": 26783 }, { "epoch": 0.47433223976942185, "grad_norm": 0.7733837366104126, "learning_rate": 1.6969692155260068e-05, "loss": 0.0909, "step": 26784 }, { "epoch": 0.47434994930645025, "grad_norm": 0.6306964159011841, "learning_rate": 1.6968839237332858e-05, "loss": 0.0902, "step": 26785 }, { "epoch": 0.4743676588434787, "grad_norm": 0.8494129776954651, "learning_rate": 1.6967986312928366e-05, "loss": 0.0721, "step": 26786 }, { "epoch": 0.4743853683805071, "grad_norm": 0.6590760946273804, "learning_rate": 1.6967133382049406e-05, "loss": 0.0468, "step": 26787 }, { "epoch": 0.47440307791753555, "grad_norm": 0.3757898211479187, "learning_rate": 1.696628044469877e-05, "loss": 0.0523, "step": 26788 }, { "epoch": 0.47442078745456395, "grad_norm": 0.477253794670105, "learning_rate": 1.696542750087927e-05, "loss": 0.0673, "step": 26789 }, { "epoch": 0.4744384969915924, "grad_norm": 0.4620674252510071, "learning_rate": 1.6964574550593712e-05, "loss": 0.0602, "step": 26790 }, { "epoch": 0.4744562065286208, "grad_norm": 0.30449149012565613, "learning_rate": 1.6963721593844905e-05, "loss": 0.0738, "step": 26791 }, { "epoch": 0.47447391606564926, "grad_norm": 0.4448125660419464, "learning_rate": 1.6962868630635652e-05, "loss": 0.08, "step": 26792 }, { "epoch": 0.47449162560267766, "grad_norm": 0.609289824962616, "learning_rate": 1.696201566096876e-05, "loss": 0.0785, "step": 26793 }, { "epoch": 0.4745093351397061, "grad_norm": 0.6070806384086609, "learning_rate": 1.696116268484703e-05, "loss": 0.0686, "step": 26794 }, { "epoch": 0.4745270446767345, "grad_norm": 0.7135379314422607, "learning_rate": 1.6960309702273277e-05, "loss": 0.0707, "step": 26795 }, { "epoch": 0.47454475421376296, "grad_norm": 0.48696064949035645, "learning_rate": 1.6959456713250306e-05, "loss": 0.0652, "step": 26796 }, { "epoch": 0.4745624637507914, "grad_norm": 0.5265219211578369, "learning_rate": 1.695860371778092e-05, "loss": 0.0708, "step": 26797 }, { "epoch": 0.4745801732878198, "grad_norm": 0.7574750185012817, "learning_rate": 1.6957750715867926e-05, "loss": 0.0634, "step": 26798 }, { "epoch": 0.47459788282484827, "grad_norm": 0.7937036156654358, "learning_rate": 1.6956897707514124e-05, "loss": 0.0696, "step": 26799 }, { "epoch": 0.47461559236187667, "grad_norm": 0.5156676769256592, "learning_rate": 1.695604469272233e-05, "loss": 0.0561, "step": 26800 }, { "epoch": 0.4746333018989051, "grad_norm": 0.7774897813796997, "learning_rate": 1.6955191671495346e-05, "loss": 0.0872, "step": 26801 }, { "epoch": 0.4746510114359335, "grad_norm": 0.5731139183044434, "learning_rate": 1.6954338643835986e-05, "loss": 0.0628, "step": 26802 }, { "epoch": 0.474668720972962, "grad_norm": 0.8077001571655273, "learning_rate": 1.695348560974704e-05, "loss": 0.0791, "step": 26803 }, { "epoch": 0.4746864305099904, "grad_norm": 0.47071000933647156, "learning_rate": 1.6952632569231326e-05, "loss": 0.0575, "step": 26804 }, { "epoch": 0.47470414004701883, "grad_norm": 0.8536093831062317, "learning_rate": 1.6951779522291654e-05, "loss": 0.0803, "step": 26805 }, { "epoch": 0.47472184958404723, "grad_norm": 0.5427111983299255, "learning_rate": 1.6950926468930818e-05, "loss": 0.0692, "step": 26806 }, { "epoch": 0.4747395591210757, "grad_norm": 0.3855239748954773, "learning_rate": 1.6950073409151637e-05, "loss": 0.0787, "step": 26807 }, { "epoch": 0.4747572686581041, "grad_norm": 0.5583474040031433, "learning_rate": 1.694922034295691e-05, "loss": 0.0548, "step": 26808 }, { "epoch": 0.47477497819513254, "grad_norm": 0.8480715155601501, "learning_rate": 1.6948367270349442e-05, "loss": 0.0901, "step": 26809 }, { "epoch": 0.474792687732161, "grad_norm": 0.6073962450027466, "learning_rate": 1.6947514191332046e-05, "loss": 0.0437, "step": 26810 }, { "epoch": 0.4748103972691894, "grad_norm": 0.47513529658317566, "learning_rate": 1.6946661105907523e-05, "loss": 0.0453, "step": 26811 }, { "epoch": 0.47482810680621784, "grad_norm": 0.8240795135498047, "learning_rate": 1.6945808014078685e-05, "loss": 0.0941, "step": 26812 }, { "epoch": 0.47484581634324624, "grad_norm": 0.49337586760520935, "learning_rate": 1.6944954915848335e-05, "loss": 0.0785, "step": 26813 }, { "epoch": 0.4748635258802747, "grad_norm": 0.5565456748008728, "learning_rate": 1.6944101811219275e-05, "loss": 0.0594, "step": 26814 }, { "epoch": 0.4748812354173031, "grad_norm": 0.5260483622550964, "learning_rate": 1.6943248700194323e-05, "loss": 0.0457, "step": 26815 }, { "epoch": 0.47489894495433155, "grad_norm": 0.4477677047252655, "learning_rate": 1.6942395582776274e-05, "loss": 0.0682, "step": 26816 }, { "epoch": 0.47491665449135995, "grad_norm": 0.675762951374054, "learning_rate": 1.6941542458967945e-05, "loss": 0.0712, "step": 26817 }, { "epoch": 0.4749343640283884, "grad_norm": 0.8271118998527527, "learning_rate": 1.6940689328772134e-05, "loss": 0.0997, "step": 26818 }, { "epoch": 0.4749520735654168, "grad_norm": 0.6199427843093872, "learning_rate": 1.6939836192191656e-05, "loss": 0.0538, "step": 26819 }, { "epoch": 0.47496978310244525, "grad_norm": 0.6397281885147095, "learning_rate": 1.6938983049229306e-05, "loss": 0.0648, "step": 26820 }, { "epoch": 0.47498749263947365, "grad_norm": 0.610341489315033, "learning_rate": 1.6938129899887907e-05, "loss": 0.0843, "step": 26821 }, { "epoch": 0.4750052021765021, "grad_norm": 0.6701176762580872, "learning_rate": 1.6937276744170248e-05, "loss": 0.0807, "step": 26822 }, { "epoch": 0.4750229117135305, "grad_norm": 0.8005309700965881, "learning_rate": 1.6936423582079146e-05, "loss": 0.086, "step": 26823 }, { "epoch": 0.47504062125055896, "grad_norm": 0.7029657959938049, "learning_rate": 1.693557041361741e-05, "loss": 0.062, "step": 26824 }, { "epoch": 0.4750583307875874, "grad_norm": 0.6019193530082703, "learning_rate": 1.6934717238787843e-05, "loss": 0.0769, "step": 26825 }, { "epoch": 0.4750760403246158, "grad_norm": 0.7000279426574707, "learning_rate": 1.693386405759325e-05, "loss": 0.0867, "step": 26826 }, { "epoch": 0.47509374986164427, "grad_norm": 0.9381222724914551, "learning_rate": 1.6933010870036437e-05, "loss": 0.0871, "step": 26827 }, { "epoch": 0.47511145939867266, "grad_norm": 0.44993001222610474, "learning_rate": 1.6932157676120217e-05, "loss": 0.0735, "step": 26828 }, { "epoch": 0.4751291689357011, "grad_norm": 0.9305257797241211, "learning_rate": 1.693130447584739e-05, "loss": 0.0748, "step": 26829 }, { "epoch": 0.4751468784727295, "grad_norm": 0.8310249447822571, "learning_rate": 1.6930451269220775e-05, "loss": 0.0744, "step": 26830 }, { "epoch": 0.47516458800975797, "grad_norm": 0.6071791648864746, "learning_rate": 1.6929598056243166e-05, "loss": 0.0807, "step": 26831 }, { "epoch": 0.47518229754678637, "grad_norm": 0.5622352957725525, "learning_rate": 1.692874483691737e-05, "loss": 0.0779, "step": 26832 }, { "epoch": 0.4752000070838148, "grad_norm": 0.6474803686141968, "learning_rate": 1.69278916112462e-05, "loss": 0.052, "step": 26833 }, { "epoch": 0.4752177166208432, "grad_norm": 0.5616136789321899, "learning_rate": 1.692703837923247e-05, "loss": 0.0659, "step": 26834 }, { "epoch": 0.4752354261578717, "grad_norm": 0.7204155921936035, "learning_rate": 1.6926185140878977e-05, "loss": 0.0536, "step": 26835 }, { "epoch": 0.4752531356949001, "grad_norm": 0.7231724262237549, "learning_rate": 1.6925331896188522e-05, "loss": 0.0916, "step": 26836 }, { "epoch": 0.47527084523192853, "grad_norm": 0.5137701630592346, "learning_rate": 1.6924478645163922e-05, "loss": 0.0909, "step": 26837 }, { "epoch": 0.47528855476895693, "grad_norm": 0.6505580544471741, "learning_rate": 1.6923625387807984e-05, "loss": 0.059, "step": 26838 }, { "epoch": 0.4753062643059854, "grad_norm": 0.5567837953567505, "learning_rate": 1.6922772124123514e-05, "loss": 0.0903, "step": 26839 }, { "epoch": 0.47532397384301384, "grad_norm": 0.5969358086585999, "learning_rate": 1.6921918854113317e-05, "loss": 0.0981, "step": 26840 }, { "epoch": 0.47534168338004223, "grad_norm": 0.5070409178733826, "learning_rate": 1.69210655777802e-05, "loss": 0.0555, "step": 26841 }, { "epoch": 0.4753593929170707, "grad_norm": 0.7277452945709229, "learning_rate": 1.6920212295126974e-05, "loss": 0.0949, "step": 26842 }, { "epoch": 0.4753771024540991, "grad_norm": 0.42442673444747925, "learning_rate": 1.6919359006156443e-05, "loss": 0.0708, "step": 26843 }, { "epoch": 0.47539481199112754, "grad_norm": 0.6556652784347534, "learning_rate": 1.6918505710871416e-05, "loss": 0.071, "step": 26844 }, { "epoch": 0.47541252152815594, "grad_norm": 0.8931788206100464, "learning_rate": 1.6917652409274693e-05, "loss": 0.0573, "step": 26845 }, { "epoch": 0.4754302310651844, "grad_norm": 0.7475603222846985, "learning_rate": 1.6916799101369096e-05, "loss": 0.0986, "step": 26846 }, { "epoch": 0.4754479406022128, "grad_norm": 0.9399808645248413, "learning_rate": 1.6915945787157413e-05, "loss": 0.0917, "step": 26847 }, { "epoch": 0.47546565013924125, "grad_norm": 0.5127321481704712, "learning_rate": 1.6915092466642473e-05, "loss": 0.061, "step": 26848 }, { "epoch": 0.47548335967626965, "grad_norm": 0.8564659357070923, "learning_rate": 1.691423913982707e-05, "loss": 0.0957, "step": 26849 }, { "epoch": 0.4755010692132981, "grad_norm": 0.7293154001235962, "learning_rate": 1.691338580671401e-05, "loss": 0.0809, "step": 26850 }, { "epoch": 0.4755187787503265, "grad_norm": 0.8169857859611511, "learning_rate": 1.691253246730611e-05, "loss": 0.0631, "step": 26851 }, { "epoch": 0.47553648828735495, "grad_norm": 0.8524863123893738, "learning_rate": 1.6911679121606167e-05, "loss": 0.0857, "step": 26852 }, { "epoch": 0.47555419782438335, "grad_norm": 0.7366263270378113, "learning_rate": 1.6910825769616995e-05, "loss": 0.0706, "step": 26853 }, { "epoch": 0.4755719073614118, "grad_norm": 0.3546257019042969, "learning_rate": 1.6909972411341397e-05, "loss": 0.0436, "step": 26854 }, { "epoch": 0.47558961689844026, "grad_norm": 0.7347310781478882, "learning_rate": 1.6909119046782188e-05, "loss": 0.0921, "step": 26855 }, { "epoch": 0.47560732643546866, "grad_norm": 0.7075917720794678, "learning_rate": 1.6908265675942165e-05, "loss": 0.0734, "step": 26856 }, { "epoch": 0.4756250359724971, "grad_norm": 0.9687082767486572, "learning_rate": 1.6907412298824143e-05, "loss": 0.0988, "step": 26857 }, { "epoch": 0.4756427455095255, "grad_norm": 0.5302174687385559, "learning_rate": 1.6906558915430932e-05, "loss": 0.1036, "step": 26858 }, { "epoch": 0.47566045504655396, "grad_norm": 0.7729274034500122, "learning_rate": 1.6905705525765328e-05, "loss": 0.0963, "step": 26859 }, { "epoch": 0.47567816458358236, "grad_norm": 0.6953893899917603, "learning_rate": 1.6904852129830147e-05, "loss": 0.057, "step": 26860 }, { "epoch": 0.4756958741206108, "grad_norm": 0.9809700846672058, "learning_rate": 1.6903998727628197e-05, "loss": 0.0916, "step": 26861 }, { "epoch": 0.4757135836576392, "grad_norm": 1.1066337823867798, "learning_rate": 1.690314531916228e-05, "loss": 0.0737, "step": 26862 }, { "epoch": 0.47573129319466767, "grad_norm": 0.7972615957260132, "learning_rate": 1.6902291904435213e-05, "loss": 0.078, "step": 26863 }, { "epoch": 0.47574900273169607, "grad_norm": 0.5820022225379944, "learning_rate": 1.69014384834498e-05, "loss": 0.0734, "step": 26864 }, { "epoch": 0.4757667122687245, "grad_norm": 0.9929766654968262, "learning_rate": 1.6900585056208836e-05, "loss": 0.0531, "step": 26865 }, { "epoch": 0.4757844218057529, "grad_norm": 0.8446220755577087, "learning_rate": 1.6899731622715145e-05, "loss": 0.0836, "step": 26866 }, { "epoch": 0.4758021313427814, "grad_norm": 0.9214723706245422, "learning_rate": 1.6898878182971532e-05, "loss": 0.047, "step": 26867 }, { "epoch": 0.4758198408798098, "grad_norm": 0.6568761467933655, "learning_rate": 1.6898024736980798e-05, "loss": 0.0816, "step": 26868 }, { "epoch": 0.47583755041683823, "grad_norm": 0.904540479183197, "learning_rate": 1.6897171284745757e-05, "loss": 0.0626, "step": 26869 }, { "epoch": 0.4758552599538667, "grad_norm": 0.5797404050827026, "learning_rate": 1.6896317826269215e-05, "loss": 0.0536, "step": 26870 }, { "epoch": 0.4758729694908951, "grad_norm": 1.1187341213226318, "learning_rate": 1.6895464361553977e-05, "loss": 0.1063, "step": 26871 }, { "epoch": 0.47589067902792354, "grad_norm": 0.6754060983657837, "learning_rate": 1.689461089060285e-05, "loss": 0.0714, "step": 26872 }, { "epoch": 0.47590838856495193, "grad_norm": 0.4495438039302826, "learning_rate": 1.689375741341865e-05, "loss": 0.0848, "step": 26873 }, { "epoch": 0.4759260981019804, "grad_norm": 0.4895191788673401, "learning_rate": 1.6892903930004182e-05, "loss": 0.0769, "step": 26874 }, { "epoch": 0.4759438076390088, "grad_norm": 0.5725639462471008, "learning_rate": 1.6892050440362248e-05, "loss": 0.0651, "step": 26875 }, { "epoch": 0.47596151717603724, "grad_norm": 0.6138505935668945, "learning_rate": 1.6891196944495656e-05, "loss": 0.1046, "step": 26876 }, { "epoch": 0.47597922671306564, "grad_norm": 1.4880348443984985, "learning_rate": 1.689034344240722e-05, "loss": 0.097, "step": 26877 }, { "epoch": 0.4759969362500941, "grad_norm": 0.4617690443992615, "learning_rate": 1.688948993409975e-05, "loss": 0.0631, "step": 26878 }, { "epoch": 0.4760146457871225, "grad_norm": 0.37911322712898254, "learning_rate": 1.688863641957604e-05, "loss": 0.0561, "step": 26879 }, { "epoch": 0.47603235532415095, "grad_norm": 0.7755507826805115, "learning_rate": 1.6887782898838914e-05, "loss": 0.0616, "step": 26880 }, { "epoch": 0.47605006486117935, "grad_norm": 0.5803321599960327, "learning_rate": 1.6886929371891173e-05, "loss": 0.0631, "step": 26881 }, { "epoch": 0.4760677743982078, "grad_norm": 0.6189344525337219, "learning_rate": 1.688607583873562e-05, "loss": 0.0754, "step": 26882 }, { "epoch": 0.4760854839352362, "grad_norm": 0.743726909160614, "learning_rate": 1.6885222299375077e-05, "loss": 0.0799, "step": 26883 }, { "epoch": 0.47610319347226465, "grad_norm": 0.6499446034431458, "learning_rate": 1.6884368753812336e-05, "loss": 0.0829, "step": 26884 }, { "epoch": 0.4761209030092931, "grad_norm": 0.4807908535003662, "learning_rate": 1.6883515202050216e-05, "loss": 0.0646, "step": 26885 }, { "epoch": 0.4761386125463215, "grad_norm": 0.596176266670227, "learning_rate": 1.6882661644091514e-05, "loss": 0.0702, "step": 26886 }, { "epoch": 0.47615632208334996, "grad_norm": 0.47609415650367737, "learning_rate": 1.6881808079939056e-05, "loss": 0.0864, "step": 26887 }, { "epoch": 0.47617403162037836, "grad_norm": 0.8855457305908203, "learning_rate": 1.6880954509595632e-05, "loss": 0.0842, "step": 26888 }, { "epoch": 0.4761917411574068, "grad_norm": 0.736616313457489, "learning_rate": 1.6880100933064066e-05, "loss": 0.0706, "step": 26889 }, { "epoch": 0.4762094506944352, "grad_norm": 0.4250969886779785, "learning_rate": 1.687924735034715e-05, "loss": 0.0525, "step": 26890 }, { "epoch": 0.47622716023146366, "grad_norm": 0.9806066155433655, "learning_rate": 1.6878393761447703e-05, "loss": 0.0948, "step": 26891 }, { "epoch": 0.47624486976849206, "grad_norm": 0.45263397693634033, "learning_rate": 1.6877540166368535e-05, "loss": 0.0678, "step": 26892 }, { "epoch": 0.4762625793055205, "grad_norm": 0.3736223876476288, "learning_rate": 1.6876686565112443e-05, "loss": 0.0552, "step": 26893 }, { "epoch": 0.4762802888425489, "grad_norm": 0.864088237285614, "learning_rate": 1.687583295768225e-05, "loss": 0.0772, "step": 26894 }, { "epoch": 0.47629799837957737, "grad_norm": 0.8102930784225464, "learning_rate": 1.6874979344080744e-05, "loss": 0.0781, "step": 26895 }, { "epoch": 0.47631570791660577, "grad_norm": 0.380441278219223, "learning_rate": 1.6874125724310755e-05, "loss": 0.0385, "step": 26896 }, { "epoch": 0.4763334174536342, "grad_norm": 0.6825838088989258, "learning_rate": 1.687327209837508e-05, "loss": 0.0756, "step": 26897 }, { "epoch": 0.4763511269906626, "grad_norm": 0.8625710606575012, "learning_rate": 1.687241846627653e-05, "loss": 0.0719, "step": 26898 }, { "epoch": 0.4763688365276911, "grad_norm": 0.6005109548568726, "learning_rate": 1.6871564828017912e-05, "loss": 0.0737, "step": 26899 }, { "epoch": 0.47638654606471953, "grad_norm": 0.34806182980537415, "learning_rate": 1.6870711183602033e-05, "loss": 0.0601, "step": 26900 }, { "epoch": 0.47640425560174793, "grad_norm": 0.5952885746955872, "learning_rate": 1.6869857533031706e-05, "loss": 0.0787, "step": 26901 }, { "epoch": 0.4764219651387764, "grad_norm": 0.8017308115959167, "learning_rate": 1.6869003876309735e-05, "loss": 0.1323, "step": 26902 }, { "epoch": 0.4764396746758048, "grad_norm": 0.5715013146400452, "learning_rate": 1.6868150213438932e-05, "loss": 0.0984, "step": 26903 }, { "epoch": 0.47645738421283323, "grad_norm": 0.510073184967041, "learning_rate": 1.68672965444221e-05, "loss": 0.0714, "step": 26904 }, { "epoch": 0.47647509374986163, "grad_norm": 0.8315008282661438, "learning_rate": 1.6866442869262056e-05, "loss": 0.1154, "step": 26905 }, { "epoch": 0.4764928032868901, "grad_norm": 0.5293294191360474, "learning_rate": 1.6865589187961605e-05, "loss": 0.0532, "step": 26906 }, { "epoch": 0.4765105128239185, "grad_norm": 0.6290871500968933, "learning_rate": 1.686473550052355e-05, "loss": 0.0877, "step": 26907 }, { "epoch": 0.47652822236094694, "grad_norm": 0.7954078316688538, "learning_rate": 1.686388180695071e-05, "loss": 0.0862, "step": 26908 }, { "epoch": 0.47654593189797534, "grad_norm": 0.9292319416999817, "learning_rate": 1.686302810724587e-05, "loss": 0.0776, "step": 26909 }, { "epoch": 0.4765636414350038, "grad_norm": 0.807640552520752, "learning_rate": 1.6862174401411873e-05, "loss": 0.0846, "step": 26910 }, { "epoch": 0.4765813509720322, "grad_norm": 0.7074406743049622, "learning_rate": 1.6861320689451506e-05, "loss": 0.0481, "step": 26911 }, { "epoch": 0.47659906050906065, "grad_norm": 0.3623373508453369, "learning_rate": 1.686046697136758e-05, "loss": 0.0667, "step": 26912 }, { "epoch": 0.47661677004608904, "grad_norm": 0.7625654935836792, "learning_rate": 1.6859613247162912e-05, "loss": 0.0593, "step": 26913 }, { "epoch": 0.4766344795831175, "grad_norm": 0.7013476490974426, "learning_rate": 1.68587595168403e-05, "loss": 0.0814, "step": 26914 }, { "epoch": 0.47665218912014595, "grad_norm": 0.5612282156944275, "learning_rate": 1.6857905780402558e-05, "loss": 0.0851, "step": 26915 }, { "epoch": 0.47666989865717435, "grad_norm": 1.296341061592102, "learning_rate": 1.6857052037852493e-05, "loss": 0.069, "step": 26916 }, { "epoch": 0.4766876081942028, "grad_norm": 0.8200100064277649, "learning_rate": 1.685619828919292e-05, "loss": 0.0934, "step": 26917 }, { "epoch": 0.4767053177312312, "grad_norm": 0.5483032464981079, "learning_rate": 1.6855344534426634e-05, "loss": 0.0867, "step": 26918 }, { "epoch": 0.47672302726825966, "grad_norm": 0.7482990622520447, "learning_rate": 1.6854490773556453e-05, "loss": 0.0949, "step": 26919 }, { "epoch": 0.47674073680528806, "grad_norm": 1.1122441291809082, "learning_rate": 1.6853637006585185e-05, "loss": 0.0828, "step": 26920 }, { "epoch": 0.4767584463423165, "grad_norm": 0.7863752245903015, "learning_rate": 1.6852783233515646e-05, "loss": 0.0407, "step": 26921 }, { "epoch": 0.4767761558793449, "grad_norm": 0.5360297560691833, "learning_rate": 1.6851929454350634e-05, "loss": 0.0708, "step": 26922 }, { "epoch": 0.47679386541637336, "grad_norm": 0.7353922128677368, "learning_rate": 1.6851075669092962e-05, "loss": 0.0611, "step": 26923 }, { "epoch": 0.47681157495340176, "grad_norm": 0.4268275499343872, "learning_rate": 1.6850221877745434e-05, "loss": 0.0582, "step": 26924 }, { "epoch": 0.4768292844904302, "grad_norm": 0.7549803853034973, "learning_rate": 1.6849368080310864e-05, "loss": 0.0841, "step": 26925 }, { "epoch": 0.4768469940274586, "grad_norm": 0.5725405812263489, "learning_rate": 1.6848514276792066e-05, "loss": 0.0793, "step": 26926 }, { "epoch": 0.47686470356448707, "grad_norm": 0.6017592549324036, "learning_rate": 1.6847660467191836e-05, "loss": 0.0576, "step": 26927 }, { "epoch": 0.47688241310151547, "grad_norm": 0.5879706144332886, "learning_rate": 1.684680665151299e-05, "loss": 0.0757, "step": 26928 }, { "epoch": 0.4769001226385439, "grad_norm": 0.42866748571395874, "learning_rate": 1.6845952829758344e-05, "loss": 0.0616, "step": 26929 }, { "epoch": 0.4769178321755724, "grad_norm": 0.703880250453949, "learning_rate": 1.6845099001930693e-05, "loss": 0.0971, "step": 26930 }, { "epoch": 0.4769355417126008, "grad_norm": 1.8992090225219727, "learning_rate": 1.6844245168032858e-05, "loss": 0.0763, "step": 26931 }, { "epoch": 0.47695325124962923, "grad_norm": 1.777947187423706, "learning_rate": 1.684339132806764e-05, "loss": 0.1052, "step": 26932 }, { "epoch": 0.4769709607866576, "grad_norm": 0.6220934391021729, "learning_rate": 1.684253748203785e-05, "loss": 0.0835, "step": 26933 }, { "epoch": 0.4769886703236861, "grad_norm": 0.7916774749755859, "learning_rate": 1.6841683629946296e-05, "loss": 0.0803, "step": 26934 }, { "epoch": 0.4770063798607145, "grad_norm": 0.28767791390419006, "learning_rate": 1.6840829771795794e-05, "loss": 0.0561, "step": 26935 }, { "epoch": 0.47702408939774293, "grad_norm": 0.6158872246742249, "learning_rate": 1.683997590758915e-05, "loss": 0.0651, "step": 26936 }, { "epoch": 0.47704179893477133, "grad_norm": 0.5869369506835938, "learning_rate": 1.6839122037329165e-05, "loss": 0.0704, "step": 26937 }, { "epoch": 0.4770595084717998, "grad_norm": 0.7306830883026123, "learning_rate": 1.683826816101866e-05, "loss": 0.0829, "step": 26938 }, { "epoch": 0.4770772180088282, "grad_norm": 0.6647404432296753, "learning_rate": 1.6837414278660434e-05, "loss": 0.0649, "step": 26939 }, { "epoch": 0.47709492754585664, "grad_norm": 0.6963396072387695, "learning_rate": 1.6836560390257306e-05, "loss": 0.0739, "step": 26940 }, { "epoch": 0.47711263708288504, "grad_norm": 0.5185464024543762, "learning_rate": 1.6835706495812076e-05, "loss": 0.0561, "step": 26941 }, { "epoch": 0.4771303466199135, "grad_norm": 0.6762682795524597, "learning_rate": 1.6834852595327558e-05, "loss": 0.0668, "step": 26942 }, { "epoch": 0.4771480561569419, "grad_norm": 1.0561577081680298, "learning_rate": 1.683399868880656e-05, "loss": 0.0579, "step": 26943 }, { "epoch": 0.47716576569397035, "grad_norm": 0.8072631359100342, "learning_rate": 1.6833144776251896e-05, "loss": 0.0883, "step": 26944 }, { "epoch": 0.4771834752309988, "grad_norm": 0.6135969758033752, "learning_rate": 1.6832290857666366e-05, "loss": 0.0986, "step": 26945 }, { "epoch": 0.4772011847680272, "grad_norm": 0.6826710104942322, "learning_rate": 1.6831436933052787e-05, "loss": 0.0845, "step": 26946 }, { "epoch": 0.47721889430505565, "grad_norm": 0.45603954792022705, "learning_rate": 1.6830583002413964e-05, "loss": 0.0633, "step": 26947 }, { "epoch": 0.47723660384208405, "grad_norm": 0.5913527607917786, "learning_rate": 1.682972906575271e-05, "loss": 0.0776, "step": 26948 }, { "epoch": 0.4772543133791125, "grad_norm": 0.7247462868690491, "learning_rate": 1.6828875123071833e-05, "loss": 0.0887, "step": 26949 }, { "epoch": 0.4772720229161409, "grad_norm": 0.4446161985397339, "learning_rate": 1.682802117437414e-05, "loss": 0.0586, "step": 26950 }, { "epoch": 0.47728973245316936, "grad_norm": 0.9456869959831238, "learning_rate": 1.6827167219662444e-05, "loss": 0.0808, "step": 26951 }, { "epoch": 0.47730744199019776, "grad_norm": 0.8117630481719971, "learning_rate": 1.682631325893955e-05, "loss": 0.0934, "step": 26952 }, { "epoch": 0.4773251515272262, "grad_norm": 1.0451298952102661, "learning_rate": 1.6825459292208272e-05, "loss": 0.0793, "step": 26953 }, { "epoch": 0.4773428610642546, "grad_norm": 0.6315415501594543, "learning_rate": 1.6824605319471417e-05, "loss": 0.091, "step": 26954 }, { "epoch": 0.47736057060128306, "grad_norm": 0.6565812230110168, "learning_rate": 1.6823751340731792e-05, "loss": 0.0623, "step": 26955 }, { "epoch": 0.47737828013831146, "grad_norm": 0.3652873933315277, "learning_rate": 1.6822897355992216e-05, "loss": 0.0604, "step": 26956 }, { "epoch": 0.4773959896753399, "grad_norm": 0.6173478364944458, "learning_rate": 1.6822043365255488e-05, "loss": 0.0929, "step": 26957 }, { "epoch": 0.4774136992123683, "grad_norm": 0.6472705006599426, "learning_rate": 1.6821189368524422e-05, "loss": 0.0589, "step": 26958 }, { "epoch": 0.47743140874939677, "grad_norm": 0.5643398761749268, "learning_rate": 1.6820335365801825e-05, "loss": 0.0633, "step": 26959 }, { "epoch": 0.4774491182864252, "grad_norm": 0.40208467841148376, "learning_rate": 1.6819481357090515e-05, "loss": 0.0644, "step": 26960 }, { "epoch": 0.4774668278234536, "grad_norm": 0.6695325374603271, "learning_rate": 1.681862734239329e-05, "loss": 0.0666, "step": 26961 }, { "epoch": 0.4774845373604821, "grad_norm": 0.4111526608467102, "learning_rate": 1.6817773321712966e-05, "loss": 0.0851, "step": 26962 }, { "epoch": 0.4775022468975105, "grad_norm": 0.4657152593135834, "learning_rate": 1.6816919295052354e-05, "loss": 0.0404, "step": 26963 }, { "epoch": 0.47751995643453893, "grad_norm": 0.8099360466003418, "learning_rate": 1.6816065262414257e-05, "loss": 0.0806, "step": 26964 }, { "epoch": 0.4775376659715673, "grad_norm": 0.34189242124557495, "learning_rate": 1.6815211223801495e-05, "loss": 0.0789, "step": 26965 }, { "epoch": 0.4775553755085958, "grad_norm": 0.6868885159492493, "learning_rate": 1.6814357179216866e-05, "loss": 0.0682, "step": 26966 }, { "epoch": 0.4775730850456242, "grad_norm": 0.9609774947166443, "learning_rate": 1.6813503128663186e-05, "loss": 0.0893, "step": 26967 }, { "epoch": 0.47759079458265263, "grad_norm": 0.5302602052688599, "learning_rate": 1.681264907214327e-05, "loss": 0.0584, "step": 26968 }, { "epoch": 0.47760850411968103, "grad_norm": 0.7258141040802002, "learning_rate": 1.6811795009659916e-05, "loss": 0.071, "step": 26969 }, { "epoch": 0.4776262136567095, "grad_norm": 0.6447279453277588, "learning_rate": 1.6810940941215945e-05, "loss": 0.0705, "step": 26970 }, { "epoch": 0.4776439231937379, "grad_norm": 0.5838698148727417, "learning_rate": 1.681008686681415e-05, "loss": 0.0524, "step": 26971 }, { "epoch": 0.47766163273076634, "grad_norm": 0.9199663996696472, "learning_rate": 1.680923278645736e-05, "loss": 0.0949, "step": 26972 }, { "epoch": 0.47767934226779474, "grad_norm": 1.0377928018569946, "learning_rate": 1.6808378700148377e-05, "loss": 0.0755, "step": 26973 }, { "epoch": 0.4776970518048232, "grad_norm": 0.7086476683616638, "learning_rate": 1.6807524607890017e-05, "loss": 0.1079, "step": 26974 }, { "epoch": 0.47771476134185165, "grad_norm": 0.8228188157081604, "learning_rate": 1.6806670509685075e-05, "loss": 0.068, "step": 26975 }, { "epoch": 0.47773247087888004, "grad_norm": 0.4399006962776184, "learning_rate": 1.680581640553637e-05, "loss": 0.0778, "step": 26976 }, { "epoch": 0.4777501804159085, "grad_norm": 0.6536989212036133, "learning_rate": 1.680496229544672e-05, "loss": 0.085, "step": 26977 }, { "epoch": 0.4777678899529369, "grad_norm": 0.8086361289024353, "learning_rate": 1.6804108179418918e-05, "loss": 0.0674, "step": 26978 }, { "epoch": 0.47778559948996535, "grad_norm": 0.7985585331916809, "learning_rate": 1.680325405745579e-05, "loss": 0.1027, "step": 26979 }, { "epoch": 0.47780330902699375, "grad_norm": 0.6741791367530823, "learning_rate": 1.680239992956013e-05, "loss": 0.0965, "step": 26980 }, { "epoch": 0.4778210185640222, "grad_norm": 0.5987643599510193, "learning_rate": 1.680154579573476e-05, "loss": 0.0535, "step": 26981 }, { "epoch": 0.4778387281010506, "grad_norm": 0.42377108335494995, "learning_rate": 1.6800691655982486e-05, "loss": 0.066, "step": 26982 }, { "epoch": 0.47785643763807906, "grad_norm": 0.8403549194335938, "learning_rate": 1.679983751030612e-05, "loss": 0.0921, "step": 26983 }, { "epoch": 0.47787414717510746, "grad_norm": 0.4473462700843811, "learning_rate": 1.6798983358708475e-05, "loss": 0.0532, "step": 26984 }, { "epoch": 0.4778918567121359, "grad_norm": 0.7283853888511658, "learning_rate": 1.679812920119235e-05, "loss": 0.0619, "step": 26985 }, { "epoch": 0.4779095662491643, "grad_norm": 0.9221112728118896, "learning_rate": 1.6797275037760564e-05, "loss": 0.0545, "step": 26986 }, { "epoch": 0.47792727578619276, "grad_norm": 0.44891637563705444, "learning_rate": 1.6796420868415923e-05, "loss": 0.0393, "step": 26987 }, { "epoch": 0.47794498532322116, "grad_norm": 0.6313521862030029, "learning_rate": 1.6795566693161243e-05, "loss": 0.0629, "step": 26988 }, { "epoch": 0.4779626948602496, "grad_norm": 0.7608067393302917, "learning_rate": 1.6794712511999327e-05, "loss": 0.0763, "step": 26989 }, { "epoch": 0.47798040439727807, "grad_norm": 0.6408293843269348, "learning_rate": 1.679385832493299e-05, "loss": 0.0765, "step": 26990 }, { "epoch": 0.47799811393430647, "grad_norm": 0.8870121240615845, "learning_rate": 1.6793004131965037e-05, "loss": 0.053, "step": 26991 }, { "epoch": 0.4780158234713349, "grad_norm": 0.6353417038917542, "learning_rate": 1.6792149933098287e-05, "loss": 0.0602, "step": 26992 }, { "epoch": 0.4780335330083633, "grad_norm": 1.024440884590149, "learning_rate": 1.6791295728335546e-05, "loss": 0.1098, "step": 26993 }, { "epoch": 0.4780512425453918, "grad_norm": 0.7050821781158447, "learning_rate": 1.6790441517679615e-05, "loss": 0.0977, "step": 26994 }, { "epoch": 0.4780689520824202, "grad_norm": 0.7311010360717773, "learning_rate": 1.678958730113332e-05, "loss": 0.0577, "step": 26995 }, { "epoch": 0.4780866616194486, "grad_norm": 0.6748319864273071, "learning_rate": 1.678873307869946e-05, "loss": 0.0666, "step": 26996 }, { "epoch": 0.478104371156477, "grad_norm": 0.585311770439148, "learning_rate": 1.678787885038085e-05, "loss": 0.0826, "step": 26997 }, { "epoch": 0.4781220806935055, "grad_norm": 0.38323792815208435, "learning_rate": 1.67870246161803e-05, "loss": 0.0893, "step": 26998 }, { "epoch": 0.4781397902305339, "grad_norm": 0.6514883041381836, "learning_rate": 1.6786170376100623e-05, "loss": 0.0714, "step": 26999 }, { "epoch": 0.47815749976756233, "grad_norm": 0.5378930568695068, "learning_rate": 1.678531613014462e-05, "loss": 0.0658, "step": 27000 }, { "epoch": 0.47817520930459073, "grad_norm": 0.6031093001365662, "learning_rate": 1.678446187831511e-05, "loss": 0.0845, "step": 27001 }, { "epoch": 0.4781929188416192, "grad_norm": 0.9375579357147217, "learning_rate": 1.67836076206149e-05, "loss": 0.1032, "step": 27002 }, { "epoch": 0.4782106283786476, "grad_norm": 0.6117473244667053, "learning_rate": 1.6782753357046803e-05, "loss": 0.0874, "step": 27003 }, { "epoch": 0.47822833791567604, "grad_norm": 0.7053112983703613, "learning_rate": 1.6781899087613625e-05, "loss": 0.0616, "step": 27004 }, { "epoch": 0.4782460474527045, "grad_norm": 0.7396113872528076, "learning_rate": 1.678104481231818e-05, "loss": 0.0524, "step": 27005 }, { "epoch": 0.4782637569897329, "grad_norm": 0.7370498180389404, "learning_rate": 1.6780190531163284e-05, "loss": 0.0544, "step": 27006 }, { "epoch": 0.47828146652676135, "grad_norm": 0.8721262216567993, "learning_rate": 1.6779336244151733e-05, "loss": 0.0845, "step": 27007 }, { "epoch": 0.47829917606378974, "grad_norm": 0.9591951966285706, "learning_rate": 1.6778481951286352e-05, "loss": 0.0679, "step": 27008 }, { "epoch": 0.4783168856008182, "grad_norm": 0.5897278785705566, "learning_rate": 1.677762765256994e-05, "loss": 0.0832, "step": 27009 }, { "epoch": 0.4783345951378466, "grad_norm": 0.700615406036377, "learning_rate": 1.6776773348005314e-05, "loss": 0.0681, "step": 27010 }, { "epoch": 0.47835230467487505, "grad_norm": 0.40352076292037964, "learning_rate": 1.6775919037595288e-05, "loss": 0.0962, "step": 27011 }, { "epoch": 0.47837001421190345, "grad_norm": 0.46430566906929016, "learning_rate": 1.6775064721342663e-05, "loss": 0.045, "step": 27012 }, { "epoch": 0.4783877237489319, "grad_norm": 1.0676382780075073, "learning_rate": 1.6774210399250257e-05, "loss": 0.0767, "step": 27013 }, { "epoch": 0.4784054332859603, "grad_norm": 0.5323813557624817, "learning_rate": 1.6773356071320876e-05, "loss": 0.0474, "step": 27014 }, { "epoch": 0.47842314282298876, "grad_norm": 1.3912235498428345, "learning_rate": 1.6772501737557332e-05, "loss": 0.1213, "step": 27015 }, { "epoch": 0.47844085236001715, "grad_norm": 0.9821866750717163, "learning_rate": 1.6771647397962444e-05, "loss": 0.0626, "step": 27016 }, { "epoch": 0.4784585618970456, "grad_norm": 0.5461556911468506, "learning_rate": 1.6770793052539007e-05, "loss": 0.0562, "step": 27017 }, { "epoch": 0.478476271434074, "grad_norm": 0.9143053889274597, "learning_rate": 1.6769938701289847e-05, "loss": 0.1059, "step": 27018 }, { "epoch": 0.47849398097110246, "grad_norm": 0.6748811602592468, "learning_rate": 1.676908434421776e-05, "loss": 0.0618, "step": 27019 }, { "epoch": 0.4785116905081309, "grad_norm": 0.6724440455436707, "learning_rate": 1.6768229981325568e-05, "loss": 0.0598, "step": 27020 }, { "epoch": 0.4785294000451593, "grad_norm": 0.5925474762916565, "learning_rate": 1.6767375612616077e-05, "loss": 0.0684, "step": 27021 }, { "epoch": 0.47854710958218777, "grad_norm": 0.8476144075393677, "learning_rate": 1.6766521238092105e-05, "loss": 0.1067, "step": 27022 }, { "epoch": 0.47856481911921617, "grad_norm": 0.574299693107605, "learning_rate": 1.6765666857756447e-05, "loss": 0.0638, "step": 27023 }, { "epoch": 0.4785825286562446, "grad_norm": 0.5338254570960999, "learning_rate": 1.6764812471611928e-05, "loss": 0.0523, "step": 27024 }, { "epoch": 0.478600238193273, "grad_norm": 0.5234591960906982, "learning_rate": 1.6763958079661357e-05, "loss": 0.0508, "step": 27025 }, { "epoch": 0.4786179477303015, "grad_norm": 0.8572645783424377, "learning_rate": 1.6763103681907544e-05, "loss": 0.0657, "step": 27026 }, { "epoch": 0.4786356572673299, "grad_norm": 0.6841996312141418, "learning_rate": 1.6762249278353296e-05, "loss": 0.0808, "step": 27027 }, { "epoch": 0.4786533668043583, "grad_norm": 0.5252810120582581, "learning_rate": 1.6761394869001423e-05, "loss": 0.0507, "step": 27028 }, { "epoch": 0.4786710763413867, "grad_norm": 0.20537957549095154, "learning_rate": 1.676054045385474e-05, "loss": 0.0609, "step": 27029 }, { "epoch": 0.4786887858784152, "grad_norm": 0.9001392126083374, "learning_rate": 1.6759686032916056e-05, "loss": 0.0621, "step": 27030 }, { "epoch": 0.4787064954154436, "grad_norm": 0.5450926423072815, "learning_rate": 1.6758831606188186e-05, "loss": 0.0899, "step": 27031 }, { "epoch": 0.47872420495247203, "grad_norm": 1.2113231420516968, "learning_rate": 1.6757977173673936e-05, "loss": 0.1095, "step": 27032 }, { "epoch": 0.47874191448950043, "grad_norm": 0.7471641302108765, "learning_rate": 1.6757122735376118e-05, "loss": 0.0877, "step": 27033 }, { "epoch": 0.4787596240265289, "grad_norm": 0.823182225227356, "learning_rate": 1.6756268291297548e-05, "loss": 0.0785, "step": 27034 }, { "epoch": 0.47877733356355734, "grad_norm": 0.6171416640281677, "learning_rate": 1.675541384144103e-05, "loss": 0.0398, "step": 27035 }, { "epoch": 0.47879504310058574, "grad_norm": 0.840761661529541, "learning_rate": 1.675455938580938e-05, "loss": 0.1136, "step": 27036 }, { "epoch": 0.4788127526376142, "grad_norm": 0.5336737632751465, "learning_rate": 1.6753704924405405e-05, "loss": 0.0478, "step": 27037 }, { "epoch": 0.4788304621746426, "grad_norm": 0.596830427646637, "learning_rate": 1.675285045723192e-05, "loss": 0.0756, "step": 27038 }, { "epoch": 0.47884817171167104, "grad_norm": 0.54926598072052, "learning_rate": 1.6751995984291727e-05, "loss": 0.0629, "step": 27039 }, { "epoch": 0.47886588124869944, "grad_norm": 0.7411817312240601, "learning_rate": 1.6751141505587656e-05, "loss": 0.0906, "step": 27040 }, { "epoch": 0.4788835907857279, "grad_norm": 0.6512635350227356, "learning_rate": 1.67502870211225e-05, "loss": 0.0912, "step": 27041 }, { "epoch": 0.4789013003227563, "grad_norm": 0.6488584280014038, "learning_rate": 1.674943253089908e-05, "loss": 0.0994, "step": 27042 }, { "epoch": 0.47891900985978475, "grad_norm": 0.5096307992935181, "learning_rate": 1.67485780349202e-05, "loss": 0.1275, "step": 27043 }, { "epoch": 0.47893671939681315, "grad_norm": 0.47838059067726135, "learning_rate": 1.6747723533188675e-05, "loss": 0.0661, "step": 27044 }, { "epoch": 0.4789544289338416, "grad_norm": 0.5505791902542114, "learning_rate": 1.674686902570732e-05, "loss": 0.0698, "step": 27045 }, { "epoch": 0.47897213847087, "grad_norm": 0.7091179490089417, "learning_rate": 1.6746014512478938e-05, "loss": 0.0645, "step": 27046 }, { "epoch": 0.47898984800789846, "grad_norm": 0.7976135015487671, "learning_rate": 1.674515999350635e-05, "loss": 0.0697, "step": 27047 }, { "epoch": 0.47900755754492685, "grad_norm": 0.7041366100311279, "learning_rate": 1.674430546879236e-05, "loss": 0.0489, "step": 27048 }, { "epoch": 0.4790252670819553, "grad_norm": 0.6711312532424927, "learning_rate": 1.6743450938339782e-05, "loss": 0.0705, "step": 27049 }, { "epoch": 0.47904297661898376, "grad_norm": 0.3491441011428833, "learning_rate": 1.6742596402151426e-05, "loss": 0.0691, "step": 27050 }, { "epoch": 0.47906068615601216, "grad_norm": 0.5848095417022705, "learning_rate": 1.67417418602301e-05, "loss": 0.066, "step": 27051 }, { "epoch": 0.4790783956930406, "grad_norm": 0.6991996169090271, "learning_rate": 1.6740887312578626e-05, "loss": 0.0776, "step": 27052 }, { "epoch": 0.479096105230069, "grad_norm": 0.6189022064208984, "learning_rate": 1.6740032759199805e-05, "loss": 0.077, "step": 27053 }, { "epoch": 0.47911381476709747, "grad_norm": 0.7380667328834534, "learning_rate": 1.6739178200096454e-05, "loss": 0.0842, "step": 27054 }, { "epoch": 0.47913152430412587, "grad_norm": 0.4774903357028961, "learning_rate": 1.6738323635271385e-05, "loss": 0.0483, "step": 27055 }, { "epoch": 0.4791492338411543, "grad_norm": 0.6337267160415649, "learning_rate": 1.67374690647274e-05, "loss": 0.093, "step": 27056 }, { "epoch": 0.4791669433781827, "grad_norm": 0.7909871935844421, "learning_rate": 1.6736614488467322e-05, "loss": 0.0597, "step": 27057 }, { "epoch": 0.4791846529152112, "grad_norm": 0.526629626750946, "learning_rate": 1.673575990649396e-05, "loss": 0.1027, "step": 27058 }, { "epoch": 0.47920236245223957, "grad_norm": 0.8282426595687866, "learning_rate": 1.673490531881012e-05, "loss": 0.0716, "step": 27059 }, { "epoch": 0.479220071989268, "grad_norm": 0.6090613007545471, "learning_rate": 1.673405072541862e-05, "loss": 0.0815, "step": 27060 }, { "epoch": 0.4792377815262964, "grad_norm": 0.41702133417129517, "learning_rate": 1.6733196126322265e-05, "loss": 0.0661, "step": 27061 }, { "epoch": 0.4792554910633249, "grad_norm": 0.5808578133583069, "learning_rate": 1.6732341521523872e-05, "loss": 0.0565, "step": 27062 }, { "epoch": 0.47927320060035333, "grad_norm": 0.9980318546295166, "learning_rate": 1.6731486911026248e-05, "loss": 0.1199, "step": 27063 }, { "epoch": 0.47929091013738173, "grad_norm": 0.37923339009284973, "learning_rate": 1.6730632294832212e-05, "loss": 0.0726, "step": 27064 }, { "epoch": 0.4793086196744102, "grad_norm": 0.4746439456939697, "learning_rate": 1.6729777672944567e-05, "loss": 0.0767, "step": 27065 }, { "epoch": 0.4793263292114386, "grad_norm": 0.5073674321174622, "learning_rate": 1.672892304536613e-05, "loss": 0.0828, "step": 27066 }, { "epoch": 0.47934403874846704, "grad_norm": 1.0917243957519531, "learning_rate": 1.672806841209971e-05, "loss": 0.1205, "step": 27067 }, { "epoch": 0.47936174828549544, "grad_norm": 0.47783929109573364, "learning_rate": 1.672721377314812e-05, "loss": 0.0812, "step": 27068 }, { "epoch": 0.4793794578225239, "grad_norm": 0.7678757905960083, "learning_rate": 1.6726359128514174e-05, "loss": 0.0887, "step": 27069 }, { "epoch": 0.4793971673595523, "grad_norm": 0.5528421998023987, "learning_rate": 1.6725504478200682e-05, "loss": 0.0864, "step": 27070 }, { "epoch": 0.47941487689658074, "grad_norm": 0.6155546307563782, "learning_rate": 1.6724649822210447e-05, "loss": 0.078, "step": 27071 }, { "epoch": 0.47943258643360914, "grad_norm": 0.5015763640403748, "learning_rate": 1.672379516054629e-05, "loss": 0.0554, "step": 27072 }, { "epoch": 0.4794502959706376, "grad_norm": 0.42875078320503235, "learning_rate": 1.672294049321103e-05, "loss": 0.0486, "step": 27073 }, { "epoch": 0.479468005507666, "grad_norm": 0.7139292359352112, "learning_rate": 1.6722085820207464e-05, "loss": 0.0962, "step": 27074 }, { "epoch": 0.47948571504469445, "grad_norm": 0.4326460361480713, "learning_rate": 1.672123114153841e-05, "loss": 0.0776, "step": 27075 }, { "epoch": 0.47950342458172285, "grad_norm": 0.7279658913612366, "learning_rate": 1.6720376457206678e-05, "loss": 0.0681, "step": 27076 }, { "epoch": 0.4795211341187513, "grad_norm": 0.7714447975158691, "learning_rate": 1.6719521767215083e-05, "loss": 0.0714, "step": 27077 }, { "epoch": 0.47953884365577976, "grad_norm": 0.8508086204528809, "learning_rate": 1.6718667071566437e-05, "loss": 0.0751, "step": 27078 }, { "epoch": 0.47955655319280815, "grad_norm": 1.0303571224212646, "learning_rate": 1.671781237026355e-05, "loss": 0.119, "step": 27079 }, { "epoch": 0.4795742627298366, "grad_norm": 0.6134213805198669, "learning_rate": 1.6716957663309236e-05, "loss": 0.0713, "step": 27080 }, { "epoch": 0.479591972266865, "grad_norm": 0.5190236568450928, "learning_rate": 1.6716102950706297e-05, "loss": 0.082, "step": 27081 }, { "epoch": 0.47960968180389346, "grad_norm": 1.042625904083252, "learning_rate": 1.671524823245756e-05, "loss": 0.0753, "step": 27082 }, { "epoch": 0.47962739134092186, "grad_norm": 0.6261175274848938, "learning_rate": 1.6714393508565827e-05, "loss": 0.085, "step": 27083 }, { "epoch": 0.4796451008779503, "grad_norm": 0.6139577031135559, "learning_rate": 1.6713538779033917e-05, "loss": 0.0884, "step": 27084 }, { "epoch": 0.4796628104149787, "grad_norm": 0.45552942156791687, "learning_rate": 1.671268404386463e-05, "loss": 0.0946, "step": 27085 }, { "epoch": 0.47968051995200717, "grad_norm": 0.566233217716217, "learning_rate": 1.671182930306079e-05, "loss": 0.083, "step": 27086 }, { "epoch": 0.47969822948903557, "grad_norm": 0.4643305838108063, "learning_rate": 1.6710974556625206e-05, "loss": 0.0504, "step": 27087 }, { "epoch": 0.479715939026064, "grad_norm": 0.291803777217865, "learning_rate": 1.671011980456069e-05, "loss": 0.0472, "step": 27088 }, { "epoch": 0.4797336485630924, "grad_norm": 0.8801767230033875, "learning_rate": 1.6709265046870047e-05, "loss": 0.0731, "step": 27089 }, { "epoch": 0.4797513581001209, "grad_norm": 0.3636791408061981, "learning_rate": 1.67084102835561e-05, "loss": 0.0594, "step": 27090 }, { "epoch": 0.47976906763714927, "grad_norm": 0.36065253615379333, "learning_rate": 1.6707555514621653e-05, "loss": 0.0383, "step": 27091 }, { "epoch": 0.4797867771741777, "grad_norm": 0.8467808961868286, "learning_rate": 1.670670074006952e-05, "loss": 0.0661, "step": 27092 }, { "epoch": 0.4798044867112062, "grad_norm": 0.7766105532646179, "learning_rate": 1.670584595990252e-05, "loss": 0.1045, "step": 27093 }, { "epoch": 0.4798221962482346, "grad_norm": 0.6763088703155518, "learning_rate": 1.6704991174123454e-05, "loss": 0.071, "step": 27094 }, { "epoch": 0.47983990578526303, "grad_norm": 1.3234659433364868, "learning_rate": 1.6704136382735137e-05, "loss": 0.0903, "step": 27095 }, { "epoch": 0.47985761532229143, "grad_norm": 1.0556516647338867, "learning_rate": 1.670328158574039e-05, "loss": 0.0581, "step": 27096 }, { "epoch": 0.4798753248593199, "grad_norm": 0.40781137347221375, "learning_rate": 1.6702426783142017e-05, "loss": 0.048, "step": 27097 }, { "epoch": 0.4798930343963483, "grad_norm": 0.9843699336051941, "learning_rate": 1.670157197494283e-05, "loss": 0.0681, "step": 27098 }, { "epoch": 0.47991074393337674, "grad_norm": 0.7024988532066345, "learning_rate": 1.6700717161145644e-05, "loss": 0.0768, "step": 27099 }, { "epoch": 0.47992845347040514, "grad_norm": 0.730207085609436, "learning_rate": 1.6699862341753273e-05, "loss": 0.082, "step": 27100 }, { "epoch": 0.4799461630074336, "grad_norm": 0.6537379622459412, "learning_rate": 1.6699007516768523e-05, "loss": 0.0788, "step": 27101 }, { "epoch": 0.479963872544462, "grad_norm": 0.9546040892601013, "learning_rate": 1.6698152686194216e-05, "loss": 0.113, "step": 27102 }, { "epoch": 0.47998158208149044, "grad_norm": 0.6662730574607849, "learning_rate": 1.6697297850033157e-05, "loss": 0.082, "step": 27103 }, { "epoch": 0.47999929161851884, "grad_norm": 0.6450134515762329, "learning_rate": 1.6696443008288156e-05, "loss": 0.0497, "step": 27104 }, { "epoch": 0.4800170011555473, "grad_norm": 0.9126725196838379, "learning_rate": 1.6695588160962027e-05, "loss": 0.0823, "step": 27105 }, { "epoch": 0.4800347106925757, "grad_norm": 0.8096857070922852, "learning_rate": 1.669473330805759e-05, "loss": 0.0913, "step": 27106 }, { "epoch": 0.48005242022960415, "grad_norm": 0.5309362411499023, "learning_rate": 1.669387844957765e-05, "loss": 0.0711, "step": 27107 }, { "epoch": 0.4800701297666326, "grad_norm": 1.2498853206634521, "learning_rate": 1.669302358552502e-05, "loss": 0.1046, "step": 27108 }, { "epoch": 0.480087839303661, "grad_norm": 0.6451829671859741, "learning_rate": 1.6692168715902518e-05, "loss": 0.0744, "step": 27109 }, { "epoch": 0.48010554884068946, "grad_norm": 0.47011780738830566, "learning_rate": 1.6691313840712946e-05, "loss": 0.0729, "step": 27110 }, { "epoch": 0.48012325837771785, "grad_norm": 0.39547866582870483, "learning_rate": 1.6690458959959126e-05, "loss": 0.0438, "step": 27111 }, { "epoch": 0.4801409679147463, "grad_norm": 0.557887613773346, "learning_rate": 1.6689604073643868e-05, "loss": 0.0658, "step": 27112 }, { "epoch": 0.4801586774517747, "grad_norm": 0.3794174790382385, "learning_rate": 1.6688749181769983e-05, "loss": 0.0801, "step": 27113 }, { "epoch": 0.48017638698880316, "grad_norm": 0.7052462697029114, "learning_rate": 1.668789428434028e-05, "loss": 0.0829, "step": 27114 }, { "epoch": 0.48019409652583156, "grad_norm": 0.7746443748474121, "learning_rate": 1.6687039381357575e-05, "loss": 0.0646, "step": 27115 }, { "epoch": 0.48021180606286, "grad_norm": 0.5567551851272583, "learning_rate": 1.668618447282469e-05, "loss": 0.0613, "step": 27116 }, { "epoch": 0.4802295155998884, "grad_norm": 0.4711504578590393, "learning_rate": 1.6685329558744423e-05, "loss": 0.0684, "step": 27117 }, { "epoch": 0.48024722513691687, "grad_norm": 0.5362216830253601, "learning_rate": 1.668447463911959e-05, "loss": 0.0444, "step": 27118 }, { "epoch": 0.48026493467394527, "grad_norm": 0.6602023839950562, "learning_rate": 1.6683619713953008e-05, "loss": 0.0776, "step": 27119 }, { "epoch": 0.4802826442109737, "grad_norm": 0.6795321106910706, "learning_rate": 1.668276478324749e-05, "loss": 0.0814, "step": 27120 }, { "epoch": 0.4803003537480021, "grad_norm": 0.47563740611076355, "learning_rate": 1.6681909847005844e-05, "loss": 0.0838, "step": 27121 }, { "epoch": 0.48031806328503057, "grad_norm": 0.7000564336776733, "learning_rate": 1.6681054905230883e-05, "loss": 0.0817, "step": 27122 }, { "epoch": 0.480335772822059, "grad_norm": 0.6942594051361084, "learning_rate": 1.6680199957925427e-05, "loss": 0.0852, "step": 27123 }, { "epoch": 0.4803534823590874, "grad_norm": 0.7241277694702148, "learning_rate": 1.6679345005092275e-05, "loss": 0.0854, "step": 27124 }, { "epoch": 0.4803711918961159, "grad_norm": 0.462188184261322, "learning_rate": 1.667849004673425e-05, "loss": 0.0514, "step": 27125 }, { "epoch": 0.4803889014331443, "grad_norm": 0.8404896855354309, "learning_rate": 1.6677635082854164e-05, "loss": 0.1046, "step": 27126 }, { "epoch": 0.48040661097017273, "grad_norm": 0.6773070096969604, "learning_rate": 1.667678011345483e-05, "loss": 0.1113, "step": 27127 }, { "epoch": 0.48042432050720113, "grad_norm": 0.7956616282463074, "learning_rate": 1.6675925138539055e-05, "loss": 0.0761, "step": 27128 }, { "epoch": 0.4804420300442296, "grad_norm": 0.6059138774871826, "learning_rate": 1.6675070158109657e-05, "loss": 0.0876, "step": 27129 }, { "epoch": 0.480459739581258, "grad_norm": 0.9555615186691284, "learning_rate": 1.667421517216945e-05, "loss": 0.1095, "step": 27130 }, { "epoch": 0.48047744911828644, "grad_norm": 0.4889015853404999, "learning_rate": 1.6673360180721245e-05, "loss": 0.0797, "step": 27131 }, { "epoch": 0.48049515865531484, "grad_norm": 0.7874497175216675, "learning_rate": 1.6672505183767854e-05, "loss": 0.0494, "step": 27132 }, { "epoch": 0.4805128681923433, "grad_norm": 1.2306386232376099, "learning_rate": 1.667165018131209e-05, "loss": 0.1019, "step": 27133 }, { "epoch": 0.4805305777293717, "grad_norm": 0.7477826476097107, "learning_rate": 1.6670795173356764e-05, "loss": 0.0834, "step": 27134 }, { "epoch": 0.48054828726640014, "grad_norm": 0.9037291407585144, "learning_rate": 1.666994015990469e-05, "loss": 0.0821, "step": 27135 }, { "epoch": 0.48056599680342854, "grad_norm": 0.5532103180885315, "learning_rate": 1.6669085140958685e-05, "loss": 0.0681, "step": 27136 }, { "epoch": 0.480583706340457, "grad_norm": 0.6780572533607483, "learning_rate": 1.666823011652156e-05, "loss": 0.0878, "step": 27137 }, { "epoch": 0.48060141587748545, "grad_norm": 0.7988297939300537, "learning_rate": 1.666737508659612e-05, "loss": 0.0975, "step": 27138 }, { "epoch": 0.48061912541451385, "grad_norm": 0.5361782312393188, "learning_rate": 1.6666520051185186e-05, "loss": 0.0847, "step": 27139 }, { "epoch": 0.4806368349515423, "grad_norm": 0.7774655222892761, "learning_rate": 1.6665665010291572e-05, "loss": 0.0919, "step": 27140 }, { "epoch": 0.4806545444885707, "grad_norm": 0.9047411680221558, "learning_rate": 1.6664809963918088e-05, "loss": 0.0887, "step": 27141 }, { "epoch": 0.48067225402559915, "grad_norm": 0.7029715180397034, "learning_rate": 1.6663954912067553e-05, "loss": 0.0758, "step": 27142 }, { "epoch": 0.48068996356262755, "grad_norm": 1.046955943107605, "learning_rate": 1.6663099854742763e-05, "loss": 0.0916, "step": 27143 }, { "epoch": 0.480707673099656, "grad_norm": 0.638077974319458, "learning_rate": 1.6662244791946554e-05, "loss": 0.1002, "step": 27144 }, { "epoch": 0.4807253826366844, "grad_norm": 0.7151853442192078, "learning_rate": 1.666138972368172e-05, "loss": 0.0848, "step": 27145 }, { "epoch": 0.48074309217371286, "grad_norm": 0.8201736807823181, "learning_rate": 1.6660534649951088e-05, "loss": 0.0569, "step": 27146 }, { "epoch": 0.48076080171074126, "grad_norm": 0.5755053162574768, "learning_rate": 1.6659679570757463e-05, "loss": 0.0737, "step": 27147 }, { "epoch": 0.4807785112477697, "grad_norm": 2.2658724784851074, "learning_rate": 1.6658824486103653e-05, "loss": 0.0641, "step": 27148 }, { "epoch": 0.4807962207847981, "grad_norm": 0.5211023092269897, "learning_rate": 1.6657969395992488e-05, "loss": 0.0662, "step": 27149 }, { "epoch": 0.48081393032182657, "grad_norm": 0.8548353314399719, "learning_rate": 1.665711430042677e-05, "loss": 0.0981, "step": 27150 }, { "epoch": 0.48083163985885496, "grad_norm": 0.5882465243339539, "learning_rate": 1.665625919940931e-05, "loss": 0.0902, "step": 27151 }, { "epoch": 0.4808493493958834, "grad_norm": 1.1577590703964233, "learning_rate": 1.6655404092942924e-05, "loss": 0.0752, "step": 27152 }, { "epoch": 0.4808670589329119, "grad_norm": 0.39398255944252014, "learning_rate": 1.6654548981030425e-05, "loss": 0.0684, "step": 27153 }, { "epoch": 0.48088476846994027, "grad_norm": 0.7647300958633423, "learning_rate": 1.665369386367463e-05, "loss": 0.0662, "step": 27154 }, { "epoch": 0.4809024780069687, "grad_norm": 0.7821192145347595, "learning_rate": 1.665283874087835e-05, "loss": 0.0843, "step": 27155 }, { "epoch": 0.4809201875439971, "grad_norm": 0.6963526606559753, "learning_rate": 1.66519836126444e-05, "loss": 0.0836, "step": 27156 }, { "epoch": 0.4809378970810256, "grad_norm": 0.6210367679595947, "learning_rate": 1.6651128478975587e-05, "loss": 0.0685, "step": 27157 }, { "epoch": 0.480955606618054, "grad_norm": 0.4292910695075989, "learning_rate": 1.665027333987473e-05, "loss": 0.0519, "step": 27158 }, { "epoch": 0.48097331615508243, "grad_norm": 0.697704553604126, "learning_rate": 1.664941819534464e-05, "loss": 0.0708, "step": 27159 }, { "epoch": 0.48099102569211083, "grad_norm": 1.383346438407898, "learning_rate": 1.6648563045388132e-05, "loss": 0.1203, "step": 27160 }, { "epoch": 0.4810087352291393, "grad_norm": 0.4681070148944855, "learning_rate": 1.6647707890008017e-05, "loss": 0.0715, "step": 27161 }, { "epoch": 0.4810264447661677, "grad_norm": 0.7908909320831299, "learning_rate": 1.6646852729207107e-05, "loss": 0.0988, "step": 27162 }, { "epoch": 0.48104415430319614, "grad_norm": 0.7630775570869446, "learning_rate": 1.6645997562988222e-05, "loss": 0.0809, "step": 27163 }, { "epoch": 0.48106186384022454, "grad_norm": 0.7745333313941956, "learning_rate": 1.6645142391354174e-05, "loss": 0.0949, "step": 27164 }, { "epoch": 0.481079573377253, "grad_norm": 0.6832088232040405, "learning_rate": 1.664428721430777e-05, "loss": 0.0746, "step": 27165 }, { "epoch": 0.4810972829142814, "grad_norm": 0.7181018590927124, "learning_rate": 1.664343203185183e-05, "loss": 0.1047, "step": 27166 }, { "epoch": 0.48111499245130984, "grad_norm": 0.562282145023346, "learning_rate": 1.664257684398916e-05, "loss": 0.0769, "step": 27167 }, { "epoch": 0.4811327019883383, "grad_norm": 1.5084444284439087, "learning_rate": 1.6641721650722583e-05, "loss": 0.0978, "step": 27168 }, { "epoch": 0.4811504115253667, "grad_norm": 0.3841347098350525, "learning_rate": 1.6640866452054906e-05, "loss": 0.0473, "step": 27169 }, { "epoch": 0.48116812106239515, "grad_norm": 0.7062242031097412, "learning_rate": 1.6640011247988943e-05, "loss": 0.0947, "step": 27170 }, { "epoch": 0.48118583059942355, "grad_norm": 0.516621470451355, "learning_rate": 1.663915603852751e-05, "loss": 0.075, "step": 27171 }, { "epoch": 0.481203540136452, "grad_norm": 0.41837143898010254, "learning_rate": 1.6638300823673418e-05, "loss": 0.0758, "step": 27172 }, { "epoch": 0.4812212496734804, "grad_norm": 1.005233645439148, "learning_rate": 1.6637445603429484e-05, "loss": 0.0744, "step": 27173 }, { "epoch": 0.48123895921050885, "grad_norm": 0.5545529723167419, "learning_rate": 1.6636590377798523e-05, "loss": 0.0747, "step": 27174 }, { "epoch": 0.48125666874753725, "grad_norm": 0.8608132004737854, "learning_rate": 1.6635735146783342e-05, "loss": 0.0909, "step": 27175 }, { "epoch": 0.4812743782845657, "grad_norm": 0.5411866307258606, "learning_rate": 1.6634879910386754e-05, "loss": 0.0879, "step": 27176 }, { "epoch": 0.4812920878215941, "grad_norm": 0.5758610963821411, "learning_rate": 1.6634024668611582e-05, "loss": 0.0654, "step": 27177 }, { "epoch": 0.48130979735862256, "grad_norm": 0.8148974776268005, "learning_rate": 1.663316942146063e-05, "loss": 0.0601, "step": 27178 }, { "epoch": 0.48132750689565096, "grad_norm": 0.8020598292350769, "learning_rate": 1.6632314168936715e-05, "loss": 0.102, "step": 27179 }, { "epoch": 0.4813452164326794, "grad_norm": 0.6461206674575806, "learning_rate": 1.6631458911042658e-05, "loss": 0.0669, "step": 27180 }, { "epoch": 0.4813629259697078, "grad_norm": 0.8632002472877502, "learning_rate": 1.663060364778126e-05, "loss": 0.0786, "step": 27181 }, { "epoch": 0.48138063550673627, "grad_norm": 0.6377658247947693, "learning_rate": 1.6629748379155345e-05, "loss": 0.0632, "step": 27182 }, { "epoch": 0.4813983450437647, "grad_norm": 0.41837918758392334, "learning_rate": 1.6628893105167723e-05, "loss": 0.0876, "step": 27183 }, { "epoch": 0.4814160545807931, "grad_norm": 1.0453660488128662, "learning_rate": 1.6628037825821205e-05, "loss": 0.0971, "step": 27184 }, { "epoch": 0.48143376411782157, "grad_norm": 0.7523470520973206, "learning_rate": 1.662718254111861e-05, "loss": 0.097, "step": 27185 }, { "epoch": 0.48145147365484997, "grad_norm": 0.3939976990222931, "learning_rate": 1.6626327251062738e-05, "loss": 0.0558, "step": 27186 }, { "epoch": 0.4814691831918784, "grad_norm": 0.8593865633010864, "learning_rate": 1.6625471955656428e-05, "loss": 0.0856, "step": 27187 }, { "epoch": 0.4814868927289068, "grad_norm": 0.8083938956260681, "learning_rate": 1.662461665490247e-05, "loss": 0.0857, "step": 27188 }, { "epoch": 0.4815046022659353, "grad_norm": 0.6959416270256042, "learning_rate": 1.6623761348803694e-05, "loss": 0.0831, "step": 27189 }, { "epoch": 0.4815223118029637, "grad_norm": 0.8881477117538452, "learning_rate": 1.6622906037362904e-05, "loss": 0.0574, "step": 27190 }, { "epoch": 0.48154002133999213, "grad_norm": 0.8541035056114197, "learning_rate": 1.662205072058292e-05, "loss": 0.0719, "step": 27191 }, { "epoch": 0.48155773087702053, "grad_norm": 0.8824620246887207, "learning_rate": 1.662119539846655e-05, "loss": 0.0609, "step": 27192 }, { "epoch": 0.481575440414049, "grad_norm": 0.43280044198036194, "learning_rate": 1.662034007101661e-05, "loss": 0.0496, "step": 27193 }, { "epoch": 0.4815931499510774, "grad_norm": 0.527901291847229, "learning_rate": 1.6619484738235924e-05, "loss": 0.0636, "step": 27194 }, { "epoch": 0.48161085948810584, "grad_norm": 1.1131649017333984, "learning_rate": 1.6618629400127288e-05, "loss": 0.0799, "step": 27195 }, { "epoch": 0.48162856902513423, "grad_norm": 0.5585647821426392, "learning_rate": 1.6617774056693525e-05, "loss": 0.074, "step": 27196 }, { "epoch": 0.4816462785621627, "grad_norm": 0.471779465675354, "learning_rate": 1.661691870793745e-05, "loss": 0.0772, "step": 27197 }, { "epoch": 0.48166398809919114, "grad_norm": 1.0612620115280151, "learning_rate": 1.6616063353861882e-05, "loss": 0.0744, "step": 27198 }, { "epoch": 0.48168169763621954, "grad_norm": 0.5807049870491028, "learning_rate": 1.6615207994469625e-05, "loss": 0.0601, "step": 27199 }, { "epoch": 0.481699407173248, "grad_norm": 0.8298125267028809, "learning_rate": 1.6614352629763494e-05, "loss": 0.0894, "step": 27200 }, { "epoch": 0.4817171167102764, "grad_norm": 0.8056572079658508, "learning_rate": 1.661349725974631e-05, "loss": 0.1213, "step": 27201 }, { "epoch": 0.48173482624730485, "grad_norm": 0.3513643443584442, "learning_rate": 1.6612641884420883e-05, "loss": 0.0646, "step": 27202 }, { "epoch": 0.48175253578433325, "grad_norm": 1.090254545211792, "learning_rate": 1.6611786503790024e-05, "loss": 0.0919, "step": 27203 }, { "epoch": 0.4817702453213617, "grad_norm": 0.8259414434432983, "learning_rate": 1.6610931117856553e-05, "loss": 0.06, "step": 27204 }, { "epoch": 0.4817879548583901, "grad_norm": 0.955981969833374, "learning_rate": 1.661007572662328e-05, "loss": 0.1171, "step": 27205 }, { "epoch": 0.48180566439541855, "grad_norm": 0.6661307215690613, "learning_rate": 1.6609220330093027e-05, "loss": 0.1122, "step": 27206 }, { "epoch": 0.48182337393244695, "grad_norm": 1.4197087287902832, "learning_rate": 1.6608364928268594e-05, "loss": 0.094, "step": 27207 }, { "epoch": 0.4818410834694754, "grad_norm": 0.9997456073760986, "learning_rate": 1.6607509521152807e-05, "loss": 0.0908, "step": 27208 }, { "epoch": 0.4818587930065038, "grad_norm": 0.9272117614746094, "learning_rate": 1.6606654108748475e-05, "loss": 0.1125, "step": 27209 }, { "epoch": 0.48187650254353226, "grad_norm": 1.0210819244384766, "learning_rate": 1.6605798691058414e-05, "loss": 0.0704, "step": 27210 }, { "epoch": 0.48189421208056066, "grad_norm": 0.8259065747261047, "learning_rate": 1.6604943268085435e-05, "loss": 0.0705, "step": 27211 }, { "epoch": 0.4819119216175891, "grad_norm": 0.8824121952056885, "learning_rate": 1.6604087839832358e-05, "loss": 0.0686, "step": 27212 }, { "epoch": 0.48192963115461757, "grad_norm": 0.7308309674263, "learning_rate": 1.6603232406301994e-05, "loss": 0.0754, "step": 27213 }, { "epoch": 0.48194734069164596, "grad_norm": 0.7181418538093567, "learning_rate": 1.660237696749716e-05, "loss": 0.101, "step": 27214 }, { "epoch": 0.4819650502286744, "grad_norm": 0.7885313034057617, "learning_rate": 1.6601521523420663e-05, "loss": 0.0889, "step": 27215 }, { "epoch": 0.4819827597657028, "grad_norm": 0.931390106678009, "learning_rate": 1.6600666074075324e-05, "loss": 0.0804, "step": 27216 }, { "epoch": 0.48200046930273127, "grad_norm": 0.7259086966514587, "learning_rate": 1.6599810619463957e-05, "loss": 0.0532, "step": 27217 }, { "epoch": 0.48201817883975967, "grad_norm": 0.6630253195762634, "learning_rate": 1.6598955159589374e-05, "loss": 0.1083, "step": 27218 }, { "epoch": 0.4820358883767881, "grad_norm": 0.45518016815185547, "learning_rate": 1.6598099694454385e-05, "loss": 0.0673, "step": 27219 }, { "epoch": 0.4820535979138165, "grad_norm": 0.7508439421653748, "learning_rate": 1.6597244224061815e-05, "loss": 0.0749, "step": 27220 }, { "epoch": 0.482071307450845, "grad_norm": 0.70163893699646, "learning_rate": 1.6596388748414473e-05, "loss": 0.0803, "step": 27221 }, { "epoch": 0.4820890169878734, "grad_norm": 0.8760345578193665, "learning_rate": 1.6595533267515178e-05, "loss": 0.1023, "step": 27222 }, { "epoch": 0.48210672652490183, "grad_norm": 0.8804606795310974, "learning_rate": 1.6594677781366734e-05, "loss": 0.0795, "step": 27223 }, { "epoch": 0.48212443606193023, "grad_norm": 0.8954132795333862, "learning_rate": 1.659382228997196e-05, "loss": 0.0815, "step": 27224 }, { "epoch": 0.4821421455989587, "grad_norm": 0.5328410267829895, "learning_rate": 1.6592966793333673e-05, "loss": 0.0667, "step": 27225 }, { "epoch": 0.4821598551359871, "grad_norm": 0.9247741103172302, "learning_rate": 1.659211129145469e-05, "loss": 0.1118, "step": 27226 }, { "epoch": 0.48217756467301554, "grad_norm": 0.6583927869796753, "learning_rate": 1.659125578433782e-05, "loss": 0.0939, "step": 27227 }, { "epoch": 0.482195274210044, "grad_norm": 0.4340892732143402, "learning_rate": 1.6590400271985877e-05, "loss": 0.0623, "step": 27228 }, { "epoch": 0.4822129837470724, "grad_norm": 0.34326279163360596, "learning_rate": 1.658954475440168e-05, "loss": 0.0539, "step": 27229 }, { "epoch": 0.48223069328410084, "grad_norm": 0.5912236571311951, "learning_rate": 1.658868923158804e-05, "loss": 0.0563, "step": 27230 }, { "epoch": 0.48224840282112924, "grad_norm": 0.9083519577980042, "learning_rate": 1.6587833703547776e-05, "loss": 0.098, "step": 27231 }, { "epoch": 0.4822661123581577, "grad_norm": 0.753990113735199, "learning_rate": 1.6586978170283698e-05, "loss": 0.049, "step": 27232 }, { "epoch": 0.4822838218951861, "grad_norm": 0.5472643971443176, "learning_rate": 1.6586122631798623e-05, "loss": 0.0669, "step": 27233 }, { "epoch": 0.48230153143221455, "grad_norm": 0.8811796307563782, "learning_rate": 1.6585267088095362e-05, "loss": 0.0825, "step": 27234 }, { "epoch": 0.48231924096924295, "grad_norm": 1.0208847522735596, "learning_rate": 1.6584411539176736e-05, "loss": 0.0822, "step": 27235 }, { "epoch": 0.4823369505062714, "grad_norm": 0.6198880672454834, "learning_rate": 1.6583555985045557e-05, "loss": 0.0686, "step": 27236 }, { "epoch": 0.4823546600432998, "grad_norm": 0.6254864931106567, "learning_rate": 1.6582700425704637e-05, "loss": 0.0482, "step": 27237 }, { "epoch": 0.48237236958032825, "grad_norm": 0.6488572359085083, "learning_rate": 1.6581844861156793e-05, "loss": 0.1049, "step": 27238 }, { "epoch": 0.48239007911735665, "grad_norm": 0.7707183361053467, "learning_rate": 1.658098929140484e-05, "loss": 0.0859, "step": 27239 }, { "epoch": 0.4824077886543851, "grad_norm": 0.31789490580558777, "learning_rate": 1.658013371645159e-05, "loss": 0.061, "step": 27240 }, { "epoch": 0.4824254981914135, "grad_norm": 0.6423370242118835, "learning_rate": 1.657927813629986e-05, "loss": 0.0458, "step": 27241 }, { "epoch": 0.48244320772844196, "grad_norm": 0.6441604495048523, "learning_rate": 1.6578422550952473e-05, "loss": 0.0697, "step": 27242 }, { "epoch": 0.4824609172654704, "grad_norm": 0.4676113724708557, "learning_rate": 1.6577566960412226e-05, "loss": 0.0818, "step": 27243 }, { "epoch": 0.4824786268024988, "grad_norm": 0.8307303786277771, "learning_rate": 1.6576711364681945e-05, "loss": 0.0681, "step": 27244 }, { "epoch": 0.48249633633952727, "grad_norm": 0.5003783702850342, "learning_rate": 1.6575855763764444e-05, "loss": 0.1076, "step": 27245 }, { "epoch": 0.48251404587655566, "grad_norm": 0.9036508202552795, "learning_rate": 1.6575000157662535e-05, "loss": 0.0969, "step": 27246 }, { "epoch": 0.4825317554135841, "grad_norm": 0.3747292160987854, "learning_rate": 1.657414454637904e-05, "loss": 0.0619, "step": 27247 }, { "epoch": 0.4825494649506125, "grad_norm": 0.7142235636711121, "learning_rate": 1.657328892991676e-05, "loss": 0.1025, "step": 27248 }, { "epoch": 0.48256717448764097, "grad_norm": 0.5758687853813171, "learning_rate": 1.657243330827853e-05, "loss": 0.0629, "step": 27249 }, { "epoch": 0.48258488402466937, "grad_norm": 0.9347373843193054, "learning_rate": 1.6571577681467143e-05, "loss": 0.0968, "step": 27250 }, { "epoch": 0.4826025935616978, "grad_norm": 0.8077710270881653, "learning_rate": 1.6570722049485434e-05, "loss": 0.0739, "step": 27251 }, { "epoch": 0.4826203030987262, "grad_norm": 0.674747109413147, "learning_rate": 1.65698664123362e-05, "loss": 0.0695, "step": 27252 }, { "epoch": 0.4826380126357547, "grad_norm": 0.5844478607177734, "learning_rate": 1.6569010770022264e-05, "loss": 0.0566, "step": 27253 }, { "epoch": 0.4826557221727831, "grad_norm": 0.7395483255386353, "learning_rate": 1.656815512254645e-05, "loss": 0.0707, "step": 27254 }, { "epoch": 0.48267343170981153, "grad_norm": 0.6936503648757935, "learning_rate": 1.6567299469911555e-05, "loss": 0.0683, "step": 27255 }, { "epoch": 0.4826911412468399, "grad_norm": 0.7337377667427063, "learning_rate": 1.6566443812120413e-05, "loss": 0.0797, "step": 27256 }, { "epoch": 0.4827088507838684, "grad_norm": 0.9735061526298523, "learning_rate": 1.656558814917582e-05, "loss": 0.0811, "step": 27257 }, { "epoch": 0.48272656032089684, "grad_norm": 0.6506859660148621, "learning_rate": 1.65647324810806e-05, "loss": 0.0937, "step": 27258 }, { "epoch": 0.48274426985792523, "grad_norm": 0.6262120604515076, "learning_rate": 1.6563876807837575e-05, "loss": 0.0797, "step": 27259 }, { "epoch": 0.4827619793949537, "grad_norm": 0.6670916080474854, "learning_rate": 1.6563021129449553e-05, "loss": 0.0932, "step": 27260 }, { "epoch": 0.4827796889319821, "grad_norm": 0.45917844772338867, "learning_rate": 1.6562165445919346e-05, "loss": 0.0791, "step": 27261 }, { "epoch": 0.48279739846901054, "grad_norm": 0.7640916705131531, "learning_rate": 1.6561309757249774e-05, "loss": 0.0609, "step": 27262 }, { "epoch": 0.48281510800603894, "grad_norm": 0.5422658324241638, "learning_rate": 1.656045406344365e-05, "loss": 0.0605, "step": 27263 }, { "epoch": 0.4828328175430674, "grad_norm": 0.7550177574157715, "learning_rate": 1.655959836450379e-05, "loss": 0.0723, "step": 27264 }, { "epoch": 0.4828505270800958, "grad_norm": 0.4189569354057312, "learning_rate": 1.655874266043301e-05, "loss": 0.0506, "step": 27265 }, { "epoch": 0.48286823661712425, "grad_norm": 0.505554735660553, "learning_rate": 1.6557886951234125e-05, "loss": 0.0646, "step": 27266 }, { "epoch": 0.48288594615415265, "grad_norm": 0.7962849736213684, "learning_rate": 1.6557031236909947e-05, "loss": 0.0709, "step": 27267 }, { "epoch": 0.4829036556911811, "grad_norm": 0.838406503200531, "learning_rate": 1.655617551746329e-05, "loss": 0.0736, "step": 27268 }, { "epoch": 0.4829213652282095, "grad_norm": 0.701841413974762, "learning_rate": 1.6555319792896987e-05, "loss": 0.0629, "step": 27269 }, { "epoch": 0.48293907476523795, "grad_norm": 0.6393983960151672, "learning_rate": 1.655446406321383e-05, "loss": 0.0689, "step": 27270 }, { "epoch": 0.48295678430226635, "grad_norm": 0.4659520387649536, "learning_rate": 1.6553608328416644e-05, "loss": 0.0709, "step": 27271 }, { "epoch": 0.4829744938392948, "grad_norm": 0.8925066590309143, "learning_rate": 1.6552752588508243e-05, "loss": 0.0899, "step": 27272 }, { "epoch": 0.48299220337632326, "grad_norm": 0.6483957171440125, "learning_rate": 1.655189684349144e-05, "loss": 0.0586, "step": 27273 }, { "epoch": 0.48300991291335166, "grad_norm": 1.207448959350586, "learning_rate": 1.6551041093369062e-05, "loss": 0.0805, "step": 27274 }, { "epoch": 0.4830276224503801, "grad_norm": 0.6065567135810852, "learning_rate": 1.655018533814391e-05, "loss": 0.0776, "step": 27275 }, { "epoch": 0.4830453319874085, "grad_norm": 0.492922306060791, "learning_rate": 1.6549329577818806e-05, "loss": 0.0493, "step": 27276 }, { "epoch": 0.48306304152443696, "grad_norm": 0.29688435792922974, "learning_rate": 1.654847381239656e-05, "loss": 0.0645, "step": 27277 }, { "epoch": 0.48308075106146536, "grad_norm": 0.5823151469230652, "learning_rate": 1.6547618041879996e-05, "loss": 0.0406, "step": 27278 }, { "epoch": 0.4830984605984938, "grad_norm": 0.2517758011817932, "learning_rate": 1.654676226627193e-05, "loss": 0.0571, "step": 27279 }, { "epoch": 0.4831161701355222, "grad_norm": 0.894430935382843, "learning_rate": 1.6545906485575166e-05, "loss": 0.0902, "step": 27280 }, { "epoch": 0.48313387967255067, "grad_norm": 0.6170027852058411, "learning_rate": 1.6545050699792524e-05, "loss": 0.0613, "step": 27281 }, { "epoch": 0.48315158920957907, "grad_norm": 1.0244898796081543, "learning_rate": 1.6544194908926824e-05, "loss": 0.0728, "step": 27282 }, { "epoch": 0.4831692987466075, "grad_norm": 0.5227451324462891, "learning_rate": 1.6543339112980885e-05, "loss": 0.0554, "step": 27283 }, { "epoch": 0.4831870082836359, "grad_norm": 0.4746384620666504, "learning_rate": 1.6542483311957508e-05, "loss": 0.0726, "step": 27284 }, { "epoch": 0.4832047178206644, "grad_norm": 0.7139683961868286, "learning_rate": 1.6541627505859526e-05, "loss": 0.0833, "step": 27285 }, { "epoch": 0.4832224273576928, "grad_norm": 0.6107183694839478, "learning_rate": 1.6540771694689738e-05, "loss": 0.1141, "step": 27286 }, { "epoch": 0.48324013689472123, "grad_norm": 1.0026546716690063, "learning_rate": 1.6539915878450966e-05, "loss": 0.104, "step": 27287 }, { "epoch": 0.4832578464317497, "grad_norm": 0.727849006652832, "learning_rate": 1.653906005714603e-05, "loss": 0.0705, "step": 27288 }, { "epoch": 0.4832755559687781, "grad_norm": 0.7169470191001892, "learning_rate": 1.653820423077774e-05, "loss": 0.0881, "step": 27289 }, { "epoch": 0.48329326550580654, "grad_norm": 0.5120416283607483, "learning_rate": 1.653734839934892e-05, "loss": 0.0748, "step": 27290 }, { "epoch": 0.48331097504283493, "grad_norm": 0.891330361366272, "learning_rate": 1.6536492562862373e-05, "loss": 0.1137, "step": 27291 }, { "epoch": 0.4833286845798634, "grad_norm": 0.908385694026947, "learning_rate": 1.6535636721320918e-05, "loss": 0.0744, "step": 27292 }, { "epoch": 0.4833463941168918, "grad_norm": 0.6765169501304626, "learning_rate": 1.6534780874727382e-05, "loss": 0.087, "step": 27293 }, { "epoch": 0.48336410365392024, "grad_norm": 0.5306316614151001, "learning_rate": 1.6533925023084568e-05, "loss": 0.0529, "step": 27294 }, { "epoch": 0.48338181319094864, "grad_norm": 0.9707270264625549, "learning_rate": 1.6533069166395297e-05, "loss": 0.0762, "step": 27295 }, { "epoch": 0.4833995227279771, "grad_norm": 0.5132182836532593, "learning_rate": 1.653221330466238e-05, "loss": 0.061, "step": 27296 }, { "epoch": 0.4834172322650055, "grad_norm": 0.3686031699180603, "learning_rate": 1.653135743788864e-05, "loss": 0.0397, "step": 27297 }, { "epoch": 0.48343494180203395, "grad_norm": 0.6973376870155334, "learning_rate": 1.6530501566076882e-05, "loss": 0.0743, "step": 27298 }, { "epoch": 0.48345265133906234, "grad_norm": 0.743262529373169, "learning_rate": 1.6529645689229938e-05, "loss": 0.0918, "step": 27299 }, { "epoch": 0.4834703608760908, "grad_norm": 0.3983609080314636, "learning_rate": 1.652878980735061e-05, "loss": 0.0574, "step": 27300 }, { "epoch": 0.4834880704131192, "grad_norm": 0.644810140132904, "learning_rate": 1.6527933920441717e-05, "loss": 0.0555, "step": 27301 }, { "epoch": 0.48350577995014765, "grad_norm": 1.4725775718688965, "learning_rate": 1.652707802850608e-05, "loss": 0.0544, "step": 27302 }, { "epoch": 0.4835234894871761, "grad_norm": 0.6871074438095093, "learning_rate": 1.6526222131546505e-05, "loss": 0.0825, "step": 27303 }, { "epoch": 0.4835411990242045, "grad_norm": 0.43001607060432434, "learning_rate": 1.652536622956582e-05, "loss": 0.0702, "step": 27304 }, { "epoch": 0.48355890856123296, "grad_norm": 0.78535395860672, "learning_rate": 1.652451032256683e-05, "loss": 0.1173, "step": 27305 }, { "epoch": 0.48357661809826136, "grad_norm": 1.0589139461517334, "learning_rate": 1.6523654410552356e-05, "loss": 0.0706, "step": 27306 }, { "epoch": 0.4835943276352898, "grad_norm": 0.7756159901618958, "learning_rate": 1.6522798493525212e-05, "loss": 0.0934, "step": 27307 }, { "epoch": 0.4836120371723182, "grad_norm": 0.6027469635009766, "learning_rate": 1.652194257148822e-05, "loss": 0.0838, "step": 27308 }, { "epoch": 0.48362974670934666, "grad_norm": 0.6764542460441589, "learning_rate": 1.652108664444419e-05, "loss": 0.0633, "step": 27309 }, { "epoch": 0.48364745624637506, "grad_norm": 0.7896871566772461, "learning_rate": 1.6520230712395932e-05, "loss": 0.088, "step": 27310 }, { "epoch": 0.4836651657834035, "grad_norm": 0.41995829343795776, "learning_rate": 1.6519374775346274e-05, "loss": 0.0684, "step": 27311 }, { "epoch": 0.4836828753204319, "grad_norm": 0.7915477156639099, "learning_rate": 1.6518518833298024e-05, "loss": 0.0585, "step": 27312 }, { "epoch": 0.48370058485746037, "grad_norm": 0.6345251202583313, "learning_rate": 1.6517662886254004e-05, "loss": 0.0437, "step": 27313 }, { "epoch": 0.48371829439448877, "grad_norm": 0.5779318809509277, "learning_rate": 1.6516806934217022e-05, "loss": 0.0634, "step": 27314 }, { "epoch": 0.4837360039315172, "grad_norm": 0.3246096968650818, "learning_rate": 1.6515950977189903e-05, "loss": 0.077, "step": 27315 }, { "epoch": 0.4837537134685456, "grad_norm": 0.7893155217170715, "learning_rate": 1.6515095015175456e-05, "loss": 0.0921, "step": 27316 }, { "epoch": 0.4837714230055741, "grad_norm": 0.5411520600318909, "learning_rate": 1.6514239048176502e-05, "loss": 0.099, "step": 27317 }, { "epoch": 0.48378913254260253, "grad_norm": 0.5785015225410461, "learning_rate": 1.6513383076195855e-05, "loss": 0.0727, "step": 27318 }, { "epoch": 0.4838068420796309, "grad_norm": 0.6204192638397217, "learning_rate": 1.651252709923633e-05, "loss": 0.0909, "step": 27319 }, { "epoch": 0.4838245516166594, "grad_norm": 0.6987402439117432, "learning_rate": 1.6511671117300738e-05, "loss": 0.0575, "step": 27320 }, { "epoch": 0.4838422611536878, "grad_norm": 0.5374017953872681, "learning_rate": 1.6510815130391905e-05, "loss": 0.0658, "step": 27321 }, { "epoch": 0.48385997069071623, "grad_norm": 0.5738078355789185, "learning_rate": 1.6509959138512646e-05, "loss": 0.0299, "step": 27322 }, { "epoch": 0.48387768022774463, "grad_norm": 0.40326058864593506, "learning_rate": 1.650910314166577e-05, "loss": 0.0487, "step": 27323 }, { "epoch": 0.4838953897647731, "grad_norm": 0.4456145763397217, "learning_rate": 1.6508247139854102e-05, "loss": 0.071, "step": 27324 }, { "epoch": 0.4839130993018015, "grad_norm": 0.48940494656562805, "learning_rate": 1.6507391133080453e-05, "loss": 0.0586, "step": 27325 }, { "epoch": 0.48393080883882994, "grad_norm": 1.014550805091858, "learning_rate": 1.6506535121347635e-05, "loss": 0.0759, "step": 27326 }, { "epoch": 0.48394851837585834, "grad_norm": 0.5735622644424438, "learning_rate": 1.6505679104658472e-05, "loss": 0.058, "step": 27327 }, { "epoch": 0.4839662279128868, "grad_norm": 0.5883028507232666, "learning_rate": 1.6504823083015776e-05, "loss": 0.1114, "step": 27328 }, { "epoch": 0.4839839374499152, "grad_norm": 0.6726921796798706, "learning_rate": 1.6503967056422362e-05, "loss": 0.068, "step": 27329 }, { "epoch": 0.48400164698694365, "grad_norm": 0.8506096005439758, "learning_rate": 1.650311102488105e-05, "loss": 0.1172, "step": 27330 }, { "epoch": 0.4840193565239721, "grad_norm": 0.4061616361141205, "learning_rate": 1.6502254988394656e-05, "loss": 0.0338, "step": 27331 }, { "epoch": 0.4840370660610005, "grad_norm": 0.8825153112411499, "learning_rate": 1.6501398946965998e-05, "loss": 0.1082, "step": 27332 }, { "epoch": 0.48405477559802895, "grad_norm": 0.9356188178062439, "learning_rate": 1.6500542900597885e-05, "loss": 0.0956, "step": 27333 }, { "epoch": 0.48407248513505735, "grad_norm": 0.456503301858902, "learning_rate": 1.649968684929314e-05, "loss": 0.06, "step": 27334 }, { "epoch": 0.4840901946720858, "grad_norm": 0.5788202285766602, "learning_rate": 1.6498830793054574e-05, "loss": 0.0818, "step": 27335 }, { "epoch": 0.4841079042091142, "grad_norm": 1.048267126083374, "learning_rate": 1.6497974731885007e-05, "loss": 0.0717, "step": 27336 }, { "epoch": 0.48412561374614266, "grad_norm": 1.1047568321228027, "learning_rate": 1.6497118665787257e-05, "loss": 0.086, "step": 27337 }, { "epoch": 0.48414332328317106, "grad_norm": 0.38119953870773315, "learning_rate": 1.649626259476414e-05, "loss": 0.0461, "step": 27338 }, { "epoch": 0.4841610328201995, "grad_norm": 0.8126210570335388, "learning_rate": 1.649540651881846e-05, "loss": 0.0787, "step": 27339 }, { "epoch": 0.4841787423572279, "grad_norm": 0.7234535813331604, "learning_rate": 1.6494550437953052e-05, "loss": 0.0491, "step": 27340 }, { "epoch": 0.48419645189425636, "grad_norm": 0.770879864692688, "learning_rate": 1.6493694352170727e-05, "loss": 0.0846, "step": 27341 }, { "epoch": 0.48421416143128476, "grad_norm": 0.8569195866584778, "learning_rate": 1.6492838261474293e-05, "loss": 0.0663, "step": 27342 }, { "epoch": 0.4842318709683132, "grad_norm": 0.49063539505004883, "learning_rate": 1.6491982165866573e-05, "loss": 0.055, "step": 27343 }, { "epoch": 0.4842495805053416, "grad_norm": 0.5693801045417786, "learning_rate": 1.649112606535038e-05, "loss": 0.0501, "step": 27344 }, { "epoch": 0.48426729004237007, "grad_norm": 0.7245118618011475, "learning_rate": 1.649026995992854e-05, "loss": 0.1189, "step": 27345 }, { "epoch": 0.4842849995793985, "grad_norm": 0.6188014149665833, "learning_rate": 1.648941384960386e-05, "loss": 0.0568, "step": 27346 }, { "epoch": 0.4843027091164269, "grad_norm": 0.7414014935493469, "learning_rate": 1.648855773437916e-05, "loss": 0.0827, "step": 27347 }, { "epoch": 0.4843204186534554, "grad_norm": 0.7171038389205933, "learning_rate": 1.6487701614257252e-05, "loss": 0.0715, "step": 27348 }, { "epoch": 0.4843381281904838, "grad_norm": 0.9909749031066895, "learning_rate": 1.6486845489240958e-05, "loss": 0.082, "step": 27349 }, { "epoch": 0.48435583772751223, "grad_norm": 0.4056987762451172, "learning_rate": 1.6485989359333095e-05, "loss": 0.0553, "step": 27350 }, { "epoch": 0.4843735472645406, "grad_norm": 0.7024205923080444, "learning_rate": 1.6485133224536474e-05, "loss": 0.0789, "step": 27351 }, { "epoch": 0.4843912568015691, "grad_norm": 0.598209798336029, "learning_rate": 1.648427708485392e-05, "loss": 0.0808, "step": 27352 }, { "epoch": 0.4844089663385975, "grad_norm": 0.8609903454780579, "learning_rate": 1.6483420940288236e-05, "loss": 0.0866, "step": 27353 }, { "epoch": 0.48442667587562593, "grad_norm": 0.5638741254806519, "learning_rate": 1.648256479084225e-05, "loss": 0.0844, "step": 27354 }, { "epoch": 0.48444438541265433, "grad_norm": 0.5498431324958801, "learning_rate": 1.6481708636518775e-05, "loss": 0.0734, "step": 27355 }, { "epoch": 0.4844620949496828, "grad_norm": 0.7736203074455261, "learning_rate": 1.6480852477320635e-05, "loss": 0.074, "step": 27356 }, { "epoch": 0.4844798044867112, "grad_norm": 0.3603200614452362, "learning_rate": 1.647999631325064e-05, "loss": 0.0593, "step": 27357 }, { "epoch": 0.48449751402373964, "grad_norm": 0.5587388277053833, "learning_rate": 1.6479140144311597e-05, "loss": 0.053, "step": 27358 }, { "epoch": 0.48451522356076804, "grad_norm": 0.8216997981071472, "learning_rate": 1.647828397050634e-05, "loss": 0.1063, "step": 27359 }, { "epoch": 0.4845329330977965, "grad_norm": 0.43131664395332336, "learning_rate": 1.6477427791837675e-05, "loss": 0.0451, "step": 27360 }, { "epoch": 0.48455064263482495, "grad_norm": 0.31187018752098083, "learning_rate": 1.6476571608308425e-05, "loss": 0.0534, "step": 27361 }, { "epoch": 0.48456835217185334, "grad_norm": 0.3004411458969116, "learning_rate": 1.64757154199214e-05, "loss": 0.044, "step": 27362 }, { "epoch": 0.4845860617088818, "grad_norm": 0.5122782588005066, "learning_rate": 1.6474859226679425e-05, "loss": 0.0752, "step": 27363 }, { "epoch": 0.4846037712459102, "grad_norm": 0.40862828493118286, "learning_rate": 1.6474003028585312e-05, "loss": 0.0672, "step": 27364 }, { "epoch": 0.48462148078293865, "grad_norm": 0.8598162531852722, "learning_rate": 1.647314682564187e-05, "loss": 0.0639, "step": 27365 }, { "epoch": 0.48463919031996705, "grad_norm": 0.3934439420700073, "learning_rate": 1.6472290617851935e-05, "loss": 0.0561, "step": 27366 }, { "epoch": 0.4846568998569955, "grad_norm": 0.631430983543396, "learning_rate": 1.6471434405218308e-05, "loss": 0.0574, "step": 27367 }, { "epoch": 0.4846746093940239, "grad_norm": 0.6568973660469055, "learning_rate": 1.6470578187743807e-05, "loss": 0.0658, "step": 27368 }, { "epoch": 0.48469231893105236, "grad_norm": 0.7865110039710999, "learning_rate": 1.6469721965431257e-05, "loss": 0.0991, "step": 27369 }, { "epoch": 0.48471002846808076, "grad_norm": 0.5721496343612671, "learning_rate": 1.646886573828347e-05, "loss": 0.0594, "step": 27370 }, { "epoch": 0.4847277380051092, "grad_norm": 0.6066815257072449, "learning_rate": 1.646800950630326e-05, "loss": 0.076, "step": 27371 }, { "epoch": 0.4847454475421376, "grad_norm": 0.9716998338699341, "learning_rate": 1.6467153269493455e-05, "loss": 0.0839, "step": 27372 }, { "epoch": 0.48476315707916606, "grad_norm": 0.8463623523712158, "learning_rate": 1.6466297027856857e-05, "loss": 0.0663, "step": 27373 }, { "epoch": 0.48478086661619446, "grad_norm": 1.0764415264129639, "learning_rate": 1.6465440781396292e-05, "loss": 0.1175, "step": 27374 }, { "epoch": 0.4847985761532229, "grad_norm": 0.7962493896484375, "learning_rate": 1.646458453011458e-05, "loss": 0.0762, "step": 27375 }, { "epoch": 0.48481628569025137, "grad_norm": 0.6527408957481384, "learning_rate": 1.6463728274014524e-05, "loss": 0.0775, "step": 27376 }, { "epoch": 0.48483399522727977, "grad_norm": 0.5999971032142639, "learning_rate": 1.6462872013098952e-05, "loss": 0.0517, "step": 27377 }, { "epoch": 0.4848517047643082, "grad_norm": 0.7615950107574463, "learning_rate": 1.646201574737068e-05, "loss": 0.0622, "step": 27378 }, { "epoch": 0.4848694143013366, "grad_norm": 0.956815242767334, "learning_rate": 1.6461159476832528e-05, "loss": 0.1044, "step": 27379 }, { "epoch": 0.4848871238383651, "grad_norm": 0.9626910090446472, "learning_rate": 1.646030320148731e-05, "loss": 0.0819, "step": 27380 }, { "epoch": 0.4849048333753935, "grad_norm": 0.6775972843170166, "learning_rate": 1.6459446921337838e-05, "loss": 0.0397, "step": 27381 }, { "epoch": 0.48492254291242193, "grad_norm": 0.5040091872215271, "learning_rate": 1.6458590636386934e-05, "loss": 0.0786, "step": 27382 }, { "epoch": 0.4849402524494503, "grad_norm": 0.9500078558921814, "learning_rate": 1.645773434663741e-05, "loss": 0.0569, "step": 27383 }, { "epoch": 0.4849579619864788, "grad_norm": 0.8303382396697998, "learning_rate": 1.6456878052092094e-05, "loss": 0.0906, "step": 27384 }, { "epoch": 0.4849756715235072, "grad_norm": 0.5902158617973328, "learning_rate": 1.6456021752753796e-05, "loss": 0.0479, "step": 27385 }, { "epoch": 0.48499338106053563, "grad_norm": 0.6204268932342529, "learning_rate": 1.6455165448625334e-05, "loss": 0.0729, "step": 27386 }, { "epoch": 0.48501109059756403, "grad_norm": 0.6415414810180664, "learning_rate": 1.6454309139709513e-05, "loss": 0.0581, "step": 27387 }, { "epoch": 0.4850288001345925, "grad_norm": 0.30213505029678345, "learning_rate": 1.6453452826009176e-05, "loss": 0.0732, "step": 27388 }, { "epoch": 0.4850465096716209, "grad_norm": 0.7169657945632935, "learning_rate": 1.6452596507527127e-05, "loss": 0.0611, "step": 27389 }, { "epoch": 0.48506421920864934, "grad_norm": 0.48501336574554443, "learning_rate": 1.6451740184266173e-05, "loss": 0.0513, "step": 27390 }, { "epoch": 0.4850819287456778, "grad_norm": 0.4912932515144348, "learning_rate": 1.6450883856229145e-05, "loss": 0.0888, "step": 27391 }, { "epoch": 0.4850996382827062, "grad_norm": 0.5763953328132629, "learning_rate": 1.6450027523418854e-05, "loss": 0.0777, "step": 27392 }, { "epoch": 0.48511734781973465, "grad_norm": 1.2601882219314575, "learning_rate": 1.6449171185838126e-05, "loss": 0.0664, "step": 27393 }, { "epoch": 0.48513505735676304, "grad_norm": 0.8344525098800659, "learning_rate": 1.6448314843489762e-05, "loss": 0.0807, "step": 27394 }, { "epoch": 0.4851527668937915, "grad_norm": 0.30187487602233887, "learning_rate": 1.6447458496376597e-05, "loss": 0.0702, "step": 27395 }, { "epoch": 0.4851704764308199, "grad_norm": 0.5035584568977356, "learning_rate": 1.6446602144501434e-05, "loss": 0.1049, "step": 27396 }, { "epoch": 0.48518818596784835, "grad_norm": 0.601791262626648, "learning_rate": 1.6445745787867097e-05, "loss": 0.0685, "step": 27397 }, { "epoch": 0.48520589550487675, "grad_norm": 0.8110393285751343, "learning_rate": 1.6444889426476405e-05, "loss": 0.0845, "step": 27398 }, { "epoch": 0.4852236050419052, "grad_norm": 0.6731463074684143, "learning_rate": 1.6444033060332172e-05, "loss": 0.0748, "step": 27399 }, { "epoch": 0.4852413145789336, "grad_norm": 0.8529390692710876, "learning_rate": 1.644317668943722e-05, "loss": 0.0648, "step": 27400 }, { "epoch": 0.48525902411596206, "grad_norm": 0.6610704064369202, "learning_rate": 1.6442320313794358e-05, "loss": 0.0862, "step": 27401 }, { "epoch": 0.48527673365299046, "grad_norm": 0.9448896646499634, "learning_rate": 1.6441463933406405e-05, "loss": 0.0653, "step": 27402 }, { "epoch": 0.4852944431900189, "grad_norm": 0.9297448396682739, "learning_rate": 1.644060754827619e-05, "loss": 0.089, "step": 27403 }, { "epoch": 0.4853121527270473, "grad_norm": 0.5908681154251099, "learning_rate": 1.6439751158406515e-05, "loss": 0.0903, "step": 27404 }, { "epoch": 0.48532986226407576, "grad_norm": 0.6487104892730713, "learning_rate": 1.6438894763800206e-05, "loss": 0.061, "step": 27405 }, { "epoch": 0.4853475718011042, "grad_norm": 0.6152386665344238, "learning_rate": 1.6438038364460078e-05, "loss": 0.0749, "step": 27406 }, { "epoch": 0.4853652813381326, "grad_norm": 0.6471173167228699, "learning_rate": 1.643718196038895e-05, "loss": 0.0825, "step": 27407 }, { "epoch": 0.48538299087516107, "grad_norm": 0.4202366769313812, "learning_rate": 1.643632555158964e-05, "loss": 0.0686, "step": 27408 }, { "epoch": 0.48540070041218947, "grad_norm": 0.7847133278846741, "learning_rate": 1.6435469138064965e-05, "loss": 0.0644, "step": 27409 }, { "epoch": 0.4854184099492179, "grad_norm": 0.8514401912689209, "learning_rate": 1.643461271981774e-05, "loss": 0.0739, "step": 27410 }, { "epoch": 0.4854361194862463, "grad_norm": 0.7016512155532837, "learning_rate": 1.6433756296850782e-05, "loss": 0.1242, "step": 27411 }, { "epoch": 0.4854538290232748, "grad_norm": 0.7913352847099304, "learning_rate": 1.6432899869166915e-05, "loss": 0.104, "step": 27412 }, { "epoch": 0.4854715385603032, "grad_norm": 0.9501394629478455, "learning_rate": 1.6432043436768947e-05, "loss": 0.0812, "step": 27413 }, { "epoch": 0.4854892480973316, "grad_norm": 0.41702690720558167, "learning_rate": 1.643118699965971e-05, "loss": 0.0562, "step": 27414 }, { "epoch": 0.48550695763436, "grad_norm": 0.5703304409980774, "learning_rate": 1.6430330557842004e-05, "loss": 0.067, "step": 27415 }, { "epoch": 0.4855246671713885, "grad_norm": 0.569579005241394, "learning_rate": 1.6429474111318657e-05, "loss": 0.0635, "step": 27416 }, { "epoch": 0.4855423767084169, "grad_norm": 1.0732970237731934, "learning_rate": 1.6428617660092485e-05, "loss": 0.0677, "step": 27417 }, { "epoch": 0.48556008624544533, "grad_norm": 0.7352405190467834, "learning_rate": 1.642776120416631e-05, "loss": 0.0771, "step": 27418 }, { "epoch": 0.48557779578247373, "grad_norm": 0.6681811809539795, "learning_rate": 1.642690474354294e-05, "loss": 0.0701, "step": 27419 }, { "epoch": 0.4855955053195022, "grad_norm": 0.8152165412902832, "learning_rate": 1.6426048278225196e-05, "loss": 0.0679, "step": 27420 }, { "epoch": 0.48561321485653064, "grad_norm": 1.135938286781311, "learning_rate": 1.6425191808215903e-05, "loss": 0.0666, "step": 27421 }, { "epoch": 0.48563092439355904, "grad_norm": 0.47310853004455566, "learning_rate": 1.642433533351787e-05, "loss": 0.0511, "step": 27422 }, { "epoch": 0.4856486339305875, "grad_norm": 0.7038875818252563, "learning_rate": 1.642347885413392e-05, "loss": 0.0775, "step": 27423 }, { "epoch": 0.4856663434676159, "grad_norm": 0.6016613245010376, "learning_rate": 1.6422622370066866e-05, "loss": 0.0811, "step": 27424 }, { "epoch": 0.48568405300464434, "grad_norm": 0.6800801753997803, "learning_rate": 1.6421765881319527e-05, "loss": 0.0504, "step": 27425 }, { "epoch": 0.48570176254167274, "grad_norm": 0.4357990622520447, "learning_rate": 1.6420909387894725e-05, "loss": 0.0671, "step": 27426 }, { "epoch": 0.4857194720787012, "grad_norm": 0.3489891588687897, "learning_rate": 1.6420052889795274e-05, "loss": 0.0619, "step": 27427 }, { "epoch": 0.4857371816157296, "grad_norm": 0.5910683274269104, "learning_rate": 1.6419196387023993e-05, "loss": 0.0454, "step": 27428 }, { "epoch": 0.48575489115275805, "grad_norm": 0.808168888092041, "learning_rate": 1.64183398795837e-05, "loss": 0.0604, "step": 27429 }, { "epoch": 0.48577260068978645, "grad_norm": 0.45836693048477173, "learning_rate": 1.6417483367477208e-05, "loss": 0.0739, "step": 27430 }, { "epoch": 0.4857903102268149, "grad_norm": 0.8545101284980774, "learning_rate": 1.641662685070734e-05, "loss": 0.0777, "step": 27431 }, { "epoch": 0.4858080197638433, "grad_norm": 0.6932527422904968, "learning_rate": 1.641577032927692e-05, "loss": 0.0815, "step": 27432 }, { "epoch": 0.48582572930087176, "grad_norm": 0.18869100511074066, "learning_rate": 1.6414913803188753e-05, "loss": 0.0512, "step": 27433 }, { "epoch": 0.48584343883790015, "grad_norm": 1.1434516906738281, "learning_rate": 1.6414057272445666e-05, "loss": 0.0817, "step": 27434 }, { "epoch": 0.4858611483749286, "grad_norm": 0.484397292137146, "learning_rate": 1.641320073705047e-05, "loss": 0.0716, "step": 27435 }, { "epoch": 0.48587885791195706, "grad_norm": 0.7118412852287292, "learning_rate": 1.6412344197005988e-05, "loss": 0.0793, "step": 27436 }, { "epoch": 0.48589656744898546, "grad_norm": 0.40153583884239197, "learning_rate": 1.641148765231504e-05, "loss": 0.0651, "step": 27437 }, { "epoch": 0.4859142769860139, "grad_norm": 0.5581051111221313, "learning_rate": 1.6410631102980433e-05, "loss": 0.0457, "step": 27438 }, { "epoch": 0.4859319865230423, "grad_norm": 0.5626887679100037, "learning_rate": 1.6409774549004997e-05, "loss": 0.1004, "step": 27439 }, { "epoch": 0.48594969606007077, "grad_norm": 0.6237019896507263, "learning_rate": 1.640891799039154e-05, "loss": 0.1009, "step": 27440 }, { "epoch": 0.48596740559709917, "grad_norm": 0.7378831505775452, "learning_rate": 1.6408061427142896e-05, "loss": 0.0682, "step": 27441 }, { "epoch": 0.4859851151341276, "grad_norm": 0.7859798073768616, "learning_rate": 1.6407204859261862e-05, "loss": 0.0683, "step": 27442 }, { "epoch": 0.486002824671156, "grad_norm": 0.4997950792312622, "learning_rate": 1.6406348286751275e-05, "loss": 0.0642, "step": 27443 }, { "epoch": 0.4860205342081845, "grad_norm": 0.8824084401130676, "learning_rate": 1.6405491709613936e-05, "loss": 0.1034, "step": 27444 }, { "epoch": 0.4860382437452129, "grad_norm": 0.6591848134994507, "learning_rate": 1.6404635127852676e-05, "loss": 0.0606, "step": 27445 }, { "epoch": 0.4860559532822413, "grad_norm": 0.7789856195449829, "learning_rate": 1.640377854147031e-05, "loss": 0.0482, "step": 27446 }, { "epoch": 0.4860736628192697, "grad_norm": 0.5861160755157471, "learning_rate": 1.640292195046965e-05, "loss": 0.0638, "step": 27447 }, { "epoch": 0.4860913723562982, "grad_norm": 0.7491268515586853, "learning_rate": 1.6402065354853524e-05, "loss": 0.0794, "step": 27448 }, { "epoch": 0.4861090818933266, "grad_norm": 0.47546374797821045, "learning_rate": 1.640120875462474e-05, "loss": 0.086, "step": 27449 }, { "epoch": 0.48612679143035503, "grad_norm": 0.5552485585212708, "learning_rate": 1.6400352149786123e-05, "loss": 0.0725, "step": 27450 }, { "epoch": 0.4861445009673835, "grad_norm": 0.6226272583007812, "learning_rate": 1.6399495540340492e-05, "loss": 0.0703, "step": 27451 }, { "epoch": 0.4861622105044119, "grad_norm": 0.55886310338974, "learning_rate": 1.6398638926290657e-05, "loss": 0.0699, "step": 27452 }, { "epoch": 0.48617992004144034, "grad_norm": 0.6915693879127502, "learning_rate": 1.639778230763944e-05, "loss": 0.0848, "step": 27453 }, { "epoch": 0.48619762957846874, "grad_norm": 0.7897887825965881, "learning_rate": 1.6396925684389666e-05, "loss": 0.0924, "step": 27454 }, { "epoch": 0.4862153391154972, "grad_norm": 0.5618153810501099, "learning_rate": 1.6396069056544148e-05, "loss": 0.0696, "step": 27455 }, { "epoch": 0.4862330486525256, "grad_norm": 0.669077455997467, "learning_rate": 1.6395212424105698e-05, "loss": 0.0894, "step": 27456 }, { "epoch": 0.48625075818955404, "grad_norm": 0.9112212061882019, "learning_rate": 1.6394355787077147e-05, "loss": 0.0948, "step": 27457 }, { "epoch": 0.48626846772658244, "grad_norm": 0.9257872104644775, "learning_rate": 1.6393499145461302e-05, "loss": 0.0748, "step": 27458 }, { "epoch": 0.4862861772636109, "grad_norm": 0.5277848243713379, "learning_rate": 1.6392642499260987e-05, "loss": 0.0531, "step": 27459 }, { "epoch": 0.4863038868006393, "grad_norm": 0.4167046844959259, "learning_rate": 1.639178584847902e-05, "loss": 0.054, "step": 27460 }, { "epoch": 0.48632159633766775, "grad_norm": 0.9635116457939148, "learning_rate": 1.6390929193118218e-05, "loss": 0.0596, "step": 27461 }, { "epoch": 0.48633930587469615, "grad_norm": 0.7979294657707214, "learning_rate": 1.6390072533181403e-05, "loss": 0.062, "step": 27462 }, { "epoch": 0.4863570154117246, "grad_norm": 0.5383100509643555, "learning_rate": 1.6389215868671383e-05, "loss": 0.1172, "step": 27463 }, { "epoch": 0.486374724948753, "grad_norm": 0.9254360795021057, "learning_rate": 1.6388359199590986e-05, "loss": 0.087, "step": 27464 }, { "epoch": 0.48639243448578146, "grad_norm": 0.6422131657600403, "learning_rate": 1.6387502525943025e-05, "loss": 0.0855, "step": 27465 }, { "epoch": 0.4864101440228099, "grad_norm": 0.7672243118286133, "learning_rate": 1.6386645847730326e-05, "loss": 0.0836, "step": 27466 }, { "epoch": 0.4864278535598383, "grad_norm": 0.7983369827270508, "learning_rate": 1.6385789164955704e-05, "loss": 0.0633, "step": 27467 }, { "epoch": 0.48644556309686676, "grad_norm": 0.7756997346878052, "learning_rate": 1.6384932477621967e-05, "loss": 0.0765, "step": 27468 }, { "epoch": 0.48646327263389516, "grad_norm": 0.3901691734790802, "learning_rate": 1.6384075785731953e-05, "loss": 0.0799, "step": 27469 }, { "epoch": 0.4864809821709236, "grad_norm": 0.3382765054702759, "learning_rate": 1.638321908928846e-05, "loss": 0.0785, "step": 27470 }, { "epoch": 0.486498691707952, "grad_norm": 1.2020002603530884, "learning_rate": 1.6382362388294323e-05, "loss": 0.0789, "step": 27471 }, { "epoch": 0.48651640124498047, "grad_norm": 0.7809545993804932, "learning_rate": 1.6381505682752353e-05, "loss": 0.0964, "step": 27472 }, { "epoch": 0.48653411078200887, "grad_norm": 0.6314139366149902, "learning_rate": 1.6380648972665364e-05, "loss": 0.0642, "step": 27473 }, { "epoch": 0.4865518203190373, "grad_norm": 0.9602482914924622, "learning_rate": 1.6379792258036183e-05, "loss": 0.1027, "step": 27474 }, { "epoch": 0.4865695298560657, "grad_norm": 0.5703184008598328, "learning_rate": 1.6378935538867625e-05, "loss": 0.0589, "step": 27475 }, { "epoch": 0.4865872393930942, "grad_norm": 1.1472220420837402, "learning_rate": 1.637807881516251e-05, "loss": 0.0978, "step": 27476 }, { "epoch": 0.48660494893012257, "grad_norm": 0.743691623210907, "learning_rate": 1.6377222086923653e-05, "loss": 0.096, "step": 27477 }, { "epoch": 0.486622658467151, "grad_norm": 0.7502194046974182, "learning_rate": 1.6376365354153873e-05, "loss": 0.0923, "step": 27478 }, { "epoch": 0.4866403680041794, "grad_norm": 0.6175882816314697, "learning_rate": 1.637550861685599e-05, "loss": 0.0698, "step": 27479 }, { "epoch": 0.4866580775412079, "grad_norm": 0.5884175896644592, "learning_rate": 1.6374651875032828e-05, "loss": 0.0618, "step": 27480 }, { "epoch": 0.48667578707823633, "grad_norm": 0.8134057521820068, "learning_rate": 1.6373795128687197e-05, "loss": 0.1005, "step": 27481 }, { "epoch": 0.48669349661526473, "grad_norm": 0.33198362588882446, "learning_rate": 1.637293837782192e-05, "loss": 0.0448, "step": 27482 }, { "epoch": 0.4867112061522932, "grad_norm": 0.7484838366508484, "learning_rate": 1.6372081622439813e-05, "loss": 0.0793, "step": 27483 }, { "epoch": 0.4867289156893216, "grad_norm": 0.8398594260215759, "learning_rate": 1.63712248625437e-05, "loss": 0.0914, "step": 27484 }, { "epoch": 0.48674662522635004, "grad_norm": 1.123359203338623, "learning_rate": 1.637036809813639e-05, "loss": 0.1073, "step": 27485 }, { "epoch": 0.48676433476337844, "grad_norm": 0.7084863185882568, "learning_rate": 1.636951132922071e-05, "loss": 0.065, "step": 27486 }, { "epoch": 0.4867820443004069, "grad_norm": 0.42855989933013916, "learning_rate": 1.6368654555799478e-05, "loss": 0.0738, "step": 27487 }, { "epoch": 0.4867997538374353, "grad_norm": 0.8603323698043823, "learning_rate": 1.6367797777875507e-05, "loss": 0.0684, "step": 27488 }, { "epoch": 0.48681746337446374, "grad_norm": 0.6296412944793701, "learning_rate": 1.6366940995451626e-05, "loss": 0.0891, "step": 27489 }, { "epoch": 0.48683517291149214, "grad_norm": 0.6284468770027161, "learning_rate": 1.6366084208530643e-05, "loss": 0.0668, "step": 27490 }, { "epoch": 0.4868528824485206, "grad_norm": 0.7790712714195251, "learning_rate": 1.6365227417115382e-05, "loss": 0.0735, "step": 27491 }, { "epoch": 0.486870591985549, "grad_norm": 0.6303753852844238, "learning_rate": 1.636437062120866e-05, "loss": 0.0879, "step": 27492 }, { "epoch": 0.48688830152257745, "grad_norm": 0.570819079875946, "learning_rate": 1.63635138208133e-05, "loss": 0.0587, "step": 27493 }, { "epoch": 0.48690601105960585, "grad_norm": 0.6424683332443237, "learning_rate": 1.6362657015932115e-05, "loss": 0.0714, "step": 27494 }, { "epoch": 0.4869237205966343, "grad_norm": 0.9144212603569031, "learning_rate": 1.636180020656792e-05, "loss": 0.1053, "step": 27495 }, { "epoch": 0.48694143013366276, "grad_norm": 0.7441580295562744, "learning_rate": 1.6360943392723552e-05, "loss": 0.1032, "step": 27496 }, { "epoch": 0.48695913967069115, "grad_norm": 0.4702499210834503, "learning_rate": 1.6360086574401808e-05, "loss": 0.0708, "step": 27497 }, { "epoch": 0.4869768492077196, "grad_norm": 0.4293000102043152, "learning_rate": 1.635922975160552e-05, "loss": 0.0588, "step": 27498 }, { "epoch": 0.486994558744748, "grad_norm": 0.6638392210006714, "learning_rate": 1.6358372924337507e-05, "loss": 0.0564, "step": 27499 }, { "epoch": 0.48701226828177646, "grad_norm": 0.6244531869888306, "learning_rate": 1.6357516092600584e-05, "loss": 0.076, "step": 27500 }, { "epoch": 0.48702997781880486, "grad_norm": 0.7258561253547668, "learning_rate": 1.6356659256397564e-05, "loss": 0.0842, "step": 27501 }, { "epoch": 0.4870476873558333, "grad_norm": 0.39507585763931274, "learning_rate": 1.6355802415731274e-05, "loss": 0.0664, "step": 27502 }, { "epoch": 0.4870653968928617, "grad_norm": 0.6142821907997131, "learning_rate": 1.6354945570604536e-05, "loss": 0.074, "step": 27503 }, { "epoch": 0.48708310642989017, "grad_norm": 0.5934224724769592, "learning_rate": 1.635408872102016e-05, "loss": 0.0821, "step": 27504 }, { "epoch": 0.48710081596691857, "grad_norm": 0.6223453879356384, "learning_rate": 1.6353231866980976e-05, "loss": 0.0578, "step": 27505 }, { "epoch": 0.487118525503947, "grad_norm": 0.6678272485733032, "learning_rate": 1.635237500848979e-05, "loss": 0.0673, "step": 27506 }, { "epoch": 0.4871362350409754, "grad_norm": 0.5412026643753052, "learning_rate": 1.6351518145549426e-05, "loss": 0.059, "step": 27507 }, { "epoch": 0.4871539445780039, "grad_norm": 1.5634382963180542, "learning_rate": 1.6350661278162705e-05, "loss": 0.1273, "step": 27508 }, { "epoch": 0.48717165411503227, "grad_norm": 0.46870383620262146, "learning_rate": 1.6349804406332447e-05, "loss": 0.0811, "step": 27509 }, { "epoch": 0.4871893636520607, "grad_norm": 0.5199695825576782, "learning_rate": 1.6348947530061472e-05, "loss": 0.065, "step": 27510 }, { "epoch": 0.4872070731890892, "grad_norm": 0.6138614416122437, "learning_rate": 1.6348090649352587e-05, "loss": 0.0653, "step": 27511 }, { "epoch": 0.4872247827261176, "grad_norm": 0.7773814797401428, "learning_rate": 1.6347233764208624e-05, "loss": 0.0511, "step": 27512 }, { "epoch": 0.48724249226314603, "grad_norm": 0.6761115789413452, "learning_rate": 1.6346376874632402e-05, "loss": 0.0735, "step": 27513 }, { "epoch": 0.48726020180017443, "grad_norm": 0.5066881775856018, "learning_rate": 1.6345519980626736e-05, "loss": 0.0731, "step": 27514 }, { "epoch": 0.4872779113372029, "grad_norm": 0.3955516517162323, "learning_rate": 1.634466308219444e-05, "loss": 0.0974, "step": 27515 }, { "epoch": 0.4872956208742313, "grad_norm": 0.667625904083252, "learning_rate": 1.634380617933834e-05, "loss": 0.0822, "step": 27516 }, { "epoch": 0.48731333041125974, "grad_norm": 0.7161039710044861, "learning_rate": 1.6342949272061256e-05, "loss": 0.0833, "step": 27517 }, { "epoch": 0.48733103994828814, "grad_norm": 0.6980948448181152, "learning_rate": 1.6342092360366005e-05, "loss": 0.1006, "step": 27518 }, { "epoch": 0.4873487494853166, "grad_norm": 0.8524110317230225, "learning_rate": 1.6341235444255406e-05, "loss": 0.0969, "step": 27519 }, { "epoch": 0.487366459022345, "grad_norm": 0.6888840198516846, "learning_rate": 1.6340378523732276e-05, "loss": 0.0832, "step": 27520 }, { "epoch": 0.48738416855937344, "grad_norm": 0.7613381147384644, "learning_rate": 1.6339521598799435e-05, "loss": 0.0833, "step": 27521 }, { "epoch": 0.48740187809640184, "grad_norm": 1.022547960281372, "learning_rate": 1.6338664669459708e-05, "loss": 0.091, "step": 27522 }, { "epoch": 0.4874195876334303, "grad_norm": 0.6387839913368225, "learning_rate": 1.63378077357159e-05, "loss": 0.0393, "step": 27523 }, { "epoch": 0.4874372971704587, "grad_norm": 0.5430598258972168, "learning_rate": 1.633695079757085e-05, "loss": 0.0804, "step": 27524 }, { "epoch": 0.48745500670748715, "grad_norm": 0.5687907338142395, "learning_rate": 1.6336093855027368e-05, "loss": 0.089, "step": 27525 }, { "epoch": 0.4874727162445156, "grad_norm": 0.4777176082134247, "learning_rate": 1.6335236908088262e-05, "loss": 0.0383, "step": 27526 }, { "epoch": 0.487490425781544, "grad_norm": 0.9891697764396667, "learning_rate": 1.633437995675637e-05, "loss": 0.0952, "step": 27527 }, { "epoch": 0.48750813531857246, "grad_norm": 0.5524129867553711, "learning_rate": 1.63335230010345e-05, "loss": 0.063, "step": 27528 }, { "epoch": 0.48752584485560085, "grad_norm": 0.5028034448623657, "learning_rate": 1.6332666040925475e-05, "loss": 0.0618, "step": 27529 }, { "epoch": 0.4875435543926293, "grad_norm": 0.46681198477745056, "learning_rate": 1.6331809076432113e-05, "loss": 0.0716, "step": 27530 }, { "epoch": 0.4875612639296577, "grad_norm": 0.8360635042190552, "learning_rate": 1.6330952107557235e-05, "loss": 0.0831, "step": 27531 }, { "epoch": 0.48757897346668616, "grad_norm": 0.4766017198562622, "learning_rate": 1.6330095134303658e-05, "loss": 0.0798, "step": 27532 }, { "epoch": 0.48759668300371456, "grad_norm": 0.7773292064666748, "learning_rate": 1.6329238156674207e-05, "loss": 0.0591, "step": 27533 }, { "epoch": 0.487614392540743, "grad_norm": 0.5254559516906738, "learning_rate": 1.632838117467169e-05, "loss": 0.0532, "step": 27534 }, { "epoch": 0.4876321020777714, "grad_norm": 0.6315693259239197, "learning_rate": 1.6327524188298936e-05, "loss": 0.0705, "step": 27535 }, { "epoch": 0.48764981161479987, "grad_norm": 0.9964125752449036, "learning_rate": 1.632666719755876e-05, "loss": 0.0769, "step": 27536 }, { "epoch": 0.48766752115182826, "grad_norm": 0.8953366279602051, "learning_rate": 1.632581020245399e-05, "loss": 0.0841, "step": 27537 }, { "epoch": 0.4876852306888567, "grad_norm": 0.7761960625648499, "learning_rate": 1.6324953202987433e-05, "loss": 0.0806, "step": 27538 }, { "epoch": 0.4877029402258851, "grad_norm": 0.8162235021591187, "learning_rate": 1.6324096199161916e-05, "loss": 0.0656, "step": 27539 }, { "epoch": 0.48772064976291357, "grad_norm": 0.5161719918251038, "learning_rate": 1.632323919098025e-05, "loss": 0.0536, "step": 27540 }, { "epoch": 0.487738359299942, "grad_norm": 0.7408279180526733, "learning_rate": 1.6322382178445267e-05, "loss": 0.0858, "step": 27541 }, { "epoch": 0.4877560688369704, "grad_norm": 0.6816580295562744, "learning_rate": 1.632152516155978e-05, "loss": 0.0671, "step": 27542 }, { "epoch": 0.4877737783739989, "grad_norm": 0.6649735569953918, "learning_rate": 1.6320668140326607e-05, "loss": 0.0764, "step": 27543 }, { "epoch": 0.4877914879110273, "grad_norm": 0.6564404964447021, "learning_rate": 1.631981111474857e-05, "loss": 0.0838, "step": 27544 }, { "epoch": 0.48780919744805573, "grad_norm": 0.6088340878486633, "learning_rate": 1.6318954084828488e-05, "loss": 0.0753, "step": 27545 }, { "epoch": 0.48782690698508413, "grad_norm": 0.5057749152183533, "learning_rate": 1.631809705056918e-05, "loss": 0.0572, "step": 27546 }, { "epoch": 0.4878446165221126, "grad_norm": 0.8144289255142212, "learning_rate": 1.631724001197347e-05, "loss": 0.1163, "step": 27547 }, { "epoch": 0.487862326059141, "grad_norm": 0.5609707236289978, "learning_rate": 1.631638296904417e-05, "loss": 0.0911, "step": 27548 }, { "epoch": 0.48788003559616944, "grad_norm": 0.863822877407074, "learning_rate": 1.63155259217841e-05, "loss": 0.0673, "step": 27549 }, { "epoch": 0.48789774513319784, "grad_norm": 0.3849359452724457, "learning_rate": 1.6314668870196087e-05, "loss": 0.0577, "step": 27550 }, { "epoch": 0.4879154546702263, "grad_norm": 0.9487938284873962, "learning_rate": 1.6313811814282947e-05, "loss": 0.0568, "step": 27551 }, { "epoch": 0.4879331642072547, "grad_norm": 0.6597834229469299, "learning_rate": 1.63129547540475e-05, "loss": 0.0594, "step": 27552 }, { "epoch": 0.48795087374428314, "grad_norm": 0.6249271631240845, "learning_rate": 1.6312097689492562e-05, "loss": 0.0684, "step": 27553 }, { "epoch": 0.48796858328131154, "grad_norm": 0.7607535719871521, "learning_rate": 1.6311240620620956e-05, "loss": 0.0739, "step": 27554 }, { "epoch": 0.48798629281834, "grad_norm": 0.9077820777893066, "learning_rate": 1.63103835474355e-05, "loss": 0.0735, "step": 27555 }, { "epoch": 0.48800400235536845, "grad_norm": 0.5142853856086731, "learning_rate": 1.6309526469939017e-05, "loss": 0.0512, "step": 27556 }, { "epoch": 0.48802171189239685, "grad_norm": 0.6094696521759033, "learning_rate": 1.6308669388134322e-05, "loss": 0.0585, "step": 27557 }, { "epoch": 0.4880394214294253, "grad_norm": 0.8112682104110718, "learning_rate": 1.6307812302024237e-05, "loss": 0.0727, "step": 27558 }, { "epoch": 0.4880571309664537, "grad_norm": 0.3715015947818756, "learning_rate": 1.630695521161158e-05, "loss": 0.0871, "step": 27559 }, { "epoch": 0.48807484050348215, "grad_norm": 0.597724437713623, "learning_rate": 1.6306098116899177e-05, "loss": 0.0704, "step": 27560 }, { "epoch": 0.48809255004051055, "grad_norm": 0.6787909865379333, "learning_rate": 1.630524101788984e-05, "loss": 0.1248, "step": 27561 }, { "epoch": 0.488110259577539, "grad_norm": 0.5849855542182922, "learning_rate": 1.6304383914586396e-05, "loss": 0.0492, "step": 27562 }, { "epoch": 0.4881279691145674, "grad_norm": 0.6308375000953674, "learning_rate": 1.6303526806991655e-05, "loss": 0.0654, "step": 27563 }, { "epoch": 0.48814567865159586, "grad_norm": 0.8198156952857971, "learning_rate": 1.6302669695108448e-05, "loss": 0.0551, "step": 27564 }, { "epoch": 0.48816338818862426, "grad_norm": 0.6673410534858704, "learning_rate": 1.630181257893959e-05, "loss": 0.0448, "step": 27565 }, { "epoch": 0.4881810977256527, "grad_norm": 0.616736888885498, "learning_rate": 1.6300955458487895e-05, "loss": 0.0703, "step": 27566 }, { "epoch": 0.4881988072626811, "grad_norm": 0.6630985736846924, "learning_rate": 1.6300098333756194e-05, "loss": 0.0538, "step": 27567 }, { "epoch": 0.48821651679970957, "grad_norm": 0.8797993063926697, "learning_rate": 1.6299241204747295e-05, "loss": 0.056, "step": 27568 }, { "epoch": 0.48823422633673796, "grad_norm": 0.7063300013542175, "learning_rate": 1.6298384071464027e-05, "loss": 0.0721, "step": 27569 }, { "epoch": 0.4882519358737664, "grad_norm": 0.7306681275367737, "learning_rate": 1.629752693390921e-05, "loss": 0.0669, "step": 27570 }, { "epoch": 0.4882696454107949, "grad_norm": 0.4179780185222626, "learning_rate": 1.6296669792085657e-05, "loss": 0.0823, "step": 27571 }, { "epoch": 0.48828735494782327, "grad_norm": 0.7019184827804565, "learning_rate": 1.6295812645996193e-05, "loss": 0.0505, "step": 27572 }, { "epoch": 0.4883050644848517, "grad_norm": 0.6435315608978271, "learning_rate": 1.6294955495643632e-05, "loss": 0.1055, "step": 27573 }, { "epoch": 0.4883227740218801, "grad_norm": 0.6007624268531799, "learning_rate": 1.62940983410308e-05, "loss": 0.0644, "step": 27574 }, { "epoch": 0.4883404835589086, "grad_norm": 0.38777706027030945, "learning_rate": 1.6293241182160518e-05, "loss": 0.0461, "step": 27575 }, { "epoch": 0.488358193095937, "grad_norm": 0.8788277506828308, "learning_rate": 1.62923840190356e-05, "loss": 0.0586, "step": 27576 }, { "epoch": 0.48837590263296543, "grad_norm": 0.6856840252876282, "learning_rate": 1.6291526851658875e-05, "loss": 0.0686, "step": 27577 }, { "epoch": 0.48839361216999383, "grad_norm": 0.5067330598831177, "learning_rate": 1.6290669680033153e-05, "loss": 0.0792, "step": 27578 }, { "epoch": 0.4884113217070223, "grad_norm": 0.43703773617744446, "learning_rate": 1.628981250416126e-05, "loss": 0.0875, "step": 27579 }, { "epoch": 0.4884290312440507, "grad_norm": 0.37100496888160706, "learning_rate": 1.6288955324046013e-05, "loss": 0.0379, "step": 27580 }, { "epoch": 0.48844674078107914, "grad_norm": 0.5431103110313416, "learning_rate": 1.6288098139690233e-05, "loss": 0.053, "step": 27581 }, { "epoch": 0.48846445031810753, "grad_norm": 0.7531404495239258, "learning_rate": 1.628724095109674e-05, "loss": 0.0782, "step": 27582 }, { "epoch": 0.488482159855136, "grad_norm": 1.0317524671554565, "learning_rate": 1.6286383758268354e-05, "loss": 0.1128, "step": 27583 }, { "epoch": 0.48849986939216444, "grad_norm": 0.7604717016220093, "learning_rate": 1.62855265612079e-05, "loss": 0.0694, "step": 27584 }, { "epoch": 0.48851757892919284, "grad_norm": 0.7950616478919983, "learning_rate": 1.6284669359918193e-05, "loss": 0.1052, "step": 27585 }, { "epoch": 0.4885352884662213, "grad_norm": 0.6602894067764282, "learning_rate": 1.6283812154402052e-05, "loss": 0.0757, "step": 27586 }, { "epoch": 0.4885529980032497, "grad_norm": 0.7066982388496399, "learning_rate": 1.6282954944662297e-05, "loss": 0.0688, "step": 27587 }, { "epoch": 0.48857070754027815, "grad_norm": 0.5380708575248718, "learning_rate": 1.628209773070175e-05, "loss": 0.0657, "step": 27588 }, { "epoch": 0.48858841707730655, "grad_norm": 0.7033549547195435, "learning_rate": 1.628124051252323e-05, "loss": 0.0797, "step": 27589 }, { "epoch": 0.488606126614335, "grad_norm": 0.6583829522132874, "learning_rate": 1.6280383290129565e-05, "loss": 0.0886, "step": 27590 }, { "epoch": 0.4886238361513634, "grad_norm": 0.5161586999893188, "learning_rate": 1.627952606352356e-05, "loss": 0.0585, "step": 27591 }, { "epoch": 0.48864154568839185, "grad_norm": 0.5658882260322571, "learning_rate": 1.627866883270805e-05, "loss": 0.0677, "step": 27592 }, { "epoch": 0.48865925522542025, "grad_norm": 0.6509019732475281, "learning_rate": 1.627781159768585e-05, "loss": 0.0995, "step": 27593 }, { "epoch": 0.4886769647624487, "grad_norm": 1.1162407398223877, "learning_rate": 1.6276954358459774e-05, "loss": 0.0951, "step": 27594 }, { "epoch": 0.4886946742994771, "grad_norm": 0.7607938051223755, "learning_rate": 1.6276097115032648e-05, "loss": 0.094, "step": 27595 }, { "epoch": 0.48871238383650556, "grad_norm": 0.7316312789916992, "learning_rate": 1.6275239867407294e-05, "loss": 0.0763, "step": 27596 }, { "epoch": 0.48873009337353396, "grad_norm": 0.9340102076530457, "learning_rate": 1.6274382615586526e-05, "loss": 0.082, "step": 27597 }, { "epoch": 0.4887478029105624, "grad_norm": 0.5817422270774841, "learning_rate": 1.627352535957317e-05, "loss": 0.0589, "step": 27598 }, { "epoch": 0.48876551244759087, "grad_norm": 0.3552548885345459, "learning_rate": 1.6272668099370046e-05, "loss": 0.0756, "step": 27599 }, { "epoch": 0.48878322198461926, "grad_norm": 0.8418087363243103, "learning_rate": 1.6271810834979973e-05, "loss": 0.0978, "step": 27600 }, { "epoch": 0.4888009315216477, "grad_norm": 0.6970691084861755, "learning_rate": 1.627095356640577e-05, "loss": 0.0715, "step": 27601 }, { "epoch": 0.4888186410586761, "grad_norm": 1.040110468864441, "learning_rate": 1.6270096293650257e-05, "loss": 0.071, "step": 27602 }, { "epoch": 0.48883635059570457, "grad_norm": 0.5600712895393372, "learning_rate": 1.6269239016716256e-05, "loss": 0.0758, "step": 27603 }, { "epoch": 0.48885406013273297, "grad_norm": 1.1971548795700073, "learning_rate": 1.6268381735606593e-05, "loss": 0.0851, "step": 27604 }, { "epoch": 0.4888717696697614, "grad_norm": 0.5816879868507385, "learning_rate": 1.6267524450324072e-05, "loss": 0.0762, "step": 27605 }, { "epoch": 0.4888894792067898, "grad_norm": 0.6109853386878967, "learning_rate": 1.6266667160871527e-05, "loss": 0.0992, "step": 27606 }, { "epoch": 0.4889071887438183, "grad_norm": 0.4943143129348755, "learning_rate": 1.626580986725178e-05, "loss": 0.0506, "step": 27607 }, { "epoch": 0.4889248982808467, "grad_norm": 0.7950044870376587, "learning_rate": 1.6264952569467646e-05, "loss": 0.0873, "step": 27608 }, { "epoch": 0.48894260781787513, "grad_norm": 0.7450976967811584, "learning_rate": 1.6264095267521945e-05, "loss": 0.1024, "step": 27609 }, { "epoch": 0.48896031735490353, "grad_norm": 0.5821112990379333, "learning_rate": 1.6263237961417494e-05, "loss": 0.074, "step": 27610 }, { "epoch": 0.488978026891932, "grad_norm": 0.8079614639282227, "learning_rate": 1.6262380651157124e-05, "loss": 0.0747, "step": 27611 }, { "epoch": 0.4889957364289604, "grad_norm": 0.5720515251159668, "learning_rate": 1.6261523336743643e-05, "loss": 0.0901, "step": 27612 }, { "epoch": 0.48901344596598884, "grad_norm": 0.8765673041343689, "learning_rate": 1.626066601817988e-05, "loss": 0.0638, "step": 27613 }, { "epoch": 0.4890311555030173, "grad_norm": 0.782062828540802, "learning_rate": 1.6259808695468654e-05, "loss": 0.0667, "step": 27614 }, { "epoch": 0.4890488650400457, "grad_norm": 0.407233327627182, "learning_rate": 1.6258951368612788e-05, "loss": 0.0579, "step": 27615 }, { "epoch": 0.48906657457707414, "grad_norm": 0.8681185245513916, "learning_rate": 1.6258094037615096e-05, "loss": 0.0908, "step": 27616 }, { "epoch": 0.48908428411410254, "grad_norm": 0.623820424079895, "learning_rate": 1.62572367024784e-05, "loss": 0.0757, "step": 27617 }, { "epoch": 0.489101993651131, "grad_norm": 0.47794073820114136, "learning_rate": 1.6256379363205527e-05, "loss": 0.0774, "step": 27618 }, { "epoch": 0.4891197031881594, "grad_norm": 0.4844408333301544, "learning_rate": 1.625552201979929e-05, "loss": 0.0659, "step": 27619 }, { "epoch": 0.48913741272518785, "grad_norm": 0.5752267837524414, "learning_rate": 1.625466467226252e-05, "loss": 0.0889, "step": 27620 }, { "epoch": 0.48915512226221625, "grad_norm": 0.9663417339324951, "learning_rate": 1.6253807320598016e-05, "loss": 0.1113, "step": 27621 }, { "epoch": 0.4891728317992447, "grad_norm": 0.8148195743560791, "learning_rate": 1.6252949964808622e-05, "loss": 0.0831, "step": 27622 }, { "epoch": 0.4891905413362731, "grad_norm": 0.6664634943008423, "learning_rate": 1.6252092604897146e-05, "loss": 0.0845, "step": 27623 }, { "epoch": 0.48920825087330155, "grad_norm": 1.3046014308929443, "learning_rate": 1.6251235240866416e-05, "loss": 0.1154, "step": 27624 }, { "epoch": 0.48922596041032995, "grad_norm": 0.7410134673118591, "learning_rate": 1.6250377872719245e-05, "loss": 0.0707, "step": 27625 }, { "epoch": 0.4892436699473584, "grad_norm": 0.5989567637443542, "learning_rate": 1.6249520500458455e-05, "loss": 0.0783, "step": 27626 }, { "epoch": 0.4892613794843868, "grad_norm": 0.7272923588752747, "learning_rate": 1.624866312408688e-05, "loss": 0.094, "step": 27627 }, { "epoch": 0.48927908902141526, "grad_norm": 0.570642352104187, "learning_rate": 1.6247805743607318e-05, "loss": 0.093, "step": 27628 }, { "epoch": 0.4892967985584437, "grad_norm": 0.578132688999176, "learning_rate": 1.6246948359022606e-05, "loss": 0.0559, "step": 27629 }, { "epoch": 0.4893145080954721, "grad_norm": 0.802495539188385, "learning_rate": 1.6246090970335557e-05, "loss": 0.0792, "step": 27630 }, { "epoch": 0.48933221763250057, "grad_norm": 0.4588949382305145, "learning_rate": 1.6245233577548997e-05, "loss": 0.0759, "step": 27631 }, { "epoch": 0.48934992716952896, "grad_norm": 0.6153720617294312, "learning_rate": 1.6244376180665745e-05, "loss": 0.0528, "step": 27632 }, { "epoch": 0.4893676367065574, "grad_norm": 0.6277512311935425, "learning_rate": 1.624351877968862e-05, "loss": 0.0703, "step": 27633 }, { "epoch": 0.4893853462435858, "grad_norm": 0.4883795380592346, "learning_rate": 1.6242661374620446e-05, "loss": 0.0668, "step": 27634 }, { "epoch": 0.48940305578061427, "grad_norm": 0.21230706572532654, "learning_rate": 1.624180396546404e-05, "loss": 0.059, "step": 27635 }, { "epoch": 0.48942076531764267, "grad_norm": 0.75347900390625, "learning_rate": 1.6240946552222227e-05, "loss": 0.0953, "step": 27636 }, { "epoch": 0.4894384748546711, "grad_norm": 0.5042995810508728, "learning_rate": 1.624008913489782e-05, "loss": 0.0645, "step": 27637 }, { "epoch": 0.4894561843916995, "grad_norm": 0.5822485685348511, "learning_rate": 1.623923171349365e-05, "loss": 0.0556, "step": 27638 }, { "epoch": 0.489473893928728, "grad_norm": 0.9375947117805481, "learning_rate": 1.623837428801253e-05, "loss": 0.0696, "step": 27639 }, { "epoch": 0.4894916034657564, "grad_norm": 0.7201749086380005, "learning_rate": 1.6237516858457283e-05, "loss": 0.0792, "step": 27640 }, { "epoch": 0.48950931300278483, "grad_norm": 0.5177714824676514, "learning_rate": 1.6236659424830738e-05, "loss": 0.0746, "step": 27641 }, { "epoch": 0.48952702253981323, "grad_norm": 0.7141815423965454, "learning_rate": 1.62358019871357e-05, "loss": 0.0748, "step": 27642 }, { "epoch": 0.4895447320768417, "grad_norm": 0.7874472141265869, "learning_rate": 1.6234944545375007e-05, "loss": 0.0676, "step": 27643 }, { "epoch": 0.48956244161387014, "grad_norm": 0.8181313276290894, "learning_rate": 1.6234087099551466e-05, "loss": 0.1035, "step": 27644 }, { "epoch": 0.48958015115089853, "grad_norm": 0.8591031432151794, "learning_rate": 1.62332296496679e-05, "loss": 0.0851, "step": 27645 }, { "epoch": 0.489597860687927, "grad_norm": 0.6769773364067078, "learning_rate": 1.6232372195727136e-05, "loss": 0.0771, "step": 27646 }, { "epoch": 0.4896155702249554, "grad_norm": 1.1369444131851196, "learning_rate": 1.6231514737731996e-05, "loss": 0.0669, "step": 27647 }, { "epoch": 0.48963327976198384, "grad_norm": 0.5001557469367981, "learning_rate": 1.6230657275685293e-05, "loss": 0.0908, "step": 27648 }, { "epoch": 0.48965098929901224, "grad_norm": 0.5778818130493164, "learning_rate": 1.6229799809589855e-05, "loss": 0.0697, "step": 27649 }, { "epoch": 0.4896686988360407, "grad_norm": 0.4768303334712982, "learning_rate": 1.62289423394485e-05, "loss": 0.0527, "step": 27650 }, { "epoch": 0.4896864083730691, "grad_norm": 0.6782669425010681, "learning_rate": 1.6228084865264045e-05, "loss": 0.06, "step": 27651 }, { "epoch": 0.48970411791009755, "grad_norm": 0.4896237254142761, "learning_rate": 1.6227227387039318e-05, "loss": 0.0468, "step": 27652 }, { "epoch": 0.48972182744712595, "grad_norm": 0.7878006100654602, "learning_rate": 1.6226369904777134e-05, "loss": 0.061, "step": 27653 }, { "epoch": 0.4897395369841544, "grad_norm": 0.9198501110076904, "learning_rate": 1.622551241848032e-05, "loss": 0.0709, "step": 27654 }, { "epoch": 0.4897572465211828, "grad_norm": 0.7617549300193787, "learning_rate": 1.6224654928151688e-05, "loss": 0.0823, "step": 27655 }, { "epoch": 0.48977495605821125, "grad_norm": 0.5325115323066711, "learning_rate": 1.6223797433794077e-05, "loss": 0.0835, "step": 27656 }, { "epoch": 0.48979266559523965, "grad_norm": 0.5477933287620544, "learning_rate": 1.622293993541029e-05, "loss": 0.0873, "step": 27657 }, { "epoch": 0.4898103751322681, "grad_norm": 0.653861403465271, "learning_rate": 1.6222082433003154e-05, "loss": 0.089, "step": 27658 }, { "epoch": 0.48982808466929656, "grad_norm": 1.1161285638809204, "learning_rate": 1.6221224926575486e-05, "loss": 0.0669, "step": 27659 }, { "epoch": 0.48984579420632496, "grad_norm": 0.7778034210205078, "learning_rate": 1.6220367416130117e-05, "loss": 0.0613, "step": 27660 }, { "epoch": 0.4898635037433534, "grad_norm": 0.6434244513511658, "learning_rate": 1.6219509901669863e-05, "loss": 0.1154, "step": 27661 }, { "epoch": 0.4898812132803818, "grad_norm": 0.7269030213356018, "learning_rate": 1.6218652383197542e-05, "loss": 0.0781, "step": 27662 }, { "epoch": 0.48989892281741027, "grad_norm": 0.5957168936729431, "learning_rate": 1.6217794860715985e-05, "loss": 0.0596, "step": 27663 }, { "epoch": 0.48991663235443866, "grad_norm": 0.657065749168396, "learning_rate": 1.6216937334227995e-05, "loss": 0.078, "step": 27664 }, { "epoch": 0.4899343418914671, "grad_norm": 0.8312009572982788, "learning_rate": 1.6216079803736412e-05, "loss": 0.0896, "step": 27665 }, { "epoch": 0.4899520514284955, "grad_norm": 1.005545973777771, "learning_rate": 1.6215222269244046e-05, "loss": 0.0988, "step": 27666 }, { "epoch": 0.48996976096552397, "grad_norm": 0.9526273608207703, "learning_rate": 1.621436473075372e-05, "loss": 0.0767, "step": 27667 }, { "epoch": 0.48998747050255237, "grad_norm": 0.6981961727142334, "learning_rate": 1.621350718826826e-05, "loss": 0.0901, "step": 27668 }, { "epoch": 0.4900051800395808, "grad_norm": 0.5895503163337708, "learning_rate": 1.6212649641790486e-05, "loss": 0.0594, "step": 27669 }, { "epoch": 0.4900228895766092, "grad_norm": 0.8618959784507751, "learning_rate": 1.6211792091323214e-05, "loss": 0.0775, "step": 27670 }, { "epoch": 0.4900405991136377, "grad_norm": 0.5580379366874695, "learning_rate": 1.621093453686927e-05, "loss": 0.0609, "step": 27671 }, { "epoch": 0.4900583086506661, "grad_norm": 0.4403974711894989, "learning_rate": 1.6210076978431475e-05, "loss": 0.0695, "step": 27672 }, { "epoch": 0.49007601818769453, "grad_norm": 0.5848469734191895, "learning_rate": 1.6209219416012645e-05, "loss": 0.0615, "step": 27673 }, { "epoch": 0.490093727724723, "grad_norm": 0.6276705265045166, "learning_rate": 1.6208361849615607e-05, "loss": 0.0522, "step": 27674 }, { "epoch": 0.4901114372617514, "grad_norm": 0.4165043532848358, "learning_rate": 1.6207504279243183e-05, "loss": 0.0563, "step": 27675 }, { "epoch": 0.49012914679877984, "grad_norm": 0.7754683494567871, "learning_rate": 1.6206646704898194e-05, "loss": 0.0698, "step": 27676 }, { "epoch": 0.49014685633580823, "grad_norm": 0.7871456146240234, "learning_rate": 1.6205789126583456e-05, "loss": 0.0584, "step": 27677 }, { "epoch": 0.4901645658728367, "grad_norm": 0.5428600907325745, "learning_rate": 1.6204931544301796e-05, "loss": 0.0875, "step": 27678 }, { "epoch": 0.4901822754098651, "grad_norm": 0.46237093210220337, "learning_rate": 1.6204073958056028e-05, "loss": 0.0561, "step": 27679 }, { "epoch": 0.49019998494689354, "grad_norm": 0.5688832402229309, "learning_rate": 1.6203216367848982e-05, "loss": 0.0665, "step": 27680 }, { "epoch": 0.49021769448392194, "grad_norm": 0.5549533367156982, "learning_rate": 1.6202358773683473e-05, "loss": 0.0599, "step": 27681 }, { "epoch": 0.4902354040209504, "grad_norm": 0.24602451920509338, "learning_rate": 1.6201501175562334e-05, "loss": 0.0556, "step": 27682 }, { "epoch": 0.4902531135579788, "grad_norm": 0.3451266884803772, "learning_rate": 1.6200643573488363e-05, "loss": 0.0626, "step": 27683 }, { "epoch": 0.49027082309500725, "grad_norm": 0.8264092206954956, "learning_rate": 1.6199785967464406e-05, "loss": 0.1103, "step": 27684 }, { "epoch": 0.49028853263203565, "grad_norm": 0.7250900864601135, "learning_rate": 1.6198928357493276e-05, "loss": 0.0786, "step": 27685 }, { "epoch": 0.4903062421690641, "grad_norm": 0.6302852630615234, "learning_rate": 1.6198070743577788e-05, "loss": 0.0855, "step": 27686 }, { "epoch": 0.4903239517060925, "grad_norm": 1.0040334463119507, "learning_rate": 1.619721312572077e-05, "loss": 0.0753, "step": 27687 }, { "epoch": 0.49034166124312095, "grad_norm": 0.3619144856929779, "learning_rate": 1.619635550392504e-05, "loss": 0.0674, "step": 27688 }, { "epoch": 0.4903593707801494, "grad_norm": 0.7809416055679321, "learning_rate": 1.6195497878193425e-05, "loss": 0.0776, "step": 27689 }, { "epoch": 0.4903770803171778, "grad_norm": 0.4781982898712158, "learning_rate": 1.6194640248528742e-05, "loss": 0.0602, "step": 27690 }, { "epoch": 0.49039478985420626, "grad_norm": 0.8294983506202698, "learning_rate": 1.6193782614933815e-05, "loss": 0.1057, "step": 27691 }, { "epoch": 0.49041249939123466, "grad_norm": 0.5947065353393555, "learning_rate": 1.619292497741146e-05, "loss": 0.07, "step": 27692 }, { "epoch": 0.4904302089282631, "grad_norm": 0.8077536225318909, "learning_rate": 1.6192067335964502e-05, "loss": 0.0629, "step": 27693 }, { "epoch": 0.4904479184652915, "grad_norm": 0.4298311471939087, "learning_rate": 1.6191209690595767e-05, "loss": 0.0454, "step": 27694 }, { "epoch": 0.49046562800231996, "grad_norm": 0.75677889585495, "learning_rate": 1.619035204130807e-05, "loss": 0.0884, "step": 27695 }, { "epoch": 0.49048333753934836, "grad_norm": 0.6663152575492859, "learning_rate": 1.6189494388104237e-05, "loss": 0.0943, "step": 27696 }, { "epoch": 0.4905010470763768, "grad_norm": 0.558914065361023, "learning_rate": 1.6188636730987087e-05, "loss": 0.0612, "step": 27697 }, { "epoch": 0.4905187566134052, "grad_norm": 0.5183906555175781, "learning_rate": 1.618777906995944e-05, "loss": 0.0551, "step": 27698 }, { "epoch": 0.49053646615043367, "grad_norm": 0.5767343640327454, "learning_rate": 1.618692140502412e-05, "loss": 0.0848, "step": 27699 }, { "epoch": 0.49055417568746207, "grad_norm": 0.561978816986084, "learning_rate": 1.6186063736183957e-05, "loss": 0.0825, "step": 27700 }, { "epoch": 0.4905718852244905, "grad_norm": 0.7555473446846008, "learning_rate": 1.6185206063441757e-05, "loss": 0.0558, "step": 27701 }, { "epoch": 0.4905895947615189, "grad_norm": 0.38340070843696594, "learning_rate": 1.6184348386800348e-05, "loss": 0.0722, "step": 27702 }, { "epoch": 0.4906073042985474, "grad_norm": 0.7878876328468323, "learning_rate": 1.6183490706262554e-05, "loss": 0.0802, "step": 27703 }, { "epoch": 0.49062501383557583, "grad_norm": 0.7271296977996826, "learning_rate": 1.6182633021831197e-05, "loss": 0.0649, "step": 27704 }, { "epoch": 0.49064272337260423, "grad_norm": 0.7854848504066467, "learning_rate": 1.61817753335091e-05, "loss": 0.0571, "step": 27705 }, { "epoch": 0.4906604329096327, "grad_norm": 0.8022370934486389, "learning_rate": 1.6180917641299076e-05, "loss": 0.0551, "step": 27706 }, { "epoch": 0.4906781424466611, "grad_norm": 0.5833969712257385, "learning_rate": 1.618005994520395e-05, "loss": 0.0806, "step": 27707 }, { "epoch": 0.49069585198368953, "grad_norm": 0.9000149369239807, "learning_rate": 1.617920224522655e-05, "loss": 0.0705, "step": 27708 }, { "epoch": 0.49071356152071793, "grad_norm": 0.8017573952674866, "learning_rate": 1.6178344541369695e-05, "loss": 0.09, "step": 27709 }, { "epoch": 0.4907312710577464, "grad_norm": 0.5915900468826294, "learning_rate": 1.6177486833636204e-05, "loss": 0.0799, "step": 27710 }, { "epoch": 0.4907489805947748, "grad_norm": 0.8276732563972473, "learning_rate": 1.6176629122028902e-05, "loss": 0.0791, "step": 27711 }, { "epoch": 0.49076669013180324, "grad_norm": 0.6702280640602112, "learning_rate": 1.617577140655061e-05, "loss": 0.0664, "step": 27712 }, { "epoch": 0.49078439966883164, "grad_norm": 0.5780649185180664, "learning_rate": 1.6174913687204144e-05, "loss": 0.0686, "step": 27713 }, { "epoch": 0.4908021092058601, "grad_norm": 0.6329216957092285, "learning_rate": 1.617405596399234e-05, "loss": 0.1142, "step": 27714 }, { "epoch": 0.4908198187428885, "grad_norm": 1.1770174503326416, "learning_rate": 1.6173198236918003e-05, "loss": 0.0544, "step": 27715 }, { "epoch": 0.49083752827991695, "grad_norm": 0.5581544637680054, "learning_rate": 1.617234050598396e-05, "loss": 0.0609, "step": 27716 }, { "epoch": 0.49085523781694534, "grad_norm": 0.6980670690536499, "learning_rate": 1.617148277119304e-05, "loss": 0.0737, "step": 27717 }, { "epoch": 0.4908729473539738, "grad_norm": 0.9459043145179749, "learning_rate": 1.617062503254806e-05, "loss": 0.087, "step": 27718 }, { "epoch": 0.49089065689100225, "grad_norm": 0.6095002889633179, "learning_rate": 1.616976729005184e-05, "loss": 0.0812, "step": 27719 }, { "epoch": 0.49090836642803065, "grad_norm": 0.6234803795814514, "learning_rate": 1.6168909543707208e-05, "loss": 0.0688, "step": 27720 }, { "epoch": 0.4909260759650591, "grad_norm": 0.8680749535560608, "learning_rate": 1.616805179351698e-05, "loss": 0.0602, "step": 27721 }, { "epoch": 0.4909437855020875, "grad_norm": 0.6306866407394409, "learning_rate": 1.6167194039483977e-05, "loss": 0.0991, "step": 27722 }, { "epoch": 0.49096149503911596, "grad_norm": 0.4535798728466034, "learning_rate": 1.6166336281611028e-05, "loss": 0.0434, "step": 27723 }, { "epoch": 0.49097920457614436, "grad_norm": 0.7544457316398621, "learning_rate": 1.6165478519900945e-05, "loss": 0.0839, "step": 27724 }, { "epoch": 0.4909969141131728, "grad_norm": 0.8314833045005798, "learning_rate": 1.6164620754356564e-05, "loss": 0.0961, "step": 27725 }, { "epoch": 0.4910146236502012, "grad_norm": 0.619256854057312, "learning_rate": 1.6163762984980688e-05, "loss": 0.0881, "step": 27726 }, { "epoch": 0.49103233318722966, "grad_norm": 0.5686466097831726, "learning_rate": 1.6162905211776154e-05, "loss": 0.0853, "step": 27727 }, { "epoch": 0.49105004272425806, "grad_norm": 0.6403284668922424, "learning_rate": 1.6162047434745787e-05, "loss": 0.064, "step": 27728 }, { "epoch": 0.4910677522612865, "grad_norm": 0.5664335489273071, "learning_rate": 1.6161189653892393e-05, "loss": 0.0564, "step": 27729 }, { "epoch": 0.4910854617983149, "grad_norm": 0.49922505021095276, "learning_rate": 1.6160331869218803e-05, "loss": 0.0724, "step": 27730 }, { "epoch": 0.49110317133534337, "grad_norm": 0.5961032509803772, "learning_rate": 1.6159474080727836e-05, "loss": 0.0987, "step": 27731 }, { "epoch": 0.49112088087237177, "grad_norm": 0.49234455823898315, "learning_rate": 1.6158616288422323e-05, "loss": 0.0677, "step": 27732 }, { "epoch": 0.4911385904094002, "grad_norm": 0.7506365180015564, "learning_rate": 1.6157758492305078e-05, "loss": 0.0661, "step": 27733 }, { "epoch": 0.4911562999464287, "grad_norm": 0.6490204334259033, "learning_rate": 1.6156900692378925e-05, "loss": 0.0749, "step": 27734 }, { "epoch": 0.4911740094834571, "grad_norm": 0.7162283658981323, "learning_rate": 1.6156042888646684e-05, "loss": 0.0761, "step": 27735 }, { "epoch": 0.49119171902048553, "grad_norm": 0.7604308128356934, "learning_rate": 1.6155185081111177e-05, "loss": 0.0932, "step": 27736 }, { "epoch": 0.4912094285575139, "grad_norm": 0.584179162979126, "learning_rate": 1.6154327269775233e-05, "loss": 0.0655, "step": 27737 }, { "epoch": 0.4912271380945424, "grad_norm": 0.6733971834182739, "learning_rate": 1.6153469454641664e-05, "loss": 0.0948, "step": 27738 }, { "epoch": 0.4912448476315708, "grad_norm": 1.0878256559371948, "learning_rate": 1.6152611635713303e-05, "loss": 0.0928, "step": 27739 }, { "epoch": 0.49126255716859923, "grad_norm": 1.1731394529342651, "learning_rate": 1.615175381299296e-05, "loss": 0.0776, "step": 27740 }, { "epoch": 0.49128026670562763, "grad_norm": 0.6265571713447571, "learning_rate": 1.6150895986483466e-05, "loss": 0.1078, "step": 27741 }, { "epoch": 0.4912979762426561, "grad_norm": 0.5253932476043701, "learning_rate": 1.615003815618764e-05, "loss": 0.0741, "step": 27742 }, { "epoch": 0.4913156857796845, "grad_norm": 1.0474674701690674, "learning_rate": 1.614918032210831e-05, "loss": 0.0663, "step": 27743 }, { "epoch": 0.49133339531671294, "grad_norm": 1.5350204706192017, "learning_rate": 1.6148322484248288e-05, "loss": 0.0761, "step": 27744 }, { "epoch": 0.49135110485374134, "grad_norm": 0.4446571171283722, "learning_rate": 1.6147464642610398e-05, "loss": 0.0487, "step": 27745 }, { "epoch": 0.4913688143907698, "grad_norm": 0.6060378551483154, "learning_rate": 1.614660679719747e-05, "loss": 0.1098, "step": 27746 }, { "epoch": 0.4913865239277982, "grad_norm": 1.0494393110275269, "learning_rate": 1.614574894801232e-05, "loss": 0.0676, "step": 27747 }, { "epoch": 0.49140423346482665, "grad_norm": 0.8071455955505371, "learning_rate": 1.614489109505777e-05, "loss": 0.0703, "step": 27748 }, { "epoch": 0.4914219430018551, "grad_norm": 0.6688710451126099, "learning_rate": 1.614403323833665e-05, "loss": 0.0545, "step": 27749 }, { "epoch": 0.4914396525388835, "grad_norm": 0.6087179183959961, "learning_rate": 1.6143175377851772e-05, "loss": 0.0866, "step": 27750 }, { "epoch": 0.49145736207591195, "grad_norm": 0.23978225886821747, "learning_rate": 1.6142317513605962e-05, "loss": 0.0514, "step": 27751 }, { "epoch": 0.49147507161294035, "grad_norm": 0.6637261509895325, "learning_rate": 1.6141459645602047e-05, "loss": 0.1091, "step": 27752 }, { "epoch": 0.4914927811499688, "grad_norm": 0.694826066493988, "learning_rate": 1.6140601773842845e-05, "loss": 0.0603, "step": 27753 }, { "epoch": 0.4915104906869972, "grad_norm": 0.6565545201301575, "learning_rate": 1.6139743898331175e-05, "loss": 0.0838, "step": 27754 }, { "epoch": 0.49152820022402566, "grad_norm": 0.5896245241165161, "learning_rate": 1.6138886019069863e-05, "loss": 0.1218, "step": 27755 }, { "epoch": 0.49154590976105406, "grad_norm": 0.6283907294273376, "learning_rate": 1.613802813606173e-05, "loss": 0.0484, "step": 27756 }, { "epoch": 0.4915636192980825, "grad_norm": 0.7065007090568542, "learning_rate": 1.6137170249309606e-05, "loss": 0.056, "step": 27757 }, { "epoch": 0.4915813288351109, "grad_norm": 0.33394911885261536, "learning_rate": 1.6136312358816302e-05, "loss": 0.0784, "step": 27758 }, { "epoch": 0.49159903837213936, "grad_norm": 0.593989372253418, "learning_rate": 1.6135454464584645e-05, "loss": 0.0682, "step": 27759 }, { "epoch": 0.49161674790916776, "grad_norm": 0.696418046951294, "learning_rate": 1.6134596566617457e-05, "loss": 0.0529, "step": 27760 }, { "epoch": 0.4916344574461962, "grad_norm": 0.6217941641807556, "learning_rate": 1.6133738664917565e-05, "loss": 0.0416, "step": 27761 }, { "epoch": 0.4916521669832246, "grad_norm": 0.8537494540214539, "learning_rate": 1.6132880759487784e-05, "loss": 0.0966, "step": 27762 }, { "epoch": 0.49166987652025307, "grad_norm": 0.6034933924674988, "learning_rate": 1.6132022850330942e-05, "loss": 0.1113, "step": 27763 }, { "epoch": 0.4916875860572815, "grad_norm": 0.9063936471939087, "learning_rate": 1.6131164937449856e-05, "loss": 0.1234, "step": 27764 }, { "epoch": 0.4917052955943099, "grad_norm": 0.34248241782188416, "learning_rate": 1.6130307020847354e-05, "loss": 0.0391, "step": 27765 }, { "epoch": 0.4917230051313384, "grad_norm": 0.34832286834716797, "learning_rate": 1.6129449100526256e-05, "loss": 0.0631, "step": 27766 }, { "epoch": 0.4917407146683668, "grad_norm": 0.543658971786499, "learning_rate": 1.612859117648939e-05, "loss": 0.0939, "step": 27767 }, { "epoch": 0.49175842420539523, "grad_norm": 0.7222685217857361, "learning_rate": 1.6127733248739565e-05, "loss": 0.075, "step": 27768 }, { "epoch": 0.4917761337424236, "grad_norm": 0.6961066126823425, "learning_rate": 1.6126875317279614e-05, "loss": 0.1082, "step": 27769 }, { "epoch": 0.4917938432794521, "grad_norm": 0.6018797159194946, "learning_rate": 1.612601738211236e-05, "loss": 0.0938, "step": 27770 }, { "epoch": 0.4918115528164805, "grad_norm": 0.40671777725219727, "learning_rate": 1.612515944324062e-05, "loss": 0.0489, "step": 27771 }, { "epoch": 0.49182926235350893, "grad_norm": 0.8250179886817932, "learning_rate": 1.612430150066722e-05, "loss": 0.0524, "step": 27772 }, { "epoch": 0.49184697189053733, "grad_norm": 0.3489479422569275, "learning_rate": 1.6123443554394983e-05, "loss": 0.0525, "step": 27773 }, { "epoch": 0.4918646814275658, "grad_norm": 0.5973102450370789, "learning_rate": 1.612258560442673e-05, "loss": 0.071, "step": 27774 }, { "epoch": 0.4918823909645942, "grad_norm": 0.58003830909729, "learning_rate": 1.6121727650765282e-05, "loss": 0.0277, "step": 27775 }, { "epoch": 0.49190010050162264, "grad_norm": 0.693495512008667, "learning_rate": 1.6120869693413465e-05, "loss": 0.0772, "step": 27776 }, { "epoch": 0.49191781003865104, "grad_norm": 0.6989708542823792, "learning_rate": 1.61200117323741e-05, "loss": 0.0836, "step": 27777 }, { "epoch": 0.4919355195756795, "grad_norm": 0.4836225211620331, "learning_rate": 1.6119153767650007e-05, "loss": 0.0666, "step": 27778 }, { "epoch": 0.49195322911270795, "grad_norm": 0.6828100681304932, "learning_rate": 1.6118295799244013e-05, "loss": 0.0742, "step": 27779 }, { "epoch": 0.49197093864973634, "grad_norm": 0.5328496098518372, "learning_rate": 1.6117437827158942e-05, "loss": 0.0561, "step": 27780 }, { "epoch": 0.4919886481867648, "grad_norm": 0.34947076439857483, "learning_rate": 1.611657985139761e-05, "loss": 0.0408, "step": 27781 }, { "epoch": 0.4920063577237932, "grad_norm": 0.4481387138366699, "learning_rate": 1.6115721871962848e-05, "loss": 0.0648, "step": 27782 }, { "epoch": 0.49202406726082165, "grad_norm": 0.7142938375473022, "learning_rate": 1.611486388885747e-05, "loss": 0.0827, "step": 27783 }, { "epoch": 0.49204177679785005, "grad_norm": 0.6077114343643188, "learning_rate": 1.6114005902084297e-05, "loss": 0.0828, "step": 27784 }, { "epoch": 0.4920594863348785, "grad_norm": 0.44396501779556274, "learning_rate": 1.6113147911646164e-05, "loss": 0.0701, "step": 27785 }, { "epoch": 0.4920771958719069, "grad_norm": 0.6826894879341125, "learning_rate": 1.6112289917545887e-05, "loss": 0.1139, "step": 27786 }, { "epoch": 0.49209490540893536, "grad_norm": 0.6512882113456726, "learning_rate": 1.6111431919786292e-05, "loss": 0.0596, "step": 27787 }, { "epoch": 0.49211261494596376, "grad_norm": 0.7669743299484253, "learning_rate": 1.6110573918370188e-05, "loss": 0.0519, "step": 27788 }, { "epoch": 0.4921303244829922, "grad_norm": 0.6157140135765076, "learning_rate": 1.6109715913300418e-05, "loss": 0.0705, "step": 27789 }, { "epoch": 0.4921480340200206, "grad_norm": 0.7915442585945129, "learning_rate": 1.6108857904579786e-05, "loss": 0.0553, "step": 27790 }, { "epoch": 0.49216574355704906, "grad_norm": 0.8324832916259766, "learning_rate": 1.6107999892211133e-05, "loss": 0.0914, "step": 27791 }, { "epoch": 0.49218345309407746, "grad_norm": 0.8917213678359985, "learning_rate": 1.610714187619727e-05, "loss": 0.0624, "step": 27792 }, { "epoch": 0.4922011626311059, "grad_norm": 0.6141093969345093, "learning_rate": 1.6106283856541016e-05, "loss": 0.0427, "step": 27793 }, { "epoch": 0.49221887216813437, "grad_norm": 0.6452786326408386, "learning_rate": 1.6105425833245208e-05, "loss": 0.0742, "step": 27794 }, { "epoch": 0.49223658170516277, "grad_norm": 0.726620614528656, "learning_rate": 1.6104567806312653e-05, "loss": 0.0846, "step": 27795 }, { "epoch": 0.4922542912421912, "grad_norm": 0.8004719018936157, "learning_rate": 1.6103709775746188e-05, "loss": 0.0993, "step": 27796 }, { "epoch": 0.4922720007792196, "grad_norm": 0.8147866725921631, "learning_rate": 1.6102851741548628e-05, "loss": 0.0705, "step": 27797 }, { "epoch": 0.4922897103162481, "grad_norm": 0.3038276135921478, "learning_rate": 1.6101993703722796e-05, "loss": 0.0823, "step": 27798 }, { "epoch": 0.4923074198532765, "grad_norm": 0.6182486414909363, "learning_rate": 1.610113566227152e-05, "loss": 0.055, "step": 27799 }, { "epoch": 0.4923251293903049, "grad_norm": 0.5638610124588013, "learning_rate": 1.6100277617197614e-05, "loss": 0.0612, "step": 27800 }, { "epoch": 0.4923428389273333, "grad_norm": 0.8645267486572266, "learning_rate": 1.609941956850391e-05, "loss": 0.0639, "step": 27801 }, { "epoch": 0.4923605484643618, "grad_norm": 0.8290277123451233, "learning_rate": 1.6098561516193224e-05, "loss": 0.1053, "step": 27802 }, { "epoch": 0.4923782580013902, "grad_norm": 1.0447369813919067, "learning_rate": 1.6097703460268378e-05, "loss": 0.088, "step": 27803 }, { "epoch": 0.49239596753841863, "grad_norm": 0.8521263599395752, "learning_rate": 1.6096845400732203e-05, "loss": 0.0639, "step": 27804 }, { "epoch": 0.49241367707544703, "grad_norm": 0.5219423174858093, "learning_rate": 1.609598733758752e-05, "loss": 0.0544, "step": 27805 }, { "epoch": 0.4924313866124755, "grad_norm": 0.7203154563903809, "learning_rate": 1.6095129270837145e-05, "loss": 0.1004, "step": 27806 }, { "epoch": 0.4924490961495039, "grad_norm": 0.5153360962867737, "learning_rate": 1.6094271200483906e-05, "loss": 0.0487, "step": 27807 }, { "epoch": 0.49246680568653234, "grad_norm": 0.47427797317504883, "learning_rate": 1.6093413126530632e-05, "loss": 0.0849, "step": 27808 }, { "epoch": 0.4924845152235608, "grad_norm": 0.7689629197120667, "learning_rate": 1.609255504898013e-05, "loss": 0.0941, "step": 27809 }, { "epoch": 0.4925022247605892, "grad_norm": 0.6434373259544373, "learning_rate": 1.6091696967835237e-05, "loss": 0.0712, "step": 27810 }, { "epoch": 0.49251993429761765, "grad_norm": 0.4978145658969879, "learning_rate": 1.6090838883098772e-05, "loss": 0.0604, "step": 27811 }, { "epoch": 0.49253764383464604, "grad_norm": 0.4381178617477417, "learning_rate": 1.6089980794773558e-05, "loss": 0.0967, "step": 27812 }, { "epoch": 0.4925553533716745, "grad_norm": 0.602059543132782, "learning_rate": 1.6089122702862415e-05, "loss": 0.0639, "step": 27813 }, { "epoch": 0.4925730629087029, "grad_norm": 0.7121920585632324, "learning_rate": 1.608826460736817e-05, "loss": 0.0606, "step": 27814 }, { "epoch": 0.49259077244573135, "grad_norm": 0.46863821148872375, "learning_rate": 1.6087406508293647e-05, "loss": 0.065, "step": 27815 }, { "epoch": 0.49260848198275975, "grad_norm": 0.6695646643638611, "learning_rate": 1.6086548405641663e-05, "loss": 0.0614, "step": 27816 }, { "epoch": 0.4926261915197882, "grad_norm": 0.5152092576026917, "learning_rate": 1.6085690299415044e-05, "loss": 0.0594, "step": 27817 }, { "epoch": 0.4926439010568166, "grad_norm": 0.5465632677078247, "learning_rate": 1.6084832189616612e-05, "loss": 0.0682, "step": 27818 }, { "epoch": 0.49266161059384506, "grad_norm": 0.920901894569397, "learning_rate": 1.6083974076249197e-05, "loss": 0.072, "step": 27819 }, { "epoch": 0.49267932013087345, "grad_norm": 0.5980111360549927, "learning_rate": 1.6083115959315614e-05, "loss": 0.069, "step": 27820 }, { "epoch": 0.4926970296679019, "grad_norm": 0.7867836952209473, "learning_rate": 1.608225783881869e-05, "loss": 0.0561, "step": 27821 }, { "epoch": 0.4927147392049303, "grad_norm": 0.407659113407135, "learning_rate": 1.6081399714761243e-05, "loss": 0.0593, "step": 27822 }, { "epoch": 0.49273244874195876, "grad_norm": 0.8121855854988098, "learning_rate": 1.6080541587146107e-05, "loss": 0.0649, "step": 27823 }, { "epoch": 0.4927501582789872, "grad_norm": 0.8718152046203613, "learning_rate": 1.60796834559761e-05, "loss": 0.056, "step": 27824 }, { "epoch": 0.4927678678160156, "grad_norm": 0.8924765586853027, "learning_rate": 1.6078825321254038e-05, "loss": 0.068, "step": 27825 }, { "epoch": 0.49278557735304407, "grad_norm": 0.48490649461746216, "learning_rate": 1.607796718298275e-05, "loss": 0.0734, "step": 27826 }, { "epoch": 0.49280328689007247, "grad_norm": 0.6101521253585815, "learning_rate": 1.6077109041165056e-05, "loss": 0.0657, "step": 27827 }, { "epoch": 0.4928209964271009, "grad_norm": 0.7211384773254395, "learning_rate": 1.6076250895803787e-05, "loss": 0.0743, "step": 27828 }, { "epoch": 0.4928387059641293, "grad_norm": 0.4299878776073456, "learning_rate": 1.6075392746901764e-05, "loss": 0.0847, "step": 27829 }, { "epoch": 0.4928564155011578, "grad_norm": 0.7157092094421387, "learning_rate": 1.6074534594461803e-05, "loss": 0.1007, "step": 27830 }, { "epoch": 0.4928741250381862, "grad_norm": 0.6032603979110718, "learning_rate": 1.6073676438486733e-05, "loss": 0.0641, "step": 27831 }, { "epoch": 0.4928918345752146, "grad_norm": 0.40052589774131775, "learning_rate": 1.6072818278979373e-05, "loss": 0.0663, "step": 27832 }, { "epoch": 0.492909544112243, "grad_norm": 0.6777377128601074, "learning_rate": 1.6071960115942556e-05, "loss": 0.0697, "step": 27833 }, { "epoch": 0.4929272536492715, "grad_norm": 0.814750075340271, "learning_rate": 1.6071101949379093e-05, "loss": 0.0606, "step": 27834 }, { "epoch": 0.4929449631862999, "grad_norm": 0.8849397301673889, "learning_rate": 1.6070243779291818e-05, "loss": 0.0816, "step": 27835 }, { "epoch": 0.49296267272332833, "grad_norm": 0.6850120425224304, "learning_rate": 1.6069385605683545e-05, "loss": 0.0651, "step": 27836 }, { "epoch": 0.49298038226035673, "grad_norm": 0.5735528469085693, "learning_rate": 1.6068527428557103e-05, "loss": 0.0403, "step": 27837 }, { "epoch": 0.4929980917973852, "grad_norm": 0.7085813283920288, "learning_rate": 1.6067669247915313e-05, "loss": 0.0664, "step": 27838 }, { "epoch": 0.49301580133441364, "grad_norm": 0.7786878347396851, "learning_rate": 1.6066811063761e-05, "loss": 0.0593, "step": 27839 }, { "epoch": 0.49303351087144204, "grad_norm": 0.8583085536956787, "learning_rate": 1.6065952876096986e-05, "loss": 0.0555, "step": 27840 }, { "epoch": 0.4930512204084705, "grad_norm": 0.8532502055168152, "learning_rate": 1.6065094684926094e-05, "loss": 0.1235, "step": 27841 }, { "epoch": 0.4930689299454989, "grad_norm": 0.7311709523200989, "learning_rate": 1.6064236490251153e-05, "loss": 0.073, "step": 27842 }, { "epoch": 0.49308663948252734, "grad_norm": 0.670494019985199, "learning_rate": 1.6063378292074977e-05, "loss": 0.062, "step": 27843 }, { "epoch": 0.49310434901955574, "grad_norm": 0.5076584219932556, "learning_rate": 1.6062520090400396e-05, "loss": 0.0641, "step": 27844 }, { "epoch": 0.4931220585565842, "grad_norm": 0.8382565975189209, "learning_rate": 1.6061661885230232e-05, "loss": 0.0643, "step": 27845 }, { "epoch": 0.4931397680936126, "grad_norm": 0.9159311652183533, "learning_rate": 1.6060803676567304e-05, "loss": 0.1158, "step": 27846 }, { "epoch": 0.49315747763064105, "grad_norm": 0.7330985069274902, "learning_rate": 1.605994546441444e-05, "loss": 0.0889, "step": 27847 }, { "epoch": 0.49317518716766945, "grad_norm": 0.7394912242889404, "learning_rate": 1.6059087248774466e-05, "loss": 0.0511, "step": 27848 }, { "epoch": 0.4931928967046979, "grad_norm": 0.8720151782035828, "learning_rate": 1.6058229029650203e-05, "loss": 0.0614, "step": 27849 }, { "epoch": 0.4932106062417263, "grad_norm": 0.8097327947616577, "learning_rate": 1.6057370807044465e-05, "loss": 0.079, "step": 27850 }, { "epoch": 0.49322831577875476, "grad_norm": 0.5011999011039734, "learning_rate": 1.6056512580960092e-05, "loss": 0.0912, "step": 27851 }, { "epoch": 0.4932460253157832, "grad_norm": 0.6652196049690247, "learning_rate": 1.6055654351399896e-05, "loss": 0.0835, "step": 27852 }, { "epoch": 0.4932637348528116, "grad_norm": 0.3692737817764282, "learning_rate": 1.6054796118366708e-05, "loss": 0.0771, "step": 27853 }, { "epoch": 0.49328144438984006, "grad_norm": 0.8239654898643494, "learning_rate": 1.6053937881863345e-05, "loss": 0.1132, "step": 27854 }, { "epoch": 0.49329915392686846, "grad_norm": 0.6794748902320862, "learning_rate": 1.605307964189263e-05, "loss": 0.0683, "step": 27855 }, { "epoch": 0.4933168634638969, "grad_norm": 0.8776181936264038, "learning_rate": 1.6052221398457393e-05, "loss": 0.0757, "step": 27856 }, { "epoch": 0.4933345730009253, "grad_norm": 0.817150890827179, "learning_rate": 1.605136315156045e-05, "loss": 0.0651, "step": 27857 }, { "epoch": 0.49335228253795377, "grad_norm": 0.3615187108516693, "learning_rate": 1.6050504901204637e-05, "loss": 0.0412, "step": 27858 }, { "epoch": 0.49336999207498217, "grad_norm": 0.4020715057849884, "learning_rate": 1.604964664739276e-05, "loss": 0.0693, "step": 27859 }, { "epoch": 0.4933877016120106, "grad_norm": 0.5723080635070801, "learning_rate": 1.6048788390127656e-05, "loss": 0.061, "step": 27860 }, { "epoch": 0.493405411149039, "grad_norm": 0.6954941749572754, "learning_rate": 1.604793012941214e-05, "loss": 0.1155, "step": 27861 }, { "epoch": 0.4934231206860675, "grad_norm": 0.5354796648025513, "learning_rate": 1.6047071865249046e-05, "loss": 0.079, "step": 27862 }, { "epoch": 0.49344083022309587, "grad_norm": 0.3907565474510193, "learning_rate": 1.6046213597641194e-05, "loss": 0.0699, "step": 27863 }, { "epoch": 0.4934585397601243, "grad_norm": 0.7354978322982788, "learning_rate": 1.60453553265914e-05, "loss": 0.0813, "step": 27864 }, { "epoch": 0.4934762492971527, "grad_norm": 0.510307788848877, "learning_rate": 1.6044497052102488e-05, "loss": 0.0579, "step": 27865 }, { "epoch": 0.4934939588341812, "grad_norm": 1.0987039804458618, "learning_rate": 1.6043638774177293e-05, "loss": 0.0943, "step": 27866 }, { "epoch": 0.49351166837120963, "grad_norm": 0.5219809412956238, "learning_rate": 1.604278049281863e-05, "loss": 0.0504, "step": 27867 }, { "epoch": 0.49352937790823803, "grad_norm": 0.8176438212394714, "learning_rate": 1.6041922208029323e-05, "loss": 0.0997, "step": 27868 }, { "epoch": 0.4935470874452665, "grad_norm": 0.9662008881568909, "learning_rate": 1.60410639198122e-05, "loss": 0.0675, "step": 27869 }, { "epoch": 0.4935647969822949, "grad_norm": 0.6746759414672852, "learning_rate": 1.6040205628170082e-05, "loss": 0.0536, "step": 27870 }, { "epoch": 0.49358250651932334, "grad_norm": 0.6412733197212219, "learning_rate": 1.6039347333105792e-05, "loss": 0.0912, "step": 27871 }, { "epoch": 0.49360021605635174, "grad_norm": 0.421536922454834, "learning_rate": 1.6038489034622156e-05, "loss": 0.0623, "step": 27872 }, { "epoch": 0.4936179255933802, "grad_norm": 0.5880247950553894, "learning_rate": 1.6037630732721994e-05, "loss": 0.0707, "step": 27873 }, { "epoch": 0.4936356351304086, "grad_norm": 0.707313597202301, "learning_rate": 1.6036772427408128e-05, "loss": 0.0845, "step": 27874 }, { "epoch": 0.49365334466743704, "grad_norm": 0.8067984580993652, "learning_rate": 1.6035914118683392e-05, "loss": 0.0663, "step": 27875 }, { "epoch": 0.49367105420446544, "grad_norm": 0.31344643235206604, "learning_rate": 1.6035055806550602e-05, "loss": 0.0546, "step": 27876 }, { "epoch": 0.4936887637414939, "grad_norm": 0.7774050831794739, "learning_rate": 1.603419749101258e-05, "loss": 0.0665, "step": 27877 }, { "epoch": 0.4937064732785223, "grad_norm": 0.8429394960403442, "learning_rate": 1.603333917207216e-05, "loss": 0.0739, "step": 27878 }, { "epoch": 0.49372418281555075, "grad_norm": 0.6419054269790649, "learning_rate": 1.603248084973215e-05, "loss": 0.0769, "step": 27879 }, { "epoch": 0.49374189235257915, "grad_norm": 0.7729085683822632, "learning_rate": 1.6031622523995385e-05, "loss": 0.0675, "step": 27880 }, { "epoch": 0.4937596018896076, "grad_norm": 0.47990259528160095, "learning_rate": 1.603076419486469e-05, "loss": 0.0527, "step": 27881 }, { "epoch": 0.49377731142663606, "grad_norm": 1.4664214849472046, "learning_rate": 1.6029905862342882e-05, "loss": 0.0618, "step": 27882 }, { "epoch": 0.49379502096366445, "grad_norm": 0.6499947309494019, "learning_rate": 1.602904752643279e-05, "loss": 0.0609, "step": 27883 }, { "epoch": 0.4938127305006929, "grad_norm": 0.3492397665977478, "learning_rate": 1.602818918713723e-05, "loss": 0.0822, "step": 27884 }, { "epoch": 0.4938304400377213, "grad_norm": 0.9358716607093811, "learning_rate": 1.6027330844459035e-05, "loss": 0.0704, "step": 27885 }, { "epoch": 0.49384814957474976, "grad_norm": 0.6322406530380249, "learning_rate": 1.602647249840103e-05, "loss": 0.0613, "step": 27886 }, { "epoch": 0.49386585911177816, "grad_norm": 0.4534606635570526, "learning_rate": 1.602561414896603e-05, "loss": 0.0556, "step": 27887 }, { "epoch": 0.4938835686488066, "grad_norm": 0.43373364210128784, "learning_rate": 1.602475579615686e-05, "loss": 0.0522, "step": 27888 }, { "epoch": 0.493901278185835, "grad_norm": 0.3908151090145111, "learning_rate": 1.602389743997635e-05, "loss": 0.1357, "step": 27889 }, { "epoch": 0.49391898772286347, "grad_norm": 0.6476324796676636, "learning_rate": 1.6023039080427326e-05, "loss": 0.1007, "step": 27890 }, { "epoch": 0.49393669725989187, "grad_norm": 0.5316349267959595, "learning_rate": 1.60221807175126e-05, "loss": 0.0696, "step": 27891 }, { "epoch": 0.4939544067969203, "grad_norm": 0.6538680791854858, "learning_rate": 1.6021322351235007e-05, "loss": 0.0614, "step": 27892 }, { "epoch": 0.4939721163339487, "grad_norm": 0.8039781451225281, "learning_rate": 1.6020463981597364e-05, "loss": 0.0753, "step": 27893 }, { "epoch": 0.4939898258709772, "grad_norm": 0.904222846031189, "learning_rate": 1.60196056086025e-05, "loss": 0.0771, "step": 27894 }, { "epoch": 0.49400753540800557, "grad_norm": 0.8113470077514648, "learning_rate": 1.6018747232253234e-05, "loss": 0.1067, "step": 27895 }, { "epoch": 0.494025244945034, "grad_norm": 0.6615402102470398, "learning_rate": 1.6017888852552392e-05, "loss": 0.0637, "step": 27896 }, { "epoch": 0.4940429544820625, "grad_norm": 0.4648713767528534, "learning_rate": 1.6017030469502802e-05, "loss": 0.0743, "step": 27897 }, { "epoch": 0.4940606640190909, "grad_norm": 0.6944214701652527, "learning_rate": 1.6016172083107282e-05, "loss": 0.0703, "step": 27898 }, { "epoch": 0.49407837355611933, "grad_norm": 0.3038221597671509, "learning_rate": 1.601531369336866e-05, "loss": 0.0397, "step": 27899 }, { "epoch": 0.49409608309314773, "grad_norm": 0.5400567054748535, "learning_rate": 1.601445530028976e-05, "loss": 0.0395, "step": 27900 }, { "epoch": 0.4941137926301762, "grad_norm": 0.42428651452064514, "learning_rate": 1.6013596903873406e-05, "loss": 0.0849, "step": 27901 }, { "epoch": 0.4941315021672046, "grad_norm": 0.6099020838737488, "learning_rate": 1.6012738504122413e-05, "loss": 0.0435, "step": 27902 }, { "epoch": 0.49414921170423304, "grad_norm": 0.6807169318199158, "learning_rate": 1.601188010103962e-05, "loss": 0.0781, "step": 27903 }, { "epoch": 0.49416692124126144, "grad_norm": 0.494186133146286, "learning_rate": 1.6011021694627844e-05, "loss": 0.066, "step": 27904 }, { "epoch": 0.4941846307782899, "grad_norm": 0.5716560482978821, "learning_rate": 1.6010163284889903e-05, "loss": 0.0529, "step": 27905 }, { "epoch": 0.4942023403153183, "grad_norm": 0.47012650966644287, "learning_rate": 1.600930487182863e-05, "loss": 0.0705, "step": 27906 }, { "epoch": 0.49422004985234674, "grad_norm": 0.5998489260673523, "learning_rate": 1.6008446455446848e-05, "loss": 0.072, "step": 27907 }, { "epoch": 0.49423775938937514, "grad_norm": 0.48771440982818604, "learning_rate": 1.6007588035747375e-05, "loss": 0.0772, "step": 27908 }, { "epoch": 0.4942554689264036, "grad_norm": 0.6799049377441406, "learning_rate": 1.6006729612733042e-05, "loss": 0.0872, "step": 27909 }, { "epoch": 0.494273178463432, "grad_norm": 0.5486010313034058, "learning_rate": 1.6005871186406672e-05, "loss": 0.0543, "step": 27910 }, { "epoch": 0.49429088800046045, "grad_norm": 0.8755928874015808, "learning_rate": 1.600501275677109e-05, "loss": 0.0596, "step": 27911 }, { "epoch": 0.4943085975374889, "grad_norm": 0.8534859418869019, "learning_rate": 1.6004154323829108e-05, "loss": 0.0972, "step": 27912 }, { "epoch": 0.4943263070745173, "grad_norm": 0.4333602786064148, "learning_rate": 1.6003295887583568e-05, "loss": 0.0705, "step": 27913 }, { "epoch": 0.49434401661154576, "grad_norm": 0.5646132826805115, "learning_rate": 1.600243744803728e-05, "loss": 0.0596, "step": 27914 }, { "epoch": 0.49436172614857415, "grad_norm": 0.8256862759590149, "learning_rate": 1.600157900519308e-05, "loss": 0.0741, "step": 27915 }, { "epoch": 0.4943794356856026, "grad_norm": 1.0714890956878662, "learning_rate": 1.6000720559053785e-05, "loss": 0.0784, "step": 27916 }, { "epoch": 0.494397145222631, "grad_norm": 0.4109598398208618, "learning_rate": 1.5999862109622216e-05, "loss": 0.0721, "step": 27917 }, { "epoch": 0.49441485475965946, "grad_norm": 0.675351619720459, "learning_rate": 1.599900365690121e-05, "loss": 0.0779, "step": 27918 }, { "epoch": 0.49443256429668786, "grad_norm": 0.5233670473098755, "learning_rate": 1.5998145200893577e-05, "loss": 0.0581, "step": 27919 }, { "epoch": 0.4944502738337163, "grad_norm": 0.368365615606308, "learning_rate": 1.599728674160215e-05, "loss": 0.0572, "step": 27920 }, { "epoch": 0.4944679833707447, "grad_norm": 0.5434733629226685, "learning_rate": 1.5996428279029744e-05, "loss": 0.0796, "step": 27921 }, { "epoch": 0.49448569290777317, "grad_norm": 0.6977111101150513, "learning_rate": 1.5995569813179194e-05, "loss": 0.0576, "step": 27922 }, { "epoch": 0.49450340244480157, "grad_norm": 0.8526076674461365, "learning_rate": 1.5994711344053322e-05, "loss": 0.0808, "step": 27923 }, { "epoch": 0.49452111198183, "grad_norm": 0.7434318661689758, "learning_rate": 1.599385287165495e-05, "loss": 0.0776, "step": 27924 }, { "epoch": 0.4945388215188584, "grad_norm": 0.8342227935791016, "learning_rate": 1.59929943959869e-05, "loss": 0.0895, "step": 27925 }, { "epoch": 0.49455653105588687, "grad_norm": 1.033276915550232, "learning_rate": 1.5992135917052006e-05, "loss": 0.0954, "step": 27926 }, { "epoch": 0.4945742405929153, "grad_norm": 1.0308266878128052, "learning_rate": 1.5991277434853075e-05, "loss": 0.0701, "step": 27927 }, { "epoch": 0.4945919501299437, "grad_norm": 0.6614779829978943, "learning_rate": 1.5990418949392946e-05, "loss": 0.0802, "step": 27928 }, { "epoch": 0.4946096596669722, "grad_norm": 0.9310947060585022, "learning_rate": 1.598956046067444e-05, "loss": 0.1044, "step": 27929 }, { "epoch": 0.4946273692040006, "grad_norm": 0.917266309261322, "learning_rate": 1.5988701968700378e-05, "loss": 0.0904, "step": 27930 }, { "epoch": 0.49464507874102903, "grad_norm": 0.5217146873474121, "learning_rate": 1.5987843473473586e-05, "loss": 0.0539, "step": 27931 }, { "epoch": 0.49466278827805743, "grad_norm": 0.5196408629417419, "learning_rate": 1.598698497499689e-05, "loss": 0.0638, "step": 27932 }, { "epoch": 0.4946804978150859, "grad_norm": 0.37478917837142944, "learning_rate": 1.5986126473273117e-05, "loss": 0.0774, "step": 27933 }, { "epoch": 0.4946982073521143, "grad_norm": 0.7806214690208435, "learning_rate": 1.5985267968305088e-05, "loss": 0.1036, "step": 27934 }, { "epoch": 0.49471591688914274, "grad_norm": 0.5258042216300964, "learning_rate": 1.5984409460095622e-05, "loss": 0.0871, "step": 27935 }, { "epoch": 0.49473362642617114, "grad_norm": 0.7419878244400024, "learning_rate": 1.5983550948647547e-05, "loss": 0.0529, "step": 27936 }, { "epoch": 0.4947513359631996, "grad_norm": 0.7528712749481201, "learning_rate": 1.5982692433963693e-05, "loss": 0.062, "step": 27937 }, { "epoch": 0.494769045500228, "grad_norm": 0.5043533444404602, "learning_rate": 1.5981833916046878e-05, "loss": 0.0637, "step": 27938 }, { "epoch": 0.49478675503725644, "grad_norm": 0.5526414513587952, "learning_rate": 1.5980975394899933e-05, "loss": 0.088, "step": 27939 }, { "epoch": 0.49480446457428484, "grad_norm": 0.5546485781669617, "learning_rate": 1.5980116870525674e-05, "loss": 0.0846, "step": 27940 }, { "epoch": 0.4948221741113133, "grad_norm": 0.917395830154419, "learning_rate": 1.597925834292693e-05, "loss": 0.1, "step": 27941 }, { "epoch": 0.49483988364834175, "grad_norm": 0.5279028415679932, "learning_rate": 1.5978399812106523e-05, "loss": 0.0548, "step": 27942 }, { "epoch": 0.49485759318537015, "grad_norm": 0.7288187146186829, "learning_rate": 1.5977541278067286e-05, "loss": 0.0918, "step": 27943 }, { "epoch": 0.4948753027223986, "grad_norm": 0.7500481009483337, "learning_rate": 1.597668274081203e-05, "loss": 0.08, "step": 27944 }, { "epoch": 0.494893012259427, "grad_norm": 0.6734548807144165, "learning_rate": 1.5975824200343593e-05, "loss": 0.0777, "step": 27945 }, { "epoch": 0.49491072179645546, "grad_norm": 1.0047935247421265, "learning_rate": 1.5974965656664786e-05, "loss": 0.0935, "step": 27946 }, { "epoch": 0.49492843133348385, "grad_norm": 0.849446177482605, "learning_rate": 1.5974107109778447e-05, "loss": 0.0821, "step": 27947 }, { "epoch": 0.4949461408705123, "grad_norm": 0.3542400896549225, "learning_rate": 1.5973248559687394e-05, "loss": 0.0726, "step": 27948 }, { "epoch": 0.4949638504075407, "grad_norm": 0.6709415912628174, "learning_rate": 1.597239000639445e-05, "loss": 0.0649, "step": 27949 }, { "epoch": 0.49498155994456916, "grad_norm": 0.6303073763847351, "learning_rate": 1.597153144990244e-05, "loss": 0.049, "step": 27950 }, { "epoch": 0.49499926948159756, "grad_norm": 0.9331178069114685, "learning_rate": 1.597067289021419e-05, "loss": 0.0622, "step": 27951 }, { "epoch": 0.495016979018626, "grad_norm": 0.43298453092575073, "learning_rate": 1.5969814327332524e-05, "loss": 0.0498, "step": 27952 }, { "epoch": 0.4950346885556544, "grad_norm": 0.636981725692749, "learning_rate": 1.5968955761260266e-05, "loss": 0.1062, "step": 27953 }, { "epoch": 0.49505239809268287, "grad_norm": 0.7706398367881775, "learning_rate": 1.5968097192000244e-05, "loss": 0.1171, "step": 27954 }, { "epoch": 0.49507010762971126, "grad_norm": 0.857830822467804, "learning_rate": 1.596723861955528e-05, "loss": 0.0671, "step": 27955 }, { "epoch": 0.4950878171667397, "grad_norm": 0.45776548981666565, "learning_rate": 1.5966380043928198e-05, "loss": 0.0643, "step": 27956 }, { "epoch": 0.4951055267037682, "grad_norm": 0.7669618725776672, "learning_rate": 1.5965521465121823e-05, "loss": 0.0967, "step": 27957 }, { "epoch": 0.49512323624079657, "grad_norm": 0.5028809309005737, "learning_rate": 1.596466288313898e-05, "loss": 0.0605, "step": 27958 }, { "epoch": 0.495140945777825, "grad_norm": 0.42229506373405457, "learning_rate": 1.59638042979825e-05, "loss": 0.0729, "step": 27959 }, { "epoch": 0.4951586553148534, "grad_norm": 0.6524009704589844, "learning_rate": 1.5962945709655187e-05, "loss": 0.0717, "step": 27960 }, { "epoch": 0.4951763648518819, "grad_norm": 0.7104712128639221, "learning_rate": 1.5962087118159895e-05, "loss": 0.0897, "step": 27961 }, { "epoch": 0.4951940743889103, "grad_norm": 0.7681029438972473, "learning_rate": 1.5961228523499425e-05, "loss": 0.0769, "step": 27962 }, { "epoch": 0.49521178392593873, "grad_norm": 0.7137904763221741, "learning_rate": 1.5960369925676615e-05, "loss": 0.0541, "step": 27963 }, { "epoch": 0.49522949346296713, "grad_norm": 0.8214914202690125, "learning_rate": 1.595951132469428e-05, "loss": 0.084, "step": 27964 }, { "epoch": 0.4952472029999956, "grad_norm": 0.5577628016471863, "learning_rate": 1.5958652720555254e-05, "loss": 0.0546, "step": 27965 }, { "epoch": 0.495264912537024, "grad_norm": 0.6511794328689575, "learning_rate": 1.5957794113262357e-05, "loss": 0.0542, "step": 27966 }, { "epoch": 0.49528262207405244, "grad_norm": 0.9513384699821472, "learning_rate": 1.5956935502818414e-05, "loss": 0.0756, "step": 27967 }, { "epoch": 0.49530033161108084, "grad_norm": 0.6414067149162292, "learning_rate": 1.5956076889226253e-05, "loss": 0.0641, "step": 27968 }, { "epoch": 0.4953180411481093, "grad_norm": 0.5408930778503418, "learning_rate": 1.595521827248869e-05, "loss": 0.0723, "step": 27969 }, { "epoch": 0.4953357506851377, "grad_norm": 0.6859836578369141, "learning_rate": 1.5954359652608554e-05, "loss": 0.1024, "step": 27970 }, { "epoch": 0.49535346022216614, "grad_norm": 0.611506462097168, "learning_rate": 1.5953501029588672e-05, "loss": 0.068, "step": 27971 }, { "epoch": 0.4953711697591946, "grad_norm": 0.5108082890510559, "learning_rate": 1.5952642403431874e-05, "loss": 0.067, "step": 27972 }, { "epoch": 0.495388879296223, "grad_norm": 0.5691533088684082, "learning_rate": 1.5951783774140977e-05, "loss": 0.106, "step": 27973 }, { "epoch": 0.49540658883325145, "grad_norm": 0.8950662016868591, "learning_rate": 1.595092514171881e-05, "loss": 0.107, "step": 27974 }, { "epoch": 0.49542429837027985, "grad_norm": 0.4750869572162628, "learning_rate": 1.595006650616819e-05, "loss": 0.0801, "step": 27975 }, { "epoch": 0.4954420079073083, "grad_norm": 0.7467476725578308, "learning_rate": 1.5949207867491946e-05, "loss": 0.0833, "step": 27976 }, { "epoch": 0.4954597174443367, "grad_norm": 0.773202657699585, "learning_rate": 1.594834922569291e-05, "loss": 0.0745, "step": 27977 }, { "epoch": 0.49547742698136515, "grad_norm": 0.45081979036331177, "learning_rate": 1.5947490580773895e-05, "loss": 0.0744, "step": 27978 }, { "epoch": 0.49549513651839355, "grad_norm": 0.5070881843566895, "learning_rate": 1.5946631932737735e-05, "loss": 0.0789, "step": 27979 }, { "epoch": 0.495512846055422, "grad_norm": 0.3465934097766876, "learning_rate": 1.5945773281587252e-05, "loss": 0.0741, "step": 27980 }, { "epoch": 0.4955305555924504, "grad_norm": 0.8640896081924438, "learning_rate": 1.5944914627325274e-05, "loss": 0.0695, "step": 27981 }, { "epoch": 0.49554826512947886, "grad_norm": 0.529955267906189, "learning_rate": 1.5944055969954618e-05, "loss": 0.0938, "step": 27982 }, { "epoch": 0.49556597466650726, "grad_norm": 0.4060552716255188, "learning_rate": 1.5943197309478112e-05, "loss": 0.0642, "step": 27983 }, { "epoch": 0.4955836842035357, "grad_norm": 1.1181766986846924, "learning_rate": 1.5942338645898586e-05, "loss": 0.12, "step": 27984 }, { "epoch": 0.4956013937405641, "grad_norm": 0.7037898898124695, "learning_rate": 1.5941479979218858e-05, "loss": 0.0997, "step": 27985 }, { "epoch": 0.49561910327759257, "grad_norm": 0.8433256149291992, "learning_rate": 1.5940621309441758e-05, "loss": 0.0795, "step": 27986 }, { "epoch": 0.495636812814621, "grad_norm": 0.3487795889377594, "learning_rate": 1.5939762636570107e-05, "loss": 0.072, "step": 27987 }, { "epoch": 0.4956545223516494, "grad_norm": 0.7641366124153137, "learning_rate": 1.593890396060674e-05, "loss": 0.0576, "step": 27988 }, { "epoch": 0.49567223188867787, "grad_norm": 0.5653177499771118, "learning_rate": 1.5938045281554462e-05, "loss": 0.0663, "step": 27989 }, { "epoch": 0.49568994142570627, "grad_norm": 0.38755685091018677, "learning_rate": 1.5937186599416115e-05, "loss": 0.0594, "step": 27990 }, { "epoch": 0.4957076509627347, "grad_norm": 0.5992740392684937, "learning_rate": 1.593632791419452e-05, "loss": 0.0677, "step": 27991 }, { "epoch": 0.4957253604997631, "grad_norm": 1.4169484376907349, "learning_rate": 1.59354692258925e-05, "loss": 0.0892, "step": 27992 }, { "epoch": 0.4957430700367916, "grad_norm": 1.012881875038147, "learning_rate": 1.5934610534512883e-05, "loss": 0.1193, "step": 27993 }, { "epoch": 0.49576077957382, "grad_norm": 0.5179710388183594, "learning_rate": 1.5933751840058487e-05, "loss": 0.0971, "step": 27994 }, { "epoch": 0.49577848911084843, "grad_norm": 0.6749024987220764, "learning_rate": 1.5932893142532147e-05, "loss": 0.0964, "step": 27995 }, { "epoch": 0.49579619864787683, "grad_norm": 0.2972247302532196, "learning_rate": 1.593203444193668e-05, "loss": 0.0849, "step": 27996 }, { "epoch": 0.4958139081849053, "grad_norm": 0.5806611180305481, "learning_rate": 1.5931175738274915e-05, "loss": 0.0767, "step": 27997 }, { "epoch": 0.4958316177219337, "grad_norm": 0.4248620569705963, "learning_rate": 1.5930317031549675e-05, "loss": 0.0567, "step": 27998 }, { "epoch": 0.49584932725896214, "grad_norm": 0.5091578364372253, "learning_rate": 1.5929458321763787e-05, "loss": 0.0644, "step": 27999 }, { "epoch": 0.49586703679599053, "grad_norm": 0.43693891167640686, "learning_rate": 1.5928599608920076e-05, "loss": 0.0736, "step": 28000 }, { "epoch": 0.495884746333019, "grad_norm": 0.7019818425178528, "learning_rate": 1.5927740893021364e-05, "loss": 0.0482, "step": 28001 }, { "epoch": 0.49590245587004744, "grad_norm": 0.8513554334640503, "learning_rate": 1.592688217407048e-05, "loss": 0.0508, "step": 28002 }, { "epoch": 0.49592016540707584, "grad_norm": 0.5481004118919373, "learning_rate": 1.5926023452070248e-05, "loss": 0.08, "step": 28003 }, { "epoch": 0.4959378749441043, "grad_norm": 0.48400142788887024, "learning_rate": 1.592516472702349e-05, "loss": 0.053, "step": 28004 }, { "epoch": 0.4959555844811327, "grad_norm": 0.4483737051486969, "learning_rate": 1.592430599893304e-05, "loss": 0.0679, "step": 28005 }, { "epoch": 0.49597329401816115, "grad_norm": 0.6076057553291321, "learning_rate": 1.5923447267801707e-05, "loss": 0.066, "step": 28006 }, { "epoch": 0.49599100355518955, "grad_norm": 0.5593565106391907, "learning_rate": 1.5922588533632335e-05, "loss": 0.0768, "step": 28007 }, { "epoch": 0.496008713092218, "grad_norm": 1.0118298530578613, "learning_rate": 1.5921729796427733e-05, "loss": 0.0802, "step": 28008 }, { "epoch": 0.4960264226292464, "grad_norm": 0.5326526165008545, "learning_rate": 1.5920871056190735e-05, "loss": 0.0713, "step": 28009 }, { "epoch": 0.49604413216627485, "grad_norm": 0.7575428485870361, "learning_rate": 1.5920012312924165e-05, "loss": 0.0682, "step": 28010 }, { "epoch": 0.49606184170330325, "grad_norm": 0.6931931376457214, "learning_rate": 1.5919153566630852e-05, "loss": 0.0444, "step": 28011 }, { "epoch": 0.4960795512403317, "grad_norm": 0.6268678307533264, "learning_rate": 1.5918294817313607e-05, "loss": 0.0541, "step": 28012 }, { "epoch": 0.4960972607773601, "grad_norm": 0.8587478995323181, "learning_rate": 1.591743606497527e-05, "loss": 0.0708, "step": 28013 }, { "epoch": 0.49611497031438856, "grad_norm": 0.6570829153060913, "learning_rate": 1.5916577309618667e-05, "loss": 0.0961, "step": 28014 }, { "epoch": 0.49613267985141696, "grad_norm": 0.41431137919425964, "learning_rate": 1.591571855124661e-05, "loss": 0.0826, "step": 28015 }, { "epoch": 0.4961503893884454, "grad_norm": 0.8886555433273315, "learning_rate": 1.5914859789861934e-05, "loss": 0.091, "step": 28016 }, { "epoch": 0.49616809892547387, "grad_norm": 1.1551682949066162, "learning_rate": 1.591400102546746e-05, "loss": 0.075, "step": 28017 }, { "epoch": 0.49618580846250226, "grad_norm": 0.7959297299385071, "learning_rate": 1.5913142258066016e-05, "loss": 0.0905, "step": 28018 }, { "epoch": 0.4962035179995307, "grad_norm": 0.7138158082962036, "learning_rate": 1.5912283487660426e-05, "loss": 0.0825, "step": 28019 }, { "epoch": 0.4962212275365591, "grad_norm": 0.6516563892364502, "learning_rate": 1.5911424714253516e-05, "loss": 0.0663, "step": 28020 }, { "epoch": 0.49623893707358757, "grad_norm": 0.6093845963478088, "learning_rate": 1.5910565937848114e-05, "loss": 0.0973, "step": 28021 }, { "epoch": 0.49625664661061597, "grad_norm": 0.6537194848060608, "learning_rate": 1.5909707158447037e-05, "loss": 0.0581, "step": 28022 }, { "epoch": 0.4962743561476444, "grad_norm": 0.424640029668808, "learning_rate": 1.590884837605312e-05, "loss": 0.0838, "step": 28023 }, { "epoch": 0.4962920656846728, "grad_norm": 0.38341382145881653, "learning_rate": 1.590798959066918e-05, "loss": 0.0615, "step": 28024 }, { "epoch": 0.4963097752217013, "grad_norm": 0.6083632111549377, "learning_rate": 1.5907130802298048e-05, "loss": 0.0548, "step": 28025 }, { "epoch": 0.4963274847587297, "grad_norm": 0.8940140008926392, "learning_rate": 1.5906272010942548e-05, "loss": 0.111, "step": 28026 }, { "epoch": 0.49634519429575813, "grad_norm": 0.8915613293647766, "learning_rate": 1.5905413216605502e-05, "loss": 0.083, "step": 28027 }, { "epoch": 0.49636290383278653, "grad_norm": 0.9187387228012085, "learning_rate": 1.5904554419289743e-05, "loss": 0.0936, "step": 28028 }, { "epoch": 0.496380613369815, "grad_norm": 0.806286633014679, "learning_rate": 1.5903695618998086e-05, "loss": 0.0944, "step": 28029 }, { "epoch": 0.4963983229068434, "grad_norm": 1.4900554418563843, "learning_rate": 1.5902836815733364e-05, "loss": 0.0621, "step": 28030 }, { "epoch": 0.49641603244387184, "grad_norm": 0.7031517028808594, "learning_rate": 1.59019780094984e-05, "loss": 0.0693, "step": 28031 }, { "epoch": 0.4964337419809003, "grad_norm": 0.40460658073425293, "learning_rate": 1.5901119200296022e-05, "loss": 0.0747, "step": 28032 }, { "epoch": 0.4964514515179287, "grad_norm": 2.3277883529663086, "learning_rate": 1.5900260388129048e-05, "loss": 0.0524, "step": 28033 }, { "epoch": 0.49646916105495714, "grad_norm": 0.9371119737625122, "learning_rate": 1.5899401573000315e-05, "loss": 0.118, "step": 28034 }, { "epoch": 0.49648687059198554, "grad_norm": 0.4667857587337494, "learning_rate": 1.5898542754912634e-05, "loss": 0.0652, "step": 28035 }, { "epoch": 0.496504580129014, "grad_norm": 0.61390620470047, "learning_rate": 1.5897683933868846e-05, "loss": 0.0727, "step": 28036 }, { "epoch": 0.4965222896660424, "grad_norm": 0.37725701928138733, "learning_rate": 1.5896825109871766e-05, "loss": 0.0669, "step": 28037 }, { "epoch": 0.49653999920307085, "grad_norm": 0.6826726198196411, "learning_rate": 1.5895966282924216e-05, "loss": 0.0786, "step": 28038 }, { "epoch": 0.49655770874009925, "grad_norm": 0.8257824778556824, "learning_rate": 1.5895107453029035e-05, "loss": 0.0436, "step": 28039 }, { "epoch": 0.4965754182771277, "grad_norm": 0.4876299202442169, "learning_rate": 1.589424862018904e-05, "loss": 0.0479, "step": 28040 }, { "epoch": 0.4965931278141561, "grad_norm": 0.8705973029136658, "learning_rate": 1.5893389784407054e-05, "loss": 0.1077, "step": 28041 }, { "epoch": 0.49661083735118455, "grad_norm": 0.5812385082244873, "learning_rate": 1.5892530945685905e-05, "loss": 0.0604, "step": 28042 }, { "epoch": 0.49662854688821295, "grad_norm": 0.5817359089851379, "learning_rate": 1.589167210402843e-05, "loss": 0.0738, "step": 28043 }, { "epoch": 0.4966462564252414, "grad_norm": 0.5489839911460876, "learning_rate": 1.5890813259437436e-05, "loss": 0.0854, "step": 28044 }, { "epoch": 0.4966639659622698, "grad_norm": 1.1243032217025757, "learning_rate": 1.5889954411915754e-05, "loss": 0.0906, "step": 28045 }, { "epoch": 0.49668167549929826, "grad_norm": 0.5004205107688904, "learning_rate": 1.5889095561466217e-05, "loss": 0.0695, "step": 28046 }, { "epoch": 0.4966993850363267, "grad_norm": 0.49576228857040405, "learning_rate": 1.588823670809164e-05, "loss": 0.0967, "step": 28047 }, { "epoch": 0.4967170945733551, "grad_norm": 0.678213357925415, "learning_rate": 1.5887377851794858e-05, "loss": 0.0533, "step": 28048 }, { "epoch": 0.49673480411038357, "grad_norm": 0.7034148573875427, "learning_rate": 1.5886518992578693e-05, "loss": 0.0509, "step": 28049 }, { "epoch": 0.49675251364741196, "grad_norm": 0.5850976705551147, "learning_rate": 1.5885660130445974e-05, "loss": 0.0654, "step": 28050 }, { "epoch": 0.4967702231844404, "grad_norm": 0.5038760304450989, "learning_rate": 1.588480126539951e-05, "loss": 0.0837, "step": 28051 }, { "epoch": 0.4967879327214688, "grad_norm": 0.6885361671447754, "learning_rate": 1.5883942397442154e-05, "loss": 0.0732, "step": 28052 }, { "epoch": 0.49680564225849727, "grad_norm": 0.8869557976722717, "learning_rate": 1.5883083526576708e-05, "loss": 0.0607, "step": 28053 }, { "epoch": 0.49682335179552567, "grad_norm": 0.35819023847579956, "learning_rate": 1.588222465280601e-05, "loss": 0.0514, "step": 28054 }, { "epoch": 0.4968410613325541, "grad_norm": 0.5113514065742493, "learning_rate": 1.588136577613288e-05, "loss": 0.0625, "step": 28055 }, { "epoch": 0.4968587708695825, "grad_norm": 0.7100242972373962, "learning_rate": 1.5880506896560147e-05, "loss": 0.0847, "step": 28056 }, { "epoch": 0.496876480406611, "grad_norm": 0.8022750616073608, "learning_rate": 1.5879648014090635e-05, "loss": 0.0382, "step": 28057 }, { "epoch": 0.4968941899436394, "grad_norm": 0.6741389036178589, "learning_rate": 1.587878912872717e-05, "loss": 0.0524, "step": 28058 }, { "epoch": 0.49691189948066783, "grad_norm": 0.4632749855518341, "learning_rate": 1.587793024047258e-05, "loss": 0.0576, "step": 28059 }, { "epoch": 0.4969296090176962, "grad_norm": 0.43558263778686523, "learning_rate": 1.5877071349329684e-05, "loss": 0.0538, "step": 28060 }, { "epoch": 0.4969473185547247, "grad_norm": 0.9488440752029419, "learning_rate": 1.5876212455301312e-05, "loss": 0.0935, "step": 28061 }, { "epoch": 0.49696502809175314, "grad_norm": 0.6927636861801147, "learning_rate": 1.5875353558390294e-05, "loss": 0.0588, "step": 28062 }, { "epoch": 0.49698273762878153, "grad_norm": 0.7118231058120728, "learning_rate": 1.587449465859945e-05, "loss": 0.0615, "step": 28063 }, { "epoch": 0.49700044716581, "grad_norm": 0.5744582414627075, "learning_rate": 1.587363575593161e-05, "loss": 0.0844, "step": 28064 }, { "epoch": 0.4970181567028384, "grad_norm": 0.746245265007019, "learning_rate": 1.587277685038959e-05, "loss": 0.0662, "step": 28065 }, { "epoch": 0.49703586623986684, "grad_norm": 0.5502920746803284, "learning_rate": 1.5871917941976223e-05, "loss": 0.0639, "step": 28066 }, { "epoch": 0.49705357577689524, "grad_norm": 0.6857590675354004, "learning_rate": 1.5871059030694335e-05, "loss": 0.0575, "step": 28067 }, { "epoch": 0.4970712853139237, "grad_norm": 6.428872108459473, "learning_rate": 1.5870200116546756e-05, "loss": 0.0602, "step": 28068 }, { "epoch": 0.4970889948509521, "grad_norm": 1.0153965950012207, "learning_rate": 1.5869341199536303e-05, "loss": 0.096, "step": 28069 }, { "epoch": 0.49710670438798055, "grad_norm": 1.0225164890289307, "learning_rate": 1.5868482279665803e-05, "loss": 0.0794, "step": 28070 }, { "epoch": 0.49712441392500895, "grad_norm": 0.5294989347457886, "learning_rate": 1.5867623356938085e-05, "loss": 0.0792, "step": 28071 }, { "epoch": 0.4971421234620374, "grad_norm": 0.7436168193817139, "learning_rate": 1.5866764431355977e-05, "loss": 0.0764, "step": 28072 }, { "epoch": 0.4971598329990658, "grad_norm": 0.45384326577186584, "learning_rate": 1.5865905502922303e-05, "loss": 0.0534, "step": 28073 }, { "epoch": 0.49717754253609425, "grad_norm": 0.49585941433906555, "learning_rate": 1.5865046571639884e-05, "loss": 0.0724, "step": 28074 }, { "epoch": 0.49719525207312265, "grad_norm": 0.7109377384185791, "learning_rate": 1.5864187637511546e-05, "loss": 0.099, "step": 28075 }, { "epoch": 0.4972129616101511, "grad_norm": 0.8610849380493164, "learning_rate": 1.5863328700540125e-05, "loss": 0.0914, "step": 28076 }, { "epoch": 0.49723067114717956, "grad_norm": 0.6060336828231812, "learning_rate": 1.5862469760728436e-05, "loss": 0.1014, "step": 28077 }, { "epoch": 0.49724838068420796, "grad_norm": 0.8226684331893921, "learning_rate": 1.5861610818079313e-05, "loss": 0.0958, "step": 28078 }, { "epoch": 0.4972660902212364, "grad_norm": 0.7275616526603699, "learning_rate": 1.5860751872595573e-05, "loss": 0.0563, "step": 28079 }, { "epoch": 0.4972837997582648, "grad_norm": 0.5668635368347168, "learning_rate": 1.5859892924280047e-05, "loss": 0.0822, "step": 28080 }, { "epoch": 0.49730150929529326, "grad_norm": 0.43327897787094116, "learning_rate": 1.585903397313556e-05, "loss": 0.0571, "step": 28081 }, { "epoch": 0.49731921883232166, "grad_norm": 0.6139699816703796, "learning_rate": 1.5858175019164935e-05, "loss": 0.0709, "step": 28082 }, { "epoch": 0.4973369283693501, "grad_norm": 0.793927788734436, "learning_rate": 1.585731606237101e-05, "loss": 0.1242, "step": 28083 }, { "epoch": 0.4973546379063785, "grad_norm": 0.39036083221435547, "learning_rate": 1.585645710275659e-05, "loss": 0.0695, "step": 28084 }, { "epoch": 0.49737234744340697, "grad_norm": 1.364079236984253, "learning_rate": 1.5855598140324522e-05, "loss": 0.1122, "step": 28085 }, { "epoch": 0.49739005698043537, "grad_norm": 0.760172426700592, "learning_rate": 1.585473917507762e-05, "loss": 0.0992, "step": 28086 }, { "epoch": 0.4974077665174638, "grad_norm": 0.6233843564987183, "learning_rate": 1.5853880207018714e-05, "loss": 0.0715, "step": 28087 }, { "epoch": 0.4974254760544922, "grad_norm": 1.1615351438522339, "learning_rate": 1.5853021236150627e-05, "loss": 0.0756, "step": 28088 }, { "epoch": 0.4974431855915207, "grad_norm": 0.9907670617103577, "learning_rate": 1.5852162262476184e-05, "loss": 0.0833, "step": 28089 }, { "epoch": 0.4974608951285491, "grad_norm": 0.4750553071498871, "learning_rate": 1.5851303285998215e-05, "loss": 0.053, "step": 28090 }, { "epoch": 0.49747860466557753, "grad_norm": 0.6989678740501404, "learning_rate": 1.5850444306719548e-05, "loss": 0.1092, "step": 28091 }, { "epoch": 0.497496314202606, "grad_norm": 0.2657918930053711, "learning_rate": 1.5849585324643006e-05, "loss": 0.0415, "step": 28092 }, { "epoch": 0.4975140237396344, "grad_norm": 0.3348248600959778, "learning_rate": 1.5848726339771408e-05, "loss": 0.0685, "step": 28093 }, { "epoch": 0.49753173327666284, "grad_norm": 1.2916675806045532, "learning_rate": 1.584786735210759e-05, "loss": 0.1039, "step": 28094 }, { "epoch": 0.49754944281369123, "grad_norm": 0.6889427900314331, "learning_rate": 1.584700836165437e-05, "loss": 0.1022, "step": 28095 }, { "epoch": 0.4975671523507197, "grad_norm": 0.2763056457042694, "learning_rate": 1.5846149368414583e-05, "loss": 0.0673, "step": 28096 }, { "epoch": 0.4975848618877481, "grad_norm": 0.8795926570892334, "learning_rate": 1.584529037239105e-05, "loss": 0.0899, "step": 28097 }, { "epoch": 0.49760257142477654, "grad_norm": 0.4630257189273834, "learning_rate": 1.5844431373586594e-05, "loss": 0.0529, "step": 28098 }, { "epoch": 0.49762028096180494, "grad_norm": 0.5793963670730591, "learning_rate": 1.5843572372004045e-05, "loss": 0.0618, "step": 28099 }, { "epoch": 0.4976379904988334, "grad_norm": 0.567832350730896, "learning_rate": 1.584271336764623e-05, "loss": 0.0676, "step": 28100 }, { "epoch": 0.4976557000358618, "grad_norm": 0.6609420776367188, "learning_rate": 1.584185436051597e-05, "loss": 0.069, "step": 28101 }, { "epoch": 0.49767340957289025, "grad_norm": 0.9803524613380432, "learning_rate": 1.58409953506161e-05, "loss": 0.0856, "step": 28102 }, { "epoch": 0.49769111910991864, "grad_norm": 0.911831259727478, "learning_rate": 1.5840136337949435e-05, "loss": 0.0563, "step": 28103 }, { "epoch": 0.4977088286469471, "grad_norm": 0.5695269107818604, "learning_rate": 1.5839277322518806e-05, "loss": 0.0595, "step": 28104 }, { "epoch": 0.49772653818397555, "grad_norm": 0.7683839797973633, "learning_rate": 1.5838418304327046e-05, "loss": 0.0677, "step": 28105 }, { "epoch": 0.49774424772100395, "grad_norm": 0.5290153622627258, "learning_rate": 1.5837559283376967e-05, "loss": 0.0809, "step": 28106 }, { "epoch": 0.4977619572580324, "grad_norm": 0.733025312423706, "learning_rate": 1.5836700259671408e-05, "loss": 0.0714, "step": 28107 }, { "epoch": 0.4977796667950608, "grad_norm": 0.6367095112800598, "learning_rate": 1.583584123321319e-05, "loss": 0.0615, "step": 28108 }, { "epoch": 0.49779737633208926, "grad_norm": 0.5802415013313293, "learning_rate": 1.583498220400513e-05, "loss": 0.0711, "step": 28109 }, { "epoch": 0.49781508586911766, "grad_norm": 0.35888877511024475, "learning_rate": 1.5834123172050075e-05, "loss": 0.0735, "step": 28110 }, { "epoch": 0.4978327954061461, "grad_norm": 0.6667577624320984, "learning_rate": 1.583326413735083e-05, "loss": 0.0941, "step": 28111 }, { "epoch": 0.4978505049431745, "grad_norm": 0.6923448443412781, "learning_rate": 1.5832405099910238e-05, "loss": 0.0934, "step": 28112 }, { "epoch": 0.49786821448020296, "grad_norm": 0.5068411231040955, "learning_rate": 1.5831546059731108e-05, "loss": 0.0618, "step": 28113 }, { "epoch": 0.49788592401723136, "grad_norm": 0.7489765286445618, "learning_rate": 1.5830687016816286e-05, "loss": 0.1106, "step": 28114 }, { "epoch": 0.4979036335542598, "grad_norm": 0.6122783422470093, "learning_rate": 1.5829827971168583e-05, "loss": 0.108, "step": 28115 }, { "epoch": 0.4979213430912882, "grad_norm": 0.82246994972229, "learning_rate": 1.5828968922790828e-05, "loss": 0.1034, "step": 28116 }, { "epoch": 0.49793905262831667, "grad_norm": 0.8318877816200256, "learning_rate": 1.582810987168585e-05, "loss": 0.0923, "step": 28117 }, { "epoch": 0.49795676216534507, "grad_norm": 0.41834545135498047, "learning_rate": 1.5827250817856474e-05, "loss": 0.1009, "step": 28118 }, { "epoch": 0.4979744717023735, "grad_norm": 0.6415014266967773, "learning_rate": 1.582639176130553e-05, "loss": 0.079, "step": 28119 }, { "epoch": 0.497992181239402, "grad_norm": 0.527282178401947, "learning_rate": 1.5825532702035835e-05, "loss": 0.0625, "step": 28120 }, { "epoch": 0.4980098907764304, "grad_norm": 1.2836371660232544, "learning_rate": 1.582467364005023e-05, "loss": 0.1224, "step": 28121 }, { "epoch": 0.49802760031345883, "grad_norm": 0.5596021413803101, "learning_rate": 1.5823814575351525e-05, "loss": 0.0588, "step": 28122 }, { "epoch": 0.49804530985048723, "grad_norm": 0.9468501806259155, "learning_rate": 1.5822955507942552e-05, "loss": 0.1069, "step": 28123 }, { "epoch": 0.4980630193875157, "grad_norm": 0.6874371767044067, "learning_rate": 1.5822096437826144e-05, "loss": 0.0797, "step": 28124 }, { "epoch": 0.4980807289245441, "grad_norm": 0.4841632843017578, "learning_rate": 1.5821237365005116e-05, "loss": 0.0643, "step": 28125 }, { "epoch": 0.49809843846157253, "grad_norm": 0.7661231160163879, "learning_rate": 1.5820378289482307e-05, "loss": 0.0771, "step": 28126 }, { "epoch": 0.49811614799860093, "grad_norm": 0.28955215215682983, "learning_rate": 1.5819519211260532e-05, "loss": 0.0489, "step": 28127 }, { "epoch": 0.4981338575356294, "grad_norm": 0.6514561772346497, "learning_rate": 1.5818660130342618e-05, "loss": 0.0446, "step": 28128 }, { "epoch": 0.4981515670726578, "grad_norm": 0.34213393926620483, "learning_rate": 1.58178010467314e-05, "loss": 0.0729, "step": 28129 }, { "epoch": 0.49816927660968624, "grad_norm": 0.7505307197570801, "learning_rate": 1.58169419604297e-05, "loss": 0.1036, "step": 28130 }, { "epoch": 0.49818698614671464, "grad_norm": 0.5362306833267212, "learning_rate": 1.5816082871440344e-05, "loss": 0.0782, "step": 28131 }, { "epoch": 0.4982046956837431, "grad_norm": 0.7371702194213867, "learning_rate": 1.581522377976615e-05, "loss": 0.067, "step": 28132 }, { "epoch": 0.4982224052207715, "grad_norm": 0.6845751404762268, "learning_rate": 1.5814364685409964e-05, "loss": 0.0767, "step": 28133 }, { "epoch": 0.49824011475779995, "grad_norm": 0.8436746001243591, "learning_rate": 1.5813505588374595e-05, "loss": 0.0695, "step": 28134 }, { "epoch": 0.4982578242948284, "grad_norm": 0.8290258646011353, "learning_rate": 1.5812646488662873e-05, "loss": 0.0629, "step": 28135 }, { "epoch": 0.4982755338318568, "grad_norm": 0.39877399802207947, "learning_rate": 1.581178738627763e-05, "loss": 0.0782, "step": 28136 }, { "epoch": 0.49829324336888525, "grad_norm": 0.5854843258857727, "learning_rate": 1.5810928281221686e-05, "loss": 0.0442, "step": 28137 }, { "epoch": 0.49831095290591365, "grad_norm": 0.7280228137969971, "learning_rate": 1.581006917349787e-05, "loss": 0.0772, "step": 28138 }, { "epoch": 0.4983286624429421, "grad_norm": 1.112626552581787, "learning_rate": 1.5809210063109008e-05, "loss": 0.0822, "step": 28139 }, { "epoch": 0.4983463719799705, "grad_norm": 0.7732198238372803, "learning_rate": 1.580835095005793e-05, "loss": 0.065, "step": 28140 }, { "epoch": 0.49836408151699896, "grad_norm": 0.6834312677383423, "learning_rate": 1.5807491834347456e-05, "loss": 0.0852, "step": 28141 }, { "epoch": 0.49838179105402736, "grad_norm": 0.6937406063079834, "learning_rate": 1.5806632715980412e-05, "loss": 0.0687, "step": 28142 }, { "epoch": 0.4983995005910558, "grad_norm": 0.7448128461837769, "learning_rate": 1.580577359495963e-05, "loss": 0.0852, "step": 28143 }, { "epoch": 0.4984172101280842, "grad_norm": 0.6473994255065918, "learning_rate": 1.5804914471287943e-05, "loss": 0.0622, "step": 28144 }, { "epoch": 0.49843491966511266, "grad_norm": 0.7457244396209717, "learning_rate": 1.580405534496816e-05, "loss": 0.0955, "step": 28145 }, { "epoch": 0.49845262920214106, "grad_norm": 0.9856900572776794, "learning_rate": 1.5803196216003116e-05, "loss": 0.1005, "step": 28146 }, { "epoch": 0.4984703387391695, "grad_norm": 0.8002992868423462, "learning_rate": 1.5802337084395646e-05, "loss": 0.078, "step": 28147 }, { "epoch": 0.4984880482761979, "grad_norm": 0.6003637909889221, "learning_rate": 1.580147795014856e-05, "loss": 0.0632, "step": 28148 }, { "epoch": 0.49850575781322637, "grad_norm": 0.7011313438415527, "learning_rate": 1.58006188132647e-05, "loss": 0.0988, "step": 28149 }, { "epoch": 0.4985234673502548, "grad_norm": 0.8193155527114868, "learning_rate": 1.5799759673746878e-05, "loss": 0.0668, "step": 28150 }, { "epoch": 0.4985411768872832, "grad_norm": 0.2275552600622177, "learning_rate": 1.5798900531597927e-05, "loss": 0.0789, "step": 28151 }, { "epoch": 0.4985588864243117, "grad_norm": 0.8619982004165649, "learning_rate": 1.5798041386820678e-05, "loss": 0.1052, "step": 28152 }, { "epoch": 0.4985765959613401, "grad_norm": 0.44760632514953613, "learning_rate": 1.579718223941795e-05, "loss": 0.0777, "step": 28153 }, { "epoch": 0.49859430549836853, "grad_norm": 0.6261985301971436, "learning_rate": 1.5796323089392577e-05, "loss": 0.0666, "step": 28154 }, { "epoch": 0.4986120150353969, "grad_norm": 0.6929695010185242, "learning_rate": 1.579546393674738e-05, "loss": 0.0654, "step": 28155 }, { "epoch": 0.4986297245724254, "grad_norm": 0.3950730860233307, "learning_rate": 1.5794604781485188e-05, "loss": 0.0687, "step": 28156 }, { "epoch": 0.4986474341094538, "grad_norm": 0.6611356735229492, "learning_rate": 1.5793745623608825e-05, "loss": 0.0746, "step": 28157 }, { "epoch": 0.49866514364648223, "grad_norm": 1.0601897239685059, "learning_rate": 1.579288646312112e-05, "loss": 0.0785, "step": 28158 }, { "epoch": 0.49868285318351063, "grad_norm": 0.584740400314331, "learning_rate": 1.5792027300024902e-05, "loss": 0.0568, "step": 28159 }, { "epoch": 0.4987005627205391, "grad_norm": 0.6296530961990356, "learning_rate": 1.579116813432299e-05, "loss": 0.0651, "step": 28160 }, { "epoch": 0.4987182722575675, "grad_norm": 0.7951127290725708, "learning_rate": 1.5790308966018212e-05, "loss": 0.0833, "step": 28161 }, { "epoch": 0.49873598179459594, "grad_norm": 0.6547711491584778, "learning_rate": 1.57894497951134e-05, "loss": 0.0925, "step": 28162 }, { "epoch": 0.49875369133162434, "grad_norm": 0.9622015953063965, "learning_rate": 1.5788590621611384e-05, "loss": 0.0661, "step": 28163 }, { "epoch": 0.4987714008686528, "grad_norm": 0.8926600813865662, "learning_rate": 1.578773144551498e-05, "loss": 0.0872, "step": 28164 }, { "epoch": 0.49878911040568125, "grad_norm": 0.9358741044998169, "learning_rate": 1.578687226682702e-05, "loss": 0.0811, "step": 28165 }, { "epoch": 0.49880681994270964, "grad_norm": 0.7193470597267151, "learning_rate": 1.5786013085550324e-05, "loss": 0.1034, "step": 28166 }, { "epoch": 0.4988245294797381, "grad_norm": 0.9262775182723999, "learning_rate": 1.5785153901687732e-05, "loss": 0.0876, "step": 28167 }, { "epoch": 0.4988422390167665, "grad_norm": 0.40329572558403015, "learning_rate": 1.5784294715242062e-05, "loss": 0.0809, "step": 28168 }, { "epoch": 0.49885994855379495, "grad_norm": 0.6038287281990051, "learning_rate": 1.578343552621614e-05, "loss": 0.0803, "step": 28169 }, { "epoch": 0.49887765809082335, "grad_norm": 0.8017048835754395, "learning_rate": 1.5782576334612795e-05, "loss": 0.0834, "step": 28170 }, { "epoch": 0.4988953676278518, "grad_norm": 0.7910131812095642, "learning_rate": 1.578171714043485e-05, "loss": 0.0654, "step": 28171 }, { "epoch": 0.4989130771648802, "grad_norm": 0.4834287464618683, "learning_rate": 1.5780857943685138e-05, "loss": 0.0685, "step": 28172 }, { "epoch": 0.49893078670190866, "grad_norm": 0.43786659836769104, "learning_rate": 1.5779998744366482e-05, "loss": 0.0787, "step": 28173 }, { "epoch": 0.49894849623893706, "grad_norm": 0.4719901382923126, "learning_rate": 1.5779139542481712e-05, "loss": 0.0719, "step": 28174 }, { "epoch": 0.4989662057759655, "grad_norm": 0.44828104972839355, "learning_rate": 1.577828033803364e-05, "loss": 0.0623, "step": 28175 }, { "epoch": 0.4989839153129939, "grad_norm": 0.9148560166358948, "learning_rate": 1.5777421131025118e-05, "loss": 0.0686, "step": 28176 }, { "epoch": 0.49900162485002236, "grad_norm": 0.5285899639129639, "learning_rate": 1.577656192145895e-05, "loss": 0.0761, "step": 28177 }, { "epoch": 0.49901933438705076, "grad_norm": 0.42091846466064453, "learning_rate": 1.577570270933798e-05, "loss": 0.0541, "step": 28178 }, { "epoch": 0.4990370439240792, "grad_norm": 0.48062050342559814, "learning_rate": 1.577484349466502e-05, "loss": 0.0659, "step": 28179 }, { "epoch": 0.49905475346110767, "grad_norm": 0.6227712631225586, "learning_rate": 1.5773984277442906e-05, "loss": 0.0756, "step": 28180 }, { "epoch": 0.49907246299813607, "grad_norm": 0.4414641559123993, "learning_rate": 1.577312505767446e-05, "loss": 0.0639, "step": 28181 }, { "epoch": 0.4990901725351645, "grad_norm": 0.5810233950614929, "learning_rate": 1.5772265835362516e-05, "loss": 0.0836, "step": 28182 }, { "epoch": 0.4991078820721929, "grad_norm": 0.2939499318599701, "learning_rate": 1.5771406610509892e-05, "loss": 0.0633, "step": 28183 }, { "epoch": 0.4991255916092214, "grad_norm": 0.6721945405006409, "learning_rate": 1.577054738311942e-05, "loss": 0.0516, "step": 28184 }, { "epoch": 0.4991433011462498, "grad_norm": 0.5754801630973816, "learning_rate": 1.5769688153193918e-05, "loss": 0.0577, "step": 28185 }, { "epoch": 0.49916101068327823, "grad_norm": 0.47968432307243347, "learning_rate": 1.5768828920736222e-05, "loss": 0.0829, "step": 28186 }, { "epoch": 0.4991787202203066, "grad_norm": 0.47729650139808655, "learning_rate": 1.5767969685749164e-05, "loss": 0.0972, "step": 28187 }, { "epoch": 0.4991964297573351, "grad_norm": 0.46285882592201233, "learning_rate": 1.576711044823556e-05, "loss": 0.0586, "step": 28188 }, { "epoch": 0.4992141392943635, "grad_norm": 0.49216845631599426, "learning_rate": 1.576625120819824e-05, "loss": 0.0584, "step": 28189 }, { "epoch": 0.49923184883139193, "grad_norm": 0.8471779823303223, "learning_rate": 1.576539196564003e-05, "loss": 0.0988, "step": 28190 }, { "epoch": 0.49924955836842033, "grad_norm": 0.8028319478034973, "learning_rate": 1.5764532720563754e-05, "loss": 0.0875, "step": 28191 }, { "epoch": 0.4992672679054488, "grad_norm": 0.3884343206882477, "learning_rate": 1.576367347297225e-05, "loss": 0.0583, "step": 28192 }, { "epoch": 0.4992849774424772, "grad_norm": 0.6024838089942932, "learning_rate": 1.576281422286833e-05, "loss": 0.0623, "step": 28193 }, { "epoch": 0.49930268697950564, "grad_norm": 0.468235045671463, "learning_rate": 1.5761954970254835e-05, "loss": 0.0634, "step": 28194 }, { "epoch": 0.4993203965165341, "grad_norm": 0.3728763461112976, "learning_rate": 1.5761095715134586e-05, "loss": 0.0767, "step": 28195 }, { "epoch": 0.4993381060535625, "grad_norm": 0.579928457736969, "learning_rate": 1.5760236457510403e-05, "loss": 0.0623, "step": 28196 }, { "epoch": 0.49935581559059095, "grad_norm": 0.5415984392166138, "learning_rate": 1.5759377197385122e-05, "loss": 0.0789, "step": 28197 }, { "epoch": 0.49937352512761934, "grad_norm": 0.5035886764526367, "learning_rate": 1.5758517934761567e-05, "loss": 0.0611, "step": 28198 }, { "epoch": 0.4993912346646478, "grad_norm": 0.543085515499115, "learning_rate": 1.5757658669642562e-05, "loss": 0.0661, "step": 28199 }, { "epoch": 0.4994089442016762, "grad_norm": 0.7601874470710754, "learning_rate": 1.575679940203094e-05, "loss": 0.0743, "step": 28200 }, { "epoch": 0.49942665373870465, "grad_norm": 0.37367796897888184, "learning_rate": 1.5755940131929527e-05, "loss": 0.0635, "step": 28201 }, { "epoch": 0.49944436327573305, "grad_norm": 0.3658351004123688, "learning_rate": 1.5755080859341142e-05, "loss": 0.0568, "step": 28202 }, { "epoch": 0.4994620728127615, "grad_norm": 1.2078219652175903, "learning_rate": 1.5754221584268622e-05, "loss": 0.1049, "step": 28203 }, { "epoch": 0.4994797823497899, "grad_norm": 0.6959379315376282, "learning_rate": 1.5753362306714784e-05, "loss": 0.0842, "step": 28204 }, { "epoch": 0.49949749188681836, "grad_norm": 0.321403831243515, "learning_rate": 1.5752503026682463e-05, "loss": 0.028, "step": 28205 }, { "epoch": 0.49951520142384676, "grad_norm": 0.40873983502388, "learning_rate": 1.575164374417448e-05, "loss": 0.0506, "step": 28206 }, { "epoch": 0.4995329109608752, "grad_norm": 0.4214757978916168, "learning_rate": 1.575078445919367e-05, "loss": 0.0776, "step": 28207 }, { "epoch": 0.4995506204979036, "grad_norm": 0.4536721110343933, "learning_rate": 1.574992517174285e-05, "loss": 0.0769, "step": 28208 }, { "epoch": 0.49956833003493206, "grad_norm": 0.4418588876724243, "learning_rate": 1.5749065881824855e-05, "loss": 0.0912, "step": 28209 }, { "epoch": 0.4995860395719605, "grad_norm": 0.7147939801216125, "learning_rate": 1.5748206589442512e-05, "loss": 0.0705, "step": 28210 }, { "epoch": 0.4996037491089889, "grad_norm": 0.4971655607223511, "learning_rate": 1.5747347294598645e-05, "loss": 0.0487, "step": 28211 }, { "epoch": 0.49962145864601737, "grad_norm": 0.9409638047218323, "learning_rate": 1.5746487997296074e-05, "loss": 0.0906, "step": 28212 }, { "epoch": 0.49963916818304577, "grad_norm": 0.5364806056022644, "learning_rate": 1.574562869753764e-05, "loss": 0.0684, "step": 28213 }, { "epoch": 0.4996568777200742, "grad_norm": 0.5722295045852661, "learning_rate": 1.5744769395326154e-05, "loss": 0.0808, "step": 28214 }, { "epoch": 0.4996745872571026, "grad_norm": 0.6929754614830017, "learning_rate": 1.5743910090664463e-05, "loss": 0.0947, "step": 28215 }, { "epoch": 0.4996922967941311, "grad_norm": 0.7116976380348206, "learning_rate": 1.5743050783555375e-05, "loss": 0.0961, "step": 28216 }, { "epoch": 0.4997100063311595, "grad_norm": 0.7596457600593567, "learning_rate": 1.5742191474001733e-05, "loss": 0.0806, "step": 28217 }, { "epoch": 0.4997277158681879, "grad_norm": 0.5138882398605347, "learning_rate": 1.574133216200635e-05, "loss": 0.0557, "step": 28218 }, { "epoch": 0.4997454254052163, "grad_norm": 0.552455723285675, "learning_rate": 1.574047284757206e-05, "loss": 0.065, "step": 28219 }, { "epoch": 0.4997631349422448, "grad_norm": 0.658268928527832, "learning_rate": 1.573961353070169e-05, "loss": 0.0783, "step": 28220 }, { "epoch": 0.4997808444792732, "grad_norm": 0.43246790766716003, "learning_rate": 1.5738754211398065e-05, "loss": 0.0662, "step": 28221 }, { "epoch": 0.49979855401630163, "grad_norm": 0.7961422204971313, "learning_rate": 1.5737894889664018e-05, "loss": 0.0903, "step": 28222 }, { "epoch": 0.49981626355333003, "grad_norm": 0.4379541873931885, "learning_rate": 1.573703556550236e-05, "loss": 0.0791, "step": 28223 }, { "epoch": 0.4998339730903585, "grad_norm": 0.6472674012184143, "learning_rate": 1.573617623891594e-05, "loss": 0.0831, "step": 28224 }, { "epoch": 0.49985168262738694, "grad_norm": 0.7674142718315125, "learning_rate": 1.573531690990757e-05, "loss": 0.0592, "step": 28225 }, { "epoch": 0.49986939216441534, "grad_norm": 0.8883934617042542, "learning_rate": 1.573445757848009e-05, "loss": 0.0683, "step": 28226 }, { "epoch": 0.4998871017014438, "grad_norm": 0.8749291300773621, "learning_rate": 1.5733598244636308e-05, "loss": 0.0843, "step": 28227 }, { "epoch": 0.4999048112384722, "grad_norm": 0.5764446258544922, "learning_rate": 1.5732738908379068e-05, "loss": 0.0632, "step": 28228 }, { "epoch": 0.49992252077550065, "grad_norm": 0.3778817653656006, "learning_rate": 1.5731879569711193e-05, "loss": 0.0763, "step": 28229 }, { "epoch": 0.49994023031252904, "grad_norm": 0.40401870012283325, "learning_rate": 1.57310202286355e-05, "loss": 0.053, "step": 28230 }, { "epoch": 0.4999579398495575, "grad_norm": 0.7639538049697876, "learning_rate": 1.5730160885154832e-05, "loss": 0.0895, "step": 28231 }, { "epoch": 0.4999756493865859, "grad_norm": 0.7241803407669067, "learning_rate": 1.5729301539272004e-05, "loss": 0.0949, "step": 28232 }, { "epoch": 0.49999335892361435, "grad_norm": 0.6084790825843811, "learning_rate": 1.572844219098985e-05, "loss": 0.0878, "step": 28233 }, { "epoch": 0.5000110684606428, "grad_norm": 0.7340092062950134, "learning_rate": 1.5727582840311198e-05, "loss": 0.0804, "step": 28234 }, { "epoch": 0.5000287779976712, "grad_norm": 0.9327430129051208, "learning_rate": 1.5726723487238867e-05, "loss": 0.0986, "step": 28235 }, { "epoch": 0.5000464875346996, "grad_norm": 0.7041915655136108, "learning_rate": 1.5725864131775693e-05, "loss": 0.0765, "step": 28236 }, { "epoch": 0.500064197071728, "grad_norm": 0.5022675395011902, "learning_rate": 1.572500477392449e-05, "loss": 0.0552, "step": 28237 }, { "epoch": 0.5000819066087565, "grad_norm": 0.9381957054138184, "learning_rate": 1.5724145413688104e-05, "loss": 0.0732, "step": 28238 }, { "epoch": 0.5000996161457849, "grad_norm": 0.5737648606300354, "learning_rate": 1.5723286051069352e-05, "loss": 0.0863, "step": 28239 }, { "epoch": 0.5001173256828133, "grad_norm": 0.7425101399421692, "learning_rate": 1.572242668607106e-05, "loss": 0.0352, "step": 28240 }, { "epoch": 0.5001350352198418, "grad_norm": 0.4844991862773895, "learning_rate": 1.572156731869606e-05, "loss": 0.0588, "step": 28241 }, { "epoch": 0.5001527447568702, "grad_norm": 0.5981969237327576, "learning_rate": 1.572070794894717e-05, "loss": 0.0539, "step": 28242 }, { "epoch": 0.5001704542938986, "grad_norm": 0.7928001284599304, "learning_rate": 1.571984857682723e-05, "loss": 0.1024, "step": 28243 }, { "epoch": 0.500188163830927, "grad_norm": 0.33377495408058167, "learning_rate": 1.571898920233906e-05, "loss": 0.0389, "step": 28244 }, { "epoch": 0.5002058733679555, "grad_norm": 0.7007021903991699, "learning_rate": 1.571812982548549e-05, "loss": 0.0841, "step": 28245 }, { "epoch": 0.5002235829049839, "grad_norm": 0.7513840794563293, "learning_rate": 1.571727044626934e-05, "loss": 0.0548, "step": 28246 }, { "epoch": 0.5002412924420123, "grad_norm": 0.6896507740020752, "learning_rate": 1.5716411064693447e-05, "loss": 0.0742, "step": 28247 }, { "epoch": 0.5002590019790407, "grad_norm": 0.6083849668502808, "learning_rate": 1.571555168076063e-05, "loss": 0.0904, "step": 28248 }, { "epoch": 0.5002767115160692, "grad_norm": 0.34845802187919617, "learning_rate": 1.5714692294473727e-05, "loss": 0.0713, "step": 28249 }, { "epoch": 0.5002944210530976, "grad_norm": 0.6868922114372253, "learning_rate": 1.5713832905835553e-05, "loss": 0.0721, "step": 28250 }, { "epoch": 0.500312130590126, "grad_norm": 0.6978855729103088, "learning_rate": 1.5712973514848943e-05, "loss": 0.0737, "step": 28251 }, { "epoch": 0.5003298401271544, "grad_norm": 0.9517205953598022, "learning_rate": 1.5712114121516722e-05, "loss": 0.0544, "step": 28252 }, { "epoch": 0.5003475496641829, "grad_norm": 1.3108962774276733, "learning_rate": 1.5711254725841713e-05, "loss": 0.0562, "step": 28253 }, { "epoch": 0.5003652592012113, "grad_norm": 0.5723857879638672, "learning_rate": 1.5710395327826756e-05, "loss": 0.0946, "step": 28254 }, { "epoch": 0.5003829687382397, "grad_norm": 0.731994092464447, "learning_rate": 1.570953592747467e-05, "loss": 0.0878, "step": 28255 }, { "epoch": 0.5004006782752682, "grad_norm": 0.8121766448020935, "learning_rate": 1.5708676524788273e-05, "loss": 0.0561, "step": 28256 }, { "epoch": 0.5004183878122966, "grad_norm": 0.6436330676078796, "learning_rate": 1.570781711977041e-05, "loss": 0.0879, "step": 28257 }, { "epoch": 0.500436097349325, "grad_norm": 0.3887624442577362, "learning_rate": 1.5706957712423897e-05, "loss": 0.0741, "step": 28258 }, { "epoch": 0.5004538068863534, "grad_norm": 0.6730890870094299, "learning_rate": 1.5706098302751568e-05, "loss": 0.0762, "step": 28259 }, { "epoch": 0.500471516423382, "grad_norm": 0.8232815861701965, "learning_rate": 1.5705238890756247e-05, "loss": 0.0821, "step": 28260 }, { "epoch": 0.5004892259604103, "grad_norm": 0.9619551301002502, "learning_rate": 1.5704379476440757e-05, "loss": 0.1177, "step": 28261 }, { "epoch": 0.5005069354974387, "grad_norm": 0.6539572477340698, "learning_rate": 1.570352005980793e-05, "loss": 0.0676, "step": 28262 }, { "epoch": 0.5005246450344671, "grad_norm": 0.5630492568016052, "learning_rate": 1.5702660640860596e-05, "loss": 0.0504, "step": 28263 }, { "epoch": 0.5005423545714957, "grad_norm": 0.3330731689929962, "learning_rate": 1.5701801219601578e-05, "loss": 0.0473, "step": 28264 }, { "epoch": 0.500560064108524, "grad_norm": 0.7160674333572388, "learning_rate": 1.570094179603371e-05, "loss": 0.0737, "step": 28265 }, { "epoch": 0.5005777736455524, "grad_norm": 0.7541053295135498, "learning_rate": 1.5700082370159808e-05, "loss": 0.0563, "step": 28266 }, { "epoch": 0.5005954831825808, "grad_norm": 0.5921832919120789, "learning_rate": 1.569922294198271e-05, "loss": 0.0631, "step": 28267 }, { "epoch": 0.5006131927196094, "grad_norm": 0.686290979385376, "learning_rate": 1.5698363511505237e-05, "loss": 0.0722, "step": 28268 }, { "epoch": 0.5006309022566378, "grad_norm": 0.8394278883934021, "learning_rate": 1.5697504078730216e-05, "loss": 0.0887, "step": 28269 }, { "epoch": 0.5006486117936662, "grad_norm": 0.8995399475097656, "learning_rate": 1.569664464366048e-05, "loss": 0.0876, "step": 28270 }, { "epoch": 0.5006663213306947, "grad_norm": 0.6475520730018616, "learning_rate": 1.569578520629885e-05, "loss": 0.094, "step": 28271 }, { "epoch": 0.5006840308677231, "grad_norm": 0.8180175423622131, "learning_rate": 1.5694925766648165e-05, "loss": 0.0689, "step": 28272 }, { "epoch": 0.5007017404047515, "grad_norm": 0.6759049296379089, "learning_rate": 1.569406632471124e-05, "loss": 0.0808, "step": 28273 }, { "epoch": 0.5007194499417799, "grad_norm": 0.7401844263076782, "learning_rate": 1.569320688049091e-05, "loss": 0.0767, "step": 28274 }, { "epoch": 0.5007371594788084, "grad_norm": 0.4415625333786011, "learning_rate": 1.5692347433989996e-05, "loss": 0.086, "step": 28275 }, { "epoch": 0.5007548690158368, "grad_norm": 0.4303138852119446, "learning_rate": 1.5691487985211327e-05, "loss": 0.0589, "step": 28276 }, { "epoch": 0.5007725785528652, "grad_norm": 0.8771042227745056, "learning_rate": 1.5690628534157735e-05, "loss": 0.1067, "step": 28277 }, { "epoch": 0.5007902880898936, "grad_norm": 0.5432571172714233, "learning_rate": 1.5689769080832044e-05, "loss": 0.0487, "step": 28278 }, { "epoch": 0.5008079976269221, "grad_norm": 0.8189416527748108, "learning_rate": 1.5688909625237088e-05, "loss": 0.0941, "step": 28279 }, { "epoch": 0.5008257071639505, "grad_norm": 0.7221609950065613, "learning_rate": 1.5688050167375682e-05, "loss": 0.0787, "step": 28280 }, { "epoch": 0.5008434167009789, "grad_norm": 0.8997278213500977, "learning_rate": 1.568719070725066e-05, "loss": 0.0616, "step": 28281 }, { "epoch": 0.5008611262380073, "grad_norm": 0.6103308200836182, "learning_rate": 1.568633124486486e-05, "loss": 0.0729, "step": 28282 }, { "epoch": 0.5008788357750358, "grad_norm": 0.7613716721534729, "learning_rate": 1.5685471780221087e-05, "loss": 0.0757, "step": 28283 }, { "epoch": 0.5008965453120642, "grad_norm": 1.0511471033096313, "learning_rate": 1.568461231332219e-05, "loss": 0.0859, "step": 28284 }, { "epoch": 0.5009142548490926, "grad_norm": 0.5864144563674927, "learning_rate": 1.568375284417098e-05, "loss": 0.0721, "step": 28285 }, { "epoch": 0.5009319643861211, "grad_norm": 0.5751778483390808, "learning_rate": 1.56828933727703e-05, "loss": 0.0675, "step": 28286 }, { "epoch": 0.5009496739231495, "grad_norm": 0.5815088152885437, "learning_rate": 1.5682033899122967e-05, "loss": 0.0899, "step": 28287 }, { "epoch": 0.5009673834601779, "grad_norm": 0.9645400643348694, "learning_rate": 1.5681174423231813e-05, "loss": 0.0804, "step": 28288 }, { "epoch": 0.5009850929972063, "grad_norm": 0.8504035472869873, "learning_rate": 1.5680314945099662e-05, "loss": 0.0891, "step": 28289 }, { "epoch": 0.5010028025342348, "grad_norm": 0.9510743618011475, "learning_rate": 1.567945546472934e-05, "loss": 0.0913, "step": 28290 }, { "epoch": 0.5010205120712632, "grad_norm": 0.8876581788063049, "learning_rate": 1.5678595982123683e-05, "loss": 0.0708, "step": 28291 }, { "epoch": 0.5010382216082916, "grad_norm": 0.5264475345611572, "learning_rate": 1.5677736497285514e-05, "loss": 0.0661, "step": 28292 }, { "epoch": 0.50105593114532, "grad_norm": 0.4035866856575012, "learning_rate": 1.5676877010217663e-05, "loss": 0.0538, "step": 28293 }, { "epoch": 0.5010736406823485, "grad_norm": 0.7745576500892639, "learning_rate": 1.5676017520922952e-05, "loss": 0.0794, "step": 28294 }, { "epoch": 0.5010913502193769, "grad_norm": 0.8684746623039246, "learning_rate": 1.5675158029404207e-05, "loss": 0.098, "step": 28295 }, { "epoch": 0.5011090597564053, "grad_norm": 0.7469390034675598, "learning_rate": 1.5674298535664264e-05, "loss": 0.0886, "step": 28296 }, { "epoch": 0.5011267692934337, "grad_norm": 0.6682854294776917, "learning_rate": 1.567343903970595e-05, "loss": 0.1088, "step": 28297 }, { "epoch": 0.5011444788304622, "grad_norm": 0.4066092073917389, "learning_rate": 1.567257954153209e-05, "loss": 0.0648, "step": 28298 }, { "epoch": 0.5011621883674906, "grad_norm": 1.1032509803771973, "learning_rate": 1.5671720041145505e-05, "loss": 0.1056, "step": 28299 }, { "epoch": 0.501179897904519, "grad_norm": 0.4731194078922272, "learning_rate": 1.5670860538549037e-05, "loss": 0.0453, "step": 28300 }, { "epoch": 0.5011976074415475, "grad_norm": 0.6733424067497253, "learning_rate": 1.5670001033745498e-05, "loss": 0.0882, "step": 28301 }, { "epoch": 0.5012153169785759, "grad_norm": 0.4932540953159332, "learning_rate": 1.566914152673773e-05, "loss": 0.0754, "step": 28302 }, { "epoch": 0.5012330265156043, "grad_norm": 0.5088598132133484, "learning_rate": 1.5668282017528552e-05, "loss": 0.0666, "step": 28303 }, { "epoch": 0.5012507360526327, "grad_norm": 0.5976930260658264, "learning_rate": 1.566742250612079e-05, "loss": 0.05, "step": 28304 }, { "epoch": 0.5012684455896612, "grad_norm": 0.8813059329986572, "learning_rate": 1.566656299251728e-05, "loss": 0.0648, "step": 28305 }, { "epoch": 0.5012861551266896, "grad_norm": 0.6961474418640137, "learning_rate": 1.5665703476720842e-05, "loss": 0.0659, "step": 28306 }, { "epoch": 0.501303864663718, "grad_norm": 0.8996744155883789, "learning_rate": 1.566484395873431e-05, "loss": 0.1033, "step": 28307 }, { "epoch": 0.5013215742007464, "grad_norm": 0.5280689001083374, "learning_rate": 1.5663984438560508e-05, "loss": 0.0703, "step": 28308 }, { "epoch": 0.5013392837377749, "grad_norm": 0.8440096974372864, "learning_rate": 1.566312491620226e-05, "loss": 0.072, "step": 28309 }, { "epoch": 0.5013569932748033, "grad_norm": 0.6509681940078735, "learning_rate": 1.56622653916624e-05, "loss": 0.1075, "step": 28310 }, { "epoch": 0.5013747028118317, "grad_norm": 0.8644037842750549, "learning_rate": 1.5661405864943756e-05, "loss": 0.0754, "step": 28311 }, { "epoch": 0.5013924123488601, "grad_norm": 0.5544492602348328, "learning_rate": 1.566054633604915e-05, "loss": 0.0657, "step": 28312 }, { "epoch": 0.5014101218858886, "grad_norm": 0.9555819034576416, "learning_rate": 1.565968680498142e-05, "loss": 0.0806, "step": 28313 }, { "epoch": 0.501427831422917, "grad_norm": 0.5067635178565979, "learning_rate": 1.5658827271743383e-05, "loss": 0.0728, "step": 28314 }, { "epoch": 0.5014455409599454, "grad_norm": 0.359817773103714, "learning_rate": 1.565796773633787e-05, "loss": 0.0588, "step": 28315 }, { "epoch": 0.5014632504969739, "grad_norm": 0.7574318051338196, "learning_rate": 1.5657108198767714e-05, "loss": 0.0519, "step": 28316 }, { "epoch": 0.5014809600340023, "grad_norm": 0.5124636888504028, "learning_rate": 1.5656248659035732e-05, "loss": 0.0726, "step": 28317 }, { "epoch": 0.5014986695710307, "grad_norm": 0.6795473098754883, "learning_rate": 1.565538911714476e-05, "loss": 0.0551, "step": 28318 }, { "epoch": 0.5015163791080591, "grad_norm": 0.6747366786003113, "learning_rate": 1.5654529573097627e-05, "loss": 0.0942, "step": 28319 }, { "epoch": 0.5015340886450876, "grad_norm": 0.592918872833252, "learning_rate": 1.5653670026897155e-05, "loss": 0.0726, "step": 28320 }, { "epoch": 0.501551798182116, "grad_norm": 0.8019766807556152, "learning_rate": 1.5652810478546182e-05, "loss": 0.0784, "step": 28321 }, { "epoch": 0.5015695077191444, "grad_norm": 0.5754134654998779, "learning_rate": 1.565195092804752e-05, "loss": 0.0681, "step": 28322 }, { "epoch": 0.5015872172561728, "grad_norm": 0.7602091431617737, "learning_rate": 1.5651091375404008e-05, "loss": 0.0685, "step": 28323 }, { "epoch": 0.5016049267932013, "grad_norm": 0.5409515500068665, "learning_rate": 1.565023182061847e-05, "loss": 0.088, "step": 28324 }, { "epoch": 0.5016226363302297, "grad_norm": 0.546958863735199, "learning_rate": 1.564937226369374e-05, "loss": 0.0747, "step": 28325 }, { "epoch": 0.5016403458672581, "grad_norm": 0.7269978523254395, "learning_rate": 1.5648512704632635e-05, "loss": 0.0826, "step": 28326 }, { "epoch": 0.5016580554042865, "grad_norm": 0.7143573760986328, "learning_rate": 1.5647653143437993e-05, "loss": 0.0751, "step": 28327 }, { "epoch": 0.501675764941315, "grad_norm": 0.5643624663352966, "learning_rate": 1.5646793580112633e-05, "loss": 0.0577, "step": 28328 }, { "epoch": 0.5016934744783434, "grad_norm": 0.73322993516922, "learning_rate": 1.564593401465939e-05, "loss": 0.0768, "step": 28329 }, { "epoch": 0.5017111840153718, "grad_norm": 0.5983761548995972, "learning_rate": 1.5645074447081092e-05, "loss": 0.0605, "step": 28330 }, { "epoch": 0.5017288935524004, "grad_norm": 0.6973755359649658, "learning_rate": 1.5644214877380562e-05, "loss": 0.0754, "step": 28331 }, { "epoch": 0.5017466030894288, "grad_norm": 0.6038734316825867, "learning_rate": 1.564335530556063e-05, "loss": 0.0867, "step": 28332 }, { "epoch": 0.5017643126264572, "grad_norm": 0.33585289120674133, "learning_rate": 1.5642495731624123e-05, "loss": 0.0744, "step": 28333 }, { "epoch": 0.5017820221634856, "grad_norm": 0.5959736108779907, "learning_rate": 1.5641636155573875e-05, "loss": 0.0579, "step": 28334 }, { "epoch": 0.5017997317005141, "grad_norm": 0.47724053263664246, "learning_rate": 1.5640776577412706e-05, "loss": 0.1255, "step": 28335 }, { "epoch": 0.5018174412375425, "grad_norm": 0.6465904116630554, "learning_rate": 1.5639916997143445e-05, "loss": 0.0806, "step": 28336 }, { "epoch": 0.5018351507745709, "grad_norm": 0.7111974954605103, "learning_rate": 1.5639057414768924e-05, "loss": 0.0554, "step": 28337 }, { "epoch": 0.5018528603115993, "grad_norm": 0.6574508547782898, "learning_rate": 1.5638197830291965e-05, "loss": 0.0849, "step": 28338 }, { "epoch": 0.5018705698486278, "grad_norm": 0.5085976719856262, "learning_rate": 1.5637338243715405e-05, "loss": 0.0955, "step": 28339 }, { "epoch": 0.5018882793856562, "grad_norm": 0.6131251454353333, "learning_rate": 1.5636478655042066e-05, "loss": 0.0535, "step": 28340 }, { "epoch": 0.5019059889226846, "grad_norm": 0.631205141544342, "learning_rate": 1.5635619064274776e-05, "loss": 0.0992, "step": 28341 }, { "epoch": 0.501923698459713, "grad_norm": 0.8559355735778809, "learning_rate": 1.563475947141636e-05, "loss": 0.0968, "step": 28342 }, { "epoch": 0.5019414079967415, "grad_norm": 0.735341489315033, "learning_rate": 1.563389987646965e-05, "loss": 0.055, "step": 28343 }, { "epoch": 0.5019591175337699, "grad_norm": 1.1267465353012085, "learning_rate": 1.5633040279437477e-05, "loss": 0.0679, "step": 28344 }, { "epoch": 0.5019768270707983, "grad_norm": 0.6896029710769653, "learning_rate": 1.5632180680322665e-05, "loss": 0.0856, "step": 28345 }, { "epoch": 0.5019945366078268, "grad_norm": 0.8360159397125244, "learning_rate": 1.5631321079128044e-05, "loss": 0.079, "step": 28346 }, { "epoch": 0.5020122461448552, "grad_norm": 0.6365011930465698, "learning_rate": 1.5630461475856434e-05, "loss": 0.0769, "step": 28347 }, { "epoch": 0.5020299556818836, "grad_norm": 0.9418882131576538, "learning_rate": 1.5629601870510674e-05, "loss": 0.0833, "step": 28348 }, { "epoch": 0.502047665218912, "grad_norm": 0.547795295715332, "learning_rate": 1.5628742263093587e-05, "loss": 0.0754, "step": 28349 }, { "epoch": 0.5020653747559405, "grad_norm": 0.6017022728919983, "learning_rate": 1.5627882653608004e-05, "loss": 0.0759, "step": 28350 }, { "epoch": 0.5020830842929689, "grad_norm": 0.8487054109573364, "learning_rate": 1.562702304205675e-05, "loss": 0.0714, "step": 28351 }, { "epoch": 0.5021007938299973, "grad_norm": 0.92047518491745, "learning_rate": 1.5626163428442646e-05, "loss": 0.0604, "step": 28352 }, { "epoch": 0.5021185033670257, "grad_norm": 0.4524243175983429, "learning_rate": 1.562530381276854e-05, "loss": 0.0681, "step": 28353 }, { "epoch": 0.5021362129040542, "grad_norm": 0.7131531834602356, "learning_rate": 1.5624444195037237e-05, "loss": 0.0672, "step": 28354 }, { "epoch": 0.5021539224410826, "grad_norm": 0.4347284734249115, "learning_rate": 1.562358457525158e-05, "loss": 0.0815, "step": 28355 }, { "epoch": 0.502171631978111, "grad_norm": 0.7818007469177246, "learning_rate": 1.5622724953414394e-05, "loss": 0.0672, "step": 28356 }, { "epoch": 0.5021893415151394, "grad_norm": 0.7258461713790894, "learning_rate": 1.5621865329528503e-05, "loss": 0.0507, "step": 28357 }, { "epoch": 0.5022070510521679, "grad_norm": 0.8669838309288025, "learning_rate": 1.5621005703596736e-05, "loss": 0.0904, "step": 28358 }, { "epoch": 0.5022247605891963, "grad_norm": 0.480974406003952, "learning_rate": 1.562014607562193e-05, "loss": 0.0597, "step": 28359 }, { "epoch": 0.5022424701262247, "grad_norm": 0.799418032169342, "learning_rate": 1.5619286445606903e-05, "loss": 0.0754, "step": 28360 }, { "epoch": 0.5022601796632532, "grad_norm": 1.0445066690444946, "learning_rate": 1.561842681355449e-05, "loss": 0.0851, "step": 28361 }, { "epoch": 0.5022778892002816, "grad_norm": 0.914165735244751, "learning_rate": 1.561756717946751e-05, "loss": 0.0976, "step": 28362 }, { "epoch": 0.50229559873731, "grad_norm": 0.5702779293060303, "learning_rate": 1.5616707543348794e-05, "loss": 0.06, "step": 28363 }, { "epoch": 0.5023133082743384, "grad_norm": 0.7482748031616211, "learning_rate": 1.561584790520118e-05, "loss": 0.065, "step": 28364 }, { "epoch": 0.5023310178113669, "grad_norm": 0.555116593837738, "learning_rate": 1.561498826502748e-05, "loss": 0.0346, "step": 28365 }, { "epoch": 0.5023487273483953, "grad_norm": 0.7982653379440308, "learning_rate": 1.561412862283054e-05, "loss": 0.0934, "step": 28366 }, { "epoch": 0.5023664368854237, "grad_norm": 0.5029848217964172, "learning_rate": 1.561326897861317e-05, "loss": 0.0914, "step": 28367 }, { "epoch": 0.5023841464224521, "grad_norm": 0.761133074760437, "learning_rate": 1.5612409332378217e-05, "loss": 0.0705, "step": 28368 }, { "epoch": 0.5024018559594806, "grad_norm": 0.8333499431610107, "learning_rate": 1.5611549684128493e-05, "loss": 0.0692, "step": 28369 }, { "epoch": 0.502419565496509, "grad_norm": 0.5494679808616638, "learning_rate": 1.5610690033866836e-05, "loss": 0.064, "step": 28370 }, { "epoch": 0.5024372750335374, "grad_norm": 0.4145379960536957, "learning_rate": 1.5609830381596064e-05, "loss": 0.0513, "step": 28371 }, { "epoch": 0.5024549845705658, "grad_norm": 0.7376580238342285, "learning_rate": 1.5608970727319013e-05, "loss": 0.0712, "step": 28372 }, { "epoch": 0.5024726941075943, "grad_norm": 0.4635641574859619, "learning_rate": 1.5608111071038516e-05, "loss": 0.0625, "step": 28373 }, { "epoch": 0.5024904036446227, "grad_norm": 0.3900671899318695, "learning_rate": 1.560725141275739e-05, "loss": 0.0514, "step": 28374 }, { "epoch": 0.5025081131816511, "grad_norm": 0.48151105642318726, "learning_rate": 1.5606391752478475e-05, "loss": 0.0951, "step": 28375 }, { "epoch": 0.5025258227186796, "grad_norm": 0.6821981072425842, "learning_rate": 1.5605532090204586e-05, "loss": 0.0755, "step": 28376 }, { "epoch": 0.502543532255708, "grad_norm": 0.8227754831314087, "learning_rate": 1.5604672425938557e-05, "loss": 0.094, "step": 28377 }, { "epoch": 0.5025612417927364, "grad_norm": 0.8166113495826721, "learning_rate": 1.5603812759683224e-05, "loss": 0.0808, "step": 28378 }, { "epoch": 0.5025789513297648, "grad_norm": 0.7907069325447083, "learning_rate": 1.56029530914414e-05, "loss": 0.0702, "step": 28379 }, { "epoch": 0.5025966608667933, "grad_norm": 0.6307104825973511, "learning_rate": 1.5602093421215924e-05, "loss": 0.0805, "step": 28380 }, { "epoch": 0.5026143704038217, "grad_norm": 0.7721337080001831, "learning_rate": 1.5601233749009618e-05, "loss": 0.0727, "step": 28381 }, { "epoch": 0.5026320799408501, "grad_norm": 0.7864593863487244, "learning_rate": 1.5600374074825325e-05, "loss": 0.0557, "step": 28382 }, { "epoch": 0.5026497894778785, "grad_norm": 0.755545973777771, "learning_rate": 1.5599514398665852e-05, "loss": 0.0633, "step": 28383 }, { "epoch": 0.502667499014907, "grad_norm": 0.6584537029266357, "learning_rate": 1.5598654720534043e-05, "loss": 0.1008, "step": 28384 }, { "epoch": 0.5026852085519354, "grad_norm": 1.232098937034607, "learning_rate": 1.5597795040432712e-05, "loss": 0.0814, "step": 28385 }, { "epoch": 0.5027029180889638, "grad_norm": 0.7214758396148682, "learning_rate": 1.5596935358364704e-05, "loss": 0.0853, "step": 28386 }, { "epoch": 0.5027206276259922, "grad_norm": 0.9037285447120667, "learning_rate": 1.559607567433284e-05, "loss": 0.0882, "step": 28387 }, { "epoch": 0.5027383371630207, "grad_norm": 0.7253235578536987, "learning_rate": 1.5595215988339937e-05, "loss": 0.0849, "step": 28388 }, { "epoch": 0.5027560467000491, "grad_norm": 0.7879735231399536, "learning_rate": 1.5594356300388844e-05, "loss": 0.0683, "step": 28389 }, { "epoch": 0.5027737562370775, "grad_norm": 0.7123993039131165, "learning_rate": 1.5593496610482376e-05, "loss": 0.086, "step": 28390 }, { "epoch": 0.502791465774106, "grad_norm": 0.358735591173172, "learning_rate": 1.559263691862336e-05, "loss": 0.0444, "step": 28391 }, { "epoch": 0.5028091753111344, "grad_norm": 1.3300763368606567, "learning_rate": 1.5591777224814636e-05, "loss": 0.1138, "step": 28392 }, { "epoch": 0.5028268848481628, "grad_norm": 0.8075767755508423, "learning_rate": 1.559091752905902e-05, "loss": 0.058, "step": 28393 }, { "epoch": 0.5028445943851912, "grad_norm": 0.36553800106048584, "learning_rate": 1.5590057831359345e-05, "loss": 0.0348, "step": 28394 }, { "epoch": 0.5028623039222198, "grad_norm": 0.7791438698768616, "learning_rate": 1.558919813171844e-05, "loss": 0.0582, "step": 28395 }, { "epoch": 0.5028800134592482, "grad_norm": 0.8237286806106567, "learning_rate": 1.558833843013913e-05, "loss": 0.0695, "step": 28396 }, { "epoch": 0.5028977229962766, "grad_norm": 0.5599036812782288, "learning_rate": 1.5587478726624252e-05, "loss": 0.0624, "step": 28397 }, { "epoch": 0.502915432533305, "grad_norm": 0.651620090007782, "learning_rate": 1.5586619021176626e-05, "loss": 0.0643, "step": 28398 }, { "epoch": 0.5029331420703335, "grad_norm": 0.561729907989502, "learning_rate": 1.558575931379908e-05, "loss": 0.0635, "step": 28399 }, { "epoch": 0.5029508516073619, "grad_norm": 0.9003394842147827, "learning_rate": 1.5584899604494446e-05, "loss": 0.058, "step": 28400 }, { "epoch": 0.5029685611443903, "grad_norm": 0.8738299608230591, "learning_rate": 1.5584039893265556e-05, "loss": 0.0639, "step": 28401 }, { "epoch": 0.5029862706814187, "grad_norm": 0.3998192548751831, "learning_rate": 1.558318018011523e-05, "loss": 0.0364, "step": 28402 }, { "epoch": 0.5030039802184472, "grad_norm": 0.9631298780441284, "learning_rate": 1.5582320465046302e-05, "loss": 0.0685, "step": 28403 }, { "epoch": 0.5030216897554756, "grad_norm": 0.6287341713905334, "learning_rate": 1.5581460748061596e-05, "loss": 0.06, "step": 28404 }, { "epoch": 0.503039399292504, "grad_norm": 0.6553195714950562, "learning_rate": 1.5580601029163945e-05, "loss": 0.0561, "step": 28405 }, { "epoch": 0.5030571088295325, "grad_norm": 0.7447848320007324, "learning_rate": 1.557974130835617e-05, "loss": 0.082, "step": 28406 }, { "epoch": 0.5030748183665609, "grad_norm": 0.894932210445404, "learning_rate": 1.5578881585641116e-05, "loss": 0.1047, "step": 28407 }, { "epoch": 0.5030925279035893, "grad_norm": 0.7009222507476807, "learning_rate": 1.5578021861021594e-05, "loss": 0.0883, "step": 28408 }, { "epoch": 0.5031102374406177, "grad_norm": 0.48740705847740173, "learning_rate": 1.5577162134500433e-05, "loss": 0.0676, "step": 28409 }, { "epoch": 0.5031279469776462, "grad_norm": 0.7143219709396362, "learning_rate": 1.5576302406080474e-05, "loss": 0.0852, "step": 28410 }, { "epoch": 0.5031456565146746, "grad_norm": 0.3121899366378784, "learning_rate": 1.5575442675764534e-05, "loss": 0.0684, "step": 28411 }, { "epoch": 0.503163366051703, "grad_norm": 0.2968066334724426, "learning_rate": 1.557458294355545e-05, "loss": 0.0738, "step": 28412 }, { "epoch": 0.5031810755887314, "grad_norm": 0.5061315894126892, "learning_rate": 1.5573723209456045e-05, "loss": 0.109, "step": 28413 }, { "epoch": 0.5031987851257599, "grad_norm": 0.5500639081001282, "learning_rate": 1.5572863473469147e-05, "loss": 0.0443, "step": 28414 }, { "epoch": 0.5032164946627883, "grad_norm": 0.6598462462425232, "learning_rate": 1.5572003735597585e-05, "loss": 0.0788, "step": 28415 }, { "epoch": 0.5032342041998167, "grad_norm": 0.5089427828788757, "learning_rate": 1.5571143995844193e-05, "loss": 0.0496, "step": 28416 }, { "epoch": 0.5032519137368451, "grad_norm": 0.5199460983276367, "learning_rate": 1.5570284254211798e-05, "loss": 0.0616, "step": 28417 }, { "epoch": 0.5032696232738736, "grad_norm": 1.1753997802734375, "learning_rate": 1.5569424510703217e-05, "loss": 0.1107, "step": 28418 }, { "epoch": 0.503287332810902, "grad_norm": 0.6019824147224426, "learning_rate": 1.556856476532129e-05, "loss": 0.075, "step": 28419 }, { "epoch": 0.5033050423479304, "grad_norm": 0.701078474521637, "learning_rate": 1.556770501806884e-05, "loss": 0.0637, "step": 28420 }, { "epoch": 0.5033227518849589, "grad_norm": 0.44585657119750977, "learning_rate": 1.5566845268948703e-05, "loss": 0.0613, "step": 28421 }, { "epoch": 0.5033404614219873, "grad_norm": 0.8940517902374268, "learning_rate": 1.55659855179637e-05, "loss": 0.0972, "step": 28422 }, { "epoch": 0.5033581709590157, "grad_norm": 0.7659351825714111, "learning_rate": 1.556512576511666e-05, "loss": 0.054, "step": 28423 }, { "epoch": 0.5033758804960441, "grad_norm": 0.7176397442817688, "learning_rate": 1.5564266010410417e-05, "loss": 0.0624, "step": 28424 }, { "epoch": 0.5033935900330726, "grad_norm": 0.792209267616272, "learning_rate": 1.556340625384779e-05, "loss": 0.0649, "step": 28425 }, { "epoch": 0.503411299570101, "grad_norm": 0.7085474729537964, "learning_rate": 1.556254649543162e-05, "loss": 0.09, "step": 28426 }, { "epoch": 0.5034290091071294, "grad_norm": 0.7574893832206726, "learning_rate": 1.5561686735164725e-05, "loss": 0.0764, "step": 28427 }, { "epoch": 0.5034467186441578, "grad_norm": 0.6785889267921448, "learning_rate": 1.5560826973049938e-05, "loss": 0.0825, "step": 28428 }, { "epoch": 0.5034644281811863, "grad_norm": 0.9489256143569946, "learning_rate": 1.555996720909009e-05, "loss": 0.1049, "step": 28429 }, { "epoch": 0.5034821377182147, "grad_norm": 0.8203003406524658, "learning_rate": 1.5559107443288005e-05, "loss": 0.0733, "step": 28430 }, { "epoch": 0.5034998472552431, "grad_norm": 0.5487570762634277, "learning_rate": 1.555824767564651e-05, "loss": 0.0486, "step": 28431 }, { "epoch": 0.5035175567922715, "grad_norm": 0.7945175766944885, "learning_rate": 1.5557387906168443e-05, "loss": 0.0678, "step": 28432 }, { "epoch": 0.5035352663293, "grad_norm": 0.5295387506484985, "learning_rate": 1.555652813485662e-05, "loss": 0.0503, "step": 28433 }, { "epoch": 0.5035529758663284, "grad_norm": 0.6329087018966675, "learning_rate": 1.5555668361713877e-05, "loss": 0.0785, "step": 28434 }, { "epoch": 0.5035706854033568, "grad_norm": 0.7896602749824524, "learning_rate": 1.5554808586743044e-05, "loss": 0.1079, "step": 28435 }, { "epoch": 0.5035883949403853, "grad_norm": 0.627495288848877, "learning_rate": 1.5553948809946943e-05, "loss": 0.0947, "step": 28436 }, { "epoch": 0.5036061044774137, "grad_norm": 0.6749498844146729, "learning_rate": 1.5553089031328413e-05, "loss": 0.0842, "step": 28437 }, { "epoch": 0.5036238140144421, "grad_norm": 0.4318702816963196, "learning_rate": 1.5552229250890264e-05, "loss": 0.0642, "step": 28438 }, { "epoch": 0.5036415235514705, "grad_norm": 0.6305559277534485, "learning_rate": 1.555136946863535e-05, "loss": 0.0668, "step": 28439 }, { "epoch": 0.503659233088499, "grad_norm": 0.6543883681297302, "learning_rate": 1.5550509684566483e-05, "loss": 0.0739, "step": 28440 }, { "epoch": 0.5036769426255274, "grad_norm": 0.4167223274707794, "learning_rate": 1.554964989868649e-05, "loss": 0.0507, "step": 28441 }, { "epoch": 0.5036946521625558, "grad_norm": 0.6318796873092651, "learning_rate": 1.5548790110998205e-05, "loss": 0.0471, "step": 28442 }, { "epoch": 0.5037123616995842, "grad_norm": 0.5165998935699463, "learning_rate": 1.5547930321504456e-05, "loss": 0.0608, "step": 28443 }, { "epoch": 0.5037300712366127, "grad_norm": 0.7248167395591736, "learning_rate": 1.5547070530208073e-05, "loss": 0.0728, "step": 28444 }, { "epoch": 0.5037477807736411, "grad_norm": 0.7357708215713501, "learning_rate": 1.5546210737111884e-05, "loss": 0.0613, "step": 28445 }, { "epoch": 0.5037654903106695, "grad_norm": 0.6194685101509094, "learning_rate": 1.554535094221872e-05, "loss": 0.0673, "step": 28446 }, { "epoch": 0.5037831998476979, "grad_norm": 0.4940167963504791, "learning_rate": 1.5544491145531403e-05, "loss": 0.0958, "step": 28447 }, { "epoch": 0.5038009093847264, "grad_norm": 0.6751278042793274, "learning_rate": 1.5543631347052763e-05, "loss": 0.0697, "step": 28448 }, { "epoch": 0.5038186189217548, "grad_norm": 0.7228839993476868, "learning_rate": 1.5542771546785636e-05, "loss": 0.1002, "step": 28449 }, { "epoch": 0.5038363284587832, "grad_norm": 0.6609681844711304, "learning_rate": 1.554191174473284e-05, "loss": 0.0646, "step": 28450 }, { "epoch": 0.5038540379958117, "grad_norm": 0.7001616358757019, "learning_rate": 1.5541051940897214e-05, "loss": 0.1073, "step": 28451 }, { "epoch": 0.5038717475328401, "grad_norm": 0.48813942074775696, "learning_rate": 1.554019213528158e-05, "loss": 0.0675, "step": 28452 }, { "epoch": 0.5038894570698685, "grad_norm": 0.7194779515266418, "learning_rate": 1.5539332327888764e-05, "loss": 0.0655, "step": 28453 }, { "epoch": 0.5039071666068969, "grad_norm": 0.7987613677978516, "learning_rate": 1.5538472518721605e-05, "loss": 0.0759, "step": 28454 }, { "epoch": 0.5039248761439254, "grad_norm": 0.8999785780906677, "learning_rate": 1.5537612707782924e-05, "loss": 0.089, "step": 28455 }, { "epoch": 0.5039425856809538, "grad_norm": 0.6276481747627258, "learning_rate": 1.5536752895075557e-05, "loss": 0.0753, "step": 28456 }, { "epoch": 0.5039602952179822, "grad_norm": 0.3958780765533447, "learning_rate": 1.5535893080602316e-05, "loss": 0.0987, "step": 28457 }, { "epoch": 0.5039780047550106, "grad_norm": 0.35971423983573914, "learning_rate": 1.553503326436605e-05, "loss": 0.0669, "step": 28458 }, { "epoch": 0.5039957142920392, "grad_norm": 0.5914100408554077, "learning_rate": 1.5534173446369576e-05, "loss": 0.0669, "step": 28459 }, { "epoch": 0.5040134238290676, "grad_norm": 0.8586146831512451, "learning_rate": 1.5533313626615724e-05, "loss": 0.0904, "step": 28460 }, { "epoch": 0.504031133366096, "grad_norm": 1.2909374237060547, "learning_rate": 1.5532453805107322e-05, "loss": 0.0804, "step": 28461 }, { "epoch": 0.5040488429031244, "grad_norm": 0.5472096800804138, "learning_rate": 1.5531593981847205e-05, "loss": 0.0934, "step": 28462 }, { "epoch": 0.5040665524401529, "grad_norm": 0.6394737362861633, "learning_rate": 1.5530734156838203e-05, "loss": 0.0655, "step": 28463 }, { "epoch": 0.5040842619771813, "grad_norm": 0.30731743574142456, "learning_rate": 1.552987433008313e-05, "loss": 0.0653, "step": 28464 }, { "epoch": 0.5041019715142097, "grad_norm": 0.642690122127533, "learning_rate": 1.552901450158483e-05, "loss": 0.0612, "step": 28465 }, { "epoch": 0.5041196810512382, "grad_norm": 0.5221913456916809, "learning_rate": 1.552815467134612e-05, "loss": 0.069, "step": 28466 }, { "epoch": 0.5041373905882666, "grad_norm": 0.7038373351097107, "learning_rate": 1.5527294839369833e-05, "loss": 0.0657, "step": 28467 }, { "epoch": 0.504155100125295, "grad_norm": 0.8673656582832336, "learning_rate": 1.5526435005658807e-05, "loss": 0.1079, "step": 28468 }, { "epoch": 0.5041728096623234, "grad_norm": 0.5753893256187439, "learning_rate": 1.5525575170215863e-05, "loss": 0.0744, "step": 28469 }, { "epoch": 0.5041905191993519, "grad_norm": 0.5471320152282715, "learning_rate": 1.5524715333043822e-05, "loss": 0.0768, "step": 28470 }, { "epoch": 0.5042082287363803, "grad_norm": 0.5950979590415955, "learning_rate": 1.5523855494145522e-05, "loss": 0.0935, "step": 28471 }, { "epoch": 0.5042259382734087, "grad_norm": 0.9880126714706421, "learning_rate": 1.5522995653523798e-05, "loss": 0.0609, "step": 28472 }, { "epoch": 0.5042436478104371, "grad_norm": 1.2362116575241089, "learning_rate": 1.552213581118147e-05, "loss": 0.0876, "step": 28473 }, { "epoch": 0.5042613573474656, "grad_norm": 0.561313807964325, "learning_rate": 1.5521275967121363e-05, "loss": 0.0621, "step": 28474 }, { "epoch": 0.504279066884494, "grad_norm": 0.9465153217315674, "learning_rate": 1.552041612134631e-05, "loss": 0.0699, "step": 28475 }, { "epoch": 0.5042967764215224, "grad_norm": 0.5976886749267578, "learning_rate": 1.551955627385914e-05, "loss": 0.0634, "step": 28476 }, { "epoch": 0.5043144859585508, "grad_norm": 0.537796139717102, "learning_rate": 1.551869642466269e-05, "loss": 0.0534, "step": 28477 }, { "epoch": 0.5043321954955793, "grad_norm": 0.6925192475318909, "learning_rate": 1.5517836573759777e-05, "loss": 0.1004, "step": 28478 }, { "epoch": 0.5043499050326077, "grad_norm": 0.8769083619117737, "learning_rate": 1.5516976721153238e-05, "loss": 0.0794, "step": 28479 }, { "epoch": 0.5043676145696361, "grad_norm": 0.7747026681900024, "learning_rate": 1.551611686684589e-05, "loss": 0.0837, "step": 28480 }, { "epoch": 0.5043853241066646, "grad_norm": 0.48696860671043396, "learning_rate": 1.5515257010840574e-05, "loss": 0.0942, "step": 28481 }, { "epoch": 0.504403033643693, "grad_norm": 0.9975029230117798, "learning_rate": 1.551439715314011e-05, "loss": 0.0849, "step": 28482 }, { "epoch": 0.5044207431807214, "grad_norm": 0.5554585456848145, "learning_rate": 1.551353729374734e-05, "loss": 0.0902, "step": 28483 }, { "epoch": 0.5044384527177498, "grad_norm": 0.6517606377601624, "learning_rate": 1.551267743266508e-05, "loss": 0.0841, "step": 28484 }, { "epoch": 0.5044561622547783, "grad_norm": 0.4098753035068512, "learning_rate": 1.5511817569896163e-05, "loss": 0.0411, "step": 28485 }, { "epoch": 0.5044738717918067, "grad_norm": 0.7275522351264954, "learning_rate": 1.5510957705443415e-05, "loss": 0.0777, "step": 28486 }, { "epoch": 0.5044915813288351, "grad_norm": 0.45659366250038147, "learning_rate": 1.551009783930967e-05, "loss": 0.059, "step": 28487 }, { "epoch": 0.5045092908658635, "grad_norm": 0.9063073396682739, "learning_rate": 1.550923797149776e-05, "loss": 0.099, "step": 28488 }, { "epoch": 0.504527000402892, "grad_norm": 0.6404056549072266, "learning_rate": 1.55083781020105e-05, "loss": 0.0964, "step": 28489 }, { "epoch": 0.5045447099399204, "grad_norm": 0.6642173528671265, "learning_rate": 1.550751823085073e-05, "loss": 0.0658, "step": 28490 }, { "epoch": 0.5045624194769488, "grad_norm": 0.5836272239685059, "learning_rate": 1.5506658358021278e-05, "loss": 0.0385, "step": 28491 }, { "epoch": 0.5045801290139772, "grad_norm": 0.5736193656921387, "learning_rate": 1.550579848352497e-05, "loss": 0.0621, "step": 28492 }, { "epoch": 0.5045978385510057, "grad_norm": 0.42396029829978943, "learning_rate": 1.5504938607364638e-05, "loss": 0.0829, "step": 28493 }, { "epoch": 0.5046155480880341, "grad_norm": 0.5607056617736816, "learning_rate": 1.550407872954311e-05, "loss": 0.0549, "step": 28494 }, { "epoch": 0.5046332576250625, "grad_norm": 0.5962293744087219, "learning_rate": 1.5503218850063215e-05, "loss": 0.0567, "step": 28495 }, { "epoch": 0.504650967162091, "grad_norm": 0.692680835723877, "learning_rate": 1.5502358968927776e-05, "loss": 0.1053, "step": 28496 }, { "epoch": 0.5046686766991194, "grad_norm": 0.9011791944503784, "learning_rate": 1.5501499086139627e-05, "loss": 0.0785, "step": 28497 }, { "epoch": 0.5046863862361478, "grad_norm": 0.4361553192138672, "learning_rate": 1.5500639201701596e-05, "loss": 0.0928, "step": 28498 }, { "epoch": 0.5047040957731762, "grad_norm": 0.4200815260410309, "learning_rate": 1.549977931561652e-05, "loss": 0.0975, "step": 28499 }, { "epoch": 0.5047218053102047, "grad_norm": 0.6652052998542786, "learning_rate": 1.549891942788721e-05, "loss": 0.07, "step": 28500 }, { "epoch": 0.5047395148472331, "grad_norm": 0.7339312434196472, "learning_rate": 1.5498059538516515e-05, "loss": 0.0957, "step": 28501 }, { "epoch": 0.5047572243842615, "grad_norm": 1.0874085426330566, "learning_rate": 1.5497199647507247e-05, "loss": 0.0532, "step": 28502 }, { "epoch": 0.5047749339212899, "grad_norm": 0.8236292600631714, "learning_rate": 1.549633975486225e-05, "loss": 0.0702, "step": 28503 }, { "epoch": 0.5047926434583184, "grad_norm": 0.8015736937522888, "learning_rate": 1.549547986058434e-05, "loss": 0.0912, "step": 28504 }, { "epoch": 0.5048103529953468, "grad_norm": 0.9341556429862976, "learning_rate": 1.549461996467635e-05, "loss": 0.053, "step": 28505 }, { "epoch": 0.5048280625323752, "grad_norm": 0.8582356572151184, "learning_rate": 1.5493760067141118e-05, "loss": 0.0898, "step": 28506 }, { "epoch": 0.5048457720694037, "grad_norm": 0.8909592032432556, "learning_rate": 1.5492900167981462e-05, "loss": 0.0529, "step": 28507 }, { "epoch": 0.5048634816064321, "grad_norm": 0.6085403561592102, "learning_rate": 1.5492040267200212e-05, "loss": 0.0892, "step": 28508 }, { "epoch": 0.5048811911434605, "grad_norm": 0.46614208817481995, "learning_rate": 1.54911803648002e-05, "loss": 0.0801, "step": 28509 }, { "epoch": 0.5048989006804889, "grad_norm": 0.5704031586647034, "learning_rate": 1.5490320460784254e-05, "loss": 0.0862, "step": 28510 }, { "epoch": 0.5049166102175174, "grad_norm": 1.0496968030929565, "learning_rate": 1.5489460555155208e-05, "loss": 0.0831, "step": 28511 }, { "epoch": 0.5049343197545458, "grad_norm": 0.7788682579994202, "learning_rate": 1.5488600647915877e-05, "loss": 0.0604, "step": 28512 }, { "epoch": 0.5049520292915742, "grad_norm": 0.5753024220466614, "learning_rate": 1.5487740739069108e-05, "loss": 0.0707, "step": 28513 }, { "epoch": 0.5049697388286026, "grad_norm": 0.486507773399353, "learning_rate": 1.5486880828617717e-05, "loss": 0.1094, "step": 28514 }, { "epoch": 0.5049874483656311, "grad_norm": 0.42698559165000916, "learning_rate": 1.548602091656454e-05, "loss": 0.0644, "step": 28515 }, { "epoch": 0.5050051579026595, "grad_norm": 0.7300403714179993, "learning_rate": 1.5485161002912396e-05, "loss": 0.0386, "step": 28516 }, { "epoch": 0.5050228674396879, "grad_norm": 0.9664210677146912, "learning_rate": 1.5484301087664132e-05, "loss": 0.0927, "step": 28517 }, { "epoch": 0.5050405769767163, "grad_norm": 0.4540872275829315, "learning_rate": 1.5483441170822558e-05, "loss": 0.084, "step": 28518 }, { "epoch": 0.5050582865137448, "grad_norm": 0.7430108785629272, "learning_rate": 1.5482581252390513e-05, "loss": 0.0873, "step": 28519 }, { "epoch": 0.5050759960507732, "grad_norm": 0.33694326877593994, "learning_rate": 1.5481721332370828e-05, "loss": 0.0901, "step": 28520 }, { "epoch": 0.5050937055878016, "grad_norm": 0.8879972100257874, "learning_rate": 1.5480861410766323e-05, "loss": 0.0897, "step": 28521 }, { "epoch": 0.5051114151248302, "grad_norm": 0.8045668005943298, "learning_rate": 1.548000148757984e-05, "loss": 0.0733, "step": 28522 }, { "epoch": 0.5051291246618586, "grad_norm": 0.30843308568000793, "learning_rate": 1.5479141562814194e-05, "loss": 0.083, "step": 28523 }, { "epoch": 0.505146834198887, "grad_norm": 0.5207710862159729, "learning_rate": 1.5478281636472224e-05, "loss": 0.0509, "step": 28524 }, { "epoch": 0.5051645437359154, "grad_norm": 0.8479745984077454, "learning_rate": 1.5477421708556755e-05, "loss": 0.09, "step": 28525 }, { "epoch": 0.5051822532729439, "grad_norm": 0.6130167245864868, "learning_rate": 1.5476561779070622e-05, "loss": 0.0834, "step": 28526 }, { "epoch": 0.5051999628099723, "grad_norm": 0.9269284009933472, "learning_rate": 1.5475701848016645e-05, "loss": 0.1263, "step": 28527 }, { "epoch": 0.5052176723470007, "grad_norm": 0.5717657208442688, "learning_rate": 1.5474841915397652e-05, "loss": 0.0699, "step": 28528 }, { "epoch": 0.5052353818840291, "grad_norm": 0.5005680322647095, "learning_rate": 1.5473981981216484e-05, "loss": 0.0665, "step": 28529 }, { "epoch": 0.5052530914210576, "grad_norm": 0.7114957571029663, "learning_rate": 1.547312204547596e-05, "loss": 0.0659, "step": 28530 }, { "epoch": 0.505270800958086, "grad_norm": 0.7245174646377563, "learning_rate": 1.547226210817891e-05, "loss": 0.0569, "step": 28531 }, { "epoch": 0.5052885104951144, "grad_norm": 0.774558961391449, "learning_rate": 1.5471402169328172e-05, "loss": 0.0794, "step": 28532 }, { "epoch": 0.5053062200321428, "grad_norm": 0.5156983733177185, "learning_rate": 1.547054222892656e-05, "loss": 0.0813, "step": 28533 }, { "epoch": 0.5053239295691713, "grad_norm": 0.7332723140716553, "learning_rate": 1.5469682286976917e-05, "loss": 0.0926, "step": 28534 }, { "epoch": 0.5053416391061997, "grad_norm": 0.7113900780677795, "learning_rate": 1.546882234348207e-05, "loss": 0.1111, "step": 28535 }, { "epoch": 0.5053593486432281, "grad_norm": 0.766806423664093, "learning_rate": 1.546796239844484e-05, "loss": 0.061, "step": 28536 }, { "epoch": 0.5053770581802566, "grad_norm": 0.6056691408157349, "learning_rate": 1.546710245186806e-05, "loss": 0.0996, "step": 28537 }, { "epoch": 0.505394767717285, "grad_norm": 0.5414521098136902, "learning_rate": 1.5466242503754562e-05, "loss": 0.0656, "step": 28538 }, { "epoch": 0.5054124772543134, "grad_norm": 0.9302076101303101, "learning_rate": 1.5465382554107175e-05, "loss": 0.0809, "step": 28539 }, { "epoch": 0.5054301867913418, "grad_norm": 0.6362201571464539, "learning_rate": 1.546452260292873e-05, "loss": 0.0554, "step": 28540 }, { "epoch": 0.5054478963283703, "grad_norm": 0.852318286895752, "learning_rate": 1.5463662650222045e-05, "loss": 0.069, "step": 28541 }, { "epoch": 0.5054656058653987, "grad_norm": 0.4857572317123413, "learning_rate": 1.546280269598996e-05, "loss": 0.071, "step": 28542 }, { "epoch": 0.5054833154024271, "grad_norm": 0.7338934540748596, "learning_rate": 1.54619427402353e-05, "loss": 0.0907, "step": 28543 }, { "epoch": 0.5055010249394555, "grad_norm": 0.42946842312812805, "learning_rate": 1.5461082782960896e-05, "loss": 0.0657, "step": 28544 }, { "epoch": 0.505518734476484, "grad_norm": 0.5617775917053223, "learning_rate": 1.5460222824169575e-05, "loss": 0.0756, "step": 28545 }, { "epoch": 0.5055364440135124, "grad_norm": 0.4379403293132782, "learning_rate": 1.545936286386417e-05, "loss": 0.0497, "step": 28546 }, { "epoch": 0.5055541535505408, "grad_norm": 0.48900744318962097, "learning_rate": 1.5458502902047507e-05, "loss": 0.0869, "step": 28547 }, { "epoch": 0.5055718630875692, "grad_norm": 0.9561846852302551, "learning_rate": 1.545764293872241e-05, "loss": 0.0844, "step": 28548 }, { "epoch": 0.5055895726245977, "grad_norm": 0.4161584973335266, "learning_rate": 1.545678297389172e-05, "loss": 0.0715, "step": 28549 }, { "epoch": 0.5056072821616261, "grad_norm": 0.5118221640586853, "learning_rate": 1.545592300755826e-05, "loss": 0.0536, "step": 28550 }, { "epoch": 0.5056249916986545, "grad_norm": 0.8193929195404053, "learning_rate": 1.5455063039724862e-05, "loss": 0.0775, "step": 28551 }, { "epoch": 0.505642701235683, "grad_norm": 0.6336780190467834, "learning_rate": 1.5454203070394347e-05, "loss": 0.0864, "step": 28552 }, { "epoch": 0.5056604107727114, "grad_norm": 1.3771334886550903, "learning_rate": 1.545334309956955e-05, "loss": 0.0504, "step": 28553 }, { "epoch": 0.5056781203097398, "grad_norm": 0.46230316162109375, "learning_rate": 1.5452483127253305e-05, "loss": 0.0867, "step": 28554 }, { "epoch": 0.5056958298467682, "grad_norm": 0.5053952932357788, "learning_rate": 1.545162315344843e-05, "loss": 0.072, "step": 28555 }, { "epoch": 0.5057135393837967, "grad_norm": 0.9434694051742554, "learning_rate": 1.5450763178157768e-05, "loss": 0.0754, "step": 28556 }, { "epoch": 0.5057312489208251, "grad_norm": 0.5056782960891724, "learning_rate": 1.5449903201384134e-05, "loss": 0.0755, "step": 28557 }, { "epoch": 0.5057489584578535, "grad_norm": 1.5064929723739624, "learning_rate": 1.5449043223130367e-05, "loss": 0.0605, "step": 28558 }, { "epoch": 0.5057666679948819, "grad_norm": 0.5842334628105164, "learning_rate": 1.5448183243399294e-05, "loss": 0.0606, "step": 28559 }, { "epoch": 0.5057843775319104, "grad_norm": 0.7253817319869995, "learning_rate": 1.544732326219374e-05, "loss": 0.0941, "step": 28560 }, { "epoch": 0.5058020870689388, "grad_norm": 1.0083825588226318, "learning_rate": 1.5446463279516543e-05, "loss": 0.0914, "step": 28561 }, { "epoch": 0.5058197966059672, "grad_norm": 1.0896894931793213, "learning_rate": 1.5445603295370517e-05, "loss": 0.1237, "step": 28562 }, { "epoch": 0.5058375061429956, "grad_norm": 0.672430694103241, "learning_rate": 1.544474330975851e-05, "loss": 0.0672, "step": 28563 }, { "epoch": 0.5058552156800241, "grad_norm": 1.1927909851074219, "learning_rate": 1.5443883322683344e-05, "loss": 0.0963, "step": 28564 }, { "epoch": 0.5058729252170525, "grad_norm": 0.5387817025184631, "learning_rate": 1.544302333414784e-05, "loss": 0.0659, "step": 28565 }, { "epoch": 0.5058906347540809, "grad_norm": 0.8766637444496155, "learning_rate": 1.544216334415484e-05, "loss": 0.0571, "step": 28566 }, { "epoch": 0.5059083442911094, "grad_norm": 0.680914044380188, "learning_rate": 1.544130335270716e-05, "loss": 0.0733, "step": 28567 }, { "epoch": 0.5059260538281378, "grad_norm": 0.6526183485984802, "learning_rate": 1.5440443359807646e-05, "loss": 0.0907, "step": 28568 }, { "epoch": 0.5059437633651662, "grad_norm": 0.78338623046875, "learning_rate": 1.543958336545911e-05, "loss": 0.0827, "step": 28569 }, { "epoch": 0.5059614729021946, "grad_norm": 0.9493274688720703, "learning_rate": 1.5438723369664397e-05, "loss": 0.0964, "step": 28570 }, { "epoch": 0.5059791824392231, "grad_norm": 0.5501536130905151, "learning_rate": 1.5437863372426325e-05, "loss": 0.0762, "step": 28571 }, { "epoch": 0.5059968919762515, "grad_norm": 0.667508065700531, "learning_rate": 1.5437003373747724e-05, "loss": 0.0825, "step": 28572 }, { "epoch": 0.5060146015132799, "grad_norm": 0.2731110751628876, "learning_rate": 1.5436143373631427e-05, "loss": 0.0694, "step": 28573 }, { "epoch": 0.5060323110503083, "grad_norm": 0.6952399015426636, "learning_rate": 1.5435283372080264e-05, "loss": 0.0826, "step": 28574 }, { "epoch": 0.5060500205873368, "grad_norm": 0.716802179813385, "learning_rate": 1.5434423369097062e-05, "loss": 0.0744, "step": 28575 }, { "epoch": 0.5060677301243652, "grad_norm": 0.22139011323451996, "learning_rate": 1.543356336468465e-05, "loss": 0.0531, "step": 28576 }, { "epoch": 0.5060854396613936, "grad_norm": 0.6339759230613708, "learning_rate": 1.5432703358845857e-05, "loss": 0.0807, "step": 28577 }, { "epoch": 0.506103149198422, "grad_norm": 0.874794602394104, "learning_rate": 1.5431843351583516e-05, "loss": 0.0712, "step": 28578 }, { "epoch": 0.5061208587354505, "grad_norm": 0.6653344035148621, "learning_rate": 1.5430983342900457e-05, "loss": 0.0456, "step": 28579 }, { "epoch": 0.5061385682724789, "grad_norm": 0.6494090557098389, "learning_rate": 1.5430123332799502e-05, "loss": 0.0966, "step": 28580 }, { "epoch": 0.5061562778095073, "grad_norm": 0.7185962796211243, "learning_rate": 1.5429263321283487e-05, "loss": 0.1108, "step": 28581 }, { "epoch": 0.5061739873465358, "grad_norm": 0.6479809284210205, "learning_rate": 1.5428403308355237e-05, "loss": 0.0951, "step": 28582 }, { "epoch": 0.5061916968835642, "grad_norm": 0.6674548983573914, "learning_rate": 1.5427543294017586e-05, "loss": 0.0659, "step": 28583 }, { "epoch": 0.5062094064205926, "grad_norm": 0.384687215089798, "learning_rate": 1.5426683278273362e-05, "loss": 0.0773, "step": 28584 }, { "epoch": 0.506227115957621, "grad_norm": 0.5942826271057129, "learning_rate": 1.542582326112539e-05, "loss": 0.0627, "step": 28585 }, { "epoch": 0.5062448254946496, "grad_norm": 0.6365704536437988, "learning_rate": 1.54249632425765e-05, "loss": 0.0902, "step": 28586 }, { "epoch": 0.506262535031678, "grad_norm": 0.513001561164856, "learning_rate": 1.5424103222629527e-05, "loss": 0.0756, "step": 28587 }, { "epoch": 0.5062802445687064, "grad_norm": 0.6240721940994263, "learning_rate": 1.5423243201287303e-05, "loss": 0.0842, "step": 28588 }, { "epoch": 0.5062979541057347, "grad_norm": 0.6962231397628784, "learning_rate": 1.5422383178552645e-05, "loss": 0.0774, "step": 28589 }, { "epoch": 0.5063156636427633, "grad_norm": 0.7171329259872437, "learning_rate": 1.542152315442839e-05, "loss": 0.073, "step": 28590 }, { "epoch": 0.5063333731797917, "grad_norm": 0.7062224745750427, "learning_rate": 1.5420663128917366e-05, "loss": 0.088, "step": 28591 }, { "epoch": 0.5063510827168201, "grad_norm": 0.7682397365570068, "learning_rate": 1.54198031020224e-05, "loss": 0.0729, "step": 28592 }, { "epoch": 0.5063687922538485, "grad_norm": 0.5585309267044067, "learning_rate": 1.5418943073746334e-05, "loss": 0.0417, "step": 28593 }, { "epoch": 0.506386501790877, "grad_norm": 1.1601186990737915, "learning_rate": 1.5418083044091976e-05, "loss": 0.0756, "step": 28594 }, { "epoch": 0.5064042113279054, "grad_norm": 0.9653415083885193, "learning_rate": 1.5417223013062175e-05, "loss": 0.0711, "step": 28595 }, { "epoch": 0.5064219208649338, "grad_norm": 0.6124349236488342, "learning_rate": 1.5416362980659747e-05, "loss": 0.0802, "step": 28596 }, { "epoch": 0.5064396304019623, "grad_norm": 0.7665083408355713, "learning_rate": 1.541550294688753e-05, "loss": 0.0827, "step": 28597 }, { "epoch": 0.5064573399389907, "grad_norm": 0.4508618414402008, "learning_rate": 1.5414642911748356e-05, "loss": 0.0566, "step": 28598 }, { "epoch": 0.5064750494760191, "grad_norm": 0.5604614019393921, "learning_rate": 1.5413782875245042e-05, "loss": 0.0708, "step": 28599 }, { "epoch": 0.5064927590130475, "grad_norm": 0.2828984558582306, "learning_rate": 1.5412922837380424e-05, "loss": 0.0833, "step": 28600 }, { "epoch": 0.506510468550076, "grad_norm": 0.7935249209403992, "learning_rate": 1.5412062798157334e-05, "loss": 0.0656, "step": 28601 }, { "epoch": 0.5065281780871044, "grad_norm": 0.6636938452720642, "learning_rate": 1.54112027575786e-05, "loss": 0.0746, "step": 28602 }, { "epoch": 0.5065458876241328, "grad_norm": 0.7479162812232971, "learning_rate": 1.5410342715647043e-05, "loss": 0.073, "step": 28603 }, { "epoch": 0.5065635971611612, "grad_norm": 0.8932572603225708, "learning_rate": 1.5409482672365507e-05, "loss": 0.0687, "step": 28604 }, { "epoch": 0.5065813066981897, "grad_norm": 0.8597751259803772, "learning_rate": 1.5408622627736812e-05, "loss": 0.0817, "step": 28605 }, { "epoch": 0.5065990162352181, "grad_norm": 0.9133700728416443, "learning_rate": 1.540776258176379e-05, "loss": 0.0843, "step": 28606 }, { "epoch": 0.5066167257722465, "grad_norm": 0.7342522144317627, "learning_rate": 1.5406902534449274e-05, "loss": 0.0988, "step": 28607 }, { "epoch": 0.5066344353092749, "grad_norm": 0.9593634009361267, "learning_rate": 1.540604248579609e-05, "loss": 0.0572, "step": 28608 }, { "epoch": 0.5066521448463034, "grad_norm": 0.7933056354522705, "learning_rate": 1.5405182435807064e-05, "loss": 0.0973, "step": 28609 }, { "epoch": 0.5066698543833318, "grad_norm": 0.7333539724349976, "learning_rate": 1.5404322384485024e-05, "loss": 0.082, "step": 28610 }, { "epoch": 0.5066875639203602, "grad_norm": 0.8026947975158691, "learning_rate": 1.540346233183281e-05, "loss": 0.0904, "step": 28611 }, { "epoch": 0.5067052734573887, "grad_norm": 0.7042170166969299, "learning_rate": 1.5402602277853247e-05, "loss": 0.0835, "step": 28612 }, { "epoch": 0.5067229829944171, "grad_norm": 0.6951097249984741, "learning_rate": 1.5401742222549162e-05, "loss": 0.0905, "step": 28613 }, { "epoch": 0.5067406925314455, "grad_norm": 0.5539054274559021, "learning_rate": 1.5400882165923384e-05, "loss": 0.0716, "step": 28614 }, { "epoch": 0.5067584020684739, "grad_norm": 0.4786496162414551, "learning_rate": 1.5400022107978745e-05, "loss": 0.059, "step": 28615 }, { "epoch": 0.5067761116055024, "grad_norm": 0.9702441692352295, "learning_rate": 1.5399162048718075e-05, "loss": 0.0867, "step": 28616 }, { "epoch": 0.5067938211425308, "grad_norm": 0.6396114826202393, "learning_rate": 1.53983019881442e-05, "loss": 0.068, "step": 28617 }, { "epoch": 0.5068115306795592, "grad_norm": 0.4255372881889343, "learning_rate": 1.5397441926259952e-05, "loss": 0.0542, "step": 28618 }, { "epoch": 0.5068292402165876, "grad_norm": 0.9329054355621338, "learning_rate": 1.539658186306816e-05, "loss": 0.0954, "step": 28619 }, { "epoch": 0.5068469497536161, "grad_norm": 0.7713994383811951, "learning_rate": 1.5395721798571654e-05, "loss": 0.0558, "step": 28620 }, { "epoch": 0.5068646592906445, "grad_norm": 0.8497214913368225, "learning_rate": 1.5394861732773265e-05, "loss": 0.0626, "step": 28621 }, { "epoch": 0.5068823688276729, "grad_norm": 0.7312994003295898, "learning_rate": 1.5394001665675822e-05, "loss": 0.0678, "step": 28622 }, { "epoch": 0.5069000783647013, "grad_norm": 0.7550882697105408, "learning_rate": 1.5393141597282154e-05, "loss": 0.0486, "step": 28623 }, { "epoch": 0.5069177879017298, "grad_norm": 0.8735380172729492, "learning_rate": 1.5392281527595084e-05, "loss": 0.0558, "step": 28624 }, { "epoch": 0.5069354974387582, "grad_norm": 0.6991906762123108, "learning_rate": 1.539142145661745e-05, "loss": 0.0431, "step": 28625 }, { "epoch": 0.5069532069757866, "grad_norm": 0.6712841987609863, "learning_rate": 1.5390561384352077e-05, "loss": 0.053, "step": 28626 }, { "epoch": 0.5069709165128151, "grad_norm": 0.5776546001434326, "learning_rate": 1.53897013108018e-05, "loss": 0.0623, "step": 28627 }, { "epoch": 0.5069886260498435, "grad_norm": 1.9072120189666748, "learning_rate": 1.538884123596945e-05, "loss": 0.0997, "step": 28628 }, { "epoch": 0.5070063355868719, "grad_norm": 0.3230156898498535, "learning_rate": 1.5387981159857842e-05, "loss": 0.0416, "step": 28629 }, { "epoch": 0.5070240451239003, "grad_norm": 1.0276663303375244, "learning_rate": 1.5387121082469823e-05, "loss": 0.1339, "step": 28630 }, { "epoch": 0.5070417546609288, "grad_norm": 0.9485934376716614, "learning_rate": 1.5386261003808208e-05, "loss": 0.0859, "step": 28631 }, { "epoch": 0.5070594641979572, "grad_norm": 0.7877182960510254, "learning_rate": 1.538540092387584e-05, "loss": 0.1017, "step": 28632 }, { "epoch": 0.5070771737349856, "grad_norm": 0.5867377519607544, "learning_rate": 1.5384540842675536e-05, "loss": 0.0625, "step": 28633 }, { "epoch": 0.507094883272014, "grad_norm": 0.4795384705066681, "learning_rate": 1.538368076021013e-05, "loss": 0.0595, "step": 28634 }, { "epoch": 0.5071125928090425, "grad_norm": 0.508102536201477, "learning_rate": 1.538282067648246e-05, "loss": 0.0727, "step": 28635 }, { "epoch": 0.5071303023460709, "grad_norm": 0.32871463894844055, "learning_rate": 1.5381960591495345e-05, "loss": 0.0824, "step": 28636 }, { "epoch": 0.5071480118830993, "grad_norm": 0.5743885040283203, "learning_rate": 1.538110050525162e-05, "loss": 0.047, "step": 28637 }, { "epoch": 0.5071657214201277, "grad_norm": 0.5834270715713501, "learning_rate": 1.5380240417754114e-05, "loss": 0.0632, "step": 28638 }, { "epoch": 0.5071834309571562, "grad_norm": 0.46755605936050415, "learning_rate": 1.537938032900565e-05, "loss": 0.0581, "step": 28639 }, { "epoch": 0.5072011404941846, "grad_norm": 0.9801954627037048, "learning_rate": 1.537852023900907e-05, "loss": 0.0778, "step": 28640 }, { "epoch": 0.507218850031213, "grad_norm": 0.8545645475387573, "learning_rate": 1.5377660147767192e-05, "loss": 0.0809, "step": 28641 }, { "epoch": 0.5072365595682415, "grad_norm": 0.9104371070861816, "learning_rate": 1.537680005528285e-05, "loss": 0.0913, "step": 28642 }, { "epoch": 0.5072542691052699, "grad_norm": 0.7233526706695557, "learning_rate": 1.5375939961558872e-05, "loss": 0.0714, "step": 28643 }, { "epoch": 0.5072719786422983, "grad_norm": 0.6521656513214111, "learning_rate": 1.5375079866598094e-05, "loss": 0.0688, "step": 28644 }, { "epoch": 0.5072896881793267, "grad_norm": 0.9335275888442993, "learning_rate": 1.537421977040334e-05, "loss": 0.1416, "step": 28645 }, { "epoch": 0.5073073977163552, "grad_norm": 0.4741717278957367, "learning_rate": 1.5373359672977447e-05, "loss": 0.064, "step": 28646 }, { "epoch": 0.5073251072533836, "grad_norm": 0.5487870573997498, "learning_rate": 1.537249957432323e-05, "loss": 0.0657, "step": 28647 }, { "epoch": 0.507342816790412, "grad_norm": 0.5220997929573059, "learning_rate": 1.5371639474443527e-05, "loss": 0.0513, "step": 28648 }, { "epoch": 0.5073605263274404, "grad_norm": 0.5530524849891663, "learning_rate": 1.537077937334117e-05, "loss": 0.0603, "step": 28649 }, { "epoch": 0.507378235864469, "grad_norm": 0.6174942851066589, "learning_rate": 1.5369919271018988e-05, "loss": 0.0776, "step": 28650 }, { "epoch": 0.5073959454014974, "grad_norm": 0.921043872833252, "learning_rate": 1.536905916747981e-05, "loss": 0.0735, "step": 28651 }, { "epoch": 0.5074136549385257, "grad_norm": 0.47911199927330017, "learning_rate": 1.5368199062726463e-05, "loss": 0.0677, "step": 28652 }, { "epoch": 0.5074313644755541, "grad_norm": 0.9760714769363403, "learning_rate": 1.5367338956761775e-05, "loss": 0.0952, "step": 28653 }, { "epoch": 0.5074490740125827, "grad_norm": 0.4537760615348816, "learning_rate": 1.536647884958858e-05, "loss": 0.0713, "step": 28654 }, { "epoch": 0.5074667835496111, "grad_norm": 0.6485469341278076, "learning_rate": 1.5365618741209713e-05, "loss": 0.0737, "step": 28655 }, { "epoch": 0.5074844930866395, "grad_norm": 0.42385974526405334, "learning_rate": 1.536475863162799e-05, "loss": 0.0593, "step": 28656 }, { "epoch": 0.507502202623668, "grad_norm": 0.5077194571495056, "learning_rate": 1.536389852084625e-05, "loss": 0.0751, "step": 28657 }, { "epoch": 0.5075199121606964, "grad_norm": 0.44671595096588135, "learning_rate": 1.536303840886732e-05, "loss": 0.0609, "step": 28658 }, { "epoch": 0.5075376216977248, "grad_norm": 0.5038868188858032, "learning_rate": 1.536217829569403e-05, "loss": 0.043, "step": 28659 }, { "epoch": 0.5075553312347532, "grad_norm": 1.709049940109253, "learning_rate": 1.5361318181329212e-05, "loss": 0.0875, "step": 28660 }, { "epoch": 0.5075730407717817, "grad_norm": 0.5509591102600098, "learning_rate": 1.5360458065775698e-05, "loss": 0.065, "step": 28661 }, { "epoch": 0.5075907503088101, "grad_norm": 0.5199072957038879, "learning_rate": 1.5359597949036307e-05, "loss": 0.1107, "step": 28662 }, { "epoch": 0.5076084598458385, "grad_norm": 0.532575786113739, "learning_rate": 1.5358737831113875e-05, "loss": 0.0748, "step": 28663 }, { "epoch": 0.5076261693828669, "grad_norm": 0.6533800959587097, "learning_rate": 1.5357877712011235e-05, "loss": 0.0868, "step": 28664 }, { "epoch": 0.5076438789198954, "grad_norm": 0.7721942067146301, "learning_rate": 1.5357017591731212e-05, "loss": 0.1115, "step": 28665 }, { "epoch": 0.5076615884569238, "grad_norm": 0.4143619239330292, "learning_rate": 1.535615747027664e-05, "loss": 0.0644, "step": 28666 }, { "epoch": 0.5076792979939522, "grad_norm": 0.6256051659584045, "learning_rate": 1.535529734765034e-05, "loss": 0.0921, "step": 28667 }, { "epoch": 0.5076970075309806, "grad_norm": 0.82281893491745, "learning_rate": 1.5354437223855153e-05, "loss": 0.0986, "step": 28668 }, { "epoch": 0.5077147170680091, "grad_norm": 0.5934159159660339, "learning_rate": 1.53535770988939e-05, "loss": 0.0586, "step": 28669 }, { "epoch": 0.5077324266050375, "grad_norm": 0.593900740146637, "learning_rate": 1.5352716972769415e-05, "loss": 0.0998, "step": 28670 }, { "epoch": 0.5077501361420659, "grad_norm": 0.9748520255088806, "learning_rate": 1.535185684548453e-05, "loss": 0.0668, "step": 28671 }, { "epoch": 0.5077678456790944, "grad_norm": 0.8712050914764404, "learning_rate": 1.5350996717042065e-05, "loss": 0.0748, "step": 28672 }, { "epoch": 0.5077855552161228, "grad_norm": 0.58454829454422, "learning_rate": 1.5350136587444863e-05, "loss": 0.0346, "step": 28673 }, { "epoch": 0.5078032647531512, "grad_norm": 0.6551015377044678, "learning_rate": 1.534927645669574e-05, "loss": 0.0725, "step": 28674 }, { "epoch": 0.5078209742901796, "grad_norm": 0.97368985414505, "learning_rate": 1.5348416324797542e-05, "loss": 0.08, "step": 28675 }, { "epoch": 0.5078386838272081, "grad_norm": 0.656399130821228, "learning_rate": 1.5347556191753082e-05, "loss": 0.1223, "step": 28676 }, { "epoch": 0.5078563933642365, "grad_norm": 0.6386435031890869, "learning_rate": 1.53466960575652e-05, "loss": 0.0481, "step": 28677 }, { "epoch": 0.5078741029012649, "grad_norm": 0.6137299537658691, "learning_rate": 1.5345835922236724e-05, "loss": 0.049, "step": 28678 }, { "epoch": 0.5078918124382933, "grad_norm": 0.348787397146225, "learning_rate": 1.534497578577048e-05, "loss": 0.0968, "step": 28679 }, { "epoch": 0.5079095219753218, "grad_norm": 0.4514407813549042, "learning_rate": 1.5344115648169305e-05, "loss": 0.0724, "step": 28680 }, { "epoch": 0.5079272315123502, "grad_norm": 0.4901953339576721, "learning_rate": 1.5343255509436022e-05, "loss": 0.0629, "step": 28681 }, { "epoch": 0.5079449410493786, "grad_norm": 0.8417720794677734, "learning_rate": 1.534239536957346e-05, "loss": 0.0678, "step": 28682 }, { "epoch": 0.507962650586407, "grad_norm": 0.8405170440673828, "learning_rate": 1.5341535228584453e-05, "loss": 0.0607, "step": 28683 }, { "epoch": 0.5079803601234355, "grad_norm": 1.1398465633392334, "learning_rate": 1.5340675086471834e-05, "loss": 0.1101, "step": 28684 }, { "epoch": 0.5079980696604639, "grad_norm": 0.24070408940315247, "learning_rate": 1.5339814943238428e-05, "loss": 0.1035, "step": 28685 }, { "epoch": 0.5080157791974923, "grad_norm": 0.38059452176094055, "learning_rate": 1.5338954798887057e-05, "loss": 0.0685, "step": 28686 }, { "epoch": 0.5080334887345208, "grad_norm": 0.8262730240821838, "learning_rate": 1.5338094653420567e-05, "loss": 0.0616, "step": 28687 }, { "epoch": 0.5080511982715492, "grad_norm": 0.537260115146637, "learning_rate": 1.5337234506841777e-05, "loss": 0.0736, "step": 28688 }, { "epoch": 0.5080689078085776, "grad_norm": 0.5914062261581421, "learning_rate": 1.5336374359153523e-05, "loss": 0.0889, "step": 28689 }, { "epoch": 0.508086617345606, "grad_norm": 0.9556017518043518, "learning_rate": 1.5335514210358627e-05, "loss": 0.0965, "step": 28690 }, { "epoch": 0.5081043268826345, "grad_norm": 0.8303605318069458, "learning_rate": 1.5334654060459922e-05, "loss": 0.061, "step": 28691 }, { "epoch": 0.5081220364196629, "grad_norm": 0.6565145254135132, "learning_rate": 1.5333793909460244e-05, "loss": 0.0569, "step": 28692 }, { "epoch": 0.5081397459566913, "grad_norm": 0.5727855563163757, "learning_rate": 1.5332933757362415e-05, "loss": 0.0795, "step": 28693 }, { "epoch": 0.5081574554937197, "grad_norm": 1.0531673431396484, "learning_rate": 1.533207360416927e-05, "loss": 0.0863, "step": 28694 }, { "epoch": 0.5081751650307482, "grad_norm": 0.7208468914031982, "learning_rate": 1.5331213449883636e-05, "loss": 0.1238, "step": 28695 }, { "epoch": 0.5081928745677766, "grad_norm": 1.0981626510620117, "learning_rate": 1.5330353294508337e-05, "loss": 0.0556, "step": 28696 }, { "epoch": 0.508210584104805, "grad_norm": 0.6136245727539062, "learning_rate": 1.5329493138046215e-05, "loss": 0.0352, "step": 28697 }, { "epoch": 0.5082282936418334, "grad_norm": 0.7440205216407776, "learning_rate": 1.5328632980500097e-05, "loss": 0.0806, "step": 28698 }, { "epoch": 0.5082460031788619, "grad_norm": 0.4427461326122284, "learning_rate": 1.5327772821872802e-05, "loss": 0.0708, "step": 28699 }, { "epoch": 0.5082637127158903, "grad_norm": 0.5598100423812866, "learning_rate": 1.5326912662167173e-05, "loss": 0.0534, "step": 28700 }, { "epoch": 0.5082814222529187, "grad_norm": 0.8749774098396301, "learning_rate": 1.532605250138603e-05, "loss": 0.0627, "step": 28701 }, { "epoch": 0.5082991317899472, "grad_norm": 0.7578205466270447, "learning_rate": 1.532519233953221e-05, "loss": 0.0542, "step": 28702 }, { "epoch": 0.5083168413269756, "grad_norm": 0.627320408821106, "learning_rate": 1.532433217660854e-05, "loss": 0.0729, "step": 28703 }, { "epoch": 0.508334550864004, "grad_norm": 1.0299208164215088, "learning_rate": 1.532347201261785e-05, "loss": 0.079, "step": 28704 }, { "epoch": 0.5083522604010324, "grad_norm": 0.6471815705299377, "learning_rate": 1.532261184756297e-05, "loss": 0.056, "step": 28705 }, { "epoch": 0.5083699699380609, "grad_norm": 0.8305619955062866, "learning_rate": 1.5321751681446726e-05, "loss": 0.0744, "step": 28706 }, { "epoch": 0.5083876794750893, "grad_norm": 0.6524133086204529, "learning_rate": 1.5320891514271957e-05, "loss": 0.0673, "step": 28707 }, { "epoch": 0.5084053890121177, "grad_norm": 0.47161540389060974, "learning_rate": 1.5320031346041484e-05, "loss": 0.0635, "step": 28708 }, { "epoch": 0.5084230985491461, "grad_norm": 0.4952642321586609, "learning_rate": 1.5319171176758142e-05, "loss": 0.0563, "step": 28709 }, { "epoch": 0.5084408080861746, "grad_norm": 0.18571026623249054, "learning_rate": 1.5318311006424757e-05, "loss": 0.0737, "step": 28710 }, { "epoch": 0.508458517623203, "grad_norm": 0.9926881194114685, "learning_rate": 1.5317450835044162e-05, "loss": 0.0752, "step": 28711 }, { "epoch": 0.5084762271602314, "grad_norm": 0.5658674836158752, "learning_rate": 1.531659066261919e-05, "loss": 0.0634, "step": 28712 }, { "epoch": 0.5084939366972598, "grad_norm": 1.0289978981018066, "learning_rate": 1.5315730489152662e-05, "loss": 0.0725, "step": 28713 }, { "epoch": 0.5085116462342884, "grad_norm": 0.4951963722705841, "learning_rate": 1.5314870314647416e-05, "loss": 0.0868, "step": 28714 }, { "epoch": 0.5085293557713167, "grad_norm": 0.6396661996841431, "learning_rate": 1.531401013910627e-05, "loss": 0.0895, "step": 28715 }, { "epoch": 0.5085470653083451, "grad_norm": 0.5164998173713684, "learning_rate": 1.531314996253207e-05, "loss": 0.0479, "step": 28716 }, { "epoch": 0.5085647748453737, "grad_norm": 0.3979083299636841, "learning_rate": 1.531228978492764e-05, "loss": 0.0418, "step": 28717 }, { "epoch": 0.5085824843824021, "grad_norm": 0.6829357147216797, "learning_rate": 1.5311429606295802e-05, "loss": 0.0838, "step": 28718 }, { "epoch": 0.5086001939194305, "grad_norm": 0.5546011924743652, "learning_rate": 1.5310569426639397e-05, "loss": 0.0572, "step": 28719 }, { "epoch": 0.5086179034564589, "grad_norm": 0.8612796068191528, "learning_rate": 1.530970924596125e-05, "loss": 0.0894, "step": 28720 }, { "epoch": 0.5086356129934874, "grad_norm": 0.6822860836982727, "learning_rate": 1.530884906426419e-05, "loss": 0.1055, "step": 28721 }, { "epoch": 0.5086533225305158, "grad_norm": 0.6201756000518799, "learning_rate": 1.5307988881551045e-05, "loss": 0.0988, "step": 28722 }, { "epoch": 0.5086710320675442, "grad_norm": 0.5936692357063293, "learning_rate": 1.530712869782465e-05, "loss": 0.0892, "step": 28723 }, { "epoch": 0.5086887416045726, "grad_norm": 0.824688196182251, "learning_rate": 1.530626851308783e-05, "loss": 0.0756, "step": 28724 }, { "epoch": 0.5087064511416011, "grad_norm": 0.8198993802070618, "learning_rate": 1.530540832734342e-05, "loss": 0.0729, "step": 28725 }, { "epoch": 0.5087241606786295, "grad_norm": 0.475734144449234, "learning_rate": 1.5304548140594252e-05, "loss": 0.0759, "step": 28726 }, { "epoch": 0.5087418702156579, "grad_norm": 0.5571784377098083, "learning_rate": 1.5303687952843143e-05, "loss": 0.0973, "step": 28727 }, { "epoch": 0.5087595797526863, "grad_norm": 0.6172629594802856, "learning_rate": 1.5302827764092937e-05, "loss": 0.0599, "step": 28728 }, { "epoch": 0.5087772892897148, "grad_norm": 0.8175389766693115, "learning_rate": 1.5301967574346455e-05, "loss": 0.0739, "step": 28729 }, { "epoch": 0.5087949988267432, "grad_norm": 0.7061100602149963, "learning_rate": 1.530110738360653e-05, "loss": 0.0679, "step": 28730 }, { "epoch": 0.5088127083637716, "grad_norm": 1.125162124633789, "learning_rate": 1.5300247191875997e-05, "loss": 0.0848, "step": 28731 }, { "epoch": 0.5088304179008001, "grad_norm": 0.4165390133857727, "learning_rate": 1.529938699915768e-05, "loss": 0.0773, "step": 28732 }, { "epoch": 0.5088481274378285, "grad_norm": 0.9475959539413452, "learning_rate": 1.5298526805454408e-05, "loss": 0.0931, "step": 28733 }, { "epoch": 0.5088658369748569, "grad_norm": 0.7017257809638977, "learning_rate": 1.529766661076901e-05, "loss": 0.0621, "step": 28734 }, { "epoch": 0.5088835465118853, "grad_norm": 0.48377716541290283, "learning_rate": 1.529680641510432e-05, "loss": 0.0609, "step": 28735 }, { "epoch": 0.5089012560489138, "grad_norm": 0.7477728724479675, "learning_rate": 1.529594621846317e-05, "loss": 0.0843, "step": 28736 }, { "epoch": 0.5089189655859422, "grad_norm": 0.4051234722137451, "learning_rate": 1.5295086020848385e-05, "loss": 0.0456, "step": 28737 }, { "epoch": 0.5089366751229706, "grad_norm": 0.36744317412376404, "learning_rate": 1.52942258222628e-05, "loss": 0.0697, "step": 28738 }, { "epoch": 0.508954384659999, "grad_norm": 0.590392529964447, "learning_rate": 1.5293365622709237e-05, "loss": 0.0624, "step": 28739 }, { "epoch": 0.5089720941970275, "grad_norm": 0.4392564594745636, "learning_rate": 1.5292505422190535e-05, "loss": 0.0471, "step": 28740 }, { "epoch": 0.5089898037340559, "grad_norm": 0.7529475092887878, "learning_rate": 1.5291645220709516e-05, "loss": 0.0531, "step": 28741 }, { "epoch": 0.5090075132710843, "grad_norm": 0.8308330774307251, "learning_rate": 1.529078501826902e-05, "loss": 0.0772, "step": 28742 }, { "epoch": 0.5090252228081127, "grad_norm": 1.0732094049453735, "learning_rate": 1.528992481487186e-05, "loss": 0.0862, "step": 28743 }, { "epoch": 0.5090429323451412, "grad_norm": 0.6228277087211609, "learning_rate": 1.5289064610520885e-05, "loss": 0.0419, "step": 28744 }, { "epoch": 0.5090606418821696, "grad_norm": 0.5055201053619385, "learning_rate": 1.5288204405218913e-05, "loss": 0.0651, "step": 28745 }, { "epoch": 0.509078351419198, "grad_norm": 0.8082595467567444, "learning_rate": 1.5287344198968784e-05, "loss": 0.0793, "step": 28746 }, { "epoch": 0.5090960609562265, "grad_norm": 0.8337514400482178, "learning_rate": 1.5286483991773314e-05, "loss": 0.0887, "step": 28747 }, { "epoch": 0.5091137704932549, "grad_norm": 0.6899519562721252, "learning_rate": 1.528562378363534e-05, "loss": 0.0517, "step": 28748 }, { "epoch": 0.5091314800302833, "grad_norm": 0.7457098364830017, "learning_rate": 1.5284763574557703e-05, "loss": 0.0859, "step": 28749 }, { "epoch": 0.5091491895673117, "grad_norm": 0.5528397560119629, "learning_rate": 1.5283903364543213e-05, "loss": 0.0468, "step": 28750 }, { "epoch": 0.5091668991043402, "grad_norm": 0.552147626876831, "learning_rate": 1.5283043153594714e-05, "loss": 0.0574, "step": 28751 }, { "epoch": 0.5091846086413686, "grad_norm": 0.4866293966770172, "learning_rate": 1.528218294171503e-05, "loss": 0.0521, "step": 28752 }, { "epoch": 0.509202318178397, "grad_norm": 0.44654741883277893, "learning_rate": 1.5281322728906992e-05, "loss": 0.0828, "step": 28753 }, { "epoch": 0.5092200277154254, "grad_norm": 0.4322417974472046, "learning_rate": 1.528046251517343e-05, "loss": 0.0542, "step": 28754 }, { "epoch": 0.5092377372524539, "grad_norm": 0.5350540280342102, "learning_rate": 1.5279602300517177e-05, "loss": 0.056, "step": 28755 }, { "epoch": 0.5092554467894823, "grad_norm": 0.8246468901634216, "learning_rate": 1.5278742084941062e-05, "loss": 0.0609, "step": 28756 }, { "epoch": 0.5092731563265107, "grad_norm": 0.9858406186103821, "learning_rate": 1.5277881868447908e-05, "loss": 0.0707, "step": 28757 }, { "epoch": 0.5092908658635391, "grad_norm": 0.7912715673446655, "learning_rate": 1.5277021651040555e-05, "loss": 0.0866, "step": 28758 }, { "epoch": 0.5093085754005676, "grad_norm": 0.6706645488739014, "learning_rate": 1.5276161432721826e-05, "loss": 0.0639, "step": 28759 }, { "epoch": 0.509326284937596, "grad_norm": 0.9257394075393677, "learning_rate": 1.527530121349456e-05, "loss": 0.0786, "step": 28760 }, { "epoch": 0.5093439944746244, "grad_norm": 0.9492319226264954, "learning_rate": 1.5274440993361575e-05, "loss": 0.0944, "step": 28761 }, { "epoch": 0.5093617040116529, "grad_norm": 0.7126356363296509, "learning_rate": 1.5273580772325713e-05, "loss": 0.072, "step": 28762 }, { "epoch": 0.5093794135486813, "grad_norm": 0.553215742111206, "learning_rate": 1.527272055038979e-05, "loss": 0.051, "step": 28763 }, { "epoch": 0.5093971230857097, "grad_norm": 0.4199290871620178, "learning_rate": 1.5271860327556646e-05, "loss": 0.0873, "step": 28764 }, { "epoch": 0.5094148326227381, "grad_norm": 0.5643254518508911, "learning_rate": 1.5271000103829112e-05, "loss": 0.0583, "step": 28765 }, { "epoch": 0.5094325421597666, "grad_norm": 0.39583975076675415, "learning_rate": 1.5270139879210016e-05, "loss": 0.0619, "step": 28766 }, { "epoch": 0.509450251696795, "grad_norm": 0.4449368119239807, "learning_rate": 1.5269279653702182e-05, "loss": 0.0645, "step": 28767 }, { "epoch": 0.5094679612338234, "grad_norm": 0.4936937689781189, "learning_rate": 1.526841942730845e-05, "loss": 0.0598, "step": 28768 }, { "epoch": 0.5094856707708518, "grad_norm": 0.41685956716537476, "learning_rate": 1.5267559200031642e-05, "loss": 0.0532, "step": 28769 }, { "epoch": 0.5095033803078803, "grad_norm": 0.9543519020080566, "learning_rate": 1.526669897187459e-05, "loss": 0.1008, "step": 28770 }, { "epoch": 0.5095210898449087, "grad_norm": 0.4234737455844879, "learning_rate": 1.5265838742840132e-05, "loss": 0.0652, "step": 28771 }, { "epoch": 0.5095387993819371, "grad_norm": 0.6695479154586792, "learning_rate": 1.5264978512931088e-05, "loss": 0.0814, "step": 28772 }, { "epoch": 0.5095565089189655, "grad_norm": 0.6801131367683411, "learning_rate": 1.5264118282150288e-05, "loss": 0.064, "step": 28773 }, { "epoch": 0.509574218455994, "grad_norm": 0.7385424375534058, "learning_rate": 1.526325805050057e-05, "loss": 0.0501, "step": 28774 }, { "epoch": 0.5095919279930224, "grad_norm": 0.4483904242515564, "learning_rate": 1.5262397817984755e-05, "loss": 0.0576, "step": 28775 }, { "epoch": 0.5096096375300508, "grad_norm": 0.9126002192497253, "learning_rate": 1.5261537584605685e-05, "loss": 0.0688, "step": 28776 }, { "epoch": 0.5096273470670794, "grad_norm": 0.5096442103385925, "learning_rate": 1.5260677350366175e-05, "loss": 0.0821, "step": 28777 }, { "epoch": 0.5096450566041077, "grad_norm": 0.9539254307746887, "learning_rate": 1.525981711526907e-05, "loss": 0.0786, "step": 28778 }, { "epoch": 0.5096627661411361, "grad_norm": 0.4618174135684967, "learning_rate": 1.525895687931719e-05, "loss": 0.0449, "step": 28779 }, { "epoch": 0.5096804756781645, "grad_norm": 0.8516321182250977, "learning_rate": 1.5258096642513364e-05, "loss": 0.0835, "step": 28780 }, { "epoch": 0.5096981852151931, "grad_norm": 0.40205705165863037, "learning_rate": 1.525723640486043e-05, "loss": 0.0475, "step": 28781 }, { "epoch": 0.5097158947522215, "grad_norm": 0.821236252784729, "learning_rate": 1.5256376166361212e-05, "loss": 0.036, "step": 28782 }, { "epoch": 0.5097336042892499, "grad_norm": 1.4189318418502808, "learning_rate": 1.5255515927018546e-05, "loss": 0.0694, "step": 28783 }, { "epoch": 0.5097513138262783, "grad_norm": 0.7934969067573547, "learning_rate": 1.5254655686835256e-05, "loss": 0.0611, "step": 28784 }, { "epoch": 0.5097690233633068, "grad_norm": 0.8492423892021179, "learning_rate": 1.525379544581418e-05, "loss": 0.1085, "step": 28785 }, { "epoch": 0.5097867329003352, "grad_norm": 0.6285756230354309, "learning_rate": 1.5252935203958134e-05, "loss": 0.0695, "step": 28786 }, { "epoch": 0.5098044424373636, "grad_norm": 0.7754638195037842, "learning_rate": 1.5252074961269961e-05, "loss": 0.0636, "step": 28787 }, { "epoch": 0.509822151974392, "grad_norm": 0.6831557154655457, "learning_rate": 1.5251214717752488e-05, "loss": 0.0808, "step": 28788 }, { "epoch": 0.5098398615114205, "grad_norm": 0.4685856103897095, "learning_rate": 1.5250354473408542e-05, "loss": 0.0728, "step": 28789 }, { "epoch": 0.5098575710484489, "grad_norm": 0.9248685836791992, "learning_rate": 1.5249494228240955e-05, "loss": 0.097, "step": 28790 }, { "epoch": 0.5098752805854773, "grad_norm": 0.334005743265152, "learning_rate": 1.5248633982252559e-05, "loss": 0.0525, "step": 28791 }, { "epoch": 0.5098929901225058, "grad_norm": 0.8722782731056213, "learning_rate": 1.524777373544618e-05, "loss": 0.0688, "step": 28792 }, { "epoch": 0.5099106996595342, "grad_norm": 0.7430809140205383, "learning_rate": 1.524691348782465e-05, "loss": 0.0956, "step": 28793 }, { "epoch": 0.5099284091965626, "grad_norm": 0.8334140181541443, "learning_rate": 1.5246053239390802e-05, "loss": 0.0928, "step": 28794 }, { "epoch": 0.509946118733591, "grad_norm": 0.6298500895500183, "learning_rate": 1.5245192990147464e-05, "loss": 0.0931, "step": 28795 }, { "epoch": 0.5099638282706195, "grad_norm": 0.74289470911026, "learning_rate": 1.5244332740097462e-05, "loss": 0.0804, "step": 28796 }, { "epoch": 0.5099815378076479, "grad_norm": 0.7966554164886475, "learning_rate": 1.5243472489243634e-05, "loss": 0.0603, "step": 28797 }, { "epoch": 0.5099992473446763, "grad_norm": 0.9430118799209595, "learning_rate": 1.5242612237588803e-05, "loss": 0.0797, "step": 28798 }, { "epoch": 0.5100169568817047, "grad_norm": 0.5418671369552612, "learning_rate": 1.5241751985135807e-05, "loss": 0.1028, "step": 28799 }, { "epoch": 0.5100346664187332, "grad_norm": 0.6098170280456543, "learning_rate": 1.5240891731887468e-05, "loss": 0.0767, "step": 28800 }, { "epoch": 0.5100523759557616, "grad_norm": 0.7984751462936401, "learning_rate": 1.524003147784662e-05, "loss": 0.04, "step": 28801 }, { "epoch": 0.51007008549279, "grad_norm": 0.8298717141151428, "learning_rate": 1.5239171223016093e-05, "loss": 0.0897, "step": 28802 }, { "epoch": 0.5100877950298184, "grad_norm": 0.593081533908844, "learning_rate": 1.5238310967398718e-05, "loss": 0.0946, "step": 28803 }, { "epoch": 0.5101055045668469, "grad_norm": 1.0787352323532104, "learning_rate": 1.5237450710997324e-05, "loss": 0.0694, "step": 28804 }, { "epoch": 0.5101232141038753, "grad_norm": 0.5631611347198486, "learning_rate": 1.5236590453814739e-05, "loss": 0.0444, "step": 28805 }, { "epoch": 0.5101409236409037, "grad_norm": 0.6145539283752441, "learning_rate": 1.5235730195853797e-05, "loss": 0.0842, "step": 28806 }, { "epoch": 0.5101586331779322, "grad_norm": 0.6619873642921448, "learning_rate": 1.5234869937117324e-05, "loss": 0.076, "step": 28807 }, { "epoch": 0.5101763427149606, "grad_norm": 0.9465886950492859, "learning_rate": 1.5234009677608157e-05, "loss": 0.0747, "step": 28808 }, { "epoch": 0.510194052251989, "grad_norm": 0.7653399705886841, "learning_rate": 1.5233149417329117e-05, "loss": 0.105, "step": 28809 }, { "epoch": 0.5102117617890174, "grad_norm": 0.5591025948524475, "learning_rate": 1.5232289156283038e-05, "loss": 0.0833, "step": 28810 }, { "epoch": 0.5102294713260459, "grad_norm": 0.7584209442138672, "learning_rate": 1.5231428894472755e-05, "loss": 0.0727, "step": 28811 }, { "epoch": 0.5102471808630743, "grad_norm": 1.1209173202514648, "learning_rate": 1.5230568631901097e-05, "loss": 0.0924, "step": 28812 }, { "epoch": 0.5102648904001027, "grad_norm": 0.6993194818496704, "learning_rate": 1.5229708368570889e-05, "loss": 0.0566, "step": 28813 }, { "epoch": 0.5102825999371311, "grad_norm": 1.1201677322387695, "learning_rate": 1.5228848104484962e-05, "loss": 0.0779, "step": 28814 }, { "epoch": 0.5103003094741596, "grad_norm": 0.7217416763305664, "learning_rate": 1.5227987839646147e-05, "loss": 0.0905, "step": 28815 }, { "epoch": 0.510318019011188, "grad_norm": 0.6972032189369202, "learning_rate": 1.5227127574057277e-05, "loss": 0.0765, "step": 28816 }, { "epoch": 0.5103357285482164, "grad_norm": 0.5897740125656128, "learning_rate": 1.5226267307721183e-05, "loss": 0.0637, "step": 28817 }, { "epoch": 0.5103534380852448, "grad_norm": 0.35909825563430786, "learning_rate": 1.5225407040640689e-05, "loss": 0.0721, "step": 28818 }, { "epoch": 0.5103711476222733, "grad_norm": 0.6990304589271545, "learning_rate": 1.522454677281863e-05, "loss": 0.103, "step": 28819 }, { "epoch": 0.5103888571593017, "grad_norm": 0.43177473545074463, "learning_rate": 1.5223686504257834e-05, "loss": 0.0949, "step": 28820 }, { "epoch": 0.5104065666963301, "grad_norm": 0.43716326355934143, "learning_rate": 1.5222826234961131e-05, "loss": 0.046, "step": 28821 }, { "epoch": 0.5104242762333586, "grad_norm": 0.45916101336479187, "learning_rate": 1.5221965964931357e-05, "loss": 0.0983, "step": 28822 }, { "epoch": 0.510441985770387, "grad_norm": 0.5089365839958191, "learning_rate": 1.5221105694171331e-05, "loss": 0.0757, "step": 28823 }, { "epoch": 0.5104596953074154, "grad_norm": 0.7008010149002075, "learning_rate": 1.5220245422683895e-05, "loss": 0.106, "step": 28824 }, { "epoch": 0.5104774048444438, "grad_norm": 0.7573557496070862, "learning_rate": 1.5219385150471865e-05, "loss": 0.0787, "step": 28825 }, { "epoch": 0.5104951143814723, "grad_norm": 0.6417087316513062, "learning_rate": 1.5218524877538089e-05, "loss": 0.065, "step": 28826 }, { "epoch": 0.5105128239185007, "grad_norm": 1.203108549118042, "learning_rate": 1.5217664603885389e-05, "loss": 0.1002, "step": 28827 }, { "epoch": 0.5105305334555291, "grad_norm": 0.8363730311393738, "learning_rate": 1.521680432951659e-05, "loss": 0.0625, "step": 28828 }, { "epoch": 0.5105482429925575, "grad_norm": 0.8029553890228271, "learning_rate": 1.5215944054434526e-05, "loss": 0.0696, "step": 28829 }, { "epoch": 0.510565952529586, "grad_norm": 0.8849847316741943, "learning_rate": 1.5215083778642028e-05, "loss": 0.0785, "step": 28830 }, { "epoch": 0.5105836620666144, "grad_norm": 0.7681806087493896, "learning_rate": 1.5214223502141931e-05, "loss": 0.0868, "step": 28831 }, { "epoch": 0.5106013716036428, "grad_norm": 0.7591203451156616, "learning_rate": 1.5213363224937054e-05, "loss": 0.1187, "step": 28832 }, { "epoch": 0.5106190811406712, "grad_norm": 0.21025551855564117, "learning_rate": 1.521250294703024e-05, "loss": 0.0629, "step": 28833 }, { "epoch": 0.5106367906776997, "grad_norm": 0.3994697630405426, "learning_rate": 1.5211642668424309e-05, "loss": 0.0531, "step": 28834 }, { "epoch": 0.5106545002147281, "grad_norm": 0.7129825353622437, "learning_rate": 1.5210782389122094e-05, "loss": 0.0673, "step": 28835 }, { "epoch": 0.5106722097517565, "grad_norm": 0.5146604776382446, "learning_rate": 1.520992210912643e-05, "loss": 0.0621, "step": 28836 }, { "epoch": 0.510689919288785, "grad_norm": 0.5267768502235413, "learning_rate": 1.5209061828440138e-05, "loss": 0.0653, "step": 28837 }, { "epoch": 0.5107076288258134, "grad_norm": 0.7351944446563721, "learning_rate": 1.520820154706606e-05, "loss": 0.0812, "step": 28838 }, { "epoch": 0.5107253383628418, "grad_norm": 0.8370080590248108, "learning_rate": 1.5207341265007014e-05, "loss": 0.0532, "step": 28839 }, { "epoch": 0.5107430478998702, "grad_norm": 0.5007734298706055, "learning_rate": 1.520648098226584e-05, "loss": 0.0805, "step": 28840 }, { "epoch": 0.5107607574368987, "grad_norm": 0.5769387483596802, "learning_rate": 1.5205620698845364e-05, "loss": 0.0571, "step": 28841 }, { "epoch": 0.5107784669739271, "grad_norm": 0.5297203660011292, "learning_rate": 1.5204760414748417e-05, "loss": 0.0538, "step": 28842 }, { "epoch": 0.5107961765109555, "grad_norm": 0.7474256753921509, "learning_rate": 1.520390012997783e-05, "loss": 0.0836, "step": 28843 }, { "epoch": 0.510813886047984, "grad_norm": 0.6127769947052002, "learning_rate": 1.5203039844536428e-05, "loss": 0.0757, "step": 28844 }, { "epoch": 0.5108315955850125, "grad_norm": 0.9601754546165466, "learning_rate": 1.5202179558427049e-05, "loss": 0.0604, "step": 28845 }, { "epoch": 0.5108493051220409, "grad_norm": 0.6898972988128662, "learning_rate": 1.5201319271652518e-05, "loss": 0.0677, "step": 28846 }, { "epoch": 0.5108670146590693, "grad_norm": 0.5556688904762268, "learning_rate": 1.5200458984215669e-05, "loss": 0.0506, "step": 28847 }, { "epoch": 0.5108847241960977, "grad_norm": 0.4077818989753723, "learning_rate": 1.5199598696119329e-05, "loss": 0.0562, "step": 28848 }, { "epoch": 0.5109024337331262, "grad_norm": 0.5562890768051147, "learning_rate": 1.5198738407366326e-05, "loss": 0.0623, "step": 28849 }, { "epoch": 0.5109201432701546, "grad_norm": 0.700107753276825, "learning_rate": 1.5197878117959494e-05, "loss": 0.0888, "step": 28850 }, { "epoch": 0.510937852807183, "grad_norm": 0.5831775665283203, "learning_rate": 1.519701782790167e-05, "loss": 0.1007, "step": 28851 }, { "epoch": 0.5109555623442115, "grad_norm": 0.71531742811203, "learning_rate": 1.5196157537195677e-05, "loss": 0.0672, "step": 28852 }, { "epoch": 0.5109732718812399, "grad_norm": 0.7493498921394348, "learning_rate": 1.519529724584434e-05, "loss": 0.0618, "step": 28853 }, { "epoch": 0.5109909814182683, "grad_norm": 0.6121418476104736, "learning_rate": 1.5194436953850495e-05, "loss": 0.0645, "step": 28854 }, { "epoch": 0.5110086909552967, "grad_norm": 0.8029183745384216, "learning_rate": 1.5193576661216974e-05, "loss": 0.0713, "step": 28855 }, { "epoch": 0.5110264004923252, "grad_norm": 0.48755139112472534, "learning_rate": 1.5192716367946606e-05, "loss": 0.0433, "step": 28856 }, { "epoch": 0.5110441100293536, "grad_norm": 0.5862760543823242, "learning_rate": 1.5191856074042217e-05, "loss": 0.0485, "step": 28857 }, { "epoch": 0.511061819566382, "grad_norm": 0.5443352460861206, "learning_rate": 1.5190995779506644e-05, "loss": 0.0859, "step": 28858 }, { "epoch": 0.5110795291034104, "grad_norm": 0.6930197477340698, "learning_rate": 1.5190135484342717e-05, "loss": 0.0669, "step": 28859 }, { "epoch": 0.5110972386404389, "grad_norm": 0.635627806186676, "learning_rate": 1.5189275188553258e-05, "loss": 0.0779, "step": 28860 }, { "epoch": 0.5111149481774673, "grad_norm": 0.5471624732017517, "learning_rate": 1.5188414892141106e-05, "loss": 0.0661, "step": 28861 }, { "epoch": 0.5111326577144957, "grad_norm": 0.8789386749267578, "learning_rate": 1.5187554595109087e-05, "loss": 0.0729, "step": 28862 }, { "epoch": 0.5111503672515241, "grad_norm": 0.7887840867042542, "learning_rate": 1.5186694297460031e-05, "loss": 0.0699, "step": 28863 }, { "epoch": 0.5111680767885526, "grad_norm": 0.8961278200149536, "learning_rate": 1.518583399919677e-05, "loss": 0.0379, "step": 28864 }, { "epoch": 0.511185786325581, "grad_norm": 0.7436877489089966, "learning_rate": 1.5184973700322137e-05, "loss": 0.0818, "step": 28865 }, { "epoch": 0.5112034958626094, "grad_norm": 0.6633634567260742, "learning_rate": 1.5184113400838955e-05, "loss": 0.1248, "step": 28866 }, { "epoch": 0.5112212053996379, "grad_norm": 1.3224245309829712, "learning_rate": 1.5183253100750065e-05, "loss": 0.1197, "step": 28867 }, { "epoch": 0.5112389149366663, "grad_norm": 0.8181213736534119, "learning_rate": 1.5182392800058288e-05, "loss": 0.0784, "step": 28868 }, { "epoch": 0.5112566244736947, "grad_norm": 0.44120362401008606, "learning_rate": 1.5181532498766452e-05, "loss": 0.0747, "step": 28869 }, { "epoch": 0.5112743340107231, "grad_norm": 0.587662935256958, "learning_rate": 1.5180672196877398e-05, "loss": 0.0661, "step": 28870 }, { "epoch": 0.5112920435477516, "grad_norm": 0.6801819205284119, "learning_rate": 1.517981189439395e-05, "loss": 0.0687, "step": 28871 }, { "epoch": 0.51130975308478, "grad_norm": 0.8539526462554932, "learning_rate": 1.5178951591318941e-05, "loss": 0.0626, "step": 28872 }, { "epoch": 0.5113274626218084, "grad_norm": 0.7053549289703369, "learning_rate": 1.5178091287655189e-05, "loss": 0.0606, "step": 28873 }, { "epoch": 0.5113451721588368, "grad_norm": 0.48545610904693604, "learning_rate": 1.5177230983405546e-05, "loss": 0.0671, "step": 28874 }, { "epoch": 0.5113628816958653, "grad_norm": 0.6146962642669678, "learning_rate": 1.5176370678572828e-05, "loss": 0.0516, "step": 28875 }, { "epoch": 0.5113805912328937, "grad_norm": 0.5481932759284973, "learning_rate": 1.5175510373159865e-05, "loss": 0.0701, "step": 28876 }, { "epoch": 0.5113983007699221, "grad_norm": 0.7447877526283264, "learning_rate": 1.5174650067169495e-05, "loss": 0.0576, "step": 28877 }, { "epoch": 0.5114160103069505, "grad_norm": 0.4453701376914978, "learning_rate": 1.5173789760604538e-05, "loss": 0.0776, "step": 28878 }, { "epoch": 0.511433719843979, "grad_norm": 0.7646300196647644, "learning_rate": 1.517292945346784e-05, "loss": 0.037, "step": 28879 }, { "epoch": 0.5114514293810074, "grad_norm": 0.8849834203720093, "learning_rate": 1.5172069145762214e-05, "loss": 0.0838, "step": 28880 }, { "epoch": 0.5114691389180358, "grad_norm": 0.6595688462257385, "learning_rate": 1.5171208837490503e-05, "loss": 0.0621, "step": 28881 }, { "epoch": 0.5114868484550643, "grad_norm": 0.5903481841087341, "learning_rate": 1.5170348528655525e-05, "loss": 0.0511, "step": 28882 }, { "epoch": 0.5115045579920927, "grad_norm": 0.685627818107605, "learning_rate": 1.5169488219260121e-05, "loss": 0.0987, "step": 28883 }, { "epoch": 0.5115222675291211, "grad_norm": 0.7482975721359253, "learning_rate": 1.5168627909307121e-05, "loss": 0.0421, "step": 28884 }, { "epoch": 0.5115399770661495, "grad_norm": 0.554945707321167, "learning_rate": 1.5167767598799348e-05, "loss": 0.0524, "step": 28885 }, { "epoch": 0.511557686603178, "grad_norm": 0.9284608364105225, "learning_rate": 1.516690728773964e-05, "loss": 0.0569, "step": 28886 }, { "epoch": 0.5115753961402064, "grad_norm": 0.7220245599746704, "learning_rate": 1.5166046976130817e-05, "loss": 0.0695, "step": 28887 }, { "epoch": 0.5115931056772348, "grad_norm": 0.48077771067619324, "learning_rate": 1.5165186663975726e-05, "loss": 0.0706, "step": 28888 }, { "epoch": 0.5116108152142632, "grad_norm": 0.5130099058151245, "learning_rate": 1.5164326351277182e-05, "loss": 0.0555, "step": 28889 }, { "epoch": 0.5116285247512917, "grad_norm": 0.6642302870750427, "learning_rate": 1.5163466038038024e-05, "loss": 0.0501, "step": 28890 }, { "epoch": 0.5116462342883201, "grad_norm": 0.4503400921821594, "learning_rate": 1.5162605724261075e-05, "loss": 0.0488, "step": 28891 }, { "epoch": 0.5116639438253485, "grad_norm": 0.5860836505889893, "learning_rate": 1.5161745409949171e-05, "loss": 0.0505, "step": 28892 }, { "epoch": 0.5116816533623769, "grad_norm": 0.7291680574417114, "learning_rate": 1.5160885095105144e-05, "loss": 0.061, "step": 28893 }, { "epoch": 0.5116993628994054, "grad_norm": 0.9124506115913391, "learning_rate": 1.5160024779731815e-05, "loss": 0.0977, "step": 28894 }, { "epoch": 0.5117170724364338, "grad_norm": 0.7967506647109985, "learning_rate": 1.5159164463832029e-05, "loss": 0.1001, "step": 28895 }, { "epoch": 0.5117347819734622, "grad_norm": 0.7366032004356384, "learning_rate": 1.5158304147408601e-05, "loss": 0.0678, "step": 28896 }, { "epoch": 0.5117524915104907, "grad_norm": 0.7463142275810242, "learning_rate": 1.515744383046437e-05, "loss": 0.05, "step": 28897 }, { "epoch": 0.5117702010475191, "grad_norm": 0.8146676421165466, "learning_rate": 1.5156583513002167e-05, "loss": 0.0741, "step": 28898 }, { "epoch": 0.5117879105845475, "grad_norm": 0.48247379064559937, "learning_rate": 1.5155723195024822e-05, "loss": 0.0832, "step": 28899 }, { "epoch": 0.5118056201215759, "grad_norm": 0.5890753269195557, "learning_rate": 1.515486287653516e-05, "loss": 0.0776, "step": 28900 }, { "epoch": 0.5118233296586044, "grad_norm": 0.6283602714538574, "learning_rate": 1.5154002557536017e-05, "loss": 0.0521, "step": 28901 }, { "epoch": 0.5118410391956328, "grad_norm": 0.6685734987258911, "learning_rate": 1.5153142238030218e-05, "loss": 0.0721, "step": 28902 }, { "epoch": 0.5118587487326612, "grad_norm": 0.8597001433372498, "learning_rate": 1.5152281918020595e-05, "loss": 0.0956, "step": 28903 }, { "epoch": 0.5118764582696896, "grad_norm": 0.4352695643901825, "learning_rate": 1.5151421597509983e-05, "loss": 0.0677, "step": 28904 }, { "epoch": 0.5118941678067181, "grad_norm": 0.974934458732605, "learning_rate": 1.515056127650121e-05, "loss": 0.1082, "step": 28905 }, { "epoch": 0.5119118773437465, "grad_norm": 0.4173189401626587, "learning_rate": 1.5149700954997103e-05, "loss": 0.0638, "step": 28906 }, { "epoch": 0.511929586880775, "grad_norm": 0.6767233610153198, "learning_rate": 1.51488406330005e-05, "loss": 0.0564, "step": 28907 }, { "epoch": 0.5119472964178033, "grad_norm": 0.7203866839408875, "learning_rate": 1.5147980310514223e-05, "loss": 0.0615, "step": 28908 }, { "epoch": 0.5119650059548319, "grad_norm": 0.4248747229576111, "learning_rate": 1.5147119987541107e-05, "loss": 0.0427, "step": 28909 }, { "epoch": 0.5119827154918603, "grad_norm": 0.5117546916007996, "learning_rate": 1.5146259664083979e-05, "loss": 0.0709, "step": 28910 }, { "epoch": 0.5120004250288887, "grad_norm": 0.5822808146476746, "learning_rate": 1.514539934014567e-05, "loss": 0.052, "step": 28911 }, { "epoch": 0.5120181345659172, "grad_norm": 0.46342504024505615, "learning_rate": 1.5144539015729013e-05, "loss": 0.0485, "step": 28912 }, { "epoch": 0.5120358441029456, "grad_norm": 0.4494785666465759, "learning_rate": 1.5143678690836841e-05, "loss": 0.0716, "step": 28913 }, { "epoch": 0.512053553639974, "grad_norm": 0.5051568746566772, "learning_rate": 1.514281836547198e-05, "loss": 0.0902, "step": 28914 }, { "epoch": 0.5120712631770024, "grad_norm": 0.5765427350997925, "learning_rate": 1.5141958039637258e-05, "loss": 0.0971, "step": 28915 }, { "epoch": 0.5120889727140309, "grad_norm": 0.7440986633300781, "learning_rate": 1.5141097713335508e-05, "loss": 0.0921, "step": 28916 }, { "epoch": 0.5121066822510593, "grad_norm": 0.7328000664710999, "learning_rate": 1.514023738656956e-05, "loss": 0.0989, "step": 28917 }, { "epoch": 0.5121243917880877, "grad_norm": 1.1224077939987183, "learning_rate": 1.513937705934225e-05, "loss": 0.0694, "step": 28918 }, { "epoch": 0.5121421013251161, "grad_norm": 1.0600608587265015, "learning_rate": 1.5138516731656402e-05, "loss": 0.0792, "step": 28919 }, { "epoch": 0.5121598108621446, "grad_norm": 0.9436952471733093, "learning_rate": 1.5137656403514845e-05, "loss": 0.0645, "step": 28920 }, { "epoch": 0.512177520399173, "grad_norm": 0.5712293982505798, "learning_rate": 1.5136796074920413e-05, "loss": 0.0653, "step": 28921 }, { "epoch": 0.5121952299362014, "grad_norm": 0.8419885635375977, "learning_rate": 1.513593574587594e-05, "loss": 0.0757, "step": 28922 }, { "epoch": 0.5122129394732298, "grad_norm": 0.6967975497245789, "learning_rate": 1.513507541638425e-05, "loss": 0.0831, "step": 28923 }, { "epoch": 0.5122306490102583, "grad_norm": 0.4368765950202942, "learning_rate": 1.5134215086448175e-05, "loss": 0.0687, "step": 28924 }, { "epoch": 0.5122483585472867, "grad_norm": 0.8169836401939392, "learning_rate": 1.5133354756070546e-05, "loss": 0.0636, "step": 28925 }, { "epoch": 0.5122660680843151, "grad_norm": 0.5820162892341614, "learning_rate": 1.513249442525419e-05, "loss": 0.097, "step": 28926 }, { "epoch": 0.5122837776213436, "grad_norm": 0.9171538949012756, "learning_rate": 1.5131634094001945e-05, "loss": 0.0751, "step": 28927 }, { "epoch": 0.512301487158372, "grad_norm": 0.3768339157104492, "learning_rate": 1.5130773762316636e-05, "loss": 0.0478, "step": 28928 }, { "epoch": 0.5123191966954004, "grad_norm": 0.785245954990387, "learning_rate": 1.5129913430201096e-05, "loss": 0.1034, "step": 28929 }, { "epoch": 0.5123369062324288, "grad_norm": 0.44918614625930786, "learning_rate": 1.5129053097658152e-05, "loss": 0.0978, "step": 28930 }, { "epoch": 0.5123546157694573, "grad_norm": 0.6039944291114807, "learning_rate": 1.5128192764690637e-05, "loss": 0.0948, "step": 28931 }, { "epoch": 0.5123723253064857, "grad_norm": 0.7358072996139526, "learning_rate": 1.5127332431301383e-05, "loss": 0.0818, "step": 28932 }, { "epoch": 0.5123900348435141, "grad_norm": 0.5519692897796631, "learning_rate": 1.5126472097493214e-05, "loss": 0.0695, "step": 28933 }, { "epoch": 0.5124077443805425, "grad_norm": 0.8481228947639465, "learning_rate": 1.512561176326897e-05, "loss": 0.0815, "step": 28934 }, { "epoch": 0.512425453917571, "grad_norm": 0.26460450887680054, "learning_rate": 1.5124751428631466e-05, "loss": 0.052, "step": 28935 }, { "epoch": 0.5124431634545994, "grad_norm": 0.40790674090385437, "learning_rate": 1.5123891093583552e-05, "loss": 0.0497, "step": 28936 }, { "epoch": 0.5124608729916278, "grad_norm": 0.45836979150772095, "learning_rate": 1.5123030758128045e-05, "loss": 0.0487, "step": 28937 }, { "epoch": 0.5124785825286562, "grad_norm": 0.4523357152938843, "learning_rate": 1.5122170422267781e-05, "loss": 0.0484, "step": 28938 }, { "epoch": 0.5124962920656847, "grad_norm": 0.806566596031189, "learning_rate": 1.5121310086005587e-05, "loss": 0.0628, "step": 28939 }, { "epoch": 0.5125140016027131, "grad_norm": 1.04503333568573, "learning_rate": 1.5120449749344294e-05, "loss": 0.0847, "step": 28940 }, { "epoch": 0.5125317111397415, "grad_norm": 0.7268127799034119, "learning_rate": 1.5119589412286738e-05, "loss": 0.0911, "step": 28941 }, { "epoch": 0.51254942067677, "grad_norm": 0.8278191089630127, "learning_rate": 1.5118729074835741e-05, "loss": 0.0879, "step": 28942 }, { "epoch": 0.5125671302137984, "grad_norm": 0.8488348722457886, "learning_rate": 1.5117868736994139e-05, "loss": 0.0722, "step": 28943 }, { "epoch": 0.5125848397508268, "grad_norm": 0.4892045855522156, "learning_rate": 1.5117008398764761e-05, "loss": 0.0921, "step": 28944 }, { "epoch": 0.5126025492878552, "grad_norm": 0.3749085068702698, "learning_rate": 1.5116148060150434e-05, "loss": 0.0418, "step": 28945 }, { "epoch": 0.5126202588248837, "grad_norm": 0.8491710424423218, "learning_rate": 1.5115287721153997e-05, "loss": 0.0598, "step": 28946 }, { "epoch": 0.5126379683619121, "grad_norm": 0.3243038058280945, "learning_rate": 1.5114427381778272e-05, "loss": 0.0893, "step": 28947 }, { "epoch": 0.5126556778989405, "grad_norm": 0.5702174305915833, "learning_rate": 1.5113567042026096e-05, "loss": 0.0511, "step": 28948 }, { "epoch": 0.5126733874359689, "grad_norm": 0.8608100414276123, "learning_rate": 1.5112706701900287e-05, "loss": 0.0694, "step": 28949 }, { "epoch": 0.5126910969729974, "grad_norm": 0.5506209135055542, "learning_rate": 1.5111846361403689e-05, "loss": 0.069, "step": 28950 }, { "epoch": 0.5127088065100258, "grad_norm": 0.4719510078430176, "learning_rate": 1.511098602053913e-05, "loss": 0.0729, "step": 28951 }, { "epoch": 0.5127265160470542, "grad_norm": 0.8483052253723145, "learning_rate": 1.5110125679309437e-05, "loss": 0.0964, "step": 28952 }, { "epoch": 0.5127442255840826, "grad_norm": 0.5983946919441223, "learning_rate": 1.5109265337717442e-05, "loss": 0.0537, "step": 28953 }, { "epoch": 0.5127619351211111, "grad_norm": 0.49474188685417175, "learning_rate": 1.5108404995765972e-05, "loss": 0.0732, "step": 28954 }, { "epoch": 0.5127796446581395, "grad_norm": 0.706684947013855, "learning_rate": 1.5107544653457866e-05, "loss": 0.092, "step": 28955 }, { "epoch": 0.5127973541951679, "grad_norm": 0.7194599509239197, "learning_rate": 1.5106684310795948e-05, "loss": 0.0853, "step": 28956 }, { "epoch": 0.5128150637321964, "grad_norm": 0.48875415325164795, "learning_rate": 1.5105823967783049e-05, "loss": 0.0792, "step": 28957 }, { "epoch": 0.5128327732692248, "grad_norm": 0.6053205728530884, "learning_rate": 1.5104963624421996e-05, "loss": 0.0986, "step": 28958 }, { "epoch": 0.5128504828062532, "grad_norm": 0.5369924902915955, "learning_rate": 1.5104103280715626e-05, "loss": 0.0637, "step": 28959 }, { "epoch": 0.5128681923432816, "grad_norm": 0.6480687856674194, "learning_rate": 1.5103242936666763e-05, "loss": 0.083, "step": 28960 }, { "epoch": 0.5128859018803101, "grad_norm": 1.5242705345153809, "learning_rate": 1.5102382592278248e-05, "loss": 0.0524, "step": 28961 }, { "epoch": 0.5129036114173385, "grad_norm": 0.5922138690948486, "learning_rate": 1.5101522247552902e-05, "loss": 0.0787, "step": 28962 }, { "epoch": 0.5129213209543669, "grad_norm": 0.4545208811759949, "learning_rate": 1.5100661902493557e-05, "loss": 0.0433, "step": 28963 }, { "epoch": 0.5129390304913953, "grad_norm": 0.585576593875885, "learning_rate": 1.5099801557103044e-05, "loss": 0.0638, "step": 28964 }, { "epoch": 0.5129567400284238, "grad_norm": 0.5563848614692688, "learning_rate": 1.5098941211384195e-05, "loss": 0.0602, "step": 28965 }, { "epoch": 0.5129744495654522, "grad_norm": 0.5808964371681213, "learning_rate": 1.509808086533984e-05, "loss": 0.0647, "step": 28966 }, { "epoch": 0.5129921591024806, "grad_norm": 0.6426770687103271, "learning_rate": 1.5097220518972806e-05, "loss": 0.0841, "step": 28967 }, { "epoch": 0.513009868639509, "grad_norm": 0.6879875063896179, "learning_rate": 1.509636017228593e-05, "loss": 0.0587, "step": 28968 }, { "epoch": 0.5130275781765375, "grad_norm": 0.8561022877693176, "learning_rate": 1.5095499825282033e-05, "loss": 0.0627, "step": 28969 }, { "epoch": 0.513045287713566, "grad_norm": 0.8832555413246155, "learning_rate": 1.509463947796396e-05, "loss": 0.0654, "step": 28970 }, { "epoch": 0.5130629972505943, "grad_norm": 0.4368706941604614, "learning_rate": 1.509377913033453e-05, "loss": 0.0664, "step": 28971 }, { "epoch": 0.5130807067876229, "grad_norm": 0.6650910377502441, "learning_rate": 1.5092918782396571e-05, "loss": 0.0776, "step": 28972 }, { "epoch": 0.5130984163246513, "grad_norm": 0.6776267290115356, "learning_rate": 1.5092058434152923e-05, "loss": 0.0635, "step": 28973 }, { "epoch": 0.5131161258616797, "grad_norm": 0.3592630624771118, "learning_rate": 1.5091198085606408e-05, "loss": 0.0536, "step": 28974 }, { "epoch": 0.513133835398708, "grad_norm": 0.5829570889472961, "learning_rate": 1.5090337736759865e-05, "loss": 0.0416, "step": 28975 }, { "epoch": 0.5131515449357366, "grad_norm": 0.46216949820518494, "learning_rate": 1.5089477387616118e-05, "loss": 0.0683, "step": 28976 }, { "epoch": 0.513169254472765, "grad_norm": 0.5699111223220825, "learning_rate": 1.5088617038178002e-05, "loss": 0.0481, "step": 28977 }, { "epoch": 0.5131869640097934, "grad_norm": 0.3978796899318695, "learning_rate": 1.5087756688448342e-05, "loss": 0.0461, "step": 28978 }, { "epoch": 0.5132046735468218, "grad_norm": 0.9243252873420715, "learning_rate": 1.5086896338429973e-05, "loss": 0.0766, "step": 28979 }, { "epoch": 0.5132223830838503, "grad_norm": 0.47115498781204224, "learning_rate": 1.5086035988125724e-05, "loss": 0.0944, "step": 28980 }, { "epoch": 0.5132400926208787, "grad_norm": 0.6035425662994385, "learning_rate": 1.5085175637538424e-05, "loss": 0.0581, "step": 28981 }, { "epoch": 0.5132578021579071, "grad_norm": 0.701489269733429, "learning_rate": 1.5084315286670903e-05, "loss": 0.0805, "step": 28982 }, { "epoch": 0.5132755116949355, "grad_norm": 0.7492169141769409, "learning_rate": 1.5083454935525996e-05, "loss": 0.0636, "step": 28983 }, { "epoch": 0.513293221231964, "grad_norm": 0.5854015350341797, "learning_rate": 1.5082594584106531e-05, "loss": 0.0799, "step": 28984 }, { "epoch": 0.5133109307689924, "grad_norm": 0.6298657655715942, "learning_rate": 1.5081734232415335e-05, "loss": 0.0453, "step": 28985 }, { "epoch": 0.5133286403060208, "grad_norm": 0.6007005572319031, "learning_rate": 1.5080873880455247e-05, "loss": 0.0533, "step": 28986 }, { "epoch": 0.5133463498430493, "grad_norm": 0.5727282166481018, "learning_rate": 1.5080013528229089e-05, "loss": 0.0724, "step": 28987 }, { "epoch": 0.5133640593800777, "grad_norm": 0.761748731136322, "learning_rate": 1.507915317573969e-05, "loss": 0.055, "step": 28988 }, { "epoch": 0.5133817689171061, "grad_norm": 0.654976487159729, "learning_rate": 1.5078292822989894e-05, "loss": 0.0776, "step": 28989 }, { "epoch": 0.5133994784541345, "grad_norm": 0.7078017592430115, "learning_rate": 1.5077432469982517e-05, "loss": 0.0459, "step": 28990 }, { "epoch": 0.513417187991163, "grad_norm": 0.5964492559432983, "learning_rate": 1.5076572116720397e-05, "loss": 0.0583, "step": 28991 }, { "epoch": 0.5134348975281914, "grad_norm": 0.6304985284805298, "learning_rate": 1.5075711763206363e-05, "loss": 0.071, "step": 28992 }, { "epoch": 0.5134526070652198, "grad_norm": 0.7113452553749084, "learning_rate": 1.507485140944324e-05, "loss": 0.0489, "step": 28993 }, { "epoch": 0.5134703166022482, "grad_norm": 0.48891788721084595, "learning_rate": 1.5073991055433872e-05, "loss": 0.0802, "step": 28994 }, { "epoch": 0.5134880261392767, "grad_norm": 0.7591187953948975, "learning_rate": 1.5073130701181074e-05, "loss": 0.1351, "step": 28995 }, { "epoch": 0.5135057356763051, "grad_norm": 0.8064825534820557, "learning_rate": 1.5072270346687686e-05, "loss": 0.0807, "step": 28996 }, { "epoch": 0.5135234452133335, "grad_norm": 0.42397022247314453, "learning_rate": 1.5071409991956531e-05, "loss": 0.0784, "step": 28997 }, { "epoch": 0.5135411547503619, "grad_norm": 0.3670116662979126, "learning_rate": 1.507054963699045e-05, "loss": 0.0468, "step": 28998 }, { "epoch": 0.5135588642873904, "grad_norm": 0.648727536201477, "learning_rate": 1.5069689281792267e-05, "loss": 0.0934, "step": 28999 }, { "epoch": 0.5135765738244188, "grad_norm": 0.3808113932609558, "learning_rate": 1.5068828926364812e-05, "loss": 0.0396, "step": 29000 }, { "epoch": 0.5135942833614472, "grad_norm": 0.7684992551803589, "learning_rate": 1.5067968570710918e-05, "loss": 0.0738, "step": 29001 }, { "epoch": 0.5136119928984757, "grad_norm": 0.6459607481956482, "learning_rate": 1.506710821483341e-05, "loss": 0.0493, "step": 29002 }, { "epoch": 0.5136297024355041, "grad_norm": 0.4629768431186676, "learning_rate": 1.506624785873513e-05, "loss": 0.0405, "step": 29003 }, { "epoch": 0.5136474119725325, "grad_norm": 0.5348491668701172, "learning_rate": 1.5065387502418896e-05, "loss": 0.0734, "step": 29004 }, { "epoch": 0.5136651215095609, "grad_norm": 0.2422093003988266, "learning_rate": 1.5064527145887547e-05, "loss": 0.034, "step": 29005 }, { "epoch": 0.5136828310465894, "grad_norm": 0.616904616355896, "learning_rate": 1.506366678914391e-05, "loss": 0.0686, "step": 29006 }, { "epoch": 0.5137005405836178, "grad_norm": 0.6397685408592224, "learning_rate": 1.5062806432190812e-05, "loss": 0.0665, "step": 29007 }, { "epoch": 0.5137182501206462, "grad_norm": 0.7374492287635803, "learning_rate": 1.506194607503109e-05, "loss": 0.0832, "step": 29008 }, { "epoch": 0.5137359596576746, "grad_norm": 0.6055130362510681, "learning_rate": 1.5061085717667572e-05, "loss": 0.0591, "step": 29009 }, { "epoch": 0.5137536691947031, "grad_norm": 0.4925629794597626, "learning_rate": 1.506022536010309e-05, "loss": 0.0674, "step": 29010 }, { "epoch": 0.5137713787317315, "grad_norm": 0.6262653470039368, "learning_rate": 1.5059365002340467e-05, "loss": 0.0703, "step": 29011 }, { "epoch": 0.5137890882687599, "grad_norm": 0.586410641670227, "learning_rate": 1.5058504644382543e-05, "loss": 0.0669, "step": 29012 }, { "epoch": 0.5138067978057883, "grad_norm": 0.31747740507125854, "learning_rate": 1.5057644286232143e-05, "loss": 0.0494, "step": 29013 }, { "epoch": 0.5138245073428168, "grad_norm": 0.31362074613571167, "learning_rate": 1.5056783927892101e-05, "loss": 0.042, "step": 29014 }, { "epoch": 0.5138422168798452, "grad_norm": 1.0561676025390625, "learning_rate": 1.5055923569365243e-05, "loss": 0.0876, "step": 29015 }, { "epoch": 0.5138599264168736, "grad_norm": 0.7374118566513062, "learning_rate": 1.5055063210654405e-05, "loss": 0.0641, "step": 29016 }, { "epoch": 0.5138776359539021, "grad_norm": 0.7406802773475647, "learning_rate": 1.5054202851762416e-05, "loss": 0.0808, "step": 29017 }, { "epoch": 0.5138953454909305, "grad_norm": 0.7164549231529236, "learning_rate": 1.5053342492692101e-05, "loss": 0.1101, "step": 29018 }, { "epoch": 0.5139130550279589, "grad_norm": 0.7323532700538635, "learning_rate": 1.5052482133446298e-05, "loss": 0.0698, "step": 29019 }, { "epoch": 0.5139307645649873, "grad_norm": 0.5369274616241455, "learning_rate": 1.5051621774027829e-05, "loss": 0.0585, "step": 29020 }, { "epoch": 0.5139484741020158, "grad_norm": 0.7248794436454773, "learning_rate": 1.5050761414439533e-05, "loss": 0.0772, "step": 29021 }, { "epoch": 0.5139661836390442, "grad_norm": 0.6608729958534241, "learning_rate": 1.5049901054684234e-05, "loss": 0.0792, "step": 29022 }, { "epoch": 0.5139838931760726, "grad_norm": 0.6081526279449463, "learning_rate": 1.5049040694764772e-05, "loss": 0.0542, "step": 29023 }, { "epoch": 0.514001602713101, "grad_norm": 0.762134313583374, "learning_rate": 1.5048180334683967e-05, "loss": 0.0764, "step": 29024 }, { "epoch": 0.5140193122501295, "grad_norm": 0.38260284066200256, "learning_rate": 1.5047319974444656e-05, "loss": 0.0609, "step": 29025 }, { "epoch": 0.5140370217871579, "grad_norm": 0.969005286693573, "learning_rate": 1.5046459614049664e-05, "loss": 0.0813, "step": 29026 }, { "epoch": 0.5140547313241863, "grad_norm": 0.8292202949523926, "learning_rate": 1.5045599253501822e-05, "loss": 0.0624, "step": 29027 }, { "epoch": 0.5140724408612148, "grad_norm": 0.8429253697395325, "learning_rate": 1.504473889280397e-05, "loss": 0.0748, "step": 29028 }, { "epoch": 0.5140901503982432, "grad_norm": 0.8053967356681824, "learning_rate": 1.5043878531958927e-05, "loss": 0.0658, "step": 29029 }, { "epoch": 0.5141078599352716, "grad_norm": 0.5589720010757446, "learning_rate": 1.5043018170969529e-05, "loss": 0.0715, "step": 29030 }, { "epoch": 0.5141255694723, "grad_norm": 0.6145157217979431, "learning_rate": 1.5042157809838606e-05, "loss": 0.0954, "step": 29031 }, { "epoch": 0.5141432790093285, "grad_norm": 0.6798000931739807, "learning_rate": 1.5041297448568989e-05, "loss": 0.0884, "step": 29032 }, { "epoch": 0.514160988546357, "grad_norm": 0.5824846625328064, "learning_rate": 1.5040437087163509e-05, "loss": 0.0461, "step": 29033 }, { "epoch": 0.5141786980833853, "grad_norm": 0.9592165946960449, "learning_rate": 1.5039576725624993e-05, "loss": 0.0641, "step": 29034 }, { "epoch": 0.5141964076204137, "grad_norm": 0.48038890957832336, "learning_rate": 1.503871636395627e-05, "loss": 0.0465, "step": 29035 }, { "epoch": 0.5142141171574423, "grad_norm": 0.6915911436080933, "learning_rate": 1.5037856002160178e-05, "loss": 0.056, "step": 29036 }, { "epoch": 0.5142318266944707, "grad_norm": 0.6250941157341003, "learning_rate": 1.5036995640239543e-05, "loss": 0.0628, "step": 29037 }, { "epoch": 0.514249536231499, "grad_norm": 0.7131437659263611, "learning_rate": 1.5036135278197195e-05, "loss": 0.1106, "step": 29038 }, { "epoch": 0.5142672457685274, "grad_norm": 0.5853999853134155, "learning_rate": 1.503527491603597e-05, "loss": 0.0655, "step": 29039 }, { "epoch": 0.514284955305556, "grad_norm": 0.7444978356361389, "learning_rate": 1.5034414553758683e-05, "loss": 0.0557, "step": 29040 }, { "epoch": 0.5143026648425844, "grad_norm": 0.48915955424308777, "learning_rate": 1.5033554191368186e-05, "loss": 0.0478, "step": 29041 }, { "epoch": 0.5143203743796128, "grad_norm": 0.697805643081665, "learning_rate": 1.50326938288673e-05, "loss": 0.0739, "step": 29042 }, { "epoch": 0.5143380839166413, "grad_norm": 0.694243311882019, "learning_rate": 1.503183346625885e-05, "loss": 0.0867, "step": 29043 }, { "epoch": 0.5143557934536697, "grad_norm": 1.0969868898391724, "learning_rate": 1.503097310354567e-05, "loss": 0.0923, "step": 29044 }, { "epoch": 0.5143735029906981, "grad_norm": 0.5810120105743408, "learning_rate": 1.5030112740730592e-05, "loss": 0.0781, "step": 29045 }, { "epoch": 0.5143912125277265, "grad_norm": 0.5471540093421936, "learning_rate": 1.5029252377816453e-05, "loss": 0.0711, "step": 29046 }, { "epoch": 0.514408922064755, "grad_norm": 0.6557453870773315, "learning_rate": 1.5028392014806068e-05, "loss": 0.109, "step": 29047 }, { "epoch": 0.5144266316017834, "grad_norm": 0.42555269598960876, "learning_rate": 1.5027531651702284e-05, "loss": 0.0687, "step": 29048 }, { "epoch": 0.5144443411388118, "grad_norm": 0.7990263104438782, "learning_rate": 1.5026671288507915e-05, "loss": 0.0811, "step": 29049 }, { "epoch": 0.5144620506758402, "grad_norm": 0.5843579769134521, "learning_rate": 1.5025810925225803e-05, "loss": 0.0974, "step": 29050 }, { "epoch": 0.5144797602128687, "grad_norm": 0.9146828651428223, "learning_rate": 1.502495056185878e-05, "loss": 0.0838, "step": 29051 }, { "epoch": 0.5144974697498971, "grad_norm": 0.6888360977172852, "learning_rate": 1.5024090198409668e-05, "loss": 0.0852, "step": 29052 }, { "epoch": 0.5145151792869255, "grad_norm": 0.555063784122467, "learning_rate": 1.5023229834881304e-05, "loss": 0.0715, "step": 29053 }, { "epoch": 0.5145328888239539, "grad_norm": 0.517228364944458, "learning_rate": 1.5022369471276515e-05, "loss": 0.0518, "step": 29054 }, { "epoch": 0.5145505983609824, "grad_norm": 0.7197278738021851, "learning_rate": 1.5021509107598134e-05, "loss": 0.0718, "step": 29055 }, { "epoch": 0.5145683078980108, "grad_norm": 1.4114097356796265, "learning_rate": 1.5020648743848985e-05, "loss": 0.0882, "step": 29056 }, { "epoch": 0.5145860174350392, "grad_norm": 0.529981255531311, "learning_rate": 1.5019788380031913e-05, "loss": 0.084, "step": 29057 }, { "epoch": 0.5146037269720677, "grad_norm": 0.6946845650672913, "learning_rate": 1.5018928016149735e-05, "loss": 0.0761, "step": 29058 }, { "epoch": 0.5146214365090961, "grad_norm": 0.5296712517738342, "learning_rate": 1.501806765220528e-05, "loss": 0.0563, "step": 29059 }, { "epoch": 0.5146391460461245, "grad_norm": 0.9084469079971313, "learning_rate": 1.5017207288201392e-05, "loss": 0.118, "step": 29060 }, { "epoch": 0.5146568555831529, "grad_norm": 0.5434680581092834, "learning_rate": 1.501634692414089e-05, "loss": 0.0411, "step": 29061 }, { "epoch": 0.5146745651201814, "grad_norm": 0.6415942907333374, "learning_rate": 1.5015486560026611e-05, "loss": 0.0787, "step": 29062 }, { "epoch": 0.5146922746572098, "grad_norm": 0.3456045687198639, "learning_rate": 1.501462619586138e-05, "loss": 0.072, "step": 29063 }, { "epoch": 0.5147099841942382, "grad_norm": 0.3942614197731018, "learning_rate": 1.501376583164803e-05, "loss": 0.1014, "step": 29064 }, { "epoch": 0.5147276937312666, "grad_norm": 1.0189313888549805, "learning_rate": 1.5012905467389392e-05, "loss": 0.0836, "step": 29065 }, { "epoch": 0.5147454032682951, "grad_norm": 0.6828410029411316, "learning_rate": 1.5012045103088297e-05, "loss": 0.0662, "step": 29066 }, { "epoch": 0.5147631128053235, "grad_norm": 0.5048231482505798, "learning_rate": 1.5011184738747578e-05, "loss": 0.0617, "step": 29067 }, { "epoch": 0.5147808223423519, "grad_norm": 0.6377149820327759, "learning_rate": 1.5010324374370057e-05, "loss": 0.0604, "step": 29068 }, { "epoch": 0.5147985318793803, "grad_norm": 0.6146369576454163, "learning_rate": 1.5009464009958572e-05, "loss": 0.0694, "step": 29069 }, { "epoch": 0.5148162414164088, "grad_norm": 0.8969276547431946, "learning_rate": 1.5008603645515949e-05, "loss": 0.0733, "step": 29070 }, { "epoch": 0.5148339509534372, "grad_norm": 0.5751534700393677, "learning_rate": 1.5007743281045028e-05, "loss": 0.058, "step": 29071 }, { "epoch": 0.5148516604904656, "grad_norm": 0.5675338506698608, "learning_rate": 1.5006882916548625e-05, "loss": 0.0535, "step": 29072 }, { "epoch": 0.5148693700274941, "grad_norm": 0.38633906841278076, "learning_rate": 1.500602255202958e-05, "loss": 0.0633, "step": 29073 }, { "epoch": 0.5148870795645225, "grad_norm": 0.6219077110290527, "learning_rate": 1.5005162187490724e-05, "loss": 0.1176, "step": 29074 }, { "epoch": 0.5149047891015509, "grad_norm": 0.661673367023468, "learning_rate": 1.5004301822934881e-05, "loss": 0.084, "step": 29075 }, { "epoch": 0.5149224986385793, "grad_norm": 0.6790189743041992, "learning_rate": 1.5003441458364891e-05, "loss": 0.0506, "step": 29076 }, { "epoch": 0.5149402081756078, "grad_norm": 0.5186909437179565, "learning_rate": 1.5002581093783575e-05, "loss": 0.0696, "step": 29077 }, { "epoch": 0.5149579177126362, "grad_norm": 0.6283756494522095, "learning_rate": 1.5001720729193766e-05, "loss": 0.0482, "step": 29078 }, { "epoch": 0.5149756272496646, "grad_norm": 0.9551946520805359, "learning_rate": 1.50008603645983e-05, "loss": 0.0856, "step": 29079 }, { "epoch": 0.514993336786693, "grad_norm": 0.5879543423652649, "learning_rate": 1.5e-05, "loss": 0.0978, "step": 29080 }, { "epoch": 0.5150110463237215, "grad_norm": 0.7460177540779114, "learning_rate": 1.4999139635401701e-05, "loss": 0.1089, "step": 29081 }, { "epoch": 0.5150287558607499, "grad_norm": 0.49601423740386963, "learning_rate": 1.4998279270806236e-05, "loss": 0.043, "step": 29082 }, { "epoch": 0.5150464653977783, "grad_norm": 0.49377521872520447, "learning_rate": 1.4997418906216427e-05, "loss": 0.0671, "step": 29083 }, { "epoch": 0.5150641749348067, "grad_norm": 0.49620771408081055, "learning_rate": 1.499655854163511e-05, "loss": 0.0734, "step": 29084 }, { "epoch": 0.5150818844718352, "grad_norm": 0.7827837467193604, "learning_rate": 1.4995698177065116e-05, "loss": 0.0946, "step": 29085 }, { "epoch": 0.5150995940088636, "grad_norm": 0.5233216285705566, "learning_rate": 1.4994837812509282e-05, "loss": 0.0579, "step": 29086 }, { "epoch": 0.515117303545892, "grad_norm": 0.6618533134460449, "learning_rate": 1.4993977447970423e-05, "loss": 0.076, "step": 29087 }, { "epoch": 0.5151350130829205, "grad_norm": 0.5772751569747925, "learning_rate": 1.4993117083451374e-05, "loss": 0.0703, "step": 29088 }, { "epoch": 0.5151527226199489, "grad_norm": 0.6992269158363342, "learning_rate": 1.499225671895498e-05, "loss": 0.07, "step": 29089 }, { "epoch": 0.5151704321569773, "grad_norm": 0.6221893429756165, "learning_rate": 1.4991396354484052e-05, "loss": 0.0532, "step": 29090 }, { "epoch": 0.5151881416940057, "grad_norm": 0.7789581418037415, "learning_rate": 1.499053599004143e-05, "loss": 0.0948, "step": 29091 }, { "epoch": 0.5152058512310342, "grad_norm": 0.630743145942688, "learning_rate": 1.4989675625629944e-05, "loss": 0.0575, "step": 29092 }, { "epoch": 0.5152235607680626, "grad_norm": 0.953815758228302, "learning_rate": 1.4988815261252429e-05, "loss": 0.0734, "step": 29093 }, { "epoch": 0.515241270305091, "grad_norm": 0.45723915100097656, "learning_rate": 1.4987954896911704e-05, "loss": 0.0255, "step": 29094 }, { "epoch": 0.5152589798421194, "grad_norm": 1.1655341386795044, "learning_rate": 1.498709453261061e-05, "loss": 0.1036, "step": 29095 }, { "epoch": 0.515276689379148, "grad_norm": 0.33825570344924927, "learning_rate": 1.4986234168351974e-05, "loss": 0.0734, "step": 29096 }, { "epoch": 0.5152943989161763, "grad_norm": 0.918597400188446, "learning_rate": 1.4985373804138625e-05, "loss": 0.0743, "step": 29097 }, { "epoch": 0.5153121084532047, "grad_norm": 0.7631983160972595, "learning_rate": 1.4984513439973393e-05, "loss": 0.0609, "step": 29098 }, { "epoch": 0.5153298179902331, "grad_norm": 0.6273888945579529, "learning_rate": 1.498365307585911e-05, "loss": 0.066, "step": 29099 }, { "epoch": 0.5153475275272617, "grad_norm": 0.6165858507156372, "learning_rate": 1.4982792711798616e-05, "loss": 0.0465, "step": 29100 }, { "epoch": 0.51536523706429, "grad_norm": 0.6590034365653992, "learning_rate": 1.4981932347794722e-05, "loss": 0.0856, "step": 29101 }, { "epoch": 0.5153829466013184, "grad_norm": 0.7875701189041138, "learning_rate": 1.4981071983850268e-05, "loss": 0.0785, "step": 29102 }, { "epoch": 0.515400656138347, "grad_norm": 0.5815783143043518, "learning_rate": 1.4980211619968088e-05, "loss": 0.069, "step": 29103 }, { "epoch": 0.5154183656753754, "grad_norm": 0.52874356508255, "learning_rate": 1.4979351256151017e-05, "loss": 0.0633, "step": 29104 }, { "epoch": 0.5154360752124038, "grad_norm": 0.5037457942962646, "learning_rate": 1.497849089240187e-05, "loss": 0.0642, "step": 29105 }, { "epoch": 0.5154537847494322, "grad_norm": 0.5781826376914978, "learning_rate": 1.4977630528723486e-05, "loss": 0.0613, "step": 29106 }, { "epoch": 0.5154714942864607, "grad_norm": 0.44820964336395264, "learning_rate": 1.49767701651187e-05, "loss": 0.0624, "step": 29107 }, { "epoch": 0.5154892038234891, "grad_norm": 0.8648228645324707, "learning_rate": 1.4975909801590334e-05, "loss": 0.1017, "step": 29108 }, { "epoch": 0.5155069133605175, "grad_norm": 0.3582944869995117, "learning_rate": 1.4975049438141223e-05, "loss": 0.0957, "step": 29109 }, { "epoch": 0.5155246228975459, "grad_norm": 0.7838494181632996, "learning_rate": 1.4974189074774195e-05, "loss": 0.0881, "step": 29110 }, { "epoch": 0.5155423324345744, "grad_norm": 0.6919897794723511, "learning_rate": 1.4973328711492086e-05, "loss": 0.0911, "step": 29111 }, { "epoch": 0.5155600419716028, "grad_norm": 0.5937818288803101, "learning_rate": 1.497246834829772e-05, "loss": 0.0565, "step": 29112 }, { "epoch": 0.5155777515086312, "grad_norm": 0.38309958577156067, "learning_rate": 1.497160798519393e-05, "loss": 0.0366, "step": 29113 }, { "epoch": 0.5155954610456596, "grad_norm": 1.0033705234527588, "learning_rate": 1.4970747622183554e-05, "loss": 0.0762, "step": 29114 }, { "epoch": 0.5156131705826881, "grad_norm": 0.5910993814468384, "learning_rate": 1.4969887259269409e-05, "loss": 0.0645, "step": 29115 }, { "epoch": 0.5156308801197165, "grad_norm": 0.5222979784011841, "learning_rate": 1.4969026896454333e-05, "loss": 0.055, "step": 29116 }, { "epoch": 0.5156485896567449, "grad_norm": 0.5566441416740417, "learning_rate": 1.496816653374115e-05, "loss": 0.0645, "step": 29117 }, { "epoch": 0.5156662991937734, "grad_norm": 0.6059591174125671, "learning_rate": 1.4967306171132707e-05, "loss": 0.0569, "step": 29118 }, { "epoch": 0.5156840087308018, "grad_norm": 0.3782190978527069, "learning_rate": 1.4966445808631815e-05, "loss": 0.0668, "step": 29119 }, { "epoch": 0.5157017182678302, "grad_norm": 0.40606632828712463, "learning_rate": 1.4965585446241316e-05, "loss": 0.0583, "step": 29120 }, { "epoch": 0.5157194278048586, "grad_norm": 0.8106945753097534, "learning_rate": 1.4964725083964036e-05, "loss": 0.1043, "step": 29121 }, { "epoch": 0.5157371373418871, "grad_norm": 0.3355250060558319, "learning_rate": 1.4963864721802806e-05, "loss": 0.0527, "step": 29122 }, { "epoch": 0.5157548468789155, "grad_norm": 0.8604836463928223, "learning_rate": 1.496300435976046e-05, "loss": 0.0643, "step": 29123 }, { "epoch": 0.5157725564159439, "grad_norm": 0.577616274356842, "learning_rate": 1.4962143997839823e-05, "loss": 0.0714, "step": 29124 }, { "epoch": 0.5157902659529723, "grad_norm": 0.8554075360298157, "learning_rate": 1.4961283636043734e-05, "loss": 0.0646, "step": 29125 }, { "epoch": 0.5158079754900008, "grad_norm": 0.8659204244613647, "learning_rate": 1.4960423274375011e-05, "loss": 0.0595, "step": 29126 }, { "epoch": 0.5158256850270292, "grad_norm": 0.6469418406486511, "learning_rate": 1.4959562912836493e-05, "loss": 0.053, "step": 29127 }, { "epoch": 0.5158433945640576, "grad_norm": 0.4978382885456085, "learning_rate": 1.4958702551431017e-05, "loss": 0.0494, "step": 29128 }, { "epoch": 0.515861104101086, "grad_norm": 0.9120301604270935, "learning_rate": 1.4957842190161398e-05, "loss": 0.0685, "step": 29129 }, { "epoch": 0.5158788136381145, "grad_norm": 0.6077202558517456, "learning_rate": 1.4956981829030474e-05, "loss": 0.0774, "step": 29130 }, { "epoch": 0.5158965231751429, "grad_norm": 0.5061609148979187, "learning_rate": 1.495612146804107e-05, "loss": 0.0862, "step": 29131 }, { "epoch": 0.5159142327121713, "grad_norm": 0.5760279893875122, "learning_rate": 1.4955261107196035e-05, "loss": 0.0612, "step": 29132 }, { "epoch": 0.5159319422491998, "grad_norm": 0.812944769859314, "learning_rate": 1.495440074649818e-05, "loss": 0.0868, "step": 29133 }, { "epoch": 0.5159496517862282, "grad_norm": 0.6554796695709229, "learning_rate": 1.495354038595034e-05, "loss": 0.0677, "step": 29134 }, { "epoch": 0.5159673613232566, "grad_norm": 0.49094507098197937, "learning_rate": 1.4952680025555347e-05, "loss": 0.0406, "step": 29135 }, { "epoch": 0.515985070860285, "grad_norm": 0.6738570928573608, "learning_rate": 1.4951819665316037e-05, "loss": 0.0479, "step": 29136 }, { "epoch": 0.5160027803973135, "grad_norm": 0.676758885383606, "learning_rate": 1.4950959305235232e-05, "loss": 0.0616, "step": 29137 }, { "epoch": 0.5160204899343419, "grad_norm": 0.6289083957672119, "learning_rate": 1.4950098945315763e-05, "loss": 0.095, "step": 29138 }, { "epoch": 0.5160381994713703, "grad_norm": 0.5470361113548279, "learning_rate": 1.4949238585560471e-05, "loss": 0.0816, "step": 29139 }, { "epoch": 0.5160559090083987, "grad_norm": 0.6335808634757996, "learning_rate": 1.4948378225972174e-05, "loss": 0.075, "step": 29140 }, { "epoch": 0.5160736185454272, "grad_norm": 0.7632917165756226, "learning_rate": 1.4947517866553706e-05, "loss": 0.0693, "step": 29141 }, { "epoch": 0.5160913280824556, "grad_norm": 0.5198200345039368, "learning_rate": 1.49466575073079e-05, "loss": 0.0729, "step": 29142 }, { "epoch": 0.516109037619484, "grad_norm": 0.6500416994094849, "learning_rate": 1.4945797148237591e-05, "loss": 0.1036, "step": 29143 }, { "epoch": 0.5161267471565124, "grad_norm": 0.6391258835792542, "learning_rate": 1.4944936789345599e-05, "loss": 0.0644, "step": 29144 }, { "epoch": 0.5161444566935409, "grad_norm": 0.569729745388031, "learning_rate": 1.4944076430634758e-05, "loss": 0.0522, "step": 29145 }, { "epoch": 0.5161621662305693, "grad_norm": 0.5467687845230103, "learning_rate": 1.4943216072107905e-05, "loss": 0.0824, "step": 29146 }, { "epoch": 0.5161798757675977, "grad_norm": 0.6751968264579773, "learning_rate": 1.4942355713767859e-05, "loss": 0.1032, "step": 29147 }, { "epoch": 0.5161975853046262, "grad_norm": 0.6459317803382874, "learning_rate": 1.494149535561746e-05, "loss": 0.0897, "step": 29148 }, { "epoch": 0.5162152948416546, "grad_norm": 0.7589010000228882, "learning_rate": 1.4940634997659532e-05, "loss": 0.0528, "step": 29149 }, { "epoch": 0.516233004378683, "grad_norm": 0.5047550797462463, "learning_rate": 1.4939774639896916e-05, "loss": 0.0538, "step": 29150 }, { "epoch": 0.5162507139157114, "grad_norm": 1.0539865493774414, "learning_rate": 1.493891428233243e-05, "loss": 0.0752, "step": 29151 }, { "epoch": 0.5162684234527399, "grad_norm": 0.5439115166664124, "learning_rate": 1.4938053924968911e-05, "loss": 0.0911, "step": 29152 }, { "epoch": 0.5162861329897683, "grad_norm": 0.9186185002326965, "learning_rate": 1.4937193567809194e-05, "loss": 0.0941, "step": 29153 }, { "epoch": 0.5163038425267967, "grad_norm": 0.6195271015167236, "learning_rate": 1.4936333210856092e-05, "loss": 0.0691, "step": 29154 }, { "epoch": 0.5163215520638251, "grad_norm": 0.8012577891349792, "learning_rate": 1.4935472854112453e-05, "loss": 0.1062, "step": 29155 }, { "epoch": 0.5163392616008536, "grad_norm": 0.39370808005332947, "learning_rate": 1.4934612497581101e-05, "loss": 0.0647, "step": 29156 }, { "epoch": 0.516356971137882, "grad_norm": 0.5488874316215515, "learning_rate": 1.4933752141264875e-05, "loss": 0.0314, "step": 29157 }, { "epoch": 0.5163746806749104, "grad_norm": 0.8680866956710815, "learning_rate": 1.493289178516659e-05, "loss": 0.0834, "step": 29158 }, { "epoch": 0.5163923902119388, "grad_norm": 0.5780507922172546, "learning_rate": 1.4932031429289086e-05, "loss": 0.0686, "step": 29159 }, { "epoch": 0.5164100997489673, "grad_norm": 0.7139751315116882, "learning_rate": 1.4931171073635192e-05, "loss": 0.0601, "step": 29160 }, { "epoch": 0.5164278092859957, "grad_norm": 0.33082687854766846, "learning_rate": 1.4930310718207738e-05, "loss": 0.0543, "step": 29161 }, { "epoch": 0.5164455188230241, "grad_norm": 0.6726861000061035, "learning_rate": 1.4929450363009552e-05, "loss": 0.0469, "step": 29162 }, { "epoch": 0.5164632283600527, "grad_norm": 0.8497605323791504, "learning_rate": 1.4928590008043468e-05, "loss": 0.088, "step": 29163 }, { "epoch": 0.516480937897081, "grad_norm": 0.6109678745269775, "learning_rate": 1.492772965331232e-05, "loss": 0.0612, "step": 29164 }, { "epoch": 0.5164986474341094, "grad_norm": 0.6753832101821899, "learning_rate": 1.492686929881893e-05, "loss": 0.0895, "step": 29165 }, { "epoch": 0.5165163569711378, "grad_norm": 0.6919213533401489, "learning_rate": 1.4926008944566132e-05, "loss": 0.0644, "step": 29166 }, { "epoch": 0.5165340665081664, "grad_norm": 0.9984956383705139, "learning_rate": 1.492514859055676e-05, "loss": 0.0982, "step": 29167 }, { "epoch": 0.5165517760451948, "grad_norm": 1.1597000360488892, "learning_rate": 1.4924288236793642e-05, "loss": 0.0875, "step": 29168 }, { "epoch": 0.5165694855822232, "grad_norm": 0.6768776178359985, "learning_rate": 1.4923427883279605e-05, "loss": 0.0946, "step": 29169 }, { "epoch": 0.5165871951192516, "grad_norm": 0.5067356824874878, "learning_rate": 1.4922567530017483e-05, "loss": 0.0722, "step": 29170 }, { "epoch": 0.5166049046562801, "grad_norm": 0.9378129243850708, "learning_rate": 1.4921707177010113e-05, "loss": 0.0849, "step": 29171 }, { "epoch": 0.5166226141933085, "grad_norm": 0.506895899772644, "learning_rate": 1.4920846824260313e-05, "loss": 0.1066, "step": 29172 }, { "epoch": 0.5166403237303369, "grad_norm": 0.5936112999916077, "learning_rate": 1.4919986471770915e-05, "loss": 0.0586, "step": 29173 }, { "epoch": 0.5166580332673653, "grad_norm": 0.3908892571926117, "learning_rate": 1.4919126119544757e-05, "loss": 0.0531, "step": 29174 }, { "epoch": 0.5166757428043938, "grad_norm": 0.6935337781906128, "learning_rate": 1.4918265767584667e-05, "loss": 0.0757, "step": 29175 }, { "epoch": 0.5166934523414222, "grad_norm": 0.5673019886016846, "learning_rate": 1.4917405415893473e-05, "loss": 0.056, "step": 29176 }, { "epoch": 0.5167111618784506, "grad_norm": 0.47239673137664795, "learning_rate": 1.4916545064474005e-05, "loss": 0.0455, "step": 29177 }, { "epoch": 0.5167288714154791, "grad_norm": 0.6232794523239136, "learning_rate": 1.49156847133291e-05, "loss": 0.0512, "step": 29178 }, { "epoch": 0.5167465809525075, "grad_norm": 0.5029210448265076, "learning_rate": 1.491482436246158e-05, "loss": 0.0779, "step": 29179 }, { "epoch": 0.5167642904895359, "grad_norm": 0.8709738850593567, "learning_rate": 1.4913964011874278e-05, "loss": 0.092, "step": 29180 }, { "epoch": 0.5167820000265643, "grad_norm": 0.592693030834198, "learning_rate": 1.4913103661570028e-05, "loss": 0.068, "step": 29181 }, { "epoch": 0.5167997095635928, "grad_norm": 0.7856195569038391, "learning_rate": 1.4912243311551663e-05, "loss": 0.075, "step": 29182 }, { "epoch": 0.5168174191006212, "grad_norm": 0.6150431632995605, "learning_rate": 1.4911382961822e-05, "loss": 0.0855, "step": 29183 }, { "epoch": 0.5168351286376496, "grad_norm": 0.5879341959953308, "learning_rate": 1.4910522612383881e-05, "loss": 0.0576, "step": 29184 }, { "epoch": 0.516852838174678, "grad_norm": 0.7504130601882935, "learning_rate": 1.490966226324014e-05, "loss": 0.0841, "step": 29185 }, { "epoch": 0.5168705477117065, "grad_norm": 0.6366173624992371, "learning_rate": 1.4908801914393594e-05, "loss": 0.0787, "step": 29186 }, { "epoch": 0.5168882572487349, "grad_norm": 0.6806877255439758, "learning_rate": 1.4907941565847081e-05, "loss": 0.0977, "step": 29187 }, { "epoch": 0.5169059667857633, "grad_norm": 0.7295646667480469, "learning_rate": 1.490708121760343e-05, "loss": 0.07, "step": 29188 }, { "epoch": 0.5169236763227917, "grad_norm": 0.46304237842559814, "learning_rate": 1.4906220869665476e-05, "loss": 0.0854, "step": 29189 }, { "epoch": 0.5169413858598202, "grad_norm": 0.810792088508606, "learning_rate": 1.4905360522036043e-05, "loss": 0.0725, "step": 29190 }, { "epoch": 0.5169590953968486, "grad_norm": 0.5482211709022522, "learning_rate": 1.4904500174717965e-05, "loss": 0.0675, "step": 29191 }, { "epoch": 0.516976804933877, "grad_norm": 0.9192641377449036, "learning_rate": 1.4903639827714075e-05, "loss": 0.0619, "step": 29192 }, { "epoch": 0.5169945144709055, "grad_norm": 0.5229099988937378, "learning_rate": 1.4902779481027195e-05, "loss": 0.0731, "step": 29193 }, { "epoch": 0.5170122240079339, "grad_norm": 0.45286092162132263, "learning_rate": 1.4901919134660164e-05, "loss": 0.0583, "step": 29194 }, { "epoch": 0.5170299335449623, "grad_norm": 0.7799853086471558, "learning_rate": 1.4901058788615804e-05, "loss": 0.0485, "step": 29195 }, { "epoch": 0.5170476430819907, "grad_norm": 0.8140276670455933, "learning_rate": 1.4900198442896963e-05, "loss": 0.0969, "step": 29196 }, { "epoch": 0.5170653526190192, "grad_norm": 0.76115483045578, "learning_rate": 1.4899338097506447e-05, "loss": 0.0691, "step": 29197 }, { "epoch": 0.5170830621560476, "grad_norm": 1.2945101261138916, "learning_rate": 1.48984777524471e-05, "loss": 0.1024, "step": 29198 }, { "epoch": 0.517100771693076, "grad_norm": 0.3648708760738373, "learning_rate": 1.4897617407721758e-05, "loss": 0.0599, "step": 29199 }, { "epoch": 0.5171184812301044, "grad_norm": 0.6775011420249939, "learning_rate": 1.4896757063333238e-05, "loss": 0.078, "step": 29200 }, { "epoch": 0.5171361907671329, "grad_norm": 0.5109745264053345, "learning_rate": 1.4895896719284378e-05, "loss": 0.0654, "step": 29201 }, { "epoch": 0.5171539003041613, "grad_norm": 0.4267703592777252, "learning_rate": 1.4895036375578006e-05, "loss": 0.0518, "step": 29202 }, { "epoch": 0.5171716098411897, "grad_norm": 0.9448433518409729, "learning_rate": 1.4894176032216959e-05, "loss": 0.0728, "step": 29203 }, { "epoch": 0.5171893193782181, "grad_norm": 0.5713220834732056, "learning_rate": 1.4893315689204056e-05, "loss": 0.0862, "step": 29204 }, { "epoch": 0.5172070289152466, "grad_norm": 0.5295488834381104, "learning_rate": 1.4892455346542136e-05, "loss": 0.0847, "step": 29205 }, { "epoch": 0.517224738452275, "grad_norm": 0.6375291347503662, "learning_rate": 1.4891595004234026e-05, "loss": 0.0602, "step": 29206 }, { "epoch": 0.5172424479893034, "grad_norm": 0.9868224263191223, "learning_rate": 1.4890734662282562e-05, "loss": 0.0878, "step": 29207 }, { "epoch": 0.5172601575263319, "grad_norm": 0.7359581589698792, "learning_rate": 1.4889874320690565e-05, "loss": 0.0668, "step": 29208 }, { "epoch": 0.5172778670633603, "grad_norm": 0.6114475727081299, "learning_rate": 1.488901397946087e-05, "loss": 0.092, "step": 29209 }, { "epoch": 0.5172955766003887, "grad_norm": 0.759680986404419, "learning_rate": 1.4888153638596315e-05, "loss": 0.0469, "step": 29210 }, { "epoch": 0.5173132861374171, "grad_norm": 0.5258932709693909, "learning_rate": 1.4887293298099717e-05, "loss": 0.069, "step": 29211 }, { "epoch": 0.5173309956744456, "grad_norm": 0.670562744140625, "learning_rate": 1.4886432957973909e-05, "loss": 0.094, "step": 29212 }, { "epoch": 0.517348705211474, "grad_norm": 0.8207903504371643, "learning_rate": 1.4885572618221728e-05, "loss": 0.064, "step": 29213 }, { "epoch": 0.5173664147485024, "grad_norm": 0.5341303944587708, "learning_rate": 1.4884712278846009e-05, "loss": 0.0668, "step": 29214 }, { "epoch": 0.5173841242855308, "grad_norm": 0.6155224442481995, "learning_rate": 1.4883851939849568e-05, "loss": 0.0868, "step": 29215 }, { "epoch": 0.5174018338225593, "grad_norm": 0.868159830570221, "learning_rate": 1.4882991601235243e-05, "loss": 0.0659, "step": 29216 }, { "epoch": 0.5174195433595877, "grad_norm": 0.7910193800926208, "learning_rate": 1.4882131263005867e-05, "loss": 0.093, "step": 29217 }, { "epoch": 0.5174372528966161, "grad_norm": 0.5668127536773682, "learning_rate": 1.4881270925164261e-05, "loss": 0.1018, "step": 29218 }, { "epoch": 0.5174549624336445, "grad_norm": 1.4364235401153564, "learning_rate": 1.4880410587713266e-05, "loss": 0.0554, "step": 29219 }, { "epoch": 0.517472671970673, "grad_norm": 0.5762299299240112, "learning_rate": 1.4879550250655705e-05, "loss": 0.1171, "step": 29220 }, { "epoch": 0.5174903815077014, "grad_norm": 0.6129853129386902, "learning_rate": 1.4878689913994416e-05, "loss": 0.0576, "step": 29221 }, { "epoch": 0.5175080910447298, "grad_norm": 0.5719045400619507, "learning_rate": 1.4877829577732221e-05, "loss": 0.0602, "step": 29222 }, { "epoch": 0.5175258005817583, "grad_norm": 0.5241268277168274, "learning_rate": 1.4876969241871956e-05, "loss": 0.0488, "step": 29223 }, { "epoch": 0.5175435101187867, "grad_norm": 0.560636579990387, "learning_rate": 1.4876108906416454e-05, "loss": 0.0499, "step": 29224 }, { "epoch": 0.5175612196558151, "grad_norm": 0.5710617899894714, "learning_rate": 1.4875248571368535e-05, "loss": 0.0623, "step": 29225 }, { "epoch": 0.5175789291928435, "grad_norm": 0.6392486095428467, "learning_rate": 1.4874388236731035e-05, "loss": 0.0769, "step": 29226 }, { "epoch": 0.517596638729872, "grad_norm": 0.5337508916854858, "learning_rate": 1.4873527902506786e-05, "loss": 0.0838, "step": 29227 }, { "epoch": 0.5176143482669004, "grad_norm": 0.9157063364982605, "learning_rate": 1.4872667568698623e-05, "loss": 0.0659, "step": 29228 }, { "epoch": 0.5176320578039288, "grad_norm": 0.9359449148178101, "learning_rate": 1.4871807235309366e-05, "loss": 0.0614, "step": 29229 }, { "epoch": 0.5176497673409572, "grad_norm": 0.573663055896759, "learning_rate": 1.487094690234185e-05, "loss": 0.073, "step": 29230 }, { "epoch": 0.5176674768779858, "grad_norm": 0.7983813285827637, "learning_rate": 1.487008656979891e-05, "loss": 0.0543, "step": 29231 }, { "epoch": 0.5176851864150142, "grad_norm": 0.7046640515327454, "learning_rate": 1.4869226237683365e-05, "loss": 0.0632, "step": 29232 }, { "epoch": 0.5177028959520426, "grad_norm": 0.6304296851158142, "learning_rate": 1.4868365905998056e-05, "loss": 0.1074, "step": 29233 }, { "epoch": 0.517720605489071, "grad_norm": 0.5230650305747986, "learning_rate": 1.4867505574745809e-05, "loss": 0.0477, "step": 29234 }, { "epoch": 0.5177383150260995, "grad_norm": 0.6769535541534424, "learning_rate": 1.4866645243929459e-05, "loss": 0.0814, "step": 29235 }, { "epoch": 0.5177560245631279, "grad_norm": 0.7846522927284241, "learning_rate": 1.4865784913551827e-05, "loss": 0.0772, "step": 29236 }, { "epoch": 0.5177737341001563, "grad_norm": 0.7090252041816711, "learning_rate": 1.4864924583615751e-05, "loss": 0.0535, "step": 29237 }, { "epoch": 0.5177914436371848, "grad_norm": 0.5675914883613586, "learning_rate": 1.486406425412406e-05, "loss": 0.0785, "step": 29238 }, { "epoch": 0.5178091531742132, "grad_norm": 0.8602662086486816, "learning_rate": 1.486320392507959e-05, "loss": 0.0646, "step": 29239 }, { "epoch": 0.5178268627112416, "grad_norm": 0.6354915499687195, "learning_rate": 1.4862343596485158e-05, "loss": 0.0641, "step": 29240 }, { "epoch": 0.51784457224827, "grad_norm": 0.2098589390516281, "learning_rate": 1.4861483268343599e-05, "loss": 0.0348, "step": 29241 }, { "epoch": 0.5178622817852985, "grad_norm": 0.784602165222168, "learning_rate": 1.4860622940657755e-05, "loss": 0.0797, "step": 29242 }, { "epoch": 0.5178799913223269, "grad_norm": 0.9072402715682983, "learning_rate": 1.4859762613430441e-05, "loss": 0.0824, "step": 29243 }, { "epoch": 0.5178977008593553, "grad_norm": 0.8212347626686096, "learning_rate": 1.4858902286664494e-05, "loss": 0.0518, "step": 29244 }, { "epoch": 0.5179154103963837, "grad_norm": 0.4068233370780945, "learning_rate": 1.4858041960362745e-05, "loss": 0.0554, "step": 29245 }, { "epoch": 0.5179331199334122, "grad_norm": 0.7493931651115417, "learning_rate": 1.4857181634528026e-05, "loss": 0.0986, "step": 29246 }, { "epoch": 0.5179508294704406, "grad_norm": 0.6319104433059692, "learning_rate": 1.4856321309163163e-05, "loss": 0.0728, "step": 29247 }, { "epoch": 0.517968539007469, "grad_norm": 0.7164536118507385, "learning_rate": 1.4855460984270986e-05, "loss": 0.0506, "step": 29248 }, { "epoch": 0.5179862485444974, "grad_norm": 0.4217078685760498, "learning_rate": 1.4854600659854333e-05, "loss": 0.0528, "step": 29249 }, { "epoch": 0.5180039580815259, "grad_norm": 0.779815673828125, "learning_rate": 1.4853740335916026e-05, "loss": 0.0554, "step": 29250 }, { "epoch": 0.5180216676185543, "grad_norm": 0.7418398261070251, "learning_rate": 1.4852880012458898e-05, "loss": 0.0995, "step": 29251 }, { "epoch": 0.5180393771555827, "grad_norm": 0.6205316185951233, "learning_rate": 1.4852019689485778e-05, "loss": 0.0751, "step": 29252 }, { "epoch": 0.5180570866926112, "grad_norm": 0.9239130020141602, "learning_rate": 1.4851159366999507e-05, "loss": 0.0826, "step": 29253 }, { "epoch": 0.5180747962296396, "grad_norm": 0.5737340450286865, "learning_rate": 1.4850299045002899e-05, "loss": 0.054, "step": 29254 }, { "epoch": 0.518092505766668, "grad_norm": 0.619475781917572, "learning_rate": 1.4849438723498789e-05, "loss": 0.0633, "step": 29255 }, { "epoch": 0.5181102153036964, "grad_norm": 0.7470846176147461, "learning_rate": 1.4848578402490021e-05, "loss": 0.0637, "step": 29256 }, { "epoch": 0.5181279248407249, "grad_norm": 0.6073050498962402, "learning_rate": 1.4847718081979407e-05, "loss": 0.0616, "step": 29257 }, { "epoch": 0.5181456343777533, "grad_norm": 0.7869625687599182, "learning_rate": 1.4846857761969786e-05, "loss": 0.0563, "step": 29258 }, { "epoch": 0.5181633439147817, "grad_norm": 0.771869957447052, "learning_rate": 1.4845997442463987e-05, "loss": 0.0454, "step": 29259 }, { "epoch": 0.5181810534518101, "grad_norm": 0.6150224804878235, "learning_rate": 1.4845137123464845e-05, "loss": 0.0954, "step": 29260 }, { "epoch": 0.5181987629888386, "grad_norm": 0.33398422598838806, "learning_rate": 1.4844276804975182e-05, "loss": 0.0832, "step": 29261 }, { "epoch": 0.518216472525867, "grad_norm": 0.7234766483306885, "learning_rate": 1.4843416486997832e-05, "loss": 0.0983, "step": 29262 }, { "epoch": 0.5182341820628954, "grad_norm": 0.7170031666755676, "learning_rate": 1.484255616953563e-05, "loss": 0.0938, "step": 29263 }, { "epoch": 0.5182518915999238, "grad_norm": 0.41526466608047485, "learning_rate": 1.48416958525914e-05, "loss": 0.0439, "step": 29264 }, { "epoch": 0.5182696011369523, "grad_norm": 0.18096347153186798, "learning_rate": 1.4840835536167974e-05, "loss": 0.0621, "step": 29265 }, { "epoch": 0.5182873106739807, "grad_norm": 0.4951270818710327, "learning_rate": 1.4839975220268182e-05, "loss": 0.0873, "step": 29266 }, { "epoch": 0.5183050202110091, "grad_norm": 0.8983641862869263, "learning_rate": 1.4839114904894862e-05, "loss": 0.0919, "step": 29267 }, { "epoch": 0.5183227297480376, "grad_norm": 0.5900784730911255, "learning_rate": 1.4838254590050833e-05, "loss": 0.0806, "step": 29268 }, { "epoch": 0.518340439285066, "grad_norm": 0.3982273042201996, "learning_rate": 1.4837394275738928e-05, "loss": 0.0967, "step": 29269 }, { "epoch": 0.5183581488220944, "grad_norm": 0.6662191152572632, "learning_rate": 1.4836533961961982e-05, "loss": 0.0714, "step": 29270 }, { "epoch": 0.5183758583591228, "grad_norm": 0.6339689493179321, "learning_rate": 1.4835673648722821e-05, "loss": 0.0794, "step": 29271 }, { "epoch": 0.5183935678961513, "grad_norm": 0.5916558504104614, "learning_rate": 1.4834813336024277e-05, "loss": 0.0836, "step": 29272 }, { "epoch": 0.5184112774331797, "grad_norm": 0.5800533294677734, "learning_rate": 1.4833953023869182e-05, "loss": 0.036, "step": 29273 }, { "epoch": 0.5184289869702081, "grad_norm": 0.4215655028820038, "learning_rate": 1.4833092712260366e-05, "loss": 0.1075, "step": 29274 }, { "epoch": 0.5184466965072365, "grad_norm": 0.5384124517440796, "learning_rate": 1.4832232401200655e-05, "loss": 0.0753, "step": 29275 }, { "epoch": 0.518464406044265, "grad_norm": 0.4741443991661072, "learning_rate": 1.4831372090692882e-05, "loss": 0.0713, "step": 29276 }, { "epoch": 0.5184821155812934, "grad_norm": 0.6793406009674072, "learning_rate": 1.4830511780739878e-05, "loss": 0.0592, "step": 29277 }, { "epoch": 0.5184998251183218, "grad_norm": 0.713777482509613, "learning_rate": 1.4829651471344476e-05, "loss": 0.0972, "step": 29278 }, { "epoch": 0.5185175346553502, "grad_norm": 0.6464924216270447, "learning_rate": 1.4828791162509501e-05, "loss": 0.0963, "step": 29279 }, { "epoch": 0.5185352441923787, "grad_norm": 0.4653870761394501, "learning_rate": 1.4827930854237787e-05, "loss": 0.0452, "step": 29280 }, { "epoch": 0.5185529537294071, "grad_norm": 0.7823979258537292, "learning_rate": 1.4827070546532167e-05, "loss": 0.0928, "step": 29281 }, { "epoch": 0.5185706632664355, "grad_norm": 0.27365314960479736, "learning_rate": 1.4826210239395463e-05, "loss": 0.0484, "step": 29282 }, { "epoch": 0.518588372803464, "grad_norm": 0.6890760660171509, "learning_rate": 1.482534993283051e-05, "loss": 0.087, "step": 29283 }, { "epoch": 0.5186060823404924, "grad_norm": 0.9242328405380249, "learning_rate": 1.4824489626840133e-05, "loss": 0.0805, "step": 29284 }, { "epoch": 0.5186237918775208, "grad_norm": 1.2650309801101685, "learning_rate": 1.4823629321427176e-05, "loss": 0.08, "step": 29285 }, { "epoch": 0.5186415014145492, "grad_norm": 0.4953298568725586, "learning_rate": 1.4822769016594458e-05, "loss": 0.0697, "step": 29286 }, { "epoch": 0.5186592109515777, "grad_norm": 0.6606470942497253, "learning_rate": 1.482190871234481e-05, "loss": 0.0778, "step": 29287 }, { "epoch": 0.5186769204886061, "grad_norm": 0.8508689999580383, "learning_rate": 1.4821048408681066e-05, "loss": 0.0793, "step": 29288 }, { "epoch": 0.5186946300256345, "grad_norm": 0.7005510926246643, "learning_rate": 1.4820188105606053e-05, "loss": 0.0578, "step": 29289 }, { "epoch": 0.5187123395626629, "grad_norm": 0.83244788646698, "learning_rate": 1.4819327803122603e-05, "loss": 0.0627, "step": 29290 }, { "epoch": 0.5187300490996914, "grad_norm": 0.8464810252189636, "learning_rate": 1.4818467501233548e-05, "loss": 0.0573, "step": 29291 }, { "epoch": 0.5187477586367198, "grad_norm": 0.5531803369522095, "learning_rate": 1.4817607199941717e-05, "loss": 0.043, "step": 29292 }, { "epoch": 0.5187654681737482, "grad_norm": 0.49200981855392456, "learning_rate": 1.4816746899249937e-05, "loss": 0.0724, "step": 29293 }, { "epoch": 0.5187831777107766, "grad_norm": 0.7276808023452759, "learning_rate": 1.4815886599161042e-05, "loss": 0.0825, "step": 29294 }, { "epoch": 0.5188008872478052, "grad_norm": 0.9999276995658875, "learning_rate": 1.4815026299677867e-05, "loss": 0.0735, "step": 29295 }, { "epoch": 0.5188185967848336, "grad_norm": 0.5992675423622131, "learning_rate": 1.4814166000803231e-05, "loss": 0.0583, "step": 29296 }, { "epoch": 0.518836306321862, "grad_norm": 0.6899375915527344, "learning_rate": 1.4813305702539971e-05, "loss": 0.133, "step": 29297 }, { "epoch": 0.5188540158588905, "grad_norm": 0.7121132612228394, "learning_rate": 1.4812445404890916e-05, "loss": 0.0597, "step": 29298 }, { "epoch": 0.5188717253959189, "grad_norm": 0.5494518876075745, "learning_rate": 1.4811585107858898e-05, "loss": 0.0885, "step": 29299 }, { "epoch": 0.5188894349329473, "grad_norm": 0.8508530259132385, "learning_rate": 1.4810724811446744e-05, "loss": 0.0876, "step": 29300 }, { "epoch": 0.5189071444699757, "grad_norm": 0.5412651300430298, "learning_rate": 1.4809864515657287e-05, "loss": 0.0587, "step": 29301 }, { "epoch": 0.5189248540070042, "grad_norm": 0.6039538979530334, "learning_rate": 1.4809004220493358e-05, "loss": 0.0994, "step": 29302 }, { "epoch": 0.5189425635440326, "grad_norm": 0.6628490090370178, "learning_rate": 1.4808143925957784e-05, "loss": 0.0686, "step": 29303 }, { "epoch": 0.518960273081061, "grad_norm": 0.6289207339286804, "learning_rate": 1.4807283632053397e-05, "loss": 0.0711, "step": 29304 }, { "epoch": 0.5189779826180894, "grad_norm": 0.5415955781936646, "learning_rate": 1.4806423338783027e-05, "loss": 0.0743, "step": 29305 }, { "epoch": 0.5189956921551179, "grad_norm": 0.4188022017478943, "learning_rate": 1.480556304614951e-05, "loss": 0.0591, "step": 29306 }, { "epoch": 0.5190134016921463, "grad_norm": 1.0475131273269653, "learning_rate": 1.4804702754155664e-05, "loss": 0.0801, "step": 29307 }, { "epoch": 0.5190311112291747, "grad_norm": 0.5800026059150696, "learning_rate": 1.4803842462804327e-05, "loss": 0.045, "step": 29308 }, { "epoch": 0.5190488207662031, "grad_norm": 0.6210284233093262, "learning_rate": 1.480298217209833e-05, "loss": 0.0571, "step": 29309 }, { "epoch": 0.5190665303032316, "grad_norm": 0.9786893725395203, "learning_rate": 1.4802121882040506e-05, "loss": 0.0949, "step": 29310 }, { "epoch": 0.51908423984026, "grad_norm": 0.6812525987625122, "learning_rate": 1.4801261592633676e-05, "loss": 0.1069, "step": 29311 }, { "epoch": 0.5191019493772884, "grad_norm": 0.6599486470222473, "learning_rate": 1.4800401303880675e-05, "loss": 0.085, "step": 29312 }, { "epoch": 0.5191196589143169, "grad_norm": 1.0283582210540771, "learning_rate": 1.4799541015784337e-05, "loss": 0.0579, "step": 29313 }, { "epoch": 0.5191373684513453, "grad_norm": 1.0743762254714966, "learning_rate": 1.4798680728347485e-05, "loss": 0.0821, "step": 29314 }, { "epoch": 0.5191550779883737, "grad_norm": 0.7917749285697937, "learning_rate": 1.4797820441572953e-05, "loss": 0.0702, "step": 29315 }, { "epoch": 0.5191727875254021, "grad_norm": 0.4818962812423706, "learning_rate": 1.4796960155463573e-05, "loss": 0.0668, "step": 29316 }, { "epoch": 0.5191904970624306, "grad_norm": 0.5415278077125549, "learning_rate": 1.4796099870022175e-05, "loss": 0.0437, "step": 29317 }, { "epoch": 0.519208206599459, "grad_norm": 0.7320986986160278, "learning_rate": 1.4795239585251586e-05, "loss": 0.0977, "step": 29318 }, { "epoch": 0.5192259161364874, "grad_norm": 0.5621667504310608, "learning_rate": 1.4794379301154636e-05, "loss": 0.0656, "step": 29319 }, { "epoch": 0.5192436256735158, "grad_norm": 0.9073353409767151, "learning_rate": 1.4793519017734166e-05, "loss": 0.0898, "step": 29320 }, { "epoch": 0.5192613352105443, "grad_norm": 0.7118582129478455, "learning_rate": 1.479265873499299e-05, "loss": 0.0919, "step": 29321 }, { "epoch": 0.5192790447475727, "grad_norm": 0.8026415705680847, "learning_rate": 1.4791798452933942e-05, "loss": 0.0891, "step": 29322 }, { "epoch": 0.5192967542846011, "grad_norm": 0.5479809045791626, "learning_rate": 1.4790938171559859e-05, "loss": 0.0625, "step": 29323 }, { "epoch": 0.5193144638216295, "grad_norm": 0.49266326427459717, "learning_rate": 1.4790077890873577e-05, "loss": 0.0699, "step": 29324 }, { "epoch": 0.519332173358658, "grad_norm": 0.8018717765808105, "learning_rate": 1.4789217610877909e-05, "loss": 0.0728, "step": 29325 }, { "epoch": 0.5193498828956864, "grad_norm": 0.6034086346626282, "learning_rate": 1.4788357331575694e-05, "loss": 0.0569, "step": 29326 }, { "epoch": 0.5193675924327148, "grad_norm": 0.4338582158088684, "learning_rate": 1.4787497052969765e-05, "loss": 0.0588, "step": 29327 }, { "epoch": 0.5193853019697433, "grad_norm": 0.6791536211967468, "learning_rate": 1.4786636775062948e-05, "loss": 0.0784, "step": 29328 }, { "epoch": 0.5194030115067717, "grad_norm": 0.5222346186637878, "learning_rate": 1.4785776497858073e-05, "loss": 0.0754, "step": 29329 }, { "epoch": 0.5194207210438001, "grad_norm": 0.701407790184021, "learning_rate": 1.4784916221357971e-05, "loss": 0.0569, "step": 29330 }, { "epoch": 0.5194384305808285, "grad_norm": 0.605597972869873, "learning_rate": 1.4784055945565478e-05, "loss": 0.0753, "step": 29331 }, { "epoch": 0.519456140117857, "grad_norm": 0.7879331111907959, "learning_rate": 1.4783195670483414e-05, "loss": 0.0717, "step": 29332 }, { "epoch": 0.5194738496548854, "grad_norm": 0.5175342559814453, "learning_rate": 1.4782335396114614e-05, "loss": 0.0584, "step": 29333 }, { "epoch": 0.5194915591919138, "grad_norm": 0.6305782198905945, "learning_rate": 1.4781475122461917e-05, "loss": 0.0542, "step": 29334 }, { "epoch": 0.5195092687289422, "grad_norm": 0.2705047130584717, "learning_rate": 1.4780614849528137e-05, "loss": 0.0446, "step": 29335 }, { "epoch": 0.5195269782659707, "grad_norm": 0.6864007115364075, "learning_rate": 1.477975457731611e-05, "loss": 0.0692, "step": 29336 }, { "epoch": 0.5195446878029991, "grad_norm": 0.4281965494155884, "learning_rate": 1.4778894305828668e-05, "loss": 0.073, "step": 29337 }, { "epoch": 0.5195623973400275, "grad_norm": 0.5233429074287415, "learning_rate": 1.4778034035068651e-05, "loss": 0.0439, "step": 29338 }, { "epoch": 0.5195801068770559, "grad_norm": 0.7049299478530884, "learning_rate": 1.4777173765038873e-05, "loss": 0.0883, "step": 29339 }, { "epoch": 0.5195978164140844, "grad_norm": 0.7068848013877869, "learning_rate": 1.4776313495742168e-05, "loss": 0.0797, "step": 29340 }, { "epoch": 0.5196155259511128, "grad_norm": 0.6549633741378784, "learning_rate": 1.4775453227181373e-05, "loss": 0.0808, "step": 29341 }, { "epoch": 0.5196332354881412, "grad_norm": 0.5814151167869568, "learning_rate": 1.4774592959359315e-05, "loss": 0.068, "step": 29342 }, { "epoch": 0.5196509450251697, "grad_norm": 0.9527032375335693, "learning_rate": 1.4773732692278821e-05, "loss": 0.0806, "step": 29343 }, { "epoch": 0.5196686545621981, "grad_norm": 0.9351404905319214, "learning_rate": 1.4772872425942722e-05, "loss": 0.1111, "step": 29344 }, { "epoch": 0.5196863640992265, "grad_norm": 0.6352638602256775, "learning_rate": 1.4772012160353855e-05, "loss": 0.0851, "step": 29345 }, { "epoch": 0.5197040736362549, "grad_norm": 0.8857734203338623, "learning_rate": 1.4771151895515039e-05, "loss": 0.0433, "step": 29346 }, { "epoch": 0.5197217831732834, "grad_norm": 0.28870561718940735, "learning_rate": 1.4770291631429113e-05, "loss": 0.0785, "step": 29347 }, { "epoch": 0.5197394927103118, "grad_norm": 0.48035484552383423, "learning_rate": 1.4769431368098904e-05, "loss": 0.0499, "step": 29348 }, { "epoch": 0.5197572022473402, "grad_norm": 0.4834539592266083, "learning_rate": 1.4768571105527248e-05, "loss": 0.0599, "step": 29349 }, { "epoch": 0.5197749117843686, "grad_norm": 0.6272562742233276, "learning_rate": 1.4767710843716964e-05, "loss": 0.0476, "step": 29350 }, { "epoch": 0.5197926213213971, "grad_norm": 0.5523353219032288, "learning_rate": 1.4766850582670884e-05, "loss": 0.0531, "step": 29351 }, { "epoch": 0.5198103308584255, "grad_norm": 0.6252676844596863, "learning_rate": 1.476599032239185e-05, "loss": 0.0751, "step": 29352 }, { "epoch": 0.5198280403954539, "grad_norm": 0.6577476263046265, "learning_rate": 1.476513006288268e-05, "loss": 0.0816, "step": 29353 }, { "epoch": 0.5198457499324823, "grad_norm": 0.7193920612335205, "learning_rate": 1.4764269804146208e-05, "loss": 0.0957, "step": 29354 }, { "epoch": 0.5198634594695108, "grad_norm": 0.6863700151443481, "learning_rate": 1.4763409546185264e-05, "loss": 0.0876, "step": 29355 }, { "epoch": 0.5198811690065392, "grad_norm": 0.8426961898803711, "learning_rate": 1.476254928900268e-05, "loss": 0.071, "step": 29356 }, { "epoch": 0.5198988785435676, "grad_norm": 0.26297903060913086, "learning_rate": 1.4761689032601285e-05, "loss": 0.0478, "step": 29357 }, { "epoch": 0.5199165880805962, "grad_norm": 0.507206916809082, "learning_rate": 1.4760828776983908e-05, "loss": 0.0709, "step": 29358 }, { "epoch": 0.5199342976176246, "grad_norm": 0.608392596244812, "learning_rate": 1.4759968522153383e-05, "loss": 0.0909, "step": 29359 }, { "epoch": 0.519952007154653, "grad_norm": 0.7583944797515869, "learning_rate": 1.4759108268112534e-05, "loss": 0.0873, "step": 29360 }, { "epoch": 0.5199697166916813, "grad_norm": 0.6369591951370239, "learning_rate": 1.4758248014864195e-05, "loss": 0.0544, "step": 29361 }, { "epoch": 0.5199874262287099, "grad_norm": 0.658193051815033, "learning_rate": 1.4757387762411194e-05, "loss": 0.088, "step": 29362 }, { "epoch": 0.5200051357657383, "grad_norm": 0.5924336910247803, "learning_rate": 1.475652751075637e-05, "loss": 0.0631, "step": 29363 }, { "epoch": 0.5200228453027667, "grad_norm": 0.43968138098716736, "learning_rate": 1.475566725990254e-05, "loss": 0.0442, "step": 29364 }, { "epoch": 0.520040554839795, "grad_norm": 0.871653139591217, "learning_rate": 1.4754807009852535e-05, "loss": 0.0659, "step": 29365 }, { "epoch": 0.5200582643768236, "grad_norm": 0.3485250771045685, "learning_rate": 1.4753946760609204e-05, "loss": 0.044, "step": 29366 }, { "epoch": 0.520075973913852, "grad_norm": 0.5428621768951416, "learning_rate": 1.4753086512175352e-05, "loss": 0.0586, "step": 29367 }, { "epoch": 0.5200936834508804, "grad_norm": 0.7533922791481018, "learning_rate": 1.4752226264553824e-05, "loss": 0.077, "step": 29368 }, { "epoch": 0.5201113929879088, "grad_norm": 0.48409146070480347, "learning_rate": 1.4751366017747445e-05, "loss": 0.0535, "step": 29369 }, { "epoch": 0.5201291025249373, "grad_norm": 0.9134548902511597, "learning_rate": 1.475050577175905e-05, "loss": 0.0989, "step": 29370 }, { "epoch": 0.5201468120619657, "grad_norm": 0.7474781274795532, "learning_rate": 1.4749645526591462e-05, "loss": 0.0706, "step": 29371 }, { "epoch": 0.5201645215989941, "grad_norm": 0.745389461517334, "learning_rate": 1.4748785282247515e-05, "loss": 0.0523, "step": 29372 }, { "epoch": 0.5201822311360226, "grad_norm": 0.9112396240234375, "learning_rate": 1.4747925038730043e-05, "loss": 0.0791, "step": 29373 }, { "epoch": 0.520199940673051, "grad_norm": 0.3869912624359131, "learning_rate": 1.4747064796041868e-05, "loss": 0.0567, "step": 29374 }, { "epoch": 0.5202176502100794, "grad_norm": 0.9208511710166931, "learning_rate": 1.4746204554185824e-05, "loss": 0.0718, "step": 29375 }, { "epoch": 0.5202353597471078, "grad_norm": 0.5337092876434326, "learning_rate": 1.4745344313164743e-05, "loss": 0.0699, "step": 29376 }, { "epoch": 0.5202530692841363, "grad_norm": 0.40760889649391174, "learning_rate": 1.474448407298146e-05, "loss": 0.0529, "step": 29377 }, { "epoch": 0.5202707788211647, "grad_norm": 0.8095003962516785, "learning_rate": 1.474362383363879e-05, "loss": 0.0588, "step": 29378 }, { "epoch": 0.5202884883581931, "grad_norm": 0.7119308710098267, "learning_rate": 1.4742763595139573e-05, "loss": 0.0831, "step": 29379 }, { "epoch": 0.5203061978952215, "grad_norm": 0.39080679416656494, "learning_rate": 1.4741903357486634e-05, "loss": 0.1023, "step": 29380 }, { "epoch": 0.52032390743225, "grad_norm": 0.7324613928794861, "learning_rate": 1.4741043120682818e-05, "loss": 0.0687, "step": 29381 }, { "epoch": 0.5203416169692784, "grad_norm": 0.30752673745155334, "learning_rate": 1.4740182884730935e-05, "loss": 0.1049, "step": 29382 }, { "epoch": 0.5203593265063068, "grad_norm": 0.6451138257980347, "learning_rate": 1.4739322649633825e-05, "loss": 0.0672, "step": 29383 }, { "epoch": 0.5203770360433352, "grad_norm": 0.7723154425621033, "learning_rate": 1.4738462415394321e-05, "loss": 0.0741, "step": 29384 }, { "epoch": 0.5203947455803637, "grad_norm": 0.6694228649139404, "learning_rate": 1.4737602182015246e-05, "loss": 0.0933, "step": 29385 }, { "epoch": 0.5204124551173921, "grad_norm": 0.7510311603546143, "learning_rate": 1.4736741949499432e-05, "loss": 0.057, "step": 29386 }, { "epoch": 0.5204301646544205, "grad_norm": 0.6532682776451111, "learning_rate": 1.4735881717849712e-05, "loss": 0.0766, "step": 29387 }, { "epoch": 0.520447874191449, "grad_norm": 0.48790496587753296, "learning_rate": 1.4735021487068916e-05, "loss": 0.0521, "step": 29388 }, { "epoch": 0.5204655837284774, "grad_norm": 0.6275405883789062, "learning_rate": 1.473416125715987e-05, "loss": 0.0624, "step": 29389 }, { "epoch": 0.5204832932655058, "grad_norm": 0.8188108801841736, "learning_rate": 1.4733301028125407e-05, "loss": 0.0895, "step": 29390 }, { "epoch": 0.5205010028025342, "grad_norm": 0.4976118505001068, "learning_rate": 1.4732440799968364e-05, "loss": 0.0724, "step": 29391 }, { "epoch": 0.5205187123395627, "grad_norm": 0.7054935693740845, "learning_rate": 1.4731580572691556e-05, "loss": 0.0813, "step": 29392 }, { "epoch": 0.5205364218765911, "grad_norm": 0.6199583411216736, "learning_rate": 1.473072034629782e-05, "loss": 0.0803, "step": 29393 }, { "epoch": 0.5205541314136195, "grad_norm": 0.41041943430900574, "learning_rate": 1.4729860120789985e-05, "loss": 0.0838, "step": 29394 }, { "epoch": 0.5205718409506479, "grad_norm": 0.3636219799518585, "learning_rate": 1.4728999896170892e-05, "loss": 0.0707, "step": 29395 }, { "epoch": 0.5205895504876764, "grad_norm": 0.6860989928245544, "learning_rate": 1.4728139672443355e-05, "loss": 0.0659, "step": 29396 }, { "epoch": 0.5206072600247048, "grad_norm": 0.7283385396003723, "learning_rate": 1.4727279449610213e-05, "loss": 0.0932, "step": 29397 }, { "epoch": 0.5206249695617332, "grad_norm": 0.428340882062912, "learning_rate": 1.4726419227674295e-05, "loss": 0.0389, "step": 29398 }, { "epoch": 0.5206426790987616, "grad_norm": 0.9360154271125793, "learning_rate": 1.4725559006638427e-05, "loss": 0.0899, "step": 29399 }, { "epoch": 0.5206603886357901, "grad_norm": 0.6772070527076721, "learning_rate": 1.4724698786505443e-05, "loss": 0.1025, "step": 29400 }, { "epoch": 0.5206780981728185, "grad_norm": 0.39470788836479187, "learning_rate": 1.4723838567278172e-05, "loss": 0.0557, "step": 29401 }, { "epoch": 0.5206958077098469, "grad_norm": 0.7126390933990479, "learning_rate": 1.4722978348959447e-05, "loss": 0.0699, "step": 29402 }, { "epoch": 0.5207135172468754, "grad_norm": 0.7618135809898376, "learning_rate": 1.472211813155209e-05, "loss": 0.0594, "step": 29403 }, { "epoch": 0.5207312267839038, "grad_norm": 0.6675541996955872, "learning_rate": 1.472125791505894e-05, "loss": 0.0898, "step": 29404 }, { "epoch": 0.5207489363209322, "grad_norm": 1.0726829767227173, "learning_rate": 1.4720397699482829e-05, "loss": 0.1079, "step": 29405 }, { "epoch": 0.5207666458579606, "grad_norm": 0.565410852432251, "learning_rate": 1.4719537484826575e-05, "loss": 0.0675, "step": 29406 }, { "epoch": 0.5207843553949891, "grad_norm": 0.6010382771492004, "learning_rate": 1.4718677271093012e-05, "loss": 0.0478, "step": 29407 }, { "epoch": 0.5208020649320175, "grad_norm": 0.8069906830787659, "learning_rate": 1.4717817058284976e-05, "loss": 0.0668, "step": 29408 }, { "epoch": 0.5208197744690459, "grad_norm": 0.5385231375694275, "learning_rate": 1.4716956846405292e-05, "loss": 0.0641, "step": 29409 }, { "epoch": 0.5208374840060743, "grad_norm": 0.4813116192817688, "learning_rate": 1.471609663545679e-05, "loss": 0.0544, "step": 29410 }, { "epoch": 0.5208551935431028, "grad_norm": 0.8186599612236023, "learning_rate": 1.4715236425442302e-05, "loss": 0.0614, "step": 29411 }, { "epoch": 0.5208729030801312, "grad_norm": 0.9014776945114136, "learning_rate": 1.4714376216364656e-05, "loss": 0.0584, "step": 29412 }, { "epoch": 0.5208906126171596, "grad_norm": 0.8548853993415833, "learning_rate": 1.471351600822669e-05, "loss": 0.0774, "step": 29413 }, { "epoch": 0.520908322154188, "grad_norm": 0.5542030334472656, "learning_rate": 1.471265580103122e-05, "loss": 0.1006, "step": 29414 }, { "epoch": 0.5209260316912165, "grad_norm": 0.7429592609405518, "learning_rate": 1.4711795594781086e-05, "loss": 0.0876, "step": 29415 }, { "epoch": 0.5209437412282449, "grad_norm": 0.4927867650985718, "learning_rate": 1.4710935389479121e-05, "loss": 0.0587, "step": 29416 }, { "epoch": 0.5209614507652733, "grad_norm": 0.6066619157791138, "learning_rate": 1.471007518512814e-05, "loss": 0.0631, "step": 29417 }, { "epoch": 0.5209791603023018, "grad_norm": 0.648622453212738, "learning_rate": 1.4709214981730984e-05, "loss": 0.0655, "step": 29418 }, { "epoch": 0.5209968698393302, "grad_norm": 0.6384338736534119, "learning_rate": 1.4708354779290483e-05, "loss": 0.0641, "step": 29419 }, { "epoch": 0.5210145793763586, "grad_norm": 0.3454364538192749, "learning_rate": 1.470749457780947e-05, "loss": 0.0745, "step": 29420 }, { "epoch": 0.521032288913387, "grad_norm": 1.0273358821868896, "learning_rate": 1.4706634377290766e-05, "loss": 0.0941, "step": 29421 }, { "epoch": 0.5210499984504156, "grad_norm": 0.7443972826004028, "learning_rate": 1.4705774177737204e-05, "loss": 0.0738, "step": 29422 }, { "epoch": 0.521067707987444, "grad_norm": 0.6500481963157654, "learning_rate": 1.470491397915162e-05, "loss": 0.0536, "step": 29423 }, { "epoch": 0.5210854175244723, "grad_norm": 0.5106346011161804, "learning_rate": 1.4704053781536833e-05, "loss": 0.0725, "step": 29424 }, { "epoch": 0.5211031270615007, "grad_norm": 0.567390501499176, "learning_rate": 1.4703193584895682e-05, "loss": 0.0751, "step": 29425 }, { "epoch": 0.5211208365985293, "grad_norm": 0.568855345249176, "learning_rate": 1.4702333389230992e-05, "loss": 0.0698, "step": 29426 }, { "epoch": 0.5211385461355577, "grad_norm": 0.8205053806304932, "learning_rate": 1.4701473194545598e-05, "loss": 0.0659, "step": 29427 }, { "epoch": 0.521156255672586, "grad_norm": 0.6720948815345764, "learning_rate": 1.4700613000842325e-05, "loss": 0.0708, "step": 29428 }, { "epoch": 0.5211739652096145, "grad_norm": 0.5859094262123108, "learning_rate": 1.4699752808124006e-05, "loss": 0.0736, "step": 29429 }, { "epoch": 0.521191674746643, "grad_norm": 0.931509256362915, "learning_rate": 1.4698892616393474e-05, "loss": 0.061, "step": 29430 }, { "epoch": 0.5212093842836714, "grad_norm": 1.109679937362671, "learning_rate": 1.4698032425653546e-05, "loss": 0.0553, "step": 29431 }, { "epoch": 0.5212270938206998, "grad_norm": 0.6755277514457703, "learning_rate": 1.4697172235907065e-05, "loss": 0.1065, "step": 29432 }, { "epoch": 0.5212448033577283, "grad_norm": 0.5285733938217163, "learning_rate": 1.4696312047156857e-05, "loss": 0.0586, "step": 29433 }, { "epoch": 0.5212625128947567, "grad_norm": 0.96416836977005, "learning_rate": 1.4695451859405757e-05, "loss": 0.0663, "step": 29434 }, { "epoch": 0.5212802224317851, "grad_norm": 0.8569886684417725, "learning_rate": 1.4694591672656582e-05, "loss": 0.0721, "step": 29435 }, { "epoch": 0.5212979319688135, "grad_norm": 0.6819618940353394, "learning_rate": 1.4693731486912171e-05, "loss": 0.068, "step": 29436 }, { "epoch": 0.521315641505842, "grad_norm": 0.6812595129013062, "learning_rate": 1.4692871302175355e-05, "loss": 0.0446, "step": 29437 }, { "epoch": 0.5213333510428704, "grad_norm": 0.5540086030960083, "learning_rate": 1.469201111844896e-05, "loss": 0.0638, "step": 29438 }, { "epoch": 0.5213510605798988, "grad_norm": 0.7456357479095459, "learning_rate": 1.4691150935735814e-05, "loss": 0.0964, "step": 29439 }, { "epoch": 0.5213687701169272, "grad_norm": 0.6987352967262268, "learning_rate": 1.4690290754038752e-05, "loss": 0.112, "step": 29440 }, { "epoch": 0.5213864796539557, "grad_norm": 0.7879062294960022, "learning_rate": 1.4689430573360606e-05, "loss": 0.0746, "step": 29441 }, { "epoch": 0.5214041891909841, "grad_norm": 0.8534639477729797, "learning_rate": 1.4688570393704197e-05, "loss": 0.1347, "step": 29442 }, { "epoch": 0.5214218987280125, "grad_norm": 0.5800182223320007, "learning_rate": 1.468771021507236e-05, "loss": 0.059, "step": 29443 }, { "epoch": 0.5214396082650409, "grad_norm": 0.4681813716888428, "learning_rate": 1.4686850037467935e-05, "loss": 0.0541, "step": 29444 }, { "epoch": 0.5214573178020694, "grad_norm": 0.49032923579216003, "learning_rate": 1.468598986089373e-05, "loss": 0.0794, "step": 29445 }, { "epoch": 0.5214750273390978, "grad_norm": 0.7059001922607422, "learning_rate": 1.4685129685352586e-05, "loss": 0.0722, "step": 29446 }, { "epoch": 0.5214927368761262, "grad_norm": 0.5463256239891052, "learning_rate": 1.4684269510847339e-05, "loss": 0.0724, "step": 29447 }, { "epoch": 0.5215104464131547, "grad_norm": 0.6938696503639221, "learning_rate": 1.4683409337380816e-05, "loss": 0.0759, "step": 29448 }, { "epoch": 0.5215281559501831, "grad_norm": 0.49550455808639526, "learning_rate": 1.4682549164955839e-05, "loss": 0.0563, "step": 29449 }, { "epoch": 0.5215458654872115, "grad_norm": 0.5709758996963501, "learning_rate": 1.4681688993575244e-05, "loss": 0.0604, "step": 29450 }, { "epoch": 0.5215635750242399, "grad_norm": 0.7298496961593628, "learning_rate": 1.4680828823241859e-05, "loss": 0.0635, "step": 29451 }, { "epoch": 0.5215812845612684, "grad_norm": 0.5252613425254822, "learning_rate": 1.4679968653958518e-05, "loss": 0.0652, "step": 29452 }, { "epoch": 0.5215989940982968, "grad_norm": 0.5016818046569824, "learning_rate": 1.4679108485728045e-05, "loss": 0.0982, "step": 29453 }, { "epoch": 0.5216167036353252, "grad_norm": 0.6653280854225159, "learning_rate": 1.4678248318553273e-05, "loss": 0.0954, "step": 29454 }, { "epoch": 0.5216344131723536, "grad_norm": 0.4896317422389984, "learning_rate": 1.4677388152437035e-05, "loss": 0.0327, "step": 29455 }, { "epoch": 0.5216521227093821, "grad_norm": 0.5898453593254089, "learning_rate": 1.4676527987382152e-05, "loss": 0.078, "step": 29456 }, { "epoch": 0.5216698322464105, "grad_norm": 0.721041202545166, "learning_rate": 1.4675667823391462e-05, "loss": 0.0733, "step": 29457 }, { "epoch": 0.5216875417834389, "grad_norm": 0.48041340708732605, "learning_rate": 1.467480766046779e-05, "loss": 0.0564, "step": 29458 }, { "epoch": 0.5217052513204673, "grad_norm": 0.7019335627555847, "learning_rate": 1.4673947498613975e-05, "loss": 0.0958, "step": 29459 }, { "epoch": 0.5217229608574958, "grad_norm": 1.127684473991394, "learning_rate": 1.467308733783283e-05, "loss": 0.0529, "step": 29460 }, { "epoch": 0.5217406703945242, "grad_norm": 0.7473136782646179, "learning_rate": 1.4672227178127199e-05, "loss": 0.0695, "step": 29461 }, { "epoch": 0.5217583799315526, "grad_norm": 0.6904817819595337, "learning_rate": 1.4671367019499911e-05, "loss": 0.0632, "step": 29462 }, { "epoch": 0.5217760894685811, "grad_norm": 0.4178985357284546, "learning_rate": 1.467050686195379e-05, "loss": 0.0897, "step": 29463 }, { "epoch": 0.5217937990056095, "grad_norm": 0.412947416305542, "learning_rate": 1.4669646705491663e-05, "loss": 0.073, "step": 29464 }, { "epoch": 0.5218115085426379, "grad_norm": 0.5570401549339294, "learning_rate": 1.4668786550116369e-05, "loss": 0.0623, "step": 29465 }, { "epoch": 0.5218292180796663, "grad_norm": 0.4839179813861847, "learning_rate": 1.4667926395830736e-05, "loss": 0.0504, "step": 29466 }, { "epoch": 0.5218469276166948, "grad_norm": 1.0467246770858765, "learning_rate": 1.4667066242637588e-05, "loss": 0.1364, "step": 29467 }, { "epoch": 0.5218646371537232, "grad_norm": 0.5647783875465393, "learning_rate": 1.4666206090539758e-05, "loss": 0.0601, "step": 29468 }, { "epoch": 0.5218823466907516, "grad_norm": 0.42681336402893066, "learning_rate": 1.466534593954008e-05, "loss": 0.0748, "step": 29469 }, { "epoch": 0.52190005622778, "grad_norm": 0.6610948443412781, "learning_rate": 1.4664485789641377e-05, "loss": 0.0732, "step": 29470 }, { "epoch": 0.5219177657648085, "grad_norm": 0.7467507719993591, "learning_rate": 1.4663625640846481e-05, "loss": 0.0595, "step": 29471 }, { "epoch": 0.5219354753018369, "grad_norm": 0.6511716842651367, "learning_rate": 1.4662765493158222e-05, "loss": 0.047, "step": 29472 }, { "epoch": 0.5219531848388653, "grad_norm": 0.6038857102394104, "learning_rate": 1.4661905346579437e-05, "loss": 0.0392, "step": 29473 }, { "epoch": 0.5219708943758937, "grad_norm": 0.2959372103214264, "learning_rate": 1.4661045201112946e-05, "loss": 0.056, "step": 29474 }, { "epoch": 0.5219886039129222, "grad_norm": 0.7357149124145508, "learning_rate": 1.4660185056761576e-05, "loss": 0.068, "step": 29475 }, { "epoch": 0.5220063134499506, "grad_norm": 0.4412868618965149, "learning_rate": 1.4659324913528172e-05, "loss": 0.0644, "step": 29476 }, { "epoch": 0.522024022986979, "grad_norm": 0.5656547546386719, "learning_rate": 1.4658464771415548e-05, "loss": 0.0536, "step": 29477 }, { "epoch": 0.5220417325240075, "grad_norm": 0.5102996230125427, "learning_rate": 1.4657604630426542e-05, "loss": 0.0582, "step": 29478 }, { "epoch": 0.5220594420610359, "grad_norm": 0.8103389739990234, "learning_rate": 1.4656744490563982e-05, "loss": 0.0693, "step": 29479 }, { "epoch": 0.5220771515980643, "grad_norm": 0.41247445344924927, "learning_rate": 1.46558843518307e-05, "loss": 0.0456, "step": 29480 }, { "epoch": 0.5220948611350927, "grad_norm": 0.7404242753982544, "learning_rate": 1.4655024214229522e-05, "loss": 0.0873, "step": 29481 }, { "epoch": 0.5221125706721212, "grad_norm": 0.7786125540733337, "learning_rate": 1.4654164077763279e-05, "loss": 0.0532, "step": 29482 }, { "epoch": 0.5221302802091496, "grad_norm": 0.48917636275291443, "learning_rate": 1.4653303942434799e-05, "loss": 0.064, "step": 29483 }, { "epoch": 0.522147989746178, "grad_norm": 0.7915677428245544, "learning_rate": 1.465244380824692e-05, "loss": 0.0865, "step": 29484 }, { "epoch": 0.5221656992832064, "grad_norm": 0.5253631472587585, "learning_rate": 1.465158367520246e-05, "loss": 0.0762, "step": 29485 }, { "epoch": 0.522183408820235, "grad_norm": 0.46388986706733704, "learning_rate": 1.4650723543304257e-05, "loss": 0.0599, "step": 29486 }, { "epoch": 0.5222011183572633, "grad_norm": 0.8135704398155212, "learning_rate": 1.4649863412555145e-05, "loss": 0.0792, "step": 29487 }, { "epoch": 0.5222188278942917, "grad_norm": 0.5250216126441956, "learning_rate": 1.4649003282957937e-05, "loss": 0.0418, "step": 29488 }, { "epoch": 0.5222365374313201, "grad_norm": 0.5917377471923828, "learning_rate": 1.4648143154515473e-05, "loss": 0.0534, "step": 29489 }, { "epoch": 0.5222542469683487, "grad_norm": 0.7781103849411011, "learning_rate": 1.4647283027230583e-05, "loss": 0.0776, "step": 29490 }, { "epoch": 0.522271956505377, "grad_norm": 0.6556519865989685, "learning_rate": 1.4646422901106105e-05, "loss": 0.0827, "step": 29491 }, { "epoch": 0.5222896660424055, "grad_norm": 0.7452228665351868, "learning_rate": 1.4645562776144852e-05, "loss": 0.0767, "step": 29492 }, { "epoch": 0.522307375579434, "grad_norm": 0.7708566188812256, "learning_rate": 1.464470265234966e-05, "loss": 0.0703, "step": 29493 }, { "epoch": 0.5223250851164624, "grad_norm": 0.8225083947181702, "learning_rate": 1.4643842529723367e-05, "loss": 0.0965, "step": 29494 }, { "epoch": 0.5223427946534908, "grad_norm": 0.7964603900909424, "learning_rate": 1.464298240826879e-05, "loss": 0.0763, "step": 29495 }, { "epoch": 0.5223605041905192, "grad_norm": 0.5715176463127136, "learning_rate": 1.4642122287988766e-05, "loss": 0.0562, "step": 29496 }, { "epoch": 0.5223782137275477, "grad_norm": 0.5436908602714539, "learning_rate": 1.4641262168886126e-05, "loss": 0.0693, "step": 29497 }, { "epoch": 0.5223959232645761, "grad_norm": 0.8180003762245178, "learning_rate": 1.4640402050963696e-05, "loss": 0.0666, "step": 29498 }, { "epoch": 0.5224136328016045, "grad_norm": 0.44544273614883423, "learning_rate": 1.4639541934224306e-05, "loss": 0.0436, "step": 29499 }, { "epoch": 0.5224313423386329, "grad_norm": 0.6552956700325012, "learning_rate": 1.4638681818670787e-05, "loss": 0.0767, "step": 29500 }, { "epoch": 0.5224490518756614, "grad_norm": 0.710701048374176, "learning_rate": 1.4637821704305973e-05, "loss": 0.0455, "step": 29501 }, { "epoch": 0.5224667614126898, "grad_norm": 0.7914907932281494, "learning_rate": 1.4636961591132683e-05, "loss": 0.0432, "step": 29502 }, { "epoch": 0.5224844709497182, "grad_norm": 0.6290462613105774, "learning_rate": 1.4636101479153755e-05, "loss": 0.0499, "step": 29503 }, { "epoch": 0.5225021804867466, "grad_norm": 0.6335188746452332, "learning_rate": 1.463524136837201e-05, "loss": 0.0801, "step": 29504 }, { "epoch": 0.5225198900237751, "grad_norm": 0.6758729219436646, "learning_rate": 1.4634381258790295e-05, "loss": 0.0728, "step": 29505 }, { "epoch": 0.5225375995608035, "grad_norm": 0.7818412780761719, "learning_rate": 1.4633521150411422e-05, "loss": 0.0934, "step": 29506 }, { "epoch": 0.5225553090978319, "grad_norm": 0.7433280944824219, "learning_rate": 1.4632661043238228e-05, "loss": 0.0724, "step": 29507 }, { "epoch": 0.5225730186348604, "grad_norm": 0.8925247192382812, "learning_rate": 1.4631800937273543e-05, "loss": 0.0945, "step": 29508 }, { "epoch": 0.5225907281718888, "grad_norm": 0.5907559990882874, "learning_rate": 1.4630940832520195e-05, "loss": 0.0595, "step": 29509 }, { "epoch": 0.5226084377089172, "grad_norm": 0.4451483190059662, "learning_rate": 1.4630080728981013e-05, "loss": 0.0905, "step": 29510 }, { "epoch": 0.5226261472459456, "grad_norm": 0.378165066242218, "learning_rate": 1.4629220626658829e-05, "loss": 0.0713, "step": 29511 }, { "epoch": 0.5226438567829741, "grad_norm": 0.9866103529930115, "learning_rate": 1.4628360525556474e-05, "loss": 0.0788, "step": 29512 }, { "epoch": 0.5226615663200025, "grad_norm": 1.0461843013763428, "learning_rate": 1.4627500425676772e-05, "loss": 0.1183, "step": 29513 }, { "epoch": 0.5226792758570309, "grad_norm": 0.6627113223075867, "learning_rate": 1.4626640327022557e-05, "loss": 0.0632, "step": 29514 }, { "epoch": 0.5226969853940593, "grad_norm": 0.7620300650596619, "learning_rate": 1.4625780229596656e-05, "loss": 0.091, "step": 29515 }, { "epoch": 0.5227146949310878, "grad_norm": 0.36070388555526733, "learning_rate": 1.4624920133401908e-05, "loss": 0.0477, "step": 29516 }, { "epoch": 0.5227324044681162, "grad_norm": 0.8838918805122375, "learning_rate": 1.4624060038441127e-05, "loss": 0.0874, "step": 29517 }, { "epoch": 0.5227501140051446, "grad_norm": 0.7132511138916016, "learning_rate": 1.4623199944717149e-05, "loss": 0.0973, "step": 29518 }, { "epoch": 0.522767823542173, "grad_norm": 0.6455364227294922, "learning_rate": 1.4622339852232812e-05, "loss": 0.0743, "step": 29519 }, { "epoch": 0.5227855330792015, "grad_norm": 0.8108228445053101, "learning_rate": 1.4621479760990935e-05, "loss": 0.0999, "step": 29520 }, { "epoch": 0.5228032426162299, "grad_norm": 1.3765584230422974, "learning_rate": 1.4620619670994351e-05, "loss": 0.0854, "step": 29521 }, { "epoch": 0.5228209521532583, "grad_norm": 0.5562130212783813, "learning_rate": 1.4619759582245889e-05, "loss": 0.0733, "step": 29522 }, { "epoch": 0.5228386616902868, "grad_norm": 0.48701992630958557, "learning_rate": 1.4618899494748382e-05, "loss": 0.0753, "step": 29523 }, { "epoch": 0.5228563712273152, "grad_norm": 0.5781806707382202, "learning_rate": 1.4618039408504656e-05, "loss": 0.0732, "step": 29524 }, { "epoch": 0.5228740807643436, "grad_norm": 0.5292277932167053, "learning_rate": 1.4617179323517541e-05, "loss": 0.099, "step": 29525 }, { "epoch": 0.522891790301372, "grad_norm": 0.518425464630127, "learning_rate": 1.4616319239789869e-05, "loss": 0.084, "step": 29526 }, { "epoch": 0.5229094998384005, "grad_norm": 0.6424612402915955, "learning_rate": 1.4615459157324466e-05, "loss": 0.0646, "step": 29527 }, { "epoch": 0.5229272093754289, "grad_norm": 0.7676516771316528, "learning_rate": 1.4614599076124163e-05, "loss": 0.1009, "step": 29528 }, { "epoch": 0.5229449189124573, "grad_norm": 0.5978632569313049, "learning_rate": 1.461373899619179e-05, "loss": 0.0633, "step": 29529 }, { "epoch": 0.5229626284494857, "grad_norm": 0.5965548753738403, "learning_rate": 1.4612878917530183e-05, "loss": 0.0861, "step": 29530 }, { "epoch": 0.5229803379865142, "grad_norm": 0.6002246737480164, "learning_rate": 1.461201884014216e-05, "loss": 0.0665, "step": 29531 }, { "epoch": 0.5229980475235426, "grad_norm": 0.48728737235069275, "learning_rate": 1.4611158764030556e-05, "loss": 0.0605, "step": 29532 }, { "epoch": 0.523015757060571, "grad_norm": 0.23627349734306335, "learning_rate": 1.4610298689198201e-05, "loss": 0.0448, "step": 29533 }, { "epoch": 0.5230334665975994, "grad_norm": 1.0401262044906616, "learning_rate": 1.4609438615647924e-05, "loss": 0.1027, "step": 29534 }, { "epoch": 0.5230511761346279, "grad_norm": 1.02370023727417, "learning_rate": 1.4608578543382551e-05, "loss": 0.0643, "step": 29535 }, { "epoch": 0.5230688856716563, "grad_norm": 0.6376059651374817, "learning_rate": 1.4607718472404916e-05, "loss": 0.0849, "step": 29536 }, { "epoch": 0.5230865952086847, "grad_norm": 0.5102383494377136, "learning_rate": 1.4606858402717853e-05, "loss": 0.0941, "step": 29537 }, { "epoch": 0.5231043047457132, "grad_norm": 0.8356125354766846, "learning_rate": 1.4605998334324182e-05, "loss": 0.1286, "step": 29538 }, { "epoch": 0.5231220142827416, "grad_norm": 0.8212763071060181, "learning_rate": 1.4605138267226736e-05, "loss": 0.0792, "step": 29539 }, { "epoch": 0.52313972381977, "grad_norm": 0.905636191368103, "learning_rate": 1.460427820142835e-05, "loss": 0.0917, "step": 29540 }, { "epoch": 0.5231574333567984, "grad_norm": 0.9413872361183167, "learning_rate": 1.4603418136931842e-05, "loss": 0.0654, "step": 29541 }, { "epoch": 0.5231751428938269, "grad_norm": 0.32291263341903687, "learning_rate": 1.4602558073740049e-05, "loss": 0.057, "step": 29542 }, { "epoch": 0.5231928524308553, "grad_norm": 0.3976385295391083, "learning_rate": 1.4601698011855799e-05, "loss": 0.0877, "step": 29543 }, { "epoch": 0.5232105619678837, "grad_norm": 0.46809282898902893, "learning_rate": 1.4600837951281933e-05, "loss": 0.0627, "step": 29544 }, { "epoch": 0.5232282715049121, "grad_norm": 0.9111127853393555, "learning_rate": 1.459997789202126e-05, "loss": 0.0914, "step": 29545 }, { "epoch": 0.5232459810419406, "grad_norm": 0.7342215180397034, "learning_rate": 1.459911783407662e-05, "loss": 0.0612, "step": 29546 }, { "epoch": 0.523263690578969, "grad_norm": 1.1178418397903442, "learning_rate": 1.4598257777450845e-05, "loss": 0.0895, "step": 29547 }, { "epoch": 0.5232814001159974, "grad_norm": 0.9714510440826416, "learning_rate": 1.4597397722146759e-05, "loss": 0.0573, "step": 29548 }, { "epoch": 0.523299109653026, "grad_norm": 0.5319679975509644, "learning_rate": 1.4596537668167191e-05, "loss": 0.0603, "step": 29549 }, { "epoch": 0.5233168191900543, "grad_norm": 0.5384005904197693, "learning_rate": 1.4595677615514975e-05, "loss": 0.0686, "step": 29550 }, { "epoch": 0.5233345287270827, "grad_norm": 0.7367302775382996, "learning_rate": 1.4594817564192942e-05, "loss": 0.0783, "step": 29551 }, { "epoch": 0.5233522382641111, "grad_norm": 0.7733847498893738, "learning_rate": 1.4593957514203916e-05, "loss": 0.1104, "step": 29552 }, { "epoch": 0.5233699478011397, "grad_norm": 1.0856796503067017, "learning_rate": 1.4593097465550727e-05, "loss": 0.091, "step": 29553 }, { "epoch": 0.523387657338168, "grad_norm": 0.6692346334457397, "learning_rate": 1.4592237418236208e-05, "loss": 0.0855, "step": 29554 }, { "epoch": 0.5234053668751965, "grad_norm": 0.3889167308807373, "learning_rate": 1.4591377372263189e-05, "loss": 0.0629, "step": 29555 }, { "epoch": 0.5234230764122249, "grad_norm": 0.7160680294036865, "learning_rate": 1.4590517327634492e-05, "loss": 0.0859, "step": 29556 }, { "epoch": 0.5234407859492534, "grad_norm": 0.4639280438423157, "learning_rate": 1.4589657284352954e-05, "loss": 0.0614, "step": 29557 }, { "epoch": 0.5234584954862818, "grad_norm": 0.6292238235473633, "learning_rate": 1.4588797242421409e-05, "loss": 0.0681, "step": 29558 }, { "epoch": 0.5234762050233102, "grad_norm": 0.6510758399963379, "learning_rate": 1.4587937201842672e-05, "loss": 0.0562, "step": 29559 }, { "epoch": 0.5234939145603386, "grad_norm": 0.9049183130264282, "learning_rate": 1.4587077162619579e-05, "loss": 0.1085, "step": 29560 }, { "epoch": 0.5235116240973671, "grad_norm": 0.4313191771507263, "learning_rate": 1.4586217124754962e-05, "loss": 0.0559, "step": 29561 }, { "epoch": 0.5235293336343955, "grad_norm": 0.8512029051780701, "learning_rate": 1.4585357088251651e-05, "loss": 0.076, "step": 29562 }, { "epoch": 0.5235470431714239, "grad_norm": 0.8127394914627075, "learning_rate": 1.4584497053112468e-05, "loss": 0.0963, "step": 29563 }, { "epoch": 0.5235647527084524, "grad_norm": 0.5221987366676331, "learning_rate": 1.458363701934025e-05, "loss": 0.0505, "step": 29564 }, { "epoch": 0.5235824622454808, "grad_norm": 0.510147750377655, "learning_rate": 1.4582776986937828e-05, "loss": 0.0638, "step": 29565 }, { "epoch": 0.5236001717825092, "grad_norm": 0.5319716930389404, "learning_rate": 1.4581916955908023e-05, "loss": 0.0862, "step": 29566 }, { "epoch": 0.5236178813195376, "grad_norm": 0.7788864374160767, "learning_rate": 1.458105692625367e-05, "loss": 0.0689, "step": 29567 }, { "epoch": 0.5236355908565661, "grad_norm": 0.503540575504303, "learning_rate": 1.4580196897977597e-05, "loss": 0.0829, "step": 29568 }, { "epoch": 0.5236533003935945, "grad_norm": 0.4689989984035492, "learning_rate": 1.457933687108264e-05, "loss": 0.0447, "step": 29569 }, { "epoch": 0.5236710099306229, "grad_norm": 0.5458680391311646, "learning_rate": 1.4578476845571613e-05, "loss": 0.0855, "step": 29570 }, { "epoch": 0.5236887194676513, "grad_norm": 0.580316424369812, "learning_rate": 1.4577616821447356e-05, "loss": 0.0554, "step": 29571 }, { "epoch": 0.5237064290046798, "grad_norm": 0.5701465010643005, "learning_rate": 1.4576756798712705e-05, "loss": 0.1041, "step": 29572 }, { "epoch": 0.5237241385417082, "grad_norm": 0.535136342048645, "learning_rate": 1.4575896777370475e-05, "loss": 0.0467, "step": 29573 }, { "epoch": 0.5237418480787366, "grad_norm": 0.6960576176643372, "learning_rate": 1.45750367574235e-05, "loss": 0.0908, "step": 29574 }, { "epoch": 0.523759557615765, "grad_norm": 0.6597697138786316, "learning_rate": 1.4574176738874613e-05, "loss": 0.0607, "step": 29575 }, { "epoch": 0.5237772671527935, "grad_norm": 0.6572519540786743, "learning_rate": 1.4573316721726642e-05, "loss": 0.1005, "step": 29576 }, { "epoch": 0.5237949766898219, "grad_norm": 1.0372672080993652, "learning_rate": 1.4572456705982417e-05, "loss": 0.0702, "step": 29577 }, { "epoch": 0.5238126862268503, "grad_norm": 0.43442851305007935, "learning_rate": 1.4571596691644762e-05, "loss": 0.0417, "step": 29578 }, { "epoch": 0.5238303957638788, "grad_norm": 0.3182342052459717, "learning_rate": 1.4570736678716516e-05, "loss": 0.0551, "step": 29579 }, { "epoch": 0.5238481053009072, "grad_norm": 0.47889286279678345, "learning_rate": 1.4569876667200499e-05, "loss": 0.0593, "step": 29580 }, { "epoch": 0.5238658148379356, "grad_norm": 0.5486769676208496, "learning_rate": 1.4569016657099544e-05, "loss": 0.0722, "step": 29581 }, { "epoch": 0.523883524374964, "grad_norm": 0.695777952671051, "learning_rate": 1.4568156648416481e-05, "loss": 0.0511, "step": 29582 }, { "epoch": 0.5239012339119925, "grad_norm": 0.6177716255187988, "learning_rate": 1.4567296641154145e-05, "loss": 0.0719, "step": 29583 }, { "epoch": 0.5239189434490209, "grad_norm": 0.8416682481765747, "learning_rate": 1.4566436635315351e-05, "loss": 0.0603, "step": 29584 }, { "epoch": 0.5239366529860493, "grad_norm": 0.6538705825805664, "learning_rate": 1.4565576630902939e-05, "loss": 0.0756, "step": 29585 }, { "epoch": 0.5239543625230777, "grad_norm": 0.44400855898857117, "learning_rate": 1.4564716627919735e-05, "loss": 0.0471, "step": 29586 }, { "epoch": 0.5239720720601062, "grad_norm": 0.7098777294158936, "learning_rate": 1.4563856626368578e-05, "loss": 0.0868, "step": 29587 }, { "epoch": 0.5239897815971346, "grad_norm": 0.56175297498703, "learning_rate": 1.456299662625228e-05, "loss": 0.0569, "step": 29588 }, { "epoch": 0.524007491134163, "grad_norm": 0.45848703384399414, "learning_rate": 1.456213662757368e-05, "loss": 0.0582, "step": 29589 }, { "epoch": 0.5240252006711914, "grad_norm": 0.607179582118988, "learning_rate": 1.4561276630335609e-05, "loss": 0.0724, "step": 29590 }, { "epoch": 0.5240429102082199, "grad_norm": 0.4614582061767578, "learning_rate": 1.456041663454089e-05, "loss": 0.0493, "step": 29591 }, { "epoch": 0.5240606197452483, "grad_norm": 0.4548150897026062, "learning_rate": 1.4559556640192355e-05, "loss": 0.0654, "step": 29592 }, { "epoch": 0.5240783292822767, "grad_norm": 0.4304034411907196, "learning_rate": 1.4558696647292835e-05, "loss": 0.0758, "step": 29593 }, { "epoch": 0.5240960388193052, "grad_norm": 0.8162755370140076, "learning_rate": 1.4557836655845163e-05, "loss": 0.0658, "step": 29594 }, { "epoch": 0.5241137483563336, "grad_norm": 0.8032101988792419, "learning_rate": 1.4556976665852158e-05, "loss": 0.0924, "step": 29595 }, { "epoch": 0.524131457893362, "grad_norm": 0.7579207420349121, "learning_rate": 1.4556116677316657e-05, "loss": 0.0621, "step": 29596 }, { "epoch": 0.5241491674303904, "grad_norm": 0.24676455557346344, "learning_rate": 1.4555256690241493e-05, "loss": 0.0512, "step": 29597 }, { "epoch": 0.5241668769674189, "grad_norm": 0.7025247812271118, "learning_rate": 1.4554396704629483e-05, "loss": 0.0696, "step": 29598 }, { "epoch": 0.5241845865044473, "grad_norm": 0.6699124574661255, "learning_rate": 1.4553536720483461e-05, "loss": 0.0544, "step": 29599 }, { "epoch": 0.5242022960414757, "grad_norm": 0.380946546792984, "learning_rate": 1.4552676737806258e-05, "loss": 0.076, "step": 29600 }, { "epoch": 0.5242200055785041, "grad_norm": 0.5103388428688049, "learning_rate": 1.4551816756600712e-05, "loss": 0.0506, "step": 29601 }, { "epoch": 0.5242377151155326, "grad_norm": 0.6324823498725891, "learning_rate": 1.4550956776869637e-05, "loss": 0.056, "step": 29602 }, { "epoch": 0.524255424652561, "grad_norm": 0.5171487927436829, "learning_rate": 1.4550096798615869e-05, "loss": 0.0601, "step": 29603 }, { "epoch": 0.5242731341895894, "grad_norm": 0.5136539340019226, "learning_rate": 1.4549236821842238e-05, "loss": 0.0634, "step": 29604 }, { "epoch": 0.5242908437266178, "grad_norm": 0.5965473651885986, "learning_rate": 1.454837684655157e-05, "loss": 0.0728, "step": 29605 }, { "epoch": 0.5243085532636463, "grad_norm": 0.5759310722351074, "learning_rate": 1.4547516872746699e-05, "loss": 0.0834, "step": 29606 }, { "epoch": 0.5243262628006747, "grad_norm": 0.44602537155151367, "learning_rate": 1.454665690043045e-05, "loss": 0.05, "step": 29607 }, { "epoch": 0.5243439723377031, "grad_norm": 0.4424009621143341, "learning_rate": 1.4545796929605657e-05, "loss": 0.0549, "step": 29608 }, { "epoch": 0.5243616818747316, "grad_norm": 0.9595322012901306, "learning_rate": 1.4544936960275142e-05, "loss": 0.0685, "step": 29609 }, { "epoch": 0.52437939141176, "grad_norm": 0.3581636846065521, "learning_rate": 1.454407699244174e-05, "loss": 0.0505, "step": 29610 }, { "epoch": 0.5243971009487884, "grad_norm": 0.5788711905479431, "learning_rate": 1.4543217026108286e-05, "loss": 0.0676, "step": 29611 }, { "epoch": 0.5244148104858168, "grad_norm": 0.5554704666137695, "learning_rate": 1.4542357061277593e-05, "loss": 0.082, "step": 29612 }, { "epoch": 0.5244325200228453, "grad_norm": 0.5758532285690308, "learning_rate": 1.4541497097952496e-05, "loss": 0.0467, "step": 29613 }, { "epoch": 0.5244502295598737, "grad_norm": 0.9004272222518921, "learning_rate": 1.454063713613583e-05, "loss": 0.0794, "step": 29614 }, { "epoch": 0.5244679390969021, "grad_norm": 0.9599897265434265, "learning_rate": 1.453977717583043e-05, "loss": 0.0525, "step": 29615 }, { "epoch": 0.5244856486339305, "grad_norm": 0.39814308285713196, "learning_rate": 1.4538917217039108e-05, "loss": 0.0553, "step": 29616 }, { "epoch": 0.524503358170959, "grad_norm": 0.5986723899841309, "learning_rate": 1.4538057259764703e-05, "loss": 0.0854, "step": 29617 }, { "epoch": 0.5245210677079875, "grad_norm": 0.5260793566703796, "learning_rate": 1.4537197304010044e-05, "loss": 0.0766, "step": 29618 }, { "epoch": 0.5245387772450159, "grad_norm": 0.6157610416412354, "learning_rate": 1.4536337349777959e-05, "loss": 0.0715, "step": 29619 }, { "epoch": 0.5245564867820443, "grad_norm": 0.37114933133125305, "learning_rate": 1.4535477397071277e-05, "loss": 0.076, "step": 29620 }, { "epoch": 0.5245741963190728, "grad_norm": 0.6616126298904419, "learning_rate": 1.4534617445892824e-05, "loss": 0.0755, "step": 29621 }, { "epoch": 0.5245919058561012, "grad_norm": 0.6776224374771118, "learning_rate": 1.4533757496245438e-05, "loss": 0.0523, "step": 29622 }, { "epoch": 0.5246096153931296, "grad_norm": 0.552939772605896, "learning_rate": 1.453289754813194e-05, "loss": 0.0458, "step": 29623 }, { "epoch": 0.5246273249301581, "grad_norm": 0.37807419896125793, "learning_rate": 1.4532037601555161e-05, "loss": 0.0637, "step": 29624 }, { "epoch": 0.5246450344671865, "grad_norm": 0.6804203987121582, "learning_rate": 1.453117765651793e-05, "loss": 0.0951, "step": 29625 }, { "epoch": 0.5246627440042149, "grad_norm": 0.609095573425293, "learning_rate": 1.4530317713023086e-05, "loss": 0.0723, "step": 29626 }, { "epoch": 0.5246804535412433, "grad_norm": 0.8528693318367004, "learning_rate": 1.4529457771073441e-05, "loss": 0.041, "step": 29627 }, { "epoch": 0.5246981630782718, "grad_norm": 0.7387906312942505, "learning_rate": 1.4528597830671828e-05, "loss": 0.0769, "step": 29628 }, { "epoch": 0.5247158726153002, "grad_norm": 0.49436700344085693, "learning_rate": 1.4527737891821093e-05, "loss": 0.0702, "step": 29629 }, { "epoch": 0.5247335821523286, "grad_norm": 0.7187792062759399, "learning_rate": 1.4526877954524046e-05, "loss": 0.0539, "step": 29630 }, { "epoch": 0.524751291689357, "grad_norm": 1.0199732780456543, "learning_rate": 1.452601801878352e-05, "loss": 0.0857, "step": 29631 }, { "epoch": 0.5247690012263855, "grad_norm": 0.6152991056442261, "learning_rate": 1.4525158084602348e-05, "loss": 0.0751, "step": 29632 }, { "epoch": 0.5247867107634139, "grad_norm": 0.16641753911972046, "learning_rate": 1.4524298151983361e-05, "loss": 0.0639, "step": 29633 }, { "epoch": 0.5248044203004423, "grad_norm": 0.44809821248054504, "learning_rate": 1.4523438220929384e-05, "loss": 0.0766, "step": 29634 }, { "epoch": 0.5248221298374707, "grad_norm": 0.5299443602561951, "learning_rate": 1.4522578291443243e-05, "loss": 0.0465, "step": 29635 }, { "epoch": 0.5248398393744992, "grad_norm": 0.49198171496391296, "learning_rate": 1.4521718363527779e-05, "loss": 0.0627, "step": 29636 }, { "epoch": 0.5248575489115276, "grad_norm": 0.8247623443603516, "learning_rate": 1.4520858437185805e-05, "loss": 0.0944, "step": 29637 }, { "epoch": 0.524875258448556, "grad_norm": 0.6068984270095825, "learning_rate": 1.4519998512420162e-05, "loss": 0.059, "step": 29638 }, { "epoch": 0.5248929679855845, "grad_norm": 0.6558772325515747, "learning_rate": 1.4519138589233674e-05, "loss": 0.0611, "step": 29639 }, { "epoch": 0.5249106775226129, "grad_norm": 0.9366299510002136, "learning_rate": 1.4518278667629178e-05, "loss": 0.0947, "step": 29640 }, { "epoch": 0.5249283870596413, "grad_norm": 0.9461870193481445, "learning_rate": 1.4517418747609489e-05, "loss": 0.043, "step": 29641 }, { "epoch": 0.5249460965966697, "grad_norm": 0.5623181462287903, "learning_rate": 1.4516558829177441e-05, "loss": 0.1018, "step": 29642 }, { "epoch": 0.5249638061336982, "grad_norm": 0.52170729637146, "learning_rate": 1.4515698912335876e-05, "loss": 0.0772, "step": 29643 }, { "epoch": 0.5249815156707266, "grad_norm": 0.8175044059753418, "learning_rate": 1.4514838997087605e-05, "loss": 0.095, "step": 29644 }, { "epoch": 0.524999225207755, "grad_norm": 0.9695461392402649, "learning_rate": 1.4513979083435467e-05, "loss": 0.072, "step": 29645 }, { "epoch": 0.5250169347447834, "grad_norm": 0.6785779595375061, "learning_rate": 1.4513119171382286e-05, "loss": 0.0544, "step": 29646 }, { "epoch": 0.5250346442818119, "grad_norm": 0.747969925403595, "learning_rate": 1.4512259260930898e-05, "loss": 0.0755, "step": 29647 }, { "epoch": 0.5250523538188403, "grad_norm": 0.4894106388092041, "learning_rate": 1.4511399352084124e-05, "loss": 0.0812, "step": 29648 }, { "epoch": 0.5250700633558687, "grad_norm": 0.8876397013664246, "learning_rate": 1.4510539444844797e-05, "loss": 0.0723, "step": 29649 }, { "epoch": 0.5250877728928971, "grad_norm": 1.0875732898712158, "learning_rate": 1.4509679539215748e-05, "loss": 0.0602, "step": 29650 }, { "epoch": 0.5251054824299256, "grad_norm": 0.7606383562088013, "learning_rate": 1.4508819635199803e-05, "loss": 0.076, "step": 29651 }, { "epoch": 0.525123191966954, "grad_norm": 0.6843915581703186, "learning_rate": 1.4507959732799789e-05, "loss": 0.0583, "step": 29652 }, { "epoch": 0.5251409015039824, "grad_norm": 0.42279279232025146, "learning_rate": 1.450709983201854e-05, "loss": 0.0709, "step": 29653 }, { "epoch": 0.5251586110410109, "grad_norm": 0.5995246171951294, "learning_rate": 1.4506239932858888e-05, "loss": 0.0592, "step": 29654 }, { "epoch": 0.5251763205780393, "grad_norm": 0.6148226857185364, "learning_rate": 1.450538003532365e-05, "loss": 0.1037, "step": 29655 }, { "epoch": 0.5251940301150677, "grad_norm": 0.7145648002624512, "learning_rate": 1.4504520139415661e-05, "loss": 0.0592, "step": 29656 }, { "epoch": 0.5252117396520961, "grad_norm": 0.749917209148407, "learning_rate": 1.4503660245137749e-05, "loss": 0.0685, "step": 29657 }, { "epoch": 0.5252294491891246, "grad_norm": 0.3966984748840332, "learning_rate": 1.4502800352492754e-05, "loss": 0.073, "step": 29658 }, { "epoch": 0.525247158726153, "grad_norm": 0.6438524723052979, "learning_rate": 1.4501940461483489e-05, "loss": 0.0803, "step": 29659 }, { "epoch": 0.5252648682631814, "grad_norm": 0.5823473930358887, "learning_rate": 1.450108057211279e-05, "loss": 0.0802, "step": 29660 }, { "epoch": 0.5252825778002098, "grad_norm": 1.014418363571167, "learning_rate": 1.4500220684383485e-05, "loss": 0.0875, "step": 29661 }, { "epoch": 0.5253002873372383, "grad_norm": 0.9254348874092102, "learning_rate": 1.4499360798298403e-05, "loss": 0.1177, "step": 29662 }, { "epoch": 0.5253179968742667, "grad_norm": 0.6208145022392273, "learning_rate": 1.4498500913860374e-05, "loss": 0.065, "step": 29663 }, { "epoch": 0.5253357064112951, "grad_norm": 0.6572917699813843, "learning_rate": 1.4497641031072227e-05, "loss": 0.0905, "step": 29664 }, { "epoch": 0.5253534159483235, "grad_norm": 0.4513761103153229, "learning_rate": 1.4496781149936791e-05, "loss": 0.0715, "step": 29665 }, { "epoch": 0.525371125485352, "grad_norm": 0.7710140943527222, "learning_rate": 1.449592127045689e-05, "loss": 0.0795, "step": 29666 }, { "epoch": 0.5253888350223804, "grad_norm": 0.7403562068939209, "learning_rate": 1.4495061392635361e-05, "loss": 0.0807, "step": 29667 }, { "epoch": 0.5254065445594088, "grad_norm": 0.6370881795883179, "learning_rate": 1.4494201516475033e-05, "loss": 0.0681, "step": 29668 }, { "epoch": 0.5254242540964373, "grad_norm": 0.3786320686340332, "learning_rate": 1.4493341641978725e-05, "loss": 0.0659, "step": 29669 }, { "epoch": 0.5254419636334657, "grad_norm": 0.6748698949813843, "learning_rate": 1.4492481769149271e-05, "loss": 0.0661, "step": 29670 }, { "epoch": 0.5254596731704941, "grad_norm": 0.8978286385536194, "learning_rate": 1.4491621897989498e-05, "loss": 0.1043, "step": 29671 }, { "epoch": 0.5254773827075225, "grad_norm": 0.9809714555740356, "learning_rate": 1.4490762028502247e-05, "loss": 0.0854, "step": 29672 }, { "epoch": 0.525495092244551, "grad_norm": 0.9990087747573853, "learning_rate": 1.4489902160690331e-05, "loss": 0.1114, "step": 29673 }, { "epoch": 0.5255128017815794, "grad_norm": 0.38816961646080017, "learning_rate": 1.4489042294556586e-05, "loss": 0.0661, "step": 29674 }, { "epoch": 0.5255305113186078, "grad_norm": 0.5329911708831787, "learning_rate": 1.4488182430103844e-05, "loss": 0.0772, "step": 29675 }, { "epoch": 0.5255482208556362, "grad_norm": 0.6110405325889587, "learning_rate": 1.4487322567334924e-05, "loss": 0.0691, "step": 29676 }, { "epoch": 0.5255659303926647, "grad_norm": 0.5387386083602905, "learning_rate": 1.4486462706252663e-05, "loss": 0.0621, "step": 29677 }, { "epoch": 0.5255836399296931, "grad_norm": 0.35033565759658813, "learning_rate": 1.4485602846859888e-05, "loss": 0.0755, "step": 29678 }, { "epoch": 0.5256013494667215, "grad_norm": 0.6101962924003601, "learning_rate": 1.448474298915943e-05, "loss": 0.071, "step": 29679 }, { "epoch": 0.5256190590037499, "grad_norm": 1.0718635320663452, "learning_rate": 1.4483883133154112e-05, "loss": 0.0924, "step": 29680 }, { "epoch": 0.5256367685407785, "grad_norm": 0.8660425543785095, "learning_rate": 1.4483023278846766e-05, "loss": 0.0783, "step": 29681 }, { "epoch": 0.5256544780778069, "grad_norm": 0.47638455033302307, "learning_rate": 1.4482163426240229e-05, "loss": 0.0757, "step": 29682 }, { "epoch": 0.5256721876148353, "grad_norm": 0.7987551689147949, "learning_rate": 1.4481303575337314e-05, "loss": 0.1127, "step": 29683 }, { "epoch": 0.5256898971518638, "grad_norm": 0.3671366572380066, "learning_rate": 1.4480443726140858e-05, "loss": 0.0724, "step": 29684 }, { "epoch": 0.5257076066888922, "grad_norm": 0.6589298248291016, "learning_rate": 1.4479583878653691e-05, "loss": 0.0733, "step": 29685 }, { "epoch": 0.5257253162259206, "grad_norm": 0.8884803056716919, "learning_rate": 1.4478724032878642e-05, "loss": 0.0497, "step": 29686 }, { "epoch": 0.525743025762949, "grad_norm": 0.7794991731643677, "learning_rate": 1.4477864188818537e-05, "loss": 0.1073, "step": 29687 }, { "epoch": 0.5257607352999775, "grad_norm": 0.8132060766220093, "learning_rate": 1.4477004346476204e-05, "loss": 0.1112, "step": 29688 }, { "epoch": 0.5257784448370059, "grad_norm": 0.7574644088745117, "learning_rate": 1.4476144505854473e-05, "loss": 0.0679, "step": 29689 }, { "epoch": 0.5257961543740343, "grad_norm": 0.6131156086921692, "learning_rate": 1.4475284666956178e-05, "loss": 0.0732, "step": 29690 }, { "epoch": 0.5258138639110627, "grad_norm": 0.8458199501037598, "learning_rate": 1.4474424829784143e-05, "loss": 0.0837, "step": 29691 }, { "epoch": 0.5258315734480912, "grad_norm": 0.7014027833938599, "learning_rate": 1.4473564994341194e-05, "loss": 0.0441, "step": 29692 }, { "epoch": 0.5258492829851196, "grad_norm": 1.2114750146865845, "learning_rate": 1.447270516063017e-05, "loss": 0.1069, "step": 29693 }, { "epoch": 0.525866992522148, "grad_norm": 0.6042454242706299, "learning_rate": 1.4471845328653883e-05, "loss": 0.106, "step": 29694 }, { "epoch": 0.5258847020591764, "grad_norm": 0.6619640588760376, "learning_rate": 1.4470985498415174e-05, "loss": 0.0945, "step": 29695 }, { "epoch": 0.5259024115962049, "grad_norm": 0.7304397225379944, "learning_rate": 1.447012566991687e-05, "loss": 0.0892, "step": 29696 }, { "epoch": 0.5259201211332333, "grad_norm": 0.9231600165367126, "learning_rate": 1.4469265843161807e-05, "loss": 0.1023, "step": 29697 }, { "epoch": 0.5259378306702617, "grad_norm": 0.6662645936012268, "learning_rate": 1.4468406018152795e-05, "loss": 0.0902, "step": 29698 }, { "epoch": 0.5259555402072902, "grad_norm": 0.8342877626419067, "learning_rate": 1.4467546194892677e-05, "loss": 0.0995, "step": 29699 }, { "epoch": 0.5259732497443186, "grad_norm": 0.9736042618751526, "learning_rate": 1.4466686373384281e-05, "loss": 0.1121, "step": 29700 }, { "epoch": 0.525990959281347, "grad_norm": 0.49893349409103394, "learning_rate": 1.4465826553630429e-05, "loss": 0.0733, "step": 29701 }, { "epoch": 0.5260086688183754, "grad_norm": 0.43779003620147705, "learning_rate": 1.4464966735633952e-05, "loss": 0.0702, "step": 29702 }, { "epoch": 0.5260263783554039, "grad_norm": 0.4750998616218567, "learning_rate": 1.4464106919397683e-05, "loss": 0.055, "step": 29703 }, { "epoch": 0.5260440878924323, "grad_norm": 0.8213316202163696, "learning_rate": 1.446324710492445e-05, "loss": 0.1089, "step": 29704 }, { "epoch": 0.5260617974294607, "grad_norm": 0.7437693476676941, "learning_rate": 1.4462387292217077e-05, "loss": 0.054, "step": 29705 }, { "epoch": 0.5260795069664891, "grad_norm": 0.7738478779792786, "learning_rate": 1.4461527481278394e-05, "loss": 0.0694, "step": 29706 }, { "epoch": 0.5260972165035176, "grad_norm": 0.7156509757041931, "learning_rate": 1.446066767211124e-05, "loss": 0.0844, "step": 29707 }, { "epoch": 0.526114926040546, "grad_norm": 0.57208651304245, "learning_rate": 1.4459807864718423e-05, "loss": 0.0726, "step": 29708 }, { "epoch": 0.5261326355775744, "grad_norm": 0.7344118356704712, "learning_rate": 1.4458948059102789e-05, "loss": 0.0755, "step": 29709 }, { "epoch": 0.5261503451146028, "grad_norm": 0.8682857155799866, "learning_rate": 1.445808825526716e-05, "loss": 0.0812, "step": 29710 }, { "epoch": 0.5261680546516313, "grad_norm": 0.42732492089271545, "learning_rate": 1.4457228453214371e-05, "loss": 0.0697, "step": 29711 }, { "epoch": 0.5261857641886597, "grad_norm": 0.5489463806152344, "learning_rate": 1.445636865294724e-05, "loss": 0.0685, "step": 29712 }, { "epoch": 0.5262034737256881, "grad_norm": 0.9711822867393494, "learning_rate": 1.4455508854468602e-05, "loss": 0.0791, "step": 29713 }, { "epoch": 0.5262211832627166, "grad_norm": 0.5671663284301758, "learning_rate": 1.4454649057781287e-05, "loss": 0.0966, "step": 29714 }, { "epoch": 0.526238892799745, "grad_norm": 0.8012699484825134, "learning_rate": 1.4453789262888117e-05, "loss": 0.1088, "step": 29715 }, { "epoch": 0.5262566023367734, "grad_norm": 0.39129477739334106, "learning_rate": 1.4452929469791926e-05, "loss": 0.0454, "step": 29716 }, { "epoch": 0.5262743118738018, "grad_norm": 0.5856733918190002, "learning_rate": 1.4452069678495544e-05, "loss": 0.0765, "step": 29717 }, { "epoch": 0.5262920214108303, "grad_norm": 0.5541980266571045, "learning_rate": 1.4451209889001798e-05, "loss": 0.0877, "step": 29718 }, { "epoch": 0.5263097309478587, "grad_norm": 0.6992239952087402, "learning_rate": 1.4450350101313514e-05, "loss": 0.0835, "step": 29719 }, { "epoch": 0.5263274404848871, "grad_norm": 0.7429531812667847, "learning_rate": 1.4449490315433522e-05, "loss": 0.0503, "step": 29720 }, { "epoch": 0.5263451500219155, "grad_norm": 0.8149779438972473, "learning_rate": 1.4448630531364659e-05, "loss": 0.065, "step": 29721 }, { "epoch": 0.526362859558944, "grad_norm": 0.7654021382331848, "learning_rate": 1.4447770749109737e-05, "loss": 0.0813, "step": 29722 }, { "epoch": 0.5263805690959724, "grad_norm": 0.7579575181007385, "learning_rate": 1.4446910968671591e-05, "loss": 0.0891, "step": 29723 }, { "epoch": 0.5263982786330008, "grad_norm": 0.6468884348869324, "learning_rate": 1.4446051190053055e-05, "loss": 0.0737, "step": 29724 }, { "epoch": 0.5264159881700292, "grad_norm": 0.7310354113578796, "learning_rate": 1.444519141325696e-05, "loss": 0.0774, "step": 29725 }, { "epoch": 0.5264336977070577, "grad_norm": 0.3939589262008667, "learning_rate": 1.4444331638286126e-05, "loss": 0.0419, "step": 29726 }, { "epoch": 0.5264514072440861, "grad_norm": 0.7651492357254028, "learning_rate": 1.4443471865143383e-05, "loss": 0.0678, "step": 29727 }, { "epoch": 0.5264691167811145, "grad_norm": 0.4882908761501312, "learning_rate": 1.444261209383156e-05, "loss": 0.0804, "step": 29728 }, { "epoch": 0.526486826318143, "grad_norm": 1.0288091897964478, "learning_rate": 1.4441752324353491e-05, "loss": 0.0943, "step": 29729 }, { "epoch": 0.5265045358551714, "grad_norm": 0.3509948253631592, "learning_rate": 1.4440892556711999e-05, "loss": 0.0522, "step": 29730 }, { "epoch": 0.5265222453921998, "grad_norm": 0.4449903666973114, "learning_rate": 1.4440032790909911e-05, "loss": 0.0733, "step": 29731 }, { "epoch": 0.5265399549292282, "grad_norm": 0.8636770844459534, "learning_rate": 1.4439173026950063e-05, "loss": 0.0691, "step": 29732 }, { "epoch": 0.5265576644662567, "grad_norm": 0.48803940415382385, "learning_rate": 1.4438313264835275e-05, "loss": 0.0773, "step": 29733 }, { "epoch": 0.5265753740032851, "grad_norm": 0.5536119937896729, "learning_rate": 1.443745350456838e-05, "loss": 0.0429, "step": 29734 }, { "epoch": 0.5265930835403135, "grad_norm": 0.5804683566093445, "learning_rate": 1.4436593746152208e-05, "loss": 0.0788, "step": 29735 }, { "epoch": 0.5266107930773419, "grad_norm": 0.7871878147125244, "learning_rate": 1.443573398958959e-05, "loss": 0.0676, "step": 29736 }, { "epoch": 0.5266285026143704, "grad_norm": 0.7305380702018738, "learning_rate": 1.4434874234883341e-05, "loss": 0.0623, "step": 29737 }, { "epoch": 0.5266462121513988, "grad_norm": 0.8246363997459412, "learning_rate": 1.44340144820363e-05, "loss": 0.0799, "step": 29738 }, { "epoch": 0.5266639216884272, "grad_norm": 0.2020639032125473, "learning_rate": 1.4433154731051304e-05, "loss": 0.0546, "step": 29739 }, { "epoch": 0.5266816312254556, "grad_norm": 0.3757997751235962, "learning_rate": 1.4432294981931163e-05, "loss": 0.0472, "step": 29740 }, { "epoch": 0.5266993407624841, "grad_norm": 0.7773415446281433, "learning_rate": 1.4431435234678716e-05, "loss": 0.0705, "step": 29741 }, { "epoch": 0.5267170502995125, "grad_norm": 0.7602940201759338, "learning_rate": 1.4430575489296787e-05, "loss": 0.0785, "step": 29742 }, { "epoch": 0.5267347598365409, "grad_norm": 0.5929509997367859, "learning_rate": 1.4429715745788212e-05, "loss": 0.0749, "step": 29743 }, { "epoch": 0.5267524693735695, "grad_norm": 0.4061259925365448, "learning_rate": 1.442885600415581e-05, "loss": 0.0921, "step": 29744 }, { "epoch": 0.5267701789105979, "grad_norm": 0.4444805979728699, "learning_rate": 1.4427996264402414e-05, "loss": 0.0567, "step": 29745 }, { "epoch": 0.5267878884476263, "grad_norm": 0.651482105255127, "learning_rate": 1.4427136526530855e-05, "loss": 0.07, "step": 29746 }, { "epoch": 0.5268055979846546, "grad_norm": 0.3882080316543579, "learning_rate": 1.4426276790543958e-05, "loss": 0.025, "step": 29747 }, { "epoch": 0.5268233075216832, "grad_norm": 0.42858609557151794, "learning_rate": 1.4425417056444552e-05, "loss": 0.067, "step": 29748 }, { "epoch": 0.5268410170587116, "grad_norm": 0.6387331485748291, "learning_rate": 1.4424557324235464e-05, "loss": 0.0819, "step": 29749 }, { "epoch": 0.52685872659574, "grad_norm": 0.5126243829727173, "learning_rate": 1.4423697593919532e-05, "loss": 0.0699, "step": 29750 }, { "epoch": 0.5268764361327684, "grad_norm": 0.559414803981781, "learning_rate": 1.442283786549957e-05, "loss": 0.0619, "step": 29751 }, { "epoch": 0.5268941456697969, "grad_norm": 0.4094139337539673, "learning_rate": 1.442197813897841e-05, "loss": 0.066, "step": 29752 }, { "epoch": 0.5269118552068253, "grad_norm": 0.430309534072876, "learning_rate": 1.4421118414358893e-05, "loss": 0.0327, "step": 29753 }, { "epoch": 0.5269295647438537, "grad_norm": 0.8807911276817322, "learning_rate": 1.4420258691643831e-05, "loss": 0.1366, "step": 29754 }, { "epoch": 0.5269472742808821, "grad_norm": 0.7006994485855103, "learning_rate": 1.441939897083606e-05, "loss": 0.0397, "step": 29755 }, { "epoch": 0.5269649838179106, "grad_norm": 1.0195459127426147, "learning_rate": 1.4418539251938406e-05, "loss": 0.1012, "step": 29756 }, { "epoch": 0.526982693354939, "grad_norm": 0.6258520483970642, "learning_rate": 1.4417679534953704e-05, "loss": 0.0518, "step": 29757 }, { "epoch": 0.5270004028919674, "grad_norm": 0.6707533001899719, "learning_rate": 1.4416819819884773e-05, "loss": 0.0723, "step": 29758 }, { "epoch": 0.5270181124289959, "grad_norm": 0.7237003445625305, "learning_rate": 1.4415960106734447e-05, "loss": 0.0775, "step": 29759 }, { "epoch": 0.5270358219660243, "grad_norm": 1.028058648109436, "learning_rate": 1.4415100395505552e-05, "loss": 0.1014, "step": 29760 }, { "epoch": 0.5270535315030527, "grad_norm": 0.6273210048675537, "learning_rate": 1.4414240686200921e-05, "loss": 0.0617, "step": 29761 }, { "epoch": 0.5270712410400811, "grad_norm": 0.6940124034881592, "learning_rate": 1.4413380978823378e-05, "loss": 0.0558, "step": 29762 }, { "epoch": 0.5270889505771096, "grad_norm": 0.6381261348724365, "learning_rate": 1.4412521273375749e-05, "loss": 0.0686, "step": 29763 }, { "epoch": 0.527106660114138, "grad_norm": 0.592266857624054, "learning_rate": 1.4411661569860875e-05, "loss": 0.0302, "step": 29764 }, { "epoch": 0.5271243696511664, "grad_norm": 0.8076146245002747, "learning_rate": 1.4410801868281565e-05, "loss": 0.0775, "step": 29765 }, { "epoch": 0.5271420791881948, "grad_norm": 0.570510983467102, "learning_rate": 1.4409942168640655e-05, "loss": 0.063, "step": 29766 }, { "epoch": 0.5271597887252233, "grad_norm": 1.1350337266921997, "learning_rate": 1.440908247094098e-05, "loss": 0.111, "step": 29767 }, { "epoch": 0.5271774982622517, "grad_norm": 0.9220561385154724, "learning_rate": 1.4408222775185372e-05, "loss": 0.0649, "step": 29768 }, { "epoch": 0.5271952077992801, "grad_norm": 0.5432900190353394, "learning_rate": 1.4407363081376641e-05, "loss": 0.0426, "step": 29769 }, { "epoch": 0.5272129173363085, "grad_norm": 0.709252655506134, "learning_rate": 1.4406503389517628e-05, "loss": 0.0731, "step": 29770 }, { "epoch": 0.527230626873337, "grad_norm": 0.6737270355224609, "learning_rate": 1.4405643699611161e-05, "loss": 0.0687, "step": 29771 }, { "epoch": 0.5272483364103654, "grad_norm": 0.9639564752578735, "learning_rate": 1.4404784011660063e-05, "loss": 0.0765, "step": 29772 }, { "epoch": 0.5272660459473938, "grad_norm": 0.5788571238517761, "learning_rate": 1.4403924325667167e-05, "loss": 0.0632, "step": 29773 }, { "epoch": 0.5272837554844223, "grad_norm": 0.4413244426250458, "learning_rate": 1.4403064641635297e-05, "loss": 0.0568, "step": 29774 }, { "epoch": 0.5273014650214507, "grad_norm": 0.4060840308666229, "learning_rate": 1.4402204959567288e-05, "loss": 0.0414, "step": 29775 }, { "epoch": 0.5273191745584791, "grad_norm": 0.7440985441207886, "learning_rate": 1.4401345279465961e-05, "loss": 0.0897, "step": 29776 }, { "epoch": 0.5273368840955075, "grad_norm": 0.6635017395019531, "learning_rate": 1.4400485601334149e-05, "loss": 0.0707, "step": 29777 }, { "epoch": 0.527354593632536, "grad_norm": 0.5827146172523499, "learning_rate": 1.4399625925174685e-05, "loss": 0.0787, "step": 29778 }, { "epoch": 0.5273723031695644, "grad_norm": 0.42731475830078125, "learning_rate": 1.4398766250990383e-05, "loss": 0.0843, "step": 29779 }, { "epoch": 0.5273900127065928, "grad_norm": 0.9655769467353821, "learning_rate": 1.439790657878408e-05, "loss": 0.0603, "step": 29780 }, { "epoch": 0.5274077222436212, "grad_norm": 0.6026448607444763, "learning_rate": 1.4397046908558599e-05, "loss": 0.0638, "step": 29781 }, { "epoch": 0.5274254317806497, "grad_norm": 0.798809826374054, "learning_rate": 1.4396187240316785e-05, "loss": 0.0675, "step": 29782 }, { "epoch": 0.5274431413176781, "grad_norm": 1.0691999197006226, "learning_rate": 1.4395327574061445e-05, "loss": 0.1063, "step": 29783 }, { "epoch": 0.5274608508547065, "grad_norm": 0.7141544222831726, "learning_rate": 1.4394467909795418e-05, "loss": 0.0892, "step": 29784 }, { "epoch": 0.5274785603917349, "grad_norm": 0.5995586514472961, "learning_rate": 1.4393608247521532e-05, "loss": 0.0871, "step": 29785 }, { "epoch": 0.5274962699287634, "grad_norm": 0.5942378044128418, "learning_rate": 1.4392748587242611e-05, "loss": 0.0424, "step": 29786 }, { "epoch": 0.5275139794657918, "grad_norm": 0.3687332272529602, "learning_rate": 1.4391888928961486e-05, "loss": 0.0806, "step": 29787 }, { "epoch": 0.5275316890028202, "grad_norm": 0.4778652489185333, "learning_rate": 1.4391029272680983e-05, "loss": 0.0892, "step": 29788 }, { "epoch": 0.5275493985398487, "grad_norm": 0.7737455368041992, "learning_rate": 1.4390169618403939e-05, "loss": 0.0791, "step": 29789 }, { "epoch": 0.5275671080768771, "grad_norm": 0.7118037343025208, "learning_rate": 1.4389309966133168e-05, "loss": 0.0489, "step": 29790 }, { "epoch": 0.5275848176139055, "grad_norm": 0.7055181264877319, "learning_rate": 1.4388450315871508e-05, "loss": 0.0774, "step": 29791 }, { "epoch": 0.5276025271509339, "grad_norm": 0.5908320546150208, "learning_rate": 1.438759066762179e-05, "loss": 0.0498, "step": 29792 }, { "epoch": 0.5276202366879624, "grad_norm": 0.7474480867385864, "learning_rate": 1.4386731021386832e-05, "loss": 0.1063, "step": 29793 }, { "epoch": 0.5276379462249908, "grad_norm": 0.46482932567596436, "learning_rate": 1.4385871377169465e-05, "loss": 0.0683, "step": 29794 }, { "epoch": 0.5276556557620192, "grad_norm": 0.6075072884559631, "learning_rate": 1.4385011734972515e-05, "loss": 0.0562, "step": 29795 }, { "epoch": 0.5276733652990476, "grad_norm": 0.6574355363845825, "learning_rate": 1.4384152094798827e-05, "loss": 0.0761, "step": 29796 }, { "epoch": 0.5276910748360761, "grad_norm": 0.9060544371604919, "learning_rate": 1.4383292456651208e-05, "loss": 0.0768, "step": 29797 }, { "epoch": 0.5277087843731045, "grad_norm": 0.5466088056564331, "learning_rate": 1.4382432820532494e-05, "loss": 0.1062, "step": 29798 }, { "epoch": 0.5277264939101329, "grad_norm": 0.458465576171875, "learning_rate": 1.4381573186445514e-05, "loss": 0.0645, "step": 29799 }, { "epoch": 0.5277442034471613, "grad_norm": 0.4668075442314148, "learning_rate": 1.43807135543931e-05, "loss": 0.0656, "step": 29800 }, { "epoch": 0.5277619129841898, "grad_norm": 0.32296326756477356, "learning_rate": 1.4379853924378073e-05, "loss": 0.0715, "step": 29801 }, { "epoch": 0.5277796225212182, "grad_norm": 0.4999653995037079, "learning_rate": 1.437899429640326e-05, "loss": 0.0724, "step": 29802 }, { "epoch": 0.5277973320582466, "grad_norm": 1.147457480430603, "learning_rate": 1.4378134670471503e-05, "loss": 0.092, "step": 29803 }, { "epoch": 0.5278150415952751, "grad_norm": 0.6771450638771057, "learning_rate": 1.437727504658561e-05, "loss": 0.0967, "step": 29804 }, { "epoch": 0.5278327511323035, "grad_norm": 0.5706110596656799, "learning_rate": 1.437641542474842e-05, "loss": 0.0492, "step": 29805 }, { "epoch": 0.527850460669332, "grad_norm": 0.6093305945396423, "learning_rate": 1.4375555804962762e-05, "loss": 0.0617, "step": 29806 }, { "epoch": 0.5278681702063603, "grad_norm": 0.4352802634239197, "learning_rate": 1.437469618723147e-05, "loss": 0.0474, "step": 29807 }, { "epoch": 0.5278858797433889, "grad_norm": 0.7119497656822205, "learning_rate": 1.4373836571557356e-05, "loss": 0.0599, "step": 29808 }, { "epoch": 0.5279035892804173, "grad_norm": 0.8036937713623047, "learning_rate": 1.4372976957943257e-05, "loss": 0.043, "step": 29809 }, { "epoch": 0.5279212988174457, "grad_norm": 0.6440648436546326, "learning_rate": 1.4372117346392003e-05, "loss": 0.0693, "step": 29810 }, { "epoch": 0.527939008354474, "grad_norm": 0.5958718657493591, "learning_rate": 1.4371257736906416e-05, "loss": 0.0771, "step": 29811 }, { "epoch": 0.5279567178915026, "grad_norm": 0.2997390329837799, "learning_rate": 1.4370398129489328e-05, "loss": 0.0781, "step": 29812 }, { "epoch": 0.527974427428531, "grad_norm": 0.6977068781852722, "learning_rate": 1.4369538524143567e-05, "loss": 0.075, "step": 29813 }, { "epoch": 0.5279921369655594, "grad_norm": 0.6153275370597839, "learning_rate": 1.4368678920871962e-05, "loss": 0.0625, "step": 29814 }, { "epoch": 0.5280098465025878, "grad_norm": 0.5492392182350159, "learning_rate": 1.4367819319677338e-05, "loss": 0.0544, "step": 29815 }, { "epoch": 0.5280275560396163, "grad_norm": 0.5248259902000427, "learning_rate": 1.4366959720562523e-05, "loss": 0.0621, "step": 29816 }, { "epoch": 0.5280452655766447, "grad_norm": 0.7417497634887695, "learning_rate": 1.4366100123530355e-05, "loss": 0.0632, "step": 29817 }, { "epoch": 0.5280629751136731, "grad_norm": 0.533194899559021, "learning_rate": 1.4365240528583642e-05, "loss": 0.057, "step": 29818 }, { "epoch": 0.5280806846507016, "grad_norm": 0.612960696220398, "learning_rate": 1.4364380935725228e-05, "loss": 0.0572, "step": 29819 }, { "epoch": 0.52809839418773, "grad_norm": 0.43763870000839233, "learning_rate": 1.4363521344957935e-05, "loss": 0.0559, "step": 29820 }, { "epoch": 0.5281161037247584, "grad_norm": 0.2931637167930603, "learning_rate": 1.4362661756284601e-05, "loss": 0.0486, "step": 29821 }, { "epoch": 0.5281338132617868, "grad_norm": 0.4536614418029785, "learning_rate": 1.4361802169708036e-05, "loss": 0.0501, "step": 29822 }, { "epoch": 0.5281515227988153, "grad_norm": 0.8756511211395264, "learning_rate": 1.436094258523108e-05, "loss": 0.0535, "step": 29823 }, { "epoch": 0.5281692323358437, "grad_norm": 0.6861663460731506, "learning_rate": 1.436008300285656e-05, "loss": 0.0581, "step": 29824 }, { "epoch": 0.5281869418728721, "grad_norm": 0.5602846741676331, "learning_rate": 1.4359223422587298e-05, "loss": 0.0658, "step": 29825 }, { "epoch": 0.5282046514099005, "grad_norm": 1.1056936979293823, "learning_rate": 1.4358363844426129e-05, "loss": 0.0942, "step": 29826 }, { "epoch": 0.528222360946929, "grad_norm": 0.9076744914054871, "learning_rate": 1.4357504268375876e-05, "loss": 0.0612, "step": 29827 }, { "epoch": 0.5282400704839574, "grad_norm": 1.451583981513977, "learning_rate": 1.4356644694439373e-05, "loss": 0.0742, "step": 29828 }, { "epoch": 0.5282577800209858, "grad_norm": 0.7900699973106384, "learning_rate": 1.4355785122619438e-05, "loss": 0.0709, "step": 29829 }, { "epoch": 0.5282754895580142, "grad_norm": 0.7081095576286316, "learning_rate": 1.4354925552918909e-05, "loss": 0.0869, "step": 29830 }, { "epoch": 0.5282931990950427, "grad_norm": 0.6227011680603027, "learning_rate": 1.4354065985340608e-05, "loss": 0.0832, "step": 29831 }, { "epoch": 0.5283109086320711, "grad_norm": 0.4793124794960022, "learning_rate": 1.4353206419887371e-05, "loss": 0.0647, "step": 29832 }, { "epoch": 0.5283286181690995, "grad_norm": 0.4814970791339874, "learning_rate": 1.435234685656201e-05, "loss": 0.0503, "step": 29833 }, { "epoch": 0.528346327706128, "grad_norm": 0.7340204119682312, "learning_rate": 1.4351487295367364e-05, "loss": 0.0754, "step": 29834 }, { "epoch": 0.5283640372431564, "grad_norm": 0.4755081534385681, "learning_rate": 1.4350627736306268e-05, "loss": 0.0431, "step": 29835 }, { "epoch": 0.5283817467801848, "grad_norm": 0.35565385222435, "learning_rate": 1.4349768179381533e-05, "loss": 0.032, "step": 29836 }, { "epoch": 0.5283994563172132, "grad_norm": 0.8278311491012573, "learning_rate": 1.4348908624595996e-05, "loss": 0.0757, "step": 29837 }, { "epoch": 0.5284171658542417, "grad_norm": 0.6263828873634338, "learning_rate": 1.4348049071952482e-05, "loss": 0.0635, "step": 29838 }, { "epoch": 0.5284348753912701, "grad_norm": 0.5158915519714355, "learning_rate": 1.4347189521453825e-05, "loss": 0.066, "step": 29839 }, { "epoch": 0.5284525849282985, "grad_norm": 0.5251286625862122, "learning_rate": 1.4346329973102845e-05, "loss": 0.0598, "step": 29840 }, { "epoch": 0.5284702944653269, "grad_norm": 0.7304735779762268, "learning_rate": 1.4345470426902374e-05, "loss": 0.0763, "step": 29841 }, { "epoch": 0.5284880040023554, "grad_norm": 0.7480485439300537, "learning_rate": 1.4344610882855242e-05, "loss": 0.0719, "step": 29842 }, { "epoch": 0.5285057135393838, "grad_norm": 0.672831118106842, "learning_rate": 1.434375134096427e-05, "loss": 0.0559, "step": 29843 }, { "epoch": 0.5285234230764122, "grad_norm": 0.7935728430747986, "learning_rate": 1.434289180123229e-05, "loss": 0.0852, "step": 29844 }, { "epoch": 0.5285411326134406, "grad_norm": 0.26640573143959045, "learning_rate": 1.434203226366213e-05, "loss": 0.042, "step": 29845 }, { "epoch": 0.5285588421504691, "grad_norm": 0.48986244201660156, "learning_rate": 1.4341172728256625e-05, "loss": 0.074, "step": 29846 }, { "epoch": 0.5285765516874975, "grad_norm": 0.8896265029907227, "learning_rate": 1.4340313195018583e-05, "loss": 0.1038, "step": 29847 }, { "epoch": 0.5285942612245259, "grad_norm": 0.9417197704315186, "learning_rate": 1.4339453663950847e-05, "loss": 0.0795, "step": 29848 }, { "epoch": 0.5286119707615544, "grad_norm": 0.746684193611145, "learning_rate": 1.433859413505625e-05, "loss": 0.0768, "step": 29849 }, { "epoch": 0.5286296802985828, "grad_norm": 0.9857904314994812, "learning_rate": 1.4337734608337604e-05, "loss": 0.0743, "step": 29850 }, { "epoch": 0.5286473898356112, "grad_norm": 0.7608555555343628, "learning_rate": 1.4336875083797744e-05, "loss": 0.0698, "step": 29851 }, { "epoch": 0.5286650993726396, "grad_norm": 0.36525648832321167, "learning_rate": 1.4336015561439498e-05, "loss": 0.0458, "step": 29852 }, { "epoch": 0.5286828089096681, "grad_norm": 0.7509030103683472, "learning_rate": 1.4335156041265696e-05, "loss": 0.0736, "step": 29853 }, { "epoch": 0.5287005184466965, "grad_norm": 0.3902280032634735, "learning_rate": 1.433429652327916e-05, "loss": 0.0636, "step": 29854 }, { "epoch": 0.5287182279837249, "grad_norm": 0.4589043855667114, "learning_rate": 1.4333437007482723e-05, "loss": 0.0648, "step": 29855 }, { "epoch": 0.5287359375207533, "grad_norm": 0.737433671951294, "learning_rate": 1.4332577493879214e-05, "loss": 0.0584, "step": 29856 }, { "epoch": 0.5287536470577818, "grad_norm": 0.7177714705467224, "learning_rate": 1.4331717982471454e-05, "loss": 0.0907, "step": 29857 }, { "epoch": 0.5287713565948102, "grad_norm": 0.8458083271980286, "learning_rate": 1.4330858473262273e-05, "loss": 0.0546, "step": 29858 }, { "epoch": 0.5287890661318386, "grad_norm": 0.6484156847000122, "learning_rate": 1.43299989662545e-05, "loss": 0.0627, "step": 29859 }, { "epoch": 0.528806775668867, "grad_norm": 0.47623589634895325, "learning_rate": 1.4329139461450971e-05, "loss": 0.0771, "step": 29860 }, { "epoch": 0.5288244852058955, "grad_norm": 0.6522610187530518, "learning_rate": 1.4328279958854497e-05, "loss": 0.1115, "step": 29861 }, { "epoch": 0.5288421947429239, "grad_norm": 0.4575101137161255, "learning_rate": 1.4327420458467912e-05, "loss": 0.0668, "step": 29862 }, { "epoch": 0.5288599042799523, "grad_norm": 0.9228606224060059, "learning_rate": 1.4326560960294048e-05, "loss": 0.1058, "step": 29863 }, { "epoch": 0.5288776138169808, "grad_norm": 0.5983853340148926, "learning_rate": 1.4325701464335737e-05, "loss": 0.0506, "step": 29864 }, { "epoch": 0.5288953233540092, "grad_norm": 0.689714252948761, "learning_rate": 1.4324841970595794e-05, "loss": 0.0569, "step": 29865 }, { "epoch": 0.5289130328910376, "grad_norm": 0.6683095097541809, "learning_rate": 1.4323982479077052e-05, "loss": 0.0836, "step": 29866 }, { "epoch": 0.528930742428066, "grad_norm": 0.4984703063964844, "learning_rate": 1.4323122989782343e-05, "loss": 0.0576, "step": 29867 }, { "epoch": 0.5289484519650945, "grad_norm": 0.7579972743988037, "learning_rate": 1.4322263502714487e-05, "loss": 0.0473, "step": 29868 }, { "epoch": 0.528966161502123, "grad_norm": 0.4867815673351288, "learning_rate": 1.4321404017876316e-05, "loss": 0.0801, "step": 29869 }, { "epoch": 0.5289838710391513, "grad_norm": 0.6301957368850708, "learning_rate": 1.4320544535270656e-05, "loss": 0.0433, "step": 29870 }, { "epoch": 0.5290015805761797, "grad_norm": 0.32383930683135986, "learning_rate": 1.4319685054900342e-05, "loss": 0.0383, "step": 29871 }, { "epoch": 0.5290192901132083, "grad_norm": 0.5338340401649475, "learning_rate": 1.4318825576768189e-05, "loss": 0.0376, "step": 29872 }, { "epoch": 0.5290369996502367, "grad_norm": 0.43255698680877686, "learning_rate": 1.4317966100877034e-05, "loss": 0.0692, "step": 29873 }, { "epoch": 0.529054709187265, "grad_norm": 0.7091071605682373, "learning_rate": 1.4317106627229705e-05, "loss": 0.1284, "step": 29874 }, { "epoch": 0.5290724187242934, "grad_norm": 0.8021879196166992, "learning_rate": 1.4316247155829023e-05, "loss": 0.096, "step": 29875 }, { "epoch": 0.529090128261322, "grad_norm": 0.7740535736083984, "learning_rate": 1.431538768667781e-05, "loss": 0.0679, "step": 29876 }, { "epoch": 0.5291078377983504, "grad_norm": 0.6435506343841553, "learning_rate": 1.431452821977891e-05, "loss": 0.0738, "step": 29877 }, { "epoch": 0.5291255473353788, "grad_norm": 0.8605541586875916, "learning_rate": 1.4313668755135148e-05, "loss": 0.1097, "step": 29878 }, { "epoch": 0.5291432568724073, "grad_norm": 0.8347082138061523, "learning_rate": 1.431280929274934e-05, "loss": 0.0826, "step": 29879 }, { "epoch": 0.5291609664094357, "grad_norm": 0.36685872077941895, "learning_rate": 1.431194983262432e-05, "loss": 0.052, "step": 29880 }, { "epoch": 0.5291786759464641, "grad_norm": 0.5548364520072937, "learning_rate": 1.431109037476292e-05, "loss": 0.0686, "step": 29881 }, { "epoch": 0.5291963854834925, "grad_norm": 0.44938400387763977, "learning_rate": 1.4310230919167956e-05, "loss": 0.0416, "step": 29882 }, { "epoch": 0.529214095020521, "grad_norm": 0.8569482564926147, "learning_rate": 1.4309371465842266e-05, "loss": 0.073, "step": 29883 }, { "epoch": 0.5292318045575494, "grad_norm": 0.5925045609474182, "learning_rate": 1.4308512014788673e-05, "loss": 0.0835, "step": 29884 }, { "epoch": 0.5292495140945778, "grad_norm": 0.8265342116355896, "learning_rate": 1.4307652566010008e-05, "loss": 0.1001, "step": 29885 }, { "epoch": 0.5292672236316062, "grad_norm": 0.5638101100921631, "learning_rate": 1.4306793119509096e-05, "loss": 0.0677, "step": 29886 }, { "epoch": 0.5292849331686347, "grad_norm": 0.43511316180229187, "learning_rate": 1.430593367528876e-05, "loss": 0.0589, "step": 29887 }, { "epoch": 0.5293026427056631, "grad_norm": 0.8066469430923462, "learning_rate": 1.4305074233351841e-05, "loss": 0.1236, "step": 29888 }, { "epoch": 0.5293203522426915, "grad_norm": 0.34459245204925537, "learning_rate": 1.430421479370115e-05, "loss": 0.0698, "step": 29889 }, { "epoch": 0.5293380617797199, "grad_norm": 0.8624998331069946, "learning_rate": 1.4303355356339523e-05, "loss": 0.0696, "step": 29890 }, { "epoch": 0.5293557713167484, "grad_norm": 0.693353533744812, "learning_rate": 1.4302495921269781e-05, "loss": 0.0562, "step": 29891 }, { "epoch": 0.5293734808537768, "grad_norm": 0.8221900463104248, "learning_rate": 1.430163648849477e-05, "loss": 0.0776, "step": 29892 }, { "epoch": 0.5293911903908052, "grad_norm": 0.46397653222084045, "learning_rate": 1.4300777058017296e-05, "loss": 0.0776, "step": 29893 }, { "epoch": 0.5294088999278337, "grad_norm": 0.33773013949394226, "learning_rate": 1.4299917629840194e-05, "loss": 0.0716, "step": 29894 }, { "epoch": 0.5294266094648621, "grad_norm": 0.6025999784469604, "learning_rate": 1.4299058203966298e-05, "loss": 0.0751, "step": 29895 }, { "epoch": 0.5294443190018905, "grad_norm": 0.47417548298835754, "learning_rate": 1.4298198780398425e-05, "loss": 0.0526, "step": 29896 }, { "epoch": 0.5294620285389189, "grad_norm": 0.7018489837646484, "learning_rate": 1.4297339359139406e-05, "loss": 0.0772, "step": 29897 }, { "epoch": 0.5294797380759474, "grad_norm": 0.46195727586746216, "learning_rate": 1.429647994019207e-05, "loss": 0.0549, "step": 29898 }, { "epoch": 0.5294974476129758, "grad_norm": 0.6775549054145813, "learning_rate": 1.4295620523559246e-05, "loss": 0.0956, "step": 29899 }, { "epoch": 0.5295151571500042, "grad_norm": 0.6148697733879089, "learning_rate": 1.4294761109243758e-05, "loss": 0.0928, "step": 29900 }, { "epoch": 0.5295328666870326, "grad_norm": 0.5569566488265991, "learning_rate": 1.4293901697248433e-05, "loss": 0.0656, "step": 29901 }, { "epoch": 0.5295505762240611, "grad_norm": 0.47170186042785645, "learning_rate": 1.42930422875761e-05, "loss": 0.0583, "step": 29902 }, { "epoch": 0.5295682857610895, "grad_norm": 0.5694579482078552, "learning_rate": 1.4292182880229596e-05, "loss": 0.0629, "step": 29903 }, { "epoch": 0.5295859952981179, "grad_norm": 0.5658285617828369, "learning_rate": 1.4291323475211729e-05, "loss": 0.0782, "step": 29904 }, { "epoch": 0.5296037048351463, "grad_norm": 0.6754800081253052, "learning_rate": 1.4290464072525334e-05, "loss": 0.0853, "step": 29905 }, { "epoch": 0.5296214143721748, "grad_norm": 1.2523095607757568, "learning_rate": 1.428960467217325e-05, "loss": 0.0547, "step": 29906 }, { "epoch": 0.5296391239092032, "grad_norm": 0.6827576160430908, "learning_rate": 1.4288745274158287e-05, "loss": 0.0793, "step": 29907 }, { "epoch": 0.5296568334462316, "grad_norm": 0.5969982147216797, "learning_rate": 1.4287885878483282e-05, "loss": 0.0612, "step": 29908 }, { "epoch": 0.5296745429832601, "grad_norm": 0.8907191157341003, "learning_rate": 1.428702648515106e-05, "loss": 0.0931, "step": 29909 }, { "epoch": 0.5296922525202885, "grad_norm": 0.6326898336410522, "learning_rate": 1.4286167094164451e-05, "loss": 0.0443, "step": 29910 }, { "epoch": 0.5297099620573169, "grad_norm": 0.8492076396942139, "learning_rate": 1.4285307705526277e-05, "loss": 0.0643, "step": 29911 }, { "epoch": 0.5297276715943453, "grad_norm": 0.5126123428344727, "learning_rate": 1.428444831923937e-05, "loss": 0.0894, "step": 29912 }, { "epoch": 0.5297453811313738, "grad_norm": 0.5114376544952393, "learning_rate": 1.4283588935306557e-05, "loss": 0.0582, "step": 29913 }, { "epoch": 0.5297630906684022, "grad_norm": 0.5047821402549744, "learning_rate": 1.4282729553730661e-05, "loss": 0.0355, "step": 29914 }, { "epoch": 0.5297808002054306, "grad_norm": 0.6608496308326721, "learning_rate": 1.4281870174514514e-05, "loss": 0.0708, "step": 29915 }, { "epoch": 0.529798509742459, "grad_norm": 1.0828006267547607, "learning_rate": 1.428101079766094e-05, "loss": 0.0689, "step": 29916 }, { "epoch": 0.5298162192794875, "grad_norm": 0.4819607734680176, "learning_rate": 1.4280151423172773e-05, "loss": 0.105, "step": 29917 }, { "epoch": 0.5298339288165159, "grad_norm": 0.506985068321228, "learning_rate": 1.4279292051052831e-05, "loss": 0.1037, "step": 29918 }, { "epoch": 0.5298516383535443, "grad_norm": 0.6317769289016724, "learning_rate": 1.4278432681303944e-05, "loss": 0.0592, "step": 29919 }, { "epoch": 0.5298693478905727, "grad_norm": 0.6767680048942566, "learning_rate": 1.4277573313928945e-05, "loss": 0.0833, "step": 29920 }, { "epoch": 0.5298870574276012, "grad_norm": 0.8296738266944885, "learning_rate": 1.4276713948930652e-05, "loss": 0.0754, "step": 29921 }, { "epoch": 0.5299047669646296, "grad_norm": 0.6531362533569336, "learning_rate": 1.4275854586311897e-05, "loss": 0.0777, "step": 29922 }, { "epoch": 0.529922476501658, "grad_norm": 0.9749430418014526, "learning_rate": 1.4274995226075509e-05, "loss": 0.0857, "step": 29923 }, { "epoch": 0.5299401860386865, "grad_norm": 0.45428356528282166, "learning_rate": 1.4274135868224316e-05, "loss": 0.049, "step": 29924 }, { "epoch": 0.5299578955757149, "grad_norm": 1.0662139654159546, "learning_rate": 1.4273276512761138e-05, "loss": 0.0849, "step": 29925 }, { "epoch": 0.5299756051127433, "grad_norm": 1.4373854398727417, "learning_rate": 1.4272417159688806e-05, "loss": 0.0727, "step": 29926 }, { "epoch": 0.5299933146497717, "grad_norm": 0.6892010569572449, "learning_rate": 1.4271557809010156e-05, "loss": 0.0735, "step": 29927 }, { "epoch": 0.5300110241868002, "grad_norm": 0.7707332372665405, "learning_rate": 1.4270698460727998e-05, "loss": 0.0749, "step": 29928 }, { "epoch": 0.5300287337238286, "grad_norm": 0.6347399950027466, "learning_rate": 1.4269839114845169e-05, "loss": 0.0725, "step": 29929 }, { "epoch": 0.530046443260857, "grad_norm": 0.8816730380058289, "learning_rate": 1.4268979771364498e-05, "loss": 0.0852, "step": 29930 }, { "epoch": 0.5300641527978854, "grad_norm": 0.6139553785324097, "learning_rate": 1.4268120430288815e-05, "loss": 0.0664, "step": 29931 }, { "epoch": 0.530081862334914, "grad_norm": 0.41601094603538513, "learning_rate": 1.4267261091620936e-05, "loss": 0.0643, "step": 29932 }, { "epoch": 0.5300995718719423, "grad_norm": 0.6670428514480591, "learning_rate": 1.4266401755363693e-05, "loss": 0.0401, "step": 29933 }, { "epoch": 0.5301172814089707, "grad_norm": 0.2992514669895172, "learning_rate": 1.4265542421519911e-05, "loss": 0.0593, "step": 29934 }, { "epoch": 0.5301349909459991, "grad_norm": 0.7750360369682312, "learning_rate": 1.4264683090092431e-05, "loss": 0.0815, "step": 29935 }, { "epoch": 0.5301527004830277, "grad_norm": 0.5411127805709839, "learning_rate": 1.426382376108406e-05, "loss": 0.0649, "step": 29936 }, { "epoch": 0.530170410020056, "grad_norm": 0.747543454170227, "learning_rate": 1.4262964434497636e-05, "loss": 0.0803, "step": 29937 }, { "epoch": 0.5301881195570844, "grad_norm": 1.2688422203063965, "learning_rate": 1.426210511033599e-05, "loss": 0.0925, "step": 29938 }, { "epoch": 0.530205829094113, "grad_norm": 0.8528472185134888, "learning_rate": 1.4261245788601938e-05, "loss": 0.0779, "step": 29939 }, { "epoch": 0.5302235386311414, "grad_norm": 0.7049176692962646, "learning_rate": 1.4260386469298313e-05, "loss": 0.068, "step": 29940 }, { "epoch": 0.5302412481681698, "grad_norm": 0.7723342776298523, "learning_rate": 1.4259527152427941e-05, "loss": 0.0425, "step": 29941 }, { "epoch": 0.5302589577051982, "grad_norm": 0.6657282710075378, "learning_rate": 1.4258667837993653e-05, "loss": 0.0738, "step": 29942 }, { "epoch": 0.5302766672422267, "grad_norm": 0.5666097402572632, "learning_rate": 1.4257808525998271e-05, "loss": 0.0674, "step": 29943 }, { "epoch": 0.5302943767792551, "grad_norm": 0.5732466578483582, "learning_rate": 1.4256949216444622e-05, "loss": 0.0506, "step": 29944 }, { "epoch": 0.5303120863162835, "grad_norm": 0.46618160605430603, "learning_rate": 1.4256089909335545e-05, "loss": 0.0589, "step": 29945 }, { "epoch": 0.5303297958533119, "grad_norm": 0.38901686668395996, "learning_rate": 1.4255230604673845e-05, "loss": 0.0609, "step": 29946 }, { "epoch": 0.5303475053903404, "grad_norm": 0.6529955863952637, "learning_rate": 1.4254371302462366e-05, "loss": 0.0718, "step": 29947 }, { "epoch": 0.5303652149273688, "grad_norm": 0.4745158553123474, "learning_rate": 1.4253512002703927e-05, "loss": 0.0579, "step": 29948 }, { "epoch": 0.5303829244643972, "grad_norm": 0.6823479533195496, "learning_rate": 1.4252652705401361e-05, "loss": 0.0973, "step": 29949 }, { "epoch": 0.5304006340014256, "grad_norm": 0.5672337412834167, "learning_rate": 1.4251793410557492e-05, "loss": 0.0822, "step": 29950 }, { "epoch": 0.5304183435384541, "grad_norm": 0.31700825691223145, "learning_rate": 1.4250934118175144e-05, "loss": 0.0529, "step": 29951 }, { "epoch": 0.5304360530754825, "grad_norm": 0.47146862745285034, "learning_rate": 1.4250074828257152e-05, "loss": 0.0712, "step": 29952 }, { "epoch": 0.5304537626125109, "grad_norm": 0.6080902814865112, "learning_rate": 1.4249215540806332e-05, "loss": 0.0574, "step": 29953 }, { "epoch": 0.5304714721495394, "grad_norm": 0.7507113218307495, "learning_rate": 1.424835625582552e-05, "loss": 0.0934, "step": 29954 }, { "epoch": 0.5304891816865678, "grad_norm": 0.6841524839401245, "learning_rate": 1.424749697331754e-05, "loss": 0.0596, "step": 29955 }, { "epoch": 0.5305068912235962, "grad_norm": 0.5255672931671143, "learning_rate": 1.4246637693285223e-05, "loss": 0.0413, "step": 29956 }, { "epoch": 0.5305246007606246, "grad_norm": 0.6735550761222839, "learning_rate": 1.4245778415731384e-05, "loss": 0.0754, "step": 29957 }, { "epoch": 0.5305423102976531, "grad_norm": 0.4989875853061676, "learning_rate": 1.4244919140658859e-05, "loss": 0.0516, "step": 29958 }, { "epoch": 0.5305600198346815, "grad_norm": 0.6466843485832214, "learning_rate": 1.4244059868070482e-05, "loss": 0.0628, "step": 29959 }, { "epoch": 0.5305777293717099, "grad_norm": 0.5248311161994934, "learning_rate": 1.4243200597969065e-05, "loss": 0.086, "step": 29960 }, { "epoch": 0.5305954389087383, "grad_norm": 0.9906835556030273, "learning_rate": 1.4242341330357438e-05, "loss": 0.1138, "step": 29961 }, { "epoch": 0.5306131484457668, "grad_norm": 0.6507498621940613, "learning_rate": 1.4241482065238435e-05, "loss": 0.0718, "step": 29962 }, { "epoch": 0.5306308579827952, "grad_norm": 0.4653985798358917, "learning_rate": 1.4240622802614882e-05, "loss": 0.0882, "step": 29963 }, { "epoch": 0.5306485675198236, "grad_norm": 0.48820948600769043, "learning_rate": 1.42397635424896e-05, "loss": 0.0574, "step": 29964 }, { "epoch": 0.530666277056852, "grad_norm": 0.3277515470981598, "learning_rate": 1.423890428486542e-05, "loss": 0.0531, "step": 29965 }, { "epoch": 0.5306839865938805, "grad_norm": 0.555296778678894, "learning_rate": 1.423804502974517e-05, "loss": 0.0773, "step": 29966 }, { "epoch": 0.5307016961309089, "grad_norm": 0.47658872604370117, "learning_rate": 1.4237185777131669e-05, "loss": 0.0699, "step": 29967 }, { "epoch": 0.5307194056679373, "grad_norm": 0.5877569913864136, "learning_rate": 1.4236326527027753e-05, "loss": 0.0927, "step": 29968 }, { "epoch": 0.5307371152049658, "grad_norm": 0.6511232256889343, "learning_rate": 1.4235467279436245e-05, "loss": 0.0725, "step": 29969 }, { "epoch": 0.5307548247419942, "grad_norm": 0.7165367007255554, "learning_rate": 1.4234608034359977e-05, "loss": 0.0846, "step": 29970 }, { "epoch": 0.5307725342790226, "grad_norm": 0.7740377187728882, "learning_rate": 1.4233748791801763e-05, "loss": 0.0982, "step": 29971 }, { "epoch": 0.530790243816051, "grad_norm": 0.40369853377342224, "learning_rate": 1.423288955176444e-05, "loss": 0.0557, "step": 29972 }, { "epoch": 0.5308079533530795, "grad_norm": 0.5595431923866272, "learning_rate": 1.4232030314250835e-05, "loss": 0.0629, "step": 29973 }, { "epoch": 0.5308256628901079, "grad_norm": 0.2825150787830353, "learning_rate": 1.4231171079263779e-05, "loss": 0.0555, "step": 29974 }, { "epoch": 0.5308433724271363, "grad_norm": 0.870986819267273, "learning_rate": 1.4230311846806083e-05, "loss": 0.0633, "step": 29975 }, { "epoch": 0.5308610819641647, "grad_norm": 1.0511311292648315, "learning_rate": 1.4229452616880586e-05, "loss": 0.0763, "step": 29976 }, { "epoch": 0.5308787915011932, "grad_norm": 0.7087768912315369, "learning_rate": 1.4228593389490115e-05, "loss": 0.0439, "step": 29977 }, { "epoch": 0.5308965010382216, "grad_norm": 0.6905273199081421, "learning_rate": 1.422773416463749e-05, "loss": 0.0618, "step": 29978 }, { "epoch": 0.53091421057525, "grad_norm": 0.63217693567276, "learning_rate": 1.422687494232554e-05, "loss": 0.0616, "step": 29979 }, { "epoch": 0.5309319201122784, "grad_norm": 0.6752134561538696, "learning_rate": 1.4226015722557093e-05, "loss": 0.0601, "step": 29980 }, { "epoch": 0.5309496296493069, "grad_norm": 0.68001788854599, "learning_rate": 1.4225156505334983e-05, "loss": 0.051, "step": 29981 }, { "epoch": 0.5309673391863353, "grad_norm": 0.6261446475982666, "learning_rate": 1.4224297290662023e-05, "loss": 0.0681, "step": 29982 }, { "epoch": 0.5309850487233637, "grad_norm": 0.5656073093414307, "learning_rate": 1.4223438078541048e-05, "loss": 0.0389, "step": 29983 }, { "epoch": 0.5310027582603922, "grad_norm": 0.7420815229415894, "learning_rate": 1.4222578868974888e-05, "loss": 0.0508, "step": 29984 }, { "epoch": 0.5310204677974206, "grad_norm": 0.7655535340309143, "learning_rate": 1.422171966196636e-05, "loss": 0.0682, "step": 29985 }, { "epoch": 0.531038177334449, "grad_norm": 0.7486833333969116, "learning_rate": 1.4220860457518292e-05, "loss": 0.0984, "step": 29986 }, { "epoch": 0.5310558868714774, "grad_norm": 0.571104884147644, "learning_rate": 1.4220001255633517e-05, "loss": 0.1074, "step": 29987 }, { "epoch": 0.5310735964085059, "grad_norm": 0.5511923432350159, "learning_rate": 1.4219142056314866e-05, "loss": 0.0834, "step": 29988 }, { "epoch": 0.5310913059455343, "grad_norm": 0.7458773255348206, "learning_rate": 1.4218282859565152e-05, "loss": 0.0534, "step": 29989 }, { "epoch": 0.5311090154825627, "grad_norm": 0.6557738780975342, "learning_rate": 1.421742366538721e-05, "loss": 0.0646, "step": 29990 }, { "epoch": 0.5311267250195911, "grad_norm": 0.9471295475959778, "learning_rate": 1.4216564473783864e-05, "loss": 0.0694, "step": 29991 }, { "epoch": 0.5311444345566196, "grad_norm": 0.9357163310050964, "learning_rate": 1.4215705284757942e-05, "loss": 0.1008, "step": 29992 }, { "epoch": 0.531162144093648, "grad_norm": 0.39885473251342773, "learning_rate": 1.4214846098312269e-05, "loss": 0.0666, "step": 29993 }, { "epoch": 0.5311798536306764, "grad_norm": 0.6211446523666382, "learning_rate": 1.4213986914449673e-05, "loss": 0.0868, "step": 29994 }, { "epoch": 0.5311975631677048, "grad_norm": 0.5190328359603882, "learning_rate": 1.4213127733172986e-05, "loss": 0.0473, "step": 29995 }, { "epoch": 0.5312152727047333, "grad_norm": 0.6405143141746521, "learning_rate": 1.4212268554485023e-05, "loss": 0.067, "step": 29996 }, { "epoch": 0.5312329822417617, "grad_norm": 0.905459463596344, "learning_rate": 1.4211409378388617e-05, "loss": 0.072, "step": 29997 }, { "epoch": 0.5312506917787901, "grad_norm": 0.4788704812526703, "learning_rate": 1.4210550204886601e-05, "loss": 0.0623, "step": 29998 }, { "epoch": 0.5312684013158187, "grad_norm": 0.5640387535095215, "learning_rate": 1.420969103398179e-05, "loss": 0.0517, "step": 29999 }, { "epoch": 0.531286110852847, "grad_norm": 0.49831604957580566, "learning_rate": 1.4208831865677012e-05, "loss": 0.067, "step": 30000 }, { "epoch": 0.5313038203898754, "grad_norm": 0.8888677358627319, "learning_rate": 1.42079726999751e-05, "loss": 0.0626, "step": 30001 }, { "epoch": 0.5313215299269038, "grad_norm": 0.4931992292404175, "learning_rate": 1.4207113536878885e-05, "loss": 0.0526, "step": 30002 }, { "epoch": 0.5313392394639324, "grad_norm": 0.3999041020870209, "learning_rate": 1.4206254376391178e-05, "loss": 0.0696, "step": 30003 }, { "epoch": 0.5313569490009608, "grad_norm": 0.668603777885437, "learning_rate": 1.4205395218514815e-05, "loss": 0.0867, "step": 30004 }, { "epoch": 0.5313746585379892, "grad_norm": 0.5637859106063843, "learning_rate": 1.420453606325262e-05, "loss": 0.0505, "step": 30005 }, { "epoch": 0.5313923680750176, "grad_norm": 0.5127281546592712, "learning_rate": 1.4203676910607425e-05, "loss": 0.0415, "step": 30006 }, { "epoch": 0.5314100776120461, "grad_norm": 0.6497901082038879, "learning_rate": 1.420281776058205e-05, "loss": 0.0674, "step": 30007 }, { "epoch": 0.5314277871490745, "grad_norm": 0.3548557162284851, "learning_rate": 1.4201958613179323e-05, "loss": 0.0806, "step": 30008 }, { "epoch": 0.5314454966861029, "grad_norm": 0.5653296113014221, "learning_rate": 1.4201099468402075e-05, "loss": 0.0624, "step": 30009 }, { "epoch": 0.5314632062231313, "grad_norm": 0.45907115936279297, "learning_rate": 1.4200240326253125e-05, "loss": 0.0774, "step": 30010 }, { "epoch": 0.5314809157601598, "grad_norm": 0.6281090378761292, "learning_rate": 1.4199381186735306e-05, "loss": 0.0594, "step": 30011 }, { "epoch": 0.5314986252971882, "grad_norm": 0.8421664834022522, "learning_rate": 1.4198522049851438e-05, "loss": 0.0656, "step": 30012 }, { "epoch": 0.5315163348342166, "grad_norm": 0.908649206161499, "learning_rate": 1.4197662915604361e-05, "loss": 0.0807, "step": 30013 }, { "epoch": 0.5315340443712451, "grad_norm": 0.7026916742324829, "learning_rate": 1.4196803783996883e-05, "loss": 0.1011, "step": 30014 }, { "epoch": 0.5315517539082735, "grad_norm": 0.39790672063827515, "learning_rate": 1.4195944655031838e-05, "loss": 0.0657, "step": 30015 }, { "epoch": 0.5315694634453019, "grad_norm": 0.6188536286354065, "learning_rate": 1.4195085528712063e-05, "loss": 0.0916, "step": 30016 }, { "epoch": 0.5315871729823303, "grad_norm": 0.7350118160247803, "learning_rate": 1.4194226405040367e-05, "loss": 0.098, "step": 30017 }, { "epoch": 0.5316048825193588, "grad_norm": 0.7453500032424927, "learning_rate": 1.4193367284019587e-05, "loss": 0.0869, "step": 30018 }, { "epoch": 0.5316225920563872, "grad_norm": 0.6535696983337402, "learning_rate": 1.4192508165652546e-05, "loss": 0.0762, "step": 30019 }, { "epoch": 0.5316403015934156, "grad_norm": 0.5639569163322449, "learning_rate": 1.4191649049942074e-05, "loss": 0.0399, "step": 30020 }, { "epoch": 0.531658011130444, "grad_norm": 0.7791262269020081, "learning_rate": 1.4190789936890994e-05, "loss": 0.0902, "step": 30021 }, { "epoch": 0.5316757206674725, "grad_norm": 0.8718921542167664, "learning_rate": 1.4189930826502132e-05, "loss": 0.0671, "step": 30022 }, { "epoch": 0.5316934302045009, "grad_norm": 0.7857140898704529, "learning_rate": 1.4189071718778319e-05, "loss": 0.0721, "step": 30023 }, { "epoch": 0.5317111397415293, "grad_norm": 0.2959243059158325, "learning_rate": 1.4188212613722374e-05, "loss": 0.0543, "step": 30024 }, { "epoch": 0.5317288492785577, "grad_norm": 0.7868688702583313, "learning_rate": 1.4187353511337128e-05, "loss": 0.0674, "step": 30025 }, { "epoch": 0.5317465588155862, "grad_norm": 0.5533717274665833, "learning_rate": 1.4186494411625407e-05, "loss": 0.0571, "step": 30026 }, { "epoch": 0.5317642683526146, "grad_norm": 0.626884400844574, "learning_rate": 1.4185635314590043e-05, "loss": 0.1059, "step": 30027 }, { "epoch": 0.531781977889643, "grad_norm": 0.4021157920360565, "learning_rate": 1.418477622023385e-05, "loss": 0.0611, "step": 30028 }, { "epoch": 0.5317996874266715, "grad_norm": 0.5658994317054749, "learning_rate": 1.4183917128559657e-05, "loss": 0.0697, "step": 30029 }, { "epoch": 0.5318173969636999, "grad_norm": 1.0719821453094482, "learning_rate": 1.4183058039570304e-05, "loss": 0.0883, "step": 30030 }, { "epoch": 0.5318351065007283, "grad_norm": 0.8294171094894409, "learning_rate": 1.4182198953268601e-05, "loss": 0.0802, "step": 30031 }, { "epoch": 0.5318528160377567, "grad_norm": 0.8199030160903931, "learning_rate": 1.4181339869657383e-05, "loss": 0.0635, "step": 30032 }, { "epoch": 0.5318705255747852, "grad_norm": 0.44618701934814453, "learning_rate": 1.4180480788739472e-05, "loss": 0.0712, "step": 30033 }, { "epoch": 0.5318882351118136, "grad_norm": 0.6416836977005005, "learning_rate": 1.41796217105177e-05, "loss": 0.0842, "step": 30034 }, { "epoch": 0.531905944648842, "grad_norm": 0.7759981155395508, "learning_rate": 1.4178762634994886e-05, "loss": 0.0824, "step": 30035 }, { "epoch": 0.5319236541858704, "grad_norm": 0.86598140001297, "learning_rate": 1.417790356217386e-05, "loss": 0.0713, "step": 30036 }, { "epoch": 0.5319413637228989, "grad_norm": 0.7006828784942627, "learning_rate": 1.4177044492057448e-05, "loss": 0.0818, "step": 30037 }, { "epoch": 0.5319590732599273, "grad_norm": 0.4609038233757019, "learning_rate": 1.417618542464848e-05, "loss": 0.0696, "step": 30038 }, { "epoch": 0.5319767827969557, "grad_norm": 0.29966506361961365, "learning_rate": 1.4175326359949774e-05, "loss": 0.07, "step": 30039 }, { "epoch": 0.5319944923339841, "grad_norm": 0.3728523254394531, "learning_rate": 1.4174467297964162e-05, "loss": 0.0429, "step": 30040 }, { "epoch": 0.5320122018710126, "grad_norm": 0.4233493506908417, "learning_rate": 1.4173608238694475e-05, "loss": 0.0717, "step": 30041 }, { "epoch": 0.532029911408041, "grad_norm": 0.5239638686180115, "learning_rate": 1.4172749182143529e-05, "loss": 0.0612, "step": 30042 }, { "epoch": 0.5320476209450694, "grad_norm": 0.506070613861084, "learning_rate": 1.4171890128314151e-05, "loss": 0.0581, "step": 30043 }, { "epoch": 0.5320653304820979, "grad_norm": 0.24321597814559937, "learning_rate": 1.4171031077209171e-05, "loss": 0.0603, "step": 30044 }, { "epoch": 0.5320830400191263, "grad_norm": 0.334641695022583, "learning_rate": 1.4170172028831423e-05, "loss": 0.0668, "step": 30045 }, { "epoch": 0.5321007495561547, "grad_norm": 0.8921105265617371, "learning_rate": 1.4169312983183718e-05, "loss": 0.0641, "step": 30046 }, { "epoch": 0.5321184590931831, "grad_norm": 0.7133063673973083, "learning_rate": 1.4168453940268889e-05, "loss": 0.0718, "step": 30047 }, { "epoch": 0.5321361686302116, "grad_norm": 1.0519185066223145, "learning_rate": 1.4167594900089768e-05, "loss": 0.0971, "step": 30048 }, { "epoch": 0.53215387816724, "grad_norm": 0.53215092420578, "learning_rate": 1.416673586264917e-05, "loss": 0.0969, "step": 30049 }, { "epoch": 0.5321715877042684, "grad_norm": 0.6450271606445312, "learning_rate": 1.4165876827949927e-05, "loss": 0.0702, "step": 30050 }, { "epoch": 0.5321892972412968, "grad_norm": 0.9577884078025818, "learning_rate": 1.4165017795994867e-05, "loss": 0.0648, "step": 30051 }, { "epoch": 0.5322070067783253, "grad_norm": 0.6383087038993835, "learning_rate": 1.4164158766786815e-05, "loss": 0.0882, "step": 30052 }, { "epoch": 0.5322247163153537, "grad_norm": 0.6394810676574707, "learning_rate": 1.4163299740328593e-05, "loss": 0.0803, "step": 30053 }, { "epoch": 0.5322424258523821, "grad_norm": 0.6898701190948486, "learning_rate": 1.4162440716623033e-05, "loss": 0.0874, "step": 30054 }, { "epoch": 0.5322601353894105, "grad_norm": 1.843401551246643, "learning_rate": 1.4161581695672962e-05, "loss": 0.081, "step": 30055 }, { "epoch": 0.532277844926439, "grad_norm": 0.9993993043899536, "learning_rate": 1.4160722677481197e-05, "loss": 0.0451, "step": 30056 }, { "epoch": 0.5322955544634674, "grad_norm": 0.6325503587722778, "learning_rate": 1.4159863662050569e-05, "loss": 0.0804, "step": 30057 }, { "epoch": 0.5323132640004958, "grad_norm": 1.6182091236114502, "learning_rate": 1.4159004649383901e-05, "loss": 0.0877, "step": 30058 }, { "epoch": 0.5323309735375243, "grad_norm": 0.7959520220756531, "learning_rate": 1.4158145639484033e-05, "loss": 0.0646, "step": 30059 }, { "epoch": 0.5323486830745527, "grad_norm": 0.6879903674125671, "learning_rate": 1.4157286632353774e-05, "loss": 0.0619, "step": 30060 }, { "epoch": 0.5323663926115811, "grad_norm": 0.749678373336792, "learning_rate": 1.4156427627995957e-05, "loss": 0.0592, "step": 30061 }, { "epoch": 0.5323841021486095, "grad_norm": 0.5267702341079712, "learning_rate": 1.415556862641341e-05, "loss": 0.0634, "step": 30062 }, { "epoch": 0.532401811685638, "grad_norm": 0.6015000343322754, "learning_rate": 1.4154709627608955e-05, "loss": 0.0729, "step": 30063 }, { "epoch": 0.5324195212226664, "grad_norm": 0.7655929923057556, "learning_rate": 1.4153850631585418e-05, "loss": 0.0761, "step": 30064 }, { "epoch": 0.5324372307596948, "grad_norm": 0.4655773341655731, "learning_rate": 1.4152991638345629e-05, "loss": 0.0666, "step": 30065 }, { "epoch": 0.5324549402967232, "grad_norm": 0.5492486953735352, "learning_rate": 1.4152132647892414e-05, "loss": 0.0653, "step": 30066 }, { "epoch": 0.5324726498337518, "grad_norm": 0.3989723324775696, "learning_rate": 1.4151273660228593e-05, "loss": 0.0412, "step": 30067 }, { "epoch": 0.5324903593707802, "grad_norm": 0.8974533677101135, "learning_rate": 1.4150414675356998e-05, "loss": 0.1013, "step": 30068 }, { "epoch": 0.5325080689078086, "grad_norm": 0.7870668768882751, "learning_rate": 1.4149555693280456e-05, "loss": 0.0519, "step": 30069 }, { "epoch": 0.5325257784448371, "grad_norm": 0.8751277327537537, "learning_rate": 1.4148696714001788e-05, "loss": 0.0513, "step": 30070 }, { "epoch": 0.5325434879818655, "grad_norm": 0.5980927348136902, "learning_rate": 1.4147837737523816e-05, "loss": 0.087, "step": 30071 }, { "epoch": 0.5325611975188939, "grad_norm": 0.4878588020801544, "learning_rate": 1.4146978763849375e-05, "loss": 0.0529, "step": 30072 }, { "epoch": 0.5325789070559223, "grad_norm": 0.6044242978096008, "learning_rate": 1.414611979298129e-05, "loss": 0.0738, "step": 30073 }, { "epoch": 0.5325966165929508, "grad_norm": 0.3051760494709015, "learning_rate": 1.4145260824922381e-05, "loss": 0.0487, "step": 30074 }, { "epoch": 0.5326143261299792, "grad_norm": 1.0987968444824219, "learning_rate": 1.4144401859675479e-05, "loss": 0.0562, "step": 30075 }, { "epoch": 0.5326320356670076, "grad_norm": 0.6898407936096191, "learning_rate": 1.4143542897243408e-05, "loss": 0.0723, "step": 30076 }, { "epoch": 0.532649745204036, "grad_norm": 0.8834363222122192, "learning_rate": 1.4142683937628995e-05, "loss": 0.0653, "step": 30077 }, { "epoch": 0.5326674547410645, "grad_norm": 0.8280798196792603, "learning_rate": 1.4141824980835064e-05, "loss": 0.0882, "step": 30078 }, { "epoch": 0.5326851642780929, "grad_norm": 0.7299198508262634, "learning_rate": 1.4140966026864442e-05, "loss": 0.0894, "step": 30079 }, { "epoch": 0.5327028738151213, "grad_norm": 0.4400617778301239, "learning_rate": 1.4140107075719963e-05, "loss": 0.0411, "step": 30080 }, { "epoch": 0.5327205833521497, "grad_norm": 0.5260241627693176, "learning_rate": 1.4139248127404431e-05, "loss": 0.0482, "step": 30081 }, { "epoch": 0.5327382928891782, "grad_norm": 0.8692116141319275, "learning_rate": 1.4138389181920693e-05, "loss": 0.0804, "step": 30082 }, { "epoch": 0.5327560024262066, "grad_norm": 0.4840066432952881, "learning_rate": 1.4137530239271565e-05, "loss": 0.0654, "step": 30083 }, { "epoch": 0.532773711963235, "grad_norm": 0.6258432865142822, "learning_rate": 1.4136671299459881e-05, "loss": 0.0444, "step": 30084 }, { "epoch": 0.5327914215002635, "grad_norm": 0.6570133566856384, "learning_rate": 1.4135812362488455e-05, "loss": 0.0811, "step": 30085 }, { "epoch": 0.5328091310372919, "grad_norm": 0.7535020709037781, "learning_rate": 1.413495342836012e-05, "loss": 0.0992, "step": 30086 }, { "epoch": 0.5328268405743203, "grad_norm": 0.785427451133728, "learning_rate": 1.4134094497077703e-05, "loss": 0.0662, "step": 30087 }, { "epoch": 0.5328445501113487, "grad_norm": 0.47028541564941406, "learning_rate": 1.4133235568644025e-05, "loss": 0.0573, "step": 30088 }, { "epoch": 0.5328622596483772, "grad_norm": 0.9588833451271057, "learning_rate": 1.4132376643061914e-05, "loss": 0.0937, "step": 30089 }, { "epoch": 0.5328799691854056, "grad_norm": 0.5655084848403931, "learning_rate": 1.4131517720334198e-05, "loss": 0.0963, "step": 30090 }, { "epoch": 0.532897678722434, "grad_norm": 0.8059650659561157, "learning_rate": 1.4130658800463701e-05, "loss": 0.0683, "step": 30091 }, { "epoch": 0.5329153882594624, "grad_norm": 0.7500036954879761, "learning_rate": 1.4129799883453249e-05, "loss": 0.0485, "step": 30092 }, { "epoch": 0.5329330977964909, "grad_norm": 0.5216013789176941, "learning_rate": 1.4128940969305664e-05, "loss": 0.07, "step": 30093 }, { "epoch": 0.5329508073335193, "grad_norm": 0.6453394889831543, "learning_rate": 1.4128082058023783e-05, "loss": 0.0686, "step": 30094 }, { "epoch": 0.5329685168705477, "grad_norm": 0.6399341225624084, "learning_rate": 1.4127223149610413e-05, "loss": 0.0747, "step": 30095 }, { "epoch": 0.5329862264075761, "grad_norm": 1.0680352449417114, "learning_rate": 1.4126364244068396e-05, "loss": 0.0494, "step": 30096 }, { "epoch": 0.5330039359446046, "grad_norm": 0.6803085207939148, "learning_rate": 1.412550534140055e-05, "loss": 0.0603, "step": 30097 }, { "epoch": 0.533021645481633, "grad_norm": 0.548221230506897, "learning_rate": 1.412464644160971e-05, "loss": 0.0684, "step": 30098 }, { "epoch": 0.5330393550186614, "grad_norm": 0.3155478537082672, "learning_rate": 1.4123787544698689e-05, "loss": 0.0561, "step": 30099 }, { "epoch": 0.5330570645556899, "grad_norm": 0.8833836317062378, "learning_rate": 1.4122928650670317e-05, "loss": 0.0748, "step": 30100 }, { "epoch": 0.5330747740927183, "grad_norm": 0.8107720017433167, "learning_rate": 1.4122069759527427e-05, "loss": 0.0744, "step": 30101 }, { "epoch": 0.5330924836297467, "grad_norm": 0.5147733092308044, "learning_rate": 1.4121210871272833e-05, "loss": 0.0633, "step": 30102 }, { "epoch": 0.5331101931667751, "grad_norm": 0.5495252013206482, "learning_rate": 1.4120351985909366e-05, "loss": 0.0648, "step": 30103 }, { "epoch": 0.5331279027038036, "grad_norm": 0.6315391659736633, "learning_rate": 1.4119493103439855e-05, "loss": 0.0563, "step": 30104 }, { "epoch": 0.533145612240832, "grad_norm": 0.37508928775787354, "learning_rate": 1.4118634223867124e-05, "loss": 0.0707, "step": 30105 }, { "epoch": 0.5331633217778604, "grad_norm": 0.7081011533737183, "learning_rate": 1.4117775347193993e-05, "loss": 0.0949, "step": 30106 }, { "epoch": 0.5331810313148888, "grad_norm": 0.6186451315879822, "learning_rate": 1.4116916473423293e-05, "loss": 0.0765, "step": 30107 }, { "epoch": 0.5331987408519173, "grad_norm": 0.7407571077346802, "learning_rate": 1.4116057602557849e-05, "loss": 0.1034, "step": 30108 }, { "epoch": 0.5332164503889457, "grad_norm": 0.5037828683853149, "learning_rate": 1.411519873460049e-05, "loss": 0.05, "step": 30109 }, { "epoch": 0.5332341599259741, "grad_norm": 1.0571049451828003, "learning_rate": 1.4114339869554032e-05, "loss": 0.0533, "step": 30110 }, { "epoch": 0.5332518694630025, "grad_norm": 0.4626261293888092, "learning_rate": 1.4113481007421307e-05, "loss": 0.0547, "step": 30111 }, { "epoch": 0.533269579000031, "grad_norm": 0.7305696606636047, "learning_rate": 1.4112622148205146e-05, "loss": 0.0746, "step": 30112 }, { "epoch": 0.5332872885370594, "grad_norm": 0.5240771770477295, "learning_rate": 1.4111763291908362e-05, "loss": 0.075, "step": 30113 }, { "epoch": 0.5333049980740878, "grad_norm": 0.8001683354377747, "learning_rate": 1.4110904438533788e-05, "loss": 0.1209, "step": 30114 }, { "epoch": 0.5333227076111163, "grad_norm": 0.5654464364051819, "learning_rate": 1.4110045588084247e-05, "loss": 0.0636, "step": 30115 }, { "epoch": 0.5333404171481447, "grad_norm": 0.6672315001487732, "learning_rate": 1.410918674056257e-05, "loss": 0.0845, "step": 30116 }, { "epoch": 0.5333581266851731, "grad_norm": 0.5693275332450867, "learning_rate": 1.4108327895971577e-05, "loss": 0.0629, "step": 30117 }, { "epoch": 0.5333758362222015, "grad_norm": 0.7059623003005981, "learning_rate": 1.4107469054314092e-05, "loss": 0.0568, "step": 30118 }, { "epoch": 0.53339354575923, "grad_norm": 0.8366133570671082, "learning_rate": 1.4106610215592949e-05, "loss": 0.0891, "step": 30119 }, { "epoch": 0.5334112552962584, "grad_norm": 0.6318244338035583, "learning_rate": 1.4105751379810963e-05, "loss": 0.0545, "step": 30120 }, { "epoch": 0.5334289648332868, "grad_norm": 0.470420241355896, "learning_rate": 1.4104892546970966e-05, "loss": 0.0876, "step": 30121 }, { "epoch": 0.5334466743703152, "grad_norm": 0.39517489075660706, "learning_rate": 1.4104033717075782e-05, "loss": 0.0694, "step": 30122 }, { "epoch": 0.5334643839073437, "grad_norm": 0.527268648147583, "learning_rate": 1.4103174890128242e-05, "loss": 0.0597, "step": 30123 }, { "epoch": 0.5334820934443721, "grad_norm": 0.6582135558128357, "learning_rate": 1.4102316066131156e-05, "loss": 0.0682, "step": 30124 }, { "epoch": 0.5334998029814005, "grad_norm": 0.690799355506897, "learning_rate": 1.4101457245087365e-05, "loss": 0.0933, "step": 30125 }, { "epoch": 0.5335175125184289, "grad_norm": 1.0586200952529907, "learning_rate": 1.4100598426999691e-05, "loss": 0.0938, "step": 30126 }, { "epoch": 0.5335352220554574, "grad_norm": 0.8676053285598755, "learning_rate": 1.4099739611870955e-05, "loss": 0.0747, "step": 30127 }, { "epoch": 0.5335529315924858, "grad_norm": 0.48527050018310547, "learning_rate": 1.4098880799703982e-05, "loss": 0.0583, "step": 30128 }, { "epoch": 0.5335706411295142, "grad_norm": 0.3724229037761688, "learning_rate": 1.4098021990501601e-05, "loss": 0.0781, "step": 30129 }, { "epoch": 0.5335883506665428, "grad_norm": 0.7847881317138672, "learning_rate": 1.4097163184266639e-05, "loss": 0.0606, "step": 30130 }, { "epoch": 0.5336060602035712, "grad_norm": 0.3107866048812866, "learning_rate": 1.4096304381001917e-05, "loss": 0.0766, "step": 30131 }, { "epoch": 0.5336237697405996, "grad_norm": 0.535687267780304, "learning_rate": 1.4095445580710261e-05, "loss": 0.0735, "step": 30132 }, { "epoch": 0.533641479277628, "grad_norm": 0.728843092918396, "learning_rate": 1.40945867833945e-05, "loss": 0.0743, "step": 30133 }, { "epoch": 0.5336591888146565, "grad_norm": 0.13838912546634674, "learning_rate": 1.4093727989057454e-05, "loss": 0.0469, "step": 30134 }, { "epoch": 0.5336768983516849, "grad_norm": 0.5266982913017273, "learning_rate": 1.4092869197701954e-05, "loss": 0.059, "step": 30135 }, { "epoch": 0.5336946078887133, "grad_norm": 0.5856173038482666, "learning_rate": 1.4092010409330819e-05, "loss": 0.0754, "step": 30136 }, { "epoch": 0.5337123174257417, "grad_norm": 0.8711238503456116, "learning_rate": 1.4091151623946887e-05, "loss": 0.0863, "step": 30137 }, { "epoch": 0.5337300269627702, "grad_norm": 0.7811914682388306, "learning_rate": 1.4090292841552967e-05, "loss": 0.0818, "step": 30138 }, { "epoch": 0.5337477364997986, "grad_norm": 0.8000338077545166, "learning_rate": 1.4089434062151887e-05, "loss": 0.0597, "step": 30139 }, { "epoch": 0.533765446036827, "grad_norm": 0.6357010006904602, "learning_rate": 1.4088575285746481e-05, "loss": 0.0745, "step": 30140 }, { "epoch": 0.5337831555738554, "grad_norm": 0.6986569166183472, "learning_rate": 1.4087716512339578e-05, "loss": 0.0672, "step": 30141 }, { "epoch": 0.5338008651108839, "grad_norm": 0.4871700704097748, "learning_rate": 1.4086857741933986e-05, "loss": 0.049, "step": 30142 }, { "epoch": 0.5338185746479123, "grad_norm": 0.6841534376144409, "learning_rate": 1.4085998974532543e-05, "loss": 0.078, "step": 30143 }, { "epoch": 0.5338362841849407, "grad_norm": 0.48967093229293823, "learning_rate": 1.408514021013807e-05, "loss": 0.0569, "step": 30144 }, { "epoch": 0.5338539937219692, "grad_norm": 0.6397231817245483, "learning_rate": 1.4084281448753395e-05, "loss": 0.1106, "step": 30145 }, { "epoch": 0.5338717032589976, "grad_norm": 0.7643113136291504, "learning_rate": 1.4083422690381337e-05, "loss": 0.0487, "step": 30146 }, { "epoch": 0.533889412796026, "grad_norm": 0.4984959065914154, "learning_rate": 1.4082563935024727e-05, "loss": 0.072, "step": 30147 }, { "epoch": 0.5339071223330544, "grad_norm": 0.6752596497535706, "learning_rate": 1.4081705182686394e-05, "loss": 0.0622, "step": 30148 }, { "epoch": 0.5339248318700829, "grad_norm": 0.7863680124282837, "learning_rate": 1.4080846433369154e-05, "loss": 0.0677, "step": 30149 }, { "epoch": 0.5339425414071113, "grad_norm": 0.9027847051620483, "learning_rate": 1.4079987687075835e-05, "loss": 0.0707, "step": 30150 }, { "epoch": 0.5339602509441397, "grad_norm": 0.3925786018371582, "learning_rate": 1.407912894380927e-05, "loss": 0.0682, "step": 30151 }, { "epoch": 0.5339779604811681, "grad_norm": 0.48027053475379944, "learning_rate": 1.4078270203572272e-05, "loss": 0.0517, "step": 30152 }, { "epoch": 0.5339956700181966, "grad_norm": 0.625453531742096, "learning_rate": 1.4077411466367669e-05, "loss": 0.0731, "step": 30153 }, { "epoch": 0.534013379555225, "grad_norm": 0.5650391578674316, "learning_rate": 1.407655273219829e-05, "loss": 0.1003, "step": 30154 }, { "epoch": 0.5340310890922534, "grad_norm": 0.3279980719089508, "learning_rate": 1.4075694001066968e-05, "loss": 0.0585, "step": 30155 }, { "epoch": 0.5340487986292818, "grad_norm": 1.0895100831985474, "learning_rate": 1.4074835272976512e-05, "loss": 0.0916, "step": 30156 }, { "epoch": 0.5340665081663103, "grad_norm": 0.5334538221359253, "learning_rate": 1.4073976547929752e-05, "loss": 0.0653, "step": 30157 }, { "epoch": 0.5340842177033387, "grad_norm": 0.7365963459014893, "learning_rate": 1.4073117825929522e-05, "loss": 0.0695, "step": 30158 }, { "epoch": 0.5341019272403671, "grad_norm": 0.5857205986976624, "learning_rate": 1.4072259106978637e-05, "loss": 0.0585, "step": 30159 }, { "epoch": 0.5341196367773956, "grad_norm": 0.6655545234680176, "learning_rate": 1.4071400391079925e-05, "loss": 0.0662, "step": 30160 }, { "epoch": 0.534137346314424, "grad_norm": 0.47019892930984497, "learning_rate": 1.4070541678236212e-05, "loss": 0.0714, "step": 30161 }, { "epoch": 0.5341550558514524, "grad_norm": 0.7820513844490051, "learning_rate": 1.4069682968450325e-05, "loss": 0.0575, "step": 30162 }, { "epoch": 0.5341727653884808, "grad_norm": 1.1693682670593262, "learning_rate": 1.4068824261725085e-05, "loss": 0.0804, "step": 30163 }, { "epoch": 0.5341904749255093, "grad_norm": 0.7263409495353699, "learning_rate": 1.406796555806332e-05, "loss": 0.0854, "step": 30164 }, { "epoch": 0.5342081844625377, "grad_norm": 0.5042292475700378, "learning_rate": 1.4067106857467859e-05, "loss": 0.0646, "step": 30165 }, { "epoch": 0.5342258939995661, "grad_norm": 0.6417886018753052, "learning_rate": 1.4066248159941514e-05, "loss": 0.0468, "step": 30166 }, { "epoch": 0.5342436035365945, "grad_norm": 0.8629976511001587, "learning_rate": 1.4065389465487121e-05, "loss": 0.0453, "step": 30167 }, { "epoch": 0.534261313073623, "grad_norm": 0.5370258092880249, "learning_rate": 1.4064530774107498e-05, "loss": 0.055, "step": 30168 }, { "epoch": 0.5342790226106514, "grad_norm": 0.8405349254608154, "learning_rate": 1.4063672085805482e-05, "loss": 0.0783, "step": 30169 }, { "epoch": 0.5342967321476798, "grad_norm": 0.587724506855011, "learning_rate": 1.4062813400583886e-05, "loss": 0.0464, "step": 30170 }, { "epoch": 0.5343144416847082, "grad_norm": 0.6745541095733643, "learning_rate": 1.4061954718445539e-05, "loss": 0.0796, "step": 30171 }, { "epoch": 0.5343321512217367, "grad_norm": 0.6556443572044373, "learning_rate": 1.4061096039393269e-05, "loss": 0.0707, "step": 30172 }, { "epoch": 0.5343498607587651, "grad_norm": 0.5473342537879944, "learning_rate": 1.4060237363429894e-05, "loss": 0.0629, "step": 30173 }, { "epoch": 0.5343675702957935, "grad_norm": 0.9909491539001465, "learning_rate": 1.4059378690558245e-05, "loss": 0.1113, "step": 30174 }, { "epoch": 0.534385279832822, "grad_norm": 0.6134198307991028, "learning_rate": 1.4058520020781143e-05, "loss": 0.0962, "step": 30175 }, { "epoch": 0.5344029893698504, "grad_norm": 0.8687369227409363, "learning_rate": 1.4057661354101418e-05, "loss": 0.0808, "step": 30176 }, { "epoch": 0.5344206989068788, "grad_norm": 0.9345783591270447, "learning_rate": 1.4056802690521888e-05, "loss": 0.0854, "step": 30177 }, { "epoch": 0.5344384084439072, "grad_norm": 1.1675792932510376, "learning_rate": 1.4055944030045384e-05, "loss": 0.09, "step": 30178 }, { "epoch": 0.5344561179809357, "grad_norm": 0.49429312348365784, "learning_rate": 1.4055085372674728e-05, "loss": 0.066, "step": 30179 }, { "epoch": 0.5344738275179641, "grad_norm": 0.40059104561805725, "learning_rate": 1.4054226718412752e-05, "loss": 0.0603, "step": 30180 }, { "epoch": 0.5344915370549925, "grad_norm": 1.0583032369613647, "learning_rate": 1.4053368067262267e-05, "loss": 0.055, "step": 30181 }, { "epoch": 0.5345092465920209, "grad_norm": 0.9485703110694885, "learning_rate": 1.4052509419226102e-05, "loss": 0.0694, "step": 30182 }, { "epoch": 0.5345269561290494, "grad_norm": 0.5803943872451782, "learning_rate": 1.4051650774307096e-05, "loss": 0.0436, "step": 30183 }, { "epoch": 0.5345446656660778, "grad_norm": 0.516845703125, "learning_rate": 1.4050792132508056e-05, "loss": 0.0336, "step": 30184 }, { "epoch": 0.5345623752031062, "grad_norm": 0.654211699962616, "learning_rate": 1.4049933493831814e-05, "loss": 0.1059, "step": 30185 }, { "epoch": 0.5345800847401346, "grad_norm": 1.0752511024475098, "learning_rate": 1.4049074858281195e-05, "loss": 0.0892, "step": 30186 }, { "epoch": 0.5345977942771631, "grad_norm": 0.7955760359764099, "learning_rate": 1.4048216225859026e-05, "loss": 0.0811, "step": 30187 }, { "epoch": 0.5346155038141915, "grad_norm": 0.7223108410835266, "learning_rate": 1.4047357596568126e-05, "loss": 0.0643, "step": 30188 }, { "epoch": 0.5346332133512199, "grad_norm": 0.3695371747016907, "learning_rate": 1.4046498970411326e-05, "loss": 0.064, "step": 30189 }, { "epoch": 0.5346509228882484, "grad_norm": 0.6771413683891296, "learning_rate": 1.4045640347391448e-05, "loss": 0.0915, "step": 30190 }, { "epoch": 0.5346686324252768, "grad_norm": 0.39652010798454285, "learning_rate": 1.4044781727511316e-05, "loss": 0.0869, "step": 30191 }, { "epoch": 0.5346863419623052, "grad_norm": 1.0018168687820435, "learning_rate": 1.4043923110773753e-05, "loss": 0.0636, "step": 30192 }, { "epoch": 0.5347040514993336, "grad_norm": 0.6313053965568542, "learning_rate": 1.4043064497181588e-05, "loss": 0.0607, "step": 30193 }, { "epoch": 0.5347217610363622, "grad_norm": 0.6589593291282654, "learning_rate": 1.404220588673765e-05, "loss": 0.0466, "step": 30194 }, { "epoch": 0.5347394705733906, "grad_norm": 0.5285904407501221, "learning_rate": 1.404134727944475e-05, "loss": 0.0736, "step": 30195 }, { "epoch": 0.534757180110419, "grad_norm": 0.5432525277137756, "learning_rate": 1.4040488675305722e-05, "loss": 0.0577, "step": 30196 }, { "epoch": 0.5347748896474473, "grad_norm": 0.7085635662078857, "learning_rate": 1.4039630074323391e-05, "loss": 0.0738, "step": 30197 }, { "epoch": 0.5347925991844759, "grad_norm": 0.4924730062484741, "learning_rate": 1.4038771476500577e-05, "loss": 0.0574, "step": 30198 }, { "epoch": 0.5348103087215043, "grad_norm": 0.7757667899131775, "learning_rate": 1.4037912881840109e-05, "loss": 0.0934, "step": 30199 }, { "epoch": 0.5348280182585327, "grad_norm": 0.46133697032928467, "learning_rate": 1.4037054290344809e-05, "loss": 0.07, "step": 30200 }, { "epoch": 0.534845727795561, "grad_norm": 0.3951377868652344, "learning_rate": 1.4036195702017508e-05, "loss": 0.0578, "step": 30201 }, { "epoch": 0.5348634373325896, "grad_norm": 0.46941906213760376, "learning_rate": 1.403533711686102e-05, "loss": 0.0669, "step": 30202 }, { "epoch": 0.534881146869618, "grad_norm": 0.7014841437339783, "learning_rate": 1.4034478534878176e-05, "loss": 0.1097, "step": 30203 }, { "epoch": 0.5348988564066464, "grad_norm": 0.567214846611023, "learning_rate": 1.403361995607181e-05, "loss": 0.0773, "step": 30204 }, { "epoch": 0.5349165659436749, "grad_norm": 0.6097940802574158, "learning_rate": 1.4032761380444722e-05, "loss": 0.0585, "step": 30205 }, { "epoch": 0.5349342754807033, "grad_norm": 0.17517279088497162, "learning_rate": 1.4031902807999756e-05, "loss": 0.0459, "step": 30206 }, { "epoch": 0.5349519850177317, "grad_norm": 0.5838720798492432, "learning_rate": 1.4031044238739731e-05, "loss": 0.0872, "step": 30207 }, { "epoch": 0.5349696945547601, "grad_norm": 0.7641436457633972, "learning_rate": 1.4030185672667482e-05, "loss": 0.1093, "step": 30208 }, { "epoch": 0.5349874040917886, "grad_norm": 0.5096585750579834, "learning_rate": 1.4029327109785816e-05, "loss": 0.0964, "step": 30209 }, { "epoch": 0.535005113628817, "grad_norm": 0.8097859025001526, "learning_rate": 1.4028468550097566e-05, "loss": 0.0667, "step": 30210 }, { "epoch": 0.5350228231658454, "grad_norm": 0.5590893626213074, "learning_rate": 1.4027609993605552e-05, "loss": 0.0337, "step": 30211 }, { "epoch": 0.5350405327028738, "grad_norm": 0.3300880789756775, "learning_rate": 1.4026751440312612e-05, "loss": 0.0611, "step": 30212 }, { "epoch": 0.5350582422399023, "grad_norm": 0.60478276014328, "learning_rate": 1.4025892890221557e-05, "loss": 0.0563, "step": 30213 }, { "epoch": 0.5350759517769307, "grad_norm": 0.5630017518997192, "learning_rate": 1.4025034343335213e-05, "loss": 0.0696, "step": 30214 }, { "epoch": 0.5350936613139591, "grad_norm": 0.5985168218612671, "learning_rate": 1.4024175799656411e-05, "loss": 0.0523, "step": 30215 }, { "epoch": 0.5351113708509875, "grad_norm": 0.7208453416824341, "learning_rate": 1.402331725918797e-05, "loss": 0.0585, "step": 30216 }, { "epoch": 0.535129080388016, "grad_norm": 0.6448411345481873, "learning_rate": 1.4022458721932717e-05, "loss": 0.073, "step": 30217 }, { "epoch": 0.5351467899250444, "grad_norm": 0.4529164433479309, "learning_rate": 1.4021600187893475e-05, "loss": 0.0455, "step": 30218 }, { "epoch": 0.5351644994620728, "grad_norm": 0.7950616478919983, "learning_rate": 1.4020741657073073e-05, "loss": 0.0726, "step": 30219 }, { "epoch": 0.5351822089991013, "grad_norm": 0.6913487315177917, "learning_rate": 1.4019883129474329e-05, "loss": 0.0729, "step": 30220 }, { "epoch": 0.5351999185361297, "grad_norm": 0.8495696187019348, "learning_rate": 1.401902460510007e-05, "loss": 0.0966, "step": 30221 }, { "epoch": 0.5352176280731581, "grad_norm": 0.7651123404502869, "learning_rate": 1.4018166083953126e-05, "loss": 0.0786, "step": 30222 }, { "epoch": 0.5352353376101865, "grad_norm": 0.6673861145973206, "learning_rate": 1.401730756603631e-05, "loss": 0.0705, "step": 30223 }, { "epoch": 0.535253047147215, "grad_norm": 0.765293538570404, "learning_rate": 1.4016449051352454e-05, "loss": 0.0643, "step": 30224 }, { "epoch": 0.5352707566842434, "grad_norm": 0.63590407371521, "learning_rate": 1.4015590539904382e-05, "loss": 0.0748, "step": 30225 }, { "epoch": 0.5352884662212718, "grad_norm": 0.45398950576782227, "learning_rate": 1.401473203169492e-05, "loss": 0.0566, "step": 30226 }, { "epoch": 0.5353061757583002, "grad_norm": 0.708987832069397, "learning_rate": 1.4013873526726887e-05, "loss": 0.0773, "step": 30227 }, { "epoch": 0.5353238852953287, "grad_norm": 0.565483808517456, "learning_rate": 1.4013015025003109e-05, "loss": 0.0548, "step": 30228 }, { "epoch": 0.5353415948323571, "grad_norm": 0.7220153212547302, "learning_rate": 1.4012156526526414e-05, "loss": 0.1221, "step": 30229 }, { "epoch": 0.5353593043693855, "grad_norm": 0.555341362953186, "learning_rate": 1.4011298031299623e-05, "loss": 0.0828, "step": 30230 }, { "epoch": 0.5353770139064139, "grad_norm": 0.7905586957931519, "learning_rate": 1.4010439539325563e-05, "loss": 0.0775, "step": 30231 }, { "epoch": 0.5353947234434424, "grad_norm": 0.40099626779556274, "learning_rate": 1.4009581050607054e-05, "loss": 0.0728, "step": 30232 }, { "epoch": 0.5354124329804708, "grad_norm": 0.36940157413482666, "learning_rate": 1.400872256514693e-05, "loss": 0.0587, "step": 30233 }, { "epoch": 0.5354301425174992, "grad_norm": 0.7159227728843689, "learning_rate": 1.4007864082948e-05, "loss": 0.096, "step": 30234 }, { "epoch": 0.5354478520545277, "grad_norm": 0.7901331782341003, "learning_rate": 1.4007005604013098e-05, "loss": 0.0582, "step": 30235 }, { "epoch": 0.5354655615915561, "grad_norm": 0.5054775476455688, "learning_rate": 1.4006147128345056e-05, "loss": 0.0723, "step": 30236 }, { "epoch": 0.5354832711285845, "grad_norm": 0.44343000650405884, "learning_rate": 1.4005288655946682e-05, "loss": 0.0339, "step": 30237 }, { "epoch": 0.5355009806656129, "grad_norm": 0.8608281016349792, "learning_rate": 1.4004430186820807e-05, "loss": 0.1068, "step": 30238 }, { "epoch": 0.5355186902026414, "grad_norm": 0.546405017375946, "learning_rate": 1.4003571720970257e-05, "loss": 0.0469, "step": 30239 }, { "epoch": 0.5355363997396698, "grad_norm": 0.8049537539482117, "learning_rate": 1.4002713258397858e-05, "loss": 0.0889, "step": 30240 }, { "epoch": 0.5355541092766982, "grad_norm": 1.0856953859329224, "learning_rate": 1.4001854799106429e-05, "loss": 0.0996, "step": 30241 }, { "epoch": 0.5355718188137266, "grad_norm": 0.9582113027572632, "learning_rate": 1.4000996343098796e-05, "loss": 0.084, "step": 30242 }, { "epoch": 0.5355895283507551, "grad_norm": 0.44869089126586914, "learning_rate": 1.4000137890377786e-05, "loss": 0.0815, "step": 30243 }, { "epoch": 0.5356072378877835, "grad_norm": 0.8036742806434631, "learning_rate": 1.399927944094622e-05, "loss": 0.0656, "step": 30244 }, { "epoch": 0.5356249474248119, "grad_norm": 0.6856180429458618, "learning_rate": 1.3998420994806923e-05, "loss": 0.0997, "step": 30245 }, { "epoch": 0.5356426569618403, "grad_norm": 0.5720446705818176, "learning_rate": 1.3997562551962719e-05, "loss": 0.0556, "step": 30246 }, { "epoch": 0.5356603664988688, "grad_norm": 0.7942166924476624, "learning_rate": 1.399670411241644e-05, "loss": 0.0661, "step": 30247 }, { "epoch": 0.5356780760358972, "grad_norm": 0.37731337547302246, "learning_rate": 1.3995845676170891e-05, "loss": 0.0654, "step": 30248 }, { "epoch": 0.5356957855729256, "grad_norm": 0.6057217121124268, "learning_rate": 1.3994987243228914e-05, "loss": 0.0693, "step": 30249 }, { "epoch": 0.5357134951099541, "grad_norm": 0.434584379196167, "learning_rate": 1.3994128813593327e-05, "loss": 0.0697, "step": 30250 }, { "epoch": 0.5357312046469825, "grad_norm": 0.6152800917625427, "learning_rate": 1.3993270387266962e-05, "loss": 0.0752, "step": 30251 }, { "epoch": 0.5357489141840109, "grad_norm": 0.7262791991233826, "learning_rate": 1.3992411964252626e-05, "loss": 0.0767, "step": 30252 }, { "epoch": 0.5357666237210393, "grad_norm": 0.7981663346290588, "learning_rate": 1.3991553544553155e-05, "loss": 0.0729, "step": 30253 }, { "epoch": 0.5357843332580678, "grad_norm": 0.5694260597229004, "learning_rate": 1.3990695128171373e-05, "loss": 0.0658, "step": 30254 }, { "epoch": 0.5358020427950962, "grad_norm": 0.6054561734199524, "learning_rate": 1.39898367151101e-05, "loss": 0.0998, "step": 30255 }, { "epoch": 0.5358197523321246, "grad_norm": 0.7481834888458252, "learning_rate": 1.3988978305372161e-05, "loss": 0.0903, "step": 30256 }, { "epoch": 0.535837461869153, "grad_norm": 0.546368420124054, "learning_rate": 1.3988119898960383e-05, "loss": 0.0655, "step": 30257 }, { "epoch": 0.5358551714061816, "grad_norm": 0.49785882234573364, "learning_rate": 1.398726149587759e-05, "loss": 0.0624, "step": 30258 }, { "epoch": 0.53587288094321, "grad_norm": 0.7167009115219116, "learning_rate": 1.3986403096126598e-05, "loss": 0.0541, "step": 30259 }, { "epoch": 0.5358905904802383, "grad_norm": 0.9454349875450134, "learning_rate": 1.398554469971024e-05, "loss": 0.0759, "step": 30260 }, { "epoch": 0.5359083000172667, "grad_norm": 0.5746225714683533, "learning_rate": 1.3984686306631345e-05, "loss": 0.0634, "step": 30261 }, { "epoch": 0.5359260095542953, "grad_norm": 0.4706343412399292, "learning_rate": 1.3983827916892722e-05, "loss": 0.0843, "step": 30262 }, { "epoch": 0.5359437190913237, "grad_norm": 0.9246274828910828, "learning_rate": 1.3982969530497199e-05, "loss": 0.0859, "step": 30263 }, { "epoch": 0.535961428628352, "grad_norm": 0.6452212333679199, "learning_rate": 1.3982111147447607e-05, "loss": 0.0696, "step": 30264 }, { "epoch": 0.5359791381653806, "grad_norm": 0.6285916566848755, "learning_rate": 1.398125276774677e-05, "loss": 0.0675, "step": 30265 }, { "epoch": 0.535996847702409, "grad_norm": 0.7268115878105164, "learning_rate": 1.3980394391397505e-05, "loss": 0.0797, "step": 30266 }, { "epoch": 0.5360145572394374, "grad_norm": 1.1542840003967285, "learning_rate": 1.397953601840264e-05, "loss": 0.111, "step": 30267 }, { "epoch": 0.5360322667764658, "grad_norm": 0.6138265132904053, "learning_rate": 1.3978677648765e-05, "loss": 0.051, "step": 30268 }, { "epoch": 0.5360499763134943, "grad_norm": 0.6110491752624512, "learning_rate": 1.3977819282487403e-05, "loss": 0.0763, "step": 30269 }, { "epoch": 0.5360676858505227, "grad_norm": 1.2314035892486572, "learning_rate": 1.3976960919572679e-05, "loss": 0.0792, "step": 30270 }, { "epoch": 0.5360853953875511, "grad_norm": 0.6265844106674194, "learning_rate": 1.3976102560023648e-05, "loss": 0.0691, "step": 30271 }, { "epoch": 0.5361031049245795, "grad_norm": 0.7252830266952515, "learning_rate": 1.3975244203843143e-05, "loss": 0.0869, "step": 30272 }, { "epoch": 0.536120814461608, "grad_norm": 0.989378035068512, "learning_rate": 1.3974385851033974e-05, "loss": 0.0739, "step": 30273 }, { "epoch": 0.5361385239986364, "grad_norm": 0.5281038880348206, "learning_rate": 1.3973527501598974e-05, "loss": 0.0646, "step": 30274 }, { "epoch": 0.5361562335356648, "grad_norm": 1.0817276239395142, "learning_rate": 1.397266915554097e-05, "loss": 0.063, "step": 30275 }, { "epoch": 0.5361739430726932, "grad_norm": 0.6667593717575073, "learning_rate": 1.3971810812862774e-05, "loss": 0.0715, "step": 30276 }, { "epoch": 0.5361916526097217, "grad_norm": 1.1455039978027344, "learning_rate": 1.3970952473567213e-05, "loss": 0.0915, "step": 30277 }, { "epoch": 0.5362093621467501, "grad_norm": 0.3180994391441345, "learning_rate": 1.3970094137657119e-05, "loss": 0.0848, "step": 30278 }, { "epoch": 0.5362270716837785, "grad_norm": 0.9734440445899963, "learning_rate": 1.3969235805135317e-05, "loss": 0.0871, "step": 30279 }, { "epoch": 0.536244781220807, "grad_norm": 1.4611048698425293, "learning_rate": 1.3968377476004617e-05, "loss": 0.1076, "step": 30280 }, { "epoch": 0.5362624907578354, "grad_norm": 0.5448694825172424, "learning_rate": 1.3967519150267853e-05, "loss": 0.0647, "step": 30281 }, { "epoch": 0.5362802002948638, "grad_norm": 0.4728657603263855, "learning_rate": 1.3966660827927845e-05, "loss": 0.0483, "step": 30282 }, { "epoch": 0.5362979098318922, "grad_norm": 0.6906089186668396, "learning_rate": 1.3965802508987422e-05, "loss": 0.0788, "step": 30283 }, { "epoch": 0.5363156193689207, "grad_norm": 0.4491818845272064, "learning_rate": 1.39649441934494e-05, "loss": 0.0489, "step": 30284 }, { "epoch": 0.5363333289059491, "grad_norm": 0.7448108792304993, "learning_rate": 1.3964085881316609e-05, "loss": 0.0843, "step": 30285 }, { "epoch": 0.5363510384429775, "grad_norm": 0.4866989850997925, "learning_rate": 1.3963227572591873e-05, "loss": 0.081, "step": 30286 }, { "epoch": 0.5363687479800059, "grad_norm": 0.9366762638092041, "learning_rate": 1.396236926727801e-05, "loss": 0.0786, "step": 30287 }, { "epoch": 0.5363864575170344, "grad_norm": 0.5754181742668152, "learning_rate": 1.3961510965377847e-05, "loss": 0.0803, "step": 30288 }, { "epoch": 0.5364041670540628, "grad_norm": 0.40455707907676697, "learning_rate": 1.3960652666894208e-05, "loss": 0.0391, "step": 30289 }, { "epoch": 0.5364218765910912, "grad_norm": 0.6134898662567139, "learning_rate": 1.3959794371829923e-05, "loss": 0.0737, "step": 30290 }, { "epoch": 0.5364395861281196, "grad_norm": 0.6302705407142639, "learning_rate": 1.3958936080187804e-05, "loss": 0.1052, "step": 30291 }, { "epoch": 0.5364572956651481, "grad_norm": 0.8933943510055542, "learning_rate": 1.3958077791970676e-05, "loss": 0.0853, "step": 30292 }, { "epoch": 0.5364750052021765, "grad_norm": 0.5611476302146912, "learning_rate": 1.3957219507181375e-05, "loss": 0.0915, "step": 30293 }, { "epoch": 0.5364927147392049, "grad_norm": 0.41751718521118164, "learning_rate": 1.3956361225822711e-05, "loss": 0.0753, "step": 30294 }, { "epoch": 0.5365104242762334, "grad_norm": 0.6828235983848572, "learning_rate": 1.3955502947897513e-05, "loss": 0.0648, "step": 30295 }, { "epoch": 0.5365281338132618, "grad_norm": 0.8058337569236755, "learning_rate": 1.3954644673408605e-05, "loss": 0.0936, "step": 30296 }, { "epoch": 0.5365458433502902, "grad_norm": 0.7591012716293335, "learning_rate": 1.3953786402358812e-05, "loss": 0.0643, "step": 30297 }, { "epoch": 0.5365635528873186, "grad_norm": 0.5034132599830627, "learning_rate": 1.3952928134750954e-05, "loss": 0.0909, "step": 30298 }, { "epoch": 0.5365812624243471, "grad_norm": 0.5533226728439331, "learning_rate": 1.3952069870587856e-05, "loss": 0.0769, "step": 30299 }, { "epoch": 0.5365989719613755, "grad_norm": 0.5194509625434875, "learning_rate": 1.3951211609872348e-05, "loss": 0.0999, "step": 30300 }, { "epoch": 0.5366166814984039, "grad_norm": 0.46423473954200745, "learning_rate": 1.395035335260724e-05, "loss": 0.0545, "step": 30301 }, { "epoch": 0.5366343910354323, "grad_norm": 0.5259503126144409, "learning_rate": 1.3949495098795367e-05, "loss": 0.0645, "step": 30302 }, { "epoch": 0.5366521005724608, "grad_norm": 0.8026761412620544, "learning_rate": 1.3948636848439546e-05, "loss": 0.0874, "step": 30303 }, { "epoch": 0.5366698101094892, "grad_norm": 0.23297512531280518, "learning_rate": 1.3947778601542613e-05, "loss": 0.0608, "step": 30304 }, { "epoch": 0.5366875196465176, "grad_norm": 0.40495648980140686, "learning_rate": 1.3946920358107375e-05, "loss": 0.0593, "step": 30305 }, { "epoch": 0.536705229183546, "grad_norm": 0.6371431350708008, "learning_rate": 1.3946062118136655e-05, "loss": 0.1244, "step": 30306 }, { "epoch": 0.5367229387205745, "grad_norm": 0.4502624273300171, "learning_rate": 1.3945203881633298e-05, "loss": 0.0881, "step": 30307 }, { "epoch": 0.5367406482576029, "grad_norm": 0.5746288895606995, "learning_rate": 1.3944345648600105e-05, "loss": 0.0803, "step": 30308 }, { "epoch": 0.5367583577946313, "grad_norm": 0.835019588470459, "learning_rate": 1.394348741903991e-05, "loss": 0.0921, "step": 30309 }, { "epoch": 0.5367760673316598, "grad_norm": 0.5313385725021362, "learning_rate": 1.3942629192955534e-05, "loss": 0.0601, "step": 30310 }, { "epoch": 0.5367937768686882, "grad_norm": 0.5716127157211304, "learning_rate": 1.3941770970349803e-05, "loss": 0.0795, "step": 30311 }, { "epoch": 0.5368114864057166, "grad_norm": 0.6300946474075317, "learning_rate": 1.3940912751225537e-05, "loss": 0.0688, "step": 30312 }, { "epoch": 0.536829195942745, "grad_norm": 0.43745946884155273, "learning_rate": 1.394005453558556e-05, "loss": 0.0699, "step": 30313 }, { "epoch": 0.5368469054797735, "grad_norm": 0.8753213286399841, "learning_rate": 1.3939196323432695e-05, "loss": 0.0739, "step": 30314 }, { "epoch": 0.5368646150168019, "grad_norm": 0.45579198002815247, "learning_rate": 1.3938338114769772e-05, "loss": 0.0747, "step": 30315 }, { "epoch": 0.5368823245538303, "grad_norm": 0.7616225481033325, "learning_rate": 1.3937479909599606e-05, "loss": 0.0455, "step": 30316 }, { "epoch": 0.5369000340908587, "grad_norm": 0.5601693987846375, "learning_rate": 1.3936621707925023e-05, "loss": 0.0669, "step": 30317 }, { "epoch": 0.5369177436278872, "grad_norm": 0.5742355585098267, "learning_rate": 1.3935763509748853e-05, "loss": 0.0536, "step": 30318 }, { "epoch": 0.5369354531649156, "grad_norm": 0.7527617812156677, "learning_rate": 1.3934905315073906e-05, "loss": 0.0834, "step": 30319 }, { "epoch": 0.536953162701944, "grad_norm": 0.4852076470851898, "learning_rate": 1.3934047123903017e-05, "loss": 0.0631, "step": 30320 }, { "epoch": 0.5369708722389724, "grad_norm": 0.8895004391670227, "learning_rate": 1.3933188936238997e-05, "loss": 0.0669, "step": 30321 }, { "epoch": 0.536988581776001, "grad_norm": 0.9200969338417053, "learning_rate": 1.3932330752084688e-05, "loss": 0.0843, "step": 30322 }, { "epoch": 0.5370062913130293, "grad_norm": 0.7040665745735168, "learning_rate": 1.39314725714429e-05, "loss": 0.0563, "step": 30323 }, { "epoch": 0.5370240008500577, "grad_norm": 0.5749678015708923, "learning_rate": 1.3930614394316456e-05, "loss": 0.0712, "step": 30324 }, { "epoch": 0.5370417103870863, "grad_norm": 0.5458274483680725, "learning_rate": 1.3929756220708186e-05, "loss": 0.0453, "step": 30325 }, { "epoch": 0.5370594199241147, "grad_norm": 0.27926769852638245, "learning_rate": 1.3928898050620907e-05, "loss": 0.0645, "step": 30326 }, { "epoch": 0.537077129461143, "grad_norm": 0.3662637174129486, "learning_rate": 1.3928039884057445e-05, "loss": 0.0956, "step": 30327 }, { "epoch": 0.5370948389981715, "grad_norm": 0.9591112732887268, "learning_rate": 1.3927181721020626e-05, "loss": 0.0692, "step": 30328 }, { "epoch": 0.5371125485352, "grad_norm": 0.3279881477355957, "learning_rate": 1.3926323561513269e-05, "loss": 0.0611, "step": 30329 }, { "epoch": 0.5371302580722284, "grad_norm": 0.612313449382782, "learning_rate": 1.39254654055382e-05, "loss": 0.0816, "step": 30330 }, { "epoch": 0.5371479676092568, "grad_norm": 0.9169248342514038, "learning_rate": 1.3924607253098238e-05, "loss": 0.0745, "step": 30331 }, { "epoch": 0.5371656771462852, "grad_norm": 0.5879753232002258, "learning_rate": 1.3923749104196216e-05, "loss": 0.0612, "step": 30332 }, { "epoch": 0.5371833866833137, "grad_norm": 0.27567699551582336, "learning_rate": 1.3922890958834946e-05, "loss": 0.0617, "step": 30333 }, { "epoch": 0.5372010962203421, "grad_norm": 0.4177916347980499, "learning_rate": 1.3922032817017255e-05, "loss": 0.0699, "step": 30334 }, { "epoch": 0.5372188057573705, "grad_norm": 0.9073328375816345, "learning_rate": 1.3921174678745966e-05, "loss": 0.0729, "step": 30335 }, { "epoch": 0.5372365152943989, "grad_norm": 0.5530189871788025, "learning_rate": 1.3920316544023909e-05, "loss": 0.0759, "step": 30336 }, { "epoch": 0.5372542248314274, "grad_norm": 0.7443638443946838, "learning_rate": 1.3919458412853895e-05, "loss": 0.0666, "step": 30337 }, { "epoch": 0.5372719343684558, "grad_norm": 0.3868299722671509, "learning_rate": 1.3918600285238756e-05, "loss": 0.0391, "step": 30338 }, { "epoch": 0.5372896439054842, "grad_norm": 0.457366943359375, "learning_rate": 1.3917742161181314e-05, "loss": 0.0593, "step": 30339 }, { "epoch": 0.5373073534425127, "grad_norm": 0.6967867612838745, "learning_rate": 1.391688404068439e-05, "loss": 0.065, "step": 30340 }, { "epoch": 0.5373250629795411, "grad_norm": 0.4125598073005676, "learning_rate": 1.3916025923750805e-05, "loss": 0.0494, "step": 30341 }, { "epoch": 0.5373427725165695, "grad_norm": 0.39849913120269775, "learning_rate": 1.3915167810383387e-05, "loss": 0.0847, "step": 30342 }, { "epoch": 0.5373604820535979, "grad_norm": 0.9107216596603394, "learning_rate": 1.3914309700584963e-05, "loss": 0.073, "step": 30343 }, { "epoch": 0.5373781915906264, "grad_norm": 0.41141289472579956, "learning_rate": 1.3913451594358343e-05, "loss": 0.0342, "step": 30344 }, { "epoch": 0.5373959011276548, "grad_norm": 0.7657148838043213, "learning_rate": 1.3912593491706357e-05, "loss": 0.0588, "step": 30345 }, { "epoch": 0.5374136106646832, "grad_norm": 0.7052506804466248, "learning_rate": 1.3911735392631835e-05, "loss": 0.0727, "step": 30346 }, { "epoch": 0.5374313202017116, "grad_norm": 0.6124055981636047, "learning_rate": 1.391087729713759e-05, "loss": 0.0652, "step": 30347 }, { "epoch": 0.5374490297387401, "grad_norm": 0.350866436958313, "learning_rate": 1.3910019205226447e-05, "loss": 0.0427, "step": 30348 }, { "epoch": 0.5374667392757685, "grad_norm": 0.23560261726379395, "learning_rate": 1.390916111690123e-05, "loss": 0.0511, "step": 30349 }, { "epoch": 0.5374844488127969, "grad_norm": 0.40735000371932983, "learning_rate": 1.3908303032164765e-05, "loss": 0.0526, "step": 30350 }, { "epoch": 0.5375021583498253, "grad_norm": 0.6315218210220337, "learning_rate": 1.390744495101987e-05, "loss": 0.076, "step": 30351 }, { "epoch": 0.5375198678868538, "grad_norm": 0.7951707243919373, "learning_rate": 1.3906586873469372e-05, "loss": 0.0777, "step": 30352 }, { "epoch": 0.5375375774238822, "grad_norm": 0.3907536566257477, "learning_rate": 1.3905728799516093e-05, "loss": 0.029, "step": 30353 }, { "epoch": 0.5375552869609106, "grad_norm": 0.7601656317710876, "learning_rate": 1.3904870729162857e-05, "loss": 0.0807, "step": 30354 }, { "epoch": 0.5375729964979391, "grad_norm": 0.7142633199691772, "learning_rate": 1.3904012662412483e-05, "loss": 0.0781, "step": 30355 }, { "epoch": 0.5375907060349675, "grad_norm": 0.5854220390319824, "learning_rate": 1.3903154599267796e-05, "loss": 0.0408, "step": 30356 }, { "epoch": 0.5376084155719959, "grad_norm": 0.5372040271759033, "learning_rate": 1.3902296539731627e-05, "loss": 0.06, "step": 30357 }, { "epoch": 0.5376261251090243, "grad_norm": 0.9245257377624512, "learning_rate": 1.390143848380678e-05, "loss": 0.0451, "step": 30358 }, { "epoch": 0.5376438346460528, "grad_norm": 0.923784077167511, "learning_rate": 1.3900580431496094e-05, "loss": 0.079, "step": 30359 }, { "epoch": 0.5376615441830812, "grad_norm": 1.1668784618377686, "learning_rate": 1.3899722382802387e-05, "loss": 0.0382, "step": 30360 }, { "epoch": 0.5376792537201096, "grad_norm": 0.71623694896698, "learning_rate": 1.3898864337728487e-05, "loss": 0.1053, "step": 30361 }, { "epoch": 0.537696963257138, "grad_norm": 0.700526237487793, "learning_rate": 1.3898006296277206e-05, "loss": 0.0678, "step": 30362 }, { "epoch": 0.5377146727941665, "grad_norm": 0.5194867849349976, "learning_rate": 1.3897148258451374e-05, "loss": 0.0787, "step": 30363 }, { "epoch": 0.5377323823311949, "grad_norm": 0.6315292119979858, "learning_rate": 1.3896290224253816e-05, "loss": 0.0697, "step": 30364 }, { "epoch": 0.5377500918682233, "grad_norm": 0.5783942341804504, "learning_rate": 1.3895432193687348e-05, "loss": 0.0502, "step": 30365 }, { "epoch": 0.5377678014052517, "grad_norm": 0.6646434664726257, "learning_rate": 1.3894574166754797e-05, "loss": 0.1065, "step": 30366 }, { "epoch": 0.5377855109422802, "grad_norm": 0.5263311266899109, "learning_rate": 1.3893716143458983e-05, "loss": 0.0542, "step": 30367 }, { "epoch": 0.5378032204793086, "grad_norm": 0.5905373096466064, "learning_rate": 1.3892858123802736e-05, "loss": 0.0536, "step": 30368 }, { "epoch": 0.537820930016337, "grad_norm": 0.9132928848266602, "learning_rate": 1.389200010778887e-05, "loss": 0.111, "step": 30369 }, { "epoch": 0.5378386395533655, "grad_norm": 0.7040530443191528, "learning_rate": 1.3891142095420211e-05, "loss": 0.0796, "step": 30370 }, { "epoch": 0.5378563490903939, "grad_norm": 0.5153604745864868, "learning_rate": 1.3890284086699588e-05, "loss": 0.0587, "step": 30371 }, { "epoch": 0.5378740586274223, "grad_norm": 0.5223594903945923, "learning_rate": 1.3889426081629814e-05, "loss": 0.1, "step": 30372 }, { "epoch": 0.5378917681644507, "grad_norm": 0.5134195685386658, "learning_rate": 1.3888568080213712e-05, "loss": 0.09, "step": 30373 }, { "epoch": 0.5379094777014792, "grad_norm": 0.7254575490951538, "learning_rate": 1.3887710082454112e-05, "loss": 0.0872, "step": 30374 }, { "epoch": 0.5379271872385076, "grad_norm": 0.6992086172103882, "learning_rate": 1.3886852088353839e-05, "loss": 0.0821, "step": 30375 }, { "epoch": 0.537944896775536, "grad_norm": 0.4694986045360565, "learning_rate": 1.3885994097915703e-05, "loss": 0.0767, "step": 30376 }, { "epoch": 0.5379626063125644, "grad_norm": 0.5828418731689453, "learning_rate": 1.3885136111142536e-05, "loss": 0.053, "step": 30377 }, { "epoch": 0.5379803158495929, "grad_norm": 0.6371756196022034, "learning_rate": 1.388427812803716e-05, "loss": 0.0783, "step": 30378 }, { "epoch": 0.5379980253866213, "grad_norm": 0.773293137550354, "learning_rate": 1.3883420148602393e-05, "loss": 0.0643, "step": 30379 }, { "epoch": 0.5380157349236497, "grad_norm": 0.5442383289337158, "learning_rate": 1.388256217284106e-05, "loss": 0.0785, "step": 30380 }, { "epoch": 0.5380334444606781, "grad_norm": 0.865373969078064, "learning_rate": 1.3881704200755986e-05, "loss": 0.0988, "step": 30381 }, { "epoch": 0.5380511539977066, "grad_norm": 0.6175130605697632, "learning_rate": 1.3880846232349996e-05, "loss": 0.084, "step": 30382 }, { "epoch": 0.538068863534735, "grad_norm": 0.6716530919075012, "learning_rate": 1.3879988267625904e-05, "loss": 0.0575, "step": 30383 }, { "epoch": 0.5380865730717634, "grad_norm": 0.915754497051239, "learning_rate": 1.3879130306586536e-05, "loss": 0.0687, "step": 30384 }, { "epoch": 0.538104282608792, "grad_norm": 0.6910615563392639, "learning_rate": 1.3878272349234719e-05, "loss": 0.1041, "step": 30385 }, { "epoch": 0.5381219921458203, "grad_norm": 1.001196026802063, "learning_rate": 1.3877414395573277e-05, "loss": 0.1023, "step": 30386 }, { "epoch": 0.5381397016828487, "grad_norm": 0.7077949047088623, "learning_rate": 1.3876556445605018e-05, "loss": 0.0858, "step": 30387 }, { "epoch": 0.5381574112198771, "grad_norm": 0.8308721780776978, "learning_rate": 1.3875698499332778e-05, "loss": 0.0926, "step": 30388 }, { "epoch": 0.5381751207569057, "grad_norm": 0.43837398290634155, "learning_rate": 1.3874840556759384e-05, "loss": 0.0529, "step": 30389 }, { "epoch": 0.538192830293934, "grad_norm": 0.37525588274002075, "learning_rate": 1.3873982617887645e-05, "loss": 0.069, "step": 30390 }, { "epoch": 0.5382105398309625, "grad_norm": 0.8190324306488037, "learning_rate": 1.3873124682720389e-05, "loss": 0.0841, "step": 30391 }, { "epoch": 0.5382282493679909, "grad_norm": 0.8055960536003113, "learning_rate": 1.3872266751260436e-05, "loss": 0.0751, "step": 30392 }, { "epoch": 0.5382459589050194, "grad_norm": 0.7084810137748718, "learning_rate": 1.3871408823510616e-05, "loss": 0.0524, "step": 30393 }, { "epoch": 0.5382636684420478, "grad_norm": 0.5644245743751526, "learning_rate": 1.3870550899473744e-05, "loss": 0.0603, "step": 30394 }, { "epoch": 0.5382813779790762, "grad_norm": 0.6723551750183105, "learning_rate": 1.3869692979152647e-05, "loss": 0.1048, "step": 30395 }, { "epoch": 0.5382990875161046, "grad_norm": 0.62382972240448, "learning_rate": 1.3868835062550145e-05, "loss": 0.0696, "step": 30396 }, { "epoch": 0.5383167970531331, "grad_norm": 0.7655531167984009, "learning_rate": 1.386797714966906e-05, "loss": 0.0837, "step": 30397 }, { "epoch": 0.5383345065901615, "grad_norm": 0.5250557065010071, "learning_rate": 1.3867119240512215e-05, "loss": 0.0679, "step": 30398 }, { "epoch": 0.5383522161271899, "grad_norm": 0.7331111431121826, "learning_rate": 1.3866261335082436e-05, "loss": 0.1105, "step": 30399 }, { "epoch": 0.5383699256642184, "grad_norm": 0.6226695775985718, "learning_rate": 1.3865403433382547e-05, "loss": 0.0593, "step": 30400 }, { "epoch": 0.5383876352012468, "grad_norm": 0.5563850998878479, "learning_rate": 1.386454553541536e-05, "loss": 0.0758, "step": 30401 }, { "epoch": 0.5384053447382752, "grad_norm": 0.7606868147850037, "learning_rate": 1.3863687641183699e-05, "loss": 0.0716, "step": 30402 }, { "epoch": 0.5384230542753036, "grad_norm": 0.6122893691062927, "learning_rate": 1.38628297506904e-05, "loss": 0.0541, "step": 30403 }, { "epoch": 0.5384407638123321, "grad_norm": 0.5763446688652039, "learning_rate": 1.3861971863938272e-05, "loss": 0.0865, "step": 30404 }, { "epoch": 0.5384584733493605, "grad_norm": 0.49444669485092163, "learning_rate": 1.3861113980930141e-05, "loss": 0.0657, "step": 30405 }, { "epoch": 0.5384761828863889, "grad_norm": 0.6332927346229553, "learning_rate": 1.3860256101668827e-05, "loss": 0.0523, "step": 30406 }, { "epoch": 0.5384938924234173, "grad_norm": 0.6463145017623901, "learning_rate": 1.3859398226157161e-05, "loss": 0.068, "step": 30407 }, { "epoch": 0.5385116019604458, "grad_norm": 0.30602169036865234, "learning_rate": 1.3858540354397956e-05, "loss": 0.0834, "step": 30408 }, { "epoch": 0.5385293114974742, "grad_norm": 0.6107950806617737, "learning_rate": 1.3857682486394037e-05, "loss": 0.0451, "step": 30409 }, { "epoch": 0.5385470210345026, "grad_norm": 1.1135060787200928, "learning_rate": 1.385682462214823e-05, "loss": 0.0903, "step": 30410 }, { "epoch": 0.538564730571531, "grad_norm": 0.38487863540649414, "learning_rate": 1.3855966761663354e-05, "loss": 0.0616, "step": 30411 }, { "epoch": 0.5385824401085595, "grad_norm": 0.6645715832710266, "learning_rate": 1.3855108904942227e-05, "loss": 0.0831, "step": 30412 }, { "epoch": 0.5386001496455879, "grad_norm": 0.4119526743888855, "learning_rate": 1.3854251051987679e-05, "loss": 0.0427, "step": 30413 }, { "epoch": 0.5386178591826163, "grad_norm": 0.956173300743103, "learning_rate": 1.3853393202802537e-05, "loss": 0.1075, "step": 30414 }, { "epoch": 0.5386355687196448, "grad_norm": 0.5688021183013916, "learning_rate": 1.3852535357389607e-05, "loss": 0.0669, "step": 30415 }, { "epoch": 0.5386532782566732, "grad_norm": 0.7962983250617981, "learning_rate": 1.3851677515751714e-05, "loss": 0.0907, "step": 30416 }, { "epoch": 0.5386709877937016, "grad_norm": 0.5169287919998169, "learning_rate": 1.38508196778917e-05, "loss": 0.0413, "step": 30417 }, { "epoch": 0.53868869733073, "grad_norm": 0.8144192695617676, "learning_rate": 1.3849961843812363e-05, "loss": 0.0794, "step": 30418 }, { "epoch": 0.5387064068677585, "grad_norm": 0.571182370185852, "learning_rate": 1.3849104013516537e-05, "loss": 0.0473, "step": 30419 }, { "epoch": 0.5387241164047869, "grad_norm": 0.713204026222229, "learning_rate": 1.3848246187007041e-05, "loss": 0.0777, "step": 30420 }, { "epoch": 0.5387418259418153, "grad_norm": 0.4992412030696869, "learning_rate": 1.3847388364286703e-05, "loss": 0.0493, "step": 30421 }, { "epoch": 0.5387595354788437, "grad_norm": 0.46767863631248474, "learning_rate": 1.3846530545358337e-05, "loss": 0.0691, "step": 30422 }, { "epoch": 0.5387772450158722, "grad_norm": 0.7932624220848083, "learning_rate": 1.384567273022477e-05, "loss": 0.0731, "step": 30423 }, { "epoch": 0.5387949545529006, "grad_norm": 0.6423037052154541, "learning_rate": 1.384481491888882e-05, "loss": 0.0618, "step": 30424 }, { "epoch": 0.538812664089929, "grad_norm": 0.3688720762729645, "learning_rate": 1.3843957111353319e-05, "loss": 0.0451, "step": 30425 }, { "epoch": 0.5388303736269574, "grad_norm": 0.8736017346382141, "learning_rate": 1.3843099307621077e-05, "loss": 0.081, "step": 30426 }, { "epoch": 0.5388480831639859, "grad_norm": 0.5071535110473633, "learning_rate": 1.3842241507694923e-05, "loss": 0.0708, "step": 30427 }, { "epoch": 0.5388657927010143, "grad_norm": 0.4057365655899048, "learning_rate": 1.384138371157768e-05, "loss": 0.0704, "step": 30428 }, { "epoch": 0.5388835022380427, "grad_norm": 0.9479560852050781, "learning_rate": 1.3840525919272165e-05, "loss": 0.0781, "step": 30429 }, { "epoch": 0.5389012117750712, "grad_norm": 0.4664163887500763, "learning_rate": 1.38396681307812e-05, "loss": 0.0603, "step": 30430 }, { "epoch": 0.5389189213120996, "grad_norm": 0.6560785174369812, "learning_rate": 1.3838810346107607e-05, "loss": 0.0587, "step": 30431 }, { "epoch": 0.538936630849128, "grad_norm": 0.7253455519676208, "learning_rate": 1.3837952565254221e-05, "loss": 0.0706, "step": 30432 }, { "epoch": 0.5389543403861564, "grad_norm": 0.4951212406158447, "learning_rate": 1.3837094788223845e-05, "loss": 0.0575, "step": 30433 }, { "epoch": 0.5389720499231849, "grad_norm": 0.6901873350143433, "learning_rate": 1.3836237015019311e-05, "loss": 0.0758, "step": 30434 }, { "epoch": 0.5389897594602133, "grad_norm": 0.7650766968727112, "learning_rate": 1.3835379245643443e-05, "loss": 0.0774, "step": 30435 }, { "epoch": 0.5390074689972417, "grad_norm": 0.7078207731246948, "learning_rate": 1.3834521480099056e-05, "loss": 0.0789, "step": 30436 }, { "epoch": 0.5390251785342701, "grad_norm": 0.7160547971725464, "learning_rate": 1.3833663718388975e-05, "loss": 0.075, "step": 30437 }, { "epoch": 0.5390428880712986, "grad_norm": 0.8121596574783325, "learning_rate": 1.3832805960516022e-05, "loss": 0.0747, "step": 30438 }, { "epoch": 0.539060597608327, "grad_norm": 0.4757046699523926, "learning_rate": 1.3831948206483024e-05, "loss": 0.0642, "step": 30439 }, { "epoch": 0.5390783071453554, "grad_norm": 0.5183414220809937, "learning_rate": 1.3831090456292794e-05, "loss": 0.0465, "step": 30440 }, { "epoch": 0.5390960166823838, "grad_norm": 0.5824103951454163, "learning_rate": 1.3830232709948159e-05, "loss": 0.0414, "step": 30441 }, { "epoch": 0.5391137262194123, "grad_norm": 0.5125564336776733, "learning_rate": 1.3829374967451946e-05, "loss": 0.0755, "step": 30442 }, { "epoch": 0.5391314357564407, "grad_norm": 0.4549746513366699, "learning_rate": 1.3828517228806963e-05, "loss": 0.0484, "step": 30443 }, { "epoch": 0.5391491452934691, "grad_norm": 0.44805580377578735, "learning_rate": 1.3827659494016042e-05, "loss": 0.0835, "step": 30444 }, { "epoch": 0.5391668548304976, "grad_norm": 0.5136874318122864, "learning_rate": 1.3826801763081998e-05, "loss": 0.0578, "step": 30445 }, { "epoch": 0.539184564367526, "grad_norm": 0.7801997661590576, "learning_rate": 1.382594403600767e-05, "loss": 0.0752, "step": 30446 }, { "epoch": 0.5392022739045544, "grad_norm": 0.39424046874046326, "learning_rate": 1.3825086312795856e-05, "loss": 0.0435, "step": 30447 }, { "epoch": 0.5392199834415828, "grad_norm": 0.4982118606567383, "learning_rate": 1.3824228593449395e-05, "loss": 0.0474, "step": 30448 }, { "epoch": 0.5392376929786113, "grad_norm": 0.3584413528442383, "learning_rate": 1.3823370877971102e-05, "loss": 0.0585, "step": 30449 }, { "epoch": 0.5392554025156397, "grad_norm": 0.8392577171325684, "learning_rate": 1.3822513166363797e-05, "loss": 0.0636, "step": 30450 }, { "epoch": 0.5392731120526681, "grad_norm": 0.6237121820449829, "learning_rate": 1.3821655458630306e-05, "loss": 0.0661, "step": 30451 }, { "epoch": 0.5392908215896965, "grad_norm": 0.43931442499160767, "learning_rate": 1.382079775477345e-05, "loss": 0.0784, "step": 30452 }, { "epoch": 0.539308531126725, "grad_norm": 0.8588308691978455, "learning_rate": 1.3819940054796051e-05, "loss": 0.076, "step": 30453 }, { "epoch": 0.5393262406637535, "grad_norm": 0.8276953101158142, "learning_rate": 1.3819082358700928e-05, "loss": 0.0634, "step": 30454 }, { "epoch": 0.5393439502007819, "grad_norm": 0.4643464982509613, "learning_rate": 1.3818224666490905e-05, "loss": 0.0714, "step": 30455 }, { "epoch": 0.5393616597378103, "grad_norm": 0.48304077982902527, "learning_rate": 1.3817366978168802e-05, "loss": 0.0585, "step": 30456 }, { "epoch": 0.5393793692748388, "grad_norm": 0.8643799424171448, "learning_rate": 1.381650929373745e-05, "loss": 0.0763, "step": 30457 }, { "epoch": 0.5393970788118672, "grad_norm": 0.519573986530304, "learning_rate": 1.3815651613199656e-05, "loss": 0.0822, "step": 30458 }, { "epoch": 0.5394147883488956, "grad_norm": 0.504338264465332, "learning_rate": 1.3814793936558248e-05, "loss": 0.0709, "step": 30459 }, { "epoch": 0.5394324978859241, "grad_norm": 0.2987268567085266, "learning_rate": 1.381393626381605e-05, "loss": 0.0569, "step": 30460 }, { "epoch": 0.5394502074229525, "grad_norm": 0.5326896905899048, "learning_rate": 1.3813078594975881e-05, "loss": 0.0553, "step": 30461 }, { "epoch": 0.5394679169599809, "grad_norm": 0.8537079691886902, "learning_rate": 1.381222093004056e-05, "loss": 0.0835, "step": 30462 }, { "epoch": 0.5394856264970093, "grad_norm": 1.8170491456985474, "learning_rate": 1.3811363269012916e-05, "loss": 0.1172, "step": 30463 }, { "epoch": 0.5395033360340378, "grad_norm": 0.5914218425750732, "learning_rate": 1.3810505611895766e-05, "loss": 0.0828, "step": 30464 }, { "epoch": 0.5395210455710662, "grad_norm": 0.2231302261352539, "learning_rate": 1.3809647958691931e-05, "loss": 0.0562, "step": 30465 }, { "epoch": 0.5395387551080946, "grad_norm": 0.5069291591644287, "learning_rate": 1.3808790309404234e-05, "loss": 0.0933, "step": 30466 }, { "epoch": 0.539556464645123, "grad_norm": 0.2966553568840027, "learning_rate": 1.3807932664035502e-05, "loss": 0.0617, "step": 30467 }, { "epoch": 0.5395741741821515, "grad_norm": 0.5594856142997742, "learning_rate": 1.3807075022588543e-05, "loss": 0.0682, "step": 30468 }, { "epoch": 0.5395918837191799, "grad_norm": 0.6124885082244873, "learning_rate": 1.3806217385066189e-05, "loss": 0.05, "step": 30469 }, { "epoch": 0.5396095932562083, "grad_norm": 0.42559728026390076, "learning_rate": 1.3805359751471259e-05, "loss": 0.0731, "step": 30470 }, { "epoch": 0.5396273027932367, "grad_norm": 0.42801085114479065, "learning_rate": 1.3804502121806579e-05, "loss": 0.0528, "step": 30471 }, { "epoch": 0.5396450123302652, "grad_norm": 0.3148384392261505, "learning_rate": 1.3803644496074962e-05, "loss": 0.0532, "step": 30472 }, { "epoch": 0.5396627218672936, "grad_norm": 0.6878442168235779, "learning_rate": 1.3802786874279232e-05, "loss": 0.0465, "step": 30473 }, { "epoch": 0.539680431404322, "grad_norm": 0.43183019757270813, "learning_rate": 1.3801929256422216e-05, "loss": 0.0483, "step": 30474 }, { "epoch": 0.5396981409413505, "grad_norm": 0.8389491438865662, "learning_rate": 1.380107164250673e-05, "loss": 0.0734, "step": 30475 }, { "epoch": 0.5397158504783789, "grad_norm": 0.607995867729187, "learning_rate": 1.3800214032535596e-05, "loss": 0.0796, "step": 30476 }, { "epoch": 0.5397335600154073, "grad_norm": 0.8809753656387329, "learning_rate": 1.3799356426511636e-05, "loss": 0.0744, "step": 30477 }, { "epoch": 0.5397512695524357, "grad_norm": 0.7859141826629639, "learning_rate": 1.3798498824437676e-05, "loss": 0.0554, "step": 30478 }, { "epoch": 0.5397689790894642, "grad_norm": 0.5143912434577942, "learning_rate": 1.3797641226316528e-05, "loss": 0.0564, "step": 30479 }, { "epoch": 0.5397866886264926, "grad_norm": 0.7258574366569519, "learning_rate": 1.379678363215102e-05, "loss": 0.0882, "step": 30480 }, { "epoch": 0.539804398163521, "grad_norm": 0.4114226698875427, "learning_rate": 1.379592604194398e-05, "loss": 0.0901, "step": 30481 }, { "epoch": 0.5398221077005494, "grad_norm": 0.4595853090286255, "learning_rate": 1.379506845569821e-05, "loss": 0.0507, "step": 30482 }, { "epoch": 0.5398398172375779, "grad_norm": 0.7136776447296143, "learning_rate": 1.3794210873416546e-05, "loss": 0.071, "step": 30483 }, { "epoch": 0.5398575267746063, "grad_norm": 0.532731294631958, "learning_rate": 1.3793353295101807e-05, "loss": 0.0486, "step": 30484 }, { "epoch": 0.5398752363116347, "grad_norm": 0.6827626824378967, "learning_rate": 1.379249572075682e-05, "loss": 0.0605, "step": 30485 }, { "epoch": 0.5398929458486631, "grad_norm": 0.6131389737129211, "learning_rate": 1.3791638150384394e-05, "loss": 0.056, "step": 30486 }, { "epoch": 0.5399106553856916, "grad_norm": 0.6712371110916138, "learning_rate": 1.3790780583987356e-05, "loss": 0.0834, "step": 30487 }, { "epoch": 0.53992836492272, "grad_norm": 0.6464656591415405, "learning_rate": 1.3789923021568528e-05, "loss": 0.0822, "step": 30488 }, { "epoch": 0.5399460744597484, "grad_norm": 0.47370225191116333, "learning_rate": 1.3789065463130733e-05, "loss": 0.0737, "step": 30489 }, { "epoch": 0.5399637839967769, "grad_norm": 0.5581014156341553, "learning_rate": 1.3788207908676787e-05, "loss": 0.0589, "step": 30490 }, { "epoch": 0.5399814935338053, "grad_norm": 0.40341854095458984, "learning_rate": 1.3787350358209514e-05, "loss": 0.0339, "step": 30491 }, { "epoch": 0.5399992030708337, "grad_norm": 0.4849453866481781, "learning_rate": 1.378649281173174e-05, "loss": 0.053, "step": 30492 }, { "epoch": 0.5400169126078621, "grad_norm": 0.8620517253875732, "learning_rate": 1.3785635269246278e-05, "loss": 0.0828, "step": 30493 }, { "epoch": 0.5400346221448906, "grad_norm": 0.9218215346336365, "learning_rate": 1.3784777730755955e-05, "loss": 0.0773, "step": 30494 }, { "epoch": 0.540052331681919, "grad_norm": 0.48891204595565796, "learning_rate": 1.3783920196263588e-05, "loss": 0.0762, "step": 30495 }, { "epoch": 0.5400700412189474, "grad_norm": 0.5585379004478455, "learning_rate": 1.378306266577201e-05, "loss": 0.0483, "step": 30496 }, { "epoch": 0.5400877507559758, "grad_norm": 0.7376695275306702, "learning_rate": 1.378220513928402e-05, "loss": 0.0666, "step": 30497 }, { "epoch": 0.5401054602930043, "grad_norm": 0.3908650279045105, "learning_rate": 1.3781347616802457e-05, "loss": 0.08, "step": 30498 }, { "epoch": 0.5401231698300327, "grad_norm": 0.8986218571662903, "learning_rate": 1.3780490098330141e-05, "loss": 0.103, "step": 30499 }, { "epoch": 0.5401408793670611, "grad_norm": 0.4337221682071686, "learning_rate": 1.3779632583869884e-05, "loss": 0.0609, "step": 30500 }, { "epoch": 0.5401585889040895, "grad_norm": 0.3735657036304474, "learning_rate": 1.3778775073424513e-05, "loss": 0.0523, "step": 30501 }, { "epoch": 0.540176298441118, "grad_norm": 0.5821273326873779, "learning_rate": 1.3777917566996849e-05, "loss": 0.0694, "step": 30502 }, { "epoch": 0.5401940079781464, "grad_norm": 0.42612624168395996, "learning_rate": 1.3777060064589716e-05, "loss": 0.0792, "step": 30503 }, { "epoch": 0.5402117175151748, "grad_norm": 0.5668913125991821, "learning_rate": 1.3776202566205925e-05, "loss": 0.084, "step": 30504 }, { "epoch": 0.5402294270522033, "grad_norm": 0.6424313187599182, "learning_rate": 1.3775345071848307e-05, "loss": 0.0687, "step": 30505 }, { "epoch": 0.5402471365892317, "grad_norm": 0.3051929175853729, "learning_rate": 1.3774487581519683e-05, "loss": 0.0406, "step": 30506 }, { "epoch": 0.5402648461262601, "grad_norm": 0.6318680047988892, "learning_rate": 1.3773630095222866e-05, "loss": 0.0921, "step": 30507 }, { "epoch": 0.5402825556632885, "grad_norm": 0.5500909686088562, "learning_rate": 1.3772772612960684e-05, "loss": 0.0953, "step": 30508 }, { "epoch": 0.540300265200317, "grad_norm": 0.5117612481117249, "learning_rate": 1.3771915134735956e-05, "loss": 0.0342, "step": 30509 }, { "epoch": 0.5403179747373454, "grad_norm": 0.4008832275867462, "learning_rate": 1.3771057660551507e-05, "loss": 0.092, "step": 30510 }, { "epoch": 0.5403356842743738, "grad_norm": 0.4685361087322235, "learning_rate": 1.3770200190410146e-05, "loss": 0.0552, "step": 30511 }, { "epoch": 0.5403533938114022, "grad_norm": 0.5271837115287781, "learning_rate": 1.3769342724314706e-05, "loss": 0.0505, "step": 30512 }, { "epoch": 0.5403711033484307, "grad_norm": 0.6142472624778748, "learning_rate": 1.376848526226801e-05, "loss": 0.0519, "step": 30513 }, { "epoch": 0.5403888128854591, "grad_norm": 0.7559219598770142, "learning_rate": 1.3767627804272866e-05, "loss": 0.1049, "step": 30514 }, { "epoch": 0.5404065224224875, "grad_norm": 0.7828533053398132, "learning_rate": 1.3766770350332102e-05, "loss": 0.0512, "step": 30515 }, { "epoch": 0.5404242319595159, "grad_norm": 0.7624606490135193, "learning_rate": 1.3765912900448538e-05, "loss": 0.0998, "step": 30516 }, { "epoch": 0.5404419414965445, "grad_norm": 0.6648988723754883, "learning_rate": 1.3765055454624999e-05, "loss": 0.086, "step": 30517 }, { "epoch": 0.5404596510335729, "grad_norm": 0.5266503691673279, "learning_rate": 1.37641980128643e-05, "loss": 0.0569, "step": 30518 }, { "epoch": 0.5404773605706013, "grad_norm": 0.564362645149231, "learning_rate": 1.3763340575169265e-05, "loss": 0.0995, "step": 30519 }, { "epoch": 0.5404950701076298, "grad_norm": 0.4725480377674103, "learning_rate": 1.3762483141542717e-05, "loss": 0.0682, "step": 30520 }, { "epoch": 0.5405127796446582, "grad_norm": 0.5050037503242493, "learning_rate": 1.3761625711987471e-05, "loss": 0.0393, "step": 30521 }, { "epoch": 0.5405304891816866, "grad_norm": 0.8145007491111755, "learning_rate": 1.3760768286506351e-05, "loss": 0.0716, "step": 30522 }, { "epoch": 0.540548198718715, "grad_norm": 0.6651108264923096, "learning_rate": 1.3759910865102179e-05, "loss": 0.08, "step": 30523 }, { "epoch": 0.5405659082557435, "grad_norm": 0.8702157735824585, "learning_rate": 1.375905344777778e-05, "loss": 0.0617, "step": 30524 }, { "epoch": 0.5405836177927719, "grad_norm": 0.6400167346000671, "learning_rate": 1.3758196034535965e-05, "loss": 0.0913, "step": 30525 }, { "epoch": 0.5406013273298003, "grad_norm": 0.6784164309501648, "learning_rate": 1.3757338625379555e-05, "loss": 0.0592, "step": 30526 }, { "epoch": 0.5406190368668287, "grad_norm": 1.165278673171997, "learning_rate": 1.3756481220311378e-05, "loss": 0.0807, "step": 30527 }, { "epoch": 0.5406367464038572, "grad_norm": 1.2664589881896973, "learning_rate": 1.375562381933426e-05, "loss": 0.112, "step": 30528 }, { "epoch": 0.5406544559408856, "grad_norm": 1.1221257448196411, "learning_rate": 1.3754766422451006e-05, "loss": 0.0681, "step": 30529 }, { "epoch": 0.540672165477914, "grad_norm": 0.5823079347610474, "learning_rate": 1.3753909029664444e-05, "loss": 0.0528, "step": 30530 }, { "epoch": 0.5406898750149424, "grad_norm": 0.10350288450717926, "learning_rate": 1.3753051640977399e-05, "loss": 0.0479, "step": 30531 }, { "epoch": 0.5407075845519709, "grad_norm": 0.419111430644989, "learning_rate": 1.3752194256392686e-05, "loss": 0.0514, "step": 30532 }, { "epoch": 0.5407252940889993, "grad_norm": 0.9529820680618286, "learning_rate": 1.3751336875913127e-05, "loss": 0.0759, "step": 30533 }, { "epoch": 0.5407430036260277, "grad_norm": 0.6214290261268616, "learning_rate": 1.3750479499541542e-05, "loss": 0.0756, "step": 30534 }, { "epoch": 0.5407607131630562, "grad_norm": 0.7429568767547607, "learning_rate": 1.3749622127280757e-05, "loss": 0.0845, "step": 30535 }, { "epoch": 0.5407784227000846, "grad_norm": 0.539542555809021, "learning_rate": 1.3748764759133587e-05, "loss": 0.0767, "step": 30536 }, { "epoch": 0.540796132237113, "grad_norm": 1.1176235675811768, "learning_rate": 1.3747907395102853e-05, "loss": 0.0812, "step": 30537 }, { "epoch": 0.5408138417741414, "grad_norm": 0.6749545931816101, "learning_rate": 1.3747050035191384e-05, "loss": 0.0765, "step": 30538 }, { "epoch": 0.5408315513111699, "grad_norm": 0.9285339713096619, "learning_rate": 1.3746192679401987e-05, "loss": 0.0913, "step": 30539 }, { "epoch": 0.5408492608481983, "grad_norm": 0.45122793316841125, "learning_rate": 1.3745335327737486e-05, "loss": 0.0947, "step": 30540 }, { "epoch": 0.5408669703852267, "grad_norm": 0.5339455604553223, "learning_rate": 1.3744477980200708e-05, "loss": 0.0833, "step": 30541 }, { "epoch": 0.5408846799222551, "grad_norm": 0.8218775987625122, "learning_rate": 1.3743620636794477e-05, "loss": 0.081, "step": 30542 }, { "epoch": 0.5409023894592836, "grad_norm": 0.6932284235954285, "learning_rate": 1.37427632975216e-05, "loss": 0.0923, "step": 30543 }, { "epoch": 0.540920098996312, "grad_norm": 0.6627675890922546, "learning_rate": 1.3741905962384906e-05, "loss": 0.0633, "step": 30544 }, { "epoch": 0.5409378085333404, "grad_norm": 0.8033472299575806, "learning_rate": 1.3741048631387216e-05, "loss": 0.0647, "step": 30545 }, { "epoch": 0.5409555180703688, "grad_norm": 0.44820302724838257, "learning_rate": 1.3740191304531346e-05, "loss": 0.0544, "step": 30546 }, { "epoch": 0.5409732276073973, "grad_norm": 0.8211390972137451, "learning_rate": 1.373933398182012e-05, "loss": 0.0787, "step": 30547 }, { "epoch": 0.5409909371444257, "grad_norm": 0.494185209274292, "learning_rate": 1.3738476663256357e-05, "loss": 0.055, "step": 30548 }, { "epoch": 0.5410086466814541, "grad_norm": 0.6004286408424377, "learning_rate": 1.3737619348842882e-05, "loss": 0.0597, "step": 30549 }, { "epoch": 0.5410263562184826, "grad_norm": 0.4035133421421051, "learning_rate": 1.3736762038582506e-05, "loss": 0.0503, "step": 30550 }, { "epoch": 0.541044065755511, "grad_norm": 0.8036801815032959, "learning_rate": 1.3735904732478058e-05, "loss": 0.1013, "step": 30551 }, { "epoch": 0.5410617752925394, "grad_norm": 0.8655831813812256, "learning_rate": 1.3735047430532361e-05, "loss": 0.1266, "step": 30552 }, { "epoch": 0.5410794848295678, "grad_norm": 1.4516410827636719, "learning_rate": 1.3734190132748224e-05, "loss": 0.0765, "step": 30553 }, { "epoch": 0.5410971943665963, "grad_norm": 0.8473039269447327, "learning_rate": 1.3733332839128472e-05, "loss": 0.0914, "step": 30554 }, { "epoch": 0.5411149039036247, "grad_norm": 0.559048056602478, "learning_rate": 1.3732475549675925e-05, "loss": 0.0722, "step": 30555 }, { "epoch": 0.5411326134406531, "grad_norm": 0.6040682792663574, "learning_rate": 1.3731618264393415e-05, "loss": 0.0645, "step": 30556 }, { "epoch": 0.5411503229776815, "grad_norm": 0.9474524855613708, "learning_rate": 1.3730760983283745e-05, "loss": 0.1159, "step": 30557 }, { "epoch": 0.54116803251471, "grad_norm": 0.4243403971195221, "learning_rate": 1.3729903706349743e-05, "loss": 0.0611, "step": 30558 }, { "epoch": 0.5411857420517384, "grad_norm": 0.6025040149688721, "learning_rate": 1.372904643359423e-05, "loss": 0.0749, "step": 30559 }, { "epoch": 0.5412034515887668, "grad_norm": 0.7768975496292114, "learning_rate": 1.372818916502003e-05, "loss": 0.0715, "step": 30560 }, { "epoch": 0.5412211611257952, "grad_norm": 0.4484197795391083, "learning_rate": 1.3727331900629953e-05, "loss": 0.0623, "step": 30561 }, { "epoch": 0.5412388706628237, "grad_norm": 0.5658335089683533, "learning_rate": 1.3726474640426828e-05, "loss": 0.0721, "step": 30562 }, { "epoch": 0.5412565801998521, "grad_norm": 0.21555839478969574, "learning_rate": 1.3725617384413475e-05, "loss": 0.0563, "step": 30563 }, { "epoch": 0.5412742897368805, "grad_norm": 0.5620259046554565, "learning_rate": 1.372476013259271e-05, "loss": 0.0809, "step": 30564 }, { "epoch": 0.541291999273909, "grad_norm": 0.8558303117752075, "learning_rate": 1.3723902884967351e-05, "loss": 0.0734, "step": 30565 }, { "epoch": 0.5413097088109374, "grad_norm": 0.6610508561134338, "learning_rate": 1.3723045641540227e-05, "loss": 0.0693, "step": 30566 }, { "epoch": 0.5413274183479658, "grad_norm": 0.6642761826515198, "learning_rate": 1.3722188402314157e-05, "loss": 0.0833, "step": 30567 }, { "epoch": 0.5413451278849942, "grad_norm": 0.889811635017395, "learning_rate": 1.3721331167291953e-05, "loss": 0.0524, "step": 30568 }, { "epoch": 0.5413628374220227, "grad_norm": 0.5604926943778992, "learning_rate": 1.3720473936476436e-05, "loss": 0.0424, "step": 30569 }, { "epoch": 0.5413805469590511, "grad_norm": 0.654364287853241, "learning_rate": 1.3719616709870441e-05, "loss": 0.0888, "step": 30570 }, { "epoch": 0.5413982564960795, "grad_norm": 0.48874083161354065, "learning_rate": 1.3718759487476771e-05, "loss": 0.0734, "step": 30571 }, { "epoch": 0.5414159660331079, "grad_norm": 0.3662136495113373, "learning_rate": 1.3717902269298254e-05, "loss": 0.0789, "step": 30572 }, { "epoch": 0.5414336755701364, "grad_norm": 0.5025133490562439, "learning_rate": 1.3717045055337705e-05, "loss": 0.0672, "step": 30573 }, { "epoch": 0.5414513851071648, "grad_norm": 0.46127307415008545, "learning_rate": 1.3716187845597955e-05, "loss": 0.0591, "step": 30574 }, { "epoch": 0.5414690946441932, "grad_norm": 0.6827836632728577, "learning_rate": 1.3715330640081813e-05, "loss": 0.0601, "step": 30575 }, { "epoch": 0.5414868041812216, "grad_norm": 0.7516865730285645, "learning_rate": 1.3714473438792103e-05, "loss": 0.0871, "step": 30576 }, { "epoch": 0.5415045137182501, "grad_norm": 0.43385446071624756, "learning_rate": 1.3713616241731647e-05, "loss": 0.0574, "step": 30577 }, { "epoch": 0.5415222232552785, "grad_norm": 0.37317174673080444, "learning_rate": 1.371275904890326e-05, "loss": 0.0613, "step": 30578 }, { "epoch": 0.5415399327923069, "grad_norm": 0.7228025197982788, "learning_rate": 1.3711901860309768e-05, "loss": 0.063, "step": 30579 }, { "epoch": 0.5415576423293355, "grad_norm": 0.909712016582489, "learning_rate": 1.3711044675953988e-05, "loss": 0.0802, "step": 30580 }, { "epoch": 0.5415753518663639, "grad_norm": 0.6972993612289429, "learning_rate": 1.3710187495838748e-05, "loss": 0.04, "step": 30581 }, { "epoch": 0.5415930614033923, "grad_norm": 0.6478250026702881, "learning_rate": 1.3709330319966851e-05, "loss": 0.0462, "step": 30582 }, { "epoch": 0.5416107709404206, "grad_norm": 0.6231339573860168, "learning_rate": 1.370847314834113e-05, "loss": 0.0535, "step": 30583 }, { "epoch": 0.5416284804774492, "grad_norm": 0.6340474486351013, "learning_rate": 1.3707615980964403e-05, "loss": 0.0448, "step": 30584 }, { "epoch": 0.5416461900144776, "grad_norm": 0.9899389147758484, "learning_rate": 1.3706758817839487e-05, "loss": 0.0869, "step": 30585 }, { "epoch": 0.541663899551506, "grad_norm": 0.7151530981063843, "learning_rate": 1.3705901658969202e-05, "loss": 0.0711, "step": 30586 }, { "epoch": 0.5416816090885344, "grad_norm": 0.810404896736145, "learning_rate": 1.370504450435637e-05, "loss": 0.0477, "step": 30587 }, { "epoch": 0.5416993186255629, "grad_norm": 0.8517720103263855, "learning_rate": 1.3704187354003815e-05, "loss": 0.0445, "step": 30588 }, { "epoch": 0.5417170281625913, "grad_norm": 0.2595559060573578, "learning_rate": 1.3703330207914349e-05, "loss": 0.0612, "step": 30589 }, { "epoch": 0.5417347376996197, "grad_norm": 0.901900589466095, "learning_rate": 1.3702473066090793e-05, "loss": 0.0677, "step": 30590 }, { "epoch": 0.5417524472366482, "grad_norm": 0.7025631070137024, "learning_rate": 1.370161592853598e-05, "loss": 0.1008, "step": 30591 }, { "epoch": 0.5417701567736766, "grad_norm": 0.4233381450176239, "learning_rate": 1.3700758795252706e-05, "loss": 0.0547, "step": 30592 }, { "epoch": 0.541787866310705, "grad_norm": 0.6343251466751099, "learning_rate": 1.3699901666243809e-05, "loss": 0.0736, "step": 30593 }, { "epoch": 0.5418055758477334, "grad_norm": 0.263581782579422, "learning_rate": 1.3699044541512104e-05, "loss": 0.0533, "step": 30594 }, { "epoch": 0.5418232853847619, "grad_norm": 0.567585289478302, "learning_rate": 1.3698187421060417e-05, "loss": 0.0891, "step": 30595 }, { "epoch": 0.5418409949217903, "grad_norm": 0.5318887233734131, "learning_rate": 1.3697330304891555e-05, "loss": 0.0611, "step": 30596 }, { "epoch": 0.5418587044588187, "grad_norm": 0.7047865390777588, "learning_rate": 1.3696473193008344e-05, "loss": 0.0743, "step": 30597 }, { "epoch": 0.5418764139958471, "grad_norm": 0.5539330840110779, "learning_rate": 1.3695616085413603e-05, "loss": 0.0796, "step": 30598 }, { "epoch": 0.5418941235328756, "grad_norm": 0.6555225849151611, "learning_rate": 1.3694758982110162e-05, "loss": 0.0548, "step": 30599 }, { "epoch": 0.541911833069904, "grad_norm": 0.7006276249885559, "learning_rate": 1.3693901883100825e-05, "loss": 0.0585, "step": 30600 }, { "epoch": 0.5419295426069324, "grad_norm": 0.7314636707305908, "learning_rate": 1.3693044788388419e-05, "loss": 0.0868, "step": 30601 }, { "epoch": 0.5419472521439608, "grad_norm": 0.5980636477470398, "learning_rate": 1.3692187697975767e-05, "loss": 0.0612, "step": 30602 }, { "epoch": 0.5419649616809893, "grad_norm": 0.5196947455406189, "learning_rate": 1.3691330611865682e-05, "loss": 0.0586, "step": 30603 }, { "epoch": 0.5419826712180177, "grad_norm": 0.453310489654541, "learning_rate": 1.3690473530060987e-05, "loss": 0.0636, "step": 30604 }, { "epoch": 0.5420003807550461, "grad_norm": 0.32461971044540405, "learning_rate": 1.3689616452564502e-05, "loss": 0.0709, "step": 30605 }, { "epoch": 0.5420180902920746, "grad_norm": 0.6131659746170044, "learning_rate": 1.3688759379379048e-05, "loss": 0.056, "step": 30606 }, { "epoch": 0.542035799829103, "grad_norm": 0.6335233449935913, "learning_rate": 1.368790231050744e-05, "loss": 0.0569, "step": 30607 }, { "epoch": 0.5420535093661314, "grad_norm": 1.0808403491973877, "learning_rate": 1.3687045245952502e-05, "loss": 0.0926, "step": 30608 }, { "epoch": 0.5420712189031598, "grad_norm": 0.42638811469078064, "learning_rate": 1.3686188185717058e-05, "loss": 0.0426, "step": 30609 }, { "epoch": 0.5420889284401883, "grad_norm": 0.3990926146507263, "learning_rate": 1.3685331129803915e-05, "loss": 0.0345, "step": 30610 }, { "epoch": 0.5421066379772167, "grad_norm": 0.6623455882072449, "learning_rate": 1.3684474078215901e-05, "loss": 0.0719, "step": 30611 }, { "epoch": 0.5421243475142451, "grad_norm": 0.46233639121055603, "learning_rate": 1.3683617030955834e-05, "loss": 0.0579, "step": 30612 }, { "epoch": 0.5421420570512735, "grad_norm": 0.6919980645179749, "learning_rate": 1.3682759988026536e-05, "loss": 0.0573, "step": 30613 }, { "epoch": 0.542159766588302, "grad_norm": 0.6109525561332703, "learning_rate": 1.3681902949430822e-05, "loss": 0.0913, "step": 30614 }, { "epoch": 0.5421774761253304, "grad_norm": 0.523220419883728, "learning_rate": 1.3681045915171515e-05, "loss": 0.0427, "step": 30615 }, { "epoch": 0.5421951856623588, "grad_norm": 0.4014296233654022, "learning_rate": 1.3680188885251433e-05, "loss": 0.0564, "step": 30616 }, { "epoch": 0.5422128951993872, "grad_norm": 0.4934106171131134, "learning_rate": 1.3679331859673396e-05, "loss": 0.0782, "step": 30617 }, { "epoch": 0.5422306047364157, "grad_norm": 0.5724064111709595, "learning_rate": 1.3678474838440223e-05, "loss": 0.056, "step": 30618 }, { "epoch": 0.5422483142734441, "grad_norm": 0.5244584679603577, "learning_rate": 1.3677617821554734e-05, "loss": 0.0589, "step": 30619 }, { "epoch": 0.5422660238104725, "grad_norm": 0.40933528542518616, "learning_rate": 1.3676760809019755e-05, "loss": 0.0532, "step": 30620 }, { "epoch": 0.542283733347501, "grad_norm": 0.5378829836845398, "learning_rate": 1.367590380083809e-05, "loss": 0.0762, "step": 30621 }, { "epoch": 0.5423014428845294, "grad_norm": 0.5768070816993713, "learning_rate": 1.367504679701257e-05, "loss": 0.0704, "step": 30622 }, { "epoch": 0.5423191524215578, "grad_norm": 0.4150731563568115, "learning_rate": 1.367418979754602e-05, "loss": 0.0719, "step": 30623 }, { "epoch": 0.5423368619585862, "grad_norm": 1.081388235092163, "learning_rate": 1.3673332802441244e-05, "loss": 0.0985, "step": 30624 }, { "epoch": 0.5423545714956147, "grad_norm": 0.30840709805488586, "learning_rate": 1.3672475811701068e-05, "loss": 0.0561, "step": 30625 }, { "epoch": 0.5423722810326431, "grad_norm": 0.5558671951293945, "learning_rate": 1.3671618825328311e-05, "loss": 0.0552, "step": 30626 }, { "epoch": 0.5423899905696715, "grad_norm": 1.1829968690872192, "learning_rate": 1.36707618433258e-05, "loss": 0.0755, "step": 30627 }, { "epoch": 0.5424077001066999, "grad_norm": 0.614400327205658, "learning_rate": 1.3669904865696344e-05, "loss": 0.0728, "step": 30628 }, { "epoch": 0.5424254096437284, "grad_norm": 1.0124294757843018, "learning_rate": 1.3669047892442767e-05, "loss": 0.0604, "step": 30629 }, { "epoch": 0.5424431191807568, "grad_norm": 0.6227825880050659, "learning_rate": 1.3668190923567887e-05, "loss": 0.0849, "step": 30630 }, { "epoch": 0.5424608287177852, "grad_norm": 0.5258467793464661, "learning_rate": 1.3667333959074526e-05, "loss": 0.054, "step": 30631 }, { "epoch": 0.5424785382548136, "grad_norm": 0.6178774833679199, "learning_rate": 1.36664769989655e-05, "loss": 0.1029, "step": 30632 }, { "epoch": 0.5424962477918421, "grad_norm": 0.5880957841873169, "learning_rate": 1.366562004324363e-05, "loss": 0.0664, "step": 30633 }, { "epoch": 0.5425139573288705, "grad_norm": 1.0898665189743042, "learning_rate": 1.3664763091911742e-05, "loss": 0.1169, "step": 30634 }, { "epoch": 0.5425316668658989, "grad_norm": 0.708099901676178, "learning_rate": 1.3663906144972638e-05, "loss": 0.0774, "step": 30635 }, { "epoch": 0.5425493764029274, "grad_norm": 0.3318136930465698, "learning_rate": 1.366304920242915e-05, "loss": 0.0515, "step": 30636 }, { "epoch": 0.5425670859399558, "grad_norm": 0.6031147837638855, "learning_rate": 1.3662192264284096e-05, "loss": 0.0698, "step": 30637 }, { "epoch": 0.5425847954769842, "grad_norm": 0.5050771236419678, "learning_rate": 1.36613353305403e-05, "loss": 0.055, "step": 30638 }, { "epoch": 0.5426025050140126, "grad_norm": 0.47676920890808105, "learning_rate": 1.366047840120057e-05, "loss": 0.0515, "step": 30639 }, { "epoch": 0.5426202145510411, "grad_norm": 0.7374727129936218, "learning_rate": 1.3659621476267729e-05, "loss": 0.101, "step": 30640 }, { "epoch": 0.5426379240880695, "grad_norm": 0.4746037721633911, "learning_rate": 1.3658764555744602e-05, "loss": 0.0608, "step": 30641 }, { "epoch": 0.5426556336250979, "grad_norm": 0.9559512138366699, "learning_rate": 1.3657907639634e-05, "loss": 0.0793, "step": 30642 }, { "epoch": 0.5426733431621263, "grad_norm": 0.7454280257225037, "learning_rate": 1.3657050727938746e-05, "loss": 0.0709, "step": 30643 }, { "epoch": 0.5426910526991549, "grad_norm": 0.644180417060852, "learning_rate": 1.365619382066166e-05, "loss": 0.089, "step": 30644 }, { "epoch": 0.5427087622361833, "grad_norm": 0.42740219831466675, "learning_rate": 1.3655336917805563e-05, "loss": 0.063, "step": 30645 }, { "epoch": 0.5427264717732116, "grad_norm": 0.464651495218277, "learning_rate": 1.365448001937327e-05, "loss": 0.0805, "step": 30646 }, { "epoch": 0.54274418131024, "grad_norm": 0.8250635266304016, "learning_rate": 1.3653623125367599e-05, "loss": 0.0613, "step": 30647 }, { "epoch": 0.5427618908472686, "grad_norm": 0.7562676072120667, "learning_rate": 1.365276623579138e-05, "loss": 0.053, "step": 30648 }, { "epoch": 0.542779600384297, "grad_norm": 0.8629449605941772, "learning_rate": 1.3651909350647417e-05, "loss": 0.0947, "step": 30649 }, { "epoch": 0.5427973099213254, "grad_norm": 0.4886443018913269, "learning_rate": 1.3651052469938534e-05, "loss": 0.0602, "step": 30650 }, { "epoch": 0.5428150194583539, "grad_norm": 0.5226276516914368, "learning_rate": 1.3650195593667552e-05, "loss": 0.061, "step": 30651 }, { "epoch": 0.5428327289953823, "grad_norm": 0.5292342305183411, "learning_rate": 1.3649338721837297e-05, "loss": 0.065, "step": 30652 }, { "epoch": 0.5428504385324107, "grad_norm": 0.6555805802345276, "learning_rate": 1.3648481854450577e-05, "loss": 0.0646, "step": 30653 }, { "epoch": 0.5428681480694391, "grad_norm": 0.46710625290870667, "learning_rate": 1.3647624991510214e-05, "loss": 0.084, "step": 30654 }, { "epoch": 0.5428858576064676, "grad_norm": 0.8934643268585205, "learning_rate": 1.364676813301903e-05, "loss": 0.0648, "step": 30655 }, { "epoch": 0.542903567143496, "grad_norm": 0.5953379273414612, "learning_rate": 1.3645911278979841e-05, "loss": 0.0873, "step": 30656 }, { "epoch": 0.5429212766805244, "grad_norm": 0.5095747709274292, "learning_rate": 1.3645054429395465e-05, "loss": 0.0666, "step": 30657 }, { "epoch": 0.5429389862175528, "grad_norm": 0.6216044425964355, "learning_rate": 1.3644197584268723e-05, "loss": 0.1018, "step": 30658 }, { "epoch": 0.5429566957545813, "grad_norm": 0.5429727435112, "learning_rate": 1.3643340743602436e-05, "loss": 0.0444, "step": 30659 }, { "epoch": 0.5429744052916097, "grad_norm": 0.7369731664657593, "learning_rate": 1.364248390739942e-05, "loss": 0.0896, "step": 30660 }, { "epoch": 0.5429921148286381, "grad_norm": 0.7101828455924988, "learning_rate": 1.3641627075662494e-05, "loss": 0.0838, "step": 30661 }, { "epoch": 0.5430098243656665, "grad_norm": 0.6683295965194702, "learning_rate": 1.3640770248394478e-05, "loss": 0.0588, "step": 30662 }, { "epoch": 0.543027533902695, "grad_norm": 0.6116577386856079, "learning_rate": 1.3639913425598196e-05, "loss": 0.053, "step": 30663 }, { "epoch": 0.5430452434397234, "grad_norm": 0.6472548246383667, "learning_rate": 1.3639056607276452e-05, "loss": 0.0693, "step": 30664 }, { "epoch": 0.5430629529767518, "grad_norm": 0.3753165602684021, "learning_rate": 1.3638199793432075e-05, "loss": 0.0904, "step": 30665 }, { "epoch": 0.5430806625137803, "grad_norm": 0.7796998620033264, "learning_rate": 1.3637342984067893e-05, "loss": 0.1083, "step": 30666 }, { "epoch": 0.5430983720508087, "grad_norm": 0.3644087612628937, "learning_rate": 1.3636486179186707e-05, "loss": 0.0531, "step": 30667 }, { "epoch": 0.5431160815878371, "grad_norm": 0.639565110206604, "learning_rate": 1.3635629378791343e-05, "loss": 0.0661, "step": 30668 }, { "epoch": 0.5431337911248655, "grad_norm": 0.7432054281234741, "learning_rate": 1.363477258288462e-05, "loss": 0.0555, "step": 30669 }, { "epoch": 0.543151500661894, "grad_norm": 0.5682779550552368, "learning_rate": 1.363391579146936e-05, "loss": 0.0585, "step": 30670 }, { "epoch": 0.5431692101989224, "grad_norm": 0.5221482515335083, "learning_rate": 1.3633059004548378e-05, "loss": 0.0529, "step": 30671 }, { "epoch": 0.5431869197359508, "grad_norm": 0.46494343876838684, "learning_rate": 1.3632202222124492e-05, "loss": 0.0506, "step": 30672 }, { "epoch": 0.5432046292729792, "grad_norm": 0.932384729385376, "learning_rate": 1.3631345444200526e-05, "loss": 0.0602, "step": 30673 }, { "epoch": 0.5432223388100077, "grad_norm": 0.3913983106613159, "learning_rate": 1.363048867077929e-05, "loss": 0.0502, "step": 30674 }, { "epoch": 0.5432400483470361, "grad_norm": 0.7753501534461975, "learning_rate": 1.3629631901863611e-05, "loss": 0.1134, "step": 30675 }, { "epoch": 0.5432577578840645, "grad_norm": 0.3320159614086151, "learning_rate": 1.3628775137456302e-05, "loss": 0.0411, "step": 30676 }, { "epoch": 0.5432754674210929, "grad_norm": 0.5937644243240356, "learning_rate": 1.362791837756019e-05, "loss": 0.041, "step": 30677 }, { "epoch": 0.5432931769581214, "grad_norm": 0.4469437003135681, "learning_rate": 1.3627061622178086e-05, "loss": 0.0721, "step": 30678 }, { "epoch": 0.5433108864951498, "grad_norm": 0.8063976168632507, "learning_rate": 1.3626204871312802e-05, "loss": 0.0966, "step": 30679 }, { "epoch": 0.5433285960321782, "grad_norm": 0.9743659496307373, "learning_rate": 1.3625348124967176e-05, "loss": 0.0785, "step": 30680 }, { "epoch": 0.5433463055692067, "grad_norm": 0.6175949573516846, "learning_rate": 1.362449138314401e-05, "loss": 0.0787, "step": 30681 }, { "epoch": 0.5433640151062351, "grad_norm": 0.655690610408783, "learning_rate": 1.3623634645846128e-05, "loss": 0.0685, "step": 30682 }, { "epoch": 0.5433817246432635, "grad_norm": 0.6909466981887817, "learning_rate": 1.362277791307635e-05, "loss": 0.0552, "step": 30683 }, { "epoch": 0.5433994341802919, "grad_norm": 0.6269460916519165, "learning_rate": 1.3621921184837493e-05, "loss": 0.0571, "step": 30684 }, { "epoch": 0.5434171437173204, "grad_norm": 1.131677508354187, "learning_rate": 1.3621064461132377e-05, "loss": 0.0685, "step": 30685 }, { "epoch": 0.5434348532543488, "grad_norm": 0.7354660630226135, "learning_rate": 1.3620207741963817e-05, "loss": 0.0836, "step": 30686 }, { "epoch": 0.5434525627913772, "grad_norm": 0.6080155372619629, "learning_rate": 1.3619351027334637e-05, "loss": 0.0528, "step": 30687 }, { "epoch": 0.5434702723284056, "grad_norm": 0.3306649327278137, "learning_rate": 1.3618494317247651e-05, "loss": 0.0519, "step": 30688 }, { "epoch": 0.5434879818654341, "grad_norm": 1.8517378568649292, "learning_rate": 1.3617637611705678e-05, "loss": 0.0745, "step": 30689 }, { "epoch": 0.5435056914024625, "grad_norm": 0.8141434192657471, "learning_rate": 1.3616780910711536e-05, "loss": 0.0706, "step": 30690 }, { "epoch": 0.5435234009394909, "grad_norm": 0.5753671526908875, "learning_rate": 1.3615924214268053e-05, "loss": 0.0768, "step": 30691 }, { "epoch": 0.5435411104765193, "grad_norm": 0.7302276492118835, "learning_rate": 1.3615067522378033e-05, "loss": 0.0739, "step": 30692 }, { "epoch": 0.5435588200135478, "grad_norm": 0.8433472514152527, "learning_rate": 1.3614210835044299e-05, "loss": 0.0583, "step": 30693 }, { "epoch": 0.5435765295505762, "grad_norm": 1.051090955734253, "learning_rate": 1.3613354152269676e-05, "loss": 0.0757, "step": 30694 }, { "epoch": 0.5435942390876046, "grad_norm": 1.0588420629501343, "learning_rate": 1.3612497474056975e-05, "loss": 0.0826, "step": 30695 }, { "epoch": 0.5436119486246331, "grad_norm": 0.552272617816925, "learning_rate": 1.3611640800409018e-05, "loss": 0.0846, "step": 30696 }, { "epoch": 0.5436296581616615, "grad_norm": 0.385001003742218, "learning_rate": 1.3610784131328618e-05, "loss": 0.0854, "step": 30697 }, { "epoch": 0.5436473676986899, "grad_norm": 0.8745614886283875, "learning_rate": 1.3609927466818604e-05, "loss": 0.0886, "step": 30698 }, { "epoch": 0.5436650772357183, "grad_norm": 0.7106891870498657, "learning_rate": 1.3609070806881784e-05, "loss": 0.0873, "step": 30699 }, { "epoch": 0.5436827867727468, "grad_norm": 0.7173212766647339, "learning_rate": 1.3608214151520982e-05, "loss": 0.0916, "step": 30700 }, { "epoch": 0.5437004963097752, "grad_norm": 0.7100936770439148, "learning_rate": 1.3607357500739012e-05, "loss": 0.0699, "step": 30701 }, { "epoch": 0.5437182058468036, "grad_norm": 1.1353566646575928, "learning_rate": 1.36065008545387e-05, "loss": 0.1149, "step": 30702 }, { "epoch": 0.543735915383832, "grad_norm": 0.8420792818069458, "learning_rate": 1.3605644212922856e-05, "loss": 0.0715, "step": 30703 }, { "epoch": 0.5437536249208605, "grad_norm": 0.8000667095184326, "learning_rate": 1.3604787575894301e-05, "loss": 0.0759, "step": 30704 }, { "epoch": 0.5437713344578889, "grad_norm": 0.6065320372581482, "learning_rate": 1.360393094345586e-05, "loss": 0.0495, "step": 30705 }, { "epoch": 0.5437890439949173, "grad_norm": 0.5704129934310913, "learning_rate": 1.3603074315610338e-05, "loss": 0.0817, "step": 30706 }, { "epoch": 0.5438067535319457, "grad_norm": 0.4408407509326935, "learning_rate": 1.360221769236056e-05, "loss": 0.0959, "step": 30707 }, { "epoch": 0.5438244630689743, "grad_norm": 0.6488422155380249, "learning_rate": 1.3601361073709342e-05, "loss": 0.0855, "step": 30708 }, { "epoch": 0.5438421726060026, "grad_norm": 0.6394411325454712, "learning_rate": 1.3600504459659514e-05, "loss": 0.082, "step": 30709 }, { "epoch": 0.543859882143031, "grad_norm": 0.7203454971313477, "learning_rate": 1.3599647850213879e-05, "loss": 0.0513, "step": 30710 }, { "epoch": 0.5438775916800596, "grad_norm": 0.6900578141212463, "learning_rate": 1.3598791245375261e-05, "loss": 0.0252, "step": 30711 }, { "epoch": 0.543895301217088, "grad_norm": 0.2359873652458191, "learning_rate": 1.3597934645146482e-05, "loss": 0.0518, "step": 30712 }, { "epoch": 0.5439130107541164, "grad_norm": 0.3103293478488922, "learning_rate": 1.3597078049530352e-05, "loss": 0.0602, "step": 30713 }, { "epoch": 0.5439307202911448, "grad_norm": 0.46700021624565125, "learning_rate": 1.3596221458529693e-05, "loss": 0.0674, "step": 30714 }, { "epoch": 0.5439484298281733, "grad_norm": 0.7106197476387024, "learning_rate": 1.3595364872147324e-05, "loss": 0.0696, "step": 30715 }, { "epoch": 0.5439661393652017, "grad_norm": 0.583267092704773, "learning_rate": 1.3594508290386064e-05, "loss": 0.0654, "step": 30716 }, { "epoch": 0.5439838489022301, "grad_norm": 0.8900294899940491, "learning_rate": 1.3593651713248729e-05, "loss": 0.0723, "step": 30717 }, { "epoch": 0.5440015584392585, "grad_norm": 0.422601580619812, "learning_rate": 1.3592795140738137e-05, "loss": 0.0566, "step": 30718 }, { "epoch": 0.544019267976287, "grad_norm": 0.5128000378608704, "learning_rate": 1.3591938572857112e-05, "loss": 0.0653, "step": 30719 }, { "epoch": 0.5440369775133154, "grad_norm": 0.7235954403877258, "learning_rate": 1.3591082009608459e-05, "loss": 0.0887, "step": 30720 }, { "epoch": 0.5440546870503438, "grad_norm": 0.9499644637107849, "learning_rate": 1.3590225450995008e-05, "loss": 0.0619, "step": 30721 }, { "epoch": 0.5440723965873722, "grad_norm": 0.5810734629631042, "learning_rate": 1.3589368897019565e-05, "loss": 0.0791, "step": 30722 }, { "epoch": 0.5440901061244007, "grad_norm": 0.8333202004432678, "learning_rate": 1.3588512347684966e-05, "loss": 0.0811, "step": 30723 }, { "epoch": 0.5441078156614291, "grad_norm": 0.568371057510376, "learning_rate": 1.3587655802994014e-05, "loss": 0.0574, "step": 30724 }, { "epoch": 0.5441255251984575, "grad_norm": 0.4898103177547455, "learning_rate": 1.3586799262949531e-05, "loss": 0.0707, "step": 30725 }, { "epoch": 0.544143234735486, "grad_norm": 0.5249922275543213, "learning_rate": 1.3585942727554338e-05, "loss": 0.0731, "step": 30726 }, { "epoch": 0.5441609442725144, "grad_norm": 0.7871415019035339, "learning_rate": 1.3585086196811248e-05, "loss": 0.0858, "step": 30727 }, { "epoch": 0.5441786538095428, "grad_norm": 0.5299668312072754, "learning_rate": 1.358422967072308e-05, "loss": 0.0555, "step": 30728 }, { "epoch": 0.5441963633465712, "grad_norm": 0.7150794863700867, "learning_rate": 1.3583373149292654e-05, "loss": 0.0916, "step": 30729 }, { "epoch": 0.5442140728835997, "grad_norm": 0.5717913508415222, "learning_rate": 1.3582516632522793e-05, "loss": 0.0597, "step": 30730 }, { "epoch": 0.5442317824206281, "grad_norm": 0.7399242520332336, "learning_rate": 1.3581660120416302e-05, "loss": 0.0721, "step": 30731 }, { "epoch": 0.5442494919576565, "grad_norm": 0.2721887528896332, "learning_rate": 1.3580803612976008e-05, "loss": 0.0803, "step": 30732 }, { "epoch": 0.5442672014946849, "grad_norm": 0.39154285192489624, "learning_rate": 1.3579947110204725e-05, "loss": 0.0443, "step": 30733 }, { "epoch": 0.5442849110317134, "grad_norm": 0.7049247622489929, "learning_rate": 1.3579090612105279e-05, "loss": 0.0783, "step": 30734 }, { "epoch": 0.5443026205687418, "grad_norm": 0.8401951789855957, "learning_rate": 1.3578234118680476e-05, "loss": 0.0986, "step": 30735 }, { "epoch": 0.5443203301057702, "grad_norm": 1.35267174243927, "learning_rate": 1.3577377629933138e-05, "loss": 0.0767, "step": 30736 }, { "epoch": 0.5443380396427986, "grad_norm": 0.6906471848487854, "learning_rate": 1.3576521145866087e-05, "loss": 0.0654, "step": 30737 }, { "epoch": 0.5443557491798271, "grad_norm": 0.5277668833732605, "learning_rate": 1.3575664666482134e-05, "loss": 0.1064, "step": 30738 }, { "epoch": 0.5443734587168555, "grad_norm": 0.5132412314414978, "learning_rate": 1.35748081917841e-05, "loss": 0.0465, "step": 30739 }, { "epoch": 0.5443911682538839, "grad_norm": 0.8707635998725891, "learning_rate": 1.3573951721774803e-05, "loss": 0.0555, "step": 30740 }, { "epoch": 0.5444088777909124, "grad_norm": 1.135709285736084, "learning_rate": 1.3573095256457065e-05, "loss": 0.0922, "step": 30741 }, { "epoch": 0.5444265873279408, "grad_norm": 0.40968722105026245, "learning_rate": 1.3572238795833694e-05, "loss": 0.0482, "step": 30742 }, { "epoch": 0.5444442968649692, "grad_norm": 0.7928636074066162, "learning_rate": 1.3571382339907514e-05, "loss": 0.0815, "step": 30743 }, { "epoch": 0.5444620064019976, "grad_norm": 0.5066545605659485, "learning_rate": 1.3570525888681349e-05, "loss": 0.0765, "step": 30744 }, { "epoch": 0.5444797159390261, "grad_norm": 0.8303850889205933, "learning_rate": 1.3569669442157999e-05, "loss": 0.058, "step": 30745 }, { "epoch": 0.5444974254760545, "grad_norm": 0.5065139532089233, "learning_rate": 1.3568813000340294e-05, "loss": 0.0711, "step": 30746 }, { "epoch": 0.5445151350130829, "grad_norm": 0.740490198135376, "learning_rate": 1.3567956563231049e-05, "loss": 0.0746, "step": 30747 }, { "epoch": 0.5445328445501113, "grad_norm": 0.5233662724494934, "learning_rate": 1.3567100130833091e-05, "loss": 0.0714, "step": 30748 }, { "epoch": 0.5445505540871398, "grad_norm": 0.377498060464859, "learning_rate": 1.356624370314922e-05, "loss": 0.0565, "step": 30749 }, { "epoch": 0.5445682636241682, "grad_norm": 0.8269138336181641, "learning_rate": 1.3565387280182263e-05, "loss": 0.081, "step": 30750 }, { "epoch": 0.5445859731611966, "grad_norm": 0.28384584188461304, "learning_rate": 1.3564530861935041e-05, "loss": 0.07, "step": 30751 }, { "epoch": 0.544603682698225, "grad_norm": 0.6816782355308533, "learning_rate": 1.3563674448410364e-05, "loss": 0.0604, "step": 30752 }, { "epoch": 0.5446213922352535, "grad_norm": 0.6751524806022644, "learning_rate": 1.3562818039611053e-05, "loss": 0.1025, "step": 30753 }, { "epoch": 0.5446391017722819, "grad_norm": 0.8045667409896851, "learning_rate": 1.3561961635539923e-05, "loss": 0.0485, "step": 30754 }, { "epoch": 0.5446568113093103, "grad_norm": 0.8868434429168701, "learning_rate": 1.3561105236199798e-05, "loss": 0.071, "step": 30755 }, { "epoch": 0.5446745208463388, "grad_norm": 0.7585786581039429, "learning_rate": 1.3560248841593489e-05, "loss": 0.0886, "step": 30756 }, { "epoch": 0.5446922303833672, "grad_norm": 0.5284472107887268, "learning_rate": 1.3559392451723815e-05, "loss": 0.0789, "step": 30757 }, { "epoch": 0.5447099399203956, "grad_norm": 0.5768498778343201, "learning_rate": 1.35585360665936e-05, "loss": 0.0966, "step": 30758 }, { "epoch": 0.544727649457424, "grad_norm": 0.5481693148612976, "learning_rate": 1.3557679686205648e-05, "loss": 0.0814, "step": 30759 }, { "epoch": 0.5447453589944525, "grad_norm": 0.7540830373764038, "learning_rate": 1.3556823310562785e-05, "loss": 0.0741, "step": 30760 }, { "epoch": 0.5447630685314809, "grad_norm": 0.709452211856842, "learning_rate": 1.3555966939667827e-05, "loss": 0.0616, "step": 30761 }, { "epoch": 0.5447807780685093, "grad_norm": 0.5688100457191467, "learning_rate": 1.3555110573523599e-05, "loss": 0.0394, "step": 30762 }, { "epoch": 0.5447984876055377, "grad_norm": 0.5722681879997253, "learning_rate": 1.3554254212132906e-05, "loss": 0.0645, "step": 30763 }, { "epoch": 0.5448161971425662, "grad_norm": 0.963413655757904, "learning_rate": 1.3553397855498567e-05, "loss": 0.0914, "step": 30764 }, { "epoch": 0.5448339066795946, "grad_norm": 0.9647036790847778, "learning_rate": 1.355254150362341e-05, "loss": 0.0861, "step": 30765 }, { "epoch": 0.544851616216623, "grad_norm": 1.0207258462905884, "learning_rate": 1.3551685156510238e-05, "loss": 0.0773, "step": 30766 }, { "epoch": 0.5448693257536514, "grad_norm": 0.574887216091156, "learning_rate": 1.3550828814161878e-05, "loss": 0.0736, "step": 30767 }, { "epoch": 0.5448870352906799, "grad_norm": 0.7953537702560425, "learning_rate": 1.3549972476581144e-05, "loss": 0.0774, "step": 30768 }, { "epoch": 0.5449047448277083, "grad_norm": 0.43914684653282166, "learning_rate": 1.3549116143770857e-05, "loss": 0.0584, "step": 30769 }, { "epoch": 0.5449224543647367, "grad_norm": 0.5943748354911804, "learning_rate": 1.3548259815733828e-05, "loss": 0.0466, "step": 30770 }, { "epoch": 0.5449401639017653, "grad_norm": 0.5368626117706299, "learning_rate": 1.3547403492472877e-05, "loss": 0.073, "step": 30771 }, { "epoch": 0.5449578734387936, "grad_norm": 0.8331804275512695, "learning_rate": 1.3546547173990823e-05, "loss": 0.0764, "step": 30772 }, { "epoch": 0.544975582975822, "grad_norm": 0.5612620711326599, "learning_rate": 1.3545690860290486e-05, "loss": 0.0598, "step": 30773 }, { "epoch": 0.5449932925128504, "grad_norm": 0.8219890594482422, "learning_rate": 1.3544834551374671e-05, "loss": 0.0888, "step": 30774 }, { "epoch": 0.545011002049879, "grad_norm": 0.8475660085678101, "learning_rate": 1.3543978247246206e-05, "loss": 0.0651, "step": 30775 }, { "epoch": 0.5450287115869074, "grad_norm": 0.6105208992958069, "learning_rate": 1.3543121947907912e-05, "loss": 0.0604, "step": 30776 }, { "epoch": 0.5450464211239358, "grad_norm": 0.7601628303527832, "learning_rate": 1.3542265653362593e-05, "loss": 0.0688, "step": 30777 }, { "epoch": 0.5450641306609642, "grad_norm": 0.9978309273719788, "learning_rate": 1.354140936361307e-05, "loss": 0.0796, "step": 30778 }, { "epoch": 0.5450818401979927, "grad_norm": 0.555691659450531, "learning_rate": 1.3540553078662164e-05, "loss": 0.039, "step": 30779 }, { "epoch": 0.5450995497350211, "grad_norm": 1.4729220867156982, "learning_rate": 1.3539696798512697e-05, "loss": 0.0543, "step": 30780 }, { "epoch": 0.5451172592720495, "grad_norm": 0.7137302160263062, "learning_rate": 1.3538840523167473e-05, "loss": 0.0469, "step": 30781 }, { "epoch": 0.5451349688090779, "grad_norm": 0.43211910128593445, "learning_rate": 1.3537984252629317e-05, "loss": 0.085, "step": 30782 }, { "epoch": 0.5451526783461064, "grad_norm": 0.5915550589561462, "learning_rate": 1.3537127986901048e-05, "loss": 0.0582, "step": 30783 }, { "epoch": 0.5451703878831348, "grad_norm": 0.9298032522201538, "learning_rate": 1.353627172598548e-05, "loss": 0.0801, "step": 30784 }, { "epoch": 0.5451880974201632, "grad_norm": 0.22225268185138702, "learning_rate": 1.3535415469885426e-05, "loss": 0.0401, "step": 30785 }, { "epoch": 0.5452058069571917, "grad_norm": 0.4826403558254242, "learning_rate": 1.3534559218603708e-05, "loss": 0.0677, "step": 30786 }, { "epoch": 0.5452235164942201, "grad_norm": 0.7191269993782043, "learning_rate": 1.3533702972143148e-05, "loss": 0.0716, "step": 30787 }, { "epoch": 0.5452412260312485, "grad_norm": 0.6832802295684814, "learning_rate": 1.3532846730506548e-05, "loss": 0.1018, "step": 30788 }, { "epoch": 0.5452589355682769, "grad_norm": 0.3924318253993988, "learning_rate": 1.3531990493696737e-05, "loss": 0.0671, "step": 30789 }, { "epoch": 0.5452766451053054, "grad_norm": 0.54818195104599, "learning_rate": 1.3531134261716534e-05, "loss": 0.1078, "step": 30790 }, { "epoch": 0.5452943546423338, "grad_norm": 0.8585382103919983, "learning_rate": 1.3530278034568747e-05, "loss": 0.0927, "step": 30791 }, { "epoch": 0.5453120641793622, "grad_norm": 0.6791592836380005, "learning_rate": 1.3529421812256194e-05, "loss": 0.0813, "step": 30792 }, { "epoch": 0.5453297737163906, "grad_norm": 0.44771015644073486, "learning_rate": 1.3528565594781696e-05, "loss": 0.0617, "step": 30793 }, { "epoch": 0.5453474832534191, "grad_norm": 0.7128124237060547, "learning_rate": 1.3527709382148071e-05, "loss": 0.0539, "step": 30794 }, { "epoch": 0.5453651927904475, "grad_norm": 0.7720430493354797, "learning_rate": 1.352685317435813e-05, "loss": 0.0666, "step": 30795 }, { "epoch": 0.5453829023274759, "grad_norm": 0.6026961803436279, "learning_rate": 1.3525996971414694e-05, "loss": 0.0676, "step": 30796 }, { "epoch": 0.5454006118645043, "grad_norm": 1.4204429388046265, "learning_rate": 1.352514077332058e-05, "loss": 0.0903, "step": 30797 }, { "epoch": 0.5454183214015328, "grad_norm": 0.9907400608062744, "learning_rate": 1.3524284580078601e-05, "loss": 0.0856, "step": 30798 }, { "epoch": 0.5454360309385612, "grad_norm": 0.4758605360984802, "learning_rate": 1.3523428391691577e-05, "loss": 0.0641, "step": 30799 }, { "epoch": 0.5454537404755896, "grad_norm": 0.5306340456008911, "learning_rate": 1.3522572208162324e-05, "loss": 0.0539, "step": 30800 }, { "epoch": 0.5454714500126181, "grad_norm": 0.4405331015586853, "learning_rate": 1.3521716029493667e-05, "loss": 0.0663, "step": 30801 }, { "epoch": 0.5454891595496465, "grad_norm": 0.43254509568214417, "learning_rate": 1.3520859855688408e-05, "loss": 0.0647, "step": 30802 }, { "epoch": 0.5455068690866749, "grad_norm": 0.5060455203056335, "learning_rate": 1.3520003686749366e-05, "loss": 0.0979, "step": 30803 }, { "epoch": 0.5455245786237033, "grad_norm": 0.5902637839317322, "learning_rate": 1.3519147522679364e-05, "loss": 0.0554, "step": 30804 }, { "epoch": 0.5455422881607318, "grad_norm": 0.5702007412910461, "learning_rate": 1.3518291363481226e-05, "loss": 0.0767, "step": 30805 }, { "epoch": 0.5455599976977602, "grad_norm": 0.7725769281387329, "learning_rate": 1.3517435209157752e-05, "loss": 0.0747, "step": 30806 }, { "epoch": 0.5455777072347886, "grad_norm": 0.673671543598175, "learning_rate": 1.3516579059711767e-05, "loss": 0.0646, "step": 30807 }, { "epoch": 0.545595416771817, "grad_norm": 0.7298747301101685, "learning_rate": 1.3515722915146089e-05, "loss": 0.086, "step": 30808 }, { "epoch": 0.5456131263088455, "grad_norm": 0.8402894139289856, "learning_rate": 1.3514866775463529e-05, "loss": 0.0824, "step": 30809 }, { "epoch": 0.5456308358458739, "grad_norm": 0.4205757677555084, "learning_rate": 1.3514010640666909e-05, "loss": 0.0871, "step": 30810 }, { "epoch": 0.5456485453829023, "grad_norm": 0.4072173535823822, "learning_rate": 1.3513154510759041e-05, "loss": 0.0671, "step": 30811 }, { "epoch": 0.5456662549199307, "grad_norm": 0.4868699908256531, "learning_rate": 1.351229838574275e-05, "loss": 0.0402, "step": 30812 }, { "epoch": 0.5456839644569592, "grad_norm": 0.3095068335533142, "learning_rate": 1.3511442265620842e-05, "loss": 0.0495, "step": 30813 }, { "epoch": 0.5457016739939876, "grad_norm": 0.7412969470024109, "learning_rate": 1.351058615039614e-05, "loss": 0.0562, "step": 30814 }, { "epoch": 0.545719383531016, "grad_norm": 0.44653329253196716, "learning_rate": 1.3509730040071464e-05, "loss": 0.0745, "step": 30815 }, { "epoch": 0.5457370930680445, "grad_norm": 0.9363341331481934, "learning_rate": 1.350887393464962e-05, "loss": 0.0815, "step": 30816 }, { "epoch": 0.5457548026050729, "grad_norm": 0.42798060178756714, "learning_rate": 1.3508017834133428e-05, "loss": 0.0659, "step": 30817 }, { "epoch": 0.5457725121421013, "grad_norm": 0.7526681423187256, "learning_rate": 1.3507161738525706e-05, "loss": 0.0553, "step": 30818 }, { "epoch": 0.5457902216791297, "grad_norm": 0.5442955493927002, "learning_rate": 1.3506305647829279e-05, "loss": 0.0485, "step": 30819 }, { "epoch": 0.5458079312161582, "grad_norm": 0.5230451226234436, "learning_rate": 1.3505449562046947e-05, "loss": 0.0717, "step": 30820 }, { "epoch": 0.5458256407531866, "grad_norm": 0.316387802362442, "learning_rate": 1.3504593481181537e-05, "loss": 0.0549, "step": 30821 }, { "epoch": 0.545843350290215, "grad_norm": 0.5769056677818298, "learning_rate": 1.3503737405235867e-05, "loss": 0.0734, "step": 30822 }, { "epoch": 0.5458610598272434, "grad_norm": 0.5182129740715027, "learning_rate": 1.3502881334212745e-05, "loss": 0.0597, "step": 30823 }, { "epoch": 0.5458787693642719, "grad_norm": 0.6214693188667297, "learning_rate": 1.3502025268114992e-05, "loss": 0.0876, "step": 30824 }, { "epoch": 0.5458964789013003, "grad_norm": 1.0913901329040527, "learning_rate": 1.3501169206945425e-05, "loss": 0.0944, "step": 30825 }, { "epoch": 0.5459141884383287, "grad_norm": 0.8176056146621704, "learning_rate": 1.3500313150706864e-05, "loss": 0.0589, "step": 30826 }, { "epoch": 0.5459318979753571, "grad_norm": 1.3774062395095825, "learning_rate": 1.3499457099402116e-05, "loss": 0.0862, "step": 30827 }, { "epoch": 0.5459496075123856, "grad_norm": 0.7988319396972656, "learning_rate": 1.3498601053034003e-05, "loss": 0.0812, "step": 30828 }, { "epoch": 0.545967317049414, "grad_norm": 0.5861194729804993, "learning_rate": 1.3497745011605347e-05, "loss": 0.0673, "step": 30829 }, { "epoch": 0.5459850265864424, "grad_norm": 0.3628455698490143, "learning_rate": 1.3496888975118952e-05, "loss": 0.0564, "step": 30830 }, { "epoch": 0.5460027361234709, "grad_norm": 0.5059532523155212, "learning_rate": 1.349603294357764e-05, "loss": 0.0476, "step": 30831 }, { "epoch": 0.5460204456604993, "grad_norm": 0.35354626178741455, "learning_rate": 1.3495176916984223e-05, "loss": 0.07, "step": 30832 }, { "epoch": 0.5460381551975277, "grad_norm": 0.8293258547782898, "learning_rate": 1.3494320895341534e-05, "loss": 0.0878, "step": 30833 }, { "epoch": 0.5460558647345561, "grad_norm": 0.7117290496826172, "learning_rate": 1.3493464878652369e-05, "loss": 0.0589, "step": 30834 }, { "epoch": 0.5460735742715846, "grad_norm": 0.41477102041244507, "learning_rate": 1.3492608866919552e-05, "loss": 0.0655, "step": 30835 }, { "epoch": 0.546091283808613, "grad_norm": 0.43151721358299255, "learning_rate": 1.34917528601459e-05, "loss": 0.0476, "step": 30836 }, { "epoch": 0.5461089933456414, "grad_norm": 0.673876941204071, "learning_rate": 1.3490896858334233e-05, "loss": 0.0717, "step": 30837 }, { "epoch": 0.5461267028826698, "grad_norm": 0.45803695917129517, "learning_rate": 1.3490040861487355e-05, "loss": 0.0546, "step": 30838 }, { "epoch": 0.5461444124196984, "grad_norm": 0.4437621831893921, "learning_rate": 1.3489184869608094e-05, "loss": 0.0544, "step": 30839 }, { "epoch": 0.5461621219567268, "grad_norm": 0.8373185992240906, "learning_rate": 1.3488328882699263e-05, "loss": 0.0683, "step": 30840 }, { "epoch": 0.5461798314937552, "grad_norm": 0.7498213052749634, "learning_rate": 1.3487472900763675e-05, "loss": 0.0588, "step": 30841 }, { "epoch": 0.5461975410307836, "grad_norm": 0.9157915115356445, "learning_rate": 1.3486616923804147e-05, "loss": 0.0883, "step": 30842 }, { "epoch": 0.5462152505678121, "grad_norm": 0.44844794273376465, "learning_rate": 1.3485760951823499e-05, "loss": 0.057, "step": 30843 }, { "epoch": 0.5462329601048405, "grad_norm": 0.5507733821868896, "learning_rate": 1.3484904984824548e-05, "loss": 0.0579, "step": 30844 }, { "epoch": 0.5462506696418689, "grad_norm": 0.3131479024887085, "learning_rate": 1.34840490228101e-05, "loss": 0.0727, "step": 30845 }, { "epoch": 0.5462683791788974, "grad_norm": 0.6252608895301819, "learning_rate": 1.348319306578298e-05, "loss": 0.0951, "step": 30846 }, { "epoch": 0.5462860887159258, "grad_norm": 0.4770011901855469, "learning_rate": 1.3482337113746002e-05, "loss": 0.0495, "step": 30847 }, { "epoch": 0.5463037982529542, "grad_norm": 0.702392041683197, "learning_rate": 1.348148116670198e-05, "loss": 0.1032, "step": 30848 }, { "epoch": 0.5463215077899826, "grad_norm": 0.6377902030944824, "learning_rate": 1.348062522465373e-05, "loss": 0.0752, "step": 30849 }, { "epoch": 0.5463392173270111, "grad_norm": 0.6123652458190918, "learning_rate": 1.347976928760407e-05, "loss": 0.0498, "step": 30850 }, { "epoch": 0.5463569268640395, "grad_norm": 0.5179934501647949, "learning_rate": 1.3478913355555816e-05, "loss": 0.1078, "step": 30851 }, { "epoch": 0.5463746364010679, "grad_norm": 0.9169039726257324, "learning_rate": 1.3478057428511784e-05, "loss": 0.1006, "step": 30852 }, { "epoch": 0.5463923459380963, "grad_norm": 0.6934994459152222, "learning_rate": 1.3477201506474787e-05, "loss": 0.0913, "step": 30853 }, { "epoch": 0.5464100554751248, "grad_norm": 0.484800785779953, "learning_rate": 1.3476345589447651e-05, "loss": 0.0488, "step": 30854 }, { "epoch": 0.5464277650121532, "grad_norm": 0.7321929931640625, "learning_rate": 1.3475489677433172e-05, "loss": 0.0733, "step": 30855 }, { "epoch": 0.5464454745491816, "grad_norm": 0.5539242029190063, "learning_rate": 1.3474633770434182e-05, "loss": 0.0619, "step": 30856 }, { "epoch": 0.54646318408621, "grad_norm": 0.45209047198295593, "learning_rate": 1.3473777868453492e-05, "loss": 0.051, "step": 30857 }, { "epoch": 0.5464808936232385, "grad_norm": 0.8613141775131226, "learning_rate": 1.3472921971493926e-05, "loss": 0.0874, "step": 30858 }, { "epoch": 0.5464986031602669, "grad_norm": 1.761705756187439, "learning_rate": 1.3472066079558286e-05, "loss": 0.101, "step": 30859 }, { "epoch": 0.5465163126972953, "grad_norm": 0.33492550253868103, "learning_rate": 1.3471210192649393e-05, "loss": 0.049, "step": 30860 }, { "epoch": 0.5465340222343238, "grad_norm": 0.6190668940544128, "learning_rate": 1.3470354310770068e-05, "loss": 0.0723, "step": 30861 }, { "epoch": 0.5465517317713522, "grad_norm": 0.4384116232395172, "learning_rate": 1.3469498433923117e-05, "loss": 0.0833, "step": 30862 }, { "epoch": 0.5465694413083806, "grad_norm": 0.5255187749862671, "learning_rate": 1.3468642562111365e-05, "loss": 0.0716, "step": 30863 }, { "epoch": 0.546587150845409, "grad_norm": 0.49184495210647583, "learning_rate": 1.346778669533762e-05, "loss": 0.0537, "step": 30864 }, { "epoch": 0.5466048603824375, "grad_norm": 0.6474063992500305, "learning_rate": 1.346693083360471e-05, "loss": 0.0567, "step": 30865 }, { "epoch": 0.5466225699194659, "grad_norm": 0.36378517746925354, "learning_rate": 1.3466074976915436e-05, "loss": 0.073, "step": 30866 }, { "epoch": 0.5466402794564943, "grad_norm": 0.8137747645378113, "learning_rate": 1.346521912527262e-05, "loss": 0.0734, "step": 30867 }, { "epoch": 0.5466579889935227, "grad_norm": 0.4961371421813965, "learning_rate": 1.3464363278679084e-05, "loss": 0.0952, "step": 30868 }, { "epoch": 0.5466756985305512, "grad_norm": 0.7453979849815369, "learning_rate": 1.346350743713763e-05, "loss": 0.0792, "step": 30869 }, { "epoch": 0.5466934080675796, "grad_norm": 0.5525342226028442, "learning_rate": 1.3462651600651083e-05, "loss": 0.0644, "step": 30870 }, { "epoch": 0.546711117604608, "grad_norm": 1.1347826719284058, "learning_rate": 1.3461795769222258e-05, "loss": 0.0935, "step": 30871 }, { "epoch": 0.5467288271416364, "grad_norm": 0.7115172743797302, "learning_rate": 1.3460939942853973e-05, "loss": 0.0431, "step": 30872 }, { "epoch": 0.5467465366786649, "grad_norm": 0.5623276233673096, "learning_rate": 1.3460084121549036e-05, "loss": 0.0741, "step": 30873 }, { "epoch": 0.5467642462156933, "grad_norm": 0.9718701243400574, "learning_rate": 1.3459228305310266e-05, "loss": 0.0984, "step": 30874 }, { "epoch": 0.5467819557527217, "grad_norm": 0.48161056637763977, "learning_rate": 1.345837249414048e-05, "loss": 0.0604, "step": 30875 }, { "epoch": 0.5467996652897502, "grad_norm": 0.757290244102478, "learning_rate": 1.3457516688042493e-05, "loss": 0.0608, "step": 30876 }, { "epoch": 0.5468173748267786, "grad_norm": 0.6868053078651428, "learning_rate": 1.345666088701912e-05, "loss": 0.0658, "step": 30877 }, { "epoch": 0.546835084363807, "grad_norm": 0.4043591022491455, "learning_rate": 1.3455805091073173e-05, "loss": 0.0676, "step": 30878 }, { "epoch": 0.5468527939008354, "grad_norm": 0.7870240211486816, "learning_rate": 1.3454949300207477e-05, "loss": 0.076, "step": 30879 }, { "epoch": 0.5468705034378639, "grad_norm": 0.624582827091217, "learning_rate": 1.3454093514424838e-05, "loss": 0.0579, "step": 30880 }, { "epoch": 0.5468882129748923, "grad_norm": 0.7223733067512512, "learning_rate": 1.3453237733728074e-05, "loss": 0.0645, "step": 30881 }, { "epoch": 0.5469059225119207, "grad_norm": 0.5509833693504333, "learning_rate": 1.3452381958120002e-05, "loss": 0.0549, "step": 30882 }, { "epoch": 0.5469236320489491, "grad_norm": 0.6978431344032288, "learning_rate": 1.3451526187603443e-05, "loss": 0.0723, "step": 30883 }, { "epoch": 0.5469413415859776, "grad_norm": 0.485643208026886, "learning_rate": 1.3450670422181197e-05, "loss": 0.0527, "step": 30884 }, { "epoch": 0.546959051123006, "grad_norm": 0.6041207313537598, "learning_rate": 1.3449814661856093e-05, "loss": 0.0959, "step": 30885 }, { "epoch": 0.5469767606600344, "grad_norm": 0.6359114050865173, "learning_rate": 1.3448958906630946e-05, "loss": 0.0493, "step": 30886 }, { "epoch": 0.5469944701970628, "grad_norm": 0.9311938285827637, "learning_rate": 1.3448103156508562e-05, "loss": 0.0804, "step": 30887 }, { "epoch": 0.5470121797340913, "grad_norm": 0.7355273962020874, "learning_rate": 1.3447247411491763e-05, "loss": 0.0859, "step": 30888 }, { "epoch": 0.5470298892711197, "grad_norm": 1.0760653018951416, "learning_rate": 1.344639167158336e-05, "loss": 0.0823, "step": 30889 }, { "epoch": 0.5470475988081481, "grad_norm": 0.5937033891677856, "learning_rate": 1.3445535936786176e-05, "loss": 0.0495, "step": 30890 }, { "epoch": 0.5470653083451766, "grad_norm": 0.3772096335887909, "learning_rate": 1.3444680207103019e-05, "loss": 0.0668, "step": 30891 }, { "epoch": 0.547083017882205, "grad_norm": 0.48820438981056213, "learning_rate": 1.3443824482536705e-05, "loss": 0.0551, "step": 30892 }, { "epoch": 0.5471007274192334, "grad_norm": 0.7580453157424927, "learning_rate": 1.3442968763090057e-05, "loss": 0.0651, "step": 30893 }, { "epoch": 0.5471184369562618, "grad_norm": 0.7869372963905334, "learning_rate": 1.3442113048765879e-05, "loss": 0.0904, "step": 30894 }, { "epoch": 0.5471361464932903, "grad_norm": 0.39869439601898193, "learning_rate": 1.3441257339566991e-05, "loss": 0.0549, "step": 30895 }, { "epoch": 0.5471538560303187, "grad_norm": 0.6381229758262634, "learning_rate": 1.344040163549621e-05, "loss": 0.0537, "step": 30896 }, { "epoch": 0.5471715655673471, "grad_norm": 0.401569664478302, "learning_rate": 1.3439545936556356e-05, "loss": 0.0524, "step": 30897 }, { "epoch": 0.5471892751043755, "grad_norm": 0.687166154384613, "learning_rate": 1.3438690242750229e-05, "loss": 0.0659, "step": 30898 }, { "epoch": 0.547206984641404, "grad_norm": 1.1089181900024414, "learning_rate": 1.3437834554080655e-05, "loss": 0.1229, "step": 30899 }, { "epoch": 0.5472246941784324, "grad_norm": 0.8353850841522217, "learning_rate": 1.3436978870550455e-05, "loss": 0.0896, "step": 30900 }, { "epoch": 0.5472424037154608, "grad_norm": 0.8456692099571228, "learning_rate": 1.3436123192162429e-05, "loss": 0.0989, "step": 30901 }, { "epoch": 0.5472601132524892, "grad_norm": 0.7223150134086609, "learning_rate": 1.34352675189194e-05, "loss": 0.0572, "step": 30902 }, { "epoch": 0.5472778227895178, "grad_norm": 0.8364695906639099, "learning_rate": 1.3434411850824182e-05, "loss": 0.0956, "step": 30903 }, { "epoch": 0.5472955323265462, "grad_norm": 0.9398266077041626, "learning_rate": 1.3433556187879595e-05, "loss": 0.0673, "step": 30904 }, { "epoch": 0.5473132418635746, "grad_norm": 0.4144355356693268, "learning_rate": 1.3432700530088446e-05, "loss": 0.0554, "step": 30905 }, { "epoch": 0.5473309514006031, "grad_norm": 0.5590170621871948, "learning_rate": 1.3431844877453554e-05, "loss": 0.0722, "step": 30906 }, { "epoch": 0.5473486609376315, "grad_norm": 0.4952831268310547, "learning_rate": 1.3430989229977733e-05, "loss": 0.0558, "step": 30907 }, { "epoch": 0.5473663704746599, "grad_norm": 0.4919086992740631, "learning_rate": 1.3430133587663802e-05, "loss": 0.0392, "step": 30908 }, { "epoch": 0.5473840800116883, "grad_norm": 1.0200506448745728, "learning_rate": 1.3429277950514572e-05, "loss": 0.0829, "step": 30909 }, { "epoch": 0.5474017895487168, "grad_norm": 0.8480714559555054, "learning_rate": 1.3428422318532854e-05, "loss": 0.086, "step": 30910 }, { "epoch": 0.5474194990857452, "grad_norm": 0.5818111300468445, "learning_rate": 1.342756669172148e-05, "loss": 0.0925, "step": 30911 }, { "epoch": 0.5474372086227736, "grad_norm": 0.5093564391136169, "learning_rate": 1.342671107008324e-05, "loss": 0.0456, "step": 30912 }, { "epoch": 0.547454918159802, "grad_norm": 0.6284371018409729, "learning_rate": 1.3425855453620961e-05, "loss": 0.0543, "step": 30913 }, { "epoch": 0.5474726276968305, "grad_norm": 0.6144139766693115, "learning_rate": 1.3424999842337462e-05, "loss": 0.0785, "step": 30914 }, { "epoch": 0.5474903372338589, "grad_norm": 0.808383047580719, "learning_rate": 1.342414423623556e-05, "loss": 0.0877, "step": 30915 }, { "epoch": 0.5475080467708873, "grad_norm": 0.6814347505569458, "learning_rate": 1.342328863531806e-05, "loss": 0.0635, "step": 30916 }, { "epoch": 0.5475257563079157, "grad_norm": 0.6185818910598755, "learning_rate": 1.3422433039587778e-05, "loss": 0.088, "step": 30917 }, { "epoch": 0.5475434658449442, "grad_norm": 0.556316077709198, "learning_rate": 1.3421577449047535e-05, "loss": 0.0811, "step": 30918 }, { "epoch": 0.5475611753819726, "grad_norm": 0.8522481918334961, "learning_rate": 1.3420721863700139e-05, "loss": 0.0621, "step": 30919 }, { "epoch": 0.547578884919001, "grad_norm": 0.6792196035385132, "learning_rate": 1.341986628354841e-05, "loss": 0.0676, "step": 30920 }, { "epoch": 0.5475965944560295, "grad_norm": 0.3926471769809723, "learning_rate": 1.341901070859516e-05, "loss": 0.0602, "step": 30921 }, { "epoch": 0.5476143039930579, "grad_norm": 0.5301864147186279, "learning_rate": 1.341815513884321e-05, "loss": 0.07, "step": 30922 }, { "epoch": 0.5476320135300863, "grad_norm": 0.4787618815898895, "learning_rate": 1.3417299574295364e-05, "loss": 0.0876, "step": 30923 }, { "epoch": 0.5476497230671147, "grad_norm": 0.5745658874511719, "learning_rate": 1.3416444014954444e-05, "loss": 0.0469, "step": 30924 }, { "epoch": 0.5476674326041432, "grad_norm": 0.44103044271469116, "learning_rate": 1.3415588460823269e-05, "loss": 0.0612, "step": 30925 }, { "epoch": 0.5476851421411716, "grad_norm": 0.8301287889480591, "learning_rate": 1.3414732911904639e-05, "loss": 0.0918, "step": 30926 }, { "epoch": 0.5477028516782, "grad_norm": 0.7598839402198792, "learning_rate": 1.3413877368201378e-05, "loss": 0.09, "step": 30927 }, { "epoch": 0.5477205612152284, "grad_norm": 0.9054822325706482, "learning_rate": 1.34130218297163e-05, "loss": 0.0641, "step": 30928 }, { "epoch": 0.5477382707522569, "grad_norm": 0.7196367979049683, "learning_rate": 1.3412166296452228e-05, "loss": 0.0818, "step": 30929 }, { "epoch": 0.5477559802892853, "grad_norm": 0.7948513627052307, "learning_rate": 1.3411310768411961e-05, "loss": 0.0786, "step": 30930 }, { "epoch": 0.5477736898263137, "grad_norm": 0.7462849617004395, "learning_rate": 1.3410455245598321e-05, "loss": 0.0863, "step": 30931 }, { "epoch": 0.5477913993633421, "grad_norm": 0.6790282726287842, "learning_rate": 1.3409599728014127e-05, "loss": 0.1023, "step": 30932 }, { "epoch": 0.5478091089003706, "grad_norm": 0.6491605639457703, "learning_rate": 1.3408744215662186e-05, "loss": 0.0841, "step": 30933 }, { "epoch": 0.547826818437399, "grad_norm": 0.5955706834793091, "learning_rate": 1.3407888708545311e-05, "loss": 0.0741, "step": 30934 }, { "epoch": 0.5478445279744274, "grad_norm": 0.5559658408164978, "learning_rate": 1.3407033206666326e-05, "loss": 0.0669, "step": 30935 }, { "epoch": 0.5478622375114559, "grad_norm": 0.4270778000354767, "learning_rate": 1.3406177710028042e-05, "loss": 0.0701, "step": 30936 }, { "epoch": 0.5478799470484843, "grad_norm": 0.6148866415023804, "learning_rate": 1.340532221863327e-05, "loss": 0.0829, "step": 30937 }, { "epoch": 0.5478976565855127, "grad_norm": 0.6506413221359253, "learning_rate": 1.3404466732484825e-05, "loss": 0.0426, "step": 30938 }, { "epoch": 0.5479153661225411, "grad_norm": 0.5001291036605835, "learning_rate": 1.340361125158553e-05, "loss": 0.0631, "step": 30939 }, { "epoch": 0.5479330756595696, "grad_norm": 0.588507354259491, "learning_rate": 1.3402755775938187e-05, "loss": 0.053, "step": 30940 }, { "epoch": 0.547950785196598, "grad_norm": 0.40338069200515747, "learning_rate": 1.3401900305545616e-05, "loss": 0.052, "step": 30941 }, { "epoch": 0.5479684947336264, "grad_norm": 0.4490584433078766, "learning_rate": 1.3401044840410628e-05, "loss": 0.0482, "step": 30942 }, { "epoch": 0.5479862042706548, "grad_norm": 0.6216120719909668, "learning_rate": 1.3400189380536049e-05, "loss": 0.0522, "step": 30943 }, { "epoch": 0.5480039138076833, "grad_norm": 0.5173248648643494, "learning_rate": 1.339933392592468e-05, "loss": 0.0663, "step": 30944 }, { "epoch": 0.5480216233447117, "grad_norm": 0.5977834463119507, "learning_rate": 1.339847847657934e-05, "loss": 0.0367, "step": 30945 }, { "epoch": 0.5480393328817401, "grad_norm": 0.7116206884384155, "learning_rate": 1.3397623032502844e-05, "loss": 0.0851, "step": 30946 }, { "epoch": 0.5480570424187685, "grad_norm": 0.6590853929519653, "learning_rate": 1.3396767593698008e-05, "loss": 0.0504, "step": 30947 }, { "epoch": 0.548074751955797, "grad_norm": 0.680942714214325, "learning_rate": 1.3395912160167643e-05, "loss": 0.0662, "step": 30948 }, { "epoch": 0.5480924614928254, "grad_norm": 0.3096373677253723, "learning_rate": 1.3395056731914566e-05, "loss": 0.0613, "step": 30949 }, { "epoch": 0.5481101710298538, "grad_norm": 0.3487984240055084, "learning_rate": 1.339420130894159e-05, "loss": 0.0532, "step": 30950 }, { "epoch": 0.5481278805668823, "grad_norm": 0.6897775530815125, "learning_rate": 1.3393345891251529e-05, "loss": 0.0434, "step": 30951 }, { "epoch": 0.5481455901039107, "grad_norm": 0.460610568523407, "learning_rate": 1.3392490478847195e-05, "loss": 0.07, "step": 30952 }, { "epoch": 0.5481632996409391, "grad_norm": 0.6345202922821045, "learning_rate": 1.3391635071731405e-05, "loss": 0.0574, "step": 30953 }, { "epoch": 0.5481810091779675, "grad_norm": 0.44128572940826416, "learning_rate": 1.339077966990698e-05, "loss": 0.0664, "step": 30954 }, { "epoch": 0.548198718714996, "grad_norm": 0.7010590434074402, "learning_rate": 1.3389924273376722e-05, "loss": 0.0558, "step": 30955 }, { "epoch": 0.5482164282520244, "grad_norm": 0.805575966835022, "learning_rate": 1.3389068882143445e-05, "loss": 0.083, "step": 30956 }, { "epoch": 0.5482341377890528, "grad_norm": 0.4865102469921112, "learning_rate": 1.3388213496209978e-05, "loss": 0.0605, "step": 30957 }, { "epoch": 0.5482518473260812, "grad_norm": 0.13618843257427216, "learning_rate": 1.3387358115579123e-05, "loss": 0.0532, "step": 30958 }, { "epoch": 0.5482695568631097, "grad_norm": 0.9260938763618469, "learning_rate": 1.3386502740253692e-05, "loss": 0.053, "step": 30959 }, { "epoch": 0.5482872664001381, "grad_norm": 0.529201090335846, "learning_rate": 1.3385647370236505e-05, "loss": 0.0672, "step": 30960 }, { "epoch": 0.5483049759371665, "grad_norm": 1.0270004272460938, "learning_rate": 1.338479200553038e-05, "loss": 0.064, "step": 30961 }, { "epoch": 0.5483226854741949, "grad_norm": 0.8027679324150085, "learning_rate": 1.3383936646138122e-05, "loss": 0.0718, "step": 30962 }, { "epoch": 0.5483403950112234, "grad_norm": 0.5440306663513184, "learning_rate": 1.3383081292062548e-05, "loss": 0.0535, "step": 30963 }, { "epoch": 0.5483581045482518, "grad_norm": 0.6154869198799133, "learning_rate": 1.3382225943306477e-05, "loss": 0.064, "step": 30964 }, { "epoch": 0.5483758140852802, "grad_norm": 0.4754961133003235, "learning_rate": 1.3381370599872716e-05, "loss": 0.0839, "step": 30965 }, { "epoch": 0.5483935236223088, "grad_norm": 0.7881823182106018, "learning_rate": 1.3380515261764082e-05, "loss": 0.0678, "step": 30966 }, { "epoch": 0.5484112331593372, "grad_norm": 0.749421238899231, "learning_rate": 1.3379659928983388e-05, "loss": 0.0416, "step": 30967 }, { "epoch": 0.5484289426963656, "grad_norm": 0.5507725477218628, "learning_rate": 1.3378804601533455e-05, "loss": 0.069, "step": 30968 }, { "epoch": 0.548446652233394, "grad_norm": 0.5899897217750549, "learning_rate": 1.3377949279417086e-05, "loss": 0.0705, "step": 30969 }, { "epoch": 0.5484643617704225, "grad_norm": 0.5457793474197388, "learning_rate": 1.3377093962637098e-05, "loss": 0.1054, "step": 30970 }, { "epoch": 0.5484820713074509, "grad_norm": 0.569095253944397, "learning_rate": 1.337623865119631e-05, "loss": 0.0654, "step": 30971 }, { "epoch": 0.5484997808444793, "grad_norm": 0.9632067084312439, "learning_rate": 1.3375383345097532e-05, "loss": 0.1153, "step": 30972 }, { "epoch": 0.5485174903815077, "grad_norm": 1.0550881624221802, "learning_rate": 1.3374528044343576e-05, "loss": 0.085, "step": 30973 }, { "epoch": 0.5485351999185362, "grad_norm": 0.7115805149078369, "learning_rate": 1.3373672748937259e-05, "loss": 0.0942, "step": 30974 }, { "epoch": 0.5485529094555646, "grad_norm": 0.8142768144607544, "learning_rate": 1.3372817458881398e-05, "loss": 0.073, "step": 30975 }, { "epoch": 0.548570618992593, "grad_norm": 0.7868286967277527, "learning_rate": 1.33719621741788e-05, "loss": 0.0923, "step": 30976 }, { "epoch": 0.5485883285296214, "grad_norm": 0.5154991149902344, "learning_rate": 1.3371106894832281e-05, "loss": 0.0611, "step": 30977 }, { "epoch": 0.5486060380666499, "grad_norm": 0.22605881094932556, "learning_rate": 1.3370251620844654e-05, "loss": 0.0682, "step": 30978 }, { "epoch": 0.5486237476036783, "grad_norm": 0.46818336844444275, "learning_rate": 1.3369396352218741e-05, "loss": 0.0786, "step": 30979 }, { "epoch": 0.5486414571407067, "grad_norm": 0.906317949295044, "learning_rate": 1.3368541088957343e-05, "loss": 0.073, "step": 30980 }, { "epoch": 0.5486591666777352, "grad_norm": 0.5293228626251221, "learning_rate": 1.3367685831063283e-05, "loss": 0.0535, "step": 30981 }, { "epoch": 0.5486768762147636, "grad_norm": 0.6002565622329712, "learning_rate": 1.3366830578539374e-05, "loss": 0.0477, "step": 30982 }, { "epoch": 0.548694585751792, "grad_norm": 0.4409104287624359, "learning_rate": 1.3365975331388424e-05, "loss": 0.0491, "step": 30983 }, { "epoch": 0.5487122952888204, "grad_norm": 0.7192304134368896, "learning_rate": 1.3365120089613248e-05, "loss": 0.051, "step": 30984 }, { "epoch": 0.5487300048258489, "grad_norm": 0.7201111316680908, "learning_rate": 1.3364264853216659e-05, "loss": 0.0474, "step": 30985 }, { "epoch": 0.5487477143628773, "grad_norm": 0.5461027026176453, "learning_rate": 1.3363409622201483e-05, "loss": 0.0542, "step": 30986 }, { "epoch": 0.5487654238999057, "grad_norm": 0.8674190640449524, "learning_rate": 1.3362554396570516e-05, "loss": 0.0697, "step": 30987 }, { "epoch": 0.5487831334369341, "grad_norm": 0.5485538840293884, "learning_rate": 1.3361699176326581e-05, "loss": 0.0455, "step": 30988 }, { "epoch": 0.5488008429739626, "grad_norm": 0.4752744436264038, "learning_rate": 1.3360843961472494e-05, "loss": 0.0468, "step": 30989 }, { "epoch": 0.548818552510991, "grad_norm": 0.509800910949707, "learning_rate": 1.335998875201106e-05, "loss": 0.0634, "step": 30990 }, { "epoch": 0.5488362620480194, "grad_norm": 0.6418960094451904, "learning_rate": 1.3359133547945098e-05, "loss": 0.0763, "step": 30991 }, { "epoch": 0.5488539715850478, "grad_norm": 0.7035183906555176, "learning_rate": 1.335827834927742e-05, "loss": 0.0976, "step": 30992 }, { "epoch": 0.5488716811220763, "grad_norm": 0.559123158454895, "learning_rate": 1.3357423156010841e-05, "loss": 0.0665, "step": 30993 }, { "epoch": 0.5488893906591047, "grad_norm": 1.1187480688095093, "learning_rate": 1.3356567968148172e-05, "loss": 0.0826, "step": 30994 }, { "epoch": 0.5489071001961331, "grad_norm": 0.4564201235771179, "learning_rate": 1.335571278569223e-05, "loss": 0.0687, "step": 30995 }, { "epoch": 0.5489248097331616, "grad_norm": 0.5277755856513977, "learning_rate": 1.3354857608645833e-05, "loss": 0.0538, "step": 30996 }, { "epoch": 0.54894251927019, "grad_norm": 0.42552465200424194, "learning_rate": 1.335400243701178e-05, "loss": 0.0781, "step": 30997 }, { "epoch": 0.5489602288072184, "grad_norm": 0.7103613615036011, "learning_rate": 1.3353147270792892e-05, "loss": 0.0659, "step": 30998 }, { "epoch": 0.5489779383442468, "grad_norm": 0.877417266368866, "learning_rate": 1.3352292109991986e-05, "loss": 0.0651, "step": 30999 }, { "epoch": 0.5489956478812753, "grad_norm": 0.5633000731468201, "learning_rate": 1.3351436954611872e-05, "loss": 0.0677, "step": 31000 }, { "epoch": 0.5490133574183037, "grad_norm": 0.5918171405792236, "learning_rate": 1.3350581804655362e-05, "loss": 0.0721, "step": 31001 }, { "epoch": 0.5490310669553321, "grad_norm": 0.7464267611503601, "learning_rate": 1.3349726660125271e-05, "loss": 0.079, "step": 31002 }, { "epoch": 0.5490487764923605, "grad_norm": 0.3511817753314972, "learning_rate": 1.3348871521024415e-05, "loss": 0.0569, "step": 31003 }, { "epoch": 0.549066486029389, "grad_norm": 0.9749866724014282, "learning_rate": 1.3348016387355602e-05, "loss": 0.092, "step": 31004 }, { "epoch": 0.5490841955664174, "grad_norm": 0.5510485768318176, "learning_rate": 1.334716125912165e-05, "loss": 0.0361, "step": 31005 }, { "epoch": 0.5491019051034458, "grad_norm": 0.46686914563179016, "learning_rate": 1.3346306136325368e-05, "loss": 0.1045, "step": 31006 }, { "epoch": 0.5491196146404742, "grad_norm": 0.40242913365364075, "learning_rate": 1.3345451018969578e-05, "loss": 0.07, "step": 31007 }, { "epoch": 0.5491373241775027, "grad_norm": 0.6202008724212646, "learning_rate": 1.3344595907057077e-05, "loss": 0.0704, "step": 31008 }, { "epoch": 0.5491550337145311, "grad_norm": 0.586672306060791, "learning_rate": 1.3343740800590692e-05, "loss": 0.0445, "step": 31009 }, { "epoch": 0.5491727432515595, "grad_norm": 0.6756633520126343, "learning_rate": 1.3342885699573233e-05, "loss": 0.0644, "step": 31010 }, { "epoch": 0.549190452788588, "grad_norm": 0.90128493309021, "learning_rate": 1.3342030604007518e-05, "loss": 0.1064, "step": 31011 }, { "epoch": 0.5492081623256164, "grad_norm": 0.7224963903427124, "learning_rate": 1.3341175513896346e-05, "loss": 0.0898, "step": 31012 }, { "epoch": 0.5492258718626448, "grad_norm": 0.7619264125823975, "learning_rate": 1.3340320429242543e-05, "loss": 0.0689, "step": 31013 }, { "epoch": 0.5492435813996732, "grad_norm": 0.6325697302818298, "learning_rate": 1.3339465350048918e-05, "loss": 0.0646, "step": 31014 }, { "epoch": 0.5492612909367017, "grad_norm": 0.5931451916694641, "learning_rate": 1.3338610276318282e-05, "loss": 0.0755, "step": 31015 }, { "epoch": 0.5492790004737301, "grad_norm": 0.4840798079967499, "learning_rate": 1.333775520805345e-05, "loss": 0.1025, "step": 31016 }, { "epoch": 0.5492967100107585, "grad_norm": 0.6473061442375183, "learning_rate": 1.3336900145257234e-05, "loss": 0.063, "step": 31017 }, { "epoch": 0.5493144195477869, "grad_norm": 0.5501394867897034, "learning_rate": 1.3336045087932452e-05, "loss": 0.0933, "step": 31018 }, { "epoch": 0.5493321290848154, "grad_norm": 0.9244354963302612, "learning_rate": 1.3335190036081913e-05, "loss": 0.0848, "step": 31019 }, { "epoch": 0.5493498386218438, "grad_norm": 0.7404205799102783, "learning_rate": 1.3334334989708429e-05, "loss": 0.1032, "step": 31020 }, { "epoch": 0.5493675481588722, "grad_norm": 0.5672847628593445, "learning_rate": 1.3333479948814818e-05, "loss": 0.0646, "step": 31021 }, { "epoch": 0.5493852576959006, "grad_norm": 0.39583179354667664, "learning_rate": 1.3332624913403883e-05, "loss": 0.0753, "step": 31022 }, { "epoch": 0.5494029672329291, "grad_norm": 0.676813006401062, "learning_rate": 1.3331769883478444e-05, "loss": 0.0855, "step": 31023 }, { "epoch": 0.5494206767699575, "grad_norm": 0.3391912877559662, "learning_rate": 1.3330914859041316e-05, "loss": 0.0449, "step": 31024 }, { "epoch": 0.5494383863069859, "grad_norm": 0.599411129951477, "learning_rate": 1.3330059840095315e-05, "loss": 0.0508, "step": 31025 }, { "epoch": 0.5494560958440144, "grad_norm": 0.7158253788948059, "learning_rate": 1.3329204826643242e-05, "loss": 0.0756, "step": 31026 }, { "epoch": 0.5494738053810428, "grad_norm": 0.5127002596855164, "learning_rate": 1.3328349818687914e-05, "loss": 0.0389, "step": 31027 }, { "epoch": 0.5494915149180712, "grad_norm": 0.4056156277656555, "learning_rate": 1.3327494816232152e-05, "loss": 0.0543, "step": 31028 }, { "epoch": 0.5495092244550996, "grad_norm": 0.6898962259292603, "learning_rate": 1.3326639819278758e-05, "loss": 0.0867, "step": 31029 }, { "epoch": 0.5495269339921282, "grad_norm": 0.8984578847885132, "learning_rate": 1.332578482783055e-05, "loss": 0.0782, "step": 31030 }, { "epoch": 0.5495446435291566, "grad_norm": 0.5300559997558594, "learning_rate": 1.332492984189034e-05, "loss": 0.0711, "step": 31031 }, { "epoch": 0.549562353066185, "grad_norm": 0.6786346435546875, "learning_rate": 1.3324074861460945e-05, "loss": 0.0565, "step": 31032 }, { "epoch": 0.5495800626032133, "grad_norm": 0.9264769554138184, "learning_rate": 1.3323219886545173e-05, "loss": 0.0672, "step": 31033 }, { "epoch": 0.5495977721402419, "grad_norm": 0.3469441831111908, "learning_rate": 1.3322364917145836e-05, "loss": 0.0271, "step": 31034 }, { "epoch": 0.5496154816772703, "grad_norm": 0.8523464798927307, "learning_rate": 1.3321509953265755e-05, "loss": 0.0725, "step": 31035 }, { "epoch": 0.5496331912142987, "grad_norm": 0.5050697922706604, "learning_rate": 1.332065499490773e-05, "loss": 0.0423, "step": 31036 }, { "epoch": 0.549650900751327, "grad_norm": 0.6477720737457275, "learning_rate": 1.3319800042074579e-05, "loss": 0.1121, "step": 31037 }, { "epoch": 0.5496686102883556, "grad_norm": 0.8890103101730347, "learning_rate": 1.3318945094769118e-05, "loss": 0.1017, "step": 31038 }, { "epoch": 0.549686319825384, "grad_norm": 0.6466975808143616, "learning_rate": 1.3318090152994162e-05, "loss": 0.0453, "step": 31039 }, { "epoch": 0.5497040293624124, "grad_norm": 0.6583836674690247, "learning_rate": 1.3317235216752515e-05, "loss": 0.0611, "step": 31040 }, { "epoch": 0.5497217388994409, "grad_norm": 0.72627192735672, "learning_rate": 1.3316380286046994e-05, "loss": 0.0713, "step": 31041 }, { "epoch": 0.5497394484364693, "grad_norm": 0.6828399300575256, "learning_rate": 1.3315525360880413e-05, "loss": 0.0808, "step": 31042 }, { "epoch": 0.5497571579734977, "grad_norm": 0.9677910804748535, "learning_rate": 1.3314670441255582e-05, "loss": 0.072, "step": 31043 }, { "epoch": 0.5497748675105261, "grad_norm": 0.532372236251831, "learning_rate": 1.3313815527175314e-05, "loss": 0.0858, "step": 31044 }, { "epoch": 0.5497925770475546, "grad_norm": 0.7006393074989319, "learning_rate": 1.331296061864242e-05, "loss": 0.0792, "step": 31045 }, { "epoch": 0.549810286584583, "grad_norm": 0.5501739978790283, "learning_rate": 1.3312105715659722e-05, "loss": 0.0623, "step": 31046 }, { "epoch": 0.5498279961216114, "grad_norm": 0.5637642741203308, "learning_rate": 1.3311250818230021e-05, "loss": 0.0953, "step": 31047 }, { "epoch": 0.5498457056586398, "grad_norm": 0.6281655430793762, "learning_rate": 1.3310395926356134e-05, "loss": 0.076, "step": 31048 }, { "epoch": 0.5498634151956683, "grad_norm": 0.64531010389328, "learning_rate": 1.3309541040040874e-05, "loss": 0.0931, "step": 31049 }, { "epoch": 0.5498811247326967, "grad_norm": 0.6313787698745728, "learning_rate": 1.3308686159287058e-05, "loss": 0.0614, "step": 31050 }, { "epoch": 0.5498988342697251, "grad_norm": 0.35040363669395447, "learning_rate": 1.3307831284097484e-05, "loss": 0.0552, "step": 31051 }, { "epoch": 0.5499165438067535, "grad_norm": 0.5272802114486694, "learning_rate": 1.3306976414474979e-05, "loss": 0.0483, "step": 31052 }, { "epoch": 0.549934253343782, "grad_norm": 0.49062228202819824, "learning_rate": 1.3306121550422355e-05, "loss": 0.0679, "step": 31053 }, { "epoch": 0.5499519628808104, "grad_norm": 0.6176832318305969, "learning_rate": 1.3305266691942414e-05, "loss": 0.0517, "step": 31054 }, { "epoch": 0.5499696724178388, "grad_norm": 0.5682175159454346, "learning_rate": 1.3304411839037974e-05, "loss": 0.0445, "step": 31055 }, { "epoch": 0.5499873819548673, "grad_norm": 0.6986664533615112, "learning_rate": 1.3303556991711847e-05, "loss": 0.0936, "step": 31056 }, { "epoch": 0.5500050914918957, "grad_norm": 0.39967212080955505, "learning_rate": 1.330270214996685e-05, "loss": 0.061, "step": 31057 }, { "epoch": 0.5500228010289241, "grad_norm": 0.7828832864761353, "learning_rate": 1.3301847313805787e-05, "loss": 0.0483, "step": 31058 }, { "epoch": 0.5500405105659525, "grad_norm": 0.617362380027771, "learning_rate": 1.3300992483231476e-05, "loss": 0.0805, "step": 31059 }, { "epoch": 0.550058220102981, "grad_norm": 0.7182279825210571, "learning_rate": 1.330013765824673e-05, "loss": 0.1109, "step": 31060 }, { "epoch": 0.5500759296400094, "grad_norm": 0.5029628276824951, "learning_rate": 1.3299282838854357e-05, "loss": 0.0839, "step": 31061 }, { "epoch": 0.5500936391770378, "grad_norm": 0.29203858971595764, "learning_rate": 1.329842802505717e-05, "loss": 0.0738, "step": 31062 }, { "epoch": 0.5501113487140662, "grad_norm": 0.858906626701355, "learning_rate": 1.3297573216857984e-05, "loss": 0.0657, "step": 31063 }, { "epoch": 0.5501290582510947, "grad_norm": 0.6635187268257141, "learning_rate": 1.3296718414259617e-05, "loss": 0.0679, "step": 31064 }, { "epoch": 0.5501467677881231, "grad_norm": 0.45871999859809875, "learning_rate": 1.3295863617264865e-05, "loss": 0.0509, "step": 31065 }, { "epoch": 0.5501644773251515, "grad_norm": 1.2614665031433105, "learning_rate": 1.3295008825876549e-05, "loss": 0.0865, "step": 31066 }, { "epoch": 0.5501821868621799, "grad_norm": 0.44013291597366333, "learning_rate": 1.3294154040097489e-05, "loss": 0.065, "step": 31067 }, { "epoch": 0.5501998963992084, "grad_norm": 0.3605998754501343, "learning_rate": 1.3293299259930484e-05, "loss": 0.0571, "step": 31068 }, { "epoch": 0.5502176059362368, "grad_norm": 0.8546638488769531, "learning_rate": 1.3292444485378353e-05, "loss": 0.0629, "step": 31069 }, { "epoch": 0.5502353154732652, "grad_norm": 0.6730170249938965, "learning_rate": 1.3291589716443904e-05, "loss": 0.0979, "step": 31070 }, { "epoch": 0.5502530250102937, "grad_norm": 0.6909811496734619, "learning_rate": 1.3290734953129957e-05, "loss": 0.0645, "step": 31071 }, { "epoch": 0.5502707345473221, "grad_norm": 0.49322688579559326, "learning_rate": 1.3289880195439317e-05, "loss": 0.0663, "step": 31072 }, { "epoch": 0.5502884440843505, "grad_norm": 0.6813052892684937, "learning_rate": 1.3289025443374796e-05, "loss": 0.0595, "step": 31073 }, { "epoch": 0.5503061536213789, "grad_norm": 0.38465264439582825, "learning_rate": 1.3288170696939213e-05, "loss": 0.0492, "step": 31074 }, { "epoch": 0.5503238631584074, "grad_norm": 0.5983412265777588, "learning_rate": 1.3287315956135372e-05, "loss": 0.0575, "step": 31075 }, { "epoch": 0.5503415726954358, "grad_norm": 0.9011463522911072, "learning_rate": 1.3286461220966089e-05, "loss": 0.0907, "step": 31076 }, { "epoch": 0.5503592822324642, "grad_norm": 0.6966652870178223, "learning_rate": 1.3285606491434174e-05, "loss": 0.1051, "step": 31077 }, { "epoch": 0.5503769917694926, "grad_norm": 0.5876762270927429, "learning_rate": 1.3284751767542447e-05, "loss": 0.0482, "step": 31078 }, { "epoch": 0.5503947013065211, "grad_norm": 0.6322061419487, "learning_rate": 1.3283897049293706e-05, "loss": 0.0722, "step": 31079 }, { "epoch": 0.5504124108435495, "grad_norm": 0.3958471715450287, "learning_rate": 1.3283042336690768e-05, "loss": 0.0449, "step": 31080 }, { "epoch": 0.5504301203805779, "grad_norm": 0.5588500499725342, "learning_rate": 1.3282187629736449e-05, "loss": 0.0739, "step": 31081 }, { "epoch": 0.5504478299176063, "grad_norm": 0.5507272481918335, "learning_rate": 1.3281332928433566e-05, "loss": 0.0771, "step": 31082 }, { "epoch": 0.5504655394546348, "grad_norm": 0.6791023015975952, "learning_rate": 1.3280478232784917e-05, "loss": 0.0759, "step": 31083 }, { "epoch": 0.5504832489916632, "grad_norm": 0.5272642970085144, "learning_rate": 1.3279623542793323e-05, "loss": 0.0495, "step": 31084 }, { "epoch": 0.5505009585286916, "grad_norm": 0.40115827322006226, "learning_rate": 1.3278768858461593e-05, "loss": 0.0627, "step": 31085 }, { "epoch": 0.5505186680657201, "grad_norm": 1.2716212272644043, "learning_rate": 1.327791417979254e-05, "loss": 0.0956, "step": 31086 }, { "epoch": 0.5505363776027485, "grad_norm": 0.5115470290184021, "learning_rate": 1.3277059506788974e-05, "loss": 0.0948, "step": 31087 }, { "epoch": 0.5505540871397769, "grad_norm": 0.398033082485199, "learning_rate": 1.3276204839453706e-05, "loss": 0.0867, "step": 31088 }, { "epoch": 0.5505717966768053, "grad_norm": 0.7053830623626709, "learning_rate": 1.3275350177789554e-05, "loss": 0.0681, "step": 31089 }, { "epoch": 0.5505895062138338, "grad_norm": 0.673656165599823, "learning_rate": 1.3274495521799324e-05, "loss": 0.0574, "step": 31090 }, { "epoch": 0.5506072157508622, "grad_norm": 0.6136214733123779, "learning_rate": 1.3273640871485827e-05, "loss": 0.0723, "step": 31091 }, { "epoch": 0.5506249252878906, "grad_norm": 0.9046054482460022, "learning_rate": 1.3272786226851884e-05, "loss": 0.0632, "step": 31092 }, { "epoch": 0.550642634824919, "grad_norm": 0.7722945809364319, "learning_rate": 1.3271931587900294e-05, "loss": 0.0826, "step": 31093 }, { "epoch": 0.5506603443619476, "grad_norm": 1.006223440170288, "learning_rate": 1.3271076954633874e-05, "loss": 0.159, "step": 31094 }, { "epoch": 0.550678053898976, "grad_norm": 0.8538707494735718, "learning_rate": 1.3270222327055432e-05, "loss": 0.081, "step": 31095 }, { "epoch": 0.5506957634360043, "grad_norm": 0.6639113426208496, "learning_rate": 1.3269367705167792e-05, "loss": 0.068, "step": 31096 }, { "epoch": 0.5507134729730327, "grad_norm": 0.5393576622009277, "learning_rate": 1.3268513088973753e-05, "loss": 0.0662, "step": 31097 }, { "epoch": 0.5507311825100613, "grad_norm": 0.5840782523155212, "learning_rate": 1.326765847847613e-05, "loss": 0.0802, "step": 31098 }, { "epoch": 0.5507488920470897, "grad_norm": 0.6236894726753235, "learning_rate": 1.3266803873677739e-05, "loss": 0.1109, "step": 31099 }, { "epoch": 0.550766601584118, "grad_norm": 0.5501778721809387, "learning_rate": 1.3265949274581385e-05, "loss": 0.0648, "step": 31100 }, { "epoch": 0.5507843111211466, "grad_norm": 0.6028819680213928, "learning_rate": 1.3265094681189881e-05, "loss": 0.0452, "step": 31101 }, { "epoch": 0.550802020658175, "grad_norm": 0.9168205857276917, "learning_rate": 1.326424009350604e-05, "loss": 0.085, "step": 31102 }, { "epoch": 0.5508197301952034, "grad_norm": 0.673587441444397, "learning_rate": 1.3263385511532678e-05, "loss": 0.0575, "step": 31103 }, { "epoch": 0.5508374397322318, "grad_norm": 0.7330197095870972, "learning_rate": 1.32625309352726e-05, "loss": 0.0837, "step": 31104 }, { "epoch": 0.5508551492692603, "grad_norm": 0.5417134761810303, "learning_rate": 1.3261676364728618e-05, "loss": 0.0635, "step": 31105 }, { "epoch": 0.5508728588062887, "grad_norm": 0.73000168800354, "learning_rate": 1.3260821799903552e-05, "loss": 0.0558, "step": 31106 }, { "epoch": 0.5508905683433171, "grad_norm": 0.7277453541755676, "learning_rate": 1.32599672408002e-05, "loss": 0.0605, "step": 31107 }, { "epoch": 0.5509082778803455, "grad_norm": 0.8607276678085327, "learning_rate": 1.3259112687421378e-05, "loss": 0.0996, "step": 31108 }, { "epoch": 0.550925987417374, "grad_norm": 0.8513545989990234, "learning_rate": 1.3258258139769898e-05, "loss": 0.0931, "step": 31109 }, { "epoch": 0.5509436969544024, "grad_norm": 0.6289948225021362, "learning_rate": 1.3257403597848581e-05, "loss": 0.0843, "step": 31110 }, { "epoch": 0.5509614064914308, "grad_norm": 0.8181406259536743, "learning_rate": 1.3256549061660224e-05, "loss": 0.0811, "step": 31111 }, { "epoch": 0.5509791160284593, "grad_norm": 0.8727540969848633, "learning_rate": 1.3255694531207644e-05, "loss": 0.0871, "step": 31112 }, { "epoch": 0.5509968255654877, "grad_norm": 0.4850088655948639, "learning_rate": 1.3254840006493654e-05, "loss": 0.0582, "step": 31113 }, { "epoch": 0.5510145351025161, "grad_norm": 0.6534557342529297, "learning_rate": 1.3253985487521064e-05, "loss": 0.0722, "step": 31114 }, { "epoch": 0.5510322446395445, "grad_norm": 0.8289754986763, "learning_rate": 1.3253130974292682e-05, "loss": 0.0561, "step": 31115 }, { "epoch": 0.551049954176573, "grad_norm": 0.46365872025489807, "learning_rate": 1.3252276466811326e-05, "loss": 0.0515, "step": 31116 }, { "epoch": 0.5510676637136014, "grad_norm": 0.6632309556007385, "learning_rate": 1.3251421965079805e-05, "loss": 0.0735, "step": 31117 }, { "epoch": 0.5510853732506298, "grad_norm": 0.7524870038032532, "learning_rate": 1.3250567469100925e-05, "loss": 0.0712, "step": 31118 }, { "epoch": 0.5511030827876582, "grad_norm": 0.4136832058429718, "learning_rate": 1.32497129788775e-05, "loss": 0.0757, "step": 31119 }, { "epoch": 0.5511207923246867, "grad_norm": 0.4844818413257599, "learning_rate": 1.3248858494412345e-05, "loss": 0.0641, "step": 31120 }, { "epoch": 0.5511385018617151, "grad_norm": 0.5632781982421875, "learning_rate": 1.3248004015708274e-05, "loss": 0.0655, "step": 31121 }, { "epoch": 0.5511562113987435, "grad_norm": 0.821134626865387, "learning_rate": 1.3247149542768085e-05, "loss": 0.0721, "step": 31122 }, { "epoch": 0.5511739209357719, "grad_norm": 0.7073140740394592, "learning_rate": 1.3246295075594599e-05, "loss": 0.0694, "step": 31123 }, { "epoch": 0.5511916304728004, "grad_norm": 0.7077056765556335, "learning_rate": 1.3245440614190625e-05, "loss": 0.0699, "step": 31124 }, { "epoch": 0.5512093400098288, "grad_norm": 0.8549020886421204, "learning_rate": 1.3244586158558972e-05, "loss": 0.0758, "step": 31125 }, { "epoch": 0.5512270495468572, "grad_norm": 0.28292137384414673, "learning_rate": 1.3243731708702456e-05, "loss": 0.0436, "step": 31126 }, { "epoch": 0.5512447590838857, "grad_norm": 0.5577297806739807, "learning_rate": 1.324287726462388e-05, "loss": 0.0625, "step": 31127 }, { "epoch": 0.5512624686209141, "grad_norm": 0.528451681137085, "learning_rate": 1.3242022826326066e-05, "loss": 0.0685, "step": 31128 }, { "epoch": 0.5512801781579425, "grad_norm": 0.6382191181182861, "learning_rate": 1.3241168393811816e-05, "loss": 0.0856, "step": 31129 }, { "epoch": 0.5512978876949709, "grad_norm": 0.7688215970993042, "learning_rate": 1.3240313967083945e-05, "loss": 0.1047, "step": 31130 }, { "epoch": 0.5513155972319994, "grad_norm": 0.6140669584274292, "learning_rate": 1.3239459546145267e-05, "loss": 0.0615, "step": 31131 }, { "epoch": 0.5513333067690278, "grad_norm": 0.8637690544128418, "learning_rate": 1.3238605130998581e-05, "loss": 0.0548, "step": 31132 }, { "epoch": 0.5513510163060562, "grad_norm": 0.6477504372596741, "learning_rate": 1.3237750721646708e-05, "loss": 0.0715, "step": 31133 }, { "epoch": 0.5513687258430846, "grad_norm": 0.8183934092521667, "learning_rate": 1.3236896318092459e-05, "loss": 0.0422, "step": 31134 }, { "epoch": 0.5513864353801131, "grad_norm": 0.7176089286804199, "learning_rate": 1.3236041920338647e-05, "loss": 0.0515, "step": 31135 }, { "epoch": 0.5514041449171415, "grad_norm": 0.6931936740875244, "learning_rate": 1.3235187528388073e-05, "loss": 0.0689, "step": 31136 }, { "epoch": 0.5514218544541699, "grad_norm": 0.349215030670166, "learning_rate": 1.3234333142243554e-05, "loss": 0.0741, "step": 31137 }, { "epoch": 0.5514395639911983, "grad_norm": 0.7155926823616028, "learning_rate": 1.3233478761907904e-05, "loss": 0.0487, "step": 31138 }, { "epoch": 0.5514572735282268, "grad_norm": 0.634337842464447, "learning_rate": 1.3232624387383925e-05, "loss": 0.0828, "step": 31139 }, { "epoch": 0.5514749830652552, "grad_norm": 0.6774921417236328, "learning_rate": 1.3231770018674434e-05, "loss": 0.0874, "step": 31140 }, { "epoch": 0.5514926926022836, "grad_norm": 0.8797332048416138, "learning_rate": 1.3230915655782242e-05, "loss": 0.1059, "step": 31141 }, { "epoch": 0.5515104021393121, "grad_norm": 1.0446581840515137, "learning_rate": 1.323006129871016e-05, "loss": 0.1032, "step": 31142 }, { "epoch": 0.5515281116763405, "grad_norm": 0.6567269563674927, "learning_rate": 1.3229206947460995e-05, "loss": 0.0577, "step": 31143 }, { "epoch": 0.5515458212133689, "grad_norm": 0.5434610843658447, "learning_rate": 1.322835260203756e-05, "loss": 0.045, "step": 31144 }, { "epoch": 0.5515635307503973, "grad_norm": 0.9343993663787842, "learning_rate": 1.322749826244267e-05, "loss": 0.1039, "step": 31145 }, { "epoch": 0.5515812402874258, "grad_norm": 0.6764140725135803, "learning_rate": 1.3226643928679126e-05, "loss": 0.0539, "step": 31146 }, { "epoch": 0.5515989498244542, "grad_norm": 0.765241801738739, "learning_rate": 1.3225789600749745e-05, "loss": 0.114, "step": 31147 }, { "epoch": 0.5516166593614826, "grad_norm": 0.42279383540153503, "learning_rate": 1.3224935278657338e-05, "loss": 0.0403, "step": 31148 }, { "epoch": 0.551634368898511, "grad_norm": 0.3075764775276184, "learning_rate": 1.3224080962404718e-05, "loss": 0.0457, "step": 31149 }, { "epoch": 0.5516520784355395, "grad_norm": 0.6854187846183777, "learning_rate": 1.3223226651994689e-05, "loss": 0.0642, "step": 31150 }, { "epoch": 0.5516697879725679, "grad_norm": 0.556976318359375, "learning_rate": 1.3222372347430061e-05, "loss": 0.0509, "step": 31151 }, { "epoch": 0.5516874975095963, "grad_norm": 1.7517907619476318, "learning_rate": 1.322151804871365e-05, "loss": 0.0739, "step": 31152 }, { "epoch": 0.5517052070466247, "grad_norm": 0.8867583274841309, "learning_rate": 1.3220663755848269e-05, "loss": 0.0909, "step": 31153 }, { "epoch": 0.5517229165836532, "grad_norm": 0.7762181758880615, "learning_rate": 1.321980946883672e-05, "loss": 0.0765, "step": 31154 }, { "epoch": 0.5517406261206816, "grad_norm": 0.5880069732666016, "learning_rate": 1.3218955187681817e-05, "loss": 0.0591, "step": 31155 }, { "epoch": 0.55175833565771, "grad_norm": 0.7974973917007446, "learning_rate": 1.3218100912386377e-05, "loss": 0.0556, "step": 31156 }, { "epoch": 0.5517760451947386, "grad_norm": 0.5255842208862305, "learning_rate": 1.32172466429532e-05, "loss": 0.0806, "step": 31157 }, { "epoch": 0.551793754731767, "grad_norm": 0.29281848669052124, "learning_rate": 1.32163923793851e-05, "loss": 0.0405, "step": 31158 }, { "epoch": 0.5518114642687953, "grad_norm": 0.9663721919059753, "learning_rate": 1.321553812168489e-05, "loss": 0.0978, "step": 31159 }, { "epoch": 0.5518291738058237, "grad_norm": 0.5617535710334778, "learning_rate": 1.3214683869855385e-05, "loss": 0.0875, "step": 31160 }, { "epoch": 0.5518468833428523, "grad_norm": 0.4709063172340393, "learning_rate": 1.3213829623899381e-05, "loss": 0.0794, "step": 31161 }, { "epoch": 0.5518645928798807, "grad_norm": 0.3068370223045349, "learning_rate": 1.32129753838197e-05, "loss": 0.0983, "step": 31162 }, { "epoch": 0.551882302416909, "grad_norm": 0.7147100567817688, "learning_rate": 1.3212121149619154e-05, "loss": 0.1174, "step": 31163 }, { "epoch": 0.5519000119539375, "grad_norm": 0.471513569355011, "learning_rate": 1.3211266921300544e-05, "loss": 0.0775, "step": 31164 }, { "epoch": 0.551917721490966, "grad_norm": 0.7350912094116211, "learning_rate": 1.3210412698866684e-05, "loss": 0.0885, "step": 31165 }, { "epoch": 0.5519354310279944, "grad_norm": 0.6441700458526611, "learning_rate": 1.3209558482320386e-05, "loss": 0.0499, "step": 31166 }, { "epoch": 0.5519531405650228, "grad_norm": 0.553387463092804, "learning_rate": 1.3208704271664461e-05, "loss": 0.0883, "step": 31167 }, { "epoch": 0.5519708501020512, "grad_norm": 0.6898511052131653, "learning_rate": 1.3207850066901716e-05, "loss": 0.0789, "step": 31168 }, { "epoch": 0.5519885596390797, "grad_norm": 0.6801318526268005, "learning_rate": 1.3206995868034964e-05, "loss": 0.0736, "step": 31169 }, { "epoch": 0.5520062691761081, "grad_norm": 0.5290738344192505, "learning_rate": 1.3206141675067015e-05, "loss": 0.0922, "step": 31170 }, { "epoch": 0.5520239787131365, "grad_norm": 0.7370188236236572, "learning_rate": 1.3205287488000675e-05, "loss": 0.068, "step": 31171 }, { "epoch": 0.552041688250165, "grad_norm": 0.5170000791549683, "learning_rate": 1.320443330683876e-05, "loss": 0.069, "step": 31172 }, { "epoch": 0.5520593977871934, "grad_norm": 1.129845142364502, "learning_rate": 1.3203579131584077e-05, "loss": 0.0526, "step": 31173 }, { "epoch": 0.5520771073242218, "grad_norm": 0.3772677183151245, "learning_rate": 1.3202724962239442e-05, "loss": 0.052, "step": 31174 }, { "epoch": 0.5520948168612502, "grad_norm": 0.6914286613464355, "learning_rate": 1.3201870798807653e-05, "loss": 0.0823, "step": 31175 }, { "epoch": 0.5521125263982787, "grad_norm": 0.590363621711731, "learning_rate": 1.3201016641291528e-05, "loss": 0.0474, "step": 31176 }, { "epoch": 0.5521302359353071, "grad_norm": 0.4783618152141571, "learning_rate": 1.3200162489693884e-05, "loss": 0.0647, "step": 31177 }, { "epoch": 0.5521479454723355, "grad_norm": 0.45784300565719604, "learning_rate": 1.3199308344017516e-05, "loss": 0.0635, "step": 31178 }, { "epoch": 0.5521656550093639, "grad_norm": 0.7365187406539917, "learning_rate": 1.3198454204265243e-05, "loss": 0.0871, "step": 31179 }, { "epoch": 0.5521833645463924, "grad_norm": 0.656225860118866, "learning_rate": 1.3197600070439872e-05, "loss": 0.0458, "step": 31180 }, { "epoch": 0.5522010740834208, "grad_norm": 0.45175519585609436, "learning_rate": 1.3196745942544218e-05, "loss": 0.0505, "step": 31181 }, { "epoch": 0.5522187836204492, "grad_norm": 0.6781395673751831, "learning_rate": 1.3195891820581085e-05, "loss": 0.0771, "step": 31182 }, { "epoch": 0.5522364931574776, "grad_norm": 0.3097221255302429, "learning_rate": 1.3195037704553284e-05, "loss": 0.0383, "step": 31183 }, { "epoch": 0.5522542026945061, "grad_norm": 0.5457318425178528, "learning_rate": 1.3194183594463628e-05, "loss": 0.0652, "step": 31184 }, { "epoch": 0.5522719122315345, "grad_norm": 0.6248899698257446, "learning_rate": 1.3193329490314929e-05, "loss": 0.071, "step": 31185 }, { "epoch": 0.5522896217685629, "grad_norm": 0.5216996073722839, "learning_rate": 1.3192475392109987e-05, "loss": 0.049, "step": 31186 }, { "epoch": 0.5523073313055914, "grad_norm": 0.45941081643104553, "learning_rate": 1.319162129985162e-05, "loss": 0.0377, "step": 31187 }, { "epoch": 0.5523250408426198, "grad_norm": 0.5920332670211792, "learning_rate": 1.3190767213542643e-05, "loss": 0.0536, "step": 31188 }, { "epoch": 0.5523427503796482, "grad_norm": 0.8245667219161987, "learning_rate": 1.318991313318585e-05, "loss": 0.0512, "step": 31189 }, { "epoch": 0.5523604599166766, "grad_norm": 0.5873145461082458, "learning_rate": 1.318905905878406e-05, "loss": 0.0887, "step": 31190 }, { "epoch": 0.5523781694537051, "grad_norm": 0.6059957146644592, "learning_rate": 1.3188204990340084e-05, "loss": 0.0956, "step": 31191 }, { "epoch": 0.5523958789907335, "grad_norm": 0.4618944823741913, "learning_rate": 1.3187350927856738e-05, "loss": 0.0529, "step": 31192 }, { "epoch": 0.5524135885277619, "grad_norm": 0.8075489401817322, "learning_rate": 1.3186496871336815e-05, "loss": 0.0791, "step": 31193 }, { "epoch": 0.5524312980647903, "grad_norm": 0.5937492847442627, "learning_rate": 1.3185642820783137e-05, "loss": 0.0788, "step": 31194 }, { "epoch": 0.5524490076018188, "grad_norm": 0.9834856390953064, "learning_rate": 1.3184788776198511e-05, "loss": 0.0994, "step": 31195 }, { "epoch": 0.5524667171388472, "grad_norm": 0.6639525890350342, "learning_rate": 1.3183934737585744e-05, "loss": 0.0698, "step": 31196 }, { "epoch": 0.5524844266758756, "grad_norm": 0.8200325965881348, "learning_rate": 1.3183080704947649e-05, "loss": 0.0658, "step": 31197 }, { "epoch": 0.552502136212904, "grad_norm": 1.0263919830322266, "learning_rate": 1.3182226678287035e-05, "loss": 0.1068, "step": 31198 }, { "epoch": 0.5525198457499325, "grad_norm": 0.9532861709594727, "learning_rate": 1.3181372657606713e-05, "loss": 0.1071, "step": 31199 }, { "epoch": 0.5525375552869609, "grad_norm": 0.5229176878929138, "learning_rate": 1.3180518642909489e-05, "loss": 0.0811, "step": 31200 }, { "epoch": 0.5525552648239893, "grad_norm": 0.606658399105072, "learning_rate": 1.3179664634198174e-05, "loss": 0.0551, "step": 31201 }, { "epoch": 0.5525729743610178, "grad_norm": 0.66231769323349, "learning_rate": 1.3178810631475585e-05, "loss": 0.0774, "step": 31202 }, { "epoch": 0.5525906838980462, "grad_norm": 0.45427536964416504, "learning_rate": 1.3177956634744518e-05, "loss": 0.0776, "step": 31203 }, { "epoch": 0.5526083934350746, "grad_norm": 0.5852136015892029, "learning_rate": 1.3177102644007786e-05, "loss": 0.0822, "step": 31204 }, { "epoch": 0.552626102972103, "grad_norm": 0.6904280781745911, "learning_rate": 1.3176248659268205e-05, "loss": 0.0804, "step": 31205 }, { "epoch": 0.5526438125091315, "grad_norm": 0.47178763151168823, "learning_rate": 1.3175394680528589e-05, "loss": 0.0373, "step": 31206 }, { "epoch": 0.5526615220461599, "grad_norm": 0.7057246565818787, "learning_rate": 1.3174540707791732e-05, "loss": 0.053, "step": 31207 }, { "epoch": 0.5526792315831883, "grad_norm": 0.43510299921035767, "learning_rate": 1.3173686741060453e-05, "loss": 0.079, "step": 31208 }, { "epoch": 0.5526969411202167, "grad_norm": 0.6073855757713318, "learning_rate": 1.3172832780337562e-05, "loss": 0.072, "step": 31209 }, { "epoch": 0.5527146506572452, "grad_norm": 0.6957898736000061, "learning_rate": 1.3171978825625865e-05, "loss": 0.0862, "step": 31210 }, { "epoch": 0.5527323601942736, "grad_norm": 0.6742576360702515, "learning_rate": 1.317112487692817e-05, "loss": 0.0527, "step": 31211 }, { "epoch": 0.552750069731302, "grad_norm": 0.5671781897544861, "learning_rate": 1.3170270934247291e-05, "loss": 0.0631, "step": 31212 }, { "epoch": 0.5527677792683304, "grad_norm": 0.48478883504867554, "learning_rate": 1.3169416997586039e-05, "loss": 0.0672, "step": 31213 }, { "epoch": 0.5527854888053589, "grad_norm": 0.41766560077667236, "learning_rate": 1.3168563066947216e-05, "loss": 0.0706, "step": 31214 }, { "epoch": 0.5528031983423873, "grad_norm": 0.4788459837436676, "learning_rate": 1.3167709142333633e-05, "loss": 0.0698, "step": 31215 }, { "epoch": 0.5528209078794157, "grad_norm": 0.6942253112792969, "learning_rate": 1.3166855223748111e-05, "loss": 0.0661, "step": 31216 }, { "epoch": 0.5528386174164442, "grad_norm": 0.6944501399993896, "learning_rate": 1.3166001311193444e-05, "loss": 0.0825, "step": 31217 }, { "epoch": 0.5528563269534726, "grad_norm": 0.9391353130340576, "learning_rate": 1.3165147404672446e-05, "loss": 0.1286, "step": 31218 }, { "epoch": 0.552874036490501, "grad_norm": 0.7594025731086731, "learning_rate": 1.3164293504187923e-05, "loss": 0.0709, "step": 31219 }, { "epoch": 0.5528917460275294, "grad_norm": 0.576255738735199, "learning_rate": 1.31634396097427e-05, "loss": 0.0661, "step": 31220 }, { "epoch": 0.552909455564558, "grad_norm": 1.2449347972869873, "learning_rate": 1.3162585721339567e-05, "loss": 0.0607, "step": 31221 }, { "epoch": 0.5529271651015863, "grad_norm": 0.6658644676208496, "learning_rate": 1.3161731838981345e-05, "loss": 0.0553, "step": 31222 }, { "epoch": 0.5529448746386147, "grad_norm": 0.6869906783103943, "learning_rate": 1.3160877962670834e-05, "loss": 0.0581, "step": 31223 }, { "epoch": 0.5529625841756431, "grad_norm": 0.5295876264572144, "learning_rate": 1.3160024092410854e-05, "loss": 0.0653, "step": 31224 }, { "epoch": 0.5529802937126717, "grad_norm": 0.7324197888374329, "learning_rate": 1.3159170228204205e-05, "loss": 0.0995, "step": 31225 }, { "epoch": 0.5529980032497, "grad_norm": 0.6007175445556641, "learning_rate": 1.3158316370053702e-05, "loss": 0.0511, "step": 31226 }, { "epoch": 0.5530157127867285, "grad_norm": 0.5781726241111755, "learning_rate": 1.3157462517962153e-05, "loss": 0.0964, "step": 31227 }, { "epoch": 0.5530334223237569, "grad_norm": 0.44606372714042664, "learning_rate": 1.3156608671932362e-05, "loss": 0.0559, "step": 31228 }, { "epoch": 0.5530511318607854, "grad_norm": 0.586211085319519, "learning_rate": 1.3155754831967144e-05, "loss": 0.0611, "step": 31229 }, { "epoch": 0.5530688413978138, "grad_norm": 0.6652708649635315, "learning_rate": 1.3154900998069306e-05, "loss": 0.0766, "step": 31230 }, { "epoch": 0.5530865509348422, "grad_norm": 1.0534095764160156, "learning_rate": 1.3154047170241664e-05, "loss": 0.0604, "step": 31231 }, { "epoch": 0.5531042604718707, "grad_norm": 0.699763834476471, "learning_rate": 1.3153193348487012e-05, "loss": 0.0872, "step": 31232 }, { "epoch": 0.5531219700088991, "grad_norm": 0.5639578104019165, "learning_rate": 1.3152339532808164e-05, "loss": 0.0519, "step": 31233 }, { "epoch": 0.5531396795459275, "grad_norm": 0.6901869177818298, "learning_rate": 1.3151485723207942e-05, "loss": 0.0801, "step": 31234 }, { "epoch": 0.5531573890829559, "grad_norm": 0.6396759152412415, "learning_rate": 1.3150631919689139e-05, "loss": 0.0813, "step": 31235 }, { "epoch": 0.5531750986199844, "grad_norm": 0.5960878133773804, "learning_rate": 1.314977812225457e-05, "loss": 0.0766, "step": 31236 }, { "epoch": 0.5531928081570128, "grad_norm": 0.4934993088245392, "learning_rate": 1.3148924330907042e-05, "loss": 0.0549, "step": 31237 }, { "epoch": 0.5532105176940412, "grad_norm": 0.671657145023346, "learning_rate": 1.3148070545649372e-05, "loss": 0.0503, "step": 31238 }, { "epoch": 0.5532282272310696, "grad_norm": 0.7997903823852539, "learning_rate": 1.3147216766484356e-05, "loss": 0.1043, "step": 31239 }, { "epoch": 0.5532459367680981, "grad_norm": 0.8330159783363342, "learning_rate": 1.314636299341481e-05, "loss": 0.0816, "step": 31240 }, { "epoch": 0.5532636463051265, "grad_norm": 0.6212786436080933, "learning_rate": 1.314550922644355e-05, "loss": 0.0726, "step": 31241 }, { "epoch": 0.5532813558421549, "grad_norm": 0.33210813999176025, "learning_rate": 1.314465546557337e-05, "loss": 0.0661, "step": 31242 }, { "epoch": 0.5532990653791833, "grad_norm": 0.8636290431022644, "learning_rate": 1.3143801710807087e-05, "loss": 0.1117, "step": 31243 }, { "epoch": 0.5533167749162118, "grad_norm": 0.8767995238304138, "learning_rate": 1.3142947962147506e-05, "loss": 0.1054, "step": 31244 }, { "epoch": 0.5533344844532402, "grad_norm": 0.6325126886367798, "learning_rate": 1.3142094219597448e-05, "loss": 0.0916, "step": 31245 }, { "epoch": 0.5533521939902686, "grad_norm": 0.6754811406135559, "learning_rate": 1.3141240483159704e-05, "loss": 0.0626, "step": 31246 }, { "epoch": 0.5533699035272971, "grad_norm": 0.2936815321445465, "learning_rate": 1.3140386752837092e-05, "loss": 0.0865, "step": 31247 }, { "epoch": 0.5533876130643255, "grad_norm": 0.6752887964248657, "learning_rate": 1.3139533028632421e-05, "loss": 0.0758, "step": 31248 }, { "epoch": 0.5534053226013539, "grad_norm": 0.7707300782203674, "learning_rate": 1.3138679310548497e-05, "loss": 0.0619, "step": 31249 }, { "epoch": 0.5534230321383823, "grad_norm": 0.8027032613754272, "learning_rate": 1.3137825598588128e-05, "loss": 0.0797, "step": 31250 }, { "epoch": 0.5534407416754108, "grad_norm": 0.7633060216903687, "learning_rate": 1.3136971892754124e-05, "loss": 0.1041, "step": 31251 }, { "epoch": 0.5534584512124392, "grad_norm": 0.6920800805091858, "learning_rate": 1.31361181930493e-05, "loss": 0.068, "step": 31252 }, { "epoch": 0.5534761607494676, "grad_norm": 0.776317298412323, "learning_rate": 1.3135264499476453e-05, "loss": 0.0703, "step": 31253 }, { "epoch": 0.553493870286496, "grad_norm": 0.5658820271492004, "learning_rate": 1.3134410812038398e-05, "loss": 0.0776, "step": 31254 }, { "epoch": 0.5535115798235245, "grad_norm": 0.3516611158847809, "learning_rate": 1.3133557130737943e-05, "loss": 0.0692, "step": 31255 }, { "epoch": 0.5535292893605529, "grad_norm": 0.7384401559829712, "learning_rate": 1.31327034555779e-05, "loss": 0.0932, "step": 31256 }, { "epoch": 0.5535469988975813, "grad_norm": 0.6451904773712158, "learning_rate": 1.3131849786561069e-05, "loss": 0.0633, "step": 31257 }, { "epoch": 0.5535647084346097, "grad_norm": 0.5609357953071594, "learning_rate": 1.3130996123690266e-05, "loss": 0.064, "step": 31258 }, { "epoch": 0.5535824179716382, "grad_norm": 0.47654715180397034, "learning_rate": 1.31301424669683e-05, "loss": 0.0645, "step": 31259 }, { "epoch": 0.5536001275086666, "grad_norm": 0.4495658278465271, "learning_rate": 1.3129288816397972e-05, "loss": 0.0449, "step": 31260 }, { "epoch": 0.553617837045695, "grad_norm": 0.4177982211112976, "learning_rate": 1.3128435171982093e-05, "loss": 0.0625, "step": 31261 }, { "epoch": 0.5536355465827235, "grad_norm": 0.8699103593826294, "learning_rate": 1.3127581533723469e-05, "loss": 0.0828, "step": 31262 }, { "epoch": 0.5536532561197519, "grad_norm": 0.9551699161529541, "learning_rate": 1.3126727901624923e-05, "loss": 0.0835, "step": 31263 }, { "epoch": 0.5536709656567803, "grad_norm": 0.5505695939064026, "learning_rate": 1.3125874275689248e-05, "loss": 0.0545, "step": 31264 }, { "epoch": 0.5536886751938087, "grad_norm": 0.6202438473701477, "learning_rate": 1.3125020655919255e-05, "loss": 0.0761, "step": 31265 }, { "epoch": 0.5537063847308372, "grad_norm": 0.32185009121894836, "learning_rate": 1.3124167042317758e-05, "loss": 0.0493, "step": 31266 }, { "epoch": 0.5537240942678656, "grad_norm": 0.5593823790550232, "learning_rate": 1.3123313434887558e-05, "loss": 0.0325, "step": 31267 }, { "epoch": 0.553741803804894, "grad_norm": 0.7452221512794495, "learning_rate": 1.312245983363147e-05, "loss": 0.088, "step": 31268 }, { "epoch": 0.5537595133419224, "grad_norm": 0.6007430553436279, "learning_rate": 1.3121606238552296e-05, "loss": 0.0672, "step": 31269 }, { "epoch": 0.5537772228789509, "grad_norm": 0.44289088249206543, "learning_rate": 1.3120752649652854e-05, "loss": 0.0519, "step": 31270 }, { "epoch": 0.5537949324159793, "grad_norm": 0.6857889294624329, "learning_rate": 1.3119899066935937e-05, "loss": 0.0923, "step": 31271 }, { "epoch": 0.5538126419530077, "grad_norm": 0.6889633536338806, "learning_rate": 1.3119045490404366e-05, "loss": 0.0549, "step": 31272 }, { "epoch": 0.5538303514900361, "grad_norm": 0.5280179381370544, "learning_rate": 1.3118191920060951e-05, "loss": 0.0753, "step": 31273 }, { "epoch": 0.5538480610270646, "grad_norm": 0.5336878895759583, "learning_rate": 1.3117338355908485e-05, "loss": 0.0597, "step": 31274 }, { "epoch": 0.553865770564093, "grad_norm": 0.49485230445861816, "learning_rate": 1.3116484797949789e-05, "loss": 0.0663, "step": 31275 }, { "epoch": 0.5538834801011214, "grad_norm": 0.8778740763664246, "learning_rate": 1.3115631246187666e-05, "loss": 0.0661, "step": 31276 }, { "epoch": 0.5539011896381499, "grad_norm": 0.7731620073318481, "learning_rate": 1.3114777700624931e-05, "loss": 0.0987, "step": 31277 }, { "epoch": 0.5539188991751783, "grad_norm": 0.5861889719963074, "learning_rate": 1.311392416126438e-05, "loss": 0.0661, "step": 31278 }, { "epoch": 0.5539366087122067, "grad_norm": 0.8831548690795898, "learning_rate": 1.311307062810883e-05, "loss": 0.0656, "step": 31279 }, { "epoch": 0.5539543182492351, "grad_norm": 0.6891712546348572, "learning_rate": 1.311221710116109e-05, "loss": 0.068, "step": 31280 }, { "epoch": 0.5539720277862636, "grad_norm": 0.7390686273574829, "learning_rate": 1.311136358042396e-05, "loss": 0.075, "step": 31281 }, { "epoch": 0.553989737323292, "grad_norm": 0.733110785484314, "learning_rate": 1.3110510065900254e-05, "loss": 0.0594, "step": 31282 }, { "epoch": 0.5540074468603204, "grad_norm": 0.5171472430229187, "learning_rate": 1.3109656557592779e-05, "loss": 0.0502, "step": 31283 }, { "epoch": 0.5540251563973488, "grad_norm": 0.3714386224746704, "learning_rate": 1.3108803055504348e-05, "loss": 0.0683, "step": 31284 }, { "epoch": 0.5540428659343773, "grad_norm": 0.6802538633346558, "learning_rate": 1.3107949559637756e-05, "loss": 0.1147, "step": 31285 }, { "epoch": 0.5540605754714057, "grad_norm": 0.8703446984291077, "learning_rate": 1.3107096069995819e-05, "loss": 0.0803, "step": 31286 }, { "epoch": 0.5540782850084341, "grad_norm": 0.6077936887741089, "learning_rate": 1.3106242586581348e-05, "loss": 0.0561, "step": 31287 }, { "epoch": 0.5540959945454625, "grad_norm": 0.7447422742843628, "learning_rate": 1.3105389109397151e-05, "loss": 0.0822, "step": 31288 }, { "epoch": 0.554113704082491, "grad_norm": 1.063148021697998, "learning_rate": 1.3104535638446028e-05, "loss": 0.0715, "step": 31289 }, { "epoch": 0.5541314136195195, "grad_norm": 0.7624967694282532, "learning_rate": 1.3103682173730786e-05, "loss": 0.0723, "step": 31290 }, { "epoch": 0.5541491231565479, "grad_norm": 0.45360517501831055, "learning_rate": 1.3102828715254246e-05, "loss": 0.0656, "step": 31291 }, { "epoch": 0.5541668326935764, "grad_norm": 0.3350945711135864, "learning_rate": 1.3101975263019203e-05, "loss": 0.0686, "step": 31292 }, { "epoch": 0.5541845422306048, "grad_norm": 0.42834562063217163, "learning_rate": 1.3101121817028468e-05, "loss": 0.0439, "step": 31293 }, { "epoch": 0.5542022517676332, "grad_norm": 0.8498958945274353, "learning_rate": 1.3100268377284854e-05, "loss": 0.0836, "step": 31294 }, { "epoch": 0.5542199613046616, "grad_norm": 0.5957204103469849, "learning_rate": 1.3099414943791166e-05, "loss": 0.0427, "step": 31295 }, { "epoch": 0.5542376708416901, "grad_norm": 0.5809309482574463, "learning_rate": 1.3098561516550207e-05, "loss": 0.0536, "step": 31296 }, { "epoch": 0.5542553803787185, "grad_norm": 0.426796555519104, "learning_rate": 1.3097708095564787e-05, "loss": 0.0486, "step": 31297 }, { "epoch": 0.5542730899157469, "grad_norm": 0.378339946269989, "learning_rate": 1.3096854680837723e-05, "loss": 0.0516, "step": 31298 }, { "epoch": 0.5542907994527753, "grad_norm": 0.4634789824485779, "learning_rate": 1.3096001272371809e-05, "loss": 0.0593, "step": 31299 }, { "epoch": 0.5543085089898038, "grad_norm": 0.8667120337486267, "learning_rate": 1.3095147870169852e-05, "loss": 0.0712, "step": 31300 }, { "epoch": 0.5543262185268322, "grad_norm": 1.0249958038330078, "learning_rate": 1.3094294474234672e-05, "loss": 0.0765, "step": 31301 }, { "epoch": 0.5543439280638606, "grad_norm": 0.6539323329925537, "learning_rate": 1.3093441084569077e-05, "loss": 0.0649, "step": 31302 }, { "epoch": 0.554361637600889, "grad_norm": 0.7248240113258362, "learning_rate": 1.3092587701175861e-05, "loss": 0.0992, "step": 31303 }, { "epoch": 0.5543793471379175, "grad_norm": 0.49132996797561646, "learning_rate": 1.3091734324057836e-05, "loss": 0.0461, "step": 31304 }, { "epoch": 0.5543970566749459, "grad_norm": 0.5665618181228638, "learning_rate": 1.3090880953217818e-05, "loss": 0.0652, "step": 31305 }, { "epoch": 0.5544147662119743, "grad_norm": 0.5721004605293274, "learning_rate": 1.3090027588658605e-05, "loss": 0.072, "step": 31306 }, { "epoch": 0.5544324757490028, "grad_norm": 0.9435254335403442, "learning_rate": 1.3089174230383006e-05, "loss": 0.0696, "step": 31307 }, { "epoch": 0.5544501852860312, "grad_norm": 1.0767642259597778, "learning_rate": 1.3088320878393833e-05, "loss": 0.0885, "step": 31308 }, { "epoch": 0.5544678948230596, "grad_norm": 0.5696005821228027, "learning_rate": 1.3087467532693894e-05, "loss": 0.0602, "step": 31309 }, { "epoch": 0.554485604360088, "grad_norm": 0.8766822814941406, "learning_rate": 1.308661419328599e-05, "loss": 0.0673, "step": 31310 }, { "epoch": 0.5545033138971165, "grad_norm": 0.6625574827194214, "learning_rate": 1.308576086017293e-05, "loss": 0.056, "step": 31311 }, { "epoch": 0.5545210234341449, "grad_norm": 0.6685371398925781, "learning_rate": 1.3084907533357533e-05, "loss": 0.0593, "step": 31312 }, { "epoch": 0.5545387329711733, "grad_norm": 0.6860643029212952, "learning_rate": 1.3084054212842586e-05, "loss": 0.0678, "step": 31313 }, { "epoch": 0.5545564425082017, "grad_norm": 0.6140735149383545, "learning_rate": 1.3083200898630907e-05, "loss": 0.0705, "step": 31314 }, { "epoch": 0.5545741520452302, "grad_norm": 0.4357835650444031, "learning_rate": 1.3082347590725304e-05, "loss": 0.0728, "step": 31315 }, { "epoch": 0.5545918615822586, "grad_norm": 0.786827027797699, "learning_rate": 1.3081494289128591e-05, "loss": 0.0982, "step": 31316 }, { "epoch": 0.554609571119287, "grad_norm": 0.7446742653846741, "learning_rate": 1.3080640993843561e-05, "loss": 0.0997, "step": 31317 }, { "epoch": 0.5546272806563154, "grad_norm": 0.43881627917289734, "learning_rate": 1.3079787704873028e-05, "loss": 0.0608, "step": 31318 }, { "epoch": 0.5546449901933439, "grad_norm": 0.37010350823402405, "learning_rate": 1.3078934422219804e-05, "loss": 0.0449, "step": 31319 }, { "epoch": 0.5546626997303723, "grad_norm": 0.5238775014877319, "learning_rate": 1.3078081145886688e-05, "loss": 0.0471, "step": 31320 }, { "epoch": 0.5546804092674007, "grad_norm": 0.8135373592376709, "learning_rate": 1.3077227875876487e-05, "loss": 0.079, "step": 31321 }, { "epoch": 0.5546981188044292, "grad_norm": 0.39011064171791077, "learning_rate": 1.3076374612192015e-05, "loss": 0.0479, "step": 31322 }, { "epoch": 0.5547158283414576, "grad_norm": 0.5337151885032654, "learning_rate": 1.3075521354836079e-05, "loss": 0.0611, "step": 31323 }, { "epoch": 0.554733537878486, "grad_norm": 0.6611891984939575, "learning_rate": 1.307466810381148e-05, "loss": 0.077, "step": 31324 }, { "epoch": 0.5547512474155144, "grad_norm": 0.5495405197143555, "learning_rate": 1.3073814859121027e-05, "loss": 0.0526, "step": 31325 }, { "epoch": 0.5547689569525429, "grad_norm": 1.221725344657898, "learning_rate": 1.307296162076753e-05, "loss": 0.0808, "step": 31326 }, { "epoch": 0.5547866664895713, "grad_norm": 0.764380156993866, "learning_rate": 1.3072108388753798e-05, "loss": 0.0638, "step": 31327 }, { "epoch": 0.5548043760265997, "grad_norm": 0.7096787095069885, "learning_rate": 1.3071255163082631e-05, "loss": 0.0592, "step": 31328 }, { "epoch": 0.5548220855636281, "grad_norm": 0.7752687931060791, "learning_rate": 1.3070401943756834e-05, "loss": 0.0801, "step": 31329 }, { "epoch": 0.5548397951006566, "grad_norm": 0.764244019985199, "learning_rate": 1.306954873077923e-05, "loss": 0.0584, "step": 31330 }, { "epoch": 0.554857504637685, "grad_norm": 0.6045950651168823, "learning_rate": 1.306869552415261e-05, "loss": 0.0649, "step": 31331 }, { "epoch": 0.5548752141747134, "grad_norm": 1.0034316778182983, "learning_rate": 1.3067842323879785e-05, "loss": 0.0998, "step": 31332 }, { "epoch": 0.5548929237117418, "grad_norm": 0.5697485208511353, "learning_rate": 1.3066989129963562e-05, "loss": 0.0776, "step": 31333 }, { "epoch": 0.5549106332487703, "grad_norm": 0.7980000376701355, "learning_rate": 1.3066135942406756e-05, "loss": 0.0842, "step": 31334 }, { "epoch": 0.5549283427857987, "grad_norm": 0.6097344160079956, "learning_rate": 1.3065282761212161e-05, "loss": 0.0417, "step": 31335 }, { "epoch": 0.5549460523228271, "grad_norm": 0.7434884309768677, "learning_rate": 1.3064429586382592e-05, "loss": 0.0878, "step": 31336 }, { "epoch": 0.5549637618598556, "grad_norm": 0.5426512360572815, "learning_rate": 1.3063576417920854e-05, "loss": 0.0525, "step": 31337 }, { "epoch": 0.554981471396884, "grad_norm": 0.5606587529182434, "learning_rate": 1.3062723255829753e-05, "loss": 0.0791, "step": 31338 }, { "epoch": 0.5549991809339124, "grad_norm": 0.9909968972206116, "learning_rate": 1.3061870100112097e-05, "loss": 0.0876, "step": 31339 }, { "epoch": 0.5550168904709408, "grad_norm": 0.9043985605239868, "learning_rate": 1.3061016950770693e-05, "loss": 0.0809, "step": 31340 }, { "epoch": 0.5550346000079693, "grad_norm": 0.557255208492279, "learning_rate": 1.3060163807808352e-05, "loss": 0.0534, "step": 31341 }, { "epoch": 0.5550523095449977, "grad_norm": 0.4077340066432953, "learning_rate": 1.3059310671227869e-05, "loss": 0.0668, "step": 31342 }, { "epoch": 0.5550700190820261, "grad_norm": 0.2688490152359009, "learning_rate": 1.3058457541032056e-05, "loss": 0.0452, "step": 31343 }, { "epoch": 0.5550877286190545, "grad_norm": 0.617037296295166, "learning_rate": 1.3057604417223728e-05, "loss": 0.0497, "step": 31344 }, { "epoch": 0.555105438156083, "grad_norm": 0.5422094464302063, "learning_rate": 1.3056751299805683e-05, "loss": 0.0491, "step": 31345 }, { "epoch": 0.5551231476931114, "grad_norm": 0.7394779324531555, "learning_rate": 1.3055898188780726e-05, "loss": 0.0953, "step": 31346 }, { "epoch": 0.5551408572301398, "grad_norm": 0.5500145554542542, "learning_rate": 1.3055045084151669e-05, "loss": 0.0576, "step": 31347 }, { "epoch": 0.5551585667671682, "grad_norm": 0.82448410987854, "learning_rate": 1.3054191985921319e-05, "loss": 0.0699, "step": 31348 }, { "epoch": 0.5551762763041967, "grad_norm": 0.6870736479759216, "learning_rate": 1.3053338894092478e-05, "loss": 0.066, "step": 31349 }, { "epoch": 0.5551939858412251, "grad_norm": 0.604207456111908, "learning_rate": 1.3052485808667955e-05, "loss": 0.0549, "step": 31350 }, { "epoch": 0.5552116953782535, "grad_norm": 0.36984288692474365, "learning_rate": 1.305163272965056e-05, "loss": 0.0533, "step": 31351 }, { "epoch": 0.555229404915282, "grad_norm": 0.8703764081001282, "learning_rate": 1.3050779657043094e-05, "loss": 0.0607, "step": 31352 }, { "epoch": 0.5552471144523105, "grad_norm": 0.6321744322776794, "learning_rate": 1.3049926590848364e-05, "loss": 0.0673, "step": 31353 }, { "epoch": 0.5552648239893389, "grad_norm": 0.861217200756073, "learning_rate": 1.304907353106918e-05, "loss": 0.0705, "step": 31354 }, { "epoch": 0.5552825335263673, "grad_norm": 0.756621241569519, "learning_rate": 1.3048220477708352e-05, "loss": 0.1037, "step": 31355 }, { "epoch": 0.5553002430633958, "grad_norm": 0.66111159324646, "learning_rate": 1.3047367430768676e-05, "loss": 0.0653, "step": 31356 }, { "epoch": 0.5553179526004242, "grad_norm": 0.4641329348087311, "learning_rate": 1.304651439025296e-05, "loss": 0.0639, "step": 31357 }, { "epoch": 0.5553356621374526, "grad_norm": 0.7070780992507935, "learning_rate": 1.3045661356164016e-05, "loss": 0.072, "step": 31358 }, { "epoch": 0.555353371674481, "grad_norm": 0.5279121994972229, "learning_rate": 1.3044808328504655e-05, "loss": 0.0578, "step": 31359 }, { "epoch": 0.5553710812115095, "grad_norm": 0.6665891408920288, "learning_rate": 1.3043955307277672e-05, "loss": 0.0604, "step": 31360 }, { "epoch": 0.5553887907485379, "grad_norm": 0.8218125700950623, "learning_rate": 1.3043102292485878e-05, "loss": 0.0615, "step": 31361 }, { "epoch": 0.5554065002855663, "grad_norm": 0.8988685011863708, "learning_rate": 1.3042249284132082e-05, "loss": 0.0958, "step": 31362 }, { "epoch": 0.5554242098225947, "grad_norm": 0.5580261945724487, "learning_rate": 1.3041396282219083e-05, "loss": 0.0755, "step": 31363 }, { "epoch": 0.5554419193596232, "grad_norm": 0.7760521769523621, "learning_rate": 1.3040543286749697e-05, "loss": 0.0888, "step": 31364 }, { "epoch": 0.5554596288966516, "grad_norm": 0.5356101393699646, "learning_rate": 1.3039690297726722e-05, "loss": 0.0569, "step": 31365 }, { "epoch": 0.55547733843368, "grad_norm": 0.581475019454956, "learning_rate": 1.303883731515297e-05, "loss": 0.0585, "step": 31366 }, { "epoch": 0.5554950479707085, "grad_norm": 0.5929082632064819, "learning_rate": 1.3037984339031243e-05, "loss": 0.0699, "step": 31367 }, { "epoch": 0.5555127575077369, "grad_norm": 0.4797764718532562, "learning_rate": 1.3037131369364349e-05, "loss": 0.071, "step": 31368 }, { "epoch": 0.5555304670447653, "grad_norm": 0.6089693307876587, "learning_rate": 1.30362784061551e-05, "loss": 0.0724, "step": 31369 }, { "epoch": 0.5555481765817937, "grad_norm": 0.6334903836250305, "learning_rate": 1.3035425449406292e-05, "loss": 0.0479, "step": 31370 }, { "epoch": 0.5555658861188222, "grad_norm": 0.6102054119110107, "learning_rate": 1.3034572499120734e-05, "loss": 0.0794, "step": 31371 }, { "epoch": 0.5555835956558506, "grad_norm": 0.47887906432151794, "learning_rate": 1.303371955530123e-05, "loss": 0.0586, "step": 31372 }, { "epoch": 0.555601305192879, "grad_norm": 0.8942975997924805, "learning_rate": 1.30328666179506e-05, "loss": 0.0688, "step": 31373 }, { "epoch": 0.5556190147299074, "grad_norm": 0.7401956915855408, "learning_rate": 1.3032013687071633e-05, "loss": 0.0912, "step": 31374 }, { "epoch": 0.5556367242669359, "grad_norm": 0.517117977142334, "learning_rate": 1.3031160762667143e-05, "loss": 0.0831, "step": 31375 }, { "epoch": 0.5556544338039643, "grad_norm": 0.6874175667762756, "learning_rate": 1.3030307844739936e-05, "loss": 0.0607, "step": 31376 }, { "epoch": 0.5556721433409927, "grad_norm": 0.941568911075592, "learning_rate": 1.3029454933292816e-05, "loss": 0.0697, "step": 31377 }, { "epoch": 0.5556898528780211, "grad_norm": 0.6716726422309875, "learning_rate": 1.302860202832859e-05, "loss": 0.0602, "step": 31378 }, { "epoch": 0.5557075624150496, "grad_norm": 0.581423282623291, "learning_rate": 1.3027749129850064e-05, "loss": 0.0759, "step": 31379 }, { "epoch": 0.555725271952078, "grad_norm": 0.620476245880127, "learning_rate": 1.3026896237860046e-05, "loss": 0.06, "step": 31380 }, { "epoch": 0.5557429814891064, "grad_norm": 0.9806293845176697, "learning_rate": 1.3026043352361337e-05, "loss": 0.0917, "step": 31381 }, { "epoch": 0.5557606910261349, "grad_norm": 0.8566989302635193, "learning_rate": 1.3025190473356746e-05, "loss": 0.0669, "step": 31382 }, { "epoch": 0.5557784005631633, "grad_norm": 0.7529498934745789, "learning_rate": 1.3024337600849084e-05, "loss": 0.1058, "step": 31383 }, { "epoch": 0.5557961101001917, "grad_norm": 0.6224807500839233, "learning_rate": 1.3023484734841146e-05, "loss": 0.0686, "step": 31384 }, { "epoch": 0.5558138196372201, "grad_norm": 0.532093346118927, "learning_rate": 1.3022631875335743e-05, "loss": 0.0697, "step": 31385 }, { "epoch": 0.5558315291742486, "grad_norm": 0.41889306902885437, "learning_rate": 1.3021779022335682e-05, "loss": 0.0482, "step": 31386 }, { "epoch": 0.555849238711277, "grad_norm": 0.7295282483100891, "learning_rate": 1.3020926175843771e-05, "loss": 0.0774, "step": 31387 }, { "epoch": 0.5558669482483054, "grad_norm": 0.6907130479812622, "learning_rate": 1.3020073335862808e-05, "loss": 0.0705, "step": 31388 }, { "epoch": 0.5558846577853338, "grad_norm": 0.6079961061477661, "learning_rate": 1.3019220502395602e-05, "loss": 0.0712, "step": 31389 }, { "epoch": 0.5559023673223623, "grad_norm": 0.7680983543395996, "learning_rate": 1.3018367675444967e-05, "loss": 0.0487, "step": 31390 }, { "epoch": 0.5559200768593907, "grad_norm": 0.6513264179229736, "learning_rate": 1.3017514855013699e-05, "loss": 0.0983, "step": 31391 }, { "epoch": 0.5559377863964191, "grad_norm": 0.7002441883087158, "learning_rate": 1.3016662041104603e-05, "loss": 0.0339, "step": 31392 }, { "epoch": 0.5559554959334475, "grad_norm": 0.5448518395423889, "learning_rate": 1.3015809233720491e-05, "loss": 0.0492, "step": 31393 }, { "epoch": 0.555973205470476, "grad_norm": 0.5066615343093872, "learning_rate": 1.3014956432864171e-05, "loss": 0.0744, "step": 31394 }, { "epoch": 0.5559909150075044, "grad_norm": 0.7225425839424133, "learning_rate": 1.3014103638538435e-05, "loss": 0.0687, "step": 31395 }, { "epoch": 0.5560086245445328, "grad_norm": 0.4611146152019501, "learning_rate": 1.30132508507461e-05, "loss": 0.0746, "step": 31396 }, { "epoch": 0.5560263340815613, "grad_norm": 0.991266131401062, "learning_rate": 1.3012398069489969e-05, "loss": 0.083, "step": 31397 }, { "epoch": 0.5560440436185897, "grad_norm": 0.5921623706817627, "learning_rate": 1.3011545294772853e-05, "loss": 0.0593, "step": 31398 }, { "epoch": 0.5560617531556181, "grad_norm": 0.6947253346443176, "learning_rate": 1.3010692526597546e-05, "loss": 0.054, "step": 31399 }, { "epoch": 0.5560794626926465, "grad_norm": 0.6990795731544495, "learning_rate": 1.300983976496686e-05, "loss": 0.0715, "step": 31400 }, { "epoch": 0.556097172229675, "grad_norm": 0.48155108094215393, "learning_rate": 1.3008987009883602e-05, "loss": 0.0757, "step": 31401 }, { "epoch": 0.5561148817667034, "grad_norm": 0.7787246108055115, "learning_rate": 1.3008134261350572e-05, "loss": 0.0646, "step": 31402 }, { "epoch": 0.5561325913037318, "grad_norm": 0.6948229074478149, "learning_rate": 1.3007281519370579e-05, "loss": 0.0705, "step": 31403 }, { "epoch": 0.5561503008407602, "grad_norm": 0.4893482029438019, "learning_rate": 1.300642878394643e-05, "loss": 0.0459, "step": 31404 }, { "epoch": 0.5561680103777887, "grad_norm": 0.7192875742912292, "learning_rate": 1.300557605508093e-05, "loss": 0.0729, "step": 31405 }, { "epoch": 0.5561857199148171, "grad_norm": 0.5523310899734497, "learning_rate": 1.3004723332776879e-05, "loss": 0.0569, "step": 31406 }, { "epoch": 0.5562034294518455, "grad_norm": 0.4450775384902954, "learning_rate": 1.3003870617037089e-05, "loss": 0.0541, "step": 31407 }, { "epoch": 0.5562211389888739, "grad_norm": 0.8066673278808594, "learning_rate": 1.3003017907864367e-05, "loss": 0.0812, "step": 31408 }, { "epoch": 0.5562388485259024, "grad_norm": 0.7665965557098389, "learning_rate": 1.3002165205261504e-05, "loss": 0.0748, "step": 31409 }, { "epoch": 0.5562565580629308, "grad_norm": 0.6807926297187805, "learning_rate": 1.3001312509231322e-05, "loss": 0.0701, "step": 31410 }, { "epoch": 0.5562742675999592, "grad_norm": 0.6999444365501404, "learning_rate": 1.3000459819776618e-05, "loss": 0.0427, "step": 31411 }, { "epoch": 0.5562919771369877, "grad_norm": 0.5223407745361328, "learning_rate": 1.2999607136900203e-05, "loss": 0.059, "step": 31412 }, { "epoch": 0.5563096866740161, "grad_norm": 0.6391555070877075, "learning_rate": 1.2998754460604874e-05, "loss": 0.0679, "step": 31413 }, { "epoch": 0.5563273962110445, "grad_norm": 0.5636505484580994, "learning_rate": 1.299790179089344e-05, "loss": 0.0627, "step": 31414 }, { "epoch": 0.5563451057480729, "grad_norm": 0.4807247817516327, "learning_rate": 1.2997049127768711e-05, "loss": 0.0644, "step": 31415 }, { "epoch": 0.5563628152851015, "grad_norm": 0.9196189045906067, "learning_rate": 1.2996196471233484e-05, "loss": 0.0687, "step": 31416 }, { "epoch": 0.5563805248221299, "grad_norm": 0.9540018439292908, "learning_rate": 1.2995343821290567e-05, "loss": 0.0934, "step": 31417 }, { "epoch": 0.5563982343591583, "grad_norm": 1.0326964855194092, "learning_rate": 1.2994491177942767e-05, "loss": 0.0406, "step": 31418 }, { "epoch": 0.5564159438961866, "grad_norm": 0.238214373588562, "learning_rate": 1.2993638541192892e-05, "loss": 0.0501, "step": 31419 }, { "epoch": 0.5564336534332152, "grad_norm": 0.5824107527732849, "learning_rate": 1.2992785911043738e-05, "loss": 0.0504, "step": 31420 }, { "epoch": 0.5564513629702436, "grad_norm": 0.6591788530349731, "learning_rate": 1.2991933287498117e-05, "loss": 0.0714, "step": 31421 }, { "epoch": 0.556469072507272, "grad_norm": 0.4877742528915405, "learning_rate": 1.2991080670558839e-05, "loss": 0.0505, "step": 31422 }, { "epoch": 0.5564867820443004, "grad_norm": 0.6139743328094482, "learning_rate": 1.2990228060228694e-05, "loss": 0.1045, "step": 31423 }, { "epoch": 0.5565044915813289, "grad_norm": 0.5195713639259338, "learning_rate": 1.2989375456510494e-05, "loss": 0.0655, "step": 31424 }, { "epoch": 0.5565222011183573, "grad_norm": 0.6163328886032104, "learning_rate": 1.2988522859407048e-05, "loss": 0.0605, "step": 31425 }, { "epoch": 0.5565399106553857, "grad_norm": 0.742955207824707, "learning_rate": 1.2987670268921164e-05, "loss": 0.0691, "step": 31426 }, { "epoch": 0.5565576201924142, "grad_norm": 0.5271834135055542, "learning_rate": 1.2986817685055635e-05, "loss": 0.0557, "step": 31427 }, { "epoch": 0.5565753297294426, "grad_norm": 0.5529646873474121, "learning_rate": 1.2985965107813274e-05, "loss": 0.0748, "step": 31428 }, { "epoch": 0.556593039266471, "grad_norm": 0.8178931474685669, "learning_rate": 1.2985112537196884e-05, "loss": 0.0813, "step": 31429 }, { "epoch": 0.5566107488034994, "grad_norm": 0.606665313243866, "learning_rate": 1.298425997320927e-05, "loss": 0.074, "step": 31430 }, { "epoch": 0.5566284583405279, "grad_norm": 0.7440615892410278, "learning_rate": 1.2983407415853237e-05, "loss": 0.0589, "step": 31431 }, { "epoch": 0.5566461678775563, "grad_norm": 0.4796551764011383, "learning_rate": 1.2982554865131587e-05, "loss": 0.0812, "step": 31432 }, { "epoch": 0.5566638774145847, "grad_norm": 0.5763682723045349, "learning_rate": 1.2981702321047132e-05, "loss": 0.0696, "step": 31433 }, { "epoch": 0.5566815869516131, "grad_norm": 0.7240610718727112, "learning_rate": 1.298084978360267e-05, "loss": 0.06, "step": 31434 }, { "epoch": 0.5566992964886416, "grad_norm": 0.7993595004081726, "learning_rate": 1.2979997252801006e-05, "loss": 0.0703, "step": 31435 }, { "epoch": 0.55671700602567, "grad_norm": 0.594622015953064, "learning_rate": 1.2979144728644949e-05, "loss": 0.0699, "step": 31436 }, { "epoch": 0.5567347155626984, "grad_norm": 1.2039096355438232, "learning_rate": 1.2978292211137306e-05, "loss": 0.0504, "step": 31437 }, { "epoch": 0.5567524250997268, "grad_norm": 0.5605003833770752, "learning_rate": 1.297743970028087e-05, "loss": 0.0764, "step": 31438 }, { "epoch": 0.5567701346367553, "grad_norm": 0.5683051943778992, "learning_rate": 1.297658719607845e-05, "loss": 0.0612, "step": 31439 }, { "epoch": 0.5567878441737837, "grad_norm": 0.5235254764556885, "learning_rate": 1.2975734698532865e-05, "loss": 0.0505, "step": 31440 }, { "epoch": 0.5568055537108121, "grad_norm": 0.6457208395004272, "learning_rate": 1.2974882207646902e-05, "loss": 0.0808, "step": 31441 }, { "epoch": 0.5568232632478406, "grad_norm": 0.6463894248008728, "learning_rate": 1.297402972342337e-05, "loss": 0.074, "step": 31442 }, { "epoch": 0.556840972784869, "grad_norm": 0.4667113125324249, "learning_rate": 1.2973177245865076e-05, "loss": 0.0627, "step": 31443 }, { "epoch": 0.5568586823218974, "grad_norm": 0.6376446485519409, "learning_rate": 1.2972324774974828e-05, "loss": 0.0642, "step": 31444 }, { "epoch": 0.5568763918589258, "grad_norm": 0.6282168626785278, "learning_rate": 1.297147231075542e-05, "loss": 0.0884, "step": 31445 }, { "epoch": 0.5568941013959543, "grad_norm": 0.5350281596183777, "learning_rate": 1.2970619853209665e-05, "loss": 0.0882, "step": 31446 }, { "epoch": 0.5569118109329827, "grad_norm": 0.4211069941520691, "learning_rate": 1.2969767402340368e-05, "loss": 0.0505, "step": 31447 }, { "epoch": 0.5569295204700111, "grad_norm": 0.6909359693527222, "learning_rate": 1.2968914958150328e-05, "loss": 0.079, "step": 31448 }, { "epoch": 0.5569472300070395, "grad_norm": 0.5014763474464417, "learning_rate": 1.2968062520642355e-05, "loss": 0.0789, "step": 31449 }, { "epoch": 0.556964939544068, "grad_norm": 0.5784206390380859, "learning_rate": 1.2967210089819246e-05, "loss": 0.0586, "step": 31450 }, { "epoch": 0.5569826490810964, "grad_norm": 0.8219859600067139, "learning_rate": 1.2966357665683819e-05, "loss": 0.0916, "step": 31451 }, { "epoch": 0.5570003586181248, "grad_norm": 0.8721848726272583, "learning_rate": 1.2965505248238865e-05, "loss": 0.0716, "step": 31452 }, { "epoch": 0.5570180681551532, "grad_norm": 0.7805938720703125, "learning_rate": 1.2964652837487188e-05, "loss": 0.0532, "step": 31453 }, { "epoch": 0.5570357776921817, "grad_norm": 0.6156862378120422, "learning_rate": 1.2963800433431605e-05, "loss": 0.0666, "step": 31454 }, { "epoch": 0.5570534872292101, "grad_norm": 0.547986626625061, "learning_rate": 1.2962948036074908e-05, "loss": 0.0606, "step": 31455 }, { "epoch": 0.5570711967662385, "grad_norm": 0.4954211413860321, "learning_rate": 1.2962095645419906e-05, "loss": 0.0544, "step": 31456 }, { "epoch": 0.557088906303267, "grad_norm": 0.4141111969947815, "learning_rate": 1.2961243261469403e-05, "loss": 0.0784, "step": 31457 }, { "epoch": 0.5571066158402954, "grad_norm": 0.7367815375328064, "learning_rate": 1.2960390884226208e-05, "loss": 0.0681, "step": 31458 }, { "epoch": 0.5571243253773238, "grad_norm": 0.6516585350036621, "learning_rate": 1.2959538513693115e-05, "loss": 0.0943, "step": 31459 }, { "epoch": 0.5571420349143522, "grad_norm": 0.4815778136253357, "learning_rate": 1.2958686149872934e-05, "loss": 0.0743, "step": 31460 }, { "epoch": 0.5571597444513807, "grad_norm": 0.7440378665924072, "learning_rate": 1.2957833792768471e-05, "loss": 0.0927, "step": 31461 }, { "epoch": 0.5571774539884091, "grad_norm": 1.0126014947891235, "learning_rate": 1.2956981442382531e-05, "loss": 0.0606, "step": 31462 }, { "epoch": 0.5571951635254375, "grad_norm": 0.5329815745353699, "learning_rate": 1.2956129098717913e-05, "loss": 0.0776, "step": 31463 }, { "epoch": 0.5572128730624659, "grad_norm": 0.8077289462089539, "learning_rate": 1.2955276761777421e-05, "loss": 0.0686, "step": 31464 }, { "epoch": 0.5572305825994944, "grad_norm": 1.051855444908142, "learning_rate": 1.2954424431563871e-05, "loss": 0.0849, "step": 31465 }, { "epoch": 0.5572482921365228, "grad_norm": 0.6728551387786865, "learning_rate": 1.295357210808005e-05, "loss": 0.0769, "step": 31466 }, { "epoch": 0.5572660016735512, "grad_norm": 0.7024386525154114, "learning_rate": 1.2952719791328767e-05, "loss": 0.0823, "step": 31467 }, { "epoch": 0.5572837112105796, "grad_norm": 0.640887439250946, "learning_rate": 1.295186748131283e-05, "loss": 0.0602, "step": 31468 }, { "epoch": 0.5573014207476081, "grad_norm": 0.4145090878009796, "learning_rate": 1.295101517803505e-05, "loss": 0.0535, "step": 31469 }, { "epoch": 0.5573191302846365, "grad_norm": 0.7465123534202576, "learning_rate": 1.2950162881498218e-05, "loss": 0.0941, "step": 31470 }, { "epoch": 0.5573368398216649, "grad_norm": 0.4567716419696808, "learning_rate": 1.294931059170514e-05, "loss": 0.0715, "step": 31471 }, { "epoch": 0.5573545493586934, "grad_norm": 0.46298688650131226, "learning_rate": 1.2948458308658628e-05, "loss": 0.0572, "step": 31472 }, { "epoch": 0.5573722588957218, "grad_norm": 0.8882218599319458, "learning_rate": 1.2947606032361474e-05, "loss": 0.0757, "step": 31473 }, { "epoch": 0.5573899684327502, "grad_norm": 0.41812270879745483, "learning_rate": 1.2946753762816492e-05, "loss": 0.0652, "step": 31474 }, { "epoch": 0.5574076779697786, "grad_norm": 0.873375654220581, "learning_rate": 1.2945901500026482e-05, "loss": 0.068, "step": 31475 }, { "epoch": 0.5574253875068071, "grad_norm": 0.5837485194206238, "learning_rate": 1.2945049243994249e-05, "loss": 0.0874, "step": 31476 }, { "epoch": 0.5574430970438355, "grad_norm": 0.9529101252555847, "learning_rate": 1.2944196994722594e-05, "loss": 0.063, "step": 31477 }, { "epoch": 0.5574608065808639, "grad_norm": 0.9891709089279175, "learning_rate": 1.2943344752214325e-05, "loss": 0.0652, "step": 31478 }, { "epoch": 0.5574785161178923, "grad_norm": 0.36560487747192383, "learning_rate": 1.294249251647225e-05, "loss": 0.0445, "step": 31479 }, { "epoch": 0.5574962256549209, "grad_norm": 0.45263615250587463, "learning_rate": 1.2941640287499158e-05, "loss": 0.0821, "step": 31480 }, { "epoch": 0.5575139351919493, "grad_norm": 0.6185146570205688, "learning_rate": 1.2940788065297863e-05, "loss": 0.0678, "step": 31481 }, { "epoch": 0.5575316447289776, "grad_norm": 0.6167140007019043, "learning_rate": 1.2939935849871162e-05, "loss": 0.0758, "step": 31482 }, { "epoch": 0.557549354266006, "grad_norm": 0.24547593295574188, "learning_rate": 1.2939083641221874e-05, "loss": 0.0574, "step": 31483 }, { "epoch": 0.5575670638030346, "grad_norm": 0.29256194829940796, "learning_rate": 1.2938231439352787e-05, "loss": 0.032, "step": 31484 }, { "epoch": 0.557584773340063, "grad_norm": 0.749375581741333, "learning_rate": 1.293737924426671e-05, "loss": 0.0505, "step": 31485 }, { "epoch": 0.5576024828770914, "grad_norm": 0.56859290599823, "learning_rate": 1.2936527055966448e-05, "loss": 0.0877, "step": 31486 }, { "epoch": 0.5576201924141199, "grad_norm": 0.20681947469711304, "learning_rate": 1.2935674874454801e-05, "loss": 0.0513, "step": 31487 }, { "epoch": 0.5576379019511483, "grad_norm": 0.7655296921730042, "learning_rate": 1.2934822699734575e-05, "loss": 0.0781, "step": 31488 }, { "epoch": 0.5576556114881767, "grad_norm": 0.8834663033485413, "learning_rate": 1.2933970531808574e-05, "loss": 0.0544, "step": 31489 }, { "epoch": 0.5576733210252051, "grad_norm": 0.6120762228965759, "learning_rate": 1.2933118370679604e-05, "loss": 0.0352, "step": 31490 }, { "epoch": 0.5576910305622336, "grad_norm": 0.4060348868370056, "learning_rate": 1.2932266216350462e-05, "loss": 0.0481, "step": 31491 }, { "epoch": 0.557708740099262, "grad_norm": 0.7538771033287048, "learning_rate": 1.2931414068823956e-05, "loss": 0.0866, "step": 31492 }, { "epoch": 0.5577264496362904, "grad_norm": 0.6157392859458923, "learning_rate": 1.2930561928102894e-05, "loss": 0.0798, "step": 31493 }, { "epoch": 0.5577441591733188, "grad_norm": 0.9029898643493652, "learning_rate": 1.2929709794190067e-05, "loss": 0.0606, "step": 31494 }, { "epoch": 0.5577618687103473, "grad_norm": 0.7942318916320801, "learning_rate": 1.2928857667088285e-05, "loss": 0.0619, "step": 31495 }, { "epoch": 0.5577795782473757, "grad_norm": 0.6854258179664612, "learning_rate": 1.2928005546800351e-05, "loss": 0.0568, "step": 31496 }, { "epoch": 0.5577972877844041, "grad_norm": 0.7365768551826477, "learning_rate": 1.2927153433329078e-05, "loss": 0.0505, "step": 31497 }, { "epoch": 0.5578149973214325, "grad_norm": 0.7803095579147339, "learning_rate": 1.2926301326677253e-05, "loss": 0.0629, "step": 31498 }, { "epoch": 0.557832706858461, "grad_norm": 0.5893849730491638, "learning_rate": 1.2925449226847687e-05, "loss": 0.0747, "step": 31499 }, { "epoch": 0.5578504163954894, "grad_norm": 0.5283639430999756, "learning_rate": 1.2924597133843183e-05, "loss": 0.0668, "step": 31500 }, { "epoch": 0.5578681259325178, "grad_norm": 0.5144966244697571, "learning_rate": 1.2923745047666548e-05, "loss": 0.0431, "step": 31501 }, { "epoch": 0.5578858354695463, "grad_norm": 0.7369498014450073, "learning_rate": 1.292289296832058e-05, "loss": 0.0875, "step": 31502 }, { "epoch": 0.5579035450065747, "grad_norm": 0.5291873216629028, "learning_rate": 1.2922040895808083e-05, "loss": 0.0371, "step": 31503 }, { "epoch": 0.5579212545436031, "grad_norm": 0.4316406548023224, "learning_rate": 1.2921188830131863e-05, "loss": 0.0618, "step": 31504 }, { "epoch": 0.5579389640806315, "grad_norm": 0.7446994781494141, "learning_rate": 1.2920336771294721e-05, "loss": 0.0649, "step": 31505 }, { "epoch": 0.55795667361766, "grad_norm": 0.5677075982093811, "learning_rate": 1.2919484719299457e-05, "loss": 0.0623, "step": 31506 }, { "epoch": 0.5579743831546884, "grad_norm": 0.5613978505134583, "learning_rate": 1.2918632674148882e-05, "loss": 0.0915, "step": 31507 }, { "epoch": 0.5579920926917168, "grad_norm": 0.614822268486023, "learning_rate": 1.29177806358458e-05, "loss": 0.0604, "step": 31508 }, { "epoch": 0.5580098022287452, "grad_norm": 0.608482837677002, "learning_rate": 1.2916928604393002e-05, "loss": 0.0887, "step": 31509 }, { "epoch": 0.5580275117657737, "grad_norm": 0.866713285446167, "learning_rate": 1.2916076579793298e-05, "loss": 0.0618, "step": 31510 }, { "epoch": 0.5580452213028021, "grad_norm": 0.6381991505622864, "learning_rate": 1.2915224562049495e-05, "loss": 0.0805, "step": 31511 }, { "epoch": 0.5580629308398305, "grad_norm": 0.6115503907203674, "learning_rate": 1.291437255116439e-05, "loss": 0.0631, "step": 31512 }, { "epoch": 0.5580806403768589, "grad_norm": 0.5902110934257507, "learning_rate": 1.2913520547140786e-05, "loss": 0.0891, "step": 31513 }, { "epoch": 0.5580983499138874, "grad_norm": 0.8964792490005493, "learning_rate": 1.2912668549981491e-05, "loss": 0.0601, "step": 31514 }, { "epoch": 0.5581160594509158, "grad_norm": 0.5692107677459717, "learning_rate": 1.2911816559689306e-05, "loss": 0.0514, "step": 31515 }, { "epoch": 0.5581337689879442, "grad_norm": 0.46579691767692566, "learning_rate": 1.2910964576267031e-05, "loss": 0.0659, "step": 31516 }, { "epoch": 0.5581514785249727, "grad_norm": 1.0966284275054932, "learning_rate": 1.2910112599717472e-05, "loss": 0.0765, "step": 31517 }, { "epoch": 0.5581691880620011, "grad_norm": 0.5737146735191345, "learning_rate": 1.2909260630043437e-05, "loss": 0.0954, "step": 31518 }, { "epoch": 0.5581868975990295, "grad_norm": 0.7257658839225769, "learning_rate": 1.2908408667247714e-05, "loss": 0.0783, "step": 31519 }, { "epoch": 0.5582046071360579, "grad_norm": 0.5286385416984558, "learning_rate": 1.2907556711333119e-05, "loss": 0.0641, "step": 31520 }, { "epoch": 0.5582223166730864, "grad_norm": 0.7262259721755981, "learning_rate": 1.290670476230245e-05, "loss": 0.0411, "step": 31521 }, { "epoch": 0.5582400262101148, "grad_norm": 0.8071216940879822, "learning_rate": 1.2905852820158516e-05, "loss": 0.0612, "step": 31522 }, { "epoch": 0.5582577357471432, "grad_norm": 0.3868533670902252, "learning_rate": 1.2905000884904108e-05, "loss": 0.0786, "step": 31523 }, { "epoch": 0.5582754452841716, "grad_norm": 1.1009724140167236, "learning_rate": 1.2904148956542036e-05, "loss": 0.0713, "step": 31524 }, { "epoch": 0.5582931548212001, "grad_norm": 0.47131890058517456, "learning_rate": 1.2903297035075107e-05, "loss": 0.0673, "step": 31525 }, { "epoch": 0.5583108643582285, "grad_norm": 0.7152882218360901, "learning_rate": 1.2902445120506112e-05, "loss": 0.081, "step": 31526 }, { "epoch": 0.5583285738952569, "grad_norm": 0.856020450592041, "learning_rate": 1.2901593212837861e-05, "loss": 0.0786, "step": 31527 }, { "epoch": 0.5583462834322853, "grad_norm": 0.8354696035385132, "learning_rate": 1.2900741312073159e-05, "loss": 0.0706, "step": 31528 }, { "epoch": 0.5583639929693138, "grad_norm": 0.6709499359130859, "learning_rate": 1.2899889418214806e-05, "loss": 0.0589, "step": 31529 }, { "epoch": 0.5583817025063422, "grad_norm": 0.7762269973754883, "learning_rate": 1.2899037531265603e-05, "loss": 0.0776, "step": 31530 }, { "epoch": 0.5583994120433706, "grad_norm": 0.5346760153770447, "learning_rate": 1.2898185651228353e-05, "loss": 0.0514, "step": 31531 }, { "epoch": 0.5584171215803991, "grad_norm": 0.4024926424026489, "learning_rate": 1.289733377810586e-05, "loss": 0.05, "step": 31532 }, { "epoch": 0.5584348311174275, "grad_norm": 0.7253000140190125, "learning_rate": 1.2896481911900928e-05, "loss": 0.0528, "step": 31533 }, { "epoch": 0.5584525406544559, "grad_norm": 0.7527802586555481, "learning_rate": 1.2895630052616355e-05, "loss": 0.0738, "step": 31534 }, { "epoch": 0.5584702501914843, "grad_norm": 0.8812085390090942, "learning_rate": 1.2894778200254949e-05, "loss": 0.0666, "step": 31535 }, { "epoch": 0.5584879597285128, "grad_norm": 0.48016369342803955, "learning_rate": 1.2893926354819513e-05, "loss": 0.0599, "step": 31536 }, { "epoch": 0.5585056692655412, "grad_norm": 0.29022085666656494, "learning_rate": 1.289307451631284e-05, "loss": 0.0342, "step": 31537 }, { "epoch": 0.5585233788025696, "grad_norm": 0.4280359447002411, "learning_rate": 1.289222268473774e-05, "loss": 0.0929, "step": 31538 }, { "epoch": 0.558541088339598, "grad_norm": 0.823145866394043, "learning_rate": 1.2891370860097015e-05, "loss": 0.0623, "step": 31539 }, { "epoch": 0.5585587978766265, "grad_norm": 0.5675235390663147, "learning_rate": 1.2890519042393469e-05, "loss": 0.0804, "step": 31540 }, { "epoch": 0.5585765074136549, "grad_norm": 0.6878769993782043, "learning_rate": 1.2889667231629898e-05, "loss": 0.0856, "step": 31541 }, { "epoch": 0.5585942169506833, "grad_norm": 0.5944958329200745, "learning_rate": 1.2888815427809108e-05, "loss": 0.0665, "step": 31542 }, { "epoch": 0.5586119264877117, "grad_norm": 0.5152678489685059, "learning_rate": 1.2887963630933909e-05, "loss": 0.0458, "step": 31543 }, { "epoch": 0.5586296360247403, "grad_norm": 0.9594475030899048, "learning_rate": 1.288711184100709e-05, "loss": 0.0732, "step": 31544 }, { "epoch": 0.5586473455617686, "grad_norm": 0.4128396809101105, "learning_rate": 1.2886260058031458e-05, "loss": 0.0376, "step": 31545 }, { "epoch": 0.558665055098797, "grad_norm": 1.0238755941390991, "learning_rate": 1.2885408282009817e-05, "loss": 0.0762, "step": 31546 }, { "epoch": 0.5586827646358256, "grad_norm": 0.17558635771274567, "learning_rate": 1.2884556512944975e-05, "loss": 0.0545, "step": 31547 }, { "epoch": 0.558700474172854, "grad_norm": 0.39433974027633667, "learning_rate": 1.288370475083972e-05, "loss": 0.0439, "step": 31548 }, { "epoch": 0.5587181837098824, "grad_norm": 0.6094312071800232, "learning_rate": 1.2882852995696866e-05, "loss": 0.0613, "step": 31549 }, { "epoch": 0.5587358932469108, "grad_norm": 0.5393736362457275, "learning_rate": 1.2882001247519215e-05, "loss": 0.0693, "step": 31550 }, { "epoch": 0.5587536027839393, "grad_norm": 0.4550098776817322, "learning_rate": 1.2881149506309563e-05, "loss": 0.0895, "step": 31551 }, { "epoch": 0.5587713123209677, "grad_norm": 0.5749484300613403, "learning_rate": 1.2880297772070712e-05, "loss": 0.0654, "step": 31552 }, { "epoch": 0.5587890218579961, "grad_norm": 0.5678461194038391, "learning_rate": 1.2879446044805468e-05, "loss": 0.0565, "step": 31553 }, { "epoch": 0.5588067313950245, "grad_norm": 0.6610795855522156, "learning_rate": 1.2878594324516634e-05, "loss": 0.0522, "step": 31554 }, { "epoch": 0.558824440932053, "grad_norm": 0.6668117046356201, "learning_rate": 1.2877742611207008e-05, "loss": 0.0596, "step": 31555 }, { "epoch": 0.5588421504690814, "grad_norm": 0.5226237177848816, "learning_rate": 1.2876890904879395e-05, "loss": 0.089, "step": 31556 }, { "epoch": 0.5588598600061098, "grad_norm": 0.9184889197349548, "learning_rate": 1.2876039205536599e-05, "loss": 0.0652, "step": 31557 }, { "epoch": 0.5588775695431382, "grad_norm": 0.8551676869392395, "learning_rate": 1.2875187513181416e-05, "loss": 0.1262, "step": 31558 }, { "epoch": 0.5588952790801667, "grad_norm": 0.5453310608863831, "learning_rate": 1.287433582781665e-05, "loss": 0.0885, "step": 31559 }, { "epoch": 0.5589129886171951, "grad_norm": 0.5220983028411865, "learning_rate": 1.2873484149445104e-05, "loss": 0.0675, "step": 31560 }, { "epoch": 0.5589306981542235, "grad_norm": 1.0740015506744385, "learning_rate": 1.2872632478069588e-05, "loss": 0.0749, "step": 31561 }, { "epoch": 0.558948407691252, "grad_norm": 0.3918151557445526, "learning_rate": 1.2871780813692885e-05, "loss": 0.0685, "step": 31562 }, { "epoch": 0.5589661172282804, "grad_norm": 0.471017986536026, "learning_rate": 1.2870929156317813e-05, "loss": 0.0596, "step": 31563 }, { "epoch": 0.5589838267653088, "grad_norm": 0.853961169719696, "learning_rate": 1.2870077505947173e-05, "loss": 0.0905, "step": 31564 }, { "epoch": 0.5590015363023372, "grad_norm": 0.6735260486602783, "learning_rate": 1.2869225862583758e-05, "loss": 0.062, "step": 31565 }, { "epoch": 0.5590192458393657, "grad_norm": 0.6684700846672058, "learning_rate": 1.2868374226230373e-05, "loss": 0.0797, "step": 31566 }, { "epoch": 0.5590369553763941, "grad_norm": 0.2155119627714157, "learning_rate": 1.2867522596889822e-05, "loss": 0.0488, "step": 31567 }, { "epoch": 0.5590546649134225, "grad_norm": 0.37752291560173035, "learning_rate": 1.2866670974564907e-05, "loss": 0.0564, "step": 31568 }, { "epoch": 0.5590723744504509, "grad_norm": 0.5707870721817017, "learning_rate": 1.2865819359258428e-05, "loss": 0.0646, "step": 31569 }, { "epoch": 0.5590900839874794, "grad_norm": 0.4127880930900574, "learning_rate": 1.2864967750973186e-05, "loss": 0.0596, "step": 31570 }, { "epoch": 0.5591077935245078, "grad_norm": 0.6427428722381592, "learning_rate": 1.2864116149711987e-05, "loss": 0.0733, "step": 31571 }, { "epoch": 0.5591255030615362, "grad_norm": 0.4257816672325134, "learning_rate": 1.2863264555477631e-05, "loss": 0.0584, "step": 31572 }, { "epoch": 0.5591432125985646, "grad_norm": 1.065602421760559, "learning_rate": 1.2862412968272913e-05, "loss": 0.0711, "step": 31573 }, { "epoch": 0.5591609221355931, "grad_norm": 0.39842432737350464, "learning_rate": 1.2861561388100644e-05, "loss": 0.0547, "step": 31574 }, { "epoch": 0.5591786316726215, "grad_norm": 0.7794880867004395, "learning_rate": 1.2860709814963626e-05, "loss": 0.0501, "step": 31575 }, { "epoch": 0.5591963412096499, "grad_norm": 0.9119457006454468, "learning_rate": 1.2859858248864652e-05, "loss": 0.0865, "step": 31576 }, { "epoch": 0.5592140507466784, "grad_norm": 0.620698869228363, "learning_rate": 1.2859006689806522e-05, "loss": 0.0596, "step": 31577 }, { "epoch": 0.5592317602837068, "grad_norm": 0.7028619050979614, "learning_rate": 1.2858155137792049e-05, "loss": 0.0871, "step": 31578 }, { "epoch": 0.5592494698207352, "grad_norm": 0.4663306474685669, "learning_rate": 1.2857303592824033e-05, "loss": 0.0412, "step": 31579 }, { "epoch": 0.5592671793577636, "grad_norm": 0.6429193019866943, "learning_rate": 1.2856452054905267e-05, "loss": 0.0775, "step": 31580 }, { "epoch": 0.5592848888947921, "grad_norm": 0.5334696769714355, "learning_rate": 1.2855600524038557e-05, "loss": 0.0702, "step": 31581 }, { "epoch": 0.5593025984318205, "grad_norm": 0.5290891528129578, "learning_rate": 1.2854749000226706e-05, "loss": 0.0688, "step": 31582 }, { "epoch": 0.5593203079688489, "grad_norm": 0.7988594174385071, "learning_rate": 1.285389748347251e-05, "loss": 0.0443, "step": 31583 }, { "epoch": 0.5593380175058773, "grad_norm": 0.41732388734817505, "learning_rate": 1.2853045973778777e-05, "loss": 0.038, "step": 31584 }, { "epoch": 0.5593557270429058, "grad_norm": 0.5839695334434509, "learning_rate": 1.2852194471148303e-05, "loss": 0.0407, "step": 31585 }, { "epoch": 0.5593734365799342, "grad_norm": 1.239974021911621, "learning_rate": 1.2851342975583897e-05, "loss": 0.0901, "step": 31586 }, { "epoch": 0.5593911461169626, "grad_norm": 0.7089678049087524, "learning_rate": 1.2850491487088352e-05, "loss": 0.1041, "step": 31587 }, { "epoch": 0.559408855653991, "grad_norm": 0.3706526756286621, "learning_rate": 1.2849640005664473e-05, "loss": 0.0482, "step": 31588 }, { "epoch": 0.5594265651910195, "grad_norm": 0.5043989419937134, "learning_rate": 1.2848788531315064e-05, "loss": 0.053, "step": 31589 }, { "epoch": 0.5594442747280479, "grad_norm": 0.7453874349594116, "learning_rate": 1.284793706404292e-05, "loss": 0.0603, "step": 31590 }, { "epoch": 0.5594619842650763, "grad_norm": 0.5514916777610779, "learning_rate": 1.2847085603850842e-05, "loss": 0.0502, "step": 31591 }, { "epoch": 0.5594796938021048, "grad_norm": 1.1700003147125244, "learning_rate": 1.2846234150741636e-05, "loss": 0.0684, "step": 31592 }, { "epoch": 0.5594974033391332, "grad_norm": 0.4798964262008667, "learning_rate": 1.284538270471811e-05, "loss": 0.0755, "step": 31593 }, { "epoch": 0.5595151128761616, "grad_norm": 0.5061998963356018, "learning_rate": 1.284453126578305e-05, "loss": 0.0541, "step": 31594 }, { "epoch": 0.55953282241319, "grad_norm": 0.435373991727829, "learning_rate": 1.2843679833939263e-05, "loss": 0.0539, "step": 31595 }, { "epoch": 0.5595505319502185, "grad_norm": 0.629786491394043, "learning_rate": 1.2842828409189555e-05, "loss": 0.037, "step": 31596 }, { "epoch": 0.5595682414872469, "grad_norm": 0.3968219459056854, "learning_rate": 1.2841976991536722e-05, "loss": 0.0497, "step": 31597 }, { "epoch": 0.5595859510242753, "grad_norm": 0.5454856157302856, "learning_rate": 1.2841125580983564e-05, "loss": 0.0669, "step": 31598 }, { "epoch": 0.5596036605613037, "grad_norm": 0.4770377576351166, "learning_rate": 1.2840274177532887e-05, "loss": 0.0361, "step": 31599 }, { "epoch": 0.5596213700983322, "grad_norm": 0.7642751336097717, "learning_rate": 1.283942278118749e-05, "loss": 0.0822, "step": 31600 }, { "epoch": 0.5596390796353606, "grad_norm": 0.7815201878547668, "learning_rate": 1.2838571391950171e-05, "loss": 0.0674, "step": 31601 }, { "epoch": 0.559656789172389, "grad_norm": 0.603036642074585, "learning_rate": 1.2837720009823734e-05, "loss": 0.0791, "step": 31602 }, { "epoch": 0.5596744987094174, "grad_norm": 0.8604346513748169, "learning_rate": 1.283686863481098e-05, "loss": 0.064, "step": 31603 }, { "epoch": 0.5596922082464459, "grad_norm": 0.9254831671714783, "learning_rate": 1.2836017266914715e-05, "loss": 0.0769, "step": 31604 }, { "epoch": 0.5597099177834743, "grad_norm": 0.4868471026420593, "learning_rate": 1.2835165906137729e-05, "loss": 0.0356, "step": 31605 }, { "epoch": 0.5597276273205027, "grad_norm": 0.8798221349716187, "learning_rate": 1.2834314552482824e-05, "loss": 0.0925, "step": 31606 }, { "epoch": 0.5597453368575313, "grad_norm": 0.48179909586906433, "learning_rate": 1.2833463205952816e-05, "loss": 0.082, "step": 31607 }, { "epoch": 0.5597630463945596, "grad_norm": 0.9908902645111084, "learning_rate": 1.2832611866550485e-05, "loss": 0.0749, "step": 31608 }, { "epoch": 0.559780755931588, "grad_norm": 0.4736483097076416, "learning_rate": 1.2831760534278646e-05, "loss": 0.0628, "step": 31609 }, { "epoch": 0.5597984654686164, "grad_norm": 0.5504580736160278, "learning_rate": 1.2830909209140093e-05, "loss": 0.0478, "step": 31610 }, { "epoch": 0.559816175005645, "grad_norm": 0.4280553460121155, "learning_rate": 1.2830057891137633e-05, "loss": 0.0499, "step": 31611 }, { "epoch": 0.5598338845426734, "grad_norm": 0.39538824558258057, "learning_rate": 1.282920658027406e-05, "loss": 0.0593, "step": 31612 }, { "epoch": 0.5598515940797018, "grad_norm": 0.566778302192688, "learning_rate": 1.2828355276552177e-05, "loss": 0.049, "step": 31613 }, { "epoch": 0.5598693036167302, "grad_norm": 0.7063633799552917, "learning_rate": 1.2827503979974789e-05, "loss": 0.0481, "step": 31614 }, { "epoch": 0.5598870131537587, "grad_norm": 0.700944185256958, "learning_rate": 1.2826652690544692e-05, "loss": 0.0746, "step": 31615 }, { "epoch": 0.5599047226907871, "grad_norm": 0.7990159392356873, "learning_rate": 1.2825801408264686e-05, "loss": 0.0735, "step": 31616 }, { "epoch": 0.5599224322278155, "grad_norm": 0.34846535325050354, "learning_rate": 1.2824950133137574e-05, "loss": 0.062, "step": 31617 }, { "epoch": 0.5599401417648439, "grad_norm": 0.6675617694854736, "learning_rate": 1.2824098865166162e-05, "loss": 0.0479, "step": 31618 }, { "epoch": 0.5599578513018724, "grad_norm": 0.5739222764968872, "learning_rate": 1.2823247604353237e-05, "loss": 0.0587, "step": 31619 }, { "epoch": 0.5599755608389008, "grad_norm": 0.7274157404899597, "learning_rate": 1.2822396350701607e-05, "loss": 0.0692, "step": 31620 }, { "epoch": 0.5599932703759292, "grad_norm": 0.521986722946167, "learning_rate": 1.2821545104214081e-05, "loss": 0.0467, "step": 31621 }, { "epoch": 0.5600109799129577, "grad_norm": 0.7395371198654175, "learning_rate": 1.2820693864893445e-05, "loss": 0.0468, "step": 31622 }, { "epoch": 0.5600286894499861, "grad_norm": 0.6705719828605652, "learning_rate": 1.2819842632742506e-05, "loss": 0.0953, "step": 31623 }, { "epoch": 0.5600463989870145, "grad_norm": 0.7676094770431519, "learning_rate": 1.2818991407764064e-05, "loss": 0.0362, "step": 31624 }, { "epoch": 0.5600641085240429, "grad_norm": 0.5305489897727966, "learning_rate": 1.2818140189960921e-05, "loss": 0.0626, "step": 31625 }, { "epoch": 0.5600818180610714, "grad_norm": 0.48745813965797424, "learning_rate": 1.2817288979335876e-05, "loss": 0.1018, "step": 31626 }, { "epoch": 0.5600995275980998, "grad_norm": 0.4088377356529236, "learning_rate": 1.2816437775891727e-05, "loss": 0.0724, "step": 31627 }, { "epoch": 0.5601172371351282, "grad_norm": 0.6633813381195068, "learning_rate": 1.2815586579631283e-05, "loss": 0.0634, "step": 31628 }, { "epoch": 0.5601349466721566, "grad_norm": 0.460577130317688, "learning_rate": 1.2814735390557331e-05, "loss": 0.0737, "step": 31629 }, { "epoch": 0.5601526562091851, "grad_norm": 0.7445859909057617, "learning_rate": 1.2813884208672681e-05, "loss": 0.0759, "step": 31630 }, { "epoch": 0.5601703657462135, "grad_norm": 0.589707612991333, "learning_rate": 1.2813033033980129e-05, "loss": 0.0695, "step": 31631 }, { "epoch": 0.5601880752832419, "grad_norm": 0.6105620861053467, "learning_rate": 1.2812181866482488e-05, "loss": 0.0541, "step": 31632 }, { "epoch": 0.5602057848202704, "grad_norm": 0.5862123966217041, "learning_rate": 1.2811330706182536e-05, "loss": 0.0537, "step": 31633 }, { "epoch": 0.5602234943572988, "grad_norm": 0.5644406080245972, "learning_rate": 1.2810479553083087e-05, "loss": 0.0552, "step": 31634 }, { "epoch": 0.5602412038943272, "grad_norm": 0.7136497497558594, "learning_rate": 1.2809628407186935e-05, "loss": 0.0572, "step": 31635 }, { "epoch": 0.5602589134313556, "grad_norm": 0.9701201915740967, "learning_rate": 1.2808777268496893e-05, "loss": 0.1002, "step": 31636 }, { "epoch": 0.5602766229683841, "grad_norm": 0.7490914463996887, "learning_rate": 1.2807926137015746e-05, "loss": 0.0603, "step": 31637 }, { "epoch": 0.5602943325054125, "grad_norm": 0.8612223267555237, "learning_rate": 1.2807075012746302e-05, "loss": 0.0643, "step": 31638 }, { "epoch": 0.5603120420424409, "grad_norm": 0.5719138979911804, "learning_rate": 1.2806223895691362e-05, "loss": 0.0701, "step": 31639 }, { "epoch": 0.5603297515794693, "grad_norm": 0.6495382189750671, "learning_rate": 1.2805372785853719e-05, "loss": 0.0916, "step": 31640 }, { "epoch": 0.5603474611164978, "grad_norm": 0.5826586484909058, "learning_rate": 1.2804521683236179e-05, "loss": 0.0342, "step": 31641 }, { "epoch": 0.5603651706535262, "grad_norm": 0.8575061559677124, "learning_rate": 1.2803670587841538e-05, "loss": 0.0839, "step": 31642 }, { "epoch": 0.5603828801905546, "grad_norm": 0.6857591867446899, "learning_rate": 1.2802819499672604e-05, "loss": 0.0601, "step": 31643 }, { "epoch": 0.560400589727583, "grad_norm": 0.5038797855377197, "learning_rate": 1.2801968418732168e-05, "loss": 0.0777, "step": 31644 }, { "epoch": 0.5604182992646115, "grad_norm": 0.6376875042915344, "learning_rate": 1.2801117345023035e-05, "loss": 0.0505, "step": 31645 }, { "epoch": 0.5604360088016399, "grad_norm": 0.7174150943756104, "learning_rate": 1.2800266278548008e-05, "loss": 0.0828, "step": 31646 }, { "epoch": 0.5604537183386683, "grad_norm": 0.7397875189781189, "learning_rate": 1.2799415219309879e-05, "loss": 0.072, "step": 31647 }, { "epoch": 0.5604714278756968, "grad_norm": 0.4735141396522522, "learning_rate": 1.2798564167311448e-05, "loss": 0.0507, "step": 31648 }, { "epoch": 0.5604891374127252, "grad_norm": 0.5484716296195984, "learning_rate": 1.2797713122555515e-05, "loss": 0.052, "step": 31649 }, { "epoch": 0.5605068469497536, "grad_norm": 0.48820677399635315, "learning_rate": 1.2796862085044893e-05, "loss": 0.0915, "step": 31650 }, { "epoch": 0.560524556486782, "grad_norm": 0.6834997534751892, "learning_rate": 1.2796011054782368e-05, "loss": 0.0781, "step": 31651 }, { "epoch": 0.5605422660238105, "grad_norm": 0.8063926100730896, "learning_rate": 1.2795160031770742e-05, "loss": 0.0705, "step": 31652 }, { "epoch": 0.5605599755608389, "grad_norm": 0.520254373550415, "learning_rate": 1.2794309016012819e-05, "loss": 0.0559, "step": 31653 }, { "epoch": 0.5605776850978673, "grad_norm": 0.6097058653831482, "learning_rate": 1.2793458007511394e-05, "loss": 0.0791, "step": 31654 }, { "epoch": 0.5605953946348957, "grad_norm": 1.054025650024414, "learning_rate": 1.279260700626927e-05, "loss": 0.0746, "step": 31655 }, { "epoch": 0.5606131041719242, "grad_norm": 0.8877245783805847, "learning_rate": 1.2791756012289242e-05, "loss": 0.0644, "step": 31656 }, { "epoch": 0.5606308137089526, "grad_norm": 0.8186485171318054, "learning_rate": 1.279090502557412e-05, "loss": 0.0587, "step": 31657 }, { "epoch": 0.560648523245981, "grad_norm": 0.6725814938545227, "learning_rate": 1.279005404612669e-05, "loss": 0.0542, "step": 31658 }, { "epoch": 0.5606662327830094, "grad_norm": 0.8976356387138367, "learning_rate": 1.2789203073949764e-05, "loss": 0.0918, "step": 31659 }, { "epoch": 0.5606839423200379, "grad_norm": 0.9134002923965454, "learning_rate": 1.2788352109046139e-05, "loss": 0.0439, "step": 31660 }, { "epoch": 0.5607016518570663, "grad_norm": 0.9016113877296448, "learning_rate": 1.2787501151418608e-05, "loss": 0.0492, "step": 31661 }, { "epoch": 0.5607193613940947, "grad_norm": 0.5737518668174744, "learning_rate": 1.2786650201069973e-05, "loss": 0.0703, "step": 31662 }, { "epoch": 0.5607370709311232, "grad_norm": 0.6850455403327942, "learning_rate": 1.2785799258003033e-05, "loss": 0.0888, "step": 31663 }, { "epoch": 0.5607547804681516, "grad_norm": 0.5803295373916626, "learning_rate": 1.2784948322220596e-05, "loss": 0.0434, "step": 31664 }, { "epoch": 0.56077249000518, "grad_norm": 0.727162778377533, "learning_rate": 1.2784097393725451e-05, "loss": 0.0581, "step": 31665 }, { "epoch": 0.5607901995422084, "grad_norm": 0.674627959728241, "learning_rate": 1.27832464725204e-05, "loss": 0.0613, "step": 31666 }, { "epoch": 0.5608079090792369, "grad_norm": 0.5642042756080627, "learning_rate": 1.2782395558608248e-05, "loss": 0.0651, "step": 31667 }, { "epoch": 0.5608256186162653, "grad_norm": 0.7960411906242371, "learning_rate": 1.2781544651991786e-05, "loss": 0.0602, "step": 31668 }, { "epoch": 0.5608433281532937, "grad_norm": 0.37762385606765747, "learning_rate": 1.278069375267382e-05, "loss": 0.0543, "step": 31669 }, { "epoch": 0.5608610376903221, "grad_norm": 0.5697516202926636, "learning_rate": 1.2779842860657146e-05, "loss": 0.0545, "step": 31670 }, { "epoch": 0.5608787472273506, "grad_norm": 0.8391003608703613, "learning_rate": 1.2778991975944572e-05, "loss": 0.0777, "step": 31671 }, { "epoch": 0.560896456764379, "grad_norm": 0.522316575050354, "learning_rate": 1.2778141098538878e-05, "loss": 0.0603, "step": 31672 }, { "epoch": 0.5609141663014074, "grad_norm": 0.8740276098251343, "learning_rate": 1.2777290228442878e-05, "loss": 0.0881, "step": 31673 }, { "epoch": 0.5609318758384358, "grad_norm": 0.8676638603210449, "learning_rate": 1.277643936565937e-05, "loss": 0.0778, "step": 31674 }, { "epoch": 0.5609495853754644, "grad_norm": 0.5379608273506165, "learning_rate": 1.2775588510191159e-05, "loss": 0.0663, "step": 31675 }, { "epoch": 0.5609672949124928, "grad_norm": 0.5117152333259583, "learning_rate": 1.2774737662041027e-05, "loss": 0.0611, "step": 31676 }, { "epoch": 0.5609850044495212, "grad_norm": 0.5030736923217773, "learning_rate": 1.2773886821211786e-05, "loss": 0.0493, "step": 31677 }, { "epoch": 0.5610027139865497, "grad_norm": 0.8694801926612854, "learning_rate": 1.2773035987706235e-05, "loss": 0.0821, "step": 31678 }, { "epoch": 0.5610204235235781, "grad_norm": 0.9712185263633728, "learning_rate": 1.2772185161527166e-05, "loss": 0.0738, "step": 31679 }, { "epoch": 0.5610381330606065, "grad_norm": 0.6655195951461792, "learning_rate": 1.2771334342677381e-05, "loss": 0.0521, "step": 31680 }, { "epoch": 0.5610558425976349, "grad_norm": 0.38179266452789307, "learning_rate": 1.2770483531159685e-05, "loss": 0.0748, "step": 31681 }, { "epoch": 0.5610735521346634, "grad_norm": 0.7451058030128479, "learning_rate": 1.2769632726976872e-05, "loss": 0.0814, "step": 31682 }, { "epoch": 0.5610912616716918, "grad_norm": 0.6179315447807312, "learning_rate": 1.276878193013174e-05, "loss": 0.0301, "step": 31683 }, { "epoch": 0.5611089712087202, "grad_norm": 0.5697710514068604, "learning_rate": 1.276793114062709e-05, "loss": 0.0396, "step": 31684 }, { "epoch": 0.5611266807457486, "grad_norm": 0.20834980905056, "learning_rate": 1.2767080358465726e-05, "loss": 0.0577, "step": 31685 }, { "epoch": 0.5611443902827771, "grad_norm": 0.38255786895751953, "learning_rate": 1.2766229583650433e-05, "loss": 0.0514, "step": 31686 }, { "epoch": 0.5611620998198055, "grad_norm": 0.4385516941547394, "learning_rate": 1.2765378816184022e-05, "loss": 0.0508, "step": 31687 }, { "epoch": 0.5611798093568339, "grad_norm": 0.7212991118431091, "learning_rate": 1.276452805606929e-05, "loss": 0.0686, "step": 31688 }, { "epoch": 0.5611975188938623, "grad_norm": 0.46607261896133423, "learning_rate": 1.276367730330904e-05, "loss": 0.0582, "step": 31689 }, { "epoch": 0.5612152284308908, "grad_norm": 0.5421643257141113, "learning_rate": 1.2762826557906059e-05, "loss": 0.0687, "step": 31690 }, { "epoch": 0.5612329379679192, "grad_norm": 0.6722414493560791, "learning_rate": 1.2761975819863152e-05, "loss": 0.0674, "step": 31691 }, { "epoch": 0.5612506475049476, "grad_norm": 0.6894975304603577, "learning_rate": 1.276112508918312e-05, "loss": 0.08, "step": 31692 }, { "epoch": 0.5612683570419761, "grad_norm": 0.9807912111282349, "learning_rate": 1.2760274365868757e-05, "loss": 0.0693, "step": 31693 }, { "epoch": 0.5612860665790045, "grad_norm": 0.6127585172653198, "learning_rate": 1.2759423649922868e-05, "loss": 0.0447, "step": 31694 }, { "epoch": 0.5613037761160329, "grad_norm": 0.6590135097503662, "learning_rate": 1.2758572941348247e-05, "loss": 0.0559, "step": 31695 }, { "epoch": 0.5613214856530613, "grad_norm": 0.4533591568470001, "learning_rate": 1.2757722240147698e-05, "loss": 0.0525, "step": 31696 }, { "epoch": 0.5613391951900898, "grad_norm": 0.4442882835865021, "learning_rate": 1.2756871546324012e-05, "loss": 0.0443, "step": 31697 }, { "epoch": 0.5613569047271182, "grad_norm": 0.9841101765632629, "learning_rate": 1.2756020859879992e-05, "loss": 0.1046, "step": 31698 }, { "epoch": 0.5613746142641466, "grad_norm": 0.6942728161811829, "learning_rate": 1.2755170180818441e-05, "loss": 0.0879, "step": 31699 }, { "epoch": 0.561392323801175, "grad_norm": 0.6900455951690674, "learning_rate": 1.2754319509142148e-05, "loss": 0.073, "step": 31700 }, { "epoch": 0.5614100333382035, "grad_norm": 0.5176229476928711, "learning_rate": 1.2753468844853914e-05, "loss": 0.0716, "step": 31701 }, { "epoch": 0.5614277428752319, "grad_norm": 0.7670465111732483, "learning_rate": 1.2752618187956543e-05, "loss": 0.0614, "step": 31702 }, { "epoch": 0.5614454524122603, "grad_norm": 0.6357258558273315, "learning_rate": 1.2751767538452838e-05, "loss": 0.087, "step": 31703 }, { "epoch": 0.5614631619492887, "grad_norm": 0.7401039600372314, "learning_rate": 1.2750916896345583e-05, "loss": 0.0508, "step": 31704 }, { "epoch": 0.5614808714863172, "grad_norm": 0.47277966141700745, "learning_rate": 1.2750066261637585e-05, "loss": 0.0826, "step": 31705 }, { "epoch": 0.5614985810233456, "grad_norm": 0.3978395462036133, "learning_rate": 1.274921563433164e-05, "loss": 0.0587, "step": 31706 }, { "epoch": 0.561516290560374, "grad_norm": 0.4028467833995819, "learning_rate": 1.2748365014430554e-05, "loss": 0.0631, "step": 31707 }, { "epoch": 0.5615340000974025, "grad_norm": 0.863757312297821, "learning_rate": 1.2747514401937113e-05, "loss": 0.095, "step": 31708 }, { "epoch": 0.5615517096344309, "grad_norm": 0.5985994338989258, "learning_rate": 1.2746663796854122e-05, "loss": 0.0631, "step": 31709 }, { "epoch": 0.5615694191714593, "grad_norm": 1.0301207304000854, "learning_rate": 1.2745813199184385e-05, "loss": 0.0584, "step": 31710 }, { "epoch": 0.5615871287084877, "grad_norm": 0.5626640915870667, "learning_rate": 1.2744962608930688e-05, "loss": 0.0588, "step": 31711 }, { "epoch": 0.5616048382455162, "grad_norm": 0.48396337032318115, "learning_rate": 1.2744112026095838e-05, "loss": 0.0431, "step": 31712 }, { "epoch": 0.5616225477825446, "grad_norm": 0.7907369136810303, "learning_rate": 1.2743261450682632e-05, "loss": 0.1033, "step": 31713 }, { "epoch": 0.561640257319573, "grad_norm": 1.1759111881256104, "learning_rate": 1.274241088269387e-05, "loss": 0.0972, "step": 31714 }, { "epoch": 0.5616579668566014, "grad_norm": 0.691327691078186, "learning_rate": 1.2741560322132343e-05, "loss": 0.0741, "step": 31715 }, { "epoch": 0.5616756763936299, "grad_norm": 0.5326399803161621, "learning_rate": 1.2740709769000856e-05, "loss": 0.0897, "step": 31716 }, { "epoch": 0.5616933859306583, "grad_norm": 0.6586509346961975, "learning_rate": 1.2739859223302211e-05, "loss": 0.0651, "step": 31717 }, { "epoch": 0.5617110954676867, "grad_norm": 0.5622912645339966, "learning_rate": 1.2739008685039193e-05, "loss": 0.0505, "step": 31718 }, { "epoch": 0.5617288050047151, "grad_norm": 0.4049296975135803, "learning_rate": 1.2738158154214611e-05, "loss": 0.0477, "step": 31719 }, { "epoch": 0.5617465145417436, "grad_norm": 0.45655280351638794, "learning_rate": 1.2737307630831257e-05, "loss": 0.0686, "step": 31720 }, { "epoch": 0.561764224078772, "grad_norm": 0.4441109597682953, "learning_rate": 1.2736457114891935e-05, "loss": 0.0611, "step": 31721 }, { "epoch": 0.5617819336158004, "grad_norm": 0.582535445690155, "learning_rate": 1.273560660639944e-05, "loss": 0.0487, "step": 31722 }, { "epoch": 0.5617996431528289, "grad_norm": 0.6479812860488892, "learning_rate": 1.2734756105356567e-05, "loss": 0.0524, "step": 31723 }, { "epoch": 0.5618173526898573, "grad_norm": 0.6066914796829224, "learning_rate": 1.2733905611766123e-05, "loss": 0.0605, "step": 31724 }, { "epoch": 0.5618350622268857, "grad_norm": 0.7687298059463501, "learning_rate": 1.2733055125630895e-05, "loss": 0.0922, "step": 31725 }, { "epoch": 0.5618527717639141, "grad_norm": 0.5634852647781372, "learning_rate": 1.2732204646953686e-05, "loss": 0.0763, "step": 31726 }, { "epoch": 0.5618704813009426, "grad_norm": 0.6188985109329224, "learning_rate": 1.2731354175737297e-05, "loss": 0.0957, "step": 31727 }, { "epoch": 0.561888190837971, "grad_norm": 0.4896889328956604, "learning_rate": 1.273050371198453e-05, "loss": 0.0387, "step": 31728 }, { "epoch": 0.5619059003749994, "grad_norm": 0.7361546158790588, "learning_rate": 1.2729653255698168e-05, "loss": 0.0607, "step": 31729 }, { "epoch": 0.5619236099120278, "grad_norm": 0.7402220368385315, "learning_rate": 1.2728802806881014e-05, "loss": 0.0399, "step": 31730 }, { "epoch": 0.5619413194490563, "grad_norm": 0.6708053946495056, "learning_rate": 1.2727952365535877e-05, "loss": 0.0716, "step": 31731 }, { "epoch": 0.5619590289860847, "grad_norm": 0.6904875040054321, "learning_rate": 1.2727101931665542e-05, "loss": 0.0823, "step": 31732 }, { "epoch": 0.5619767385231131, "grad_norm": 0.9743961691856384, "learning_rate": 1.2726251505272813e-05, "loss": 0.0772, "step": 31733 }, { "epoch": 0.5619944480601415, "grad_norm": 0.47866666316986084, "learning_rate": 1.2725401086360485e-05, "loss": 0.0396, "step": 31734 }, { "epoch": 0.56201215759717, "grad_norm": 0.733612060546875, "learning_rate": 1.2724550674931362e-05, "loss": 0.0508, "step": 31735 }, { "epoch": 0.5620298671341984, "grad_norm": 0.9381710290908813, "learning_rate": 1.2723700270988235e-05, "loss": 0.0784, "step": 31736 }, { "epoch": 0.5620475766712268, "grad_norm": 0.9247533082962036, "learning_rate": 1.2722849874533902e-05, "loss": 0.0586, "step": 31737 }, { "epoch": 0.5620652862082554, "grad_norm": 0.743974506855011, "learning_rate": 1.2721999485571165e-05, "loss": 0.0736, "step": 31738 }, { "epoch": 0.5620829957452838, "grad_norm": 0.7229824662208557, "learning_rate": 1.2721149104102816e-05, "loss": 0.0593, "step": 31739 }, { "epoch": 0.5621007052823122, "grad_norm": 0.7170383930206299, "learning_rate": 1.2720298730131657e-05, "loss": 0.0505, "step": 31740 }, { "epoch": 0.5621184148193406, "grad_norm": 0.6265930533409119, "learning_rate": 1.2719448363660484e-05, "loss": 0.0637, "step": 31741 }, { "epoch": 0.5621361243563691, "grad_norm": 0.6199805736541748, "learning_rate": 1.2718598004692103e-05, "loss": 0.0678, "step": 31742 }, { "epoch": 0.5621538338933975, "grad_norm": 0.24417419731616974, "learning_rate": 1.2717747653229295e-05, "loss": 0.05, "step": 31743 }, { "epoch": 0.5621715434304259, "grad_norm": 0.7471158504486084, "learning_rate": 1.2716897309274865e-05, "loss": 0.0661, "step": 31744 }, { "epoch": 0.5621892529674543, "grad_norm": 0.5573647022247314, "learning_rate": 1.2716046972831617e-05, "loss": 0.0674, "step": 31745 }, { "epoch": 0.5622069625044828, "grad_norm": 0.5133066177368164, "learning_rate": 1.2715196643902346e-05, "loss": 0.0491, "step": 31746 }, { "epoch": 0.5622246720415112, "grad_norm": 0.447662889957428, "learning_rate": 1.2714346322489843e-05, "loss": 0.0583, "step": 31747 }, { "epoch": 0.5622423815785396, "grad_norm": 0.49115246534347534, "learning_rate": 1.2713496008596907e-05, "loss": 0.072, "step": 31748 }, { "epoch": 0.562260091115568, "grad_norm": 0.9974036812782288, "learning_rate": 1.2712645702226343e-05, "loss": 0.0726, "step": 31749 }, { "epoch": 0.5622778006525965, "grad_norm": 1.205828309059143, "learning_rate": 1.271179540338094e-05, "loss": 0.0778, "step": 31750 }, { "epoch": 0.5622955101896249, "grad_norm": 0.6618037819862366, "learning_rate": 1.27109451120635e-05, "loss": 0.0657, "step": 31751 }, { "epoch": 0.5623132197266533, "grad_norm": 0.5066842436790466, "learning_rate": 1.2710094828276816e-05, "loss": 0.0511, "step": 31752 }, { "epoch": 0.5623309292636818, "grad_norm": 0.8759958744049072, "learning_rate": 1.2709244552023695e-05, "loss": 0.0644, "step": 31753 }, { "epoch": 0.5623486388007102, "grad_norm": 0.897202730178833, "learning_rate": 1.2708394283306922e-05, "loss": 0.082, "step": 31754 }, { "epoch": 0.5623663483377386, "grad_norm": 0.4765944182872772, "learning_rate": 1.27075440221293e-05, "loss": 0.0538, "step": 31755 }, { "epoch": 0.562384057874767, "grad_norm": 0.3106308877468109, "learning_rate": 1.2706693768493634e-05, "loss": 0.0584, "step": 31756 }, { "epoch": 0.5624017674117955, "grad_norm": 0.6178870797157288, "learning_rate": 1.2705843522402707e-05, "loss": 0.05, "step": 31757 }, { "epoch": 0.5624194769488239, "grad_norm": 0.5355246067047119, "learning_rate": 1.2704993283859321e-05, "loss": 0.0612, "step": 31758 }, { "epoch": 0.5624371864858523, "grad_norm": 0.2557358145713806, "learning_rate": 1.2704143052866272e-05, "loss": 0.0851, "step": 31759 }, { "epoch": 0.5624548960228807, "grad_norm": 0.6529178023338318, "learning_rate": 1.2703292829426372e-05, "loss": 0.0607, "step": 31760 }, { "epoch": 0.5624726055599092, "grad_norm": 0.791115939617157, "learning_rate": 1.27024426135424e-05, "loss": 0.0884, "step": 31761 }, { "epoch": 0.5624903150969376, "grad_norm": 0.690830647945404, "learning_rate": 1.2701592405217159e-05, "loss": 0.0953, "step": 31762 }, { "epoch": 0.562508024633966, "grad_norm": 0.35355597734451294, "learning_rate": 1.2700742204453447e-05, "loss": 0.0664, "step": 31763 }, { "epoch": 0.5625257341709944, "grad_norm": 0.6060256361961365, "learning_rate": 1.269989201125406e-05, "loss": 0.0773, "step": 31764 }, { "epoch": 0.5625434437080229, "grad_norm": 0.7503913640975952, "learning_rate": 1.2699041825621794e-05, "loss": 0.0742, "step": 31765 }, { "epoch": 0.5625611532450513, "grad_norm": 0.5168799161911011, "learning_rate": 1.269819164755945e-05, "loss": 0.0624, "step": 31766 }, { "epoch": 0.5625788627820797, "grad_norm": 0.36183616518974304, "learning_rate": 1.2697341477069823e-05, "loss": 0.0467, "step": 31767 }, { "epoch": 0.5625965723191082, "grad_norm": 0.6078453660011292, "learning_rate": 1.269649131415571e-05, "loss": 0.0889, "step": 31768 }, { "epoch": 0.5626142818561366, "grad_norm": 0.6332477927207947, "learning_rate": 1.2695641158819904e-05, "loss": 0.0601, "step": 31769 }, { "epoch": 0.562631991393165, "grad_norm": 0.842361569404602, "learning_rate": 1.2694791011065213e-05, "loss": 0.0728, "step": 31770 }, { "epoch": 0.5626497009301934, "grad_norm": 0.6303425431251526, "learning_rate": 1.2693940870894421e-05, "loss": 0.0538, "step": 31771 }, { "epoch": 0.5626674104672219, "grad_norm": 0.5307482481002808, "learning_rate": 1.269309073831033e-05, "loss": 0.0512, "step": 31772 }, { "epoch": 0.5626851200042503, "grad_norm": 0.8415747880935669, "learning_rate": 1.2692240613315733e-05, "loss": 0.0743, "step": 31773 }, { "epoch": 0.5627028295412787, "grad_norm": 1.0263363122940063, "learning_rate": 1.2691390495913441e-05, "loss": 0.0528, "step": 31774 }, { "epoch": 0.5627205390783071, "grad_norm": 0.6435808539390564, "learning_rate": 1.2690540386106234e-05, "loss": 0.0609, "step": 31775 }, { "epoch": 0.5627382486153356, "grad_norm": 0.5587572455406189, "learning_rate": 1.2689690283896918e-05, "loss": 0.0649, "step": 31776 }, { "epoch": 0.562755958152364, "grad_norm": 0.40529316663742065, "learning_rate": 1.2688840189288285e-05, "loss": 0.0633, "step": 31777 }, { "epoch": 0.5627736676893924, "grad_norm": 0.6520440578460693, "learning_rate": 1.2687990102283135e-05, "loss": 0.0864, "step": 31778 }, { "epoch": 0.5627913772264208, "grad_norm": 0.7994441390037537, "learning_rate": 1.2687140022884264e-05, "loss": 0.0571, "step": 31779 }, { "epoch": 0.5628090867634493, "grad_norm": 0.3354352116584778, "learning_rate": 1.2686289951094468e-05, "loss": 0.0431, "step": 31780 }, { "epoch": 0.5628267963004777, "grad_norm": 0.7525127530097961, "learning_rate": 1.2685439886916551e-05, "loss": 0.0666, "step": 31781 }, { "epoch": 0.5628445058375061, "grad_norm": 0.7504488825798035, "learning_rate": 1.2684589830353292e-05, "loss": 0.059, "step": 31782 }, { "epoch": 0.5628622153745346, "grad_norm": 0.6798394322395325, "learning_rate": 1.26837397814075e-05, "loss": 0.0599, "step": 31783 }, { "epoch": 0.562879924911563, "grad_norm": 0.5664766430854797, "learning_rate": 1.2682889740081971e-05, "loss": 0.1021, "step": 31784 }, { "epoch": 0.5628976344485914, "grad_norm": 0.5635769367218018, "learning_rate": 1.2682039706379508e-05, "loss": 0.0546, "step": 31785 }, { "epoch": 0.5629153439856198, "grad_norm": 0.5828762054443359, "learning_rate": 1.2681189680302894e-05, "loss": 0.0433, "step": 31786 }, { "epoch": 0.5629330535226483, "grad_norm": 0.8273183107376099, "learning_rate": 1.2680339661854929e-05, "loss": 0.0518, "step": 31787 }, { "epoch": 0.5629507630596767, "grad_norm": 0.731120765209198, "learning_rate": 1.2679489651038415e-05, "loss": 0.0394, "step": 31788 }, { "epoch": 0.5629684725967051, "grad_norm": 0.8685956001281738, "learning_rate": 1.2678639647856146e-05, "loss": 0.081, "step": 31789 }, { "epoch": 0.5629861821337335, "grad_norm": 0.4961591958999634, "learning_rate": 1.2677789652310912e-05, "loss": 0.0536, "step": 31790 }, { "epoch": 0.563003891670762, "grad_norm": 0.5834859013557434, "learning_rate": 1.2676939664405519e-05, "loss": 0.0788, "step": 31791 }, { "epoch": 0.5630216012077904, "grad_norm": 0.6316567659378052, "learning_rate": 1.2676089684142762e-05, "loss": 0.0452, "step": 31792 }, { "epoch": 0.5630393107448188, "grad_norm": 0.9041735529899597, "learning_rate": 1.2675239711525433e-05, "loss": 0.0721, "step": 31793 }, { "epoch": 0.5630570202818472, "grad_norm": 0.7610386610031128, "learning_rate": 1.2674389746556329e-05, "loss": 0.0436, "step": 31794 }, { "epoch": 0.5630747298188757, "grad_norm": 0.9637402892112732, "learning_rate": 1.2673539789238253e-05, "loss": 0.0745, "step": 31795 }, { "epoch": 0.5630924393559041, "grad_norm": 0.6324886083602905, "learning_rate": 1.2672689839573988e-05, "loss": 0.0506, "step": 31796 }, { "epoch": 0.5631101488929325, "grad_norm": 0.7602875232696533, "learning_rate": 1.267183989756634e-05, "loss": 0.0781, "step": 31797 }, { "epoch": 0.563127858429961, "grad_norm": 0.27289649844169617, "learning_rate": 1.2670989963218103e-05, "loss": 0.0823, "step": 31798 }, { "epoch": 0.5631455679669894, "grad_norm": 0.5495485663414001, "learning_rate": 1.267014003653208e-05, "loss": 0.0459, "step": 31799 }, { "epoch": 0.5631632775040178, "grad_norm": 0.5835631489753723, "learning_rate": 1.2669290117511056e-05, "loss": 0.0779, "step": 31800 }, { "epoch": 0.5631809870410462, "grad_norm": 0.520740807056427, "learning_rate": 1.266844020615783e-05, "loss": 0.0613, "step": 31801 }, { "epoch": 0.5631986965780748, "grad_norm": 1.1490267515182495, "learning_rate": 1.2667590302475201e-05, "loss": 0.0848, "step": 31802 }, { "epoch": 0.5632164061151032, "grad_norm": 0.810279130935669, "learning_rate": 1.2666740406465966e-05, "loss": 0.1144, "step": 31803 }, { "epoch": 0.5632341156521316, "grad_norm": 0.6853789687156677, "learning_rate": 1.2665890518132916e-05, "loss": 0.0838, "step": 31804 }, { "epoch": 0.56325182518916, "grad_norm": 0.38965246081352234, "learning_rate": 1.266504063747885e-05, "loss": 0.0525, "step": 31805 }, { "epoch": 0.5632695347261885, "grad_norm": 0.8613364696502686, "learning_rate": 1.2664190764506567e-05, "loss": 0.0768, "step": 31806 }, { "epoch": 0.5632872442632169, "grad_norm": 0.506119966506958, "learning_rate": 1.2663340899218858e-05, "loss": 0.0874, "step": 31807 }, { "epoch": 0.5633049538002453, "grad_norm": 0.5116607546806335, "learning_rate": 1.2662491041618522e-05, "loss": 0.0758, "step": 31808 }, { "epoch": 0.5633226633372737, "grad_norm": 0.7739100456237793, "learning_rate": 1.2661641191708352e-05, "loss": 0.0798, "step": 31809 }, { "epoch": 0.5633403728743022, "grad_norm": 0.6708866953849792, "learning_rate": 1.2660791349491152e-05, "loss": 0.0943, "step": 31810 }, { "epoch": 0.5633580824113306, "grad_norm": 0.8505311012268066, "learning_rate": 1.2659941514969702e-05, "loss": 0.0693, "step": 31811 }, { "epoch": 0.563375791948359, "grad_norm": 0.6889128088951111, "learning_rate": 1.2659091688146813e-05, "loss": 0.0907, "step": 31812 }, { "epoch": 0.5633935014853875, "grad_norm": 0.5682324767112732, "learning_rate": 1.2658241869025283e-05, "loss": 0.0648, "step": 31813 }, { "epoch": 0.5634112110224159, "grad_norm": 0.14995333552360535, "learning_rate": 1.2657392057607891e-05, "loss": 0.0485, "step": 31814 }, { "epoch": 0.5634289205594443, "grad_norm": 0.8222634196281433, "learning_rate": 1.2656542253897445e-05, "loss": 0.098, "step": 31815 }, { "epoch": 0.5634466300964727, "grad_norm": 0.996379554271698, "learning_rate": 1.2655692457896736e-05, "loss": 0.084, "step": 31816 }, { "epoch": 0.5634643396335012, "grad_norm": 0.633693277835846, "learning_rate": 1.2654842669608565e-05, "loss": 0.0802, "step": 31817 }, { "epoch": 0.5634820491705296, "grad_norm": 0.4037800431251526, "learning_rate": 1.2653992889035721e-05, "loss": 0.0519, "step": 31818 }, { "epoch": 0.563499758707558, "grad_norm": 0.8850095272064209, "learning_rate": 1.2653143116181004e-05, "loss": 0.0628, "step": 31819 }, { "epoch": 0.5635174682445864, "grad_norm": 0.40398305654525757, "learning_rate": 1.2652293351047213e-05, "loss": 0.0406, "step": 31820 }, { "epoch": 0.5635351777816149, "grad_norm": 0.870560884475708, "learning_rate": 1.2651443593637135e-05, "loss": 0.0716, "step": 31821 }, { "epoch": 0.5635528873186433, "grad_norm": 0.5683002471923828, "learning_rate": 1.2650593843953569e-05, "loss": 0.0666, "step": 31822 }, { "epoch": 0.5635705968556717, "grad_norm": 1.1391685009002686, "learning_rate": 1.2649744101999313e-05, "loss": 0.0966, "step": 31823 }, { "epoch": 0.5635883063927001, "grad_norm": 0.890440046787262, "learning_rate": 1.2648894367777168e-05, "loss": 0.0684, "step": 31824 }, { "epoch": 0.5636060159297286, "grad_norm": 0.7842850685119629, "learning_rate": 1.264804464128991e-05, "loss": 0.0623, "step": 31825 }, { "epoch": 0.563623725466757, "grad_norm": 0.7618728280067444, "learning_rate": 1.2647194922540354e-05, "loss": 0.0645, "step": 31826 }, { "epoch": 0.5636414350037854, "grad_norm": 0.5041856169700623, "learning_rate": 1.2646345211531294e-05, "loss": 0.0767, "step": 31827 }, { "epoch": 0.5636591445408139, "grad_norm": 0.9275258183479309, "learning_rate": 1.2645495508265514e-05, "loss": 0.0794, "step": 31828 }, { "epoch": 0.5636768540778423, "grad_norm": 0.6244298219680786, "learning_rate": 1.2644645812745815e-05, "loss": 0.0632, "step": 31829 }, { "epoch": 0.5636945636148707, "grad_norm": 0.6459617018699646, "learning_rate": 1.2643796124974995e-05, "loss": 0.0652, "step": 31830 }, { "epoch": 0.5637122731518991, "grad_norm": 0.7136498093605042, "learning_rate": 1.2642946444955848e-05, "loss": 0.0913, "step": 31831 }, { "epoch": 0.5637299826889276, "grad_norm": 0.7566790580749512, "learning_rate": 1.2642096772691166e-05, "loss": 0.0631, "step": 31832 }, { "epoch": 0.563747692225956, "grad_norm": 0.40746745467185974, "learning_rate": 1.2641247108183748e-05, "loss": 0.0756, "step": 31833 }, { "epoch": 0.5637654017629844, "grad_norm": 0.6347885727882385, "learning_rate": 1.264039745143639e-05, "loss": 0.0666, "step": 31834 }, { "epoch": 0.5637831113000128, "grad_norm": 0.54966139793396, "learning_rate": 1.2639547802451886e-05, "loss": 0.0858, "step": 31835 }, { "epoch": 0.5638008208370413, "grad_norm": 0.7391445636749268, "learning_rate": 1.2638698161233027e-05, "loss": 0.0837, "step": 31836 }, { "epoch": 0.5638185303740697, "grad_norm": 0.7671618461608887, "learning_rate": 1.2637848527782615e-05, "loss": 0.0715, "step": 31837 }, { "epoch": 0.5638362399110981, "grad_norm": 0.5994940996170044, "learning_rate": 1.2636998902103447e-05, "loss": 0.0801, "step": 31838 }, { "epoch": 0.5638539494481265, "grad_norm": 0.5368624925613403, "learning_rate": 1.2636149284198309e-05, "loss": 0.0928, "step": 31839 }, { "epoch": 0.563871658985155, "grad_norm": 0.40501970052719116, "learning_rate": 1.2635299674069997e-05, "loss": 0.051, "step": 31840 }, { "epoch": 0.5638893685221834, "grad_norm": 0.8063758015632629, "learning_rate": 1.263445007172132e-05, "loss": 0.0598, "step": 31841 }, { "epoch": 0.5639070780592118, "grad_norm": 1.8019218444824219, "learning_rate": 1.2633600477155054e-05, "loss": 0.0775, "step": 31842 }, { "epoch": 0.5639247875962403, "grad_norm": 0.6010292768478394, "learning_rate": 1.2632750890374006e-05, "loss": 0.0658, "step": 31843 }, { "epoch": 0.5639424971332687, "grad_norm": 1.5713094472885132, "learning_rate": 1.2631901311380966e-05, "loss": 0.0923, "step": 31844 }, { "epoch": 0.5639602066702971, "grad_norm": 2.7337472438812256, "learning_rate": 1.2631051740178736e-05, "loss": 0.08, "step": 31845 }, { "epoch": 0.5639779162073255, "grad_norm": 1.0249899625778198, "learning_rate": 1.2630202176770101e-05, "loss": 0.0874, "step": 31846 }, { "epoch": 0.563995625744354, "grad_norm": 1.6131161451339722, "learning_rate": 1.2629352621157865e-05, "loss": 0.1053, "step": 31847 }, { "epoch": 0.5640133352813824, "grad_norm": 0.5454382300376892, "learning_rate": 1.2628503073344816e-05, "loss": 0.071, "step": 31848 }, { "epoch": 0.5640310448184108, "grad_norm": 0.5401921272277832, "learning_rate": 1.2627653533333756e-05, "loss": 0.0745, "step": 31849 }, { "epoch": 0.5640487543554392, "grad_norm": 0.6956915855407715, "learning_rate": 1.2626804001127472e-05, "loss": 0.0823, "step": 31850 }, { "epoch": 0.5640664638924677, "grad_norm": 0.6184582710266113, "learning_rate": 1.2625954476728762e-05, "loss": 0.0631, "step": 31851 }, { "epoch": 0.5640841734294961, "grad_norm": 0.3087574541568756, "learning_rate": 1.2625104960140431e-05, "loss": 0.0531, "step": 31852 }, { "epoch": 0.5641018829665245, "grad_norm": 0.43683773279190063, "learning_rate": 1.2624255451365256e-05, "loss": 0.067, "step": 31853 }, { "epoch": 0.5641195925035529, "grad_norm": 1.1491707563400269, "learning_rate": 1.2623405950406038e-05, "loss": 0.0641, "step": 31854 }, { "epoch": 0.5641373020405814, "grad_norm": 0.7812311053276062, "learning_rate": 1.2622556457265576e-05, "loss": 0.0964, "step": 31855 }, { "epoch": 0.5641550115776098, "grad_norm": 0.6475468873977661, "learning_rate": 1.2621706971946671e-05, "loss": 0.0759, "step": 31856 }, { "epoch": 0.5641727211146382, "grad_norm": 0.5180601477622986, "learning_rate": 1.2620857494452102e-05, "loss": 0.083, "step": 31857 }, { "epoch": 0.5641904306516667, "grad_norm": 0.7441869378089905, "learning_rate": 1.2620008024784672e-05, "loss": 0.0713, "step": 31858 }, { "epoch": 0.5642081401886951, "grad_norm": 1.206932544708252, "learning_rate": 1.2619158562947176e-05, "loss": 0.0557, "step": 31859 }, { "epoch": 0.5642258497257235, "grad_norm": 0.7701668739318848, "learning_rate": 1.2618309108942405e-05, "loss": 0.1031, "step": 31860 }, { "epoch": 0.5642435592627519, "grad_norm": 0.6000317335128784, "learning_rate": 1.2617459662773158e-05, "loss": 0.0427, "step": 31861 }, { "epoch": 0.5642612687997804, "grad_norm": 1.0173475742340088, "learning_rate": 1.2616610224442226e-05, "loss": 0.0515, "step": 31862 }, { "epoch": 0.5642789783368088, "grad_norm": 0.8924068212509155, "learning_rate": 1.261576079395241e-05, "loss": 0.0764, "step": 31863 }, { "epoch": 0.5642966878738372, "grad_norm": 0.6240417957305908, "learning_rate": 1.2614911371306496e-05, "loss": 0.0801, "step": 31864 }, { "epoch": 0.5643143974108656, "grad_norm": 0.7097069621086121, "learning_rate": 1.2614061956507282e-05, "loss": 0.0673, "step": 31865 }, { "epoch": 0.5643321069478942, "grad_norm": 0.8440859317779541, "learning_rate": 1.2613212549557567e-05, "loss": 0.0976, "step": 31866 }, { "epoch": 0.5643498164849226, "grad_norm": 0.6786975860595703, "learning_rate": 1.261236315046014e-05, "loss": 0.0585, "step": 31867 }, { "epoch": 0.564367526021951, "grad_norm": 0.486811101436615, "learning_rate": 1.2611513759217792e-05, "loss": 0.0531, "step": 31868 }, { "epoch": 0.5643852355589793, "grad_norm": 0.7124269604682922, "learning_rate": 1.2610664375833322e-05, "loss": 0.0591, "step": 31869 }, { "epoch": 0.5644029450960079, "grad_norm": 0.681239128112793, "learning_rate": 1.2609815000309533e-05, "loss": 0.0704, "step": 31870 }, { "epoch": 0.5644206546330363, "grad_norm": 0.5242125988006592, "learning_rate": 1.2608965632649205e-05, "loss": 0.0248, "step": 31871 }, { "epoch": 0.5644383641700647, "grad_norm": 0.8139170408248901, "learning_rate": 1.2608116272855138e-05, "loss": 0.0699, "step": 31872 }, { "epoch": 0.5644560737070932, "grad_norm": 0.7268623113632202, "learning_rate": 1.260726692093013e-05, "loss": 0.0747, "step": 31873 }, { "epoch": 0.5644737832441216, "grad_norm": 0.5801298022270203, "learning_rate": 1.2606417576876967e-05, "loss": 0.0713, "step": 31874 }, { "epoch": 0.56449149278115, "grad_norm": 0.8940760493278503, "learning_rate": 1.260556824069845e-05, "loss": 0.0507, "step": 31875 }, { "epoch": 0.5645092023181784, "grad_norm": 0.6438860297203064, "learning_rate": 1.260471891239737e-05, "loss": 0.067, "step": 31876 }, { "epoch": 0.5645269118552069, "grad_norm": 0.5514670610427856, "learning_rate": 1.2603869591976526e-05, "loss": 0.0846, "step": 31877 }, { "epoch": 0.5645446213922353, "grad_norm": 0.5082943439483643, "learning_rate": 1.2603020279438706e-05, "loss": 0.0384, "step": 31878 }, { "epoch": 0.5645623309292637, "grad_norm": 0.7493080496788025, "learning_rate": 1.2602170974786706e-05, "loss": 0.0521, "step": 31879 }, { "epoch": 0.5645800404662921, "grad_norm": 0.49398455023765564, "learning_rate": 1.2601321678023321e-05, "loss": 0.0594, "step": 31880 }, { "epoch": 0.5645977500033206, "grad_norm": 1.145584225654602, "learning_rate": 1.2600472389151352e-05, "loss": 0.1045, "step": 31881 }, { "epoch": 0.564615459540349, "grad_norm": 0.7353256344795227, "learning_rate": 1.259962310817358e-05, "loss": 0.0716, "step": 31882 }, { "epoch": 0.5646331690773774, "grad_norm": 0.8008496761322021, "learning_rate": 1.2598773835092801e-05, "loss": 0.0972, "step": 31883 }, { "epoch": 0.5646508786144058, "grad_norm": 0.6947250962257385, "learning_rate": 1.2597924569911825e-05, "loss": 0.0567, "step": 31884 }, { "epoch": 0.5646685881514343, "grad_norm": 0.8290883302688599, "learning_rate": 1.2597075312633425e-05, "loss": 0.0958, "step": 31885 }, { "epoch": 0.5646862976884627, "grad_norm": 0.610072672367096, "learning_rate": 1.2596226063260406e-05, "loss": 0.0554, "step": 31886 }, { "epoch": 0.5647040072254911, "grad_norm": 0.37480753660202026, "learning_rate": 1.2595376821795559e-05, "loss": 0.0658, "step": 31887 }, { "epoch": 0.5647217167625196, "grad_norm": 0.5308910012245178, "learning_rate": 1.2594527588241683e-05, "loss": 0.0679, "step": 31888 }, { "epoch": 0.564739426299548, "grad_norm": 0.6380571126937866, "learning_rate": 1.2593678362601565e-05, "loss": 0.063, "step": 31889 }, { "epoch": 0.5647571358365764, "grad_norm": 0.5619276762008667, "learning_rate": 1.2592829144878003e-05, "loss": 0.0713, "step": 31890 }, { "epoch": 0.5647748453736048, "grad_norm": 0.6994465589523315, "learning_rate": 1.259197993507379e-05, "loss": 0.0681, "step": 31891 }, { "epoch": 0.5647925549106333, "grad_norm": 0.7459947466850281, "learning_rate": 1.2591130733191718e-05, "loss": 0.0589, "step": 31892 }, { "epoch": 0.5648102644476617, "grad_norm": 1.2333402633666992, "learning_rate": 1.2590281539234581e-05, "loss": 0.0678, "step": 31893 }, { "epoch": 0.5648279739846901, "grad_norm": 0.5404103994369507, "learning_rate": 1.2589432353205176e-05, "loss": 0.0931, "step": 31894 }, { "epoch": 0.5648456835217185, "grad_norm": 0.7926644682884216, "learning_rate": 1.2588583175106298e-05, "loss": 0.0663, "step": 31895 }, { "epoch": 0.564863393058747, "grad_norm": 0.8423616290092468, "learning_rate": 1.2587734004940734e-05, "loss": 0.07, "step": 31896 }, { "epoch": 0.5648811025957754, "grad_norm": 0.4492044746875763, "learning_rate": 1.258688484271128e-05, "loss": 0.0942, "step": 31897 }, { "epoch": 0.5648988121328038, "grad_norm": 0.951795220375061, "learning_rate": 1.2586035688420736e-05, "loss": 0.0702, "step": 31898 }, { "epoch": 0.5649165216698322, "grad_norm": 0.6779451966285706, "learning_rate": 1.2585186542071883e-05, "loss": 0.0767, "step": 31899 }, { "epoch": 0.5649342312068607, "grad_norm": 1.0295612812042236, "learning_rate": 1.2584337403667526e-05, "loss": 0.0835, "step": 31900 }, { "epoch": 0.5649519407438891, "grad_norm": 0.3667888045310974, "learning_rate": 1.2583488273210451e-05, "loss": 0.0462, "step": 31901 }, { "epoch": 0.5649696502809175, "grad_norm": 0.5519828200340271, "learning_rate": 1.258263915070346e-05, "loss": 0.112, "step": 31902 }, { "epoch": 0.564987359817946, "grad_norm": 0.7924495339393616, "learning_rate": 1.258179003614934e-05, "loss": 0.0576, "step": 31903 }, { "epoch": 0.5650050693549744, "grad_norm": 0.9860209822654724, "learning_rate": 1.2580940929550885e-05, "loss": 0.0827, "step": 31904 }, { "epoch": 0.5650227788920028, "grad_norm": 0.5026848912239075, "learning_rate": 1.2580091830910895e-05, "loss": 0.0688, "step": 31905 }, { "epoch": 0.5650404884290312, "grad_norm": 0.47222912311553955, "learning_rate": 1.257924274023215e-05, "loss": 0.076, "step": 31906 }, { "epoch": 0.5650581979660597, "grad_norm": 0.5006563663482666, "learning_rate": 1.2578393657517454e-05, "loss": 0.0584, "step": 31907 }, { "epoch": 0.5650759075030881, "grad_norm": 0.6092709898948669, "learning_rate": 1.25775445827696e-05, "loss": 0.0481, "step": 31908 }, { "epoch": 0.5650936170401165, "grad_norm": 0.8741694092750549, "learning_rate": 1.2576695515991383e-05, "loss": 0.0684, "step": 31909 }, { "epoch": 0.5651113265771449, "grad_norm": 0.5849668979644775, "learning_rate": 1.2575846457185588e-05, "loss": 0.0411, "step": 31910 }, { "epoch": 0.5651290361141734, "grad_norm": 0.3321058750152588, "learning_rate": 1.2574997406355013e-05, "loss": 0.0517, "step": 31911 }, { "epoch": 0.5651467456512018, "grad_norm": 0.6024765968322754, "learning_rate": 1.2574148363502453e-05, "loss": 0.069, "step": 31912 }, { "epoch": 0.5651644551882302, "grad_norm": 0.6230045557022095, "learning_rate": 1.2573299328630697e-05, "loss": 0.092, "step": 31913 }, { "epoch": 0.5651821647252586, "grad_norm": 0.3448255658149719, "learning_rate": 1.257245030174254e-05, "loss": 0.0701, "step": 31914 }, { "epoch": 0.5651998742622871, "grad_norm": 0.6003791689872742, "learning_rate": 1.2571601282840777e-05, "loss": 0.1004, "step": 31915 }, { "epoch": 0.5652175837993155, "grad_norm": 0.6983773112297058, "learning_rate": 1.2570752271928202e-05, "loss": 0.061, "step": 31916 }, { "epoch": 0.5652352933363439, "grad_norm": 0.6591753363609314, "learning_rate": 1.2569903269007606e-05, "loss": 0.0676, "step": 31917 }, { "epoch": 0.5652530028733724, "grad_norm": 0.5635102391242981, "learning_rate": 1.256905427408178e-05, "loss": 0.0487, "step": 31918 }, { "epoch": 0.5652707124104008, "grad_norm": 0.4864956736564636, "learning_rate": 1.256820528715352e-05, "loss": 0.0889, "step": 31919 }, { "epoch": 0.5652884219474292, "grad_norm": 0.722679078578949, "learning_rate": 1.2567356308225622e-05, "loss": 0.0926, "step": 31920 }, { "epoch": 0.5653061314844576, "grad_norm": 0.7308098077774048, "learning_rate": 1.2566507337300873e-05, "loss": 0.0546, "step": 31921 }, { "epoch": 0.5653238410214861, "grad_norm": 0.5587587952613831, "learning_rate": 1.2565658374382067e-05, "loss": 0.066, "step": 31922 }, { "epoch": 0.5653415505585145, "grad_norm": 0.565971314907074, "learning_rate": 1.2564809419472006e-05, "loss": 0.0864, "step": 31923 }, { "epoch": 0.5653592600955429, "grad_norm": 0.4974243938922882, "learning_rate": 1.256396047257347e-05, "loss": 0.0675, "step": 31924 }, { "epoch": 0.5653769696325713, "grad_norm": 0.6605293154716492, "learning_rate": 1.2563111533689258e-05, "loss": 0.076, "step": 31925 }, { "epoch": 0.5653946791695998, "grad_norm": 0.5161436200141907, "learning_rate": 1.2562262602822164e-05, "loss": 0.0575, "step": 31926 }, { "epoch": 0.5654123887066282, "grad_norm": 0.3417125642299652, "learning_rate": 1.2561413679974979e-05, "loss": 0.0455, "step": 31927 }, { "epoch": 0.5654300982436566, "grad_norm": 0.7088237404823303, "learning_rate": 1.2560564765150495e-05, "loss": 0.0635, "step": 31928 }, { "epoch": 0.565447807780685, "grad_norm": 0.5783767104148865, "learning_rate": 1.2559715858351507e-05, "loss": 0.0531, "step": 31929 }, { "epoch": 0.5654655173177136, "grad_norm": 0.4949658215045929, "learning_rate": 1.2558866959580808e-05, "loss": 0.1088, "step": 31930 }, { "epoch": 0.565483226854742, "grad_norm": 0.8113298416137695, "learning_rate": 1.2558018068841188e-05, "loss": 0.082, "step": 31931 }, { "epoch": 0.5655009363917703, "grad_norm": 0.7976177930831909, "learning_rate": 1.255716918613544e-05, "loss": 0.063, "step": 31932 }, { "epoch": 0.5655186459287989, "grad_norm": 0.708585262298584, "learning_rate": 1.2556320311466361e-05, "loss": 0.0633, "step": 31933 }, { "epoch": 0.5655363554658273, "grad_norm": 0.45964789390563965, "learning_rate": 1.2555471444836746e-05, "loss": 0.082, "step": 31934 }, { "epoch": 0.5655540650028557, "grad_norm": 0.5985620021820068, "learning_rate": 1.2554622586249374e-05, "loss": 0.0653, "step": 31935 }, { "epoch": 0.565571774539884, "grad_norm": 0.8138004541397095, "learning_rate": 1.2553773735707047e-05, "loss": 0.1032, "step": 31936 }, { "epoch": 0.5655894840769126, "grad_norm": 0.35742703080177307, "learning_rate": 1.2552924893212566e-05, "loss": 0.0494, "step": 31937 }, { "epoch": 0.565607193613941, "grad_norm": 0.8557983040809631, "learning_rate": 1.2552076058768707e-05, "loss": 0.0918, "step": 31938 }, { "epoch": 0.5656249031509694, "grad_norm": 0.7449016571044922, "learning_rate": 1.2551227232378271e-05, "loss": 0.0644, "step": 31939 }, { "epoch": 0.5656426126879978, "grad_norm": 0.5369024276733398, "learning_rate": 1.2550378414044049e-05, "loss": 0.0825, "step": 31940 }, { "epoch": 0.5656603222250263, "grad_norm": 0.7308081984519958, "learning_rate": 1.2549529603768837e-05, "loss": 0.0779, "step": 31941 }, { "epoch": 0.5656780317620547, "grad_norm": 0.2461688220500946, "learning_rate": 1.2548680801555422e-05, "loss": 0.039, "step": 31942 }, { "epoch": 0.5656957412990831, "grad_norm": 0.6872522830963135, "learning_rate": 1.25478320074066e-05, "loss": 0.064, "step": 31943 }, { "epoch": 0.5657134508361115, "grad_norm": 0.8946845531463623, "learning_rate": 1.2546983221325164e-05, "loss": 0.0947, "step": 31944 }, { "epoch": 0.56573116037314, "grad_norm": 0.9282834529876709, "learning_rate": 1.2546134443313903e-05, "loss": 0.082, "step": 31945 }, { "epoch": 0.5657488699101684, "grad_norm": 0.4867679476737976, "learning_rate": 1.2545285673375613e-05, "loss": 0.079, "step": 31946 }, { "epoch": 0.5657665794471968, "grad_norm": 0.5940490961074829, "learning_rate": 1.2544436911513083e-05, "loss": 0.0441, "step": 31947 }, { "epoch": 0.5657842889842253, "grad_norm": 0.6209051609039307, "learning_rate": 1.2543588157729114e-05, "loss": 0.0679, "step": 31948 }, { "epoch": 0.5658019985212537, "grad_norm": 0.40880000591278076, "learning_rate": 1.2542739412026483e-05, "loss": 0.0326, "step": 31949 }, { "epoch": 0.5658197080582821, "grad_norm": 0.30431097745895386, "learning_rate": 1.2541890674407992e-05, "loss": 0.0544, "step": 31950 }, { "epoch": 0.5658374175953105, "grad_norm": 0.5518105626106262, "learning_rate": 1.2541041944876433e-05, "loss": 0.0518, "step": 31951 }, { "epoch": 0.565855127132339, "grad_norm": 0.6064988970756531, "learning_rate": 1.2540193223434604e-05, "loss": 0.0616, "step": 31952 }, { "epoch": 0.5658728366693674, "grad_norm": 0.7656648755073547, "learning_rate": 1.2539344510085285e-05, "loss": 0.094, "step": 31953 }, { "epoch": 0.5658905462063958, "grad_norm": 0.8605470061302185, "learning_rate": 1.2538495804831272e-05, "loss": 0.0762, "step": 31954 }, { "epoch": 0.5659082557434242, "grad_norm": 0.6748242378234863, "learning_rate": 1.2537647107675362e-05, "loss": 0.0893, "step": 31955 }, { "epoch": 0.5659259652804527, "grad_norm": 0.6534261107444763, "learning_rate": 1.2536798418620341e-05, "loss": 0.0805, "step": 31956 }, { "epoch": 0.5659436748174811, "grad_norm": 0.4709051549434662, "learning_rate": 1.2535949737669005e-05, "loss": 0.0485, "step": 31957 }, { "epoch": 0.5659613843545095, "grad_norm": 0.7179385423660278, "learning_rate": 1.2535101064824145e-05, "loss": 0.0852, "step": 31958 }, { "epoch": 0.5659790938915379, "grad_norm": 0.5963732600212097, "learning_rate": 1.2534252400088555e-05, "loss": 0.101, "step": 31959 }, { "epoch": 0.5659968034285664, "grad_norm": 0.833329439163208, "learning_rate": 1.2533403743465025e-05, "loss": 0.0597, "step": 31960 }, { "epoch": 0.5660145129655948, "grad_norm": 0.33534735441207886, "learning_rate": 1.2532555094956345e-05, "loss": 0.0277, "step": 31961 }, { "epoch": 0.5660322225026232, "grad_norm": 0.6636465191841125, "learning_rate": 1.2531706454565315e-05, "loss": 0.0727, "step": 31962 }, { "epoch": 0.5660499320396517, "grad_norm": 0.7435613870620728, "learning_rate": 1.2530857822294714e-05, "loss": 0.0758, "step": 31963 }, { "epoch": 0.5660676415766801, "grad_norm": 0.6280508637428284, "learning_rate": 1.253000919814734e-05, "loss": 0.0668, "step": 31964 }, { "epoch": 0.5660853511137085, "grad_norm": 0.491065114736557, "learning_rate": 1.2529160582125989e-05, "loss": 0.0745, "step": 31965 }, { "epoch": 0.5661030606507369, "grad_norm": 0.9356680512428284, "learning_rate": 1.2528311974233455e-05, "loss": 0.0586, "step": 31966 }, { "epoch": 0.5661207701877654, "grad_norm": 0.7655166983604431, "learning_rate": 1.252746337447252e-05, "loss": 0.0898, "step": 31967 }, { "epoch": 0.5661384797247938, "grad_norm": 0.5523207783699036, "learning_rate": 1.2526614782845978e-05, "loss": 0.0665, "step": 31968 }, { "epoch": 0.5661561892618222, "grad_norm": 0.9822039604187012, "learning_rate": 1.2525766199356627e-05, "loss": 0.0798, "step": 31969 }, { "epoch": 0.5661738987988506, "grad_norm": 0.5675738453865051, "learning_rate": 1.2524917624007252e-05, "loss": 0.0714, "step": 31970 }, { "epoch": 0.5661916083358791, "grad_norm": 0.8721292018890381, "learning_rate": 1.252406905680065e-05, "loss": 0.0704, "step": 31971 }, { "epoch": 0.5662093178729075, "grad_norm": 0.712692141532898, "learning_rate": 1.2523220497739606e-05, "loss": 0.0876, "step": 31972 }, { "epoch": 0.5662270274099359, "grad_norm": 0.47113141417503357, "learning_rate": 1.2522371946826923e-05, "loss": 0.0653, "step": 31973 }, { "epoch": 0.5662447369469643, "grad_norm": 0.5608629584312439, "learning_rate": 1.2521523404065382e-05, "loss": 0.0793, "step": 31974 }, { "epoch": 0.5662624464839928, "grad_norm": 0.5170263648033142, "learning_rate": 1.2520674869457778e-05, "loss": 0.0658, "step": 31975 }, { "epoch": 0.5662801560210212, "grad_norm": 1.0049717426300049, "learning_rate": 1.2519826343006908e-05, "loss": 0.0502, "step": 31976 }, { "epoch": 0.5662978655580496, "grad_norm": 0.990394115447998, "learning_rate": 1.2518977824715552e-05, "loss": 0.0665, "step": 31977 }, { "epoch": 0.5663155750950781, "grad_norm": 0.5841243267059326, "learning_rate": 1.2518129314586506e-05, "loss": 0.0345, "step": 31978 }, { "epoch": 0.5663332846321065, "grad_norm": 0.6292685270309448, "learning_rate": 1.2517280812622566e-05, "loss": 0.0702, "step": 31979 }, { "epoch": 0.5663509941691349, "grad_norm": 1.0295480489730835, "learning_rate": 1.2516432318826529e-05, "loss": 0.0764, "step": 31980 }, { "epoch": 0.5663687037061633, "grad_norm": 0.5155828595161438, "learning_rate": 1.251558383320117e-05, "loss": 0.0538, "step": 31981 }, { "epoch": 0.5663864132431918, "grad_norm": 0.5749263167381287, "learning_rate": 1.251473535574929e-05, "loss": 0.0484, "step": 31982 }, { "epoch": 0.5664041227802202, "grad_norm": 0.7255105376243591, "learning_rate": 1.2513886886473678e-05, "loss": 0.0721, "step": 31983 }, { "epoch": 0.5664218323172486, "grad_norm": 0.5744431018829346, "learning_rate": 1.2513038425377133e-05, "loss": 0.0712, "step": 31984 }, { "epoch": 0.566439541854277, "grad_norm": 0.4387868344783783, "learning_rate": 1.2512189972462433e-05, "loss": 0.0657, "step": 31985 }, { "epoch": 0.5664572513913055, "grad_norm": 0.6005809903144836, "learning_rate": 1.2511341527732379e-05, "loss": 0.0581, "step": 31986 }, { "epoch": 0.5664749609283339, "grad_norm": 0.3832448422908783, "learning_rate": 1.2510493091189762e-05, "loss": 0.0416, "step": 31987 }, { "epoch": 0.5664926704653623, "grad_norm": 0.7066106796264648, "learning_rate": 1.2509644662837368e-05, "loss": 0.0676, "step": 31988 }, { "epoch": 0.5665103800023907, "grad_norm": 0.3073945641517639, "learning_rate": 1.250879624267799e-05, "loss": 0.0672, "step": 31989 }, { "epoch": 0.5665280895394192, "grad_norm": 0.5656851530075073, "learning_rate": 1.2507947830714422e-05, "loss": 0.0609, "step": 31990 }, { "epoch": 0.5665457990764476, "grad_norm": 0.6717868447303772, "learning_rate": 1.250709942694946e-05, "loss": 0.074, "step": 31991 }, { "epoch": 0.566563508613476, "grad_norm": 0.38901272416114807, "learning_rate": 1.2506251031385881e-05, "loss": 0.0593, "step": 31992 }, { "epoch": 0.5665812181505046, "grad_norm": 0.4152493178844452, "learning_rate": 1.2505402644026483e-05, "loss": 0.0721, "step": 31993 }, { "epoch": 0.566598927687533, "grad_norm": 0.40759265422821045, "learning_rate": 1.2504554264874065e-05, "loss": 0.0554, "step": 31994 }, { "epoch": 0.5666166372245613, "grad_norm": 0.739037811756134, "learning_rate": 1.2503705893931405e-05, "loss": 0.069, "step": 31995 }, { "epoch": 0.5666343467615897, "grad_norm": 0.6802502274513245, "learning_rate": 1.2502857531201301e-05, "loss": 0.0801, "step": 31996 }, { "epoch": 0.5666520562986183, "grad_norm": 0.6696584820747375, "learning_rate": 1.2502009176686542e-05, "loss": 0.0893, "step": 31997 }, { "epoch": 0.5666697658356467, "grad_norm": 0.43782296776771545, "learning_rate": 1.2501160830389924e-05, "loss": 0.0825, "step": 31998 }, { "epoch": 0.566687475372675, "grad_norm": 0.7022212743759155, "learning_rate": 1.2500312492314232e-05, "loss": 0.065, "step": 31999 }, { "epoch": 0.5667051849097035, "grad_norm": 0.4653888940811157, "learning_rate": 1.2499464162462259e-05, "loss": 0.0576, "step": 32000 }, { "epoch": 0.566722894446732, "grad_norm": 0.5191114544868469, "learning_rate": 1.2498615840836798e-05, "loss": 0.0864, "step": 32001 }, { "epoch": 0.5667406039837604, "grad_norm": 1.0284960269927979, "learning_rate": 1.2497767527440635e-05, "loss": 0.0546, "step": 32002 }, { "epoch": 0.5667583135207888, "grad_norm": 0.34611546993255615, "learning_rate": 1.2496919222276564e-05, "loss": 0.0651, "step": 32003 }, { "epoch": 0.5667760230578172, "grad_norm": 0.6873725652694702, "learning_rate": 1.2496070925347374e-05, "loss": 0.0627, "step": 32004 }, { "epoch": 0.5667937325948457, "grad_norm": 0.5623266100883484, "learning_rate": 1.2495222636655868e-05, "loss": 0.1151, "step": 32005 }, { "epoch": 0.5668114421318741, "grad_norm": 1.0650537014007568, "learning_rate": 1.2494374356204816e-05, "loss": 0.0791, "step": 32006 }, { "epoch": 0.5668291516689025, "grad_norm": 1.6276825666427612, "learning_rate": 1.2493526083997016e-05, "loss": 0.0575, "step": 32007 }, { "epoch": 0.566846861205931, "grad_norm": 0.7027642130851746, "learning_rate": 1.2492677820035273e-05, "loss": 0.099, "step": 32008 }, { "epoch": 0.5668645707429594, "grad_norm": 0.5908248424530029, "learning_rate": 1.249182956432236e-05, "loss": 0.056, "step": 32009 }, { "epoch": 0.5668822802799878, "grad_norm": 0.7881045937538147, "learning_rate": 1.249098131686107e-05, "loss": 0.0785, "step": 32010 }, { "epoch": 0.5668999898170162, "grad_norm": 0.5043210983276367, "learning_rate": 1.2490133077654201e-05, "loss": 0.0535, "step": 32011 }, { "epoch": 0.5669176993540447, "grad_norm": 0.6150913834571838, "learning_rate": 1.2489284846704545e-05, "loss": 0.0771, "step": 32012 }, { "epoch": 0.5669354088910731, "grad_norm": 0.8538277745246887, "learning_rate": 1.2488436624014882e-05, "loss": 0.0646, "step": 32013 }, { "epoch": 0.5669531184281015, "grad_norm": 0.42438066005706787, "learning_rate": 1.2487588409588009e-05, "loss": 0.0632, "step": 32014 }, { "epoch": 0.5669708279651299, "grad_norm": 1.1815073490142822, "learning_rate": 1.248674020342672e-05, "loss": 0.1136, "step": 32015 }, { "epoch": 0.5669885375021584, "grad_norm": 0.722836434841156, "learning_rate": 1.2485892005533798e-05, "loss": 0.0966, "step": 32016 }, { "epoch": 0.5670062470391868, "grad_norm": 0.5614092350006104, "learning_rate": 1.2485043815912038e-05, "loss": 0.0667, "step": 32017 }, { "epoch": 0.5670239565762152, "grad_norm": 0.645570695400238, "learning_rate": 1.2484195634564227e-05, "loss": 0.0704, "step": 32018 }, { "epoch": 0.5670416661132436, "grad_norm": 0.47687527537345886, "learning_rate": 1.2483347461493167e-05, "loss": 0.0543, "step": 32019 }, { "epoch": 0.5670593756502721, "grad_norm": 0.5514523386955261, "learning_rate": 1.2482499296701632e-05, "loss": 0.0884, "step": 32020 }, { "epoch": 0.5670770851873005, "grad_norm": 0.7438817620277405, "learning_rate": 1.248165114019242e-05, "loss": 0.0678, "step": 32021 }, { "epoch": 0.5670947947243289, "grad_norm": 0.5469152927398682, "learning_rate": 1.2480802991968316e-05, "loss": 0.0577, "step": 32022 }, { "epoch": 0.5671125042613574, "grad_norm": 0.8077574968338013, "learning_rate": 1.2479954852032127e-05, "loss": 0.0955, "step": 32023 }, { "epoch": 0.5671302137983858, "grad_norm": 0.30793261528015137, "learning_rate": 1.2479106720386624e-05, "loss": 0.0684, "step": 32024 }, { "epoch": 0.5671479233354142, "grad_norm": 0.8334038853645325, "learning_rate": 1.2478258597034604e-05, "loss": 0.0925, "step": 32025 }, { "epoch": 0.5671656328724426, "grad_norm": 0.8410107493400574, "learning_rate": 1.2477410481978865e-05, "loss": 0.0792, "step": 32026 }, { "epoch": 0.5671833424094711, "grad_norm": 0.48437923192977905, "learning_rate": 1.2476562375222185e-05, "loss": 0.0726, "step": 32027 }, { "epoch": 0.5672010519464995, "grad_norm": 0.5720330476760864, "learning_rate": 1.2475714276767358e-05, "loss": 0.0729, "step": 32028 }, { "epoch": 0.5672187614835279, "grad_norm": 0.5834505558013916, "learning_rate": 1.2474866186617178e-05, "loss": 0.0544, "step": 32029 }, { "epoch": 0.5672364710205563, "grad_norm": 0.6724889278411865, "learning_rate": 1.2474018104774435e-05, "loss": 0.0514, "step": 32030 }, { "epoch": 0.5672541805575848, "grad_norm": 0.5926309823989868, "learning_rate": 1.2473170031241913e-05, "loss": 0.0683, "step": 32031 }, { "epoch": 0.5672718900946132, "grad_norm": 0.4506351947784424, "learning_rate": 1.2472321966022408e-05, "loss": 0.0922, "step": 32032 }, { "epoch": 0.5672895996316416, "grad_norm": 0.784433901309967, "learning_rate": 1.2471473909118714e-05, "loss": 0.0912, "step": 32033 }, { "epoch": 0.56730730916867, "grad_norm": 0.44942569732666016, "learning_rate": 1.2470625860533608e-05, "loss": 0.0719, "step": 32034 }, { "epoch": 0.5673250187056985, "grad_norm": 0.442470520734787, "learning_rate": 1.2469777820269885e-05, "loss": 0.0685, "step": 32035 }, { "epoch": 0.5673427282427269, "grad_norm": 0.4806049168109894, "learning_rate": 1.2468929788330338e-05, "loss": 0.0672, "step": 32036 }, { "epoch": 0.5673604377797553, "grad_norm": 0.5137248039245605, "learning_rate": 1.2468081764717762e-05, "loss": 0.0432, "step": 32037 }, { "epoch": 0.5673781473167838, "grad_norm": 0.5202717185020447, "learning_rate": 1.2467233749434938e-05, "loss": 0.0529, "step": 32038 }, { "epoch": 0.5673958568538122, "grad_norm": 1.2059086561203003, "learning_rate": 1.2466385742484655e-05, "loss": 0.0956, "step": 32039 }, { "epoch": 0.5674135663908406, "grad_norm": 0.5850261449813843, "learning_rate": 1.2465537743869712e-05, "loss": 0.0618, "step": 32040 }, { "epoch": 0.567431275927869, "grad_norm": 0.4149758219718933, "learning_rate": 1.2464689753592892e-05, "loss": 0.0455, "step": 32041 }, { "epoch": 0.5674489854648975, "grad_norm": 0.4869752526283264, "learning_rate": 1.2463841771656982e-05, "loss": 0.0391, "step": 32042 }, { "epoch": 0.5674666950019259, "grad_norm": 0.3625093698501587, "learning_rate": 1.246299379806478e-05, "loss": 0.0395, "step": 32043 }, { "epoch": 0.5674844045389543, "grad_norm": 0.4987937808036804, "learning_rate": 1.2462145832819072e-05, "loss": 0.0423, "step": 32044 }, { "epoch": 0.5675021140759827, "grad_norm": 0.6439947485923767, "learning_rate": 1.2461297875922646e-05, "loss": 0.0675, "step": 32045 }, { "epoch": 0.5675198236130112, "grad_norm": 0.5596663355827332, "learning_rate": 1.2460449927378292e-05, "loss": 0.058, "step": 32046 }, { "epoch": 0.5675375331500396, "grad_norm": 0.5125604271888733, "learning_rate": 1.245960198718881e-05, "loss": 0.0566, "step": 32047 }, { "epoch": 0.567555242687068, "grad_norm": 0.5606564283370972, "learning_rate": 1.2458754055356972e-05, "loss": 0.0595, "step": 32048 }, { "epoch": 0.5675729522240964, "grad_norm": 0.6883001327514648, "learning_rate": 1.2457906131885576e-05, "loss": 0.0639, "step": 32049 }, { "epoch": 0.5675906617611249, "grad_norm": 0.5983901619911194, "learning_rate": 1.2457058216777413e-05, "loss": 0.0585, "step": 32050 }, { "epoch": 0.5676083712981533, "grad_norm": 0.579567551612854, "learning_rate": 1.2456210310035274e-05, "loss": 0.0609, "step": 32051 }, { "epoch": 0.5676260808351817, "grad_norm": 0.6711718440055847, "learning_rate": 1.2455362411661941e-05, "loss": 0.0865, "step": 32052 }, { "epoch": 0.5676437903722102, "grad_norm": 0.5985419750213623, "learning_rate": 1.245451452166021e-05, "loss": 0.042, "step": 32053 }, { "epoch": 0.5676614999092386, "grad_norm": 0.8160181641578674, "learning_rate": 1.2453666640032868e-05, "loss": 0.0855, "step": 32054 }, { "epoch": 0.567679209446267, "grad_norm": 0.6633344292640686, "learning_rate": 1.245281876678271e-05, "loss": 0.0599, "step": 32055 }, { "epoch": 0.5676969189832954, "grad_norm": 0.9250836968421936, "learning_rate": 1.2451970901912515e-05, "loss": 0.064, "step": 32056 }, { "epoch": 0.567714628520324, "grad_norm": 0.6181427836418152, "learning_rate": 1.245112304542508e-05, "loss": 0.0538, "step": 32057 }, { "epoch": 0.5677323380573523, "grad_norm": 0.7215179800987244, "learning_rate": 1.2450275197323198e-05, "loss": 0.0841, "step": 32058 }, { "epoch": 0.5677500475943807, "grad_norm": 0.727493405342102, "learning_rate": 1.2449427357609642e-05, "loss": 0.0631, "step": 32059 }, { "epoch": 0.5677677571314091, "grad_norm": 1.354485034942627, "learning_rate": 1.2448579526287217e-05, "loss": 0.0483, "step": 32060 }, { "epoch": 0.5677854666684377, "grad_norm": 0.5421867370605469, "learning_rate": 1.2447731703358706e-05, "loss": 0.0736, "step": 32061 }, { "epoch": 0.567803176205466, "grad_norm": 0.463853657245636, "learning_rate": 1.2446883888826906e-05, "loss": 0.0265, "step": 32062 }, { "epoch": 0.5678208857424945, "grad_norm": 0.6424992680549622, "learning_rate": 1.2446036082694594e-05, "loss": 0.0569, "step": 32063 }, { "epoch": 0.5678385952795229, "grad_norm": 0.5576879978179932, "learning_rate": 1.2445188284964565e-05, "loss": 0.0608, "step": 32064 }, { "epoch": 0.5678563048165514, "grad_norm": 0.5574473142623901, "learning_rate": 1.2444340495639613e-05, "loss": 0.0969, "step": 32065 }, { "epoch": 0.5678740143535798, "grad_norm": 0.5820960998535156, "learning_rate": 1.2443492714722517e-05, "loss": 0.0523, "step": 32066 }, { "epoch": 0.5678917238906082, "grad_norm": 0.8004629015922546, "learning_rate": 1.244264494221607e-05, "loss": 0.0837, "step": 32067 }, { "epoch": 0.5679094334276367, "grad_norm": 0.75925612449646, "learning_rate": 1.2441797178123063e-05, "loss": 0.0516, "step": 32068 }, { "epoch": 0.5679271429646651, "grad_norm": 0.7900775671005249, "learning_rate": 1.244094942244629e-05, "loss": 0.0867, "step": 32069 }, { "epoch": 0.5679448525016935, "grad_norm": 0.6006561517715454, "learning_rate": 1.2440101675188532e-05, "loss": 0.0527, "step": 32070 }, { "epoch": 0.5679625620387219, "grad_norm": 0.8251017928123474, "learning_rate": 1.2439253936352576e-05, "loss": 0.0756, "step": 32071 }, { "epoch": 0.5679802715757504, "grad_norm": 0.4333364963531494, "learning_rate": 1.2438406205941227e-05, "loss": 0.0606, "step": 32072 }, { "epoch": 0.5679979811127788, "grad_norm": 0.590124785900116, "learning_rate": 1.243755848395725e-05, "loss": 0.0671, "step": 32073 }, { "epoch": 0.5680156906498072, "grad_norm": 0.5856439471244812, "learning_rate": 1.243671077040345e-05, "loss": 0.0708, "step": 32074 }, { "epoch": 0.5680334001868356, "grad_norm": 0.17113304138183594, "learning_rate": 1.2435863065282613e-05, "loss": 0.0611, "step": 32075 }, { "epoch": 0.5680511097238641, "grad_norm": 0.3872230648994446, "learning_rate": 1.2435015368597532e-05, "loss": 0.0454, "step": 32076 }, { "epoch": 0.5680688192608925, "grad_norm": 0.6359776258468628, "learning_rate": 1.2434167680350985e-05, "loss": 0.0524, "step": 32077 }, { "epoch": 0.5680865287979209, "grad_norm": 0.7175921201705933, "learning_rate": 1.2433320000545768e-05, "loss": 0.0599, "step": 32078 }, { "epoch": 0.5681042383349493, "grad_norm": 0.6789243221282959, "learning_rate": 1.2432472329184673e-05, "loss": 0.0597, "step": 32079 }, { "epoch": 0.5681219478719778, "grad_norm": 0.6653874516487122, "learning_rate": 1.2431624666270479e-05, "loss": 0.0648, "step": 32080 }, { "epoch": 0.5681396574090062, "grad_norm": 0.6969621777534485, "learning_rate": 1.2430777011805981e-05, "loss": 0.06, "step": 32081 }, { "epoch": 0.5681573669460346, "grad_norm": 0.6020510792732239, "learning_rate": 1.2429929365793965e-05, "loss": 0.0731, "step": 32082 }, { "epoch": 0.5681750764830631, "grad_norm": 0.6673212647438049, "learning_rate": 1.2429081728237226e-05, "loss": 0.0586, "step": 32083 }, { "epoch": 0.5681927860200915, "grad_norm": 0.5491183400154114, "learning_rate": 1.2428234099138544e-05, "loss": 0.0418, "step": 32084 }, { "epoch": 0.5682104955571199, "grad_norm": 0.4809625446796417, "learning_rate": 1.2427386478500713e-05, "loss": 0.0489, "step": 32085 }, { "epoch": 0.5682282050941483, "grad_norm": 0.5918083786964417, "learning_rate": 1.2426538866326528e-05, "loss": 0.0492, "step": 32086 }, { "epoch": 0.5682459146311768, "grad_norm": 0.5040280818939209, "learning_rate": 1.242569126261876e-05, "loss": 0.0867, "step": 32087 }, { "epoch": 0.5682636241682052, "grad_norm": 0.9416595697402954, "learning_rate": 1.2424843667380206e-05, "loss": 0.0985, "step": 32088 }, { "epoch": 0.5682813337052336, "grad_norm": 0.7158036828041077, "learning_rate": 1.242399608061366e-05, "loss": 0.0642, "step": 32089 }, { "epoch": 0.568299043242262, "grad_norm": 0.5771554708480835, "learning_rate": 1.2423148502321911e-05, "loss": 0.0888, "step": 32090 }, { "epoch": 0.5683167527792905, "grad_norm": 0.755833089351654, "learning_rate": 1.2422300932507738e-05, "loss": 0.0709, "step": 32091 }, { "epoch": 0.5683344623163189, "grad_norm": 0.609159529209137, "learning_rate": 1.2421453371173933e-05, "loss": 0.0729, "step": 32092 }, { "epoch": 0.5683521718533473, "grad_norm": 0.8710517883300781, "learning_rate": 1.2420605818323285e-05, "loss": 0.044, "step": 32093 }, { "epoch": 0.5683698813903757, "grad_norm": 0.4402029514312744, "learning_rate": 1.2419758273958588e-05, "loss": 0.0405, "step": 32094 }, { "epoch": 0.5683875909274042, "grad_norm": 0.39316877722740173, "learning_rate": 1.2418910738082621e-05, "loss": 0.0694, "step": 32095 }, { "epoch": 0.5684053004644326, "grad_norm": 0.6748971343040466, "learning_rate": 1.2418063210698176e-05, "loss": 0.0626, "step": 32096 }, { "epoch": 0.568423010001461, "grad_norm": 0.44542163610458374, "learning_rate": 1.2417215691808045e-05, "loss": 0.0548, "step": 32097 }, { "epoch": 0.5684407195384895, "grad_norm": 0.9116297960281372, "learning_rate": 1.241636818141501e-05, "loss": 0.0738, "step": 32098 }, { "epoch": 0.5684584290755179, "grad_norm": 0.6288077235221863, "learning_rate": 1.2415520679521865e-05, "loss": 0.0605, "step": 32099 }, { "epoch": 0.5684761386125463, "grad_norm": 0.7289451360702515, "learning_rate": 1.2414673186131393e-05, "loss": 0.0632, "step": 32100 }, { "epoch": 0.5684938481495747, "grad_norm": 0.6758137941360474, "learning_rate": 1.2413825701246391e-05, "loss": 0.0758, "step": 32101 }, { "epoch": 0.5685115576866032, "grad_norm": 0.7677842378616333, "learning_rate": 1.2412978224869634e-05, "loss": 0.0856, "step": 32102 }, { "epoch": 0.5685292672236316, "grad_norm": 0.7132148742675781, "learning_rate": 1.2412130757003917e-05, "loss": 0.0912, "step": 32103 }, { "epoch": 0.56854697676066, "grad_norm": 0.7754043340682983, "learning_rate": 1.2411283297652036e-05, "loss": 0.0804, "step": 32104 }, { "epoch": 0.5685646862976884, "grad_norm": 0.838131308555603, "learning_rate": 1.2410435846816766e-05, "loss": 0.0692, "step": 32105 }, { "epoch": 0.5685823958347169, "grad_norm": 0.617529571056366, "learning_rate": 1.2409588404500897e-05, "loss": 0.0793, "step": 32106 }, { "epoch": 0.5686001053717453, "grad_norm": 0.6623238921165466, "learning_rate": 1.2408740970707223e-05, "loss": 0.0694, "step": 32107 }, { "epoch": 0.5686178149087737, "grad_norm": 0.5289571285247803, "learning_rate": 1.240789354543853e-05, "loss": 0.0761, "step": 32108 }, { "epoch": 0.5686355244458021, "grad_norm": 0.8352116942405701, "learning_rate": 1.2407046128697602e-05, "loss": 0.0871, "step": 32109 }, { "epoch": 0.5686532339828306, "grad_norm": 1.1493861675262451, "learning_rate": 1.2406198720487232e-05, "loss": 0.0944, "step": 32110 }, { "epoch": 0.568670943519859, "grad_norm": 0.38959062099456787, "learning_rate": 1.2405351320810208e-05, "loss": 0.0746, "step": 32111 }, { "epoch": 0.5686886530568874, "grad_norm": 0.5875883102416992, "learning_rate": 1.2404503929669313e-05, "loss": 0.0591, "step": 32112 }, { "epoch": 0.5687063625939159, "grad_norm": 0.8334208726882935, "learning_rate": 1.2403656547067336e-05, "loss": 0.0818, "step": 32113 }, { "epoch": 0.5687240721309443, "grad_norm": 0.7427839040756226, "learning_rate": 1.2402809173007068e-05, "loss": 0.084, "step": 32114 }, { "epoch": 0.5687417816679727, "grad_norm": 0.40776747465133667, "learning_rate": 1.24019618074913e-05, "loss": 0.0638, "step": 32115 }, { "epoch": 0.5687594912050011, "grad_norm": 0.5322638154029846, "learning_rate": 1.2401114450522808e-05, "loss": 0.0705, "step": 32116 }, { "epoch": 0.5687772007420296, "grad_norm": 0.4840281009674072, "learning_rate": 1.2400267102104386e-05, "loss": 0.0671, "step": 32117 }, { "epoch": 0.568794910279058, "grad_norm": 0.6273261308670044, "learning_rate": 1.2399419762238833e-05, "loss": 0.0858, "step": 32118 }, { "epoch": 0.5688126198160864, "grad_norm": 0.7015739679336548, "learning_rate": 1.2398572430928915e-05, "loss": 0.0537, "step": 32119 }, { "epoch": 0.5688303293531148, "grad_norm": 0.5029284358024597, "learning_rate": 1.2397725108177434e-05, "loss": 0.0507, "step": 32120 }, { "epoch": 0.5688480388901433, "grad_norm": 0.7214176058769226, "learning_rate": 1.2396877793987172e-05, "loss": 0.0752, "step": 32121 }, { "epoch": 0.5688657484271717, "grad_norm": 0.852748453617096, "learning_rate": 1.2396030488360923e-05, "loss": 0.0766, "step": 32122 }, { "epoch": 0.5688834579642001, "grad_norm": 0.6100326180458069, "learning_rate": 1.2395183191301468e-05, "loss": 0.0759, "step": 32123 }, { "epoch": 0.5689011675012285, "grad_norm": 0.7170178890228271, "learning_rate": 1.2394335902811595e-05, "loss": 0.0647, "step": 32124 }, { "epoch": 0.568918877038257, "grad_norm": 0.8024606108665466, "learning_rate": 1.2393488622894094e-05, "loss": 0.0664, "step": 32125 }, { "epoch": 0.5689365865752855, "grad_norm": 0.8200044631958008, "learning_rate": 1.2392641351551756e-05, "loss": 0.1123, "step": 32126 }, { "epoch": 0.5689542961123139, "grad_norm": 0.7171723246574402, "learning_rate": 1.239179408878736e-05, "loss": 0.0528, "step": 32127 }, { "epoch": 0.5689720056493424, "grad_norm": 0.7339950799942017, "learning_rate": 1.2390946834603697e-05, "loss": 0.0806, "step": 32128 }, { "epoch": 0.5689897151863708, "grad_norm": 0.523071825504303, "learning_rate": 1.2390099589003563e-05, "loss": 0.0891, "step": 32129 }, { "epoch": 0.5690074247233992, "grad_norm": 1.172005534172058, "learning_rate": 1.238925235198973e-05, "loss": 0.0958, "step": 32130 }, { "epoch": 0.5690251342604276, "grad_norm": 0.905681848526001, "learning_rate": 1.238840512356499e-05, "loss": 0.0764, "step": 32131 }, { "epoch": 0.5690428437974561, "grad_norm": 0.5117649435997009, "learning_rate": 1.2387557903732137e-05, "loss": 0.0705, "step": 32132 }, { "epoch": 0.5690605533344845, "grad_norm": 0.569423496723175, "learning_rate": 1.238671069249396e-05, "loss": 0.078, "step": 32133 }, { "epoch": 0.5690782628715129, "grad_norm": 0.5679693818092346, "learning_rate": 1.2385863489853234e-05, "loss": 0.0801, "step": 32134 }, { "epoch": 0.5690959724085413, "grad_norm": 0.5123853087425232, "learning_rate": 1.2385016295812751e-05, "loss": 0.0524, "step": 32135 }, { "epoch": 0.5691136819455698, "grad_norm": 0.6392247080802917, "learning_rate": 1.2384169110375306e-05, "loss": 0.0825, "step": 32136 }, { "epoch": 0.5691313914825982, "grad_norm": 0.6094750761985779, "learning_rate": 1.2383321933543676e-05, "loss": 0.0499, "step": 32137 }, { "epoch": 0.5691491010196266, "grad_norm": 0.30776339769363403, "learning_rate": 1.2382474765320649e-05, "loss": 0.041, "step": 32138 }, { "epoch": 0.569166810556655, "grad_norm": 0.5469199419021606, "learning_rate": 1.2381627605709018e-05, "loss": 0.0804, "step": 32139 }, { "epoch": 0.5691845200936835, "grad_norm": 0.8073201775550842, "learning_rate": 1.2380780454711571e-05, "loss": 0.0529, "step": 32140 }, { "epoch": 0.5692022296307119, "grad_norm": 0.8333155512809753, "learning_rate": 1.2379933312331089e-05, "loss": 0.0753, "step": 32141 }, { "epoch": 0.5692199391677403, "grad_norm": 0.7848847508430481, "learning_rate": 1.237908617857036e-05, "loss": 0.0747, "step": 32142 }, { "epoch": 0.5692376487047688, "grad_norm": 0.8906718492507935, "learning_rate": 1.237823905343218e-05, "loss": 0.0807, "step": 32143 }, { "epoch": 0.5692553582417972, "grad_norm": 0.23012538254261017, "learning_rate": 1.237739193691932e-05, "loss": 0.0598, "step": 32144 }, { "epoch": 0.5692730677788256, "grad_norm": 0.7747862935066223, "learning_rate": 1.2376544829034574e-05, "loss": 0.0783, "step": 32145 }, { "epoch": 0.569290777315854, "grad_norm": 0.7515994906425476, "learning_rate": 1.237569772978073e-05, "loss": 0.0418, "step": 32146 }, { "epoch": 0.5693084868528825, "grad_norm": 1.085193395614624, "learning_rate": 1.2374850639160583e-05, "loss": 0.0848, "step": 32147 }, { "epoch": 0.5693261963899109, "grad_norm": 0.38177746534347534, "learning_rate": 1.2374003557176906e-05, "loss": 0.0664, "step": 32148 }, { "epoch": 0.5693439059269393, "grad_norm": 0.5276238322257996, "learning_rate": 1.2373156483832492e-05, "loss": 0.0591, "step": 32149 }, { "epoch": 0.5693616154639677, "grad_norm": 1.0773316621780396, "learning_rate": 1.2372309419130132e-05, "loss": 0.0861, "step": 32150 }, { "epoch": 0.5693793250009962, "grad_norm": 0.6769436597824097, "learning_rate": 1.2371462363072602e-05, "loss": 0.0545, "step": 32151 }, { "epoch": 0.5693970345380246, "grad_norm": 0.8466554284095764, "learning_rate": 1.2370615315662697e-05, "loss": 0.0852, "step": 32152 }, { "epoch": 0.569414744075053, "grad_norm": 0.6790854930877686, "learning_rate": 1.23697682769032e-05, "loss": 0.0756, "step": 32153 }, { "epoch": 0.5694324536120814, "grad_norm": 0.7545911073684692, "learning_rate": 1.2368921246796905e-05, "loss": 0.0948, "step": 32154 }, { "epoch": 0.5694501631491099, "grad_norm": 0.5247538089752197, "learning_rate": 1.2368074225346588e-05, "loss": 0.0792, "step": 32155 }, { "epoch": 0.5694678726861383, "grad_norm": 0.8005377054214478, "learning_rate": 1.2367227212555042e-05, "loss": 0.1022, "step": 32156 }, { "epoch": 0.5694855822231667, "grad_norm": 0.44169968366622925, "learning_rate": 1.2366380208425051e-05, "loss": 0.0392, "step": 32157 }, { "epoch": 0.5695032917601952, "grad_norm": 0.6223446726799011, "learning_rate": 1.2365533212959408e-05, "loss": 0.0861, "step": 32158 }, { "epoch": 0.5695210012972236, "grad_norm": 0.6629540324211121, "learning_rate": 1.2364686226160889e-05, "loss": 0.0728, "step": 32159 }, { "epoch": 0.569538710834252, "grad_norm": 0.3783138692378998, "learning_rate": 1.2363839248032284e-05, "loss": 0.0375, "step": 32160 }, { "epoch": 0.5695564203712804, "grad_norm": 0.5951818227767944, "learning_rate": 1.236299227857639e-05, "loss": 0.0869, "step": 32161 }, { "epoch": 0.5695741299083089, "grad_norm": 0.4965958595275879, "learning_rate": 1.2362145317795977e-05, "loss": 0.0721, "step": 32162 }, { "epoch": 0.5695918394453373, "grad_norm": 0.5799642205238342, "learning_rate": 1.2361298365693842e-05, "loss": 0.0747, "step": 32163 }, { "epoch": 0.5696095489823657, "grad_norm": 0.6049955487251282, "learning_rate": 1.2360451422272766e-05, "loss": 0.0808, "step": 32164 }, { "epoch": 0.5696272585193941, "grad_norm": 0.6866398453712463, "learning_rate": 1.2359604487535541e-05, "loss": 0.0776, "step": 32165 }, { "epoch": 0.5696449680564226, "grad_norm": 0.3834882974624634, "learning_rate": 1.2358757561484947e-05, "loss": 0.0301, "step": 32166 }, { "epoch": 0.569662677593451, "grad_norm": 0.8059571385383606, "learning_rate": 1.2357910644123775e-05, "loss": 0.0531, "step": 32167 }, { "epoch": 0.5696803871304794, "grad_norm": 0.45180997252464294, "learning_rate": 1.2357063735454813e-05, "loss": 0.0685, "step": 32168 }, { "epoch": 0.5696980966675079, "grad_norm": 0.47271594405174255, "learning_rate": 1.2356216835480838e-05, "loss": 0.029, "step": 32169 }, { "epoch": 0.5697158062045363, "grad_norm": 0.5390242338180542, "learning_rate": 1.2355369944204644e-05, "loss": 0.0731, "step": 32170 }, { "epoch": 0.5697335157415647, "grad_norm": 0.7756391167640686, "learning_rate": 1.2354523061629016e-05, "loss": 0.0372, "step": 32171 }, { "epoch": 0.5697512252785931, "grad_norm": 0.592557966709137, "learning_rate": 1.2353676187756744e-05, "loss": 0.0645, "step": 32172 }, { "epoch": 0.5697689348156216, "grad_norm": 0.652790367603302, "learning_rate": 1.2352829322590606e-05, "loss": 0.0719, "step": 32173 }, { "epoch": 0.56978664435265, "grad_norm": 0.5984336733818054, "learning_rate": 1.235198246613339e-05, "loss": 0.0678, "step": 32174 }, { "epoch": 0.5698043538896784, "grad_norm": 0.641049861907959, "learning_rate": 1.2351135618387887e-05, "loss": 0.0535, "step": 32175 }, { "epoch": 0.5698220634267068, "grad_norm": 0.7814025282859802, "learning_rate": 1.2350288779356878e-05, "loss": 0.0941, "step": 32176 }, { "epoch": 0.5698397729637353, "grad_norm": 0.438812255859375, "learning_rate": 1.2349441949043149e-05, "loss": 0.0879, "step": 32177 }, { "epoch": 0.5698574825007637, "grad_norm": 0.885073184967041, "learning_rate": 1.234859512744949e-05, "loss": 0.0989, "step": 32178 }, { "epoch": 0.5698751920377921, "grad_norm": 0.47020673751831055, "learning_rate": 1.2347748314578687e-05, "loss": 0.0691, "step": 32179 }, { "epoch": 0.5698929015748205, "grad_norm": 0.5447614192962646, "learning_rate": 1.2346901510433521e-05, "loss": 0.0345, "step": 32180 }, { "epoch": 0.569910611111849, "grad_norm": 0.5020049214363098, "learning_rate": 1.234605471501678e-05, "loss": 0.0825, "step": 32181 }, { "epoch": 0.5699283206488774, "grad_norm": 0.649120569229126, "learning_rate": 1.2345207928331255e-05, "loss": 0.0447, "step": 32182 }, { "epoch": 0.5699460301859058, "grad_norm": 0.7510334849357605, "learning_rate": 1.2344361150379722e-05, "loss": 0.0583, "step": 32183 }, { "epoch": 0.5699637397229343, "grad_norm": 0.4478131830692291, "learning_rate": 1.2343514381164973e-05, "loss": 0.0566, "step": 32184 }, { "epoch": 0.5699814492599627, "grad_norm": 0.5334487557411194, "learning_rate": 1.2342667620689793e-05, "loss": 0.0778, "step": 32185 }, { "epoch": 0.5699991587969911, "grad_norm": 0.5456313490867615, "learning_rate": 1.2341820868956974e-05, "loss": 0.0734, "step": 32186 }, { "epoch": 0.5700168683340195, "grad_norm": 0.7103011012077332, "learning_rate": 1.234097412596929e-05, "loss": 0.0607, "step": 32187 }, { "epoch": 0.570034577871048, "grad_norm": 0.7358720302581787, "learning_rate": 1.2340127391729532e-05, "loss": 0.0804, "step": 32188 }, { "epoch": 0.5700522874080765, "grad_norm": 0.4562365710735321, "learning_rate": 1.233928066624049e-05, "loss": 0.0744, "step": 32189 }, { "epoch": 0.5700699969451049, "grad_norm": 0.8645631074905396, "learning_rate": 1.233843394950494e-05, "loss": 0.0897, "step": 32190 }, { "epoch": 0.5700877064821332, "grad_norm": 0.7834948897361755, "learning_rate": 1.2337587241525676e-05, "loss": 0.0538, "step": 32191 }, { "epoch": 0.5701054160191618, "grad_norm": 0.5215749740600586, "learning_rate": 1.2336740542305478e-05, "loss": 0.069, "step": 32192 }, { "epoch": 0.5701231255561902, "grad_norm": 0.8581012487411499, "learning_rate": 1.233589385184714e-05, "loss": 0.0919, "step": 32193 }, { "epoch": 0.5701408350932186, "grad_norm": 0.6544727087020874, "learning_rate": 1.2335047170153435e-05, "loss": 0.0577, "step": 32194 }, { "epoch": 0.570158544630247, "grad_norm": 0.5995144248008728, "learning_rate": 1.2334200497227159e-05, "loss": 0.0503, "step": 32195 }, { "epoch": 0.5701762541672755, "grad_norm": 0.7686895132064819, "learning_rate": 1.233335383307109e-05, "loss": 0.0649, "step": 32196 }, { "epoch": 0.5701939637043039, "grad_norm": 0.7596769332885742, "learning_rate": 1.2332507177688024e-05, "loss": 0.072, "step": 32197 }, { "epoch": 0.5702116732413323, "grad_norm": 1.0223256349563599, "learning_rate": 1.2331660531080735e-05, "loss": 0.0771, "step": 32198 }, { "epoch": 0.5702293827783608, "grad_norm": 0.52273029088974, "learning_rate": 1.2330813893252013e-05, "loss": 0.0803, "step": 32199 }, { "epoch": 0.5702470923153892, "grad_norm": 0.7525910139083862, "learning_rate": 1.2329967264204649e-05, "loss": 0.0645, "step": 32200 }, { "epoch": 0.5702648018524176, "grad_norm": 0.8914533257484436, "learning_rate": 1.2329120643941416e-05, "loss": 0.073, "step": 32201 }, { "epoch": 0.570282511389446, "grad_norm": 0.44774746894836426, "learning_rate": 1.2328274032465108e-05, "loss": 0.0667, "step": 32202 }, { "epoch": 0.5703002209264745, "grad_norm": 0.4454409182071686, "learning_rate": 1.2327427429778506e-05, "loss": 0.0717, "step": 32203 }, { "epoch": 0.5703179304635029, "grad_norm": 1.1086273193359375, "learning_rate": 1.2326580835884401e-05, "loss": 0.0699, "step": 32204 }, { "epoch": 0.5703356400005313, "grad_norm": 0.561653196811676, "learning_rate": 1.2325734250785572e-05, "loss": 0.0611, "step": 32205 }, { "epoch": 0.5703533495375597, "grad_norm": 0.5033618211746216, "learning_rate": 1.2324887674484808e-05, "loss": 0.0417, "step": 32206 }, { "epoch": 0.5703710590745882, "grad_norm": 0.779023289680481, "learning_rate": 1.2324041106984894e-05, "loss": 0.0985, "step": 32207 }, { "epoch": 0.5703887686116166, "grad_norm": 0.730069100856781, "learning_rate": 1.232319454828861e-05, "loss": 0.0638, "step": 32208 }, { "epoch": 0.570406478148645, "grad_norm": 0.717844545841217, "learning_rate": 1.232234799839875e-05, "loss": 0.0835, "step": 32209 }, { "epoch": 0.5704241876856734, "grad_norm": 0.5845385193824768, "learning_rate": 1.232150145731809e-05, "loss": 0.0602, "step": 32210 }, { "epoch": 0.5704418972227019, "grad_norm": 0.7653293609619141, "learning_rate": 1.2320654925049428e-05, "loss": 0.074, "step": 32211 }, { "epoch": 0.5704596067597303, "grad_norm": 0.40344002842903137, "learning_rate": 1.2319808401595529e-05, "loss": 0.0485, "step": 32212 }, { "epoch": 0.5704773162967587, "grad_norm": 0.7527709603309631, "learning_rate": 1.2318961886959192e-05, "loss": 0.0532, "step": 32213 }, { "epoch": 0.5704950258337872, "grad_norm": 0.4325261414051056, "learning_rate": 1.231811538114321e-05, "loss": 0.0571, "step": 32214 }, { "epoch": 0.5705127353708156, "grad_norm": 0.5572718977928162, "learning_rate": 1.2317268884150347e-05, "loss": 0.0947, "step": 32215 }, { "epoch": 0.570530444907844, "grad_norm": 0.6302840709686279, "learning_rate": 1.2316422395983396e-05, "loss": 0.0622, "step": 32216 }, { "epoch": 0.5705481544448724, "grad_norm": 0.6039966344833374, "learning_rate": 1.2315575916645148e-05, "loss": 0.0715, "step": 32217 }, { "epoch": 0.5705658639819009, "grad_norm": 0.7301195859909058, "learning_rate": 1.2314729446138386e-05, "loss": 0.0648, "step": 32218 }, { "epoch": 0.5705835735189293, "grad_norm": 0.5500310063362122, "learning_rate": 1.2313882984465888e-05, "loss": 0.0773, "step": 32219 }, { "epoch": 0.5706012830559577, "grad_norm": 0.8661927580833435, "learning_rate": 1.2313036531630443e-05, "loss": 0.0902, "step": 32220 }, { "epoch": 0.5706189925929861, "grad_norm": 0.4668639898300171, "learning_rate": 1.2312190087634842e-05, "loss": 0.1191, "step": 32221 }, { "epoch": 0.5706367021300146, "grad_norm": 0.6698319911956787, "learning_rate": 1.2311343652481857e-05, "loss": 0.0518, "step": 32222 }, { "epoch": 0.570654411667043, "grad_norm": 0.4444975256919861, "learning_rate": 1.2310497226174282e-05, "loss": 0.0487, "step": 32223 }, { "epoch": 0.5706721212040714, "grad_norm": 0.5862643122673035, "learning_rate": 1.2309650808714898e-05, "loss": 0.0669, "step": 32224 }, { "epoch": 0.5706898307410998, "grad_norm": 0.5820664763450623, "learning_rate": 1.2308804400106498e-05, "loss": 0.0624, "step": 32225 }, { "epoch": 0.5707075402781283, "grad_norm": 0.4865027964115143, "learning_rate": 1.2307958000351849e-05, "loss": 0.0608, "step": 32226 }, { "epoch": 0.5707252498151567, "grad_norm": 0.6178894639015198, "learning_rate": 1.2307111609453749e-05, "loss": 0.0427, "step": 32227 }, { "epoch": 0.5707429593521851, "grad_norm": 0.9643919467926025, "learning_rate": 1.2306265227414979e-05, "loss": 0.0746, "step": 32228 }, { "epoch": 0.5707606688892136, "grad_norm": 0.7416050434112549, "learning_rate": 1.2305418854238333e-05, "loss": 0.0572, "step": 32229 }, { "epoch": 0.570778378426242, "grad_norm": 0.7820170521736145, "learning_rate": 1.2304572489926576e-05, "loss": 0.0546, "step": 32230 }, { "epoch": 0.5707960879632704, "grad_norm": 0.47717955708503723, "learning_rate": 1.2303726134482506e-05, "loss": 0.0551, "step": 32231 }, { "epoch": 0.5708137975002988, "grad_norm": 0.5136247873306274, "learning_rate": 1.2302879787908905e-05, "loss": 0.0448, "step": 32232 }, { "epoch": 0.5708315070373273, "grad_norm": 0.6053663492202759, "learning_rate": 1.2302033450208557e-05, "loss": 0.0687, "step": 32233 }, { "epoch": 0.5708492165743557, "grad_norm": 0.6453792452812195, "learning_rate": 1.2301187121384244e-05, "loss": 0.0752, "step": 32234 }, { "epoch": 0.5708669261113841, "grad_norm": 0.552366316318512, "learning_rate": 1.2300340801438752e-05, "loss": 0.0647, "step": 32235 }, { "epoch": 0.5708846356484125, "grad_norm": 0.47553372383117676, "learning_rate": 1.229949449037487e-05, "loss": 0.0343, "step": 32236 }, { "epoch": 0.570902345185441, "grad_norm": 0.6209495067596436, "learning_rate": 1.2298648188195373e-05, "loss": 0.0618, "step": 32237 }, { "epoch": 0.5709200547224694, "grad_norm": 0.6559191346168518, "learning_rate": 1.2297801894903052e-05, "loss": 0.0733, "step": 32238 }, { "epoch": 0.5709377642594978, "grad_norm": 0.7192056179046631, "learning_rate": 1.2296955610500695e-05, "loss": 0.0429, "step": 32239 }, { "epoch": 0.5709554737965262, "grad_norm": 0.8727260828018188, "learning_rate": 1.2296109334991075e-05, "loss": 0.0739, "step": 32240 }, { "epoch": 0.5709731833335547, "grad_norm": 0.5411040782928467, "learning_rate": 1.2295263068376978e-05, "loss": 0.0671, "step": 32241 }, { "epoch": 0.5709908928705831, "grad_norm": 0.5129917860031128, "learning_rate": 1.2294416810661194e-05, "loss": 0.0627, "step": 32242 }, { "epoch": 0.5710086024076115, "grad_norm": 0.5709752440452576, "learning_rate": 1.2293570561846515e-05, "loss": 0.0777, "step": 32243 }, { "epoch": 0.57102631194464, "grad_norm": 0.7519816160202026, "learning_rate": 1.2292724321935706e-05, "loss": 0.0901, "step": 32244 }, { "epoch": 0.5710440214816684, "grad_norm": 0.6524154543876648, "learning_rate": 1.2291878090931558e-05, "loss": 0.0483, "step": 32245 }, { "epoch": 0.5710617310186968, "grad_norm": 0.595734179019928, "learning_rate": 1.2291031868836866e-05, "loss": 0.0398, "step": 32246 }, { "epoch": 0.5710794405557252, "grad_norm": 0.5895704030990601, "learning_rate": 1.2290185655654398e-05, "loss": 0.0482, "step": 32247 }, { "epoch": 0.5710971500927537, "grad_norm": 0.8500896096229553, "learning_rate": 1.2289339451386945e-05, "loss": 0.0907, "step": 32248 }, { "epoch": 0.5711148596297821, "grad_norm": 0.6103063821792603, "learning_rate": 1.2288493256037293e-05, "loss": 0.0606, "step": 32249 }, { "epoch": 0.5711325691668105, "grad_norm": 0.33840036392211914, "learning_rate": 1.2287647069608227e-05, "loss": 0.0344, "step": 32250 }, { "epoch": 0.5711502787038389, "grad_norm": 0.550635039806366, "learning_rate": 1.2286800892102522e-05, "loss": 0.0436, "step": 32251 }, { "epoch": 0.5711679882408675, "grad_norm": 0.7123674750328064, "learning_rate": 1.228595472352297e-05, "loss": 0.0839, "step": 32252 }, { "epoch": 0.5711856977778959, "grad_norm": 0.33093976974487305, "learning_rate": 1.2285108563872358e-05, "loss": 0.052, "step": 32253 }, { "epoch": 0.5712034073149242, "grad_norm": 1.0717557668685913, "learning_rate": 1.2284262413153459e-05, "loss": 0.0887, "step": 32254 }, { "epoch": 0.5712211168519526, "grad_norm": 0.5998247265815735, "learning_rate": 1.2283416271369058e-05, "loss": 0.0855, "step": 32255 }, { "epoch": 0.5712388263889812, "grad_norm": 0.9456158876419067, "learning_rate": 1.2282570138521945e-05, "loss": 0.088, "step": 32256 }, { "epoch": 0.5712565359260096, "grad_norm": 0.7113637924194336, "learning_rate": 1.2281724014614908e-05, "loss": 0.0571, "step": 32257 }, { "epoch": 0.571274245463038, "grad_norm": 0.5250766277313232, "learning_rate": 1.2280877899650719e-05, "loss": 0.071, "step": 32258 }, { "epoch": 0.5712919550000665, "grad_norm": 0.5317425727844238, "learning_rate": 1.2280031793632166e-05, "loss": 0.0845, "step": 32259 }, { "epoch": 0.5713096645370949, "grad_norm": 0.7373777627944946, "learning_rate": 1.2279185696562032e-05, "loss": 0.0677, "step": 32260 }, { "epoch": 0.5713273740741233, "grad_norm": 0.7156388759613037, "learning_rate": 1.2278339608443108e-05, "loss": 0.0601, "step": 32261 }, { "epoch": 0.5713450836111517, "grad_norm": 0.5860385298728943, "learning_rate": 1.2277493529278166e-05, "loss": 0.0597, "step": 32262 }, { "epoch": 0.5713627931481802, "grad_norm": 0.6870616674423218, "learning_rate": 1.2276647459069994e-05, "loss": 0.0575, "step": 32263 }, { "epoch": 0.5713805026852086, "grad_norm": 0.8356465101242065, "learning_rate": 1.2275801397821382e-05, "loss": 0.0732, "step": 32264 }, { "epoch": 0.571398212222237, "grad_norm": 0.535507082939148, "learning_rate": 1.2274955345535104e-05, "loss": 0.0926, "step": 32265 }, { "epoch": 0.5714159217592654, "grad_norm": 0.5121619701385498, "learning_rate": 1.2274109302213945e-05, "loss": 0.0499, "step": 32266 }, { "epoch": 0.5714336312962939, "grad_norm": 0.5936954617500305, "learning_rate": 1.2273263267860694e-05, "loss": 0.043, "step": 32267 }, { "epoch": 0.5714513408333223, "grad_norm": 0.9742552042007446, "learning_rate": 1.2272417242478136e-05, "loss": 0.0827, "step": 32268 }, { "epoch": 0.5714690503703507, "grad_norm": 1.1992096900939941, "learning_rate": 1.2271571226069043e-05, "loss": 0.098, "step": 32269 }, { "epoch": 0.5714867599073791, "grad_norm": 0.27036917209625244, "learning_rate": 1.2270725218636201e-05, "loss": 0.0584, "step": 32270 }, { "epoch": 0.5715044694444076, "grad_norm": 0.525401771068573, "learning_rate": 1.2269879220182406e-05, "loss": 0.0532, "step": 32271 }, { "epoch": 0.571522178981436, "grad_norm": 0.6969798803329468, "learning_rate": 1.2269033230710429e-05, "loss": 0.0734, "step": 32272 }, { "epoch": 0.5715398885184644, "grad_norm": 0.4910256564617157, "learning_rate": 1.2268187250223053e-05, "loss": 0.065, "step": 32273 }, { "epoch": 0.5715575980554929, "grad_norm": 0.5379369258880615, "learning_rate": 1.2267341278723067e-05, "loss": 0.0492, "step": 32274 }, { "epoch": 0.5715753075925213, "grad_norm": 0.8838235139846802, "learning_rate": 1.2266495316213256e-05, "loss": 0.0816, "step": 32275 }, { "epoch": 0.5715930171295497, "grad_norm": 0.6320348381996155, "learning_rate": 1.2265649362696393e-05, "loss": 0.0498, "step": 32276 }, { "epoch": 0.5716107266665781, "grad_norm": 0.625988245010376, "learning_rate": 1.2264803418175267e-05, "loss": 0.0597, "step": 32277 }, { "epoch": 0.5716284362036066, "grad_norm": 0.790403425693512, "learning_rate": 1.2263957482652666e-05, "loss": 0.0785, "step": 32278 }, { "epoch": 0.571646145740635, "grad_norm": 0.6397426724433899, "learning_rate": 1.2263111556131363e-05, "loss": 0.0653, "step": 32279 }, { "epoch": 0.5716638552776634, "grad_norm": 0.6100596189498901, "learning_rate": 1.2262265638614147e-05, "loss": 0.0815, "step": 32280 }, { "epoch": 0.5716815648146918, "grad_norm": 0.393122136592865, "learning_rate": 1.22614197301038e-05, "loss": 0.0479, "step": 32281 }, { "epoch": 0.5716992743517203, "grad_norm": 0.5846192836761475, "learning_rate": 1.2260573830603114e-05, "loss": 0.0762, "step": 32282 }, { "epoch": 0.5717169838887487, "grad_norm": 0.9672908782958984, "learning_rate": 1.2259727940114854e-05, "loss": 0.0609, "step": 32283 }, { "epoch": 0.5717346934257771, "grad_norm": 0.4543990194797516, "learning_rate": 1.2258882058641813e-05, "loss": 0.0707, "step": 32284 }, { "epoch": 0.5717524029628055, "grad_norm": 0.8832933306694031, "learning_rate": 1.2258036186186775e-05, "loss": 0.0839, "step": 32285 }, { "epoch": 0.571770112499834, "grad_norm": 0.4891633093357086, "learning_rate": 1.2257190322752517e-05, "loss": 0.0553, "step": 32286 }, { "epoch": 0.5717878220368624, "grad_norm": 0.5743684768676758, "learning_rate": 1.2256344468341827e-05, "loss": 0.0464, "step": 32287 }, { "epoch": 0.5718055315738908, "grad_norm": 0.8019272685050964, "learning_rate": 1.2255498622957485e-05, "loss": 0.0777, "step": 32288 }, { "epoch": 0.5718232411109193, "grad_norm": 0.75167316198349, "learning_rate": 1.2254652786602278e-05, "loss": 0.0676, "step": 32289 }, { "epoch": 0.5718409506479477, "grad_norm": 0.6072346568107605, "learning_rate": 1.2253806959278983e-05, "loss": 0.0943, "step": 32290 }, { "epoch": 0.5718586601849761, "grad_norm": 0.5662788152694702, "learning_rate": 1.2252961140990387e-05, "loss": 0.0791, "step": 32291 }, { "epoch": 0.5718763697220045, "grad_norm": 0.6664140224456787, "learning_rate": 1.2252115331739275e-05, "loss": 0.081, "step": 32292 }, { "epoch": 0.571894079259033, "grad_norm": 0.8070420026779175, "learning_rate": 1.2251269531528417e-05, "loss": 0.0635, "step": 32293 }, { "epoch": 0.5719117887960614, "grad_norm": 1.0457162857055664, "learning_rate": 1.2250423740360606e-05, "loss": 0.0919, "step": 32294 }, { "epoch": 0.5719294983330898, "grad_norm": 0.8348034620285034, "learning_rate": 1.2249577958238625e-05, "loss": 0.0857, "step": 32295 }, { "epoch": 0.5719472078701182, "grad_norm": 0.6662620306015015, "learning_rate": 1.2248732185165259e-05, "loss": 0.0614, "step": 32296 }, { "epoch": 0.5719649174071467, "grad_norm": 0.5805836319923401, "learning_rate": 1.224788642114328e-05, "loss": 0.0444, "step": 32297 }, { "epoch": 0.5719826269441751, "grad_norm": 0.4776536226272583, "learning_rate": 1.2247040666175478e-05, "loss": 0.0395, "step": 32298 }, { "epoch": 0.5720003364812035, "grad_norm": 0.36464568972587585, "learning_rate": 1.2246194920264627e-05, "loss": 0.0579, "step": 32299 }, { "epoch": 0.5720180460182319, "grad_norm": 0.6842594742774963, "learning_rate": 1.224534918341353e-05, "loss": 0.0542, "step": 32300 }, { "epoch": 0.5720357555552604, "grad_norm": 0.6790611147880554, "learning_rate": 1.2244503455624947e-05, "loss": 0.06, "step": 32301 }, { "epoch": 0.5720534650922888, "grad_norm": 0.7147458791732788, "learning_rate": 1.2243657736901668e-05, "loss": 0.0778, "step": 32302 }, { "epoch": 0.5720711746293172, "grad_norm": 0.7267078757286072, "learning_rate": 1.224281202724648e-05, "loss": 0.0695, "step": 32303 }, { "epoch": 0.5720888841663457, "grad_norm": 0.9108293652534485, "learning_rate": 1.224196632666216e-05, "loss": 0.0584, "step": 32304 }, { "epoch": 0.5721065937033741, "grad_norm": 0.4060863256454468, "learning_rate": 1.224112063515149e-05, "loss": 0.081, "step": 32305 }, { "epoch": 0.5721243032404025, "grad_norm": 0.6275749802589417, "learning_rate": 1.2240274952717256e-05, "loss": 0.0841, "step": 32306 }, { "epoch": 0.5721420127774309, "grad_norm": 0.6626797318458557, "learning_rate": 1.2239429279362238e-05, "loss": 0.0659, "step": 32307 }, { "epoch": 0.5721597223144594, "grad_norm": 0.6982342004776001, "learning_rate": 1.2238583615089218e-05, "loss": 0.0685, "step": 32308 }, { "epoch": 0.5721774318514878, "grad_norm": 0.586925745010376, "learning_rate": 1.2237737959900978e-05, "loss": 0.0597, "step": 32309 }, { "epoch": 0.5721951413885162, "grad_norm": 0.42961859703063965, "learning_rate": 1.2236892313800307e-05, "loss": 0.0513, "step": 32310 }, { "epoch": 0.5722128509255446, "grad_norm": 0.710965633392334, "learning_rate": 1.2236046676789974e-05, "loss": 0.0807, "step": 32311 }, { "epoch": 0.5722305604625731, "grad_norm": 0.7290685176849365, "learning_rate": 1.2235201048872768e-05, "loss": 0.0561, "step": 32312 }, { "epoch": 0.5722482699996015, "grad_norm": 0.33811110258102417, "learning_rate": 1.2234355430051473e-05, "loss": 0.0763, "step": 32313 }, { "epoch": 0.5722659795366299, "grad_norm": 0.7535368800163269, "learning_rate": 1.223350982032887e-05, "loss": 0.0664, "step": 32314 }, { "epoch": 0.5722836890736583, "grad_norm": 0.5882701277732849, "learning_rate": 1.2232664219707738e-05, "loss": 0.053, "step": 32315 }, { "epoch": 0.5723013986106869, "grad_norm": 0.48000484704971313, "learning_rate": 1.2231818628190858e-05, "loss": 0.0506, "step": 32316 }, { "epoch": 0.5723191081477152, "grad_norm": 0.6548928022384644, "learning_rate": 1.2230973045781021e-05, "loss": 0.0793, "step": 32317 }, { "epoch": 0.5723368176847436, "grad_norm": 0.7240139842033386, "learning_rate": 1.2230127472480998e-05, "loss": 0.065, "step": 32318 }, { "epoch": 0.5723545272217722, "grad_norm": 0.5432125329971313, "learning_rate": 1.2229281908293577e-05, "loss": 0.0652, "step": 32319 }, { "epoch": 0.5723722367588006, "grad_norm": 1.0107451677322388, "learning_rate": 1.2228436353221534e-05, "loss": 0.0694, "step": 32320 }, { "epoch": 0.572389946295829, "grad_norm": 0.5284342169761658, "learning_rate": 1.2227590807267666e-05, "loss": 0.0492, "step": 32321 }, { "epoch": 0.5724076558328574, "grad_norm": 0.5597767233848572, "learning_rate": 1.2226745270434734e-05, "loss": 0.0743, "step": 32322 }, { "epoch": 0.5724253653698859, "grad_norm": 0.7422924041748047, "learning_rate": 1.222589974272553e-05, "loss": 0.0701, "step": 32323 }, { "epoch": 0.5724430749069143, "grad_norm": 0.6623713970184326, "learning_rate": 1.2225054224142845e-05, "loss": 0.0778, "step": 32324 }, { "epoch": 0.5724607844439427, "grad_norm": 0.46693143248558044, "learning_rate": 1.222420871468944e-05, "loss": 0.062, "step": 32325 }, { "epoch": 0.5724784939809711, "grad_norm": 0.7373611330986023, "learning_rate": 1.2223363214368112e-05, "loss": 0.0852, "step": 32326 }, { "epoch": 0.5724962035179996, "grad_norm": 0.7805116772651672, "learning_rate": 1.2222517723181635e-05, "loss": 0.0752, "step": 32327 }, { "epoch": 0.572513913055028, "grad_norm": 0.5998212099075317, "learning_rate": 1.22216722411328e-05, "loss": 0.0724, "step": 32328 }, { "epoch": 0.5725316225920564, "grad_norm": 0.5144466161727905, "learning_rate": 1.2220826768224378e-05, "loss": 0.0709, "step": 32329 }, { "epoch": 0.5725493321290848, "grad_norm": 1.1800036430358887, "learning_rate": 1.2219981304459154e-05, "loss": 0.1302, "step": 32330 }, { "epoch": 0.5725670416661133, "grad_norm": 0.6827116012573242, "learning_rate": 1.2219135849839911e-05, "loss": 0.0467, "step": 32331 }, { "epoch": 0.5725847512031417, "grad_norm": 0.781830906867981, "learning_rate": 1.2218290404369432e-05, "loss": 0.0829, "step": 32332 }, { "epoch": 0.5726024607401701, "grad_norm": 0.4656483829021454, "learning_rate": 1.2217444968050492e-05, "loss": 0.0814, "step": 32333 }, { "epoch": 0.5726201702771986, "grad_norm": 0.6207677125930786, "learning_rate": 1.221659954088588e-05, "loss": 0.0697, "step": 32334 }, { "epoch": 0.572637879814227, "grad_norm": 0.7813323736190796, "learning_rate": 1.2215754122878377e-05, "loss": 0.0956, "step": 32335 }, { "epoch": 0.5726555893512554, "grad_norm": 0.5504900217056274, "learning_rate": 1.2214908714030756e-05, "loss": 0.0567, "step": 32336 }, { "epoch": 0.5726732988882838, "grad_norm": 0.6598050594329834, "learning_rate": 1.2214063314345803e-05, "loss": 0.0839, "step": 32337 }, { "epoch": 0.5726910084253123, "grad_norm": 0.6640232801437378, "learning_rate": 1.2213217923826303e-05, "loss": 0.0628, "step": 32338 }, { "epoch": 0.5727087179623407, "grad_norm": 0.7692962288856506, "learning_rate": 1.2212372542475036e-05, "loss": 0.088, "step": 32339 }, { "epoch": 0.5727264274993691, "grad_norm": 0.7975624203681946, "learning_rate": 1.2211527170294778e-05, "loss": 0.094, "step": 32340 }, { "epoch": 0.5727441370363975, "grad_norm": 0.46629154682159424, "learning_rate": 1.2210681807288314e-05, "loss": 0.0379, "step": 32341 }, { "epoch": 0.572761846573426, "grad_norm": 0.8004587292671204, "learning_rate": 1.2209836453458427e-05, "loss": 0.0706, "step": 32342 }, { "epoch": 0.5727795561104544, "grad_norm": 0.8534315228462219, "learning_rate": 1.2208991108807892e-05, "loss": 0.1047, "step": 32343 }, { "epoch": 0.5727972656474828, "grad_norm": 0.5655636787414551, "learning_rate": 1.2208145773339496e-05, "loss": 0.0431, "step": 32344 }, { "epoch": 0.5728149751845112, "grad_norm": 0.4891910254955292, "learning_rate": 1.2207300447056016e-05, "loss": 0.0633, "step": 32345 }, { "epoch": 0.5728326847215397, "grad_norm": 0.6914149522781372, "learning_rate": 1.2206455129960239e-05, "loss": 0.0621, "step": 32346 }, { "epoch": 0.5728503942585681, "grad_norm": 0.4438014030456543, "learning_rate": 1.220560982205494e-05, "loss": 0.0926, "step": 32347 }, { "epoch": 0.5728681037955965, "grad_norm": 0.7600774765014648, "learning_rate": 1.2204764523342902e-05, "loss": 0.0873, "step": 32348 }, { "epoch": 0.572885813332625, "grad_norm": 0.6684507131576538, "learning_rate": 1.220391923382691e-05, "loss": 0.0468, "step": 32349 }, { "epoch": 0.5729035228696534, "grad_norm": 0.7657060027122498, "learning_rate": 1.2203073953509737e-05, "loss": 0.0613, "step": 32350 }, { "epoch": 0.5729212324066818, "grad_norm": 0.4223412871360779, "learning_rate": 1.2202228682394164e-05, "loss": 0.0642, "step": 32351 }, { "epoch": 0.5729389419437102, "grad_norm": 0.6219479441642761, "learning_rate": 1.2201383420482978e-05, "loss": 0.1027, "step": 32352 }, { "epoch": 0.5729566514807387, "grad_norm": 0.8479256629943848, "learning_rate": 1.2200538167778963e-05, "loss": 0.0769, "step": 32353 }, { "epoch": 0.5729743610177671, "grad_norm": 1.1867541074752808, "learning_rate": 1.2199692924284889e-05, "loss": 0.0663, "step": 32354 }, { "epoch": 0.5729920705547955, "grad_norm": 0.33982089161872864, "learning_rate": 1.2198847690003543e-05, "loss": 0.0631, "step": 32355 }, { "epoch": 0.5730097800918239, "grad_norm": 0.5121232867240906, "learning_rate": 1.2198002464937706e-05, "loss": 0.0698, "step": 32356 }, { "epoch": 0.5730274896288524, "grad_norm": 0.5343014597892761, "learning_rate": 1.2197157249090157e-05, "loss": 0.053, "step": 32357 }, { "epoch": 0.5730451991658808, "grad_norm": 0.4729542136192322, "learning_rate": 1.2196312042463674e-05, "loss": 0.0629, "step": 32358 }, { "epoch": 0.5730629087029092, "grad_norm": 0.5017275810241699, "learning_rate": 1.2195466845061043e-05, "loss": 0.0506, "step": 32359 }, { "epoch": 0.5730806182399376, "grad_norm": 0.4583207070827484, "learning_rate": 1.2194621656885046e-05, "loss": 0.0438, "step": 32360 }, { "epoch": 0.5730983277769661, "grad_norm": 0.4507865607738495, "learning_rate": 1.2193776477938457e-05, "loss": 0.0538, "step": 32361 }, { "epoch": 0.5731160373139945, "grad_norm": 0.7938979864120483, "learning_rate": 1.2192931308224057e-05, "loss": 0.0742, "step": 32362 }, { "epoch": 0.5731337468510229, "grad_norm": 0.8284518718719482, "learning_rate": 1.2192086147744638e-05, "loss": 0.0644, "step": 32363 }, { "epoch": 0.5731514563880514, "grad_norm": 0.46760138869285583, "learning_rate": 1.2191240996502963e-05, "loss": 0.0657, "step": 32364 }, { "epoch": 0.5731691659250798, "grad_norm": 0.6427384614944458, "learning_rate": 1.2190395854501819e-05, "loss": 0.0542, "step": 32365 }, { "epoch": 0.5731868754621082, "grad_norm": 0.442850798368454, "learning_rate": 1.2189550721743992e-05, "loss": 0.0415, "step": 32366 }, { "epoch": 0.5732045849991366, "grad_norm": 0.8088782429695129, "learning_rate": 1.2188705598232265e-05, "loss": 0.0789, "step": 32367 }, { "epoch": 0.5732222945361651, "grad_norm": 0.5870975255966187, "learning_rate": 1.2187860483969406e-05, "loss": 0.046, "step": 32368 }, { "epoch": 0.5732400040731935, "grad_norm": 0.7560549378395081, "learning_rate": 1.21870153789582e-05, "loss": 0.0808, "step": 32369 }, { "epoch": 0.5732577136102219, "grad_norm": 0.7089276909828186, "learning_rate": 1.218617028320143e-05, "loss": 0.0765, "step": 32370 }, { "epoch": 0.5732754231472503, "grad_norm": 0.3645717203617096, "learning_rate": 1.2185325196701879e-05, "loss": 0.044, "step": 32371 }, { "epoch": 0.5732931326842788, "grad_norm": 0.5834230780601501, "learning_rate": 1.218448011946232e-05, "loss": 0.0529, "step": 32372 }, { "epoch": 0.5733108422213072, "grad_norm": 0.7310523390769958, "learning_rate": 1.2183635051485536e-05, "loss": 0.076, "step": 32373 }, { "epoch": 0.5733285517583356, "grad_norm": 1.0679481029510498, "learning_rate": 1.2182789992774312e-05, "loss": 0.0808, "step": 32374 }, { "epoch": 0.573346261295364, "grad_norm": 0.7471297383308411, "learning_rate": 1.218194494333142e-05, "loss": 0.0445, "step": 32375 }, { "epoch": 0.5733639708323925, "grad_norm": 0.7644641995429993, "learning_rate": 1.2181099903159642e-05, "loss": 0.0898, "step": 32376 }, { "epoch": 0.5733816803694209, "grad_norm": 0.480677992105484, "learning_rate": 1.2180254872261763e-05, "loss": 0.0851, "step": 32377 }, { "epoch": 0.5733993899064493, "grad_norm": 0.9078253507614136, "learning_rate": 1.2179409850640568e-05, "loss": 0.0925, "step": 32378 }, { "epoch": 0.5734170994434779, "grad_norm": 0.6964321732521057, "learning_rate": 1.2178564838298822e-05, "loss": 0.0776, "step": 32379 }, { "epoch": 0.5734348089805062, "grad_norm": 0.7080230712890625, "learning_rate": 1.2177719835239307e-05, "loss": 0.0677, "step": 32380 }, { "epoch": 0.5734525185175346, "grad_norm": 0.5588104724884033, "learning_rate": 1.217687484146482e-05, "loss": 0.0882, "step": 32381 }, { "epoch": 0.573470228054563, "grad_norm": 0.9109691381454468, "learning_rate": 1.217602985697812e-05, "loss": 0.0856, "step": 32382 }, { "epoch": 0.5734879375915916, "grad_norm": 0.5419039130210876, "learning_rate": 1.2175184881781999e-05, "loss": 0.0772, "step": 32383 }, { "epoch": 0.57350564712862, "grad_norm": 0.4023682177066803, "learning_rate": 1.2174339915879233e-05, "loss": 0.054, "step": 32384 }, { "epoch": 0.5735233566656484, "grad_norm": 0.48188483715057373, "learning_rate": 1.2173494959272607e-05, "loss": 0.0639, "step": 32385 }, { "epoch": 0.5735410662026768, "grad_norm": 0.7157992720603943, "learning_rate": 1.2172650011964892e-05, "loss": 0.0844, "step": 32386 }, { "epoch": 0.5735587757397053, "grad_norm": 0.9809737801551819, "learning_rate": 1.2171805073958873e-05, "loss": 0.0721, "step": 32387 }, { "epoch": 0.5735764852767337, "grad_norm": 0.6160092949867249, "learning_rate": 1.2170960145257333e-05, "loss": 0.0579, "step": 32388 }, { "epoch": 0.5735941948137621, "grad_norm": 0.6107267737388611, "learning_rate": 1.2170115225863043e-05, "loss": 0.067, "step": 32389 }, { "epoch": 0.5736119043507905, "grad_norm": 0.6784172058105469, "learning_rate": 1.216927031577879e-05, "loss": 0.0684, "step": 32390 }, { "epoch": 0.573629613887819, "grad_norm": 0.6802480220794678, "learning_rate": 1.216842541500735e-05, "loss": 0.0766, "step": 32391 }, { "epoch": 0.5736473234248474, "grad_norm": 1.0088895559310913, "learning_rate": 1.216758052355151e-05, "loss": 0.0982, "step": 32392 }, { "epoch": 0.5736650329618758, "grad_norm": 0.39584478735923767, "learning_rate": 1.2166735641414038e-05, "loss": 0.075, "step": 32393 }, { "epoch": 0.5736827424989043, "grad_norm": 0.8756899833679199, "learning_rate": 1.2165890768597716e-05, "loss": 0.0895, "step": 32394 }, { "epoch": 0.5737004520359327, "grad_norm": 0.44068431854248047, "learning_rate": 1.2165045905105336e-05, "loss": 0.0591, "step": 32395 }, { "epoch": 0.5737181615729611, "grad_norm": 0.88874751329422, "learning_rate": 1.2164201050939662e-05, "loss": 0.0864, "step": 32396 }, { "epoch": 0.5737358711099895, "grad_norm": 0.538183331489563, "learning_rate": 1.216335620610348e-05, "loss": 0.0414, "step": 32397 }, { "epoch": 0.573753580647018, "grad_norm": 0.6072931885719299, "learning_rate": 1.2162511370599568e-05, "loss": 0.0742, "step": 32398 }, { "epoch": 0.5737712901840464, "grad_norm": 0.6785829067230225, "learning_rate": 1.2161666544430713e-05, "loss": 0.0857, "step": 32399 }, { "epoch": 0.5737889997210748, "grad_norm": 0.46971678733825684, "learning_rate": 1.2160821727599682e-05, "loss": 0.0487, "step": 32400 }, { "epoch": 0.5738067092581032, "grad_norm": 0.8233420848846436, "learning_rate": 1.2159976920109261e-05, "loss": 0.0989, "step": 32401 }, { "epoch": 0.5738244187951317, "grad_norm": 0.5218852758407593, "learning_rate": 1.2159132121962227e-05, "loss": 0.0576, "step": 32402 }, { "epoch": 0.5738421283321601, "grad_norm": 0.17010213434696198, "learning_rate": 1.2158287333161368e-05, "loss": 0.0571, "step": 32403 }, { "epoch": 0.5738598378691885, "grad_norm": 0.427261620759964, "learning_rate": 1.2157442553709449e-05, "loss": 0.0447, "step": 32404 }, { "epoch": 0.5738775474062169, "grad_norm": 0.6209496855735779, "learning_rate": 1.2156597783609258e-05, "loss": 0.0749, "step": 32405 }, { "epoch": 0.5738952569432454, "grad_norm": 0.48512229323387146, "learning_rate": 1.215575302286358e-05, "loss": 0.0372, "step": 32406 }, { "epoch": 0.5739129664802738, "grad_norm": 0.47119367122650146, "learning_rate": 1.2154908271475179e-05, "loss": 0.0493, "step": 32407 }, { "epoch": 0.5739306760173022, "grad_norm": 0.6505542397499084, "learning_rate": 1.2154063529446844e-05, "loss": 0.0749, "step": 32408 }, { "epoch": 0.5739483855543307, "grad_norm": 0.8096152544021606, "learning_rate": 1.2153218796781349e-05, "loss": 0.0714, "step": 32409 }, { "epoch": 0.5739660950913591, "grad_norm": 0.6691958904266357, "learning_rate": 1.2152374073481483e-05, "loss": 0.0849, "step": 32410 }, { "epoch": 0.5739838046283875, "grad_norm": 0.5882213711738586, "learning_rate": 1.2151529359550014e-05, "loss": 0.0756, "step": 32411 }, { "epoch": 0.5740015141654159, "grad_norm": 1.0222456455230713, "learning_rate": 1.2150684654989724e-05, "loss": 0.0717, "step": 32412 }, { "epoch": 0.5740192237024444, "grad_norm": 0.48263105750083923, "learning_rate": 1.2149839959803399e-05, "loss": 0.0575, "step": 32413 }, { "epoch": 0.5740369332394728, "grad_norm": 0.6654754877090454, "learning_rate": 1.2148995273993806e-05, "loss": 0.0766, "step": 32414 }, { "epoch": 0.5740546427765012, "grad_norm": 0.49065086245536804, "learning_rate": 1.2148150597563733e-05, "loss": 0.0579, "step": 32415 }, { "epoch": 0.5740723523135296, "grad_norm": 0.8259304761886597, "learning_rate": 1.2147305930515956e-05, "loss": 0.0957, "step": 32416 }, { "epoch": 0.5740900618505581, "grad_norm": 0.2967986762523651, "learning_rate": 1.2146461272853254e-05, "loss": 0.0539, "step": 32417 }, { "epoch": 0.5741077713875865, "grad_norm": 0.5403550863265991, "learning_rate": 1.2145616624578405e-05, "loss": 0.0565, "step": 32418 }, { "epoch": 0.5741254809246149, "grad_norm": 0.7558165788650513, "learning_rate": 1.2144771985694187e-05, "loss": 0.06, "step": 32419 }, { "epoch": 0.5741431904616433, "grad_norm": 0.8278915286064148, "learning_rate": 1.2143927356203388e-05, "loss": 0.0878, "step": 32420 }, { "epoch": 0.5741608999986718, "grad_norm": 0.34818682074546814, "learning_rate": 1.2143082736108773e-05, "loss": 0.0801, "step": 32421 }, { "epoch": 0.5741786095357002, "grad_norm": 0.7869198322296143, "learning_rate": 1.2142238125413127e-05, "loss": 0.0765, "step": 32422 }, { "epoch": 0.5741963190727286, "grad_norm": 0.9124706983566284, "learning_rate": 1.2141393524119223e-05, "loss": 0.0652, "step": 32423 }, { "epoch": 0.5742140286097571, "grad_norm": 0.9609746336936951, "learning_rate": 1.2140548932229857e-05, "loss": 0.0799, "step": 32424 }, { "epoch": 0.5742317381467855, "grad_norm": 0.8219453692436218, "learning_rate": 1.2139704349747789e-05, "loss": 0.0605, "step": 32425 }, { "epoch": 0.5742494476838139, "grad_norm": 0.564508855342865, "learning_rate": 1.2138859776675803e-05, "loss": 0.0553, "step": 32426 }, { "epoch": 0.5742671572208423, "grad_norm": 0.7843888998031616, "learning_rate": 1.2138015213016686e-05, "loss": 0.077, "step": 32427 }, { "epoch": 0.5742848667578708, "grad_norm": 0.8590216040611267, "learning_rate": 1.2137170658773203e-05, "loss": 0.0578, "step": 32428 }, { "epoch": 0.5743025762948992, "grad_norm": 0.43859782814979553, "learning_rate": 1.2136326113948138e-05, "loss": 0.0531, "step": 32429 }, { "epoch": 0.5743202858319276, "grad_norm": 0.9278826117515564, "learning_rate": 1.2135481578544273e-05, "loss": 0.0813, "step": 32430 }, { "epoch": 0.574337995368956, "grad_norm": 0.5101181864738464, "learning_rate": 1.2134637052564387e-05, "loss": 0.0442, "step": 32431 }, { "epoch": 0.5743557049059845, "grad_norm": 0.6273530721664429, "learning_rate": 1.2133792536011252e-05, "loss": 0.059, "step": 32432 }, { "epoch": 0.5743734144430129, "grad_norm": 0.8281123042106628, "learning_rate": 1.2132948028887647e-05, "loss": 0.0584, "step": 32433 }, { "epoch": 0.5743911239800413, "grad_norm": 0.6268115043640137, "learning_rate": 1.2132103531196355e-05, "loss": 0.0842, "step": 32434 }, { "epoch": 0.5744088335170697, "grad_norm": 0.5917453765869141, "learning_rate": 1.2131259042940159e-05, "loss": 0.0459, "step": 32435 }, { "epoch": 0.5744265430540982, "grad_norm": 0.7903291583061218, "learning_rate": 1.2130414564121825e-05, "loss": 0.0788, "step": 32436 }, { "epoch": 0.5744442525911266, "grad_norm": 0.827959418296814, "learning_rate": 1.2129570094744135e-05, "loss": 0.0685, "step": 32437 }, { "epoch": 0.574461962128155, "grad_norm": 0.6305457949638367, "learning_rate": 1.2128725634809874e-05, "loss": 0.0384, "step": 32438 }, { "epoch": 0.5744796716651835, "grad_norm": 0.49221083521842957, "learning_rate": 1.2127881184321809e-05, "loss": 0.1022, "step": 32439 }, { "epoch": 0.5744973812022119, "grad_norm": 0.419148713350296, "learning_rate": 1.2127036743282727e-05, "loss": 0.0759, "step": 32440 }, { "epoch": 0.5745150907392403, "grad_norm": 0.48202842473983765, "learning_rate": 1.2126192311695403e-05, "loss": 0.0373, "step": 32441 }, { "epoch": 0.5745328002762687, "grad_norm": 0.5096094608306885, "learning_rate": 1.2125347889562618e-05, "loss": 0.0468, "step": 32442 }, { "epoch": 0.5745505098132972, "grad_norm": 0.6397266983985901, "learning_rate": 1.2124503476887146e-05, "loss": 0.0834, "step": 32443 }, { "epoch": 0.5745682193503256, "grad_norm": 0.38627392053604126, "learning_rate": 1.2123659073671766e-05, "loss": 0.0686, "step": 32444 }, { "epoch": 0.574585928887354, "grad_norm": 0.8453919291496277, "learning_rate": 1.2122814679919263e-05, "loss": 0.0469, "step": 32445 }, { "epoch": 0.5746036384243824, "grad_norm": 0.7250256538391113, "learning_rate": 1.2121970295632398e-05, "loss": 0.0677, "step": 32446 }, { "epoch": 0.574621347961411, "grad_norm": 0.2806500494480133, "learning_rate": 1.2121125920813966e-05, "loss": 0.0622, "step": 32447 }, { "epoch": 0.5746390574984394, "grad_norm": 0.6468021273612976, "learning_rate": 1.2120281555466735e-05, "loss": 0.0625, "step": 32448 }, { "epoch": 0.5746567670354678, "grad_norm": 0.5291807055473328, "learning_rate": 1.2119437199593494e-05, "loss": 0.0676, "step": 32449 }, { "epoch": 0.5746744765724962, "grad_norm": 0.41650116443634033, "learning_rate": 1.2118592853197007e-05, "loss": 0.0821, "step": 32450 }, { "epoch": 0.5746921861095247, "grad_norm": 1.017872929573059, "learning_rate": 1.211774851628006e-05, "loss": 0.0671, "step": 32451 }, { "epoch": 0.5747098956465531, "grad_norm": 0.7099055051803589, "learning_rate": 1.211690418884543e-05, "loss": 0.072, "step": 32452 }, { "epoch": 0.5747276051835815, "grad_norm": 0.81865394115448, "learning_rate": 1.211605987089589e-05, "loss": 0.0913, "step": 32453 }, { "epoch": 0.57474531472061, "grad_norm": 0.4099896550178528, "learning_rate": 1.2115215562434222e-05, "loss": 0.0526, "step": 32454 }, { "epoch": 0.5747630242576384, "grad_norm": 0.4651033282279968, "learning_rate": 1.2114371263463204e-05, "loss": 0.0403, "step": 32455 }, { "epoch": 0.5747807337946668, "grad_norm": 0.6169385313987732, "learning_rate": 1.2113526973985613e-05, "loss": 0.0586, "step": 32456 }, { "epoch": 0.5747984433316952, "grad_norm": 0.7861030101776123, "learning_rate": 1.2112682694004227e-05, "loss": 0.0887, "step": 32457 }, { "epoch": 0.5748161528687237, "grad_norm": 0.7270211577415466, "learning_rate": 1.211183842352182e-05, "loss": 0.0476, "step": 32458 }, { "epoch": 0.5748338624057521, "grad_norm": 0.45152029395103455, "learning_rate": 1.211099416254118e-05, "loss": 0.049, "step": 32459 }, { "epoch": 0.5748515719427805, "grad_norm": 0.44826963543891907, "learning_rate": 1.2110149911065067e-05, "loss": 0.0618, "step": 32460 }, { "epoch": 0.5748692814798089, "grad_norm": 0.501575767993927, "learning_rate": 1.2109305669096273e-05, "loss": 0.0741, "step": 32461 }, { "epoch": 0.5748869910168374, "grad_norm": 0.5697761178016663, "learning_rate": 1.2108461436637569e-05, "loss": 0.0664, "step": 32462 }, { "epoch": 0.5749047005538658, "grad_norm": 1.06271493434906, "learning_rate": 1.2107617213691743e-05, "loss": 0.0828, "step": 32463 }, { "epoch": 0.5749224100908942, "grad_norm": 0.5981706976890564, "learning_rate": 1.2106773000261555e-05, "loss": 0.0386, "step": 32464 }, { "epoch": 0.5749401196279226, "grad_norm": 0.6460986137390137, "learning_rate": 1.2105928796349793e-05, "loss": 0.0872, "step": 32465 }, { "epoch": 0.5749578291649511, "grad_norm": 0.48414474725723267, "learning_rate": 1.2105084601959234e-05, "loss": 0.0485, "step": 32466 }, { "epoch": 0.5749755387019795, "grad_norm": 0.6111769080162048, "learning_rate": 1.2104240417092653e-05, "loss": 0.06, "step": 32467 }, { "epoch": 0.5749932482390079, "grad_norm": 0.4771108329296112, "learning_rate": 1.2103396241752828e-05, "loss": 0.0561, "step": 32468 }, { "epoch": 0.5750109577760364, "grad_norm": 0.48700639605522156, "learning_rate": 1.2102552075942535e-05, "loss": 0.0565, "step": 32469 }, { "epoch": 0.5750286673130648, "grad_norm": 0.5095832347869873, "learning_rate": 1.2101707919664556e-05, "loss": 0.0666, "step": 32470 }, { "epoch": 0.5750463768500932, "grad_norm": 0.48582878708839417, "learning_rate": 1.210086377292166e-05, "loss": 0.0614, "step": 32471 }, { "epoch": 0.5750640863871216, "grad_norm": 0.9281229972839355, "learning_rate": 1.2100019635716631e-05, "loss": 0.0618, "step": 32472 }, { "epoch": 0.5750817959241501, "grad_norm": 0.842664361000061, "learning_rate": 1.2099175508052245e-05, "loss": 0.0902, "step": 32473 }, { "epoch": 0.5750995054611785, "grad_norm": 0.393807977437973, "learning_rate": 1.2098331389931282e-05, "loss": 0.0643, "step": 32474 }, { "epoch": 0.5751172149982069, "grad_norm": 0.5511142611503601, "learning_rate": 1.2097487281356507e-05, "loss": 0.0723, "step": 32475 }, { "epoch": 0.5751349245352353, "grad_norm": 0.7042414546012878, "learning_rate": 1.209664318233071e-05, "loss": 0.0606, "step": 32476 }, { "epoch": 0.5751526340722638, "grad_norm": 0.38865014910697937, "learning_rate": 1.2095799092856667e-05, "loss": 0.0742, "step": 32477 }, { "epoch": 0.5751703436092922, "grad_norm": 0.5901826620101929, "learning_rate": 1.2094955012937146e-05, "loss": 0.0876, "step": 32478 }, { "epoch": 0.5751880531463206, "grad_norm": 0.6261412501335144, "learning_rate": 1.2094110942574929e-05, "loss": 0.0699, "step": 32479 }, { "epoch": 0.575205762683349, "grad_norm": 0.47428980469703674, "learning_rate": 1.2093266881772794e-05, "loss": 0.0488, "step": 32480 }, { "epoch": 0.5752234722203775, "grad_norm": 0.7128209471702576, "learning_rate": 1.209242283053352e-05, "loss": 0.0578, "step": 32481 }, { "epoch": 0.5752411817574059, "grad_norm": 0.8373169302940369, "learning_rate": 1.2091578788859876e-05, "loss": 0.0848, "step": 32482 }, { "epoch": 0.5752588912944343, "grad_norm": 0.6406180262565613, "learning_rate": 1.2090734756754643e-05, "loss": 0.0697, "step": 32483 }, { "epoch": 0.5752766008314628, "grad_norm": 0.551271915435791, "learning_rate": 1.2089890734220605e-05, "loss": 0.0862, "step": 32484 }, { "epoch": 0.5752943103684912, "grad_norm": 0.6966299414634705, "learning_rate": 1.2089046721260526e-05, "loss": 0.0886, "step": 32485 }, { "epoch": 0.5753120199055196, "grad_norm": 0.5588893294334412, "learning_rate": 1.2088202717877192e-05, "loss": 0.1065, "step": 32486 }, { "epoch": 0.575329729442548, "grad_norm": 0.5434717535972595, "learning_rate": 1.2087358724073375e-05, "loss": 0.0635, "step": 32487 }, { "epoch": 0.5753474389795765, "grad_norm": 0.7191351056098938, "learning_rate": 1.2086514739851856e-05, "loss": 0.0725, "step": 32488 }, { "epoch": 0.5753651485166049, "grad_norm": 0.45205944776535034, "learning_rate": 1.2085670765215404e-05, "loss": 0.0491, "step": 32489 }, { "epoch": 0.5753828580536333, "grad_norm": 0.4977267384529114, "learning_rate": 1.2084826800166802e-05, "loss": 0.0564, "step": 32490 }, { "epoch": 0.5754005675906617, "grad_norm": 0.7814485430717468, "learning_rate": 1.208398284470883e-05, "loss": 0.0441, "step": 32491 }, { "epoch": 0.5754182771276902, "grad_norm": 0.6711365580558777, "learning_rate": 1.2083138898844255e-05, "loss": 0.0609, "step": 32492 }, { "epoch": 0.5754359866647186, "grad_norm": 0.5320058465003967, "learning_rate": 1.2082294962575858e-05, "loss": 0.0594, "step": 32493 }, { "epoch": 0.575453696201747, "grad_norm": 0.11606691032648087, "learning_rate": 1.2081451035906415e-05, "loss": 0.0509, "step": 32494 }, { "epoch": 0.5754714057387754, "grad_norm": 0.7267455458641052, "learning_rate": 1.2080607118838706e-05, "loss": 0.1053, "step": 32495 }, { "epoch": 0.5754891152758039, "grad_norm": 0.8202080726623535, "learning_rate": 1.2079763211375501e-05, "loss": 0.0521, "step": 32496 }, { "epoch": 0.5755068248128323, "grad_norm": 0.6326993107795715, "learning_rate": 1.207891931351958e-05, "loss": 0.0615, "step": 32497 }, { "epoch": 0.5755245343498607, "grad_norm": 0.8478977084159851, "learning_rate": 1.2078075425273722e-05, "loss": 0.0865, "step": 32498 }, { "epoch": 0.5755422438868892, "grad_norm": 0.8976388573646545, "learning_rate": 1.2077231546640697e-05, "loss": 0.0721, "step": 32499 }, { "epoch": 0.5755599534239176, "grad_norm": 0.3821180760860443, "learning_rate": 1.2076387677623284e-05, "loss": 0.0796, "step": 32500 }, { "epoch": 0.575577662960946, "grad_norm": 0.5218533277511597, "learning_rate": 1.2075543818224261e-05, "loss": 0.0798, "step": 32501 }, { "epoch": 0.5755953724979744, "grad_norm": 1.0489072799682617, "learning_rate": 1.207469996844641e-05, "loss": 0.0923, "step": 32502 }, { "epoch": 0.5756130820350029, "grad_norm": 0.6149494051933289, "learning_rate": 1.2073856128292492e-05, "loss": 0.0687, "step": 32503 }, { "epoch": 0.5756307915720313, "grad_norm": 0.7341240048408508, "learning_rate": 1.2073012297765289e-05, "loss": 0.0757, "step": 32504 }, { "epoch": 0.5756485011090597, "grad_norm": 0.5473954081535339, "learning_rate": 1.2072168476867583e-05, "loss": 0.0889, "step": 32505 }, { "epoch": 0.5756662106460881, "grad_norm": 0.597686767578125, "learning_rate": 1.2071324665602152e-05, "loss": 0.0536, "step": 32506 }, { "epoch": 0.5756839201831166, "grad_norm": 0.6051100492477417, "learning_rate": 1.207048086397176e-05, "loss": 0.0818, "step": 32507 }, { "epoch": 0.575701629720145, "grad_norm": 0.7597898840904236, "learning_rate": 1.2069637071979193e-05, "loss": 0.0817, "step": 32508 }, { "epoch": 0.5757193392571734, "grad_norm": 0.5843770503997803, "learning_rate": 1.2068793289627224e-05, "loss": 0.0874, "step": 32509 }, { "epoch": 0.5757370487942018, "grad_norm": 0.5796022415161133, "learning_rate": 1.2067949516918625e-05, "loss": 0.0737, "step": 32510 }, { "epoch": 0.5757547583312304, "grad_norm": 0.632930338382721, "learning_rate": 1.2067105753856176e-05, "loss": 0.0606, "step": 32511 }, { "epoch": 0.5757724678682588, "grad_norm": 0.22028811275959015, "learning_rate": 1.2066262000442654e-05, "loss": 0.0586, "step": 32512 }, { "epoch": 0.5757901774052872, "grad_norm": 0.7525059580802917, "learning_rate": 1.2065418256680833e-05, "loss": 0.0551, "step": 32513 }, { "epoch": 0.5758078869423157, "grad_norm": 1.0714716911315918, "learning_rate": 1.2064574522573489e-05, "loss": 0.065, "step": 32514 }, { "epoch": 0.5758255964793441, "grad_norm": 1.0989458560943604, "learning_rate": 1.2063730798123396e-05, "loss": 0.057, "step": 32515 }, { "epoch": 0.5758433060163725, "grad_norm": 0.6193802356719971, "learning_rate": 1.206288708333334e-05, "loss": 0.0754, "step": 32516 }, { "epoch": 0.5758610155534009, "grad_norm": 0.37055426836013794, "learning_rate": 1.2062043378206081e-05, "loss": 0.0536, "step": 32517 }, { "epoch": 0.5758787250904294, "grad_norm": 0.9183078408241272, "learning_rate": 1.20611996827444e-05, "loss": 0.0777, "step": 32518 }, { "epoch": 0.5758964346274578, "grad_norm": 0.6108722686767578, "learning_rate": 1.2060355996951078e-05, "loss": 0.0472, "step": 32519 }, { "epoch": 0.5759141441644862, "grad_norm": 0.8324161767959595, "learning_rate": 1.2059512320828892e-05, "loss": 0.0935, "step": 32520 }, { "epoch": 0.5759318537015146, "grad_norm": 0.6918834447860718, "learning_rate": 1.2058668654380609e-05, "loss": 0.0961, "step": 32521 }, { "epoch": 0.5759495632385431, "grad_norm": 0.7083547711372375, "learning_rate": 1.2057824997609006e-05, "loss": 0.0775, "step": 32522 }, { "epoch": 0.5759672727755715, "grad_norm": 0.5993508100509644, "learning_rate": 1.2056981350516867e-05, "loss": 0.0729, "step": 32523 }, { "epoch": 0.5759849823125999, "grad_norm": 0.6621010303497314, "learning_rate": 1.2056137713106956e-05, "loss": 0.0596, "step": 32524 }, { "epoch": 0.5760026918496283, "grad_norm": 0.4864442050457001, "learning_rate": 1.2055294085382054e-05, "loss": 0.0628, "step": 32525 }, { "epoch": 0.5760204013866568, "grad_norm": 0.720490574836731, "learning_rate": 1.205445046734494e-05, "loss": 0.087, "step": 32526 }, { "epoch": 0.5760381109236852, "grad_norm": 0.8046722412109375, "learning_rate": 1.2053606858998386e-05, "loss": 0.056, "step": 32527 }, { "epoch": 0.5760558204607136, "grad_norm": 0.6629419326782227, "learning_rate": 1.2052763260345166e-05, "loss": 0.058, "step": 32528 }, { "epoch": 0.5760735299977421, "grad_norm": 0.5371220707893372, "learning_rate": 1.2051919671388056e-05, "loss": 0.0619, "step": 32529 }, { "epoch": 0.5760912395347705, "grad_norm": 0.3976675570011139, "learning_rate": 1.2051076092129839e-05, "loss": 0.0566, "step": 32530 }, { "epoch": 0.5761089490717989, "grad_norm": 0.5371699333190918, "learning_rate": 1.2050232522573276e-05, "loss": 0.0454, "step": 32531 }, { "epoch": 0.5761266586088273, "grad_norm": 0.47927266359329224, "learning_rate": 1.204938896272115e-05, "loss": 0.0704, "step": 32532 }, { "epoch": 0.5761443681458558, "grad_norm": 0.7179365754127502, "learning_rate": 1.2048545412576233e-05, "loss": 0.0486, "step": 32533 }, { "epoch": 0.5761620776828842, "grad_norm": 0.4556492865085602, "learning_rate": 1.2047701872141312e-05, "loss": 0.0587, "step": 32534 }, { "epoch": 0.5761797872199126, "grad_norm": 0.5748894810676575, "learning_rate": 1.2046858341419149e-05, "loss": 0.0599, "step": 32535 }, { "epoch": 0.576197496756941, "grad_norm": 0.654203474521637, "learning_rate": 1.204601482041252e-05, "loss": 0.0759, "step": 32536 }, { "epoch": 0.5762152062939695, "grad_norm": 1.1112979650497437, "learning_rate": 1.204517130912421e-05, "loss": 0.0995, "step": 32537 }, { "epoch": 0.5762329158309979, "grad_norm": 0.4538697302341461, "learning_rate": 1.2044327807556983e-05, "loss": 0.0709, "step": 32538 }, { "epoch": 0.5762506253680263, "grad_norm": 0.4848034381866455, "learning_rate": 1.2043484315713618e-05, "loss": 0.0725, "step": 32539 }, { "epoch": 0.5762683349050547, "grad_norm": 0.549757719039917, "learning_rate": 1.204264083359689e-05, "loss": 0.0576, "step": 32540 }, { "epoch": 0.5762860444420832, "grad_norm": 0.6247006058692932, "learning_rate": 1.2041797361209581e-05, "loss": 0.0691, "step": 32541 }, { "epoch": 0.5763037539791116, "grad_norm": 0.5251107215881348, "learning_rate": 1.2040953898554452e-05, "loss": 0.0805, "step": 32542 }, { "epoch": 0.57632146351614, "grad_norm": 0.34836894273757935, "learning_rate": 1.2040110445634289e-05, "loss": 0.043, "step": 32543 }, { "epoch": 0.5763391730531685, "grad_norm": 0.7694801092147827, "learning_rate": 1.2039267002451862e-05, "loss": 0.0695, "step": 32544 }, { "epoch": 0.5763568825901969, "grad_norm": 0.39552396535873413, "learning_rate": 1.2038423569009954e-05, "loss": 0.0374, "step": 32545 }, { "epoch": 0.5763745921272253, "grad_norm": 0.43888506293296814, "learning_rate": 1.2037580145311324e-05, "loss": 0.0381, "step": 32546 }, { "epoch": 0.5763923016642537, "grad_norm": 0.6722369194030762, "learning_rate": 1.2036736731358753e-05, "loss": 0.045, "step": 32547 }, { "epoch": 0.5764100112012822, "grad_norm": 0.3768762946128845, "learning_rate": 1.203589332715503e-05, "loss": 0.0522, "step": 32548 }, { "epoch": 0.5764277207383106, "grad_norm": 0.4233427345752716, "learning_rate": 1.2035049932702911e-05, "loss": 0.055, "step": 32549 }, { "epoch": 0.576445430275339, "grad_norm": 0.5544934272766113, "learning_rate": 1.2034206548005177e-05, "loss": 0.0547, "step": 32550 }, { "epoch": 0.5764631398123674, "grad_norm": 0.9917316436767578, "learning_rate": 1.2033363173064606e-05, "loss": 0.0686, "step": 32551 }, { "epoch": 0.5764808493493959, "grad_norm": 0.7626819610595703, "learning_rate": 1.2032519807883971e-05, "loss": 0.076, "step": 32552 }, { "epoch": 0.5764985588864243, "grad_norm": 0.39341723918914795, "learning_rate": 1.2031676452466044e-05, "loss": 0.069, "step": 32553 }, { "epoch": 0.5765162684234527, "grad_norm": 0.9283981919288635, "learning_rate": 1.2030833106813598e-05, "loss": 0.0871, "step": 32554 }, { "epoch": 0.5765339779604811, "grad_norm": 0.44402509927749634, "learning_rate": 1.2029989770929417e-05, "loss": 0.0401, "step": 32555 }, { "epoch": 0.5765516874975096, "grad_norm": 0.8377848863601685, "learning_rate": 1.2029146444816265e-05, "loss": 0.0726, "step": 32556 }, { "epoch": 0.576569397034538, "grad_norm": 0.5562501549720764, "learning_rate": 1.202830312847692e-05, "loss": 0.0446, "step": 32557 }, { "epoch": 0.5765871065715664, "grad_norm": 0.9889182448387146, "learning_rate": 1.2027459821914159e-05, "loss": 0.0532, "step": 32558 }, { "epoch": 0.5766048161085949, "grad_norm": 0.5742960572242737, "learning_rate": 1.202661652513076e-05, "loss": 0.0852, "step": 32559 }, { "epoch": 0.5766225256456233, "grad_norm": 0.7327349185943604, "learning_rate": 1.2025773238129487e-05, "loss": 0.0774, "step": 32560 }, { "epoch": 0.5766402351826517, "grad_norm": 0.24416621029376984, "learning_rate": 1.2024929960913117e-05, "loss": 0.0358, "step": 32561 }, { "epoch": 0.5766579447196801, "grad_norm": 0.812360405921936, "learning_rate": 1.202408669348443e-05, "loss": 0.0479, "step": 32562 }, { "epoch": 0.5766756542567086, "grad_norm": 0.4655684530735016, "learning_rate": 1.2023243435846195e-05, "loss": 0.0622, "step": 32563 }, { "epoch": 0.576693363793737, "grad_norm": 0.832311749458313, "learning_rate": 1.2022400188001186e-05, "loss": 0.0599, "step": 32564 }, { "epoch": 0.5767110733307654, "grad_norm": 0.6002122163772583, "learning_rate": 1.2021556949952182e-05, "loss": 0.0632, "step": 32565 }, { "epoch": 0.5767287828677938, "grad_norm": 0.6670840382575989, "learning_rate": 1.2020713721701955e-05, "loss": 0.0795, "step": 32566 }, { "epoch": 0.5767464924048223, "grad_norm": 0.21095654368400574, "learning_rate": 1.2019870503253277e-05, "loss": 0.048, "step": 32567 }, { "epoch": 0.5767642019418507, "grad_norm": 0.634052038192749, "learning_rate": 1.2019027294608922e-05, "loss": 0.0824, "step": 32568 }, { "epoch": 0.5767819114788791, "grad_norm": 0.4822095036506653, "learning_rate": 1.2018184095771673e-05, "loss": 0.0415, "step": 32569 }, { "epoch": 0.5767996210159075, "grad_norm": 0.7180173397064209, "learning_rate": 1.2017340906744286e-05, "loss": 0.0616, "step": 32570 }, { "epoch": 0.576817330552936, "grad_norm": 0.5385351777076721, "learning_rate": 1.201649772752955e-05, "loss": 0.0487, "step": 32571 }, { "epoch": 0.5768350400899644, "grad_norm": 1.0209293365478516, "learning_rate": 1.2015654558130234e-05, "loss": 0.0776, "step": 32572 }, { "epoch": 0.5768527496269928, "grad_norm": 1.1375902891159058, "learning_rate": 1.2014811398549123e-05, "loss": 0.0854, "step": 32573 }, { "epoch": 0.5768704591640214, "grad_norm": 0.6449156999588013, "learning_rate": 1.2013968248788969e-05, "loss": 0.0556, "step": 32574 }, { "epoch": 0.5768881687010498, "grad_norm": 0.4797096848487854, "learning_rate": 1.201312510885256e-05, "loss": 0.0578, "step": 32575 }, { "epoch": 0.5769058782380782, "grad_norm": 0.3024943768978119, "learning_rate": 1.2012281978742664e-05, "loss": 0.0663, "step": 32576 }, { "epoch": 0.5769235877751065, "grad_norm": 0.8777656555175781, "learning_rate": 1.2011438858462067e-05, "loss": 0.0607, "step": 32577 }, { "epoch": 0.5769412973121351, "grad_norm": 0.8698768615722656, "learning_rate": 1.2010595748013528e-05, "loss": 0.0633, "step": 32578 }, { "epoch": 0.5769590068491635, "grad_norm": 0.4980737268924713, "learning_rate": 1.2009752647399826e-05, "loss": 0.096, "step": 32579 }, { "epoch": 0.5769767163861919, "grad_norm": 0.6972385048866272, "learning_rate": 1.2008909556623738e-05, "loss": 0.047, "step": 32580 }, { "epoch": 0.5769944259232203, "grad_norm": 0.5014858245849609, "learning_rate": 1.2008066475688033e-05, "loss": 0.0352, "step": 32581 }, { "epoch": 0.5770121354602488, "grad_norm": 0.5870018601417542, "learning_rate": 1.2007223404595486e-05, "loss": 0.0449, "step": 32582 }, { "epoch": 0.5770298449972772, "grad_norm": 0.6397015452384949, "learning_rate": 1.2006380343348872e-05, "loss": 0.0571, "step": 32583 }, { "epoch": 0.5770475545343056, "grad_norm": 0.7726107239723206, "learning_rate": 1.2005537291950968e-05, "loss": 0.0718, "step": 32584 }, { "epoch": 0.577065264071334, "grad_norm": 0.7281147837638855, "learning_rate": 1.2004694250404538e-05, "loss": 0.0616, "step": 32585 }, { "epoch": 0.5770829736083625, "grad_norm": 0.895184338092804, "learning_rate": 1.200385121871236e-05, "loss": 0.0878, "step": 32586 }, { "epoch": 0.5771006831453909, "grad_norm": 0.8026114702224731, "learning_rate": 1.2003008196877218e-05, "loss": 0.0772, "step": 32587 }, { "epoch": 0.5771183926824193, "grad_norm": 0.5921457409858704, "learning_rate": 1.2002165184901867e-05, "loss": 0.0554, "step": 32588 }, { "epoch": 0.5771361022194478, "grad_norm": 0.4557889699935913, "learning_rate": 1.2001322182789092e-05, "loss": 0.0498, "step": 32589 }, { "epoch": 0.5771538117564762, "grad_norm": 0.6310113668441772, "learning_rate": 1.2000479190541663e-05, "loss": 0.0914, "step": 32590 }, { "epoch": 0.5771715212935046, "grad_norm": 0.7864664196968079, "learning_rate": 1.1999636208162356e-05, "loss": 0.0866, "step": 32591 }, { "epoch": 0.577189230830533, "grad_norm": 0.4762393534183502, "learning_rate": 1.199879323565394e-05, "loss": 0.0604, "step": 32592 }, { "epoch": 0.5772069403675615, "grad_norm": 0.47132834792137146, "learning_rate": 1.1997950273019191e-05, "loss": 0.0459, "step": 32593 }, { "epoch": 0.5772246499045899, "grad_norm": 0.3802032768726349, "learning_rate": 1.1997107320260884e-05, "loss": 0.0401, "step": 32594 }, { "epoch": 0.5772423594416183, "grad_norm": 0.555172860622406, "learning_rate": 1.1996264377381788e-05, "loss": 0.0698, "step": 32595 }, { "epoch": 0.5772600689786467, "grad_norm": 0.34724414348602295, "learning_rate": 1.1995421444384678e-05, "loss": 0.0434, "step": 32596 }, { "epoch": 0.5772777785156752, "grad_norm": 0.7193440794944763, "learning_rate": 1.1994578521272328e-05, "loss": 0.0712, "step": 32597 }, { "epoch": 0.5772954880527036, "grad_norm": 0.8366050124168396, "learning_rate": 1.1993735608047516e-05, "loss": 0.0692, "step": 32598 }, { "epoch": 0.577313197589732, "grad_norm": 0.248917818069458, "learning_rate": 1.1992892704713e-05, "loss": 0.0661, "step": 32599 }, { "epoch": 0.5773309071267604, "grad_norm": 0.6507118940353394, "learning_rate": 1.1992049811271567e-05, "loss": 0.0648, "step": 32600 }, { "epoch": 0.5773486166637889, "grad_norm": 0.7408032417297363, "learning_rate": 1.1991206927725993e-05, "loss": 0.0682, "step": 32601 }, { "epoch": 0.5773663262008173, "grad_norm": 0.8918880820274353, "learning_rate": 1.1990364054079036e-05, "loss": 0.0594, "step": 32602 }, { "epoch": 0.5773840357378457, "grad_norm": 0.8760711550712585, "learning_rate": 1.1989521190333479e-05, "loss": 0.1018, "step": 32603 }, { "epoch": 0.5774017452748742, "grad_norm": 0.7384811639785767, "learning_rate": 1.198867833649209e-05, "loss": 0.0673, "step": 32604 }, { "epoch": 0.5774194548119026, "grad_norm": 0.31485599279403687, "learning_rate": 1.1987835492557651e-05, "loss": 0.0656, "step": 32605 }, { "epoch": 0.577437164348931, "grad_norm": 0.5503256320953369, "learning_rate": 1.1986992658532923e-05, "loss": 0.0583, "step": 32606 }, { "epoch": 0.5774548738859594, "grad_norm": 0.8598392009735107, "learning_rate": 1.1986149834420685e-05, "loss": 0.0923, "step": 32607 }, { "epoch": 0.5774725834229879, "grad_norm": 0.48078790307044983, "learning_rate": 1.1985307020223709e-05, "loss": 0.0678, "step": 32608 }, { "epoch": 0.5774902929600163, "grad_norm": 0.30539390444755554, "learning_rate": 1.198446421594477e-05, "loss": 0.038, "step": 32609 }, { "epoch": 0.5775080024970447, "grad_norm": 0.8891189098358154, "learning_rate": 1.1983621421586639e-05, "loss": 0.0865, "step": 32610 }, { "epoch": 0.5775257120340731, "grad_norm": 0.3284316062927246, "learning_rate": 1.1982778637152086e-05, "loss": 0.0728, "step": 32611 }, { "epoch": 0.5775434215711016, "grad_norm": 0.701101541519165, "learning_rate": 1.1981935862643894e-05, "loss": 0.0764, "step": 32612 }, { "epoch": 0.57756113110813, "grad_norm": 0.8216384649276733, "learning_rate": 1.1981093098064815e-05, "loss": 0.0512, "step": 32613 }, { "epoch": 0.5775788406451584, "grad_norm": 0.9351965188980103, "learning_rate": 1.198025034341764e-05, "loss": 0.0543, "step": 32614 }, { "epoch": 0.5775965501821868, "grad_norm": 0.41369786858558655, "learning_rate": 1.1979407598705135e-05, "loss": 0.045, "step": 32615 }, { "epoch": 0.5776142597192153, "grad_norm": 0.4805141091346741, "learning_rate": 1.1978564863930081e-05, "loss": 0.0409, "step": 32616 }, { "epoch": 0.5776319692562437, "grad_norm": 0.3204720616340637, "learning_rate": 1.1977722139095235e-05, "loss": 0.0403, "step": 32617 }, { "epoch": 0.5776496787932721, "grad_norm": 0.488567054271698, "learning_rate": 1.197687942420338e-05, "loss": 0.0516, "step": 32618 }, { "epoch": 0.5776673883303006, "grad_norm": 0.925995945930481, "learning_rate": 1.1976036719257287e-05, "loss": 0.0643, "step": 32619 }, { "epoch": 0.577685097867329, "grad_norm": 0.5974884033203125, "learning_rate": 1.1975194024259725e-05, "loss": 0.0771, "step": 32620 }, { "epoch": 0.5777028074043574, "grad_norm": 0.790797770023346, "learning_rate": 1.1974351339213467e-05, "loss": 0.0738, "step": 32621 }, { "epoch": 0.5777205169413858, "grad_norm": 0.46257051825523376, "learning_rate": 1.1973508664121292e-05, "loss": 0.0552, "step": 32622 }, { "epoch": 0.5777382264784143, "grad_norm": 0.33702749013900757, "learning_rate": 1.1972665998985964e-05, "loss": 0.0877, "step": 32623 }, { "epoch": 0.5777559360154427, "grad_norm": 0.5189715623855591, "learning_rate": 1.1971823343810259e-05, "loss": 0.0839, "step": 32624 }, { "epoch": 0.5777736455524711, "grad_norm": 0.7006461024284363, "learning_rate": 1.1970980698596948e-05, "loss": 0.0491, "step": 32625 }, { "epoch": 0.5777913550894995, "grad_norm": 0.6888265609741211, "learning_rate": 1.1970138063348813e-05, "loss": 0.0531, "step": 32626 }, { "epoch": 0.577809064626528, "grad_norm": 0.6031597852706909, "learning_rate": 1.1969295438068608e-05, "loss": 0.0743, "step": 32627 }, { "epoch": 0.5778267741635564, "grad_norm": 0.7995091676712036, "learning_rate": 1.1968452822759112e-05, "loss": 0.0705, "step": 32628 }, { "epoch": 0.5778444837005848, "grad_norm": 0.7780033349990845, "learning_rate": 1.1967610217423103e-05, "loss": 0.0577, "step": 32629 }, { "epoch": 0.5778621932376132, "grad_norm": 0.5488274097442627, "learning_rate": 1.1966767622063356e-05, "loss": 0.0817, "step": 32630 }, { "epoch": 0.5778799027746417, "grad_norm": 0.6844568252563477, "learning_rate": 1.196592503668263e-05, "loss": 0.0891, "step": 32631 }, { "epoch": 0.5778976123116701, "grad_norm": 0.32588475942611694, "learning_rate": 1.1965082461283707e-05, "loss": 0.074, "step": 32632 }, { "epoch": 0.5779153218486985, "grad_norm": 0.9061629772186279, "learning_rate": 1.1964239895869355e-05, "loss": 0.0694, "step": 32633 }, { "epoch": 0.577933031385727, "grad_norm": 0.9466539621353149, "learning_rate": 1.1963397340442343e-05, "loss": 0.052, "step": 32634 }, { "epoch": 0.5779507409227554, "grad_norm": 0.944619357585907, "learning_rate": 1.196255479500545e-05, "loss": 0.0615, "step": 32635 }, { "epoch": 0.5779684504597838, "grad_norm": 0.6802517175674438, "learning_rate": 1.1961712259561443e-05, "loss": 0.0544, "step": 32636 }, { "epoch": 0.5779861599968122, "grad_norm": 0.6132825613021851, "learning_rate": 1.1960869734113097e-05, "loss": 0.0704, "step": 32637 }, { "epoch": 0.5780038695338408, "grad_norm": 0.4200935363769531, "learning_rate": 1.1960027218663181e-05, "loss": 0.0585, "step": 32638 }, { "epoch": 0.5780215790708692, "grad_norm": 0.5123299360275269, "learning_rate": 1.1959184713214466e-05, "loss": 0.0527, "step": 32639 }, { "epoch": 0.5780392886078976, "grad_norm": 0.6461209654808044, "learning_rate": 1.1958342217769735e-05, "loss": 0.0744, "step": 32640 }, { "epoch": 0.578056998144926, "grad_norm": 0.6808214783668518, "learning_rate": 1.1957499732331741e-05, "loss": 0.0624, "step": 32641 }, { "epoch": 0.5780747076819545, "grad_norm": 0.49605053663253784, "learning_rate": 1.1956657256903264e-05, "loss": 0.0529, "step": 32642 }, { "epoch": 0.5780924172189829, "grad_norm": 0.7326542139053345, "learning_rate": 1.195581479148708e-05, "loss": 0.0752, "step": 32643 }, { "epoch": 0.5781101267560113, "grad_norm": 0.5548449158668518, "learning_rate": 1.1954972336085963e-05, "loss": 0.0589, "step": 32644 }, { "epoch": 0.5781278362930397, "grad_norm": 0.5159961581230164, "learning_rate": 1.1954129890702673e-05, "loss": 0.0522, "step": 32645 }, { "epoch": 0.5781455458300682, "grad_norm": 0.48332783579826355, "learning_rate": 1.1953287455339987e-05, "loss": 0.0699, "step": 32646 }, { "epoch": 0.5781632553670966, "grad_norm": 0.4272417426109314, "learning_rate": 1.1952445030000678e-05, "loss": 0.0321, "step": 32647 }, { "epoch": 0.578180964904125, "grad_norm": 0.5277341604232788, "learning_rate": 1.195160261468752e-05, "loss": 0.0631, "step": 32648 }, { "epoch": 0.5781986744411535, "grad_norm": 0.6132137775421143, "learning_rate": 1.1950760209403278e-05, "loss": 0.0927, "step": 32649 }, { "epoch": 0.5782163839781819, "grad_norm": 0.4589391350746155, "learning_rate": 1.1949917814150724e-05, "loss": 0.0759, "step": 32650 }, { "epoch": 0.5782340935152103, "grad_norm": 0.6771503686904907, "learning_rate": 1.1949075428932637e-05, "loss": 0.0575, "step": 32651 }, { "epoch": 0.5782518030522387, "grad_norm": 0.554465651512146, "learning_rate": 1.194823305375178e-05, "loss": 0.0512, "step": 32652 }, { "epoch": 0.5782695125892672, "grad_norm": 0.7945030927658081, "learning_rate": 1.1947390688610928e-05, "loss": 0.0609, "step": 32653 }, { "epoch": 0.5782872221262956, "grad_norm": 0.7552318572998047, "learning_rate": 1.1946548333512851e-05, "loss": 0.0725, "step": 32654 }, { "epoch": 0.578304931663324, "grad_norm": 0.8451619744300842, "learning_rate": 1.1945705988460328e-05, "loss": 0.077, "step": 32655 }, { "epoch": 0.5783226412003524, "grad_norm": 0.6192764639854431, "learning_rate": 1.1944863653456117e-05, "loss": 0.0907, "step": 32656 }, { "epoch": 0.5783403507373809, "grad_norm": 0.46682676672935486, "learning_rate": 1.1944021328502993e-05, "loss": 0.0551, "step": 32657 }, { "epoch": 0.5783580602744093, "grad_norm": 0.5550462603569031, "learning_rate": 1.1943179013603737e-05, "loss": 0.0528, "step": 32658 }, { "epoch": 0.5783757698114377, "grad_norm": 0.5677711367607117, "learning_rate": 1.1942336708761108e-05, "loss": 0.0437, "step": 32659 }, { "epoch": 0.5783934793484661, "grad_norm": 0.6819178462028503, "learning_rate": 1.1941494413977882e-05, "loss": 0.0791, "step": 32660 }, { "epoch": 0.5784111888854946, "grad_norm": 0.8395527601242065, "learning_rate": 1.194065212925683e-05, "loss": 0.0886, "step": 32661 }, { "epoch": 0.578428898422523, "grad_norm": 0.5444633364677429, "learning_rate": 1.1939809854600726e-05, "loss": 0.0552, "step": 32662 }, { "epoch": 0.5784466079595514, "grad_norm": 0.6400392651557922, "learning_rate": 1.1938967590012338e-05, "loss": 0.083, "step": 32663 }, { "epoch": 0.5784643174965799, "grad_norm": 0.37662428617477417, "learning_rate": 1.1938125335494433e-05, "loss": 0.0465, "step": 32664 }, { "epoch": 0.5784820270336083, "grad_norm": 0.39509016275405884, "learning_rate": 1.193728309104979e-05, "loss": 0.0827, "step": 32665 }, { "epoch": 0.5784997365706367, "grad_norm": 0.6539031863212585, "learning_rate": 1.1936440856681172e-05, "loss": 0.0778, "step": 32666 }, { "epoch": 0.5785174461076651, "grad_norm": 0.864008367061615, "learning_rate": 1.1935598632391355e-05, "loss": 0.096, "step": 32667 }, { "epoch": 0.5785351556446936, "grad_norm": 0.5942972898483276, "learning_rate": 1.1934756418183108e-05, "loss": 0.072, "step": 32668 }, { "epoch": 0.578552865181722, "grad_norm": 0.8194940686225891, "learning_rate": 1.1933914214059211e-05, "loss": 0.084, "step": 32669 }, { "epoch": 0.5785705747187504, "grad_norm": 0.6189806461334229, "learning_rate": 1.1933072020022418e-05, "loss": 0.06, "step": 32670 }, { "epoch": 0.5785882842557788, "grad_norm": 0.5688814520835876, "learning_rate": 1.1932229836075504e-05, "loss": 0.0526, "step": 32671 }, { "epoch": 0.5786059937928073, "grad_norm": 0.7365366220474243, "learning_rate": 1.1931387662221253e-05, "loss": 0.067, "step": 32672 }, { "epoch": 0.5786237033298357, "grad_norm": 0.9232038855552673, "learning_rate": 1.193054549846242e-05, "loss": 0.089, "step": 32673 }, { "epoch": 0.5786414128668641, "grad_norm": 0.7324382066726685, "learning_rate": 1.1929703344801782e-05, "loss": 0.0627, "step": 32674 }, { "epoch": 0.5786591224038925, "grad_norm": 0.30479222536087036, "learning_rate": 1.192886120124211e-05, "loss": 0.0381, "step": 32675 }, { "epoch": 0.578676831940921, "grad_norm": 0.612944483757019, "learning_rate": 1.1928019067786177e-05, "loss": 0.0873, "step": 32676 }, { "epoch": 0.5786945414779494, "grad_norm": 0.8465790748596191, "learning_rate": 1.1927176944436748e-05, "loss": 0.0771, "step": 32677 }, { "epoch": 0.5787122510149778, "grad_norm": 0.7119013071060181, "learning_rate": 1.1926334831196595e-05, "loss": 0.0839, "step": 32678 }, { "epoch": 0.5787299605520063, "grad_norm": 0.5418460965156555, "learning_rate": 1.1925492728068489e-05, "loss": 0.0575, "step": 32679 }, { "epoch": 0.5787476700890347, "grad_norm": 0.8175450563430786, "learning_rate": 1.1924650635055205e-05, "loss": 0.074, "step": 32680 }, { "epoch": 0.5787653796260631, "grad_norm": 0.5892671346664429, "learning_rate": 1.1923808552159507e-05, "loss": 0.0643, "step": 32681 }, { "epoch": 0.5787830891630915, "grad_norm": 0.47708913683891296, "learning_rate": 1.1922966479384166e-05, "loss": 0.0615, "step": 32682 }, { "epoch": 0.57880079870012, "grad_norm": 0.9515557289123535, "learning_rate": 1.1922124416731963e-05, "loss": 0.066, "step": 32683 }, { "epoch": 0.5788185082371484, "grad_norm": 0.4700223505496979, "learning_rate": 1.192128236420565e-05, "loss": 0.072, "step": 32684 }, { "epoch": 0.5788362177741768, "grad_norm": 0.7252059578895569, "learning_rate": 1.1920440321808008e-05, "loss": 0.1096, "step": 32685 }, { "epoch": 0.5788539273112052, "grad_norm": 0.5053450465202332, "learning_rate": 1.1919598289541802e-05, "loss": 0.0664, "step": 32686 }, { "epoch": 0.5788716368482337, "grad_norm": 0.4617025554180145, "learning_rate": 1.1918756267409817e-05, "loss": 0.0535, "step": 32687 }, { "epoch": 0.5788893463852621, "grad_norm": 0.40193235874176025, "learning_rate": 1.1917914255414804e-05, "loss": 0.047, "step": 32688 }, { "epoch": 0.5789070559222905, "grad_norm": 0.4253406524658203, "learning_rate": 1.1917072253559545e-05, "loss": 0.0562, "step": 32689 }, { "epoch": 0.578924765459319, "grad_norm": 0.9399105906486511, "learning_rate": 1.1916230261846807e-05, "loss": 0.0952, "step": 32690 }, { "epoch": 0.5789424749963474, "grad_norm": 1.0355188846588135, "learning_rate": 1.1915388280279355e-05, "loss": 0.0575, "step": 32691 }, { "epoch": 0.5789601845333758, "grad_norm": 0.4988536834716797, "learning_rate": 1.1914546308859966e-05, "loss": 0.0642, "step": 32692 }, { "epoch": 0.5789778940704042, "grad_norm": 0.6055215001106262, "learning_rate": 1.1913704347591405e-05, "loss": 0.0453, "step": 32693 }, { "epoch": 0.5789956036074327, "grad_norm": 0.5875882506370544, "learning_rate": 1.191286239647645e-05, "loss": 0.0793, "step": 32694 }, { "epoch": 0.5790133131444611, "grad_norm": 0.575621485710144, "learning_rate": 1.1912020455517861e-05, "loss": 0.0735, "step": 32695 }, { "epoch": 0.5790310226814895, "grad_norm": 0.6575978994369507, "learning_rate": 1.1911178524718414e-05, "loss": 0.0512, "step": 32696 }, { "epoch": 0.5790487322185179, "grad_norm": 0.7473236918449402, "learning_rate": 1.1910336604080883e-05, "loss": 0.0505, "step": 32697 }, { "epoch": 0.5790664417555464, "grad_norm": 0.8470904231071472, "learning_rate": 1.1909494693608025e-05, "loss": 0.0622, "step": 32698 }, { "epoch": 0.5790841512925748, "grad_norm": 0.38849884271621704, "learning_rate": 1.1908652793302617e-05, "loss": 0.0524, "step": 32699 }, { "epoch": 0.5791018608296032, "grad_norm": 0.41772201657295227, "learning_rate": 1.1907810903167425e-05, "loss": 0.0472, "step": 32700 }, { "epoch": 0.5791195703666316, "grad_norm": 0.8470209836959839, "learning_rate": 1.1906969023205233e-05, "loss": 0.061, "step": 32701 }, { "epoch": 0.5791372799036602, "grad_norm": 0.7625939249992371, "learning_rate": 1.1906127153418793e-05, "loss": 0.0573, "step": 32702 }, { "epoch": 0.5791549894406886, "grad_norm": 0.5476121306419373, "learning_rate": 1.1905285293810883e-05, "loss": 0.0882, "step": 32703 }, { "epoch": 0.579172698977717, "grad_norm": 0.3121013343334198, "learning_rate": 1.1904443444384272e-05, "loss": 0.0581, "step": 32704 }, { "epoch": 0.5791904085147455, "grad_norm": 0.5084072351455688, "learning_rate": 1.1903601605141729e-05, "loss": 0.0655, "step": 32705 }, { "epoch": 0.5792081180517739, "grad_norm": 0.5222773551940918, "learning_rate": 1.190275977608602e-05, "loss": 0.0668, "step": 32706 }, { "epoch": 0.5792258275888023, "grad_norm": 0.49942925572395325, "learning_rate": 1.1901917957219923e-05, "loss": 0.1064, "step": 32707 }, { "epoch": 0.5792435371258307, "grad_norm": 0.7721112370491028, "learning_rate": 1.19010761485462e-05, "loss": 0.0505, "step": 32708 }, { "epoch": 0.5792612466628592, "grad_norm": 0.39496076107025146, "learning_rate": 1.1900234350067624e-05, "loss": 0.0442, "step": 32709 }, { "epoch": 0.5792789561998876, "grad_norm": 0.6141555309295654, "learning_rate": 1.189939256178696e-05, "loss": 0.0581, "step": 32710 }, { "epoch": 0.579296665736916, "grad_norm": 0.7741348147392273, "learning_rate": 1.1898550783706992e-05, "loss": 0.0642, "step": 32711 }, { "epoch": 0.5793143752739444, "grad_norm": 0.7639851570129395, "learning_rate": 1.1897709015830467e-05, "loss": 0.0955, "step": 32712 }, { "epoch": 0.5793320848109729, "grad_norm": 0.5621324777603149, "learning_rate": 1.189686725816017e-05, "loss": 0.0518, "step": 32713 }, { "epoch": 0.5793497943480013, "grad_norm": 0.647698163986206, "learning_rate": 1.1896025510698862e-05, "loss": 0.0804, "step": 32714 }, { "epoch": 0.5793675038850297, "grad_norm": 0.5498786568641663, "learning_rate": 1.1895183773449321e-05, "loss": 0.0813, "step": 32715 }, { "epoch": 0.5793852134220581, "grad_norm": 0.5255087018013, "learning_rate": 1.1894342046414308e-05, "loss": 0.0567, "step": 32716 }, { "epoch": 0.5794029229590866, "grad_norm": 0.7434642314910889, "learning_rate": 1.1893500329596595e-05, "loss": 0.0786, "step": 32717 }, { "epoch": 0.579420632496115, "grad_norm": 0.6671950817108154, "learning_rate": 1.1892658622998952e-05, "loss": 0.0572, "step": 32718 }, { "epoch": 0.5794383420331434, "grad_norm": 0.7157443165779114, "learning_rate": 1.1891816926624151e-05, "loss": 0.0997, "step": 32719 }, { "epoch": 0.5794560515701719, "grad_norm": 0.8384960889816284, "learning_rate": 1.1890975240474956e-05, "loss": 0.0685, "step": 32720 }, { "epoch": 0.5794737611072003, "grad_norm": 0.672094464302063, "learning_rate": 1.1890133564554136e-05, "loss": 0.057, "step": 32721 }, { "epoch": 0.5794914706442287, "grad_norm": 0.48525434732437134, "learning_rate": 1.188929189886447e-05, "loss": 0.0541, "step": 32722 }, { "epoch": 0.5795091801812571, "grad_norm": 0.5182367563247681, "learning_rate": 1.188845024340871e-05, "loss": 0.0646, "step": 32723 }, { "epoch": 0.5795268897182856, "grad_norm": 0.7058548331260681, "learning_rate": 1.1887608598189636e-05, "loss": 0.0573, "step": 32724 }, { "epoch": 0.579544599255314, "grad_norm": 0.5084660649299622, "learning_rate": 1.1886766963210016e-05, "loss": 0.0662, "step": 32725 }, { "epoch": 0.5795623087923424, "grad_norm": 1.0761010646820068, "learning_rate": 1.1885925338472622e-05, "loss": 0.0873, "step": 32726 }, { "epoch": 0.5795800183293708, "grad_norm": 0.7633154392242432, "learning_rate": 1.1885083723980213e-05, "loss": 0.068, "step": 32727 }, { "epoch": 0.5795977278663993, "grad_norm": 0.704097330570221, "learning_rate": 1.1884242119735565e-05, "loss": 0.0744, "step": 32728 }, { "epoch": 0.5796154374034277, "grad_norm": 0.7196844816207886, "learning_rate": 1.1883400525741447e-05, "loss": 0.0704, "step": 32729 }, { "epoch": 0.5796331469404561, "grad_norm": 0.7403401136398315, "learning_rate": 1.1882558942000623e-05, "loss": 0.0849, "step": 32730 }, { "epoch": 0.5796508564774845, "grad_norm": 0.8078462481498718, "learning_rate": 1.1881717368515865e-05, "loss": 0.088, "step": 32731 }, { "epoch": 0.579668566014513, "grad_norm": 0.48796290159225464, "learning_rate": 1.188087580528994e-05, "loss": 0.0675, "step": 32732 }, { "epoch": 0.5796862755515414, "grad_norm": 0.5533180236816406, "learning_rate": 1.1880034252325624e-05, "loss": 0.0655, "step": 32733 }, { "epoch": 0.5797039850885698, "grad_norm": 0.4145489037036896, "learning_rate": 1.1879192709625675e-05, "loss": 0.0441, "step": 32734 }, { "epoch": 0.5797216946255983, "grad_norm": 0.5685601830482483, "learning_rate": 1.1878351177192866e-05, "loss": 0.0402, "step": 32735 }, { "epoch": 0.5797394041626267, "grad_norm": 0.40509718656539917, "learning_rate": 1.1877509655029974e-05, "loss": 0.0513, "step": 32736 }, { "epoch": 0.5797571136996551, "grad_norm": 0.574942409992218, "learning_rate": 1.1876668143139752e-05, "loss": 0.0683, "step": 32737 }, { "epoch": 0.5797748232366835, "grad_norm": 0.8536598086357117, "learning_rate": 1.1875826641524973e-05, "loss": 0.0972, "step": 32738 }, { "epoch": 0.579792532773712, "grad_norm": 0.6885942220687866, "learning_rate": 1.1874985150188411e-05, "loss": 0.1075, "step": 32739 }, { "epoch": 0.5798102423107404, "grad_norm": 0.7429757118225098, "learning_rate": 1.1874143669132838e-05, "loss": 0.0768, "step": 32740 }, { "epoch": 0.5798279518477688, "grad_norm": 0.5463106036186218, "learning_rate": 1.187330219836101e-05, "loss": 0.0803, "step": 32741 }, { "epoch": 0.5798456613847972, "grad_norm": 0.5776572823524475, "learning_rate": 1.1872460737875704e-05, "loss": 0.0783, "step": 32742 }, { "epoch": 0.5798633709218257, "grad_norm": 0.8941263556480408, "learning_rate": 1.1871619287679686e-05, "loss": 0.067, "step": 32743 }, { "epoch": 0.5798810804588541, "grad_norm": 0.6519724130630493, "learning_rate": 1.1870777847775721e-05, "loss": 0.0824, "step": 32744 }, { "epoch": 0.5798987899958825, "grad_norm": 0.5056903958320618, "learning_rate": 1.1869936418166581e-05, "loss": 0.0607, "step": 32745 }, { "epoch": 0.5799164995329109, "grad_norm": 0.5241462588310242, "learning_rate": 1.1869094998855034e-05, "loss": 0.0467, "step": 32746 }, { "epoch": 0.5799342090699394, "grad_norm": 1.1496492624282837, "learning_rate": 1.186825358984385e-05, "loss": 0.1018, "step": 32747 }, { "epoch": 0.5799519186069678, "grad_norm": 0.8092908263206482, "learning_rate": 1.1867412191135792e-05, "loss": 0.0481, "step": 32748 }, { "epoch": 0.5799696281439962, "grad_norm": 0.6449698805809021, "learning_rate": 1.1866570802733631e-05, "loss": 0.0665, "step": 32749 }, { "epoch": 0.5799873376810247, "grad_norm": 0.7266525030136108, "learning_rate": 1.1865729424640137e-05, "loss": 0.1064, "step": 32750 }, { "epoch": 0.5800050472180531, "grad_norm": 0.7745206952095032, "learning_rate": 1.1864888056858082e-05, "loss": 0.0847, "step": 32751 }, { "epoch": 0.5800227567550815, "grad_norm": 0.7442073225975037, "learning_rate": 1.1864046699390218e-05, "loss": 0.0531, "step": 32752 }, { "epoch": 0.5800404662921099, "grad_norm": 0.6034446358680725, "learning_rate": 1.1863205352239326e-05, "loss": 0.0692, "step": 32753 }, { "epoch": 0.5800581758291384, "grad_norm": 0.5097447037696838, "learning_rate": 1.1862364015408176e-05, "loss": 0.0698, "step": 32754 }, { "epoch": 0.5800758853661668, "grad_norm": 0.49779659509658813, "learning_rate": 1.1861522688899528e-05, "loss": 0.062, "step": 32755 }, { "epoch": 0.5800935949031952, "grad_norm": 0.4406939744949341, "learning_rate": 1.186068137271615e-05, "loss": 0.0802, "step": 32756 }, { "epoch": 0.5801113044402236, "grad_norm": 0.7566208839416504, "learning_rate": 1.1859840066860814e-05, "loss": 0.0551, "step": 32757 }, { "epoch": 0.5801290139772521, "grad_norm": 0.8355565667152405, "learning_rate": 1.1858998771336288e-05, "loss": 0.0864, "step": 32758 }, { "epoch": 0.5801467235142805, "grad_norm": 0.6382068991661072, "learning_rate": 1.1858157486145338e-05, "loss": 0.0699, "step": 32759 }, { "epoch": 0.5801644330513089, "grad_norm": 0.41711220145225525, "learning_rate": 1.1857316211290732e-05, "loss": 0.0421, "step": 32760 }, { "epoch": 0.5801821425883373, "grad_norm": 0.3375639319419861, "learning_rate": 1.185647494677524e-05, "loss": 0.0608, "step": 32761 }, { "epoch": 0.5801998521253658, "grad_norm": 0.9065048098564148, "learning_rate": 1.1855633692601625e-05, "loss": 0.0742, "step": 32762 }, { "epoch": 0.5802175616623942, "grad_norm": 0.5606499910354614, "learning_rate": 1.1854792448772655e-05, "loss": 0.0554, "step": 32763 }, { "epoch": 0.5802352711994226, "grad_norm": 0.5613172650337219, "learning_rate": 1.18539512152911e-05, "loss": 0.0506, "step": 32764 }, { "epoch": 0.5802529807364512, "grad_norm": 0.48490357398986816, "learning_rate": 1.1853109992159735e-05, "loss": 0.0835, "step": 32765 }, { "epoch": 0.5802706902734796, "grad_norm": 0.5781983733177185, "learning_rate": 1.1852268779381316e-05, "loss": 0.0523, "step": 32766 }, { "epoch": 0.580288399810508, "grad_norm": 0.5615884065628052, "learning_rate": 1.1851427576958606e-05, "loss": 0.0597, "step": 32767 }, { "epoch": 0.5803061093475363, "grad_norm": 0.7629773616790771, "learning_rate": 1.1850586384894393e-05, "loss": 0.0541, "step": 32768 }, { "epoch": 0.5803238188845649, "grad_norm": 0.5673759579658508, "learning_rate": 1.1849745203191426e-05, "loss": 0.0668, "step": 32769 }, { "epoch": 0.5803415284215933, "grad_norm": 0.8188986778259277, "learning_rate": 1.184890403185248e-05, "loss": 0.0457, "step": 32770 }, { "epoch": 0.5803592379586217, "grad_norm": 0.606378436088562, "learning_rate": 1.1848062870880318e-05, "loss": 0.0703, "step": 32771 }, { "epoch": 0.58037694749565, "grad_norm": 0.45172974467277527, "learning_rate": 1.1847221720277716e-05, "loss": 0.0478, "step": 32772 }, { "epoch": 0.5803946570326786, "grad_norm": 0.6479283571243286, "learning_rate": 1.1846380580047431e-05, "loss": 0.0896, "step": 32773 }, { "epoch": 0.580412366569707, "grad_norm": 0.44131582975387573, "learning_rate": 1.1845539450192235e-05, "loss": 0.0485, "step": 32774 }, { "epoch": 0.5804300761067354, "grad_norm": 0.5185670256614685, "learning_rate": 1.1844698330714898e-05, "loss": 0.0339, "step": 32775 }, { "epoch": 0.5804477856437638, "grad_norm": 0.7092037796974182, "learning_rate": 1.1843857221618182e-05, "loss": 0.0625, "step": 32776 }, { "epoch": 0.5804654951807923, "grad_norm": 0.47463101148605347, "learning_rate": 1.1843016122904855e-05, "loss": 0.0567, "step": 32777 }, { "epoch": 0.5804832047178207, "grad_norm": 0.7637417316436768, "learning_rate": 1.1842175034577689e-05, "loss": 0.1041, "step": 32778 }, { "epoch": 0.5805009142548491, "grad_norm": 0.5945472121238708, "learning_rate": 1.1841333956639451e-05, "loss": 0.06, "step": 32779 }, { "epoch": 0.5805186237918776, "grad_norm": 0.5960255861282349, "learning_rate": 1.18404928890929e-05, "loss": 0.0662, "step": 32780 }, { "epoch": 0.580536333328906, "grad_norm": 1.2030340433120728, "learning_rate": 1.1839651831940804e-05, "loss": 0.0732, "step": 32781 }, { "epoch": 0.5805540428659344, "grad_norm": 0.6510482430458069, "learning_rate": 1.1838810785185937e-05, "loss": 0.1138, "step": 32782 }, { "epoch": 0.5805717524029628, "grad_norm": 0.6111992001533508, "learning_rate": 1.1837969748831069e-05, "loss": 0.0649, "step": 32783 }, { "epoch": 0.5805894619399913, "grad_norm": 0.5990434288978577, "learning_rate": 1.1837128722878955e-05, "loss": 0.0564, "step": 32784 }, { "epoch": 0.5806071714770197, "grad_norm": 0.7012336850166321, "learning_rate": 1.1836287707332367e-05, "loss": 0.0609, "step": 32785 }, { "epoch": 0.5806248810140481, "grad_norm": 0.6582807898521423, "learning_rate": 1.1835446702194075e-05, "loss": 0.0484, "step": 32786 }, { "epoch": 0.5806425905510765, "grad_norm": 0.6553748250007629, "learning_rate": 1.1834605707466843e-05, "loss": 0.0698, "step": 32787 }, { "epoch": 0.580660300088105, "grad_norm": 0.4753528833389282, "learning_rate": 1.1833764723153433e-05, "loss": 0.0432, "step": 32788 }, { "epoch": 0.5806780096251334, "grad_norm": 0.6683588624000549, "learning_rate": 1.1832923749256622e-05, "loss": 0.0679, "step": 32789 }, { "epoch": 0.5806957191621618, "grad_norm": 0.3786150813102722, "learning_rate": 1.183208278577917e-05, "loss": 0.0507, "step": 32790 }, { "epoch": 0.5807134286991902, "grad_norm": 0.4296281635761261, "learning_rate": 1.1831241832723845e-05, "loss": 0.0534, "step": 32791 }, { "epoch": 0.5807311382362187, "grad_norm": 0.5065983533859253, "learning_rate": 1.1830400890093415e-05, "loss": 0.0637, "step": 32792 }, { "epoch": 0.5807488477732471, "grad_norm": 0.678808867931366, "learning_rate": 1.182955995789065e-05, "loss": 0.0812, "step": 32793 }, { "epoch": 0.5807665573102755, "grad_norm": 0.7143100500106812, "learning_rate": 1.1828719036118306e-05, "loss": 0.0619, "step": 32794 }, { "epoch": 0.580784266847304, "grad_norm": 0.3321307599544525, "learning_rate": 1.1827878124779157e-05, "loss": 0.04, "step": 32795 }, { "epoch": 0.5808019763843324, "grad_norm": 0.42857760190963745, "learning_rate": 1.1827037223875961e-05, "loss": 0.0717, "step": 32796 }, { "epoch": 0.5808196859213608, "grad_norm": 1.1515517234802246, "learning_rate": 1.1826196333411505e-05, "loss": 0.1017, "step": 32797 }, { "epoch": 0.5808373954583892, "grad_norm": 0.32617583870887756, "learning_rate": 1.1825355453388534e-05, "loss": 0.0677, "step": 32798 }, { "epoch": 0.5808551049954177, "grad_norm": 0.5392216444015503, "learning_rate": 1.1824514583809824e-05, "loss": 0.0503, "step": 32799 }, { "epoch": 0.5808728145324461, "grad_norm": 0.43886882066726685, "learning_rate": 1.1823673724678141e-05, "loss": 0.0352, "step": 32800 }, { "epoch": 0.5808905240694745, "grad_norm": 0.6774038076400757, "learning_rate": 1.1822832875996248e-05, "loss": 0.1043, "step": 32801 }, { "epoch": 0.5809082336065029, "grad_norm": 0.6639280915260315, "learning_rate": 1.1821992037766915e-05, "loss": 0.0654, "step": 32802 }, { "epoch": 0.5809259431435314, "grad_norm": 0.8521227836608887, "learning_rate": 1.1821151209992904e-05, "loss": 0.0928, "step": 32803 }, { "epoch": 0.5809436526805598, "grad_norm": 0.8235052824020386, "learning_rate": 1.1820310392676988e-05, "loss": 0.053, "step": 32804 }, { "epoch": 0.5809613622175882, "grad_norm": 0.8219525218009949, "learning_rate": 1.1819469585821926e-05, "loss": 0.0837, "step": 32805 }, { "epoch": 0.5809790717546166, "grad_norm": 0.7290851473808289, "learning_rate": 1.1818628789430488e-05, "loss": 0.0687, "step": 32806 }, { "epoch": 0.5809967812916451, "grad_norm": 0.8643907904624939, "learning_rate": 1.1817788003505445e-05, "loss": 0.055, "step": 32807 }, { "epoch": 0.5810144908286735, "grad_norm": 0.5315194725990295, "learning_rate": 1.1816947228049552e-05, "loss": 0.0697, "step": 32808 }, { "epoch": 0.5810322003657019, "grad_norm": 0.4947744905948639, "learning_rate": 1.1816106463065582e-05, "loss": 0.0631, "step": 32809 }, { "epoch": 0.5810499099027304, "grad_norm": 0.2535107433795929, "learning_rate": 1.1815265708556294e-05, "loss": 0.0526, "step": 32810 }, { "epoch": 0.5810676194397588, "grad_norm": 0.6214861869812012, "learning_rate": 1.181442496452447e-05, "loss": 0.0629, "step": 32811 }, { "epoch": 0.5810853289767872, "grad_norm": 0.3031831681728363, "learning_rate": 1.181358423097286e-05, "loss": 0.0459, "step": 32812 }, { "epoch": 0.5811030385138156, "grad_norm": 0.5514175295829773, "learning_rate": 1.1812743507904236e-05, "loss": 0.0784, "step": 32813 }, { "epoch": 0.5811207480508441, "grad_norm": 0.4476737082004547, "learning_rate": 1.1811902795321366e-05, "loss": 0.0519, "step": 32814 }, { "epoch": 0.5811384575878725, "grad_norm": 0.6818147301673889, "learning_rate": 1.1811062093227008e-05, "loss": 0.0828, "step": 32815 }, { "epoch": 0.5811561671249009, "grad_norm": 0.40847229957580566, "learning_rate": 1.1810221401623936e-05, "loss": 0.0698, "step": 32816 }, { "epoch": 0.5811738766619293, "grad_norm": 0.564337432384491, "learning_rate": 1.1809380720514912e-05, "loss": 0.0861, "step": 32817 }, { "epoch": 0.5811915861989578, "grad_norm": 0.6775192618370056, "learning_rate": 1.1808540049902705e-05, "loss": 0.0738, "step": 32818 }, { "epoch": 0.5812092957359862, "grad_norm": 0.4339064359664917, "learning_rate": 1.1807699389790077e-05, "loss": 0.0529, "step": 32819 }, { "epoch": 0.5812270052730146, "grad_norm": 0.6853536367416382, "learning_rate": 1.1806858740179794e-05, "loss": 0.0646, "step": 32820 }, { "epoch": 0.581244714810043, "grad_norm": 0.29541119933128357, "learning_rate": 1.1806018101074623e-05, "loss": 0.0949, "step": 32821 }, { "epoch": 0.5812624243470715, "grad_norm": 0.989580512046814, "learning_rate": 1.1805177472477337e-05, "loss": 0.0598, "step": 32822 }, { "epoch": 0.5812801338840999, "grad_norm": 0.7322189211845398, "learning_rate": 1.1804336854390685e-05, "loss": 0.0737, "step": 32823 }, { "epoch": 0.5812978434211283, "grad_norm": 1.2883752584457397, "learning_rate": 1.1803496246817444e-05, "loss": 0.058, "step": 32824 }, { "epoch": 0.5813155529581568, "grad_norm": 0.7989099621772766, "learning_rate": 1.180265564976038e-05, "loss": 0.115, "step": 32825 }, { "epoch": 0.5813332624951852, "grad_norm": 0.6980875730514526, "learning_rate": 1.1801815063222252e-05, "loss": 0.0796, "step": 32826 }, { "epoch": 0.5813509720322136, "grad_norm": 0.46928679943084717, "learning_rate": 1.1800974487205828e-05, "loss": 0.0727, "step": 32827 }, { "epoch": 0.581368681569242, "grad_norm": 0.41056787967681885, "learning_rate": 1.1800133921713875e-05, "loss": 0.0699, "step": 32828 }, { "epoch": 0.5813863911062706, "grad_norm": 0.5522953271865845, "learning_rate": 1.179929336674916e-05, "loss": 0.0602, "step": 32829 }, { "epoch": 0.581404100643299, "grad_norm": 0.607982873916626, "learning_rate": 1.1798452822314445e-05, "loss": 0.054, "step": 32830 }, { "epoch": 0.5814218101803273, "grad_norm": 0.6687539219856262, "learning_rate": 1.1797612288412494e-05, "loss": 0.0769, "step": 32831 }, { "epoch": 0.5814395197173557, "grad_norm": 0.6304432153701782, "learning_rate": 1.1796771765046083e-05, "loss": 0.0762, "step": 32832 }, { "epoch": 0.5814572292543843, "grad_norm": 0.6268848180770874, "learning_rate": 1.1795931252217958e-05, "loss": 0.0408, "step": 32833 }, { "epoch": 0.5814749387914127, "grad_norm": 0.36676090955734253, "learning_rate": 1.1795090749930899e-05, "loss": 0.0581, "step": 32834 }, { "epoch": 0.581492648328441, "grad_norm": 0.5896649360656738, "learning_rate": 1.1794250258187667e-05, "loss": 0.0638, "step": 32835 }, { "epoch": 0.5815103578654695, "grad_norm": 0.890378475189209, "learning_rate": 1.1793409776991035e-05, "loss": 0.0713, "step": 32836 }, { "epoch": 0.581528067402498, "grad_norm": 0.3738686740398407, "learning_rate": 1.1792569306343753e-05, "loss": 0.0349, "step": 32837 }, { "epoch": 0.5815457769395264, "grad_norm": 0.5866613984107971, "learning_rate": 1.1791728846248594e-05, "loss": 0.085, "step": 32838 }, { "epoch": 0.5815634864765548, "grad_norm": 0.6934369206428528, "learning_rate": 1.1790888396708324e-05, "loss": 0.0614, "step": 32839 }, { "epoch": 0.5815811960135833, "grad_norm": 0.4807985723018646, "learning_rate": 1.1790047957725704e-05, "loss": 0.0592, "step": 32840 }, { "epoch": 0.5815989055506117, "grad_norm": 0.48140788078308105, "learning_rate": 1.1789207529303502e-05, "loss": 0.0386, "step": 32841 }, { "epoch": 0.5816166150876401, "grad_norm": 0.6283431649208069, "learning_rate": 1.1788367111444484e-05, "loss": 0.065, "step": 32842 }, { "epoch": 0.5816343246246685, "grad_norm": 0.497035413980484, "learning_rate": 1.1787526704151416e-05, "loss": 0.067, "step": 32843 }, { "epoch": 0.581652034161697, "grad_norm": 0.5906275510787964, "learning_rate": 1.1786686307427058e-05, "loss": 0.0615, "step": 32844 }, { "epoch": 0.5816697436987254, "grad_norm": 0.677580714225769, "learning_rate": 1.1785845921274173e-05, "loss": 0.0548, "step": 32845 }, { "epoch": 0.5816874532357538, "grad_norm": 0.47271984815597534, "learning_rate": 1.1785005545695538e-05, "loss": 0.0597, "step": 32846 }, { "epoch": 0.5817051627727822, "grad_norm": 0.5954025983810425, "learning_rate": 1.1784165180693902e-05, "loss": 0.0383, "step": 32847 }, { "epoch": 0.5817228723098107, "grad_norm": 0.6242064237594604, "learning_rate": 1.178332482627204e-05, "loss": 0.0529, "step": 32848 }, { "epoch": 0.5817405818468391, "grad_norm": 0.5805709362030029, "learning_rate": 1.1782484482432712e-05, "loss": 0.0636, "step": 32849 }, { "epoch": 0.5817582913838675, "grad_norm": 0.669712245464325, "learning_rate": 1.1781644149178692e-05, "loss": 0.0556, "step": 32850 }, { "epoch": 0.5817760009208959, "grad_norm": 0.6800560355186462, "learning_rate": 1.1780803826512733e-05, "loss": 0.0663, "step": 32851 }, { "epoch": 0.5817937104579244, "grad_norm": 0.8306508660316467, "learning_rate": 1.1779963514437601e-05, "loss": 0.077, "step": 32852 }, { "epoch": 0.5818114199949528, "grad_norm": 0.25601041316986084, "learning_rate": 1.1779123212956063e-05, "loss": 0.0834, "step": 32853 }, { "epoch": 0.5818291295319812, "grad_norm": 0.6864594221115112, "learning_rate": 1.1778282922070889e-05, "loss": 0.0476, "step": 32854 }, { "epoch": 0.5818468390690097, "grad_norm": 0.3813558518886566, "learning_rate": 1.1777442641784835e-05, "loss": 0.0502, "step": 32855 }, { "epoch": 0.5818645486060381, "grad_norm": 0.41088059544563293, "learning_rate": 1.1776602372100668e-05, "loss": 0.0547, "step": 32856 }, { "epoch": 0.5818822581430665, "grad_norm": 0.6888284683227539, "learning_rate": 1.1775762113021157e-05, "loss": 0.091, "step": 32857 }, { "epoch": 0.5818999676800949, "grad_norm": 0.4126558005809784, "learning_rate": 1.1774921864549059e-05, "loss": 0.0679, "step": 32858 }, { "epoch": 0.5819176772171234, "grad_norm": 0.443450927734375, "learning_rate": 1.177408162668714e-05, "loss": 0.0943, "step": 32859 }, { "epoch": 0.5819353867541518, "grad_norm": 0.5611776113510132, "learning_rate": 1.1773241399438168e-05, "loss": 0.0568, "step": 32860 }, { "epoch": 0.5819530962911802, "grad_norm": 0.8905555009841919, "learning_rate": 1.1772401182804913e-05, "loss": 0.066, "step": 32861 }, { "epoch": 0.5819708058282086, "grad_norm": 0.45777463912963867, "learning_rate": 1.177156097679012e-05, "loss": 0.0729, "step": 32862 }, { "epoch": 0.5819885153652371, "grad_norm": 0.4410892724990845, "learning_rate": 1.1770720781396568e-05, "loss": 0.0573, "step": 32863 }, { "epoch": 0.5820062249022655, "grad_norm": 0.32610684633255005, "learning_rate": 1.1769880596627026e-05, "loss": 0.0597, "step": 32864 }, { "epoch": 0.5820239344392939, "grad_norm": 0.3026214838027954, "learning_rate": 1.1769040422484243e-05, "loss": 0.0341, "step": 32865 }, { "epoch": 0.5820416439763223, "grad_norm": 0.9390268325805664, "learning_rate": 1.176820025897099e-05, "loss": 0.0739, "step": 32866 }, { "epoch": 0.5820593535133508, "grad_norm": 0.47177502512931824, "learning_rate": 1.1767360106090032e-05, "loss": 0.0594, "step": 32867 }, { "epoch": 0.5820770630503792, "grad_norm": 0.43018582463264465, "learning_rate": 1.1766519963844134e-05, "loss": 0.0621, "step": 32868 }, { "epoch": 0.5820947725874076, "grad_norm": 0.685387372970581, "learning_rate": 1.1765679832236057e-05, "loss": 0.0923, "step": 32869 }, { "epoch": 0.5821124821244361, "grad_norm": 0.6097944974899292, "learning_rate": 1.1764839711268566e-05, "loss": 0.0663, "step": 32870 }, { "epoch": 0.5821301916614645, "grad_norm": 0.507583737373352, "learning_rate": 1.1763999600944426e-05, "loss": 0.0514, "step": 32871 }, { "epoch": 0.5821479011984929, "grad_norm": 0.305879682302475, "learning_rate": 1.1763159501266398e-05, "loss": 0.0422, "step": 32872 }, { "epoch": 0.5821656107355213, "grad_norm": 0.7293010950088501, "learning_rate": 1.1762319412237249e-05, "loss": 0.0812, "step": 32873 }, { "epoch": 0.5821833202725498, "grad_norm": 0.5933247804641724, "learning_rate": 1.1761479333859742e-05, "loss": 0.083, "step": 32874 }, { "epoch": 0.5822010298095782, "grad_norm": 0.8603260517120361, "learning_rate": 1.1760639266136643e-05, "loss": 0.0939, "step": 32875 }, { "epoch": 0.5822187393466066, "grad_norm": 0.45557984709739685, "learning_rate": 1.1759799209070707e-05, "loss": 0.0784, "step": 32876 }, { "epoch": 0.582236448883635, "grad_norm": 0.5790191888809204, "learning_rate": 1.1758959162664707e-05, "loss": 0.072, "step": 32877 }, { "epoch": 0.5822541584206635, "grad_norm": 0.5212382674217224, "learning_rate": 1.175811912692141e-05, "loss": 0.0615, "step": 32878 }, { "epoch": 0.5822718679576919, "grad_norm": 0.6583670377731323, "learning_rate": 1.1757279101843566e-05, "loss": 0.0666, "step": 32879 }, { "epoch": 0.5822895774947203, "grad_norm": 0.4622298777103424, "learning_rate": 1.1756439087433946e-05, "loss": 0.0826, "step": 32880 }, { "epoch": 0.5823072870317487, "grad_norm": 1.155364990234375, "learning_rate": 1.1755599083695312e-05, "loss": 0.0827, "step": 32881 }, { "epoch": 0.5823249965687772, "grad_norm": 0.6936962008476257, "learning_rate": 1.1754759090630434e-05, "loss": 0.0707, "step": 32882 }, { "epoch": 0.5823427061058056, "grad_norm": 0.44955164194107056, "learning_rate": 1.1753919108242066e-05, "loss": 0.0458, "step": 32883 }, { "epoch": 0.582360415642834, "grad_norm": 0.5883049368858337, "learning_rate": 1.1753079136532977e-05, "loss": 0.0548, "step": 32884 }, { "epoch": 0.5823781251798625, "grad_norm": 0.9800466299057007, "learning_rate": 1.175223917550593e-05, "loss": 0.0581, "step": 32885 }, { "epoch": 0.5823958347168909, "grad_norm": 0.4563066363334656, "learning_rate": 1.1751399225163688e-05, "loss": 0.0918, "step": 32886 }, { "epoch": 0.5824135442539193, "grad_norm": 0.9496461749076843, "learning_rate": 1.1750559285509011e-05, "loss": 0.094, "step": 32887 }, { "epoch": 0.5824312537909477, "grad_norm": 0.6616927981376648, "learning_rate": 1.1749719356544667e-05, "loss": 0.0665, "step": 32888 }, { "epoch": 0.5824489633279762, "grad_norm": 1.0088210105895996, "learning_rate": 1.1748879438273424e-05, "loss": 0.0614, "step": 32889 }, { "epoch": 0.5824666728650046, "grad_norm": 0.5938235521316528, "learning_rate": 1.174803953069803e-05, "loss": 0.0571, "step": 32890 }, { "epoch": 0.582484382402033, "grad_norm": 0.6645275354385376, "learning_rate": 1.1747199633821257e-05, "loss": 0.0562, "step": 32891 }, { "epoch": 0.5825020919390614, "grad_norm": 0.5474925637245178, "learning_rate": 1.1746359747645868e-05, "loss": 0.0509, "step": 32892 }, { "epoch": 0.58251980147609, "grad_norm": 0.3873334527015686, "learning_rate": 1.1745519872174635e-05, "loss": 0.0557, "step": 32893 }, { "epoch": 0.5825375110131183, "grad_norm": 0.4612705409526825, "learning_rate": 1.1744680007410307e-05, "loss": 0.0376, "step": 32894 }, { "epoch": 0.5825552205501467, "grad_norm": 0.9448632597923279, "learning_rate": 1.1743840153355651e-05, "loss": 0.0611, "step": 32895 }, { "epoch": 0.5825729300871751, "grad_norm": 0.3948754370212555, "learning_rate": 1.1743000310013434e-05, "loss": 0.0528, "step": 32896 }, { "epoch": 0.5825906396242037, "grad_norm": 0.43572020530700684, "learning_rate": 1.1742160477386415e-05, "loss": 0.054, "step": 32897 }, { "epoch": 0.582608349161232, "grad_norm": 0.5097911357879639, "learning_rate": 1.1741320655477358e-05, "loss": 0.0547, "step": 32898 }, { "epoch": 0.5826260586982605, "grad_norm": 0.7829708456993103, "learning_rate": 1.1740480844289025e-05, "loss": 0.0559, "step": 32899 }, { "epoch": 0.582643768235289, "grad_norm": 0.654086709022522, "learning_rate": 1.1739641043824186e-05, "loss": 0.0403, "step": 32900 }, { "epoch": 0.5826614777723174, "grad_norm": 1.0079896450042725, "learning_rate": 1.1738801254085594e-05, "loss": 0.0439, "step": 32901 }, { "epoch": 0.5826791873093458, "grad_norm": 0.38864371180534363, "learning_rate": 1.1737961475076016e-05, "loss": 0.0522, "step": 32902 }, { "epoch": 0.5826968968463742, "grad_norm": 0.5932408571243286, "learning_rate": 1.173712170679822e-05, "loss": 0.085, "step": 32903 }, { "epoch": 0.5827146063834027, "grad_norm": 0.5811014175415039, "learning_rate": 1.173628194925496e-05, "loss": 0.0799, "step": 32904 }, { "epoch": 0.5827323159204311, "grad_norm": 0.5173801183700562, "learning_rate": 1.1735442202448997e-05, "loss": 0.0782, "step": 32905 }, { "epoch": 0.5827500254574595, "grad_norm": 0.3125154376029968, "learning_rate": 1.1734602466383102e-05, "loss": 0.0426, "step": 32906 }, { "epoch": 0.5827677349944879, "grad_norm": 0.5064244270324707, "learning_rate": 1.1733762741060042e-05, "loss": 0.0474, "step": 32907 }, { "epoch": 0.5827854445315164, "grad_norm": 0.435224324464798, "learning_rate": 1.1732923026482565e-05, "loss": 0.059, "step": 32908 }, { "epoch": 0.5828031540685448, "grad_norm": 0.7291637063026428, "learning_rate": 1.1732083322653441e-05, "loss": 0.0569, "step": 32909 }, { "epoch": 0.5828208636055732, "grad_norm": 0.88262540102005, "learning_rate": 1.1731243629575435e-05, "loss": 0.0739, "step": 32910 }, { "epoch": 0.5828385731426016, "grad_norm": 1.0156508684158325, "learning_rate": 1.1730403947251306e-05, "loss": 0.0652, "step": 32911 }, { "epoch": 0.5828562826796301, "grad_norm": 0.29996663331985474, "learning_rate": 1.1729564275683815e-05, "loss": 0.0455, "step": 32912 }, { "epoch": 0.5828739922166585, "grad_norm": 0.33999794721603394, "learning_rate": 1.1728724614875727e-05, "loss": 0.0713, "step": 32913 }, { "epoch": 0.5828917017536869, "grad_norm": 0.6789402961730957, "learning_rate": 1.1727884964829809e-05, "loss": 0.0636, "step": 32914 }, { "epoch": 0.5829094112907154, "grad_norm": 0.50299471616745, "learning_rate": 1.1727045325548814e-05, "loss": 0.0815, "step": 32915 }, { "epoch": 0.5829271208277438, "grad_norm": 0.835554301738739, "learning_rate": 1.172620569703551e-05, "loss": 0.0753, "step": 32916 }, { "epoch": 0.5829448303647722, "grad_norm": 1.2323358058929443, "learning_rate": 1.1725366079292662e-05, "loss": 0.0882, "step": 32917 }, { "epoch": 0.5829625399018006, "grad_norm": 0.7442025542259216, "learning_rate": 1.1724526472323024e-05, "loss": 0.0657, "step": 32918 }, { "epoch": 0.5829802494388291, "grad_norm": 0.5424299240112305, "learning_rate": 1.172368687612936e-05, "loss": 0.0715, "step": 32919 }, { "epoch": 0.5829979589758575, "grad_norm": 0.8629547357559204, "learning_rate": 1.1722847290714434e-05, "loss": 0.0447, "step": 32920 }, { "epoch": 0.5830156685128859, "grad_norm": 0.445665568113327, "learning_rate": 1.172200771608102e-05, "loss": 0.0579, "step": 32921 }, { "epoch": 0.5830333780499143, "grad_norm": 0.6919355988502502, "learning_rate": 1.1721168152231861e-05, "loss": 0.0422, "step": 32922 }, { "epoch": 0.5830510875869428, "grad_norm": 0.6291745901107788, "learning_rate": 1.1720328599169726e-05, "loss": 0.0777, "step": 32923 }, { "epoch": 0.5830687971239712, "grad_norm": 0.8914390206336975, "learning_rate": 1.171948905689738e-05, "loss": 0.0769, "step": 32924 }, { "epoch": 0.5830865066609996, "grad_norm": 0.46996256709098816, "learning_rate": 1.1718649525417585e-05, "loss": 0.0788, "step": 32925 }, { "epoch": 0.583104216198028, "grad_norm": 0.7078665494918823, "learning_rate": 1.1717810004733097e-05, "loss": 0.0971, "step": 32926 }, { "epoch": 0.5831219257350565, "grad_norm": 0.4989977180957794, "learning_rate": 1.1716970494846685e-05, "loss": 0.0492, "step": 32927 }, { "epoch": 0.5831396352720849, "grad_norm": 0.7349549531936646, "learning_rate": 1.171613099576111e-05, "loss": 0.0809, "step": 32928 }, { "epoch": 0.5831573448091133, "grad_norm": 0.6221161484718323, "learning_rate": 1.1715291507479128e-05, "loss": 0.0638, "step": 32929 }, { "epoch": 0.5831750543461418, "grad_norm": 0.3344614505767822, "learning_rate": 1.1714452030003506e-05, "loss": 0.0395, "step": 32930 }, { "epoch": 0.5831927638831702, "grad_norm": 0.5827162861824036, "learning_rate": 1.1713612563337004e-05, "loss": 0.0617, "step": 32931 }, { "epoch": 0.5832104734201986, "grad_norm": 0.49063214659690857, "learning_rate": 1.171277310748239e-05, "loss": 0.0524, "step": 32932 }, { "epoch": 0.583228182957227, "grad_norm": 0.40758633613586426, "learning_rate": 1.1711933662442414e-05, "loss": 0.0482, "step": 32933 }, { "epoch": 0.5832458924942555, "grad_norm": 0.39216575026512146, "learning_rate": 1.171109422821984e-05, "loss": 0.0594, "step": 32934 }, { "epoch": 0.5832636020312839, "grad_norm": 0.4975905418395996, "learning_rate": 1.1710254804817443e-05, "loss": 0.0707, "step": 32935 }, { "epoch": 0.5832813115683123, "grad_norm": 0.8306305408477783, "learning_rate": 1.170941539223797e-05, "loss": 0.1024, "step": 32936 }, { "epoch": 0.5832990211053407, "grad_norm": 0.7748554348945618, "learning_rate": 1.1708575990484187e-05, "loss": 0.0841, "step": 32937 }, { "epoch": 0.5833167306423692, "grad_norm": 1.0658485889434814, "learning_rate": 1.1707736599558855e-05, "loss": 0.08, "step": 32938 }, { "epoch": 0.5833344401793976, "grad_norm": 0.5343115329742432, "learning_rate": 1.170689721946474e-05, "loss": 0.045, "step": 32939 }, { "epoch": 0.583352149716426, "grad_norm": 0.7249041795730591, "learning_rate": 1.1706057850204596e-05, "loss": 0.0724, "step": 32940 }, { "epoch": 0.5833698592534544, "grad_norm": 0.5419098734855652, "learning_rate": 1.1705218491781192e-05, "loss": 0.052, "step": 32941 }, { "epoch": 0.5833875687904829, "grad_norm": 0.613612174987793, "learning_rate": 1.1704379144197287e-05, "loss": 0.0532, "step": 32942 }, { "epoch": 0.5834052783275113, "grad_norm": 0.30361151695251465, "learning_rate": 1.1703539807455637e-05, "loss": 0.045, "step": 32943 }, { "epoch": 0.5834229878645397, "grad_norm": 0.7063406109809875, "learning_rate": 1.1702700481559009e-05, "loss": 0.0518, "step": 32944 }, { "epoch": 0.5834406974015682, "grad_norm": 0.7423917651176453, "learning_rate": 1.1701861166510163e-05, "loss": 0.0748, "step": 32945 }, { "epoch": 0.5834584069385966, "grad_norm": 0.49302881956100464, "learning_rate": 1.1701021862311864e-05, "loss": 0.0579, "step": 32946 }, { "epoch": 0.583476116475625, "grad_norm": 0.47964632511138916, "learning_rate": 1.1700182568966867e-05, "loss": 0.0629, "step": 32947 }, { "epoch": 0.5834938260126534, "grad_norm": 0.3290080428123474, "learning_rate": 1.1699343286477933e-05, "loss": 0.0626, "step": 32948 }, { "epoch": 0.5835115355496819, "grad_norm": 0.5205357670783997, "learning_rate": 1.169850401484783e-05, "loss": 0.0656, "step": 32949 }, { "epoch": 0.5835292450867103, "grad_norm": 0.49498045444488525, "learning_rate": 1.1697664754079312e-05, "loss": 0.0544, "step": 32950 }, { "epoch": 0.5835469546237387, "grad_norm": 1.1401058435440063, "learning_rate": 1.1696825504175142e-05, "loss": 0.1208, "step": 32951 }, { "epoch": 0.5835646641607671, "grad_norm": 0.48230648040771484, "learning_rate": 1.1695986265138082e-05, "loss": 0.0431, "step": 32952 }, { "epoch": 0.5835823736977956, "grad_norm": 0.554680347442627, "learning_rate": 1.1695147036970897e-05, "loss": 0.0933, "step": 32953 }, { "epoch": 0.583600083234824, "grad_norm": 0.8261892199516296, "learning_rate": 1.169430781967634e-05, "loss": 0.0901, "step": 32954 }, { "epoch": 0.5836177927718524, "grad_norm": 0.22958838939666748, "learning_rate": 1.1693468613257176e-05, "loss": 0.0638, "step": 32955 }, { "epoch": 0.5836355023088808, "grad_norm": 0.4476754069328308, "learning_rate": 1.1692629417716165e-05, "loss": 0.0485, "step": 32956 }, { "epoch": 0.5836532118459093, "grad_norm": 0.7088520526885986, "learning_rate": 1.1691790233056074e-05, "loss": 0.0785, "step": 32957 }, { "epoch": 0.5836709213829377, "grad_norm": 0.519978404045105, "learning_rate": 1.1690951059279653e-05, "loss": 0.1149, "step": 32958 }, { "epoch": 0.5836886309199661, "grad_norm": 0.5583704710006714, "learning_rate": 1.169011189638967e-05, "loss": 0.0468, "step": 32959 }, { "epoch": 0.5837063404569947, "grad_norm": 0.7187706232070923, "learning_rate": 1.168927274438889e-05, "loss": 0.0627, "step": 32960 }, { "epoch": 0.583724049994023, "grad_norm": 0.5881648659706116, "learning_rate": 1.1688433603280062e-05, "loss": 0.0751, "step": 32961 }, { "epoch": 0.5837417595310515, "grad_norm": 0.5213879942893982, "learning_rate": 1.1687594473065953e-05, "loss": 0.0766, "step": 32962 }, { "epoch": 0.5837594690680799, "grad_norm": 0.41734862327575684, "learning_rate": 1.1686755353749317e-05, "loss": 0.0713, "step": 32963 }, { "epoch": 0.5837771786051084, "grad_norm": 0.5344389081001282, "learning_rate": 1.1685916245332934e-05, "loss": 0.0619, "step": 32964 }, { "epoch": 0.5837948881421368, "grad_norm": 0.48476076126098633, "learning_rate": 1.1685077147819541e-05, "loss": 0.0761, "step": 32965 }, { "epoch": 0.5838125976791652, "grad_norm": 0.8361369371414185, "learning_rate": 1.1684238061211913e-05, "loss": 0.0696, "step": 32966 }, { "epoch": 0.5838303072161936, "grad_norm": 0.536257803440094, "learning_rate": 1.1683398985512807e-05, "loss": 0.0472, "step": 32967 }, { "epoch": 0.5838480167532221, "grad_norm": 0.6692375540733337, "learning_rate": 1.1682559920724981e-05, "loss": 0.0562, "step": 32968 }, { "epoch": 0.5838657262902505, "grad_norm": 0.8733815550804138, "learning_rate": 1.1681720866851196e-05, "loss": 0.0515, "step": 32969 }, { "epoch": 0.5838834358272789, "grad_norm": 0.6762328743934631, "learning_rate": 1.1680881823894214e-05, "loss": 0.0648, "step": 32970 }, { "epoch": 0.5839011453643073, "grad_norm": 0.5301904082298279, "learning_rate": 1.1680042791856799e-05, "loss": 0.0803, "step": 32971 }, { "epoch": 0.5839188549013358, "grad_norm": 1.0595958232879639, "learning_rate": 1.1679203770741704e-05, "loss": 0.075, "step": 32972 }, { "epoch": 0.5839365644383642, "grad_norm": 0.5261561274528503, "learning_rate": 1.1678364760551693e-05, "loss": 0.0756, "step": 32973 }, { "epoch": 0.5839542739753926, "grad_norm": 0.7181488871574402, "learning_rate": 1.1677525761289531e-05, "loss": 0.0744, "step": 32974 }, { "epoch": 0.5839719835124211, "grad_norm": 0.5535686612129211, "learning_rate": 1.1676686772957969e-05, "loss": 0.0457, "step": 32975 }, { "epoch": 0.5839896930494495, "grad_norm": 0.8006289601325989, "learning_rate": 1.1675847795559771e-05, "loss": 0.0955, "step": 32976 }, { "epoch": 0.5840074025864779, "grad_norm": 0.43234702944755554, "learning_rate": 1.1675008829097697e-05, "loss": 0.096, "step": 32977 }, { "epoch": 0.5840251121235063, "grad_norm": 0.4613129198551178, "learning_rate": 1.167416987357451e-05, "loss": 0.061, "step": 32978 }, { "epoch": 0.5840428216605348, "grad_norm": 0.8989536762237549, "learning_rate": 1.1673330928992966e-05, "loss": 0.0834, "step": 32979 }, { "epoch": 0.5840605311975632, "grad_norm": 0.8559523820877075, "learning_rate": 1.1672491995355825e-05, "loss": 0.0648, "step": 32980 }, { "epoch": 0.5840782407345916, "grad_norm": 0.8345073461532593, "learning_rate": 1.1671653072665854e-05, "loss": 0.0782, "step": 32981 }, { "epoch": 0.58409595027162, "grad_norm": 0.7526320815086365, "learning_rate": 1.1670814160925803e-05, "loss": 0.1091, "step": 32982 }, { "epoch": 0.5841136598086485, "grad_norm": 0.3859693109989166, "learning_rate": 1.1669975260138436e-05, "loss": 0.063, "step": 32983 }, { "epoch": 0.5841313693456769, "grad_norm": 0.4652079939842224, "learning_rate": 1.1669136370306514e-05, "loss": 0.0898, "step": 32984 }, { "epoch": 0.5841490788827053, "grad_norm": 0.3571585416793823, "learning_rate": 1.1668297491432805e-05, "loss": 0.0417, "step": 32985 }, { "epoch": 0.5841667884197337, "grad_norm": 0.6116554737091064, "learning_rate": 1.166745862352005e-05, "loss": 0.0862, "step": 32986 }, { "epoch": 0.5841844979567622, "grad_norm": 0.6006741523742676, "learning_rate": 1.166661976657102e-05, "loss": 0.0826, "step": 32987 }, { "epoch": 0.5842022074937906, "grad_norm": 0.5826758742332458, "learning_rate": 1.1665780920588482e-05, "loss": 0.0863, "step": 32988 }, { "epoch": 0.584219917030819, "grad_norm": 0.5949636101722717, "learning_rate": 1.1664942085575182e-05, "loss": 0.0901, "step": 32989 }, { "epoch": 0.5842376265678475, "grad_norm": 0.5359089374542236, "learning_rate": 1.1664103261533883e-05, "loss": 0.0503, "step": 32990 }, { "epoch": 0.5842553361048759, "grad_norm": 0.8222032189369202, "learning_rate": 1.1663264448467349e-05, "loss": 0.0909, "step": 32991 }, { "epoch": 0.5842730456419043, "grad_norm": 0.9474252462387085, "learning_rate": 1.1662425646378339e-05, "loss": 0.1134, "step": 32992 }, { "epoch": 0.5842907551789327, "grad_norm": 0.45578378438949585, "learning_rate": 1.1661586855269608e-05, "loss": 0.0534, "step": 32993 }, { "epoch": 0.5843084647159612, "grad_norm": 0.28208011388778687, "learning_rate": 1.166074807514392e-05, "loss": 0.0333, "step": 32994 }, { "epoch": 0.5843261742529896, "grad_norm": 0.4834757149219513, "learning_rate": 1.1659909306004031e-05, "loss": 0.0463, "step": 32995 }, { "epoch": 0.584343883790018, "grad_norm": 0.552416980266571, "learning_rate": 1.1659070547852709e-05, "loss": 0.0754, "step": 32996 }, { "epoch": 0.5843615933270464, "grad_norm": 0.633786141872406, "learning_rate": 1.1658231800692699e-05, "loss": 0.0836, "step": 32997 }, { "epoch": 0.5843793028640749, "grad_norm": 0.8546868562698364, "learning_rate": 1.1657393064526772e-05, "loss": 0.0654, "step": 32998 }, { "epoch": 0.5843970124011033, "grad_norm": 0.3875362277030945, "learning_rate": 1.165655433935769e-05, "loss": 0.0553, "step": 32999 }, { "epoch": 0.5844147219381317, "grad_norm": 0.4187523126602173, "learning_rate": 1.1655715625188195e-05, "loss": 0.0482, "step": 33000 }, { "epoch": 0.5844324314751601, "grad_norm": 0.5153915286064148, "learning_rate": 1.165487692202106e-05, "loss": 0.0638, "step": 33001 }, { "epoch": 0.5844501410121886, "grad_norm": 0.38956448435783386, "learning_rate": 1.1654038229859044e-05, "loss": 0.0607, "step": 33002 }, { "epoch": 0.584467850549217, "grad_norm": 0.6672894358634949, "learning_rate": 1.1653199548704909e-05, "loss": 0.0783, "step": 33003 }, { "epoch": 0.5844855600862454, "grad_norm": 0.49903127551078796, "learning_rate": 1.1652360878561402e-05, "loss": 0.0587, "step": 33004 }, { "epoch": 0.5845032696232739, "grad_norm": 1.437272548675537, "learning_rate": 1.165152221943129e-05, "loss": 0.0784, "step": 33005 }, { "epoch": 0.5845209791603023, "grad_norm": 0.7567058801651001, "learning_rate": 1.1650683571317333e-05, "loss": 0.0748, "step": 33006 }, { "epoch": 0.5845386886973307, "grad_norm": 0.6882102489471436, "learning_rate": 1.1649844934222287e-05, "loss": 0.0731, "step": 33007 }, { "epoch": 0.5845563982343591, "grad_norm": 0.5434873104095459, "learning_rate": 1.1649006308148912e-05, "loss": 0.0679, "step": 33008 }, { "epoch": 0.5845741077713876, "grad_norm": 0.5655055642127991, "learning_rate": 1.1648167693099965e-05, "loss": 0.0573, "step": 33009 }, { "epoch": 0.584591817308416, "grad_norm": 0.626407265663147, "learning_rate": 1.1647329089078214e-05, "loss": 0.0704, "step": 33010 }, { "epoch": 0.5846095268454444, "grad_norm": 0.590104341506958, "learning_rate": 1.1646490496086406e-05, "loss": 0.0917, "step": 33011 }, { "epoch": 0.5846272363824728, "grad_norm": 0.357546865940094, "learning_rate": 1.1645651914127305e-05, "loss": 0.0612, "step": 33012 }, { "epoch": 0.5846449459195013, "grad_norm": 0.6747215390205383, "learning_rate": 1.1644813343203677e-05, "loss": 0.0518, "step": 33013 }, { "epoch": 0.5846626554565297, "grad_norm": 0.6461443901062012, "learning_rate": 1.1643974783318268e-05, "loss": 0.0866, "step": 33014 }, { "epoch": 0.5846803649935581, "grad_norm": 1.0462647676467896, "learning_rate": 1.1643136234473838e-05, "loss": 0.0602, "step": 33015 }, { "epoch": 0.5846980745305865, "grad_norm": 0.516143262386322, "learning_rate": 1.1642297696673153e-05, "loss": 0.0454, "step": 33016 }, { "epoch": 0.584715784067615, "grad_norm": 1.014906883239746, "learning_rate": 1.1641459169918976e-05, "loss": 0.0693, "step": 33017 }, { "epoch": 0.5847334936046434, "grad_norm": 0.6045883297920227, "learning_rate": 1.1640620654214052e-05, "loss": 0.0924, "step": 33018 }, { "epoch": 0.5847512031416718, "grad_norm": 0.6360769271850586, "learning_rate": 1.1639782149561147e-05, "loss": 0.0505, "step": 33019 }, { "epoch": 0.5847689126787003, "grad_norm": 0.8682029843330383, "learning_rate": 1.1638943655963022e-05, "loss": 0.0688, "step": 33020 }, { "epoch": 0.5847866222157287, "grad_norm": 0.5241153240203857, "learning_rate": 1.1638105173422428e-05, "loss": 0.0429, "step": 33021 }, { "epoch": 0.5848043317527571, "grad_norm": 0.714903712272644, "learning_rate": 1.163726670194213e-05, "loss": 0.0515, "step": 33022 }, { "epoch": 0.5848220412897855, "grad_norm": 0.32916659116744995, "learning_rate": 1.1636428241524883e-05, "loss": 0.0496, "step": 33023 }, { "epoch": 0.584839750826814, "grad_norm": 3.5321686267852783, "learning_rate": 1.1635589792173451e-05, "loss": 0.0703, "step": 33024 }, { "epoch": 0.5848574603638425, "grad_norm": 0.420761376619339, "learning_rate": 1.1634751353890584e-05, "loss": 0.0522, "step": 33025 }, { "epoch": 0.5848751699008709, "grad_norm": 1.2645312547683716, "learning_rate": 1.1633912926679044e-05, "loss": 0.0827, "step": 33026 }, { "epoch": 0.5848928794378992, "grad_norm": 0.44219672679901123, "learning_rate": 1.1633074510541592e-05, "loss": 0.0468, "step": 33027 }, { "epoch": 0.5849105889749278, "grad_norm": 0.6270891427993774, "learning_rate": 1.1632236105480991e-05, "loss": 0.0718, "step": 33028 }, { "epoch": 0.5849282985119562, "grad_norm": 0.754148006439209, "learning_rate": 1.1631397711499982e-05, "loss": 0.0726, "step": 33029 }, { "epoch": 0.5849460080489846, "grad_norm": 0.6397913694381714, "learning_rate": 1.1630559328601337e-05, "loss": 0.0875, "step": 33030 }, { "epoch": 0.584963717586013, "grad_norm": 0.5812313556671143, "learning_rate": 1.1629720956787817e-05, "loss": 0.1035, "step": 33031 }, { "epoch": 0.5849814271230415, "grad_norm": 0.7585234642028809, "learning_rate": 1.1628882596062168e-05, "loss": 0.0904, "step": 33032 }, { "epoch": 0.5849991366600699, "grad_norm": 0.5132103562355042, "learning_rate": 1.1628044246427155e-05, "loss": 0.0463, "step": 33033 }, { "epoch": 0.5850168461970983, "grad_norm": 0.49741318821907043, "learning_rate": 1.1627205907885534e-05, "loss": 0.0882, "step": 33034 }, { "epoch": 0.5850345557341268, "grad_norm": 0.6906387805938721, "learning_rate": 1.1626367580440068e-05, "loss": 0.0472, "step": 33035 }, { "epoch": 0.5850522652711552, "grad_norm": 0.5795187950134277, "learning_rate": 1.162552926409351e-05, "loss": 0.0855, "step": 33036 }, { "epoch": 0.5850699748081836, "grad_norm": 0.8244909644126892, "learning_rate": 1.1624690958848616e-05, "loss": 0.0711, "step": 33037 }, { "epoch": 0.585087684345212, "grad_norm": 0.47587165236473083, "learning_rate": 1.1623852664708153e-05, "loss": 0.0579, "step": 33038 }, { "epoch": 0.5851053938822405, "grad_norm": 0.34835922718048096, "learning_rate": 1.1623014381674871e-05, "loss": 0.0556, "step": 33039 }, { "epoch": 0.5851231034192689, "grad_norm": 0.4930729269981384, "learning_rate": 1.1622176109751527e-05, "loss": 0.0624, "step": 33040 }, { "epoch": 0.5851408129562973, "grad_norm": 0.43447205424308777, "learning_rate": 1.1621337848940885e-05, "loss": 0.0534, "step": 33041 }, { "epoch": 0.5851585224933257, "grad_norm": 0.5295190811157227, "learning_rate": 1.1620499599245704e-05, "loss": 0.057, "step": 33042 }, { "epoch": 0.5851762320303542, "grad_norm": 0.6088729500770569, "learning_rate": 1.1619661360668733e-05, "loss": 0.0459, "step": 33043 }, { "epoch": 0.5851939415673826, "grad_norm": 0.5292271971702576, "learning_rate": 1.1618823133212729e-05, "loss": 0.053, "step": 33044 }, { "epoch": 0.585211651104411, "grad_norm": 0.664047122001648, "learning_rate": 1.1617984916880466e-05, "loss": 0.0758, "step": 33045 }, { "epoch": 0.5852293606414394, "grad_norm": 0.7583303451538086, "learning_rate": 1.1617146711674682e-05, "loss": 0.0828, "step": 33046 }, { "epoch": 0.5852470701784679, "grad_norm": 0.834442675113678, "learning_rate": 1.1616308517598145e-05, "loss": 0.0784, "step": 33047 }, { "epoch": 0.5852647797154963, "grad_norm": 1.6004599332809448, "learning_rate": 1.161547033465361e-05, "loss": 0.1142, "step": 33048 }, { "epoch": 0.5852824892525247, "grad_norm": 0.7439262270927429, "learning_rate": 1.161463216284384e-05, "loss": 0.07, "step": 33049 }, { "epoch": 0.5853001987895532, "grad_norm": 0.5823980569839478, "learning_rate": 1.1613794002171585e-05, "loss": 0.0588, "step": 33050 }, { "epoch": 0.5853179083265816, "grad_norm": 0.6525631546974182, "learning_rate": 1.1612955852639603e-05, "loss": 0.0601, "step": 33051 }, { "epoch": 0.58533561786361, "grad_norm": 0.4390718936920166, "learning_rate": 1.1612117714250658e-05, "loss": 0.0474, "step": 33052 }, { "epoch": 0.5853533274006384, "grad_norm": 0.6154481768608093, "learning_rate": 1.16112795870075e-05, "loss": 0.0489, "step": 33053 }, { "epoch": 0.5853710369376669, "grad_norm": 0.4855746030807495, "learning_rate": 1.1610441470912887e-05, "loss": 0.061, "step": 33054 }, { "epoch": 0.5853887464746953, "grad_norm": 0.6327833533287048, "learning_rate": 1.1609603365969582e-05, "loss": 0.0541, "step": 33055 }, { "epoch": 0.5854064560117237, "grad_norm": 0.5436839461326599, "learning_rate": 1.1608765272180343e-05, "loss": 0.0679, "step": 33056 }, { "epoch": 0.5854241655487521, "grad_norm": 0.4678030014038086, "learning_rate": 1.1607927189547918e-05, "loss": 0.0915, "step": 33057 }, { "epoch": 0.5854418750857806, "grad_norm": 0.9241122007369995, "learning_rate": 1.1607089118075067e-05, "loss": 0.1136, "step": 33058 }, { "epoch": 0.585459584622809, "grad_norm": 0.7357034087181091, "learning_rate": 1.1606251057764559e-05, "loss": 0.0788, "step": 33059 }, { "epoch": 0.5854772941598374, "grad_norm": 0.5009727478027344, "learning_rate": 1.1605413008619134e-05, "loss": 0.0689, "step": 33060 }, { "epoch": 0.5854950036968658, "grad_norm": 0.3195136487483978, "learning_rate": 1.1604574970641557e-05, "loss": 0.0318, "step": 33061 }, { "epoch": 0.5855127132338943, "grad_norm": 0.754729688167572, "learning_rate": 1.1603736943834585e-05, "loss": 0.0748, "step": 33062 }, { "epoch": 0.5855304227709227, "grad_norm": 0.9506471157073975, "learning_rate": 1.1602898928200979e-05, "loss": 0.0623, "step": 33063 }, { "epoch": 0.5855481323079511, "grad_norm": 0.5523514151573181, "learning_rate": 1.160206092374349e-05, "loss": 0.0659, "step": 33064 }, { "epoch": 0.5855658418449796, "grad_norm": 0.6831876635551453, "learning_rate": 1.1601222930464874e-05, "loss": 0.0497, "step": 33065 }, { "epoch": 0.585583551382008, "grad_norm": 0.4889509081840515, "learning_rate": 1.1600384948367894e-05, "loss": 0.0532, "step": 33066 }, { "epoch": 0.5856012609190364, "grad_norm": 0.5247613787651062, "learning_rate": 1.1599546977455305e-05, "loss": 0.0618, "step": 33067 }, { "epoch": 0.5856189704560648, "grad_norm": 0.7331075668334961, "learning_rate": 1.1598709017729857e-05, "loss": 0.0796, "step": 33068 }, { "epoch": 0.5856366799930933, "grad_norm": 0.38698917627334595, "learning_rate": 1.1597871069194316e-05, "loss": 0.0656, "step": 33069 }, { "epoch": 0.5856543895301217, "grad_norm": 0.7695820331573486, "learning_rate": 1.159703313185144e-05, "loss": 0.0523, "step": 33070 }, { "epoch": 0.5856720990671501, "grad_norm": 0.5335825681686401, "learning_rate": 1.1596195205703974e-05, "loss": 0.0481, "step": 33071 }, { "epoch": 0.5856898086041785, "grad_norm": 0.9560989737510681, "learning_rate": 1.1595357290754682e-05, "loss": 0.0781, "step": 33072 }, { "epoch": 0.585707518141207, "grad_norm": 0.305122435092926, "learning_rate": 1.1594519387006317e-05, "loss": 0.0487, "step": 33073 }, { "epoch": 0.5857252276782354, "grad_norm": 0.6649547219276428, "learning_rate": 1.159368149446165e-05, "loss": 0.0749, "step": 33074 }, { "epoch": 0.5857429372152638, "grad_norm": 0.34233200550079346, "learning_rate": 1.1592843613123418e-05, "loss": 0.0766, "step": 33075 }, { "epoch": 0.5857606467522922, "grad_norm": 0.39049625396728516, "learning_rate": 1.1592005742994386e-05, "loss": 0.043, "step": 33076 }, { "epoch": 0.5857783562893207, "grad_norm": 0.7610260248184204, "learning_rate": 1.1591167884077313e-05, "loss": 0.0417, "step": 33077 }, { "epoch": 0.5857960658263491, "grad_norm": 0.8291277885437012, "learning_rate": 1.1590330036374951e-05, "loss": 0.0716, "step": 33078 }, { "epoch": 0.5858137753633775, "grad_norm": 0.7746063470840454, "learning_rate": 1.1589492199890056e-05, "loss": 0.0769, "step": 33079 }, { "epoch": 0.585831484900406, "grad_norm": 0.4339425563812256, "learning_rate": 1.1588654374625389e-05, "loss": 0.0658, "step": 33080 }, { "epoch": 0.5858491944374344, "grad_norm": 0.8148411512374878, "learning_rate": 1.1587816560583708e-05, "loss": 0.045, "step": 33081 }, { "epoch": 0.5858669039744628, "grad_norm": 0.8924185037612915, "learning_rate": 1.158697875776776e-05, "loss": 0.0853, "step": 33082 }, { "epoch": 0.5858846135114912, "grad_norm": 0.7115373611450195, "learning_rate": 1.1586140966180308e-05, "loss": 0.0738, "step": 33083 }, { "epoch": 0.5859023230485197, "grad_norm": 0.5723312497138977, "learning_rate": 1.1585303185824113e-05, "loss": 0.0939, "step": 33084 }, { "epoch": 0.5859200325855481, "grad_norm": 0.5804351568222046, "learning_rate": 1.1584465416701919e-05, "loss": 0.0648, "step": 33085 }, { "epoch": 0.5859377421225765, "grad_norm": 0.5305380821228027, "learning_rate": 1.1583627658816488e-05, "loss": 0.0728, "step": 33086 }, { "epoch": 0.5859554516596049, "grad_norm": 0.9517909288406372, "learning_rate": 1.1582789912170574e-05, "loss": 0.0834, "step": 33087 }, { "epoch": 0.5859731611966335, "grad_norm": 0.7654041051864624, "learning_rate": 1.1581952176766944e-05, "loss": 0.0565, "step": 33088 }, { "epoch": 0.5859908707336619, "grad_norm": 0.8456404805183411, "learning_rate": 1.158111445260834e-05, "loss": 0.0497, "step": 33089 }, { "epoch": 0.5860085802706902, "grad_norm": 0.5255805850028992, "learning_rate": 1.1580276739697521e-05, "loss": 0.0614, "step": 33090 }, { "epoch": 0.5860262898077186, "grad_norm": 0.921852171421051, "learning_rate": 1.1579439038037252e-05, "loss": 0.076, "step": 33091 }, { "epoch": 0.5860439993447472, "grad_norm": 0.5659182667732239, "learning_rate": 1.157860134763028e-05, "loss": 0.0671, "step": 33092 }, { "epoch": 0.5860617088817756, "grad_norm": 0.39559972286224365, "learning_rate": 1.157776366847936e-05, "loss": 0.0587, "step": 33093 }, { "epoch": 0.586079418418804, "grad_norm": 0.8013438582420349, "learning_rate": 1.1576926000587253e-05, "loss": 0.0593, "step": 33094 }, { "epoch": 0.5860971279558325, "grad_norm": 0.6859267354011536, "learning_rate": 1.1576088343956718e-05, "loss": 0.0713, "step": 33095 }, { "epoch": 0.5861148374928609, "grad_norm": 0.7435534596443176, "learning_rate": 1.1575250698590502e-05, "loss": 0.0631, "step": 33096 }, { "epoch": 0.5861325470298893, "grad_norm": 1.1633520126342773, "learning_rate": 1.1574413064491363e-05, "loss": 0.0887, "step": 33097 }, { "epoch": 0.5861502565669177, "grad_norm": 0.3478078842163086, "learning_rate": 1.1573575441662063e-05, "loss": 0.0482, "step": 33098 }, { "epoch": 0.5861679661039462, "grad_norm": 0.8086410164833069, "learning_rate": 1.1572737830105356e-05, "loss": 0.074, "step": 33099 }, { "epoch": 0.5861856756409746, "grad_norm": 0.6479393839836121, "learning_rate": 1.157190022982399e-05, "loss": 0.059, "step": 33100 }, { "epoch": 0.586203385178003, "grad_norm": 0.7062463164329529, "learning_rate": 1.1571062640820724e-05, "loss": 0.092, "step": 33101 }, { "epoch": 0.5862210947150314, "grad_norm": 0.3535383939743042, "learning_rate": 1.1570225063098318e-05, "loss": 0.0576, "step": 33102 }, { "epoch": 0.5862388042520599, "grad_norm": 0.43121233582496643, "learning_rate": 1.1569387496659523e-05, "loss": 0.0878, "step": 33103 }, { "epoch": 0.5862565137890883, "grad_norm": 0.466632604598999, "learning_rate": 1.1568549941507096e-05, "loss": 0.0792, "step": 33104 }, { "epoch": 0.5862742233261167, "grad_norm": 0.22364412248134613, "learning_rate": 1.1567712397643792e-05, "loss": 0.0578, "step": 33105 }, { "epoch": 0.5862919328631451, "grad_norm": 0.7781029343605042, "learning_rate": 1.156687486507237e-05, "loss": 0.0902, "step": 33106 }, { "epoch": 0.5863096424001736, "grad_norm": 0.36758551001548767, "learning_rate": 1.156603734379558e-05, "loss": 0.0685, "step": 33107 }, { "epoch": 0.586327351937202, "grad_norm": 0.55091792345047, "learning_rate": 1.156519983381618e-05, "loss": 0.0492, "step": 33108 }, { "epoch": 0.5863450614742304, "grad_norm": 0.7731207013130188, "learning_rate": 1.156436233513693e-05, "loss": 0.0744, "step": 33109 }, { "epoch": 0.5863627710112589, "grad_norm": 0.4987729787826538, "learning_rate": 1.1563524847760572e-05, "loss": 0.0541, "step": 33110 }, { "epoch": 0.5863804805482873, "grad_norm": 0.6031606197357178, "learning_rate": 1.1562687371689873e-05, "loss": 0.0514, "step": 33111 }, { "epoch": 0.5863981900853157, "grad_norm": 0.6037087440490723, "learning_rate": 1.1561849906927584e-05, "loss": 0.0694, "step": 33112 }, { "epoch": 0.5864158996223441, "grad_norm": 1.138772964477539, "learning_rate": 1.1561012453476469e-05, "loss": 0.0972, "step": 33113 }, { "epoch": 0.5864336091593726, "grad_norm": 0.6362206339836121, "learning_rate": 1.1560175011339269e-05, "loss": 0.086, "step": 33114 }, { "epoch": 0.586451318696401, "grad_norm": 0.8273888826370239, "learning_rate": 1.1559337580518744e-05, "loss": 0.0667, "step": 33115 }, { "epoch": 0.5864690282334294, "grad_norm": 0.8097265958786011, "learning_rate": 1.1558500161017653e-05, "loss": 0.0477, "step": 33116 }, { "epoch": 0.5864867377704578, "grad_norm": 0.750429093837738, "learning_rate": 1.1557662752838745e-05, "loss": 0.0637, "step": 33117 }, { "epoch": 0.5865044473074863, "grad_norm": 0.41617733240127563, "learning_rate": 1.155682535598478e-05, "loss": 0.0361, "step": 33118 }, { "epoch": 0.5865221568445147, "grad_norm": 0.35064712166786194, "learning_rate": 1.1555987970458511e-05, "loss": 0.0448, "step": 33119 }, { "epoch": 0.5865398663815431, "grad_norm": 0.8018144369125366, "learning_rate": 1.1555150596262696e-05, "loss": 0.0626, "step": 33120 }, { "epoch": 0.5865575759185715, "grad_norm": 0.937174916267395, "learning_rate": 1.1554313233400084e-05, "loss": 0.0995, "step": 33121 }, { "epoch": 0.5865752854556, "grad_norm": 0.6155047416687012, "learning_rate": 1.1553475881873432e-05, "loss": 0.0675, "step": 33122 }, { "epoch": 0.5865929949926284, "grad_norm": 0.42197078466415405, "learning_rate": 1.1552638541685504e-05, "loss": 0.0973, "step": 33123 }, { "epoch": 0.5866107045296568, "grad_norm": 0.7029350399971008, "learning_rate": 1.1551801212839037e-05, "loss": 0.0825, "step": 33124 }, { "epoch": 0.5866284140666853, "grad_norm": 0.629550576210022, "learning_rate": 1.1550963895336796e-05, "loss": 0.0539, "step": 33125 }, { "epoch": 0.5866461236037137, "grad_norm": 0.5129380226135254, "learning_rate": 1.1550126589181538e-05, "loss": 0.0597, "step": 33126 }, { "epoch": 0.5866638331407421, "grad_norm": 0.7381910681724548, "learning_rate": 1.1549289294376019e-05, "loss": 0.0857, "step": 33127 }, { "epoch": 0.5866815426777705, "grad_norm": 0.5725146532058716, "learning_rate": 1.1548452010922981e-05, "loss": 0.0808, "step": 33128 }, { "epoch": 0.586699252214799, "grad_norm": 0.4871784746646881, "learning_rate": 1.154761473882519e-05, "loss": 0.0747, "step": 33129 }, { "epoch": 0.5867169617518274, "grad_norm": 0.6140019297599792, "learning_rate": 1.1546777478085397e-05, "loss": 0.057, "step": 33130 }, { "epoch": 0.5867346712888558, "grad_norm": 0.6685435175895691, "learning_rate": 1.154594022870636e-05, "loss": 0.0379, "step": 33131 }, { "epoch": 0.5867523808258842, "grad_norm": 0.543178379535675, "learning_rate": 1.1545102990690826e-05, "loss": 0.0768, "step": 33132 }, { "epoch": 0.5867700903629127, "grad_norm": 0.8329647779464722, "learning_rate": 1.1544265764041553e-05, "loss": 0.0754, "step": 33133 }, { "epoch": 0.5867877998999411, "grad_norm": 0.8539512157440186, "learning_rate": 1.15434285487613e-05, "loss": 0.0676, "step": 33134 }, { "epoch": 0.5868055094369695, "grad_norm": 0.7149160504341125, "learning_rate": 1.1542591344852816e-05, "loss": 0.0732, "step": 33135 }, { "epoch": 0.5868232189739979, "grad_norm": 0.719447910785675, "learning_rate": 1.1541754152318854e-05, "loss": 0.0695, "step": 33136 }, { "epoch": 0.5868409285110264, "grad_norm": 0.49751585721969604, "learning_rate": 1.1540916971162174e-05, "loss": 0.0369, "step": 33137 }, { "epoch": 0.5868586380480548, "grad_norm": 0.507682740688324, "learning_rate": 1.1540079801385531e-05, "loss": 0.0526, "step": 33138 }, { "epoch": 0.5868763475850832, "grad_norm": 1.1129032373428345, "learning_rate": 1.1539242642991668e-05, "loss": 0.088, "step": 33139 }, { "epoch": 0.5868940571221117, "grad_norm": 0.5686136484146118, "learning_rate": 1.1538405495983349e-05, "loss": 0.0666, "step": 33140 }, { "epoch": 0.5869117666591401, "grad_norm": 0.8223111033439636, "learning_rate": 1.1537568360363332e-05, "loss": 0.0564, "step": 33141 }, { "epoch": 0.5869294761961685, "grad_norm": 0.5724604725837708, "learning_rate": 1.1536731236134358e-05, "loss": 0.0922, "step": 33142 }, { "epoch": 0.5869471857331969, "grad_norm": 0.6931641697883606, "learning_rate": 1.153589412329919e-05, "loss": 0.0545, "step": 33143 }, { "epoch": 0.5869648952702254, "grad_norm": 0.6314674019813538, "learning_rate": 1.1535057021860578e-05, "loss": 0.0745, "step": 33144 }, { "epoch": 0.5869826048072538, "grad_norm": 0.9650496244430542, "learning_rate": 1.1534219931821281e-05, "loss": 0.1243, "step": 33145 }, { "epoch": 0.5870003143442822, "grad_norm": 1.034014105796814, "learning_rate": 1.1533382853184047e-05, "loss": 0.0935, "step": 33146 }, { "epoch": 0.5870180238813106, "grad_norm": 0.517994225025177, "learning_rate": 1.1532545785951636e-05, "loss": 0.0561, "step": 33147 }, { "epoch": 0.5870357334183391, "grad_norm": 0.7648985981941223, "learning_rate": 1.1531708730126798e-05, "loss": 0.0653, "step": 33148 }, { "epoch": 0.5870534429553675, "grad_norm": 0.6828994154930115, "learning_rate": 1.1530871685712286e-05, "loss": 0.0694, "step": 33149 }, { "epoch": 0.5870711524923959, "grad_norm": 0.5600035190582275, "learning_rate": 1.1530034652710854e-05, "loss": 0.0715, "step": 33150 }, { "epoch": 0.5870888620294243, "grad_norm": 0.3883458077907562, "learning_rate": 1.1529197631125258e-05, "loss": 0.0412, "step": 33151 }, { "epoch": 0.5871065715664529, "grad_norm": 0.8147712349891663, "learning_rate": 1.1528360620958256e-05, "loss": 0.0838, "step": 33152 }, { "epoch": 0.5871242811034812, "grad_norm": 0.44332948327064514, "learning_rate": 1.1527523622212589e-05, "loss": 0.0791, "step": 33153 }, { "epoch": 0.5871419906405096, "grad_norm": 0.8285402655601501, "learning_rate": 1.1526686634891019e-05, "loss": 0.0771, "step": 33154 }, { "epoch": 0.5871597001775382, "grad_norm": 0.6049438118934631, "learning_rate": 1.1525849658996307e-05, "loss": 0.1251, "step": 33155 }, { "epoch": 0.5871774097145666, "grad_norm": 0.23282305896282196, "learning_rate": 1.1525012694531189e-05, "loss": 0.0617, "step": 33156 }, { "epoch": 0.587195119251595, "grad_norm": 0.6087000966072083, "learning_rate": 1.1524175741498431e-05, "loss": 0.0486, "step": 33157 }, { "epoch": 0.5872128287886234, "grad_norm": 0.7765446901321411, "learning_rate": 1.152333879990078e-05, "loss": 0.0515, "step": 33158 }, { "epoch": 0.5872305383256519, "grad_norm": 0.6780924201011658, "learning_rate": 1.1522501869740999e-05, "loss": 0.0815, "step": 33159 }, { "epoch": 0.5872482478626803, "grad_norm": 0.6613562703132629, "learning_rate": 1.152166495102183e-05, "loss": 0.0638, "step": 33160 }, { "epoch": 0.5872659573997087, "grad_norm": 0.6366844177246094, "learning_rate": 1.1520828043746033e-05, "loss": 0.0525, "step": 33161 }, { "epoch": 0.5872836669367371, "grad_norm": 0.8442631363868713, "learning_rate": 1.1519991147916361e-05, "loss": 0.0642, "step": 33162 }, { "epoch": 0.5873013764737656, "grad_norm": 0.572844922542572, "learning_rate": 1.1519154263535564e-05, "loss": 0.0623, "step": 33163 }, { "epoch": 0.587319086010794, "grad_norm": 0.7225865721702576, "learning_rate": 1.1518317390606397e-05, "loss": 0.0781, "step": 33164 }, { "epoch": 0.5873367955478224, "grad_norm": 0.7135761380195618, "learning_rate": 1.1517480529131616e-05, "loss": 0.0456, "step": 33165 }, { "epoch": 0.5873545050848508, "grad_norm": 0.720395565032959, "learning_rate": 1.1516643679113976e-05, "loss": 0.0627, "step": 33166 }, { "epoch": 0.5873722146218793, "grad_norm": 0.5258051753044128, "learning_rate": 1.1515806840556219e-05, "loss": 0.0513, "step": 33167 }, { "epoch": 0.5873899241589077, "grad_norm": 0.9566596746444702, "learning_rate": 1.1514970013461102e-05, "loss": 0.099, "step": 33168 }, { "epoch": 0.5874076336959361, "grad_norm": 1.10310959815979, "learning_rate": 1.1514133197831386e-05, "loss": 0.0894, "step": 33169 }, { "epoch": 0.5874253432329646, "grad_norm": 0.450651615858078, "learning_rate": 1.1513296393669825e-05, "loss": 0.0804, "step": 33170 }, { "epoch": 0.587443052769993, "grad_norm": 0.44275373220443726, "learning_rate": 1.151245960097916e-05, "loss": 0.0476, "step": 33171 }, { "epoch": 0.5874607623070214, "grad_norm": 0.3541094660758972, "learning_rate": 1.151162281976215e-05, "loss": 0.0722, "step": 33172 }, { "epoch": 0.5874784718440498, "grad_norm": 0.4841821491718292, "learning_rate": 1.1510786050021551e-05, "loss": 0.0697, "step": 33173 }, { "epoch": 0.5874961813810783, "grad_norm": 0.5884993076324463, "learning_rate": 1.150994929176011e-05, "loss": 0.0521, "step": 33174 }, { "epoch": 0.5875138909181067, "grad_norm": 0.7469477653503418, "learning_rate": 1.1509112544980584e-05, "loss": 0.0569, "step": 33175 }, { "epoch": 0.5875316004551351, "grad_norm": 0.8515844941139221, "learning_rate": 1.1508275809685723e-05, "loss": 0.0653, "step": 33176 }, { "epoch": 0.5875493099921635, "grad_norm": 0.960303008556366, "learning_rate": 1.1507439085878283e-05, "loss": 0.0916, "step": 33177 }, { "epoch": 0.587567019529192, "grad_norm": 0.4834894835948944, "learning_rate": 1.1506602373561013e-05, "loss": 0.0679, "step": 33178 }, { "epoch": 0.5875847290662204, "grad_norm": 0.5754654407501221, "learning_rate": 1.150576567273667e-05, "loss": 0.0559, "step": 33179 }, { "epoch": 0.5876024386032488, "grad_norm": 0.9327725768089294, "learning_rate": 1.150492898340801e-05, "loss": 0.0833, "step": 33180 }, { "epoch": 0.5876201481402772, "grad_norm": 0.5702259540557861, "learning_rate": 1.1504092305577773e-05, "loss": 0.04, "step": 33181 }, { "epoch": 0.5876378576773057, "grad_norm": 0.5321357846260071, "learning_rate": 1.1503255639248715e-05, "loss": 0.0797, "step": 33182 }, { "epoch": 0.5876555672143341, "grad_norm": 0.5866020321846008, "learning_rate": 1.1502418984423598e-05, "loss": 0.0856, "step": 33183 }, { "epoch": 0.5876732767513625, "grad_norm": 0.7936299443244934, "learning_rate": 1.1501582341105173e-05, "loss": 0.0735, "step": 33184 }, { "epoch": 0.587690986288391, "grad_norm": 0.6682873964309692, "learning_rate": 1.1500745709296182e-05, "loss": 0.0631, "step": 33185 }, { "epoch": 0.5877086958254194, "grad_norm": 0.42111802101135254, "learning_rate": 1.1499909088999386e-05, "loss": 0.0444, "step": 33186 }, { "epoch": 0.5877264053624478, "grad_norm": 0.358304500579834, "learning_rate": 1.1499072480217535e-05, "loss": 0.0464, "step": 33187 }, { "epoch": 0.5877441148994762, "grad_norm": 0.7718775868415833, "learning_rate": 1.149823588295338e-05, "loss": 0.0769, "step": 33188 }, { "epoch": 0.5877618244365047, "grad_norm": 0.6612054705619812, "learning_rate": 1.1497399297209675e-05, "loss": 0.0765, "step": 33189 }, { "epoch": 0.5877795339735331, "grad_norm": 0.831209659576416, "learning_rate": 1.149656272298917e-05, "loss": 0.0873, "step": 33190 }, { "epoch": 0.5877972435105615, "grad_norm": 0.7068250775337219, "learning_rate": 1.1495726160294624e-05, "loss": 0.0456, "step": 33191 }, { "epoch": 0.5878149530475899, "grad_norm": 0.5367916226387024, "learning_rate": 1.1494889609128783e-05, "loss": 0.0697, "step": 33192 }, { "epoch": 0.5878326625846184, "grad_norm": 0.6541424989700317, "learning_rate": 1.1494053069494398e-05, "loss": 0.0612, "step": 33193 }, { "epoch": 0.5878503721216468, "grad_norm": 0.6287773251533508, "learning_rate": 1.1493216541394231e-05, "loss": 0.0693, "step": 33194 }, { "epoch": 0.5878680816586752, "grad_norm": 0.681510329246521, "learning_rate": 1.149238002483102e-05, "loss": 0.0688, "step": 33195 }, { "epoch": 0.5878857911957036, "grad_norm": 0.5550193786621094, "learning_rate": 1.1491543519807524e-05, "loss": 0.0732, "step": 33196 }, { "epoch": 0.5879035007327321, "grad_norm": 0.34870171546936035, "learning_rate": 1.1490707026326495e-05, "loss": 0.0501, "step": 33197 }, { "epoch": 0.5879212102697605, "grad_norm": 0.3855975866317749, "learning_rate": 1.1489870544390692e-05, "loss": 0.0667, "step": 33198 }, { "epoch": 0.5879389198067889, "grad_norm": 0.5169981122016907, "learning_rate": 1.1489034074002852e-05, "loss": 0.0495, "step": 33199 }, { "epoch": 0.5879566293438174, "grad_norm": 0.6465765833854675, "learning_rate": 1.1488197615165735e-05, "loss": 0.0528, "step": 33200 }, { "epoch": 0.5879743388808458, "grad_norm": 0.5624822378158569, "learning_rate": 1.1487361167882096e-05, "loss": 0.0927, "step": 33201 }, { "epoch": 0.5879920484178742, "grad_norm": 0.5272746682167053, "learning_rate": 1.1486524732154683e-05, "loss": 0.0499, "step": 33202 }, { "epoch": 0.5880097579549026, "grad_norm": 0.7057080268859863, "learning_rate": 1.1485688307986246e-05, "loss": 0.0728, "step": 33203 }, { "epoch": 0.5880274674919311, "grad_norm": 0.5496365427970886, "learning_rate": 1.148485189537954e-05, "loss": 0.066, "step": 33204 }, { "epoch": 0.5880451770289595, "grad_norm": 0.4980126619338989, "learning_rate": 1.1484015494337318e-05, "loss": 0.0775, "step": 33205 }, { "epoch": 0.5880628865659879, "grad_norm": 0.722572386264801, "learning_rate": 1.1483179104862328e-05, "loss": 0.065, "step": 33206 }, { "epoch": 0.5880805961030163, "grad_norm": 0.48604676127433777, "learning_rate": 1.1482342726957321e-05, "loss": 0.0572, "step": 33207 }, { "epoch": 0.5880983056400448, "grad_norm": 0.4975810945034027, "learning_rate": 1.1481506360625051e-05, "loss": 0.0703, "step": 33208 }, { "epoch": 0.5881160151770732, "grad_norm": 0.825217068195343, "learning_rate": 1.1480670005868276e-05, "loss": 0.0707, "step": 33209 }, { "epoch": 0.5881337247141016, "grad_norm": 0.9151150584220886, "learning_rate": 1.1479833662689735e-05, "loss": 0.0851, "step": 33210 }, { "epoch": 0.5881514342511301, "grad_norm": 0.8241897821426392, "learning_rate": 1.147899733109218e-05, "loss": 0.089, "step": 33211 }, { "epoch": 0.5881691437881585, "grad_norm": 0.7301275730133057, "learning_rate": 1.1478161011078379e-05, "loss": 0.0767, "step": 33212 }, { "epoch": 0.5881868533251869, "grad_norm": 1.2039083242416382, "learning_rate": 1.1477324702651065e-05, "loss": 0.0778, "step": 33213 }, { "epoch": 0.5882045628622153, "grad_norm": 0.5349552631378174, "learning_rate": 1.1476488405812997e-05, "loss": 0.085, "step": 33214 }, { "epoch": 0.5882222723992439, "grad_norm": 0.6398473978042603, "learning_rate": 1.1475652120566925e-05, "loss": 0.0583, "step": 33215 }, { "epoch": 0.5882399819362722, "grad_norm": 0.3468295931816101, "learning_rate": 1.1474815846915606e-05, "loss": 0.0496, "step": 33216 }, { "epoch": 0.5882576914733006, "grad_norm": 0.5231025815010071, "learning_rate": 1.1473979584861781e-05, "loss": 0.0589, "step": 33217 }, { "epoch": 0.588275401010329, "grad_norm": 0.5088666677474976, "learning_rate": 1.1473143334408206e-05, "loss": 0.0809, "step": 33218 }, { "epoch": 0.5882931105473576, "grad_norm": 0.48136577010154724, "learning_rate": 1.147230709555764e-05, "loss": 0.0508, "step": 33219 }, { "epoch": 0.588310820084386, "grad_norm": 0.23902471363544464, "learning_rate": 1.147147086831282e-05, "loss": 0.0417, "step": 33220 }, { "epoch": 0.5883285296214144, "grad_norm": 0.7289152145385742, "learning_rate": 1.1470634652676504e-05, "loss": 0.0749, "step": 33221 }, { "epoch": 0.5883462391584428, "grad_norm": 0.8936048746109009, "learning_rate": 1.1469798448651444e-05, "loss": 0.0803, "step": 33222 }, { "epoch": 0.5883639486954713, "grad_norm": 0.7152408361434937, "learning_rate": 1.1468962256240395e-05, "loss": 0.0327, "step": 33223 }, { "epoch": 0.5883816582324997, "grad_norm": 0.45210084319114685, "learning_rate": 1.14681260754461e-05, "loss": 0.0664, "step": 33224 }, { "epoch": 0.5883993677695281, "grad_norm": 0.5115478038787842, "learning_rate": 1.1467289906271309e-05, "loss": 0.0365, "step": 33225 }, { "epoch": 0.5884170773065566, "grad_norm": 0.49713626503944397, "learning_rate": 1.1466453748718783e-05, "loss": 0.0566, "step": 33226 }, { "epoch": 0.588434786843585, "grad_norm": 0.7207275032997131, "learning_rate": 1.1465617602791262e-05, "loss": 0.0652, "step": 33227 }, { "epoch": 0.5884524963806134, "grad_norm": 0.5384889841079712, "learning_rate": 1.1464781468491503e-05, "loss": 0.0585, "step": 33228 }, { "epoch": 0.5884702059176418, "grad_norm": 0.6372711658477783, "learning_rate": 1.1463945345822253e-05, "loss": 0.0848, "step": 33229 }, { "epoch": 0.5884879154546703, "grad_norm": 0.501501202583313, "learning_rate": 1.146310923478627e-05, "loss": 0.0624, "step": 33230 }, { "epoch": 0.5885056249916987, "grad_norm": 0.5611156821250916, "learning_rate": 1.1462273135386297e-05, "loss": 0.0609, "step": 33231 }, { "epoch": 0.5885233345287271, "grad_norm": 0.5946807265281677, "learning_rate": 1.1461437047625088e-05, "loss": 0.0388, "step": 33232 }, { "epoch": 0.5885410440657555, "grad_norm": 0.8136221170425415, "learning_rate": 1.1460600971505398e-05, "loss": 0.094, "step": 33233 }, { "epoch": 0.588558753602784, "grad_norm": 0.9166174530982971, "learning_rate": 1.1459764907029964e-05, "loss": 0.115, "step": 33234 }, { "epoch": 0.5885764631398124, "grad_norm": 0.44091618061065674, "learning_rate": 1.1458928854201548e-05, "loss": 0.0435, "step": 33235 }, { "epoch": 0.5885941726768408, "grad_norm": 0.5750811100006104, "learning_rate": 1.14580928130229e-05, "loss": 0.0932, "step": 33236 }, { "epoch": 0.5886118822138692, "grad_norm": 0.8585731387138367, "learning_rate": 1.1457256783496773e-05, "loss": 0.0796, "step": 33237 }, { "epoch": 0.5886295917508977, "grad_norm": 0.5194555521011353, "learning_rate": 1.1456420765625906e-05, "loss": 0.0773, "step": 33238 }, { "epoch": 0.5886473012879261, "grad_norm": 0.8974459171295166, "learning_rate": 1.1455584759413058e-05, "loss": 0.0973, "step": 33239 }, { "epoch": 0.5886650108249545, "grad_norm": 1.2041023969650269, "learning_rate": 1.1454748764860972e-05, "loss": 0.0926, "step": 33240 }, { "epoch": 0.588682720361983, "grad_norm": 0.534271240234375, "learning_rate": 1.1453912781972416e-05, "loss": 0.0525, "step": 33241 }, { "epoch": 0.5887004298990114, "grad_norm": 0.4215059280395508, "learning_rate": 1.145307681075012e-05, "loss": 0.0375, "step": 33242 }, { "epoch": 0.5887181394360398, "grad_norm": 0.7065570950508118, "learning_rate": 1.1452240851196844e-05, "loss": 0.0727, "step": 33243 }, { "epoch": 0.5887358489730682, "grad_norm": 0.5524595379829407, "learning_rate": 1.145140490331534e-05, "loss": 0.0922, "step": 33244 }, { "epoch": 0.5887535585100967, "grad_norm": 0.5229887962341309, "learning_rate": 1.145056896710835e-05, "loss": 0.0579, "step": 33245 }, { "epoch": 0.5887712680471251, "grad_norm": 0.4497517943382263, "learning_rate": 1.1449733042578632e-05, "loss": 0.0586, "step": 33246 }, { "epoch": 0.5887889775841535, "grad_norm": 0.5206732153892517, "learning_rate": 1.144889712972893e-05, "loss": 0.0872, "step": 33247 }, { "epoch": 0.5888066871211819, "grad_norm": 0.9711928367614746, "learning_rate": 1.1448061228562006e-05, "loss": 0.0875, "step": 33248 }, { "epoch": 0.5888243966582104, "grad_norm": 0.5447443127632141, "learning_rate": 1.144722533908059e-05, "loss": 0.0523, "step": 33249 }, { "epoch": 0.5888421061952388, "grad_norm": 0.40453293919563293, "learning_rate": 1.1446389461287447e-05, "loss": 0.064, "step": 33250 }, { "epoch": 0.5888598157322672, "grad_norm": 0.7841260433197021, "learning_rate": 1.144555359518533e-05, "loss": 0.0818, "step": 33251 }, { "epoch": 0.5888775252692956, "grad_norm": 0.4628211557865143, "learning_rate": 1.1444717740776976e-05, "loss": 0.0369, "step": 33252 }, { "epoch": 0.5888952348063241, "grad_norm": 0.5777237415313721, "learning_rate": 1.1443881898065141e-05, "loss": 0.0664, "step": 33253 }, { "epoch": 0.5889129443433525, "grad_norm": 0.7720171213150024, "learning_rate": 1.1443046067052574e-05, "loss": 0.0673, "step": 33254 }, { "epoch": 0.5889306538803809, "grad_norm": 0.7144738435745239, "learning_rate": 1.144221024774203e-05, "loss": 0.0593, "step": 33255 }, { "epoch": 0.5889483634174094, "grad_norm": 0.5010238289833069, "learning_rate": 1.1441374440136252e-05, "loss": 0.0597, "step": 33256 }, { "epoch": 0.5889660729544378, "grad_norm": 0.3583581745624542, "learning_rate": 1.1440538644237988e-05, "loss": 0.0412, "step": 33257 }, { "epoch": 0.5889837824914662, "grad_norm": 0.6285421252250671, "learning_rate": 1.143970286005e-05, "loss": 0.0761, "step": 33258 }, { "epoch": 0.5890014920284946, "grad_norm": 0.8523627519607544, "learning_rate": 1.1438867087575024e-05, "loss": 0.0791, "step": 33259 }, { "epoch": 0.5890192015655231, "grad_norm": 0.5152426958084106, "learning_rate": 1.1438031326815814e-05, "loss": 0.071, "step": 33260 }, { "epoch": 0.5890369111025515, "grad_norm": 0.7299211621284485, "learning_rate": 1.1437195577775123e-05, "loss": 0.1013, "step": 33261 }, { "epoch": 0.5890546206395799, "grad_norm": 0.2654699683189392, "learning_rate": 1.1436359840455703e-05, "loss": 0.0805, "step": 33262 }, { "epoch": 0.5890723301766083, "grad_norm": 0.7445220947265625, "learning_rate": 1.143552411486029e-05, "loss": 0.0699, "step": 33263 }, { "epoch": 0.5890900397136368, "grad_norm": 0.8353272676467896, "learning_rate": 1.1434688400991646e-05, "loss": 0.0933, "step": 33264 }, { "epoch": 0.5891077492506652, "grad_norm": 0.8166837692260742, "learning_rate": 1.143385269885252e-05, "loss": 0.0764, "step": 33265 }, { "epoch": 0.5891254587876936, "grad_norm": 0.7205803394317627, "learning_rate": 1.1433017008445656e-05, "loss": 0.0622, "step": 33266 }, { "epoch": 0.589143168324722, "grad_norm": 0.6435249447822571, "learning_rate": 1.1432181329773801e-05, "loss": 0.0716, "step": 33267 }, { "epoch": 0.5891608778617505, "grad_norm": 0.543738603591919, "learning_rate": 1.1431345662839712e-05, "loss": 0.0851, "step": 33268 }, { "epoch": 0.5891785873987789, "grad_norm": 0.4907585680484772, "learning_rate": 1.1430510007646135e-05, "loss": 0.0619, "step": 33269 }, { "epoch": 0.5891962969358073, "grad_norm": 0.522137463092804, "learning_rate": 1.1429674364195818e-05, "loss": 0.057, "step": 33270 }, { "epoch": 0.5892140064728358, "grad_norm": 0.5304272174835205, "learning_rate": 1.142883873249151e-05, "loss": 0.0425, "step": 33271 }, { "epoch": 0.5892317160098642, "grad_norm": 0.5530399084091187, "learning_rate": 1.1428003112535962e-05, "loss": 0.0653, "step": 33272 }, { "epoch": 0.5892494255468926, "grad_norm": 0.6427862644195557, "learning_rate": 1.1427167504331925e-05, "loss": 0.0658, "step": 33273 }, { "epoch": 0.589267135083921, "grad_norm": 0.6704414486885071, "learning_rate": 1.1426331907882143e-05, "loss": 0.0655, "step": 33274 }, { "epoch": 0.5892848446209495, "grad_norm": 1.0770004987716675, "learning_rate": 1.1425496323189368e-05, "loss": 0.0909, "step": 33275 }, { "epoch": 0.5893025541579779, "grad_norm": 0.6587163805961609, "learning_rate": 1.1424660750256353e-05, "loss": 0.072, "step": 33276 }, { "epoch": 0.5893202636950063, "grad_norm": 0.7438806891441345, "learning_rate": 1.1423825189085839e-05, "loss": 0.07, "step": 33277 }, { "epoch": 0.5893379732320347, "grad_norm": 0.7458595633506775, "learning_rate": 1.1422989639680571e-05, "loss": 0.0706, "step": 33278 }, { "epoch": 0.5893556827690632, "grad_norm": 0.44065654277801514, "learning_rate": 1.1422154102043312e-05, "loss": 0.0786, "step": 33279 }, { "epoch": 0.5893733923060916, "grad_norm": 0.37359797954559326, "learning_rate": 1.142131857617681e-05, "loss": 0.0796, "step": 33280 }, { "epoch": 0.58939110184312, "grad_norm": 0.40766409039497375, "learning_rate": 1.1420483062083802e-05, "loss": 0.0689, "step": 33281 }, { "epoch": 0.5894088113801484, "grad_norm": 0.8795565366744995, "learning_rate": 1.1419647559767043e-05, "loss": 0.064, "step": 33282 }, { "epoch": 0.589426520917177, "grad_norm": 0.34170180559158325, "learning_rate": 1.1418812069229283e-05, "loss": 0.0452, "step": 33283 }, { "epoch": 0.5894442304542054, "grad_norm": 0.7298269271850586, "learning_rate": 1.1417976590473265e-05, "loss": 0.0826, "step": 33284 }, { "epoch": 0.5894619399912338, "grad_norm": 0.7534101009368896, "learning_rate": 1.1417141123501744e-05, "loss": 0.0836, "step": 33285 }, { "epoch": 0.5894796495282623, "grad_norm": 0.6589872241020203, "learning_rate": 1.1416305668317464e-05, "loss": 0.0706, "step": 33286 }, { "epoch": 0.5894973590652907, "grad_norm": 0.4316643476486206, "learning_rate": 1.1415470224923181e-05, "loss": 0.0588, "step": 33287 }, { "epoch": 0.5895150686023191, "grad_norm": 0.45088067650794983, "learning_rate": 1.1414634793321635e-05, "loss": 0.0775, "step": 33288 }, { "epoch": 0.5895327781393475, "grad_norm": 0.7712359428405762, "learning_rate": 1.1413799373515576e-05, "loss": 0.054, "step": 33289 }, { "epoch": 0.589550487676376, "grad_norm": 0.7351956963539124, "learning_rate": 1.1412963965507764e-05, "loss": 0.0939, "step": 33290 }, { "epoch": 0.5895681972134044, "grad_norm": 0.49120596051216125, "learning_rate": 1.1412128569300929e-05, "loss": 0.0536, "step": 33291 }, { "epoch": 0.5895859067504328, "grad_norm": 0.927243709564209, "learning_rate": 1.1411293184897825e-05, "loss": 0.1001, "step": 33292 }, { "epoch": 0.5896036162874612, "grad_norm": 0.47330576181411743, "learning_rate": 1.1410457812301208e-05, "loss": 0.0503, "step": 33293 }, { "epoch": 0.5896213258244897, "grad_norm": 0.27858829498291016, "learning_rate": 1.1409622451513828e-05, "loss": 0.0756, "step": 33294 }, { "epoch": 0.5896390353615181, "grad_norm": 0.6361478567123413, "learning_rate": 1.140878710253842e-05, "loss": 0.0581, "step": 33295 }, { "epoch": 0.5896567448985465, "grad_norm": 0.5643506646156311, "learning_rate": 1.1407951765377739e-05, "loss": 0.0665, "step": 33296 }, { "epoch": 0.5896744544355749, "grad_norm": 0.8017462491989136, "learning_rate": 1.1407116440034536e-05, "loss": 0.0537, "step": 33297 }, { "epoch": 0.5896921639726034, "grad_norm": 0.6943590641021729, "learning_rate": 1.1406281126511554e-05, "loss": 0.0659, "step": 33298 }, { "epoch": 0.5897098735096318, "grad_norm": 0.7850872874259949, "learning_rate": 1.1405445824811545e-05, "loss": 0.0932, "step": 33299 }, { "epoch": 0.5897275830466602, "grad_norm": 0.5655826330184937, "learning_rate": 1.1404610534937252e-05, "loss": 0.0792, "step": 33300 }, { "epoch": 0.5897452925836887, "grad_norm": 0.6907005310058594, "learning_rate": 1.1403775256891433e-05, "loss": 0.1086, "step": 33301 }, { "epoch": 0.5897630021207171, "grad_norm": 0.9457941055297852, "learning_rate": 1.1402939990676827e-05, "loss": 0.0548, "step": 33302 }, { "epoch": 0.5897807116577455, "grad_norm": 0.5868015289306641, "learning_rate": 1.1402104736296183e-05, "loss": 0.0602, "step": 33303 }, { "epoch": 0.5897984211947739, "grad_norm": 0.6808344721794128, "learning_rate": 1.1401269493752251e-05, "loss": 0.0557, "step": 33304 }, { "epoch": 0.5898161307318024, "grad_norm": 0.801739513874054, "learning_rate": 1.1400434263047784e-05, "loss": 0.0581, "step": 33305 }, { "epoch": 0.5898338402688308, "grad_norm": 0.5895329117774963, "learning_rate": 1.1399599044185518e-05, "loss": 0.0491, "step": 33306 }, { "epoch": 0.5898515498058592, "grad_norm": 0.6521120071411133, "learning_rate": 1.1398763837168204e-05, "loss": 0.0617, "step": 33307 }, { "epoch": 0.5898692593428876, "grad_norm": 0.5176416635513306, "learning_rate": 1.1397928641998603e-05, "loss": 0.0346, "step": 33308 }, { "epoch": 0.5898869688799161, "grad_norm": 0.7502716183662415, "learning_rate": 1.1397093458679444e-05, "loss": 0.0816, "step": 33309 }, { "epoch": 0.5899046784169445, "grad_norm": 0.6244628429412842, "learning_rate": 1.1396258287213487e-05, "loss": 0.0652, "step": 33310 }, { "epoch": 0.5899223879539729, "grad_norm": 0.3399040102958679, "learning_rate": 1.1395423127603473e-05, "loss": 0.075, "step": 33311 }, { "epoch": 0.5899400974910013, "grad_norm": 0.2295624315738678, "learning_rate": 1.1394587979852154e-05, "loss": 0.0576, "step": 33312 }, { "epoch": 0.5899578070280298, "grad_norm": 0.450482040643692, "learning_rate": 1.1393752843962275e-05, "loss": 0.055, "step": 33313 }, { "epoch": 0.5899755165650582, "grad_norm": 0.4970043897628784, "learning_rate": 1.1392917719936586e-05, "loss": 0.0902, "step": 33314 }, { "epoch": 0.5899932261020866, "grad_norm": 0.7312549948692322, "learning_rate": 1.1392082607777834e-05, "loss": 0.0504, "step": 33315 }, { "epoch": 0.5900109356391151, "grad_norm": 0.7651529908180237, "learning_rate": 1.139124750748876e-05, "loss": 0.0554, "step": 33316 }, { "epoch": 0.5900286451761435, "grad_norm": 0.6801166534423828, "learning_rate": 1.139041241907212e-05, "loss": 0.0697, "step": 33317 }, { "epoch": 0.5900463547131719, "grad_norm": 0.5677693486213684, "learning_rate": 1.1389577342530656e-05, "loss": 0.0856, "step": 33318 }, { "epoch": 0.5900640642502003, "grad_norm": 0.595867931842804, "learning_rate": 1.1388742277867125e-05, "loss": 0.0542, "step": 33319 }, { "epoch": 0.5900817737872288, "grad_norm": 0.8350939154624939, "learning_rate": 1.138790722508426e-05, "loss": 0.0486, "step": 33320 }, { "epoch": 0.5900994833242572, "grad_norm": 0.7652482390403748, "learning_rate": 1.1387072184184812e-05, "loss": 0.0694, "step": 33321 }, { "epoch": 0.5901171928612856, "grad_norm": 0.6638721823692322, "learning_rate": 1.1386237155171539e-05, "loss": 0.0628, "step": 33322 }, { "epoch": 0.590134902398314, "grad_norm": 0.7680962681770325, "learning_rate": 1.1385402138047175e-05, "loss": 0.0657, "step": 33323 }, { "epoch": 0.5901526119353425, "grad_norm": 0.5091008543968201, "learning_rate": 1.1384567132814472e-05, "loss": 0.0503, "step": 33324 }, { "epoch": 0.5901703214723709, "grad_norm": 0.7119921445846558, "learning_rate": 1.1383732139476179e-05, "loss": 0.0751, "step": 33325 }, { "epoch": 0.5901880310093993, "grad_norm": 0.5608500838279724, "learning_rate": 1.1382897158035044e-05, "loss": 0.051, "step": 33326 }, { "epoch": 0.5902057405464277, "grad_norm": 0.49269312620162964, "learning_rate": 1.1382062188493808e-05, "loss": 0.0488, "step": 33327 }, { "epoch": 0.5902234500834562, "grad_norm": 0.5058133006095886, "learning_rate": 1.138122723085522e-05, "loss": 0.064, "step": 33328 }, { "epoch": 0.5902411596204846, "grad_norm": 0.49681299924850464, "learning_rate": 1.1380392285122036e-05, "loss": 0.0729, "step": 33329 }, { "epoch": 0.590258869157513, "grad_norm": 0.45797431468963623, "learning_rate": 1.1379557351296987e-05, "loss": 0.0321, "step": 33330 }, { "epoch": 0.5902765786945415, "grad_norm": 0.8952460885047913, "learning_rate": 1.1378722429382833e-05, "loss": 0.0845, "step": 33331 }, { "epoch": 0.5902942882315699, "grad_norm": 0.3616041839122772, "learning_rate": 1.1377887519382312e-05, "loss": 0.0572, "step": 33332 }, { "epoch": 0.5903119977685983, "grad_norm": 0.7206996083259583, "learning_rate": 1.1377052621298183e-05, "loss": 0.0469, "step": 33333 }, { "epoch": 0.5903297073056267, "grad_norm": 0.5479804277420044, "learning_rate": 1.1376217735133176e-05, "loss": 0.0879, "step": 33334 }, { "epoch": 0.5903474168426552, "grad_norm": 0.886750340461731, "learning_rate": 1.1375382860890047e-05, "loss": 0.0752, "step": 33335 }, { "epoch": 0.5903651263796836, "grad_norm": 0.5681664347648621, "learning_rate": 1.1374547998571546e-05, "loss": 0.0713, "step": 33336 }, { "epoch": 0.590382835916712, "grad_norm": 0.6552172899246216, "learning_rate": 1.1373713148180411e-05, "loss": 0.0657, "step": 33337 }, { "epoch": 0.5904005454537404, "grad_norm": 0.6358725428581238, "learning_rate": 1.1372878309719395e-05, "loss": 0.0628, "step": 33338 }, { "epoch": 0.5904182549907689, "grad_norm": 0.31828755140304565, "learning_rate": 1.1372043483191239e-05, "loss": 0.0577, "step": 33339 }, { "epoch": 0.5904359645277973, "grad_norm": 0.7594600915908813, "learning_rate": 1.1371208668598698e-05, "loss": 0.1055, "step": 33340 }, { "epoch": 0.5904536740648257, "grad_norm": 0.6490128636360168, "learning_rate": 1.1370373865944511e-05, "loss": 0.0606, "step": 33341 }, { "epoch": 0.5904713836018541, "grad_norm": 0.4560230076313019, "learning_rate": 1.1369539075231427e-05, "loss": 0.0655, "step": 33342 }, { "epoch": 0.5904890931388826, "grad_norm": 0.7650365233421326, "learning_rate": 1.1368704296462192e-05, "loss": 0.074, "step": 33343 }, { "epoch": 0.590506802675911, "grad_norm": 0.5198875069618225, "learning_rate": 1.1367869529639555e-05, "loss": 0.1011, "step": 33344 }, { "epoch": 0.5905245122129394, "grad_norm": 0.5281273126602173, "learning_rate": 1.1367034774766257e-05, "loss": 0.0601, "step": 33345 }, { "epoch": 0.590542221749968, "grad_norm": 1.0599162578582764, "learning_rate": 1.1366200031845047e-05, "loss": 0.1062, "step": 33346 }, { "epoch": 0.5905599312869964, "grad_norm": 0.6546053886413574, "learning_rate": 1.1365365300878677e-05, "loss": 0.0765, "step": 33347 }, { "epoch": 0.5905776408240248, "grad_norm": 0.5312196612358093, "learning_rate": 1.1364530581869884e-05, "loss": 0.0537, "step": 33348 }, { "epoch": 0.5905953503610532, "grad_norm": 0.6840822100639343, "learning_rate": 1.1363695874821415e-05, "loss": 0.0349, "step": 33349 }, { "epoch": 0.5906130598980817, "grad_norm": 0.6142904162406921, "learning_rate": 1.1362861179736017e-05, "loss": 0.0478, "step": 33350 }, { "epoch": 0.5906307694351101, "grad_norm": 0.5126665830612183, "learning_rate": 1.1362026496616445e-05, "loss": 0.0903, "step": 33351 }, { "epoch": 0.5906484789721385, "grad_norm": 0.522059440612793, "learning_rate": 1.1361191825465434e-05, "loss": 0.0806, "step": 33352 }, { "epoch": 0.5906661885091669, "grad_norm": 0.9801706671714783, "learning_rate": 1.1360357166285733e-05, "loss": 0.0659, "step": 33353 }, { "epoch": 0.5906838980461954, "grad_norm": 0.38430896401405334, "learning_rate": 1.1359522519080092e-05, "loss": 0.0459, "step": 33354 }, { "epoch": 0.5907016075832238, "grad_norm": 0.7277606129646301, "learning_rate": 1.1358687883851252e-05, "loss": 0.0772, "step": 33355 }, { "epoch": 0.5907193171202522, "grad_norm": 0.21004745364189148, "learning_rate": 1.135785326060196e-05, "loss": 0.0478, "step": 33356 }, { "epoch": 0.5907370266572806, "grad_norm": 0.7009501457214355, "learning_rate": 1.135701864933496e-05, "loss": 0.0594, "step": 33357 }, { "epoch": 0.5907547361943091, "grad_norm": 0.6343933343887329, "learning_rate": 1.1356184050053006e-05, "loss": 0.0867, "step": 33358 }, { "epoch": 0.5907724457313375, "grad_norm": 0.595614492893219, "learning_rate": 1.1355349462758833e-05, "loss": 0.0838, "step": 33359 }, { "epoch": 0.5907901552683659, "grad_norm": 0.6907851696014404, "learning_rate": 1.1354514887455192e-05, "loss": 0.0741, "step": 33360 }, { "epoch": 0.5908078648053944, "grad_norm": 0.6272417902946472, "learning_rate": 1.1353680324144835e-05, "loss": 0.0585, "step": 33361 }, { "epoch": 0.5908255743424228, "grad_norm": 0.8465738296508789, "learning_rate": 1.1352845772830495e-05, "loss": 0.0784, "step": 33362 }, { "epoch": 0.5908432838794512, "grad_norm": 0.9322509169578552, "learning_rate": 1.1352011233514923e-05, "loss": 0.0914, "step": 33363 }, { "epoch": 0.5908609934164796, "grad_norm": 0.6868753433227539, "learning_rate": 1.1351176706200864e-05, "loss": 0.0594, "step": 33364 }, { "epoch": 0.5908787029535081, "grad_norm": 0.3289474844932556, "learning_rate": 1.135034219089107e-05, "loss": 0.0587, "step": 33365 }, { "epoch": 0.5908964124905365, "grad_norm": 0.48820728063583374, "learning_rate": 1.1349507687588276e-05, "loss": 0.0479, "step": 33366 }, { "epoch": 0.5909141220275649, "grad_norm": 0.6354829668998718, "learning_rate": 1.1348673196295235e-05, "loss": 0.0483, "step": 33367 }, { "epoch": 0.5909318315645933, "grad_norm": 0.5334741473197937, "learning_rate": 1.1347838717014692e-05, "loss": 0.0647, "step": 33368 }, { "epoch": 0.5909495411016218, "grad_norm": 0.7809809446334839, "learning_rate": 1.1347004249749385e-05, "loss": 0.0767, "step": 33369 }, { "epoch": 0.5909672506386502, "grad_norm": 0.4629334807395935, "learning_rate": 1.1346169794502065e-05, "loss": 0.081, "step": 33370 }, { "epoch": 0.5909849601756786, "grad_norm": 0.39075767993927, "learning_rate": 1.1345335351275478e-05, "loss": 0.0853, "step": 33371 }, { "epoch": 0.591002669712707, "grad_norm": 0.4860834777355194, "learning_rate": 1.1344500920072374e-05, "loss": 0.0683, "step": 33372 }, { "epoch": 0.5910203792497355, "grad_norm": 0.8412585258483887, "learning_rate": 1.1343666500895484e-05, "loss": 0.106, "step": 33373 }, { "epoch": 0.5910380887867639, "grad_norm": 0.4868108332157135, "learning_rate": 1.1342832093747564e-05, "loss": 0.0874, "step": 33374 }, { "epoch": 0.5910557983237923, "grad_norm": 0.6725490093231201, "learning_rate": 1.1341997698631355e-05, "loss": 0.0756, "step": 33375 }, { "epoch": 0.5910735078608208, "grad_norm": 0.5705105066299438, "learning_rate": 1.1341163315549612e-05, "loss": 0.0461, "step": 33376 }, { "epoch": 0.5910912173978492, "grad_norm": 0.7725585103034973, "learning_rate": 1.1340328944505062e-05, "loss": 0.0491, "step": 33377 }, { "epoch": 0.5911089269348776, "grad_norm": 0.37355563044548035, "learning_rate": 1.1339494585500462e-05, "loss": 0.0825, "step": 33378 }, { "epoch": 0.591126636471906, "grad_norm": 0.5750938653945923, "learning_rate": 1.1338660238538558e-05, "loss": 0.0601, "step": 33379 }, { "epoch": 0.5911443460089345, "grad_norm": 0.903886079788208, "learning_rate": 1.1337825903622089e-05, "loss": 0.1123, "step": 33380 }, { "epoch": 0.5911620555459629, "grad_norm": 0.7087603807449341, "learning_rate": 1.13369915807538e-05, "loss": 0.0635, "step": 33381 }, { "epoch": 0.5911797650829913, "grad_norm": 0.5069507360458374, "learning_rate": 1.133615726993644e-05, "loss": 0.0631, "step": 33382 }, { "epoch": 0.5911974746200197, "grad_norm": 0.9626319408416748, "learning_rate": 1.1335322971172754e-05, "loss": 0.0544, "step": 33383 }, { "epoch": 0.5912151841570482, "grad_norm": 0.801931619644165, "learning_rate": 1.1334488684465482e-05, "loss": 0.0596, "step": 33384 }, { "epoch": 0.5912328936940766, "grad_norm": 0.8006947040557861, "learning_rate": 1.1333654409817371e-05, "loss": 0.0589, "step": 33385 }, { "epoch": 0.591250603231105, "grad_norm": 0.5342381596565247, "learning_rate": 1.1332820147231173e-05, "loss": 0.0722, "step": 33386 }, { "epoch": 0.5912683127681334, "grad_norm": 0.6006558537483215, "learning_rate": 1.1331985896709617e-05, "loss": 0.0732, "step": 33387 }, { "epoch": 0.5912860223051619, "grad_norm": 0.48075440526008606, "learning_rate": 1.1331151658255458e-05, "loss": 0.0475, "step": 33388 }, { "epoch": 0.5913037318421903, "grad_norm": 0.5582538843154907, "learning_rate": 1.1330317431871441e-05, "loss": 0.0823, "step": 33389 }, { "epoch": 0.5913214413792187, "grad_norm": 0.7626376152038574, "learning_rate": 1.1329483217560313e-05, "loss": 0.0663, "step": 33390 }, { "epoch": 0.5913391509162472, "grad_norm": 0.4404352903366089, "learning_rate": 1.1328649015324809e-05, "loss": 0.0388, "step": 33391 }, { "epoch": 0.5913568604532756, "grad_norm": 1.084451675415039, "learning_rate": 1.132781482516768e-05, "loss": 0.049, "step": 33392 }, { "epoch": 0.591374569990304, "grad_norm": 0.6275284290313721, "learning_rate": 1.132698064709167e-05, "loss": 0.0935, "step": 33393 }, { "epoch": 0.5913922795273324, "grad_norm": 0.21410846710205078, "learning_rate": 1.1326146481099517e-05, "loss": 0.0629, "step": 33394 }, { "epoch": 0.5914099890643609, "grad_norm": 0.7381879687309265, "learning_rate": 1.1325312327193973e-05, "loss": 0.0973, "step": 33395 }, { "epoch": 0.5914276986013893, "grad_norm": 0.6963194608688354, "learning_rate": 1.1324478185377778e-05, "loss": 0.071, "step": 33396 }, { "epoch": 0.5914454081384177, "grad_norm": 0.3579229414463043, "learning_rate": 1.1323644055653683e-05, "loss": 0.0791, "step": 33397 }, { "epoch": 0.5914631176754461, "grad_norm": 0.6264682412147522, "learning_rate": 1.1322809938024422e-05, "loss": 0.0594, "step": 33398 }, { "epoch": 0.5914808272124746, "grad_norm": 0.3925357460975647, "learning_rate": 1.1321975832492747e-05, "loss": 0.028, "step": 33399 }, { "epoch": 0.591498536749503, "grad_norm": 0.6511775255203247, "learning_rate": 1.1321141739061403e-05, "loss": 0.0711, "step": 33400 }, { "epoch": 0.5915162462865314, "grad_norm": 0.46138796210289, "learning_rate": 1.1320307657733126e-05, "loss": 0.056, "step": 33401 }, { "epoch": 0.5915339558235598, "grad_norm": 0.6220294237136841, "learning_rate": 1.1319473588510662e-05, "loss": 0.0533, "step": 33402 }, { "epoch": 0.5915516653605883, "grad_norm": 0.40100401639938354, "learning_rate": 1.131863953139676e-05, "loss": 0.0553, "step": 33403 }, { "epoch": 0.5915693748976167, "grad_norm": 0.675309419631958, "learning_rate": 1.131780548639417e-05, "loss": 0.0772, "step": 33404 }, { "epoch": 0.5915870844346451, "grad_norm": 0.846369743347168, "learning_rate": 1.1316971453505618e-05, "loss": 0.0819, "step": 33405 }, { "epoch": 0.5916047939716736, "grad_norm": 0.5748625993728638, "learning_rate": 1.1316137432733859e-05, "loss": 0.0559, "step": 33406 }, { "epoch": 0.591622503508702, "grad_norm": 0.6137794852256775, "learning_rate": 1.1315303424081634e-05, "loss": 0.0639, "step": 33407 }, { "epoch": 0.5916402130457304, "grad_norm": 0.5645470023155212, "learning_rate": 1.1314469427551691e-05, "loss": 0.0603, "step": 33408 }, { "epoch": 0.5916579225827588, "grad_norm": 0.5149998664855957, "learning_rate": 1.1313635443146769e-05, "loss": 0.0563, "step": 33409 }, { "epoch": 0.5916756321197874, "grad_norm": 0.46680131554603577, "learning_rate": 1.1312801470869614e-05, "loss": 0.075, "step": 33410 }, { "epoch": 0.5916933416568158, "grad_norm": 0.6943572759628296, "learning_rate": 1.1311967510722972e-05, "loss": 0.0575, "step": 33411 }, { "epoch": 0.5917110511938442, "grad_norm": 0.5473463535308838, "learning_rate": 1.1311133562709579e-05, "loss": 0.0441, "step": 33412 }, { "epoch": 0.5917287607308725, "grad_norm": 1.0593805313110352, "learning_rate": 1.1310299626832185e-05, "loss": 0.0803, "step": 33413 }, { "epoch": 0.5917464702679011, "grad_norm": 0.7259411215782166, "learning_rate": 1.1309465703093532e-05, "loss": 0.0825, "step": 33414 }, { "epoch": 0.5917641798049295, "grad_norm": 0.6219222545623779, "learning_rate": 1.130863179149637e-05, "loss": 0.0776, "step": 33415 }, { "epoch": 0.5917818893419579, "grad_norm": 0.48772844672203064, "learning_rate": 1.1307797892043426e-05, "loss": 0.0484, "step": 33416 }, { "epoch": 0.5917995988789863, "grad_norm": 0.47819554805755615, "learning_rate": 1.1306964004737457e-05, "loss": 0.0587, "step": 33417 }, { "epoch": 0.5918173084160148, "grad_norm": 0.3769620358943939, "learning_rate": 1.130613012958121e-05, "loss": 0.0442, "step": 33418 }, { "epoch": 0.5918350179530432, "grad_norm": 0.692258358001709, "learning_rate": 1.1305296266577414e-05, "loss": 0.0875, "step": 33419 }, { "epoch": 0.5918527274900716, "grad_norm": 0.5525737404823303, "learning_rate": 1.130446241572882e-05, "loss": 0.0416, "step": 33420 }, { "epoch": 0.5918704370271001, "grad_norm": 0.38741767406463623, "learning_rate": 1.130362857703817e-05, "loss": 0.041, "step": 33421 }, { "epoch": 0.5918881465641285, "grad_norm": 0.6340005993843079, "learning_rate": 1.1302794750508212e-05, "loss": 0.0782, "step": 33422 }, { "epoch": 0.5919058561011569, "grad_norm": 0.39434581995010376, "learning_rate": 1.130196093614168e-05, "loss": 0.0443, "step": 33423 }, { "epoch": 0.5919235656381853, "grad_norm": 0.7361103296279907, "learning_rate": 1.1301127133941327e-05, "loss": 0.0957, "step": 33424 }, { "epoch": 0.5919412751752138, "grad_norm": 0.8174949288368225, "learning_rate": 1.1300293343909891e-05, "loss": 0.0548, "step": 33425 }, { "epoch": 0.5919589847122422, "grad_norm": 0.4492219090461731, "learning_rate": 1.1299459566050113e-05, "loss": 0.0584, "step": 33426 }, { "epoch": 0.5919766942492706, "grad_norm": 0.5118299126625061, "learning_rate": 1.1298625800364741e-05, "loss": 0.0662, "step": 33427 }, { "epoch": 0.591994403786299, "grad_norm": 0.741186797618866, "learning_rate": 1.1297792046856513e-05, "loss": 0.062, "step": 33428 }, { "epoch": 0.5920121133233275, "grad_norm": 0.5313746333122253, "learning_rate": 1.1296958305528183e-05, "loss": 0.0603, "step": 33429 }, { "epoch": 0.5920298228603559, "grad_norm": 0.5458077788352966, "learning_rate": 1.1296124576382479e-05, "loss": 0.052, "step": 33430 }, { "epoch": 0.5920475323973843, "grad_norm": 0.5391892194747925, "learning_rate": 1.1295290859422145e-05, "loss": 0.0502, "step": 33431 }, { "epoch": 0.5920652419344127, "grad_norm": 0.7392486333847046, "learning_rate": 1.1294457154649941e-05, "loss": 0.0901, "step": 33432 }, { "epoch": 0.5920829514714412, "grad_norm": 0.6495391726493835, "learning_rate": 1.1293623462068593e-05, "loss": 0.0684, "step": 33433 }, { "epoch": 0.5921006610084696, "grad_norm": 0.9033793807029724, "learning_rate": 1.1292789781680847e-05, "loss": 0.0867, "step": 33434 }, { "epoch": 0.592118370545498, "grad_norm": 0.7129606604576111, "learning_rate": 1.1291956113489448e-05, "loss": 0.0648, "step": 33435 }, { "epoch": 0.5921360800825265, "grad_norm": 0.7884832620620728, "learning_rate": 1.1291122457497141e-05, "loss": 0.0842, "step": 33436 }, { "epoch": 0.5921537896195549, "grad_norm": 0.9068285822868347, "learning_rate": 1.1290288813706664e-05, "loss": 0.0795, "step": 33437 }, { "epoch": 0.5921714991565833, "grad_norm": 0.5304456353187561, "learning_rate": 1.1289455182120762e-05, "loss": 0.0703, "step": 33438 }, { "epoch": 0.5921892086936117, "grad_norm": 0.46940359473228455, "learning_rate": 1.1288621562742178e-05, "loss": 0.066, "step": 33439 }, { "epoch": 0.5922069182306402, "grad_norm": 0.22928614914417267, "learning_rate": 1.1287787955573652e-05, "loss": 0.0648, "step": 33440 }, { "epoch": 0.5922246277676686, "grad_norm": 0.651240348815918, "learning_rate": 1.128695436061793e-05, "loss": 0.0524, "step": 33441 }, { "epoch": 0.592242337304697, "grad_norm": 0.8406608700752258, "learning_rate": 1.1286120777877749e-05, "loss": 0.0657, "step": 33442 }, { "epoch": 0.5922600468417254, "grad_norm": 0.820895791053772, "learning_rate": 1.1285287207355864e-05, "loss": 0.08, "step": 33443 }, { "epoch": 0.5922777563787539, "grad_norm": 0.40572574734687805, "learning_rate": 1.1284453649055001e-05, "loss": 0.0762, "step": 33444 }, { "epoch": 0.5922954659157823, "grad_norm": 1.0568959712982178, "learning_rate": 1.1283620102977906e-05, "loss": 0.0602, "step": 33445 }, { "epoch": 0.5923131754528107, "grad_norm": 0.7712852358818054, "learning_rate": 1.1282786569127327e-05, "loss": 0.0526, "step": 33446 }, { "epoch": 0.5923308849898391, "grad_norm": 0.5769118070602417, "learning_rate": 1.1281953047506013e-05, "loss": 0.0748, "step": 33447 }, { "epoch": 0.5923485945268676, "grad_norm": 0.8404815793037415, "learning_rate": 1.1281119538116692e-05, "loss": 0.0543, "step": 33448 }, { "epoch": 0.592366304063896, "grad_norm": 0.4918634295463562, "learning_rate": 1.128028604096211e-05, "loss": 0.0281, "step": 33449 }, { "epoch": 0.5923840136009244, "grad_norm": 0.6128510236740112, "learning_rate": 1.1279452556045013e-05, "loss": 0.0629, "step": 33450 }, { "epoch": 0.5924017231379529, "grad_norm": 0.4674946963787079, "learning_rate": 1.1278619083368138e-05, "loss": 0.0544, "step": 33451 }, { "epoch": 0.5924194326749813, "grad_norm": 0.6520959734916687, "learning_rate": 1.1277785622934231e-05, "loss": 0.0547, "step": 33452 }, { "epoch": 0.5924371422120097, "grad_norm": 0.7134629487991333, "learning_rate": 1.1276952174746032e-05, "loss": 0.0778, "step": 33453 }, { "epoch": 0.5924548517490381, "grad_norm": 0.5111483335494995, "learning_rate": 1.1276118738806288e-05, "loss": 0.0628, "step": 33454 }, { "epoch": 0.5924725612860666, "grad_norm": 0.6811432242393494, "learning_rate": 1.1275285315117733e-05, "loss": 0.0824, "step": 33455 }, { "epoch": 0.592490270823095, "grad_norm": 1.0853115320205688, "learning_rate": 1.1274451903683112e-05, "loss": 0.093, "step": 33456 }, { "epoch": 0.5925079803601234, "grad_norm": 0.4879662096500397, "learning_rate": 1.1273618504505174e-05, "loss": 0.0826, "step": 33457 }, { "epoch": 0.5925256898971518, "grad_norm": 0.4746163487434387, "learning_rate": 1.1272785117586646e-05, "loss": 0.0365, "step": 33458 }, { "epoch": 0.5925433994341803, "grad_norm": 0.5413041710853577, "learning_rate": 1.1271951742930281e-05, "loss": 0.0596, "step": 33459 }, { "epoch": 0.5925611089712087, "grad_norm": 0.9749199151992798, "learning_rate": 1.1271118380538813e-05, "loss": 0.0671, "step": 33460 }, { "epoch": 0.5925788185082371, "grad_norm": 0.7612616419792175, "learning_rate": 1.1270285030414998e-05, "loss": 0.0777, "step": 33461 }, { "epoch": 0.5925965280452655, "grad_norm": 0.3838958740234375, "learning_rate": 1.1269451692561562e-05, "loss": 0.0273, "step": 33462 }, { "epoch": 0.592614237582294, "grad_norm": 0.7410739064216614, "learning_rate": 1.1268618366981251e-05, "loss": 0.0621, "step": 33463 }, { "epoch": 0.5926319471193224, "grad_norm": 0.41299888491630554, "learning_rate": 1.1267785053676815e-05, "loss": 0.0521, "step": 33464 }, { "epoch": 0.5926496566563508, "grad_norm": 0.6350436210632324, "learning_rate": 1.1266951752650983e-05, "loss": 0.0585, "step": 33465 }, { "epoch": 0.5926673661933793, "grad_norm": 1.3320960998535156, "learning_rate": 1.12661184639065e-05, "loss": 0.0519, "step": 33466 }, { "epoch": 0.5926850757304077, "grad_norm": 0.6333982944488525, "learning_rate": 1.1265285187446114e-05, "loss": 0.0568, "step": 33467 }, { "epoch": 0.5927027852674361, "grad_norm": 0.493916392326355, "learning_rate": 1.1264451923272563e-05, "loss": 0.0598, "step": 33468 }, { "epoch": 0.5927204948044645, "grad_norm": 0.3711424469947815, "learning_rate": 1.1263618671388585e-05, "loss": 0.059, "step": 33469 }, { "epoch": 0.592738204341493, "grad_norm": 0.22561673820018768, "learning_rate": 1.1262785431796923e-05, "loss": 0.0683, "step": 33470 }, { "epoch": 0.5927559138785214, "grad_norm": 0.4853718876838684, "learning_rate": 1.1261952204500323e-05, "loss": 0.0598, "step": 33471 }, { "epoch": 0.5927736234155498, "grad_norm": 0.5894452929496765, "learning_rate": 1.126111898950152e-05, "loss": 0.0426, "step": 33472 }, { "epoch": 0.5927913329525782, "grad_norm": 0.23426611721515656, "learning_rate": 1.1260285786803255e-05, "loss": 0.0737, "step": 33473 }, { "epoch": 0.5928090424896068, "grad_norm": 0.6304501891136169, "learning_rate": 1.1259452596408268e-05, "loss": 0.0446, "step": 33474 }, { "epoch": 0.5928267520266352, "grad_norm": 0.7944069504737854, "learning_rate": 1.1258619418319316e-05, "loss": 0.0567, "step": 33475 }, { "epoch": 0.5928444615636635, "grad_norm": 0.6721013188362122, "learning_rate": 1.1257786252539118e-05, "loss": 0.0611, "step": 33476 }, { "epoch": 0.592862171100692, "grad_norm": 0.5725852251052856, "learning_rate": 1.1256953099070426e-05, "loss": 0.0599, "step": 33477 }, { "epoch": 0.5928798806377205, "grad_norm": 1.0265207290649414, "learning_rate": 1.125611995791598e-05, "loss": 0.0615, "step": 33478 }, { "epoch": 0.5928975901747489, "grad_norm": 0.9616076946258545, "learning_rate": 1.1255286829078523e-05, "loss": 0.0793, "step": 33479 }, { "epoch": 0.5929152997117773, "grad_norm": 0.48388564586639404, "learning_rate": 1.1254453712560792e-05, "loss": 0.0717, "step": 33480 }, { "epoch": 0.5929330092488058, "grad_norm": 0.7425559163093567, "learning_rate": 1.1253620608365527e-05, "loss": 0.0473, "step": 33481 }, { "epoch": 0.5929507187858342, "grad_norm": 0.7759595513343811, "learning_rate": 1.1252787516495476e-05, "loss": 0.0802, "step": 33482 }, { "epoch": 0.5929684283228626, "grad_norm": 0.6072444915771484, "learning_rate": 1.1251954436953373e-05, "loss": 0.0821, "step": 33483 }, { "epoch": 0.592986137859891, "grad_norm": 0.6378865242004395, "learning_rate": 1.125112136974196e-05, "loss": 0.1027, "step": 33484 }, { "epoch": 0.5930038473969195, "grad_norm": 0.7127814292907715, "learning_rate": 1.1250288314863976e-05, "loss": 0.0721, "step": 33485 }, { "epoch": 0.5930215569339479, "grad_norm": 0.27665311098098755, "learning_rate": 1.1249455272322173e-05, "loss": 0.0472, "step": 33486 }, { "epoch": 0.5930392664709763, "grad_norm": 0.6296545267105103, "learning_rate": 1.1248622242119277e-05, "loss": 0.0565, "step": 33487 }, { "epoch": 0.5930569760080047, "grad_norm": 0.3924471437931061, "learning_rate": 1.1247789224258033e-05, "loss": 0.0555, "step": 33488 }, { "epoch": 0.5930746855450332, "grad_norm": 0.7883937358856201, "learning_rate": 1.1246956218741187e-05, "loss": 0.0929, "step": 33489 }, { "epoch": 0.5930923950820616, "grad_norm": 0.7532311677932739, "learning_rate": 1.1246123225571472e-05, "loss": 0.0847, "step": 33490 }, { "epoch": 0.59311010461909, "grad_norm": 0.5553898215293884, "learning_rate": 1.1245290244751632e-05, "loss": 0.0551, "step": 33491 }, { "epoch": 0.5931278141561184, "grad_norm": 0.7967252731323242, "learning_rate": 1.1244457276284407e-05, "loss": 0.083, "step": 33492 }, { "epoch": 0.5931455236931469, "grad_norm": 0.6341999769210815, "learning_rate": 1.1243624320172541e-05, "loss": 0.0571, "step": 33493 }, { "epoch": 0.5931632332301753, "grad_norm": 0.5435712337493896, "learning_rate": 1.1242791376418767e-05, "loss": 0.0495, "step": 33494 }, { "epoch": 0.5931809427672037, "grad_norm": 0.7098497152328491, "learning_rate": 1.124195844502583e-05, "loss": 0.0601, "step": 33495 }, { "epoch": 0.5931986523042322, "grad_norm": 0.40298160910606384, "learning_rate": 1.124112552599648e-05, "loss": 0.0678, "step": 33496 }, { "epoch": 0.5932163618412606, "grad_norm": 1.2088075876235962, "learning_rate": 1.1240292619333431e-05, "loss": 0.0965, "step": 33497 }, { "epoch": 0.593234071378289, "grad_norm": 0.6701462268829346, "learning_rate": 1.1239459725039445e-05, "loss": 0.0571, "step": 33498 }, { "epoch": 0.5932517809153174, "grad_norm": 0.6842267513275146, "learning_rate": 1.1238626843117255e-05, "loss": 0.0535, "step": 33499 }, { "epoch": 0.5932694904523459, "grad_norm": 0.7410164475440979, "learning_rate": 1.1237793973569609e-05, "loss": 0.059, "step": 33500 }, { "epoch": 0.5932871999893743, "grad_norm": 0.696499764919281, "learning_rate": 1.1236961116399236e-05, "loss": 0.0785, "step": 33501 }, { "epoch": 0.5933049095264027, "grad_norm": 0.4692714810371399, "learning_rate": 1.1236128271608877e-05, "loss": 0.0478, "step": 33502 }, { "epoch": 0.5933226190634311, "grad_norm": 0.567559003829956, "learning_rate": 1.1235295439201281e-05, "loss": 0.0621, "step": 33503 }, { "epoch": 0.5933403286004596, "grad_norm": 1.0083750486373901, "learning_rate": 1.1234462619179179e-05, "loss": 0.095, "step": 33504 }, { "epoch": 0.593358038137488, "grad_norm": 0.5653223991394043, "learning_rate": 1.1233629811545312e-05, "loss": 0.051, "step": 33505 }, { "epoch": 0.5933757476745164, "grad_norm": 0.7223659753799438, "learning_rate": 1.1232797016302425e-05, "loss": 0.0633, "step": 33506 }, { "epoch": 0.5933934572115448, "grad_norm": 0.4913223385810852, "learning_rate": 1.1231964233453254e-05, "loss": 0.0878, "step": 33507 }, { "epoch": 0.5934111667485733, "grad_norm": 0.4587551951408386, "learning_rate": 1.1231131463000541e-05, "loss": 0.0544, "step": 33508 }, { "epoch": 0.5934288762856017, "grad_norm": 0.5954827070236206, "learning_rate": 1.1230298704947022e-05, "loss": 0.0625, "step": 33509 }, { "epoch": 0.5934465858226301, "grad_norm": 0.6355915069580078, "learning_rate": 1.1229465959295444e-05, "loss": 0.0631, "step": 33510 }, { "epoch": 0.5934642953596586, "grad_norm": 0.43313148617744446, "learning_rate": 1.1228633226048535e-05, "loss": 0.0475, "step": 33511 }, { "epoch": 0.593482004896687, "grad_norm": 0.7654635310173035, "learning_rate": 1.1227800505209043e-05, "loss": 0.0544, "step": 33512 }, { "epoch": 0.5934997144337154, "grad_norm": 0.46654078364372253, "learning_rate": 1.1226967796779705e-05, "loss": 0.084, "step": 33513 }, { "epoch": 0.5935174239707438, "grad_norm": 0.6186671853065491, "learning_rate": 1.1226135100763268e-05, "loss": 0.0923, "step": 33514 }, { "epoch": 0.5935351335077723, "grad_norm": 0.5515984892845154, "learning_rate": 1.1225302417162459e-05, "loss": 0.0503, "step": 33515 }, { "epoch": 0.5935528430448007, "grad_norm": 0.4763512909412384, "learning_rate": 1.1224469745980022e-05, "loss": 0.0614, "step": 33516 }, { "epoch": 0.5935705525818291, "grad_norm": 1.0011323690414429, "learning_rate": 1.12236370872187e-05, "loss": 0.0748, "step": 33517 }, { "epoch": 0.5935882621188575, "grad_norm": 0.7532384991645813, "learning_rate": 1.1222804440881231e-05, "loss": 0.0623, "step": 33518 }, { "epoch": 0.593605971655886, "grad_norm": 0.3571627736091614, "learning_rate": 1.1221971806970352e-05, "loss": 0.0739, "step": 33519 }, { "epoch": 0.5936236811929144, "grad_norm": 0.5608706474304199, "learning_rate": 1.1221139185488801e-05, "loss": 0.0562, "step": 33520 }, { "epoch": 0.5936413907299428, "grad_norm": 0.5855646729469299, "learning_rate": 1.1220306576439326e-05, "loss": 0.0598, "step": 33521 }, { "epoch": 0.5936591002669712, "grad_norm": 0.590859591960907, "learning_rate": 1.1219473979824657e-05, "loss": 0.0739, "step": 33522 }, { "epoch": 0.5936768098039997, "grad_norm": 0.9412636756896973, "learning_rate": 1.1218641395647533e-05, "loss": 0.0744, "step": 33523 }, { "epoch": 0.5936945193410281, "grad_norm": 0.7885919809341431, "learning_rate": 1.1217808823910698e-05, "loss": 0.0624, "step": 33524 }, { "epoch": 0.5937122288780565, "grad_norm": 0.9673691391944885, "learning_rate": 1.1216976264616895e-05, "loss": 0.0807, "step": 33525 }, { "epoch": 0.593729938415085, "grad_norm": 0.31388843059539795, "learning_rate": 1.121614371776885e-05, "loss": 0.0556, "step": 33526 }, { "epoch": 0.5937476479521134, "grad_norm": 0.7003235816955566, "learning_rate": 1.1215311183369312e-05, "loss": 0.0614, "step": 33527 }, { "epoch": 0.5937653574891418, "grad_norm": 0.2721520662307739, "learning_rate": 1.1214478661421024e-05, "loss": 0.0501, "step": 33528 }, { "epoch": 0.5937830670261702, "grad_norm": 0.4500487148761749, "learning_rate": 1.121364615192671e-05, "loss": 0.0944, "step": 33529 }, { "epoch": 0.5938007765631987, "grad_norm": 0.7148170471191406, "learning_rate": 1.121281365488912e-05, "loss": 0.1064, "step": 33530 }, { "epoch": 0.5938184861002271, "grad_norm": 1.0539429187774658, "learning_rate": 1.1211981170310987e-05, "loss": 0.0445, "step": 33531 }, { "epoch": 0.5938361956372555, "grad_norm": 0.4732655882835388, "learning_rate": 1.1211148698195059e-05, "loss": 0.0711, "step": 33532 }, { "epoch": 0.5938539051742839, "grad_norm": 0.8081061244010925, "learning_rate": 1.1210316238544064e-05, "loss": 0.0614, "step": 33533 }, { "epoch": 0.5938716147113124, "grad_norm": 0.872368574142456, "learning_rate": 1.1209483791360745e-05, "loss": 0.066, "step": 33534 }, { "epoch": 0.5938893242483408, "grad_norm": 0.8183215260505676, "learning_rate": 1.1208651356647843e-05, "loss": 0.0732, "step": 33535 }, { "epoch": 0.5939070337853692, "grad_norm": 0.6061850190162659, "learning_rate": 1.1207818934408091e-05, "loss": 0.0867, "step": 33536 }, { "epoch": 0.5939247433223976, "grad_norm": 0.6654462218284607, "learning_rate": 1.1206986524644233e-05, "loss": 0.0702, "step": 33537 }, { "epoch": 0.5939424528594262, "grad_norm": 0.7848532199859619, "learning_rate": 1.1206154127359007e-05, "loss": 0.0628, "step": 33538 }, { "epoch": 0.5939601623964545, "grad_norm": 0.6217239499092102, "learning_rate": 1.1205321742555153e-05, "loss": 0.059, "step": 33539 }, { "epoch": 0.593977871933483, "grad_norm": 1.1404749155044556, "learning_rate": 1.12044893702354e-05, "loss": 0.1031, "step": 33540 }, { "epoch": 0.5939955814705115, "grad_norm": 0.6156477928161621, "learning_rate": 1.1203657010402494e-05, "loss": 0.0777, "step": 33541 }, { "epoch": 0.5940132910075399, "grad_norm": 0.8360083103179932, "learning_rate": 1.1202824663059183e-05, "loss": 0.0674, "step": 33542 }, { "epoch": 0.5940310005445683, "grad_norm": 0.5170297026634216, "learning_rate": 1.1201992328208184e-05, "loss": 0.0536, "step": 33543 }, { "epoch": 0.5940487100815967, "grad_norm": 0.7498898506164551, "learning_rate": 1.1201160005852248e-05, "loss": 0.0609, "step": 33544 }, { "epoch": 0.5940664196186252, "grad_norm": 0.8725998401641846, "learning_rate": 1.1200327695994112e-05, "loss": 0.0622, "step": 33545 }, { "epoch": 0.5940841291556536, "grad_norm": 0.7146342396736145, "learning_rate": 1.1199495398636515e-05, "loss": 0.086, "step": 33546 }, { "epoch": 0.594101838692682, "grad_norm": 0.49714231491088867, "learning_rate": 1.1198663113782192e-05, "loss": 0.0699, "step": 33547 }, { "epoch": 0.5941195482297104, "grad_norm": 0.6485574245452881, "learning_rate": 1.1197830841433882e-05, "loss": 0.0628, "step": 33548 }, { "epoch": 0.5941372577667389, "grad_norm": 0.4881914556026459, "learning_rate": 1.1196998581594325e-05, "loss": 0.0406, "step": 33549 }, { "epoch": 0.5941549673037673, "grad_norm": 0.6074205040931702, "learning_rate": 1.119616633426626e-05, "loss": 0.0758, "step": 33550 }, { "epoch": 0.5941726768407957, "grad_norm": 0.7677546739578247, "learning_rate": 1.119533409945242e-05, "loss": 0.1038, "step": 33551 }, { "epoch": 0.5941903863778241, "grad_norm": 0.5047823786735535, "learning_rate": 1.1194501877155547e-05, "loss": 0.0642, "step": 33552 }, { "epoch": 0.5942080959148526, "grad_norm": 0.22584371268749237, "learning_rate": 1.1193669667378383e-05, "loss": 0.0455, "step": 33553 }, { "epoch": 0.594225805451881, "grad_norm": 0.553109347820282, "learning_rate": 1.1192837470123655e-05, "loss": 0.0764, "step": 33554 }, { "epoch": 0.5942435149889094, "grad_norm": 0.7455114126205444, "learning_rate": 1.1192005285394103e-05, "loss": 0.0799, "step": 33555 }, { "epoch": 0.5942612245259379, "grad_norm": 0.9879838824272156, "learning_rate": 1.1191173113192472e-05, "loss": 0.0917, "step": 33556 }, { "epoch": 0.5942789340629663, "grad_norm": 0.492276132106781, "learning_rate": 1.1190340953521502e-05, "loss": 0.0663, "step": 33557 }, { "epoch": 0.5942966435999947, "grad_norm": 0.5593964457511902, "learning_rate": 1.1189508806383921e-05, "loss": 0.0861, "step": 33558 }, { "epoch": 0.5943143531370231, "grad_norm": 0.8015251159667969, "learning_rate": 1.1188676671782469e-05, "loss": 0.0609, "step": 33559 }, { "epoch": 0.5943320626740516, "grad_norm": 0.6630579233169556, "learning_rate": 1.1187844549719889e-05, "loss": 0.0733, "step": 33560 }, { "epoch": 0.59434977221108, "grad_norm": 0.47933563590049744, "learning_rate": 1.118701244019891e-05, "loss": 0.0728, "step": 33561 }, { "epoch": 0.5943674817481084, "grad_norm": 0.6826772093772888, "learning_rate": 1.1186180343222277e-05, "loss": 0.0422, "step": 33562 }, { "epoch": 0.5943851912851368, "grad_norm": 0.447843462228775, "learning_rate": 1.1185348258792722e-05, "loss": 0.0676, "step": 33563 }, { "epoch": 0.5944029008221653, "grad_norm": 0.9456165432929993, "learning_rate": 1.118451618691299e-05, "loss": 0.0518, "step": 33564 }, { "epoch": 0.5944206103591937, "grad_norm": 0.9152765274047852, "learning_rate": 1.1183684127585809e-05, "loss": 0.0772, "step": 33565 }, { "epoch": 0.5944383198962221, "grad_norm": 0.7104371786117554, "learning_rate": 1.1182852080813924e-05, "loss": 0.0492, "step": 33566 }, { "epoch": 0.5944560294332505, "grad_norm": 1.412894606590271, "learning_rate": 1.1182020046600074e-05, "loss": 0.096, "step": 33567 }, { "epoch": 0.594473738970279, "grad_norm": 0.5932635068893433, "learning_rate": 1.1181188024946987e-05, "loss": 0.0637, "step": 33568 }, { "epoch": 0.5944914485073074, "grad_norm": 0.6848117113113403, "learning_rate": 1.1180356015857402e-05, "loss": 0.0783, "step": 33569 }, { "epoch": 0.5945091580443358, "grad_norm": 0.5527969002723694, "learning_rate": 1.1179524019334062e-05, "loss": 0.0528, "step": 33570 }, { "epoch": 0.5945268675813643, "grad_norm": 0.3890452980995178, "learning_rate": 1.1178692035379706e-05, "loss": 0.1016, "step": 33571 }, { "epoch": 0.5945445771183927, "grad_norm": 0.6559169888496399, "learning_rate": 1.1177860063997063e-05, "loss": 0.054, "step": 33572 }, { "epoch": 0.5945622866554211, "grad_norm": 0.8016462326049805, "learning_rate": 1.1177028105188873e-05, "loss": 0.0662, "step": 33573 }, { "epoch": 0.5945799961924495, "grad_norm": 0.9796372056007385, "learning_rate": 1.1176196158957878e-05, "loss": 0.055, "step": 33574 }, { "epoch": 0.594597705729478, "grad_norm": 0.5701266527175903, "learning_rate": 1.1175364225306806e-05, "loss": 0.0599, "step": 33575 }, { "epoch": 0.5946154152665064, "grad_norm": 0.7162317037582397, "learning_rate": 1.1174532304238399e-05, "loss": 0.0802, "step": 33576 }, { "epoch": 0.5946331248035348, "grad_norm": 0.5326839685440063, "learning_rate": 1.1173700395755395e-05, "loss": 0.0842, "step": 33577 }, { "epoch": 0.5946508343405632, "grad_norm": 0.49785155057907104, "learning_rate": 1.117286849986053e-05, "loss": 0.0644, "step": 33578 }, { "epoch": 0.5946685438775917, "grad_norm": 0.6355206966400146, "learning_rate": 1.117203661655654e-05, "loss": 0.0557, "step": 33579 }, { "epoch": 0.5946862534146201, "grad_norm": 0.5514817833900452, "learning_rate": 1.1171204745846163e-05, "loss": 0.0697, "step": 33580 }, { "epoch": 0.5947039629516485, "grad_norm": 0.42483142018318176, "learning_rate": 1.1170372887732134e-05, "loss": 0.0415, "step": 33581 }, { "epoch": 0.5947216724886769, "grad_norm": 0.9158239960670471, "learning_rate": 1.1169541042217197e-05, "loss": 0.1206, "step": 33582 }, { "epoch": 0.5947393820257054, "grad_norm": 0.7965800762176514, "learning_rate": 1.1168709209304077e-05, "loss": 0.0655, "step": 33583 }, { "epoch": 0.5947570915627338, "grad_norm": 0.6225286722183228, "learning_rate": 1.1167877388995511e-05, "loss": 0.0623, "step": 33584 }, { "epoch": 0.5947748010997622, "grad_norm": 0.7531189918518066, "learning_rate": 1.1167045581294253e-05, "loss": 0.0866, "step": 33585 }, { "epoch": 0.5947925106367907, "grad_norm": 0.48508480191230774, "learning_rate": 1.116621378620302e-05, "loss": 0.056, "step": 33586 }, { "epoch": 0.5948102201738191, "grad_norm": 0.7294641733169556, "learning_rate": 1.1165382003724554e-05, "loss": 0.0557, "step": 33587 }, { "epoch": 0.5948279297108475, "grad_norm": 0.6453022956848145, "learning_rate": 1.1164550233861595e-05, "loss": 0.0619, "step": 33588 }, { "epoch": 0.5948456392478759, "grad_norm": 0.8924037218093872, "learning_rate": 1.1163718476616881e-05, "loss": 0.0893, "step": 33589 }, { "epoch": 0.5948633487849044, "grad_norm": 0.5572963356971741, "learning_rate": 1.1162886731993143e-05, "loss": 0.0521, "step": 33590 }, { "epoch": 0.5948810583219328, "grad_norm": 0.4393954277038574, "learning_rate": 1.1162054999993118e-05, "loss": 0.0727, "step": 33591 }, { "epoch": 0.5948987678589612, "grad_norm": 0.5615992546081543, "learning_rate": 1.1161223280619548e-05, "loss": 0.0797, "step": 33592 }, { "epoch": 0.5949164773959896, "grad_norm": 0.5260151624679565, "learning_rate": 1.1160391573875162e-05, "loss": 0.0415, "step": 33593 }, { "epoch": 0.5949341869330181, "grad_norm": 0.6033211350440979, "learning_rate": 1.1159559879762699e-05, "loss": 0.0855, "step": 33594 }, { "epoch": 0.5949518964700465, "grad_norm": 0.6553758382797241, "learning_rate": 1.1158728198284894e-05, "loss": 0.0801, "step": 33595 }, { "epoch": 0.5949696060070749, "grad_norm": 0.6275747418403625, "learning_rate": 1.1157896529444494e-05, "loss": 0.0662, "step": 33596 }, { "epoch": 0.5949873155441033, "grad_norm": 0.5718557238578796, "learning_rate": 1.1157064873244218e-05, "loss": 0.0621, "step": 33597 }, { "epoch": 0.5950050250811318, "grad_norm": 0.49597230553627014, "learning_rate": 1.1156233229686804e-05, "loss": 0.042, "step": 33598 }, { "epoch": 0.5950227346181602, "grad_norm": 0.6951913833618164, "learning_rate": 1.1155401598775008e-05, "loss": 0.0536, "step": 33599 }, { "epoch": 0.5950404441551886, "grad_norm": 0.6291202306747437, "learning_rate": 1.1154569980511543e-05, "loss": 0.0615, "step": 33600 }, { "epoch": 0.5950581536922172, "grad_norm": 0.7168087363243103, "learning_rate": 1.1153738374899154e-05, "loss": 0.0999, "step": 33601 }, { "epoch": 0.5950758632292455, "grad_norm": 0.38537031412124634, "learning_rate": 1.1152906781940577e-05, "loss": 0.0807, "step": 33602 }, { "epoch": 0.595093572766274, "grad_norm": 0.5226237773895264, "learning_rate": 1.115207520163855e-05, "loss": 0.0636, "step": 33603 }, { "epoch": 0.5951112823033023, "grad_norm": 0.4506753385066986, "learning_rate": 1.1151243633995806e-05, "loss": 0.05, "step": 33604 }, { "epoch": 0.5951289918403309, "grad_norm": 0.7246270179748535, "learning_rate": 1.115041207901508e-05, "loss": 0.0908, "step": 33605 }, { "epoch": 0.5951467013773593, "grad_norm": 0.3298993408679962, "learning_rate": 1.1149580536699111e-05, "loss": 0.052, "step": 33606 }, { "epoch": 0.5951644109143877, "grad_norm": 0.7427345514297485, "learning_rate": 1.1148749007050629e-05, "loss": 0.1063, "step": 33607 }, { "epoch": 0.595182120451416, "grad_norm": 0.6890249252319336, "learning_rate": 1.1147917490072374e-05, "loss": 0.0925, "step": 33608 }, { "epoch": 0.5951998299884446, "grad_norm": 0.18030892312526703, "learning_rate": 1.1147085985767082e-05, "loss": 0.0383, "step": 33609 }, { "epoch": 0.595217539525473, "grad_norm": 1.0579497814178467, "learning_rate": 1.1146254494137493e-05, "loss": 0.0721, "step": 33610 }, { "epoch": 0.5952352490625014, "grad_norm": 0.8961895704269409, "learning_rate": 1.1145423015186328e-05, "loss": 0.066, "step": 33611 }, { "epoch": 0.5952529585995298, "grad_norm": 0.8640614151954651, "learning_rate": 1.1144591548916332e-05, "loss": 0.0847, "step": 33612 }, { "epoch": 0.5952706681365583, "grad_norm": 0.5251383185386658, "learning_rate": 1.1143760095330247e-05, "loss": 0.0654, "step": 33613 }, { "epoch": 0.5952883776735867, "grad_norm": 0.7936388850212097, "learning_rate": 1.1142928654430795e-05, "loss": 0.064, "step": 33614 }, { "epoch": 0.5953060872106151, "grad_norm": 0.6602411866188049, "learning_rate": 1.114209722622072e-05, "loss": 0.0629, "step": 33615 }, { "epoch": 0.5953237967476436, "grad_norm": 0.5274016857147217, "learning_rate": 1.1141265810702751e-05, "loss": 0.0601, "step": 33616 }, { "epoch": 0.595341506284672, "grad_norm": 0.6091123819351196, "learning_rate": 1.1140434407879634e-05, "loss": 0.0712, "step": 33617 }, { "epoch": 0.5953592158217004, "grad_norm": 0.4916018843650818, "learning_rate": 1.1139603017754093e-05, "loss": 0.0629, "step": 33618 }, { "epoch": 0.5953769253587288, "grad_norm": 0.6237441897392273, "learning_rate": 1.1138771640328866e-05, "loss": 0.0509, "step": 33619 }, { "epoch": 0.5953946348957573, "grad_norm": 0.8068086504936218, "learning_rate": 1.1137940275606691e-05, "loss": 0.0738, "step": 33620 }, { "epoch": 0.5954123444327857, "grad_norm": 0.6440829634666443, "learning_rate": 1.1137108923590307e-05, "loss": 0.1074, "step": 33621 }, { "epoch": 0.5954300539698141, "grad_norm": 0.5157739520072937, "learning_rate": 1.113627758428244e-05, "loss": 0.0654, "step": 33622 }, { "epoch": 0.5954477635068425, "grad_norm": 0.582642674446106, "learning_rate": 1.1135446257685828e-05, "loss": 0.0748, "step": 33623 }, { "epoch": 0.595465473043871, "grad_norm": 0.46330007910728455, "learning_rate": 1.1134614943803212e-05, "loss": 0.0672, "step": 33624 }, { "epoch": 0.5954831825808994, "grad_norm": 0.6871828436851501, "learning_rate": 1.1133783642637318e-05, "loss": 0.0526, "step": 33625 }, { "epoch": 0.5955008921179278, "grad_norm": 0.6553740501403809, "learning_rate": 1.1132952354190881e-05, "loss": 0.0437, "step": 33626 }, { "epoch": 0.5955186016549562, "grad_norm": 0.4305940866470337, "learning_rate": 1.1132121078466639e-05, "loss": 0.0654, "step": 33627 }, { "epoch": 0.5955363111919847, "grad_norm": 0.5931012630462646, "learning_rate": 1.1131289815467337e-05, "loss": 0.0494, "step": 33628 }, { "epoch": 0.5955540207290131, "grad_norm": 0.4179224967956543, "learning_rate": 1.1130458565195695e-05, "loss": 0.0519, "step": 33629 }, { "epoch": 0.5955717302660415, "grad_norm": 0.3444674015045166, "learning_rate": 1.1129627327654451e-05, "loss": 0.035, "step": 33630 }, { "epoch": 0.59558943980307, "grad_norm": 0.8481588959693909, "learning_rate": 1.1128796102846346e-05, "loss": 0.0932, "step": 33631 }, { "epoch": 0.5956071493400984, "grad_norm": 0.2786201238632202, "learning_rate": 1.1127964890774107e-05, "loss": 0.081, "step": 33632 }, { "epoch": 0.5956248588771268, "grad_norm": 0.5298492312431335, "learning_rate": 1.1127133691440472e-05, "loss": 0.0508, "step": 33633 }, { "epoch": 0.5956425684141552, "grad_norm": 0.7098419070243835, "learning_rate": 1.1126302504848175e-05, "loss": 0.0622, "step": 33634 }, { "epoch": 0.5956602779511837, "grad_norm": 0.6439082026481628, "learning_rate": 1.1125471330999953e-05, "loss": 0.0646, "step": 33635 }, { "epoch": 0.5956779874882121, "grad_norm": 0.5866620540618896, "learning_rate": 1.1124640169898537e-05, "loss": 0.099, "step": 33636 }, { "epoch": 0.5956956970252405, "grad_norm": 0.672418475151062, "learning_rate": 1.1123809021546663e-05, "loss": 0.078, "step": 33637 }, { "epoch": 0.5957134065622689, "grad_norm": 0.8432122468948364, "learning_rate": 1.1122977885947071e-05, "loss": 0.0603, "step": 33638 }, { "epoch": 0.5957311160992974, "grad_norm": 0.6638570427894592, "learning_rate": 1.1122146763102483e-05, "loss": 0.0537, "step": 33639 }, { "epoch": 0.5957488256363258, "grad_norm": 0.8939924836158752, "learning_rate": 1.112131565301564e-05, "loss": 0.0734, "step": 33640 }, { "epoch": 0.5957665351733542, "grad_norm": 0.4828951060771942, "learning_rate": 1.1120484555689276e-05, "loss": 0.0646, "step": 33641 }, { "epoch": 0.5957842447103826, "grad_norm": 0.8305283188819885, "learning_rate": 1.1119653471126128e-05, "loss": 0.0564, "step": 33642 }, { "epoch": 0.5958019542474111, "grad_norm": 0.6243571043014526, "learning_rate": 1.1118822399328926e-05, "loss": 0.0587, "step": 33643 }, { "epoch": 0.5958196637844395, "grad_norm": 0.404938280582428, "learning_rate": 1.1117991340300404e-05, "loss": 0.0521, "step": 33644 }, { "epoch": 0.5958373733214679, "grad_norm": 0.7922526001930237, "learning_rate": 1.1117160294043303e-05, "loss": 0.0559, "step": 33645 }, { "epoch": 0.5958550828584964, "grad_norm": 0.3996886610984802, "learning_rate": 1.1116329260560348e-05, "loss": 0.0473, "step": 33646 }, { "epoch": 0.5958727923955248, "grad_norm": 0.6288394927978516, "learning_rate": 1.1115498239854276e-05, "loss": 0.0945, "step": 33647 }, { "epoch": 0.5958905019325532, "grad_norm": 0.7362467050552368, "learning_rate": 1.1114667231927825e-05, "loss": 0.0339, "step": 33648 }, { "epoch": 0.5959082114695816, "grad_norm": 0.7440853118896484, "learning_rate": 1.1113836236783727e-05, "loss": 0.073, "step": 33649 }, { "epoch": 0.5959259210066101, "grad_norm": 0.894550621509552, "learning_rate": 1.111300525442471e-05, "loss": 0.052, "step": 33650 }, { "epoch": 0.5959436305436385, "grad_norm": 0.743460476398468, "learning_rate": 1.1112174284853513e-05, "loss": 0.0697, "step": 33651 }, { "epoch": 0.5959613400806669, "grad_norm": 0.48326587677001953, "learning_rate": 1.111134332807287e-05, "loss": 0.0497, "step": 33652 }, { "epoch": 0.5959790496176953, "grad_norm": 0.2755998969078064, "learning_rate": 1.111051238408552e-05, "loss": 0.0556, "step": 33653 }, { "epoch": 0.5959967591547238, "grad_norm": 0.7191884517669678, "learning_rate": 1.1109681452894186e-05, "loss": 0.0781, "step": 33654 }, { "epoch": 0.5960144686917522, "grad_norm": 0.4337444007396698, "learning_rate": 1.1108850534501604e-05, "loss": 0.0532, "step": 33655 }, { "epoch": 0.5960321782287806, "grad_norm": 0.868365466594696, "learning_rate": 1.1108019628910516e-05, "loss": 0.0581, "step": 33656 }, { "epoch": 0.596049887765809, "grad_norm": 0.5262768268585205, "learning_rate": 1.1107188736123646e-05, "loss": 0.0642, "step": 33657 }, { "epoch": 0.5960675973028375, "grad_norm": 0.5234658122062683, "learning_rate": 1.110635785614373e-05, "loss": 0.0544, "step": 33658 }, { "epoch": 0.5960853068398659, "grad_norm": 0.18707798421382904, "learning_rate": 1.1105526988973506e-05, "loss": 0.0373, "step": 33659 }, { "epoch": 0.5961030163768943, "grad_norm": 0.6090641617774963, "learning_rate": 1.1104696134615705e-05, "loss": 0.0748, "step": 33660 }, { "epoch": 0.5961207259139228, "grad_norm": 0.6022455096244812, "learning_rate": 1.1103865293073057e-05, "loss": 0.0998, "step": 33661 }, { "epoch": 0.5961384354509512, "grad_norm": 0.542539656162262, "learning_rate": 1.1103034464348296e-05, "loss": 0.0637, "step": 33662 }, { "epoch": 0.5961561449879796, "grad_norm": 1.040208101272583, "learning_rate": 1.1102203648444167e-05, "loss": 0.0559, "step": 33663 }, { "epoch": 0.596173854525008, "grad_norm": 0.6953917145729065, "learning_rate": 1.1101372845363383e-05, "loss": 0.0575, "step": 33664 }, { "epoch": 0.5961915640620365, "grad_norm": 0.5652273297309875, "learning_rate": 1.110054205510869e-05, "loss": 0.0467, "step": 33665 }, { "epoch": 0.596209273599065, "grad_norm": 0.7389281392097473, "learning_rate": 1.1099711277682821e-05, "loss": 0.0784, "step": 33666 }, { "epoch": 0.5962269831360933, "grad_norm": 0.27583232522010803, "learning_rate": 1.1098880513088512e-05, "loss": 0.023, "step": 33667 }, { "epoch": 0.5962446926731217, "grad_norm": 0.3810187876224518, "learning_rate": 1.1098049761328489e-05, "loss": 0.0656, "step": 33668 }, { "epoch": 0.5962624022101503, "grad_norm": 0.43390917778015137, "learning_rate": 1.1097219022405483e-05, "loss": 0.0316, "step": 33669 }, { "epoch": 0.5962801117471787, "grad_norm": 0.6003546118736267, "learning_rate": 1.1096388296322237e-05, "loss": 0.0719, "step": 33670 }, { "epoch": 0.596297821284207, "grad_norm": 0.5182526707649231, "learning_rate": 1.1095557583081475e-05, "loss": 0.0729, "step": 33671 }, { "epoch": 0.5963155308212355, "grad_norm": 0.28550443053245544, "learning_rate": 1.1094726882685935e-05, "loss": 0.0435, "step": 33672 }, { "epoch": 0.596333240358264, "grad_norm": 0.4191380441188812, "learning_rate": 1.1093896195138347e-05, "loss": 0.0386, "step": 33673 }, { "epoch": 0.5963509498952924, "grad_norm": 0.5458725094795227, "learning_rate": 1.109306552044145e-05, "loss": 0.0797, "step": 33674 }, { "epoch": 0.5963686594323208, "grad_norm": 0.450651079416275, "learning_rate": 1.1092234858597967e-05, "loss": 0.0651, "step": 33675 }, { "epoch": 0.5963863689693493, "grad_norm": 0.6723929047584534, "learning_rate": 1.1091404209610637e-05, "loss": 0.0455, "step": 33676 }, { "epoch": 0.5964040785063777, "grad_norm": 0.9509382247924805, "learning_rate": 1.1090573573482198e-05, "loss": 0.1083, "step": 33677 }, { "epoch": 0.5964217880434061, "grad_norm": 0.2663627862930298, "learning_rate": 1.108974295021537e-05, "loss": 0.0729, "step": 33678 }, { "epoch": 0.5964394975804345, "grad_norm": 0.45482707023620605, "learning_rate": 1.1088912339812892e-05, "loss": 0.0523, "step": 33679 }, { "epoch": 0.596457207117463, "grad_norm": 1.233392357826233, "learning_rate": 1.1088081742277497e-05, "loss": 0.047, "step": 33680 }, { "epoch": 0.5964749166544914, "grad_norm": 0.45152604579925537, "learning_rate": 1.1087251157611922e-05, "loss": 0.0625, "step": 33681 }, { "epoch": 0.5964926261915198, "grad_norm": 0.301716148853302, "learning_rate": 1.1086420585818894e-05, "loss": 0.0317, "step": 33682 }, { "epoch": 0.5965103357285482, "grad_norm": 0.7916281223297119, "learning_rate": 1.1085590026901142e-05, "loss": 0.0595, "step": 33683 }, { "epoch": 0.5965280452655767, "grad_norm": 0.6646267771720886, "learning_rate": 1.1084759480861408e-05, "loss": 0.0723, "step": 33684 }, { "epoch": 0.5965457548026051, "grad_norm": 0.7014980316162109, "learning_rate": 1.1083928947702416e-05, "loss": 0.1068, "step": 33685 }, { "epoch": 0.5965634643396335, "grad_norm": 0.778541088104248, "learning_rate": 1.1083098427426901e-05, "loss": 0.0657, "step": 33686 }, { "epoch": 0.5965811738766619, "grad_norm": 0.36845165491104126, "learning_rate": 1.1082267920037596e-05, "loss": 0.0578, "step": 33687 }, { "epoch": 0.5965988834136904, "grad_norm": 0.6013950109481812, "learning_rate": 1.1081437425537238e-05, "loss": 0.063, "step": 33688 }, { "epoch": 0.5966165929507188, "grad_norm": 0.662575364112854, "learning_rate": 1.1080606943928548e-05, "loss": 0.0798, "step": 33689 }, { "epoch": 0.5966343024877472, "grad_norm": 0.44705161452293396, "learning_rate": 1.1079776475214269e-05, "loss": 0.07, "step": 33690 }, { "epoch": 0.5966520120247757, "grad_norm": 0.3838996887207031, "learning_rate": 1.1078946019397125e-05, "loss": 0.0671, "step": 33691 }, { "epoch": 0.5966697215618041, "grad_norm": 1.1654454469680786, "learning_rate": 1.107811557647986e-05, "loss": 0.1019, "step": 33692 }, { "epoch": 0.5966874310988325, "grad_norm": 0.5473713874816895, "learning_rate": 1.1077285146465189e-05, "loss": 0.0707, "step": 33693 }, { "epoch": 0.5967051406358609, "grad_norm": 0.7530761957168579, "learning_rate": 1.1076454729355856e-05, "loss": 0.0649, "step": 33694 }, { "epoch": 0.5967228501728894, "grad_norm": 0.49516618251800537, "learning_rate": 1.1075624325154598e-05, "loss": 0.0445, "step": 33695 }, { "epoch": 0.5967405597099178, "grad_norm": 0.5790170431137085, "learning_rate": 1.107479393386413e-05, "loss": 0.0725, "step": 33696 }, { "epoch": 0.5967582692469462, "grad_norm": 0.8230782747268677, "learning_rate": 1.1073963555487193e-05, "loss": 0.0836, "step": 33697 }, { "epoch": 0.5967759787839746, "grad_norm": 0.4387566149234772, "learning_rate": 1.1073133190026522e-05, "loss": 0.0506, "step": 33698 }, { "epoch": 0.5967936883210031, "grad_norm": 0.4379711151123047, "learning_rate": 1.1072302837484846e-05, "loss": 0.0611, "step": 33699 }, { "epoch": 0.5968113978580315, "grad_norm": 0.6753502488136292, "learning_rate": 1.1071472497864895e-05, "loss": 0.0679, "step": 33700 }, { "epoch": 0.5968291073950599, "grad_norm": 1.0786429643630981, "learning_rate": 1.1070642171169399e-05, "loss": 0.0935, "step": 33701 }, { "epoch": 0.5968468169320883, "grad_norm": 0.5998932719230652, "learning_rate": 1.1069811857401098e-05, "loss": 0.0703, "step": 33702 }, { "epoch": 0.5968645264691168, "grad_norm": 0.8533619046211243, "learning_rate": 1.1068981556562717e-05, "loss": 0.0671, "step": 33703 }, { "epoch": 0.5968822360061452, "grad_norm": 0.7481235265731812, "learning_rate": 1.1068151268656985e-05, "loss": 0.0837, "step": 33704 }, { "epoch": 0.5968999455431736, "grad_norm": 0.650892972946167, "learning_rate": 1.1067320993686642e-05, "loss": 0.0925, "step": 33705 }, { "epoch": 0.5969176550802021, "grad_norm": 0.6595343947410583, "learning_rate": 1.106649073165442e-05, "loss": 0.0765, "step": 33706 }, { "epoch": 0.5969353646172305, "grad_norm": 0.420297771692276, "learning_rate": 1.1065660482563036e-05, "loss": 0.0552, "step": 33707 }, { "epoch": 0.5969530741542589, "grad_norm": 0.6685729622840881, "learning_rate": 1.1064830246415232e-05, "loss": 0.0672, "step": 33708 }, { "epoch": 0.5969707836912873, "grad_norm": 0.6445016860961914, "learning_rate": 1.1064000023213747e-05, "loss": 0.0724, "step": 33709 }, { "epoch": 0.5969884932283158, "grad_norm": 0.5396537184715271, "learning_rate": 1.1063169812961297e-05, "loss": 0.0801, "step": 33710 }, { "epoch": 0.5970062027653442, "grad_norm": 0.7083719372749329, "learning_rate": 1.106233961566062e-05, "loss": 0.0576, "step": 33711 }, { "epoch": 0.5970239123023726, "grad_norm": 0.39356306195259094, "learning_rate": 1.1061509431314447e-05, "loss": 0.0546, "step": 33712 }, { "epoch": 0.597041621839401, "grad_norm": 0.5236517786979675, "learning_rate": 1.1060679259925514e-05, "loss": 0.0907, "step": 33713 }, { "epoch": 0.5970593313764295, "grad_norm": 0.6816428899765015, "learning_rate": 1.1059849101496546e-05, "loss": 0.0419, "step": 33714 }, { "epoch": 0.5970770409134579, "grad_norm": 0.5973107218742371, "learning_rate": 1.1059018956030273e-05, "loss": 0.0572, "step": 33715 }, { "epoch": 0.5970947504504863, "grad_norm": 0.886680543422699, "learning_rate": 1.1058188823529434e-05, "loss": 0.0676, "step": 33716 }, { "epoch": 0.5971124599875147, "grad_norm": 1.1692839860916138, "learning_rate": 1.105735870399675e-05, "loss": 0.069, "step": 33717 }, { "epoch": 0.5971301695245432, "grad_norm": 0.556679904460907, "learning_rate": 1.1056528597434958e-05, "loss": 0.0547, "step": 33718 }, { "epoch": 0.5971478790615716, "grad_norm": 0.7000488042831421, "learning_rate": 1.1055698503846789e-05, "loss": 0.0563, "step": 33719 }, { "epoch": 0.5971655885986, "grad_norm": 0.3646160960197449, "learning_rate": 1.1054868423234978e-05, "loss": 0.0488, "step": 33720 }, { "epoch": 0.5971832981356285, "grad_norm": 0.6599844098091125, "learning_rate": 1.1054038355602246e-05, "loss": 0.0658, "step": 33721 }, { "epoch": 0.5972010076726569, "grad_norm": 0.38187137246131897, "learning_rate": 1.1053208300951323e-05, "loss": 0.0396, "step": 33722 }, { "epoch": 0.5972187172096853, "grad_norm": 0.5728660821914673, "learning_rate": 1.1052378259284951e-05, "loss": 0.0778, "step": 33723 }, { "epoch": 0.5972364267467137, "grad_norm": 0.4972880780696869, "learning_rate": 1.105154823060586e-05, "loss": 0.0709, "step": 33724 }, { "epoch": 0.5972541362837422, "grad_norm": 0.7062140107154846, "learning_rate": 1.1050718214916772e-05, "loss": 0.0947, "step": 33725 }, { "epoch": 0.5972718458207706, "grad_norm": 0.6111928224563599, "learning_rate": 1.104988821222042e-05, "loss": 0.0646, "step": 33726 }, { "epoch": 0.597289555357799, "grad_norm": 0.7804625034332275, "learning_rate": 1.104905822251954e-05, "loss": 0.0484, "step": 33727 }, { "epoch": 0.5973072648948274, "grad_norm": 0.712148129940033, "learning_rate": 1.1048228245816856e-05, "loss": 0.0515, "step": 33728 }, { "epoch": 0.597324974431856, "grad_norm": 0.44106119871139526, "learning_rate": 1.1047398282115099e-05, "loss": 0.0582, "step": 33729 }, { "epoch": 0.5973426839688843, "grad_norm": 0.5837900042533875, "learning_rate": 1.1046568331417003e-05, "loss": 0.0448, "step": 33730 }, { "epoch": 0.5973603935059127, "grad_norm": 0.7944479584693909, "learning_rate": 1.1045738393725304e-05, "loss": 0.0817, "step": 33731 }, { "epoch": 0.5973781030429413, "grad_norm": 0.6580334901809692, "learning_rate": 1.1044908469042718e-05, "loss": 0.0575, "step": 33732 }, { "epoch": 0.5973958125799697, "grad_norm": 0.6339590549468994, "learning_rate": 1.1044078557371984e-05, "loss": 0.0696, "step": 33733 }, { "epoch": 0.597413522116998, "grad_norm": 0.6808101534843445, "learning_rate": 1.1043248658715838e-05, "loss": 0.084, "step": 33734 }, { "epoch": 0.5974312316540265, "grad_norm": 0.7832216620445251, "learning_rate": 1.1042418773076998e-05, "loss": 0.0683, "step": 33735 }, { "epoch": 0.597448941191055, "grad_norm": 0.46550360321998596, "learning_rate": 1.10415889004582e-05, "loss": 0.0639, "step": 33736 }, { "epoch": 0.5974666507280834, "grad_norm": 0.49831825494766235, "learning_rate": 1.1040759040862171e-05, "loss": 0.0521, "step": 33737 }, { "epoch": 0.5974843602651118, "grad_norm": 0.3975641131401062, "learning_rate": 1.1039929194291654e-05, "loss": 0.0718, "step": 33738 }, { "epoch": 0.5975020698021402, "grad_norm": 0.7645334005355835, "learning_rate": 1.1039099360749362e-05, "loss": 0.052, "step": 33739 }, { "epoch": 0.5975197793391687, "grad_norm": 0.7658402919769287, "learning_rate": 1.1038269540238033e-05, "loss": 0.0603, "step": 33740 }, { "epoch": 0.5975374888761971, "grad_norm": 0.42592889070510864, "learning_rate": 1.10374397327604e-05, "loss": 0.0778, "step": 33741 }, { "epoch": 0.5975551984132255, "grad_norm": 0.7383300065994263, "learning_rate": 1.1036609938319185e-05, "loss": 0.0779, "step": 33742 }, { "epoch": 0.5975729079502539, "grad_norm": 0.6717290282249451, "learning_rate": 1.1035780156917126e-05, "loss": 0.0637, "step": 33743 }, { "epoch": 0.5975906174872824, "grad_norm": 0.5858556032180786, "learning_rate": 1.1034950388556946e-05, "loss": 0.0745, "step": 33744 }, { "epoch": 0.5976083270243108, "grad_norm": 0.4392870366573334, "learning_rate": 1.1034120633241382e-05, "loss": 0.0568, "step": 33745 }, { "epoch": 0.5976260365613392, "grad_norm": 0.5969425439834595, "learning_rate": 1.1033290890973157e-05, "loss": 0.0494, "step": 33746 }, { "epoch": 0.5976437460983677, "grad_norm": 0.7829885482788086, "learning_rate": 1.1032461161755004e-05, "loss": 0.0831, "step": 33747 }, { "epoch": 0.5976614556353961, "grad_norm": 0.68540358543396, "learning_rate": 1.1031631445589657e-05, "loss": 0.0717, "step": 33748 }, { "epoch": 0.5976791651724245, "grad_norm": 0.34336382150650024, "learning_rate": 1.1030801742479837e-05, "loss": 0.0586, "step": 33749 }, { "epoch": 0.5976968747094529, "grad_norm": 0.3689188063144684, "learning_rate": 1.1029972052428278e-05, "loss": 0.0532, "step": 33750 }, { "epoch": 0.5977145842464814, "grad_norm": 0.874809741973877, "learning_rate": 1.1029142375437709e-05, "loss": 0.0875, "step": 33751 }, { "epoch": 0.5977322937835098, "grad_norm": 0.6398457884788513, "learning_rate": 1.1028312711510862e-05, "loss": 0.0641, "step": 33752 }, { "epoch": 0.5977500033205382, "grad_norm": 0.448503315448761, "learning_rate": 1.1027483060650462e-05, "loss": 0.0765, "step": 33753 }, { "epoch": 0.5977677128575666, "grad_norm": 0.7426121830940247, "learning_rate": 1.1026653422859243e-05, "loss": 0.0784, "step": 33754 }, { "epoch": 0.5977854223945951, "grad_norm": 0.3936804533004761, "learning_rate": 1.1025823798139929e-05, "loss": 0.0346, "step": 33755 }, { "epoch": 0.5978031319316235, "grad_norm": 0.5918641686439514, "learning_rate": 1.1024994186495258e-05, "loss": 0.0701, "step": 33756 }, { "epoch": 0.5978208414686519, "grad_norm": 0.601203441619873, "learning_rate": 1.1024164587927949e-05, "loss": 0.0574, "step": 33757 }, { "epoch": 0.5978385510056803, "grad_norm": 0.49199622869491577, "learning_rate": 1.1023335002440738e-05, "loss": 0.0718, "step": 33758 }, { "epoch": 0.5978562605427088, "grad_norm": 0.403534471988678, "learning_rate": 1.102250543003636e-05, "loss": 0.0541, "step": 33759 }, { "epoch": 0.5978739700797372, "grad_norm": 0.6187118291854858, "learning_rate": 1.1021675870717525e-05, "loss": 0.077, "step": 33760 }, { "epoch": 0.5978916796167656, "grad_norm": 0.9423379898071289, "learning_rate": 1.1020846324486977e-05, "loss": 0.0878, "step": 33761 }, { "epoch": 0.5979093891537941, "grad_norm": 0.95298832654953, "learning_rate": 1.1020016791347444e-05, "loss": 0.0506, "step": 33762 }, { "epoch": 0.5979270986908225, "grad_norm": 0.9328184723854065, "learning_rate": 1.1019187271301658e-05, "loss": 0.0508, "step": 33763 }, { "epoch": 0.5979448082278509, "grad_norm": 1.1650031805038452, "learning_rate": 1.1018357764352339e-05, "loss": 0.1084, "step": 33764 }, { "epoch": 0.5979625177648793, "grad_norm": 0.35898536443710327, "learning_rate": 1.1017528270502219e-05, "loss": 0.037, "step": 33765 }, { "epoch": 0.5979802273019078, "grad_norm": 0.33625996112823486, "learning_rate": 1.101669878975403e-05, "loss": 0.0714, "step": 33766 }, { "epoch": 0.5979979368389362, "grad_norm": 0.6572103500366211, "learning_rate": 1.1015869322110496e-05, "loss": 0.0787, "step": 33767 }, { "epoch": 0.5980156463759646, "grad_norm": 0.5871402621269226, "learning_rate": 1.1015039867574352e-05, "loss": 0.041, "step": 33768 }, { "epoch": 0.598033355912993, "grad_norm": 0.8586127161979675, "learning_rate": 1.1014210426148323e-05, "loss": 0.0969, "step": 33769 }, { "epoch": 0.5980510654500215, "grad_norm": 0.6520212292671204, "learning_rate": 1.101338099783514e-05, "loss": 0.0569, "step": 33770 }, { "epoch": 0.5980687749870499, "grad_norm": 0.8449375033378601, "learning_rate": 1.1012551582637528e-05, "loss": 0.0802, "step": 33771 }, { "epoch": 0.5980864845240783, "grad_norm": 0.5655487179756165, "learning_rate": 1.1011722180558217e-05, "loss": 0.0786, "step": 33772 }, { "epoch": 0.5981041940611067, "grad_norm": 0.6886317133903503, "learning_rate": 1.1010892791599943e-05, "loss": 0.0534, "step": 33773 }, { "epoch": 0.5981219035981352, "grad_norm": 0.4948781728744507, "learning_rate": 1.101006341576542e-05, "loss": 0.0581, "step": 33774 }, { "epoch": 0.5981396131351636, "grad_norm": 0.5394506454467773, "learning_rate": 1.1009234053057387e-05, "loss": 0.0671, "step": 33775 }, { "epoch": 0.598157322672192, "grad_norm": 1.0019617080688477, "learning_rate": 1.100840470347857e-05, "loss": 0.068, "step": 33776 }, { "epoch": 0.5981750322092205, "grad_norm": 0.6790338754653931, "learning_rate": 1.1007575367031707e-05, "loss": 0.0439, "step": 33777 }, { "epoch": 0.5981927417462489, "grad_norm": 0.6622337102890015, "learning_rate": 1.1006746043719507e-05, "loss": 0.0792, "step": 33778 }, { "epoch": 0.5982104512832773, "grad_norm": 0.7067539095878601, "learning_rate": 1.100591673354471e-05, "loss": 0.0373, "step": 33779 }, { "epoch": 0.5982281608203057, "grad_norm": 0.424510657787323, "learning_rate": 1.1005087436510046e-05, "loss": 0.053, "step": 33780 }, { "epoch": 0.5982458703573342, "grad_norm": 0.586589515209198, "learning_rate": 1.100425815261824e-05, "loss": 0.0675, "step": 33781 }, { "epoch": 0.5982635798943626, "grad_norm": 0.5364192724227905, "learning_rate": 1.1003428881872017e-05, "loss": 0.0819, "step": 33782 }, { "epoch": 0.598281289431391, "grad_norm": 0.4122663140296936, "learning_rate": 1.100259962427411e-05, "loss": 0.0567, "step": 33783 }, { "epoch": 0.5982989989684194, "grad_norm": 0.23477353155612946, "learning_rate": 1.1001770379827248e-05, "loss": 0.0452, "step": 33784 }, { "epoch": 0.5983167085054479, "grad_norm": 0.610563337802887, "learning_rate": 1.1000941148534155e-05, "loss": 0.0868, "step": 33785 }, { "epoch": 0.5983344180424763, "grad_norm": 0.5517613887786865, "learning_rate": 1.1000111930397562e-05, "loss": 0.0826, "step": 33786 }, { "epoch": 0.5983521275795047, "grad_norm": 1.080288052558899, "learning_rate": 1.0999282725420201e-05, "loss": 0.0724, "step": 33787 }, { "epoch": 0.5983698371165331, "grad_norm": 0.8830957412719727, "learning_rate": 1.099845353360479e-05, "loss": 0.1203, "step": 33788 }, { "epoch": 0.5983875466535616, "grad_norm": 0.9943189024925232, "learning_rate": 1.099762435495406e-05, "loss": 0.0764, "step": 33789 }, { "epoch": 0.59840525619059, "grad_norm": 0.5204887986183167, "learning_rate": 1.099679518947074e-05, "loss": 0.0566, "step": 33790 }, { "epoch": 0.5984229657276184, "grad_norm": 0.5490157008171082, "learning_rate": 1.0995966037157568e-05, "loss": 0.081, "step": 33791 }, { "epoch": 0.598440675264647, "grad_norm": 0.5851316452026367, "learning_rate": 1.0995136898017257e-05, "loss": 0.0584, "step": 33792 }, { "epoch": 0.5984583848016753, "grad_norm": 0.5098736882209778, "learning_rate": 1.0994307772052539e-05, "loss": 0.0565, "step": 33793 }, { "epoch": 0.5984760943387037, "grad_norm": 0.8927489519119263, "learning_rate": 1.0993478659266145e-05, "loss": 0.0729, "step": 33794 }, { "epoch": 0.5984938038757321, "grad_norm": 0.8235571980476379, "learning_rate": 1.0992649559660802e-05, "loss": 0.0781, "step": 33795 }, { "epoch": 0.5985115134127607, "grad_norm": 0.7996466159820557, "learning_rate": 1.0991820473239234e-05, "loss": 0.0714, "step": 33796 }, { "epoch": 0.598529222949789, "grad_norm": 0.7887166142463684, "learning_rate": 1.0990991400004173e-05, "loss": 0.0719, "step": 33797 }, { "epoch": 0.5985469324868175, "grad_norm": 0.46205073595046997, "learning_rate": 1.0990162339958348e-05, "loss": 0.0567, "step": 33798 }, { "epoch": 0.5985646420238458, "grad_norm": 0.7219568490982056, "learning_rate": 1.0989333293104478e-05, "loss": 0.0696, "step": 33799 }, { "epoch": 0.5985823515608744, "grad_norm": 0.6178891062736511, "learning_rate": 1.0988504259445296e-05, "loss": 0.0435, "step": 33800 }, { "epoch": 0.5986000610979028, "grad_norm": 0.8914109468460083, "learning_rate": 1.0987675238983532e-05, "loss": 0.0549, "step": 33801 }, { "epoch": 0.5986177706349312, "grad_norm": 0.860399603843689, "learning_rate": 1.0986846231721916e-05, "loss": 0.0792, "step": 33802 }, { "epoch": 0.5986354801719596, "grad_norm": 0.4095458686351776, "learning_rate": 1.0986017237663158e-05, "loss": 0.0854, "step": 33803 }, { "epoch": 0.5986531897089881, "grad_norm": 0.8345672488212585, "learning_rate": 1.0985188256810002e-05, "loss": 0.0775, "step": 33804 }, { "epoch": 0.5986708992460165, "grad_norm": 0.5222535729408264, "learning_rate": 1.0984359289165177e-05, "loss": 0.0774, "step": 33805 }, { "epoch": 0.5986886087830449, "grad_norm": 0.5142199397087097, "learning_rate": 1.0983530334731396e-05, "loss": 0.0432, "step": 33806 }, { "epoch": 0.5987063183200734, "grad_norm": 0.48170730471611023, "learning_rate": 1.0982701393511397e-05, "loss": 0.0649, "step": 33807 }, { "epoch": 0.5987240278571018, "grad_norm": 0.8739252686500549, "learning_rate": 1.0981872465507902e-05, "loss": 0.0793, "step": 33808 }, { "epoch": 0.5987417373941302, "grad_norm": 0.2202111929655075, "learning_rate": 1.0981043550723644e-05, "loss": 0.0497, "step": 33809 }, { "epoch": 0.5987594469311586, "grad_norm": 0.3723234534263611, "learning_rate": 1.0980214649161342e-05, "loss": 0.048, "step": 33810 }, { "epoch": 0.5987771564681871, "grad_norm": 0.4258573651313782, "learning_rate": 1.0979385760823729e-05, "loss": 0.0636, "step": 33811 }, { "epoch": 0.5987948660052155, "grad_norm": 0.6070601940155029, "learning_rate": 1.0978556885713533e-05, "loss": 0.0563, "step": 33812 }, { "epoch": 0.5988125755422439, "grad_norm": 0.850281834602356, "learning_rate": 1.0977728023833474e-05, "loss": 0.0637, "step": 33813 }, { "epoch": 0.5988302850792723, "grad_norm": 0.567658543586731, "learning_rate": 1.0976899175186282e-05, "loss": 0.0595, "step": 33814 }, { "epoch": 0.5988479946163008, "grad_norm": 0.5226947665214539, "learning_rate": 1.0976070339774688e-05, "loss": 0.0446, "step": 33815 }, { "epoch": 0.5988657041533292, "grad_norm": 0.614205002784729, "learning_rate": 1.0975241517601419e-05, "loss": 0.0592, "step": 33816 }, { "epoch": 0.5988834136903576, "grad_norm": 0.5233567357063293, "learning_rate": 1.0974412708669194e-05, "loss": 0.0699, "step": 33817 }, { "epoch": 0.598901123227386, "grad_norm": 0.6392578482627869, "learning_rate": 1.097358391298074e-05, "loss": 0.0707, "step": 33818 }, { "epoch": 0.5989188327644145, "grad_norm": 0.7444573044776917, "learning_rate": 1.0972755130538797e-05, "loss": 0.0529, "step": 33819 }, { "epoch": 0.5989365423014429, "grad_norm": 0.37342023849487305, "learning_rate": 1.0971926361346077e-05, "loss": 0.0639, "step": 33820 }, { "epoch": 0.5989542518384713, "grad_norm": 0.6287705302238464, "learning_rate": 1.0971097605405309e-05, "loss": 0.0618, "step": 33821 }, { "epoch": 0.5989719613754998, "grad_norm": 0.7221165299415588, "learning_rate": 1.0970268862719225e-05, "loss": 0.0529, "step": 33822 }, { "epoch": 0.5989896709125282, "grad_norm": 0.7032178640365601, "learning_rate": 1.0969440133290551e-05, "loss": 0.0808, "step": 33823 }, { "epoch": 0.5990073804495566, "grad_norm": 0.643906831741333, "learning_rate": 1.0968611417122009e-05, "loss": 0.0616, "step": 33824 }, { "epoch": 0.599025089986585, "grad_norm": 0.2511507272720337, "learning_rate": 1.0967782714216325e-05, "loss": 0.0368, "step": 33825 }, { "epoch": 0.5990427995236135, "grad_norm": 0.4036675989627838, "learning_rate": 1.0966954024576232e-05, "loss": 0.0559, "step": 33826 }, { "epoch": 0.5990605090606419, "grad_norm": 0.46114763617515564, "learning_rate": 1.0966125348204455e-05, "loss": 0.0505, "step": 33827 }, { "epoch": 0.5990782185976703, "grad_norm": 0.5780101418495178, "learning_rate": 1.0965296685103711e-05, "loss": 0.0522, "step": 33828 }, { "epoch": 0.5990959281346987, "grad_norm": 0.6441976428031921, "learning_rate": 1.0964468035276736e-05, "loss": 0.0865, "step": 33829 }, { "epoch": 0.5991136376717272, "grad_norm": 0.8085691332817078, "learning_rate": 1.0963639398726257e-05, "loss": 0.0527, "step": 33830 }, { "epoch": 0.5991313472087556, "grad_norm": 1.2530850172042847, "learning_rate": 1.0962810775454992e-05, "loss": 0.0767, "step": 33831 }, { "epoch": 0.599149056745784, "grad_norm": 0.32160308957099915, "learning_rate": 1.0961982165465665e-05, "loss": 0.0424, "step": 33832 }, { "epoch": 0.5991667662828124, "grad_norm": 0.5163793563842773, "learning_rate": 1.0961153568761015e-05, "loss": 0.0649, "step": 33833 }, { "epoch": 0.5991844758198409, "grad_norm": 0.4820074141025543, "learning_rate": 1.0960324985343766e-05, "loss": 0.0488, "step": 33834 }, { "epoch": 0.5992021853568693, "grad_norm": 0.39911019802093506, "learning_rate": 1.0959496415216633e-05, "loss": 0.0497, "step": 33835 }, { "epoch": 0.5992198948938977, "grad_norm": 0.6937578916549683, "learning_rate": 1.0958667858382349e-05, "loss": 0.0659, "step": 33836 }, { "epoch": 0.5992376044309262, "grad_norm": 0.5673028230667114, "learning_rate": 1.0957839314843641e-05, "loss": 0.0614, "step": 33837 }, { "epoch": 0.5992553139679546, "grad_norm": 0.8683996796607971, "learning_rate": 1.095701078460323e-05, "loss": 0.0792, "step": 33838 }, { "epoch": 0.599273023504983, "grad_norm": 0.6720879077911377, "learning_rate": 1.0956182267663844e-05, "loss": 0.0601, "step": 33839 }, { "epoch": 0.5992907330420114, "grad_norm": 0.6673786044120789, "learning_rate": 1.0955353764028211e-05, "loss": 0.0548, "step": 33840 }, { "epoch": 0.5993084425790399, "grad_norm": 0.39133143424987793, "learning_rate": 1.0954525273699056e-05, "loss": 0.0345, "step": 33841 }, { "epoch": 0.5993261521160683, "grad_norm": 0.5198445916175842, "learning_rate": 1.0953696796679102e-05, "loss": 0.0831, "step": 33842 }, { "epoch": 0.5993438616530967, "grad_norm": 0.5152477025985718, "learning_rate": 1.0952868332971075e-05, "loss": 0.0694, "step": 33843 }, { "epoch": 0.5993615711901251, "grad_norm": 0.8985307216644287, "learning_rate": 1.095203988257771e-05, "loss": 0.0403, "step": 33844 }, { "epoch": 0.5993792807271536, "grad_norm": 0.3907272517681122, "learning_rate": 1.0951211445501716e-05, "loss": 0.0583, "step": 33845 }, { "epoch": 0.599396990264182, "grad_norm": 0.35134831070899963, "learning_rate": 1.0950383021745829e-05, "loss": 0.0456, "step": 33846 }, { "epoch": 0.5994146998012104, "grad_norm": 0.5274512767791748, "learning_rate": 1.0949554611312767e-05, "loss": 0.0632, "step": 33847 }, { "epoch": 0.5994324093382388, "grad_norm": 0.4389834702014923, "learning_rate": 1.0948726214205271e-05, "loss": 0.0449, "step": 33848 }, { "epoch": 0.5994501188752673, "grad_norm": 0.655383288860321, "learning_rate": 1.0947897830426049e-05, "loss": 0.0735, "step": 33849 }, { "epoch": 0.5994678284122957, "grad_norm": 0.9140822291374207, "learning_rate": 1.0947069459977834e-05, "loss": 0.0713, "step": 33850 }, { "epoch": 0.5994855379493241, "grad_norm": 0.4673781096935272, "learning_rate": 1.0946241102863355e-05, "loss": 0.0319, "step": 33851 }, { "epoch": 0.5995032474863526, "grad_norm": 0.8527989387512207, "learning_rate": 1.0945412759085328e-05, "loss": 0.0697, "step": 33852 }, { "epoch": 0.599520957023381, "grad_norm": 0.9688906073570251, "learning_rate": 1.0944584428646483e-05, "loss": 0.0887, "step": 33853 }, { "epoch": 0.5995386665604094, "grad_norm": 1.4662941694259644, "learning_rate": 1.0943756111549545e-05, "loss": 0.055, "step": 33854 }, { "epoch": 0.5995563760974378, "grad_norm": 0.6550889015197754, "learning_rate": 1.0942927807797243e-05, "loss": 0.0676, "step": 33855 }, { "epoch": 0.5995740856344663, "grad_norm": 0.8664511442184448, "learning_rate": 1.0942099517392294e-05, "loss": 0.0504, "step": 33856 }, { "epoch": 0.5995917951714947, "grad_norm": 0.8144042491912842, "learning_rate": 1.0941271240337429e-05, "loss": 0.0768, "step": 33857 }, { "epoch": 0.5996095047085231, "grad_norm": 0.6453643441200256, "learning_rate": 1.0940442976635376e-05, "loss": 0.0669, "step": 33858 }, { "epoch": 0.5996272142455515, "grad_norm": 0.8553091883659363, "learning_rate": 1.0939614726288849e-05, "loss": 0.0811, "step": 33859 }, { "epoch": 0.59964492378258, "grad_norm": 0.873253345489502, "learning_rate": 1.0938786489300577e-05, "loss": 0.0806, "step": 33860 }, { "epoch": 0.5996626333196085, "grad_norm": 0.6421207785606384, "learning_rate": 1.0937958265673286e-05, "loss": 0.0465, "step": 33861 }, { "epoch": 0.5996803428566368, "grad_norm": 0.797763466835022, "learning_rate": 1.093713005540971e-05, "loss": 0.083, "step": 33862 }, { "epoch": 0.5996980523936652, "grad_norm": 0.5889663696289062, "learning_rate": 1.0936301858512561e-05, "loss": 0.0587, "step": 33863 }, { "epoch": 0.5997157619306938, "grad_norm": 0.6475849151611328, "learning_rate": 1.0935473674984564e-05, "loss": 0.0771, "step": 33864 }, { "epoch": 0.5997334714677222, "grad_norm": 0.40282297134399414, "learning_rate": 1.0934645504828452e-05, "loss": 0.0729, "step": 33865 }, { "epoch": 0.5997511810047506, "grad_norm": 0.5357057452201843, "learning_rate": 1.0933817348046943e-05, "loss": 0.0678, "step": 33866 }, { "epoch": 0.5997688905417791, "grad_norm": 0.6837273240089417, "learning_rate": 1.0932989204642764e-05, "loss": 0.0766, "step": 33867 }, { "epoch": 0.5997866000788075, "grad_norm": 0.495978981256485, "learning_rate": 1.0932161074618639e-05, "loss": 0.0853, "step": 33868 }, { "epoch": 0.5998043096158359, "grad_norm": 0.6299071311950684, "learning_rate": 1.0931332957977295e-05, "loss": 0.0815, "step": 33869 }, { "epoch": 0.5998220191528643, "grad_norm": 0.4894956350326538, "learning_rate": 1.093050485472145e-05, "loss": 0.0453, "step": 33870 }, { "epoch": 0.5998397286898928, "grad_norm": 0.45148706436157227, "learning_rate": 1.0929676764853833e-05, "loss": 0.0486, "step": 33871 }, { "epoch": 0.5998574382269212, "grad_norm": 0.578624963760376, "learning_rate": 1.092884868837717e-05, "loss": 0.0509, "step": 33872 }, { "epoch": 0.5998751477639496, "grad_norm": 0.5258735418319702, "learning_rate": 1.0928020625294186e-05, "loss": 0.0467, "step": 33873 }, { "epoch": 0.599892857300978, "grad_norm": 0.5086133480072021, "learning_rate": 1.09271925756076e-05, "loss": 0.0444, "step": 33874 }, { "epoch": 0.5999105668380065, "grad_norm": 0.3438016474246979, "learning_rate": 1.0926364539320134e-05, "loss": 0.0436, "step": 33875 }, { "epoch": 0.5999282763750349, "grad_norm": 0.6400997042655945, "learning_rate": 1.0925536516434522e-05, "loss": 0.0695, "step": 33876 }, { "epoch": 0.5999459859120633, "grad_norm": 0.47756949067115784, "learning_rate": 1.0924708506953479e-05, "loss": 0.0605, "step": 33877 }, { "epoch": 0.5999636954490917, "grad_norm": 0.356679230928421, "learning_rate": 1.0923880510879734e-05, "loss": 0.069, "step": 33878 }, { "epoch": 0.5999814049861202, "grad_norm": 0.702277660369873, "learning_rate": 1.092305252821601e-05, "loss": 0.0911, "step": 33879 }, { "epoch": 0.5999991145231486, "grad_norm": 0.5871865153312683, "learning_rate": 1.0922224558965032e-05, "loss": 0.0671, "step": 33880 }, { "epoch": 0.600016824060177, "grad_norm": 0.3925192356109619, "learning_rate": 1.0921396603129521e-05, "loss": 0.0748, "step": 33881 }, { "epoch": 0.6000345335972055, "grad_norm": 0.511594831943512, "learning_rate": 1.0920568660712202e-05, "loss": 0.0734, "step": 33882 }, { "epoch": 0.6000522431342339, "grad_norm": 0.3852694034576416, "learning_rate": 1.0919740731715806e-05, "loss": 0.0661, "step": 33883 }, { "epoch": 0.6000699526712623, "grad_norm": 0.4906241297721863, "learning_rate": 1.091891281614304e-05, "loss": 0.0511, "step": 33884 }, { "epoch": 0.6000876622082907, "grad_norm": 0.6671611070632935, "learning_rate": 1.0918084913996644e-05, "loss": 0.0737, "step": 33885 }, { "epoch": 0.6001053717453192, "grad_norm": 0.6970402598381042, "learning_rate": 1.0917257025279333e-05, "loss": 0.0339, "step": 33886 }, { "epoch": 0.6001230812823476, "grad_norm": 0.7541754245758057, "learning_rate": 1.0916429149993842e-05, "loss": 0.0884, "step": 33887 }, { "epoch": 0.600140790819376, "grad_norm": 0.6747713685035706, "learning_rate": 1.0915601288142877e-05, "loss": 0.0562, "step": 33888 }, { "epoch": 0.6001585003564044, "grad_norm": 0.4132566750049591, "learning_rate": 1.0914773439729173e-05, "loss": 0.0633, "step": 33889 }, { "epoch": 0.6001762098934329, "grad_norm": 0.6281277537345886, "learning_rate": 1.0913945604755453e-05, "loss": 0.0909, "step": 33890 }, { "epoch": 0.6001939194304613, "grad_norm": 0.5181326270103455, "learning_rate": 1.0913117783224436e-05, "loss": 0.0555, "step": 33891 }, { "epoch": 0.6002116289674897, "grad_norm": 0.6229572296142578, "learning_rate": 1.0912289975138848e-05, "loss": 0.0486, "step": 33892 }, { "epoch": 0.6002293385045181, "grad_norm": 1.0111395120620728, "learning_rate": 1.0911462180501413e-05, "loss": 0.0901, "step": 33893 }, { "epoch": 0.6002470480415466, "grad_norm": 0.3282740116119385, "learning_rate": 1.0910634399314857e-05, "loss": 0.043, "step": 33894 }, { "epoch": 0.600264757578575, "grad_norm": 0.4750216007232666, "learning_rate": 1.0909806631581897e-05, "loss": 0.0554, "step": 33895 }, { "epoch": 0.6002824671156034, "grad_norm": 0.37709489464759827, "learning_rate": 1.0908978877305258e-05, "loss": 0.0666, "step": 33896 }, { "epoch": 0.6003001766526319, "grad_norm": 0.5559116005897522, "learning_rate": 1.0908151136487666e-05, "loss": 0.0873, "step": 33897 }, { "epoch": 0.6003178861896603, "grad_norm": 0.3609386086463928, "learning_rate": 1.0907323409131846e-05, "loss": 0.0759, "step": 33898 }, { "epoch": 0.6003355957266887, "grad_norm": 0.5080658793449402, "learning_rate": 1.0906495695240516e-05, "loss": 0.056, "step": 33899 }, { "epoch": 0.6003533052637171, "grad_norm": 0.4182146191596985, "learning_rate": 1.09056679948164e-05, "loss": 0.0455, "step": 33900 }, { "epoch": 0.6003710148007456, "grad_norm": 0.9629865288734436, "learning_rate": 1.0904840307862228e-05, "loss": 0.0655, "step": 33901 }, { "epoch": 0.600388724337774, "grad_norm": 0.3429965674877167, "learning_rate": 1.0904012634380712e-05, "loss": 0.0504, "step": 33902 }, { "epoch": 0.6004064338748024, "grad_norm": 0.9592057466506958, "learning_rate": 1.090318497437458e-05, "loss": 0.0805, "step": 33903 }, { "epoch": 0.6004241434118308, "grad_norm": 0.6135124564170837, "learning_rate": 1.0902357327846553e-05, "loss": 0.0538, "step": 33904 }, { "epoch": 0.6004418529488593, "grad_norm": 0.6672097444534302, "learning_rate": 1.0901529694799361e-05, "loss": 0.0558, "step": 33905 }, { "epoch": 0.6004595624858877, "grad_norm": 0.5109516978263855, "learning_rate": 1.0900702075235719e-05, "loss": 0.0386, "step": 33906 }, { "epoch": 0.6004772720229161, "grad_norm": 0.6873820424079895, "learning_rate": 1.089987446915835e-05, "loss": 0.0707, "step": 33907 }, { "epoch": 0.6004949815599445, "grad_norm": 0.5318111777305603, "learning_rate": 1.0899046876569985e-05, "loss": 0.0589, "step": 33908 }, { "epoch": 0.600512691096973, "grad_norm": 0.7866751551628113, "learning_rate": 1.0898219297473338e-05, "loss": 0.0955, "step": 33909 }, { "epoch": 0.6005304006340014, "grad_norm": 0.5508787631988525, "learning_rate": 1.0897391731871134e-05, "loss": 0.0582, "step": 33910 }, { "epoch": 0.6005481101710298, "grad_norm": 0.5031009316444397, "learning_rate": 1.0896564179766095e-05, "loss": 0.0684, "step": 33911 }, { "epoch": 0.6005658197080583, "grad_norm": 0.5404229164123535, "learning_rate": 1.0895736641160952e-05, "loss": 0.08, "step": 33912 }, { "epoch": 0.6005835292450867, "grad_norm": 0.6693605184555054, "learning_rate": 1.0894909116058412e-05, "loss": 0.0584, "step": 33913 }, { "epoch": 0.6006012387821151, "grad_norm": 0.4368038475513458, "learning_rate": 1.0894081604461208e-05, "loss": 0.0681, "step": 33914 }, { "epoch": 0.6006189483191435, "grad_norm": 0.5115548372268677, "learning_rate": 1.0893254106372067e-05, "loss": 0.0502, "step": 33915 }, { "epoch": 0.600636657856172, "grad_norm": 0.8708000779151917, "learning_rate": 1.0892426621793697e-05, "loss": 0.0917, "step": 33916 }, { "epoch": 0.6006543673932004, "grad_norm": 0.6973490715026855, "learning_rate": 1.0891599150728828e-05, "loss": 0.0558, "step": 33917 }, { "epoch": 0.6006720769302288, "grad_norm": 0.45689770579338074, "learning_rate": 1.0890771693180183e-05, "loss": 0.0569, "step": 33918 }, { "epoch": 0.6006897864672572, "grad_norm": 0.5735307335853577, "learning_rate": 1.0889944249150487e-05, "loss": 0.0608, "step": 33919 }, { "epoch": 0.6007074960042857, "grad_norm": 0.4707545042037964, "learning_rate": 1.0889116818642455e-05, "loss": 0.0998, "step": 33920 }, { "epoch": 0.6007252055413141, "grad_norm": 0.5727633833885193, "learning_rate": 1.0888289401658814e-05, "loss": 0.0609, "step": 33921 }, { "epoch": 0.6007429150783425, "grad_norm": 0.8550640940666199, "learning_rate": 1.0887461998202288e-05, "loss": 0.0751, "step": 33922 }, { "epoch": 0.6007606246153709, "grad_norm": 0.2926798164844513, "learning_rate": 1.0886634608275591e-05, "loss": 0.0675, "step": 33923 }, { "epoch": 0.6007783341523995, "grad_norm": 0.7563980221748352, "learning_rate": 1.0885807231881454e-05, "loss": 0.0722, "step": 33924 }, { "epoch": 0.6007960436894278, "grad_norm": 0.5687029957771301, "learning_rate": 1.088497986902259e-05, "loss": 0.0882, "step": 33925 }, { "epoch": 0.6008137532264562, "grad_norm": 0.35067033767700195, "learning_rate": 1.0884152519701736e-05, "loss": 0.0687, "step": 33926 }, { "epoch": 0.6008314627634848, "grad_norm": 0.8154893517494202, "learning_rate": 1.0883325183921591e-05, "loss": 0.06, "step": 33927 }, { "epoch": 0.6008491723005132, "grad_norm": 0.6868240833282471, "learning_rate": 1.0882497861684896e-05, "loss": 0.0617, "step": 33928 }, { "epoch": 0.6008668818375416, "grad_norm": 1.2659046649932861, "learning_rate": 1.0881670552994364e-05, "loss": 0.0742, "step": 33929 }, { "epoch": 0.60088459137457, "grad_norm": 0.6846275329589844, "learning_rate": 1.088084325785273e-05, "loss": 0.0835, "step": 33930 }, { "epoch": 0.6009023009115985, "grad_norm": 0.7493043541908264, "learning_rate": 1.0880015976262694e-05, "loss": 0.098, "step": 33931 }, { "epoch": 0.6009200104486269, "grad_norm": 0.5573022365570068, "learning_rate": 1.0879188708226993e-05, "loss": 0.0585, "step": 33932 }, { "epoch": 0.6009377199856553, "grad_norm": 0.7139771580696106, "learning_rate": 1.0878361453748344e-05, "loss": 0.0911, "step": 33933 }, { "epoch": 0.6009554295226837, "grad_norm": 0.8860463500022888, "learning_rate": 1.0877534212829468e-05, "loss": 0.0846, "step": 33934 }, { "epoch": 0.6009731390597122, "grad_norm": 0.7302172780036926, "learning_rate": 1.0876706985473086e-05, "loss": 0.0616, "step": 33935 }, { "epoch": 0.6009908485967406, "grad_norm": 0.6714249849319458, "learning_rate": 1.087587977168192e-05, "loss": 0.0509, "step": 33936 }, { "epoch": 0.601008558133769, "grad_norm": 0.7635172605514526, "learning_rate": 1.0875052571458698e-05, "loss": 0.1129, "step": 33937 }, { "epoch": 0.6010262676707974, "grad_norm": 1.2808729410171509, "learning_rate": 1.0874225384806131e-05, "loss": 0.0706, "step": 33938 }, { "epoch": 0.6010439772078259, "grad_norm": 0.43663063645362854, "learning_rate": 1.0873398211726947e-05, "loss": 0.0575, "step": 33939 }, { "epoch": 0.6010616867448543, "grad_norm": 0.6273606419563293, "learning_rate": 1.087257105222387e-05, "loss": 0.073, "step": 33940 }, { "epoch": 0.6010793962818827, "grad_norm": 0.8291774988174438, "learning_rate": 1.0871743906299613e-05, "loss": 0.092, "step": 33941 }, { "epoch": 0.6010971058189112, "grad_norm": 0.63231360912323, "learning_rate": 1.0870916773956894e-05, "loss": 0.0755, "step": 33942 }, { "epoch": 0.6011148153559396, "grad_norm": 0.6116027235984802, "learning_rate": 1.0870089655198448e-05, "loss": 0.044, "step": 33943 }, { "epoch": 0.601132524892968, "grad_norm": 0.8872941136360168, "learning_rate": 1.0869262550026993e-05, "loss": 0.0763, "step": 33944 }, { "epoch": 0.6011502344299964, "grad_norm": 0.5664514303207397, "learning_rate": 1.086843545844524e-05, "loss": 0.0549, "step": 33945 }, { "epoch": 0.6011679439670249, "grad_norm": 0.5843383073806763, "learning_rate": 1.0867608380455918e-05, "loss": 0.0437, "step": 33946 }, { "epoch": 0.6011856535040533, "grad_norm": 0.4043780565261841, "learning_rate": 1.086678131606175e-05, "loss": 0.0445, "step": 33947 }, { "epoch": 0.6012033630410817, "grad_norm": 0.7176496386528015, "learning_rate": 1.086595426526545e-05, "loss": 0.0958, "step": 33948 }, { "epoch": 0.6012210725781101, "grad_norm": 1.0945088863372803, "learning_rate": 1.0865127228069743e-05, "loss": 0.0843, "step": 33949 }, { "epoch": 0.6012387821151386, "grad_norm": 0.9748667478561401, "learning_rate": 1.0864300204477347e-05, "loss": 0.0626, "step": 33950 }, { "epoch": 0.601256491652167, "grad_norm": 0.39464056491851807, "learning_rate": 1.0863473194490989e-05, "loss": 0.0416, "step": 33951 }, { "epoch": 0.6012742011891954, "grad_norm": 0.5995495319366455, "learning_rate": 1.0862646198113382e-05, "loss": 0.0782, "step": 33952 }, { "epoch": 0.6012919107262238, "grad_norm": 0.6268581748008728, "learning_rate": 1.0861819215347251e-05, "loss": 0.055, "step": 33953 }, { "epoch": 0.6013096202632523, "grad_norm": 0.5316526293754578, "learning_rate": 1.0860992246195325e-05, "loss": 0.0603, "step": 33954 }, { "epoch": 0.6013273298002807, "grad_norm": 0.8500575423240662, "learning_rate": 1.0860165290660305e-05, "loss": 0.0791, "step": 33955 }, { "epoch": 0.6013450393373091, "grad_norm": 0.5164319276809692, "learning_rate": 1.0859338348744923e-05, "loss": 0.063, "step": 33956 }, { "epoch": 0.6013627488743376, "grad_norm": 0.7957336902618408, "learning_rate": 1.0858511420451901e-05, "loss": 0.0767, "step": 33957 }, { "epoch": 0.601380458411366, "grad_norm": 0.9863541126251221, "learning_rate": 1.0857684505783965e-05, "loss": 0.0842, "step": 33958 }, { "epoch": 0.6013981679483944, "grad_norm": 0.7064663171768188, "learning_rate": 1.0856857604743821e-05, "loss": 0.081, "step": 33959 }, { "epoch": 0.6014158774854228, "grad_norm": 0.7372981905937195, "learning_rate": 1.0856030717334193e-05, "loss": 0.0729, "step": 33960 }, { "epoch": 0.6014335870224513, "grad_norm": 0.34517917037010193, "learning_rate": 1.0855203843557812e-05, "loss": 0.0655, "step": 33961 }, { "epoch": 0.6014512965594797, "grad_norm": 0.7221327424049377, "learning_rate": 1.0854376983417386e-05, "loss": 0.0532, "step": 33962 }, { "epoch": 0.6014690060965081, "grad_norm": 0.6797929406166077, "learning_rate": 1.0853550136915643e-05, "loss": 0.0862, "step": 33963 }, { "epoch": 0.6014867156335365, "grad_norm": 0.4759012460708618, "learning_rate": 1.0852723304055297e-05, "loss": 0.0625, "step": 33964 }, { "epoch": 0.601504425170565, "grad_norm": 0.5208002924919128, "learning_rate": 1.0851896484839078e-05, "loss": 0.0666, "step": 33965 }, { "epoch": 0.6015221347075934, "grad_norm": 0.7318927645683289, "learning_rate": 1.0851069679269697e-05, "loss": 0.0611, "step": 33966 }, { "epoch": 0.6015398442446218, "grad_norm": 0.4540040194988251, "learning_rate": 1.0850242887349876e-05, "loss": 0.0711, "step": 33967 }, { "epoch": 0.6015575537816502, "grad_norm": 0.9641212224960327, "learning_rate": 1.0849416109082335e-05, "loss": 0.0854, "step": 33968 }, { "epoch": 0.6015752633186787, "grad_norm": 0.519213855266571, "learning_rate": 1.0848589344469804e-05, "loss": 0.0654, "step": 33969 }, { "epoch": 0.6015929728557071, "grad_norm": 0.7806009650230408, "learning_rate": 1.0847762593514986e-05, "loss": 0.0584, "step": 33970 }, { "epoch": 0.6016106823927355, "grad_norm": 0.6540153622627258, "learning_rate": 1.0846935856220607e-05, "loss": 0.0639, "step": 33971 }, { "epoch": 0.601628391929764, "grad_norm": 0.7728628516197205, "learning_rate": 1.0846109132589397e-05, "loss": 0.0644, "step": 33972 }, { "epoch": 0.6016461014667924, "grad_norm": 0.5363419055938721, "learning_rate": 1.0845282422624062e-05, "loss": 0.0873, "step": 33973 }, { "epoch": 0.6016638110038208, "grad_norm": 0.36675381660461426, "learning_rate": 1.0844455726327328e-05, "loss": 0.0686, "step": 33974 }, { "epoch": 0.6016815205408492, "grad_norm": 0.7422680258750916, "learning_rate": 1.0843629043701916e-05, "loss": 0.0705, "step": 33975 }, { "epoch": 0.6016992300778777, "grad_norm": 0.7586536407470703, "learning_rate": 1.0842802374750543e-05, "loss": 0.0633, "step": 33976 }, { "epoch": 0.6017169396149061, "grad_norm": 0.824195146560669, "learning_rate": 1.084197571947593e-05, "loss": 0.0664, "step": 33977 }, { "epoch": 0.6017346491519345, "grad_norm": 0.6575470566749573, "learning_rate": 1.0841149077880796e-05, "loss": 0.0726, "step": 33978 }, { "epoch": 0.6017523586889629, "grad_norm": 0.8840572834014893, "learning_rate": 1.0840322449967863e-05, "loss": 0.0719, "step": 33979 }, { "epoch": 0.6017700682259914, "grad_norm": 0.9870665669441223, "learning_rate": 1.0839495835739845e-05, "loss": 0.0905, "step": 33980 }, { "epoch": 0.6017877777630198, "grad_norm": 0.24788488447666168, "learning_rate": 1.0838669235199467e-05, "loss": 0.04, "step": 33981 }, { "epoch": 0.6018054873000482, "grad_norm": 0.7745645046234131, "learning_rate": 1.0837842648349445e-05, "loss": 0.0676, "step": 33982 }, { "epoch": 0.6018231968370766, "grad_norm": 0.7249598503112793, "learning_rate": 1.0837016075192506e-05, "loss": 0.0725, "step": 33983 }, { "epoch": 0.6018409063741051, "grad_norm": 0.19522926211357117, "learning_rate": 1.0836189515731358e-05, "loss": 0.0761, "step": 33984 }, { "epoch": 0.6018586159111335, "grad_norm": 0.768879771232605, "learning_rate": 1.083536296996872e-05, "loss": 0.0592, "step": 33985 }, { "epoch": 0.6018763254481619, "grad_norm": 0.637606143951416, "learning_rate": 1.0834536437907326e-05, "loss": 0.0897, "step": 33986 }, { "epoch": 0.6018940349851905, "grad_norm": 0.6846110224723816, "learning_rate": 1.0833709919549881e-05, "loss": 0.0749, "step": 33987 }, { "epoch": 0.6019117445222189, "grad_norm": 0.3098858594894409, "learning_rate": 1.0832883414899107e-05, "loss": 0.0696, "step": 33988 }, { "epoch": 0.6019294540592472, "grad_norm": 0.4754663407802582, "learning_rate": 1.0832056923957727e-05, "loss": 0.047, "step": 33989 }, { "epoch": 0.6019471635962756, "grad_norm": 0.670270562171936, "learning_rate": 1.083123044672846e-05, "loss": 0.0529, "step": 33990 }, { "epoch": 0.6019648731333042, "grad_norm": 0.33004993200302124, "learning_rate": 1.0830403983214021e-05, "loss": 0.0604, "step": 33991 }, { "epoch": 0.6019825826703326, "grad_norm": 0.3348849415779114, "learning_rate": 1.0829577533417133e-05, "loss": 0.05, "step": 33992 }, { "epoch": 0.602000292207361, "grad_norm": 0.8685224652290344, "learning_rate": 1.0828751097340513e-05, "loss": 0.0585, "step": 33993 }, { "epoch": 0.6020180017443894, "grad_norm": 0.6791255474090576, "learning_rate": 1.082792467498688e-05, "loss": 0.1087, "step": 33994 }, { "epoch": 0.6020357112814179, "grad_norm": 0.8560538291931152, "learning_rate": 1.0827098266358951e-05, "loss": 0.0511, "step": 33995 }, { "epoch": 0.6020534208184463, "grad_norm": 0.6337798833847046, "learning_rate": 1.0826271871459445e-05, "loss": 0.0845, "step": 33996 }, { "epoch": 0.6020711303554747, "grad_norm": 0.8274631500244141, "learning_rate": 1.082544549029109e-05, "loss": 0.0702, "step": 33997 }, { "epoch": 0.6020888398925031, "grad_norm": 0.6746022701263428, "learning_rate": 1.0824619122856593e-05, "loss": 0.0939, "step": 33998 }, { "epoch": 0.6021065494295316, "grad_norm": 0.49745380878448486, "learning_rate": 1.0823792769158674e-05, "loss": 0.064, "step": 33999 }, { "epoch": 0.60212425896656, "grad_norm": 0.7641630172729492, "learning_rate": 1.0822966429200051e-05, "loss": 0.0619, "step": 34000 }, { "epoch": 0.6021419685035884, "grad_norm": 0.46092724800109863, "learning_rate": 1.0822140102983457e-05, "loss": 0.0662, "step": 34001 }, { "epoch": 0.6021596780406169, "grad_norm": 0.45962607860565186, "learning_rate": 1.0821313790511592e-05, "loss": 0.061, "step": 34002 }, { "epoch": 0.6021773875776453, "grad_norm": 0.9389891028404236, "learning_rate": 1.0820487491787184e-05, "loss": 0.058, "step": 34003 }, { "epoch": 0.6021950971146737, "grad_norm": 0.7021806240081787, "learning_rate": 1.0819661206812951e-05, "loss": 0.0625, "step": 34004 }, { "epoch": 0.6022128066517021, "grad_norm": 0.8080806732177734, "learning_rate": 1.0818834935591606e-05, "loss": 0.0716, "step": 34005 }, { "epoch": 0.6022305161887306, "grad_norm": 0.5894530415534973, "learning_rate": 1.0818008678125872e-05, "loss": 0.0451, "step": 34006 }, { "epoch": 0.602248225725759, "grad_norm": 0.3506992757320404, "learning_rate": 1.0817182434418466e-05, "loss": 0.0563, "step": 34007 }, { "epoch": 0.6022659352627874, "grad_norm": 0.6659246683120728, "learning_rate": 1.0816356204472114e-05, "loss": 0.0702, "step": 34008 }, { "epoch": 0.6022836447998158, "grad_norm": 0.4140036404132843, "learning_rate": 1.0815529988289518e-05, "loss": 0.0448, "step": 34009 }, { "epoch": 0.6023013543368443, "grad_norm": 0.40360382199287415, "learning_rate": 1.0814703785873407e-05, "loss": 0.0481, "step": 34010 }, { "epoch": 0.6023190638738727, "grad_norm": 0.7180529832839966, "learning_rate": 1.0813877597226504e-05, "loss": 0.0646, "step": 34011 }, { "epoch": 0.6023367734109011, "grad_norm": 0.3629646301269531, "learning_rate": 1.0813051422351516e-05, "loss": 0.0527, "step": 34012 }, { "epoch": 0.6023544829479295, "grad_norm": 0.42410987615585327, "learning_rate": 1.0812225261251161e-05, "loss": 0.0533, "step": 34013 }, { "epoch": 0.602372192484958, "grad_norm": 0.6971442699432373, "learning_rate": 1.0811399113928163e-05, "loss": 0.0856, "step": 34014 }, { "epoch": 0.6023899020219864, "grad_norm": 0.7336016297340393, "learning_rate": 1.0810572980385243e-05, "loss": 0.0676, "step": 34015 }, { "epoch": 0.6024076115590148, "grad_norm": 0.8080503940582275, "learning_rate": 1.080974686062511e-05, "loss": 0.0763, "step": 34016 }, { "epoch": 0.6024253210960433, "grad_norm": 0.882587730884552, "learning_rate": 1.0808920754650486e-05, "loss": 0.0799, "step": 34017 }, { "epoch": 0.6024430306330717, "grad_norm": 0.29936105012893677, "learning_rate": 1.0808094662464093e-05, "loss": 0.0586, "step": 34018 }, { "epoch": 0.6024607401701001, "grad_norm": 0.593360424041748, "learning_rate": 1.080726858406864e-05, "loss": 0.0662, "step": 34019 }, { "epoch": 0.6024784497071285, "grad_norm": 0.3068270981311798, "learning_rate": 1.080644251946685e-05, "loss": 0.0413, "step": 34020 }, { "epoch": 0.602496159244157, "grad_norm": 0.7520983815193176, "learning_rate": 1.0805616468661438e-05, "loss": 0.0687, "step": 34021 }, { "epoch": 0.6025138687811854, "grad_norm": 0.6209114789962769, "learning_rate": 1.0804790431655128e-05, "loss": 0.067, "step": 34022 }, { "epoch": 0.6025315783182138, "grad_norm": 0.7527974843978882, "learning_rate": 1.0803964408450632e-05, "loss": 0.0801, "step": 34023 }, { "epoch": 0.6025492878552422, "grad_norm": 0.6016331911087036, "learning_rate": 1.0803138399050666e-05, "loss": 0.0478, "step": 34024 }, { "epoch": 0.6025669973922707, "grad_norm": 0.5978307127952576, "learning_rate": 1.0802312403457959e-05, "loss": 0.0627, "step": 34025 }, { "epoch": 0.6025847069292991, "grad_norm": 0.5848572850227356, "learning_rate": 1.0801486421675212e-05, "loss": 0.0656, "step": 34026 }, { "epoch": 0.6026024164663275, "grad_norm": 1.3532440662384033, "learning_rate": 1.0800660453705148e-05, "loss": 0.0447, "step": 34027 }, { "epoch": 0.6026201260033559, "grad_norm": 0.5107903480529785, "learning_rate": 1.0799834499550489e-05, "loss": 0.0576, "step": 34028 }, { "epoch": 0.6026378355403844, "grad_norm": 0.894244372844696, "learning_rate": 1.0799008559213952e-05, "loss": 0.0665, "step": 34029 }, { "epoch": 0.6026555450774128, "grad_norm": 0.4043814241886139, "learning_rate": 1.0798182632698248e-05, "loss": 0.043, "step": 34030 }, { "epoch": 0.6026732546144412, "grad_norm": 0.7933621406555176, "learning_rate": 1.07973567200061e-05, "loss": 0.0788, "step": 34031 }, { "epoch": 0.6026909641514697, "grad_norm": 0.3015560805797577, "learning_rate": 1.0796530821140225e-05, "loss": 0.0154, "step": 34032 }, { "epoch": 0.6027086736884981, "grad_norm": 1.0319329500198364, "learning_rate": 1.0795704936103335e-05, "loss": 0.0638, "step": 34033 }, { "epoch": 0.6027263832255265, "grad_norm": 0.574345588684082, "learning_rate": 1.0794879064898151e-05, "loss": 0.0767, "step": 34034 }, { "epoch": 0.6027440927625549, "grad_norm": 0.6453429460525513, "learning_rate": 1.079405320752739e-05, "loss": 0.0828, "step": 34035 }, { "epoch": 0.6027618022995834, "grad_norm": 0.9559740424156189, "learning_rate": 1.0793227363993777e-05, "loss": 0.0787, "step": 34036 }, { "epoch": 0.6027795118366118, "grad_norm": 0.4011661112308502, "learning_rate": 1.0792401534300007e-05, "loss": 0.0698, "step": 34037 }, { "epoch": 0.6027972213736402, "grad_norm": 0.46109461784362793, "learning_rate": 1.0791575718448814e-05, "loss": 0.0507, "step": 34038 }, { "epoch": 0.6028149309106686, "grad_norm": 0.5689483880996704, "learning_rate": 1.0790749916442915e-05, "loss": 0.0705, "step": 34039 }, { "epoch": 0.6028326404476971, "grad_norm": 0.5364700555801392, "learning_rate": 1.0789924128285028e-05, "loss": 0.0723, "step": 34040 }, { "epoch": 0.6028503499847255, "grad_norm": 0.5522937774658203, "learning_rate": 1.0789098353977855e-05, "loss": 0.0705, "step": 34041 }, { "epoch": 0.6028680595217539, "grad_norm": 0.3166850805282593, "learning_rate": 1.0788272593524124e-05, "loss": 0.0723, "step": 34042 }, { "epoch": 0.6028857690587823, "grad_norm": 0.50933438539505, "learning_rate": 1.0787446846926555e-05, "loss": 0.0579, "step": 34043 }, { "epoch": 0.6029034785958108, "grad_norm": 0.34117960929870605, "learning_rate": 1.0786621114187856e-05, "loss": 0.075, "step": 34044 }, { "epoch": 0.6029211881328392, "grad_norm": 0.4095046818256378, "learning_rate": 1.0785795395310746e-05, "loss": 0.0584, "step": 34045 }, { "epoch": 0.6029388976698676, "grad_norm": 0.46982014179229736, "learning_rate": 1.0784969690297945e-05, "loss": 0.0517, "step": 34046 }, { "epoch": 0.6029566072068961, "grad_norm": 0.6075618267059326, "learning_rate": 1.078414399915217e-05, "loss": 0.054, "step": 34047 }, { "epoch": 0.6029743167439245, "grad_norm": 0.49693700671195984, "learning_rate": 1.078331832187613e-05, "loss": 0.0677, "step": 34048 }, { "epoch": 0.6029920262809529, "grad_norm": 0.37161311507225037, "learning_rate": 1.078249265847255e-05, "loss": 0.0338, "step": 34049 }, { "epoch": 0.6030097358179813, "grad_norm": 0.6766334176063538, "learning_rate": 1.0781667008944146e-05, "loss": 0.0764, "step": 34050 }, { "epoch": 0.6030274453550099, "grad_norm": 0.5736256837844849, "learning_rate": 1.0780841373293623e-05, "loss": 0.0269, "step": 34051 }, { "epoch": 0.6030451548920382, "grad_norm": 0.382197767496109, "learning_rate": 1.0780015751523704e-05, "loss": 0.0494, "step": 34052 }, { "epoch": 0.6030628644290666, "grad_norm": 0.6839591860771179, "learning_rate": 1.077919014363711e-05, "loss": 0.0655, "step": 34053 }, { "epoch": 0.603080573966095, "grad_norm": 0.8294698596000671, "learning_rate": 1.077836454963656e-05, "loss": 0.0674, "step": 34054 }, { "epoch": 0.6030982835031236, "grad_norm": 0.5105419754981995, "learning_rate": 1.0777538969524755e-05, "loss": 0.0705, "step": 34055 }, { "epoch": 0.603115993040152, "grad_norm": 0.5731054544448853, "learning_rate": 1.0776713403304423e-05, "loss": 0.0584, "step": 34056 }, { "epoch": 0.6031337025771804, "grad_norm": 0.6480886340141296, "learning_rate": 1.0775887850978277e-05, "loss": 0.0569, "step": 34057 }, { "epoch": 0.6031514121142088, "grad_norm": 0.44768601655960083, "learning_rate": 1.0775062312549032e-05, "loss": 0.0603, "step": 34058 }, { "epoch": 0.6031691216512373, "grad_norm": 0.5645642876625061, "learning_rate": 1.0774236788019404e-05, "loss": 0.0625, "step": 34059 }, { "epoch": 0.6031868311882657, "grad_norm": 0.5515225529670715, "learning_rate": 1.0773411277392109e-05, "loss": 0.0637, "step": 34060 }, { "epoch": 0.6032045407252941, "grad_norm": 0.6684160828590393, "learning_rate": 1.0772585780669865e-05, "loss": 0.0735, "step": 34061 }, { "epoch": 0.6032222502623226, "grad_norm": 0.7879403829574585, "learning_rate": 1.0771760297855385e-05, "loss": 0.0692, "step": 34062 }, { "epoch": 0.603239959799351, "grad_norm": 0.6106781363487244, "learning_rate": 1.0770934828951386e-05, "loss": 0.0465, "step": 34063 }, { "epoch": 0.6032576693363794, "grad_norm": 0.5750875473022461, "learning_rate": 1.0770109373960587e-05, "loss": 0.0486, "step": 34064 }, { "epoch": 0.6032753788734078, "grad_norm": 0.776658296585083, "learning_rate": 1.07692839328857e-05, "loss": 0.0756, "step": 34065 }, { "epoch": 0.6032930884104363, "grad_norm": 0.507809579372406, "learning_rate": 1.0768458505729434e-05, "loss": 0.0928, "step": 34066 }, { "epoch": 0.6033107979474647, "grad_norm": 0.5795965790748596, "learning_rate": 1.0767633092494516e-05, "loss": 0.0727, "step": 34067 }, { "epoch": 0.6033285074844931, "grad_norm": 0.5334080457687378, "learning_rate": 1.076680769318366e-05, "loss": 0.0771, "step": 34068 }, { "epoch": 0.6033462170215215, "grad_norm": 0.6451409459114075, "learning_rate": 1.0765982307799575e-05, "loss": 0.0895, "step": 34069 }, { "epoch": 0.60336392655855, "grad_norm": 0.5053988099098206, "learning_rate": 1.076515693634498e-05, "loss": 0.0601, "step": 34070 }, { "epoch": 0.6033816360955784, "grad_norm": 0.8949458599090576, "learning_rate": 1.076433157882259e-05, "loss": 0.0809, "step": 34071 }, { "epoch": 0.6033993456326068, "grad_norm": 0.5664790868759155, "learning_rate": 1.0763506235235123e-05, "loss": 0.0787, "step": 34072 }, { "epoch": 0.6034170551696352, "grad_norm": 0.6837242245674133, "learning_rate": 1.0762680905585289e-05, "loss": 0.1085, "step": 34073 }, { "epoch": 0.6034347647066637, "grad_norm": 0.5808565616607666, "learning_rate": 1.076185558987581e-05, "loss": 0.0848, "step": 34074 }, { "epoch": 0.6034524742436921, "grad_norm": 0.8305875062942505, "learning_rate": 1.0761030288109395e-05, "loss": 0.0643, "step": 34075 }, { "epoch": 0.6034701837807205, "grad_norm": 0.6512165069580078, "learning_rate": 1.0760205000288761e-05, "loss": 0.0365, "step": 34076 }, { "epoch": 0.603487893317749, "grad_norm": 0.556175947189331, "learning_rate": 1.0759379726416624e-05, "loss": 0.0699, "step": 34077 }, { "epoch": 0.6035056028547774, "grad_norm": 0.6564215421676636, "learning_rate": 1.0758554466495698e-05, "loss": 0.0507, "step": 34078 }, { "epoch": 0.6035233123918058, "grad_norm": 0.6651854515075684, "learning_rate": 1.0757729220528705e-05, "loss": 0.0657, "step": 34079 }, { "epoch": 0.6035410219288342, "grad_norm": 0.9943866729736328, "learning_rate": 1.0756903988518346e-05, "loss": 0.0573, "step": 34080 }, { "epoch": 0.6035587314658627, "grad_norm": 0.6691338419914246, "learning_rate": 1.0756078770467347e-05, "loss": 0.0662, "step": 34081 }, { "epoch": 0.6035764410028911, "grad_norm": 0.8089221715927124, "learning_rate": 1.0755253566378424e-05, "loss": 0.0541, "step": 34082 }, { "epoch": 0.6035941505399195, "grad_norm": 0.621093213558197, "learning_rate": 1.0754428376254281e-05, "loss": 0.0811, "step": 34083 }, { "epoch": 0.6036118600769479, "grad_norm": 0.6759334206581116, "learning_rate": 1.0753603200097639e-05, "loss": 0.053, "step": 34084 }, { "epoch": 0.6036295696139764, "grad_norm": 0.5417647957801819, "learning_rate": 1.0752778037911216e-05, "loss": 0.0582, "step": 34085 }, { "epoch": 0.6036472791510048, "grad_norm": 0.5693790912628174, "learning_rate": 1.0751952889697724e-05, "loss": 0.0525, "step": 34086 }, { "epoch": 0.6036649886880332, "grad_norm": 0.779533863067627, "learning_rate": 1.0751127755459876e-05, "loss": 0.0743, "step": 34087 }, { "epoch": 0.6036826982250616, "grad_norm": 1.150503158569336, "learning_rate": 1.0750302635200385e-05, "loss": 0.0758, "step": 34088 }, { "epoch": 0.6037004077620901, "grad_norm": 0.5688269734382629, "learning_rate": 1.0749477528921973e-05, "loss": 0.0319, "step": 34089 }, { "epoch": 0.6037181172991185, "grad_norm": 0.34223130345344543, "learning_rate": 1.0748652436627349e-05, "loss": 0.0457, "step": 34090 }, { "epoch": 0.6037358268361469, "grad_norm": 0.5125806331634521, "learning_rate": 1.0747827358319227e-05, "loss": 0.049, "step": 34091 }, { "epoch": 0.6037535363731754, "grad_norm": 0.8431499600410461, "learning_rate": 1.0747002294000323e-05, "loss": 0.0859, "step": 34092 }, { "epoch": 0.6037712459102038, "grad_norm": 0.539196252822876, "learning_rate": 1.0746177243673357e-05, "loss": 0.0851, "step": 34093 }, { "epoch": 0.6037889554472322, "grad_norm": 0.3938932716846466, "learning_rate": 1.0745352207341032e-05, "loss": 0.0444, "step": 34094 }, { "epoch": 0.6038066649842606, "grad_norm": 0.5304325222969055, "learning_rate": 1.0744527185006066e-05, "loss": 0.0516, "step": 34095 }, { "epoch": 0.6038243745212891, "grad_norm": 0.20413720607757568, "learning_rate": 1.074370217667118e-05, "loss": 0.0344, "step": 34096 }, { "epoch": 0.6038420840583175, "grad_norm": 0.7148419618606567, "learning_rate": 1.0742877182339082e-05, "loss": 0.0564, "step": 34097 }, { "epoch": 0.6038597935953459, "grad_norm": 0.6684855222702026, "learning_rate": 1.0742052202012486e-05, "loss": 0.0755, "step": 34098 }, { "epoch": 0.6038775031323743, "grad_norm": 0.655052661895752, "learning_rate": 1.0741227235694107e-05, "loss": 0.0797, "step": 34099 }, { "epoch": 0.6038952126694028, "grad_norm": 0.47255319356918335, "learning_rate": 1.0740402283386662e-05, "loss": 0.0367, "step": 34100 }, { "epoch": 0.6039129222064312, "grad_norm": 0.8831803202629089, "learning_rate": 1.0739577345092861e-05, "loss": 0.0708, "step": 34101 }, { "epoch": 0.6039306317434596, "grad_norm": 0.6295256614685059, "learning_rate": 1.073875242081542e-05, "loss": 0.0768, "step": 34102 }, { "epoch": 0.603948341280488, "grad_norm": 0.5878851413726807, "learning_rate": 1.0737927510557052e-05, "loss": 0.0625, "step": 34103 }, { "epoch": 0.6039660508175165, "grad_norm": 0.7162685990333557, "learning_rate": 1.0737102614320474e-05, "loss": 0.0455, "step": 34104 }, { "epoch": 0.6039837603545449, "grad_norm": 0.27151191234588623, "learning_rate": 1.0736277732108394e-05, "loss": 0.0563, "step": 34105 }, { "epoch": 0.6040014698915733, "grad_norm": 0.7453908920288086, "learning_rate": 1.0735452863923529e-05, "loss": 0.0593, "step": 34106 }, { "epoch": 0.6040191794286018, "grad_norm": 0.4796229898929596, "learning_rate": 1.07346280097686e-05, "loss": 0.0771, "step": 34107 }, { "epoch": 0.6040368889656302, "grad_norm": 0.4575926959514618, "learning_rate": 1.0733803169646305e-05, "loss": 0.074, "step": 34108 }, { "epoch": 0.6040545985026586, "grad_norm": 0.4266021251678467, "learning_rate": 1.0732978343559366e-05, "loss": 0.0633, "step": 34109 }, { "epoch": 0.604072308039687, "grad_norm": 0.9512262344360352, "learning_rate": 1.0732153531510498e-05, "loss": 0.0903, "step": 34110 }, { "epoch": 0.6040900175767155, "grad_norm": 0.538385272026062, "learning_rate": 1.0731328733502421e-05, "loss": 0.0577, "step": 34111 }, { "epoch": 0.6041077271137439, "grad_norm": 0.7295894622802734, "learning_rate": 1.073050394953783e-05, "loss": 0.0919, "step": 34112 }, { "epoch": 0.6041254366507723, "grad_norm": 0.46489307284355164, "learning_rate": 1.0729679179619453e-05, "loss": 0.0673, "step": 34113 }, { "epoch": 0.6041431461878007, "grad_norm": 0.7053688764572144, "learning_rate": 1.0728854423750003e-05, "loss": 0.0641, "step": 34114 }, { "epoch": 0.6041608557248292, "grad_norm": 0.663178563117981, "learning_rate": 1.0728029681932186e-05, "loss": 0.0839, "step": 34115 }, { "epoch": 0.6041785652618576, "grad_norm": 0.5855268836021423, "learning_rate": 1.0727204954168717e-05, "loss": 0.0567, "step": 34116 }, { "epoch": 0.604196274798886, "grad_norm": 0.76517653465271, "learning_rate": 1.0726380240462315e-05, "loss": 0.097, "step": 34117 }, { "epoch": 0.6042139843359144, "grad_norm": 0.44447770714759827, "learning_rate": 1.0725555540815691e-05, "loss": 0.0595, "step": 34118 }, { "epoch": 0.604231693872943, "grad_norm": 0.7148758769035339, "learning_rate": 1.0724730855231557e-05, "loss": 0.0689, "step": 34119 }, { "epoch": 0.6042494034099714, "grad_norm": 1.0670166015625, "learning_rate": 1.072390618371262e-05, "loss": 0.0627, "step": 34120 }, { "epoch": 0.6042671129469998, "grad_norm": 0.8519590497016907, "learning_rate": 1.072308152626161e-05, "loss": 0.0691, "step": 34121 }, { "epoch": 0.6042848224840283, "grad_norm": 0.7276239395141602, "learning_rate": 1.0722256882881222e-05, "loss": 0.0855, "step": 34122 }, { "epoch": 0.6043025320210567, "grad_norm": 0.5122709274291992, "learning_rate": 1.0721432253574176e-05, "loss": 0.0809, "step": 34123 }, { "epoch": 0.6043202415580851, "grad_norm": 0.3303059935569763, "learning_rate": 1.0720607638343182e-05, "loss": 0.0651, "step": 34124 }, { "epoch": 0.6043379510951135, "grad_norm": 0.754150927066803, "learning_rate": 1.0719783037190967e-05, "loss": 0.0896, "step": 34125 }, { "epoch": 0.604355660632142, "grad_norm": 0.5066128373146057, "learning_rate": 1.0718958450120224e-05, "loss": 0.0665, "step": 34126 }, { "epoch": 0.6043733701691704, "grad_norm": 0.4628499150276184, "learning_rate": 1.0718133877133674e-05, "loss": 0.055, "step": 34127 }, { "epoch": 0.6043910797061988, "grad_norm": 0.5256021022796631, "learning_rate": 1.0717309318234037e-05, "loss": 0.0495, "step": 34128 }, { "epoch": 0.6044087892432272, "grad_norm": 0.5259321928024292, "learning_rate": 1.0716484773424013e-05, "loss": 0.0655, "step": 34129 }, { "epoch": 0.6044264987802557, "grad_norm": 0.5615262389183044, "learning_rate": 1.0715660242706324e-05, "loss": 0.0622, "step": 34130 }, { "epoch": 0.6044442083172841, "grad_norm": 0.29846861958503723, "learning_rate": 1.0714835726083676e-05, "loss": 0.0355, "step": 34131 }, { "epoch": 0.6044619178543125, "grad_norm": 0.7305205464363098, "learning_rate": 1.0714011223558791e-05, "loss": 0.0826, "step": 34132 }, { "epoch": 0.6044796273913409, "grad_norm": 0.3970174491405487, "learning_rate": 1.0713186735134372e-05, "loss": 0.0728, "step": 34133 }, { "epoch": 0.6044973369283694, "grad_norm": 0.5475864410400391, "learning_rate": 1.0712362260813134e-05, "loss": 0.0272, "step": 34134 }, { "epoch": 0.6045150464653978, "grad_norm": 0.5313522219657898, "learning_rate": 1.0711537800597797e-05, "loss": 0.1021, "step": 34135 }, { "epoch": 0.6045327560024262, "grad_norm": 0.433937132358551, "learning_rate": 1.071071335449106e-05, "loss": 0.0547, "step": 34136 }, { "epoch": 0.6045504655394547, "grad_norm": 0.5990278720855713, "learning_rate": 1.0709888922495642e-05, "loss": 0.1024, "step": 34137 }, { "epoch": 0.6045681750764831, "grad_norm": 0.6453725099563599, "learning_rate": 1.0709064504614253e-05, "loss": 0.0659, "step": 34138 }, { "epoch": 0.6045858846135115, "grad_norm": 0.4230214059352875, "learning_rate": 1.070824010084962e-05, "loss": 0.0367, "step": 34139 }, { "epoch": 0.6046035941505399, "grad_norm": 0.4514840841293335, "learning_rate": 1.0707415711204434e-05, "loss": 0.0427, "step": 34140 }, { "epoch": 0.6046213036875684, "grad_norm": 0.6032879948616028, "learning_rate": 1.0706591335681416e-05, "loss": 0.0465, "step": 34141 }, { "epoch": 0.6046390132245968, "grad_norm": 0.5385010242462158, "learning_rate": 1.0705766974283278e-05, "loss": 0.075, "step": 34142 }, { "epoch": 0.6046567227616252, "grad_norm": 0.59687739610672, "learning_rate": 1.0704942627012735e-05, "loss": 0.0744, "step": 34143 }, { "epoch": 0.6046744322986536, "grad_norm": 0.9088703393936157, "learning_rate": 1.0704118293872495e-05, "loss": 0.0723, "step": 34144 }, { "epoch": 0.6046921418356821, "grad_norm": 0.3936697244644165, "learning_rate": 1.0703293974865271e-05, "loss": 0.077, "step": 34145 }, { "epoch": 0.6047098513727105, "grad_norm": 0.5516486167907715, "learning_rate": 1.0702469669993778e-05, "loss": 0.0318, "step": 34146 }, { "epoch": 0.6047275609097389, "grad_norm": 0.7775499820709229, "learning_rate": 1.0701645379260722e-05, "loss": 0.0775, "step": 34147 }, { "epoch": 0.6047452704467673, "grad_norm": 0.2791841924190521, "learning_rate": 1.0700821102668818e-05, "loss": 0.0427, "step": 34148 }, { "epoch": 0.6047629799837958, "grad_norm": 0.3613565266132355, "learning_rate": 1.0699996840220778e-05, "loss": 0.0371, "step": 34149 }, { "epoch": 0.6047806895208242, "grad_norm": 0.7046748399734497, "learning_rate": 1.069917259191932e-05, "loss": 0.0754, "step": 34150 }, { "epoch": 0.6047983990578526, "grad_norm": 0.34708163142204285, "learning_rate": 1.069834835776714e-05, "loss": 0.0689, "step": 34151 }, { "epoch": 0.6048161085948811, "grad_norm": 0.7293411493301392, "learning_rate": 1.0697524137766962e-05, "loss": 0.1038, "step": 34152 }, { "epoch": 0.6048338181319095, "grad_norm": 0.8550284504890442, "learning_rate": 1.0696699931921496e-05, "loss": 0.0754, "step": 34153 }, { "epoch": 0.6048515276689379, "grad_norm": 0.9676170945167542, "learning_rate": 1.0695875740233452e-05, "loss": 0.0525, "step": 34154 }, { "epoch": 0.6048692372059663, "grad_norm": 0.8602874875068665, "learning_rate": 1.0695051562705537e-05, "loss": 0.07, "step": 34155 }, { "epoch": 0.6048869467429948, "grad_norm": 0.4326886236667633, "learning_rate": 1.069422739934047e-05, "loss": 0.0571, "step": 34156 }, { "epoch": 0.6049046562800232, "grad_norm": 0.49809980392456055, "learning_rate": 1.069340325014096e-05, "loss": 0.0593, "step": 34157 }, { "epoch": 0.6049223658170516, "grad_norm": 0.6860921382904053, "learning_rate": 1.0692579115109717e-05, "loss": 0.0474, "step": 34158 }, { "epoch": 0.60494007535408, "grad_norm": 0.6728945970535278, "learning_rate": 1.0691754994249451e-05, "loss": 0.0727, "step": 34159 }, { "epoch": 0.6049577848911085, "grad_norm": 0.4811391234397888, "learning_rate": 1.0690930887562884e-05, "loss": 0.0803, "step": 34160 }, { "epoch": 0.6049754944281369, "grad_norm": 0.39008983969688416, "learning_rate": 1.0690106795052706e-05, "loss": 0.0545, "step": 34161 }, { "epoch": 0.6049932039651653, "grad_norm": 0.7082279920578003, "learning_rate": 1.0689282716721645e-05, "loss": 0.0677, "step": 34162 }, { "epoch": 0.6050109135021937, "grad_norm": 0.6363741755485535, "learning_rate": 1.068845865257241e-05, "loss": 0.0563, "step": 34163 }, { "epoch": 0.6050286230392222, "grad_norm": 0.45702579617500305, "learning_rate": 1.0687634602607715e-05, "loss": 0.0507, "step": 34164 }, { "epoch": 0.6050463325762506, "grad_norm": 0.7490890622138977, "learning_rate": 1.0686810566830259e-05, "loss": 0.0916, "step": 34165 }, { "epoch": 0.605064042113279, "grad_norm": 0.6625087261199951, "learning_rate": 1.0685986545242761e-05, "loss": 0.0942, "step": 34166 }, { "epoch": 0.6050817516503075, "grad_norm": 0.8441600799560547, "learning_rate": 1.0685162537847934e-05, "loss": 0.0483, "step": 34167 }, { "epoch": 0.6050994611873359, "grad_norm": 0.6644958853721619, "learning_rate": 1.0684338544648481e-05, "loss": 0.0593, "step": 34168 }, { "epoch": 0.6051171707243643, "grad_norm": 0.40743863582611084, "learning_rate": 1.068351456564712e-05, "loss": 0.0622, "step": 34169 }, { "epoch": 0.6051348802613927, "grad_norm": 0.7324621677398682, "learning_rate": 1.0682690600846558e-05, "loss": 0.11, "step": 34170 }, { "epoch": 0.6051525897984212, "grad_norm": 0.6257638931274414, "learning_rate": 1.0681866650249511e-05, "loss": 0.0793, "step": 34171 }, { "epoch": 0.6051702993354496, "grad_norm": 0.37288644909858704, "learning_rate": 1.0681042713858681e-05, "loss": 0.0236, "step": 34172 }, { "epoch": 0.605188008872478, "grad_norm": 1.0370043516159058, "learning_rate": 1.0680218791676785e-05, "loss": 0.0985, "step": 34173 }, { "epoch": 0.6052057184095064, "grad_norm": 0.6265571117401123, "learning_rate": 1.0679394883706533e-05, "loss": 0.0831, "step": 34174 }, { "epoch": 0.6052234279465349, "grad_norm": 0.5387885570526123, "learning_rate": 1.0678570989950637e-05, "loss": 0.0646, "step": 34175 }, { "epoch": 0.6052411374835633, "grad_norm": 0.4347517788410187, "learning_rate": 1.0677747110411803e-05, "loss": 0.0378, "step": 34176 }, { "epoch": 0.6052588470205917, "grad_norm": 0.4243268072605133, "learning_rate": 1.0676923245092743e-05, "loss": 0.063, "step": 34177 }, { "epoch": 0.6052765565576201, "grad_norm": 0.8192476630210876, "learning_rate": 1.0676099393996176e-05, "loss": 0.0592, "step": 34178 }, { "epoch": 0.6052942660946486, "grad_norm": 0.46440911293029785, "learning_rate": 1.0675275557124797e-05, "loss": 0.0437, "step": 34179 }, { "epoch": 0.605311975631677, "grad_norm": 0.7412565350532532, "learning_rate": 1.0674451734481325e-05, "loss": 0.1198, "step": 34180 }, { "epoch": 0.6053296851687054, "grad_norm": 0.7613614797592163, "learning_rate": 1.067362792606847e-05, "loss": 0.0744, "step": 34181 }, { "epoch": 0.605347394705734, "grad_norm": 0.5476717352867126, "learning_rate": 1.0672804131888942e-05, "loss": 0.0386, "step": 34182 }, { "epoch": 0.6053651042427624, "grad_norm": 0.603585422039032, "learning_rate": 1.0671980351945448e-05, "loss": 0.0477, "step": 34183 }, { "epoch": 0.6053828137797908, "grad_norm": 0.9732287526130676, "learning_rate": 1.0671156586240702e-05, "loss": 0.0868, "step": 34184 }, { "epoch": 0.6054005233168192, "grad_norm": 0.39747124910354614, "learning_rate": 1.0670332834777417e-05, "loss": 0.0746, "step": 34185 }, { "epoch": 0.6054182328538477, "grad_norm": 0.39590123295783997, "learning_rate": 1.0669509097558295e-05, "loss": 0.037, "step": 34186 }, { "epoch": 0.6054359423908761, "grad_norm": 0.5734750032424927, "learning_rate": 1.0668685374586049e-05, "loss": 0.0442, "step": 34187 }, { "epoch": 0.6054536519279045, "grad_norm": 0.19035346806049347, "learning_rate": 1.0667861665863393e-05, "loss": 0.0371, "step": 34188 }, { "epoch": 0.6054713614649329, "grad_norm": 0.40152931213378906, "learning_rate": 1.0667037971393036e-05, "loss": 0.0522, "step": 34189 }, { "epoch": 0.6054890710019614, "grad_norm": 0.6477747559547424, "learning_rate": 1.0666214291177679e-05, "loss": 0.0615, "step": 34190 }, { "epoch": 0.6055067805389898, "grad_norm": 0.41139572858810425, "learning_rate": 1.0665390625220042e-05, "loss": 0.0562, "step": 34191 }, { "epoch": 0.6055244900760182, "grad_norm": 0.44814449548721313, "learning_rate": 1.0664566973522835e-05, "loss": 0.048, "step": 34192 }, { "epoch": 0.6055421996130466, "grad_norm": 0.5262004137039185, "learning_rate": 1.066374333608876e-05, "loss": 0.0728, "step": 34193 }, { "epoch": 0.6055599091500751, "grad_norm": 0.5337638854980469, "learning_rate": 1.0662919712920531e-05, "loss": 0.0689, "step": 34194 }, { "epoch": 0.6055776186871035, "grad_norm": 1.0072864294052124, "learning_rate": 1.0662096104020858e-05, "loss": 0.0985, "step": 34195 }, { "epoch": 0.6055953282241319, "grad_norm": 0.8188009262084961, "learning_rate": 1.0661272509392454e-05, "loss": 0.0721, "step": 34196 }, { "epoch": 0.6056130377611604, "grad_norm": 0.4531497061252594, "learning_rate": 1.0660448929038019e-05, "loss": 0.0613, "step": 34197 }, { "epoch": 0.6056307472981888, "grad_norm": 0.40286004543304443, "learning_rate": 1.0659625362960268e-05, "loss": 0.0728, "step": 34198 }, { "epoch": 0.6056484568352172, "grad_norm": 0.6160683631896973, "learning_rate": 1.0658801811161915e-05, "loss": 0.0717, "step": 34199 }, { "epoch": 0.6056661663722456, "grad_norm": 0.6524884104728699, "learning_rate": 1.0657978273645662e-05, "loss": 0.0775, "step": 34200 }, { "epoch": 0.6056838759092741, "grad_norm": 0.8779373168945312, "learning_rate": 1.0657154750414223e-05, "loss": 0.0808, "step": 34201 }, { "epoch": 0.6057015854463025, "grad_norm": 0.6402224898338318, "learning_rate": 1.0656331241470302e-05, "loss": 0.0408, "step": 34202 }, { "epoch": 0.6057192949833309, "grad_norm": 0.32554543018341064, "learning_rate": 1.065550774681662e-05, "loss": 0.0539, "step": 34203 }, { "epoch": 0.6057370045203593, "grad_norm": 0.27327579259872437, "learning_rate": 1.0654684266455868e-05, "loss": 0.0683, "step": 34204 }, { "epoch": 0.6057547140573878, "grad_norm": 0.517288088798523, "learning_rate": 1.0653860800390768e-05, "loss": 0.0873, "step": 34205 }, { "epoch": 0.6057724235944162, "grad_norm": 0.508690357208252, "learning_rate": 1.0653037348624034e-05, "loss": 0.068, "step": 34206 }, { "epoch": 0.6057901331314446, "grad_norm": 1.0624053478240967, "learning_rate": 1.065221391115836e-05, "loss": 0.0671, "step": 34207 }, { "epoch": 0.605807842668473, "grad_norm": 0.9864470362663269, "learning_rate": 1.0651390487996464e-05, "loss": 0.0673, "step": 34208 }, { "epoch": 0.6058255522055015, "grad_norm": 0.7314910888671875, "learning_rate": 1.0650567079141052e-05, "loss": 0.0901, "step": 34209 }, { "epoch": 0.6058432617425299, "grad_norm": 0.4207085967063904, "learning_rate": 1.0649743684594837e-05, "loss": 0.0393, "step": 34210 }, { "epoch": 0.6058609712795583, "grad_norm": 0.649604082107544, "learning_rate": 1.0648920304360524e-05, "loss": 0.0772, "step": 34211 }, { "epoch": 0.6058786808165868, "grad_norm": 0.5358832478523254, "learning_rate": 1.0648096938440822e-05, "loss": 0.0601, "step": 34212 }, { "epoch": 0.6058963903536152, "grad_norm": 0.7922111749649048, "learning_rate": 1.0647273586838441e-05, "loss": 0.0747, "step": 34213 }, { "epoch": 0.6059140998906436, "grad_norm": 0.6393018960952759, "learning_rate": 1.0646450249556093e-05, "loss": 0.0914, "step": 34214 }, { "epoch": 0.605931809427672, "grad_norm": 0.5893641114234924, "learning_rate": 1.064562692659648e-05, "loss": 0.0558, "step": 34215 }, { "epoch": 0.6059495189647005, "grad_norm": 0.7238520383834839, "learning_rate": 1.0644803617962316e-05, "loss": 0.0832, "step": 34216 }, { "epoch": 0.6059672285017289, "grad_norm": 0.5455578565597534, "learning_rate": 1.064398032365631e-05, "loss": 0.0788, "step": 34217 }, { "epoch": 0.6059849380387573, "grad_norm": 0.7329574227333069, "learning_rate": 1.0643157043681165e-05, "loss": 0.0598, "step": 34218 }, { "epoch": 0.6060026475757857, "grad_norm": 0.4629891514778137, "learning_rate": 1.0642333778039588e-05, "loss": 0.0745, "step": 34219 }, { "epoch": 0.6060203571128142, "grad_norm": 1.1751984357833862, "learning_rate": 1.0641510526734295e-05, "loss": 0.0869, "step": 34220 }, { "epoch": 0.6060380666498426, "grad_norm": 0.6481829881668091, "learning_rate": 1.0640687289767998e-05, "loss": 0.0713, "step": 34221 }, { "epoch": 0.606055776186871, "grad_norm": 0.8578720688819885, "learning_rate": 1.0639864067143395e-05, "loss": 0.0957, "step": 34222 }, { "epoch": 0.6060734857238994, "grad_norm": 0.5273879766464233, "learning_rate": 1.0639040858863198e-05, "loss": 0.0497, "step": 34223 }, { "epoch": 0.6060911952609279, "grad_norm": 0.38740694522857666, "learning_rate": 1.0638217664930114e-05, "loss": 0.0794, "step": 34224 }, { "epoch": 0.6061089047979563, "grad_norm": 0.5245614051818848, "learning_rate": 1.0637394485346853e-05, "loss": 0.059, "step": 34225 }, { "epoch": 0.6061266143349847, "grad_norm": 0.5261921882629395, "learning_rate": 1.0636571320116124e-05, "loss": 0.0574, "step": 34226 }, { "epoch": 0.6061443238720132, "grad_norm": 0.30105194449424744, "learning_rate": 1.0635748169240632e-05, "loss": 0.043, "step": 34227 }, { "epoch": 0.6061620334090416, "grad_norm": 0.8113573789596558, "learning_rate": 1.063492503272309e-05, "loss": 0.0649, "step": 34228 }, { "epoch": 0.60617974294607, "grad_norm": 0.6343240737915039, "learning_rate": 1.06341019105662e-05, "loss": 0.1024, "step": 34229 }, { "epoch": 0.6061974524830984, "grad_norm": 0.6277153491973877, "learning_rate": 1.0633278802772674e-05, "loss": 0.0773, "step": 34230 }, { "epoch": 0.6062151620201269, "grad_norm": 0.5411259531974792, "learning_rate": 1.0632455709345226e-05, "loss": 0.0554, "step": 34231 }, { "epoch": 0.6062328715571553, "grad_norm": 0.6050788164138794, "learning_rate": 1.063163263028655e-05, "loss": 0.0758, "step": 34232 }, { "epoch": 0.6062505810941837, "grad_norm": 0.40401509404182434, "learning_rate": 1.063080956559936e-05, "loss": 0.0534, "step": 34233 }, { "epoch": 0.6062682906312121, "grad_norm": 0.6084538102149963, "learning_rate": 1.0629986515286363e-05, "loss": 0.0718, "step": 34234 }, { "epoch": 0.6062860001682406, "grad_norm": 0.9516839981079102, "learning_rate": 1.0629163479350275e-05, "loss": 0.087, "step": 34235 }, { "epoch": 0.606303709705269, "grad_norm": 0.6763355135917664, "learning_rate": 1.0628340457793794e-05, "loss": 0.0483, "step": 34236 }, { "epoch": 0.6063214192422974, "grad_norm": 0.7327969670295715, "learning_rate": 1.062751745061963e-05, "loss": 0.0588, "step": 34237 }, { "epoch": 0.6063391287793258, "grad_norm": 0.4431630074977875, "learning_rate": 1.0626694457830493e-05, "loss": 0.0544, "step": 34238 }, { "epoch": 0.6063568383163543, "grad_norm": 0.774874210357666, "learning_rate": 1.0625871479429086e-05, "loss": 0.0569, "step": 34239 }, { "epoch": 0.6063745478533827, "grad_norm": 0.8619167804718018, "learning_rate": 1.0625048515418122e-05, "loss": 0.0768, "step": 34240 }, { "epoch": 0.6063922573904111, "grad_norm": 0.5780671834945679, "learning_rate": 1.0624225565800305e-05, "loss": 0.0453, "step": 34241 }, { "epoch": 0.6064099669274396, "grad_norm": 0.8731722235679626, "learning_rate": 1.0623402630578344e-05, "loss": 0.094, "step": 34242 }, { "epoch": 0.606427676464468, "grad_norm": 0.41693931818008423, "learning_rate": 1.0622579709754944e-05, "loss": 0.0687, "step": 34243 }, { "epoch": 0.6064453860014964, "grad_norm": 0.41707876324653625, "learning_rate": 1.0621756803332815e-05, "loss": 0.0626, "step": 34244 }, { "epoch": 0.6064630955385248, "grad_norm": 0.5950415730476379, "learning_rate": 1.0620933911314663e-05, "loss": 0.088, "step": 34245 }, { "epoch": 0.6064808050755534, "grad_norm": 0.5964469313621521, "learning_rate": 1.0620111033703199e-05, "loss": 0.079, "step": 34246 }, { "epoch": 0.6064985146125818, "grad_norm": 0.667262852191925, "learning_rate": 1.0619288170501123e-05, "loss": 0.0661, "step": 34247 }, { "epoch": 0.6065162241496102, "grad_norm": 0.5757473111152649, "learning_rate": 1.0618465321711143e-05, "loss": 0.0477, "step": 34248 }, { "epoch": 0.6065339336866385, "grad_norm": 0.4894168972969055, "learning_rate": 1.0617642487335978e-05, "loss": 0.0645, "step": 34249 }, { "epoch": 0.6065516432236671, "grad_norm": 0.8833933472633362, "learning_rate": 1.0616819667378318e-05, "loss": 0.1021, "step": 34250 }, { "epoch": 0.6065693527606955, "grad_norm": 0.891289472579956, "learning_rate": 1.061599686184088e-05, "loss": 0.08, "step": 34251 }, { "epoch": 0.6065870622977239, "grad_norm": 0.7040873765945435, "learning_rate": 1.0615174070726367e-05, "loss": 0.0521, "step": 34252 }, { "epoch": 0.6066047718347524, "grad_norm": 0.5969521403312683, "learning_rate": 1.0614351294037493e-05, "loss": 0.0818, "step": 34253 }, { "epoch": 0.6066224813717808, "grad_norm": 0.7133867144584656, "learning_rate": 1.0613528531776955e-05, "loss": 0.0481, "step": 34254 }, { "epoch": 0.6066401909088092, "grad_norm": 0.36681410670280457, "learning_rate": 1.0612705783947464e-05, "loss": 0.0544, "step": 34255 }, { "epoch": 0.6066579004458376, "grad_norm": 0.45535391569137573, "learning_rate": 1.0611883050551732e-05, "loss": 0.0425, "step": 34256 }, { "epoch": 0.6066756099828661, "grad_norm": 0.43959298729896545, "learning_rate": 1.0611060331592458e-05, "loss": 0.0789, "step": 34257 }, { "epoch": 0.6066933195198945, "grad_norm": 0.7412095665931702, "learning_rate": 1.061023762707235e-05, "loss": 0.0688, "step": 34258 }, { "epoch": 0.6067110290569229, "grad_norm": 0.26761481165885925, "learning_rate": 1.0609414936994116e-05, "loss": 0.0361, "step": 34259 }, { "epoch": 0.6067287385939513, "grad_norm": 0.8212974071502686, "learning_rate": 1.0608592261360473e-05, "loss": 0.0737, "step": 34260 }, { "epoch": 0.6067464481309798, "grad_norm": 0.7945473790168762, "learning_rate": 1.0607769600174106e-05, "loss": 0.1043, "step": 34261 }, { "epoch": 0.6067641576680082, "grad_norm": 0.8802464604377747, "learning_rate": 1.0606946953437735e-05, "loss": 0.0974, "step": 34262 }, { "epoch": 0.6067818672050366, "grad_norm": 0.4558069407939911, "learning_rate": 1.0606124321154067e-05, "loss": 0.045, "step": 34263 }, { "epoch": 0.606799576742065, "grad_norm": 0.38684841990470886, "learning_rate": 1.0605301703325804e-05, "loss": 0.0661, "step": 34264 }, { "epoch": 0.6068172862790935, "grad_norm": 0.5804547667503357, "learning_rate": 1.0604479099955652e-05, "loss": 0.1013, "step": 34265 }, { "epoch": 0.6068349958161219, "grad_norm": 0.5129323601722717, "learning_rate": 1.0603656511046318e-05, "loss": 0.053, "step": 34266 }, { "epoch": 0.6068527053531503, "grad_norm": 0.5180448889732361, "learning_rate": 1.0602833936600515e-05, "loss": 0.0408, "step": 34267 }, { "epoch": 0.6068704148901788, "grad_norm": 0.6578581929206848, "learning_rate": 1.060201137662094e-05, "loss": 0.0696, "step": 34268 }, { "epoch": 0.6068881244272072, "grad_norm": 0.31023910641670227, "learning_rate": 1.0601188831110303e-05, "loss": 0.0543, "step": 34269 }, { "epoch": 0.6069058339642356, "grad_norm": 0.27646052837371826, "learning_rate": 1.0600366300071315e-05, "loss": 0.0328, "step": 34270 }, { "epoch": 0.606923543501264, "grad_norm": 0.5480343103408813, "learning_rate": 1.0599543783506669e-05, "loss": 0.0917, "step": 34271 }, { "epoch": 0.6069412530382925, "grad_norm": 0.6144362092018127, "learning_rate": 1.059872128141908e-05, "loss": 0.0642, "step": 34272 }, { "epoch": 0.6069589625753209, "grad_norm": 0.633196234703064, "learning_rate": 1.0597898793811253e-05, "loss": 0.0493, "step": 34273 }, { "epoch": 0.6069766721123493, "grad_norm": 0.7581839561462402, "learning_rate": 1.0597076320685901e-05, "loss": 0.091, "step": 34274 }, { "epoch": 0.6069943816493777, "grad_norm": 0.4160332679748535, "learning_rate": 1.0596253862045717e-05, "loss": 0.0712, "step": 34275 }, { "epoch": 0.6070120911864062, "grad_norm": 0.5934387445449829, "learning_rate": 1.0595431417893412e-05, "loss": 0.0539, "step": 34276 }, { "epoch": 0.6070298007234346, "grad_norm": 0.5836671590805054, "learning_rate": 1.0594608988231688e-05, "loss": 0.0648, "step": 34277 }, { "epoch": 0.607047510260463, "grad_norm": 0.5196865200996399, "learning_rate": 1.0593786573063266e-05, "loss": 0.0687, "step": 34278 }, { "epoch": 0.6070652197974914, "grad_norm": 1.0108009576797485, "learning_rate": 1.0592964172390832e-05, "loss": 0.0818, "step": 34279 }, { "epoch": 0.6070829293345199, "grad_norm": 0.7687334418296814, "learning_rate": 1.05921417862171e-05, "loss": 0.0985, "step": 34280 }, { "epoch": 0.6071006388715483, "grad_norm": 0.5675874352455139, "learning_rate": 1.059131941454478e-05, "loss": 0.0598, "step": 34281 }, { "epoch": 0.6071183484085767, "grad_norm": 1.4397395849227905, "learning_rate": 1.0590497057376572e-05, "loss": 0.0707, "step": 34282 }, { "epoch": 0.6071360579456052, "grad_norm": 0.7844453454017639, "learning_rate": 1.0589674714715178e-05, "loss": 0.079, "step": 34283 }, { "epoch": 0.6071537674826336, "grad_norm": 0.7678356766700745, "learning_rate": 1.0588852386563313e-05, "loss": 0.0934, "step": 34284 }, { "epoch": 0.607171477019662, "grad_norm": 0.5212816596031189, "learning_rate": 1.0588030072923678e-05, "loss": 0.085, "step": 34285 }, { "epoch": 0.6071891865566904, "grad_norm": 0.5042383670806885, "learning_rate": 1.0587207773798975e-05, "loss": 0.0914, "step": 34286 }, { "epoch": 0.6072068960937189, "grad_norm": 0.8989291787147522, "learning_rate": 1.0586385489191912e-05, "loss": 0.0931, "step": 34287 }, { "epoch": 0.6072246056307473, "grad_norm": 0.9448891282081604, "learning_rate": 1.05855632191052e-05, "loss": 0.0962, "step": 34288 }, { "epoch": 0.6072423151677757, "grad_norm": 0.7953335046768188, "learning_rate": 1.0584740963541535e-05, "loss": 0.0522, "step": 34289 }, { "epoch": 0.6072600247048041, "grad_norm": 0.6673406362533569, "learning_rate": 1.0583918722503623e-05, "loss": 0.0746, "step": 34290 }, { "epoch": 0.6072777342418326, "grad_norm": 1.022395372390747, "learning_rate": 1.0583096495994172e-05, "loss": 0.1118, "step": 34291 }, { "epoch": 0.607295443778861, "grad_norm": 0.26874732971191406, "learning_rate": 1.0582274284015891e-05, "loss": 0.0966, "step": 34292 }, { "epoch": 0.6073131533158894, "grad_norm": 0.5185457468032837, "learning_rate": 1.0581452086571476e-05, "loss": 0.0697, "step": 34293 }, { "epoch": 0.6073308628529178, "grad_norm": 0.4409138560295105, "learning_rate": 1.0580629903663638e-05, "loss": 0.0738, "step": 34294 }, { "epoch": 0.6073485723899463, "grad_norm": 0.25067269802093506, "learning_rate": 1.0579807735295084e-05, "loss": 0.0318, "step": 34295 }, { "epoch": 0.6073662819269747, "grad_norm": 0.6870440244674683, "learning_rate": 1.0578985581468512e-05, "loss": 0.0826, "step": 34296 }, { "epoch": 0.6073839914640031, "grad_norm": 0.6829989552497864, "learning_rate": 1.0578163442186628e-05, "loss": 0.074, "step": 34297 }, { "epoch": 0.6074017010010316, "grad_norm": 0.3861372172832489, "learning_rate": 1.0577341317452143e-05, "loss": 0.0409, "step": 34298 }, { "epoch": 0.60741941053806, "grad_norm": 1.0819134712219238, "learning_rate": 1.0576519207267763e-05, "loss": 0.0623, "step": 34299 }, { "epoch": 0.6074371200750884, "grad_norm": 0.6719540953636169, "learning_rate": 1.0575697111636177e-05, "loss": 0.0654, "step": 34300 }, { "epoch": 0.6074548296121168, "grad_norm": 0.6651609539985657, "learning_rate": 1.0574875030560103e-05, "loss": 0.0665, "step": 34301 }, { "epoch": 0.6074725391491453, "grad_norm": 0.5440233945846558, "learning_rate": 1.057405296404225e-05, "loss": 0.045, "step": 34302 }, { "epoch": 0.6074902486861737, "grad_norm": 0.7949401140213013, "learning_rate": 1.0573230912085307e-05, "loss": 0.0518, "step": 34303 }, { "epoch": 0.6075079582232021, "grad_norm": 0.4753517210483551, "learning_rate": 1.0572408874691989e-05, "loss": 0.0988, "step": 34304 }, { "epoch": 0.6075256677602305, "grad_norm": 0.6121160387992859, "learning_rate": 1.0571586851864995e-05, "loss": 0.0548, "step": 34305 }, { "epoch": 0.607543377297259, "grad_norm": 0.6784666776657104, "learning_rate": 1.0570764843607036e-05, "loss": 0.0577, "step": 34306 }, { "epoch": 0.6075610868342874, "grad_norm": 0.7571898102760315, "learning_rate": 1.0569942849920813e-05, "loss": 0.0591, "step": 34307 }, { "epoch": 0.6075787963713158, "grad_norm": 0.48369988799095154, "learning_rate": 1.0569120870809026e-05, "loss": 0.0625, "step": 34308 }, { "epoch": 0.6075965059083442, "grad_norm": 0.5327306985855103, "learning_rate": 1.0568298906274387e-05, "loss": 0.0511, "step": 34309 }, { "epoch": 0.6076142154453728, "grad_norm": 0.623202919960022, "learning_rate": 1.0567476956319596e-05, "loss": 0.0516, "step": 34310 }, { "epoch": 0.6076319249824012, "grad_norm": 0.7417260408401489, "learning_rate": 1.0566655020947354e-05, "loss": 0.0584, "step": 34311 }, { "epoch": 0.6076496345194295, "grad_norm": 0.44773268699645996, "learning_rate": 1.056583310016037e-05, "loss": 0.0498, "step": 34312 }, { "epoch": 0.6076673440564581, "grad_norm": 0.9285752773284912, "learning_rate": 1.0565011193961354e-05, "loss": 0.0708, "step": 34313 }, { "epoch": 0.6076850535934865, "grad_norm": 0.3406012952327728, "learning_rate": 1.0564189302352992e-05, "loss": 0.0725, "step": 34314 }, { "epoch": 0.6077027631305149, "grad_norm": 0.6815354228019714, "learning_rate": 1.0563367425338002e-05, "loss": 0.1177, "step": 34315 }, { "epoch": 0.6077204726675433, "grad_norm": 0.4649563729763031, "learning_rate": 1.0562545562919083e-05, "loss": 0.077, "step": 34316 }, { "epoch": 0.6077381822045718, "grad_norm": 1.0828782320022583, "learning_rate": 1.0561723715098948e-05, "loss": 0.0575, "step": 34317 }, { "epoch": 0.6077558917416002, "grad_norm": 0.6076758503913879, "learning_rate": 1.0560901881880288e-05, "loss": 0.0719, "step": 34318 }, { "epoch": 0.6077736012786286, "grad_norm": 0.5455791354179382, "learning_rate": 1.056008006326581e-05, "loss": 0.0543, "step": 34319 }, { "epoch": 0.607791310815657, "grad_norm": 0.6065644025802612, "learning_rate": 1.0559258259258224e-05, "loss": 0.0955, "step": 34320 }, { "epoch": 0.6078090203526855, "grad_norm": 0.4332660138607025, "learning_rate": 1.0558436469860225e-05, "loss": 0.0728, "step": 34321 }, { "epoch": 0.6078267298897139, "grad_norm": 0.7053905129432678, "learning_rate": 1.0557614695074522e-05, "loss": 0.0434, "step": 34322 }, { "epoch": 0.6078444394267423, "grad_norm": 0.661981463432312, "learning_rate": 1.0556792934903815e-05, "loss": 0.0972, "step": 34323 }, { "epoch": 0.6078621489637707, "grad_norm": 0.45951855182647705, "learning_rate": 1.0555971189350813e-05, "loss": 0.0517, "step": 34324 }, { "epoch": 0.6078798585007992, "grad_norm": 0.5927630066871643, "learning_rate": 1.0555149458418214e-05, "loss": 0.0651, "step": 34325 }, { "epoch": 0.6078975680378276, "grad_norm": 0.6066358685493469, "learning_rate": 1.0554327742108725e-05, "loss": 0.0518, "step": 34326 }, { "epoch": 0.607915277574856, "grad_norm": 0.6270768046379089, "learning_rate": 1.0553506040425054e-05, "loss": 0.0645, "step": 34327 }, { "epoch": 0.6079329871118845, "grad_norm": 0.5609111785888672, "learning_rate": 1.0552684353369892e-05, "loss": 0.0683, "step": 34328 }, { "epoch": 0.6079506966489129, "grad_norm": 0.728079080581665, "learning_rate": 1.0551862680945947e-05, "loss": 0.0753, "step": 34329 }, { "epoch": 0.6079684061859413, "grad_norm": 0.4521254897117615, "learning_rate": 1.0551041023155922e-05, "loss": 0.0469, "step": 34330 }, { "epoch": 0.6079861157229697, "grad_norm": 0.602554440498352, "learning_rate": 1.0550219380002532e-05, "loss": 0.1053, "step": 34331 }, { "epoch": 0.6080038252599982, "grad_norm": 0.52974933385849, "learning_rate": 1.0549397751488464e-05, "loss": 0.0722, "step": 34332 }, { "epoch": 0.6080215347970266, "grad_norm": 0.679669976234436, "learning_rate": 1.0548576137616425e-05, "loss": 0.0763, "step": 34333 }, { "epoch": 0.608039244334055, "grad_norm": 0.3234201967716217, "learning_rate": 1.0547754538389127e-05, "loss": 0.0516, "step": 34334 }, { "epoch": 0.6080569538710834, "grad_norm": 0.7890391945838928, "learning_rate": 1.0546932953809261e-05, "loss": 0.0834, "step": 34335 }, { "epoch": 0.6080746634081119, "grad_norm": 0.8825936913490295, "learning_rate": 1.0546111383879534e-05, "loss": 0.0567, "step": 34336 }, { "epoch": 0.6080923729451403, "grad_norm": 0.6699903607368469, "learning_rate": 1.0545289828602653e-05, "loss": 0.0918, "step": 34337 }, { "epoch": 0.6081100824821687, "grad_norm": 0.5284759402275085, "learning_rate": 1.054446828798132e-05, "loss": 0.051, "step": 34338 }, { "epoch": 0.6081277920191971, "grad_norm": 0.38788914680480957, "learning_rate": 1.0543646762018232e-05, "loss": 0.0651, "step": 34339 }, { "epoch": 0.6081455015562256, "grad_norm": 0.5039798021316528, "learning_rate": 1.0542825250716094e-05, "loss": 0.0721, "step": 34340 }, { "epoch": 0.608163211093254, "grad_norm": 0.5981763601303101, "learning_rate": 1.0542003754077617e-05, "loss": 0.0549, "step": 34341 }, { "epoch": 0.6081809206302824, "grad_norm": 0.7234451770782471, "learning_rate": 1.0541182272105493e-05, "loss": 0.0759, "step": 34342 }, { "epoch": 0.6081986301673109, "grad_norm": 0.4734109342098236, "learning_rate": 1.0540360804802424e-05, "loss": 0.0624, "step": 34343 }, { "epoch": 0.6082163397043393, "grad_norm": 0.814264714717865, "learning_rate": 1.0539539352171119e-05, "loss": 0.0598, "step": 34344 }, { "epoch": 0.6082340492413677, "grad_norm": 0.5805724859237671, "learning_rate": 1.0538717914214286e-05, "loss": 0.0673, "step": 34345 }, { "epoch": 0.6082517587783961, "grad_norm": 0.280692994594574, "learning_rate": 1.0537896490934611e-05, "loss": 0.0334, "step": 34346 }, { "epoch": 0.6082694683154246, "grad_norm": 0.7410094141960144, "learning_rate": 1.0537075082334807e-05, "loss": 0.0617, "step": 34347 }, { "epoch": 0.608287177852453, "grad_norm": 0.971250057220459, "learning_rate": 1.0536253688417575e-05, "loss": 0.0694, "step": 34348 }, { "epoch": 0.6083048873894814, "grad_norm": 0.3085121810436249, "learning_rate": 1.0535432309185618e-05, "loss": 0.0298, "step": 34349 }, { "epoch": 0.6083225969265098, "grad_norm": 0.6970360279083252, "learning_rate": 1.0534610944641636e-05, "loss": 0.0795, "step": 34350 }, { "epoch": 0.6083403064635383, "grad_norm": 0.4802277982234955, "learning_rate": 1.0533789594788332e-05, "loss": 0.054, "step": 34351 }, { "epoch": 0.6083580160005667, "grad_norm": 0.7321572303771973, "learning_rate": 1.0532968259628412e-05, "loss": 0.0686, "step": 34352 }, { "epoch": 0.6083757255375951, "grad_norm": 0.640914261341095, "learning_rate": 1.0532146939164571e-05, "loss": 0.0784, "step": 34353 }, { "epoch": 0.6083934350746235, "grad_norm": 0.9997975826263428, "learning_rate": 1.0531325633399514e-05, "loss": 0.0752, "step": 34354 }, { "epoch": 0.608411144611652, "grad_norm": 0.8123558759689331, "learning_rate": 1.0530504342335944e-05, "loss": 0.0757, "step": 34355 }, { "epoch": 0.6084288541486804, "grad_norm": 0.7132778167724609, "learning_rate": 1.052968306597657e-05, "loss": 0.0702, "step": 34356 }, { "epoch": 0.6084465636857088, "grad_norm": 0.612727165222168, "learning_rate": 1.052886180432408e-05, "loss": 0.0842, "step": 34357 }, { "epoch": 0.6084642732227373, "grad_norm": 0.34162455797195435, "learning_rate": 1.0528040557381179e-05, "loss": 0.0564, "step": 34358 }, { "epoch": 0.6084819827597657, "grad_norm": 0.4434535801410675, "learning_rate": 1.0527219325150582e-05, "loss": 0.0783, "step": 34359 }, { "epoch": 0.6084996922967941, "grad_norm": 0.44747623801231384, "learning_rate": 1.0526398107634973e-05, "loss": 0.0832, "step": 34360 }, { "epoch": 0.6085174018338225, "grad_norm": 0.643174409866333, "learning_rate": 1.0525576904837063e-05, "loss": 0.0795, "step": 34361 }, { "epoch": 0.608535111370851, "grad_norm": 0.7865784168243408, "learning_rate": 1.0524755716759553e-05, "loss": 0.0718, "step": 34362 }, { "epoch": 0.6085528209078794, "grad_norm": 1.1655174493789673, "learning_rate": 1.0523934543405147e-05, "loss": 0.081, "step": 34363 }, { "epoch": 0.6085705304449078, "grad_norm": 0.9640764594078064, "learning_rate": 1.0523113384776539e-05, "loss": 0.0645, "step": 34364 }, { "epoch": 0.6085882399819362, "grad_norm": 0.5486538410186768, "learning_rate": 1.0522292240876437e-05, "loss": 0.0853, "step": 34365 }, { "epoch": 0.6086059495189647, "grad_norm": 0.6578282713890076, "learning_rate": 1.0521471111707542e-05, "loss": 0.069, "step": 34366 }, { "epoch": 0.6086236590559931, "grad_norm": 0.582022488117218, "learning_rate": 1.0520649997272553e-05, "loss": 0.0752, "step": 34367 }, { "epoch": 0.6086413685930215, "grad_norm": 0.3469279110431671, "learning_rate": 1.0519828897574172e-05, "loss": 0.0897, "step": 34368 }, { "epoch": 0.6086590781300499, "grad_norm": 0.9592814445495605, "learning_rate": 1.05190078126151e-05, "loss": 0.0924, "step": 34369 }, { "epoch": 0.6086767876670784, "grad_norm": 0.9844011664390564, "learning_rate": 1.0518186742398046e-05, "loss": 0.0813, "step": 34370 }, { "epoch": 0.6086944972041068, "grad_norm": 0.6539509892463684, "learning_rate": 1.0517365686925697e-05, "loss": 0.0431, "step": 34371 }, { "epoch": 0.6087122067411352, "grad_norm": 0.8124280571937561, "learning_rate": 1.0516544646200758e-05, "loss": 0.0616, "step": 34372 }, { "epoch": 0.6087299162781638, "grad_norm": 0.45062360167503357, "learning_rate": 1.0515723620225945e-05, "loss": 0.0458, "step": 34373 }, { "epoch": 0.6087476258151922, "grad_norm": 0.5729182958602905, "learning_rate": 1.051490260900394e-05, "loss": 0.0793, "step": 34374 }, { "epoch": 0.6087653353522205, "grad_norm": 0.8790544271469116, "learning_rate": 1.0514081612537452e-05, "loss": 0.0852, "step": 34375 }, { "epoch": 0.608783044889249, "grad_norm": 0.5429642200469971, "learning_rate": 1.0513260630829182e-05, "loss": 0.0788, "step": 34376 }, { "epoch": 0.6088007544262775, "grad_norm": 0.4614180326461792, "learning_rate": 1.0512439663881833e-05, "loss": 0.0685, "step": 34377 }, { "epoch": 0.6088184639633059, "grad_norm": 0.3384053707122803, "learning_rate": 1.0511618711698103e-05, "loss": 0.0481, "step": 34378 }, { "epoch": 0.6088361735003343, "grad_norm": 0.5606854557991028, "learning_rate": 1.0510797774280692e-05, "loss": 0.0829, "step": 34379 }, { "epoch": 0.6088538830373627, "grad_norm": 0.6350721716880798, "learning_rate": 1.0509976851632301e-05, "loss": 0.0688, "step": 34380 }, { "epoch": 0.6088715925743912, "grad_norm": 0.5911282300949097, "learning_rate": 1.0509155943755637e-05, "loss": 0.0737, "step": 34381 }, { "epoch": 0.6088893021114196, "grad_norm": 0.45951271057128906, "learning_rate": 1.050833505065339e-05, "loss": 0.0751, "step": 34382 }, { "epoch": 0.608907011648448, "grad_norm": 0.670605480670929, "learning_rate": 1.050751417232827e-05, "loss": 0.0605, "step": 34383 }, { "epoch": 0.6089247211854764, "grad_norm": 0.4002082645893097, "learning_rate": 1.0506693308782977e-05, "loss": 0.0441, "step": 34384 }, { "epoch": 0.6089424307225049, "grad_norm": 0.7552564144134521, "learning_rate": 1.0505872460020204e-05, "loss": 0.1179, "step": 34385 }, { "epoch": 0.6089601402595333, "grad_norm": 0.4684158265590668, "learning_rate": 1.0505051626042656e-05, "loss": 0.0795, "step": 34386 }, { "epoch": 0.6089778497965617, "grad_norm": 0.7146211266517639, "learning_rate": 1.050423080685303e-05, "loss": 0.0383, "step": 34387 }, { "epoch": 0.6089955593335902, "grad_norm": 0.9863677620887756, "learning_rate": 1.050341000245404e-05, "loss": 0.0872, "step": 34388 }, { "epoch": 0.6090132688706186, "grad_norm": 0.557388424873352, "learning_rate": 1.050258921284837e-05, "loss": 0.068, "step": 34389 }, { "epoch": 0.609030978407647, "grad_norm": 0.6263837814331055, "learning_rate": 1.0501768438038726e-05, "loss": 0.0815, "step": 34390 }, { "epoch": 0.6090486879446754, "grad_norm": 0.48778823018074036, "learning_rate": 1.050094767802781e-05, "loss": 0.0606, "step": 34391 }, { "epoch": 0.6090663974817039, "grad_norm": 0.8385218381881714, "learning_rate": 1.0500126932818322e-05, "loss": 0.0954, "step": 34392 }, { "epoch": 0.6090841070187323, "grad_norm": 0.4080274701118469, "learning_rate": 1.0499306202412959e-05, "loss": 0.0646, "step": 34393 }, { "epoch": 0.6091018165557607, "grad_norm": 0.518491268157959, "learning_rate": 1.0498485486814423e-05, "loss": 0.0825, "step": 34394 }, { "epoch": 0.6091195260927891, "grad_norm": 0.457959920167923, "learning_rate": 1.0497664786025419e-05, "loss": 0.0549, "step": 34395 }, { "epoch": 0.6091372356298176, "grad_norm": 0.44360318779945374, "learning_rate": 1.0496844100048638e-05, "loss": 0.0935, "step": 34396 }, { "epoch": 0.609154945166846, "grad_norm": 0.4344208240509033, "learning_rate": 1.0496023428886785e-05, "loss": 0.0598, "step": 34397 }, { "epoch": 0.6091726547038744, "grad_norm": 0.573959231376648, "learning_rate": 1.0495202772542567e-05, "loss": 0.0391, "step": 34398 }, { "epoch": 0.6091903642409028, "grad_norm": 1.065403938293457, "learning_rate": 1.0494382131018668e-05, "loss": 0.0851, "step": 34399 }, { "epoch": 0.6092080737779313, "grad_norm": 0.6339686512947083, "learning_rate": 1.0493561504317796e-05, "loss": 0.0618, "step": 34400 }, { "epoch": 0.6092257833149597, "grad_norm": 0.3497532904148102, "learning_rate": 1.049274089244265e-05, "loss": 0.0799, "step": 34401 }, { "epoch": 0.6092434928519881, "grad_norm": 0.3245985507965088, "learning_rate": 1.0491920295395937e-05, "loss": 0.0553, "step": 34402 }, { "epoch": 0.6092612023890166, "grad_norm": 0.8251294493675232, "learning_rate": 1.0491099713180346e-05, "loss": 0.0806, "step": 34403 }, { "epoch": 0.609278911926045, "grad_norm": 0.9391271471977234, "learning_rate": 1.049027914579858e-05, "loss": 0.1067, "step": 34404 }, { "epoch": 0.6092966214630734, "grad_norm": 0.5530591607093811, "learning_rate": 1.0489458593253343e-05, "loss": 0.0576, "step": 34405 }, { "epoch": 0.6093143310001018, "grad_norm": 0.7710901498794556, "learning_rate": 1.0488638055547328e-05, "loss": 0.0658, "step": 34406 }, { "epoch": 0.6093320405371303, "grad_norm": 0.3621319532394409, "learning_rate": 1.0487817532683238e-05, "loss": 0.0586, "step": 34407 }, { "epoch": 0.6093497500741587, "grad_norm": 0.519772469997406, "learning_rate": 1.048699702466377e-05, "loss": 0.0587, "step": 34408 }, { "epoch": 0.6093674596111871, "grad_norm": 0.7024356722831726, "learning_rate": 1.048617653149163e-05, "loss": 0.0594, "step": 34409 }, { "epoch": 0.6093851691482155, "grad_norm": 0.47821810841560364, "learning_rate": 1.048535605316951e-05, "loss": 0.0461, "step": 34410 }, { "epoch": 0.609402878685244, "grad_norm": 0.6020268201828003, "learning_rate": 1.0484535589700111e-05, "loss": 0.0796, "step": 34411 }, { "epoch": 0.6094205882222724, "grad_norm": 0.9903064370155334, "learning_rate": 1.0483715141086141e-05, "loss": 0.0545, "step": 34412 }, { "epoch": 0.6094382977593008, "grad_norm": 0.6035075783729553, "learning_rate": 1.0482894707330281e-05, "loss": 0.0711, "step": 34413 }, { "epoch": 0.6094560072963292, "grad_norm": 0.7748677134513855, "learning_rate": 1.0482074288435245e-05, "loss": 0.0349, "step": 34414 }, { "epoch": 0.6094737168333577, "grad_norm": 1.1639740467071533, "learning_rate": 1.0481253884403725e-05, "loss": 0.0631, "step": 34415 }, { "epoch": 0.6094914263703861, "grad_norm": 0.46679577231407166, "learning_rate": 1.0480433495238427e-05, "loss": 0.0576, "step": 34416 }, { "epoch": 0.6095091359074145, "grad_norm": 0.6551216244697571, "learning_rate": 1.0479613120942042e-05, "loss": 0.0717, "step": 34417 }, { "epoch": 0.609526845444443, "grad_norm": 0.3637828826904297, "learning_rate": 1.0478792761517274e-05, "loss": 0.065, "step": 34418 }, { "epoch": 0.6095445549814714, "grad_norm": 0.600477397441864, "learning_rate": 1.0477972416966819e-05, "loss": 0.0933, "step": 34419 }, { "epoch": 0.6095622645184998, "grad_norm": 0.6911300420761108, "learning_rate": 1.0477152087293381e-05, "loss": 0.0962, "step": 34420 }, { "epoch": 0.6095799740555282, "grad_norm": 0.7108705639839172, "learning_rate": 1.0476331772499652e-05, "loss": 0.0618, "step": 34421 }, { "epoch": 0.6095976835925567, "grad_norm": 0.6663939356803894, "learning_rate": 1.0475511472588335e-05, "loss": 0.0518, "step": 34422 }, { "epoch": 0.6096153931295851, "grad_norm": 0.5553713440895081, "learning_rate": 1.0474691187562131e-05, "loss": 0.0594, "step": 34423 }, { "epoch": 0.6096331026666135, "grad_norm": 0.6870230436325073, "learning_rate": 1.0473870917423727e-05, "loss": 0.0534, "step": 34424 }, { "epoch": 0.6096508122036419, "grad_norm": 0.5256802439689636, "learning_rate": 1.0473050662175836e-05, "loss": 0.066, "step": 34425 }, { "epoch": 0.6096685217406704, "grad_norm": 0.48412492871284485, "learning_rate": 1.0472230421821147e-05, "loss": 0.0626, "step": 34426 }, { "epoch": 0.6096862312776988, "grad_norm": 0.7725688219070435, "learning_rate": 1.047141019636237e-05, "loss": 0.0573, "step": 34427 }, { "epoch": 0.6097039408147272, "grad_norm": 0.6837491989135742, "learning_rate": 1.047058998580219e-05, "loss": 0.068, "step": 34428 }, { "epoch": 0.6097216503517556, "grad_norm": 0.44512438774108887, "learning_rate": 1.0469769790143309e-05, "loss": 0.0641, "step": 34429 }, { "epoch": 0.6097393598887841, "grad_norm": 0.9266895651817322, "learning_rate": 1.046894960938843e-05, "loss": 0.0691, "step": 34430 }, { "epoch": 0.6097570694258125, "grad_norm": 0.6849960088729858, "learning_rate": 1.0468129443540246e-05, "loss": 0.072, "step": 34431 }, { "epoch": 0.6097747789628409, "grad_norm": 0.5577245354652405, "learning_rate": 1.0467309292601457e-05, "loss": 0.0446, "step": 34432 }, { "epoch": 0.6097924884998694, "grad_norm": 0.59372478723526, "learning_rate": 1.0466489156574762e-05, "loss": 0.0645, "step": 34433 }, { "epoch": 0.6098101980368978, "grad_norm": 0.501501739025116, "learning_rate": 1.0465669035462863e-05, "loss": 0.0699, "step": 34434 }, { "epoch": 0.6098279075739262, "grad_norm": 0.36427953839302063, "learning_rate": 1.0464848929268452e-05, "loss": 0.0567, "step": 34435 }, { "epoch": 0.6098456171109546, "grad_norm": 0.4901089072227478, "learning_rate": 1.046402883799423e-05, "loss": 0.0556, "step": 34436 }, { "epoch": 0.6098633266479832, "grad_norm": 1.0093752145767212, "learning_rate": 1.0463208761642899e-05, "loss": 0.0737, "step": 34437 }, { "epoch": 0.6098810361850115, "grad_norm": 0.9589390754699707, "learning_rate": 1.0462388700217142e-05, "loss": 0.066, "step": 34438 }, { "epoch": 0.60989874572204, "grad_norm": 0.8324583172798157, "learning_rate": 1.0461568653719672e-05, "loss": 0.0907, "step": 34439 }, { "epoch": 0.6099164552590683, "grad_norm": 0.455228716135025, "learning_rate": 1.0460748622153182e-05, "loss": 0.057, "step": 34440 }, { "epoch": 0.6099341647960969, "grad_norm": 0.710602343082428, "learning_rate": 1.0459928605520375e-05, "loss": 0.0535, "step": 34441 }, { "epoch": 0.6099518743331253, "grad_norm": 0.44043028354644775, "learning_rate": 1.0459108603823938e-05, "loss": 0.0625, "step": 34442 }, { "epoch": 0.6099695838701537, "grad_norm": 0.7276077270507812, "learning_rate": 1.0458288617066573e-05, "loss": 0.0627, "step": 34443 }, { "epoch": 0.609987293407182, "grad_norm": 0.5188146829605103, "learning_rate": 1.0457468645250985e-05, "loss": 0.0579, "step": 34444 }, { "epoch": 0.6100050029442106, "grad_norm": 0.5925126671791077, "learning_rate": 1.0456648688379862e-05, "loss": 0.0765, "step": 34445 }, { "epoch": 0.610022712481239, "grad_norm": 0.9656825065612793, "learning_rate": 1.0455828746455903e-05, "loss": 0.0975, "step": 34446 }, { "epoch": 0.6100404220182674, "grad_norm": 0.5862001776695251, "learning_rate": 1.0455008819481809e-05, "loss": 0.0366, "step": 34447 }, { "epoch": 0.6100581315552959, "grad_norm": 0.31254303455352783, "learning_rate": 1.045418890746028e-05, "loss": 0.0628, "step": 34448 }, { "epoch": 0.6100758410923243, "grad_norm": 0.7479037046432495, "learning_rate": 1.0453369010394008e-05, "loss": 0.0704, "step": 34449 }, { "epoch": 0.6100935506293527, "grad_norm": 0.9838415384292603, "learning_rate": 1.045254912828569e-05, "loss": 0.128, "step": 34450 }, { "epoch": 0.6101112601663811, "grad_norm": 1.0132261514663696, "learning_rate": 1.0451729261138026e-05, "loss": 0.0423, "step": 34451 }, { "epoch": 0.6101289697034096, "grad_norm": 0.8496779799461365, "learning_rate": 1.0450909408953719e-05, "loss": 0.0942, "step": 34452 }, { "epoch": 0.610146679240438, "grad_norm": 0.665341317653656, "learning_rate": 1.045008957173545e-05, "loss": 0.0626, "step": 34453 }, { "epoch": 0.6101643887774664, "grad_norm": 0.5395665764808655, "learning_rate": 1.044926974948593e-05, "loss": 0.0481, "step": 34454 }, { "epoch": 0.6101820983144948, "grad_norm": 0.7004742622375488, "learning_rate": 1.0448449942207857e-05, "loss": 0.0731, "step": 34455 }, { "epoch": 0.6101998078515233, "grad_norm": 0.6534145474433899, "learning_rate": 1.044763014990392e-05, "loss": 0.0625, "step": 34456 }, { "epoch": 0.6102175173885517, "grad_norm": 0.4124058783054352, "learning_rate": 1.0446810372576817e-05, "loss": 0.0605, "step": 34457 }, { "epoch": 0.6102352269255801, "grad_norm": 0.3589079678058624, "learning_rate": 1.0445990610229248e-05, "loss": 0.0472, "step": 34458 }, { "epoch": 0.6102529364626085, "grad_norm": 1.0377916097640991, "learning_rate": 1.0445170862863911e-05, "loss": 0.0655, "step": 34459 }, { "epoch": 0.610270645999637, "grad_norm": 0.4094790518283844, "learning_rate": 1.04443511304835e-05, "loss": 0.0499, "step": 34460 }, { "epoch": 0.6102883555366654, "grad_norm": 0.8863613605499268, "learning_rate": 1.0443531413090713e-05, "loss": 0.0627, "step": 34461 }, { "epoch": 0.6103060650736938, "grad_norm": 0.366143137216568, "learning_rate": 1.044271171068825e-05, "loss": 0.0669, "step": 34462 }, { "epoch": 0.6103237746107223, "grad_norm": 0.6749513149261475, "learning_rate": 1.0441892023278799e-05, "loss": 0.0553, "step": 34463 }, { "epoch": 0.6103414841477507, "grad_norm": 0.6741132736206055, "learning_rate": 1.0441072350865066e-05, "loss": 0.0729, "step": 34464 }, { "epoch": 0.6103591936847791, "grad_norm": 0.5950726270675659, "learning_rate": 1.044025269344974e-05, "loss": 0.0689, "step": 34465 }, { "epoch": 0.6103769032218075, "grad_norm": 0.5588816404342651, "learning_rate": 1.0439433051035531e-05, "loss": 0.0511, "step": 34466 }, { "epoch": 0.610394612758836, "grad_norm": 0.6017462015151978, "learning_rate": 1.0438613423625113e-05, "loss": 0.0692, "step": 34467 }, { "epoch": 0.6104123222958644, "grad_norm": 0.5196670293807983, "learning_rate": 1.0437793811221202e-05, "loss": 0.0584, "step": 34468 }, { "epoch": 0.6104300318328928, "grad_norm": 0.6946812272071838, "learning_rate": 1.0436974213826494e-05, "loss": 0.0782, "step": 34469 }, { "epoch": 0.6104477413699212, "grad_norm": 0.6775527596473694, "learning_rate": 1.043615463144367e-05, "loss": 0.0871, "step": 34470 }, { "epoch": 0.6104654509069497, "grad_norm": 0.5365217328071594, "learning_rate": 1.043533506407544e-05, "loss": 0.042, "step": 34471 }, { "epoch": 0.6104831604439781, "grad_norm": 0.5241977572441101, "learning_rate": 1.0434515511724492e-05, "loss": 0.0232, "step": 34472 }, { "epoch": 0.6105008699810065, "grad_norm": 0.5465313196182251, "learning_rate": 1.043369597439353e-05, "loss": 0.0637, "step": 34473 }, { "epoch": 0.6105185795180349, "grad_norm": 0.4197293817996979, "learning_rate": 1.0432876452085246e-05, "loss": 0.0835, "step": 34474 }, { "epoch": 0.6105362890550634, "grad_norm": 0.2875584363937378, "learning_rate": 1.0432056944802334e-05, "loss": 0.0396, "step": 34475 }, { "epoch": 0.6105539985920918, "grad_norm": 0.6569769978523254, "learning_rate": 1.0431237452547495e-05, "loss": 0.0419, "step": 34476 }, { "epoch": 0.6105717081291202, "grad_norm": 0.4982738196849823, "learning_rate": 1.0430417975323422e-05, "loss": 0.0547, "step": 34477 }, { "epoch": 0.6105894176661487, "grad_norm": 0.4593704640865326, "learning_rate": 1.0429598513132809e-05, "loss": 0.047, "step": 34478 }, { "epoch": 0.6106071272031771, "grad_norm": 0.602816641330719, "learning_rate": 1.0428779065978356e-05, "loss": 0.0801, "step": 34479 }, { "epoch": 0.6106248367402055, "grad_norm": 0.25467777252197266, "learning_rate": 1.0427959633862764e-05, "loss": 0.0756, "step": 34480 }, { "epoch": 0.6106425462772339, "grad_norm": 0.620050847530365, "learning_rate": 1.0427140216788718e-05, "loss": 0.0418, "step": 34481 }, { "epoch": 0.6106602558142624, "grad_norm": 0.7428419589996338, "learning_rate": 1.0426320814758913e-05, "loss": 0.0651, "step": 34482 }, { "epoch": 0.6106779653512908, "grad_norm": 0.6646893620491028, "learning_rate": 1.0425501427776058e-05, "loss": 0.0881, "step": 34483 }, { "epoch": 0.6106956748883192, "grad_norm": 0.7824429869651794, "learning_rate": 1.0424682055842835e-05, "loss": 0.0886, "step": 34484 }, { "epoch": 0.6107133844253476, "grad_norm": 0.3039605915546417, "learning_rate": 1.0423862698961948e-05, "loss": 0.0335, "step": 34485 }, { "epoch": 0.6107310939623761, "grad_norm": 0.41496312618255615, "learning_rate": 1.0423043357136088e-05, "loss": 0.0541, "step": 34486 }, { "epoch": 0.6107488034994045, "grad_norm": 0.697672426700592, "learning_rate": 1.0422224030367955e-05, "loss": 0.0703, "step": 34487 }, { "epoch": 0.6107665130364329, "grad_norm": 0.5208490490913391, "learning_rate": 1.042140471866024e-05, "loss": 0.048, "step": 34488 }, { "epoch": 0.6107842225734613, "grad_norm": 0.37927502393722534, "learning_rate": 1.042058542201564e-05, "loss": 0.0709, "step": 34489 }, { "epoch": 0.6108019321104898, "grad_norm": 0.6424916982650757, "learning_rate": 1.041976614043685e-05, "loss": 0.086, "step": 34490 }, { "epoch": 0.6108196416475182, "grad_norm": 0.41705596446990967, "learning_rate": 1.0418946873926569e-05, "loss": 0.0457, "step": 34491 }, { "epoch": 0.6108373511845466, "grad_norm": 0.5751922130584717, "learning_rate": 1.0418127622487486e-05, "loss": 0.0527, "step": 34492 }, { "epoch": 0.6108550607215751, "grad_norm": 0.6396898627281189, "learning_rate": 1.0417308386122303e-05, "loss": 0.0516, "step": 34493 }, { "epoch": 0.6108727702586035, "grad_norm": 0.5636671781539917, "learning_rate": 1.0416489164833715e-05, "loss": 0.0679, "step": 34494 }, { "epoch": 0.6108904797956319, "grad_norm": 0.5500692129135132, "learning_rate": 1.041566995862441e-05, "loss": 0.0678, "step": 34495 }, { "epoch": 0.6109081893326603, "grad_norm": 0.7075596451759338, "learning_rate": 1.0414850767497083e-05, "loss": 0.069, "step": 34496 }, { "epoch": 0.6109258988696888, "grad_norm": 0.4770575761795044, "learning_rate": 1.0414031591454435e-05, "loss": 0.0716, "step": 34497 }, { "epoch": 0.6109436084067172, "grad_norm": 0.5942458510398865, "learning_rate": 1.0413212430499168e-05, "loss": 0.0597, "step": 34498 }, { "epoch": 0.6109613179437456, "grad_norm": 0.5720932483673096, "learning_rate": 1.041239328463396e-05, "loss": 0.0477, "step": 34499 }, { "epoch": 0.610979027480774, "grad_norm": 0.4836086332798004, "learning_rate": 1.0411574153861513e-05, "loss": 0.0651, "step": 34500 }, { "epoch": 0.6109967370178025, "grad_norm": 0.7179682850837708, "learning_rate": 1.0410755038184527e-05, "loss": 0.0644, "step": 34501 }, { "epoch": 0.611014446554831, "grad_norm": 0.46397310495376587, "learning_rate": 1.0409935937605692e-05, "loss": 0.0838, "step": 34502 }, { "epoch": 0.6110321560918593, "grad_norm": 0.5075229406356812, "learning_rate": 1.04091168521277e-05, "loss": 0.0529, "step": 34503 }, { "epoch": 0.6110498656288877, "grad_norm": 0.522239089012146, "learning_rate": 1.0408297781753251e-05, "loss": 0.0533, "step": 34504 }, { "epoch": 0.6110675751659163, "grad_norm": 0.6587330102920532, "learning_rate": 1.0407478726485044e-05, "loss": 0.0591, "step": 34505 }, { "epoch": 0.6110852847029447, "grad_norm": 0.3876614272594452, "learning_rate": 1.0406659686325758e-05, "loss": 0.0362, "step": 34506 }, { "epoch": 0.611102994239973, "grad_norm": 0.7623295783996582, "learning_rate": 1.0405840661278102e-05, "loss": 0.0749, "step": 34507 }, { "epoch": 0.6111207037770016, "grad_norm": 0.7770479321479797, "learning_rate": 1.040502165134477e-05, "loss": 0.0819, "step": 34508 }, { "epoch": 0.61113841331403, "grad_norm": 0.7613233327865601, "learning_rate": 1.0404202656528446e-05, "loss": 0.0997, "step": 34509 }, { "epoch": 0.6111561228510584, "grad_norm": 0.5492851138114929, "learning_rate": 1.040338367683183e-05, "loss": 0.042, "step": 34510 }, { "epoch": 0.6111738323880868, "grad_norm": 0.8223986029624939, "learning_rate": 1.0402564712257614e-05, "loss": 0.0653, "step": 34511 }, { "epoch": 0.6111915419251153, "grad_norm": 0.4674559235572815, "learning_rate": 1.0401745762808504e-05, "loss": 0.0602, "step": 34512 }, { "epoch": 0.6112092514621437, "grad_norm": 0.46772363781929016, "learning_rate": 1.0400926828487181e-05, "loss": 0.0496, "step": 34513 }, { "epoch": 0.6112269609991721, "grad_norm": 0.6475425362586975, "learning_rate": 1.0400107909296342e-05, "loss": 0.0633, "step": 34514 }, { "epoch": 0.6112446705362005, "grad_norm": 0.42388811707496643, "learning_rate": 1.0399289005238686e-05, "loss": 0.0818, "step": 34515 }, { "epoch": 0.611262380073229, "grad_norm": 0.4344644546508789, "learning_rate": 1.03984701163169e-05, "loss": 0.0655, "step": 34516 }, { "epoch": 0.6112800896102574, "grad_norm": 0.5322016477584839, "learning_rate": 1.0397651242533684e-05, "loss": 0.0643, "step": 34517 }, { "epoch": 0.6112977991472858, "grad_norm": 0.5386543869972229, "learning_rate": 1.0396832383891728e-05, "loss": 0.0484, "step": 34518 }, { "epoch": 0.6113155086843142, "grad_norm": 0.6206432580947876, "learning_rate": 1.0396013540393731e-05, "loss": 0.0757, "step": 34519 }, { "epoch": 0.6113332182213427, "grad_norm": 0.7276799082756042, "learning_rate": 1.0395194712042382e-05, "loss": 0.0578, "step": 34520 }, { "epoch": 0.6113509277583711, "grad_norm": 0.6241207122802734, "learning_rate": 1.0394375898840376e-05, "loss": 0.0666, "step": 34521 }, { "epoch": 0.6113686372953995, "grad_norm": 0.4309538006782532, "learning_rate": 1.0393557100790408e-05, "loss": 0.0639, "step": 34522 }, { "epoch": 0.611386346832428, "grad_norm": 0.6277227401733398, "learning_rate": 1.0392738317895177e-05, "loss": 0.0548, "step": 34523 }, { "epoch": 0.6114040563694564, "grad_norm": 0.8532834053039551, "learning_rate": 1.0391919550157364e-05, "loss": 0.071, "step": 34524 }, { "epoch": 0.6114217659064848, "grad_norm": 0.4539695382118225, "learning_rate": 1.0391100797579667e-05, "loss": 0.043, "step": 34525 }, { "epoch": 0.6114394754435132, "grad_norm": 0.31442534923553467, "learning_rate": 1.0390282060164791e-05, "loss": 0.0415, "step": 34526 }, { "epoch": 0.6114571849805417, "grad_norm": 0.4875655472278595, "learning_rate": 1.0389463337915415e-05, "loss": 0.041, "step": 34527 }, { "epoch": 0.6114748945175701, "grad_norm": 0.2624683380126953, "learning_rate": 1.0388644630834238e-05, "loss": 0.0809, "step": 34528 }, { "epoch": 0.6114926040545985, "grad_norm": 0.3851398527622223, "learning_rate": 1.0387825938923954e-05, "loss": 0.067, "step": 34529 }, { "epoch": 0.6115103135916269, "grad_norm": 0.6974448561668396, "learning_rate": 1.038700726218726e-05, "loss": 0.0802, "step": 34530 }, { "epoch": 0.6115280231286554, "grad_norm": 1.2677847146987915, "learning_rate": 1.038618860062684e-05, "loss": 0.0514, "step": 34531 }, { "epoch": 0.6115457326656838, "grad_norm": 0.9171539545059204, "learning_rate": 1.0385369954245398e-05, "loss": 0.0742, "step": 34532 }, { "epoch": 0.6115634422027122, "grad_norm": 0.5917807221412659, "learning_rate": 1.0384551323045621e-05, "loss": 0.0564, "step": 34533 }, { "epoch": 0.6115811517397406, "grad_norm": 0.6874728202819824, "learning_rate": 1.0383732707030201e-05, "loss": 0.0577, "step": 34534 }, { "epoch": 0.6115988612767691, "grad_norm": 0.3238506317138672, "learning_rate": 1.0382914106201833e-05, "loss": 0.0463, "step": 34535 }, { "epoch": 0.6116165708137975, "grad_norm": 0.5168272256851196, "learning_rate": 1.0382095520563212e-05, "loss": 0.0675, "step": 34536 }, { "epoch": 0.6116342803508259, "grad_norm": 0.5605084896087646, "learning_rate": 1.0381276950117036e-05, "loss": 0.0645, "step": 34537 }, { "epoch": 0.6116519898878544, "grad_norm": 0.5922824740409851, "learning_rate": 1.0380458394865982e-05, "loss": 0.0675, "step": 34538 }, { "epoch": 0.6116696994248828, "grad_norm": 0.9536885023117065, "learning_rate": 1.0379639854812757e-05, "loss": 0.062, "step": 34539 }, { "epoch": 0.6116874089619112, "grad_norm": 0.5483404994010925, "learning_rate": 1.037882132996005e-05, "loss": 0.0637, "step": 34540 }, { "epoch": 0.6117051184989396, "grad_norm": 0.49743548035621643, "learning_rate": 1.0378002820310552e-05, "loss": 0.0748, "step": 34541 }, { "epoch": 0.6117228280359681, "grad_norm": 0.33064451813697815, "learning_rate": 1.0377184325866957e-05, "loss": 0.0474, "step": 34542 }, { "epoch": 0.6117405375729965, "grad_norm": 0.2192520797252655, "learning_rate": 1.0376365846631955e-05, "loss": 0.0679, "step": 34543 }, { "epoch": 0.6117582471100249, "grad_norm": 0.5611315369606018, "learning_rate": 1.037554738260825e-05, "loss": 0.0724, "step": 34544 }, { "epoch": 0.6117759566470533, "grad_norm": 0.46855321526527405, "learning_rate": 1.0374728933798519e-05, "loss": 0.0635, "step": 34545 }, { "epoch": 0.6117936661840818, "grad_norm": 1.0591689348220825, "learning_rate": 1.0373910500205463e-05, "loss": 0.1037, "step": 34546 }, { "epoch": 0.6118113757211102, "grad_norm": 0.6673963665962219, "learning_rate": 1.037309208183178e-05, "loss": 0.0531, "step": 34547 }, { "epoch": 0.6118290852581386, "grad_norm": 0.3955065906047821, "learning_rate": 1.0372273678680148e-05, "loss": 0.0558, "step": 34548 }, { "epoch": 0.611846794795167, "grad_norm": 1.0701141357421875, "learning_rate": 1.0371455290753269e-05, "loss": 0.0457, "step": 34549 }, { "epoch": 0.6118645043321955, "grad_norm": 0.6140042543411255, "learning_rate": 1.0370636918053835e-05, "loss": 0.085, "step": 34550 }, { "epoch": 0.6118822138692239, "grad_norm": 0.5839070081710815, "learning_rate": 1.0369818560584542e-05, "loss": 0.0602, "step": 34551 }, { "epoch": 0.6118999234062523, "grad_norm": 0.4945180118083954, "learning_rate": 1.0369000218348074e-05, "loss": 0.047, "step": 34552 }, { "epoch": 0.6119176329432808, "grad_norm": 0.5979256629943848, "learning_rate": 1.0368181891347126e-05, "loss": 0.0523, "step": 34553 }, { "epoch": 0.6119353424803092, "grad_norm": 0.44203582406044006, "learning_rate": 1.0367363579584387e-05, "loss": 0.0657, "step": 34554 }, { "epoch": 0.6119530520173376, "grad_norm": 0.8052446842193604, "learning_rate": 1.0366545283062563e-05, "loss": 0.0828, "step": 34555 }, { "epoch": 0.611970761554366, "grad_norm": 1.1511509418487549, "learning_rate": 1.036572700178433e-05, "loss": 0.0607, "step": 34556 }, { "epoch": 0.6119884710913945, "grad_norm": 0.738914966583252, "learning_rate": 1.0364908735752386e-05, "loss": 0.0523, "step": 34557 }, { "epoch": 0.6120061806284229, "grad_norm": 0.7255017161369324, "learning_rate": 1.0364090484969426e-05, "loss": 0.0461, "step": 34558 }, { "epoch": 0.6120238901654513, "grad_norm": 0.6417862772941589, "learning_rate": 1.0363272249438138e-05, "loss": 0.0688, "step": 34559 }, { "epoch": 0.6120415997024797, "grad_norm": 0.32806307077407837, "learning_rate": 1.0362454029161215e-05, "loss": 0.0625, "step": 34560 }, { "epoch": 0.6120593092395082, "grad_norm": 0.7724703550338745, "learning_rate": 1.0361635824141348e-05, "loss": 0.0801, "step": 34561 }, { "epoch": 0.6120770187765366, "grad_norm": 0.47210103273391724, "learning_rate": 1.0360817634381235e-05, "loss": 0.0444, "step": 34562 }, { "epoch": 0.612094728313565, "grad_norm": 0.43817228078842163, "learning_rate": 1.0359999459883558e-05, "loss": 0.0505, "step": 34563 }, { "epoch": 0.6121124378505934, "grad_norm": 0.6653777360916138, "learning_rate": 1.0359181300651014e-05, "loss": 0.0576, "step": 34564 }, { "epoch": 0.612130147387622, "grad_norm": 0.29728588461875916, "learning_rate": 1.0358363156686302e-05, "loss": 0.0566, "step": 34565 }, { "epoch": 0.6121478569246503, "grad_norm": 0.6797141432762146, "learning_rate": 1.0357545027992096e-05, "loss": 0.0605, "step": 34566 }, { "epoch": 0.6121655664616787, "grad_norm": 0.5032482147216797, "learning_rate": 1.03567269145711e-05, "loss": 0.0635, "step": 34567 }, { "epoch": 0.6121832759987073, "grad_norm": 0.5045117139816284, "learning_rate": 1.0355908816426002e-05, "loss": 0.0485, "step": 34568 }, { "epoch": 0.6122009855357357, "grad_norm": 0.4238109290599823, "learning_rate": 1.0355090733559499e-05, "loss": 0.0588, "step": 34569 }, { "epoch": 0.612218695072764, "grad_norm": 0.6235858798027039, "learning_rate": 1.0354272665974271e-05, "loss": 0.0817, "step": 34570 }, { "epoch": 0.6122364046097925, "grad_norm": 0.1668676882982254, "learning_rate": 1.035345461367302e-05, "loss": 0.068, "step": 34571 }, { "epoch": 0.612254114146821, "grad_norm": 0.6484094858169556, "learning_rate": 1.0352636576658435e-05, "loss": 0.0753, "step": 34572 }, { "epoch": 0.6122718236838494, "grad_norm": 0.5307098627090454, "learning_rate": 1.03518185549332e-05, "loss": 0.067, "step": 34573 }, { "epoch": 0.6122895332208778, "grad_norm": 0.731577455997467, "learning_rate": 1.0351000548500015e-05, "loss": 0.0998, "step": 34574 }, { "epoch": 0.6123072427579062, "grad_norm": 0.5782921314239502, "learning_rate": 1.0350182557361568e-05, "loss": 0.0554, "step": 34575 }, { "epoch": 0.6123249522949347, "grad_norm": 1.0268774032592773, "learning_rate": 1.0349364581520552e-05, "loss": 0.0661, "step": 34576 }, { "epoch": 0.6123426618319631, "grad_norm": 0.23504886031150818, "learning_rate": 1.0348546620979652e-05, "loss": 0.0643, "step": 34577 }, { "epoch": 0.6123603713689915, "grad_norm": 0.612755298614502, "learning_rate": 1.0347728675741565e-05, "loss": 0.0382, "step": 34578 }, { "epoch": 0.6123780809060199, "grad_norm": 0.531147837638855, "learning_rate": 1.0346910745808985e-05, "loss": 0.0544, "step": 34579 }, { "epoch": 0.6123957904430484, "grad_norm": 0.47770023345947266, "learning_rate": 1.0346092831184592e-05, "loss": 0.0785, "step": 34580 }, { "epoch": 0.6124134999800768, "grad_norm": 0.8793540596961975, "learning_rate": 1.0345274931871082e-05, "loss": 0.0736, "step": 34581 }, { "epoch": 0.6124312095171052, "grad_norm": 0.8751111030578613, "learning_rate": 1.034445704787115e-05, "loss": 0.0753, "step": 34582 }, { "epoch": 0.6124489190541337, "grad_norm": 0.6789291501045227, "learning_rate": 1.0343639179187486e-05, "loss": 0.0519, "step": 34583 }, { "epoch": 0.6124666285911621, "grad_norm": 0.65850830078125, "learning_rate": 1.0342821325822771e-05, "loss": 0.0441, "step": 34584 }, { "epoch": 0.6124843381281905, "grad_norm": 0.3722776174545288, "learning_rate": 1.0342003487779705e-05, "loss": 0.0673, "step": 34585 }, { "epoch": 0.6125020476652189, "grad_norm": 0.43620023131370544, "learning_rate": 1.0341185665060979e-05, "loss": 0.0644, "step": 34586 }, { "epoch": 0.6125197572022474, "grad_norm": 0.8243889808654785, "learning_rate": 1.0340367857669279e-05, "loss": 0.0748, "step": 34587 }, { "epoch": 0.6125374667392758, "grad_norm": 0.47553008794784546, "learning_rate": 1.0339550065607298e-05, "loss": 0.0661, "step": 34588 }, { "epoch": 0.6125551762763042, "grad_norm": 0.5918715000152588, "learning_rate": 1.0338732288877726e-05, "loss": 0.0556, "step": 34589 }, { "epoch": 0.6125728858133326, "grad_norm": 0.8027129769325256, "learning_rate": 1.0337914527483259e-05, "loss": 0.0448, "step": 34590 }, { "epoch": 0.6125905953503611, "grad_norm": 0.6304697394371033, "learning_rate": 1.033709678142657e-05, "loss": 0.0684, "step": 34591 }, { "epoch": 0.6126083048873895, "grad_norm": 0.8658968806266785, "learning_rate": 1.0336279050710367e-05, "loss": 0.0733, "step": 34592 }, { "epoch": 0.6126260144244179, "grad_norm": 0.7050118446350098, "learning_rate": 1.0335461335337333e-05, "loss": 0.0693, "step": 34593 }, { "epoch": 0.6126437239614463, "grad_norm": 1.1884273290634155, "learning_rate": 1.0334643635310166e-05, "loss": 0.0518, "step": 34594 }, { "epoch": 0.6126614334984748, "grad_norm": 0.3306611180305481, "learning_rate": 1.0333825950631544e-05, "loss": 0.0498, "step": 34595 }, { "epoch": 0.6126791430355032, "grad_norm": 0.43341416120529175, "learning_rate": 1.0333008281304161e-05, "loss": 0.0445, "step": 34596 }, { "epoch": 0.6126968525725316, "grad_norm": 0.4083501696586609, "learning_rate": 1.0332190627330712e-05, "loss": 0.0509, "step": 34597 }, { "epoch": 0.6127145621095601, "grad_norm": 0.35971754789352417, "learning_rate": 1.033137298871388e-05, "loss": 0.0431, "step": 34598 }, { "epoch": 0.6127322716465885, "grad_norm": 0.5125412344932556, "learning_rate": 1.033055536545636e-05, "loss": 0.0796, "step": 34599 }, { "epoch": 0.6127499811836169, "grad_norm": 0.9443230032920837, "learning_rate": 1.0329737757560842e-05, "loss": 0.0679, "step": 34600 }, { "epoch": 0.6127676907206453, "grad_norm": 0.5108118653297424, "learning_rate": 1.0328920165030016e-05, "loss": 0.0613, "step": 34601 }, { "epoch": 0.6127854002576738, "grad_norm": 0.6349275708198547, "learning_rate": 1.0328102587866567e-05, "loss": 0.0619, "step": 34602 }, { "epoch": 0.6128031097947022, "grad_norm": 0.30655813217163086, "learning_rate": 1.032728502607319e-05, "loss": 0.0543, "step": 34603 }, { "epoch": 0.6128208193317306, "grad_norm": 0.3825224041938782, "learning_rate": 1.0326467479652574e-05, "loss": 0.0694, "step": 34604 }, { "epoch": 0.612838528868759, "grad_norm": 0.3998982012271881, "learning_rate": 1.0325649948607404e-05, "loss": 0.0615, "step": 34605 }, { "epoch": 0.6128562384057875, "grad_norm": 1.1634843349456787, "learning_rate": 1.0324832432940371e-05, "loss": 0.0738, "step": 34606 }, { "epoch": 0.6128739479428159, "grad_norm": 0.5440296530723572, "learning_rate": 1.0324014932654168e-05, "loss": 0.0746, "step": 34607 }, { "epoch": 0.6128916574798443, "grad_norm": 1.1136538982391357, "learning_rate": 1.0323197447751487e-05, "loss": 0.0669, "step": 34608 }, { "epoch": 0.6129093670168727, "grad_norm": 0.7724729180335999, "learning_rate": 1.0322379978235012e-05, "loss": 0.0707, "step": 34609 }, { "epoch": 0.6129270765539012, "grad_norm": 0.750372052192688, "learning_rate": 1.032156252410743e-05, "loss": 0.0566, "step": 34610 }, { "epoch": 0.6129447860909296, "grad_norm": 0.8722894787788391, "learning_rate": 1.032074508537144e-05, "loss": 0.0673, "step": 34611 }, { "epoch": 0.612962495627958, "grad_norm": 0.819909393787384, "learning_rate": 1.0319927662029721e-05, "loss": 0.0723, "step": 34612 }, { "epoch": 0.6129802051649865, "grad_norm": 0.3587557375431061, "learning_rate": 1.0319110254084964e-05, "loss": 0.0572, "step": 34613 }, { "epoch": 0.6129979147020149, "grad_norm": 0.9625821709632874, "learning_rate": 1.0318292861539866e-05, "loss": 0.0843, "step": 34614 }, { "epoch": 0.6130156242390433, "grad_norm": 0.8449137806892395, "learning_rate": 1.031747548439711e-05, "loss": 0.0913, "step": 34615 }, { "epoch": 0.6130333337760717, "grad_norm": 0.6830506920814514, "learning_rate": 1.0316658122659386e-05, "loss": 0.0751, "step": 34616 }, { "epoch": 0.6130510433131002, "grad_norm": 0.6190362572669983, "learning_rate": 1.031584077632938e-05, "loss": 0.0694, "step": 34617 }, { "epoch": 0.6130687528501286, "grad_norm": 0.7770211696624756, "learning_rate": 1.0315023445409792e-05, "loss": 0.0739, "step": 34618 }, { "epoch": 0.613086462387157, "grad_norm": 0.8195469975471497, "learning_rate": 1.0314206129903297e-05, "loss": 0.0649, "step": 34619 }, { "epoch": 0.6131041719241854, "grad_norm": 0.43530014157295227, "learning_rate": 1.0313388829812585e-05, "loss": 0.0591, "step": 34620 }, { "epoch": 0.6131218814612139, "grad_norm": 0.44196245074272156, "learning_rate": 1.0312571545140356e-05, "loss": 0.042, "step": 34621 }, { "epoch": 0.6131395909982423, "grad_norm": 1.1092486381530762, "learning_rate": 1.0311754275889294e-05, "loss": 0.1152, "step": 34622 }, { "epoch": 0.6131573005352707, "grad_norm": 0.5359131693840027, "learning_rate": 1.0310937022062084e-05, "loss": 0.0546, "step": 34623 }, { "epoch": 0.6131750100722991, "grad_norm": 1.006941795349121, "learning_rate": 1.0310119783661417e-05, "loss": 0.0735, "step": 34624 }, { "epoch": 0.6131927196093276, "grad_norm": 0.5718908905982971, "learning_rate": 1.030930256068998e-05, "loss": 0.0724, "step": 34625 }, { "epoch": 0.613210429146356, "grad_norm": 0.6438770890235901, "learning_rate": 1.0308485353150465e-05, "loss": 0.0648, "step": 34626 }, { "epoch": 0.6132281386833844, "grad_norm": 0.5971347093582153, "learning_rate": 1.0307668161045558e-05, "loss": 0.0696, "step": 34627 }, { "epoch": 0.613245848220413, "grad_norm": 0.5245687365531921, "learning_rate": 1.0306850984377947e-05, "loss": 0.0876, "step": 34628 }, { "epoch": 0.6132635577574413, "grad_norm": 0.8184003829956055, "learning_rate": 1.0306033823150324e-05, "loss": 0.0675, "step": 34629 }, { "epoch": 0.6132812672944697, "grad_norm": 0.5979953408241272, "learning_rate": 1.0305216677365375e-05, "loss": 0.0658, "step": 34630 }, { "epoch": 0.6132989768314981, "grad_norm": 0.44548356533050537, "learning_rate": 1.0304399547025786e-05, "loss": 0.0609, "step": 34631 }, { "epoch": 0.6133166863685267, "grad_norm": 0.797425389289856, "learning_rate": 1.0303582432134246e-05, "loss": 0.0707, "step": 34632 }, { "epoch": 0.613334395905555, "grad_norm": 0.4818781316280365, "learning_rate": 1.0302765332693454e-05, "loss": 0.0407, "step": 34633 }, { "epoch": 0.6133521054425835, "grad_norm": 0.7785106301307678, "learning_rate": 1.0301948248706082e-05, "loss": 0.0811, "step": 34634 }, { "epoch": 0.6133698149796118, "grad_norm": 0.48153382539749146, "learning_rate": 1.0301131180174821e-05, "loss": 0.063, "step": 34635 }, { "epoch": 0.6133875245166404, "grad_norm": 0.4014893174171448, "learning_rate": 1.0300314127102371e-05, "loss": 0.0615, "step": 34636 }, { "epoch": 0.6134052340536688, "grad_norm": 0.40947574377059937, "learning_rate": 1.0299497089491408e-05, "loss": 0.0724, "step": 34637 }, { "epoch": 0.6134229435906972, "grad_norm": 0.4810069799423218, "learning_rate": 1.0298680067344624e-05, "loss": 0.0528, "step": 34638 }, { "epoch": 0.6134406531277256, "grad_norm": 0.3775233328342438, "learning_rate": 1.0297863060664707e-05, "loss": 0.0888, "step": 34639 }, { "epoch": 0.6134583626647541, "grad_norm": 0.6425824165344238, "learning_rate": 1.029704606945435e-05, "loss": 0.073, "step": 34640 }, { "epoch": 0.6134760722017825, "grad_norm": 0.7076261043548584, "learning_rate": 1.029622909371623e-05, "loss": 0.1033, "step": 34641 }, { "epoch": 0.6134937817388109, "grad_norm": 0.6702064871788025, "learning_rate": 1.029541213345304e-05, "loss": 0.1124, "step": 34642 }, { "epoch": 0.6135114912758394, "grad_norm": 0.5828850865364075, "learning_rate": 1.0294595188667474e-05, "loss": 0.0814, "step": 34643 }, { "epoch": 0.6135292008128678, "grad_norm": 0.9257165193557739, "learning_rate": 1.0293778259362208e-05, "loss": 0.0976, "step": 34644 }, { "epoch": 0.6135469103498962, "grad_norm": 0.48252072930336, "learning_rate": 1.0292961345539938e-05, "loss": 0.0661, "step": 34645 }, { "epoch": 0.6135646198869246, "grad_norm": 0.5816017389297485, "learning_rate": 1.0292144447203348e-05, "loss": 0.0707, "step": 34646 }, { "epoch": 0.6135823294239531, "grad_norm": 0.3675587475299835, "learning_rate": 1.0291327564355133e-05, "loss": 0.0406, "step": 34647 }, { "epoch": 0.6136000389609815, "grad_norm": 0.8580001592636108, "learning_rate": 1.0290510696997967e-05, "loss": 0.0693, "step": 34648 }, { "epoch": 0.6136177484980099, "grad_norm": 0.5307602882385254, "learning_rate": 1.0289693845134541e-05, "loss": 0.0772, "step": 34649 }, { "epoch": 0.6136354580350383, "grad_norm": 0.8816590905189514, "learning_rate": 1.0288877008767558e-05, "loss": 0.0789, "step": 34650 }, { "epoch": 0.6136531675720668, "grad_norm": 0.6043663024902344, "learning_rate": 1.0288060187899683e-05, "loss": 0.0608, "step": 34651 }, { "epoch": 0.6136708771090952, "grad_norm": 0.29826387763023376, "learning_rate": 1.0287243382533616e-05, "loss": 0.0864, "step": 34652 }, { "epoch": 0.6136885866461236, "grad_norm": 0.5208731889724731, "learning_rate": 1.0286426592672041e-05, "loss": 0.0512, "step": 34653 }, { "epoch": 0.613706296183152, "grad_norm": 0.430389404296875, "learning_rate": 1.028560981831765e-05, "loss": 0.0672, "step": 34654 }, { "epoch": 0.6137240057201805, "grad_norm": 0.48985975980758667, "learning_rate": 1.0284793059473122e-05, "loss": 0.0618, "step": 34655 }, { "epoch": 0.6137417152572089, "grad_norm": 0.5055108070373535, "learning_rate": 1.0283976316141146e-05, "loss": 0.0746, "step": 34656 }, { "epoch": 0.6137594247942373, "grad_norm": 0.4984966814517975, "learning_rate": 1.0283159588324417e-05, "loss": 0.053, "step": 34657 }, { "epoch": 0.6137771343312658, "grad_norm": 0.3285355269908905, "learning_rate": 1.0282342876025612e-05, "loss": 0.0681, "step": 34658 }, { "epoch": 0.6137948438682942, "grad_norm": 0.5853932499885559, "learning_rate": 1.028152617924742e-05, "loss": 0.0736, "step": 34659 }, { "epoch": 0.6138125534053226, "grad_norm": 0.4859016239643097, "learning_rate": 1.0280709497992531e-05, "loss": 0.0796, "step": 34660 }, { "epoch": 0.613830262942351, "grad_norm": 0.9150814414024353, "learning_rate": 1.0279892832263636e-05, "loss": 0.0723, "step": 34661 }, { "epoch": 0.6138479724793795, "grad_norm": 0.5883472561836243, "learning_rate": 1.0279076182063412e-05, "loss": 0.0498, "step": 34662 }, { "epoch": 0.6138656820164079, "grad_norm": 0.6267611980438232, "learning_rate": 1.0278259547394546e-05, "loss": 0.0661, "step": 34663 }, { "epoch": 0.6138833915534363, "grad_norm": 0.7670118808746338, "learning_rate": 1.027744292825973e-05, "loss": 0.0854, "step": 34664 }, { "epoch": 0.6139011010904647, "grad_norm": 0.8633360266685486, "learning_rate": 1.0276626324661656e-05, "loss": 0.0622, "step": 34665 }, { "epoch": 0.6139188106274932, "grad_norm": 0.6083292961120605, "learning_rate": 1.0275809736602997e-05, "loss": 0.0564, "step": 34666 }, { "epoch": 0.6139365201645216, "grad_norm": 0.4890473484992981, "learning_rate": 1.0274993164086447e-05, "loss": 0.0621, "step": 34667 }, { "epoch": 0.61395422970155, "grad_norm": 1.0211986303329468, "learning_rate": 1.0274176607114694e-05, "loss": 0.0618, "step": 34668 }, { "epoch": 0.6139719392385784, "grad_norm": 0.7288692593574524, "learning_rate": 1.027336006569042e-05, "loss": 0.0643, "step": 34669 }, { "epoch": 0.6139896487756069, "grad_norm": 0.7065585255622864, "learning_rate": 1.0272543539816313e-05, "loss": 0.063, "step": 34670 }, { "epoch": 0.6140073583126353, "grad_norm": 0.5187411904335022, "learning_rate": 1.0271727029495059e-05, "loss": 0.0774, "step": 34671 }, { "epoch": 0.6140250678496637, "grad_norm": 0.6912900805473328, "learning_rate": 1.0270910534729346e-05, "loss": 0.0802, "step": 34672 }, { "epoch": 0.6140427773866922, "grad_norm": 0.7098316550254822, "learning_rate": 1.027009405552186e-05, "loss": 0.054, "step": 34673 }, { "epoch": 0.6140604869237206, "grad_norm": 0.2956260144710541, "learning_rate": 1.0269277591875284e-05, "loss": 0.0368, "step": 34674 }, { "epoch": 0.614078196460749, "grad_norm": 0.45068156719207764, "learning_rate": 1.0268461143792311e-05, "loss": 0.0811, "step": 34675 }, { "epoch": 0.6140959059977774, "grad_norm": 0.9180080890655518, "learning_rate": 1.0267644711275618e-05, "loss": 0.0688, "step": 34676 }, { "epoch": 0.6141136155348059, "grad_norm": 0.8673187494277954, "learning_rate": 1.0266828294327896e-05, "loss": 0.0868, "step": 34677 }, { "epoch": 0.6141313250718343, "grad_norm": 0.6059946417808533, "learning_rate": 1.0266011892951826e-05, "loss": 0.0477, "step": 34678 }, { "epoch": 0.6141490346088627, "grad_norm": 0.9345081448554993, "learning_rate": 1.0265195507150107e-05, "loss": 0.097, "step": 34679 }, { "epoch": 0.6141667441458911, "grad_norm": 0.4821797311306, "learning_rate": 1.026437913692541e-05, "loss": 0.0508, "step": 34680 }, { "epoch": 0.6141844536829196, "grad_norm": 0.44539183378219604, "learning_rate": 1.0263562782280427e-05, "loss": 0.0645, "step": 34681 }, { "epoch": 0.614202163219948, "grad_norm": 0.4982277452945709, "learning_rate": 1.0262746443217845e-05, "loss": 0.0475, "step": 34682 }, { "epoch": 0.6142198727569764, "grad_norm": 0.781915545463562, "learning_rate": 1.0261930119740346e-05, "loss": 0.043, "step": 34683 }, { "epoch": 0.6142375822940048, "grad_norm": 0.42929545044898987, "learning_rate": 1.026111381185062e-05, "loss": 0.0433, "step": 34684 }, { "epoch": 0.6142552918310333, "grad_norm": 0.4820345342159271, "learning_rate": 1.0260297519551349e-05, "loss": 0.0693, "step": 34685 }, { "epoch": 0.6142730013680617, "grad_norm": 0.6843753457069397, "learning_rate": 1.0259481242845224e-05, "loss": 0.0948, "step": 34686 }, { "epoch": 0.6142907109050901, "grad_norm": 0.6448172330856323, "learning_rate": 1.0258664981734917e-05, "loss": 0.0999, "step": 34687 }, { "epoch": 0.6143084204421186, "grad_norm": 0.8728513717651367, "learning_rate": 1.0257848736223126e-05, "loss": 0.1085, "step": 34688 }, { "epoch": 0.614326129979147, "grad_norm": 0.7030624151229858, "learning_rate": 1.0257032506312541e-05, "loss": 0.0519, "step": 34689 }, { "epoch": 0.6143438395161754, "grad_norm": 0.284485399723053, "learning_rate": 1.0256216292005832e-05, "loss": 0.0742, "step": 34690 }, { "epoch": 0.6143615490532038, "grad_norm": 0.654167652130127, "learning_rate": 1.0255400093305692e-05, "loss": 0.0742, "step": 34691 }, { "epoch": 0.6143792585902323, "grad_norm": 1.1031795740127563, "learning_rate": 1.0254583910214807e-05, "loss": 0.0719, "step": 34692 }, { "epoch": 0.6143969681272607, "grad_norm": 0.6878606081008911, "learning_rate": 1.0253767742735862e-05, "loss": 0.0946, "step": 34693 }, { "epoch": 0.6144146776642891, "grad_norm": 0.7969387769699097, "learning_rate": 1.0252951590871538e-05, "loss": 0.0869, "step": 34694 }, { "epoch": 0.6144323872013175, "grad_norm": 0.4493722915649414, "learning_rate": 1.0252135454624523e-05, "loss": 0.0359, "step": 34695 }, { "epoch": 0.614450096738346, "grad_norm": 0.5355194211006165, "learning_rate": 1.0251319333997503e-05, "loss": 0.063, "step": 34696 }, { "epoch": 0.6144678062753745, "grad_norm": 0.3709430694580078, "learning_rate": 1.0250503228993166e-05, "loss": 0.0536, "step": 34697 }, { "epoch": 0.6144855158124028, "grad_norm": 0.6719488501548767, "learning_rate": 1.0249687139614188e-05, "loss": 0.0822, "step": 34698 }, { "epoch": 0.6145032253494312, "grad_norm": 0.6447195410728455, "learning_rate": 1.024887106586326e-05, "loss": 0.0671, "step": 34699 }, { "epoch": 0.6145209348864598, "grad_norm": 0.6695923805236816, "learning_rate": 1.0248055007743072e-05, "loss": 0.07, "step": 34700 }, { "epoch": 0.6145386444234882, "grad_norm": 1.022447943687439, "learning_rate": 1.0247238965256293e-05, "loss": 0.0862, "step": 34701 }, { "epoch": 0.6145563539605166, "grad_norm": 0.6947582960128784, "learning_rate": 1.0246422938405618e-05, "loss": 0.0733, "step": 34702 }, { "epoch": 0.6145740634975451, "grad_norm": 0.38853806257247925, "learning_rate": 1.0245606927193733e-05, "loss": 0.0536, "step": 34703 }, { "epoch": 0.6145917730345735, "grad_norm": 0.28236308693885803, "learning_rate": 1.0244790931623327e-05, "loss": 0.0645, "step": 34704 }, { "epoch": 0.6146094825716019, "grad_norm": 0.6118834018707275, "learning_rate": 1.024397495169707e-05, "loss": 0.0867, "step": 34705 }, { "epoch": 0.6146271921086303, "grad_norm": 0.42953935265541077, "learning_rate": 1.0243158987417656e-05, "loss": 0.0451, "step": 34706 }, { "epoch": 0.6146449016456588, "grad_norm": 0.5873019099235535, "learning_rate": 1.0242343038787771e-05, "loss": 0.0554, "step": 34707 }, { "epoch": 0.6146626111826872, "grad_norm": 0.43936628103256226, "learning_rate": 1.0241527105810091e-05, "loss": 0.064, "step": 34708 }, { "epoch": 0.6146803207197156, "grad_norm": 0.6315122842788696, "learning_rate": 1.0240711188487306e-05, "loss": 0.0624, "step": 34709 }, { "epoch": 0.614698030256744, "grad_norm": 0.5120981931686401, "learning_rate": 1.0239895286822103e-05, "loss": 0.0893, "step": 34710 }, { "epoch": 0.6147157397937725, "grad_norm": 0.4334065914154053, "learning_rate": 1.0239079400817162e-05, "loss": 0.0483, "step": 34711 }, { "epoch": 0.6147334493308009, "grad_norm": 0.7704384326934814, "learning_rate": 1.0238263530475166e-05, "loss": 0.0743, "step": 34712 }, { "epoch": 0.6147511588678293, "grad_norm": 0.4840973913669586, "learning_rate": 1.0237447675798803e-05, "loss": 0.0634, "step": 34713 }, { "epoch": 0.6147688684048577, "grad_norm": 0.49998927116394043, "learning_rate": 1.0236631836790761e-05, "loss": 0.059, "step": 34714 }, { "epoch": 0.6147865779418862, "grad_norm": 0.8448522686958313, "learning_rate": 1.0235816013453714e-05, "loss": 0.0849, "step": 34715 }, { "epoch": 0.6148042874789146, "grad_norm": 0.5755583047866821, "learning_rate": 1.0235000205790344e-05, "loss": 0.0769, "step": 34716 }, { "epoch": 0.614821997015943, "grad_norm": 0.646946370601654, "learning_rate": 1.0234184413803348e-05, "loss": 0.0649, "step": 34717 }, { "epoch": 0.6148397065529715, "grad_norm": 0.5057949423789978, "learning_rate": 1.0233368637495406e-05, "loss": 0.0577, "step": 34718 }, { "epoch": 0.6148574160899999, "grad_norm": 0.5295383334159851, "learning_rate": 1.0232552876869195e-05, "loss": 0.0592, "step": 34719 }, { "epoch": 0.6148751256270283, "grad_norm": 0.5350314378738403, "learning_rate": 1.02317371319274e-05, "loss": 0.0566, "step": 34720 }, { "epoch": 0.6148928351640567, "grad_norm": 0.6809912919998169, "learning_rate": 1.0230921402672712e-05, "loss": 0.0783, "step": 34721 }, { "epoch": 0.6149105447010852, "grad_norm": 0.7495014071464539, "learning_rate": 1.0230105689107809e-05, "loss": 0.0602, "step": 34722 }, { "epoch": 0.6149282542381136, "grad_norm": 0.7678201794624329, "learning_rate": 1.0229289991235374e-05, "loss": 0.0819, "step": 34723 }, { "epoch": 0.614945963775142, "grad_norm": 0.2895975410938263, "learning_rate": 1.0228474309058093e-05, "loss": 0.0448, "step": 34724 }, { "epoch": 0.6149636733121704, "grad_norm": 0.6038117408752441, "learning_rate": 1.0227658642578654e-05, "loss": 0.0596, "step": 34725 }, { "epoch": 0.6149813828491989, "grad_norm": 0.673709511756897, "learning_rate": 1.022684299179973e-05, "loss": 0.0645, "step": 34726 }, { "epoch": 0.6149990923862273, "grad_norm": 0.7931719422340393, "learning_rate": 1.022602735672401e-05, "loss": 0.0557, "step": 34727 }, { "epoch": 0.6150168019232557, "grad_norm": 0.43317437171936035, "learning_rate": 1.0225211737354176e-05, "loss": 0.0536, "step": 34728 }, { "epoch": 0.6150345114602841, "grad_norm": 0.5923629999160767, "learning_rate": 1.0224396133692921e-05, "loss": 0.0544, "step": 34729 }, { "epoch": 0.6150522209973126, "grad_norm": 0.40785565972328186, "learning_rate": 1.022358054574291e-05, "loss": 0.0653, "step": 34730 }, { "epoch": 0.615069930534341, "grad_norm": 1.0020554065704346, "learning_rate": 1.0222764973506838e-05, "loss": 0.0609, "step": 34731 }, { "epoch": 0.6150876400713694, "grad_norm": 0.5867511034011841, "learning_rate": 1.0221949416987391e-05, "loss": 0.0693, "step": 34732 }, { "epoch": 0.6151053496083979, "grad_norm": 0.41254693269729614, "learning_rate": 1.0221133876187243e-05, "loss": 0.0583, "step": 34733 }, { "epoch": 0.6151230591454263, "grad_norm": 0.805416464805603, "learning_rate": 1.0220318351109082e-05, "loss": 0.0529, "step": 34734 }, { "epoch": 0.6151407686824547, "grad_norm": 0.5525622367858887, "learning_rate": 1.0219502841755588e-05, "loss": 0.0647, "step": 34735 }, { "epoch": 0.6151584782194831, "grad_norm": 0.646336555480957, "learning_rate": 1.021868734812945e-05, "loss": 0.0552, "step": 34736 }, { "epoch": 0.6151761877565116, "grad_norm": 0.5012755393981934, "learning_rate": 1.0217871870233344e-05, "loss": 0.046, "step": 34737 }, { "epoch": 0.61519389729354, "grad_norm": 0.41557714343070984, "learning_rate": 1.0217056408069956e-05, "loss": 0.0302, "step": 34738 }, { "epoch": 0.6152116068305684, "grad_norm": 0.9817965030670166, "learning_rate": 1.0216240961641972e-05, "loss": 0.0915, "step": 34739 }, { "epoch": 0.6152293163675968, "grad_norm": 0.5690720677375793, "learning_rate": 1.0215425530952068e-05, "loss": 0.0565, "step": 34740 }, { "epoch": 0.6152470259046253, "grad_norm": 1.1411091089248657, "learning_rate": 1.021461011600293e-05, "loss": 0.0785, "step": 34741 }, { "epoch": 0.6152647354416537, "grad_norm": 0.45451828837394714, "learning_rate": 1.0213794716797241e-05, "loss": 0.0457, "step": 34742 }, { "epoch": 0.6152824449786821, "grad_norm": 0.8751809000968933, "learning_rate": 1.021297933333769e-05, "loss": 0.083, "step": 34743 }, { "epoch": 0.6153001545157105, "grad_norm": 0.9237696528434753, "learning_rate": 1.0212163965626944e-05, "loss": 0.0629, "step": 34744 }, { "epoch": 0.615317864052739, "grad_norm": 0.6884348392486572, "learning_rate": 1.0211348613667694e-05, "loss": 0.0605, "step": 34745 }, { "epoch": 0.6153355735897674, "grad_norm": 0.5038574934005737, "learning_rate": 1.021053327746263e-05, "loss": 0.0464, "step": 34746 }, { "epoch": 0.6153532831267958, "grad_norm": 0.5971452593803406, "learning_rate": 1.0209717957014424e-05, "loss": 0.0702, "step": 34747 }, { "epoch": 0.6153709926638243, "grad_norm": 1.2233465909957886, "learning_rate": 1.0208902652325758e-05, "loss": 0.0777, "step": 34748 }, { "epoch": 0.6153887022008527, "grad_norm": 0.6369587779045105, "learning_rate": 1.020808736339932e-05, "loss": 0.0627, "step": 34749 }, { "epoch": 0.6154064117378811, "grad_norm": 0.39594557881355286, "learning_rate": 1.0207272090237792e-05, "loss": 0.044, "step": 34750 }, { "epoch": 0.6154241212749095, "grad_norm": 0.47218069434165955, "learning_rate": 1.020645683284385e-05, "loss": 0.0684, "step": 34751 }, { "epoch": 0.615441830811938, "grad_norm": 0.7529397010803223, "learning_rate": 1.0205641591220183e-05, "loss": 0.0574, "step": 34752 }, { "epoch": 0.6154595403489664, "grad_norm": 0.5394012331962585, "learning_rate": 1.020482636536947e-05, "loss": 0.0514, "step": 34753 }, { "epoch": 0.6154772498859948, "grad_norm": 0.4217313230037689, "learning_rate": 1.0204011155294391e-05, "loss": 0.0373, "step": 34754 }, { "epoch": 0.6154949594230232, "grad_norm": 0.4137438237667084, "learning_rate": 1.0203195960997631e-05, "loss": 0.0455, "step": 34755 }, { "epoch": 0.6155126689600517, "grad_norm": 0.5782241225242615, "learning_rate": 1.0202380782481872e-05, "loss": 0.0604, "step": 34756 }, { "epoch": 0.6155303784970801, "grad_norm": 1.0548738241195679, "learning_rate": 1.02015656197498e-05, "loss": 0.0796, "step": 34757 }, { "epoch": 0.6155480880341085, "grad_norm": 0.4415448307991028, "learning_rate": 1.0200750472804086e-05, "loss": 0.0903, "step": 34758 }, { "epoch": 0.6155657975711369, "grad_norm": 0.5429895520210266, "learning_rate": 1.0199935341647413e-05, "loss": 0.0387, "step": 34759 }, { "epoch": 0.6155835071081655, "grad_norm": 0.7685734629631042, "learning_rate": 1.0199120226282478e-05, "loss": 0.0634, "step": 34760 }, { "epoch": 0.6156012166451938, "grad_norm": 0.6836918592453003, "learning_rate": 1.0198305126711947e-05, "loss": 0.0553, "step": 34761 }, { "epoch": 0.6156189261822222, "grad_norm": 0.6280678510665894, "learning_rate": 1.0197490042938503e-05, "loss": 0.0622, "step": 34762 }, { "epoch": 0.6156366357192508, "grad_norm": 0.6897939443588257, "learning_rate": 1.0196674974964833e-05, "loss": 0.0745, "step": 34763 }, { "epoch": 0.6156543452562792, "grad_norm": 0.8350005149841309, "learning_rate": 1.019585992279362e-05, "loss": 0.0651, "step": 34764 }, { "epoch": 0.6156720547933076, "grad_norm": 0.5159542560577393, "learning_rate": 1.0195044886427538e-05, "loss": 0.0459, "step": 34765 }, { "epoch": 0.615689764330336, "grad_norm": 0.672460675239563, "learning_rate": 1.0194229865869273e-05, "loss": 0.0229, "step": 34766 }, { "epoch": 0.6157074738673645, "grad_norm": 0.2517254650592804, "learning_rate": 1.0193414861121505e-05, "loss": 0.0597, "step": 34767 }, { "epoch": 0.6157251834043929, "grad_norm": 0.4254629909992218, "learning_rate": 1.019259987218692e-05, "loss": 0.0646, "step": 34768 }, { "epoch": 0.6157428929414213, "grad_norm": 0.6217920184135437, "learning_rate": 1.0191784899068193e-05, "loss": 0.0688, "step": 34769 }, { "epoch": 0.6157606024784497, "grad_norm": 0.5977789759635925, "learning_rate": 1.0190969941768005e-05, "loss": 0.0704, "step": 34770 }, { "epoch": 0.6157783120154782, "grad_norm": 0.46789461374282837, "learning_rate": 1.0190155000289048e-05, "loss": 0.0652, "step": 34771 }, { "epoch": 0.6157960215525066, "grad_norm": 0.5550220012664795, "learning_rate": 1.0189340074633986e-05, "loss": 0.0569, "step": 34772 }, { "epoch": 0.615813731089535, "grad_norm": 0.5945661067962646, "learning_rate": 1.018852516480551e-05, "loss": 0.0679, "step": 34773 }, { "epoch": 0.6158314406265635, "grad_norm": 0.4374566078186035, "learning_rate": 1.0187710270806294e-05, "loss": 0.0532, "step": 34774 }, { "epoch": 0.6158491501635919, "grad_norm": 0.677715539932251, "learning_rate": 1.0186895392639038e-05, "loss": 0.0411, "step": 34775 }, { "epoch": 0.6158668597006203, "grad_norm": 0.3886808454990387, "learning_rate": 1.0186080530306399e-05, "loss": 0.0544, "step": 34776 }, { "epoch": 0.6158845692376487, "grad_norm": 0.5436251163482666, "learning_rate": 1.018526568381107e-05, "loss": 0.0724, "step": 34777 }, { "epoch": 0.6159022787746772, "grad_norm": 0.4415634274482727, "learning_rate": 1.0184450853155734e-05, "loss": 0.0384, "step": 34778 }, { "epoch": 0.6159199883117056, "grad_norm": 0.6579908132553101, "learning_rate": 1.0183636038343063e-05, "loss": 0.0539, "step": 34779 }, { "epoch": 0.615937697848734, "grad_norm": 0.5822509527206421, "learning_rate": 1.0182821239375744e-05, "loss": 0.0484, "step": 34780 }, { "epoch": 0.6159554073857624, "grad_norm": 0.7588368058204651, "learning_rate": 1.0182006456256453e-05, "loss": 0.0873, "step": 34781 }, { "epoch": 0.6159731169227909, "grad_norm": 0.5185957551002502, "learning_rate": 1.0181191688987879e-05, "loss": 0.0567, "step": 34782 }, { "epoch": 0.6159908264598193, "grad_norm": 0.5596481561660767, "learning_rate": 1.0180376937572693e-05, "loss": 0.0562, "step": 34783 }, { "epoch": 0.6160085359968477, "grad_norm": 0.810023844242096, "learning_rate": 1.017956220201358e-05, "loss": 0.0487, "step": 34784 }, { "epoch": 0.6160262455338761, "grad_norm": 0.3350954055786133, "learning_rate": 1.0178747482313225e-05, "loss": 0.0784, "step": 34785 }, { "epoch": 0.6160439550709046, "grad_norm": 0.4125559329986572, "learning_rate": 1.0177932778474297e-05, "loss": 0.0482, "step": 34786 }, { "epoch": 0.616061664607933, "grad_norm": 0.5543525815010071, "learning_rate": 1.0177118090499484e-05, "loss": 0.0464, "step": 34787 }, { "epoch": 0.6160793741449614, "grad_norm": 0.6890394687652588, "learning_rate": 1.0176303418391461e-05, "loss": 0.0746, "step": 34788 }, { "epoch": 0.6160970836819899, "grad_norm": 0.5947917699813843, "learning_rate": 1.0175488762152919e-05, "loss": 0.0564, "step": 34789 }, { "epoch": 0.6161147932190183, "grad_norm": 0.6236432194709778, "learning_rate": 1.0174674121786527e-05, "loss": 0.064, "step": 34790 }, { "epoch": 0.6161325027560467, "grad_norm": 0.3234001398086548, "learning_rate": 1.0173859497294968e-05, "loss": 0.0418, "step": 34791 }, { "epoch": 0.6161502122930751, "grad_norm": 0.6507223844528198, "learning_rate": 1.0173044888680928e-05, "loss": 0.0786, "step": 34792 }, { "epoch": 0.6161679218301036, "grad_norm": 0.5634611248970032, "learning_rate": 1.0172230295947079e-05, "loss": 0.0573, "step": 34793 }, { "epoch": 0.616185631367132, "grad_norm": 0.45605623722076416, "learning_rate": 1.01714157190961e-05, "loss": 0.0443, "step": 34794 }, { "epoch": 0.6162033409041604, "grad_norm": 0.38811203837394714, "learning_rate": 1.0170601158130678e-05, "loss": 0.0577, "step": 34795 }, { "epoch": 0.6162210504411888, "grad_norm": 0.6785687208175659, "learning_rate": 1.0169786613053493e-05, "loss": 0.0626, "step": 34796 }, { "epoch": 0.6162387599782173, "grad_norm": 0.49861404299736023, "learning_rate": 1.0168972083867218e-05, "loss": 0.0492, "step": 34797 }, { "epoch": 0.6162564695152457, "grad_norm": 0.43279242515563965, "learning_rate": 1.0168157570574537e-05, "loss": 0.0655, "step": 34798 }, { "epoch": 0.6162741790522741, "grad_norm": 0.46507498621940613, "learning_rate": 1.0167343073178127e-05, "loss": 0.0502, "step": 34799 }, { "epoch": 0.6162918885893025, "grad_norm": 0.7694911360740662, "learning_rate": 1.0166528591680676e-05, "loss": 0.0895, "step": 34800 }, { "epoch": 0.616309598126331, "grad_norm": 0.47561174631118774, "learning_rate": 1.0165714126084854e-05, "loss": 0.0606, "step": 34801 }, { "epoch": 0.6163273076633594, "grad_norm": 0.8812107443809509, "learning_rate": 1.0164899676393341e-05, "loss": 0.0863, "step": 34802 }, { "epoch": 0.6163450172003878, "grad_norm": 0.5288441777229309, "learning_rate": 1.0164085242608822e-05, "loss": 0.0715, "step": 34803 }, { "epoch": 0.6163627267374163, "grad_norm": 0.2800109088420868, "learning_rate": 1.0163270824733969e-05, "loss": 0.0777, "step": 34804 }, { "epoch": 0.6163804362744447, "grad_norm": 0.4605681002140045, "learning_rate": 1.0162456422771469e-05, "loss": 0.0811, "step": 34805 }, { "epoch": 0.6163981458114731, "grad_norm": 0.6180317997932434, "learning_rate": 1.0161642036723995e-05, "loss": 0.0857, "step": 34806 }, { "epoch": 0.6164158553485015, "grad_norm": 0.5843349695205688, "learning_rate": 1.0160827666594235e-05, "loss": 0.0478, "step": 34807 }, { "epoch": 0.61643356488553, "grad_norm": 0.6977852582931519, "learning_rate": 1.0160013312384858e-05, "loss": 0.0593, "step": 34808 }, { "epoch": 0.6164512744225584, "grad_norm": 0.6366837024688721, "learning_rate": 1.0159198974098549e-05, "loss": 0.0601, "step": 34809 }, { "epoch": 0.6164689839595868, "grad_norm": 0.4122186601161957, "learning_rate": 1.0158384651737989e-05, "loss": 0.0939, "step": 34810 }, { "epoch": 0.6164866934966152, "grad_norm": 0.5247741341590881, "learning_rate": 1.0157570345305848e-05, "loss": 0.0785, "step": 34811 }, { "epoch": 0.6165044030336437, "grad_norm": 0.5044998526573181, "learning_rate": 1.015675605480481e-05, "loss": 0.0531, "step": 34812 }, { "epoch": 0.6165221125706721, "grad_norm": 0.9763932824134827, "learning_rate": 1.0155941780237558e-05, "loss": 0.0638, "step": 34813 }, { "epoch": 0.6165398221077005, "grad_norm": 0.5118047595024109, "learning_rate": 1.0155127521606773e-05, "loss": 0.0463, "step": 34814 }, { "epoch": 0.6165575316447289, "grad_norm": 0.710181474685669, "learning_rate": 1.0154313278915123e-05, "loss": 0.0824, "step": 34815 }, { "epoch": 0.6165752411817574, "grad_norm": 0.41047897934913635, "learning_rate": 1.015349905216529e-05, "loss": 0.0478, "step": 34816 }, { "epoch": 0.6165929507187858, "grad_norm": 0.5509020090103149, "learning_rate": 1.0152684841359959e-05, "loss": 0.0614, "step": 34817 }, { "epoch": 0.6166106602558142, "grad_norm": 0.5855426788330078, "learning_rate": 1.0151870646501802e-05, "loss": 0.055, "step": 34818 }, { "epoch": 0.6166283697928427, "grad_norm": 0.6336985230445862, "learning_rate": 1.0151056467593501e-05, "loss": 0.0698, "step": 34819 }, { "epoch": 0.6166460793298711, "grad_norm": 0.6777747869491577, "learning_rate": 1.0150242304637732e-05, "loss": 0.0585, "step": 34820 }, { "epoch": 0.6166637888668995, "grad_norm": 0.48068365454673767, "learning_rate": 1.014942815763718e-05, "loss": 0.0946, "step": 34821 }, { "epoch": 0.6166814984039279, "grad_norm": 0.6063886284828186, "learning_rate": 1.0148614026594515e-05, "loss": 0.052, "step": 34822 }, { "epoch": 0.6166992079409565, "grad_norm": 0.7231523990631104, "learning_rate": 1.0147799911512418e-05, "loss": 0.0584, "step": 34823 }, { "epoch": 0.6167169174779848, "grad_norm": 0.33417966961860657, "learning_rate": 1.0146985812393576e-05, "loss": 0.0646, "step": 34824 }, { "epoch": 0.6167346270150132, "grad_norm": 0.6413604617118835, "learning_rate": 1.014617172924065e-05, "loss": 0.0537, "step": 34825 }, { "epoch": 0.6167523365520416, "grad_norm": 1.0094093084335327, "learning_rate": 1.014535766205633e-05, "loss": 0.078, "step": 34826 }, { "epoch": 0.6167700460890702, "grad_norm": 0.6221015453338623, "learning_rate": 1.0144543610843295e-05, "loss": 0.1014, "step": 34827 }, { "epoch": 0.6167877556260986, "grad_norm": 0.58876633644104, "learning_rate": 1.0143729575604226e-05, "loss": 0.0729, "step": 34828 }, { "epoch": 0.616805465163127, "grad_norm": 0.6414422392845154, "learning_rate": 1.0142915556341789e-05, "loss": 0.0666, "step": 34829 }, { "epoch": 0.6168231747001554, "grad_norm": 0.30527836084365845, "learning_rate": 1.0142101553058669e-05, "loss": 0.0611, "step": 34830 }, { "epoch": 0.6168408842371839, "grad_norm": 0.8514625430107117, "learning_rate": 1.0141287565757544e-05, "loss": 0.0353, "step": 34831 }, { "epoch": 0.6168585937742123, "grad_norm": 0.7519596815109253, "learning_rate": 1.014047359444109e-05, "loss": 0.0813, "step": 34832 }, { "epoch": 0.6168763033112407, "grad_norm": 0.4436912536621094, "learning_rate": 1.0139659639111985e-05, "loss": 0.0441, "step": 34833 }, { "epoch": 0.6168940128482692, "grad_norm": 0.46578556299209595, "learning_rate": 1.013884569977291e-05, "loss": 0.0434, "step": 34834 }, { "epoch": 0.6169117223852976, "grad_norm": 0.722507119178772, "learning_rate": 1.0138031776426546e-05, "loss": 0.0647, "step": 34835 }, { "epoch": 0.616929431922326, "grad_norm": 0.5411722660064697, "learning_rate": 1.013721786907556e-05, "loss": 0.0672, "step": 34836 }, { "epoch": 0.6169471414593544, "grad_norm": 0.9525827169418335, "learning_rate": 1.0136403977722635e-05, "loss": 0.0857, "step": 34837 }, { "epoch": 0.6169648509963829, "grad_norm": 0.6657924056053162, "learning_rate": 1.0135590102370451e-05, "loss": 0.0504, "step": 34838 }, { "epoch": 0.6169825605334113, "grad_norm": 0.80262690782547, "learning_rate": 1.0134776243021687e-05, "loss": 0.0642, "step": 34839 }, { "epoch": 0.6170002700704397, "grad_norm": 0.6783338785171509, "learning_rate": 1.0133962399679009e-05, "loss": 0.0777, "step": 34840 }, { "epoch": 0.6170179796074681, "grad_norm": 0.26555752754211426, "learning_rate": 1.0133148572345107e-05, "loss": 0.0651, "step": 34841 }, { "epoch": 0.6170356891444966, "grad_norm": 0.4584875702857971, "learning_rate": 1.013233476102266e-05, "loss": 0.0632, "step": 34842 }, { "epoch": 0.617053398681525, "grad_norm": 0.8048606514930725, "learning_rate": 1.0131520965714331e-05, "loss": 0.0713, "step": 34843 }, { "epoch": 0.6170711082185534, "grad_norm": 0.780223548412323, "learning_rate": 1.0130707186422807e-05, "loss": 0.0756, "step": 34844 }, { "epoch": 0.6170888177555818, "grad_norm": 0.44036075472831726, "learning_rate": 1.0129893423150764e-05, "loss": 0.0486, "step": 34845 }, { "epoch": 0.6171065272926103, "grad_norm": 0.5001811981201172, "learning_rate": 1.012907967590088e-05, "loss": 0.0487, "step": 34846 }, { "epoch": 0.6171242368296387, "grad_norm": 0.4547058045864105, "learning_rate": 1.0128265944675833e-05, "loss": 0.0446, "step": 34847 }, { "epoch": 0.6171419463666671, "grad_norm": 0.6758930087089539, "learning_rate": 1.0127452229478293e-05, "loss": 0.0594, "step": 34848 }, { "epoch": 0.6171596559036956, "grad_norm": 0.24732635915279388, "learning_rate": 1.0126638530310948e-05, "loss": 0.0963, "step": 34849 }, { "epoch": 0.617177365440724, "grad_norm": 0.4555678069591522, "learning_rate": 1.0125824847176466e-05, "loss": 0.0657, "step": 34850 }, { "epoch": 0.6171950749777524, "grad_norm": 0.5897741317749023, "learning_rate": 1.0125011180077528e-05, "loss": 0.0786, "step": 34851 }, { "epoch": 0.6172127845147808, "grad_norm": 0.35025927424430847, "learning_rate": 1.0124197529016808e-05, "loss": 0.036, "step": 34852 }, { "epoch": 0.6172304940518093, "grad_norm": 0.7467545866966248, "learning_rate": 1.0123383893996993e-05, "loss": 0.0469, "step": 34853 }, { "epoch": 0.6172482035888377, "grad_norm": 0.658046305179596, "learning_rate": 1.012257027502074e-05, "loss": 0.0804, "step": 34854 }, { "epoch": 0.6172659131258661, "grad_norm": 0.6356505751609802, "learning_rate": 1.0121756672090743e-05, "loss": 0.0518, "step": 34855 }, { "epoch": 0.6172836226628945, "grad_norm": 0.8543242812156677, "learning_rate": 1.012094308520968e-05, "loss": 0.0756, "step": 34856 }, { "epoch": 0.617301332199923, "grad_norm": 0.46561452746391296, "learning_rate": 1.0120129514380213e-05, "loss": 0.1044, "step": 34857 }, { "epoch": 0.6173190417369514, "grad_norm": 0.6469986438751221, "learning_rate": 1.0119315959605028e-05, "loss": 0.0588, "step": 34858 }, { "epoch": 0.6173367512739798, "grad_norm": 0.7151246070861816, "learning_rate": 1.0118502420886796e-05, "loss": 0.072, "step": 34859 }, { "epoch": 0.6173544608110082, "grad_norm": 1.3719842433929443, "learning_rate": 1.0117688898228205e-05, "loss": 0.0852, "step": 34860 }, { "epoch": 0.6173721703480367, "grad_norm": 0.8211709856987, "learning_rate": 1.0116875391631919e-05, "loss": 0.0732, "step": 34861 }, { "epoch": 0.6173898798850651, "grad_norm": 0.34832414984703064, "learning_rate": 1.0116061901100617e-05, "loss": 0.0388, "step": 34862 }, { "epoch": 0.6174075894220935, "grad_norm": 0.2419625073671341, "learning_rate": 1.0115248426636983e-05, "loss": 0.0376, "step": 34863 }, { "epoch": 0.617425298959122, "grad_norm": 0.6336714029312134, "learning_rate": 1.0114434968243683e-05, "loss": 0.0799, "step": 34864 }, { "epoch": 0.6174430084961504, "grad_norm": 0.5377244353294373, "learning_rate": 1.0113621525923399e-05, "loss": 0.0408, "step": 34865 }, { "epoch": 0.6174607180331788, "grad_norm": 0.6097860932350159, "learning_rate": 1.0112808099678806e-05, "loss": 0.0722, "step": 34866 }, { "epoch": 0.6174784275702072, "grad_norm": 0.4690190255641937, "learning_rate": 1.0111994689512584e-05, "loss": 0.059, "step": 34867 }, { "epoch": 0.6174961371072357, "grad_norm": 0.6008411049842834, "learning_rate": 1.01111812954274e-05, "loss": 0.0736, "step": 34868 }, { "epoch": 0.6175138466442641, "grad_norm": 0.8712428212165833, "learning_rate": 1.0110367917425932e-05, "loss": 0.0714, "step": 34869 }, { "epoch": 0.6175315561812925, "grad_norm": 1.077836036682129, "learning_rate": 1.0109554555510863e-05, "loss": 0.1177, "step": 34870 }, { "epoch": 0.6175492657183209, "grad_norm": 0.9155020117759705, "learning_rate": 1.0108741209684872e-05, "loss": 0.0693, "step": 34871 }, { "epoch": 0.6175669752553494, "grad_norm": 0.8212645053863525, "learning_rate": 1.010792787995062e-05, "loss": 0.0736, "step": 34872 }, { "epoch": 0.6175846847923778, "grad_norm": 0.5989539623260498, "learning_rate": 1.0107114566310789e-05, "loss": 0.057, "step": 34873 }, { "epoch": 0.6176023943294062, "grad_norm": 0.5567363500595093, "learning_rate": 1.0106301268768062e-05, "loss": 0.0476, "step": 34874 }, { "epoch": 0.6176201038664346, "grad_norm": 0.5456699132919312, "learning_rate": 1.0105487987325105e-05, "loss": 0.0399, "step": 34875 }, { "epoch": 0.6176378134034631, "grad_norm": 0.4482773542404175, "learning_rate": 1.0104674721984596e-05, "loss": 0.0336, "step": 34876 }, { "epoch": 0.6176555229404915, "grad_norm": 0.4487209618091583, "learning_rate": 1.0103861472749213e-05, "loss": 0.0524, "step": 34877 }, { "epoch": 0.6176732324775199, "grad_norm": 0.45780274271965027, "learning_rate": 1.0103048239621634e-05, "loss": 0.0633, "step": 34878 }, { "epoch": 0.6176909420145484, "grad_norm": 0.3798298239707947, "learning_rate": 1.0102235022604528e-05, "loss": 0.06, "step": 34879 }, { "epoch": 0.6177086515515768, "grad_norm": 0.6814168691635132, "learning_rate": 1.0101421821700572e-05, "loss": 0.0755, "step": 34880 }, { "epoch": 0.6177263610886052, "grad_norm": 0.2616501748561859, "learning_rate": 1.0100608636912452e-05, "loss": 0.0685, "step": 34881 }, { "epoch": 0.6177440706256336, "grad_norm": 0.7560063600540161, "learning_rate": 1.0099795468242824e-05, "loss": 0.0834, "step": 34882 }, { "epoch": 0.6177617801626621, "grad_norm": 0.7344135046005249, "learning_rate": 1.0098982315694372e-05, "loss": 0.0521, "step": 34883 }, { "epoch": 0.6177794896996905, "grad_norm": 0.4067913591861725, "learning_rate": 1.0098169179269778e-05, "loss": 0.0547, "step": 34884 }, { "epoch": 0.6177971992367189, "grad_norm": 1.2547334432601929, "learning_rate": 1.0097356058971713e-05, "loss": 0.067, "step": 34885 }, { "epoch": 0.6178149087737473, "grad_norm": 0.8680694699287415, "learning_rate": 1.0096542954802849e-05, "loss": 0.0711, "step": 34886 }, { "epoch": 0.6178326183107758, "grad_norm": 0.6749756336212158, "learning_rate": 1.0095729866765859e-05, "loss": 0.066, "step": 34887 }, { "epoch": 0.6178503278478042, "grad_norm": 0.6907454133033752, "learning_rate": 1.0094916794863427e-05, "loss": 0.0648, "step": 34888 }, { "epoch": 0.6178680373848326, "grad_norm": 0.583084225654602, "learning_rate": 1.0094103739098219e-05, "loss": 0.0536, "step": 34889 }, { "epoch": 0.617885746921861, "grad_norm": 0.5065441727638245, "learning_rate": 1.0093290699472913e-05, "loss": 0.0618, "step": 34890 }, { "epoch": 0.6179034564588896, "grad_norm": 0.5105730891227722, "learning_rate": 1.0092477675990184e-05, "loss": 0.053, "step": 34891 }, { "epoch": 0.617921165995918, "grad_norm": 0.3031342923641205, "learning_rate": 1.0091664668652711e-05, "loss": 0.0531, "step": 34892 }, { "epoch": 0.6179388755329464, "grad_norm": 0.4447166323661804, "learning_rate": 1.0090851677463163e-05, "loss": 0.031, "step": 34893 }, { "epoch": 0.6179565850699749, "grad_norm": 0.4930824041366577, "learning_rate": 1.0090038702424214e-05, "loss": 0.0809, "step": 34894 }, { "epoch": 0.6179742946070033, "grad_norm": 0.8233370780944824, "learning_rate": 1.0089225743538548e-05, "loss": 0.0681, "step": 34895 }, { "epoch": 0.6179920041440317, "grad_norm": 0.9310661554336548, "learning_rate": 1.0088412800808827e-05, "loss": 0.0734, "step": 34896 }, { "epoch": 0.6180097136810601, "grad_norm": 0.3933926224708557, "learning_rate": 1.0087599874237732e-05, "loss": 0.0701, "step": 34897 }, { "epoch": 0.6180274232180886, "grad_norm": 0.4421175718307495, "learning_rate": 1.0086786963827931e-05, "loss": 0.0351, "step": 34898 }, { "epoch": 0.618045132755117, "grad_norm": 0.6236411333084106, "learning_rate": 1.0085974069582115e-05, "loss": 0.0459, "step": 34899 }, { "epoch": 0.6180628422921454, "grad_norm": 0.618382453918457, "learning_rate": 1.0085161191502941e-05, "loss": 0.0651, "step": 34900 }, { "epoch": 0.6180805518291738, "grad_norm": 0.606616199016571, "learning_rate": 1.0084348329593089e-05, "loss": 0.0696, "step": 34901 }, { "epoch": 0.6180982613662023, "grad_norm": 0.6122299432754517, "learning_rate": 1.0083535483855233e-05, "loss": 0.0636, "step": 34902 }, { "epoch": 0.6181159709032307, "grad_norm": 0.31261736154556274, "learning_rate": 1.0082722654292053e-05, "loss": 0.0785, "step": 34903 }, { "epoch": 0.6181336804402591, "grad_norm": 0.6702070832252502, "learning_rate": 1.0081909840906214e-05, "loss": 0.0628, "step": 34904 }, { "epoch": 0.6181513899772875, "grad_norm": 0.6030667424201965, "learning_rate": 1.0081097043700394e-05, "loss": 0.0692, "step": 34905 }, { "epoch": 0.618169099514316, "grad_norm": 0.9435245394706726, "learning_rate": 1.0080284262677272e-05, "loss": 0.0699, "step": 34906 }, { "epoch": 0.6181868090513444, "grad_norm": 0.6408875584602356, "learning_rate": 1.0079471497839511e-05, "loss": 0.0428, "step": 34907 }, { "epoch": 0.6182045185883728, "grad_norm": 0.5496427416801453, "learning_rate": 1.0078658749189794e-05, "loss": 0.0691, "step": 34908 }, { "epoch": 0.6182222281254013, "grad_norm": 0.5173925757408142, "learning_rate": 1.007784601673079e-05, "loss": 0.0568, "step": 34909 }, { "epoch": 0.6182399376624297, "grad_norm": 0.785285472869873, "learning_rate": 1.0077033300465182e-05, "loss": 0.0848, "step": 34910 }, { "epoch": 0.6182576471994581, "grad_norm": 0.978303074836731, "learning_rate": 1.007622060039563e-05, "loss": 0.0541, "step": 34911 }, { "epoch": 0.6182753567364865, "grad_norm": 1.348822832107544, "learning_rate": 1.0075407916524812e-05, "loss": 0.0892, "step": 34912 }, { "epoch": 0.618293066273515, "grad_norm": 0.7106345295906067, "learning_rate": 1.0074595248855412e-05, "loss": 0.0704, "step": 34913 }, { "epoch": 0.6183107758105434, "grad_norm": 0.23283591866493225, "learning_rate": 1.0073782597390089e-05, "loss": 0.0551, "step": 34914 }, { "epoch": 0.6183284853475718, "grad_norm": 0.5525862574577332, "learning_rate": 1.0072969962131525e-05, "loss": 0.0649, "step": 34915 }, { "epoch": 0.6183461948846002, "grad_norm": 0.5011003613471985, "learning_rate": 1.0072157343082388e-05, "loss": 0.0434, "step": 34916 }, { "epoch": 0.6183639044216287, "grad_norm": 0.6275437474250793, "learning_rate": 1.007134474024536e-05, "loss": 0.0423, "step": 34917 }, { "epoch": 0.6183816139586571, "grad_norm": 0.5974932312965393, "learning_rate": 1.0070532153623108e-05, "loss": 0.0457, "step": 34918 }, { "epoch": 0.6183993234956855, "grad_norm": 0.7495633959770203, "learning_rate": 1.0069719583218303e-05, "loss": 0.0855, "step": 34919 }, { "epoch": 0.6184170330327139, "grad_norm": 0.5057089924812317, "learning_rate": 1.0068907029033628e-05, "loss": 0.0405, "step": 34920 }, { "epoch": 0.6184347425697424, "grad_norm": 0.49506035447120667, "learning_rate": 1.0068094491071744e-05, "loss": 0.0534, "step": 34921 }, { "epoch": 0.6184524521067708, "grad_norm": 0.6392387747764587, "learning_rate": 1.0067281969335332e-05, "loss": 0.0418, "step": 34922 }, { "epoch": 0.6184701616437992, "grad_norm": 0.5321639776229858, "learning_rate": 1.0066469463827064e-05, "loss": 0.0555, "step": 34923 }, { "epoch": 0.6184878711808277, "grad_norm": 0.8781683444976807, "learning_rate": 1.006565697454962e-05, "loss": 0.0935, "step": 34924 }, { "epoch": 0.6185055807178561, "grad_norm": 0.5287649631500244, "learning_rate": 1.0064844501505656e-05, "loss": 0.053, "step": 34925 }, { "epoch": 0.6185232902548845, "grad_norm": 0.4928092956542969, "learning_rate": 1.0064032044697856e-05, "loss": 0.0474, "step": 34926 }, { "epoch": 0.6185409997919129, "grad_norm": 0.408689022064209, "learning_rate": 1.0063219604128896e-05, "loss": 0.0652, "step": 34927 }, { "epoch": 0.6185587093289414, "grad_norm": 0.2826360762119293, "learning_rate": 1.0062407179801438e-05, "loss": 0.0419, "step": 34928 }, { "epoch": 0.6185764188659698, "grad_norm": 1.1147339344024658, "learning_rate": 1.0061594771718163e-05, "loss": 0.0728, "step": 34929 }, { "epoch": 0.6185941284029982, "grad_norm": 0.8497682809829712, "learning_rate": 1.0060782379881742e-05, "loss": 0.0666, "step": 34930 }, { "epoch": 0.6186118379400266, "grad_norm": 0.9811990857124329, "learning_rate": 1.0059970004294848e-05, "loss": 0.0485, "step": 34931 }, { "epoch": 0.6186295474770551, "grad_norm": 0.5626429915428162, "learning_rate": 1.0059157644960152e-05, "loss": 0.0779, "step": 34932 }, { "epoch": 0.6186472570140835, "grad_norm": 0.5714683532714844, "learning_rate": 1.0058345301880328e-05, "loss": 0.0469, "step": 34933 }, { "epoch": 0.6186649665511119, "grad_norm": 0.8978409767150879, "learning_rate": 1.0057532975058053e-05, "loss": 0.0737, "step": 34934 }, { "epoch": 0.6186826760881403, "grad_norm": 0.6674726605415344, "learning_rate": 1.0056720664495986e-05, "loss": 0.0707, "step": 34935 }, { "epoch": 0.6187003856251688, "grad_norm": 1.0284252166748047, "learning_rate": 1.005590837019681e-05, "loss": 0.0951, "step": 34936 }, { "epoch": 0.6187180951621972, "grad_norm": 0.43483245372772217, "learning_rate": 1.0055096092163199e-05, "loss": 0.0653, "step": 34937 }, { "epoch": 0.6187358046992256, "grad_norm": 0.7446239590644836, "learning_rate": 1.0054283830397823e-05, "loss": 0.0495, "step": 34938 }, { "epoch": 0.6187535142362541, "grad_norm": 0.3718310296535492, "learning_rate": 1.005347158490335e-05, "loss": 0.0574, "step": 34939 }, { "epoch": 0.6187712237732825, "grad_norm": 0.38222354650497437, "learning_rate": 1.0052659355682454e-05, "loss": 0.0515, "step": 34940 }, { "epoch": 0.6187889333103109, "grad_norm": 0.6000602841377258, "learning_rate": 1.0051847142737804e-05, "loss": 0.0642, "step": 34941 }, { "epoch": 0.6188066428473393, "grad_norm": 0.5105679631233215, "learning_rate": 1.0051034946072089e-05, "loss": 0.0748, "step": 34942 }, { "epoch": 0.6188243523843678, "grad_norm": 0.8092129230499268, "learning_rate": 1.005022276568796e-05, "loss": 0.0961, "step": 34943 }, { "epoch": 0.6188420619213962, "grad_norm": 0.8012516498565674, "learning_rate": 1.0049410601588098e-05, "loss": 0.0985, "step": 34944 }, { "epoch": 0.6188597714584246, "grad_norm": 0.45974090695381165, "learning_rate": 1.004859845377518e-05, "loss": 0.0698, "step": 34945 }, { "epoch": 0.618877480995453, "grad_norm": 0.744520366191864, "learning_rate": 1.0047786322251864e-05, "loss": 0.0663, "step": 34946 }, { "epoch": 0.6188951905324815, "grad_norm": 0.5993742346763611, "learning_rate": 1.0046974207020833e-05, "loss": 0.0891, "step": 34947 }, { "epoch": 0.6189129000695099, "grad_norm": 0.5019246339797974, "learning_rate": 1.0046162108084755e-05, "loss": 0.0668, "step": 34948 }, { "epoch": 0.6189306096065383, "grad_norm": 0.587533175945282, "learning_rate": 1.0045350025446307e-05, "loss": 0.0543, "step": 34949 }, { "epoch": 0.6189483191435667, "grad_norm": 0.6937606930732727, "learning_rate": 1.0044537959108152e-05, "loss": 0.0784, "step": 34950 }, { "epoch": 0.6189660286805952, "grad_norm": 0.7029489874839783, "learning_rate": 1.0043725909072966e-05, "loss": 0.0661, "step": 34951 }, { "epoch": 0.6189837382176236, "grad_norm": 0.3946581184864044, "learning_rate": 1.0042913875343427e-05, "loss": 0.0572, "step": 34952 }, { "epoch": 0.619001447754652, "grad_norm": 0.6073316931724548, "learning_rate": 1.0042101857922194e-05, "loss": 0.0538, "step": 34953 }, { "epoch": 0.6190191572916806, "grad_norm": 0.7226475477218628, "learning_rate": 1.0041289856811944e-05, "loss": 0.0639, "step": 34954 }, { "epoch": 0.619036866828709, "grad_norm": 0.774548351764679, "learning_rate": 1.0040477872015349e-05, "loss": 0.0617, "step": 34955 }, { "epoch": 0.6190545763657374, "grad_norm": 0.5842835307121277, "learning_rate": 1.0039665903535084e-05, "loss": 0.0708, "step": 34956 }, { "epoch": 0.6190722859027658, "grad_norm": 0.36117345094680786, "learning_rate": 1.0038853951373813e-05, "loss": 0.0677, "step": 34957 }, { "epoch": 0.6190899954397943, "grad_norm": 0.7434139847755432, "learning_rate": 1.003804201553421e-05, "loss": 0.0571, "step": 34958 }, { "epoch": 0.6191077049768227, "grad_norm": 0.951317548751831, "learning_rate": 1.003723009601895e-05, "loss": 0.0908, "step": 34959 }, { "epoch": 0.6191254145138511, "grad_norm": 0.39835041761398315, "learning_rate": 1.0036418192830698e-05, "loss": 0.0387, "step": 34960 }, { "epoch": 0.6191431240508795, "grad_norm": 0.47383633255958557, "learning_rate": 1.003560630597213e-05, "loss": 0.0601, "step": 34961 }, { "epoch": 0.619160833587908, "grad_norm": 0.2957616448402405, "learning_rate": 1.0034794435445912e-05, "loss": 0.0446, "step": 34962 }, { "epoch": 0.6191785431249364, "grad_norm": 0.47050613164901733, "learning_rate": 1.0033982581254727e-05, "loss": 0.0617, "step": 34963 }, { "epoch": 0.6191962526619648, "grad_norm": 0.7420992255210876, "learning_rate": 1.0033170743401226e-05, "loss": 0.0859, "step": 34964 }, { "epoch": 0.6192139621989932, "grad_norm": 0.5393930077552795, "learning_rate": 1.0032358921888096e-05, "loss": 0.0383, "step": 34965 }, { "epoch": 0.6192316717360217, "grad_norm": 0.8985897898674011, "learning_rate": 1.0031547116718007e-05, "loss": 0.1036, "step": 34966 }, { "epoch": 0.6192493812730501, "grad_norm": 0.5065653324127197, "learning_rate": 1.003073532789362e-05, "loss": 0.0348, "step": 34967 }, { "epoch": 0.6192670908100785, "grad_norm": 0.747622549533844, "learning_rate": 1.0029923555417611e-05, "loss": 0.0876, "step": 34968 }, { "epoch": 0.619284800347107, "grad_norm": 0.4822077751159668, "learning_rate": 1.002911179929265e-05, "loss": 0.0554, "step": 34969 }, { "epoch": 0.6193025098841354, "grad_norm": 0.6493160128593445, "learning_rate": 1.0028300059521412e-05, "loss": 0.0672, "step": 34970 }, { "epoch": 0.6193202194211638, "grad_norm": 0.6537481546401978, "learning_rate": 1.002748833610656e-05, "loss": 0.0635, "step": 34971 }, { "epoch": 0.6193379289581922, "grad_norm": 0.5446373224258423, "learning_rate": 1.0026676629050771e-05, "loss": 0.0592, "step": 34972 }, { "epoch": 0.6193556384952207, "grad_norm": 0.5895130634307861, "learning_rate": 1.0025864938356712e-05, "loss": 0.0542, "step": 34973 }, { "epoch": 0.6193733480322491, "grad_norm": 0.7056971192359924, "learning_rate": 1.0025053264027056e-05, "loss": 0.072, "step": 34974 }, { "epoch": 0.6193910575692775, "grad_norm": 0.5206425786018372, "learning_rate": 1.0024241606064471e-05, "loss": 0.0617, "step": 34975 }, { "epoch": 0.6194087671063059, "grad_norm": 0.489067941904068, "learning_rate": 1.0023429964471625e-05, "loss": 0.043, "step": 34976 }, { "epoch": 0.6194264766433344, "grad_norm": 0.560361921787262, "learning_rate": 1.0022618339251197e-05, "loss": 0.0713, "step": 34977 }, { "epoch": 0.6194441861803628, "grad_norm": 0.6849238872528076, "learning_rate": 1.0021806730405844e-05, "loss": 0.059, "step": 34978 }, { "epoch": 0.6194618957173912, "grad_norm": 0.9479628205299377, "learning_rate": 1.0020995137938246e-05, "loss": 0.065, "step": 34979 }, { "epoch": 0.6194796052544196, "grad_norm": 0.4298081696033478, "learning_rate": 1.002018356185107e-05, "loss": 0.039, "step": 34980 }, { "epoch": 0.6194973147914481, "grad_norm": 0.5436169505119324, "learning_rate": 1.0019372002146992e-05, "loss": 0.0619, "step": 34981 }, { "epoch": 0.6195150243284765, "grad_norm": 0.724098265171051, "learning_rate": 1.0018560458828672e-05, "loss": 0.0681, "step": 34982 }, { "epoch": 0.6195327338655049, "grad_norm": 0.5362997651100159, "learning_rate": 1.0017748931898783e-05, "loss": 0.0634, "step": 34983 }, { "epoch": 0.6195504434025334, "grad_norm": 0.27964672446250916, "learning_rate": 1.0016937421359999e-05, "loss": 0.0433, "step": 34984 }, { "epoch": 0.6195681529395618, "grad_norm": 0.6839651465415955, "learning_rate": 1.0016125927214986e-05, "loss": 0.0764, "step": 34985 }, { "epoch": 0.6195858624765902, "grad_norm": 0.7035428285598755, "learning_rate": 1.0015314449466411e-05, "loss": 0.1039, "step": 34986 }, { "epoch": 0.6196035720136186, "grad_norm": 0.6259580850601196, "learning_rate": 1.0014502988116948e-05, "loss": 0.065, "step": 34987 }, { "epoch": 0.6196212815506471, "grad_norm": 0.6271852850914001, "learning_rate": 1.001369154316927e-05, "loss": 0.088, "step": 34988 }, { "epoch": 0.6196389910876755, "grad_norm": 0.626396119594574, "learning_rate": 1.001288011462604e-05, "loss": 0.0659, "step": 34989 }, { "epoch": 0.6196567006247039, "grad_norm": 0.6880386471748352, "learning_rate": 1.001206870248993e-05, "loss": 0.0657, "step": 34990 }, { "epoch": 0.6196744101617323, "grad_norm": 0.6899450421333313, "learning_rate": 1.0011257306763615e-05, "loss": 0.0449, "step": 34991 }, { "epoch": 0.6196921196987608, "grad_norm": 0.7133816480636597, "learning_rate": 1.0010445927449753e-05, "loss": 0.0664, "step": 34992 }, { "epoch": 0.6197098292357892, "grad_norm": 0.5293387770652771, "learning_rate": 1.0009634564551016e-05, "loss": 0.0468, "step": 34993 }, { "epoch": 0.6197275387728176, "grad_norm": 1.4159742593765259, "learning_rate": 1.0008823218070078e-05, "loss": 0.0618, "step": 34994 }, { "epoch": 0.619745248309846, "grad_norm": 0.6050586700439453, "learning_rate": 1.0008011888009616e-05, "loss": 0.0485, "step": 34995 }, { "epoch": 0.6197629578468745, "grad_norm": 0.7164339423179626, "learning_rate": 1.0007200574372282e-05, "loss": 0.0572, "step": 34996 }, { "epoch": 0.6197806673839029, "grad_norm": 0.6784414649009705, "learning_rate": 1.0006389277160754e-05, "loss": 0.0608, "step": 34997 }, { "epoch": 0.6197983769209313, "grad_norm": 0.5856431126594543, "learning_rate": 1.00055779963777e-05, "loss": 0.0407, "step": 34998 }, { "epoch": 0.6198160864579598, "grad_norm": 0.3704509735107422, "learning_rate": 1.000476673202579e-05, "loss": 0.0694, "step": 34999 }, { "epoch": 0.6198337959949882, "grad_norm": 0.698818027973175, "learning_rate": 1.0003955484107691e-05, "loss": 0.0615, "step": 35000 }, { "epoch": 0.6198515055320166, "grad_norm": 0.7895647287368774, "learning_rate": 1.0003144252626073e-05, "loss": 0.0688, "step": 35001 }, { "epoch": 0.619869215069045, "grad_norm": 0.5844992995262146, "learning_rate": 1.0002333037583608e-05, "loss": 0.0654, "step": 35002 }, { "epoch": 0.6198869246060735, "grad_norm": 0.42714136838912964, "learning_rate": 1.000152183898296e-05, "loss": 0.0768, "step": 35003 }, { "epoch": 0.6199046341431019, "grad_norm": 0.6764394640922546, "learning_rate": 1.00007106568268e-05, "loss": 0.0811, "step": 35004 }, { "epoch": 0.6199223436801303, "grad_norm": 0.5239677429199219, "learning_rate": 9.999899491117802e-06, "loss": 0.0767, "step": 35005 }, { "epoch": 0.6199400532171587, "grad_norm": 0.43811461329460144, "learning_rate": 9.99908834185862e-06, "loss": 0.062, "step": 35006 }, { "epoch": 0.6199577627541872, "grad_norm": 0.4494442641735077, "learning_rate": 9.99827720905193e-06, "loss": 0.0509, "step": 35007 }, { "epoch": 0.6199754722912156, "grad_norm": 1.119884729385376, "learning_rate": 9.997466092700405e-06, "loss": 0.087, "step": 35008 }, { "epoch": 0.619993181828244, "grad_norm": 0.7305793762207031, "learning_rate": 9.996654992806717e-06, "loss": 0.0574, "step": 35009 }, { "epoch": 0.6200108913652724, "grad_norm": 0.5229600667953491, "learning_rate": 9.995843909373523e-06, "loss": 0.0528, "step": 35010 }, { "epoch": 0.6200286009023009, "grad_norm": 0.49375438690185547, "learning_rate": 9.995032842403495e-06, "loss": 0.067, "step": 35011 }, { "epoch": 0.6200463104393293, "grad_norm": 0.5732267498970032, "learning_rate": 9.994221791899304e-06, "loss": 0.0663, "step": 35012 }, { "epoch": 0.6200640199763577, "grad_norm": 0.25828638672828674, "learning_rate": 9.993410757863618e-06, "loss": 0.0305, "step": 35013 }, { "epoch": 0.6200817295133862, "grad_norm": 0.7580773830413818, "learning_rate": 9.992599740299104e-06, "loss": 0.0739, "step": 35014 }, { "epoch": 0.6200994390504146, "grad_norm": 0.46144333481788635, "learning_rate": 9.991788739208428e-06, "loss": 0.0592, "step": 35015 }, { "epoch": 0.620117148587443, "grad_norm": 0.48807492852211, "learning_rate": 9.990977754594265e-06, "loss": 0.0349, "step": 35016 }, { "epoch": 0.6201348581244714, "grad_norm": 0.7563358545303345, "learning_rate": 9.990166786459275e-06, "loss": 0.0554, "step": 35017 }, { "epoch": 0.6201525676615, "grad_norm": 0.6437711715698242, "learning_rate": 9.98935583480613e-06, "loss": 0.067, "step": 35018 }, { "epoch": 0.6201702771985284, "grad_norm": 0.8750748634338379, "learning_rate": 9.988544899637496e-06, "loss": 0.0621, "step": 35019 }, { "epoch": 0.6201879867355568, "grad_norm": 0.7039157152175903, "learning_rate": 9.98773398095605e-06, "loss": 0.0545, "step": 35020 }, { "epoch": 0.6202056962725851, "grad_norm": 0.5195029973983765, "learning_rate": 9.986923078764446e-06, "loss": 0.0785, "step": 35021 }, { "epoch": 0.6202234058096137, "grad_norm": 0.5983185768127441, "learning_rate": 9.986112193065355e-06, "loss": 0.0981, "step": 35022 }, { "epoch": 0.6202411153466421, "grad_norm": 0.5191299319267273, "learning_rate": 9.985301323861455e-06, "loss": 0.0436, "step": 35023 }, { "epoch": 0.6202588248836705, "grad_norm": 0.5571510195732117, "learning_rate": 9.9844904711554e-06, "loss": 0.0585, "step": 35024 }, { "epoch": 0.6202765344206989, "grad_norm": 0.7818599939346313, "learning_rate": 9.983679634949868e-06, "loss": 0.0647, "step": 35025 }, { "epoch": 0.6202942439577274, "grad_norm": 0.608951985836029, "learning_rate": 9.982868815247519e-06, "loss": 0.0495, "step": 35026 }, { "epoch": 0.6203119534947558, "grad_norm": 1.2388648986816406, "learning_rate": 9.98205801205103e-06, "loss": 0.0792, "step": 35027 }, { "epoch": 0.6203296630317842, "grad_norm": 0.7332121133804321, "learning_rate": 9.981247225363059e-06, "loss": 0.0965, "step": 35028 }, { "epoch": 0.6203473725688127, "grad_norm": 0.6515267491340637, "learning_rate": 9.980436455186275e-06, "loss": 0.0786, "step": 35029 }, { "epoch": 0.6203650821058411, "grad_norm": 0.6797904372215271, "learning_rate": 9.979625701523353e-06, "loss": 0.0589, "step": 35030 }, { "epoch": 0.6203827916428695, "grad_norm": 0.6051425337791443, "learning_rate": 9.97881496437695e-06, "loss": 0.0616, "step": 35031 }, { "epoch": 0.6204005011798979, "grad_norm": 0.4915979206562042, "learning_rate": 9.978004243749737e-06, "loss": 0.0492, "step": 35032 }, { "epoch": 0.6204182107169264, "grad_norm": 0.5984260439872742, "learning_rate": 9.977193539644385e-06, "loss": 0.0828, "step": 35033 }, { "epoch": 0.6204359202539548, "grad_norm": 0.5799890160560608, "learning_rate": 9.976382852063563e-06, "loss": 0.0705, "step": 35034 }, { "epoch": 0.6204536297909832, "grad_norm": 0.7374260425567627, "learning_rate": 9.975572181009926e-06, "loss": 0.053, "step": 35035 }, { "epoch": 0.6204713393280116, "grad_norm": 0.40233346819877625, "learning_rate": 9.974761526486145e-06, "loss": 0.018, "step": 35036 }, { "epoch": 0.6204890488650401, "grad_norm": 0.6962865591049194, "learning_rate": 9.973950888494901e-06, "loss": 0.068, "step": 35037 }, { "epoch": 0.6205067584020685, "grad_norm": 0.5988284349441528, "learning_rate": 9.973140267038843e-06, "loss": 0.0495, "step": 35038 }, { "epoch": 0.6205244679390969, "grad_norm": 0.8555090427398682, "learning_rate": 9.972329662120646e-06, "loss": 0.0692, "step": 35039 }, { "epoch": 0.6205421774761253, "grad_norm": 0.5802507400512695, "learning_rate": 9.971519073742976e-06, "loss": 0.064, "step": 35040 }, { "epoch": 0.6205598870131538, "grad_norm": 0.8682752251625061, "learning_rate": 9.970708501908503e-06, "loss": 0.0845, "step": 35041 }, { "epoch": 0.6205775965501822, "grad_norm": 0.7633103132247925, "learning_rate": 9.969897946619886e-06, "loss": 0.0829, "step": 35042 }, { "epoch": 0.6205953060872106, "grad_norm": 0.4380646049976349, "learning_rate": 9.969087407879795e-06, "loss": 0.0512, "step": 35043 }, { "epoch": 0.6206130156242391, "grad_norm": 0.5925219058990479, "learning_rate": 9.968276885690901e-06, "loss": 0.0699, "step": 35044 }, { "epoch": 0.6206307251612675, "grad_norm": 0.3936229944229126, "learning_rate": 9.96746638005587e-06, "loss": 0.0737, "step": 35045 }, { "epoch": 0.6206484346982959, "grad_norm": 0.7391189932823181, "learning_rate": 9.966655890977358e-06, "loss": 0.0548, "step": 35046 }, { "epoch": 0.6206661442353243, "grad_norm": 0.5342219471931458, "learning_rate": 9.965845418458044e-06, "loss": 0.0698, "step": 35047 }, { "epoch": 0.6206838537723528, "grad_norm": 0.5404052734375, "learning_rate": 9.965034962500594e-06, "loss": 0.0687, "step": 35048 }, { "epoch": 0.6207015633093812, "grad_norm": 0.4352475702762604, "learning_rate": 9.96422452310766e-06, "loss": 0.0587, "step": 35049 }, { "epoch": 0.6207192728464096, "grad_norm": 0.5119709372520447, "learning_rate": 9.963414100281923e-06, "loss": 0.0457, "step": 35050 }, { "epoch": 0.620736982383438, "grad_norm": 0.6585328578948975, "learning_rate": 9.962603694026039e-06, "loss": 0.0589, "step": 35051 }, { "epoch": 0.6207546919204665, "grad_norm": 0.6234010457992554, "learning_rate": 9.96179330434269e-06, "loss": 0.0579, "step": 35052 }, { "epoch": 0.6207724014574949, "grad_norm": 0.6977767944335938, "learning_rate": 9.960982931234523e-06, "loss": 0.0713, "step": 35053 }, { "epoch": 0.6207901109945233, "grad_norm": 0.37032246589660645, "learning_rate": 9.960172574704213e-06, "loss": 0.0668, "step": 35054 }, { "epoch": 0.6208078205315517, "grad_norm": 0.34061169624328613, "learning_rate": 9.95936223475443e-06, "loss": 0.0325, "step": 35055 }, { "epoch": 0.6208255300685802, "grad_norm": 0.8747410178184509, "learning_rate": 9.958551911387831e-06, "loss": 0.0723, "step": 35056 }, { "epoch": 0.6208432396056086, "grad_norm": 0.46153417229652405, "learning_rate": 9.957741604607086e-06, "loss": 0.0447, "step": 35057 }, { "epoch": 0.620860949142637, "grad_norm": 0.743703305721283, "learning_rate": 9.956931314414863e-06, "loss": 0.0782, "step": 35058 }, { "epoch": 0.6208786586796655, "grad_norm": 0.4010496139526367, "learning_rate": 9.956121040813829e-06, "loss": 0.0437, "step": 35059 }, { "epoch": 0.6208963682166939, "grad_norm": 0.6543632745742798, "learning_rate": 9.95531078380664e-06, "loss": 0.0913, "step": 35060 }, { "epoch": 0.6209140777537223, "grad_norm": 0.5798489451408386, "learning_rate": 9.954500543395972e-06, "loss": 0.0947, "step": 35061 }, { "epoch": 0.6209317872907507, "grad_norm": 0.7511157989501953, "learning_rate": 9.953690319584493e-06, "loss": 0.0657, "step": 35062 }, { "epoch": 0.6209494968277792, "grad_norm": 0.663140594959259, "learning_rate": 9.952880112374853e-06, "loss": 0.0555, "step": 35063 }, { "epoch": 0.6209672063648076, "grad_norm": 0.767173707485199, "learning_rate": 9.95206992176973e-06, "loss": 0.0535, "step": 35064 }, { "epoch": 0.620984915901836, "grad_norm": 0.596829354763031, "learning_rate": 9.951259747771782e-06, "loss": 0.0546, "step": 35065 }, { "epoch": 0.6210026254388644, "grad_norm": 0.6485248804092407, "learning_rate": 9.950449590383688e-06, "loss": 0.101, "step": 35066 }, { "epoch": 0.6210203349758929, "grad_norm": 0.7770981192588806, "learning_rate": 9.949639449608098e-06, "loss": 0.0515, "step": 35067 }, { "epoch": 0.6210380445129213, "grad_norm": 0.4624428153038025, "learning_rate": 9.948829325447683e-06, "loss": 0.0963, "step": 35068 }, { "epoch": 0.6210557540499497, "grad_norm": 0.9466010332107544, "learning_rate": 9.948019217905113e-06, "loss": 0.0693, "step": 35069 }, { "epoch": 0.6210734635869781, "grad_norm": 0.442990779876709, "learning_rate": 9.947209126983044e-06, "loss": 0.0433, "step": 35070 }, { "epoch": 0.6210911731240066, "grad_norm": 0.6427623629570007, "learning_rate": 9.946399052684147e-06, "loss": 0.0696, "step": 35071 }, { "epoch": 0.621108882661035, "grad_norm": 0.6761457920074463, "learning_rate": 9.945588995011088e-06, "loss": 0.0763, "step": 35072 }, { "epoch": 0.6211265921980634, "grad_norm": 0.7438212633132935, "learning_rate": 9.94477895396653e-06, "loss": 0.0633, "step": 35073 }, { "epoch": 0.6211443017350919, "grad_norm": 0.5766673684120178, "learning_rate": 9.943968929553136e-06, "loss": 0.057, "step": 35074 }, { "epoch": 0.6211620112721203, "grad_norm": 0.37174686789512634, "learning_rate": 9.943158921773573e-06, "loss": 0.0413, "step": 35075 }, { "epoch": 0.6211797208091487, "grad_norm": 0.8170644640922546, "learning_rate": 9.942348930630504e-06, "loss": 0.0655, "step": 35076 }, { "epoch": 0.6211974303461771, "grad_norm": 0.4688159227371216, "learning_rate": 9.941538956126603e-06, "loss": 0.0373, "step": 35077 }, { "epoch": 0.6212151398832056, "grad_norm": 0.8277551531791687, "learning_rate": 9.940728998264522e-06, "loss": 0.0494, "step": 35078 }, { "epoch": 0.621232849420234, "grad_norm": 0.5069848299026489, "learning_rate": 9.93991905704693e-06, "loss": 0.0766, "step": 35079 }, { "epoch": 0.6212505589572624, "grad_norm": 0.817392110824585, "learning_rate": 9.939109132476496e-06, "loss": 0.0683, "step": 35080 }, { "epoch": 0.6212682684942908, "grad_norm": 0.636785626411438, "learning_rate": 9.938299224555878e-06, "loss": 0.0591, "step": 35081 }, { "epoch": 0.6212859780313194, "grad_norm": 0.7353700995445251, "learning_rate": 9.937489333287742e-06, "loss": 0.0786, "step": 35082 }, { "epoch": 0.6213036875683478, "grad_norm": 0.6002300381660461, "learning_rate": 9.936679458674755e-06, "loss": 0.0464, "step": 35083 }, { "epoch": 0.6213213971053761, "grad_norm": 1.1611844301223755, "learning_rate": 9.935869600719584e-06, "loss": 0.0807, "step": 35084 }, { "epoch": 0.6213391066424045, "grad_norm": 0.699385404586792, "learning_rate": 9.935059759424887e-06, "loss": 0.0815, "step": 35085 }, { "epoch": 0.6213568161794331, "grad_norm": 0.7710872292518616, "learning_rate": 9.93424993479333e-06, "loss": 0.0683, "step": 35086 }, { "epoch": 0.6213745257164615, "grad_norm": 1.0596860647201538, "learning_rate": 9.933440126827585e-06, "loss": 0.0824, "step": 35087 }, { "epoch": 0.6213922352534899, "grad_norm": 0.4847427308559418, "learning_rate": 9.9326303355303e-06, "loss": 0.0611, "step": 35088 }, { "epoch": 0.6214099447905184, "grad_norm": 0.5612726211547852, "learning_rate": 9.931820560904152e-06, "loss": 0.0741, "step": 35089 }, { "epoch": 0.6214276543275468, "grad_norm": 0.4449746608734131, "learning_rate": 9.931010802951802e-06, "loss": 0.045, "step": 35090 }, { "epoch": 0.6214453638645752, "grad_norm": 0.8415770530700684, "learning_rate": 9.93020106167592e-06, "loss": 0.0351, "step": 35091 }, { "epoch": 0.6214630734016036, "grad_norm": 0.4410010278224945, "learning_rate": 9.929391337079157e-06, "loss": 0.0725, "step": 35092 }, { "epoch": 0.6214807829386321, "grad_norm": 0.5781019330024719, "learning_rate": 9.928581629164184e-06, "loss": 0.0922, "step": 35093 }, { "epoch": 0.6214984924756605, "grad_norm": 0.7802736759185791, "learning_rate": 9.927771937933668e-06, "loss": 0.0584, "step": 35094 }, { "epoch": 0.6215162020126889, "grad_norm": 0.5384478569030762, "learning_rate": 9.926962263390266e-06, "loss": 0.0443, "step": 35095 }, { "epoch": 0.6215339115497173, "grad_norm": 0.46949559450149536, "learning_rate": 9.926152605536644e-06, "loss": 0.0631, "step": 35096 }, { "epoch": 0.6215516210867458, "grad_norm": 0.6018663048744202, "learning_rate": 9.925342964375467e-06, "loss": 0.0447, "step": 35097 }, { "epoch": 0.6215693306237742, "grad_norm": 0.46538615226745605, "learning_rate": 9.924533339909404e-06, "loss": 0.0438, "step": 35098 }, { "epoch": 0.6215870401608026, "grad_norm": 1.0002696514129639, "learning_rate": 9.923723732141106e-06, "loss": 0.0781, "step": 35099 }, { "epoch": 0.621604749697831, "grad_norm": 1.0052556991577148, "learning_rate": 9.922914141073245e-06, "loss": 0.0677, "step": 35100 }, { "epoch": 0.6216224592348595, "grad_norm": 0.5331699252128601, "learning_rate": 9.922104566708488e-06, "loss": 0.0817, "step": 35101 }, { "epoch": 0.6216401687718879, "grad_norm": 0.6338232755661011, "learning_rate": 9.921295009049487e-06, "loss": 0.0674, "step": 35102 }, { "epoch": 0.6216578783089163, "grad_norm": 0.45498061180114746, "learning_rate": 9.920485468098914e-06, "loss": 0.0557, "step": 35103 }, { "epoch": 0.6216755878459448, "grad_norm": 0.5159230828285217, "learning_rate": 9.919675943859429e-06, "loss": 0.0864, "step": 35104 }, { "epoch": 0.6216932973829732, "grad_norm": 0.4103408455848694, "learning_rate": 9.918866436333703e-06, "loss": 0.0723, "step": 35105 }, { "epoch": 0.6217110069200016, "grad_norm": 0.5578213930130005, "learning_rate": 9.918056945524387e-06, "loss": 0.1015, "step": 35106 }, { "epoch": 0.62172871645703, "grad_norm": 0.6336820125579834, "learning_rate": 9.917247471434148e-06, "loss": 0.0667, "step": 35107 }, { "epoch": 0.6217464259940585, "grad_norm": 0.8372504115104675, "learning_rate": 9.916438014065656e-06, "loss": 0.0562, "step": 35108 }, { "epoch": 0.6217641355310869, "grad_norm": 0.5513511300086975, "learning_rate": 9.915628573421563e-06, "loss": 0.0686, "step": 35109 }, { "epoch": 0.6217818450681153, "grad_norm": 0.5685436725616455, "learning_rate": 9.91481914950454e-06, "loss": 0.0514, "step": 35110 }, { "epoch": 0.6217995546051437, "grad_norm": 0.6438308358192444, "learning_rate": 9.914009742317247e-06, "loss": 0.0801, "step": 35111 }, { "epoch": 0.6218172641421722, "grad_norm": 0.7365606427192688, "learning_rate": 9.91320035186235e-06, "loss": 0.0721, "step": 35112 }, { "epoch": 0.6218349736792006, "grad_norm": 0.5254700183868408, "learning_rate": 9.912390978142507e-06, "loss": 0.0539, "step": 35113 }, { "epoch": 0.621852683216229, "grad_norm": 0.7982435822486877, "learning_rate": 9.911581621160382e-06, "loss": 0.0703, "step": 35114 }, { "epoch": 0.6218703927532574, "grad_norm": 0.6657820343971252, "learning_rate": 9.91077228091864e-06, "loss": 0.06, "step": 35115 }, { "epoch": 0.6218881022902859, "grad_norm": 0.7012212872505188, "learning_rate": 9.909962957419947e-06, "loss": 0.0692, "step": 35116 }, { "epoch": 0.6219058118273143, "grad_norm": 0.8289541602134705, "learning_rate": 9.909153650666954e-06, "loss": 0.0434, "step": 35117 }, { "epoch": 0.6219235213643427, "grad_norm": 0.5385042428970337, "learning_rate": 9.908344360662333e-06, "loss": 0.0872, "step": 35118 }, { "epoch": 0.6219412309013712, "grad_norm": 0.7422650456428528, "learning_rate": 9.90753508740875e-06, "loss": 0.068, "step": 35119 }, { "epoch": 0.6219589404383996, "grad_norm": 0.587867021560669, "learning_rate": 9.906725830908855e-06, "loss": 0.0607, "step": 35120 }, { "epoch": 0.621976649975428, "grad_norm": 0.5409029722213745, "learning_rate": 9.905916591165315e-06, "loss": 0.0653, "step": 35121 }, { "epoch": 0.6219943595124564, "grad_norm": 0.7741662859916687, "learning_rate": 9.905107368180797e-06, "loss": 0.0802, "step": 35122 }, { "epoch": 0.6220120690494849, "grad_norm": 0.426411509513855, "learning_rate": 9.904298161957964e-06, "loss": 0.0722, "step": 35123 }, { "epoch": 0.6220297785865133, "grad_norm": 0.7824350595474243, "learning_rate": 9.903488972499469e-06, "loss": 0.0455, "step": 35124 }, { "epoch": 0.6220474881235417, "grad_norm": 0.7479198575019836, "learning_rate": 9.90267979980798e-06, "loss": 0.0625, "step": 35125 }, { "epoch": 0.6220651976605701, "grad_norm": 0.7640777826309204, "learning_rate": 9.901870643886165e-06, "loss": 0.0651, "step": 35126 }, { "epoch": 0.6220829071975986, "grad_norm": 0.8910406231880188, "learning_rate": 9.901061504736676e-06, "loss": 0.0872, "step": 35127 }, { "epoch": 0.622100616734627, "grad_norm": 0.7157018780708313, "learning_rate": 9.900252382362176e-06, "loss": 0.0671, "step": 35128 }, { "epoch": 0.6221183262716554, "grad_norm": 0.45580223202705383, "learning_rate": 9.899443276765334e-06, "loss": 0.0331, "step": 35129 }, { "epoch": 0.6221360358086838, "grad_norm": 0.6205809712409973, "learning_rate": 9.89863418794881e-06, "loss": 0.0668, "step": 35130 }, { "epoch": 0.6221537453457123, "grad_norm": 0.5790659189224243, "learning_rate": 9.897825115915255e-06, "loss": 0.0731, "step": 35131 }, { "epoch": 0.6221714548827407, "grad_norm": 0.6287505030632019, "learning_rate": 9.897016060667345e-06, "loss": 0.0386, "step": 35132 }, { "epoch": 0.6221891644197691, "grad_norm": 0.7283958792686462, "learning_rate": 9.89620702220774e-06, "loss": 0.1094, "step": 35133 }, { "epoch": 0.6222068739567976, "grad_norm": 0.4574410915374756, "learning_rate": 9.895398000539092e-06, "loss": 0.0509, "step": 35134 }, { "epoch": 0.622224583493826, "grad_norm": 0.5405817031860352, "learning_rate": 9.894588995664067e-06, "loss": 0.0679, "step": 35135 }, { "epoch": 0.6222422930308544, "grad_norm": 0.5721881985664368, "learning_rate": 9.893780007585331e-06, "loss": 0.05, "step": 35136 }, { "epoch": 0.6222600025678828, "grad_norm": 0.5790274143218994, "learning_rate": 9.892971036305544e-06, "loss": 0.0754, "step": 35137 }, { "epoch": 0.6222777121049113, "grad_norm": 0.348143994808197, "learning_rate": 9.892162081827361e-06, "loss": 0.0387, "step": 35138 }, { "epoch": 0.6222954216419397, "grad_norm": 0.56333988904953, "learning_rate": 9.89135314415345e-06, "loss": 0.0725, "step": 35139 }, { "epoch": 0.6223131311789681, "grad_norm": 0.7307589650154114, "learning_rate": 9.890544223286472e-06, "loss": 0.0782, "step": 35140 }, { "epoch": 0.6223308407159965, "grad_norm": 0.7106979489326477, "learning_rate": 9.889735319229085e-06, "loss": 0.0476, "step": 35141 }, { "epoch": 0.622348550253025, "grad_norm": 0.5301296710968018, "learning_rate": 9.888926431983951e-06, "loss": 0.0598, "step": 35142 }, { "epoch": 0.6223662597900534, "grad_norm": 0.6654790043830872, "learning_rate": 9.888117561553732e-06, "loss": 0.0735, "step": 35143 }, { "epoch": 0.6223839693270818, "grad_norm": 0.5664248466491699, "learning_rate": 9.887308707941097e-06, "loss": 0.0512, "step": 35144 }, { "epoch": 0.6224016788641102, "grad_norm": 0.638608455657959, "learning_rate": 9.886499871148692e-06, "loss": 0.0424, "step": 35145 }, { "epoch": 0.6224193884011388, "grad_norm": 0.7426925897598267, "learning_rate": 9.88569105117918e-06, "loss": 0.0735, "step": 35146 }, { "epoch": 0.6224370979381671, "grad_norm": 0.45431771874427795, "learning_rate": 9.884882248035233e-06, "loss": 0.0371, "step": 35147 }, { "epoch": 0.6224548074751955, "grad_norm": 0.5928043723106384, "learning_rate": 9.884073461719511e-06, "loss": 0.0767, "step": 35148 }, { "epoch": 0.6224725170122241, "grad_norm": 0.9947494268417358, "learning_rate": 9.883264692234665e-06, "loss": 0.0729, "step": 35149 }, { "epoch": 0.6224902265492525, "grad_norm": 0.38090765476226807, "learning_rate": 9.88245593958336e-06, "loss": 0.0585, "step": 35150 }, { "epoch": 0.6225079360862809, "grad_norm": 0.32574570178985596, "learning_rate": 9.881647203768259e-06, "loss": 0.0445, "step": 35151 }, { "epoch": 0.6225256456233093, "grad_norm": 0.769724428653717, "learning_rate": 9.88083848479202e-06, "loss": 0.0581, "step": 35152 }, { "epoch": 0.6225433551603378, "grad_norm": 0.8407502770423889, "learning_rate": 9.880029782657304e-06, "loss": 0.0657, "step": 35153 }, { "epoch": 0.6225610646973662, "grad_norm": 0.5200601816177368, "learning_rate": 9.879221097366774e-06, "loss": 0.0628, "step": 35154 }, { "epoch": 0.6225787742343946, "grad_norm": 0.6761439442634583, "learning_rate": 9.878412428923088e-06, "loss": 0.0666, "step": 35155 }, { "epoch": 0.622596483771423, "grad_norm": 1.2510879039764404, "learning_rate": 9.877603777328905e-06, "loss": 0.0922, "step": 35156 }, { "epoch": 0.6226141933084515, "grad_norm": 1.1668659448623657, "learning_rate": 9.876795142586888e-06, "loss": 0.0729, "step": 35157 }, { "epoch": 0.6226319028454799, "grad_norm": 0.647813618183136, "learning_rate": 9.875986524699704e-06, "loss": 0.0685, "step": 35158 }, { "epoch": 0.6226496123825083, "grad_norm": 0.5129760503768921, "learning_rate": 9.875177923669998e-06, "loss": 0.0672, "step": 35159 }, { "epoch": 0.6226673219195367, "grad_norm": 0.666018009185791, "learning_rate": 9.87436933950044e-06, "loss": 0.0914, "step": 35160 }, { "epoch": 0.6226850314565652, "grad_norm": 0.604532778263092, "learning_rate": 9.873560772193685e-06, "loss": 0.0451, "step": 35161 }, { "epoch": 0.6227027409935936, "grad_norm": 0.6016229391098022, "learning_rate": 9.872752221752406e-06, "loss": 0.0826, "step": 35162 }, { "epoch": 0.622720450530622, "grad_norm": 0.4106288552284241, "learning_rate": 9.871943688179245e-06, "loss": 0.0643, "step": 35163 }, { "epoch": 0.6227381600676505, "grad_norm": 0.8938460946083069, "learning_rate": 9.871135171476874e-06, "loss": 0.0938, "step": 35164 }, { "epoch": 0.6227558696046789, "grad_norm": 0.4453381597995758, "learning_rate": 9.87032667164795e-06, "loss": 0.0952, "step": 35165 }, { "epoch": 0.6227735791417073, "grad_norm": 0.4005112648010254, "learning_rate": 9.869518188695132e-06, "loss": 0.0682, "step": 35166 }, { "epoch": 0.6227912886787357, "grad_norm": 0.39477261900901794, "learning_rate": 9.868709722621079e-06, "loss": 0.0568, "step": 35167 }, { "epoch": 0.6228089982157642, "grad_norm": 0.6729515790939331, "learning_rate": 9.867901273428451e-06, "loss": 0.0706, "step": 35168 }, { "epoch": 0.6228267077527926, "grad_norm": 0.3820127546787262, "learning_rate": 9.867092841119912e-06, "loss": 0.0579, "step": 35169 }, { "epoch": 0.622844417289821, "grad_norm": 0.7777586579322815, "learning_rate": 9.866284425698117e-06, "loss": 0.0989, "step": 35170 }, { "epoch": 0.6228621268268494, "grad_norm": 0.5074129700660706, "learning_rate": 9.865476027165726e-06, "loss": 0.0809, "step": 35171 }, { "epoch": 0.6228798363638779, "grad_norm": 0.7618275284767151, "learning_rate": 9.864667645525405e-06, "loss": 0.078, "step": 35172 }, { "epoch": 0.6228975459009063, "grad_norm": 0.9462591409683228, "learning_rate": 9.863859280779804e-06, "loss": 0.0675, "step": 35173 }, { "epoch": 0.6229152554379347, "grad_norm": 0.8966048359870911, "learning_rate": 9.863050932931584e-06, "loss": 0.0731, "step": 35174 }, { "epoch": 0.6229329649749631, "grad_norm": 0.9220243692398071, "learning_rate": 9.862242601983404e-06, "loss": 0.0731, "step": 35175 }, { "epoch": 0.6229506745119916, "grad_norm": 0.5746687650680542, "learning_rate": 9.861434287937937e-06, "loss": 0.0367, "step": 35176 }, { "epoch": 0.62296838404902, "grad_norm": 0.7020405530929565, "learning_rate": 9.860625990797825e-06, "loss": 0.0673, "step": 35177 }, { "epoch": 0.6229860935860484, "grad_norm": 0.5471540093421936, "learning_rate": 9.859817710565733e-06, "loss": 0.0672, "step": 35178 }, { "epoch": 0.6230038031230769, "grad_norm": 0.6355136632919312, "learning_rate": 9.859009447244324e-06, "loss": 0.0627, "step": 35179 }, { "epoch": 0.6230215126601053, "grad_norm": 0.7700051069259644, "learning_rate": 9.85820120083625e-06, "loss": 0.0647, "step": 35180 }, { "epoch": 0.6230392221971337, "grad_norm": 0.5229230523109436, "learning_rate": 9.857392971344175e-06, "loss": 0.0886, "step": 35181 }, { "epoch": 0.6230569317341621, "grad_norm": 0.5481257438659668, "learning_rate": 9.856584758770757e-06, "loss": 0.0455, "step": 35182 }, { "epoch": 0.6230746412711906, "grad_norm": 0.6588152050971985, "learning_rate": 9.855776563118656e-06, "loss": 0.0753, "step": 35183 }, { "epoch": 0.623092350808219, "grad_norm": 0.5266920924186707, "learning_rate": 9.85496838439053e-06, "loss": 0.0866, "step": 35184 }, { "epoch": 0.6231100603452474, "grad_norm": 0.7962116003036499, "learning_rate": 9.854160222589036e-06, "loss": 0.0621, "step": 35185 }, { "epoch": 0.6231277698822758, "grad_norm": 0.5597469210624695, "learning_rate": 9.853352077716835e-06, "loss": 0.0604, "step": 35186 }, { "epoch": 0.6231454794193043, "grad_norm": 0.9548394083976746, "learning_rate": 9.852543949776589e-06, "loss": 0.0473, "step": 35187 }, { "epoch": 0.6231631889563327, "grad_norm": 0.38296598196029663, "learning_rate": 9.85173583877095e-06, "loss": 0.0637, "step": 35188 }, { "epoch": 0.6231808984933611, "grad_norm": 0.5487433671951294, "learning_rate": 9.850927744702575e-06, "loss": 0.0625, "step": 35189 }, { "epoch": 0.6231986080303895, "grad_norm": 0.4145756661891937, "learning_rate": 9.850119667574134e-06, "loss": 0.0605, "step": 35190 }, { "epoch": 0.623216317567418, "grad_norm": 0.7419788241386414, "learning_rate": 9.849311607388276e-06, "loss": 0.0562, "step": 35191 }, { "epoch": 0.6232340271044464, "grad_norm": 0.33112722635269165, "learning_rate": 9.848503564147659e-06, "loss": 0.0389, "step": 35192 }, { "epoch": 0.6232517366414748, "grad_norm": 0.4444272220134735, "learning_rate": 9.847695537854946e-06, "loss": 0.047, "step": 35193 }, { "epoch": 0.6232694461785033, "grad_norm": 0.7522833347320557, "learning_rate": 9.846887528512794e-06, "loss": 0.0739, "step": 35194 }, { "epoch": 0.6232871557155317, "grad_norm": 0.37024083733558655, "learning_rate": 9.846079536123862e-06, "loss": 0.035, "step": 35195 }, { "epoch": 0.6233048652525601, "grad_norm": 0.7463929653167725, "learning_rate": 9.845271560690803e-06, "loss": 0.0902, "step": 35196 }, { "epoch": 0.6233225747895885, "grad_norm": 0.3390045166015625, "learning_rate": 9.844463602216288e-06, "loss": 0.0473, "step": 35197 }, { "epoch": 0.623340284326617, "grad_norm": 0.5953417420387268, "learning_rate": 9.843655660702955e-06, "loss": 0.0574, "step": 35198 }, { "epoch": 0.6233579938636454, "grad_norm": 0.32153040170669556, "learning_rate": 9.842847736153477e-06, "loss": 0.0439, "step": 35199 }, { "epoch": 0.6233757034006738, "grad_norm": 0.5675460696220398, "learning_rate": 9.84203982857051e-06, "loss": 0.046, "step": 35200 }, { "epoch": 0.6233934129377022, "grad_norm": 0.6835660934448242, "learning_rate": 9.841231937956716e-06, "loss": 0.0755, "step": 35201 }, { "epoch": 0.6234111224747307, "grad_norm": 0.42965033650398254, "learning_rate": 9.84042406431474e-06, "loss": 0.0753, "step": 35202 }, { "epoch": 0.6234288320117591, "grad_norm": 0.7425913214683533, "learning_rate": 9.839616207647247e-06, "loss": 0.0963, "step": 35203 }, { "epoch": 0.6234465415487875, "grad_norm": 0.5077539682388306, "learning_rate": 9.8388083679569e-06, "loss": 0.0767, "step": 35204 }, { "epoch": 0.6234642510858159, "grad_norm": 0.6321161985397339, "learning_rate": 9.838000545246345e-06, "loss": 0.1083, "step": 35205 }, { "epoch": 0.6234819606228444, "grad_norm": 0.6948367953300476, "learning_rate": 9.837192739518248e-06, "loss": 0.071, "step": 35206 }, { "epoch": 0.6234996701598728, "grad_norm": 0.41816192865371704, "learning_rate": 9.836384950775262e-06, "loss": 0.0793, "step": 35207 }, { "epoch": 0.6235173796969012, "grad_norm": 0.3728393316268921, "learning_rate": 9.835577179020054e-06, "loss": 0.0446, "step": 35208 }, { "epoch": 0.6235350892339298, "grad_norm": 0.5242362022399902, "learning_rate": 9.834769424255269e-06, "loss": 0.0604, "step": 35209 }, { "epoch": 0.6235527987709581, "grad_norm": 0.45153459906578064, "learning_rate": 9.833961686483572e-06, "loss": 0.0466, "step": 35210 }, { "epoch": 0.6235705083079865, "grad_norm": 0.41290581226348877, "learning_rate": 9.833153965707624e-06, "loss": 0.061, "step": 35211 }, { "epoch": 0.623588217845015, "grad_norm": 0.4768160879611969, "learning_rate": 9.832346261930068e-06, "loss": 0.0463, "step": 35212 }, { "epoch": 0.6236059273820435, "grad_norm": 0.5594528913497925, "learning_rate": 9.831538575153574e-06, "loss": 0.0684, "step": 35213 }, { "epoch": 0.6236236369190719, "grad_norm": 0.681472659111023, "learning_rate": 9.830730905380792e-06, "loss": 0.0532, "step": 35214 }, { "epoch": 0.6236413464561003, "grad_norm": 0.8478026986122131, "learning_rate": 9.829923252614392e-06, "loss": 0.0779, "step": 35215 }, { "epoch": 0.6236590559931287, "grad_norm": 0.7954598069190979, "learning_rate": 9.829115616857016e-06, "loss": 0.0707, "step": 35216 }, { "epoch": 0.6236767655301572, "grad_norm": 0.8730550408363342, "learning_rate": 9.828307998111322e-06, "loss": 0.0581, "step": 35217 }, { "epoch": 0.6236944750671856, "grad_norm": 0.7186259031295776, "learning_rate": 9.827500396379973e-06, "loss": 0.0804, "step": 35218 }, { "epoch": 0.623712184604214, "grad_norm": 0.5563880801200867, "learning_rate": 9.82669281166563e-06, "loss": 0.0291, "step": 35219 }, { "epoch": 0.6237298941412424, "grad_norm": 0.4601496160030365, "learning_rate": 9.825885243970941e-06, "loss": 0.0504, "step": 35220 }, { "epoch": 0.6237476036782709, "grad_norm": 0.6342894434928894, "learning_rate": 9.825077693298565e-06, "loss": 0.0634, "step": 35221 }, { "epoch": 0.6237653132152993, "grad_norm": 0.6776926517486572, "learning_rate": 9.824270159651163e-06, "loss": 0.0431, "step": 35222 }, { "epoch": 0.6237830227523277, "grad_norm": 0.8822306990623474, "learning_rate": 9.823462643031389e-06, "loss": 0.0565, "step": 35223 }, { "epoch": 0.6238007322893562, "grad_norm": 0.567213773727417, "learning_rate": 9.822655143441896e-06, "loss": 0.0583, "step": 35224 }, { "epoch": 0.6238184418263846, "grad_norm": 0.45023012161254883, "learning_rate": 9.821847660885344e-06, "loss": 0.0516, "step": 35225 }, { "epoch": 0.623836151363413, "grad_norm": 0.6352776885032654, "learning_rate": 9.821040195364396e-06, "loss": 0.0706, "step": 35226 }, { "epoch": 0.6238538609004414, "grad_norm": 0.7059202194213867, "learning_rate": 9.820232746881693e-06, "loss": 0.088, "step": 35227 }, { "epoch": 0.6238715704374699, "grad_norm": 0.7275024056434631, "learning_rate": 9.819425315439904e-06, "loss": 0.0759, "step": 35228 }, { "epoch": 0.6238892799744983, "grad_norm": 0.5851916670799255, "learning_rate": 9.818617901041688e-06, "loss": 0.0557, "step": 35229 }, { "epoch": 0.6239069895115267, "grad_norm": 0.5517730712890625, "learning_rate": 9.817810503689689e-06, "loss": 0.048, "step": 35230 }, { "epoch": 0.6239246990485551, "grad_norm": 0.5533100366592407, "learning_rate": 9.817003123386565e-06, "loss": 0.0895, "step": 35231 }, { "epoch": 0.6239424085855836, "grad_norm": 0.8455343246459961, "learning_rate": 9.81619576013498e-06, "loss": 0.0808, "step": 35232 }, { "epoch": 0.623960118122612, "grad_norm": 0.848921000957489, "learning_rate": 9.815388413937591e-06, "loss": 0.0734, "step": 35233 }, { "epoch": 0.6239778276596404, "grad_norm": 0.6983790993690491, "learning_rate": 9.814581084797045e-06, "loss": 0.076, "step": 35234 }, { "epoch": 0.6239955371966688, "grad_norm": 0.581500768661499, "learning_rate": 9.813773772716003e-06, "loss": 0.0768, "step": 35235 }, { "epoch": 0.6240132467336973, "grad_norm": 0.5653232336044312, "learning_rate": 9.812966477697124e-06, "loss": 0.0567, "step": 35236 }, { "epoch": 0.6240309562707257, "grad_norm": 0.5076503157615662, "learning_rate": 9.812159199743058e-06, "loss": 0.0601, "step": 35237 }, { "epoch": 0.6240486658077541, "grad_norm": 0.7432394623756409, "learning_rate": 9.811351938856462e-06, "loss": 0.0772, "step": 35238 }, { "epoch": 0.6240663753447826, "grad_norm": 1.154262900352478, "learning_rate": 9.810544695039994e-06, "loss": 0.0663, "step": 35239 }, { "epoch": 0.624084084881811, "grad_norm": 0.6189512610435486, "learning_rate": 9.809737468296315e-06, "loss": 0.0707, "step": 35240 }, { "epoch": 0.6241017944188394, "grad_norm": 0.45174092054367065, "learning_rate": 9.808930258628067e-06, "loss": 0.0801, "step": 35241 }, { "epoch": 0.6241195039558678, "grad_norm": 0.6434615254402161, "learning_rate": 9.808123066037914e-06, "loss": 0.0579, "step": 35242 }, { "epoch": 0.6241372134928963, "grad_norm": 0.4753570854663849, "learning_rate": 9.807315890528519e-06, "loss": 0.0619, "step": 35243 }, { "epoch": 0.6241549230299247, "grad_norm": 0.8539451956748962, "learning_rate": 9.806508732102523e-06, "loss": 0.0551, "step": 35244 }, { "epoch": 0.6241726325669531, "grad_norm": 0.4654093086719513, "learning_rate": 9.80570159076259e-06, "loss": 0.0299, "step": 35245 }, { "epoch": 0.6241903421039815, "grad_norm": 0.38080212473869324, "learning_rate": 9.804894466511368e-06, "loss": 0.0517, "step": 35246 }, { "epoch": 0.62420805164101, "grad_norm": 0.6803918480873108, "learning_rate": 9.804087359351525e-06, "loss": 0.054, "step": 35247 }, { "epoch": 0.6242257611780384, "grad_norm": 0.7033998370170593, "learning_rate": 9.803280269285706e-06, "loss": 0.0559, "step": 35248 }, { "epoch": 0.6242434707150668, "grad_norm": 0.8805202841758728, "learning_rate": 9.802473196316566e-06, "loss": 0.0622, "step": 35249 }, { "epoch": 0.6242611802520952, "grad_norm": 0.4041370451450348, "learning_rate": 9.801666140446767e-06, "loss": 0.0671, "step": 35250 }, { "epoch": 0.6242788897891237, "grad_norm": 0.6977158188819885, "learning_rate": 9.800859101678962e-06, "loss": 0.054, "step": 35251 }, { "epoch": 0.6242965993261521, "grad_norm": 0.4927828311920166, "learning_rate": 9.800052080015804e-06, "loss": 0.0418, "step": 35252 }, { "epoch": 0.6243143088631805, "grad_norm": 0.6534698605537415, "learning_rate": 9.799245075459947e-06, "loss": 0.0676, "step": 35253 }, { "epoch": 0.624332018400209, "grad_norm": 0.7528648376464844, "learning_rate": 9.798438088014052e-06, "loss": 0.0982, "step": 35254 }, { "epoch": 0.6243497279372374, "grad_norm": 0.4681549668312073, "learning_rate": 9.797631117680766e-06, "loss": 0.0464, "step": 35255 }, { "epoch": 0.6243674374742658, "grad_norm": 0.9615411162376404, "learning_rate": 9.796824164462743e-06, "loss": 0.0591, "step": 35256 }, { "epoch": 0.6243851470112942, "grad_norm": 0.7159672379493713, "learning_rate": 9.796017228362646e-06, "loss": 0.0734, "step": 35257 }, { "epoch": 0.6244028565483227, "grad_norm": 0.659201979637146, "learning_rate": 9.795210309383132e-06, "loss": 0.0849, "step": 35258 }, { "epoch": 0.6244205660853511, "grad_norm": 0.5734401941299438, "learning_rate": 9.794403407526843e-06, "loss": 0.0542, "step": 35259 }, { "epoch": 0.6244382756223795, "grad_norm": 0.4024903178215027, "learning_rate": 9.793596522796443e-06, "loss": 0.0646, "step": 35260 }, { "epoch": 0.6244559851594079, "grad_norm": 0.7509014010429382, "learning_rate": 9.792789655194585e-06, "loss": 0.0681, "step": 35261 }, { "epoch": 0.6244736946964364, "grad_norm": 0.6092606782913208, "learning_rate": 9.791982804723919e-06, "loss": 0.0992, "step": 35262 }, { "epoch": 0.6244914042334648, "grad_norm": 0.3944338262081146, "learning_rate": 9.791175971387101e-06, "loss": 0.0323, "step": 35263 }, { "epoch": 0.6245091137704932, "grad_norm": 1.1819082498550415, "learning_rate": 9.79036915518679e-06, "loss": 0.1068, "step": 35264 }, { "epoch": 0.6245268233075216, "grad_norm": 0.906356692314148, "learning_rate": 9.789562356125638e-06, "loss": 0.0805, "step": 35265 }, { "epoch": 0.6245445328445501, "grad_norm": 0.7352688908576965, "learning_rate": 9.788755574206298e-06, "loss": 0.0691, "step": 35266 }, { "epoch": 0.6245622423815785, "grad_norm": 0.34371060132980347, "learning_rate": 9.787948809431425e-06, "loss": 0.0437, "step": 35267 }, { "epoch": 0.6245799519186069, "grad_norm": 1.1088614463806152, "learning_rate": 9.787142061803679e-06, "loss": 0.0821, "step": 35268 }, { "epoch": 0.6245976614556354, "grad_norm": 0.6969180703163147, "learning_rate": 9.786335331325701e-06, "loss": 0.0666, "step": 35269 }, { "epoch": 0.6246153709926638, "grad_norm": 0.48244911432266235, "learning_rate": 9.78552861800015e-06, "loss": 0.063, "step": 35270 }, { "epoch": 0.6246330805296922, "grad_norm": 0.5021325945854187, "learning_rate": 9.784721921829686e-06, "loss": 0.0611, "step": 35271 }, { "epoch": 0.6246507900667206, "grad_norm": 0.36519792675971985, "learning_rate": 9.783915242816967e-06, "loss": 0.0619, "step": 35272 }, { "epoch": 0.6246684996037491, "grad_norm": 0.6541712284088135, "learning_rate": 9.783108580964631e-06, "loss": 0.0929, "step": 35273 }, { "epoch": 0.6246862091407775, "grad_norm": 0.6246352791786194, "learning_rate": 9.782301936275341e-06, "loss": 0.0662, "step": 35274 }, { "epoch": 0.624703918677806, "grad_norm": 0.34848079085350037, "learning_rate": 9.781495308751752e-06, "loss": 0.0546, "step": 35275 }, { "epoch": 0.6247216282148343, "grad_norm": 0.5120890736579895, "learning_rate": 9.780688698396514e-06, "loss": 0.0472, "step": 35276 }, { "epoch": 0.6247393377518629, "grad_norm": 0.7144238352775574, "learning_rate": 9.77988210521228e-06, "loss": 0.0865, "step": 35277 }, { "epoch": 0.6247570472888913, "grad_norm": 0.25799310207366943, "learning_rate": 9.779075529201709e-06, "loss": 0.0478, "step": 35278 }, { "epoch": 0.6247747568259197, "grad_norm": 0.7225110530853271, "learning_rate": 9.778268970367451e-06, "loss": 0.0611, "step": 35279 }, { "epoch": 0.624792466362948, "grad_norm": 0.6047430038452148, "learning_rate": 9.777462428712159e-06, "loss": 0.1075, "step": 35280 }, { "epoch": 0.6248101758999766, "grad_norm": 0.31962651014328003, "learning_rate": 9.776655904238489e-06, "loss": 0.0648, "step": 35281 }, { "epoch": 0.624827885437005, "grad_norm": 0.6609823703765869, "learning_rate": 9.775849396949095e-06, "loss": 0.0796, "step": 35282 }, { "epoch": 0.6248455949740334, "grad_norm": 0.6429893374443054, "learning_rate": 9.775042906846625e-06, "loss": 0.0612, "step": 35283 }, { "epoch": 0.6248633045110619, "grad_norm": 0.6759019494056702, "learning_rate": 9.774236433933734e-06, "loss": 0.0602, "step": 35284 }, { "epoch": 0.6248810140480903, "grad_norm": 0.9627147316932678, "learning_rate": 9.773429978213075e-06, "loss": 0.0743, "step": 35285 }, { "epoch": 0.6248987235851187, "grad_norm": 0.8542748689651489, "learning_rate": 9.77262353968731e-06, "loss": 0.0529, "step": 35286 }, { "epoch": 0.6249164331221471, "grad_norm": 0.5368475317955017, "learning_rate": 9.771817118359078e-06, "loss": 0.0586, "step": 35287 }, { "epoch": 0.6249341426591756, "grad_norm": 0.5075568556785583, "learning_rate": 9.771010714231042e-06, "loss": 0.0729, "step": 35288 }, { "epoch": 0.624951852196204, "grad_norm": 0.47784921526908875, "learning_rate": 9.770204327305848e-06, "loss": 0.0619, "step": 35289 }, { "epoch": 0.6249695617332324, "grad_norm": 0.6986640691757202, "learning_rate": 9.76939795758616e-06, "loss": 0.0837, "step": 35290 }, { "epoch": 0.6249872712702608, "grad_norm": 0.703854501247406, "learning_rate": 9.76859160507462e-06, "loss": 0.0746, "step": 35291 }, { "epoch": 0.6250049808072893, "grad_norm": 0.42962634563446045, "learning_rate": 9.767785269773882e-06, "loss": 0.0696, "step": 35292 }, { "epoch": 0.6250226903443177, "grad_norm": 1.162939190864563, "learning_rate": 9.766978951686605e-06, "loss": 0.0716, "step": 35293 }, { "epoch": 0.6250403998813461, "grad_norm": 0.5008141994476318, "learning_rate": 9.766172650815436e-06, "loss": 0.0593, "step": 35294 }, { "epoch": 0.6250581094183746, "grad_norm": 0.6385414600372314, "learning_rate": 9.765366367163028e-06, "loss": 0.0481, "step": 35295 }, { "epoch": 0.625075818955403, "grad_norm": 0.5367231965065002, "learning_rate": 9.764560100732037e-06, "loss": 0.0684, "step": 35296 }, { "epoch": 0.6250935284924314, "grad_norm": 0.5578699707984924, "learning_rate": 9.763753851525118e-06, "loss": 0.0658, "step": 35297 }, { "epoch": 0.6251112380294598, "grad_norm": 0.6388052105903625, "learning_rate": 9.762947619544913e-06, "loss": 0.0339, "step": 35298 }, { "epoch": 0.6251289475664883, "grad_norm": 0.8664582371711731, "learning_rate": 9.762141404794082e-06, "loss": 0.0771, "step": 35299 }, { "epoch": 0.6251466571035167, "grad_norm": 0.8018531799316406, "learning_rate": 9.761335207275281e-06, "loss": 0.1204, "step": 35300 }, { "epoch": 0.6251643666405451, "grad_norm": 0.5935377478599548, "learning_rate": 9.760529026991152e-06, "loss": 0.0802, "step": 35301 }, { "epoch": 0.6251820761775735, "grad_norm": 0.6575705409049988, "learning_rate": 9.759722863944354e-06, "loss": 0.0602, "step": 35302 }, { "epoch": 0.625199785714602, "grad_norm": 1.0975334644317627, "learning_rate": 9.758916718137534e-06, "loss": 0.0787, "step": 35303 }, { "epoch": 0.6252174952516304, "grad_norm": 0.7529339790344238, "learning_rate": 9.758110589573354e-06, "loss": 0.0505, "step": 35304 }, { "epoch": 0.6252352047886588, "grad_norm": 0.4821515679359436, "learning_rate": 9.757304478254458e-06, "loss": 0.0447, "step": 35305 }, { "epoch": 0.6252529143256872, "grad_norm": 0.6345432996749878, "learning_rate": 9.756498384183498e-06, "loss": 0.0566, "step": 35306 }, { "epoch": 0.6252706238627157, "grad_norm": 1.2696850299835205, "learning_rate": 9.75569230736313e-06, "loss": 0.0472, "step": 35307 }, { "epoch": 0.6252883333997441, "grad_norm": 0.8398371934890747, "learning_rate": 9.754886247796003e-06, "loss": 0.061, "step": 35308 }, { "epoch": 0.6253060429367725, "grad_norm": 0.5269546508789062, "learning_rate": 9.75408020548477e-06, "loss": 0.0605, "step": 35309 }, { "epoch": 0.625323752473801, "grad_norm": 0.45074716210365295, "learning_rate": 9.75327418043208e-06, "loss": 0.0879, "step": 35310 }, { "epoch": 0.6253414620108294, "grad_norm": 0.5831876993179321, "learning_rate": 9.752468172640594e-06, "loss": 0.0849, "step": 35311 }, { "epoch": 0.6253591715478578, "grad_norm": 0.29785066843032837, "learning_rate": 9.751662182112952e-06, "loss": 0.0448, "step": 35312 }, { "epoch": 0.6253768810848862, "grad_norm": 0.65802401304245, "learning_rate": 9.75085620885181e-06, "loss": 0.0957, "step": 35313 }, { "epoch": 0.6253945906219147, "grad_norm": 0.4935419261455536, "learning_rate": 9.750050252859824e-06, "loss": 0.028, "step": 35314 }, { "epoch": 0.6254123001589431, "grad_norm": 0.5554051995277405, "learning_rate": 9.749244314139638e-06, "loss": 0.0667, "step": 35315 }, { "epoch": 0.6254300096959715, "grad_norm": 0.6399857997894287, "learning_rate": 9.748438392693907e-06, "loss": 0.0484, "step": 35316 }, { "epoch": 0.6254477192329999, "grad_norm": 0.3200097382068634, "learning_rate": 9.747632488525284e-06, "loss": 0.0597, "step": 35317 }, { "epoch": 0.6254654287700284, "grad_norm": 0.8924017548561096, "learning_rate": 9.74682660163642e-06, "loss": 0.0891, "step": 35318 }, { "epoch": 0.6254831383070568, "grad_norm": 0.9154743552207947, "learning_rate": 9.746020732029962e-06, "loss": 0.1168, "step": 35319 }, { "epoch": 0.6255008478440852, "grad_norm": 0.4530530273914337, "learning_rate": 9.745214879708566e-06, "loss": 0.0417, "step": 35320 }, { "epoch": 0.6255185573811136, "grad_norm": 0.4924948513507843, "learning_rate": 9.744409044674882e-06, "loss": 0.0503, "step": 35321 }, { "epoch": 0.6255362669181421, "grad_norm": 0.594126284122467, "learning_rate": 9.74360322693156e-06, "loss": 0.0787, "step": 35322 }, { "epoch": 0.6255539764551705, "grad_norm": 0.40037640929222107, "learning_rate": 9.742797426481253e-06, "loss": 0.0861, "step": 35323 }, { "epoch": 0.6255716859921989, "grad_norm": 0.5412682890892029, "learning_rate": 9.741991643326607e-06, "loss": 0.0445, "step": 35324 }, { "epoch": 0.6255893955292274, "grad_norm": 0.6020652055740356, "learning_rate": 9.741185877470285e-06, "loss": 0.0623, "step": 35325 }, { "epoch": 0.6256071050662558, "grad_norm": 0.5472022294998169, "learning_rate": 9.740380128914924e-06, "loss": 0.0491, "step": 35326 }, { "epoch": 0.6256248146032842, "grad_norm": 0.4017689824104309, "learning_rate": 9.739574397663178e-06, "loss": 0.0489, "step": 35327 }, { "epoch": 0.6256425241403126, "grad_norm": 0.7642900347709656, "learning_rate": 9.738768683717698e-06, "loss": 0.0662, "step": 35328 }, { "epoch": 0.6256602336773411, "grad_norm": 0.6328514218330383, "learning_rate": 9.737962987081148e-06, "loss": 0.0831, "step": 35329 }, { "epoch": 0.6256779432143695, "grad_norm": 0.67223060131073, "learning_rate": 9.73715730775616e-06, "loss": 0.0621, "step": 35330 }, { "epoch": 0.6256956527513979, "grad_norm": 0.5327444076538086, "learning_rate": 9.736351645745393e-06, "loss": 0.0741, "step": 35331 }, { "epoch": 0.6257133622884263, "grad_norm": 0.685499906539917, "learning_rate": 9.7355460010515e-06, "loss": 0.0481, "step": 35332 }, { "epoch": 0.6257310718254548, "grad_norm": 0.30029287934303284, "learning_rate": 9.734740373677124e-06, "loss": 0.0607, "step": 35333 }, { "epoch": 0.6257487813624832, "grad_norm": 0.6673424243927002, "learning_rate": 9.733934763624918e-06, "loss": 0.065, "step": 35334 }, { "epoch": 0.6257664908995116, "grad_norm": 0.6447660326957703, "learning_rate": 9.733129170897536e-06, "loss": 0.0676, "step": 35335 }, { "epoch": 0.62578420043654, "grad_norm": 0.8143213987350464, "learning_rate": 9.73232359549763e-06, "loss": 0.0644, "step": 35336 }, { "epoch": 0.6258019099735685, "grad_norm": 0.8348807692527771, "learning_rate": 9.731518037427842e-06, "loss": 0.059, "step": 35337 }, { "epoch": 0.625819619510597, "grad_norm": 0.5690037608146667, "learning_rate": 9.730712496690828e-06, "loss": 0.0646, "step": 35338 }, { "epoch": 0.6258373290476253, "grad_norm": 0.5848604440689087, "learning_rate": 9.729906973289242e-06, "loss": 0.0439, "step": 35339 }, { "epoch": 0.6258550385846539, "grad_norm": 0.5730122327804565, "learning_rate": 9.729101467225723e-06, "loss": 0.072, "step": 35340 }, { "epoch": 0.6258727481216823, "grad_norm": 0.38990920782089233, "learning_rate": 9.728295978502928e-06, "loss": 0.0385, "step": 35341 }, { "epoch": 0.6258904576587107, "grad_norm": 0.5430408120155334, "learning_rate": 9.727490507123505e-06, "loss": 0.0894, "step": 35342 }, { "epoch": 0.625908167195739, "grad_norm": 0.9336557984352112, "learning_rate": 9.726685053090108e-06, "loss": 0.1009, "step": 35343 }, { "epoch": 0.6259258767327676, "grad_norm": 0.4088376760482788, "learning_rate": 9.725879616405382e-06, "loss": 0.0495, "step": 35344 }, { "epoch": 0.625943586269796, "grad_norm": 0.7715663909912109, "learning_rate": 9.725074197071978e-06, "loss": 0.0568, "step": 35345 }, { "epoch": 0.6259612958068244, "grad_norm": 0.45672962069511414, "learning_rate": 9.724268795092548e-06, "loss": 0.0457, "step": 35346 }, { "epoch": 0.6259790053438528, "grad_norm": 0.33022087812423706, "learning_rate": 9.72346341046974e-06, "loss": 0.0702, "step": 35347 }, { "epoch": 0.6259967148808813, "grad_norm": 0.5908503532409668, "learning_rate": 9.722658043206201e-06, "loss": 0.0945, "step": 35348 }, { "epoch": 0.6260144244179097, "grad_norm": 0.5257279276847839, "learning_rate": 9.721852693304583e-06, "loss": 0.0498, "step": 35349 }, { "epoch": 0.6260321339549381, "grad_norm": 0.8818819522857666, "learning_rate": 9.721047360767544e-06, "loss": 0.0819, "step": 35350 }, { "epoch": 0.6260498434919665, "grad_norm": 0.34034982323646545, "learning_rate": 9.720242045597714e-06, "loss": 0.0313, "step": 35351 }, { "epoch": 0.626067553028995, "grad_norm": 0.6761583089828491, "learning_rate": 9.71943674779776e-06, "loss": 0.0307, "step": 35352 }, { "epoch": 0.6260852625660234, "grad_norm": 0.9425426125526428, "learning_rate": 9.718631467370327e-06, "loss": 0.0784, "step": 35353 }, { "epoch": 0.6261029721030518, "grad_norm": 0.7120833992958069, "learning_rate": 9.71782620431806e-06, "loss": 0.0607, "step": 35354 }, { "epoch": 0.6261206816400803, "grad_norm": 0.7082605361938477, "learning_rate": 9.717020958643607e-06, "loss": 0.0806, "step": 35355 }, { "epoch": 0.6261383911771087, "grad_norm": 0.5474001169204712, "learning_rate": 9.716215730349623e-06, "loss": 0.0514, "step": 35356 }, { "epoch": 0.6261561007141371, "grad_norm": 0.3175923824310303, "learning_rate": 9.715410519438758e-06, "loss": 0.0505, "step": 35357 }, { "epoch": 0.6261738102511655, "grad_norm": 0.47605252265930176, "learning_rate": 9.714605325913654e-06, "loss": 0.0477, "step": 35358 }, { "epoch": 0.626191519788194, "grad_norm": 0.936284065246582, "learning_rate": 9.713800149776962e-06, "loss": 0.0769, "step": 35359 }, { "epoch": 0.6262092293252224, "grad_norm": 0.5545850992202759, "learning_rate": 9.712994991031337e-06, "loss": 0.0723, "step": 35360 }, { "epoch": 0.6262269388622508, "grad_norm": 0.5646087527275085, "learning_rate": 9.712189849679424e-06, "loss": 0.0493, "step": 35361 }, { "epoch": 0.6262446483992792, "grad_norm": 0.5563259124755859, "learning_rate": 9.711384725723868e-06, "loss": 0.0501, "step": 35362 }, { "epoch": 0.6262623579363077, "grad_norm": 0.4718778133392334, "learning_rate": 9.710579619167323e-06, "loss": 0.0556, "step": 35363 }, { "epoch": 0.6262800674733361, "grad_norm": 0.543913722038269, "learning_rate": 9.709774530012442e-06, "loss": 0.0643, "step": 35364 }, { "epoch": 0.6262977770103645, "grad_norm": 0.608750581741333, "learning_rate": 9.708969458261858e-06, "loss": 0.0592, "step": 35365 }, { "epoch": 0.6263154865473929, "grad_norm": 0.4610922336578369, "learning_rate": 9.708164403918233e-06, "loss": 0.0457, "step": 35366 }, { "epoch": 0.6263331960844214, "grad_norm": 0.5483994483947754, "learning_rate": 9.707359366984211e-06, "loss": 0.073, "step": 35367 }, { "epoch": 0.6263509056214498, "grad_norm": 0.7153577208518982, "learning_rate": 9.706554347462448e-06, "loss": 0.0565, "step": 35368 }, { "epoch": 0.6263686151584782, "grad_norm": 0.5278054475784302, "learning_rate": 9.70574934535558e-06, "loss": 0.0534, "step": 35369 }, { "epoch": 0.6263863246955067, "grad_norm": 0.4995473027229309, "learning_rate": 9.704944360666261e-06, "loss": 0.0646, "step": 35370 }, { "epoch": 0.6264040342325351, "grad_norm": 0.612983226776123, "learning_rate": 9.704139393397142e-06, "loss": 0.1282, "step": 35371 }, { "epoch": 0.6264217437695635, "grad_norm": 0.5191372632980347, "learning_rate": 9.703334443550867e-06, "loss": 0.0632, "step": 35372 }, { "epoch": 0.6264394533065919, "grad_norm": 0.4811992347240448, "learning_rate": 9.702529511130084e-06, "loss": 0.0532, "step": 35373 }, { "epoch": 0.6264571628436204, "grad_norm": 0.47950059175491333, "learning_rate": 9.701724596137444e-06, "loss": 0.0576, "step": 35374 }, { "epoch": 0.6264748723806488, "grad_norm": 0.3292519152164459, "learning_rate": 9.700919698575598e-06, "loss": 0.0405, "step": 35375 }, { "epoch": 0.6264925819176772, "grad_norm": 0.75771164894104, "learning_rate": 9.700114818447187e-06, "loss": 0.0775, "step": 35376 }, { "epoch": 0.6265102914547056, "grad_norm": 0.7100824117660522, "learning_rate": 9.69930995575486e-06, "loss": 0.06, "step": 35377 }, { "epoch": 0.6265280009917341, "grad_norm": 0.1707240343093872, "learning_rate": 9.698505110501276e-06, "loss": 0.0371, "step": 35378 }, { "epoch": 0.6265457105287625, "grad_norm": 0.5375038385391235, "learning_rate": 9.697700282689066e-06, "loss": 0.0683, "step": 35379 }, { "epoch": 0.6265634200657909, "grad_norm": 0.4789568781852722, "learning_rate": 9.696895472320884e-06, "loss": 0.0865, "step": 35380 }, { "epoch": 0.6265811296028193, "grad_norm": 0.24697622656822205, "learning_rate": 9.69609067939938e-06, "loss": 0.0373, "step": 35381 }, { "epoch": 0.6265988391398478, "grad_norm": 0.8239633440971375, "learning_rate": 9.695285903927211e-06, "loss": 0.0792, "step": 35382 }, { "epoch": 0.6266165486768762, "grad_norm": 0.5862744450569153, "learning_rate": 9.694481145907009e-06, "loss": 0.0687, "step": 35383 }, { "epoch": 0.6266342582139046, "grad_norm": 0.5193703770637512, "learning_rate": 9.693676405341423e-06, "loss": 0.0461, "step": 35384 }, { "epoch": 0.6266519677509331, "grad_norm": 0.9008292555809021, "learning_rate": 9.692871682233111e-06, "loss": 0.1053, "step": 35385 }, { "epoch": 0.6266696772879615, "grad_norm": 0.5438066720962524, "learning_rate": 9.69206697658471e-06, "loss": 0.069, "step": 35386 }, { "epoch": 0.6266873868249899, "grad_norm": 0.747085452079773, "learning_rate": 9.691262288398873e-06, "loss": 0.0533, "step": 35387 }, { "epoch": 0.6267050963620183, "grad_norm": 0.5985802412033081, "learning_rate": 9.690457617678247e-06, "loss": 0.0586, "step": 35388 }, { "epoch": 0.6267228058990468, "grad_norm": 0.6941965222358704, "learning_rate": 9.68965296442548e-06, "loss": 0.0649, "step": 35389 }, { "epoch": 0.6267405154360752, "grad_norm": 0.2710466682910919, "learning_rate": 9.688848328643214e-06, "loss": 0.0609, "step": 35390 }, { "epoch": 0.6267582249731036, "grad_norm": 0.591547429561615, "learning_rate": 9.688043710334102e-06, "loss": 0.0651, "step": 35391 }, { "epoch": 0.626775934510132, "grad_norm": 0.7478524446487427, "learning_rate": 9.687239109500787e-06, "loss": 0.0651, "step": 35392 }, { "epoch": 0.6267936440471605, "grad_norm": 0.43328985571861267, "learning_rate": 9.686434526145924e-06, "loss": 0.0546, "step": 35393 }, { "epoch": 0.6268113535841889, "grad_norm": 0.4655524790287018, "learning_rate": 9.685629960272147e-06, "loss": 0.0606, "step": 35394 }, { "epoch": 0.6268290631212173, "grad_norm": 0.34850528836250305, "learning_rate": 9.68482541188211e-06, "loss": 0.068, "step": 35395 }, { "epoch": 0.6268467726582457, "grad_norm": 0.45552921295166016, "learning_rate": 9.684020880978467e-06, "loss": 0.051, "step": 35396 }, { "epoch": 0.6268644821952742, "grad_norm": 0.3991921544075012, "learning_rate": 9.683216367563852e-06, "loss": 0.0583, "step": 35397 }, { "epoch": 0.6268821917323026, "grad_norm": 0.28318101167678833, "learning_rate": 9.682411871640917e-06, "loss": 0.0525, "step": 35398 }, { "epoch": 0.626899901269331, "grad_norm": 0.7976127862930298, "learning_rate": 9.68160739321231e-06, "loss": 0.0708, "step": 35399 }, { "epoch": 0.6269176108063595, "grad_norm": 0.3736304044723511, "learning_rate": 9.68080293228068e-06, "loss": 0.0497, "step": 35400 }, { "epoch": 0.626935320343388, "grad_norm": 0.5679295063018799, "learning_rate": 9.679998488848665e-06, "loss": 0.0648, "step": 35401 }, { "epoch": 0.6269530298804163, "grad_norm": 0.40860098600387573, "learning_rate": 9.679194062918918e-06, "loss": 0.0743, "step": 35402 }, { "epoch": 0.6269707394174447, "grad_norm": 0.934180736541748, "learning_rate": 9.678389654494088e-06, "loss": 0.0512, "step": 35403 }, { "epoch": 0.6269884489544733, "grad_norm": 0.8932439684867859, "learning_rate": 9.677585263576817e-06, "loss": 0.0845, "step": 35404 }, { "epoch": 0.6270061584915017, "grad_norm": 0.7533368468284607, "learning_rate": 9.676780890169747e-06, "loss": 0.0931, "step": 35405 }, { "epoch": 0.62702386802853, "grad_norm": 0.8315326571464539, "learning_rate": 9.675976534275533e-06, "loss": 0.0542, "step": 35406 }, { "epoch": 0.6270415775655585, "grad_norm": 0.839937150478363, "learning_rate": 9.675172195896822e-06, "loss": 0.0594, "step": 35407 }, { "epoch": 0.627059287102587, "grad_norm": 0.4716937839984894, "learning_rate": 9.674367875036251e-06, "loss": 0.0457, "step": 35408 }, { "epoch": 0.6270769966396154, "grad_norm": 0.5073274970054626, "learning_rate": 9.673563571696467e-06, "loss": 0.0626, "step": 35409 }, { "epoch": 0.6270947061766438, "grad_norm": 0.30679839849472046, "learning_rate": 9.672759285880131e-06, "loss": 0.0498, "step": 35410 }, { "epoch": 0.6271124157136722, "grad_norm": 0.755764365196228, "learning_rate": 9.671955017589871e-06, "loss": 0.0569, "step": 35411 }, { "epoch": 0.6271301252507007, "grad_norm": 0.6089417338371277, "learning_rate": 9.671150766828339e-06, "loss": 0.0498, "step": 35412 }, { "epoch": 0.6271478347877291, "grad_norm": 0.6480026245117188, "learning_rate": 9.670346533598184e-06, "loss": 0.0456, "step": 35413 }, { "epoch": 0.6271655443247575, "grad_norm": 0.6514377593994141, "learning_rate": 9.66954231790205e-06, "loss": 0.0486, "step": 35414 }, { "epoch": 0.627183253861786, "grad_norm": 0.6616454124450684, "learning_rate": 9.668738119742583e-06, "loss": 0.0881, "step": 35415 }, { "epoch": 0.6272009633988144, "grad_norm": 0.7527542114257812, "learning_rate": 9.667933939122427e-06, "loss": 0.0708, "step": 35416 }, { "epoch": 0.6272186729358428, "grad_norm": 0.6427138447761536, "learning_rate": 9.667129776044233e-06, "loss": 0.0436, "step": 35417 }, { "epoch": 0.6272363824728712, "grad_norm": 0.8169805407524109, "learning_rate": 9.666325630510637e-06, "loss": 0.0757, "step": 35418 }, { "epoch": 0.6272540920098997, "grad_norm": 1.0982099771499634, "learning_rate": 9.665521502524293e-06, "loss": 0.0834, "step": 35419 }, { "epoch": 0.6272718015469281, "grad_norm": 0.3178197145462036, "learning_rate": 9.664717392087842e-06, "loss": 0.0388, "step": 35420 }, { "epoch": 0.6272895110839565, "grad_norm": 0.644802987575531, "learning_rate": 9.663913299203938e-06, "loss": 0.044, "step": 35421 }, { "epoch": 0.6273072206209849, "grad_norm": 0.35934364795684814, "learning_rate": 9.663109223875213e-06, "loss": 0.05, "step": 35422 }, { "epoch": 0.6273249301580134, "grad_norm": 0.8616815209388733, "learning_rate": 9.662305166104316e-06, "loss": 0.0755, "step": 35423 }, { "epoch": 0.6273426396950418, "grad_norm": 0.4823758900165558, "learning_rate": 9.661501125893897e-06, "loss": 0.0523, "step": 35424 }, { "epoch": 0.6273603492320702, "grad_norm": 0.6080117225646973, "learning_rate": 9.660697103246607e-06, "loss": 0.0634, "step": 35425 }, { "epoch": 0.6273780587690986, "grad_norm": 1.1041470766067505, "learning_rate": 9.659893098165077e-06, "loss": 0.082, "step": 35426 }, { "epoch": 0.6273957683061271, "grad_norm": 0.5659698843955994, "learning_rate": 9.65908911065196e-06, "loss": 0.0632, "step": 35427 }, { "epoch": 0.6274134778431555, "grad_norm": 0.5421206951141357, "learning_rate": 9.658285140709899e-06, "loss": 0.083, "step": 35428 }, { "epoch": 0.6274311873801839, "grad_norm": 1.1081689596176147, "learning_rate": 9.65748118834154e-06, "loss": 0.0777, "step": 35429 }, { "epoch": 0.6274488969172124, "grad_norm": 0.6631150245666504, "learning_rate": 9.656677253549525e-06, "loss": 0.0614, "step": 35430 }, { "epoch": 0.6274666064542408, "grad_norm": 0.6887684464454651, "learning_rate": 9.655873336336505e-06, "loss": 0.0601, "step": 35431 }, { "epoch": 0.6274843159912692, "grad_norm": 0.730190634727478, "learning_rate": 9.655069436705123e-06, "loss": 0.072, "step": 35432 }, { "epoch": 0.6275020255282976, "grad_norm": 0.38556158542633057, "learning_rate": 9.654265554658015e-06, "loss": 0.0384, "step": 35433 }, { "epoch": 0.6275197350653261, "grad_norm": 0.602046549320221, "learning_rate": 9.653461690197836e-06, "loss": 0.0405, "step": 35434 }, { "epoch": 0.6275374446023545, "grad_norm": 0.6779980063438416, "learning_rate": 9.652657843327231e-06, "loss": 0.0841, "step": 35435 }, { "epoch": 0.6275551541393829, "grad_norm": 0.7566159963607788, "learning_rate": 9.651854014048838e-06, "loss": 0.0627, "step": 35436 }, { "epoch": 0.6275728636764113, "grad_norm": 0.3122369050979614, "learning_rate": 9.651050202365304e-06, "loss": 0.0723, "step": 35437 }, { "epoch": 0.6275905732134398, "grad_norm": 0.9020295143127441, "learning_rate": 9.65024640827927e-06, "loss": 0.0708, "step": 35438 }, { "epoch": 0.6276082827504682, "grad_norm": 0.5630890130996704, "learning_rate": 9.64944263179339e-06, "loss": 0.1059, "step": 35439 }, { "epoch": 0.6276259922874966, "grad_norm": 0.49235111474990845, "learning_rate": 9.648638872910302e-06, "loss": 0.0554, "step": 35440 }, { "epoch": 0.627643701824525, "grad_norm": 0.7491453886032104, "learning_rate": 9.647835131632649e-06, "loss": 0.063, "step": 35441 }, { "epoch": 0.6276614113615535, "grad_norm": 0.6073399782180786, "learning_rate": 9.64703140796308e-06, "loss": 0.0434, "step": 35442 }, { "epoch": 0.6276791208985819, "grad_norm": 0.5884754657745361, "learning_rate": 9.646227701904233e-06, "loss": 0.0611, "step": 35443 }, { "epoch": 0.6276968304356103, "grad_norm": 0.6431167125701904, "learning_rate": 9.645424013458755e-06, "loss": 0.0413, "step": 35444 }, { "epoch": 0.6277145399726388, "grad_norm": 0.6610298752784729, "learning_rate": 9.644620342629288e-06, "loss": 0.0597, "step": 35445 }, { "epoch": 0.6277322495096672, "grad_norm": 0.33159783482551575, "learning_rate": 9.643816689418484e-06, "loss": 0.0476, "step": 35446 }, { "epoch": 0.6277499590466956, "grad_norm": 0.631816029548645, "learning_rate": 9.643013053828979e-06, "loss": 0.0817, "step": 35447 }, { "epoch": 0.627767668583724, "grad_norm": 0.30958402156829834, "learning_rate": 9.642209435863419e-06, "loss": 0.0453, "step": 35448 }, { "epoch": 0.6277853781207525, "grad_norm": 0.558265745639801, "learning_rate": 9.641405835524454e-06, "loss": 0.054, "step": 35449 }, { "epoch": 0.6278030876577809, "grad_norm": 0.652020275592804, "learning_rate": 9.640602252814714e-06, "loss": 0.0574, "step": 35450 }, { "epoch": 0.6278207971948093, "grad_norm": 1.1372592449188232, "learning_rate": 9.639798687736851e-06, "loss": 0.078, "step": 35451 }, { "epoch": 0.6278385067318377, "grad_norm": 0.5163684487342834, "learning_rate": 9.638995140293508e-06, "loss": 0.0501, "step": 35452 }, { "epoch": 0.6278562162688662, "grad_norm": 0.5706843137741089, "learning_rate": 9.638191610487335e-06, "loss": 0.0488, "step": 35453 }, { "epoch": 0.6278739258058946, "grad_norm": 0.4041711091995239, "learning_rate": 9.637388098320965e-06, "loss": 0.0543, "step": 35454 }, { "epoch": 0.627891635342923, "grad_norm": 0.656485378742218, "learning_rate": 9.636584603797045e-06, "loss": 0.091, "step": 35455 }, { "epoch": 0.6279093448799514, "grad_norm": 0.7476944327354431, "learning_rate": 9.635781126918222e-06, "loss": 0.0687, "step": 35456 }, { "epoch": 0.6279270544169799, "grad_norm": 0.3887569308280945, "learning_rate": 9.634977667687133e-06, "loss": 0.0375, "step": 35457 }, { "epoch": 0.6279447639540083, "grad_norm": 0.5841754674911499, "learning_rate": 9.634174226106425e-06, "loss": 0.0573, "step": 35458 }, { "epoch": 0.6279624734910367, "grad_norm": 0.43593931198120117, "learning_rate": 9.633370802178742e-06, "loss": 0.0531, "step": 35459 }, { "epoch": 0.6279801830280652, "grad_norm": 0.9022220969200134, "learning_rate": 9.63256739590673e-06, "loss": 0.076, "step": 35460 }, { "epoch": 0.6279978925650936, "grad_norm": 0.5314980149269104, "learning_rate": 9.631764007293023e-06, "loss": 0.0819, "step": 35461 }, { "epoch": 0.628015602102122, "grad_norm": 0.4957068860530853, "learning_rate": 9.63096063634027e-06, "loss": 0.0578, "step": 35462 }, { "epoch": 0.6280333116391504, "grad_norm": 0.789106547832489, "learning_rate": 9.630157283051115e-06, "loss": 0.0668, "step": 35463 }, { "epoch": 0.628051021176179, "grad_norm": 0.48974502086639404, "learning_rate": 9.629353947428204e-06, "loss": 0.0861, "step": 35464 }, { "epoch": 0.6280687307132073, "grad_norm": 0.6835898756980896, "learning_rate": 9.628550629474169e-06, "loss": 0.0612, "step": 35465 }, { "epoch": 0.6280864402502357, "grad_norm": 0.7305949926376343, "learning_rate": 9.62774732919166e-06, "loss": 0.0575, "step": 35466 }, { "epoch": 0.6281041497872641, "grad_norm": 0.6242156624794006, "learning_rate": 9.626944046583322e-06, "loss": 0.0772, "step": 35467 }, { "epoch": 0.6281218593242927, "grad_norm": 0.4315786063671112, "learning_rate": 9.626140781651792e-06, "loss": 0.0668, "step": 35468 }, { "epoch": 0.628139568861321, "grad_norm": 0.3743109703063965, "learning_rate": 9.625337534399714e-06, "loss": 0.063, "step": 35469 }, { "epoch": 0.6281572783983495, "grad_norm": 0.5516928434371948, "learning_rate": 9.624534304829733e-06, "loss": 0.0654, "step": 35470 }, { "epoch": 0.6281749879353778, "grad_norm": 0.7996996641159058, "learning_rate": 9.623731092944494e-06, "loss": 0.0726, "step": 35471 }, { "epoch": 0.6281926974724064, "grad_norm": 0.8348891735076904, "learning_rate": 9.622927898746632e-06, "loss": 0.0773, "step": 35472 }, { "epoch": 0.6282104070094348, "grad_norm": 0.6521809101104736, "learning_rate": 9.622124722238793e-06, "loss": 0.0523, "step": 35473 }, { "epoch": 0.6282281165464632, "grad_norm": 0.5098822116851807, "learning_rate": 9.621321563423626e-06, "loss": 0.048, "step": 35474 }, { "epoch": 0.6282458260834917, "grad_norm": 0.44055888056755066, "learning_rate": 9.620518422303758e-06, "loss": 0.0698, "step": 35475 }, { "epoch": 0.6282635356205201, "grad_norm": 0.5921356678009033, "learning_rate": 9.619715298881843e-06, "loss": 0.0636, "step": 35476 }, { "epoch": 0.6282812451575485, "grad_norm": 0.5427037477493286, "learning_rate": 9.61891219316052e-06, "loss": 0.0595, "step": 35477 }, { "epoch": 0.6282989546945769, "grad_norm": 0.5203598141670227, "learning_rate": 9.61810910514244e-06, "loss": 0.0963, "step": 35478 }, { "epoch": 0.6283166642316054, "grad_norm": 0.5239763855934143, "learning_rate": 9.617306034830229e-06, "loss": 0.0561, "step": 35479 }, { "epoch": 0.6283343737686338, "grad_norm": 0.5476115345954895, "learning_rate": 9.616502982226537e-06, "loss": 0.0548, "step": 35480 }, { "epoch": 0.6283520833056622, "grad_norm": 0.6101110577583313, "learning_rate": 9.615699947334009e-06, "loss": 0.0489, "step": 35481 }, { "epoch": 0.6283697928426906, "grad_norm": 0.915172278881073, "learning_rate": 9.61489693015528e-06, "loss": 0.0918, "step": 35482 }, { "epoch": 0.6283875023797191, "grad_norm": 0.6079841256141663, "learning_rate": 9.614093930692995e-06, "loss": 0.094, "step": 35483 }, { "epoch": 0.6284052119167475, "grad_norm": 0.8908771872520447, "learning_rate": 9.613290948949797e-06, "loss": 0.0698, "step": 35484 }, { "epoch": 0.6284229214537759, "grad_norm": 0.6129160523414612, "learning_rate": 9.61248798492833e-06, "loss": 0.0485, "step": 35485 }, { "epoch": 0.6284406309908043, "grad_norm": 0.3754807114601135, "learning_rate": 9.611685038631228e-06, "loss": 0.0475, "step": 35486 }, { "epoch": 0.6284583405278328, "grad_norm": 0.5361267328262329, "learning_rate": 9.610882110061139e-06, "loss": 0.0583, "step": 35487 }, { "epoch": 0.6284760500648612, "grad_norm": 0.5733794569969177, "learning_rate": 9.61007919922071e-06, "loss": 0.07, "step": 35488 }, { "epoch": 0.6284937596018896, "grad_norm": 0.9223647713661194, "learning_rate": 9.609276306112565e-06, "loss": 0.0699, "step": 35489 }, { "epoch": 0.6285114691389181, "grad_norm": 0.7198745608329773, "learning_rate": 9.608473430739356e-06, "loss": 0.0723, "step": 35490 }, { "epoch": 0.6285291786759465, "grad_norm": 1.005177617073059, "learning_rate": 9.607670573103728e-06, "loss": 0.1093, "step": 35491 }, { "epoch": 0.6285468882129749, "grad_norm": 0.434658020734787, "learning_rate": 9.606867733208322e-06, "loss": 0.0723, "step": 35492 }, { "epoch": 0.6285645977500033, "grad_norm": 0.3653008043766022, "learning_rate": 9.606064911055773e-06, "loss": 0.0507, "step": 35493 }, { "epoch": 0.6285823072870318, "grad_norm": 0.7889226675033569, "learning_rate": 9.60526210664872e-06, "loss": 0.0552, "step": 35494 }, { "epoch": 0.6286000168240602, "grad_norm": 0.49388399720191956, "learning_rate": 9.604459319989812e-06, "loss": 0.0437, "step": 35495 }, { "epoch": 0.6286177263610886, "grad_norm": 0.2775190472602844, "learning_rate": 9.60365655108169e-06, "loss": 0.0676, "step": 35496 }, { "epoch": 0.628635435898117, "grad_norm": 0.7422632575035095, "learning_rate": 9.602853799926988e-06, "loss": 0.0941, "step": 35497 }, { "epoch": 0.6286531454351455, "grad_norm": 0.4790003001689911, "learning_rate": 9.602051066528351e-06, "loss": 0.0802, "step": 35498 }, { "epoch": 0.6286708549721739, "grad_norm": 0.764273464679718, "learning_rate": 9.601248350888425e-06, "loss": 0.0672, "step": 35499 }, { "epoch": 0.6286885645092023, "grad_norm": 0.5277310013771057, "learning_rate": 9.60044565300984e-06, "loss": 0.0837, "step": 35500 }, { "epoch": 0.6287062740462307, "grad_norm": 0.8089028596878052, "learning_rate": 9.599642972895245e-06, "loss": 0.0479, "step": 35501 }, { "epoch": 0.6287239835832592, "grad_norm": 0.6688273549079895, "learning_rate": 9.598840310547278e-06, "loss": 0.0426, "step": 35502 }, { "epoch": 0.6287416931202876, "grad_norm": 0.42079275846481323, "learning_rate": 9.598037665968587e-06, "loss": 0.0501, "step": 35503 }, { "epoch": 0.628759402657316, "grad_norm": 0.8848615884780884, "learning_rate": 9.597235039161795e-06, "loss": 0.0525, "step": 35504 }, { "epoch": 0.6287771121943445, "grad_norm": 0.7185371518135071, "learning_rate": 9.596432430129557e-06, "loss": 0.0581, "step": 35505 }, { "epoch": 0.6287948217313729, "grad_norm": 0.720420241355896, "learning_rate": 9.595629838874517e-06, "loss": 0.0689, "step": 35506 }, { "epoch": 0.6288125312684013, "grad_norm": 0.6585227847099304, "learning_rate": 9.594827265399299e-06, "loss": 0.065, "step": 35507 }, { "epoch": 0.6288302408054297, "grad_norm": 0.586665153503418, "learning_rate": 9.594024709706556e-06, "loss": 0.0597, "step": 35508 }, { "epoch": 0.6288479503424582, "grad_norm": 0.5262148976325989, "learning_rate": 9.593222171798922e-06, "loss": 0.0726, "step": 35509 }, { "epoch": 0.6288656598794866, "grad_norm": 0.7526687383651733, "learning_rate": 9.592419651679047e-06, "loss": 0.0663, "step": 35510 }, { "epoch": 0.628883369416515, "grad_norm": 0.6272761821746826, "learning_rate": 9.591617149349557e-06, "loss": 0.0817, "step": 35511 }, { "epoch": 0.6289010789535434, "grad_norm": 0.8814789056777954, "learning_rate": 9.590814664813102e-06, "loss": 0.0885, "step": 35512 }, { "epoch": 0.6289187884905719, "grad_norm": 0.7160177826881409, "learning_rate": 9.590012198072321e-06, "loss": 0.0818, "step": 35513 }, { "epoch": 0.6289364980276003, "grad_norm": 0.4507431089878082, "learning_rate": 9.589209749129851e-06, "loss": 0.039, "step": 35514 }, { "epoch": 0.6289542075646287, "grad_norm": 0.39485958218574524, "learning_rate": 9.588407317988334e-06, "loss": 0.052, "step": 35515 }, { "epoch": 0.6289719171016571, "grad_norm": 0.7441120743751526, "learning_rate": 9.58760490465041e-06, "loss": 0.0423, "step": 35516 }, { "epoch": 0.6289896266386856, "grad_norm": 0.6193029880523682, "learning_rate": 9.586802509118721e-06, "loss": 0.0555, "step": 35517 }, { "epoch": 0.629007336175714, "grad_norm": 0.6010441184043884, "learning_rate": 9.586000131395899e-06, "loss": 0.058, "step": 35518 }, { "epoch": 0.6290250457127424, "grad_norm": 0.6984872817993164, "learning_rate": 9.58519777148459e-06, "loss": 0.0771, "step": 35519 }, { "epoch": 0.6290427552497709, "grad_norm": 0.5060753226280212, "learning_rate": 9.58439542938744e-06, "loss": 0.0512, "step": 35520 }, { "epoch": 0.6290604647867993, "grad_norm": 0.971746563911438, "learning_rate": 9.583593105107075e-06, "loss": 0.0608, "step": 35521 }, { "epoch": 0.6290781743238277, "grad_norm": 0.7368144392967224, "learning_rate": 9.582790798646141e-06, "loss": 0.0802, "step": 35522 }, { "epoch": 0.6290958838608561, "grad_norm": 0.7762218713760376, "learning_rate": 9.581988510007277e-06, "loss": 0.055, "step": 35523 }, { "epoch": 0.6291135933978846, "grad_norm": 0.8692043423652649, "learning_rate": 9.581186239193125e-06, "loss": 0.0637, "step": 35524 }, { "epoch": 0.629131302934913, "grad_norm": 0.5771403312683105, "learning_rate": 9.58038398620632e-06, "loss": 0.0927, "step": 35525 }, { "epoch": 0.6291490124719414, "grad_norm": 0.36926183104515076, "learning_rate": 9.579581751049505e-06, "loss": 0.0449, "step": 35526 }, { "epoch": 0.6291667220089698, "grad_norm": 0.5433446764945984, "learning_rate": 9.578779533725314e-06, "loss": 0.064, "step": 35527 }, { "epoch": 0.6291844315459983, "grad_norm": 0.6590056419372559, "learning_rate": 9.577977334236395e-06, "loss": 0.0802, "step": 35528 }, { "epoch": 0.6292021410830267, "grad_norm": 0.7137061357498169, "learning_rate": 9.57717515258538e-06, "loss": 0.0838, "step": 35529 }, { "epoch": 0.6292198506200551, "grad_norm": 0.900039792060852, "learning_rate": 9.576372988774909e-06, "loss": 0.0635, "step": 35530 }, { "epoch": 0.6292375601570835, "grad_norm": 0.6015180349349976, "learning_rate": 9.575570842807628e-06, "loss": 0.0558, "step": 35531 }, { "epoch": 0.629255269694112, "grad_norm": 0.44017013907432556, "learning_rate": 9.574768714686168e-06, "loss": 0.0462, "step": 35532 }, { "epoch": 0.6292729792311405, "grad_norm": 0.744300901889801, "learning_rate": 9.573966604413162e-06, "loss": 0.0647, "step": 35533 }, { "epoch": 0.6292906887681688, "grad_norm": 0.5160863399505615, "learning_rate": 9.573164511991264e-06, "loss": 0.0649, "step": 35534 }, { "epoch": 0.6293083983051974, "grad_norm": 1.3446522951126099, "learning_rate": 9.572362437423109e-06, "loss": 0.0812, "step": 35535 }, { "epoch": 0.6293261078422258, "grad_norm": 0.5269151329994202, "learning_rate": 9.571560380711329e-06, "loss": 0.0423, "step": 35536 }, { "epoch": 0.6293438173792542, "grad_norm": 0.49843332171440125, "learning_rate": 9.570758341858564e-06, "loss": 0.0858, "step": 35537 }, { "epoch": 0.6293615269162826, "grad_norm": 0.7593872547149658, "learning_rate": 9.56995632086746e-06, "loss": 0.0688, "step": 35538 }, { "epoch": 0.6293792364533111, "grad_norm": 0.9000991582870483, "learning_rate": 9.569154317740646e-06, "loss": 0.0681, "step": 35539 }, { "epoch": 0.6293969459903395, "grad_norm": 0.9401373267173767, "learning_rate": 9.568352332480766e-06, "loss": 0.0956, "step": 35540 }, { "epoch": 0.6294146555273679, "grad_norm": 0.2945932149887085, "learning_rate": 9.567550365090458e-06, "loss": 0.0396, "step": 35541 }, { "epoch": 0.6294323650643963, "grad_norm": 0.949013352394104, "learning_rate": 9.566748415572362e-06, "loss": 0.1013, "step": 35542 }, { "epoch": 0.6294500746014248, "grad_norm": 0.6595280170440674, "learning_rate": 9.565946483929112e-06, "loss": 0.0594, "step": 35543 }, { "epoch": 0.6294677841384532, "grad_norm": 0.6974812150001526, "learning_rate": 9.565144570163347e-06, "loss": 0.0536, "step": 35544 }, { "epoch": 0.6294854936754816, "grad_norm": 0.650128185749054, "learning_rate": 9.564342674277712e-06, "loss": 0.0531, "step": 35545 }, { "epoch": 0.62950320321251, "grad_norm": 0.8235008716583252, "learning_rate": 9.563540796274835e-06, "loss": 0.0643, "step": 35546 }, { "epoch": 0.6295209127495385, "grad_norm": 0.9305869936943054, "learning_rate": 9.562738936157356e-06, "loss": 0.0716, "step": 35547 }, { "epoch": 0.6295386222865669, "grad_norm": 0.5912869572639465, "learning_rate": 9.561937093927917e-06, "loss": 0.0606, "step": 35548 }, { "epoch": 0.6295563318235953, "grad_norm": 1.1218671798706055, "learning_rate": 9.561135269589165e-06, "loss": 0.0479, "step": 35549 }, { "epoch": 0.6295740413606238, "grad_norm": 0.8620969653129578, "learning_rate": 9.56033346314372e-06, "loss": 0.0739, "step": 35550 }, { "epoch": 0.6295917508976522, "grad_norm": 0.4031074345111847, "learning_rate": 9.559531674594228e-06, "loss": 0.0747, "step": 35551 }, { "epoch": 0.6296094604346806, "grad_norm": 0.8904949426651001, "learning_rate": 9.558729903943326e-06, "loss": 0.088, "step": 35552 }, { "epoch": 0.629627169971709, "grad_norm": 0.5462722778320312, "learning_rate": 9.557928151193653e-06, "loss": 0.0545, "step": 35553 }, { "epoch": 0.6296448795087375, "grad_norm": 0.5449139475822449, "learning_rate": 9.557126416347847e-06, "loss": 0.0771, "step": 35554 }, { "epoch": 0.6296625890457659, "grad_norm": 0.5583049058914185, "learning_rate": 9.556324699408541e-06, "loss": 0.0699, "step": 35555 }, { "epoch": 0.6296802985827943, "grad_norm": 0.6777629852294922, "learning_rate": 9.555523000378383e-06, "loss": 0.0679, "step": 35556 }, { "epoch": 0.6296980081198227, "grad_norm": 0.7797657251358032, "learning_rate": 9.554721319259995e-06, "loss": 0.0811, "step": 35557 }, { "epoch": 0.6297157176568512, "grad_norm": 0.6707165241241455, "learning_rate": 9.553919656056029e-06, "loss": 0.0743, "step": 35558 }, { "epoch": 0.6297334271938796, "grad_norm": 0.9609830379486084, "learning_rate": 9.553118010769119e-06, "loss": 0.067, "step": 35559 }, { "epoch": 0.629751136730908, "grad_norm": 0.7765944600105286, "learning_rate": 9.552316383401895e-06, "loss": 0.0893, "step": 35560 }, { "epoch": 0.6297688462679364, "grad_norm": 0.5162283182144165, "learning_rate": 9.551514773956999e-06, "loss": 0.0596, "step": 35561 }, { "epoch": 0.6297865558049649, "grad_norm": 0.6219342350959778, "learning_rate": 9.550713182437064e-06, "loss": 0.0821, "step": 35562 }, { "epoch": 0.6298042653419933, "grad_norm": 0.1394062340259552, "learning_rate": 9.549911608844743e-06, "loss": 0.064, "step": 35563 }, { "epoch": 0.6298219748790217, "grad_norm": 0.575038492679596, "learning_rate": 9.549110053182655e-06, "loss": 0.0533, "step": 35564 }, { "epoch": 0.6298396844160502, "grad_norm": 0.49062955379486084, "learning_rate": 9.548308515453442e-06, "loss": 0.0524, "step": 35565 }, { "epoch": 0.6298573939530786, "grad_norm": 0.6370452046394348, "learning_rate": 9.547506995659744e-06, "loss": 0.07, "step": 35566 }, { "epoch": 0.629875103490107, "grad_norm": 0.5194162130355835, "learning_rate": 9.5467054938042e-06, "loss": 0.0474, "step": 35567 }, { "epoch": 0.6298928130271354, "grad_norm": 0.431748628616333, "learning_rate": 9.545904009889437e-06, "loss": 0.0795, "step": 35568 }, { "epoch": 0.6299105225641639, "grad_norm": 1.2199772596359253, "learning_rate": 9.545102543918102e-06, "loss": 0.0887, "step": 35569 }, { "epoch": 0.6299282321011923, "grad_norm": 0.7806032299995422, "learning_rate": 9.54430109589283e-06, "loss": 0.085, "step": 35570 }, { "epoch": 0.6299459416382207, "grad_norm": 0.6872337460517883, "learning_rate": 9.54349966581625e-06, "loss": 0.0664, "step": 35571 }, { "epoch": 0.6299636511752491, "grad_norm": 0.879359781742096, "learning_rate": 9.542698253691007e-06, "loss": 0.0606, "step": 35572 }, { "epoch": 0.6299813607122776, "grad_norm": 0.537449836730957, "learning_rate": 9.541896859519736e-06, "loss": 0.0599, "step": 35573 }, { "epoch": 0.629999070249306, "grad_norm": 0.4573552906513214, "learning_rate": 9.541095483305074e-06, "loss": 0.0454, "step": 35574 }, { "epoch": 0.6300167797863344, "grad_norm": 0.765743613243103, "learning_rate": 9.540294125049654e-06, "loss": 0.0538, "step": 35575 }, { "epoch": 0.6300344893233628, "grad_norm": 0.600771427154541, "learning_rate": 9.539492784756107e-06, "loss": 0.0532, "step": 35576 }, { "epoch": 0.6300521988603913, "grad_norm": 0.6584750413894653, "learning_rate": 9.538691462427088e-06, "loss": 0.0387, "step": 35577 }, { "epoch": 0.6300699083974197, "grad_norm": 0.8547260165214539, "learning_rate": 9.537890158065216e-06, "loss": 0.0828, "step": 35578 }, { "epoch": 0.6300876179344481, "grad_norm": 0.4952933192253113, "learning_rate": 9.537088871673133e-06, "loss": 0.0593, "step": 35579 }, { "epoch": 0.6301053274714766, "grad_norm": 0.8945393562316895, "learning_rate": 9.536287603253473e-06, "loss": 0.0774, "step": 35580 }, { "epoch": 0.630123037008505, "grad_norm": 0.23768974840641022, "learning_rate": 9.53548635280888e-06, "loss": 0.0733, "step": 35581 }, { "epoch": 0.6301407465455334, "grad_norm": 1.4200292825698853, "learning_rate": 9.534685120341978e-06, "loss": 0.0603, "step": 35582 }, { "epoch": 0.6301584560825618, "grad_norm": 0.6314279437065125, "learning_rate": 9.533883905855412e-06, "loss": 0.1013, "step": 35583 }, { "epoch": 0.6301761656195903, "grad_norm": 0.8540448546409607, "learning_rate": 9.533082709351817e-06, "loss": 0.0748, "step": 35584 }, { "epoch": 0.6301938751566187, "grad_norm": 0.5071719884872437, "learning_rate": 9.532281530833822e-06, "loss": 0.0586, "step": 35585 }, { "epoch": 0.6302115846936471, "grad_norm": 0.9705315828323364, "learning_rate": 9.53148037030407e-06, "loss": 0.0756, "step": 35586 }, { "epoch": 0.6302292942306755, "grad_norm": 0.27307936549186707, "learning_rate": 9.530679227765195e-06, "loss": 0.0444, "step": 35587 }, { "epoch": 0.630247003767704, "grad_norm": 0.4128779172897339, "learning_rate": 9.529878103219836e-06, "loss": 0.0466, "step": 35588 }, { "epoch": 0.6302647133047324, "grad_norm": 0.47201505303382874, "learning_rate": 9.52907699667062e-06, "loss": 0.0555, "step": 35589 }, { "epoch": 0.6302824228417608, "grad_norm": 0.757240891456604, "learning_rate": 9.528275908120187e-06, "loss": 0.0559, "step": 35590 }, { "epoch": 0.6303001323787892, "grad_norm": 0.5438941717147827, "learning_rate": 9.527474837571176e-06, "loss": 0.0611, "step": 35591 }, { "epoch": 0.6303178419158177, "grad_norm": 0.6636413335800171, "learning_rate": 9.526673785026216e-06, "loss": 0.0813, "step": 35592 }, { "epoch": 0.6303355514528461, "grad_norm": 0.446611225605011, "learning_rate": 9.525872750487945e-06, "loss": 0.0552, "step": 35593 }, { "epoch": 0.6303532609898745, "grad_norm": 0.6582837104797363, "learning_rate": 9.525071733959003e-06, "loss": 0.0881, "step": 35594 }, { "epoch": 0.630370970526903, "grad_norm": 0.7280277609825134, "learning_rate": 9.52427073544202e-06, "loss": 0.0528, "step": 35595 }, { "epoch": 0.6303886800639315, "grad_norm": 0.3690570592880249, "learning_rate": 9.52346975493963e-06, "loss": 0.0286, "step": 35596 }, { "epoch": 0.6304063896009598, "grad_norm": 0.7733045816421509, "learning_rate": 9.522668792454473e-06, "loss": 0.0598, "step": 35597 }, { "epoch": 0.6304240991379882, "grad_norm": 0.4614241123199463, "learning_rate": 9.521867847989179e-06, "loss": 0.0437, "step": 35598 }, { "epoch": 0.6304418086750168, "grad_norm": 0.579246461391449, "learning_rate": 9.52106692154639e-06, "loss": 0.0618, "step": 35599 }, { "epoch": 0.6304595182120452, "grad_norm": 0.6931503415107727, "learning_rate": 9.520266013128734e-06, "loss": 0.0737, "step": 35600 }, { "epoch": 0.6304772277490736, "grad_norm": 0.7576878666877747, "learning_rate": 9.519465122738849e-06, "loss": 0.0607, "step": 35601 }, { "epoch": 0.630494937286102, "grad_norm": 0.3893272578716278, "learning_rate": 9.518664250379372e-06, "loss": 0.0546, "step": 35602 }, { "epoch": 0.6305126468231305, "grad_norm": 0.464719295501709, "learning_rate": 9.517863396052933e-06, "loss": 0.0576, "step": 35603 }, { "epoch": 0.6305303563601589, "grad_norm": 0.6974339485168457, "learning_rate": 9.517062559762167e-06, "loss": 0.0747, "step": 35604 }, { "epoch": 0.6305480658971873, "grad_norm": 0.3086886405944824, "learning_rate": 9.516261741509707e-06, "loss": 0.0457, "step": 35605 }, { "epoch": 0.6305657754342157, "grad_norm": 0.7353295683860779, "learning_rate": 9.515460941298202e-06, "loss": 0.0829, "step": 35606 }, { "epoch": 0.6305834849712442, "grad_norm": 0.5401889681816101, "learning_rate": 9.51466015913027e-06, "loss": 0.0582, "step": 35607 }, { "epoch": 0.6306011945082726, "grad_norm": 0.46713897585868835, "learning_rate": 9.513859395008548e-06, "loss": 0.0311, "step": 35608 }, { "epoch": 0.630618904045301, "grad_norm": 0.5192221403121948, "learning_rate": 9.51305864893568e-06, "loss": 0.0655, "step": 35609 }, { "epoch": 0.6306366135823295, "grad_norm": 0.4599513113498688, "learning_rate": 9.512257920914291e-06, "loss": 0.069, "step": 35610 }, { "epoch": 0.6306543231193579, "grad_norm": 0.7999959588050842, "learning_rate": 9.511457210947017e-06, "loss": 0.0722, "step": 35611 }, { "epoch": 0.6306720326563863, "grad_norm": 0.8332910537719727, "learning_rate": 9.510656519036494e-06, "loss": 0.0849, "step": 35612 }, { "epoch": 0.6306897421934147, "grad_norm": 0.6111049652099609, "learning_rate": 9.50985584518536e-06, "loss": 0.0693, "step": 35613 }, { "epoch": 0.6307074517304432, "grad_norm": 0.7473441362380981, "learning_rate": 9.50905518939624e-06, "loss": 0.0409, "step": 35614 }, { "epoch": 0.6307251612674716, "grad_norm": 0.27943599224090576, "learning_rate": 9.508254551671773e-06, "loss": 0.0774, "step": 35615 }, { "epoch": 0.6307428708045, "grad_norm": 0.6939944624900818, "learning_rate": 9.507453932014602e-06, "loss": 0.0684, "step": 35616 }, { "epoch": 0.6307605803415284, "grad_norm": 0.862462043762207, "learning_rate": 9.506653330427344e-06, "loss": 0.1013, "step": 35617 }, { "epoch": 0.6307782898785569, "grad_norm": 0.4512895941734314, "learning_rate": 9.50585274691264e-06, "loss": 0.0691, "step": 35618 }, { "epoch": 0.6307959994155853, "grad_norm": 0.9917078614234924, "learning_rate": 9.505052181473126e-06, "loss": 0.0559, "step": 35619 }, { "epoch": 0.6308137089526137, "grad_norm": 0.53746098279953, "learning_rate": 9.504251634111437e-06, "loss": 0.0396, "step": 35620 }, { "epoch": 0.6308314184896421, "grad_norm": 0.7976692318916321, "learning_rate": 9.503451104830203e-06, "loss": 0.0602, "step": 35621 }, { "epoch": 0.6308491280266706, "grad_norm": 0.7633659243583679, "learning_rate": 9.502650593632058e-06, "loss": 0.0666, "step": 35622 }, { "epoch": 0.630866837563699, "grad_norm": 0.4105664789676666, "learning_rate": 9.501850100519637e-06, "loss": 0.0546, "step": 35623 }, { "epoch": 0.6308845471007274, "grad_norm": 0.39630281925201416, "learning_rate": 9.501049625495573e-06, "loss": 0.0572, "step": 35624 }, { "epoch": 0.6309022566377559, "grad_norm": 0.6227419972419739, "learning_rate": 9.500249168562499e-06, "loss": 0.0614, "step": 35625 }, { "epoch": 0.6309199661747843, "grad_norm": 0.79490727186203, "learning_rate": 9.499448729723049e-06, "loss": 0.0827, "step": 35626 }, { "epoch": 0.6309376757118127, "grad_norm": 0.4387994706630707, "learning_rate": 9.498648308979862e-06, "loss": 0.0534, "step": 35627 }, { "epoch": 0.6309553852488411, "grad_norm": 0.35073548555374146, "learning_rate": 9.497847906335558e-06, "loss": 0.0523, "step": 35628 }, { "epoch": 0.6309730947858696, "grad_norm": 0.7054636478424072, "learning_rate": 9.497047521792778e-06, "loss": 0.0743, "step": 35629 }, { "epoch": 0.630990804322898, "grad_norm": 0.5963948369026184, "learning_rate": 9.496247155354163e-06, "loss": 0.035, "step": 35630 }, { "epoch": 0.6310085138599264, "grad_norm": 0.6786612272262573, "learning_rate": 9.495446807022331e-06, "loss": 0.0836, "step": 35631 }, { "epoch": 0.6310262233969548, "grad_norm": 0.28553685545921326, "learning_rate": 9.494646476799923e-06, "loss": 0.0661, "step": 35632 }, { "epoch": 0.6310439329339833, "grad_norm": 0.18863384425640106, "learning_rate": 9.493846164689571e-06, "loss": 0.0344, "step": 35633 }, { "epoch": 0.6310616424710117, "grad_norm": 0.5068081617355347, "learning_rate": 9.49304587069391e-06, "loss": 0.0691, "step": 35634 }, { "epoch": 0.6310793520080401, "grad_norm": 0.5298766493797302, "learning_rate": 9.492245594815572e-06, "loss": 0.0585, "step": 35635 }, { "epoch": 0.6310970615450685, "grad_norm": 0.6541489958763123, "learning_rate": 9.491445337057185e-06, "loss": 0.0675, "step": 35636 }, { "epoch": 0.631114771082097, "grad_norm": 0.3912198841571808, "learning_rate": 9.49064509742139e-06, "loss": 0.0452, "step": 35637 }, { "epoch": 0.6311324806191254, "grad_norm": 0.5281643867492676, "learning_rate": 9.489844875910813e-06, "loss": 0.0479, "step": 35638 }, { "epoch": 0.6311501901561538, "grad_norm": 0.7945255041122437, "learning_rate": 9.489044672528087e-06, "loss": 0.0663, "step": 35639 }, { "epoch": 0.6311678996931823, "grad_norm": 0.4737197160720825, "learning_rate": 9.48824448727585e-06, "loss": 0.0862, "step": 35640 }, { "epoch": 0.6311856092302107, "grad_norm": 0.8555996417999268, "learning_rate": 9.487444320156735e-06, "loss": 0.0807, "step": 35641 }, { "epoch": 0.6312033187672391, "grad_norm": 0.3694171905517578, "learning_rate": 9.486644171173363e-06, "loss": 0.0554, "step": 35642 }, { "epoch": 0.6312210283042675, "grad_norm": 0.6133773326873779, "learning_rate": 9.485844040328374e-06, "loss": 0.0768, "step": 35643 }, { "epoch": 0.631238737841296, "grad_norm": 0.3753339648246765, "learning_rate": 9.485043927624403e-06, "loss": 0.0664, "step": 35644 }, { "epoch": 0.6312564473783244, "grad_norm": 0.6112047433853149, "learning_rate": 9.484243833064086e-06, "loss": 0.0627, "step": 35645 }, { "epoch": 0.6312741569153528, "grad_norm": 0.29257360100746155, "learning_rate": 9.483443756650041e-06, "loss": 0.0551, "step": 35646 }, { "epoch": 0.6312918664523812, "grad_norm": 0.6619455814361572, "learning_rate": 9.48264369838491e-06, "loss": 0.0483, "step": 35647 }, { "epoch": 0.6313095759894097, "grad_norm": 0.6922048330307007, "learning_rate": 9.481843658271325e-06, "loss": 0.0688, "step": 35648 }, { "epoch": 0.6313272855264381, "grad_norm": 0.9853581190109253, "learning_rate": 9.481043636311915e-06, "loss": 0.0661, "step": 35649 }, { "epoch": 0.6313449950634665, "grad_norm": 0.6363502740859985, "learning_rate": 9.480243632509311e-06, "loss": 0.0745, "step": 35650 }, { "epoch": 0.6313627046004949, "grad_norm": 0.7610191702842712, "learning_rate": 9.479443646866147e-06, "loss": 0.0644, "step": 35651 }, { "epoch": 0.6313804141375234, "grad_norm": 0.7967646718025208, "learning_rate": 9.478643679385058e-06, "loss": 0.0882, "step": 35652 }, { "epoch": 0.6313981236745518, "grad_norm": 0.9746524691581726, "learning_rate": 9.477843730068672e-06, "loss": 0.0673, "step": 35653 }, { "epoch": 0.6314158332115802, "grad_norm": 0.808212399482727, "learning_rate": 9.47704379891962e-06, "loss": 0.0736, "step": 35654 }, { "epoch": 0.6314335427486087, "grad_norm": 0.4948405623435974, "learning_rate": 9.476243885940542e-06, "loss": 0.0666, "step": 35655 }, { "epoch": 0.6314512522856371, "grad_norm": 0.7876033186912537, "learning_rate": 9.475443991134058e-06, "loss": 0.065, "step": 35656 }, { "epoch": 0.6314689618226655, "grad_norm": 0.7420923709869385, "learning_rate": 9.474644114502798e-06, "loss": 0.0631, "step": 35657 }, { "epoch": 0.6314866713596939, "grad_norm": 0.2537216544151306, "learning_rate": 9.473844256049406e-06, "loss": 0.0605, "step": 35658 }, { "epoch": 0.6315043808967225, "grad_norm": 0.4950861632823944, "learning_rate": 9.473044415776513e-06, "loss": 0.0876, "step": 35659 }, { "epoch": 0.6315220904337508, "grad_norm": 0.4700835347175598, "learning_rate": 9.472244593686737e-06, "loss": 0.0528, "step": 35660 }, { "epoch": 0.6315397999707792, "grad_norm": 0.6939375400543213, "learning_rate": 9.471444789782718e-06, "loss": 0.0626, "step": 35661 }, { "epoch": 0.6315575095078076, "grad_norm": 0.5816102027893066, "learning_rate": 9.47064500406709e-06, "loss": 0.0687, "step": 35662 }, { "epoch": 0.6315752190448362, "grad_norm": 0.6117284297943115, "learning_rate": 9.469845236542479e-06, "loss": 0.0532, "step": 35663 }, { "epoch": 0.6315929285818646, "grad_norm": 0.6560207009315491, "learning_rate": 9.469045487211515e-06, "loss": 0.0841, "step": 35664 }, { "epoch": 0.631610638118893, "grad_norm": 0.7130386829376221, "learning_rate": 9.468245756076835e-06, "loss": 0.0763, "step": 35665 }, { "epoch": 0.6316283476559214, "grad_norm": 0.6777669787406921, "learning_rate": 9.467446043141068e-06, "loss": 0.0648, "step": 35666 }, { "epoch": 0.6316460571929499, "grad_norm": 0.7751322984695435, "learning_rate": 9.46664634840684e-06, "loss": 0.0597, "step": 35667 }, { "epoch": 0.6316637667299783, "grad_norm": 0.655306875705719, "learning_rate": 9.465846671876789e-06, "loss": 0.0617, "step": 35668 }, { "epoch": 0.6316814762670067, "grad_norm": 0.6606748700141907, "learning_rate": 9.46504701355354e-06, "loss": 0.0791, "step": 35669 }, { "epoch": 0.6316991858040352, "grad_norm": 0.6512925624847412, "learning_rate": 9.464247373439734e-06, "loss": 0.053, "step": 35670 }, { "epoch": 0.6317168953410636, "grad_norm": 0.41136062145233154, "learning_rate": 9.463447751537984e-06, "loss": 0.0586, "step": 35671 }, { "epoch": 0.631734604878092, "grad_norm": 0.3881674110889435, "learning_rate": 9.462648147850934e-06, "loss": 0.0419, "step": 35672 }, { "epoch": 0.6317523144151204, "grad_norm": 0.34102046489715576, "learning_rate": 9.46184856238122e-06, "loss": 0.0655, "step": 35673 }, { "epoch": 0.6317700239521489, "grad_norm": 0.6085649728775024, "learning_rate": 9.461048995131457e-06, "loss": 0.0882, "step": 35674 }, { "epoch": 0.6317877334891773, "grad_norm": 0.6347894072532654, "learning_rate": 9.46024944610428e-06, "loss": 0.0542, "step": 35675 }, { "epoch": 0.6318054430262057, "grad_norm": 0.8409019708633423, "learning_rate": 9.459449915302324e-06, "loss": 0.0747, "step": 35676 }, { "epoch": 0.6318231525632341, "grad_norm": 0.8121224641799927, "learning_rate": 9.45865040272822e-06, "loss": 0.0802, "step": 35677 }, { "epoch": 0.6318408621002626, "grad_norm": 0.5840176343917847, "learning_rate": 9.457850908384592e-06, "loss": 0.0834, "step": 35678 }, { "epoch": 0.631858571637291, "grad_norm": 0.8205738663673401, "learning_rate": 9.457051432274075e-06, "loss": 0.0927, "step": 35679 }, { "epoch": 0.6318762811743194, "grad_norm": 0.3567878305912018, "learning_rate": 9.4562519743993e-06, "loss": 0.0395, "step": 35680 }, { "epoch": 0.6318939907113478, "grad_norm": 0.9191173315048218, "learning_rate": 9.455452534762894e-06, "loss": 0.0599, "step": 35681 }, { "epoch": 0.6319117002483763, "grad_norm": 0.2295660823583603, "learning_rate": 9.454653113367489e-06, "loss": 0.064, "step": 35682 }, { "epoch": 0.6319294097854047, "grad_norm": 0.23385123908519745, "learning_rate": 9.453853710215712e-06, "loss": 0.0661, "step": 35683 }, { "epoch": 0.6319471193224331, "grad_norm": 0.582851231098175, "learning_rate": 9.453054325310202e-06, "loss": 0.0811, "step": 35684 }, { "epoch": 0.6319648288594616, "grad_norm": 0.6035205125808716, "learning_rate": 9.452254958653577e-06, "loss": 0.0716, "step": 35685 }, { "epoch": 0.63198253839649, "grad_norm": 0.37463057041168213, "learning_rate": 9.451455610248468e-06, "loss": 0.0643, "step": 35686 }, { "epoch": 0.6320002479335184, "grad_norm": 0.7288083434104919, "learning_rate": 9.450656280097518e-06, "loss": 0.0416, "step": 35687 }, { "epoch": 0.6320179574705468, "grad_norm": 0.7285104990005493, "learning_rate": 9.449856968203342e-06, "loss": 0.0758, "step": 35688 }, { "epoch": 0.6320356670075753, "grad_norm": 0.4692908227443695, "learning_rate": 9.449057674568577e-06, "loss": 0.0558, "step": 35689 }, { "epoch": 0.6320533765446037, "grad_norm": 0.669023871421814, "learning_rate": 9.448258399195848e-06, "loss": 0.0841, "step": 35690 }, { "epoch": 0.6320710860816321, "grad_norm": 0.5805619955062866, "learning_rate": 9.44745914208779e-06, "loss": 0.0695, "step": 35691 }, { "epoch": 0.6320887956186605, "grad_norm": 1.027590036392212, "learning_rate": 9.446659903247029e-06, "loss": 0.1053, "step": 35692 }, { "epoch": 0.632106505155689, "grad_norm": 0.6754187941551208, "learning_rate": 9.445860682676195e-06, "loss": 0.0642, "step": 35693 }, { "epoch": 0.6321242146927174, "grad_norm": 0.8729063272476196, "learning_rate": 9.44506148037792e-06, "loss": 0.0702, "step": 35694 }, { "epoch": 0.6321419242297458, "grad_norm": 0.8051336407661438, "learning_rate": 9.444262296354827e-06, "loss": 0.0982, "step": 35695 }, { "epoch": 0.6321596337667742, "grad_norm": 0.737393856048584, "learning_rate": 9.44346313060955e-06, "loss": 0.05, "step": 35696 }, { "epoch": 0.6321773433038027, "grad_norm": 0.6993412971496582, "learning_rate": 9.442663983144717e-06, "loss": 0.0624, "step": 35697 }, { "epoch": 0.6321950528408311, "grad_norm": 0.714388370513916, "learning_rate": 9.441864853962964e-06, "loss": 0.0679, "step": 35698 }, { "epoch": 0.6322127623778595, "grad_norm": 0.6189890503883362, "learning_rate": 9.441065743066906e-06, "loss": 0.0641, "step": 35699 }, { "epoch": 0.632230471914888, "grad_norm": 0.7865234017372131, "learning_rate": 9.440266650459182e-06, "loss": 0.0659, "step": 35700 }, { "epoch": 0.6322481814519164, "grad_norm": 0.5144942402839661, "learning_rate": 9.439467576142413e-06, "loss": 0.0659, "step": 35701 }, { "epoch": 0.6322658909889448, "grad_norm": 1.0511142015457153, "learning_rate": 9.438668520119244e-06, "loss": 0.0793, "step": 35702 }, { "epoch": 0.6322836005259732, "grad_norm": 0.2933093309402466, "learning_rate": 9.437869482392285e-06, "loss": 0.0532, "step": 35703 }, { "epoch": 0.6323013100630017, "grad_norm": 0.8763460516929626, "learning_rate": 9.437070462964175e-06, "loss": 0.0632, "step": 35704 }, { "epoch": 0.6323190196000301, "grad_norm": 0.6417596936225891, "learning_rate": 9.436271461837542e-06, "loss": 0.0704, "step": 35705 }, { "epoch": 0.6323367291370585, "grad_norm": 0.7203391194343567, "learning_rate": 9.435472479015012e-06, "loss": 0.0756, "step": 35706 }, { "epoch": 0.6323544386740869, "grad_norm": 0.5301355719566345, "learning_rate": 9.434673514499212e-06, "loss": 0.0392, "step": 35707 }, { "epoch": 0.6323721482111154, "grad_norm": 0.6770710349082947, "learning_rate": 9.433874568292777e-06, "loss": 0.074, "step": 35708 }, { "epoch": 0.6323898577481438, "grad_norm": 1.259244441986084, "learning_rate": 9.433075640398333e-06, "loss": 0.0501, "step": 35709 }, { "epoch": 0.6324075672851722, "grad_norm": 0.74330735206604, "learning_rate": 9.432276730818501e-06, "loss": 0.0646, "step": 35710 }, { "epoch": 0.6324252768222006, "grad_norm": 0.4778535068035126, "learning_rate": 9.431477839555918e-06, "loss": 0.0376, "step": 35711 }, { "epoch": 0.6324429863592291, "grad_norm": 0.5972927808761597, "learning_rate": 9.430678966613216e-06, "loss": 0.0672, "step": 35712 }, { "epoch": 0.6324606958962575, "grad_norm": 0.581238329410553, "learning_rate": 9.42988011199301e-06, "loss": 0.0779, "step": 35713 }, { "epoch": 0.6324784054332859, "grad_norm": 0.8544129133224487, "learning_rate": 9.429081275697936e-06, "loss": 0.0766, "step": 35714 }, { "epoch": 0.6324961149703144, "grad_norm": 0.6942124366760254, "learning_rate": 9.428282457730618e-06, "loss": 0.0516, "step": 35715 }, { "epoch": 0.6325138245073428, "grad_norm": 0.5722770094871521, "learning_rate": 9.427483658093696e-06, "loss": 0.0434, "step": 35716 }, { "epoch": 0.6325315340443712, "grad_norm": 0.7564499378204346, "learning_rate": 9.426684876789782e-06, "loss": 0.0587, "step": 35717 }, { "epoch": 0.6325492435813996, "grad_norm": 0.7129598259925842, "learning_rate": 9.425886113821513e-06, "loss": 0.056, "step": 35718 }, { "epoch": 0.6325669531184281, "grad_norm": 0.7653625011444092, "learning_rate": 9.425087369191516e-06, "loss": 0.0843, "step": 35719 }, { "epoch": 0.6325846626554565, "grad_norm": 1.0579438209533691, "learning_rate": 9.424288642902418e-06, "loss": 0.0707, "step": 35720 }, { "epoch": 0.6326023721924849, "grad_norm": 0.559693455696106, "learning_rate": 9.423489934956843e-06, "loss": 0.0469, "step": 35721 }, { "epoch": 0.6326200817295133, "grad_norm": 0.4258580505847931, "learning_rate": 9.422691245357422e-06, "loss": 0.0472, "step": 35722 }, { "epoch": 0.6326377912665418, "grad_norm": 0.49479737877845764, "learning_rate": 9.421892574106787e-06, "loss": 0.0446, "step": 35723 }, { "epoch": 0.6326555008035702, "grad_norm": 1.0983734130859375, "learning_rate": 9.42109392120756e-06, "loss": 0.0811, "step": 35724 }, { "epoch": 0.6326732103405986, "grad_norm": 0.47930049896240234, "learning_rate": 9.420295286662367e-06, "loss": 0.0717, "step": 35725 }, { "epoch": 0.632690919877627, "grad_norm": 0.5435642004013062, "learning_rate": 9.419496670473843e-06, "loss": 0.0653, "step": 35726 }, { "epoch": 0.6327086294146556, "grad_norm": 0.5860353112220764, "learning_rate": 9.418698072644608e-06, "loss": 0.056, "step": 35727 }, { "epoch": 0.632726338951684, "grad_norm": 0.6739714741706848, "learning_rate": 9.41789949317729e-06, "loss": 0.0885, "step": 35728 }, { "epoch": 0.6327440484887124, "grad_norm": 0.6586341857910156, "learning_rate": 9.417100932074518e-06, "loss": 0.0481, "step": 35729 }, { "epoch": 0.6327617580257409, "grad_norm": 0.9937109351158142, "learning_rate": 9.416302389338925e-06, "loss": 0.1058, "step": 35730 }, { "epoch": 0.6327794675627693, "grad_norm": 0.591066837310791, "learning_rate": 9.415503864973123e-06, "loss": 0.0702, "step": 35731 }, { "epoch": 0.6327971770997977, "grad_norm": 0.5061793327331543, "learning_rate": 9.414705358979755e-06, "loss": 0.0745, "step": 35732 }, { "epoch": 0.6328148866368261, "grad_norm": 0.36490747332572937, "learning_rate": 9.413906871361442e-06, "loss": 0.0613, "step": 35733 }, { "epoch": 0.6328325961738546, "grad_norm": 0.4666607081890106, "learning_rate": 9.413108402120806e-06, "loss": 0.0636, "step": 35734 }, { "epoch": 0.632850305710883, "grad_norm": 0.4924115538597107, "learning_rate": 9.412309951260481e-06, "loss": 0.0622, "step": 35735 }, { "epoch": 0.6328680152479114, "grad_norm": 0.9824532270431519, "learning_rate": 9.41151151878309e-06, "loss": 0.09, "step": 35736 }, { "epoch": 0.6328857247849398, "grad_norm": 0.5900112986564636, "learning_rate": 9.410713104691267e-06, "loss": 0.0472, "step": 35737 }, { "epoch": 0.6329034343219683, "grad_norm": 0.6771470308303833, "learning_rate": 9.409914708987622e-06, "loss": 0.0645, "step": 35738 }, { "epoch": 0.6329211438589967, "grad_norm": 1.3894785642623901, "learning_rate": 9.409116331674797e-06, "loss": 0.0645, "step": 35739 }, { "epoch": 0.6329388533960251, "grad_norm": 0.8320780992507935, "learning_rate": 9.408317972755413e-06, "loss": 0.0571, "step": 35740 }, { "epoch": 0.6329565629330535, "grad_norm": 1.269403338432312, "learning_rate": 9.407519632232103e-06, "loss": 0.0716, "step": 35741 }, { "epoch": 0.632974272470082, "grad_norm": 0.6465181112289429, "learning_rate": 9.406721310107481e-06, "loss": 0.0834, "step": 35742 }, { "epoch": 0.6329919820071104, "grad_norm": 0.48064690828323364, "learning_rate": 9.405923006384183e-06, "loss": 0.0839, "step": 35743 }, { "epoch": 0.6330096915441388, "grad_norm": 0.37693649530410767, "learning_rate": 9.405124721064833e-06, "loss": 0.0536, "step": 35744 }, { "epoch": 0.6330274010811673, "grad_norm": 0.7131991982460022, "learning_rate": 9.404326454152055e-06, "loss": 0.0925, "step": 35745 }, { "epoch": 0.6330451106181957, "grad_norm": 0.5078678131103516, "learning_rate": 9.403528205648475e-06, "loss": 0.0448, "step": 35746 }, { "epoch": 0.6330628201552241, "grad_norm": 0.5290255546569824, "learning_rate": 9.402729975556724e-06, "loss": 0.0578, "step": 35747 }, { "epoch": 0.6330805296922525, "grad_norm": 0.4884467124938965, "learning_rate": 9.401931763879427e-06, "loss": 0.0539, "step": 35748 }, { "epoch": 0.633098239229281, "grad_norm": 0.6847144365310669, "learning_rate": 9.401133570619206e-06, "loss": 0.0677, "step": 35749 }, { "epoch": 0.6331159487663094, "grad_norm": 0.7405687570571899, "learning_rate": 9.400335395778687e-06, "loss": 0.0877, "step": 35750 }, { "epoch": 0.6331336583033378, "grad_norm": 1.0677164793014526, "learning_rate": 9.399537239360506e-06, "loss": 0.0726, "step": 35751 }, { "epoch": 0.6331513678403662, "grad_norm": 0.27792397141456604, "learning_rate": 9.398739101367272e-06, "loss": 0.06, "step": 35752 }, { "epoch": 0.6331690773773947, "grad_norm": 0.4919441044330597, "learning_rate": 9.397940981801623e-06, "loss": 0.0635, "step": 35753 }, { "epoch": 0.6331867869144231, "grad_norm": 0.6467756628990173, "learning_rate": 9.39714288066618e-06, "loss": 0.0465, "step": 35754 }, { "epoch": 0.6332044964514515, "grad_norm": 0.6254900693893433, "learning_rate": 9.396344797963579e-06, "loss": 0.0519, "step": 35755 }, { "epoch": 0.6332222059884799, "grad_norm": 0.7261432409286499, "learning_rate": 9.395546733696428e-06, "loss": 0.0516, "step": 35756 }, { "epoch": 0.6332399155255084, "grad_norm": 0.4555205702781677, "learning_rate": 9.394748687867364e-06, "loss": 0.0607, "step": 35757 }, { "epoch": 0.6332576250625368, "grad_norm": 0.7982421517372131, "learning_rate": 9.39395066047901e-06, "loss": 0.0612, "step": 35758 }, { "epoch": 0.6332753345995652, "grad_norm": 0.6852704286575317, "learning_rate": 9.39315265153399e-06, "loss": 0.0618, "step": 35759 }, { "epoch": 0.6332930441365937, "grad_norm": 0.5752776861190796, "learning_rate": 9.39235466103493e-06, "loss": 0.0629, "step": 35760 }, { "epoch": 0.6333107536736221, "grad_norm": 0.712610125541687, "learning_rate": 9.391556688984456e-06, "loss": 0.0796, "step": 35761 }, { "epoch": 0.6333284632106505, "grad_norm": 0.5768141746520996, "learning_rate": 9.390758735385199e-06, "loss": 0.0547, "step": 35762 }, { "epoch": 0.6333461727476789, "grad_norm": 0.43286117911338806, "learning_rate": 9.389960800239771e-06, "loss": 0.0439, "step": 35763 }, { "epoch": 0.6333638822847074, "grad_norm": 0.5823519825935364, "learning_rate": 9.389162883550807e-06, "loss": 0.0464, "step": 35764 }, { "epoch": 0.6333815918217358, "grad_norm": 0.7209987044334412, "learning_rate": 9.388364985320935e-06, "loss": 0.0725, "step": 35765 }, { "epoch": 0.6333993013587642, "grad_norm": 0.5263887047767639, "learning_rate": 9.38756710555277e-06, "loss": 0.041, "step": 35766 }, { "epoch": 0.6334170108957926, "grad_norm": 1.0003023147583008, "learning_rate": 9.386769244248937e-06, "loss": 0.0838, "step": 35767 }, { "epoch": 0.6334347204328211, "grad_norm": 0.7112410664558411, "learning_rate": 9.385971401412068e-06, "loss": 0.0669, "step": 35768 }, { "epoch": 0.6334524299698495, "grad_norm": 0.6180222034454346, "learning_rate": 9.385173577044793e-06, "loss": 0.0641, "step": 35769 }, { "epoch": 0.6334701395068779, "grad_norm": 0.26371654868125916, "learning_rate": 9.384375771149723e-06, "loss": 0.0475, "step": 35770 }, { "epoch": 0.6334878490439063, "grad_norm": 0.9091268181800842, "learning_rate": 9.383577983729487e-06, "loss": 0.0629, "step": 35771 }, { "epoch": 0.6335055585809348, "grad_norm": 0.6135255694389343, "learning_rate": 9.382780214786711e-06, "loss": 0.0559, "step": 35772 }, { "epoch": 0.6335232681179632, "grad_norm": 0.5127818584442139, "learning_rate": 9.381982464324024e-06, "loss": 0.0441, "step": 35773 }, { "epoch": 0.6335409776549916, "grad_norm": 0.779166042804718, "learning_rate": 9.381184732344045e-06, "loss": 0.0899, "step": 35774 }, { "epoch": 0.6335586871920201, "grad_norm": 0.7167762517929077, "learning_rate": 9.380387018849397e-06, "loss": 0.0783, "step": 35775 }, { "epoch": 0.6335763967290485, "grad_norm": 0.5525778532028198, "learning_rate": 9.379589323842714e-06, "loss": 0.0661, "step": 35776 }, { "epoch": 0.6335941062660769, "grad_norm": 0.4693705141544342, "learning_rate": 9.378791647326608e-06, "loss": 0.0677, "step": 35777 }, { "epoch": 0.6336118158031053, "grad_norm": 0.5664828419685364, "learning_rate": 9.37799398930371e-06, "loss": 0.0575, "step": 35778 }, { "epoch": 0.6336295253401338, "grad_norm": 0.4214632213115692, "learning_rate": 9.377196349776643e-06, "loss": 0.0339, "step": 35779 }, { "epoch": 0.6336472348771622, "grad_norm": 0.5238098502159119, "learning_rate": 9.376398728748035e-06, "loss": 0.0574, "step": 35780 }, { "epoch": 0.6336649444141906, "grad_norm": 0.5520322918891907, "learning_rate": 9.375601126220501e-06, "loss": 0.076, "step": 35781 }, { "epoch": 0.633682653951219, "grad_norm": 0.8212476372718811, "learning_rate": 9.37480354219667e-06, "loss": 0.0652, "step": 35782 }, { "epoch": 0.6337003634882475, "grad_norm": 0.5038573741912842, "learning_rate": 9.374005976679176e-06, "loss": 0.043, "step": 35783 }, { "epoch": 0.6337180730252759, "grad_norm": 0.43824878334999084, "learning_rate": 9.373208429670624e-06, "loss": 0.0568, "step": 35784 }, { "epoch": 0.6337357825623043, "grad_norm": 0.433798611164093, "learning_rate": 9.372410901173651e-06, "loss": 0.0486, "step": 35785 }, { "epoch": 0.6337534920993327, "grad_norm": 0.6601645946502686, "learning_rate": 9.371613391190875e-06, "loss": 0.0747, "step": 35786 }, { "epoch": 0.6337712016363612, "grad_norm": 0.8340121507644653, "learning_rate": 9.370815899724923e-06, "loss": 0.067, "step": 35787 }, { "epoch": 0.6337889111733896, "grad_norm": 0.5362944006919861, "learning_rate": 9.370018426778414e-06, "loss": 0.0603, "step": 35788 }, { "epoch": 0.633806620710418, "grad_norm": 0.7769880294799805, "learning_rate": 9.369220972353977e-06, "loss": 0.0536, "step": 35789 }, { "epoch": 0.6338243302474466, "grad_norm": 0.6442146897315979, "learning_rate": 9.368423536454236e-06, "loss": 0.0843, "step": 35790 }, { "epoch": 0.633842039784475, "grad_norm": 0.46566319465637207, "learning_rate": 9.367626119081808e-06, "loss": 0.0706, "step": 35791 }, { "epoch": 0.6338597493215034, "grad_norm": 0.6224003434181213, "learning_rate": 9.366828720239323e-06, "loss": 0.0786, "step": 35792 }, { "epoch": 0.6338774588585318, "grad_norm": 0.6898452043533325, "learning_rate": 9.3660313399294e-06, "loss": 0.0649, "step": 35793 }, { "epoch": 0.6338951683955603, "grad_norm": 0.6697024703025818, "learning_rate": 9.365233978154669e-06, "loss": 0.0786, "step": 35794 }, { "epoch": 0.6339128779325887, "grad_norm": 0.45787253975868225, "learning_rate": 9.364436634917743e-06, "loss": 0.0569, "step": 35795 }, { "epoch": 0.6339305874696171, "grad_norm": 0.7566666007041931, "learning_rate": 9.36363931022125e-06, "loss": 0.0798, "step": 35796 }, { "epoch": 0.6339482970066455, "grad_norm": 0.43892526626586914, "learning_rate": 9.36284200406782e-06, "loss": 0.0778, "step": 35797 }, { "epoch": 0.633966006543674, "grad_norm": 0.46780410408973694, "learning_rate": 9.362044716460064e-06, "loss": 0.0645, "step": 35798 }, { "epoch": 0.6339837160807024, "grad_norm": 0.5278297662734985, "learning_rate": 9.36124744740061e-06, "loss": 0.0606, "step": 35799 }, { "epoch": 0.6340014256177308, "grad_norm": 0.3424959182739258, "learning_rate": 9.360450196892083e-06, "loss": 0.0552, "step": 35800 }, { "epoch": 0.6340191351547592, "grad_norm": 0.37118813395500183, "learning_rate": 9.359652964937106e-06, "loss": 0.0773, "step": 35801 }, { "epoch": 0.6340368446917877, "grad_norm": 0.39543479681015015, "learning_rate": 9.358855751538298e-06, "loss": 0.0585, "step": 35802 }, { "epoch": 0.6340545542288161, "grad_norm": 0.5523988008499146, "learning_rate": 9.358058556698284e-06, "loss": 0.0546, "step": 35803 }, { "epoch": 0.6340722637658445, "grad_norm": 0.5630924105644226, "learning_rate": 9.35726138041969e-06, "loss": 0.0557, "step": 35804 }, { "epoch": 0.634089973302873, "grad_norm": 0.39270251989364624, "learning_rate": 9.356464222705132e-06, "loss": 0.0458, "step": 35805 }, { "epoch": 0.6341076828399014, "grad_norm": 1.0896655321121216, "learning_rate": 9.355667083557236e-06, "loss": 0.1023, "step": 35806 }, { "epoch": 0.6341253923769298, "grad_norm": 0.3527020514011383, "learning_rate": 9.354869962978625e-06, "loss": 0.0622, "step": 35807 }, { "epoch": 0.6341431019139582, "grad_norm": 0.3686785101890564, "learning_rate": 9.354072860971926e-06, "loss": 0.0643, "step": 35808 }, { "epoch": 0.6341608114509867, "grad_norm": 0.5577573180198669, "learning_rate": 9.353275777539751e-06, "loss": 0.0447, "step": 35809 }, { "epoch": 0.6341785209880151, "grad_norm": 0.6982314586639404, "learning_rate": 9.352478712684722e-06, "loss": 0.0793, "step": 35810 }, { "epoch": 0.6341962305250435, "grad_norm": 0.6562612652778625, "learning_rate": 9.351681666409472e-06, "loss": 0.0348, "step": 35811 }, { "epoch": 0.6342139400620719, "grad_norm": 0.6707544326782227, "learning_rate": 9.350884638716624e-06, "loss": 0.0775, "step": 35812 }, { "epoch": 0.6342316495991004, "grad_norm": 0.7110230326652527, "learning_rate": 9.350087629608788e-06, "loss": 0.0811, "step": 35813 }, { "epoch": 0.6342493591361288, "grad_norm": 0.43505844473838806, "learning_rate": 9.349290639088593e-06, "loss": 0.0661, "step": 35814 }, { "epoch": 0.6342670686731572, "grad_norm": 0.6355664730072021, "learning_rate": 9.348493667158661e-06, "loss": 0.065, "step": 35815 }, { "epoch": 0.6342847782101857, "grad_norm": 0.7313072681427002, "learning_rate": 9.347696713821612e-06, "loss": 0.0791, "step": 35816 }, { "epoch": 0.6343024877472141, "grad_norm": 0.7922577857971191, "learning_rate": 9.34689977908007e-06, "loss": 0.0596, "step": 35817 }, { "epoch": 0.6343201972842425, "grad_norm": 0.7167006731033325, "learning_rate": 9.346102862936656e-06, "loss": 0.078, "step": 35818 }, { "epoch": 0.6343379068212709, "grad_norm": 0.6468166708946228, "learning_rate": 9.345305965393991e-06, "loss": 0.0748, "step": 35819 }, { "epoch": 0.6343556163582994, "grad_norm": 0.7962091565132141, "learning_rate": 9.344509086454698e-06, "loss": 0.0934, "step": 35820 }, { "epoch": 0.6343733258953278, "grad_norm": 0.596169114112854, "learning_rate": 9.343712226121398e-06, "loss": 0.0695, "step": 35821 }, { "epoch": 0.6343910354323562, "grad_norm": 0.37540483474731445, "learning_rate": 9.342915384396717e-06, "loss": 0.0563, "step": 35822 }, { "epoch": 0.6344087449693846, "grad_norm": 0.6467509865760803, "learning_rate": 9.342118561283266e-06, "loss": 0.0465, "step": 35823 }, { "epoch": 0.6344264545064131, "grad_norm": 0.6199350357055664, "learning_rate": 9.341321756783674e-06, "loss": 0.053, "step": 35824 }, { "epoch": 0.6344441640434415, "grad_norm": 0.41944751143455505, "learning_rate": 9.340524970900557e-06, "loss": 0.0751, "step": 35825 }, { "epoch": 0.6344618735804699, "grad_norm": 0.5624192953109741, "learning_rate": 9.339728203636553e-06, "loss": 0.0633, "step": 35826 }, { "epoch": 0.6344795831174983, "grad_norm": 0.5767960548400879, "learning_rate": 9.338931454994259e-06, "loss": 0.0628, "step": 35827 }, { "epoch": 0.6344972926545268, "grad_norm": 1.014221429824829, "learning_rate": 9.33813472497631e-06, "loss": 0.0956, "step": 35828 }, { "epoch": 0.6345150021915552, "grad_norm": 0.44030728936195374, "learning_rate": 9.33733801358533e-06, "loss": 0.0607, "step": 35829 }, { "epoch": 0.6345327117285836, "grad_norm": 0.5981911420822144, "learning_rate": 9.33654132082393e-06, "loss": 0.0875, "step": 35830 }, { "epoch": 0.6345504212656121, "grad_norm": 0.8891475796699524, "learning_rate": 9.335744646694737e-06, "loss": 0.0506, "step": 35831 }, { "epoch": 0.6345681308026405, "grad_norm": 0.6755967736244202, "learning_rate": 9.334947991200372e-06, "loss": 0.078, "step": 35832 }, { "epoch": 0.6345858403396689, "grad_norm": 0.5272960066795349, "learning_rate": 9.334151354343457e-06, "loss": 0.053, "step": 35833 }, { "epoch": 0.6346035498766973, "grad_norm": 0.8557893633842468, "learning_rate": 9.333354736126609e-06, "loss": 0.0767, "step": 35834 }, { "epoch": 0.6346212594137258, "grad_norm": 0.6112837791442871, "learning_rate": 9.332558136552451e-06, "loss": 0.0616, "step": 35835 }, { "epoch": 0.6346389689507542, "grad_norm": 0.364242821931839, "learning_rate": 9.331761555623607e-06, "loss": 0.0634, "step": 35836 }, { "epoch": 0.6346566784877826, "grad_norm": 0.680138111114502, "learning_rate": 9.330964993342692e-06, "loss": 0.0814, "step": 35837 }, { "epoch": 0.634674388024811, "grad_norm": 0.6873587965965271, "learning_rate": 9.330168449712328e-06, "loss": 0.0551, "step": 35838 }, { "epoch": 0.6346920975618395, "grad_norm": 0.6564792394638062, "learning_rate": 9.329371924735132e-06, "loss": 0.0751, "step": 35839 }, { "epoch": 0.6347098070988679, "grad_norm": 0.7839322090148926, "learning_rate": 9.328575418413736e-06, "loss": 0.0711, "step": 35840 }, { "epoch": 0.6347275166358963, "grad_norm": 0.5911116600036621, "learning_rate": 9.327778930750748e-06, "loss": 0.0693, "step": 35841 }, { "epoch": 0.6347452261729247, "grad_norm": 0.5797422528266907, "learning_rate": 9.326982461748798e-06, "loss": 0.0698, "step": 35842 }, { "epoch": 0.6347629357099532, "grad_norm": 0.8709249496459961, "learning_rate": 9.326186011410497e-06, "loss": 0.0842, "step": 35843 }, { "epoch": 0.6347806452469816, "grad_norm": 0.44540950655937195, "learning_rate": 9.325389579738474e-06, "loss": 0.0784, "step": 35844 }, { "epoch": 0.63479835478401, "grad_norm": 0.8979334831237793, "learning_rate": 9.324593166735344e-06, "loss": 0.062, "step": 35845 }, { "epoch": 0.6348160643210385, "grad_norm": 0.36608126759529114, "learning_rate": 9.323796772403728e-06, "loss": 0.0709, "step": 35846 }, { "epoch": 0.6348337738580669, "grad_norm": 0.4452320635318756, "learning_rate": 9.323000396746248e-06, "loss": 0.0362, "step": 35847 }, { "epoch": 0.6348514833950953, "grad_norm": 0.4066850543022156, "learning_rate": 9.322204039765523e-06, "loss": 0.0506, "step": 35848 }, { "epoch": 0.6348691929321237, "grad_norm": 0.38268670439720154, "learning_rate": 9.321407701464168e-06, "loss": 0.0465, "step": 35849 }, { "epoch": 0.6348869024691522, "grad_norm": 0.7348656058311462, "learning_rate": 9.320611381844811e-06, "loss": 0.0907, "step": 35850 }, { "epoch": 0.6349046120061806, "grad_norm": 0.23034299910068512, "learning_rate": 9.319815080910072e-06, "loss": 0.0557, "step": 35851 }, { "epoch": 0.634922321543209, "grad_norm": 0.397458553314209, "learning_rate": 9.319018798662561e-06, "loss": 0.0582, "step": 35852 }, { "epoch": 0.6349400310802374, "grad_norm": 0.5627161860466003, "learning_rate": 9.318222535104903e-06, "loss": 0.0419, "step": 35853 }, { "epoch": 0.634957740617266, "grad_norm": 0.5347459316253662, "learning_rate": 9.317426290239723e-06, "loss": 0.0533, "step": 35854 }, { "epoch": 0.6349754501542944, "grad_norm": 0.7017930150032043, "learning_rate": 9.316630064069632e-06, "loss": 0.0537, "step": 35855 }, { "epoch": 0.6349931596913228, "grad_norm": 0.8164125084877014, "learning_rate": 9.315833856597252e-06, "loss": 0.0631, "step": 35856 }, { "epoch": 0.6350108692283511, "grad_norm": 0.5476680397987366, "learning_rate": 9.315037667825205e-06, "loss": 0.0564, "step": 35857 }, { "epoch": 0.6350285787653797, "grad_norm": 0.3583854138851166, "learning_rate": 9.314241497756113e-06, "loss": 0.0569, "step": 35858 }, { "epoch": 0.6350462883024081, "grad_norm": 0.7701411843299866, "learning_rate": 9.313445346392587e-06, "loss": 0.0601, "step": 35859 }, { "epoch": 0.6350639978394365, "grad_norm": 1.4333016872406006, "learning_rate": 9.31264921373725e-06, "loss": 0.0688, "step": 35860 }, { "epoch": 0.635081707376465, "grad_norm": 0.6078625321388245, "learning_rate": 9.311853099792729e-06, "loss": 0.0799, "step": 35861 }, { "epoch": 0.6350994169134934, "grad_norm": 0.5795864462852478, "learning_rate": 9.311057004561627e-06, "loss": 0.0607, "step": 35862 }, { "epoch": 0.6351171264505218, "grad_norm": 0.7905624508857727, "learning_rate": 9.310260928046575e-06, "loss": 0.0659, "step": 35863 }, { "epoch": 0.6351348359875502, "grad_norm": 0.4667057991027832, "learning_rate": 9.309464870250188e-06, "loss": 0.0824, "step": 35864 }, { "epoch": 0.6351525455245787, "grad_norm": 0.5777081847190857, "learning_rate": 9.308668831175095e-06, "loss": 0.0556, "step": 35865 }, { "epoch": 0.6351702550616071, "grad_norm": 0.824690580368042, "learning_rate": 9.307872810823896e-06, "loss": 0.1072, "step": 35866 }, { "epoch": 0.6351879645986355, "grad_norm": 0.6167414784431458, "learning_rate": 9.307076809199222e-06, "loss": 0.0691, "step": 35867 }, { "epoch": 0.6352056741356639, "grad_norm": 0.7462859153747559, "learning_rate": 9.306280826303693e-06, "loss": 0.0867, "step": 35868 }, { "epoch": 0.6352233836726924, "grad_norm": 0.794845700263977, "learning_rate": 9.30548486213992e-06, "loss": 0.0678, "step": 35869 }, { "epoch": 0.6352410932097208, "grad_norm": 0.6587865352630615, "learning_rate": 9.304688916710524e-06, "loss": 0.0634, "step": 35870 }, { "epoch": 0.6352588027467492, "grad_norm": 0.5851508378982544, "learning_rate": 9.303892990018128e-06, "loss": 0.0757, "step": 35871 }, { "epoch": 0.6352765122837776, "grad_norm": 0.7959766983985901, "learning_rate": 9.303097082065352e-06, "loss": 0.052, "step": 35872 }, { "epoch": 0.6352942218208061, "grad_norm": 0.7244705557823181, "learning_rate": 9.302301192854804e-06, "loss": 0.0524, "step": 35873 }, { "epoch": 0.6353119313578345, "grad_norm": 0.8853667974472046, "learning_rate": 9.301505322389111e-06, "loss": 0.0452, "step": 35874 }, { "epoch": 0.6353296408948629, "grad_norm": 0.3524380326271057, "learning_rate": 9.30070947067089e-06, "loss": 0.0668, "step": 35875 }, { "epoch": 0.6353473504318914, "grad_norm": 0.7049606442451477, "learning_rate": 9.299913637702759e-06, "loss": 0.0583, "step": 35876 }, { "epoch": 0.6353650599689198, "grad_norm": 0.5534448027610779, "learning_rate": 9.299117823487334e-06, "loss": 0.0764, "step": 35877 }, { "epoch": 0.6353827695059482, "grad_norm": 0.7576904296875, "learning_rate": 9.298322028027233e-06, "loss": 0.0686, "step": 35878 }, { "epoch": 0.6354004790429766, "grad_norm": 0.3592257797718048, "learning_rate": 9.297526251325084e-06, "loss": 0.0497, "step": 35879 }, { "epoch": 0.6354181885800051, "grad_norm": 0.6262725591659546, "learning_rate": 9.29673049338349e-06, "loss": 0.064, "step": 35880 }, { "epoch": 0.6354358981170335, "grad_norm": 0.5172688961029053, "learning_rate": 9.295934754205075e-06, "loss": 0.0583, "step": 35881 }, { "epoch": 0.6354536076540619, "grad_norm": 0.9047709107398987, "learning_rate": 9.29513903379246e-06, "loss": 0.0926, "step": 35882 }, { "epoch": 0.6354713171910903, "grad_norm": 0.19607819616794586, "learning_rate": 9.294343332148262e-06, "loss": 0.0392, "step": 35883 }, { "epoch": 0.6354890267281188, "grad_norm": 0.6503035426139832, "learning_rate": 9.293547649275095e-06, "loss": 0.0819, "step": 35884 }, { "epoch": 0.6355067362651472, "grad_norm": 0.528782844543457, "learning_rate": 9.292751985175579e-06, "loss": 0.0641, "step": 35885 }, { "epoch": 0.6355244458021756, "grad_norm": 0.4840650260448456, "learning_rate": 9.291956339852334e-06, "loss": 0.085, "step": 35886 }, { "epoch": 0.635542155339204, "grad_norm": 0.48992207646369934, "learning_rate": 9.291160713307972e-06, "loss": 0.0543, "step": 35887 }, { "epoch": 0.6355598648762325, "grad_norm": 0.5062667727470398, "learning_rate": 9.290365105545116e-06, "loss": 0.0507, "step": 35888 }, { "epoch": 0.6355775744132609, "grad_norm": 0.6704373955726624, "learning_rate": 9.28956951656638e-06, "loss": 0.0592, "step": 35889 }, { "epoch": 0.6355952839502893, "grad_norm": 0.5145223140716553, "learning_rate": 9.288773946374389e-06, "loss": 0.0528, "step": 35890 }, { "epoch": 0.6356129934873178, "grad_norm": 0.9074967503547668, "learning_rate": 9.287978394971744e-06, "loss": 0.0857, "step": 35891 }, { "epoch": 0.6356307030243462, "grad_norm": 0.6716790795326233, "learning_rate": 9.287182862361073e-06, "loss": 0.0836, "step": 35892 }, { "epoch": 0.6356484125613746, "grad_norm": 0.31275004148483276, "learning_rate": 9.286387348545002e-06, "loss": 0.0723, "step": 35893 }, { "epoch": 0.635666122098403, "grad_norm": 0.7591761946678162, "learning_rate": 9.285591853526134e-06, "loss": 0.0839, "step": 35894 }, { "epoch": 0.6356838316354315, "grad_norm": 0.7256729602813721, "learning_rate": 9.284796377307088e-06, "loss": 0.0566, "step": 35895 }, { "epoch": 0.6357015411724599, "grad_norm": 0.6931514739990234, "learning_rate": 9.284000919890484e-06, "loss": 0.0666, "step": 35896 }, { "epoch": 0.6357192507094883, "grad_norm": 0.47431057691574097, "learning_rate": 9.283205481278943e-06, "loss": 0.0622, "step": 35897 }, { "epoch": 0.6357369602465167, "grad_norm": 1.0750151872634888, "learning_rate": 9.282410061475076e-06, "loss": 0.109, "step": 35898 }, { "epoch": 0.6357546697835452, "grad_norm": 0.5749814510345459, "learning_rate": 9.2816146604815e-06, "loss": 0.0767, "step": 35899 }, { "epoch": 0.6357723793205736, "grad_norm": 0.29643985629081726, "learning_rate": 9.280819278300835e-06, "loss": 0.0359, "step": 35900 }, { "epoch": 0.635790088857602, "grad_norm": 0.4321785271167755, "learning_rate": 9.280023914935696e-06, "loss": 0.061, "step": 35901 }, { "epoch": 0.6358077983946304, "grad_norm": 0.22291968762874603, "learning_rate": 9.279228570388698e-06, "loss": 0.041, "step": 35902 }, { "epoch": 0.6358255079316589, "grad_norm": 0.43933433294296265, "learning_rate": 9.27843324466246e-06, "loss": 0.0593, "step": 35903 }, { "epoch": 0.6358432174686873, "grad_norm": 1.4007859230041504, "learning_rate": 9.277637937759604e-06, "loss": 0.0651, "step": 35904 }, { "epoch": 0.6358609270057157, "grad_norm": 0.5163166522979736, "learning_rate": 9.27684264968273e-06, "loss": 0.0711, "step": 35905 }, { "epoch": 0.6358786365427442, "grad_norm": 0.6563871502876282, "learning_rate": 9.27604738043447e-06, "loss": 0.0749, "step": 35906 }, { "epoch": 0.6358963460797726, "grad_norm": 0.66584312915802, "learning_rate": 9.27525213001744e-06, "loss": 0.0627, "step": 35907 }, { "epoch": 0.635914055616801, "grad_norm": 0.4581604599952698, "learning_rate": 9.274456898434244e-06, "loss": 0.0393, "step": 35908 }, { "epoch": 0.6359317651538294, "grad_norm": 0.44911301136016846, "learning_rate": 9.273661685687508e-06, "loss": 0.0759, "step": 35909 }, { "epoch": 0.6359494746908579, "grad_norm": 0.5614516139030457, "learning_rate": 9.272866491779846e-06, "loss": 0.0782, "step": 35910 }, { "epoch": 0.6359671842278863, "grad_norm": 0.5304128527641296, "learning_rate": 9.272071316713875e-06, "loss": 0.0545, "step": 35911 }, { "epoch": 0.6359848937649147, "grad_norm": 0.5800684094429016, "learning_rate": 9.27127616049221e-06, "loss": 0.051, "step": 35912 }, { "epoch": 0.6360026033019431, "grad_norm": 0.5970467925071716, "learning_rate": 9.270481023117464e-06, "loss": 0.043, "step": 35913 }, { "epoch": 0.6360203128389716, "grad_norm": 0.5843654870986938, "learning_rate": 9.269685904592256e-06, "loss": 0.06, "step": 35914 }, { "epoch": 0.636038022376, "grad_norm": 0.5294310450553894, "learning_rate": 9.268890804919206e-06, "loss": 0.0692, "step": 35915 }, { "epoch": 0.6360557319130284, "grad_norm": 0.6187130212783813, "learning_rate": 9.268095724100923e-06, "loss": 0.0939, "step": 35916 }, { "epoch": 0.6360734414500568, "grad_norm": 0.9048559069633484, "learning_rate": 9.267300662140024e-06, "loss": 0.0525, "step": 35917 }, { "epoch": 0.6360911509870854, "grad_norm": 0.851535439491272, "learning_rate": 9.266505619039132e-06, "loss": 0.083, "step": 35918 }, { "epoch": 0.6361088605241138, "grad_norm": 0.5635010004043579, "learning_rate": 9.265710594800853e-06, "loss": 0.0795, "step": 35919 }, { "epoch": 0.6361265700611421, "grad_norm": 0.36414191126823425, "learning_rate": 9.2649155894278e-06, "loss": 0.089, "step": 35920 }, { "epoch": 0.6361442795981707, "grad_norm": 0.5477080345153809, "learning_rate": 9.264120602922602e-06, "loss": 0.0548, "step": 35921 }, { "epoch": 0.6361619891351991, "grad_norm": 0.700760543346405, "learning_rate": 9.263325635287868e-06, "loss": 0.037, "step": 35922 }, { "epoch": 0.6361796986722275, "grad_norm": 0.1606038212776184, "learning_rate": 9.26253068652621e-06, "loss": 0.0732, "step": 35923 }, { "epoch": 0.6361974082092559, "grad_norm": 0.870898425579071, "learning_rate": 9.261735756640243e-06, "loss": 0.0777, "step": 35924 }, { "epoch": 0.6362151177462844, "grad_norm": 0.6806203722953796, "learning_rate": 9.260940845632591e-06, "loss": 0.065, "step": 35925 }, { "epoch": 0.6362328272833128, "grad_norm": 0.5146494507789612, "learning_rate": 9.260145953505857e-06, "loss": 0.054, "step": 35926 }, { "epoch": 0.6362505368203412, "grad_norm": 0.4143752455711365, "learning_rate": 9.259351080262664e-06, "loss": 0.0413, "step": 35927 }, { "epoch": 0.6362682463573696, "grad_norm": 0.3860335946083069, "learning_rate": 9.258556225905624e-06, "loss": 0.0704, "step": 35928 }, { "epoch": 0.6362859558943981, "grad_norm": 0.43872830271720886, "learning_rate": 9.257761390437358e-06, "loss": 0.0456, "step": 35929 }, { "epoch": 0.6363036654314265, "grad_norm": 0.6474226713180542, "learning_rate": 9.256966573860473e-06, "loss": 0.056, "step": 35930 }, { "epoch": 0.6363213749684549, "grad_norm": 0.7592416405677795, "learning_rate": 9.256171776177586e-06, "loss": 0.0551, "step": 35931 }, { "epoch": 0.6363390845054833, "grad_norm": 0.8787955045700073, "learning_rate": 9.25537699739132e-06, "loss": 0.0664, "step": 35932 }, { "epoch": 0.6363567940425118, "grad_norm": 0.7669005990028381, "learning_rate": 9.254582237504276e-06, "loss": 0.0732, "step": 35933 }, { "epoch": 0.6363745035795402, "grad_norm": 0.44029438495635986, "learning_rate": 9.253787496519073e-06, "loss": 0.0439, "step": 35934 }, { "epoch": 0.6363922131165686, "grad_norm": 0.5741602182388306, "learning_rate": 9.252992774438332e-06, "loss": 0.0601, "step": 35935 }, { "epoch": 0.6364099226535971, "grad_norm": 0.5837776064872742, "learning_rate": 9.252198071264667e-06, "loss": 0.096, "step": 35936 }, { "epoch": 0.6364276321906255, "grad_norm": 0.48968443274497986, "learning_rate": 9.251403387000682e-06, "loss": 0.0707, "step": 35937 }, { "epoch": 0.6364453417276539, "grad_norm": 0.6855796575546265, "learning_rate": 9.250608721649e-06, "loss": 0.0695, "step": 35938 }, { "epoch": 0.6364630512646823, "grad_norm": 0.6464354991912842, "learning_rate": 9.249814075212239e-06, "loss": 0.1345, "step": 35939 }, { "epoch": 0.6364807608017108, "grad_norm": 0.583047091960907, "learning_rate": 9.249019447693004e-06, "loss": 0.0561, "step": 35940 }, { "epoch": 0.6364984703387392, "grad_norm": 0.422475665807724, "learning_rate": 9.248224839093912e-06, "loss": 0.0532, "step": 35941 }, { "epoch": 0.6365161798757676, "grad_norm": 0.4926496744155884, "learning_rate": 9.24743024941758e-06, "loss": 0.0528, "step": 35942 }, { "epoch": 0.636533889412796, "grad_norm": 0.44555604457855225, "learning_rate": 9.246635678666622e-06, "loss": 0.0574, "step": 35943 }, { "epoch": 0.6365515989498245, "grad_norm": 0.5079622864723206, "learning_rate": 9.245841126843649e-06, "loss": 0.067, "step": 35944 }, { "epoch": 0.6365693084868529, "grad_norm": 0.60179203748703, "learning_rate": 9.245046593951276e-06, "loss": 0.0743, "step": 35945 }, { "epoch": 0.6365870180238813, "grad_norm": 0.7135917544364929, "learning_rate": 9.244252079992119e-06, "loss": 0.0805, "step": 35946 }, { "epoch": 0.6366047275609097, "grad_norm": 0.7147914171218872, "learning_rate": 9.243457584968795e-06, "loss": 0.0844, "step": 35947 }, { "epoch": 0.6366224370979382, "grad_norm": 0.575249969959259, "learning_rate": 9.242663108883908e-06, "loss": 0.0774, "step": 35948 }, { "epoch": 0.6366401466349666, "grad_norm": 0.46715009212493896, "learning_rate": 9.241868651740073e-06, "loss": 0.0777, "step": 35949 }, { "epoch": 0.636657856171995, "grad_norm": 0.6911525726318359, "learning_rate": 9.241074213539917e-06, "loss": 0.0613, "step": 35950 }, { "epoch": 0.6366755657090235, "grad_norm": 0.7246628999710083, "learning_rate": 9.240279794286038e-06, "loss": 0.0931, "step": 35951 }, { "epoch": 0.6366932752460519, "grad_norm": 0.5303769111633301, "learning_rate": 9.239485393981057e-06, "loss": 0.0497, "step": 35952 }, { "epoch": 0.6367109847830803, "grad_norm": 0.4524492621421814, "learning_rate": 9.238691012627586e-06, "loss": 0.0685, "step": 35953 }, { "epoch": 0.6367286943201087, "grad_norm": 0.461662620306015, "learning_rate": 9.237896650228241e-06, "loss": 0.0339, "step": 35954 }, { "epoch": 0.6367464038571372, "grad_norm": 1.2607240676879883, "learning_rate": 9.237102306785632e-06, "loss": 0.0857, "step": 35955 }, { "epoch": 0.6367641133941656, "grad_norm": 0.8084776401519775, "learning_rate": 9.236307982302373e-06, "loss": 0.0729, "step": 35956 }, { "epoch": 0.636781822931194, "grad_norm": 0.5403183698654175, "learning_rate": 9.23551367678108e-06, "loss": 0.0758, "step": 35957 }, { "epoch": 0.6367995324682224, "grad_norm": 0.554570198059082, "learning_rate": 9.234719390224362e-06, "loss": 0.0656, "step": 35958 }, { "epoch": 0.6368172420052509, "grad_norm": 0.45271357893943787, "learning_rate": 9.233925122634834e-06, "loss": 0.0663, "step": 35959 }, { "epoch": 0.6368349515422793, "grad_norm": 0.6796671748161316, "learning_rate": 9.233130874015107e-06, "loss": 0.0588, "step": 35960 }, { "epoch": 0.6368526610793077, "grad_norm": 0.9977066516876221, "learning_rate": 9.232336644367805e-06, "loss": 0.0733, "step": 35961 }, { "epoch": 0.6368703706163361, "grad_norm": 0.5094884037971497, "learning_rate": 9.231542433695524e-06, "loss": 0.0544, "step": 35962 }, { "epoch": 0.6368880801533646, "grad_norm": 0.46922534704208374, "learning_rate": 9.230748242000883e-06, "loss": 0.0841, "step": 35963 }, { "epoch": 0.636905789690393, "grad_norm": 0.5269062519073486, "learning_rate": 9.229954069286506e-06, "loss": 0.0519, "step": 35964 }, { "epoch": 0.6369234992274214, "grad_norm": 0.42827776074409485, "learning_rate": 9.229159915554991e-06, "loss": 0.0563, "step": 35965 }, { "epoch": 0.6369412087644499, "grad_norm": 0.2892386317253113, "learning_rate": 9.228365780808956e-06, "loss": 0.054, "step": 35966 }, { "epoch": 0.6369589183014783, "grad_norm": 0.6004830598831177, "learning_rate": 9.227571665051012e-06, "loss": 0.0617, "step": 35967 }, { "epoch": 0.6369766278385067, "grad_norm": 0.5696120262145996, "learning_rate": 9.226777568283777e-06, "loss": 0.0503, "step": 35968 }, { "epoch": 0.6369943373755351, "grad_norm": 0.583031952381134, "learning_rate": 9.225983490509859e-06, "loss": 0.0619, "step": 35969 }, { "epoch": 0.6370120469125636, "grad_norm": 0.5193024277687073, "learning_rate": 9.225189431731867e-06, "loss": 0.0677, "step": 35970 }, { "epoch": 0.637029756449592, "grad_norm": 0.935131847858429, "learning_rate": 9.224395391952425e-06, "loss": 0.1086, "step": 35971 }, { "epoch": 0.6370474659866204, "grad_norm": 0.5827335119247437, "learning_rate": 9.223601371174132e-06, "loss": 0.0557, "step": 35972 }, { "epoch": 0.6370651755236488, "grad_norm": 0.8961957693099976, "learning_rate": 9.222807369399608e-06, "loss": 0.085, "step": 35973 }, { "epoch": 0.6370828850606773, "grad_norm": 0.4478716552257538, "learning_rate": 9.222013386631463e-06, "loss": 0.0516, "step": 35974 }, { "epoch": 0.6371005945977057, "grad_norm": 0.8869341611862183, "learning_rate": 9.221219422872316e-06, "loss": 0.0935, "step": 35975 }, { "epoch": 0.6371183041347341, "grad_norm": 0.49350082874298096, "learning_rate": 9.220425478124766e-06, "loss": 0.0683, "step": 35976 }, { "epoch": 0.6371360136717625, "grad_norm": 0.8718024492263794, "learning_rate": 9.21963155239143e-06, "loss": 0.0759, "step": 35977 }, { "epoch": 0.637153723208791, "grad_norm": 0.5893704891204834, "learning_rate": 9.218837645674927e-06, "loss": 0.0581, "step": 35978 }, { "epoch": 0.6371714327458194, "grad_norm": 0.6218691468238831, "learning_rate": 9.21804375797786e-06, "loss": 0.0517, "step": 35979 }, { "epoch": 0.6371891422828478, "grad_norm": 0.3320542871952057, "learning_rate": 9.217249889302845e-06, "loss": 0.0699, "step": 35980 }, { "epoch": 0.6372068518198764, "grad_norm": 0.35581427812576294, "learning_rate": 9.216456039652492e-06, "loss": 0.0537, "step": 35981 }, { "epoch": 0.6372245613569048, "grad_norm": 0.42785289883613586, "learning_rate": 9.215662209029417e-06, "loss": 0.0645, "step": 35982 }, { "epoch": 0.6372422708939331, "grad_norm": 0.5614643096923828, "learning_rate": 9.214868397436225e-06, "loss": 0.0652, "step": 35983 }, { "epoch": 0.6372599804309615, "grad_norm": 0.3270626664161682, "learning_rate": 9.214074604875529e-06, "loss": 0.0673, "step": 35984 }, { "epoch": 0.6372776899679901, "grad_norm": 0.45730137825012207, "learning_rate": 9.213280831349945e-06, "loss": 0.0427, "step": 35985 }, { "epoch": 0.6372953995050185, "grad_norm": 0.3651101291179657, "learning_rate": 9.212487076862083e-06, "loss": 0.0425, "step": 35986 }, { "epoch": 0.6373131090420469, "grad_norm": 0.5817564129829407, "learning_rate": 9.211693341414552e-06, "loss": 0.062, "step": 35987 }, { "epoch": 0.6373308185790753, "grad_norm": 0.4281635582447052, "learning_rate": 9.210899625009963e-06, "loss": 0.0598, "step": 35988 }, { "epoch": 0.6373485281161038, "grad_norm": 0.5142223238945007, "learning_rate": 9.210105927650936e-06, "loss": 0.0685, "step": 35989 }, { "epoch": 0.6373662376531322, "grad_norm": 0.7947582602500916, "learning_rate": 9.209312249340067e-06, "loss": 0.0881, "step": 35990 }, { "epoch": 0.6373839471901606, "grad_norm": 0.9828311204910278, "learning_rate": 9.208518590079976e-06, "loss": 0.0734, "step": 35991 }, { "epoch": 0.637401656727189, "grad_norm": 0.2973802983760834, "learning_rate": 9.207724949873271e-06, "loss": 0.0538, "step": 35992 }, { "epoch": 0.6374193662642175, "grad_norm": 0.7818944454193115, "learning_rate": 9.206931328722572e-06, "loss": 0.0663, "step": 35993 }, { "epoch": 0.6374370758012459, "grad_norm": 0.6136961579322815, "learning_rate": 9.20613772663048e-06, "loss": 0.067, "step": 35994 }, { "epoch": 0.6374547853382743, "grad_norm": 0.4855281710624695, "learning_rate": 9.205344143599605e-06, "loss": 0.0619, "step": 35995 }, { "epoch": 0.6374724948753028, "grad_norm": 0.7121251225471497, "learning_rate": 9.204550579632569e-06, "loss": 0.0685, "step": 35996 }, { "epoch": 0.6374902044123312, "grad_norm": 0.7660703659057617, "learning_rate": 9.203757034731968e-06, "loss": 0.0784, "step": 35997 }, { "epoch": 0.6375079139493596, "grad_norm": 0.35857173800468445, "learning_rate": 9.202963508900422e-06, "loss": 0.0475, "step": 35998 }, { "epoch": 0.637525623486388, "grad_norm": 0.5248536467552185, "learning_rate": 9.202170002140541e-06, "loss": 0.0441, "step": 35999 }, { "epoch": 0.6375433330234165, "grad_norm": 0.5845298171043396, "learning_rate": 9.201376514454934e-06, "loss": 0.0529, "step": 36000 }, { "epoch": 0.6375610425604449, "grad_norm": 0.623737633228302, "learning_rate": 9.200583045846212e-06, "loss": 0.046, "step": 36001 }, { "epoch": 0.6375787520974733, "grad_norm": 0.442604124546051, "learning_rate": 9.199789596316983e-06, "loss": 0.0655, "step": 36002 }, { "epoch": 0.6375964616345017, "grad_norm": 0.5035715103149414, "learning_rate": 9.198996165869868e-06, "loss": 0.0532, "step": 36003 }, { "epoch": 0.6376141711715302, "grad_norm": 0.8057739734649658, "learning_rate": 9.19820275450746e-06, "loss": 0.0736, "step": 36004 }, { "epoch": 0.6376318807085586, "grad_norm": 0.6159996390342712, "learning_rate": 9.197409362232382e-06, "loss": 0.0653, "step": 36005 }, { "epoch": 0.637649590245587, "grad_norm": 0.5851014852523804, "learning_rate": 9.196615989047237e-06, "loss": 0.0522, "step": 36006 }, { "epoch": 0.6376672997826154, "grad_norm": 0.55734783411026, "learning_rate": 9.195822634954643e-06, "loss": 0.0531, "step": 36007 }, { "epoch": 0.6376850093196439, "grad_norm": 0.29961520433425903, "learning_rate": 9.195029299957203e-06, "loss": 0.0336, "step": 36008 }, { "epoch": 0.6377027188566723, "grad_norm": 0.8115798830986023, "learning_rate": 9.194235984057526e-06, "loss": 0.0545, "step": 36009 }, { "epoch": 0.6377204283937007, "grad_norm": 0.842664897441864, "learning_rate": 9.193442687258231e-06, "loss": 0.0809, "step": 36010 }, { "epoch": 0.6377381379307292, "grad_norm": 0.5565577149391174, "learning_rate": 9.192649409561919e-06, "loss": 0.0498, "step": 36011 }, { "epoch": 0.6377558474677576, "grad_norm": 0.31783783435821533, "learning_rate": 9.191856150971206e-06, "loss": 0.0423, "step": 36012 }, { "epoch": 0.637773557004786, "grad_norm": 0.49701574444770813, "learning_rate": 9.191062911488694e-06, "loss": 0.0572, "step": 36013 }, { "epoch": 0.6377912665418144, "grad_norm": 0.809033215045929, "learning_rate": 9.190269691117005e-06, "loss": 0.0696, "step": 36014 }, { "epoch": 0.6378089760788429, "grad_norm": 0.34665828943252563, "learning_rate": 9.189476489858733e-06, "loss": 0.0593, "step": 36015 }, { "epoch": 0.6378266856158713, "grad_norm": 0.5951168537139893, "learning_rate": 9.188683307716498e-06, "loss": 0.0611, "step": 36016 }, { "epoch": 0.6378443951528997, "grad_norm": 0.6932453513145447, "learning_rate": 9.187890144692907e-06, "loss": 0.0923, "step": 36017 }, { "epoch": 0.6378621046899281, "grad_norm": 0.6367301940917969, "learning_rate": 9.187097000790577e-06, "loss": 0.0706, "step": 36018 }, { "epoch": 0.6378798142269566, "grad_norm": 0.4834253489971161, "learning_rate": 9.186303876012101e-06, "loss": 0.0382, "step": 36019 }, { "epoch": 0.637897523763985, "grad_norm": 0.5328970551490784, "learning_rate": 9.1855107703601e-06, "loss": 0.0734, "step": 36020 }, { "epoch": 0.6379152333010134, "grad_norm": 0.6807581782341003, "learning_rate": 9.184717683837181e-06, "loss": 0.074, "step": 36021 }, { "epoch": 0.6379329428380418, "grad_norm": 0.3914601504802704, "learning_rate": 9.183924616445952e-06, "loss": 0.0925, "step": 36022 }, { "epoch": 0.6379506523750703, "grad_norm": 0.32996290922164917, "learning_rate": 9.183131568189022e-06, "loss": 0.0643, "step": 36023 }, { "epoch": 0.6379683619120987, "grad_norm": 0.4332408010959625, "learning_rate": 9.182338539069e-06, "loss": 0.0406, "step": 36024 }, { "epoch": 0.6379860714491271, "grad_norm": 0.5568820238113403, "learning_rate": 9.1815455290885e-06, "loss": 0.0771, "step": 36025 }, { "epoch": 0.6380037809861556, "grad_norm": 0.40207627415657043, "learning_rate": 9.180752538250123e-06, "loss": 0.0662, "step": 36026 }, { "epoch": 0.638021490523184, "grad_norm": 0.40742403268814087, "learning_rate": 9.179959566556482e-06, "loss": 0.0415, "step": 36027 }, { "epoch": 0.6380392000602124, "grad_norm": 0.9632952213287354, "learning_rate": 9.179166614010191e-06, "loss": 0.0836, "step": 36028 }, { "epoch": 0.6380569095972408, "grad_norm": 0.11439918726682663, "learning_rate": 9.178373680613844e-06, "loss": 0.0302, "step": 36029 }, { "epoch": 0.6380746191342693, "grad_norm": 0.6696133613586426, "learning_rate": 9.177580766370063e-06, "loss": 0.0548, "step": 36030 }, { "epoch": 0.6380923286712977, "grad_norm": 0.501158595085144, "learning_rate": 9.176787871281452e-06, "loss": 0.0702, "step": 36031 }, { "epoch": 0.6381100382083261, "grad_norm": 0.9394615292549133, "learning_rate": 9.175994995350624e-06, "loss": 0.0731, "step": 36032 }, { "epoch": 0.6381277477453545, "grad_norm": 0.7236801981925964, "learning_rate": 9.17520213858018e-06, "loss": 0.0586, "step": 36033 }, { "epoch": 0.638145457282383, "grad_norm": 0.9829425811767578, "learning_rate": 9.17440930097273e-06, "loss": 0.062, "step": 36034 }, { "epoch": 0.6381631668194114, "grad_norm": 0.8955726623535156, "learning_rate": 9.173616482530887e-06, "loss": 0.0711, "step": 36035 }, { "epoch": 0.6381808763564398, "grad_norm": 0.6989609003067017, "learning_rate": 9.172823683257252e-06, "loss": 0.0622, "step": 36036 }, { "epoch": 0.6381985858934682, "grad_norm": 0.42933452129364014, "learning_rate": 9.172030903154442e-06, "loss": 0.0391, "step": 36037 }, { "epoch": 0.6382162954304967, "grad_norm": 0.6762372851371765, "learning_rate": 9.171238142225058e-06, "loss": 0.0489, "step": 36038 }, { "epoch": 0.6382340049675251, "grad_norm": 0.5299136638641357, "learning_rate": 9.170445400471715e-06, "loss": 0.0669, "step": 36039 }, { "epoch": 0.6382517145045535, "grad_norm": 0.5706621408462524, "learning_rate": 9.169652677897015e-06, "loss": 0.0686, "step": 36040 }, { "epoch": 0.638269424041582, "grad_norm": 0.5335095524787903, "learning_rate": 9.168859974503565e-06, "loss": 0.0438, "step": 36041 }, { "epoch": 0.6382871335786104, "grad_norm": 0.5494693517684937, "learning_rate": 9.168067290293984e-06, "loss": 0.0523, "step": 36042 }, { "epoch": 0.6383048431156388, "grad_norm": 0.6091277599334717, "learning_rate": 9.167274625270865e-06, "loss": 0.0664, "step": 36043 }, { "epoch": 0.6383225526526672, "grad_norm": 0.7482870221138, "learning_rate": 9.16648197943682e-06, "loss": 0.0747, "step": 36044 }, { "epoch": 0.6383402621896958, "grad_norm": 0.7724266648292542, "learning_rate": 9.165689352794463e-06, "loss": 0.0687, "step": 36045 }, { "epoch": 0.6383579717267241, "grad_norm": 0.7328778505325317, "learning_rate": 9.164896745346403e-06, "loss": 0.0514, "step": 36046 }, { "epoch": 0.6383756812637525, "grad_norm": 0.6445262432098389, "learning_rate": 9.164104157095235e-06, "loss": 0.0768, "step": 36047 }, { "epoch": 0.638393390800781, "grad_norm": 0.7649533748626709, "learning_rate": 9.163311588043577e-06, "loss": 0.0828, "step": 36048 }, { "epoch": 0.6384111003378095, "grad_norm": 0.7780253887176514, "learning_rate": 9.162519038194031e-06, "loss": 0.0876, "step": 36049 }, { "epoch": 0.6384288098748379, "grad_norm": 0.5202304124832153, "learning_rate": 9.161726507549212e-06, "loss": 0.0623, "step": 36050 }, { "epoch": 0.6384465194118663, "grad_norm": 0.2560809552669525, "learning_rate": 9.16093399611172e-06, "loss": 0.0338, "step": 36051 }, { "epoch": 0.6384642289488947, "grad_norm": 0.47077614068984985, "learning_rate": 9.16014150388416e-06, "loss": 0.0539, "step": 36052 }, { "epoch": 0.6384819384859232, "grad_norm": 0.862777590751648, "learning_rate": 9.159349030869151e-06, "loss": 0.0651, "step": 36053 }, { "epoch": 0.6384996480229516, "grad_norm": 0.8845348358154297, "learning_rate": 9.158556577069287e-06, "loss": 0.0465, "step": 36054 }, { "epoch": 0.63851735755998, "grad_norm": 0.6923835277557373, "learning_rate": 9.157764142487183e-06, "loss": 0.0578, "step": 36055 }, { "epoch": 0.6385350670970085, "grad_norm": 0.6321160793304443, "learning_rate": 9.156971727125445e-06, "loss": 0.0575, "step": 36056 }, { "epoch": 0.6385527766340369, "grad_norm": 0.28575319051742554, "learning_rate": 9.156179330986682e-06, "loss": 0.047, "step": 36057 }, { "epoch": 0.6385704861710653, "grad_norm": 0.8114211559295654, "learning_rate": 9.15538695407349e-06, "loss": 0.0983, "step": 36058 }, { "epoch": 0.6385881957080937, "grad_norm": 0.8723039627075195, "learning_rate": 9.15459459638849e-06, "loss": 0.0451, "step": 36059 }, { "epoch": 0.6386059052451222, "grad_norm": 0.2027214616537094, "learning_rate": 9.153802257934286e-06, "loss": 0.0613, "step": 36060 }, { "epoch": 0.6386236147821506, "grad_norm": 0.4368606209754944, "learning_rate": 9.153009938713476e-06, "loss": 0.0443, "step": 36061 }, { "epoch": 0.638641324319179, "grad_norm": 0.7426902055740356, "learning_rate": 9.15221763872867e-06, "loss": 0.0666, "step": 36062 }, { "epoch": 0.6386590338562074, "grad_norm": 0.6034220457077026, "learning_rate": 9.151425357982478e-06, "loss": 0.0718, "step": 36063 }, { "epoch": 0.6386767433932359, "grad_norm": 0.35325759649276733, "learning_rate": 9.150633096477507e-06, "loss": 0.0501, "step": 36064 }, { "epoch": 0.6386944529302643, "grad_norm": 0.7719298601150513, "learning_rate": 9.14984085421636e-06, "loss": 0.0602, "step": 36065 }, { "epoch": 0.6387121624672927, "grad_norm": 0.6619901061058044, "learning_rate": 9.149048631201644e-06, "loss": 0.074, "step": 36066 }, { "epoch": 0.6387298720043211, "grad_norm": 0.6247581243515015, "learning_rate": 9.148256427435971e-06, "loss": 0.0684, "step": 36067 }, { "epoch": 0.6387475815413496, "grad_norm": 0.8287872076034546, "learning_rate": 9.147464242921938e-06, "loss": 0.0926, "step": 36068 }, { "epoch": 0.638765291078378, "grad_norm": 0.5895114541053772, "learning_rate": 9.146672077662154e-06, "loss": 0.0574, "step": 36069 }, { "epoch": 0.6387830006154064, "grad_norm": 0.566723644733429, "learning_rate": 9.145879931659229e-06, "loss": 0.0583, "step": 36070 }, { "epoch": 0.6388007101524349, "grad_norm": 0.2894954979419708, "learning_rate": 9.14508780491577e-06, "loss": 0.075, "step": 36071 }, { "epoch": 0.6388184196894633, "grad_norm": 0.6351422667503357, "learning_rate": 9.144295697434377e-06, "loss": 0.0741, "step": 36072 }, { "epoch": 0.6388361292264917, "grad_norm": 1.084266185760498, "learning_rate": 9.143503609217656e-06, "loss": 0.0652, "step": 36073 }, { "epoch": 0.6388538387635201, "grad_norm": 0.8823487162590027, "learning_rate": 9.142711540268224e-06, "loss": 0.106, "step": 36074 }, { "epoch": 0.6388715483005486, "grad_norm": 0.8076141476631165, "learning_rate": 9.14191949058867e-06, "loss": 0.0739, "step": 36075 }, { "epoch": 0.638889257837577, "grad_norm": 0.9512923955917358, "learning_rate": 9.141127460181611e-06, "loss": 0.0837, "step": 36076 }, { "epoch": 0.6389069673746054, "grad_norm": 0.5137022733688354, "learning_rate": 9.14033544904965e-06, "loss": 0.0358, "step": 36077 }, { "epoch": 0.6389246769116338, "grad_norm": 0.42187586426734924, "learning_rate": 9.139543457195397e-06, "loss": 0.0497, "step": 36078 }, { "epoch": 0.6389423864486623, "grad_norm": 0.5214409232139587, "learning_rate": 9.138751484621448e-06, "loss": 0.0444, "step": 36079 }, { "epoch": 0.6389600959856907, "grad_norm": 0.4588678479194641, "learning_rate": 9.137959531330414e-06, "loss": 0.0653, "step": 36080 }, { "epoch": 0.6389778055227191, "grad_norm": 0.20888692140579224, "learning_rate": 9.137167597324905e-06, "loss": 0.0513, "step": 36081 }, { "epoch": 0.6389955150597475, "grad_norm": 0.634630024433136, "learning_rate": 9.136375682607517e-06, "loss": 0.0646, "step": 36082 }, { "epoch": 0.639013224596776, "grad_norm": 0.3950454890727997, "learning_rate": 9.13558378718086e-06, "loss": 0.0798, "step": 36083 }, { "epoch": 0.6390309341338044, "grad_norm": 1.3931115865707397, "learning_rate": 9.134791911047542e-06, "loss": 0.0454, "step": 36084 }, { "epoch": 0.6390486436708328, "grad_norm": 0.3760833442211151, "learning_rate": 9.134000054210169e-06, "loss": 0.0701, "step": 36085 }, { "epoch": 0.6390663532078613, "grad_norm": 0.7873263955116272, "learning_rate": 9.133208216671336e-06, "loss": 0.0755, "step": 36086 }, { "epoch": 0.6390840627448897, "grad_norm": 0.6550539135932922, "learning_rate": 9.13241639843365e-06, "loss": 0.0571, "step": 36087 }, { "epoch": 0.6391017722819181, "grad_norm": 0.5334334969520569, "learning_rate": 9.131624599499726e-06, "loss": 0.0537, "step": 36088 }, { "epoch": 0.6391194818189465, "grad_norm": 1.0033972263336182, "learning_rate": 9.13083281987217e-06, "loss": 0.0621, "step": 36089 }, { "epoch": 0.639137191355975, "grad_norm": 0.3755425214767456, "learning_rate": 9.130041059553575e-06, "loss": 0.0916, "step": 36090 }, { "epoch": 0.6391549008930034, "grad_norm": 0.5986238718032837, "learning_rate": 9.129249318546548e-06, "loss": 0.0578, "step": 36091 }, { "epoch": 0.6391726104300318, "grad_norm": 0.5992549657821655, "learning_rate": 9.128457596853704e-06, "loss": 0.0783, "step": 36092 }, { "epoch": 0.6391903199670602, "grad_norm": 0.6975892782211304, "learning_rate": 9.127665894477633e-06, "loss": 0.0665, "step": 36093 }, { "epoch": 0.6392080295040887, "grad_norm": 0.5274873971939087, "learning_rate": 9.126874211420951e-06, "loss": 0.0628, "step": 36094 }, { "epoch": 0.6392257390411171, "grad_norm": 0.40990927815437317, "learning_rate": 9.126082547686257e-06, "loss": 0.0528, "step": 36095 }, { "epoch": 0.6392434485781455, "grad_norm": 0.9589095115661621, "learning_rate": 9.12529090327616e-06, "loss": 0.0443, "step": 36096 }, { "epoch": 0.6392611581151739, "grad_norm": 0.6874302625656128, "learning_rate": 9.12449927819326e-06, "loss": 0.0693, "step": 36097 }, { "epoch": 0.6392788676522024, "grad_norm": 0.578704297542572, "learning_rate": 9.123707672440163e-06, "loss": 0.068, "step": 36098 }, { "epoch": 0.6392965771892308, "grad_norm": 0.3151971101760864, "learning_rate": 9.122916086019478e-06, "loss": 0.0578, "step": 36099 }, { "epoch": 0.6393142867262592, "grad_norm": 0.5706018805503845, "learning_rate": 9.122124518933801e-06, "loss": 0.0751, "step": 36100 }, { "epoch": 0.6393319962632877, "grad_norm": 0.7773509621620178, "learning_rate": 9.121332971185739e-06, "loss": 0.0771, "step": 36101 }, { "epoch": 0.6393497058003161, "grad_norm": 0.7121984958648682, "learning_rate": 9.120541442777892e-06, "loss": 0.0615, "step": 36102 }, { "epoch": 0.6393674153373445, "grad_norm": 0.46343377232551575, "learning_rate": 9.119749933712879e-06, "loss": 0.0698, "step": 36103 }, { "epoch": 0.6393851248743729, "grad_norm": 0.4729551672935486, "learning_rate": 9.118958443993289e-06, "loss": 0.0623, "step": 36104 }, { "epoch": 0.6394028344114014, "grad_norm": 0.6579275727272034, "learning_rate": 9.11816697362173e-06, "loss": 0.0597, "step": 36105 }, { "epoch": 0.6394205439484298, "grad_norm": 0.5520259737968445, "learning_rate": 9.11737552260081e-06, "loss": 0.0605, "step": 36106 }, { "epoch": 0.6394382534854582, "grad_norm": 0.6617057919502258, "learning_rate": 9.116584090933125e-06, "loss": 0.0515, "step": 36107 }, { "epoch": 0.6394559630224866, "grad_norm": 0.7773169279098511, "learning_rate": 9.115792678621286e-06, "loss": 0.0658, "step": 36108 }, { "epoch": 0.6394736725595151, "grad_norm": 0.5112113356590271, "learning_rate": 9.115001285667893e-06, "loss": 0.0577, "step": 36109 }, { "epoch": 0.6394913820965435, "grad_norm": 0.5860405564308167, "learning_rate": 9.114209912075555e-06, "loss": 0.0479, "step": 36110 }, { "epoch": 0.639509091633572, "grad_norm": 0.6077076196670532, "learning_rate": 9.113418557846864e-06, "loss": 0.0679, "step": 36111 }, { "epoch": 0.6395268011706003, "grad_norm": 0.3101511001586914, "learning_rate": 9.112627222984432e-06, "loss": 0.0565, "step": 36112 }, { "epoch": 0.6395445107076289, "grad_norm": 0.5055323839187622, "learning_rate": 9.111835907490868e-06, "loss": 0.063, "step": 36113 }, { "epoch": 0.6395622202446573, "grad_norm": 0.5387505888938904, "learning_rate": 9.111044611368764e-06, "loss": 0.0359, "step": 36114 }, { "epoch": 0.6395799297816857, "grad_norm": 0.49511876702308655, "learning_rate": 9.110253334620725e-06, "loss": 0.0719, "step": 36115 }, { "epoch": 0.6395976393187142, "grad_norm": 0.5162252187728882, "learning_rate": 9.109462077249355e-06, "loss": 0.0562, "step": 36116 }, { "epoch": 0.6396153488557426, "grad_norm": 0.3637283742427826, "learning_rate": 9.108670839257267e-06, "loss": 0.0488, "step": 36117 }, { "epoch": 0.639633058392771, "grad_norm": 0.6447664499282837, "learning_rate": 9.10787962064705e-06, "loss": 0.0676, "step": 36118 }, { "epoch": 0.6396507679297994, "grad_norm": 0.5626688599586487, "learning_rate": 9.107088421421314e-06, "loss": 0.0657, "step": 36119 }, { "epoch": 0.6396684774668279, "grad_norm": 0.823039174079895, "learning_rate": 9.106297241582661e-06, "loss": 0.0919, "step": 36120 }, { "epoch": 0.6396861870038563, "grad_norm": 0.5194306373596191, "learning_rate": 9.105506081133697e-06, "loss": 0.0695, "step": 36121 }, { "epoch": 0.6397038965408847, "grad_norm": 0.5742307901382446, "learning_rate": 9.104714940077019e-06, "loss": 0.0374, "step": 36122 }, { "epoch": 0.6397216060779131, "grad_norm": 0.7843734622001648, "learning_rate": 9.103923818415232e-06, "loss": 0.0793, "step": 36123 }, { "epoch": 0.6397393156149416, "grad_norm": 0.56574547290802, "learning_rate": 9.103132716150943e-06, "loss": 0.1061, "step": 36124 }, { "epoch": 0.63975702515197, "grad_norm": 0.6562042236328125, "learning_rate": 9.102341633286748e-06, "loss": 0.0444, "step": 36125 }, { "epoch": 0.6397747346889984, "grad_norm": 1.4671560525894165, "learning_rate": 9.101550569825251e-06, "loss": 0.0885, "step": 36126 }, { "epoch": 0.6397924442260268, "grad_norm": 0.35751238465309143, "learning_rate": 9.100759525769055e-06, "loss": 0.0802, "step": 36127 }, { "epoch": 0.6398101537630553, "grad_norm": 0.63882976770401, "learning_rate": 9.099968501120772e-06, "loss": 0.0609, "step": 36128 }, { "epoch": 0.6398278633000837, "grad_norm": 0.6536567807197571, "learning_rate": 9.099177495882989e-06, "loss": 0.0702, "step": 36129 }, { "epoch": 0.6398455728371121, "grad_norm": 0.7159215807914734, "learning_rate": 9.098386510058317e-06, "loss": 0.0662, "step": 36130 }, { "epoch": 0.6398632823741406, "grad_norm": 0.48390382528305054, "learning_rate": 9.097595543649356e-06, "loss": 0.0553, "step": 36131 }, { "epoch": 0.639880991911169, "grad_norm": 0.774544358253479, "learning_rate": 9.096804596658709e-06, "loss": 0.0667, "step": 36132 }, { "epoch": 0.6398987014481974, "grad_norm": 0.7047628164291382, "learning_rate": 9.096013669088975e-06, "loss": 0.0743, "step": 36133 }, { "epoch": 0.6399164109852258, "grad_norm": 0.24808044731616974, "learning_rate": 9.09522276094276e-06, "loss": 0.033, "step": 36134 }, { "epoch": 0.6399341205222543, "grad_norm": 0.6168012022972107, "learning_rate": 9.094431872222669e-06, "loss": 0.096, "step": 36135 }, { "epoch": 0.6399518300592827, "grad_norm": 0.753349244594574, "learning_rate": 9.093641002931296e-06, "loss": 0.0625, "step": 36136 }, { "epoch": 0.6399695395963111, "grad_norm": 0.3042471706867218, "learning_rate": 9.092850153071246e-06, "loss": 0.0432, "step": 36137 }, { "epoch": 0.6399872491333395, "grad_norm": 0.6435283422470093, "learning_rate": 9.09205932264513e-06, "loss": 0.0606, "step": 36138 }, { "epoch": 0.640004958670368, "grad_norm": 0.512281060218811, "learning_rate": 9.09126851165553e-06, "loss": 0.0686, "step": 36139 }, { "epoch": 0.6400226682073964, "grad_norm": 1.159641146659851, "learning_rate": 9.090477720105063e-06, "loss": 0.0826, "step": 36140 }, { "epoch": 0.6400403777444248, "grad_norm": 0.7956483364105225, "learning_rate": 9.089686947996326e-06, "loss": 0.0998, "step": 36141 }, { "epoch": 0.6400580872814532, "grad_norm": 0.6711782813072205, "learning_rate": 9.088896195331928e-06, "loss": 0.0424, "step": 36142 }, { "epoch": 0.6400757968184817, "grad_norm": 0.6631645560264587, "learning_rate": 9.088105462114454e-06, "loss": 0.0663, "step": 36143 }, { "epoch": 0.6400935063555101, "grad_norm": 0.3434178829193115, "learning_rate": 9.087314748346518e-06, "loss": 0.0295, "step": 36144 }, { "epoch": 0.6401112158925385, "grad_norm": 0.4425918459892273, "learning_rate": 9.086524054030723e-06, "loss": 0.0536, "step": 36145 }, { "epoch": 0.640128925429567, "grad_norm": 0.9629466533660889, "learning_rate": 9.085733379169663e-06, "loss": 0.0604, "step": 36146 }, { "epoch": 0.6401466349665954, "grad_norm": 0.4899258017539978, "learning_rate": 9.084942723765936e-06, "loss": 0.0646, "step": 36147 }, { "epoch": 0.6401643445036238, "grad_norm": 0.6116781830787659, "learning_rate": 9.084152087822155e-06, "loss": 0.0778, "step": 36148 }, { "epoch": 0.6401820540406522, "grad_norm": 0.9278839230537415, "learning_rate": 9.083361471340914e-06, "loss": 0.085, "step": 36149 }, { "epoch": 0.6401997635776807, "grad_norm": 0.8279390931129456, "learning_rate": 9.082570874324818e-06, "loss": 0.0942, "step": 36150 }, { "epoch": 0.6402174731147091, "grad_norm": 0.3702341914176941, "learning_rate": 9.081780296776462e-06, "loss": 0.0385, "step": 36151 }, { "epoch": 0.6402351826517375, "grad_norm": 0.49228084087371826, "learning_rate": 9.080989738698456e-06, "loss": 0.0397, "step": 36152 }, { "epoch": 0.6402528921887659, "grad_norm": 0.4895595908164978, "learning_rate": 9.080199200093386e-06, "loss": 0.0465, "step": 36153 }, { "epoch": 0.6402706017257944, "grad_norm": 0.5354820489883423, "learning_rate": 9.079408680963866e-06, "loss": 0.0545, "step": 36154 }, { "epoch": 0.6402883112628228, "grad_norm": 0.48572301864624023, "learning_rate": 9.078618181312492e-06, "loss": 0.0597, "step": 36155 }, { "epoch": 0.6403060207998512, "grad_norm": 0.9440937042236328, "learning_rate": 9.077827701141871e-06, "loss": 0.0743, "step": 36156 }, { "epoch": 0.6403237303368796, "grad_norm": 0.6637715101242065, "learning_rate": 9.077037240454592e-06, "loss": 0.0583, "step": 36157 }, { "epoch": 0.6403414398739081, "grad_norm": 0.6047727465629578, "learning_rate": 9.07624679925326e-06, "loss": 0.0574, "step": 36158 }, { "epoch": 0.6403591494109365, "grad_norm": 0.9825014472007751, "learning_rate": 9.07545637754048e-06, "loss": 0.0793, "step": 36159 }, { "epoch": 0.6403768589479649, "grad_norm": 0.3036990463733673, "learning_rate": 9.07466597531885e-06, "loss": 0.0615, "step": 36160 }, { "epoch": 0.6403945684849934, "grad_norm": 0.7797136306762695, "learning_rate": 9.073875592590968e-06, "loss": 0.0527, "step": 36161 }, { "epoch": 0.6404122780220218, "grad_norm": 0.47465208172798157, "learning_rate": 9.073085229359436e-06, "loss": 0.0515, "step": 36162 }, { "epoch": 0.6404299875590502, "grad_norm": 0.5657834410667419, "learning_rate": 9.072294885626856e-06, "loss": 0.0597, "step": 36163 }, { "epoch": 0.6404476970960786, "grad_norm": 0.46566131711006165, "learning_rate": 9.071504561395823e-06, "loss": 0.0405, "step": 36164 }, { "epoch": 0.6404654066331071, "grad_norm": 1.0756858587265015, "learning_rate": 9.070714256668941e-06, "loss": 0.0971, "step": 36165 }, { "epoch": 0.6404831161701355, "grad_norm": 1.1549333333969116, "learning_rate": 9.06992397144881e-06, "loss": 0.0715, "step": 36166 }, { "epoch": 0.6405008257071639, "grad_norm": 0.5810699462890625, "learning_rate": 9.069133705738036e-06, "loss": 0.0709, "step": 36167 }, { "epoch": 0.6405185352441923, "grad_norm": 0.4128718078136444, "learning_rate": 9.0683434595392e-06, "loss": 0.0511, "step": 36168 }, { "epoch": 0.6405362447812208, "grad_norm": 0.8429692983627319, "learning_rate": 9.067553232854918e-06, "loss": 0.0775, "step": 36169 }, { "epoch": 0.6405539543182492, "grad_norm": 0.6195597648620605, "learning_rate": 9.066763025687793e-06, "loss": 0.0704, "step": 36170 }, { "epoch": 0.6405716638552776, "grad_norm": 0.4676605463027954, "learning_rate": 9.065972838040413e-06, "loss": 0.0535, "step": 36171 }, { "epoch": 0.640589373392306, "grad_norm": 0.7750959992408752, "learning_rate": 9.06518266991538e-06, "loss": 0.0613, "step": 36172 }, { "epoch": 0.6406070829293345, "grad_norm": 0.9593421220779419, "learning_rate": 9.064392521315296e-06, "loss": 0.0754, "step": 36173 }, { "epoch": 0.640624792466363, "grad_norm": 0.6970774531364441, "learning_rate": 9.063602392242765e-06, "loss": 0.0673, "step": 36174 }, { "epoch": 0.6406425020033913, "grad_norm": 0.48613059520721436, "learning_rate": 9.062812282700377e-06, "loss": 0.0774, "step": 36175 }, { "epoch": 0.6406602115404199, "grad_norm": 0.43255090713500977, "learning_rate": 9.062022192690737e-06, "loss": 0.047, "step": 36176 }, { "epoch": 0.6406779210774483, "grad_norm": 0.708308219909668, "learning_rate": 9.061232122216444e-06, "loss": 0.0665, "step": 36177 }, { "epoch": 0.6406956306144767, "grad_norm": 0.6836596727371216, "learning_rate": 9.060442071280096e-06, "loss": 0.0909, "step": 36178 }, { "epoch": 0.640713340151505, "grad_norm": 0.46181073784828186, "learning_rate": 9.05965203988429e-06, "loss": 0.0326, "step": 36179 }, { "epoch": 0.6407310496885336, "grad_norm": 0.8581390380859375, "learning_rate": 9.058862028031632e-06, "loss": 0.0845, "step": 36180 }, { "epoch": 0.640748759225562, "grad_norm": 0.46622568368911743, "learning_rate": 9.05807203572472e-06, "loss": 0.0535, "step": 36181 }, { "epoch": 0.6407664687625904, "grad_norm": 0.7285084128379822, "learning_rate": 9.057282062966145e-06, "loss": 0.0727, "step": 36182 }, { "epoch": 0.6407841782996188, "grad_norm": 0.4566713571548462, "learning_rate": 9.05649210975851e-06, "loss": 0.0381, "step": 36183 }, { "epoch": 0.6408018878366473, "grad_norm": 0.6590452194213867, "learning_rate": 9.055702176104422e-06, "loss": 0.0724, "step": 36184 }, { "epoch": 0.6408195973736757, "grad_norm": 0.41566959023475647, "learning_rate": 9.054912262006466e-06, "loss": 0.0535, "step": 36185 }, { "epoch": 0.6408373069107041, "grad_norm": 0.43102318048477173, "learning_rate": 9.054122367467249e-06, "loss": 0.0386, "step": 36186 }, { "epoch": 0.6408550164477325, "grad_norm": 0.9180614948272705, "learning_rate": 9.053332492489366e-06, "loss": 0.0881, "step": 36187 }, { "epoch": 0.640872725984761, "grad_norm": 0.8736681938171387, "learning_rate": 9.052542637075423e-06, "loss": 0.0791, "step": 36188 }, { "epoch": 0.6408904355217894, "grad_norm": 0.7853773236274719, "learning_rate": 9.051752801228007e-06, "loss": 0.0898, "step": 36189 }, { "epoch": 0.6409081450588178, "grad_norm": 0.8231920003890991, "learning_rate": 9.050962984949724e-06, "loss": 0.084, "step": 36190 }, { "epoch": 0.6409258545958463, "grad_norm": 0.928261399269104, "learning_rate": 9.050173188243172e-06, "loss": 0.0473, "step": 36191 }, { "epoch": 0.6409435641328747, "grad_norm": 0.6597460508346558, "learning_rate": 9.049383411110951e-06, "loss": 0.0639, "step": 36192 }, { "epoch": 0.6409612736699031, "grad_norm": 0.6013590693473816, "learning_rate": 9.048593653555654e-06, "loss": 0.0693, "step": 36193 }, { "epoch": 0.6409789832069315, "grad_norm": 0.7130730152130127, "learning_rate": 9.047803915579881e-06, "loss": 0.0676, "step": 36194 }, { "epoch": 0.64099669274396, "grad_norm": 0.4362117052078247, "learning_rate": 9.047014197186238e-06, "loss": 0.0806, "step": 36195 }, { "epoch": 0.6410144022809884, "grad_norm": 0.5422954559326172, "learning_rate": 9.046224498377309e-06, "loss": 0.0626, "step": 36196 }, { "epoch": 0.6410321118180168, "grad_norm": 0.6211876273155212, "learning_rate": 9.045434819155698e-06, "loss": 0.0596, "step": 36197 }, { "epoch": 0.6410498213550452, "grad_norm": 0.41095679998397827, "learning_rate": 9.044645159524007e-06, "loss": 0.07, "step": 36198 }, { "epoch": 0.6410675308920737, "grad_norm": 0.6708294153213501, "learning_rate": 9.043855519484834e-06, "loss": 0.0804, "step": 36199 }, { "epoch": 0.6410852404291021, "grad_norm": 0.5779637694358826, "learning_rate": 9.043065899040771e-06, "loss": 0.0514, "step": 36200 }, { "epoch": 0.6411029499661305, "grad_norm": 0.7768285274505615, "learning_rate": 9.042276298194418e-06, "loss": 0.0657, "step": 36201 }, { "epoch": 0.6411206595031589, "grad_norm": 0.6258198618888855, "learning_rate": 9.041486716948376e-06, "loss": 0.0798, "step": 36202 }, { "epoch": 0.6411383690401874, "grad_norm": 0.2400362342596054, "learning_rate": 9.040697155305239e-06, "loss": 0.0513, "step": 36203 }, { "epoch": 0.6411560785772158, "grad_norm": 0.4834483861923218, "learning_rate": 9.039907613267602e-06, "loss": 0.062, "step": 36204 }, { "epoch": 0.6411737881142442, "grad_norm": 0.6420286297798157, "learning_rate": 9.039118090838069e-06, "loss": 0.0498, "step": 36205 }, { "epoch": 0.6411914976512727, "grad_norm": 0.4040811359882355, "learning_rate": 9.038328588019236e-06, "loss": 0.078, "step": 36206 }, { "epoch": 0.6412092071883011, "grad_norm": 0.40780219435691833, "learning_rate": 9.037539104813698e-06, "loss": 0.0463, "step": 36207 }, { "epoch": 0.6412269167253295, "grad_norm": 0.3599505126476288, "learning_rate": 9.03674964122405e-06, "loss": 0.0426, "step": 36208 }, { "epoch": 0.6412446262623579, "grad_norm": 1.0198746919631958, "learning_rate": 9.035960197252901e-06, "loss": 0.1111, "step": 36209 }, { "epoch": 0.6412623357993864, "grad_norm": 0.7948503494262695, "learning_rate": 9.035170772902834e-06, "loss": 0.0716, "step": 36210 }, { "epoch": 0.6412800453364148, "grad_norm": 0.8595594167709351, "learning_rate": 9.03438136817645e-06, "loss": 0.084, "step": 36211 }, { "epoch": 0.6412977548734432, "grad_norm": 0.5005155205726624, "learning_rate": 9.033591983076347e-06, "loss": 0.057, "step": 36212 }, { "epoch": 0.6413154644104716, "grad_norm": 0.5287882089614868, "learning_rate": 9.032802617605129e-06, "loss": 0.04, "step": 36213 }, { "epoch": 0.6413331739475001, "grad_norm": 0.5647571086883545, "learning_rate": 9.032013271765383e-06, "loss": 0.0689, "step": 36214 }, { "epoch": 0.6413508834845285, "grad_norm": 0.459317684173584, "learning_rate": 9.03122394555971e-06, "loss": 0.0525, "step": 36215 }, { "epoch": 0.6413685930215569, "grad_norm": 0.9017398357391357, "learning_rate": 9.030434638990709e-06, "loss": 0.0737, "step": 36216 }, { "epoch": 0.6413863025585853, "grad_norm": 0.8701445460319519, "learning_rate": 9.029645352060971e-06, "loss": 0.0583, "step": 36217 }, { "epoch": 0.6414040120956138, "grad_norm": 0.3855922520160675, "learning_rate": 9.028856084773096e-06, "loss": 0.0633, "step": 36218 }, { "epoch": 0.6414217216326422, "grad_norm": 0.5510395169258118, "learning_rate": 9.02806683712968e-06, "loss": 0.0609, "step": 36219 }, { "epoch": 0.6414394311696706, "grad_norm": 0.7120214104652405, "learning_rate": 9.027277609133323e-06, "loss": 0.0535, "step": 36220 }, { "epoch": 0.6414571407066991, "grad_norm": 0.5225860476493835, "learning_rate": 9.026488400786617e-06, "loss": 0.05, "step": 36221 }, { "epoch": 0.6414748502437275, "grad_norm": 0.7325836420059204, "learning_rate": 9.025699212092159e-06, "loss": 0.0691, "step": 36222 }, { "epoch": 0.6414925597807559, "grad_norm": 0.7778984904289246, "learning_rate": 9.024910043052548e-06, "loss": 0.0534, "step": 36223 }, { "epoch": 0.6415102693177843, "grad_norm": 0.5446467995643616, "learning_rate": 9.024120893670382e-06, "loss": 0.0486, "step": 36224 }, { "epoch": 0.6415279788548128, "grad_norm": 0.5074001550674438, "learning_rate": 9.02333176394825e-06, "loss": 0.0596, "step": 36225 }, { "epoch": 0.6415456883918412, "grad_norm": 0.787629246711731, "learning_rate": 9.022542653888746e-06, "loss": 0.05, "step": 36226 }, { "epoch": 0.6415633979288696, "grad_norm": 0.5465366840362549, "learning_rate": 9.021753563494484e-06, "loss": 0.055, "step": 36227 }, { "epoch": 0.641581107465898, "grad_norm": 0.6117717027664185, "learning_rate": 9.020964492768041e-06, "loss": 0.0716, "step": 36228 }, { "epoch": 0.6415988170029265, "grad_norm": 0.5160171985626221, "learning_rate": 9.02017544171202e-06, "loss": 0.0361, "step": 36229 }, { "epoch": 0.6416165265399549, "grad_norm": 0.6923051476478577, "learning_rate": 9.019386410329016e-06, "loss": 0.0788, "step": 36230 }, { "epoch": 0.6416342360769833, "grad_norm": 0.5482892394065857, "learning_rate": 9.01859739862163e-06, "loss": 0.0518, "step": 36231 }, { "epoch": 0.6416519456140117, "grad_norm": 0.3650723695755005, "learning_rate": 9.017808406592453e-06, "loss": 0.0624, "step": 36232 }, { "epoch": 0.6416696551510402, "grad_norm": 0.537321925163269, "learning_rate": 9.017019434244077e-06, "loss": 0.0452, "step": 36233 }, { "epoch": 0.6416873646880686, "grad_norm": 0.9364757537841797, "learning_rate": 9.016230481579109e-06, "loss": 0.0956, "step": 36234 }, { "epoch": 0.641705074225097, "grad_norm": 0.6488345861434937, "learning_rate": 9.015441548600132e-06, "loss": 0.0645, "step": 36235 }, { "epoch": 0.6417227837621255, "grad_norm": 0.9344408512115479, "learning_rate": 9.014652635309746e-06, "loss": 0.0815, "step": 36236 }, { "epoch": 0.641740493299154, "grad_norm": 0.5058149099349976, "learning_rate": 9.013863741710549e-06, "loss": 0.0554, "step": 36237 }, { "epoch": 0.6417582028361823, "grad_norm": 0.6883019208908081, "learning_rate": 9.01307486780514e-06, "loss": 0.0572, "step": 36238 }, { "epoch": 0.6417759123732107, "grad_norm": 0.6105032563209534, "learning_rate": 9.012286013596104e-06, "loss": 0.071, "step": 36239 }, { "epoch": 0.6417936219102393, "grad_norm": 0.6514812111854553, "learning_rate": 9.011497179086042e-06, "loss": 0.0655, "step": 36240 }, { "epoch": 0.6418113314472677, "grad_norm": 0.35617053508758545, "learning_rate": 9.01070836427755e-06, "loss": 0.0736, "step": 36241 }, { "epoch": 0.641829040984296, "grad_norm": 0.7762507200241089, "learning_rate": 9.009919569173218e-06, "loss": 0.0755, "step": 36242 }, { "epoch": 0.6418467505213244, "grad_norm": 1.028063178062439, "learning_rate": 9.009130793775648e-06, "loss": 0.0745, "step": 36243 }, { "epoch": 0.641864460058353, "grad_norm": 0.8746560215950012, "learning_rate": 9.00834203808743e-06, "loss": 0.0603, "step": 36244 }, { "epoch": 0.6418821695953814, "grad_norm": 0.9562903046607971, "learning_rate": 9.007553302111161e-06, "loss": 0.0966, "step": 36245 }, { "epoch": 0.6418998791324098, "grad_norm": 1.0510307550430298, "learning_rate": 9.006764585849438e-06, "loss": 0.0706, "step": 36246 }, { "epoch": 0.6419175886694382, "grad_norm": 0.8782188892364502, "learning_rate": 9.00597588930485e-06, "loss": 0.0563, "step": 36247 }, { "epoch": 0.6419352982064667, "grad_norm": 0.5048774480819702, "learning_rate": 9.005187212480001e-06, "loss": 0.0538, "step": 36248 }, { "epoch": 0.6419530077434951, "grad_norm": 0.6167345643043518, "learning_rate": 9.004398555377471e-06, "loss": 0.0338, "step": 36249 }, { "epoch": 0.6419707172805235, "grad_norm": 0.45356592535972595, "learning_rate": 9.003609917999867e-06, "loss": 0.057, "step": 36250 }, { "epoch": 0.641988426817552, "grad_norm": 0.5958948731422424, "learning_rate": 9.002821300349779e-06, "loss": 0.0643, "step": 36251 }, { "epoch": 0.6420061363545804, "grad_norm": 0.5258387327194214, "learning_rate": 9.002032702429809e-06, "loss": 0.0492, "step": 36252 }, { "epoch": 0.6420238458916088, "grad_norm": 0.6673561930656433, "learning_rate": 9.001244124242538e-06, "loss": 0.0444, "step": 36253 }, { "epoch": 0.6420415554286372, "grad_norm": 0.9628141522407532, "learning_rate": 9.000455565790568e-06, "loss": 0.0579, "step": 36254 }, { "epoch": 0.6420592649656657, "grad_norm": 0.9503918290138245, "learning_rate": 8.999667027076495e-06, "loss": 0.0716, "step": 36255 }, { "epoch": 0.6420769745026941, "grad_norm": 0.6566528677940369, "learning_rate": 8.998878508102908e-06, "loss": 0.0406, "step": 36256 }, { "epoch": 0.6420946840397225, "grad_norm": 0.3987855613231659, "learning_rate": 8.998090008872405e-06, "loss": 0.0284, "step": 36257 }, { "epoch": 0.6421123935767509, "grad_norm": 0.5152313709259033, "learning_rate": 8.997301529387574e-06, "loss": 0.0624, "step": 36258 }, { "epoch": 0.6421301031137794, "grad_norm": 0.8285754919052124, "learning_rate": 8.99651306965102e-06, "loss": 0.0495, "step": 36259 }, { "epoch": 0.6421478126508078, "grad_norm": 0.8366257548332214, "learning_rate": 8.995724629665328e-06, "loss": 0.0872, "step": 36260 }, { "epoch": 0.6421655221878362, "grad_norm": 0.9966796636581421, "learning_rate": 8.994936209433095e-06, "loss": 0.0987, "step": 36261 }, { "epoch": 0.6421832317248646, "grad_norm": 0.9528099894523621, "learning_rate": 8.994147808956913e-06, "loss": 0.0609, "step": 36262 }, { "epoch": 0.6422009412618931, "grad_norm": 0.7003437876701355, "learning_rate": 8.993359428239382e-06, "loss": 0.0454, "step": 36263 }, { "epoch": 0.6422186507989215, "grad_norm": 0.8840405941009521, "learning_rate": 8.992571067283088e-06, "loss": 0.0743, "step": 36264 }, { "epoch": 0.6422363603359499, "grad_norm": 0.26650065183639526, "learning_rate": 8.991782726090625e-06, "loss": 0.0507, "step": 36265 }, { "epoch": 0.6422540698729784, "grad_norm": 0.6104863882064819, "learning_rate": 8.990994404664598e-06, "loss": 0.0469, "step": 36266 }, { "epoch": 0.6422717794100068, "grad_norm": 0.34249231219291687, "learning_rate": 8.990206103007584e-06, "loss": 0.0306, "step": 36267 }, { "epoch": 0.6422894889470352, "grad_norm": 0.591249406337738, "learning_rate": 8.989417821122183e-06, "loss": 0.0641, "step": 36268 }, { "epoch": 0.6423071984840636, "grad_norm": 0.5473962426185608, "learning_rate": 8.988629559010993e-06, "loss": 0.0536, "step": 36269 }, { "epoch": 0.6423249080210921, "grad_norm": 0.7124980688095093, "learning_rate": 8.987841316676604e-06, "loss": 0.0623, "step": 36270 }, { "epoch": 0.6423426175581205, "grad_norm": 0.476871520280838, "learning_rate": 8.987053094121605e-06, "loss": 0.1041, "step": 36271 }, { "epoch": 0.6423603270951489, "grad_norm": 0.7122949957847595, "learning_rate": 8.986264891348595e-06, "loss": 0.0655, "step": 36272 }, { "epoch": 0.6423780366321773, "grad_norm": 0.863090991973877, "learning_rate": 8.985476708360169e-06, "loss": 0.0836, "step": 36273 }, { "epoch": 0.6423957461692058, "grad_norm": 0.7708814740180969, "learning_rate": 8.984688545158912e-06, "loss": 0.0652, "step": 36274 }, { "epoch": 0.6424134557062342, "grad_norm": 0.709668755531311, "learning_rate": 8.98390040174742e-06, "loss": 0.0635, "step": 36275 }, { "epoch": 0.6424311652432626, "grad_norm": 0.7467520236968994, "learning_rate": 8.983112278128291e-06, "loss": 0.0845, "step": 36276 }, { "epoch": 0.642448874780291, "grad_norm": 0.8925454020500183, "learning_rate": 8.982324174304118e-06, "loss": 0.0769, "step": 36277 }, { "epoch": 0.6424665843173195, "grad_norm": 0.31787189841270447, "learning_rate": 8.98153609027748e-06, "loss": 0.0281, "step": 36278 }, { "epoch": 0.6424842938543479, "grad_norm": 0.7052867412567139, "learning_rate": 8.980748026050984e-06, "loss": 0.0786, "step": 36279 }, { "epoch": 0.6425020033913763, "grad_norm": 0.9271053671836853, "learning_rate": 8.979959981627223e-06, "loss": 0.0428, "step": 36280 }, { "epoch": 0.6425197129284048, "grad_norm": 0.551744818687439, "learning_rate": 8.979171957008779e-06, "loss": 0.0574, "step": 36281 }, { "epoch": 0.6425374224654332, "grad_norm": 0.8093976974487305, "learning_rate": 8.978383952198251e-06, "loss": 0.0934, "step": 36282 }, { "epoch": 0.6425551320024616, "grad_norm": 0.752073347568512, "learning_rate": 8.977595967198228e-06, "loss": 0.0694, "step": 36283 }, { "epoch": 0.64257284153949, "grad_norm": 0.40900808572769165, "learning_rate": 8.976808002011312e-06, "loss": 0.0523, "step": 36284 }, { "epoch": 0.6425905510765185, "grad_norm": 0.7298945188522339, "learning_rate": 8.976020056640083e-06, "loss": 0.0725, "step": 36285 }, { "epoch": 0.6426082606135469, "grad_norm": 0.7934969067573547, "learning_rate": 8.97523213108714e-06, "loss": 0.0852, "step": 36286 }, { "epoch": 0.6426259701505753, "grad_norm": 0.6049104928970337, "learning_rate": 8.974444225355076e-06, "loss": 0.0802, "step": 36287 }, { "epoch": 0.6426436796876037, "grad_norm": 0.5427731275558472, "learning_rate": 8.97365633944648e-06, "loss": 0.057, "step": 36288 }, { "epoch": 0.6426613892246322, "grad_norm": 0.4882042407989502, "learning_rate": 8.972868473363942e-06, "loss": 0.0782, "step": 36289 }, { "epoch": 0.6426790987616606, "grad_norm": 0.7609800696372986, "learning_rate": 8.97208062711006e-06, "loss": 0.074, "step": 36290 }, { "epoch": 0.642696808298689, "grad_norm": 0.3032148480415344, "learning_rate": 8.971292800687427e-06, "loss": 0.0624, "step": 36291 }, { "epoch": 0.6427145178357174, "grad_norm": 0.4759960472583771, "learning_rate": 8.970504994098623e-06, "loss": 0.0785, "step": 36292 }, { "epoch": 0.6427322273727459, "grad_norm": 1.2672932147979736, "learning_rate": 8.96971720734625e-06, "loss": 0.0625, "step": 36293 }, { "epoch": 0.6427499369097743, "grad_norm": 0.8351455926895142, "learning_rate": 8.9689294404329e-06, "loss": 0.0499, "step": 36294 }, { "epoch": 0.6427676464468027, "grad_norm": 0.6695425510406494, "learning_rate": 8.968141693361164e-06, "loss": 0.0547, "step": 36295 }, { "epoch": 0.6427853559838312, "grad_norm": 0.5531960725784302, "learning_rate": 8.96735396613363e-06, "loss": 0.0649, "step": 36296 }, { "epoch": 0.6428030655208596, "grad_norm": 0.6899420022964478, "learning_rate": 8.966566258752887e-06, "loss": 0.0568, "step": 36297 }, { "epoch": 0.642820775057888, "grad_norm": 0.6946435570716858, "learning_rate": 8.965778571221536e-06, "loss": 0.0644, "step": 36298 }, { "epoch": 0.6428384845949164, "grad_norm": 1.0594500303268433, "learning_rate": 8.96499090354216e-06, "loss": 0.0633, "step": 36299 }, { "epoch": 0.642856194131945, "grad_norm": 0.676925003528595, "learning_rate": 8.964203255717352e-06, "loss": 0.0552, "step": 36300 }, { "epoch": 0.6428739036689733, "grad_norm": 0.3953084647655487, "learning_rate": 8.963415627749708e-06, "loss": 0.033, "step": 36301 }, { "epoch": 0.6428916132060017, "grad_norm": 0.3304474949836731, "learning_rate": 8.962628019641816e-06, "loss": 0.0472, "step": 36302 }, { "epoch": 0.6429093227430301, "grad_norm": 0.5884467959403992, "learning_rate": 8.961840431396266e-06, "loss": 0.0693, "step": 36303 }, { "epoch": 0.6429270322800587, "grad_norm": 0.3755776286125183, "learning_rate": 8.961052863015651e-06, "loss": 0.0531, "step": 36304 }, { "epoch": 0.642944741817087, "grad_norm": 0.4301190674304962, "learning_rate": 8.960265314502565e-06, "loss": 0.0822, "step": 36305 }, { "epoch": 0.6429624513541154, "grad_norm": 0.9889146685600281, "learning_rate": 8.95947778585959e-06, "loss": 0.0775, "step": 36306 }, { "epoch": 0.6429801608911438, "grad_norm": 0.6726367473602295, "learning_rate": 8.958690277089321e-06, "loss": 0.0813, "step": 36307 }, { "epoch": 0.6429978704281724, "grad_norm": 0.5810102820396423, "learning_rate": 8.95790278819435e-06, "loss": 0.0659, "step": 36308 }, { "epoch": 0.6430155799652008, "grad_norm": 0.8442909717559814, "learning_rate": 8.957115319177275e-06, "loss": 0.0905, "step": 36309 }, { "epoch": 0.6430332895022292, "grad_norm": 0.6640690565109253, "learning_rate": 8.956327870040674e-06, "loss": 0.074, "step": 36310 }, { "epoch": 0.6430509990392577, "grad_norm": 0.6631778478622437, "learning_rate": 8.955540440787142e-06, "loss": 0.0887, "step": 36311 }, { "epoch": 0.6430687085762861, "grad_norm": 0.8109843730926514, "learning_rate": 8.954753031419273e-06, "loss": 0.048, "step": 36312 }, { "epoch": 0.6430864181133145, "grad_norm": 0.5816410779953003, "learning_rate": 8.953965641939654e-06, "loss": 0.0615, "step": 36313 }, { "epoch": 0.6431041276503429, "grad_norm": 0.522419810295105, "learning_rate": 8.953178272350875e-06, "loss": 0.059, "step": 36314 }, { "epoch": 0.6431218371873714, "grad_norm": 0.7261852025985718, "learning_rate": 8.95239092265553e-06, "loss": 0.0472, "step": 36315 }, { "epoch": 0.6431395467243998, "grad_norm": 0.76296466588974, "learning_rate": 8.95160359285621e-06, "loss": 0.0678, "step": 36316 }, { "epoch": 0.6431572562614282, "grad_norm": 0.5657321810722351, "learning_rate": 8.950816282955496e-06, "loss": 0.0614, "step": 36317 }, { "epoch": 0.6431749657984566, "grad_norm": 0.5572450757026672, "learning_rate": 8.950028992955986e-06, "loss": 0.0502, "step": 36318 }, { "epoch": 0.6431926753354851, "grad_norm": 0.473962664604187, "learning_rate": 8.949241722860276e-06, "loss": 0.0541, "step": 36319 }, { "epoch": 0.6432103848725135, "grad_norm": 0.7116189002990723, "learning_rate": 8.948454472670944e-06, "loss": 0.0754, "step": 36320 }, { "epoch": 0.6432280944095419, "grad_norm": 0.6672708988189697, "learning_rate": 8.947667242390579e-06, "loss": 0.0914, "step": 36321 }, { "epoch": 0.6432458039465703, "grad_norm": 0.5681184530258179, "learning_rate": 8.94688003202178e-06, "loss": 0.0849, "step": 36322 }, { "epoch": 0.6432635134835988, "grad_norm": 0.6907923817634583, "learning_rate": 8.94609284156714e-06, "loss": 0.0531, "step": 36323 }, { "epoch": 0.6432812230206272, "grad_norm": 0.4123839735984802, "learning_rate": 8.945305671029236e-06, "loss": 0.0564, "step": 36324 }, { "epoch": 0.6432989325576556, "grad_norm": 0.4663655757904053, "learning_rate": 8.944518520410664e-06, "loss": 0.0649, "step": 36325 }, { "epoch": 0.6433166420946841, "grad_norm": 0.8313634395599365, "learning_rate": 8.943731389714016e-06, "loss": 0.0715, "step": 36326 }, { "epoch": 0.6433343516317125, "grad_norm": 0.5960254073143005, "learning_rate": 8.942944278941877e-06, "loss": 0.0444, "step": 36327 }, { "epoch": 0.6433520611687409, "grad_norm": 0.5614907145500183, "learning_rate": 8.94215718809684e-06, "loss": 0.0662, "step": 36328 }, { "epoch": 0.6433697707057693, "grad_norm": 0.4470078647136688, "learning_rate": 8.94137011718149e-06, "loss": 0.0761, "step": 36329 }, { "epoch": 0.6433874802427978, "grad_norm": 0.4488038420677185, "learning_rate": 8.940583066198424e-06, "loss": 0.0419, "step": 36330 }, { "epoch": 0.6434051897798262, "grad_norm": 0.6173544526100159, "learning_rate": 8.939796035150225e-06, "loss": 0.0635, "step": 36331 }, { "epoch": 0.6434228993168546, "grad_norm": 0.5912562012672424, "learning_rate": 8.939009024039484e-06, "loss": 0.0614, "step": 36332 }, { "epoch": 0.643440608853883, "grad_norm": 0.6435223817825317, "learning_rate": 8.938222032868789e-06, "loss": 0.0475, "step": 36333 }, { "epoch": 0.6434583183909115, "grad_norm": 0.8305887579917908, "learning_rate": 8.937435061640737e-06, "loss": 0.0648, "step": 36334 }, { "epoch": 0.6434760279279399, "grad_norm": 0.35190877318382263, "learning_rate": 8.936648110357903e-06, "loss": 0.0676, "step": 36335 }, { "epoch": 0.6434937374649683, "grad_norm": 0.4382759630680084, "learning_rate": 8.935861179022882e-06, "loss": 0.0574, "step": 36336 }, { "epoch": 0.6435114470019967, "grad_norm": 0.7014936804771423, "learning_rate": 8.935074267638275e-06, "loss": 0.0633, "step": 36337 }, { "epoch": 0.6435291565390252, "grad_norm": 0.8830850720405579, "learning_rate": 8.93428737620665e-06, "loss": 0.105, "step": 36338 }, { "epoch": 0.6435468660760536, "grad_norm": 0.8502156734466553, "learning_rate": 8.93350050473061e-06, "loss": 0.0714, "step": 36339 }, { "epoch": 0.643564575613082, "grad_norm": 0.5805099606513977, "learning_rate": 8.932713653212738e-06, "loss": 0.0681, "step": 36340 }, { "epoch": 0.6435822851501105, "grad_norm": 0.6097193956375122, "learning_rate": 8.931926821655628e-06, "loss": 0.0698, "step": 36341 }, { "epoch": 0.6435999946871389, "grad_norm": 0.4365561604499817, "learning_rate": 8.93114001006186e-06, "loss": 0.0453, "step": 36342 }, { "epoch": 0.6436177042241673, "grad_norm": 0.46996331214904785, "learning_rate": 8.93035321843403e-06, "loss": 0.0707, "step": 36343 }, { "epoch": 0.6436354137611957, "grad_norm": 0.6265516877174377, "learning_rate": 8.929566446774725e-06, "loss": 0.055, "step": 36344 }, { "epoch": 0.6436531232982242, "grad_norm": 0.3455897867679596, "learning_rate": 8.92877969508653e-06, "loss": 0.0547, "step": 36345 }, { "epoch": 0.6436708328352526, "grad_norm": 0.35478800535202026, "learning_rate": 8.927992963372038e-06, "loss": 0.0431, "step": 36346 }, { "epoch": 0.643688542372281, "grad_norm": 0.521811842918396, "learning_rate": 8.927206251633834e-06, "loss": 0.0579, "step": 36347 }, { "epoch": 0.6437062519093094, "grad_norm": 0.6447334289550781, "learning_rate": 8.926419559874512e-06, "loss": 0.0522, "step": 36348 }, { "epoch": 0.6437239614463379, "grad_norm": 0.8769596815109253, "learning_rate": 8.92563288809665e-06, "loss": 0.0495, "step": 36349 }, { "epoch": 0.6437416709833663, "grad_norm": 0.676465630531311, "learning_rate": 8.924846236302838e-06, "loss": 0.0614, "step": 36350 }, { "epoch": 0.6437593805203947, "grad_norm": 0.7699036598205566, "learning_rate": 8.924059604495675e-06, "loss": 0.071, "step": 36351 }, { "epoch": 0.6437770900574232, "grad_norm": 0.46205073595046997, "learning_rate": 8.923272992677737e-06, "loss": 0.062, "step": 36352 }, { "epoch": 0.6437947995944516, "grad_norm": 0.6487031579017639, "learning_rate": 8.922486400851617e-06, "loss": 0.0552, "step": 36353 }, { "epoch": 0.64381250913148, "grad_norm": 0.4392223656177521, "learning_rate": 8.9216998290199e-06, "loss": 0.0392, "step": 36354 }, { "epoch": 0.6438302186685084, "grad_norm": 0.7859818339347839, "learning_rate": 8.920913277185181e-06, "loss": 0.0682, "step": 36355 }, { "epoch": 0.6438479282055369, "grad_norm": 0.2638116776943207, "learning_rate": 8.92012674535004e-06, "loss": 0.0531, "step": 36356 }, { "epoch": 0.6438656377425653, "grad_norm": 0.5212170481681824, "learning_rate": 8.919340233517064e-06, "loss": 0.0776, "step": 36357 }, { "epoch": 0.6438833472795937, "grad_norm": 0.774678647518158, "learning_rate": 8.918553741688848e-06, "loss": 0.0548, "step": 36358 }, { "epoch": 0.6439010568166221, "grad_norm": 0.5079925060272217, "learning_rate": 8.917767269867971e-06, "loss": 0.0562, "step": 36359 }, { "epoch": 0.6439187663536506, "grad_norm": 0.6905147433280945, "learning_rate": 8.916980818057027e-06, "loss": 0.0414, "step": 36360 }, { "epoch": 0.643936475890679, "grad_norm": 0.7187828421592712, "learning_rate": 8.916194386258601e-06, "loss": 0.0543, "step": 36361 }, { "epoch": 0.6439541854277074, "grad_norm": 0.5616216659545898, "learning_rate": 8.915407974475283e-06, "loss": 0.0762, "step": 36362 }, { "epoch": 0.6439718949647358, "grad_norm": 0.5010627508163452, "learning_rate": 8.914621582709655e-06, "loss": 0.0633, "step": 36363 }, { "epoch": 0.6439896045017643, "grad_norm": 0.5561059713363647, "learning_rate": 8.913835210964304e-06, "loss": 0.0531, "step": 36364 }, { "epoch": 0.6440073140387927, "grad_norm": 0.6076298356056213, "learning_rate": 8.913048859241816e-06, "loss": 0.0539, "step": 36365 }, { "epoch": 0.6440250235758211, "grad_norm": 0.8158057332038879, "learning_rate": 8.912262527544792e-06, "loss": 0.071, "step": 36366 }, { "epoch": 0.6440427331128497, "grad_norm": 0.505885660648346, "learning_rate": 8.911476215875802e-06, "loss": 0.0459, "step": 36367 }, { "epoch": 0.644060442649878, "grad_norm": 0.7096245288848877, "learning_rate": 8.91068992423744e-06, "loss": 0.0698, "step": 36368 }, { "epoch": 0.6440781521869064, "grad_norm": 0.6584905385971069, "learning_rate": 8.909903652632293e-06, "loss": 0.0831, "step": 36369 }, { "epoch": 0.6440958617239348, "grad_norm": 0.475953608751297, "learning_rate": 8.909117401062947e-06, "loss": 0.0361, "step": 36370 }, { "epoch": 0.6441135712609634, "grad_norm": 0.5375869274139404, "learning_rate": 8.90833116953199e-06, "loss": 0.0563, "step": 36371 }, { "epoch": 0.6441312807979918, "grad_norm": 0.6709325313568115, "learning_rate": 8.907544958042005e-06, "loss": 0.0912, "step": 36372 }, { "epoch": 0.6441489903350202, "grad_norm": 0.7079216241836548, "learning_rate": 8.906758766595582e-06, "loss": 0.0644, "step": 36373 }, { "epoch": 0.6441666998720486, "grad_norm": 1.0657143592834473, "learning_rate": 8.905972595195306e-06, "loss": 0.0772, "step": 36374 }, { "epoch": 0.6441844094090771, "grad_norm": 0.5620917081832886, "learning_rate": 8.905186443843762e-06, "loss": 0.048, "step": 36375 }, { "epoch": 0.6442021189461055, "grad_norm": 0.749895453453064, "learning_rate": 8.904400312543545e-06, "loss": 0.0704, "step": 36376 }, { "epoch": 0.6442198284831339, "grad_norm": 0.31085917353630066, "learning_rate": 8.903614201297228e-06, "loss": 0.0514, "step": 36377 }, { "epoch": 0.6442375380201623, "grad_norm": 0.5087569355964661, "learning_rate": 8.902828110107406e-06, "loss": 0.0424, "step": 36378 }, { "epoch": 0.6442552475571908, "grad_norm": 0.613275408744812, "learning_rate": 8.902042038976657e-06, "loss": 0.0361, "step": 36379 }, { "epoch": 0.6442729570942192, "grad_norm": 0.4399957060813904, "learning_rate": 8.901255987907583e-06, "loss": 0.0616, "step": 36380 }, { "epoch": 0.6442906666312476, "grad_norm": 0.3958605229854584, "learning_rate": 8.900469956902754e-06, "loss": 0.0482, "step": 36381 }, { "epoch": 0.6443083761682761, "grad_norm": 0.868730366230011, "learning_rate": 8.899683945964763e-06, "loss": 0.0477, "step": 36382 }, { "epoch": 0.6443260857053045, "grad_norm": 0.644890308380127, "learning_rate": 8.898897955096195e-06, "loss": 0.0724, "step": 36383 }, { "epoch": 0.6443437952423329, "grad_norm": 0.4532430171966553, "learning_rate": 8.898111984299634e-06, "loss": 0.0592, "step": 36384 }, { "epoch": 0.6443615047793613, "grad_norm": 0.09891735762357712, "learning_rate": 8.897326033577668e-06, "loss": 0.0504, "step": 36385 }, { "epoch": 0.6443792143163898, "grad_norm": 0.8579571843147278, "learning_rate": 8.896540102932882e-06, "loss": 0.0732, "step": 36386 }, { "epoch": 0.6443969238534182, "grad_norm": 0.940222442150116, "learning_rate": 8.895754192367865e-06, "loss": 0.073, "step": 36387 }, { "epoch": 0.6444146333904466, "grad_norm": 0.5261052846908569, "learning_rate": 8.894968301885195e-06, "loss": 0.0689, "step": 36388 }, { "epoch": 0.644432342927475, "grad_norm": 0.5375280380249023, "learning_rate": 8.894182431487463e-06, "loss": 0.0835, "step": 36389 }, { "epoch": 0.6444500524645035, "grad_norm": 0.7530656456947327, "learning_rate": 8.89339658117726e-06, "loss": 0.0711, "step": 36390 }, { "epoch": 0.6444677620015319, "grad_norm": 0.5517652034759521, "learning_rate": 8.892610750957157e-06, "loss": 0.0619, "step": 36391 }, { "epoch": 0.6444854715385603, "grad_norm": 0.6457176208496094, "learning_rate": 8.891824940829746e-06, "loss": 0.053, "step": 36392 }, { "epoch": 0.6445031810755887, "grad_norm": 0.5419765114784241, "learning_rate": 8.891039150797616e-06, "loss": 0.0672, "step": 36393 }, { "epoch": 0.6445208906126172, "grad_norm": 0.619504988193512, "learning_rate": 8.890253380863352e-06, "loss": 0.0558, "step": 36394 }, { "epoch": 0.6445386001496456, "grad_norm": 0.797523558139801, "learning_rate": 8.88946763102953e-06, "loss": 0.0749, "step": 36395 }, { "epoch": 0.644556309686674, "grad_norm": 0.7374597787857056, "learning_rate": 8.888681901298745e-06, "loss": 0.0577, "step": 36396 }, { "epoch": 0.6445740192237025, "grad_norm": 0.5727697014808655, "learning_rate": 8.887896191673575e-06, "loss": 0.0688, "step": 36397 }, { "epoch": 0.6445917287607309, "grad_norm": 0.3940356373786926, "learning_rate": 8.887110502156616e-06, "loss": 0.0695, "step": 36398 }, { "epoch": 0.6446094382977593, "grad_norm": 0.5882638692855835, "learning_rate": 8.88632483275044e-06, "loss": 0.055, "step": 36399 }, { "epoch": 0.6446271478347877, "grad_norm": 0.9877792000770569, "learning_rate": 8.885539183457634e-06, "loss": 0.0769, "step": 36400 }, { "epoch": 0.6446448573718162, "grad_norm": 0.6893185973167419, "learning_rate": 8.884753554280796e-06, "loss": 0.0819, "step": 36401 }, { "epoch": 0.6446625669088446, "grad_norm": 0.9121567606925964, "learning_rate": 8.883967945222489e-06, "loss": 0.0827, "step": 36402 }, { "epoch": 0.644680276445873, "grad_norm": 0.6761972904205322, "learning_rate": 8.883182356285311e-06, "loss": 0.0677, "step": 36403 }, { "epoch": 0.6446979859829014, "grad_norm": 0.972693145275116, "learning_rate": 8.882396787471847e-06, "loss": 0.0654, "step": 36404 }, { "epoch": 0.6447156955199299, "grad_norm": 0.4297705888748169, "learning_rate": 8.881611238784683e-06, "loss": 0.0582, "step": 36405 }, { "epoch": 0.6447334050569583, "grad_norm": 0.9533321857452393, "learning_rate": 8.880825710226394e-06, "loss": 0.07, "step": 36406 }, { "epoch": 0.6447511145939867, "grad_norm": 0.48520976305007935, "learning_rate": 8.880040201799568e-06, "loss": 0.0878, "step": 36407 }, { "epoch": 0.6447688241310151, "grad_norm": 0.7174879908561707, "learning_rate": 8.879254713506794e-06, "loss": 0.0921, "step": 36408 }, { "epoch": 0.6447865336680436, "grad_norm": 0.7432080507278442, "learning_rate": 8.878469245350652e-06, "loss": 0.0741, "step": 36409 }, { "epoch": 0.644804243205072, "grad_norm": 0.7360725998878479, "learning_rate": 8.877683797333726e-06, "loss": 0.069, "step": 36410 }, { "epoch": 0.6448219527421004, "grad_norm": 0.5454699397087097, "learning_rate": 8.876898369458601e-06, "loss": 0.0703, "step": 36411 }, { "epoch": 0.6448396622791289, "grad_norm": 1.1845420598983765, "learning_rate": 8.876112961727863e-06, "loss": 0.113, "step": 36412 }, { "epoch": 0.6448573718161573, "grad_norm": 0.4578951597213745, "learning_rate": 8.875327574144092e-06, "loss": 0.0688, "step": 36413 }, { "epoch": 0.6448750813531857, "grad_norm": 0.7876446843147278, "learning_rate": 8.874542206709876e-06, "loss": 0.083, "step": 36414 }, { "epoch": 0.6448927908902141, "grad_norm": 0.6640535593032837, "learning_rate": 8.8737568594278e-06, "loss": 0.0549, "step": 36415 }, { "epoch": 0.6449105004272426, "grad_norm": 0.42280465364456177, "learning_rate": 8.872971532300434e-06, "loss": 0.052, "step": 36416 }, { "epoch": 0.644928209964271, "grad_norm": 0.7027175426483154, "learning_rate": 8.872186225330377e-06, "loss": 0.0767, "step": 36417 }, { "epoch": 0.6449459195012994, "grad_norm": 0.6826172471046448, "learning_rate": 8.871400938520206e-06, "loss": 0.0754, "step": 36418 }, { "epoch": 0.6449636290383278, "grad_norm": 0.706174910068512, "learning_rate": 8.870615671872514e-06, "loss": 0.0835, "step": 36419 }, { "epoch": 0.6449813385753563, "grad_norm": 0.4168221354484558, "learning_rate": 8.86983042538987e-06, "loss": 0.0664, "step": 36420 }, { "epoch": 0.6449990481123847, "grad_norm": 0.6416240930557251, "learning_rate": 8.869045199074864e-06, "loss": 0.0609, "step": 36421 }, { "epoch": 0.6450167576494131, "grad_norm": 0.5087714195251465, "learning_rate": 8.868259992930082e-06, "loss": 0.0519, "step": 36422 }, { "epoch": 0.6450344671864415, "grad_norm": 0.46845972537994385, "learning_rate": 8.8674748069581e-06, "loss": 0.0651, "step": 36423 }, { "epoch": 0.64505217672347, "grad_norm": 0.44797834753990173, "learning_rate": 8.866689641161507e-06, "loss": 0.046, "step": 36424 }, { "epoch": 0.6450698862604984, "grad_norm": 0.7062824964523315, "learning_rate": 8.865904495542885e-06, "loss": 0.0708, "step": 36425 }, { "epoch": 0.6450875957975268, "grad_norm": 0.6921275854110718, "learning_rate": 8.865119370104818e-06, "loss": 0.0936, "step": 36426 }, { "epoch": 0.6451053053345553, "grad_norm": 0.4265204071998596, "learning_rate": 8.864334264849888e-06, "loss": 0.0574, "step": 36427 }, { "epoch": 0.6451230148715837, "grad_norm": 0.5180208086967468, "learning_rate": 8.863549179780674e-06, "loss": 0.0603, "step": 36428 }, { "epoch": 0.6451407244086121, "grad_norm": 0.6909295320510864, "learning_rate": 8.862764114899771e-06, "loss": 0.0601, "step": 36429 }, { "epoch": 0.6451584339456405, "grad_norm": 0.4732300937175751, "learning_rate": 8.861979070209746e-06, "loss": 0.0624, "step": 36430 }, { "epoch": 0.645176143482669, "grad_norm": 1.0256788730621338, "learning_rate": 8.861194045713188e-06, "loss": 0.0831, "step": 36431 }, { "epoch": 0.6451938530196974, "grad_norm": 0.7633603811264038, "learning_rate": 8.860409041412681e-06, "loss": 0.0997, "step": 36432 }, { "epoch": 0.6452115625567258, "grad_norm": 0.4914707541465759, "learning_rate": 8.859624057310815e-06, "loss": 0.0759, "step": 36433 }, { "epoch": 0.6452292720937542, "grad_norm": 0.5390878915786743, "learning_rate": 8.858839093410159e-06, "loss": 0.0492, "step": 36434 }, { "epoch": 0.6452469816307828, "grad_norm": 0.4766077995300293, "learning_rate": 8.8580541497133e-06, "loss": 0.0399, "step": 36435 }, { "epoch": 0.6452646911678112, "grad_norm": 0.5755807161331177, "learning_rate": 8.857269226222823e-06, "loss": 0.0788, "step": 36436 }, { "epoch": 0.6452824007048396, "grad_norm": 0.5860962271690369, "learning_rate": 8.85648432294131e-06, "loss": 0.0585, "step": 36437 }, { "epoch": 0.645300110241868, "grad_norm": 0.6806942820549011, "learning_rate": 8.855699439871343e-06, "loss": 0.0578, "step": 36438 }, { "epoch": 0.6453178197788965, "grad_norm": 0.3503214120864868, "learning_rate": 8.854914577015499e-06, "loss": 0.0672, "step": 36439 }, { "epoch": 0.6453355293159249, "grad_norm": 0.4332248866558075, "learning_rate": 8.854129734376369e-06, "loss": 0.0671, "step": 36440 }, { "epoch": 0.6453532388529533, "grad_norm": 0.6003896594047546, "learning_rate": 8.853344911956528e-06, "loss": 0.0634, "step": 36441 }, { "epoch": 0.6453709483899818, "grad_norm": 0.4121384024620056, "learning_rate": 8.85256010975856e-06, "loss": 0.0759, "step": 36442 }, { "epoch": 0.6453886579270102, "grad_norm": 0.8531633615493774, "learning_rate": 8.85177532778505e-06, "loss": 0.0895, "step": 36443 }, { "epoch": 0.6454063674640386, "grad_norm": 0.5137739181518555, "learning_rate": 8.85099056603858e-06, "loss": 0.0522, "step": 36444 }, { "epoch": 0.645424077001067, "grad_norm": 1.1586090326309204, "learning_rate": 8.850205824521721e-06, "loss": 0.0633, "step": 36445 }, { "epoch": 0.6454417865380955, "grad_norm": 0.7352125644683838, "learning_rate": 8.849421103237065e-06, "loss": 0.0731, "step": 36446 }, { "epoch": 0.6454594960751239, "grad_norm": 0.4582304358482361, "learning_rate": 8.8486364021872e-06, "loss": 0.0832, "step": 36447 }, { "epoch": 0.6454772056121523, "grad_norm": 0.6732280850410461, "learning_rate": 8.84785172137469e-06, "loss": 0.0421, "step": 36448 }, { "epoch": 0.6454949151491807, "grad_norm": 0.7504270672798157, "learning_rate": 8.847067060802127e-06, "loss": 0.0707, "step": 36449 }, { "epoch": 0.6455126246862092, "grad_norm": 0.7578535676002502, "learning_rate": 8.846282420472093e-06, "loss": 0.0602, "step": 36450 }, { "epoch": 0.6455303342232376, "grad_norm": 0.6996728777885437, "learning_rate": 8.845497800387167e-06, "loss": 0.0709, "step": 36451 }, { "epoch": 0.645548043760266, "grad_norm": 0.7641627788543701, "learning_rate": 8.84471320054993e-06, "loss": 0.0699, "step": 36452 }, { "epoch": 0.6455657532972944, "grad_norm": 0.5290562510490417, "learning_rate": 8.843928620962964e-06, "loss": 0.0689, "step": 36453 }, { "epoch": 0.6455834628343229, "grad_norm": 0.6035690903663635, "learning_rate": 8.843144061628852e-06, "loss": 0.0663, "step": 36454 }, { "epoch": 0.6456011723713513, "grad_norm": 0.6177418828010559, "learning_rate": 8.84235952255017e-06, "loss": 0.0533, "step": 36455 }, { "epoch": 0.6456188819083797, "grad_norm": 0.26819074153900146, "learning_rate": 8.841575003729503e-06, "loss": 0.0672, "step": 36456 }, { "epoch": 0.6456365914454082, "grad_norm": 0.5032821297645569, "learning_rate": 8.840790505169433e-06, "loss": 0.0626, "step": 36457 }, { "epoch": 0.6456543009824366, "grad_norm": 0.7562644481658936, "learning_rate": 8.840006026872544e-06, "loss": 0.0746, "step": 36458 }, { "epoch": 0.645672010519465, "grad_norm": 0.5769405364990234, "learning_rate": 8.839221568841406e-06, "loss": 0.0829, "step": 36459 }, { "epoch": 0.6456897200564934, "grad_norm": 0.3812374174594879, "learning_rate": 8.838437131078602e-06, "loss": 0.0391, "step": 36460 }, { "epoch": 0.6457074295935219, "grad_norm": 0.6429237723350525, "learning_rate": 8.837652713586727e-06, "loss": 0.0813, "step": 36461 }, { "epoch": 0.6457251391305503, "grad_norm": 0.5028193593025208, "learning_rate": 8.836868316368343e-06, "loss": 0.0754, "step": 36462 }, { "epoch": 0.6457428486675787, "grad_norm": 0.517334520816803, "learning_rate": 8.836083939426043e-06, "loss": 0.0804, "step": 36463 }, { "epoch": 0.6457605582046071, "grad_norm": 0.4722476899623871, "learning_rate": 8.8352995827624e-06, "loss": 0.0617, "step": 36464 }, { "epoch": 0.6457782677416356, "grad_norm": 0.4348243474960327, "learning_rate": 8.834515246380001e-06, "loss": 0.0342, "step": 36465 }, { "epoch": 0.645795977278664, "grad_norm": 0.7039474844932556, "learning_rate": 8.833730930281424e-06, "loss": 0.0641, "step": 36466 }, { "epoch": 0.6458136868156924, "grad_norm": 0.3656904995441437, "learning_rate": 8.832946634469245e-06, "loss": 0.0674, "step": 36467 }, { "epoch": 0.6458313963527208, "grad_norm": 0.6928630471229553, "learning_rate": 8.832162358946047e-06, "loss": 0.0847, "step": 36468 }, { "epoch": 0.6458491058897493, "grad_norm": 0.7238518595695496, "learning_rate": 8.831378103714417e-06, "loss": 0.0444, "step": 36469 }, { "epoch": 0.6458668154267777, "grad_norm": 0.45306605100631714, "learning_rate": 8.830593868776926e-06, "loss": 0.0704, "step": 36470 }, { "epoch": 0.6458845249638061, "grad_norm": 1.0179530382156372, "learning_rate": 8.829809654136155e-06, "loss": 0.0756, "step": 36471 }, { "epoch": 0.6459022345008346, "grad_norm": 0.64994215965271, "learning_rate": 8.829025459794693e-06, "loss": 0.0644, "step": 36472 }, { "epoch": 0.645919944037863, "grad_norm": 0.7812476754188538, "learning_rate": 8.828241285755106e-06, "loss": 0.0775, "step": 36473 }, { "epoch": 0.6459376535748914, "grad_norm": 0.6834189295768738, "learning_rate": 8.827457132019979e-06, "loss": 0.0741, "step": 36474 }, { "epoch": 0.6459553631119198, "grad_norm": 0.5960589647293091, "learning_rate": 8.826672998591897e-06, "loss": 0.0833, "step": 36475 }, { "epoch": 0.6459730726489483, "grad_norm": 0.913181722164154, "learning_rate": 8.825888885473442e-06, "loss": 0.0835, "step": 36476 }, { "epoch": 0.6459907821859767, "grad_norm": 0.6260813474655151, "learning_rate": 8.825104792667183e-06, "loss": 0.0686, "step": 36477 }, { "epoch": 0.6460084917230051, "grad_norm": 0.518838107585907, "learning_rate": 8.824320720175705e-06, "loss": 0.0512, "step": 36478 }, { "epoch": 0.6460262012600335, "grad_norm": 0.7407025098800659, "learning_rate": 8.823536668001588e-06, "loss": 0.0702, "step": 36479 }, { "epoch": 0.646043910797062, "grad_norm": 0.5562832355499268, "learning_rate": 8.82275263614741e-06, "loss": 0.0469, "step": 36480 }, { "epoch": 0.6460616203340904, "grad_norm": 0.6076432466506958, "learning_rate": 8.821968624615748e-06, "loss": 0.0633, "step": 36481 }, { "epoch": 0.6460793298711188, "grad_norm": 0.6728842854499817, "learning_rate": 8.821184633409186e-06, "loss": 0.0782, "step": 36482 }, { "epoch": 0.6460970394081472, "grad_norm": 0.34420084953308105, "learning_rate": 8.820400662530304e-06, "loss": 0.0302, "step": 36483 }, { "epoch": 0.6461147489451757, "grad_norm": 0.6204451322555542, "learning_rate": 8.819616711981677e-06, "loss": 0.0759, "step": 36484 }, { "epoch": 0.6461324584822041, "grad_norm": 0.5606496930122375, "learning_rate": 8.818832781765886e-06, "loss": 0.0395, "step": 36485 }, { "epoch": 0.6461501680192325, "grad_norm": 0.7074707746505737, "learning_rate": 8.818048871885516e-06, "loss": 0.0795, "step": 36486 }, { "epoch": 0.646167877556261, "grad_norm": 0.7106978297233582, "learning_rate": 8.817264982343133e-06, "loss": 0.0936, "step": 36487 }, { "epoch": 0.6461855870932894, "grad_norm": 0.7549358010292053, "learning_rate": 8.816481113141326e-06, "loss": 0.0598, "step": 36488 }, { "epoch": 0.6462032966303178, "grad_norm": 0.6814915537834167, "learning_rate": 8.815697264282665e-06, "loss": 0.0816, "step": 36489 }, { "epoch": 0.6462210061673462, "grad_norm": 0.6895772814750671, "learning_rate": 8.814913435769742e-06, "loss": 0.0437, "step": 36490 }, { "epoch": 0.6462387157043747, "grad_norm": 0.4152645170688629, "learning_rate": 8.814129627605122e-06, "loss": 0.0783, "step": 36491 }, { "epoch": 0.6462564252414031, "grad_norm": 0.5871875286102295, "learning_rate": 8.813345839791394e-06, "loss": 0.0544, "step": 36492 }, { "epoch": 0.6462741347784315, "grad_norm": 0.9484118819236755, "learning_rate": 8.81256207233113e-06, "loss": 0.0525, "step": 36493 }, { "epoch": 0.6462918443154599, "grad_norm": 0.5645924806594849, "learning_rate": 8.811778325226912e-06, "loss": 0.0454, "step": 36494 }, { "epoch": 0.6463095538524884, "grad_norm": 0.9219342470169067, "learning_rate": 8.810994598481316e-06, "loss": 0.059, "step": 36495 }, { "epoch": 0.6463272633895168, "grad_norm": 0.8153582811355591, "learning_rate": 8.810210892096923e-06, "loss": 0.0634, "step": 36496 }, { "epoch": 0.6463449729265452, "grad_norm": 0.41692090034484863, "learning_rate": 8.809427206076312e-06, "loss": 0.0474, "step": 36497 }, { "epoch": 0.6463626824635736, "grad_norm": 0.4309428632259369, "learning_rate": 8.808643540422059e-06, "loss": 0.0843, "step": 36498 }, { "epoch": 0.6463803920006022, "grad_norm": 0.407278835773468, "learning_rate": 8.807859895136738e-06, "loss": 0.0565, "step": 36499 }, { "epoch": 0.6463981015376306, "grad_norm": 0.3663448393344879, "learning_rate": 8.807076270222933e-06, "loss": 0.0509, "step": 36500 }, { "epoch": 0.646415811074659, "grad_norm": 0.49163150787353516, "learning_rate": 8.806292665683228e-06, "loss": 0.0728, "step": 36501 }, { "epoch": 0.6464335206116875, "grad_norm": 0.5331254601478577, "learning_rate": 8.80550908152019e-06, "loss": 0.0548, "step": 36502 }, { "epoch": 0.6464512301487159, "grad_norm": 0.33923017978668213, "learning_rate": 8.80472551773639e-06, "loss": 0.0488, "step": 36503 }, { "epoch": 0.6464689396857443, "grad_norm": 0.4091063141822815, "learning_rate": 8.803941974334433e-06, "loss": 0.0523, "step": 36504 }, { "epoch": 0.6464866492227727, "grad_norm": 0.3680310547351837, "learning_rate": 8.80315845131687e-06, "loss": 0.047, "step": 36505 }, { "epoch": 0.6465043587598012, "grad_norm": 0.6294880509376526, "learning_rate": 8.80237494868629e-06, "loss": 0.0665, "step": 36506 }, { "epoch": 0.6465220682968296, "grad_norm": 0.8177700638771057, "learning_rate": 8.801591466445272e-06, "loss": 0.0599, "step": 36507 }, { "epoch": 0.646539777833858, "grad_norm": 0.9724062085151672, "learning_rate": 8.800808004596391e-06, "loss": 0.0885, "step": 36508 }, { "epoch": 0.6465574873708864, "grad_norm": 0.7191776037216187, "learning_rate": 8.800024563142224e-06, "loss": 0.0577, "step": 36509 }, { "epoch": 0.6465751969079149, "grad_norm": 0.6602907776832581, "learning_rate": 8.799241142085346e-06, "loss": 0.1125, "step": 36510 }, { "epoch": 0.6465929064449433, "grad_norm": 0.9370369911193848, "learning_rate": 8.798457741428342e-06, "loss": 0.0901, "step": 36511 }, { "epoch": 0.6466106159819717, "grad_norm": 0.6139997243881226, "learning_rate": 8.797674361173782e-06, "loss": 0.0526, "step": 36512 }, { "epoch": 0.6466283255190001, "grad_norm": 0.6212480068206787, "learning_rate": 8.796891001324247e-06, "loss": 0.0501, "step": 36513 }, { "epoch": 0.6466460350560286, "grad_norm": 0.4642848074436188, "learning_rate": 8.796107661882314e-06, "loss": 0.0694, "step": 36514 }, { "epoch": 0.646663744593057, "grad_norm": 0.5736870169639587, "learning_rate": 8.79532434285056e-06, "loss": 0.0667, "step": 36515 }, { "epoch": 0.6466814541300854, "grad_norm": 0.42127159237861633, "learning_rate": 8.794541044231562e-06, "loss": 0.0468, "step": 36516 }, { "epoch": 0.6466991636671139, "grad_norm": 0.36593666672706604, "learning_rate": 8.793757766027891e-06, "loss": 0.0495, "step": 36517 }, { "epoch": 0.6467168732041423, "grad_norm": 0.5359753966331482, "learning_rate": 8.792974508242136e-06, "loss": 0.0712, "step": 36518 }, { "epoch": 0.6467345827411707, "grad_norm": 0.4608743488788605, "learning_rate": 8.792191270876861e-06, "loss": 0.0789, "step": 36519 }, { "epoch": 0.6467522922781991, "grad_norm": 0.43659162521362305, "learning_rate": 8.79140805393465e-06, "loss": 0.0448, "step": 36520 }, { "epoch": 0.6467700018152276, "grad_norm": 0.8066451549530029, "learning_rate": 8.790624857418079e-06, "loss": 0.0712, "step": 36521 }, { "epoch": 0.646787711352256, "grad_norm": 0.742721438407898, "learning_rate": 8.789841681329727e-06, "loss": 0.0808, "step": 36522 }, { "epoch": 0.6468054208892844, "grad_norm": 0.3895619213581085, "learning_rate": 8.789058525672166e-06, "loss": 0.054, "step": 36523 }, { "epoch": 0.6468231304263128, "grad_norm": 0.4803573191165924, "learning_rate": 8.788275390447974e-06, "loss": 0.0692, "step": 36524 }, { "epoch": 0.6468408399633413, "grad_norm": 0.5265199542045593, "learning_rate": 8.787492275659732e-06, "loss": 0.0626, "step": 36525 }, { "epoch": 0.6468585495003697, "grad_norm": 0.6646203398704529, "learning_rate": 8.786709181310004e-06, "loss": 0.0492, "step": 36526 }, { "epoch": 0.6468762590373981, "grad_norm": 1.0131746530532837, "learning_rate": 8.785926107401377e-06, "loss": 0.0841, "step": 36527 }, { "epoch": 0.6468939685744265, "grad_norm": 0.5842082500457764, "learning_rate": 8.785143053936425e-06, "loss": 0.0675, "step": 36528 }, { "epoch": 0.646911678111455, "grad_norm": 0.4859890937805176, "learning_rate": 8.78436002091773e-06, "loss": 0.0476, "step": 36529 }, { "epoch": 0.6469293876484834, "grad_norm": 0.2565542459487915, "learning_rate": 8.783577008347854e-06, "loss": 0.0338, "step": 36530 }, { "epoch": 0.6469470971855118, "grad_norm": 0.7161397933959961, "learning_rate": 8.782794016229384e-06, "loss": 0.0638, "step": 36531 }, { "epoch": 0.6469648067225403, "grad_norm": 0.7832669615745544, "learning_rate": 8.782011044564894e-06, "loss": 0.083, "step": 36532 }, { "epoch": 0.6469825162595687, "grad_norm": 0.6425275802612305, "learning_rate": 8.781228093356954e-06, "loss": 0.0515, "step": 36533 }, { "epoch": 0.6470002257965971, "grad_norm": 0.35441967844963074, "learning_rate": 8.780445162608148e-06, "loss": 0.0542, "step": 36534 }, { "epoch": 0.6470179353336255, "grad_norm": 0.35374778509140015, "learning_rate": 8.779662252321047e-06, "loss": 0.0419, "step": 36535 }, { "epoch": 0.647035644870654, "grad_norm": 0.5543762445449829, "learning_rate": 8.778879362498232e-06, "loss": 0.0522, "step": 36536 }, { "epoch": 0.6470533544076824, "grad_norm": 0.5696794390678406, "learning_rate": 8.778096493142269e-06, "loss": 0.0597, "step": 36537 }, { "epoch": 0.6470710639447108, "grad_norm": 0.2789943516254425, "learning_rate": 8.77731364425574e-06, "loss": 0.0565, "step": 36538 }, { "epoch": 0.6470887734817392, "grad_norm": 0.6456663608551025, "learning_rate": 8.776530815841222e-06, "loss": 0.0821, "step": 36539 }, { "epoch": 0.6471064830187677, "grad_norm": 0.8963542580604553, "learning_rate": 8.775748007901289e-06, "loss": 0.0674, "step": 36540 }, { "epoch": 0.6471241925557961, "grad_norm": 0.8220882415771484, "learning_rate": 8.774965220438513e-06, "loss": 0.0736, "step": 36541 }, { "epoch": 0.6471419020928245, "grad_norm": 0.4298146665096283, "learning_rate": 8.774182453455472e-06, "loss": 0.044, "step": 36542 }, { "epoch": 0.6471596116298529, "grad_norm": 0.3459453880786896, "learning_rate": 8.773399706954749e-06, "loss": 0.0324, "step": 36543 }, { "epoch": 0.6471773211668814, "grad_norm": 0.7571256756782532, "learning_rate": 8.7726169809389e-06, "loss": 0.0497, "step": 36544 }, { "epoch": 0.6471950307039098, "grad_norm": 0.6379508972167969, "learning_rate": 8.771834275410516e-06, "loss": 0.06, "step": 36545 }, { "epoch": 0.6472127402409382, "grad_norm": 0.6290884017944336, "learning_rate": 8.771051590372165e-06, "loss": 0.0846, "step": 36546 }, { "epoch": 0.6472304497779667, "grad_norm": 0.49942031502723694, "learning_rate": 8.770268925826428e-06, "loss": 0.0659, "step": 36547 }, { "epoch": 0.6472481593149951, "grad_norm": 0.6399222016334534, "learning_rate": 8.769486281775874e-06, "loss": 0.061, "step": 36548 }, { "epoch": 0.6472658688520235, "grad_norm": 0.3831181526184082, "learning_rate": 8.768703658223077e-06, "loss": 0.0576, "step": 36549 }, { "epoch": 0.6472835783890519, "grad_norm": 0.6573750972747803, "learning_rate": 8.76792105517062e-06, "loss": 0.0673, "step": 36550 }, { "epoch": 0.6473012879260804, "grad_norm": 0.8307440280914307, "learning_rate": 8.767138472621071e-06, "loss": 0.0657, "step": 36551 }, { "epoch": 0.6473189974631088, "grad_norm": 0.641971230506897, "learning_rate": 8.766355910577005e-06, "loss": 0.0439, "step": 36552 }, { "epoch": 0.6473367070001372, "grad_norm": 0.3057861626148224, "learning_rate": 8.765573369040995e-06, "loss": 0.0483, "step": 36553 }, { "epoch": 0.6473544165371656, "grad_norm": 0.614225447177887, "learning_rate": 8.764790848015626e-06, "loss": 0.0407, "step": 36554 }, { "epoch": 0.6473721260741941, "grad_norm": 0.4325579106807709, "learning_rate": 8.764008347503454e-06, "loss": 0.043, "step": 36555 }, { "epoch": 0.6473898356112225, "grad_norm": 0.724611759185791, "learning_rate": 8.763225867507068e-06, "loss": 0.0719, "step": 36556 }, { "epoch": 0.6474075451482509, "grad_norm": 0.4530296325683594, "learning_rate": 8.762443408029046e-06, "loss": 0.0759, "step": 36557 }, { "epoch": 0.6474252546852793, "grad_norm": 0.2689599096775055, "learning_rate": 8.761660969071947e-06, "loss": 0.0338, "step": 36558 }, { "epoch": 0.6474429642223078, "grad_norm": 0.6688099503517151, "learning_rate": 8.760878550638351e-06, "loss": 0.0556, "step": 36559 }, { "epoch": 0.6474606737593362, "grad_norm": 0.5938910841941833, "learning_rate": 8.760096152730833e-06, "loss": 0.0591, "step": 36560 }, { "epoch": 0.6474783832963646, "grad_norm": 0.47856754064559937, "learning_rate": 8.759313775351973e-06, "loss": 0.0726, "step": 36561 }, { "epoch": 0.6474960928333932, "grad_norm": 0.7156885862350464, "learning_rate": 8.758531418504335e-06, "loss": 0.0574, "step": 36562 }, { "epoch": 0.6475138023704216, "grad_norm": 0.3762720823287964, "learning_rate": 8.757749082190499e-06, "loss": 0.0756, "step": 36563 }, { "epoch": 0.64753151190745, "grad_norm": 0.42901256680488586, "learning_rate": 8.756966766413039e-06, "loss": 0.054, "step": 36564 }, { "epoch": 0.6475492214444784, "grad_norm": 0.6362947821617126, "learning_rate": 8.756184471174524e-06, "loss": 0.0735, "step": 36565 }, { "epoch": 0.6475669309815069, "grad_norm": 0.48865893483161926, "learning_rate": 8.755402196477532e-06, "loss": 0.0728, "step": 36566 }, { "epoch": 0.6475846405185353, "grad_norm": 0.44170355796813965, "learning_rate": 8.754619942324633e-06, "loss": 0.06, "step": 36567 }, { "epoch": 0.6476023500555637, "grad_norm": 0.5655947923660278, "learning_rate": 8.753837708718402e-06, "loss": 0.0617, "step": 36568 }, { "epoch": 0.6476200595925921, "grad_norm": 0.7381083965301514, "learning_rate": 8.753055495661415e-06, "loss": 0.0874, "step": 36569 }, { "epoch": 0.6476377691296206, "grad_norm": 0.6162121295928955, "learning_rate": 8.752273303156243e-06, "loss": 0.0476, "step": 36570 }, { "epoch": 0.647655478666649, "grad_norm": 0.4613930583000183, "learning_rate": 8.75149113120546e-06, "loss": 0.0627, "step": 36571 }, { "epoch": 0.6476731882036774, "grad_norm": 0.8319924473762512, "learning_rate": 8.750708979811644e-06, "loss": 0.0561, "step": 36572 }, { "epoch": 0.6476908977407058, "grad_norm": 0.7627478837966919, "learning_rate": 8.74992684897736e-06, "loss": 0.0658, "step": 36573 }, { "epoch": 0.6477086072777343, "grad_norm": 0.17763878405094147, "learning_rate": 8.749144738705181e-06, "loss": 0.0705, "step": 36574 }, { "epoch": 0.6477263168147627, "grad_norm": 0.41077935695648193, "learning_rate": 8.748362648997693e-06, "loss": 0.0559, "step": 36575 }, { "epoch": 0.6477440263517911, "grad_norm": 0.5125751495361328, "learning_rate": 8.747580579857452e-06, "loss": 0.0459, "step": 36576 }, { "epoch": 0.6477617358888196, "grad_norm": 0.6658782362937927, "learning_rate": 8.746798531287034e-06, "loss": 0.0682, "step": 36577 }, { "epoch": 0.647779445425848, "grad_norm": 0.44045138359069824, "learning_rate": 8.746016503289018e-06, "loss": 0.0666, "step": 36578 }, { "epoch": 0.6477971549628764, "grad_norm": 0.8390523195266724, "learning_rate": 8.745234495865984e-06, "loss": 0.0592, "step": 36579 }, { "epoch": 0.6478148644999048, "grad_norm": 0.7936387062072754, "learning_rate": 8.74445250902049e-06, "loss": 0.0657, "step": 36580 }, { "epoch": 0.6478325740369333, "grad_norm": 0.7380393147468567, "learning_rate": 8.743670542755116e-06, "loss": 0.0541, "step": 36581 }, { "epoch": 0.6478502835739617, "grad_norm": 0.31720584630966187, "learning_rate": 8.742888597072436e-06, "loss": 0.0512, "step": 36582 }, { "epoch": 0.6478679931109901, "grad_norm": 0.5928654670715332, "learning_rate": 8.742106671975017e-06, "loss": 0.0447, "step": 36583 }, { "epoch": 0.6478857026480185, "grad_norm": 0.6639375686645508, "learning_rate": 8.741324767465433e-06, "loss": 0.0854, "step": 36584 }, { "epoch": 0.647903412185047, "grad_norm": 0.565526008605957, "learning_rate": 8.740542883546256e-06, "loss": 0.0626, "step": 36585 }, { "epoch": 0.6479211217220754, "grad_norm": 0.7406451106071472, "learning_rate": 8.739761020220061e-06, "loss": 0.0664, "step": 36586 }, { "epoch": 0.6479388312591038, "grad_norm": 0.4643594026565552, "learning_rate": 8.738979177489418e-06, "loss": 0.0589, "step": 36587 }, { "epoch": 0.6479565407961322, "grad_norm": 0.518530011177063, "learning_rate": 8.7381973553569e-06, "loss": 0.0511, "step": 36588 }, { "epoch": 0.6479742503331607, "grad_norm": 0.7464331984519958, "learning_rate": 8.737415553825088e-06, "loss": 0.0771, "step": 36589 }, { "epoch": 0.6479919598701891, "grad_norm": 0.9073910713195801, "learning_rate": 8.736633772896537e-06, "loss": 0.0882, "step": 36590 }, { "epoch": 0.6480096694072175, "grad_norm": 1.1250941753387451, "learning_rate": 8.735852012573827e-06, "loss": 0.0795, "step": 36591 }, { "epoch": 0.648027378944246, "grad_norm": 0.5446142554283142, "learning_rate": 8.735070272859533e-06, "loss": 0.0392, "step": 36592 }, { "epoch": 0.6480450884812744, "grad_norm": 0.6887399554252625, "learning_rate": 8.734288553756228e-06, "loss": 0.0756, "step": 36593 }, { "epoch": 0.6480627980183028, "grad_norm": 0.33020931482315063, "learning_rate": 8.733506855266475e-06, "loss": 0.0358, "step": 36594 }, { "epoch": 0.6480805075553312, "grad_norm": 0.2645860016345978, "learning_rate": 8.73272517739285e-06, "loss": 0.0303, "step": 36595 }, { "epoch": 0.6480982170923597, "grad_norm": 0.6905906796455383, "learning_rate": 8.731943520137926e-06, "loss": 0.0523, "step": 36596 }, { "epoch": 0.6481159266293881, "grad_norm": 0.621781051158905, "learning_rate": 8.731161883504272e-06, "loss": 0.0621, "step": 36597 }, { "epoch": 0.6481336361664165, "grad_norm": 0.5980021357536316, "learning_rate": 8.730380267494462e-06, "loss": 0.0702, "step": 36598 }, { "epoch": 0.6481513457034449, "grad_norm": 0.7049280405044556, "learning_rate": 8.729598672111069e-06, "loss": 0.0578, "step": 36599 }, { "epoch": 0.6481690552404734, "grad_norm": 0.49687671661376953, "learning_rate": 8.728817097356666e-06, "loss": 0.0769, "step": 36600 }, { "epoch": 0.6481867647775018, "grad_norm": 0.3990817368030548, "learning_rate": 8.728035543233814e-06, "loss": 0.0417, "step": 36601 }, { "epoch": 0.6482044743145302, "grad_norm": 0.5007771253585815, "learning_rate": 8.727254009745093e-06, "loss": 0.0472, "step": 36602 }, { "epoch": 0.6482221838515586, "grad_norm": 0.7387956976890564, "learning_rate": 8.726472496893076e-06, "loss": 0.078, "step": 36603 }, { "epoch": 0.6482398933885871, "grad_norm": 0.3370215594768524, "learning_rate": 8.725691004680323e-06, "loss": 0.0204, "step": 36604 }, { "epoch": 0.6482576029256155, "grad_norm": 0.4678909480571747, "learning_rate": 8.724909533109414e-06, "loss": 0.062, "step": 36605 }, { "epoch": 0.6482753124626439, "grad_norm": 0.7718283534049988, "learning_rate": 8.724128082182911e-06, "loss": 0.0487, "step": 36606 }, { "epoch": 0.6482930219996724, "grad_norm": 0.6689490675926208, "learning_rate": 8.723346651903405e-06, "loss": 0.0769, "step": 36607 }, { "epoch": 0.6483107315367008, "grad_norm": 0.5825379490852356, "learning_rate": 8.722565242273447e-06, "loss": 0.0419, "step": 36608 }, { "epoch": 0.6483284410737292, "grad_norm": 0.7032452821731567, "learning_rate": 8.721783853295614e-06, "loss": 0.0735, "step": 36609 }, { "epoch": 0.6483461506107576, "grad_norm": 1.0088969469070435, "learning_rate": 8.721002484972479e-06, "loss": 0.0778, "step": 36610 }, { "epoch": 0.6483638601477861, "grad_norm": 0.6767535209655762, "learning_rate": 8.720221137306615e-06, "loss": 0.0703, "step": 36611 }, { "epoch": 0.6483815696848145, "grad_norm": 0.3817753791809082, "learning_rate": 8.71943981030058e-06, "loss": 0.0485, "step": 36612 }, { "epoch": 0.6483992792218429, "grad_norm": 0.6553043723106384, "learning_rate": 8.718658503956957e-06, "loss": 0.0634, "step": 36613 }, { "epoch": 0.6484169887588713, "grad_norm": 0.7914226651191711, "learning_rate": 8.717877218278309e-06, "loss": 0.0607, "step": 36614 }, { "epoch": 0.6484346982958998, "grad_norm": 0.7011375427246094, "learning_rate": 8.717095953267213e-06, "loss": 0.0451, "step": 36615 }, { "epoch": 0.6484524078329282, "grad_norm": 0.6652502417564392, "learning_rate": 8.716314708926234e-06, "loss": 0.0622, "step": 36616 }, { "epoch": 0.6484701173699566, "grad_norm": 0.728937566280365, "learning_rate": 8.715533485257942e-06, "loss": 0.0867, "step": 36617 }, { "epoch": 0.648487826906985, "grad_norm": 0.4525264501571655, "learning_rate": 8.71475228226492e-06, "loss": 0.0572, "step": 36618 }, { "epoch": 0.6485055364440135, "grad_norm": 0.8068496584892273, "learning_rate": 8.713971099949717e-06, "loss": 0.0901, "step": 36619 }, { "epoch": 0.6485232459810419, "grad_norm": 0.7725427746772766, "learning_rate": 8.713189938314915e-06, "loss": 0.0507, "step": 36620 }, { "epoch": 0.6485409555180703, "grad_norm": 0.5744723081588745, "learning_rate": 8.712408797363088e-06, "loss": 0.059, "step": 36621 }, { "epoch": 0.6485586650550988, "grad_norm": 0.4407237470149994, "learning_rate": 8.711627677096793e-06, "loss": 0.0804, "step": 36622 }, { "epoch": 0.6485763745921272, "grad_norm": 0.4853023886680603, "learning_rate": 8.710846577518607e-06, "loss": 0.0745, "step": 36623 }, { "epoch": 0.6485940841291556, "grad_norm": 0.6769704818725586, "learning_rate": 8.710065498631102e-06, "loss": 0.0639, "step": 36624 }, { "epoch": 0.648611793666184, "grad_norm": 1.0305700302124023, "learning_rate": 8.709284440436844e-06, "loss": 0.0661, "step": 36625 }, { "epoch": 0.6486295032032126, "grad_norm": 0.397969126701355, "learning_rate": 8.708503402938404e-06, "loss": 0.0804, "step": 36626 }, { "epoch": 0.648647212740241, "grad_norm": 0.9719077348709106, "learning_rate": 8.707722386138351e-06, "loss": 0.0765, "step": 36627 }, { "epoch": 0.6486649222772694, "grad_norm": 0.7556436061859131, "learning_rate": 8.706941390039262e-06, "loss": 0.0953, "step": 36628 }, { "epoch": 0.6486826318142977, "grad_norm": 0.4054395258426666, "learning_rate": 8.706160414643692e-06, "loss": 0.056, "step": 36629 }, { "epoch": 0.6487003413513263, "grad_norm": 0.5430463552474976, "learning_rate": 8.705379459954219e-06, "loss": 0.0455, "step": 36630 }, { "epoch": 0.6487180508883547, "grad_norm": 0.7281780242919922, "learning_rate": 8.704598525973409e-06, "loss": 0.0664, "step": 36631 }, { "epoch": 0.6487357604253831, "grad_norm": 0.8023790121078491, "learning_rate": 8.70381761270384e-06, "loss": 0.0587, "step": 36632 }, { "epoch": 0.6487534699624115, "grad_norm": 0.7189479470252991, "learning_rate": 8.70303672014807e-06, "loss": 0.0646, "step": 36633 }, { "epoch": 0.64877117949944, "grad_norm": 0.765195369720459, "learning_rate": 8.70225584830867e-06, "loss": 0.0762, "step": 36634 }, { "epoch": 0.6487888890364684, "grad_norm": 0.6592880487442017, "learning_rate": 8.70147499718821e-06, "loss": 0.0458, "step": 36635 }, { "epoch": 0.6488065985734968, "grad_norm": 0.8785237073898315, "learning_rate": 8.700694166789264e-06, "loss": 0.0636, "step": 36636 }, { "epoch": 0.6488243081105253, "grad_norm": 0.7369464635848999, "learning_rate": 8.699913357114395e-06, "loss": 0.07, "step": 36637 }, { "epoch": 0.6488420176475537, "grad_norm": 0.7120031714439392, "learning_rate": 8.699132568166174e-06, "loss": 0.0773, "step": 36638 }, { "epoch": 0.6488597271845821, "grad_norm": 0.8196641206741333, "learning_rate": 8.698351799947176e-06, "loss": 0.0474, "step": 36639 }, { "epoch": 0.6488774367216105, "grad_norm": 0.44444993138313293, "learning_rate": 8.697571052459958e-06, "loss": 0.0631, "step": 36640 }, { "epoch": 0.648895146258639, "grad_norm": 0.6631320714950562, "learning_rate": 8.69679032570709e-06, "loss": 0.079, "step": 36641 }, { "epoch": 0.6489128557956674, "grad_norm": 0.7321300506591797, "learning_rate": 8.696009619691148e-06, "loss": 0.0662, "step": 36642 }, { "epoch": 0.6489305653326958, "grad_norm": 0.5664862394332886, "learning_rate": 8.695228934414695e-06, "loss": 0.0738, "step": 36643 }, { "epoch": 0.6489482748697242, "grad_norm": 0.8351350426673889, "learning_rate": 8.694448269880302e-06, "loss": 0.0769, "step": 36644 }, { "epoch": 0.6489659844067527, "grad_norm": 0.37328097224235535, "learning_rate": 8.693667626090536e-06, "loss": 0.0714, "step": 36645 }, { "epoch": 0.6489836939437811, "grad_norm": 0.6685904264450073, "learning_rate": 8.692887003047974e-06, "loss": 0.0431, "step": 36646 }, { "epoch": 0.6490014034808095, "grad_norm": 1.0276330709457397, "learning_rate": 8.692106400755166e-06, "loss": 0.065, "step": 36647 }, { "epoch": 0.6490191130178379, "grad_norm": 1.0431628227233887, "learning_rate": 8.691325819214692e-06, "loss": 0.076, "step": 36648 }, { "epoch": 0.6490368225548664, "grad_norm": 0.5542189478874207, "learning_rate": 8.690545258429116e-06, "loss": 0.0454, "step": 36649 }, { "epoch": 0.6490545320918948, "grad_norm": 0.725191593170166, "learning_rate": 8.689764718401017e-06, "loss": 0.0671, "step": 36650 }, { "epoch": 0.6490722416289232, "grad_norm": 0.7203120589256287, "learning_rate": 8.688984199132947e-06, "loss": 0.0874, "step": 36651 }, { "epoch": 0.6490899511659517, "grad_norm": 1.0610301494598389, "learning_rate": 8.68820370062748e-06, "loss": 0.0898, "step": 36652 }, { "epoch": 0.6491076607029801, "grad_norm": 0.7210337519645691, "learning_rate": 8.687423222887185e-06, "loss": 0.0816, "step": 36653 }, { "epoch": 0.6491253702400085, "grad_norm": 0.8847957849502563, "learning_rate": 8.686642765914629e-06, "loss": 0.0753, "step": 36654 }, { "epoch": 0.6491430797770369, "grad_norm": 0.8452108502388, "learning_rate": 8.685862329712378e-06, "loss": 0.0745, "step": 36655 }, { "epoch": 0.6491607893140654, "grad_norm": 0.8407290577888489, "learning_rate": 8.685081914283003e-06, "loss": 0.0542, "step": 36656 }, { "epoch": 0.6491784988510938, "grad_norm": 0.3167744278907776, "learning_rate": 8.684301519629074e-06, "loss": 0.0495, "step": 36657 }, { "epoch": 0.6491962083881222, "grad_norm": 0.33130764961242676, "learning_rate": 8.683521145753152e-06, "loss": 0.0549, "step": 36658 }, { "epoch": 0.6492139179251506, "grad_norm": 0.797959566116333, "learning_rate": 8.682740792657803e-06, "loss": 0.0643, "step": 36659 }, { "epoch": 0.6492316274621791, "grad_norm": 0.45403793454170227, "learning_rate": 8.681960460345607e-06, "loss": 0.0604, "step": 36660 }, { "epoch": 0.6492493369992075, "grad_norm": 0.5693542957305908, "learning_rate": 8.681180148819113e-06, "loss": 0.0632, "step": 36661 }, { "epoch": 0.6492670465362359, "grad_norm": 0.9926256537437439, "learning_rate": 8.680399858080896e-06, "loss": 0.0835, "step": 36662 }, { "epoch": 0.6492847560732643, "grad_norm": 0.8240485191345215, "learning_rate": 8.679619588133528e-06, "loss": 0.0763, "step": 36663 }, { "epoch": 0.6493024656102928, "grad_norm": 0.4736001491546631, "learning_rate": 8.67883933897957e-06, "loss": 0.0572, "step": 36664 }, { "epoch": 0.6493201751473212, "grad_norm": 0.6332816481590271, "learning_rate": 8.678059110621594e-06, "loss": 0.0454, "step": 36665 }, { "epoch": 0.6493378846843496, "grad_norm": 0.4148843586444855, "learning_rate": 8.677278903062161e-06, "loss": 0.0323, "step": 36666 }, { "epoch": 0.6493555942213781, "grad_norm": 0.41306811571121216, "learning_rate": 8.676498716303848e-06, "loss": 0.035, "step": 36667 }, { "epoch": 0.6493733037584065, "grad_norm": 0.9853891134262085, "learning_rate": 8.675718550349208e-06, "loss": 0.0629, "step": 36668 }, { "epoch": 0.6493910132954349, "grad_norm": 0.6885053515434265, "learning_rate": 8.674938405200815e-06, "loss": 0.0503, "step": 36669 }, { "epoch": 0.6494087228324633, "grad_norm": 0.8505436778068542, "learning_rate": 8.674158280861235e-06, "loss": 0.0696, "step": 36670 }, { "epoch": 0.6494264323694918, "grad_norm": 1.1714156866073608, "learning_rate": 8.67337817733304e-06, "loss": 0.0664, "step": 36671 }, { "epoch": 0.6494441419065202, "grad_norm": 0.48295682668685913, "learning_rate": 8.672598094618781e-06, "loss": 0.0464, "step": 36672 }, { "epoch": 0.6494618514435486, "grad_norm": 0.5866521596908569, "learning_rate": 8.671818032721038e-06, "loss": 0.057, "step": 36673 }, { "epoch": 0.649479560980577, "grad_norm": 0.9315424561500549, "learning_rate": 8.671037991642373e-06, "loss": 0.0933, "step": 36674 }, { "epoch": 0.6494972705176055, "grad_norm": 0.39652538299560547, "learning_rate": 8.670257971385361e-06, "loss": 0.0464, "step": 36675 }, { "epoch": 0.6495149800546339, "grad_norm": 0.5036080479621887, "learning_rate": 8.669477971952554e-06, "loss": 0.0516, "step": 36676 }, { "epoch": 0.6495326895916623, "grad_norm": 0.5102232694625854, "learning_rate": 8.668697993346522e-06, "loss": 0.0696, "step": 36677 }, { "epoch": 0.6495503991286907, "grad_norm": 0.7701125741004944, "learning_rate": 8.667918035569842e-06, "loss": 0.08, "step": 36678 }, { "epoch": 0.6495681086657192, "grad_norm": 0.4852357506752014, "learning_rate": 8.667138098625063e-06, "loss": 0.06, "step": 36679 }, { "epoch": 0.6495858182027476, "grad_norm": 0.49067968130111694, "learning_rate": 8.66635818251476e-06, "loss": 0.0619, "step": 36680 }, { "epoch": 0.649603527739776, "grad_norm": 0.5564219951629639, "learning_rate": 8.665578287241499e-06, "loss": 0.0767, "step": 36681 }, { "epoch": 0.6496212372768045, "grad_norm": 0.8568791747093201, "learning_rate": 8.664798412807843e-06, "loss": 0.0935, "step": 36682 }, { "epoch": 0.6496389468138329, "grad_norm": 0.6171436309814453, "learning_rate": 8.664018559216359e-06, "loss": 0.0648, "step": 36683 }, { "epoch": 0.6496566563508613, "grad_norm": 0.7103667855262756, "learning_rate": 8.663238726469614e-06, "loss": 0.0508, "step": 36684 }, { "epoch": 0.6496743658878897, "grad_norm": 1.0740244388580322, "learning_rate": 8.662458914570178e-06, "loss": 0.0779, "step": 36685 }, { "epoch": 0.6496920754249182, "grad_norm": 0.67705237865448, "learning_rate": 8.661679123520607e-06, "loss": 0.0387, "step": 36686 }, { "epoch": 0.6497097849619466, "grad_norm": 0.44916197657585144, "learning_rate": 8.66089935332347e-06, "loss": 0.0845, "step": 36687 }, { "epoch": 0.649727494498975, "grad_norm": 0.7115024328231812, "learning_rate": 8.660119603981331e-06, "loss": 0.0577, "step": 36688 }, { "epoch": 0.6497452040360034, "grad_norm": 0.846567690372467, "learning_rate": 8.659339875496766e-06, "loss": 0.0866, "step": 36689 }, { "epoch": 0.649762913573032, "grad_norm": 0.30483463406562805, "learning_rate": 8.658560167872324e-06, "loss": 0.0357, "step": 36690 }, { "epoch": 0.6497806231100604, "grad_norm": 0.49018627405166626, "learning_rate": 8.657780481110578e-06, "loss": 0.0376, "step": 36691 }, { "epoch": 0.6497983326470887, "grad_norm": 0.6867346167564392, "learning_rate": 8.657000815214092e-06, "loss": 0.064, "step": 36692 }, { "epoch": 0.6498160421841171, "grad_norm": 0.8558656573295593, "learning_rate": 8.656221170185432e-06, "loss": 0.0598, "step": 36693 }, { "epoch": 0.6498337517211457, "grad_norm": 0.6210184097290039, "learning_rate": 8.655441546027164e-06, "loss": 0.0496, "step": 36694 }, { "epoch": 0.6498514612581741, "grad_norm": 0.538912296295166, "learning_rate": 8.654661942741851e-06, "loss": 0.07, "step": 36695 }, { "epoch": 0.6498691707952025, "grad_norm": 0.8221753239631653, "learning_rate": 8.653882360332064e-06, "loss": 0.0721, "step": 36696 }, { "epoch": 0.649886880332231, "grad_norm": 0.684471070766449, "learning_rate": 8.653102798800356e-06, "loss": 0.0794, "step": 36697 }, { "epoch": 0.6499045898692594, "grad_norm": 0.5315152406692505, "learning_rate": 8.652323258149298e-06, "loss": 0.0703, "step": 36698 }, { "epoch": 0.6499222994062878, "grad_norm": 0.6453882455825806, "learning_rate": 8.65154373838146e-06, "loss": 0.0475, "step": 36699 }, { "epoch": 0.6499400089433162, "grad_norm": 0.5400329828262329, "learning_rate": 8.650764239499396e-06, "loss": 0.0443, "step": 36700 }, { "epoch": 0.6499577184803447, "grad_norm": 0.7877736687660217, "learning_rate": 8.64998476150567e-06, "loss": 0.0732, "step": 36701 }, { "epoch": 0.6499754280173731, "grad_norm": 0.5201441049575806, "learning_rate": 8.649205304402857e-06, "loss": 0.0666, "step": 36702 }, { "epoch": 0.6499931375544015, "grad_norm": 0.5198496580123901, "learning_rate": 8.648425868193521e-06, "loss": 0.0363, "step": 36703 }, { "epoch": 0.6500108470914299, "grad_norm": 0.8620378971099854, "learning_rate": 8.647646452880217e-06, "loss": 0.0528, "step": 36704 }, { "epoch": 0.6500285566284584, "grad_norm": 0.6982181072235107, "learning_rate": 8.646867058465512e-06, "loss": 0.0581, "step": 36705 }, { "epoch": 0.6500462661654868, "grad_norm": 0.35803210735321045, "learning_rate": 8.64608768495198e-06, "loss": 0.0497, "step": 36706 }, { "epoch": 0.6500639757025152, "grad_norm": 0.8189863562583923, "learning_rate": 8.645308332342171e-06, "loss": 0.071, "step": 36707 }, { "epoch": 0.6500816852395436, "grad_norm": 0.6526732444763184, "learning_rate": 8.644529000638655e-06, "loss": 0.0517, "step": 36708 }, { "epoch": 0.6500993947765721, "grad_norm": 0.546234130859375, "learning_rate": 8.643749689843994e-06, "loss": 0.0759, "step": 36709 }, { "epoch": 0.6501171043136005, "grad_norm": 0.3022127151489258, "learning_rate": 8.642970399960753e-06, "loss": 0.0518, "step": 36710 }, { "epoch": 0.6501348138506289, "grad_norm": 0.6059027910232544, "learning_rate": 8.642191130991498e-06, "loss": 0.0516, "step": 36711 }, { "epoch": 0.6501525233876574, "grad_norm": 0.6565465927124023, "learning_rate": 8.64141188293879e-06, "loss": 0.0646, "step": 36712 }, { "epoch": 0.6501702329246858, "grad_norm": 1.1174784898757935, "learning_rate": 8.640632655805196e-06, "loss": 0.0467, "step": 36713 }, { "epoch": 0.6501879424617142, "grad_norm": 0.526426374912262, "learning_rate": 8.63985344959328e-06, "loss": 0.0718, "step": 36714 }, { "epoch": 0.6502056519987426, "grad_norm": 0.4767746925354004, "learning_rate": 8.6390742643056e-06, "loss": 0.0542, "step": 36715 }, { "epoch": 0.6502233615357711, "grad_norm": 0.7857601642608643, "learning_rate": 8.63829509994472e-06, "loss": 0.0592, "step": 36716 }, { "epoch": 0.6502410710727995, "grad_norm": 0.808243989944458, "learning_rate": 8.637515956513212e-06, "loss": 0.067, "step": 36717 }, { "epoch": 0.6502587806098279, "grad_norm": 0.7506263256072998, "learning_rate": 8.636736834013629e-06, "loss": 0.0727, "step": 36718 }, { "epoch": 0.6502764901468563, "grad_norm": 0.30333074927330017, "learning_rate": 8.635957732448537e-06, "loss": 0.0457, "step": 36719 }, { "epoch": 0.6502941996838848, "grad_norm": 0.6686466336250305, "learning_rate": 8.6351786518205e-06, "loss": 0.0677, "step": 36720 }, { "epoch": 0.6503119092209132, "grad_norm": 0.64561527967453, "learning_rate": 8.634399592132082e-06, "loss": 0.05, "step": 36721 }, { "epoch": 0.6503296187579416, "grad_norm": 0.6823159456253052, "learning_rate": 8.633620553385847e-06, "loss": 0.079, "step": 36722 }, { "epoch": 0.65034732829497, "grad_norm": 0.5463125705718994, "learning_rate": 8.632841535584352e-06, "loss": 0.0616, "step": 36723 }, { "epoch": 0.6503650378319985, "grad_norm": 0.49782058596611023, "learning_rate": 8.632062538730174e-06, "loss": 0.0553, "step": 36724 }, { "epoch": 0.6503827473690269, "grad_norm": 0.5497070550918579, "learning_rate": 8.63128356282586e-06, "loss": 0.054, "step": 36725 }, { "epoch": 0.6504004569060553, "grad_norm": 0.8075286149978638, "learning_rate": 8.630504607873976e-06, "loss": 0.0703, "step": 36726 }, { "epoch": 0.6504181664430838, "grad_norm": 0.57823246717453, "learning_rate": 8.629725673877092e-06, "loss": 0.059, "step": 36727 }, { "epoch": 0.6504358759801122, "grad_norm": 0.8497660756111145, "learning_rate": 8.628946760837767e-06, "loss": 0.0633, "step": 36728 }, { "epoch": 0.6504535855171406, "grad_norm": 0.333904504776001, "learning_rate": 8.62816786875856e-06, "loss": 0.0433, "step": 36729 }, { "epoch": 0.650471295054169, "grad_norm": 0.7782142758369446, "learning_rate": 8.627388997642031e-06, "loss": 0.0603, "step": 36730 }, { "epoch": 0.6504890045911975, "grad_norm": 0.8185862302780151, "learning_rate": 8.626610147490758e-06, "loss": 0.0554, "step": 36731 }, { "epoch": 0.6505067141282259, "grad_norm": 0.7460049986839294, "learning_rate": 8.625831318307288e-06, "loss": 0.0863, "step": 36732 }, { "epoch": 0.6505244236652543, "grad_norm": 0.8848022222518921, "learning_rate": 8.625052510094188e-06, "loss": 0.0896, "step": 36733 }, { "epoch": 0.6505421332022827, "grad_norm": 0.775497555732727, "learning_rate": 8.624273722854018e-06, "loss": 0.0534, "step": 36734 }, { "epoch": 0.6505598427393112, "grad_norm": 0.9624075889587402, "learning_rate": 8.623494956589354e-06, "loss": 0.063, "step": 36735 }, { "epoch": 0.6505775522763396, "grad_norm": 0.7654896378517151, "learning_rate": 8.622716211302736e-06, "loss": 0.0856, "step": 36736 }, { "epoch": 0.650595261813368, "grad_norm": 0.2944704592227936, "learning_rate": 8.621937486996739e-06, "loss": 0.0594, "step": 36737 }, { "epoch": 0.6506129713503964, "grad_norm": 0.5520678758621216, "learning_rate": 8.62115878367392e-06, "loss": 0.0798, "step": 36738 }, { "epoch": 0.6506306808874249, "grad_norm": 0.625859260559082, "learning_rate": 8.620380101336845e-06, "loss": 0.0469, "step": 36739 }, { "epoch": 0.6506483904244533, "grad_norm": 0.9929717183113098, "learning_rate": 8.619601439988077e-06, "loss": 0.0531, "step": 36740 }, { "epoch": 0.6506660999614817, "grad_norm": 0.7236222624778748, "learning_rate": 8.618822799630171e-06, "loss": 0.0581, "step": 36741 }, { "epoch": 0.6506838094985102, "grad_norm": 0.5059756636619568, "learning_rate": 8.618044180265699e-06, "loss": 0.0705, "step": 36742 }, { "epoch": 0.6507015190355386, "grad_norm": 0.6749892234802246, "learning_rate": 8.617265581897212e-06, "loss": 0.0495, "step": 36743 }, { "epoch": 0.650719228572567, "grad_norm": 0.4057249128818512, "learning_rate": 8.616487004527275e-06, "loss": 0.0398, "step": 36744 }, { "epoch": 0.6507369381095954, "grad_norm": 0.5376219749450684, "learning_rate": 8.615708448158447e-06, "loss": 0.0568, "step": 36745 }, { "epoch": 0.6507546476466239, "grad_norm": 0.7077410817146301, "learning_rate": 8.614929912793303e-06, "loss": 0.0635, "step": 36746 }, { "epoch": 0.6507723571836523, "grad_norm": 0.34280210733413696, "learning_rate": 8.614151398434387e-06, "loss": 0.0381, "step": 36747 }, { "epoch": 0.6507900667206807, "grad_norm": 0.7114147543907166, "learning_rate": 8.613372905084265e-06, "loss": 0.0807, "step": 36748 }, { "epoch": 0.6508077762577091, "grad_norm": 0.36369895935058594, "learning_rate": 8.6125944327455e-06, "loss": 0.0397, "step": 36749 }, { "epoch": 0.6508254857947376, "grad_norm": 0.7092717289924622, "learning_rate": 8.611815981420656e-06, "loss": 0.0679, "step": 36750 }, { "epoch": 0.650843195331766, "grad_norm": 0.5872851610183716, "learning_rate": 8.611037551112291e-06, "loss": 0.0657, "step": 36751 }, { "epoch": 0.6508609048687944, "grad_norm": 0.45663759112358093, "learning_rate": 8.610259141822964e-06, "loss": 0.038, "step": 36752 }, { "epoch": 0.6508786144058228, "grad_norm": 0.5810322165489197, "learning_rate": 8.609480753555245e-06, "loss": 0.0838, "step": 36753 }, { "epoch": 0.6508963239428514, "grad_norm": 0.5403347611427307, "learning_rate": 8.608702386311681e-06, "loss": 0.0567, "step": 36754 }, { "epoch": 0.6509140334798798, "grad_norm": 0.530141294002533, "learning_rate": 8.607924040094842e-06, "loss": 0.0535, "step": 36755 }, { "epoch": 0.6509317430169081, "grad_norm": 1.0463835000991821, "learning_rate": 8.607145714907292e-06, "loss": 0.0965, "step": 36756 }, { "epoch": 0.6509494525539367, "grad_norm": 0.6228297352790833, "learning_rate": 8.606367410751578e-06, "loss": 0.0534, "step": 36757 }, { "epoch": 0.6509671620909651, "grad_norm": 0.74690842628479, "learning_rate": 8.60558912763027e-06, "loss": 0.0756, "step": 36758 }, { "epoch": 0.6509848716279935, "grad_norm": 0.5267025828361511, "learning_rate": 8.60481086554592e-06, "loss": 0.0574, "step": 36759 }, { "epoch": 0.6510025811650219, "grad_norm": 0.6691282987594604, "learning_rate": 8.604032624501109e-06, "loss": 0.0777, "step": 36760 }, { "epoch": 0.6510202907020504, "grad_norm": 0.625366747379303, "learning_rate": 8.603254404498377e-06, "loss": 0.0513, "step": 36761 }, { "epoch": 0.6510380002390788, "grad_norm": 0.5722323060035706, "learning_rate": 8.602476205540289e-06, "loss": 0.0722, "step": 36762 }, { "epoch": 0.6510557097761072, "grad_norm": 0.5171593427658081, "learning_rate": 8.601698027629417e-06, "loss": 0.0481, "step": 36763 }, { "epoch": 0.6510734193131356, "grad_norm": 0.7054969668388367, "learning_rate": 8.600919870768302e-06, "loss": 0.0806, "step": 36764 }, { "epoch": 0.6510911288501641, "grad_norm": 1.6490471363067627, "learning_rate": 8.600141734959516e-06, "loss": 0.0478, "step": 36765 }, { "epoch": 0.6511088383871925, "grad_norm": 0.19811271131038666, "learning_rate": 8.599363620205615e-06, "loss": 0.0535, "step": 36766 }, { "epoch": 0.6511265479242209, "grad_norm": 0.41405147314071655, "learning_rate": 8.598585526509161e-06, "loss": 0.0446, "step": 36767 }, { "epoch": 0.6511442574612493, "grad_norm": 0.7660142779350281, "learning_rate": 8.597807453872713e-06, "loss": 0.0641, "step": 36768 }, { "epoch": 0.6511619669982778, "grad_norm": 0.8515531420707703, "learning_rate": 8.59702940229883e-06, "loss": 0.0754, "step": 36769 }, { "epoch": 0.6511796765353062, "grad_norm": 0.8154700994491577, "learning_rate": 8.59625137179008e-06, "loss": 0.0661, "step": 36770 }, { "epoch": 0.6511973860723346, "grad_norm": 0.5527061223983765, "learning_rate": 8.595473362349008e-06, "loss": 0.0606, "step": 36771 }, { "epoch": 0.6512150956093631, "grad_norm": 0.804940402507782, "learning_rate": 8.594695373978183e-06, "loss": 0.1035, "step": 36772 }, { "epoch": 0.6512328051463915, "grad_norm": 0.5837485790252686, "learning_rate": 8.59391740668016e-06, "loss": 0.0681, "step": 36773 }, { "epoch": 0.6512505146834199, "grad_norm": 0.7062970399856567, "learning_rate": 8.593139460457507e-06, "loss": 0.0674, "step": 36774 }, { "epoch": 0.6512682242204483, "grad_norm": 0.7137365937232971, "learning_rate": 8.59236153531277e-06, "loss": 0.0874, "step": 36775 }, { "epoch": 0.6512859337574768, "grad_norm": 0.3308214545249939, "learning_rate": 8.59158363124852e-06, "loss": 0.0561, "step": 36776 }, { "epoch": 0.6513036432945052, "grad_norm": 0.7375411987304688, "learning_rate": 8.590805748267308e-06, "loss": 0.0596, "step": 36777 }, { "epoch": 0.6513213528315336, "grad_norm": 0.09615521132946014, "learning_rate": 8.590027886371698e-06, "loss": 0.0439, "step": 36778 }, { "epoch": 0.651339062368562, "grad_norm": 0.40138158202171326, "learning_rate": 8.589250045564248e-06, "loss": 0.0489, "step": 36779 }, { "epoch": 0.6513567719055905, "grad_norm": 0.3434019684791565, "learning_rate": 8.588472225847516e-06, "loss": 0.0557, "step": 36780 }, { "epoch": 0.6513744814426189, "grad_norm": 0.4023023545742035, "learning_rate": 8.587694427224068e-06, "loss": 0.0569, "step": 36781 }, { "epoch": 0.6513921909796473, "grad_norm": 0.45525363087654114, "learning_rate": 8.586916649696452e-06, "loss": 0.0485, "step": 36782 }, { "epoch": 0.6514099005166757, "grad_norm": 0.9591105580329895, "learning_rate": 8.58613889326723e-06, "loss": 0.0872, "step": 36783 }, { "epoch": 0.6514276100537042, "grad_norm": 0.7378148436546326, "learning_rate": 8.585361157938962e-06, "loss": 0.0855, "step": 36784 }, { "epoch": 0.6514453195907326, "grad_norm": 0.6498607993125916, "learning_rate": 8.584583443714216e-06, "loss": 0.044, "step": 36785 }, { "epoch": 0.651463029127761, "grad_norm": 0.6892837285995483, "learning_rate": 8.583805750595532e-06, "loss": 0.0982, "step": 36786 }, { "epoch": 0.6514807386647895, "grad_norm": 0.7460803985595703, "learning_rate": 8.583028078585481e-06, "loss": 0.0678, "step": 36787 }, { "epoch": 0.6514984482018179, "grad_norm": 0.9454964995384216, "learning_rate": 8.582250427686617e-06, "loss": 0.0667, "step": 36788 }, { "epoch": 0.6515161577388463, "grad_norm": 0.7078198790550232, "learning_rate": 8.5814727979015e-06, "loss": 0.0593, "step": 36789 }, { "epoch": 0.6515338672758747, "grad_norm": 0.4146151542663574, "learning_rate": 8.58069518923269e-06, "loss": 0.0443, "step": 36790 }, { "epoch": 0.6515515768129032, "grad_norm": 0.801627516746521, "learning_rate": 8.579917601682741e-06, "loss": 0.0625, "step": 36791 }, { "epoch": 0.6515692863499316, "grad_norm": 0.39326196908950806, "learning_rate": 8.579140035254223e-06, "loss": 0.0541, "step": 36792 }, { "epoch": 0.65158699588696, "grad_norm": 0.39954453706741333, "learning_rate": 8.578362489949674e-06, "loss": 0.0578, "step": 36793 }, { "epoch": 0.6516047054239884, "grad_norm": 0.8105521202087402, "learning_rate": 8.577584965771666e-06, "loss": 0.0769, "step": 36794 }, { "epoch": 0.6516224149610169, "grad_norm": 0.4472743570804596, "learning_rate": 8.57680746272276e-06, "loss": 0.0615, "step": 36795 }, { "epoch": 0.6516401244980453, "grad_norm": 0.7118328213691711, "learning_rate": 8.576029980805496e-06, "loss": 0.0567, "step": 36796 }, { "epoch": 0.6516578340350737, "grad_norm": 0.8330121636390686, "learning_rate": 8.575252520022449e-06, "loss": 0.0807, "step": 36797 }, { "epoch": 0.6516755435721021, "grad_norm": 0.7267748713493347, "learning_rate": 8.57447508037617e-06, "loss": 0.0776, "step": 36798 }, { "epoch": 0.6516932531091306, "grad_norm": 0.6287820339202881, "learning_rate": 8.573697661869228e-06, "loss": 0.041, "step": 36799 }, { "epoch": 0.651710962646159, "grad_norm": 0.585762083530426, "learning_rate": 8.572920264504163e-06, "loss": 0.0888, "step": 36800 }, { "epoch": 0.6517286721831874, "grad_norm": 0.8905872702598572, "learning_rate": 8.572142888283537e-06, "loss": 0.0624, "step": 36801 }, { "epoch": 0.6517463817202159, "grad_norm": 0.7799485921859741, "learning_rate": 8.57136553320992e-06, "loss": 0.0619, "step": 36802 }, { "epoch": 0.6517640912572443, "grad_norm": 0.8710068464279175, "learning_rate": 8.570588199285854e-06, "loss": 0.0604, "step": 36803 }, { "epoch": 0.6517818007942727, "grad_norm": 0.6905086040496826, "learning_rate": 8.569810886513901e-06, "loss": 0.0823, "step": 36804 }, { "epoch": 0.6517995103313011, "grad_norm": 0.44760364294052124, "learning_rate": 8.56903359489662e-06, "loss": 0.0678, "step": 36805 }, { "epoch": 0.6518172198683296, "grad_norm": 0.9823461771011353, "learning_rate": 8.56825632443657e-06, "loss": 0.0698, "step": 36806 }, { "epoch": 0.651834929405358, "grad_norm": 0.705496072769165, "learning_rate": 8.567479075136305e-06, "loss": 0.0481, "step": 36807 }, { "epoch": 0.6518526389423864, "grad_norm": 0.6446430683135986, "learning_rate": 8.566701846998383e-06, "loss": 0.0598, "step": 36808 }, { "epoch": 0.6518703484794148, "grad_norm": 0.9627830386161804, "learning_rate": 8.565924640025369e-06, "loss": 0.063, "step": 36809 }, { "epoch": 0.6518880580164433, "grad_norm": 0.33286210894584656, "learning_rate": 8.565147454219804e-06, "loss": 0.0522, "step": 36810 }, { "epoch": 0.6519057675534717, "grad_norm": 0.9437965154647827, "learning_rate": 8.564370289584253e-06, "loss": 0.0597, "step": 36811 }, { "epoch": 0.6519234770905001, "grad_norm": 0.6150963306427002, "learning_rate": 8.563593146121272e-06, "loss": 0.0765, "step": 36812 }, { "epoch": 0.6519411866275285, "grad_norm": 0.6181294918060303, "learning_rate": 8.562816023833428e-06, "loss": 0.0387, "step": 36813 }, { "epoch": 0.651958896164557, "grad_norm": 0.636040210723877, "learning_rate": 8.562038922723258e-06, "loss": 0.0865, "step": 36814 }, { "epoch": 0.6519766057015854, "grad_norm": 0.8014622926712036, "learning_rate": 8.561261842793333e-06, "loss": 0.0633, "step": 36815 }, { "epoch": 0.6519943152386138, "grad_norm": 0.8270993828773499, "learning_rate": 8.560484784046203e-06, "loss": 0.0405, "step": 36816 }, { "epoch": 0.6520120247756424, "grad_norm": 0.6913318634033203, "learning_rate": 8.559707746484425e-06, "loss": 0.0831, "step": 36817 }, { "epoch": 0.6520297343126708, "grad_norm": 0.6485406756401062, "learning_rate": 8.558930730110561e-06, "loss": 0.051, "step": 36818 }, { "epoch": 0.6520474438496991, "grad_norm": 0.5870732069015503, "learning_rate": 8.558153734927163e-06, "loss": 0.0949, "step": 36819 }, { "epoch": 0.6520651533867275, "grad_norm": 0.6385864615440369, "learning_rate": 8.55737676093679e-06, "loss": 0.0903, "step": 36820 }, { "epoch": 0.6520828629237561, "grad_norm": 0.8964331150054932, "learning_rate": 8.556599808141991e-06, "loss": 0.0868, "step": 36821 }, { "epoch": 0.6521005724607845, "grad_norm": 0.6832244992256165, "learning_rate": 8.555822876545329e-06, "loss": 0.0582, "step": 36822 }, { "epoch": 0.6521182819978129, "grad_norm": 0.4427814781665802, "learning_rate": 8.555045966149358e-06, "loss": 0.0477, "step": 36823 }, { "epoch": 0.6521359915348413, "grad_norm": 0.8777073621749878, "learning_rate": 8.554269076956637e-06, "loss": 0.0899, "step": 36824 }, { "epoch": 0.6521537010718698, "grad_norm": 0.4049893617630005, "learning_rate": 8.55349220896971e-06, "loss": 0.0694, "step": 36825 }, { "epoch": 0.6521714106088982, "grad_norm": 0.5766319036483765, "learning_rate": 8.552715362191144e-06, "loss": 0.057, "step": 36826 }, { "epoch": 0.6521891201459266, "grad_norm": 0.6572766900062561, "learning_rate": 8.551938536623503e-06, "loss": 0.0699, "step": 36827 }, { "epoch": 0.652206829682955, "grad_norm": 0.7834053039550781, "learning_rate": 8.551161732269323e-06, "loss": 0.0618, "step": 36828 }, { "epoch": 0.6522245392199835, "grad_norm": 0.5815885066986084, "learning_rate": 8.55038494913117e-06, "loss": 0.0332, "step": 36829 }, { "epoch": 0.6522422487570119, "grad_norm": 1.213556170463562, "learning_rate": 8.549608187211597e-06, "loss": 0.0952, "step": 36830 }, { "epoch": 0.6522599582940403, "grad_norm": 0.7632006406784058, "learning_rate": 8.548831446513167e-06, "loss": 0.055, "step": 36831 }, { "epoch": 0.6522776678310688, "grad_norm": 0.5407592058181763, "learning_rate": 8.548054727038422e-06, "loss": 0.0556, "step": 36832 }, { "epoch": 0.6522953773680972, "grad_norm": 0.504469633102417, "learning_rate": 8.547278028789925e-06, "loss": 0.0572, "step": 36833 }, { "epoch": 0.6523130869051256, "grad_norm": 0.8369480967521667, "learning_rate": 8.546501351770231e-06, "loss": 0.0724, "step": 36834 }, { "epoch": 0.652330796442154, "grad_norm": 1.0072648525238037, "learning_rate": 8.545724695981896e-06, "loss": 0.0829, "step": 36835 }, { "epoch": 0.6523485059791825, "grad_norm": 0.812698483467102, "learning_rate": 8.544948061427473e-06, "loss": 0.0658, "step": 36836 }, { "epoch": 0.6523662155162109, "grad_norm": 0.8648325800895691, "learning_rate": 8.544171448109517e-06, "loss": 0.1134, "step": 36837 }, { "epoch": 0.6523839250532393, "grad_norm": 0.6592954397201538, "learning_rate": 8.54339485603059e-06, "loss": 0.0688, "step": 36838 }, { "epoch": 0.6524016345902677, "grad_norm": 0.6045688986778259, "learning_rate": 8.542618285193236e-06, "loss": 0.0668, "step": 36839 }, { "epoch": 0.6524193441272962, "grad_norm": 0.6189772486686707, "learning_rate": 8.541841735600016e-06, "loss": 0.0589, "step": 36840 }, { "epoch": 0.6524370536643246, "grad_norm": 0.36127379536628723, "learning_rate": 8.541065207253488e-06, "loss": 0.046, "step": 36841 }, { "epoch": 0.652454763201353, "grad_norm": 0.511059045791626, "learning_rate": 8.540288700156195e-06, "loss": 0.0447, "step": 36842 }, { "epoch": 0.6524724727383814, "grad_norm": 0.5784023404121399, "learning_rate": 8.5395122143107e-06, "loss": 0.0695, "step": 36843 }, { "epoch": 0.6524901822754099, "grad_norm": 1.13922917842865, "learning_rate": 8.538735749719555e-06, "loss": 0.0795, "step": 36844 }, { "epoch": 0.6525078918124383, "grad_norm": 0.6710148453712463, "learning_rate": 8.537959306385318e-06, "loss": 0.0639, "step": 36845 }, { "epoch": 0.6525256013494667, "grad_norm": 0.42061716318130493, "learning_rate": 8.537182884310541e-06, "loss": 0.0552, "step": 36846 }, { "epoch": 0.6525433108864952, "grad_norm": 0.7130745649337769, "learning_rate": 8.536406483497777e-06, "loss": 0.0777, "step": 36847 }, { "epoch": 0.6525610204235236, "grad_norm": 0.6964549422264099, "learning_rate": 8.535630103949583e-06, "loss": 0.055, "step": 36848 }, { "epoch": 0.652578729960552, "grad_norm": 0.5841548442840576, "learning_rate": 8.53485374566852e-06, "loss": 0.0687, "step": 36849 }, { "epoch": 0.6525964394975804, "grad_norm": 0.5764865279197693, "learning_rate": 8.534077408657126e-06, "loss": 0.0655, "step": 36850 }, { "epoch": 0.6526141490346089, "grad_norm": 0.7515311241149902, "learning_rate": 8.533301092917962e-06, "loss": 0.0742, "step": 36851 }, { "epoch": 0.6526318585716373, "grad_norm": 0.8900282382965088, "learning_rate": 8.532524798453592e-06, "loss": 0.0678, "step": 36852 }, { "epoch": 0.6526495681086657, "grad_norm": 0.3383643925189972, "learning_rate": 8.531748525266555e-06, "loss": 0.0726, "step": 36853 }, { "epoch": 0.6526672776456941, "grad_norm": 0.6546615958213806, "learning_rate": 8.530972273359403e-06, "loss": 0.0566, "step": 36854 }, { "epoch": 0.6526849871827226, "grad_norm": 0.5292069315910339, "learning_rate": 8.530196042734705e-06, "loss": 0.0425, "step": 36855 }, { "epoch": 0.652702696719751, "grad_norm": 0.854772686958313, "learning_rate": 8.529419833395016e-06, "loss": 0.0703, "step": 36856 }, { "epoch": 0.6527204062567794, "grad_norm": 0.45618823170661926, "learning_rate": 8.528643645342872e-06, "loss": 0.0589, "step": 36857 }, { "epoch": 0.6527381157938078, "grad_norm": 0.32485970854759216, "learning_rate": 8.527867478580837e-06, "loss": 0.0516, "step": 36858 }, { "epoch": 0.6527558253308363, "grad_norm": 0.6617200970649719, "learning_rate": 8.52709133311147e-06, "loss": 0.0876, "step": 36859 }, { "epoch": 0.6527735348678647, "grad_norm": 0.7742047309875488, "learning_rate": 8.526315208937312e-06, "loss": 0.065, "step": 36860 }, { "epoch": 0.6527912444048931, "grad_norm": 0.9584262371063232, "learning_rate": 8.52553910606092e-06, "loss": 0.0641, "step": 36861 }, { "epoch": 0.6528089539419216, "grad_norm": 0.8625147342681885, "learning_rate": 8.524763024484852e-06, "loss": 0.073, "step": 36862 }, { "epoch": 0.65282666347895, "grad_norm": 1.0145301818847656, "learning_rate": 8.523986964211656e-06, "loss": 0.0637, "step": 36863 }, { "epoch": 0.6528443730159784, "grad_norm": 0.9622951149940491, "learning_rate": 8.523210925243891e-06, "loss": 0.0745, "step": 36864 }, { "epoch": 0.6528620825530068, "grad_norm": 0.7142543196678162, "learning_rate": 8.522434907584105e-06, "loss": 0.0663, "step": 36865 }, { "epoch": 0.6528797920900353, "grad_norm": 0.44100135564804077, "learning_rate": 8.52165891123486e-06, "loss": 0.0827, "step": 36866 }, { "epoch": 0.6528975016270637, "grad_norm": 0.5953509211540222, "learning_rate": 8.520882936198695e-06, "loss": 0.0725, "step": 36867 }, { "epoch": 0.6529152111640921, "grad_norm": 0.7027291655540466, "learning_rate": 8.52010698247817e-06, "loss": 0.0632, "step": 36868 }, { "epoch": 0.6529329207011205, "grad_norm": 0.7832081913948059, "learning_rate": 8.519331050075839e-06, "loss": 0.0779, "step": 36869 }, { "epoch": 0.652950630238149, "grad_norm": 0.5112167000770569, "learning_rate": 8.518555138994258e-06, "loss": 0.0719, "step": 36870 }, { "epoch": 0.6529683397751774, "grad_norm": 0.11587175726890564, "learning_rate": 8.51777924923597e-06, "loss": 0.0379, "step": 36871 }, { "epoch": 0.6529860493122058, "grad_norm": 0.6842992305755615, "learning_rate": 8.517003380803533e-06, "loss": 0.0794, "step": 36872 }, { "epoch": 0.6530037588492343, "grad_norm": 0.7008516192436218, "learning_rate": 8.516227533699497e-06, "loss": 0.0451, "step": 36873 }, { "epoch": 0.6530214683862627, "grad_norm": 0.5912413001060486, "learning_rate": 8.515451707926418e-06, "loss": 0.0707, "step": 36874 }, { "epoch": 0.6530391779232911, "grad_norm": 0.9130915403366089, "learning_rate": 8.514675903486844e-06, "loss": 0.0856, "step": 36875 }, { "epoch": 0.6530568874603195, "grad_norm": 0.37954992055892944, "learning_rate": 8.513900120383334e-06, "loss": 0.0372, "step": 36876 }, { "epoch": 0.653074596997348, "grad_norm": 0.7034710645675659, "learning_rate": 8.513124358618441e-06, "loss": 0.0685, "step": 36877 }, { "epoch": 0.6530923065343764, "grad_norm": 0.5538227558135986, "learning_rate": 8.512348618194709e-06, "loss": 0.0642, "step": 36878 }, { "epoch": 0.6531100160714048, "grad_norm": 1.0314342975616455, "learning_rate": 8.511572899114693e-06, "loss": 0.0765, "step": 36879 }, { "epoch": 0.6531277256084332, "grad_norm": 0.8714990615844727, "learning_rate": 8.510797201380952e-06, "loss": 0.0851, "step": 36880 }, { "epoch": 0.6531454351454618, "grad_norm": 0.6697630882263184, "learning_rate": 8.510021524996024e-06, "loss": 0.0633, "step": 36881 }, { "epoch": 0.6531631446824901, "grad_norm": 0.5068016648292542, "learning_rate": 8.50924586996247e-06, "loss": 0.0527, "step": 36882 }, { "epoch": 0.6531808542195185, "grad_norm": 0.4605703055858612, "learning_rate": 8.508470236282834e-06, "loss": 0.0495, "step": 36883 }, { "epoch": 0.653198563756547, "grad_norm": 0.21289792656898499, "learning_rate": 8.507694623959685e-06, "loss": 0.0562, "step": 36884 }, { "epoch": 0.6532162732935755, "grad_norm": 0.5751837491989136, "learning_rate": 8.506919032995562e-06, "loss": 0.0739, "step": 36885 }, { "epoch": 0.6532339828306039, "grad_norm": 0.39923256635665894, "learning_rate": 8.506143463393015e-06, "loss": 0.0546, "step": 36886 }, { "epoch": 0.6532516923676323, "grad_norm": 0.6675534844398499, "learning_rate": 8.505367915154601e-06, "loss": 0.0905, "step": 36887 }, { "epoch": 0.6532694019046608, "grad_norm": 0.4203954339027405, "learning_rate": 8.504592388282876e-06, "loss": 0.0274, "step": 36888 }, { "epoch": 0.6532871114416892, "grad_norm": 0.7626606225967407, "learning_rate": 8.503816882780379e-06, "loss": 0.0607, "step": 36889 }, { "epoch": 0.6533048209787176, "grad_norm": 0.5356869697570801, "learning_rate": 8.503041398649665e-06, "loss": 0.0683, "step": 36890 }, { "epoch": 0.653322530515746, "grad_norm": 0.7204394340515137, "learning_rate": 8.502265935893289e-06, "loss": 0.0437, "step": 36891 }, { "epoch": 0.6533402400527745, "grad_norm": 0.5405391454696655, "learning_rate": 8.501490494513802e-06, "loss": 0.0608, "step": 36892 }, { "epoch": 0.6533579495898029, "grad_norm": 0.26282835006713867, "learning_rate": 8.500715074513753e-06, "loss": 0.0534, "step": 36893 }, { "epoch": 0.6533756591268313, "grad_norm": 0.48630043864250183, "learning_rate": 8.499939675895695e-06, "loss": 0.0487, "step": 36894 }, { "epoch": 0.6533933686638597, "grad_norm": 0.545894980430603, "learning_rate": 8.499164298662182e-06, "loss": 0.0742, "step": 36895 }, { "epoch": 0.6534110782008882, "grad_norm": 0.7854971289634705, "learning_rate": 8.498388942815759e-06, "loss": 0.0744, "step": 36896 }, { "epoch": 0.6534287877379166, "grad_norm": 0.6307492256164551, "learning_rate": 8.497613608358973e-06, "loss": 0.0669, "step": 36897 }, { "epoch": 0.653446497274945, "grad_norm": 0.5501856803894043, "learning_rate": 8.496838295294392e-06, "loss": 0.0463, "step": 36898 }, { "epoch": 0.6534642068119734, "grad_norm": 0.5260812044143677, "learning_rate": 8.496063003624544e-06, "loss": 0.0603, "step": 36899 }, { "epoch": 0.6534819163490019, "grad_norm": 0.508743166923523, "learning_rate": 8.495287733351996e-06, "loss": 0.0583, "step": 36900 }, { "epoch": 0.6534996258860303, "grad_norm": 0.38491177558898926, "learning_rate": 8.49451248447929e-06, "loss": 0.0665, "step": 36901 }, { "epoch": 0.6535173354230587, "grad_norm": 0.2927486002445221, "learning_rate": 8.493737257008982e-06, "loss": 0.0332, "step": 36902 }, { "epoch": 0.6535350449600872, "grad_norm": 0.6050020456314087, "learning_rate": 8.49296205094362e-06, "loss": 0.0414, "step": 36903 }, { "epoch": 0.6535527544971156, "grad_norm": 0.4918532073497772, "learning_rate": 8.492186866285754e-06, "loss": 0.0728, "step": 36904 }, { "epoch": 0.653570464034144, "grad_norm": 0.21493737399578094, "learning_rate": 8.491411703037939e-06, "loss": 0.0589, "step": 36905 }, { "epoch": 0.6535881735711724, "grad_norm": 0.6351008415222168, "learning_rate": 8.49063656120272e-06, "loss": 0.0418, "step": 36906 }, { "epoch": 0.6536058831082009, "grad_norm": 0.45315417647361755, "learning_rate": 8.489861440782645e-06, "loss": 0.0239, "step": 36907 }, { "epoch": 0.6536235926452293, "grad_norm": 0.36188703775405884, "learning_rate": 8.489086341780267e-06, "loss": 0.0393, "step": 36908 }, { "epoch": 0.6536413021822577, "grad_norm": 0.6948469877243042, "learning_rate": 8.488311264198145e-06, "loss": 0.0961, "step": 36909 }, { "epoch": 0.6536590117192861, "grad_norm": 0.6105656623840332, "learning_rate": 8.487536208038812e-06, "loss": 0.115, "step": 36910 }, { "epoch": 0.6536767212563146, "grad_norm": 0.7644152641296387, "learning_rate": 8.486761173304826e-06, "loss": 0.0616, "step": 36911 }, { "epoch": 0.653694430793343, "grad_norm": 0.29565495252609253, "learning_rate": 8.485986159998738e-06, "loss": 0.0832, "step": 36912 }, { "epoch": 0.6537121403303714, "grad_norm": 0.3057536780834198, "learning_rate": 8.485211168123095e-06, "loss": 0.0584, "step": 36913 }, { "epoch": 0.6537298498673998, "grad_norm": 0.7594771981239319, "learning_rate": 8.48443619768045e-06, "loss": 0.0716, "step": 36914 }, { "epoch": 0.6537475594044283, "grad_norm": 0.6017610430717468, "learning_rate": 8.483661248673352e-06, "loss": 0.0608, "step": 36915 }, { "epoch": 0.6537652689414567, "grad_norm": 0.7258775234222412, "learning_rate": 8.482886321104356e-06, "loss": 0.0551, "step": 36916 }, { "epoch": 0.6537829784784851, "grad_norm": 0.5876242518424988, "learning_rate": 8.482111414975998e-06, "loss": 0.0481, "step": 36917 }, { "epoch": 0.6538006880155136, "grad_norm": 0.5179548859596252, "learning_rate": 8.481336530290831e-06, "loss": 0.0707, "step": 36918 }, { "epoch": 0.653818397552542, "grad_norm": 0.6308438777923584, "learning_rate": 8.48056166705141e-06, "loss": 0.0838, "step": 36919 }, { "epoch": 0.6538361070895704, "grad_norm": 0.8975033760070801, "learning_rate": 8.479786825260282e-06, "loss": 0.0958, "step": 36920 }, { "epoch": 0.6538538166265988, "grad_norm": 0.4732470214366913, "learning_rate": 8.479012004919995e-06, "loss": 0.0771, "step": 36921 }, { "epoch": 0.6538715261636273, "grad_norm": 0.2738474905490875, "learning_rate": 8.478237206033098e-06, "loss": 0.063, "step": 36922 }, { "epoch": 0.6538892357006557, "grad_norm": 0.80030357837677, "learning_rate": 8.47746242860215e-06, "loss": 0.0862, "step": 36923 }, { "epoch": 0.6539069452376841, "grad_norm": 0.37718015909194946, "learning_rate": 8.476687672629682e-06, "loss": 0.0414, "step": 36924 }, { "epoch": 0.6539246547747125, "grad_norm": 0.5991491675376892, "learning_rate": 8.475912938118253e-06, "loss": 0.0556, "step": 36925 }, { "epoch": 0.653942364311741, "grad_norm": 1.2883985042572021, "learning_rate": 8.47513822507041e-06, "loss": 0.0721, "step": 36926 }, { "epoch": 0.6539600738487694, "grad_norm": 0.398732453584671, "learning_rate": 8.47436353348871e-06, "loss": 0.0708, "step": 36927 }, { "epoch": 0.6539777833857978, "grad_norm": 1.30729079246521, "learning_rate": 8.473588863375687e-06, "loss": 0.1125, "step": 36928 }, { "epoch": 0.6539954929228262, "grad_norm": 0.8542835116386414, "learning_rate": 8.472814214733895e-06, "loss": 0.045, "step": 36929 }, { "epoch": 0.6540132024598547, "grad_norm": 0.7335754632949829, "learning_rate": 8.472039587565886e-06, "loss": 0.0649, "step": 36930 }, { "epoch": 0.6540309119968831, "grad_norm": 0.7362516522407532, "learning_rate": 8.471264981874205e-06, "loss": 0.0517, "step": 36931 }, { "epoch": 0.6540486215339115, "grad_norm": 0.6738673448562622, "learning_rate": 8.470490397661402e-06, "loss": 0.0854, "step": 36932 }, { "epoch": 0.65406633107094, "grad_norm": 0.9874374270439148, "learning_rate": 8.469715834930028e-06, "loss": 0.0609, "step": 36933 }, { "epoch": 0.6540840406079684, "grad_norm": 0.8547676205635071, "learning_rate": 8.468941293682633e-06, "loss": 0.1008, "step": 36934 }, { "epoch": 0.6541017501449968, "grad_norm": 0.7579083442687988, "learning_rate": 8.468166773921752e-06, "loss": 0.0646, "step": 36935 }, { "epoch": 0.6541194596820252, "grad_norm": 0.5204967260360718, "learning_rate": 8.467392275649943e-06, "loss": 0.0709, "step": 36936 }, { "epoch": 0.6541371692190537, "grad_norm": 0.5191980004310608, "learning_rate": 8.466617798869759e-06, "loss": 0.0541, "step": 36937 }, { "epoch": 0.6541548787560821, "grad_norm": 0.6435490250587463, "learning_rate": 8.465843343583737e-06, "loss": 0.0636, "step": 36938 }, { "epoch": 0.6541725882931105, "grad_norm": 0.8182817101478577, "learning_rate": 8.465068909794428e-06, "loss": 0.0548, "step": 36939 }, { "epoch": 0.6541902978301389, "grad_norm": 0.5136518478393555, "learning_rate": 8.464294497504381e-06, "loss": 0.0456, "step": 36940 }, { "epoch": 0.6542080073671674, "grad_norm": 0.5182197690010071, "learning_rate": 8.463520106716145e-06, "loss": 0.0532, "step": 36941 }, { "epoch": 0.6542257169041958, "grad_norm": 0.5486745238304138, "learning_rate": 8.462745737432267e-06, "loss": 0.0513, "step": 36942 }, { "epoch": 0.6542434264412242, "grad_norm": 0.7458455562591553, "learning_rate": 8.461971389655293e-06, "loss": 0.0531, "step": 36943 }, { "epoch": 0.6542611359782526, "grad_norm": 0.20374652743339539, "learning_rate": 8.461197063387778e-06, "loss": 0.0394, "step": 36944 }, { "epoch": 0.6542788455152811, "grad_norm": 0.31398314237594604, "learning_rate": 8.460422758632259e-06, "loss": 0.0453, "step": 36945 }, { "epoch": 0.6542965550523095, "grad_norm": 0.8943377137184143, "learning_rate": 8.459648475391286e-06, "loss": 0.0808, "step": 36946 }, { "epoch": 0.654314264589338, "grad_norm": 0.7605465650558472, "learning_rate": 8.458874213667409e-06, "loss": 0.0713, "step": 36947 }, { "epoch": 0.6543319741263665, "grad_norm": 0.7872578501701355, "learning_rate": 8.458099973463179e-06, "loss": 0.0693, "step": 36948 }, { "epoch": 0.6543496836633949, "grad_norm": 0.6468203067779541, "learning_rate": 8.457325754781128e-06, "loss": 0.059, "step": 36949 }, { "epoch": 0.6543673932004233, "grad_norm": 0.6894772052764893, "learning_rate": 8.456551557623818e-06, "loss": 0.041, "step": 36950 }, { "epoch": 0.6543851027374517, "grad_norm": 0.902363657951355, "learning_rate": 8.455777381993799e-06, "loss": 0.0459, "step": 36951 }, { "epoch": 0.6544028122744802, "grad_norm": 0.37338143587112427, "learning_rate": 8.455003227893604e-06, "loss": 0.0288, "step": 36952 }, { "epoch": 0.6544205218115086, "grad_norm": 0.6784857511520386, "learning_rate": 8.454229095325785e-06, "loss": 0.0737, "step": 36953 }, { "epoch": 0.654438231348537, "grad_norm": 1.0287755727767944, "learning_rate": 8.453454984292892e-06, "loss": 0.084, "step": 36954 }, { "epoch": 0.6544559408855654, "grad_norm": 0.30400708317756653, "learning_rate": 8.452680894797476e-06, "loss": 0.0442, "step": 36955 }, { "epoch": 0.6544736504225939, "grad_norm": 0.7621248960494995, "learning_rate": 8.45190682684207e-06, "loss": 0.1012, "step": 36956 }, { "epoch": 0.6544913599596223, "grad_norm": 0.7792038917541504, "learning_rate": 8.451132780429229e-06, "loss": 0.0437, "step": 36957 }, { "epoch": 0.6545090694966507, "grad_norm": 0.39903900027275085, "learning_rate": 8.450358755561498e-06, "loss": 0.0557, "step": 36958 }, { "epoch": 0.6545267790336791, "grad_norm": 0.6701374650001526, "learning_rate": 8.449584752241427e-06, "loss": 0.0664, "step": 36959 }, { "epoch": 0.6545444885707076, "grad_norm": 0.51346355676651, "learning_rate": 8.448810770471557e-06, "loss": 0.0704, "step": 36960 }, { "epoch": 0.654562198107736, "grad_norm": 0.7861031889915466, "learning_rate": 8.448036810254437e-06, "loss": 0.0599, "step": 36961 }, { "epoch": 0.6545799076447644, "grad_norm": 0.550957441329956, "learning_rate": 8.44726287159262e-06, "loss": 0.05, "step": 36962 }, { "epoch": 0.6545976171817929, "grad_norm": 0.6256656646728516, "learning_rate": 8.44648895448864e-06, "loss": 0.0541, "step": 36963 }, { "epoch": 0.6546153267188213, "grad_norm": 0.46457645297050476, "learning_rate": 8.445715058945049e-06, "loss": 0.0402, "step": 36964 }, { "epoch": 0.6546330362558497, "grad_norm": 0.9742346405982971, "learning_rate": 8.44494118496439e-06, "loss": 0.0623, "step": 36965 }, { "epoch": 0.6546507457928781, "grad_norm": 0.6090449094772339, "learning_rate": 8.44416733254922e-06, "loss": 0.0675, "step": 36966 }, { "epoch": 0.6546684553299066, "grad_norm": 0.5518136620521545, "learning_rate": 8.443393501702069e-06, "loss": 0.068, "step": 36967 }, { "epoch": 0.654686164866935, "grad_norm": 0.38334763050079346, "learning_rate": 8.44261969242549e-06, "loss": 0.0419, "step": 36968 }, { "epoch": 0.6547038744039634, "grad_norm": 0.4307919144630432, "learning_rate": 8.44184590472203e-06, "loss": 0.0479, "step": 36969 }, { "epoch": 0.6547215839409918, "grad_norm": 0.535832405090332, "learning_rate": 8.44107213859423e-06, "loss": 0.0496, "step": 36970 }, { "epoch": 0.6547392934780203, "grad_norm": 0.9715706706047058, "learning_rate": 8.440298394044646e-06, "loss": 0.1078, "step": 36971 }, { "epoch": 0.6547570030150487, "grad_norm": 0.7883813977241516, "learning_rate": 8.439524671075812e-06, "loss": 0.0732, "step": 36972 }, { "epoch": 0.6547747125520771, "grad_norm": 0.8870468139648438, "learning_rate": 8.438750969690287e-06, "loss": 0.0717, "step": 36973 }, { "epoch": 0.6547924220891055, "grad_norm": 0.30187955498695374, "learning_rate": 8.437977289890599e-06, "loss": 0.0583, "step": 36974 }, { "epoch": 0.654810131626134, "grad_norm": 0.4878515899181366, "learning_rate": 8.437203631679302e-06, "loss": 0.0572, "step": 36975 }, { "epoch": 0.6548278411631624, "grad_norm": 0.52105313539505, "learning_rate": 8.43642999505895e-06, "loss": 0.0615, "step": 36976 }, { "epoch": 0.6548455507001908, "grad_norm": 0.8578776717185974, "learning_rate": 8.435656380032071e-06, "loss": 0.0561, "step": 36977 }, { "epoch": 0.6548632602372193, "grad_norm": 0.4254596531391144, "learning_rate": 8.434882786601214e-06, "loss": 0.0499, "step": 36978 }, { "epoch": 0.6548809697742477, "grad_norm": 0.7972310185432434, "learning_rate": 8.434109214768933e-06, "loss": 0.0609, "step": 36979 }, { "epoch": 0.6548986793112761, "grad_norm": 0.7456960082054138, "learning_rate": 8.433335664537776e-06, "loss": 0.072, "step": 36980 }, { "epoch": 0.6549163888483045, "grad_norm": 0.9585158228874207, "learning_rate": 8.432562135910274e-06, "loss": 0.0822, "step": 36981 }, { "epoch": 0.654934098385333, "grad_norm": 0.38505250215530396, "learning_rate": 8.431788628888978e-06, "loss": 0.0508, "step": 36982 }, { "epoch": 0.6549518079223614, "grad_norm": 0.6347617506980896, "learning_rate": 8.43101514347644e-06, "loss": 0.047, "step": 36983 }, { "epoch": 0.6549695174593898, "grad_norm": 0.44343721866607666, "learning_rate": 8.430241679675191e-06, "loss": 0.068, "step": 36984 }, { "epoch": 0.6549872269964182, "grad_norm": 0.42293116450309753, "learning_rate": 8.429468237487784e-06, "loss": 0.0698, "step": 36985 }, { "epoch": 0.6550049365334467, "grad_norm": 0.732912540435791, "learning_rate": 8.42869481691676e-06, "loss": 0.0858, "step": 36986 }, { "epoch": 0.6550226460704751, "grad_norm": 0.4707663655281067, "learning_rate": 8.427921417964666e-06, "loss": 0.0557, "step": 36987 }, { "epoch": 0.6550403556075035, "grad_norm": 0.5841072201728821, "learning_rate": 8.427148040634046e-06, "loss": 0.0527, "step": 36988 }, { "epoch": 0.6550580651445319, "grad_norm": 0.6190894842147827, "learning_rate": 8.426374684927443e-06, "loss": 0.0518, "step": 36989 }, { "epoch": 0.6550757746815604, "grad_norm": 1.209354281425476, "learning_rate": 8.425601350847403e-06, "loss": 0.0737, "step": 36990 }, { "epoch": 0.6550934842185888, "grad_norm": 0.3847704231739044, "learning_rate": 8.424828038396477e-06, "loss": 0.0523, "step": 36991 }, { "epoch": 0.6551111937556172, "grad_norm": 0.5553443431854248, "learning_rate": 8.424054747577193e-06, "loss": 0.0736, "step": 36992 }, { "epoch": 0.6551289032926457, "grad_norm": 0.19126640260219574, "learning_rate": 8.423281478392107e-06, "loss": 0.0722, "step": 36993 }, { "epoch": 0.6551466128296741, "grad_norm": 0.6014506220817566, "learning_rate": 8.422508230843766e-06, "loss": 0.0575, "step": 36994 }, { "epoch": 0.6551643223667025, "grad_norm": 0.46507978439331055, "learning_rate": 8.421735004934697e-06, "loss": 0.0255, "step": 36995 }, { "epoch": 0.6551820319037309, "grad_norm": 1.1345233917236328, "learning_rate": 8.420961800667458e-06, "loss": 0.0596, "step": 36996 }, { "epoch": 0.6551997414407594, "grad_norm": 0.5308893918991089, "learning_rate": 8.420188618044588e-06, "loss": 0.0475, "step": 36997 }, { "epoch": 0.6552174509777878, "grad_norm": 0.609786868095398, "learning_rate": 8.419415457068633e-06, "loss": 0.0764, "step": 36998 }, { "epoch": 0.6552351605148162, "grad_norm": 0.3242913782596588, "learning_rate": 8.418642317742136e-06, "loss": 0.0654, "step": 36999 }, { "epoch": 0.6552528700518446, "grad_norm": 0.6685709357261658, "learning_rate": 8.417869200067637e-06, "loss": 0.0559, "step": 37000 }, { "epoch": 0.6552705795888731, "grad_norm": 0.6050053834915161, "learning_rate": 8.417096104047693e-06, "loss": 0.0701, "step": 37001 }, { "epoch": 0.6552882891259015, "grad_norm": 0.618411123752594, "learning_rate": 8.416323029684828e-06, "loss": 0.065, "step": 37002 }, { "epoch": 0.6553059986629299, "grad_norm": 0.5784531235694885, "learning_rate": 8.415549976981596e-06, "loss": 0.0529, "step": 37003 }, { "epoch": 0.6553237081999583, "grad_norm": 0.3122718036174774, "learning_rate": 8.414776945940536e-06, "loss": 0.0378, "step": 37004 }, { "epoch": 0.6553414177369868, "grad_norm": 0.41179582476615906, "learning_rate": 8.414003936564202e-06, "loss": 0.0385, "step": 37005 }, { "epoch": 0.6553591272740152, "grad_norm": 0.5776560306549072, "learning_rate": 8.413230948855123e-06, "loss": 0.0501, "step": 37006 }, { "epoch": 0.6553768368110436, "grad_norm": 0.6887215971946716, "learning_rate": 8.41245798281584e-06, "loss": 0.0536, "step": 37007 }, { "epoch": 0.6553945463480721, "grad_norm": 0.5811153054237366, "learning_rate": 8.411685038448918e-06, "loss": 0.0585, "step": 37008 }, { "epoch": 0.6554122558851005, "grad_norm": 0.37313973903656006, "learning_rate": 8.41091211575688e-06, "loss": 0.0517, "step": 37009 }, { "epoch": 0.655429965422129, "grad_norm": 0.669304370880127, "learning_rate": 8.410139214742274e-06, "loss": 0.0641, "step": 37010 }, { "epoch": 0.6554476749591573, "grad_norm": 0.6576142311096191, "learning_rate": 8.409366335407642e-06, "loss": 0.0666, "step": 37011 }, { "epoch": 0.6554653844961859, "grad_norm": 0.63157057762146, "learning_rate": 8.408593477755536e-06, "loss": 0.0393, "step": 37012 }, { "epoch": 0.6554830940332143, "grad_norm": 0.509077787399292, "learning_rate": 8.407820641788484e-06, "loss": 0.065, "step": 37013 }, { "epoch": 0.6555008035702427, "grad_norm": 0.7153143882751465, "learning_rate": 8.407047827509036e-06, "loss": 0.0435, "step": 37014 }, { "epoch": 0.655518513107271, "grad_norm": 0.7583708763122559, "learning_rate": 8.406275034919732e-06, "loss": 0.0645, "step": 37015 }, { "epoch": 0.6555362226442996, "grad_norm": 0.4185650050640106, "learning_rate": 8.405502264023119e-06, "loss": 0.0742, "step": 37016 }, { "epoch": 0.655553932181328, "grad_norm": 0.18631356954574585, "learning_rate": 8.404729514821735e-06, "loss": 0.0492, "step": 37017 }, { "epoch": 0.6555716417183564, "grad_norm": 0.6646203398704529, "learning_rate": 8.403956787318125e-06, "loss": 0.0379, "step": 37018 }, { "epoch": 0.6555893512553848, "grad_norm": 0.28180474042892456, "learning_rate": 8.403184081514833e-06, "loss": 0.0413, "step": 37019 }, { "epoch": 0.6556070607924133, "grad_norm": 0.7124665379524231, "learning_rate": 8.402411397414396e-06, "loss": 0.0877, "step": 37020 }, { "epoch": 0.6556247703294417, "grad_norm": 0.6136631965637207, "learning_rate": 8.401638735019355e-06, "loss": 0.0601, "step": 37021 }, { "epoch": 0.6556424798664701, "grad_norm": 0.5370494723320007, "learning_rate": 8.400866094332257e-06, "loss": 0.0762, "step": 37022 }, { "epoch": 0.6556601894034986, "grad_norm": 0.7102327942848206, "learning_rate": 8.400093475355648e-06, "loss": 0.0551, "step": 37023 }, { "epoch": 0.655677898940527, "grad_norm": 1.0715349912643433, "learning_rate": 8.399320878092057e-06, "loss": 0.0964, "step": 37024 }, { "epoch": 0.6556956084775554, "grad_norm": 0.6743018627166748, "learning_rate": 8.398548302544032e-06, "loss": 0.0705, "step": 37025 }, { "epoch": 0.6557133180145838, "grad_norm": 1.1245532035827637, "learning_rate": 8.397775748714117e-06, "loss": 0.0561, "step": 37026 }, { "epoch": 0.6557310275516123, "grad_norm": 0.7617361545562744, "learning_rate": 8.397003216604852e-06, "loss": 0.0806, "step": 37027 }, { "epoch": 0.6557487370886407, "grad_norm": 0.5361914038658142, "learning_rate": 8.396230706218776e-06, "loss": 0.0577, "step": 37028 }, { "epoch": 0.6557664466256691, "grad_norm": 0.6790100932121277, "learning_rate": 8.395458217558439e-06, "loss": 0.0736, "step": 37029 }, { "epoch": 0.6557841561626975, "grad_norm": 0.6647942662239075, "learning_rate": 8.394685750626376e-06, "loss": 0.0648, "step": 37030 }, { "epoch": 0.655801865699726, "grad_norm": 0.5261484980583191, "learning_rate": 8.393913305425126e-06, "loss": 0.069, "step": 37031 }, { "epoch": 0.6558195752367544, "grad_norm": 0.5012534260749817, "learning_rate": 8.393140881957232e-06, "loss": 0.0477, "step": 37032 }, { "epoch": 0.6558372847737828, "grad_norm": 0.7291191220283508, "learning_rate": 8.392368480225245e-06, "loss": 0.0568, "step": 37033 }, { "epoch": 0.6558549943108112, "grad_norm": 0.46167704463005066, "learning_rate": 8.391596100231687e-06, "loss": 0.041, "step": 37034 }, { "epoch": 0.6558727038478397, "grad_norm": 0.783312201499939, "learning_rate": 8.390823741979114e-06, "loss": 0.0895, "step": 37035 }, { "epoch": 0.6558904133848681, "grad_norm": 0.5389156937599182, "learning_rate": 8.390051405470053e-06, "loss": 0.0456, "step": 37036 }, { "epoch": 0.6559081229218965, "grad_norm": 0.6423693895339966, "learning_rate": 8.389279090707066e-06, "loss": 0.0578, "step": 37037 }, { "epoch": 0.655925832458925, "grad_norm": 0.6761901378631592, "learning_rate": 8.388506797692678e-06, "loss": 0.0789, "step": 37038 }, { "epoch": 0.6559435419959534, "grad_norm": 0.6230611205101013, "learning_rate": 8.387734526429434e-06, "loss": 0.0633, "step": 37039 }, { "epoch": 0.6559612515329818, "grad_norm": 0.2878911793231964, "learning_rate": 8.386962276919879e-06, "loss": 0.0638, "step": 37040 }, { "epoch": 0.6559789610700102, "grad_norm": 0.6172857880592346, "learning_rate": 8.386190049166545e-06, "loss": 0.0506, "step": 37041 }, { "epoch": 0.6559966706070387, "grad_norm": 0.6112169623374939, "learning_rate": 8.385417843171975e-06, "loss": 0.0509, "step": 37042 }, { "epoch": 0.6560143801440671, "grad_norm": 0.6870928406715393, "learning_rate": 8.384645658938713e-06, "loss": 0.0541, "step": 37043 }, { "epoch": 0.6560320896810955, "grad_norm": 0.890172004699707, "learning_rate": 8.3838734964693e-06, "loss": 0.0938, "step": 37044 }, { "epoch": 0.6560497992181239, "grad_norm": 0.5377448797225952, "learning_rate": 8.38310135576627e-06, "loss": 0.0625, "step": 37045 }, { "epoch": 0.6560675087551524, "grad_norm": 0.5911670923233032, "learning_rate": 8.382329236832167e-06, "loss": 0.0717, "step": 37046 }, { "epoch": 0.6560852182921808, "grad_norm": 0.5843692421913147, "learning_rate": 8.38155713966954e-06, "loss": 0.0935, "step": 37047 }, { "epoch": 0.6561029278292092, "grad_norm": 0.43604576587677, "learning_rate": 8.380785064280912e-06, "loss": 0.0421, "step": 37048 }, { "epoch": 0.6561206373662376, "grad_norm": 0.5870685577392578, "learning_rate": 8.380013010668832e-06, "loss": 0.0721, "step": 37049 }, { "epoch": 0.6561383469032661, "grad_norm": 0.3479350805282593, "learning_rate": 8.379240978835843e-06, "loss": 0.0439, "step": 37050 }, { "epoch": 0.6561560564402945, "grad_norm": 0.3461187779903412, "learning_rate": 8.378468968784483e-06, "loss": 0.052, "step": 37051 }, { "epoch": 0.6561737659773229, "grad_norm": 0.6585122346878052, "learning_rate": 8.377696980517288e-06, "loss": 0.0934, "step": 37052 }, { "epoch": 0.6561914755143514, "grad_norm": 0.8215641379356384, "learning_rate": 8.376925014036797e-06, "loss": 0.0656, "step": 37053 }, { "epoch": 0.6562091850513798, "grad_norm": 0.8102724552154541, "learning_rate": 8.376153069345555e-06, "loss": 0.0707, "step": 37054 }, { "epoch": 0.6562268945884082, "grad_norm": 0.5453897714614868, "learning_rate": 8.375381146446099e-06, "loss": 0.0461, "step": 37055 }, { "epoch": 0.6562446041254366, "grad_norm": 0.5109564065933228, "learning_rate": 8.37460924534097e-06, "loss": 0.0487, "step": 37056 }, { "epoch": 0.6562623136624651, "grad_norm": 0.7782869338989258, "learning_rate": 8.373837366032704e-06, "loss": 0.0485, "step": 37057 }, { "epoch": 0.6562800231994935, "grad_norm": 0.5386443138122559, "learning_rate": 8.373065508523852e-06, "loss": 0.0591, "step": 37058 }, { "epoch": 0.6562977327365219, "grad_norm": 0.7569150924682617, "learning_rate": 8.372293672816936e-06, "loss": 0.0525, "step": 37059 }, { "epoch": 0.6563154422735503, "grad_norm": 0.3862989544868469, "learning_rate": 8.371521858914502e-06, "loss": 0.0546, "step": 37060 }, { "epoch": 0.6563331518105788, "grad_norm": 0.6337381601333618, "learning_rate": 8.370750066819093e-06, "loss": 0.0668, "step": 37061 }, { "epoch": 0.6563508613476072, "grad_norm": 0.8213531374931335, "learning_rate": 8.369978296533253e-06, "loss": 0.09, "step": 37062 }, { "epoch": 0.6563685708846356, "grad_norm": 0.5971046090126038, "learning_rate": 8.369206548059505e-06, "loss": 0.0615, "step": 37063 }, { "epoch": 0.656386280421664, "grad_norm": 0.5279148817062378, "learning_rate": 8.368434821400398e-06, "loss": 0.0838, "step": 37064 }, { "epoch": 0.6564039899586925, "grad_norm": 0.552857518196106, "learning_rate": 8.367663116558469e-06, "loss": 0.0629, "step": 37065 }, { "epoch": 0.6564216994957209, "grad_norm": 0.6281730532646179, "learning_rate": 8.366891433536258e-06, "loss": 0.0802, "step": 37066 }, { "epoch": 0.6564394090327493, "grad_norm": 0.2600281834602356, "learning_rate": 8.366119772336303e-06, "loss": 0.0863, "step": 37067 }, { "epoch": 0.6564571185697778, "grad_norm": 0.9286947846412659, "learning_rate": 8.365348132961144e-06, "loss": 0.0883, "step": 37068 }, { "epoch": 0.6564748281068062, "grad_norm": 0.8078578114509583, "learning_rate": 8.364576515413323e-06, "loss": 0.0866, "step": 37069 }, { "epoch": 0.6564925376438346, "grad_norm": 0.3614117503166199, "learning_rate": 8.363804919695371e-06, "loss": 0.032, "step": 37070 }, { "epoch": 0.656510247180863, "grad_norm": 0.7864598631858826, "learning_rate": 8.363033345809828e-06, "loss": 0.0939, "step": 37071 }, { "epoch": 0.6565279567178915, "grad_norm": 0.826058030128479, "learning_rate": 8.36226179375924e-06, "loss": 0.0927, "step": 37072 }, { "epoch": 0.65654566625492, "grad_norm": 0.4977874457836151, "learning_rate": 8.361490263546125e-06, "loss": 0.0907, "step": 37073 }, { "epoch": 0.6565633757919483, "grad_norm": 0.5238510370254517, "learning_rate": 8.360718755173044e-06, "loss": 0.0758, "step": 37074 }, { "epoch": 0.6565810853289767, "grad_norm": 0.4702190160751343, "learning_rate": 8.359947268642528e-06, "loss": 0.0537, "step": 37075 }, { "epoch": 0.6565987948660053, "grad_norm": 0.6783443093299866, "learning_rate": 8.359175803957118e-06, "loss": 0.0837, "step": 37076 }, { "epoch": 0.6566165044030337, "grad_norm": 0.6701607704162598, "learning_rate": 8.358404361119343e-06, "loss": 0.0677, "step": 37077 }, { "epoch": 0.656634213940062, "grad_norm": 0.4164515733718872, "learning_rate": 8.357632940131745e-06, "loss": 0.0512, "step": 37078 }, { "epoch": 0.6566519234770904, "grad_norm": 0.5517247319221497, "learning_rate": 8.356861540996868e-06, "loss": 0.0376, "step": 37079 }, { "epoch": 0.656669633014119, "grad_norm": 0.2752014398574829, "learning_rate": 8.356090163717239e-06, "loss": 0.0426, "step": 37080 }, { "epoch": 0.6566873425511474, "grad_norm": 0.9625338315963745, "learning_rate": 8.355318808295402e-06, "loss": 0.0706, "step": 37081 }, { "epoch": 0.6567050520881758, "grad_norm": 0.19016383588314056, "learning_rate": 8.354547474733894e-06, "loss": 0.0548, "step": 37082 }, { "epoch": 0.6567227616252043, "grad_norm": 0.9898526668548584, "learning_rate": 8.353776163035252e-06, "loss": 0.094, "step": 37083 }, { "epoch": 0.6567404711622327, "grad_norm": 0.900757372379303, "learning_rate": 8.353004873202017e-06, "loss": 0.073, "step": 37084 }, { "epoch": 0.6567581806992611, "grad_norm": 0.3719945549964905, "learning_rate": 8.352233605236721e-06, "loss": 0.0891, "step": 37085 }, { "epoch": 0.6567758902362895, "grad_norm": 0.5438732504844666, "learning_rate": 8.35146235914191e-06, "loss": 0.0819, "step": 37086 }, { "epoch": 0.656793599773318, "grad_norm": 0.496798038482666, "learning_rate": 8.35069113492011e-06, "loss": 0.0461, "step": 37087 }, { "epoch": 0.6568113093103464, "grad_norm": 0.4711049497127533, "learning_rate": 8.349919932573864e-06, "loss": 0.0502, "step": 37088 }, { "epoch": 0.6568290188473748, "grad_norm": 0.6769430637359619, "learning_rate": 8.349148752105708e-06, "loss": 0.0489, "step": 37089 }, { "epoch": 0.6568467283844032, "grad_norm": 0.5197854042053223, "learning_rate": 8.348377593518187e-06, "loss": 0.0556, "step": 37090 }, { "epoch": 0.6568644379214317, "grad_norm": 0.4963702857494354, "learning_rate": 8.347606456813824e-06, "loss": 0.0611, "step": 37091 }, { "epoch": 0.6568821474584601, "grad_norm": 0.6470615267753601, "learning_rate": 8.346835341995163e-06, "loss": 0.0513, "step": 37092 }, { "epoch": 0.6568998569954885, "grad_norm": 0.5344755053520203, "learning_rate": 8.34606424906474e-06, "loss": 0.0677, "step": 37093 }, { "epoch": 0.6569175665325169, "grad_norm": 0.4384995102882385, "learning_rate": 8.345293178025094e-06, "loss": 0.067, "step": 37094 }, { "epoch": 0.6569352760695454, "grad_norm": 0.7763599157333374, "learning_rate": 8.34452212887876e-06, "loss": 0.1092, "step": 37095 }, { "epoch": 0.6569529856065738, "grad_norm": 0.5639346837997437, "learning_rate": 8.343751101628274e-06, "loss": 0.0617, "step": 37096 }, { "epoch": 0.6569706951436022, "grad_norm": 0.46259281039237976, "learning_rate": 8.342980096276182e-06, "loss": 0.1231, "step": 37097 }, { "epoch": 0.6569884046806307, "grad_norm": 0.8550696969032288, "learning_rate": 8.342209112825003e-06, "loss": 0.0759, "step": 37098 }, { "epoch": 0.6570061142176591, "grad_norm": 0.5533730983734131, "learning_rate": 8.341438151277286e-06, "loss": 0.0494, "step": 37099 }, { "epoch": 0.6570238237546875, "grad_norm": 0.6322047114372253, "learning_rate": 8.340667211635562e-06, "loss": 0.0566, "step": 37100 }, { "epoch": 0.6570415332917159, "grad_norm": 0.30135083198547363, "learning_rate": 8.339896293902374e-06, "loss": 0.0421, "step": 37101 }, { "epoch": 0.6570592428287444, "grad_norm": 0.580987274646759, "learning_rate": 8.339125398080243e-06, "loss": 0.0648, "step": 37102 }, { "epoch": 0.6570769523657728, "grad_norm": 0.5202217102050781, "learning_rate": 8.338354524171724e-06, "loss": 0.038, "step": 37103 }, { "epoch": 0.6570946619028012, "grad_norm": 0.6527076363563538, "learning_rate": 8.33758367217935e-06, "loss": 0.0622, "step": 37104 }, { "epoch": 0.6571123714398296, "grad_norm": 0.3059816062450409, "learning_rate": 8.336812842105642e-06, "loss": 0.0608, "step": 37105 }, { "epoch": 0.6571300809768581, "grad_norm": 0.684465229511261, "learning_rate": 8.336042033953149e-06, "loss": 0.0847, "step": 37106 }, { "epoch": 0.6571477905138865, "grad_norm": 0.5942918062210083, "learning_rate": 8.335271247724403e-06, "loss": 0.0623, "step": 37107 }, { "epoch": 0.6571655000509149, "grad_norm": 0.6415532231330872, "learning_rate": 8.334500483421947e-06, "loss": 0.0784, "step": 37108 }, { "epoch": 0.6571832095879433, "grad_norm": 0.6227948069572449, "learning_rate": 8.333729741048304e-06, "loss": 0.0438, "step": 37109 }, { "epoch": 0.6572009191249718, "grad_norm": 0.512722909450531, "learning_rate": 8.332959020606017e-06, "loss": 0.0695, "step": 37110 }, { "epoch": 0.6572186286620002, "grad_norm": 0.5331997871398926, "learning_rate": 8.332188322097617e-06, "loss": 0.0424, "step": 37111 }, { "epoch": 0.6572363381990286, "grad_norm": 0.6925119161605835, "learning_rate": 8.331417645525647e-06, "loss": 0.0692, "step": 37112 }, { "epoch": 0.6572540477360571, "grad_norm": 0.5237718820571899, "learning_rate": 8.330646990892637e-06, "loss": 0.0734, "step": 37113 }, { "epoch": 0.6572717572730855, "grad_norm": 0.46189960837364197, "learning_rate": 8.329876358201123e-06, "loss": 0.0495, "step": 37114 }, { "epoch": 0.6572894668101139, "grad_norm": 0.755336344242096, "learning_rate": 8.32910574745365e-06, "loss": 0.0544, "step": 37115 }, { "epoch": 0.6573071763471423, "grad_norm": 0.6605273485183716, "learning_rate": 8.328335158652734e-06, "loss": 0.065, "step": 37116 }, { "epoch": 0.6573248858841708, "grad_norm": 0.7448170185089111, "learning_rate": 8.327564591800924e-06, "loss": 0.0422, "step": 37117 }, { "epoch": 0.6573425954211992, "grad_norm": 0.583660900592804, "learning_rate": 8.326794046900755e-06, "loss": 0.0613, "step": 37118 }, { "epoch": 0.6573603049582276, "grad_norm": 0.6950885653495789, "learning_rate": 8.326023523954753e-06, "loss": 0.0633, "step": 37119 }, { "epoch": 0.657378014495256, "grad_norm": 0.6954503059387207, "learning_rate": 8.32525302296546e-06, "loss": 0.0804, "step": 37120 }, { "epoch": 0.6573957240322845, "grad_norm": 0.5646741986274719, "learning_rate": 8.32448254393541e-06, "loss": 0.0439, "step": 37121 }, { "epoch": 0.6574134335693129, "grad_norm": 0.2502826452255249, "learning_rate": 8.323712086867134e-06, "loss": 0.0557, "step": 37122 }, { "epoch": 0.6574311431063413, "grad_norm": 0.40752214193344116, "learning_rate": 8.322941651763174e-06, "loss": 0.0674, "step": 37123 }, { "epoch": 0.6574488526433697, "grad_norm": 0.5038895010948181, "learning_rate": 8.322171238626058e-06, "loss": 0.0578, "step": 37124 }, { "epoch": 0.6574665621803982, "grad_norm": 0.4714217483997345, "learning_rate": 8.32140084745833e-06, "loss": 0.0575, "step": 37125 }, { "epoch": 0.6574842717174266, "grad_norm": 0.6467689275741577, "learning_rate": 8.320630478262512e-06, "loss": 0.0636, "step": 37126 }, { "epoch": 0.657501981254455, "grad_norm": 0.4995664656162262, "learning_rate": 8.319860131041146e-06, "loss": 0.0498, "step": 37127 }, { "epoch": 0.6575196907914835, "grad_norm": 0.4579225182533264, "learning_rate": 8.31908980579676e-06, "loss": 0.0455, "step": 37128 }, { "epoch": 0.6575374003285119, "grad_norm": 0.7589738368988037, "learning_rate": 8.318319502531901e-06, "loss": 0.0849, "step": 37129 }, { "epoch": 0.6575551098655403, "grad_norm": 0.8529548645019531, "learning_rate": 8.31754922124909e-06, "loss": 0.0544, "step": 37130 }, { "epoch": 0.6575728194025687, "grad_norm": 0.8513278961181641, "learning_rate": 8.316778961950859e-06, "loss": 0.0777, "step": 37131 }, { "epoch": 0.6575905289395972, "grad_norm": 0.7793896198272705, "learning_rate": 8.316008724639756e-06, "loss": 0.0809, "step": 37132 }, { "epoch": 0.6576082384766256, "grad_norm": 0.9724438190460205, "learning_rate": 8.315238509318313e-06, "loss": 0.0971, "step": 37133 }, { "epoch": 0.657625948013654, "grad_norm": 1.354079246520996, "learning_rate": 8.314468315989055e-06, "loss": 0.0819, "step": 37134 }, { "epoch": 0.6576436575506824, "grad_norm": 0.5257784128189087, "learning_rate": 8.313698144654517e-06, "loss": 0.0591, "step": 37135 }, { "epoch": 0.657661367087711, "grad_norm": 0.39772045612335205, "learning_rate": 8.312927995317244e-06, "loss": 0.074, "step": 37136 }, { "epoch": 0.6576790766247393, "grad_norm": 0.48868390917778015, "learning_rate": 8.312157867979756e-06, "loss": 0.0782, "step": 37137 }, { "epoch": 0.6576967861617677, "grad_norm": 0.5663073062896729, "learning_rate": 8.31138776264459e-06, "loss": 0.0576, "step": 37138 }, { "epoch": 0.6577144956987961, "grad_norm": 0.5723318457603455, "learning_rate": 8.310617679314286e-06, "loss": 0.0548, "step": 37139 }, { "epoch": 0.6577322052358247, "grad_norm": 0.9861963987350464, "learning_rate": 8.309847617991369e-06, "loss": 0.0707, "step": 37140 }, { "epoch": 0.657749914772853, "grad_norm": 0.5723990797996521, "learning_rate": 8.30907757867838e-06, "loss": 0.0376, "step": 37141 }, { "epoch": 0.6577676243098814, "grad_norm": 0.5819118618965149, "learning_rate": 8.308307561377847e-06, "loss": 0.0664, "step": 37142 }, { "epoch": 0.65778533384691, "grad_norm": 0.6576516032218933, "learning_rate": 8.307537566092311e-06, "loss": 0.0681, "step": 37143 }, { "epoch": 0.6578030433839384, "grad_norm": 0.8524050712585449, "learning_rate": 8.306767592824293e-06, "loss": 0.0874, "step": 37144 }, { "epoch": 0.6578207529209668, "grad_norm": 0.48481571674346924, "learning_rate": 8.305997641576336e-06, "loss": 0.0811, "step": 37145 }, { "epoch": 0.6578384624579952, "grad_norm": 0.41531145572662354, "learning_rate": 8.305227712350967e-06, "loss": 0.051, "step": 37146 }, { "epoch": 0.6578561719950237, "grad_norm": 0.5168871879577637, "learning_rate": 8.304457805150726e-06, "loss": 0.0736, "step": 37147 }, { "epoch": 0.6578738815320521, "grad_norm": 0.653462290763855, "learning_rate": 8.30368791997814e-06, "loss": 0.0664, "step": 37148 }, { "epoch": 0.6578915910690805, "grad_norm": 0.6775189638137817, "learning_rate": 8.30291805683574e-06, "loss": 0.0473, "step": 37149 }, { "epoch": 0.6579093006061089, "grad_norm": 0.7702416181564331, "learning_rate": 8.302148215726064e-06, "loss": 0.0779, "step": 37150 }, { "epoch": 0.6579270101431374, "grad_norm": 1.0949735641479492, "learning_rate": 8.301378396651641e-06, "loss": 0.0781, "step": 37151 }, { "epoch": 0.6579447196801658, "grad_norm": 1.124011516571045, "learning_rate": 8.300608599615008e-06, "loss": 0.074, "step": 37152 }, { "epoch": 0.6579624292171942, "grad_norm": 0.5523554682731628, "learning_rate": 8.299838824618694e-06, "loss": 0.0607, "step": 37153 }, { "epoch": 0.6579801387542226, "grad_norm": 0.411967009305954, "learning_rate": 8.29906907166524e-06, "loss": 0.0658, "step": 37154 }, { "epoch": 0.6579978482912511, "grad_norm": 0.16181884706020355, "learning_rate": 8.298299340757162e-06, "loss": 0.0358, "step": 37155 }, { "epoch": 0.6580155578282795, "grad_norm": 0.3532540798187256, "learning_rate": 8.297529631897003e-06, "loss": 0.0466, "step": 37156 }, { "epoch": 0.6580332673653079, "grad_norm": 0.8209270238876343, "learning_rate": 8.2967599450873e-06, "loss": 0.0851, "step": 37157 }, { "epoch": 0.6580509769023364, "grad_norm": 0.8416101932525635, "learning_rate": 8.295990280330571e-06, "loss": 0.0779, "step": 37158 }, { "epoch": 0.6580686864393648, "grad_norm": 0.6656513810157776, "learning_rate": 8.295220637629358e-06, "loss": 0.0457, "step": 37159 }, { "epoch": 0.6580863959763932, "grad_norm": 0.47638943791389465, "learning_rate": 8.294451016986185e-06, "loss": 0.0486, "step": 37160 }, { "epoch": 0.6581041055134216, "grad_norm": 1.0363962650299072, "learning_rate": 8.2936814184036e-06, "loss": 0.0729, "step": 37161 }, { "epoch": 0.6581218150504501, "grad_norm": 0.3407646417617798, "learning_rate": 8.29291184188412e-06, "loss": 0.0545, "step": 37162 }, { "epoch": 0.6581395245874785, "grad_norm": 0.6147106289863586, "learning_rate": 8.292142287430281e-06, "loss": 0.0647, "step": 37163 }, { "epoch": 0.6581572341245069, "grad_norm": 0.4214615523815155, "learning_rate": 8.291372755044617e-06, "loss": 0.0519, "step": 37164 }, { "epoch": 0.6581749436615353, "grad_norm": 1.1155768632888794, "learning_rate": 8.290603244729661e-06, "loss": 0.0625, "step": 37165 }, { "epoch": 0.6581926531985638, "grad_norm": 0.7801781892776489, "learning_rate": 8.28983375648794e-06, "loss": 0.0803, "step": 37166 }, { "epoch": 0.6582103627355922, "grad_norm": 0.7494887113571167, "learning_rate": 8.289064290321984e-06, "loss": 0.0608, "step": 37167 }, { "epoch": 0.6582280722726206, "grad_norm": 0.7254348993301392, "learning_rate": 8.28829484623433e-06, "loss": 0.0581, "step": 37168 }, { "epoch": 0.658245781809649, "grad_norm": 0.7373294830322266, "learning_rate": 8.287525424227505e-06, "loss": 0.0744, "step": 37169 }, { "epoch": 0.6582634913466775, "grad_norm": 1.6344019174575806, "learning_rate": 8.286756024304045e-06, "loss": 0.0897, "step": 37170 }, { "epoch": 0.6582812008837059, "grad_norm": 0.5917587876319885, "learning_rate": 8.285986646466475e-06, "loss": 0.0679, "step": 37171 }, { "epoch": 0.6582989104207343, "grad_norm": 0.4922092854976654, "learning_rate": 8.285217290717338e-06, "loss": 0.0895, "step": 37172 }, { "epoch": 0.6583166199577628, "grad_norm": 0.8546803593635559, "learning_rate": 8.284447957059151e-06, "loss": 0.0731, "step": 37173 }, { "epoch": 0.6583343294947912, "grad_norm": 0.6257734298706055, "learning_rate": 8.28367864549445e-06, "loss": 0.0446, "step": 37174 }, { "epoch": 0.6583520390318196, "grad_norm": 0.5718947649002075, "learning_rate": 8.282909356025774e-06, "loss": 0.0631, "step": 37175 }, { "epoch": 0.658369748568848, "grad_norm": 0.7297802567481995, "learning_rate": 8.28214008865564e-06, "loss": 0.0524, "step": 37176 }, { "epoch": 0.6583874581058765, "grad_norm": 0.5129464268684387, "learning_rate": 8.281370843386587e-06, "loss": 0.0679, "step": 37177 }, { "epoch": 0.6584051676429049, "grad_norm": 0.4670979082584381, "learning_rate": 8.280601620221144e-06, "loss": 0.05, "step": 37178 }, { "epoch": 0.6584228771799333, "grad_norm": 0.5861715078353882, "learning_rate": 8.27983241916184e-06, "loss": 0.0708, "step": 37179 }, { "epoch": 0.6584405867169617, "grad_norm": 0.6253162026405334, "learning_rate": 8.27906324021121e-06, "loss": 0.0893, "step": 37180 }, { "epoch": 0.6584582962539902, "grad_norm": 0.43946942687034607, "learning_rate": 8.278294083371783e-06, "loss": 0.0518, "step": 37181 }, { "epoch": 0.6584760057910186, "grad_norm": 0.6547335386276245, "learning_rate": 8.27752494864609e-06, "loss": 0.0645, "step": 37182 }, { "epoch": 0.658493715328047, "grad_norm": 0.5333991050720215, "learning_rate": 8.276755836036659e-06, "loss": 0.0672, "step": 37183 }, { "epoch": 0.6585114248650754, "grad_norm": 0.6576099395751953, "learning_rate": 8.275986745546018e-06, "loss": 0.0751, "step": 37184 }, { "epoch": 0.6585291344021039, "grad_norm": 0.6340715289115906, "learning_rate": 8.275217677176702e-06, "loss": 0.0647, "step": 37185 }, { "epoch": 0.6585468439391323, "grad_norm": 0.5364057421684265, "learning_rate": 8.274448630931245e-06, "loss": 0.07, "step": 37186 }, { "epoch": 0.6585645534761607, "grad_norm": 0.7005215883255005, "learning_rate": 8.273679606812168e-06, "loss": 0.0969, "step": 37187 }, { "epoch": 0.6585822630131892, "grad_norm": 0.577422022819519, "learning_rate": 8.272910604822001e-06, "loss": 0.0623, "step": 37188 }, { "epoch": 0.6585999725502176, "grad_norm": 0.5209736824035645, "learning_rate": 8.27214162496328e-06, "loss": 0.0821, "step": 37189 }, { "epoch": 0.658617682087246, "grad_norm": 0.5380540490150452, "learning_rate": 8.271372667238534e-06, "loss": 0.0872, "step": 37190 }, { "epoch": 0.6586353916242744, "grad_norm": 0.7973313331604004, "learning_rate": 8.270603731650289e-06, "loss": 0.0868, "step": 37191 }, { "epoch": 0.6586531011613029, "grad_norm": 0.6891225576400757, "learning_rate": 8.269834818201077e-06, "loss": 0.0422, "step": 37192 }, { "epoch": 0.6586708106983313, "grad_norm": 0.5559056401252747, "learning_rate": 8.269065926893434e-06, "loss": 0.0578, "step": 37193 }, { "epoch": 0.6586885202353597, "grad_norm": 0.8115125894546509, "learning_rate": 8.268297057729878e-06, "loss": 0.0759, "step": 37194 }, { "epoch": 0.6587062297723881, "grad_norm": 0.7198358178138733, "learning_rate": 8.267528210712944e-06, "loss": 0.0707, "step": 37195 }, { "epoch": 0.6587239393094166, "grad_norm": 0.4179477393627167, "learning_rate": 8.26675938584516e-06, "loss": 0.0469, "step": 37196 }, { "epoch": 0.658741648846445, "grad_norm": 0.5676305890083313, "learning_rate": 8.265990583129058e-06, "loss": 0.0527, "step": 37197 }, { "epoch": 0.6587593583834734, "grad_norm": 0.5709688067436218, "learning_rate": 8.265221802567166e-06, "loss": 0.081, "step": 37198 }, { "epoch": 0.6587770679205018, "grad_norm": 0.7084672451019287, "learning_rate": 8.264453044162011e-06, "loss": 0.0453, "step": 37199 }, { "epoch": 0.6587947774575303, "grad_norm": 0.2821893095970154, "learning_rate": 8.263684307916132e-06, "loss": 0.0535, "step": 37200 }, { "epoch": 0.6588124869945587, "grad_norm": 0.8373292088508606, "learning_rate": 8.262915593832044e-06, "loss": 0.0633, "step": 37201 }, { "epoch": 0.6588301965315871, "grad_norm": 0.5861815810203552, "learning_rate": 8.26214690191228e-06, "loss": 0.0545, "step": 37202 }, { "epoch": 0.6588479060686157, "grad_norm": 0.5705448389053345, "learning_rate": 8.261378232159376e-06, "loss": 0.0552, "step": 37203 }, { "epoch": 0.658865615605644, "grad_norm": 0.5768411755561829, "learning_rate": 8.260609584575858e-06, "loss": 0.0643, "step": 37204 }, { "epoch": 0.6588833251426724, "grad_norm": 0.6072431802749634, "learning_rate": 8.259840959164249e-06, "loss": 0.0449, "step": 37205 }, { "epoch": 0.6589010346797008, "grad_norm": 0.5034085512161255, "learning_rate": 8.25907235592708e-06, "loss": 0.0602, "step": 37206 }, { "epoch": 0.6589187442167294, "grad_norm": 0.6294782161712646, "learning_rate": 8.25830377486688e-06, "loss": 0.0545, "step": 37207 }, { "epoch": 0.6589364537537578, "grad_norm": 0.3327774703502655, "learning_rate": 8.257535215986182e-06, "loss": 0.076, "step": 37208 }, { "epoch": 0.6589541632907862, "grad_norm": 0.4793039560317993, "learning_rate": 8.25676667928751e-06, "loss": 0.0634, "step": 37209 }, { "epoch": 0.6589718728278146, "grad_norm": 0.5094487071037292, "learning_rate": 8.255998164773394e-06, "loss": 0.0661, "step": 37210 }, { "epoch": 0.6589895823648431, "grad_norm": 0.4264668822288513, "learning_rate": 8.255229672446366e-06, "loss": 0.0695, "step": 37211 }, { "epoch": 0.6590072919018715, "grad_norm": 1.0953792333602905, "learning_rate": 8.254461202308948e-06, "loss": 0.062, "step": 37212 }, { "epoch": 0.6590250014388999, "grad_norm": 0.4137006402015686, "learning_rate": 8.253692754363665e-06, "loss": 0.0371, "step": 37213 }, { "epoch": 0.6590427109759283, "grad_norm": 0.4729623794555664, "learning_rate": 8.25292432861306e-06, "loss": 0.1115, "step": 37214 }, { "epoch": 0.6590604205129568, "grad_norm": 0.5274905562400818, "learning_rate": 8.252155925059644e-06, "loss": 0.057, "step": 37215 }, { "epoch": 0.6590781300499852, "grad_norm": 0.5189187526702881, "learning_rate": 8.251387543705954e-06, "loss": 0.0646, "step": 37216 }, { "epoch": 0.6590958395870136, "grad_norm": 0.8681392669677734, "learning_rate": 8.250619184554517e-06, "loss": 0.0791, "step": 37217 }, { "epoch": 0.6591135491240421, "grad_norm": 0.6128708124160767, "learning_rate": 8.249850847607858e-06, "loss": 0.0639, "step": 37218 }, { "epoch": 0.6591312586610705, "grad_norm": 0.5947173237800598, "learning_rate": 8.249082532868508e-06, "loss": 0.0408, "step": 37219 }, { "epoch": 0.6591489681980989, "grad_norm": 0.6796666979789734, "learning_rate": 8.248314240338994e-06, "loss": 0.0446, "step": 37220 }, { "epoch": 0.6591666777351273, "grad_norm": 0.8040024638175964, "learning_rate": 8.247545970021847e-06, "loss": 0.1005, "step": 37221 }, { "epoch": 0.6591843872721558, "grad_norm": 0.6625109910964966, "learning_rate": 8.246777721919588e-06, "loss": 0.0386, "step": 37222 }, { "epoch": 0.6592020968091842, "grad_norm": 0.5552712678909302, "learning_rate": 8.246009496034747e-06, "loss": 0.049, "step": 37223 }, { "epoch": 0.6592198063462126, "grad_norm": 0.7597004175186157, "learning_rate": 8.24524129236985e-06, "loss": 0.049, "step": 37224 }, { "epoch": 0.659237515883241, "grad_norm": 0.7317407131195068, "learning_rate": 8.244473110927431e-06, "loss": 0.073, "step": 37225 }, { "epoch": 0.6592552254202695, "grad_norm": 0.572017252445221, "learning_rate": 8.243704951710002e-06, "loss": 0.0497, "step": 37226 }, { "epoch": 0.6592729349572979, "grad_norm": 0.48399588465690613, "learning_rate": 8.242936814720106e-06, "loss": 0.0971, "step": 37227 }, { "epoch": 0.6592906444943263, "grad_norm": 0.5417855381965637, "learning_rate": 8.24216869996027e-06, "loss": 0.0872, "step": 37228 }, { "epoch": 0.6593083540313547, "grad_norm": 0.7721584439277649, "learning_rate": 8.24140060743301e-06, "loss": 0.0512, "step": 37229 }, { "epoch": 0.6593260635683832, "grad_norm": 0.32046282291412354, "learning_rate": 8.240632537140857e-06, "loss": 0.0389, "step": 37230 }, { "epoch": 0.6593437731054116, "grad_norm": 0.7483916282653809, "learning_rate": 8.239864489086341e-06, "loss": 0.0738, "step": 37231 }, { "epoch": 0.65936148264244, "grad_norm": 0.47780001163482666, "learning_rate": 8.239096463271993e-06, "loss": 0.0477, "step": 37232 }, { "epoch": 0.6593791921794685, "grad_norm": 0.5398778319358826, "learning_rate": 8.238328459700327e-06, "loss": 0.0456, "step": 37233 }, { "epoch": 0.6593969017164969, "grad_norm": 0.4190853536128998, "learning_rate": 8.237560478373877e-06, "loss": 0.0451, "step": 37234 }, { "epoch": 0.6594146112535253, "grad_norm": 0.6501146554946899, "learning_rate": 8.23679251929517e-06, "loss": 0.0569, "step": 37235 }, { "epoch": 0.6594323207905537, "grad_norm": 0.4385167956352234, "learning_rate": 8.236024582466732e-06, "loss": 0.0685, "step": 37236 }, { "epoch": 0.6594500303275822, "grad_norm": 0.3906421363353729, "learning_rate": 8.235256667891086e-06, "loss": 0.0434, "step": 37237 }, { "epoch": 0.6594677398646106, "grad_norm": 0.3862416446208954, "learning_rate": 8.234488775570764e-06, "loss": 0.0686, "step": 37238 }, { "epoch": 0.659485449401639, "grad_norm": 0.8329373598098755, "learning_rate": 8.233720905508294e-06, "loss": 0.0581, "step": 37239 }, { "epoch": 0.6595031589386674, "grad_norm": 1.1582192182540894, "learning_rate": 8.232953057706192e-06, "loss": 0.0934, "step": 37240 }, { "epoch": 0.6595208684756959, "grad_norm": 0.8316929936408997, "learning_rate": 8.23218523216699e-06, "loss": 0.0691, "step": 37241 }, { "epoch": 0.6595385780127243, "grad_norm": 0.6329676508903503, "learning_rate": 8.231417428893214e-06, "loss": 0.0526, "step": 37242 }, { "epoch": 0.6595562875497527, "grad_norm": 0.8305962085723877, "learning_rate": 8.230649647887398e-06, "loss": 0.103, "step": 37243 }, { "epoch": 0.6595739970867811, "grad_norm": 0.8509044051170349, "learning_rate": 8.22988188915205e-06, "loss": 0.0579, "step": 37244 }, { "epoch": 0.6595917066238096, "grad_norm": 0.5398217439651489, "learning_rate": 8.22911415268971e-06, "loss": 0.0686, "step": 37245 }, { "epoch": 0.659609416160838, "grad_norm": 0.9345515370368958, "learning_rate": 8.228346438502898e-06, "loss": 0.0848, "step": 37246 }, { "epoch": 0.6596271256978664, "grad_norm": 0.5184885859489441, "learning_rate": 8.227578746594142e-06, "loss": 0.0611, "step": 37247 }, { "epoch": 0.6596448352348949, "grad_norm": 0.47661110758781433, "learning_rate": 8.226811076965965e-06, "loss": 0.0561, "step": 37248 }, { "epoch": 0.6596625447719233, "grad_norm": 0.6954110264778137, "learning_rate": 8.226043429620896e-06, "loss": 0.0588, "step": 37249 }, { "epoch": 0.6596802543089517, "grad_norm": 0.6159736514091492, "learning_rate": 8.225275804561464e-06, "loss": 0.0721, "step": 37250 }, { "epoch": 0.6596979638459801, "grad_norm": 0.6902934908866882, "learning_rate": 8.224508201790185e-06, "loss": 0.0551, "step": 37251 }, { "epoch": 0.6597156733830086, "grad_norm": 0.9082244038581848, "learning_rate": 8.223740621309588e-06, "loss": 0.0778, "step": 37252 }, { "epoch": 0.659733382920037, "grad_norm": 0.5555557012557983, "learning_rate": 8.222973063122203e-06, "loss": 0.0314, "step": 37253 }, { "epoch": 0.6597510924570654, "grad_norm": 0.45928841829299927, "learning_rate": 8.222205527230548e-06, "loss": 0.0413, "step": 37254 }, { "epoch": 0.6597688019940938, "grad_norm": 0.5691033005714417, "learning_rate": 8.221438013637145e-06, "loss": 0.0537, "step": 37255 }, { "epoch": 0.6597865115311223, "grad_norm": 0.6977382898330688, "learning_rate": 8.22067052234453e-06, "loss": 0.0433, "step": 37256 }, { "epoch": 0.6598042210681507, "grad_norm": 0.653779149055481, "learning_rate": 8.21990305335523e-06, "loss": 0.069, "step": 37257 }, { "epoch": 0.6598219306051791, "grad_norm": 0.5453339219093323, "learning_rate": 8.219135606671756e-06, "loss": 0.0447, "step": 37258 }, { "epoch": 0.6598396401422075, "grad_norm": 0.7604339718818665, "learning_rate": 8.218368182296642e-06, "loss": 0.0667, "step": 37259 }, { "epoch": 0.659857349679236, "grad_norm": 0.6810703873634338, "learning_rate": 8.217600780232415e-06, "loss": 0.0628, "step": 37260 }, { "epoch": 0.6598750592162644, "grad_norm": 0.8072049617767334, "learning_rate": 8.21683340048159e-06, "loss": 0.0852, "step": 37261 }, { "epoch": 0.6598927687532928, "grad_norm": 1.2382099628448486, "learning_rate": 8.216066043046697e-06, "loss": 0.0703, "step": 37262 }, { "epoch": 0.6599104782903213, "grad_norm": 0.8056899309158325, "learning_rate": 8.215298707930261e-06, "loss": 0.0872, "step": 37263 }, { "epoch": 0.6599281878273497, "grad_norm": 0.7351177334785461, "learning_rate": 8.214531395134806e-06, "loss": 0.0463, "step": 37264 }, { "epoch": 0.6599458973643781, "grad_norm": 1.0884116888046265, "learning_rate": 8.213764104662854e-06, "loss": 0.0854, "step": 37265 }, { "epoch": 0.6599636069014065, "grad_norm": 0.9086326956748962, "learning_rate": 8.212996836516937e-06, "loss": 0.0544, "step": 37266 }, { "epoch": 0.659981316438435, "grad_norm": 1.1108478307724, "learning_rate": 8.21222959069957e-06, "loss": 0.1077, "step": 37267 }, { "epoch": 0.6599990259754634, "grad_norm": 0.7454657554626465, "learning_rate": 8.211462367213286e-06, "loss": 0.0642, "step": 37268 }, { "epoch": 0.6600167355124918, "grad_norm": 0.43839818239212036, "learning_rate": 8.210695166060601e-06, "loss": 0.0488, "step": 37269 }, { "epoch": 0.6600344450495202, "grad_norm": 0.5522829294204712, "learning_rate": 8.20992798724404e-06, "loss": 0.0476, "step": 37270 }, { "epoch": 0.6600521545865488, "grad_norm": 0.5199210047721863, "learning_rate": 8.209160830766137e-06, "loss": 0.0364, "step": 37271 }, { "epoch": 0.6600698641235772, "grad_norm": 0.604426920413971, "learning_rate": 8.208393696629403e-06, "loss": 0.039, "step": 37272 }, { "epoch": 0.6600875736606056, "grad_norm": 0.601439893245697, "learning_rate": 8.207626584836365e-06, "loss": 0.0642, "step": 37273 }, { "epoch": 0.660105283197634, "grad_norm": 0.7019835114479065, "learning_rate": 8.206859495389549e-06, "loss": 0.0698, "step": 37274 }, { "epoch": 0.6601229927346625, "grad_norm": 0.7943004369735718, "learning_rate": 8.206092428291477e-06, "loss": 0.0691, "step": 37275 }, { "epoch": 0.6601407022716909, "grad_norm": 0.6525295972824097, "learning_rate": 8.205325383544675e-06, "loss": 0.0741, "step": 37276 }, { "epoch": 0.6601584118087193, "grad_norm": 0.6451632976531982, "learning_rate": 8.204558361151663e-06, "loss": 0.0681, "step": 37277 }, { "epoch": 0.6601761213457478, "grad_norm": 0.5457424521446228, "learning_rate": 8.203791361114977e-06, "loss": 0.0652, "step": 37278 }, { "epoch": 0.6601938308827762, "grad_norm": 0.4862106740474701, "learning_rate": 8.203024383437122e-06, "loss": 0.0603, "step": 37279 }, { "epoch": 0.6602115404198046, "grad_norm": 0.6226845979690552, "learning_rate": 8.202257428120628e-06, "loss": 0.0558, "step": 37280 }, { "epoch": 0.660229249956833, "grad_norm": 0.3949143588542938, "learning_rate": 8.201490495168021e-06, "loss": 0.0562, "step": 37281 }, { "epoch": 0.6602469594938615, "grad_norm": 0.8220869302749634, "learning_rate": 8.200723584581829e-06, "loss": 0.0524, "step": 37282 }, { "epoch": 0.6602646690308899, "grad_norm": 0.6649923920631409, "learning_rate": 8.19995669636456e-06, "loss": 0.0722, "step": 37283 }, { "epoch": 0.6602823785679183, "grad_norm": 0.5991930961608887, "learning_rate": 8.199189830518742e-06, "loss": 0.083, "step": 37284 }, { "epoch": 0.6603000881049467, "grad_norm": 0.721238911151886, "learning_rate": 8.198422987046912e-06, "loss": 0.0757, "step": 37285 }, { "epoch": 0.6603177976419752, "grad_norm": 0.8749009370803833, "learning_rate": 8.197656165951578e-06, "loss": 0.0576, "step": 37286 }, { "epoch": 0.6603355071790036, "grad_norm": 1.236878514289856, "learning_rate": 8.196889367235267e-06, "loss": 0.1, "step": 37287 }, { "epoch": 0.660353216716032, "grad_norm": 0.5083320140838623, "learning_rate": 8.1961225909005e-06, "loss": 0.0501, "step": 37288 }, { "epoch": 0.6603709262530604, "grad_norm": 0.7712529897689819, "learning_rate": 8.195355836949811e-06, "loss": 0.0999, "step": 37289 }, { "epoch": 0.6603886357900889, "grad_norm": 0.7350336909294128, "learning_rate": 8.194589105385706e-06, "loss": 0.0543, "step": 37290 }, { "epoch": 0.6604063453271173, "grad_norm": 0.22301267087459564, "learning_rate": 8.193822396210713e-06, "loss": 0.0355, "step": 37291 }, { "epoch": 0.6604240548641457, "grad_norm": 0.579524576663971, "learning_rate": 8.193055709427357e-06, "loss": 0.066, "step": 37292 }, { "epoch": 0.6604417644011742, "grad_norm": 0.2684693932533264, "learning_rate": 8.192289045038157e-06, "loss": 0.0602, "step": 37293 }, { "epoch": 0.6604594739382026, "grad_norm": 0.5521621108055115, "learning_rate": 8.19152240304564e-06, "loss": 0.0691, "step": 37294 }, { "epoch": 0.660477183475231, "grad_norm": 0.47288647294044495, "learning_rate": 8.190755783452326e-06, "loss": 0.0351, "step": 37295 }, { "epoch": 0.6604948930122594, "grad_norm": 0.7951592206954956, "learning_rate": 8.189989186260742e-06, "loss": 0.054, "step": 37296 }, { "epoch": 0.6605126025492879, "grad_norm": 0.6863124370574951, "learning_rate": 8.1892226114734e-06, "loss": 0.0584, "step": 37297 }, { "epoch": 0.6605303120863163, "grad_norm": 0.4642549753189087, "learning_rate": 8.188456059092823e-06, "loss": 0.0812, "step": 37298 }, { "epoch": 0.6605480216233447, "grad_norm": 0.31739315390586853, "learning_rate": 8.187689529121544e-06, "loss": 0.0569, "step": 37299 }, { "epoch": 0.6605657311603731, "grad_norm": 1.1837066411972046, "learning_rate": 8.186923021562073e-06, "loss": 0.0796, "step": 37300 }, { "epoch": 0.6605834406974016, "grad_norm": 0.768833577632904, "learning_rate": 8.186156536416937e-06, "loss": 0.0537, "step": 37301 }, { "epoch": 0.66060115023443, "grad_norm": 0.5685481429100037, "learning_rate": 8.185390073688655e-06, "loss": 0.073, "step": 37302 }, { "epoch": 0.6606188597714584, "grad_norm": 0.3991290032863617, "learning_rate": 8.18462363337975e-06, "loss": 0.0656, "step": 37303 }, { "epoch": 0.6606365693084868, "grad_norm": 0.417890727519989, "learning_rate": 8.183857215492745e-06, "loss": 0.0436, "step": 37304 }, { "epoch": 0.6606542788455153, "grad_norm": 0.920428991317749, "learning_rate": 8.18309082003016e-06, "loss": 0.0907, "step": 37305 }, { "epoch": 0.6606719883825437, "grad_norm": 0.6208471059799194, "learning_rate": 8.182324446994516e-06, "loss": 0.0615, "step": 37306 }, { "epoch": 0.6606896979195721, "grad_norm": 0.6777297854423523, "learning_rate": 8.181558096388341e-06, "loss": 0.107, "step": 37307 }, { "epoch": 0.6607074074566006, "grad_norm": 0.4977240264415741, "learning_rate": 8.180791768214146e-06, "loss": 0.0467, "step": 37308 }, { "epoch": 0.660725116993629, "grad_norm": 0.442047119140625, "learning_rate": 8.180025462474454e-06, "loss": 0.0683, "step": 37309 }, { "epoch": 0.6607428265306574, "grad_norm": 0.44882717728614807, "learning_rate": 8.179259179171796e-06, "loss": 0.0989, "step": 37310 }, { "epoch": 0.6607605360676858, "grad_norm": 0.603637158870697, "learning_rate": 8.178492918308679e-06, "loss": 0.0433, "step": 37311 }, { "epoch": 0.6607782456047143, "grad_norm": 0.4317101538181305, "learning_rate": 8.177726679887632e-06, "loss": 0.0652, "step": 37312 }, { "epoch": 0.6607959551417427, "grad_norm": 0.4808323085308075, "learning_rate": 8.176960463911167e-06, "loss": 0.0444, "step": 37313 }, { "epoch": 0.6608136646787711, "grad_norm": 0.935982882976532, "learning_rate": 8.176194270381824e-06, "loss": 0.0916, "step": 37314 }, { "epoch": 0.6608313742157995, "grad_norm": 0.5956856608390808, "learning_rate": 8.175428099302104e-06, "loss": 0.0408, "step": 37315 }, { "epoch": 0.660849083752828, "grad_norm": 0.480710506439209, "learning_rate": 8.174661950674536e-06, "loss": 0.0621, "step": 37316 }, { "epoch": 0.6608667932898564, "grad_norm": 0.34898480772972107, "learning_rate": 8.173895824501647e-06, "loss": 0.096, "step": 37317 }, { "epoch": 0.6608845028268848, "grad_norm": 0.2182767242193222, "learning_rate": 8.173129720785942e-06, "loss": 0.0754, "step": 37318 }, { "epoch": 0.6609022123639132, "grad_norm": 1.038571834564209, "learning_rate": 8.172363639529954e-06, "loss": 0.0934, "step": 37319 }, { "epoch": 0.6609199219009417, "grad_norm": 0.5195435285568237, "learning_rate": 8.171597580736195e-06, "loss": 0.0791, "step": 37320 }, { "epoch": 0.6609376314379701, "grad_norm": 0.9927016496658325, "learning_rate": 8.17083154440719e-06, "loss": 0.094, "step": 37321 }, { "epoch": 0.6609553409749985, "grad_norm": 0.1584944874048233, "learning_rate": 8.17006553054546e-06, "loss": 0.0468, "step": 37322 }, { "epoch": 0.660973050512027, "grad_norm": 0.5664525628089905, "learning_rate": 8.169299539153524e-06, "loss": 0.0629, "step": 37323 }, { "epoch": 0.6609907600490554, "grad_norm": 0.5185896754264832, "learning_rate": 8.168533570233907e-06, "loss": 0.0553, "step": 37324 }, { "epoch": 0.6610084695860838, "grad_norm": 0.6299896836280823, "learning_rate": 8.167767623789116e-06, "loss": 0.0604, "step": 37325 }, { "epoch": 0.6610261791231122, "grad_norm": 0.9187114834785461, "learning_rate": 8.16700169982168e-06, "loss": 0.0638, "step": 37326 }, { "epoch": 0.6610438886601407, "grad_norm": 0.9408800005912781, "learning_rate": 8.166235798334117e-06, "loss": 0.0679, "step": 37327 }, { "epoch": 0.6610615981971691, "grad_norm": 0.7151156663894653, "learning_rate": 8.165469919328953e-06, "loss": 0.0709, "step": 37328 }, { "epoch": 0.6610793077341975, "grad_norm": 0.7046350240707397, "learning_rate": 8.164704062808695e-06, "loss": 0.0706, "step": 37329 }, { "epoch": 0.6610970172712259, "grad_norm": 1.0327577590942383, "learning_rate": 8.16393822877587e-06, "loss": 0.0547, "step": 37330 }, { "epoch": 0.6611147268082544, "grad_norm": 0.6570818424224854, "learning_rate": 8.163172417232997e-06, "loss": 0.064, "step": 37331 }, { "epoch": 0.6611324363452828, "grad_norm": 0.5527414679527283, "learning_rate": 8.162406628182595e-06, "loss": 0.06, "step": 37332 }, { "epoch": 0.6611501458823112, "grad_norm": 0.5451435446739197, "learning_rate": 8.161640861627184e-06, "loss": 0.0722, "step": 37333 }, { "epoch": 0.6611678554193396, "grad_norm": 0.45587849617004395, "learning_rate": 8.160875117569281e-06, "loss": 0.0773, "step": 37334 }, { "epoch": 0.6611855649563682, "grad_norm": 0.447091668844223, "learning_rate": 8.160109396011416e-06, "loss": 0.0543, "step": 37335 }, { "epoch": 0.6612032744933966, "grad_norm": 0.7434076070785522, "learning_rate": 8.159343696956091e-06, "loss": 0.0597, "step": 37336 }, { "epoch": 0.661220984030425, "grad_norm": 0.47806891798973083, "learning_rate": 8.158578020405833e-06, "loss": 0.0565, "step": 37337 }, { "epoch": 0.6612386935674535, "grad_norm": 0.5583308935165405, "learning_rate": 8.157812366363164e-06, "loss": 0.0578, "step": 37338 }, { "epoch": 0.6612564031044819, "grad_norm": 0.45614975690841675, "learning_rate": 8.157046734830605e-06, "loss": 0.0561, "step": 37339 }, { "epoch": 0.6612741126415103, "grad_norm": 0.7544029951095581, "learning_rate": 8.156281125810664e-06, "loss": 0.0936, "step": 37340 }, { "epoch": 0.6612918221785387, "grad_norm": 0.875978410243988, "learning_rate": 8.155515539305865e-06, "loss": 0.0521, "step": 37341 }, { "epoch": 0.6613095317155672, "grad_norm": 0.527248740196228, "learning_rate": 8.154749975318729e-06, "loss": 0.0525, "step": 37342 }, { "epoch": 0.6613272412525956, "grad_norm": 0.5917528867721558, "learning_rate": 8.153984433851773e-06, "loss": 0.0626, "step": 37343 }, { "epoch": 0.661344950789624, "grad_norm": 0.7859395742416382, "learning_rate": 8.153218914907514e-06, "loss": 0.0672, "step": 37344 }, { "epoch": 0.6613626603266524, "grad_norm": 0.9595860242843628, "learning_rate": 8.152453418488474e-06, "loss": 0.0953, "step": 37345 }, { "epoch": 0.6613803698636809, "grad_norm": 0.8108491897583008, "learning_rate": 8.151687944597177e-06, "loss": 0.0784, "step": 37346 }, { "epoch": 0.6613980794007093, "grad_norm": 0.44340524077415466, "learning_rate": 8.150922493236125e-06, "loss": 0.0549, "step": 37347 }, { "epoch": 0.6614157889377377, "grad_norm": 0.6826184391975403, "learning_rate": 8.150157064407846e-06, "loss": 0.064, "step": 37348 }, { "epoch": 0.6614334984747661, "grad_norm": 0.6003007888793945, "learning_rate": 8.149391658114865e-06, "loss": 0.0447, "step": 37349 }, { "epoch": 0.6614512080117946, "grad_norm": 0.40784648060798645, "learning_rate": 8.14862627435968e-06, "loss": 0.0539, "step": 37350 }, { "epoch": 0.661468917548823, "grad_norm": 1.0750296115875244, "learning_rate": 8.147860913144825e-06, "loss": 0.1067, "step": 37351 }, { "epoch": 0.6614866270858514, "grad_norm": 0.5360099673271179, "learning_rate": 8.147095574472819e-06, "loss": 0.0678, "step": 37352 }, { "epoch": 0.6615043366228799, "grad_norm": 0.9942256212234497, "learning_rate": 8.14633025834618e-06, "loss": 0.0962, "step": 37353 }, { "epoch": 0.6615220461599083, "grad_norm": 0.5382852554321289, "learning_rate": 8.145564964767413e-06, "loss": 0.0746, "step": 37354 }, { "epoch": 0.6615397556969367, "grad_norm": 0.32884183526039124, "learning_rate": 8.144799693739045e-06, "loss": 0.0818, "step": 37355 }, { "epoch": 0.6615574652339651, "grad_norm": 0.6749533414840698, "learning_rate": 8.1440344452636e-06, "loss": 0.056, "step": 37356 }, { "epoch": 0.6615751747709936, "grad_norm": 0.3964531421661377, "learning_rate": 8.14326921934358e-06, "loss": 0.0584, "step": 37357 }, { "epoch": 0.661592884308022, "grad_norm": 0.599603533744812, "learning_rate": 8.142504015981514e-06, "loss": 0.0719, "step": 37358 }, { "epoch": 0.6616105938450504, "grad_norm": 0.7897229194641113, "learning_rate": 8.141738835179913e-06, "loss": 0.0663, "step": 37359 }, { "epoch": 0.6616283033820788, "grad_norm": 0.6472386717796326, "learning_rate": 8.140973676941298e-06, "loss": 0.0577, "step": 37360 }, { "epoch": 0.6616460129191073, "grad_norm": 0.7286458611488342, "learning_rate": 8.140208541268188e-06, "loss": 0.0488, "step": 37361 }, { "epoch": 0.6616637224561357, "grad_norm": 0.34001249074935913, "learning_rate": 8.139443428163096e-06, "loss": 0.0451, "step": 37362 }, { "epoch": 0.6616814319931641, "grad_norm": 0.4137825071811676, "learning_rate": 8.138678337628548e-06, "loss": 0.0387, "step": 37363 }, { "epoch": 0.6616991415301925, "grad_norm": 0.7593156695365906, "learning_rate": 8.137913269667048e-06, "loss": 0.0749, "step": 37364 }, { "epoch": 0.661716851067221, "grad_norm": 0.8991101384162903, "learning_rate": 8.13714822428112e-06, "loss": 0.0904, "step": 37365 }, { "epoch": 0.6617345606042494, "grad_norm": 0.4953085780143738, "learning_rate": 8.136383201473279e-06, "loss": 0.0559, "step": 37366 }, { "epoch": 0.6617522701412778, "grad_norm": 0.5422008037567139, "learning_rate": 8.135618201246051e-06, "loss": 0.0787, "step": 37367 }, { "epoch": 0.6617699796783063, "grad_norm": 0.5519644618034363, "learning_rate": 8.134853223601936e-06, "loss": 0.0489, "step": 37368 }, { "epoch": 0.6617876892153347, "grad_norm": 0.5632491111755371, "learning_rate": 8.134088268543462e-06, "loss": 0.0523, "step": 37369 }, { "epoch": 0.6618053987523631, "grad_norm": 0.6415479183197021, "learning_rate": 8.133323336073143e-06, "loss": 0.0878, "step": 37370 }, { "epoch": 0.6618231082893915, "grad_norm": 0.9009317755699158, "learning_rate": 8.132558426193493e-06, "loss": 0.0633, "step": 37371 }, { "epoch": 0.66184081782642, "grad_norm": 0.32457205653190613, "learning_rate": 8.131793538907033e-06, "loss": 0.0493, "step": 37372 }, { "epoch": 0.6618585273634484, "grad_norm": 0.9997486472129822, "learning_rate": 8.131028674216278e-06, "loss": 0.1149, "step": 37373 }, { "epoch": 0.6618762369004768, "grad_norm": 0.5246827602386475, "learning_rate": 8.13026383212375e-06, "loss": 0.0438, "step": 37374 }, { "epoch": 0.6618939464375052, "grad_norm": 0.6470149755477905, "learning_rate": 8.129499012631953e-06, "loss": 0.0561, "step": 37375 }, { "epoch": 0.6619116559745337, "grad_norm": 0.5541906356811523, "learning_rate": 8.128734215743409e-06, "loss": 0.0765, "step": 37376 }, { "epoch": 0.6619293655115621, "grad_norm": 0.7328190803527832, "learning_rate": 8.127969441460636e-06, "loss": 0.0646, "step": 37377 }, { "epoch": 0.6619470750485905, "grad_norm": 0.6917837262153625, "learning_rate": 8.127204689786151e-06, "loss": 0.0749, "step": 37378 }, { "epoch": 0.6619647845856189, "grad_norm": 0.7045770883560181, "learning_rate": 8.126439960722459e-06, "loss": 0.0522, "step": 37379 }, { "epoch": 0.6619824941226474, "grad_norm": 0.792367696762085, "learning_rate": 8.125675254272088e-06, "loss": 0.0786, "step": 37380 }, { "epoch": 0.6620002036596758, "grad_norm": 0.4561987817287445, "learning_rate": 8.124910570437555e-06, "loss": 0.0715, "step": 37381 }, { "epoch": 0.6620179131967042, "grad_norm": 0.5442560315132141, "learning_rate": 8.124145909221368e-06, "loss": 0.0545, "step": 37382 }, { "epoch": 0.6620356227337327, "grad_norm": 0.36491554975509644, "learning_rate": 8.123381270626046e-06, "loss": 0.0354, "step": 37383 }, { "epoch": 0.6620533322707611, "grad_norm": 0.8429502844810486, "learning_rate": 8.122616654654102e-06, "loss": 0.098, "step": 37384 }, { "epoch": 0.6620710418077895, "grad_norm": 0.5193727016448975, "learning_rate": 8.121852061308058e-06, "loss": 0.0507, "step": 37385 }, { "epoch": 0.6620887513448179, "grad_norm": 0.49444207549095154, "learning_rate": 8.121087490590421e-06, "loss": 0.0531, "step": 37386 }, { "epoch": 0.6621064608818464, "grad_norm": 0.6991643309593201, "learning_rate": 8.12032294250371e-06, "loss": 0.0734, "step": 37387 }, { "epoch": 0.6621241704188748, "grad_norm": 0.6427934765815735, "learning_rate": 8.119558417050442e-06, "loss": 0.0427, "step": 37388 }, { "epoch": 0.6621418799559032, "grad_norm": 0.6581586599349976, "learning_rate": 8.11879391423313e-06, "loss": 0.0794, "step": 37389 }, { "epoch": 0.6621595894929316, "grad_norm": 0.5691854953765869, "learning_rate": 8.118029434054289e-06, "loss": 0.0663, "step": 37390 }, { "epoch": 0.6621772990299601, "grad_norm": 0.6182124614715576, "learning_rate": 8.117264976516437e-06, "loss": 0.0549, "step": 37391 }, { "epoch": 0.6621950085669885, "grad_norm": 0.4707695543766022, "learning_rate": 8.116500541622093e-06, "loss": 0.0701, "step": 37392 }, { "epoch": 0.6622127181040169, "grad_norm": 0.7767183184623718, "learning_rate": 8.11573612937376e-06, "loss": 0.0584, "step": 37393 }, { "epoch": 0.6622304276410454, "grad_norm": 0.6703725457191467, "learning_rate": 8.114971739773956e-06, "loss": 0.0826, "step": 37394 }, { "epoch": 0.6622481371780738, "grad_norm": 0.90357506275177, "learning_rate": 8.114207372825208e-06, "loss": 0.0575, "step": 37395 }, { "epoch": 0.6622658467151022, "grad_norm": 0.9026831388473511, "learning_rate": 8.113443028530013e-06, "loss": 0.0733, "step": 37396 }, { "epoch": 0.6622835562521306, "grad_norm": 1.1585701704025269, "learning_rate": 8.112678706890895e-06, "loss": 0.0694, "step": 37397 }, { "epoch": 0.6623012657891592, "grad_norm": 0.4535929560661316, "learning_rate": 8.111914407910367e-06, "loss": 0.0732, "step": 37398 }, { "epoch": 0.6623189753261876, "grad_norm": 0.7802799940109253, "learning_rate": 8.111150131590945e-06, "loss": 0.0468, "step": 37399 }, { "epoch": 0.662336684863216, "grad_norm": 0.8649600744247437, "learning_rate": 8.110385877935142e-06, "loss": 0.0976, "step": 37400 }, { "epoch": 0.6623543944002444, "grad_norm": 0.515121579170227, "learning_rate": 8.10962164694547e-06, "loss": 0.0531, "step": 37401 }, { "epoch": 0.6623721039372729, "grad_norm": 0.7040245532989502, "learning_rate": 8.108857438624455e-06, "loss": 0.0726, "step": 37402 }, { "epoch": 0.6623898134743013, "grad_norm": 0.636472225189209, "learning_rate": 8.108093252974596e-06, "loss": 0.0644, "step": 37403 }, { "epoch": 0.6624075230113297, "grad_norm": 0.7592131495475769, "learning_rate": 8.10732908999841e-06, "loss": 0.0612, "step": 37404 }, { "epoch": 0.6624252325483581, "grad_norm": 0.5771148204803467, "learning_rate": 8.106564949698417e-06, "loss": 0.0661, "step": 37405 }, { "epoch": 0.6624429420853866, "grad_norm": 0.41428741812705994, "learning_rate": 8.105800832077133e-06, "loss": 0.0353, "step": 37406 }, { "epoch": 0.662460651622415, "grad_norm": 0.5972780585289001, "learning_rate": 8.105036737137061e-06, "loss": 0.0583, "step": 37407 }, { "epoch": 0.6624783611594434, "grad_norm": 0.30844002962112427, "learning_rate": 8.10427266488072e-06, "loss": 0.0338, "step": 37408 }, { "epoch": 0.6624960706964719, "grad_norm": 0.7216702103614807, "learning_rate": 8.10350861531062e-06, "loss": 0.0476, "step": 37409 }, { "epoch": 0.6625137802335003, "grad_norm": 0.2685036063194275, "learning_rate": 8.10274458842929e-06, "loss": 0.0393, "step": 37410 }, { "epoch": 0.6625314897705287, "grad_norm": 0.5585606694221497, "learning_rate": 8.101980584239229e-06, "loss": 0.0515, "step": 37411 }, { "epoch": 0.6625491993075571, "grad_norm": 0.2975572347640991, "learning_rate": 8.101216602742952e-06, "loss": 0.0461, "step": 37412 }, { "epoch": 0.6625669088445856, "grad_norm": 0.8950885534286499, "learning_rate": 8.100452643942981e-06, "loss": 0.0895, "step": 37413 }, { "epoch": 0.662584618381614, "grad_norm": 0.7680991291999817, "learning_rate": 8.099688707841817e-06, "loss": 0.0981, "step": 37414 }, { "epoch": 0.6626023279186424, "grad_norm": 0.5727869868278503, "learning_rate": 8.09892479444198e-06, "loss": 0.0598, "step": 37415 }, { "epoch": 0.6626200374556708, "grad_norm": 0.4920324385166168, "learning_rate": 8.098160903745981e-06, "loss": 0.042, "step": 37416 }, { "epoch": 0.6626377469926993, "grad_norm": 0.5384964942932129, "learning_rate": 8.097397035756335e-06, "loss": 0.0633, "step": 37417 }, { "epoch": 0.6626554565297277, "grad_norm": 0.6407486200332642, "learning_rate": 8.096633190475555e-06, "loss": 0.0604, "step": 37418 }, { "epoch": 0.6626731660667561, "grad_norm": 0.5861725807189941, "learning_rate": 8.095869367906152e-06, "loss": 0.0746, "step": 37419 }, { "epoch": 0.6626908756037845, "grad_norm": 0.610815167427063, "learning_rate": 8.095105568050649e-06, "loss": 0.0652, "step": 37420 }, { "epoch": 0.662708585140813, "grad_norm": 0.3564700186252594, "learning_rate": 8.094341790911543e-06, "loss": 0.0283, "step": 37421 }, { "epoch": 0.6627262946778414, "grad_norm": 0.8507567644119263, "learning_rate": 8.093578036491353e-06, "loss": 0.0626, "step": 37422 }, { "epoch": 0.6627440042148698, "grad_norm": 0.44941818714141846, "learning_rate": 8.092814304792593e-06, "loss": 0.0354, "step": 37423 }, { "epoch": 0.6627617137518983, "grad_norm": 0.5679214596748352, "learning_rate": 8.092050595817785e-06, "loss": 0.0658, "step": 37424 }, { "epoch": 0.6627794232889267, "grad_norm": 0.5618365406990051, "learning_rate": 8.091286909569422e-06, "loss": 0.073, "step": 37425 }, { "epoch": 0.6627971328259551, "grad_norm": 0.6296157836914062, "learning_rate": 8.090523246050026e-06, "loss": 0.0833, "step": 37426 }, { "epoch": 0.6628148423629835, "grad_norm": 0.6938693523406982, "learning_rate": 8.089759605262112e-06, "loss": 0.0782, "step": 37427 }, { "epoch": 0.662832551900012, "grad_norm": 0.713575541973114, "learning_rate": 8.08899598720819e-06, "loss": 0.0966, "step": 37428 }, { "epoch": 0.6628502614370404, "grad_norm": 0.5420036911964417, "learning_rate": 8.08823239189077e-06, "loss": 0.0528, "step": 37429 }, { "epoch": 0.6628679709740688, "grad_norm": 0.5386006236076355, "learning_rate": 8.087468819312369e-06, "loss": 0.0363, "step": 37430 }, { "epoch": 0.6628856805110972, "grad_norm": 0.5516505837440491, "learning_rate": 8.0867052694755e-06, "loss": 0.0461, "step": 37431 }, { "epoch": 0.6629033900481257, "grad_norm": 0.6709533929824829, "learning_rate": 8.085941742382667e-06, "loss": 0.0539, "step": 37432 }, { "epoch": 0.6629210995851541, "grad_norm": 0.4093933701515198, "learning_rate": 8.085178238036385e-06, "loss": 0.0602, "step": 37433 }, { "epoch": 0.6629388091221825, "grad_norm": 0.6263318061828613, "learning_rate": 8.084414756439174e-06, "loss": 0.0454, "step": 37434 }, { "epoch": 0.6629565186592109, "grad_norm": 0.5216847062110901, "learning_rate": 8.083651297593533e-06, "loss": 0.0529, "step": 37435 }, { "epoch": 0.6629742281962394, "grad_norm": 1.0799351930618286, "learning_rate": 8.08288786150198e-06, "loss": 0.0511, "step": 37436 }, { "epoch": 0.6629919377332678, "grad_norm": 0.41149646043777466, "learning_rate": 8.082124448167025e-06, "loss": 0.0806, "step": 37437 }, { "epoch": 0.6630096472702962, "grad_norm": 0.49179449677467346, "learning_rate": 8.081361057591183e-06, "loss": 0.0611, "step": 37438 }, { "epoch": 0.6630273568073247, "grad_norm": 0.2659825384616852, "learning_rate": 8.080597689776961e-06, "loss": 0.0271, "step": 37439 }, { "epoch": 0.6630450663443531, "grad_norm": 0.42323577404022217, "learning_rate": 8.079834344726875e-06, "loss": 0.0399, "step": 37440 }, { "epoch": 0.6630627758813815, "grad_norm": 0.6948755383491516, "learning_rate": 8.079071022443433e-06, "loss": 0.0557, "step": 37441 }, { "epoch": 0.6630804854184099, "grad_norm": 0.5353073477745056, "learning_rate": 8.078307722929154e-06, "loss": 0.0487, "step": 37442 }, { "epoch": 0.6630981949554384, "grad_norm": 0.46320751309394836, "learning_rate": 8.077544446186535e-06, "loss": 0.0661, "step": 37443 }, { "epoch": 0.6631159044924668, "grad_norm": 0.48322224617004395, "learning_rate": 8.076781192218095e-06, "loss": 0.0395, "step": 37444 }, { "epoch": 0.6631336140294952, "grad_norm": 0.45376765727996826, "learning_rate": 8.07601796102635e-06, "loss": 0.0559, "step": 37445 }, { "epoch": 0.6631513235665236, "grad_norm": 0.7594056725502014, "learning_rate": 8.075254752613795e-06, "loss": 0.0784, "step": 37446 }, { "epoch": 0.6631690331035521, "grad_norm": 0.45064935088157654, "learning_rate": 8.074491566982958e-06, "loss": 0.0656, "step": 37447 }, { "epoch": 0.6631867426405805, "grad_norm": 0.47645628452301025, "learning_rate": 8.073728404136341e-06, "loss": 0.0647, "step": 37448 }, { "epoch": 0.6632044521776089, "grad_norm": 0.5725292563438416, "learning_rate": 8.072965264076467e-06, "loss": 0.0703, "step": 37449 }, { "epoch": 0.6632221617146373, "grad_norm": 0.5618005394935608, "learning_rate": 8.072202146805825e-06, "loss": 0.0541, "step": 37450 }, { "epoch": 0.6632398712516658, "grad_norm": 0.6305601000785828, "learning_rate": 8.071439052326941e-06, "loss": 0.061, "step": 37451 }, { "epoch": 0.6632575807886942, "grad_norm": 0.7337250113487244, "learning_rate": 8.070675980642327e-06, "loss": 0.0486, "step": 37452 }, { "epoch": 0.6632752903257226, "grad_norm": 0.594163715839386, "learning_rate": 8.069912931754482e-06, "loss": 0.0478, "step": 37453 }, { "epoch": 0.6632929998627511, "grad_norm": 0.5032942891120911, "learning_rate": 8.069149905665923e-06, "loss": 0.0781, "step": 37454 }, { "epoch": 0.6633107093997795, "grad_norm": 0.6129691004753113, "learning_rate": 8.06838690237916e-06, "loss": 0.0513, "step": 37455 }, { "epoch": 0.6633284189368079, "grad_norm": 0.7578966021537781, "learning_rate": 8.067623921896702e-06, "loss": 0.055, "step": 37456 }, { "epoch": 0.6633461284738363, "grad_norm": 0.8540855050086975, "learning_rate": 8.066860964221061e-06, "loss": 0.0556, "step": 37457 }, { "epoch": 0.6633638380108648, "grad_norm": 0.7529767155647278, "learning_rate": 8.066098029354745e-06, "loss": 0.0745, "step": 37458 }, { "epoch": 0.6633815475478932, "grad_norm": 0.3353717029094696, "learning_rate": 8.065335117300271e-06, "loss": 0.0403, "step": 37459 }, { "epoch": 0.6633992570849216, "grad_norm": 0.3165600597858429, "learning_rate": 8.064572228060139e-06, "loss": 0.0457, "step": 37460 }, { "epoch": 0.66341696662195, "grad_norm": 0.6958266496658325, "learning_rate": 8.063809361636863e-06, "loss": 0.0775, "step": 37461 }, { "epoch": 0.6634346761589786, "grad_norm": 0.5726039409637451, "learning_rate": 8.06304651803295e-06, "loss": 0.0543, "step": 37462 }, { "epoch": 0.663452385696007, "grad_norm": 0.6800076961517334, "learning_rate": 8.06228369725092e-06, "loss": 0.0616, "step": 37463 }, { "epoch": 0.6634700952330354, "grad_norm": 0.5947690010070801, "learning_rate": 8.06152089929327e-06, "loss": 0.0533, "step": 37464 }, { "epoch": 0.6634878047700637, "grad_norm": 0.5228535532951355, "learning_rate": 8.060758124162514e-06, "loss": 0.0386, "step": 37465 }, { "epoch": 0.6635055143070923, "grad_norm": 0.4418245255947113, "learning_rate": 8.059995371861159e-06, "loss": 0.0573, "step": 37466 }, { "epoch": 0.6635232238441207, "grad_norm": 0.8470193147659302, "learning_rate": 8.05923264239172e-06, "loss": 0.0836, "step": 37467 }, { "epoch": 0.6635409333811491, "grad_norm": 0.7288259267807007, "learning_rate": 8.058469935756704e-06, "loss": 0.0578, "step": 37468 }, { "epoch": 0.6635586429181776, "grad_norm": 0.9303980469703674, "learning_rate": 8.057707251958617e-06, "loss": 0.0989, "step": 37469 }, { "epoch": 0.663576352455206, "grad_norm": 0.3500344157218933, "learning_rate": 8.056944590999981e-06, "loss": 0.0726, "step": 37470 }, { "epoch": 0.6635940619922344, "grad_norm": 1.1496741771697998, "learning_rate": 8.056181952883286e-06, "loss": 0.0638, "step": 37471 }, { "epoch": 0.6636117715292628, "grad_norm": 0.40808597207069397, "learning_rate": 8.05541933761105e-06, "loss": 0.0773, "step": 37472 }, { "epoch": 0.6636294810662913, "grad_norm": 0.5735756158828735, "learning_rate": 8.05465674518579e-06, "loss": 0.0572, "step": 37473 }, { "epoch": 0.6636471906033197, "grad_norm": 0.9109397530555725, "learning_rate": 8.053894175609997e-06, "loss": 0.0611, "step": 37474 }, { "epoch": 0.6636649001403481, "grad_norm": 0.42459455132484436, "learning_rate": 8.053131628886189e-06, "loss": 0.0565, "step": 37475 }, { "epoch": 0.6636826096773765, "grad_norm": 0.7839967608451843, "learning_rate": 8.052369105016877e-06, "loss": 0.0947, "step": 37476 }, { "epoch": 0.663700319214405, "grad_norm": 0.47764915227890015, "learning_rate": 8.051606604004577e-06, "loss": 0.0272, "step": 37477 }, { "epoch": 0.6637180287514334, "grad_norm": 0.65801602602005, "learning_rate": 8.05084412585178e-06, "loss": 0.0643, "step": 37478 }, { "epoch": 0.6637357382884618, "grad_norm": 0.6431809067726135, "learning_rate": 8.050081670561004e-06, "loss": 0.0488, "step": 37479 }, { "epoch": 0.6637534478254902, "grad_norm": 0.5965665578842163, "learning_rate": 8.049319238134758e-06, "loss": 0.0822, "step": 37480 }, { "epoch": 0.6637711573625187, "grad_norm": 0.4382897913455963, "learning_rate": 8.048556828575551e-06, "loss": 0.0527, "step": 37481 }, { "epoch": 0.6637888668995471, "grad_norm": 0.8256545662879944, "learning_rate": 8.047794441885885e-06, "loss": 0.0678, "step": 37482 }, { "epoch": 0.6638065764365755, "grad_norm": 0.5590458512306213, "learning_rate": 8.04703207806827e-06, "loss": 0.0625, "step": 37483 }, { "epoch": 0.663824285973604, "grad_norm": 0.6955088376998901, "learning_rate": 8.046269737125221e-06, "loss": 0.0518, "step": 37484 }, { "epoch": 0.6638419955106324, "grad_norm": 0.6521773338317871, "learning_rate": 8.04550741905924e-06, "loss": 0.0603, "step": 37485 }, { "epoch": 0.6638597050476608, "grad_norm": 0.17569229006767273, "learning_rate": 8.044745123872833e-06, "loss": 0.0246, "step": 37486 }, { "epoch": 0.6638774145846892, "grad_norm": 0.4856947362422943, "learning_rate": 8.043982851568513e-06, "loss": 0.0584, "step": 37487 }, { "epoch": 0.6638951241217177, "grad_norm": 0.6549028754234314, "learning_rate": 8.043220602148793e-06, "loss": 0.0813, "step": 37488 }, { "epoch": 0.6639128336587461, "grad_norm": 0.4471451938152313, "learning_rate": 8.042458375616164e-06, "loss": 0.0446, "step": 37489 }, { "epoch": 0.6639305431957745, "grad_norm": 0.7013059258460999, "learning_rate": 8.041696171973147e-06, "loss": 0.07, "step": 37490 }, { "epoch": 0.6639482527328029, "grad_norm": 0.5912158489227295, "learning_rate": 8.040933991222251e-06, "loss": 0.0642, "step": 37491 }, { "epoch": 0.6639659622698314, "grad_norm": 0.4749762713909149, "learning_rate": 8.04017183336597e-06, "loss": 0.0194, "step": 37492 }, { "epoch": 0.6639836718068598, "grad_norm": 0.8620157241821289, "learning_rate": 8.039409698406821e-06, "loss": 0.055, "step": 37493 }, { "epoch": 0.6640013813438882, "grad_norm": 0.99211186170578, "learning_rate": 8.03864758634731e-06, "loss": 0.085, "step": 37494 }, { "epoch": 0.6640190908809166, "grad_norm": 0.46794116497039795, "learning_rate": 8.037885497189946e-06, "loss": 0.0608, "step": 37495 }, { "epoch": 0.6640368004179451, "grad_norm": 0.6556431651115417, "learning_rate": 8.037123430937232e-06, "loss": 0.0913, "step": 37496 }, { "epoch": 0.6640545099549735, "grad_norm": 0.6531745195388794, "learning_rate": 8.03636138759168e-06, "loss": 0.1015, "step": 37497 }, { "epoch": 0.6640722194920019, "grad_norm": 0.402567982673645, "learning_rate": 8.0355993671558e-06, "loss": 0.0518, "step": 37498 }, { "epoch": 0.6640899290290304, "grad_norm": 0.5359188914299011, "learning_rate": 8.034837369632087e-06, "loss": 0.0707, "step": 37499 }, { "epoch": 0.6641076385660588, "grad_norm": 0.5630399584770203, "learning_rate": 8.034075395023054e-06, "loss": 0.0528, "step": 37500 }, { "epoch": 0.6641253481030872, "grad_norm": 0.5954763889312744, "learning_rate": 8.03331344333121e-06, "loss": 0.1023, "step": 37501 }, { "epoch": 0.6641430576401156, "grad_norm": 0.4435223937034607, "learning_rate": 8.032551514559066e-06, "loss": 0.0451, "step": 37502 }, { "epoch": 0.6641607671771441, "grad_norm": 0.37676355242729187, "learning_rate": 8.031789608709116e-06, "loss": 0.0401, "step": 37503 }, { "epoch": 0.6641784767141725, "grad_norm": 0.6851885318756104, "learning_rate": 8.031027725783868e-06, "loss": 0.0616, "step": 37504 }, { "epoch": 0.6641961862512009, "grad_norm": 0.6124918460845947, "learning_rate": 8.030265865785846e-06, "loss": 0.0546, "step": 37505 }, { "epoch": 0.6642138957882293, "grad_norm": 0.6876378655433655, "learning_rate": 8.029504028717537e-06, "loss": 0.0462, "step": 37506 }, { "epoch": 0.6642316053252578, "grad_norm": 0.5543712973594666, "learning_rate": 8.028742214581453e-06, "loss": 0.0611, "step": 37507 }, { "epoch": 0.6642493148622862, "grad_norm": 0.8646712303161621, "learning_rate": 8.027980423380107e-06, "loss": 0.102, "step": 37508 }, { "epoch": 0.6642670243993146, "grad_norm": 0.619127631187439, "learning_rate": 8.027218655116002e-06, "loss": 0.0515, "step": 37509 }, { "epoch": 0.664284733936343, "grad_norm": 0.8342938423156738, "learning_rate": 8.026456909791639e-06, "loss": 0.0673, "step": 37510 }, { "epoch": 0.6643024434733715, "grad_norm": 0.5999757051467896, "learning_rate": 8.025695187409527e-06, "loss": 0.0728, "step": 37511 }, { "epoch": 0.6643201530103999, "grad_norm": 0.6348944902420044, "learning_rate": 8.024933487972173e-06, "loss": 0.0568, "step": 37512 }, { "epoch": 0.6643378625474283, "grad_norm": 0.48603716492652893, "learning_rate": 8.024171811482081e-06, "loss": 0.0669, "step": 37513 }, { "epoch": 0.6643555720844568, "grad_norm": 0.498418390750885, "learning_rate": 8.023410157941757e-06, "loss": 0.0495, "step": 37514 }, { "epoch": 0.6643732816214852, "grad_norm": 0.7211073637008667, "learning_rate": 8.02264852735371e-06, "loss": 0.0624, "step": 37515 }, { "epoch": 0.6643909911585136, "grad_norm": 0.46876558661460876, "learning_rate": 8.02188691972045e-06, "loss": 0.0398, "step": 37516 }, { "epoch": 0.664408700695542, "grad_norm": 0.616001546382904, "learning_rate": 8.021125335044469e-06, "loss": 0.0548, "step": 37517 }, { "epoch": 0.6644264102325705, "grad_norm": 0.5448940992355347, "learning_rate": 8.02036377332828e-06, "loss": 0.0622, "step": 37518 }, { "epoch": 0.6644441197695989, "grad_norm": 0.4330146312713623, "learning_rate": 8.01960223457439e-06, "loss": 0.0647, "step": 37519 }, { "epoch": 0.6644618293066273, "grad_norm": 0.5978720784187317, "learning_rate": 8.018840718785304e-06, "loss": 0.0474, "step": 37520 }, { "epoch": 0.6644795388436557, "grad_norm": 0.6855853796005249, "learning_rate": 8.018079225963524e-06, "loss": 0.0651, "step": 37521 }, { "epoch": 0.6644972483806842, "grad_norm": 0.7594494223594666, "learning_rate": 8.017317756111557e-06, "loss": 0.0765, "step": 37522 }, { "epoch": 0.6645149579177126, "grad_norm": 0.3527064621448517, "learning_rate": 8.016556309231907e-06, "loss": 0.0468, "step": 37523 }, { "epoch": 0.664532667454741, "grad_norm": 0.5715122818946838, "learning_rate": 8.01579488532708e-06, "loss": 0.0818, "step": 37524 }, { "epoch": 0.6645503769917694, "grad_norm": 0.41578686237335205, "learning_rate": 8.015033484399583e-06, "loss": 0.0642, "step": 37525 }, { "epoch": 0.664568086528798, "grad_norm": 0.5366174578666687, "learning_rate": 8.014272106451918e-06, "loss": 0.0528, "step": 37526 }, { "epoch": 0.6645857960658264, "grad_norm": 0.6128190159797668, "learning_rate": 8.013510751486599e-06, "loss": 0.061, "step": 37527 }, { "epoch": 0.6646035056028547, "grad_norm": 0.6616562008857727, "learning_rate": 8.012749419506116e-06, "loss": 0.0841, "step": 37528 }, { "epoch": 0.6646212151398833, "grad_norm": 0.7188583016395569, "learning_rate": 8.01198811051298e-06, "loss": 0.0507, "step": 37529 }, { "epoch": 0.6646389246769117, "grad_norm": 0.6766344904899597, "learning_rate": 8.011226824509702e-06, "loss": 0.0583, "step": 37530 }, { "epoch": 0.6646566342139401, "grad_norm": 0.6094910502433777, "learning_rate": 8.010465561498776e-06, "loss": 0.0497, "step": 37531 }, { "epoch": 0.6646743437509685, "grad_norm": 0.3334425687789917, "learning_rate": 8.00970432148271e-06, "loss": 0.0495, "step": 37532 }, { "epoch": 0.664692053287997, "grad_norm": 0.6069051623344421, "learning_rate": 8.008943104464006e-06, "loss": 0.055, "step": 37533 }, { "epoch": 0.6647097628250254, "grad_norm": 0.6964495778083801, "learning_rate": 8.008181910445183e-06, "loss": 0.07, "step": 37534 }, { "epoch": 0.6647274723620538, "grad_norm": 1.023864507675171, "learning_rate": 8.007420739428728e-06, "loss": 0.0873, "step": 37535 }, { "epoch": 0.6647451818990822, "grad_norm": 0.3863474726676941, "learning_rate": 8.006659591417152e-06, "loss": 0.0687, "step": 37536 }, { "epoch": 0.6647628914361107, "grad_norm": 0.39453360438346863, "learning_rate": 8.005898466412963e-06, "loss": 0.0392, "step": 37537 }, { "epoch": 0.6647806009731391, "grad_norm": 0.9748402833938599, "learning_rate": 8.005137364418658e-06, "loss": 0.068, "step": 37538 }, { "epoch": 0.6647983105101675, "grad_norm": 0.35368984937667847, "learning_rate": 8.00437628543674e-06, "loss": 0.045, "step": 37539 }, { "epoch": 0.6648160200471959, "grad_norm": 0.5692508816719055, "learning_rate": 8.00361522946972e-06, "loss": 0.0524, "step": 37540 }, { "epoch": 0.6648337295842244, "grad_norm": 0.6514016389846802, "learning_rate": 8.002854196520096e-06, "loss": 0.07, "step": 37541 }, { "epoch": 0.6648514391212528, "grad_norm": 0.5796782374382019, "learning_rate": 8.002093186590372e-06, "loss": 0.0563, "step": 37542 }, { "epoch": 0.6648691486582812, "grad_norm": 0.66438227891922, "learning_rate": 8.001332199683057e-06, "loss": 0.0688, "step": 37543 }, { "epoch": 0.6648868581953097, "grad_norm": 0.8307287693023682, "learning_rate": 8.00057123580065e-06, "loss": 0.0625, "step": 37544 }, { "epoch": 0.6649045677323381, "grad_norm": 0.9430047273635864, "learning_rate": 7.999810294945661e-06, "loss": 0.0679, "step": 37545 }, { "epoch": 0.6649222772693665, "grad_norm": 0.44427090883255005, "learning_rate": 7.999049377120584e-06, "loss": 0.0512, "step": 37546 }, { "epoch": 0.6649399868063949, "grad_norm": 0.8464030623435974, "learning_rate": 7.998288482327924e-06, "loss": 0.0558, "step": 37547 }, { "epoch": 0.6649576963434234, "grad_norm": 0.43660107254981995, "learning_rate": 7.997527610570195e-06, "loss": 0.0456, "step": 37548 }, { "epoch": 0.6649754058804518, "grad_norm": 0.24842214584350586, "learning_rate": 7.996766761849886e-06, "loss": 0.0408, "step": 37549 }, { "epoch": 0.6649931154174802, "grad_norm": 0.5390738248825073, "learning_rate": 7.996005936169505e-06, "loss": 0.0587, "step": 37550 }, { "epoch": 0.6650108249545086, "grad_norm": 0.7919685244560242, "learning_rate": 7.995245133531555e-06, "loss": 0.11, "step": 37551 }, { "epoch": 0.6650285344915371, "grad_norm": 0.742422342300415, "learning_rate": 7.99448435393854e-06, "loss": 0.0759, "step": 37552 }, { "epoch": 0.6650462440285655, "grad_norm": 0.4566420912742615, "learning_rate": 7.993723597392963e-06, "loss": 0.0856, "step": 37553 }, { "epoch": 0.6650639535655939, "grad_norm": 0.8449131846427917, "learning_rate": 7.99296286389733e-06, "loss": 0.0753, "step": 37554 }, { "epoch": 0.6650816631026223, "grad_norm": 0.5785472989082336, "learning_rate": 7.992202153454142e-06, "loss": 0.0791, "step": 37555 }, { "epoch": 0.6650993726396508, "grad_norm": 0.45198965072631836, "learning_rate": 7.991441466065896e-06, "loss": 0.0544, "step": 37556 }, { "epoch": 0.6651170821766792, "grad_norm": 0.5522359013557434, "learning_rate": 7.990680801735099e-06, "loss": 0.0641, "step": 37557 }, { "epoch": 0.6651347917137076, "grad_norm": 0.4490451514720917, "learning_rate": 7.989920160464253e-06, "loss": 0.0381, "step": 37558 }, { "epoch": 0.6651525012507361, "grad_norm": 0.585321843624115, "learning_rate": 7.989159542255869e-06, "loss": 0.0598, "step": 37559 }, { "epoch": 0.6651702107877645, "grad_norm": 1.0869799852371216, "learning_rate": 7.988398947112432e-06, "loss": 0.0689, "step": 37560 }, { "epoch": 0.6651879203247929, "grad_norm": 0.7161285281181335, "learning_rate": 7.987638375036453e-06, "loss": 0.0567, "step": 37561 }, { "epoch": 0.6652056298618213, "grad_norm": 0.5190578699111938, "learning_rate": 7.986877826030437e-06, "loss": 0.0567, "step": 37562 }, { "epoch": 0.6652233393988498, "grad_norm": 0.21595092117786407, "learning_rate": 7.986117300096883e-06, "loss": 0.0343, "step": 37563 }, { "epoch": 0.6652410489358782, "grad_norm": 0.38372740149497986, "learning_rate": 7.985356797238292e-06, "loss": 0.0688, "step": 37564 }, { "epoch": 0.6652587584729066, "grad_norm": 0.5823089480400085, "learning_rate": 7.984596317457168e-06, "loss": 0.0618, "step": 37565 }, { "epoch": 0.665276468009935, "grad_norm": 0.19976629316806793, "learning_rate": 7.98383586075602e-06, "loss": 0.0447, "step": 37566 }, { "epoch": 0.6652941775469635, "grad_norm": 0.5391286611557007, "learning_rate": 7.983075427137334e-06, "loss": 0.0569, "step": 37567 }, { "epoch": 0.6653118870839919, "grad_norm": 0.7190254926681519, "learning_rate": 7.982315016603624e-06, "loss": 0.0605, "step": 37568 }, { "epoch": 0.6653295966210203, "grad_norm": 0.6068156361579895, "learning_rate": 7.981554629157388e-06, "loss": 0.0661, "step": 37569 }, { "epoch": 0.6653473061580487, "grad_norm": 0.7829208970069885, "learning_rate": 7.98079426480112e-06, "loss": 0.0964, "step": 37570 }, { "epoch": 0.6653650156950772, "grad_norm": 0.3592056334018707, "learning_rate": 7.980033923537332e-06, "loss": 0.0637, "step": 37571 }, { "epoch": 0.6653827252321056, "grad_norm": 0.5633084774017334, "learning_rate": 7.979273605368526e-06, "loss": 0.0609, "step": 37572 }, { "epoch": 0.665400434769134, "grad_norm": 0.5746201872825623, "learning_rate": 7.978513310297204e-06, "loss": 0.054, "step": 37573 }, { "epoch": 0.6654181443061625, "grad_norm": 0.6198919415473938, "learning_rate": 7.977753038325857e-06, "loss": 0.046, "step": 37574 }, { "epoch": 0.6654358538431909, "grad_norm": 0.6001875400543213, "learning_rate": 7.976992789456992e-06, "loss": 0.0534, "step": 37575 }, { "epoch": 0.6654535633802193, "grad_norm": 0.370261013507843, "learning_rate": 7.976232563693114e-06, "loss": 0.0578, "step": 37576 }, { "epoch": 0.6654712729172477, "grad_norm": 0.906051754951477, "learning_rate": 7.975472361036718e-06, "loss": 0.0694, "step": 37577 }, { "epoch": 0.6654889824542762, "grad_norm": 0.3454703986644745, "learning_rate": 7.974712181490305e-06, "loss": 0.0676, "step": 37578 }, { "epoch": 0.6655066919913046, "grad_norm": 0.48679986596107483, "learning_rate": 7.97395202505638e-06, "loss": 0.045, "step": 37579 }, { "epoch": 0.665524401528333, "grad_norm": 0.7623251676559448, "learning_rate": 7.973191891737443e-06, "loss": 0.0647, "step": 37580 }, { "epoch": 0.6655421110653614, "grad_norm": 0.742279589176178, "learning_rate": 7.972431781535994e-06, "loss": 0.0565, "step": 37581 }, { "epoch": 0.6655598206023899, "grad_norm": 0.8185468316078186, "learning_rate": 7.971671694454532e-06, "loss": 0.0743, "step": 37582 }, { "epoch": 0.6655775301394183, "grad_norm": 0.5003605484962463, "learning_rate": 7.97091163049556e-06, "loss": 0.0599, "step": 37583 }, { "epoch": 0.6655952396764467, "grad_norm": 0.7352645993232727, "learning_rate": 7.970151589661581e-06, "loss": 0.0658, "step": 37584 }, { "epoch": 0.6656129492134751, "grad_norm": 0.6111546754837036, "learning_rate": 7.96939157195509e-06, "loss": 0.0871, "step": 37585 }, { "epoch": 0.6656306587505036, "grad_norm": 0.35792672634124756, "learning_rate": 7.968631577378587e-06, "loss": 0.0648, "step": 37586 }, { "epoch": 0.665648368287532, "grad_norm": 0.6862797737121582, "learning_rate": 7.967871605934585e-06, "loss": 0.0671, "step": 37587 }, { "epoch": 0.6656660778245604, "grad_norm": 0.8510613441467285, "learning_rate": 7.967111657625564e-06, "loss": 0.0545, "step": 37588 }, { "epoch": 0.665683787361589, "grad_norm": 0.5381543040275574, "learning_rate": 7.966351732454036e-06, "loss": 0.0393, "step": 37589 }, { "epoch": 0.6657014968986174, "grad_norm": 0.7154479026794434, "learning_rate": 7.965591830422498e-06, "loss": 0.0783, "step": 37590 }, { "epoch": 0.6657192064356457, "grad_norm": 0.32081520557403564, "learning_rate": 7.964831951533454e-06, "loss": 0.0487, "step": 37591 }, { "epoch": 0.6657369159726741, "grad_norm": 0.421230286359787, "learning_rate": 7.964072095789401e-06, "loss": 0.0731, "step": 37592 }, { "epoch": 0.6657546255097027, "grad_norm": 0.7426518797874451, "learning_rate": 7.963312263192837e-06, "loss": 0.0626, "step": 37593 }, { "epoch": 0.6657723350467311, "grad_norm": 0.7407477498054504, "learning_rate": 7.96255245374627e-06, "loss": 0.0757, "step": 37594 }, { "epoch": 0.6657900445837595, "grad_norm": 0.5991421341896057, "learning_rate": 7.96179266745219e-06, "loss": 0.0756, "step": 37595 }, { "epoch": 0.6658077541207879, "grad_norm": 0.5751538276672363, "learning_rate": 7.9610329043131e-06, "loss": 0.0763, "step": 37596 }, { "epoch": 0.6658254636578164, "grad_norm": 0.935512125492096, "learning_rate": 7.960273164331497e-06, "loss": 0.0505, "step": 37597 }, { "epoch": 0.6658431731948448, "grad_norm": 0.22353263199329376, "learning_rate": 7.959513447509893e-06, "loss": 0.0368, "step": 37598 }, { "epoch": 0.6658608827318732, "grad_norm": 0.48532813787460327, "learning_rate": 7.958753753850762e-06, "loss": 0.0567, "step": 37599 }, { "epoch": 0.6658785922689016, "grad_norm": 0.8010048270225525, "learning_rate": 7.957994083356627e-06, "loss": 0.0582, "step": 37600 }, { "epoch": 0.6658963018059301, "grad_norm": 0.6851275563240051, "learning_rate": 7.957234436029983e-06, "loss": 0.0673, "step": 37601 }, { "epoch": 0.6659140113429585, "grad_norm": 0.8354924917221069, "learning_rate": 7.956474811873322e-06, "loss": 0.0695, "step": 37602 }, { "epoch": 0.6659317208799869, "grad_norm": 0.7869113683700562, "learning_rate": 7.955715210889146e-06, "loss": 0.0716, "step": 37603 }, { "epoch": 0.6659494304170154, "grad_norm": 0.2119700163602829, "learning_rate": 7.954955633079953e-06, "loss": 0.0269, "step": 37604 }, { "epoch": 0.6659671399540438, "grad_norm": 0.6235274076461792, "learning_rate": 7.954196078448251e-06, "loss": 0.029, "step": 37605 }, { "epoch": 0.6659848494910722, "grad_norm": 0.6994972229003906, "learning_rate": 7.953436546996526e-06, "loss": 0.0581, "step": 37606 }, { "epoch": 0.6660025590281006, "grad_norm": 0.807809591293335, "learning_rate": 7.952677038727278e-06, "loss": 0.0628, "step": 37607 }, { "epoch": 0.6660202685651291, "grad_norm": 0.5139956474304199, "learning_rate": 7.951917553643014e-06, "loss": 0.0486, "step": 37608 }, { "epoch": 0.6660379781021575, "grad_norm": 0.7242211103439331, "learning_rate": 7.951158091746226e-06, "loss": 0.0541, "step": 37609 }, { "epoch": 0.6660556876391859, "grad_norm": 0.9789085984230042, "learning_rate": 7.950398653039416e-06, "loss": 0.0942, "step": 37610 }, { "epoch": 0.6660733971762143, "grad_norm": 0.3498099148273468, "learning_rate": 7.949639237525082e-06, "loss": 0.0309, "step": 37611 }, { "epoch": 0.6660911067132428, "grad_norm": 0.7179504036903381, "learning_rate": 7.948879845205726e-06, "loss": 0.0754, "step": 37612 }, { "epoch": 0.6661088162502712, "grad_norm": 0.5363633632659912, "learning_rate": 7.948120476083837e-06, "loss": 0.0586, "step": 37613 }, { "epoch": 0.6661265257872996, "grad_norm": 0.5502007007598877, "learning_rate": 7.94736113016192e-06, "loss": 0.0534, "step": 37614 }, { "epoch": 0.666144235324328, "grad_norm": 0.45703601837158203, "learning_rate": 7.946601807442469e-06, "loss": 0.0937, "step": 37615 }, { "epoch": 0.6661619448613565, "grad_norm": 0.6405919790267944, "learning_rate": 7.945842507927992e-06, "loss": 0.0656, "step": 37616 }, { "epoch": 0.6661796543983849, "grad_norm": 0.713421642780304, "learning_rate": 7.945083231620973e-06, "loss": 0.0801, "step": 37617 }, { "epoch": 0.6661973639354133, "grad_norm": 0.644913375377655, "learning_rate": 7.944323978523917e-06, "loss": 0.0601, "step": 37618 }, { "epoch": 0.6662150734724418, "grad_norm": 0.5311557650566101, "learning_rate": 7.943564748639321e-06, "loss": 0.075, "step": 37619 }, { "epoch": 0.6662327830094702, "grad_norm": 1.0354574918746948, "learning_rate": 7.942805541969684e-06, "loss": 0.0928, "step": 37620 }, { "epoch": 0.6662504925464986, "grad_norm": 0.645384669303894, "learning_rate": 7.942046358517503e-06, "loss": 0.039, "step": 37621 }, { "epoch": 0.666268202083527, "grad_norm": 0.5002997517585754, "learning_rate": 7.941287198285276e-06, "loss": 0.0616, "step": 37622 }, { "epoch": 0.6662859116205555, "grad_norm": 0.876814067363739, "learning_rate": 7.940528061275505e-06, "loss": 0.0674, "step": 37623 }, { "epoch": 0.6663036211575839, "grad_norm": 0.5259138941764832, "learning_rate": 7.939768947490676e-06, "loss": 0.0745, "step": 37624 }, { "epoch": 0.6663213306946123, "grad_norm": 0.3319261074066162, "learning_rate": 7.939009856933295e-06, "loss": 0.0535, "step": 37625 }, { "epoch": 0.6663390402316407, "grad_norm": 0.6377061009407043, "learning_rate": 7.938250789605863e-06, "loss": 0.0613, "step": 37626 }, { "epoch": 0.6663567497686692, "grad_norm": 0.5146042108535767, "learning_rate": 7.937491745510865e-06, "loss": 0.0504, "step": 37627 }, { "epoch": 0.6663744593056976, "grad_norm": 0.8270146250724792, "learning_rate": 7.936732724650799e-06, "loss": 0.0816, "step": 37628 }, { "epoch": 0.666392168842726, "grad_norm": 0.11374662071466446, "learning_rate": 7.935973727028175e-06, "loss": 0.0486, "step": 37629 }, { "epoch": 0.6664098783797544, "grad_norm": 0.8588399291038513, "learning_rate": 7.935214752645487e-06, "loss": 0.0863, "step": 37630 }, { "epoch": 0.6664275879167829, "grad_norm": 0.43170249462127686, "learning_rate": 7.934455801505223e-06, "loss": 0.0479, "step": 37631 }, { "epoch": 0.6664452974538113, "grad_norm": 0.4048973619937897, "learning_rate": 7.933696873609884e-06, "loss": 0.0384, "step": 37632 }, { "epoch": 0.6664630069908397, "grad_norm": 0.7753552198410034, "learning_rate": 7.932937968961974e-06, "loss": 0.0908, "step": 37633 }, { "epoch": 0.6664807165278682, "grad_norm": 0.6761422753334045, "learning_rate": 7.932179087563978e-06, "loss": 0.0713, "step": 37634 }, { "epoch": 0.6664984260648966, "grad_norm": 1.247786283493042, "learning_rate": 7.931420229418399e-06, "loss": 0.1091, "step": 37635 }, { "epoch": 0.666516135601925, "grad_norm": 0.8175731301307678, "learning_rate": 7.93066139452773e-06, "loss": 0.0683, "step": 37636 }, { "epoch": 0.6665338451389534, "grad_norm": 0.3857707679271698, "learning_rate": 7.92990258289447e-06, "loss": 0.0636, "step": 37637 }, { "epoch": 0.6665515546759819, "grad_norm": 0.6953296065330505, "learning_rate": 7.929143794521117e-06, "loss": 0.0746, "step": 37638 }, { "epoch": 0.6665692642130103, "grad_norm": 0.4292019009590149, "learning_rate": 7.928385029410164e-06, "loss": 0.0393, "step": 37639 }, { "epoch": 0.6665869737500387, "grad_norm": 0.7070026397705078, "learning_rate": 7.927626287564117e-06, "loss": 0.074, "step": 37640 }, { "epoch": 0.6666046832870671, "grad_norm": 0.5715380311012268, "learning_rate": 7.926867568985458e-06, "loss": 0.0365, "step": 37641 }, { "epoch": 0.6666223928240956, "grad_norm": 0.5471308827400208, "learning_rate": 7.926108873676688e-06, "loss": 0.0642, "step": 37642 }, { "epoch": 0.666640102361124, "grad_norm": 0.6853353977203369, "learning_rate": 7.925350201640305e-06, "loss": 0.0665, "step": 37643 }, { "epoch": 0.6666578118981524, "grad_norm": 0.3602122664451599, "learning_rate": 7.92459155287881e-06, "loss": 0.0471, "step": 37644 }, { "epoch": 0.6666755214351808, "grad_norm": 0.8562238216400146, "learning_rate": 7.923832927394686e-06, "loss": 0.0627, "step": 37645 }, { "epoch": 0.6666932309722093, "grad_norm": 0.6112303733825684, "learning_rate": 7.923074325190439e-06, "loss": 0.0536, "step": 37646 }, { "epoch": 0.6667109405092377, "grad_norm": 0.7349247932434082, "learning_rate": 7.922315746268559e-06, "loss": 0.0867, "step": 37647 }, { "epoch": 0.6667286500462661, "grad_norm": 0.5868639945983887, "learning_rate": 7.921557190631546e-06, "loss": 0.0439, "step": 37648 }, { "epoch": 0.6667463595832946, "grad_norm": 0.782975435256958, "learning_rate": 7.920798658281892e-06, "loss": 0.0705, "step": 37649 }, { "epoch": 0.666764069120323, "grad_norm": 1.0609771013259888, "learning_rate": 7.920040149222097e-06, "loss": 0.0867, "step": 37650 }, { "epoch": 0.6667817786573514, "grad_norm": 0.4687315821647644, "learning_rate": 7.919281663454657e-06, "loss": 0.0675, "step": 37651 }, { "epoch": 0.6667994881943798, "grad_norm": 0.356519877910614, "learning_rate": 7.91852320098206e-06, "loss": 0.0329, "step": 37652 }, { "epoch": 0.6668171977314084, "grad_norm": 0.7916883230209351, "learning_rate": 7.917764761806807e-06, "loss": 0.063, "step": 37653 }, { "epoch": 0.6668349072684367, "grad_norm": 0.47208172082901, "learning_rate": 7.917006345931388e-06, "loss": 0.0544, "step": 37654 }, { "epoch": 0.6668526168054651, "grad_norm": 0.7055436372756958, "learning_rate": 7.91624795335831e-06, "loss": 0.0646, "step": 37655 }, { "epoch": 0.6668703263424935, "grad_norm": 0.7443931102752686, "learning_rate": 7.915489584090053e-06, "loss": 0.0544, "step": 37656 }, { "epoch": 0.6668880358795221, "grad_norm": 0.7773980498313904, "learning_rate": 7.914731238129112e-06, "loss": 0.0588, "step": 37657 }, { "epoch": 0.6669057454165505, "grad_norm": 0.4696180820465088, "learning_rate": 7.913972915478003e-06, "loss": 0.0572, "step": 37658 }, { "epoch": 0.6669234549535789, "grad_norm": 0.7157880067825317, "learning_rate": 7.913214616139198e-06, "loss": 0.0572, "step": 37659 }, { "epoch": 0.6669411644906073, "grad_norm": 0.6989046335220337, "learning_rate": 7.912456340115201e-06, "loss": 0.0735, "step": 37660 }, { "epoch": 0.6669588740276358, "grad_norm": 0.7415664196014404, "learning_rate": 7.911698087408505e-06, "loss": 0.0556, "step": 37661 }, { "epoch": 0.6669765835646642, "grad_norm": 0.7290847897529602, "learning_rate": 7.910939858021612e-06, "loss": 0.0524, "step": 37662 }, { "epoch": 0.6669942931016926, "grad_norm": 0.45476460456848145, "learning_rate": 7.910181651957005e-06, "loss": 0.0697, "step": 37663 }, { "epoch": 0.6670120026387211, "grad_norm": 1.0491772890090942, "learning_rate": 7.909423469217181e-06, "loss": 0.0912, "step": 37664 }, { "epoch": 0.6670297121757495, "grad_norm": 0.7445907592773438, "learning_rate": 7.908665309804637e-06, "loss": 0.0876, "step": 37665 }, { "epoch": 0.6670474217127779, "grad_norm": 0.6937467455863953, "learning_rate": 7.907907173721868e-06, "loss": 0.0318, "step": 37666 }, { "epoch": 0.6670651312498063, "grad_norm": 0.6598812341690063, "learning_rate": 7.907149060971368e-06, "loss": 0.0628, "step": 37667 }, { "epoch": 0.6670828407868348, "grad_norm": 0.5030218362808228, "learning_rate": 7.906390971555627e-06, "loss": 0.0411, "step": 37668 }, { "epoch": 0.6671005503238632, "grad_norm": 0.3799062967300415, "learning_rate": 7.90563290547715e-06, "loss": 0.0574, "step": 37669 }, { "epoch": 0.6671182598608916, "grad_norm": 0.9140811562538147, "learning_rate": 7.904874862738419e-06, "loss": 0.0569, "step": 37670 }, { "epoch": 0.66713596939792, "grad_norm": 0.9162504076957703, "learning_rate": 7.904116843341932e-06, "loss": 0.0851, "step": 37671 }, { "epoch": 0.6671536789349485, "grad_norm": 0.3681843876838684, "learning_rate": 7.903358847290186e-06, "loss": 0.0434, "step": 37672 }, { "epoch": 0.6671713884719769, "grad_norm": 0.7267318964004517, "learning_rate": 7.902600874585666e-06, "loss": 0.0883, "step": 37673 }, { "epoch": 0.6671890980090053, "grad_norm": 0.37233346700668335, "learning_rate": 7.90184292523087e-06, "loss": 0.0497, "step": 37674 }, { "epoch": 0.6672068075460337, "grad_norm": 0.28721246123313904, "learning_rate": 7.901084999228297e-06, "loss": 0.0339, "step": 37675 }, { "epoch": 0.6672245170830622, "grad_norm": 0.5295230150222778, "learning_rate": 7.900327096580435e-06, "loss": 0.0693, "step": 37676 }, { "epoch": 0.6672422266200906, "grad_norm": 0.5675286054611206, "learning_rate": 7.899569217289777e-06, "loss": 0.0572, "step": 37677 }, { "epoch": 0.667259936157119, "grad_norm": 0.8369609713554382, "learning_rate": 7.898811361358819e-06, "loss": 0.0486, "step": 37678 }, { "epoch": 0.6672776456941475, "grad_norm": 0.3878859877586365, "learning_rate": 7.89805352879006e-06, "loss": 0.0645, "step": 37679 }, { "epoch": 0.6672953552311759, "grad_norm": 0.5515313148498535, "learning_rate": 7.89729571958598e-06, "loss": 0.0744, "step": 37680 }, { "epoch": 0.6673130647682043, "grad_norm": 0.974006712436676, "learning_rate": 7.896537933749078e-06, "loss": 0.0951, "step": 37681 }, { "epoch": 0.6673307743052327, "grad_norm": 0.684174120426178, "learning_rate": 7.895780171281851e-06, "loss": 0.0801, "step": 37682 }, { "epoch": 0.6673484838422612, "grad_norm": 0.7988206744194031, "learning_rate": 7.895022432186791e-06, "loss": 0.0569, "step": 37683 }, { "epoch": 0.6673661933792896, "grad_norm": 0.7335145473480225, "learning_rate": 7.894264716466384e-06, "loss": 0.0714, "step": 37684 }, { "epoch": 0.667383902916318, "grad_norm": 0.8407557606697083, "learning_rate": 7.893507024123126e-06, "loss": 0.0807, "step": 37685 }, { "epoch": 0.6674016124533464, "grad_norm": 0.8104731440544128, "learning_rate": 7.892749355159508e-06, "loss": 0.0638, "step": 37686 }, { "epoch": 0.6674193219903749, "grad_norm": 1.096960186958313, "learning_rate": 7.891991709578035e-06, "loss": 0.0787, "step": 37687 }, { "epoch": 0.6674370315274033, "grad_norm": 0.7020661234855652, "learning_rate": 7.891234087381186e-06, "loss": 0.0653, "step": 37688 }, { "epoch": 0.6674547410644317, "grad_norm": 0.5697680711746216, "learning_rate": 7.890476488571456e-06, "loss": 0.0501, "step": 37689 }, { "epoch": 0.6674724506014601, "grad_norm": 0.6726093292236328, "learning_rate": 7.889718913151347e-06, "loss": 0.0807, "step": 37690 }, { "epoch": 0.6674901601384886, "grad_norm": 0.7464416027069092, "learning_rate": 7.888961361123339e-06, "loss": 0.0626, "step": 37691 }, { "epoch": 0.667507869675517, "grad_norm": 0.7913680672645569, "learning_rate": 7.888203832489925e-06, "loss": 0.0881, "step": 37692 }, { "epoch": 0.6675255792125454, "grad_norm": 0.519019365310669, "learning_rate": 7.887446327253604e-06, "loss": 0.0712, "step": 37693 }, { "epoch": 0.6675432887495739, "grad_norm": 0.24616658687591553, "learning_rate": 7.886688845416865e-06, "loss": 0.0699, "step": 37694 }, { "epoch": 0.6675609982866023, "grad_norm": 0.8103781342506409, "learning_rate": 7.885931386982201e-06, "loss": 0.0822, "step": 37695 }, { "epoch": 0.6675787078236307, "grad_norm": 0.5884706377983093, "learning_rate": 7.885173951952101e-06, "loss": 0.0755, "step": 37696 }, { "epoch": 0.6675964173606591, "grad_norm": 0.4097345769405365, "learning_rate": 7.884416540329064e-06, "loss": 0.0579, "step": 37697 }, { "epoch": 0.6676141268976876, "grad_norm": 0.7321420311927795, "learning_rate": 7.883659152115574e-06, "loss": 0.0602, "step": 37698 }, { "epoch": 0.667631836434716, "grad_norm": 0.5458633899688721, "learning_rate": 7.882901787314124e-06, "loss": 0.0585, "step": 37699 }, { "epoch": 0.6676495459717444, "grad_norm": 1.0104864835739136, "learning_rate": 7.882144445927208e-06, "loss": 0.109, "step": 37700 }, { "epoch": 0.6676672555087728, "grad_norm": 0.7368209362030029, "learning_rate": 7.881387127957322e-06, "loss": 0.0659, "step": 37701 }, { "epoch": 0.6676849650458013, "grad_norm": 0.49283620715141296, "learning_rate": 7.880629833406946e-06, "loss": 0.0305, "step": 37702 }, { "epoch": 0.6677026745828297, "grad_norm": 0.6116262078285217, "learning_rate": 7.879872562278579e-06, "loss": 0.0518, "step": 37703 }, { "epoch": 0.6677203841198581, "grad_norm": 0.45923280715942383, "learning_rate": 7.879115314574711e-06, "loss": 0.0512, "step": 37704 }, { "epoch": 0.6677380936568865, "grad_norm": 0.6970727443695068, "learning_rate": 7.878358090297833e-06, "loss": 0.0544, "step": 37705 }, { "epoch": 0.667755803193915, "grad_norm": 0.7192044854164124, "learning_rate": 7.877600889450435e-06, "loss": 0.0657, "step": 37706 }, { "epoch": 0.6677735127309434, "grad_norm": 0.6043837666511536, "learning_rate": 7.876843712035013e-06, "loss": 0.0629, "step": 37707 }, { "epoch": 0.6677912222679718, "grad_norm": 0.7099403142929077, "learning_rate": 7.87608655805406e-06, "loss": 0.0627, "step": 37708 }, { "epoch": 0.6678089318050003, "grad_norm": 0.6064301133155823, "learning_rate": 7.875329427510053e-06, "loss": 0.0659, "step": 37709 }, { "epoch": 0.6678266413420287, "grad_norm": 0.44884374737739563, "learning_rate": 7.874572320405494e-06, "loss": 0.0534, "step": 37710 }, { "epoch": 0.6678443508790571, "grad_norm": 0.6150496602058411, "learning_rate": 7.873815236742875e-06, "loss": 0.0509, "step": 37711 }, { "epoch": 0.6678620604160855, "grad_norm": 0.3341151773929596, "learning_rate": 7.87305817652468e-06, "loss": 0.0392, "step": 37712 }, { "epoch": 0.667879769953114, "grad_norm": 0.4513460099697113, "learning_rate": 7.872301139753401e-06, "loss": 0.041, "step": 37713 }, { "epoch": 0.6678974794901424, "grad_norm": 0.6342617869377136, "learning_rate": 7.871544126431532e-06, "loss": 0.1, "step": 37714 }, { "epoch": 0.6679151890271708, "grad_norm": 0.5495724678039551, "learning_rate": 7.87078713656156e-06, "loss": 0.0516, "step": 37715 }, { "epoch": 0.6679328985641992, "grad_norm": 0.40027645230293274, "learning_rate": 7.870030170145978e-06, "loss": 0.0673, "step": 37716 }, { "epoch": 0.6679506081012277, "grad_norm": 0.7461829781532288, "learning_rate": 7.869273227187274e-06, "loss": 0.0458, "step": 37717 }, { "epoch": 0.6679683176382561, "grad_norm": 1.2577382326126099, "learning_rate": 7.868516307687943e-06, "loss": 0.0478, "step": 37718 }, { "epoch": 0.6679860271752845, "grad_norm": 0.7461392283439636, "learning_rate": 7.867759411650478e-06, "loss": 0.0516, "step": 37719 }, { "epoch": 0.668003736712313, "grad_norm": 0.6313902139663696, "learning_rate": 7.867002539077353e-06, "loss": 0.0566, "step": 37720 }, { "epoch": 0.6680214462493415, "grad_norm": 0.7209183573722839, "learning_rate": 7.866245689971071e-06, "loss": 0.0628, "step": 37721 }, { "epoch": 0.6680391557863699, "grad_norm": 0.43953824043273926, "learning_rate": 7.865488864334124e-06, "loss": 0.0362, "step": 37722 }, { "epoch": 0.6680568653233983, "grad_norm": 0.49318426847457886, "learning_rate": 7.864732062168988e-06, "loss": 0.0625, "step": 37723 }, { "epoch": 0.6680745748604268, "grad_norm": 0.719073474407196, "learning_rate": 7.863975283478164e-06, "loss": 0.0527, "step": 37724 }, { "epoch": 0.6680922843974552, "grad_norm": 0.2836608588695526, "learning_rate": 7.863218528264141e-06, "loss": 0.0652, "step": 37725 }, { "epoch": 0.6681099939344836, "grad_norm": 0.7577429413795471, "learning_rate": 7.862461796529414e-06, "loss": 0.0608, "step": 37726 }, { "epoch": 0.668127703471512, "grad_norm": 0.23704147338867188, "learning_rate": 7.861705088276459e-06, "loss": 0.0579, "step": 37727 }, { "epoch": 0.6681454130085405, "grad_norm": 0.3758569359779358, "learning_rate": 7.860948403507772e-06, "loss": 0.0446, "step": 37728 }, { "epoch": 0.6681631225455689, "grad_norm": 0.5869010090827942, "learning_rate": 7.86019174222585e-06, "loss": 0.0716, "step": 37729 }, { "epoch": 0.6681808320825973, "grad_norm": 0.5055882334709167, "learning_rate": 7.859435104433168e-06, "loss": 0.0294, "step": 37730 }, { "epoch": 0.6681985416196257, "grad_norm": 0.4424251616001129, "learning_rate": 7.858678490132221e-06, "loss": 0.0337, "step": 37731 }, { "epoch": 0.6682162511566542, "grad_norm": 0.8959630727767944, "learning_rate": 7.857921899325503e-06, "loss": 0.1101, "step": 37732 }, { "epoch": 0.6682339606936826, "grad_norm": 0.6525548696517944, "learning_rate": 7.857165332015498e-06, "loss": 0.0796, "step": 37733 }, { "epoch": 0.668251670230711, "grad_norm": 0.7423349618911743, "learning_rate": 7.856408788204695e-06, "loss": 0.0843, "step": 37734 }, { "epoch": 0.6682693797677394, "grad_norm": 0.5004804730415344, "learning_rate": 7.855652267895586e-06, "loss": 0.0635, "step": 37735 }, { "epoch": 0.6682870893047679, "grad_norm": 0.58451247215271, "learning_rate": 7.854895771090665e-06, "loss": 0.0543, "step": 37736 }, { "epoch": 0.6683047988417963, "grad_norm": 0.6525775194168091, "learning_rate": 7.854139297792405e-06, "loss": 0.054, "step": 37737 }, { "epoch": 0.6683225083788247, "grad_norm": 0.5759454965591431, "learning_rate": 7.853382848003307e-06, "loss": 0.0495, "step": 37738 }, { "epoch": 0.6683402179158532, "grad_norm": 0.6675001382827759, "learning_rate": 7.852626421725856e-06, "loss": 0.0893, "step": 37739 }, { "epoch": 0.6683579274528816, "grad_norm": 0.4815869927406311, "learning_rate": 7.851870018962548e-06, "loss": 0.0456, "step": 37740 }, { "epoch": 0.66837563698991, "grad_norm": 1.010439395904541, "learning_rate": 7.851113639715857e-06, "loss": 0.0551, "step": 37741 }, { "epoch": 0.6683933465269384, "grad_norm": 0.44094473123550415, "learning_rate": 7.85035728398828e-06, "loss": 0.0525, "step": 37742 }, { "epoch": 0.6684110560639669, "grad_norm": 0.8771231770515442, "learning_rate": 7.849600951782303e-06, "loss": 0.0527, "step": 37743 }, { "epoch": 0.6684287656009953, "grad_norm": 0.39870285987854004, "learning_rate": 7.848844643100416e-06, "loss": 0.053, "step": 37744 }, { "epoch": 0.6684464751380237, "grad_norm": 0.8625189661979675, "learning_rate": 7.84808835794511e-06, "loss": 0.056, "step": 37745 }, { "epoch": 0.6684641846750521, "grad_norm": 0.7721062302589417, "learning_rate": 7.847332096318867e-06, "loss": 0.0526, "step": 37746 }, { "epoch": 0.6684818942120806, "grad_norm": 0.7298009991645813, "learning_rate": 7.846575858224184e-06, "loss": 0.0734, "step": 37747 }, { "epoch": 0.668499603749109, "grad_norm": 0.5361248850822449, "learning_rate": 7.845819643663537e-06, "loss": 0.0612, "step": 37748 }, { "epoch": 0.6685173132861374, "grad_norm": 0.6556277275085449, "learning_rate": 7.845063452639422e-06, "loss": 0.0515, "step": 37749 }, { "epoch": 0.6685350228231658, "grad_norm": 0.47450077533721924, "learning_rate": 7.844307285154327e-06, "loss": 0.0494, "step": 37750 }, { "epoch": 0.6685527323601943, "grad_norm": 0.8024638891220093, "learning_rate": 7.843551141210733e-06, "loss": 0.067, "step": 37751 }, { "epoch": 0.6685704418972227, "grad_norm": 0.27960795164108276, "learning_rate": 7.842795020811127e-06, "loss": 0.0512, "step": 37752 }, { "epoch": 0.6685881514342511, "grad_norm": 0.4057724177837372, "learning_rate": 7.842038923958006e-06, "loss": 0.05, "step": 37753 }, { "epoch": 0.6686058609712796, "grad_norm": 0.42498862743377686, "learning_rate": 7.841282850653859e-06, "loss": 0.063, "step": 37754 }, { "epoch": 0.668623570508308, "grad_norm": 0.45986008644104004, "learning_rate": 7.840526800901161e-06, "loss": 0.0575, "step": 37755 }, { "epoch": 0.6686412800453364, "grad_norm": 0.7587529420852661, "learning_rate": 7.839770774702405e-06, "loss": 0.0655, "step": 37756 }, { "epoch": 0.6686589895823648, "grad_norm": 0.6503382921218872, "learning_rate": 7.839014772060083e-06, "loss": 0.0777, "step": 37757 }, { "epoch": 0.6686766991193933, "grad_norm": 0.45401275157928467, "learning_rate": 7.838258792976681e-06, "loss": 0.0668, "step": 37758 }, { "epoch": 0.6686944086564217, "grad_norm": 0.417868971824646, "learning_rate": 7.837502837454678e-06, "loss": 0.0603, "step": 37759 }, { "epoch": 0.6687121181934501, "grad_norm": 1.009251594543457, "learning_rate": 7.836746905496564e-06, "loss": 0.0542, "step": 37760 }, { "epoch": 0.6687298277304785, "grad_norm": 0.5645579695701599, "learning_rate": 7.83599099710483e-06, "loss": 0.0684, "step": 37761 }, { "epoch": 0.668747537267507, "grad_norm": 0.8064829707145691, "learning_rate": 7.835235112281964e-06, "loss": 0.0731, "step": 37762 }, { "epoch": 0.6687652468045354, "grad_norm": 0.7819887399673462, "learning_rate": 7.834479251030445e-06, "loss": 0.0708, "step": 37763 }, { "epoch": 0.6687829563415638, "grad_norm": 0.5633701086044312, "learning_rate": 7.833723413352768e-06, "loss": 0.0702, "step": 37764 }, { "epoch": 0.6688006658785922, "grad_norm": 0.8204352259635925, "learning_rate": 7.83296759925142e-06, "loss": 0.0587, "step": 37765 }, { "epoch": 0.6688183754156207, "grad_norm": 0.8518983125686646, "learning_rate": 7.832211808728877e-06, "loss": 0.0736, "step": 37766 }, { "epoch": 0.6688360849526491, "grad_norm": 0.821215808391571, "learning_rate": 7.831456041787634e-06, "loss": 0.0672, "step": 37767 }, { "epoch": 0.6688537944896775, "grad_norm": 0.51678866147995, "learning_rate": 7.830700298430181e-06, "loss": 0.0545, "step": 37768 }, { "epoch": 0.668871504026706, "grad_norm": 0.6554126739501953, "learning_rate": 7.829944578658992e-06, "loss": 0.0408, "step": 37769 }, { "epoch": 0.6688892135637344, "grad_norm": 1.0207397937774658, "learning_rate": 7.829188882476562e-06, "loss": 0.1214, "step": 37770 }, { "epoch": 0.6689069231007628, "grad_norm": 0.5605342984199524, "learning_rate": 7.828433209885376e-06, "loss": 0.0628, "step": 37771 }, { "epoch": 0.6689246326377912, "grad_norm": 0.44621792435646057, "learning_rate": 7.827677560887915e-06, "loss": 0.0404, "step": 37772 }, { "epoch": 0.6689423421748197, "grad_norm": 0.7726099491119385, "learning_rate": 7.826921935486672e-06, "loss": 0.0839, "step": 37773 }, { "epoch": 0.6689600517118481, "grad_norm": 1.103766679763794, "learning_rate": 7.82616633368413e-06, "loss": 0.0909, "step": 37774 }, { "epoch": 0.6689777612488765, "grad_norm": 1.1948219537734985, "learning_rate": 7.82541075548278e-06, "loss": 0.0492, "step": 37775 }, { "epoch": 0.6689954707859049, "grad_norm": 0.6071541905403137, "learning_rate": 7.824655200885102e-06, "loss": 0.0855, "step": 37776 }, { "epoch": 0.6690131803229334, "grad_norm": 0.4377148449420929, "learning_rate": 7.823899669893578e-06, "loss": 0.0645, "step": 37777 }, { "epoch": 0.6690308898599618, "grad_norm": 0.687939465045929, "learning_rate": 7.8231441625107e-06, "loss": 0.0734, "step": 37778 }, { "epoch": 0.6690485993969902, "grad_norm": 0.7987707853317261, "learning_rate": 7.822388678738955e-06, "loss": 0.0694, "step": 37779 }, { "epoch": 0.6690663089340186, "grad_norm": 0.8817684054374695, "learning_rate": 7.821633218580823e-06, "loss": 0.0535, "step": 37780 }, { "epoch": 0.6690840184710471, "grad_norm": 1.0599929094314575, "learning_rate": 7.820877782038783e-06, "loss": 0.0891, "step": 37781 }, { "epoch": 0.6691017280080755, "grad_norm": 0.6583467125892639, "learning_rate": 7.820122369115342e-06, "loss": 0.067, "step": 37782 }, { "epoch": 0.669119437545104, "grad_norm": 0.5557153820991516, "learning_rate": 7.819366979812967e-06, "loss": 0.0431, "step": 37783 }, { "epoch": 0.6691371470821325, "grad_norm": 0.49740132689476013, "learning_rate": 7.818611614134149e-06, "loss": 0.0576, "step": 37784 }, { "epoch": 0.6691548566191609, "grad_norm": 0.6232894659042358, "learning_rate": 7.81785627208137e-06, "loss": 0.0596, "step": 37785 }, { "epoch": 0.6691725661561893, "grad_norm": 0.4461532235145569, "learning_rate": 7.817100953657126e-06, "loss": 0.0856, "step": 37786 }, { "epoch": 0.6691902756932177, "grad_norm": 0.3421210050582886, "learning_rate": 7.816345658863883e-06, "loss": 0.0382, "step": 37787 }, { "epoch": 0.6692079852302462, "grad_norm": 0.5708289742469788, "learning_rate": 7.81559038770414e-06, "loss": 0.081, "step": 37788 }, { "epoch": 0.6692256947672746, "grad_norm": 0.3770025670528412, "learning_rate": 7.814835140180377e-06, "loss": 0.0386, "step": 37789 }, { "epoch": 0.669243404304303, "grad_norm": 0.6161901354789734, "learning_rate": 7.81407991629508e-06, "loss": 0.0893, "step": 37790 }, { "epoch": 0.6692611138413314, "grad_norm": 0.8061360120773315, "learning_rate": 7.813324716050734e-06, "loss": 0.0706, "step": 37791 }, { "epoch": 0.6692788233783599, "grad_norm": 0.3854467570781708, "learning_rate": 7.812569539449821e-06, "loss": 0.0794, "step": 37792 }, { "epoch": 0.6692965329153883, "grad_norm": 0.857853889465332, "learning_rate": 7.811814386494834e-06, "loss": 0.0759, "step": 37793 }, { "epoch": 0.6693142424524167, "grad_norm": 0.8651459813117981, "learning_rate": 7.811059257188246e-06, "loss": 0.078, "step": 37794 }, { "epoch": 0.6693319519894451, "grad_norm": 0.3198578953742981, "learning_rate": 7.810304151532542e-06, "loss": 0.0757, "step": 37795 }, { "epoch": 0.6693496615264736, "grad_norm": 0.835423469543457, "learning_rate": 7.809549069530215e-06, "loss": 0.0554, "step": 37796 }, { "epoch": 0.669367371063502, "grad_norm": 0.7013850212097168, "learning_rate": 7.808794011183746e-06, "loss": 0.0911, "step": 37797 }, { "epoch": 0.6693850806005304, "grad_norm": 0.6299442052841187, "learning_rate": 7.808038976495614e-06, "loss": 0.0692, "step": 37798 }, { "epoch": 0.6694027901375589, "grad_norm": 0.3247227966785431, "learning_rate": 7.807283965468307e-06, "loss": 0.0547, "step": 37799 }, { "epoch": 0.6694204996745873, "grad_norm": 0.5365517735481262, "learning_rate": 7.806528978104307e-06, "loss": 0.0596, "step": 37800 }, { "epoch": 0.6694382092116157, "grad_norm": 0.5617998838424683, "learning_rate": 7.805774014406099e-06, "loss": 0.0634, "step": 37801 }, { "epoch": 0.6694559187486441, "grad_norm": 1.017214298248291, "learning_rate": 7.805019074376165e-06, "loss": 0.0633, "step": 37802 }, { "epoch": 0.6694736282856726, "grad_norm": 0.5683766007423401, "learning_rate": 7.804264158016994e-06, "loss": 0.0936, "step": 37803 }, { "epoch": 0.669491337822701, "grad_norm": 0.49247175455093384, "learning_rate": 7.803509265331069e-06, "loss": 0.046, "step": 37804 }, { "epoch": 0.6695090473597294, "grad_norm": 0.7473442554473877, "learning_rate": 7.802754396320868e-06, "loss": 0.0519, "step": 37805 }, { "epoch": 0.6695267568967578, "grad_norm": 0.5675160884857178, "learning_rate": 7.801999550988876e-06, "loss": 0.0796, "step": 37806 }, { "epoch": 0.6695444664337863, "grad_norm": 0.5720764994621277, "learning_rate": 7.801244729337581e-06, "loss": 0.0469, "step": 37807 }, { "epoch": 0.6695621759708147, "grad_norm": 0.5087037682533264, "learning_rate": 7.800489931369457e-06, "loss": 0.0908, "step": 37808 }, { "epoch": 0.6695798855078431, "grad_norm": 0.6879237294197083, "learning_rate": 7.799735157086997e-06, "loss": 0.053, "step": 37809 }, { "epoch": 0.6695975950448715, "grad_norm": 0.606039822101593, "learning_rate": 7.798980406492671e-06, "loss": 0.0694, "step": 37810 }, { "epoch": 0.6696153045819, "grad_norm": 0.6068388223648071, "learning_rate": 7.798225679588983e-06, "loss": 0.0707, "step": 37811 }, { "epoch": 0.6696330141189284, "grad_norm": 0.48302993178367615, "learning_rate": 7.797470976378398e-06, "loss": 0.0643, "step": 37812 }, { "epoch": 0.6696507236559568, "grad_norm": 0.8950731158256531, "learning_rate": 7.796716296863406e-06, "loss": 0.0795, "step": 37813 }, { "epoch": 0.6696684331929853, "grad_norm": 0.47793838381767273, "learning_rate": 7.795961641046494e-06, "loss": 0.068, "step": 37814 }, { "epoch": 0.6696861427300137, "grad_norm": 0.930557131767273, "learning_rate": 7.795207008930135e-06, "loss": 0.0766, "step": 37815 }, { "epoch": 0.6697038522670421, "grad_norm": 0.4914532005786896, "learning_rate": 7.794452400516815e-06, "loss": 0.0669, "step": 37816 }, { "epoch": 0.6697215618040705, "grad_norm": 0.5351182222366333, "learning_rate": 7.793697815809017e-06, "loss": 0.0447, "step": 37817 }, { "epoch": 0.669739271341099, "grad_norm": 0.574516236782074, "learning_rate": 7.792943254809226e-06, "loss": 0.0815, "step": 37818 }, { "epoch": 0.6697569808781274, "grad_norm": 0.4093156158924103, "learning_rate": 7.792188717519923e-06, "loss": 0.0312, "step": 37819 }, { "epoch": 0.6697746904151558, "grad_norm": 0.3829764723777771, "learning_rate": 7.791434203943588e-06, "loss": 0.0638, "step": 37820 }, { "epoch": 0.6697923999521842, "grad_norm": 0.34847158193588257, "learning_rate": 7.790679714082707e-06, "loss": 0.0524, "step": 37821 }, { "epoch": 0.6698101094892127, "grad_norm": 0.4899052679538727, "learning_rate": 7.789925247939766e-06, "loss": 0.0615, "step": 37822 }, { "epoch": 0.6698278190262411, "grad_norm": 0.586818516254425, "learning_rate": 7.789170805517235e-06, "loss": 0.0494, "step": 37823 }, { "epoch": 0.6698455285632695, "grad_norm": 0.4959055483341217, "learning_rate": 7.788416386817604e-06, "loss": 0.0553, "step": 37824 }, { "epoch": 0.6698632381002979, "grad_norm": 0.4871031939983368, "learning_rate": 7.78766199184336e-06, "loss": 0.0335, "step": 37825 }, { "epoch": 0.6698809476373264, "grad_norm": 0.6289150714874268, "learning_rate": 7.786907620596972e-06, "loss": 0.0602, "step": 37826 }, { "epoch": 0.6698986571743548, "grad_norm": 0.9838243722915649, "learning_rate": 7.786153273080927e-06, "loss": 0.0761, "step": 37827 }, { "epoch": 0.6699163667113832, "grad_norm": 0.4858381450176239, "learning_rate": 7.78539894929771e-06, "loss": 0.06, "step": 37828 }, { "epoch": 0.6699340762484117, "grad_norm": 0.6122105121612549, "learning_rate": 7.7846446492498e-06, "loss": 0.0798, "step": 37829 }, { "epoch": 0.6699517857854401, "grad_norm": 0.8024361729621887, "learning_rate": 7.78389037293968e-06, "loss": 0.0692, "step": 37830 }, { "epoch": 0.6699694953224685, "grad_norm": 0.8895788788795471, "learning_rate": 7.783136120369828e-06, "loss": 0.068, "step": 37831 }, { "epoch": 0.6699872048594969, "grad_norm": 0.5391173362731934, "learning_rate": 7.782381891542738e-06, "loss": 0.0618, "step": 37832 }, { "epoch": 0.6700049143965254, "grad_norm": 0.6748285293579102, "learning_rate": 7.781627686460873e-06, "loss": 0.06, "step": 37833 }, { "epoch": 0.6700226239335538, "grad_norm": 0.7543949484825134, "learning_rate": 7.780873505126724e-06, "loss": 0.1033, "step": 37834 }, { "epoch": 0.6700403334705822, "grad_norm": 0.18361014127731323, "learning_rate": 7.78011934754277e-06, "loss": 0.042, "step": 37835 }, { "epoch": 0.6700580430076106, "grad_norm": 0.6191258430480957, "learning_rate": 7.7793652137115e-06, "loss": 0.061, "step": 37836 }, { "epoch": 0.6700757525446391, "grad_norm": 0.656492292881012, "learning_rate": 7.778611103635381e-06, "loss": 0.0714, "step": 37837 }, { "epoch": 0.6700934620816675, "grad_norm": 0.572462260723114, "learning_rate": 7.777857017316902e-06, "loss": 0.0942, "step": 37838 }, { "epoch": 0.6701111716186959, "grad_norm": 0.6324884295463562, "learning_rate": 7.777102954758543e-06, "loss": 0.0511, "step": 37839 }, { "epoch": 0.6701288811557243, "grad_norm": 0.6131867170333862, "learning_rate": 7.776348915962786e-06, "loss": 0.0726, "step": 37840 }, { "epoch": 0.6701465906927528, "grad_norm": 0.6888096332550049, "learning_rate": 7.77559490093211e-06, "loss": 0.0813, "step": 37841 }, { "epoch": 0.6701643002297812, "grad_norm": 1.0709168910980225, "learning_rate": 7.774840909668994e-06, "loss": 0.1001, "step": 37842 }, { "epoch": 0.6701820097668096, "grad_norm": 0.7575375437736511, "learning_rate": 7.774086942175927e-06, "loss": 0.0652, "step": 37843 }, { "epoch": 0.6701997193038381, "grad_norm": 0.5379257202148438, "learning_rate": 7.773332998455377e-06, "loss": 0.0723, "step": 37844 }, { "epoch": 0.6702174288408665, "grad_norm": 0.3540855944156647, "learning_rate": 7.772579078509832e-06, "loss": 0.0595, "step": 37845 }, { "epoch": 0.670235138377895, "grad_norm": 0.3581533133983612, "learning_rate": 7.771825182341778e-06, "loss": 0.0758, "step": 37846 }, { "epoch": 0.6702528479149233, "grad_norm": 0.6429423689842224, "learning_rate": 7.771071309953672e-06, "loss": 0.0667, "step": 37847 }, { "epoch": 0.6702705574519519, "grad_norm": 0.6115476489067078, "learning_rate": 7.77031746134802e-06, "loss": 0.0591, "step": 37848 }, { "epoch": 0.6702882669889803, "grad_norm": 0.672863245010376, "learning_rate": 7.76956363652729e-06, "loss": 0.0688, "step": 37849 }, { "epoch": 0.6703059765260087, "grad_norm": 0.4840735197067261, "learning_rate": 7.768809835493972e-06, "loss": 0.0427, "step": 37850 }, { "epoch": 0.670323686063037, "grad_norm": 0.2544052004814148, "learning_rate": 7.768056058250531e-06, "loss": 0.0387, "step": 37851 }, { "epoch": 0.6703413956000656, "grad_norm": 0.82634437084198, "learning_rate": 7.767302304799454e-06, "loss": 0.0715, "step": 37852 }, { "epoch": 0.670359105137094, "grad_norm": 0.814868688583374, "learning_rate": 7.766548575143227e-06, "loss": 0.0916, "step": 37853 }, { "epoch": 0.6703768146741224, "grad_norm": 0.8273175954818726, "learning_rate": 7.765794869284318e-06, "loss": 0.0502, "step": 37854 }, { "epoch": 0.6703945242111508, "grad_norm": 0.2949642837047577, "learning_rate": 7.765041187225211e-06, "loss": 0.0422, "step": 37855 }, { "epoch": 0.6704122337481793, "grad_norm": 0.5950891375541687, "learning_rate": 7.764287528968388e-06, "loss": 0.0535, "step": 37856 }, { "epoch": 0.6704299432852077, "grad_norm": 0.6588664054870605, "learning_rate": 7.763533894516328e-06, "loss": 0.0446, "step": 37857 }, { "epoch": 0.6704476528222361, "grad_norm": 0.569830596446991, "learning_rate": 7.762780283871508e-06, "loss": 0.0897, "step": 37858 }, { "epoch": 0.6704653623592646, "grad_norm": 0.7228550314903259, "learning_rate": 7.76202669703641e-06, "loss": 0.0778, "step": 37859 }, { "epoch": 0.670483071896293, "grad_norm": 0.5741144418716431, "learning_rate": 7.761273134013512e-06, "loss": 0.0387, "step": 37860 }, { "epoch": 0.6705007814333214, "grad_norm": 0.7319726347923279, "learning_rate": 7.760519594805299e-06, "loss": 0.0724, "step": 37861 }, { "epoch": 0.6705184909703498, "grad_norm": 0.6564044952392578, "learning_rate": 7.759766079414236e-06, "loss": 0.061, "step": 37862 }, { "epoch": 0.6705362005073783, "grad_norm": 0.3909298777580261, "learning_rate": 7.759012587842813e-06, "loss": 0.047, "step": 37863 }, { "epoch": 0.6705539100444067, "grad_norm": 0.8251696228981018, "learning_rate": 7.758259120093511e-06, "loss": 0.0582, "step": 37864 }, { "epoch": 0.6705716195814351, "grad_norm": 1.153511643409729, "learning_rate": 7.7575056761688e-06, "loss": 0.0835, "step": 37865 }, { "epoch": 0.6705893291184635, "grad_norm": 0.5819679498672485, "learning_rate": 7.75675225607116e-06, "loss": 0.043, "step": 37866 }, { "epoch": 0.670607038655492, "grad_norm": 0.3755943775177002, "learning_rate": 7.755998859803075e-06, "loss": 0.0668, "step": 37867 }, { "epoch": 0.6706247481925204, "grad_norm": 0.7018383741378784, "learning_rate": 7.75524548736702e-06, "loss": 0.0401, "step": 37868 }, { "epoch": 0.6706424577295488, "grad_norm": 0.6383032202720642, "learning_rate": 7.754492138765476e-06, "loss": 0.0727, "step": 37869 }, { "epoch": 0.6706601672665772, "grad_norm": 0.39008191227912903, "learning_rate": 7.753738814000918e-06, "loss": 0.0624, "step": 37870 }, { "epoch": 0.6706778768036057, "grad_norm": 0.350911021232605, "learning_rate": 7.752985513075833e-06, "loss": 0.0394, "step": 37871 }, { "epoch": 0.6706955863406341, "grad_norm": 0.8026940226554871, "learning_rate": 7.752232235992686e-06, "loss": 0.0835, "step": 37872 }, { "epoch": 0.6707132958776625, "grad_norm": 0.3458169400691986, "learning_rate": 7.751478982753963e-06, "loss": 0.0392, "step": 37873 }, { "epoch": 0.670731005414691, "grad_norm": 0.48600471019744873, "learning_rate": 7.75072575336214e-06, "loss": 0.0696, "step": 37874 }, { "epoch": 0.6707487149517194, "grad_norm": 0.6035175919532776, "learning_rate": 7.749972547819701e-06, "loss": 0.0979, "step": 37875 }, { "epoch": 0.6707664244887478, "grad_norm": 0.5100980401039124, "learning_rate": 7.74921936612911e-06, "loss": 0.0544, "step": 37876 }, { "epoch": 0.6707841340257762, "grad_norm": 0.5430529713630676, "learning_rate": 7.748466208292856e-06, "loss": 0.0588, "step": 37877 }, { "epoch": 0.6708018435628047, "grad_norm": 0.4088134169578552, "learning_rate": 7.747713074313425e-06, "loss": 0.0522, "step": 37878 }, { "epoch": 0.6708195530998331, "grad_norm": 0.8423312306404114, "learning_rate": 7.746959964193276e-06, "loss": 0.079, "step": 37879 }, { "epoch": 0.6708372626368615, "grad_norm": 0.6337476968765259, "learning_rate": 7.746206877934896e-06, "loss": 0.0594, "step": 37880 }, { "epoch": 0.6708549721738899, "grad_norm": 0.4964882433414459, "learning_rate": 7.74545381554076e-06, "loss": 0.0655, "step": 37881 }, { "epoch": 0.6708726817109184, "grad_norm": 0.5536810159683228, "learning_rate": 7.744700777013355e-06, "loss": 0.078, "step": 37882 }, { "epoch": 0.6708903912479468, "grad_norm": 0.6414295434951782, "learning_rate": 7.743947762355146e-06, "loss": 0.043, "step": 37883 }, { "epoch": 0.6709081007849752, "grad_norm": 0.9079105854034424, "learning_rate": 7.743194771568613e-06, "loss": 0.0689, "step": 37884 }, { "epoch": 0.6709258103220036, "grad_norm": 0.5127070546150208, "learning_rate": 7.742441804656234e-06, "loss": 0.0406, "step": 37885 }, { "epoch": 0.6709435198590321, "grad_norm": 0.6131909489631653, "learning_rate": 7.741688861620489e-06, "loss": 0.0709, "step": 37886 }, { "epoch": 0.6709612293960605, "grad_norm": 0.5452584028244019, "learning_rate": 7.740935942463855e-06, "loss": 0.0542, "step": 37887 }, { "epoch": 0.6709789389330889, "grad_norm": 0.5563331842422485, "learning_rate": 7.740183047188805e-06, "loss": 0.0562, "step": 37888 }, { "epoch": 0.6709966484701174, "grad_norm": 0.5206785798072815, "learning_rate": 7.739430175797825e-06, "loss": 0.0523, "step": 37889 }, { "epoch": 0.6710143580071458, "grad_norm": 0.5216742753982544, "learning_rate": 7.73867732829338e-06, "loss": 0.0534, "step": 37890 }, { "epoch": 0.6710320675441742, "grad_norm": 0.5939707159996033, "learning_rate": 7.737924504677952e-06, "loss": 0.0454, "step": 37891 }, { "epoch": 0.6710497770812026, "grad_norm": 0.20819640159606934, "learning_rate": 7.737171704954018e-06, "loss": 0.0666, "step": 37892 }, { "epoch": 0.6710674866182311, "grad_norm": 0.4694124162197113, "learning_rate": 7.736418929124059e-06, "loss": 0.0582, "step": 37893 }, { "epoch": 0.6710851961552595, "grad_norm": 0.5083609819412231, "learning_rate": 7.735666177190543e-06, "loss": 0.0698, "step": 37894 }, { "epoch": 0.6711029056922879, "grad_norm": 0.5354248285293579, "learning_rate": 7.73491344915595e-06, "loss": 0.0727, "step": 37895 }, { "epoch": 0.6711206152293163, "grad_norm": 0.5821688771247864, "learning_rate": 7.734160745022757e-06, "loss": 0.0442, "step": 37896 }, { "epoch": 0.6711383247663448, "grad_norm": 0.41480371356010437, "learning_rate": 7.73340806479344e-06, "loss": 0.0562, "step": 37897 }, { "epoch": 0.6711560343033732, "grad_norm": 0.44978082180023193, "learning_rate": 7.732655408470475e-06, "loss": 0.0466, "step": 37898 }, { "epoch": 0.6711737438404016, "grad_norm": 1.1485662460327148, "learning_rate": 7.731902776056338e-06, "loss": 0.0801, "step": 37899 }, { "epoch": 0.67119145337743, "grad_norm": 0.639893114566803, "learning_rate": 7.731150167553513e-06, "loss": 0.0919, "step": 37900 }, { "epoch": 0.6712091629144585, "grad_norm": 1.0007017850875854, "learning_rate": 7.730397582964464e-06, "loss": 0.0653, "step": 37901 }, { "epoch": 0.6712268724514869, "grad_norm": 0.4870980679988861, "learning_rate": 7.72964502229167e-06, "loss": 0.0658, "step": 37902 }, { "epoch": 0.6712445819885153, "grad_norm": 0.74493807554245, "learning_rate": 7.728892485537615e-06, "loss": 0.0701, "step": 37903 }, { "epoch": 0.6712622915255438, "grad_norm": 0.8356837034225464, "learning_rate": 7.728139972704762e-06, "loss": 0.0517, "step": 37904 }, { "epoch": 0.6712800010625722, "grad_norm": 0.6780487895011902, "learning_rate": 7.727387483795588e-06, "loss": 0.0428, "step": 37905 }, { "epoch": 0.6712977105996006, "grad_norm": 0.4666852653026581, "learning_rate": 7.726635018812579e-06, "loss": 0.0503, "step": 37906 }, { "epoch": 0.671315420136629, "grad_norm": 0.47780996561050415, "learning_rate": 7.72588257775821e-06, "loss": 0.0806, "step": 37907 }, { "epoch": 0.6713331296736575, "grad_norm": 0.6666452288627625, "learning_rate": 7.725130160634945e-06, "loss": 0.0815, "step": 37908 }, { "epoch": 0.671350839210686, "grad_norm": 0.5137187242507935, "learning_rate": 7.724377767445268e-06, "loss": 0.0626, "step": 37909 }, { "epoch": 0.6713685487477143, "grad_norm": 1.1666429042816162, "learning_rate": 7.723625398191659e-06, "loss": 0.0662, "step": 37910 }, { "epoch": 0.6713862582847427, "grad_norm": 0.5882671475410461, "learning_rate": 7.722873052876579e-06, "loss": 0.0904, "step": 37911 }, { "epoch": 0.6714039678217713, "grad_norm": 0.7461935877799988, "learning_rate": 7.722120731502512e-06, "loss": 0.0906, "step": 37912 }, { "epoch": 0.6714216773587997, "grad_norm": 0.5932354927062988, "learning_rate": 7.721368434071928e-06, "loss": 0.0516, "step": 37913 }, { "epoch": 0.671439386895828, "grad_norm": 0.41684576869010925, "learning_rate": 7.720616160587309e-06, "loss": 0.0681, "step": 37914 }, { "epoch": 0.6714570964328566, "grad_norm": 0.417655885219574, "learning_rate": 7.719863911051126e-06, "loss": 0.0498, "step": 37915 }, { "epoch": 0.671474805969885, "grad_norm": 0.8891030550003052, "learning_rate": 7.719111685465853e-06, "loss": 0.0655, "step": 37916 }, { "epoch": 0.6714925155069134, "grad_norm": 0.7024490237236023, "learning_rate": 7.718359483833973e-06, "loss": 0.0782, "step": 37917 }, { "epoch": 0.6715102250439418, "grad_norm": 0.708280086517334, "learning_rate": 7.71760730615795e-06, "loss": 0.0857, "step": 37918 }, { "epoch": 0.6715279345809703, "grad_norm": 0.6246474385261536, "learning_rate": 7.71685515244026e-06, "loss": 0.0855, "step": 37919 }, { "epoch": 0.6715456441179987, "grad_norm": 1.0374218225479126, "learning_rate": 7.716103022683378e-06, "loss": 0.1036, "step": 37920 }, { "epoch": 0.6715633536550271, "grad_norm": 0.7063065767288208, "learning_rate": 7.71535091688979e-06, "loss": 0.0636, "step": 37921 }, { "epoch": 0.6715810631920555, "grad_norm": 1.039190411567688, "learning_rate": 7.714598835061953e-06, "loss": 0.0433, "step": 37922 }, { "epoch": 0.671598772729084, "grad_norm": 0.7923601865768433, "learning_rate": 7.71384677720235e-06, "loss": 0.0914, "step": 37923 }, { "epoch": 0.6716164822661124, "grad_norm": 0.7104589939117432, "learning_rate": 7.713094743313452e-06, "loss": 0.0765, "step": 37924 }, { "epoch": 0.6716341918031408, "grad_norm": 0.6192471981048584, "learning_rate": 7.712342733397737e-06, "loss": 0.0707, "step": 37925 }, { "epoch": 0.6716519013401692, "grad_norm": 0.387410968542099, "learning_rate": 7.711590747457679e-06, "loss": 0.0461, "step": 37926 }, { "epoch": 0.6716696108771977, "grad_norm": 0.4439530372619629, "learning_rate": 7.710838785495748e-06, "loss": 0.0458, "step": 37927 }, { "epoch": 0.6716873204142261, "grad_norm": 0.577351987361908, "learning_rate": 7.710086847514425e-06, "loss": 0.0736, "step": 37928 }, { "epoch": 0.6717050299512545, "grad_norm": 0.4684809744358063, "learning_rate": 7.709334933516176e-06, "loss": 0.0864, "step": 37929 }, { "epoch": 0.671722739488283, "grad_norm": 0.5055739283561707, "learning_rate": 7.708583043503474e-06, "loss": 0.0748, "step": 37930 }, { "epoch": 0.6717404490253114, "grad_norm": 0.8545762300491333, "learning_rate": 7.707831177478797e-06, "loss": 0.099, "step": 37931 }, { "epoch": 0.6717581585623398, "grad_norm": 0.8635883927345276, "learning_rate": 7.707079335444627e-06, "loss": 0.066, "step": 37932 }, { "epoch": 0.6717758680993682, "grad_norm": 0.7172739505767822, "learning_rate": 7.70632751740342e-06, "loss": 0.0919, "step": 37933 }, { "epoch": 0.6717935776363967, "grad_norm": 0.5509287714958191, "learning_rate": 7.705575723357653e-06, "loss": 0.0627, "step": 37934 }, { "epoch": 0.6718112871734251, "grad_norm": 0.6271812319755554, "learning_rate": 7.704823953309814e-06, "loss": 0.0592, "step": 37935 }, { "epoch": 0.6718289967104535, "grad_norm": 0.3297877311706543, "learning_rate": 7.704072207262364e-06, "loss": 0.0552, "step": 37936 }, { "epoch": 0.6718467062474819, "grad_norm": 0.4362943470478058, "learning_rate": 7.703320485217775e-06, "loss": 0.0558, "step": 37937 }, { "epoch": 0.6718644157845104, "grad_norm": 0.4106079339981079, "learning_rate": 7.702568787178525e-06, "loss": 0.0582, "step": 37938 }, { "epoch": 0.6718821253215388, "grad_norm": 0.5583903193473816, "learning_rate": 7.701817113147091e-06, "loss": 0.0421, "step": 37939 }, { "epoch": 0.6718998348585672, "grad_norm": 0.8462169170379639, "learning_rate": 7.701065463125934e-06, "loss": 0.0673, "step": 37940 }, { "epoch": 0.6719175443955956, "grad_norm": 0.590388298034668, "learning_rate": 7.700313837117536e-06, "loss": 0.076, "step": 37941 }, { "epoch": 0.6719352539326241, "grad_norm": 0.7960097193717957, "learning_rate": 7.699562235124365e-06, "loss": 0.064, "step": 37942 }, { "epoch": 0.6719529634696525, "grad_norm": 1.1081655025482178, "learning_rate": 7.698810657148896e-06, "loss": 0.0535, "step": 37943 }, { "epoch": 0.6719706730066809, "grad_norm": 0.7237261533737183, "learning_rate": 7.698059103193603e-06, "loss": 0.0572, "step": 37944 }, { "epoch": 0.6719883825437094, "grad_norm": 0.576867938041687, "learning_rate": 7.697307573260956e-06, "loss": 0.0541, "step": 37945 }, { "epoch": 0.6720060920807378, "grad_norm": 0.3758825361728668, "learning_rate": 7.696556067353434e-06, "loss": 0.065, "step": 37946 }, { "epoch": 0.6720238016177662, "grad_norm": 0.7253279685974121, "learning_rate": 7.695804585473497e-06, "loss": 0.061, "step": 37947 }, { "epoch": 0.6720415111547946, "grad_norm": 0.5942743420600891, "learning_rate": 7.695053127623626e-06, "loss": 0.0735, "step": 37948 }, { "epoch": 0.6720592206918231, "grad_norm": 0.5466311573982239, "learning_rate": 7.694301693806296e-06, "loss": 0.0536, "step": 37949 }, { "epoch": 0.6720769302288515, "grad_norm": 0.35660722851753235, "learning_rate": 7.693550284023971e-06, "loss": 0.055, "step": 37950 }, { "epoch": 0.6720946397658799, "grad_norm": 1.0201424360275269, "learning_rate": 7.692798898279123e-06, "loss": 0.0832, "step": 37951 }, { "epoch": 0.6721123493029083, "grad_norm": 0.18274275958538055, "learning_rate": 7.692047536574229e-06, "loss": 0.0521, "step": 37952 }, { "epoch": 0.6721300588399368, "grad_norm": 0.9310916066169739, "learning_rate": 7.69129619891176e-06, "loss": 0.0673, "step": 37953 }, { "epoch": 0.6721477683769652, "grad_norm": 0.672941267490387, "learning_rate": 7.690544885294187e-06, "loss": 0.0662, "step": 37954 }, { "epoch": 0.6721654779139936, "grad_norm": 0.4853176772594452, "learning_rate": 7.689793595723982e-06, "loss": 0.0401, "step": 37955 }, { "epoch": 0.672183187451022, "grad_norm": 0.9257867336273193, "learning_rate": 7.689042330203621e-06, "loss": 0.069, "step": 37956 }, { "epoch": 0.6722008969880505, "grad_norm": 0.6128822565078735, "learning_rate": 7.688291088735569e-06, "loss": 0.0941, "step": 37957 }, { "epoch": 0.6722186065250789, "grad_norm": 0.5009724497795105, "learning_rate": 7.687539871322297e-06, "loss": 0.0607, "step": 37958 }, { "epoch": 0.6722363160621073, "grad_norm": 0.8296338319778442, "learning_rate": 7.68678867796628e-06, "loss": 0.0492, "step": 37959 }, { "epoch": 0.6722540255991358, "grad_norm": 1.0972915887832642, "learning_rate": 7.686037508669994e-06, "loss": 0.0575, "step": 37960 }, { "epoch": 0.6722717351361642, "grad_norm": 0.570395827293396, "learning_rate": 7.685286363435899e-06, "loss": 0.0461, "step": 37961 }, { "epoch": 0.6722894446731926, "grad_norm": 0.7479625940322876, "learning_rate": 7.684535242266471e-06, "loss": 0.0789, "step": 37962 }, { "epoch": 0.672307154210221, "grad_norm": 0.5764553546905518, "learning_rate": 7.68378414516418e-06, "loss": 0.0797, "step": 37963 }, { "epoch": 0.6723248637472495, "grad_norm": 0.6968476176261902, "learning_rate": 7.683033072131507e-06, "loss": 0.0764, "step": 37964 }, { "epoch": 0.6723425732842779, "grad_norm": 1.0328413248062134, "learning_rate": 7.682282023170912e-06, "loss": 0.092, "step": 37965 }, { "epoch": 0.6723602828213063, "grad_norm": 0.2935951352119446, "learning_rate": 7.681530998284867e-06, "loss": 0.037, "step": 37966 }, { "epoch": 0.6723779923583347, "grad_norm": 0.5156165957450867, "learning_rate": 7.680779997475852e-06, "loss": 0.0376, "step": 37967 }, { "epoch": 0.6723957018953632, "grad_norm": 0.35411536693573, "learning_rate": 7.680029020746322e-06, "loss": 0.047, "step": 37968 }, { "epoch": 0.6724134114323916, "grad_norm": 0.4317331612110138, "learning_rate": 7.679278068098761e-06, "loss": 0.0843, "step": 37969 }, { "epoch": 0.67243112096942, "grad_norm": 0.5142724514007568, "learning_rate": 7.67852713953563e-06, "loss": 0.0633, "step": 37970 }, { "epoch": 0.6724488305064484, "grad_norm": 0.5681375861167908, "learning_rate": 7.677776235059407e-06, "loss": 0.0716, "step": 37971 }, { "epoch": 0.672466540043477, "grad_norm": 0.5884665250778198, "learning_rate": 7.67702535467256e-06, "loss": 0.0737, "step": 37972 }, { "epoch": 0.6724842495805053, "grad_norm": 0.40120723843574524, "learning_rate": 7.676274498377556e-06, "loss": 0.0448, "step": 37973 }, { "epoch": 0.6725019591175337, "grad_norm": 0.5600035786628723, "learning_rate": 7.675523666176875e-06, "loss": 0.0408, "step": 37974 }, { "epoch": 0.6725196686545623, "grad_norm": 0.6051912903785706, "learning_rate": 7.674772858072976e-06, "loss": 0.0489, "step": 37975 }, { "epoch": 0.6725373781915907, "grad_norm": 0.5756539106369019, "learning_rate": 7.674022074068332e-06, "loss": 0.042, "step": 37976 }, { "epoch": 0.672555087728619, "grad_norm": 0.6710761189460754, "learning_rate": 7.673271314165415e-06, "loss": 0.0788, "step": 37977 }, { "epoch": 0.6725727972656474, "grad_norm": 0.5662713050842285, "learning_rate": 7.6725205783667e-06, "loss": 0.0671, "step": 37978 }, { "epoch": 0.672590506802676, "grad_norm": 0.5565592050552368, "learning_rate": 7.671769866674645e-06, "loss": 0.0522, "step": 37979 }, { "epoch": 0.6726082163397044, "grad_norm": 0.7633630633354187, "learning_rate": 7.671019179091725e-06, "loss": 0.0614, "step": 37980 }, { "epoch": 0.6726259258767328, "grad_norm": 0.6550335884094238, "learning_rate": 7.670268515620413e-06, "loss": 0.0647, "step": 37981 }, { "epoch": 0.6726436354137612, "grad_norm": 0.9232276678085327, "learning_rate": 7.669517876263177e-06, "loss": 0.0679, "step": 37982 }, { "epoch": 0.6726613449507897, "grad_norm": 0.8474000692367554, "learning_rate": 7.668767261022483e-06, "loss": 0.0607, "step": 37983 }, { "epoch": 0.6726790544878181, "grad_norm": 0.5384021401405334, "learning_rate": 7.668016669900805e-06, "loss": 0.0682, "step": 37984 }, { "epoch": 0.6726967640248465, "grad_norm": 0.6966242790222168, "learning_rate": 7.667266102900614e-06, "loss": 0.064, "step": 37985 }, { "epoch": 0.6727144735618749, "grad_norm": 0.7376625537872314, "learning_rate": 7.666515560024372e-06, "loss": 0.0719, "step": 37986 }, { "epoch": 0.6727321830989034, "grad_norm": 1.0066455602645874, "learning_rate": 7.665765041274553e-06, "loss": 0.0963, "step": 37987 }, { "epoch": 0.6727498926359318, "grad_norm": 0.6032975316047668, "learning_rate": 7.66501454665363e-06, "loss": 0.0756, "step": 37988 }, { "epoch": 0.6727676021729602, "grad_norm": 0.4863855242729187, "learning_rate": 7.66426407616406e-06, "loss": 0.055, "step": 37989 }, { "epoch": 0.6727853117099887, "grad_norm": 0.4204701781272888, "learning_rate": 7.663513629808322e-06, "loss": 0.0452, "step": 37990 }, { "epoch": 0.6728030212470171, "grad_norm": 0.7012937068939209, "learning_rate": 7.662763207588877e-06, "loss": 0.0553, "step": 37991 }, { "epoch": 0.6728207307840455, "grad_norm": 0.4399968683719635, "learning_rate": 7.662012809508204e-06, "loss": 0.0435, "step": 37992 }, { "epoch": 0.6728384403210739, "grad_norm": 0.660811722278595, "learning_rate": 7.661262435568765e-06, "loss": 0.0609, "step": 37993 }, { "epoch": 0.6728561498581024, "grad_norm": 0.512020468711853, "learning_rate": 7.66051208577303e-06, "loss": 0.0691, "step": 37994 }, { "epoch": 0.6728738593951308, "grad_norm": 0.5706578493118286, "learning_rate": 7.65976176012347e-06, "loss": 0.0496, "step": 37995 }, { "epoch": 0.6728915689321592, "grad_norm": 0.5201446413993835, "learning_rate": 7.659011458622553e-06, "loss": 0.0529, "step": 37996 }, { "epoch": 0.6729092784691876, "grad_norm": 1.0838873386383057, "learning_rate": 7.658261181272743e-06, "loss": 0.0961, "step": 37997 }, { "epoch": 0.6729269880062161, "grad_norm": 0.758516788482666, "learning_rate": 7.65751092807651e-06, "loss": 0.0528, "step": 37998 }, { "epoch": 0.6729446975432445, "grad_norm": 0.5866602659225464, "learning_rate": 7.656760699036332e-06, "loss": 0.0574, "step": 37999 }, { "epoch": 0.6729624070802729, "grad_norm": 0.544774055480957, "learning_rate": 7.656010494154654e-06, "loss": 0.0668, "step": 38000 }, { "epoch": 0.6729801166173013, "grad_norm": 0.5751653909683228, "learning_rate": 7.655260313433964e-06, "loss": 0.0565, "step": 38001 }, { "epoch": 0.6729978261543298, "grad_norm": 0.371927410364151, "learning_rate": 7.654510156876726e-06, "loss": 0.0617, "step": 38002 }, { "epoch": 0.6730155356913582, "grad_norm": 0.6122120022773743, "learning_rate": 7.653760024485414e-06, "loss": 0.0514, "step": 38003 }, { "epoch": 0.6730332452283866, "grad_norm": 0.5974887013435364, "learning_rate": 7.653009916262479e-06, "loss": 0.0662, "step": 38004 }, { "epoch": 0.6730509547654151, "grad_norm": 0.3777807056903839, "learning_rate": 7.652259832210402e-06, "loss": 0.0742, "step": 38005 }, { "epoch": 0.6730686643024435, "grad_norm": 1.033645510673523, "learning_rate": 7.65150977233165e-06, "loss": 0.0969, "step": 38006 }, { "epoch": 0.6730863738394719, "grad_norm": 0.7654024958610535, "learning_rate": 7.650759736628682e-06, "loss": 0.0789, "step": 38007 }, { "epoch": 0.6731040833765003, "grad_norm": 0.5754101872444153, "learning_rate": 7.650009725103972e-06, "loss": 0.067, "step": 38008 }, { "epoch": 0.6731217929135288, "grad_norm": 0.5088958144187927, "learning_rate": 7.649259737759987e-06, "loss": 0.0487, "step": 38009 }, { "epoch": 0.6731395024505572, "grad_norm": 0.3697185218334198, "learning_rate": 7.648509774599193e-06, "loss": 0.0432, "step": 38010 }, { "epoch": 0.6731572119875856, "grad_norm": 0.996985912322998, "learning_rate": 7.64775983562406e-06, "loss": 0.0923, "step": 38011 }, { "epoch": 0.673174921524614, "grad_norm": 0.5635367035865784, "learning_rate": 7.647009920837052e-06, "loss": 0.0643, "step": 38012 }, { "epoch": 0.6731926310616425, "grad_norm": 0.5785757899284363, "learning_rate": 7.646260030240644e-06, "loss": 0.0628, "step": 38013 }, { "epoch": 0.6732103405986709, "grad_norm": 0.533879816532135, "learning_rate": 7.645510163837289e-06, "loss": 0.0653, "step": 38014 }, { "epoch": 0.6732280501356993, "grad_norm": 0.692069411277771, "learning_rate": 7.644760321629464e-06, "loss": 0.0466, "step": 38015 }, { "epoch": 0.6732457596727277, "grad_norm": 0.6877703070640564, "learning_rate": 7.644010503619635e-06, "loss": 0.0612, "step": 38016 }, { "epoch": 0.6732634692097562, "grad_norm": 1.0096609592437744, "learning_rate": 7.64326070981027e-06, "loss": 0.0609, "step": 38017 }, { "epoch": 0.6732811787467846, "grad_norm": 0.9300270676612854, "learning_rate": 7.64251094020383e-06, "loss": 0.0679, "step": 38018 }, { "epoch": 0.673298888283813, "grad_norm": 0.7908856272697449, "learning_rate": 7.64176119480278e-06, "loss": 0.0956, "step": 38019 }, { "epoch": 0.6733165978208415, "grad_norm": 0.577924370765686, "learning_rate": 7.641011473609598e-06, "loss": 0.0751, "step": 38020 }, { "epoch": 0.6733343073578699, "grad_norm": 0.689346194267273, "learning_rate": 7.64026177662674e-06, "loss": 0.0752, "step": 38021 }, { "epoch": 0.6733520168948983, "grad_norm": 0.8173178434371948, "learning_rate": 7.639512103856676e-06, "loss": 0.0737, "step": 38022 }, { "epoch": 0.6733697264319267, "grad_norm": 0.6144102811813354, "learning_rate": 7.638762455301874e-06, "loss": 0.0627, "step": 38023 }, { "epoch": 0.6733874359689552, "grad_norm": 0.36839333176612854, "learning_rate": 7.638012830964805e-06, "loss": 0.0526, "step": 38024 }, { "epoch": 0.6734051455059836, "grad_norm": 0.5021538734436035, "learning_rate": 7.637263230847924e-06, "loss": 0.057, "step": 38025 }, { "epoch": 0.673422855043012, "grad_norm": 0.786078929901123, "learning_rate": 7.636513654953701e-06, "loss": 0.0811, "step": 38026 }, { "epoch": 0.6734405645800404, "grad_norm": 0.847350537776947, "learning_rate": 7.63576410328461e-06, "loss": 0.0801, "step": 38027 }, { "epoch": 0.6734582741170689, "grad_norm": 0.6106759905815125, "learning_rate": 7.635014575843105e-06, "loss": 0.0618, "step": 38028 }, { "epoch": 0.6734759836540973, "grad_norm": 0.47013095021247864, "learning_rate": 7.634265072631649e-06, "loss": 0.044, "step": 38029 }, { "epoch": 0.6734936931911257, "grad_norm": 0.5075563192367554, "learning_rate": 7.633515593652721e-06, "loss": 0.0359, "step": 38030 }, { "epoch": 0.6735114027281541, "grad_norm": 0.7393973469734192, "learning_rate": 7.63276613890879e-06, "loss": 0.0641, "step": 38031 }, { "epoch": 0.6735291122651826, "grad_norm": 0.6381702423095703, "learning_rate": 7.632016708402307e-06, "loss": 0.0661, "step": 38032 }, { "epoch": 0.673546821802211, "grad_norm": 0.4483570158481598, "learning_rate": 7.631267302135745e-06, "loss": 0.0514, "step": 38033 }, { "epoch": 0.6735645313392394, "grad_norm": 0.6286265254020691, "learning_rate": 7.630517920111568e-06, "loss": 0.0603, "step": 38034 }, { "epoch": 0.673582240876268, "grad_norm": 0.4353522062301636, "learning_rate": 7.629768562332245e-06, "loss": 0.0513, "step": 38035 }, { "epoch": 0.6735999504132963, "grad_norm": 0.8781175017356873, "learning_rate": 7.629019228800233e-06, "loss": 0.0838, "step": 38036 }, { "epoch": 0.6736176599503247, "grad_norm": 0.6878194808959961, "learning_rate": 7.628269919518003e-06, "loss": 0.0334, "step": 38037 }, { "epoch": 0.6736353694873531, "grad_norm": 0.5197146534919739, "learning_rate": 7.6275206344880175e-06, "loss": 0.0868, "step": 38038 }, { "epoch": 0.6736530790243817, "grad_norm": 0.3788224458694458, "learning_rate": 7.626771373712746e-06, "loss": 0.0362, "step": 38039 }, { "epoch": 0.67367078856141, "grad_norm": 0.5304012298583984, "learning_rate": 7.62602213719465e-06, "loss": 0.0643, "step": 38040 }, { "epoch": 0.6736884980984384, "grad_norm": 0.6531550288200378, "learning_rate": 7.625272924936194e-06, "loss": 0.0507, "step": 38041 }, { "epoch": 0.6737062076354668, "grad_norm": 0.7046301364898682, "learning_rate": 7.6245237369398535e-06, "loss": 0.0668, "step": 38042 }, { "epoch": 0.6737239171724954, "grad_norm": 0.6027342081069946, "learning_rate": 7.623774573208075e-06, "loss": 0.0606, "step": 38043 }, { "epoch": 0.6737416267095238, "grad_norm": 0.37758180499076843, "learning_rate": 7.623025433743333e-06, "loss": 0.0421, "step": 38044 }, { "epoch": 0.6737593362465522, "grad_norm": 0.4466308057308197, "learning_rate": 7.622276318548096e-06, "loss": 0.0476, "step": 38045 }, { "epoch": 0.6737770457835806, "grad_norm": 0.6715067028999329, "learning_rate": 7.6215272276248185e-06, "loss": 0.0399, "step": 38046 }, { "epoch": 0.6737947553206091, "grad_norm": 0.7513093948364258, "learning_rate": 7.620778160975971e-06, "loss": 0.0448, "step": 38047 }, { "epoch": 0.6738124648576375, "grad_norm": 0.5560407638549805, "learning_rate": 7.6200291186040165e-06, "loss": 0.0367, "step": 38048 }, { "epoch": 0.6738301743946659, "grad_norm": 0.6272903680801392, "learning_rate": 7.61928010051142e-06, "loss": 0.065, "step": 38049 }, { "epoch": 0.6738478839316944, "grad_norm": 0.5118837356567383, "learning_rate": 7.618531106700646e-06, "loss": 0.0662, "step": 38050 }, { "epoch": 0.6738655934687228, "grad_norm": 0.8859209418296814, "learning_rate": 7.617782137174158e-06, "loss": 0.0622, "step": 38051 }, { "epoch": 0.6738833030057512, "grad_norm": 0.7207026481628418, "learning_rate": 7.617033191934423e-06, "loss": 0.0599, "step": 38052 }, { "epoch": 0.6739010125427796, "grad_norm": 0.823513388633728, "learning_rate": 7.6162842709839e-06, "loss": 0.0597, "step": 38053 }, { "epoch": 0.6739187220798081, "grad_norm": 0.5142763257026672, "learning_rate": 7.615535374325053e-06, "loss": 0.0746, "step": 38054 }, { "epoch": 0.6739364316168365, "grad_norm": 0.818667471408844, "learning_rate": 7.614786501960347e-06, "loss": 0.0697, "step": 38055 }, { "epoch": 0.6739541411538649, "grad_norm": 0.42322829365730286, "learning_rate": 7.614037653892254e-06, "loss": 0.0432, "step": 38056 }, { "epoch": 0.6739718506908933, "grad_norm": 0.5690250992774963, "learning_rate": 7.613288830123223e-06, "loss": 0.0667, "step": 38057 }, { "epoch": 0.6739895602279218, "grad_norm": 0.6854205131530762, "learning_rate": 7.61254003065572e-06, "loss": 0.0701, "step": 38058 }, { "epoch": 0.6740072697649502, "grad_norm": 0.7537938952445984, "learning_rate": 7.611791255492223e-06, "loss": 0.0635, "step": 38059 }, { "epoch": 0.6740249793019786, "grad_norm": 0.9693565368652344, "learning_rate": 7.611042504635181e-06, "loss": 0.0618, "step": 38060 }, { "epoch": 0.674042688839007, "grad_norm": 0.8729313611984253, "learning_rate": 7.610293778087062e-06, "loss": 0.0832, "step": 38061 }, { "epoch": 0.6740603983760355, "grad_norm": 0.5700636506080627, "learning_rate": 7.609545075850329e-06, "loss": 0.0506, "step": 38062 }, { "epoch": 0.6740781079130639, "grad_norm": 0.6132314801216125, "learning_rate": 7.60879639792745e-06, "loss": 0.0672, "step": 38063 }, { "epoch": 0.6740958174500923, "grad_norm": 0.567905068397522, "learning_rate": 7.608047744320878e-06, "loss": 0.0563, "step": 38064 }, { "epoch": 0.6741135269871208, "grad_norm": 0.7858486771583557, "learning_rate": 7.60729911503308e-06, "loss": 0.069, "step": 38065 }, { "epoch": 0.6741312365241492, "grad_norm": 0.683096170425415, "learning_rate": 7.606550510066525e-06, "loss": 0.0412, "step": 38066 }, { "epoch": 0.6741489460611776, "grad_norm": 0.5738423466682434, "learning_rate": 7.6058019294236645e-06, "loss": 0.0565, "step": 38067 }, { "epoch": 0.674166655598206, "grad_norm": 0.5937817692756653, "learning_rate": 7.605053373106973e-06, "loss": 0.0385, "step": 38068 }, { "epoch": 0.6741843651352345, "grad_norm": 0.673746645450592, "learning_rate": 7.604304841118905e-06, "loss": 0.0917, "step": 38069 }, { "epoch": 0.6742020746722629, "grad_norm": 0.5982412099838257, "learning_rate": 7.603556333461932e-06, "loss": 0.0718, "step": 38070 }, { "epoch": 0.6742197842092913, "grad_norm": 0.7839779257774353, "learning_rate": 7.602807850138507e-06, "loss": 0.0462, "step": 38071 }, { "epoch": 0.6742374937463197, "grad_norm": 0.45881468057632446, "learning_rate": 7.602059391151093e-06, "loss": 0.0619, "step": 38072 }, { "epoch": 0.6742552032833482, "grad_norm": 0.5020241141319275, "learning_rate": 7.601310956502157e-06, "loss": 0.0485, "step": 38073 }, { "epoch": 0.6742729128203766, "grad_norm": 0.6558395624160767, "learning_rate": 7.600562546194166e-06, "loss": 0.0544, "step": 38074 }, { "epoch": 0.674290622357405, "grad_norm": 0.4476222097873688, "learning_rate": 7.599814160229568e-06, "loss": 0.063, "step": 38075 }, { "epoch": 0.6743083318944334, "grad_norm": 0.5678406357765198, "learning_rate": 7.599065798610836e-06, "loss": 0.0548, "step": 38076 }, { "epoch": 0.6743260414314619, "grad_norm": 0.6353234052658081, "learning_rate": 7.5983174613404245e-06, "loss": 0.0688, "step": 38077 }, { "epoch": 0.6743437509684903, "grad_norm": 0.5978885889053345, "learning_rate": 7.597569148420804e-06, "loss": 0.0843, "step": 38078 }, { "epoch": 0.6743614605055187, "grad_norm": 0.5708581805229187, "learning_rate": 7.59682085985443e-06, "loss": 0.0743, "step": 38079 }, { "epoch": 0.6743791700425472, "grad_norm": 0.7116144895553589, "learning_rate": 7.596072595643768e-06, "loss": 0.086, "step": 38080 }, { "epoch": 0.6743968795795756, "grad_norm": 0.8945745825767517, "learning_rate": 7.595324355791282e-06, "loss": 0.0697, "step": 38081 }, { "epoch": 0.674414589116604, "grad_norm": 0.48140251636505127, "learning_rate": 7.5945761402994265e-06, "loss": 0.0659, "step": 38082 }, { "epoch": 0.6744322986536324, "grad_norm": 0.8134459257125854, "learning_rate": 7.593827949170667e-06, "loss": 0.076, "step": 38083 }, { "epoch": 0.6744500081906609, "grad_norm": 0.46792787313461304, "learning_rate": 7.593079782407467e-06, "loss": 0.0378, "step": 38084 }, { "epoch": 0.6744677177276893, "grad_norm": 0.32919010519981384, "learning_rate": 7.5923316400122825e-06, "loss": 0.0584, "step": 38085 }, { "epoch": 0.6744854272647177, "grad_norm": 0.6243937611579895, "learning_rate": 7.591583521987576e-06, "loss": 0.067, "step": 38086 }, { "epoch": 0.6745031368017461, "grad_norm": 0.745779275894165, "learning_rate": 7.590835428335806e-06, "loss": 0.0665, "step": 38087 }, { "epoch": 0.6745208463387746, "grad_norm": 0.7523325085639954, "learning_rate": 7.590087359059448e-06, "loss": 0.0785, "step": 38088 }, { "epoch": 0.674538555875803, "grad_norm": 0.4901147186756134, "learning_rate": 7.589339314160949e-06, "loss": 0.0666, "step": 38089 }, { "epoch": 0.6745562654128314, "grad_norm": 0.998759388923645, "learning_rate": 7.588591293642773e-06, "loss": 0.0791, "step": 38090 }, { "epoch": 0.6745739749498598, "grad_norm": 0.6605725288391113, "learning_rate": 7.587843297507388e-06, "loss": 0.0449, "step": 38091 }, { "epoch": 0.6745916844868883, "grad_norm": 0.5248119831085205, "learning_rate": 7.587095325757242e-06, "loss": 0.0511, "step": 38092 }, { "epoch": 0.6746093940239167, "grad_norm": 0.695241391658783, "learning_rate": 7.586347378394803e-06, "loss": 0.0731, "step": 38093 }, { "epoch": 0.6746271035609451, "grad_norm": 0.5196220874786377, "learning_rate": 7.585599455422533e-06, "loss": 0.063, "step": 38094 }, { "epoch": 0.6746448130979736, "grad_norm": 0.6348684430122375, "learning_rate": 7.584851556842891e-06, "loss": 0.056, "step": 38095 }, { "epoch": 0.674662522635002, "grad_norm": 0.5213786959648132, "learning_rate": 7.584103682658336e-06, "loss": 0.051, "step": 38096 }, { "epoch": 0.6746802321720304, "grad_norm": 0.5057192444801331, "learning_rate": 7.583355832871329e-06, "loss": 0.0498, "step": 38097 }, { "epoch": 0.6746979417090588, "grad_norm": 0.6769448518753052, "learning_rate": 7.582608007484339e-06, "loss": 0.0573, "step": 38098 }, { "epoch": 0.6747156512460873, "grad_norm": 0.5270088911056519, "learning_rate": 7.581860206499812e-06, "loss": 0.0515, "step": 38099 }, { "epoch": 0.6747333607831157, "grad_norm": 0.6587395668029785, "learning_rate": 7.5811124299202134e-06, "loss": 0.0827, "step": 38100 }, { "epoch": 0.6747510703201441, "grad_norm": 0.6670891046524048, "learning_rate": 7.5803646777480054e-06, "loss": 0.0598, "step": 38101 }, { "epoch": 0.6747687798571725, "grad_norm": 0.4874451756477356, "learning_rate": 7.579616949985654e-06, "loss": 0.0543, "step": 38102 }, { "epoch": 0.674786489394201, "grad_norm": 0.9472393989562988, "learning_rate": 7.578869246635606e-06, "loss": 0.0565, "step": 38103 }, { "epoch": 0.6748041989312294, "grad_norm": 0.688866913318634, "learning_rate": 7.578121567700325e-06, "loss": 0.073, "step": 38104 }, { "epoch": 0.6748219084682578, "grad_norm": 0.43820270895957947, "learning_rate": 7.577373913182275e-06, "loss": 0.0447, "step": 38105 }, { "epoch": 0.6748396180052862, "grad_norm": 0.7954855561256409, "learning_rate": 7.576626283083915e-06, "loss": 0.0853, "step": 38106 }, { "epoch": 0.6748573275423148, "grad_norm": 0.45833367109298706, "learning_rate": 7.575878677407703e-06, "loss": 0.0431, "step": 38107 }, { "epoch": 0.6748750370793432, "grad_norm": 0.5044223666191101, "learning_rate": 7.5751310961560995e-06, "loss": 0.0519, "step": 38108 }, { "epoch": 0.6748927466163716, "grad_norm": 0.5256691575050354, "learning_rate": 7.57438353933157e-06, "loss": 0.076, "step": 38109 }, { "epoch": 0.6749104561534001, "grad_norm": 0.38013240694999695, "learning_rate": 7.573636006936562e-06, "loss": 0.0749, "step": 38110 }, { "epoch": 0.6749281656904285, "grad_norm": 0.5697891116142273, "learning_rate": 7.5728884989735385e-06, "loss": 0.0702, "step": 38111 }, { "epoch": 0.6749458752274569, "grad_norm": 0.20576909184455872, "learning_rate": 7.5721410154449635e-06, "loss": 0.0495, "step": 38112 }, { "epoch": 0.6749635847644853, "grad_norm": 0.49475136399269104, "learning_rate": 7.571393556353296e-06, "loss": 0.0522, "step": 38113 }, { "epoch": 0.6749812943015138, "grad_norm": 0.568025529384613, "learning_rate": 7.570646121700988e-06, "loss": 0.0552, "step": 38114 }, { "epoch": 0.6749990038385422, "grad_norm": 0.2777117192745209, "learning_rate": 7.569898711490503e-06, "loss": 0.0317, "step": 38115 }, { "epoch": 0.6750167133755706, "grad_norm": 0.2584958076477051, "learning_rate": 7.5691513257242985e-06, "loss": 0.0485, "step": 38116 }, { "epoch": 0.675034422912599, "grad_norm": 0.8084304332733154, "learning_rate": 7.568403964404837e-06, "loss": 0.0598, "step": 38117 }, { "epoch": 0.6750521324496275, "grad_norm": 0.5375820398330688, "learning_rate": 7.567656627534572e-06, "loss": 0.048, "step": 38118 }, { "epoch": 0.6750698419866559, "grad_norm": 0.459545373916626, "learning_rate": 7.566909315115968e-06, "loss": 0.0704, "step": 38119 }, { "epoch": 0.6750875515236843, "grad_norm": 0.5092151761054993, "learning_rate": 7.5661620271514835e-06, "loss": 0.0442, "step": 38120 }, { "epoch": 0.6751052610607127, "grad_norm": 0.5143420696258545, "learning_rate": 7.56541476364357e-06, "loss": 0.0324, "step": 38121 }, { "epoch": 0.6751229705977412, "grad_norm": 1.1104854345321655, "learning_rate": 7.564667524594689e-06, "loss": 0.0837, "step": 38122 }, { "epoch": 0.6751406801347696, "grad_norm": 0.535969614982605, "learning_rate": 7.563920310007305e-06, "loss": 0.0626, "step": 38123 }, { "epoch": 0.675158389671798, "grad_norm": 0.8997683525085449, "learning_rate": 7.563173119883861e-06, "loss": 0.0986, "step": 38124 }, { "epoch": 0.6751760992088265, "grad_norm": 0.4626331925392151, "learning_rate": 7.56242595422683e-06, "loss": 0.0456, "step": 38125 }, { "epoch": 0.6751938087458549, "grad_norm": 0.7787010669708252, "learning_rate": 7.561678813038666e-06, "loss": 0.0457, "step": 38126 }, { "epoch": 0.6752115182828833, "grad_norm": 0.42543360590934753, "learning_rate": 7.5609316963218315e-06, "loss": 0.0407, "step": 38127 }, { "epoch": 0.6752292278199117, "grad_norm": 0.44857609272003174, "learning_rate": 7.560184604078774e-06, "loss": 0.0644, "step": 38128 }, { "epoch": 0.6752469373569402, "grad_norm": 0.6072142720222473, "learning_rate": 7.559437536311957e-06, "loss": 0.0456, "step": 38129 }, { "epoch": 0.6752646468939686, "grad_norm": 0.6878064274787903, "learning_rate": 7.558690493023843e-06, "loss": 0.0542, "step": 38130 }, { "epoch": 0.675282356430997, "grad_norm": 0.7671278119087219, "learning_rate": 7.5579434742168794e-06, "loss": 0.0626, "step": 38131 }, { "epoch": 0.6753000659680254, "grad_norm": 0.9472119808197021, "learning_rate": 7.557196479893529e-06, "loss": 0.0955, "step": 38132 }, { "epoch": 0.6753177755050539, "grad_norm": 1.1626452207565308, "learning_rate": 7.556449510056248e-06, "loss": 0.0453, "step": 38133 }, { "epoch": 0.6753354850420823, "grad_norm": 0.6637950539588928, "learning_rate": 7.555702564707499e-06, "loss": 0.0817, "step": 38134 }, { "epoch": 0.6753531945791107, "grad_norm": 0.851072371006012, "learning_rate": 7.554955643849731e-06, "loss": 0.0665, "step": 38135 }, { "epoch": 0.6753709041161391, "grad_norm": 0.5781377553939819, "learning_rate": 7.554208747485408e-06, "loss": 0.0652, "step": 38136 }, { "epoch": 0.6753886136531676, "grad_norm": 0.3687484562397003, "learning_rate": 7.553461875616985e-06, "loss": 0.0664, "step": 38137 }, { "epoch": 0.675406323190196, "grad_norm": 0.8703486323356628, "learning_rate": 7.552715028246926e-06, "loss": 0.0777, "step": 38138 }, { "epoch": 0.6754240327272244, "grad_norm": 0.48624327778816223, "learning_rate": 7.551968205377675e-06, "loss": 0.0533, "step": 38139 }, { "epoch": 0.6754417422642529, "grad_norm": 0.7430108785629272, "learning_rate": 7.551221407011694e-06, "loss": 0.0746, "step": 38140 }, { "epoch": 0.6754594518012813, "grad_norm": 0.8210588693618774, "learning_rate": 7.550474633151449e-06, "loss": 0.0707, "step": 38141 }, { "epoch": 0.6754771613383097, "grad_norm": 0.8635537028312683, "learning_rate": 7.549727883799382e-06, "loss": 0.0729, "step": 38142 }, { "epoch": 0.6754948708753381, "grad_norm": 0.7733713984489441, "learning_rate": 7.548981158957957e-06, "loss": 0.0588, "step": 38143 }, { "epoch": 0.6755125804123666, "grad_norm": 0.24305459856987, "learning_rate": 7.5482344586296295e-06, "loss": 0.0428, "step": 38144 }, { "epoch": 0.675530289949395, "grad_norm": 0.709221601486206, "learning_rate": 7.547487782816857e-06, "loss": 0.0803, "step": 38145 }, { "epoch": 0.6755479994864234, "grad_norm": 0.3621782660484314, "learning_rate": 7.546741131522098e-06, "loss": 0.0482, "step": 38146 }, { "epoch": 0.6755657090234518, "grad_norm": 0.8170774579048157, "learning_rate": 7.545994504747805e-06, "loss": 0.0831, "step": 38147 }, { "epoch": 0.6755834185604803, "grad_norm": 0.6045681834220886, "learning_rate": 7.545247902496442e-06, "loss": 0.0326, "step": 38148 }, { "epoch": 0.6756011280975087, "grad_norm": 0.6063342094421387, "learning_rate": 7.544501324770454e-06, "loss": 0.0578, "step": 38149 }, { "epoch": 0.6756188376345371, "grad_norm": 0.7776029706001282, "learning_rate": 7.543754771572301e-06, "loss": 0.0506, "step": 38150 }, { "epoch": 0.6756365471715655, "grad_norm": 0.28565290570259094, "learning_rate": 7.5430082429044445e-06, "loss": 0.0493, "step": 38151 }, { "epoch": 0.675654256708594, "grad_norm": 0.2897973358631134, "learning_rate": 7.5422617387693385e-06, "loss": 0.0332, "step": 38152 }, { "epoch": 0.6756719662456224, "grad_norm": 0.6445273756980896, "learning_rate": 7.541515259169427e-06, "loss": 0.0723, "step": 38153 }, { "epoch": 0.6756896757826508, "grad_norm": 0.5895984172821045, "learning_rate": 7.540768804107182e-06, "loss": 0.0477, "step": 38154 }, { "epoch": 0.6757073853196793, "grad_norm": 0.5572560429573059, "learning_rate": 7.540022373585059e-06, "loss": 0.0542, "step": 38155 }, { "epoch": 0.6757250948567077, "grad_norm": 0.9212892651557922, "learning_rate": 7.539275967605502e-06, "loss": 0.099, "step": 38156 }, { "epoch": 0.6757428043937361, "grad_norm": 0.618813693523407, "learning_rate": 7.538529586170972e-06, "loss": 0.0551, "step": 38157 }, { "epoch": 0.6757605139307645, "grad_norm": 0.44451308250427246, "learning_rate": 7.5377832292839256e-06, "loss": 0.033, "step": 38158 }, { "epoch": 0.675778223467793, "grad_norm": 0.7653217315673828, "learning_rate": 7.537036896946822e-06, "loss": 0.0509, "step": 38159 }, { "epoch": 0.6757959330048214, "grad_norm": 0.490446001291275, "learning_rate": 7.5362905891621075e-06, "loss": 0.048, "step": 38160 }, { "epoch": 0.6758136425418498, "grad_norm": 0.7856839895248413, "learning_rate": 7.535544305932243e-06, "loss": 0.0681, "step": 38161 }, { "epoch": 0.6758313520788782, "grad_norm": 0.29706406593322754, "learning_rate": 7.534798047259682e-06, "loss": 0.0561, "step": 38162 }, { "epoch": 0.6758490616159067, "grad_norm": 0.5825096368789673, "learning_rate": 7.53405181314688e-06, "loss": 0.0687, "step": 38163 }, { "epoch": 0.6758667711529351, "grad_norm": 0.6667470932006836, "learning_rate": 7.533305603596292e-06, "loss": 0.0683, "step": 38164 }, { "epoch": 0.6758844806899635, "grad_norm": 0.7834474444389343, "learning_rate": 7.532559418610373e-06, "loss": 0.0504, "step": 38165 }, { "epoch": 0.6759021902269919, "grad_norm": 0.2689138352870941, "learning_rate": 7.531813258191586e-06, "loss": 0.0445, "step": 38166 }, { "epoch": 0.6759198997640204, "grad_norm": 0.3499356210231781, "learning_rate": 7.531067122342371e-06, "loss": 0.0276, "step": 38167 }, { "epoch": 0.6759376093010488, "grad_norm": 0.7358246445655823, "learning_rate": 7.53032101106519e-06, "loss": 0.0949, "step": 38168 }, { "epoch": 0.6759553188380772, "grad_norm": 0.6124188303947449, "learning_rate": 7.529574924362496e-06, "loss": 0.0338, "step": 38169 }, { "epoch": 0.6759730283751058, "grad_norm": 0.5066714882850647, "learning_rate": 7.528828862236753e-06, "loss": 0.0523, "step": 38170 }, { "epoch": 0.6759907379121342, "grad_norm": 0.5403763055801392, "learning_rate": 7.5280828246904e-06, "loss": 0.0888, "step": 38171 }, { "epoch": 0.6760084474491626, "grad_norm": 0.30943533778190613, "learning_rate": 7.5273368117259e-06, "loss": 0.0549, "step": 38172 }, { "epoch": 0.676026156986191, "grad_norm": 0.6632062792778015, "learning_rate": 7.526590823345705e-06, "loss": 0.0568, "step": 38173 }, { "epoch": 0.6760438665232195, "grad_norm": 1.1772422790527344, "learning_rate": 7.525844859552269e-06, "loss": 0.0703, "step": 38174 }, { "epoch": 0.6760615760602479, "grad_norm": 0.5442279577255249, "learning_rate": 7.5250989203480506e-06, "loss": 0.0648, "step": 38175 }, { "epoch": 0.6760792855972763, "grad_norm": 0.39971616864204407, "learning_rate": 7.524353005735499e-06, "loss": 0.0563, "step": 38176 }, { "epoch": 0.6760969951343047, "grad_norm": 0.5525385737419128, "learning_rate": 7.523607115717075e-06, "loss": 0.0662, "step": 38177 }, { "epoch": 0.6761147046713332, "grad_norm": 0.6900871396064758, "learning_rate": 7.522861250295222e-06, "loss": 0.0507, "step": 38178 }, { "epoch": 0.6761324142083616, "grad_norm": 0.9739806652069092, "learning_rate": 7.522115409472399e-06, "loss": 0.0461, "step": 38179 }, { "epoch": 0.67615012374539, "grad_norm": 0.4584054946899414, "learning_rate": 7.521369593251069e-06, "loss": 0.0342, "step": 38180 }, { "epoch": 0.6761678332824184, "grad_norm": 0.742605447769165, "learning_rate": 7.520623801633668e-06, "loss": 0.0642, "step": 38181 }, { "epoch": 0.6761855428194469, "grad_norm": 0.4507697522640228, "learning_rate": 7.519878034622652e-06, "loss": 0.0683, "step": 38182 }, { "epoch": 0.6762032523564753, "grad_norm": 0.526938796043396, "learning_rate": 7.5191322922204865e-06, "loss": 0.0701, "step": 38183 }, { "epoch": 0.6762209618935037, "grad_norm": 0.3155478239059448, "learning_rate": 7.518386574429626e-06, "loss": 0.0373, "step": 38184 }, { "epoch": 0.6762386714305322, "grad_norm": 0.7667111754417419, "learning_rate": 7.517640881252512e-06, "loss": 0.068, "step": 38185 }, { "epoch": 0.6762563809675606, "grad_norm": 0.5537823438644409, "learning_rate": 7.5168952126916015e-06, "loss": 0.0492, "step": 38186 }, { "epoch": 0.676274090504589, "grad_norm": 0.8206791877746582, "learning_rate": 7.516149568749354e-06, "loss": 0.0864, "step": 38187 }, { "epoch": 0.6762918000416174, "grad_norm": 0.6676612496376038, "learning_rate": 7.515403949428212e-06, "loss": 0.1002, "step": 38188 }, { "epoch": 0.6763095095786459, "grad_norm": 0.8596973419189453, "learning_rate": 7.514658354730633e-06, "loss": 0.0697, "step": 38189 }, { "epoch": 0.6763272191156743, "grad_norm": 0.9054017066955566, "learning_rate": 7.513912784659073e-06, "loss": 0.0705, "step": 38190 }, { "epoch": 0.6763449286527027, "grad_norm": 0.5149039030075073, "learning_rate": 7.513167239215981e-06, "loss": 0.0548, "step": 38191 }, { "epoch": 0.6763626381897311, "grad_norm": 0.7321951389312744, "learning_rate": 7.512421718403813e-06, "loss": 0.0474, "step": 38192 }, { "epoch": 0.6763803477267596, "grad_norm": 0.4036332666873932, "learning_rate": 7.511676222225021e-06, "loss": 0.0485, "step": 38193 }, { "epoch": 0.676398057263788, "grad_norm": 0.5388551354408264, "learning_rate": 7.510930750682061e-06, "loss": 0.0989, "step": 38194 }, { "epoch": 0.6764157668008164, "grad_norm": 0.5203564167022705, "learning_rate": 7.510185303777375e-06, "loss": 0.0409, "step": 38195 }, { "epoch": 0.6764334763378448, "grad_norm": 0.7371429204940796, "learning_rate": 7.509439881513424e-06, "loss": 0.0712, "step": 38196 }, { "epoch": 0.6764511858748733, "grad_norm": 0.312313437461853, "learning_rate": 7.508694483892656e-06, "loss": 0.0296, "step": 38197 }, { "epoch": 0.6764688954119017, "grad_norm": 0.5916014313697815, "learning_rate": 7.507949110917532e-06, "loss": 0.0336, "step": 38198 }, { "epoch": 0.6764866049489301, "grad_norm": 0.8808605670928955, "learning_rate": 7.5072037625904935e-06, "loss": 0.0643, "step": 38199 }, { "epoch": 0.6765043144859586, "grad_norm": 1.3933519124984741, "learning_rate": 7.506458438913994e-06, "loss": 0.0717, "step": 38200 }, { "epoch": 0.676522024022987, "grad_norm": 0.5778163075447083, "learning_rate": 7.50571313989049e-06, "loss": 0.063, "step": 38201 }, { "epoch": 0.6765397335600154, "grad_norm": 0.9444911479949951, "learning_rate": 7.50496786552243e-06, "loss": 0.087, "step": 38202 }, { "epoch": 0.6765574430970438, "grad_norm": 0.9293844103813171, "learning_rate": 7.50422261581227e-06, "loss": 0.0433, "step": 38203 }, { "epoch": 0.6765751526340723, "grad_norm": 0.45369386672973633, "learning_rate": 7.503477390762457e-06, "loss": 0.0672, "step": 38204 }, { "epoch": 0.6765928621711007, "grad_norm": 0.4011944532394409, "learning_rate": 7.502732190375453e-06, "loss": 0.0565, "step": 38205 }, { "epoch": 0.6766105717081291, "grad_norm": 0.4128754138946533, "learning_rate": 7.501987014653695e-06, "loss": 0.0322, "step": 38206 }, { "epoch": 0.6766282812451575, "grad_norm": 0.4724961519241333, "learning_rate": 7.501241863599642e-06, "loss": 0.0577, "step": 38207 }, { "epoch": 0.676645990782186, "grad_norm": 0.4727197289466858, "learning_rate": 7.500496737215744e-06, "loss": 0.0457, "step": 38208 }, { "epoch": 0.6766637003192144, "grad_norm": 0.6153013110160828, "learning_rate": 7.49975163550446e-06, "loss": 0.0559, "step": 38209 }, { "epoch": 0.6766814098562428, "grad_norm": 0.6752817630767822, "learning_rate": 7.499006558468229e-06, "loss": 0.0581, "step": 38210 }, { "epoch": 0.6766991193932712, "grad_norm": 0.46232500672340393, "learning_rate": 7.498261506109501e-06, "loss": 0.0477, "step": 38211 }, { "epoch": 0.6767168289302997, "grad_norm": 0.395832359790802, "learning_rate": 7.4975164784307455e-06, "loss": 0.0298, "step": 38212 }, { "epoch": 0.6767345384673281, "grad_norm": 0.9586354494094849, "learning_rate": 7.4967714754343965e-06, "loss": 0.067, "step": 38213 }, { "epoch": 0.6767522480043565, "grad_norm": 0.5208064913749695, "learning_rate": 7.49602649712291e-06, "loss": 0.0933, "step": 38214 }, { "epoch": 0.676769957541385, "grad_norm": 0.5400742888450623, "learning_rate": 7.495281543498737e-06, "loss": 0.0496, "step": 38215 }, { "epoch": 0.6767876670784134, "grad_norm": 0.9980729222297668, "learning_rate": 7.494536614564337e-06, "loss": 0.0557, "step": 38216 }, { "epoch": 0.6768053766154418, "grad_norm": 0.46898484230041504, "learning_rate": 7.4937917103221435e-06, "loss": 0.037, "step": 38217 }, { "epoch": 0.6768230861524702, "grad_norm": 0.674839973449707, "learning_rate": 7.4930468307746175e-06, "loss": 0.0656, "step": 38218 }, { "epoch": 0.6768407956894987, "grad_norm": 0.5738393068313599, "learning_rate": 7.492301975924207e-06, "loss": 0.0563, "step": 38219 }, { "epoch": 0.6768585052265271, "grad_norm": 0.4855475127696991, "learning_rate": 7.491557145773364e-06, "loss": 0.0629, "step": 38220 }, { "epoch": 0.6768762147635555, "grad_norm": 0.8147101402282715, "learning_rate": 7.4908123403245405e-06, "loss": 0.0727, "step": 38221 }, { "epoch": 0.6768939243005839, "grad_norm": 0.25473862886428833, "learning_rate": 7.490067559580183e-06, "loss": 0.043, "step": 38222 }, { "epoch": 0.6769116338376124, "grad_norm": 0.36030614376068115, "learning_rate": 7.489322803542749e-06, "loss": 0.0641, "step": 38223 }, { "epoch": 0.6769293433746408, "grad_norm": 0.9783758521080017, "learning_rate": 7.4885780722146795e-06, "loss": 0.0866, "step": 38224 }, { "epoch": 0.6769470529116692, "grad_norm": 0.41315701603889465, "learning_rate": 7.4878333655984274e-06, "loss": 0.0584, "step": 38225 }, { "epoch": 0.6769647624486976, "grad_norm": 0.46824273467063904, "learning_rate": 7.4870886836964495e-06, "loss": 0.0545, "step": 38226 }, { "epoch": 0.6769824719857261, "grad_norm": 0.6527999043464661, "learning_rate": 7.486344026511184e-06, "loss": 0.1025, "step": 38227 }, { "epoch": 0.6770001815227545, "grad_norm": 0.4157485365867615, "learning_rate": 7.485599394045086e-06, "loss": 0.0604, "step": 38228 }, { "epoch": 0.6770178910597829, "grad_norm": 0.44681984186172485, "learning_rate": 7.484854786300607e-06, "loss": 0.0676, "step": 38229 }, { "epoch": 0.6770356005968114, "grad_norm": 0.45643383264541626, "learning_rate": 7.484110203280195e-06, "loss": 0.0765, "step": 38230 }, { "epoch": 0.6770533101338398, "grad_norm": 0.3689141273498535, "learning_rate": 7.483365644986301e-06, "loss": 0.0856, "step": 38231 }, { "epoch": 0.6770710196708682, "grad_norm": 0.3702766001224518, "learning_rate": 7.482621111421374e-06, "loss": 0.0637, "step": 38232 }, { "epoch": 0.6770887292078966, "grad_norm": 0.9371787905693054, "learning_rate": 7.481876602587869e-06, "loss": 0.0437, "step": 38233 }, { "epoch": 0.6771064387449252, "grad_norm": 0.38607922196388245, "learning_rate": 7.481132118488222e-06, "loss": 0.0455, "step": 38234 }, { "epoch": 0.6771241482819536, "grad_norm": 0.5260388255119324, "learning_rate": 7.480387659124891e-06, "loss": 0.0532, "step": 38235 }, { "epoch": 0.677141857818982, "grad_norm": 0.5992430448532104, "learning_rate": 7.479643224500324e-06, "loss": 0.0508, "step": 38236 }, { "epoch": 0.6771595673560104, "grad_norm": 0.5291491746902466, "learning_rate": 7.478898814616977e-06, "loss": 0.0631, "step": 38237 }, { "epoch": 0.6771772768930389, "grad_norm": 0.5494115948677063, "learning_rate": 7.478154429477285e-06, "loss": 0.07, "step": 38238 }, { "epoch": 0.6771949864300673, "grad_norm": 0.39483642578125, "learning_rate": 7.4774100690837036e-06, "loss": 0.0341, "step": 38239 }, { "epoch": 0.6772126959670957, "grad_norm": 0.9433004260063171, "learning_rate": 7.4766657334386775e-06, "loss": 0.0799, "step": 38240 }, { "epoch": 0.6772304055041241, "grad_norm": 0.5799779891967773, "learning_rate": 7.475921422544672e-06, "loss": 0.0469, "step": 38241 }, { "epoch": 0.6772481150411526, "grad_norm": 0.1959795206785202, "learning_rate": 7.475177136404118e-06, "loss": 0.0638, "step": 38242 }, { "epoch": 0.677265824578181, "grad_norm": 0.33948713541030884, "learning_rate": 7.474432875019469e-06, "loss": 0.0441, "step": 38243 }, { "epoch": 0.6772835341152094, "grad_norm": 0.5372824668884277, "learning_rate": 7.473688638393182e-06, "loss": 0.0586, "step": 38244 }, { "epoch": 0.6773012436522379, "grad_norm": 0.6470228433609009, "learning_rate": 7.472944426527692e-06, "loss": 0.0705, "step": 38245 }, { "epoch": 0.6773189531892663, "grad_norm": 0.46365660429000854, "learning_rate": 7.472200239425454e-06, "loss": 0.0659, "step": 38246 }, { "epoch": 0.6773366627262947, "grad_norm": 0.2451283484697342, "learning_rate": 7.471456077088915e-06, "loss": 0.0528, "step": 38247 }, { "epoch": 0.6773543722633231, "grad_norm": 1.1696090698242188, "learning_rate": 7.4707119395205235e-06, "loss": 0.0746, "step": 38248 }, { "epoch": 0.6773720818003516, "grad_norm": 0.774090051651001, "learning_rate": 7.46996782672273e-06, "loss": 0.0608, "step": 38249 }, { "epoch": 0.67738979133738, "grad_norm": 0.43429720401763916, "learning_rate": 7.469223738697979e-06, "loss": 0.0429, "step": 38250 }, { "epoch": 0.6774075008744084, "grad_norm": 0.7216660976409912, "learning_rate": 7.4684796754487265e-06, "loss": 0.0736, "step": 38251 }, { "epoch": 0.6774252104114368, "grad_norm": 0.6057812571525574, "learning_rate": 7.467735636977407e-06, "loss": 0.0441, "step": 38252 }, { "epoch": 0.6774429199484653, "grad_norm": 0.6697903275489807, "learning_rate": 7.466991623286478e-06, "loss": 0.0486, "step": 38253 }, { "epoch": 0.6774606294854937, "grad_norm": 0.7422977685928345, "learning_rate": 7.4662476343783836e-06, "loss": 0.0883, "step": 38254 }, { "epoch": 0.6774783390225221, "grad_norm": 0.4396749436855316, "learning_rate": 7.465503670255577e-06, "loss": 0.0601, "step": 38255 }, { "epoch": 0.6774960485595505, "grad_norm": 0.7818873524665833, "learning_rate": 7.4647597309204964e-06, "loss": 0.0761, "step": 38256 }, { "epoch": 0.677513758096579, "grad_norm": 0.5798535943031311, "learning_rate": 7.464015816375595e-06, "loss": 0.0604, "step": 38257 }, { "epoch": 0.6775314676336074, "grad_norm": 0.6174923181533813, "learning_rate": 7.463271926623318e-06, "loss": 0.0882, "step": 38258 }, { "epoch": 0.6775491771706358, "grad_norm": 0.5583842396736145, "learning_rate": 7.462528061666115e-06, "loss": 0.028, "step": 38259 }, { "epoch": 0.6775668867076643, "grad_norm": 0.6415153741836548, "learning_rate": 7.461784221506433e-06, "loss": 0.0364, "step": 38260 }, { "epoch": 0.6775845962446927, "grad_norm": 0.6339820623397827, "learning_rate": 7.461040406146717e-06, "loss": 0.0759, "step": 38261 }, { "epoch": 0.6776023057817211, "grad_norm": 0.5537630915641785, "learning_rate": 7.460296615589421e-06, "loss": 0.0668, "step": 38262 }, { "epoch": 0.6776200153187495, "grad_norm": 0.553403913974762, "learning_rate": 7.459552849836983e-06, "loss": 0.0521, "step": 38263 }, { "epoch": 0.677637724855778, "grad_norm": 0.5062695741653442, "learning_rate": 7.458809108891851e-06, "loss": 0.0337, "step": 38264 }, { "epoch": 0.6776554343928064, "grad_norm": 0.613631546497345, "learning_rate": 7.458065392756481e-06, "loss": 0.0492, "step": 38265 }, { "epoch": 0.6776731439298348, "grad_norm": 0.8019766807556152, "learning_rate": 7.457321701433307e-06, "loss": 0.061, "step": 38266 }, { "epoch": 0.6776908534668632, "grad_norm": 0.8762723207473755, "learning_rate": 7.456578034924783e-06, "loss": 0.0779, "step": 38267 }, { "epoch": 0.6777085630038917, "grad_norm": 0.5861356854438782, "learning_rate": 7.455834393233354e-06, "loss": 0.0527, "step": 38268 }, { "epoch": 0.6777262725409201, "grad_norm": 0.405394047498703, "learning_rate": 7.455090776361466e-06, "loss": 0.0329, "step": 38269 }, { "epoch": 0.6777439820779485, "grad_norm": 0.830452561378479, "learning_rate": 7.454347184311565e-06, "loss": 0.0719, "step": 38270 }, { "epoch": 0.6777616916149769, "grad_norm": 1.3189677000045776, "learning_rate": 7.453603617086102e-06, "loss": 0.0913, "step": 38271 }, { "epoch": 0.6777794011520054, "grad_norm": 0.4194904863834381, "learning_rate": 7.452860074687522e-06, "loss": 0.0617, "step": 38272 }, { "epoch": 0.6777971106890338, "grad_norm": 0.8090309500694275, "learning_rate": 7.452116557118266e-06, "loss": 0.0669, "step": 38273 }, { "epoch": 0.6778148202260622, "grad_norm": 0.28188273310661316, "learning_rate": 7.451373064380781e-06, "loss": 0.04, "step": 38274 }, { "epoch": 0.6778325297630907, "grad_norm": 0.5831766128540039, "learning_rate": 7.450629596477517e-06, "loss": 0.0726, "step": 38275 }, { "epoch": 0.6778502393001191, "grad_norm": 0.988601565361023, "learning_rate": 7.449886153410922e-06, "loss": 0.0704, "step": 38276 }, { "epoch": 0.6778679488371475, "grad_norm": 0.6984094977378845, "learning_rate": 7.449142735183427e-06, "loss": 0.0708, "step": 38277 }, { "epoch": 0.6778856583741759, "grad_norm": 0.4484163224697113, "learning_rate": 7.448399341797494e-06, "loss": 0.0464, "step": 38278 }, { "epoch": 0.6779033679112044, "grad_norm": 0.7605729103088379, "learning_rate": 7.447655973255565e-06, "loss": 0.0457, "step": 38279 }, { "epoch": 0.6779210774482328, "grad_norm": 0.8165556192398071, "learning_rate": 7.4469126295600884e-06, "loss": 0.0747, "step": 38280 }, { "epoch": 0.6779387869852612, "grad_norm": 0.3899425268173218, "learning_rate": 7.4461693107135005e-06, "loss": 0.0476, "step": 38281 }, { "epoch": 0.6779564965222896, "grad_norm": 0.2433270812034607, "learning_rate": 7.44542601671825e-06, "loss": 0.0615, "step": 38282 }, { "epoch": 0.6779742060593181, "grad_norm": 0.5116838812828064, "learning_rate": 7.444682747576791e-06, "loss": 0.0646, "step": 38283 }, { "epoch": 0.6779919155963465, "grad_norm": 0.4306999146938324, "learning_rate": 7.443939503291555e-06, "loss": 0.0438, "step": 38284 }, { "epoch": 0.6780096251333749, "grad_norm": 1.1371570825576782, "learning_rate": 7.443196283864995e-06, "loss": 0.0888, "step": 38285 }, { "epoch": 0.6780273346704033, "grad_norm": 0.608661413192749, "learning_rate": 7.442453089299554e-06, "loss": 0.0776, "step": 38286 }, { "epoch": 0.6780450442074318, "grad_norm": 0.5538753867149353, "learning_rate": 7.441709919597678e-06, "loss": 0.071, "step": 38287 }, { "epoch": 0.6780627537444602, "grad_norm": 0.7076241374015808, "learning_rate": 7.440966774761811e-06, "loss": 0.054, "step": 38288 }, { "epoch": 0.6780804632814886, "grad_norm": 0.41416123509407043, "learning_rate": 7.4402236547944e-06, "loss": 0.0666, "step": 38289 }, { "epoch": 0.6780981728185171, "grad_norm": 0.6413224339485168, "learning_rate": 7.439480559697894e-06, "loss": 0.0767, "step": 38290 }, { "epoch": 0.6781158823555455, "grad_norm": 0.3906920850276947, "learning_rate": 7.438737489474726e-06, "loss": 0.0353, "step": 38291 }, { "epoch": 0.6781335918925739, "grad_norm": 0.6906353831291199, "learning_rate": 7.437994444127347e-06, "loss": 0.0819, "step": 38292 }, { "epoch": 0.6781513014296023, "grad_norm": 0.7685886025428772, "learning_rate": 7.437251423658202e-06, "loss": 0.0656, "step": 38293 }, { "epoch": 0.6781690109666308, "grad_norm": 0.4117257297039032, "learning_rate": 7.436508428069741e-06, "loss": 0.0235, "step": 38294 }, { "epoch": 0.6781867205036592, "grad_norm": 0.856452465057373, "learning_rate": 7.435765457364396e-06, "loss": 0.0546, "step": 38295 }, { "epoch": 0.6782044300406876, "grad_norm": 0.870400607585907, "learning_rate": 7.435022511544618e-06, "loss": 0.0746, "step": 38296 }, { "epoch": 0.678222139577716, "grad_norm": 0.7333844900131226, "learning_rate": 7.4342795906128495e-06, "loss": 0.0943, "step": 38297 }, { "epoch": 0.6782398491147446, "grad_norm": 0.6176676154136658, "learning_rate": 7.433536694571536e-06, "loss": 0.0534, "step": 38298 }, { "epoch": 0.678257558651773, "grad_norm": 0.8453832864761353, "learning_rate": 7.432793823423123e-06, "loss": 0.0977, "step": 38299 }, { "epoch": 0.6782752681888014, "grad_norm": 0.6567992568016052, "learning_rate": 7.432050977170051e-06, "loss": 0.049, "step": 38300 }, { "epoch": 0.6782929777258297, "grad_norm": 0.5644246935844421, "learning_rate": 7.431308155814773e-06, "loss": 0.0896, "step": 38301 }, { "epoch": 0.6783106872628583, "grad_norm": 0.3599648177623749, "learning_rate": 7.430565359359719e-06, "loss": 0.0836, "step": 38302 }, { "epoch": 0.6783283967998867, "grad_norm": 0.5709099173545837, "learning_rate": 7.429822587807341e-06, "loss": 0.0671, "step": 38303 }, { "epoch": 0.6783461063369151, "grad_norm": 0.9575028419494629, "learning_rate": 7.429079841160085e-06, "loss": 0.089, "step": 38304 }, { "epoch": 0.6783638158739436, "grad_norm": 0.4690053164958954, "learning_rate": 7.428337119420386e-06, "loss": 0.0621, "step": 38305 }, { "epoch": 0.678381525410972, "grad_norm": 0.7379372119903564, "learning_rate": 7.427594422590686e-06, "loss": 0.0516, "step": 38306 }, { "epoch": 0.6783992349480004, "grad_norm": 0.46891579031944275, "learning_rate": 7.42685175067344e-06, "loss": 0.0515, "step": 38307 }, { "epoch": 0.6784169444850288, "grad_norm": 0.4883614480495453, "learning_rate": 7.426109103671091e-06, "loss": 0.0989, "step": 38308 }, { "epoch": 0.6784346540220573, "grad_norm": 0.4525202810764313, "learning_rate": 7.425366481586071e-06, "loss": 0.0572, "step": 38309 }, { "epoch": 0.6784523635590857, "grad_norm": 0.27435824275016785, "learning_rate": 7.42462388442083e-06, "loss": 0.033, "step": 38310 }, { "epoch": 0.6784700730961141, "grad_norm": 0.867226243019104, "learning_rate": 7.423881312177812e-06, "loss": 0.0699, "step": 38311 }, { "epoch": 0.6784877826331425, "grad_norm": 0.5271586775779724, "learning_rate": 7.42313876485946e-06, "loss": 0.0483, "step": 38312 }, { "epoch": 0.678505492170171, "grad_norm": 0.4583030641078949, "learning_rate": 7.422396242468213e-06, "loss": 0.0622, "step": 38313 }, { "epoch": 0.6785232017071994, "grad_norm": 0.9726884365081787, "learning_rate": 7.421653745006513e-06, "loss": 0.101, "step": 38314 }, { "epoch": 0.6785409112442278, "grad_norm": 0.43691015243530273, "learning_rate": 7.4209112724768065e-06, "loss": 0.0654, "step": 38315 }, { "epoch": 0.6785586207812562, "grad_norm": 0.3230000436306, "learning_rate": 7.420168824881537e-06, "loss": 0.0491, "step": 38316 }, { "epoch": 0.6785763303182847, "grad_norm": 0.669059157371521, "learning_rate": 7.419426402223144e-06, "loss": 0.0653, "step": 38317 }, { "epoch": 0.6785940398553131, "grad_norm": 0.8603572845458984, "learning_rate": 7.4186840045040725e-06, "loss": 0.0753, "step": 38318 }, { "epoch": 0.6786117493923415, "grad_norm": 0.5730043649673462, "learning_rate": 7.4179416317267675e-06, "loss": 0.0749, "step": 38319 }, { "epoch": 0.67862945892937, "grad_norm": 0.6485480070114136, "learning_rate": 7.417199283893663e-06, "loss": 0.0451, "step": 38320 }, { "epoch": 0.6786471684663984, "grad_norm": 0.6841661930084229, "learning_rate": 7.416456961007207e-06, "loss": 0.0681, "step": 38321 }, { "epoch": 0.6786648780034268, "grad_norm": 0.6373723745346069, "learning_rate": 7.415714663069846e-06, "loss": 0.0404, "step": 38322 }, { "epoch": 0.6786825875404552, "grad_norm": 0.6859942674636841, "learning_rate": 7.41497239008401e-06, "loss": 0.116, "step": 38323 }, { "epoch": 0.6787002970774837, "grad_norm": 0.7050678133964539, "learning_rate": 7.414230142052147e-06, "loss": 0.0658, "step": 38324 }, { "epoch": 0.6787180066145121, "grad_norm": 0.4448108971118927, "learning_rate": 7.413487918976702e-06, "loss": 0.0606, "step": 38325 }, { "epoch": 0.6787357161515405, "grad_norm": 0.5803583860397339, "learning_rate": 7.412745720860112e-06, "loss": 0.0554, "step": 38326 }, { "epoch": 0.6787534256885689, "grad_norm": 0.3805846571922302, "learning_rate": 7.412003547704822e-06, "loss": 0.0569, "step": 38327 }, { "epoch": 0.6787711352255974, "grad_norm": 0.6600638031959534, "learning_rate": 7.411261399513273e-06, "loss": 0.0512, "step": 38328 }, { "epoch": 0.6787888447626258, "grad_norm": 0.6292304396629333, "learning_rate": 7.410519276287913e-06, "loss": 0.0532, "step": 38329 }, { "epoch": 0.6788065542996542, "grad_norm": 0.48651450872421265, "learning_rate": 7.409777178031169e-06, "loss": 0.082, "step": 38330 }, { "epoch": 0.6788242638366826, "grad_norm": 0.4987376928329468, "learning_rate": 7.409035104745492e-06, "loss": 0.0744, "step": 38331 }, { "epoch": 0.6788419733737111, "grad_norm": 0.6151513457298279, "learning_rate": 7.408293056433323e-06, "loss": 0.0861, "step": 38332 }, { "epoch": 0.6788596829107395, "grad_norm": 0.7207847833633423, "learning_rate": 7.407551033097105e-06, "loss": 0.0538, "step": 38333 }, { "epoch": 0.6788773924477679, "grad_norm": 0.796795129776001, "learning_rate": 7.406809034739271e-06, "loss": 0.0609, "step": 38334 }, { "epoch": 0.6788951019847964, "grad_norm": 0.7840015888214111, "learning_rate": 7.406067061362262e-06, "loss": 0.092, "step": 38335 }, { "epoch": 0.6789128115218248, "grad_norm": 0.6412080526351929, "learning_rate": 7.405325112968537e-06, "loss": 0.0535, "step": 38336 }, { "epoch": 0.6789305210588532, "grad_norm": 0.5572493076324463, "learning_rate": 7.404583189560517e-06, "loss": 0.0567, "step": 38337 }, { "epoch": 0.6789482305958816, "grad_norm": 0.3891819715499878, "learning_rate": 7.403841291140651e-06, "loss": 0.0538, "step": 38338 }, { "epoch": 0.6789659401329101, "grad_norm": 0.5390622019767761, "learning_rate": 7.4030994177113775e-06, "loss": 0.0636, "step": 38339 }, { "epoch": 0.6789836496699385, "grad_norm": 0.29685458540916443, "learning_rate": 7.402357569275145e-06, "loss": 0.0374, "step": 38340 }, { "epoch": 0.6790013592069669, "grad_norm": 0.5510160326957703, "learning_rate": 7.401615745834383e-06, "loss": 0.0325, "step": 38341 }, { "epoch": 0.6790190687439953, "grad_norm": 0.8226115107536316, "learning_rate": 7.400873947391535e-06, "loss": 0.0436, "step": 38342 }, { "epoch": 0.6790367782810238, "grad_norm": 0.537932276725769, "learning_rate": 7.400132173949044e-06, "loss": 0.0405, "step": 38343 }, { "epoch": 0.6790544878180522, "grad_norm": 0.719114363193512, "learning_rate": 7.399390425509349e-06, "loss": 0.0325, "step": 38344 }, { "epoch": 0.6790721973550806, "grad_norm": 0.45760074257850647, "learning_rate": 7.398648702074892e-06, "loss": 0.0606, "step": 38345 }, { "epoch": 0.679089906892109, "grad_norm": 0.4653101861476898, "learning_rate": 7.397907003648111e-06, "loss": 0.0348, "step": 38346 }, { "epoch": 0.6791076164291375, "grad_norm": 0.38336122035980225, "learning_rate": 7.397165330231452e-06, "loss": 0.0681, "step": 38347 }, { "epoch": 0.6791253259661659, "grad_norm": 0.6973713636398315, "learning_rate": 7.396423681827345e-06, "loss": 0.0658, "step": 38348 }, { "epoch": 0.6791430355031943, "grad_norm": 0.38294553756713867, "learning_rate": 7.3956820584382355e-06, "loss": 0.0773, "step": 38349 }, { "epoch": 0.6791607450402228, "grad_norm": 0.8571380376815796, "learning_rate": 7.3949404600665615e-06, "loss": 0.0785, "step": 38350 }, { "epoch": 0.6791784545772512, "grad_norm": 0.4141157269477844, "learning_rate": 7.394198886714771e-06, "loss": 0.0628, "step": 38351 }, { "epoch": 0.6791961641142796, "grad_norm": 0.6951847076416016, "learning_rate": 7.393457338385291e-06, "loss": 0.0716, "step": 38352 }, { "epoch": 0.679213873651308, "grad_norm": 0.5903699994087219, "learning_rate": 7.392715815080567e-06, "loss": 0.0691, "step": 38353 }, { "epoch": 0.6792315831883365, "grad_norm": 0.38129281997680664, "learning_rate": 7.39197431680304e-06, "loss": 0.045, "step": 38354 }, { "epoch": 0.6792492927253649, "grad_norm": 0.5023608803749084, "learning_rate": 7.391232843555146e-06, "loss": 0.0674, "step": 38355 }, { "epoch": 0.6792670022623933, "grad_norm": 0.5018690228462219, "learning_rate": 7.3904913953393265e-06, "loss": 0.0566, "step": 38356 }, { "epoch": 0.6792847117994217, "grad_norm": 0.9390268921852112, "learning_rate": 7.38974997215802e-06, "loss": 0.0789, "step": 38357 }, { "epoch": 0.6793024213364502, "grad_norm": 0.3106801211833954, "learning_rate": 7.389008574013675e-06, "loss": 0.0422, "step": 38358 }, { "epoch": 0.6793201308734786, "grad_norm": 0.8479558825492859, "learning_rate": 7.3882672009087135e-06, "loss": 0.066, "step": 38359 }, { "epoch": 0.679337840410507, "grad_norm": 0.3459051251411438, "learning_rate": 7.387525852845584e-06, "loss": 0.0392, "step": 38360 }, { "epoch": 0.6793555499475354, "grad_norm": 0.7549366354942322, "learning_rate": 7.386784529826731e-06, "loss": 0.0822, "step": 38361 }, { "epoch": 0.679373259484564, "grad_norm": 0.7213097214698792, "learning_rate": 7.386043231854579e-06, "loss": 0.0747, "step": 38362 }, { "epoch": 0.6793909690215924, "grad_norm": 0.39308398962020874, "learning_rate": 7.385301958931577e-06, "loss": 0.0273, "step": 38363 }, { "epoch": 0.6794086785586207, "grad_norm": 0.519782304763794, "learning_rate": 7.384560711060154e-06, "loss": 0.0651, "step": 38364 }, { "epoch": 0.6794263880956493, "grad_norm": 0.5310434103012085, "learning_rate": 7.383819488242768e-06, "loss": 0.0436, "step": 38365 }, { "epoch": 0.6794440976326777, "grad_norm": 0.552146852016449, "learning_rate": 7.383078290481841e-06, "loss": 0.0834, "step": 38366 }, { "epoch": 0.6794618071697061, "grad_norm": 0.39680370688438416, "learning_rate": 7.382337117779815e-06, "loss": 0.0658, "step": 38367 }, { "epoch": 0.6794795167067345, "grad_norm": 0.6879393458366394, "learning_rate": 7.381595970139134e-06, "loss": 0.074, "step": 38368 }, { "epoch": 0.679497226243763, "grad_norm": 0.3474043905735016, "learning_rate": 7.3808548475622276e-06, "loss": 0.0465, "step": 38369 }, { "epoch": 0.6795149357807914, "grad_norm": 0.6511251330375671, "learning_rate": 7.380113750051538e-06, "loss": 0.0613, "step": 38370 }, { "epoch": 0.6795326453178198, "grad_norm": 0.31296586990356445, "learning_rate": 7.379372677609503e-06, "loss": 0.0684, "step": 38371 }, { "epoch": 0.6795503548548482, "grad_norm": 0.44521841406822205, "learning_rate": 7.378631630238566e-06, "loss": 0.0522, "step": 38372 }, { "epoch": 0.6795680643918767, "grad_norm": 0.4707525968551636, "learning_rate": 7.377890607941149e-06, "loss": 0.0691, "step": 38373 }, { "epoch": 0.6795857739289051, "grad_norm": 0.681000828742981, "learning_rate": 7.377149610719708e-06, "loss": 0.0666, "step": 38374 }, { "epoch": 0.6796034834659335, "grad_norm": 0.5771674513816833, "learning_rate": 7.376408638576679e-06, "loss": 0.0502, "step": 38375 }, { "epoch": 0.6796211930029619, "grad_norm": 0.6789135932922363, "learning_rate": 7.375667691514488e-06, "loss": 0.0941, "step": 38376 }, { "epoch": 0.6796389025399904, "grad_norm": 0.7382000088691711, "learning_rate": 7.37492676953558e-06, "loss": 0.0556, "step": 38377 }, { "epoch": 0.6796566120770188, "grad_norm": 0.5707647800445557, "learning_rate": 7.3741858726423915e-06, "loss": 0.0813, "step": 38378 }, { "epoch": 0.6796743216140472, "grad_norm": 0.6893614530563354, "learning_rate": 7.373445000837365e-06, "loss": 0.0602, "step": 38379 }, { "epoch": 0.6796920311510757, "grad_norm": 0.6504432559013367, "learning_rate": 7.3727041541229275e-06, "loss": 0.0848, "step": 38380 }, { "epoch": 0.6797097406881041, "grad_norm": 0.5062918663024902, "learning_rate": 7.371963332501523e-06, "loss": 0.0419, "step": 38381 }, { "epoch": 0.6797274502251325, "grad_norm": 0.5665552020072937, "learning_rate": 7.371222535975588e-06, "loss": 0.0688, "step": 38382 }, { "epoch": 0.6797451597621609, "grad_norm": 0.8107637763023376, "learning_rate": 7.370481764547558e-06, "loss": 0.0755, "step": 38383 }, { "epoch": 0.6797628692991894, "grad_norm": 0.5502197742462158, "learning_rate": 7.369741018219871e-06, "loss": 0.0414, "step": 38384 }, { "epoch": 0.6797805788362178, "grad_norm": 0.6707780361175537, "learning_rate": 7.369000296994965e-06, "loss": 0.0692, "step": 38385 }, { "epoch": 0.6797982883732462, "grad_norm": 0.4675992727279663, "learning_rate": 7.368259600875281e-06, "loss": 0.0468, "step": 38386 }, { "epoch": 0.6798159979102746, "grad_norm": 0.327850341796875, "learning_rate": 7.3675189298632466e-06, "loss": 0.0494, "step": 38387 }, { "epoch": 0.6798337074473031, "grad_norm": 0.6256169676780701, "learning_rate": 7.366778283961303e-06, "loss": 0.0637, "step": 38388 }, { "epoch": 0.6798514169843315, "grad_norm": 0.7397185564041138, "learning_rate": 7.366037663171886e-06, "loss": 0.071, "step": 38389 }, { "epoch": 0.6798691265213599, "grad_norm": 0.6316909790039062, "learning_rate": 7.36529706749744e-06, "loss": 0.0513, "step": 38390 }, { "epoch": 0.6798868360583883, "grad_norm": 0.43289199471473694, "learning_rate": 7.3645564969403865e-06, "loss": 0.0609, "step": 38391 }, { "epoch": 0.6799045455954168, "grad_norm": 0.4708855152130127, "learning_rate": 7.36381595150317e-06, "loss": 0.0613, "step": 38392 }, { "epoch": 0.6799222551324452, "grad_norm": 0.5220271348953247, "learning_rate": 7.3630754311882296e-06, "loss": 0.0506, "step": 38393 }, { "epoch": 0.6799399646694736, "grad_norm": 0.5960286259651184, "learning_rate": 7.362334935997995e-06, "loss": 0.0439, "step": 38394 }, { "epoch": 0.6799576742065021, "grad_norm": 0.5758195519447327, "learning_rate": 7.361594465934908e-06, "loss": 0.0625, "step": 38395 }, { "epoch": 0.6799753837435305, "grad_norm": 0.5458308458328247, "learning_rate": 7.360854021001403e-06, "loss": 0.0656, "step": 38396 }, { "epoch": 0.6799930932805589, "grad_norm": 0.47842028737068176, "learning_rate": 7.360113601199919e-06, "loss": 0.0718, "step": 38397 }, { "epoch": 0.6800108028175873, "grad_norm": 0.6418512463569641, "learning_rate": 7.359373206532885e-06, "loss": 0.0518, "step": 38398 }, { "epoch": 0.6800285123546158, "grad_norm": 0.48576775193214417, "learning_rate": 7.35863283700274e-06, "loss": 0.0401, "step": 38399 }, { "epoch": 0.6800462218916442, "grad_norm": 0.8023533225059509, "learning_rate": 7.357892492611926e-06, "loss": 0.0667, "step": 38400 }, { "epoch": 0.6800639314286726, "grad_norm": 0.6156736612319946, "learning_rate": 7.357152173362867e-06, "loss": 0.0746, "step": 38401 }, { "epoch": 0.680081640965701, "grad_norm": 0.6533995270729065, "learning_rate": 7.356411879257998e-06, "loss": 0.0618, "step": 38402 }, { "epoch": 0.6800993505027295, "grad_norm": 0.36849966645240784, "learning_rate": 7.3556716102997665e-06, "loss": 0.0673, "step": 38403 }, { "epoch": 0.6801170600397579, "grad_norm": 0.6117112040519714, "learning_rate": 7.354931366490607e-06, "loss": 0.045, "step": 38404 }, { "epoch": 0.6801347695767863, "grad_norm": 0.3991876244544983, "learning_rate": 7.354191147832947e-06, "loss": 0.039, "step": 38405 }, { "epoch": 0.6801524791138147, "grad_norm": 0.6507965326309204, "learning_rate": 7.353450954329222e-06, "loss": 0.0817, "step": 38406 }, { "epoch": 0.6801701886508432, "grad_norm": 0.4649338722229004, "learning_rate": 7.352710785981877e-06, "loss": 0.0619, "step": 38407 }, { "epoch": 0.6801878981878716, "grad_norm": 0.4659908711910248, "learning_rate": 7.3519706427933345e-06, "loss": 0.0565, "step": 38408 }, { "epoch": 0.6802056077249, "grad_norm": 0.5374787449836731, "learning_rate": 7.351230524766035e-06, "loss": 0.074, "step": 38409 }, { "epoch": 0.6802233172619285, "grad_norm": 0.3546587824821472, "learning_rate": 7.350490431902413e-06, "loss": 0.0576, "step": 38410 }, { "epoch": 0.6802410267989569, "grad_norm": 0.4766620695590973, "learning_rate": 7.349750364204904e-06, "loss": 0.0344, "step": 38411 }, { "epoch": 0.6802587363359853, "grad_norm": 0.9108274579048157, "learning_rate": 7.349010321675942e-06, "loss": 0.0706, "step": 38412 }, { "epoch": 0.6802764458730137, "grad_norm": 0.46562570333480835, "learning_rate": 7.348270304317964e-06, "loss": 0.0488, "step": 38413 }, { "epoch": 0.6802941554100422, "grad_norm": 0.8747015595436096, "learning_rate": 7.3475303121334e-06, "loss": 0.0533, "step": 38414 }, { "epoch": 0.6803118649470706, "grad_norm": 0.3273645341396332, "learning_rate": 7.346790345124695e-06, "loss": 0.0676, "step": 38415 }, { "epoch": 0.680329574484099, "grad_norm": 0.3248099386692047, "learning_rate": 7.346050403294271e-06, "loss": 0.0462, "step": 38416 }, { "epoch": 0.6803472840211274, "grad_norm": 0.650031328201294, "learning_rate": 7.345310486644563e-06, "loss": 0.0458, "step": 38417 }, { "epoch": 0.6803649935581559, "grad_norm": 0.4724784195423126, "learning_rate": 7.344570595178019e-06, "loss": 0.0572, "step": 38418 }, { "epoch": 0.6803827030951843, "grad_norm": 0.6239981055259705, "learning_rate": 7.3438307288970566e-06, "loss": 0.066, "step": 38419 }, { "epoch": 0.6804004126322127, "grad_norm": 0.6694153547286987, "learning_rate": 7.343090887804115e-06, "loss": 0.0746, "step": 38420 }, { "epoch": 0.6804181221692411, "grad_norm": 0.337423712015152, "learning_rate": 7.342351071901632e-06, "loss": 0.0354, "step": 38421 }, { "epoch": 0.6804358317062696, "grad_norm": 0.6572930216789246, "learning_rate": 7.341611281192039e-06, "loss": 0.0851, "step": 38422 }, { "epoch": 0.680453541243298, "grad_norm": 0.5013112425804138, "learning_rate": 7.340871515677769e-06, "loss": 0.042, "step": 38423 }, { "epoch": 0.6804712507803264, "grad_norm": 0.254723459482193, "learning_rate": 7.340131775361259e-06, "loss": 0.0343, "step": 38424 }, { "epoch": 0.680488960317355, "grad_norm": 0.6754701733589172, "learning_rate": 7.339392060244946e-06, "loss": 0.0611, "step": 38425 }, { "epoch": 0.6805066698543834, "grad_norm": 0.7077164649963379, "learning_rate": 7.338652370331252e-06, "loss": 0.0452, "step": 38426 }, { "epoch": 0.6805243793914117, "grad_norm": 0.6429126858711243, "learning_rate": 7.337912705622616e-06, "loss": 0.0716, "step": 38427 }, { "epoch": 0.6805420889284401, "grad_norm": 0.4565180540084839, "learning_rate": 7.337173066121474e-06, "loss": 0.0517, "step": 38428 }, { "epoch": 0.6805597984654687, "grad_norm": 0.6885573863983154, "learning_rate": 7.336433451830262e-06, "loss": 0.0395, "step": 38429 }, { "epoch": 0.6805775080024971, "grad_norm": 0.5742971897125244, "learning_rate": 7.335693862751404e-06, "loss": 0.0594, "step": 38430 }, { "epoch": 0.6805952175395255, "grad_norm": 0.4971732795238495, "learning_rate": 7.334954298887331e-06, "loss": 0.0361, "step": 38431 }, { "epoch": 0.6806129270765539, "grad_norm": 0.4346357583999634, "learning_rate": 7.334214760240495e-06, "loss": 0.0572, "step": 38432 }, { "epoch": 0.6806306366135824, "grad_norm": 0.6792575120925903, "learning_rate": 7.333475246813311e-06, "loss": 0.0552, "step": 38433 }, { "epoch": 0.6806483461506108, "grad_norm": 0.5823264718055725, "learning_rate": 7.332735758608219e-06, "loss": 0.0457, "step": 38434 }, { "epoch": 0.6806660556876392, "grad_norm": 0.515896737575531, "learning_rate": 7.3319962956276515e-06, "loss": 0.097, "step": 38435 }, { "epoch": 0.6806837652246677, "grad_norm": 0.5714995265007019, "learning_rate": 7.331256857874044e-06, "loss": 0.0674, "step": 38436 }, { "epoch": 0.6807014747616961, "grad_norm": 1.112493634223938, "learning_rate": 7.330517445349822e-06, "loss": 0.0552, "step": 38437 }, { "epoch": 0.6807191842987245, "grad_norm": 0.8555665612220764, "learning_rate": 7.3297780580574216e-06, "loss": 0.0835, "step": 38438 }, { "epoch": 0.6807368938357529, "grad_norm": 0.5980314016342163, "learning_rate": 7.329038695999275e-06, "loss": 0.0347, "step": 38439 }, { "epoch": 0.6807546033727814, "grad_norm": 0.6161836385726929, "learning_rate": 7.328299359177816e-06, "loss": 0.0512, "step": 38440 }, { "epoch": 0.6807723129098098, "grad_norm": 0.5752363204956055, "learning_rate": 7.3275600475954756e-06, "loss": 0.053, "step": 38441 }, { "epoch": 0.6807900224468382, "grad_norm": 0.7249048948287964, "learning_rate": 7.326820761254687e-06, "loss": 0.0469, "step": 38442 }, { "epoch": 0.6808077319838666, "grad_norm": 0.98150235414505, "learning_rate": 7.326081500157887e-06, "loss": 0.065, "step": 38443 }, { "epoch": 0.6808254415208951, "grad_norm": 0.6102765798568726, "learning_rate": 7.325342264307497e-06, "loss": 0.069, "step": 38444 }, { "epoch": 0.6808431510579235, "grad_norm": 0.460470974445343, "learning_rate": 7.324603053705955e-06, "loss": 0.0534, "step": 38445 }, { "epoch": 0.6808608605949519, "grad_norm": 0.44251397252082825, "learning_rate": 7.323863868355699e-06, "loss": 0.0525, "step": 38446 }, { "epoch": 0.6808785701319803, "grad_norm": 0.5703717470169067, "learning_rate": 7.323124708259148e-06, "loss": 0.0441, "step": 38447 }, { "epoch": 0.6808962796690088, "grad_norm": 0.25505879521369934, "learning_rate": 7.32238557341874e-06, "loss": 0.0276, "step": 38448 }, { "epoch": 0.6809139892060372, "grad_norm": 0.6242786645889282, "learning_rate": 7.321646463836907e-06, "loss": 0.054, "step": 38449 }, { "epoch": 0.6809316987430656, "grad_norm": 0.8369187116622925, "learning_rate": 7.3209073795160805e-06, "loss": 0.0737, "step": 38450 }, { "epoch": 0.6809494082800941, "grad_norm": 0.7662003636360168, "learning_rate": 7.320168320458693e-06, "loss": 0.0894, "step": 38451 }, { "epoch": 0.6809671178171225, "grad_norm": 0.3333909213542938, "learning_rate": 7.319429286667174e-06, "loss": 0.0671, "step": 38452 }, { "epoch": 0.6809848273541509, "grad_norm": 1.0925050973892212, "learning_rate": 7.318690278143956e-06, "loss": 0.0819, "step": 38453 }, { "epoch": 0.6810025368911793, "grad_norm": 0.35749727487564087, "learning_rate": 7.317951294891477e-06, "loss": 0.0436, "step": 38454 }, { "epoch": 0.6810202464282078, "grad_norm": 0.9134800434112549, "learning_rate": 7.3172123369121536e-06, "loss": 0.076, "step": 38455 }, { "epoch": 0.6810379559652362, "grad_norm": 0.36982932686805725, "learning_rate": 7.316473404208427e-06, "loss": 0.0514, "step": 38456 }, { "epoch": 0.6810556655022646, "grad_norm": 0.6576706767082214, "learning_rate": 7.315734496782729e-06, "loss": 0.0801, "step": 38457 }, { "epoch": 0.681073375039293, "grad_norm": 0.4812961220741272, "learning_rate": 7.314995614637483e-06, "loss": 0.0676, "step": 38458 }, { "epoch": 0.6810910845763215, "grad_norm": 0.3201483190059662, "learning_rate": 7.314256757775123e-06, "loss": 0.0588, "step": 38459 }, { "epoch": 0.6811087941133499, "grad_norm": 0.36106157302856445, "learning_rate": 7.313517926198075e-06, "loss": 0.0458, "step": 38460 }, { "epoch": 0.6811265036503783, "grad_norm": 0.5586209893226624, "learning_rate": 7.312779119908788e-06, "loss": 0.0556, "step": 38461 }, { "epoch": 0.6811442131874067, "grad_norm": 0.3228479325771332, "learning_rate": 7.312040338909675e-06, "loss": 0.0398, "step": 38462 }, { "epoch": 0.6811619227244352, "grad_norm": 0.7000734806060791, "learning_rate": 7.31130158320317e-06, "loss": 0.0684, "step": 38463 }, { "epoch": 0.6811796322614636, "grad_norm": 0.6417350172996521, "learning_rate": 7.310562852791713e-06, "loss": 0.0689, "step": 38464 }, { "epoch": 0.681197341798492, "grad_norm": 1.0567657947540283, "learning_rate": 7.309824147677719e-06, "loss": 0.1199, "step": 38465 }, { "epoch": 0.6812150513355205, "grad_norm": 0.4828307628631592, "learning_rate": 7.309085467863626e-06, "loss": 0.0432, "step": 38466 }, { "epoch": 0.6812327608725489, "grad_norm": 0.43380454182624817, "learning_rate": 7.3083468133518645e-06, "loss": 0.0606, "step": 38467 }, { "epoch": 0.6812504704095773, "grad_norm": 0.535917341709137, "learning_rate": 7.307608184144864e-06, "loss": 0.0669, "step": 38468 }, { "epoch": 0.6812681799466057, "grad_norm": 0.6589862108230591, "learning_rate": 7.306869580245054e-06, "loss": 0.0628, "step": 38469 }, { "epoch": 0.6812858894836342, "grad_norm": 0.317172646522522, "learning_rate": 7.306131001654865e-06, "loss": 0.0555, "step": 38470 }, { "epoch": 0.6813035990206626, "grad_norm": 0.45334333181381226, "learning_rate": 7.305392448376732e-06, "loss": 0.061, "step": 38471 }, { "epoch": 0.681321308557691, "grad_norm": 0.4246772229671478, "learning_rate": 7.304653920413075e-06, "loss": 0.0641, "step": 38472 }, { "epoch": 0.6813390180947194, "grad_norm": 0.5448575615882874, "learning_rate": 7.3039154177663285e-06, "loss": 0.064, "step": 38473 }, { "epoch": 0.6813567276317479, "grad_norm": 0.7330321073532104, "learning_rate": 7.303176940438921e-06, "loss": 0.0769, "step": 38474 }, { "epoch": 0.6813744371687763, "grad_norm": 0.4916219115257263, "learning_rate": 7.302438488433288e-06, "loss": 0.0547, "step": 38475 }, { "epoch": 0.6813921467058047, "grad_norm": 0.5291367173194885, "learning_rate": 7.30170006175185e-06, "loss": 0.085, "step": 38476 }, { "epoch": 0.6814098562428331, "grad_norm": 0.6954715251922607, "learning_rate": 7.300961660397039e-06, "loss": 0.0507, "step": 38477 }, { "epoch": 0.6814275657798616, "grad_norm": 0.47576725482940674, "learning_rate": 7.300223284371286e-06, "loss": 0.0492, "step": 38478 }, { "epoch": 0.68144527531689, "grad_norm": 0.4470580220222473, "learning_rate": 7.2994849336770195e-06, "loss": 0.0449, "step": 38479 }, { "epoch": 0.6814629848539184, "grad_norm": 0.5364404916763306, "learning_rate": 7.298746608316668e-06, "loss": 0.0666, "step": 38480 }, { "epoch": 0.6814806943909469, "grad_norm": 0.44471919536590576, "learning_rate": 7.298008308292661e-06, "loss": 0.0421, "step": 38481 }, { "epoch": 0.6814984039279753, "grad_norm": 0.898277997970581, "learning_rate": 7.297270033607434e-06, "loss": 0.0663, "step": 38482 }, { "epoch": 0.6815161134650037, "grad_norm": 0.6995343565940857, "learning_rate": 7.296531784263404e-06, "loss": 0.078, "step": 38483 }, { "epoch": 0.6815338230020321, "grad_norm": 0.3452029526233673, "learning_rate": 7.295793560263005e-06, "loss": 0.0719, "step": 38484 }, { "epoch": 0.6815515325390606, "grad_norm": 0.6791184544563293, "learning_rate": 7.295055361608667e-06, "loss": 0.0518, "step": 38485 }, { "epoch": 0.681569242076089, "grad_norm": 0.6022734642028809, "learning_rate": 7.294317188302821e-06, "loss": 0.0607, "step": 38486 }, { "epoch": 0.6815869516131174, "grad_norm": 0.4490354657173157, "learning_rate": 7.293579040347888e-06, "loss": 0.0398, "step": 38487 }, { "epoch": 0.6816046611501458, "grad_norm": 0.7348992228507996, "learning_rate": 7.2928409177462995e-06, "loss": 0.0697, "step": 38488 }, { "epoch": 0.6816223706871744, "grad_norm": 0.37335237860679626, "learning_rate": 7.292102820500485e-06, "loss": 0.0793, "step": 38489 }, { "epoch": 0.6816400802242027, "grad_norm": 0.5484915971755981, "learning_rate": 7.291364748612873e-06, "loss": 0.0554, "step": 38490 }, { "epoch": 0.6816577897612311, "grad_norm": 0.7756544351577759, "learning_rate": 7.290626702085893e-06, "loss": 0.0588, "step": 38491 }, { "epoch": 0.6816754992982595, "grad_norm": 0.348170667886734, "learning_rate": 7.28988868092197e-06, "loss": 0.0367, "step": 38492 }, { "epoch": 0.6816932088352881, "grad_norm": 0.6073037981987, "learning_rate": 7.289150685123537e-06, "loss": 0.0563, "step": 38493 }, { "epoch": 0.6817109183723165, "grad_norm": 0.557449996471405, "learning_rate": 7.288412714693015e-06, "loss": 0.0622, "step": 38494 }, { "epoch": 0.6817286279093449, "grad_norm": 0.8350997567176819, "learning_rate": 7.287674769632834e-06, "loss": 0.0899, "step": 38495 }, { "epoch": 0.6817463374463734, "grad_norm": 0.4134054481983185, "learning_rate": 7.28693684994543e-06, "loss": 0.0589, "step": 38496 }, { "epoch": 0.6817640469834018, "grad_norm": 0.5504549741744995, "learning_rate": 7.286198955633212e-06, "loss": 0.0526, "step": 38497 }, { "epoch": 0.6817817565204302, "grad_norm": 0.7740695476531982, "learning_rate": 7.285461086698624e-06, "loss": 0.0769, "step": 38498 }, { "epoch": 0.6817994660574586, "grad_norm": 0.5442845821380615, "learning_rate": 7.28472324314409e-06, "loss": 0.0308, "step": 38499 }, { "epoch": 0.6818171755944871, "grad_norm": 0.5234203338623047, "learning_rate": 7.283985424972041e-06, "loss": 0.0657, "step": 38500 }, { "epoch": 0.6818348851315155, "grad_norm": 0.710039496421814, "learning_rate": 7.283247632184894e-06, "loss": 0.0641, "step": 38501 }, { "epoch": 0.6818525946685439, "grad_norm": 0.6848850846290588, "learning_rate": 7.282509864785081e-06, "loss": 0.0997, "step": 38502 }, { "epoch": 0.6818703042055723, "grad_norm": 0.47093671560287476, "learning_rate": 7.281772122775036e-06, "loss": 0.0642, "step": 38503 }, { "epoch": 0.6818880137426008, "grad_norm": 1.0578508377075195, "learning_rate": 7.281034406157176e-06, "loss": 0.0873, "step": 38504 }, { "epoch": 0.6819057232796292, "grad_norm": 0.7397794127464294, "learning_rate": 7.28029671493393e-06, "loss": 0.059, "step": 38505 }, { "epoch": 0.6819234328166576, "grad_norm": 0.7121281623840332, "learning_rate": 7.279559049107728e-06, "loss": 0.0795, "step": 38506 }, { "epoch": 0.681941142353686, "grad_norm": 0.34800922870635986, "learning_rate": 7.278821408680996e-06, "loss": 0.0745, "step": 38507 }, { "epoch": 0.6819588518907145, "grad_norm": 0.7667157053947449, "learning_rate": 7.27808379365616e-06, "loss": 0.0659, "step": 38508 }, { "epoch": 0.6819765614277429, "grad_norm": 0.3623006343841553, "learning_rate": 7.2773462040356475e-06, "loss": 0.0554, "step": 38509 }, { "epoch": 0.6819942709647713, "grad_norm": 0.6304323673248291, "learning_rate": 7.276608639821892e-06, "loss": 0.0655, "step": 38510 }, { "epoch": 0.6820119805017998, "grad_norm": 1.2752431631088257, "learning_rate": 7.275871101017306e-06, "loss": 0.0799, "step": 38511 }, { "epoch": 0.6820296900388282, "grad_norm": 0.553439736366272, "learning_rate": 7.275133587624321e-06, "loss": 0.061, "step": 38512 }, { "epoch": 0.6820473995758566, "grad_norm": 0.5491536855697632, "learning_rate": 7.274396099645367e-06, "loss": 0.0639, "step": 38513 }, { "epoch": 0.682065109112885, "grad_norm": 0.5082502365112305, "learning_rate": 7.273658637082875e-06, "loss": 0.0849, "step": 38514 }, { "epoch": 0.6820828186499135, "grad_norm": 0.7945688366889954, "learning_rate": 7.272921199939257e-06, "loss": 0.0801, "step": 38515 }, { "epoch": 0.6821005281869419, "grad_norm": 0.6381534934043884, "learning_rate": 7.272183788216949e-06, "loss": 0.0635, "step": 38516 }, { "epoch": 0.6821182377239703, "grad_norm": 0.8868677020072937, "learning_rate": 7.271446401918372e-06, "loss": 0.074, "step": 38517 }, { "epoch": 0.6821359472609987, "grad_norm": 0.3741518557071686, "learning_rate": 7.270709041045957e-06, "loss": 0.0713, "step": 38518 }, { "epoch": 0.6821536567980272, "grad_norm": 0.8493117690086365, "learning_rate": 7.269971705602126e-06, "loss": 0.0535, "step": 38519 }, { "epoch": 0.6821713663350556, "grad_norm": 0.7182974219322205, "learning_rate": 7.269234395589307e-06, "loss": 0.0658, "step": 38520 }, { "epoch": 0.682189075872084, "grad_norm": 0.38012462854385376, "learning_rate": 7.26849711100993e-06, "loss": 0.0438, "step": 38521 }, { "epoch": 0.6822067854091124, "grad_norm": 0.4478270411491394, "learning_rate": 7.26775985186641e-06, "loss": 0.0268, "step": 38522 }, { "epoch": 0.6822244949461409, "grad_norm": 0.5900539755821228, "learning_rate": 7.267022618161178e-06, "loss": 0.098, "step": 38523 }, { "epoch": 0.6822422044831693, "grad_norm": 0.5971800684928894, "learning_rate": 7.266285409896661e-06, "loss": 0.0657, "step": 38524 }, { "epoch": 0.6822599140201977, "grad_norm": 1.2833038568496704, "learning_rate": 7.265548227075286e-06, "loss": 0.0965, "step": 38525 }, { "epoch": 0.6822776235572262, "grad_norm": 0.780971348285675, "learning_rate": 7.264811069699465e-06, "loss": 0.0484, "step": 38526 }, { "epoch": 0.6822953330942546, "grad_norm": 0.4994297921657562, "learning_rate": 7.264073937771638e-06, "loss": 0.054, "step": 38527 }, { "epoch": 0.682313042631283, "grad_norm": 0.5022772550582886, "learning_rate": 7.263336831294233e-06, "loss": 0.0612, "step": 38528 }, { "epoch": 0.6823307521683114, "grad_norm": 0.6080663204193115, "learning_rate": 7.262599750269659e-06, "loss": 0.0793, "step": 38529 }, { "epoch": 0.6823484617053399, "grad_norm": 0.35251742601394653, "learning_rate": 7.261862694700352e-06, "loss": 0.04, "step": 38530 }, { "epoch": 0.6823661712423683, "grad_norm": 0.7118879556655884, "learning_rate": 7.261125664588732e-06, "loss": 0.1041, "step": 38531 }, { "epoch": 0.6823838807793967, "grad_norm": 0.5070626139640808, "learning_rate": 7.260388659937232e-06, "loss": 0.0706, "step": 38532 }, { "epoch": 0.6824015903164251, "grad_norm": 0.542817234992981, "learning_rate": 7.259651680748265e-06, "loss": 0.0659, "step": 38533 }, { "epoch": 0.6824192998534536, "grad_norm": 1.1956528425216675, "learning_rate": 7.258914727024261e-06, "loss": 0.073, "step": 38534 }, { "epoch": 0.682437009390482, "grad_norm": 0.5548606514930725, "learning_rate": 7.258177798767644e-06, "loss": 0.0418, "step": 38535 }, { "epoch": 0.6824547189275104, "grad_norm": 0.5357141494750977, "learning_rate": 7.2574408959808396e-06, "loss": 0.0426, "step": 38536 }, { "epoch": 0.6824724284645388, "grad_norm": 0.5539937019348145, "learning_rate": 7.256704018666271e-06, "loss": 0.0555, "step": 38537 }, { "epoch": 0.6824901380015673, "grad_norm": 0.772485613822937, "learning_rate": 7.255967166826364e-06, "loss": 0.0766, "step": 38538 }, { "epoch": 0.6825078475385957, "grad_norm": 0.4690293073654175, "learning_rate": 7.255230340463546e-06, "loss": 0.044, "step": 38539 }, { "epoch": 0.6825255570756241, "grad_norm": 0.6310676336288452, "learning_rate": 7.254493539580233e-06, "loss": 0.089, "step": 38540 }, { "epoch": 0.6825432666126526, "grad_norm": 0.5325954556465149, "learning_rate": 7.253756764178853e-06, "loss": 0.0539, "step": 38541 }, { "epoch": 0.682560976149681, "grad_norm": 0.676851749420166, "learning_rate": 7.253020014261834e-06, "loss": 0.0495, "step": 38542 }, { "epoch": 0.6825786856867094, "grad_norm": 1.1589982509613037, "learning_rate": 7.252283289831589e-06, "loss": 0.0906, "step": 38543 }, { "epoch": 0.6825963952237378, "grad_norm": 0.577974796295166, "learning_rate": 7.25154659089055e-06, "loss": 0.0569, "step": 38544 }, { "epoch": 0.6826141047607663, "grad_norm": 0.3312862813472748, "learning_rate": 7.250809917441138e-06, "loss": 0.0647, "step": 38545 }, { "epoch": 0.6826318142977947, "grad_norm": 0.5155723690986633, "learning_rate": 7.25007326948578e-06, "loss": 0.0708, "step": 38546 }, { "epoch": 0.6826495238348231, "grad_norm": 0.5502867698669434, "learning_rate": 7.249336647026895e-06, "loss": 0.0518, "step": 38547 }, { "epoch": 0.6826672333718515, "grad_norm": 0.525514543056488, "learning_rate": 7.2486000500669075e-06, "loss": 0.0622, "step": 38548 }, { "epoch": 0.68268494290888, "grad_norm": 0.378902405500412, "learning_rate": 7.24786347860825e-06, "loss": 0.0591, "step": 38549 }, { "epoch": 0.6827026524459084, "grad_norm": 0.5659523606300354, "learning_rate": 7.2471269326533295e-06, "loss": 0.0522, "step": 38550 }, { "epoch": 0.6827203619829368, "grad_norm": 0.609886884689331, "learning_rate": 7.24639041220458e-06, "loss": 0.0894, "step": 38551 }, { "epoch": 0.6827380715199652, "grad_norm": 0.5205552577972412, "learning_rate": 7.245653917264419e-06, "loss": 0.0661, "step": 38552 }, { "epoch": 0.6827557810569937, "grad_norm": 0.6505902409553528, "learning_rate": 7.244917447835279e-06, "loss": 0.0484, "step": 38553 }, { "epoch": 0.6827734905940221, "grad_norm": 0.5973420143127441, "learning_rate": 7.244181003919571e-06, "loss": 0.0657, "step": 38554 }, { "epoch": 0.6827912001310505, "grad_norm": 0.7218610048294067, "learning_rate": 7.243444585519718e-06, "loss": 0.0588, "step": 38555 }, { "epoch": 0.6828089096680791, "grad_norm": 0.5614100098609924, "learning_rate": 7.242708192638153e-06, "loss": 0.0935, "step": 38556 }, { "epoch": 0.6828266192051075, "grad_norm": 0.8323377370834351, "learning_rate": 7.2419718252772975e-06, "loss": 0.0466, "step": 38557 }, { "epoch": 0.6828443287421359, "grad_norm": 2.595736503601074, "learning_rate": 7.241235483439566e-06, "loss": 0.0322, "step": 38558 }, { "epoch": 0.6828620382791643, "grad_norm": 0.536072313785553, "learning_rate": 7.240499167127385e-06, "loss": 0.0551, "step": 38559 }, { "epoch": 0.6828797478161928, "grad_norm": 0.6823137998580933, "learning_rate": 7.239762876343182e-06, "loss": 0.0604, "step": 38560 }, { "epoch": 0.6828974573532212, "grad_norm": 0.7519164085388184, "learning_rate": 7.239026611089368e-06, "loss": 0.0497, "step": 38561 }, { "epoch": 0.6829151668902496, "grad_norm": 0.5082029104232788, "learning_rate": 7.238290371368373e-06, "loss": 0.0732, "step": 38562 }, { "epoch": 0.682932876427278, "grad_norm": 0.8432684540748596, "learning_rate": 7.237554157182616e-06, "loss": 0.0579, "step": 38563 }, { "epoch": 0.6829505859643065, "grad_norm": 0.4918568730354309, "learning_rate": 7.236817968534522e-06, "loss": 0.0564, "step": 38564 }, { "epoch": 0.6829682955013349, "grad_norm": 0.7907721996307373, "learning_rate": 7.236081805426511e-06, "loss": 0.0619, "step": 38565 }, { "epoch": 0.6829860050383633, "grad_norm": 0.36126214265823364, "learning_rate": 7.235345667861006e-06, "loss": 0.0495, "step": 38566 }, { "epoch": 0.6830037145753917, "grad_norm": 0.8012994527816772, "learning_rate": 7.234609555840434e-06, "loss": 0.061, "step": 38567 }, { "epoch": 0.6830214241124202, "grad_norm": 0.49446842074394226, "learning_rate": 7.2338734693672046e-06, "loss": 0.056, "step": 38568 }, { "epoch": 0.6830391336494486, "grad_norm": 0.5244859457015991, "learning_rate": 7.233137408443748e-06, "loss": 0.0513, "step": 38569 }, { "epoch": 0.683056843186477, "grad_norm": 0.5321987271308899, "learning_rate": 7.232401373072482e-06, "loss": 0.0813, "step": 38570 }, { "epoch": 0.6830745527235055, "grad_norm": 0.6996946334838867, "learning_rate": 7.2316653632558365e-06, "loss": 0.0633, "step": 38571 }, { "epoch": 0.6830922622605339, "grad_norm": 0.41282740235328674, "learning_rate": 7.23092937899622e-06, "loss": 0.0806, "step": 38572 }, { "epoch": 0.6831099717975623, "grad_norm": 0.8125931024551392, "learning_rate": 7.230193420296059e-06, "loss": 0.0643, "step": 38573 }, { "epoch": 0.6831276813345907, "grad_norm": 0.5581484436988831, "learning_rate": 7.2294574871577764e-06, "loss": 0.0356, "step": 38574 }, { "epoch": 0.6831453908716192, "grad_norm": 0.5935699343681335, "learning_rate": 7.228721579583795e-06, "loss": 0.0875, "step": 38575 }, { "epoch": 0.6831631004086476, "grad_norm": 0.5893053412437439, "learning_rate": 7.2279856975765315e-06, "loss": 0.0439, "step": 38576 }, { "epoch": 0.683180809945676, "grad_norm": 0.3447984457015991, "learning_rate": 7.2272498411384084e-06, "loss": 0.0464, "step": 38577 }, { "epoch": 0.6831985194827044, "grad_norm": 0.7554436922073364, "learning_rate": 7.226514010271854e-06, "loss": 0.0565, "step": 38578 }, { "epoch": 0.6832162290197329, "grad_norm": 0.7230610847473145, "learning_rate": 7.225778204979276e-06, "loss": 0.0441, "step": 38579 }, { "epoch": 0.6832339385567613, "grad_norm": 0.6367328763008118, "learning_rate": 7.2250424252631015e-06, "loss": 0.0714, "step": 38580 }, { "epoch": 0.6832516480937897, "grad_norm": 0.5770665407180786, "learning_rate": 7.224306671125758e-06, "loss": 0.0717, "step": 38581 }, { "epoch": 0.6832693576308181, "grad_norm": 0.5722227692604065, "learning_rate": 7.223570942569651e-06, "loss": 0.063, "step": 38582 }, { "epoch": 0.6832870671678466, "grad_norm": 0.5372116565704346, "learning_rate": 7.222835239597212e-06, "loss": 0.055, "step": 38583 }, { "epoch": 0.683304776704875, "grad_norm": 0.4023679196834564, "learning_rate": 7.22209956221085e-06, "loss": 0.0351, "step": 38584 }, { "epoch": 0.6833224862419034, "grad_norm": 0.43625524640083313, "learning_rate": 7.221363910413006e-06, "loss": 0.0444, "step": 38585 }, { "epoch": 0.6833401957789319, "grad_norm": 0.8008030652999878, "learning_rate": 7.220628284206082e-06, "loss": 0.0503, "step": 38586 }, { "epoch": 0.6833579053159603, "grad_norm": 0.6614018082618713, "learning_rate": 7.219892683592505e-06, "loss": 0.0826, "step": 38587 }, { "epoch": 0.6833756148529887, "grad_norm": 0.759267270565033, "learning_rate": 7.219157108574694e-06, "loss": 0.0692, "step": 38588 }, { "epoch": 0.6833933243900171, "grad_norm": 0.4477349817752838, "learning_rate": 7.218421559155075e-06, "loss": 0.0597, "step": 38589 }, { "epoch": 0.6834110339270456, "grad_norm": 0.8007886409759521, "learning_rate": 7.217686035336056e-06, "loss": 0.0805, "step": 38590 }, { "epoch": 0.683428743464074, "grad_norm": 0.8826005458831787, "learning_rate": 7.216950537120063e-06, "loss": 0.085, "step": 38591 }, { "epoch": 0.6834464530011024, "grad_norm": 0.6891465783119202, "learning_rate": 7.216215064509514e-06, "loss": 0.0579, "step": 38592 }, { "epoch": 0.6834641625381308, "grad_norm": 0.7387345433235168, "learning_rate": 7.215479617506831e-06, "loss": 0.072, "step": 38593 }, { "epoch": 0.6834818720751593, "grad_norm": 0.45387595891952515, "learning_rate": 7.2147441961144345e-06, "loss": 0.0395, "step": 38594 }, { "epoch": 0.6834995816121877, "grad_norm": 0.3172288239002228, "learning_rate": 7.214008800334741e-06, "loss": 0.0571, "step": 38595 }, { "epoch": 0.6835172911492161, "grad_norm": 0.6543704867362976, "learning_rate": 7.213273430170176e-06, "loss": 0.064, "step": 38596 }, { "epoch": 0.6835350006862445, "grad_norm": 0.2827073633670807, "learning_rate": 7.2125380856231485e-06, "loss": 0.0553, "step": 38597 }, { "epoch": 0.683552710223273, "grad_norm": 0.2706853449344635, "learning_rate": 7.211802766696083e-06, "loss": 0.0283, "step": 38598 }, { "epoch": 0.6835704197603014, "grad_norm": 0.8167213797569275, "learning_rate": 7.211067473391404e-06, "loss": 0.0473, "step": 38599 }, { "epoch": 0.6835881292973298, "grad_norm": 0.576166570186615, "learning_rate": 7.210332205711522e-06, "loss": 0.068, "step": 38600 }, { "epoch": 0.6836058388343583, "grad_norm": 0.6560747623443604, "learning_rate": 7.209596963658856e-06, "loss": 0.043, "step": 38601 }, { "epoch": 0.6836235483713867, "grad_norm": 0.5572945475578308, "learning_rate": 7.20886174723583e-06, "loss": 0.0755, "step": 38602 }, { "epoch": 0.6836412579084151, "grad_norm": 0.7734816074371338, "learning_rate": 7.20812655644486e-06, "loss": 0.0665, "step": 38603 }, { "epoch": 0.6836589674454435, "grad_norm": 0.42134344577789307, "learning_rate": 7.207391391288365e-06, "loss": 0.0688, "step": 38604 }, { "epoch": 0.683676676982472, "grad_norm": 0.4371420443058014, "learning_rate": 7.206656251768764e-06, "loss": 0.0795, "step": 38605 }, { "epoch": 0.6836943865195004, "grad_norm": 0.48241961002349854, "learning_rate": 7.205921137888483e-06, "loss": 0.0707, "step": 38606 }, { "epoch": 0.6837120960565288, "grad_norm": 0.6930524706840515, "learning_rate": 7.205186049649927e-06, "loss": 0.0612, "step": 38607 }, { "epoch": 0.6837298055935572, "grad_norm": 0.6002151370048523, "learning_rate": 7.20445098705552e-06, "loss": 0.0669, "step": 38608 }, { "epoch": 0.6837475151305857, "grad_norm": 0.555454671382904, "learning_rate": 7.2037159501076805e-06, "loss": 0.0547, "step": 38609 }, { "epoch": 0.6837652246676141, "grad_norm": 0.6685702204704285, "learning_rate": 7.202980938808834e-06, "loss": 0.0578, "step": 38610 }, { "epoch": 0.6837829342046425, "grad_norm": 0.8113680481910706, "learning_rate": 7.202245953161384e-06, "loss": 0.0893, "step": 38611 }, { "epoch": 0.6838006437416709, "grad_norm": 0.5273265838623047, "learning_rate": 7.201510993167757e-06, "loss": 0.0729, "step": 38612 }, { "epoch": 0.6838183532786994, "grad_norm": 0.6007371544837952, "learning_rate": 7.20077605883037e-06, "loss": 0.0549, "step": 38613 }, { "epoch": 0.6838360628157278, "grad_norm": 0.46802014112472534, "learning_rate": 7.200041150151641e-06, "loss": 0.0412, "step": 38614 }, { "epoch": 0.6838537723527562, "grad_norm": 0.7567278742790222, "learning_rate": 7.1993062671339886e-06, "loss": 0.0556, "step": 38615 }, { "epoch": 0.6838714818897847, "grad_norm": 0.621548056602478, "learning_rate": 7.198571409779829e-06, "loss": 0.0434, "step": 38616 }, { "epoch": 0.6838891914268131, "grad_norm": 0.8219435214996338, "learning_rate": 7.197836578091586e-06, "loss": 0.0622, "step": 38617 }, { "epoch": 0.6839069009638415, "grad_norm": 0.7180850505828857, "learning_rate": 7.197101772071665e-06, "loss": 0.0691, "step": 38618 }, { "epoch": 0.68392461050087, "grad_norm": 0.3554854393005371, "learning_rate": 7.19636699172249e-06, "loss": 0.0722, "step": 38619 }, { "epoch": 0.6839423200378985, "grad_norm": 0.44051235914230347, "learning_rate": 7.19563223704648e-06, "loss": 0.0703, "step": 38620 }, { "epoch": 0.6839600295749269, "grad_norm": 0.507667064666748, "learning_rate": 7.194897508046048e-06, "loss": 0.0691, "step": 38621 }, { "epoch": 0.6839777391119553, "grad_norm": 0.5726436376571655, "learning_rate": 7.194162804723617e-06, "loss": 0.0542, "step": 38622 }, { "epoch": 0.6839954486489837, "grad_norm": 0.29882925748825073, "learning_rate": 7.193428127081599e-06, "loss": 0.0456, "step": 38623 }, { "epoch": 0.6840131581860122, "grad_norm": 0.6589863300323486, "learning_rate": 7.192693475122419e-06, "loss": 0.0439, "step": 38624 }, { "epoch": 0.6840308677230406, "grad_norm": 0.34564685821533203, "learning_rate": 7.191958848848481e-06, "loss": 0.0532, "step": 38625 }, { "epoch": 0.684048577260069, "grad_norm": 0.5326846837997437, "learning_rate": 7.191224248262209e-06, "loss": 0.0596, "step": 38626 }, { "epoch": 0.6840662867970974, "grad_norm": 0.8125612735748291, "learning_rate": 7.19048967336602e-06, "loss": 0.0534, "step": 38627 }, { "epoch": 0.6840839963341259, "grad_norm": 0.5135592222213745, "learning_rate": 7.189755124162335e-06, "loss": 0.0602, "step": 38628 }, { "epoch": 0.6841017058711543, "grad_norm": 0.8133426308631897, "learning_rate": 7.189020600653561e-06, "loss": 0.0631, "step": 38629 }, { "epoch": 0.6841194154081827, "grad_norm": 0.4236142337322235, "learning_rate": 7.1882861028421185e-06, "loss": 0.0543, "step": 38630 }, { "epoch": 0.6841371249452112, "grad_norm": 0.5439202785491943, "learning_rate": 7.187551630730426e-06, "loss": 0.083, "step": 38631 }, { "epoch": 0.6841548344822396, "grad_norm": 0.423846960067749, "learning_rate": 7.186817184320896e-06, "loss": 0.0785, "step": 38632 }, { "epoch": 0.684172544019268, "grad_norm": 0.6963368654251099, "learning_rate": 7.1860827636159504e-06, "loss": 0.0646, "step": 38633 }, { "epoch": 0.6841902535562964, "grad_norm": 0.837020993232727, "learning_rate": 7.185348368617999e-06, "loss": 0.0514, "step": 38634 }, { "epoch": 0.6842079630933249, "grad_norm": 0.4150172173976898, "learning_rate": 7.184613999329468e-06, "loss": 0.0203, "step": 38635 }, { "epoch": 0.6842256726303533, "grad_norm": 0.43471112847328186, "learning_rate": 7.183879655752761e-06, "loss": 0.0467, "step": 38636 }, { "epoch": 0.6842433821673817, "grad_norm": 0.39385712146759033, "learning_rate": 7.1831453378903e-06, "loss": 0.0498, "step": 38637 }, { "epoch": 0.6842610917044101, "grad_norm": 0.9762836694717407, "learning_rate": 7.182411045744505e-06, "loss": 0.0608, "step": 38638 }, { "epoch": 0.6842788012414386, "grad_norm": 0.31523919105529785, "learning_rate": 7.181676779317781e-06, "loss": 0.0555, "step": 38639 }, { "epoch": 0.684296510778467, "grad_norm": 0.5038267970085144, "learning_rate": 7.180942538612551e-06, "loss": 0.0584, "step": 38640 }, { "epoch": 0.6843142203154954, "grad_norm": 0.4455162286758423, "learning_rate": 7.180208323631228e-06, "loss": 0.0432, "step": 38641 }, { "epoch": 0.6843319298525238, "grad_norm": 0.4661824107170105, "learning_rate": 7.179474134376229e-06, "loss": 0.066, "step": 38642 }, { "epoch": 0.6843496393895523, "grad_norm": 0.7274842262268066, "learning_rate": 7.178739970849968e-06, "loss": 0.0663, "step": 38643 }, { "epoch": 0.6843673489265807, "grad_norm": 0.8661350607872009, "learning_rate": 7.178005833054863e-06, "loss": 0.0771, "step": 38644 }, { "epoch": 0.6843850584636091, "grad_norm": 0.30870360136032104, "learning_rate": 7.177271720993333e-06, "loss": 0.0411, "step": 38645 }, { "epoch": 0.6844027680006376, "grad_norm": 0.6271585822105408, "learning_rate": 7.176537634667781e-06, "loss": 0.0724, "step": 38646 }, { "epoch": 0.684420477537666, "grad_norm": 0.8899362087249756, "learning_rate": 7.175803574080631e-06, "loss": 0.0727, "step": 38647 }, { "epoch": 0.6844381870746944, "grad_norm": 0.6852053999900818, "learning_rate": 7.175069539234295e-06, "loss": 0.0535, "step": 38648 }, { "epoch": 0.6844558966117228, "grad_norm": 0.7209928035736084, "learning_rate": 7.1743355301311935e-06, "loss": 0.0593, "step": 38649 }, { "epoch": 0.6844736061487513, "grad_norm": 0.46809104084968567, "learning_rate": 7.173601546773726e-06, "loss": 0.0563, "step": 38650 }, { "epoch": 0.6844913156857797, "grad_norm": 0.8685495257377625, "learning_rate": 7.172867589164324e-06, "loss": 0.0786, "step": 38651 }, { "epoch": 0.6845090252228081, "grad_norm": 0.6897310614585876, "learning_rate": 7.172133657305399e-06, "loss": 0.0464, "step": 38652 }, { "epoch": 0.6845267347598365, "grad_norm": 0.5042226910591125, "learning_rate": 7.17139975119936e-06, "loss": 0.0793, "step": 38653 }, { "epoch": 0.684544444296865, "grad_norm": 0.6051994562149048, "learning_rate": 7.170665870848623e-06, "loss": 0.0809, "step": 38654 }, { "epoch": 0.6845621538338934, "grad_norm": 0.7256962656974792, "learning_rate": 7.169932016255604e-06, "loss": 0.0655, "step": 38655 }, { "epoch": 0.6845798633709218, "grad_norm": 0.8597670197486877, "learning_rate": 7.169198187422721e-06, "loss": 0.0537, "step": 38656 }, { "epoch": 0.6845975729079502, "grad_norm": 0.5495101809501648, "learning_rate": 7.168464384352379e-06, "loss": 0.042, "step": 38657 }, { "epoch": 0.6846152824449787, "grad_norm": 0.6511800289154053, "learning_rate": 7.1677306070469965e-06, "loss": 0.0353, "step": 38658 }, { "epoch": 0.6846329919820071, "grad_norm": 0.6714553236961365, "learning_rate": 7.166996855508989e-06, "loss": 0.0748, "step": 38659 }, { "epoch": 0.6846507015190355, "grad_norm": 0.3444053828716278, "learning_rate": 7.16626312974077e-06, "loss": 0.0631, "step": 38660 }, { "epoch": 0.684668411056064, "grad_norm": 0.5114291906356812, "learning_rate": 7.165529429744751e-06, "loss": 0.0736, "step": 38661 }, { "epoch": 0.6846861205930924, "grad_norm": 0.70450359582901, "learning_rate": 7.164795755523349e-06, "loss": 0.1075, "step": 38662 }, { "epoch": 0.6847038301301208, "grad_norm": 0.4875231087207794, "learning_rate": 7.164062107078982e-06, "loss": 0.0561, "step": 38663 }, { "epoch": 0.6847215396671492, "grad_norm": 0.5433787107467651, "learning_rate": 7.1633284844140535e-06, "loss": 0.0644, "step": 38664 }, { "epoch": 0.6847392492041777, "grad_norm": 0.6240085959434509, "learning_rate": 7.16259488753098e-06, "loss": 0.0761, "step": 38665 }, { "epoch": 0.6847569587412061, "grad_norm": 0.9806734919548035, "learning_rate": 7.161861316432178e-06, "loss": 0.039, "step": 38666 }, { "epoch": 0.6847746682782345, "grad_norm": 0.47422054409980774, "learning_rate": 7.1611277711200635e-06, "loss": 0.055, "step": 38667 }, { "epoch": 0.6847923778152629, "grad_norm": 1.130884051322937, "learning_rate": 7.160394251597042e-06, "loss": 0.0844, "step": 38668 }, { "epoch": 0.6848100873522914, "grad_norm": 0.9776528477668762, "learning_rate": 7.159660757865529e-06, "loss": 0.0858, "step": 38669 }, { "epoch": 0.6848277968893198, "grad_norm": 0.32425975799560547, "learning_rate": 7.158927289927939e-06, "loss": 0.0492, "step": 38670 }, { "epoch": 0.6848455064263482, "grad_norm": 0.46461769938468933, "learning_rate": 7.158193847786686e-06, "loss": 0.0421, "step": 38671 }, { "epoch": 0.6848632159633766, "grad_norm": 0.7904138565063477, "learning_rate": 7.157460431444181e-06, "loss": 0.0862, "step": 38672 }, { "epoch": 0.6848809255004051, "grad_norm": 0.5280763506889343, "learning_rate": 7.15672704090284e-06, "loss": 0.048, "step": 38673 }, { "epoch": 0.6848986350374335, "grad_norm": 0.585295557975769, "learning_rate": 7.1559936761650775e-06, "loss": 0.0886, "step": 38674 }, { "epoch": 0.6849163445744619, "grad_norm": 0.8487678170204163, "learning_rate": 7.155260337233298e-06, "loss": 0.0547, "step": 38675 }, { "epoch": 0.6849340541114904, "grad_norm": 0.589086651802063, "learning_rate": 7.154527024109918e-06, "loss": 0.0592, "step": 38676 }, { "epoch": 0.6849517636485188, "grad_norm": 0.7128205895423889, "learning_rate": 7.1537937367973555e-06, "loss": 0.0639, "step": 38677 }, { "epoch": 0.6849694731855472, "grad_norm": 0.5708039999008179, "learning_rate": 7.153060475298014e-06, "loss": 0.0673, "step": 38678 }, { "epoch": 0.6849871827225756, "grad_norm": 0.8052673935890198, "learning_rate": 7.152327239614303e-06, "loss": 0.1042, "step": 38679 }, { "epoch": 0.6850048922596041, "grad_norm": 0.49027979373931885, "learning_rate": 7.151594029748647e-06, "loss": 0.0422, "step": 38680 }, { "epoch": 0.6850226017966325, "grad_norm": 0.4930862784385681, "learning_rate": 7.15086084570346e-06, "loss": 0.0657, "step": 38681 }, { "epoch": 0.685040311333661, "grad_norm": 0.8083710074424744, "learning_rate": 7.150127687481141e-06, "loss": 0.0565, "step": 38682 }, { "epoch": 0.6850580208706893, "grad_norm": 0.4943476617336273, "learning_rate": 7.1493945550841065e-06, "loss": 0.0509, "step": 38683 }, { "epoch": 0.6850757304077179, "grad_norm": 0.2895425856113434, "learning_rate": 7.148661448514776e-06, "loss": 0.0299, "step": 38684 }, { "epoch": 0.6850934399447463, "grad_norm": 0.7517861127853394, "learning_rate": 7.14792836777555e-06, "loss": 0.0907, "step": 38685 }, { "epoch": 0.6851111494817747, "grad_norm": 0.5348360538482666, "learning_rate": 7.147195312868846e-06, "loss": 0.0481, "step": 38686 }, { "epoch": 0.685128859018803, "grad_norm": 0.5419220328330994, "learning_rate": 7.146462283797073e-06, "loss": 0.056, "step": 38687 }, { "epoch": 0.6851465685558316, "grad_norm": 0.5746733546257019, "learning_rate": 7.145729280562647e-06, "loss": 0.0434, "step": 38688 }, { "epoch": 0.68516427809286, "grad_norm": 0.5250232815742493, "learning_rate": 7.144996303167977e-06, "loss": 0.0855, "step": 38689 }, { "epoch": 0.6851819876298884, "grad_norm": 0.6958422064781189, "learning_rate": 7.1442633516154746e-06, "loss": 0.05, "step": 38690 }, { "epoch": 0.6851996971669169, "grad_norm": 0.8186337351799011, "learning_rate": 7.143530425907549e-06, "loss": 0.0443, "step": 38691 }, { "epoch": 0.6852174067039453, "grad_norm": 0.5134851932525635, "learning_rate": 7.1427975260466226e-06, "loss": 0.0586, "step": 38692 }, { "epoch": 0.6852351162409737, "grad_norm": 0.5709523558616638, "learning_rate": 7.14206465203509e-06, "loss": 0.0581, "step": 38693 }, { "epoch": 0.6852528257780021, "grad_norm": 0.7083277106285095, "learning_rate": 7.1413318038753696e-06, "loss": 0.0485, "step": 38694 }, { "epoch": 0.6852705353150306, "grad_norm": 0.7303839325904846, "learning_rate": 7.14059898156988e-06, "loss": 0.0844, "step": 38695 }, { "epoch": 0.685288244852059, "grad_norm": 0.5588252544403076, "learning_rate": 7.139866185121019e-06, "loss": 0.0645, "step": 38696 }, { "epoch": 0.6853059543890874, "grad_norm": 0.5813815593719482, "learning_rate": 7.139133414531203e-06, "loss": 0.0498, "step": 38697 }, { "epoch": 0.6853236639261158, "grad_norm": 0.5674326419830322, "learning_rate": 7.138400669802841e-06, "loss": 0.0662, "step": 38698 }, { "epoch": 0.6853413734631443, "grad_norm": 0.5322607159614563, "learning_rate": 7.137667950938347e-06, "loss": 0.0669, "step": 38699 }, { "epoch": 0.6853590830001727, "grad_norm": 0.7597225904464722, "learning_rate": 7.13693525794013e-06, "loss": 0.0697, "step": 38700 }, { "epoch": 0.6853767925372011, "grad_norm": 0.5389488935470581, "learning_rate": 7.136202590810603e-06, "loss": 0.0715, "step": 38701 }, { "epoch": 0.6853945020742295, "grad_norm": 0.35849010944366455, "learning_rate": 7.135469949552177e-06, "loss": 0.0433, "step": 38702 }, { "epoch": 0.685412211611258, "grad_norm": 0.3786361813545227, "learning_rate": 7.134737334167255e-06, "loss": 0.0337, "step": 38703 }, { "epoch": 0.6854299211482864, "grad_norm": 0.3950832486152649, "learning_rate": 7.134004744658251e-06, "loss": 0.0393, "step": 38704 }, { "epoch": 0.6854476306853148, "grad_norm": 0.637196958065033, "learning_rate": 7.133272181027577e-06, "loss": 0.0904, "step": 38705 }, { "epoch": 0.6854653402223433, "grad_norm": 0.4204862713813782, "learning_rate": 7.132539643277646e-06, "loss": 0.0628, "step": 38706 }, { "epoch": 0.6854830497593717, "grad_norm": 0.8602253198623657, "learning_rate": 7.131807131410858e-06, "loss": 0.0571, "step": 38707 }, { "epoch": 0.6855007592964001, "grad_norm": 0.535375714302063, "learning_rate": 7.131074645429625e-06, "loss": 0.0453, "step": 38708 }, { "epoch": 0.6855184688334285, "grad_norm": 0.6142808198928833, "learning_rate": 7.1303421853363705e-06, "loss": 0.0703, "step": 38709 }, { "epoch": 0.685536178370457, "grad_norm": 0.5236865282058716, "learning_rate": 7.129609751133489e-06, "loss": 0.0553, "step": 38710 }, { "epoch": 0.6855538879074854, "grad_norm": 0.5437151789665222, "learning_rate": 7.128877342823394e-06, "loss": 0.0603, "step": 38711 }, { "epoch": 0.6855715974445138, "grad_norm": 0.6305764317512512, "learning_rate": 7.128144960408498e-06, "loss": 0.0636, "step": 38712 }, { "epoch": 0.6855893069815422, "grad_norm": 0.7423421740531921, "learning_rate": 7.127412603891215e-06, "loss": 0.0531, "step": 38713 }, { "epoch": 0.6856070165185707, "grad_norm": 0.7403764724731445, "learning_rate": 7.12668027327394e-06, "loss": 0.047, "step": 38714 }, { "epoch": 0.6856247260555991, "grad_norm": 0.6806517839431763, "learning_rate": 7.125947968559092e-06, "loss": 0.0409, "step": 38715 }, { "epoch": 0.6856424355926275, "grad_norm": 0.7236927151679993, "learning_rate": 7.125215689749079e-06, "loss": 0.0475, "step": 38716 }, { "epoch": 0.6856601451296559, "grad_norm": 0.7771817445755005, "learning_rate": 7.124483436846311e-06, "loss": 0.0667, "step": 38717 }, { "epoch": 0.6856778546666844, "grad_norm": 0.7540528178215027, "learning_rate": 7.123751209853194e-06, "loss": 0.0501, "step": 38718 }, { "epoch": 0.6856955642037128, "grad_norm": 0.7879729866981506, "learning_rate": 7.123019008772141e-06, "loss": 0.0323, "step": 38719 }, { "epoch": 0.6857132737407412, "grad_norm": 0.6506496667861938, "learning_rate": 7.122286833605562e-06, "loss": 0.0714, "step": 38720 }, { "epoch": 0.6857309832777697, "grad_norm": 0.5419982671737671, "learning_rate": 7.121554684355859e-06, "loss": 0.0449, "step": 38721 }, { "epoch": 0.6857486928147981, "grad_norm": 0.6587454080581665, "learning_rate": 7.120822561025444e-06, "loss": 0.0534, "step": 38722 }, { "epoch": 0.6857664023518265, "grad_norm": 0.6474331617355347, "learning_rate": 7.120090463616729e-06, "loss": 0.0495, "step": 38723 }, { "epoch": 0.6857841118888549, "grad_norm": 0.44745612144470215, "learning_rate": 7.119358392132117e-06, "loss": 0.0679, "step": 38724 }, { "epoch": 0.6858018214258834, "grad_norm": 0.9441900849342346, "learning_rate": 7.118626346574017e-06, "loss": 0.0494, "step": 38725 }, { "epoch": 0.6858195309629118, "grad_norm": 0.33818623423576355, "learning_rate": 7.117894326944839e-06, "loss": 0.0342, "step": 38726 }, { "epoch": 0.6858372404999402, "grad_norm": 0.5918546319007874, "learning_rate": 7.117162333246992e-06, "loss": 0.0515, "step": 38727 }, { "epoch": 0.6858549500369686, "grad_norm": 0.7648893594741821, "learning_rate": 7.116430365482883e-06, "loss": 0.0684, "step": 38728 }, { "epoch": 0.6858726595739971, "grad_norm": 0.6901489496231079, "learning_rate": 7.115698423654921e-06, "loss": 0.0671, "step": 38729 }, { "epoch": 0.6858903691110255, "grad_norm": 0.6645207405090332, "learning_rate": 7.1149665077655156e-06, "loss": 0.089, "step": 38730 }, { "epoch": 0.6859080786480539, "grad_norm": 0.71430504322052, "learning_rate": 7.114234617817077e-06, "loss": 0.0736, "step": 38731 }, { "epoch": 0.6859257881850823, "grad_norm": 0.5356377363204956, "learning_rate": 7.113502753812003e-06, "loss": 0.0502, "step": 38732 }, { "epoch": 0.6859434977221108, "grad_norm": 0.4898093044757843, "learning_rate": 7.112770915752706e-06, "loss": 0.0641, "step": 38733 }, { "epoch": 0.6859612072591392, "grad_norm": 0.42703232169151306, "learning_rate": 7.112039103641603e-06, "loss": 0.0759, "step": 38734 }, { "epoch": 0.6859789167961676, "grad_norm": 0.660869300365448, "learning_rate": 7.111307317481086e-06, "loss": 0.0737, "step": 38735 }, { "epoch": 0.6859966263331961, "grad_norm": 0.43177133798599243, "learning_rate": 7.110575557273571e-06, "loss": 0.0538, "step": 38736 }, { "epoch": 0.6860143358702245, "grad_norm": 0.6356165409088135, "learning_rate": 7.109843823021458e-06, "loss": 0.0453, "step": 38737 }, { "epoch": 0.6860320454072529, "grad_norm": 0.22498494386672974, "learning_rate": 7.1091121147271735e-06, "loss": 0.0496, "step": 38738 }, { "epoch": 0.6860497549442813, "grad_norm": 0.5361613631248474, "learning_rate": 7.108380432393106e-06, "loss": 0.0507, "step": 38739 }, { "epoch": 0.6860674644813098, "grad_norm": 0.343487411737442, "learning_rate": 7.107648776021669e-06, "loss": 0.047, "step": 38740 }, { "epoch": 0.6860851740183382, "grad_norm": 0.639492928981781, "learning_rate": 7.106917145615274e-06, "loss": 0.0757, "step": 38741 }, { "epoch": 0.6861028835553666, "grad_norm": 0.7546170949935913, "learning_rate": 7.106185541176318e-06, "loss": 0.0694, "step": 38742 }, { "epoch": 0.686120593092395, "grad_norm": 1.111149787902832, "learning_rate": 7.105453962707212e-06, "loss": 0.0726, "step": 38743 }, { "epoch": 0.6861383026294235, "grad_norm": 0.3291189968585968, "learning_rate": 7.104722410210366e-06, "loss": 0.0938, "step": 38744 }, { "epoch": 0.686156012166452, "grad_norm": 0.45136258006095886, "learning_rate": 7.103990883688183e-06, "loss": 0.0469, "step": 38745 }, { "epoch": 0.6861737217034803, "grad_norm": 0.3550054728984833, "learning_rate": 7.103259383143073e-06, "loss": 0.0564, "step": 38746 }, { "epoch": 0.6861914312405087, "grad_norm": 0.6848828196525574, "learning_rate": 7.1025279085774395e-06, "loss": 0.0458, "step": 38747 }, { "epoch": 0.6862091407775373, "grad_norm": 0.7646388411521912, "learning_rate": 7.101796459993696e-06, "loss": 0.0642, "step": 38748 }, { "epoch": 0.6862268503145657, "grad_norm": 0.43319717049598694, "learning_rate": 7.101065037394239e-06, "loss": 0.0832, "step": 38749 }, { "epoch": 0.686244559851594, "grad_norm": 0.42878302931785583, "learning_rate": 7.100333640781478e-06, "loss": 0.0454, "step": 38750 }, { "epoch": 0.6862622693886226, "grad_norm": 0.5327896475791931, "learning_rate": 7.0996022701578215e-06, "loss": 0.0631, "step": 38751 }, { "epoch": 0.686279978925651, "grad_norm": 0.5286216735839844, "learning_rate": 7.098870925525679e-06, "loss": 0.0602, "step": 38752 }, { "epoch": 0.6862976884626794, "grad_norm": 0.5083455443382263, "learning_rate": 7.098139606887446e-06, "loss": 0.0551, "step": 38753 }, { "epoch": 0.6863153979997078, "grad_norm": 0.6242404580116272, "learning_rate": 7.097408314245536e-06, "loss": 0.0644, "step": 38754 }, { "epoch": 0.6863331075367363, "grad_norm": 0.57386714220047, "learning_rate": 7.096677047602352e-06, "loss": 0.0705, "step": 38755 }, { "epoch": 0.6863508170737647, "grad_norm": 0.24540306627750397, "learning_rate": 7.0959458069603e-06, "loss": 0.0417, "step": 38756 }, { "epoch": 0.6863685266107931, "grad_norm": 0.47262781858444214, "learning_rate": 7.09521459232179e-06, "loss": 0.0683, "step": 38757 }, { "epoch": 0.6863862361478215, "grad_norm": 0.40209782123565674, "learning_rate": 7.094483403689223e-06, "loss": 0.0361, "step": 38758 }, { "epoch": 0.68640394568485, "grad_norm": 0.4922196865081787, "learning_rate": 7.093752241065012e-06, "loss": 0.0569, "step": 38759 }, { "epoch": 0.6864216552218784, "grad_norm": 0.6356825828552246, "learning_rate": 7.09302110445155e-06, "loss": 0.0898, "step": 38760 }, { "epoch": 0.6864393647589068, "grad_norm": 0.38232696056365967, "learning_rate": 7.092289993851249e-06, "loss": 0.0793, "step": 38761 }, { "epoch": 0.6864570742959352, "grad_norm": 0.6629852652549744, "learning_rate": 7.091558909266514e-06, "loss": 0.0871, "step": 38762 }, { "epoch": 0.6864747838329637, "grad_norm": 0.49485480785369873, "learning_rate": 7.090827850699755e-06, "loss": 0.0364, "step": 38763 }, { "epoch": 0.6864924933699921, "grad_norm": 0.355541467666626, "learning_rate": 7.090096818153368e-06, "loss": 0.061, "step": 38764 }, { "epoch": 0.6865102029070205, "grad_norm": 0.5628626346588135, "learning_rate": 7.089365811629763e-06, "loss": 0.0698, "step": 38765 }, { "epoch": 0.686527912444049, "grad_norm": 0.7073285579681396, "learning_rate": 7.0886348311313435e-06, "loss": 0.0498, "step": 38766 }, { "epoch": 0.6865456219810774, "grad_norm": 0.8550578951835632, "learning_rate": 7.087903876660516e-06, "loss": 0.073, "step": 38767 }, { "epoch": 0.6865633315181058, "grad_norm": 0.7371887564659119, "learning_rate": 7.087172948219684e-06, "loss": 0.0635, "step": 38768 }, { "epoch": 0.6865810410551342, "grad_norm": 0.497516006231308, "learning_rate": 7.086442045811253e-06, "loss": 0.0561, "step": 38769 }, { "epoch": 0.6865987505921627, "grad_norm": 0.5973010659217834, "learning_rate": 7.085711169437632e-06, "loss": 0.0562, "step": 38770 }, { "epoch": 0.6866164601291911, "grad_norm": 0.41800549626350403, "learning_rate": 7.0849803191012156e-06, "loss": 0.0583, "step": 38771 }, { "epoch": 0.6866341696662195, "grad_norm": 0.6238904595375061, "learning_rate": 7.084249494804414e-06, "loss": 0.0793, "step": 38772 }, { "epoch": 0.6866518792032479, "grad_norm": 0.3823421001434326, "learning_rate": 7.083518696549636e-06, "loss": 0.0597, "step": 38773 }, { "epoch": 0.6866695887402764, "grad_norm": 0.376202791929245, "learning_rate": 7.082787924339269e-06, "loss": 0.0345, "step": 38774 }, { "epoch": 0.6866872982773048, "grad_norm": 0.5735582113265991, "learning_rate": 7.0820571781757354e-06, "loss": 0.0513, "step": 38775 }, { "epoch": 0.6867050078143332, "grad_norm": 0.45411354303359985, "learning_rate": 7.081326458061431e-06, "loss": 0.0465, "step": 38776 }, { "epoch": 0.6867227173513616, "grad_norm": 0.5569417476654053, "learning_rate": 7.080595763998771e-06, "loss": 0.0478, "step": 38777 }, { "epoch": 0.6867404268883901, "grad_norm": 0.49663570523262024, "learning_rate": 7.079865095990142e-06, "loss": 0.0566, "step": 38778 }, { "epoch": 0.6867581364254185, "grad_norm": 0.5182183384895325, "learning_rate": 7.079134454037955e-06, "loss": 0.0446, "step": 38779 }, { "epoch": 0.6867758459624469, "grad_norm": 0.2676619589328766, "learning_rate": 7.078403838144621e-06, "loss": 0.0516, "step": 38780 }, { "epoch": 0.6867935554994754, "grad_norm": 0.8351551294326782, "learning_rate": 7.077673248312531e-06, "loss": 0.0725, "step": 38781 }, { "epoch": 0.6868112650365038, "grad_norm": 0.5443168878555298, "learning_rate": 7.076942684544095e-06, "loss": 0.0451, "step": 38782 }, { "epoch": 0.6868289745735322, "grad_norm": 0.692988395690918, "learning_rate": 7.076212146841718e-06, "loss": 0.0733, "step": 38783 }, { "epoch": 0.6868466841105606, "grad_norm": 0.5580700635910034, "learning_rate": 7.0754816352078e-06, "loss": 0.0536, "step": 38784 }, { "epoch": 0.6868643936475891, "grad_norm": 0.40407437086105347, "learning_rate": 7.074751149644745e-06, "loss": 0.0632, "step": 38785 }, { "epoch": 0.6868821031846175, "grad_norm": 0.4125535190105438, "learning_rate": 7.074020690154958e-06, "loss": 0.0523, "step": 38786 }, { "epoch": 0.6868998127216459, "grad_norm": 0.9497816562652588, "learning_rate": 7.073290256740848e-06, "loss": 0.0654, "step": 38787 }, { "epoch": 0.6869175222586743, "grad_norm": 0.4729692339897156, "learning_rate": 7.072559849404804e-06, "loss": 0.0435, "step": 38788 }, { "epoch": 0.6869352317957028, "grad_norm": 0.5832818150520325, "learning_rate": 7.0718294681492366e-06, "loss": 0.0549, "step": 38789 }, { "epoch": 0.6869529413327312, "grad_norm": 0.5180973410606384, "learning_rate": 7.071099112976549e-06, "loss": 0.0528, "step": 38790 }, { "epoch": 0.6869706508697596, "grad_norm": 0.7255087494850159, "learning_rate": 7.070368783889149e-06, "loss": 0.0595, "step": 38791 }, { "epoch": 0.686988360406788, "grad_norm": 0.35241982340812683, "learning_rate": 7.0696384808894285e-06, "loss": 0.0432, "step": 38792 }, { "epoch": 0.6870060699438165, "grad_norm": 0.7909549474716187, "learning_rate": 7.068908203979794e-06, "loss": 0.0659, "step": 38793 }, { "epoch": 0.6870237794808449, "grad_norm": 0.6360375881195068, "learning_rate": 7.06817795316265e-06, "loss": 0.0671, "step": 38794 }, { "epoch": 0.6870414890178733, "grad_norm": 0.8830674290657043, "learning_rate": 7.067447728440397e-06, "loss": 0.0944, "step": 38795 }, { "epoch": 0.6870591985549018, "grad_norm": 0.7308847904205322, "learning_rate": 7.06671752981544e-06, "loss": 0.0446, "step": 38796 }, { "epoch": 0.6870769080919302, "grad_norm": 0.5192261934280396, "learning_rate": 7.06598735729018e-06, "loss": 0.0732, "step": 38797 }, { "epoch": 0.6870946176289586, "grad_norm": 0.7090926766395569, "learning_rate": 7.065257210867025e-06, "loss": 0.0566, "step": 38798 }, { "epoch": 0.687112327165987, "grad_norm": 0.5080969929695129, "learning_rate": 7.0645270905483646e-06, "loss": 0.0561, "step": 38799 }, { "epoch": 0.6871300367030155, "grad_norm": 1.014756679534912, "learning_rate": 7.063796996336607e-06, "loss": 0.0656, "step": 38800 }, { "epoch": 0.6871477462400439, "grad_norm": 0.9314982891082764, "learning_rate": 7.063066928234157e-06, "loss": 0.0661, "step": 38801 }, { "epoch": 0.6871654557770723, "grad_norm": 0.7425306439399719, "learning_rate": 7.062336886243416e-06, "loss": 0.0662, "step": 38802 }, { "epoch": 0.6871831653141007, "grad_norm": 0.5493040084838867, "learning_rate": 7.061606870366776e-06, "loss": 0.0662, "step": 38803 }, { "epoch": 0.6872008748511292, "grad_norm": 0.6052433848381042, "learning_rate": 7.06087688060665e-06, "loss": 0.0711, "step": 38804 }, { "epoch": 0.6872185843881576, "grad_norm": 0.6374469995498657, "learning_rate": 7.060146916965444e-06, "loss": 0.0457, "step": 38805 }, { "epoch": 0.687236293925186, "grad_norm": 0.562630832195282, "learning_rate": 7.059416979445542e-06, "loss": 0.0657, "step": 38806 }, { "epoch": 0.6872540034622144, "grad_norm": 0.5166643857955933, "learning_rate": 7.058687068049358e-06, "loss": 0.0712, "step": 38807 }, { "epoch": 0.687271712999243, "grad_norm": 0.5554556250572205, "learning_rate": 7.05795718277929e-06, "loss": 0.0613, "step": 38808 }, { "epoch": 0.6872894225362713, "grad_norm": 0.6830483078956604, "learning_rate": 7.0572273236377464e-06, "loss": 0.0785, "step": 38809 }, { "epoch": 0.6873071320732997, "grad_norm": 1.0193140506744385, "learning_rate": 7.056497490627113e-06, "loss": 0.0725, "step": 38810 }, { "epoch": 0.6873248416103283, "grad_norm": 0.5429612398147583, "learning_rate": 7.055767683749802e-06, "loss": 0.0442, "step": 38811 }, { "epoch": 0.6873425511473567, "grad_norm": 0.8172008991241455, "learning_rate": 7.05503790300821e-06, "loss": 0.0708, "step": 38812 }, { "epoch": 0.687360260684385, "grad_norm": 0.6141242980957031, "learning_rate": 7.054308148404741e-06, "loss": 0.067, "step": 38813 }, { "epoch": 0.6873779702214134, "grad_norm": 0.6173387169837952, "learning_rate": 7.053578419941793e-06, "loss": 0.0654, "step": 38814 }, { "epoch": 0.687395679758442, "grad_norm": 0.5835437178611755, "learning_rate": 7.05284871762177e-06, "loss": 0.0823, "step": 38815 }, { "epoch": 0.6874133892954704, "grad_norm": 0.682898759841919, "learning_rate": 7.0521190414470756e-06, "loss": 0.0589, "step": 38816 }, { "epoch": 0.6874310988324988, "grad_norm": 0.569776177406311, "learning_rate": 7.0513893914201e-06, "loss": 0.0599, "step": 38817 }, { "epoch": 0.6874488083695272, "grad_norm": 0.5370275378227234, "learning_rate": 7.05065976754325e-06, "loss": 0.0642, "step": 38818 }, { "epoch": 0.6874665179065557, "grad_norm": 0.5539945960044861, "learning_rate": 7.049930169818931e-06, "loss": 0.0747, "step": 38819 }, { "epoch": 0.6874842274435841, "grad_norm": 0.7802528738975525, "learning_rate": 7.049200598249531e-06, "loss": 0.0707, "step": 38820 }, { "epoch": 0.6875019369806125, "grad_norm": 0.9230250716209412, "learning_rate": 7.048471052837456e-06, "loss": 0.0609, "step": 38821 }, { "epoch": 0.6875196465176409, "grad_norm": 0.7601767778396606, "learning_rate": 7.047741533585109e-06, "loss": 0.0833, "step": 38822 }, { "epoch": 0.6875373560546694, "grad_norm": 0.8696222305297852, "learning_rate": 7.0470120404948876e-06, "loss": 0.0646, "step": 38823 }, { "epoch": 0.6875550655916978, "grad_norm": 0.5681825876235962, "learning_rate": 7.0462825735691926e-06, "loss": 0.0353, "step": 38824 }, { "epoch": 0.6875727751287262, "grad_norm": 0.19924703240394592, "learning_rate": 7.045553132810421e-06, "loss": 0.036, "step": 38825 }, { "epoch": 0.6875904846657547, "grad_norm": 0.4062608778476715, "learning_rate": 7.044823718220983e-06, "loss": 0.0379, "step": 38826 }, { "epoch": 0.6876081942027831, "grad_norm": 0.5263887643814087, "learning_rate": 7.044094329803266e-06, "loss": 0.062, "step": 38827 }, { "epoch": 0.6876259037398115, "grad_norm": 0.2591712772846222, "learning_rate": 7.043364967559671e-06, "loss": 0.0583, "step": 38828 }, { "epoch": 0.6876436132768399, "grad_norm": 0.8062665462493896, "learning_rate": 7.042635631492601e-06, "loss": 0.0535, "step": 38829 }, { "epoch": 0.6876613228138684, "grad_norm": 0.6333337426185608, "learning_rate": 7.041906321604461e-06, "loss": 0.0618, "step": 38830 }, { "epoch": 0.6876790323508968, "grad_norm": 0.4722730815410614, "learning_rate": 7.041177037897639e-06, "loss": 0.0403, "step": 38831 }, { "epoch": 0.6876967418879252, "grad_norm": 0.250643253326416, "learning_rate": 7.040447780374536e-06, "loss": 0.0458, "step": 38832 }, { "epoch": 0.6877144514249536, "grad_norm": 0.4042969048023224, "learning_rate": 7.0397185490375566e-06, "loss": 0.0423, "step": 38833 }, { "epoch": 0.6877321609619821, "grad_norm": 0.46146270632743835, "learning_rate": 7.038989343889106e-06, "loss": 0.0598, "step": 38834 }, { "epoch": 0.6877498704990105, "grad_norm": 1.0115689039230347, "learning_rate": 7.038260164931569e-06, "loss": 0.0747, "step": 38835 }, { "epoch": 0.6877675800360389, "grad_norm": 0.6450210809707642, "learning_rate": 7.03753101216735e-06, "loss": 0.0527, "step": 38836 }, { "epoch": 0.6877852895730673, "grad_norm": 0.729578971862793, "learning_rate": 7.036801885598855e-06, "loss": 0.0602, "step": 38837 }, { "epoch": 0.6878029991100958, "grad_norm": 0.7422145009040833, "learning_rate": 7.03607278522847e-06, "loss": 0.0626, "step": 38838 }, { "epoch": 0.6878207086471242, "grad_norm": 0.4377140700817108, "learning_rate": 7.035343711058601e-06, "loss": 0.0468, "step": 38839 }, { "epoch": 0.6878384181841526, "grad_norm": 0.5001653432846069, "learning_rate": 7.034614663091646e-06, "loss": 0.0681, "step": 38840 }, { "epoch": 0.6878561277211811, "grad_norm": 0.6567907333374023, "learning_rate": 7.033885641330003e-06, "loss": 0.0741, "step": 38841 }, { "epoch": 0.6878738372582095, "grad_norm": 0.49759647250175476, "learning_rate": 7.0331566457760685e-06, "loss": 0.0395, "step": 38842 }, { "epoch": 0.6878915467952379, "grad_norm": 0.7043688297271729, "learning_rate": 7.032427676432245e-06, "loss": 0.0787, "step": 38843 }, { "epoch": 0.6879092563322663, "grad_norm": 0.8505634665489197, "learning_rate": 7.0316987333009325e-06, "loss": 0.0514, "step": 38844 }, { "epoch": 0.6879269658692948, "grad_norm": 0.5551069378852844, "learning_rate": 7.0309698163845215e-06, "loss": 0.0776, "step": 38845 }, { "epoch": 0.6879446754063232, "grad_norm": 0.8979234099388123, "learning_rate": 7.0302409256854124e-06, "loss": 0.0604, "step": 38846 }, { "epoch": 0.6879623849433516, "grad_norm": 0.9253720641136169, "learning_rate": 7.029512061206006e-06, "loss": 0.0544, "step": 38847 }, { "epoch": 0.68798009448038, "grad_norm": 0.5126988291740417, "learning_rate": 7.028783222948702e-06, "loss": 0.0535, "step": 38848 }, { "epoch": 0.6879978040174085, "grad_norm": 0.6382798552513123, "learning_rate": 7.028054410915892e-06, "loss": 0.0798, "step": 38849 }, { "epoch": 0.6880155135544369, "grad_norm": 0.7963615655899048, "learning_rate": 7.027325625109975e-06, "loss": 0.0664, "step": 38850 }, { "epoch": 0.6880332230914653, "grad_norm": 0.7435243129730225, "learning_rate": 7.02659686553335e-06, "loss": 0.0755, "step": 38851 }, { "epoch": 0.6880509326284937, "grad_norm": 0.7917483448982239, "learning_rate": 7.025868132188416e-06, "loss": 0.0861, "step": 38852 }, { "epoch": 0.6880686421655222, "grad_norm": 0.4142736494541168, "learning_rate": 7.025139425077569e-06, "loss": 0.0531, "step": 38853 }, { "epoch": 0.6880863517025506, "grad_norm": 0.6369251608848572, "learning_rate": 7.024410744203206e-06, "loss": 0.0616, "step": 38854 }, { "epoch": 0.688104061239579, "grad_norm": 0.536657452583313, "learning_rate": 7.023682089567731e-06, "loss": 0.0942, "step": 38855 }, { "epoch": 0.6881217707766075, "grad_norm": 0.588498055934906, "learning_rate": 7.02295346117353e-06, "loss": 0.0387, "step": 38856 }, { "epoch": 0.6881394803136359, "grad_norm": 0.38093534111976624, "learning_rate": 7.022224859023006e-06, "loss": 0.0452, "step": 38857 }, { "epoch": 0.6881571898506643, "grad_norm": 0.6597825884819031, "learning_rate": 7.021496283118559e-06, "loss": 0.0839, "step": 38858 }, { "epoch": 0.6881748993876927, "grad_norm": 0.5998355746269226, "learning_rate": 7.020767733462578e-06, "loss": 0.0602, "step": 38859 }, { "epoch": 0.6881926089247212, "grad_norm": 0.5521284937858582, "learning_rate": 7.020039210057464e-06, "loss": 0.047, "step": 38860 }, { "epoch": 0.6882103184617496, "grad_norm": 0.400590181350708, "learning_rate": 7.0193107129056076e-06, "loss": 0.0266, "step": 38861 }, { "epoch": 0.688228027998778, "grad_norm": 0.7879428863525391, "learning_rate": 7.018582242009422e-06, "loss": 0.0536, "step": 38862 }, { "epoch": 0.6882457375358064, "grad_norm": 0.6828527450561523, "learning_rate": 7.0178537973712895e-06, "loss": 0.0279, "step": 38863 }, { "epoch": 0.6882634470728349, "grad_norm": 0.8311513662338257, "learning_rate": 7.017125378993611e-06, "loss": 0.0822, "step": 38864 }, { "epoch": 0.6882811566098633, "grad_norm": 0.8715770244598389, "learning_rate": 7.016396986878782e-06, "loss": 0.0538, "step": 38865 }, { "epoch": 0.6882988661468917, "grad_norm": 0.677772581577301, "learning_rate": 7.015668621029205e-06, "loss": 0.092, "step": 38866 }, { "epoch": 0.6883165756839201, "grad_norm": 0.7726327776908875, "learning_rate": 7.014940281447264e-06, "loss": 0.0606, "step": 38867 }, { "epoch": 0.6883342852209486, "grad_norm": 0.433826208114624, "learning_rate": 7.014211968135364e-06, "loss": 0.0663, "step": 38868 }, { "epoch": 0.688351994757977, "grad_norm": 0.33024862408638, "learning_rate": 7.013483681095897e-06, "loss": 0.0593, "step": 38869 }, { "epoch": 0.6883697042950054, "grad_norm": 0.6572424173355103, "learning_rate": 7.012755420331262e-06, "loss": 0.0781, "step": 38870 }, { "epoch": 0.688387413832034, "grad_norm": 0.7332528829574585, "learning_rate": 7.012027185843853e-06, "loss": 0.0555, "step": 38871 }, { "epoch": 0.6884051233690623, "grad_norm": 0.6067931652069092, "learning_rate": 7.011298977636068e-06, "loss": 0.04, "step": 38872 }, { "epoch": 0.6884228329060907, "grad_norm": 0.4474223256111145, "learning_rate": 7.010570795710307e-06, "loss": 0.0598, "step": 38873 }, { "epoch": 0.6884405424431191, "grad_norm": 0.7752330899238586, "learning_rate": 7.009842640068952e-06, "loss": 0.0504, "step": 38874 }, { "epoch": 0.6884582519801477, "grad_norm": 0.628231942653656, "learning_rate": 7.009114510714409e-06, "loss": 0.0726, "step": 38875 }, { "epoch": 0.688475961517176, "grad_norm": 0.7407528758049011, "learning_rate": 7.0083864076490754e-06, "loss": 0.0549, "step": 38876 }, { "epoch": 0.6884936710542044, "grad_norm": 0.7568052411079407, "learning_rate": 7.007658330875337e-06, "loss": 0.0607, "step": 38877 }, { "epoch": 0.6885113805912328, "grad_norm": 0.5910571217536926, "learning_rate": 7.0069302803955935e-06, "loss": 0.0629, "step": 38878 }, { "epoch": 0.6885290901282614, "grad_norm": 0.7849050760269165, "learning_rate": 7.0062022562122415e-06, "loss": 0.0541, "step": 38879 }, { "epoch": 0.6885467996652898, "grad_norm": 0.07519340515136719, "learning_rate": 7.005474258327676e-06, "loss": 0.0307, "step": 38880 }, { "epoch": 0.6885645092023182, "grad_norm": 0.7294987440109253, "learning_rate": 7.004746286744292e-06, "loss": 0.0737, "step": 38881 }, { "epoch": 0.6885822187393466, "grad_norm": 0.577410101890564, "learning_rate": 7.004018341464482e-06, "loss": 0.0683, "step": 38882 }, { "epoch": 0.6885999282763751, "grad_norm": 0.5695104002952576, "learning_rate": 7.003290422490651e-06, "loss": 0.0574, "step": 38883 }, { "epoch": 0.6886176378134035, "grad_norm": 0.37894725799560547, "learning_rate": 7.002562529825177e-06, "loss": 0.0553, "step": 38884 }, { "epoch": 0.6886353473504319, "grad_norm": 0.4960246682167053, "learning_rate": 7.001834663470466e-06, "loss": 0.0744, "step": 38885 }, { "epoch": 0.6886530568874604, "grad_norm": 0.7834652662277222, "learning_rate": 7.001106823428909e-06, "loss": 0.0531, "step": 38886 }, { "epoch": 0.6886707664244888, "grad_norm": 0.5866005420684814, "learning_rate": 7.000379009702909e-06, "loss": 0.0707, "step": 38887 }, { "epoch": 0.6886884759615172, "grad_norm": 0.5643951296806335, "learning_rate": 6.999651222294845e-06, "loss": 0.0483, "step": 38888 }, { "epoch": 0.6887061854985456, "grad_norm": 0.6921018362045288, "learning_rate": 6.99892346120712e-06, "loss": 0.0805, "step": 38889 }, { "epoch": 0.6887238950355741, "grad_norm": 0.4172089993953705, "learning_rate": 6.998195726442128e-06, "loss": 0.047, "step": 38890 }, { "epoch": 0.6887416045726025, "grad_norm": 0.796101450920105, "learning_rate": 6.997468018002262e-06, "loss": 0.0709, "step": 38891 }, { "epoch": 0.6887593141096309, "grad_norm": 0.7737014889717102, "learning_rate": 6.996740335889917e-06, "loss": 0.0695, "step": 38892 }, { "epoch": 0.6887770236466593, "grad_norm": 0.7776275873184204, "learning_rate": 6.996012680107487e-06, "loss": 0.0724, "step": 38893 }, { "epoch": 0.6887947331836878, "grad_norm": 0.6830191612243652, "learning_rate": 6.9952850506573725e-06, "loss": 0.0698, "step": 38894 }, { "epoch": 0.6888124427207162, "grad_norm": 0.5736362934112549, "learning_rate": 6.994557447541953e-06, "loss": 0.0601, "step": 38895 }, { "epoch": 0.6888301522577446, "grad_norm": 0.2390061765909195, "learning_rate": 6.993829870763631e-06, "loss": 0.029, "step": 38896 }, { "epoch": 0.688847861794773, "grad_norm": 0.36355289816856384, "learning_rate": 6.993102320324804e-06, "loss": 0.0413, "step": 38897 }, { "epoch": 0.6888655713318015, "grad_norm": 0.5512022972106934, "learning_rate": 6.992374796227849e-06, "loss": 0.0556, "step": 38898 }, { "epoch": 0.6888832808688299, "grad_norm": 0.6584532260894775, "learning_rate": 6.9916472984751765e-06, "loss": 0.0587, "step": 38899 }, { "epoch": 0.6889009904058583, "grad_norm": 0.7767027020454407, "learning_rate": 6.990919827069176e-06, "loss": 0.0835, "step": 38900 }, { "epoch": 0.6889186999428868, "grad_norm": 0.9114362597465515, "learning_rate": 6.990192382012242e-06, "loss": 0.0697, "step": 38901 }, { "epoch": 0.6889364094799152, "grad_norm": 0.7774176001548767, "learning_rate": 6.989464963306761e-06, "loss": 0.066, "step": 38902 }, { "epoch": 0.6889541190169436, "grad_norm": 0.6380574703216553, "learning_rate": 6.9887375709551315e-06, "loss": 0.0802, "step": 38903 }, { "epoch": 0.688971828553972, "grad_norm": 0.22514520585536957, "learning_rate": 6.9880102049597425e-06, "loss": 0.0385, "step": 38904 }, { "epoch": 0.6889895380910005, "grad_norm": 0.46783074736595154, "learning_rate": 6.9872828653229965e-06, "loss": 0.0466, "step": 38905 }, { "epoch": 0.6890072476280289, "grad_norm": 0.5007641315460205, "learning_rate": 6.986555552047274e-06, "loss": 0.0486, "step": 38906 }, { "epoch": 0.6890249571650573, "grad_norm": 0.40076199173927307, "learning_rate": 6.9858282651349735e-06, "loss": 0.0666, "step": 38907 }, { "epoch": 0.6890426667020857, "grad_norm": 0.30168837308883667, "learning_rate": 6.985101004588486e-06, "loss": 0.0608, "step": 38908 }, { "epoch": 0.6890603762391142, "grad_norm": 0.6678141355514526, "learning_rate": 6.984373770410206e-06, "loss": 0.0585, "step": 38909 }, { "epoch": 0.6890780857761426, "grad_norm": 0.6498220562934875, "learning_rate": 6.983646562602526e-06, "loss": 0.0635, "step": 38910 }, { "epoch": 0.689095795313171, "grad_norm": 0.48549631237983704, "learning_rate": 6.982919381167839e-06, "loss": 0.0529, "step": 38911 }, { "epoch": 0.6891135048501994, "grad_norm": 0.4746108651161194, "learning_rate": 6.98219222610854e-06, "loss": 0.0277, "step": 38912 }, { "epoch": 0.6891312143872279, "grad_norm": 0.44383570551872253, "learning_rate": 6.981465097427012e-06, "loss": 0.0386, "step": 38913 }, { "epoch": 0.6891489239242563, "grad_norm": 0.9792062640190125, "learning_rate": 6.980737995125655e-06, "loss": 0.0642, "step": 38914 }, { "epoch": 0.6891666334612847, "grad_norm": 0.6813965439796448, "learning_rate": 6.980010919206863e-06, "loss": 0.0638, "step": 38915 }, { "epoch": 0.6891843429983132, "grad_norm": 0.6986068487167358, "learning_rate": 6.9792838696730186e-06, "loss": 0.0722, "step": 38916 }, { "epoch": 0.6892020525353416, "grad_norm": 0.7371737360954285, "learning_rate": 6.978556846526519e-06, "loss": 0.0598, "step": 38917 }, { "epoch": 0.68921976207237, "grad_norm": 0.44427427649497986, "learning_rate": 6.977829849769756e-06, "loss": 0.0509, "step": 38918 }, { "epoch": 0.6892374716093984, "grad_norm": 0.5021054148674011, "learning_rate": 6.977102879405121e-06, "loss": 0.0475, "step": 38919 }, { "epoch": 0.6892551811464269, "grad_norm": 0.49166688323020935, "learning_rate": 6.976375935435007e-06, "loss": 0.0549, "step": 38920 }, { "epoch": 0.6892728906834553, "grad_norm": 0.6478099822998047, "learning_rate": 6.9756490178618035e-06, "loss": 0.0664, "step": 38921 }, { "epoch": 0.6892906002204837, "grad_norm": 0.39205923676490784, "learning_rate": 6.974922126687909e-06, "loss": 0.0522, "step": 38922 }, { "epoch": 0.6893083097575121, "grad_norm": 0.8112735152244568, "learning_rate": 6.974195261915704e-06, "loss": 0.0482, "step": 38923 }, { "epoch": 0.6893260192945406, "grad_norm": 0.9478189945220947, "learning_rate": 6.973468423547584e-06, "loss": 0.0818, "step": 38924 }, { "epoch": 0.689343728831569, "grad_norm": 0.6856480240821838, "learning_rate": 6.972741611585941e-06, "loss": 0.0805, "step": 38925 }, { "epoch": 0.6893614383685974, "grad_norm": 0.8672908544540405, "learning_rate": 6.972014826033173e-06, "loss": 0.1, "step": 38926 }, { "epoch": 0.6893791479056258, "grad_norm": 0.4779263138771057, "learning_rate": 6.971288066891652e-06, "loss": 0.0381, "step": 38927 }, { "epoch": 0.6893968574426543, "grad_norm": 0.5161029100418091, "learning_rate": 6.970561334163788e-06, "loss": 0.0565, "step": 38928 }, { "epoch": 0.6894145669796827, "grad_norm": 0.5228946805000305, "learning_rate": 6.9698346278519696e-06, "loss": 0.0455, "step": 38929 }, { "epoch": 0.6894322765167111, "grad_norm": 0.5934935808181763, "learning_rate": 6.969107947958577e-06, "loss": 0.0433, "step": 38930 }, { "epoch": 0.6894499860537396, "grad_norm": 0.6161235570907593, "learning_rate": 6.968381294486009e-06, "loss": 0.0771, "step": 38931 }, { "epoch": 0.689467695590768, "grad_norm": 0.7512436509132385, "learning_rate": 6.967654667436652e-06, "loss": 0.0627, "step": 38932 }, { "epoch": 0.6894854051277964, "grad_norm": 0.8079955577850342, "learning_rate": 6.966928066812905e-06, "loss": 0.0662, "step": 38933 }, { "epoch": 0.6895031146648248, "grad_norm": 0.8151757717132568, "learning_rate": 6.966201492617147e-06, "loss": 0.0733, "step": 38934 }, { "epoch": 0.6895208242018533, "grad_norm": 0.4719918668270111, "learning_rate": 6.965474944851775e-06, "loss": 0.0426, "step": 38935 }, { "epoch": 0.6895385337388817, "grad_norm": 0.5832774043083191, "learning_rate": 6.964748423519175e-06, "loss": 0.0582, "step": 38936 }, { "epoch": 0.6895562432759101, "grad_norm": 0.22560307383537292, "learning_rate": 6.964021928621741e-06, "loss": 0.0424, "step": 38937 }, { "epoch": 0.6895739528129385, "grad_norm": 0.4784731864929199, "learning_rate": 6.963295460161863e-06, "loss": 0.0602, "step": 38938 }, { "epoch": 0.689591662349967, "grad_norm": 0.5970650315284729, "learning_rate": 6.962569018141928e-06, "loss": 0.0674, "step": 38939 }, { "epoch": 0.6896093718869954, "grad_norm": 0.4174251854419708, "learning_rate": 6.961842602564334e-06, "loss": 0.0332, "step": 38940 }, { "epoch": 0.6896270814240238, "grad_norm": 0.4535749554634094, "learning_rate": 6.961116213431461e-06, "loss": 0.0562, "step": 38941 }, { "epoch": 0.6896447909610522, "grad_norm": 0.6368943452835083, "learning_rate": 6.960389850745702e-06, "loss": 0.0566, "step": 38942 }, { "epoch": 0.6896625004980808, "grad_norm": 0.5257525444030762, "learning_rate": 6.959663514509447e-06, "loss": 0.0521, "step": 38943 }, { "epoch": 0.6896802100351092, "grad_norm": 0.6466044187545776, "learning_rate": 6.958937204725092e-06, "loss": 0.0624, "step": 38944 }, { "epoch": 0.6896979195721376, "grad_norm": 0.7987437844276428, "learning_rate": 6.958210921395013e-06, "loss": 0.0526, "step": 38945 }, { "epoch": 0.6897156291091661, "grad_norm": 0.9951791167259216, "learning_rate": 6.957484664521608e-06, "loss": 0.0531, "step": 38946 }, { "epoch": 0.6897333386461945, "grad_norm": 0.6961665153503418, "learning_rate": 6.956758434107265e-06, "loss": 0.0802, "step": 38947 }, { "epoch": 0.6897510481832229, "grad_norm": 0.8791301846504211, "learning_rate": 6.956032230154372e-06, "loss": 0.0372, "step": 38948 }, { "epoch": 0.6897687577202513, "grad_norm": 0.6090919971466064, "learning_rate": 6.9553060526653195e-06, "loss": 0.0507, "step": 38949 }, { "epoch": 0.6897864672572798, "grad_norm": 0.7823660969734192, "learning_rate": 6.9545799016424975e-06, "loss": 0.0254, "step": 38950 }, { "epoch": 0.6898041767943082, "grad_norm": 0.6943137645721436, "learning_rate": 6.9538537770883e-06, "loss": 0.0487, "step": 38951 }, { "epoch": 0.6898218863313366, "grad_norm": 0.5592232346534729, "learning_rate": 6.953127679005102e-06, "loss": 0.0667, "step": 38952 }, { "epoch": 0.689839595868365, "grad_norm": 0.5009261965751648, "learning_rate": 6.952401607395302e-06, "loss": 0.0729, "step": 38953 }, { "epoch": 0.6898573054053935, "grad_norm": 0.280029833316803, "learning_rate": 6.951675562261291e-06, "loss": 0.0377, "step": 38954 }, { "epoch": 0.6898750149424219, "grad_norm": 0.3582899570465088, "learning_rate": 6.9509495436054495e-06, "loss": 0.0566, "step": 38955 }, { "epoch": 0.6898927244794503, "grad_norm": 0.60528564453125, "learning_rate": 6.9502235514301635e-06, "loss": 0.0608, "step": 38956 }, { "epoch": 0.6899104340164788, "grad_norm": 0.4995950758457184, "learning_rate": 6.949497585737832e-06, "loss": 0.0513, "step": 38957 }, { "epoch": 0.6899281435535072, "grad_norm": 0.4806676506996155, "learning_rate": 6.948771646530847e-06, "loss": 0.0649, "step": 38958 }, { "epoch": 0.6899458530905356, "grad_norm": 0.5465778112411499, "learning_rate": 6.948045733811582e-06, "loss": 0.0672, "step": 38959 }, { "epoch": 0.689963562627564, "grad_norm": 0.687839686870575, "learning_rate": 6.947319847582433e-06, "loss": 0.0695, "step": 38960 }, { "epoch": 0.6899812721645925, "grad_norm": 0.4458617866039276, "learning_rate": 6.94659398784579e-06, "loss": 0.0856, "step": 38961 }, { "epoch": 0.6899989817016209, "grad_norm": 0.5118972659111023, "learning_rate": 6.9458681546040355e-06, "loss": 0.0463, "step": 38962 }, { "epoch": 0.6900166912386493, "grad_norm": 0.5303992033004761, "learning_rate": 6.945142347859559e-06, "loss": 0.0603, "step": 38963 }, { "epoch": 0.6900344007756777, "grad_norm": 0.5364508032798767, "learning_rate": 6.944416567614751e-06, "loss": 0.077, "step": 38964 }, { "epoch": 0.6900521103127062, "grad_norm": 0.38886862993240356, "learning_rate": 6.943690813871996e-06, "loss": 0.0473, "step": 38965 }, { "epoch": 0.6900698198497346, "grad_norm": 0.41413018107414246, "learning_rate": 6.942965086633685e-06, "loss": 0.0751, "step": 38966 }, { "epoch": 0.690087529386763, "grad_norm": 0.707151472568512, "learning_rate": 6.942239385902203e-06, "loss": 0.0548, "step": 38967 }, { "epoch": 0.6901052389237914, "grad_norm": 0.42522892355918884, "learning_rate": 6.9415137116799386e-06, "loss": 0.0491, "step": 38968 }, { "epoch": 0.6901229484608199, "grad_norm": 0.7164867520332336, "learning_rate": 6.940788063969284e-06, "loss": 0.0621, "step": 38969 }, { "epoch": 0.6901406579978483, "grad_norm": 0.4709629416465759, "learning_rate": 6.940062442772616e-06, "loss": 0.0601, "step": 38970 }, { "epoch": 0.6901583675348767, "grad_norm": 0.6267372369766235, "learning_rate": 6.939336848092328e-06, "loss": 0.0555, "step": 38971 }, { "epoch": 0.6901760770719052, "grad_norm": 0.5268791913986206, "learning_rate": 6.938611279930812e-06, "loss": 0.0498, "step": 38972 }, { "epoch": 0.6901937866089336, "grad_norm": 0.3552781939506531, "learning_rate": 6.937885738290443e-06, "loss": 0.0352, "step": 38973 }, { "epoch": 0.690211496145962, "grad_norm": 0.47727298736572266, "learning_rate": 6.937160223173616e-06, "loss": 0.0524, "step": 38974 }, { "epoch": 0.6902292056829904, "grad_norm": 0.877859890460968, "learning_rate": 6.936434734582715e-06, "loss": 0.0449, "step": 38975 }, { "epoch": 0.6902469152200189, "grad_norm": 0.45056992769241333, "learning_rate": 6.935709272520129e-06, "loss": 0.0616, "step": 38976 }, { "epoch": 0.6902646247570473, "grad_norm": 2.664113759994507, "learning_rate": 6.934983836988243e-06, "loss": 0.0764, "step": 38977 }, { "epoch": 0.6902823342940757, "grad_norm": 1.0858906507492065, "learning_rate": 6.934258427989445e-06, "loss": 0.0916, "step": 38978 }, { "epoch": 0.6903000438311041, "grad_norm": 0.3936717212200165, "learning_rate": 6.933533045526128e-06, "loss": 0.0483, "step": 38979 }, { "epoch": 0.6903177533681326, "grad_norm": 0.5935999751091003, "learning_rate": 6.932807689600664e-06, "loss": 0.0437, "step": 38980 }, { "epoch": 0.690335462905161, "grad_norm": 0.9482154846191406, "learning_rate": 6.932082360215447e-06, "loss": 0.0717, "step": 38981 }, { "epoch": 0.6903531724421894, "grad_norm": 0.2884945273399353, "learning_rate": 6.931357057372863e-06, "loss": 0.0436, "step": 38982 }, { "epoch": 0.6903708819792178, "grad_norm": 0.3629967272281647, "learning_rate": 6.930631781075303e-06, "loss": 0.07, "step": 38983 }, { "epoch": 0.6903885915162463, "grad_norm": 0.8274646997451782, "learning_rate": 6.929906531325145e-06, "loss": 0.0814, "step": 38984 }, { "epoch": 0.6904063010532747, "grad_norm": 0.5893746614456177, "learning_rate": 6.929181308124771e-06, "loss": 0.0423, "step": 38985 }, { "epoch": 0.6904240105903031, "grad_norm": 0.40179625153541565, "learning_rate": 6.928456111476586e-06, "loss": 0.0419, "step": 38986 }, { "epoch": 0.6904417201273316, "grad_norm": 0.5470181107521057, "learning_rate": 6.927730941382957e-06, "loss": 0.0446, "step": 38987 }, { "epoch": 0.69045942966436, "grad_norm": 0.5878226161003113, "learning_rate": 6.927005797846278e-06, "loss": 0.0645, "step": 38988 }, { "epoch": 0.6904771392013884, "grad_norm": 0.5558211803436279, "learning_rate": 6.926280680868934e-06, "loss": 0.0628, "step": 38989 }, { "epoch": 0.6904948487384168, "grad_norm": 0.7742999792098999, "learning_rate": 6.925555590453314e-06, "loss": 0.0683, "step": 38990 }, { "epoch": 0.6905125582754453, "grad_norm": 0.5777836441993713, "learning_rate": 6.924830526601795e-06, "loss": 0.043, "step": 38991 }, { "epoch": 0.6905302678124737, "grad_norm": 1.3016606569290161, "learning_rate": 6.924105489316767e-06, "loss": 0.0955, "step": 38992 }, { "epoch": 0.6905479773495021, "grad_norm": 1.0295116901397705, "learning_rate": 6.923380478600614e-06, "loss": 0.0858, "step": 38993 }, { "epoch": 0.6905656868865305, "grad_norm": 0.29805850982666016, "learning_rate": 6.9226554944557236e-06, "loss": 0.0413, "step": 38994 }, { "epoch": 0.690583396423559, "grad_norm": 1.1822842359542847, "learning_rate": 6.921930536884477e-06, "loss": 0.0817, "step": 38995 }, { "epoch": 0.6906011059605874, "grad_norm": 0.6041930317878723, "learning_rate": 6.921205605889265e-06, "loss": 0.0705, "step": 38996 }, { "epoch": 0.6906188154976158, "grad_norm": 0.7684047818183899, "learning_rate": 6.920480701472474e-06, "loss": 0.0679, "step": 38997 }, { "epoch": 0.6906365250346442, "grad_norm": 0.5984450578689575, "learning_rate": 6.919755823636479e-06, "loss": 0.0528, "step": 38998 }, { "epoch": 0.6906542345716727, "grad_norm": 0.7304226756095886, "learning_rate": 6.919030972383669e-06, "loss": 0.0776, "step": 38999 }, { "epoch": 0.6906719441087011, "grad_norm": 0.7005577683448792, "learning_rate": 6.9183061477164356e-06, "loss": 0.071, "step": 39000 }, { "epoch": 0.6906896536457295, "grad_norm": 0.9912938475608826, "learning_rate": 6.917581349637152e-06, "loss": 0.0593, "step": 39001 }, { "epoch": 0.690707363182758, "grad_norm": 0.6701061129570007, "learning_rate": 6.916856578148209e-06, "loss": 0.0415, "step": 39002 }, { "epoch": 0.6907250727197864, "grad_norm": 0.5198503136634827, "learning_rate": 6.916131833251989e-06, "loss": 0.0521, "step": 39003 }, { "epoch": 0.6907427822568148, "grad_norm": 0.6838928461074829, "learning_rate": 6.915407114950879e-06, "loss": 0.0672, "step": 39004 }, { "epoch": 0.6907604917938432, "grad_norm": 0.7490904927253723, "learning_rate": 6.914682423247263e-06, "loss": 0.0551, "step": 39005 }, { "epoch": 0.6907782013308718, "grad_norm": 0.6116792559623718, "learning_rate": 6.913957758143522e-06, "loss": 0.0517, "step": 39006 }, { "epoch": 0.6907959108679002, "grad_norm": 0.48613834381103516, "learning_rate": 6.913233119642041e-06, "loss": 0.0443, "step": 39007 }, { "epoch": 0.6908136204049286, "grad_norm": 0.5245293974876404, "learning_rate": 6.912508507745214e-06, "loss": 0.055, "step": 39008 }, { "epoch": 0.690831329941957, "grad_norm": 0.6650692224502563, "learning_rate": 6.911783922455408e-06, "loss": 0.0653, "step": 39009 }, { "epoch": 0.6908490394789855, "grad_norm": 0.5742619633674622, "learning_rate": 6.9110593637750155e-06, "loss": 0.053, "step": 39010 }, { "epoch": 0.6908667490160139, "grad_norm": 0.6583951115608215, "learning_rate": 6.910334831706425e-06, "loss": 0.0512, "step": 39011 }, { "epoch": 0.6908844585530423, "grad_norm": 0.4839164614677429, "learning_rate": 6.9096103262520095e-06, "loss": 0.0547, "step": 39012 }, { "epoch": 0.6909021680900707, "grad_norm": 0.5958938598632812, "learning_rate": 6.908885847414158e-06, "loss": 0.0483, "step": 39013 }, { "epoch": 0.6909198776270992, "grad_norm": 0.8377121090888977, "learning_rate": 6.908161395195247e-06, "loss": 0.0715, "step": 39014 }, { "epoch": 0.6909375871641276, "grad_norm": 0.22888296842575073, "learning_rate": 6.907436969597678e-06, "loss": 0.0375, "step": 39015 }, { "epoch": 0.690955296701156, "grad_norm": 0.6264481544494629, "learning_rate": 6.906712570623817e-06, "loss": 0.0605, "step": 39016 }, { "epoch": 0.6909730062381845, "grad_norm": 0.49603649973869324, "learning_rate": 6.905988198276054e-06, "loss": 0.0558, "step": 39017 }, { "epoch": 0.6909907157752129, "grad_norm": 0.6967455148696899, "learning_rate": 6.905263852556777e-06, "loss": 0.0484, "step": 39018 }, { "epoch": 0.6910084253122413, "grad_norm": 0.8232173323631287, "learning_rate": 6.9045395334683566e-06, "loss": 0.103, "step": 39019 }, { "epoch": 0.6910261348492697, "grad_norm": 0.65711909532547, "learning_rate": 6.903815241013183e-06, "loss": 0.0774, "step": 39020 }, { "epoch": 0.6910438443862982, "grad_norm": 0.9528976678848267, "learning_rate": 6.903090975193638e-06, "loss": 0.0721, "step": 39021 }, { "epoch": 0.6910615539233266, "grad_norm": 0.5161768198013306, "learning_rate": 6.902366736012105e-06, "loss": 0.0434, "step": 39022 }, { "epoch": 0.691079263460355, "grad_norm": 0.6096493601799011, "learning_rate": 6.901642523470966e-06, "loss": 0.0666, "step": 39023 }, { "epoch": 0.6910969729973834, "grad_norm": 0.9309046864509583, "learning_rate": 6.900918337572605e-06, "loss": 0.0861, "step": 39024 }, { "epoch": 0.6911146825344119, "grad_norm": 0.24964074790477753, "learning_rate": 6.900194178319408e-06, "loss": 0.0366, "step": 39025 }, { "epoch": 0.6911323920714403, "grad_norm": 0.394845187664032, "learning_rate": 6.899470045713747e-06, "loss": 0.0452, "step": 39026 }, { "epoch": 0.6911501016084687, "grad_norm": 0.9026214480400085, "learning_rate": 6.89874593975801e-06, "loss": 0.0468, "step": 39027 }, { "epoch": 0.6911678111454971, "grad_norm": 0.6991853713989258, "learning_rate": 6.898021860454581e-06, "loss": 0.0508, "step": 39028 }, { "epoch": 0.6911855206825256, "grad_norm": 0.6420620679855347, "learning_rate": 6.897297807805845e-06, "loss": 0.0544, "step": 39029 }, { "epoch": 0.691203230219554, "grad_norm": 0.41961345076560974, "learning_rate": 6.896573781814175e-06, "loss": 0.0846, "step": 39030 }, { "epoch": 0.6912209397565824, "grad_norm": 0.5869541168212891, "learning_rate": 6.895849782481959e-06, "loss": 0.0806, "step": 39031 }, { "epoch": 0.6912386492936109, "grad_norm": 0.5386627316474915, "learning_rate": 6.8951258098115755e-06, "loss": 0.0671, "step": 39032 }, { "epoch": 0.6912563588306393, "grad_norm": 0.7854058742523193, "learning_rate": 6.894401863805408e-06, "loss": 0.0873, "step": 39033 }, { "epoch": 0.6912740683676677, "grad_norm": 0.8231266736984253, "learning_rate": 6.8936779444658395e-06, "loss": 0.0997, "step": 39034 }, { "epoch": 0.6912917779046961, "grad_norm": 0.7475681900978088, "learning_rate": 6.89295405179525e-06, "loss": 0.0614, "step": 39035 }, { "epoch": 0.6913094874417246, "grad_norm": 0.34817370772361755, "learning_rate": 6.8922301857960276e-06, "loss": 0.0467, "step": 39036 }, { "epoch": 0.691327196978753, "grad_norm": 0.34723833203315735, "learning_rate": 6.891506346470543e-06, "loss": 0.0292, "step": 39037 }, { "epoch": 0.6913449065157814, "grad_norm": 0.7370161414146423, "learning_rate": 6.890782533821181e-06, "loss": 0.1043, "step": 39038 }, { "epoch": 0.6913626160528098, "grad_norm": 0.4209592938423157, "learning_rate": 6.890058747850325e-06, "loss": 0.054, "step": 39039 }, { "epoch": 0.6913803255898383, "grad_norm": 0.6533681750297546, "learning_rate": 6.88933498856036e-06, "loss": 0.0374, "step": 39040 }, { "epoch": 0.6913980351268667, "grad_norm": 0.6332681179046631, "learning_rate": 6.888611255953659e-06, "loss": 0.0773, "step": 39041 }, { "epoch": 0.6914157446638951, "grad_norm": 0.6608490943908691, "learning_rate": 6.887887550032605e-06, "loss": 0.0734, "step": 39042 }, { "epoch": 0.6914334542009235, "grad_norm": 0.4489694833755493, "learning_rate": 6.887163870799581e-06, "loss": 0.0433, "step": 39043 }, { "epoch": 0.691451163737952, "grad_norm": 0.6645066738128662, "learning_rate": 6.886440218256967e-06, "loss": 0.082, "step": 39044 }, { "epoch": 0.6914688732749804, "grad_norm": 0.5997633337974548, "learning_rate": 6.885716592407144e-06, "loss": 0.066, "step": 39045 }, { "epoch": 0.6914865828120088, "grad_norm": 0.9375158548355103, "learning_rate": 6.884992993252494e-06, "loss": 0.0389, "step": 39046 }, { "epoch": 0.6915042923490373, "grad_norm": 0.41698697209358215, "learning_rate": 6.8842694207954e-06, "loss": 0.0636, "step": 39047 }, { "epoch": 0.6915220018860657, "grad_norm": 0.8766455054283142, "learning_rate": 6.883545875038235e-06, "loss": 0.0543, "step": 39048 }, { "epoch": 0.6915397114230941, "grad_norm": 0.6251373887062073, "learning_rate": 6.882822355983381e-06, "loss": 0.0624, "step": 39049 }, { "epoch": 0.6915574209601225, "grad_norm": 0.46883508563041687, "learning_rate": 6.882098863633229e-06, "loss": 0.0541, "step": 39050 }, { "epoch": 0.691575130497151, "grad_norm": 0.5411730408668518, "learning_rate": 6.881375397990137e-06, "loss": 0.0661, "step": 39051 }, { "epoch": 0.6915928400341794, "grad_norm": 0.7631767988204956, "learning_rate": 6.880651959056505e-06, "loss": 0.0765, "step": 39052 }, { "epoch": 0.6916105495712078, "grad_norm": 0.5373790264129639, "learning_rate": 6.879928546834705e-06, "loss": 0.0476, "step": 39053 }, { "epoch": 0.6916282591082362, "grad_norm": 0.7649407982826233, "learning_rate": 6.879205161327127e-06, "loss": 0.0621, "step": 39054 }, { "epoch": 0.6916459686452647, "grad_norm": 0.6400941014289856, "learning_rate": 6.878481802536136e-06, "loss": 0.0536, "step": 39055 }, { "epoch": 0.6916636781822931, "grad_norm": 0.5584182739257812, "learning_rate": 6.877758470464119e-06, "loss": 0.0735, "step": 39056 }, { "epoch": 0.6916813877193215, "grad_norm": 0.6005603671073914, "learning_rate": 6.877035165113462e-06, "loss": 0.0519, "step": 39057 }, { "epoch": 0.6916990972563499, "grad_norm": 0.6782413721084595, "learning_rate": 6.87631188648653e-06, "loss": 0.0802, "step": 39058 }, { "epoch": 0.6917168067933784, "grad_norm": 0.6192491054534912, "learning_rate": 6.87558863458571e-06, "loss": 0.0707, "step": 39059 }, { "epoch": 0.6917345163304068, "grad_norm": 0.6896733045578003, "learning_rate": 6.874865409413384e-06, "loss": 0.047, "step": 39060 }, { "epoch": 0.6917522258674352, "grad_norm": 0.7619812488555908, "learning_rate": 6.874142210971926e-06, "loss": 0.0601, "step": 39061 }, { "epoch": 0.6917699354044637, "grad_norm": 0.34732261300086975, "learning_rate": 6.873419039263719e-06, "loss": 0.037, "step": 39062 }, { "epoch": 0.6917876449414921, "grad_norm": 0.7516873478889465, "learning_rate": 6.872695894291143e-06, "loss": 0.0572, "step": 39063 }, { "epoch": 0.6918053544785205, "grad_norm": 0.6328908801078796, "learning_rate": 6.87197277605658e-06, "loss": 0.0462, "step": 39064 }, { "epoch": 0.6918230640155489, "grad_norm": 0.6570452451705933, "learning_rate": 6.8712496845624e-06, "loss": 0.028, "step": 39065 }, { "epoch": 0.6918407735525774, "grad_norm": 0.7614068388938904, "learning_rate": 6.870526619810985e-06, "loss": 0.053, "step": 39066 }, { "epoch": 0.6918584830896058, "grad_norm": 0.5602591633796692, "learning_rate": 6.869803581804714e-06, "loss": 0.0558, "step": 39067 }, { "epoch": 0.6918761926266342, "grad_norm": 0.5323814749717712, "learning_rate": 6.869080570545975e-06, "loss": 0.0573, "step": 39068 }, { "epoch": 0.6918939021636626, "grad_norm": 0.43585366010665894, "learning_rate": 6.8683575860371325e-06, "loss": 0.0736, "step": 39069 }, { "epoch": 0.6919116117006912, "grad_norm": 0.4648301303386688, "learning_rate": 6.86763462828057e-06, "loss": 0.0554, "step": 39070 }, { "epoch": 0.6919293212377196, "grad_norm": 0.9810554385185242, "learning_rate": 6.866911697278669e-06, "loss": 0.0677, "step": 39071 }, { "epoch": 0.691947030774748, "grad_norm": 0.3146413266658783, "learning_rate": 6.866188793033803e-06, "loss": 0.0607, "step": 39072 }, { "epoch": 0.6919647403117763, "grad_norm": 0.7237017154693604, "learning_rate": 6.8654659155483554e-06, "loss": 0.05, "step": 39073 }, { "epoch": 0.6919824498488049, "grad_norm": 0.3324621319770813, "learning_rate": 6.864743064824701e-06, "loss": 0.0625, "step": 39074 }, { "epoch": 0.6920001593858333, "grad_norm": 0.5343521237373352, "learning_rate": 6.864020240865226e-06, "loss": 0.0564, "step": 39075 }, { "epoch": 0.6920178689228617, "grad_norm": 0.43139970302581787, "learning_rate": 6.8632974436722944e-06, "loss": 0.0658, "step": 39076 }, { "epoch": 0.6920355784598902, "grad_norm": 0.7850841283798218, "learning_rate": 6.862574673248292e-06, "loss": 0.0876, "step": 39077 }, { "epoch": 0.6920532879969186, "grad_norm": 0.7696828842163086, "learning_rate": 6.861851929595596e-06, "loss": 0.0619, "step": 39078 }, { "epoch": 0.692070997533947, "grad_norm": 0.8175176978111267, "learning_rate": 6.861129212716589e-06, "loss": 0.0895, "step": 39079 }, { "epoch": 0.6920887070709754, "grad_norm": 0.5232760310173035, "learning_rate": 6.860406522613632e-06, "loss": 0.0332, "step": 39080 }, { "epoch": 0.6921064166080039, "grad_norm": 0.8298534750938416, "learning_rate": 6.8596838592891226e-06, "loss": 0.0523, "step": 39081 }, { "epoch": 0.6921241261450323, "grad_norm": 0.5570132732391357, "learning_rate": 6.858961222745433e-06, "loss": 0.0464, "step": 39082 }, { "epoch": 0.6921418356820607, "grad_norm": 0.48218777775764465, "learning_rate": 6.858238612984933e-06, "loss": 0.0416, "step": 39083 }, { "epoch": 0.6921595452190891, "grad_norm": 0.7414635419845581, "learning_rate": 6.857516030010003e-06, "loss": 0.0842, "step": 39084 }, { "epoch": 0.6921772547561176, "grad_norm": 0.629273533821106, "learning_rate": 6.856793473823024e-06, "loss": 0.0556, "step": 39085 }, { "epoch": 0.692194964293146, "grad_norm": 0.4347410202026367, "learning_rate": 6.856070944426374e-06, "loss": 0.0339, "step": 39086 }, { "epoch": 0.6922126738301744, "grad_norm": 0.730130672454834, "learning_rate": 6.855348441822423e-06, "loss": 0.0685, "step": 39087 }, { "epoch": 0.6922303833672028, "grad_norm": 0.6416639685630798, "learning_rate": 6.854625966013552e-06, "loss": 0.0488, "step": 39088 }, { "epoch": 0.6922480929042313, "grad_norm": 0.6221008896827698, "learning_rate": 6.853903517002137e-06, "loss": 0.075, "step": 39089 }, { "epoch": 0.6922658024412597, "grad_norm": 0.8348862528800964, "learning_rate": 6.853181094790556e-06, "loss": 0.0912, "step": 39090 }, { "epoch": 0.6922835119782881, "grad_norm": 0.6417157649993896, "learning_rate": 6.8524586993811846e-06, "loss": 0.0466, "step": 39091 }, { "epoch": 0.6923012215153166, "grad_norm": 0.41245734691619873, "learning_rate": 6.8517363307764e-06, "loss": 0.0568, "step": 39092 }, { "epoch": 0.692318931052345, "grad_norm": 0.44715169072151184, "learning_rate": 6.851013988978585e-06, "loss": 0.0532, "step": 39093 }, { "epoch": 0.6923366405893734, "grad_norm": 0.6616474390029907, "learning_rate": 6.850291673990104e-06, "loss": 0.0523, "step": 39094 }, { "epoch": 0.6923543501264018, "grad_norm": 0.5246280431747437, "learning_rate": 6.849569385813339e-06, "loss": 0.0557, "step": 39095 }, { "epoch": 0.6923720596634303, "grad_norm": 0.3170565068721771, "learning_rate": 6.8488471244506705e-06, "loss": 0.0679, "step": 39096 }, { "epoch": 0.6923897692004587, "grad_norm": 0.6842507719993591, "learning_rate": 6.8481248899044665e-06, "loss": 0.085, "step": 39097 }, { "epoch": 0.6924074787374871, "grad_norm": 0.418732225894928, "learning_rate": 6.847402682177107e-06, "loss": 0.0436, "step": 39098 }, { "epoch": 0.6924251882745155, "grad_norm": 0.7122183442115784, "learning_rate": 6.846680501270969e-06, "loss": 0.075, "step": 39099 }, { "epoch": 0.692442897811544, "grad_norm": 0.3180505931377411, "learning_rate": 6.845958347188426e-06, "loss": 0.0512, "step": 39100 }, { "epoch": 0.6924606073485724, "grad_norm": 0.5039584040641785, "learning_rate": 6.845236219931856e-06, "loss": 0.0775, "step": 39101 }, { "epoch": 0.6924783168856008, "grad_norm": 0.7146531939506531, "learning_rate": 6.844514119503632e-06, "loss": 0.0689, "step": 39102 }, { "epoch": 0.6924960264226292, "grad_norm": 0.5908401012420654, "learning_rate": 6.843792045906139e-06, "loss": 0.0629, "step": 39103 }, { "epoch": 0.6925137359596577, "grad_norm": 0.5040850043296814, "learning_rate": 6.843069999141741e-06, "loss": 0.076, "step": 39104 }, { "epoch": 0.6925314454966861, "grad_norm": 0.327536940574646, "learning_rate": 6.8423479792128155e-06, "loss": 0.0474, "step": 39105 }, { "epoch": 0.6925491550337145, "grad_norm": 0.8135316371917725, "learning_rate": 6.841625986121741e-06, "loss": 0.0923, "step": 39106 }, { "epoch": 0.692566864570743, "grad_norm": 0.3914130926132202, "learning_rate": 6.840904019870897e-06, "loss": 0.0595, "step": 39107 }, { "epoch": 0.6925845741077714, "grad_norm": 0.7335737943649292, "learning_rate": 6.840182080462649e-06, "loss": 0.0854, "step": 39108 }, { "epoch": 0.6926022836447998, "grad_norm": 0.5632441639900208, "learning_rate": 6.83946016789937e-06, "loss": 0.0498, "step": 39109 }, { "epoch": 0.6926199931818282, "grad_norm": 0.7326804399490356, "learning_rate": 6.838738282183448e-06, "loss": 0.064, "step": 39110 }, { "epoch": 0.6926377027188567, "grad_norm": 0.7058442831039429, "learning_rate": 6.838016423317256e-06, "loss": 0.0666, "step": 39111 }, { "epoch": 0.6926554122558851, "grad_norm": 0.519606351852417, "learning_rate": 6.83729459130316e-06, "loss": 0.0724, "step": 39112 }, { "epoch": 0.6926731217929135, "grad_norm": 0.39523845911026, "learning_rate": 6.8365727861435374e-06, "loss": 0.066, "step": 39113 }, { "epoch": 0.6926908313299419, "grad_norm": 0.7107527852058411, "learning_rate": 6.835851007840772e-06, "loss": 0.0502, "step": 39114 }, { "epoch": 0.6927085408669704, "grad_norm": 0.9017194509506226, "learning_rate": 6.835129256397226e-06, "loss": 0.0679, "step": 39115 }, { "epoch": 0.6927262504039988, "grad_norm": 0.7019957900047302, "learning_rate": 6.834407531815277e-06, "loss": 0.0766, "step": 39116 }, { "epoch": 0.6927439599410272, "grad_norm": 0.562092661857605, "learning_rate": 6.8336858340973025e-06, "loss": 0.0821, "step": 39117 }, { "epoch": 0.6927616694780556, "grad_norm": 0.4635317623615265, "learning_rate": 6.8329641632456745e-06, "loss": 0.0476, "step": 39118 }, { "epoch": 0.6927793790150841, "grad_norm": 0.7699415683746338, "learning_rate": 6.832242519262771e-06, "loss": 0.0536, "step": 39119 }, { "epoch": 0.6927970885521125, "grad_norm": 0.4579853117465973, "learning_rate": 6.83152090215096e-06, "loss": 0.0716, "step": 39120 }, { "epoch": 0.6928147980891409, "grad_norm": 0.674763023853302, "learning_rate": 6.830799311912627e-06, "loss": 0.0629, "step": 39121 }, { "epoch": 0.6928325076261694, "grad_norm": 0.8214734196662903, "learning_rate": 6.830077748550132e-06, "loss": 0.0806, "step": 39122 }, { "epoch": 0.6928502171631978, "grad_norm": 0.4957635700702667, "learning_rate": 6.8293562120658545e-06, "loss": 0.0474, "step": 39123 }, { "epoch": 0.6928679267002262, "grad_norm": 0.43999558687210083, "learning_rate": 6.828634702462168e-06, "loss": 0.0621, "step": 39124 }, { "epoch": 0.6928856362372546, "grad_norm": 0.4010821580886841, "learning_rate": 6.827913219741454e-06, "loss": 0.0411, "step": 39125 }, { "epoch": 0.6929033457742831, "grad_norm": 0.5294602513313293, "learning_rate": 6.827191763906072e-06, "loss": 0.0433, "step": 39126 }, { "epoch": 0.6929210553113115, "grad_norm": 0.5269116759300232, "learning_rate": 6.8264703349584025e-06, "loss": 0.059, "step": 39127 }, { "epoch": 0.6929387648483399, "grad_norm": 0.4725838899612427, "learning_rate": 6.82574893290082e-06, "loss": 0.0583, "step": 39128 }, { "epoch": 0.6929564743853683, "grad_norm": 0.7869522571563721, "learning_rate": 6.825027557735696e-06, "loss": 0.0721, "step": 39129 }, { "epoch": 0.6929741839223968, "grad_norm": 0.2574174404144287, "learning_rate": 6.824306209465405e-06, "loss": 0.0308, "step": 39130 }, { "epoch": 0.6929918934594252, "grad_norm": 0.3073822557926178, "learning_rate": 6.82358488809232e-06, "loss": 0.0461, "step": 39131 }, { "epoch": 0.6930096029964536, "grad_norm": 0.5452767014503479, "learning_rate": 6.822863593618818e-06, "loss": 0.0358, "step": 39132 }, { "epoch": 0.693027312533482, "grad_norm": 0.5789337754249573, "learning_rate": 6.822142326047263e-06, "loss": 0.0625, "step": 39133 }, { "epoch": 0.6930450220705106, "grad_norm": 0.6884408593177795, "learning_rate": 6.821421085380033e-06, "loss": 0.0567, "step": 39134 }, { "epoch": 0.693062731607539, "grad_norm": 0.6481665372848511, "learning_rate": 6.820699871619505e-06, "loss": 0.0715, "step": 39135 }, { "epoch": 0.6930804411445673, "grad_norm": 0.440173864364624, "learning_rate": 6.8199786847680405e-06, "loss": 0.0857, "step": 39136 }, { "epoch": 0.6930981506815959, "grad_norm": 0.6126846671104431, "learning_rate": 6.819257524828021e-06, "loss": 0.0517, "step": 39137 }, { "epoch": 0.6931158602186243, "grad_norm": 0.6961374878883362, "learning_rate": 6.81853639180181e-06, "loss": 0.0455, "step": 39138 }, { "epoch": 0.6931335697556527, "grad_norm": 0.5704948902130127, "learning_rate": 6.817815285691798e-06, "loss": 0.0608, "step": 39139 }, { "epoch": 0.693151279292681, "grad_norm": 0.6467689871788025, "learning_rate": 6.817094206500341e-06, "loss": 0.0728, "step": 39140 }, { "epoch": 0.6931689888297096, "grad_norm": 0.6095354557037354, "learning_rate": 6.8163731542298164e-06, "loss": 0.0513, "step": 39141 }, { "epoch": 0.693186698366738, "grad_norm": 0.6245837807655334, "learning_rate": 6.815652128882597e-06, "loss": 0.073, "step": 39142 }, { "epoch": 0.6932044079037664, "grad_norm": 0.6960667371749878, "learning_rate": 6.814931130461059e-06, "loss": 0.0834, "step": 39143 }, { "epoch": 0.6932221174407948, "grad_norm": 1.101767659187317, "learning_rate": 6.814210158967565e-06, "loss": 0.0921, "step": 39144 }, { "epoch": 0.6932398269778233, "grad_norm": 0.6205089688301086, "learning_rate": 6.8134892144044905e-06, "loss": 0.0882, "step": 39145 }, { "epoch": 0.6932575365148517, "grad_norm": 0.8028468489646912, "learning_rate": 6.812768296774208e-06, "loss": 0.0925, "step": 39146 }, { "epoch": 0.6932752460518801, "grad_norm": 0.36531829833984375, "learning_rate": 6.812047406079091e-06, "loss": 0.049, "step": 39147 }, { "epoch": 0.6932929555889085, "grad_norm": 0.4480980932712555, "learning_rate": 6.8113265423215086e-06, "loss": 0.0566, "step": 39148 }, { "epoch": 0.693310665125937, "grad_norm": 0.4026224911212921, "learning_rate": 6.810605705503834e-06, "loss": 0.0445, "step": 39149 }, { "epoch": 0.6933283746629654, "grad_norm": 0.4954886734485626, "learning_rate": 6.809884895628446e-06, "loss": 0.0448, "step": 39150 }, { "epoch": 0.6933460841999938, "grad_norm": 0.8002012968063354, "learning_rate": 6.809164112697701e-06, "loss": 0.068, "step": 39151 }, { "epoch": 0.6933637937370223, "grad_norm": 0.6608724594116211, "learning_rate": 6.8084433567139785e-06, "loss": 0.0416, "step": 39152 }, { "epoch": 0.6933815032740507, "grad_norm": 0.5574268102645874, "learning_rate": 6.8077226276796525e-06, "loss": 0.0441, "step": 39153 }, { "epoch": 0.6933992128110791, "grad_norm": 0.34171247482299805, "learning_rate": 6.8070019255970865e-06, "loss": 0.0424, "step": 39154 }, { "epoch": 0.6934169223481075, "grad_norm": 0.49737823009490967, "learning_rate": 6.806281250468655e-06, "loss": 0.0497, "step": 39155 }, { "epoch": 0.693434631885136, "grad_norm": 0.7737687826156616, "learning_rate": 6.805560602296729e-06, "loss": 0.0759, "step": 39156 }, { "epoch": 0.6934523414221644, "grad_norm": 0.6576444506645203, "learning_rate": 6.804839981083681e-06, "loss": 0.0574, "step": 39157 }, { "epoch": 0.6934700509591928, "grad_norm": 0.4960888922214508, "learning_rate": 6.804119386831879e-06, "loss": 0.0518, "step": 39158 }, { "epoch": 0.6934877604962212, "grad_norm": 0.7346683740615845, "learning_rate": 6.803398819543697e-06, "loss": 0.0624, "step": 39159 }, { "epoch": 0.6935054700332497, "grad_norm": 1.1094720363616943, "learning_rate": 6.802678279221507e-06, "loss": 0.0733, "step": 39160 }, { "epoch": 0.6935231795702781, "grad_norm": 0.7564358711242676, "learning_rate": 6.801957765867674e-06, "loss": 0.0745, "step": 39161 }, { "epoch": 0.6935408891073065, "grad_norm": 0.5043802857398987, "learning_rate": 6.801237279484568e-06, "loss": 0.0534, "step": 39162 }, { "epoch": 0.6935585986443349, "grad_norm": 1.0393608808517456, "learning_rate": 6.8005168200745634e-06, "loss": 0.0428, "step": 39163 }, { "epoch": 0.6935763081813634, "grad_norm": 0.7461008429527283, "learning_rate": 6.799796387640035e-06, "loss": 0.0559, "step": 39164 }, { "epoch": 0.6935940177183918, "grad_norm": 0.3897673487663269, "learning_rate": 6.799075982183341e-06, "loss": 0.0541, "step": 39165 }, { "epoch": 0.6936117272554202, "grad_norm": 0.6848106384277344, "learning_rate": 6.798355603706857e-06, "loss": 0.0532, "step": 39166 }, { "epoch": 0.6936294367924487, "grad_norm": 0.6609035134315491, "learning_rate": 6.797635252212954e-06, "loss": 0.0504, "step": 39167 }, { "epoch": 0.6936471463294771, "grad_norm": 0.630233645439148, "learning_rate": 6.796914927704002e-06, "loss": 0.0689, "step": 39168 }, { "epoch": 0.6936648558665055, "grad_norm": 0.4408736526966095, "learning_rate": 6.79619463018237e-06, "loss": 0.0674, "step": 39169 }, { "epoch": 0.6936825654035339, "grad_norm": 0.3455539345741272, "learning_rate": 6.7954743596504275e-06, "loss": 0.0458, "step": 39170 }, { "epoch": 0.6937002749405624, "grad_norm": 0.5532856583595276, "learning_rate": 6.79475411611055e-06, "loss": 0.0601, "step": 39171 }, { "epoch": 0.6937179844775908, "grad_norm": 0.620561420917511, "learning_rate": 6.794033899565096e-06, "loss": 0.046, "step": 39172 }, { "epoch": 0.6937356940146192, "grad_norm": 0.6568043231964111, "learning_rate": 6.793313710016441e-06, "loss": 0.0663, "step": 39173 }, { "epoch": 0.6937534035516476, "grad_norm": 0.3834076225757599, "learning_rate": 6.792593547466958e-06, "loss": 0.0509, "step": 39174 }, { "epoch": 0.6937711130886761, "grad_norm": 0.6768462657928467, "learning_rate": 6.791873411919002e-06, "loss": 0.1006, "step": 39175 }, { "epoch": 0.6937888226257045, "grad_norm": 0.49329355359077454, "learning_rate": 6.791153303374958e-06, "loss": 0.0482, "step": 39176 }, { "epoch": 0.6938065321627329, "grad_norm": 0.5856573581695557, "learning_rate": 6.790433221837189e-06, "loss": 0.058, "step": 39177 }, { "epoch": 0.6938242416997613, "grad_norm": 0.47943001985549927, "learning_rate": 6.7897131673080705e-06, "loss": 0.0347, "step": 39178 }, { "epoch": 0.6938419512367898, "grad_norm": 0.5093362927436829, "learning_rate": 6.7889931397899585e-06, "loss": 0.0402, "step": 39179 }, { "epoch": 0.6938596607738182, "grad_norm": 0.6902084946632385, "learning_rate": 6.78827313928523e-06, "loss": 0.0511, "step": 39180 }, { "epoch": 0.6938773703108466, "grad_norm": 0.4897264838218689, "learning_rate": 6.787553165796253e-06, "loss": 0.0705, "step": 39181 }, { "epoch": 0.6938950798478751, "grad_norm": 0.5289795398712158, "learning_rate": 6.786833219325398e-06, "loss": 0.045, "step": 39182 }, { "epoch": 0.6939127893849035, "grad_norm": 0.27490368485450745, "learning_rate": 6.786113299875028e-06, "loss": 0.0347, "step": 39183 }, { "epoch": 0.6939304989219319, "grad_norm": 0.39137834310531616, "learning_rate": 6.785393407447513e-06, "loss": 0.0347, "step": 39184 }, { "epoch": 0.6939482084589603, "grad_norm": 0.5250481367111206, "learning_rate": 6.7846735420452235e-06, "loss": 0.0449, "step": 39185 }, { "epoch": 0.6939659179959888, "grad_norm": 0.48465079069137573, "learning_rate": 6.7839537036705274e-06, "loss": 0.0795, "step": 39186 }, { "epoch": 0.6939836275330172, "grad_norm": 0.7196375727653503, "learning_rate": 6.783233892325791e-06, "loss": 0.0615, "step": 39187 }, { "epoch": 0.6940013370700456, "grad_norm": 0.4527497887611389, "learning_rate": 6.782514108013387e-06, "loss": 0.0669, "step": 39188 }, { "epoch": 0.694019046607074, "grad_norm": 0.5053461790084839, "learning_rate": 6.781794350735683e-06, "loss": 0.0542, "step": 39189 }, { "epoch": 0.6940367561441025, "grad_norm": 0.5440654158592224, "learning_rate": 6.781074620495041e-06, "loss": 0.0507, "step": 39190 }, { "epoch": 0.6940544656811309, "grad_norm": 0.5332791805267334, "learning_rate": 6.780354917293831e-06, "loss": 0.0759, "step": 39191 }, { "epoch": 0.6940721752181593, "grad_norm": 0.5766309499740601, "learning_rate": 6.779635241134427e-06, "loss": 0.0459, "step": 39192 }, { "epoch": 0.6940898847551877, "grad_norm": 0.3885059356689453, "learning_rate": 6.778915592019186e-06, "loss": 0.0502, "step": 39193 }, { "epoch": 0.6941075942922162, "grad_norm": 0.8132786750793457, "learning_rate": 6.7781959699504835e-06, "loss": 0.0638, "step": 39194 }, { "epoch": 0.6941253038292446, "grad_norm": 0.15205444395542145, "learning_rate": 6.7774763749306825e-06, "loss": 0.031, "step": 39195 }, { "epoch": 0.694143013366273, "grad_norm": 0.3816125988960266, "learning_rate": 6.7767568069621535e-06, "loss": 0.0634, "step": 39196 }, { "epoch": 0.6941607229033016, "grad_norm": 1.1205787658691406, "learning_rate": 6.776037266047263e-06, "loss": 0.1092, "step": 39197 }, { "epoch": 0.69417843244033, "grad_norm": 0.5446212291717529, "learning_rate": 6.775317752188378e-06, "loss": 0.0724, "step": 39198 }, { "epoch": 0.6941961419773583, "grad_norm": 0.47656357288360596, "learning_rate": 6.77459826538787e-06, "loss": 0.0743, "step": 39199 }, { "epoch": 0.6942138515143867, "grad_norm": 0.7193427681922913, "learning_rate": 6.773878805648098e-06, "loss": 0.0894, "step": 39200 }, { "epoch": 0.6942315610514153, "grad_norm": 0.5400801301002502, "learning_rate": 6.773159372971432e-06, "loss": 0.0458, "step": 39201 }, { "epoch": 0.6942492705884437, "grad_norm": 0.6039514541625977, "learning_rate": 6.772439967360239e-06, "loss": 0.0591, "step": 39202 }, { "epoch": 0.694266980125472, "grad_norm": 0.4085686504840851, "learning_rate": 6.771720588816892e-06, "loss": 0.0357, "step": 39203 }, { "epoch": 0.6942846896625005, "grad_norm": 0.5231413245201111, "learning_rate": 6.771001237343741e-06, "loss": 0.0452, "step": 39204 }, { "epoch": 0.694302399199529, "grad_norm": 0.4660295248031616, "learning_rate": 6.77028191294317e-06, "loss": 0.0591, "step": 39205 }, { "epoch": 0.6943201087365574, "grad_norm": 0.7229276895523071, "learning_rate": 6.769562615617543e-06, "loss": 0.0825, "step": 39206 }, { "epoch": 0.6943378182735858, "grad_norm": 0.5803062915802002, "learning_rate": 6.768843345369219e-06, "loss": 0.0815, "step": 39207 }, { "epoch": 0.6943555278106142, "grad_norm": 0.568150520324707, "learning_rate": 6.768124102200567e-06, "loss": 0.0489, "step": 39208 }, { "epoch": 0.6943732373476427, "grad_norm": 1.0472177267074585, "learning_rate": 6.767404886113954e-06, "loss": 0.0796, "step": 39209 }, { "epoch": 0.6943909468846711, "grad_norm": 0.5797619819641113, "learning_rate": 6.766685697111752e-06, "loss": 0.0623, "step": 39210 }, { "epoch": 0.6944086564216995, "grad_norm": 0.3848901391029358, "learning_rate": 6.765966535196316e-06, "loss": 0.0685, "step": 39211 }, { "epoch": 0.694426365958728, "grad_norm": 0.7272322773933411, "learning_rate": 6.765247400370016e-06, "loss": 0.0795, "step": 39212 }, { "epoch": 0.6944440754957564, "grad_norm": 0.6451944708824158, "learning_rate": 6.764528292635221e-06, "loss": 0.0632, "step": 39213 }, { "epoch": 0.6944617850327848, "grad_norm": 0.700009286403656, "learning_rate": 6.763809211994294e-06, "loss": 0.0591, "step": 39214 }, { "epoch": 0.6944794945698132, "grad_norm": 0.5172913670539856, "learning_rate": 6.763090158449601e-06, "loss": 0.0703, "step": 39215 }, { "epoch": 0.6944972041068417, "grad_norm": 0.5765068531036377, "learning_rate": 6.762371132003509e-06, "loss": 0.0603, "step": 39216 }, { "epoch": 0.6945149136438701, "grad_norm": 0.6334236264228821, "learning_rate": 6.761652132658389e-06, "loss": 0.0586, "step": 39217 }, { "epoch": 0.6945326231808985, "grad_norm": 0.7688156366348267, "learning_rate": 6.760933160416594e-06, "loss": 0.0445, "step": 39218 }, { "epoch": 0.6945503327179269, "grad_norm": 0.6273111701011658, "learning_rate": 6.760214215280494e-06, "loss": 0.076, "step": 39219 }, { "epoch": 0.6945680422549554, "grad_norm": 0.8066689372062683, "learning_rate": 6.759495297252459e-06, "loss": 0.0568, "step": 39220 }, { "epoch": 0.6945857517919838, "grad_norm": 0.5519389510154724, "learning_rate": 6.758776406334854e-06, "loss": 0.0426, "step": 39221 }, { "epoch": 0.6946034613290122, "grad_norm": 0.7116106748580933, "learning_rate": 6.758057542530036e-06, "loss": 0.063, "step": 39222 }, { "epoch": 0.6946211708660406, "grad_norm": 0.7582679986953735, "learning_rate": 6.757338705840375e-06, "loss": 0.0503, "step": 39223 }, { "epoch": 0.6946388804030691, "grad_norm": 0.792849063873291, "learning_rate": 6.7566198962682375e-06, "loss": 0.0473, "step": 39224 }, { "epoch": 0.6946565899400975, "grad_norm": 0.7402541041374207, "learning_rate": 6.755901113815986e-06, "loss": 0.0633, "step": 39225 }, { "epoch": 0.6946742994771259, "grad_norm": 0.5758240222930908, "learning_rate": 6.755182358485986e-06, "loss": 0.0459, "step": 39226 }, { "epoch": 0.6946920090141544, "grad_norm": 0.3949892520904541, "learning_rate": 6.7544636302806016e-06, "loss": 0.0367, "step": 39227 }, { "epoch": 0.6947097185511828, "grad_norm": 0.4321250021457672, "learning_rate": 6.753744929202202e-06, "loss": 0.0741, "step": 39228 }, { "epoch": 0.6947274280882112, "grad_norm": 0.5388651490211487, "learning_rate": 6.753026255253144e-06, "loss": 0.0856, "step": 39229 }, { "epoch": 0.6947451376252396, "grad_norm": 0.7909250259399414, "learning_rate": 6.752307608435796e-06, "loss": 0.0892, "step": 39230 }, { "epoch": 0.6947628471622681, "grad_norm": 0.8496902585029602, "learning_rate": 6.751588988752526e-06, "loss": 0.079, "step": 39231 }, { "epoch": 0.6947805566992965, "grad_norm": 0.766599714756012, "learning_rate": 6.75087039620569e-06, "loss": 0.0651, "step": 39232 }, { "epoch": 0.6947982662363249, "grad_norm": 0.5510120391845703, "learning_rate": 6.750151830797649e-06, "loss": 0.0662, "step": 39233 }, { "epoch": 0.6948159757733533, "grad_norm": 0.4153698980808258, "learning_rate": 6.74943329253078e-06, "loss": 0.0418, "step": 39234 }, { "epoch": 0.6948336853103818, "grad_norm": 0.5393405556678772, "learning_rate": 6.748714781407445e-06, "loss": 0.0483, "step": 39235 }, { "epoch": 0.6948513948474102, "grad_norm": 1.0951868295669556, "learning_rate": 6.7479962974300005e-06, "loss": 0.0588, "step": 39236 }, { "epoch": 0.6948691043844386, "grad_norm": 0.620400071144104, "learning_rate": 6.747277840600813e-06, "loss": 0.0729, "step": 39237 }, { "epoch": 0.694886813921467, "grad_norm": 0.5555811524391174, "learning_rate": 6.746559410922252e-06, "loss": 0.0369, "step": 39238 }, { "epoch": 0.6949045234584955, "grad_norm": 0.5469443798065186, "learning_rate": 6.74584100839667e-06, "loss": 0.0599, "step": 39239 }, { "epoch": 0.6949222329955239, "grad_norm": 0.701987624168396, "learning_rate": 6.745122633026435e-06, "loss": 0.0851, "step": 39240 }, { "epoch": 0.6949399425325523, "grad_norm": 0.8662660717964172, "learning_rate": 6.744404284813915e-06, "loss": 0.0877, "step": 39241 }, { "epoch": 0.6949576520695808, "grad_norm": 0.45150238275527954, "learning_rate": 6.743685963761467e-06, "loss": 0.0591, "step": 39242 }, { "epoch": 0.6949753616066092, "grad_norm": 0.8505312204360962, "learning_rate": 6.742967669871458e-06, "loss": 0.0881, "step": 39243 }, { "epoch": 0.6949930711436376, "grad_norm": 0.47290492057800293, "learning_rate": 6.742249403146251e-06, "loss": 0.0303, "step": 39244 }, { "epoch": 0.695010780680666, "grad_norm": 0.521934986114502, "learning_rate": 6.741531163588212e-06, "loss": 0.0569, "step": 39245 }, { "epoch": 0.6950284902176945, "grad_norm": 0.580289900302887, "learning_rate": 6.740812951199696e-06, "loss": 0.0502, "step": 39246 }, { "epoch": 0.6950461997547229, "grad_norm": 0.49646666646003723, "learning_rate": 6.7400947659830694e-06, "loss": 0.0574, "step": 39247 }, { "epoch": 0.6950639092917513, "grad_norm": 0.3798247277736664, "learning_rate": 6.739376607940695e-06, "loss": 0.0643, "step": 39248 }, { "epoch": 0.6950816188287797, "grad_norm": 1.2801389694213867, "learning_rate": 6.738658477074943e-06, "loss": 0.095, "step": 39249 }, { "epoch": 0.6950993283658082, "grad_norm": 0.46910062432289124, "learning_rate": 6.737940373388163e-06, "loss": 0.0399, "step": 39250 }, { "epoch": 0.6951170379028366, "grad_norm": 1.057019829750061, "learning_rate": 6.737222296882724e-06, "loss": 0.05, "step": 39251 }, { "epoch": 0.695134747439865, "grad_norm": 0.8848673701286316, "learning_rate": 6.736504247560987e-06, "loss": 0.0994, "step": 39252 }, { "epoch": 0.6951524569768934, "grad_norm": 0.586259663105011, "learning_rate": 6.735786225425316e-06, "loss": 0.0607, "step": 39253 }, { "epoch": 0.6951701665139219, "grad_norm": 0.5478433966636658, "learning_rate": 6.735068230478072e-06, "loss": 0.0679, "step": 39254 }, { "epoch": 0.6951878760509503, "grad_norm": 0.7078633904457092, "learning_rate": 6.734350262721618e-06, "loss": 0.0595, "step": 39255 }, { "epoch": 0.6952055855879787, "grad_norm": 0.6018747687339783, "learning_rate": 6.73363232215832e-06, "loss": 0.0599, "step": 39256 }, { "epoch": 0.6952232951250072, "grad_norm": 0.585045576095581, "learning_rate": 6.732914408790532e-06, "loss": 0.0588, "step": 39257 }, { "epoch": 0.6952410046620356, "grad_norm": 0.8001584410667419, "learning_rate": 6.732196522620616e-06, "loss": 0.0843, "step": 39258 }, { "epoch": 0.695258714199064, "grad_norm": 0.5880081653594971, "learning_rate": 6.731478663650941e-06, "loss": 0.0674, "step": 39259 }, { "epoch": 0.6952764237360924, "grad_norm": 0.5944584012031555, "learning_rate": 6.730760831883868e-06, "loss": 0.0702, "step": 39260 }, { "epoch": 0.695294133273121, "grad_norm": 0.5402368307113647, "learning_rate": 6.730043027321751e-06, "loss": 0.0493, "step": 39261 }, { "epoch": 0.6953118428101493, "grad_norm": 0.596907913684845, "learning_rate": 6.72932524996695e-06, "loss": 0.0399, "step": 39262 }, { "epoch": 0.6953295523471777, "grad_norm": 0.9190048575401306, "learning_rate": 6.728607499821844e-06, "loss": 0.0652, "step": 39263 }, { "epoch": 0.6953472618842061, "grad_norm": 0.8618568181991577, "learning_rate": 6.727889776888777e-06, "loss": 0.0659, "step": 39264 }, { "epoch": 0.6953649714212347, "grad_norm": 0.8745337724685669, "learning_rate": 6.727172081170115e-06, "loss": 0.099, "step": 39265 }, { "epoch": 0.695382680958263, "grad_norm": 0.5244994759559631, "learning_rate": 6.726454412668221e-06, "loss": 0.0427, "step": 39266 }, { "epoch": 0.6954003904952915, "grad_norm": 0.5650819540023804, "learning_rate": 6.725736771385461e-06, "loss": 0.057, "step": 39267 }, { "epoch": 0.6954181000323199, "grad_norm": 0.659968376159668, "learning_rate": 6.725019157324184e-06, "loss": 0.072, "step": 39268 }, { "epoch": 0.6954358095693484, "grad_norm": 0.6846112012863159, "learning_rate": 6.724301570486758e-06, "loss": 0.0461, "step": 39269 }, { "epoch": 0.6954535191063768, "grad_norm": 0.08334332704544067, "learning_rate": 6.723584010875542e-06, "loss": 0.0495, "step": 39270 }, { "epoch": 0.6954712286434052, "grad_norm": 0.7216421961784363, "learning_rate": 6.722866478492896e-06, "loss": 0.0612, "step": 39271 }, { "epoch": 0.6954889381804337, "grad_norm": 0.5744835734367371, "learning_rate": 6.722148973341186e-06, "loss": 0.0542, "step": 39272 }, { "epoch": 0.6955066477174621, "grad_norm": 0.6953649520874023, "learning_rate": 6.721431495422765e-06, "loss": 0.0663, "step": 39273 }, { "epoch": 0.6955243572544905, "grad_norm": 0.35757601261138916, "learning_rate": 6.720714044740003e-06, "loss": 0.0415, "step": 39274 }, { "epoch": 0.6955420667915189, "grad_norm": 0.19970795512199402, "learning_rate": 6.71999662129525e-06, "loss": 0.0382, "step": 39275 }, { "epoch": 0.6955597763285474, "grad_norm": 0.629382848739624, "learning_rate": 6.719279225090872e-06, "loss": 0.065, "step": 39276 }, { "epoch": 0.6955774858655758, "grad_norm": 0.6019302606582642, "learning_rate": 6.71856185612923e-06, "loss": 0.0567, "step": 39277 }, { "epoch": 0.6955951954026042, "grad_norm": 0.5115148425102234, "learning_rate": 6.717844514412679e-06, "loss": 0.0462, "step": 39278 }, { "epoch": 0.6956129049396326, "grad_norm": 0.8049508333206177, "learning_rate": 6.717127199943581e-06, "loss": 0.0506, "step": 39279 }, { "epoch": 0.6956306144766611, "grad_norm": 0.6172868609428406, "learning_rate": 6.7164099127242955e-06, "loss": 0.074, "step": 39280 }, { "epoch": 0.6956483240136895, "grad_norm": 0.9014852643013, "learning_rate": 6.7156926527571856e-06, "loss": 0.0571, "step": 39281 }, { "epoch": 0.6956660335507179, "grad_norm": 0.6226483583450317, "learning_rate": 6.714975420044608e-06, "loss": 0.0386, "step": 39282 }, { "epoch": 0.6956837430877463, "grad_norm": 0.5415838956832886, "learning_rate": 6.714258214588924e-06, "loss": 0.064, "step": 39283 }, { "epoch": 0.6957014526247748, "grad_norm": 0.3989476263523102, "learning_rate": 6.713541036392491e-06, "loss": 0.0513, "step": 39284 }, { "epoch": 0.6957191621618032, "grad_norm": 0.9722123742103577, "learning_rate": 6.712823885457675e-06, "loss": 0.0706, "step": 39285 }, { "epoch": 0.6957368716988316, "grad_norm": 0.8742069602012634, "learning_rate": 6.712106761786827e-06, "loss": 0.0622, "step": 39286 }, { "epoch": 0.6957545812358601, "grad_norm": 0.4146299362182617, "learning_rate": 6.711389665382308e-06, "loss": 0.0687, "step": 39287 }, { "epoch": 0.6957722907728885, "grad_norm": 0.6535698771476746, "learning_rate": 6.710672596246486e-06, "loss": 0.065, "step": 39288 }, { "epoch": 0.6957900003099169, "grad_norm": 0.6038146615028381, "learning_rate": 6.709955554381705e-06, "loss": 0.0754, "step": 39289 }, { "epoch": 0.6958077098469453, "grad_norm": 0.6214158535003662, "learning_rate": 6.709238539790333e-06, "loss": 0.0797, "step": 39290 }, { "epoch": 0.6958254193839738, "grad_norm": 0.19041556119918823, "learning_rate": 6.708521552474721e-06, "loss": 0.0504, "step": 39291 }, { "epoch": 0.6958431289210022, "grad_norm": 0.31902948021888733, "learning_rate": 6.707804592437247e-06, "loss": 0.0304, "step": 39292 }, { "epoch": 0.6958608384580306, "grad_norm": 0.4493604302406311, "learning_rate": 6.70708765968025e-06, "loss": 0.059, "step": 39293 }, { "epoch": 0.695878547995059, "grad_norm": 0.45465636253356934, "learning_rate": 6.706370754206097e-06, "loss": 0.0492, "step": 39294 }, { "epoch": 0.6958962575320875, "grad_norm": 0.8248991966247559, "learning_rate": 6.705653876017149e-06, "loss": 0.087, "step": 39295 }, { "epoch": 0.6959139670691159, "grad_norm": 0.7205615043640137, "learning_rate": 6.704937025115757e-06, "loss": 0.0636, "step": 39296 }, { "epoch": 0.6959316766061443, "grad_norm": 0.7066116333007812, "learning_rate": 6.704220201504284e-06, "loss": 0.062, "step": 39297 }, { "epoch": 0.6959493861431727, "grad_norm": 0.9200652241706848, "learning_rate": 6.703503405185087e-06, "loss": 0.0785, "step": 39298 }, { "epoch": 0.6959670956802012, "grad_norm": 0.7164801955223083, "learning_rate": 6.7027866361605236e-06, "loss": 0.0875, "step": 39299 }, { "epoch": 0.6959848052172296, "grad_norm": 0.3854290843009949, "learning_rate": 6.7020698944329524e-06, "loss": 0.0666, "step": 39300 }, { "epoch": 0.696002514754258, "grad_norm": 0.38191190361976624, "learning_rate": 6.701353180004733e-06, "loss": 0.032, "step": 39301 }, { "epoch": 0.6960202242912865, "grad_norm": 0.6705567240715027, "learning_rate": 6.700636492878228e-06, "loss": 0.0685, "step": 39302 }, { "epoch": 0.6960379338283149, "grad_norm": 0.304614394903183, "learning_rate": 6.699919833055783e-06, "loss": 0.081, "step": 39303 }, { "epoch": 0.6960556433653433, "grad_norm": 0.6822307109832764, "learning_rate": 6.699203200539765e-06, "loss": 0.0566, "step": 39304 }, { "epoch": 0.6960733529023717, "grad_norm": 0.7632200121879578, "learning_rate": 6.698486595332526e-06, "loss": 0.0828, "step": 39305 }, { "epoch": 0.6960910624394002, "grad_norm": 0.3965342342853546, "learning_rate": 6.697770017436433e-06, "loss": 0.0491, "step": 39306 }, { "epoch": 0.6961087719764286, "grad_norm": 0.3963984549045563, "learning_rate": 6.697053466853831e-06, "loss": 0.0552, "step": 39307 }, { "epoch": 0.696126481513457, "grad_norm": 0.5096446871757507, "learning_rate": 6.696336943587083e-06, "loss": 0.0553, "step": 39308 }, { "epoch": 0.6961441910504854, "grad_norm": 0.9795658588409424, "learning_rate": 6.6956204476385486e-06, "loss": 0.0821, "step": 39309 }, { "epoch": 0.6961619005875139, "grad_norm": 0.4657873511314392, "learning_rate": 6.694903979010581e-06, "loss": 0.0514, "step": 39310 }, { "epoch": 0.6961796101245423, "grad_norm": 0.7169280052185059, "learning_rate": 6.6941875377055405e-06, "loss": 0.0697, "step": 39311 }, { "epoch": 0.6961973196615707, "grad_norm": 0.32424986362457275, "learning_rate": 6.693471123725783e-06, "loss": 0.0412, "step": 39312 }, { "epoch": 0.6962150291985991, "grad_norm": 0.5374920964241028, "learning_rate": 6.692754737073669e-06, "loss": 0.056, "step": 39313 }, { "epoch": 0.6962327387356276, "grad_norm": 0.5928089022636414, "learning_rate": 6.692038377751547e-06, "loss": 0.0599, "step": 39314 }, { "epoch": 0.696250448272656, "grad_norm": 0.4243563115596771, "learning_rate": 6.691322045761779e-06, "loss": 0.0416, "step": 39315 }, { "epoch": 0.6962681578096844, "grad_norm": 0.27232226729393005, "learning_rate": 6.690605741106721e-06, "loss": 0.0466, "step": 39316 }, { "epoch": 0.6962858673467129, "grad_norm": 0.8595091104507446, "learning_rate": 6.689889463788736e-06, "loss": 0.0614, "step": 39317 }, { "epoch": 0.6963035768837413, "grad_norm": 0.6346484422683716, "learning_rate": 6.689173213810168e-06, "loss": 0.0437, "step": 39318 }, { "epoch": 0.6963212864207697, "grad_norm": 0.4380139410495758, "learning_rate": 6.688456991173377e-06, "loss": 0.0487, "step": 39319 }, { "epoch": 0.6963389959577981, "grad_norm": 0.7856301665306091, "learning_rate": 6.6877407958807264e-06, "loss": 0.0643, "step": 39320 }, { "epoch": 0.6963567054948266, "grad_norm": 1.184171438217163, "learning_rate": 6.687024627934565e-06, "loss": 0.0667, "step": 39321 }, { "epoch": 0.696374415031855, "grad_norm": 0.5185677409172058, "learning_rate": 6.686308487337252e-06, "loss": 0.0587, "step": 39322 }, { "epoch": 0.6963921245688834, "grad_norm": 0.6274873614311218, "learning_rate": 6.685592374091143e-06, "loss": 0.0585, "step": 39323 }, { "epoch": 0.6964098341059118, "grad_norm": 0.911641001701355, "learning_rate": 6.6848762881986e-06, "loss": 0.072, "step": 39324 }, { "epoch": 0.6964275436429403, "grad_norm": 0.659090518951416, "learning_rate": 6.684160229661969e-06, "loss": 0.0712, "step": 39325 }, { "epoch": 0.6964452531799687, "grad_norm": 0.5868734121322632, "learning_rate": 6.683444198483607e-06, "loss": 0.0359, "step": 39326 }, { "epoch": 0.6964629627169971, "grad_norm": 0.5514345765113831, "learning_rate": 6.6827281946658805e-06, "loss": 0.0655, "step": 39327 }, { "epoch": 0.6964806722540255, "grad_norm": 0.8153696060180664, "learning_rate": 6.6820122182111244e-06, "loss": 0.0796, "step": 39328 }, { "epoch": 0.696498381791054, "grad_norm": 0.7823764681816101, "learning_rate": 6.6812962691217125e-06, "loss": 0.057, "step": 39329 }, { "epoch": 0.6965160913280825, "grad_norm": 0.4557451009750366, "learning_rate": 6.6805803473999935e-06, "loss": 0.0342, "step": 39330 }, { "epoch": 0.6965338008651109, "grad_norm": 0.5958456993103027, "learning_rate": 6.679864453048331e-06, "loss": 0.0668, "step": 39331 }, { "epoch": 0.6965515104021394, "grad_norm": 0.33493345975875854, "learning_rate": 6.679148586069068e-06, "loss": 0.0488, "step": 39332 }, { "epoch": 0.6965692199391678, "grad_norm": 0.6028986573219299, "learning_rate": 6.6784327464645614e-06, "loss": 0.0553, "step": 39333 }, { "epoch": 0.6965869294761962, "grad_norm": 0.35285454988479614, "learning_rate": 6.677716934237178e-06, "loss": 0.049, "step": 39334 }, { "epoch": 0.6966046390132246, "grad_norm": 0.5887088775634766, "learning_rate": 6.677001149389258e-06, "loss": 0.0918, "step": 39335 }, { "epoch": 0.6966223485502531, "grad_norm": 0.5650209188461304, "learning_rate": 6.676285391923162e-06, "loss": 0.0514, "step": 39336 }, { "epoch": 0.6966400580872815, "grad_norm": 0.6141585111618042, "learning_rate": 6.675569661841245e-06, "loss": 0.081, "step": 39337 }, { "epoch": 0.6966577676243099, "grad_norm": 0.4772447943687439, "learning_rate": 6.674853959145861e-06, "loss": 0.0762, "step": 39338 }, { "epoch": 0.6966754771613383, "grad_norm": 0.5804850459098816, "learning_rate": 6.6741382838393675e-06, "loss": 0.0579, "step": 39339 }, { "epoch": 0.6966931866983668, "grad_norm": 0.5401619672775269, "learning_rate": 6.673422635924114e-06, "loss": 0.0636, "step": 39340 }, { "epoch": 0.6967108962353952, "grad_norm": 0.6668506264686584, "learning_rate": 6.672707015402465e-06, "loss": 0.04, "step": 39341 }, { "epoch": 0.6967286057724236, "grad_norm": 0.7082875370979309, "learning_rate": 6.671991422276763e-06, "loss": 0.076, "step": 39342 }, { "epoch": 0.696746315309452, "grad_norm": 0.8492286801338196, "learning_rate": 6.6712758565493666e-06, "loss": 0.0855, "step": 39343 }, { "epoch": 0.6967640248464805, "grad_norm": 0.48226794600486755, "learning_rate": 6.67056031822263e-06, "loss": 0.044, "step": 39344 }, { "epoch": 0.6967817343835089, "grad_norm": 0.8421942591667175, "learning_rate": 6.669844807298912e-06, "loss": 0.0423, "step": 39345 }, { "epoch": 0.6967994439205373, "grad_norm": 0.771837592124939, "learning_rate": 6.669129323780558e-06, "loss": 0.0517, "step": 39346 }, { "epoch": 0.6968171534575658, "grad_norm": 0.6465962529182434, "learning_rate": 6.668413867669925e-06, "loss": 0.061, "step": 39347 }, { "epoch": 0.6968348629945942, "grad_norm": 0.4004991948604584, "learning_rate": 6.667698438969367e-06, "loss": 0.0468, "step": 39348 }, { "epoch": 0.6968525725316226, "grad_norm": 0.5842987298965454, "learning_rate": 6.666983037681239e-06, "loss": 0.0454, "step": 39349 }, { "epoch": 0.696870282068651, "grad_norm": 0.7849660515785217, "learning_rate": 6.666267663807895e-06, "loss": 0.0827, "step": 39350 }, { "epoch": 0.6968879916056795, "grad_norm": 0.8606311082839966, "learning_rate": 6.665552317351686e-06, "loss": 0.0786, "step": 39351 }, { "epoch": 0.6969057011427079, "grad_norm": 0.6450286507606506, "learning_rate": 6.664836998314974e-06, "loss": 0.0476, "step": 39352 }, { "epoch": 0.6969234106797363, "grad_norm": 0.811216413974762, "learning_rate": 6.664121706700099e-06, "loss": 0.0768, "step": 39353 }, { "epoch": 0.6969411202167647, "grad_norm": 0.6535163521766663, "learning_rate": 6.663406442509419e-06, "loss": 0.0628, "step": 39354 }, { "epoch": 0.6969588297537932, "grad_norm": 0.5260160565376282, "learning_rate": 6.662691205745291e-06, "loss": 0.0493, "step": 39355 }, { "epoch": 0.6969765392908216, "grad_norm": 0.5387393236160278, "learning_rate": 6.66197599641007e-06, "loss": 0.1104, "step": 39356 }, { "epoch": 0.69699424882785, "grad_norm": 0.8748929500579834, "learning_rate": 6.661260814506098e-06, "loss": 0.0849, "step": 39357 }, { "epoch": 0.6970119583648784, "grad_norm": 0.8432099223136902, "learning_rate": 6.660545660035732e-06, "loss": 0.0736, "step": 39358 }, { "epoch": 0.6970296679019069, "grad_norm": 0.9678886532783508, "learning_rate": 6.659830533001336e-06, "loss": 0.1009, "step": 39359 }, { "epoch": 0.6970473774389353, "grad_norm": 0.5831294059753418, "learning_rate": 6.659115433405249e-06, "loss": 0.0705, "step": 39360 }, { "epoch": 0.6970650869759637, "grad_norm": 0.31291308999061584, "learning_rate": 6.658400361249829e-06, "loss": 0.0492, "step": 39361 }, { "epoch": 0.6970827965129922, "grad_norm": 0.7152791619300842, "learning_rate": 6.657685316537427e-06, "loss": 0.055, "step": 39362 }, { "epoch": 0.6971005060500206, "grad_norm": 0.40982353687286377, "learning_rate": 6.6569702992704025e-06, "loss": 0.0431, "step": 39363 }, { "epoch": 0.697118215587049, "grad_norm": 0.39380744099617004, "learning_rate": 6.656255309451097e-06, "loss": 0.0749, "step": 39364 }, { "epoch": 0.6971359251240774, "grad_norm": 0.7651910185813904, "learning_rate": 6.655540347081867e-06, "loss": 0.084, "step": 39365 }, { "epoch": 0.6971536346611059, "grad_norm": 0.579407811164856, "learning_rate": 6.654825412165066e-06, "loss": 0.0602, "step": 39366 }, { "epoch": 0.6971713441981343, "grad_norm": 0.7171761393547058, "learning_rate": 6.6541105047030445e-06, "loss": 0.0823, "step": 39367 }, { "epoch": 0.6971890537351627, "grad_norm": 0.4946487545967102, "learning_rate": 6.653395624698155e-06, "loss": 0.0365, "step": 39368 }, { "epoch": 0.6972067632721911, "grad_norm": 0.4438669681549072, "learning_rate": 6.6526807721527505e-06, "loss": 0.0488, "step": 39369 }, { "epoch": 0.6972244728092196, "grad_norm": 0.4427698850631714, "learning_rate": 6.651965947069187e-06, "loss": 0.05, "step": 39370 }, { "epoch": 0.697242182346248, "grad_norm": 0.42808839678764343, "learning_rate": 6.651251149449806e-06, "loss": 0.064, "step": 39371 }, { "epoch": 0.6972598918832764, "grad_norm": 0.6985422968864441, "learning_rate": 6.6505363792969656e-06, "loss": 0.0399, "step": 39372 }, { "epoch": 0.6972776014203048, "grad_norm": 0.737334132194519, "learning_rate": 6.6498216366130185e-06, "loss": 0.0596, "step": 39373 }, { "epoch": 0.6972953109573333, "grad_norm": 0.7563787698745728, "learning_rate": 6.649106921400311e-06, "loss": 0.0545, "step": 39374 }, { "epoch": 0.6973130204943617, "grad_norm": 0.48802971839904785, "learning_rate": 6.648392233661195e-06, "loss": 0.0707, "step": 39375 }, { "epoch": 0.6973307300313901, "grad_norm": 0.6831936836242676, "learning_rate": 6.647677573398025e-06, "loss": 0.0592, "step": 39376 }, { "epoch": 0.6973484395684186, "grad_norm": 0.8827437162399292, "learning_rate": 6.646962940613152e-06, "loss": 0.0727, "step": 39377 }, { "epoch": 0.697366149105447, "grad_norm": 0.45677411556243896, "learning_rate": 6.6462483353089245e-06, "loss": 0.0537, "step": 39378 }, { "epoch": 0.6973838586424754, "grad_norm": 0.5216519236564636, "learning_rate": 6.645533757487694e-06, "loss": 0.0613, "step": 39379 }, { "epoch": 0.6974015681795038, "grad_norm": 0.6384421586990356, "learning_rate": 6.644819207151821e-06, "loss": 0.0279, "step": 39380 }, { "epoch": 0.6974192777165323, "grad_norm": 0.4623361825942993, "learning_rate": 6.6441046843036385e-06, "loss": 0.0361, "step": 39381 }, { "epoch": 0.6974369872535607, "grad_norm": 0.4794670045375824, "learning_rate": 6.643390188945508e-06, "loss": 0.0611, "step": 39382 }, { "epoch": 0.6974546967905891, "grad_norm": 0.3607749342918396, "learning_rate": 6.64267572107978e-06, "loss": 0.0392, "step": 39383 }, { "epoch": 0.6974724063276175, "grad_norm": 0.7170053124427795, "learning_rate": 6.6419612807088084e-06, "loss": 0.0793, "step": 39384 }, { "epoch": 0.697490115864646, "grad_norm": 0.6035747528076172, "learning_rate": 6.641246867834932e-06, "loss": 0.0392, "step": 39385 }, { "epoch": 0.6975078254016744, "grad_norm": 0.5214611887931824, "learning_rate": 6.64053248246051e-06, "loss": 0.0416, "step": 39386 }, { "epoch": 0.6975255349387028, "grad_norm": 0.4095500707626343, "learning_rate": 6.6398181245878845e-06, "loss": 0.0384, "step": 39387 }, { "epoch": 0.6975432444757312, "grad_norm": 0.5315834283828735, "learning_rate": 6.639103794219421e-06, "loss": 0.0504, "step": 39388 }, { "epoch": 0.6975609540127597, "grad_norm": 0.3865906596183777, "learning_rate": 6.638389491357456e-06, "loss": 0.0446, "step": 39389 }, { "epoch": 0.6975786635497881, "grad_norm": 0.4600798189640045, "learning_rate": 6.637675216004346e-06, "loss": 0.0534, "step": 39390 }, { "epoch": 0.6975963730868165, "grad_norm": 0.7533437013626099, "learning_rate": 6.6369609681624415e-06, "loss": 0.0594, "step": 39391 }, { "epoch": 0.697614082623845, "grad_norm": 0.3648786246776581, "learning_rate": 6.6362467478340856e-06, "loss": 0.0472, "step": 39392 }, { "epoch": 0.6976317921608735, "grad_norm": 0.38422802090644836, "learning_rate": 6.6355325550216305e-06, "loss": 0.039, "step": 39393 }, { "epoch": 0.6976495016979019, "grad_norm": 0.8098715543746948, "learning_rate": 6.634818389727429e-06, "loss": 0.0676, "step": 39394 }, { "epoch": 0.6976672112349303, "grad_norm": 1.069120168685913, "learning_rate": 6.634104251953827e-06, "loss": 0.0521, "step": 39395 }, { "epoch": 0.6976849207719588, "grad_norm": 0.8104930520057678, "learning_rate": 6.633390141703176e-06, "loss": 0.0848, "step": 39396 }, { "epoch": 0.6977026303089872, "grad_norm": 0.500765860080719, "learning_rate": 6.632676058977828e-06, "loss": 0.0557, "step": 39397 }, { "epoch": 0.6977203398460156, "grad_norm": 0.7357749938964844, "learning_rate": 6.631962003780132e-06, "loss": 0.0498, "step": 39398 }, { "epoch": 0.697738049383044, "grad_norm": 0.6690582633018494, "learning_rate": 6.63124797611243e-06, "loss": 0.0975, "step": 39399 }, { "epoch": 0.6977557589200725, "grad_norm": 0.7599920034408569, "learning_rate": 6.630533975977076e-06, "loss": 0.0861, "step": 39400 }, { "epoch": 0.6977734684571009, "grad_norm": 0.6272061467170715, "learning_rate": 6.629820003376418e-06, "loss": 0.0802, "step": 39401 }, { "epoch": 0.6977911779941293, "grad_norm": 0.49630510807037354, "learning_rate": 6.629106058312811e-06, "loss": 0.0422, "step": 39402 }, { "epoch": 0.6978088875311577, "grad_norm": 0.6862783432006836, "learning_rate": 6.628392140788594e-06, "loss": 0.0734, "step": 39403 }, { "epoch": 0.6978265970681862, "grad_norm": 0.7012979984283447, "learning_rate": 6.62767825080612e-06, "loss": 0.0651, "step": 39404 }, { "epoch": 0.6978443066052146, "grad_norm": 0.49373936653137207, "learning_rate": 6.626964388367737e-06, "loss": 0.08, "step": 39405 }, { "epoch": 0.697862016142243, "grad_norm": 0.11584307998418808, "learning_rate": 6.6262505534757954e-06, "loss": 0.0385, "step": 39406 }, { "epoch": 0.6978797256792715, "grad_norm": 0.8099962472915649, "learning_rate": 6.6255367461326416e-06, "loss": 0.0963, "step": 39407 }, { "epoch": 0.6978974352162999, "grad_norm": 0.6381025314331055, "learning_rate": 6.624822966340624e-06, "loss": 0.0648, "step": 39408 }, { "epoch": 0.6979151447533283, "grad_norm": 0.5252696871757507, "learning_rate": 6.624109214102098e-06, "loss": 0.0695, "step": 39409 }, { "epoch": 0.6979328542903567, "grad_norm": 0.7629036903381348, "learning_rate": 6.623395489419401e-06, "loss": 0.0527, "step": 39410 }, { "epoch": 0.6979505638273852, "grad_norm": 0.7050310373306274, "learning_rate": 6.6226817922948845e-06, "loss": 0.0664, "step": 39411 }, { "epoch": 0.6979682733644136, "grad_norm": 0.3950866162776947, "learning_rate": 6.621968122730904e-06, "loss": 0.0434, "step": 39412 }, { "epoch": 0.697985982901442, "grad_norm": 0.1832546442747116, "learning_rate": 6.621254480729794e-06, "loss": 0.0405, "step": 39413 }, { "epoch": 0.6980036924384704, "grad_norm": 0.8819196224212646, "learning_rate": 6.620540866293911e-06, "loss": 0.0772, "step": 39414 }, { "epoch": 0.6980214019754989, "grad_norm": 0.6298374533653259, "learning_rate": 6.619827279425601e-06, "loss": 0.0565, "step": 39415 }, { "epoch": 0.6980391115125273, "grad_norm": 0.4709206223487854, "learning_rate": 6.6191137201272105e-06, "loss": 0.0624, "step": 39416 }, { "epoch": 0.6980568210495557, "grad_norm": 0.6695542335510254, "learning_rate": 6.618400188401088e-06, "loss": 0.0578, "step": 39417 }, { "epoch": 0.6980745305865841, "grad_norm": 0.6910301446914673, "learning_rate": 6.617686684249581e-06, "loss": 0.0675, "step": 39418 }, { "epoch": 0.6980922401236126, "grad_norm": 0.3321070671081543, "learning_rate": 6.616973207675044e-06, "loss": 0.0799, "step": 39419 }, { "epoch": 0.698109949660641, "grad_norm": 0.525955080986023, "learning_rate": 6.616259758679812e-06, "loss": 0.0449, "step": 39420 }, { "epoch": 0.6981276591976694, "grad_norm": 0.7015822529792786, "learning_rate": 6.6155463372662354e-06, "loss": 0.0695, "step": 39421 }, { "epoch": 0.6981453687346979, "grad_norm": 0.6960248947143555, "learning_rate": 6.614832943436665e-06, "loss": 0.0664, "step": 39422 }, { "epoch": 0.6981630782717263, "grad_norm": 0.8861880898475647, "learning_rate": 6.614119577193451e-06, "loss": 0.0545, "step": 39423 }, { "epoch": 0.6981807878087547, "grad_norm": 0.5653195977210999, "learning_rate": 6.613406238538925e-06, "loss": 0.0585, "step": 39424 }, { "epoch": 0.6981984973457831, "grad_norm": 0.20527887344360352, "learning_rate": 6.612692927475449e-06, "loss": 0.0568, "step": 39425 }, { "epoch": 0.6982162068828116, "grad_norm": 0.43552708625793457, "learning_rate": 6.611979644005365e-06, "loss": 0.0523, "step": 39426 }, { "epoch": 0.69823391641984, "grad_norm": 0.4241043031215668, "learning_rate": 6.6112663881310235e-06, "loss": 0.0829, "step": 39427 }, { "epoch": 0.6982516259568684, "grad_norm": 0.8836694359779358, "learning_rate": 6.6105531598547645e-06, "loss": 0.064, "step": 39428 }, { "epoch": 0.6982693354938968, "grad_norm": 0.38185572624206543, "learning_rate": 6.6098399591789345e-06, "loss": 0.0339, "step": 39429 }, { "epoch": 0.6982870450309253, "grad_norm": 0.9321510791778564, "learning_rate": 6.609126786105889e-06, "loss": 0.0454, "step": 39430 }, { "epoch": 0.6983047545679537, "grad_norm": 0.9313696622848511, "learning_rate": 6.608413640637964e-06, "loss": 0.0553, "step": 39431 }, { "epoch": 0.6983224641049821, "grad_norm": 0.5083184838294983, "learning_rate": 6.607700522777506e-06, "loss": 0.0322, "step": 39432 }, { "epoch": 0.6983401736420105, "grad_norm": 0.5630581378936768, "learning_rate": 6.606987432526866e-06, "loss": 0.0409, "step": 39433 }, { "epoch": 0.698357883179039, "grad_norm": 0.6486579775810242, "learning_rate": 6.60627436988839e-06, "loss": 0.0483, "step": 39434 }, { "epoch": 0.6983755927160674, "grad_norm": 0.7736872434616089, "learning_rate": 6.605561334864421e-06, "loss": 0.0743, "step": 39435 }, { "epoch": 0.6983933022530958, "grad_norm": 0.4901832640171051, "learning_rate": 6.604848327457305e-06, "loss": 0.0508, "step": 39436 }, { "epoch": 0.6984110117901243, "grad_norm": 0.3183315098285675, "learning_rate": 6.604135347669396e-06, "loss": 0.0671, "step": 39437 }, { "epoch": 0.6984287213271527, "grad_norm": 0.5913128852844238, "learning_rate": 6.603422395503027e-06, "loss": 0.0509, "step": 39438 }, { "epoch": 0.6984464308641811, "grad_norm": 0.7606350183486938, "learning_rate": 6.602709470960548e-06, "loss": 0.0783, "step": 39439 }, { "epoch": 0.6984641404012095, "grad_norm": 0.7436997890472412, "learning_rate": 6.601996574044307e-06, "loss": 0.0816, "step": 39440 }, { "epoch": 0.698481849938238, "grad_norm": 0.8351562023162842, "learning_rate": 6.601283704756652e-06, "loss": 0.0512, "step": 39441 }, { "epoch": 0.6984995594752664, "grad_norm": 0.6221520900726318, "learning_rate": 6.600570863099919e-06, "loss": 0.0723, "step": 39442 }, { "epoch": 0.6985172690122948, "grad_norm": 0.46744102239608765, "learning_rate": 6.599858049076459e-06, "loss": 0.0685, "step": 39443 }, { "epoch": 0.6985349785493232, "grad_norm": 0.7394189238548279, "learning_rate": 6.599145262688616e-06, "loss": 0.0422, "step": 39444 }, { "epoch": 0.6985526880863517, "grad_norm": 0.8574497103691101, "learning_rate": 6.5984325039387345e-06, "loss": 0.036, "step": 39445 }, { "epoch": 0.6985703976233801, "grad_norm": 0.7050610184669495, "learning_rate": 6.597719772829162e-06, "loss": 0.0686, "step": 39446 }, { "epoch": 0.6985881071604085, "grad_norm": 0.3266071081161499, "learning_rate": 6.59700706936224e-06, "loss": 0.0373, "step": 39447 }, { "epoch": 0.6986058166974369, "grad_norm": 0.4372715353965759, "learning_rate": 6.59629439354032e-06, "loss": 0.0727, "step": 39448 }, { "epoch": 0.6986235262344654, "grad_norm": 0.4863349199295044, "learning_rate": 6.595581745365738e-06, "loss": 0.0556, "step": 39449 }, { "epoch": 0.6986412357714938, "grad_norm": 0.441368043422699, "learning_rate": 6.59486912484084e-06, "loss": 0.0532, "step": 39450 }, { "epoch": 0.6986589453085222, "grad_norm": 0.8250216245651245, "learning_rate": 6.594156531967979e-06, "loss": 0.0582, "step": 39451 }, { "epoch": 0.6986766548455507, "grad_norm": 0.6781885027885437, "learning_rate": 6.593443966749488e-06, "loss": 0.0787, "step": 39452 }, { "epoch": 0.6986943643825791, "grad_norm": 0.6686573624610901, "learning_rate": 6.592731429187709e-06, "loss": 0.0559, "step": 39453 }, { "epoch": 0.6987120739196075, "grad_norm": 0.7392759323120117, "learning_rate": 6.592018919284999e-06, "loss": 0.0645, "step": 39454 }, { "epoch": 0.6987297834566359, "grad_norm": 0.7657790780067444, "learning_rate": 6.591306437043704e-06, "loss": 0.06, "step": 39455 }, { "epoch": 0.6987474929936645, "grad_norm": 0.43483561277389526, "learning_rate": 6.5905939824661515e-06, "loss": 0.0676, "step": 39456 }, { "epoch": 0.6987652025306929, "grad_norm": 0.6283133029937744, "learning_rate": 6.589881555554697e-06, "loss": 0.0362, "step": 39457 }, { "epoch": 0.6987829120677213, "grad_norm": 0.8687736988067627, "learning_rate": 6.58916915631168e-06, "loss": 0.0491, "step": 39458 }, { "epoch": 0.6988006216047496, "grad_norm": 0.39770469069480896, "learning_rate": 6.588456784739453e-06, "loss": 0.0494, "step": 39459 }, { "epoch": 0.6988183311417782, "grad_norm": 0.621821403503418, "learning_rate": 6.587744440840345e-06, "loss": 0.0865, "step": 39460 }, { "epoch": 0.6988360406788066, "grad_norm": 0.6251028180122375, "learning_rate": 6.587032124616708e-06, "loss": 0.0319, "step": 39461 }, { "epoch": 0.698853750215835, "grad_norm": 0.6205820441246033, "learning_rate": 6.586319836070882e-06, "loss": 0.069, "step": 39462 }, { "epoch": 0.6988714597528634, "grad_norm": 0.2694448232650757, "learning_rate": 6.585607575205215e-06, "loss": 0.0594, "step": 39463 }, { "epoch": 0.6988891692898919, "grad_norm": 0.4367583394050598, "learning_rate": 6.584895342022049e-06, "loss": 0.0537, "step": 39464 }, { "epoch": 0.6989068788269203, "grad_norm": 0.7383012771606445, "learning_rate": 6.584183136523724e-06, "loss": 0.0543, "step": 39465 }, { "epoch": 0.6989245883639487, "grad_norm": 0.8644515872001648, "learning_rate": 6.5834709587125915e-06, "loss": 0.0759, "step": 39466 }, { "epoch": 0.6989422979009772, "grad_norm": 0.47751283645629883, "learning_rate": 6.582758808590983e-06, "loss": 0.0418, "step": 39467 }, { "epoch": 0.6989600074380056, "grad_norm": 0.5402205586433411, "learning_rate": 6.582046686161246e-06, "loss": 0.0491, "step": 39468 }, { "epoch": 0.698977716975034, "grad_norm": 0.3491571247577667, "learning_rate": 6.58133459142573e-06, "loss": 0.0342, "step": 39469 }, { "epoch": 0.6989954265120624, "grad_norm": 0.30933868885040283, "learning_rate": 6.580622524386766e-06, "loss": 0.03, "step": 39470 }, { "epoch": 0.6990131360490909, "grad_norm": 0.523383378982544, "learning_rate": 6.579910485046704e-06, "loss": 0.0763, "step": 39471 }, { "epoch": 0.6990308455861193, "grad_norm": 0.6142695546150208, "learning_rate": 6.579198473407884e-06, "loss": 0.0721, "step": 39472 }, { "epoch": 0.6990485551231477, "grad_norm": 0.7557896375656128, "learning_rate": 6.578486489472648e-06, "loss": 0.047, "step": 39473 }, { "epoch": 0.6990662646601761, "grad_norm": 0.5653639435768127, "learning_rate": 6.5777745332433405e-06, "loss": 0.0531, "step": 39474 }, { "epoch": 0.6990839741972046, "grad_norm": 0.5644470453262329, "learning_rate": 6.5770626047223035e-06, "loss": 0.0449, "step": 39475 }, { "epoch": 0.699101683734233, "grad_norm": 0.5563996434211731, "learning_rate": 6.576350703911883e-06, "loss": 0.0794, "step": 39476 }, { "epoch": 0.6991193932712614, "grad_norm": 1.0167502164840698, "learning_rate": 6.575638830814412e-06, "loss": 0.0764, "step": 39477 }, { "epoch": 0.6991371028082899, "grad_norm": 0.8181066513061523, "learning_rate": 6.574926985432235e-06, "loss": 0.0562, "step": 39478 }, { "epoch": 0.6991548123453183, "grad_norm": 0.4438721537590027, "learning_rate": 6.574215167767698e-06, "loss": 0.0343, "step": 39479 }, { "epoch": 0.6991725218823467, "grad_norm": 0.8423052430152893, "learning_rate": 6.573503377823147e-06, "loss": 0.114, "step": 39480 }, { "epoch": 0.6991902314193751, "grad_norm": 0.39730167388916016, "learning_rate": 6.572791615600911e-06, "loss": 0.0568, "step": 39481 }, { "epoch": 0.6992079409564036, "grad_norm": 0.6373804807662964, "learning_rate": 6.572079881103334e-06, "loss": 0.0777, "step": 39482 }, { "epoch": 0.699225650493432, "grad_norm": 0.48269888758659363, "learning_rate": 6.5713681743327716e-06, "loss": 0.0687, "step": 39483 }, { "epoch": 0.6992433600304604, "grad_norm": 0.5793522596359253, "learning_rate": 6.5706564952915495e-06, "loss": 0.0672, "step": 39484 }, { "epoch": 0.6992610695674888, "grad_norm": 0.6023857593536377, "learning_rate": 6.569944843982017e-06, "loss": 0.0515, "step": 39485 }, { "epoch": 0.6992787791045173, "grad_norm": 0.5196071863174438, "learning_rate": 6.569233220406512e-06, "loss": 0.0516, "step": 39486 }, { "epoch": 0.6992964886415457, "grad_norm": 0.6064543128013611, "learning_rate": 6.568521624567381e-06, "loss": 0.0435, "step": 39487 }, { "epoch": 0.6993141981785741, "grad_norm": 0.31191903352737427, "learning_rate": 6.5678100564669575e-06, "loss": 0.0622, "step": 39488 }, { "epoch": 0.6993319077156025, "grad_norm": 0.4498331546783447, "learning_rate": 6.5670985161075845e-06, "loss": 0.0641, "step": 39489 }, { "epoch": 0.699349617252631, "grad_norm": 0.4079849123954773, "learning_rate": 6.566387003491606e-06, "loss": 0.0351, "step": 39490 }, { "epoch": 0.6993673267896594, "grad_norm": 0.565216600894928, "learning_rate": 6.565675518621362e-06, "loss": 0.0637, "step": 39491 }, { "epoch": 0.6993850363266878, "grad_norm": 0.628663957118988, "learning_rate": 6.564964061499189e-06, "loss": 0.0318, "step": 39492 }, { "epoch": 0.6994027458637163, "grad_norm": 0.696924090385437, "learning_rate": 6.5642526321274345e-06, "loss": 0.0564, "step": 39493 }, { "epoch": 0.6994204554007447, "grad_norm": 0.5108461976051331, "learning_rate": 6.563541230508441e-06, "loss": 0.0682, "step": 39494 }, { "epoch": 0.6994381649377731, "grad_norm": 0.5415828824043274, "learning_rate": 6.562829856644535e-06, "loss": 0.0506, "step": 39495 }, { "epoch": 0.6994558744748015, "grad_norm": 0.5846818089485168, "learning_rate": 6.562118510538068e-06, "loss": 0.0568, "step": 39496 }, { "epoch": 0.69947358401183, "grad_norm": 0.7330955266952515, "learning_rate": 6.561407192191377e-06, "loss": 0.063, "step": 39497 }, { "epoch": 0.6994912935488584, "grad_norm": 0.5034812688827515, "learning_rate": 6.5606959016068085e-06, "loss": 0.073, "step": 39498 }, { "epoch": 0.6995090030858868, "grad_norm": 0.6040695309638977, "learning_rate": 6.559984638786691e-06, "loss": 0.1002, "step": 39499 }, { "epoch": 0.6995267126229152, "grad_norm": 0.7943996787071228, "learning_rate": 6.55927340373337e-06, "loss": 0.0449, "step": 39500 }, { "epoch": 0.6995444221599437, "grad_norm": 0.988706111907959, "learning_rate": 6.5585621964491865e-06, "loss": 0.0665, "step": 39501 }, { "epoch": 0.6995621316969721, "grad_norm": 0.3295786678791046, "learning_rate": 6.557851016936479e-06, "loss": 0.0599, "step": 39502 }, { "epoch": 0.6995798412340005, "grad_norm": 0.6358749270439148, "learning_rate": 6.5571398651975894e-06, "loss": 0.0498, "step": 39503 }, { "epoch": 0.6995975507710289, "grad_norm": 0.664723813533783, "learning_rate": 6.556428741234854e-06, "loss": 0.0543, "step": 39504 }, { "epoch": 0.6996152603080574, "grad_norm": 0.5420591235160828, "learning_rate": 6.555717645050621e-06, "loss": 0.0478, "step": 39505 }, { "epoch": 0.6996329698450858, "grad_norm": 0.6032678484916687, "learning_rate": 6.555006576647216e-06, "loss": 0.0695, "step": 39506 }, { "epoch": 0.6996506793821142, "grad_norm": 0.3757559061050415, "learning_rate": 6.554295536026985e-06, "loss": 0.0408, "step": 39507 }, { "epoch": 0.6996683889191427, "grad_norm": 0.559515118598938, "learning_rate": 6.553584523192275e-06, "loss": 0.0454, "step": 39508 }, { "epoch": 0.6996860984561711, "grad_norm": 0.43954217433929443, "learning_rate": 6.55287353814541e-06, "loss": 0.0733, "step": 39509 }, { "epoch": 0.6997038079931995, "grad_norm": 0.6867920160293579, "learning_rate": 6.552162580888738e-06, "loss": 0.0788, "step": 39510 }, { "epoch": 0.6997215175302279, "grad_norm": 0.7837170958518982, "learning_rate": 6.55145165142459e-06, "loss": 0.0636, "step": 39511 }, { "epoch": 0.6997392270672564, "grad_norm": 0.7708559036254883, "learning_rate": 6.550740749755323e-06, "loss": 0.0751, "step": 39512 }, { "epoch": 0.6997569366042848, "grad_norm": 0.8306133151054382, "learning_rate": 6.550029875883259e-06, "loss": 0.0581, "step": 39513 }, { "epoch": 0.6997746461413132, "grad_norm": 0.8807812929153442, "learning_rate": 6.549319029810742e-06, "loss": 0.0806, "step": 39514 }, { "epoch": 0.6997923556783416, "grad_norm": 0.7551150918006897, "learning_rate": 6.548608211540115e-06, "loss": 0.0641, "step": 39515 }, { "epoch": 0.6998100652153701, "grad_norm": 0.8227630853652954, "learning_rate": 6.547897421073708e-06, "loss": 0.0569, "step": 39516 }, { "epoch": 0.6998277747523985, "grad_norm": 0.640945315361023, "learning_rate": 6.547186658413864e-06, "loss": 0.0769, "step": 39517 }, { "epoch": 0.699845484289427, "grad_norm": 0.3723468482494354, "learning_rate": 6.546475923562921e-06, "loss": 0.0587, "step": 39518 }, { "epoch": 0.6998631938264553, "grad_norm": 0.7461481690406799, "learning_rate": 6.545765216523217e-06, "loss": 0.0689, "step": 39519 }, { "epoch": 0.6998809033634839, "grad_norm": 0.6539005637168884, "learning_rate": 6.545054537297089e-06, "loss": 0.0578, "step": 39520 }, { "epoch": 0.6998986129005123, "grad_norm": 0.5082974433898926, "learning_rate": 6.544343885886877e-06, "loss": 0.0567, "step": 39521 }, { "epoch": 0.6999163224375407, "grad_norm": 0.4293519854545593, "learning_rate": 6.543633262294924e-06, "loss": 0.0712, "step": 39522 }, { "epoch": 0.6999340319745692, "grad_norm": 0.38992974162101746, "learning_rate": 6.5429226665235565e-06, "loss": 0.0691, "step": 39523 }, { "epoch": 0.6999517415115976, "grad_norm": 0.5956388115882874, "learning_rate": 6.542212098575119e-06, "loss": 0.0417, "step": 39524 }, { "epoch": 0.699969451048626, "grad_norm": 0.5503906011581421, "learning_rate": 6.541501558451949e-06, "loss": 0.0563, "step": 39525 }, { "epoch": 0.6999871605856544, "grad_norm": 0.5832079648971558, "learning_rate": 6.540791046156386e-06, "loss": 0.0505, "step": 39526 }, { "epoch": 0.7000048701226829, "grad_norm": 0.8749587535858154, "learning_rate": 6.540080561690761e-06, "loss": 0.0659, "step": 39527 }, { "epoch": 0.7000225796597113, "grad_norm": 0.7230979204177856, "learning_rate": 6.539370105057416e-06, "loss": 0.0619, "step": 39528 }, { "epoch": 0.7000402891967397, "grad_norm": 0.44384369254112244, "learning_rate": 6.538659676258687e-06, "loss": 0.0557, "step": 39529 }, { "epoch": 0.7000579987337681, "grad_norm": 0.3722437620162964, "learning_rate": 6.537949275296912e-06, "loss": 0.0752, "step": 39530 }, { "epoch": 0.7000757082707966, "grad_norm": 0.39521661400794983, "learning_rate": 6.537238902174426e-06, "loss": 0.0428, "step": 39531 }, { "epoch": 0.700093417807825, "grad_norm": 0.5305247902870178, "learning_rate": 6.536528556893571e-06, "loss": 0.058, "step": 39532 }, { "epoch": 0.7001111273448534, "grad_norm": 0.3164087235927582, "learning_rate": 6.535818239456685e-06, "loss": 0.0488, "step": 39533 }, { "epoch": 0.7001288368818818, "grad_norm": 0.5607283115386963, "learning_rate": 6.535107949866096e-06, "loss": 0.0258, "step": 39534 }, { "epoch": 0.7001465464189103, "grad_norm": 0.45025861263275146, "learning_rate": 6.534397688124145e-06, "loss": 0.0661, "step": 39535 }, { "epoch": 0.7001642559559387, "grad_norm": 0.5622945427894592, "learning_rate": 6.533687454233169e-06, "loss": 0.0524, "step": 39536 }, { "epoch": 0.7001819654929671, "grad_norm": 0.6210075616836548, "learning_rate": 6.532977248195512e-06, "loss": 0.0734, "step": 39537 }, { "epoch": 0.7001996750299956, "grad_norm": 0.5441160798072815, "learning_rate": 6.532267070013496e-06, "loss": 0.0514, "step": 39538 }, { "epoch": 0.700217384567024, "grad_norm": 0.6651293039321899, "learning_rate": 6.531556919689467e-06, "loss": 0.0462, "step": 39539 }, { "epoch": 0.7002350941040524, "grad_norm": 0.6324739456176758, "learning_rate": 6.530846797225757e-06, "loss": 0.0376, "step": 39540 }, { "epoch": 0.7002528036410808, "grad_norm": 0.376356840133667, "learning_rate": 6.530136702624705e-06, "loss": 0.0485, "step": 39541 }, { "epoch": 0.7002705131781093, "grad_norm": 0.6553696393966675, "learning_rate": 6.5294266358886475e-06, "loss": 0.1044, "step": 39542 }, { "epoch": 0.7002882227151377, "grad_norm": 0.5099567174911499, "learning_rate": 6.52871659701992e-06, "loss": 0.0611, "step": 39543 }, { "epoch": 0.7003059322521661, "grad_norm": 0.48361143469810486, "learning_rate": 6.528006586020862e-06, "loss": 0.0749, "step": 39544 }, { "epoch": 0.7003236417891945, "grad_norm": 0.5261073708534241, "learning_rate": 6.527296602893801e-06, "loss": 0.0533, "step": 39545 }, { "epoch": 0.700341351326223, "grad_norm": 0.6925463676452637, "learning_rate": 6.526586647641078e-06, "loss": 0.0644, "step": 39546 }, { "epoch": 0.7003590608632514, "grad_norm": 0.4058721661567688, "learning_rate": 6.525876720265032e-06, "loss": 0.0722, "step": 39547 }, { "epoch": 0.7003767704002798, "grad_norm": 0.18952953815460205, "learning_rate": 6.525166820767984e-06, "loss": 0.0364, "step": 39548 }, { "epoch": 0.7003944799373082, "grad_norm": 0.9111965298652649, "learning_rate": 6.524456949152286e-06, "loss": 0.0791, "step": 39549 }, { "epoch": 0.7004121894743367, "grad_norm": 0.5515382289886475, "learning_rate": 6.523747105420267e-06, "loss": 0.0378, "step": 39550 }, { "epoch": 0.7004298990113651, "grad_norm": 0.6348747611045837, "learning_rate": 6.523037289574268e-06, "loss": 0.0668, "step": 39551 }, { "epoch": 0.7004476085483935, "grad_norm": 0.6150914430618286, "learning_rate": 6.522327501616612e-06, "loss": 0.0806, "step": 39552 }, { "epoch": 0.700465318085422, "grad_norm": 0.7865189909934998, "learning_rate": 6.521617741549643e-06, "loss": 0.0695, "step": 39553 }, { "epoch": 0.7004830276224504, "grad_norm": 0.6299105882644653, "learning_rate": 6.520908009375698e-06, "loss": 0.0655, "step": 39554 }, { "epoch": 0.7005007371594788, "grad_norm": 0.5737047791481018, "learning_rate": 6.520198305097105e-06, "loss": 0.0719, "step": 39555 }, { "epoch": 0.7005184466965072, "grad_norm": 0.7027024030685425, "learning_rate": 6.5194886287162e-06, "loss": 0.0942, "step": 39556 }, { "epoch": 0.7005361562335357, "grad_norm": 0.8133293986320496, "learning_rate": 6.51877898023532e-06, "loss": 0.0753, "step": 39557 }, { "epoch": 0.7005538657705641, "grad_norm": 0.5154515504837036, "learning_rate": 6.518069359656799e-06, "loss": 0.0679, "step": 39558 }, { "epoch": 0.7005715753075925, "grad_norm": 0.27709725499153137, "learning_rate": 6.517359766982973e-06, "loss": 0.0722, "step": 39559 }, { "epoch": 0.7005892848446209, "grad_norm": 0.5734283328056335, "learning_rate": 6.516650202216175e-06, "loss": 0.0568, "step": 39560 }, { "epoch": 0.7006069943816494, "grad_norm": 0.6205368638038635, "learning_rate": 6.51594066535874e-06, "loss": 0.0503, "step": 39561 }, { "epoch": 0.7006247039186778, "grad_norm": 0.5727919936180115, "learning_rate": 6.515231156413006e-06, "loss": 0.0591, "step": 39562 }, { "epoch": 0.7006424134557062, "grad_norm": 0.6426979899406433, "learning_rate": 6.5145216753813e-06, "loss": 0.0634, "step": 39563 }, { "epoch": 0.7006601229927346, "grad_norm": 0.9566966891288757, "learning_rate": 6.513812222265959e-06, "loss": 0.1033, "step": 39564 }, { "epoch": 0.7006778325297631, "grad_norm": 0.5141125321388245, "learning_rate": 6.513102797069322e-06, "loss": 0.0574, "step": 39565 }, { "epoch": 0.7006955420667915, "grad_norm": 0.3574853539466858, "learning_rate": 6.512393399793714e-06, "loss": 0.0502, "step": 39566 }, { "epoch": 0.7007132516038199, "grad_norm": 0.7663626074790955, "learning_rate": 6.511684030441472e-06, "loss": 0.0711, "step": 39567 }, { "epoch": 0.7007309611408484, "grad_norm": 0.4300130605697632, "learning_rate": 6.5109746890149315e-06, "loss": 0.0488, "step": 39568 }, { "epoch": 0.7007486706778768, "grad_norm": 0.4861496388912201, "learning_rate": 6.510265375516426e-06, "loss": 0.0777, "step": 39569 }, { "epoch": 0.7007663802149052, "grad_norm": 0.2431313693523407, "learning_rate": 6.509556089948287e-06, "loss": 0.0529, "step": 39570 }, { "epoch": 0.7007840897519336, "grad_norm": 0.8614957332611084, "learning_rate": 6.508846832312852e-06, "loss": 0.0795, "step": 39571 }, { "epoch": 0.7008017992889621, "grad_norm": 0.7243126034736633, "learning_rate": 6.508137602612456e-06, "loss": 0.0539, "step": 39572 }, { "epoch": 0.7008195088259905, "grad_norm": 0.5021435618400574, "learning_rate": 6.507428400849424e-06, "loss": 0.0383, "step": 39573 }, { "epoch": 0.7008372183630189, "grad_norm": 0.3777548670768738, "learning_rate": 6.506719227026094e-06, "loss": 0.063, "step": 39574 }, { "epoch": 0.7008549279000473, "grad_norm": 0.41257333755493164, "learning_rate": 6.506010081144797e-06, "loss": 0.034, "step": 39575 }, { "epoch": 0.7008726374370758, "grad_norm": 0.7013055682182312, "learning_rate": 6.505300963207873e-06, "loss": 0.0695, "step": 39576 }, { "epoch": 0.7008903469741042, "grad_norm": 0.575982391834259, "learning_rate": 6.504591873217642e-06, "loss": 0.0499, "step": 39577 }, { "epoch": 0.7009080565111326, "grad_norm": 0.404938280582428, "learning_rate": 6.503882811176446e-06, "loss": 0.0482, "step": 39578 }, { "epoch": 0.700925766048161, "grad_norm": 1.0127242803573608, "learning_rate": 6.503173777086624e-06, "loss": 0.0867, "step": 39579 }, { "epoch": 0.7009434755851895, "grad_norm": 0.8356276750564575, "learning_rate": 6.5024647709504945e-06, "loss": 0.0689, "step": 39580 }, { "epoch": 0.700961185122218, "grad_norm": 0.6901929974555969, "learning_rate": 6.501755792770397e-06, "loss": 0.0524, "step": 39581 }, { "epoch": 0.7009788946592463, "grad_norm": 0.8287876844406128, "learning_rate": 6.5010468425486634e-06, "loss": 0.0667, "step": 39582 }, { "epoch": 0.7009966041962749, "grad_norm": 0.59209805727005, "learning_rate": 6.500337920287631e-06, "loss": 0.0955, "step": 39583 }, { "epoch": 0.7010143137333033, "grad_norm": 0.5561396479606628, "learning_rate": 6.499629025989623e-06, "loss": 0.0553, "step": 39584 }, { "epoch": 0.7010320232703317, "grad_norm": 0.40384402871131897, "learning_rate": 6.498920159656974e-06, "loss": 0.0281, "step": 39585 }, { "epoch": 0.70104973280736, "grad_norm": 0.5860850214958191, "learning_rate": 6.4982113212920195e-06, "loss": 0.0532, "step": 39586 }, { "epoch": 0.7010674423443886, "grad_norm": 0.23934338986873627, "learning_rate": 6.4975025108970876e-06, "loss": 0.0238, "step": 39587 }, { "epoch": 0.701085151881417, "grad_norm": 0.5409592986106873, "learning_rate": 6.496793728474514e-06, "loss": 0.0493, "step": 39588 }, { "epoch": 0.7011028614184454, "grad_norm": 0.37844038009643555, "learning_rate": 6.496084974026629e-06, "loss": 0.0608, "step": 39589 }, { "epoch": 0.7011205709554738, "grad_norm": 0.7513390183448792, "learning_rate": 6.495376247555768e-06, "loss": 0.0271, "step": 39590 }, { "epoch": 0.7011382804925023, "grad_norm": 0.8141906261444092, "learning_rate": 6.494667549064257e-06, "loss": 0.0878, "step": 39591 }, { "epoch": 0.7011559900295307, "grad_norm": 0.5417755842208862, "learning_rate": 6.493958878554427e-06, "loss": 0.0335, "step": 39592 }, { "epoch": 0.7011736995665591, "grad_norm": 0.4991096556186676, "learning_rate": 6.4932502360286175e-06, "loss": 0.0713, "step": 39593 }, { "epoch": 0.7011914091035875, "grad_norm": 0.5773073434829712, "learning_rate": 6.49254162148915e-06, "loss": 0.0667, "step": 39594 }, { "epoch": 0.701209118640616, "grad_norm": 0.5082032084465027, "learning_rate": 6.49183303493836e-06, "loss": 0.0504, "step": 39595 }, { "epoch": 0.7012268281776444, "grad_norm": 0.5821012258529663, "learning_rate": 6.491124476378578e-06, "loss": 0.0609, "step": 39596 }, { "epoch": 0.7012445377146728, "grad_norm": 0.4472750425338745, "learning_rate": 6.490415945812135e-06, "loss": 0.0469, "step": 39597 }, { "epoch": 0.7012622472517013, "grad_norm": 0.5092360973358154, "learning_rate": 6.4897074432413645e-06, "loss": 0.0746, "step": 39598 }, { "epoch": 0.7012799567887297, "grad_norm": 0.5286471843719482, "learning_rate": 6.488998968668595e-06, "loss": 0.0798, "step": 39599 }, { "epoch": 0.7012976663257581, "grad_norm": 0.465828537940979, "learning_rate": 6.488290522096159e-06, "loss": 0.0639, "step": 39600 }, { "epoch": 0.7013153758627865, "grad_norm": 1.080104947090149, "learning_rate": 6.487582103526389e-06, "loss": 0.0659, "step": 39601 }, { "epoch": 0.701333085399815, "grad_norm": 0.97306889295578, "learning_rate": 6.48687371296161e-06, "loss": 0.0732, "step": 39602 }, { "epoch": 0.7013507949368434, "grad_norm": 0.5328550934791565, "learning_rate": 6.4861653504041556e-06, "loss": 0.0355, "step": 39603 }, { "epoch": 0.7013685044738718, "grad_norm": 0.47329241037368774, "learning_rate": 6.485457015856361e-06, "loss": 0.0467, "step": 39604 }, { "epoch": 0.7013862140109002, "grad_norm": 0.4508879482746124, "learning_rate": 6.4847487093205456e-06, "loss": 0.0652, "step": 39605 }, { "epoch": 0.7014039235479287, "grad_norm": 0.8944308161735535, "learning_rate": 6.484040430799039e-06, "loss": 0.0749, "step": 39606 }, { "epoch": 0.7014216330849571, "grad_norm": 0.5762439966201782, "learning_rate": 6.483332180294185e-06, "loss": 0.0412, "step": 39607 }, { "epoch": 0.7014393426219855, "grad_norm": 0.5968290567398071, "learning_rate": 6.482623957808313e-06, "loss": 0.0524, "step": 39608 }, { "epoch": 0.7014570521590139, "grad_norm": 0.6879637241363525, "learning_rate": 6.481915763343741e-06, "loss": 0.0562, "step": 39609 }, { "epoch": 0.7014747616960424, "grad_norm": 0.5036698579788208, "learning_rate": 6.481207596902804e-06, "loss": 0.0619, "step": 39610 }, { "epoch": 0.7014924712330708, "grad_norm": 0.7003021240234375, "learning_rate": 6.480499458487837e-06, "loss": 0.0974, "step": 39611 }, { "epoch": 0.7015101807700992, "grad_norm": 0.7084322571754456, "learning_rate": 6.479791348101161e-06, "loss": 0.0528, "step": 39612 }, { "epoch": 0.7015278903071277, "grad_norm": 0.4175727665424347, "learning_rate": 6.479083265745108e-06, "loss": 0.0378, "step": 39613 }, { "epoch": 0.7015455998441561, "grad_norm": 0.38897284865379333, "learning_rate": 6.478375211422012e-06, "loss": 0.0465, "step": 39614 }, { "epoch": 0.7015633093811845, "grad_norm": 0.5239073038101196, "learning_rate": 6.477667185134198e-06, "loss": 0.0574, "step": 39615 }, { "epoch": 0.7015810189182129, "grad_norm": 0.628513514995575, "learning_rate": 6.476959186883996e-06, "loss": 0.0423, "step": 39616 }, { "epoch": 0.7015987284552414, "grad_norm": 0.6314923167228699, "learning_rate": 6.476251216673738e-06, "loss": 0.0351, "step": 39617 }, { "epoch": 0.7016164379922698, "grad_norm": 0.4934138357639313, "learning_rate": 6.475543274505755e-06, "loss": 0.0412, "step": 39618 }, { "epoch": 0.7016341475292982, "grad_norm": 0.4975827634334564, "learning_rate": 6.474835360382369e-06, "loss": 0.0507, "step": 39619 }, { "epoch": 0.7016518570663266, "grad_norm": 0.317043274641037, "learning_rate": 6.474127474305911e-06, "loss": 0.0471, "step": 39620 }, { "epoch": 0.7016695666033551, "grad_norm": 0.5148954391479492, "learning_rate": 6.4734196162787125e-06, "loss": 0.0355, "step": 39621 }, { "epoch": 0.7016872761403835, "grad_norm": 0.6413252949714661, "learning_rate": 6.472711786303105e-06, "loss": 0.0631, "step": 39622 }, { "epoch": 0.7017049856774119, "grad_norm": 0.7219206690788269, "learning_rate": 6.472003984381408e-06, "loss": 0.0852, "step": 39623 }, { "epoch": 0.7017226952144403, "grad_norm": 0.47080639004707336, "learning_rate": 6.4712962105159555e-06, "loss": 0.0615, "step": 39624 }, { "epoch": 0.7017404047514688, "grad_norm": 0.7033747434616089, "learning_rate": 6.470588464709077e-06, "loss": 0.0816, "step": 39625 }, { "epoch": 0.7017581142884972, "grad_norm": 0.8264428377151489, "learning_rate": 6.469880746963098e-06, "loss": 0.0691, "step": 39626 }, { "epoch": 0.7017758238255256, "grad_norm": 0.8794276118278503, "learning_rate": 6.46917305728035e-06, "loss": 0.0573, "step": 39627 }, { "epoch": 0.7017935333625541, "grad_norm": 0.7093485593795776, "learning_rate": 6.46846539566316e-06, "loss": 0.0548, "step": 39628 }, { "epoch": 0.7018112428995825, "grad_norm": 0.3305262327194214, "learning_rate": 6.467757762113861e-06, "loss": 0.0866, "step": 39629 }, { "epoch": 0.7018289524366109, "grad_norm": 0.41840502619743347, "learning_rate": 6.467050156634769e-06, "loss": 0.0665, "step": 39630 }, { "epoch": 0.7018466619736393, "grad_norm": 0.4107533395290375, "learning_rate": 6.466342579228221e-06, "loss": 0.0496, "step": 39631 }, { "epoch": 0.7018643715106678, "grad_norm": 0.6120606064796448, "learning_rate": 6.4656350298965415e-06, "loss": 0.0379, "step": 39632 }, { "epoch": 0.7018820810476962, "grad_norm": 0.7197721004486084, "learning_rate": 6.4649275086420645e-06, "loss": 0.0797, "step": 39633 }, { "epoch": 0.7018997905847246, "grad_norm": 0.6119450330734253, "learning_rate": 6.464220015467109e-06, "loss": 0.0614, "step": 39634 }, { "epoch": 0.701917500121753, "grad_norm": 0.8917809724807739, "learning_rate": 6.463512550374e-06, "loss": 0.0608, "step": 39635 }, { "epoch": 0.7019352096587815, "grad_norm": 0.5785423517227173, "learning_rate": 6.462805113365083e-06, "loss": 0.0495, "step": 39636 }, { "epoch": 0.7019529191958099, "grad_norm": 0.3426559269428253, "learning_rate": 6.462097704442666e-06, "loss": 0.0693, "step": 39637 }, { "epoch": 0.7019706287328383, "grad_norm": 0.7854596376419067, "learning_rate": 6.4613903236090855e-06, "loss": 0.0829, "step": 39638 }, { "epoch": 0.7019883382698667, "grad_norm": 1.022807240486145, "learning_rate": 6.460682970866668e-06, "loss": 0.07, "step": 39639 }, { "epoch": 0.7020060478068952, "grad_norm": 0.33377236127853394, "learning_rate": 6.459975646217744e-06, "loss": 0.0594, "step": 39640 }, { "epoch": 0.7020237573439236, "grad_norm": 0.48921895027160645, "learning_rate": 6.459268349664631e-06, "loss": 0.0522, "step": 39641 }, { "epoch": 0.702041466880952, "grad_norm": 0.7195255756378174, "learning_rate": 6.458561081209662e-06, "loss": 0.0678, "step": 39642 }, { "epoch": 0.7020591764179805, "grad_norm": 0.5475554466247559, "learning_rate": 6.457853840855163e-06, "loss": 0.0814, "step": 39643 }, { "epoch": 0.702076885955009, "grad_norm": 0.5733623504638672, "learning_rate": 6.457146628603461e-06, "loss": 0.0502, "step": 39644 }, { "epoch": 0.7020945954920373, "grad_norm": 0.7716367840766907, "learning_rate": 6.456439444456882e-06, "loss": 0.0769, "step": 39645 }, { "epoch": 0.7021123050290657, "grad_norm": 0.7766889333724976, "learning_rate": 6.4557322884177525e-06, "loss": 0.0545, "step": 39646 }, { "epoch": 0.7021300145660943, "grad_norm": 0.38974764943122864, "learning_rate": 6.455025160488407e-06, "loss": 0.0678, "step": 39647 }, { "epoch": 0.7021477241031227, "grad_norm": 0.5483641028404236, "learning_rate": 6.454318060671158e-06, "loss": 0.0552, "step": 39648 }, { "epoch": 0.702165433640151, "grad_norm": 0.7247289419174194, "learning_rate": 6.453610988968338e-06, "loss": 0.0644, "step": 39649 }, { "epoch": 0.7021831431771794, "grad_norm": 0.7205291986465454, "learning_rate": 6.452903945382279e-06, "loss": 0.0656, "step": 39650 }, { "epoch": 0.702200852714208, "grad_norm": 0.6642991304397583, "learning_rate": 6.452196929915295e-06, "loss": 0.0676, "step": 39651 }, { "epoch": 0.7022185622512364, "grad_norm": 0.6536606550216675, "learning_rate": 6.45148994256972e-06, "loss": 0.0595, "step": 39652 }, { "epoch": 0.7022362717882648, "grad_norm": 0.6769980192184448, "learning_rate": 6.450782983347877e-06, "loss": 0.072, "step": 39653 }, { "epoch": 0.7022539813252932, "grad_norm": 0.7317068576812744, "learning_rate": 6.450076052252095e-06, "loss": 0.0791, "step": 39654 }, { "epoch": 0.7022716908623217, "grad_norm": 0.6606354117393494, "learning_rate": 6.449369149284698e-06, "loss": 0.0316, "step": 39655 }, { "epoch": 0.7022894003993501, "grad_norm": 0.4280797243118286, "learning_rate": 6.44866227444801e-06, "loss": 0.0507, "step": 39656 }, { "epoch": 0.7023071099363785, "grad_norm": 0.5965081453323364, "learning_rate": 6.447955427744365e-06, "loss": 0.0482, "step": 39657 }, { "epoch": 0.702324819473407, "grad_norm": 0.5381085276603699, "learning_rate": 6.447248609176075e-06, "loss": 0.0364, "step": 39658 }, { "epoch": 0.7023425290104354, "grad_norm": 0.4325128495693207, "learning_rate": 6.4465418187454744e-06, "loss": 0.0681, "step": 39659 }, { "epoch": 0.7023602385474638, "grad_norm": 0.6897631883621216, "learning_rate": 6.445835056454884e-06, "loss": 0.0593, "step": 39660 }, { "epoch": 0.7023779480844922, "grad_norm": 0.4437943696975708, "learning_rate": 6.445128322306637e-06, "loss": 0.0634, "step": 39661 }, { "epoch": 0.7023956576215207, "grad_norm": 0.5260405540466309, "learning_rate": 6.4444216163030465e-06, "loss": 0.0531, "step": 39662 }, { "epoch": 0.7024133671585491, "grad_norm": 0.6532081961631775, "learning_rate": 6.443714938446444e-06, "loss": 0.1098, "step": 39663 }, { "epoch": 0.7024310766955775, "grad_norm": 0.8467013239860535, "learning_rate": 6.4430082887391484e-06, "loss": 0.0671, "step": 39664 }, { "epoch": 0.7024487862326059, "grad_norm": 0.566839873790741, "learning_rate": 6.442301667183502e-06, "loss": 0.0706, "step": 39665 }, { "epoch": 0.7024664957696344, "grad_norm": 1.0121090412139893, "learning_rate": 6.441595073781811e-06, "loss": 0.1163, "step": 39666 }, { "epoch": 0.7024842053066628, "grad_norm": 0.5389501452445984, "learning_rate": 6.440888508536407e-06, "loss": 0.0547, "step": 39667 }, { "epoch": 0.7025019148436912, "grad_norm": 0.358783096075058, "learning_rate": 6.440181971449619e-06, "loss": 0.068, "step": 39668 }, { "epoch": 0.7025196243807196, "grad_norm": 0.6529082655906677, "learning_rate": 6.439475462523763e-06, "loss": 0.0631, "step": 39669 }, { "epoch": 0.7025373339177481, "grad_norm": 1.0531624555587769, "learning_rate": 6.438768981761165e-06, "loss": 0.0781, "step": 39670 }, { "epoch": 0.7025550434547765, "grad_norm": 0.5268358588218689, "learning_rate": 6.43806252916415e-06, "loss": 0.0541, "step": 39671 }, { "epoch": 0.7025727529918049, "grad_norm": 0.7811648845672607, "learning_rate": 6.4373561047350454e-06, "loss": 0.1093, "step": 39672 }, { "epoch": 0.7025904625288334, "grad_norm": 0.5952511429786682, "learning_rate": 6.436649708476173e-06, "loss": 0.075, "step": 39673 }, { "epoch": 0.7026081720658618, "grad_norm": 0.6014955639839172, "learning_rate": 6.435943340389857e-06, "loss": 0.0496, "step": 39674 }, { "epoch": 0.7026258816028902, "grad_norm": 0.8117045760154724, "learning_rate": 6.435237000478425e-06, "loss": 0.0826, "step": 39675 }, { "epoch": 0.7026435911399186, "grad_norm": 0.5794278979301453, "learning_rate": 6.434530688744193e-06, "loss": 0.0644, "step": 39676 }, { "epoch": 0.7026613006769471, "grad_norm": 0.6571087837219238, "learning_rate": 6.433824405189489e-06, "loss": 0.0586, "step": 39677 }, { "epoch": 0.7026790102139755, "grad_norm": 0.4855630099773407, "learning_rate": 6.433118149816636e-06, "loss": 0.0646, "step": 39678 }, { "epoch": 0.7026967197510039, "grad_norm": 0.6496211886405945, "learning_rate": 6.432411922627962e-06, "loss": 0.0566, "step": 39679 }, { "epoch": 0.7027144292880323, "grad_norm": 0.5298705697059631, "learning_rate": 6.431705723625782e-06, "loss": 0.0657, "step": 39680 }, { "epoch": 0.7027321388250608, "grad_norm": 0.5738831162452698, "learning_rate": 6.430999552812422e-06, "loss": 0.0525, "step": 39681 }, { "epoch": 0.7027498483620892, "grad_norm": 0.6242691874504089, "learning_rate": 6.4302934101902086e-06, "loss": 0.0575, "step": 39682 }, { "epoch": 0.7027675578991176, "grad_norm": 0.39711830019950867, "learning_rate": 6.429587295761463e-06, "loss": 0.0505, "step": 39683 }, { "epoch": 0.702785267436146, "grad_norm": 0.41330835223197937, "learning_rate": 6.428881209528509e-06, "loss": 0.0569, "step": 39684 }, { "epoch": 0.7028029769731745, "grad_norm": 0.34493255615234375, "learning_rate": 6.428175151493667e-06, "loss": 0.0387, "step": 39685 }, { "epoch": 0.7028206865102029, "grad_norm": 0.7466426491737366, "learning_rate": 6.427469121659268e-06, "loss": 0.0644, "step": 39686 }, { "epoch": 0.7028383960472313, "grad_norm": 0.9957925081253052, "learning_rate": 6.426763120027623e-06, "loss": 0.0704, "step": 39687 }, { "epoch": 0.7028561055842598, "grad_norm": 0.9652915596961975, "learning_rate": 6.42605714660106e-06, "loss": 0.0831, "step": 39688 }, { "epoch": 0.7028738151212882, "grad_norm": 0.3168172538280487, "learning_rate": 6.425351201381908e-06, "loss": 0.0265, "step": 39689 }, { "epoch": 0.7028915246583166, "grad_norm": 0.7746837139129639, "learning_rate": 6.424645284372478e-06, "loss": 0.0803, "step": 39690 }, { "epoch": 0.702909234195345, "grad_norm": 0.38886332511901855, "learning_rate": 6.423939395575097e-06, "loss": 0.0897, "step": 39691 }, { "epoch": 0.7029269437323735, "grad_norm": 0.6563060283660889, "learning_rate": 6.423233534992087e-06, "loss": 0.0547, "step": 39692 }, { "epoch": 0.7029446532694019, "grad_norm": 0.9784902334213257, "learning_rate": 6.4225277026257725e-06, "loss": 0.0791, "step": 39693 }, { "epoch": 0.7029623628064303, "grad_norm": 0.6416762471199036, "learning_rate": 6.421821898478473e-06, "loss": 0.0763, "step": 39694 }, { "epoch": 0.7029800723434587, "grad_norm": 0.4732263684272766, "learning_rate": 6.421116122552513e-06, "loss": 0.0624, "step": 39695 }, { "epoch": 0.7029977818804872, "grad_norm": 0.9459070563316345, "learning_rate": 6.420410374850219e-06, "loss": 0.0793, "step": 39696 }, { "epoch": 0.7030154914175156, "grad_norm": 0.623920202255249, "learning_rate": 6.419704655373902e-06, "loss": 0.0708, "step": 39697 }, { "epoch": 0.703033200954544, "grad_norm": 0.3812318742275238, "learning_rate": 6.418998964125888e-06, "loss": 0.052, "step": 39698 }, { "epoch": 0.7030509104915724, "grad_norm": 0.5891721248626709, "learning_rate": 6.418293301108498e-06, "loss": 0.0674, "step": 39699 }, { "epoch": 0.7030686200286009, "grad_norm": 0.41421493887901306, "learning_rate": 6.417587666324063e-06, "loss": 0.0628, "step": 39700 }, { "epoch": 0.7030863295656293, "grad_norm": 0.587013304233551, "learning_rate": 6.416882059774883e-06, "loss": 0.0804, "step": 39701 }, { "epoch": 0.7031040391026577, "grad_norm": 0.768485426902771, "learning_rate": 6.416176481463301e-06, "loss": 0.0567, "step": 39702 }, { "epoch": 0.7031217486396862, "grad_norm": 0.5522174835205078, "learning_rate": 6.4154709313916294e-06, "loss": 0.0633, "step": 39703 }, { "epoch": 0.7031394581767146, "grad_norm": 0.7056342363357544, "learning_rate": 6.414765409562195e-06, "loss": 0.0817, "step": 39704 }, { "epoch": 0.703157167713743, "grad_norm": 0.29825669527053833, "learning_rate": 6.41405991597731e-06, "loss": 0.034, "step": 39705 }, { "epoch": 0.7031748772507714, "grad_norm": 0.6078483462333679, "learning_rate": 6.413354450639297e-06, "loss": 0.0471, "step": 39706 }, { "epoch": 0.7031925867878, "grad_norm": 0.34350141882896423, "learning_rate": 6.412649013550488e-06, "loss": 0.0392, "step": 39707 }, { "epoch": 0.7032102963248283, "grad_norm": 0.7888194918632507, "learning_rate": 6.411943604713186e-06, "loss": 0.056, "step": 39708 }, { "epoch": 0.7032280058618567, "grad_norm": 0.6302193999290466, "learning_rate": 6.4112382241297225e-06, "loss": 0.07, "step": 39709 }, { "epoch": 0.7032457153988851, "grad_norm": 0.5820402503013611, "learning_rate": 6.410532871802417e-06, "loss": 0.0639, "step": 39710 }, { "epoch": 0.7032634249359137, "grad_norm": 0.5497584939002991, "learning_rate": 6.40982754773359e-06, "loss": 0.076, "step": 39711 }, { "epoch": 0.703281134472942, "grad_norm": 0.7718160152435303, "learning_rate": 6.409122251925562e-06, "loss": 0.0589, "step": 39712 }, { "epoch": 0.7032988440099704, "grad_norm": 0.5293391346931458, "learning_rate": 6.408416984380652e-06, "loss": 0.0751, "step": 39713 }, { "epoch": 0.7033165535469988, "grad_norm": 0.5973993539810181, "learning_rate": 6.407711745101185e-06, "loss": 0.0299, "step": 39714 }, { "epoch": 0.7033342630840274, "grad_norm": 0.3810260593891144, "learning_rate": 6.407006534089474e-06, "loss": 0.037, "step": 39715 }, { "epoch": 0.7033519726210558, "grad_norm": 0.9083348512649536, "learning_rate": 6.406301351347841e-06, "loss": 0.0541, "step": 39716 }, { "epoch": 0.7033696821580842, "grad_norm": 0.4678337574005127, "learning_rate": 6.405596196878609e-06, "loss": 0.0597, "step": 39717 }, { "epoch": 0.7033873916951127, "grad_norm": 0.6772860884666443, "learning_rate": 6.4048910706841005e-06, "loss": 0.0692, "step": 39718 }, { "epoch": 0.7034051012321411, "grad_norm": 0.34397464990615845, "learning_rate": 6.404185972766626e-06, "loss": 0.0502, "step": 39719 }, { "epoch": 0.7034228107691695, "grad_norm": 0.691006600856781, "learning_rate": 6.403480903128511e-06, "loss": 0.053, "step": 39720 }, { "epoch": 0.7034405203061979, "grad_norm": 0.37057068943977356, "learning_rate": 6.402775861772072e-06, "loss": 0.0741, "step": 39721 }, { "epoch": 0.7034582298432264, "grad_norm": 0.6427124738693237, "learning_rate": 6.402070848699632e-06, "loss": 0.0772, "step": 39722 }, { "epoch": 0.7034759393802548, "grad_norm": 0.5773026347160339, "learning_rate": 6.4013658639135095e-06, "loss": 0.0632, "step": 39723 }, { "epoch": 0.7034936489172832, "grad_norm": 0.4994072914123535, "learning_rate": 6.400660907416025e-06, "loss": 0.0568, "step": 39724 }, { "epoch": 0.7035113584543116, "grad_norm": 0.7524003982543945, "learning_rate": 6.3999559792094986e-06, "loss": 0.0782, "step": 39725 }, { "epoch": 0.7035290679913401, "grad_norm": 0.5110660195350647, "learning_rate": 6.399251079296244e-06, "loss": 0.0484, "step": 39726 }, { "epoch": 0.7035467775283685, "grad_norm": 1.1058428287506104, "learning_rate": 6.398546207678584e-06, "loss": 0.0771, "step": 39727 }, { "epoch": 0.7035644870653969, "grad_norm": 1.0634071826934814, "learning_rate": 6.39784136435884e-06, "loss": 0.044, "step": 39728 }, { "epoch": 0.7035821966024253, "grad_norm": 0.4260866045951843, "learning_rate": 6.397136549339324e-06, "loss": 0.063, "step": 39729 }, { "epoch": 0.7035999061394538, "grad_norm": 0.08335034549236298, "learning_rate": 6.396431762622353e-06, "loss": 0.0369, "step": 39730 }, { "epoch": 0.7036176156764822, "grad_norm": 0.4442332983016968, "learning_rate": 6.395727004210258e-06, "loss": 0.051, "step": 39731 }, { "epoch": 0.7036353252135106, "grad_norm": 0.9086753726005554, "learning_rate": 6.395022274105354e-06, "loss": 0.0776, "step": 39732 }, { "epoch": 0.7036530347505391, "grad_norm": 0.450440913438797, "learning_rate": 6.394317572309953e-06, "loss": 0.0539, "step": 39733 }, { "epoch": 0.7036707442875675, "grad_norm": 0.7466984391212463, "learning_rate": 6.393612898826375e-06, "loss": 0.0421, "step": 39734 }, { "epoch": 0.7036884538245959, "grad_norm": 0.4522649347782135, "learning_rate": 6.392908253656941e-06, "loss": 0.0511, "step": 39735 }, { "epoch": 0.7037061633616243, "grad_norm": 0.687860369682312, "learning_rate": 6.392203636803974e-06, "loss": 0.0849, "step": 39736 }, { "epoch": 0.7037238728986528, "grad_norm": 0.654833972454071, "learning_rate": 6.39149904826978e-06, "loss": 0.0454, "step": 39737 }, { "epoch": 0.7037415824356812, "grad_norm": 0.46309974789619446, "learning_rate": 6.3907944880566845e-06, "loss": 0.0468, "step": 39738 }, { "epoch": 0.7037592919727096, "grad_norm": 0.6820931434631348, "learning_rate": 6.390089956167003e-06, "loss": 0.0732, "step": 39739 }, { "epoch": 0.703777001509738, "grad_norm": 0.5540982484817505, "learning_rate": 6.3893854526030565e-06, "loss": 0.0493, "step": 39740 }, { "epoch": 0.7037947110467665, "grad_norm": 0.6538123488426208, "learning_rate": 6.38868097736716e-06, "loss": 0.0517, "step": 39741 }, { "epoch": 0.7038124205837949, "grad_norm": 0.455041766166687, "learning_rate": 6.387976530461632e-06, "loss": 0.0308, "step": 39742 }, { "epoch": 0.7038301301208233, "grad_norm": 1.0236904621124268, "learning_rate": 6.387272111888796e-06, "loss": 0.0816, "step": 39743 }, { "epoch": 0.7038478396578517, "grad_norm": 0.6510522365570068, "learning_rate": 6.3865677216509585e-06, "loss": 0.0582, "step": 39744 }, { "epoch": 0.7038655491948802, "grad_norm": 1.041141390800476, "learning_rate": 6.38586335975044e-06, "loss": 0.0637, "step": 39745 }, { "epoch": 0.7038832587319086, "grad_norm": 0.6719967126846313, "learning_rate": 6.3851590261895665e-06, "loss": 0.0606, "step": 39746 }, { "epoch": 0.703900968268937, "grad_norm": 0.542384922504425, "learning_rate": 6.3844547209706435e-06, "loss": 0.0532, "step": 39747 }, { "epoch": 0.7039186778059655, "grad_norm": 0.5959051251411438, "learning_rate": 6.383750444095993e-06, "loss": 0.0498, "step": 39748 }, { "epoch": 0.7039363873429939, "grad_norm": 0.4518336355686188, "learning_rate": 6.383046195567931e-06, "loss": 0.0336, "step": 39749 }, { "epoch": 0.7039540968800223, "grad_norm": 0.3550601601600647, "learning_rate": 6.3823419753887765e-06, "loss": 0.0419, "step": 39750 }, { "epoch": 0.7039718064170507, "grad_norm": 0.555327296257019, "learning_rate": 6.3816377835608455e-06, "loss": 0.0539, "step": 39751 }, { "epoch": 0.7039895159540792, "grad_norm": 0.4332369267940521, "learning_rate": 6.380933620086455e-06, "loss": 0.0847, "step": 39752 }, { "epoch": 0.7040072254911076, "grad_norm": 0.629793107509613, "learning_rate": 6.380229484967924e-06, "loss": 0.0543, "step": 39753 }, { "epoch": 0.704024935028136, "grad_norm": 0.21368123590946198, "learning_rate": 6.379525378207562e-06, "loss": 0.0339, "step": 39754 }, { "epoch": 0.7040426445651644, "grad_norm": 0.5546008944511414, "learning_rate": 6.37882129980769e-06, "loss": 0.063, "step": 39755 }, { "epoch": 0.7040603541021929, "grad_norm": 0.7119101285934448, "learning_rate": 6.378117249770625e-06, "loss": 0.0828, "step": 39756 }, { "epoch": 0.7040780636392213, "grad_norm": 0.3478124439716339, "learning_rate": 6.377413228098686e-06, "loss": 0.0616, "step": 39757 }, { "epoch": 0.7040957731762497, "grad_norm": 0.8510372638702393, "learning_rate": 6.3767092347941795e-06, "loss": 0.0694, "step": 39758 }, { "epoch": 0.7041134827132781, "grad_norm": 0.6102730631828308, "learning_rate": 6.376005269859424e-06, "loss": 0.0476, "step": 39759 }, { "epoch": 0.7041311922503066, "grad_norm": 0.516756534576416, "learning_rate": 6.375301333296749e-06, "loss": 0.0651, "step": 39760 }, { "epoch": 0.704148901787335, "grad_norm": 0.6223406791687012, "learning_rate": 6.3745974251084535e-06, "loss": 0.0424, "step": 39761 }, { "epoch": 0.7041666113243634, "grad_norm": 0.4722362756729126, "learning_rate": 6.373893545296862e-06, "loss": 0.0609, "step": 39762 }, { "epoch": 0.7041843208613919, "grad_norm": 0.7341411113739014, "learning_rate": 6.373189693864288e-06, "loss": 0.0659, "step": 39763 }, { "epoch": 0.7042020303984203, "grad_norm": 0.41528815031051636, "learning_rate": 6.372485870813051e-06, "loss": 0.0328, "step": 39764 }, { "epoch": 0.7042197399354487, "grad_norm": 0.5780811309814453, "learning_rate": 6.37178207614546e-06, "loss": 0.0412, "step": 39765 }, { "epoch": 0.7042374494724771, "grad_norm": 0.5610603094100952, "learning_rate": 6.3710783098638295e-06, "loss": 0.0609, "step": 39766 }, { "epoch": 0.7042551590095056, "grad_norm": 0.42347320914268494, "learning_rate": 6.370374571970482e-06, "loss": 0.049, "step": 39767 }, { "epoch": 0.704272868546534, "grad_norm": 0.8753451704978943, "learning_rate": 6.369670862467727e-06, "loss": 0.0826, "step": 39768 }, { "epoch": 0.7042905780835624, "grad_norm": 0.7244288325309753, "learning_rate": 6.368967181357884e-06, "loss": 0.0698, "step": 39769 }, { "epoch": 0.7043082876205908, "grad_norm": 0.5817925930023193, "learning_rate": 6.368263528643266e-06, "loss": 0.0538, "step": 39770 }, { "epoch": 0.7043259971576193, "grad_norm": 0.45024099946022034, "learning_rate": 6.367559904326191e-06, "loss": 0.0374, "step": 39771 }, { "epoch": 0.7043437066946477, "grad_norm": 0.43381384015083313, "learning_rate": 6.366856308408967e-06, "loss": 0.0487, "step": 39772 }, { "epoch": 0.7043614162316761, "grad_norm": 0.4532707631587982, "learning_rate": 6.366152740893913e-06, "loss": 0.0525, "step": 39773 }, { "epoch": 0.7043791257687045, "grad_norm": 0.5156146287918091, "learning_rate": 6.365449201783344e-06, "loss": 0.0401, "step": 39774 }, { "epoch": 0.704396835305733, "grad_norm": 0.48313191533088684, "learning_rate": 6.364745691079576e-06, "loss": 0.0763, "step": 39775 }, { "epoch": 0.7044145448427614, "grad_norm": 0.7316773533821106, "learning_rate": 6.364042208784919e-06, "loss": 0.0385, "step": 39776 }, { "epoch": 0.7044322543797898, "grad_norm": 0.5567077994346619, "learning_rate": 6.363338754901689e-06, "loss": 0.0815, "step": 39777 }, { "epoch": 0.7044499639168184, "grad_norm": 0.5244731903076172, "learning_rate": 6.362635329432199e-06, "loss": 0.0339, "step": 39778 }, { "epoch": 0.7044676734538468, "grad_norm": 0.6807780265808105, "learning_rate": 6.361931932378768e-06, "loss": 0.0512, "step": 39779 }, { "epoch": 0.7044853829908752, "grad_norm": 0.8583049178123474, "learning_rate": 6.361228563743706e-06, "loss": 0.1001, "step": 39780 }, { "epoch": 0.7045030925279036, "grad_norm": 0.43522512912750244, "learning_rate": 6.360525223529328e-06, "loss": 0.062, "step": 39781 }, { "epoch": 0.7045208020649321, "grad_norm": 0.6826587319374084, "learning_rate": 6.359821911737952e-06, "loss": 0.0698, "step": 39782 }, { "epoch": 0.7045385116019605, "grad_norm": 0.7182713747024536, "learning_rate": 6.359118628371885e-06, "loss": 0.0687, "step": 39783 }, { "epoch": 0.7045562211389889, "grad_norm": 0.6271999478340149, "learning_rate": 6.3584153734334434e-06, "loss": 0.0554, "step": 39784 }, { "epoch": 0.7045739306760173, "grad_norm": 0.6571027636528015, "learning_rate": 6.357712146924945e-06, "loss": 0.0674, "step": 39785 }, { "epoch": 0.7045916402130458, "grad_norm": 0.6152338981628418, "learning_rate": 6.357008948848697e-06, "loss": 0.0482, "step": 39786 }, { "epoch": 0.7046093497500742, "grad_norm": 0.7740506529808044, "learning_rate": 6.356305779207013e-06, "loss": 0.0806, "step": 39787 }, { "epoch": 0.7046270592871026, "grad_norm": 0.33200567960739136, "learning_rate": 6.355602638002203e-06, "loss": 0.0491, "step": 39788 }, { "epoch": 0.704644768824131, "grad_norm": 0.7678813338279724, "learning_rate": 6.354899525236599e-06, "loss": 0.0571, "step": 39789 }, { "epoch": 0.7046624783611595, "grad_norm": 0.6319730281829834, "learning_rate": 6.354196440912493e-06, "loss": 0.0605, "step": 39790 }, { "epoch": 0.7046801878981879, "grad_norm": 0.4801434874534607, "learning_rate": 6.353493385032208e-06, "loss": 0.0469, "step": 39791 }, { "epoch": 0.7046978974352163, "grad_norm": 0.8670409321784973, "learning_rate": 6.352790357598062e-06, "loss": 0.0806, "step": 39792 }, { "epoch": 0.7047156069722448, "grad_norm": 0.6588661670684814, "learning_rate": 6.352087358612354e-06, "loss": 0.089, "step": 39793 }, { "epoch": 0.7047333165092732, "grad_norm": 0.8594353199005127, "learning_rate": 6.351384388077403e-06, "loss": 0.059, "step": 39794 }, { "epoch": 0.7047510260463016, "grad_norm": 0.8444380760192871, "learning_rate": 6.350681445995523e-06, "loss": 0.0738, "step": 39795 }, { "epoch": 0.70476873558333, "grad_norm": 0.3164198100566864, "learning_rate": 6.3499785323690275e-06, "loss": 0.0419, "step": 39796 }, { "epoch": 0.7047864451203585, "grad_norm": 0.881967306137085, "learning_rate": 6.349275647200227e-06, "loss": 0.0781, "step": 39797 }, { "epoch": 0.7048041546573869, "grad_norm": 0.6586143374443054, "learning_rate": 6.348572790491434e-06, "loss": 0.0545, "step": 39798 }, { "epoch": 0.7048218641944153, "grad_norm": 0.5399197936058044, "learning_rate": 6.347869962244968e-06, "loss": 0.0718, "step": 39799 }, { "epoch": 0.7048395737314437, "grad_norm": 0.5853137373924255, "learning_rate": 6.347167162463129e-06, "loss": 0.0693, "step": 39800 }, { "epoch": 0.7048572832684722, "grad_norm": 0.516421377658844, "learning_rate": 6.346464391148235e-06, "loss": 0.0518, "step": 39801 }, { "epoch": 0.7048749928055006, "grad_norm": 0.7121424078941345, "learning_rate": 6.345761648302598e-06, "loss": 0.0867, "step": 39802 }, { "epoch": 0.704892702342529, "grad_norm": 0.3538960814476013, "learning_rate": 6.345058933928535e-06, "loss": 0.085, "step": 39803 }, { "epoch": 0.7049104118795574, "grad_norm": 0.737504780292511, "learning_rate": 6.344356248028348e-06, "loss": 0.068, "step": 39804 }, { "epoch": 0.7049281214165859, "grad_norm": 0.7267654538154602, "learning_rate": 6.3436535906043524e-06, "loss": 0.0637, "step": 39805 }, { "epoch": 0.7049458309536143, "grad_norm": 0.2543688714504242, "learning_rate": 6.34295096165886e-06, "loss": 0.0532, "step": 39806 }, { "epoch": 0.7049635404906427, "grad_norm": 0.7217188477516174, "learning_rate": 6.342248361194185e-06, "loss": 0.0466, "step": 39807 }, { "epoch": 0.7049812500276712, "grad_norm": 0.7213400602340698, "learning_rate": 6.341545789212638e-06, "loss": 0.0469, "step": 39808 }, { "epoch": 0.7049989595646996, "grad_norm": 0.541465163230896, "learning_rate": 6.340843245716528e-06, "loss": 0.0607, "step": 39809 }, { "epoch": 0.705016669101728, "grad_norm": 0.45943504571914673, "learning_rate": 6.3401407307081736e-06, "loss": 0.0611, "step": 39810 }, { "epoch": 0.7050343786387564, "grad_norm": 0.602500855922699, "learning_rate": 6.339438244189874e-06, "loss": 0.0698, "step": 39811 }, { "epoch": 0.7050520881757849, "grad_norm": 0.7428397536277771, "learning_rate": 6.338735786163948e-06, "loss": 0.0657, "step": 39812 }, { "epoch": 0.7050697977128133, "grad_norm": 0.49420493841171265, "learning_rate": 6.338033356632706e-06, "loss": 0.0687, "step": 39813 }, { "epoch": 0.7050875072498417, "grad_norm": 0.46706706285476685, "learning_rate": 6.337330955598462e-06, "loss": 0.0283, "step": 39814 }, { "epoch": 0.7051052167868701, "grad_norm": 0.7362169027328491, "learning_rate": 6.336628583063519e-06, "loss": 0.0495, "step": 39815 }, { "epoch": 0.7051229263238986, "grad_norm": 0.7074771523475647, "learning_rate": 6.33592623903019e-06, "loss": 0.0592, "step": 39816 }, { "epoch": 0.705140635860927, "grad_norm": 0.4222288727760315, "learning_rate": 6.335223923500787e-06, "loss": 0.0498, "step": 39817 }, { "epoch": 0.7051583453979554, "grad_norm": 0.43458908796310425, "learning_rate": 6.334521636477622e-06, "loss": 0.0528, "step": 39818 }, { "epoch": 0.7051760549349838, "grad_norm": 0.601737916469574, "learning_rate": 6.333819377963005e-06, "loss": 0.0592, "step": 39819 }, { "epoch": 0.7051937644720123, "grad_norm": 0.7241094708442688, "learning_rate": 6.333117147959245e-06, "loss": 0.0489, "step": 39820 }, { "epoch": 0.7052114740090407, "grad_norm": 0.32567548751831055, "learning_rate": 6.332414946468658e-06, "loss": 0.0348, "step": 39821 }, { "epoch": 0.7052291835460691, "grad_norm": 0.33858755230903625, "learning_rate": 6.3317127734935444e-06, "loss": 0.0687, "step": 39822 }, { "epoch": 0.7052468930830976, "grad_norm": 0.9304535984992981, "learning_rate": 6.331010629036219e-06, "loss": 0.0308, "step": 39823 }, { "epoch": 0.705264602620126, "grad_norm": 0.6951011419296265, "learning_rate": 6.330308513098997e-06, "loss": 0.0503, "step": 39824 }, { "epoch": 0.7052823121571544, "grad_norm": 0.3238169252872467, "learning_rate": 6.329606425684172e-06, "loss": 0.0418, "step": 39825 }, { "epoch": 0.7053000216941828, "grad_norm": 0.569506824016571, "learning_rate": 6.32890436679407e-06, "loss": 0.0517, "step": 39826 }, { "epoch": 0.7053177312312113, "grad_norm": 0.7454495429992676, "learning_rate": 6.328202336430997e-06, "loss": 0.0598, "step": 39827 }, { "epoch": 0.7053354407682397, "grad_norm": 0.33337122201919556, "learning_rate": 6.327500334597266e-06, "loss": 0.0638, "step": 39828 }, { "epoch": 0.7053531503052681, "grad_norm": 0.8812172412872314, "learning_rate": 6.326798361295176e-06, "loss": 0.0699, "step": 39829 }, { "epoch": 0.7053708598422965, "grad_norm": 0.3408023715019226, "learning_rate": 6.326096416527042e-06, "loss": 0.051, "step": 39830 }, { "epoch": 0.705388569379325, "grad_norm": 0.5439027547836304, "learning_rate": 6.325394500295179e-06, "loss": 0.065, "step": 39831 }, { "epoch": 0.7054062789163534, "grad_norm": 0.2529206871986389, "learning_rate": 6.324692612601886e-06, "loss": 0.0383, "step": 39832 }, { "epoch": 0.7054239884533818, "grad_norm": 0.7858917713165283, "learning_rate": 6.323990753449476e-06, "loss": 0.0867, "step": 39833 }, { "epoch": 0.7054416979904102, "grad_norm": 0.5673664212226868, "learning_rate": 6.323288922840259e-06, "loss": 0.0392, "step": 39834 }, { "epoch": 0.7054594075274387, "grad_norm": 0.7017573118209839, "learning_rate": 6.322587120776545e-06, "loss": 0.0791, "step": 39835 }, { "epoch": 0.7054771170644671, "grad_norm": 0.7080127596855164, "learning_rate": 6.321885347260641e-06, "loss": 0.0816, "step": 39836 }, { "epoch": 0.7054948266014955, "grad_norm": 0.5481688976287842, "learning_rate": 6.321183602294856e-06, "loss": 0.0992, "step": 39837 }, { "epoch": 0.705512536138524, "grad_norm": 0.36889955401420593, "learning_rate": 6.320481885881498e-06, "loss": 0.068, "step": 39838 }, { "epoch": 0.7055302456755524, "grad_norm": 0.6948032975196838, "learning_rate": 6.319780198022884e-06, "loss": 0.0657, "step": 39839 }, { "epoch": 0.7055479552125808, "grad_norm": 0.39134031534194946, "learning_rate": 6.3190785387213104e-06, "loss": 0.0501, "step": 39840 }, { "epoch": 0.7055656647496092, "grad_norm": 0.7743735313415527, "learning_rate": 6.318376907979089e-06, "loss": 0.0473, "step": 39841 }, { "epoch": 0.7055833742866378, "grad_norm": 0.43181321024894714, "learning_rate": 6.3176753057985346e-06, "loss": 0.0677, "step": 39842 }, { "epoch": 0.7056010838236662, "grad_norm": 0.8342075347900391, "learning_rate": 6.316973732181947e-06, "loss": 0.0756, "step": 39843 }, { "epoch": 0.7056187933606946, "grad_norm": 0.6066938042640686, "learning_rate": 6.316272187131634e-06, "loss": 0.0423, "step": 39844 }, { "epoch": 0.705636502897723, "grad_norm": 0.7281510829925537, "learning_rate": 6.31557067064991e-06, "loss": 0.0777, "step": 39845 }, { "epoch": 0.7056542124347515, "grad_norm": 0.4968720078468323, "learning_rate": 6.31486918273908e-06, "loss": 0.0638, "step": 39846 }, { "epoch": 0.7056719219717799, "grad_norm": 0.63902348279953, "learning_rate": 6.31416772340145e-06, "loss": 0.0522, "step": 39847 }, { "epoch": 0.7056896315088083, "grad_norm": 0.48596927523612976, "learning_rate": 6.313466292639332e-06, "loss": 0.0651, "step": 39848 }, { "epoch": 0.7057073410458367, "grad_norm": 0.4953952133655548, "learning_rate": 6.312764890455035e-06, "loss": 0.0625, "step": 39849 }, { "epoch": 0.7057250505828652, "grad_norm": 0.7161059975624084, "learning_rate": 6.3120635168508585e-06, "loss": 0.0607, "step": 39850 }, { "epoch": 0.7057427601198936, "grad_norm": 0.38205358386039734, "learning_rate": 6.311362171829115e-06, "loss": 0.0444, "step": 39851 }, { "epoch": 0.705760469656922, "grad_norm": 0.6352834701538086, "learning_rate": 6.310660855392109e-06, "loss": 0.0433, "step": 39852 }, { "epoch": 0.7057781791939505, "grad_norm": 0.8050166368484497, "learning_rate": 6.309959567542156e-06, "loss": 0.0517, "step": 39853 }, { "epoch": 0.7057958887309789, "grad_norm": 0.48863357305526733, "learning_rate": 6.309258308281548e-06, "loss": 0.0654, "step": 39854 }, { "epoch": 0.7058135982680073, "grad_norm": 0.7493798732757568, "learning_rate": 6.308557077612605e-06, "loss": 0.0401, "step": 39855 }, { "epoch": 0.7058313078050357, "grad_norm": 0.34760305285453796, "learning_rate": 6.307855875537635e-06, "loss": 0.0521, "step": 39856 }, { "epoch": 0.7058490173420642, "grad_norm": 0.5721243023872375, "learning_rate": 6.307154702058935e-06, "loss": 0.0593, "step": 39857 }, { "epoch": 0.7058667268790926, "grad_norm": 0.9903073906898499, "learning_rate": 6.306453557178818e-06, "loss": 0.0768, "step": 39858 }, { "epoch": 0.705884436416121, "grad_norm": 0.9428129196166992, "learning_rate": 6.305752440899587e-06, "loss": 0.0473, "step": 39859 }, { "epoch": 0.7059021459531494, "grad_norm": 0.7294400334358215, "learning_rate": 6.305051353223559e-06, "loss": 0.0568, "step": 39860 }, { "epoch": 0.7059198554901779, "grad_norm": 0.35520651936531067, "learning_rate": 6.304350294153026e-06, "loss": 0.0405, "step": 39861 }, { "epoch": 0.7059375650272063, "grad_norm": 0.8233445286750793, "learning_rate": 6.303649263690301e-06, "loss": 0.0754, "step": 39862 }, { "epoch": 0.7059552745642347, "grad_norm": 0.8127570152282715, "learning_rate": 6.302948261837693e-06, "loss": 0.0702, "step": 39863 }, { "epoch": 0.7059729841012631, "grad_norm": 0.5353729128837585, "learning_rate": 6.302247288597503e-06, "loss": 0.078, "step": 39864 }, { "epoch": 0.7059906936382916, "grad_norm": 0.6716575622558594, "learning_rate": 6.301546343972039e-06, "loss": 0.0648, "step": 39865 }, { "epoch": 0.70600840317532, "grad_norm": 0.978437602519989, "learning_rate": 6.300845427963609e-06, "loss": 0.0783, "step": 39866 }, { "epoch": 0.7060261127123484, "grad_norm": 0.5744271278381348, "learning_rate": 6.300144540574523e-06, "loss": 0.0753, "step": 39867 }, { "epoch": 0.7060438222493769, "grad_norm": 0.31648287177085876, "learning_rate": 6.299443681807075e-06, "loss": 0.057, "step": 39868 }, { "epoch": 0.7060615317864053, "grad_norm": 0.47955524921417236, "learning_rate": 6.298742851663578e-06, "loss": 0.0725, "step": 39869 }, { "epoch": 0.7060792413234337, "grad_norm": 0.4648849368095398, "learning_rate": 6.298042050146342e-06, "loss": 0.0638, "step": 39870 }, { "epoch": 0.7060969508604621, "grad_norm": 0.6167379021644592, "learning_rate": 6.2973412772576636e-06, "loss": 0.0539, "step": 39871 }, { "epoch": 0.7061146603974906, "grad_norm": 0.46996811032295227, "learning_rate": 6.296640532999851e-06, "loss": 0.1075, "step": 39872 }, { "epoch": 0.706132369934519, "grad_norm": 0.6953121423721313, "learning_rate": 6.295939817375211e-06, "loss": 0.0469, "step": 39873 }, { "epoch": 0.7061500794715474, "grad_norm": 0.41858288645744324, "learning_rate": 6.295239130386049e-06, "loss": 0.0488, "step": 39874 }, { "epoch": 0.7061677890085758, "grad_norm": 0.6754555702209473, "learning_rate": 6.294538472034669e-06, "loss": 0.0662, "step": 39875 }, { "epoch": 0.7061854985456043, "grad_norm": 0.5386460423469543, "learning_rate": 6.293837842323378e-06, "loss": 0.0549, "step": 39876 }, { "epoch": 0.7062032080826327, "grad_norm": 0.4600350260734558, "learning_rate": 6.293137241254479e-06, "loss": 0.0627, "step": 39877 }, { "epoch": 0.7062209176196611, "grad_norm": 0.5235089063644409, "learning_rate": 6.292436668830283e-06, "loss": 0.0357, "step": 39878 }, { "epoch": 0.7062386271566895, "grad_norm": 0.4595321714878082, "learning_rate": 6.291736125053085e-06, "loss": 0.0573, "step": 39879 }, { "epoch": 0.706256336693718, "grad_norm": 0.6183146834373474, "learning_rate": 6.291035609925195e-06, "loss": 0.0559, "step": 39880 }, { "epoch": 0.7062740462307464, "grad_norm": 0.3429655134677887, "learning_rate": 6.290335123448922e-06, "loss": 0.0485, "step": 39881 }, { "epoch": 0.7062917557677748, "grad_norm": 0.6681839227676392, "learning_rate": 6.2896346656265585e-06, "loss": 0.0795, "step": 39882 }, { "epoch": 0.7063094653048033, "grad_norm": 0.7067566514015198, "learning_rate": 6.288934236460412e-06, "loss": 0.0497, "step": 39883 }, { "epoch": 0.7063271748418317, "grad_norm": 0.5013196468353271, "learning_rate": 6.288233835952796e-06, "loss": 0.0462, "step": 39884 }, { "epoch": 0.7063448843788601, "grad_norm": 0.38991865515708923, "learning_rate": 6.287533464106015e-06, "loss": 0.0693, "step": 39885 }, { "epoch": 0.7063625939158885, "grad_norm": 0.4126753509044647, "learning_rate": 6.2868331209223625e-06, "loss": 0.0566, "step": 39886 }, { "epoch": 0.706380303452917, "grad_norm": 0.6179187893867493, "learning_rate": 6.286132806404146e-06, "loss": 0.0605, "step": 39887 }, { "epoch": 0.7063980129899454, "grad_norm": 0.7572787404060364, "learning_rate": 6.2854325205536786e-06, "loss": 0.0581, "step": 39888 }, { "epoch": 0.7064157225269738, "grad_norm": 0.16281720995903015, "learning_rate": 6.28473226337325e-06, "loss": 0.0529, "step": 39889 }, { "epoch": 0.7064334320640022, "grad_norm": 0.6970878839492798, "learning_rate": 6.2840320348651716e-06, "loss": 0.0527, "step": 39890 }, { "epoch": 0.7064511416010307, "grad_norm": 0.5175470113754272, "learning_rate": 6.283331835031747e-06, "loss": 0.0389, "step": 39891 }, { "epoch": 0.7064688511380591, "grad_norm": 0.7842764258384705, "learning_rate": 6.282631663875278e-06, "loss": 0.0722, "step": 39892 }, { "epoch": 0.7064865606750875, "grad_norm": 0.6426679491996765, "learning_rate": 6.2819315213980685e-06, "loss": 0.0586, "step": 39893 }, { "epoch": 0.7065042702121159, "grad_norm": 0.6070583462715149, "learning_rate": 6.281231407602423e-06, "loss": 0.0416, "step": 39894 }, { "epoch": 0.7065219797491444, "grad_norm": 0.45340970158576965, "learning_rate": 6.280531322490651e-06, "loss": 0.0932, "step": 39895 }, { "epoch": 0.7065396892861728, "grad_norm": 0.7063738107681274, "learning_rate": 6.279831266065042e-06, "loss": 0.0883, "step": 39896 }, { "epoch": 0.7065573988232012, "grad_norm": 0.292553186416626, "learning_rate": 6.2791312383279065e-06, "loss": 0.0298, "step": 39897 }, { "epoch": 0.7065751083602297, "grad_norm": 0.4206133186817169, "learning_rate": 6.278431239281547e-06, "loss": 0.0465, "step": 39898 }, { "epoch": 0.7065928178972581, "grad_norm": 0.4904150366783142, "learning_rate": 6.277731268928273e-06, "loss": 0.0547, "step": 39899 }, { "epoch": 0.7066105274342865, "grad_norm": 0.6455556750297546, "learning_rate": 6.277031327270376e-06, "loss": 0.042, "step": 39900 }, { "epoch": 0.7066282369713149, "grad_norm": 0.7829788327217102, "learning_rate": 6.276331414310162e-06, "loss": 0.0486, "step": 39901 }, { "epoch": 0.7066459465083434, "grad_norm": 0.6264135837554932, "learning_rate": 6.2756315300499355e-06, "loss": 0.0831, "step": 39902 }, { "epoch": 0.7066636560453718, "grad_norm": 0.8027299046516418, "learning_rate": 6.274931674491998e-06, "loss": 0.0725, "step": 39903 }, { "epoch": 0.7066813655824002, "grad_norm": 0.844944179058075, "learning_rate": 6.274231847638656e-06, "loss": 0.0676, "step": 39904 }, { "epoch": 0.7066990751194286, "grad_norm": 0.7535666227340698, "learning_rate": 6.273532049492206e-06, "loss": 0.0624, "step": 39905 }, { "epoch": 0.7067167846564572, "grad_norm": 0.5688305497169495, "learning_rate": 6.272832280054959e-06, "loss": 0.0713, "step": 39906 }, { "epoch": 0.7067344941934856, "grad_norm": 0.7594125270843506, "learning_rate": 6.272132539329205e-06, "loss": 0.0652, "step": 39907 }, { "epoch": 0.706752203730514, "grad_norm": 0.6300367116928101, "learning_rate": 6.271432827317254e-06, "loss": 0.0458, "step": 39908 }, { "epoch": 0.7067699132675423, "grad_norm": 0.27220088243484497, "learning_rate": 6.270733144021404e-06, "loss": 0.0574, "step": 39909 }, { "epoch": 0.7067876228045709, "grad_norm": 0.6516351103782654, "learning_rate": 6.270033489443967e-06, "loss": 0.0488, "step": 39910 }, { "epoch": 0.7068053323415993, "grad_norm": 0.7292259335517883, "learning_rate": 6.2693338635872284e-06, "loss": 0.0919, "step": 39911 }, { "epoch": 0.7068230418786277, "grad_norm": 0.38327714800834656, "learning_rate": 6.268634266453496e-06, "loss": 0.0501, "step": 39912 }, { "epoch": 0.7068407514156562, "grad_norm": 0.4142756760120392, "learning_rate": 6.2679346980450825e-06, "loss": 0.0385, "step": 39913 }, { "epoch": 0.7068584609526846, "grad_norm": 0.8355838060379028, "learning_rate": 6.267235158364276e-06, "loss": 0.0661, "step": 39914 }, { "epoch": 0.706876170489713, "grad_norm": 1.08291757106781, "learning_rate": 6.266535647413382e-06, "loss": 0.0604, "step": 39915 }, { "epoch": 0.7068938800267414, "grad_norm": 0.7139853835105896, "learning_rate": 6.265836165194702e-06, "loss": 0.0691, "step": 39916 }, { "epoch": 0.7069115895637699, "grad_norm": 0.4173634946346283, "learning_rate": 6.265136711710545e-06, "loss": 0.0384, "step": 39917 }, { "epoch": 0.7069292991007983, "grad_norm": 0.7362635135650635, "learning_rate": 6.264437286963198e-06, "loss": 0.0786, "step": 39918 }, { "epoch": 0.7069470086378267, "grad_norm": 0.5704929232597351, "learning_rate": 6.263737890954968e-06, "loss": 0.0574, "step": 39919 }, { "epoch": 0.7069647181748551, "grad_norm": 0.6423183083534241, "learning_rate": 6.263038523688158e-06, "loss": 0.07, "step": 39920 }, { "epoch": 0.7069824277118836, "grad_norm": 0.6236405372619629, "learning_rate": 6.262339185165067e-06, "loss": 0.0517, "step": 39921 }, { "epoch": 0.707000137248912, "grad_norm": 0.5030789971351624, "learning_rate": 6.261639875387997e-06, "loss": 0.0399, "step": 39922 }, { "epoch": 0.7070178467859404, "grad_norm": 0.579418420791626, "learning_rate": 6.260940594359247e-06, "loss": 0.0579, "step": 39923 }, { "epoch": 0.7070355563229688, "grad_norm": 0.6139586567878723, "learning_rate": 6.260241342081125e-06, "loss": 0.0417, "step": 39924 }, { "epoch": 0.7070532658599973, "grad_norm": 0.46639373898506165, "learning_rate": 6.259542118555918e-06, "loss": 0.0448, "step": 39925 }, { "epoch": 0.7070709753970257, "grad_norm": 0.4832010269165039, "learning_rate": 6.258842923785935e-06, "loss": 0.0548, "step": 39926 }, { "epoch": 0.7070886849340541, "grad_norm": 0.7376282811164856, "learning_rate": 6.258143757773479e-06, "loss": 0.0636, "step": 39927 }, { "epoch": 0.7071063944710826, "grad_norm": 0.6150373816490173, "learning_rate": 6.2574446205208415e-06, "loss": 0.069, "step": 39928 }, { "epoch": 0.707124104008111, "grad_norm": 0.622333824634552, "learning_rate": 6.256745512030326e-06, "loss": 0.0389, "step": 39929 }, { "epoch": 0.7071418135451394, "grad_norm": 0.8961464166641235, "learning_rate": 6.256046432304234e-06, "loss": 0.096, "step": 39930 }, { "epoch": 0.7071595230821678, "grad_norm": 0.5023120045661926, "learning_rate": 6.255347381344863e-06, "loss": 0.0306, "step": 39931 }, { "epoch": 0.7071772326191963, "grad_norm": 0.6854907274246216, "learning_rate": 6.2546483591545174e-06, "loss": 0.0752, "step": 39932 }, { "epoch": 0.7071949421562247, "grad_norm": 0.8094066977500916, "learning_rate": 6.253949365735493e-06, "loss": 0.0986, "step": 39933 }, { "epoch": 0.7072126516932531, "grad_norm": 0.5479879379272461, "learning_rate": 6.253250401090095e-06, "loss": 0.0555, "step": 39934 }, { "epoch": 0.7072303612302815, "grad_norm": 1.051790714263916, "learning_rate": 6.2525514652206145e-06, "loss": 0.0661, "step": 39935 }, { "epoch": 0.70724807076731, "grad_norm": 0.3832766115665436, "learning_rate": 6.251852558129355e-06, "loss": 0.0783, "step": 39936 }, { "epoch": 0.7072657803043384, "grad_norm": 0.474103718996048, "learning_rate": 6.251153679818616e-06, "loss": 0.0718, "step": 39937 }, { "epoch": 0.7072834898413668, "grad_norm": 0.3608272969722748, "learning_rate": 6.2504548302906996e-06, "loss": 0.0572, "step": 39938 }, { "epoch": 0.7073011993783952, "grad_norm": 0.39234471321105957, "learning_rate": 6.249756009547899e-06, "loss": 0.055, "step": 39939 }, { "epoch": 0.7073189089154237, "grad_norm": 0.5420281291007996, "learning_rate": 6.249057217592515e-06, "loss": 0.081, "step": 39940 }, { "epoch": 0.7073366184524521, "grad_norm": 0.5781377553939819, "learning_rate": 6.248358454426842e-06, "loss": 0.0625, "step": 39941 }, { "epoch": 0.7073543279894805, "grad_norm": 0.8282554745674133, "learning_rate": 6.247659720053196e-06, "loss": 0.0973, "step": 39942 }, { "epoch": 0.707372037526509, "grad_norm": 0.582241952419281, "learning_rate": 6.246961014473859e-06, "loss": 0.046, "step": 39943 }, { "epoch": 0.7073897470635374, "grad_norm": 0.5914591550827026, "learning_rate": 6.246262337691134e-06, "loss": 0.0656, "step": 39944 }, { "epoch": 0.7074074566005658, "grad_norm": 0.9717073440551758, "learning_rate": 6.245563689707327e-06, "loss": 0.0846, "step": 39945 }, { "epoch": 0.7074251661375942, "grad_norm": 0.7079761624336243, "learning_rate": 6.244865070524724e-06, "loss": 0.0753, "step": 39946 }, { "epoch": 0.7074428756746227, "grad_norm": 0.9616879820823669, "learning_rate": 6.244166480145629e-06, "loss": 0.0656, "step": 39947 }, { "epoch": 0.7074605852116511, "grad_norm": 0.8880983591079712, "learning_rate": 6.2434679185723405e-06, "loss": 0.0986, "step": 39948 }, { "epoch": 0.7074782947486795, "grad_norm": 0.42527535557746887, "learning_rate": 6.242769385807158e-06, "loss": 0.0767, "step": 39949 }, { "epoch": 0.7074960042857079, "grad_norm": 0.39397525787353516, "learning_rate": 6.242070881852378e-06, "loss": 0.0438, "step": 39950 }, { "epoch": 0.7075137138227364, "grad_norm": 0.7524974346160889, "learning_rate": 6.241372406710298e-06, "loss": 0.0612, "step": 39951 }, { "epoch": 0.7075314233597648, "grad_norm": 0.2624579966068268, "learning_rate": 6.240673960383224e-06, "loss": 0.051, "step": 39952 }, { "epoch": 0.7075491328967932, "grad_norm": 0.7388391494750977, "learning_rate": 6.23997554287344e-06, "loss": 0.0617, "step": 39953 }, { "epoch": 0.7075668424338216, "grad_norm": 0.9832594394683838, "learning_rate": 6.239277154183251e-06, "loss": 0.0554, "step": 39954 }, { "epoch": 0.7075845519708501, "grad_norm": 0.6962172985076904, "learning_rate": 6.2385787943149555e-06, "loss": 0.0967, "step": 39955 }, { "epoch": 0.7076022615078785, "grad_norm": 0.4002530574798584, "learning_rate": 6.237880463270852e-06, "loss": 0.0248, "step": 39956 }, { "epoch": 0.7076199710449069, "grad_norm": 0.6836627125740051, "learning_rate": 6.237182161053233e-06, "loss": 0.0685, "step": 39957 }, { "epoch": 0.7076376805819354, "grad_norm": 0.5148338079452515, "learning_rate": 6.236483887664397e-06, "loss": 0.027, "step": 39958 }, { "epoch": 0.7076553901189638, "grad_norm": 0.7317111492156982, "learning_rate": 6.2357856431066425e-06, "loss": 0.0804, "step": 39959 }, { "epoch": 0.7076730996559922, "grad_norm": 0.5607662796974182, "learning_rate": 6.235087427382267e-06, "loss": 0.0531, "step": 39960 }, { "epoch": 0.7076908091930206, "grad_norm": 0.7233155965805054, "learning_rate": 6.234389240493567e-06, "loss": 0.0542, "step": 39961 }, { "epoch": 0.7077085187300491, "grad_norm": 1.2308502197265625, "learning_rate": 6.2336910824428404e-06, "loss": 0.0645, "step": 39962 }, { "epoch": 0.7077262282670775, "grad_norm": 0.7046524882316589, "learning_rate": 6.232992953232389e-06, "loss": 0.0466, "step": 39963 }, { "epoch": 0.7077439378041059, "grad_norm": 0.5740412473678589, "learning_rate": 6.232294852864497e-06, "loss": 0.0437, "step": 39964 }, { "epoch": 0.7077616473411343, "grad_norm": 0.5064763426780701, "learning_rate": 6.231596781341469e-06, "loss": 0.0554, "step": 39965 }, { "epoch": 0.7077793568781628, "grad_norm": 0.5415266156196594, "learning_rate": 6.230898738665607e-06, "loss": 0.0322, "step": 39966 }, { "epoch": 0.7077970664151912, "grad_norm": 0.3429630994796753, "learning_rate": 6.230200724839195e-06, "loss": 0.062, "step": 39967 }, { "epoch": 0.7078147759522196, "grad_norm": 0.6399924755096436, "learning_rate": 6.229502739864537e-06, "loss": 0.0534, "step": 39968 }, { "epoch": 0.707832485489248, "grad_norm": 0.34918665885925293, "learning_rate": 6.228804783743927e-06, "loss": 0.0578, "step": 39969 }, { "epoch": 0.7078501950262766, "grad_norm": 0.5379819273948669, "learning_rate": 6.228106856479662e-06, "loss": 0.0645, "step": 39970 }, { "epoch": 0.707867904563305, "grad_norm": 0.48905670642852783, "learning_rate": 6.227408958074037e-06, "loss": 0.06, "step": 39971 }, { "epoch": 0.7078856141003333, "grad_norm": 0.5863785147666931, "learning_rate": 6.226711088529351e-06, "loss": 0.0737, "step": 39972 }, { "epoch": 0.7079033236373619, "grad_norm": 0.44243448972702026, "learning_rate": 6.2260132478479025e-06, "loss": 0.0472, "step": 39973 }, { "epoch": 0.7079210331743903, "grad_norm": 0.49778860807418823, "learning_rate": 6.225315436031979e-06, "loss": 0.0367, "step": 39974 }, { "epoch": 0.7079387427114187, "grad_norm": 0.7658973932266235, "learning_rate": 6.224617653083879e-06, "loss": 0.0691, "step": 39975 }, { "epoch": 0.707956452248447, "grad_norm": 0.662432074546814, "learning_rate": 6.2239198990059e-06, "loss": 0.0639, "step": 39976 }, { "epoch": 0.7079741617854756, "grad_norm": 0.7494857311248779, "learning_rate": 6.223222173800342e-06, "loss": 0.0726, "step": 39977 }, { "epoch": 0.707991871322504, "grad_norm": 0.41246283054351807, "learning_rate": 6.222524477469484e-06, "loss": 0.0564, "step": 39978 }, { "epoch": 0.7080095808595324, "grad_norm": 0.5467445254325867, "learning_rate": 6.221826810015639e-06, "loss": 0.0514, "step": 39979 }, { "epoch": 0.7080272903965608, "grad_norm": 0.6945631504058838, "learning_rate": 6.221129171441095e-06, "loss": 0.0424, "step": 39980 }, { "epoch": 0.7080449999335893, "grad_norm": 0.6760799884796143, "learning_rate": 6.220431561748154e-06, "loss": 0.0866, "step": 39981 }, { "epoch": 0.7080627094706177, "grad_norm": 1.0083308219909668, "learning_rate": 6.2197339809391e-06, "loss": 0.0579, "step": 39982 }, { "epoch": 0.7080804190076461, "grad_norm": 0.25724130868911743, "learning_rate": 6.219036429016232e-06, "loss": 0.0508, "step": 39983 }, { "epoch": 0.7080981285446745, "grad_norm": 0.9041445255279541, "learning_rate": 6.218338905981853e-06, "loss": 0.0906, "step": 39984 }, { "epoch": 0.708115838081703, "grad_norm": 0.3170127272605896, "learning_rate": 6.217641411838244e-06, "loss": 0.0346, "step": 39985 }, { "epoch": 0.7081335476187314, "grad_norm": 0.3290838599205017, "learning_rate": 6.216943946587707e-06, "loss": 0.063, "step": 39986 }, { "epoch": 0.7081512571557598, "grad_norm": 0.4110591411590576, "learning_rate": 6.216246510232537e-06, "loss": 0.0502, "step": 39987 }, { "epoch": 0.7081689666927883, "grad_norm": 0.33360761404037476, "learning_rate": 6.215549102775026e-06, "loss": 0.0339, "step": 39988 }, { "epoch": 0.7081866762298167, "grad_norm": 0.5009781122207642, "learning_rate": 6.214851724217472e-06, "loss": 0.0802, "step": 39989 }, { "epoch": 0.7082043857668451, "grad_norm": 0.42429280281066895, "learning_rate": 6.214154374562165e-06, "loss": 0.0608, "step": 39990 }, { "epoch": 0.7082220953038735, "grad_norm": 0.9381939768791199, "learning_rate": 6.213457053811408e-06, "loss": 0.0848, "step": 39991 }, { "epoch": 0.708239804840902, "grad_norm": 0.8748283386230469, "learning_rate": 6.212759761967483e-06, "loss": 0.0579, "step": 39992 }, { "epoch": 0.7082575143779304, "grad_norm": 0.2278081625699997, "learning_rate": 6.212062499032689e-06, "loss": 0.0523, "step": 39993 }, { "epoch": 0.7082752239149588, "grad_norm": 0.8836997747421265, "learning_rate": 6.211365265009321e-06, "loss": 0.0551, "step": 39994 }, { "epoch": 0.7082929334519872, "grad_norm": 0.7199454307556152, "learning_rate": 6.210668059899678e-06, "loss": 0.0552, "step": 39995 }, { "epoch": 0.7083106429890157, "grad_norm": 0.5236389636993408, "learning_rate": 6.209970883706043e-06, "loss": 0.0547, "step": 39996 }, { "epoch": 0.7083283525260441, "grad_norm": 0.44238221645355225, "learning_rate": 6.209273736430713e-06, "loss": 0.0319, "step": 39997 }, { "epoch": 0.7083460620630725, "grad_norm": 0.39595070481300354, "learning_rate": 6.208576618075986e-06, "loss": 0.0655, "step": 39998 }, { "epoch": 0.708363771600101, "grad_norm": 0.8595603704452515, "learning_rate": 6.207879528644151e-06, "loss": 0.0746, "step": 39999 }, { "epoch": 0.7083814811371294, "grad_norm": 0.7631552219390869, "learning_rate": 6.207182468137504e-06, "loss": 0.0547, "step": 40000 }, { "epoch": 0.7083991906741578, "grad_norm": 0.7316656112670898, "learning_rate": 6.206485436558337e-06, "loss": 0.0669, "step": 40001 }, { "epoch": 0.7084169002111862, "grad_norm": 0.5745868682861328, "learning_rate": 6.205788433908948e-06, "loss": 0.0556, "step": 40002 }, { "epoch": 0.7084346097482147, "grad_norm": 0.39776402711868286, "learning_rate": 6.205091460191622e-06, "loss": 0.0516, "step": 40003 }, { "epoch": 0.7084523192852431, "grad_norm": 0.7364429831504822, "learning_rate": 6.204394515408653e-06, "loss": 0.0427, "step": 40004 }, { "epoch": 0.7084700288222715, "grad_norm": 0.42633146047592163, "learning_rate": 6.2036975995623425e-06, "loss": 0.0622, "step": 40005 }, { "epoch": 0.7084877383592999, "grad_norm": 0.4290846586227417, "learning_rate": 6.203000712654973e-06, "loss": 0.057, "step": 40006 }, { "epoch": 0.7085054478963284, "grad_norm": 0.25451987981796265, "learning_rate": 6.202303854688835e-06, "loss": 0.0508, "step": 40007 }, { "epoch": 0.7085231574333568, "grad_norm": 0.4087776243686676, "learning_rate": 6.201607025666234e-06, "loss": 0.0493, "step": 40008 }, { "epoch": 0.7085408669703852, "grad_norm": 0.8211869597434998, "learning_rate": 6.200910225589462e-06, "loss": 0.0526, "step": 40009 }, { "epoch": 0.7085585765074136, "grad_norm": 0.47192302346229553, "learning_rate": 6.200213454460797e-06, "loss": 0.0531, "step": 40010 }, { "epoch": 0.7085762860444421, "grad_norm": 0.5742983818054199, "learning_rate": 6.1995167122825416e-06, "loss": 0.0442, "step": 40011 }, { "epoch": 0.7085939955814705, "grad_norm": 0.6795101761817932, "learning_rate": 6.198819999056986e-06, "loss": 0.047, "step": 40012 }, { "epoch": 0.7086117051184989, "grad_norm": 0.9736467003822327, "learning_rate": 6.198123314786427e-06, "loss": 0.1033, "step": 40013 }, { "epoch": 0.7086294146555274, "grad_norm": 0.6652787327766418, "learning_rate": 6.197426659473149e-06, "loss": 0.0645, "step": 40014 }, { "epoch": 0.7086471241925558, "grad_norm": 0.6034651398658752, "learning_rate": 6.196730033119445e-06, "loss": 0.0764, "step": 40015 }, { "epoch": 0.7086648337295842, "grad_norm": 0.8071134090423584, "learning_rate": 6.196033435727609e-06, "loss": 0.0551, "step": 40016 }, { "epoch": 0.7086825432666126, "grad_norm": 0.3604851961135864, "learning_rate": 6.1953368672999325e-06, "loss": 0.0645, "step": 40017 }, { "epoch": 0.7087002528036411, "grad_norm": 0.604529619216919, "learning_rate": 6.1946403278387085e-06, "loss": 0.0696, "step": 40018 }, { "epoch": 0.7087179623406695, "grad_norm": 0.5316689610481262, "learning_rate": 6.193943817346225e-06, "loss": 0.0623, "step": 40019 }, { "epoch": 0.7087356718776979, "grad_norm": 0.614313542842865, "learning_rate": 6.193247335824782e-06, "loss": 0.0344, "step": 40020 }, { "epoch": 0.7087533814147263, "grad_norm": 0.5639308094978333, "learning_rate": 6.19255088327666e-06, "loss": 0.061, "step": 40021 }, { "epoch": 0.7087710909517548, "grad_norm": 0.7839305400848389, "learning_rate": 6.191854459704153e-06, "loss": 0.0711, "step": 40022 }, { "epoch": 0.7087888004887832, "grad_norm": 0.5228201150894165, "learning_rate": 6.19115806510956e-06, "loss": 0.0351, "step": 40023 }, { "epoch": 0.7088065100258116, "grad_norm": 0.5119311809539795, "learning_rate": 6.190461699495161e-06, "loss": 0.0589, "step": 40024 }, { "epoch": 0.70882421956284, "grad_norm": 0.9639524221420288, "learning_rate": 6.189765362863253e-06, "loss": 0.0639, "step": 40025 }, { "epoch": 0.7088419290998685, "grad_norm": 0.3975915014743805, "learning_rate": 6.189069055216125e-06, "loss": 0.0504, "step": 40026 }, { "epoch": 0.7088596386368969, "grad_norm": 0.8396883010864258, "learning_rate": 6.188372776556068e-06, "loss": 0.0551, "step": 40027 }, { "epoch": 0.7088773481739253, "grad_norm": 0.707353949546814, "learning_rate": 6.187676526885374e-06, "loss": 0.0634, "step": 40028 }, { "epoch": 0.7088950577109538, "grad_norm": 0.7677714228630066, "learning_rate": 6.186980306206333e-06, "loss": 0.0742, "step": 40029 }, { "epoch": 0.7089127672479822, "grad_norm": 0.5723090767860413, "learning_rate": 6.18628411452124e-06, "loss": 0.0813, "step": 40030 }, { "epoch": 0.7089304767850106, "grad_norm": 0.7737362384796143, "learning_rate": 6.185587951832376e-06, "loss": 0.0737, "step": 40031 }, { "epoch": 0.708948186322039, "grad_norm": 0.5894005298614502, "learning_rate": 6.184891818142035e-06, "loss": 0.0459, "step": 40032 }, { "epoch": 0.7089658958590676, "grad_norm": 0.5018272399902344, "learning_rate": 6.18419571345251e-06, "loss": 0.0517, "step": 40033 }, { "epoch": 0.708983605396096, "grad_norm": 0.4084455072879791, "learning_rate": 6.183499637766094e-06, "loss": 0.0464, "step": 40034 }, { "epoch": 0.7090013149331243, "grad_norm": 0.7805482149124146, "learning_rate": 6.182803591085067e-06, "loss": 0.0577, "step": 40035 }, { "epoch": 0.7090190244701527, "grad_norm": 0.3922567665576935, "learning_rate": 6.182107573411719e-06, "loss": 0.0408, "step": 40036 }, { "epoch": 0.7090367340071813, "grad_norm": 0.8038794994354248, "learning_rate": 6.181411584748357e-06, "loss": 0.0712, "step": 40037 }, { "epoch": 0.7090544435442097, "grad_norm": 0.8438706994056702, "learning_rate": 6.1807156250972515e-06, "loss": 0.0594, "step": 40038 }, { "epoch": 0.709072153081238, "grad_norm": 0.5410816669464111, "learning_rate": 6.180019694460701e-06, "loss": 0.0738, "step": 40039 }, { "epoch": 0.7090898626182665, "grad_norm": 0.6462555527687073, "learning_rate": 6.179323792840994e-06, "loss": 0.0727, "step": 40040 }, { "epoch": 0.709107572155295, "grad_norm": 0.4014679491519928, "learning_rate": 6.1786279202404244e-06, "loss": 0.0708, "step": 40041 }, { "epoch": 0.7091252816923234, "grad_norm": 0.5491107106208801, "learning_rate": 6.177932076661271e-06, "loss": 0.0838, "step": 40042 }, { "epoch": 0.7091429912293518, "grad_norm": 0.5821458101272583, "learning_rate": 6.177236262105829e-06, "loss": 0.0416, "step": 40043 }, { "epoch": 0.7091607007663803, "grad_norm": 0.6035687327384949, "learning_rate": 6.176540476576388e-06, "loss": 0.064, "step": 40044 }, { "epoch": 0.7091784103034087, "grad_norm": 1.0175026655197144, "learning_rate": 6.175844720075236e-06, "loss": 0.0628, "step": 40045 }, { "epoch": 0.7091961198404371, "grad_norm": 0.7231695055961609, "learning_rate": 6.1751489926046615e-06, "loss": 0.07, "step": 40046 }, { "epoch": 0.7092138293774655, "grad_norm": 0.6376805305480957, "learning_rate": 6.174453294166955e-06, "loss": 0.0773, "step": 40047 }, { "epoch": 0.709231538914494, "grad_norm": 0.6547020673751831, "learning_rate": 6.17375762476441e-06, "loss": 0.0553, "step": 40048 }, { "epoch": 0.7092492484515224, "grad_norm": 0.7126986980438232, "learning_rate": 6.173061984399303e-06, "loss": 0.0764, "step": 40049 }, { "epoch": 0.7092669579885508, "grad_norm": 0.5347837805747986, "learning_rate": 6.17236637307393e-06, "loss": 0.0447, "step": 40050 }, { "epoch": 0.7092846675255792, "grad_norm": 0.6293019652366638, "learning_rate": 6.17167079079058e-06, "loss": 0.0406, "step": 40051 }, { "epoch": 0.7093023770626077, "grad_norm": 0.3892909586429596, "learning_rate": 6.170975237551545e-06, "loss": 0.0459, "step": 40052 }, { "epoch": 0.7093200865996361, "grad_norm": 0.5360000729560852, "learning_rate": 6.1702797133591035e-06, "loss": 0.0502, "step": 40053 }, { "epoch": 0.7093377961366645, "grad_norm": 0.68855220079422, "learning_rate": 6.1695842182155465e-06, "loss": 0.0523, "step": 40054 }, { "epoch": 0.7093555056736929, "grad_norm": 0.8683761954307556, "learning_rate": 6.168888752123166e-06, "loss": 0.0557, "step": 40055 }, { "epoch": 0.7093732152107214, "grad_norm": 0.5246082544326782, "learning_rate": 6.168193315084249e-06, "loss": 0.0697, "step": 40056 }, { "epoch": 0.7093909247477498, "grad_norm": 0.6244759559631348, "learning_rate": 6.1674979071010815e-06, "loss": 0.0597, "step": 40057 }, { "epoch": 0.7094086342847782, "grad_norm": 0.32445207238197327, "learning_rate": 6.166802528175952e-06, "loss": 0.0429, "step": 40058 }, { "epoch": 0.7094263438218067, "grad_norm": 0.6461719274520874, "learning_rate": 6.166107178311155e-06, "loss": 0.0558, "step": 40059 }, { "epoch": 0.7094440533588351, "grad_norm": 0.5721192359924316, "learning_rate": 6.1654118575089664e-06, "loss": 0.0394, "step": 40060 }, { "epoch": 0.7094617628958635, "grad_norm": 0.6577643752098083, "learning_rate": 6.1647165657716795e-06, "loss": 0.0453, "step": 40061 }, { "epoch": 0.7094794724328919, "grad_norm": 0.5632184147834778, "learning_rate": 6.164021303101587e-06, "loss": 0.0601, "step": 40062 }, { "epoch": 0.7094971819699204, "grad_norm": 0.4644105136394501, "learning_rate": 6.1633260695009644e-06, "loss": 0.0414, "step": 40063 }, { "epoch": 0.7095148915069488, "grad_norm": 0.5978321433067322, "learning_rate": 6.162630864972106e-06, "loss": 0.0623, "step": 40064 }, { "epoch": 0.7095326010439772, "grad_norm": 0.4416409730911255, "learning_rate": 6.161935689517292e-06, "loss": 0.0302, "step": 40065 }, { "epoch": 0.7095503105810056, "grad_norm": 0.47598370909690857, "learning_rate": 6.161240543138827e-06, "loss": 0.047, "step": 40066 }, { "epoch": 0.7095680201180341, "grad_norm": 0.4136449992656708, "learning_rate": 6.1605454258389815e-06, "loss": 0.0375, "step": 40067 }, { "epoch": 0.7095857296550625, "grad_norm": 0.9106754660606384, "learning_rate": 6.1598503376200474e-06, "loss": 0.0765, "step": 40068 }, { "epoch": 0.7096034391920909, "grad_norm": 1.2777245044708252, "learning_rate": 6.1591552784843165e-06, "loss": 0.0924, "step": 40069 }, { "epoch": 0.7096211487291193, "grad_norm": 0.9614390730857849, "learning_rate": 6.158460248434066e-06, "loss": 0.0846, "step": 40070 }, { "epoch": 0.7096388582661478, "grad_norm": 0.4946669638156891, "learning_rate": 6.157765247471587e-06, "loss": 0.0594, "step": 40071 }, { "epoch": 0.7096565678031762, "grad_norm": 0.4245004653930664, "learning_rate": 6.157070275599165e-06, "loss": 0.0358, "step": 40072 }, { "epoch": 0.7096742773402046, "grad_norm": 0.6208692193031311, "learning_rate": 6.156375332819088e-06, "loss": 0.0591, "step": 40073 }, { "epoch": 0.7096919868772331, "grad_norm": 0.7905531525611877, "learning_rate": 6.155680419133641e-06, "loss": 0.098, "step": 40074 }, { "epoch": 0.7097096964142615, "grad_norm": 0.3347068130970001, "learning_rate": 6.154985534545111e-06, "loss": 0.0674, "step": 40075 }, { "epoch": 0.7097274059512899, "grad_norm": 0.7097747325897217, "learning_rate": 6.154290679055791e-06, "loss": 0.0615, "step": 40076 }, { "epoch": 0.7097451154883183, "grad_norm": 0.5323147773742676, "learning_rate": 6.153595852667953e-06, "loss": 0.0409, "step": 40077 }, { "epoch": 0.7097628250253468, "grad_norm": 0.8396428227424622, "learning_rate": 6.15290105538389e-06, "loss": 0.0626, "step": 40078 }, { "epoch": 0.7097805345623752, "grad_norm": 0.6712294220924377, "learning_rate": 6.152206287205887e-06, "loss": 0.0796, "step": 40079 }, { "epoch": 0.7097982440994036, "grad_norm": 0.4376142621040344, "learning_rate": 6.151511548136238e-06, "loss": 0.0451, "step": 40080 }, { "epoch": 0.709815953636432, "grad_norm": 1.0359700918197632, "learning_rate": 6.150816838177214e-06, "loss": 0.0629, "step": 40081 }, { "epoch": 0.7098336631734605, "grad_norm": 0.40737900137901306, "learning_rate": 6.150122157331108e-06, "loss": 0.0488, "step": 40082 }, { "epoch": 0.7098513727104889, "grad_norm": 0.37741318345069885, "learning_rate": 6.149427505600204e-06, "loss": 0.0374, "step": 40083 }, { "epoch": 0.7098690822475173, "grad_norm": 0.4515320658683777, "learning_rate": 6.148732882986789e-06, "loss": 0.0612, "step": 40084 }, { "epoch": 0.7098867917845457, "grad_norm": 0.5810684561729431, "learning_rate": 6.148038289493148e-06, "loss": 0.0509, "step": 40085 }, { "epoch": 0.7099045013215742, "grad_norm": 0.4370133578777313, "learning_rate": 6.147343725121567e-06, "loss": 0.0425, "step": 40086 }, { "epoch": 0.7099222108586026, "grad_norm": 0.7373916506767273, "learning_rate": 6.146649189874332e-06, "loss": 0.0638, "step": 40087 }, { "epoch": 0.709939920395631, "grad_norm": 0.5026191473007202, "learning_rate": 6.145954683753722e-06, "loss": 0.044, "step": 40088 }, { "epoch": 0.7099576299326595, "grad_norm": 0.6925047039985657, "learning_rate": 6.1452602067620265e-06, "loss": 0.0566, "step": 40089 }, { "epoch": 0.7099753394696879, "grad_norm": 0.7054294347763062, "learning_rate": 6.144565758901529e-06, "loss": 0.051, "step": 40090 }, { "epoch": 0.7099930490067163, "grad_norm": 0.6411271095275879, "learning_rate": 6.143871340174518e-06, "loss": 0.0685, "step": 40091 }, { "epoch": 0.7100107585437447, "grad_norm": 0.15389971435070038, "learning_rate": 6.143176950583271e-06, "loss": 0.0328, "step": 40092 }, { "epoch": 0.7100284680807732, "grad_norm": 0.5813175439834595, "learning_rate": 6.142482590130074e-06, "loss": 0.048, "step": 40093 }, { "epoch": 0.7100461776178016, "grad_norm": 0.6317176222801208, "learning_rate": 6.1417882588172155e-06, "loss": 0.0749, "step": 40094 }, { "epoch": 0.71006388715483, "grad_norm": 0.4785158038139343, "learning_rate": 6.141093956646977e-06, "loss": 0.0712, "step": 40095 }, { "epoch": 0.7100815966918584, "grad_norm": 0.8077102303504944, "learning_rate": 6.140399683621644e-06, "loss": 0.0607, "step": 40096 }, { "epoch": 0.710099306228887, "grad_norm": 0.33754390478134155, "learning_rate": 6.139705439743499e-06, "loss": 0.0596, "step": 40097 }, { "epoch": 0.7101170157659153, "grad_norm": 0.7409770488739014, "learning_rate": 6.139011225014833e-06, "loss": 0.0563, "step": 40098 }, { "epoch": 0.7101347253029437, "grad_norm": 0.5258873701095581, "learning_rate": 6.1383170394379176e-06, "loss": 0.0384, "step": 40099 }, { "epoch": 0.7101524348399721, "grad_norm": 0.44375428557395935, "learning_rate": 6.137622883015042e-06, "loss": 0.0391, "step": 40100 }, { "epoch": 0.7101701443770007, "grad_norm": 0.5207296013832092, "learning_rate": 6.136928755748499e-06, "loss": 0.0562, "step": 40101 }, { "epoch": 0.710187853914029, "grad_norm": 0.7286165952682495, "learning_rate": 6.1362346576405505e-06, "loss": 0.0662, "step": 40102 }, { "epoch": 0.7102055634510575, "grad_norm": 0.675151526927948, "learning_rate": 6.1355405886935e-06, "loss": 0.0686, "step": 40103 }, { "epoch": 0.710223272988086, "grad_norm": 0.5679610371589661, "learning_rate": 6.1348465489096244e-06, "loss": 0.0527, "step": 40104 }, { "epoch": 0.7102409825251144, "grad_norm": 0.6089773178100586, "learning_rate": 6.1341525382912125e-06, "loss": 0.0694, "step": 40105 }, { "epoch": 0.7102586920621428, "grad_norm": 0.46709227561950684, "learning_rate": 6.133458556840537e-06, "loss": 0.0552, "step": 40106 }, { "epoch": 0.7102764015991712, "grad_norm": 0.8594092130661011, "learning_rate": 6.132764604559884e-06, "loss": 0.0481, "step": 40107 }, { "epoch": 0.7102941111361997, "grad_norm": 0.5229395031929016, "learning_rate": 6.132070681451546e-06, "loss": 0.0496, "step": 40108 }, { "epoch": 0.7103118206732281, "grad_norm": 0.45864489674568176, "learning_rate": 6.131376787517792e-06, "loss": 0.0628, "step": 40109 }, { "epoch": 0.7103295302102565, "grad_norm": 1.1460579633712769, "learning_rate": 6.130682922760913e-06, "loss": 0.0933, "step": 40110 }, { "epoch": 0.7103472397472849, "grad_norm": 0.6984922289848328, "learning_rate": 6.129989087183189e-06, "loss": 0.0705, "step": 40111 }, { "epoch": 0.7103649492843134, "grad_norm": 0.6463701725006104, "learning_rate": 6.129295280786903e-06, "loss": 0.0752, "step": 40112 }, { "epoch": 0.7103826588213418, "grad_norm": 0.8036392331123352, "learning_rate": 6.12860150357434e-06, "loss": 0.0451, "step": 40113 }, { "epoch": 0.7104003683583702, "grad_norm": 0.6145041584968567, "learning_rate": 6.12790775554778e-06, "loss": 0.0504, "step": 40114 }, { "epoch": 0.7104180778953986, "grad_norm": 0.6304498314857483, "learning_rate": 6.127214036709506e-06, "loss": 0.0587, "step": 40115 }, { "epoch": 0.7104357874324271, "grad_norm": 0.5725795030593872, "learning_rate": 6.126520347061805e-06, "loss": 0.043, "step": 40116 }, { "epoch": 0.7104534969694555, "grad_norm": 0.42166611552238464, "learning_rate": 6.125826686606949e-06, "loss": 0.0512, "step": 40117 }, { "epoch": 0.7104712065064839, "grad_norm": 0.7916292548179626, "learning_rate": 6.125133055347229e-06, "loss": 0.0893, "step": 40118 }, { "epoch": 0.7104889160435124, "grad_norm": 0.5042648911476135, "learning_rate": 6.124439453284925e-06, "loss": 0.0588, "step": 40119 }, { "epoch": 0.7105066255805408, "grad_norm": 0.7088474631309509, "learning_rate": 6.1237458804223144e-06, "loss": 0.0437, "step": 40120 }, { "epoch": 0.7105243351175692, "grad_norm": 0.5228641033172607, "learning_rate": 6.1230523367616814e-06, "loss": 0.0548, "step": 40121 }, { "epoch": 0.7105420446545976, "grad_norm": 0.7342177033424377, "learning_rate": 6.122358822305307e-06, "loss": 0.0941, "step": 40122 }, { "epoch": 0.7105597541916261, "grad_norm": 0.5642430782318115, "learning_rate": 6.121665337055476e-06, "loss": 0.0428, "step": 40123 }, { "epoch": 0.7105774637286545, "grad_norm": 0.5012591481208801, "learning_rate": 6.120971881014467e-06, "loss": 0.0631, "step": 40124 }, { "epoch": 0.7105951732656829, "grad_norm": 0.40555649995803833, "learning_rate": 6.120278454184564e-06, "loss": 0.0539, "step": 40125 }, { "epoch": 0.7106128828027113, "grad_norm": 0.8583621978759766, "learning_rate": 6.1195850565680515e-06, "loss": 0.0628, "step": 40126 }, { "epoch": 0.7106305923397398, "grad_norm": 0.3529796004295349, "learning_rate": 6.1188916881671995e-06, "loss": 0.0538, "step": 40127 }, { "epoch": 0.7106483018767682, "grad_norm": 0.5150849223136902, "learning_rate": 6.118198348984296e-06, "loss": 0.0816, "step": 40128 }, { "epoch": 0.7106660114137966, "grad_norm": 0.9137475490570068, "learning_rate": 6.117505039021623e-06, "loss": 0.08, "step": 40129 }, { "epoch": 0.710683720950825, "grad_norm": 0.40810638666152954, "learning_rate": 6.116811758281463e-06, "loss": 0.0616, "step": 40130 }, { "epoch": 0.7107014304878535, "grad_norm": 0.2563135027885437, "learning_rate": 6.116118506766084e-06, "loss": 0.0294, "step": 40131 }, { "epoch": 0.7107191400248819, "grad_norm": 0.4206155836582184, "learning_rate": 6.115425284477784e-06, "loss": 0.0467, "step": 40132 }, { "epoch": 0.7107368495619103, "grad_norm": 0.44081273674964905, "learning_rate": 6.114732091418838e-06, "loss": 0.067, "step": 40133 }, { "epoch": 0.7107545590989388, "grad_norm": 0.8496270179748535, "learning_rate": 6.114038927591522e-06, "loss": 0.0659, "step": 40134 }, { "epoch": 0.7107722686359672, "grad_norm": 0.9103181958198547, "learning_rate": 6.113345792998119e-06, "loss": 0.06, "step": 40135 }, { "epoch": 0.7107899781729956, "grad_norm": 0.5987975001335144, "learning_rate": 6.112652687640907e-06, "loss": 0.0525, "step": 40136 }, { "epoch": 0.710807687710024, "grad_norm": 0.6688889861106873, "learning_rate": 6.111959611522177e-06, "loss": 0.0729, "step": 40137 }, { "epoch": 0.7108253972470525, "grad_norm": 0.8047796487808228, "learning_rate": 6.1112665646441945e-06, "loss": 0.0756, "step": 40138 }, { "epoch": 0.7108431067840809, "grad_norm": 0.3870370388031006, "learning_rate": 6.1105735470092465e-06, "loss": 0.0508, "step": 40139 }, { "epoch": 0.7108608163211093, "grad_norm": 0.5542018413543701, "learning_rate": 6.109880558619612e-06, "loss": 0.0593, "step": 40140 }, { "epoch": 0.7108785258581377, "grad_norm": 0.48491132259368896, "learning_rate": 6.109187599477571e-06, "loss": 0.0446, "step": 40141 }, { "epoch": 0.7108962353951662, "grad_norm": 0.5097131133079529, "learning_rate": 6.108494669585404e-06, "loss": 0.0472, "step": 40142 }, { "epoch": 0.7109139449321946, "grad_norm": 0.25398269295692444, "learning_rate": 6.107801768945389e-06, "loss": 0.0474, "step": 40143 }, { "epoch": 0.710931654469223, "grad_norm": 0.5460076928138733, "learning_rate": 6.107108897559813e-06, "loss": 0.0395, "step": 40144 }, { "epoch": 0.7109493640062514, "grad_norm": 0.6263435482978821, "learning_rate": 6.106416055430944e-06, "loss": 0.0489, "step": 40145 }, { "epoch": 0.7109670735432799, "grad_norm": 1.0441499948501587, "learning_rate": 6.105723242561068e-06, "loss": 0.0928, "step": 40146 }, { "epoch": 0.7109847830803083, "grad_norm": 0.614648163318634, "learning_rate": 6.105030458952466e-06, "loss": 0.0705, "step": 40147 }, { "epoch": 0.7110024926173367, "grad_norm": 0.5700746178627014, "learning_rate": 6.104337704607409e-06, "loss": 0.072, "step": 40148 }, { "epoch": 0.7110202021543652, "grad_norm": 0.7222745418548584, "learning_rate": 6.103644979528182e-06, "loss": 0.0698, "step": 40149 }, { "epoch": 0.7110379116913936, "grad_norm": 0.08044139295816422, "learning_rate": 6.102952283717064e-06, "loss": 0.0401, "step": 40150 }, { "epoch": 0.711055621228422, "grad_norm": 0.26246556639671326, "learning_rate": 6.1022596171763315e-06, "loss": 0.0273, "step": 40151 }, { "epoch": 0.7110733307654504, "grad_norm": 0.25247547030448914, "learning_rate": 6.101566979908266e-06, "loss": 0.0468, "step": 40152 }, { "epoch": 0.7110910403024789, "grad_norm": 0.6070249080657959, "learning_rate": 6.100874371915144e-06, "loss": 0.0384, "step": 40153 }, { "epoch": 0.7111087498395073, "grad_norm": 0.6066105961799622, "learning_rate": 6.100181793199247e-06, "loss": 0.0467, "step": 40154 }, { "epoch": 0.7111264593765357, "grad_norm": 0.4958574175834656, "learning_rate": 6.0994892437628555e-06, "loss": 0.0547, "step": 40155 }, { "epoch": 0.7111441689135641, "grad_norm": 0.3369007706642151, "learning_rate": 6.098796723608238e-06, "loss": 0.0387, "step": 40156 }, { "epoch": 0.7111618784505926, "grad_norm": 0.7977903485298157, "learning_rate": 6.098104232737682e-06, "loss": 0.0688, "step": 40157 }, { "epoch": 0.711179587987621, "grad_norm": 0.5441576242446899, "learning_rate": 6.097411771153466e-06, "loss": 0.0367, "step": 40158 }, { "epoch": 0.7111972975246494, "grad_norm": 0.6937553286552429, "learning_rate": 6.09671933885786e-06, "loss": 0.083, "step": 40159 }, { "epoch": 0.7112150070616778, "grad_norm": 0.3542008399963379, "learning_rate": 6.0960269358531415e-06, "loss": 0.0505, "step": 40160 }, { "epoch": 0.7112327165987063, "grad_norm": 0.5055371522903442, "learning_rate": 6.095334562141599e-06, "loss": 0.0555, "step": 40161 }, { "epoch": 0.7112504261357347, "grad_norm": 0.8426505327224731, "learning_rate": 6.094642217725509e-06, "loss": 0.0811, "step": 40162 }, { "epoch": 0.7112681356727631, "grad_norm": 0.679979145526886, "learning_rate": 6.093949902607142e-06, "loss": 0.0606, "step": 40163 }, { "epoch": 0.7112858452097917, "grad_norm": 0.6179883480072021, "learning_rate": 6.09325761678878e-06, "loss": 0.0553, "step": 40164 }, { "epoch": 0.71130355474682, "grad_norm": 0.7444028258323669, "learning_rate": 6.092565360272703e-06, "loss": 0.1128, "step": 40165 }, { "epoch": 0.7113212642838485, "grad_norm": 0.39075884222984314, "learning_rate": 6.091873133061179e-06, "loss": 0.0538, "step": 40166 }, { "epoch": 0.7113389738208769, "grad_norm": 0.9307827949523926, "learning_rate": 6.091180935156493e-06, "loss": 0.0764, "step": 40167 }, { "epoch": 0.7113566833579054, "grad_norm": 0.786058247089386, "learning_rate": 6.090488766560922e-06, "loss": 0.0505, "step": 40168 }, { "epoch": 0.7113743928949338, "grad_norm": 0.6201754808425903, "learning_rate": 6.089796627276739e-06, "loss": 0.0547, "step": 40169 }, { "epoch": 0.7113921024319622, "grad_norm": 0.41861897706985474, "learning_rate": 6.089104517306225e-06, "loss": 0.0409, "step": 40170 }, { "epoch": 0.7114098119689906, "grad_norm": 0.9242460131645203, "learning_rate": 6.088412436651655e-06, "loss": 0.0777, "step": 40171 }, { "epoch": 0.7114275215060191, "grad_norm": 0.6739450693130493, "learning_rate": 6.087720385315313e-06, "loss": 0.0885, "step": 40172 }, { "epoch": 0.7114452310430475, "grad_norm": 0.6178664565086365, "learning_rate": 6.087028363299465e-06, "loss": 0.06, "step": 40173 }, { "epoch": 0.7114629405800759, "grad_norm": 0.8508387804031372, "learning_rate": 6.086336370606391e-06, "loss": 0.068, "step": 40174 }, { "epoch": 0.7114806501171043, "grad_norm": 0.8954630494117737, "learning_rate": 6.085644407238368e-06, "loss": 0.0568, "step": 40175 }, { "epoch": 0.7114983596541328, "grad_norm": 0.28733953833580017, "learning_rate": 6.084952473197681e-06, "loss": 0.0418, "step": 40176 }, { "epoch": 0.7115160691911612, "grad_norm": 0.6426548361778259, "learning_rate": 6.08426056848659e-06, "loss": 0.0325, "step": 40177 }, { "epoch": 0.7115337787281896, "grad_norm": 0.7545809149742126, "learning_rate": 6.083568693107384e-06, "loss": 0.0634, "step": 40178 }, { "epoch": 0.7115514882652181, "grad_norm": 0.6011908650398254, "learning_rate": 6.082876847062332e-06, "loss": 0.0382, "step": 40179 }, { "epoch": 0.7115691978022465, "grad_norm": 0.681689977645874, "learning_rate": 6.0821850303537134e-06, "loss": 0.0768, "step": 40180 }, { "epoch": 0.7115869073392749, "grad_norm": 0.5330995917320251, "learning_rate": 6.081493242983805e-06, "loss": 0.055, "step": 40181 }, { "epoch": 0.7116046168763033, "grad_norm": 1.0553665161132812, "learning_rate": 6.0808014849548824e-06, "loss": 0.0946, "step": 40182 }, { "epoch": 0.7116223264133318, "grad_norm": 0.3805367350578308, "learning_rate": 6.080109756269225e-06, "loss": 0.0569, "step": 40183 }, { "epoch": 0.7116400359503602, "grad_norm": 0.6379844546318054, "learning_rate": 6.0794180569290995e-06, "loss": 0.0683, "step": 40184 }, { "epoch": 0.7116577454873886, "grad_norm": 0.9655277729034424, "learning_rate": 6.078726386936787e-06, "loss": 0.0578, "step": 40185 }, { "epoch": 0.711675455024417, "grad_norm": 0.7628904581069946, "learning_rate": 6.078034746294562e-06, "loss": 0.0347, "step": 40186 }, { "epoch": 0.7116931645614455, "grad_norm": 0.47264334559440613, "learning_rate": 6.077343135004704e-06, "loss": 0.0404, "step": 40187 }, { "epoch": 0.7117108740984739, "grad_norm": 0.7244669198989868, "learning_rate": 6.076651553069481e-06, "loss": 0.0536, "step": 40188 }, { "epoch": 0.7117285836355023, "grad_norm": 0.7590864300727844, "learning_rate": 6.075960000491165e-06, "loss": 0.0762, "step": 40189 }, { "epoch": 0.7117462931725307, "grad_norm": 0.6480593681335449, "learning_rate": 6.075268477272051e-06, "loss": 0.0638, "step": 40190 }, { "epoch": 0.7117640027095592, "grad_norm": 0.7628985643386841, "learning_rate": 6.074576983414394e-06, "loss": 0.0701, "step": 40191 }, { "epoch": 0.7117817122465876, "grad_norm": 0.558887779712677, "learning_rate": 6.073885518920476e-06, "loss": 0.059, "step": 40192 }, { "epoch": 0.711799421783616, "grad_norm": 0.7103503346443176, "learning_rate": 6.073194083792573e-06, "loss": 0.0778, "step": 40193 }, { "epoch": 0.7118171313206445, "grad_norm": 0.4272037744522095, "learning_rate": 6.072502678032963e-06, "loss": 0.0504, "step": 40194 }, { "epoch": 0.7118348408576729, "grad_norm": 0.3576048016548157, "learning_rate": 6.0718113016439105e-06, "loss": 0.0421, "step": 40195 }, { "epoch": 0.7118525503947013, "grad_norm": 0.4308883249759674, "learning_rate": 6.071119954627697e-06, "loss": 0.039, "step": 40196 }, { "epoch": 0.7118702599317297, "grad_norm": 0.39519354701042175, "learning_rate": 6.070428636986594e-06, "loss": 0.0369, "step": 40197 }, { "epoch": 0.7118879694687582, "grad_norm": 0.6573086977005005, "learning_rate": 6.069737348722878e-06, "loss": 0.0657, "step": 40198 }, { "epoch": 0.7119056790057866, "grad_norm": 0.830973207950592, "learning_rate": 6.069046089838825e-06, "loss": 0.044, "step": 40199 }, { "epoch": 0.711923388542815, "grad_norm": 0.6275643110275269, "learning_rate": 6.068354860336705e-06, "loss": 0.0597, "step": 40200 }, { "epoch": 0.7119410980798434, "grad_norm": 0.4115866720676422, "learning_rate": 6.0676636602188e-06, "loss": 0.0706, "step": 40201 }, { "epoch": 0.7119588076168719, "grad_norm": 0.6847126483917236, "learning_rate": 6.066972489487372e-06, "loss": 0.0513, "step": 40202 }, { "epoch": 0.7119765171539003, "grad_norm": 0.6068921089172363, "learning_rate": 6.0662813481447015e-06, "loss": 0.0602, "step": 40203 }, { "epoch": 0.7119942266909287, "grad_norm": 0.8134111762046814, "learning_rate": 6.065590236193067e-06, "loss": 0.0459, "step": 40204 }, { "epoch": 0.7120119362279571, "grad_norm": 0.46512842178344727, "learning_rate": 6.064899153634733e-06, "loss": 0.04, "step": 40205 }, { "epoch": 0.7120296457649856, "grad_norm": 0.8793995976448059, "learning_rate": 6.0642081004719766e-06, "loss": 0.0617, "step": 40206 }, { "epoch": 0.712047355302014, "grad_norm": 0.5074549317359924, "learning_rate": 6.06351707670707e-06, "loss": 0.0696, "step": 40207 }, { "epoch": 0.7120650648390424, "grad_norm": 0.6034250259399414, "learning_rate": 6.0628260823422896e-06, "loss": 0.0528, "step": 40208 }, { "epoch": 0.7120827743760709, "grad_norm": 0.7530798316001892, "learning_rate": 6.062135117379908e-06, "loss": 0.0803, "step": 40209 }, { "epoch": 0.7121004839130993, "grad_norm": 0.5501188635826111, "learning_rate": 6.061444181822197e-06, "loss": 0.0589, "step": 40210 }, { "epoch": 0.7121181934501277, "grad_norm": 0.6846284866333008, "learning_rate": 6.060753275671436e-06, "loss": 0.072, "step": 40211 }, { "epoch": 0.7121359029871561, "grad_norm": 0.653542697429657, "learning_rate": 6.060062398929889e-06, "loss": 0.0703, "step": 40212 }, { "epoch": 0.7121536125241846, "grad_norm": 0.5838363766670227, "learning_rate": 6.0593715515998295e-06, "loss": 0.0679, "step": 40213 }, { "epoch": 0.712171322061213, "grad_norm": 0.5618209838867188, "learning_rate": 6.058680733683535e-06, "loss": 0.0753, "step": 40214 }, { "epoch": 0.7121890315982414, "grad_norm": 0.6740279793739319, "learning_rate": 6.0579899451832815e-06, "loss": 0.0658, "step": 40215 }, { "epoch": 0.7122067411352698, "grad_norm": 0.6177160739898682, "learning_rate": 6.057299186101331e-06, "loss": 0.0466, "step": 40216 }, { "epoch": 0.7122244506722983, "grad_norm": 0.48253464698791504, "learning_rate": 6.0566084564399615e-06, "loss": 0.0725, "step": 40217 }, { "epoch": 0.7122421602093267, "grad_norm": 0.7900753617286682, "learning_rate": 6.055917756201446e-06, "loss": 0.0638, "step": 40218 }, { "epoch": 0.7122598697463551, "grad_norm": 0.4023164212703705, "learning_rate": 6.055227085388058e-06, "loss": 0.0579, "step": 40219 }, { "epoch": 0.7122775792833835, "grad_norm": 0.6791344881057739, "learning_rate": 6.054536444002066e-06, "loss": 0.0639, "step": 40220 }, { "epoch": 0.712295288820412, "grad_norm": 0.41201624274253845, "learning_rate": 6.0538458320457455e-06, "loss": 0.0507, "step": 40221 }, { "epoch": 0.7123129983574404, "grad_norm": 0.3063279688358307, "learning_rate": 6.053155249521371e-06, "loss": 0.0472, "step": 40222 }, { "epoch": 0.7123307078944688, "grad_norm": 0.6957558989524841, "learning_rate": 6.052464696431208e-06, "loss": 0.0668, "step": 40223 }, { "epoch": 0.7123484174314973, "grad_norm": 0.4552757441997528, "learning_rate": 6.05177417277753e-06, "loss": 0.0677, "step": 40224 }, { "epoch": 0.7123661269685257, "grad_norm": 0.4419923424720764, "learning_rate": 6.051083678562609e-06, "loss": 0.0528, "step": 40225 }, { "epoch": 0.7123838365055541, "grad_norm": 0.6737112998962402, "learning_rate": 6.050393213788719e-06, "loss": 0.0465, "step": 40226 }, { "epoch": 0.7124015460425825, "grad_norm": 1.0232059955596924, "learning_rate": 6.04970277845813e-06, "loss": 0.1108, "step": 40227 }, { "epoch": 0.712419255579611, "grad_norm": 0.8097638487815857, "learning_rate": 6.049012372573113e-06, "loss": 0.064, "step": 40228 }, { "epoch": 0.7124369651166395, "grad_norm": 1.0843852758407593, "learning_rate": 6.0483219961359445e-06, "loss": 0.0818, "step": 40229 }, { "epoch": 0.7124546746536679, "grad_norm": 0.640848696231842, "learning_rate": 6.0476316491488875e-06, "loss": 0.0759, "step": 40230 }, { "epoch": 0.7124723841906963, "grad_norm": 0.40132686495780945, "learning_rate": 6.0469413316142155e-06, "loss": 0.0515, "step": 40231 }, { "epoch": 0.7124900937277248, "grad_norm": 0.6382392048835754, "learning_rate": 6.046251043534203e-06, "loss": 0.0583, "step": 40232 }, { "epoch": 0.7125078032647532, "grad_norm": 0.6860020756721497, "learning_rate": 6.045560784911121e-06, "loss": 0.0715, "step": 40233 }, { "epoch": 0.7125255128017816, "grad_norm": 0.6983266472816467, "learning_rate": 6.044870555747236e-06, "loss": 0.0768, "step": 40234 }, { "epoch": 0.71254322233881, "grad_norm": 0.5631890296936035, "learning_rate": 6.044180356044819e-06, "loss": 0.0486, "step": 40235 }, { "epoch": 0.7125609318758385, "grad_norm": 0.6709068417549133, "learning_rate": 6.043490185806145e-06, "loss": 0.084, "step": 40236 }, { "epoch": 0.7125786414128669, "grad_norm": 0.32977983355522156, "learning_rate": 6.0428000450334816e-06, "loss": 0.0586, "step": 40237 }, { "epoch": 0.7125963509498953, "grad_norm": 0.47892194986343384, "learning_rate": 6.042109933729099e-06, "loss": 0.0598, "step": 40238 }, { "epoch": 0.7126140604869238, "grad_norm": 0.6711026430130005, "learning_rate": 6.041419851895269e-06, "loss": 0.0473, "step": 40239 }, { "epoch": 0.7126317700239522, "grad_norm": 0.38087692856788635, "learning_rate": 6.0407297995342675e-06, "loss": 0.0308, "step": 40240 }, { "epoch": 0.7126494795609806, "grad_norm": 1.010984182357788, "learning_rate": 6.040039776648353e-06, "loss": 0.0596, "step": 40241 }, { "epoch": 0.712667189098009, "grad_norm": 0.6389426589012146, "learning_rate": 6.0393497832398024e-06, "loss": 0.0627, "step": 40242 }, { "epoch": 0.7126848986350375, "grad_norm": 0.5859407186508179, "learning_rate": 6.03865981931089e-06, "loss": 0.0499, "step": 40243 }, { "epoch": 0.7127026081720659, "grad_norm": 0.5138071775436401, "learning_rate": 6.037969884863873e-06, "loss": 0.0501, "step": 40244 }, { "epoch": 0.7127203177090943, "grad_norm": 0.2520293593406677, "learning_rate": 6.03727997990103e-06, "loss": 0.0266, "step": 40245 }, { "epoch": 0.7127380272461227, "grad_norm": 0.5012907981872559, "learning_rate": 6.03659010442463e-06, "loss": 0.0567, "step": 40246 }, { "epoch": 0.7127557367831512, "grad_norm": 0.5575629472732544, "learning_rate": 6.035900258436941e-06, "loss": 0.0559, "step": 40247 }, { "epoch": 0.7127734463201796, "grad_norm": 0.5522851943969727, "learning_rate": 6.035210441940234e-06, "loss": 0.0449, "step": 40248 }, { "epoch": 0.712791155857208, "grad_norm": 0.502554178237915, "learning_rate": 6.034520654936778e-06, "loss": 0.0452, "step": 40249 }, { "epoch": 0.7128088653942364, "grad_norm": 1.059000849723816, "learning_rate": 6.033830897428847e-06, "loss": 0.0514, "step": 40250 }, { "epoch": 0.7128265749312649, "grad_norm": 0.563170850276947, "learning_rate": 6.0331411694187e-06, "loss": 0.0358, "step": 40251 }, { "epoch": 0.7128442844682933, "grad_norm": 0.6561959385871887, "learning_rate": 6.032451470908611e-06, "loss": 0.0434, "step": 40252 }, { "epoch": 0.7128619940053217, "grad_norm": 0.5414769649505615, "learning_rate": 6.031761801900852e-06, "loss": 0.0646, "step": 40253 }, { "epoch": 0.7128797035423502, "grad_norm": 0.4914701581001282, "learning_rate": 6.031072162397693e-06, "loss": 0.0644, "step": 40254 }, { "epoch": 0.7128974130793786, "grad_norm": 0.7053501009941101, "learning_rate": 6.03038255240139e-06, "loss": 0.0404, "step": 40255 }, { "epoch": 0.712915122616407, "grad_norm": 1.1774040460586548, "learning_rate": 6.029692971914226e-06, "loss": 0.0606, "step": 40256 }, { "epoch": 0.7129328321534354, "grad_norm": 0.7223312854766846, "learning_rate": 6.0290034209384646e-06, "loss": 0.0525, "step": 40257 }, { "epoch": 0.7129505416904639, "grad_norm": 0.6360174417495728, "learning_rate": 6.028313899476379e-06, "loss": 0.0764, "step": 40258 }, { "epoch": 0.7129682512274923, "grad_norm": 0.4159190356731415, "learning_rate": 6.02762440753023e-06, "loss": 0.0896, "step": 40259 }, { "epoch": 0.7129859607645207, "grad_norm": 0.5001940727233887, "learning_rate": 6.026934945102287e-06, "loss": 0.0441, "step": 40260 }, { "epoch": 0.7130036703015491, "grad_norm": 0.796384334564209, "learning_rate": 6.0262455121948285e-06, "loss": 0.0609, "step": 40261 }, { "epoch": 0.7130213798385776, "grad_norm": 0.6977940797805786, "learning_rate": 6.025556108810107e-06, "loss": 0.0611, "step": 40262 }, { "epoch": 0.713039089375606, "grad_norm": 0.6949863433837891, "learning_rate": 6.024866734950399e-06, "loss": 0.0408, "step": 40263 }, { "epoch": 0.7130567989126344, "grad_norm": 0.8436651229858398, "learning_rate": 6.024177390617971e-06, "loss": 0.0562, "step": 40264 }, { "epoch": 0.7130745084496628, "grad_norm": 0.3821662664413452, "learning_rate": 6.023488075815091e-06, "loss": 0.061, "step": 40265 }, { "epoch": 0.7130922179866913, "grad_norm": 0.7807087898254395, "learning_rate": 6.02279879054403e-06, "loss": 0.0649, "step": 40266 }, { "epoch": 0.7131099275237197, "grad_norm": 0.39993637800216675, "learning_rate": 6.022109534807051e-06, "loss": 0.0529, "step": 40267 }, { "epoch": 0.7131276370607481, "grad_norm": 0.7432927489280701, "learning_rate": 6.021420308606429e-06, "loss": 0.0717, "step": 40268 }, { "epoch": 0.7131453465977766, "grad_norm": 0.7415482997894287, "learning_rate": 6.020731111944422e-06, "loss": 0.0515, "step": 40269 }, { "epoch": 0.713163056134805, "grad_norm": 0.4533877670764923, "learning_rate": 6.0200419448233e-06, "loss": 0.0556, "step": 40270 }, { "epoch": 0.7131807656718334, "grad_norm": 0.6049268245697021, "learning_rate": 6.019352807245332e-06, "loss": 0.0424, "step": 40271 }, { "epoch": 0.7131984752088618, "grad_norm": 0.5407490134239197, "learning_rate": 6.01866369921279e-06, "loss": 0.0538, "step": 40272 }, { "epoch": 0.7132161847458903, "grad_norm": 0.6608939170837402, "learning_rate": 6.017974620727931e-06, "loss": 0.0486, "step": 40273 }, { "epoch": 0.7132338942829187, "grad_norm": 1.1304901838302612, "learning_rate": 6.017285571793028e-06, "loss": 0.0545, "step": 40274 }, { "epoch": 0.7132516038199471, "grad_norm": 0.39995551109313965, "learning_rate": 6.0165965524103454e-06, "loss": 0.076, "step": 40275 }, { "epoch": 0.7132693133569755, "grad_norm": 0.3984517455101013, "learning_rate": 6.0159075625821534e-06, "loss": 0.0361, "step": 40276 }, { "epoch": 0.713287022894004, "grad_norm": 0.4795452654361725, "learning_rate": 6.015218602310715e-06, "loss": 0.0759, "step": 40277 }, { "epoch": 0.7133047324310324, "grad_norm": 0.5392754077911377, "learning_rate": 6.014529671598299e-06, "loss": 0.0461, "step": 40278 }, { "epoch": 0.7133224419680608, "grad_norm": 0.8874594569206238, "learning_rate": 6.013840770447178e-06, "loss": 0.0769, "step": 40279 }, { "epoch": 0.7133401515050892, "grad_norm": 0.2559969127178192, "learning_rate": 6.013151898859605e-06, "loss": 0.0345, "step": 40280 }, { "epoch": 0.7133578610421177, "grad_norm": 0.8549240827560425, "learning_rate": 6.012463056837855e-06, "loss": 0.0716, "step": 40281 }, { "epoch": 0.7133755705791461, "grad_norm": 0.7667618989944458, "learning_rate": 6.011774244384196e-06, "loss": 0.0654, "step": 40282 }, { "epoch": 0.7133932801161745, "grad_norm": 0.19162750244140625, "learning_rate": 6.011085461500887e-06, "loss": 0.0612, "step": 40283 }, { "epoch": 0.713410989653203, "grad_norm": 0.8401137590408325, "learning_rate": 6.010396708190191e-06, "loss": 0.0525, "step": 40284 }, { "epoch": 0.7134286991902314, "grad_norm": 0.36421212553977966, "learning_rate": 6.009707984454387e-06, "loss": 0.0392, "step": 40285 }, { "epoch": 0.7134464087272598, "grad_norm": 0.836542546749115, "learning_rate": 6.009019290295739e-06, "loss": 0.0569, "step": 40286 }, { "epoch": 0.7134641182642882, "grad_norm": 0.7914626002311707, "learning_rate": 6.008330625716503e-06, "loss": 0.0791, "step": 40287 }, { "epoch": 0.7134818278013167, "grad_norm": 1.0787140130996704, "learning_rate": 6.007641990718949e-06, "loss": 0.0678, "step": 40288 }, { "epoch": 0.7134995373383451, "grad_norm": 0.6260380148887634, "learning_rate": 6.006953385305345e-06, "loss": 0.085, "step": 40289 }, { "epoch": 0.7135172468753735, "grad_norm": 0.7792271971702576, "learning_rate": 6.006264809477959e-06, "loss": 0.0672, "step": 40290 }, { "epoch": 0.7135349564124019, "grad_norm": 0.45602166652679443, "learning_rate": 6.005576263239047e-06, "loss": 0.0396, "step": 40291 }, { "epoch": 0.7135526659494305, "grad_norm": 0.47350698709487915, "learning_rate": 6.00488774659088e-06, "loss": 0.036, "step": 40292 }, { "epoch": 0.7135703754864589, "grad_norm": 0.6976655721664429, "learning_rate": 6.004199259535721e-06, "loss": 0.0408, "step": 40293 }, { "epoch": 0.7135880850234873, "grad_norm": 0.35832324624061584, "learning_rate": 6.003510802075838e-06, "loss": 0.0878, "step": 40294 }, { "epoch": 0.7136057945605156, "grad_norm": 0.4914974570274353, "learning_rate": 6.002822374213494e-06, "loss": 0.0488, "step": 40295 }, { "epoch": 0.7136235040975442, "grad_norm": 0.7757959961891174, "learning_rate": 6.0021339759509535e-06, "loss": 0.0998, "step": 40296 }, { "epoch": 0.7136412136345726, "grad_norm": 0.3157258629798889, "learning_rate": 6.001445607290489e-06, "loss": 0.0566, "step": 40297 }, { "epoch": 0.713658923171601, "grad_norm": 0.486427366733551, "learning_rate": 6.0007572682343535e-06, "loss": 0.0326, "step": 40298 }, { "epoch": 0.7136766327086295, "grad_norm": 0.9283393621444702, "learning_rate": 6.000068958784815e-06, "loss": 0.0853, "step": 40299 }, { "epoch": 0.7136943422456579, "grad_norm": 0.5366060733795166, "learning_rate": 5.9993806789441445e-06, "loss": 0.0602, "step": 40300 }, { "epoch": 0.7137120517826863, "grad_norm": 0.5588864684104919, "learning_rate": 5.998692428714595e-06, "loss": 0.0547, "step": 40301 }, { "epoch": 0.7137297613197147, "grad_norm": 0.6362372636795044, "learning_rate": 5.998004208098439e-06, "loss": 0.0474, "step": 40302 }, { "epoch": 0.7137474708567432, "grad_norm": 0.5697488188743591, "learning_rate": 5.9973160170979375e-06, "loss": 0.0443, "step": 40303 }, { "epoch": 0.7137651803937716, "grad_norm": 0.3997015058994293, "learning_rate": 5.996627855715356e-06, "loss": 0.053, "step": 40304 }, { "epoch": 0.7137828899308, "grad_norm": 0.5133814811706543, "learning_rate": 5.995939723952959e-06, "loss": 0.1034, "step": 40305 }, { "epoch": 0.7138005994678284, "grad_norm": 0.5330526232719421, "learning_rate": 5.995251621813008e-06, "loss": 0.045, "step": 40306 }, { "epoch": 0.7138183090048569, "grad_norm": 0.39737075567245483, "learning_rate": 5.994563549297775e-06, "loss": 0.0507, "step": 40307 }, { "epoch": 0.7138360185418853, "grad_norm": 0.6414100527763367, "learning_rate": 5.993875506409511e-06, "loss": 0.0715, "step": 40308 }, { "epoch": 0.7138537280789137, "grad_norm": 0.7987813949584961, "learning_rate": 5.993187493150486e-06, "loss": 0.0784, "step": 40309 }, { "epoch": 0.7138714376159421, "grad_norm": 1.2209854125976562, "learning_rate": 5.992499509522964e-06, "loss": 0.091, "step": 40310 }, { "epoch": 0.7138891471529706, "grad_norm": 0.6851532459259033, "learning_rate": 5.991811555529212e-06, "loss": 0.0644, "step": 40311 }, { "epoch": 0.713906856689999, "grad_norm": 0.9913610219955444, "learning_rate": 5.9911236311714835e-06, "loss": 0.0804, "step": 40312 }, { "epoch": 0.7139245662270274, "grad_norm": 0.39714476466178894, "learning_rate": 5.9904357364520415e-06, "loss": 0.0706, "step": 40313 }, { "epoch": 0.7139422757640559, "grad_norm": 0.6322030425071716, "learning_rate": 5.989747871373166e-06, "loss": 0.0334, "step": 40314 }, { "epoch": 0.7139599853010843, "grad_norm": 0.5817548036575317, "learning_rate": 5.989060035937103e-06, "loss": 0.0486, "step": 40315 }, { "epoch": 0.7139776948381127, "grad_norm": 0.7141858339309692, "learning_rate": 5.988372230146121e-06, "loss": 0.0681, "step": 40316 }, { "epoch": 0.7139954043751411, "grad_norm": 0.5519543886184692, "learning_rate": 5.9876844540024835e-06, "loss": 0.0685, "step": 40317 }, { "epoch": 0.7140131139121696, "grad_norm": 0.4838762581348419, "learning_rate": 5.986996707508457e-06, "loss": 0.0705, "step": 40318 }, { "epoch": 0.714030823449198, "grad_norm": 0.7108036875724792, "learning_rate": 5.986308990666295e-06, "loss": 0.0754, "step": 40319 }, { "epoch": 0.7140485329862264, "grad_norm": 0.7014521360397339, "learning_rate": 5.985621303478266e-06, "loss": 0.0812, "step": 40320 }, { "epoch": 0.7140662425232548, "grad_norm": 0.37789633870124817, "learning_rate": 5.984933645946634e-06, "loss": 0.053, "step": 40321 }, { "epoch": 0.7140839520602833, "grad_norm": 0.2985656261444092, "learning_rate": 5.984246018073649e-06, "loss": 0.0667, "step": 40322 }, { "epoch": 0.7141016615973117, "grad_norm": 0.6358208656311035, "learning_rate": 5.983558419861589e-06, "loss": 0.0634, "step": 40323 }, { "epoch": 0.7141193711343401, "grad_norm": 0.5179038047790527, "learning_rate": 5.982870851312708e-06, "loss": 0.0476, "step": 40324 }, { "epoch": 0.7141370806713685, "grad_norm": 0.33245977759361267, "learning_rate": 5.982183312429275e-06, "loss": 0.0552, "step": 40325 }, { "epoch": 0.714154790208397, "grad_norm": 0.5355684757232666, "learning_rate": 5.981495803213544e-06, "loss": 0.0789, "step": 40326 }, { "epoch": 0.7141724997454254, "grad_norm": 0.45819365978240967, "learning_rate": 5.980808323667778e-06, "loss": 0.0704, "step": 40327 }, { "epoch": 0.7141902092824538, "grad_norm": 0.596713662147522, "learning_rate": 5.9801208737942394e-06, "loss": 0.0579, "step": 40328 }, { "epoch": 0.7142079188194823, "grad_norm": 0.4829980432987213, "learning_rate": 5.979433453595198e-06, "loss": 0.0596, "step": 40329 }, { "epoch": 0.7142256283565107, "grad_norm": 0.6310552358627319, "learning_rate": 5.9787460630729015e-06, "loss": 0.07, "step": 40330 }, { "epoch": 0.7142433378935391, "grad_norm": 0.3911462724208832, "learning_rate": 5.978058702229619e-06, "loss": 0.056, "step": 40331 }, { "epoch": 0.7142610474305675, "grad_norm": 0.6847537159919739, "learning_rate": 5.977371371067611e-06, "loss": 0.0559, "step": 40332 }, { "epoch": 0.714278756967596, "grad_norm": 0.36860865354537964, "learning_rate": 5.976684069589137e-06, "loss": 0.0623, "step": 40333 }, { "epoch": 0.7142964665046244, "grad_norm": 0.44252318143844604, "learning_rate": 5.975996797796461e-06, "loss": 0.0598, "step": 40334 }, { "epoch": 0.7143141760416528, "grad_norm": 0.6508093476295471, "learning_rate": 5.975309555691843e-06, "loss": 0.0558, "step": 40335 }, { "epoch": 0.7143318855786812, "grad_norm": 0.3416268825531006, "learning_rate": 5.974622343277549e-06, "loss": 0.0518, "step": 40336 }, { "epoch": 0.7143495951157097, "grad_norm": 0.9905411005020142, "learning_rate": 5.973935160555828e-06, "loss": 0.0703, "step": 40337 }, { "epoch": 0.7143673046527381, "grad_norm": 0.7292607426643372, "learning_rate": 5.973248007528948e-06, "loss": 0.0789, "step": 40338 }, { "epoch": 0.7143850141897665, "grad_norm": 0.6074010729789734, "learning_rate": 5.972560884199176e-06, "loss": 0.0643, "step": 40339 }, { "epoch": 0.7144027237267949, "grad_norm": 0.41591742634773254, "learning_rate": 5.971873790568759e-06, "loss": 0.0424, "step": 40340 }, { "epoch": 0.7144204332638234, "grad_norm": 0.6738686561584473, "learning_rate": 5.971186726639964e-06, "loss": 0.0612, "step": 40341 }, { "epoch": 0.7144381428008518, "grad_norm": 0.8509225845336914, "learning_rate": 5.970499692415046e-06, "loss": 0.0832, "step": 40342 }, { "epoch": 0.7144558523378802, "grad_norm": 0.6619759202003479, "learning_rate": 5.969812687896281e-06, "loss": 0.0735, "step": 40343 }, { "epoch": 0.7144735618749087, "grad_norm": 0.5483098030090332, "learning_rate": 5.969125713085915e-06, "loss": 0.081, "step": 40344 }, { "epoch": 0.7144912714119371, "grad_norm": 0.7144743800163269, "learning_rate": 5.9684387679862115e-06, "loss": 0.0652, "step": 40345 }, { "epoch": 0.7145089809489655, "grad_norm": 0.48916155099868774, "learning_rate": 5.967751852599436e-06, "loss": 0.0405, "step": 40346 }, { "epoch": 0.7145266904859939, "grad_norm": 0.4340716302394867, "learning_rate": 5.967064966927838e-06, "loss": 0.0587, "step": 40347 }, { "epoch": 0.7145444000230224, "grad_norm": 0.9546603560447693, "learning_rate": 5.966378110973682e-06, "loss": 0.079, "step": 40348 }, { "epoch": 0.7145621095600508, "grad_norm": 0.45516371726989746, "learning_rate": 5.965691284739229e-06, "loss": 0.0712, "step": 40349 }, { "epoch": 0.7145798190970792, "grad_norm": 0.6403424739837646, "learning_rate": 5.965004488226745e-06, "loss": 0.0567, "step": 40350 }, { "epoch": 0.7145975286341076, "grad_norm": 0.6024411916732788, "learning_rate": 5.964317721438468e-06, "loss": 0.0683, "step": 40351 }, { "epoch": 0.7146152381711361, "grad_norm": 0.6919263601303101, "learning_rate": 5.9636309843766795e-06, "loss": 0.0507, "step": 40352 }, { "epoch": 0.7146329477081645, "grad_norm": 0.47030481696128845, "learning_rate": 5.962944277043637e-06, "loss": 0.0549, "step": 40353 }, { "epoch": 0.7146506572451929, "grad_norm": 0.6146293878555298, "learning_rate": 5.962257599441588e-06, "loss": 0.0476, "step": 40354 }, { "epoch": 0.7146683667822213, "grad_norm": 0.8314221501350403, "learning_rate": 5.961570951572795e-06, "loss": 0.0576, "step": 40355 }, { "epoch": 0.7146860763192499, "grad_norm": 0.4602409601211548, "learning_rate": 5.960884333439522e-06, "loss": 0.0693, "step": 40356 }, { "epoch": 0.7147037858562783, "grad_norm": 0.4810875654220581, "learning_rate": 5.9601977450440295e-06, "loss": 0.0444, "step": 40357 }, { "epoch": 0.7147214953933066, "grad_norm": 0.3109179139137268, "learning_rate": 5.959511186388567e-06, "loss": 0.0494, "step": 40358 }, { "epoch": 0.7147392049303352, "grad_norm": 0.5555874705314636, "learning_rate": 5.9588246574753985e-06, "loss": 0.0918, "step": 40359 }, { "epoch": 0.7147569144673636, "grad_norm": 0.6123611330986023, "learning_rate": 5.958138158306784e-06, "loss": 0.0321, "step": 40360 }, { "epoch": 0.714774624004392, "grad_norm": 0.3746585249900818, "learning_rate": 5.9574516888849785e-06, "loss": 0.0498, "step": 40361 }, { "epoch": 0.7147923335414204, "grad_norm": 0.8146386742591858, "learning_rate": 5.956765249212242e-06, "loss": 0.0641, "step": 40362 }, { "epoch": 0.7148100430784489, "grad_norm": 0.4681333005428314, "learning_rate": 5.956078839290833e-06, "loss": 0.0361, "step": 40363 }, { "epoch": 0.7148277526154773, "grad_norm": 0.6843329071998596, "learning_rate": 5.955392459123017e-06, "loss": 0.0639, "step": 40364 }, { "epoch": 0.7148454621525057, "grad_norm": 0.5706737637519836, "learning_rate": 5.954706108711039e-06, "loss": 0.0731, "step": 40365 }, { "epoch": 0.7148631716895341, "grad_norm": 0.3815767168998718, "learning_rate": 5.9540197880571615e-06, "loss": 0.0648, "step": 40366 }, { "epoch": 0.7148808812265626, "grad_norm": 0.3852214813232422, "learning_rate": 5.953333497163645e-06, "loss": 0.0364, "step": 40367 }, { "epoch": 0.714898590763591, "grad_norm": 0.6230079531669617, "learning_rate": 5.952647236032752e-06, "loss": 0.0473, "step": 40368 }, { "epoch": 0.7149163003006194, "grad_norm": 0.2818160355091095, "learning_rate": 5.95196100466673e-06, "loss": 0.0711, "step": 40369 }, { "epoch": 0.7149340098376478, "grad_norm": 0.7039828300476074, "learning_rate": 5.951274803067839e-06, "loss": 0.0604, "step": 40370 }, { "epoch": 0.7149517193746763, "grad_norm": 0.4163992702960968, "learning_rate": 5.950588631238339e-06, "loss": 0.0519, "step": 40371 }, { "epoch": 0.7149694289117047, "grad_norm": 0.7175050377845764, "learning_rate": 5.949902489180489e-06, "loss": 0.0845, "step": 40372 }, { "epoch": 0.7149871384487331, "grad_norm": 0.7436787486076355, "learning_rate": 5.949216376896543e-06, "loss": 0.0438, "step": 40373 }, { "epoch": 0.7150048479857616, "grad_norm": 0.3114902079105377, "learning_rate": 5.94853029438876e-06, "loss": 0.0542, "step": 40374 }, { "epoch": 0.71502255752279, "grad_norm": 0.5561363101005554, "learning_rate": 5.947844241659402e-06, "loss": 0.0706, "step": 40375 }, { "epoch": 0.7150402670598184, "grad_norm": 0.488937646150589, "learning_rate": 5.947158218710716e-06, "loss": 0.0677, "step": 40376 }, { "epoch": 0.7150579765968468, "grad_norm": 0.7295730113983154, "learning_rate": 5.946472225544964e-06, "loss": 0.0654, "step": 40377 }, { "epoch": 0.7150756861338753, "grad_norm": 0.501979410648346, "learning_rate": 5.945786262164408e-06, "loss": 0.0491, "step": 40378 }, { "epoch": 0.7150933956709037, "grad_norm": 0.6733985543251038, "learning_rate": 5.945100328571293e-06, "loss": 0.0533, "step": 40379 }, { "epoch": 0.7151111052079321, "grad_norm": 0.5235415101051331, "learning_rate": 5.944414424767878e-06, "loss": 0.0584, "step": 40380 }, { "epoch": 0.7151288147449605, "grad_norm": 0.5999881029129028, "learning_rate": 5.9437285507564275e-06, "loss": 0.0352, "step": 40381 }, { "epoch": 0.715146524281989, "grad_norm": 0.6714163422584534, "learning_rate": 5.9430427065392e-06, "loss": 0.0713, "step": 40382 }, { "epoch": 0.7151642338190174, "grad_norm": 0.45606565475463867, "learning_rate": 5.9423568921184414e-06, "loss": 0.0433, "step": 40383 }, { "epoch": 0.7151819433560458, "grad_norm": 0.5119739770889282, "learning_rate": 5.941671107496412e-06, "loss": 0.0609, "step": 40384 }, { "epoch": 0.7151996528930742, "grad_norm": 0.9661956429481506, "learning_rate": 5.940985352675374e-06, "loss": 0.0378, "step": 40385 }, { "epoch": 0.7152173624301027, "grad_norm": 1.114540457725525, "learning_rate": 5.940299627657572e-06, "loss": 0.0721, "step": 40386 }, { "epoch": 0.7152350719671311, "grad_norm": 0.3380639851093292, "learning_rate": 5.939613932445268e-06, "loss": 0.0442, "step": 40387 }, { "epoch": 0.7152527815041595, "grad_norm": 0.3623448610305786, "learning_rate": 5.938928267040718e-06, "loss": 0.0469, "step": 40388 }, { "epoch": 0.715270491041188, "grad_norm": 0.6376964449882507, "learning_rate": 5.938242631446177e-06, "loss": 0.0676, "step": 40389 }, { "epoch": 0.7152882005782164, "grad_norm": 0.7428832650184631, "learning_rate": 5.937557025663902e-06, "loss": 0.0565, "step": 40390 }, { "epoch": 0.7153059101152448, "grad_norm": 0.49892401695251465, "learning_rate": 5.9368714496961475e-06, "loss": 0.0426, "step": 40391 }, { "epoch": 0.7153236196522732, "grad_norm": 0.5457420945167542, "learning_rate": 5.9361859035451735e-06, "loss": 0.0703, "step": 40392 }, { "epoch": 0.7153413291893017, "grad_norm": 0.7841532230377197, "learning_rate": 5.935500387213228e-06, "loss": 0.0795, "step": 40393 }, { "epoch": 0.7153590387263301, "grad_norm": 0.8594078421592712, "learning_rate": 5.934814900702568e-06, "loss": 0.0649, "step": 40394 }, { "epoch": 0.7153767482633585, "grad_norm": 0.46035468578338623, "learning_rate": 5.934129444015451e-06, "loss": 0.0666, "step": 40395 }, { "epoch": 0.7153944578003869, "grad_norm": 0.7590043544769287, "learning_rate": 5.933444017154135e-06, "loss": 0.069, "step": 40396 }, { "epoch": 0.7154121673374154, "grad_norm": 0.5630072355270386, "learning_rate": 5.932758620120867e-06, "loss": 0.0322, "step": 40397 }, { "epoch": 0.7154298768744438, "grad_norm": 0.9486198425292969, "learning_rate": 5.932073252917906e-06, "loss": 0.0786, "step": 40398 }, { "epoch": 0.7154475864114722, "grad_norm": 0.8407528400421143, "learning_rate": 5.931387915547507e-06, "loss": 0.0816, "step": 40399 }, { "epoch": 0.7154652959485006, "grad_norm": 0.6625473499298096, "learning_rate": 5.930702608011923e-06, "loss": 0.0469, "step": 40400 }, { "epoch": 0.7154830054855291, "grad_norm": 0.5566555261611938, "learning_rate": 5.930017330313411e-06, "loss": 0.0532, "step": 40401 }, { "epoch": 0.7155007150225575, "grad_norm": 0.6633018851280212, "learning_rate": 5.929332082454225e-06, "loss": 0.0474, "step": 40402 }, { "epoch": 0.7155184245595859, "grad_norm": 0.4407126307487488, "learning_rate": 5.9286468644366235e-06, "loss": 0.082, "step": 40403 }, { "epoch": 0.7155361340966144, "grad_norm": 0.6939283609390259, "learning_rate": 5.927961676262853e-06, "loss": 0.0595, "step": 40404 }, { "epoch": 0.7155538436336428, "grad_norm": 0.5037572979927063, "learning_rate": 5.9272765179351695e-06, "loss": 0.0706, "step": 40405 }, { "epoch": 0.7155715531706712, "grad_norm": 0.8933743834495544, "learning_rate": 5.926591389455828e-06, "loss": 0.0931, "step": 40406 }, { "epoch": 0.7155892627076996, "grad_norm": 0.6395529508590698, "learning_rate": 5.925906290827089e-06, "loss": 0.067, "step": 40407 }, { "epoch": 0.7156069722447281, "grad_norm": 0.4182654321193695, "learning_rate": 5.925221222051196e-06, "loss": 0.0307, "step": 40408 }, { "epoch": 0.7156246817817565, "grad_norm": 0.5305081605911255, "learning_rate": 5.924536183130401e-06, "loss": 0.052, "step": 40409 }, { "epoch": 0.7156423913187849, "grad_norm": 0.6959020495414734, "learning_rate": 5.923851174066975e-06, "loss": 0.0523, "step": 40410 }, { "epoch": 0.7156601008558133, "grad_norm": 0.48787161707878113, "learning_rate": 5.923166194863156e-06, "loss": 0.0686, "step": 40411 }, { "epoch": 0.7156778103928418, "grad_norm": 0.8099263906478882, "learning_rate": 5.922481245521201e-06, "loss": 0.0456, "step": 40412 }, { "epoch": 0.7156955199298702, "grad_norm": 0.2847450077533722, "learning_rate": 5.921796326043366e-06, "loss": 0.0368, "step": 40413 }, { "epoch": 0.7157132294668986, "grad_norm": 0.5277274250984192, "learning_rate": 5.921111436431907e-06, "loss": 0.058, "step": 40414 }, { "epoch": 0.715730939003927, "grad_norm": 0.6517887115478516, "learning_rate": 5.920426576689068e-06, "loss": 0.0545, "step": 40415 }, { "epoch": 0.7157486485409555, "grad_norm": 0.19499389827251434, "learning_rate": 5.919741746817107e-06, "loss": 0.0488, "step": 40416 }, { "epoch": 0.7157663580779839, "grad_norm": 0.9343952536582947, "learning_rate": 5.919056946818277e-06, "loss": 0.0598, "step": 40417 }, { "epoch": 0.7157840676150123, "grad_norm": 0.3999394178390503, "learning_rate": 5.918372176694832e-06, "loss": 0.0402, "step": 40418 }, { "epoch": 0.7158017771520409, "grad_norm": 0.6230544447898865, "learning_rate": 5.917687436449023e-06, "loss": 0.0705, "step": 40419 }, { "epoch": 0.7158194866890693, "grad_norm": 0.3419246971607208, "learning_rate": 5.917002726083104e-06, "loss": 0.0412, "step": 40420 }, { "epoch": 0.7158371962260976, "grad_norm": 0.41564807295799255, "learning_rate": 5.916318045599333e-06, "loss": 0.0331, "step": 40421 }, { "epoch": 0.715854905763126, "grad_norm": 0.5548626780509949, "learning_rate": 5.9156333949999524e-06, "loss": 0.0609, "step": 40422 }, { "epoch": 0.7158726153001546, "grad_norm": 0.6543362140655518, "learning_rate": 5.914948774287218e-06, "loss": 0.053, "step": 40423 }, { "epoch": 0.715890324837183, "grad_norm": 1.0086469650268555, "learning_rate": 5.9142641834633885e-06, "loss": 0.0558, "step": 40424 }, { "epoch": 0.7159080343742114, "grad_norm": 0.5569882392883301, "learning_rate": 5.9135796225307055e-06, "loss": 0.0593, "step": 40425 }, { "epoch": 0.7159257439112398, "grad_norm": 0.585662841796875, "learning_rate": 5.912895091491428e-06, "loss": 0.0797, "step": 40426 }, { "epoch": 0.7159434534482683, "grad_norm": 0.3919411897659302, "learning_rate": 5.912210590347805e-06, "loss": 0.0573, "step": 40427 }, { "epoch": 0.7159611629852967, "grad_norm": 0.6197004914283752, "learning_rate": 5.91152611910209e-06, "loss": 0.0679, "step": 40428 }, { "epoch": 0.7159788725223251, "grad_norm": 0.6572316884994507, "learning_rate": 5.910841677756535e-06, "loss": 0.0475, "step": 40429 }, { "epoch": 0.7159965820593535, "grad_norm": 0.5818679928779602, "learning_rate": 5.910157266313392e-06, "loss": 0.0661, "step": 40430 }, { "epoch": 0.716014291596382, "grad_norm": 0.5286624431610107, "learning_rate": 5.909472884774912e-06, "loss": 0.0372, "step": 40431 }, { "epoch": 0.7160320011334104, "grad_norm": 0.6678402423858643, "learning_rate": 5.908788533143352e-06, "loss": 0.0455, "step": 40432 }, { "epoch": 0.7160497106704388, "grad_norm": 0.7471815347671509, "learning_rate": 5.908104211420953e-06, "loss": 0.0757, "step": 40433 }, { "epoch": 0.7160674202074673, "grad_norm": 0.7085584998130798, "learning_rate": 5.907419919609972e-06, "loss": 0.0922, "step": 40434 }, { "epoch": 0.7160851297444957, "grad_norm": 0.4130822420120239, "learning_rate": 5.9067356577126645e-06, "loss": 0.0724, "step": 40435 }, { "epoch": 0.7161028392815241, "grad_norm": 0.6268396973609924, "learning_rate": 5.906051425731273e-06, "loss": 0.073, "step": 40436 }, { "epoch": 0.7161205488185525, "grad_norm": 0.5743018388748169, "learning_rate": 5.90536722366805e-06, "loss": 0.0559, "step": 40437 }, { "epoch": 0.716138258355581, "grad_norm": 0.7228140830993652, "learning_rate": 5.904683051525245e-06, "loss": 0.0672, "step": 40438 }, { "epoch": 0.7161559678926094, "grad_norm": 0.6220681071281433, "learning_rate": 5.903998909305123e-06, "loss": 0.0677, "step": 40439 }, { "epoch": 0.7161736774296378, "grad_norm": 0.5341103076934814, "learning_rate": 5.9033147970099205e-06, "loss": 0.0415, "step": 40440 }, { "epoch": 0.7161913869666662, "grad_norm": 0.4805286228656769, "learning_rate": 5.902630714641892e-06, "loss": 0.0463, "step": 40441 }, { "epoch": 0.7162090965036947, "grad_norm": 0.20666025578975677, "learning_rate": 5.9019466622032916e-06, "loss": 0.0418, "step": 40442 }, { "epoch": 0.7162268060407231, "grad_norm": 0.603794515132904, "learning_rate": 5.901262639696363e-06, "loss": 0.0809, "step": 40443 }, { "epoch": 0.7162445155777515, "grad_norm": 0.8025416135787964, "learning_rate": 5.90057864712336e-06, "loss": 0.0676, "step": 40444 }, { "epoch": 0.7162622251147799, "grad_norm": 0.6125115156173706, "learning_rate": 5.899894684486531e-06, "loss": 0.071, "step": 40445 }, { "epoch": 0.7162799346518084, "grad_norm": 0.5262484550476074, "learning_rate": 5.899210751788131e-06, "loss": 0.0576, "step": 40446 }, { "epoch": 0.7162976441888368, "grad_norm": 0.7837163209915161, "learning_rate": 5.898526849030406e-06, "loss": 0.0442, "step": 40447 }, { "epoch": 0.7163153537258652, "grad_norm": 0.5351607799530029, "learning_rate": 5.897842976215607e-06, "loss": 0.0585, "step": 40448 }, { "epoch": 0.7163330632628937, "grad_norm": 0.6344600915908813, "learning_rate": 5.897159133345989e-06, "loss": 0.0649, "step": 40449 }, { "epoch": 0.7163507727999221, "grad_norm": 0.8197574615478516, "learning_rate": 5.8964753204237905e-06, "loss": 0.0726, "step": 40450 }, { "epoch": 0.7163684823369505, "grad_norm": 0.41648203134536743, "learning_rate": 5.895791537451269e-06, "loss": 0.0358, "step": 40451 }, { "epoch": 0.7163861918739789, "grad_norm": 0.49573177099227905, "learning_rate": 5.895107784430673e-06, "loss": 0.0659, "step": 40452 }, { "epoch": 0.7164039014110074, "grad_norm": 0.47815337777137756, "learning_rate": 5.8944240613642544e-06, "loss": 0.0764, "step": 40453 }, { "epoch": 0.7164216109480358, "grad_norm": 0.6881684064865112, "learning_rate": 5.893740368254256e-06, "loss": 0.0585, "step": 40454 }, { "epoch": 0.7164393204850642, "grad_norm": 0.6366480588912964, "learning_rate": 5.89305670510293e-06, "loss": 0.0486, "step": 40455 }, { "epoch": 0.7164570300220926, "grad_norm": 0.8076040744781494, "learning_rate": 5.892373071912527e-06, "loss": 0.0728, "step": 40456 }, { "epoch": 0.7164747395591211, "grad_norm": 0.6685663461685181, "learning_rate": 5.891689468685295e-06, "loss": 0.0477, "step": 40457 }, { "epoch": 0.7164924490961495, "grad_norm": 0.6655070185661316, "learning_rate": 5.891005895423483e-06, "loss": 0.0915, "step": 40458 }, { "epoch": 0.7165101586331779, "grad_norm": 0.48683080077171326, "learning_rate": 5.890322352129342e-06, "loss": 0.0525, "step": 40459 }, { "epoch": 0.7165278681702063, "grad_norm": 0.4769587814807892, "learning_rate": 5.889638838805123e-06, "loss": 0.0596, "step": 40460 }, { "epoch": 0.7165455777072348, "grad_norm": 0.30888086557388306, "learning_rate": 5.888955355453067e-06, "loss": 0.0666, "step": 40461 }, { "epoch": 0.7165632872442632, "grad_norm": 0.6448798179626465, "learning_rate": 5.888271902075425e-06, "loss": 0.0529, "step": 40462 }, { "epoch": 0.7165809967812916, "grad_norm": 0.7709455490112305, "learning_rate": 5.887588478674447e-06, "loss": 0.049, "step": 40463 }, { "epoch": 0.7165987063183201, "grad_norm": 0.7554251551628113, "learning_rate": 5.886905085252386e-06, "loss": 0.0717, "step": 40464 }, { "epoch": 0.7166164158553485, "grad_norm": 0.3861863613128662, "learning_rate": 5.88622172181148e-06, "loss": 0.0396, "step": 40465 }, { "epoch": 0.7166341253923769, "grad_norm": 0.5105632543563843, "learning_rate": 5.885538388353984e-06, "loss": 0.0603, "step": 40466 }, { "epoch": 0.7166518349294053, "grad_norm": 0.551652729511261, "learning_rate": 5.8848550848821445e-06, "loss": 0.0379, "step": 40467 }, { "epoch": 0.7166695444664338, "grad_norm": 0.5939756631851196, "learning_rate": 5.88417181139821e-06, "loss": 0.0591, "step": 40468 }, { "epoch": 0.7166872540034622, "grad_norm": 0.4142208397388458, "learning_rate": 5.883488567904428e-06, "loss": 0.0594, "step": 40469 }, { "epoch": 0.7167049635404906, "grad_norm": 0.332950234413147, "learning_rate": 5.882805354403047e-06, "loss": 0.0397, "step": 40470 }, { "epoch": 0.716722673077519, "grad_norm": 0.45926231145858765, "learning_rate": 5.882122170896318e-06, "loss": 0.0536, "step": 40471 }, { "epoch": 0.7167403826145475, "grad_norm": 0.5238811373710632, "learning_rate": 5.88143901738648e-06, "loss": 0.0737, "step": 40472 }, { "epoch": 0.7167580921515759, "grad_norm": 0.5118356347084045, "learning_rate": 5.880755893875786e-06, "loss": 0.0511, "step": 40473 }, { "epoch": 0.7167758016886043, "grad_norm": 0.7473022937774658, "learning_rate": 5.880072800366487e-06, "loss": 0.0505, "step": 40474 }, { "epoch": 0.7167935112256327, "grad_norm": 0.9434260725975037, "learning_rate": 5.879389736860816e-06, "loss": 0.0579, "step": 40475 }, { "epoch": 0.7168112207626612, "grad_norm": 0.4605982303619385, "learning_rate": 5.878706703361036e-06, "loss": 0.0608, "step": 40476 }, { "epoch": 0.7168289302996896, "grad_norm": 0.74832683801651, "learning_rate": 5.8780236998693895e-06, "loss": 0.0659, "step": 40477 }, { "epoch": 0.716846639836718, "grad_norm": 0.3493571877479553, "learning_rate": 5.877340726388126e-06, "loss": 0.0599, "step": 40478 }, { "epoch": 0.7168643493737465, "grad_norm": 0.6827653646469116, "learning_rate": 5.876657782919484e-06, "loss": 0.0715, "step": 40479 }, { "epoch": 0.7168820589107749, "grad_norm": 0.6488761901855469, "learning_rate": 5.875974869465716e-06, "loss": 0.0759, "step": 40480 }, { "epoch": 0.7168997684478033, "grad_norm": 0.8214501142501831, "learning_rate": 5.875291986029071e-06, "loss": 0.0677, "step": 40481 }, { "epoch": 0.7169174779848317, "grad_norm": 0.593208372592926, "learning_rate": 5.874609132611788e-06, "loss": 0.0451, "step": 40482 }, { "epoch": 0.7169351875218603, "grad_norm": 0.5966905355453491, "learning_rate": 5.873926309216119e-06, "loss": 0.0911, "step": 40483 }, { "epoch": 0.7169528970588886, "grad_norm": 0.5730913877487183, "learning_rate": 5.873243515844308e-06, "loss": 0.0559, "step": 40484 }, { "epoch": 0.716970606595917, "grad_norm": 0.6394588947296143, "learning_rate": 5.8725607524986045e-06, "loss": 0.0665, "step": 40485 }, { "epoch": 0.7169883161329454, "grad_norm": 0.7662496566772461, "learning_rate": 5.871878019181252e-06, "loss": 0.0614, "step": 40486 }, { "epoch": 0.717006025669974, "grad_norm": 0.7936471700668335, "learning_rate": 5.871195315894497e-06, "loss": 0.0701, "step": 40487 }, { "epoch": 0.7170237352070024, "grad_norm": 0.5635308623313904, "learning_rate": 5.870512642640592e-06, "loss": 0.0583, "step": 40488 }, { "epoch": 0.7170414447440308, "grad_norm": 1.178260087966919, "learning_rate": 5.869829999421772e-06, "loss": 0.0968, "step": 40489 }, { "epoch": 0.7170591542810592, "grad_norm": 0.5451853275299072, "learning_rate": 5.869147386240287e-06, "loss": 0.0858, "step": 40490 }, { "epoch": 0.7170768638180877, "grad_norm": 0.4320172071456909, "learning_rate": 5.868464803098383e-06, "loss": 0.0576, "step": 40491 }, { "epoch": 0.7170945733551161, "grad_norm": 0.7963899970054626, "learning_rate": 5.867782249998313e-06, "loss": 0.0677, "step": 40492 }, { "epoch": 0.7171122828921445, "grad_norm": 0.384508341550827, "learning_rate": 5.867099726942311e-06, "loss": 0.0573, "step": 40493 }, { "epoch": 0.717129992429173, "grad_norm": 0.35326871275901794, "learning_rate": 5.866417233932625e-06, "loss": 0.0661, "step": 40494 }, { "epoch": 0.7171477019662014, "grad_norm": 0.48222121596336365, "learning_rate": 5.865734770971504e-06, "loss": 0.0548, "step": 40495 }, { "epoch": 0.7171654115032298, "grad_norm": 0.5536997318267822, "learning_rate": 5.865052338061192e-06, "loss": 0.0467, "step": 40496 }, { "epoch": 0.7171831210402582, "grad_norm": 0.8445982933044434, "learning_rate": 5.864369935203932e-06, "loss": 0.072, "step": 40497 }, { "epoch": 0.7172008305772867, "grad_norm": 0.6879403591156006, "learning_rate": 5.863687562401972e-06, "loss": 0.0659, "step": 40498 }, { "epoch": 0.7172185401143151, "grad_norm": 0.48003387451171875, "learning_rate": 5.863005219657562e-06, "loss": 0.059, "step": 40499 }, { "epoch": 0.7172362496513435, "grad_norm": 0.5206860899925232, "learning_rate": 5.862322906972935e-06, "loss": 0.0465, "step": 40500 }, { "epoch": 0.7172539591883719, "grad_norm": 0.5427981615066528, "learning_rate": 5.86164062435034e-06, "loss": 0.0565, "step": 40501 }, { "epoch": 0.7172716687254004, "grad_norm": 0.41523486375808716, "learning_rate": 5.860958371792025e-06, "loss": 0.0479, "step": 40502 }, { "epoch": 0.7172893782624288, "grad_norm": 0.38806334137916565, "learning_rate": 5.860276149300236e-06, "loss": 0.0388, "step": 40503 }, { "epoch": 0.7173070877994572, "grad_norm": 0.44551271200180054, "learning_rate": 5.8595939568772046e-06, "loss": 0.0712, "step": 40504 }, { "epoch": 0.7173247973364856, "grad_norm": 0.46576398611068726, "learning_rate": 5.85891179452519e-06, "loss": 0.0349, "step": 40505 }, { "epoch": 0.7173425068735141, "grad_norm": 0.9130329489707947, "learning_rate": 5.858229662246437e-06, "loss": 0.0708, "step": 40506 }, { "epoch": 0.7173602164105425, "grad_norm": 0.8201776742935181, "learning_rate": 5.857547560043177e-06, "loss": 0.0653, "step": 40507 }, { "epoch": 0.7173779259475709, "grad_norm": 0.3561975359916687, "learning_rate": 5.856865487917661e-06, "loss": 0.0724, "step": 40508 }, { "epoch": 0.7173956354845994, "grad_norm": 0.503483772277832, "learning_rate": 5.856183445872132e-06, "loss": 0.0393, "step": 40509 }, { "epoch": 0.7174133450216278, "grad_norm": 0.6021886467933655, "learning_rate": 5.8555014339088415e-06, "loss": 0.0566, "step": 40510 }, { "epoch": 0.7174310545586562, "grad_norm": 1.1065919399261475, "learning_rate": 5.854819452030021e-06, "loss": 0.0958, "step": 40511 }, { "epoch": 0.7174487640956846, "grad_norm": 0.4173133969306946, "learning_rate": 5.8541375002379185e-06, "loss": 0.0586, "step": 40512 }, { "epoch": 0.7174664736327131, "grad_norm": 0.5761383175849915, "learning_rate": 5.8534555785347806e-06, "loss": 0.0624, "step": 40513 }, { "epoch": 0.7174841831697415, "grad_norm": 0.5750037431716919, "learning_rate": 5.852773686922846e-06, "loss": 0.0497, "step": 40514 }, { "epoch": 0.7175018927067699, "grad_norm": 0.8641444444656372, "learning_rate": 5.852091825404362e-06, "loss": 0.0808, "step": 40515 }, { "epoch": 0.7175196022437983, "grad_norm": 0.4053552448749542, "learning_rate": 5.851409993981572e-06, "loss": 0.0453, "step": 40516 }, { "epoch": 0.7175373117808268, "grad_norm": 0.5502082705497742, "learning_rate": 5.850728192656721e-06, "loss": 0.0542, "step": 40517 }, { "epoch": 0.7175550213178552, "grad_norm": 0.8846040368080139, "learning_rate": 5.850046421432044e-06, "loss": 0.0724, "step": 40518 }, { "epoch": 0.7175727308548836, "grad_norm": 0.5485401749610901, "learning_rate": 5.849364680309789e-06, "loss": 0.0723, "step": 40519 }, { "epoch": 0.717590440391912, "grad_norm": 0.8227723240852356, "learning_rate": 5.848682969292204e-06, "loss": 0.0892, "step": 40520 }, { "epoch": 0.7176081499289405, "grad_norm": 0.5713862180709839, "learning_rate": 5.8480012883815225e-06, "loss": 0.0822, "step": 40521 }, { "epoch": 0.7176258594659689, "grad_norm": 0.40872853994369507, "learning_rate": 5.84731963757999e-06, "loss": 0.0318, "step": 40522 }, { "epoch": 0.7176435690029973, "grad_norm": 0.6787484884262085, "learning_rate": 5.8466380168898495e-06, "loss": 0.0555, "step": 40523 }, { "epoch": 0.7176612785400258, "grad_norm": 0.6925739049911499, "learning_rate": 5.845956426313345e-06, "loss": 0.0641, "step": 40524 }, { "epoch": 0.7176789880770542, "grad_norm": 0.8756033778190613, "learning_rate": 5.845274865852718e-06, "loss": 0.0845, "step": 40525 }, { "epoch": 0.7176966976140826, "grad_norm": 0.8522506356239319, "learning_rate": 5.84459333551021e-06, "loss": 0.0736, "step": 40526 }, { "epoch": 0.717714407151111, "grad_norm": 0.4034474492073059, "learning_rate": 5.84391183528807e-06, "loss": 0.0627, "step": 40527 }, { "epoch": 0.7177321166881395, "grad_norm": 0.5538249611854553, "learning_rate": 5.843230365188528e-06, "loss": 0.0615, "step": 40528 }, { "epoch": 0.7177498262251679, "grad_norm": 0.6461663246154785, "learning_rate": 5.842548925213832e-06, "loss": 0.0414, "step": 40529 }, { "epoch": 0.7177675357621963, "grad_norm": 0.42588749527931213, "learning_rate": 5.841867515366224e-06, "loss": 0.0518, "step": 40530 }, { "epoch": 0.7177852452992247, "grad_norm": 0.5723208785057068, "learning_rate": 5.841186135647951e-06, "loss": 0.0578, "step": 40531 }, { "epoch": 0.7178029548362532, "grad_norm": 0.5833538174629211, "learning_rate": 5.8405047860612435e-06, "loss": 0.0552, "step": 40532 }, { "epoch": 0.7178206643732816, "grad_norm": 0.43411073088645935, "learning_rate": 5.839823466608343e-06, "loss": 0.0333, "step": 40533 }, { "epoch": 0.71783837391031, "grad_norm": 0.41584810614585876, "learning_rate": 5.8391421772915036e-06, "loss": 0.05, "step": 40534 }, { "epoch": 0.7178560834473385, "grad_norm": 0.6334367394447327, "learning_rate": 5.838460918112965e-06, "loss": 0.0725, "step": 40535 }, { "epoch": 0.7178737929843669, "grad_norm": 0.7219386696815491, "learning_rate": 5.837779689074957e-06, "loss": 0.0678, "step": 40536 }, { "epoch": 0.7178915025213953, "grad_norm": 0.6233456134796143, "learning_rate": 5.837098490179728e-06, "loss": 0.0451, "step": 40537 }, { "epoch": 0.7179092120584237, "grad_norm": 0.2862623333930969, "learning_rate": 5.836417321429524e-06, "loss": 0.0567, "step": 40538 }, { "epoch": 0.7179269215954522, "grad_norm": 0.6676159501075745, "learning_rate": 5.835736182826573e-06, "loss": 0.0719, "step": 40539 }, { "epoch": 0.7179446311324806, "grad_norm": 0.4409745931625366, "learning_rate": 5.835055074373124e-06, "loss": 0.0403, "step": 40540 }, { "epoch": 0.717962340669509, "grad_norm": 0.2857210338115692, "learning_rate": 5.834373996071417e-06, "loss": 0.0491, "step": 40541 }, { "epoch": 0.7179800502065374, "grad_norm": 0.41412490606307983, "learning_rate": 5.833692947923694e-06, "loss": 0.0536, "step": 40542 }, { "epoch": 0.7179977597435659, "grad_norm": 0.6699059009552002, "learning_rate": 5.833011929932191e-06, "loss": 0.0821, "step": 40543 }, { "epoch": 0.7180154692805943, "grad_norm": 0.8149149417877197, "learning_rate": 5.8323309420991535e-06, "loss": 0.0531, "step": 40544 }, { "epoch": 0.7180331788176227, "grad_norm": 0.4132430851459503, "learning_rate": 5.831649984426825e-06, "loss": 0.0488, "step": 40545 }, { "epoch": 0.7180508883546511, "grad_norm": 0.4415239691734314, "learning_rate": 5.830969056917436e-06, "loss": 0.0477, "step": 40546 }, { "epoch": 0.7180685978916796, "grad_norm": 0.5552554130554199, "learning_rate": 5.83028815957323e-06, "loss": 0.0472, "step": 40547 }, { "epoch": 0.718086307428708, "grad_norm": 0.6681820750236511, "learning_rate": 5.82960729239645e-06, "loss": 0.0504, "step": 40548 }, { "epoch": 0.7181040169657364, "grad_norm": 0.9686889052391052, "learning_rate": 5.828926455389338e-06, "loss": 0.0544, "step": 40549 }, { "epoch": 0.718121726502765, "grad_norm": 0.24857215583324432, "learning_rate": 5.8282456485541255e-06, "loss": 0.0438, "step": 40550 }, { "epoch": 0.7181394360397934, "grad_norm": 1.0560846328735352, "learning_rate": 5.827564871893058e-06, "loss": 0.0577, "step": 40551 }, { "epoch": 0.7181571455768218, "grad_norm": 0.828242301940918, "learning_rate": 5.826884125408374e-06, "loss": 0.0551, "step": 40552 }, { "epoch": 0.7181748551138502, "grad_norm": 0.26686665415763855, "learning_rate": 5.826203409102315e-06, "loss": 0.0625, "step": 40553 }, { "epoch": 0.7181925646508787, "grad_norm": 0.5106685757637024, "learning_rate": 5.8255227229771165e-06, "loss": 0.0563, "step": 40554 }, { "epoch": 0.7182102741879071, "grad_norm": 0.4500657320022583, "learning_rate": 5.8248420670350205e-06, "loss": 0.0634, "step": 40555 }, { "epoch": 0.7182279837249355, "grad_norm": 0.5252599716186523, "learning_rate": 5.824161441278273e-06, "loss": 0.0821, "step": 40556 }, { "epoch": 0.7182456932619639, "grad_norm": 0.2415105253458023, "learning_rate": 5.8234808457091e-06, "loss": 0.0353, "step": 40557 }, { "epoch": 0.7182634027989924, "grad_norm": 0.6029468178749084, "learning_rate": 5.822800280329746e-06, "loss": 0.0738, "step": 40558 }, { "epoch": 0.7182811123360208, "grad_norm": 0.4005916118621826, "learning_rate": 5.822119745142457e-06, "loss": 0.0644, "step": 40559 }, { "epoch": 0.7182988218730492, "grad_norm": 0.4678327143192291, "learning_rate": 5.8214392401494606e-06, "loss": 0.0388, "step": 40560 }, { "epoch": 0.7183165314100776, "grad_norm": 0.3727707266807556, "learning_rate": 5.820758765353002e-06, "loss": 0.0709, "step": 40561 }, { "epoch": 0.7183342409471061, "grad_norm": 1.1132303476333618, "learning_rate": 5.820078320755312e-06, "loss": 0.0738, "step": 40562 }, { "epoch": 0.7183519504841345, "grad_norm": 0.5310946702957153, "learning_rate": 5.819397906358646e-06, "loss": 0.0476, "step": 40563 }, { "epoch": 0.7183696600211629, "grad_norm": 0.5527336597442627, "learning_rate": 5.818717522165227e-06, "loss": 0.0626, "step": 40564 }, { "epoch": 0.7183873695581914, "grad_norm": 0.5456082224845886, "learning_rate": 5.818037168177299e-06, "loss": 0.0562, "step": 40565 }, { "epoch": 0.7184050790952198, "grad_norm": 0.603692889213562, "learning_rate": 5.817356844397105e-06, "loss": 0.0435, "step": 40566 }, { "epoch": 0.7184227886322482, "grad_norm": 0.676771342754364, "learning_rate": 5.816676550826874e-06, "loss": 0.0848, "step": 40567 }, { "epoch": 0.7184404981692766, "grad_norm": 0.21805614233016968, "learning_rate": 5.8159962874688485e-06, "loss": 0.0432, "step": 40568 }, { "epoch": 0.7184582077063051, "grad_norm": 0.5923321843147278, "learning_rate": 5.815316054325266e-06, "loss": 0.0724, "step": 40569 }, { "epoch": 0.7184759172433335, "grad_norm": 0.543694257736206, "learning_rate": 5.814635851398363e-06, "loss": 0.0444, "step": 40570 }, { "epoch": 0.7184936267803619, "grad_norm": 0.8188243508338928, "learning_rate": 5.81395567869038e-06, "loss": 0.0522, "step": 40571 }, { "epoch": 0.7185113363173903, "grad_norm": 0.7604522109031677, "learning_rate": 5.813275536203555e-06, "loss": 0.1089, "step": 40572 }, { "epoch": 0.7185290458544188, "grad_norm": 0.6175801157951355, "learning_rate": 5.812595423940123e-06, "loss": 0.0647, "step": 40573 }, { "epoch": 0.7185467553914472, "grad_norm": 0.30046346783638, "learning_rate": 5.811915341902327e-06, "loss": 0.0618, "step": 40574 }, { "epoch": 0.7185644649284756, "grad_norm": 0.48294568061828613, "learning_rate": 5.811235290092398e-06, "loss": 0.0551, "step": 40575 }, { "epoch": 0.718582174465504, "grad_norm": 0.3742077052593231, "learning_rate": 5.8105552685125725e-06, "loss": 0.0796, "step": 40576 }, { "epoch": 0.7185998840025325, "grad_norm": 0.5396482348442078, "learning_rate": 5.809875277165098e-06, "loss": 0.0515, "step": 40577 }, { "epoch": 0.7186175935395609, "grad_norm": 0.4780157208442688, "learning_rate": 5.809195316052198e-06, "loss": 0.058, "step": 40578 }, { "epoch": 0.7186353030765893, "grad_norm": 0.37692657113075256, "learning_rate": 5.8085153851761165e-06, "loss": 0.0586, "step": 40579 }, { "epoch": 0.7186530126136178, "grad_norm": 0.446216881275177, "learning_rate": 5.807835484539089e-06, "loss": 0.0495, "step": 40580 }, { "epoch": 0.7186707221506462, "grad_norm": 0.705859363079071, "learning_rate": 5.807155614143353e-06, "loss": 0.0469, "step": 40581 }, { "epoch": 0.7186884316876746, "grad_norm": 0.7499158382415771, "learning_rate": 5.806475773991146e-06, "loss": 0.0529, "step": 40582 }, { "epoch": 0.718706141224703, "grad_norm": 0.8224866986274719, "learning_rate": 5.805795964084704e-06, "loss": 0.0486, "step": 40583 }, { "epoch": 0.7187238507617315, "grad_norm": 0.49719950556755066, "learning_rate": 5.805116184426267e-06, "loss": 0.0585, "step": 40584 }, { "epoch": 0.7187415602987599, "grad_norm": 0.33320093154907227, "learning_rate": 5.804436435018064e-06, "loss": 0.0486, "step": 40585 }, { "epoch": 0.7187592698357883, "grad_norm": 0.5201186537742615, "learning_rate": 5.803756715862334e-06, "loss": 0.0466, "step": 40586 }, { "epoch": 0.7187769793728167, "grad_norm": 0.6577085256576538, "learning_rate": 5.8030770269613145e-06, "loss": 0.0686, "step": 40587 }, { "epoch": 0.7187946889098452, "grad_norm": 0.7751087546348572, "learning_rate": 5.802397368317248e-06, "loss": 0.0818, "step": 40588 }, { "epoch": 0.7188123984468736, "grad_norm": 0.6768486499786377, "learning_rate": 5.801717739932357e-06, "loss": 0.0612, "step": 40589 }, { "epoch": 0.718830107983902, "grad_norm": 0.9467177391052246, "learning_rate": 5.801038141808885e-06, "loss": 0.0803, "step": 40590 }, { "epoch": 0.7188478175209304, "grad_norm": 0.34956955909729004, "learning_rate": 5.800358573949067e-06, "loss": 0.0547, "step": 40591 }, { "epoch": 0.7188655270579589, "grad_norm": 0.7399556636810303, "learning_rate": 5.799679036355139e-06, "loss": 0.0784, "step": 40592 }, { "epoch": 0.7188832365949873, "grad_norm": 0.8239666819572449, "learning_rate": 5.7989995290293355e-06, "loss": 0.0476, "step": 40593 }, { "epoch": 0.7189009461320157, "grad_norm": 0.3735170364379883, "learning_rate": 5.798320051973894e-06, "loss": 0.0691, "step": 40594 }, { "epoch": 0.7189186556690442, "grad_norm": 0.5671502351760864, "learning_rate": 5.797640605191053e-06, "loss": 0.0793, "step": 40595 }, { "epoch": 0.7189363652060726, "grad_norm": 0.4890157878398895, "learning_rate": 5.796961188683039e-06, "loss": 0.0529, "step": 40596 }, { "epoch": 0.718954074743101, "grad_norm": 0.6595969200134277, "learning_rate": 5.796281802452093e-06, "loss": 0.0636, "step": 40597 }, { "epoch": 0.7189717842801294, "grad_norm": 0.5206562280654907, "learning_rate": 5.795602446500452e-06, "loss": 0.0654, "step": 40598 }, { "epoch": 0.7189894938171579, "grad_norm": 0.8712447881698608, "learning_rate": 5.794923120830339e-06, "loss": 0.0728, "step": 40599 }, { "epoch": 0.7190072033541863, "grad_norm": 1.0679702758789062, "learning_rate": 5.794243825444003e-06, "loss": 0.0732, "step": 40600 }, { "epoch": 0.7190249128912147, "grad_norm": 0.8579913973808289, "learning_rate": 5.793564560343673e-06, "loss": 0.0571, "step": 40601 }, { "epoch": 0.7190426224282431, "grad_norm": 0.8748428225517273, "learning_rate": 5.79288532553159e-06, "loss": 0.0912, "step": 40602 }, { "epoch": 0.7190603319652716, "grad_norm": 0.7186224460601807, "learning_rate": 5.792206121009979e-06, "loss": 0.0494, "step": 40603 }, { "epoch": 0.7190780415023, "grad_norm": 0.5011402368545532, "learning_rate": 5.791526946781077e-06, "loss": 0.051, "step": 40604 }, { "epoch": 0.7190957510393284, "grad_norm": 0.4319900572299957, "learning_rate": 5.790847802847119e-06, "loss": 0.0545, "step": 40605 }, { "epoch": 0.7191134605763568, "grad_norm": 0.4339221119880676, "learning_rate": 5.790168689210349e-06, "loss": 0.0642, "step": 40606 }, { "epoch": 0.7191311701133853, "grad_norm": 0.5977305769920349, "learning_rate": 5.789489605872985e-06, "loss": 0.0527, "step": 40607 }, { "epoch": 0.7191488796504137, "grad_norm": 0.41397184133529663, "learning_rate": 5.788810552837268e-06, "loss": 0.0478, "step": 40608 }, { "epoch": 0.7191665891874421, "grad_norm": 0.6628862023353577, "learning_rate": 5.788131530105435e-06, "loss": 0.0454, "step": 40609 }, { "epoch": 0.7191842987244706, "grad_norm": 0.6184388995170593, "learning_rate": 5.787452537679715e-06, "loss": 0.0443, "step": 40610 }, { "epoch": 0.719202008261499, "grad_norm": 0.942487359046936, "learning_rate": 5.786773575562345e-06, "loss": 0.0656, "step": 40611 }, { "epoch": 0.7192197177985274, "grad_norm": 0.7113763093948364, "learning_rate": 5.786094643755558e-06, "loss": 0.0791, "step": 40612 }, { "epoch": 0.7192374273355558, "grad_norm": 0.5495375990867615, "learning_rate": 5.7854157422615945e-06, "loss": 0.0705, "step": 40613 }, { "epoch": 0.7192551368725844, "grad_norm": 0.3700392246246338, "learning_rate": 5.784736871082674e-06, "loss": 0.0468, "step": 40614 }, { "epoch": 0.7192728464096128, "grad_norm": 0.6747941970825195, "learning_rate": 5.784058030221038e-06, "loss": 0.0681, "step": 40615 }, { "epoch": 0.7192905559466412, "grad_norm": 0.8867555260658264, "learning_rate": 5.7833792196789245e-06, "loss": 0.0843, "step": 40616 }, { "epoch": 0.7193082654836696, "grad_norm": 0.6360949277877808, "learning_rate": 5.782700439458555e-06, "loss": 0.072, "step": 40617 }, { "epoch": 0.7193259750206981, "grad_norm": 0.7693471312522888, "learning_rate": 5.782021689562169e-06, "loss": 0.0925, "step": 40618 }, { "epoch": 0.7193436845577265, "grad_norm": 0.6132521629333496, "learning_rate": 5.781342969992e-06, "loss": 0.0698, "step": 40619 }, { "epoch": 0.7193613940947549, "grad_norm": 0.6568977236747742, "learning_rate": 5.780664280750279e-06, "loss": 0.0543, "step": 40620 }, { "epoch": 0.7193791036317833, "grad_norm": 0.5994240045547485, "learning_rate": 5.779985621839243e-06, "loss": 0.0561, "step": 40621 }, { "epoch": 0.7193968131688118, "grad_norm": 0.6706200242042542, "learning_rate": 5.779306993261118e-06, "loss": 0.0715, "step": 40622 }, { "epoch": 0.7194145227058402, "grad_norm": 0.7504522800445557, "learning_rate": 5.7786283950181475e-06, "loss": 0.0659, "step": 40623 }, { "epoch": 0.7194322322428686, "grad_norm": 0.5521562695503235, "learning_rate": 5.777949827112552e-06, "loss": 0.062, "step": 40624 }, { "epoch": 0.7194499417798971, "grad_norm": 0.43214067816734314, "learning_rate": 5.7772712895465676e-06, "loss": 0.0794, "step": 40625 }, { "epoch": 0.7194676513169255, "grad_norm": 0.6402275562286377, "learning_rate": 5.776592782322428e-06, "loss": 0.045, "step": 40626 }, { "epoch": 0.7194853608539539, "grad_norm": 0.38947615027427673, "learning_rate": 5.775914305442372e-06, "loss": 0.0501, "step": 40627 }, { "epoch": 0.7195030703909823, "grad_norm": 0.6010637283325195, "learning_rate": 5.775235858908614e-06, "loss": 0.0467, "step": 40628 }, { "epoch": 0.7195207799280108, "grad_norm": 0.7736939191818237, "learning_rate": 5.774557442723402e-06, "loss": 0.0613, "step": 40629 }, { "epoch": 0.7195384894650392, "grad_norm": 0.500593900680542, "learning_rate": 5.773879056888966e-06, "loss": 0.0374, "step": 40630 }, { "epoch": 0.7195561990020676, "grad_norm": 0.47219809889793396, "learning_rate": 5.773200701407532e-06, "loss": 0.0444, "step": 40631 }, { "epoch": 0.719573908539096, "grad_norm": 0.47793954610824585, "learning_rate": 5.772522376281334e-06, "loss": 0.0445, "step": 40632 }, { "epoch": 0.7195916180761245, "grad_norm": 0.366401344537735, "learning_rate": 5.7718440815126045e-06, "loss": 0.0674, "step": 40633 }, { "epoch": 0.7196093276131529, "grad_norm": 0.839432954788208, "learning_rate": 5.7711658171035785e-06, "loss": 0.0558, "step": 40634 }, { "epoch": 0.7196270371501813, "grad_norm": 0.7964665293693542, "learning_rate": 5.770487583056479e-06, "loss": 0.0661, "step": 40635 }, { "epoch": 0.7196447466872097, "grad_norm": 0.606879472732544, "learning_rate": 5.769809379373542e-06, "loss": 0.0388, "step": 40636 }, { "epoch": 0.7196624562242382, "grad_norm": 0.649232804775238, "learning_rate": 5.769131206056998e-06, "loss": 0.0724, "step": 40637 }, { "epoch": 0.7196801657612666, "grad_norm": 0.4383618235588074, "learning_rate": 5.768453063109079e-06, "loss": 0.0572, "step": 40638 }, { "epoch": 0.719697875298295, "grad_norm": 0.6292839050292969, "learning_rate": 5.767774950532016e-06, "loss": 0.0475, "step": 40639 }, { "epoch": 0.7197155848353235, "grad_norm": 1.109307885169983, "learning_rate": 5.76709686832804e-06, "loss": 0.097, "step": 40640 }, { "epoch": 0.7197332943723519, "grad_norm": 0.908616840839386, "learning_rate": 5.766418816499386e-06, "loss": 0.0785, "step": 40641 }, { "epoch": 0.7197510039093803, "grad_norm": 1.1828007698059082, "learning_rate": 5.765740795048275e-06, "loss": 0.0487, "step": 40642 }, { "epoch": 0.7197687134464087, "grad_norm": 0.6928319931030273, "learning_rate": 5.765062803976942e-06, "loss": 0.0504, "step": 40643 }, { "epoch": 0.7197864229834372, "grad_norm": 0.6755731105804443, "learning_rate": 5.764384843287619e-06, "loss": 0.0589, "step": 40644 }, { "epoch": 0.7198041325204656, "grad_norm": 0.2893458902835846, "learning_rate": 5.763706912982542e-06, "loss": 0.049, "step": 40645 }, { "epoch": 0.719821842057494, "grad_norm": 0.6312474608421326, "learning_rate": 5.76302901306393e-06, "loss": 0.0621, "step": 40646 }, { "epoch": 0.7198395515945224, "grad_norm": 0.6467821598052979, "learning_rate": 5.762351143534017e-06, "loss": 0.079, "step": 40647 }, { "epoch": 0.7198572611315509, "grad_norm": 0.5684928894042969, "learning_rate": 5.761673304395033e-06, "loss": 0.062, "step": 40648 }, { "epoch": 0.7198749706685793, "grad_norm": 0.4200969636440277, "learning_rate": 5.760995495649211e-06, "loss": 0.0665, "step": 40649 }, { "epoch": 0.7198926802056077, "grad_norm": 0.8503396511077881, "learning_rate": 5.76031771729878e-06, "loss": 0.056, "step": 40650 }, { "epoch": 0.7199103897426361, "grad_norm": 0.5460293292999268, "learning_rate": 5.759639969345969e-06, "loss": 0.0692, "step": 40651 }, { "epoch": 0.7199280992796646, "grad_norm": 0.511081337928772, "learning_rate": 5.7589622517930115e-06, "loss": 0.0467, "step": 40652 }, { "epoch": 0.719945808816693, "grad_norm": 0.41697636246681213, "learning_rate": 5.7582845646421305e-06, "loss": 0.0608, "step": 40653 }, { "epoch": 0.7199635183537214, "grad_norm": 0.3971898853778839, "learning_rate": 5.757606907895558e-06, "loss": 0.0713, "step": 40654 }, { "epoch": 0.7199812278907499, "grad_norm": 0.529990553855896, "learning_rate": 5.756929281555529e-06, "loss": 0.0508, "step": 40655 }, { "epoch": 0.7199989374277783, "grad_norm": 0.6552103757858276, "learning_rate": 5.756251685624261e-06, "loss": 0.0431, "step": 40656 }, { "epoch": 0.7200166469648067, "grad_norm": 0.662120521068573, "learning_rate": 5.7555741201039866e-06, "loss": 0.088, "step": 40657 }, { "epoch": 0.7200343565018351, "grad_norm": 0.5844993591308594, "learning_rate": 5.7548965849969434e-06, "loss": 0.0438, "step": 40658 }, { "epoch": 0.7200520660388636, "grad_norm": 0.5120850801467896, "learning_rate": 5.754219080305359e-06, "loss": 0.072, "step": 40659 }, { "epoch": 0.720069775575892, "grad_norm": 0.5396714210510254, "learning_rate": 5.753541606031456e-06, "loss": 0.0421, "step": 40660 }, { "epoch": 0.7200874851129204, "grad_norm": 0.44517651200294495, "learning_rate": 5.752864162177464e-06, "loss": 0.0554, "step": 40661 }, { "epoch": 0.7201051946499488, "grad_norm": 0.647183358669281, "learning_rate": 5.752186748745621e-06, "loss": 0.0578, "step": 40662 }, { "epoch": 0.7201229041869773, "grad_norm": 0.4543896019458771, "learning_rate": 5.75150936573814e-06, "loss": 0.054, "step": 40663 }, { "epoch": 0.7201406137240057, "grad_norm": 0.46003487706184387, "learning_rate": 5.7508320131572585e-06, "loss": 0.0653, "step": 40664 }, { "epoch": 0.7201583232610341, "grad_norm": 0.4297451972961426, "learning_rate": 5.750154691005205e-06, "loss": 0.0742, "step": 40665 }, { "epoch": 0.7201760327980625, "grad_norm": 0.6114604473114014, "learning_rate": 5.749477399284208e-06, "loss": 0.0812, "step": 40666 }, { "epoch": 0.720193742335091, "grad_norm": 0.5347264409065247, "learning_rate": 5.748800137996493e-06, "loss": 0.0728, "step": 40667 }, { "epoch": 0.7202114518721194, "grad_norm": 0.5765421986579895, "learning_rate": 5.748122907144291e-06, "loss": 0.0534, "step": 40668 }, { "epoch": 0.7202291614091478, "grad_norm": 0.4167763590812683, "learning_rate": 5.747445706729832e-06, "loss": 0.0393, "step": 40669 }, { "epoch": 0.7202468709461763, "grad_norm": 0.5275329947471619, "learning_rate": 5.746768536755337e-06, "loss": 0.0574, "step": 40670 }, { "epoch": 0.7202645804832047, "grad_norm": 0.5274137258529663, "learning_rate": 5.746091397223038e-06, "loss": 0.0745, "step": 40671 }, { "epoch": 0.7202822900202331, "grad_norm": 0.45231279730796814, "learning_rate": 5.745414288135163e-06, "loss": 0.0648, "step": 40672 }, { "epoch": 0.7202999995572615, "grad_norm": 0.6161532402038574, "learning_rate": 5.744737209493943e-06, "loss": 0.0727, "step": 40673 }, { "epoch": 0.72031770909429, "grad_norm": 0.6106242537498474, "learning_rate": 5.744060161301596e-06, "loss": 0.0532, "step": 40674 }, { "epoch": 0.7203354186313184, "grad_norm": 0.8579515814781189, "learning_rate": 5.743383143560356e-06, "loss": 0.0523, "step": 40675 }, { "epoch": 0.7203531281683468, "grad_norm": 0.7767553925514221, "learning_rate": 5.742706156272447e-06, "loss": 0.0562, "step": 40676 }, { "epoch": 0.7203708377053752, "grad_norm": 1.0172851085662842, "learning_rate": 5.7420291994401e-06, "loss": 0.0613, "step": 40677 }, { "epoch": 0.7203885472424038, "grad_norm": 0.540641725063324, "learning_rate": 5.741352273065541e-06, "loss": 0.0564, "step": 40678 }, { "epoch": 0.7204062567794322, "grad_norm": 0.49442607164382935, "learning_rate": 5.740675377150995e-06, "loss": 0.062, "step": 40679 }, { "epoch": 0.7204239663164606, "grad_norm": 0.39826449751853943, "learning_rate": 5.739998511698698e-06, "loss": 0.0575, "step": 40680 }, { "epoch": 0.720441675853489, "grad_norm": 0.8943427801132202, "learning_rate": 5.739321676710863e-06, "loss": 0.0604, "step": 40681 }, { "epoch": 0.7204593853905175, "grad_norm": 0.7914864420890808, "learning_rate": 5.738644872189722e-06, "loss": 0.0878, "step": 40682 }, { "epoch": 0.7204770949275459, "grad_norm": 0.505664050579071, "learning_rate": 5.737968098137504e-06, "loss": 0.0605, "step": 40683 }, { "epoch": 0.7204948044645743, "grad_norm": 0.6141642332077026, "learning_rate": 5.737291354556439e-06, "loss": 0.0787, "step": 40684 }, { "epoch": 0.7205125140016028, "grad_norm": 0.6584059000015259, "learning_rate": 5.736614641448744e-06, "loss": 0.0626, "step": 40685 }, { "epoch": 0.7205302235386312, "grad_norm": 0.49203556776046753, "learning_rate": 5.7359379588166425e-06, "loss": 0.0684, "step": 40686 }, { "epoch": 0.7205479330756596, "grad_norm": 0.558604896068573, "learning_rate": 5.735261306662381e-06, "loss": 0.0609, "step": 40687 }, { "epoch": 0.720565642612688, "grad_norm": 0.7812166810035706, "learning_rate": 5.734584684988165e-06, "loss": 0.0712, "step": 40688 }, { "epoch": 0.7205833521497165, "grad_norm": 0.713707685470581, "learning_rate": 5.73390809379623e-06, "loss": 0.0664, "step": 40689 }, { "epoch": 0.7206010616867449, "grad_norm": 0.4540987014770508, "learning_rate": 5.7332315330888e-06, "loss": 0.0407, "step": 40690 }, { "epoch": 0.7206187712237733, "grad_norm": 0.6503224968910217, "learning_rate": 5.732555002868107e-06, "loss": 0.0529, "step": 40691 }, { "epoch": 0.7206364807608017, "grad_norm": 0.7445070743560791, "learning_rate": 5.731878503136364e-06, "loss": 0.0483, "step": 40692 }, { "epoch": 0.7206541902978302, "grad_norm": 0.7062881588935852, "learning_rate": 5.731202033895803e-06, "loss": 0.0491, "step": 40693 }, { "epoch": 0.7206718998348586, "grad_norm": 0.9133264422416687, "learning_rate": 5.730525595148651e-06, "loss": 0.0782, "step": 40694 }, { "epoch": 0.720689609371887, "grad_norm": 0.5044435858726501, "learning_rate": 5.7298491868971325e-06, "loss": 0.0459, "step": 40695 }, { "epoch": 0.7207073189089154, "grad_norm": 0.32837122678756714, "learning_rate": 5.729172809143472e-06, "loss": 0.0632, "step": 40696 }, { "epoch": 0.7207250284459439, "grad_norm": 0.6138056516647339, "learning_rate": 5.728496461889895e-06, "loss": 0.0414, "step": 40697 }, { "epoch": 0.7207427379829723, "grad_norm": 0.47726917266845703, "learning_rate": 5.727820145138633e-06, "loss": 0.0658, "step": 40698 }, { "epoch": 0.7207604475200007, "grad_norm": 0.9074451923370361, "learning_rate": 5.727143858891898e-06, "loss": 0.081, "step": 40699 }, { "epoch": 0.7207781570570292, "grad_norm": 0.5131688714027405, "learning_rate": 5.7264676031519235e-06, "loss": 0.0563, "step": 40700 }, { "epoch": 0.7207958665940576, "grad_norm": 0.4607839286327362, "learning_rate": 5.725791377920937e-06, "loss": 0.0375, "step": 40701 }, { "epoch": 0.720813576131086, "grad_norm": 0.38188502192497253, "learning_rate": 5.725115183201153e-06, "loss": 0.0418, "step": 40702 }, { "epoch": 0.7208312856681144, "grad_norm": 1.1166062355041504, "learning_rate": 5.724439018994803e-06, "loss": 0.0709, "step": 40703 }, { "epoch": 0.7208489952051429, "grad_norm": 0.4536333382129669, "learning_rate": 5.723762885304111e-06, "loss": 0.0393, "step": 40704 }, { "epoch": 0.7208667047421713, "grad_norm": 0.5529620051383972, "learning_rate": 5.723086782131299e-06, "loss": 0.0524, "step": 40705 }, { "epoch": 0.7208844142791997, "grad_norm": 0.7885438203811646, "learning_rate": 5.722410709478594e-06, "loss": 0.0601, "step": 40706 }, { "epoch": 0.7209021238162281, "grad_norm": 0.7227094769477844, "learning_rate": 5.7217346673482215e-06, "loss": 0.0616, "step": 40707 }, { "epoch": 0.7209198333532566, "grad_norm": 0.6953585743904114, "learning_rate": 5.721058655742401e-06, "loss": 0.0359, "step": 40708 }, { "epoch": 0.720937542890285, "grad_norm": 0.7650006413459778, "learning_rate": 5.720382674663366e-06, "loss": 0.0759, "step": 40709 }, { "epoch": 0.7209552524273134, "grad_norm": 0.5817082524299622, "learning_rate": 5.719706724113325e-06, "loss": 0.0734, "step": 40710 }, { "epoch": 0.7209729619643418, "grad_norm": 0.6822416186332703, "learning_rate": 5.719030804094513e-06, "loss": 0.0786, "step": 40711 }, { "epoch": 0.7209906715013703, "grad_norm": 0.6141597628593445, "learning_rate": 5.7183549146091555e-06, "loss": 0.0375, "step": 40712 }, { "epoch": 0.7210083810383987, "grad_norm": 0.7639310956001282, "learning_rate": 5.717679055659466e-06, "loss": 0.0718, "step": 40713 }, { "epoch": 0.7210260905754271, "grad_norm": 0.3727357089519501, "learning_rate": 5.717003227247674e-06, "loss": 0.0454, "step": 40714 }, { "epoch": 0.7210438001124556, "grad_norm": 0.5733621716499329, "learning_rate": 5.716327429375998e-06, "loss": 0.0569, "step": 40715 }, { "epoch": 0.721061509649484, "grad_norm": 0.5941693186759949, "learning_rate": 5.7156516620466744e-06, "loss": 0.0523, "step": 40716 }, { "epoch": 0.7210792191865124, "grad_norm": 0.5673519372940063, "learning_rate": 5.714975925261914e-06, "loss": 0.0788, "step": 40717 }, { "epoch": 0.7210969287235408, "grad_norm": 0.49550387263298035, "learning_rate": 5.714300219023943e-06, "loss": 0.0514, "step": 40718 }, { "epoch": 0.7211146382605693, "grad_norm": 0.5283313393592834, "learning_rate": 5.71362454333499e-06, "loss": 0.0583, "step": 40719 }, { "epoch": 0.7211323477975977, "grad_norm": 1.235722541809082, "learning_rate": 5.712948898197268e-06, "loss": 0.0564, "step": 40720 }, { "epoch": 0.7211500573346261, "grad_norm": 0.23820407688617706, "learning_rate": 5.712273283613005e-06, "loss": 0.0485, "step": 40721 }, { "epoch": 0.7211677668716545, "grad_norm": 0.36578550934791565, "learning_rate": 5.711597699584423e-06, "loss": 0.0388, "step": 40722 }, { "epoch": 0.721185476408683, "grad_norm": 0.3549329340457916, "learning_rate": 5.710922146113746e-06, "loss": 0.0552, "step": 40723 }, { "epoch": 0.7212031859457114, "grad_norm": 0.4590255320072174, "learning_rate": 5.710246623203195e-06, "loss": 0.0321, "step": 40724 }, { "epoch": 0.7212208954827398, "grad_norm": 0.5562944412231445, "learning_rate": 5.709571130854994e-06, "loss": 0.0596, "step": 40725 }, { "epoch": 0.7212386050197682, "grad_norm": 0.7777858376502991, "learning_rate": 5.708895669071369e-06, "loss": 0.0559, "step": 40726 }, { "epoch": 0.7212563145567967, "grad_norm": 0.5182300209999084, "learning_rate": 5.708220237854531e-06, "loss": 0.0572, "step": 40727 }, { "epoch": 0.7212740240938251, "grad_norm": 0.6462921500205994, "learning_rate": 5.70754483720671e-06, "loss": 0.086, "step": 40728 }, { "epoch": 0.7212917336308535, "grad_norm": 0.6892223954200745, "learning_rate": 5.706869467130126e-06, "loss": 0.0704, "step": 40729 }, { "epoch": 0.721309443167882, "grad_norm": 0.37089505791664124, "learning_rate": 5.706194127627008e-06, "loss": 0.0359, "step": 40730 }, { "epoch": 0.7213271527049104, "grad_norm": 0.6110320091247559, "learning_rate": 5.705518818699565e-06, "loss": 0.054, "step": 40731 }, { "epoch": 0.7213448622419388, "grad_norm": 0.5024710893630981, "learning_rate": 5.704843540350025e-06, "loss": 0.062, "step": 40732 }, { "epoch": 0.7213625717789672, "grad_norm": 0.5729672908782959, "learning_rate": 5.704168292580609e-06, "loss": 0.0514, "step": 40733 }, { "epoch": 0.7213802813159957, "grad_norm": 0.9926599264144897, "learning_rate": 5.70349307539354e-06, "loss": 0.0972, "step": 40734 }, { "epoch": 0.7213979908530241, "grad_norm": 0.4666404724121094, "learning_rate": 5.7028178887910385e-06, "loss": 0.0423, "step": 40735 }, { "epoch": 0.7214157003900525, "grad_norm": 0.49948254227638245, "learning_rate": 5.702142732775325e-06, "loss": 0.0701, "step": 40736 }, { "epoch": 0.7214334099270809, "grad_norm": 0.6744831204414368, "learning_rate": 5.701467607348629e-06, "loss": 0.0605, "step": 40737 }, { "epoch": 0.7214511194641094, "grad_norm": 0.8131777048110962, "learning_rate": 5.700792512513157e-06, "loss": 0.0439, "step": 40738 }, { "epoch": 0.7214688290011378, "grad_norm": 0.6335884928703308, "learning_rate": 5.700117448271137e-06, "loss": 0.0482, "step": 40739 }, { "epoch": 0.7214865385381662, "grad_norm": 0.6755260229110718, "learning_rate": 5.69944241462479e-06, "loss": 0.0636, "step": 40740 }, { "epoch": 0.7215042480751946, "grad_norm": 0.9179317355155945, "learning_rate": 5.698767411576342e-06, "loss": 0.0924, "step": 40741 }, { "epoch": 0.7215219576122232, "grad_norm": 0.4982892870903015, "learning_rate": 5.698092439128004e-06, "loss": 0.0407, "step": 40742 }, { "epoch": 0.7215396671492516, "grad_norm": 0.623209536075592, "learning_rate": 5.697417497281999e-06, "loss": 0.0855, "step": 40743 }, { "epoch": 0.72155737668628, "grad_norm": 0.5497136116027832, "learning_rate": 5.696742586040553e-06, "loss": 0.0638, "step": 40744 }, { "epoch": 0.7215750862233085, "grad_norm": 0.3482145667076111, "learning_rate": 5.696067705405881e-06, "loss": 0.0642, "step": 40745 }, { "epoch": 0.7215927957603369, "grad_norm": 0.47186216711997986, "learning_rate": 5.695392855380204e-06, "loss": 0.0912, "step": 40746 }, { "epoch": 0.7216105052973653, "grad_norm": 0.6697207689285278, "learning_rate": 5.694718035965745e-06, "loss": 0.0519, "step": 40747 }, { "epoch": 0.7216282148343937, "grad_norm": 0.3643956184387207, "learning_rate": 5.694043247164727e-06, "loss": 0.0606, "step": 40748 }, { "epoch": 0.7216459243714222, "grad_norm": 0.5209497809410095, "learning_rate": 5.693368488979361e-06, "loss": 0.0577, "step": 40749 }, { "epoch": 0.7216636339084506, "grad_norm": 1.002801537513733, "learning_rate": 5.692693761411869e-06, "loss": 0.0818, "step": 40750 }, { "epoch": 0.721681343445479, "grad_norm": 0.6196123361587524, "learning_rate": 5.692019064464481e-06, "loss": 0.0348, "step": 40751 }, { "epoch": 0.7216990529825074, "grad_norm": 0.5350768566131592, "learning_rate": 5.691344398139398e-06, "loss": 0.053, "step": 40752 }, { "epoch": 0.7217167625195359, "grad_norm": 0.5673945546150208, "learning_rate": 5.690669762438856e-06, "loss": 0.0806, "step": 40753 }, { "epoch": 0.7217344720565643, "grad_norm": 0.6090978384017944, "learning_rate": 5.6899951573650685e-06, "loss": 0.0437, "step": 40754 }, { "epoch": 0.7217521815935927, "grad_norm": 0.7996689081192017, "learning_rate": 5.68932058292026e-06, "loss": 0.0845, "step": 40755 }, { "epoch": 0.7217698911306211, "grad_norm": 0.4735473692417145, "learning_rate": 5.688646039106639e-06, "loss": 0.0474, "step": 40756 }, { "epoch": 0.7217876006676496, "grad_norm": 0.5405691862106323, "learning_rate": 5.6879715259264325e-06, "loss": 0.0476, "step": 40757 }, { "epoch": 0.721805310204678, "grad_norm": 0.835856556892395, "learning_rate": 5.687297043381861e-06, "loss": 0.0806, "step": 40758 }, { "epoch": 0.7218230197417064, "grad_norm": 0.5820828080177307, "learning_rate": 5.686622591475138e-06, "loss": 0.0687, "step": 40759 }, { "epoch": 0.7218407292787349, "grad_norm": 0.44640469551086426, "learning_rate": 5.685948170208481e-06, "loss": 0.053, "step": 40760 }, { "epoch": 0.7218584388157633, "grad_norm": 0.5922240018844604, "learning_rate": 5.685273779584115e-06, "loss": 0.0439, "step": 40761 }, { "epoch": 0.7218761483527917, "grad_norm": 0.4648434519767761, "learning_rate": 5.684599419604256e-06, "loss": 0.048, "step": 40762 }, { "epoch": 0.7218938578898201, "grad_norm": 0.5643607974052429, "learning_rate": 5.683925090271122e-06, "loss": 0.0434, "step": 40763 }, { "epoch": 0.7219115674268486, "grad_norm": 0.5388032793998718, "learning_rate": 5.683250791586933e-06, "loss": 0.0697, "step": 40764 }, { "epoch": 0.721929276963877, "grad_norm": 0.41034629940986633, "learning_rate": 5.682576523553912e-06, "loss": 0.0524, "step": 40765 }, { "epoch": 0.7219469865009054, "grad_norm": 0.5444292426109314, "learning_rate": 5.681902286174267e-06, "loss": 0.0692, "step": 40766 }, { "epoch": 0.7219646960379338, "grad_norm": 0.3585202693939209, "learning_rate": 5.681228079450219e-06, "loss": 0.0457, "step": 40767 }, { "epoch": 0.7219824055749623, "grad_norm": 0.40836048126220703, "learning_rate": 5.6805539033839885e-06, "loss": 0.0691, "step": 40768 }, { "epoch": 0.7220001151119907, "grad_norm": 0.5372313857078552, "learning_rate": 5.6798797579777985e-06, "loss": 0.0612, "step": 40769 }, { "epoch": 0.7220178246490191, "grad_norm": 0.5446948409080505, "learning_rate": 5.679205643233857e-06, "loss": 0.0715, "step": 40770 }, { "epoch": 0.7220355341860475, "grad_norm": 0.7848027348518372, "learning_rate": 5.678531559154386e-06, "loss": 0.0651, "step": 40771 }, { "epoch": 0.722053243723076, "grad_norm": 0.38990283012390137, "learning_rate": 5.677857505741603e-06, "loss": 0.0435, "step": 40772 }, { "epoch": 0.7220709532601044, "grad_norm": 0.6299135088920593, "learning_rate": 5.6771834829977265e-06, "loss": 0.0529, "step": 40773 }, { "epoch": 0.7220886627971328, "grad_norm": 0.6003060936927795, "learning_rate": 5.676509490924972e-06, "loss": 0.0483, "step": 40774 }, { "epoch": 0.7221063723341613, "grad_norm": 0.6782286167144775, "learning_rate": 5.67583552952556e-06, "loss": 0.0464, "step": 40775 }, { "epoch": 0.7221240818711897, "grad_norm": 1.4707597494125366, "learning_rate": 5.67516159880171e-06, "loss": 0.0885, "step": 40776 }, { "epoch": 0.7221417914082181, "grad_norm": 1.0968302488327026, "learning_rate": 5.67448769875563e-06, "loss": 0.0484, "step": 40777 }, { "epoch": 0.7221595009452465, "grad_norm": 0.5784885287284851, "learning_rate": 5.6738138293895435e-06, "loss": 0.0768, "step": 40778 }, { "epoch": 0.722177210482275, "grad_norm": 0.5515870451927185, "learning_rate": 5.673139990705664e-06, "loss": 0.0456, "step": 40779 }, { "epoch": 0.7221949200193034, "grad_norm": 0.41228458285331726, "learning_rate": 5.672466182706218e-06, "loss": 0.0377, "step": 40780 }, { "epoch": 0.7222126295563318, "grad_norm": 0.1744283139705658, "learning_rate": 5.6717924053934035e-06, "loss": 0.068, "step": 40781 }, { "epoch": 0.7222303390933602, "grad_norm": 0.7353953123092651, "learning_rate": 5.6711186587694535e-06, "loss": 0.0632, "step": 40782 }, { "epoch": 0.7222480486303887, "grad_norm": 0.4862907826900482, "learning_rate": 5.670444942836585e-06, "loss": 0.0667, "step": 40783 }, { "epoch": 0.7222657581674171, "grad_norm": 0.7806251645088196, "learning_rate": 5.669771257597005e-06, "loss": 0.0792, "step": 40784 }, { "epoch": 0.7222834677044455, "grad_norm": 0.5510504841804504, "learning_rate": 5.6690976030529315e-06, "loss": 0.0541, "step": 40785 }, { "epoch": 0.7223011772414739, "grad_norm": 0.5569467544555664, "learning_rate": 5.668423979206585e-06, "loss": 0.0817, "step": 40786 }, { "epoch": 0.7223188867785024, "grad_norm": 0.5958767533302307, "learning_rate": 5.667750386060185e-06, "loss": 0.0769, "step": 40787 }, { "epoch": 0.7223365963155308, "grad_norm": 0.6065072417259216, "learning_rate": 5.667076823615934e-06, "loss": 0.0692, "step": 40788 }, { "epoch": 0.7223543058525592, "grad_norm": 0.49962565302848816, "learning_rate": 5.66640329187606e-06, "loss": 0.0444, "step": 40789 }, { "epoch": 0.7223720153895877, "grad_norm": 1.000138759613037, "learning_rate": 5.665729790842773e-06, "loss": 0.0795, "step": 40790 }, { "epoch": 0.7223897249266161, "grad_norm": 0.4615122675895691, "learning_rate": 5.665056320518291e-06, "loss": 0.0404, "step": 40791 }, { "epoch": 0.7224074344636445, "grad_norm": 0.4699721336364746, "learning_rate": 5.664382880904832e-06, "loss": 0.0578, "step": 40792 }, { "epoch": 0.7224251440006729, "grad_norm": 0.6459364295005798, "learning_rate": 5.663709472004606e-06, "loss": 0.06, "step": 40793 }, { "epoch": 0.7224428535377014, "grad_norm": 0.6149137616157532, "learning_rate": 5.663036093819839e-06, "loss": 0.0578, "step": 40794 }, { "epoch": 0.7224605630747298, "grad_norm": 0.653367280960083, "learning_rate": 5.662362746352734e-06, "loss": 0.0698, "step": 40795 }, { "epoch": 0.7224782726117582, "grad_norm": 0.8231835961341858, "learning_rate": 5.6616894296055075e-06, "loss": 0.0894, "step": 40796 }, { "epoch": 0.7224959821487866, "grad_norm": 0.6587207913398743, "learning_rate": 5.661016143580387e-06, "loss": 0.0569, "step": 40797 }, { "epoch": 0.7225136916858151, "grad_norm": 0.37802353501319885, "learning_rate": 5.660342888279573e-06, "loss": 0.0445, "step": 40798 }, { "epoch": 0.7225314012228435, "grad_norm": 0.9408935308456421, "learning_rate": 5.6596696637052845e-06, "loss": 0.0679, "step": 40799 }, { "epoch": 0.7225491107598719, "grad_norm": 0.925604522228241, "learning_rate": 5.6589964698597394e-06, "loss": 0.0794, "step": 40800 }, { "epoch": 0.7225668202969003, "grad_norm": 0.8286862969398499, "learning_rate": 5.65832330674515e-06, "loss": 0.0606, "step": 40801 }, { "epoch": 0.7225845298339288, "grad_norm": 0.5203524231910706, "learning_rate": 5.6576501743637345e-06, "loss": 0.0656, "step": 40802 }, { "epoch": 0.7226022393709572, "grad_norm": 0.39682477712631226, "learning_rate": 5.656977072717704e-06, "loss": 0.0412, "step": 40803 }, { "epoch": 0.7226199489079856, "grad_norm": 0.2629862427711487, "learning_rate": 5.656304001809277e-06, "loss": 0.0605, "step": 40804 }, { "epoch": 0.7226376584450142, "grad_norm": 0.5891286730766296, "learning_rate": 5.655630961640662e-06, "loss": 0.0508, "step": 40805 }, { "epoch": 0.7226553679820426, "grad_norm": 0.6339153051376343, "learning_rate": 5.654957952214076e-06, "loss": 0.0635, "step": 40806 }, { "epoch": 0.722673077519071, "grad_norm": 0.33321473002433777, "learning_rate": 5.654284973531732e-06, "loss": 0.0559, "step": 40807 }, { "epoch": 0.7226907870560993, "grad_norm": 0.7885928750038147, "learning_rate": 5.65361202559585e-06, "loss": 0.0624, "step": 40808 }, { "epoch": 0.7227084965931279, "grad_norm": 0.5620329976081848, "learning_rate": 5.652939108408635e-06, "loss": 0.0562, "step": 40809 }, { "epoch": 0.7227262061301563, "grad_norm": 0.6403453350067139, "learning_rate": 5.652266221972299e-06, "loss": 0.0439, "step": 40810 }, { "epoch": 0.7227439156671847, "grad_norm": 0.6724575161933899, "learning_rate": 5.651593366289067e-06, "loss": 0.0737, "step": 40811 }, { "epoch": 0.722761625204213, "grad_norm": 0.44442933797836304, "learning_rate": 5.650920541361154e-06, "loss": 0.0436, "step": 40812 }, { "epoch": 0.7227793347412416, "grad_norm": 0.9994423389434814, "learning_rate": 5.65024774719076e-06, "loss": 0.0751, "step": 40813 }, { "epoch": 0.72279704427827, "grad_norm": 0.6830694675445557, "learning_rate": 5.649574983780106e-06, "loss": 0.0389, "step": 40814 }, { "epoch": 0.7228147538152984, "grad_norm": 0.8211489915847778, "learning_rate": 5.64890225113141e-06, "loss": 0.0359, "step": 40815 }, { "epoch": 0.7228324633523268, "grad_norm": 0.6623584032058716, "learning_rate": 5.648229549246874e-06, "loss": 0.057, "step": 40816 }, { "epoch": 0.7228501728893553, "grad_norm": 0.7009667158126831, "learning_rate": 5.647556878128716e-06, "loss": 0.0667, "step": 40817 }, { "epoch": 0.7228678824263837, "grad_norm": 0.26785534620285034, "learning_rate": 5.646884237779152e-06, "loss": 0.0301, "step": 40818 }, { "epoch": 0.7228855919634121, "grad_norm": 0.7220523953437805, "learning_rate": 5.64621162820039e-06, "loss": 0.0756, "step": 40819 }, { "epoch": 0.7229033015004406, "grad_norm": 0.39254122972488403, "learning_rate": 5.645539049394647e-06, "loss": 0.0344, "step": 40820 }, { "epoch": 0.722921011037469, "grad_norm": 0.5626207590103149, "learning_rate": 5.6448665013641355e-06, "loss": 0.0424, "step": 40821 }, { "epoch": 0.7229387205744974, "grad_norm": 0.9111392498016357, "learning_rate": 5.644193984111073e-06, "loss": 0.0764, "step": 40822 }, { "epoch": 0.7229564301115258, "grad_norm": 0.3625982105731964, "learning_rate": 5.643521497637658e-06, "loss": 0.0483, "step": 40823 }, { "epoch": 0.7229741396485543, "grad_norm": 0.4496139585971832, "learning_rate": 5.642849041946113e-06, "loss": 0.0476, "step": 40824 }, { "epoch": 0.7229918491855827, "grad_norm": 0.5071977972984314, "learning_rate": 5.642176617038646e-06, "loss": 0.0443, "step": 40825 }, { "epoch": 0.7230095587226111, "grad_norm": 0.6051932573318481, "learning_rate": 5.641504222917479e-06, "loss": 0.0529, "step": 40826 }, { "epoch": 0.7230272682596395, "grad_norm": 0.6464914083480835, "learning_rate": 5.640831859584809e-06, "loss": 0.1073, "step": 40827 }, { "epoch": 0.723044977796668, "grad_norm": 0.5426849126815796, "learning_rate": 5.640159527042857e-06, "loss": 0.0605, "step": 40828 }, { "epoch": 0.7230626873336964, "grad_norm": 0.8744125366210938, "learning_rate": 5.639487225293834e-06, "loss": 0.0764, "step": 40829 }, { "epoch": 0.7230803968707248, "grad_norm": 0.5976575613021851, "learning_rate": 5.638814954339951e-06, "loss": 0.0498, "step": 40830 }, { "epoch": 0.7230981064077532, "grad_norm": 0.4984470009803772, "learning_rate": 5.63814271418342e-06, "loss": 0.0572, "step": 40831 }, { "epoch": 0.7231158159447817, "grad_norm": 0.5163387060165405, "learning_rate": 5.637470504826452e-06, "loss": 0.0577, "step": 40832 }, { "epoch": 0.7231335254818101, "grad_norm": 0.6149623990058899, "learning_rate": 5.636798326271263e-06, "loss": 0.0743, "step": 40833 }, { "epoch": 0.7231512350188385, "grad_norm": 0.6696397662162781, "learning_rate": 5.636126178520058e-06, "loss": 0.0562, "step": 40834 }, { "epoch": 0.723168944555867, "grad_norm": 0.7112786173820496, "learning_rate": 5.63545406157505e-06, "loss": 0.0423, "step": 40835 }, { "epoch": 0.7231866540928954, "grad_norm": 0.527475893497467, "learning_rate": 5.634781975438454e-06, "loss": 0.049, "step": 40836 }, { "epoch": 0.7232043636299238, "grad_norm": 0.7475697994232178, "learning_rate": 5.634109920112476e-06, "loss": 0.0354, "step": 40837 }, { "epoch": 0.7232220731669522, "grad_norm": 0.2806144654750824, "learning_rate": 5.633437895599327e-06, "loss": 0.0471, "step": 40838 }, { "epoch": 0.7232397827039807, "grad_norm": 0.5822338461875916, "learning_rate": 5.6327659019012155e-06, "loss": 0.0441, "step": 40839 }, { "epoch": 0.7232574922410091, "grad_norm": 0.6565773487091064, "learning_rate": 5.632093939020367e-06, "loss": 0.0495, "step": 40840 }, { "epoch": 0.7232752017780375, "grad_norm": 0.7272486686706543, "learning_rate": 5.631422006958977e-06, "loss": 0.0664, "step": 40841 }, { "epoch": 0.7232929113150659, "grad_norm": 0.5100404024124146, "learning_rate": 5.6307501057192615e-06, "loss": 0.0953, "step": 40842 }, { "epoch": 0.7233106208520944, "grad_norm": 0.7217327952384949, "learning_rate": 5.630078235303436e-06, "loss": 0.0677, "step": 40843 }, { "epoch": 0.7233283303891228, "grad_norm": 0.33013761043548584, "learning_rate": 5.6294063957136975e-06, "loss": 0.0429, "step": 40844 }, { "epoch": 0.7233460399261512, "grad_norm": 0.6825469732284546, "learning_rate": 5.6287345869522665e-06, "loss": 0.0468, "step": 40845 }, { "epoch": 0.7233637494631796, "grad_norm": 0.28967103362083435, "learning_rate": 5.628062809021351e-06, "loss": 0.0239, "step": 40846 }, { "epoch": 0.7233814590002081, "grad_norm": 0.47653186321258545, "learning_rate": 5.6273910619231585e-06, "loss": 0.0491, "step": 40847 }, { "epoch": 0.7233991685372365, "grad_norm": 0.61257004737854, "learning_rate": 5.626719345659904e-06, "loss": 0.0673, "step": 40848 }, { "epoch": 0.7234168780742649, "grad_norm": 0.6923207640647888, "learning_rate": 5.626047660233793e-06, "loss": 0.0881, "step": 40849 }, { "epoch": 0.7234345876112934, "grad_norm": 0.8334020376205444, "learning_rate": 5.625376005647038e-06, "loss": 0.0714, "step": 40850 }, { "epoch": 0.7234522971483218, "grad_norm": 0.8738713264465332, "learning_rate": 5.624704381901853e-06, "loss": 0.0535, "step": 40851 }, { "epoch": 0.7234700066853502, "grad_norm": 0.6307840347290039, "learning_rate": 5.624032789000436e-06, "loss": 0.0525, "step": 40852 }, { "epoch": 0.7234877162223786, "grad_norm": 0.5457188487052917, "learning_rate": 5.623361226945005e-06, "loss": 0.0459, "step": 40853 }, { "epoch": 0.7235054257594071, "grad_norm": 0.6604692935943604, "learning_rate": 5.622689695737772e-06, "loss": 0.0512, "step": 40854 }, { "epoch": 0.7235231352964355, "grad_norm": 0.4803560674190521, "learning_rate": 5.622018195380936e-06, "loss": 0.0708, "step": 40855 }, { "epoch": 0.7235408448334639, "grad_norm": 0.3301338255405426, "learning_rate": 5.62134672587671e-06, "loss": 0.0903, "step": 40856 }, { "epoch": 0.7235585543704923, "grad_norm": 0.7462329864501953, "learning_rate": 5.620675287227306e-06, "loss": 0.0625, "step": 40857 }, { "epoch": 0.7235762639075208, "grad_norm": 0.4804927110671997, "learning_rate": 5.620003879434931e-06, "loss": 0.0836, "step": 40858 }, { "epoch": 0.7235939734445492, "grad_norm": 0.524255633354187, "learning_rate": 5.619332502501794e-06, "loss": 0.0474, "step": 40859 }, { "epoch": 0.7236116829815776, "grad_norm": 0.846931517124176, "learning_rate": 5.618661156430107e-06, "loss": 0.0616, "step": 40860 }, { "epoch": 0.723629392518606, "grad_norm": 0.6144198179244995, "learning_rate": 5.617989841222079e-06, "loss": 0.0398, "step": 40861 }, { "epoch": 0.7236471020556345, "grad_norm": 0.6624442338943481, "learning_rate": 5.617318556879911e-06, "loss": 0.0665, "step": 40862 }, { "epoch": 0.7236648115926629, "grad_norm": 0.526154100894928, "learning_rate": 5.616647303405814e-06, "loss": 0.0475, "step": 40863 }, { "epoch": 0.7236825211296913, "grad_norm": 0.4747459590435028, "learning_rate": 5.615976080802e-06, "loss": 0.0589, "step": 40864 }, { "epoch": 0.7237002306667198, "grad_norm": 0.7528905272483826, "learning_rate": 5.615304889070681e-06, "loss": 0.0747, "step": 40865 }, { "epoch": 0.7237179402037482, "grad_norm": 0.6392362117767334, "learning_rate": 5.614633728214055e-06, "loss": 0.067, "step": 40866 }, { "epoch": 0.7237356497407766, "grad_norm": 0.6154527068138123, "learning_rate": 5.613962598234333e-06, "loss": 0.0785, "step": 40867 }, { "epoch": 0.723753359277805, "grad_norm": 0.9366419315338135, "learning_rate": 5.613291499133726e-06, "loss": 0.0729, "step": 40868 }, { "epoch": 0.7237710688148336, "grad_norm": 0.9694846272468567, "learning_rate": 5.612620430914441e-06, "loss": 0.0666, "step": 40869 }, { "epoch": 0.723788778351862, "grad_norm": 0.5447089076042175, "learning_rate": 5.611949393578683e-06, "loss": 0.0709, "step": 40870 }, { "epoch": 0.7238064878888903, "grad_norm": 0.7297011017799377, "learning_rate": 5.611278387128665e-06, "loss": 0.0626, "step": 40871 }, { "epoch": 0.7238241974259187, "grad_norm": 0.4835578501224518, "learning_rate": 5.610607411566595e-06, "loss": 0.0525, "step": 40872 }, { "epoch": 0.7238419069629473, "grad_norm": 0.562926709651947, "learning_rate": 5.609936466894672e-06, "loss": 0.0737, "step": 40873 }, { "epoch": 0.7238596164999757, "grad_norm": 0.43617257475852966, "learning_rate": 5.609265553115108e-06, "loss": 0.0235, "step": 40874 }, { "epoch": 0.723877326037004, "grad_norm": 0.7188431620597839, "learning_rate": 5.6085946702301155e-06, "loss": 0.0535, "step": 40875 }, { "epoch": 0.7238950355740325, "grad_norm": 0.5198010802268982, "learning_rate": 5.607923818241887e-06, "loss": 0.0409, "step": 40876 }, { "epoch": 0.723912745111061, "grad_norm": 0.8428111672401428, "learning_rate": 5.607252997152643e-06, "loss": 0.0531, "step": 40877 }, { "epoch": 0.7239304546480894, "grad_norm": 0.5810731053352356, "learning_rate": 5.6065822069645874e-06, "loss": 0.0716, "step": 40878 }, { "epoch": 0.7239481641851178, "grad_norm": 0.5474637746810913, "learning_rate": 5.6059114476799325e-06, "loss": 0.0569, "step": 40879 }, { "epoch": 0.7239658737221463, "grad_norm": 0.5031901001930237, "learning_rate": 5.605240719300871e-06, "loss": 0.0662, "step": 40880 }, { "epoch": 0.7239835832591747, "grad_norm": 0.6469473242759705, "learning_rate": 5.6045700218296206e-06, "loss": 0.0868, "step": 40881 }, { "epoch": 0.7240012927962031, "grad_norm": 0.5070973634719849, "learning_rate": 5.603899355268382e-06, "loss": 0.043, "step": 40882 }, { "epoch": 0.7240190023332315, "grad_norm": 0.535929799079895, "learning_rate": 5.603228719619371e-06, "loss": 0.0869, "step": 40883 }, { "epoch": 0.72403671187026, "grad_norm": 0.7140614986419678, "learning_rate": 5.602558114884781e-06, "loss": 0.0946, "step": 40884 }, { "epoch": 0.7240544214072884, "grad_norm": 0.7491174340248108, "learning_rate": 5.601887541066825e-06, "loss": 0.0451, "step": 40885 }, { "epoch": 0.7240721309443168, "grad_norm": 0.5808058977127075, "learning_rate": 5.601216998167707e-06, "loss": 0.0651, "step": 40886 }, { "epoch": 0.7240898404813452, "grad_norm": 0.48148563504219055, "learning_rate": 5.600546486189636e-06, "loss": 0.0488, "step": 40887 }, { "epoch": 0.7241075500183737, "grad_norm": 0.5267380475997925, "learning_rate": 5.599876005134816e-06, "loss": 0.055, "step": 40888 }, { "epoch": 0.7241252595554021, "grad_norm": 0.42355310916900635, "learning_rate": 5.599205555005454e-06, "loss": 0.045, "step": 40889 }, { "epoch": 0.7241429690924305, "grad_norm": 0.6654625535011292, "learning_rate": 5.598535135803758e-06, "loss": 0.0685, "step": 40890 }, { "epoch": 0.7241606786294589, "grad_norm": 0.625315248966217, "learning_rate": 5.597864747531926e-06, "loss": 0.0549, "step": 40891 }, { "epoch": 0.7241783881664874, "grad_norm": 0.5073599815368652, "learning_rate": 5.597194390192168e-06, "loss": 0.0367, "step": 40892 }, { "epoch": 0.7241960977035158, "grad_norm": 0.36998745799064636, "learning_rate": 5.596524063786695e-06, "loss": 0.0875, "step": 40893 }, { "epoch": 0.7242138072405442, "grad_norm": 0.5020689368247986, "learning_rate": 5.595853768317702e-06, "loss": 0.0651, "step": 40894 }, { "epoch": 0.7242315167775727, "grad_norm": 0.5667256116867065, "learning_rate": 5.595183503787399e-06, "loss": 0.0669, "step": 40895 }, { "epoch": 0.7242492263146011, "grad_norm": 0.729814887046814, "learning_rate": 5.594513270197991e-06, "loss": 0.0429, "step": 40896 }, { "epoch": 0.7242669358516295, "grad_norm": 0.5456503629684448, "learning_rate": 5.59384306755168e-06, "loss": 0.0641, "step": 40897 }, { "epoch": 0.7242846453886579, "grad_norm": 0.3741089999675751, "learning_rate": 5.593172895850678e-06, "loss": 0.0475, "step": 40898 }, { "epoch": 0.7243023549256864, "grad_norm": 0.5371630787849426, "learning_rate": 5.5925027550971845e-06, "loss": 0.0714, "step": 40899 }, { "epoch": 0.7243200644627148, "grad_norm": 0.8701480627059937, "learning_rate": 5.591832645293409e-06, "loss": 0.0694, "step": 40900 }, { "epoch": 0.7243377739997432, "grad_norm": 0.5606586337089539, "learning_rate": 5.591162566441549e-06, "loss": 0.05, "step": 40901 }, { "epoch": 0.7243554835367716, "grad_norm": 0.5720509886741638, "learning_rate": 5.590492518543812e-06, "loss": 0.0359, "step": 40902 }, { "epoch": 0.7243731930738001, "grad_norm": 0.5841468572616577, "learning_rate": 5.589822501602404e-06, "loss": 0.0608, "step": 40903 }, { "epoch": 0.7243909026108285, "grad_norm": 0.980606734752655, "learning_rate": 5.589152515619531e-06, "loss": 0.0558, "step": 40904 }, { "epoch": 0.7244086121478569, "grad_norm": 0.760718822479248, "learning_rate": 5.5884825605973854e-06, "loss": 0.0433, "step": 40905 }, { "epoch": 0.7244263216848853, "grad_norm": 0.427511066198349, "learning_rate": 5.587812636538185e-06, "loss": 0.0526, "step": 40906 }, { "epoch": 0.7244440312219138, "grad_norm": 0.40605172514915466, "learning_rate": 5.587142743444136e-06, "loss": 0.0526, "step": 40907 }, { "epoch": 0.7244617407589422, "grad_norm": 0.39796513319015503, "learning_rate": 5.586472881317428e-06, "loss": 0.0636, "step": 40908 }, { "epoch": 0.7244794502959706, "grad_norm": 0.30872777104377747, "learning_rate": 5.5858030501602745e-06, "loss": 0.0759, "step": 40909 }, { "epoch": 0.7244971598329991, "grad_norm": 0.7579083442687988, "learning_rate": 5.585133249974874e-06, "loss": 0.0657, "step": 40910 }, { "epoch": 0.7245148693700275, "grad_norm": 0.5735402703285217, "learning_rate": 5.584463480763439e-06, "loss": 0.0754, "step": 40911 }, { "epoch": 0.7245325789070559, "grad_norm": 0.6941845417022705, "learning_rate": 5.5837937425281625e-06, "loss": 0.0367, "step": 40912 }, { "epoch": 0.7245502884440843, "grad_norm": 0.33896347880363464, "learning_rate": 5.583124035271252e-06, "loss": 0.0359, "step": 40913 }, { "epoch": 0.7245679979811128, "grad_norm": 0.7835054397583008, "learning_rate": 5.58245435899491e-06, "loss": 0.0445, "step": 40914 }, { "epoch": 0.7245857075181412, "grad_norm": 0.7275934219360352, "learning_rate": 5.58178471370134e-06, "loss": 0.0571, "step": 40915 }, { "epoch": 0.7246034170551696, "grad_norm": 1.0275559425354004, "learning_rate": 5.5811150993927485e-06, "loss": 0.0774, "step": 40916 }, { "epoch": 0.724621126592198, "grad_norm": 0.7112889885902405, "learning_rate": 5.580445516071333e-06, "loss": 0.0712, "step": 40917 }, { "epoch": 0.7246388361292265, "grad_norm": 0.48616522550582886, "learning_rate": 5.579775963739305e-06, "loss": 0.0594, "step": 40918 }, { "epoch": 0.7246565456662549, "grad_norm": 0.635460376739502, "learning_rate": 5.579106442398857e-06, "loss": 0.0538, "step": 40919 }, { "epoch": 0.7246742552032833, "grad_norm": 0.6208704113960266, "learning_rate": 5.578436952052194e-06, "loss": 0.075, "step": 40920 }, { "epoch": 0.7246919647403117, "grad_norm": 0.7046589851379395, "learning_rate": 5.5777674927015224e-06, "loss": 0.0718, "step": 40921 }, { "epoch": 0.7247096742773402, "grad_norm": 0.6527871489524841, "learning_rate": 5.577098064349048e-06, "loss": 0.084, "step": 40922 }, { "epoch": 0.7247273838143686, "grad_norm": 0.8195189237594604, "learning_rate": 5.576428666996961e-06, "loss": 0.0421, "step": 40923 }, { "epoch": 0.724745093351397, "grad_norm": 0.6320139765739441, "learning_rate": 5.575759300647471e-06, "loss": 0.041, "step": 40924 }, { "epoch": 0.7247628028884255, "grad_norm": 0.6084490418434143, "learning_rate": 5.575089965302781e-06, "loss": 0.073, "step": 40925 }, { "epoch": 0.7247805124254539, "grad_norm": 0.6658689379692078, "learning_rate": 5.57442066096509e-06, "loss": 0.073, "step": 40926 }, { "epoch": 0.7247982219624823, "grad_norm": 0.3676982820034027, "learning_rate": 5.573751387636603e-06, "loss": 0.0556, "step": 40927 }, { "epoch": 0.7248159314995107, "grad_norm": 0.36301106214523315, "learning_rate": 5.573082145319521e-06, "loss": 0.0507, "step": 40928 }, { "epoch": 0.7248336410365392, "grad_norm": 0.8021122813224792, "learning_rate": 5.57241293401605e-06, "loss": 0.0536, "step": 40929 }, { "epoch": 0.7248513505735676, "grad_norm": 0.7344329357147217, "learning_rate": 5.571743753728381e-06, "loss": 0.0477, "step": 40930 }, { "epoch": 0.724869060110596, "grad_norm": 0.24797530472278595, "learning_rate": 5.571074604458722e-06, "loss": 0.0608, "step": 40931 }, { "epoch": 0.7248867696476244, "grad_norm": 0.3975140452384949, "learning_rate": 5.570405486209279e-06, "loss": 0.05, "step": 40932 }, { "epoch": 0.724904479184653, "grad_norm": 0.7033730745315552, "learning_rate": 5.569736398982243e-06, "loss": 0.0708, "step": 40933 }, { "epoch": 0.7249221887216813, "grad_norm": 0.8888680338859558, "learning_rate": 5.569067342779814e-06, "loss": 0.0532, "step": 40934 }, { "epoch": 0.7249398982587097, "grad_norm": 0.44840580224990845, "learning_rate": 5.568398317604206e-06, "loss": 0.0321, "step": 40935 }, { "epoch": 0.7249576077957381, "grad_norm": 0.48060789704322815, "learning_rate": 5.567729323457618e-06, "loss": 0.0484, "step": 40936 }, { "epoch": 0.7249753173327667, "grad_norm": 0.8341406583786011, "learning_rate": 5.567060360342243e-06, "loss": 0.0798, "step": 40937 }, { "epoch": 0.724993026869795, "grad_norm": 1.0526434183120728, "learning_rate": 5.566391428260284e-06, "loss": 0.0521, "step": 40938 }, { "epoch": 0.7250107364068235, "grad_norm": 0.5862694978713989, "learning_rate": 5.565722527213949e-06, "loss": 0.0388, "step": 40939 }, { "epoch": 0.725028445943852, "grad_norm": 0.7528182864189148, "learning_rate": 5.565053657205427e-06, "loss": 0.0753, "step": 40940 }, { "epoch": 0.7250461554808804, "grad_norm": 0.3297383785247803, "learning_rate": 5.564384818236925e-06, "loss": 0.0463, "step": 40941 }, { "epoch": 0.7250638650179088, "grad_norm": 0.5139144062995911, "learning_rate": 5.56371601031064e-06, "loss": 0.0455, "step": 40942 }, { "epoch": 0.7250815745549372, "grad_norm": 0.4759434163570404, "learning_rate": 5.563047233428777e-06, "loss": 0.0477, "step": 40943 }, { "epoch": 0.7250992840919657, "grad_norm": 0.5455688238143921, "learning_rate": 5.562378487593534e-06, "loss": 0.0567, "step": 40944 }, { "epoch": 0.7251169936289941, "grad_norm": 0.42925381660461426, "learning_rate": 5.5617097728071105e-06, "loss": 0.0529, "step": 40945 }, { "epoch": 0.7251347031660225, "grad_norm": 0.6813297867774963, "learning_rate": 5.561041089071714e-06, "loss": 0.0703, "step": 40946 }, { "epoch": 0.7251524127030509, "grad_norm": 0.6606329679489136, "learning_rate": 5.560372436389531e-06, "loss": 0.052, "step": 40947 }, { "epoch": 0.7251701222400794, "grad_norm": 0.3671313226222992, "learning_rate": 5.5597038147627675e-06, "loss": 0.0378, "step": 40948 }, { "epoch": 0.7251878317771078, "grad_norm": 0.7385193109512329, "learning_rate": 5.5590352241936245e-06, "loss": 0.0603, "step": 40949 }, { "epoch": 0.7252055413141362, "grad_norm": 0.7424877285957336, "learning_rate": 5.558366664684305e-06, "loss": 0.0503, "step": 40950 }, { "epoch": 0.7252232508511646, "grad_norm": 0.44024690985679626, "learning_rate": 5.557698136237e-06, "loss": 0.0759, "step": 40951 }, { "epoch": 0.7252409603881931, "grad_norm": 0.7667070031166077, "learning_rate": 5.557029638853914e-06, "loss": 0.0658, "step": 40952 }, { "epoch": 0.7252586699252215, "grad_norm": 0.39865854382514954, "learning_rate": 5.556361172537243e-06, "loss": 0.0597, "step": 40953 }, { "epoch": 0.7252763794622499, "grad_norm": 0.507844865322113, "learning_rate": 5.555692737289192e-06, "loss": 0.077, "step": 40954 }, { "epoch": 0.7252940889992784, "grad_norm": 0.4948318600654602, "learning_rate": 5.5550243331119534e-06, "loss": 0.0597, "step": 40955 }, { "epoch": 0.7253117985363068, "grad_norm": 0.7932592630386353, "learning_rate": 5.554355960007731e-06, "loss": 0.0846, "step": 40956 }, { "epoch": 0.7253295080733352, "grad_norm": 0.5711108446121216, "learning_rate": 5.553687617978726e-06, "loss": 0.0675, "step": 40957 }, { "epoch": 0.7253472176103636, "grad_norm": 0.46694767475128174, "learning_rate": 5.55301930702713e-06, "loss": 0.0667, "step": 40958 }, { "epoch": 0.7253649271473921, "grad_norm": 0.5741739869117737, "learning_rate": 5.552351027155143e-06, "loss": 0.0459, "step": 40959 }, { "epoch": 0.7253826366844205, "grad_norm": 0.6225574016571045, "learning_rate": 5.551682778364967e-06, "loss": 0.0661, "step": 40960 }, { "epoch": 0.7254003462214489, "grad_norm": 0.4626597464084625, "learning_rate": 5.551014560658804e-06, "loss": 0.0289, "step": 40961 }, { "epoch": 0.7254180557584773, "grad_norm": 0.4979638159275055, "learning_rate": 5.5503463740388415e-06, "loss": 0.0483, "step": 40962 }, { "epoch": 0.7254357652955058, "grad_norm": 0.38193798065185547, "learning_rate": 5.549678218507279e-06, "loss": 0.0832, "step": 40963 }, { "epoch": 0.7254534748325342, "grad_norm": 0.2747337818145752, "learning_rate": 5.54901009406633e-06, "loss": 0.0437, "step": 40964 }, { "epoch": 0.7254711843695626, "grad_norm": 0.679811954498291, "learning_rate": 5.548342000718175e-06, "loss": 0.0561, "step": 40965 }, { "epoch": 0.725488893906591, "grad_norm": 0.3481592535972595, "learning_rate": 5.5476739384650215e-06, "loss": 0.0491, "step": 40966 }, { "epoch": 0.7255066034436195, "grad_norm": 0.43191877007484436, "learning_rate": 5.547005907309062e-06, "loss": 0.0752, "step": 40967 }, { "epoch": 0.7255243129806479, "grad_norm": 0.3221621811389923, "learning_rate": 5.546337907252504e-06, "loss": 0.0651, "step": 40968 }, { "epoch": 0.7255420225176763, "grad_norm": 0.5966103076934814, "learning_rate": 5.545669938297533e-06, "loss": 0.0557, "step": 40969 }, { "epoch": 0.7255597320547048, "grad_norm": 0.7884271144866943, "learning_rate": 5.545002000446351e-06, "loss": 0.0753, "step": 40970 }, { "epoch": 0.7255774415917332, "grad_norm": 0.4226682186126709, "learning_rate": 5.544334093701155e-06, "loss": 0.0384, "step": 40971 }, { "epoch": 0.7255951511287616, "grad_norm": 0.7907080054283142, "learning_rate": 5.543666218064145e-06, "loss": 0.0679, "step": 40972 }, { "epoch": 0.72561286066579, "grad_norm": 0.4898366928100586, "learning_rate": 5.542998373537516e-06, "loss": 0.0317, "step": 40973 }, { "epoch": 0.7256305702028185, "grad_norm": 0.5056938529014587, "learning_rate": 5.542330560123466e-06, "loss": 0.0572, "step": 40974 }, { "epoch": 0.7256482797398469, "grad_norm": 0.5912536978721619, "learning_rate": 5.5416627778241976e-06, "loss": 0.0819, "step": 40975 }, { "epoch": 0.7256659892768753, "grad_norm": 0.5915987491607666, "learning_rate": 5.5409950266418955e-06, "loss": 0.0625, "step": 40976 }, { "epoch": 0.7256836988139037, "grad_norm": 0.39947932958602905, "learning_rate": 5.540327306578765e-06, "loss": 0.0515, "step": 40977 }, { "epoch": 0.7257014083509322, "grad_norm": 0.22162406146526337, "learning_rate": 5.539659617637004e-06, "loss": 0.0435, "step": 40978 }, { "epoch": 0.7257191178879606, "grad_norm": 0.644919753074646, "learning_rate": 5.538991959818802e-06, "loss": 0.0735, "step": 40979 }, { "epoch": 0.725736827424989, "grad_norm": 0.5305259227752686, "learning_rate": 5.5383243331263604e-06, "loss": 0.0644, "step": 40980 }, { "epoch": 0.7257545369620174, "grad_norm": 0.6514455676078796, "learning_rate": 5.537656737561872e-06, "loss": 0.0427, "step": 40981 }, { "epoch": 0.7257722464990459, "grad_norm": 0.403225839138031, "learning_rate": 5.536989173127538e-06, "loss": 0.0488, "step": 40982 }, { "epoch": 0.7257899560360743, "grad_norm": 0.6254551410675049, "learning_rate": 5.536321639825553e-06, "loss": 0.0409, "step": 40983 }, { "epoch": 0.7258076655731027, "grad_norm": 0.6960744857788086, "learning_rate": 5.535654137658112e-06, "loss": 0.0677, "step": 40984 }, { "epoch": 0.7258253751101312, "grad_norm": 0.5206176042556763, "learning_rate": 5.534986666627411e-06, "loss": 0.0756, "step": 40985 }, { "epoch": 0.7258430846471596, "grad_norm": 0.43617814779281616, "learning_rate": 5.534319226735652e-06, "loss": 0.0434, "step": 40986 }, { "epoch": 0.725860794184188, "grad_norm": 0.8038949966430664, "learning_rate": 5.533651817985021e-06, "loss": 0.0792, "step": 40987 }, { "epoch": 0.7258785037212164, "grad_norm": 0.5659692883491516, "learning_rate": 5.532984440377716e-06, "loss": 0.0639, "step": 40988 }, { "epoch": 0.7258962132582449, "grad_norm": 1.0599461793899536, "learning_rate": 5.532317093915942e-06, "loss": 0.0855, "step": 40989 }, { "epoch": 0.7259139227952733, "grad_norm": 0.4783148467540741, "learning_rate": 5.53164977860188e-06, "loss": 0.0628, "step": 40990 }, { "epoch": 0.7259316323323017, "grad_norm": 0.7270333170890808, "learning_rate": 5.530982494437734e-06, "loss": 0.0468, "step": 40991 }, { "epoch": 0.7259493418693301, "grad_norm": 0.494825541973114, "learning_rate": 5.5303152414256924e-06, "loss": 0.0616, "step": 40992 }, { "epoch": 0.7259670514063586, "grad_norm": 0.663567066192627, "learning_rate": 5.529648019567966e-06, "loss": 0.033, "step": 40993 }, { "epoch": 0.725984760943387, "grad_norm": 0.47716963291168213, "learning_rate": 5.528980828866735e-06, "loss": 0.0734, "step": 40994 }, { "epoch": 0.7260024704804154, "grad_norm": 0.4642394781112671, "learning_rate": 5.528313669324199e-06, "loss": 0.0586, "step": 40995 }, { "epoch": 0.7260201800174438, "grad_norm": 0.7106531858444214, "learning_rate": 5.527646540942558e-06, "loss": 0.0584, "step": 40996 }, { "epoch": 0.7260378895544723, "grad_norm": 0.28977736830711365, "learning_rate": 5.526979443723996e-06, "loss": 0.0567, "step": 40997 }, { "epoch": 0.7260555990915007, "grad_norm": 0.7274122834205627, "learning_rate": 5.526312377670715e-06, "loss": 0.0621, "step": 40998 }, { "epoch": 0.7260733086285291, "grad_norm": 0.574184238910675, "learning_rate": 5.525645342784907e-06, "loss": 0.0522, "step": 40999 }, { "epoch": 0.7260910181655577, "grad_norm": 0.4144374132156372, "learning_rate": 5.524978339068767e-06, "loss": 0.0306, "step": 41000 }, { "epoch": 0.726108727702586, "grad_norm": 0.36444878578186035, "learning_rate": 5.52431136652449e-06, "loss": 0.0444, "step": 41001 }, { "epoch": 0.7261264372396145, "grad_norm": 0.4699137806892395, "learning_rate": 5.523644425154272e-06, "loss": 0.0804, "step": 41002 }, { "epoch": 0.7261441467766429, "grad_norm": 0.34663859009742737, "learning_rate": 5.52297751496031e-06, "loss": 0.0752, "step": 41003 }, { "epoch": 0.7261618563136714, "grad_norm": 1.0468653440475464, "learning_rate": 5.522310635944787e-06, "loss": 0.0644, "step": 41004 }, { "epoch": 0.7261795658506998, "grad_norm": 1.0116065740585327, "learning_rate": 5.521643788109903e-06, "loss": 0.0644, "step": 41005 }, { "epoch": 0.7261972753877282, "grad_norm": 0.7223053574562073, "learning_rate": 5.520976971457853e-06, "loss": 0.0862, "step": 41006 }, { "epoch": 0.7262149849247566, "grad_norm": 0.6570769548416138, "learning_rate": 5.520310185990836e-06, "loss": 0.0517, "step": 41007 }, { "epoch": 0.7262326944617851, "grad_norm": 0.8867207169532776, "learning_rate": 5.519643431711035e-06, "loss": 0.0939, "step": 41008 }, { "epoch": 0.7262504039988135, "grad_norm": 0.6868926882743835, "learning_rate": 5.518976708620646e-06, "loss": 0.0372, "step": 41009 }, { "epoch": 0.7262681135358419, "grad_norm": 0.6831814646720886, "learning_rate": 5.518310016721866e-06, "loss": 0.0685, "step": 41010 }, { "epoch": 0.7262858230728703, "grad_norm": 0.6685415506362915, "learning_rate": 5.517643356016888e-06, "loss": 0.0664, "step": 41011 }, { "epoch": 0.7263035326098988, "grad_norm": 0.5685900449752808, "learning_rate": 5.516976726507903e-06, "loss": 0.0505, "step": 41012 }, { "epoch": 0.7263212421469272, "grad_norm": 0.33950236439704895, "learning_rate": 5.516310128197105e-06, "loss": 0.0655, "step": 41013 }, { "epoch": 0.7263389516839556, "grad_norm": 0.4343855679035187, "learning_rate": 5.515643561086694e-06, "loss": 0.0661, "step": 41014 }, { "epoch": 0.7263566612209841, "grad_norm": 0.9359967708587646, "learning_rate": 5.51497702517885e-06, "loss": 0.0584, "step": 41015 }, { "epoch": 0.7263743707580125, "grad_norm": 0.475185364484787, "learning_rate": 5.514310520475773e-06, "loss": 0.0524, "step": 41016 }, { "epoch": 0.7263920802950409, "grad_norm": 0.4338104724884033, "learning_rate": 5.5136440469796604e-06, "loss": 0.0454, "step": 41017 }, { "epoch": 0.7264097898320693, "grad_norm": 0.5849306583404541, "learning_rate": 5.5129776046926934e-06, "loss": 0.0487, "step": 41018 }, { "epoch": 0.7264274993690978, "grad_norm": 0.6424503922462463, "learning_rate": 5.512311193617071e-06, "loss": 0.053, "step": 41019 }, { "epoch": 0.7264452089061262, "grad_norm": 0.6192905902862549, "learning_rate": 5.511644813754986e-06, "loss": 0.0736, "step": 41020 }, { "epoch": 0.7264629184431546, "grad_norm": 0.5368857979774475, "learning_rate": 5.510978465108627e-06, "loss": 0.0561, "step": 41021 }, { "epoch": 0.726480627980183, "grad_norm": 0.6453015804290771, "learning_rate": 5.510312147680192e-06, "loss": 0.0518, "step": 41022 }, { "epoch": 0.7264983375172115, "grad_norm": 0.3920346796512604, "learning_rate": 5.5096458614718685e-06, "loss": 0.0451, "step": 41023 }, { "epoch": 0.7265160470542399, "grad_norm": 0.7769916653633118, "learning_rate": 5.5089796064858505e-06, "loss": 0.0697, "step": 41024 }, { "epoch": 0.7265337565912683, "grad_norm": 0.5292870402336121, "learning_rate": 5.508313382724335e-06, "loss": 0.0687, "step": 41025 }, { "epoch": 0.7265514661282967, "grad_norm": 0.20496605336666107, "learning_rate": 5.507647190189505e-06, "loss": 0.0494, "step": 41026 }, { "epoch": 0.7265691756653252, "grad_norm": 0.4469260275363922, "learning_rate": 5.506981028883554e-06, "loss": 0.037, "step": 41027 }, { "epoch": 0.7265868852023536, "grad_norm": 0.5566163063049316, "learning_rate": 5.506314898808681e-06, "loss": 0.0448, "step": 41028 }, { "epoch": 0.726604594739382, "grad_norm": 0.5112239122390747, "learning_rate": 5.50564879996706e-06, "loss": 0.0427, "step": 41029 }, { "epoch": 0.7266223042764105, "grad_norm": 0.4291768968105316, "learning_rate": 5.5049827323609e-06, "loss": 0.0418, "step": 41030 }, { "epoch": 0.7266400138134389, "grad_norm": 0.7787119746208191, "learning_rate": 5.504316695992387e-06, "loss": 0.0753, "step": 41031 }, { "epoch": 0.7266577233504673, "grad_norm": 0.36383676528930664, "learning_rate": 5.503650690863716e-06, "loss": 0.0343, "step": 41032 }, { "epoch": 0.7266754328874957, "grad_norm": 0.6532178521156311, "learning_rate": 5.50298471697707e-06, "loss": 0.0564, "step": 41033 }, { "epoch": 0.7266931424245242, "grad_norm": 0.7407572865486145, "learning_rate": 5.502318774334643e-06, "loss": 0.0577, "step": 41034 }, { "epoch": 0.7267108519615526, "grad_norm": 0.48029825091362, "learning_rate": 5.501652862938631e-06, "loss": 0.0615, "step": 41035 }, { "epoch": 0.726728561498581, "grad_norm": 0.4488658308982849, "learning_rate": 5.500986982791216e-06, "loss": 0.0396, "step": 41036 }, { "epoch": 0.7267462710356094, "grad_norm": 0.4701943099498749, "learning_rate": 5.500321133894592e-06, "loss": 0.0549, "step": 41037 }, { "epoch": 0.7267639805726379, "grad_norm": 0.6180567741394043, "learning_rate": 5.4996553162509515e-06, "loss": 0.0529, "step": 41038 }, { "epoch": 0.7267816901096663, "grad_norm": 0.5876091718673706, "learning_rate": 5.4989895298624835e-06, "loss": 0.0494, "step": 41039 }, { "epoch": 0.7267993996466947, "grad_norm": 0.8607836365699768, "learning_rate": 5.498323774731381e-06, "loss": 0.0513, "step": 41040 }, { "epoch": 0.7268171091837231, "grad_norm": 0.641086757183075, "learning_rate": 5.497658050859831e-06, "loss": 0.0351, "step": 41041 }, { "epoch": 0.7268348187207516, "grad_norm": 1.113049030303955, "learning_rate": 5.496992358250029e-06, "loss": 0.0929, "step": 41042 }, { "epoch": 0.72685252825778, "grad_norm": 0.5628341436386108, "learning_rate": 5.496326696904157e-06, "loss": 0.0519, "step": 41043 }, { "epoch": 0.7268702377948084, "grad_norm": 0.6052714586257935, "learning_rate": 5.495661066824409e-06, "loss": 0.0621, "step": 41044 }, { "epoch": 0.7268879473318369, "grad_norm": 0.4337941110134125, "learning_rate": 5.494995468012974e-06, "loss": 0.0541, "step": 41045 }, { "epoch": 0.7269056568688653, "grad_norm": 1.0754234790802002, "learning_rate": 5.494329900472049e-06, "loss": 0.065, "step": 41046 }, { "epoch": 0.7269233664058937, "grad_norm": 0.6272351145744324, "learning_rate": 5.49366436420381e-06, "loss": 0.0664, "step": 41047 }, { "epoch": 0.7269410759429221, "grad_norm": 0.5302215218544006, "learning_rate": 5.4929988592104555e-06, "loss": 0.0589, "step": 41048 }, { "epoch": 0.7269587854799506, "grad_norm": 0.455821692943573, "learning_rate": 5.492333385494173e-06, "loss": 0.0419, "step": 41049 }, { "epoch": 0.726976495016979, "grad_norm": 0.6260927319526672, "learning_rate": 5.491667943057151e-06, "loss": 0.0563, "step": 41050 }, { "epoch": 0.7269942045540074, "grad_norm": 0.43176132440567017, "learning_rate": 5.49100253190158e-06, "loss": 0.0467, "step": 41051 }, { "epoch": 0.7270119140910358, "grad_norm": 0.6189471483230591, "learning_rate": 5.490337152029649e-06, "loss": 0.073, "step": 41052 }, { "epoch": 0.7270296236280643, "grad_norm": 0.5597018599510193, "learning_rate": 5.489671803443552e-06, "loss": 0.0531, "step": 41053 }, { "epoch": 0.7270473331650927, "grad_norm": 0.5450166463851929, "learning_rate": 5.489006486145469e-06, "loss": 0.083, "step": 41054 }, { "epoch": 0.7270650427021211, "grad_norm": 0.3423224687576294, "learning_rate": 5.48834120013759e-06, "loss": 0.0605, "step": 41055 }, { "epoch": 0.7270827522391496, "grad_norm": 0.6051690578460693, "learning_rate": 5.487675945422109e-06, "loss": 0.0675, "step": 41056 }, { "epoch": 0.727100461776178, "grad_norm": 0.48300057649612427, "learning_rate": 5.487010722001217e-06, "loss": 0.0473, "step": 41057 }, { "epoch": 0.7271181713132064, "grad_norm": 0.8568843603134155, "learning_rate": 5.486345529877086e-06, "loss": 0.0681, "step": 41058 }, { "epoch": 0.7271358808502348, "grad_norm": 0.7921871542930603, "learning_rate": 5.4856803690519215e-06, "loss": 0.0744, "step": 41059 }, { "epoch": 0.7271535903872633, "grad_norm": 0.7258697748184204, "learning_rate": 5.4850152395279115e-06, "loss": 0.0504, "step": 41060 }, { "epoch": 0.7271712999242917, "grad_norm": 0.43757718801498413, "learning_rate": 5.484350141307234e-06, "loss": 0.0468, "step": 41061 }, { "epoch": 0.7271890094613201, "grad_norm": 0.6942996978759766, "learning_rate": 5.483685074392083e-06, "loss": 0.0549, "step": 41062 }, { "epoch": 0.7272067189983485, "grad_norm": 0.7090021371841431, "learning_rate": 5.483020038784644e-06, "loss": 0.0472, "step": 41063 }, { "epoch": 0.727224428535377, "grad_norm": 1.3061164617538452, "learning_rate": 5.482355034487112e-06, "loss": 0.0505, "step": 41064 }, { "epoch": 0.7272421380724055, "grad_norm": 0.4713491201400757, "learning_rate": 5.481690061501663e-06, "loss": 0.0508, "step": 41065 }, { "epoch": 0.7272598476094339, "grad_norm": 0.7945446372032166, "learning_rate": 5.481025119830494e-06, "loss": 0.0514, "step": 41066 }, { "epoch": 0.7272775571464623, "grad_norm": 0.4900878667831421, "learning_rate": 5.480360209475787e-06, "loss": 0.0576, "step": 41067 }, { "epoch": 0.7272952666834908, "grad_norm": 0.5337100028991699, "learning_rate": 5.479695330439732e-06, "loss": 0.0518, "step": 41068 }, { "epoch": 0.7273129762205192, "grad_norm": 0.48955124616622925, "learning_rate": 5.479030482724517e-06, "loss": 0.052, "step": 41069 }, { "epoch": 0.7273306857575476, "grad_norm": 0.48084625601768494, "learning_rate": 5.47836566633233e-06, "loss": 0.0639, "step": 41070 }, { "epoch": 0.7273483952945761, "grad_norm": 0.8997278809547424, "learning_rate": 5.477700881265362e-06, "loss": 0.0381, "step": 41071 }, { "epoch": 0.7273661048316045, "grad_norm": 0.5120606422424316, "learning_rate": 5.477036127525788e-06, "loss": 0.0603, "step": 41072 }, { "epoch": 0.7273838143686329, "grad_norm": 0.1966104507446289, "learning_rate": 5.476371405115804e-06, "loss": 0.0476, "step": 41073 }, { "epoch": 0.7274015239056613, "grad_norm": 0.5717272162437439, "learning_rate": 5.4757067140375985e-06, "loss": 0.0855, "step": 41074 }, { "epoch": 0.7274192334426898, "grad_norm": 0.7245798110961914, "learning_rate": 5.475042054293349e-06, "loss": 0.0512, "step": 41075 }, { "epoch": 0.7274369429797182, "grad_norm": 0.6360992193222046, "learning_rate": 5.474377425885249e-06, "loss": 0.0761, "step": 41076 }, { "epoch": 0.7274546525167466, "grad_norm": 0.22786661982536316, "learning_rate": 5.473712828815483e-06, "loss": 0.0529, "step": 41077 }, { "epoch": 0.727472362053775, "grad_norm": 0.7002288103103638, "learning_rate": 5.473048263086239e-06, "loss": 0.0791, "step": 41078 }, { "epoch": 0.7274900715908035, "grad_norm": 0.6883264183998108, "learning_rate": 5.472383728699702e-06, "loss": 0.0615, "step": 41079 }, { "epoch": 0.7275077811278319, "grad_norm": 0.7661499381065369, "learning_rate": 5.471719225658059e-06, "loss": 0.0853, "step": 41080 }, { "epoch": 0.7275254906648603, "grad_norm": 0.5609564781188965, "learning_rate": 5.471054753963502e-06, "loss": 0.0797, "step": 41081 }, { "epoch": 0.7275432002018887, "grad_norm": 0.6824273467063904, "learning_rate": 5.4703903136182045e-06, "loss": 0.0798, "step": 41082 }, { "epoch": 0.7275609097389172, "grad_norm": 0.594559371471405, "learning_rate": 5.469725904624359e-06, "loss": 0.088, "step": 41083 }, { "epoch": 0.7275786192759456, "grad_norm": 1.2185115814208984, "learning_rate": 5.4690615269841535e-06, "loss": 0.0832, "step": 41084 }, { "epoch": 0.727596328812974, "grad_norm": 0.48672378063201904, "learning_rate": 5.468397180699775e-06, "loss": 0.0288, "step": 41085 }, { "epoch": 0.7276140383500025, "grad_norm": 0.34320685267448425, "learning_rate": 5.4677328657734e-06, "loss": 0.0414, "step": 41086 }, { "epoch": 0.7276317478870309, "grad_norm": 0.6524773240089417, "learning_rate": 5.467068582207216e-06, "loss": 0.0708, "step": 41087 }, { "epoch": 0.7276494574240593, "grad_norm": 0.5959470868110657, "learning_rate": 5.466404330003416e-06, "loss": 0.0635, "step": 41088 }, { "epoch": 0.7276671669610877, "grad_norm": 0.670026957988739, "learning_rate": 5.465740109164189e-06, "loss": 0.0693, "step": 41089 }, { "epoch": 0.7276848764981162, "grad_norm": 0.8210460543632507, "learning_rate": 5.465075919691704e-06, "loss": 0.0792, "step": 41090 }, { "epoch": 0.7277025860351446, "grad_norm": 0.46845120191574097, "learning_rate": 5.464411761588157e-06, "loss": 0.0742, "step": 41091 }, { "epoch": 0.727720295572173, "grad_norm": 0.497689425945282, "learning_rate": 5.463747634855736e-06, "loss": 0.0731, "step": 41092 }, { "epoch": 0.7277380051092014, "grad_norm": 0.49361753463745117, "learning_rate": 5.463083539496617e-06, "loss": 0.0418, "step": 41093 }, { "epoch": 0.7277557146462299, "grad_norm": 0.5502722263336182, "learning_rate": 5.462419475512986e-06, "loss": 0.0479, "step": 41094 }, { "epoch": 0.7277734241832583, "grad_norm": 0.5041307806968689, "learning_rate": 5.4617554429070316e-06, "loss": 0.0492, "step": 41095 }, { "epoch": 0.7277911337202867, "grad_norm": 0.9824581146240234, "learning_rate": 5.461091441680936e-06, "loss": 0.0932, "step": 41096 }, { "epoch": 0.7278088432573151, "grad_norm": 0.9207360148429871, "learning_rate": 5.460427471836886e-06, "loss": 0.1018, "step": 41097 }, { "epoch": 0.7278265527943436, "grad_norm": 0.20429708063602448, "learning_rate": 5.459763533377064e-06, "loss": 0.0293, "step": 41098 }, { "epoch": 0.727844262331372, "grad_norm": 0.6754205822944641, "learning_rate": 5.4590996263036606e-06, "loss": 0.049, "step": 41099 }, { "epoch": 0.7278619718684004, "grad_norm": 0.7789773941040039, "learning_rate": 5.458435750618848e-06, "loss": 0.0617, "step": 41100 }, { "epoch": 0.7278796814054289, "grad_norm": 0.42151153087615967, "learning_rate": 5.457771906324818e-06, "loss": 0.0423, "step": 41101 }, { "epoch": 0.7278973909424573, "grad_norm": 0.47014257311820984, "learning_rate": 5.457108093423754e-06, "loss": 0.0598, "step": 41102 }, { "epoch": 0.7279151004794857, "grad_norm": 0.5539500713348389, "learning_rate": 5.4564443119178435e-06, "loss": 0.0648, "step": 41103 }, { "epoch": 0.7279328100165141, "grad_norm": 0.41786402463912964, "learning_rate": 5.455780561809259e-06, "loss": 0.0669, "step": 41104 }, { "epoch": 0.7279505195535426, "grad_norm": 0.8124160170555115, "learning_rate": 5.455116843100192e-06, "loss": 0.0744, "step": 41105 }, { "epoch": 0.727968229090571, "grad_norm": 0.6265432834625244, "learning_rate": 5.4544531557928275e-06, "loss": 0.0545, "step": 41106 }, { "epoch": 0.7279859386275994, "grad_norm": 0.23643165826797485, "learning_rate": 5.453789499889345e-06, "loss": 0.0426, "step": 41107 }, { "epoch": 0.7280036481646278, "grad_norm": 0.6842641830444336, "learning_rate": 5.453125875391929e-06, "loss": 0.07, "step": 41108 }, { "epoch": 0.7280213577016563, "grad_norm": 0.6785666942596436, "learning_rate": 5.4524622823027635e-06, "loss": 0.0623, "step": 41109 }, { "epoch": 0.7280390672386847, "grad_norm": 0.4939766526222229, "learning_rate": 5.451798720624038e-06, "loss": 0.0809, "step": 41110 }, { "epoch": 0.7280567767757131, "grad_norm": 0.5597356557846069, "learning_rate": 5.451135190357922e-06, "loss": 0.0667, "step": 41111 }, { "epoch": 0.7280744863127415, "grad_norm": 0.6653228402137756, "learning_rate": 5.450471691506607e-06, "loss": 0.0504, "step": 41112 }, { "epoch": 0.72809219584977, "grad_norm": 0.6531923413276672, "learning_rate": 5.449808224072278e-06, "loss": 0.0422, "step": 41113 }, { "epoch": 0.7281099053867984, "grad_norm": 0.7256240248680115, "learning_rate": 5.449144788057108e-06, "loss": 0.0583, "step": 41114 }, { "epoch": 0.7281276149238268, "grad_norm": 0.7327873706817627, "learning_rate": 5.4484813834632876e-06, "loss": 0.0817, "step": 41115 }, { "epoch": 0.7281453244608553, "grad_norm": 0.6803312301635742, "learning_rate": 5.447818010292992e-06, "loss": 0.0691, "step": 41116 }, { "epoch": 0.7281630339978837, "grad_norm": 0.9519155621528625, "learning_rate": 5.447154668548419e-06, "loss": 0.0714, "step": 41117 }, { "epoch": 0.7281807435349121, "grad_norm": 0.5995551943778992, "learning_rate": 5.446491358231735e-06, "loss": 0.0491, "step": 41118 }, { "epoch": 0.7281984530719405, "grad_norm": 0.5623407363891602, "learning_rate": 5.445828079345129e-06, "loss": 0.0597, "step": 41119 }, { "epoch": 0.728216162608969, "grad_norm": 0.9862011075019836, "learning_rate": 5.445164831890788e-06, "loss": 0.0867, "step": 41120 }, { "epoch": 0.7282338721459974, "grad_norm": 0.5002033114433289, "learning_rate": 5.444501615870882e-06, "loss": 0.0461, "step": 41121 }, { "epoch": 0.7282515816830258, "grad_norm": 0.4769095182418823, "learning_rate": 5.4438384312875995e-06, "loss": 0.0434, "step": 41122 }, { "epoch": 0.7282692912200542, "grad_norm": 0.6760439276695251, "learning_rate": 5.443175278143122e-06, "loss": 0.0858, "step": 41123 }, { "epoch": 0.7282870007570827, "grad_norm": 0.5799638032913208, "learning_rate": 5.442512156439632e-06, "loss": 0.0604, "step": 41124 }, { "epoch": 0.7283047102941111, "grad_norm": 0.8881576657295227, "learning_rate": 5.4418490661793084e-06, "loss": 0.0783, "step": 41125 }, { "epoch": 0.7283224198311395, "grad_norm": 0.5154210329055786, "learning_rate": 5.441186007364336e-06, "loss": 0.0503, "step": 41126 }, { "epoch": 0.7283401293681679, "grad_norm": 0.5474054217338562, "learning_rate": 5.440522979996896e-06, "loss": 0.0417, "step": 41127 }, { "epoch": 0.7283578389051965, "grad_norm": 0.4255904257297516, "learning_rate": 5.439859984079171e-06, "loss": 0.0461, "step": 41128 }, { "epoch": 0.7283755484422249, "grad_norm": 0.4841775894165039, "learning_rate": 5.439197019613336e-06, "loss": 0.0565, "step": 41129 }, { "epoch": 0.7283932579792533, "grad_norm": 0.21432441473007202, "learning_rate": 5.438534086601577e-06, "loss": 0.0513, "step": 41130 }, { "epoch": 0.7284109675162818, "grad_norm": 0.5564340353012085, "learning_rate": 5.437871185046077e-06, "loss": 0.041, "step": 41131 }, { "epoch": 0.7284286770533102, "grad_norm": 0.5697578191757202, "learning_rate": 5.4372083149490095e-06, "loss": 0.047, "step": 41132 }, { "epoch": 0.7284463865903386, "grad_norm": 0.7344286441802979, "learning_rate": 5.436545476312558e-06, "loss": 0.0459, "step": 41133 }, { "epoch": 0.728464096127367, "grad_norm": 0.5933509469032288, "learning_rate": 5.435882669138905e-06, "loss": 0.0856, "step": 41134 }, { "epoch": 0.7284818056643955, "grad_norm": 0.5636873841285706, "learning_rate": 5.435219893430232e-06, "loss": 0.0685, "step": 41135 }, { "epoch": 0.7284995152014239, "grad_norm": 0.5862932801246643, "learning_rate": 5.434557149188717e-06, "loss": 0.0447, "step": 41136 }, { "epoch": 0.7285172247384523, "grad_norm": 0.4673252999782562, "learning_rate": 5.433894436416543e-06, "loss": 0.0558, "step": 41137 }, { "epoch": 0.7285349342754807, "grad_norm": 0.6112968325614929, "learning_rate": 5.433231755115893e-06, "loss": 0.0594, "step": 41138 }, { "epoch": 0.7285526438125092, "grad_norm": 0.4407901465892792, "learning_rate": 5.432569105288938e-06, "loss": 0.0772, "step": 41139 }, { "epoch": 0.7285703533495376, "grad_norm": 0.7453687787055969, "learning_rate": 5.4319064869378625e-06, "loss": 0.0695, "step": 41140 }, { "epoch": 0.728588062886566, "grad_norm": 0.30761370062828064, "learning_rate": 5.4312439000648465e-06, "loss": 0.0328, "step": 41141 }, { "epoch": 0.7286057724235944, "grad_norm": 0.7975713610649109, "learning_rate": 5.430581344672077e-06, "loss": 0.084, "step": 41142 }, { "epoch": 0.7286234819606229, "grad_norm": 0.6309244632720947, "learning_rate": 5.42991882076172e-06, "loss": 0.0512, "step": 41143 }, { "epoch": 0.7286411914976513, "grad_norm": 0.5712185502052307, "learning_rate": 5.429256328335962e-06, "loss": 0.0573, "step": 41144 }, { "epoch": 0.7286589010346797, "grad_norm": 0.5583115816116333, "learning_rate": 5.428593867396984e-06, "loss": 0.0564, "step": 41145 }, { "epoch": 0.7286766105717082, "grad_norm": 0.5290473699569702, "learning_rate": 5.427931437946964e-06, "loss": 0.0425, "step": 41146 }, { "epoch": 0.7286943201087366, "grad_norm": 0.22987566888332367, "learning_rate": 5.42726903998808e-06, "loss": 0.046, "step": 41147 }, { "epoch": 0.728712029645765, "grad_norm": 0.6533007025718689, "learning_rate": 5.426606673522514e-06, "loss": 0.0704, "step": 41148 }, { "epoch": 0.7287297391827934, "grad_norm": 0.7536950707435608, "learning_rate": 5.425944338552447e-06, "loss": 0.0583, "step": 41149 }, { "epoch": 0.7287474487198219, "grad_norm": 0.5003598928451538, "learning_rate": 5.425282035080053e-06, "loss": 0.0527, "step": 41150 }, { "epoch": 0.7287651582568503, "grad_norm": 0.6762237548828125, "learning_rate": 5.42461976310751e-06, "loss": 0.0326, "step": 41151 }, { "epoch": 0.7287828677938787, "grad_norm": 0.6443281769752502, "learning_rate": 5.423957522637005e-06, "loss": 0.0611, "step": 41152 }, { "epoch": 0.7288005773309071, "grad_norm": 0.9996612668037415, "learning_rate": 5.4232953136707015e-06, "loss": 0.087, "step": 41153 }, { "epoch": 0.7288182868679356, "grad_norm": 0.752197802066803, "learning_rate": 5.422633136210794e-06, "loss": 0.0616, "step": 41154 }, { "epoch": 0.728835996404964, "grad_norm": 0.9237992167472839, "learning_rate": 5.421970990259453e-06, "loss": 0.0951, "step": 41155 }, { "epoch": 0.7288537059419924, "grad_norm": 0.9320564270019531, "learning_rate": 5.421308875818864e-06, "loss": 0.0744, "step": 41156 }, { "epoch": 0.7288714154790208, "grad_norm": 0.4715985357761383, "learning_rate": 5.420646792891195e-06, "loss": 0.0696, "step": 41157 }, { "epoch": 0.7288891250160493, "grad_norm": 0.9052745699882507, "learning_rate": 5.41998474147863e-06, "loss": 0.072, "step": 41158 }, { "epoch": 0.7289068345530777, "grad_norm": 0.38841140270233154, "learning_rate": 5.419322721583346e-06, "loss": 0.0502, "step": 41159 }, { "epoch": 0.7289245440901061, "grad_norm": 0.5480244159698486, "learning_rate": 5.418660733207525e-06, "loss": 0.0741, "step": 41160 }, { "epoch": 0.7289422536271346, "grad_norm": 0.7287344336509705, "learning_rate": 5.4179987763533375e-06, "loss": 0.0922, "step": 41161 }, { "epoch": 0.728959963164163, "grad_norm": 0.5561480522155762, "learning_rate": 5.4173368510229645e-06, "loss": 0.045, "step": 41162 }, { "epoch": 0.7289776727011914, "grad_norm": 0.5543907284736633, "learning_rate": 5.416674957218584e-06, "loss": 0.0636, "step": 41163 }, { "epoch": 0.7289953822382198, "grad_norm": 0.5961856842041016, "learning_rate": 5.416013094942374e-06, "loss": 0.0431, "step": 41164 }, { "epoch": 0.7290130917752483, "grad_norm": 0.4772289991378784, "learning_rate": 5.41535126419651e-06, "loss": 0.0727, "step": 41165 }, { "epoch": 0.7290308013122767, "grad_norm": 0.7455285787582397, "learning_rate": 5.414689464983172e-06, "loss": 0.0857, "step": 41166 }, { "epoch": 0.7290485108493051, "grad_norm": 0.6692590713500977, "learning_rate": 5.4140276973045415e-06, "loss": 0.0622, "step": 41167 }, { "epoch": 0.7290662203863335, "grad_norm": 0.5424777865409851, "learning_rate": 5.413365961162784e-06, "loss": 0.0645, "step": 41168 }, { "epoch": 0.729083929923362, "grad_norm": 0.4914867877960205, "learning_rate": 5.4127042565600845e-06, "loss": 0.0663, "step": 41169 }, { "epoch": 0.7291016394603904, "grad_norm": 0.4409170150756836, "learning_rate": 5.412042583498623e-06, "loss": 0.0453, "step": 41170 }, { "epoch": 0.7291193489974188, "grad_norm": 0.1368679702281952, "learning_rate": 5.411380941980566e-06, "loss": 0.0458, "step": 41171 }, { "epoch": 0.7291370585344472, "grad_norm": 0.5178272128105164, "learning_rate": 5.410719332008096e-06, "loss": 0.0436, "step": 41172 }, { "epoch": 0.7291547680714757, "grad_norm": 0.516668438911438, "learning_rate": 5.410057753583389e-06, "loss": 0.0688, "step": 41173 }, { "epoch": 0.7291724776085041, "grad_norm": 0.31902381777763367, "learning_rate": 5.409396206708622e-06, "loss": 0.0585, "step": 41174 }, { "epoch": 0.7291901871455325, "grad_norm": 0.5969119071960449, "learning_rate": 5.408734691385973e-06, "loss": 0.0397, "step": 41175 }, { "epoch": 0.729207896682561, "grad_norm": 0.6957648396492004, "learning_rate": 5.408073207617616e-06, "loss": 0.0572, "step": 41176 }, { "epoch": 0.7292256062195894, "grad_norm": 0.33141714334487915, "learning_rate": 5.407411755405733e-06, "loss": 0.0274, "step": 41177 }, { "epoch": 0.7292433157566178, "grad_norm": 0.6993393301963806, "learning_rate": 5.40675033475249e-06, "loss": 0.0777, "step": 41178 }, { "epoch": 0.7292610252936462, "grad_norm": 0.5989256501197815, "learning_rate": 5.406088945660066e-06, "loss": 0.0647, "step": 41179 }, { "epoch": 0.7292787348306747, "grad_norm": 0.7452263832092285, "learning_rate": 5.405427588130642e-06, "loss": 0.0768, "step": 41180 }, { "epoch": 0.7292964443677031, "grad_norm": 0.551710844039917, "learning_rate": 5.404766262166395e-06, "loss": 0.0499, "step": 41181 }, { "epoch": 0.7293141539047315, "grad_norm": 0.5696778893470764, "learning_rate": 5.404104967769486e-06, "loss": 0.0628, "step": 41182 }, { "epoch": 0.7293318634417599, "grad_norm": 0.35640236735343933, "learning_rate": 5.4034437049421056e-06, "loss": 0.0322, "step": 41183 }, { "epoch": 0.7293495729787884, "grad_norm": 0.44653093814849854, "learning_rate": 5.402782473686431e-06, "loss": 0.0698, "step": 41184 }, { "epoch": 0.7293672825158168, "grad_norm": 0.5744801163673401, "learning_rate": 5.402121274004627e-06, "loss": 0.0485, "step": 41185 }, { "epoch": 0.7293849920528452, "grad_norm": 0.4836077392101288, "learning_rate": 5.4014601058988725e-06, "loss": 0.0729, "step": 41186 }, { "epoch": 0.7294027015898736, "grad_norm": 0.2927669584751129, "learning_rate": 5.400798969371343e-06, "loss": 0.0505, "step": 41187 }, { "epoch": 0.7294204111269021, "grad_norm": 0.5340336561203003, "learning_rate": 5.400137864424218e-06, "loss": 0.0433, "step": 41188 }, { "epoch": 0.7294381206639305, "grad_norm": 0.706556499004364, "learning_rate": 5.399476791059666e-06, "loss": 0.0533, "step": 41189 }, { "epoch": 0.7294558302009589, "grad_norm": 0.55939120054245, "learning_rate": 5.398815749279861e-06, "loss": 0.077, "step": 41190 }, { "epoch": 0.7294735397379875, "grad_norm": 0.7011871337890625, "learning_rate": 5.398154739086984e-06, "loss": 0.0532, "step": 41191 }, { "epoch": 0.7294912492750159, "grad_norm": 0.47165700793266296, "learning_rate": 5.397493760483205e-06, "loss": 0.0509, "step": 41192 }, { "epoch": 0.7295089588120443, "grad_norm": 0.33667364716529846, "learning_rate": 5.396832813470701e-06, "loss": 0.0457, "step": 41193 }, { "epoch": 0.7295266683490726, "grad_norm": 0.2822701334953308, "learning_rate": 5.396171898051645e-06, "loss": 0.0394, "step": 41194 }, { "epoch": 0.7295443778861012, "grad_norm": 0.7527491450309753, "learning_rate": 5.395511014228218e-06, "loss": 0.0562, "step": 41195 }, { "epoch": 0.7295620874231296, "grad_norm": 0.4678735136985779, "learning_rate": 5.394850162002584e-06, "loss": 0.0373, "step": 41196 }, { "epoch": 0.729579796960158, "grad_norm": 0.3353480100631714, "learning_rate": 5.39418934137692e-06, "loss": 0.0381, "step": 41197 }, { "epoch": 0.7295975064971864, "grad_norm": 0.6103259325027466, "learning_rate": 5.393528552353402e-06, "loss": 0.0579, "step": 41198 }, { "epoch": 0.7296152160342149, "grad_norm": 0.6456955075263977, "learning_rate": 5.392867794934209e-06, "loss": 0.0611, "step": 41199 }, { "epoch": 0.7296329255712433, "grad_norm": 0.39065855741500854, "learning_rate": 5.392207069121503e-06, "loss": 0.0366, "step": 41200 }, { "epoch": 0.7296506351082717, "grad_norm": 0.7691922783851624, "learning_rate": 5.391546374917465e-06, "loss": 0.0514, "step": 41201 }, { "epoch": 0.7296683446453001, "grad_norm": 1.0442918539047241, "learning_rate": 5.390885712324268e-06, "loss": 0.0867, "step": 41202 }, { "epoch": 0.7296860541823286, "grad_norm": 0.6834831833839417, "learning_rate": 5.390225081344084e-06, "loss": 0.0514, "step": 41203 }, { "epoch": 0.729703763719357, "grad_norm": 0.693518877029419, "learning_rate": 5.389564481979087e-06, "loss": 0.0596, "step": 41204 }, { "epoch": 0.7297214732563854, "grad_norm": 0.46365219354629517, "learning_rate": 5.388903914231452e-06, "loss": 0.0373, "step": 41205 }, { "epoch": 0.7297391827934139, "grad_norm": 0.6575438380241394, "learning_rate": 5.388243378103357e-06, "loss": 0.0826, "step": 41206 }, { "epoch": 0.7297568923304423, "grad_norm": 0.6735081672668457, "learning_rate": 5.3875828735969625e-06, "loss": 0.0519, "step": 41207 }, { "epoch": 0.7297746018674707, "grad_norm": 0.3967750370502472, "learning_rate": 5.386922400714448e-06, "loss": 0.0592, "step": 41208 }, { "epoch": 0.7297923114044991, "grad_norm": 0.5833600163459778, "learning_rate": 5.386261959457992e-06, "loss": 0.0655, "step": 41209 }, { "epoch": 0.7298100209415276, "grad_norm": 0.8467609286308289, "learning_rate": 5.385601549829757e-06, "loss": 0.0827, "step": 41210 }, { "epoch": 0.729827730478556, "grad_norm": 0.4595116972923279, "learning_rate": 5.3849411718319145e-06, "loss": 0.0379, "step": 41211 }, { "epoch": 0.7298454400155844, "grad_norm": 0.49731743335723877, "learning_rate": 5.3842808254666496e-06, "loss": 0.0632, "step": 41212 }, { "epoch": 0.7298631495526128, "grad_norm": 0.504287600517273, "learning_rate": 5.383620510736132e-06, "loss": 0.0556, "step": 41213 }, { "epoch": 0.7298808590896413, "grad_norm": 0.5733726620674133, "learning_rate": 5.382960227642526e-06, "loss": 0.0507, "step": 41214 }, { "epoch": 0.7298985686266697, "grad_norm": 0.7396339774131775, "learning_rate": 5.382299976188008e-06, "loss": 0.0519, "step": 41215 }, { "epoch": 0.7299162781636981, "grad_norm": 0.4151616394519806, "learning_rate": 5.3816397563747546e-06, "loss": 0.0452, "step": 41216 }, { "epoch": 0.7299339877007265, "grad_norm": 0.5445739030838013, "learning_rate": 5.38097956820493e-06, "loss": 0.065, "step": 41217 }, { "epoch": 0.729951697237755, "grad_norm": 0.5192763209342957, "learning_rate": 5.380319411680709e-06, "loss": 0.0309, "step": 41218 }, { "epoch": 0.7299694067747834, "grad_norm": 0.5308695435523987, "learning_rate": 5.379659286804262e-06, "loss": 0.0616, "step": 41219 }, { "epoch": 0.7299871163118118, "grad_norm": 0.8647047877311707, "learning_rate": 5.378999193577765e-06, "loss": 0.066, "step": 41220 }, { "epoch": 0.7300048258488403, "grad_norm": 0.34057125449180603, "learning_rate": 5.378339132003389e-06, "loss": 0.0388, "step": 41221 }, { "epoch": 0.7300225353858687, "grad_norm": 0.48194020986557007, "learning_rate": 5.3776791020833014e-06, "loss": 0.0316, "step": 41222 }, { "epoch": 0.7300402449228971, "grad_norm": 0.350504994392395, "learning_rate": 5.377019103819682e-06, "loss": 0.0664, "step": 41223 }, { "epoch": 0.7300579544599255, "grad_norm": 0.7839666604995728, "learning_rate": 5.376359137214693e-06, "loss": 0.0552, "step": 41224 }, { "epoch": 0.730075663996954, "grad_norm": 0.6633113026618958, "learning_rate": 5.375699202270508e-06, "loss": 0.0424, "step": 41225 }, { "epoch": 0.7300933735339824, "grad_norm": 0.6371286511421204, "learning_rate": 5.375039298989299e-06, "loss": 0.0688, "step": 41226 }, { "epoch": 0.7301110830710108, "grad_norm": 0.7847426533699036, "learning_rate": 5.3743794273732414e-06, "loss": 0.0619, "step": 41227 }, { "epoch": 0.7301287926080392, "grad_norm": 0.25812143087387085, "learning_rate": 5.373719587424498e-06, "loss": 0.0307, "step": 41228 }, { "epoch": 0.7301465021450677, "grad_norm": 0.6495254039764404, "learning_rate": 5.3730597791452435e-06, "loss": 0.0612, "step": 41229 }, { "epoch": 0.7301642116820961, "grad_norm": 0.7934059500694275, "learning_rate": 5.372400002537649e-06, "loss": 0.0963, "step": 41230 }, { "epoch": 0.7301819212191245, "grad_norm": 0.6154159307479858, "learning_rate": 5.371740257603883e-06, "loss": 0.0472, "step": 41231 }, { "epoch": 0.7301996307561529, "grad_norm": 0.9509885907173157, "learning_rate": 5.3710805443461195e-06, "loss": 0.0939, "step": 41232 }, { "epoch": 0.7302173402931814, "grad_norm": 0.5438758134841919, "learning_rate": 5.370420862766527e-06, "loss": 0.0578, "step": 41233 }, { "epoch": 0.7302350498302098, "grad_norm": 0.4793991148471832, "learning_rate": 5.36976121286728e-06, "loss": 0.0497, "step": 41234 }, { "epoch": 0.7302527593672382, "grad_norm": 0.5875842571258545, "learning_rate": 5.369101594650539e-06, "loss": 0.0677, "step": 41235 }, { "epoch": 0.7302704689042667, "grad_norm": 0.7287892699241638, "learning_rate": 5.368442008118481e-06, "loss": 0.0574, "step": 41236 }, { "epoch": 0.7302881784412951, "grad_norm": 0.24372480809688568, "learning_rate": 5.367782453273274e-06, "loss": 0.0301, "step": 41237 }, { "epoch": 0.7303058879783235, "grad_norm": 0.9125342965126038, "learning_rate": 5.3671229301170945e-06, "loss": 0.0754, "step": 41238 }, { "epoch": 0.7303235975153519, "grad_norm": 0.7515797019004822, "learning_rate": 5.366463438652099e-06, "loss": 0.0656, "step": 41239 }, { "epoch": 0.7303413070523804, "grad_norm": 0.8749477863311768, "learning_rate": 5.365803978880462e-06, "loss": 0.0722, "step": 41240 }, { "epoch": 0.7303590165894088, "grad_norm": 0.5173527002334595, "learning_rate": 5.365144550804365e-06, "loss": 0.0649, "step": 41241 }, { "epoch": 0.7303767261264372, "grad_norm": 0.4357352554798126, "learning_rate": 5.364485154425962e-06, "loss": 0.0657, "step": 41242 }, { "epoch": 0.7303944356634656, "grad_norm": 0.402989000082016, "learning_rate": 5.363825789747428e-06, "loss": 0.0408, "step": 41243 }, { "epoch": 0.7304121452004941, "grad_norm": 0.5597425699234009, "learning_rate": 5.363166456770933e-06, "loss": 0.0723, "step": 41244 }, { "epoch": 0.7304298547375225, "grad_norm": 0.47030168771743774, "learning_rate": 5.362507155498651e-06, "loss": 0.0639, "step": 41245 }, { "epoch": 0.7304475642745509, "grad_norm": 0.801956295967102, "learning_rate": 5.36184788593274e-06, "loss": 0.0566, "step": 41246 }, { "epoch": 0.7304652738115793, "grad_norm": 0.4077450931072235, "learning_rate": 5.361188648075375e-06, "loss": 0.0615, "step": 41247 }, { "epoch": 0.7304829833486078, "grad_norm": 0.5193314552307129, "learning_rate": 5.360529441928724e-06, "loss": 0.0476, "step": 41248 }, { "epoch": 0.7305006928856362, "grad_norm": 0.930591881275177, "learning_rate": 5.359870267494958e-06, "loss": 0.0858, "step": 41249 }, { "epoch": 0.7305184024226646, "grad_norm": 0.514681339263916, "learning_rate": 5.359211124776241e-06, "loss": 0.0465, "step": 41250 }, { "epoch": 0.7305361119596931, "grad_norm": 1.113505482673645, "learning_rate": 5.358552013774745e-06, "loss": 0.0971, "step": 41251 }, { "epoch": 0.7305538214967215, "grad_norm": 0.7999797463417053, "learning_rate": 5.3578929344926445e-06, "loss": 0.0763, "step": 41252 }, { "epoch": 0.7305715310337499, "grad_norm": 0.605712890625, "learning_rate": 5.357233886932095e-06, "loss": 0.0527, "step": 41253 }, { "epoch": 0.7305892405707783, "grad_norm": 0.48552700877189636, "learning_rate": 5.35657487109527e-06, "loss": 0.0865, "step": 41254 }, { "epoch": 0.7306069501078069, "grad_norm": 0.699073314666748, "learning_rate": 5.355915886984344e-06, "loss": 0.0627, "step": 41255 }, { "epoch": 0.7306246596448353, "grad_norm": 0.7927200794219971, "learning_rate": 5.355256934601475e-06, "loss": 0.0625, "step": 41256 }, { "epoch": 0.7306423691818636, "grad_norm": 0.8984511494636536, "learning_rate": 5.3545980139488334e-06, "loss": 0.0649, "step": 41257 }, { "epoch": 0.730660078718892, "grad_norm": 0.6270669102668762, "learning_rate": 5.35393912502859e-06, "loss": 0.0452, "step": 41258 }, { "epoch": 0.7306777882559206, "grad_norm": 0.3947926461696625, "learning_rate": 5.35328026784291e-06, "loss": 0.0745, "step": 41259 }, { "epoch": 0.730695497792949, "grad_norm": 0.4804668128490448, "learning_rate": 5.352621442393964e-06, "loss": 0.0539, "step": 41260 }, { "epoch": 0.7307132073299774, "grad_norm": 0.9873048663139343, "learning_rate": 5.351962648683917e-06, "loss": 0.1069, "step": 41261 }, { "epoch": 0.7307309168670058, "grad_norm": 0.6271670460700989, "learning_rate": 5.351303886714937e-06, "loss": 0.0621, "step": 41262 }, { "epoch": 0.7307486264040343, "grad_norm": 0.6338202357292175, "learning_rate": 5.350645156489195e-06, "loss": 0.0949, "step": 41263 }, { "epoch": 0.7307663359410627, "grad_norm": 0.9806004762649536, "learning_rate": 5.349986458008852e-06, "loss": 0.0864, "step": 41264 }, { "epoch": 0.7307840454780911, "grad_norm": 0.6709791421890259, "learning_rate": 5.349327791276076e-06, "loss": 0.0612, "step": 41265 }, { "epoch": 0.7308017550151196, "grad_norm": 0.7919067144393921, "learning_rate": 5.34866915629304e-06, "loss": 0.0592, "step": 41266 }, { "epoch": 0.730819464552148, "grad_norm": 0.32305827736854553, "learning_rate": 5.348010553061902e-06, "loss": 0.05, "step": 41267 }, { "epoch": 0.7308371740891764, "grad_norm": 0.478801965713501, "learning_rate": 5.347351981584832e-06, "loss": 0.0524, "step": 41268 }, { "epoch": 0.7308548836262048, "grad_norm": 0.49638301134109497, "learning_rate": 5.346693441863994e-06, "loss": 0.0803, "step": 41269 }, { "epoch": 0.7308725931632333, "grad_norm": 0.7855515480041504, "learning_rate": 5.346034933901569e-06, "loss": 0.0735, "step": 41270 }, { "epoch": 0.7308903027002617, "grad_norm": 0.9510586857795715, "learning_rate": 5.345376457699706e-06, "loss": 0.0983, "step": 41271 }, { "epoch": 0.7309080122372901, "grad_norm": 0.4773160219192505, "learning_rate": 5.3447180132605785e-06, "loss": 0.0493, "step": 41272 }, { "epoch": 0.7309257217743185, "grad_norm": 0.4936562180519104, "learning_rate": 5.344059600586357e-06, "loss": 0.0467, "step": 41273 }, { "epoch": 0.730943431311347, "grad_norm": 0.44439107179641724, "learning_rate": 5.343401219679198e-06, "loss": 0.0813, "step": 41274 }, { "epoch": 0.7309611408483754, "grad_norm": 0.500567615032196, "learning_rate": 5.342742870541271e-06, "loss": 0.0552, "step": 41275 }, { "epoch": 0.7309788503854038, "grad_norm": 0.2770653963088989, "learning_rate": 5.342084553174745e-06, "loss": 0.032, "step": 41276 }, { "epoch": 0.7309965599224322, "grad_norm": 0.3546106517314911, "learning_rate": 5.341426267581784e-06, "loss": 0.0376, "step": 41277 }, { "epoch": 0.7310142694594607, "grad_norm": 1.0227943658828735, "learning_rate": 5.340768013764551e-06, "loss": 0.0484, "step": 41278 }, { "epoch": 0.7310319789964891, "grad_norm": 0.5919024348258972, "learning_rate": 5.340109791725218e-06, "loss": 0.0569, "step": 41279 }, { "epoch": 0.7310496885335175, "grad_norm": 0.6394824981689453, "learning_rate": 5.33945160146595e-06, "loss": 0.0849, "step": 41280 }, { "epoch": 0.731067398070546, "grad_norm": 0.8602492809295654, "learning_rate": 5.338793442988903e-06, "loss": 0.0865, "step": 41281 }, { "epoch": 0.7310851076075744, "grad_norm": 0.39964109659194946, "learning_rate": 5.33813531629625e-06, "loss": 0.0317, "step": 41282 }, { "epoch": 0.7311028171446028, "grad_norm": 0.6164001226425171, "learning_rate": 5.337477221390153e-06, "loss": 0.0626, "step": 41283 }, { "epoch": 0.7311205266816312, "grad_norm": 0.6312326788902283, "learning_rate": 5.336819158272786e-06, "loss": 0.0485, "step": 41284 }, { "epoch": 0.7311382362186597, "grad_norm": 0.4697456955909729, "learning_rate": 5.336161126946301e-06, "loss": 0.0614, "step": 41285 }, { "epoch": 0.7311559457556881, "grad_norm": 0.37639012932777405, "learning_rate": 5.335503127412868e-06, "loss": 0.0431, "step": 41286 }, { "epoch": 0.7311736552927165, "grad_norm": 0.46195903420448303, "learning_rate": 5.334845159674651e-06, "loss": 0.0779, "step": 41287 }, { "epoch": 0.7311913648297449, "grad_norm": 0.5860366225242615, "learning_rate": 5.3341872237338164e-06, "loss": 0.048, "step": 41288 }, { "epoch": 0.7312090743667734, "grad_norm": 0.6257535219192505, "learning_rate": 5.333529319592529e-06, "loss": 0.0312, "step": 41289 }, { "epoch": 0.7312267839038018, "grad_norm": 0.7879270315170288, "learning_rate": 5.332871447252952e-06, "loss": 0.0568, "step": 41290 }, { "epoch": 0.7312444934408302, "grad_norm": 0.54190593957901, "learning_rate": 5.332213606717255e-06, "loss": 0.0611, "step": 41291 }, { "epoch": 0.7312622029778586, "grad_norm": 0.35750100016593933, "learning_rate": 5.3315557979875925e-06, "loss": 0.0617, "step": 41292 }, { "epoch": 0.7312799125148871, "grad_norm": 0.4047260880470276, "learning_rate": 5.330898021066133e-06, "loss": 0.0506, "step": 41293 }, { "epoch": 0.7312976220519155, "grad_norm": 0.40806108713150024, "learning_rate": 5.3302402759550465e-06, "loss": 0.0501, "step": 41294 }, { "epoch": 0.7313153315889439, "grad_norm": 0.373788058757782, "learning_rate": 5.329582562656488e-06, "loss": 0.0286, "step": 41295 }, { "epoch": 0.7313330411259724, "grad_norm": 0.37584683299064636, "learning_rate": 5.328924881172623e-06, "loss": 0.0538, "step": 41296 }, { "epoch": 0.7313507506630008, "grad_norm": 0.8994998335838318, "learning_rate": 5.3282672315056165e-06, "loss": 0.0727, "step": 41297 }, { "epoch": 0.7313684602000292, "grad_norm": 0.7980871796607971, "learning_rate": 5.3276096136576335e-06, "loss": 0.0493, "step": 41298 }, { "epoch": 0.7313861697370576, "grad_norm": 0.36630964279174805, "learning_rate": 5.326952027630838e-06, "loss": 0.058, "step": 41299 }, { "epoch": 0.7314038792740861, "grad_norm": 0.45987430214881897, "learning_rate": 5.326294473427389e-06, "loss": 0.0507, "step": 41300 }, { "epoch": 0.7314215888111145, "grad_norm": 0.33572688698768616, "learning_rate": 5.325636951049454e-06, "loss": 0.0596, "step": 41301 }, { "epoch": 0.7314392983481429, "grad_norm": 0.5490204095840454, "learning_rate": 5.324979460499202e-06, "loss": 0.0528, "step": 41302 }, { "epoch": 0.7314570078851713, "grad_norm": 0.8749026656150818, "learning_rate": 5.324322001778781e-06, "loss": 0.0766, "step": 41303 }, { "epoch": 0.7314747174221998, "grad_norm": 0.6279982328414917, "learning_rate": 5.323664574890362e-06, "loss": 0.0669, "step": 41304 }, { "epoch": 0.7314924269592282, "grad_norm": 0.6084874868392944, "learning_rate": 5.323007179836114e-06, "loss": 0.0592, "step": 41305 }, { "epoch": 0.7315101364962566, "grad_norm": 0.7736115455627441, "learning_rate": 5.322349816618189e-06, "loss": 0.0784, "step": 41306 }, { "epoch": 0.731527846033285, "grad_norm": 0.7221536636352539, "learning_rate": 5.321692485238748e-06, "loss": 0.0706, "step": 41307 }, { "epoch": 0.7315455555703135, "grad_norm": 0.4264214336872101, "learning_rate": 5.321035185699967e-06, "loss": 0.0769, "step": 41308 }, { "epoch": 0.7315632651073419, "grad_norm": 0.6261661648750305, "learning_rate": 5.320377918004004e-06, "loss": 0.0685, "step": 41309 }, { "epoch": 0.7315809746443703, "grad_norm": 0.5460252165794373, "learning_rate": 5.319720682153015e-06, "loss": 0.0834, "step": 41310 }, { "epoch": 0.7315986841813988, "grad_norm": 0.48227834701538086, "learning_rate": 5.319063478149166e-06, "loss": 0.0569, "step": 41311 }, { "epoch": 0.7316163937184272, "grad_norm": 0.6519932746887207, "learning_rate": 5.3184063059946234e-06, "loss": 0.0353, "step": 41312 }, { "epoch": 0.7316341032554556, "grad_norm": 0.5987462997436523, "learning_rate": 5.31774916569154e-06, "loss": 0.0678, "step": 41313 }, { "epoch": 0.731651812792484, "grad_norm": 0.3852164149284363, "learning_rate": 5.317092057242082e-06, "loss": 0.0536, "step": 41314 }, { "epoch": 0.7316695223295125, "grad_norm": 0.6382070779800415, "learning_rate": 5.316434980648411e-06, "loss": 0.0526, "step": 41315 }, { "epoch": 0.7316872318665409, "grad_norm": 0.5542054772377014, "learning_rate": 5.315777935912691e-06, "loss": 0.0683, "step": 41316 }, { "epoch": 0.7317049414035693, "grad_norm": 0.7171862125396729, "learning_rate": 5.315120923037081e-06, "loss": 0.0447, "step": 41317 }, { "epoch": 0.7317226509405977, "grad_norm": 0.6178651452064514, "learning_rate": 5.314463942023744e-06, "loss": 0.0378, "step": 41318 }, { "epoch": 0.7317403604776263, "grad_norm": 1.0797556638717651, "learning_rate": 5.313806992874848e-06, "loss": 0.0999, "step": 41319 }, { "epoch": 0.7317580700146546, "grad_norm": 0.6722521781921387, "learning_rate": 5.313150075592539e-06, "loss": 0.0871, "step": 41320 }, { "epoch": 0.731775779551683, "grad_norm": 0.7037330269813538, "learning_rate": 5.3124931901789895e-06, "loss": 0.0732, "step": 41321 }, { "epoch": 0.7317934890887114, "grad_norm": 0.9566324949264526, "learning_rate": 5.311836336636357e-06, "loss": 0.1224, "step": 41322 }, { "epoch": 0.73181119862574, "grad_norm": 1.0251177549362183, "learning_rate": 5.311179514966806e-06, "loss": 0.1019, "step": 41323 }, { "epoch": 0.7318289081627684, "grad_norm": 0.3783256709575653, "learning_rate": 5.31052272517249e-06, "loss": 0.0476, "step": 41324 }, { "epoch": 0.7318466176997968, "grad_norm": 0.3491499722003937, "learning_rate": 5.309865967255575e-06, "loss": 0.0473, "step": 41325 }, { "epoch": 0.7318643272368253, "grad_norm": 0.6344270706176758, "learning_rate": 5.30920924121822e-06, "loss": 0.0552, "step": 41326 }, { "epoch": 0.7318820367738537, "grad_norm": 0.5146859884262085, "learning_rate": 5.3085525470625875e-06, "loss": 0.0524, "step": 41327 }, { "epoch": 0.7318997463108821, "grad_norm": 0.3879920542240143, "learning_rate": 5.307895884790838e-06, "loss": 0.0399, "step": 41328 }, { "epoch": 0.7319174558479105, "grad_norm": 0.4341293275356293, "learning_rate": 5.307239254405128e-06, "loss": 0.0579, "step": 41329 }, { "epoch": 0.731935165384939, "grad_norm": 0.6911336779594421, "learning_rate": 5.306582655907627e-06, "loss": 0.071, "step": 41330 }, { "epoch": 0.7319528749219674, "grad_norm": 0.4571278989315033, "learning_rate": 5.305926089300483e-06, "loss": 0.0443, "step": 41331 }, { "epoch": 0.7319705844589958, "grad_norm": 0.5514679551124573, "learning_rate": 5.305269554585862e-06, "loss": 0.0577, "step": 41332 }, { "epoch": 0.7319882939960242, "grad_norm": 0.6955466270446777, "learning_rate": 5.3046130517659245e-06, "loss": 0.0901, "step": 41333 }, { "epoch": 0.7320060035330527, "grad_norm": 0.7289292812347412, "learning_rate": 5.3039565808428325e-06, "loss": 0.0698, "step": 41334 }, { "epoch": 0.7320237130700811, "grad_norm": 0.9031652212142944, "learning_rate": 5.30330014181874e-06, "loss": 0.0724, "step": 41335 }, { "epoch": 0.7320414226071095, "grad_norm": 0.6565877199172974, "learning_rate": 5.3026437346958015e-06, "loss": 0.1009, "step": 41336 }, { "epoch": 0.7320591321441379, "grad_norm": 0.6432937383651733, "learning_rate": 5.301987359476197e-06, "loss": 0.0448, "step": 41337 }, { "epoch": 0.7320768416811664, "grad_norm": 0.7504574656486511, "learning_rate": 5.3013310161620664e-06, "loss": 0.0547, "step": 41338 }, { "epoch": 0.7320945512181948, "grad_norm": 0.37065058946609497, "learning_rate": 5.300674704755577e-06, "loss": 0.0537, "step": 41339 }, { "epoch": 0.7321122607552232, "grad_norm": 0.4651136100292206, "learning_rate": 5.300018425258885e-06, "loss": 0.0397, "step": 41340 }, { "epoch": 0.7321299702922517, "grad_norm": 1.0027047395706177, "learning_rate": 5.299362177674158e-06, "loss": 0.0788, "step": 41341 }, { "epoch": 0.7321476798292801, "grad_norm": 0.893027126789093, "learning_rate": 5.298705962003543e-06, "loss": 0.0708, "step": 41342 }, { "epoch": 0.7321653893663085, "grad_norm": 0.37984853982925415, "learning_rate": 5.298049778249204e-06, "loss": 0.0497, "step": 41343 }, { "epoch": 0.7321830989033369, "grad_norm": 0.9052964448928833, "learning_rate": 5.2973936264132986e-06, "loss": 0.0801, "step": 41344 }, { "epoch": 0.7322008084403654, "grad_norm": 0.5511057376861572, "learning_rate": 5.296737506497988e-06, "loss": 0.0433, "step": 41345 }, { "epoch": 0.7322185179773938, "grad_norm": 0.6267858743667603, "learning_rate": 5.296081418505431e-06, "loss": 0.0595, "step": 41346 }, { "epoch": 0.7322362275144222, "grad_norm": 0.4059947431087494, "learning_rate": 5.295425362437783e-06, "loss": 0.0643, "step": 41347 }, { "epoch": 0.7322539370514506, "grad_norm": 0.761445939540863, "learning_rate": 5.294769338297209e-06, "loss": 0.0637, "step": 41348 }, { "epoch": 0.7322716465884791, "grad_norm": 0.6616843342781067, "learning_rate": 5.294113346085857e-06, "loss": 0.0558, "step": 41349 }, { "epoch": 0.7322893561255075, "grad_norm": 0.5350216031074524, "learning_rate": 5.293457385805892e-06, "loss": 0.0701, "step": 41350 }, { "epoch": 0.7323070656625359, "grad_norm": 0.43784070014953613, "learning_rate": 5.292801457459474e-06, "loss": 0.0523, "step": 41351 }, { "epoch": 0.7323247751995643, "grad_norm": 0.423502653837204, "learning_rate": 5.292145561048751e-06, "loss": 0.0556, "step": 41352 }, { "epoch": 0.7323424847365928, "grad_norm": 0.6196500658988953, "learning_rate": 5.29148969657589e-06, "loss": 0.0561, "step": 41353 }, { "epoch": 0.7323601942736212, "grad_norm": 0.9713573455810547, "learning_rate": 5.290833864043046e-06, "loss": 0.0581, "step": 41354 }, { "epoch": 0.7323779038106496, "grad_norm": 0.640127956867218, "learning_rate": 5.2901780634523735e-06, "loss": 0.0655, "step": 41355 }, { "epoch": 0.7323956133476781, "grad_norm": 0.4431515634059906, "learning_rate": 5.289522294806035e-06, "loss": 0.0282, "step": 41356 }, { "epoch": 0.7324133228847065, "grad_norm": 0.604780912399292, "learning_rate": 5.288866558106186e-06, "loss": 0.0602, "step": 41357 }, { "epoch": 0.7324310324217349, "grad_norm": 0.3340702950954437, "learning_rate": 5.28821085335499e-06, "loss": 0.0351, "step": 41358 }, { "epoch": 0.7324487419587633, "grad_norm": 0.39990320801734924, "learning_rate": 5.287555180554591e-06, "loss": 0.0523, "step": 41359 }, { "epoch": 0.7324664514957918, "grad_norm": 0.5317496061325073, "learning_rate": 5.286899539707154e-06, "loss": 0.0541, "step": 41360 }, { "epoch": 0.7324841610328202, "grad_norm": 0.6163032054901123, "learning_rate": 5.286243930814835e-06, "loss": 0.0546, "step": 41361 }, { "epoch": 0.7325018705698486, "grad_norm": 0.6701860427856445, "learning_rate": 5.2855883538797964e-06, "loss": 0.0351, "step": 41362 }, { "epoch": 0.732519580106877, "grad_norm": 0.2944980561733246, "learning_rate": 5.2849328089041835e-06, "loss": 0.0303, "step": 41363 }, { "epoch": 0.7325372896439055, "grad_norm": 0.7552635073661804, "learning_rate": 5.2842772958901595e-06, "loss": 0.0717, "step": 41364 }, { "epoch": 0.7325549991809339, "grad_norm": 0.45406919717788696, "learning_rate": 5.283621814839881e-06, "loss": 0.0546, "step": 41365 }, { "epoch": 0.7325727087179623, "grad_norm": 0.3519403636455536, "learning_rate": 5.282966365755503e-06, "loss": 0.0474, "step": 41366 }, { "epoch": 0.7325904182549907, "grad_norm": 0.6974986791610718, "learning_rate": 5.2823109486391835e-06, "loss": 0.0675, "step": 41367 }, { "epoch": 0.7326081277920192, "grad_norm": 0.3696911633014679, "learning_rate": 5.281655563493079e-06, "loss": 0.0289, "step": 41368 }, { "epoch": 0.7326258373290476, "grad_norm": 0.634626567363739, "learning_rate": 5.281000210319348e-06, "loss": 0.0592, "step": 41369 }, { "epoch": 0.732643546866076, "grad_norm": 0.7076719999313354, "learning_rate": 5.2803448891201395e-06, "loss": 0.0645, "step": 41370 }, { "epoch": 0.7326612564031045, "grad_norm": 0.6304682493209839, "learning_rate": 5.279689599897613e-06, "loss": 0.0437, "step": 41371 }, { "epoch": 0.7326789659401329, "grad_norm": 0.5144701600074768, "learning_rate": 5.279034342653924e-06, "loss": 0.05, "step": 41372 }, { "epoch": 0.7326966754771613, "grad_norm": 0.4774242341518402, "learning_rate": 5.278379117391231e-06, "loss": 0.0766, "step": 41373 }, { "epoch": 0.7327143850141897, "grad_norm": 0.7782676815986633, "learning_rate": 5.277723924111685e-06, "loss": 0.0812, "step": 41374 }, { "epoch": 0.7327320945512182, "grad_norm": 0.24155879020690918, "learning_rate": 5.277068762817443e-06, "loss": 0.0457, "step": 41375 }, { "epoch": 0.7327498040882466, "grad_norm": 0.5062564611434937, "learning_rate": 5.276413633510668e-06, "loss": 0.0794, "step": 41376 }, { "epoch": 0.732767513625275, "grad_norm": 0.6562559008598328, "learning_rate": 5.275758536193504e-06, "loss": 0.0657, "step": 41377 }, { "epoch": 0.7327852231623034, "grad_norm": 0.33756861090660095, "learning_rate": 5.27510347086811e-06, "loss": 0.0258, "step": 41378 }, { "epoch": 0.7328029326993319, "grad_norm": 0.7330232858657837, "learning_rate": 5.274448437536643e-06, "loss": 0.0639, "step": 41379 }, { "epoch": 0.7328206422363603, "grad_norm": 0.48112571239471436, "learning_rate": 5.2737934362012615e-06, "loss": 0.0484, "step": 41380 }, { "epoch": 0.7328383517733887, "grad_norm": 0.5802316665649414, "learning_rate": 5.27313846686411e-06, "loss": 0.0463, "step": 41381 }, { "epoch": 0.7328560613104171, "grad_norm": 0.42068055272102356, "learning_rate": 5.2724835295273515e-06, "loss": 0.036, "step": 41382 }, { "epoch": 0.7328737708474456, "grad_norm": 0.9243152737617493, "learning_rate": 5.271828624193135e-06, "loss": 0.0682, "step": 41383 }, { "epoch": 0.732891480384474, "grad_norm": 0.4641273021697998, "learning_rate": 5.271173750863621e-06, "loss": 0.0471, "step": 41384 }, { "epoch": 0.7329091899215024, "grad_norm": 0.46189990639686584, "learning_rate": 5.270518909540961e-06, "loss": 0.0554, "step": 41385 }, { "epoch": 0.732926899458531, "grad_norm": 0.5627368688583374, "learning_rate": 5.269864100227309e-06, "loss": 0.0749, "step": 41386 }, { "epoch": 0.7329446089955594, "grad_norm": 0.5094106793403625, "learning_rate": 5.2692093229248245e-06, "loss": 0.0356, "step": 41387 }, { "epoch": 0.7329623185325878, "grad_norm": 0.7562204003334045, "learning_rate": 5.268554577635654e-06, "loss": 0.0695, "step": 41388 }, { "epoch": 0.7329800280696162, "grad_norm": 0.525439977645874, "learning_rate": 5.267899864361951e-06, "loss": 0.0643, "step": 41389 }, { "epoch": 0.7329977376066447, "grad_norm": 0.5267907381057739, "learning_rate": 5.267245183105881e-06, "loss": 0.0554, "step": 41390 }, { "epoch": 0.7330154471436731, "grad_norm": 0.9063578248023987, "learning_rate": 5.266590533869586e-06, "loss": 0.0597, "step": 41391 }, { "epoch": 0.7330331566807015, "grad_norm": 0.5588670372962952, "learning_rate": 5.265935916655221e-06, "loss": 0.0657, "step": 41392 }, { "epoch": 0.7330508662177299, "grad_norm": 0.4875437617301941, "learning_rate": 5.2652813314649435e-06, "loss": 0.055, "step": 41393 }, { "epoch": 0.7330685757547584, "grad_norm": 0.8251280188560486, "learning_rate": 5.264626778300906e-06, "loss": 0.0403, "step": 41394 }, { "epoch": 0.7330862852917868, "grad_norm": 0.5112125277519226, "learning_rate": 5.263972257165261e-06, "loss": 0.0622, "step": 41395 }, { "epoch": 0.7331039948288152, "grad_norm": 0.41401398181915283, "learning_rate": 5.263317768060163e-06, "loss": 0.0383, "step": 41396 }, { "epoch": 0.7331217043658436, "grad_norm": 0.7027286887168884, "learning_rate": 5.26266331098777e-06, "loss": 0.0872, "step": 41397 }, { "epoch": 0.7331394139028721, "grad_norm": 0.4383806586265564, "learning_rate": 5.262008885950224e-06, "loss": 0.0341, "step": 41398 }, { "epoch": 0.7331571234399005, "grad_norm": 0.6341117024421692, "learning_rate": 5.261354492949685e-06, "loss": 0.0531, "step": 41399 }, { "epoch": 0.7331748329769289, "grad_norm": 0.5373095273971558, "learning_rate": 5.260700131988304e-06, "loss": 0.0609, "step": 41400 }, { "epoch": 0.7331925425139574, "grad_norm": 0.34479963779449463, "learning_rate": 5.2600458030682385e-06, "loss": 0.0353, "step": 41401 }, { "epoch": 0.7332102520509858, "grad_norm": 0.7279580235481262, "learning_rate": 5.259391506191628e-06, "loss": 0.0739, "step": 41402 }, { "epoch": 0.7332279615880142, "grad_norm": 0.5483214259147644, "learning_rate": 5.25873724136064e-06, "loss": 0.0416, "step": 41403 }, { "epoch": 0.7332456711250426, "grad_norm": 0.5200974941253662, "learning_rate": 5.2580830085774205e-06, "loss": 0.0605, "step": 41404 }, { "epoch": 0.7332633806620711, "grad_norm": 0.7286122441291809, "learning_rate": 5.257428807844128e-06, "loss": 0.0502, "step": 41405 }, { "epoch": 0.7332810901990995, "grad_norm": 0.5945690274238586, "learning_rate": 5.256774639162904e-06, "loss": 0.0591, "step": 41406 }, { "epoch": 0.7332987997361279, "grad_norm": 0.4879782795906067, "learning_rate": 5.2561205025359055e-06, "loss": 0.0424, "step": 41407 }, { "epoch": 0.7333165092731563, "grad_norm": 0.5315319299697876, "learning_rate": 5.255466397965291e-06, "loss": 0.0575, "step": 41408 }, { "epoch": 0.7333342188101848, "grad_norm": 0.7174094319343567, "learning_rate": 5.254812325453201e-06, "loss": 0.0607, "step": 41409 }, { "epoch": 0.7333519283472132, "grad_norm": 0.5140597224235535, "learning_rate": 5.254158285001793e-06, "loss": 0.0589, "step": 41410 }, { "epoch": 0.7333696378842416, "grad_norm": 0.6086634397506714, "learning_rate": 5.2535042766132176e-06, "loss": 0.0657, "step": 41411 }, { "epoch": 0.73338734742127, "grad_norm": 0.6225966215133667, "learning_rate": 5.252850300289628e-06, "loss": 0.0448, "step": 41412 }, { "epoch": 0.7334050569582985, "grad_norm": 0.5565205216407776, "learning_rate": 5.252196356033176e-06, "loss": 0.0526, "step": 41413 }, { "epoch": 0.7334227664953269, "grad_norm": 0.7033705115318298, "learning_rate": 5.251542443846011e-06, "loss": 0.0365, "step": 41414 }, { "epoch": 0.7334404760323553, "grad_norm": 1.0670409202575684, "learning_rate": 5.250888563730289e-06, "loss": 0.0807, "step": 41415 }, { "epoch": 0.7334581855693838, "grad_norm": 0.8449105024337769, "learning_rate": 5.250234715688155e-06, "loss": 0.0638, "step": 41416 }, { "epoch": 0.7334758951064122, "grad_norm": 0.5949037075042725, "learning_rate": 5.249580899721762e-06, "loss": 0.0532, "step": 41417 }, { "epoch": 0.7334936046434406, "grad_norm": 0.6452901363372803, "learning_rate": 5.248927115833261e-06, "loss": 0.0664, "step": 41418 }, { "epoch": 0.733511314180469, "grad_norm": 0.6328377723693848, "learning_rate": 5.24827336402481e-06, "loss": 0.0569, "step": 41419 }, { "epoch": 0.7335290237174975, "grad_norm": 0.5015931129455566, "learning_rate": 5.247619644298547e-06, "loss": 0.0474, "step": 41420 }, { "epoch": 0.7335467332545259, "grad_norm": 0.3457247018814087, "learning_rate": 5.246965956656627e-06, "loss": 0.0394, "step": 41421 }, { "epoch": 0.7335644427915543, "grad_norm": 0.6738185882568359, "learning_rate": 5.2463123011012066e-06, "loss": 0.081, "step": 41422 }, { "epoch": 0.7335821523285827, "grad_norm": 0.33764082193374634, "learning_rate": 5.245658677634429e-06, "loss": 0.0608, "step": 41423 }, { "epoch": 0.7335998618656112, "grad_norm": 0.35010573267936707, "learning_rate": 5.24500508625845e-06, "loss": 0.0748, "step": 41424 }, { "epoch": 0.7336175714026396, "grad_norm": 0.5109032392501831, "learning_rate": 5.244351526975416e-06, "loss": 0.0735, "step": 41425 }, { "epoch": 0.733635280939668, "grad_norm": 0.7134569883346558, "learning_rate": 5.243697999787484e-06, "loss": 0.0695, "step": 41426 }, { "epoch": 0.7336529904766964, "grad_norm": 0.7001389861106873, "learning_rate": 5.243044504696795e-06, "loss": 0.0745, "step": 41427 }, { "epoch": 0.7336707000137249, "grad_norm": 0.4689711034297943, "learning_rate": 5.242391041705502e-06, "loss": 0.0425, "step": 41428 }, { "epoch": 0.7336884095507533, "grad_norm": 0.5205341577529907, "learning_rate": 5.2417376108157605e-06, "loss": 0.0567, "step": 41429 }, { "epoch": 0.7337061190877817, "grad_norm": 0.7730809450149536, "learning_rate": 5.24108421202971e-06, "loss": 0.0793, "step": 41430 }, { "epoch": 0.7337238286248102, "grad_norm": 1.2610926628112793, "learning_rate": 5.2404308453495e-06, "loss": 0.0559, "step": 41431 }, { "epoch": 0.7337415381618386, "grad_norm": 0.5094581842422485, "learning_rate": 5.239777510777292e-06, "loss": 0.0554, "step": 41432 }, { "epoch": 0.733759247698867, "grad_norm": 0.6352019309997559, "learning_rate": 5.239124208315233e-06, "loss": 0.0718, "step": 41433 }, { "epoch": 0.7337769572358954, "grad_norm": 0.4790779650211334, "learning_rate": 5.238470937965462e-06, "loss": 0.045, "step": 41434 }, { "epoch": 0.7337946667729239, "grad_norm": 0.6306193470954895, "learning_rate": 5.2378176997301365e-06, "loss": 0.0588, "step": 41435 }, { "epoch": 0.7338123763099523, "grad_norm": 0.34223178029060364, "learning_rate": 5.237164493611401e-06, "loss": 0.0576, "step": 41436 }, { "epoch": 0.7338300858469807, "grad_norm": 0.62062668800354, "learning_rate": 5.236511319611413e-06, "loss": 0.067, "step": 41437 }, { "epoch": 0.7338477953840091, "grad_norm": 0.6264554262161255, "learning_rate": 5.23585817773231e-06, "loss": 0.0513, "step": 41438 }, { "epoch": 0.7338655049210376, "grad_norm": 0.5478190183639526, "learning_rate": 5.235205067976246e-06, "loss": 0.0614, "step": 41439 }, { "epoch": 0.733883214458066, "grad_norm": 0.6249479055404663, "learning_rate": 5.2345519903453704e-06, "loss": 0.0582, "step": 41440 }, { "epoch": 0.7339009239950944, "grad_norm": 0.8703877329826355, "learning_rate": 5.23389894484183e-06, "loss": 0.0858, "step": 41441 }, { "epoch": 0.7339186335321228, "grad_norm": 0.8277547359466553, "learning_rate": 5.233245931467776e-06, "loss": 0.0623, "step": 41442 }, { "epoch": 0.7339363430691513, "grad_norm": 0.42716410756111145, "learning_rate": 5.232592950225354e-06, "loss": 0.0699, "step": 41443 }, { "epoch": 0.7339540526061797, "grad_norm": 0.6420177817344666, "learning_rate": 5.2319400011167165e-06, "loss": 0.0458, "step": 41444 }, { "epoch": 0.7339717621432081, "grad_norm": 0.7998992800712585, "learning_rate": 5.231287084144006e-06, "loss": 0.0692, "step": 41445 }, { "epoch": 0.7339894716802366, "grad_norm": 0.5709619522094727, "learning_rate": 5.2306341993093724e-06, "loss": 0.0472, "step": 41446 }, { "epoch": 0.734007181217265, "grad_norm": 0.27332618832588196, "learning_rate": 5.2299813466149686e-06, "loss": 0.0335, "step": 41447 }, { "epoch": 0.7340248907542934, "grad_norm": 0.595634400844574, "learning_rate": 5.229328526062932e-06, "loss": 0.0518, "step": 41448 }, { "epoch": 0.7340426002913218, "grad_norm": 0.5221746563911438, "learning_rate": 5.228675737655418e-06, "loss": 0.0469, "step": 41449 }, { "epoch": 0.7340603098283504, "grad_norm": 0.5484087467193604, "learning_rate": 5.2280229813945725e-06, "loss": 0.0603, "step": 41450 }, { "epoch": 0.7340780193653788, "grad_norm": 0.7338660359382629, "learning_rate": 5.227370257282542e-06, "loss": 0.0391, "step": 41451 }, { "epoch": 0.7340957289024072, "grad_norm": 0.6088396906852722, "learning_rate": 5.226717565321476e-06, "loss": 0.0525, "step": 41452 }, { "epoch": 0.7341134384394356, "grad_norm": 0.6621338725090027, "learning_rate": 5.2260649055135195e-06, "loss": 0.0752, "step": 41453 }, { "epoch": 0.7341311479764641, "grad_norm": 0.35971352458000183, "learning_rate": 5.225412277860826e-06, "loss": 0.06, "step": 41454 }, { "epoch": 0.7341488575134925, "grad_norm": 0.6166760921478271, "learning_rate": 5.224759682365533e-06, "loss": 0.0541, "step": 41455 }, { "epoch": 0.7341665670505209, "grad_norm": 0.5437507629394531, "learning_rate": 5.2241071190297925e-06, "loss": 0.0713, "step": 41456 }, { "epoch": 0.7341842765875493, "grad_norm": 0.6983457803726196, "learning_rate": 5.223454587855751e-06, "loss": 0.0555, "step": 41457 }, { "epoch": 0.7342019861245778, "grad_norm": 0.45756426453590393, "learning_rate": 5.2228020888455576e-06, "loss": 0.0364, "step": 41458 }, { "epoch": 0.7342196956616062, "grad_norm": 0.828811764717102, "learning_rate": 5.2221496220013546e-06, "loss": 0.0723, "step": 41459 }, { "epoch": 0.7342374051986346, "grad_norm": 1.0018839836120605, "learning_rate": 5.2214971873252835e-06, "loss": 0.0631, "step": 41460 }, { "epoch": 0.7342551147356631, "grad_norm": 0.5269625186920166, "learning_rate": 5.220844784819508e-06, "loss": 0.052, "step": 41461 }, { "epoch": 0.7342728242726915, "grad_norm": 0.6514562368392944, "learning_rate": 5.220192414486159e-06, "loss": 0.0467, "step": 41462 }, { "epoch": 0.7342905338097199, "grad_norm": 0.7576326131820679, "learning_rate": 5.219540076327388e-06, "loss": 0.0458, "step": 41463 }, { "epoch": 0.7343082433467483, "grad_norm": 0.39352989196777344, "learning_rate": 5.21888777034534e-06, "loss": 0.0436, "step": 41464 }, { "epoch": 0.7343259528837768, "grad_norm": 0.3988646864891052, "learning_rate": 5.218235496542168e-06, "loss": 0.0668, "step": 41465 }, { "epoch": 0.7343436624208052, "grad_norm": 0.6860631108283997, "learning_rate": 5.217583254920007e-06, "loss": 0.0634, "step": 41466 }, { "epoch": 0.7343613719578336, "grad_norm": 0.947433590888977, "learning_rate": 5.216931045481007e-06, "loss": 0.0936, "step": 41467 }, { "epoch": 0.734379081494862, "grad_norm": 0.5640738606452942, "learning_rate": 5.2162788682273135e-06, "loss": 0.0619, "step": 41468 }, { "epoch": 0.7343967910318905, "grad_norm": 0.5005732178688049, "learning_rate": 5.215626723161075e-06, "loss": 0.0585, "step": 41469 }, { "epoch": 0.7344145005689189, "grad_norm": 0.7245593070983887, "learning_rate": 5.214974610284434e-06, "loss": 0.0405, "step": 41470 }, { "epoch": 0.7344322101059473, "grad_norm": 0.15483233332633972, "learning_rate": 5.214322529599537e-06, "loss": 0.0294, "step": 41471 }, { "epoch": 0.7344499196429757, "grad_norm": 0.5558100342750549, "learning_rate": 5.213670481108534e-06, "loss": 0.0472, "step": 41472 }, { "epoch": 0.7344676291800042, "grad_norm": 0.5241860747337341, "learning_rate": 5.213018464813561e-06, "loss": 0.1121, "step": 41473 }, { "epoch": 0.7344853387170326, "grad_norm": 0.3149462044239044, "learning_rate": 5.212366480716766e-06, "loss": 0.0432, "step": 41474 }, { "epoch": 0.734503048254061, "grad_norm": 0.6389060616493225, "learning_rate": 5.211714528820295e-06, "loss": 0.0537, "step": 41475 }, { "epoch": 0.7345207577910895, "grad_norm": 0.5722241401672363, "learning_rate": 5.2110626091262996e-06, "loss": 0.0536, "step": 41476 }, { "epoch": 0.7345384673281179, "grad_norm": 0.5248901844024658, "learning_rate": 5.2104107216369115e-06, "loss": 0.0426, "step": 41477 }, { "epoch": 0.7345561768651463, "grad_norm": 0.19552673399448395, "learning_rate": 5.2097588663542835e-06, "loss": 0.0387, "step": 41478 }, { "epoch": 0.7345738864021747, "grad_norm": 0.7524981498718262, "learning_rate": 5.2091070432805585e-06, "loss": 0.0736, "step": 41479 }, { "epoch": 0.7345915959392032, "grad_norm": 0.3077346682548523, "learning_rate": 5.2084552524178795e-06, "loss": 0.077, "step": 41480 }, { "epoch": 0.7346093054762316, "grad_norm": 0.6644560098648071, "learning_rate": 5.207803493768395e-06, "loss": 0.0531, "step": 41481 }, { "epoch": 0.73462701501326, "grad_norm": 0.8620261549949646, "learning_rate": 5.207151767334244e-06, "loss": 0.073, "step": 41482 }, { "epoch": 0.7346447245502884, "grad_norm": 0.486528605222702, "learning_rate": 5.20650007311758e-06, "loss": 0.0574, "step": 41483 }, { "epoch": 0.7346624340873169, "grad_norm": 0.45078009366989136, "learning_rate": 5.205848411120534e-06, "loss": 0.0715, "step": 41484 }, { "epoch": 0.7346801436243453, "grad_norm": 0.58978670835495, "learning_rate": 5.205196781345256e-06, "loss": 0.0773, "step": 41485 }, { "epoch": 0.7346978531613737, "grad_norm": 0.8710758090019226, "learning_rate": 5.204545183793896e-06, "loss": 0.0734, "step": 41486 }, { "epoch": 0.7347155626984021, "grad_norm": 0.8536815643310547, "learning_rate": 5.203893618468588e-06, "loss": 0.0666, "step": 41487 }, { "epoch": 0.7347332722354306, "grad_norm": 0.3946288824081421, "learning_rate": 5.203242085371476e-06, "loss": 0.0307, "step": 41488 }, { "epoch": 0.734750981772459, "grad_norm": 0.46319589018821716, "learning_rate": 5.202590584504704e-06, "loss": 0.0528, "step": 41489 }, { "epoch": 0.7347686913094874, "grad_norm": 0.7436779141426086, "learning_rate": 5.201939115870425e-06, "loss": 0.0535, "step": 41490 }, { "epoch": 0.7347864008465159, "grad_norm": 0.8842740654945374, "learning_rate": 5.201287679470774e-06, "loss": 0.0637, "step": 41491 }, { "epoch": 0.7348041103835443, "grad_norm": 0.5013459324836731, "learning_rate": 5.200636275307893e-06, "loss": 0.0782, "step": 41492 }, { "epoch": 0.7348218199205727, "grad_norm": 1.007676362991333, "learning_rate": 5.199984903383933e-06, "loss": 0.0739, "step": 41493 }, { "epoch": 0.7348395294576011, "grad_norm": 0.6862204074859619, "learning_rate": 5.1993335637010276e-06, "loss": 0.0691, "step": 41494 }, { "epoch": 0.7348572389946296, "grad_norm": 0.3673619329929352, "learning_rate": 5.198682256261321e-06, "loss": 0.0444, "step": 41495 }, { "epoch": 0.734874948531658, "grad_norm": 0.6499713063240051, "learning_rate": 5.198030981066961e-06, "loss": 0.0507, "step": 41496 }, { "epoch": 0.7348926580686864, "grad_norm": 0.7139600515365601, "learning_rate": 5.197379738120088e-06, "loss": 0.0926, "step": 41497 }, { "epoch": 0.7349103676057148, "grad_norm": 0.8570083379745483, "learning_rate": 5.196728527422843e-06, "loss": 0.0695, "step": 41498 }, { "epoch": 0.7349280771427433, "grad_norm": 0.92714923620224, "learning_rate": 5.196077348977369e-06, "loss": 0.093, "step": 41499 }, { "epoch": 0.7349457866797717, "grad_norm": 0.3734719157218933, "learning_rate": 5.195426202785815e-06, "loss": 0.0669, "step": 41500 }, { "epoch": 0.7349634962168001, "grad_norm": 0.5237469673156738, "learning_rate": 5.19477508885031e-06, "loss": 0.0426, "step": 41501 }, { "epoch": 0.7349812057538285, "grad_norm": 0.5906110405921936, "learning_rate": 5.194124007173004e-06, "loss": 0.0495, "step": 41502 }, { "epoch": 0.734998915290857, "grad_norm": 0.6515455842018127, "learning_rate": 5.19347295775604e-06, "loss": 0.0328, "step": 41503 }, { "epoch": 0.7350166248278854, "grad_norm": 0.6752955317497253, "learning_rate": 5.192821940601562e-06, "loss": 0.0652, "step": 41504 }, { "epoch": 0.7350343343649138, "grad_norm": 0.7433416247367859, "learning_rate": 5.192170955711703e-06, "loss": 0.0648, "step": 41505 }, { "epoch": 0.7350520439019423, "grad_norm": 0.45943501591682434, "learning_rate": 5.19152000308861e-06, "loss": 0.0359, "step": 41506 }, { "epoch": 0.7350697534389707, "grad_norm": 0.46423426270484924, "learning_rate": 5.190869082734423e-06, "loss": 0.041, "step": 41507 }, { "epoch": 0.7350874629759991, "grad_norm": 0.8536702394485474, "learning_rate": 5.190218194651285e-06, "loss": 0.0847, "step": 41508 }, { "epoch": 0.7351051725130275, "grad_norm": 0.5355898141860962, "learning_rate": 5.189567338841338e-06, "loss": 0.0408, "step": 41509 }, { "epoch": 0.735122882050056, "grad_norm": 0.4201620817184448, "learning_rate": 5.188916515306721e-06, "loss": 0.0587, "step": 41510 }, { "epoch": 0.7351405915870844, "grad_norm": 0.5320240259170532, "learning_rate": 5.1882657240495825e-06, "loss": 0.0595, "step": 41511 }, { "epoch": 0.7351583011241128, "grad_norm": 0.6291627883911133, "learning_rate": 5.18761496507205e-06, "loss": 0.0475, "step": 41512 }, { "epoch": 0.7351760106611412, "grad_norm": 0.7755728960037231, "learning_rate": 5.186964238376275e-06, "loss": 0.0614, "step": 41513 }, { "epoch": 0.7351937201981698, "grad_norm": 0.5512356162071228, "learning_rate": 5.186313543964392e-06, "loss": 0.0498, "step": 41514 }, { "epoch": 0.7352114297351982, "grad_norm": 0.720586359500885, "learning_rate": 5.185662881838552e-06, "loss": 0.0527, "step": 41515 }, { "epoch": 0.7352291392722266, "grad_norm": 0.3904580771923065, "learning_rate": 5.185012252000882e-06, "loss": 0.0462, "step": 41516 }, { "epoch": 0.735246848809255, "grad_norm": 0.3870741128921509, "learning_rate": 5.18436165445353e-06, "loss": 0.0398, "step": 41517 }, { "epoch": 0.7352645583462835, "grad_norm": 0.598452627658844, "learning_rate": 5.183711089198634e-06, "loss": 0.0373, "step": 41518 }, { "epoch": 0.7352822678833119, "grad_norm": 0.6963244080543518, "learning_rate": 5.183060556238337e-06, "loss": 0.0664, "step": 41519 }, { "epoch": 0.7352999774203403, "grad_norm": 0.6793385148048401, "learning_rate": 5.182410055574779e-06, "loss": 0.054, "step": 41520 }, { "epoch": 0.7353176869573688, "grad_norm": 0.528144359588623, "learning_rate": 5.1817595872100965e-06, "loss": 0.0431, "step": 41521 }, { "epoch": 0.7353353964943972, "grad_norm": 0.5267298221588135, "learning_rate": 5.1811091511464385e-06, "loss": 0.0471, "step": 41522 }, { "epoch": 0.7353531060314256, "grad_norm": 0.5400299429893494, "learning_rate": 5.180458747385933e-06, "loss": 0.069, "step": 41523 }, { "epoch": 0.735370815568454, "grad_norm": 0.5818412899971008, "learning_rate": 5.179808375930725e-06, "loss": 0.0564, "step": 41524 }, { "epoch": 0.7353885251054825, "grad_norm": 0.5437278747558594, "learning_rate": 5.179158036782958e-06, "loss": 0.0852, "step": 41525 }, { "epoch": 0.7354062346425109, "grad_norm": 0.7241750359535217, "learning_rate": 5.1785077299447586e-06, "loss": 0.0812, "step": 41526 }, { "epoch": 0.7354239441795393, "grad_norm": 0.4096255302429199, "learning_rate": 5.177857455418281e-06, "loss": 0.0477, "step": 41527 }, { "epoch": 0.7354416537165677, "grad_norm": 0.6680359244346619, "learning_rate": 5.177207213205659e-06, "loss": 0.0589, "step": 41528 }, { "epoch": 0.7354593632535962, "grad_norm": 0.5231608748435974, "learning_rate": 5.176557003309036e-06, "loss": 0.0346, "step": 41529 }, { "epoch": 0.7354770727906246, "grad_norm": 0.5477342009544373, "learning_rate": 5.175906825730542e-06, "loss": 0.0589, "step": 41530 }, { "epoch": 0.735494782327653, "grad_norm": 0.6168109178543091, "learning_rate": 5.17525668047232e-06, "loss": 0.0681, "step": 41531 }, { "epoch": 0.7355124918646814, "grad_norm": 0.9391589760780334, "learning_rate": 5.174606567536517e-06, "loss": 0.0614, "step": 41532 }, { "epoch": 0.7355302014017099, "grad_norm": 1.0768485069274902, "learning_rate": 5.173956486925258e-06, "loss": 0.0583, "step": 41533 }, { "epoch": 0.7355479109387383, "grad_norm": 0.4223369359970093, "learning_rate": 5.173306438640687e-06, "loss": 0.0309, "step": 41534 }, { "epoch": 0.7355656204757667, "grad_norm": 0.5896074771881104, "learning_rate": 5.1726564226849466e-06, "loss": 0.058, "step": 41535 }, { "epoch": 0.7355833300127952, "grad_norm": 1.0557223558425903, "learning_rate": 5.1720064390601715e-06, "loss": 0.0567, "step": 41536 }, { "epoch": 0.7356010395498236, "grad_norm": 0.6916989684104919, "learning_rate": 5.1713564877685015e-06, "loss": 0.0814, "step": 41537 }, { "epoch": 0.735618749086852, "grad_norm": 0.6360388398170471, "learning_rate": 5.170706568812074e-06, "loss": 0.051, "step": 41538 }, { "epoch": 0.7356364586238804, "grad_norm": 0.5008204579353333, "learning_rate": 5.170056682193033e-06, "loss": 0.0538, "step": 41539 }, { "epoch": 0.7356541681609089, "grad_norm": 0.8087464570999146, "learning_rate": 5.169406827913506e-06, "loss": 0.0577, "step": 41540 }, { "epoch": 0.7356718776979373, "grad_norm": 1.0795434713363647, "learning_rate": 5.1687570059756364e-06, "loss": 0.0627, "step": 41541 }, { "epoch": 0.7356895872349657, "grad_norm": 0.5796595811843872, "learning_rate": 5.168107216381562e-06, "loss": 0.0496, "step": 41542 }, { "epoch": 0.7357072967719941, "grad_norm": 0.5717754364013672, "learning_rate": 5.167457459133426e-06, "loss": 0.0875, "step": 41543 }, { "epoch": 0.7357250063090226, "grad_norm": 0.38405275344848633, "learning_rate": 5.166807734233355e-06, "loss": 0.0657, "step": 41544 }, { "epoch": 0.735742715846051, "grad_norm": 0.49031201004981995, "learning_rate": 5.166158041683491e-06, "loss": 0.0367, "step": 41545 }, { "epoch": 0.7357604253830794, "grad_norm": 0.5559725761413574, "learning_rate": 5.165508381485973e-06, "loss": 0.0537, "step": 41546 }, { "epoch": 0.7357781349201078, "grad_norm": 0.4377371668815613, "learning_rate": 5.164858753642939e-06, "loss": 0.0427, "step": 41547 }, { "epoch": 0.7357958444571363, "grad_norm": 0.35891249775886536, "learning_rate": 5.164209158156524e-06, "loss": 0.0452, "step": 41548 }, { "epoch": 0.7358135539941647, "grad_norm": 0.20366095006465912, "learning_rate": 5.163559595028864e-06, "loss": 0.0442, "step": 41549 }, { "epoch": 0.7358312635311931, "grad_norm": 0.5565758943557739, "learning_rate": 5.162910064262106e-06, "loss": 0.0431, "step": 41550 }, { "epoch": 0.7358489730682216, "grad_norm": 0.6425808072090149, "learning_rate": 5.162260565858373e-06, "loss": 0.0678, "step": 41551 }, { "epoch": 0.73586668260525, "grad_norm": 0.5444838404655457, "learning_rate": 5.161611099819809e-06, "loss": 0.0599, "step": 41552 }, { "epoch": 0.7358843921422784, "grad_norm": 0.8016030192375183, "learning_rate": 5.160961666148547e-06, "loss": 0.102, "step": 41553 }, { "epoch": 0.7359021016793068, "grad_norm": 0.8887003660202026, "learning_rate": 5.160312264846731e-06, "loss": 0.0661, "step": 41554 }, { "epoch": 0.7359198112163353, "grad_norm": 0.5401217341423035, "learning_rate": 5.159662895916483e-06, "loss": 0.0722, "step": 41555 }, { "epoch": 0.7359375207533637, "grad_norm": 0.4097054600715637, "learning_rate": 5.159013559359955e-06, "loss": 0.0497, "step": 41556 }, { "epoch": 0.7359552302903921, "grad_norm": 0.7381083965301514, "learning_rate": 5.1583642551792805e-06, "loss": 0.0668, "step": 41557 }, { "epoch": 0.7359729398274205, "grad_norm": 0.6316897869110107, "learning_rate": 5.157714983376589e-06, "loss": 0.0797, "step": 41558 }, { "epoch": 0.735990649364449, "grad_norm": 0.4113765060901642, "learning_rate": 5.1570657439540175e-06, "loss": 0.0376, "step": 41559 }, { "epoch": 0.7360083589014774, "grad_norm": 0.7330239415168762, "learning_rate": 5.1564165369137064e-06, "loss": 0.042, "step": 41560 }, { "epoch": 0.7360260684385058, "grad_norm": 0.6506019234657288, "learning_rate": 5.1557673622577934e-06, "loss": 0.0503, "step": 41561 }, { "epoch": 0.7360437779755342, "grad_norm": 0.7326885461807251, "learning_rate": 5.155118219988404e-06, "loss": 0.0658, "step": 41562 }, { "epoch": 0.7360614875125627, "grad_norm": 0.9982295632362366, "learning_rate": 5.154469110107681e-06, "loss": 0.0487, "step": 41563 }, { "epoch": 0.7360791970495911, "grad_norm": 0.4729479253292084, "learning_rate": 5.153820032617758e-06, "loss": 0.0351, "step": 41564 }, { "epoch": 0.7360969065866195, "grad_norm": 0.3683680295944214, "learning_rate": 5.153170987520773e-06, "loss": 0.0536, "step": 41565 }, { "epoch": 0.736114616123648, "grad_norm": 0.22672060132026672, "learning_rate": 5.152521974818857e-06, "loss": 0.076, "step": 41566 }, { "epoch": 0.7361323256606764, "grad_norm": 0.6547314524650574, "learning_rate": 5.151872994514149e-06, "loss": 0.0316, "step": 41567 }, { "epoch": 0.7361500351977048, "grad_norm": 0.43382689356803894, "learning_rate": 5.151224046608788e-06, "loss": 0.0462, "step": 41568 }, { "epoch": 0.7361677447347332, "grad_norm": 0.5910363793373108, "learning_rate": 5.150575131104898e-06, "loss": 0.0273, "step": 41569 }, { "epoch": 0.7361854542717617, "grad_norm": 0.4239085018634796, "learning_rate": 5.14992624800462e-06, "loss": 0.0296, "step": 41570 }, { "epoch": 0.7362031638087901, "grad_norm": 1.2461495399475098, "learning_rate": 5.149277397310093e-06, "loss": 0.0754, "step": 41571 }, { "epoch": 0.7362208733458185, "grad_norm": 0.7557976841926575, "learning_rate": 5.148628579023443e-06, "loss": 0.0804, "step": 41572 }, { "epoch": 0.7362385828828469, "grad_norm": 0.6547792553901672, "learning_rate": 5.147979793146809e-06, "loss": 0.07, "step": 41573 }, { "epoch": 0.7362562924198754, "grad_norm": 0.5053526163101196, "learning_rate": 5.147331039682324e-06, "loss": 0.0439, "step": 41574 }, { "epoch": 0.7362740019569038, "grad_norm": 0.5015814900398254, "learning_rate": 5.146682318632123e-06, "loss": 0.0596, "step": 41575 }, { "epoch": 0.7362917114939322, "grad_norm": 0.9077813029289246, "learning_rate": 5.14603362999834e-06, "loss": 0.0918, "step": 41576 }, { "epoch": 0.7363094210309608, "grad_norm": 0.7527149319648743, "learning_rate": 5.145384973783112e-06, "loss": 0.083, "step": 41577 }, { "epoch": 0.7363271305679892, "grad_norm": 0.616468608379364, "learning_rate": 5.1447363499885684e-06, "loss": 0.0457, "step": 41578 }, { "epoch": 0.7363448401050176, "grad_norm": 0.9137431383132935, "learning_rate": 5.144087758616852e-06, "loss": 0.0778, "step": 41579 }, { "epoch": 0.736362549642046, "grad_norm": 1.2210078239440918, "learning_rate": 5.1434391996700845e-06, "loss": 0.084, "step": 41580 }, { "epoch": 0.7363802591790745, "grad_norm": 0.9494060277938843, "learning_rate": 5.142790673150404e-06, "loss": 0.082, "step": 41581 }, { "epoch": 0.7363979687161029, "grad_norm": 0.5794886946678162, "learning_rate": 5.142142179059952e-06, "loss": 0.0568, "step": 41582 }, { "epoch": 0.7364156782531313, "grad_norm": 0.47991636395454407, "learning_rate": 5.141493717400849e-06, "loss": 0.0336, "step": 41583 }, { "epoch": 0.7364333877901597, "grad_norm": 0.9398074746131897, "learning_rate": 5.140845288175229e-06, "loss": 0.053, "step": 41584 }, { "epoch": 0.7364510973271882, "grad_norm": 0.8563666939735413, "learning_rate": 5.1401968913852376e-06, "loss": 0.0621, "step": 41585 }, { "epoch": 0.7364688068642166, "grad_norm": 0.8259680867195129, "learning_rate": 5.1395485270330055e-06, "loss": 0.0753, "step": 41586 }, { "epoch": 0.736486516401245, "grad_norm": 0.5793554782867432, "learning_rate": 5.138900195120656e-06, "loss": 0.0641, "step": 41587 }, { "epoch": 0.7365042259382734, "grad_norm": 0.5707481503486633, "learning_rate": 5.1382518956503274e-06, "loss": 0.0496, "step": 41588 }, { "epoch": 0.7365219354753019, "grad_norm": 0.7294846177101135, "learning_rate": 5.137603628624157e-06, "loss": 0.0467, "step": 41589 }, { "epoch": 0.7365396450123303, "grad_norm": 0.8103378415107727, "learning_rate": 5.13695539404427e-06, "loss": 0.0459, "step": 41590 }, { "epoch": 0.7365573545493587, "grad_norm": 0.7750979065895081, "learning_rate": 5.1363071919128015e-06, "loss": 0.0493, "step": 41591 }, { "epoch": 0.7365750640863872, "grad_norm": 0.6386776566505432, "learning_rate": 5.135659022231883e-06, "loss": 0.0547, "step": 41592 }, { "epoch": 0.7365927736234156, "grad_norm": 0.6602655053138733, "learning_rate": 5.135010885003651e-06, "loss": 0.0555, "step": 41593 }, { "epoch": 0.736610483160444, "grad_norm": 0.6240721940994263, "learning_rate": 5.134362780230234e-06, "loss": 0.0482, "step": 41594 }, { "epoch": 0.7366281926974724, "grad_norm": 0.9069168567657471, "learning_rate": 5.133714707913766e-06, "loss": 0.0454, "step": 41595 }, { "epoch": 0.7366459022345009, "grad_norm": 1.107534408569336, "learning_rate": 5.133066668056383e-06, "loss": 0.0812, "step": 41596 }, { "epoch": 0.7366636117715293, "grad_norm": 0.7321171164512634, "learning_rate": 5.132418660660209e-06, "loss": 0.0683, "step": 41597 }, { "epoch": 0.7366813213085577, "grad_norm": 0.7188150882720947, "learning_rate": 5.1317706857273785e-06, "loss": 0.0901, "step": 41598 }, { "epoch": 0.7366990308455861, "grad_norm": 0.4892790913581848, "learning_rate": 5.131122743260024e-06, "loss": 0.0582, "step": 41599 }, { "epoch": 0.7367167403826146, "grad_norm": 0.48165807127952576, "learning_rate": 5.130474833260284e-06, "loss": 0.0626, "step": 41600 }, { "epoch": 0.736734449919643, "grad_norm": 0.5055236220359802, "learning_rate": 5.129826955730278e-06, "loss": 0.0584, "step": 41601 }, { "epoch": 0.7367521594566714, "grad_norm": 0.5383207201957703, "learning_rate": 5.129179110672142e-06, "loss": 0.0571, "step": 41602 }, { "epoch": 0.7367698689936998, "grad_norm": 0.3796451985836029, "learning_rate": 5.128531298088009e-06, "loss": 0.0711, "step": 41603 }, { "epoch": 0.7367875785307283, "grad_norm": 0.4892626404762268, "learning_rate": 5.12788351798001e-06, "loss": 0.0413, "step": 41604 }, { "epoch": 0.7368052880677567, "grad_norm": 0.626481831073761, "learning_rate": 5.127235770350275e-06, "loss": 0.0491, "step": 41605 }, { "epoch": 0.7368229976047851, "grad_norm": 0.5666253566741943, "learning_rate": 5.126588055200936e-06, "loss": 0.056, "step": 41606 }, { "epoch": 0.7368407071418136, "grad_norm": 0.7091858983039856, "learning_rate": 5.125940372534129e-06, "loss": 0.0657, "step": 41607 }, { "epoch": 0.736858416678842, "grad_norm": 0.3966262936592102, "learning_rate": 5.125292722351975e-06, "loss": 0.0533, "step": 41608 }, { "epoch": 0.7368761262158704, "grad_norm": 0.5406809449195862, "learning_rate": 5.124645104656607e-06, "loss": 0.0717, "step": 41609 }, { "epoch": 0.7368938357528988, "grad_norm": 0.3038863241672516, "learning_rate": 5.1239975194501595e-06, "loss": 0.0649, "step": 41610 }, { "epoch": 0.7369115452899273, "grad_norm": 0.31752029061317444, "learning_rate": 5.123349966734767e-06, "loss": 0.0389, "step": 41611 }, { "epoch": 0.7369292548269557, "grad_norm": 0.766697883605957, "learning_rate": 5.122702446512548e-06, "loss": 0.0422, "step": 41612 }, { "epoch": 0.7369469643639841, "grad_norm": 0.8299431204795837, "learning_rate": 5.122054958785633e-06, "loss": 0.05, "step": 41613 }, { "epoch": 0.7369646739010125, "grad_norm": 0.2900850772857666, "learning_rate": 5.12140750355617e-06, "loss": 0.0394, "step": 41614 }, { "epoch": 0.736982383438041, "grad_norm": 0.7405385375022888, "learning_rate": 5.1207600808262715e-06, "loss": 0.0632, "step": 41615 }, { "epoch": 0.7370000929750694, "grad_norm": 0.5258594751358032, "learning_rate": 5.120112690598073e-06, "loss": 0.026, "step": 41616 }, { "epoch": 0.7370178025120978, "grad_norm": 0.8561736941337585, "learning_rate": 5.119465332873704e-06, "loss": 0.0885, "step": 41617 }, { "epoch": 0.7370355120491262, "grad_norm": 0.6412109136581421, "learning_rate": 5.1188180076553e-06, "loss": 0.0397, "step": 41618 }, { "epoch": 0.7370532215861547, "grad_norm": 0.4784730076789856, "learning_rate": 5.118170714944981e-06, "loss": 0.0696, "step": 41619 }, { "epoch": 0.7370709311231831, "grad_norm": 1.135806679725647, "learning_rate": 5.117523454744879e-06, "loss": 0.0858, "step": 41620 }, { "epoch": 0.7370886406602115, "grad_norm": 0.7337300777435303, "learning_rate": 5.116876227057128e-06, "loss": 0.0866, "step": 41621 }, { "epoch": 0.73710635019724, "grad_norm": 1.0356642007827759, "learning_rate": 5.116229031883853e-06, "loss": 0.1035, "step": 41622 }, { "epoch": 0.7371240597342684, "grad_norm": 0.6733580231666565, "learning_rate": 5.1155818692271835e-06, "loss": 0.0737, "step": 41623 }, { "epoch": 0.7371417692712968, "grad_norm": 0.2982083857059479, "learning_rate": 5.1149347390892526e-06, "loss": 0.0469, "step": 41624 }, { "epoch": 0.7371594788083252, "grad_norm": 0.5597941875457764, "learning_rate": 5.11428764147219e-06, "loss": 0.0512, "step": 41625 }, { "epoch": 0.7371771883453537, "grad_norm": 0.5805399417877197, "learning_rate": 5.113640576378117e-06, "loss": 0.0389, "step": 41626 }, { "epoch": 0.7371948978823821, "grad_norm": 0.4554009437561035, "learning_rate": 5.112993543809166e-06, "loss": 0.0376, "step": 41627 }, { "epoch": 0.7372126074194105, "grad_norm": 0.5497708320617676, "learning_rate": 5.11234654376747e-06, "loss": 0.0464, "step": 41628 }, { "epoch": 0.7372303169564389, "grad_norm": 0.4723559617996216, "learning_rate": 5.111699576255151e-06, "loss": 0.0492, "step": 41629 }, { "epoch": 0.7372480264934674, "grad_norm": 0.49860212206840515, "learning_rate": 5.111052641274339e-06, "loss": 0.0513, "step": 41630 }, { "epoch": 0.7372657360304958, "grad_norm": 0.35152456164360046, "learning_rate": 5.110405738827165e-06, "loss": 0.0408, "step": 41631 }, { "epoch": 0.7372834455675242, "grad_norm": 0.3365994989871979, "learning_rate": 5.109758868915754e-06, "loss": 0.0503, "step": 41632 }, { "epoch": 0.7373011551045526, "grad_norm": 0.5896937251091003, "learning_rate": 5.109112031542237e-06, "loss": 0.0421, "step": 41633 }, { "epoch": 0.7373188646415811, "grad_norm": 0.49759915471076965, "learning_rate": 5.1084652267087406e-06, "loss": 0.0468, "step": 41634 }, { "epoch": 0.7373365741786095, "grad_norm": 0.38356366753578186, "learning_rate": 5.107818454417398e-06, "loss": 0.0308, "step": 41635 }, { "epoch": 0.7373542837156379, "grad_norm": 0.3734860122203827, "learning_rate": 5.107171714670328e-06, "loss": 0.0556, "step": 41636 }, { "epoch": 0.7373719932526664, "grad_norm": 0.8520516157150269, "learning_rate": 5.106525007469663e-06, "loss": 0.0683, "step": 41637 }, { "epoch": 0.7373897027896948, "grad_norm": 0.5037485361099243, "learning_rate": 5.1058783328175285e-06, "loss": 0.053, "step": 41638 }, { "epoch": 0.7374074123267232, "grad_norm": 0.9690643548965454, "learning_rate": 5.105231690716058e-06, "loss": 0.0689, "step": 41639 }, { "epoch": 0.7374251218637516, "grad_norm": 0.8339936137199402, "learning_rate": 5.104585081167371e-06, "loss": 0.056, "step": 41640 }, { "epoch": 0.7374428314007802, "grad_norm": 0.832413375377655, "learning_rate": 5.103938504173598e-06, "loss": 0.0484, "step": 41641 }, { "epoch": 0.7374605409378086, "grad_norm": 0.4363594949245453, "learning_rate": 5.103291959736866e-06, "loss": 0.0436, "step": 41642 }, { "epoch": 0.737478250474837, "grad_norm": 0.7076393961906433, "learning_rate": 5.1026454478593025e-06, "loss": 0.0742, "step": 41643 }, { "epoch": 0.7374959600118653, "grad_norm": 0.8361799120903015, "learning_rate": 5.101998968543035e-06, "loss": 0.0643, "step": 41644 }, { "epoch": 0.7375136695488939, "grad_norm": 0.5209680795669556, "learning_rate": 5.101352521790189e-06, "loss": 0.0713, "step": 41645 }, { "epoch": 0.7375313790859223, "grad_norm": 0.5439897179603577, "learning_rate": 5.100706107602898e-06, "loss": 0.0591, "step": 41646 }, { "epoch": 0.7375490886229507, "grad_norm": 0.9444855451583862, "learning_rate": 5.100059725983276e-06, "loss": 0.0714, "step": 41647 }, { "epoch": 0.737566798159979, "grad_norm": 0.7996006011962891, "learning_rate": 5.099413376933457e-06, "loss": 0.0529, "step": 41648 }, { "epoch": 0.7375845076970076, "grad_norm": 0.5875020623207092, "learning_rate": 5.098767060455566e-06, "loss": 0.0574, "step": 41649 }, { "epoch": 0.737602217234036, "grad_norm": 0.40549150109291077, "learning_rate": 5.098120776551731e-06, "loss": 0.0508, "step": 41650 }, { "epoch": 0.7376199267710644, "grad_norm": 0.2818768322467804, "learning_rate": 5.097474525224078e-06, "loss": 0.0557, "step": 41651 }, { "epoch": 0.7376376363080929, "grad_norm": 0.6694825887680054, "learning_rate": 5.09682830647473e-06, "loss": 0.05, "step": 41652 }, { "epoch": 0.7376553458451213, "grad_norm": 0.7954890727996826, "learning_rate": 5.096182120305821e-06, "loss": 0.0408, "step": 41653 }, { "epoch": 0.7376730553821497, "grad_norm": 0.7314534783363342, "learning_rate": 5.095535966719466e-06, "loss": 0.0633, "step": 41654 }, { "epoch": 0.7376907649191781, "grad_norm": 0.9269866943359375, "learning_rate": 5.094889845717798e-06, "loss": 0.0476, "step": 41655 }, { "epoch": 0.7377084744562066, "grad_norm": 0.4159872233867645, "learning_rate": 5.094243757302938e-06, "loss": 0.0571, "step": 41656 }, { "epoch": 0.737726183993235, "grad_norm": 0.5630416870117188, "learning_rate": 5.093597701477019e-06, "loss": 0.0517, "step": 41657 }, { "epoch": 0.7377438935302634, "grad_norm": 0.4932691156864166, "learning_rate": 5.092951678242159e-06, "loss": 0.0412, "step": 41658 }, { "epoch": 0.7377616030672918, "grad_norm": 2.327326536178589, "learning_rate": 5.092305687600483e-06, "loss": 0.0578, "step": 41659 }, { "epoch": 0.7377793126043203, "grad_norm": 0.8691378235816956, "learning_rate": 5.091659729554122e-06, "loss": 0.0728, "step": 41660 }, { "epoch": 0.7377970221413487, "grad_norm": 0.7657204270362854, "learning_rate": 5.091013804105196e-06, "loss": 0.068, "step": 41661 }, { "epoch": 0.7378147316783771, "grad_norm": 0.7465463280677795, "learning_rate": 5.090367911255833e-06, "loss": 0.0761, "step": 41662 }, { "epoch": 0.7378324412154055, "grad_norm": 0.4097398817539215, "learning_rate": 5.089722051008158e-06, "loss": 0.0327, "step": 41663 }, { "epoch": 0.737850150752434, "grad_norm": 0.8964575529098511, "learning_rate": 5.0890762233642995e-06, "loss": 0.0697, "step": 41664 }, { "epoch": 0.7378678602894624, "grad_norm": 0.40133652091026306, "learning_rate": 5.088430428326374e-06, "loss": 0.0354, "step": 41665 }, { "epoch": 0.7378855698264908, "grad_norm": 0.5406355261802673, "learning_rate": 5.087784665896508e-06, "loss": 0.053, "step": 41666 }, { "epoch": 0.7379032793635193, "grad_norm": 0.5961949825286865, "learning_rate": 5.087138936076834e-06, "loss": 0.0614, "step": 41667 }, { "epoch": 0.7379209889005477, "grad_norm": 0.6996574997901917, "learning_rate": 5.0864932388694655e-06, "loss": 0.0429, "step": 41668 }, { "epoch": 0.7379386984375761, "grad_norm": 0.7099964022636414, "learning_rate": 5.085847574276531e-06, "loss": 0.0385, "step": 41669 }, { "epoch": 0.7379564079746045, "grad_norm": 0.6236539483070374, "learning_rate": 5.085201942300155e-06, "loss": 0.0593, "step": 41670 }, { "epoch": 0.737974117511633, "grad_norm": 0.7087424397468567, "learning_rate": 5.084556342942463e-06, "loss": 0.0913, "step": 41671 }, { "epoch": 0.7379918270486614, "grad_norm": 0.7635590434074402, "learning_rate": 5.083910776205577e-06, "loss": 0.0487, "step": 41672 }, { "epoch": 0.7380095365856898, "grad_norm": 0.6409221887588501, "learning_rate": 5.08326524209162e-06, "loss": 0.0278, "step": 41673 }, { "epoch": 0.7380272461227182, "grad_norm": 0.8025968670845032, "learning_rate": 5.082619740602724e-06, "loss": 0.0218, "step": 41674 }, { "epoch": 0.7380449556597467, "grad_norm": 0.6629345417022705, "learning_rate": 5.0819742717410015e-06, "loss": 0.0533, "step": 41675 }, { "epoch": 0.7380626651967751, "grad_norm": 0.24397392570972443, "learning_rate": 5.081328835508579e-06, "loss": 0.0514, "step": 41676 }, { "epoch": 0.7380803747338035, "grad_norm": 0.8281347751617432, "learning_rate": 5.0806834319075815e-06, "loss": 0.0686, "step": 41677 }, { "epoch": 0.7380980842708319, "grad_norm": 0.9332315921783447, "learning_rate": 5.08003806094014e-06, "loss": 0.0749, "step": 41678 }, { "epoch": 0.7381157938078604, "grad_norm": 0.809337317943573, "learning_rate": 5.079392722608357e-06, "loss": 0.0666, "step": 41679 }, { "epoch": 0.7381335033448888, "grad_norm": 0.6124436855316162, "learning_rate": 5.078747416914375e-06, "loss": 0.0342, "step": 41680 }, { "epoch": 0.7381512128819172, "grad_norm": 0.7478662729263306, "learning_rate": 5.078102143860309e-06, "loss": 0.0496, "step": 41681 }, { "epoch": 0.7381689224189457, "grad_norm": 0.7175050377845764, "learning_rate": 5.07745690344829e-06, "loss": 0.0695, "step": 41682 }, { "epoch": 0.7381866319559741, "grad_norm": 0.548746645450592, "learning_rate": 5.076811695680428e-06, "loss": 0.0545, "step": 41683 }, { "epoch": 0.7382043414930025, "grad_norm": 0.7244901657104492, "learning_rate": 5.076166520558852e-06, "loss": 0.0795, "step": 41684 }, { "epoch": 0.7382220510300309, "grad_norm": 0.537431538105011, "learning_rate": 5.075521378085691e-06, "loss": 0.0314, "step": 41685 }, { "epoch": 0.7382397605670594, "grad_norm": 0.6468237638473511, "learning_rate": 5.074876268263053e-06, "loss": 0.0699, "step": 41686 }, { "epoch": 0.7382574701040878, "grad_norm": 0.5818149447441101, "learning_rate": 5.074231191093071e-06, "loss": 0.0519, "step": 41687 }, { "epoch": 0.7382751796411162, "grad_norm": 0.7114532589912415, "learning_rate": 5.073586146577862e-06, "loss": 0.0538, "step": 41688 }, { "epoch": 0.7382928891781446, "grad_norm": 0.8252520561218262, "learning_rate": 5.0729411347195525e-06, "loss": 0.0847, "step": 41689 }, { "epoch": 0.7383105987151731, "grad_norm": 0.7924841642379761, "learning_rate": 5.0722961555202614e-06, "loss": 0.065, "step": 41690 }, { "epoch": 0.7383283082522015, "grad_norm": 0.7093862891197205, "learning_rate": 5.071651208982113e-06, "loss": 0.0724, "step": 41691 }, { "epoch": 0.7383460177892299, "grad_norm": 0.8991464972496033, "learning_rate": 5.071006295107232e-06, "loss": 0.0638, "step": 41692 }, { "epoch": 0.7383637273262583, "grad_norm": 0.7243399024009705, "learning_rate": 5.070361413897732e-06, "loss": 0.0836, "step": 41693 }, { "epoch": 0.7383814368632868, "grad_norm": 0.5561200380325317, "learning_rate": 5.069716565355738e-06, "loss": 0.0654, "step": 41694 }, { "epoch": 0.7383991464003152, "grad_norm": 0.5161730051040649, "learning_rate": 5.069071749483371e-06, "loss": 0.0598, "step": 41695 }, { "epoch": 0.7384168559373436, "grad_norm": 0.46149203181266785, "learning_rate": 5.068426966282761e-06, "loss": 0.0426, "step": 41696 }, { "epoch": 0.7384345654743721, "grad_norm": 0.7179719805717468, "learning_rate": 5.0677822157560146e-06, "loss": 0.0653, "step": 41697 }, { "epoch": 0.7384522750114005, "grad_norm": 0.8068961501121521, "learning_rate": 5.0671374979052615e-06, "loss": 0.0575, "step": 41698 }, { "epoch": 0.7384699845484289, "grad_norm": 0.6718602776527405, "learning_rate": 5.06649281273262e-06, "loss": 0.0707, "step": 41699 }, { "epoch": 0.7384876940854573, "grad_norm": 0.41385409235954285, "learning_rate": 5.065848160240213e-06, "loss": 0.0307, "step": 41700 }, { "epoch": 0.7385054036224858, "grad_norm": 0.4915161728858948, "learning_rate": 5.0652035404301605e-06, "loss": 0.0615, "step": 41701 }, { "epoch": 0.7385231131595142, "grad_norm": 0.9267584085464478, "learning_rate": 5.064558953304585e-06, "loss": 0.0748, "step": 41702 }, { "epoch": 0.7385408226965426, "grad_norm": 0.7490713596343994, "learning_rate": 5.063914398865609e-06, "loss": 0.0883, "step": 41703 }, { "epoch": 0.738558532233571, "grad_norm": 0.8640586733818054, "learning_rate": 5.063269877115346e-06, "loss": 0.055, "step": 41704 }, { "epoch": 0.7385762417705996, "grad_norm": 0.4389999508857727, "learning_rate": 5.062625388055918e-06, "loss": 0.0459, "step": 41705 }, { "epoch": 0.738593951307628, "grad_norm": 0.5812252759933472, "learning_rate": 5.061980931689455e-06, "loss": 0.0542, "step": 41706 }, { "epoch": 0.7386116608446563, "grad_norm": 0.8277198076248169, "learning_rate": 5.061336508018063e-06, "loss": 0.0807, "step": 41707 }, { "epoch": 0.7386293703816847, "grad_norm": 0.4454488754272461, "learning_rate": 5.060692117043864e-06, "loss": 0.0867, "step": 41708 }, { "epoch": 0.7386470799187133, "grad_norm": 0.7519501447677612, "learning_rate": 5.060047758768987e-06, "loss": 0.0743, "step": 41709 }, { "epoch": 0.7386647894557417, "grad_norm": 0.3831959664821625, "learning_rate": 5.059403433195553e-06, "loss": 0.0645, "step": 41710 }, { "epoch": 0.73868249899277, "grad_norm": 0.6781035661697388, "learning_rate": 5.058759140325671e-06, "loss": 0.0546, "step": 41711 }, { "epoch": 0.7387002085297986, "grad_norm": 0.7363583445549011, "learning_rate": 5.058114880161466e-06, "loss": 0.0429, "step": 41712 }, { "epoch": 0.738717918066827, "grad_norm": 0.55023193359375, "learning_rate": 5.0574706527050636e-06, "loss": 0.0634, "step": 41713 }, { "epoch": 0.7387356276038554, "grad_norm": 0.5315974354743958, "learning_rate": 5.05682645795857e-06, "loss": 0.0522, "step": 41714 }, { "epoch": 0.7387533371408838, "grad_norm": 0.7356328964233398, "learning_rate": 5.056182295924114e-06, "loss": 0.0685, "step": 41715 }, { "epoch": 0.7387710466779123, "grad_norm": 0.49376288056373596, "learning_rate": 5.0555381666038115e-06, "loss": 0.0352, "step": 41716 }, { "epoch": 0.7387887562149407, "grad_norm": 0.44454506039619446, "learning_rate": 5.054894069999782e-06, "loss": 0.0308, "step": 41717 }, { "epoch": 0.7388064657519691, "grad_norm": 1.313452959060669, "learning_rate": 5.0542500061141455e-06, "loss": 0.0869, "step": 41718 }, { "epoch": 0.7388241752889975, "grad_norm": 0.47512659430503845, "learning_rate": 5.053605974949021e-06, "loss": 0.0587, "step": 41719 }, { "epoch": 0.738841884826026, "grad_norm": 0.6053621768951416, "learning_rate": 5.052961976506526e-06, "loss": 0.074, "step": 41720 }, { "epoch": 0.7388595943630544, "grad_norm": 0.4650881886482239, "learning_rate": 5.052318010788786e-06, "loss": 0.0403, "step": 41721 }, { "epoch": 0.7388773039000828, "grad_norm": 0.3505193889141083, "learning_rate": 5.051674077797908e-06, "loss": 0.0536, "step": 41722 }, { "epoch": 0.7388950134371112, "grad_norm": 0.8082913756370544, "learning_rate": 5.051030177536017e-06, "loss": 0.0776, "step": 41723 }, { "epoch": 0.7389127229741397, "grad_norm": 0.4390365183353424, "learning_rate": 5.0503863100052335e-06, "loss": 0.0497, "step": 41724 }, { "epoch": 0.7389304325111681, "grad_norm": 0.5405480265617371, "learning_rate": 5.049742475207669e-06, "loss": 0.0536, "step": 41725 }, { "epoch": 0.7389481420481965, "grad_norm": 0.5090695023536682, "learning_rate": 5.049098673145445e-06, "loss": 0.0659, "step": 41726 }, { "epoch": 0.738965851585225, "grad_norm": 0.3118254244327545, "learning_rate": 5.04845490382068e-06, "loss": 0.051, "step": 41727 }, { "epoch": 0.7389835611222534, "grad_norm": 0.6490945219993591, "learning_rate": 5.047811167235492e-06, "loss": 0.0403, "step": 41728 }, { "epoch": 0.7390012706592818, "grad_norm": 0.3616112768650055, "learning_rate": 5.047167463391999e-06, "loss": 0.0531, "step": 41729 }, { "epoch": 0.7390189801963102, "grad_norm": 0.6708018779754639, "learning_rate": 5.046523792292317e-06, "loss": 0.0442, "step": 41730 }, { "epoch": 0.7390366897333387, "grad_norm": 0.4021799564361572, "learning_rate": 5.045880153938571e-06, "loss": 0.0608, "step": 41731 }, { "epoch": 0.7390543992703671, "grad_norm": 0.4932839572429657, "learning_rate": 5.0452365483328674e-06, "loss": 0.0476, "step": 41732 }, { "epoch": 0.7390721088073955, "grad_norm": 0.4654436707496643, "learning_rate": 5.04459297547733e-06, "loss": 0.0601, "step": 41733 }, { "epoch": 0.7390898183444239, "grad_norm": 0.5143480896949768, "learning_rate": 5.043949435374073e-06, "loss": 0.0819, "step": 41734 }, { "epoch": 0.7391075278814524, "grad_norm": 0.7597301602363586, "learning_rate": 5.04330592802522e-06, "loss": 0.055, "step": 41735 }, { "epoch": 0.7391252374184808, "grad_norm": 0.7979219555854797, "learning_rate": 5.0426624534328805e-06, "loss": 0.0898, "step": 41736 }, { "epoch": 0.7391429469555092, "grad_norm": 0.6118041276931763, "learning_rate": 5.042019011599168e-06, "loss": 0.0579, "step": 41737 }, { "epoch": 0.7391606564925376, "grad_norm": 0.5350661277770996, "learning_rate": 5.041375602526216e-06, "loss": 0.0537, "step": 41738 }, { "epoch": 0.7391783660295661, "grad_norm": 0.3592437207698822, "learning_rate": 5.040732226216125e-06, "loss": 0.0258, "step": 41739 }, { "epoch": 0.7391960755665945, "grad_norm": 0.7563928365707397, "learning_rate": 5.04008888267102e-06, "loss": 0.0469, "step": 41740 }, { "epoch": 0.7392137851036229, "grad_norm": 0.6757131814956665, "learning_rate": 5.039445571893013e-06, "loss": 0.0497, "step": 41741 }, { "epoch": 0.7392314946406514, "grad_norm": 0.689871072769165, "learning_rate": 5.0388022938842275e-06, "loss": 0.0432, "step": 41742 }, { "epoch": 0.7392492041776798, "grad_norm": 0.7140902876853943, "learning_rate": 5.038159048646771e-06, "loss": 0.0551, "step": 41743 }, { "epoch": 0.7392669137147082, "grad_norm": 0.3643243610858917, "learning_rate": 5.037515836182763e-06, "loss": 0.0654, "step": 41744 }, { "epoch": 0.7392846232517366, "grad_norm": 0.5243373513221741, "learning_rate": 5.0368726564943216e-06, "loss": 0.0495, "step": 41745 }, { "epoch": 0.7393023327887651, "grad_norm": 0.749519944190979, "learning_rate": 5.036229509583561e-06, "loss": 0.0426, "step": 41746 }, { "epoch": 0.7393200423257935, "grad_norm": 0.5190315842628479, "learning_rate": 5.035586395452597e-06, "loss": 0.0577, "step": 41747 }, { "epoch": 0.7393377518628219, "grad_norm": 0.7095295786857605, "learning_rate": 5.034943314103545e-06, "loss": 0.0782, "step": 41748 }, { "epoch": 0.7393554613998503, "grad_norm": 0.7220630049705505, "learning_rate": 5.034300265538528e-06, "loss": 0.0577, "step": 41749 }, { "epoch": 0.7393731709368788, "grad_norm": 0.43666040897369385, "learning_rate": 5.0336572497596505e-06, "loss": 0.0809, "step": 41750 }, { "epoch": 0.7393908804739072, "grad_norm": 0.8871216773986816, "learning_rate": 5.033014266769031e-06, "loss": 0.0754, "step": 41751 }, { "epoch": 0.7394085900109356, "grad_norm": 0.7711266875267029, "learning_rate": 5.032371316568788e-06, "loss": 0.0554, "step": 41752 }, { "epoch": 0.739426299547964, "grad_norm": 0.8209867477416992, "learning_rate": 5.031728399161039e-06, "loss": 0.0466, "step": 41753 }, { "epoch": 0.7394440090849925, "grad_norm": 0.6543127298355103, "learning_rate": 5.031085514547892e-06, "loss": 0.0845, "step": 41754 }, { "epoch": 0.7394617186220209, "grad_norm": 0.8537281155586243, "learning_rate": 5.030442662731464e-06, "loss": 0.0695, "step": 41755 }, { "epoch": 0.7394794281590493, "grad_norm": 0.745682954788208, "learning_rate": 5.029799843713872e-06, "loss": 0.0699, "step": 41756 }, { "epoch": 0.7394971376960778, "grad_norm": 0.6101934313774109, "learning_rate": 5.029157057497228e-06, "loss": 0.0439, "step": 41757 }, { "epoch": 0.7395148472331062, "grad_norm": 0.6177148222923279, "learning_rate": 5.028514304083652e-06, "loss": 0.0806, "step": 41758 }, { "epoch": 0.7395325567701346, "grad_norm": 1.0200122594833374, "learning_rate": 5.027871583475254e-06, "loss": 0.0514, "step": 41759 }, { "epoch": 0.739550266307163, "grad_norm": 0.37003421783447266, "learning_rate": 5.027228895674154e-06, "loss": 0.0323, "step": 41760 }, { "epoch": 0.7395679758441915, "grad_norm": 0.7569634914398193, "learning_rate": 5.026586240682458e-06, "loss": 0.0642, "step": 41761 }, { "epoch": 0.7395856853812199, "grad_norm": 0.3585907220840454, "learning_rate": 5.025943618502284e-06, "loss": 0.0624, "step": 41762 }, { "epoch": 0.7396033949182483, "grad_norm": 0.8566952347755432, "learning_rate": 5.025301029135753e-06, "loss": 0.0845, "step": 41763 }, { "epoch": 0.7396211044552767, "grad_norm": 1.1682125329971313, "learning_rate": 5.024658472584966e-06, "loss": 0.0635, "step": 41764 }, { "epoch": 0.7396388139923052, "grad_norm": 0.5846482515335083, "learning_rate": 5.024015948852045e-06, "loss": 0.0553, "step": 41765 }, { "epoch": 0.7396565235293336, "grad_norm": 0.5146344304084778, "learning_rate": 5.023373457939097e-06, "loss": 0.0479, "step": 41766 }, { "epoch": 0.739674233066362, "grad_norm": 0.41834312677383423, "learning_rate": 5.0227309998482515e-06, "loss": 0.0454, "step": 41767 }, { "epoch": 0.7396919426033904, "grad_norm": 0.48616093397140503, "learning_rate": 5.022088574581606e-06, "loss": 0.0594, "step": 41768 }, { "epoch": 0.739709652140419, "grad_norm": 0.3836389183998108, "learning_rate": 5.021446182141281e-06, "loss": 0.0482, "step": 41769 }, { "epoch": 0.7397273616774473, "grad_norm": 0.30026018619537354, "learning_rate": 5.020803822529393e-06, "loss": 0.0449, "step": 41770 }, { "epoch": 0.7397450712144757, "grad_norm": 0.46499380469322205, "learning_rate": 5.0201614957480466e-06, "loss": 0.0401, "step": 41771 }, { "epoch": 0.7397627807515043, "grad_norm": 0.5728801488876343, "learning_rate": 5.019519201799359e-06, "loss": 0.0584, "step": 41772 }, { "epoch": 0.7397804902885327, "grad_norm": 0.4567906856536865, "learning_rate": 5.0188769406854454e-06, "loss": 0.032, "step": 41773 }, { "epoch": 0.739798199825561, "grad_norm": 0.6224743127822876, "learning_rate": 5.018234712408416e-06, "loss": 0.0828, "step": 41774 }, { "epoch": 0.7398159093625895, "grad_norm": 0.49688342213630676, "learning_rate": 5.017592516970383e-06, "loss": 0.0536, "step": 41775 }, { "epoch": 0.739833618899618, "grad_norm": 0.5477530360221863, "learning_rate": 5.016950354373464e-06, "loss": 0.0865, "step": 41776 }, { "epoch": 0.7398513284366464, "grad_norm": 0.7277937531471252, "learning_rate": 5.016308224619771e-06, "loss": 0.109, "step": 41777 }, { "epoch": 0.7398690379736748, "grad_norm": 0.5584395527839661, "learning_rate": 5.015666127711412e-06, "loss": 0.0589, "step": 41778 }, { "epoch": 0.7398867475107032, "grad_norm": 0.8048758506774902, "learning_rate": 5.015024063650499e-06, "loss": 0.0881, "step": 41779 }, { "epoch": 0.7399044570477317, "grad_norm": 0.35609814524650574, "learning_rate": 5.0143820324391475e-06, "loss": 0.0511, "step": 41780 }, { "epoch": 0.7399221665847601, "grad_norm": 0.5936472415924072, "learning_rate": 5.0137400340794755e-06, "loss": 0.0534, "step": 41781 }, { "epoch": 0.7399398761217885, "grad_norm": 1.0039806365966797, "learning_rate": 5.013098068573581e-06, "loss": 0.0694, "step": 41782 }, { "epoch": 0.7399575856588169, "grad_norm": 0.537132203578949, "learning_rate": 5.012456135923587e-06, "loss": 0.0623, "step": 41783 }, { "epoch": 0.7399752951958454, "grad_norm": 0.49505922198295593, "learning_rate": 5.0118142361316e-06, "loss": 0.0254, "step": 41784 }, { "epoch": 0.7399930047328738, "grad_norm": 0.6036251187324524, "learning_rate": 5.011172369199735e-06, "loss": 0.0775, "step": 41785 }, { "epoch": 0.7400107142699022, "grad_norm": 0.6115212440490723, "learning_rate": 5.010530535130103e-06, "loss": 0.043, "step": 41786 }, { "epoch": 0.7400284238069307, "grad_norm": 0.7486663460731506, "learning_rate": 5.009888733924813e-06, "loss": 0.057, "step": 41787 }, { "epoch": 0.7400461333439591, "grad_norm": 0.6747017502784729, "learning_rate": 5.0092469655859844e-06, "loss": 0.0405, "step": 41788 }, { "epoch": 0.7400638428809875, "grad_norm": 0.3130190372467041, "learning_rate": 5.0086052301157194e-06, "loss": 0.0417, "step": 41789 }, { "epoch": 0.7400815524180159, "grad_norm": 0.6560496091842651, "learning_rate": 5.00796352751613e-06, "loss": 0.0588, "step": 41790 }, { "epoch": 0.7400992619550444, "grad_norm": 0.7002895474433899, "learning_rate": 5.007321857789332e-06, "loss": 0.0373, "step": 41791 }, { "epoch": 0.7401169714920728, "grad_norm": 0.742720901966095, "learning_rate": 5.006680220937438e-06, "loss": 0.0755, "step": 41792 }, { "epoch": 0.7401346810291012, "grad_norm": 0.31962281465530396, "learning_rate": 5.0060386169625525e-06, "loss": 0.0344, "step": 41793 }, { "epoch": 0.7401523905661296, "grad_norm": 0.6110127568244934, "learning_rate": 5.005397045866788e-06, "loss": 0.0504, "step": 41794 }, { "epoch": 0.7401701001031581, "grad_norm": 0.5170932412147522, "learning_rate": 5.0047555076522564e-06, "loss": 0.049, "step": 41795 }, { "epoch": 0.7401878096401865, "grad_norm": 0.6997836232185364, "learning_rate": 5.004114002321067e-06, "loss": 0.0349, "step": 41796 }, { "epoch": 0.7402055191772149, "grad_norm": 0.5676501989364624, "learning_rate": 5.003472529875332e-06, "loss": 0.0508, "step": 41797 }, { "epoch": 0.7402232287142433, "grad_norm": 0.8210411667823792, "learning_rate": 5.002831090317162e-06, "loss": 0.0665, "step": 41798 }, { "epoch": 0.7402409382512718, "grad_norm": 0.4249909818172455, "learning_rate": 5.0021896836486715e-06, "loss": 0.0393, "step": 41799 }, { "epoch": 0.7402586477883002, "grad_norm": 0.5292669534683228, "learning_rate": 5.00154830987196e-06, "loss": 0.0511, "step": 41800 }, { "epoch": 0.7402763573253286, "grad_norm": 1.0465630292892456, "learning_rate": 5.000906968989144e-06, "loss": 0.068, "step": 41801 }, { "epoch": 0.7402940668623571, "grad_norm": 0.7323498129844666, "learning_rate": 5.000265661002337e-06, "loss": 0.0851, "step": 41802 }, { "epoch": 0.7403117763993855, "grad_norm": 0.44072425365448, "learning_rate": 4.999624385913636e-06, "loss": 0.0313, "step": 41803 }, { "epoch": 0.7403294859364139, "grad_norm": 0.7069612741470337, "learning_rate": 4.998983143725163e-06, "loss": 0.0538, "step": 41804 }, { "epoch": 0.7403471954734423, "grad_norm": 0.666857898235321, "learning_rate": 4.998341934439024e-06, "loss": 0.0313, "step": 41805 }, { "epoch": 0.7403649050104708, "grad_norm": 0.45900049805641174, "learning_rate": 4.997700758057334e-06, "loss": 0.0382, "step": 41806 }, { "epoch": 0.7403826145474992, "grad_norm": 0.6386989951133728, "learning_rate": 4.997059614582191e-06, "loss": 0.0524, "step": 41807 }, { "epoch": 0.7404003240845276, "grad_norm": 0.3553480803966522, "learning_rate": 4.996418504015712e-06, "loss": 0.0469, "step": 41808 }, { "epoch": 0.740418033621556, "grad_norm": 0.3564074635505676, "learning_rate": 4.995777426360007e-06, "loss": 0.0646, "step": 41809 }, { "epoch": 0.7404357431585845, "grad_norm": 0.44686827063560486, "learning_rate": 4.995136381617179e-06, "loss": 0.0518, "step": 41810 }, { "epoch": 0.7404534526956129, "grad_norm": 0.7774572968482971, "learning_rate": 4.99449536978934e-06, "loss": 0.079, "step": 41811 }, { "epoch": 0.7404711622326413, "grad_norm": 0.7475963234901428, "learning_rate": 4.993854390878598e-06, "loss": 0.0581, "step": 41812 }, { "epoch": 0.7404888717696697, "grad_norm": 0.6887587308883667, "learning_rate": 4.9932134448870645e-06, "loss": 0.0561, "step": 41813 }, { "epoch": 0.7405065813066982, "grad_norm": 0.6087399125099182, "learning_rate": 4.992572531816846e-06, "loss": 0.0409, "step": 41814 }, { "epoch": 0.7405242908437266, "grad_norm": 0.4811423122882843, "learning_rate": 4.991931651670052e-06, "loss": 0.0688, "step": 41815 }, { "epoch": 0.740542000380755, "grad_norm": 0.6590346097946167, "learning_rate": 4.9912908044487945e-06, "loss": 0.0601, "step": 41816 }, { "epoch": 0.7405597099177835, "grad_norm": 0.5432661175727844, "learning_rate": 4.990649990155174e-06, "loss": 0.0533, "step": 41817 }, { "epoch": 0.7405774194548119, "grad_norm": 0.8862046003341675, "learning_rate": 4.990009208791303e-06, "loss": 0.0669, "step": 41818 }, { "epoch": 0.7405951289918403, "grad_norm": 0.4115581810474396, "learning_rate": 4.989368460359289e-06, "loss": 0.0657, "step": 41819 }, { "epoch": 0.7406128385288687, "grad_norm": 0.6538594365119934, "learning_rate": 4.988727744861245e-06, "loss": 0.0421, "step": 41820 }, { "epoch": 0.7406305480658972, "grad_norm": 1.0184041261672974, "learning_rate": 4.988087062299268e-06, "loss": 0.0875, "step": 41821 }, { "epoch": 0.7406482576029256, "grad_norm": 0.4573604464530945, "learning_rate": 4.987446412675474e-06, "loss": 0.055, "step": 41822 }, { "epoch": 0.740665967139954, "grad_norm": 0.3117980360984802, "learning_rate": 4.986805795991967e-06, "loss": 0.0416, "step": 41823 }, { "epoch": 0.7406836766769824, "grad_norm": 0.6153895258903503, "learning_rate": 4.9861652122508575e-06, "loss": 0.0482, "step": 41824 }, { "epoch": 0.7407013862140109, "grad_norm": 0.2285996824502945, "learning_rate": 4.9855246614542514e-06, "loss": 0.0366, "step": 41825 }, { "epoch": 0.7407190957510393, "grad_norm": 0.4440627694129944, "learning_rate": 4.984884143604256e-06, "loss": 0.0376, "step": 41826 }, { "epoch": 0.7407368052880677, "grad_norm": 0.8352816104888916, "learning_rate": 4.984243658702983e-06, "loss": 0.0685, "step": 41827 }, { "epoch": 0.7407545148250961, "grad_norm": 0.4566187262535095, "learning_rate": 4.983603206752532e-06, "loss": 0.0487, "step": 41828 }, { "epoch": 0.7407722243621246, "grad_norm": 0.4460209012031555, "learning_rate": 4.982962787755011e-06, "loss": 0.0676, "step": 41829 }, { "epoch": 0.740789933899153, "grad_norm": 0.619347333908081, "learning_rate": 4.982322401712531e-06, "loss": 0.0559, "step": 41830 }, { "epoch": 0.7408076434361814, "grad_norm": 0.5756717920303345, "learning_rate": 4.981682048627201e-06, "loss": 0.0552, "step": 41831 }, { "epoch": 0.74082535297321, "grad_norm": 0.7705510258674622, "learning_rate": 4.981041728501116e-06, "loss": 0.066, "step": 41832 }, { "epoch": 0.7408430625102383, "grad_norm": 0.5959993600845337, "learning_rate": 4.980401441336393e-06, "loss": 0.0707, "step": 41833 }, { "epoch": 0.7408607720472667, "grad_norm": 0.8424370288848877, "learning_rate": 4.979761187135141e-06, "loss": 0.0731, "step": 41834 }, { "epoch": 0.7408784815842951, "grad_norm": 0.6586090922355652, "learning_rate": 4.979120965899457e-06, "loss": 0.0361, "step": 41835 }, { "epoch": 0.7408961911213237, "grad_norm": 0.7117573618888855, "learning_rate": 4.978480777631451e-06, "loss": 0.0656, "step": 41836 }, { "epoch": 0.740913900658352, "grad_norm": 0.5471295714378357, "learning_rate": 4.977840622333231e-06, "loss": 0.0448, "step": 41837 }, { "epoch": 0.7409316101953805, "grad_norm": 0.6165087223052979, "learning_rate": 4.977200500006903e-06, "loss": 0.0382, "step": 41838 }, { "epoch": 0.7409493197324089, "grad_norm": 0.5886778831481934, "learning_rate": 4.97656041065457e-06, "loss": 0.049, "step": 41839 }, { "epoch": 0.7409670292694374, "grad_norm": 0.41796836256980896, "learning_rate": 4.9759203542783384e-06, "loss": 0.0473, "step": 41840 }, { "epoch": 0.7409847388064658, "grad_norm": 1.154201865196228, "learning_rate": 4.9752803308803155e-06, "loss": 0.0734, "step": 41841 }, { "epoch": 0.7410024483434942, "grad_norm": 0.5006354451179504, "learning_rate": 4.974640340462606e-06, "loss": 0.0494, "step": 41842 }, { "epoch": 0.7410201578805226, "grad_norm": 0.4559478163719177, "learning_rate": 4.974000383027316e-06, "loss": 0.0413, "step": 41843 }, { "epoch": 0.7410378674175511, "grad_norm": 0.3042028844356537, "learning_rate": 4.973360458576551e-06, "loss": 0.0621, "step": 41844 }, { "epoch": 0.7410555769545795, "grad_norm": 0.49933362007141113, "learning_rate": 4.9727205671124195e-06, "loss": 0.0485, "step": 41845 }, { "epoch": 0.7410732864916079, "grad_norm": 0.7901591658592224, "learning_rate": 4.9720807086370206e-06, "loss": 0.0577, "step": 41846 }, { "epoch": 0.7410909960286364, "grad_norm": 0.30300629138946533, "learning_rate": 4.97144088315246e-06, "loss": 0.0852, "step": 41847 }, { "epoch": 0.7411087055656648, "grad_norm": 0.5631852746009827, "learning_rate": 4.9708010906608494e-06, "loss": 0.0574, "step": 41848 }, { "epoch": 0.7411264151026932, "grad_norm": 0.7238699793815613, "learning_rate": 4.970161331164285e-06, "loss": 0.0694, "step": 41849 }, { "epoch": 0.7411441246397216, "grad_norm": 0.7470657229423523, "learning_rate": 4.969521604664875e-06, "loss": 0.0797, "step": 41850 }, { "epoch": 0.7411618341767501, "grad_norm": 0.4733444154262543, "learning_rate": 4.9688819111647235e-06, "loss": 0.0545, "step": 41851 }, { "epoch": 0.7411795437137785, "grad_norm": 0.7914103865623474, "learning_rate": 4.968242250665936e-06, "loss": 0.0524, "step": 41852 }, { "epoch": 0.7411972532508069, "grad_norm": 0.5659950375556946, "learning_rate": 4.967602623170617e-06, "loss": 0.0669, "step": 41853 }, { "epoch": 0.7412149627878353, "grad_norm": 0.4652356207370758, "learning_rate": 4.9669630286808715e-06, "loss": 0.0526, "step": 41854 }, { "epoch": 0.7412326723248638, "grad_norm": 0.7064333558082581, "learning_rate": 4.966323467198802e-06, "loss": 0.0668, "step": 41855 }, { "epoch": 0.7412503818618922, "grad_norm": 0.7400432825088501, "learning_rate": 4.965683938726519e-06, "loss": 0.0358, "step": 41856 }, { "epoch": 0.7412680913989206, "grad_norm": 0.1896994262933731, "learning_rate": 4.965044443266117e-06, "loss": 0.0143, "step": 41857 }, { "epoch": 0.741285800935949, "grad_norm": 0.3758786916732788, "learning_rate": 4.964404980819703e-06, "loss": 0.0579, "step": 41858 }, { "epoch": 0.7413035104729775, "grad_norm": 0.6767260432243347, "learning_rate": 4.963765551389384e-06, "loss": 0.0552, "step": 41859 }, { "epoch": 0.7413212200100059, "grad_norm": 0.5988554358482361, "learning_rate": 4.96312615497726e-06, "loss": 0.0628, "step": 41860 }, { "epoch": 0.7413389295470343, "grad_norm": 0.7092759609222412, "learning_rate": 4.96248679158543e-06, "loss": 0.0591, "step": 41861 }, { "epoch": 0.7413566390840628, "grad_norm": 0.627280592918396, "learning_rate": 4.961847461216006e-06, "loss": 0.0564, "step": 41862 }, { "epoch": 0.7413743486210912, "grad_norm": 0.8022119402885437, "learning_rate": 4.9612081638710956e-06, "loss": 0.088, "step": 41863 }, { "epoch": 0.7413920581581196, "grad_norm": 1.0185407400131226, "learning_rate": 4.9605688995527885e-06, "loss": 0.0523, "step": 41864 }, { "epoch": 0.741409767695148, "grad_norm": 0.5798088908195496, "learning_rate": 4.959929668263195e-06, "loss": 0.0532, "step": 41865 }, { "epoch": 0.7414274772321765, "grad_norm": 0.5139932632446289, "learning_rate": 4.959290470004423e-06, "loss": 0.0448, "step": 41866 }, { "epoch": 0.7414451867692049, "grad_norm": 0.5757855772972107, "learning_rate": 4.958651304778565e-06, "loss": 0.0445, "step": 41867 }, { "epoch": 0.7414628963062333, "grad_norm": 0.5007525086402893, "learning_rate": 4.958012172587727e-06, "loss": 0.0385, "step": 41868 }, { "epoch": 0.7414806058432617, "grad_norm": 0.6498857736587524, "learning_rate": 4.957373073434015e-06, "loss": 0.0659, "step": 41869 }, { "epoch": 0.7414983153802902, "grad_norm": 0.7902027368545532, "learning_rate": 4.9567340073195285e-06, "loss": 0.0612, "step": 41870 }, { "epoch": 0.7415160249173186, "grad_norm": 0.7877457141876221, "learning_rate": 4.956094974246371e-06, "loss": 0.085, "step": 41871 }, { "epoch": 0.741533734454347, "grad_norm": 0.6781925559043884, "learning_rate": 4.9554559742166465e-06, "loss": 0.067, "step": 41872 }, { "epoch": 0.7415514439913754, "grad_norm": 0.7279493808746338, "learning_rate": 4.95481700723246e-06, "loss": 0.0593, "step": 41873 }, { "epoch": 0.7415691535284039, "grad_norm": 0.4851260483264923, "learning_rate": 4.9541780732959045e-06, "loss": 0.0519, "step": 41874 }, { "epoch": 0.7415868630654323, "grad_norm": 0.5629618167877197, "learning_rate": 4.953539172409089e-06, "loss": 0.0482, "step": 41875 }, { "epoch": 0.7416045726024607, "grad_norm": 0.5428810715675354, "learning_rate": 4.952900304574111e-06, "loss": 0.0414, "step": 41876 }, { "epoch": 0.7416222821394892, "grad_norm": 0.48018476366996765, "learning_rate": 4.952261469793081e-06, "loss": 0.0458, "step": 41877 }, { "epoch": 0.7416399916765176, "grad_norm": 0.44936051964759827, "learning_rate": 4.951622668068089e-06, "loss": 0.056, "step": 41878 }, { "epoch": 0.741657701213546, "grad_norm": 0.7496194243431091, "learning_rate": 4.950983899401242e-06, "loss": 0.0774, "step": 41879 }, { "epoch": 0.7416754107505744, "grad_norm": 0.35018572211265564, "learning_rate": 4.9503451637946426e-06, "loss": 0.0403, "step": 41880 }, { "epoch": 0.7416931202876029, "grad_norm": 0.5504740476608276, "learning_rate": 4.94970646125039e-06, "loss": 0.0497, "step": 41881 }, { "epoch": 0.7417108298246313, "grad_norm": 0.12377797812223434, "learning_rate": 4.949067791770587e-06, "loss": 0.0458, "step": 41882 }, { "epoch": 0.7417285393616597, "grad_norm": 0.8789085149765015, "learning_rate": 4.948429155357334e-06, "loss": 0.0659, "step": 41883 }, { "epoch": 0.7417462488986881, "grad_norm": 0.5227128863334656, "learning_rate": 4.947790552012739e-06, "loss": 0.0519, "step": 41884 }, { "epoch": 0.7417639584357166, "grad_norm": 0.36731964349746704, "learning_rate": 4.947151981738891e-06, "loss": 0.061, "step": 41885 }, { "epoch": 0.741781667972745, "grad_norm": 0.6626829504966736, "learning_rate": 4.946513444537895e-06, "loss": 0.0653, "step": 41886 }, { "epoch": 0.7417993775097734, "grad_norm": 0.607012152671814, "learning_rate": 4.945874940411853e-06, "loss": 0.0514, "step": 41887 }, { "epoch": 0.7418170870468018, "grad_norm": 0.8230364918708801, "learning_rate": 4.945236469362871e-06, "loss": 0.0577, "step": 41888 }, { "epoch": 0.7418347965838303, "grad_norm": 1.000522494316101, "learning_rate": 4.944598031393039e-06, "loss": 0.0799, "step": 41889 }, { "epoch": 0.7418525061208587, "grad_norm": 0.6240692138671875, "learning_rate": 4.943959626504457e-06, "loss": 0.0831, "step": 41890 }, { "epoch": 0.7418702156578871, "grad_norm": 0.4375050663948059, "learning_rate": 4.943321254699241e-06, "loss": 0.0853, "step": 41891 }, { "epoch": 0.7418879251949156, "grad_norm": 0.4389660060405731, "learning_rate": 4.942682915979475e-06, "loss": 0.0575, "step": 41892 }, { "epoch": 0.741905634731944, "grad_norm": 0.38981157541275024, "learning_rate": 4.942044610347267e-06, "loss": 0.0578, "step": 41893 }, { "epoch": 0.7419233442689724, "grad_norm": 0.610226035118103, "learning_rate": 4.941406337804715e-06, "loss": 0.0733, "step": 41894 }, { "epoch": 0.7419410538060008, "grad_norm": 0.9412966370582581, "learning_rate": 4.940768098353921e-06, "loss": 0.0627, "step": 41895 }, { "epoch": 0.7419587633430293, "grad_norm": 0.5086673498153687, "learning_rate": 4.940129891996979e-06, "loss": 0.0482, "step": 41896 }, { "epoch": 0.7419764728800577, "grad_norm": 0.6898057460784912, "learning_rate": 4.9394917187359926e-06, "loss": 0.069, "step": 41897 }, { "epoch": 0.7419941824170861, "grad_norm": 0.370449423789978, "learning_rate": 4.938853578573061e-06, "loss": 0.052, "step": 41898 }, { "epoch": 0.7420118919541145, "grad_norm": 0.5605536103248596, "learning_rate": 4.9382154715102835e-06, "loss": 0.0649, "step": 41899 }, { "epoch": 0.742029601491143, "grad_norm": 0.3989478051662445, "learning_rate": 4.937577397549758e-06, "loss": 0.0757, "step": 41900 }, { "epoch": 0.7420473110281715, "grad_norm": 0.43932008743286133, "learning_rate": 4.936939356693586e-06, "loss": 0.0711, "step": 41901 }, { "epoch": 0.7420650205651999, "grad_norm": 0.21676050126552582, "learning_rate": 4.9363013489438716e-06, "loss": 0.0319, "step": 41902 }, { "epoch": 0.7420827301022282, "grad_norm": 0.5021001696586609, "learning_rate": 4.935663374302703e-06, "loss": 0.0635, "step": 41903 }, { "epoch": 0.7421004396392568, "grad_norm": 0.3215877115726471, "learning_rate": 4.935025432772181e-06, "loss": 0.0303, "step": 41904 }, { "epoch": 0.7421181491762852, "grad_norm": 0.49870994687080383, "learning_rate": 4.934387524354415e-06, "loss": 0.0454, "step": 41905 }, { "epoch": 0.7421358587133136, "grad_norm": 0.6887651085853577, "learning_rate": 4.933749649051491e-06, "loss": 0.0675, "step": 41906 }, { "epoch": 0.7421535682503421, "grad_norm": 0.49928662180900574, "learning_rate": 4.933111806865511e-06, "loss": 0.0429, "step": 41907 }, { "epoch": 0.7421712777873705, "grad_norm": 0.8352583646774292, "learning_rate": 4.932473997798576e-06, "loss": 0.0681, "step": 41908 }, { "epoch": 0.7421889873243989, "grad_norm": 0.5167173743247986, "learning_rate": 4.931836221852784e-06, "loss": 0.0586, "step": 41909 }, { "epoch": 0.7422066968614273, "grad_norm": 0.7356970310211182, "learning_rate": 4.931198479030231e-06, "loss": 0.0711, "step": 41910 }, { "epoch": 0.7422244063984558, "grad_norm": 0.3911687731742859, "learning_rate": 4.930560769333017e-06, "loss": 0.032, "step": 41911 }, { "epoch": 0.7422421159354842, "grad_norm": 0.23058639466762543, "learning_rate": 4.929923092763245e-06, "loss": 0.0626, "step": 41912 }, { "epoch": 0.7422598254725126, "grad_norm": 0.45787838101387024, "learning_rate": 4.929285449323002e-06, "loss": 0.0415, "step": 41913 }, { "epoch": 0.742277535009541, "grad_norm": 0.8081434369087219, "learning_rate": 4.928647839014392e-06, "loss": 0.0665, "step": 41914 }, { "epoch": 0.7422952445465695, "grad_norm": 0.32610705494880676, "learning_rate": 4.928010261839512e-06, "loss": 0.0602, "step": 41915 }, { "epoch": 0.7423129540835979, "grad_norm": 0.5614830851554871, "learning_rate": 4.9273727178004636e-06, "loss": 0.0316, "step": 41916 }, { "epoch": 0.7423306636206263, "grad_norm": 0.47878384590148926, "learning_rate": 4.926735206899336e-06, "loss": 0.0564, "step": 41917 }, { "epoch": 0.7423483731576547, "grad_norm": 0.8530175685882568, "learning_rate": 4.926097729138229e-06, "loss": 0.0844, "step": 41918 }, { "epoch": 0.7423660826946832, "grad_norm": 0.8222280144691467, "learning_rate": 4.925460284519243e-06, "loss": 0.0849, "step": 41919 }, { "epoch": 0.7423837922317116, "grad_norm": 0.4415266811847687, "learning_rate": 4.9248228730444744e-06, "loss": 0.0378, "step": 41920 }, { "epoch": 0.74240150176874, "grad_norm": 0.7535200715065002, "learning_rate": 4.9241854947160185e-06, "loss": 0.0662, "step": 41921 }, { "epoch": 0.7424192113057685, "grad_norm": 0.5983855724334717, "learning_rate": 4.923548149535974e-06, "loss": 0.056, "step": 41922 }, { "epoch": 0.7424369208427969, "grad_norm": 0.4379270076751709, "learning_rate": 4.922910837506441e-06, "loss": 0.108, "step": 41923 }, { "epoch": 0.7424546303798253, "grad_norm": 0.9086202383041382, "learning_rate": 4.922273558629507e-06, "loss": 0.0434, "step": 41924 }, { "epoch": 0.7424723399168537, "grad_norm": 0.40124866366386414, "learning_rate": 4.921636312907275e-06, "loss": 0.0654, "step": 41925 }, { "epoch": 0.7424900494538822, "grad_norm": 0.2955816686153412, "learning_rate": 4.9209991003418385e-06, "loss": 0.0323, "step": 41926 }, { "epoch": 0.7425077589909106, "grad_norm": 0.6243027448654175, "learning_rate": 4.9203619209352975e-06, "loss": 0.0599, "step": 41927 }, { "epoch": 0.742525468527939, "grad_norm": 0.5359882116317749, "learning_rate": 4.919724774689744e-06, "loss": 0.0703, "step": 41928 }, { "epoch": 0.7425431780649674, "grad_norm": 0.4507286846637726, "learning_rate": 4.919087661607278e-06, "loss": 0.0493, "step": 41929 }, { "epoch": 0.7425608876019959, "grad_norm": 0.4640578329563141, "learning_rate": 4.918450581689999e-06, "loss": 0.0519, "step": 41930 }, { "epoch": 0.7425785971390243, "grad_norm": 0.7667317390441895, "learning_rate": 4.917813534939992e-06, "loss": 0.0706, "step": 41931 }, { "epoch": 0.7425963066760527, "grad_norm": 0.6719586849212646, "learning_rate": 4.91717652135936e-06, "loss": 0.0716, "step": 41932 }, { "epoch": 0.7426140162130811, "grad_norm": 0.6419702768325806, "learning_rate": 4.916539540950196e-06, "loss": 0.0654, "step": 41933 }, { "epoch": 0.7426317257501096, "grad_norm": 0.719094455242157, "learning_rate": 4.915902593714602e-06, "loss": 0.0632, "step": 41934 }, { "epoch": 0.742649435287138, "grad_norm": 0.4935362935066223, "learning_rate": 4.915265679654665e-06, "loss": 0.0461, "step": 41935 }, { "epoch": 0.7426671448241664, "grad_norm": 0.6254559755325317, "learning_rate": 4.914628798772482e-06, "loss": 0.0733, "step": 41936 }, { "epoch": 0.7426848543611949, "grad_norm": 0.5290220379829407, "learning_rate": 4.913991951070151e-06, "loss": 0.0651, "step": 41937 }, { "epoch": 0.7427025638982233, "grad_norm": 0.7845237851142883, "learning_rate": 4.913355136549766e-06, "loss": 0.0445, "step": 41938 }, { "epoch": 0.7427202734352517, "grad_norm": 0.7948992252349854, "learning_rate": 4.9127183552134245e-06, "loss": 0.0408, "step": 41939 }, { "epoch": 0.7427379829722801, "grad_norm": 0.5935308933258057, "learning_rate": 4.912081607063217e-06, "loss": 0.0558, "step": 41940 }, { "epoch": 0.7427556925093086, "grad_norm": 0.4240618646144867, "learning_rate": 4.911444892101246e-06, "loss": 0.0678, "step": 41941 }, { "epoch": 0.742773402046337, "grad_norm": 0.8998540639877319, "learning_rate": 4.910808210329597e-06, "loss": 0.0616, "step": 41942 }, { "epoch": 0.7427911115833654, "grad_norm": 0.5805330276489258, "learning_rate": 4.910171561750368e-06, "loss": 0.0485, "step": 41943 }, { "epoch": 0.7428088211203938, "grad_norm": 0.42325061559677124, "learning_rate": 4.909534946365659e-06, "loss": 0.0394, "step": 41944 }, { "epoch": 0.7428265306574223, "grad_norm": 0.8782273530960083, "learning_rate": 4.908898364177554e-06, "loss": 0.0707, "step": 41945 }, { "epoch": 0.7428442401944507, "grad_norm": 0.6557015180587769, "learning_rate": 4.908261815188153e-06, "loss": 0.0374, "step": 41946 }, { "epoch": 0.7428619497314791, "grad_norm": 0.6811383366584778, "learning_rate": 4.90762529939955e-06, "loss": 0.055, "step": 41947 }, { "epoch": 0.7428796592685075, "grad_norm": 0.8855406641960144, "learning_rate": 4.9069888168138384e-06, "loss": 0.0767, "step": 41948 }, { "epoch": 0.742897368805536, "grad_norm": 0.4998452365398407, "learning_rate": 4.906352367433115e-06, "loss": 0.0467, "step": 41949 }, { "epoch": 0.7429150783425644, "grad_norm": 0.552375078201294, "learning_rate": 4.905715951259469e-06, "loss": 0.0296, "step": 41950 }, { "epoch": 0.7429327878795928, "grad_norm": 0.4502914249897003, "learning_rate": 4.905079568295002e-06, "loss": 0.0405, "step": 41951 }, { "epoch": 0.7429504974166213, "grad_norm": 0.663381814956665, "learning_rate": 4.904443218541798e-06, "loss": 0.0784, "step": 41952 }, { "epoch": 0.7429682069536497, "grad_norm": 0.7741327881813049, "learning_rate": 4.9038069020019535e-06, "loss": 0.0719, "step": 41953 }, { "epoch": 0.7429859164906781, "grad_norm": 0.669279932975769, "learning_rate": 4.903170618677564e-06, "loss": 0.0679, "step": 41954 }, { "epoch": 0.7430036260277065, "grad_norm": 0.7231165170669556, "learning_rate": 4.902534368570726e-06, "loss": 0.0685, "step": 41955 }, { "epoch": 0.743021335564735, "grad_norm": 0.4706228971481323, "learning_rate": 4.901898151683519e-06, "loss": 0.0435, "step": 41956 }, { "epoch": 0.7430390451017634, "grad_norm": 0.4474710524082184, "learning_rate": 4.901261968018051e-06, "loss": 0.0455, "step": 41957 }, { "epoch": 0.7430567546387918, "grad_norm": 0.9911869764328003, "learning_rate": 4.900625817576411e-06, "loss": 0.0722, "step": 41958 }, { "epoch": 0.7430744641758202, "grad_norm": 0.5763992071151733, "learning_rate": 4.899989700360694e-06, "loss": 0.0898, "step": 41959 }, { "epoch": 0.7430921737128487, "grad_norm": 0.3680882751941681, "learning_rate": 4.899353616372985e-06, "loss": 0.047, "step": 41960 }, { "epoch": 0.7431098832498771, "grad_norm": 0.6311447024345398, "learning_rate": 4.898717565615381e-06, "loss": 0.0472, "step": 41961 }, { "epoch": 0.7431275927869055, "grad_norm": 0.536948025226593, "learning_rate": 4.898081548089978e-06, "loss": 0.0593, "step": 41962 }, { "epoch": 0.7431453023239339, "grad_norm": 0.4113801121711731, "learning_rate": 4.897445563798862e-06, "loss": 0.0338, "step": 41963 }, { "epoch": 0.7431630118609625, "grad_norm": 0.7946279048919678, "learning_rate": 4.896809612744126e-06, "loss": 0.0816, "step": 41964 }, { "epoch": 0.7431807213979909, "grad_norm": 0.5673768520355225, "learning_rate": 4.8961736949278665e-06, "loss": 0.0709, "step": 41965 }, { "epoch": 0.7431984309350192, "grad_norm": 0.634852409362793, "learning_rate": 4.895537810352173e-06, "loss": 0.0391, "step": 41966 }, { "epoch": 0.7432161404720478, "grad_norm": 0.5997543334960938, "learning_rate": 4.894901959019138e-06, "loss": 0.0501, "step": 41967 }, { "epoch": 0.7432338500090762, "grad_norm": 0.5747032761573792, "learning_rate": 4.894266140930854e-06, "loss": 0.0527, "step": 41968 }, { "epoch": 0.7432515595461046, "grad_norm": 0.7507258653640747, "learning_rate": 4.893630356089417e-06, "loss": 0.0988, "step": 41969 }, { "epoch": 0.743269269083133, "grad_norm": 0.39572873711586, "learning_rate": 4.892994604496908e-06, "loss": 0.0323, "step": 41970 }, { "epoch": 0.7432869786201615, "grad_norm": 0.4696836769580841, "learning_rate": 4.892358886155426e-06, "loss": 0.0292, "step": 41971 }, { "epoch": 0.7433046881571899, "grad_norm": 0.6502214074134827, "learning_rate": 4.8917232010670595e-06, "loss": 0.082, "step": 41972 }, { "epoch": 0.7433223976942183, "grad_norm": 0.600168764591217, "learning_rate": 4.891087549233908e-06, "loss": 0.086, "step": 41973 }, { "epoch": 0.7433401072312467, "grad_norm": 0.6094770431518555, "learning_rate": 4.890451930658049e-06, "loss": 0.0515, "step": 41974 }, { "epoch": 0.7433578167682752, "grad_norm": 0.7097020745277405, "learning_rate": 4.889816345341582e-06, "loss": 0.0663, "step": 41975 }, { "epoch": 0.7433755263053036, "grad_norm": 0.6391171813011169, "learning_rate": 4.889180793286596e-06, "loss": 0.0474, "step": 41976 }, { "epoch": 0.743393235842332, "grad_norm": 0.5475188493728638, "learning_rate": 4.888545274495184e-06, "loss": 0.0581, "step": 41977 }, { "epoch": 0.7434109453793604, "grad_norm": 0.599479615688324, "learning_rate": 4.887909788969434e-06, "loss": 0.0534, "step": 41978 }, { "epoch": 0.7434286549163889, "grad_norm": 0.7073846459388733, "learning_rate": 4.887274336711437e-06, "loss": 0.0405, "step": 41979 }, { "epoch": 0.7434463644534173, "grad_norm": 0.7103764414787292, "learning_rate": 4.886638917723291e-06, "loss": 0.0811, "step": 41980 }, { "epoch": 0.7434640739904457, "grad_norm": 0.45336592197418213, "learning_rate": 4.886003532007075e-06, "loss": 0.0463, "step": 41981 }, { "epoch": 0.7434817835274742, "grad_norm": 0.7947705984115601, "learning_rate": 4.885368179564884e-06, "loss": 0.0769, "step": 41982 }, { "epoch": 0.7434994930645026, "grad_norm": 0.7506201863288879, "learning_rate": 4.884732860398815e-06, "loss": 0.089, "step": 41983 }, { "epoch": 0.743517202601531, "grad_norm": 0.8788061738014221, "learning_rate": 4.884097574510945e-06, "loss": 0.0527, "step": 41984 }, { "epoch": 0.7435349121385594, "grad_norm": 0.3400518596172333, "learning_rate": 4.883462321903367e-06, "loss": 0.0595, "step": 41985 }, { "epoch": 0.7435526216755879, "grad_norm": 0.663701593875885, "learning_rate": 4.882827102578179e-06, "loss": 0.0623, "step": 41986 }, { "epoch": 0.7435703312126163, "grad_norm": 0.5728311538696289, "learning_rate": 4.882191916537472e-06, "loss": 0.0349, "step": 41987 }, { "epoch": 0.7435880407496447, "grad_norm": 0.7021204829216003, "learning_rate": 4.881556763783324e-06, "loss": 0.0534, "step": 41988 }, { "epoch": 0.7436057502866731, "grad_norm": 0.5364171862602234, "learning_rate": 4.8809216443178325e-06, "loss": 0.0399, "step": 41989 }, { "epoch": 0.7436234598237016, "grad_norm": 0.4818272590637207, "learning_rate": 4.88028655814309e-06, "loss": 0.057, "step": 41990 }, { "epoch": 0.74364116936073, "grad_norm": 0.5262118577957153, "learning_rate": 4.879651505261176e-06, "loss": 0.0438, "step": 41991 }, { "epoch": 0.7436588788977584, "grad_norm": 0.5640106797218323, "learning_rate": 4.879016485674185e-06, "loss": 0.0407, "step": 41992 }, { "epoch": 0.7436765884347868, "grad_norm": 0.6426365375518799, "learning_rate": 4.878381499384205e-06, "loss": 0.0546, "step": 41993 }, { "epoch": 0.7436942979718153, "grad_norm": 0.47383132576942444, "learning_rate": 4.8777465463933265e-06, "loss": 0.0382, "step": 41994 }, { "epoch": 0.7437120075088437, "grad_norm": 0.7735221982002258, "learning_rate": 4.8771116267036395e-06, "loss": 0.0407, "step": 41995 }, { "epoch": 0.7437297170458721, "grad_norm": 0.6248378753662109, "learning_rate": 4.87647674031723e-06, "loss": 0.0498, "step": 41996 }, { "epoch": 0.7437474265829006, "grad_norm": 0.46692168712615967, "learning_rate": 4.875841887236189e-06, "loss": 0.0343, "step": 41997 }, { "epoch": 0.743765136119929, "grad_norm": 0.4237169623374939, "learning_rate": 4.875207067462608e-06, "loss": 0.057, "step": 41998 }, { "epoch": 0.7437828456569574, "grad_norm": 0.8701715469360352, "learning_rate": 4.874572280998567e-06, "loss": 0.0787, "step": 41999 }, { "epoch": 0.7438005551939858, "grad_norm": 0.5896667838096619, "learning_rate": 4.873937527846159e-06, "loss": 0.0642, "step": 42000 }, { "epoch": 0.7438182647310143, "grad_norm": 0.5059638619422913, "learning_rate": 4.873302808007478e-06, "loss": 0.0374, "step": 42001 }, { "epoch": 0.7438359742680427, "grad_norm": 0.4765388071537018, "learning_rate": 4.872668121484602e-06, "loss": 0.0313, "step": 42002 }, { "epoch": 0.7438536838050711, "grad_norm": 1.0582250356674194, "learning_rate": 4.8720334682796225e-06, "loss": 0.0925, "step": 42003 }, { "epoch": 0.7438713933420995, "grad_norm": 0.5540131330490112, "learning_rate": 4.87139884839463e-06, "loss": 0.0508, "step": 42004 }, { "epoch": 0.743889102879128, "grad_norm": 0.5874868035316467, "learning_rate": 4.870764261831709e-06, "loss": 0.0623, "step": 42005 }, { "epoch": 0.7439068124161564, "grad_norm": 0.9881328344345093, "learning_rate": 4.87012970859295e-06, "loss": 0.0712, "step": 42006 }, { "epoch": 0.7439245219531848, "grad_norm": 0.4633110463619232, "learning_rate": 4.869495188680439e-06, "loss": 0.0739, "step": 42007 }, { "epoch": 0.7439422314902132, "grad_norm": 0.8258633017539978, "learning_rate": 4.86886070209627e-06, "loss": 0.0837, "step": 42008 }, { "epoch": 0.7439599410272417, "grad_norm": 0.7778157591819763, "learning_rate": 4.86822624884252e-06, "loss": 0.0715, "step": 42009 }, { "epoch": 0.7439776505642701, "grad_norm": 0.6384034752845764, "learning_rate": 4.867591828921279e-06, "loss": 0.0523, "step": 42010 }, { "epoch": 0.7439953601012985, "grad_norm": 0.43800196051597595, "learning_rate": 4.866957442334637e-06, "loss": 0.0385, "step": 42011 }, { "epoch": 0.744013069638327, "grad_norm": 0.3646930754184723, "learning_rate": 4.866323089084685e-06, "loss": 0.0491, "step": 42012 }, { "epoch": 0.7440307791753554, "grad_norm": 0.590042769908905, "learning_rate": 4.865688769173501e-06, "loss": 0.0814, "step": 42013 }, { "epoch": 0.7440484887123838, "grad_norm": 0.5654574036598206, "learning_rate": 4.865054482603171e-06, "loss": 0.0416, "step": 42014 }, { "epoch": 0.7440661982494122, "grad_norm": 0.1901874542236328, "learning_rate": 4.864420229375795e-06, "loss": 0.0398, "step": 42015 }, { "epoch": 0.7440839077864407, "grad_norm": 0.5339882969856262, "learning_rate": 4.863786009493448e-06, "loss": 0.0568, "step": 42016 }, { "epoch": 0.7441016173234691, "grad_norm": 0.4083462655544281, "learning_rate": 4.863151822958218e-06, "loss": 0.0701, "step": 42017 }, { "epoch": 0.7441193268604975, "grad_norm": 0.621030330657959, "learning_rate": 4.862517669772194e-06, "loss": 0.0878, "step": 42018 }, { "epoch": 0.7441370363975259, "grad_norm": 0.6521271467208862, "learning_rate": 4.8618835499374685e-06, "loss": 0.0722, "step": 42019 }, { "epoch": 0.7441547459345544, "grad_norm": 0.4437965750694275, "learning_rate": 4.861249463456113e-06, "loss": 0.0497, "step": 42020 }, { "epoch": 0.7441724554715828, "grad_norm": 0.5942351222038269, "learning_rate": 4.860615410330222e-06, "loss": 0.0739, "step": 42021 }, { "epoch": 0.7441901650086112, "grad_norm": 0.7514597177505493, "learning_rate": 4.859981390561882e-06, "loss": 0.0495, "step": 42022 }, { "epoch": 0.7442078745456396, "grad_norm": 0.49098020792007446, "learning_rate": 4.859347404153176e-06, "loss": 0.0406, "step": 42023 }, { "epoch": 0.7442255840826681, "grad_norm": 0.596623957157135, "learning_rate": 4.858713451106191e-06, "loss": 0.0813, "step": 42024 }, { "epoch": 0.7442432936196965, "grad_norm": 0.9092636108398438, "learning_rate": 4.8580795314230155e-06, "loss": 0.046, "step": 42025 }, { "epoch": 0.7442610031567249, "grad_norm": 0.5886296629905701, "learning_rate": 4.857445645105736e-06, "loss": 0.0813, "step": 42026 }, { "epoch": 0.7442787126937535, "grad_norm": 0.7714384198188782, "learning_rate": 4.8568117921564294e-06, "loss": 0.0686, "step": 42027 }, { "epoch": 0.7442964222307819, "grad_norm": 0.7762993574142456, "learning_rate": 4.856177972577187e-06, "loss": 0.0476, "step": 42028 }, { "epoch": 0.7443141317678102, "grad_norm": 0.6531065106391907, "learning_rate": 4.855544186370091e-06, "loss": 0.062, "step": 42029 }, { "epoch": 0.7443318413048386, "grad_norm": 0.7071622014045715, "learning_rate": 4.854910433537235e-06, "loss": 0.0454, "step": 42030 }, { "epoch": 0.7443495508418672, "grad_norm": 0.5360866189002991, "learning_rate": 4.854276714080693e-06, "loss": 0.0757, "step": 42031 }, { "epoch": 0.7443672603788956, "grad_norm": 0.391297310590744, "learning_rate": 4.853643028002553e-06, "loss": 0.0657, "step": 42032 }, { "epoch": 0.744384969915924, "grad_norm": 0.4113521873950958, "learning_rate": 4.853009375304903e-06, "loss": 0.0587, "step": 42033 }, { "epoch": 0.7444026794529524, "grad_norm": 0.5227432250976562, "learning_rate": 4.8523757559898255e-06, "loss": 0.0556, "step": 42034 }, { "epoch": 0.7444203889899809, "grad_norm": 0.609576940536499, "learning_rate": 4.851742170059404e-06, "loss": 0.0605, "step": 42035 }, { "epoch": 0.7444380985270093, "grad_norm": 0.5392983555793762, "learning_rate": 4.851108617515726e-06, "loss": 0.0686, "step": 42036 }, { "epoch": 0.7444558080640377, "grad_norm": 0.65478515625, "learning_rate": 4.850475098360878e-06, "loss": 0.051, "step": 42037 }, { "epoch": 0.7444735176010661, "grad_norm": 0.6701140999794006, "learning_rate": 4.849841612596936e-06, "loss": 0.0533, "step": 42038 }, { "epoch": 0.7444912271380946, "grad_norm": 0.6085770130157471, "learning_rate": 4.8492081602259884e-06, "loss": 0.0744, "step": 42039 }, { "epoch": 0.744508936675123, "grad_norm": 0.3026788830757141, "learning_rate": 4.848574741250123e-06, "loss": 0.0597, "step": 42040 }, { "epoch": 0.7445266462121514, "grad_norm": 0.7071699500083923, "learning_rate": 4.847941355671416e-06, "loss": 0.0847, "step": 42041 }, { "epoch": 0.7445443557491799, "grad_norm": 0.6031857132911682, "learning_rate": 4.8473080034919555e-06, "loss": 0.0447, "step": 42042 }, { "epoch": 0.7445620652862083, "grad_norm": 0.5316008925437927, "learning_rate": 4.84667468471382e-06, "loss": 0.059, "step": 42043 }, { "epoch": 0.7445797748232367, "grad_norm": 0.21234777569770813, "learning_rate": 4.846041399339106e-06, "loss": 0.0397, "step": 42044 }, { "epoch": 0.7445974843602651, "grad_norm": 0.7324838042259216, "learning_rate": 4.845408147369886e-06, "loss": 0.0823, "step": 42045 }, { "epoch": 0.7446151938972936, "grad_norm": 0.5199950337409973, "learning_rate": 4.844774928808245e-06, "loss": 0.0427, "step": 42046 }, { "epoch": 0.744632903434322, "grad_norm": 0.5758374333381653, "learning_rate": 4.844141743656272e-06, "loss": 0.0532, "step": 42047 }, { "epoch": 0.7446506129713504, "grad_norm": 0.5017144083976746, "learning_rate": 4.843508591916041e-06, "loss": 0.068, "step": 42048 }, { "epoch": 0.7446683225083788, "grad_norm": 0.8024714589118958, "learning_rate": 4.84287547358964e-06, "loss": 0.0595, "step": 42049 }, { "epoch": 0.7446860320454073, "grad_norm": 0.666330873966217, "learning_rate": 4.842242388679151e-06, "loss": 0.0815, "step": 42050 }, { "epoch": 0.7447037415824357, "grad_norm": 0.5925477147102356, "learning_rate": 4.841609337186656e-06, "loss": 0.0375, "step": 42051 }, { "epoch": 0.7447214511194641, "grad_norm": 0.41374945640563965, "learning_rate": 4.840976319114241e-06, "loss": 0.0363, "step": 42052 }, { "epoch": 0.7447391606564925, "grad_norm": 0.6229814291000366, "learning_rate": 4.840343334463984e-06, "loss": 0.0846, "step": 42053 }, { "epoch": 0.744756870193521, "grad_norm": 0.7396826148033142, "learning_rate": 4.839710383237976e-06, "loss": 0.0602, "step": 42054 }, { "epoch": 0.7447745797305494, "grad_norm": 0.49532604217529297, "learning_rate": 4.8390774654382894e-06, "loss": 0.0629, "step": 42055 }, { "epoch": 0.7447922892675778, "grad_norm": 0.7154461145401001, "learning_rate": 4.838444581067008e-06, "loss": 0.0745, "step": 42056 }, { "epoch": 0.7448099988046063, "grad_norm": 0.3844110369682312, "learning_rate": 4.837811730126218e-06, "loss": 0.049, "step": 42057 }, { "epoch": 0.7448277083416347, "grad_norm": 0.3967263102531433, "learning_rate": 4.837178912618003e-06, "loss": 0.0593, "step": 42058 }, { "epoch": 0.7448454178786631, "grad_norm": 0.6290924549102783, "learning_rate": 4.836546128544436e-06, "loss": 0.0746, "step": 42059 }, { "epoch": 0.7448631274156915, "grad_norm": 0.37384942173957825, "learning_rate": 4.835913377907606e-06, "loss": 0.0271, "step": 42060 }, { "epoch": 0.74488083695272, "grad_norm": 0.4194989800453186, "learning_rate": 4.835280660709593e-06, "loss": 0.0655, "step": 42061 }, { "epoch": 0.7448985464897484, "grad_norm": 0.5611689686775208, "learning_rate": 4.834647976952477e-06, "loss": 0.0643, "step": 42062 }, { "epoch": 0.7449162560267768, "grad_norm": 0.5517065525054932, "learning_rate": 4.834015326638342e-06, "loss": 0.0693, "step": 42063 }, { "epoch": 0.7449339655638052, "grad_norm": 0.5481336116790771, "learning_rate": 4.833382709769268e-06, "loss": 0.0481, "step": 42064 }, { "epoch": 0.7449516751008337, "grad_norm": 0.447815477848053, "learning_rate": 4.832750126347341e-06, "loss": 0.0353, "step": 42065 }, { "epoch": 0.7449693846378621, "grad_norm": 0.5326006412506104, "learning_rate": 4.832117576374636e-06, "loss": 0.0594, "step": 42066 }, { "epoch": 0.7449870941748905, "grad_norm": 0.3835776448249817, "learning_rate": 4.831485059853233e-06, "loss": 0.0582, "step": 42067 }, { "epoch": 0.7450048037119189, "grad_norm": 0.8161139488220215, "learning_rate": 4.830852576785215e-06, "loss": 0.0661, "step": 42068 }, { "epoch": 0.7450225132489474, "grad_norm": 0.47176966071128845, "learning_rate": 4.830220127172669e-06, "loss": 0.0391, "step": 42069 }, { "epoch": 0.7450402227859758, "grad_norm": 0.3643815517425537, "learning_rate": 4.829587711017667e-06, "loss": 0.0555, "step": 42070 }, { "epoch": 0.7450579323230042, "grad_norm": 0.39818838238716125, "learning_rate": 4.8289553283222905e-06, "loss": 0.0762, "step": 42071 }, { "epoch": 0.7450756418600327, "grad_norm": 0.6201454401016235, "learning_rate": 4.828322979088624e-06, "loss": 0.0657, "step": 42072 }, { "epoch": 0.7450933513970611, "grad_norm": 0.6080257892608643, "learning_rate": 4.8276906633187445e-06, "loss": 0.0361, "step": 42073 }, { "epoch": 0.7451110609340895, "grad_norm": 0.7860575318336487, "learning_rate": 4.827058381014734e-06, "loss": 0.0942, "step": 42074 }, { "epoch": 0.7451287704711179, "grad_norm": 0.8363597989082336, "learning_rate": 4.8264261321786736e-06, "loss": 0.0874, "step": 42075 }, { "epoch": 0.7451464800081464, "grad_norm": 0.4122912287712097, "learning_rate": 4.825793916812647e-06, "loss": 0.0526, "step": 42076 }, { "epoch": 0.7451641895451748, "grad_norm": 0.764403760433197, "learning_rate": 4.825161734918724e-06, "loss": 0.0698, "step": 42077 }, { "epoch": 0.7451818990822032, "grad_norm": 0.6534812450408936, "learning_rate": 4.824529586498989e-06, "loss": 0.068, "step": 42078 }, { "epoch": 0.7451996086192316, "grad_norm": 0.6426441669464111, "learning_rate": 4.823897471555529e-06, "loss": 0.0627, "step": 42079 }, { "epoch": 0.7452173181562601, "grad_norm": 1.2393677234649658, "learning_rate": 4.823265390090406e-06, "loss": 0.0462, "step": 42080 }, { "epoch": 0.7452350276932885, "grad_norm": 0.5901870727539062, "learning_rate": 4.822633342105716e-06, "loss": 0.0444, "step": 42081 }, { "epoch": 0.7452527372303169, "grad_norm": 0.6605796813964844, "learning_rate": 4.822001327603532e-06, "loss": 0.083, "step": 42082 }, { "epoch": 0.7452704467673453, "grad_norm": 0.9275726675987244, "learning_rate": 4.821369346585941e-06, "loss": 0.0779, "step": 42083 }, { "epoch": 0.7452881563043738, "grad_norm": 0.617917001247406, "learning_rate": 4.82073739905501e-06, "loss": 0.065, "step": 42084 }, { "epoch": 0.7453058658414022, "grad_norm": 0.6258924603462219, "learning_rate": 4.820105485012822e-06, "loss": 0.0631, "step": 42085 }, { "epoch": 0.7453235753784306, "grad_norm": 0.9140731692314148, "learning_rate": 4.8194736044614624e-06, "loss": 0.0696, "step": 42086 }, { "epoch": 0.7453412849154591, "grad_norm": 0.7502133250236511, "learning_rate": 4.8188417574030014e-06, "loss": 0.0714, "step": 42087 }, { "epoch": 0.7453589944524875, "grad_norm": 0.7925053834915161, "learning_rate": 4.818209943839521e-06, "loss": 0.1111, "step": 42088 }, { "epoch": 0.7453767039895159, "grad_norm": 0.9101350903511047, "learning_rate": 4.817578163773098e-06, "loss": 0.0745, "step": 42089 }, { "epoch": 0.7453944135265443, "grad_norm": 0.27074530720710754, "learning_rate": 4.816946417205816e-06, "loss": 0.0711, "step": 42090 }, { "epoch": 0.7454121230635729, "grad_norm": 0.7708010673522949, "learning_rate": 4.8163147041397485e-06, "loss": 0.0494, "step": 42091 }, { "epoch": 0.7454298326006012, "grad_norm": 0.7098979949951172, "learning_rate": 4.815683024576975e-06, "loss": 0.0789, "step": 42092 }, { "epoch": 0.7454475421376296, "grad_norm": 0.5225360989570618, "learning_rate": 4.81505137851958e-06, "loss": 0.0501, "step": 42093 }, { "epoch": 0.745465251674658, "grad_norm": 0.8204417824745178, "learning_rate": 4.814419765969631e-06, "loss": 0.0487, "step": 42094 }, { "epoch": 0.7454829612116866, "grad_norm": 0.46492260694503784, "learning_rate": 4.813788186929212e-06, "loss": 0.0608, "step": 42095 }, { "epoch": 0.745500670748715, "grad_norm": 0.8505313396453857, "learning_rate": 4.813156641400397e-06, "loss": 0.0638, "step": 42096 }, { "epoch": 0.7455183802857434, "grad_norm": 0.6861745715141296, "learning_rate": 4.812525129385272e-06, "loss": 0.0492, "step": 42097 }, { "epoch": 0.7455360898227719, "grad_norm": 0.8899176120758057, "learning_rate": 4.811893650885906e-06, "loss": 0.0569, "step": 42098 }, { "epoch": 0.7455537993598003, "grad_norm": 0.8043389916419983, "learning_rate": 4.811262205904377e-06, "loss": 0.0908, "step": 42099 }, { "epoch": 0.7455715088968287, "grad_norm": 0.5166844129562378, "learning_rate": 4.810630794442766e-06, "loss": 0.0321, "step": 42100 }, { "epoch": 0.7455892184338571, "grad_norm": 0.7722020149230957, "learning_rate": 4.8099994165031495e-06, "loss": 0.0565, "step": 42101 }, { "epoch": 0.7456069279708856, "grad_norm": 0.7667727470397949, "learning_rate": 4.809368072087604e-06, "loss": 0.0763, "step": 42102 }, { "epoch": 0.745624637507914, "grad_norm": 0.45146918296813965, "learning_rate": 4.808736761198207e-06, "loss": 0.0429, "step": 42103 }, { "epoch": 0.7456423470449424, "grad_norm": 0.5551329255104065, "learning_rate": 4.808105483837038e-06, "loss": 0.0562, "step": 42104 }, { "epoch": 0.7456600565819708, "grad_norm": 0.6357629299163818, "learning_rate": 4.807474240006169e-06, "loss": 0.0991, "step": 42105 }, { "epoch": 0.7456777661189993, "grad_norm": 0.4397452473640442, "learning_rate": 4.806843029707678e-06, "loss": 0.0431, "step": 42106 }, { "epoch": 0.7456954756560277, "grad_norm": 0.6616259217262268, "learning_rate": 4.806211852943641e-06, "loss": 0.0734, "step": 42107 }, { "epoch": 0.7457131851930561, "grad_norm": 0.5271729230880737, "learning_rate": 4.805580709716142e-06, "loss": 0.0586, "step": 42108 }, { "epoch": 0.7457308947300845, "grad_norm": 0.3424241244792938, "learning_rate": 4.804949600027242e-06, "loss": 0.0512, "step": 42109 }, { "epoch": 0.745748604267113, "grad_norm": 0.6642096638679504, "learning_rate": 4.804318523879032e-06, "loss": 0.0674, "step": 42110 }, { "epoch": 0.7457663138041414, "grad_norm": 0.5616798400878906, "learning_rate": 4.803687481273587e-06, "loss": 0.0584, "step": 42111 }, { "epoch": 0.7457840233411698, "grad_norm": 0.7337259650230408, "learning_rate": 4.803056472212973e-06, "loss": 0.0653, "step": 42112 }, { "epoch": 0.7458017328781983, "grad_norm": 0.45482200384140015, "learning_rate": 4.802425496699273e-06, "loss": 0.0563, "step": 42113 }, { "epoch": 0.7458194424152267, "grad_norm": 0.5976850390434265, "learning_rate": 4.801794554734561e-06, "loss": 0.0596, "step": 42114 }, { "epoch": 0.7458371519522551, "grad_norm": 0.7219369411468506, "learning_rate": 4.801163646320918e-06, "loss": 0.0506, "step": 42115 }, { "epoch": 0.7458548614892835, "grad_norm": 0.6975401043891907, "learning_rate": 4.800532771460411e-06, "loss": 0.0513, "step": 42116 }, { "epoch": 0.745872571026312, "grad_norm": 0.5393365621566772, "learning_rate": 4.799901930155121e-06, "loss": 0.055, "step": 42117 }, { "epoch": 0.7458902805633404, "grad_norm": 0.6804608106613159, "learning_rate": 4.79927112240712e-06, "loss": 0.0591, "step": 42118 }, { "epoch": 0.7459079901003688, "grad_norm": 0.3875720798969269, "learning_rate": 4.798640348218485e-06, "loss": 0.0653, "step": 42119 }, { "epoch": 0.7459256996373972, "grad_norm": 0.7339736819267273, "learning_rate": 4.798009607591292e-06, "loss": 0.0472, "step": 42120 }, { "epoch": 0.7459434091744257, "grad_norm": 0.511093258857727, "learning_rate": 4.797378900527616e-06, "loss": 0.0371, "step": 42121 }, { "epoch": 0.7459611187114541, "grad_norm": 0.7449150681495667, "learning_rate": 4.796748227029536e-06, "loss": 0.0592, "step": 42122 }, { "epoch": 0.7459788282484825, "grad_norm": 0.775144100189209, "learning_rate": 4.796117587099117e-06, "loss": 0.0536, "step": 42123 }, { "epoch": 0.7459965377855109, "grad_norm": 0.6870821118354797, "learning_rate": 4.795486980738441e-06, "loss": 0.0614, "step": 42124 }, { "epoch": 0.7460142473225394, "grad_norm": 0.578430712223053, "learning_rate": 4.794856407949584e-06, "loss": 0.0422, "step": 42125 }, { "epoch": 0.7460319568595678, "grad_norm": 0.6562290191650391, "learning_rate": 4.794225868734612e-06, "loss": 0.053, "step": 42126 }, { "epoch": 0.7460496663965962, "grad_norm": 0.8194093108177185, "learning_rate": 4.793595363095605e-06, "loss": 0.0486, "step": 42127 }, { "epoch": 0.7460673759336247, "grad_norm": 0.5712977647781372, "learning_rate": 4.792964891034637e-06, "loss": 0.0546, "step": 42128 }, { "epoch": 0.7460850854706531, "grad_norm": 0.3430836498737335, "learning_rate": 4.792334452553782e-06, "loss": 0.0351, "step": 42129 }, { "epoch": 0.7461027950076815, "grad_norm": 0.6520636677742004, "learning_rate": 4.791704047655114e-06, "loss": 0.0667, "step": 42130 }, { "epoch": 0.7461205045447099, "grad_norm": 0.8163881897926331, "learning_rate": 4.791073676340708e-06, "loss": 0.079, "step": 42131 }, { "epoch": 0.7461382140817384, "grad_norm": 0.6115255355834961, "learning_rate": 4.790443338612637e-06, "loss": 0.048, "step": 42132 }, { "epoch": 0.7461559236187668, "grad_norm": 0.32389065623283386, "learning_rate": 4.78981303447298e-06, "loss": 0.0581, "step": 42133 }, { "epoch": 0.7461736331557952, "grad_norm": 0.8556462526321411, "learning_rate": 4.789182763923799e-06, "loss": 0.046, "step": 42134 }, { "epoch": 0.7461913426928236, "grad_norm": 0.7711074352264404, "learning_rate": 4.7885525269671746e-06, "loss": 0.0497, "step": 42135 }, { "epoch": 0.7462090522298521, "grad_norm": 0.4971337616443634, "learning_rate": 4.787922323605185e-06, "loss": 0.0619, "step": 42136 }, { "epoch": 0.7462267617668805, "grad_norm": 0.45019128918647766, "learning_rate": 4.787292153839893e-06, "loss": 0.0618, "step": 42137 }, { "epoch": 0.7462444713039089, "grad_norm": 0.5860584378242493, "learning_rate": 4.786662017673372e-06, "loss": 0.0572, "step": 42138 }, { "epoch": 0.7462621808409373, "grad_norm": 0.7346488237380981, "learning_rate": 4.786031915107704e-06, "loss": 0.0793, "step": 42139 }, { "epoch": 0.7462798903779658, "grad_norm": 0.6650662422180176, "learning_rate": 4.785401846144964e-06, "loss": 0.0529, "step": 42140 }, { "epoch": 0.7462975999149942, "grad_norm": 0.48424598574638367, "learning_rate": 4.7847718107872124e-06, "loss": 0.0524, "step": 42141 }, { "epoch": 0.7463153094520226, "grad_norm": 0.5886437296867371, "learning_rate": 4.78414180903653e-06, "loss": 0.0459, "step": 42142 }, { "epoch": 0.7463330189890511, "grad_norm": 0.8422040343284607, "learning_rate": 4.7835118408949926e-06, "loss": 0.0773, "step": 42143 }, { "epoch": 0.7463507285260795, "grad_norm": 1.0231599807739258, "learning_rate": 4.782881906364662e-06, "loss": 0.0957, "step": 42144 }, { "epoch": 0.7463684380631079, "grad_norm": 0.5439754128456116, "learning_rate": 4.782252005447616e-06, "loss": 0.0549, "step": 42145 }, { "epoch": 0.7463861476001363, "grad_norm": 0.545621931552887, "learning_rate": 4.781622138145928e-06, "loss": 0.0457, "step": 42146 }, { "epoch": 0.7464038571371648, "grad_norm": 0.5581499934196472, "learning_rate": 4.78099230446167e-06, "loss": 0.0607, "step": 42147 }, { "epoch": 0.7464215666741932, "grad_norm": 0.617706835269928, "learning_rate": 4.780362504396915e-06, "loss": 0.0636, "step": 42148 }, { "epoch": 0.7464392762112216, "grad_norm": 0.6490615606307983, "learning_rate": 4.779732737953733e-06, "loss": 0.0884, "step": 42149 }, { "epoch": 0.74645698574825, "grad_norm": 0.8107908964157104, "learning_rate": 4.779103005134199e-06, "loss": 0.0581, "step": 42150 }, { "epoch": 0.7464746952852785, "grad_norm": 0.6576573848724365, "learning_rate": 4.77847330594038e-06, "loss": 0.0344, "step": 42151 }, { "epoch": 0.7464924048223069, "grad_norm": 0.6720026731491089, "learning_rate": 4.77784364037435e-06, "loss": 0.0823, "step": 42152 }, { "epoch": 0.7465101143593353, "grad_norm": 0.6864882111549377, "learning_rate": 4.777214008438179e-06, "loss": 0.0619, "step": 42153 }, { "epoch": 0.7465278238963637, "grad_norm": 0.838824987411499, "learning_rate": 4.7765844101339465e-06, "loss": 0.0628, "step": 42154 }, { "epoch": 0.7465455334333922, "grad_norm": 0.4634169936180115, "learning_rate": 4.77595484546371e-06, "loss": 0.0571, "step": 42155 }, { "epoch": 0.7465632429704206, "grad_norm": 0.4799833297729492, "learning_rate": 4.77532531442955e-06, "loss": 0.0339, "step": 42156 }, { "epoch": 0.746580952507449, "grad_norm": 0.575519323348999, "learning_rate": 4.7746958170335355e-06, "loss": 0.0715, "step": 42157 }, { "epoch": 0.7465986620444776, "grad_norm": 0.7273242473602295, "learning_rate": 4.774066353277736e-06, "loss": 0.0561, "step": 42158 }, { "epoch": 0.746616371581506, "grad_norm": 0.6328644156455994, "learning_rate": 4.7734369231642246e-06, "loss": 0.0596, "step": 42159 }, { "epoch": 0.7466340811185344, "grad_norm": 1.0316352844238281, "learning_rate": 4.772807526695072e-06, "loss": 0.071, "step": 42160 }, { "epoch": 0.7466517906555628, "grad_norm": 0.8735910654067993, "learning_rate": 4.772178163872353e-06, "loss": 0.0815, "step": 42161 }, { "epoch": 0.7466695001925913, "grad_norm": 0.554477334022522, "learning_rate": 4.771548834698128e-06, "loss": 0.0513, "step": 42162 }, { "epoch": 0.7466872097296197, "grad_norm": 0.5924372673034668, "learning_rate": 4.770919539174474e-06, "loss": 0.0458, "step": 42163 }, { "epoch": 0.7467049192666481, "grad_norm": 0.7101078629493713, "learning_rate": 4.770290277303463e-06, "loss": 0.0603, "step": 42164 }, { "epoch": 0.7467226288036765, "grad_norm": 0.5266475677490234, "learning_rate": 4.769661049087159e-06, "loss": 0.0363, "step": 42165 }, { "epoch": 0.746740338340705, "grad_norm": 0.163895845413208, "learning_rate": 4.769031854527635e-06, "loss": 0.0442, "step": 42166 }, { "epoch": 0.7467580478777334, "grad_norm": 0.6823738813400269, "learning_rate": 4.7684026936269554e-06, "loss": 0.0519, "step": 42167 }, { "epoch": 0.7467757574147618, "grad_norm": 0.32485365867614746, "learning_rate": 4.767773566387208e-06, "loss": 0.0324, "step": 42168 }, { "epoch": 0.7467934669517902, "grad_norm": 0.6643544435501099, "learning_rate": 4.7671444728104445e-06, "loss": 0.0673, "step": 42169 }, { "epoch": 0.7468111764888187, "grad_norm": 0.6000354290008545, "learning_rate": 4.7665154128987405e-06, "loss": 0.0443, "step": 42170 }, { "epoch": 0.7468288860258471, "grad_norm": 0.5958124995231628, "learning_rate": 4.765886386654165e-06, "loss": 0.0877, "step": 42171 }, { "epoch": 0.7468465955628755, "grad_norm": 0.6681709885597229, "learning_rate": 4.765257394078795e-06, "loss": 0.0679, "step": 42172 }, { "epoch": 0.746864305099904, "grad_norm": 0.6699641942977905, "learning_rate": 4.764628435174687e-06, "loss": 0.0704, "step": 42173 }, { "epoch": 0.7468820146369324, "grad_norm": 0.44968464970588684, "learning_rate": 4.763999509943916e-06, "loss": 0.031, "step": 42174 }, { "epoch": 0.7468997241739608, "grad_norm": 0.818577229976654, "learning_rate": 4.763370618388553e-06, "loss": 0.0701, "step": 42175 }, { "epoch": 0.7469174337109892, "grad_norm": 0.3705958425998688, "learning_rate": 4.762741760510663e-06, "loss": 0.0376, "step": 42176 }, { "epoch": 0.7469351432480177, "grad_norm": 0.6793577671051025, "learning_rate": 4.762112936312319e-06, "loss": 0.0749, "step": 42177 }, { "epoch": 0.7469528527850461, "grad_norm": 0.6360055208206177, "learning_rate": 4.761484145795586e-06, "loss": 0.0575, "step": 42178 }, { "epoch": 0.7469705623220745, "grad_norm": 0.5063155293464661, "learning_rate": 4.76085538896254e-06, "loss": 0.0667, "step": 42179 }, { "epoch": 0.7469882718591029, "grad_norm": 0.48755893111228943, "learning_rate": 4.760226665815241e-06, "loss": 0.0378, "step": 42180 }, { "epoch": 0.7470059813961314, "grad_norm": 0.4030551612377167, "learning_rate": 4.759597976355759e-06, "loss": 0.0403, "step": 42181 }, { "epoch": 0.7470236909331598, "grad_norm": 0.5823454856872559, "learning_rate": 4.75896932058617e-06, "loss": 0.0799, "step": 42182 }, { "epoch": 0.7470414004701882, "grad_norm": 1.0680850744247437, "learning_rate": 4.75834069850853e-06, "loss": 0.0821, "step": 42183 }, { "epoch": 0.7470591100072166, "grad_norm": 1.1156681776046753, "learning_rate": 4.757712110124913e-06, "loss": 0.0901, "step": 42184 }, { "epoch": 0.7470768195442451, "grad_norm": 0.8245987296104431, "learning_rate": 4.757083555437388e-06, "loss": 0.081, "step": 42185 }, { "epoch": 0.7470945290812735, "grad_norm": 0.6603425741195679, "learning_rate": 4.756455034448022e-06, "loss": 0.0756, "step": 42186 }, { "epoch": 0.7471122386183019, "grad_norm": 0.5657682418823242, "learning_rate": 4.755826547158882e-06, "loss": 0.0724, "step": 42187 }, { "epoch": 0.7471299481553304, "grad_norm": 0.6865501999855042, "learning_rate": 4.755198093572038e-06, "loss": 0.077, "step": 42188 }, { "epoch": 0.7471476576923588, "grad_norm": 0.48485615849494934, "learning_rate": 4.754569673689561e-06, "loss": 0.0371, "step": 42189 }, { "epoch": 0.7471653672293872, "grad_norm": 0.8501780033111572, "learning_rate": 4.753941287513507e-06, "loss": 0.0543, "step": 42190 }, { "epoch": 0.7471830767664156, "grad_norm": 0.4706513583660126, "learning_rate": 4.753312935045951e-06, "loss": 0.0608, "step": 42191 }, { "epoch": 0.7472007863034441, "grad_norm": 0.5337211489677429, "learning_rate": 4.752684616288959e-06, "loss": 0.0584, "step": 42192 }, { "epoch": 0.7472184958404725, "grad_norm": 0.6948811411857605, "learning_rate": 4.7520563312446035e-06, "loss": 0.0541, "step": 42193 }, { "epoch": 0.7472362053775009, "grad_norm": 0.7197611927986145, "learning_rate": 4.75142807991494e-06, "loss": 0.0605, "step": 42194 }, { "epoch": 0.7472539149145293, "grad_norm": 0.8369245529174805, "learning_rate": 4.750799862302042e-06, "loss": 0.0564, "step": 42195 }, { "epoch": 0.7472716244515578, "grad_norm": 0.5357252955436707, "learning_rate": 4.750171678407977e-06, "loss": 0.0458, "step": 42196 }, { "epoch": 0.7472893339885862, "grad_norm": 0.6272132992744446, "learning_rate": 4.749543528234808e-06, "loss": 0.0647, "step": 42197 }, { "epoch": 0.7473070435256146, "grad_norm": 0.27035874128341675, "learning_rate": 4.748915411784606e-06, "loss": 0.0435, "step": 42198 }, { "epoch": 0.747324753062643, "grad_norm": 0.8534774780273438, "learning_rate": 4.7482873290594355e-06, "loss": 0.0482, "step": 42199 }, { "epoch": 0.7473424625996715, "grad_norm": 0.45929497480392456, "learning_rate": 4.7476592800613675e-06, "loss": 0.0362, "step": 42200 }, { "epoch": 0.7473601721366999, "grad_norm": 0.6801436543464661, "learning_rate": 4.7470312647924564e-06, "loss": 0.0635, "step": 42201 }, { "epoch": 0.7473778816737283, "grad_norm": 0.7498494982719421, "learning_rate": 4.7464032832547775e-06, "loss": 0.0581, "step": 42202 }, { "epoch": 0.7473955912107568, "grad_norm": 0.7753376364707947, "learning_rate": 4.745775335450395e-06, "loss": 0.0659, "step": 42203 }, { "epoch": 0.7474133007477852, "grad_norm": 0.5762251615524292, "learning_rate": 4.7451474213813745e-06, "loss": 0.064, "step": 42204 }, { "epoch": 0.7474310102848136, "grad_norm": 0.49978742003440857, "learning_rate": 4.744519541049781e-06, "loss": 0.0673, "step": 42205 }, { "epoch": 0.747448719821842, "grad_norm": 0.48270219564437866, "learning_rate": 4.743891694457681e-06, "loss": 0.0379, "step": 42206 }, { "epoch": 0.7474664293588705, "grad_norm": 0.8296712636947632, "learning_rate": 4.7432638816071445e-06, "loss": 0.0478, "step": 42207 }, { "epoch": 0.7474841388958989, "grad_norm": 0.6433421969413757, "learning_rate": 4.742636102500229e-06, "loss": 0.0651, "step": 42208 }, { "epoch": 0.7475018484329273, "grad_norm": 0.7679709196090698, "learning_rate": 4.742008357139002e-06, "loss": 0.0945, "step": 42209 }, { "epoch": 0.7475195579699557, "grad_norm": 0.44802162051200867, "learning_rate": 4.741380645525531e-06, "loss": 0.0644, "step": 42210 }, { "epoch": 0.7475372675069842, "grad_norm": 0.515399694442749, "learning_rate": 4.740752967661884e-06, "loss": 0.046, "step": 42211 }, { "epoch": 0.7475549770440126, "grad_norm": 0.5547638535499573, "learning_rate": 4.740125323550117e-06, "loss": 0.0696, "step": 42212 }, { "epoch": 0.747572686581041, "grad_norm": 0.3949120342731476, "learning_rate": 4.739497713192299e-06, "loss": 0.0628, "step": 42213 }, { "epoch": 0.7475903961180694, "grad_norm": 0.4993453621864319, "learning_rate": 4.738870136590499e-06, "loss": 0.0636, "step": 42214 }, { "epoch": 0.7476081056550979, "grad_norm": 0.4816136360168457, "learning_rate": 4.738242593746775e-06, "loss": 0.0837, "step": 42215 }, { "epoch": 0.7476258151921263, "grad_norm": 0.505366325378418, "learning_rate": 4.737615084663197e-06, "loss": 0.0627, "step": 42216 }, { "epoch": 0.7476435247291547, "grad_norm": 0.7972155213356018, "learning_rate": 4.736987609341827e-06, "loss": 0.0631, "step": 42217 }, { "epoch": 0.7476612342661832, "grad_norm": 0.6069107055664062, "learning_rate": 4.736360167784735e-06, "loss": 0.068, "step": 42218 }, { "epoch": 0.7476789438032116, "grad_norm": 0.46050626039505005, "learning_rate": 4.735732759993974e-06, "loss": 0.0687, "step": 42219 }, { "epoch": 0.74769665334024, "grad_norm": 0.8016921877861023, "learning_rate": 4.735105385971614e-06, "loss": 0.0461, "step": 42220 }, { "epoch": 0.7477143628772684, "grad_norm": 0.5917439460754395, "learning_rate": 4.734478045719725e-06, "loss": 0.0535, "step": 42221 }, { "epoch": 0.747732072414297, "grad_norm": 0.5652559399604797, "learning_rate": 4.73385073924036e-06, "loss": 0.063, "step": 42222 }, { "epoch": 0.7477497819513254, "grad_norm": 0.4162241518497467, "learning_rate": 4.733223466535586e-06, "loss": 0.0433, "step": 42223 }, { "epoch": 0.7477674914883538, "grad_norm": 0.6627316474914551, "learning_rate": 4.732596227607471e-06, "loss": 0.0556, "step": 42224 }, { "epoch": 0.7477852010253822, "grad_norm": 0.500623881816864, "learning_rate": 4.731969022458074e-06, "loss": 0.0662, "step": 42225 }, { "epoch": 0.7478029105624107, "grad_norm": 0.4528179466724396, "learning_rate": 4.731341851089461e-06, "loss": 0.0605, "step": 42226 }, { "epoch": 0.7478206200994391, "grad_norm": 0.4658167362213135, "learning_rate": 4.730714713503694e-06, "loss": 0.0852, "step": 42227 }, { "epoch": 0.7478383296364675, "grad_norm": 1.2056503295898438, "learning_rate": 4.730087609702843e-06, "loss": 0.0777, "step": 42228 }, { "epoch": 0.7478560391734959, "grad_norm": 0.4756706953048706, "learning_rate": 4.72946053968896e-06, "loss": 0.0765, "step": 42229 }, { "epoch": 0.7478737487105244, "grad_norm": 0.5018793344497681, "learning_rate": 4.728833503464113e-06, "loss": 0.0522, "step": 42230 }, { "epoch": 0.7478914582475528, "grad_norm": 0.7532280087471008, "learning_rate": 4.728206501030364e-06, "loss": 0.081, "step": 42231 }, { "epoch": 0.7479091677845812, "grad_norm": 0.5447130799293518, "learning_rate": 4.727579532389781e-06, "loss": 0.0545, "step": 42232 }, { "epoch": 0.7479268773216097, "grad_norm": 0.5974655747413635, "learning_rate": 4.726952597544415e-06, "loss": 0.0817, "step": 42233 }, { "epoch": 0.7479445868586381, "grad_norm": 0.5828266739845276, "learning_rate": 4.7263256964963384e-06, "loss": 0.0646, "step": 42234 }, { "epoch": 0.7479622963956665, "grad_norm": 0.39207425713539124, "learning_rate": 4.7256988292476125e-06, "loss": 0.078, "step": 42235 }, { "epoch": 0.7479800059326949, "grad_norm": 0.28494638204574585, "learning_rate": 4.725071995800302e-06, "loss": 0.0507, "step": 42236 }, { "epoch": 0.7479977154697234, "grad_norm": 0.4090537428855896, "learning_rate": 4.724445196156463e-06, "loss": 0.0537, "step": 42237 }, { "epoch": 0.7480154250067518, "grad_norm": 0.4448944926261902, "learning_rate": 4.723818430318158e-06, "loss": 0.0504, "step": 42238 }, { "epoch": 0.7480331345437802, "grad_norm": 0.365215539932251, "learning_rate": 4.723191698287455e-06, "loss": 0.0639, "step": 42239 }, { "epoch": 0.7480508440808086, "grad_norm": 0.5479032397270203, "learning_rate": 4.722565000066409e-06, "loss": 0.0495, "step": 42240 }, { "epoch": 0.7480685536178371, "grad_norm": 0.6546493172645569, "learning_rate": 4.721938335657084e-06, "loss": 0.0705, "step": 42241 }, { "epoch": 0.7480862631548655, "grad_norm": 0.5934379696846008, "learning_rate": 4.721311705061542e-06, "loss": 0.0754, "step": 42242 }, { "epoch": 0.7481039726918939, "grad_norm": 0.5908936262130737, "learning_rate": 4.7206851082818444e-06, "loss": 0.0709, "step": 42243 }, { "epoch": 0.7481216822289223, "grad_norm": 0.42889833450317383, "learning_rate": 4.720058545320054e-06, "loss": 0.0775, "step": 42244 }, { "epoch": 0.7481393917659508, "grad_norm": 0.2510152757167816, "learning_rate": 4.719432016178231e-06, "loss": 0.0465, "step": 42245 }, { "epoch": 0.7481571013029792, "grad_norm": 0.4768945872783661, "learning_rate": 4.718805520858443e-06, "loss": 0.0474, "step": 42246 }, { "epoch": 0.7481748108400076, "grad_norm": 0.4485439658164978, "learning_rate": 4.718179059362737e-06, "loss": 0.0303, "step": 42247 }, { "epoch": 0.7481925203770361, "grad_norm": 0.3733246326446533, "learning_rate": 4.717552631693184e-06, "loss": 0.053, "step": 42248 }, { "epoch": 0.7482102299140645, "grad_norm": 0.73826003074646, "learning_rate": 4.716926237851842e-06, "loss": 0.0609, "step": 42249 }, { "epoch": 0.7482279394510929, "grad_norm": 0.42082464694976807, "learning_rate": 4.716299877840778e-06, "loss": 0.0478, "step": 42250 }, { "epoch": 0.7482456489881213, "grad_norm": 0.656173825263977, "learning_rate": 4.715673551662043e-06, "loss": 0.0656, "step": 42251 }, { "epoch": 0.7482633585251498, "grad_norm": 0.7837060689926147, "learning_rate": 4.715047259317701e-06, "loss": 0.0651, "step": 42252 }, { "epoch": 0.7482810680621782, "grad_norm": 0.6653664112091064, "learning_rate": 4.714421000809814e-06, "loss": 0.0767, "step": 42253 }, { "epoch": 0.7482987775992066, "grad_norm": 0.606428325176239, "learning_rate": 4.713794776140441e-06, "loss": 0.047, "step": 42254 }, { "epoch": 0.748316487136235, "grad_norm": 0.6224309206008911, "learning_rate": 4.713168585311643e-06, "loss": 0.0547, "step": 42255 }, { "epoch": 0.7483341966732635, "grad_norm": 0.7636090517044067, "learning_rate": 4.712542428325479e-06, "loss": 0.0432, "step": 42256 }, { "epoch": 0.7483519062102919, "grad_norm": 0.5241929888725281, "learning_rate": 4.711916305184016e-06, "loss": 0.0324, "step": 42257 }, { "epoch": 0.7483696157473203, "grad_norm": 0.7752312421798706, "learning_rate": 4.711290215889303e-06, "loss": 0.0468, "step": 42258 }, { "epoch": 0.7483873252843487, "grad_norm": 1.073317289352417, "learning_rate": 4.710664160443403e-06, "loss": 0.08, "step": 42259 }, { "epoch": 0.7484050348213772, "grad_norm": 0.7582350373268127, "learning_rate": 4.710038138848384e-06, "loss": 0.0687, "step": 42260 }, { "epoch": 0.7484227443584056, "grad_norm": 0.7669407725334167, "learning_rate": 4.709412151106293e-06, "loss": 0.0674, "step": 42261 }, { "epoch": 0.748440453895434, "grad_norm": 0.4170116186141968, "learning_rate": 4.70878619721919e-06, "loss": 0.0591, "step": 42262 }, { "epoch": 0.7484581634324625, "grad_norm": 0.441868394613266, "learning_rate": 4.708160277189143e-06, "loss": 0.0354, "step": 42263 }, { "epoch": 0.7484758729694909, "grad_norm": 0.8547893166542053, "learning_rate": 4.7075343910182165e-06, "loss": 0.0431, "step": 42264 }, { "epoch": 0.7484935825065193, "grad_norm": 0.5051900744438171, "learning_rate": 4.7069085387084525e-06, "loss": 0.0564, "step": 42265 }, { "epoch": 0.7485112920435477, "grad_norm": 0.5244079828262329, "learning_rate": 4.70628272026192e-06, "loss": 0.0891, "step": 42266 }, { "epoch": 0.7485290015805762, "grad_norm": 0.6612802147865295, "learning_rate": 4.705656935680681e-06, "loss": 0.0676, "step": 42267 }, { "epoch": 0.7485467111176046, "grad_norm": 0.4517224431037903, "learning_rate": 4.705031184966783e-06, "loss": 0.0606, "step": 42268 }, { "epoch": 0.748564420654633, "grad_norm": 0.7306880354881287, "learning_rate": 4.704405468122292e-06, "loss": 0.0553, "step": 42269 }, { "epoch": 0.7485821301916614, "grad_norm": 0.5814298391342163, "learning_rate": 4.703779785149267e-06, "loss": 0.0532, "step": 42270 }, { "epoch": 0.7485998397286899, "grad_norm": 0.6154618859291077, "learning_rate": 4.703154136049764e-06, "loss": 0.0749, "step": 42271 }, { "epoch": 0.7486175492657183, "grad_norm": 0.5788052082061768, "learning_rate": 4.702528520825843e-06, "loss": 0.0828, "step": 42272 }, { "epoch": 0.7486352588027467, "grad_norm": 0.32512393593788147, "learning_rate": 4.701902939479561e-06, "loss": 0.0606, "step": 42273 }, { "epoch": 0.7486529683397751, "grad_norm": 0.3637157082557678, "learning_rate": 4.701277392012976e-06, "loss": 0.0481, "step": 42274 }, { "epoch": 0.7486706778768036, "grad_norm": 0.6282691359519958, "learning_rate": 4.700651878428153e-06, "loss": 0.0561, "step": 42275 }, { "epoch": 0.748688387413832, "grad_norm": 0.2976115345954895, "learning_rate": 4.700026398727138e-06, "loss": 0.0429, "step": 42276 }, { "epoch": 0.7487060969508604, "grad_norm": 0.7641715407371521, "learning_rate": 4.699400952911996e-06, "loss": 0.0879, "step": 42277 }, { "epoch": 0.7487238064878889, "grad_norm": 0.377483993768692, "learning_rate": 4.698775540984786e-06, "loss": 0.0711, "step": 42278 }, { "epoch": 0.7487415160249173, "grad_norm": 0.5044788122177124, "learning_rate": 4.698150162947558e-06, "loss": 0.0552, "step": 42279 }, { "epoch": 0.7487592255619457, "grad_norm": 0.6362917423248291, "learning_rate": 4.697524818802374e-06, "loss": 0.0626, "step": 42280 }, { "epoch": 0.7487769350989741, "grad_norm": 0.6782045960426331, "learning_rate": 4.696899508551292e-06, "loss": 0.0595, "step": 42281 }, { "epoch": 0.7487946446360026, "grad_norm": 0.5309406518936157, "learning_rate": 4.6962742321963684e-06, "loss": 0.0374, "step": 42282 }, { "epoch": 0.748812354173031, "grad_norm": 0.956784188747406, "learning_rate": 4.69564898973966e-06, "loss": 0.0723, "step": 42283 }, { "epoch": 0.7488300637100594, "grad_norm": 0.5619221925735474, "learning_rate": 4.695023781183224e-06, "loss": 0.0526, "step": 42284 }, { "epoch": 0.7488477732470878, "grad_norm": 0.35935258865356445, "learning_rate": 4.694398606529123e-06, "loss": 0.0514, "step": 42285 }, { "epoch": 0.7488654827841164, "grad_norm": 1.20045006275177, "learning_rate": 4.693773465779402e-06, "loss": 0.0891, "step": 42286 }, { "epoch": 0.7488831923211448, "grad_norm": 0.584959089756012, "learning_rate": 4.693148358936126e-06, "loss": 0.0785, "step": 42287 }, { "epoch": 0.7489009018581732, "grad_norm": 0.4843851923942566, "learning_rate": 4.692523286001348e-06, "loss": 0.0655, "step": 42288 }, { "epoch": 0.7489186113952015, "grad_norm": 0.5730953216552734, "learning_rate": 4.69189824697713e-06, "loss": 0.053, "step": 42289 }, { "epoch": 0.7489363209322301, "grad_norm": 0.6127389073371887, "learning_rate": 4.69127324186552e-06, "loss": 0.0425, "step": 42290 }, { "epoch": 0.7489540304692585, "grad_norm": 0.4264763593673706, "learning_rate": 4.6906482706685745e-06, "loss": 0.0385, "step": 42291 }, { "epoch": 0.7489717400062869, "grad_norm": 0.6879237294197083, "learning_rate": 4.690023333388363e-06, "loss": 0.0554, "step": 42292 }, { "epoch": 0.7489894495433154, "grad_norm": 0.5879608392715454, "learning_rate": 4.689398430026926e-06, "loss": 0.0476, "step": 42293 }, { "epoch": 0.7490071590803438, "grad_norm": 0.4656556248664856, "learning_rate": 4.688773560586327e-06, "loss": 0.0672, "step": 42294 }, { "epoch": 0.7490248686173722, "grad_norm": 0.617967963218689, "learning_rate": 4.688148725068619e-06, "loss": 0.049, "step": 42295 }, { "epoch": 0.7490425781544006, "grad_norm": 0.7296969294548035, "learning_rate": 4.687523923475862e-06, "loss": 0.0602, "step": 42296 }, { "epoch": 0.7490602876914291, "grad_norm": 0.6531862020492554, "learning_rate": 4.686899155810105e-06, "loss": 0.0842, "step": 42297 }, { "epoch": 0.7490779972284575, "grad_norm": 0.7169336080551147, "learning_rate": 4.686274422073407e-06, "loss": 0.0854, "step": 42298 }, { "epoch": 0.7490957067654859, "grad_norm": 0.3393191993236542, "learning_rate": 4.685649722267827e-06, "loss": 0.0399, "step": 42299 }, { "epoch": 0.7491134163025143, "grad_norm": 0.6809661984443665, "learning_rate": 4.685025056395407e-06, "loss": 0.0562, "step": 42300 }, { "epoch": 0.7491311258395428, "grad_norm": 0.3056606948375702, "learning_rate": 4.684400424458216e-06, "loss": 0.0379, "step": 42301 }, { "epoch": 0.7491488353765712, "grad_norm": 0.5068466663360596, "learning_rate": 4.683775826458304e-06, "loss": 0.0524, "step": 42302 }, { "epoch": 0.7491665449135996, "grad_norm": 0.657017707824707, "learning_rate": 4.683151262397731e-06, "loss": 0.0632, "step": 42303 }, { "epoch": 0.749184254450628, "grad_norm": 1.2590688467025757, "learning_rate": 4.682526732278543e-06, "loss": 0.0842, "step": 42304 }, { "epoch": 0.7492019639876565, "grad_norm": 0.6573988199234009, "learning_rate": 4.681902236102798e-06, "loss": 0.0555, "step": 42305 }, { "epoch": 0.7492196735246849, "grad_norm": 0.485245019197464, "learning_rate": 4.681277773872551e-06, "loss": 0.0492, "step": 42306 }, { "epoch": 0.7492373830617133, "grad_norm": 0.6424279808998108, "learning_rate": 4.680653345589861e-06, "loss": 0.0548, "step": 42307 }, { "epoch": 0.7492550925987418, "grad_norm": 0.4135310649871826, "learning_rate": 4.680028951256773e-06, "loss": 0.0566, "step": 42308 }, { "epoch": 0.7492728021357702, "grad_norm": 0.6856013536453247, "learning_rate": 4.679404590875345e-06, "loss": 0.0702, "step": 42309 }, { "epoch": 0.7492905116727986, "grad_norm": 0.23268568515777588, "learning_rate": 4.6787802644476344e-06, "loss": 0.0431, "step": 42310 }, { "epoch": 0.749308221209827, "grad_norm": 0.31467175483703613, "learning_rate": 4.678155971975691e-06, "loss": 0.084, "step": 42311 }, { "epoch": 0.7493259307468555, "grad_norm": 0.6944721341133118, "learning_rate": 4.67753171346157e-06, "loss": 0.0682, "step": 42312 }, { "epoch": 0.7493436402838839, "grad_norm": 0.5972419381141663, "learning_rate": 4.6769074889073266e-06, "loss": 0.0819, "step": 42313 }, { "epoch": 0.7493613498209123, "grad_norm": 0.6053023934364319, "learning_rate": 4.676283298315018e-06, "loss": 0.0681, "step": 42314 }, { "epoch": 0.7493790593579407, "grad_norm": 0.6048445701599121, "learning_rate": 4.675659141686689e-06, "loss": 0.071, "step": 42315 }, { "epoch": 0.7493967688949692, "grad_norm": 1.4104410409927368, "learning_rate": 4.6750350190243965e-06, "loss": 0.0592, "step": 42316 }, { "epoch": 0.7494144784319976, "grad_norm": 0.576119065284729, "learning_rate": 4.674410930330198e-06, "loss": 0.0814, "step": 42317 }, { "epoch": 0.749432187969026, "grad_norm": 0.5555351376533508, "learning_rate": 4.67378687560614e-06, "loss": 0.0828, "step": 42318 }, { "epoch": 0.7494498975060544, "grad_norm": 0.406101793050766, "learning_rate": 4.673162854854279e-06, "loss": 0.0546, "step": 42319 }, { "epoch": 0.7494676070430829, "grad_norm": 0.7811852097511292, "learning_rate": 4.672538868076667e-06, "loss": 0.0352, "step": 42320 }, { "epoch": 0.7494853165801113, "grad_norm": 0.5382381677627563, "learning_rate": 4.671914915275358e-06, "loss": 0.0534, "step": 42321 }, { "epoch": 0.7495030261171397, "grad_norm": 0.8005706667900085, "learning_rate": 4.671290996452405e-06, "loss": 0.0554, "step": 42322 }, { "epoch": 0.7495207356541682, "grad_norm": 0.2769342064857483, "learning_rate": 4.6706671116098585e-06, "loss": 0.0621, "step": 42323 }, { "epoch": 0.7495384451911966, "grad_norm": 0.41887107491493225, "learning_rate": 4.6700432607497775e-06, "loss": 0.0405, "step": 42324 }, { "epoch": 0.749556154728225, "grad_norm": 0.6600014567375183, "learning_rate": 4.669419443874206e-06, "loss": 0.054, "step": 42325 }, { "epoch": 0.7495738642652534, "grad_norm": 0.6247511506080627, "learning_rate": 4.6687956609851984e-06, "loss": 0.0703, "step": 42326 }, { "epoch": 0.7495915738022819, "grad_norm": 0.7254998683929443, "learning_rate": 4.6681719120848086e-06, "loss": 0.0539, "step": 42327 }, { "epoch": 0.7496092833393103, "grad_norm": 1.0709950923919678, "learning_rate": 4.667548197175092e-06, "loss": 0.0743, "step": 42328 }, { "epoch": 0.7496269928763387, "grad_norm": 0.5673081278800964, "learning_rate": 4.666924516258087e-06, "loss": 0.0452, "step": 42329 }, { "epoch": 0.7496447024133671, "grad_norm": 0.3010053336620331, "learning_rate": 4.6663008693358615e-06, "loss": 0.0586, "step": 42330 }, { "epoch": 0.7496624119503956, "grad_norm": 0.720593273639679, "learning_rate": 4.6656772564104634e-06, "loss": 0.0652, "step": 42331 }, { "epoch": 0.749680121487424, "grad_norm": 0.5704348087310791, "learning_rate": 4.6650536774839404e-06, "loss": 0.0695, "step": 42332 }, { "epoch": 0.7496978310244524, "grad_norm": 0.7326341271400452, "learning_rate": 4.664430132558344e-06, "loss": 0.0628, "step": 42333 }, { "epoch": 0.7497155405614808, "grad_norm": 0.9207006096839905, "learning_rate": 4.6638066216357265e-06, "loss": 0.097, "step": 42334 }, { "epoch": 0.7497332500985093, "grad_norm": 0.685469388961792, "learning_rate": 4.663183144718144e-06, "loss": 0.0478, "step": 42335 }, { "epoch": 0.7497509596355377, "grad_norm": 0.6297822594642639, "learning_rate": 4.66255970180764e-06, "loss": 0.058, "step": 42336 }, { "epoch": 0.7497686691725661, "grad_norm": 0.48049139976501465, "learning_rate": 4.6619362929062695e-06, "loss": 0.0513, "step": 42337 }, { "epoch": 0.7497863787095946, "grad_norm": 0.4820007383823395, "learning_rate": 4.661312918016082e-06, "loss": 0.0546, "step": 42338 }, { "epoch": 0.749804088246623, "grad_norm": 0.379473477602005, "learning_rate": 4.6606895771391284e-06, "loss": 0.0519, "step": 42339 }, { "epoch": 0.7498217977836514, "grad_norm": 0.6538172960281372, "learning_rate": 4.660066270277462e-06, "loss": 0.0495, "step": 42340 }, { "epoch": 0.7498395073206798, "grad_norm": 0.5951629281044006, "learning_rate": 4.659442997433132e-06, "loss": 0.0598, "step": 42341 }, { "epoch": 0.7498572168577083, "grad_norm": 0.6112836003303528, "learning_rate": 4.658819758608192e-06, "loss": 0.0469, "step": 42342 }, { "epoch": 0.7498749263947367, "grad_norm": 0.7184145450592041, "learning_rate": 4.658196553804684e-06, "loss": 0.0695, "step": 42343 }, { "epoch": 0.7498926359317651, "grad_norm": 0.5556039810180664, "learning_rate": 4.657573383024664e-06, "loss": 0.0397, "step": 42344 }, { "epoch": 0.7499103454687935, "grad_norm": 0.6808112859725952, "learning_rate": 4.656950246270182e-06, "loss": 0.0631, "step": 42345 }, { "epoch": 0.749928055005822, "grad_norm": 0.7734668850898743, "learning_rate": 4.656327143543292e-06, "loss": 0.0682, "step": 42346 }, { "epoch": 0.7499457645428504, "grad_norm": 0.8591267466545105, "learning_rate": 4.655704074846034e-06, "loss": 0.0505, "step": 42347 }, { "epoch": 0.7499634740798788, "grad_norm": 0.6075395345687866, "learning_rate": 4.655081040180465e-06, "loss": 0.0561, "step": 42348 }, { "epoch": 0.7499811836169072, "grad_norm": 0.4479876756668091, "learning_rate": 4.654458039548632e-06, "loss": 0.0763, "step": 42349 }, { "epoch": 0.7499988931539358, "grad_norm": 0.5891269445419312, "learning_rate": 4.653835072952585e-06, "loss": 0.0704, "step": 42350 }, { "epoch": 0.7500166026909642, "grad_norm": 0.5017289519309998, "learning_rate": 4.653212140394375e-06, "loss": 0.0784, "step": 42351 }, { "epoch": 0.7500343122279926, "grad_norm": 0.781653881072998, "learning_rate": 4.652589241876049e-06, "loss": 0.0524, "step": 42352 }, { "epoch": 0.7500520217650211, "grad_norm": 0.5591163039207458, "learning_rate": 4.651966377399665e-06, "loss": 0.0861, "step": 42353 }, { "epoch": 0.7500697313020495, "grad_norm": 0.4227142930030823, "learning_rate": 4.65134354696726e-06, "loss": 0.0626, "step": 42354 }, { "epoch": 0.7500874408390779, "grad_norm": 0.6791561245918274, "learning_rate": 4.650720750580887e-06, "loss": 0.0554, "step": 42355 }, { "epoch": 0.7501051503761063, "grad_norm": 0.4891774356365204, "learning_rate": 4.650097988242601e-06, "loss": 0.0459, "step": 42356 }, { "epoch": 0.7501228599131348, "grad_norm": 1.0326564311981201, "learning_rate": 4.64947525995444e-06, "loss": 0.0977, "step": 42357 }, { "epoch": 0.7501405694501632, "grad_norm": 0.32045435905456543, "learning_rate": 4.648852565718456e-06, "loss": 0.0618, "step": 42358 }, { "epoch": 0.7501582789871916, "grad_norm": 0.8894460797309875, "learning_rate": 4.648229905536703e-06, "loss": 0.0608, "step": 42359 }, { "epoch": 0.75017598852422, "grad_norm": 0.6678513288497925, "learning_rate": 4.647607279411232e-06, "loss": 0.0704, "step": 42360 }, { "epoch": 0.7501936980612485, "grad_norm": 0.4496300518512726, "learning_rate": 4.6469846873440815e-06, "loss": 0.0496, "step": 42361 }, { "epoch": 0.7502114075982769, "grad_norm": 0.935306191444397, "learning_rate": 4.646362129337303e-06, "loss": 0.0783, "step": 42362 }, { "epoch": 0.7502291171353053, "grad_norm": 0.6638206839561462, "learning_rate": 4.645739605392951e-06, "loss": 0.0616, "step": 42363 }, { "epoch": 0.7502468266723337, "grad_norm": 0.7351877093315125, "learning_rate": 4.645117115513066e-06, "loss": 0.0467, "step": 42364 }, { "epoch": 0.7502645362093622, "grad_norm": 0.6971119046211243, "learning_rate": 4.644494659699696e-06, "loss": 0.0531, "step": 42365 }, { "epoch": 0.7502822457463906, "grad_norm": 0.37508493661880493, "learning_rate": 4.643872237954891e-06, "loss": 0.0426, "step": 42366 }, { "epoch": 0.750299955283419, "grad_norm": 0.608115017414093, "learning_rate": 4.6432498502807e-06, "loss": 0.0363, "step": 42367 }, { "epoch": 0.7503176648204475, "grad_norm": 0.5562887191772461, "learning_rate": 4.64262749667917e-06, "loss": 0.0817, "step": 42368 }, { "epoch": 0.7503353743574759, "grad_norm": 0.7392021417617798, "learning_rate": 4.642005177152347e-06, "loss": 0.0453, "step": 42369 }, { "epoch": 0.7503530838945043, "grad_norm": 0.46875229477882385, "learning_rate": 4.641382891702283e-06, "loss": 0.0412, "step": 42370 }, { "epoch": 0.7503707934315327, "grad_norm": 0.44627830386161804, "learning_rate": 4.640760640331018e-06, "loss": 0.0558, "step": 42371 }, { "epoch": 0.7503885029685612, "grad_norm": 0.8186230659484863, "learning_rate": 4.640138423040603e-06, "loss": 0.0902, "step": 42372 }, { "epoch": 0.7504062125055896, "grad_norm": 0.39437469840049744, "learning_rate": 4.6395162398330835e-06, "loss": 0.0303, "step": 42373 }, { "epoch": 0.750423922042618, "grad_norm": 0.7429885268211365, "learning_rate": 4.638894090710513e-06, "loss": 0.0201, "step": 42374 }, { "epoch": 0.7504416315796464, "grad_norm": 0.7512205243110657, "learning_rate": 4.63827197567493e-06, "loss": 0.0597, "step": 42375 }, { "epoch": 0.7504593411166749, "grad_norm": 0.4489649534225464, "learning_rate": 4.637649894728382e-06, "loss": 0.0487, "step": 42376 }, { "epoch": 0.7504770506537033, "grad_norm": 0.450318306684494, "learning_rate": 4.637027847872918e-06, "loss": 0.0636, "step": 42377 }, { "epoch": 0.7504947601907317, "grad_norm": 0.6603557467460632, "learning_rate": 4.636405835110585e-06, "loss": 0.0435, "step": 42378 }, { "epoch": 0.7505124697277601, "grad_norm": 0.7976565361022949, "learning_rate": 4.635783856443428e-06, "loss": 0.0777, "step": 42379 }, { "epoch": 0.7505301792647886, "grad_norm": 0.09987853467464447, "learning_rate": 4.635161911873495e-06, "loss": 0.0369, "step": 42380 }, { "epoch": 0.750547888801817, "grad_norm": 0.6324062943458557, "learning_rate": 4.6345400014028345e-06, "loss": 0.049, "step": 42381 }, { "epoch": 0.7505655983388454, "grad_norm": 0.6444305181503296, "learning_rate": 4.633918125033484e-06, "loss": 0.0521, "step": 42382 }, { "epoch": 0.7505833078758739, "grad_norm": 0.9603720307350159, "learning_rate": 4.633296282767497e-06, "loss": 0.0797, "step": 42383 }, { "epoch": 0.7506010174129023, "grad_norm": 0.6743705868721008, "learning_rate": 4.632674474606914e-06, "loss": 0.0616, "step": 42384 }, { "epoch": 0.7506187269499307, "grad_norm": 0.5974041223526001, "learning_rate": 4.6320527005537885e-06, "loss": 0.0745, "step": 42385 }, { "epoch": 0.7506364364869591, "grad_norm": 0.5078158974647522, "learning_rate": 4.631430960610156e-06, "loss": 0.0449, "step": 42386 }, { "epoch": 0.7506541460239876, "grad_norm": 0.7543430328369141, "learning_rate": 4.630809254778062e-06, "loss": 0.0555, "step": 42387 }, { "epoch": 0.750671855561016, "grad_norm": 0.5393033027648926, "learning_rate": 4.630187583059569e-06, "loss": 0.0635, "step": 42388 }, { "epoch": 0.7506895650980444, "grad_norm": 0.48130130767822266, "learning_rate": 4.629565945456702e-06, "loss": 0.039, "step": 42389 }, { "epoch": 0.7507072746350728, "grad_norm": 0.6187483668327332, "learning_rate": 4.628944341971515e-06, "loss": 0.0962, "step": 42390 }, { "epoch": 0.7507249841721013, "grad_norm": 0.5787602066993713, "learning_rate": 4.628322772606053e-06, "loss": 0.0585, "step": 42391 }, { "epoch": 0.7507426937091297, "grad_norm": 0.5915601253509521, "learning_rate": 4.627701237362364e-06, "loss": 0.0867, "step": 42392 }, { "epoch": 0.7507604032461581, "grad_norm": 0.3801364600658417, "learning_rate": 4.6270797362424855e-06, "loss": 0.0456, "step": 42393 }, { "epoch": 0.7507781127831865, "grad_norm": 0.6783720254898071, "learning_rate": 4.626458269248464e-06, "loss": 0.047, "step": 42394 }, { "epoch": 0.750795822320215, "grad_norm": 0.9435157775878906, "learning_rate": 4.625836836382345e-06, "loss": 0.055, "step": 42395 }, { "epoch": 0.7508135318572434, "grad_norm": 0.8194487690925598, "learning_rate": 4.6252154376461745e-06, "loss": 0.0549, "step": 42396 }, { "epoch": 0.7508312413942718, "grad_norm": 1.0606083869934082, "learning_rate": 4.6245940730419965e-06, "loss": 0.0784, "step": 42397 }, { "epoch": 0.7508489509313003, "grad_norm": 0.4875083565711975, "learning_rate": 4.6239727425718545e-06, "loss": 0.0755, "step": 42398 }, { "epoch": 0.7508666604683287, "grad_norm": 0.401227742433548, "learning_rate": 4.623351446237796e-06, "loss": 0.0506, "step": 42399 }, { "epoch": 0.7508843700053571, "grad_norm": 0.66880863904953, "learning_rate": 4.622730184041858e-06, "loss": 0.0492, "step": 42400 }, { "epoch": 0.7509020795423855, "grad_norm": 0.4112367033958435, "learning_rate": 4.6221089559860875e-06, "loss": 0.0397, "step": 42401 }, { "epoch": 0.750919789079414, "grad_norm": 0.50127774477005, "learning_rate": 4.621487762072534e-06, "loss": 0.0441, "step": 42402 }, { "epoch": 0.7509374986164424, "grad_norm": 0.5574162602424622, "learning_rate": 4.620866602303232e-06, "loss": 0.0502, "step": 42403 }, { "epoch": 0.7509552081534708, "grad_norm": 0.6006271243095398, "learning_rate": 4.620245476680229e-06, "loss": 0.0383, "step": 42404 }, { "epoch": 0.7509729176904992, "grad_norm": 0.41393283009529114, "learning_rate": 4.619624385205568e-06, "loss": 0.0662, "step": 42405 }, { "epoch": 0.7509906272275277, "grad_norm": 0.4879499077796936, "learning_rate": 4.619003327881291e-06, "loss": 0.0387, "step": 42406 }, { "epoch": 0.7510083367645561, "grad_norm": 0.46915116906166077, "learning_rate": 4.618382304709446e-06, "loss": 0.0623, "step": 42407 }, { "epoch": 0.7510260463015845, "grad_norm": 0.7570828199386597, "learning_rate": 4.617761315692073e-06, "loss": 0.0639, "step": 42408 }, { "epoch": 0.7510437558386129, "grad_norm": 0.5138211250305176, "learning_rate": 4.617140360831214e-06, "loss": 0.0456, "step": 42409 }, { "epoch": 0.7510614653756414, "grad_norm": 1.2139716148376465, "learning_rate": 4.6165194401289175e-06, "loss": 0.0781, "step": 42410 }, { "epoch": 0.7510791749126698, "grad_norm": 0.560845673084259, "learning_rate": 4.615898553587218e-06, "loss": 0.0508, "step": 42411 }, { "epoch": 0.7510968844496982, "grad_norm": 0.42620572447776794, "learning_rate": 4.615277701208162e-06, "loss": 0.0494, "step": 42412 }, { "epoch": 0.7511145939867268, "grad_norm": 0.7310308814048767, "learning_rate": 4.614656882993796e-06, "loss": 0.0398, "step": 42413 }, { "epoch": 0.7511323035237552, "grad_norm": 0.704191267490387, "learning_rate": 4.614036098946154e-06, "loss": 0.0479, "step": 42414 }, { "epoch": 0.7511500130607836, "grad_norm": 0.7420120239257812, "learning_rate": 4.6134153490672835e-06, "loss": 0.0885, "step": 42415 }, { "epoch": 0.751167722597812, "grad_norm": 0.6192424893379211, "learning_rate": 4.61279463335922e-06, "loss": 0.0472, "step": 42416 }, { "epoch": 0.7511854321348405, "grad_norm": 0.8974308967590332, "learning_rate": 4.612173951824022e-06, "loss": 0.0557, "step": 42417 }, { "epoch": 0.7512031416718689, "grad_norm": 0.5890372395515442, "learning_rate": 4.611553304463715e-06, "loss": 0.0564, "step": 42418 }, { "epoch": 0.7512208512088973, "grad_norm": 0.4213404655456543, "learning_rate": 4.610932691280347e-06, "loss": 0.0475, "step": 42419 }, { "epoch": 0.7512385607459257, "grad_norm": 0.24786920845508575, "learning_rate": 4.610312112275966e-06, "loss": 0.0174, "step": 42420 }, { "epoch": 0.7512562702829542, "grad_norm": 0.7068552374839783, "learning_rate": 4.6096915674526016e-06, "loss": 0.0827, "step": 42421 }, { "epoch": 0.7512739798199826, "grad_norm": 0.6683940291404724, "learning_rate": 4.609071056812301e-06, "loss": 0.0768, "step": 42422 }, { "epoch": 0.751291689357011, "grad_norm": 0.8344973921775818, "learning_rate": 4.608450580357106e-06, "loss": 0.0528, "step": 42423 }, { "epoch": 0.7513093988940394, "grad_norm": 0.6820783615112305, "learning_rate": 4.607830138089057e-06, "loss": 0.056, "step": 42424 }, { "epoch": 0.7513271084310679, "grad_norm": 0.5599538683891296, "learning_rate": 4.607209730010196e-06, "loss": 0.0427, "step": 42425 }, { "epoch": 0.7513448179680963, "grad_norm": 0.6915649175643921, "learning_rate": 4.606589356122566e-06, "loss": 0.0502, "step": 42426 }, { "epoch": 0.7513625275051247, "grad_norm": 0.7035432457923889, "learning_rate": 4.605969016428207e-06, "loss": 0.0517, "step": 42427 }, { "epoch": 0.7513802370421532, "grad_norm": 0.6145403981208801, "learning_rate": 4.605348710929155e-06, "loss": 0.0491, "step": 42428 }, { "epoch": 0.7513979465791816, "grad_norm": 0.882212221622467, "learning_rate": 4.604728439627455e-06, "loss": 0.0874, "step": 42429 }, { "epoch": 0.75141565611621, "grad_norm": 0.5620045065879822, "learning_rate": 4.604108202525147e-06, "loss": 0.0556, "step": 42430 }, { "epoch": 0.7514333656532384, "grad_norm": 0.40601059794425964, "learning_rate": 4.603487999624276e-06, "loss": 0.0272, "step": 42431 }, { "epoch": 0.7514510751902669, "grad_norm": 0.8331584334373474, "learning_rate": 4.602867830926873e-06, "loss": 0.0687, "step": 42432 }, { "epoch": 0.7514687847272953, "grad_norm": 0.9703249335289001, "learning_rate": 4.602247696434984e-06, "loss": 0.0735, "step": 42433 }, { "epoch": 0.7514864942643237, "grad_norm": 0.706831693649292, "learning_rate": 4.601627596150646e-06, "loss": 0.0508, "step": 42434 }, { "epoch": 0.7515042038013521, "grad_norm": 0.6662231087684631, "learning_rate": 4.601007530075904e-06, "loss": 0.034, "step": 42435 }, { "epoch": 0.7515219133383806, "grad_norm": 0.2349141538143158, "learning_rate": 4.600387498212795e-06, "loss": 0.0411, "step": 42436 }, { "epoch": 0.751539622875409, "grad_norm": 0.65509033203125, "learning_rate": 4.59976750056336e-06, "loss": 0.0539, "step": 42437 }, { "epoch": 0.7515573324124374, "grad_norm": 0.4792332053184509, "learning_rate": 4.5991475371296415e-06, "loss": 0.0623, "step": 42438 }, { "epoch": 0.7515750419494658, "grad_norm": 0.7541734576225281, "learning_rate": 4.59852760791367e-06, "loss": 0.0863, "step": 42439 }, { "epoch": 0.7515927514864943, "grad_norm": 0.5175877213478088, "learning_rate": 4.597907712917492e-06, "loss": 0.0582, "step": 42440 }, { "epoch": 0.7516104610235227, "grad_norm": 1.1691787242889404, "learning_rate": 4.59728785214315e-06, "loss": 0.0821, "step": 42441 }, { "epoch": 0.7516281705605511, "grad_norm": 0.9753322601318359, "learning_rate": 4.596668025592675e-06, "loss": 0.0826, "step": 42442 }, { "epoch": 0.7516458800975796, "grad_norm": 0.5737667679786682, "learning_rate": 4.5960482332681084e-06, "loss": 0.0597, "step": 42443 }, { "epoch": 0.751663589634608, "grad_norm": 0.3891089856624603, "learning_rate": 4.595428475171492e-06, "loss": 0.0364, "step": 42444 }, { "epoch": 0.7516812991716364, "grad_norm": 0.629346489906311, "learning_rate": 4.594808751304863e-06, "loss": 0.0599, "step": 42445 }, { "epoch": 0.7516990087086648, "grad_norm": 0.608871340751648, "learning_rate": 4.59418906167026e-06, "loss": 0.061, "step": 42446 }, { "epoch": 0.7517167182456933, "grad_norm": 0.570597767829895, "learning_rate": 4.593569406269724e-06, "loss": 0.0838, "step": 42447 }, { "epoch": 0.7517344277827217, "grad_norm": 0.5261878967285156, "learning_rate": 4.5929497851052915e-06, "loss": 0.073, "step": 42448 }, { "epoch": 0.7517521373197501, "grad_norm": 0.41391050815582275, "learning_rate": 4.5923301981790066e-06, "loss": 0.0569, "step": 42449 }, { "epoch": 0.7517698468567785, "grad_norm": 0.4579255282878876, "learning_rate": 4.591710645492898e-06, "loss": 0.0618, "step": 42450 }, { "epoch": 0.751787556393807, "grad_norm": 0.4789068102836609, "learning_rate": 4.591091127049007e-06, "loss": 0.0887, "step": 42451 }, { "epoch": 0.7518052659308354, "grad_norm": 0.4161949157714844, "learning_rate": 4.59047164284938e-06, "loss": 0.0646, "step": 42452 }, { "epoch": 0.7518229754678638, "grad_norm": 1.050262451171875, "learning_rate": 4.589852192896038e-06, "loss": 0.084, "step": 42453 }, { "epoch": 0.7518406850048922, "grad_norm": 0.5671325922012329, "learning_rate": 4.589232777191034e-06, "loss": 0.0523, "step": 42454 }, { "epoch": 0.7518583945419207, "grad_norm": 0.6086000800132751, "learning_rate": 4.588613395736402e-06, "loss": 0.0812, "step": 42455 }, { "epoch": 0.7518761040789491, "grad_norm": 0.4176611602306366, "learning_rate": 4.5879940485341826e-06, "loss": 0.0588, "step": 42456 }, { "epoch": 0.7518938136159775, "grad_norm": 0.45273932814598083, "learning_rate": 4.587374735586406e-06, "loss": 0.0538, "step": 42457 }, { "epoch": 0.751911523153006, "grad_norm": 0.904344379901886, "learning_rate": 4.586755456895114e-06, "loss": 0.0585, "step": 42458 }, { "epoch": 0.7519292326900344, "grad_norm": 0.8814077973365784, "learning_rate": 4.586136212462346e-06, "loss": 0.0952, "step": 42459 }, { "epoch": 0.7519469422270628, "grad_norm": 0.6851930022239685, "learning_rate": 4.5855170022901325e-06, "loss": 0.0564, "step": 42460 }, { "epoch": 0.7519646517640912, "grad_norm": 0.3998415768146515, "learning_rate": 4.584897826380515e-06, "loss": 0.0448, "step": 42461 }, { "epoch": 0.7519823613011197, "grad_norm": 0.5541711449623108, "learning_rate": 4.58427868473553e-06, "loss": 0.0481, "step": 42462 }, { "epoch": 0.7520000708381481, "grad_norm": 1.0160444974899292, "learning_rate": 4.583659577357215e-06, "loss": 0.0882, "step": 42463 }, { "epoch": 0.7520177803751765, "grad_norm": 0.2998982071876526, "learning_rate": 4.583040504247606e-06, "loss": 0.0308, "step": 42464 }, { "epoch": 0.7520354899122049, "grad_norm": 0.4857737421989441, "learning_rate": 4.5824214654087415e-06, "loss": 0.044, "step": 42465 }, { "epoch": 0.7520531994492334, "grad_norm": 0.6921464800834656, "learning_rate": 4.5818024608426605e-06, "loss": 0.0581, "step": 42466 }, { "epoch": 0.7520709089862618, "grad_norm": 0.4878365099430084, "learning_rate": 4.58118349055139e-06, "loss": 0.0535, "step": 42467 }, { "epoch": 0.7520886185232902, "grad_norm": 0.6554036140441895, "learning_rate": 4.580564554536973e-06, "loss": 0.0621, "step": 42468 }, { "epoch": 0.7521063280603186, "grad_norm": 0.7521005868911743, "learning_rate": 4.579945652801444e-06, "loss": 0.0494, "step": 42469 }, { "epoch": 0.7521240375973471, "grad_norm": 0.5744496583938599, "learning_rate": 4.579326785346844e-06, "loss": 0.0335, "step": 42470 }, { "epoch": 0.7521417471343755, "grad_norm": 0.9568883180618286, "learning_rate": 4.578707952175204e-06, "loss": 0.0841, "step": 42471 }, { "epoch": 0.7521594566714039, "grad_norm": 0.40180763602256775, "learning_rate": 4.578089153288557e-06, "loss": 0.0595, "step": 42472 }, { "epoch": 0.7521771662084324, "grad_norm": 0.7671698927879333, "learning_rate": 4.577470388688944e-06, "loss": 0.0531, "step": 42473 }, { "epoch": 0.7521948757454608, "grad_norm": 0.5608153343200684, "learning_rate": 4.5768516583783975e-06, "loss": 0.0676, "step": 42474 }, { "epoch": 0.7522125852824892, "grad_norm": 0.7097266912460327, "learning_rate": 4.5762329623589576e-06, "loss": 0.0686, "step": 42475 }, { "epoch": 0.7522302948195176, "grad_norm": 0.5403695106506348, "learning_rate": 4.5756143006326545e-06, "loss": 0.0575, "step": 42476 }, { "epoch": 0.7522480043565462, "grad_norm": 0.8683169484138489, "learning_rate": 4.574995673201533e-06, "loss": 0.0699, "step": 42477 }, { "epoch": 0.7522657138935746, "grad_norm": 0.37847328186035156, "learning_rate": 4.574377080067615e-06, "loss": 0.0416, "step": 42478 }, { "epoch": 0.752283423430603, "grad_norm": 0.6924318671226501, "learning_rate": 4.573758521232942e-06, "loss": 0.0692, "step": 42479 }, { "epoch": 0.7523011329676313, "grad_norm": 0.7522851824760437, "learning_rate": 4.573139996699549e-06, "loss": 0.064, "step": 42480 }, { "epoch": 0.7523188425046599, "grad_norm": 0.2407360076904297, "learning_rate": 4.572521506469476e-06, "loss": 0.0568, "step": 42481 }, { "epoch": 0.7523365520416883, "grad_norm": 0.8736274242401123, "learning_rate": 4.571903050544742e-06, "loss": 0.0647, "step": 42482 }, { "epoch": 0.7523542615787167, "grad_norm": 0.4453941881656647, "learning_rate": 4.571284628927399e-06, "loss": 0.0553, "step": 42483 }, { "epoch": 0.752371971115745, "grad_norm": 0.4360561966896057, "learning_rate": 4.570666241619478e-06, "loss": 0.0481, "step": 42484 }, { "epoch": 0.7523896806527736, "grad_norm": 0.42710086703300476, "learning_rate": 4.570047888623005e-06, "loss": 0.0446, "step": 42485 }, { "epoch": 0.752407390189802, "grad_norm": 0.5026708245277405, "learning_rate": 4.569429569940021e-06, "loss": 0.0464, "step": 42486 }, { "epoch": 0.7524250997268304, "grad_norm": 0.7119983434677124, "learning_rate": 4.568811285572558e-06, "loss": 0.0829, "step": 42487 }, { "epoch": 0.7524428092638589, "grad_norm": 0.6010763049125671, "learning_rate": 4.568193035522655e-06, "loss": 0.061, "step": 42488 }, { "epoch": 0.7524605188008873, "grad_norm": 0.5466201901435852, "learning_rate": 4.567574819792339e-06, "loss": 0.0472, "step": 42489 }, { "epoch": 0.7524782283379157, "grad_norm": 0.814970850944519, "learning_rate": 4.566956638383644e-06, "loss": 0.0514, "step": 42490 }, { "epoch": 0.7524959378749441, "grad_norm": 0.6450212001800537, "learning_rate": 4.566338491298607e-06, "loss": 0.0708, "step": 42491 }, { "epoch": 0.7525136474119726, "grad_norm": 0.3587302565574646, "learning_rate": 4.565720378539263e-06, "loss": 0.0645, "step": 42492 }, { "epoch": 0.752531356949001, "grad_norm": 0.585213840007782, "learning_rate": 4.565102300107642e-06, "loss": 0.0912, "step": 42493 }, { "epoch": 0.7525490664860294, "grad_norm": 0.5073739290237427, "learning_rate": 4.564484256005779e-06, "loss": 0.0734, "step": 42494 }, { "epoch": 0.7525667760230578, "grad_norm": 0.26767778396606445, "learning_rate": 4.5638662462357125e-06, "loss": 0.0504, "step": 42495 }, { "epoch": 0.7525844855600863, "grad_norm": 0.5659258961677551, "learning_rate": 4.563248270799466e-06, "loss": 0.0634, "step": 42496 }, { "epoch": 0.7526021950971147, "grad_norm": 1.0124017000198364, "learning_rate": 4.562630329699077e-06, "loss": 0.0982, "step": 42497 }, { "epoch": 0.7526199046341431, "grad_norm": 0.5888335704803467, "learning_rate": 4.562012422936583e-06, "loss": 0.0466, "step": 42498 }, { "epoch": 0.7526376141711715, "grad_norm": 0.538205087184906, "learning_rate": 4.5613945505140075e-06, "loss": 0.0508, "step": 42499 }, { "epoch": 0.7526553237082, "grad_norm": 0.7015752196311951, "learning_rate": 4.560776712433388e-06, "loss": 0.0394, "step": 42500 }, { "epoch": 0.7526730332452284, "grad_norm": 0.560182511806488, "learning_rate": 4.560158908696757e-06, "loss": 0.0544, "step": 42501 }, { "epoch": 0.7526907427822568, "grad_norm": 0.6145251393318176, "learning_rate": 4.5595411393061494e-06, "loss": 0.0705, "step": 42502 }, { "epoch": 0.7527084523192853, "grad_norm": 0.2938063144683838, "learning_rate": 4.558923404263593e-06, "loss": 0.0595, "step": 42503 }, { "epoch": 0.7527261618563137, "grad_norm": 0.7772432565689087, "learning_rate": 4.558305703571123e-06, "loss": 0.0712, "step": 42504 }, { "epoch": 0.7527438713933421, "grad_norm": 0.5489708185195923, "learning_rate": 4.557688037230777e-06, "loss": 0.0443, "step": 42505 }, { "epoch": 0.7527615809303705, "grad_norm": 0.8255888223648071, "learning_rate": 4.557070405244576e-06, "loss": 0.0602, "step": 42506 }, { "epoch": 0.752779290467399, "grad_norm": 0.6347597241401672, "learning_rate": 4.556452807614557e-06, "loss": 0.068, "step": 42507 }, { "epoch": 0.7527970000044274, "grad_norm": 0.8465772271156311, "learning_rate": 4.555835244342751e-06, "loss": 0.0714, "step": 42508 }, { "epoch": 0.7528147095414558, "grad_norm": 0.5742253661155701, "learning_rate": 4.5552177154311965e-06, "loss": 0.0598, "step": 42509 }, { "epoch": 0.7528324190784842, "grad_norm": 0.9714709520339966, "learning_rate": 4.5546002208819145e-06, "loss": 0.0771, "step": 42510 }, { "epoch": 0.7528501286155127, "grad_norm": 1.0626925230026245, "learning_rate": 4.553982760696936e-06, "loss": 0.0426, "step": 42511 }, { "epoch": 0.7528678381525411, "grad_norm": 0.4490976333618164, "learning_rate": 4.553365334878308e-06, "loss": 0.0329, "step": 42512 }, { "epoch": 0.7528855476895695, "grad_norm": 0.547960102558136, "learning_rate": 4.552747943428046e-06, "loss": 0.0486, "step": 42513 }, { "epoch": 0.7529032572265979, "grad_norm": 0.6725027561187744, "learning_rate": 4.552130586348186e-06, "loss": 0.0482, "step": 42514 }, { "epoch": 0.7529209667636264, "grad_norm": 0.48082372546195984, "learning_rate": 4.551513263640761e-06, "loss": 0.048, "step": 42515 }, { "epoch": 0.7529386763006548, "grad_norm": 0.4929211437702179, "learning_rate": 4.550895975307805e-06, "loss": 0.0788, "step": 42516 }, { "epoch": 0.7529563858376832, "grad_norm": 0.28815022110939026, "learning_rate": 4.550278721351339e-06, "loss": 0.0368, "step": 42517 }, { "epoch": 0.7529740953747117, "grad_norm": 0.9330499768257141, "learning_rate": 4.5496615017734e-06, "loss": 0.0693, "step": 42518 }, { "epoch": 0.7529918049117401, "grad_norm": 0.5632593035697937, "learning_rate": 4.549044316576016e-06, "loss": 0.0598, "step": 42519 }, { "epoch": 0.7530095144487685, "grad_norm": 0.6086298227310181, "learning_rate": 4.548427165761221e-06, "loss": 0.0484, "step": 42520 }, { "epoch": 0.7530272239857969, "grad_norm": 0.7791472673416138, "learning_rate": 4.547810049331042e-06, "loss": 0.0722, "step": 42521 }, { "epoch": 0.7530449335228254, "grad_norm": 0.4528487026691437, "learning_rate": 4.547192967287512e-06, "loss": 0.0449, "step": 42522 }, { "epoch": 0.7530626430598538, "grad_norm": 0.7890623211860657, "learning_rate": 4.546575919632664e-06, "loss": 0.0618, "step": 42523 }, { "epoch": 0.7530803525968822, "grad_norm": 0.8626956343650818, "learning_rate": 4.5459589063685195e-06, "loss": 0.0713, "step": 42524 }, { "epoch": 0.7530980621339106, "grad_norm": 0.7278971672058105, "learning_rate": 4.545341927497112e-06, "loss": 0.057, "step": 42525 }, { "epoch": 0.7531157716709391, "grad_norm": 0.4265597462654114, "learning_rate": 4.544724983020473e-06, "loss": 0.0538, "step": 42526 }, { "epoch": 0.7531334812079675, "grad_norm": 0.7007651925086975, "learning_rate": 4.544108072940636e-06, "loss": 0.0483, "step": 42527 }, { "epoch": 0.7531511907449959, "grad_norm": 0.302440881729126, "learning_rate": 4.543491197259621e-06, "loss": 0.0423, "step": 42528 }, { "epoch": 0.7531689002820243, "grad_norm": 0.6751411557197571, "learning_rate": 4.542874355979462e-06, "loss": 0.0567, "step": 42529 }, { "epoch": 0.7531866098190528, "grad_norm": 0.6837232708930969, "learning_rate": 4.542257549102189e-06, "loss": 0.0171, "step": 42530 }, { "epoch": 0.7532043193560812, "grad_norm": 0.37275487184524536, "learning_rate": 4.541640776629831e-06, "loss": 0.075, "step": 42531 }, { "epoch": 0.7532220288931096, "grad_norm": 0.8971781134605408, "learning_rate": 4.5410240385644166e-06, "loss": 0.0725, "step": 42532 }, { "epoch": 0.7532397384301381, "grad_norm": 0.6754270792007446, "learning_rate": 4.5404073349079764e-06, "loss": 0.0662, "step": 42533 }, { "epoch": 0.7532574479671665, "grad_norm": 0.5023847222328186, "learning_rate": 4.539790665662542e-06, "loss": 0.0578, "step": 42534 }, { "epoch": 0.7532751575041949, "grad_norm": 0.5132867693901062, "learning_rate": 4.5391740308301335e-06, "loss": 0.0468, "step": 42535 }, { "epoch": 0.7532928670412233, "grad_norm": 0.5587390065193176, "learning_rate": 4.538557430412785e-06, "loss": 0.0675, "step": 42536 }, { "epoch": 0.7533105765782518, "grad_norm": 0.3856275677680969, "learning_rate": 4.53794086441253e-06, "loss": 0.0506, "step": 42537 }, { "epoch": 0.7533282861152802, "grad_norm": 0.5499317646026611, "learning_rate": 4.537324332831385e-06, "loss": 0.0577, "step": 42538 }, { "epoch": 0.7533459956523086, "grad_norm": 0.1029541939496994, "learning_rate": 4.536707835671384e-06, "loss": 0.0531, "step": 42539 }, { "epoch": 0.753363705189337, "grad_norm": 0.5616805553436279, "learning_rate": 4.536091372934552e-06, "loss": 0.0448, "step": 42540 }, { "epoch": 0.7533814147263656, "grad_norm": 0.5217716693878174, "learning_rate": 4.535474944622932e-06, "loss": 0.0335, "step": 42541 }, { "epoch": 0.753399124263394, "grad_norm": 0.5559911727905273, "learning_rate": 4.534858550738535e-06, "loss": 0.052, "step": 42542 }, { "epoch": 0.7534168338004223, "grad_norm": 0.7590373158454895, "learning_rate": 4.534242191283395e-06, "loss": 0.0633, "step": 42543 }, { "epoch": 0.7534345433374507, "grad_norm": 0.38038021326065063, "learning_rate": 4.533625866259543e-06, "loss": 0.0329, "step": 42544 }, { "epoch": 0.7534522528744793, "grad_norm": 0.6552510857582092, "learning_rate": 4.533009575669001e-06, "loss": 0.0564, "step": 42545 }, { "epoch": 0.7534699624115077, "grad_norm": 0.7032909393310547, "learning_rate": 4.5323933195137975e-06, "loss": 0.0615, "step": 42546 }, { "epoch": 0.753487671948536, "grad_norm": 0.5997182726860046, "learning_rate": 4.5317770977959626e-06, "loss": 0.0567, "step": 42547 }, { "epoch": 0.7535053814855646, "grad_norm": 0.4023149609565735, "learning_rate": 4.531160910517521e-06, "loss": 0.0531, "step": 42548 }, { "epoch": 0.753523091022593, "grad_norm": 0.6582052707672119, "learning_rate": 4.530544757680501e-06, "loss": 0.0475, "step": 42549 }, { "epoch": 0.7535408005596214, "grad_norm": 0.4433117210865021, "learning_rate": 4.5299286392869306e-06, "loss": 0.0622, "step": 42550 }, { "epoch": 0.7535585100966498, "grad_norm": 0.8418727517127991, "learning_rate": 4.529312555338835e-06, "loss": 0.0827, "step": 42551 }, { "epoch": 0.7535762196336783, "grad_norm": 0.5733687877655029, "learning_rate": 4.528696505838247e-06, "loss": 0.0711, "step": 42552 }, { "epoch": 0.7535939291707067, "grad_norm": 0.5588098764419556, "learning_rate": 4.528080490787185e-06, "loss": 0.0492, "step": 42553 }, { "epoch": 0.7536116387077351, "grad_norm": 0.703511655330658, "learning_rate": 4.527464510187677e-06, "loss": 0.0327, "step": 42554 }, { "epoch": 0.7536293482447635, "grad_norm": 0.9388260245323181, "learning_rate": 4.526848564041758e-06, "loss": 0.0453, "step": 42555 }, { "epoch": 0.753647057781792, "grad_norm": 0.7891541123390198, "learning_rate": 4.526232652351442e-06, "loss": 0.0592, "step": 42556 }, { "epoch": 0.7536647673188204, "grad_norm": 0.5274372100830078, "learning_rate": 4.525616775118761e-06, "loss": 0.042, "step": 42557 }, { "epoch": 0.7536824768558488, "grad_norm": 0.5747488737106323, "learning_rate": 4.525000932345742e-06, "loss": 0.0526, "step": 42558 }, { "epoch": 0.7537001863928772, "grad_norm": 0.5703608989715576, "learning_rate": 4.524385124034412e-06, "loss": 0.0768, "step": 42559 }, { "epoch": 0.7537178959299057, "grad_norm": 0.3625422418117523, "learning_rate": 4.5237693501867926e-06, "loss": 0.0481, "step": 42560 }, { "epoch": 0.7537356054669341, "grad_norm": 0.6494209170341492, "learning_rate": 4.5231536108049145e-06, "loss": 0.0487, "step": 42561 }, { "epoch": 0.7537533150039625, "grad_norm": 0.3991282880306244, "learning_rate": 4.522537905890804e-06, "loss": 0.0677, "step": 42562 }, { "epoch": 0.753771024540991, "grad_norm": 0.8045478463172913, "learning_rate": 4.521922235446481e-06, "loss": 0.0728, "step": 42563 }, { "epoch": 0.7537887340780194, "grad_norm": 0.6263943910598755, "learning_rate": 4.521306599473974e-06, "loss": 0.0748, "step": 42564 }, { "epoch": 0.7538064436150478, "grad_norm": 0.7414563894271851, "learning_rate": 4.520690997975308e-06, "loss": 0.0484, "step": 42565 }, { "epoch": 0.7538241531520762, "grad_norm": 0.21267510950565338, "learning_rate": 4.5200754309525124e-06, "loss": 0.0533, "step": 42566 }, { "epoch": 0.7538418626891047, "grad_norm": 0.562168538570404, "learning_rate": 4.519459898407607e-06, "loss": 0.0534, "step": 42567 }, { "epoch": 0.7538595722261331, "grad_norm": 0.8693652153015137, "learning_rate": 4.518844400342615e-06, "loss": 0.082, "step": 42568 }, { "epoch": 0.7538772817631615, "grad_norm": 0.6416771411895752, "learning_rate": 4.518228936759567e-06, "loss": 0.0559, "step": 42569 }, { "epoch": 0.7538949913001899, "grad_norm": 0.5428532361984253, "learning_rate": 4.517613507660485e-06, "loss": 0.0549, "step": 42570 }, { "epoch": 0.7539127008372184, "grad_norm": 0.6826041340827942, "learning_rate": 4.516998113047394e-06, "loss": 0.0544, "step": 42571 }, { "epoch": 0.7539304103742468, "grad_norm": 0.5043799877166748, "learning_rate": 4.516382752922319e-06, "loss": 0.055, "step": 42572 }, { "epoch": 0.7539481199112752, "grad_norm": 0.6426193714141846, "learning_rate": 4.51576742728729e-06, "loss": 0.036, "step": 42573 }, { "epoch": 0.7539658294483036, "grad_norm": 0.8648400902748108, "learning_rate": 4.51515213614432e-06, "loss": 0.0873, "step": 42574 }, { "epoch": 0.7539835389853321, "grad_norm": 0.2773747444152832, "learning_rate": 4.51453687949544e-06, "loss": 0.0506, "step": 42575 }, { "epoch": 0.7540012485223605, "grad_norm": 0.8458093404769897, "learning_rate": 4.513921657342676e-06, "loss": 0.0646, "step": 42576 }, { "epoch": 0.7540189580593889, "grad_norm": 0.40042486786842346, "learning_rate": 4.513306469688042e-06, "loss": 0.0555, "step": 42577 }, { "epoch": 0.7540366675964174, "grad_norm": 0.37979671359062195, "learning_rate": 4.5126913165335734e-06, "loss": 0.043, "step": 42578 }, { "epoch": 0.7540543771334458, "grad_norm": 0.5479522347450256, "learning_rate": 4.512076197881287e-06, "loss": 0.0713, "step": 42579 }, { "epoch": 0.7540720866704742, "grad_norm": 0.4389597475528717, "learning_rate": 4.511461113733216e-06, "loss": 0.0429, "step": 42580 }, { "epoch": 0.7540897962075026, "grad_norm": 0.36932986974716187, "learning_rate": 4.510846064091373e-06, "loss": 0.0602, "step": 42581 }, { "epoch": 0.7541075057445311, "grad_norm": 0.3926013112068176, "learning_rate": 4.510231048957784e-06, "loss": 0.06, "step": 42582 }, { "epoch": 0.7541252152815595, "grad_norm": 1.1561658382415771, "learning_rate": 4.509616068334475e-06, "loss": 0.0622, "step": 42583 }, { "epoch": 0.7541429248185879, "grad_norm": 0.8153685331344604, "learning_rate": 4.509001122223473e-06, "loss": 0.0869, "step": 42584 }, { "epoch": 0.7541606343556163, "grad_norm": 0.49429064989089966, "learning_rate": 4.508386210626791e-06, "loss": 0.0666, "step": 42585 }, { "epoch": 0.7541783438926448, "grad_norm": 0.45129403471946716, "learning_rate": 4.507771333546456e-06, "loss": 0.0365, "step": 42586 }, { "epoch": 0.7541960534296732, "grad_norm": 0.5901055932044983, "learning_rate": 4.507156490984492e-06, "loss": 0.0503, "step": 42587 }, { "epoch": 0.7542137629667016, "grad_norm": 0.6827995777130127, "learning_rate": 4.506541682942924e-06, "loss": 0.0415, "step": 42588 }, { "epoch": 0.75423147250373, "grad_norm": 0.3867833912372589, "learning_rate": 4.505926909423772e-06, "loss": 0.0496, "step": 42589 }, { "epoch": 0.7542491820407585, "grad_norm": 0.80460125207901, "learning_rate": 4.505312170429058e-06, "loss": 0.0916, "step": 42590 }, { "epoch": 0.7542668915777869, "grad_norm": 0.43136459589004517, "learning_rate": 4.504697465960811e-06, "loss": 0.0421, "step": 42591 }, { "epoch": 0.7542846011148153, "grad_norm": 0.6265680193901062, "learning_rate": 4.5040827960210425e-06, "loss": 0.066, "step": 42592 }, { "epoch": 0.7543023106518438, "grad_norm": 0.5408865213394165, "learning_rate": 4.503468160611781e-06, "loss": 0.0463, "step": 42593 }, { "epoch": 0.7543200201888722, "grad_norm": 0.6170075535774231, "learning_rate": 4.502853559735051e-06, "loss": 0.064, "step": 42594 }, { "epoch": 0.7543377297259006, "grad_norm": 0.706719696521759, "learning_rate": 4.5022389933928684e-06, "loss": 0.0517, "step": 42595 }, { "epoch": 0.754355439262929, "grad_norm": 0.6006306409835815, "learning_rate": 4.5016244615872555e-06, "loss": 0.0516, "step": 42596 }, { "epoch": 0.7543731487999575, "grad_norm": 0.4300147294998169, "learning_rate": 4.501009964320236e-06, "loss": 0.0487, "step": 42597 }, { "epoch": 0.7543908583369859, "grad_norm": 0.672419011592865, "learning_rate": 4.500395501593834e-06, "loss": 0.0389, "step": 42598 }, { "epoch": 0.7544085678740143, "grad_norm": 0.8220072984695435, "learning_rate": 4.499781073410067e-06, "loss": 0.0556, "step": 42599 }, { "epoch": 0.7544262774110427, "grad_norm": 0.3874852955341339, "learning_rate": 4.499166679770959e-06, "loss": 0.0548, "step": 42600 }, { "epoch": 0.7544439869480712, "grad_norm": 0.8658773303031921, "learning_rate": 4.4985523206785355e-06, "loss": 0.0484, "step": 42601 }, { "epoch": 0.7544616964850996, "grad_norm": 0.7485105395317078, "learning_rate": 4.497937996134807e-06, "loss": 0.0548, "step": 42602 }, { "epoch": 0.754479406022128, "grad_norm": 0.5080951452255249, "learning_rate": 4.497323706141803e-06, "loss": 0.046, "step": 42603 }, { "epoch": 0.7544971155591564, "grad_norm": 0.6071584224700928, "learning_rate": 4.496709450701539e-06, "loss": 0.107, "step": 42604 }, { "epoch": 0.754514825096185, "grad_norm": 0.5654295086860657, "learning_rate": 4.496095229816044e-06, "loss": 0.0403, "step": 42605 }, { "epoch": 0.7545325346332133, "grad_norm": 0.26077666878700256, "learning_rate": 4.495481043487323e-06, "loss": 0.0433, "step": 42606 }, { "epoch": 0.7545502441702417, "grad_norm": 0.8322702646255493, "learning_rate": 4.494866891717413e-06, "loss": 0.0652, "step": 42607 }, { "epoch": 0.7545679537072703, "grad_norm": 0.5550534129142761, "learning_rate": 4.494252774508333e-06, "loss": 0.0503, "step": 42608 }, { "epoch": 0.7545856632442987, "grad_norm": 0.8184951543807983, "learning_rate": 4.493638691862093e-06, "loss": 0.079, "step": 42609 }, { "epoch": 0.754603372781327, "grad_norm": 0.5868462324142456, "learning_rate": 4.493024643780719e-06, "loss": 0.0514, "step": 42610 }, { "epoch": 0.7546210823183555, "grad_norm": 0.5120995044708252, "learning_rate": 4.492410630266233e-06, "loss": 0.0642, "step": 42611 }, { "epoch": 0.754638791855384, "grad_norm": 0.5418787598609924, "learning_rate": 4.491796651320657e-06, "loss": 0.0574, "step": 42612 }, { "epoch": 0.7546565013924124, "grad_norm": 0.5490065813064575, "learning_rate": 4.491182706946001e-06, "loss": 0.0555, "step": 42613 }, { "epoch": 0.7546742109294408, "grad_norm": 0.6416069865226746, "learning_rate": 4.490568797144293e-06, "loss": 0.0435, "step": 42614 }, { "epoch": 0.7546919204664692, "grad_norm": 0.5950947999954224, "learning_rate": 4.48995492191755e-06, "loss": 0.0372, "step": 42615 }, { "epoch": 0.7547096300034977, "grad_norm": 0.3935777544975281, "learning_rate": 4.489341081267792e-06, "loss": 0.0482, "step": 42616 }, { "epoch": 0.7547273395405261, "grad_norm": 0.4619617462158203, "learning_rate": 4.488727275197039e-06, "loss": 0.027, "step": 42617 }, { "epoch": 0.7547450490775545, "grad_norm": 0.43489089608192444, "learning_rate": 4.4881135037073104e-06, "loss": 0.0442, "step": 42618 }, { "epoch": 0.754762758614583, "grad_norm": 0.14423814415931702, "learning_rate": 4.487499766800629e-06, "loss": 0.0496, "step": 42619 }, { "epoch": 0.7547804681516114, "grad_norm": 0.5919263362884521, "learning_rate": 4.486886064479006e-06, "loss": 0.0559, "step": 42620 }, { "epoch": 0.7547981776886398, "grad_norm": 0.332763671875, "learning_rate": 4.486272396744465e-06, "loss": 0.029, "step": 42621 }, { "epoch": 0.7548158872256682, "grad_norm": 0.49694034457206726, "learning_rate": 4.485658763599023e-06, "loss": 0.0559, "step": 42622 }, { "epoch": 0.7548335967626967, "grad_norm": 0.722125232219696, "learning_rate": 4.485045165044707e-06, "loss": 0.0599, "step": 42623 }, { "epoch": 0.7548513062997251, "grad_norm": 0.6100716590881348, "learning_rate": 4.484431601083524e-06, "loss": 0.0533, "step": 42624 }, { "epoch": 0.7548690158367535, "grad_norm": 0.31655019521713257, "learning_rate": 4.4838180717174955e-06, "loss": 0.0529, "step": 42625 }, { "epoch": 0.7548867253737819, "grad_norm": 0.9983572959899902, "learning_rate": 4.483204576948642e-06, "loss": 0.0669, "step": 42626 }, { "epoch": 0.7549044349108104, "grad_norm": 0.6059868335723877, "learning_rate": 4.482591116778983e-06, "loss": 0.0854, "step": 42627 }, { "epoch": 0.7549221444478388, "grad_norm": 0.47574207186698914, "learning_rate": 4.481977691210535e-06, "loss": 0.0431, "step": 42628 }, { "epoch": 0.7549398539848672, "grad_norm": 0.4851865768432617, "learning_rate": 4.4813643002453165e-06, "loss": 0.0757, "step": 42629 }, { "epoch": 0.7549575635218956, "grad_norm": 0.7849476337432861, "learning_rate": 4.480750943885351e-06, "loss": 0.0793, "step": 42630 }, { "epoch": 0.7549752730589241, "grad_norm": 0.7911525368690491, "learning_rate": 4.4801376221326445e-06, "loss": 0.0878, "step": 42631 }, { "epoch": 0.7549929825959525, "grad_norm": 0.4538637399673462, "learning_rate": 4.479524334989223e-06, "loss": 0.0449, "step": 42632 }, { "epoch": 0.7550106921329809, "grad_norm": 0.5721771717071533, "learning_rate": 4.478911082457106e-06, "loss": 0.061, "step": 42633 }, { "epoch": 0.7550284016700094, "grad_norm": 0.725633978843689, "learning_rate": 4.478297864538303e-06, "loss": 0.0414, "step": 42634 }, { "epoch": 0.7550461112070378, "grad_norm": 0.5218515992164612, "learning_rate": 4.4776846812348316e-06, "loss": 0.0441, "step": 42635 }, { "epoch": 0.7550638207440662, "grad_norm": 0.3659699857234955, "learning_rate": 4.477071532548718e-06, "loss": 0.0416, "step": 42636 }, { "epoch": 0.7550815302810946, "grad_norm": 0.8005453944206238, "learning_rate": 4.476458418481978e-06, "loss": 0.0481, "step": 42637 }, { "epoch": 0.7550992398181231, "grad_norm": 0.7404668927192688, "learning_rate": 4.475845339036622e-06, "loss": 0.0543, "step": 42638 }, { "epoch": 0.7551169493551515, "grad_norm": 1.1437528133392334, "learning_rate": 4.475232294214671e-06, "loss": 0.0821, "step": 42639 }, { "epoch": 0.7551346588921799, "grad_norm": 0.7461929321289062, "learning_rate": 4.474619284018145e-06, "loss": 0.0659, "step": 42640 }, { "epoch": 0.7551523684292083, "grad_norm": 0.7022238969802856, "learning_rate": 4.474006308449051e-06, "loss": 0.0729, "step": 42641 }, { "epoch": 0.7551700779662368, "grad_norm": 0.4757961332798004, "learning_rate": 4.473393367509413e-06, "loss": 0.0536, "step": 42642 }, { "epoch": 0.7551877875032652, "grad_norm": 0.7351849675178528, "learning_rate": 4.4727804612012465e-06, "loss": 0.0543, "step": 42643 }, { "epoch": 0.7552054970402936, "grad_norm": 0.22890987992286682, "learning_rate": 4.472167589526567e-06, "loss": 0.0503, "step": 42644 }, { "epoch": 0.755223206577322, "grad_norm": 0.5887235999107361, "learning_rate": 4.471554752487391e-06, "loss": 0.0885, "step": 42645 }, { "epoch": 0.7552409161143505, "grad_norm": 0.6370116472244263, "learning_rate": 4.470941950085735e-06, "loss": 0.0458, "step": 42646 }, { "epoch": 0.7552586256513789, "grad_norm": 0.5825637578964233, "learning_rate": 4.47032918232362e-06, "loss": 0.0615, "step": 42647 }, { "epoch": 0.7552763351884073, "grad_norm": 0.8301523923873901, "learning_rate": 4.469716449203054e-06, "loss": 0.0758, "step": 42648 }, { "epoch": 0.7552940447254358, "grad_norm": 0.5391756296157837, "learning_rate": 4.469103750726053e-06, "loss": 0.0695, "step": 42649 }, { "epoch": 0.7553117542624642, "grad_norm": 0.5317660570144653, "learning_rate": 4.468491086894637e-06, "loss": 0.0484, "step": 42650 }, { "epoch": 0.7553294637994926, "grad_norm": 0.7033478617668152, "learning_rate": 4.467878457710824e-06, "loss": 0.0639, "step": 42651 }, { "epoch": 0.755347173336521, "grad_norm": 0.6351441144943237, "learning_rate": 4.4672658631766215e-06, "loss": 0.0496, "step": 42652 }, { "epoch": 0.7553648828735495, "grad_norm": 0.8825942873954773, "learning_rate": 4.466653303294049e-06, "loss": 0.0723, "step": 42653 }, { "epoch": 0.7553825924105779, "grad_norm": 0.7522657513618469, "learning_rate": 4.466040778065123e-06, "loss": 0.0787, "step": 42654 }, { "epoch": 0.7554003019476063, "grad_norm": 0.6602675318717957, "learning_rate": 4.4654282874918545e-06, "loss": 0.0382, "step": 42655 }, { "epoch": 0.7554180114846347, "grad_norm": 0.3571483790874481, "learning_rate": 4.464815831576263e-06, "loss": 0.0383, "step": 42656 }, { "epoch": 0.7554357210216632, "grad_norm": 0.9554805755615234, "learning_rate": 4.464203410320362e-06, "loss": 0.0918, "step": 42657 }, { "epoch": 0.7554534305586916, "grad_norm": 0.22051404416561127, "learning_rate": 4.463591023726172e-06, "loss": 0.0305, "step": 42658 }, { "epoch": 0.75547114009572, "grad_norm": 0.6562228202819824, "learning_rate": 4.462978671795696e-06, "loss": 0.0649, "step": 42659 }, { "epoch": 0.7554888496327484, "grad_norm": 0.5618272423744202, "learning_rate": 4.462366354530954e-06, "loss": 0.0592, "step": 42660 }, { "epoch": 0.7555065591697769, "grad_norm": 0.874725341796875, "learning_rate": 4.46175407193396e-06, "loss": 0.0623, "step": 42661 }, { "epoch": 0.7555242687068053, "grad_norm": 0.6943436861038208, "learning_rate": 4.461141824006735e-06, "loss": 0.047, "step": 42662 }, { "epoch": 0.7555419782438337, "grad_norm": 0.18425115942955017, "learning_rate": 4.460529610751283e-06, "loss": 0.034, "step": 42663 }, { "epoch": 0.7555596877808622, "grad_norm": 0.5448564291000366, "learning_rate": 4.4599174321696175e-06, "loss": 0.0676, "step": 42664 }, { "epoch": 0.7555773973178906, "grad_norm": 0.4521714150905609, "learning_rate": 4.459305288263767e-06, "loss": 0.048, "step": 42665 }, { "epoch": 0.755595106854919, "grad_norm": 0.6436985731124878, "learning_rate": 4.458693179035731e-06, "loss": 0.059, "step": 42666 }, { "epoch": 0.7556128163919474, "grad_norm": 0.5247175693511963, "learning_rate": 4.45808110448753e-06, "loss": 0.0682, "step": 42667 }, { "epoch": 0.755630525928976, "grad_norm": 0.6876904368400574, "learning_rate": 4.457469064621174e-06, "loss": 0.0331, "step": 42668 }, { "epoch": 0.7556482354660043, "grad_norm": 0.4611574709415436, "learning_rate": 4.456857059438683e-06, "loss": 0.0656, "step": 42669 }, { "epoch": 0.7556659450030327, "grad_norm": 0.45266878604888916, "learning_rate": 4.456245088942062e-06, "loss": 0.0713, "step": 42670 }, { "epoch": 0.7556836545400611, "grad_norm": 0.31184855103492737, "learning_rate": 4.4556331531333285e-06, "loss": 0.0694, "step": 42671 }, { "epoch": 0.7557013640770897, "grad_norm": 0.739418625831604, "learning_rate": 4.455021252014495e-06, "loss": 0.0559, "step": 42672 }, { "epoch": 0.755719073614118, "grad_norm": 0.6701757311820984, "learning_rate": 4.4544093855875755e-06, "loss": 0.0742, "step": 42673 }, { "epoch": 0.7557367831511465, "grad_norm": 0.5032624006271362, "learning_rate": 4.453797553854582e-06, "loss": 0.0457, "step": 42674 }, { "epoch": 0.7557544926881749, "grad_norm": 0.5302391648292542, "learning_rate": 4.453185756817529e-06, "loss": 0.0349, "step": 42675 }, { "epoch": 0.7557722022252034, "grad_norm": 0.6537743806838989, "learning_rate": 4.45257399447843e-06, "loss": 0.0616, "step": 42676 }, { "epoch": 0.7557899117622318, "grad_norm": 0.6885247230529785, "learning_rate": 4.451962266839294e-06, "loss": 0.06, "step": 42677 }, { "epoch": 0.7558076212992602, "grad_norm": 0.5074335932731628, "learning_rate": 4.451350573902135e-06, "loss": 0.0769, "step": 42678 }, { "epoch": 0.7558253308362887, "grad_norm": 1.0461560487747192, "learning_rate": 4.450738915668968e-06, "loss": 0.0837, "step": 42679 }, { "epoch": 0.7558430403733171, "grad_norm": 0.43483468890190125, "learning_rate": 4.4501272921418e-06, "loss": 0.0401, "step": 42680 }, { "epoch": 0.7558607499103455, "grad_norm": 0.5812828540802002, "learning_rate": 4.449515703322645e-06, "loss": 0.0538, "step": 42681 }, { "epoch": 0.7558784594473739, "grad_norm": 0.45786696672439575, "learning_rate": 4.448904149213519e-06, "loss": 0.0488, "step": 42682 }, { "epoch": 0.7558961689844024, "grad_norm": 0.7478610277175903, "learning_rate": 4.4482926298164285e-06, "loss": 0.0568, "step": 42683 }, { "epoch": 0.7559138785214308, "grad_norm": 0.649213969707489, "learning_rate": 4.4476811451333885e-06, "loss": 0.0607, "step": 42684 }, { "epoch": 0.7559315880584592, "grad_norm": 0.5667062401771545, "learning_rate": 4.4470696951664115e-06, "loss": 0.0612, "step": 42685 }, { "epoch": 0.7559492975954876, "grad_norm": 1.0065003633499146, "learning_rate": 4.446458279917512e-06, "loss": 0.0669, "step": 42686 }, { "epoch": 0.7559670071325161, "grad_norm": 0.5466299057006836, "learning_rate": 4.445846899388691e-06, "loss": 0.0653, "step": 42687 }, { "epoch": 0.7559847166695445, "grad_norm": 0.6746368408203125, "learning_rate": 4.445235553581968e-06, "loss": 0.0563, "step": 42688 }, { "epoch": 0.7560024262065729, "grad_norm": 1.0398271083831787, "learning_rate": 4.4446242424993524e-06, "loss": 0.0691, "step": 42689 }, { "epoch": 0.7560201357436013, "grad_norm": 0.6026824712753296, "learning_rate": 4.444012966142858e-06, "loss": 0.0693, "step": 42690 }, { "epoch": 0.7560378452806298, "grad_norm": 0.4560026526451111, "learning_rate": 4.443401724514491e-06, "loss": 0.0434, "step": 42691 }, { "epoch": 0.7560555548176582, "grad_norm": 0.4814424514770508, "learning_rate": 4.442790517616264e-06, "loss": 0.0383, "step": 42692 }, { "epoch": 0.7560732643546866, "grad_norm": 0.8235431909561157, "learning_rate": 4.442179345450183e-06, "loss": 0.0521, "step": 42693 }, { "epoch": 0.7560909738917151, "grad_norm": 0.9117772579193115, "learning_rate": 4.441568208018274e-06, "loss": 0.0535, "step": 42694 }, { "epoch": 0.7561086834287435, "grad_norm": 0.9632903337478638, "learning_rate": 4.440957105322532e-06, "loss": 0.0656, "step": 42695 }, { "epoch": 0.7561263929657719, "grad_norm": 0.4773733615875244, "learning_rate": 4.440346037364974e-06, "loss": 0.0625, "step": 42696 }, { "epoch": 0.7561441025028003, "grad_norm": 0.41984668374061584, "learning_rate": 4.439735004147616e-06, "loss": 0.031, "step": 42697 }, { "epoch": 0.7561618120398288, "grad_norm": 0.5755487084388733, "learning_rate": 4.4391240056724545e-06, "loss": 0.0557, "step": 42698 }, { "epoch": 0.7561795215768572, "grad_norm": 0.33367419242858887, "learning_rate": 4.438513041941508e-06, "loss": 0.04, "step": 42699 }, { "epoch": 0.7561972311138856, "grad_norm": 0.605948805809021, "learning_rate": 4.437902112956785e-06, "loss": 0.1011, "step": 42700 }, { "epoch": 0.756214940650914, "grad_norm": 1.1181602478027344, "learning_rate": 4.4372912187202965e-06, "loss": 0.0462, "step": 42701 }, { "epoch": 0.7562326501879425, "grad_norm": 0.3922523558139801, "learning_rate": 4.43668035923405e-06, "loss": 0.0733, "step": 42702 }, { "epoch": 0.7562503597249709, "grad_norm": 0.7450417280197144, "learning_rate": 4.436069534500057e-06, "loss": 0.0704, "step": 42703 }, { "epoch": 0.7562680692619993, "grad_norm": 0.5482603311538696, "learning_rate": 4.435458744520332e-06, "loss": 0.0628, "step": 42704 }, { "epoch": 0.7562857787990277, "grad_norm": 0.5167898535728455, "learning_rate": 4.434847989296875e-06, "loss": 0.0618, "step": 42705 }, { "epoch": 0.7563034883360562, "grad_norm": 0.5467294454574585, "learning_rate": 4.434237268831698e-06, "loss": 0.0713, "step": 42706 }, { "epoch": 0.7563211978730846, "grad_norm": 0.7351090312004089, "learning_rate": 4.433626583126813e-06, "loss": 0.0879, "step": 42707 }, { "epoch": 0.756338907410113, "grad_norm": 0.9734757542610168, "learning_rate": 4.433015932184233e-06, "loss": 0.0801, "step": 42708 }, { "epoch": 0.7563566169471415, "grad_norm": 0.5723628401756287, "learning_rate": 4.432405316005957e-06, "loss": 0.0486, "step": 42709 }, { "epoch": 0.7563743264841699, "grad_norm": 0.8763023614883423, "learning_rate": 4.431794734593997e-06, "loss": 0.0525, "step": 42710 }, { "epoch": 0.7563920360211983, "grad_norm": 1.0785696506500244, "learning_rate": 4.431184187950365e-06, "loss": 0.0629, "step": 42711 }, { "epoch": 0.7564097455582267, "grad_norm": 0.4029240310192108, "learning_rate": 4.4305736760770675e-06, "loss": 0.0334, "step": 42712 }, { "epoch": 0.7564274550952552, "grad_norm": 0.736237108707428, "learning_rate": 4.429963198976114e-06, "loss": 0.0735, "step": 42713 }, { "epoch": 0.7564451646322836, "grad_norm": 0.5953205227851868, "learning_rate": 4.429352756649511e-06, "loss": 0.065, "step": 42714 }, { "epoch": 0.756462874169312, "grad_norm": 0.4637623727321625, "learning_rate": 4.428742349099274e-06, "loss": 0.0305, "step": 42715 }, { "epoch": 0.7564805837063404, "grad_norm": 0.5008402466773987, "learning_rate": 4.4281319763274e-06, "loss": 0.0927, "step": 42716 }, { "epoch": 0.7564982932433689, "grad_norm": 0.4485553205013275, "learning_rate": 4.427521638335903e-06, "loss": 0.0406, "step": 42717 }, { "epoch": 0.7565160027803973, "grad_norm": 0.5959895849227905, "learning_rate": 4.426911335126797e-06, "loss": 0.0583, "step": 42718 }, { "epoch": 0.7565337123174257, "grad_norm": 0.43879833817481995, "learning_rate": 4.426301066702076e-06, "loss": 0.0645, "step": 42719 }, { "epoch": 0.7565514218544541, "grad_norm": 0.538493275642395, "learning_rate": 4.425690833063757e-06, "loss": 0.09, "step": 42720 }, { "epoch": 0.7565691313914826, "grad_norm": 0.4750560522079468, "learning_rate": 4.425080634213845e-06, "loss": 0.0632, "step": 42721 }, { "epoch": 0.756586840928511, "grad_norm": 0.5684159994125366, "learning_rate": 4.424470470154347e-06, "loss": 0.045, "step": 42722 }, { "epoch": 0.7566045504655394, "grad_norm": 0.3848227560520172, "learning_rate": 4.423860340887273e-06, "loss": 0.0346, "step": 42723 }, { "epoch": 0.7566222600025679, "grad_norm": 0.8514145016670227, "learning_rate": 4.423250246414628e-06, "loss": 0.0553, "step": 42724 }, { "epoch": 0.7566399695395963, "grad_norm": 0.7490277886390686, "learning_rate": 4.4226401867384184e-06, "loss": 0.0753, "step": 42725 }, { "epoch": 0.7566576790766247, "grad_norm": 0.6784965395927429, "learning_rate": 4.42203016186066e-06, "loss": 0.0584, "step": 42726 }, { "epoch": 0.7566753886136531, "grad_norm": 0.5926159024238586, "learning_rate": 4.421420171783347e-06, "loss": 0.0529, "step": 42727 }, { "epoch": 0.7566930981506816, "grad_norm": 0.9544044733047485, "learning_rate": 4.420810216508491e-06, "loss": 0.0735, "step": 42728 }, { "epoch": 0.75671080768771, "grad_norm": 0.5428534150123596, "learning_rate": 4.420200296038105e-06, "loss": 0.0669, "step": 42729 }, { "epoch": 0.7567285172247384, "grad_norm": 0.6530847549438477, "learning_rate": 4.41959041037418e-06, "loss": 0.0789, "step": 42730 }, { "epoch": 0.7567462267617668, "grad_norm": 0.5023030042648315, "learning_rate": 4.418980559518736e-06, "loss": 0.0675, "step": 42731 }, { "epoch": 0.7567639362987953, "grad_norm": 0.6843968033790588, "learning_rate": 4.418370743473777e-06, "loss": 0.0503, "step": 42732 }, { "epoch": 0.7567816458358237, "grad_norm": 0.3941890299320221, "learning_rate": 4.417760962241313e-06, "loss": 0.0472, "step": 42733 }, { "epoch": 0.7567993553728521, "grad_norm": 0.5021492838859558, "learning_rate": 4.417151215823341e-06, "loss": 0.0659, "step": 42734 }, { "epoch": 0.7568170649098805, "grad_norm": 0.6100001335144043, "learning_rate": 4.416541504221869e-06, "loss": 0.069, "step": 42735 }, { "epoch": 0.756834774446909, "grad_norm": 0.9539206624031067, "learning_rate": 4.415931827438912e-06, "loss": 0.0707, "step": 42736 }, { "epoch": 0.7568524839839375, "grad_norm": 0.6044046878814697, "learning_rate": 4.415322185476463e-06, "loss": 0.0463, "step": 42737 }, { "epoch": 0.7568701935209659, "grad_norm": 0.5542787909507751, "learning_rate": 4.414712578336532e-06, "loss": 0.0425, "step": 42738 }, { "epoch": 0.7568879030579944, "grad_norm": 0.9206481575965881, "learning_rate": 4.414103006021128e-06, "loss": 0.0763, "step": 42739 }, { "epoch": 0.7569056125950228, "grad_norm": 0.6315304636955261, "learning_rate": 4.413493468532253e-06, "loss": 0.0388, "step": 42740 }, { "epoch": 0.7569233221320512, "grad_norm": 0.39188462495803833, "learning_rate": 4.412883965871916e-06, "loss": 0.0504, "step": 42741 }, { "epoch": 0.7569410316690796, "grad_norm": 0.4747825264930725, "learning_rate": 4.412274498042118e-06, "loss": 0.0533, "step": 42742 }, { "epoch": 0.7569587412061081, "grad_norm": 0.6761812567710876, "learning_rate": 4.411665065044872e-06, "loss": 0.0276, "step": 42743 }, { "epoch": 0.7569764507431365, "grad_norm": 0.48323774337768555, "learning_rate": 4.411055666882171e-06, "loss": 0.0939, "step": 42744 }, { "epoch": 0.7569941602801649, "grad_norm": 0.49493178725242615, "learning_rate": 4.410446303556026e-06, "loss": 0.0326, "step": 42745 }, { "epoch": 0.7570118698171933, "grad_norm": 0.5897430181503296, "learning_rate": 4.409836975068442e-06, "loss": 0.0562, "step": 42746 }, { "epoch": 0.7570295793542218, "grad_norm": 0.5103148818016052, "learning_rate": 4.409227681421427e-06, "loss": 0.0442, "step": 42747 }, { "epoch": 0.7570472888912502, "grad_norm": 0.5791506171226501, "learning_rate": 4.4086184226169785e-06, "loss": 0.0486, "step": 42748 }, { "epoch": 0.7570649984282786, "grad_norm": 0.7842831611633301, "learning_rate": 4.408009198657103e-06, "loss": 0.0716, "step": 42749 }, { "epoch": 0.757082707965307, "grad_norm": 0.4567422568798065, "learning_rate": 4.407400009543806e-06, "loss": 0.0472, "step": 42750 }, { "epoch": 0.7571004175023355, "grad_norm": 0.6871047616004944, "learning_rate": 4.4067908552790926e-06, "loss": 0.0682, "step": 42751 }, { "epoch": 0.7571181270393639, "grad_norm": 0.444266676902771, "learning_rate": 4.406181735864964e-06, "loss": 0.0608, "step": 42752 }, { "epoch": 0.7571358365763923, "grad_norm": 0.8000010848045349, "learning_rate": 4.405572651303428e-06, "loss": 0.0588, "step": 42753 }, { "epoch": 0.7571535461134208, "grad_norm": 0.45244860649108887, "learning_rate": 4.40496360159649e-06, "loss": 0.0569, "step": 42754 }, { "epoch": 0.7571712556504492, "grad_norm": 0.5117826461791992, "learning_rate": 4.404354586746146e-06, "loss": 0.037, "step": 42755 }, { "epoch": 0.7571889651874776, "grad_norm": 0.5005452632904053, "learning_rate": 4.403745606754404e-06, "loss": 0.0544, "step": 42756 }, { "epoch": 0.757206674724506, "grad_norm": 0.23827221989631653, "learning_rate": 4.403136661623266e-06, "loss": 0.0482, "step": 42757 }, { "epoch": 0.7572243842615345, "grad_norm": 0.599349319934845, "learning_rate": 4.402527751354743e-06, "loss": 0.0884, "step": 42758 }, { "epoch": 0.7572420937985629, "grad_norm": 0.4178256690502167, "learning_rate": 4.40191887595082e-06, "loss": 0.0388, "step": 42759 }, { "epoch": 0.7572598033355913, "grad_norm": 0.7101372480392456, "learning_rate": 4.401310035413519e-06, "loss": 0.0448, "step": 42760 }, { "epoch": 0.7572775128726197, "grad_norm": 0.7556692957878113, "learning_rate": 4.400701229744839e-06, "loss": 0.0731, "step": 42761 }, { "epoch": 0.7572952224096482, "grad_norm": 0.4887444078922272, "learning_rate": 4.400092458946775e-06, "loss": 0.062, "step": 42762 }, { "epoch": 0.7573129319466766, "grad_norm": 0.34047776460647583, "learning_rate": 4.399483723021337e-06, "loss": 0.0374, "step": 42763 }, { "epoch": 0.757330641483705, "grad_norm": 0.6119808554649353, "learning_rate": 4.398875021970523e-06, "loss": 0.0748, "step": 42764 }, { "epoch": 0.7573483510207334, "grad_norm": 0.9206956624984741, "learning_rate": 4.398266355796344e-06, "loss": 0.0851, "step": 42765 }, { "epoch": 0.7573660605577619, "grad_norm": 0.5630741715431213, "learning_rate": 4.397657724500792e-06, "loss": 0.0674, "step": 42766 }, { "epoch": 0.7573837700947903, "grad_norm": 0.7611807584762573, "learning_rate": 4.397049128085873e-06, "loss": 0.0682, "step": 42767 }, { "epoch": 0.7574014796318187, "grad_norm": 0.5808473825454712, "learning_rate": 4.396440566553589e-06, "loss": 0.064, "step": 42768 }, { "epoch": 0.7574191891688472, "grad_norm": 0.7725197076797485, "learning_rate": 4.395832039905944e-06, "loss": 0.0725, "step": 42769 }, { "epoch": 0.7574368987058756, "grad_norm": 0.5957914590835571, "learning_rate": 4.395223548144939e-06, "loss": 0.0521, "step": 42770 }, { "epoch": 0.757454608242904, "grad_norm": 0.03093567118048668, "learning_rate": 4.394615091272576e-06, "loss": 0.0388, "step": 42771 }, { "epoch": 0.7574723177799324, "grad_norm": 0.2917003631591797, "learning_rate": 4.394006669290862e-06, "loss": 0.0427, "step": 42772 }, { "epoch": 0.7574900273169609, "grad_norm": 0.6977121233940125, "learning_rate": 4.393398282201788e-06, "loss": 0.0347, "step": 42773 }, { "epoch": 0.7575077368539893, "grad_norm": 0.49672946333885193, "learning_rate": 4.392789930007362e-06, "loss": 0.0388, "step": 42774 }, { "epoch": 0.7575254463910177, "grad_norm": 0.46770066022872925, "learning_rate": 4.392181612709587e-06, "loss": 0.0464, "step": 42775 }, { "epoch": 0.7575431559280461, "grad_norm": 0.6110047101974487, "learning_rate": 4.391573330310458e-06, "loss": 0.0399, "step": 42776 }, { "epoch": 0.7575608654650746, "grad_norm": 0.8467335104942322, "learning_rate": 4.390965082811979e-06, "loss": 0.0561, "step": 42777 }, { "epoch": 0.757578575002103, "grad_norm": 0.5369711518287659, "learning_rate": 4.390356870216153e-06, "loss": 0.0495, "step": 42778 }, { "epoch": 0.7575962845391314, "grad_norm": 0.9810664057731628, "learning_rate": 4.389748692524978e-06, "loss": 0.0678, "step": 42779 }, { "epoch": 0.7576139940761598, "grad_norm": 0.6864714026451111, "learning_rate": 4.389140549740457e-06, "loss": 0.0496, "step": 42780 }, { "epoch": 0.7576317036131883, "grad_norm": 0.5126689076423645, "learning_rate": 4.388532441864593e-06, "loss": 0.0312, "step": 42781 }, { "epoch": 0.7576494131502167, "grad_norm": 0.45858094096183777, "learning_rate": 4.387924368899386e-06, "loss": 0.0389, "step": 42782 }, { "epoch": 0.7576671226872451, "grad_norm": 0.44762101769447327, "learning_rate": 4.38731633084683e-06, "loss": 0.0701, "step": 42783 }, { "epoch": 0.7576848322242736, "grad_norm": 0.6135833859443665, "learning_rate": 4.38670832770893e-06, "loss": 0.0591, "step": 42784 }, { "epoch": 0.757702541761302, "grad_norm": 0.44479310512542725, "learning_rate": 4.386100359487687e-06, "loss": 0.0626, "step": 42785 }, { "epoch": 0.7577202512983304, "grad_norm": 0.42090901732444763, "learning_rate": 4.385492426185105e-06, "loss": 0.0732, "step": 42786 }, { "epoch": 0.7577379608353588, "grad_norm": 0.6869154572486877, "learning_rate": 4.384884527803174e-06, "loss": 0.0927, "step": 42787 }, { "epoch": 0.7577556703723873, "grad_norm": 0.5665779709815979, "learning_rate": 4.384276664343895e-06, "loss": 0.0527, "step": 42788 }, { "epoch": 0.7577733799094157, "grad_norm": 0.6524614691734314, "learning_rate": 4.383668835809282e-06, "loss": 0.0408, "step": 42789 }, { "epoch": 0.7577910894464441, "grad_norm": 0.5058515071868896, "learning_rate": 4.383061042201319e-06, "loss": 0.0671, "step": 42790 }, { "epoch": 0.7578087989834725, "grad_norm": 0.3718208074569702, "learning_rate": 4.382453283522011e-06, "loss": 0.0347, "step": 42791 }, { "epoch": 0.757826508520501, "grad_norm": 0.642978847026825, "learning_rate": 4.381845559773358e-06, "loss": 0.0746, "step": 42792 }, { "epoch": 0.7578442180575294, "grad_norm": 0.6281218528747559, "learning_rate": 4.381237870957365e-06, "loss": 0.0408, "step": 42793 }, { "epoch": 0.7578619275945578, "grad_norm": 0.546911895275116, "learning_rate": 4.380630217076019e-06, "loss": 0.0418, "step": 42794 }, { "epoch": 0.7578796371315862, "grad_norm": 0.6787851452827454, "learning_rate": 4.380022598131327e-06, "loss": 0.0417, "step": 42795 }, { "epoch": 0.7578973466686147, "grad_norm": 0.2520977258682251, "learning_rate": 4.379415014125287e-06, "loss": 0.0493, "step": 42796 }, { "epoch": 0.7579150562056431, "grad_norm": 0.7288421392440796, "learning_rate": 4.378807465059894e-06, "loss": 0.0468, "step": 42797 }, { "epoch": 0.7579327657426715, "grad_norm": 0.6548886895179749, "learning_rate": 4.378199950937154e-06, "loss": 0.0512, "step": 42798 }, { "epoch": 0.7579504752797, "grad_norm": 0.9216988682746887, "learning_rate": 4.377592471759059e-06, "loss": 0.0702, "step": 42799 }, { "epoch": 0.7579681848167285, "grad_norm": 0.7287598252296448, "learning_rate": 4.3769850275276174e-06, "loss": 0.087, "step": 42800 }, { "epoch": 0.7579858943537569, "grad_norm": 0.564171552658081, "learning_rate": 4.376377618244815e-06, "loss": 0.0587, "step": 42801 }, { "epoch": 0.7580036038907852, "grad_norm": 0.24841246008872986, "learning_rate": 4.375770243912655e-06, "loss": 0.0607, "step": 42802 }, { "epoch": 0.7580213134278138, "grad_norm": 0.5843284130096436, "learning_rate": 4.375162904533138e-06, "loss": 0.0521, "step": 42803 }, { "epoch": 0.7580390229648422, "grad_norm": 0.45983952283859253, "learning_rate": 4.374555600108264e-06, "loss": 0.0833, "step": 42804 }, { "epoch": 0.7580567325018706, "grad_norm": 0.6251659393310547, "learning_rate": 4.373948330640023e-06, "loss": 0.0596, "step": 42805 }, { "epoch": 0.758074442038899, "grad_norm": 0.8594048023223877, "learning_rate": 4.373341096130415e-06, "loss": 0.0574, "step": 42806 }, { "epoch": 0.7580921515759275, "grad_norm": 0.470313161611557, "learning_rate": 4.372733896581443e-06, "loss": 0.0545, "step": 42807 }, { "epoch": 0.7581098611129559, "grad_norm": 0.5502763390541077, "learning_rate": 4.3721267319951e-06, "loss": 0.0488, "step": 42808 }, { "epoch": 0.7581275706499843, "grad_norm": 0.9970813393592834, "learning_rate": 4.371519602373386e-06, "loss": 0.0489, "step": 42809 }, { "epoch": 0.7581452801870127, "grad_norm": 0.39046093821525574, "learning_rate": 4.370912507718297e-06, "loss": 0.0598, "step": 42810 }, { "epoch": 0.7581629897240412, "grad_norm": 0.49530699849128723, "learning_rate": 4.370305448031836e-06, "loss": 0.042, "step": 42811 }, { "epoch": 0.7581806992610696, "grad_norm": 0.517540693283081, "learning_rate": 4.36969842331599e-06, "loss": 0.0491, "step": 42812 }, { "epoch": 0.758198408798098, "grad_norm": 0.9728651642799377, "learning_rate": 4.36909143357276e-06, "loss": 0.0604, "step": 42813 }, { "epoch": 0.7582161183351265, "grad_norm": 0.8755826950073242, "learning_rate": 4.36848447880415e-06, "loss": 0.0698, "step": 42814 }, { "epoch": 0.7582338278721549, "grad_norm": 0.77520751953125, "learning_rate": 4.367877559012145e-06, "loss": 0.0353, "step": 42815 }, { "epoch": 0.7582515374091833, "grad_norm": 0.19257864356040955, "learning_rate": 4.3672706741987485e-06, "loss": 0.0366, "step": 42816 }, { "epoch": 0.7582692469462117, "grad_norm": 0.48321428894996643, "learning_rate": 4.3666638243659505e-06, "loss": 0.0656, "step": 42817 }, { "epoch": 0.7582869564832402, "grad_norm": 0.4109157919883728, "learning_rate": 4.366057009515764e-06, "loss": 0.048, "step": 42818 }, { "epoch": 0.7583046660202686, "grad_norm": 0.9112368226051331, "learning_rate": 4.365450229650168e-06, "loss": 0.0819, "step": 42819 }, { "epoch": 0.758322375557297, "grad_norm": 0.5534616112709045, "learning_rate": 4.364843484771165e-06, "loss": 0.0353, "step": 42820 }, { "epoch": 0.7583400850943254, "grad_norm": 0.5700541138648987, "learning_rate": 4.364236774880756e-06, "loss": 0.0511, "step": 42821 }, { "epoch": 0.7583577946313539, "grad_norm": 0.47419336438179016, "learning_rate": 4.3636300999809294e-06, "loss": 0.0451, "step": 42822 }, { "epoch": 0.7583755041683823, "grad_norm": 0.9051980376243591, "learning_rate": 4.363023460073681e-06, "loss": 0.0452, "step": 42823 }, { "epoch": 0.7583932137054107, "grad_norm": 0.8719955086708069, "learning_rate": 4.362416855161012e-06, "loss": 0.0579, "step": 42824 }, { "epoch": 0.7584109232424391, "grad_norm": 0.7687234878540039, "learning_rate": 4.361810285244916e-06, "loss": 0.0744, "step": 42825 }, { "epoch": 0.7584286327794676, "grad_norm": 0.5576131343841553, "learning_rate": 4.361203750327386e-06, "loss": 0.0926, "step": 42826 }, { "epoch": 0.758446342316496, "grad_norm": 0.6140592694282532, "learning_rate": 4.360597250410421e-06, "loss": 0.0732, "step": 42827 }, { "epoch": 0.7584640518535244, "grad_norm": 0.6317541599273682, "learning_rate": 4.359990785496013e-06, "loss": 0.0394, "step": 42828 }, { "epoch": 0.7584817613905529, "grad_norm": 0.9845480918884277, "learning_rate": 4.3593843555861665e-06, "loss": 0.0769, "step": 42829 }, { "epoch": 0.7584994709275813, "grad_norm": 0.3054575026035309, "learning_rate": 4.358777960682864e-06, "loss": 0.0322, "step": 42830 }, { "epoch": 0.7585171804646097, "grad_norm": 0.37434497475624084, "learning_rate": 4.3581716007881035e-06, "loss": 0.034, "step": 42831 }, { "epoch": 0.7585348900016381, "grad_norm": 0.8389359712600708, "learning_rate": 4.357565275903888e-06, "loss": 0.0578, "step": 42832 }, { "epoch": 0.7585525995386666, "grad_norm": 0.3244827687740326, "learning_rate": 4.3569589860322025e-06, "loss": 0.0447, "step": 42833 }, { "epoch": 0.758570309075695, "grad_norm": 0.5634070038795471, "learning_rate": 4.356352731175045e-06, "loss": 0.0614, "step": 42834 }, { "epoch": 0.7585880186127234, "grad_norm": 0.940419614315033, "learning_rate": 4.355746511334411e-06, "loss": 0.0789, "step": 42835 }, { "epoch": 0.7586057281497518, "grad_norm": 0.5609870553016663, "learning_rate": 4.355140326512293e-06, "loss": 0.0488, "step": 42836 }, { "epoch": 0.7586234376867803, "grad_norm": 0.4123941957950592, "learning_rate": 4.3545341767106885e-06, "loss": 0.0355, "step": 42837 }, { "epoch": 0.7586411472238087, "grad_norm": 0.43761181831359863, "learning_rate": 4.353928061931588e-06, "loss": 0.0538, "step": 42838 }, { "epoch": 0.7586588567608371, "grad_norm": 0.6335983872413635, "learning_rate": 4.353321982176994e-06, "loss": 0.0854, "step": 42839 }, { "epoch": 0.7586765662978655, "grad_norm": 0.6182072758674622, "learning_rate": 4.352715937448887e-06, "loss": 0.0479, "step": 42840 }, { "epoch": 0.758694275834894, "grad_norm": 0.6590444445610046, "learning_rate": 4.352109927749269e-06, "loss": 0.0705, "step": 42841 }, { "epoch": 0.7587119853719224, "grad_norm": 0.5801053047180176, "learning_rate": 4.351503953080132e-06, "loss": 0.0682, "step": 42842 }, { "epoch": 0.7587296949089508, "grad_norm": 0.4199609160423279, "learning_rate": 4.3508980134434756e-06, "loss": 0.0388, "step": 42843 }, { "epoch": 0.7587474044459793, "grad_norm": 0.3233012855052948, "learning_rate": 4.350292108841281e-06, "loss": 0.0727, "step": 42844 }, { "epoch": 0.7587651139830077, "grad_norm": 0.4408820867538452, "learning_rate": 4.349686239275551e-06, "loss": 0.0676, "step": 42845 }, { "epoch": 0.7587828235200361, "grad_norm": 0.5280163884162903, "learning_rate": 4.349080404748273e-06, "loss": 0.0475, "step": 42846 }, { "epoch": 0.7588005330570645, "grad_norm": 0.3802233338356018, "learning_rate": 4.348474605261445e-06, "loss": 0.0572, "step": 42847 }, { "epoch": 0.758818242594093, "grad_norm": 0.5246589779853821, "learning_rate": 4.347868840817058e-06, "loss": 0.0566, "step": 42848 }, { "epoch": 0.7588359521311214, "grad_norm": 0.4181923568248749, "learning_rate": 4.347263111417105e-06, "loss": 0.0486, "step": 42849 }, { "epoch": 0.7588536616681498, "grad_norm": 0.7085198760032654, "learning_rate": 4.346657417063584e-06, "loss": 0.0593, "step": 42850 }, { "epoch": 0.7588713712051782, "grad_norm": 0.40213334560394287, "learning_rate": 4.3460517577584765e-06, "loss": 0.0443, "step": 42851 }, { "epoch": 0.7588890807422067, "grad_norm": 0.6866185069084167, "learning_rate": 4.345446133503781e-06, "loss": 0.0463, "step": 42852 }, { "epoch": 0.7589067902792351, "grad_norm": 0.23232389986515045, "learning_rate": 4.344840544301496e-06, "loss": 0.0519, "step": 42853 }, { "epoch": 0.7589244998162635, "grad_norm": 0.464603453874588, "learning_rate": 4.344234990153599e-06, "loss": 0.0553, "step": 42854 }, { "epoch": 0.7589422093532919, "grad_norm": 0.5673956274986267, "learning_rate": 4.343629471062094e-06, "loss": 0.0638, "step": 42855 }, { "epoch": 0.7589599188903204, "grad_norm": 0.5074671506881714, "learning_rate": 4.343023987028973e-06, "loss": 0.0327, "step": 42856 }, { "epoch": 0.7589776284273488, "grad_norm": 0.6046308279037476, "learning_rate": 4.342418538056228e-06, "loss": 0.0646, "step": 42857 }, { "epoch": 0.7589953379643772, "grad_norm": 0.5006803274154663, "learning_rate": 4.341813124145843e-06, "loss": 0.0653, "step": 42858 }, { "epoch": 0.7590130475014057, "grad_norm": 0.7404628992080688, "learning_rate": 4.341207745299815e-06, "loss": 0.0824, "step": 42859 }, { "epoch": 0.7590307570384341, "grad_norm": 0.7028111219406128, "learning_rate": 4.340602401520137e-06, "loss": 0.0448, "step": 42860 }, { "epoch": 0.7590484665754625, "grad_norm": 0.7000408172607422, "learning_rate": 4.339997092808802e-06, "loss": 0.0627, "step": 42861 }, { "epoch": 0.7590661761124909, "grad_norm": 0.6224104166030884, "learning_rate": 4.3393918191677926e-06, "loss": 0.0741, "step": 42862 }, { "epoch": 0.7590838856495195, "grad_norm": 0.7344182729721069, "learning_rate": 4.338786580599108e-06, "loss": 0.0678, "step": 42863 }, { "epoch": 0.7591015951865479, "grad_norm": 0.6390441060066223, "learning_rate": 4.338181377104737e-06, "loss": 0.0533, "step": 42864 }, { "epoch": 0.7591193047235762, "grad_norm": 0.7707523703575134, "learning_rate": 4.33757620868667e-06, "loss": 0.0681, "step": 42865 }, { "epoch": 0.7591370142606046, "grad_norm": 0.6924786567687988, "learning_rate": 4.336971075346899e-06, "loss": 0.0647, "step": 42866 }, { "epoch": 0.7591547237976332, "grad_norm": 0.6578419804573059, "learning_rate": 4.336365977087416e-06, "loss": 0.0626, "step": 42867 }, { "epoch": 0.7591724333346616, "grad_norm": 1.0294846296310425, "learning_rate": 4.335760913910215e-06, "loss": 0.0441, "step": 42868 }, { "epoch": 0.75919014287169, "grad_norm": 0.41684892773628235, "learning_rate": 4.335155885817275e-06, "loss": 0.0633, "step": 42869 }, { "epoch": 0.7592078524087184, "grad_norm": 0.7074911594390869, "learning_rate": 4.3345508928105946e-06, "loss": 0.0886, "step": 42870 }, { "epoch": 0.7592255619457469, "grad_norm": 0.7008848190307617, "learning_rate": 4.333945934892168e-06, "loss": 0.092, "step": 42871 }, { "epoch": 0.7592432714827753, "grad_norm": 0.5558348894119263, "learning_rate": 4.333341012063976e-06, "loss": 0.0388, "step": 42872 }, { "epoch": 0.7592609810198037, "grad_norm": 0.5094466805458069, "learning_rate": 4.332736124328013e-06, "loss": 0.0462, "step": 42873 }, { "epoch": 0.7592786905568322, "grad_norm": 0.4331342577934265, "learning_rate": 4.332131271686269e-06, "loss": 0.0533, "step": 42874 }, { "epoch": 0.7592964000938606, "grad_norm": 0.6627229452133179, "learning_rate": 4.331526454140734e-06, "loss": 0.0429, "step": 42875 }, { "epoch": 0.759314109630889, "grad_norm": 0.7532700896263123, "learning_rate": 4.330921671693399e-06, "loss": 0.0493, "step": 42876 }, { "epoch": 0.7593318191679174, "grad_norm": 0.45638149976730347, "learning_rate": 4.330316924346251e-06, "loss": 0.0453, "step": 42877 }, { "epoch": 0.7593495287049459, "grad_norm": 0.6076873540878296, "learning_rate": 4.329712212101287e-06, "loss": 0.0461, "step": 42878 }, { "epoch": 0.7593672382419743, "grad_norm": 0.7110516428947449, "learning_rate": 4.329107534960486e-06, "loss": 0.0523, "step": 42879 }, { "epoch": 0.7593849477790027, "grad_norm": 0.4583356976509094, "learning_rate": 4.32850289292584e-06, "loss": 0.0417, "step": 42880 }, { "epoch": 0.7594026573160311, "grad_norm": 0.5058956146240234, "learning_rate": 4.327898285999342e-06, "loss": 0.0532, "step": 42881 }, { "epoch": 0.7594203668530596, "grad_norm": 1.0003314018249512, "learning_rate": 4.327293714182984e-06, "loss": 0.0783, "step": 42882 }, { "epoch": 0.759438076390088, "grad_norm": 0.7135552763938904, "learning_rate": 4.32668917747874e-06, "loss": 0.0704, "step": 42883 }, { "epoch": 0.7594557859271164, "grad_norm": 0.4787601828575134, "learning_rate": 4.326084675888614e-06, "loss": 0.0369, "step": 42884 }, { "epoch": 0.7594734954641448, "grad_norm": 0.32833102345466614, "learning_rate": 4.325480209414595e-06, "loss": 0.0418, "step": 42885 }, { "epoch": 0.7594912050011733, "grad_norm": 0.8720059394836426, "learning_rate": 4.32487577805866e-06, "loss": 0.0848, "step": 42886 }, { "epoch": 0.7595089145382017, "grad_norm": 0.414214164018631, "learning_rate": 4.324271381822806e-06, "loss": 0.0326, "step": 42887 }, { "epoch": 0.7595266240752301, "grad_norm": 0.678048849105835, "learning_rate": 4.323667020709018e-06, "loss": 0.0452, "step": 42888 }, { "epoch": 0.7595443336122586, "grad_norm": 0.7302342653274536, "learning_rate": 4.323062694719292e-06, "loss": 0.0923, "step": 42889 }, { "epoch": 0.759562043149287, "grad_norm": 0.40509477257728577, "learning_rate": 4.322458403855603e-06, "loss": 0.0567, "step": 42890 }, { "epoch": 0.7595797526863154, "grad_norm": 0.6939536333084106, "learning_rate": 4.321854148119947e-06, "loss": 0.0694, "step": 42891 }, { "epoch": 0.7595974622233438, "grad_norm": 0.4820150136947632, "learning_rate": 4.321249927514311e-06, "loss": 0.0429, "step": 42892 }, { "epoch": 0.7596151717603723, "grad_norm": 0.6463311314582825, "learning_rate": 4.320645742040683e-06, "loss": 0.0423, "step": 42893 }, { "epoch": 0.7596328812974007, "grad_norm": 0.7819380164146423, "learning_rate": 4.32004159170105e-06, "loss": 0.066, "step": 42894 }, { "epoch": 0.7596505908344291, "grad_norm": 0.4411700963973999, "learning_rate": 4.319437476497399e-06, "loss": 0.0364, "step": 42895 }, { "epoch": 0.7596683003714575, "grad_norm": 0.8689295649528503, "learning_rate": 4.318833396431725e-06, "loss": 0.0774, "step": 42896 }, { "epoch": 0.759686009908486, "grad_norm": 0.7262595891952515, "learning_rate": 4.318229351506003e-06, "loss": 0.0517, "step": 42897 }, { "epoch": 0.7597037194455144, "grad_norm": 0.7734184861183167, "learning_rate": 4.3176253417222255e-06, "loss": 0.0605, "step": 42898 }, { "epoch": 0.7597214289825428, "grad_norm": 0.6516754031181335, "learning_rate": 4.317021367082381e-06, "loss": 0.0588, "step": 42899 }, { "epoch": 0.7597391385195712, "grad_norm": 0.40860092639923096, "learning_rate": 4.31641742758846e-06, "loss": 0.0503, "step": 42900 }, { "epoch": 0.7597568480565997, "grad_norm": 0.4517171382904053, "learning_rate": 4.315813523242441e-06, "loss": 0.0412, "step": 42901 }, { "epoch": 0.7597745575936281, "grad_norm": 0.5728899836540222, "learning_rate": 4.3152096540463135e-06, "loss": 0.0675, "step": 42902 }, { "epoch": 0.7597922671306565, "grad_norm": 0.5783697366714478, "learning_rate": 4.314605820002066e-06, "loss": 0.0463, "step": 42903 }, { "epoch": 0.759809976667685, "grad_norm": 0.7242476940155029, "learning_rate": 4.314002021111684e-06, "loss": 0.0658, "step": 42904 }, { "epoch": 0.7598276862047134, "grad_norm": 0.3923240602016449, "learning_rate": 4.313398257377157e-06, "loss": 0.0383, "step": 42905 }, { "epoch": 0.7598453957417418, "grad_norm": 0.7761562466621399, "learning_rate": 4.3127945288004665e-06, "loss": 0.0826, "step": 42906 }, { "epoch": 0.7598631052787702, "grad_norm": 0.6936677694320679, "learning_rate": 4.312190835383607e-06, "loss": 0.0604, "step": 42907 }, { "epoch": 0.7598808148157987, "grad_norm": 0.7630599141120911, "learning_rate": 4.311587177128554e-06, "loss": 0.077, "step": 42908 }, { "epoch": 0.7598985243528271, "grad_norm": 0.507455587387085, "learning_rate": 4.310983554037297e-06, "loss": 0.0665, "step": 42909 }, { "epoch": 0.7599162338898555, "grad_norm": 0.5142187476158142, "learning_rate": 4.310379966111828e-06, "loss": 0.0549, "step": 42910 }, { "epoch": 0.7599339434268839, "grad_norm": 0.31493696570396423, "learning_rate": 4.309776413354124e-06, "loss": 0.0565, "step": 42911 }, { "epoch": 0.7599516529639124, "grad_norm": 0.607730507850647, "learning_rate": 4.309172895766169e-06, "loss": 0.0755, "step": 42912 }, { "epoch": 0.7599693625009408, "grad_norm": 0.2977047562599182, "learning_rate": 4.308569413349958e-06, "loss": 0.0278, "step": 42913 }, { "epoch": 0.7599870720379692, "grad_norm": 0.9622126221656799, "learning_rate": 4.307965966107478e-06, "loss": 0.0587, "step": 42914 }, { "epoch": 0.7600047815749976, "grad_norm": 0.6844645142555237, "learning_rate": 4.307362554040703e-06, "loss": 0.0392, "step": 42915 }, { "epoch": 0.7600224911120261, "grad_norm": 0.7875494956970215, "learning_rate": 4.3067591771516244e-06, "loss": 0.0492, "step": 42916 }, { "epoch": 0.7600402006490545, "grad_norm": 0.6450251936912537, "learning_rate": 4.306155835442231e-06, "loss": 0.0781, "step": 42917 }, { "epoch": 0.7600579101860829, "grad_norm": 0.5057661533355713, "learning_rate": 4.305552528914499e-06, "loss": 0.058, "step": 42918 }, { "epoch": 0.7600756197231114, "grad_norm": 0.6649081110954285, "learning_rate": 4.304949257570416e-06, "loss": 0.0317, "step": 42919 }, { "epoch": 0.7600933292601398, "grad_norm": 0.5208312273025513, "learning_rate": 4.304346021411969e-06, "loss": 0.0527, "step": 42920 }, { "epoch": 0.7601110387971682, "grad_norm": 0.511342465877533, "learning_rate": 4.303742820441141e-06, "loss": 0.0399, "step": 42921 }, { "epoch": 0.7601287483341966, "grad_norm": 0.5111907124519348, "learning_rate": 4.303139654659919e-06, "loss": 0.0489, "step": 42922 }, { "epoch": 0.7601464578712251, "grad_norm": 0.6468749642372131, "learning_rate": 4.302536524070286e-06, "loss": 0.0563, "step": 42923 }, { "epoch": 0.7601641674082535, "grad_norm": 0.6037794351577759, "learning_rate": 4.301933428674229e-06, "loss": 0.0443, "step": 42924 }, { "epoch": 0.7601818769452819, "grad_norm": 0.392767071723938, "learning_rate": 4.301330368473723e-06, "loss": 0.0397, "step": 42925 }, { "epoch": 0.7601995864823103, "grad_norm": 0.7251165509223938, "learning_rate": 4.30072734347076e-06, "loss": 0.0492, "step": 42926 }, { "epoch": 0.7602172960193389, "grad_norm": 0.4095003306865692, "learning_rate": 4.300124353667323e-06, "loss": 0.0497, "step": 42927 }, { "epoch": 0.7602350055563672, "grad_norm": 0.3230411112308502, "learning_rate": 4.299521399065397e-06, "loss": 0.058, "step": 42928 }, { "epoch": 0.7602527150933956, "grad_norm": 0.5550618767738342, "learning_rate": 4.298918479666959e-06, "loss": 0.0811, "step": 42929 }, { "epoch": 0.760270424630424, "grad_norm": 0.34231123328208923, "learning_rate": 4.298315595473997e-06, "loss": 0.0519, "step": 42930 }, { "epoch": 0.7602881341674526, "grad_norm": 0.7547221183776855, "learning_rate": 4.2977127464884956e-06, "loss": 0.0842, "step": 42931 }, { "epoch": 0.760305843704481, "grad_norm": 0.44566845893859863, "learning_rate": 4.297109932712436e-06, "loss": 0.0418, "step": 42932 }, { "epoch": 0.7603235532415094, "grad_norm": 0.854580819606781, "learning_rate": 4.296507154147803e-06, "loss": 0.0664, "step": 42933 }, { "epoch": 0.7603412627785379, "grad_norm": 0.4749965965747833, "learning_rate": 4.295904410796577e-06, "loss": 0.0383, "step": 42934 }, { "epoch": 0.7603589723155663, "grad_norm": 0.6127320528030396, "learning_rate": 4.295301702660747e-06, "loss": 0.0634, "step": 42935 }, { "epoch": 0.7603766818525947, "grad_norm": 0.5611677765846252, "learning_rate": 4.294699029742289e-06, "loss": 0.0652, "step": 42936 }, { "epoch": 0.7603943913896231, "grad_norm": 0.40525510907173157, "learning_rate": 4.294096392043187e-06, "loss": 0.0641, "step": 42937 }, { "epoch": 0.7604121009266516, "grad_norm": 0.3215973377227783, "learning_rate": 4.2934937895654265e-06, "loss": 0.032, "step": 42938 }, { "epoch": 0.76042981046368, "grad_norm": 0.34342771768569946, "learning_rate": 4.292891222310992e-06, "loss": 0.0348, "step": 42939 }, { "epoch": 0.7604475200007084, "grad_norm": 0.4881463944911957, "learning_rate": 4.2922886902818585e-06, "loss": 0.0498, "step": 42940 }, { "epoch": 0.7604652295377368, "grad_norm": 0.655663013458252, "learning_rate": 4.291686193480008e-06, "loss": 0.062, "step": 42941 }, { "epoch": 0.7604829390747653, "grad_norm": 0.6749040484428406, "learning_rate": 4.291083731907435e-06, "loss": 0.0951, "step": 42942 }, { "epoch": 0.7605006486117937, "grad_norm": 0.545616090297699, "learning_rate": 4.2904813055661095e-06, "loss": 0.0742, "step": 42943 }, { "epoch": 0.7605183581488221, "grad_norm": 0.7470195293426514, "learning_rate": 4.2898789144580176e-06, "loss": 0.0669, "step": 42944 }, { "epoch": 0.7605360676858505, "grad_norm": 0.43623068928718567, "learning_rate": 4.28927655858514e-06, "loss": 0.0634, "step": 42945 }, { "epoch": 0.760553777222879, "grad_norm": 0.6630635857582092, "learning_rate": 4.288674237949464e-06, "loss": 0.05, "step": 42946 }, { "epoch": 0.7605714867599074, "grad_norm": 0.4391365349292755, "learning_rate": 4.288071952552961e-06, "loss": 0.0523, "step": 42947 }, { "epoch": 0.7605891962969358, "grad_norm": 0.6457728147506714, "learning_rate": 4.287469702397619e-06, "loss": 0.0499, "step": 42948 }, { "epoch": 0.7606069058339643, "grad_norm": 0.4721713960170746, "learning_rate": 4.286867487485418e-06, "loss": 0.0396, "step": 42949 }, { "epoch": 0.7606246153709927, "grad_norm": 0.1628732681274414, "learning_rate": 4.286265307818339e-06, "loss": 0.0542, "step": 42950 }, { "epoch": 0.7606423249080211, "grad_norm": 0.6196508407592773, "learning_rate": 4.285663163398364e-06, "loss": 0.0487, "step": 42951 }, { "epoch": 0.7606600344450495, "grad_norm": 0.6692643761634827, "learning_rate": 4.2850610542274735e-06, "loss": 0.0517, "step": 42952 }, { "epoch": 0.760677743982078, "grad_norm": 0.7691057324409485, "learning_rate": 4.284458980307652e-06, "loss": 0.0606, "step": 42953 }, { "epoch": 0.7606954535191064, "grad_norm": 0.45443829894065857, "learning_rate": 4.283856941640872e-06, "loss": 0.0473, "step": 42954 }, { "epoch": 0.7607131630561348, "grad_norm": 0.7805671691894531, "learning_rate": 4.283254938229121e-06, "loss": 0.0637, "step": 42955 }, { "epoch": 0.7607308725931632, "grad_norm": 0.5830790400505066, "learning_rate": 4.2826529700743805e-06, "loss": 0.045, "step": 42956 }, { "epoch": 0.7607485821301917, "grad_norm": 0.3474922478199005, "learning_rate": 4.282051037178623e-06, "loss": 0.0285, "step": 42957 }, { "epoch": 0.7607662916672201, "grad_norm": 1.111169457435608, "learning_rate": 4.281449139543833e-06, "loss": 0.0815, "step": 42958 }, { "epoch": 0.7607840012042485, "grad_norm": 1.0675735473632812, "learning_rate": 4.280847277171993e-06, "loss": 0.0747, "step": 42959 }, { "epoch": 0.7608017107412769, "grad_norm": 0.6589416265487671, "learning_rate": 4.28024545006508e-06, "loss": 0.0689, "step": 42960 }, { "epoch": 0.7608194202783054, "grad_norm": 0.824615478515625, "learning_rate": 4.279643658225074e-06, "loss": 0.0709, "step": 42961 }, { "epoch": 0.7608371298153338, "grad_norm": 0.7711038589477539, "learning_rate": 4.279041901653958e-06, "loss": 0.0446, "step": 42962 }, { "epoch": 0.7608548393523622, "grad_norm": 0.6459654569625854, "learning_rate": 4.278440180353713e-06, "loss": 0.0462, "step": 42963 }, { "epoch": 0.7608725488893907, "grad_norm": 0.8433460593223572, "learning_rate": 4.277838494326312e-06, "loss": 0.0634, "step": 42964 }, { "epoch": 0.7608902584264191, "grad_norm": 0.4689611494541168, "learning_rate": 4.277236843573737e-06, "loss": 0.0498, "step": 42965 }, { "epoch": 0.7609079679634475, "grad_norm": 0.495857298374176, "learning_rate": 4.27663522809797e-06, "loss": 0.0299, "step": 42966 }, { "epoch": 0.7609256775004759, "grad_norm": 0.3951447010040283, "learning_rate": 4.276033647900991e-06, "loss": 0.0302, "step": 42967 }, { "epoch": 0.7609433870375044, "grad_norm": 0.41805198788642883, "learning_rate": 4.275432102984772e-06, "loss": 0.0723, "step": 42968 }, { "epoch": 0.7609610965745328, "grad_norm": 0.3020232617855072, "learning_rate": 4.274830593351298e-06, "loss": 0.0727, "step": 42969 }, { "epoch": 0.7609788061115612, "grad_norm": 0.4743455946445465, "learning_rate": 4.274229119002541e-06, "loss": 0.0429, "step": 42970 }, { "epoch": 0.7609965156485896, "grad_norm": 0.46726831793785095, "learning_rate": 4.2736276799404964e-06, "loss": 0.0731, "step": 42971 }, { "epoch": 0.7610142251856181, "grad_norm": 0.6308366656303406, "learning_rate": 4.273026276167124e-06, "loss": 0.0748, "step": 42972 }, { "epoch": 0.7610319347226465, "grad_norm": 0.43282490968704224, "learning_rate": 4.272424907684413e-06, "loss": 0.032, "step": 42973 }, { "epoch": 0.7610496442596749, "grad_norm": 0.8642258644104004, "learning_rate": 4.2718235744943425e-06, "loss": 0.0694, "step": 42974 }, { "epoch": 0.7610673537967033, "grad_norm": 0.8556666970252991, "learning_rate": 4.2712222765988826e-06, "loss": 0.0555, "step": 42975 }, { "epoch": 0.7610850633337318, "grad_norm": 0.5003525018692017, "learning_rate": 4.2706210140000165e-06, "loss": 0.0636, "step": 42976 }, { "epoch": 0.7611027728707602, "grad_norm": 0.44402554631233215, "learning_rate": 4.270019786699721e-06, "loss": 0.047, "step": 42977 }, { "epoch": 0.7611204824077886, "grad_norm": 0.4041060507297516, "learning_rate": 4.269418594699977e-06, "loss": 0.0432, "step": 42978 }, { "epoch": 0.7611381919448171, "grad_norm": 0.5407347679138184, "learning_rate": 4.26881743800276e-06, "loss": 0.0684, "step": 42979 }, { "epoch": 0.7611559014818455, "grad_norm": 0.9720732569694519, "learning_rate": 4.268216316610047e-06, "loss": 0.0846, "step": 42980 }, { "epoch": 0.7611736110188739, "grad_norm": 0.7740333676338196, "learning_rate": 4.267615230523822e-06, "loss": 0.0814, "step": 42981 }, { "epoch": 0.7611913205559023, "grad_norm": 0.41957801580429077, "learning_rate": 4.267014179746054e-06, "loss": 0.0293, "step": 42982 }, { "epoch": 0.7612090300929308, "grad_norm": 0.4429137408733368, "learning_rate": 4.266413164278723e-06, "loss": 0.0533, "step": 42983 }, { "epoch": 0.7612267396299592, "grad_norm": 0.607332170009613, "learning_rate": 4.265812184123807e-06, "loss": 0.0528, "step": 42984 }, { "epoch": 0.7612444491669876, "grad_norm": 0.7043834924697876, "learning_rate": 4.2652112392832885e-06, "loss": 0.0566, "step": 42985 }, { "epoch": 0.761262158704016, "grad_norm": 0.5576704740524292, "learning_rate": 4.264610329759134e-06, "loss": 0.0626, "step": 42986 }, { "epoch": 0.7612798682410445, "grad_norm": 0.39803940057754517, "learning_rate": 4.264009455553325e-06, "loss": 0.0418, "step": 42987 }, { "epoch": 0.7612975777780729, "grad_norm": 0.45522311329841614, "learning_rate": 4.26340861666784e-06, "loss": 0.0707, "step": 42988 }, { "epoch": 0.7613152873151013, "grad_norm": 0.608774721622467, "learning_rate": 4.262807813104654e-06, "loss": 0.0571, "step": 42989 }, { "epoch": 0.7613329968521297, "grad_norm": 0.4164653718471527, "learning_rate": 4.262207044865746e-06, "loss": 0.0495, "step": 42990 }, { "epoch": 0.7613507063891582, "grad_norm": 0.9952237606048584, "learning_rate": 4.261606311953088e-06, "loss": 0.0959, "step": 42991 }, { "epoch": 0.7613684159261866, "grad_norm": 0.6384734511375427, "learning_rate": 4.261005614368666e-06, "loss": 0.0534, "step": 42992 }, { "epoch": 0.761386125463215, "grad_norm": 0.5150914192199707, "learning_rate": 4.2604049521144435e-06, "loss": 0.0551, "step": 42993 }, { "epoch": 0.7614038350002436, "grad_norm": 0.7728041410446167, "learning_rate": 4.259804325192403e-06, "loss": 0.0764, "step": 42994 }, { "epoch": 0.761421544537272, "grad_norm": 0.5353779792785645, "learning_rate": 4.2592037336045246e-06, "loss": 0.0861, "step": 42995 }, { "epoch": 0.7614392540743004, "grad_norm": 0.4566052258014679, "learning_rate": 4.258603177352774e-06, "loss": 0.0551, "step": 42996 }, { "epoch": 0.7614569636113288, "grad_norm": 0.4978971481323242, "learning_rate": 4.258002656439132e-06, "loss": 0.0419, "step": 42997 }, { "epoch": 0.7614746731483573, "grad_norm": 0.3780379593372345, "learning_rate": 4.257402170865577e-06, "loss": 0.0412, "step": 42998 }, { "epoch": 0.7614923826853857, "grad_norm": 0.7778217792510986, "learning_rate": 4.256801720634081e-06, "loss": 0.0591, "step": 42999 }, { "epoch": 0.7615100922224141, "grad_norm": 0.3532728850841522, "learning_rate": 4.256201305746621e-06, "loss": 0.0478, "step": 43000 }, { "epoch": 0.7615278017594425, "grad_norm": 0.7362257838249207, "learning_rate": 4.255600926205172e-06, "loss": 0.048, "step": 43001 }, { "epoch": 0.761545511296471, "grad_norm": 0.5089755058288574, "learning_rate": 4.25500058201171e-06, "loss": 0.0653, "step": 43002 }, { "epoch": 0.7615632208334994, "grad_norm": 0.7560149431228638, "learning_rate": 4.254400273168214e-06, "loss": 0.0442, "step": 43003 }, { "epoch": 0.7615809303705278, "grad_norm": 0.8260992765426636, "learning_rate": 4.253799999676648e-06, "loss": 0.0528, "step": 43004 }, { "epoch": 0.7615986399075562, "grad_norm": 0.46176522970199585, "learning_rate": 4.253199761538995e-06, "loss": 0.0356, "step": 43005 }, { "epoch": 0.7616163494445847, "grad_norm": 0.5099864602088928, "learning_rate": 4.252599558757231e-06, "loss": 0.0586, "step": 43006 }, { "epoch": 0.7616340589816131, "grad_norm": 0.8136118650436401, "learning_rate": 4.2519993913333195e-06, "loss": 0.0705, "step": 43007 }, { "epoch": 0.7616517685186415, "grad_norm": 1.0463372468948364, "learning_rate": 4.251399259269246e-06, "loss": 0.0501, "step": 43008 }, { "epoch": 0.76166947805567, "grad_norm": 0.7543578743934631, "learning_rate": 4.250799162566982e-06, "loss": 0.0763, "step": 43009 }, { "epoch": 0.7616871875926984, "grad_norm": 0.2822032868862152, "learning_rate": 4.250199101228507e-06, "loss": 0.0266, "step": 43010 }, { "epoch": 0.7617048971297268, "grad_norm": 0.4783400595188141, "learning_rate": 4.2495990752557845e-06, "loss": 0.0659, "step": 43011 }, { "epoch": 0.7617226066667552, "grad_norm": 0.5158334374427795, "learning_rate": 4.248999084650794e-06, "loss": 0.0893, "step": 43012 }, { "epoch": 0.7617403162037837, "grad_norm": 0.8915876150131226, "learning_rate": 4.248399129415513e-06, "loss": 0.078, "step": 43013 }, { "epoch": 0.7617580257408121, "grad_norm": 0.5040686130523682, "learning_rate": 4.2477992095519066e-06, "loss": 0.0461, "step": 43014 }, { "epoch": 0.7617757352778405, "grad_norm": 0.7486248016357422, "learning_rate": 4.247199325061954e-06, "loss": 0.0559, "step": 43015 }, { "epoch": 0.7617934448148689, "grad_norm": 0.6054049730300903, "learning_rate": 4.246599475947629e-06, "loss": 0.07, "step": 43016 }, { "epoch": 0.7618111543518974, "grad_norm": 0.7253165245056152, "learning_rate": 4.245999662210904e-06, "loss": 0.068, "step": 43017 }, { "epoch": 0.7618288638889258, "grad_norm": 0.9936815500259399, "learning_rate": 4.2453998838537505e-06, "loss": 0.0745, "step": 43018 }, { "epoch": 0.7618465734259542, "grad_norm": 0.36865419149398804, "learning_rate": 4.244800140878145e-06, "loss": 0.0571, "step": 43019 }, { "epoch": 0.7618642829629826, "grad_norm": 1.0661929845809937, "learning_rate": 4.244200433286064e-06, "loss": 0.0849, "step": 43020 }, { "epoch": 0.7618819925000111, "grad_norm": 0.650890052318573, "learning_rate": 4.2436007610794695e-06, "loss": 0.0593, "step": 43021 }, { "epoch": 0.7618997020370395, "grad_norm": 0.5937100648880005, "learning_rate": 4.2430011242603425e-06, "loss": 0.0642, "step": 43022 }, { "epoch": 0.7619174115740679, "grad_norm": 0.7401324510574341, "learning_rate": 4.242401522830652e-06, "loss": 0.0518, "step": 43023 }, { "epoch": 0.7619351211110964, "grad_norm": 0.8049516081809998, "learning_rate": 4.241801956792378e-06, "loss": 0.0727, "step": 43024 }, { "epoch": 0.7619528306481248, "grad_norm": 0.5584069490432739, "learning_rate": 4.241202426147484e-06, "loss": 0.0447, "step": 43025 }, { "epoch": 0.7619705401851532, "grad_norm": 0.6965872645378113, "learning_rate": 4.240602930897945e-06, "loss": 0.0818, "step": 43026 }, { "epoch": 0.7619882497221816, "grad_norm": 0.519202470779419, "learning_rate": 4.240003471045732e-06, "loss": 0.0406, "step": 43027 }, { "epoch": 0.7620059592592101, "grad_norm": 0.6736498475074768, "learning_rate": 4.2394040465928215e-06, "loss": 0.0589, "step": 43028 }, { "epoch": 0.7620236687962385, "grad_norm": 0.7327094078063965, "learning_rate": 4.238804657541182e-06, "loss": 0.0545, "step": 43029 }, { "epoch": 0.7620413783332669, "grad_norm": 0.6084324717521667, "learning_rate": 4.238205303892788e-06, "loss": 0.0567, "step": 43030 }, { "epoch": 0.7620590878702953, "grad_norm": 0.6686730980873108, "learning_rate": 4.237605985649615e-06, "loss": 0.0414, "step": 43031 }, { "epoch": 0.7620767974073238, "grad_norm": 0.33722615242004395, "learning_rate": 4.237006702813624e-06, "loss": 0.0659, "step": 43032 }, { "epoch": 0.7620945069443522, "grad_norm": 0.4751834273338318, "learning_rate": 4.236407455386792e-06, "loss": 0.0351, "step": 43033 }, { "epoch": 0.7621122164813806, "grad_norm": 0.7084497213363647, "learning_rate": 4.2358082433710924e-06, "loss": 0.0588, "step": 43034 }, { "epoch": 0.762129926018409, "grad_norm": 0.45355024933815, "learning_rate": 4.235209066768499e-06, "loss": 0.0631, "step": 43035 }, { "epoch": 0.7621476355554375, "grad_norm": 0.8118560314178467, "learning_rate": 4.234609925580969e-06, "loss": 0.0848, "step": 43036 }, { "epoch": 0.7621653450924659, "grad_norm": 0.6425299644470215, "learning_rate": 4.234010819810488e-06, "loss": 0.0873, "step": 43037 }, { "epoch": 0.7621830546294943, "grad_norm": 0.5769362449645996, "learning_rate": 4.2334117494590285e-06, "loss": 0.035, "step": 43038 }, { "epoch": 0.7622007641665228, "grad_norm": 0.5220032930374146, "learning_rate": 4.232812714528549e-06, "loss": 0.0398, "step": 43039 }, { "epoch": 0.7622184737035512, "grad_norm": 0.3763907253742218, "learning_rate": 4.232213715021027e-06, "loss": 0.029, "step": 43040 }, { "epoch": 0.7622361832405796, "grad_norm": 0.5566866397857666, "learning_rate": 4.2316147509384335e-06, "loss": 0.0532, "step": 43041 }, { "epoch": 0.762253892777608, "grad_norm": 0.3734617829322815, "learning_rate": 4.231015822282743e-06, "loss": 0.06, "step": 43042 }, { "epoch": 0.7622716023146365, "grad_norm": 0.45434337854385376, "learning_rate": 4.230416929055917e-06, "loss": 0.0588, "step": 43043 }, { "epoch": 0.7622893118516649, "grad_norm": 0.6111745834350586, "learning_rate": 4.22981807125993e-06, "loss": 0.0672, "step": 43044 }, { "epoch": 0.7623070213886933, "grad_norm": 0.7127094864845276, "learning_rate": 4.22921924889675e-06, "loss": 0.0718, "step": 43045 }, { "epoch": 0.7623247309257217, "grad_norm": 0.5010905265808105, "learning_rate": 4.228620461968351e-06, "loss": 0.0528, "step": 43046 }, { "epoch": 0.7623424404627502, "grad_norm": 0.625485897064209, "learning_rate": 4.2280217104767e-06, "loss": 0.0596, "step": 43047 }, { "epoch": 0.7623601499997786, "grad_norm": 0.8095996379852295, "learning_rate": 4.22742299442377e-06, "loss": 0.0576, "step": 43048 }, { "epoch": 0.762377859536807, "grad_norm": 0.8567840456962585, "learning_rate": 4.226824313811533e-06, "loss": 0.0819, "step": 43049 }, { "epoch": 0.7623955690738354, "grad_norm": 0.40162932872772217, "learning_rate": 4.226225668641949e-06, "loss": 0.0525, "step": 43050 }, { "epoch": 0.7624132786108639, "grad_norm": 0.22151358425617218, "learning_rate": 4.225627058916994e-06, "loss": 0.0486, "step": 43051 }, { "epoch": 0.7624309881478923, "grad_norm": 0.4645133912563324, "learning_rate": 4.225028484638639e-06, "loss": 0.0685, "step": 43052 }, { "epoch": 0.7624486976849207, "grad_norm": 0.3910279870033264, "learning_rate": 4.224429945808846e-06, "loss": 0.0536, "step": 43053 }, { "epoch": 0.7624664072219492, "grad_norm": 0.37009379267692566, "learning_rate": 4.22383144242959e-06, "loss": 0.0713, "step": 43054 }, { "epoch": 0.7624841167589776, "grad_norm": 0.5525631904602051, "learning_rate": 4.223232974502838e-06, "loss": 0.0501, "step": 43055 }, { "epoch": 0.762501826296006, "grad_norm": 0.7143574357032776, "learning_rate": 4.2226345420305594e-06, "loss": 0.0651, "step": 43056 }, { "epoch": 0.7625195358330344, "grad_norm": 0.3998968005180359, "learning_rate": 4.222036145014723e-06, "loss": 0.0744, "step": 43057 }, { "epoch": 0.762537245370063, "grad_norm": 0.2942057251930237, "learning_rate": 4.221437783457299e-06, "loss": 0.0686, "step": 43058 }, { "epoch": 0.7625549549070914, "grad_norm": 0.6472588777542114, "learning_rate": 4.220839457360257e-06, "loss": 0.0686, "step": 43059 }, { "epoch": 0.7625726644441198, "grad_norm": 0.6332364082336426, "learning_rate": 4.220241166725559e-06, "loss": 0.0453, "step": 43060 }, { "epoch": 0.7625903739811482, "grad_norm": 0.6997660398483276, "learning_rate": 4.219642911555178e-06, "loss": 0.0548, "step": 43061 }, { "epoch": 0.7626080835181767, "grad_norm": 0.5551309585571289, "learning_rate": 4.21904469185108e-06, "loss": 0.0629, "step": 43062 }, { "epoch": 0.7626257930552051, "grad_norm": 0.8100869059562683, "learning_rate": 4.21844650761524e-06, "loss": 0.052, "step": 43063 }, { "epoch": 0.7626435025922335, "grad_norm": 0.8860939145088196, "learning_rate": 4.217848358849617e-06, "loss": 0.08, "step": 43064 }, { "epoch": 0.7626612121292619, "grad_norm": 0.5737078785896301, "learning_rate": 4.2172502455561755e-06, "loss": 0.0636, "step": 43065 }, { "epoch": 0.7626789216662904, "grad_norm": 0.6261157989501953, "learning_rate": 4.216652167736901e-06, "loss": 0.0587, "step": 43066 }, { "epoch": 0.7626966312033188, "grad_norm": 0.7440879940986633, "learning_rate": 4.2160541253937455e-06, "loss": 0.0926, "step": 43067 }, { "epoch": 0.7627143407403472, "grad_norm": 0.6687349677085876, "learning_rate": 4.215456118528681e-06, "loss": 0.0402, "step": 43068 }, { "epoch": 0.7627320502773757, "grad_norm": 0.46737733483314514, "learning_rate": 4.2148581471436746e-06, "loss": 0.0603, "step": 43069 }, { "epoch": 0.7627497598144041, "grad_norm": 0.9070038199424744, "learning_rate": 4.214260211240698e-06, "loss": 0.0816, "step": 43070 }, { "epoch": 0.7627674693514325, "grad_norm": 0.588529109954834, "learning_rate": 4.213662310821711e-06, "loss": 0.0425, "step": 43071 }, { "epoch": 0.7627851788884609, "grad_norm": 0.3695161044597626, "learning_rate": 4.213064445888685e-06, "loss": 0.0619, "step": 43072 }, { "epoch": 0.7628028884254894, "grad_norm": 0.6604419350624084, "learning_rate": 4.2124666164435835e-06, "loss": 0.0466, "step": 43073 }, { "epoch": 0.7628205979625178, "grad_norm": 0.3673906922340393, "learning_rate": 4.211868822488376e-06, "loss": 0.0336, "step": 43074 }, { "epoch": 0.7628383074995462, "grad_norm": 0.7036384344100952, "learning_rate": 4.2112710640250305e-06, "loss": 0.0625, "step": 43075 }, { "epoch": 0.7628560170365746, "grad_norm": 0.5520308017730713, "learning_rate": 4.210673341055512e-06, "loss": 0.0528, "step": 43076 }, { "epoch": 0.7628737265736031, "grad_norm": 0.44209766387939453, "learning_rate": 4.21007565358179e-06, "loss": 0.0712, "step": 43077 }, { "epoch": 0.7628914361106315, "grad_norm": 1.0191473960876465, "learning_rate": 4.2094780016058245e-06, "loss": 0.068, "step": 43078 }, { "epoch": 0.7629091456476599, "grad_norm": 0.6331608295440674, "learning_rate": 4.208880385129584e-06, "loss": 0.0865, "step": 43079 }, { "epoch": 0.7629268551846883, "grad_norm": 0.5964859127998352, "learning_rate": 4.208282804155036e-06, "loss": 0.0428, "step": 43080 }, { "epoch": 0.7629445647217168, "grad_norm": 0.5795785784721375, "learning_rate": 4.207685258684151e-06, "loss": 0.071, "step": 43081 }, { "epoch": 0.7629622742587452, "grad_norm": 0.4716233015060425, "learning_rate": 4.2070877487188844e-06, "loss": 0.0544, "step": 43082 }, { "epoch": 0.7629799837957736, "grad_norm": 0.3386577367782593, "learning_rate": 4.206490274261208e-06, "loss": 0.0632, "step": 43083 }, { "epoch": 0.7629976933328021, "grad_norm": 0.5234259963035583, "learning_rate": 4.205892835313087e-06, "loss": 0.0429, "step": 43084 }, { "epoch": 0.7630154028698305, "grad_norm": 0.867455244064331, "learning_rate": 4.205295431876487e-06, "loss": 0.0721, "step": 43085 }, { "epoch": 0.7630331124068589, "grad_norm": 0.3265190124511719, "learning_rate": 4.204698063953374e-06, "loss": 0.0472, "step": 43086 }, { "epoch": 0.7630508219438873, "grad_norm": 0.6642821431159973, "learning_rate": 4.2041007315457115e-06, "loss": 0.0702, "step": 43087 }, { "epoch": 0.7630685314809158, "grad_norm": 0.5531320571899414, "learning_rate": 4.2035034346554706e-06, "loss": 0.0664, "step": 43088 }, { "epoch": 0.7630862410179442, "grad_norm": 0.6780396699905396, "learning_rate": 4.202906173284605e-06, "loss": 0.0802, "step": 43089 }, { "epoch": 0.7631039505549726, "grad_norm": 0.6308577656745911, "learning_rate": 4.2023089474350886e-06, "loss": 0.0575, "step": 43090 }, { "epoch": 0.763121660092001, "grad_norm": 0.46005597710609436, "learning_rate": 4.201711757108886e-06, "loss": 0.0588, "step": 43091 }, { "epoch": 0.7631393696290295, "grad_norm": 0.4587348401546478, "learning_rate": 4.2011146023079555e-06, "loss": 0.045, "step": 43092 }, { "epoch": 0.7631570791660579, "grad_norm": 0.6446107029914856, "learning_rate": 4.200517483034267e-06, "loss": 0.0554, "step": 43093 }, { "epoch": 0.7631747887030863, "grad_norm": 0.7353983521461487, "learning_rate": 4.199920399289779e-06, "loss": 0.0652, "step": 43094 }, { "epoch": 0.7631924982401147, "grad_norm": 0.5181416869163513, "learning_rate": 4.199323351076469e-06, "loss": 0.0457, "step": 43095 }, { "epoch": 0.7632102077771432, "grad_norm": 0.5468754768371582, "learning_rate": 4.1987263383962875e-06, "loss": 0.0546, "step": 43096 }, { "epoch": 0.7632279173141716, "grad_norm": 0.5693306922912598, "learning_rate": 4.198129361251205e-06, "loss": 0.052, "step": 43097 }, { "epoch": 0.7632456268512, "grad_norm": 0.697532594203949, "learning_rate": 4.197532419643189e-06, "loss": 0.0469, "step": 43098 }, { "epoch": 0.7632633363882285, "grad_norm": 0.29869216680526733, "learning_rate": 4.1969355135741934e-06, "loss": 0.0468, "step": 43099 }, { "epoch": 0.7632810459252569, "grad_norm": 0.33677834272384644, "learning_rate": 4.1963386430461875e-06, "loss": 0.0626, "step": 43100 }, { "epoch": 0.7632987554622853, "grad_norm": 0.5719967484474182, "learning_rate": 4.195741808061134e-06, "loss": 0.0384, "step": 43101 }, { "epoch": 0.7633164649993137, "grad_norm": 1.0088680982589722, "learning_rate": 4.1951450086209985e-06, "loss": 0.1024, "step": 43102 }, { "epoch": 0.7633341745363422, "grad_norm": 0.49888598918914795, "learning_rate": 4.194548244727742e-06, "loss": 0.0395, "step": 43103 }, { "epoch": 0.7633518840733706, "grad_norm": 0.47155189514160156, "learning_rate": 4.19395151638333e-06, "loss": 0.0643, "step": 43104 }, { "epoch": 0.763369593610399, "grad_norm": 0.6131759881973267, "learning_rate": 4.193354823589724e-06, "loss": 0.0589, "step": 43105 }, { "epoch": 0.7633873031474274, "grad_norm": 0.4873136579990387, "learning_rate": 4.192758166348892e-06, "loss": 0.0488, "step": 43106 }, { "epoch": 0.7634050126844559, "grad_norm": 0.21534352004528046, "learning_rate": 4.192161544662788e-06, "loss": 0.0347, "step": 43107 }, { "epoch": 0.7634227222214843, "grad_norm": 0.44039684534072876, "learning_rate": 4.19156495853338e-06, "loss": 0.0634, "step": 43108 }, { "epoch": 0.7634404317585127, "grad_norm": 0.7048159241676331, "learning_rate": 4.190968407962634e-06, "loss": 0.0563, "step": 43109 }, { "epoch": 0.7634581412955411, "grad_norm": 0.8682113885879517, "learning_rate": 4.1903718929525045e-06, "loss": 0.0663, "step": 43110 }, { "epoch": 0.7634758508325696, "grad_norm": 0.6144370436668396, "learning_rate": 4.189775413504957e-06, "loss": 0.0302, "step": 43111 }, { "epoch": 0.763493560369598, "grad_norm": 0.6026092767715454, "learning_rate": 4.189178969621956e-06, "loss": 0.0444, "step": 43112 }, { "epoch": 0.7635112699066264, "grad_norm": 0.6124642491340637, "learning_rate": 4.188582561305463e-06, "loss": 0.0666, "step": 43113 }, { "epoch": 0.7635289794436549, "grad_norm": 0.35540032386779785, "learning_rate": 4.187986188557439e-06, "loss": 0.0713, "step": 43114 }, { "epoch": 0.7635466889806833, "grad_norm": 0.4597024917602539, "learning_rate": 4.187389851379847e-06, "loss": 0.042, "step": 43115 }, { "epoch": 0.7635643985177117, "grad_norm": 0.480910986661911, "learning_rate": 4.186793549774655e-06, "loss": 0.0554, "step": 43116 }, { "epoch": 0.7635821080547401, "grad_norm": 0.47665300965309143, "learning_rate": 4.186197283743812e-06, "loss": 0.049, "step": 43117 }, { "epoch": 0.7635998175917686, "grad_norm": 0.845701277256012, "learning_rate": 4.185601053289287e-06, "loss": 0.0489, "step": 43118 }, { "epoch": 0.763617527128797, "grad_norm": 0.8795597553253174, "learning_rate": 4.185004858413041e-06, "loss": 0.0588, "step": 43119 }, { "epoch": 0.7636352366658254, "grad_norm": 0.49569839239120483, "learning_rate": 4.184408699117038e-06, "loss": 0.0501, "step": 43120 }, { "epoch": 0.7636529462028538, "grad_norm": 0.5312584042549133, "learning_rate": 4.183812575403234e-06, "loss": 0.0579, "step": 43121 }, { "epoch": 0.7636706557398824, "grad_norm": 0.80010586977005, "learning_rate": 4.183216487273592e-06, "loss": 0.049, "step": 43122 }, { "epoch": 0.7636883652769108, "grad_norm": 0.3662216365337372, "learning_rate": 4.182620434730073e-06, "loss": 0.0625, "step": 43123 }, { "epoch": 0.7637060748139392, "grad_norm": 0.5294747352600098, "learning_rate": 4.1820244177746395e-06, "loss": 0.0473, "step": 43124 }, { "epoch": 0.7637237843509675, "grad_norm": 0.5918378233909607, "learning_rate": 4.181428436409251e-06, "loss": 0.0709, "step": 43125 }, { "epoch": 0.7637414938879961, "grad_norm": 0.763298511505127, "learning_rate": 4.180832490635868e-06, "loss": 0.0418, "step": 43126 }, { "epoch": 0.7637592034250245, "grad_norm": 0.5648770332336426, "learning_rate": 4.180236580456458e-06, "loss": 0.0674, "step": 43127 }, { "epoch": 0.7637769129620529, "grad_norm": 0.8083810806274414, "learning_rate": 4.179640705872971e-06, "loss": 0.0509, "step": 43128 }, { "epoch": 0.7637946224990814, "grad_norm": 0.8047769069671631, "learning_rate": 4.1790448668873696e-06, "loss": 0.0598, "step": 43129 }, { "epoch": 0.7638123320361098, "grad_norm": 0.67896568775177, "learning_rate": 4.178449063501622e-06, "loss": 0.0639, "step": 43130 }, { "epoch": 0.7638300415731382, "grad_norm": 0.8540970683097839, "learning_rate": 4.177853295717672e-06, "loss": 0.0679, "step": 43131 }, { "epoch": 0.7638477511101666, "grad_norm": 0.5636657476425171, "learning_rate": 4.177257563537496e-06, "loss": 0.0734, "step": 43132 }, { "epoch": 0.7638654606471951, "grad_norm": 0.4986647963523865, "learning_rate": 4.176661866963048e-06, "loss": 0.0749, "step": 43133 }, { "epoch": 0.7638831701842235, "grad_norm": 0.6138165593147278, "learning_rate": 4.176066205996292e-06, "loss": 0.0558, "step": 43134 }, { "epoch": 0.7639008797212519, "grad_norm": 0.6105108261108398, "learning_rate": 4.175470580639179e-06, "loss": 0.0811, "step": 43135 }, { "epoch": 0.7639185892582803, "grad_norm": 0.7451009750366211, "learning_rate": 4.174874990893672e-06, "loss": 0.0461, "step": 43136 }, { "epoch": 0.7639362987953088, "grad_norm": 0.5941817760467529, "learning_rate": 4.174279436761736e-06, "loss": 0.0437, "step": 43137 }, { "epoch": 0.7639540083323372, "grad_norm": 0.423483282327652, "learning_rate": 4.1736839182453235e-06, "loss": 0.0594, "step": 43138 }, { "epoch": 0.7639717178693656, "grad_norm": 0.8222246766090393, "learning_rate": 4.1730884353463945e-06, "loss": 0.0609, "step": 43139 }, { "epoch": 0.7639894274063941, "grad_norm": 0.4089050889015198, "learning_rate": 4.172492988066908e-06, "loss": 0.053, "step": 43140 }, { "epoch": 0.7640071369434225, "grad_norm": 0.4489698112010956, "learning_rate": 4.171897576408825e-06, "loss": 0.0504, "step": 43141 }, { "epoch": 0.7640248464804509, "grad_norm": 0.35187238454818726, "learning_rate": 4.171302200374105e-06, "loss": 0.0492, "step": 43142 }, { "epoch": 0.7640425560174793, "grad_norm": 0.5315895080566406, "learning_rate": 4.170706859964705e-06, "loss": 0.0451, "step": 43143 }, { "epoch": 0.7640602655545078, "grad_norm": 0.6955558657646179, "learning_rate": 4.170111555182584e-06, "loss": 0.0403, "step": 43144 }, { "epoch": 0.7640779750915362, "grad_norm": 0.6661766767501831, "learning_rate": 4.169516286029705e-06, "loss": 0.0356, "step": 43145 }, { "epoch": 0.7640956846285646, "grad_norm": 0.6771790981292725, "learning_rate": 4.168921052508019e-06, "loss": 0.0548, "step": 43146 }, { "epoch": 0.764113394165593, "grad_norm": 0.5601270198822021, "learning_rate": 4.168325854619485e-06, "loss": 0.069, "step": 43147 }, { "epoch": 0.7641311037026215, "grad_norm": 0.33808013796806335, "learning_rate": 4.167730692366069e-06, "loss": 0.0718, "step": 43148 }, { "epoch": 0.7641488132396499, "grad_norm": 0.6168473958969116, "learning_rate": 4.167135565749719e-06, "loss": 0.0727, "step": 43149 }, { "epoch": 0.7641665227766783, "grad_norm": 0.5760831832885742, "learning_rate": 4.1665404747723965e-06, "loss": 0.0528, "step": 43150 }, { "epoch": 0.7641842323137067, "grad_norm": 0.7549587488174438, "learning_rate": 4.165945419436059e-06, "loss": 0.0497, "step": 43151 }, { "epoch": 0.7642019418507352, "grad_norm": 0.7987920641899109, "learning_rate": 4.165350399742668e-06, "loss": 0.0723, "step": 43152 }, { "epoch": 0.7642196513877636, "grad_norm": 0.4308241605758667, "learning_rate": 4.1647554156941775e-06, "loss": 0.0635, "step": 43153 }, { "epoch": 0.764237360924792, "grad_norm": 0.5096395015716553, "learning_rate": 4.1641604672925446e-06, "loss": 0.0754, "step": 43154 }, { "epoch": 0.7642550704618205, "grad_norm": 0.8726574778556824, "learning_rate": 4.163565554539732e-06, "loss": 0.0895, "step": 43155 }, { "epoch": 0.7642727799988489, "grad_norm": 0.6420302987098694, "learning_rate": 4.16297067743769e-06, "loss": 0.0647, "step": 43156 }, { "epoch": 0.7642904895358773, "grad_norm": 0.428987979888916, "learning_rate": 4.162375835988377e-06, "loss": 0.0399, "step": 43157 }, { "epoch": 0.7643081990729057, "grad_norm": 0.4827185571193695, "learning_rate": 4.1617810301937526e-06, "loss": 0.0527, "step": 43158 }, { "epoch": 0.7643259086099342, "grad_norm": 0.6132459044456482, "learning_rate": 4.161186260055777e-06, "loss": 0.0691, "step": 43159 }, { "epoch": 0.7643436181469626, "grad_norm": 0.5435200929641724, "learning_rate": 4.1605915255763925e-06, "loss": 0.047, "step": 43160 }, { "epoch": 0.764361327683991, "grad_norm": 0.44814860820770264, "learning_rate": 4.1599968267575694e-06, "loss": 0.0381, "step": 43161 }, { "epoch": 0.7643790372210194, "grad_norm": 0.5434426665306091, "learning_rate": 4.159402163601266e-06, "loss": 0.0466, "step": 43162 }, { "epoch": 0.7643967467580479, "grad_norm": 0.5615690350532532, "learning_rate": 4.158807536109428e-06, "loss": 0.0635, "step": 43163 }, { "epoch": 0.7644144562950763, "grad_norm": 1.166136384010315, "learning_rate": 4.158212944284016e-06, "loss": 0.0693, "step": 43164 }, { "epoch": 0.7644321658321047, "grad_norm": 1.22061026096344, "learning_rate": 4.157618388126989e-06, "loss": 0.0687, "step": 43165 }, { "epoch": 0.7644498753691331, "grad_norm": 0.3798925280570984, "learning_rate": 4.157023867640302e-06, "loss": 0.0329, "step": 43166 }, { "epoch": 0.7644675849061616, "grad_norm": 0.4943482279777527, "learning_rate": 4.1564293828259086e-06, "loss": 0.0455, "step": 43167 }, { "epoch": 0.76448529444319, "grad_norm": 1.0597866773605347, "learning_rate": 4.155834933685765e-06, "loss": 0.0809, "step": 43168 }, { "epoch": 0.7645030039802184, "grad_norm": 0.8842816352844238, "learning_rate": 4.155240520221826e-06, "loss": 0.0872, "step": 43169 }, { "epoch": 0.7645207135172469, "grad_norm": 0.5487305521965027, "learning_rate": 4.15464614243605e-06, "loss": 0.0418, "step": 43170 }, { "epoch": 0.7645384230542753, "grad_norm": 0.3281259536743164, "learning_rate": 4.1540518003303905e-06, "loss": 0.0444, "step": 43171 }, { "epoch": 0.7645561325913037, "grad_norm": 0.49845659732818604, "learning_rate": 4.1534574939068055e-06, "loss": 0.0547, "step": 43172 }, { "epoch": 0.7645738421283321, "grad_norm": 0.5981904864311218, "learning_rate": 4.152863223167251e-06, "loss": 0.0574, "step": 43173 }, { "epoch": 0.7645915516653606, "grad_norm": 0.5546967387199402, "learning_rate": 4.152268988113676e-06, "loss": 0.0505, "step": 43174 }, { "epoch": 0.764609261202389, "grad_norm": 0.9021232724189758, "learning_rate": 4.151674788748038e-06, "loss": 0.0467, "step": 43175 }, { "epoch": 0.7646269707394174, "grad_norm": 0.6077787280082703, "learning_rate": 4.151080625072293e-06, "loss": 0.0491, "step": 43176 }, { "epoch": 0.7646446802764458, "grad_norm": 0.9560794830322266, "learning_rate": 4.150486497088399e-06, "loss": 0.0605, "step": 43177 }, { "epoch": 0.7646623898134743, "grad_norm": 0.48050183057785034, "learning_rate": 4.149892404798303e-06, "loss": 0.0793, "step": 43178 }, { "epoch": 0.7646800993505027, "grad_norm": 0.7618619203567505, "learning_rate": 4.1492983482039644e-06, "loss": 0.0646, "step": 43179 }, { "epoch": 0.7646978088875311, "grad_norm": 0.5489815473556519, "learning_rate": 4.1487043273073365e-06, "loss": 0.0812, "step": 43180 }, { "epoch": 0.7647155184245595, "grad_norm": 0.4020877778530121, "learning_rate": 4.148110342110373e-06, "loss": 0.083, "step": 43181 }, { "epoch": 0.764733227961588, "grad_norm": 0.6735071539878845, "learning_rate": 4.14751639261503e-06, "loss": 0.0599, "step": 43182 }, { "epoch": 0.7647509374986164, "grad_norm": 0.6718655228614807, "learning_rate": 4.14692247882326e-06, "loss": 0.0428, "step": 43183 }, { "epoch": 0.7647686470356448, "grad_norm": 0.3316274881362915, "learning_rate": 4.146328600737022e-06, "loss": 0.077, "step": 43184 }, { "epoch": 0.7647863565726734, "grad_norm": 0.7640084028244019, "learning_rate": 4.145734758358259e-06, "loss": 0.0561, "step": 43185 }, { "epoch": 0.7648040661097018, "grad_norm": 1.0139687061309814, "learning_rate": 4.145140951688934e-06, "loss": 0.0716, "step": 43186 }, { "epoch": 0.7648217756467302, "grad_norm": 0.6500681638717651, "learning_rate": 4.144547180730999e-06, "loss": 0.0639, "step": 43187 }, { "epoch": 0.7648394851837585, "grad_norm": 0.7748973965644836, "learning_rate": 4.143953445486403e-06, "loss": 0.0487, "step": 43188 }, { "epoch": 0.7648571947207871, "grad_norm": 0.6079668402671814, "learning_rate": 4.143359745957097e-06, "loss": 0.0596, "step": 43189 }, { "epoch": 0.7648749042578155, "grad_norm": 0.31521695852279663, "learning_rate": 4.1427660821450425e-06, "loss": 0.061, "step": 43190 }, { "epoch": 0.7648926137948439, "grad_norm": 0.442294180393219, "learning_rate": 4.142172454052195e-06, "loss": 0.0576, "step": 43191 }, { "epoch": 0.7649103233318723, "grad_norm": 0.8682296872138977, "learning_rate": 4.1415788616804975e-06, "loss": 0.0506, "step": 43192 }, { "epoch": 0.7649280328689008, "grad_norm": 0.46577781438827515, "learning_rate": 4.140985305031908e-06, "loss": 0.0417, "step": 43193 }, { "epoch": 0.7649457424059292, "grad_norm": 0.598565399646759, "learning_rate": 4.140391784108383e-06, "loss": 0.0638, "step": 43194 }, { "epoch": 0.7649634519429576, "grad_norm": 0.6173518300056458, "learning_rate": 4.139798298911866e-06, "loss": 0.0655, "step": 43195 }, { "epoch": 0.764981161479986, "grad_norm": 0.960818350315094, "learning_rate": 4.139204849444313e-06, "loss": 0.0411, "step": 43196 }, { "epoch": 0.7649988710170145, "grad_norm": 0.5831305980682373, "learning_rate": 4.138611435707679e-06, "loss": 0.0467, "step": 43197 }, { "epoch": 0.7650165805540429, "grad_norm": 0.6036735773086548, "learning_rate": 4.138018057703915e-06, "loss": 0.0413, "step": 43198 }, { "epoch": 0.7650342900910713, "grad_norm": 0.5179166793823242, "learning_rate": 4.1374247154349704e-06, "loss": 0.0377, "step": 43199 }, { "epoch": 0.7650519996280998, "grad_norm": 0.21939097344875336, "learning_rate": 4.136831408902801e-06, "loss": 0.0466, "step": 43200 }, { "epoch": 0.7650697091651282, "grad_norm": 0.7107928395271301, "learning_rate": 4.136238138109364e-06, "loss": 0.0681, "step": 43201 }, { "epoch": 0.7650874187021566, "grad_norm": 0.5010126829147339, "learning_rate": 4.135644903056598e-06, "loss": 0.0577, "step": 43202 }, { "epoch": 0.765105128239185, "grad_norm": 0.5892524719238281, "learning_rate": 4.135051703746462e-06, "loss": 0.0888, "step": 43203 }, { "epoch": 0.7651228377762135, "grad_norm": 0.39987367391586304, "learning_rate": 4.134458540180908e-06, "loss": 0.0444, "step": 43204 }, { "epoch": 0.7651405473132419, "grad_norm": 0.46350952982902527, "learning_rate": 4.13386541236189e-06, "loss": 0.0525, "step": 43205 }, { "epoch": 0.7651582568502703, "grad_norm": 0.7309032678604126, "learning_rate": 4.133272320291352e-06, "loss": 0.062, "step": 43206 }, { "epoch": 0.7651759663872987, "grad_norm": 0.5957688689231873, "learning_rate": 4.132679263971247e-06, "loss": 0.0747, "step": 43207 }, { "epoch": 0.7651936759243272, "grad_norm": 0.21811921894550323, "learning_rate": 4.13208624340353e-06, "loss": 0.0947, "step": 43208 }, { "epoch": 0.7652113854613556, "grad_norm": 0.29099681973457336, "learning_rate": 4.131493258590149e-06, "loss": 0.0374, "step": 43209 }, { "epoch": 0.765229094998384, "grad_norm": 0.591292142868042, "learning_rate": 4.130900309533056e-06, "loss": 0.0688, "step": 43210 }, { "epoch": 0.7652468045354124, "grad_norm": 0.4523650109767914, "learning_rate": 4.130307396234202e-06, "loss": 0.0506, "step": 43211 }, { "epoch": 0.7652645140724409, "grad_norm": 0.6697912216186523, "learning_rate": 4.129714518695542e-06, "loss": 0.0814, "step": 43212 }, { "epoch": 0.7652822236094693, "grad_norm": 0.5007134675979614, "learning_rate": 4.1291216769190175e-06, "loss": 0.0553, "step": 43213 }, { "epoch": 0.7652999331464977, "grad_norm": 0.5443173050880432, "learning_rate": 4.128528870906582e-06, "loss": 0.048, "step": 43214 }, { "epoch": 0.7653176426835262, "grad_norm": 0.6683709621429443, "learning_rate": 4.127936100660188e-06, "loss": 0.067, "step": 43215 }, { "epoch": 0.7653353522205546, "grad_norm": 0.5830210447311401, "learning_rate": 4.127343366181788e-06, "loss": 0.0606, "step": 43216 }, { "epoch": 0.765353061757583, "grad_norm": 0.5068748593330383, "learning_rate": 4.126750667473325e-06, "loss": 0.0497, "step": 43217 }, { "epoch": 0.7653707712946114, "grad_norm": 0.9041498303413391, "learning_rate": 4.126158004536748e-06, "loss": 0.0573, "step": 43218 }, { "epoch": 0.7653884808316399, "grad_norm": 0.6348609328269958, "learning_rate": 4.12556537737402e-06, "loss": 0.0663, "step": 43219 }, { "epoch": 0.7654061903686683, "grad_norm": 0.3168051540851593, "learning_rate": 4.124972785987077e-06, "loss": 0.0299, "step": 43220 }, { "epoch": 0.7654238999056967, "grad_norm": 0.5645042061805725, "learning_rate": 4.124380230377874e-06, "loss": 0.0374, "step": 43221 }, { "epoch": 0.7654416094427251, "grad_norm": 0.5068108439445496, "learning_rate": 4.123787710548361e-06, "loss": 0.0577, "step": 43222 }, { "epoch": 0.7654593189797536, "grad_norm": 0.2427440583705902, "learning_rate": 4.12319522650049e-06, "loss": 0.0597, "step": 43223 }, { "epoch": 0.765477028516782, "grad_norm": 0.5458061695098877, "learning_rate": 4.122602778236203e-06, "loss": 0.0677, "step": 43224 }, { "epoch": 0.7654947380538104, "grad_norm": 0.574184000492096, "learning_rate": 4.1220103657574515e-06, "loss": 0.0542, "step": 43225 }, { "epoch": 0.7655124475908388, "grad_norm": 0.3261071741580963, "learning_rate": 4.1214179890661854e-06, "loss": 0.0624, "step": 43226 }, { "epoch": 0.7655301571278673, "grad_norm": 0.4122612178325653, "learning_rate": 4.120825648164355e-06, "loss": 0.0783, "step": 43227 }, { "epoch": 0.7655478666648957, "grad_norm": 0.5423262119293213, "learning_rate": 4.120233343053907e-06, "loss": 0.0753, "step": 43228 }, { "epoch": 0.7655655762019241, "grad_norm": 0.9543954730033875, "learning_rate": 4.119641073736792e-06, "loss": 0.0665, "step": 43229 }, { "epoch": 0.7655832857389526, "grad_norm": 0.6400112509727478, "learning_rate": 4.11904884021496e-06, "loss": 0.0601, "step": 43230 }, { "epoch": 0.765600995275981, "grad_norm": 0.6710621118545532, "learning_rate": 4.118456642490354e-06, "loss": 0.0569, "step": 43231 }, { "epoch": 0.7656187048130094, "grad_norm": 0.6293390393257141, "learning_rate": 4.117864480564924e-06, "loss": 0.0563, "step": 43232 }, { "epoch": 0.7656364143500378, "grad_norm": 0.5218273997306824, "learning_rate": 4.117272354440623e-06, "loss": 0.0443, "step": 43233 }, { "epoch": 0.7656541238870663, "grad_norm": 0.6964210867881775, "learning_rate": 4.116680264119393e-06, "loss": 0.0526, "step": 43234 }, { "epoch": 0.7656718334240947, "grad_norm": 0.7836020588874817, "learning_rate": 4.116088209603183e-06, "loss": 0.0439, "step": 43235 }, { "epoch": 0.7656895429611231, "grad_norm": 0.4328100085258484, "learning_rate": 4.1154961908939405e-06, "loss": 0.0535, "step": 43236 }, { "epoch": 0.7657072524981515, "grad_norm": 0.5656968951225281, "learning_rate": 4.114904207993618e-06, "loss": 0.0411, "step": 43237 }, { "epoch": 0.76572496203518, "grad_norm": 0.4046960771083832, "learning_rate": 4.1143122609041574e-06, "loss": 0.0585, "step": 43238 }, { "epoch": 0.7657426715722084, "grad_norm": 0.46083712577819824, "learning_rate": 4.1137203496275075e-06, "loss": 0.0695, "step": 43239 }, { "epoch": 0.7657603811092368, "grad_norm": 0.3815777003765106, "learning_rate": 4.113128474165623e-06, "loss": 0.0332, "step": 43240 }, { "epoch": 0.7657780906462652, "grad_norm": 0.8414273858070374, "learning_rate": 4.112536634520439e-06, "loss": 0.0666, "step": 43241 }, { "epoch": 0.7657958001832937, "grad_norm": 0.4202043116092682, "learning_rate": 4.11194483069391e-06, "loss": 0.0531, "step": 43242 }, { "epoch": 0.7658135097203221, "grad_norm": 0.6815319657325745, "learning_rate": 4.111353062687979e-06, "loss": 0.0691, "step": 43243 }, { "epoch": 0.7658312192573505, "grad_norm": 0.6303247213363647, "learning_rate": 4.110761330504602e-06, "loss": 0.037, "step": 43244 }, { "epoch": 0.765848928794379, "grad_norm": 0.8151788115501404, "learning_rate": 4.110169634145712e-06, "loss": 0.0597, "step": 43245 }, { "epoch": 0.7658666383314074, "grad_norm": 0.4815337359905243, "learning_rate": 4.109577973613263e-06, "loss": 0.044, "step": 43246 }, { "epoch": 0.7658843478684358, "grad_norm": 0.5664781928062439, "learning_rate": 4.1089863489091975e-06, "loss": 0.0536, "step": 43247 }, { "epoch": 0.7659020574054642, "grad_norm": 0.4927560091018677, "learning_rate": 4.1083947600354746e-06, "loss": 0.0588, "step": 43248 }, { "epoch": 0.7659197669424928, "grad_norm": 0.5566213726997375, "learning_rate": 4.107803206994024e-06, "loss": 0.0501, "step": 43249 }, { "epoch": 0.7659374764795212, "grad_norm": 0.7281462550163269, "learning_rate": 4.107211689786802e-06, "loss": 0.0673, "step": 43250 }, { "epoch": 0.7659551860165495, "grad_norm": 0.7304609417915344, "learning_rate": 4.106620208415755e-06, "loss": 0.0755, "step": 43251 }, { "epoch": 0.765972895553578, "grad_norm": 0.453496515750885, "learning_rate": 4.106028762882823e-06, "loss": 0.0714, "step": 43252 }, { "epoch": 0.7659906050906065, "grad_norm": 0.5866207480430603, "learning_rate": 4.105437353189953e-06, "loss": 0.046, "step": 43253 }, { "epoch": 0.7660083146276349, "grad_norm": 0.41095051169395447, "learning_rate": 4.104845979339093e-06, "loss": 0.0495, "step": 43254 }, { "epoch": 0.7660260241646633, "grad_norm": 0.8046725988388062, "learning_rate": 4.104254641332191e-06, "loss": 0.0727, "step": 43255 }, { "epoch": 0.7660437337016917, "grad_norm": 0.5429919958114624, "learning_rate": 4.10366333917118e-06, "loss": 0.0388, "step": 43256 }, { "epoch": 0.7660614432387202, "grad_norm": 0.3303666412830353, "learning_rate": 4.10307207285802e-06, "loss": 0.0502, "step": 43257 }, { "epoch": 0.7660791527757486, "grad_norm": 0.5973914861679077, "learning_rate": 4.102480842394656e-06, "loss": 0.0791, "step": 43258 }, { "epoch": 0.766096862312777, "grad_norm": 0.4306824803352356, "learning_rate": 4.101889647783022e-06, "loss": 0.0376, "step": 43259 }, { "epoch": 0.7661145718498055, "grad_norm": 0.6492257118225098, "learning_rate": 4.101298489025068e-06, "loss": 0.0545, "step": 43260 }, { "epoch": 0.7661322813868339, "grad_norm": 0.32677775621414185, "learning_rate": 4.100707366122741e-06, "loss": 0.0543, "step": 43261 }, { "epoch": 0.7661499909238623, "grad_norm": 0.6730281114578247, "learning_rate": 4.100116279077989e-06, "loss": 0.037, "step": 43262 }, { "epoch": 0.7661677004608907, "grad_norm": 0.394525408744812, "learning_rate": 4.099525227892748e-06, "loss": 0.0428, "step": 43263 }, { "epoch": 0.7661854099979192, "grad_norm": 0.7421978116035461, "learning_rate": 4.0989342125689654e-06, "loss": 0.0664, "step": 43264 }, { "epoch": 0.7662031195349476, "grad_norm": 0.6387416124343872, "learning_rate": 4.098343233108587e-06, "loss": 0.0612, "step": 43265 }, { "epoch": 0.766220829071976, "grad_norm": 1.1132620573043823, "learning_rate": 4.097752289513555e-06, "loss": 0.0544, "step": 43266 }, { "epoch": 0.7662385386090044, "grad_norm": 0.3958306610584259, "learning_rate": 4.097161381785819e-06, "loss": 0.0593, "step": 43267 }, { "epoch": 0.7662562481460329, "grad_norm": 0.8485709428787231, "learning_rate": 4.096570509927317e-06, "loss": 0.0611, "step": 43268 }, { "epoch": 0.7662739576830613, "grad_norm": 0.6390670537948608, "learning_rate": 4.095979673939999e-06, "loss": 0.0573, "step": 43269 }, { "epoch": 0.7662916672200897, "grad_norm": 0.6922885775566101, "learning_rate": 4.095388873825801e-06, "loss": 0.0521, "step": 43270 }, { "epoch": 0.7663093767571181, "grad_norm": 0.8391338586807251, "learning_rate": 4.094798109586672e-06, "loss": 0.0647, "step": 43271 }, { "epoch": 0.7663270862941466, "grad_norm": 0.40548577904701233, "learning_rate": 4.094207381224557e-06, "loss": 0.0296, "step": 43272 }, { "epoch": 0.766344795831175, "grad_norm": 0.9004777073860168, "learning_rate": 4.093616688741393e-06, "loss": 0.0557, "step": 43273 }, { "epoch": 0.7663625053682034, "grad_norm": 0.5810246467590332, "learning_rate": 4.093026032139127e-06, "loss": 0.0701, "step": 43274 }, { "epoch": 0.7663802149052319, "grad_norm": 1.0096020698547363, "learning_rate": 4.092435411419701e-06, "loss": 0.0729, "step": 43275 }, { "epoch": 0.7663979244422603, "grad_norm": 0.45986464619636536, "learning_rate": 4.091844826585062e-06, "loss": 0.0611, "step": 43276 }, { "epoch": 0.7664156339792887, "grad_norm": 0.5744141936302185, "learning_rate": 4.0912542776371485e-06, "loss": 0.0384, "step": 43277 }, { "epoch": 0.7664333435163171, "grad_norm": 0.5779486894607544, "learning_rate": 4.090663764577904e-06, "loss": 0.0544, "step": 43278 }, { "epoch": 0.7664510530533456, "grad_norm": 0.5972766876220703, "learning_rate": 4.090073287409274e-06, "loss": 0.061, "step": 43279 }, { "epoch": 0.766468762590374, "grad_norm": 0.7341174483299255, "learning_rate": 4.089482846133204e-06, "loss": 0.0386, "step": 43280 }, { "epoch": 0.7664864721274024, "grad_norm": 0.5487642288208008, "learning_rate": 4.088892440751626e-06, "loss": 0.0454, "step": 43281 }, { "epoch": 0.7665041816644308, "grad_norm": 0.545784592628479, "learning_rate": 4.08830207126649e-06, "loss": 0.0614, "step": 43282 }, { "epoch": 0.7665218912014593, "grad_norm": 0.6065412759780884, "learning_rate": 4.087711737679739e-06, "loss": 0.0632, "step": 43283 }, { "epoch": 0.7665396007384877, "grad_norm": 0.6399242877960205, "learning_rate": 4.087121439993309e-06, "loss": 0.0814, "step": 43284 }, { "epoch": 0.7665573102755161, "grad_norm": 0.6633216738700867, "learning_rate": 4.086531178209143e-06, "loss": 0.0348, "step": 43285 }, { "epoch": 0.7665750198125445, "grad_norm": 0.9086706638336182, "learning_rate": 4.085940952329189e-06, "loss": 0.0732, "step": 43286 }, { "epoch": 0.766592729349573, "grad_norm": 0.8505131006240845, "learning_rate": 4.085350762355388e-06, "loss": 0.0805, "step": 43287 }, { "epoch": 0.7666104388866014, "grad_norm": 0.42695292830467224, "learning_rate": 4.0847606082896765e-06, "loss": 0.058, "step": 43288 }, { "epoch": 0.7666281484236298, "grad_norm": 0.7493849992752075, "learning_rate": 4.084170490133997e-06, "loss": 0.0464, "step": 43289 }, { "epoch": 0.7666458579606583, "grad_norm": 0.9909107089042664, "learning_rate": 4.083580407890299e-06, "loss": 0.1037, "step": 43290 }, { "epoch": 0.7666635674976867, "grad_norm": 0.7660911083221436, "learning_rate": 4.082990361560512e-06, "loss": 0.076, "step": 43291 }, { "epoch": 0.7666812770347151, "grad_norm": 0.3758789896965027, "learning_rate": 4.082400351146583e-06, "loss": 0.0502, "step": 43292 }, { "epoch": 0.7666989865717435, "grad_norm": 0.47909173369407654, "learning_rate": 4.081810376650451e-06, "loss": 0.0672, "step": 43293 }, { "epoch": 0.766716696108772, "grad_norm": 0.8477350473403931, "learning_rate": 4.08122043807406e-06, "loss": 0.0649, "step": 43294 }, { "epoch": 0.7667344056458004, "grad_norm": 0.7233179211616516, "learning_rate": 4.080630535419349e-06, "loss": 0.075, "step": 43295 }, { "epoch": 0.7667521151828288, "grad_norm": 0.29778918623924255, "learning_rate": 4.080040668688259e-06, "loss": 0.0573, "step": 43296 }, { "epoch": 0.7667698247198572, "grad_norm": 0.8020594120025635, "learning_rate": 4.079450837882735e-06, "loss": 0.0771, "step": 43297 }, { "epoch": 0.7667875342568857, "grad_norm": 0.5941333770751953, "learning_rate": 4.078861043004709e-06, "loss": 0.0575, "step": 43298 }, { "epoch": 0.7668052437939141, "grad_norm": 0.4578454792499542, "learning_rate": 4.078271284056124e-06, "loss": 0.0357, "step": 43299 }, { "epoch": 0.7668229533309425, "grad_norm": 0.4744141697883606, "learning_rate": 4.077681561038926e-06, "loss": 0.0519, "step": 43300 }, { "epoch": 0.7668406628679709, "grad_norm": 0.6050611138343811, "learning_rate": 4.077091873955051e-06, "loss": 0.0339, "step": 43301 }, { "epoch": 0.7668583724049994, "grad_norm": 0.5263258218765259, "learning_rate": 4.076502222806436e-06, "loss": 0.0523, "step": 43302 }, { "epoch": 0.7668760819420278, "grad_norm": 0.31258848309516907, "learning_rate": 4.0759126075950245e-06, "loss": 0.0525, "step": 43303 }, { "epoch": 0.7668937914790562, "grad_norm": 0.36832866072654724, "learning_rate": 4.075323028322755e-06, "loss": 0.0598, "step": 43304 }, { "epoch": 0.7669115010160847, "grad_norm": 0.2001180797815323, "learning_rate": 4.074733484991568e-06, "loss": 0.0433, "step": 43305 }, { "epoch": 0.7669292105531131, "grad_norm": 0.6808636784553528, "learning_rate": 4.074143977603404e-06, "loss": 0.0548, "step": 43306 }, { "epoch": 0.7669469200901415, "grad_norm": 0.5121492743492126, "learning_rate": 4.0735545061601995e-06, "loss": 0.0301, "step": 43307 }, { "epoch": 0.7669646296271699, "grad_norm": 0.4998001158237457, "learning_rate": 4.072965070663901e-06, "loss": 0.0574, "step": 43308 }, { "epoch": 0.7669823391641984, "grad_norm": 1.0627334117889404, "learning_rate": 4.072375671116436e-06, "loss": 0.0726, "step": 43309 }, { "epoch": 0.7670000487012268, "grad_norm": 0.43484288454055786, "learning_rate": 4.071786307519752e-06, "loss": 0.038, "step": 43310 }, { "epoch": 0.7670177582382552, "grad_norm": 0.5121204257011414, "learning_rate": 4.071196979875788e-06, "loss": 0.0364, "step": 43311 }, { "epoch": 0.7670354677752836, "grad_norm": 0.39583542943000793, "learning_rate": 4.070607688186478e-06, "loss": 0.0582, "step": 43312 }, { "epoch": 0.7670531773123122, "grad_norm": 0.5072602033615112, "learning_rate": 4.070018432453763e-06, "loss": 0.0588, "step": 43313 }, { "epoch": 0.7670708868493405, "grad_norm": 0.66524338722229, "learning_rate": 4.069429212679576e-06, "loss": 0.0432, "step": 43314 }, { "epoch": 0.767088596386369, "grad_norm": 0.6025716662406921, "learning_rate": 4.0688400288658725e-06, "loss": 0.0659, "step": 43315 }, { "epoch": 0.7671063059233973, "grad_norm": 1.1643010377883911, "learning_rate": 4.068250881014573e-06, "loss": 0.06, "step": 43316 }, { "epoch": 0.7671240154604259, "grad_norm": 0.7694553732872009, "learning_rate": 4.067661769127624e-06, "loss": 0.0508, "step": 43317 }, { "epoch": 0.7671417249974543, "grad_norm": 0.6006541848182678, "learning_rate": 4.067072693206961e-06, "loss": 0.0429, "step": 43318 }, { "epoch": 0.7671594345344827, "grad_norm": 0.7666875720024109, "learning_rate": 4.066483653254528e-06, "loss": 0.056, "step": 43319 }, { "epoch": 0.7671771440715112, "grad_norm": 0.3982459306716919, "learning_rate": 4.065894649272254e-06, "loss": 0.0512, "step": 43320 }, { "epoch": 0.7671948536085396, "grad_norm": 0.40865424275398254, "learning_rate": 4.06530568126208e-06, "loss": 0.0637, "step": 43321 }, { "epoch": 0.767212563145568, "grad_norm": 0.7401723265647888, "learning_rate": 4.064716749225944e-06, "loss": 0.0652, "step": 43322 }, { "epoch": 0.7672302726825964, "grad_norm": 0.4042215943336487, "learning_rate": 4.0641278531657846e-06, "loss": 0.0494, "step": 43323 }, { "epoch": 0.7672479822196249, "grad_norm": 0.6860504746437073, "learning_rate": 4.063538993083537e-06, "loss": 0.0571, "step": 43324 }, { "epoch": 0.7672656917566533, "grad_norm": 0.5631138682365417, "learning_rate": 4.062950168981141e-06, "loss": 0.0326, "step": 43325 }, { "epoch": 0.7672834012936817, "grad_norm": 0.49875083565711975, "learning_rate": 4.062361380860538e-06, "loss": 0.0554, "step": 43326 }, { "epoch": 0.7673011108307101, "grad_norm": 0.6825099587440491, "learning_rate": 4.061772628723654e-06, "loss": 0.058, "step": 43327 }, { "epoch": 0.7673188203677386, "grad_norm": 0.6770527362823486, "learning_rate": 4.061183912572432e-06, "loss": 0.0653, "step": 43328 }, { "epoch": 0.767336529904767, "grad_norm": 0.29577720165252686, "learning_rate": 4.060595232408815e-06, "loss": 0.0397, "step": 43329 }, { "epoch": 0.7673542394417954, "grad_norm": 0.6073414087295532, "learning_rate": 4.060006588234727e-06, "loss": 0.0487, "step": 43330 }, { "epoch": 0.7673719489788238, "grad_norm": 0.6062414646148682, "learning_rate": 4.059417980052111e-06, "loss": 0.0405, "step": 43331 }, { "epoch": 0.7673896585158523, "grad_norm": 0.7126116156578064, "learning_rate": 4.058829407862903e-06, "loss": 0.0691, "step": 43332 }, { "epoch": 0.7674073680528807, "grad_norm": 0.487642765045166, "learning_rate": 4.058240871669038e-06, "loss": 0.0571, "step": 43333 }, { "epoch": 0.7674250775899091, "grad_norm": 0.5039281845092773, "learning_rate": 4.0576523714724574e-06, "loss": 0.0662, "step": 43334 }, { "epoch": 0.7674427871269376, "grad_norm": 0.3834967017173767, "learning_rate": 4.057063907275092e-06, "loss": 0.0474, "step": 43335 }, { "epoch": 0.767460496663966, "grad_norm": 0.5213830471038818, "learning_rate": 4.056475479078883e-06, "loss": 0.0409, "step": 43336 }, { "epoch": 0.7674782062009944, "grad_norm": 0.5012944936752319, "learning_rate": 4.05588708688576e-06, "loss": 0.0719, "step": 43337 }, { "epoch": 0.7674959157380228, "grad_norm": 0.5141186714172363, "learning_rate": 4.055298730697661e-06, "loss": 0.0347, "step": 43338 }, { "epoch": 0.7675136252750513, "grad_norm": 0.5833844542503357, "learning_rate": 4.054710410516521e-06, "loss": 0.0489, "step": 43339 }, { "epoch": 0.7675313348120797, "grad_norm": 0.4754367172718048, "learning_rate": 4.054122126344282e-06, "loss": 0.0629, "step": 43340 }, { "epoch": 0.7675490443491081, "grad_norm": 0.8130390048027039, "learning_rate": 4.053533878182871e-06, "loss": 0.0526, "step": 43341 }, { "epoch": 0.7675667538861365, "grad_norm": 0.4558585584163666, "learning_rate": 4.0529456660342236e-06, "loss": 0.0497, "step": 43342 }, { "epoch": 0.767584463423165, "grad_norm": 0.5227075815200806, "learning_rate": 4.052357489900281e-06, "loss": 0.0439, "step": 43343 }, { "epoch": 0.7676021729601934, "grad_norm": 0.6724823117256165, "learning_rate": 4.051769349782973e-06, "loss": 0.0559, "step": 43344 }, { "epoch": 0.7676198824972218, "grad_norm": 0.649740993976593, "learning_rate": 4.0511812456842365e-06, "loss": 0.0588, "step": 43345 }, { "epoch": 0.7676375920342502, "grad_norm": 0.49331918358802795, "learning_rate": 4.050593177606006e-06, "loss": 0.0447, "step": 43346 }, { "epoch": 0.7676553015712787, "grad_norm": 0.6268042325973511, "learning_rate": 4.050005145550221e-06, "loss": 0.0721, "step": 43347 }, { "epoch": 0.7676730111083071, "grad_norm": 0.8437228202819824, "learning_rate": 4.0494171495188085e-06, "loss": 0.0738, "step": 43348 }, { "epoch": 0.7676907206453355, "grad_norm": 0.5226214528083801, "learning_rate": 4.048829189513705e-06, "loss": 0.0639, "step": 43349 }, { "epoch": 0.767708430182364, "grad_norm": 0.6052327752113342, "learning_rate": 4.048241265536846e-06, "loss": 0.0572, "step": 43350 }, { "epoch": 0.7677261397193924, "grad_norm": 0.4191179871559143, "learning_rate": 4.047653377590166e-06, "loss": 0.0328, "step": 43351 }, { "epoch": 0.7677438492564208, "grad_norm": 0.551857590675354, "learning_rate": 4.047065525675598e-06, "loss": 0.0739, "step": 43352 }, { "epoch": 0.7677615587934492, "grad_norm": 0.3091501295566559, "learning_rate": 4.046477709795076e-06, "loss": 0.0594, "step": 43353 }, { "epoch": 0.7677792683304777, "grad_norm": 0.6606755256652832, "learning_rate": 4.045889929950539e-06, "loss": 0.0483, "step": 43354 }, { "epoch": 0.7677969778675061, "grad_norm": 0.3805810511112213, "learning_rate": 4.045302186143912e-06, "loss": 0.044, "step": 43355 }, { "epoch": 0.7678146874045345, "grad_norm": 0.8470329642295837, "learning_rate": 4.0447144783771344e-06, "loss": 0.0466, "step": 43356 }, { "epoch": 0.7678323969415629, "grad_norm": 0.6199343204498291, "learning_rate": 4.044126806652137e-06, "loss": 0.0702, "step": 43357 }, { "epoch": 0.7678501064785914, "grad_norm": 0.4472537934780121, "learning_rate": 4.043539170970858e-06, "loss": 0.0558, "step": 43358 }, { "epoch": 0.7678678160156198, "grad_norm": 0.7283122539520264, "learning_rate": 4.0429515713352225e-06, "loss": 0.0491, "step": 43359 }, { "epoch": 0.7678855255526482, "grad_norm": 0.8065385222434998, "learning_rate": 4.0423640077471695e-06, "loss": 0.05, "step": 43360 }, { "epoch": 0.7679032350896766, "grad_norm": 0.5432769656181335, "learning_rate": 4.041776480208629e-06, "loss": 0.082, "step": 43361 }, { "epoch": 0.7679209446267051, "grad_norm": 0.5989788770675659, "learning_rate": 4.041188988721537e-06, "loss": 0.0778, "step": 43362 }, { "epoch": 0.7679386541637335, "grad_norm": 0.6507882475852966, "learning_rate": 4.040601533287824e-06, "loss": 0.0492, "step": 43363 }, { "epoch": 0.7679563637007619, "grad_norm": 0.968281626701355, "learning_rate": 4.040014113909423e-06, "loss": 0.0876, "step": 43364 }, { "epoch": 0.7679740732377904, "grad_norm": 0.44034603238105774, "learning_rate": 4.039426730588272e-06, "loss": 0.0527, "step": 43365 }, { "epoch": 0.7679917827748188, "grad_norm": 0.3704441487789154, "learning_rate": 4.038839383326294e-06, "loss": 0.0228, "step": 43366 }, { "epoch": 0.7680094923118472, "grad_norm": 0.37688326835632324, "learning_rate": 4.038252072125426e-06, "loss": 0.0504, "step": 43367 }, { "epoch": 0.7680272018488756, "grad_norm": 0.46496090292930603, "learning_rate": 4.037664796987605e-06, "loss": 0.0592, "step": 43368 }, { "epoch": 0.7680449113859041, "grad_norm": 0.6054969429969788, "learning_rate": 4.037077557914752e-06, "loss": 0.0667, "step": 43369 }, { "epoch": 0.7680626209229325, "grad_norm": 0.9891201257705688, "learning_rate": 4.036490354908806e-06, "loss": 0.0694, "step": 43370 }, { "epoch": 0.7680803304599609, "grad_norm": 0.5240814089775085, "learning_rate": 4.035903187971698e-06, "loss": 0.0431, "step": 43371 }, { "epoch": 0.7680980399969893, "grad_norm": 0.36348655819892883, "learning_rate": 4.035316057105359e-06, "loss": 0.0647, "step": 43372 }, { "epoch": 0.7681157495340178, "grad_norm": 0.43673083186149597, "learning_rate": 4.03472896231172e-06, "loss": 0.0425, "step": 43373 }, { "epoch": 0.7681334590710462, "grad_norm": 0.31323620676994324, "learning_rate": 4.0341419035927165e-06, "loss": 0.057, "step": 43374 }, { "epoch": 0.7681511686080746, "grad_norm": 0.5261883735656738, "learning_rate": 4.03355488095028e-06, "loss": 0.055, "step": 43375 }, { "epoch": 0.768168878145103, "grad_norm": 0.5889642238616943, "learning_rate": 4.032967894386334e-06, "loss": 0.0745, "step": 43376 }, { "epoch": 0.7681865876821315, "grad_norm": 0.8307101726531982, "learning_rate": 4.032380943902815e-06, "loss": 0.0933, "step": 43377 }, { "epoch": 0.76820429721916, "grad_norm": 0.48233655095100403, "learning_rate": 4.031794029501654e-06, "loss": 0.067, "step": 43378 }, { "epoch": 0.7682220067561883, "grad_norm": 0.4111766219139099, "learning_rate": 4.0312071511847835e-06, "loss": 0.0462, "step": 43379 }, { "epoch": 0.7682397162932169, "grad_norm": 0.8811164498329163, "learning_rate": 4.0306203089541246e-06, "loss": 0.0727, "step": 43380 }, { "epoch": 0.7682574258302453, "grad_norm": 0.5692498683929443, "learning_rate": 4.0300335028116195e-06, "loss": 0.0288, "step": 43381 }, { "epoch": 0.7682751353672737, "grad_norm": 0.4931560456752777, "learning_rate": 4.029446732759194e-06, "loss": 0.0421, "step": 43382 }, { "epoch": 0.768292844904302, "grad_norm": 0.4705846309661865, "learning_rate": 4.0288599987987856e-06, "loss": 0.0523, "step": 43383 }, { "epoch": 0.7683105544413306, "grad_norm": 0.6624270081520081, "learning_rate": 4.028273300932313e-06, "loss": 0.0915, "step": 43384 }, { "epoch": 0.768328263978359, "grad_norm": 0.2873059809207916, "learning_rate": 4.02768663916171e-06, "loss": 0.0613, "step": 43385 }, { "epoch": 0.7683459735153874, "grad_norm": 0.6876224875450134, "learning_rate": 4.027100013488913e-06, "loss": 0.0485, "step": 43386 }, { "epoch": 0.7683636830524158, "grad_norm": 0.34892013669013977, "learning_rate": 4.0265134239158455e-06, "loss": 0.049, "step": 43387 }, { "epoch": 0.7683813925894443, "grad_norm": 0.3457447290420532, "learning_rate": 4.025926870444435e-06, "loss": 0.0516, "step": 43388 }, { "epoch": 0.7683991021264727, "grad_norm": 0.6087350845336914, "learning_rate": 4.025340353076617e-06, "loss": 0.0333, "step": 43389 }, { "epoch": 0.7684168116635011, "grad_norm": 0.2286662459373474, "learning_rate": 4.024753871814319e-06, "loss": 0.0342, "step": 43390 }, { "epoch": 0.7684345212005295, "grad_norm": 0.47680893540382385, "learning_rate": 4.024167426659472e-06, "loss": 0.0376, "step": 43391 }, { "epoch": 0.768452230737558, "grad_norm": 0.4018058776855469, "learning_rate": 4.023581017614002e-06, "loss": 0.0615, "step": 43392 }, { "epoch": 0.7684699402745864, "grad_norm": 0.7480461001396179, "learning_rate": 4.022994644679846e-06, "loss": 0.0619, "step": 43393 }, { "epoch": 0.7684876498116148, "grad_norm": 0.4436585605144501, "learning_rate": 4.022408307858922e-06, "loss": 0.0663, "step": 43394 }, { "epoch": 0.7685053593486433, "grad_norm": 0.6979808807373047, "learning_rate": 4.021822007153166e-06, "loss": 0.0623, "step": 43395 }, { "epoch": 0.7685230688856717, "grad_norm": 0.656758189201355, "learning_rate": 4.021235742564503e-06, "loss": 0.0568, "step": 43396 }, { "epoch": 0.7685407784227001, "grad_norm": 0.3133023679256439, "learning_rate": 4.0206495140948685e-06, "loss": 0.0526, "step": 43397 }, { "epoch": 0.7685584879597285, "grad_norm": 0.44066929817199707, "learning_rate": 4.020063321746184e-06, "loss": 0.0379, "step": 43398 }, { "epoch": 0.768576197496757, "grad_norm": 0.6382946372032166, "learning_rate": 4.01947716552038e-06, "loss": 0.0667, "step": 43399 }, { "epoch": 0.7685939070337854, "grad_norm": 0.7710092067718506, "learning_rate": 4.018891045419385e-06, "loss": 0.0951, "step": 43400 }, { "epoch": 0.7686116165708138, "grad_norm": 0.6939384341239929, "learning_rate": 4.0183049614451275e-06, "loss": 0.0797, "step": 43401 }, { "epoch": 0.7686293261078422, "grad_norm": 0.3298007547855377, "learning_rate": 4.017718913599537e-06, "loss": 0.0324, "step": 43402 }, { "epoch": 0.7686470356448707, "grad_norm": 0.8965412974357605, "learning_rate": 4.01713290188454e-06, "loss": 0.0634, "step": 43403 }, { "epoch": 0.7686647451818991, "grad_norm": 0.5742166042327881, "learning_rate": 4.016546926302068e-06, "loss": 0.0504, "step": 43404 }, { "epoch": 0.7686824547189275, "grad_norm": 0.5543867349624634, "learning_rate": 4.015960986854044e-06, "loss": 0.0795, "step": 43405 }, { "epoch": 0.7687001642559559, "grad_norm": 0.810724675655365, "learning_rate": 4.015375083542395e-06, "loss": 0.0508, "step": 43406 }, { "epoch": 0.7687178737929844, "grad_norm": 0.6801249980926514, "learning_rate": 4.014789216369056e-06, "loss": 0.0616, "step": 43407 }, { "epoch": 0.7687355833300128, "grad_norm": 0.4117283523082733, "learning_rate": 4.0142033853359455e-06, "loss": 0.0469, "step": 43408 }, { "epoch": 0.7687532928670412, "grad_norm": 0.6954452991485596, "learning_rate": 4.013617590444991e-06, "loss": 0.0508, "step": 43409 }, { "epoch": 0.7687710024040697, "grad_norm": 0.3653581440448761, "learning_rate": 4.0130318316981266e-06, "loss": 0.0623, "step": 43410 }, { "epoch": 0.7687887119410981, "grad_norm": 0.36694052815437317, "learning_rate": 4.012446109097281e-06, "loss": 0.0722, "step": 43411 }, { "epoch": 0.7688064214781265, "grad_norm": 0.6965880990028381, "learning_rate": 4.011860422644371e-06, "loss": 0.0447, "step": 43412 }, { "epoch": 0.7688241310151549, "grad_norm": 0.6098286509513855, "learning_rate": 4.011274772341329e-06, "loss": 0.0635, "step": 43413 }, { "epoch": 0.7688418405521834, "grad_norm": 0.551202654838562, "learning_rate": 4.010689158190088e-06, "loss": 0.0513, "step": 43414 }, { "epoch": 0.7688595500892118, "grad_norm": 0.5916963219642639, "learning_rate": 4.0101035801925626e-06, "loss": 0.0385, "step": 43415 }, { "epoch": 0.7688772596262402, "grad_norm": 0.6671350002288818, "learning_rate": 4.009518038350684e-06, "loss": 0.0571, "step": 43416 }, { "epoch": 0.7688949691632686, "grad_norm": 0.931892991065979, "learning_rate": 4.00893253266638e-06, "loss": 0.0541, "step": 43417 }, { "epoch": 0.7689126787002971, "grad_norm": 0.6147589683532715, "learning_rate": 4.008347063141576e-06, "loss": 0.0681, "step": 43418 }, { "epoch": 0.7689303882373255, "grad_norm": 0.7458816766738892, "learning_rate": 4.007761629778198e-06, "loss": 0.0541, "step": 43419 }, { "epoch": 0.7689480977743539, "grad_norm": 0.5571241974830627, "learning_rate": 4.007176232578174e-06, "loss": 0.0536, "step": 43420 }, { "epoch": 0.7689658073113823, "grad_norm": 0.7976865768432617, "learning_rate": 4.006590871543429e-06, "loss": 0.0806, "step": 43421 }, { "epoch": 0.7689835168484108, "grad_norm": 0.5069186091423035, "learning_rate": 4.006005546675891e-06, "loss": 0.0616, "step": 43422 }, { "epoch": 0.7690012263854392, "grad_norm": 0.4041752517223358, "learning_rate": 4.0054202579774785e-06, "loss": 0.0485, "step": 43423 }, { "epoch": 0.7690189359224676, "grad_norm": 0.6232009530067444, "learning_rate": 4.004835005450121e-06, "loss": 0.0475, "step": 43424 }, { "epoch": 0.7690366454594961, "grad_norm": 0.5659187436103821, "learning_rate": 4.004249789095748e-06, "loss": 0.047, "step": 43425 }, { "epoch": 0.7690543549965245, "grad_norm": 0.4318232238292694, "learning_rate": 4.0036646089162786e-06, "loss": 0.0881, "step": 43426 }, { "epoch": 0.7690720645335529, "grad_norm": 0.4623781144618988, "learning_rate": 4.00307946491364e-06, "loss": 0.0414, "step": 43427 }, { "epoch": 0.7690897740705813, "grad_norm": 0.7860673666000366, "learning_rate": 4.002494357089757e-06, "loss": 0.065, "step": 43428 }, { "epoch": 0.7691074836076098, "grad_norm": 0.4899888038635254, "learning_rate": 4.001909285446555e-06, "loss": 0.0456, "step": 43429 }, { "epoch": 0.7691251931446382, "grad_norm": 0.47887328267097473, "learning_rate": 4.0013242499859585e-06, "loss": 0.0504, "step": 43430 }, { "epoch": 0.7691429026816666, "grad_norm": 0.36639612913131714, "learning_rate": 4.0007392507098945e-06, "loss": 0.0388, "step": 43431 }, { "epoch": 0.769160612218695, "grad_norm": 0.6975160837173462, "learning_rate": 4.000154287620289e-06, "loss": 0.0518, "step": 43432 }, { "epoch": 0.7691783217557235, "grad_norm": 0.7636928558349609, "learning_rate": 3.9995693607190594e-06, "loss": 0.0688, "step": 43433 }, { "epoch": 0.7691960312927519, "grad_norm": 0.5039401650428772, "learning_rate": 3.998984470008133e-06, "loss": 0.0662, "step": 43434 }, { "epoch": 0.7692137408297803, "grad_norm": 0.39895322918891907, "learning_rate": 3.998399615489437e-06, "loss": 0.0542, "step": 43435 }, { "epoch": 0.7692314503668087, "grad_norm": 0.40559515357017517, "learning_rate": 3.997814797164896e-06, "loss": 0.0365, "step": 43436 }, { "epoch": 0.7692491599038372, "grad_norm": 0.6817391514778137, "learning_rate": 3.997230015036429e-06, "loss": 0.0546, "step": 43437 }, { "epoch": 0.7692668694408656, "grad_norm": 0.6796528697013855, "learning_rate": 3.996645269105956e-06, "loss": 0.0856, "step": 43438 }, { "epoch": 0.769284578977894, "grad_norm": 0.5197361707687378, "learning_rate": 3.996060559375417e-06, "loss": 0.0541, "step": 43439 }, { "epoch": 0.7693022885149225, "grad_norm": 0.757868230342865, "learning_rate": 3.995475885846722e-06, "loss": 0.0674, "step": 43440 }, { "epoch": 0.769319998051951, "grad_norm": 0.5926955342292786, "learning_rate": 3.994891248521797e-06, "loss": 0.0565, "step": 43441 }, { "epoch": 0.7693377075889793, "grad_norm": 0.45839110016822815, "learning_rate": 3.994306647402567e-06, "loss": 0.0502, "step": 43442 }, { "epoch": 0.7693554171260077, "grad_norm": 0.5930172801017761, "learning_rate": 3.9937220824909605e-06, "loss": 0.0778, "step": 43443 }, { "epoch": 0.7693731266630363, "grad_norm": 0.8795271515846252, "learning_rate": 3.993137553788889e-06, "loss": 0.0635, "step": 43444 }, { "epoch": 0.7693908362000647, "grad_norm": 0.45985254645347595, "learning_rate": 3.992553061298284e-06, "loss": 0.0516, "step": 43445 }, { "epoch": 0.769408545737093, "grad_norm": 0.7251769304275513, "learning_rate": 3.991968605021064e-06, "loss": 0.0407, "step": 43446 }, { "epoch": 0.7694262552741215, "grad_norm": 0.20250020921230316, "learning_rate": 3.9913841849591545e-06, "loss": 0.0291, "step": 43447 }, { "epoch": 0.76944396481115, "grad_norm": 0.6269738078117371, "learning_rate": 3.990799801114478e-06, "loss": 0.082, "step": 43448 }, { "epoch": 0.7694616743481784, "grad_norm": 0.7818633317947388, "learning_rate": 3.990215453488955e-06, "loss": 0.0787, "step": 43449 }, { "epoch": 0.7694793838852068, "grad_norm": 0.3833739757537842, "learning_rate": 3.989631142084516e-06, "loss": 0.0518, "step": 43450 }, { "epoch": 0.7694970934222352, "grad_norm": 0.25958630442619324, "learning_rate": 3.9890468669030725e-06, "loss": 0.0684, "step": 43451 }, { "epoch": 0.7695148029592637, "grad_norm": 0.570801854133606, "learning_rate": 3.988462627946548e-06, "loss": 0.0564, "step": 43452 }, { "epoch": 0.7695325124962921, "grad_norm": 0.5218595266342163, "learning_rate": 3.987878425216871e-06, "loss": 0.051, "step": 43453 }, { "epoch": 0.7695502220333205, "grad_norm": 1.038080096244812, "learning_rate": 3.987294258715965e-06, "loss": 0.0595, "step": 43454 }, { "epoch": 0.769567931570349, "grad_norm": 0.5139442682266235, "learning_rate": 3.986710128445741e-06, "loss": 0.051, "step": 43455 }, { "epoch": 0.7695856411073774, "grad_norm": 0.5792019367218018, "learning_rate": 3.986126034408126e-06, "loss": 0.0537, "step": 43456 }, { "epoch": 0.7696033506444058, "grad_norm": 0.8317689895629883, "learning_rate": 3.9855419766050424e-06, "loss": 0.1027, "step": 43457 }, { "epoch": 0.7696210601814342, "grad_norm": 0.6148386597633362, "learning_rate": 3.984957955038414e-06, "loss": 0.0732, "step": 43458 }, { "epoch": 0.7696387697184627, "grad_norm": 0.33923518657684326, "learning_rate": 3.984373969710159e-06, "loss": 0.0625, "step": 43459 }, { "epoch": 0.7696564792554911, "grad_norm": 0.7499549388885498, "learning_rate": 3.983790020622197e-06, "loss": 0.0739, "step": 43460 }, { "epoch": 0.7696741887925195, "grad_norm": 0.65101557970047, "learning_rate": 3.983206107776459e-06, "loss": 0.0534, "step": 43461 }, { "epoch": 0.7696918983295479, "grad_norm": 0.7062092423439026, "learning_rate": 3.982622231174853e-06, "loss": 0.0821, "step": 43462 }, { "epoch": 0.7697096078665764, "grad_norm": 0.6347527503967285, "learning_rate": 3.982038390819306e-06, "loss": 0.0333, "step": 43463 }, { "epoch": 0.7697273174036048, "grad_norm": 0.988489031791687, "learning_rate": 3.981454586711742e-06, "loss": 0.041, "step": 43464 }, { "epoch": 0.7697450269406332, "grad_norm": 0.6505805253982544, "learning_rate": 3.9808708188540744e-06, "loss": 0.0694, "step": 43465 }, { "epoch": 0.7697627364776616, "grad_norm": 0.7663863301277161, "learning_rate": 3.980287087248226e-06, "loss": 0.0573, "step": 43466 }, { "epoch": 0.7697804460146901, "grad_norm": 0.4166635274887085, "learning_rate": 3.9797033918961154e-06, "loss": 0.0301, "step": 43467 }, { "epoch": 0.7697981555517185, "grad_norm": 0.5891242027282715, "learning_rate": 3.979119732799675e-06, "loss": 0.0526, "step": 43468 }, { "epoch": 0.7698158650887469, "grad_norm": 1.0438430309295654, "learning_rate": 3.9785361099608095e-06, "loss": 0.0658, "step": 43469 }, { "epoch": 0.7698335746257754, "grad_norm": 0.5089945197105408, "learning_rate": 3.977952523381447e-06, "loss": 0.0623, "step": 43470 }, { "epoch": 0.7698512841628038, "grad_norm": 0.6586790680885315, "learning_rate": 3.977368973063511e-06, "loss": 0.0476, "step": 43471 }, { "epoch": 0.7698689936998322, "grad_norm": 0.9113918542861938, "learning_rate": 3.976785459008911e-06, "loss": 0.0632, "step": 43472 }, { "epoch": 0.7698867032368606, "grad_norm": 0.7426743507385254, "learning_rate": 3.976201981219572e-06, "loss": 0.0543, "step": 43473 }, { "epoch": 0.7699044127738891, "grad_norm": 0.5739538073539734, "learning_rate": 3.975618539697413e-06, "loss": 0.0512, "step": 43474 }, { "epoch": 0.7699221223109175, "grad_norm": 0.40744778513908386, "learning_rate": 3.975035134444354e-06, "loss": 0.0365, "step": 43475 }, { "epoch": 0.7699398318479459, "grad_norm": 0.3625699281692505, "learning_rate": 3.974451765462315e-06, "loss": 0.0639, "step": 43476 }, { "epoch": 0.7699575413849743, "grad_norm": 0.4772467017173767, "learning_rate": 3.973868432753213e-06, "loss": 0.058, "step": 43477 }, { "epoch": 0.7699752509220028, "grad_norm": 0.6540707349777222, "learning_rate": 3.973285136318975e-06, "loss": 0.0618, "step": 43478 }, { "epoch": 0.7699929604590312, "grad_norm": 0.8543291091918945, "learning_rate": 3.972701876161508e-06, "loss": 0.0579, "step": 43479 }, { "epoch": 0.7700106699960596, "grad_norm": 0.3446040153503418, "learning_rate": 3.972118652282736e-06, "loss": 0.0382, "step": 43480 }, { "epoch": 0.770028379533088, "grad_norm": 0.3690394163131714, "learning_rate": 3.9715354646845796e-06, "loss": 0.057, "step": 43481 }, { "epoch": 0.7700460890701165, "grad_norm": 0.4599839150905609, "learning_rate": 3.97095231336896e-06, "loss": 0.0457, "step": 43482 }, { "epoch": 0.7700637986071449, "grad_norm": 0.8373939990997314, "learning_rate": 3.970369198337788e-06, "loss": 0.0855, "step": 43483 }, { "epoch": 0.7700815081441733, "grad_norm": 0.44676801562309265, "learning_rate": 3.969786119592984e-06, "loss": 0.0469, "step": 43484 }, { "epoch": 0.7700992176812018, "grad_norm": 0.5655593276023865, "learning_rate": 3.96920307713647e-06, "loss": 0.0797, "step": 43485 }, { "epoch": 0.7701169272182302, "grad_norm": 0.43081268668174744, "learning_rate": 3.968620070970161e-06, "loss": 0.0724, "step": 43486 }, { "epoch": 0.7701346367552586, "grad_norm": 1.051639199256897, "learning_rate": 3.968037101095976e-06, "loss": 0.0497, "step": 43487 }, { "epoch": 0.770152346292287, "grad_norm": 0.3949735164642334, "learning_rate": 3.967454167515833e-06, "loss": 0.0595, "step": 43488 }, { "epoch": 0.7701700558293155, "grad_norm": 0.53476482629776, "learning_rate": 3.966871270231656e-06, "loss": 0.0286, "step": 43489 }, { "epoch": 0.7701877653663439, "grad_norm": 0.5737104415893555, "learning_rate": 3.96628840924535e-06, "loss": 0.037, "step": 43490 }, { "epoch": 0.7702054749033723, "grad_norm": 0.3707978129386902, "learning_rate": 3.965705584558842e-06, "loss": 0.0335, "step": 43491 }, { "epoch": 0.7702231844404007, "grad_norm": 0.6680439710617065, "learning_rate": 3.965122796174044e-06, "loss": 0.0586, "step": 43492 }, { "epoch": 0.7702408939774292, "grad_norm": 0.7849749326705933, "learning_rate": 3.964540044092881e-06, "loss": 0.0789, "step": 43493 }, { "epoch": 0.7702586035144576, "grad_norm": 0.8467785716056824, "learning_rate": 3.963957328317261e-06, "loss": 0.0812, "step": 43494 }, { "epoch": 0.770276313051486, "grad_norm": 0.8634780645370483, "learning_rate": 3.963374648849104e-06, "loss": 0.0578, "step": 43495 }, { "epoch": 0.7702940225885144, "grad_norm": 0.6461859941482544, "learning_rate": 3.962792005690329e-06, "loss": 0.0517, "step": 43496 }, { "epoch": 0.7703117321255429, "grad_norm": 0.40996676683425903, "learning_rate": 3.962209398842852e-06, "loss": 0.0541, "step": 43497 }, { "epoch": 0.7703294416625713, "grad_norm": 0.5632531642913818, "learning_rate": 3.961626828308591e-06, "loss": 0.0413, "step": 43498 }, { "epoch": 0.7703471511995997, "grad_norm": 0.445830374956131, "learning_rate": 3.961044294089458e-06, "loss": 0.0522, "step": 43499 }, { "epoch": 0.7703648607366282, "grad_norm": 0.6666485667228699, "learning_rate": 3.96046179618738e-06, "loss": 0.052, "step": 43500 }, { "epoch": 0.7703825702736566, "grad_norm": 0.3267647922039032, "learning_rate": 3.95987933460426e-06, "loss": 0.0233, "step": 43501 }, { "epoch": 0.770400279810685, "grad_norm": 1.0171140432357788, "learning_rate": 3.959296909342021e-06, "loss": 0.0737, "step": 43502 }, { "epoch": 0.7704179893477134, "grad_norm": 0.2929406464099884, "learning_rate": 3.958714520402584e-06, "loss": 0.0421, "step": 43503 }, { "epoch": 0.770435698884742, "grad_norm": 0.5778288841247559, "learning_rate": 3.95813216778785e-06, "loss": 0.0521, "step": 43504 }, { "epoch": 0.7704534084217703, "grad_norm": 0.36043697595596313, "learning_rate": 3.957549851499749e-06, "loss": 0.0424, "step": 43505 }, { "epoch": 0.7704711179587987, "grad_norm": 0.2713964581489563, "learning_rate": 3.95696757154019e-06, "loss": 0.0434, "step": 43506 }, { "epoch": 0.7704888274958271, "grad_norm": 0.43733227252960205, "learning_rate": 3.956385327911098e-06, "loss": 0.0553, "step": 43507 }, { "epoch": 0.7705065370328557, "grad_norm": 1.260494589805603, "learning_rate": 3.955803120614375e-06, "loss": 0.084, "step": 43508 }, { "epoch": 0.770524246569884, "grad_norm": 0.6451995372772217, "learning_rate": 3.955220949651944e-06, "loss": 0.0537, "step": 43509 }, { "epoch": 0.7705419561069125, "grad_norm": 0.6950915455818176, "learning_rate": 3.954638815025722e-06, "loss": 0.0614, "step": 43510 }, { "epoch": 0.7705596656439408, "grad_norm": 0.7585458159446716, "learning_rate": 3.9540567167376175e-06, "loss": 0.0838, "step": 43511 }, { "epoch": 0.7705773751809694, "grad_norm": 0.42230892181396484, "learning_rate": 3.953474654789549e-06, "loss": 0.0425, "step": 43512 }, { "epoch": 0.7705950847179978, "grad_norm": 0.618857204914093, "learning_rate": 3.952892629183432e-06, "loss": 0.0648, "step": 43513 }, { "epoch": 0.7706127942550262, "grad_norm": 0.5158526301383972, "learning_rate": 3.952310639921179e-06, "loss": 0.0455, "step": 43514 }, { "epoch": 0.7706305037920547, "grad_norm": 0.718081533908844, "learning_rate": 3.951728687004709e-06, "loss": 0.0678, "step": 43515 }, { "epoch": 0.7706482133290831, "grad_norm": 0.4524945914745331, "learning_rate": 3.9511467704359325e-06, "loss": 0.0574, "step": 43516 }, { "epoch": 0.7706659228661115, "grad_norm": 0.5957880020141602, "learning_rate": 3.950564890216771e-06, "loss": 0.0489, "step": 43517 }, { "epoch": 0.7706836324031399, "grad_norm": 0.6982608437538147, "learning_rate": 3.94998304634913e-06, "loss": 0.0506, "step": 43518 }, { "epoch": 0.7707013419401684, "grad_norm": 0.3977348506450653, "learning_rate": 3.949401238834926e-06, "loss": 0.0517, "step": 43519 }, { "epoch": 0.7707190514771968, "grad_norm": 0.3923298120498657, "learning_rate": 3.948819467676073e-06, "loss": 0.0499, "step": 43520 }, { "epoch": 0.7707367610142252, "grad_norm": 0.3298890292644501, "learning_rate": 3.948237732874491e-06, "loss": 0.0508, "step": 43521 }, { "epoch": 0.7707544705512536, "grad_norm": 0.4338137209415436, "learning_rate": 3.947656034432087e-06, "loss": 0.0745, "step": 43522 }, { "epoch": 0.7707721800882821, "grad_norm": 0.5861905813217163, "learning_rate": 3.947074372350775e-06, "loss": 0.0476, "step": 43523 }, { "epoch": 0.7707898896253105, "grad_norm": 1.168176531791687, "learning_rate": 3.94649274663247e-06, "loss": 0.0719, "step": 43524 }, { "epoch": 0.7708075991623389, "grad_norm": 0.7509629726409912, "learning_rate": 3.945911157279087e-06, "loss": 0.0617, "step": 43525 }, { "epoch": 0.7708253086993673, "grad_norm": 0.2486373782157898, "learning_rate": 3.945329604292536e-06, "loss": 0.0362, "step": 43526 }, { "epoch": 0.7708430182363958, "grad_norm": 0.5206049680709839, "learning_rate": 3.944748087674736e-06, "loss": 0.0448, "step": 43527 }, { "epoch": 0.7708607277734242, "grad_norm": 0.6773786544799805, "learning_rate": 3.9441666074275974e-06, "loss": 0.0515, "step": 43528 }, { "epoch": 0.7708784373104526, "grad_norm": 0.8350634574890137, "learning_rate": 3.94358516355303e-06, "loss": 0.0516, "step": 43529 }, { "epoch": 0.7708961468474811, "grad_norm": 0.6047604084014893, "learning_rate": 3.9430037560529496e-06, "loss": 0.0416, "step": 43530 }, { "epoch": 0.7709138563845095, "grad_norm": 1.0980982780456543, "learning_rate": 3.942422384929267e-06, "loss": 0.0777, "step": 43531 }, { "epoch": 0.7709315659215379, "grad_norm": 0.6039168834686279, "learning_rate": 3.941841050183902e-06, "loss": 0.0542, "step": 43532 }, { "epoch": 0.7709492754585663, "grad_norm": 0.5236220955848694, "learning_rate": 3.9412597518187514e-06, "loss": 0.048, "step": 43533 }, { "epoch": 0.7709669849955948, "grad_norm": 0.9254149198532104, "learning_rate": 3.940678489835744e-06, "loss": 0.0701, "step": 43534 }, { "epoch": 0.7709846945326232, "grad_norm": 0.2751981019973755, "learning_rate": 3.940097264236789e-06, "loss": 0.0396, "step": 43535 }, { "epoch": 0.7710024040696516, "grad_norm": 0.46580377221107483, "learning_rate": 3.939516075023792e-06, "loss": 0.0517, "step": 43536 }, { "epoch": 0.77102011360668, "grad_norm": 0.4503222405910492, "learning_rate": 3.9389349221986664e-06, "loss": 0.0557, "step": 43537 }, { "epoch": 0.7710378231437085, "grad_norm": 0.7982621192932129, "learning_rate": 3.938353805763328e-06, "loss": 0.0638, "step": 43538 }, { "epoch": 0.7710555326807369, "grad_norm": 0.7537549734115601, "learning_rate": 3.937772725719689e-06, "loss": 0.079, "step": 43539 }, { "epoch": 0.7710732422177653, "grad_norm": 0.7435966730117798, "learning_rate": 3.937191682069656e-06, "loss": 0.0658, "step": 43540 }, { "epoch": 0.7710909517547937, "grad_norm": 0.5045894384384155, "learning_rate": 3.936610674815144e-06, "loss": 0.0528, "step": 43541 }, { "epoch": 0.7711086612918222, "grad_norm": 0.5303835868835449, "learning_rate": 3.936029703958063e-06, "loss": 0.0611, "step": 43542 }, { "epoch": 0.7711263708288506, "grad_norm": 0.4705112874507904, "learning_rate": 3.935448769500325e-06, "loss": 0.0419, "step": 43543 }, { "epoch": 0.771144080365879, "grad_norm": 0.4810376465320587, "learning_rate": 3.934867871443842e-06, "loss": 0.0658, "step": 43544 }, { "epoch": 0.7711617899029075, "grad_norm": 0.3233449459075928, "learning_rate": 3.9342870097905235e-06, "loss": 0.0335, "step": 43545 }, { "epoch": 0.7711794994399359, "grad_norm": 0.47297292947769165, "learning_rate": 3.933706184542288e-06, "loss": 0.0529, "step": 43546 }, { "epoch": 0.7711972089769643, "grad_norm": 0.4291808605194092, "learning_rate": 3.9331253957010345e-06, "loss": 0.0628, "step": 43547 }, { "epoch": 0.7712149185139927, "grad_norm": 0.7160609364509583, "learning_rate": 3.932544643268678e-06, "loss": 0.0586, "step": 43548 }, { "epoch": 0.7712326280510212, "grad_norm": 0.6916216611862183, "learning_rate": 3.931963927247135e-06, "loss": 0.0696, "step": 43549 }, { "epoch": 0.7712503375880496, "grad_norm": 0.17897829413414001, "learning_rate": 3.931383247638308e-06, "loss": 0.0443, "step": 43550 }, { "epoch": 0.771268047125078, "grad_norm": 0.5291027426719666, "learning_rate": 3.930802604444109e-06, "loss": 0.0901, "step": 43551 }, { "epoch": 0.7712857566621064, "grad_norm": 0.8587793111801147, "learning_rate": 3.930221997666452e-06, "loss": 0.0655, "step": 43552 }, { "epoch": 0.7713034661991349, "grad_norm": 0.9132832288742065, "learning_rate": 3.929641427307243e-06, "loss": 0.1009, "step": 43553 }, { "epoch": 0.7713211757361633, "grad_norm": 0.6001521944999695, "learning_rate": 3.929060893368395e-06, "loss": 0.0505, "step": 43554 }, { "epoch": 0.7713388852731917, "grad_norm": 0.5767291784286499, "learning_rate": 3.928480395851816e-06, "loss": 0.0775, "step": 43555 }, { "epoch": 0.7713565948102201, "grad_norm": 0.3337579071521759, "learning_rate": 3.927899934759417e-06, "loss": 0.049, "step": 43556 }, { "epoch": 0.7713743043472486, "grad_norm": 0.191611185669899, "learning_rate": 3.927319510093112e-06, "loss": 0.0326, "step": 43557 }, { "epoch": 0.771392013884277, "grad_norm": 0.5485485792160034, "learning_rate": 3.926739121854801e-06, "loss": 0.0563, "step": 43558 }, { "epoch": 0.7714097234213054, "grad_norm": 0.6038995981216431, "learning_rate": 3.926158770046399e-06, "loss": 0.0455, "step": 43559 }, { "epoch": 0.7714274329583339, "grad_norm": 0.5719209313392639, "learning_rate": 3.925578454669819e-06, "loss": 0.0368, "step": 43560 }, { "epoch": 0.7714451424953623, "grad_norm": 0.3544341027736664, "learning_rate": 3.924998175726961e-06, "loss": 0.052, "step": 43561 }, { "epoch": 0.7714628520323907, "grad_norm": 0.5559143424034119, "learning_rate": 3.9244179332197345e-06, "loss": 0.0584, "step": 43562 }, { "epoch": 0.7714805615694191, "grad_norm": 0.6835231184959412, "learning_rate": 3.923837727150057e-06, "loss": 0.0631, "step": 43563 }, { "epoch": 0.7714982711064476, "grad_norm": 0.3281628489494324, "learning_rate": 3.923257557519838e-06, "loss": 0.0435, "step": 43564 }, { "epoch": 0.771515980643476, "grad_norm": 0.40378814935684204, "learning_rate": 3.9226774243309764e-06, "loss": 0.0621, "step": 43565 }, { "epoch": 0.7715336901805044, "grad_norm": 0.5414910316467285, "learning_rate": 3.9220973275853856e-06, "loss": 0.0456, "step": 43566 }, { "epoch": 0.7715513997175328, "grad_norm": 0.4462345540523529, "learning_rate": 3.92151726728498e-06, "loss": 0.0406, "step": 43567 }, { "epoch": 0.7715691092545613, "grad_norm": 0.9395759701728821, "learning_rate": 3.920937243431656e-06, "loss": 0.0761, "step": 43568 }, { "epoch": 0.7715868187915897, "grad_norm": 0.5690180659294128, "learning_rate": 3.9203572560273275e-06, "loss": 0.0436, "step": 43569 }, { "epoch": 0.7716045283286181, "grad_norm": 0.5855194926261902, "learning_rate": 3.919777305073905e-06, "loss": 0.0646, "step": 43570 }, { "epoch": 0.7716222378656465, "grad_norm": 0.8222832679748535, "learning_rate": 3.919197390573293e-06, "loss": 0.0717, "step": 43571 }, { "epoch": 0.771639947402675, "grad_norm": 0.31629329919815063, "learning_rate": 3.9186175125274e-06, "loss": 0.0361, "step": 43572 }, { "epoch": 0.7716576569397035, "grad_norm": 0.7274428606033325, "learning_rate": 3.918037670938137e-06, "loss": 0.0658, "step": 43573 }, { "epoch": 0.7716753664767318, "grad_norm": 0.5294947624206543, "learning_rate": 3.917457865807411e-06, "loss": 0.108, "step": 43574 }, { "epoch": 0.7716930760137604, "grad_norm": 0.6883543729782104, "learning_rate": 3.916878097137126e-06, "loss": 0.063, "step": 43575 }, { "epoch": 0.7717107855507888, "grad_norm": 0.43388181924819946, "learning_rate": 3.91629836492919e-06, "loss": 0.043, "step": 43576 }, { "epoch": 0.7717284950878172, "grad_norm": 0.5803511142730713, "learning_rate": 3.915718669185511e-06, "loss": 0.0798, "step": 43577 }, { "epoch": 0.7717462046248456, "grad_norm": 0.35575780272483826, "learning_rate": 3.915139009907999e-06, "loss": 0.0618, "step": 43578 }, { "epoch": 0.7717639141618741, "grad_norm": 0.8038855791091919, "learning_rate": 3.914559387098557e-06, "loss": 0.0401, "step": 43579 }, { "epoch": 0.7717816236989025, "grad_norm": 0.5911377668380737, "learning_rate": 3.913979800759092e-06, "loss": 0.0641, "step": 43580 }, { "epoch": 0.7717993332359309, "grad_norm": 0.6132393479347229, "learning_rate": 3.913400250891514e-06, "loss": 0.069, "step": 43581 }, { "epoch": 0.7718170427729593, "grad_norm": 0.35986995697021484, "learning_rate": 3.912820737497725e-06, "loss": 0.0633, "step": 43582 }, { "epoch": 0.7718347523099878, "grad_norm": 0.48667556047439575, "learning_rate": 3.912241260579636e-06, "loss": 0.0688, "step": 43583 }, { "epoch": 0.7718524618470162, "grad_norm": 0.3039454221725464, "learning_rate": 3.9116618201391524e-06, "loss": 0.0492, "step": 43584 }, { "epoch": 0.7718701713840446, "grad_norm": 0.749980092048645, "learning_rate": 3.911082416178183e-06, "loss": 0.0602, "step": 43585 }, { "epoch": 0.771887880921073, "grad_norm": 0.5126694440841675, "learning_rate": 3.910503048698626e-06, "loss": 0.0503, "step": 43586 }, { "epoch": 0.7719055904581015, "grad_norm": 0.3450342118740082, "learning_rate": 3.909923717702394e-06, "loss": 0.0443, "step": 43587 }, { "epoch": 0.7719232999951299, "grad_norm": 0.49456849694252014, "learning_rate": 3.909344423191395e-06, "loss": 0.0604, "step": 43588 }, { "epoch": 0.7719410095321583, "grad_norm": 1.0646079778671265, "learning_rate": 3.908765165167526e-06, "loss": 0.0489, "step": 43589 }, { "epoch": 0.7719587190691868, "grad_norm": 0.5639516711235046, "learning_rate": 3.9081859436326985e-06, "loss": 0.0613, "step": 43590 }, { "epoch": 0.7719764286062152, "grad_norm": 0.5388134717941284, "learning_rate": 3.907606758588814e-06, "loss": 0.0654, "step": 43591 }, { "epoch": 0.7719941381432436, "grad_norm": 0.514431357383728, "learning_rate": 3.907027610037789e-06, "loss": 0.0418, "step": 43592 }, { "epoch": 0.772011847680272, "grad_norm": 0.7912793159484863, "learning_rate": 3.906448497981516e-06, "loss": 0.0628, "step": 43593 }, { "epoch": 0.7720295572173005, "grad_norm": 0.4470594525337219, "learning_rate": 3.905869422421906e-06, "loss": 0.0439, "step": 43594 }, { "epoch": 0.7720472667543289, "grad_norm": 0.5858463048934937, "learning_rate": 3.905290383360863e-06, "loss": 0.0552, "step": 43595 }, { "epoch": 0.7720649762913573, "grad_norm": 0.7336872816085815, "learning_rate": 3.904711380800298e-06, "loss": 0.0601, "step": 43596 }, { "epoch": 0.7720826858283857, "grad_norm": 0.6061040163040161, "learning_rate": 3.9041324147421056e-06, "loss": 0.0677, "step": 43597 }, { "epoch": 0.7721003953654142, "grad_norm": 0.25041183829307556, "learning_rate": 3.9035534851881945e-06, "loss": 0.0461, "step": 43598 }, { "epoch": 0.7721181049024426, "grad_norm": 0.5735741853713989, "learning_rate": 3.902974592140472e-06, "loss": 0.0495, "step": 43599 }, { "epoch": 0.772135814439471, "grad_norm": 0.7193957567214966, "learning_rate": 3.902395735600839e-06, "loss": 0.059, "step": 43600 }, { "epoch": 0.7721535239764994, "grad_norm": 0.30665475130081177, "learning_rate": 3.901816915571203e-06, "loss": 0.0292, "step": 43601 }, { "epoch": 0.7721712335135279, "grad_norm": 0.35008546710014343, "learning_rate": 3.9012381320534646e-06, "loss": 0.0437, "step": 43602 }, { "epoch": 0.7721889430505563, "grad_norm": 0.37479841709136963, "learning_rate": 3.900659385049537e-06, "loss": 0.0527, "step": 43603 }, { "epoch": 0.7722066525875847, "grad_norm": 0.9157578349113464, "learning_rate": 3.900080674561312e-06, "loss": 0.0707, "step": 43604 }, { "epoch": 0.7722243621246132, "grad_norm": 0.41151607036590576, "learning_rate": 3.899502000590698e-06, "loss": 0.0456, "step": 43605 }, { "epoch": 0.7722420716616416, "grad_norm": 0.6032944321632385, "learning_rate": 3.898923363139604e-06, "loss": 0.0449, "step": 43606 }, { "epoch": 0.77225978119867, "grad_norm": 0.5486462116241455, "learning_rate": 3.898344762209926e-06, "loss": 0.0491, "step": 43607 }, { "epoch": 0.7722774907356984, "grad_norm": 0.49475476145744324, "learning_rate": 3.89776619780357e-06, "loss": 0.0684, "step": 43608 }, { "epoch": 0.7722952002727269, "grad_norm": 0.771144688129425, "learning_rate": 3.8971876699224415e-06, "loss": 0.0652, "step": 43609 }, { "epoch": 0.7723129098097553, "grad_norm": 0.37873584032058716, "learning_rate": 3.896609178568441e-06, "loss": 0.0438, "step": 43610 }, { "epoch": 0.7723306193467837, "grad_norm": 0.5120172500610352, "learning_rate": 3.896030723743475e-06, "loss": 0.0696, "step": 43611 }, { "epoch": 0.7723483288838121, "grad_norm": 0.8338429927825928, "learning_rate": 3.895452305449445e-06, "loss": 0.0739, "step": 43612 }, { "epoch": 0.7723660384208406, "grad_norm": 1.012561559677124, "learning_rate": 3.894873923688257e-06, "loss": 0.0562, "step": 43613 }, { "epoch": 0.772383747957869, "grad_norm": 0.2001054733991623, "learning_rate": 3.894295578461806e-06, "loss": 0.0303, "step": 43614 }, { "epoch": 0.7724014574948974, "grad_norm": 0.5905489325523376, "learning_rate": 3.893717269772e-06, "loss": 0.0643, "step": 43615 }, { "epoch": 0.7724191670319258, "grad_norm": 0.6816146969795227, "learning_rate": 3.8931389976207396e-06, "loss": 0.0438, "step": 43616 }, { "epoch": 0.7724368765689543, "grad_norm": 0.5718312859535217, "learning_rate": 3.892560762009934e-06, "loss": 0.0749, "step": 43617 }, { "epoch": 0.7724545861059827, "grad_norm": 0.5492641925811768, "learning_rate": 3.891982562941476e-06, "loss": 0.0441, "step": 43618 }, { "epoch": 0.7724722956430111, "grad_norm": 0.5283745527267456, "learning_rate": 3.891404400417271e-06, "loss": 0.0504, "step": 43619 }, { "epoch": 0.7724900051800396, "grad_norm": 0.6158633828163147, "learning_rate": 3.8908262744392225e-06, "loss": 0.0591, "step": 43620 }, { "epoch": 0.772507714717068, "grad_norm": 0.39089611172676086, "learning_rate": 3.890248185009232e-06, "loss": 0.0429, "step": 43621 }, { "epoch": 0.7725254242540964, "grad_norm": 0.6530237793922424, "learning_rate": 3.889670132129201e-06, "loss": 0.0485, "step": 43622 }, { "epoch": 0.7725431337911248, "grad_norm": 1.1888818740844727, "learning_rate": 3.88909211580103e-06, "loss": 0.0731, "step": 43623 }, { "epoch": 0.7725608433281533, "grad_norm": 0.689365804195404, "learning_rate": 3.888514136026629e-06, "loss": 0.0601, "step": 43624 }, { "epoch": 0.7725785528651817, "grad_norm": 0.18923510611057281, "learning_rate": 3.887936192807887e-06, "loss": 0.0416, "step": 43625 }, { "epoch": 0.7725962624022101, "grad_norm": 0.6439439058303833, "learning_rate": 3.887358286146711e-06, "loss": 0.0499, "step": 43626 }, { "epoch": 0.7726139719392385, "grad_norm": 0.685481071472168, "learning_rate": 3.886780416045003e-06, "loss": 0.0567, "step": 43627 }, { "epoch": 0.772631681476267, "grad_norm": 0.8704067468643188, "learning_rate": 3.8862025825046635e-06, "loss": 0.0653, "step": 43628 }, { "epoch": 0.7726493910132954, "grad_norm": 0.47831082344055176, "learning_rate": 3.885624785527592e-06, "loss": 0.08, "step": 43629 }, { "epoch": 0.7726671005503238, "grad_norm": 0.30492040514945984, "learning_rate": 3.885047025115692e-06, "loss": 0.049, "step": 43630 }, { "epoch": 0.7726848100873522, "grad_norm": 0.4076499044895172, "learning_rate": 3.884469301270867e-06, "loss": 0.0475, "step": 43631 }, { "epoch": 0.7727025196243807, "grad_norm": 0.5024093985557556, "learning_rate": 3.883891613995009e-06, "loss": 0.0867, "step": 43632 }, { "epoch": 0.7727202291614091, "grad_norm": 0.3488454222679138, "learning_rate": 3.883313963290025e-06, "loss": 0.0412, "step": 43633 }, { "epoch": 0.7727379386984375, "grad_norm": 0.8037498593330383, "learning_rate": 3.882736349157812e-06, "loss": 0.0555, "step": 43634 }, { "epoch": 0.772755648235466, "grad_norm": 0.5631652474403381, "learning_rate": 3.882158771600276e-06, "loss": 0.0945, "step": 43635 }, { "epoch": 0.7727733577724945, "grad_norm": 0.6220059394836426, "learning_rate": 3.88158123061931e-06, "loss": 0.0437, "step": 43636 }, { "epoch": 0.7727910673095228, "grad_norm": 0.48067760467529297, "learning_rate": 3.881003726216816e-06, "loss": 0.044, "step": 43637 }, { "epoch": 0.7728087768465512, "grad_norm": 0.9618163108825684, "learning_rate": 3.880426258394697e-06, "loss": 0.074, "step": 43638 }, { "epoch": 0.7728264863835798, "grad_norm": 0.519476056098938, "learning_rate": 3.8798488271548485e-06, "loss": 0.0504, "step": 43639 }, { "epoch": 0.7728441959206082, "grad_norm": 0.5917105674743652, "learning_rate": 3.879271432499174e-06, "loss": 0.0582, "step": 43640 }, { "epoch": 0.7728619054576366, "grad_norm": 0.37662896513938904, "learning_rate": 3.878694074429571e-06, "loss": 0.045, "step": 43641 }, { "epoch": 0.772879614994665, "grad_norm": 0.8564926981925964, "learning_rate": 3.878116752947946e-06, "loss": 0.0725, "step": 43642 }, { "epoch": 0.7728973245316935, "grad_norm": 0.5105587244033813, "learning_rate": 3.8775394680561856e-06, "loss": 0.049, "step": 43643 }, { "epoch": 0.7729150340687219, "grad_norm": 0.5518573522567749, "learning_rate": 3.876962219756196e-06, "loss": 0.0807, "step": 43644 }, { "epoch": 0.7729327436057503, "grad_norm": 0.6513941884040833, "learning_rate": 3.876385008049881e-06, "loss": 0.0607, "step": 43645 }, { "epoch": 0.7729504531427787, "grad_norm": 0.6137580871582031, "learning_rate": 3.875807832939127e-06, "loss": 0.0466, "step": 43646 }, { "epoch": 0.7729681626798072, "grad_norm": 0.7753861546516418, "learning_rate": 3.875230694425844e-06, "loss": 0.0482, "step": 43647 }, { "epoch": 0.7729858722168356, "grad_norm": 0.4586198627948761, "learning_rate": 3.8746535925119236e-06, "loss": 0.066, "step": 43648 }, { "epoch": 0.773003581753864, "grad_norm": 0.43966373801231384, "learning_rate": 3.874076527199269e-06, "loss": 0.0593, "step": 43649 }, { "epoch": 0.7730212912908925, "grad_norm": 0.7135113477706909, "learning_rate": 3.873499498489776e-06, "loss": 0.0664, "step": 43650 }, { "epoch": 0.7730390008279209, "grad_norm": 0.8668684959411621, "learning_rate": 3.8729225063853465e-06, "loss": 0.0756, "step": 43651 }, { "epoch": 0.7730567103649493, "grad_norm": 0.9977118968963623, "learning_rate": 3.87234555088788e-06, "loss": 0.0847, "step": 43652 }, { "epoch": 0.7730744199019777, "grad_norm": 0.4569884240627289, "learning_rate": 3.871768631999267e-06, "loss": 0.0442, "step": 43653 }, { "epoch": 0.7730921294390062, "grad_norm": 0.6580561399459839, "learning_rate": 3.871191749721408e-06, "loss": 0.0744, "step": 43654 }, { "epoch": 0.7731098389760346, "grad_norm": 0.3768030107021332, "learning_rate": 3.870614904056204e-06, "loss": 0.0418, "step": 43655 }, { "epoch": 0.773127548513063, "grad_norm": 0.40994903445243835, "learning_rate": 3.870038095005555e-06, "loss": 0.037, "step": 43656 }, { "epoch": 0.7731452580500914, "grad_norm": 0.5011838674545288, "learning_rate": 3.8694613225713475e-06, "loss": 0.0597, "step": 43657 }, { "epoch": 0.7731629675871199, "grad_norm": 0.34844228625297546, "learning_rate": 3.868884586755489e-06, "loss": 0.0466, "step": 43658 }, { "epoch": 0.7731806771241483, "grad_norm": 0.5382434725761414, "learning_rate": 3.86830788755988e-06, "loss": 0.0613, "step": 43659 }, { "epoch": 0.7731983866611767, "grad_norm": 0.4824056029319763, "learning_rate": 3.867731224986406e-06, "loss": 0.07, "step": 43660 }, { "epoch": 0.7732160961982052, "grad_norm": 0.6497419476509094, "learning_rate": 3.867154599036973e-06, "loss": 0.057, "step": 43661 }, { "epoch": 0.7732338057352336, "grad_norm": 0.46919867396354675, "learning_rate": 3.866578009713474e-06, "loss": 0.0456, "step": 43662 }, { "epoch": 0.773251515272262, "grad_norm": 0.8200844526290894, "learning_rate": 3.866001457017812e-06, "loss": 0.0465, "step": 43663 }, { "epoch": 0.7732692248092904, "grad_norm": 0.5288427472114563, "learning_rate": 3.865424940951874e-06, "loss": 0.0548, "step": 43664 }, { "epoch": 0.7732869343463189, "grad_norm": 0.4601365327835083, "learning_rate": 3.864848461517562e-06, "loss": 0.0599, "step": 43665 }, { "epoch": 0.7733046438833473, "grad_norm": 0.6073372960090637, "learning_rate": 3.864272018716772e-06, "loss": 0.0532, "step": 43666 }, { "epoch": 0.7733223534203757, "grad_norm": 0.5487166047096252, "learning_rate": 3.863695612551401e-06, "loss": 0.0315, "step": 43667 }, { "epoch": 0.7733400629574041, "grad_norm": 0.5706625580787659, "learning_rate": 3.863119243023347e-06, "loss": 0.0611, "step": 43668 }, { "epoch": 0.7733577724944326, "grad_norm": 0.5350167751312256, "learning_rate": 3.862542910134502e-06, "loss": 0.074, "step": 43669 }, { "epoch": 0.773375482031461, "grad_norm": 0.7095470428466797, "learning_rate": 3.8619666138867685e-06, "loss": 0.0484, "step": 43670 }, { "epoch": 0.7733931915684894, "grad_norm": 0.8526899814605713, "learning_rate": 3.861390354282034e-06, "loss": 0.0668, "step": 43671 }, { "epoch": 0.7734109011055178, "grad_norm": 0.5405527353286743, "learning_rate": 3.8608141313222e-06, "loss": 0.0709, "step": 43672 }, { "epoch": 0.7734286106425463, "grad_norm": 0.35290348529815674, "learning_rate": 3.8602379450091605e-06, "loss": 0.0442, "step": 43673 }, { "epoch": 0.7734463201795747, "grad_norm": 0.5150821805000305, "learning_rate": 3.8596617953448145e-06, "loss": 0.0351, "step": 43674 }, { "epoch": 0.7734640297166031, "grad_norm": 0.36228427290916443, "learning_rate": 3.859085682331051e-06, "loss": 0.0245, "step": 43675 }, { "epoch": 0.7734817392536316, "grad_norm": 0.4769638478755951, "learning_rate": 3.858509605969769e-06, "loss": 0.0861, "step": 43676 }, { "epoch": 0.77349944879066, "grad_norm": 0.6697003841400146, "learning_rate": 3.857933566262863e-06, "loss": 0.0709, "step": 43677 }, { "epoch": 0.7735171583276884, "grad_norm": 0.4882522225379944, "learning_rate": 3.85735756321223e-06, "loss": 0.0435, "step": 43678 }, { "epoch": 0.7735348678647168, "grad_norm": 0.40289604663848877, "learning_rate": 3.8567815968197625e-06, "loss": 0.0499, "step": 43679 }, { "epoch": 0.7735525774017453, "grad_norm": 0.8227976560592651, "learning_rate": 3.856205667087356e-06, "loss": 0.0518, "step": 43680 }, { "epoch": 0.7735702869387737, "grad_norm": 0.7855307459831238, "learning_rate": 3.855629774016911e-06, "loss": 0.0762, "step": 43681 }, { "epoch": 0.7735879964758021, "grad_norm": 0.6703742742538452, "learning_rate": 3.855053917610312e-06, "loss": 0.048, "step": 43682 }, { "epoch": 0.7736057060128305, "grad_norm": 0.5991792678833008, "learning_rate": 3.85447809786946e-06, "loss": 0.0436, "step": 43683 }, { "epoch": 0.773623415549859, "grad_norm": 0.6750959157943726, "learning_rate": 3.853902314796249e-06, "loss": 0.0521, "step": 43684 }, { "epoch": 0.7736411250868874, "grad_norm": 0.748081386089325, "learning_rate": 3.85332656839257e-06, "loss": 0.0735, "step": 43685 }, { "epoch": 0.7736588346239158, "grad_norm": 0.47865185141563416, "learning_rate": 3.852750858660313e-06, "loss": 0.044, "step": 43686 }, { "epoch": 0.7736765441609442, "grad_norm": 0.5712060928344727, "learning_rate": 3.852175185601384e-06, "loss": 0.0602, "step": 43687 }, { "epoch": 0.7736942536979727, "grad_norm": 0.4648685157299042, "learning_rate": 3.851599549217674e-06, "loss": 0.0414, "step": 43688 }, { "epoch": 0.7737119632350011, "grad_norm": 0.48219165205955505, "learning_rate": 3.851023949511071e-06, "loss": 0.0479, "step": 43689 }, { "epoch": 0.7737296727720295, "grad_norm": 0.45847952365875244, "learning_rate": 3.8504483864834716e-06, "loss": 0.0475, "step": 43690 }, { "epoch": 0.773747382309058, "grad_norm": 0.4564536213874817, "learning_rate": 3.849872860136773e-06, "loss": 0.0511, "step": 43691 }, { "epoch": 0.7737650918460864, "grad_norm": 0.2731446623802185, "learning_rate": 3.849297370472862e-06, "loss": 0.0267, "step": 43692 }, { "epoch": 0.7737828013831148, "grad_norm": 0.703214168548584, "learning_rate": 3.848721917493632e-06, "loss": 0.0708, "step": 43693 }, { "epoch": 0.7738005109201432, "grad_norm": 0.6770933270454407, "learning_rate": 3.848146501200981e-06, "loss": 0.0584, "step": 43694 }, { "epoch": 0.7738182204571717, "grad_norm": 0.5223802924156189, "learning_rate": 3.8475711215968e-06, "loss": 0.0489, "step": 43695 }, { "epoch": 0.7738359299942001, "grad_norm": 0.7735869884490967, "learning_rate": 3.846995778682982e-06, "loss": 0.0522, "step": 43696 }, { "epoch": 0.7738536395312285, "grad_norm": 0.40849214792251587, "learning_rate": 3.8464204724614185e-06, "loss": 0.0615, "step": 43697 }, { "epoch": 0.7738713490682569, "grad_norm": 0.6081821918487549, "learning_rate": 3.845845202934005e-06, "loss": 0.0454, "step": 43698 }, { "epoch": 0.7738890586052855, "grad_norm": 0.8697110414505005, "learning_rate": 3.845269970102636e-06, "loss": 0.0574, "step": 43699 }, { "epoch": 0.7739067681423139, "grad_norm": 0.20256048440933228, "learning_rate": 3.844694773969198e-06, "loss": 0.0316, "step": 43700 }, { "epoch": 0.7739244776793422, "grad_norm": 0.6555918455123901, "learning_rate": 3.844119614535584e-06, "loss": 0.0799, "step": 43701 }, { "epoch": 0.7739421872163706, "grad_norm": 0.4172458350658417, "learning_rate": 3.843544491803693e-06, "loss": 0.0569, "step": 43702 }, { "epoch": 0.7739598967533992, "grad_norm": 0.4481554329395294, "learning_rate": 3.842969405775408e-06, "loss": 0.0362, "step": 43703 }, { "epoch": 0.7739776062904276, "grad_norm": 0.6897602081298828, "learning_rate": 3.842394356452625e-06, "loss": 0.0428, "step": 43704 }, { "epoch": 0.773995315827456, "grad_norm": 0.584883987903595, "learning_rate": 3.841819343837236e-06, "loss": 0.0322, "step": 43705 }, { "epoch": 0.7740130253644845, "grad_norm": 0.49484553933143616, "learning_rate": 3.8412443679311334e-06, "loss": 0.0501, "step": 43706 }, { "epoch": 0.7740307349015129, "grad_norm": 0.7381601929664612, "learning_rate": 3.840669428736208e-06, "loss": 0.0795, "step": 43707 }, { "epoch": 0.7740484444385413, "grad_norm": 0.884231686592102, "learning_rate": 3.840094526254351e-06, "loss": 0.0796, "step": 43708 }, { "epoch": 0.7740661539755697, "grad_norm": 0.8391638994216919, "learning_rate": 3.839519660487458e-06, "loss": 0.0718, "step": 43709 }, { "epoch": 0.7740838635125982, "grad_norm": 0.3378125727176666, "learning_rate": 3.838944831437414e-06, "loss": 0.0353, "step": 43710 }, { "epoch": 0.7741015730496266, "grad_norm": 0.7624448537826538, "learning_rate": 3.83837003910611e-06, "loss": 0.0749, "step": 43711 }, { "epoch": 0.774119282586655, "grad_norm": 0.5838169455528259, "learning_rate": 3.837795283495442e-06, "loss": 0.0676, "step": 43712 }, { "epoch": 0.7741369921236834, "grad_norm": 0.6247795224189758, "learning_rate": 3.837220564607302e-06, "loss": 0.0453, "step": 43713 }, { "epoch": 0.7741547016607119, "grad_norm": 0.8446115851402283, "learning_rate": 3.836645882443574e-06, "loss": 0.0723, "step": 43714 }, { "epoch": 0.7741724111977403, "grad_norm": 0.5225615501403809, "learning_rate": 3.836071237006146e-06, "loss": 0.0384, "step": 43715 }, { "epoch": 0.7741901207347687, "grad_norm": 0.5191749334335327, "learning_rate": 3.835496628296924e-06, "loss": 0.0632, "step": 43716 }, { "epoch": 0.7742078302717971, "grad_norm": 0.8931323885917664, "learning_rate": 3.834922056317784e-06, "loss": 0.0411, "step": 43717 }, { "epoch": 0.7742255398088256, "grad_norm": 0.3272906243801117, "learning_rate": 3.834347521070621e-06, "loss": 0.05, "step": 43718 }, { "epoch": 0.774243249345854, "grad_norm": 0.5452551245689392, "learning_rate": 3.833773022557327e-06, "loss": 0.087, "step": 43719 }, { "epoch": 0.7742609588828824, "grad_norm": 0.579121470451355, "learning_rate": 3.833198560779793e-06, "loss": 0.0697, "step": 43720 }, { "epoch": 0.7742786684199109, "grad_norm": 0.2796652317047119, "learning_rate": 3.8326241357399014e-06, "loss": 0.0382, "step": 43721 }, { "epoch": 0.7742963779569393, "grad_norm": 0.4183099865913391, "learning_rate": 3.832049747439549e-06, "loss": 0.0422, "step": 43722 }, { "epoch": 0.7743140874939677, "grad_norm": 0.7630008459091187, "learning_rate": 3.831475395880622e-06, "loss": 0.0536, "step": 43723 }, { "epoch": 0.7743317970309961, "grad_norm": 0.5794968008995056, "learning_rate": 3.8309010810650115e-06, "loss": 0.08, "step": 43724 }, { "epoch": 0.7743495065680246, "grad_norm": 0.5180734992027283, "learning_rate": 3.830326802994607e-06, "loss": 0.0579, "step": 43725 }, { "epoch": 0.774367216105053, "grad_norm": 0.8935344219207764, "learning_rate": 3.829752561671298e-06, "loss": 0.097, "step": 43726 }, { "epoch": 0.7743849256420814, "grad_norm": 0.5796124339103699, "learning_rate": 3.829178357096977e-06, "loss": 0.0277, "step": 43727 }, { "epoch": 0.7744026351791098, "grad_norm": 1.0214189291000366, "learning_rate": 3.8286041892735255e-06, "loss": 0.0737, "step": 43728 }, { "epoch": 0.7744203447161383, "grad_norm": 0.5726305246353149, "learning_rate": 3.828030058202835e-06, "loss": 0.0471, "step": 43729 }, { "epoch": 0.7744380542531667, "grad_norm": 0.4023897349834442, "learning_rate": 3.8274559638867976e-06, "loss": 0.0532, "step": 43730 }, { "epoch": 0.7744557637901951, "grad_norm": 0.515031635761261, "learning_rate": 3.826881906327303e-06, "loss": 0.0649, "step": 43731 }, { "epoch": 0.7744734733272235, "grad_norm": 0.5488414764404297, "learning_rate": 3.826307885526233e-06, "loss": 0.0421, "step": 43732 }, { "epoch": 0.774491182864252, "grad_norm": 0.9608780145645142, "learning_rate": 3.825733901485478e-06, "loss": 0.0625, "step": 43733 }, { "epoch": 0.7745088924012804, "grad_norm": 0.5813512206077576, "learning_rate": 3.82515995420693e-06, "loss": 0.0488, "step": 43734 }, { "epoch": 0.7745266019383088, "grad_norm": 0.4388813376426697, "learning_rate": 3.824586043692477e-06, "loss": 0.0539, "step": 43735 }, { "epoch": 0.7745443114753373, "grad_norm": 0.4692849814891815, "learning_rate": 3.824012169944003e-06, "loss": 0.0422, "step": 43736 }, { "epoch": 0.7745620210123657, "grad_norm": 0.5488402843475342, "learning_rate": 3.823438332963399e-06, "loss": 0.0511, "step": 43737 }, { "epoch": 0.7745797305493941, "grad_norm": 0.4320785403251648, "learning_rate": 3.822864532752557e-06, "loss": 0.0493, "step": 43738 }, { "epoch": 0.7745974400864225, "grad_norm": 0.4445721209049225, "learning_rate": 3.822290769313358e-06, "loss": 0.0809, "step": 43739 }, { "epoch": 0.774615149623451, "grad_norm": 0.6602700352668762, "learning_rate": 3.821717042647688e-06, "loss": 0.046, "step": 43740 }, { "epoch": 0.7746328591604794, "grad_norm": 0.681212842464447, "learning_rate": 3.821143352757445e-06, "loss": 0.0483, "step": 43741 }, { "epoch": 0.7746505686975078, "grad_norm": 0.6058200001716614, "learning_rate": 3.820569699644506e-06, "loss": 0.0297, "step": 43742 }, { "epoch": 0.7746682782345362, "grad_norm": 1.0843623876571655, "learning_rate": 3.819996083310761e-06, "loss": 0.0657, "step": 43743 }, { "epoch": 0.7746859877715647, "grad_norm": 0.6435698866844177, "learning_rate": 3.819422503758094e-06, "loss": 0.0682, "step": 43744 }, { "epoch": 0.7747036973085931, "grad_norm": 0.5330102443695068, "learning_rate": 3.818848960988405e-06, "loss": 0.0652, "step": 43745 }, { "epoch": 0.7747214068456215, "grad_norm": 0.7537152767181396, "learning_rate": 3.818275455003566e-06, "loss": 0.0766, "step": 43746 }, { "epoch": 0.7747391163826499, "grad_norm": 0.41228780150413513, "learning_rate": 3.8177019858054705e-06, "loss": 0.05, "step": 43747 }, { "epoch": 0.7747568259196784, "grad_norm": 0.713716447353363, "learning_rate": 3.817128553396009e-06, "loss": 0.0627, "step": 43748 }, { "epoch": 0.7747745354567068, "grad_norm": 0.6037230491638184, "learning_rate": 3.81655515777706e-06, "loss": 0.0532, "step": 43749 }, { "epoch": 0.7747922449937352, "grad_norm": 0.7336806058883667, "learning_rate": 3.815981798950514e-06, "loss": 0.042, "step": 43750 }, { "epoch": 0.7748099545307637, "grad_norm": 0.3653687834739685, "learning_rate": 3.815408476918255e-06, "loss": 0.0462, "step": 43751 }, { "epoch": 0.7748276640677921, "grad_norm": 0.5010502934455872, "learning_rate": 3.814835191682171e-06, "loss": 0.0538, "step": 43752 }, { "epoch": 0.7748453736048205, "grad_norm": 0.5197840929031372, "learning_rate": 3.8142619432441485e-06, "loss": 0.0611, "step": 43753 }, { "epoch": 0.7748630831418489, "grad_norm": 0.5600035786628723, "learning_rate": 3.813688731606073e-06, "loss": 0.0484, "step": 43754 }, { "epoch": 0.7748807926788774, "grad_norm": 0.6988677978515625, "learning_rate": 3.8131155567698327e-06, "loss": 0.0748, "step": 43755 }, { "epoch": 0.7748985022159058, "grad_norm": 0.5775973796844482, "learning_rate": 3.8125424187373094e-06, "loss": 0.0741, "step": 43756 }, { "epoch": 0.7749162117529342, "grad_norm": 0.41667068004608154, "learning_rate": 3.8119693175103874e-06, "loss": 0.0451, "step": 43757 }, { "epoch": 0.7749339212899626, "grad_norm": 0.671406090259552, "learning_rate": 3.8113962530909564e-06, "loss": 0.055, "step": 43758 }, { "epoch": 0.7749516308269911, "grad_norm": 0.8482123017311096, "learning_rate": 3.810823225480904e-06, "loss": 0.0684, "step": 43759 }, { "epoch": 0.7749693403640195, "grad_norm": 0.7203165292739868, "learning_rate": 3.8102502346821053e-06, "loss": 0.0563, "step": 43760 }, { "epoch": 0.7749870499010479, "grad_norm": 0.6590900421142578, "learning_rate": 3.8096772806964545e-06, "loss": 0.0433, "step": 43761 }, { "epoch": 0.7750047594380763, "grad_norm": 0.6079750657081604, "learning_rate": 3.8091043635258316e-06, "loss": 0.0455, "step": 43762 }, { "epoch": 0.7750224689751049, "grad_norm": 0.5851058959960938, "learning_rate": 3.8085314831721247e-06, "loss": 0.05, "step": 43763 }, { "epoch": 0.7750401785121332, "grad_norm": 0.617135763168335, "learning_rate": 3.807958639637216e-06, "loss": 0.0689, "step": 43764 }, { "epoch": 0.7750578880491616, "grad_norm": 0.49019700288772583, "learning_rate": 3.8073858329229915e-06, "loss": 0.0372, "step": 43765 }, { "epoch": 0.7750755975861902, "grad_norm": 0.4211110472679138, "learning_rate": 3.806813063031341e-06, "loss": 0.0433, "step": 43766 }, { "epoch": 0.7750933071232186, "grad_norm": 0.3306220769882202, "learning_rate": 3.8062403299641384e-06, "loss": 0.0401, "step": 43767 }, { "epoch": 0.775111016660247, "grad_norm": 0.47677838802337646, "learning_rate": 3.805667633723273e-06, "loss": 0.0367, "step": 43768 }, { "epoch": 0.7751287261972754, "grad_norm": 0.39321139454841614, "learning_rate": 3.8050949743106286e-06, "loss": 0.0604, "step": 43769 }, { "epoch": 0.7751464357343039, "grad_norm": 0.5057511925697327, "learning_rate": 3.8045223517280943e-06, "loss": 0.0317, "step": 43770 }, { "epoch": 0.7751641452713323, "grad_norm": 0.5912919640541077, "learning_rate": 3.803949765977544e-06, "loss": 0.0863, "step": 43771 }, { "epoch": 0.7751818548083607, "grad_norm": 0.47722890973091125, "learning_rate": 3.8033772170608683e-06, "loss": 0.0554, "step": 43772 }, { "epoch": 0.7751995643453891, "grad_norm": 0.6701114177703857, "learning_rate": 3.8028047049799468e-06, "loss": 0.0703, "step": 43773 }, { "epoch": 0.7752172738824176, "grad_norm": 0.5718116164207458, "learning_rate": 3.802232229736667e-06, "loss": 0.0518, "step": 43774 }, { "epoch": 0.775234983419446, "grad_norm": 0.2933962941169739, "learning_rate": 3.8016597913329097e-06, "loss": 0.0449, "step": 43775 }, { "epoch": 0.7752526929564744, "grad_norm": 0.43637269735336304, "learning_rate": 3.8010873897705596e-06, "loss": 0.0569, "step": 43776 }, { "epoch": 0.7752704024935028, "grad_norm": 0.7995994687080383, "learning_rate": 3.8005150250515026e-06, "loss": 0.0895, "step": 43777 }, { "epoch": 0.7752881120305313, "grad_norm": 0.608334481716156, "learning_rate": 3.7999426971776156e-06, "loss": 0.042, "step": 43778 }, { "epoch": 0.7753058215675597, "grad_norm": 0.5128214359283447, "learning_rate": 3.799370406150783e-06, "loss": 0.033, "step": 43779 }, { "epoch": 0.7753235311045881, "grad_norm": 0.2860729694366455, "learning_rate": 3.7987981519728937e-06, "loss": 0.0466, "step": 43780 }, { "epoch": 0.7753412406416166, "grad_norm": 0.6619523167610168, "learning_rate": 3.798225934645817e-06, "loss": 0.0747, "step": 43781 }, { "epoch": 0.775358950178645, "grad_norm": 0.48817065358161926, "learning_rate": 3.7976537541714478e-06, "loss": 0.055, "step": 43782 }, { "epoch": 0.7753766597156734, "grad_norm": 0.6692824363708496, "learning_rate": 3.797081610551665e-06, "loss": 0.0542, "step": 43783 }, { "epoch": 0.7753943692527018, "grad_norm": 0.8072701096534729, "learning_rate": 3.7965095037883546e-06, "loss": 0.0713, "step": 43784 }, { "epoch": 0.7754120787897303, "grad_norm": 0.5991019010543823, "learning_rate": 3.7959374338833907e-06, "loss": 0.0432, "step": 43785 }, { "epoch": 0.7754297883267587, "grad_norm": 0.6083281636238098, "learning_rate": 3.79536540083866e-06, "loss": 0.0767, "step": 43786 }, { "epoch": 0.7754474978637871, "grad_norm": 0.2736051380634308, "learning_rate": 3.794793404656048e-06, "loss": 0.0383, "step": 43787 }, { "epoch": 0.7754652074008155, "grad_norm": 0.758614718914032, "learning_rate": 3.7942214453374272e-06, "loss": 0.0631, "step": 43788 }, { "epoch": 0.775482916937844, "grad_norm": 0.5081812739372253, "learning_rate": 3.7936495228846855e-06, "loss": 0.0335, "step": 43789 }, { "epoch": 0.7755006264748724, "grad_norm": 0.6351721286773682, "learning_rate": 3.793077637299703e-06, "loss": 0.0569, "step": 43790 }, { "epoch": 0.7755183360119008, "grad_norm": 0.509324312210083, "learning_rate": 3.7925057885843608e-06, "loss": 0.0637, "step": 43791 }, { "epoch": 0.7755360455489292, "grad_norm": 0.4965153634548187, "learning_rate": 3.7919339767405423e-06, "loss": 0.0579, "step": 43792 }, { "epoch": 0.7755537550859577, "grad_norm": 0.8090753555297852, "learning_rate": 3.7913622017701273e-06, "loss": 0.0933, "step": 43793 }, { "epoch": 0.7755714646229861, "grad_norm": 0.2997783124446869, "learning_rate": 3.7907904636749997e-06, "loss": 0.0378, "step": 43794 }, { "epoch": 0.7755891741600145, "grad_norm": 0.5565392374992371, "learning_rate": 3.790218762457035e-06, "loss": 0.0829, "step": 43795 }, { "epoch": 0.775606883697043, "grad_norm": 0.84770268201828, "learning_rate": 3.7896470981181154e-06, "loss": 0.0805, "step": 43796 }, { "epoch": 0.7756245932340714, "grad_norm": 0.580849826335907, "learning_rate": 3.7890754706601237e-06, "loss": 0.0557, "step": 43797 }, { "epoch": 0.7756423027710998, "grad_norm": 0.5061243772506714, "learning_rate": 3.788503880084946e-06, "loss": 0.0574, "step": 43798 }, { "epoch": 0.7756600123081282, "grad_norm": 0.5037086606025696, "learning_rate": 3.78793232639445e-06, "loss": 0.033, "step": 43799 }, { "epoch": 0.7756777218451567, "grad_norm": 0.5771093964576721, "learning_rate": 3.7873608095905225e-06, "loss": 0.0595, "step": 43800 }, { "epoch": 0.7756954313821851, "grad_norm": 0.5002694129943848, "learning_rate": 3.7867893296750442e-06, "loss": 0.0324, "step": 43801 }, { "epoch": 0.7757131409192135, "grad_norm": 0.7243558168411255, "learning_rate": 3.7862178866498955e-06, "loss": 0.0608, "step": 43802 }, { "epoch": 0.7757308504562419, "grad_norm": 0.5525962710380554, "learning_rate": 3.7856464805169555e-06, "loss": 0.0665, "step": 43803 }, { "epoch": 0.7757485599932704, "grad_norm": 0.8712396621704102, "learning_rate": 3.7850751112781034e-06, "loss": 0.0546, "step": 43804 }, { "epoch": 0.7757662695302988, "grad_norm": 0.6540001034736633, "learning_rate": 3.784503778935226e-06, "loss": 0.0493, "step": 43805 }, { "epoch": 0.7757839790673272, "grad_norm": 0.23933376371860504, "learning_rate": 3.78393248349019e-06, "loss": 0.0501, "step": 43806 }, { "epoch": 0.7758016886043556, "grad_norm": 0.5301323533058167, "learning_rate": 3.783361224944884e-06, "loss": 0.041, "step": 43807 }, { "epoch": 0.7758193981413841, "grad_norm": 0.7896862626075745, "learning_rate": 3.782790003301185e-06, "loss": 0.0599, "step": 43808 }, { "epoch": 0.7758371076784125, "grad_norm": 0.9222592711448669, "learning_rate": 3.782218818560974e-06, "loss": 0.0888, "step": 43809 }, { "epoch": 0.7758548172154409, "grad_norm": 0.5846624374389648, "learning_rate": 3.7816476707261222e-06, "loss": 0.066, "step": 43810 }, { "epoch": 0.7758725267524694, "grad_norm": 0.6142532825469971, "learning_rate": 3.781076559798519e-06, "loss": 0.0563, "step": 43811 }, { "epoch": 0.7758902362894978, "grad_norm": 0.5468827486038208, "learning_rate": 3.7805054857800436e-06, "loss": 0.0401, "step": 43812 }, { "epoch": 0.7759079458265262, "grad_norm": 0.5142267942428589, "learning_rate": 3.779934448672566e-06, "loss": 0.0756, "step": 43813 }, { "epoch": 0.7759256553635546, "grad_norm": 0.40871211886405945, "learning_rate": 3.7793634484779673e-06, "loss": 0.056, "step": 43814 }, { "epoch": 0.7759433649005831, "grad_norm": 0.6054795384407043, "learning_rate": 3.7787924851981298e-06, "loss": 0.0839, "step": 43815 }, { "epoch": 0.7759610744376115, "grad_norm": 0.8408297896385193, "learning_rate": 3.778221558834934e-06, "loss": 0.0657, "step": 43816 }, { "epoch": 0.7759787839746399, "grad_norm": 0.5694741010665894, "learning_rate": 3.7776506693902513e-06, "loss": 0.0469, "step": 43817 }, { "epoch": 0.7759964935116683, "grad_norm": 0.4401586055755615, "learning_rate": 3.77707981686596e-06, "loss": 0.0631, "step": 43818 }, { "epoch": 0.7760142030486968, "grad_norm": 0.5072686672210693, "learning_rate": 3.776509001263943e-06, "loss": 0.0579, "step": 43819 }, { "epoch": 0.7760319125857252, "grad_norm": 0.5993785858154297, "learning_rate": 3.775938222586076e-06, "loss": 0.0609, "step": 43820 }, { "epoch": 0.7760496221227536, "grad_norm": 0.9161067605018616, "learning_rate": 3.7753674808342365e-06, "loss": 0.0267, "step": 43821 }, { "epoch": 0.776067331659782, "grad_norm": 0.8932380080223083, "learning_rate": 3.7747967760103025e-06, "loss": 0.0577, "step": 43822 }, { "epoch": 0.7760850411968105, "grad_norm": 0.6904045939445496, "learning_rate": 3.774226108116158e-06, "loss": 0.0632, "step": 43823 }, { "epoch": 0.7761027507338389, "grad_norm": 0.5925123691558838, "learning_rate": 3.7736554771536676e-06, "loss": 0.0457, "step": 43824 }, { "epoch": 0.7761204602708673, "grad_norm": 0.5387263894081116, "learning_rate": 3.773084883124715e-06, "loss": 0.0353, "step": 43825 }, { "epoch": 0.7761381698078959, "grad_norm": 0.4780716598033905, "learning_rate": 3.772514326031184e-06, "loss": 0.0795, "step": 43826 }, { "epoch": 0.7761558793449242, "grad_norm": 0.6048552989959717, "learning_rate": 3.7719438058749395e-06, "loss": 0.0487, "step": 43827 }, { "epoch": 0.7761735888819526, "grad_norm": 0.1570914089679718, "learning_rate": 3.771373322657865e-06, "loss": 0.0398, "step": 43828 }, { "epoch": 0.776191298418981, "grad_norm": 0.6163622736930847, "learning_rate": 3.770802876381836e-06, "loss": 0.0666, "step": 43829 }, { "epoch": 0.7762090079560096, "grad_norm": 0.7740671634674072, "learning_rate": 3.7702324670487286e-06, "loss": 0.0544, "step": 43830 }, { "epoch": 0.776226717493038, "grad_norm": 0.49259763956069946, "learning_rate": 3.7696620946604223e-06, "loss": 0.058, "step": 43831 }, { "epoch": 0.7762444270300664, "grad_norm": 0.6731286644935608, "learning_rate": 3.769091759218791e-06, "loss": 0.0541, "step": 43832 }, { "epoch": 0.7762621365670948, "grad_norm": 0.49575600028038025, "learning_rate": 3.7685214607257128e-06, "loss": 0.0506, "step": 43833 }, { "epoch": 0.7762798461041233, "grad_norm": 0.5441217422485352, "learning_rate": 3.7679511991830674e-06, "loss": 0.0518, "step": 43834 }, { "epoch": 0.7762975556411517, "grad_norm": 0.643835723400116, "learning_rate": 3.7673809745927214e-06, "loss": 0.0557, "step": 43835 }, { "epoch": 0.7763152651781801, "grad_norm": 0.6384532451629639, "learning_rate": 3.7668107869565548e-06, "loss": 0.0725, "step": 43836 }, { "epoch": 0.7763329747152085, "grad_norm": 0.4020860195159912, "learning_rate": 3.7662406362764517e-06, "loss": 0.0623, "step": 43837 }, { "epoch": 0.776350684252237, "grad_norm": 0.46105924248695374, "learning_rate": 3.7656705225542736e-06, "loss": 0.0888, "step": 43838 }, { "epoch": 0.7763683937892654, "grad_norm": 0.6094216704368591, "learning_rate": 3.765100445791901e-06, "loss": 0.0401, "step": 43839 }, { "epoch": 0.7763861033262938, "grad_norm": 0.37829214334487915, "learning_rate": 3.764530405991215e-06, "loss": 0.0448, "step": 43840 }, { "epoch": 0.7764038128633223, "grad_norm": 0.5767560601234436, "learning_rate": 3.7639604031540913e-06, "loss": 0.063, "step": 43841 }, { "epoch": 0.7764215224003507, "grad_norm": 0.581288754940033, "learning_rate": 3.7633904372823967e-06, "loss": 0.0459, "step": 43842 }, { "epoch": 0.7764392319373791, "grad_norm": 0.6906310319900513, "learning_rate": 3.762820508378013e-06, "loss": 0.0474, "step": 43843 }, { "epoch": 0.7764569414744075, "grad_norm": 0.44874948263168335, "learning_rate": 3.7622506164428157e-06, "loss": 0.0431, "step": 43844 }, { "epoch": 0.776474651011436, "grad_norm": 0.4760536253452301, "learning_rate": 3.7616807614786743e-06, "loss": 0.0356, "step": 43845 }, { "epoch": 0.7764923605484644, "grad_norm": 0.637305736541748, "learning_rate": 3.7611109434874656e-06, "loss": 0.0805, "step": 43846 }, { "epoch": 0.7765100700854928, "grad_norm": 0.45949020981788635, "learning_rate": 3.760541162471066e-06, "loss": 0.0634, "step": 43847 }, { "epoch": 0.7765277796225212, "grad_norm": 0.700072169303894, "learning_rate": 3.759971418431349e-06, "loss": 0.0696, "step": 43848 }, { "epoch": 0.7765454891595497, "grad_norm": 0.667875349521637, "learning_rate": 3.759401711370189e-06, "loss": 0.0522, "step": 43849 }, { "epoch": 0.7765631986965781, "grad_norm": 0.7682552933692932, "learning_rate": 3.7588320412894595e-06, "loss": 0.0405, "step": 43850 }, { "epoch": 0.7765809082336065, "grad_norm": 0.9350868463516235, "learning_rate": 3.7582624081910403e-06, "loss": 0.0646, "step": 43851 }, { "epoch": 0.7765986177706349, "grad_norm": 0.6705045700073242, "learning_rate": 3.7576928120767973e-06, "loss": 0.0526, "step": 43852 }, { "epoch": 0.7766163273076634, "grad_norm": 0.3328647017478943, "learning_rate": 3.7571232529486084e-06, "loss": 0.0576, "step": 43853 }, { "epoch": 0.7766340368446918, "grad_norm": 0.3700324296951294, "learning_rate": 3.7565537308083454e-06, "loss": 0.0555, "step": 43854 }, { "epoch": 0.7766517463817202, "grad_norm": 0.5456594824790955, "learning_rate": 3.7559842456578884e-06, "loss": 0.0457, "step": 43855 }, { "epoch": 0.7766694559187487, "grad_norm": 0.2844657003879547, "learning_rate": 3.7554147974991017e-06, "loss": 0.0629, "step": 43856 }, { "epoch": 0.7766871654557771, "grad_norm": 0.5018675327301025, "learning_rate": 3.754845386333863e-06, "loss": 0.0312, "step": 43857 }, { "epoch": 0.7767048749928055, "grad_norm": 0.40675532817840576, "learning_rate": 3.754276012164044e-06, "loss": 0.0553, "step": 43858 }, { "epoch": 0.7767225845298339, "grad_norm": 0.49462568759918213, "learning_rate": 3.7537066749915207e-06, "loss": 0.0533, "step": 43859 }, { "epoch": 0.7767402940668624, "grad_norm": 0.5586246252059937, "learning_rate": 3.7531373748181656e-06, "loss": 0.0355, "step": 43860 }, { "epoch": 0.7767580036038908, "grad_norm": 0.7835450768470764, "learning_rate": 3.75256811164585e-06, "loss": 0.08, "step": 43861 }, { "epoch": 0.7767757131409192, "grad_norm": 0.48053890466690063, "learning_rate": 3.751998885476453e-06, "loss": 0.0523, "step": 43862 }, { "epoch": 0.7767934226779476, "grad_norm": 0.4429381787776947, "learning_rate": 3.7514296963118372e-06, "loss": 0.0498, "step": 43863 }, { "epoch": 0.7768111322149761, "grad_norm": 0.5234860181808472, "learning_rate": 3.750860544153881e-06, "loss": 0.0427, "step": 43864 }, { "epoch": 0.7768288417520045, "grad_norm": 0.637832522392273, "learning_rate": 3.750291429004459e-06, "loss": 0.0806, "step": 43865 }, { "epoch": 0.7768465512890329, "grad_norm": 0.5328119993209839, "learning_rate": 3.749722350865435e-06, "loss": 0.0613, "step": 43866 }, { "epoch": 0.7768642608260613, "grad_norm": 0.5574357509613037, "learning_rate": 3.7491533097386877e-06, "loss": 0.0649, "step": 43867 }, { "epoch": 0.7768819703630898, "grad_norm": 0.45615866780281067, "learning_rate": 3.7485843056260837e-06, "loss": 0.0626, "step": 43868 }, { "epoch": 0.7768996799001182, "grad_norm": 0.6594212651252747, "learning_rate": 3.7480153385295086e-06, "loss": 0.063, "step": 43869 }, { "epoch": 0.7769173894371466, "grad_norm": 0.9031832814216614, "learning_rate": 3.7474464084508203e-06, "loss": 0.0725, "step": 43870 }, { "epoch": 0.7769350989741751, "grad_norm": 0.671454906463623, "learning_rate": 3.7468775153918945e-06, "loss": 0.0817, "step": 43871 }, { "epoch": 0.7769528085112035, "grad_norm": 0.4463886022567749, "learning_rate": 3.746308659354604e-06, "loss": 0.0414, "step": 43872 }, { "epoch": 0.7769705180482319, "grad_norm": 0.42130061984062195, "learning_rate": 3.7457398403408233e-06, "loss": 0.0373, "step": 43873 }, { "epoch": 0.7769882275852603, "grad_norm": 0.6575959324836731, "learning_rate": 3.745171058352417e-06, "loss": 0.0541, "step": 43874 }, { "epoch": 0.7770059371222888, "grad_norm": 0.5141448378562927, "learning_rate": 3.7446023133912584e-06, "loss": 0.0753, "step": 43875 }, { "epoch": 0.7770236466593172, "grad_norm": 0.4168727695941925, "learning_rate": 3.744033605459221e-06, "loss": 0.036, "step": 43876 }, { "epoch": 0.7770413561963456, "grad_norm": 0.863787829875946, "learning_rate": 3.743464934558174e-06, "loss": 0.0668, "step": 43877 }, { "epoch": 0.777059065733374, "grad_norm": 0.6842277646064758, "learning_rate": 3.7428963006899887e-06, "loss": 0.0556, "step": 43878 }, { "epoch": 0.7770767752704025, "grad_norm": 0.46366849541664124, "learning_rate": 3.7423277038565366e-06, "loss": 0.0531, "step": 43879 }, { "epoch": 0.7770944848074309, "grad_norm": 0.8217988610267639, "learning_rate": 3.741759144059692e-06, "loss": 0.0346, "step": 43880 }, { "epoch": 0.7771121943444593, "grad_norm": 0.5737001299858093, "learning_rate": 3.7411906213013163e-06, "loss": 0.0788, "step": 43881 }, { "epoch": 0.7771299038814877, "grad_norm": 0.4465506076812744, "learning_rate": 3.740622135583285e-06, "loss": 0.0633, "step": 43882 }, { "epoch": 0.7771476134185162, "grad_norm": 0.3502316176891327, "learning_rate": 3.7400536869074725e-06, "loss": 0.0519, "step": 43883 }, { "epoch": 0.7771653229555446, "grad_norm": 0.6998034119606018, "learning_rate": 3.7394852752757397e-06, "loss": 0.0693, "step": 43884 }, { "epoch": 0.777183032492573, "grad_norm": 0.674741268157959, "learning_rate": 3.7389169006899615e-06, "loss": 0.079, "step": 43885 }, { "epoch": 0.7772007420296015, "grad_norm": 0.6607829332351685, "learning_rate": 3.738348563152008e-06, "loss": 0.0572, "step": 43886 }, { "epoch": 0.7772184515666299, "grad_norm": 0.8369026780128479, "learning_rate": 3.737780262663748e-06, "loss": 0.0489, "step": 43887 }, { "epoch": 0.7772361611036583, "grad_norm": 0.6697708368301392, "learning_rate": 3.737211999227053e-06, "loss": 0.0483, "step": 43888 }, { "epoch": 0.7772538706406867, "grad_norm": 0.6697085499763489, "learning_rate": 3.7366437728437904e-06, "loss": 0.0429, "step": 43889 }, { "epoch": 0.7772715801777152, "grad_norm": 1.0636570453643799, "learning_rate": 3.7360755835158343e-06, "loss": 0.116, "step": 43890 }, { "epoch": 0.7772892897147436, "grad_norm": 0.9054380059242249, "learning_rate": 3.7355074312450475e-06, "loss": 0.0824, "step": 43891 }, { "epoch": 0.777306999251772, "grad_norm": 1.1096237897872925, "learning_rate": 3.734939316033302e-06, "loss": 0.0734, "step": 43892 }, { "epoch": 0.7773247087888004, "grad_norm": 0.5350902676582336, "learning_rate": 3.734371237882465e-06, "loss": 0.0568, "step": 43893 }, { "epoch": 0.777342418325829, "grad_norm": 0.6144158244132996, "learning_rate": 3.733803196794413e-06, "loss": 0.0674, "step": 43894 }, { "epoch": 0.7773601278628574, "grad_norm": 0.3494596779346466, "learning_rate": 3.733235192771004e-06, "loss": 0.0521, "step": 43895 }, { "epoch": 0.7773778373998858, "grad_norm": 0.6386014819145203, "learning_rate": 3.7326672258141116e-06, "loss": 0.0757, "step": 43896 }, { "epoch": 0.7773955469369142, "grad_norm": 0.5679761171340942, "learning_rate": 3.732099295925604e-06, "loss": 0.0663, "step": 43897 }, { "epoch": 0.7774132564739427, "grad_norm": 0.5575183033943176, "learning_rate": 3.7315314031073515e-06, "loss": 0.0774, "step": 43898 }, { "epoch": 0.7774309660109711, "grad_norm": 0.7258961200714111, "learning_rate": 3.7309635473612197e-06, "loss": 0.0547, "step": 43899 }, { "epoch": 0.7774486755479995, "grad_norm": 0.4023652970790863, "learning_rate": 3.7303957286890783e-06, "loss": 0.0525, "step": 43900 }, { "epoch": 0.777466385085028, "grad_norm": 0.35880789160728455, "learning_rate": 3.7298279470927993e-06, "loss": 0.0368, "step": 43901 }, { "epoch": 0.7774840946220564, "grad_norm": 0.6329395771026611, "learning_rate": 3.729260202574243e-06, "loss": 0.06, "step": 43902 }, { "epoch": 0.7775018041590848, "grad_norm": 0.5555031895637512, "learning_rate": 3.728692495135279e-06, "loss": 0.0784, "step": 43903 }, { "epoch": 0.7775195136961132, "grad_norm": 0.37914004921913147, "learning_rate": 3.7281248247777785e-06, "loss": 0.0553, "step": 43904 }, { "epoch": 0.7775372232331417, "grad_norm": 0.730765163898468, "learning_rate": 3.7275571915036055e-06, "loss": 0.0655, "step": 43905 }, { "epoch": 0.7775549327701701, "grad_norm": 0.687515914440155, "learning_rate": 3.726989595314631e-06, "loss": 0.0544, "step": 43906 }, { "epoch": 0.7775726423071985, "grad_norm": 0.5905450582504272, "learning_rate": 3.72642203621272e-06, "loss": 0.0622, "step": 43907 }, { "epoch": 0.7775903518442269, "grad_norm": 0.48527759313583374, "learning_rate": 3.725854514199744e-06, "loss": 0.0659, "step": 43908 }, { "epoch": 0.7776080613812554, "grad_norm": 0.6117491126060486, "learning_rate": 3.725287029277562e-06, "loss": 0.0474, "step": 43909 }, { "epoch": 0.7776257709182838, "grad_norm": 0.488359659910202, "learning_rate": 3.7247195814480456e-06, "loss": 0.0456, "step": 43910 }, { "epoch": 0.7776434804553122, "grad_norm": 0.7903801798820496, "learning_rate": 3.7241521707130618e-06, "loss": 0.0766, "step": 43911 }, { "epoch": 0.7776611899923406, "grad_norm": 0.8033424019813538, "learning_rate": 3.72358479707448e-06, "loss": 0.0892, "step": 43912 }, { "epoch": 0.7776788995293691, "grad_norm": 0.9531519412994385, "learning_rate": 3.7230174605341597e-06, "loss": 0.0887, "step": 43913 }, { "epoch": 0.7776966090663975, "grad_norm": 0.4030066728591919, "learning_rate": 3.7224501610939727e-06, "loss": 0.0612, "step": 43914 }, { "epoch": 0.7777143186034259, "grad_norm": 0.9826456308364868, "learning_rate": 3.721882898755782e-06, "loss": 0.0565, "step": 43915 }, { "epoch": 0.7777320281404544, "grad_norm": 0.37919381260871887, "learning_rate": 3.7213156735214575e-06, "loss": 0.0611, "step": 43916 }, { "epoch": 0.7777497376774828, "grad_norm": 0.8486411571502686, "learning_rate": 3.7207484853928634e-06, "loss": 0.0618, "step": 43917 }, { "epoch": 0.7777674472145112, "grad_norm": 0.4738973081111908, "learning_rate": 3.720181334371865e-06, "loss": 0.058, "step": 43918 }, { "epoch": 0.7777851567515396, "grad_norm": 0.49914437532424927, "learning_rate": 3.7196142204603335e-06, "loss": 0.0652, "step": 43919 }, { "epoch": 0.7778028662885681, "grad_norm": 0.5863306522369385, "learning_rate": 3.7190471436601274e-06, "loss": 0.0719, "step": 43920 }, { "epoch": 0.7778205758255965, "grad_norm": 0.7170745730400085, "learning_rate": 3.718480103973114e-06, "loss": 0.0689, "step": 43921 }, { "epoch": 0.7778382853626249, "grad_norm": 0.6070532202720642, "learning_rate": 3.7179131014011654e-06, "loss": 0.0774, "step": 43922 }, { "epoch": 0.7778559948996533, "grad_norm": 0.7536942958831787, "learning_rate": 3.717346135946136e-06, "loss": 0.0772, "step": 43923 }, { "epoch": 0.7778737044366818, "grad_norm": 0.5619180798530579, "learning_rate": 3.716779207609896e-06, "loss": 0.0613, "step": 43924 }, { "epoch": 0.7778914139737102, "grad_norm": 0.43993079662323, "learning_rate": 3.7162123163943128e-06, "loss": 0.0421, "step": 43925 }, { "epoch": 0.7779091235107386, "grad_norm": 0.47154220938682556, "learning_rate": 3.71564546230125e-06, "loss": 0.057, "step": 43926 }, { "epoch": 0.777926833047767, "grad_norm": 0.3895629942417145, "learning_rate": 3.715078645332572e-06, "loss": 0.0517, "step": 43927 }, { "epoch": 0.7779445425847955, "grad_norm": 0.6012586951255798, "learning_rate": 3.7145118654901427e-06, "loss": 0.0558, "step": 43928 }, { "epoch": 0.7779622521218239, "grad_norm": 0.5549765229225159, "learning_rate": 3.7139451227758323e-06, "loss": 0.0453, "step": 43929 }, { "epoch": 0.7779799616588523, "grad_norm": 0.6330996155738831, "learning_rate": 3.713378417191498e-06, "loss": 0.0569, "step": 43930 }, { "epoch": 0.7779976711958808, "grad_norm": 0.4031796455383301, "learning_rate": 3.7128117487390063e-06, "loss": 0.0469, "step": 43931 }, { "epoch": 0.7780153807329092, "grad_norm": 0.2826986610889435, "learning_rate": 3.7122451174202222e-06, "loss": 0.0564, "step": 43932 }, { "epoch": 0.7780330902699376, "grad_norm": 0.6973767876625061, "learning_rate": 3.7116785232370143e-06, "loss": 0.1149, "step": 43933 }, { "epoch": 0.778050799806966, "grad_norm": 0.516578197479248, "learning_rate": 3.7111119661912345e-06, "loss": 0.0449, "step": 43934 }, { "epoch": 0.7780685093439945, "grad_norm": 0.6046227216720581, "learning_rate": 3.7105454462847583e-06, "loss": 0.0659, "step": 43935 }, { "epoch": 0.7780862188810229, "grad_norm": 0.33914172649383545, "learning_rate": 3.7099789635194485e-06, "loss": 0.0254, "step": 43936 }, { "epoch": 0.7781039284180513, "grad_norm": 0.6281707286834717, "learning_rate": 3.7094125178971646e-06, "loss": 0.0599, "step": 43937 }, { "epoch": 0.7781216379550797, "grad_norm": 0.6330354809761047, "learning_rate": 3.708846109419769e-06, "loss": 0.0464, "step": 43938 }, { "epoch": 0.7781393474921082, "grad_norm": 0.5876316428184509, "learning_rate": 3.708279738089129e-06, "loss": 0.0462, "step": 43939 }, { "epoch": 0.7781570570291366, "grad_norm": 0.8045612573623657, "learning_rate": 3.70771340390711e-06, "loss": 0.0803, "step": 43940 }, { "epoch": 0.778174766566165, "grad_norm": 0.7345245480537415, "learning_rate": 3.707147106875569e-06, "loss": 0.0426, "step": 43941 }, { "epoch": 0.7781924761031934, "grad_norm": 0.6671751141548157, "learning_rate": 3.7065808469963708e-06, "loss": 0.0642, "step": 43942 }, { "epoch": 0.7782101856402219, "grad_norm": 0.32869312167167664, "learning_rate": 3.7060146242713786e-06, "loss": 0.0319, "step": 43943 }, { "epoch": 0.7782278951772503, "grad_norm": 0.5087003707885742, "learning_rate": 3.7054484387024583e-06, "loss": 0.0604, "step": 43944 }, { "epoch": 0.7782456047142787, "grad_norm": 0.6405556201934814, "learning_rate": 3.704882290291468e-06, "loss": 0.0467, "step": 43945 }, { "epoch": 0.7782633142513072, "grad_norm": 0.7475661039352417, "learning_rate": 3.704316179040273e-06, "loss": 0.064, "step": 43946 }, { "epoch": 0.7782810237883356, "grad_norm": 0.6400718092918396, "learning_rate": 3.70375010495074e-06, "loss": 0.0423, "step": 43947 }, { "epoch": 0.778298733325364, "grad_norm": 0.9489911794662476, "learning_rate": 3.7031840680247225e-06, "loss": 0.0472, "step": 43948 }, { "epoch": 0.7783164428623924, "grad_norm": 0.5386196970939636, "learning_rate": 3.702618068264086e-06, "loss": 0.0363, "step": 43949 }, { "epoch": 0.7783341523994209, "grad_norm": 0.5839747786521912, "learning_rate": 3.702052105670695e-06, "loss": 0.0578, "step": 43950 }, { "epoch": 0.7783518619364493, "grad_norm": 0.7153931260108948, "learning_rate": 3.7014861802464124e-06, "loss": 0.0833, "step": 43951 }, { "epoch": 0.7783695714734777, "grad_norm": 0.7177836894989014, "learning_rate": 3.700920291993094e-06, "loss": 0.0416, "step": 43952 }, { "epoch": 0.7783872810105061, "grad_norm": 0.8316789269447327, "learning_rate": 3.7003544409126057e-06, "loss": 0.09, "step": 43953 }, { "epoch": 0.7784049905475346, "grad_norm": 0.7121909856796265, "learning_rate": 3.6997886270068072e-06, "loss": 0.0444, "step": 43954 }, { "epoch": 0.778422700084563, "grad_norm": 0.6486359238624573, "learning_rate": 3.6992228502775606e-06, "loss": 0.0563, "step": 43955 }, { "epoch": 0.7784404096215914, "grad_norm": 0.6710372567176819, "learning_rate": 3.698657110726729e-06, "loss": 0.0522, "step": 43956 }, { "epoch": 0.7784581191586198, "grad_norm": 0.566567599773407, "learning_rate": 3.698091408356172e-06, "loss": 0.058, "step": 43957 }, { "epoch": 0.7784758286956484, "grad_norm": 0.736362874507904, "learning_rate": 3.6975257431677566e-06, "loss": 0.0638, "step": 43958 }, { "epoch": 0.7784935382326768, "grad_norm": 0.7934407591819763, "learning_rate": 3.6969601151633327e-06, "loss": 0.088, "step": 43959 }, { "epoch": 0.7785112477697052, "grad_norm": 0.7901012301445007, "learning_rate": 3.696394524344766e-06, "loss": 0.0763, "step": 43960 }, { "epoch": 0.7785289573067337, "grad_norm": 1.0065194368362427, "learning_rate": 3.6958289707139244e-06, "loss": 0.0703, "step": 43961 }, { "epoch": 0.7785466668437621, "grad_norm": 0.5729901790618896, "learning_rate": 3.695263454272657e-06, "loss": 0.0564, "step": 43962 }, { "epoch": 0.7785643763807905, "grad_norm": 0.6322188377380371, "learning_rate": 3.694697975022825e-06, "loss": 0.0389, "step": 43963 }, { "epoch": 0.7785820859178189, "grad_norm": 0.6871597170829773, "learning_rate": 3.694132532966298e-06, "loss": 0.0617, "step": 43964 }, { "epoch": 0.7785997954548474, "grad_norm": 0.327596515417099, "learning_rate": 3.6935671281049345e-06, "loss": 0.0433, "step": 43965 }, { "epoch": 0.7786175049918758, "grad_norm": 0.5180965662002563, "learning_rate": 3.693001760440588e-06, "loss": 0.0729, "step": 43966 }, { "epoch": 0.7786352145289042, "grad_norm": 0.9876086711883545, "learning_rate": 3.6924364299751224e-06, "loss": 0.0722, "step": 43967 }, { "epoch": 0.7786529240659326, "grad_norm": 0.5876011848449707, "learning_rate": 3.6918711367104007e-06, "loss": 0.0614, "step": 43968 }, { "epoch": 0.7786706336029611, "grad_norm": 0.7517271041870117, "learning_rate": 3.6913058806482753e-06, "loss": 0.0856, "step": 43969 }, { "epoch": 0.7786883431399895, "grad_norm": 0.5887768864631653, "learning_rate": 3.690740661790611e-06, "loss": 0.0448, "step": 43970 }, { "epoch": 0.7787060526770179, "grad_norm": 0.5299298763275146, "learning_rate": 3.6901754801392643e-06, "loss": 0.0632, "step": 43971 }, { "epoch": 0.7787237622140463, "grad_norm": 0.45501622557640076, "learning_rate": 3.689610335696098e-06, "loss": 0.0321, "step": 43972 }, { "epoch": 0.7787414717510748, "grad_norm": 0.5077155828475952, "learning_rate": 3.6890452284629682e-06, "loss": 0.0407, "step": 43973 }, { "epoch": 0.7787591812881032, "grad_norm": 0.46083080768585205, "learning_rate": 3.6884801584417355e-06, "loss": 0.0346, "step": 43974 }, { "epoch": 0.7787768908251316, "grad_norm": 0.5214064717292786, "learning_rate": 3.6879151256342595e-06, "loss": 0.0498, "step": 43975 }, { "epoch": 0.7787946003621601, "grad_norm": 1.2944014072418213, "learning_rate": 3.6873501300424027e-06, "loss": 0.0652, "step": 43976 }, { "epoch": 0.7788123098991885, "grad_norm": 0.7499679923057556, "learning_rate": 3.686785171668017e-06, "loss": 0.058, "step": 43977 }, { "epoch": 0.7788300194362169, "grad_norm": 0.9929818511009216, "learning_rate": 3.686220250512962e-06, "loss": 0.0473, "step": 43978 }, { "epoch": 0.7788477289732453, "grad_norm": 0.6180190443992615, "learning_rate": 3.6856553665791027e-06, "loss": 0.0678, "step": 43979 }, { "epoch": 0.7788654385102738, "grad_norm": 0.569130539894104, "learning_rate": 3.6850905198682898e-06, "loss": 0.0653, "step": 43980 }, { "epoch": 0.7788831480473022, "grad_norm": 0.600300133228302, "learning_rate": 3.6845257103823836e-06, "loss": 0.0291, "step": 43981 }, { "epoch": 0.7789008575843306, "grad_norm": 0.5450900197029114, "learning_rate": 3.6839609381232426e-06, "loss": 0.0376, "step": 43982 }, { "epoch": 0.778918567121359, "grad_norm": 0.33767032623291016, "learning_rate": 3.6833962030927264e-06, "loss": 0.042, "step": 43983 }, { "epoch": 0.7789362766583875, "grad_norm": 0.5869749784469604, "learning_rate": 3.6828315052926926e-06, "loss": 0.0418, "step": 43984 }, { "epoch": 0.7789539861954159, "grad_norm": 0.504323422908783, "learning_rate": 3.6822668447249974e-06, "loss": 0.0564, "step": 43985 }, { "epoch": 0.7789716957324443, "grad_norm": 0.5458727478981018, "learning_rate": 3.6817022213915047e-06, "loss": 0.0555, "step": 43986 }, { "epoch": 0.7789894052694727, "grad_norm": 1.0710086822509766, "learning_rate": 3.6811376352940628e-06, "loss": 0.0591, "step": 43987 }, { "epoch": 0.7790071148065012, "grad_norm": 0.4345400035381317, "learning_rate": 3.680573086434532e-06, "loss": 0.0491, "step": 43988 }, { "epoch": 0.7790248243435296, "grad_norm": 0.4439658224582672, "learning_rate": 3.680008574814771e-06, "loss": 0.0529, "step": 43989 }, { "epoch": 0.779042533880558, "grad_norm": 0.6071411371231079, "learning_rate": 3.679444100436643e-06, "loss": 0.0755, "step": 43990 }, { "epoch": 0.7790602434175865, "grad_norm": 0.6438089609146118, "learning_rate": 3.678879663301993e-06, "loss": 0.0716, "step": 43991 }, { "epoch": 0.7790779529546149, "grad_norm": 0.45456817746162415, "learning_rate": 3.6783152634126804e-06, "loss": 0.0369, "step": 43992 }, { "epoch": 0.7790956624916433, "grad_norm": 0.2991253435611725, "learning_rate": 3.677750900770573e-06, "loss": 0.0441, "step": 43993 }, { "epoch": 0.7791133720286717, "grad_norm": 1.088750958442688, "learning_rate": 3.6771865753775156e-06, "loss": 0.0597, "step": 43994 }, { "epoch": 0.7791310815657002, "grad_norm": 0.6580214500427246, "learning_rate": 3.676622287235371e-06, "loss": 0.0568, "step": 43995 }, { "epoch": 0.7791487911027286, "grad_norm": 0.22502127289772034, "learning_rate": 3.6760580363459923e-06, "loss": 0.033, "step": 43996 }, { "epoch": 0.779166500639757, "grad_norm": 0.8328515887260437, "learning_rate": 3.6754938227112413e-06, "loss": 0.0545, "step": 43997 }, { "epoch": 0.7791842101767854, "grad_norm": 0.5034537315368652, "learning_rate": 3.674929646332966e-06, "loss": 0.0583, "step": 43998 }, { "epoch": 0.7792019197138139, "grad_norm": 0.6485015153884888, "learning_rate": 3.6743655072130278e-06, "loss": 0.0582, "step": 43999 }, { "epoch": 0.7792196292508423, "grad_norm": 1.0344449281692505, "learning_rate": 3.6738014053532807e-06, "loss": 0.047, "step": 44000 }, { "epoch": 0.7792373387878707, "grad_norm": 0.6034194231033325, "learning_rate": 3.6732373407555824e-06, "loss": 0.0643, "step": 44001 }, { "epoch": 0.7792550483248991, "grad_norm": 0.6447237730026245, "learning_rate": 3.672673313421786e-06, "loss": 0.095, "step": 44002 }, { "epoch": 0.7792727578619276, "grad_norm": 0.4237510561943054, "learning_rate": 3.67210932335375e-06, "loss": 0.0626, "step": 44003 }, { "epoch": 0.779290467398956, "grad_norm": 0.8915045857429504, "learning_rate": 3.6715453705533314e-06, "loss": 0.0654, "step": 44004 }, { "epoch": 0.7793081769359844, "grad_norm": 0.44135206937789917, "learning_rate": 3.67098145502238e-06, "loss": 0.0472, "step": 44005 }, { "epoch": 0.7793258864730129, "grad_norm": 0.1562996655702591, "learning_rate": 3.6704175767627534e-06, "loss": 0.0468, "step": 44006 }, { "epoch": 0.7793435960100413, "grad_norm": 0.7895073294639587, "learning_rate": 3.6698537357763063e-06, "loss": 0.0646, "step": 44007 }, { "epoch": 0.7793613055470697, "grad_norm": 0.541762113571167, "learning_rate": 3.6692899320649006e-06, "loss": 0.0623, "step": 44008 }, { "epoch": 0.7793790150840981, "grad_norm": 0.2594572901725769, "learning_rate": 3.6687261656303794e-06, "loss": 0.0411, "step": 44009 }, { "epoch": 0.7793967246211266, "grad_norm": 0.5405929684638977, "learning_rate": 3.668162436474602e-06, "loss": 0.0507, "step": 44010 }, { "epoch": 0.779414434158155, "grad_norm": 0.5550317764282227, "learning_rate": 3.667598744599425e-06, "loss": 0.0694, "step": 44011 }, { "epoch": 0.7794321436951834, "grad_norm": 0.5857516527175903, "learning_rate": 3.667035090006703e-06, "loss": 0.0606, "step": 44012 }, { "epoch": 0.7794498532322118, "grad_norm": 0.5415852069854736, "learning_rate": 3.6664714726982866e-06, "loss": 0.0448, "step": 44013 }, { "epoch": 0.7794675627692403, "grad_norm": 0.7678811550140381, "learning_rate": 3.665907892676035e-06, "loss": 0.0468, "step": 44014 }, { "epoch": 0.7794852723062687, "grad_norm": 1.13205885887146, "learning_rate": 3.6653443499418038e-06, "loss": 0.0702, "step": 44015 }, { "epoch": 0.7795029818432971, "grad_norm": 0.9378034472465515, "learning_rate": 3.664780844497439e-06, "loss": 0.0794, "step": 44016 }, { "epoch": 0.7795206913803255, "grad_norm": 0.6282646656036377, "learning_rate": 3.664217376344799e-06, "loss": 0.0538, "step": 44017 }, { "epoch": 0.779538400917354, "grad_norm": 0.9136674404144287, "learning_rate": 3.6636539454857402e-06, "loss": 0.0851, "step": 44018 }, { "epoch": 0.7795561104543824, "grad_norm": 0.49353575706481934, "learning_rate": 3.6630905519221104e-06, "loss": 0.0572, "step": 44019 }, { "epoch": 0.7795738199914108, "grad_norm": 0.5855231881141663, "learning_rate": 3.6625271956557654e-06, "loss": 0.0669, "step": 44020 }, { "epoch": 0.7795915295284394, "grad_norm": 0.36652398109436035, "learning_rate": 3.661963876688556e-06, "loss": 0.049, "step": 44021 }, { "epoch": 0.7796092390654678, "grad_norm": 0.4561309218406677, "learning_rate": 3.6614005950223455e-06, "loss": 0.0883, "step": 44022 }, { "epoch": 0.7796269486024962, "grad_norm": 0.6692256331443787, "learning_rate": 3.6608373506589776e-06, "loss": 0.0726, "step": 44023 }, { "epoch": 0.7796446581395245, "grad_norm": 0.9730741381645203, "learning_rate": 3.6602741436003073e-06, "loss": 0.0563, "step": 44024 }, { "epoch": 0.7796623676765531, "grad_norm": 0.8409360647201538, "learning_rate": 3.659710973848193e-06, "loss": 0.0631, "step": 44025 }, { "epoch": 0.7796800772135815, "grad_norm": 0.5905476808547974, "learning_rate": 3.659147841404477e-06, "loss": 0.0821, "step": 44026 }, { "epoch": 0.7796977867506099, "grad_norm": 0.5183815360069275, "learning_rate": 3.6585847462710174e-06, "loss": 0.0928, "step": 44027 }, { "epoch": 0.7797154962876383, "grad_norm": 0.7069871425628662, "learning_rate": 3.658021688449668e-06, "loss": 0.0441, "step": 44028 }, { "epoch": 0.7797332058246668, "grad_norm": 0.4022611379623413, "learning_rate": 3.6574586679422808e-06, "loss": 0.0595, "step": 44029 }, { "epoch": 0.7797509153616952, "grad_norm": 0.681254506111145, "learning_rate": 3.656895684750705e-06, "loss": 0.0468, "step": 44030 }, { "epoch": 0.7797686248987236, "grad_norm": 0.34322696924209595, "learning_rate": 3.656332738876797e-06, "loss": 0.0419, "step": 44031 }, { "epoch": 0.779786334435752, "grad_norm": 0.6595954298973083, "learning_rate": 3.6557698303224115e-06, "loss": 0.0474, "step": 44032 }, { "epoch": 0.7798040439727805, "grad_norm": 0.6836134791374207, "learning_rate": 3.65520695908939e-06, "loss": 0.0713, "step": 44033 }, { "epoch": 0.7798217535098089, "grad_norm": 0.5278300642967224, "learning_rate": 3.6546441251795896e-06, "loss": 0.0483, "step": 44034 }, { "epoch": 0.7798394630468373, "grad_norm": 0.345225989818573, "learning_rate": 3.6540813285948644e-06, "loss": 0.0485, "step": 44035 }, { "epoch": 0.7798571725838658, "grad_norm": 0.6727932095527649, "learning_rate": 3.653518569337069e-06, "loss": 0.0674, "step": 44036 }, { "epoch": 0.7798748821208942, "grad_norm": 0.7436513900756836, "learning_rate": 3.6529558474080433e-06, "loss": 0.0721, "step": 44037 }, { "epoch": 0.7798925916579226, "grad_norm": 0.6368475556373596, "learning_rate": 3.652393162809646e-06, "loss": 0.0576, "step": 44038 }, { "epoch": 0.779910301194951, "grad_norm": 0.6369531750679016, "learning_rate": 3.651830515543728e-06, "loss": 0.0628, "step": 44039 }, { "epoch": 0.7799280107319795, "grad_norm": 0.8225224614143372, "learning_rate": 3.651267905612139e-06, "loss": 0.0771, "step": 44040 }, { "epoch": 0.7799457202690079, "grad_norm": 0.7399037480354309, "learning_rate": 3.6507053330167304e-06, "loss": 0.0442, "step": 44041 }, { "epoch": 0.7799634298060363, "grad_norm": 0.4593181312084198, "learning_rate": 3.650142797759355e-06, "loss": 0.0514, "step": 44042 }, { "epoch": 0.7799811393430647, "grad_norm": 0.677177906036377, "learning_rate": 3.649580299841865e-06, "loss": 0.0816, "step": 44043 }, { "epoch": 0.7799988488800932, "grad_norm": 0.641289472579956, "learning_rate": 3.649017839266104e-06, "loss": 0.0421, "step": 44044 }, { "epoch": 0.7800165584171216, "grad_norm": 0.7247858643531799, "learning_rate": 3.6484554160339257e-06, "loss": 0.0405, "step": 44045 }, { "epoch": 0.78003426795415, "grad_norm": 0.36581405997276306, "learning_rate": 3.6478930301471806e-06, "loss": 0.0346, "step": 44046 }, { "epoch": 0.7800519774911784, "grad_norm": 1.0043151378631592, "learning_rate": 3.6473306816077246e-06, "loss": 0.0853, "step": 44047 }, { "epoch": 0.7800696870282069, "grad_norm": 0.7355835437774658, "learning_rate": 3.646768370417397e-06, "loss": 0.0606, "step": 44048 }, { "epoch": 0.7800873965652353, "grad_norm": 0.5178556442260742, "learning_rate": 3.646206096578055e-06, "loss": 0.0649, "step": 44049 }, { "epoch": 0.7801051061022637, "grad_norm": 0.5071580410003662, "learning_rate": 3.645643860091544e-06, "loss": 0.0443, "step": 44050 }, { "epoch": 0.7801228156392922, "grad_norm": 0.832938015460968, "learning_rate": 3.6450816609597176e-06, "loss": 0.0773, "step": 44051 }, { "epoch": 0.7801405251763206, "grad_norm": 0.5430929660797119, "learning_rate": 3.6445194991844254e-06, "loss": 0.0624, "step": 44052 }, { "epoch": 0.780158234713349, "grad_norm": 0.5491836667060852, "learning_rate": 3.643957374767513e-06, "loss": 0.0682, "step": 44053 }, { "epoch": 0.7801759442503774, "grad_norm": 0.49463531374931335, "learning_rate": 3.643395287710839e-06, "loss": 0.0731, "step": 44054 }, { "epoch": 0.7801936537874059, "grad_norm": 0.7238960266113281, "learning_rate": 3.64283323801624e-06, "loss": 0.0624, "step": 44055 }, { "epoch": 0.7802113633244343, "grad_norm": 0.46763038635253906, "learning_rate": 3.642271225685571e-06, "loss": 0.0355, "step": 44056 }, { "epoch": 0.7802290728614627, "grad_norm": 0.7495705485343933, "learning_rate": 3.6417092507206845e-06, "loss": 0.0775, "step": 44057 }, { "epoch": 0.7802467823984911, "grad_norm": 0.5817442536354065, "learning_rate": 3.6411473131234184e-06, "loss": 0.0586, "step": 44058 }, { "epoch": 0.7802644919355196, "grad_norm": 0.8050026297569275, "learning_rate": 3.6405854128956335e-06, "loss": 0.0671, "step": 44059 }, { "epoch": 0.780282201472548, "grad_norm": 0.49972105026245117, "learning_rate": 3.6400235500391727e-06, "loss": 0.0488, "step": 44060 }, { "epoch": 0.7802999110095764, "grad_norm": 0.7284177541732788, "learning_rate": 3.6394617245558896e-06, "loss": 0.0585, "step": 44061 }, { "epoch": 0.7803176205466048, "grad_norm": 0.7478751540184021, "learning_rate": 3.638899936447625e-06, "loss": 0.0572, "step": 44062 }, { "epoch": 0.7803353300836333, "grad_norm": 0.6745175719261169, "learning_rate": 3.6383381857162285e-06, "loss": 0.0758, "step": 44063 }, { "epoch": 0.7803530396206617, "grad_norm": 0.9000469446182251, "learning_rate": 3.637776472363557e-06, "loss": 0.0698, "step": 44064 }, { "epoch": 0.7803707491576901, "grad_norm": 0.7099314332008362, "learning_rate": 3.637214796391446e-06, "loss": 0.0629, "step": 44065 }, { "epoch": 0.7803884586947186, "grad_norm": 0.5422670841217041, "learning_rate": 3.6366531578017485e-06, "loss": 0.0586, "step": 44066 }, { "epoch": 0.780406168231747, "grad_norm": 0.5671424269676208, "learning_rate": 3.636091556596314e-06, "loss": 0.071, "step": 44067 }, { "epoch": 0.7804238777687754, "grad_norm": 0.9658122062683105, "learning_rate": 3.635529992776988e-06, "loss": 0.0557, "step": 44068 }, { "epoch": 0.7804415873058038, "grad_norm": 0.6539821624755859, "learning_rate": 3.63496846634562e-06, "loss": 0.0613, "step": 44069 }, { "epoch": 0.7804592968428323, "grad_norm": 0.987302839756012, "learning_rate": 3.634406977304055e-06, "loss": 0.0475, "step": 44070 }, { "epoch": 0.7804770063798607, "grad_norm": 0.6697894334793091, "learning_rate": 3.6338455256541463e-06, "loss": 0.0455, "step": 44071 }, { "epoch": 0.7804947159168891, "grad_norm": 0.7145351767539978, "learning_rate": 3.633284111397733e-06, "loss": 0.0562, "step": 44072 }, { "epoch": 0.7805124254539175, "grad_norm": 0.5356718301773071, "learning_rate": 3.6327227345366645e-06, "loss": 0.0409, "step": 44073 }, { "epoch": 0.780530134990946, "grad_norm": 0.7097060084342957, "learning_rate": 3.6321613950727876e-06, "loss": 0.0702, "step": 44074 }, { "epoch": 0.7805478445279744, "grad_norm": 0.8867989182472229, "learning_rate": 3.6316000930079552e-06, "loss": 0.0591, "step": 44075 }, { "epoch": 0.7805655540650028, "grad_norm": 0.6274194717407227, "learning_rate": 3.6310388283440032e-06, "loss": 0.0711, "step": 44076 }, { "epoch": 0.7805832636020312, "grad_norm": 0.31745192408561707, "learning_rate": 3.630477601082786e-06, "loss": 0.0428, "step": 44077 }, { "epoch": 0.7806009731390597, "grad_norm": 0.6803209781646729, "learning_rate": 3.629916411226144e-06, "loss": 0.0748, "step": 44078 }, { "epoch": 0.7806186826760881, "grad_norm": 1.021857738494873, "learning_rate": 3.6293552587759305e-06, "loss": 0.0734, "step": 44079 }, { "epoch": 0.7806363922131165, "grad_norm": 0.808372974395752, "learning_rate": 3.628794143733985e-06, "loss": 0.0537, "step": 44080 }, { "epoch": 0.780654101750145, "grad_norm": 0.510046660900116, "learning_rate": 3.6282330661021595e-06, "loss": 0.0438, "step": 44081 }, { "epoch": 0.7806718112871734, "grad_norm": 0.7203010320663452, "learning_rate": 3.6276720258822992e-06, "loss": 0.0858, "step": 44082 }, { "epoch": 0.7806895208242018, "grad_norm": 0.8032435774803162, "learning_rate": 3.6271110230762442e-06, "loss": 0.0591, "step": 44083 }, { "epoch": 0.7807072303612302, "grad_norm": 0.9863383173942566, "learning_rate": 3.6265500576858436e-06, "loss": 0.0487, "step": 44084 }, { "epoch": 0.7807249398982588, "grad_norm": 0.4888487458229065, "learning_rate": 3.6259891297129433e-06, "loss": 0.0742, "step": 44085 }, { "epoch": 0.7807426494352872, "grad_norm": 0.5649759769439697, "learning_rate": 3.6254282391593922e-06, "loss": 0.0608, "step": 44086 }, { "epoch": 0.7807603589723155, "grad_norm": 0.5167132616043091, "learning_rate": 3.6248673860270228e-06, "loss": 0.0657, "step": 44087 }, { "epoch": 0.780778068509344, "grad_norm": 0.3156358003616333, "learning_rate": 3.624306570317694e-06, "loss": 0.04, "step": 44088 }, { "epoch": 0.7807957780463725, "grad_norm": 0.5380202531814575, "learning_rate": 3.6237457920332505e-06, "loss": 0.082, "step": 44089 }, { "epoch": 0.7808134875834009, "grad_norm": 0.670577883720398, "learning_rate": 3.623185051175529e-06, "loss": 0.0368, "step": 44090 }, { "epoch": 0.7808311971204293, "grad_norm": 0.4746634364128113, "learning_rate": 3.6226243477463755e-06, "loss": 0.0489, "step": 44091 }, { "epoch": 0.7808489066574577, "grad_norm": 0.6957395672798157, "learning_rate": 3.6220636817476394e-06, "loss": 0.0669, "step": 44092 }, { "epoch": 0.7808666161944862, "grad_norm": 0.5103758573532104, "learning_rate": 3.621503053181167e-06, "loss": 0.062, "step": 44093 }, { "epoch": 0.7808843257315146, "grad_norm": 0.5761691927909851, "learning_rate": 3.620942462048794e-06, "loss": 0.0905, "step": 44094 }, { "epoch": 0.780902035268543, "grad_norm": 0.7327122092247009, "learning_rate": 3.6203819083523688e-06, "loss": 0.0823, "step": 44095 }, { "epoch": 0.7809197448055715, "grad_norm": 0.6073439121246338, "learning_rate": 3.619821392093738e-06, "loss": 0.0587, "step": 44096 }, { "epoch": 0.7809374543425999, "grad_norm": 0.4892400801181793, "learning_rate": 3.6192609132747417e-06, "loss": 0.0364, "step": 44097 }, { "epoch": 0.7809551638796283, "grad_norm": 0.5431462526321411, "learning_rate": 3.618700471897227e-06, "loss": 0.0507, "step": 44098 }, { "epoch": 0.7809728734166567, "grad_norm": 0.44711899757385254, "learning_rate": 3.618140067963036e-06, "loss": 0.0377, "step": 44099 }, { "epoch": 0.7809905829536852, "grad_norm": 0.7857270836830139, "learning_rate": 3.617579701474017e-06, "loss": 0.0735, "step": 44100 }, { "epoch": 0.7810082924907136, "grad_norm": 0.6353843808174133, "learning_rate": 3.6170193724320065e-06, "loss": 0.0382, "step": 44101 }, { "epoch": 0.781026002027742, "grad_norm": 0.8323066234588623, "learning_rate": 3.6164590808388504e-06, "loss": 0.0431, "step": 44102 }, { "epoch": 0.7810437115647704, "grad_norm": 0.30054470896720886, "learning_rate": 3.6158988266963968e-06, "loss": 0.0673, "step": 44103 }, { "epoch": 0.7810614211017989, "grad_norm": 0.23126426339149475, "learning_rate": 3.6153386100064807e-06, "loss": 0.0314, "step": 44104 }, { "epoch": 0.7810791306388273, "grad_norm": 0.5275160074234009, "learning_rate": 3.6147784307709474e-06, "loss": 0.0429, "step": 44105 }, { "epoch": 0.7810968401758557, "grad_norm": 0.3763465881347656, "learning_rate": 3.6142182889916426e-06, "loss": 0.0282, "step": 44106 }, { "epoch": 0.7811145497128841, "grad_norm": 0.4768733084201813, "learning_rate": 3.6136581846704094e-06, "loss": 0.0766, "step": 44107 }, { "epoch": 0.7811322592499126, "grad_norm": 0.44520896673202515, "learning_rate": 3.6130981178090876e-06, "loss": 0.0341, "step": 44108 }, { "epoch": 0.781149968786941, "grad_norm": 0.7977946996688843, "learning_rate": 3.6125380884095216e-06, "loss": 0.0614, "step": 44109 }, { "epoch": 0.7811676783239694, "grad_norm": 0.7718156576156616, "learning_rate": 3.6119780964735575e-06, "loss": 0.0667, "step": 44110 }, { "epoch": 0.7811853878609979, "grad_norm": 0.5684903264045715, "learning_rate": 3.6114181420030296e-06, "loss": 0.0504, "step": 44111 }, { "epoch": 0.7812030973980263, "grad_norm": 0.3543489873409271, "learning_rate": 3.610858224999784e-06, "loss": 0.0605, "step": 44112 }, { "epoch": 0.7812208069350547, "grad_norm": 0.7668073177337646, "learning_rate": 3.610298345465663e-06, "loss": 0.0682, "step": 44113 }, { "epoch": 0.7812385164720831, "grad_norm": 1.0895696878433228, "learning_rate": 3.6097385034025134e-06, "loss": 0.0603, "step": 44114 }, { "epoch": 0.7812562260091116, "grad_norm": 0.6779353618621826, "learning_rate": 3.6091786988121667e-06, "loss": 0.0616, "step": 44115 }, { "epoch": 0.78127393554614, "grad_norm": 0.7280834913253784, "learning_rate": 3.608618931696465e-06, "loss": 0.0524, "step": 44116 }, { "epoch": 0.7812916450831684, "grad_norm": 0.37843847274780273, "learning_rate": 3.6080592020572605e-06, "loss": 0.0608, "step": 44117 }, { "epoch": 0.7813093546201968, "grad_norm": 0.47483620047569275, "learning_rate": 3.607499509896393e-06, "loss": 0.0411, "step": 44118 }, { "epoch": 0.7813270641572253, "grad_norm": 0.7080388069152832, "learning_rate": 3.606939855215696e-06, "loss": 0.0734, "step": 44119 }, { "epoch": 0.7813447736942537, "grad_norm": 0.6915183663368225, "learning_rate": 3.6063802380170135e-06, "loss": 0.0498, "step": 44120 }, { "epoch": 0.7813624832312821, "grad_norm": 0.4324358105659485, "learning_rate": 3.6058206583021934e-06, "loss": 0.0456, "step": 44121 }, { "epoch": 0.7813801927683105, "grad_norm": 0.299866646528244, "learning_rate": 3.6052611160730663e-06, "loss": 0.0331, "step": 44122 }, { "epoch": 0.781397902305339, "grad_norm": 0.6339074373245239, "learning_rate": 3.6047016113314758e-06, "loss": 0.0412, "step": 44123 }, { "epoch": 0.7814156118423674, "grad_norm": 0.7894827723503113, "learning_rate": 3.6041421440792665e-06, "loss": 0.0682, "step": 44124 }, { "epoch": 0.7814333213793958, "grad_norm": 0.38975396752357483, "learning_rate": 3.603582714318276e-06, "loss": 0.0458, "step": 44125 }, { "epoch": 0.7814510309164243, "grad_norm": 0.7914323210716248, "learning_rate": 3.6030233220503457e-06, "loss": 0.0654, "step": 44126 }, { "epoch": 0.7814687404534527, "grad_norm": 0.4815496504306793, "learning_rate": 3.6024639672773157e-06, "loss": 0.043, "step": 44127 }, { "epoch": 0.7814864499904811, "grad_norm": 0.5065140128135681, "learning_rate": 3.6019046500010316e-06, "loss": 0.0491, "step": 44128 }, { "epoch": 0.7815041595275095, "grad_norm": 0.7891507148742676, "learning_rate": 3.6013453702233234e-06, "loss": 0.0438, "step": 44129 }, { "epoch": 0.781521869064538, "grad_norm": 0.4561951458454132, "learning_rate": 3.6007861279460364e-06, "loss": 0.0589, "step": 44130 }, { "epoch": 0.7815395786015664, "grad_norm": 0.36507365107536316, "learning_rate": 3.600226923171009e-06, "loss": 0.0452, "step": 44131 }, { "epoch": 0.7815572881385948, "grad_norm": 0.7448937892913818, "learning_rate": 3.5996677559000878e-06, "loss": 0.0862, "step": 44132 }, { "epoch": 0.7815749976756232, "grad_norm": 0.6480448842048645, "learning_rate": 3.599108626135102e-06, "loss": 0.0413, "step": 44133 }, { "epoch": 0.7815927072126517, "grad_norm": 1.0044260025024414, "learning_rate": 3.5985495338778945e-06, "loss": 0.066, "step": 44134 }, { "epoch": 0.7816104167496801, "grad_norm": 0.8465622663497925, "learning_rate": 3.5979904791303075e-06, "loss": 0.0747, "step": 44135 }, { "epoch": 0.7816281262867085, "grad_norm": 0.5532771348953247, "learning_rate": 3.5974314618941777e-06, "loss": 0.0575, "step": 44136 }, { "epoch": 0.7816458358237369, "grad_norm": 0.35181841254234314, "learning_rate": 3.596872482171344e-06, "loss": 0.0406, "step": 44137 }, { "epoch": 0.7816635453607654, "grad_norm": 0.5465116500854492, "learning_rate": 3.5963135399636476e-06, "loss": 0.0489, "step": 44138 }, { "epoch": 0.7816812548977938, "grad_norm": 0.6428270936012268, "learning_rate": 3.5957546352729308e-06, "loss": 0.0551, "step": 44139 }, { "epoch": 0.7816989644348222, "grad_norm": 0.48133695125579834, "learning_rate": 3.5951957681010237e-06, "loss": 0.0587, "step": 44140 }, { "epoch": 0.7817166739718507, "grad_norm": 0.4363333582878113, "learning_rate": 3.5946369384497684e-06, "loss": 0.0669, "step": 44141 }, { "epoch": 0.7817343835088791, "grad_norm": 0.668864905834198, "learning_rate": 3.5940781463210083e-06, "loss": 0.0692, "step": 44142 }, { "epoch": 0.7817520930459075, "grad_norm": 0.4923287034034729, "learning_rate": 3.593519391716573e-06, "loss": 0.0402, "step": 44143 }, { "epoch": 0.7817698025829359, "grad_norm": 0.692450225353241, "learning_rate": 3.5929606746383054e-06, "loss": 0.0673, "step": 44144 }, { "epoch": 0.7817875121199644, "grad_norm": 0.3199489712715149, "learning_rate": 3.59240199508804e-06, "loss": 0.0617, "step": 44145 }, { "epoch": 0.7818052216569928, "grad_norm": 0.7837339639663696, "learning_rate": 3.5918433530676255e-06, "loss": 0.0545, "step": 44146 }, { "epoch": 0.7818229311940212, "grad_norm": 0.48583853244781494, "learning_rate": 3.5912847485788885e-06, "loss": 0.0475, "step": 44147 }, { "epoch": 0.7818406407310496, "grad_norm": 0.6157580614089966, "learning_rate": 3.5907261816236714e-06, "loss": 0.0583, "step": 44148 }, { "epoch": 0.7818583502680782, "grad_norm": 0.824167788028717, "learning_rate": 3.590167652203811e-06, "loss": 0.066, "step": 44149 }, { "epoch": 0.7818760598051065, "grad_norm": 0.5218268632888794, "learning_rate": 3.58960916032115e-06, "loss": 0.0417, "step": 44150 }, { "epoch": 0.781893769342135, "grad_norm": 0.6720567345619202, "learning_rate": 3.5890507059775144e-06, "loss": 0.0672, "step": 44151 }, { "epoch": 0.7819114788791633, "grad_norm": 0.705251932144165, "learning_rate": 3.5884922891747496e-06, "loss": 0.0435, "step": 44152 }, { "epoch": 0.7819291884161919, "grad_norm": 0.6104437112808228, "learning_rate": 3.58793390991469e-06, "loss": 0.0596, "step": 44153 }, { "epoch": 0.7819468979532203, "grad_norm": 0.5277109146118164, "learning_rate": 3.5873755681991748e-06, "loss": 0.0424, "step": 44154 }, { "epoch": 0.7819646074902487, "grad_norm": 0.46639686822891235, "learning_rate": 3.586817264030037e-06, "loss": 0.0419, "step": 44155 }, { "epoch": 0.7819823170272772, "grad_norm": 0.756191611289978, "learning_rate": 3.586258997409119e-06, "loss": 0.0639, "step": 44156 }, { "epoch": 0.7820000265643056, "grad_norm": 0.5997409224510193, "learning_rate": 3.5857007683382576e-06, "loss": 0.0544, "step": 44157 }, { "epoch": 0.782017736101334, "grad_norm": 0.7950960993766785, "learning_rate": 3.5851425768192825e-06, "loss": 0.0627, "step": 44158 }, { "epoch": 0.7820354456383624, "grad_norm": 0.5553808212280273, "learning_rate": 3.5845844228540324e-06, "loss": 0.0575, "step": 44159 }, { "epoch": 0.7820531551753909, "grad_norm": 0.5705994963645935, "learning_rate": 3.5840263064443495e-06, "loss": 0.0499, "step": 44160 }, { "epoch": 0.7820708647124193, "grad_norm": 0.6584354639053345, "learning_rate": 3.583468227592062e-06, "loss": 0.0635, "step": 44161 }, { "epoch": 0.7820885742494477, "grad_norm": 0.5615530610084534, "learning_rate": 3.582910186299009e-06, "loss": 0.0518, "step": 44162 }, { "epoch": 0.7821062837864761, "grad_norm": 0.8817352652549744, "learning_rate": 3.5823521825670265e-06, "loss": 0.0787, "step": 44163 }, { "epoch": 0.7821239933235046, "grad_norm": 0.43594178557395935, "learning_rate": 3.58179421639795e-06, "loss": 0.0486, "step": 44164 }, { "epoch": 0.782141702860533, "grad_norm": 0.5117626190185547, "learning_rate": 3.5812362877936167e-06, "loss": 0.0461, "step": 44165 }, { "epoch": 0.7821594123975614, "grad_norm": 0.5998128056526184, "learning_rate": 3.5806783967558616e-06, "loss": 0.0702, "step": 44166 }, { "epoch": 0.7821771219345898, "grad_norm": 0.7508863210678101, "learning_rate": 3.580120543286523e-06, "loss": 0.0796, "step": 44167 }, { "epoch": 0.7821948314716183, "grad_norm": 0.6972758173942566, "learning_rate": 3.579562727387428e-06, "loss": 0.0723, "step": 44168 }, { "epoch": 0.7822125410086467, "grad_norm": 0.611842155456543, "learning_rate": 3.579004949060417e-06, "loss": 0.0516, "step": 44169 }, { "epoch": 0.7822302505456751, "grad_norm": 0.6000518202781677, "learning_rate": 3.5784472083073235e-06, "loss": 0.0575, "step": 44170 }, { "epoch": 0.7822479600827036, "grad_norm": 0.5099107027053833, "learning_rate": 3.5778895051299887e-06, "loss": 0.0418, "step": 44171 }, { "epoch": 0.782265669619732, "grad_norm": 0.5709525346755981, "learning_rate": 3.5773318395302384e-06, "loss": 0.066, "step": 44172 }, { "epoch": 0.7822833791567604, "grad_norm": 0.7822880744934082, "learning_rate": 3.5767742115099112e-06, "loss": 0.046, "step": 44173 }, { "epoch": 0.7823010886937888, "grad_norm": 0.6501966714859009, "learning_rate": 3.57621662107084e-06, "loss": 0.0313, "step": 44174 }, { "epoch": 0.7823187982308173, "grad_norm": 0.6909319758415222, "learning_rate": 3.575659068214861e-06, "loss": 0.0701, "step": 44175 }, { "epoch": 0.7823365077678457, "grad_norm": 0.7703383564949036, "learning_rate": 3.5751015529438086e-06, "loss": 0.0831, "step": 44176 }, { "epoch": 0.7823542173048741, "grad_norm": 0.4840414822101593, "learning_rate": 3.5745440752595166e-06, "loss": 0.0638, "step": 44177 }, { "epoch": 0.7823719268419025, "grad_norm": 0.355096697807312, "learning_rate": 3.5739866351638233e-06, "loss": 0.0585, "step": 44178 }, { "epoch": 0.782389636378931, "grad_norm": 0.4065815806388855, "learning_rate": 3.5734292326585543e-06, "loss": 0.0375, "step": 44179 }, { "epoch": 0.7824073459159594, "grad_norm": 1.1600178480148315, "learning_rate": 3.5728718677455464e-06, "loss": 0.0856, "step": 44180 }, { "epoch": 0.7824250554529878, "grad_norm": 0.9054125547409058, "learning_rate": 3.5723145404266343e-06, "loss": 0.0509, "step": 44181 }, { "epoch": 0.7824427649900163, "grad_norm": 0.6752138733863831, "learning_rate": 3.5717572507036523e-06, "loss": 0.0803, "step": 44182 }, { "epoch": 0.7824604745270447, "grad_norm": 0.671599268913269, "learning_rate": 3.5711999985784333e-06, "loss": 0.0512, "step": 44183 }, { "epoch": 0.7824781840640731, "grad_norm": 0.8731058835983276, "learning_rate": 3.5706427840528095e-06, "loss": 0.0763, "step": 44184 }, { "epoch": 0.7824958936011015, "grad_norm": 0.5853838324546814, "learning_rate": 3.570085607128621e-06, "loss": 0.0424, "step": 44185 }, { "epoch": 0.78251360313813, "grad_norm": 1.034522533416748, "learning_rate": 3.5695284678076878e-06, "loss": 0.0818, "step": 44186 }, { "epoch": 0.7825313126751584, "grad_norm": 0.6740676164627075, "learning_rate": 3.568971366091852e-06, "loss": 0.0507, "step": 44187 }, { "epoch": 0.7825490222121868, "grad_norm": 0.8175764083862305, "learning_rate": 3.5684143019829425e-06, "loss": 0.0605, "step": 44188 }, { "epoch": 0.7825667317492152, "grad_norm": 0.4522024095058441, "learning_rate": 3.567857275482799e-06, "loss": 0.049, "step": 44189 }, { "epoch": 0.7825844412862437, "grad_norm": 0.6162443161010742, "learning_rate": 3.5673002865932446e-06, "loss": 0.0658, "step": 44190 }, { "epoch": 0.7826021508232721, "grad_norm": 0.19743561744689941, "learning_rate": 3.5667433353161155e-06, "loss": 0.0485, "step": 44191 }, { "epoch": 0.7826198603603005, "grad_norm": 0.4138568341732025, "learning_rate": 3.5661864216532445e-06, "loss": 0.0396, "step": 44192 }, { "epoch": 0.7826375698973289, "grad_norm": 0.621793806552887, "learning_rate": 3.5656295456064643e-06, "loss": 0.0573, "step": 44193 }, { "epoch": 0.7826552794343574, "grad_norm": 0.6115691065788269, "learning_rate": 3.5650727071776056e-06, "loss": 0.0816, "step": 44194 }, { "epoch": 0.7826729889713858, "grad_norm": 0.7330300807952881, "learning_rate": 3.564515906368501e-06, "loss": 0.067, "step": 44195 }, { "epoch": 0.7826906985084142, "grad_norm": 0.9530944228172302, "learning_rate": 3.5639591431809874e-06, "loss": 0.0658, "step": 44196 }, { "epoch": 0.7827084080454427, "grad_norm": 0.6910982728004456, "learning_rate": 3.5634024176168867e-06, "loss": 0.0525, "step": 44197 }, { "epoch": 0.7827261175824711, "grad_norm": 0.5281105041503906, "learning_rate": 3.5628457296780352e-06, "loss": 0.0625, "step": 44198 }, { "epoch": 0.7827438271194995, "grad_norm": 0.8118826746940613, "learning_rate": 3.562289079366269e-06, "loss": 0.0521, "step": 44199 }, { "epoch": 0.7827615366565279, "grad_norm": 0.49150919914245605, "learning_rate": 3.5617324666834107e-06, "loss": 0.0349, "step": 44200 }, { "epoch": 0.7827792461935564, "grad_norm": 0.4562811553478241, "learning_rate": 3.561175891631296e-06, "loss": 0.0589, "step": 44201 }, { "epoch": 0.7827969557305848, "grad_norm": 0.7342877388000488, "learning_rate": 3.560619354211755e-06, "loss": 0.0985, "step": 44202 }, { "epoch": 0.7828146652676132, "grad_norm": 0.7592965960502625, "learning_rate": 3.5600628544266193e-06, "loss": 0.0632, "step": 44203 }, { "epoch": 0.7828323748046416, "grad_norm": 0.8060483336448669, "learning_rate": 3.55950639227772e-06, "loss": 0.0452, "step": 44204 }, { "epoch": 0.7828500843416701, "grad_norm": 0.4944226145744324, "learning_rate": 3.558949967766888e-06, "loss": 0.0724, "step": 44205 }, { "epoch": 0.7828677938786985, "grad_norm": 0.6979554295539856, "learning_rate": 3.5583935808959573e-06, "loss": 0.0523, "step": 44206 }, { "epoch": 0.7828855034157269, "grad_norm": 0.30299341678619385, "learning_rate": 3.5578372316667497e-06, "loss": 0.0538, "step": 44207 }, { "epoch": 0.7829032129527553, "grad_norm": 0.7542625069618225, "learning_rate": 3.5572809200811e-06, "loss": 0.0794, "step": 44208 }, { "epoch": 0.7829209224897838, "grad_norm": 0.476687490940094, "learning_rate": 3.556724646140841e-06, "loss": 0.0433, "step": 44209 }, { "epoch": 0.7829386320268122, "grad_norm": 0.28095224499702454, "learning_rate": 3.556168409847802e-06, "loss": 0.0401, "step": 44210 }, { "epoch": 0.7829563415638406, "grad_norm": 0.6527989506721497, "learning_rate": 3.5556122112038053e-06, "loss": 0.0552, "step": 44211 }, { "epoch": 0.7829740511008692, "grad_norm": 0.6075043678283691, "learning_rate": 3.5550560502106894e-06, "loss": 0.0592, "step": 44212 }, { "epoch": 0.7829917606378975, "grad_norm": 0.6925384402275085, "learning_rate": 3.554499926870286e-06, "loss": 0.0467, "step": 44213 }, { "epoch": 0.783009470174926, "grad_norm": 0.3868447244167328, "learning_rate": 3.5539438411844174e-06, "loss": 0.067, "step": 44214 }, { "epoch": 0.7830271797119543, "grad_norm": 0.5576026439666748, "learning_rate": 3.5533877931549145e-06, "loss": 0.0424, "step": 44215 }, { "epoch": 0.7830448892489829, "grad_norm": 0.7037872672080994, "learning_rate": 3.552831782783607e-06, "loss": 0.0742, "step": 44216 }, { "epoch": 0.7830625987860113, "grad_norm": 0.49734583497047424, "learning_rate": 3.552275810072331e-06, "loss": 0.0558, "step": 44217 }, { "epoch": 0.7830803083230397, "grad_norm": 0.43824493885040283, "learning_rate": 3.551719875022906e-06, "loss": 0.0577, "step": 44218 }, { "epoch": 0.783098017860068, "grad_norm": 0.6063859462738037, "learning_rate": 3.5511639776371624e-06, "loss": 0.0507, "step": 44219 }, { "epoch": 0.7831157273970966, "grad_norm": 0.59049391746521, "learning_rate": 3.550608117916933e-06, "loss": 0.0739, "step": 44220 }, { "epoch": 0.783133436934125, "grad_norm": 0.5699707269668579, "learning_rate": 3.5500522958640447e-06, "loss": 0.0492, "step": 44221 }, { "epoch": 0.7831511464711534, "grad_norm": 0.7592630386352539, "learning_rate": 3.549496511480326e-06, "loss": 0.0635, "step": 44222 }, { "epoch": 0.7831688560081818, "grad_norm": 0.6420196294784546, "learning_rate": 3.5489407647676063e-06, "loss": 0.1006, "step": 44223 }, { "epoch": 0.7831865655452103, "grad_norm": 0.5337757468223572, "learning_rate": 3.548385055727717e-06, "loss": 0.0568, "step": 44224 }, { "epoch": 0.7832042750822387, "grad_norm": 0.8017573952674866, "learning_rate": 3.5478293843624776e-06, "loss": 0.0469, "step": 44225 }, { "epoch": 0.7832219846192671, "grad_norm": 0.31676462292671204, "learning_rate": 3.5472737506737217e-06, "loss": 0.056, "step": 44226 }, { "epoch": 0.7832396941562956, "grad_norm": 0.5502138733863831, "learning_rate": 3.5467181546632783e-06, "loss": 0.0382, "step": 44227 }, { "epoch": 0.783257403693324, "grad_norm": 0.6129448413848877, "learning_rate": 3.546162596332977e-06, "loss": 0.0661, "step": 44228 }, { "epoch": 0.7832751132303524, "grad_norm": 0.3785143792629242, "learning_rate": 3.5456070756846363e-06, "loss": 0.0399, "step": 44229 }, { "epoch": 0.7832928227673808, "grad_norm": 0.4604818522930145, "learning_rate": 3.5450515927200923e-06, "loss": 0.0885, "step": 44230 }, { "epoch": 0.7833105323044093, "grad_norm": 0.7000229358673096, "learning_rate": 3.5444961474411693e-06, "loss": 0.0777, "step": 44231 }, { "epoch": 0.7833282418414377, "grad_norm": 0.8129525184631348, "learning_rate": 3.543940739849694e-06, "loss": 0.0716, "step": 44232 }, { "epoch": 0.7833459513784661, "grad_norm": 0.2653786540031433, "learning_rate": 3.543385369947496e-06, "loss": 0.0431, "step": 44233 }, { "epoch": 0.7833636609154945, "grad_norm": 0.5885360240936279, "learning_rate": 3.5428300377364013e-06, "loss": 0.0508, "step": 44234 }, { "epoch": 0.783381370452523, "grad_norm": 0.6755247712135315, "learning_rate": 3.5422747432182405e-06, "loss": 0.0681, "step": 44235 }, { "epoch": 0.7833990799895514, "grad_norm": 0.6228996515274048, "learning_rate": 3.541719486394834e-06, "loss": 0.0555, "step": 44236 }, { "epoch": 0.7834167895265798, "grad_norm": 0.5287817716598511, "learning_rate": 3.541164267268011e-06, "loss": 0.062, "step": 44237 }, { "epoch": 0.7834344990636082, "grad_norm": 0.5004429817199707, "learning_rate": 3.540609085839603e-06, "loss": 0.0628, "step": 44238 }, { "epoch": 0.7834522086006367, "grad_norm": 0.8233131766319275, "learning_rate": 3.5400539421114257e-06, "loss": 0.0622, "step": 44239 }, { "epoch": 0.7834699181376651, "grad_norm": 0.4778260588645935, "learning_rate": 3.5394988360853104e-06, "loss": 0.0575, "step": 44240 }, { "epoch": 0.7834876276746935, "grad_norm": 0.5198218822479248, "learning_rate": 3.5389437677630883e-06, "loss": 0.0742, "step": 44241 }, { "epoch": 0.783505337211722, "grad_norm": 0.41123315691947937, "learning_rate": 3.5383887371465845e-06, "loss": 0.0471, "step": 44242 }, { "epoch": 0.7835230467487504, "grad_norm": 0.7534562349319458, "learning_rate": 3.5378337442376197e-06, "loss": 0.0515, "step": 44243 }, { "epoch": 0.7835407562857788, "grad_norm": 0.45182493329048157, "learning_rate": 3.537278789038022e-06, "loss": 0.0449, "step": 44244 }, { "epoch": 0.7835584658228072, "grad_norm": 0.7348052263259888, "learning_rate": 3.5367238715496234e-06, "loss": 0.0524, "step": 44245 }, { "epoch": 0.7835761753598357, "grad_norm": 0.6563012003898621, "learning_rate": 3.536168991774238e-06, "loss": 0.0377, "step": 44246 }, { "epoch": 0.7835938848968641, "grad_norm": 0.4879682660102844, "learning_rate": 3.5356141497136986e-06, "loss": 0.0589, "step": 44247 }, { "epoch": 0.7836115944338925, "grad_norm": 0.8778095245361328, "learning_rate": 3.535059345369827e-06, "loss": 0.0649, "step": 44248 }, { "epoch": 0.7836293039709209, "grad_norm": 0.3494458794593811, "learning_rate": 3.5345045787444523e-06, "loss": 0.0684, "step": 44249 }, { "epoch": 0.7836470135079494, "grad_norm": 0.2943632900714874, "learning_rate": 3.533949849839396e-06, "loss": 0.0331, "step": 44250 }, { "epoch": 0.7836647230449778, "grad_norm": 0.49818143248558044, "learning_rate": 3.5333951586564873e-06, "loss": 0.0656, "step": 44251 }, { "epoch": 0.7836824325820062, "grad_norm": 0.8657582998275757, "learning_rate": 3.532840505197547e-06, "loss": 0.0617, "step": 44252 }, { "epoch": 0.7837001421190346, "grad_norm": 0.6637855172157288, "learning_rate": 3.532285889464408e-06, "loss": 0.0685, "step": 44253 }, { "epoch": 0.7837178516560631, "grad_norm": 0.45850372314453125, "learning_rate": 3.531731311458881e-06, "loss": 0.043, "step": 44254 }, { "epoch": 0.7837355611930915, "grad_norm": 0.6450156569480896, "learning_rate": 3.5311767711828006e-06, "loss": 0.0627, "step": 44255 }, { "epoch": 0.7837532707301199, "grad_norm": 0.4741893708705902, "learning_rate": 3.530622268637992e-06, "loss": 0.063, "step": 44256 }, { "epoch": 0.7837709802671484, "grad_norm": 0.30260446667671204, "learning_rate": 3.530067803826273e-06, "loss": 0.0645, "step": 44257 }, { "epoch": 0.7837886898041768, "grad_norm": 0.7562112808227539, "learning_rate": 3.5295133767494692e-06, "loss": 0.0341, "step": 44258 }, { "epoch": 0.7838063993412052, "grad_norm": 0.6006773710250854, "learning_rate": 3.5289589874094057e-06, "loss": 0.0457, "step": 44259 }, { "epoch": 0.7838241088782336, "grad_norm": 0.4856310188770294, "learning_rate": 3.5284046358079097e-06, "loss": 0.085, "step": 44260 }, { "epoch": 0.7838418184152621, "grad_norm": 0.535022497177124, "learning_rate": 3.5278503219467993e-06, "loss": 0.054, "step": 44261 }, { "epoch": 0.7838595279522905, "grad_norm": 0.9023265242576599, "learning_rate": 3.527296045827902e-06, "loss": 0.0441, "step": 44262 }, { "epoch": 0.7838772374893189, "grad_norm": 0.3995606005191803, "learning_rate": 3.5267418074530455e-06, "loss": 0.0451, "step": 44263 }, { "epoch": 0.7838949470263473, "grad_norm": 0.886112630367279, "learning_rate": 3.5261876068240423e-06, "loss": 0.0669, "step": 44264 }, { "epoch": 0.7839126565633758, "grad_norm": 0.5077037811279297, "learning_rate": 3.5256334439427223e-06, "loss": 0.0572, "step": 44265 }, { "epoch": 0.7839303661004042, "grad_norm": 0.9315555691719055, "learning_rate": 3.5250793188109075e-06, "loss": 0.0697, "step": 44266 }, { "epoch": 0.7839480756374326, "grad_norm": 0.8325399160385132, "learning_rate": 3.524525231430425e-06, "loss": 0.0801, "step": 44267 }, { "epoch": 0.783965785174461, "grad_norm": 1.0393030643463135, "learning_rate": 3.5239711818030893e-06, "loss": 0.0805, "step": 44268 }, { "epoch": 0.7839834947114895, "grad_norm": 0.751448929309845, "learning_rate": 3.52341716993073e-06, "loss": 0.0325, "step": 44269 }, { "epoch": 0.7840012042485179, "grad_norm": 0.4524442255496979, "learning_rate": 3.522863195815165e-06, "loss": 0.0464, "step": 44270 }, { "epoch": 0.7840189137855463, "grad_norm": 0.5590767860412598, "learning_rate": 3.522309259458219e-06, "loss": 0.0403, "step": 44271 }, { "epoch": 0.7840366233225748, "grad_norm": 0.3574081063270569, "learning_rate": 3.5217553608617164e-06, "loss": 0.0456, "step": 44272 }, { "epoch": 0.7840543328596032, "grad_norm": 0.5353327989578247, "learning_rate": 3.521201500027477e-06, "loss": 0.0387, "step": 44273 }, { "epoch": 0.7840720423966316, "grad_norm": 0.7850255966186523, "learning_rate": 3.520647676957328e-06, "loss": 0.0812, "step": 44274 }, { "epoch": 0.78408975193366, "grad_norm": 0.517540454864502, "learning_rate": 3.5200938916530827e-06, "loss": 0.0666, "step": 44275 }, { "epoch": 0.7841074614706885, "grad_norm": 0.6356287002563477, "learning_rate": 3.5195401441165664e-06, "loss": 0.0505, "step": 44276 }, { "epoch": 0.784125171007717, "grad_norm": 0.44301357865333557, "learning_rate": 3.5189864343496076e-06, "loss": 0.0319, "step": 44277 }, { "epoch": 0.7841428805447453, "grad_norm": 0.6992356777191162, "learning_rate": 3.518432762354013e-06, "loss": 0.0586, "step": 44278 }, { "epoch": 0.7841605900817737, "grad_norm": 0.6155247092247009, "learning_rate": 3.5178791281316182e-06, "loss": 0.0513, "step": 44279 }, { "epoch": 0.7841782996188023, "grad_norm": 1.0481466054916382, "learning_rate": 3.517325531684238e-06, "loss": 0.0518, "step": 44280 }, { "epoch": 0.7841960091558307, "grad_norm": 0.3883504569530487, "learning_rate": 3.516771973013701e-06, "loss": 0.0602, "step": 44281 }, { "epoch": 0.784213718692859, "grad_norm": 0.8993727564811707, "learning_rate": 3.5162184521218193e-06, "loss": 0.0711, "step": 44282 }, { "epoch": 0.7842314282298875, "grad_norm": 1.1239882707595825, "learning_rate": 3.5156649690104163e-06, "loss": 0.062, "step": 44283 }, { "epoch": 0.784249137766916, "grad_norm": 0.404750794172287, "learning_rate": 3.5151115236813173e-06, "loss": 0.05, "step": 44284 }, { "epoch": 0.7842668473039444, "grad_norm": 0.6866872310638428, "learning_rate": 3.514558116136338e-06, "loss": 0.0718, "step": 44285 }, { "epoch": 0.7842845568409728, "grad_norm": 0.300700843334198, "learning_rate": 3.5140047463772988e-06, "loss": 0.0574, "step": 44286 }, { "epoch": 0.7843022663780013, "grad_norm": 0.776970386505127, "learning_rate": 3.5134514144060215e-06, "loss": 0.0743, "step": 44287 }, { "epoch": 0.7843199759150297, "grad_norm": 0.7458531856536865, "learning_rate": 3.512898120224329e-06, "loss": 0.0589, "step": 44288 }, { "epoch": 0.7843376854520581, "grad_norm": 0.5524103045463562, "learning_rate": 3.5123448638340395e-06, "loss": 0.0452, "step": 44289 }, { "epoch": 0.7843553949890865, "grad_norm": 0.619647741317749, "learning_rate": 3.5117916452369736e-06, "loss": 0.0502, "step": 44290 }, { "epoch": 0.784373104526115, "grad_norm": 0.45724067091941833, "learning_rate": 3.5112384644349516e-06, "loss": 0.0657, "step": 44291 }, { "epoch": 0.7843908140631434, "grad_norm": 0.3849025368690491, "learning_rate": 3.5106853214297952e-06, "loss": 0.0362, "step": 44292 }, { "epoch": 0.7844085236001718, "grad_norm": 0.4538995921611786, "learning_rate": 3.5101322162233194e-06, "loss": 0.0718, "step": 44293 }, { "epoch": 0.7844262331372002, "grad_norm": 0.8217893838882446, "learning_rate": 3.5095791488173466e-06, "loss": 0.0575, "step": 44294 }, { "epoch": 0.7844439426742287, "grad_norm": 0.5606205463409424, "learning_rate": 3.5090261192137002e-06, "loss": 0.0628, "step": 44295 }, { "epoch": 0.7844616522112571, "grad_norm": 0.37115204334259033, "learning_rate": 3.508473127414192e-06, "loss": 0.0329, "step": 44296 }, { "epoch": 0.7844793617482855, "grad_norm": 0.5110893845558167, "learning_rate": 3.5079201734206435e-06, "loss": 0.0412, "step": 44297 }, { "epoch": 0.7844970712853139, "grad_norm": 0.5346962213516235, "learning_rate": 3.507367257234877e-06, "loss": 0.0718, "step": 44298 }, { "epoch": 0.7845147808223424, "grad_norm": 0.5461047887802124, "learning_rate": 3.5068143788587075e-06, "loss": 0.0503, "step": 44299 }, { "epoch": 0.7845324903593708, "grad_norm": 0.695183515548706, "learning_rate": 3.5062615382939588e-06, "loss": 0.0623, "step": 44300 }, { "epoch": 0.7845501998963992, "grad_norm": 0.43729960918426514, "learning_rate": 3.505708735542446e-06, "loss": 0.052, "step": 44301 }, { "epoch": 0.7845679094334277, "grad_norm": 0.7895457744598389, "learning_rate": 3.505155970605993e-06, "loss": 0.0949, "step": 44302 }, { "epoch": 0.7845856189704561, "grad_norm": 0.6544079184532166, "learning_rate": 3.504603243486409e-06, "loss": 0.0838, "step": 44303 }, { "epoch": 0.7846033285074845, "grad_norm": 0.8058012127876282, "learning_rate": 3.50405055418552e-06, "loss": 0.0627, "step": 44304 }, { "epoch": 0.7846210380445129, "grad_norm": 0.7393685579299927, "learning_rate": 3.5034979027051395e-06, "loss": 0.0691, "step": 44305 }, { "epoch": 0.7846387475815414, "grad_norm": 0.7854586243629456, "learning_rate": 3.5029452890470944e-06, "loss": 0.0745, "step": 44306 }, { "epoch": 0.7846564571185698, "grad_norm": 0.8815637826919556, "learning_rate": 3.502392713213186e-06, "loss": 0.0725, "step": 44307 }, { "epoch": 0.7846741666555982, "grad_norm": 0.3934599757194519, "learning_rate": 3.5018401752052486e-06, "loss": 0.0635, "step": 44308 }, { "epoch": 0.7846918761926266, "grad_norm": 0.6279865503311157, "learning_rate": 3.5012876750250973e-06, "loss": 0.0816, "step": 44309 }, { "epoch": 0.7847095857296551, "grad_norm": 0.5579713582992554, "learning_rate": 3.500735212674542e-06, "loss": 0.0463, "step": 44310 }, { "epoch": 0.7847272952666835, "grad_norm": 0.4280562400817871, "learning_rate": 3.500182788155406e-06, "loss": 0.0416, "step": 44311 }, { "epoch": 0.7847450048037119, "grad_norm": 0.4099997580051422, "learning_rate": 3.4996304014695053e-06, "loss": 0.044, "step": 44312 }, { "epoch": 0.7847627143407403, "grad_norm": 0.40046921372413635, "learning_rate": 3.499078052618661e-06, "loss": 0.0372, "step": 44313 }, { "epoch": 0.7847804238777688, "grad_norm": 0.6511568427085876, "learning_rate": 3.498525741604682e-06, "loss": 0.0642, "step": 44314 }, { "epoch": 0.7847981334147972, "grad_norm": 0.6432597041130066, "learning_rate": 3.4979734684293908e-06, "loss": 0.0695, "step": 44315 }, { "epoch": 0.7848158429518256, "grad_norm": 0.2605985403060913, "learning_rate": 3.497421233094603e-06, "loss": 0.0394, "step": 44316 }, { "epoch": 0.7848335524888541, "grad_norm": 0.6988703608512878, "learning_rate": 3.4968690356021366e-06, "loss": 0.0414, "step": 44317 }, { "epoch": 0.7848512620258825, "grad_norm": 0.5995287299156189, "learning_rate": 3.4963168759538063e-06, "loss": 0.0647, "step": 44318 }, { "epoch": 0.7848689715629109, "grad_norm": 0.6366029381752014, "learning_rate": 3.495764754151432e-06, "loss": 0.0539, "step": 44319 }, { "epoch": 0.7848866810999393, "grad_norm": 0.5508353114128113, "learning_rate": 3.4952126701968314e-06, "loss": 0.0516, "step": 44320 }, { "epoch": 0.7849043906369678, "grad_norm": 0.6478086709976196, "learning_rate": 3.494660624091812e-06, "loss": 0.0481, "step": 44321 }, { "epoch": 0.7849221001739962, "grad_norm": 0.6439444422721863, "learning_rate": 3.494108615838197e-06, "loss": 0.0506, "step": 44322 }, { "epoch": 0.7849398097110246, "grad_norm": 0.5244668126106262, "learning_rate": 3.4935566454378005e-06, "loss": 0.0497, "step": 44323 }, { "epoch": 0.784957519248053, "grad_norm": 0.7893579602241516, "learning_rate": 3.4930047128924432e-06, "loss": 0.0493, "step": 44324 }, { "epoch": 0.7849752287850815, "grad_norm": 1.347050666809082, "learning_rate": 3.492452818203932e-06, "loss": 0.0856, "step": 44325 }, { "epoch": 0.7849929383221099, "grad_norm": 0.5424431562423706, "learning_rate": 3.4919009613740866e-06, "loss": 0.0492, "step": 44326 }, { "epoch": 0.7850106478591383, "grad_norm": 0.42959439754486084, "learning_rate": 3.4913491424047243e-06, "loss": 0.049, "step": 44327 }, { "epoch": 0.7850283573961667, "grad_norm": 0.8507909774780273, "learning_rate": 3.4907973612976573e-06, "loss": 0.0506, "step": 44328 }, { "epoch": 0.7850460669331952, "grad_norm": 0.5464500784873962, "learning_rate": 3.4902456180547054e-06, "loss": 0.0628, "step": 44329 }, { "epoch": 0.7850637764702236, "grad_norm": 0.5574946999549866, "learning_rate": 3.489693912677679e-06, "loss": 0.0667, "step": 44330 }, { "epoch": 0.785081486007252, "grad_norm": 0.2654178738594055, "learning_rate": 3.489142245168401e-06, "loss": 0.0377, "step": 44331 }, { "epoch": 0.7850991955442805, "grad_norm": 0.3905292749404907, "learning_rate": 3.4885906155286763e-06, "loss": 0.0427, "step": 44332 }, { "epoch": 0.7851169050813089, "grad_norm": 0.567853569984436, "learning_rate": 3.488039023760324e-06, "loss": 0.0298, "step": 44333 }, { "epoch": 0.7851346146183373, "grad_norm": 0.8222459554672241, "learning_rate": 3.4874874698651644e-06, "loss": 0.052, "step": 44334 }, { "epoch": 0.7851523241553657, "grad_norm": 0.6248195171356201, "learning_rate": 3.4869359538450012e-06, "loss": 0.0683, "step": 44335 }, { "epoch": 0.7851700336923942, "grad_norm": 0.4087583124637604, "learning_rate": 3.48638447570165e-06, "loss": 0.068, "step": 44336 }, { "epoch": 0.7851877432294226, "grad_norm": 1.2185324430465698, "learning_rate": 3.4858330354369337e-06, "loss": 0.0563, "step": 44337 }, { "epoch": 0.785205452766451, "grad_norm": 0.6898249983787537, "learning_rate": 3.4852816330526665e-06, "loss": 0.0576, "step": 44338 }, { "epoch": 0.7852231623034794, "grad_norm": 0.6618258953094482, "learning_rate": 3.484730268550653e-06, "loss": 0.0392, "step": 44339 }, { "epoch": 0.785240871840508, "grad_norm": 1.02959406375885, "learning_rate": 3.4841789419327126e-06, "loss": 0.0756, "step": 44340 }, { "epoch": 0.7852585813775363, "grad_norm": 0.736497163772583, "learning_rate": 3.4836276532006628e-06, "loss": 0.0681, "step": 44341 }, { "epoch": 0.7852762909145647, "grad_norm": 0.3405454158782959, "learning_rate": 3.4830764023563094e-06, "loss": 0.0436, "step": 44342 }, { "epoch": 0.7852940004515931, "grad_norm": 0.48334047198295593, "learning_rate": 3.482525189401471e-06, "loss": 0.0356, "step": 44343 }, { "epoch": 0.7853117099886217, "grad_norm": 0.7721062302589417, "learning_rate": 3.4819740143379575e-06, "loss": 0.0708, "step": 44344 }, { "epoch": 0.78532941952565, "grad_norm": 0.3533434569835663, "learning_rate": 3.481422877167586e-06, "loss": 0.0416, "step": 44345 }, { "epoch": 0.7853471290626785, "grad_norm": 0.4512609541416168, "learning_rate": 3.4808717778921684e-06, "loss": 0.0594, "step": 44346 }, { "epoch": 0.785364838599707, "grad_norm": 0.3957101106643677, "learning_rate": 3.4803207165135164e-06, "loss": 0.0774, "step": 44347 }, { "epoch": 0.7853825481367354, "grad_norm": 1.5416383743286133, "learning_rate": 3.4797696930334487e-06, "loss": 0.0439, "step": 44348 }, { "epoch": 0.7854002576737638, "grad_norm": 0.9230695366859436, "learning_rate": 3.479218707453769e-06, "loss": 0.0822, "step": 44349 }, { "epoch": 0.7854179672107922, "grad_norm": 0.3794025778770447, "learning_rate": 3.4786677597762954e-06, "loss": 0.0474, "step": 44350 }, { "epoch": 0.7854356767478207, "grad_norm": 0.6133753657341003, "learning_rate": 3.4781168500028385e-06, "loss": 0.0436, "step": 44351 }, { "epoch": 0.7854533862848491, "grad_norm": 0.5146727561950684, "learning_rate": 3.477565978135217e-06, "loss": 0.0539, "step": 44352 }, { "epoch": 0.7854710958218775, "grad_norm": 0.7022178173065186, "learning_rate": 3.477015144175234e-06, "loss": 0.0628, "step": 44353 }, { "epoch": 0.7854888053589059, "grad_norm": 0.41399630904197693, "learning_rate": 3.4764643481247048e-06, "loss": 0.025, "step": 44354 }, { "epoch": 0.7855065148959344, "grad_norm": 0.3414572775363922, "learning_rate": 3.4759135899854433e-06, "loss": 0.0683, "step": 44355 }, { "epoch": 0.7855242244329628, "grad_norm": 0.7113198041915894, "learning_rate": 3.4753628697592593e-06, "loss": 0.0566, "step": 44356 }, { "epoch": 0.7855419339699912, "grad_norm": 0.4629633128643036, "learning_rate": 3.4748121874479665e-06, "loss": 0.0417, "step": 44357 }, { "epoch": 0.7855596435070196, "grad_norm": 0.5339691638946533, "learning_rate": 3.4742615430533752e-06, "loss": 0.0547, "step": 44358 }, { "epoch": 0.7855773530440481, "grad_norm": 0.5070863366127014, "learning_rate": 3.473710936577304e-06, "loss": 0.0721, "step": 44359 }, { "epoch": 0.7855950625810765, "grad_norm": 0.5029852390289307, "learning_rate": 3.4731603680215513e-06, "loss": 0.0514, "step": 44360 }, { "epoch": 0.7856127721181049, "grad_norm": 0.6464239358901978, "learning_rate": 3.4726098373879373e-06, "loss": 0.0825, "step": 44361 }, { "epoch": 0.7856304816551334, "grad_norm": 0.41610977053642273, "learning_rate": 3.472059344678269e-06, "loss": 0.0587, "step": 44362 }, { "epoch": 0.7856481911921618, "grad_norm": 0.6543112397193909, "learning_rate": 3.471508889894365e-06, "loss": 0.0623, "step": 44363 }, { "epoch": 0.7856659007291902, "grad_norm": 0.3367938995361328, "learning_rate": 3.470958473038025e-06, "loss": 0.0578, "step": 44364 }, { "epoch": 0.7856836102662186, "grad_norm": 0.63884437084198, "learning_rate": 3.470408094111063e-06, "loss": 0.0493, "step": 44365 }, { "epoch": 0.7857013198032471, "grad_norm": 0.5288463830947876, "learning_rate": 3.4698577531153e-06, "loss": 0.063, "step": 44366 }, { "epoch": 0.7857190293402755, "grad_norm": 0.778777003288269, "learning_rate": 3.4693074500525348e-06, "loss": 0.0519, "step": 44367 }, { "epoch": 0.7857367388773039, "grad_norm": 0.5349205136299133, "learning_rate": 3.468757184924581e-06, "loss": 0.0535, "step": 44368 }, { "epoch": 0.7857544484143323, "grad_norm": 0.36956989765167236, "learning_rate": 3.4682069577332504e-06, "loss": 0.0404, "step": 44369 }, { "epoch": 0.7857721579513608, "grad_norm": 0.47455453872680664, "learning_rate": 3.4676567684803568e-06, "loss": 0.0464, "step": 44370 }, { "epoch": 0.7857898674883892, "grad_norm": 0.5172916054725647, "learning_rate": 3.4671066171677007e-06, "loss": 0.0702, "step": 44371 }, { "epoch": 0.7858075770254176, "grad_norm": 0.5219627022743225, "learning_rate": 3.4665565037970994e-06, "loss": 0.0474, "step": 44372 }, { "epoch": 0.785825286562446, "grad_norm": 0.6910728812217712, "learning_rate": 3.466006428370359e-06, "loss": 0.0532, "step": 44373 }, { "epoch": 0.7858429960994745, "grad_norm": 0.5668482184410095, "learning_rate": 3.4654563908892924e-06, "loss": 0.0637, "step": 44374 }, { "epoch": 0.7858607056365029, "grad_norm": 0.8451976180076599, "learning_rate": 3.4649063913557056e-06, "loss": 0.0577, "step": 44375 }, { "epoch": 0.7858784151735313, "grad_norm": 0.6378761529922485, "learning_rate": 3.4643564297714107e-06, "loss": 0.0553, "step": 44376 }, { "epoch": 0.7858961247105598, "grad_norm": 0.6696216464042664, "learning_rate": 3.4638065061382217e-06, "loss": 0.0495, "step": 44377 }, { "epoch": 0.7859138342475882, "grad_norm": 0.5218320488929749, "learning_rate": 3.463256620457937e-06, "loss": 0.055, "step": 44378 }, { "epoch": 0.7859315437846166, "grad_norm": 0.36959072947502136, "learning_rate": 3.4627067727323713e-06, "loss": 0.0686, "step": 44379 }, { "epoch": 0.785949253321645, "grad_norm": 0.6050117015838623, "learning_rate": 3.4621569629633386e-06, "loss": 0.044, "step": 44380 }, { "epoch": 0.7859669628586735, "grad_norm": 0.8184073567390442, "learning_rate": 3.4616071911526374e-06, "loss": 0.0601, "step": 44381 }, { "epoch": 0.7859846723957019, "grad_norm": 0.573153555393219, "learning_rate": 3.4610574573020832e-06, "loss": 0.0886, "step": 44382 }, { "epoch": 0.7860023819327303, "grad_norm": 0.40752625465393066, "learning_rate": 3.4605077614134807e-06, "loss": 0.0374, "step": 44383 }, { "epoch": 0.7860200914697587, "grad_norm": 0.3428303897380829, "learning_rate": 3.459958103488642e-06, "loss": 0.0652, "step": 44384 }, { "epoch": 0.7860378010067872, "grad_norm": 0.5606247782707214, "learning_rate": 3.459408483529372e-06, "loss": 0.054, "step": 44385 }, { "epoch": 0.7860555105438156, "grad_norm": 0.6627888679504395, "learning_rate": 3.4588589015374834e-06, "loss": 0.065, "step": 44386 }, { "epoch": 0.786073220080844, "grad_norm": 0.6701012253761292, "learning_rate": 3.458309357514784e-06, "loss": 0.0447, "step": 44387 }, { "epoch": 0.7860909296178724, "grad_norm": 0.4388161599636078, "learning_rate": 3.4577598514630758e-06, "loss": 0.0368, "step": 44388 }, { "epoch": 0.7861086391549009, "grad_norm": 0.4536673128604889, "learning_rate": 3.4572103833841694e-06, "loss": 0.0571, "step": 44389 }, { "epoch": 0.7861263486919293, "grad_norm": 0.259817898273468, "learning_rate": 3.456660953279875e-06, "loss": 0.046, "step": 44390 }, { "epoch": 0.7861440582289577, "grad_norm": 0.5438533425331116, "learning_rate": 3.4561115611520023e-06, "loss": 0.0455, "step": 44391 }, { "epoch": 0.7861617677659862, "grad_norm": 0.2689892649650574, "learning_rate": 3.4555622070023503e-06, "loss": 0.0387, "step": 44392 }, { "epoch": 0.7861794773030146, "grad_norm": 0.586773693561554, "learning_rate": 3.4550128908327307e-06, "loss": 0.0796, "step": 44393 }, { "epoch": 0.786197186840043, "grad_norm": 0.5863553881645203, "learning_rate": 3.454463612644947e-06, "loss": 0.0678, "step": 44394 }, { "epoch": 0.7862148963770714, "grad_norm": 0.5795274972915649, "learning_rate": 3.4539143724408194e-06, "loss": 0.0339, "step": 44395 }, { "epoch": 0.7862326059140999, "grad_norm": 0.5111908912658691, "learning_rate": 3.4533651702221402e-06, "loss": 0.0608, "step": 44396 }, { "epoch": 0.7862503154511283, "grad_norm": 1.0378267765045166, "learning_rate": 3.4528160059907225e-06, "loss": 0.0581, "step": 44397 }, { "epoch": 0.7862680249881567, "grad_norm": 0.6541958451271057, "learning_rate": 3.452266879748377e-06, "loss": 0.0475, "step": 44398 }, { "epoch": 0.7862857345251851, "grad_norm": 0.2287926822900772, "learning_rate": 3.4517177914969e-06, "loss": 0.0403, "step": 44399 }, { "epoch": 0.7863034440622136, "grad_norm": 0.569529116153717, "learning_rate": 3.4511687412381053e-06, "loss": 0.0635, "step": 44400 }, { "epoch": 0.786321153599242, "grad_norm": 0.8970228433609009, "learning_rate": 3.4506197289737953e-06, "loss": 0.0641, "step": 44401 }, { "epoch": 0.7863388631362704, "grad_norm": 0.6075708866119385, "learning_rate": 3.450070754705779e-06, "loss": 0.0816, "step": 44402 }, { "epoch": 0.7863565726732988, "grad_norm": 0.6037209630012512, "learning_rate": 3.449521818435861e-06, "loss": 0.0528, "step": 44403 }, { "epoch": 0.7863742822103273, "grad_norm": 0.4638522267341614, "learning_rate": 3.44897292016585e-06, "loss": 0.0626, "step": 44404 }, { "epoch": 0.7863919917473557, "grad_norm": 0.4411790370941162, "learning_rate": 3.448424059897552e-06, "loss": 0.0469, "step": 44405 }, { "epoch": 0.7864097012843841, "grad_norm": 0.15662652254104614, "learning_rate": 3.447875237632768e-06, "loss": 0.0374, "step": 44406 }, { "epoch": 0.7864274108214127, "grad_norm": 0.8255677223205566, "learning_rate": 3.447326453373304e-06, "loss": 0.0599, "step": 44407 }, { "epoch": 0.786445120358441, "grad_norm": 0.8832601308822632, "learning_rate": 3.4467777071209693e-06, "loss": 0.0734, "step": 44408 }, { "epoch": 0.7864628298954695, "grad_norm": 0.37913817167282104, "learning_rate": 3.4462289988775707e-06, "loss": 0.0514, "step": 44409 }, { "epoch": 0.7864805394324978, "grad_norm": 0.3659997582435608, "learning_rate": 3.445680328644905e-06, "loss": 0.0396, "step": 44410 }, { "epoch": 0.7864982489695264, "grad_norm": 0.35451436042785645, "learning_rate": 3.445131696424784e-06, "loss": 0.0438, "step": 44411 }, { "epoch": 0.7865159585065548, "grad_norm": 0.3724207878112793, "learning_rate": 3.44458310221901e-06, "loss": 0.0396, "step": 44412 }, { "epoch": 0.7865336680435832, "grad_norm": 0.9071096777915955, "learning_rate": 3.4440345460293892e-06, "loss": 0.0723, "step": 44413 }, { "epoch": 0.7865513775806116, "grad_norm": 0.9236624240875244, "learning_rate": 3.4434860278577276e-06, "loss": 0.092, "step": 44414 }, { "epoch": 0.7865690871176401, "grad_norm": 0.5387751460075378, "learning_rate": 3.4429375477058262e-06, "loss": 0.0501, "step": 44415 }, { "epoch": 0.7865867966546685, "grad_norm": 0.7353115081787109, "learning_rate": 3.442389105575496e-06, "loss": 0.0842, "step": 44416 }, { "epoch": 0.7866045061916969, "grad_norm": 0.6865622401237488, "learning_rate": 3.4418407014685316e-06, "loss": 0.1032, "step": 44417 }, { "epoch": 0.7866222157287253, "grad_norm": 0.6547345519065857, "learning_rate": 3.441292335386742e-06, "loss": 0.0715, "step": 44418 }, { "epoch": 0.7866399252657538, "grad_norm": 0.41495972871780396, "learning_rate": 3.4407440073319374e-06, "loss": 0.0485, "step": 44419 }, { "epoch": 0.7866576348027822, "grad_norm": 0.7745293378829956, "learning_rate": 3.440195717305911e-06, "loss": 0.062, "step": 44420 }, { "epoch": 0.7866753443398106, "grad_norm": 0.3449121415615082, "learning_rate": 3.439647465310472e-06, "loss": 0.0514, "step": 44421 }, { "epoch": 0.7866930538768391, "grad_norm": 1.0033611059188843, "learning_rate": 3.4390992513474217e-06, "loss": 0.0558, "step": 44422 }, { "epoch": 0.7867107634138675, "grad_norm": 0.7535561919212341, "learning_rate": 3.4385510754185673e-06, "loss": 0.0615, "step": 44423 }, { "epoch": 0.7867284729508959, "grad_norm": 0.5863530039787292, "learning_rate": 3.4380029375257105e-06, "loss": 0.0672, "step": 44424 }, { "epoch": 0.7867461824879243, "grad_norm": 0.9067900776863098, "learning_rate": 3.4374548376706532e-06, "loss": 0.0796, "step": 44425 }, { "epoch": 0.7867638920249528, "grad_norm": 0.7757020592689514, "learning_rate": 3.4369067758552007e-06, "loss": 0.0893, "step": 44426 }, { "epoch": 0.7867816015619812, "grad_norm": 0.49966123700141907, "learning_rate": 3.4363587520811603e-06, "loss": 0.0672, "step": 44427 }, { "epoch": 0.7867993110990096, "grad_norm": 0.6238470077514648, "learning_rate": 3.435810766350326e-06, "loss": 0.0557, "step": 44428 }, { "epoch": 0.786817020636038, "grad_norm": 0.9409207701683044, "learning_rate": 3.435262818664503e-06, "loss": 0.0853, "step": 44429 }, { "epoch": 0.7868347301730665, "grad_norm": 0.5573867559432983, "learning_rate": 3.434714909025502e-06, "loss": 0.0611, "step": 44430 }, { "epoch": 0.7868524397100949, "grad_norm": 0.6403753161430359, "learning_rate": 3.434167037435109e-06, "loss": 0.0587, "step": 44431 }, { "epoch": 0.7868701492471233, "grad_norm": 0.26621371507644653, "learning_rate": 3.4336192038951414e-06, "loss": 0.0421, "step": 44432 }, { "epoch": 0.7868878587841517, "grad_norm": 0.469450443983078, "learning_rate": 3.4330714084073973e-06, "loss": 0.0546, "step": 44433 }, { "epoch": 0.7869055683211802, "grad_norm": 0.7073326706886292, "learning_rate": 3.432523650973681e-06, "loss": 0.0856, "step": 44434 }, { "epoch": 0.7869232778582086, "grad_norm": 0.9689239859580994, "learning_rate": 3.431975931595789e-06, "loss": 0.0648, "step": 44435 }, { "epoch": 0.786940987395237, "grad_norm": 0.5643664002418518, "learning_rate": 3.431428250275525e-06, "loss": 0.0772, "step": 44436 }, { "epoch": 0.7869586969322655, "grad_norm": 0.4650590419769287, "learning_rate": 3.4308806070146987e-06, "loss": 0.0409, "step": 44437 }, { "epoch": 0.7869764064692939, "grad_norm": 0.4347766637802124, "learning_rate": 3.430333001815098e-06, "loss": 0.0541, "step": 44438 }, { "epoch": 0.7869941160063223, "grad_norm": 0.3705603778362274, "learning_rate": 3.4297854346785336e-06, "loss": 0.0492, "step": 44439 }, { "epoch": 0.7870118255433507, "grad_norm": 1.1405545473098755, "learning_rate": 3.4292379056068037e-06, "loss": 0.0764, "step": 44440 }, { "epoch": 0.7870295350803792, "grad_norm": 0.3582533299922943, "learning_rate": 3.4286904146017102e-06, "loss": 0.0388, "step": 44441 }, { "epoch": 0.7870472446174076, "grad_norm": 0.7607484459877014, "learning_rate": 3.4281429616650565e-06, "loss": 0.0686, "step": 44442 }, { "epoch": 0.787064954154436, "grad_norm": 0.16478268802165985, "learning_rate": 3.4275955467986404e-06, "loss": 0.0308, "step": 44443 }, { "epoch": 0.7870826636914644, "grad_norm": 0.5017914175987244, "learning_rate": 3.4270481700042695e-06, "loss": 0.0417, "step": 44444 }, { "epoch": 0.7871003732284929, "grad_norm": 0.2849195897579193, "learning_rate": 3.426500831283736e-06, "loss": 0.0365, "step": 44445 }, { "epoch": 0.7871180827655213, "grad_norm": 0.31131407618522644, "learning_rate": 3.4259535306388434e-06, "loss": 0.0548, "step": 44446 }, { "epoch": 0.7871357923025497, "grad_norm": 0.7303428649902344, "learning_rate": 3.4254062680713936e-06, "loss": 0.0628, "step": 44447 }, { "epoch": 0.7871535018395781, "grad_norm": 0.4808250665664673, "learning_rate": 3.4248590435831904e-06, "loss": 0.0402, "step": 44448 }, { "epoch": 0.7871712113766066, "grad_norm": 0.5447602868080139, "learning_rate": 3.424311857176026e-06, "loss": 0.0483, "step": 44449 }, { "epoch": 0.787188920913635, "grad_norm": 0.7825005054473877, "learning_rate": 3.4237647088517044e-06, "loss": 0.0523, "step": 44450 }, { "epoch": 0.7872066304506634, "grad_norm": 0.5797873735427856, "learning_rate": 3.423217598612025e-06, "loss": 0.0613, "step": 44451 }, { "epoch": 0.7872243399876919, "grad_norm": 0.5924357771873474, "learning_rate": 3.4226705264587915e-06, "loss": 0.0548, "step": 44452 }, { "epoch": 0.7872420495247203, "grad_norm": 0.4192774295806885, "learning_rate": 3.4221234923937986e-06, "loss": 0.0539, "step": 44453 }, { "epoch": 0.7872597590617487, "grad_norm": 0.6631819605827332, "learning_rate": 3.4215764964188488e-06, "loss": 0.0384, "step": 44454 }, { "epoch": 0.7872774685987771, "grad_norm": 0.7338590621948242, "learning_rate": 3.4210295385357472e-06, "loss": 0.0527, "step": 44455 }, { "epoch": 0.7872951781358056, "grad_norm": 0.8405632972717285, "learning_rate": 3.4204826187462817e-06, "loss": 0.0606, "step": 44456 }, { "epoch": 0.787312887672834, "grad_norm": 0.6332139372825623, "learning_rate": 3.4199357370522564e-06, "loss": 0.0329, "step": 44457 }, { "epoch": 0.7873305972098624, "grad_norm": 0.6675181984901428, "learning_rate": 3.419388893455477e-06, "loss": 0.0757, "step": 44458 }, { "epoch": 0.7873483067468908, "grad_norm": 0.6606918573379517, "learning_rate": 3.4188420879577326e-06, "loss": 0.0606, "step": 44459 }, { "epoch": 0.7873660162839193, "grad_norm": 0.7715613842010498, "learning_rate": 3.418295320560821e-06, "loss": 0.0825, "step": 44460 }, { "epoch": 0.7873837258209477, "grad_norm": 0.6828979253768921, "learning_rate": 3.4177485912665516e-06, "loss": 0.067, "step": 44461 }, { "epoch": 0.7874014353579761, "grad_norm": 0.9908037185668945, "learning_rate": 3.4172019000767223e-06, "loss": 0.0457, "step": 44462 }, { "epoch": 0.7874191448950045, "grad_norm": 0.5352133512496948, "learning_rate": 3.416655246993121e-06, "loss": 0.0615, "step": 44463 }, { "epoch": 0.787436854432033, "grad_norm": 0.5370920896530151, "learning_rate": 3.416108632017555e-06, "loss": 0.0294, "step": 44464 }, { "epoch": 0.7874545639690614, "grad_norm": 0.2945830225944519, "learning_rate": 3.4155620551518176e-06, "loss": 0.0262, "step": 44465 }, { "epoch": 0.7874722735060898, "grad_norm": 0.47898006439208984, "learning_rate": 3.4150155163977153e-06, "loss": 0.0637, "step": 44466 }, { "epoch": 0.7874899830431183, "grad_norm": 0.4340122938156128, "learning_rate": 3.4144690157570357e-06, "loss": 0.0497, "step": 44467 }, { "epoch": 0.7875076925801467, "grad_norm": 0.688874363899231, "learning_rate": 3.4139225532315805e-06, "loss": 0.0504, "step": 44468 }, { "epoch": 0.7875254021171751, "grad_norm": 0.5355106592178345, "learning_rate": 3.413376128823148e-06, "loss": 0.0524, "step": 44469 }, { "epoch": 0.7875431116542035, "grad_norm": 0.5414287447929382, "learning_rate": 3.412829742533537e-06, "loss": 0.0761, "step": 44470 }, { "epoch": 0.787560821191232, "grad_norm": 0.7269580364227295, "learning_rate": 3.4122833943645444e-06, "loss": 0.0563, "step": 44471 }, { "epoch": 0.7875785307282605, "grad_norm": 0.5248631238937378, "learning_rate": 3.411737084317967e-06, "loss": 0.0461, "step": 44472 }, { "epoch": 0.7875962402652888, "grad_norm": 0.45631423592567444, "learning_rate": 3.4111908123956065e-06, "loss": 0.037, "step": 44473 }, { "epoch": 0.7876139498023172, "grad_norm": 0.3399423062801361, "learning_rate": 3.410644578599252e-06, "loss": 0.039, "step": 44474 }, { "epoch": 0.7876316593393458, "grad_norm": 0.7989203929901123, "learning_rate": 3.4100983829307046e-06, "loss": 0.0779, "step": 44475 }, { "epoch": 0.7876493688763742, "grad_norm": 0.4575316905975342, "learning_rate": 3.4095522253917656e-06, "loss": 0.0447, "step": 44476 }, { "epoch": 0.7876670784134026, "grad_norm": 0.3573606014251709, "learning_rate": 3.4090061059842225e-06, "loss": 0.03, "step": 44477 }, { "epoch": 0.787684787950431, "grad_norm": 0.34694674611091614, "learning_rate": 3.408460024709878e-06, "loss": 0.0524, "step": 44478 }, { "epoch": 0.7877024974874595, "grad_norm": 0.1947551816701889, "learning_rate": 3.407913981570527e-06, "loss": 0.032, "step": 44479 }, { "epoch": 0.7877202070244879, "grad_norm": 0.40869376063346863, "learning_rate": 3.407367976567966e-06, "loss": 0.0748, "step": 44480 }, { "epoch": 0.7877379165615163, "grad_norm": 0.8449699878692627, "learning_rate": 3.4068220097039943e-06, "loss": 0.1014, "step": 44481 }, { "epoch": 0.7877556260985448, "grad_norm": 1.0328327417373657, "learning_rate": 3.4062760809804033e-06, "loss": 0.0788, "step": 44482 }, { "epoch": 0.7877733356355732, "grad_norm": 0.522639811038971, "learning_rate": 3.405730190398995e-06, "loss": 0.0472, "step": 44483 }, { "epoch": 0.7877910451726016, "grad_norm": 0.5362998843193054, "learning_rate": 3.40518433796156e-06, "loss": 0.0794, "step": 44484 }, { "epoch": 0.78780875470963, "grad_norm": 0.5855556726455688, "learning_rate": 3.4046385236698947e-06, "loss": 0.0656, "step": 44485 }, { "epoch": 0.7878264642466585, "grad_norm": 0.6281295418739319, "learning_rate": 3.4040927475257942e-06, "loss": 0.0478, "step": 44486 }, { "epoch": 0.7878441737836869, "grad_norm": 0.6226791739463806, "learning_rate": 3.403547009531063e-06, "loss": 0.0576, "step": 44487 }, { "epoch": 0.7878618833207153, "grad_norm": 0.5797099471092224, "learning_rate": 3.403001309687482e-06, "loss": 0.0654, "step": 44488 }, { "epoch": 0.7878795928577437, "grad_norm": 0.5902485251426697, "learning_rate": 3.4024556479968517e-06, "loss": 0.0528, "step": 44489 }, { "epoch": 0.7878973023947722, "grad_norm": 0.32853516936302185, "learning_rate": 3.4019100244609756e-06, "loss": 0.0546, "step": 44490 }, { "epoch": 0.7879150119318006, "grad_norm": 0.4617099463939667, "learning_rate": 3.4013644390816396e-06, "loss": 0.0444, "step": 44491 }, { "epoch": 0.787932721468829, "grad_norm": 0.7013990879058838, "learning_rate": 3.4008188918606415e-06, "loss": 0.0578, "step": 44492 }, { "epoch": 0.7879504310058574, "grad_norm": 0.5309001207351685, "learning_rate": 3.4002733827997766e-06, "loss": 0.056, "step": 44493 }, { "epoch": 0.7879681405428859, "grad_norm": 0.5054050087928772, "learning_rate": 3.3997279119008424e-06, "loss": 0.0646, "step": 44494 }, { "epoch": 0.7879858500799143, "grad_norm": 0.5510944724082947, "learning_rate": 3.3991824791656268e-06, "loss": 0.0389, "step": 44495 }, { "epoch": 0.7880035596169427, "grad_norm": 0.9605177044868469, "learning_rate": 3.398637084595925e-06, "loss": 0.0725, "step": 44496 }, { "epoch": 0.7880212691539712, "grad_norm": 0.4245721399784088, "learning_rate": 3.398091728193536e-06, "loss": 0.0357, "step": 44497 }, { "epoch": 0.7880389786909996, "grad_norm": 0.757390022277832, "learning_rate": 3.3975464099602522e-06, "loss": 0.0451, "step": 44498 }, { "epoch": 0.788056688228028, "grad_norm": 0.4881425201892853, "learning_rate": 3.397001129897866e-06, "loss": 0.071, "step": 44499 }, { "epoch": 0.7880743977650564, "grad_norm": 0.4909774661064148, "learning_rate": 3.396455888008173e-06, "loss": 0.0802, "step": 44500 }, { "epoch": 0.7880921073020849, "grad_norm": 0.5848280787467957, "learning_rate": 3.3959106842929707e-06, "loss": 0.0637, "step": 44501 }, { "epoch": 0.7881098168391133, "grad_norm": 0.23620232939720154, "learning_rate": 3.3953655187540433e-06, "loss": 0.0489, "step": 44502 }, { "epoch": 0.7881275263761417, "grad_norm": 0.36592742800712585, "learning_rate": 3.39482039139319e-06, "loss": 0.0452, "step": 44503 }, { "epoch": 0.7881452359131701, "grad_norm": 0.6182843446731567, "learning_rate": 3.394275302212205e-06, "loss": 0.063, "step": 44504 }, { "epoch": 0.7881629454501986, "grad_norm": 0.41651713848114014, "learning_rate": 3.393730251212884e-06, "loss": 0.0575, "step": 44505 }, { "epoch": 0.788180654987227, "grad_norm": 0.29132816195487976, "learning_rate": 3.3931852383970124e-06, "loss": 0.0379, "step": 44506 }, { "epoch": 0.7881983645242554, "grad_norm": 0.4920874834060669, "learning_rate": 3.392640263766386e-06, "loss": 0.0583, "step": 44507 }, { "epoch": 0.7882160740612838, "grad_norm": 0.18548347055912018, "learning_rate": 3.3920953273228002e-06, "loss": 0.0436, "step": 44508 }, { "epoch": 0.7882337835983123, "grad_norm": 1.0023096799850464, "learning_rate": 3.3915504290680476e-06, "loss": 0.0829, "step": 44509 }, { "epoch": 0.7882514931353407, "grad_norm": 0.42788130044937134, "learning_rate": 3.3910055690039192e-06, "loss": 0.0404, "step": 44510 }, { "epoch": 0.7882692026723691, "grad_norm": 0.2493063360452652, "learning_rate": 3.3904607471322087e-06, "loss": 0.0506, "step": 44511 }, { "epoch": 0.7882869122093976, "grad_norm": 0.8552393317222595, "learning_rate": 3.389915963454712e-06, "loss": 0.0479, "step": 44512 }, { "epoch": 0.788304621746426, "grad_norm": 0.4324756860733032, "learning_rate": 3.3893712179732123e-06, "loss": 0.0531, "step": 44513 }, { "epoch": 0.7883223312834544, "grad_norm": 0.8459835052490234, "learning_rate": 3.388826510689509e-06, "loss": 0.0726, "step": 44514 }, { "epoch": 0.7883400408204828, "grad_norm": 0.746062159538269, "learning_rate": 3.388281841605394e-06, "loss": 0.0542, "step": 44515 }, { "epoch": 0.7883577503575113, "grad_norm": 0.7710441946983337, "learning_rate": 3.3877372107226545e-06, "loss": 0.0321, "step": 44516 }, { "epoch": 0.7883754598945397, "grad_norm": 0.3275039792060852, "learning_rate": 3.387192618043084e-06, "loss": 0.0362, "step": 44517 }, { "epoch": 0.7883931694315681, "grad_norm": 0.6486694812774658, "learning_rate": 3.3866480635684725e-06, "loss": 0.0578, "step": 44518 }, { "epoch": 0.7884108789685965, "grad_norm": 0.3965050280094147, "learning_rate": 3.3861035473006225e-06, "loss": 0.0662, "step": 44519 }, { "epoch": 0.788428588505625, "grad_norm": 0.5451819896697998, "learning_rate": 3.385559069241312e-06, "loss": 0.0524, "step": 44520 }, { "epoch": 0.7884462980426534, "grad_norm": 0.3873072564601898, "learning_rate": 3.385014629392338e-06, "loss": 0.053, "step": 44521 }, { "epoch": 0.7884640075796818, "grad_norm": 0.7664228081703186, "learning_rate": 3.384470227755496e-06, "loss": 0.0643, "step": 44522 }, { "epoch": 0.7884817171167102, "grad_norm": 0.5645275115966797, "learning_rate": 3.3839258643325656e-06, "loss": 0.058, "step": 44523 }, { "epoch": 0.7884994266537387, "grad_norm": 0.4871455132961273, "learning_rate": 3.3833815391253458e-06, "loss": 0.0537, "step": 44524 }, { "epoch": 0.7885171361907671, "grad_norm": 0.5938606858253479, "learning_rate": 3.3828372521356253e-06, "loss": 0.045, "step": 44525 }, { "epoch": 0.7885348457277955, "grad_norm": 0.8769341707229614, "learning_rate": 3.382293003365196e-06, "loss": 0.0591, "step": 44526 }, { "epoch": 0.788552555264824, "grad_norm": 0.6142812371253967, "learning_rate": 3.381748792815848e-06, "loss": 0.052, "step": 44527 }, { "epoch": 0.7885702648018524, "grad_norm": 0.7714364528656006, "learning_rate": 3.38120462048937e-06, "loss": 0.0757, "step": 44528 }, { "epoch": 0.7885879743388808, "grad_norm": 0.3379790484905243, "learning_rate": 3.3806604863875533e-06, "loss": 0.067, "step": 44529 }, { "epoch": 0.7886056838759092, "grad_norm": 0.5785461068153381, "learning_rate": 3.380116390512194e-06, "loss": 0.0715, "step": 44530 }, { "epoch": 0.7886233934129377, "grad_norm": 0.8185378909111023, "learning_rate": 3.379572332865072e-06, "loss": 0.059, "step": 44531 }, { "epoch": 0.7886411029499661, "grad_norm": 0.21942029893398285, "learning_rate": 3.379028313447982e-06, "loss": 0.0633, "step": 44532 }, { "epoch": 0.7886588124869945, "grad_norm": 0.2680102288722992, "learning_rate": 3.378484332262717e-06, "loss": 0.0307, "step": 44533 }, { "epoch": 0.7886765220240229, "grad_norm": 0.6594486236572266, "learning_rate": 3.3779403893110595e-06, "loss": 0.0527, "step": 44534 }, { "epoch": 0.7886942315610515, "grad_norm": 0.33077725768089294, "learning_rate": 3.3773964845948018e-06, "loss": 0.0357, "step": 44535 }, { "epoch": 0.7887119410980798, "grad_norm": 0.7514852285385132, "learning_rate": 3.3768526181157344e-06, "loss": 0.0541, "step": 44536 }, { "epoch": 0.7887296506351082, "grad_norm": 0.5469420552253723, "learning_rate": 3.3763087898756455e-06, "loss": 0.062, "step": 44537 }, { "epoch": 0.7887473601721366, "grad_norm": 0.3796943128108978, "learning_rate": 3.3757649998763256e-06, "loss": 0.0434, "step": 44538 }, { "epoch": 0.7887650697091652, "grad_norm": 0.6509431004524231, "learning_rate": 3.3752212481195632e-06, "loss": 0.0638, "step": 44539 }, { "epoch": 0.7887827792461936, "grad_norm": 0.5329234600067139, "learning_rate": 3.3746775346071516e-06, "loss": 0.0883, "step": 44540 }, { "epoch": 0.788800488783222, "grad_norm": 0.7100207209587097, "learning_rate": 3.3741338593408716e-06, "loss": 0.0476, "step": 44541 }, { "epoch": 0.7888181983202505, "grad_norm": 1.1946924924850464, "learning_rate": 3.3735902223225147e-06, "loss": 0.0592, "step": 44542 }, { "epoch": 0.7888359078572789, "grad_norm": 0.6790754795074463, "learning_rate": 3.3730466235538693e-06, "loss": 0.0608, "step": 44543 }, { "epoch": 0.7888536173943073, "grad_norm": 0.546393096446991, "learning_rate": 3.372503063036729e-06, "loss": 0.0723, "step": 44544 }, { "epoch": 0.7888713269313357, "grad_norm": 0.45891156792640686, "learning_rate": 3.3719595407728746e-06, "loss": 0.0553, "step": 44545 }, { "epoch": 0.7888890364683642, "grad_norm": 0.20870140194892883, "learning_rate": 3.371416056764096e-06, "loss": 0.0527, "step": 44546 }, { "epoch": 0.7889067460053926, "grad_norm": 0.45348429679870605, "learning_rate": 3.3708726110121817e-06, "loss": 0.0471, "step": 44547 }, { "epoch": 0.788924455542421, "grad_norm": 0.5768564939498901, "learning_rate": 3.370329203518922e-06, "loss": 0.0594, "step": 44548 }, { "epoch": 0.7889421650794494, "grad_norm": 0.225082129240036, "learning_rate": 3.3697858342861023e-06, "loss": 0.0407, "step": 44549 }, { "epoch": 0.7889598746164779, "grad_norm": 0.5683297514915466, "learning_rate": 3.3692425033155104e-06, "loss": 0.0248, "step": 44550 }, { "epoch": 0.7889775841535063, "grad_norm": 0.604846715927124, "learning_rate": 3.368699210608939e-06, "loss": 0.0507, "step": 44551 }, { "epoch": 0.7889952936905347, "grad_norm": 0.34675332903862, "learning_rate": 3.3681559561681666e-06, "loss": 0.0508, "step": 44552 }, { "epoch": 0.7890130032275631, "grad_norm": 0.6987380981445312, "learning_rate": 3.3676127399949837e-06, "loss": 0.0755, "step": 44553 }, { "epoch": 0.7890307127645916, "grad_norm": 0.9005569815635681, "learning_rate": 3.367069562091183e-06, "loss": 0.0517, "step": 44554 }, { "epoch": 0.78904842230162, "grad_norm": 0.631432294845581, "learning_rate": 3.3665264224585385e-06, "loss": 0.0742, "step": 44555 }, { "epoch": 0.7890661318386484, "grad_norm": 0.5888166427612305, "learning_rate": 3.3659833210988504e-06, "loss": 0.0352, "step": 44556 }, { "epoch": 0.7890838413756769, "grad_norm": 0.8105413317680359, "learning_rate": 3.3654402580138995e-06, "loss": 0.0732, "step": 44557 }, { "epoch": 0.7891015509127053, "grad_norm": 0.6970917582511902, "learning_rate": 3.3648972332054785e-06, "loss": 0.0422, "step": 44558 }, { "epoch": 0.7891192604497337, "grad_norm": 0.4463626742362976, "learning_rate": 3.3643542466753645e-06, "loss": 0.0336, "step": 44559 }, { "epoch": 0.7891369699867621, "grad_norm": 0.5146438479423523, "learning_rate": 3.3638112984253465e-06, "loss": 0.0437, "step": 44560 }, { "epoch": 0.7891546795237906, "grad_norm": 0.5239793658256531, "learning_rate": 3.363268388457218e-06, "loss": 0.0589, "step": 44561 }, { "epoch": 0.789172389060819, "grad_norm": 0.7365494966506958, "learning_rate": 3.3627255167727556e-06, "loss": 0.0636, "step": 44562 }, { "epoch": 0.7891900985978474, "grad_norm": 0.7629952430725098, "learning_rate": 3.362182683373748e-06, "loss": 0.0648, "step": 44563 }, { "epoch": 0.7892078081348758, "grad_norm": 0.46467727422714233, "learning_rate": 3.3616398882619835e-06, "loss": 0.0556, "step": 44564 }, { "epoch": 0.7892255176719043, "grad_norm": 0.4341232478618622, "learning_rate": 3.3610971314392465e-06, "loss": 0.0424, "step": 44565 }, { "epoch": 0.7892432272089327, "grad_norm": 0.5354111194610596, "learning_rate": 3.360554412907323e-06, "loss": 0.0512, "step": 44566 }, { "epoch": 0.7892609367459611, "grad_norm": 0.3941075801849365, "learning_rate": 3.360011732667997e-06, "loss": 0.065, "step": 44567 }, { "epoch": 0.7892786462829895, "grad_norm": 0.48028191924095154, "learning_rate": 3.3594690907230554e-06, "loss": 0.0803, "step": 44568 }, { "epoch": 0.789296355820018, "grad_norm": 0.4743087589740753, "learning_rate": 3.358926487074288e-06, "loss": 0.06, "step": 44569 }, { "epoch": 0.7893140653570464, "grad_norm": 0.9078664779663086, "learning_rate": 3.358383921723469e-06, "loss": 0.0637, "step": 44570 }, { "epoch": 0.7893317748940748, "grad_norm": 0.7414756417274475, "learning_rate": 3.357841394672392e-06, "loss": 0.0609, "step": 44571 }, { "epoch": 0.7893494844311033, "grad_norm": 0.5049641728401184, "learning_rate": 3.3572989059228413e-06, "loss": 0.0593, "step": 44572 }, { "epoch": 0.7893671939681317, "grad_norm": 0.5851755738258362, "learning_rate": 3.3567564554765965e-06, "loss": 0.0809, "step": 44573 }, { "epoch": 0.7893849035051601, "grad_norm": 0.4502737820148468, "learning_rate": 3.356214043335446e-06, "loss": 0.0633, "step": 44574 }, { "epoch": 0.7894026130421885, "grad_norm": 0.5725229978561401, "learning_rate": 3.355671669501173e-06, "loss": 0.0342, "step": 44575 }, { "epoch": 0.789420322579217, "grad_norm": 0.4818815290927887, "learning_rate": 3.355129333975561e-06, "loss": 0.0811, "step": 44576 }, { "epoch": 0.7894380321162454, "grad_norm": 0.829164445400238, "learning_rate": 3.3545870367603976e-06, "loss": 0.0692, "step": 44577 }, { "epoch": 0.7894557416532738, "grad_norm": 0.4198557138442993, "learning_rate": 3.354044777857464e-06, "loss": 0.0355, "step": 44578 }, { "epoch": 0.7894734511903022, "grad_norm": 0.5105556845664978, "learning_rate": 3.3535025572685505e-06, "loss": 0.0767, "step": 44579 }, { "epoch": 0.7894911607273307, "grad_norm": 0.5156760215759277, "learning_rate": 3.3529603749954297e-06, "loss": 0.042, "step": 44580 }, { "epoch": 0.7895088702643591, "grad_norm": 0.642018735408783, "learning_rate": 3.3524182310398928e-06, "loss": 0.0703, "step": 44581 }, { "epoch": 0.7895265798013875, "grad_norm": 0.2684513330459595, "learning_rate": 3.351876125403722e-06, "loss": 0.0327, "step": 44582 }, { "epoch": 0.7895442893384159, "grad_norm": 0.5350306034088135, "learning_rate": 3.3513340580887035e-06, "loss": 0.0419, "step": 44583 }, { "epoch": 0.7895619988754444, "grad_norm": 0.3736027777194977, "learning_rate": 3.3507920290966104e-06, "loss": 0.0336, "step": 44584 }, { "epoch": 0.7895797084124728, "grad_norm": 0.5883184671401978, "learning_rate": 3.350250038429236e-06, "loss": 0.0552, "step": 44585 }, { "epoch": 0.7895974179495012, "grad_norm": 0.39854949712753296, "learning_rate": 3.349708086088366e-06, "loss": 0.0486, "step": 44586 }, { "epoch": 0.7896151274865297, "grad_norm": 0.6415925025939941, "learning_rate": 3.3491661720757736e-06, "loss": 0.0298, "step": 44587 }, { "epoch": 0.7896328370235581, "grad_norm": 1.0924208164215088, "learning_rate": 3.3486242963932457e-06, "loss": 0.0641, "step": 44588 }, { "epoch": 0.7896505465605865, "grad_norm": 0.3926309645175934, "learning_rate": 3.348082459042567e-06, "loss": 0.0635, "step": 44589 }, { "epoch": 0.7896682560976149, "grad_norm": 0.6466324925422668, "learning_rate": 3.347540660025521e-06, "loss": 0.0641, "step": 44590 }, { "epoch": 0.7896859656346434, "grad_norm": 0.26994502544403076, "learning_rate": 3.3469988993438833e-06, "loss": 0.0563, "step": 44591 }, { "epoch": 0.7897036751716718, "grad_norm": 0.6536346673965454, "learning_rate": 3.3464571769994423e-06, "loss": 0.0522, "step": 44592 }, { "epoch": 0.7897213847087002, "grad_norm": 0.4757022261619568, "learning_rate": 3.3459154929939765e-06, "loss": 0.0598, "step": 44593 }, { "epoch": 0.7897390942457286, "grad_norm": 0.5948630571365356, "learning_rate": 3.3453738473292716e-06, "loss": 0.0633, "step": 44594 }, { "epoch": 0.7897568037827571, "grad_norm": 0.4906165599822998, "learning_rate": 3.3448322400071072e-06, "loss": 0.0643, "step": 44595 }, { "epoch": 0.7897745133197855, "grad_norm": 0.5581479668617249, "learning_rate": 3.344290671029266e-06, "loss": 0.0451, "step": 44596 }, { "epoch": 0.7897922228568139, "grad_norm": 0.726063072681427, "learning_rate": 3.343749140397534e-06, "loss": 0.0679, "step": 44597 }, { "epoch": 0.7898099323938423, "grad_norm": 0.4795842170715332, "learning_rate": 3.3432076481136835e-06, "loss": 0.0527, "step": 44598 }, { "epoch": 0.7898276419308708, "grad_norm": 0.9640733599662781, "learning_rate": 3.3426661941795017e-06, "loss": 0.0664, "step": 44599 }, { "epoch": 0.7898453514678992, "grad_norm": 0.628437876701355, "learning_rate": 3.3421247785967686e-06, "loss": 0.0508, "step": 44600 }, { "epoch": 0.7898630610049276, "grad_norm": 0.6591492891311646, "learning_rate": 3.341583401367269e-06, "loss": 0.0401, "step": 44601 }, { "epoch": 0.7898807705419562, "grad_norm": 0.7484248280525208, "learning_rate": 3.341042062492779e-06, "loss": 0.0596, "step": 44602 }, { "epoch": 0.7898984800789846, "grad_norm": 0.39050033688545227, "learning_rate": 3.34050076197508e-06, "loss": 0.0579, "step": 44603 }, { "epoch": 0.789916189616013, "grad_norm": 0.4907255172729492, "learning_rate": 3.339959499815954e-06, "loss": 0.0598, "step": 44604 }, { "epoch": 0.7899338991530414, "grad_norm": 0.5584920048713684, "learning_rate": 3.3394182760171825e-06, "loss": 0.0414, "step": 44605 }, { "epoch": 0.7899516086900699, "grad_norm": 0.5449937582015991, "learning_rate": 3.338877090580545e-06, "loss": 0.0514, "step": 44606 }, { "epoch": 0.7899693182270983, "grad_norm": 0.2998754382133484, "learning_rate": 3.3383359435078224e-06, "loss": 0.0612, "step": 44607 }, { "epoch": 0.7899870277641267, "grad_norm": 1.0408799648284912, "learning_rate": 3.337794834800799e-06, "loss": 0.0566, "step": 44608 }, { "epoch": 0.7900047373011551, "grad_norm": 0.1674508899450302, "learning_rate": 3.337253764461248e-06, "loss": 0.0449, "step": 44609 }, { "epoch": 0.7900224468381836, "grad_norm": 0.5564995408058167, "learning_rate": 3.336712732490952e-06, "loss": 0.0638, "step": 44610 }, { "epoch": 0.790040156375212, "grad_norm": 0.4600258469581604, "learning_rate": 3.336171738891695e-06, "loss": 0.0496, "step": 44611 }, { "epoch": 0.7900578659122404, "grad_norm": 0.6731433272361755, "learning_rate": 3.3356307836652498e-06, "loss": 0.0614, "step": 44612 }, { "epoch": 0.7900755754492688, "grad_norm": 0.3400406837463379, "learning_rate": 3.335089866813395e-06, "loss": 0.0587, "step": 44613 }, { "epoch": 0.7900932849862973, "grad_norm": 0.5500396490097046, "learning_rate": 3.334548988337919e-06, "loss": 0.0737, "step": 44614 }, { "epoch": 0.7901109945233257, "grad_norm": 0.5728697776794434, "learning_rate": 3.3340081482406e-06, "loss": 0.0498, "step": 44615 }, { "epoch": 0.7901287040603541, "grad_norm": 0.6034665107727051, "learning_rate": 3.3334673465232117e-06, "loss": 0.0605, "step": 44616 }, { "epoch": 0.7901464135973826, "grad_norm": 0.22545267641544342, "learning_rate": 3.332926583187535e-06, "loss": 0.0308, "step": 44617 }, { "epoch": 0.790164123134411, "grad_norm": 0.4632902443408966, "learning_rate": 3.3323858582353547e-06, "loss": 0.0608, "step": 44618 }, { "epoch": 0.7901818326714394, "grad_norm": 0.31749197840690613, "learning_rate": 3.3318451716684408e-06, "loss": 0.0536, "step": 44619 }, { "epoch": 0.7901995422084678, "grad_norm": 0.6862152218818665, "learning_rate": 3.331304523488575e-06, "loss": 0.0471, "step": 44620 }, { "epoch": 0.7902172517454963, "grad_norm": 0.4819049835205078, "learning_rate": 3.330763913697537e-06, "loss": 0.0526, "step": 44621 }, { "epoch": 0.7902349612825247, "grad_norm": 0.40282300114631653, "learning_rate": 3.330223342297106e-06, "loss": 0.0487, "step": 44622 }, { "epoch": 0.7902526708195531, "grad_norm": 0.6270173788070679, "learning_rate": 3.3296828092890596e-06, "loss": 0.03, "step": 44623 }, { "epoch": 0.7902703803565815, "grad_norm": 0.4062320291996002, "learning_rate": 3.3291423146751776e-06, "loss": 0.082, "step": 44624 }, { "epoch": 0.79028808989361, "grad_norm": 0.16719605028629303, "learning_rate": 3.32860185845724e-06, "loss": 0.0259, "step": 44625 }, { "epoch": 0.7903057994306384, "grad_norm": 0.46369698643684387, "learning_rate": 3.328061440637018e-06, "loss": 0.0397, "step": 44626 }, { "epoch": 0.7903235089676668, "grad_norm": 0.45330995321273804, "learning_rate": 3.327521061216293e-06, "loss": 0.0453, "step": 44627 }, { "epoch": 0.7903412185046952, "grad_norm": 0.6187006831169128, "learning_rate": 3.326980720196843e-06, "loss": 0.033, "step": 44628 }, { "epoch": 0.7903589280417237, "grad_norm": 0.5587481260299683, "learning_rate": 3.32644041758045e-06, "loss": 0.0543, "step": 44629 }, { "epoch": 0.7903766375787521, "grad_norm": 0.9219740033149719, "learning_rate": 3.325900153368884e-06, "loss": 0.057, "step": 44630 }, { "epoch": 0.7903943471157805, "grad_norm": 0.5612224340438843, "learning_rate": 3.3253599275639236e-06, "loss": 0.0602, "step": 44631 }, { "epoch": 0.790412056652809, "grad_norm": 0.7412306070327759, "learning_rate": 3.3248197401673495e-06, "loss": 0.058, "step": 44632 }, { "epoch": 0.7904297661898374, "grad_norm": 0.7034134864807129, "learning_rate": 3.3242795911809392e-06, "loss": 0.0633, "step": 44633 }, { "epoch": 0.7904474757268658, "grad_norm": 0.5891829133033752, "learning_rate": 3.323739480606466e-06, "loss": 0.0465, "step": 44634 }, { "epoch": 0.7904651852638942, "grad_norm": 0.8554032444953918, "learning_rate": 3.3231994084457098e-06, "loss": 0.0648, "step": 44635 }, { "epoch": 0.7904828948009227, "grad_norm": 0.5919760465621948, "learning_rate": 3.3226593747004507e-06, "loss": 0.0746, "step": 44636 }, { "epoch": 0.7905006043379511, "grad_norm": 0.663139283657074, "learning_rate": 3.3221193793724565e-06, "loss": 0.0597, "step": 44637 }, { "epoch": 0.7905183138749795, "grad_norm": 0.6060308814048767, "learning_rate": 3.321579422463508e-06, "loss": 0.0548, "step": 44638 }, { "epoch": 0.7905360234120079, "grad_norm": 0.6878188848495483, "learning_rate": 3.3210395039753833e-06, "loss": 0.047, "step": 44639 }, { "epoch": 0.7905537329490364, "grad_norm": 0.6483763456344604, "learning_rate": 3.32049962390986e-06, "loss": 0.037, "step": 44640 }, { "epoch": 0.7905714424860648, "grad_norm": 0.5174648761749268, "learning_rate": 3.319959782268709e-06, "loss": 0.0535, "step": 44641 }, { "epoch": 0.7905891520230932, "grad_norm": 0.9483635425567627, "learning_rate": 3.319419979053705e-06, "loss": 0.0861, "step": 44642 }, { "epoch": 0.7906068615601216, "grad_norm": 0.794489324092865, "learning_rate": 3.318880214266635e-06, "loss": 0.06, "step": 44643 }, { "epoch": 0.7906245710971501, "grad_norm": 0.8516216278076172, "learning_rate": 3.3183404879092633e-06, "loss": 0.0516, "step": 44644 }, { "epoch": 0.7906422806341785, "grad_norm": 0.8378404378890991, "learning_rate": 3.317800799983371e-06, "loss": 0.0569, "step": 44645 }, { "epoch": 0.7906599901712069, "grad_norm": 0.7708861827850342, "learning_rate": 3.317261150490732e-06, "loss": 0.0751, "step": 44646 }, { "epoch": 0.7906776997082354, "grad_norm": 0.7316734194755554, "learning_rate": 3.3167215394331246e-06, "loss": 0.0672, "step": 44647 }, { "epoch": 0.7906954092452638, "grad_norm": 0.748764157295227, "learning_rate": 3.3161819668123195e-06, "loss": 0.0693, "step": 44648 }, { "epoch": 0.7907131187822922, "grad_norm": 0.4208715856075287, "learning_rate": 3.3156424326300932e-06, "loss": 0.0524, "step": 44649 }, { "epoch": 0.7907308283193206, "grad_norm": 0.8255398273468018, "learning_rate": 3.3151029368882228e-06, "loss": 0.0685, "step": 44650 }, { "epoch": 0.7907485378563491, "grad_norm": 0.37989673018455505, "learning_rate": 3.3145634795884806e-06, "loss": 0.035, "step": 44651 }, { "epoch": 0.7907662473933775, "grad_norm": 0.6028275489807129, "learning_rate": 3.314024060732643e-06, "loss": 0.0492, "step": 44652 }, { "epoch": 0.7907839569304059, "grad_norm": 0.1986403912305832, "learning_rate": 3.313484680322485e-06, "loss": 0.0529, "step": 44653 }, { "epoch": 0.7908016664674343, "grad_norm": 0.73371422290802, "learning_rate": 3.3129453383597823e-06, "loss": 0.0727, "step": 44654 }, { "epoch": 0.7908193760044628, "grad_norm": 0.768985390663147, "learning_rate": 3.3124060348463053e-06, "loss": 0.0843, "step": 44655 }, { "epoch": 0.7908370855414912, "grad_norm": 0.653190553188324, "learning_rate": 3.311866769783829e-06, "loss": 0.0531, "step": 44656 }, { "epoch": 0.7908547950785196, "grad_norm": 0.6195728778839111, "learning_rate": 3.311327543174135e-06, "loss": 0.0427, "step": 44657 }, { "epoch": 0.790872504615548, "grad_norm": 0.5033512115478516, "learning_rate": 3.3107883550189845e-06, "loss": 0.0463, "step": 44658 }, { "epoch": 0.7908902141525765, "grad_norm": 0.5773958563804626, "learning_rate": 3.31024920532016e-06, "loss": 0.0706, "step": 44659 }, { "epoch": 0.7909079236896049, "grad_norm": 0.5580124855041504, "learning_rate": 3.3097100940794325e-06, "loss": 0.0504, "step": 44660 }, { "epoch": 0.7909256332266333, "grad_norm": 0.46408766508102417, "learning_rate": 3.3091710212985764e-06, "loss": 0.0359, "step": 44661 }, { "epoch": 0.7909433427636618, "grad_norm": 0.5550956130027771, "learning_rate": 3.3086319869793646e-06, "loss": 0.0615, "step": 44662 }, { "epoch": 0.7909610523006902, "grad_norm": 0.40068909525871277, "learning_rate": 3.3080929911235718e-06, "loss": 0.042, "step": 44663 }, { "epoch": 0.7909787618377186, "grad_norm": 0.5301764011383057, "learning_rate": 3.307554033732975e-06, "loss": 0.0584, "step": 44664 }, { "epoch": 0.790996471374747, "grad_norm": 0.7978678941726685, "learning_rate": 3.307015114809338e-06, "loss": 0.0728, "step": 44665 }, { "epoch": 0.7910141809117756, "grad_norm": 0.6657440662384033, "learning_rate": 3.3064762343544375e-06, "loss": 0.0483, "step": 44666 }, { "epoch": 0.791031890448804, "grad_norm": 0.3411130905151367, "learning_rate": 3.3059373923700485e-06, "loss": 0.0682, "step": 44667 }, { "epoch": 0.7910495999858324, "grad_norm": 0.4484464228153229, "learning_rate": 3.3053985888579483e-06, "loss": 0.0703, "step": 44668 }, { "epoch": 0.7910673095228608, "grad_norm": 0.38739150762557983, "learning_rate": 3.3048598238198974e-06, "loss": 0.0554, "step": 44669 }, { "epoch": 0.7910850190598893, "grad_norm": 0.5676880478858948, "learning_rate": 3.304321097257677e-06, "loss": 0.0618, "step": 44670 }, { "epoch": 0.7911027285969177, "grad_norm": 0.6970704197883606, "learning_rate": 3.303782409173051e-06, "loss": 0.0538, "step": 44671 }, { "epoch": 0.7911204381339461, "grad_norm": 0.8464009165763855, "learning_rate": 3.303243759567807e-06, "loss": 0.0891, "step": 44672 }, { "epoch": 0.7911381476709745, "grad_norm": 0.5074048042297363, "learning_rate": 3.3027051484437036e-06, "loss": 0.053, "step": 44673 }, { "epoch": 0.791155857208003, "grad_norm": 0.8778814077377319, "learning_rate": 3.302166575802518e-06, "loss": 0.1003, "step": 44674 }, { "epoch": 0.7911735667450314, "grad_norm": 1.1696175336837769, "learning_rate": 3.3016280416460255e-06, "loss": 0.0595, "step": 44675 }, { "epoch": 0.7911912762820598, "grad_norm": 0.769149124622345, "learning_rate": 3.3010895459759888e-06, "loss": 0.0449, "step": 44676 }, { "epoch": 0.7912089858190883, "grad_norm": 0.6142524480819702, "learning_rate": 3.300551088794183e-06, "loss": 0.0422, "step": 44677 }, { "epoch": 0.7912266953561167, "grad_norm": 0.4814418852329254, "learning_rate": 3.300012670102382e-06, "loss": 0.042, "step": 44678 }, { "epoch": 0.7912444048931451, "grad_norm": 1.066379427909851, "learning_rate": 3.2994742899023554e-06, "loss": 0.0822, "step": 44679 }, { "epoch": 0.7912621144301735, "grad_norm": 0.7536054253578186, "learning_rate": 3.2989359481958758e-06, "loss": 0.0644, "step": 44680 }, { "epoch": 0.791279823967202, "grad_norm": 0.6403450965881348, "learning_rate": 3.298397644984713e-06, "loss": 0.0474, "step": 44681 }, { "epoch": 0.7912975335042304, "grad_norm": 0.590797483921051, "learning_rate": 3.2978593802706425e-06, "loss": 0.0374, "step": 44682 }, { "epoch": 0.7913152430412588, "grad_norm": 0.5373799204826355, "learning_rate": 3.2973211540554278e-06, "loss": 0.0621, "step": 44683 }, { "epoch": 0.7913329525782872, "grad_norm": 0.38342785835266113, "learning_rate": 3.296782966340842e-06, "loss": 0.0517, "step": 44684 }, { "epoch": 0.7913506621153157, "grad_norm": 0.6868420243263245, "learning_rate": 3.2962448171286564e-06, "loss": 0.0515, "step": 44685 }, { "epoch": 0.7913683716523441, "grad_norm": 0.6864299178123474, "learning_rate": 3.2957067064206455e-06, "loss": 0.0574, "step": 44686 }, { "epoch": 0.7913860811893725, "grad_norm": 0.9719308018684387, "learning_rate": 3.2951686342185728e-06, "loss": 0.0612, "step": 44687 }, { "epoch": 0.7914037907264009, "grad_norm": 0.3813597559928894, "learning_rate": 3.2946306005242107e-06, "loss": 0.0478, "step": 44688 }, { "epoch": 0.7914215002634294, "grad_norm": 0.5904645323753357, "learning_rate": 3.2940926053393287e-06, "loss": 0.0459, "step": 44689 }, { "epoch": 0.7914392098004578, "grad_norm": 0.8415873646736145, "learning_rate": 3.2935546486656996e-06, "loss": 0.0541, "step": 44690 }, { "epoch": 0.7914569193374862, "grad_norm": 0.7541161775588989, "learning_rate": 3.2930167305050917e-06, "loss": 0.0504, "step": 44691 }, { "epoch": 0.7914746288745147, "grad_norm": 0.4704791307449341, "learning_rate": 3.2924788508592734e-06, "loss": 0.0541, "step": 44692 }, { "epoch": 0.7914923384115431, "grad_norm": 0.4756304621696472, "learning_rate": 3.291941009730022e-06, "loss": 0.0552, "step": 44693 }, { "epoch": 0.7915100479485715, "grad_norm": 0.7889135479927063, "learning_rate": 3.2914032071190935e-06, "loss": 0.0835, "step": 44694 }, { "epoch": 0.7915277574855999, "grad_norm": 0.42744359374046326, "learning_rate": 3.2908654430282657e-06, "loss": 0.0352, "step": 44695 }, { "epoch": 0.7915454670226284, "grad_norm": 0.7268697619438171, "learning_rate": 3.2903277174593105e-06, "loss": 0.0644, "step": 44696 }, { "epoch": 0.7915631765596568, "grad_norm": 1.2857741117477417, "learning_rate": 3.2897900304139876e-06, "loss": 0.0969, "step": 44697 }, { "epoch": 0.7915808860966852, "grad_norm": 0.568465530872345, "learning_rate": 3.289252381894073e-06, "loss": 0.0558, "step": 44698 }, { "epoch": 0.7915985956337136, "grad_norm": 0.4427953362464905, "learning_rate": 3.2887147719013327e-06, "loss": 0.0553, "step": 44699 }, { "epoch": 0.7916163051707421, "grad_norm": 0.7496073842048645, "learning_rate": 3.288177200437536e-06, "loss": 0.0464, "step": 44700 }, { "epoch": 0.7916340147077705, "grad_norm": 0.8694043159484863, "learning_rate": 3.2876396675044514e-06, "loss": 0.0526, "step": 44701 }, { "epoch": 0.7916517242447989, "grad_norm": 0.7438446283340454, "learning_rate": 3.2871021731038488e-06, "loss": 0.0532, "step": 44702 }, { "epoch": 0.7916694337818274, "grad_norm": 0.3897494673728943, "learning_rate": 3.2865647172374935e-06, "loss": 0.0324, "step": 44703 }, { "epoch": 0.7916871433188558, "grad_norm": 0.5197216272354126, "learning_rate": 3.2860272999071613e-06, "loss": 0.0483, "step": 44704 }, { "epoch": 0.7917048528558842, "grad_norm": 0.5798894762992859, "learning_rate": 3.28548992111461e-06, "loss": 0.0338, "step": 44705 }, { "epoch": 0.7917225623929126, "grad_norm": 0.6426698565483093, "learning_rate": 3.284952580861612e-06, "loss": 0.0575, "step": 44706 }, { "epoch": 0.7917402719299411, "grad_norm": 0.3272903859615326, "learning_rate": 3.284415279149939e-06, "loss": 0.0529, "step": 44707 }, { "epoch": 0.7917579814669695, "grad_norm": 0.8834975361824036, "learning_rate": 3.2838780159813453e-06, "loss": 0.0406, "step": 44708 }, { "epoch": 0.7917756910039979, "grad_norm": 0.47971296310424805, "learning_rate": 3.2833407913576137e-06, "loss": 0.0419, "step": 44709 }, { "epoch": 0.7917934005410263, "grad_norm": 0.4586966633796692, "learning_rate": 3.282803605280506e-06, "loss": 0.0631, "step": 44710 }, { "epoch": 0.7918111100780548, "grad_norm": 0.45810773968696594, "learning_rate": 3.282266457751793e-06, "loss": 0.0246, "step": 44711 }, { "epoch": 0.7918288196150832, "grad_norm": 0.46140018105506897, "learning_rate": 3.281729348773233e-06, "loss": 0.0563, "step": 44712 }, { "epoch": 0.7918465291521116, "grad_norm": 0.5755862593650818, "learning_rate": 3.2811922783465997e-06, "loss": 0.0587, "step": 44713 }, { "epoch": 0.79186423868914, "grad_norm": 0.5634147524833679, "learning_rate": 3.2806552464736615e-06, "loss": 0.0549, "step": 44714 }, { "epoch": 0.7918819482261685, "grad_norm": 1.1014659404754639, "learning_rate": 3.280118253156179e-06, "loss": 0.0603, "step": 44715 }, { "epoch": 0.7918996577631969, "grad_norm": 0.37899455428123474, "learning_rate": 3.2795812983959223e-06, "loss": 0.0587, "step": 44716 }, { "epoch": 0.7919173673002253, "grad_norm": 0.6848677396774292, "learning_rate": 3.2790443821946564e-06, "loss": 0.0737, "step": 44717 }, { "epoch": 0.7919350768372538, "grad_norm": 0.44584861397743225, "learning_rate": 3.2785075045541503e-06, "loss": 0.059, "step": 44718 }, { "epoch": 0.7919527863742822, "grad_norm": 0.9748396277427673, "learning_rate": 3.277970665476169e-06, "loss": 0.0582, "step": 44719 }, { "epoch": 0.7919704959113106, "grad_norm": 0.7623981833457947, "learning_rate": 3.2774338649624774e-06, "loss": 0.0391, "step": 44720 }, { "epoch": 0.791988205448339, "grad_norm": 0.693111002445221, "learning_rate": 3.2768971030148465e-06, "loss": 0.0537, "step": 44721 }, { "epoch": 0.7920059149853675, "grad_norm": 0.31884801387786865, "learning_rate": 3.276360379635036e-06, "loss": 0.0484, "step": 44722 }, { "epoch": 0.7920236245223959, "grad_norm": 0.4952408969402313, "learning_rate": 3.2758236948248134e-06, "loss": 0.0422, "step": 44723 }, { "epoch": 0.7920413340594243, "grad_norm": 0.38273099064826965, "learning_rate": 3.275287048585945e-06, "loss": 0.0384, "step": 44724 }, { "epoch": 0.7920590435964527, "grad_norm": 0.9896354675292969, "learning_rate": 3.2747504409201994e-06, "loss": 0.0591, "step": 44725 }, { "epoch": 0.7920767531334812, "grad_norm": 0.5414018630981445, "learning_rate": 3.274213871829336e-06, "loss": 0.057, "step": 44726 }, { "epoch": 0.7920944626705096, "grad_norm": 0.49086230993270874, "learning_rate": 3.273677341315122e-06, "loss": 0.0338, "step": 44727 }, { "epoch": 0.792112172207538, "grad_norm": 0.5169525146484375, "learning_rate": 3.273140849379324e-06, "loss": 0.0409, "step": 44728 }, { "epoch": 0.7921298817445664, "grad_norm": 0.5840848684310913, "learning_rate": 3.272604396023707e-06, "loss": 0.0755, "step": 44729 }, { "epoch": 0.792147591281595, "grad_norm": 0.3963220715522766, "learning_rate": 3.2720679812500354e-06, "loss": 0.0488, "step": 44730 }, { "epoch": 0.7921653008186234, "grad_norm": 0.7483336925506592, "learning_rate": 3.2715316050600734e-06, "loss": 0.0601, "step": 44731 }, { "epoch": 0.7921830103556518, "grad_norm": 0.2122945636510849, "learning_rate": 3.2709952674555915e-06, "loss": 0.0259, "step": 44732 }, { "epoch": 0.7922007198926803, "grad_norm": 0.6319977641105652, "learning_rate": 3.270458968438343e-06, "loss": 0.0751, "step": 44733 }, { "epoch": 0.7922184294297087, "grad_norm": 0.8076468110084534, "learning_rate": 3.269922708010098e-06, "loss": 0.073, "step": 44734 }, { "epoch": 0.7922361389667371, "grad_norm": 0.4668494462966919, "learning_rate": 3.269386486172627e-06, "loss": 0.0654, "step": 44735 }, { "epoch": 0.7922538485037655, "grad_norm": 0.540935754776001, "learning_rate": 3.2688503029276816e-06, "loss": 0.0414, "step": 44736 }, { "epoch": 0.792271558040794, "grad_norm": 0.5056629180908203, "learning_rate": 3.2683141582770293e-06, "loss": 0.0316, "step": 44737 }, { "epoch": 0.7922892675778224, "grad_norm": 0.30239030718803406, "learning_rate": 3.2677780522224417e-06, "loss": 0.066, "step": 44738 }, { "epoch": 0.7923069771148508, "grad_norm": 0.5021036863327026, "learning_rate": 3.26724198476568e-06, "loss": 0.0528, "step": 44739 }, { "epoch": 0.7923246866518792, "grad_norm": 0.6464453339576721, "learning_rate": 3.2667059559085027e-06, "loss": 0.0618, "step": 44740 }, { "epoch": 0.7923423961889077, "grad_norm": 0.8012478947639465, "learning_rate": 3.2661699656526757e-06, "loss": 0.0809, "step": 44741 }, { "epoch": 0.7923601057259361, "grad_norm": 0.9165619015693665, "learning_rate": 3.2656340139999624e-06, "loss": 0.0639, "step": 44742 }, { "epoch": 0.7923778152629645, "grad_norm": 0.6861464381217957, "learning_rate": 3.2650981009521304e-06, "loss": 0.0697, "step": 44743 }, { "epoch": 0.7923955247999929, "grad_norm": 0.08557353168725967, "learning_rate": 3.264562226510934e-06, "loss": 0.0448, "step": 44744 }, { "epoch": 0.7924132343370214, "grad_norm": 0.8042072057723999, "learning_rate": 3.2640263906781433e-06, "loss": 0.0399, "step": 44745 }, { "epoch": 0.7924309438740498, "grad_norm": 1.0146068334579468, "learning_rate": 3.263490593455517e-06, "loss": 0.0533, "step": 44746 }, { "epoch": 0.7924486534110782, "grad_norm": 0.42244958877563477, "learning_rate": 3.2629548348448194e-06, "loss": 0.037, "step": 44747 }, { "epoch": 0.7924663629481067, "grad_norm": 0.4217548668384552, "learning_rate": 3.2624191148478126e-06, "loss": 0.0465, "step": 44748 }, { "epoch": 0.7924840724851351, "grad_norm": 0.4147661626338959, "learning_rate": 3.2618834334662616e-06, "loss": 0.0445, "step": 44749 }, { "epoch": 0.7925017820221635, "grad_norm": 0.6521579623222351, "learning_rate": 3.2613477907019302e-06, "loss": 0.0678, "step": 44750 }, { "epoch": 0.7925194915591919, "grad_norm": 0.46895065903663635, "learning_rate": 3.260812186556573e-06, "loss": 0.0441, "step": 44751 }, { "epoch": 0.7925372010962204, "grad_norm": 0.30398041009902954, "learning_rate": 3.260276621031956e-06, "loss": 0.0333, "step": 44752 }, { "epoch": 0.7925549106332488, "grad_norm": 0.5098990797996521, "learning_rate": 3.259741094129846e-06, "loss": 0.048, "step": 44753 }, { "epoch": 0.7925726201702772, "grad_norm": 0.7129003405570984, "learning_rate": 3.2592056058519965e-06, "loss": 0.0473, "step": 44754 }, { "epoch": 0.7925903297073056, "grad_norm": 0.6369914412498474, "learning_rate": 3.2586701562001725e-06, "loss": 0.0554, "step": 44755 }, { "epoch": 0.7926080392443341, "grad_norm": 0.5219606161117554, "learning_rate": 3.258134745176136e-06, "loss": 0.0425, "step": 44756 }, { "epoch": 0.7926257487813625, "grad_norm": 0.5190450549125671, "learning_rate": 3.2575993727816494e-06, "loss": 0.0553, "step": 44757 }, { "epoch": 0.7926434583183909, "grad_norm": 0.6218937635421753, "learning_rate": 3.2570640390184723e-06, "loss": 0.0851, "step": 44758 }, { "epoch": 0.7926611678554193, "grad_norm": 0.7456613183021545, "learning_rate": 3.256528743888369e-06, "loss": 0.0425, "step": 44759 }, { "epoch": 0.7926788773924478, "grad_norm": 0.8711240887641907, "learning_rate": 3.255993487393101e-06, "loss": 0.087, "step": 44760 }, { "epoch": 0.7926965869294762, "grad_norm": 0.7507242560386658, "learning_rate": 3.2554582695344216e-06, "loss": 0.0421, "step": 44761 }, { "epoch": 0.7927142964665046, "grad_norm": 0.4960311949253082, "learning_rate": 3.2549230903140964e-06, "loss": 0.0503, "step": 44762 }, { "epoch": 0.7927320060035331, "grad_norm": 0.7595011591911316, "learning_rate": 3.2543879497338875e-06, "loss": 0.058, "step": 44763 }, { "epoch": 0.7927497155405615, "grad_norm": 0.8063886761665344, "learning_rate": 3.253852847795558e-06, "loss": 0.0703, "step": 44764 }, { "epoch": 0.7927674250775899, "grad_norm": 0.8081400394439697, "learning_rate": 3.2533177845008603e-06, "loss": 0.0628, "step": 44765 }, { "epoch": 0.7927851346146183, "grad_norm": 0.777800440788269, "learning_rate": 3.2527827598515547e-06, "loss": 0.0512, "step": 44766 }, { "epoch": 0.7928028441516468, "grad_norm": 0.4962692856788635, "learning_rate": 3.2522477738494143e-06, "loss": 0.0476, "step": 44767 }, { "epoch": 0.7928205536886752, "grad_norm": 0.6760867834091187, "learning_rate": 3.2517128264961867e-06, "loss": 0.056, "step": 44768 }, { "epoch": 0.7928382632257036, "grad_norm": 0.7406160235404968, "learning_rate": 3.251177917793636e-06, "loss": 0.0598, "step": 44769 }, { "epoch": 0.792855972762732, "grad_norm": 0.5422178506851196, "learning_rate": 3.2506430477435207e-06, "loss": 0.0574, "step": 44770 }, { "epoch": 0.7928736822997605, "grad_norm": 0.7252454161643982, "learning_rate": 3.2501082163476047e-06, "loss": 0.062, "step": 44771 }, { "epoch": 0.7928913918367889, "grad_norm": 0.3108282685279846, "learning_rate": 3.2495734236076424e-06, "loss": 0.0542, "step": 44772 }, { "epoch": 0.7929091013738173, "grad_norm": 0.4521579146385193, "learning_rate": 3.249038669525394e-06, "loss": 0.0615, "step": 44773 }, { "epoch": 0.7929268109108457, "grad_norm": 0.4907597601413727, "learning_rate": 3.248503954102619e-06, "loss": 0.0551, "step": 44774 }, { "epoch": 0.7929445204478742, "grad_norm": 0.9455565214157104, "learning_rate": 3.2479692773410785e-06, "loss": 0.0813, "step": 44775 }, { "epoch": 0.7929622299849026, "grad_norm": 0.409577339887619, "learning_rate": 3.2474346392425293e-06, "loss": 0.0569, "step": 44776 }, { "epoch": 0.792979939521931, "grad_norm": 0.4395553469657898, "learning_rate": 3.2469000398087327e-06, "loss": 0.0772, "step": 44777 }, { "epoch": 0.7929976490589595, "grad_norm": 0.7623985409736633, "learning_rate": 3.2463654790414498e-06, "loss": 0.0486, "step": 44778 }, { "epoch": 0.7930153585959879, "grad_norm": 0.24799689650535583, "learning_rate": 3.2458309569424304e-06, "loss": 0.0191, "step": 44779 }, { "epoch": 0.7930330681330163, "grad_norm": 0.9507395625114441, "learning_rate": 3.24529647351344e-06, "loss": 0.0684, "step": 44780 }, { "epoch": 0.7930507776700447, "grad_norm": 0.6063706278800964, "learning_rate": 3.2447620287562337e-06, "loss": 0.0632, "step": 44781 }, { "epoch": 0.7930684872070732, "grad_norm": 0.350501149892807, "learning_rate": 3.2442276226725766e-06, "loss": 0.0483, "step": 44782 }, { "epoch": 0.7930861967441016, "grad_norm": 0.8524104952812195, "learning_rate": 3.2436932552642173e-06, "loss": 0.0563, "step": 44783 }, { "epoch": 0.79310390628113, "grad_norm": 0.6431825757026672, "learning_rate": 3.243158926532918e-06, "loss": 0.0654, "step": 44784 }, { "epoch": 0.7931216158181584, "grad_norm": 0.6646432280540466, "learning_rate": 3.242624636480437e-06, "loss": 0.0657, "step": 44785 }, { "epoch": 0.7931393253551869, "grad_norm": 0.2127002477645874, "learning_rate": 3.242090385108532e-06, "loss": 0.0469, "step": 44786 }, { "epoch": 0.7931570348922153, "grad_norm": 0.3686295747756958, "learning_rate": 3.2415561724189593e-06, "loss": 0.0495, "step": 44787 }, { "epoch": 0.7931747444292437, "grad_norm": 0.5141187906265259, "learning_rate": 3.2410219984134786e-06, "loss": 0.0628, "step": 44788 }, { "epoch": 0.7931924539662721, "grad_norm": 0.6051231026649475, "learning_rate": 3.2404878630938494e-06, "loss": 0.0666, "step": 44789 }, { "epoch": 0.7932101635033006, "grad_norm": 1.0493031740188599, "learning_rate": 3.2399537664618215e-06, "loss": 0.0806, "step": 44790 }, { "epoch": 0.793227873040329, "grad_norm": 0.6694725155830383, "learning_rate": 3.2394197085191573e-06, "loss": 0.0635, "step": 44791 }, { "epoch": 0.7932455825773574, "grad_norm": 0.9985968470573425, "learning_rate": 3.2388856892676168e-06, "loss": 0.0767, "step": 44792 }, { "epoch": 0.793263292114386, "grad_norm": 0.6651091575622559, "learning_rate": 3.2383517087089483e-06, "loss": 0.0442, "step": 44793 }, { "epoch": 0.7932810016514144, "grad_norm": 0.8038609027862549, "learning_rate": 3.237817766844912e-06, "loss": 0.0823, "step": 44794 }, { "epoch": 0.7932987111884428, "grad_norm": 0.44309332966804504, "learning_rate": 3.2372838636772637e-06, "loss": 0.0688, "step": 44795 }, { "epoch": 0.7933164207254711, "grad_norm": 0.8025964498519897, "learning_rate": 3.2367499992077688e-06, "loss": 0.0606, "step": 44796 }, { "epoch": 0.7933341302624997, "grad_norm": 0.4742237329483032, "learning_rate": 3.236216173438172e-06, "loss": 0.0586, "step": 44797 }, { "epoch": 0.7933518397995281, "grad_norm": 0.7285749912261963, "learning_rate": 3.235682386370235e-06, "loss": 0.0822, "step": 44798 }, { "epoch": 0.7933695493365565, "grad_norm": 0.3827691674232483, "learning_rate": 3.2351486380057173e-06, "loss": 0.0479, "step": 44799 }, { "epoch": 0.7933872588735849, "grad_norm": 0.35830292105674744, "learning_rate": 3.2346149283463648e-06, "loss": 0.0584, "step": 44800 }, { "epoch": 0.7934049684106134, "grad_norm": 0.6533907651901245, "learning_rate": 3.2340812573939404e-06, "loss": 0.0707, "step": 44801 }, { "epoch": 0.7934226779476418, "grad_norm": 0.6806052923202515, "learning_rate": 3.2335476251501987e-06, "loss": 0.0783, "step": 44802 }, { "epoch": 0.7934403874846702, "grad_norm": 0.6273059844970703, "learning_rate": 3.2330140316168953e-06, "loss": 0.0582, "step": 44803 }, { "epoch": 0.7934580970216986, "grad_norm": 0.5440717935562134, "learning_rate": 3.2324804767957856e-06, "loss": 0.0426, "step": 44804 }, { "epoch": 0.7934758065587271, "grad_norm": 0.3719002902507782, "learning_rate": 3.2319469606886242e-06, "loss": 0.0456, "step": 44805 }, { "epoch": 0.7934935160957555, "grad_norm": 0.435987263917923, "learning_rate": 3.231413483297172e-06, "loss": 0.0439, "step": 44806 }, { "epoch": 0.7935112256327839, "grad_norm": 0.531650960445404, "learning_rate": 3.2308800446231756e-06, "loss": 0.0623, "step": 44807 }, { "epoch": 0.7935289351698124, "grad_norm": 0.6905933618545532, "learning_rate": 3.230346644668392e-06, "loss": 0.0511, "step": 44808 }, { "epoch": 0.7935466447068408, "grad_norm": 1.3209335803985596, "learning_rate": 3.2298132834345783e-06, "loss": 0.0689, "step": 44809 }, { "epoch": 0.7935643542438692, "grad_norm": 0.57387775182724, "learning_rate": 3.2292799609234915e-06, "loss": 0.0425, "step": 44810 }, { "epoch": 0.7935820637808976, "grad_norm": 0.3556390106678009, "learning_rate": 3.2287466771368796e-06, "loss": 0.0375, "step": 44811 }, { "epoch": 0.7935997733179261, "grad_norm": 0.5301718711853027, "learning_rate": 3.2282134320765018e-06, "loss": 0.0605, "step": 44812 }, { "epoch": 0.7936174828549545, "grad_norm": 0.7977647185325623, "learning_rate": 3.2276802257441097e-06, "loss": 0.082, "step": 44813 }, { "epoch": 0.7936351923919829, "grad_norm": 0.21283070743083954, "learning_rate": 3.2271470581414585e-06, "loss": 0.0473, "step": 44814 }, { "epoch": 0.7936529019290113, "grad_norm": 0.5148195028305054, "learning_rate": 3.226613929270304e-06, "loss": 0.0377, "step": 44815 }, { "epoch": 0.7936706114660398, "grad_norm": 0.7060136198997498, "learning_rate": 3.2260808391323993e-06, "loss": 0.0483, "step": 44816 }, { "epoch": 0.7936883210030682, "grad_norm": 0.5601701140403748, "learning_rate": 3.2255477877294996e-06, "loss": 0.0807, "step": 44817 }, { "epoch": 0.7937060305400966, "grad_norm": 0.7379691004753113, "learning_rate": 3.225014775063354e-06, "loss": 0.0701, "step": 44818 }, { "epoch": 0.793723740077125, "grad_norm": 0.4153504967689514, "learning_rate": 3.224481801135719e-06, "loss": 0.063, "step": 44819 }, { "epoch": 0.7937414496141535, "grad_norm": 0.5294390320777893, "learning_rate": 3.2239488659483485e-06, "loss": 0.0262, "step": 44820 }, { "epoch": 0.7937591591511819, "grad_norm": 0.666040301322937, "learning_rate": 3.2234159695029992e-06, "loss": 0.0441, "step": 44821 }, { "epoch": 0.7937768686882103, "grad_norm": 0.44860684871673584, "learning_rate": 3.2228831118014146e-06, "loss": 0.0404, "step": 44822 }, { "epoch": 0.7937945782252388, "grad_norm": 0.46164241433143616, "learning_rate": 3.2223502928453537e-06, "loss": 0.0368, "step": 44823 }, { "epoch": 0.7938122877622672, "grad_norm": 0.4014749825000763, "learning_rate": 3.2218175126365696e-06, "loss": 0.0512, "step": 44824 }, { "epoch": 0.7938299972992956, "grad_norm": 0.45431065559387207, "learning_rate": 3.2212847711768146e-06, "loss": 0.0495, "step": 44825 }, { "epoch": 0.793847706836324, "grad_norm": 0.5405879616737366, "learning_rate": 3.220752068467842e-06, "loss": 0.075, "step": 44826 }, { "epoch": 0.7938654163733525, "grad_norm": 0.7165544629096985, "learning_rate": 3.2202194045114027e-06, "loss": 0.0593, "step": 44827 }, { "epoch": 0.7938831259103809, "grad_norm": 0.39896291494369507, "learning_rate": 3.2196867793092546e-06, "loss": 0.0416, "step": 44828 }, { "epoch": 0.7939008354474093, "grad_norm": 0.7420464754104614, "learning_rate": 3.2191541928631413e-06, "loss": 0.0603, "step": 44829 }, { "epoch": 0.7939185449844377, "grad_norm": 0.6652403473854065, "learning_rate": 3.21862164517482e-06, "loss": 0.0646, "step": 44830 }, { "epoch": 0.7939362545214662, "grad_norm": 0.5481162071228027, "learning_rate": 3.2180891362460446e-06, "loss": 0.0531, "step": 44831 }, { "epoch": 0.7939539640584946, "grad_norm": 0.6046484112739563, "learning_rate": 3.2175566660785564e-06, "loss": 0.0555, "step": 44832 }, { "epoch": 0.793971673595523, "grad_norm": 0.4657096266746521, "learning_rate": 3.2170242346741195e-06, "loss": 0.0467, "step": 44833 }, { "epoch": 0.7939893831325514, "grad_norm": 0.514274001121521, "learning_rate": 3.216491842034481e-06, "loss": 0.0286, "step": 44834 }, { "epoch": 0.7940070926695799, "grad_norm": 0.5525788068771362, "learning_rate": 3.215959488161397e-06, "loss": 0.06, "step": 44835 }, { "epoch": 0.7940248022066083, "grad_norm": 0.5764579176902771, "learning_rate": 3.2154271730566097e-06, "loss": 0.051, "step": 44836 }, { "epoch": 0.7940425117436367, "grad_norm": 0.6316133141517639, "learning_rate": 3.214894896721875e-06, "loss": 0.0736, "step": 44837 }, { "epoch": 0.7940602212806652, "grad_norm": 0.7910792827606201, "learning_rate": 3.2143626591589494e-06, "loss": 0.0645, "step": 44838 }, { "epoch": 0.7940779308176936, "grad_norm": 0.7331124544143677, "learning_rate": 3.2138304603695728e-06, "loss": 0.0459, "step": 44839 }, { "epoch": 0.794095640354722, "grad_norm": 0.5372669696807861, "learning_rate": 3.2132983003555027e-06, "loss": 0.0459, "step": 44840 }, { "epoch": 0.7941133498917504, "grad_norm": 0.69205242395401, "learning_rate": 3.212766179118489e-06, "loss": 0.041, "step": 44841 }, { "epoch": 0.7941310594287789, "grad_norm": 0.4680752754211426, "learning_rate": 3.2122340966602825e-06, "loss": 0.0522, "step": 44842 }, { "epoch": 0.7941487689658073, "grad_norm": 0.4853377640247345, "learning_rate": 3.211702052982633e-06, "loss": 0.0509, "step": 44843 }, { "epoch": 0.7941664785028357, "grad_norm": 0.78912353515625, "learning_rate": 3.2111700480872923e-06, "loss": 0.083, "step": 44844 }, { "epoch": 0.7941841880398641, "grad_norm": 0.621320366859436, "learning_rate": 3.2106380819760096e-06, "loss": 0.0575, "step": 44845 }, { "epoch": 0.7942018975768926, "grad_norm": 0.8934227228164673, "learning_rate": 3.210106154650538e-06, "loss": 0.0624, "step": 44846 }, { "epoch": 0.794219607113921, "grad_norm": 0.475604385137558, "learning_rate": 3.2095742661126225e-06, "loss": 0.0338, "step": 44847 }, { "epoch": 0.7942373166509494, "grad_norm": 0.692817747592926, "learning_rate": 3.2090424163640143e-06, "loss": 0.0804, "step": 44848 }, { "epoch": 0.7942550261879778, "grad_norm": 0.7155869007110596, "learning_rate": 3.208510605406468e-06, "loss": 0.0453, "step": 44849 }, { "epoch": 0.7942727357250063, "grad_norm": 0.6398593187332153, "learning_rate": 3.2079788332417235e-06, "loss": 0.0753, "step": 44850 }, { "epoch": 0.7942904452620347, "grad_norm": 0.46687382459640503, "learning_rate": 3.2074470998715387e-06, "loss": 0.0409, "step": 44851 }, { "epoch": 0.7943081547990631, "grad_norm": 0.46390214562416077, "learning_rate": 3.2069154052976584e-06, "loss": 0.0678, "step": 44852 }, { "epoch": 0.7943258643360916, "grad_norm": 0.44238340854644775, "learning_rate": 3.2063837495218347e-06, "loss": 0.056, "step": 44853 }, { "epoch": 0.79434357387312, "grad_norm": 0.913780927658081, "learning_rate": 3.205852132545814e-06, "loss": 0.0552, "step": 44854 }, { "epoch": 0.7943612834101484, "grad_norm": 0.46274760365486145, "learning_rate": 3.205320554371349e-06, "loss": 0.0549, "step": 44855 }, { "epoch": 0.7943789929471768, "grad_norm": 1.003165364265442, "learning_rate": 3.204789015000189e-06, "loss": 0.1414, "step": 44856 }, { "epoch": 0.7943967024842054, "grad_norm": 0.5115255117416382, "learning_rate": 3.204257514434077e-06, "loss": 0.076, "step": 44857 }, { "epoch": 0.7944144120212338, "grad_norm": 0.7414838671684265, "learning_rate": 3.2037260526747642e-06, "loss": 0.0729, "step": 44858 }, { "epoch": 0.7944321215582621, "grad_norm": 0.4439488351345062, "learning_rate": 3.2031946297239993e-06, "loss": 0.0651, "step": 44859 }, { "epoch": 0.7944498310952905, "grad_norm": 0.5442082285881042, "learning_rate": 3.202663245583535e-06, "loss": 0.0602, "step": 44860 }, { "epoch": 0.7944675406323191, "grad_norm": 0.8061985969543457, "learning_rate": 3.2021319002551067e-06, "loss": 0.0521, "step": 44861 }, { "epoch": 0.7944852501693475, "grad_norm": 0.4804299771785736, "learning_rate": 3.201600593740476e-06, "loss": 0.0317, "step": 44862 }, { "epoch": 0.7945029597063759, "grad_norm": 0.38890939950942993, "learning_rate": 3.2010693260413916e-06, "loss": 0.073, "step": 44863 }, { "epoch": 0.7945206692434043, "grad_norm": 0.696698009967804, "learning_rate": 3.200538097159589e-06, "loss": 0.0568, "step": 44864 }, { "epoch": 0.7945383787804328, "grad_norm": 0.5321599245071411, "learning_rate": 3.2000069070968252e-06, "loss": 0.0696, "step": 44865 }, { "epoch": 0.7945560883174612, "grad_norm": 0.6507534384727478, "learning_rate": 3.1994757558548424e-06, "loss": 0.0696, "step": 44866 }, { "epoch": 0.7945737978544896, "grad_norm": 0.5281150937080383, "learning_rate": 3.1989446434353968e-06, "loss": 0.053, "step": 44867 }, { "epoch": 0.7945915073915181, "grad_norm": 0.6946374773979187, "learning_rate": 3.198413569840225e-06, "loss": 0.0569, "step": 44868 }, { "epoch": 0.7946092169285465, "grad_norm": 0.3772119879722595, "learning_rate": 3.197882535071079e-06, "loss": 0.043, "step": 44869 }, { "epoch": 0.7946269264655749, "grad_norm": 0.5013531446456909, "learning_rate": 3.197351539129707e-06, "loss": 0.0655, "step": 44870 }, { "epoch": 0.7946446360026033, "grad_norm": 0.6835117340087891, "learning_rate": 3.1968205820178533e-06, "loss": 0.068, "step": 44871 }, { "epoch": 0.7946623455396318, "grad_norm": 0.35019367933273315, "learning_rate": 3.1962896637372656e-06, "loss": 0.0316, "step": 44872 }, { "epoch": 0.7946800550766602, "grad_norm": 0.40910929441452026, "learning_rate": 3.195758784289692e-06, "loss": 0.0434, "step": 44873 }, { "epoch": 0.7946977646136886, "grad_norm": 0.6969982385635376, "learning_rate": 3.1952279436768816e-06, "loss": 0.0499, "step": 44874 }, { "epoch": 0.794715474150717, "grad_norm": 0.6487011313438416, "learning_rate": 3.1946971419005737e-06, "loss": 0.0693, "step": 44875 }, { "epoch": 0.7947331836877455, "grad_norm": 0.4683490991592407, "learning_rate": 3.194166378962517e-06, "loss": 0.0607, "step": 44876 }, { "epoch": 0.7947508932247739, "grad_norm": 1.093037486076355, "learning_rate": 3.1936356548644596e-06, "loss": 0.0955, "step": 44877 }, { "epoch": 0.7947686027618023, "grad_norm": 1.0529814958572388, "learning_rate": 3.1931049696081503e-06, "loss": 0.0496, "step": 44878 }, { "epoch": 0.7947863122988307, "grad_norm": 1.2543275356292725, "learning_rate": 3.19257432319533e-06, "loss": 0.0651, "step": 44879 }, { "epoch": 0.7948040218358592, "grad_norm": 0.6890650987625122, "learning_rate": 3.192043715627743e-06, "loss": 0.0453, "step": 44880 }, { "epoch": 0.7948217313728876, "grad_norm": 0.4967828094959259, "learning_rate": 3.191513146907138e-06, "loss": 0.0498, "step": 44881 }, { "epoch": 0.794839440909916, "grad_norm": 0.713670015335083, "learning_rate": 3.1909826170352624e-06, "loss": 0.0541, "step": 44882 }, { "epoch": 0.7948571504469445, "grad_norm": 0.7446796894073486, "learning_rate": 3.190452126013858e-06, "loss": 0.0786, "step": 44883 }, { "epoch": 0.7948748599839729, "grad_norm": 0.5376308560371399, "learning_rate": 3.1899216738446717e-06, "loss": 0.0514, "step": 44884 }, { "epoch": 0.7948925695210013, "grad_norm": 0.37384989857673645, "learning_rate": 3.1893912605294523e-06, "loss": 0.0576, "step": 44885 }, { "epoch": 0.7949102790580297, "grad_norm": 0.6828019022941589, "learning_rate": 3.188860886069938e-06, "loss": 0.0589, "step": 44886 }, { "epoch": 0.7949279885950582, "grad_norm": 0.3715061843395233, "learning_rate": 3.188330550467876e-06, "loss": 0.0494, "step": 44887 }, { "epoch": 0.7949456981320866, "grad_norm": 0.6960706114768982, "learning_rate": 3.1878002537250168e-06, "loss": 0.0711, "step": 44888 }, { "epoch": 0.794963407669115, "grad_norm": 0.5026800632476807, "learning_rate": 3.187269995843095e-06, "loss": 0.0591, "step": 44889 }, { "epoch": 0.7949811172061434, "grad_norm": 0.46354934573173523, "learning_rate": 3.1867397768238566e-06, "loss": 0.0312, "step": 44890 }, { "epoch": 0.7949988267431719, "grad_norm": 0.6654784083366394, "learning_rate": 3.1862095966690546e-06, "loss": 0.049, "step": 44891 }, { "epoch": 0.7950165362802003, "grad_norm": 0.8291206359863281, "learning_rate": 3.1856794553804313e-06, "loss": 0.0683, "step": 44892 }, { "epoch": 0.7950342458172287, "grad_norm": 0.6353992223739624, "learning_rate": 3.185149352959722e-06, "loss": 0.0796, "step": 44893 }, { "epoch": 0.7950519553542571, "grad_norm": 0.422128289937973, "learning_rate": 3.1846192894086785e-06, "loss": 0.0596, "step": 44894 }, { "epoch": 0.7950696648912856, "grad_norm": 0.47171804308891296, "learning_rate": 3.184089264729046e-06, "loss": 0.0553, "step": 44895 }, { "epoch": 0.795087374428314, "grad_norm": 0.6474786996841431, "learning_rate": 3.1835592789225597e-06, "loss": 0.0635, "step": 44896 }, { "epoch": 0.7951050839653424, "grad_norm": 0.5580504536628723, "learning_rate": 3.183029331990969e-06, "loss": 0.0482, "step": 44897 }, { "epoch": 0.7951227935023709, "grad_norm": 0.6519720554351807, "learning_rate": 3.182499423936016e-06, "loss": 0.0291, "step": 44898 }, { "epoch": 0.7951405030393993, "grad_norm": 0.8607072234153748, "learning_rate": 3.1819695547594456e-06, "loss": 0.0486, "step": 44899 }, { "epoch": 0.7951582125764277, "grad_norm": 0.7990043759346008, "learning_rate": 3.1814397244629995e-06, "loss": 0.0702, "step": 44900 }, { "epoch": 0.7951759221134561, "grad_norm": 0.577478289604187, "learning_rate": 3.180909933048421e-06, "loss": 0.0417, "step": 44901 }, { "epoch": 0.7951936316504846, "grad_norm": 0.9915911555290222, "learning_rate": 3.1803801805174585e-06, "loss": 0.0621, "step": 44902 }, { "epoch": 0.795211341187513, "grad_norm": 0.6193742752075195, "learning_rate": 3.1798504668718446e-06, "loss": 0.0398, "step": 44903 }, { "epoch": 0.7952290507245414, "grad_norm": 0.6842319965362549, "learning_rate": 3.1793207921133277e-06, "loss": 0.0298, "step": 44904 }, { "epoch": 0.7952467602615698, "grad_norm": 0.51228928565979, "learning_rate": 3.1787911562436495e-06, "loss": 0.066, "step": 44905 }, { "epoch": 0.7952644697985983, "grad_norm": 0.37352612614631653, "learning_rate": 3.178261559264557e-06, "loss": 0.0423, "step": 44906 }, { "epoch": 0.7952821793356267, "grad_norm": 0.43949854373931885, "learning_rate": 3.1777320011777838e-06, "loss": 0.0496, "step": 44907 }, { "epoch": 0.7952998888726551, "grad_norm": 0.3368363380432129, "learning_rate": 3.177202481985077e-06, "loss": 0.0316, "step": 44908 }, { "epoch": 0.7953175984096835, "grad_norm": 0.599026083946228, "learning_rate": 3.1766730016881785e-06, "loss": 0.0537, "step": 44909 }, { "epoch": 0.795335307946712, "grad_norm": 0.6652000546455383, "learning_rate": 3.17614356028883e-06, "loss": 0.0362, "step": 44910 }, { "epoch": 0.7953530174837404, "grad_norm": 0.3262183368206024, "learning_rate": 3.175614157788772e-06, "loss": 0.0548, "step": 44911 }, { "epoch": 0.7953707270207688, "grad_norm": 0.5849327445030212, "learning_rate": 3.175084794189748e-06, "loss": 0.0551, "step": 44912 }, { "epoch": 0.7953884365577973, "grad_norm": 1.1924868822097778, "learning_rate": 3.1745554694935036e-06, "loss": 0.0709, "step": 44913 }, { "epoch": 0.7954061460948257, "grad_norm": 0.7913365364074707, "learning_rate": 3.174026183701772e-06, "loss": 0.0624, "step": 44914 }, { "epoch": 0.7954238556318541, "grad_norm": 0.7445040345191956, "learning_rate": 3.1734969368162968e-06, "loss": 0.0688, "step": 44915 }, { "epoch": 0.7954415651688825, "grad_norm": 0.48720279335975647, "learning_rate": 3.172967728838822e-06, "loss": 0.0436, "step": 44916 }, { "epoch": 0.795459274705911, "grad_norm": 0.5384241342544556, "learning_rate": 3.172438559771091e-06, "loss": 0.0463, "step": 44917 }, { "epoch": 0.7954769842429394, "grad_norm": 0.6438639760017395, "learning_rate": 3.1719094296148352e-06, "loss": 0.0354, "step": 44918 }, { "epoch": 0.7954946937799678, "grad_norm": 0.5473300814628601, "learning_rate": 3.1713803383717978e-06, "loss": 0.0499, "step": 44919 }, { "epoch": 0.7955124033169962, "grad_norm": 0.6741260886192322, "learning_rate": 3.170851286043731e-06, "loss": 0.0593, "step": 44920 }, { "epoch": 0.7955301128540248, "grad_norm": 0.5817335247993469, "learning_rate": 3.170322272632363e-06, "loss": 0.0292, "step": 44921 }, { "epoch": 0.7955478223910531, "grad_norm": 0.5345115065574646, "learning_rate": 3.169793298139438e-06, "loss": 0.0734, "step": 44922 }, { "epoch": 0.7955655319280815, "grad_norm": 0.5942163467407227, "learning_rate": 3.169264362566697e-06, "loss": 0.0463, "step": 44923 }, { "epoch": 0.79558324146511, "grad_norm": 0.798060417175293, "learning_rate": 3.168735465915883e-06, "loss": 0.0461, "step": 44924 }, { "epoch": 0.7956009510021385, "grad_norm": 0.9092509746551514, "learning_rate": 3.1682066081887302e-06, "loss": 0.0741, "step": 44925 }, { "epoch": 0.7956186605391669, "grad_norm": 0.5435250997543335, "learning_rate": 3.1676777893869784e-06, "loss": 0.0491, "step": 44926 }, { "epoch": 0.7956363700761953, "grad_norm": 0.3878498077392578, "learning_rate": 3.167149009512372e-06, "loss": 0.0587, "step": 44927 }, { "epoch": 0.7956540796132238, "grad_norm": 0.5368983745574951, "learning_rate": 3.1666202685666482e-06, "loss": 0.023, "step": 44928 }, { "epoch": 0.7956717891502522, "grad_norm": 0.5962517857551575, "learning_rate": 3.1660915665515457e-06, "loss": 0.0755, "step": 44929 }, { "epoch": 0.7956894986872806, "grad_norm": 0.3708072304725647, "learning_rate": 3.1655629034688065e-06, "loss": 0.0411, "step": 44930 }, { "epoch": 0.795707208224309, "grad_norm": 0.30768075585365295, "learning_rate": 3.16503427932017e-06, "loss": 0.0679, "step": 44931 }, { "epoch": 0.7957249177613375, "grad_norm": 0.8842865824699402, "learning_rate": 3.164505694107372e-06, "loss": 0.0665, "step": 44932 }, { "epoch": 0.7957426272983659, "grad_norm": 0.45606711506843567, "learning_rate": 3.1639771478321535e-06, "loss": 0.0522, "step": 44933 }, { "epoch": 0.7957603368353943, "grad_norm": 0.4779694676399231, "learning_rate": 3.163448640496255e-06, "loss": 0.0497, "step": 44934 }, { "epoch": 0.7957780463724227, "grad_norm": 0.909446656703949, "learning_rate": 3.16292017210141e-06, "loss": 0.0535, "step": 44935 }, { "epoch": 0.7957957559094512, "grad_norm": 0.7748846411705017, "learning_rate": 3.1623917426493608e-06, "loss": 0.0829, "step": 44936 }, { "epoch": 0.7958134654464796, "grad_norm": 0.5228983759880066, "learning_rate": 3.161863352141846e-06, "loss": 0.0521, "step": 44937 }, { "epoch": 0.795831174983508, "grad_norm": 0.4145941138267517, "learning_rate": 3.1613350005806026e-06, "loss": 0.0485, "step": 44938 }, { "epoch": 0.7958488845205364, "grad_norm": 0.4200056195259094, "learning_rate": 3.16080668796737e-06, "loss": 0.0407, "step": 44939 }, { "epoch": 0.7958665940575649, "grad_norm": 1.0341235399246216, "learning_rate": 3.160278414303885e-06, "loss": 0.0583, "step": 44940 }, { "epoch": 0.7958843035945933, "grad_norm": 0.4370501935482025, "learning_rate": 3.1597501795918914e-06, "loss": 0.0582, "step": 44941 }, { "epoch": 0.7959020131316217, "grad_norm": 0.5878843069076538, "learning_rate": 3.1592219838331187e-06, "loss": 0.055, "step": 44942 }, { "epoch": 0.7959197226686502, "grad_norm": 0.9763879776000977, "learning_rate": 3.158693827029307e-06, "loss": 0.0717, "step": 44943 }, { "epoch": 0.7959374322056786, "grad_norm": 0.752581775188446, "learning_rate": 3.158165709182195e-06, "loss": 0.0687, "step": 44944 }, { "epoch": 0.795955141742707, "grad_norm": 0.635116457939148, "learning_rate": 3.157637630293524e-06, "loss": 0.0434, "step": 44945 }, { "epoch": 0.7959728512797354, "grad_norm": 0.36030513048171997, "learning_rate": 3.1571095903650247e-06, "loss": 0.0438, "step": 44946 }, { "epoch": 0.7959905608167639, "grad_norm": 0.47578468918800354, "learning_rate": 3.1565815893984363e-06, "loss": 0.0604, "step": 44947 }, { "epoch": 0.7960082703537923, "grad_norm": 0.6605910062789917, "learning_rate": 3.1560536273954917e-06, "loss": 0.0478, "step": 44948 }, { "epoch": 0.7960259798908207, "grad_norm": 0.43538525700569153, "learning_rate": 3.155525704357941e-06, "loss": 0.0561, "step": 44949 }, { "epoch": 0.7960436894278491, "grad_norm": 0.6323463320732117, "learning_rate": 3.1549978202875084e-06, "loss": 0.068, "step": 44950 }, { "epoch": 0.7960613989648776, "grad_norm": 0.5418301224708557, "learning_rate": 3.1544699751859357e-06, "loss": 0.0392, "step": 44951 }, { "epoch": 0.796079108501906, "grad_norm": 0.59394770860672, "learning_rate": 3.153942169054963e-06, "loss": 0.0641, "step": 44952 }, { "epoch": 0.7960968180389344, "grad_norm": 1.3481956720352173, "learning_rate": 3.153414401896319e-06, "loss": 0.0833, "step": 44953 }, { "epoch": 0.7961145275759628, "grad_norm": 0.5595815181732178, "learning_rate": 3.1528866737117425e-06, "loss": 0.0312, "step": 44954 }, { "epoch": 0.7961322371129913, "grad_norm": 0.7273657917976379, "learning_rate": 3.15235898450297e-06, "loss": 0.0499, "step": 44955 }, { "epoch": 0.7961499466500197, "grad_norm": 0.29572370648384094, "learning_rate": 3.1518313342717388e-06, "loss": 0.0447, "step": 44956 }, { "epoch": 0.7961676561870481, "grad_norm": 0.584523618221283, "learning_rate": 3.151303723019784e-06, "loss": 0.0314, "step": 44957 }, { "epoch": 0.7961853657240766, "grad_norm": 0.7693604230880737, "learning_rate": 3.1507761507488407e-06, "loss": 0.0587, "step": 44958 }, { "epoch": 0.796203075261105, "grad_norm": 0.4925318658351898, "learning_rate": 3.15024861746065e-06, "loss": 0.0433, "step": 44959 }, { "epoch": 0.7962207847981334, "grad_norm": 0.797543466091156, "learning_rate": 3.1497211231569397e-06, "loss": 0.0512, "step": 44960 }, { "epoch": 0.7962384943351618, "grad_norm": 0.52656090259552, "learning_rate": 3.1491936678394468e-06, "loss": 0.0422, "step": 44961 }, { "epoch": 0.7962562038721903, "grad_norm": 0.6139122843742371, "learning_rate": 3.1486662515099084e-06, "loss": 0.0799, "step": 44962 }, { "epoch": 0.7962739134092187, "grad_norm": 0.6469663977622986, "learning_rate": 3.148138874170065e-06, "loss": 0.0653, "step": 44963 }, { "epoch": 0.7962916229462471, "grad_norm": 0.5633135437965393, "learning_rate": 3.14761153582164e-06, "loss": 0.0403, "step": 44964 }, { "epoch": 0.7963093324832755, "grad_norm": 0.6416025757789612, "learning_rate": 3.1470842364663754e-06, "loss": 0.0644, "step": 44965 }, { "epoch": 0.796327042020304, "grad_norm": 0.5681595206260681, "learning_rate": 3.1465569761060048e-06, "loss": 0.0539, "step": 44966 }, { "epoch": 0.7963447515573324, "grad_norm": 0.6507996320724487, "learning_rate": 3.146029754742262e-06, "loss": 0.0437, "step": 44967 }, { "epoch": 0.7963624610943608, "grad_norm": 0.446637898683548, "learning_rate": 3.1455025723768833e-06, "loss": 0.0617, "step": 44968 }, { "epoch": 0.7963801706313892, "grad_norm": 1.0631353855133057, "learning_rate": 3.144975429011601e-06, "loss": 0.0972, "step": 44969 }, { "epoch": 0.7963978801684177, "grad_norm": 0.5302891731262207, "learning_rate": 3.144448324648156e-06, "loss": 0.0573, "step": 44970 }, { "epoch": 0.7964155897054461, "grad_norm": 0.4988197088241577, "learning_rate": 3.143921259288271e-06, "loss": 0.0637, "step": 44971 }, { "epoch": 0.7964332992424745, "grad_norm": 0.5869978666305542, "learning_rate": 3.1433942329336883e-06, "loss": 0.0701, "step": 44972 }, { "epoch": 0.796451008779503, "grad_norm": 0.44975244998931885, "learning_rate": 3.142867245586142e-06, "loss": 0.0316, "step": 44973 }, { "epoch": 0.7964687183165314, "grad_norm": 0.5217819213867188, "learning_rate": 3.142340297247358e-06, "loss": 0.0341, "step": 44974 }, { "epoch": 0.7964864278535598, "grad_norm": 0.7001850605010986, "learning_rate": 3.1418133879190774e-06, "loss": 0.0773, "step": 44975 }, { "epoch": 0.7965041373905882, "grad_norm": 0.7317653894424438, "learning_rate": 3.1412865176030303e-06, "loss": 0.0694, "step": 44976 }, { "epoch": 0.7965218469276167, "grad_norm": 0.7428123950958252, "learning_rate": 3.14075968630095e-06, "loss": 0.0492, "step": 44977 }, { "epoch": 0.7965395564646451, "grad_norm": 0.8758260011672974, "learning_rate": 3.1402328940145725e-06, "loss": 0.0717, "step": 44978 }, { "epoch": 0.7965572660016735, "grad_norm": 0.7203477621078491, "learning_rate": 3.139706140745629e-06, "loss": 0.0824, "step": 44979 }, { "epoch": 0.7965749755387019, "grad_norm": 0.6298463344573975, "learning_rate": 3.139179426495852e-06, "loss": 0.0434, "step": 44980 }, { "epoch": 0.7965926850757304, "grad_norm": 0.5256724953651428, "learning_rate": 3.138652751266979e-06, "loss": 0.0409, "step": 44981 }, { "epoch": 0.7966103946127588, "grad_norm": 0.6902691125869751, "learning_rate": 3.1381261150607353e-06, "loss": 0.0667, "step": 44982 }, { "epoch": 0.7966281041497872, "grad_norm": 0.7316576242446899, "learning_rate": 3.137599517878857e-06, "loss": 0.0507, "step": 44983 }, { "epoch": 0.7966458136868156, "grad_norm": 0.45004093647003174, "learning_rate": 3.1370729597230793e-06, "loss": 0.0603, "step": 44984 }, { "epoch": 0.7966635232238441, "grad_norm": 0.5348237156867981, "learning_rate": 3.1365464405951256e-06, "loss": 0.0764, "step": 44985 }, { "epoch": 0.7966812327608725, "grad_norm": 0.5092198848724365, "learning_rate": 3.1360199604967367e-06, "loss": 0.0533, "step": 44986 }, { "epoch": 0.796698942297901, "grad_norm": 0.6967810392379761, "learning_rate": 3.1354935194296424e-06, "loss": 0.0685, "step": 44987 }, { "epoch": 0.7967166518349295, "grad_norm": 0.29326143860816956, "learning_rate": 3.1349671173955785e-06, "loss": 0.0364, "step": 44988 }, { "epoch": 0.7967343613719579, "grad_norm": 0.642475962638855, "learning_rate": 3.13444075439627e-06, "loss": 0.048, "step": 44989 }, { "epoch": 0.7967520709089863, "grad_norm": 0.4492911100387573, "learning_rate": 3.133914430433449e-06, "loss": 0.0318, "step": 44990 }, { "epoch": 0.7967697804460147, "grad_norm": 0.7668709754943848, "learning_rate": 3.1333881455088555e-06, "loss": 0.0672, "step": 44991 }, { "epoch": 0.7967874899830432, "grad_norm": 0.737617015838623, "learning_rate": 3.1328618996242096e-06, "loss": 0.0548, "step": 44992 }, { "epoch": 0.7968051995200716, "grad_norm": 0.5530247688293457, "learning_rate": 3.1323356927812472e-06, "loss": 0.0412, "step": 44993 }, { "epoch": 0.7968229090571, "grad_norm": 0.6894588470458984, "learning_rate": 3.131809524981702e-06, "loss": 0.0712, "step": 44994 }, { "epoch": 0.7968406185941284, "grad_norm": 0.6091286540031433, "learning_rate": 3.1312833962273003e-06, "loss": 0.0676, "step": 44995 }, { "epoch": 0.7968583281311569, "grad_norm": 0.6421349048614502, "learning_rate": 3.1307573065197776e-06, "loss": 0.0744, "step": 44996 }, { "epoch": 0.7968760376681853, "grad_norm": 0.4829759895801544, "learning_rate": 3.1302312558608626e-06, "loss": 0.0528, "step": 44997 }, { "epoch": 0.7968937472052137, "grad_norm": 0.6679123044013977, "learning_rate": 3.1297052442522907e-06, "loss": 0.0611, "step": 44998 }, { "epoch": 0.7969114567422421, "grad_norm": 0.9050524234771729, "learning_rate": 3.1291792716957833e-06, "loss": 0.0492, "step": 44999 }, { "epoch": 0.7969291662792706, "grad_norm": 0.9872815012931824, "learning_rate": 3.128653338193075e-06, "loss": 0.0711, "step": 45000 }, { "epoch": 0.796946875816299, "grad_norm": 0.6537715792655945, "learning_rate": 3.1281274437458967e-06, "loss": 0.0986, "step": 45001 }, { "epoch": 0.7969645853533274, "grad_norm": 0.7724571228027344, "learning_rate": 3.1276015883559827e-06, "loss": 0.0603, "step": 45002 }, { "epoch": 0.7969822948903559, "grad_norm": 0.36950036883354187, "learning_rate": 3.1270757720250547e-06, "loss": 0.0727, "step": 45003 }, { "epoch": 0.7970000044273843, "grad_norm": 0.6771814823150635, "learning_rate": 3.1265499947548465e-06, "loss": 0.0739, "step": 45004 }, { "epoch": 0.7970177139644127, "grad_norm": 0.4386885464191437, "learning_rate": 3.126024256547088e-06, "loss": 0.029, "step": 45005 }, { "epoch": 0.7970354235014411, "grad_norm": 0.7512853741645813, "learning_rate": 3.1254985574035084e-06, "loss": 0.0569, "step": 45006 }, { "epoch": 0.7970531330384696, "grad_norm": 0.8376790881156921, "learning_rate": 3.124972897325839e-06, "loss": 0.0579, "step": 45007 }, { "epoch": 0.797070842575498, "grad_norm": 0.6807682514190674, "learning_rate": 3.1244472763158057e-06, "loss": 0.0636, "step": 45008 }, { "epoch": 0.7970885521125264, "grad_norm": 0.4257700443267822, "learning_rate": 3.123921694375145e-06, "loss": 0.062, "step": 45009 }, { "epoch": 0.7971062616495548, "grad_norm": 0.5924840569496155, "learning_rate": 3.1233961515055755e-06, "loss": 0.0784, "step": 45010 }, { "epoch": 0.7971239711865833, "grad_norm": 1.018810749053955, "learning_rate": 3.1228706477088325e-06, "loss": 0.0536, "step": 45011 }, { "epoch": 0.7971416807236117, "grad_norm": 0.8207011222839355, "learning_rate": 3.122345182986647e-06, "loss": 0.0443, "step": 45012 }, { "epoch": 0.7971593902606401, "grad_norm": 0.6160166263580322, "learning_rate": 3.1218197573407413e-06, "loss": 0.0725, "step": 45013 }, { "epoch": 0.7971770997976685, "grad_norm": 0.4077030420303345, "learning_rate": 3.121294370772842e-06, "loss": 0.0317, "step": 45014 }, { "epoch": 0.797194809334697, "grad_norm": 0.6558738946914673, "learning_rate": 3.1207690232846867e-06, "loss": 0.0623, "step": 45015 }, { "epoch": 0.7972125188717254, "grad_norm": 0.38142096996307373, "learning_rate": 3.120243714878005e-06, "loss": 0.0604, "step": 45016 }, { "epoch": 0.7972302284087538, "grad_norm": 0.4059724509716034, "learning_rate": 3.1197184455545163e-06, "loss": 0.0404, "step": 45017 }, { "epoch": 0.7972479379457823, "grad_norm": 0.4829939603805542, "learning_rate": 3.1191932153159515e-06, "loss": 0.0643, "step": 45018 }, { "epoch": 0.7972656474828107, "grad_norm": 1.0451455116271973, "learning_rate": 3.1186680241640385e-06, "loss": 0.0623, "step": 45019 }, { "epoch": 0.7972833570198391, "grad_norm": 0.6669939756393433, "learning_rate": 3.118142872100512e-06, "loss": 0.0585, "step": 45020 }, { "epoch": 0.7973010665568675, "grad_norm": 0.49012985825538635, "learning_rate": 3.1176177591270877e-06, "loss": 0.0329, "step": 45021 }, { "epoch": 0.797318776093896, "grad_norm": 0.5020225048065186, "learning_rate": 3.1170926852455e-06, "loss": 0.0609, "step": 45022 }, { "epoch": 0.7973364856309244, "grad_norm": 0.31119176745414734, "learning_rate": 3.116567650457476e-06, "loss": 0.0572, "step": 45023 }, { "epoch": 0.7973541951679528, "grad_norm": 0.40928032994270325, "learning_rate": 3.116042654764742e-06, "loss": 0.0546, "step": 45024 }, { "epoch": 0.7973719047049812, "grad_norm": 0.7054113149642944, "learning_rate": 3.115517698169025e-06, "loss": 0.0617, "step": 45025 }, { "epoch": 0.7973896142420097, "grad_norm": 0.35488373041152954, "learning_rate": 3.1149927806720528e-06, "loss": 0.0398, "step": 45026 }, { "epoch": 0.7974073237790381, "grad_norm": 0.8688840270042419, "learning_rate": 3.1144679022755566e-06, "loss": 0.0686, "step": 45027 }, { "epoch": 0.7974250333160665, "grad_norm": 0.5110259652137756, "learning_rate": 3.1139430629812556e-06, "loss": 0.0513, "step": 45028 }, { "epoch": 0.7974427428530949, "grad_norm": 0.5019615888595581, "learning_rate": 3.113418262790879e-06, "loss": 0.0397, "step": 45029 }, { "epoch": 0.7974604523901234, "grad_norm": 0.36779019236564636, "learning_rate": 3.112893501706158e-06, "loss": 0.0586, "step": 45030 }, { "epoch": 0.7974781619271518, "grad_norm": 0.510915219783783, "learning_rate": 3.1123687797288113e-06, "loss": 0.0548, "step": 45031 }, { "epoch": 0.7974958714641802, "grad_norm": 0.8670671582221985, "learning_rate": 3.111844096860569e-06, "loss": 0.0667, "step": 45032 }, { "epoch": 0.7975135810012087, "grad_norm": 0.5672579407691956, "learning_rate": 3.1113194531031575e-06, "loss": 0.0471, "step": 45033 }, { "epoch": 0.7975312905382371, "grad_norm": 0.4679963290691376, "learning_rate": 3.110794848458301e-06, "loss": 0.0328, "step": 45034 }, { "epoch": 0.7975490000752655, "grad_norm": 0.3774026036262512, "learning_rate": 3.1102702829277294e-06, "loss": 0.0546, "step": 45035 }, { "epoch": 0.7975667096122939, "grad_norm": 0.5873653292655945, "learning_rate": 3.109745756513164e-06, "loss": 0.048, "step": 45036 }, { "epoch": 0.7975844191493224, "grad_norm": 0.8798679709434509, "learning_rate": 3.109221269216337e-06, "loss": 0.0482, "step": 45037 }, { "epoch": 0.7976021286863508, "grad_norm": 0.2651582658290863, "learning_rate": 3.108696821038966e-06, "loss": 0.0396, "step": 45038 }, { "epoch": 0.7976198382233792, "grad_norm": 0.4677124321460724, "learning_rate": 3.108172411982778e-06, "loss": 0.0596, "step": 45039 }, { "epoch": 0.7976375477604076, "grad_norm": 1.0044355392456055, "learning_rate": 3.107648042049502e-06, "loss": 0.0514, "step": 45040 }, { "epoch": 0.7976552572974361, "grad_norm": 0.6740787625312805, "learning_rate": 3.107123711240864e-06, "loss": 0.0644, "step": 45041 }, { "epoch": 0.7976729668344645, "grad_norm": 0.5773966312408447, "learning_rate": 3.1065994195585815e-06, "loss": 0.0635, "step": 45042 }, { "epoch": 0.7976906763714929, "grad_norm": 0.8276574015617371, "learning_rate": 3.1060751670043817e-06, "loss": 0.0463, "step": 45043 }, { "epoch": 0.7977083859085213, "grad_norm": 0.5257477164268494, "learning_rate": 3.105550953579998e-06, "loss": 0.0734, "step": 45044 }, { "epoch": 0.7977260954455498, "grad_norm": 0.7152711749076843, "learning_rate": 3.1050267792871456e-06, "loss": 0.0445, "step": 45045 }, { "epoch": 0.7977438049825782, "grad_norm": 0.6926034092903137, "learning_rate": 3.1045026441275517e-06, "loss": 0.0799, "step": 45046 }, { "epoch": 0.7977615145196066, "grad_norm": 0.6312620043754578, "learning_rate": 3.1039785481029407e-06, "loss": 0.0575, "step": 45047 }, { "epoch": 0.7977792240566352, "grad_norm": 0.5168431997299194, "learning_rate": 3.1034544912150424e-06, "loss": 0.052, "step": 45048 }, { "epoch": 0.7977969335936635, "grad_norm": 0.6606345772743225, "learning_rate": 3.10293047346557e-06, "loss": 0.0584, "step": 45049 }, { "epoch": 0.797814643130692, "grad_norm": 0.5302982330322266, "learning_rate": 3.1024064948562535e-06, "loss": 0.0679, "step": 45050 }, { "epoch": 0.7978323526677203, "grad_norm": 0.4514492154121399, "learning_rate": 3.1018825553888167e-06, "loss": 0.0932, "step": 45051 }, { "epoch": 0.7978500622047489, "grad_norm": 0.5652280449867249, "learning_rate": 3.101358655064982e-06, "loss": 0.0616, "step": 45052 }, { "epoch": 0.7978677717417773, "grad_norm": 0.4742867946624756, "learning_rate": 3.100834793886474e-06, "loss": 0.0683, "step": 45053 }, { "epoch": 0.7978854812788057, "grad_norm": 0.48346975445747375, "learning_rate": 3.1003109718550165e-06, "loss": 0.0549, "step": 45054 }, { "epoch": 0.797903190815834, "grad_norm": 0.45282942056655884, "learning_rate": 3.0997871889723367e-06, "loss": 0.0658, "step": 45055 }, { "epoch": 0.7979209003528626, "grad_norm": 0.6749972105026245, "learning_rate": 3.0992634452401482e-06, "loss": 0.0537, "step": 45056 }, { "epoch": 0.797938609889891, "grad_norm": 0.8849111199378967, "learning_rate": 3.0987397406601807e-06, "loss": 0.071, "step": 45057 }, { "epoch": 0.7979563194269194, "grad_norm": 0.3801725208759308, "learning_rate": 3.0982160752341536e-06, "loss": 0.0575, "step": 45058 }, { "epoch": 0.7979740289639478, "grad_norm": 0.3519551455974579, "learning_rate": 3.0976924489637987e-06, "loss": 0.0445, "step": 45059 }, { "epoch": 0.7979917385009763, "grad_norm": 0.6305126547813416, "learning_rate": 3.0971688618508257e-06, "loss": 0.0565, "step": 45060 }, { "epoch": 0.7980094480380047, "grad_norm": 0.33040592074394226, "learning_rate": 3.0966453138969643e-06, "loss": 0.0597, "step": 45061 }, { "epoch": 0.7980271575750331, "grad_norm": 0.4376365542411804, "learning_rate": 3.0961218051039353e-06, "loss": 0.0503, "step": 45062 }, { "epoch": 0.7980448671120616, "grad_norm": 0.37637075781822205, "learning_rate": 3.0955983354734614e-06, "loss": 0.031, "step": 45063 }, { "epoch": 0.79806257664909, "grad_norm": 0.951137125492096, "learning_rate": 3.095074905007264e-06, "loss": 0.0604, "step": 45064 }, { "epoch": 0.7980802861861184, "grad_norm": 1.2402005195617676, "learning_rate": 3.0945515137070673e-06, "loss": 0.1016, "step": 45065 }, { "epoch": 0.7980979957231468, "grad_norm": 0.8977901339530945, "learning_rate": 3.094028161574596e-06, "loss": 0.0421, "step": 45066 }, { "epoch": 0.7981157052601753, "grad_norm": 0.17582391202449799, "learning_rate": 3.0935048486115645e-06, "loss": 0.0378, "step": 45067 }, { "epoch": 0.7981334147972037, "grad_norm": 0.5801123380661011, "learning_rate": 3.092981574819696e-06, "loss": 0.069, "step": 45068 }, { "epoch": 0.7981511243342321, "grad_norm": 0.42379230260849, "learning_rate": 3.092458340200719e-06, "loss": 0.0339, "step": 45069 }, { "epoch": 0.7981688338712605, "grad_norm": 0.5137698650360107, "learning_rate": 3.0919351447563454e-06, "loss": 0.0255, "step": 45070 }, { "epoch": 0.798186543408289, "grad_norm": 1.0893760919570923, "learning_rate": 3.0914119884883005e-06, "loss": 0.0963, "step": 45071 }, { "epoch": 0.7982042529453174, "grad_norm": 0.4318768382072449, "learning_rate": 3.090888871398302e-06, "loss": 0.0465, "step": 45072 }, { "epoch": 0.7982219624823458, "grad_norm": 0.47574353218078613, "learning_rate": 3.0903657934880824e-06, "loss": 0.0545, "step": 45073 }, { "epoch": 0.7982396720193742, "grad_norm": 0.5196735262870789, "learning_rate": 3.089842754759351e-06, "loss": 0.0605, "step": 45074 }, { "epoch": 0.7982573815564027, "grad_norm": 0.7122237086296082, "learning_rate": 3.089319755213833e-06, "loss": 0.0519, "step": 45075 }, { "epoch": 0.7982750910934311, "grad_norm": 0.48863154649734497, "learning_rate": 3.088796794853252e-06, "loss": 0.0514, "step": 45076 }, { "epoch": 0.7982928006304595, "grad_norm": 0.5603354573249817, "learning_rate": 3.0882738736793197e-06, "loss": 0.0463, "step": 45077 }, { "epoch": 0.798310510167488, "grad_norm": 1.0634355545043945, "learning_rate": 3.087750991693762e-06, "loss": 0.0867, "step": 45078 }, { "epoch": 0.7983282197045164, "grad_norm": 0.8330254554748535, "learning_rate": 3.0872281488982982e-06, "loss": 0.0584, "step": 45079 }, { "epoch": 0.7983459292415448, "grad_norm": 0.4446856677532196, "learning_rate": 3.0867053452946496e-06, "loss": 0.0257, "step": 45080 }, { "epoch": 0.7983636387785732, "grad_norm": 0.4797251522541046, "learning_rate": 3.0861825808845363e-06, "loss": 0.0807, "step": 45081 }, { "epoch": 0.7983813483156017, "grad_norm": 0.598018229007721, "learning_rate": 3.0856598556696762e-06, "loss": 0.0518, "step": 45082 }, { "epoch": 0.7983990578526301, "grad_norm": 0.6489649415016174, "learning_rate": 3.085137169651794e-06, "loss": 0.0705, "step": 45083 }, { "epoch": 0.7984167673896585, "grad_norm": 0.5379260778427124, "learning_rate": 3.084614522832602e-06, "loss": 0.0529, "step": 45084 }, { "epoch": 0.7984344769266869, "grad_norm": 0.7507902383804321, "learning_rate": 3.084091915213822e-06, "loss": 0.0562, "step": 45085 }, { "epoch": 0.7984521864637154, "grad_norm": 0.567862331867218, "learning_rate": 3.083569346797175e-06, "loss": 0.052, "step": 45086 }, { "epoch": 0.7984698960007438, "grad_norm": 0.8523980975151062, "learning_rate": 3.0830468175843844e-06, "loss": 0.1052, "step": 45087 }, { "epoch": 0.7984876055377722, "grad_norm": 0.796926736831665, "learning_rate": 3.0825243275771598e-06, "loss": 0.0691, "step": 45088 }, { "epoch": 0.7985053150748006, "grad_norm": 0.6121119856834412, "learning_rate": 3.082001876777224e-06, "loss": 0.0403, "step": 45089 }, { "epoch": 0.7985230246118291, "grad_norm": 0.329316645860672, "learning_rate": 3.0814794651862977e-06, "loss": 0.0361, "step": 45090 }, { "epoch": 0.7985407341488575, "grad_norm": 0.5755317211151123, "learning_rate": 3.0809570928060976e-06, "loss": 0.0553, "step": 45091 }, { "epoch": 0.7985584436858859, "grad_norm": 0.42004522681236267, "learning_rate": 3.080434759638344e-06, "loss": 0.0548, "step": 45092 }, { "epoch": 0.7985761532229144, "grad_norm": 0.8060662746429443, "learning_rate": 3.0799124656847543e-06, "loss": 0.0829, "step": 45093 }, { "epoch": 0.7985938627599428, "grad_norm": 0.6699246168136597, "learning_rate": 3.07939021094705e-06, "loss": 0.0604, "step": 45094 }, { "epoch": 0.7986115722969712, "grad_norm": 0.9711791276931763, "learning_rate": 3.0788679954269435e-06, "loss": 0.095, "step": 45095 }, { "epoch": 0.7986292818339996, "grad_norm": 0.46569281816482544, "learning_rate": 3.0783458191261542e-06, "loss": 0.0646, "step": 45096 }, { "epoch": 0.7986469913710281, "grad_norm": 0.440815806388855, "learning_rate": 3.077823682046402e-06, "loss": 0.0536, "step": 45097 }, { "epoch": 0.7986647009080565, "grad_norm": 0.3571377694606781, "learning_rate": 3.0773015841894093e-06, "loss": 0.0305, "step": 45098 }, { "epoch": 0.7986824104450849, "grad_norm": 0.6326462030410767, "learning_rate": 3.076779525556882e-06, "loss": 0.0373, "step": 45099 }, { "epoch": 0.7987001199821133, "grad_norm": 0.6431551575660706, "learning_rate": 3.0762575061505456e-06, "loss": 0.0549, "step": 45100 }, { "epoch": 0.7987178295191418, "grad_norm": 0.7932260632514954, "learning_rate": 3.0757355259721165e-06, "loss": 0.0702, "step": 45101 }, { "epoch": 0.7987355390561702, "grad_norm": 0.5728862285614014, "learning_rate": 3.0752135850233106e-06, "loss": 0.0532, "step": 45102 }, { "epoch": 0.7987532485931986, "grad_norm": 0.43039458990097046, "learning_rate": 3.0746916833058458e-06, "loss": 0.0703, "step": 45103 }, { "epoch": 0.798770958130227, "grad_norm": 0.7997796535491943, "learning_rate": 3.0741698208214398e-06, "loss": 0.0489, "step": 45104 }, { "epoch": 0.7987886676672555, "grad_norm": 0.7237006425857544, "learning_rate": 3.0736479975718125e-06, "loss": 0.0526, "step": 45105 }, { "epoch": 0.7988063772042839, "grad_norm": 0.47247323393821716, "learning_rate": 3.0731262135586724e-06, "loss": 0.0513, "step": 45106 }, { "epoch": 0.7988240867413123, "grad_norm": 0.42325806617736816, "learning_rate": 3.072604468783742e-06, "loss": 0.0324, "step": 45107 }, { "epoch": 0.7988417962783408, "grad_norm": 0.8989501595497131, "learning_rate": 3.0720827632487393e-06, "loss": 0.0632, "step": 45108 }, { "epoch": 0.7988595058153692, "grad_norm": 0.6284228563308716, "learning_rate": 3.0715610969553696e-06, "loss": 0.0621, "step": 45109 }, { "epoch": 0.7988772153523976, "grad_norm": 1.090577483177185, "learning_rate": 3.0710394699053624e-06, "loss": 0.0856, "step": 45110 }, { "epoch": 0.798894924889426, "grad_norm": 0.44001305103302, "learning_rate": 3.070517882100429e-06, "loss": 0.0387, "step": 45111 }, { "epoch": 0.7989126344264545, "grad_norm": 0.5631183981895447, "learning_rate": 3.069996333542289e-06, "loss": 0.0429, "step": 45112 }, { "epoch": 0.798930343963483, "grad_norm": 0.6909546852111816, "learning_rate": 3.069474824232651e-06, "loss": 0.0762, "step": 45113 }, { "epoch": 0.7989480535005113, "grad_norm": 1.007004976272583, "learning_rate": 3.0689533541732324e-06, "loss": 0.0861, "step": 45114 }, { "epoch": 0.7989657630375397, "grad_norm": 0.45677119493484497, "learning_rate": 3.0684319233657556e-06, "loss": 0.0634, "step": 45115 }, { "epoch": 0.7989834725745683, "grad_norm": 0.6348432898521423, "learning_rate": 3.0679105318119273e-06, "loss": 0.0657, "step": 45116 }, { "epoch": 0.7990011821115967, "grad_norm": 0.7388585805892944, "learning_rate": 3.067389179513466e-06, "loss": 0.0671, "step": 45117 }, { "epoch": 0.799018891648625, "grad_norm": 0.5397300124168396, "learning_rate": 3.066867866472086e-06, "loss": 0.0421, "step": 45118 }, { "epoch": 0.7990366011856535, "grad_norm": 0.8148885369300842, "learning_rate": 3.0663465926895058e-06, "loss": 0.0806, "step": 45119 }, { "epoch": 0.799054310722682, "grad_norm": 0.8216362595558167, "learning_rate": 3.0658253581674383e-06, "loss": 0.0634, "step": 45120 }, { "epoch": 0.7990720202597104, "grad_norm": 0.7057226300239563, "learning_rate": 3.0653041629075977e-06, "loss": 0.0632, "step": 45121 }, { "epoch": 0.7990897297967388, "grad_norm": 0.5181028842926025, "learning_rate": 3.064783006911698e-06, "loss": 0.0403, "step": 45122 }, { "epoch": 0.7991074393337673, "grad_norm": 0.5535010695457458, "learning_rate": 3.06426189018146e-06, "loss": 0.0542, "step": 45123 }, { "epoch": 0.7991251488707957, "grad_norm": 0.940350353717804, "learning_rate": 3.06374081271859e-06, "loss": 0.085, "step": 45124 }, { "epoch": 0.7991428584078241, "grad_norm": 0.4838557243347168, "learning_rate": 3.0632197745248048e-06, "loss": 0.0565, "step": 45125 }, { "epoch": 0.7991605679448525, "grad_norm": 0.46142658591270447, "learning_rate": 3.0626987756018243e-06, "loss": 0.0502, "step": 45126 }, { "epoch": 0.799178277481881, "grad_norm": 0.5049256682395935, "learning_rate": 3.0621778159513507e-06, "loss": 0.0605, "step": 45127 }, { "epoch": 0.7991959870189094, "grad_norm": 0.3272485136985779, "learning_rate": 3.0616568955751077e-06, "loss": 0.0443, "step": 45128 }, { "epoch": 0.7992136965559378, "grad_norm": 0.3531924784183502, "learning_rate": 3.061136014474804e-06, "loss": 0.0491, "step": 45129 }, { "epoch": 0.7992314060929662, "grad_norm": 0.4565275013446808, "learning_rate": 3.0606151726521564e-06, "loss": 0.0511, "step": 45130 }, { "epoch": 0.7992491156299947, "grad_norm": 0.7534860968589783, "learning_rate": 3.060094370108877e-06, "loss": 0.0473, "step": 45131 }, { "epoch": 0.7992668251670231, "grad_norm": 0.5390415787696838, "learning_rate": 3.059573606846679e-06, "loss": 0.062, "step": 45132 }, { "epoch": 0.7992845347040515, "grad_norm": 0.4210900068283081, "learning_rate": 3.0590528828672796e-06, "loss": 0.047, "step": 45133 }, { "epoch": 0.7993022442410799, "grad_norm": 0.5688080191612244, "learning_rate": 3.058532198172386e-06, "loss": 0.0449, "step": 45134 }, { "epoch": 0.7993199537781084, "grad_norm": 0.4924165606498718, "learning_rate": 3.058011552763714e-06, "loss": 0.0598, "step": 45135 }, { "epoch": 0.7993376633151368, "grad_norm": 0.8789056539535522, "learning_rate": 3.057490946642974e-06, "loss": 0.0701, "step": 45136 }, { "epoch": 0.7993553728521652, "grad_norm": 0.820085346698761, "learning_rate": 3.0569703798118857e-06, "loss": 0.0494, "step": 45137 }, { "epoch": 0.7993730823891937, "grad_norm": 0.8675400018692017, "learning_rate": 3.0564498522721496e-06, "loss": 0.0656, "step": 45138 }, { "epoch": 0.7993907919262221, "grad_norm": 0.2314525693655014, "learning_rate": 3.0559293640254885e-06, "loss": 0.0591, "step": 45139 }, { "epoch": 0.7994085014632505, "grad_norm": 0.550815224647522, "learning_rate": 3.055408915073617e-06, "loss": 0.0283, "step": 45140 }, { "epoch": 0.7994262110002789, "grad_norm": 0.5589426755905151, "learning_rate": 3.0548885054182364e-06, "loss": 0.054, "step": 45141 }, { "epoch": 0.7994439205373074, "grad_norm": 0.558373749256134, "learning_rate": 3.054368135061065e-06, "loss": 0.0395, "step": 45142 }, { "epoch": 0.7994616300743358, "grad_norm": 0.4779353141784668, "learning_rate": 3.053847804003816e-06, "loss": 0.0677, "step": 45143 }, { "epoch": 0.7994793396113642, "grad_norm": 0.4640592932701111, "learning_rate": 3.0533275122482017e-06, "loss": 0.0274, "step": 45144 }, { "epoch": 0.7994970491483926, "grad_norm": 0.6495709419250488, "learning_rate": 3.0528072597959268e-06, "loss": 0.0742, "step": 45145 }, { "epoch": 0.7995147586854211, "grad_norm": 0.5880916714668274, "learning_rate": 3.052287046648709e-06, "loss": 0.0471, "step": 45146 }, { "epoch": 0.7995324682224495, "grad_norm": 0.6313149333000183, "learning_rate": 3.0517668728082577e-06, "loss": 0.0452, "step": 45147 }, { "epoch": 0.7995501777594779, "grad_norm": 0.46554145216941833, "learning_rate": 3.051246738276286e-06, "loss": 0.0314, "step": 45148 }, { "epoch": 0.7995678872965063, "grad_norm": 0.3291601240634918, "learning_rate": 3.050726643054503e-06, "loss": 0.0379, "step": 45149 }, { "epoch": 0.7995855968335348, "grad_norm": 0.3345106840133667, "learning_rate": 3.05020658714462e-06, "loss": 0.0635, "step": 45150 }, { "epoch": 0.7996033063705632, "grad_norm": 0.45016369223594666, "learning_rate": 3.0496865705483547e-06, "loss": 0.0548, "step": 45151 }, { "epoch": 0.7996210159075916, "grad_norm": 0.8698559999465942, "learning_rate": 3.049166593267407e-06, "loss": 0.0777, "step": 45152 }, { "epoch": 0.7996387254446201, "grad_norm": 0.577263355255127, "learning_rate": 3.0486466553034918e-06, "loss": 0.0652, "step": 45153 }, { "epoch": 0.7996564349816485, "grad_norm": 0.2814229726791382, "learning_rate": 3.0481267566583205e-06, "loss": 0.0456, "step": 45154 }, { "epoch": 0.7996741445186769, "grad_norm": 0.29411181807518005, "learning_rate": 3.0476068973336075e-06, "loss": 0.0555, "step": 45155 }, { "epoch": 0.7996918540557053, "grad_norm": 0.29244667291641235, "learning_rate": 3.047087077331055e-06, "loss": 0.0478, "step": 45156 }, { "epoch": 0.7997095635927338, "grad_norm": 1.220672845840454, "learning_rate": 3.0465672966523764e-06, "loss": 0.0983, "step": 45157 }, { "epoch": 0.7997272731297622, "grad_norm": 0.7532972097396851, "learning_rate": 3.0460475552992847e-06, "loss": 0.0617, "step": 45158 }, { "epoch": 0.7997449826667906, "grad_norm": 0.8413004279136658, "learning_rate": 3.0455278532734854e-06, "loss": 0.0602, "step": 45159 }, { "epoch": 0.799762692203819, "grad_norm": 0.5475572347640991, "learning_rate": 3.0450081905766896e-06, "loss": 0.0574, "step": 45160 }, { "epoch": 0.7997804017408475, "grad_norm": 0.24048052728176117, "learning_rate": 3.044488567210609e-06, "loss": 0.0514, "step": 45161 }, { "epoch": 0.7997981112778759, "grad_norm": 0.5687240958213806, "learning_rate": 3.0439689831769564e-06, "loss": 0.0656, "step": 45162 }, { "epoch": 0.7998158208149043, "grad_norm": 0.5594530701637268, "learning_rate": 3.043449438477431e-06, "loss": 0.0491, "step": 45163 }, { "epoch": 0.7998335303519327, "grad_norm": 0.45578691363334656, "learning_rate": 3.0429299331137482e-06, "loss": 0.0625, "step": 45164 }, { "epoch": 0.7998512398889612, "grad_norm": 0.8779398202896118, "learning_rate": 3.0424104670876207e-06, "loss": 0.0693, "step": 45165 }, { "epoch": 0.7998689494259896, "grad_norm": 0.5360286831855774, "learning_rate": 3.04189104040075e-06, "loss": 0.056, "step": 45166 }, { "epoch": 0.799886658963018, "grad_norm": 0.5560413002967834, "learning_rate": 3.041371653054843e-06, "loss": 0.0752, "step": 45167 }, { "epoch": 0.7999043685000465, "grad_norm": 0.5688595175743103, "learning_rate": 3.0408523050516187e-06, "loss": 0.05, "step": 45168 }, { "epoch": 0.7999220780370749, "grad_norm": 0.7965107560157776, "learning_rate": 3.040332996392785e-06, "loss": 0.0481, "step": 45169 }, { "epoch": 0.7999397875741033, "grad_norm": 0.6480915546417236, "learning_rate": 3.0398137270800413e-06, "loss": 0.0677, "step": 45170 }, { "epoch": 0.7999574971111317, "grad_norm": 0.8054249882698059, "learning_rate": 3.039294497115101e-06, "loss": 0.0697, "step": 45171 }, { "epoch": 0.7999752066481602, "grad_norm": 0.768364667892456, "learning_rate": 3.0387753064996758e-06, "loss": 0.0549, "step": 45172 }, { "epoch": 0.7999929161851886, "grad_norm": 0.5882138013839722, "learning_rate": 3.0382561552354663e-06, "loss": 0.0502, "step": 45173 }, { "epoch": 0.800010625722217, "grad_norm": 0.7079553008079529, "learning_rate": 3.037737043324183e-06, "loss": 0.0423, "step": 45174 }, { "epoch": 0.8000283352592454, "grad_norm": 0.6386595964431763, "learning_rate": 3.0372179707675356e-06, "loss": 0.0551, "step": 45175 }, { "epoch": 0.800046044796274, "grad_norm": 0.650870680809021, "learning_rate": 3.0366989375672316e-06, "loss": 0.0753, "step": 45176 }, { "epoch": 0.8000637543333023, "grad_norm": 0.5179327130317688, "learning_rate": 3.0361799437249793e-06, "loss": 0.0842, "step": 45177 }, { "epoch": 0.8000814638703307, "grad_norm": 1.0721337795257568, "learning_rate": 3.0356609892424825e-06, "loss": 0.0886, "step": 45178 }, { "epoch": 0.8000991734073591, "grad_norm": 0.45470330119132996, "learning_rate": 3.0351420741214564e-06, "loss": 0.0597, "step": 45179 }, { "epoch": 0.8001168829443877, "grad_norm": 0.4436695873737335, "learning_rate": 3.034623198363598e-06, "loss": 0.0491, "step": 45180 }, { "epoch": 0.800134592481416, "grad_norm": 0.7914126515388489, "learning_rate": 3.0341043619706192e-06, "loss": 0.0543, "step": 45181 }, { "epoch": 0.8001523020184445, "grad_norm": 0.5512085556983948, "learning_rate": 3.0335855649442272e-06, "loss": 0.0501, "step": 45182 }, { "epoch": 0.800170011555473, "grad_norm": 0.49581223726272583, "learning_rate": 3.033066807286132e-06, "loss": 0.0444, "step": 45183 }, { "epoch": 0.8001877210925014, "grad_norm": 1.0966886281967163, "learning_rate": 3.032548088998033e-06, "loss": 0.0547, "step": 45184 }, { "epoch": 0.8002054306295298, "grad_norm": 0.33377909660339355, "learning_rate": 3.032029410081641e-06, "loss": 0.0489, "step": 45185 }, { "epoch": 0.8002231401665582, "grad_norm": 0.2028859406709671, "learning_rate": 3.03151077053866e-06, "loss": 0.0662, "step": 45186 }, { "epoch": 0.8002408497035867, "grad_norm": 0.5464462637901306, "learning_rate": 3.0309921703707993e-06, "loss": 0.0663, "step": 45187 }, { "epoch": 0.8002585592406151, "grad_norm": 0.5431678295135498, "learning_rate": 3.0304736095797635e-06, "loss": 0.0513, "step": 45188 }, { "epoch": 0.8002762687776435, "grad_norm": 1.0838561058044434, "learning_rate": 3.0299550881672584e-06, "loss": 0.1074, "step": 45189 }, { "epoch": 0.8002939783146719, "grad_norm": 0.8602533340454102, "learning_rate": 3.029436606134994e-06, "loss": 0.0509, "step": 45190 }, { "epoch": 0.8003116878517004, "grad_norm": 0.41054803133010864, "learning_rate": 3.0289181634846704e-06, "loss": 0.043, "step": 45191 }, { "epoch": 0.8003293973887288, "grad_norm": 1.0307023525238037, "learning_rate": 3.028399760217993e-06, "loss": 0.0803, "step": 45192 }, { "epoch": 0.8003471069257572, "grad_norm": 0.4489717483520508, "learning_rate": 3.0278813963366707e-06, "loss": 0.0671, "step": 45193 }, { "epoch": 0.8003648164627856, "grad_norm": 0.6062517166137695, "learning_rate": 3.0273630718424107e-06, "loss": 0.0519, "step": 45194 }, { "epoch": 0.8003825259998141, "grad_norm": 1.0375574827194214, "learning_rate": 3.0268447867369127e-06, "loss": 0.086, "step": 45195 }, { "epoch": 0.8004002355368425, "grad_norm": 0.5775659680366516, "learning_rate": 3.0263265410218786e-06, "loss": 0.0467, "step": 45196 }, { "epoch": 0.8004179450738709, "grad_norm": 0.7378129959106445, "learning_rate": 3.0258083346990295e-06, "loss": 0.0579, "step": 45197 }, { "epoch": 0.8004356546108994, "grad_norm": 0.5553766489028931, "learning_rate": 3.0252901677700534e-06, "loss": 0.0597, "step": 45198 }, { "epoch": 0.8004533641479278, "grad_norm": 1.1711233854293823, "learning_rate": 3.0247720402366623e-06, "loss": 0.0778, "step": 45199 }, { "epoch": 0.8004710736849562, "grad_norm": 0.5066717863082886, "learning_rate": 3.0242539521005604e-06, "loss": 0.0503, "step": 45200 }, { "epoch": 0.8004887832219846, "grad_norm": 0.47981375455856323, "learning_rate": 3.0237359033634553e-06, "loss": 0.0753, "step": 45201 }, { "epoch": 0.8005064927590131, "grad_norm": 0.69004225730896, "learning_rate": 3.023217894027045e-06, "loss": 0.0658, "step": 45202 }, { "epoch": 0.8005242022960415, "grad_norm": 0.3840002119541168, "learning_rate": 3.022699924093036e-06, "loss": 0.0805, "step": 45203 }, { "epoch": 0.8005419118330699, "grad_norm": 0.6376561522483826, "learning_rate": 3.022181993563135e-06, "loss": 0.0594, "step": 45204 }, { "epoch": 0.8005596213700983, "grad_norm": 0.36542484164237976, "learning_rate": 3.0216641024390377e-06, "loss": 0.0444, "step": 45205 }, { "epoch": 0.8005773309071268, "grad_norm": 0.7111550569534302, "learning_rate": 3.021146250722458e-06, "loss": 0.0858, "step": 45206 }, { "epoch": 0.8005950404441552, "grad_norm": 0.4454934000968933, "learning_rate": 3.020628438415094e-06, "loss": 0.047, "step": 45207 }, { "epoch": 0.8006127499811836, "grad_norm": 0.6481375694274902, "learning_rate": 3.020110665518656e-06, "loss": 0.0634, "step": 45208 }, { "epoch": 0.800630459518212, "grad_norm": 0.5508087873458862, "learning_rate": 3.019592932034838e-06, "loss": 0.0487, "step": 45209 }, { "epoch": 0.8006481690552405, "grad_norm": 0.7392883896827698, "learning_rate": 3.019075237965347e-06, "loss": 0.0462, "step": 45210 }, { "epoch": 0.8006658785922689, "grad_norm": 0.6821446418762207, "learning_rate": 3.0185575833118913e-06, "loss": 0.0502, "step": 45211 }, { "epoch": 0.8006835881292973, "grad_norm": 0.500022828578949, "learning_rate": 3.0180399680761647e-06, "loss": 0.0271, "step": 45212 }, { "epoch": 0.8007012976663258, "grad_norm": 0.5216127038002014, "learning_rate": 3.0175223922598755e-06, "loss": 0.0302, "step": 45213 }, { "epoch": 0.8007190072033542, "grad_norm": 0.3068896532058716, "learning_rate": 3.0170048558647245e-06, "loss": 0.0301, "step": 45214 }, { "epoch": 0.8007367167403826, "grad_norm": 0.5901472568511963, "learning_rate": 3.016487358892417e-06, "loss": 0.0442, "step": 45215 }, { "epoch": 0.800754426277411, "grad_norm": 0.609842836856842, "learning_rate": 3.015969901344653e-06, "loss": 0.0424, "step": 45216 }, { "epoch": 0.8007721358144395, "grad_norm": 0.4504644572734833, "learning_rate": 3.0154524832231363e-06, "loss": 0.0573, "step": 45217 }, { "epoch": 0.8007898453514679, "grad_norm": 0.4178594946861267, "learning_rate": 3.014935104529572e-06, "loss": 0.0546, "step": 45218 }, { "epoch": 0.8008075548884963, "grad_norm": 0.42241013050079346, "learning_rate": 3.0144177652656556e-06, "loss": 0.0464, "step": 45219 }, { "epoch": 0.8008252644255247, "grad_norm": 0.6912882924079895, "learning_rate": 3.0139004654330926e-06, "loss": 0.0402, "step": 45220 }, { "epoch": 0.8008429739625532, "grad_norm": 0.8874813914299011, "learning_rate": 3.0133832050335846e-06, "loss": 0.0638, "step": 45221 }, { "epoch": 0.8008606834995816, "grad_norm": 0.5633947849273682, "learning_rate": 3.012865984068839e-06, "loss": 0.0471, "step": 45222 }, { "epoch": 0.80087839303661, "grad_norm": 0.5966850519180298, "learning_rate": 3.0123488025405457e-06, "loss": 0.0397, "step": 45223 }, { "epoch": 0.8008961025736384, "grad_norm": 0.3148414194583893, "learning_rate": 3.0118316604504133e-06, "loss": 0.0382, "step": 45224 }, { "epoch": 0.8009138121106669, "grad_norm": 0.31447097659111023, "learning_rate": 3.011314557800137e-06, "loss": 0.0662, "step": 45225 }, { "epoch": 0.8009315216476953, "grad_norm": 1.0397403240203857, "learning_rate": 3.0107974945914328e-06, "loss": 0.0506, "step": 45226 }, { "epoch": 0.8009492311847237, "grad_norm": 0.8002462983131409, "learning_rate": 3.0102804708259875e-06, "loss": 0.0763, "step": 45227 }, { "epoch": 0.8009669407217522, "grad_norm": 0.5704530477523804, "learning_rate": 3.0097634865055074e-06, "loss": 0.0438, "step": 45228 }, { "epoch": 0.8009846502587806, "grad_norm": 0.7184032201766968, "learning_rate": 3.009246541631695e-06, "loss": 0.0666, "step": 45229 }, { "epoch": 0.801002359795809, "grad_norm": 0.33514630794525146, "learning_rate": 3.008729636206247e-06, "loss": 0.068, "step": 45230 }, { "epoch": 0.8010200693328374, "grad_norm": 0.4136456549167633, "learning_rate": 3.008212770230864e-06, "loss": 0.0394, "step": 45231 }, { "epoch": 0.8010377788698659, "grad_norm": 0.3761442005634308, "learning_rate": 3.007695943707248e-06, "loss": 0.0324, "step": 45232 }, { "epoch": 0.8010554884068943, "grad_norm": 0.6743121147155762, "learning_rate": 3.0071791566371043e-06, "loss": 0.0733, "step": 45233 }, { "epoch": 0.8010731979439227, "grad_norm": 0.39913707971572876, "learning_rate": 3.0066624090221214e-06, "loss": 0.0412, "step": 45234 }, { "epoch": 0.8010909074809511, "grad_norm": 0.7449397444725037, "learning_rate": 3.0061457008640076e-06, "loss": 0.0777, "step": 45235 }, { "epoch": 0.8011086170179796, "grad_norm": 0.6121917963027954, "learning_rate": 3.005629032164467e-06, "loss": 0.069, "step": 45236 }, { "epoch": 0.801126326555008, "grad_norm": 0.6451423168182373, "learning_rate": 3.005112402925188e-06, "loss": 0.0274, "step": 45237 }, { "epoch": 0.8011440360920364, "grad_norm": 0.584406316280365, "learning_rate": 3.004595813147878e-06, "loss": 0.0148, "step": 45238 }, { "epoch": 0.801161745629065, "grad_norm": 0.7787269949913025, "learning_rate": 3.0040792628342336e-06, "loss": 0.0884, "step": 45239 }, { "epoch": 0.8011794551660933, "grad_norm": 0.49994927644729614, "learning_rate": 3.0035627519859582e-06, "loss": 0.0607, "step": 45240 }, { "epoch": 0.8011971647031217, "grad_norm": 0.40453264117240906, "learning_rate": 3.003046280604746e-06, "loss": 0.068, "step": 45241 }, { "epoch": 0.8012148742401501, "grad_norm": 0.8067995309829712, "learning_rate": 3.002529848692297e-06, "loss": 0.0773, "step": 45242 }, { "epoch": 0.8012325837771787, "grad_norm": 0.7982277274131775, "learning_rate": 3.00201345625031e-06, "loss": 0.0646, "step": 45243 }, { "epoch": 0.801250293314207, "grad_norm": 0.5509628057479858, "learning_rate": 3.0014971032804867e-06, "loss": 0.0412, "step": 45244 }, { "epoch": 0.8012680028512355, "grad_norm": 0.37411025166511536, "learning_rate": 3.0009807897845243e-06, "loss": 0.0465, "step": 45245 }, { "epoch": 0.8012857123882638, "grad_norm": 0.5372641682624817, "learning_rate": 3.0004645157641216e-06, "loss": 0.0715, "step": 45246 }, { "epoch": 0.8013034219252924, "grad_norm": 0.3802954852581024, "learning_rate": 2.99994828122098e-06, "loss": 0.0372, "step": 45247 }, { "epoch": 0.8013211314623208, "grad_norm": 0.5471781492233276, "learning_rate": 2.999432086156792e-06, "loss": 0.0521, "step": 45248 }, { "epoch": 0.8013388409993492, "grad_norm": 0.6746155023574829, "learning_rate": 2.9989159305732577e-06, "loss": 0.0467, "step": 45249 }, { "epoch": 0.8013565505363776, "grad_norm": 0.599399745464325, "learning_rate": 2.998399814472081e-06, "loss": 0.0577, "step": 45250 }, { "epoch": 0.8013742600734061, "grad_norm": 0.8712067604064941, "learning_rate": 2.99788373785495e-06, "loss": 0.0405, "step": 45251 }, { "epoch": 0.8013919696104345, "grad_norm": 0.8329373002052307, "learning_rate": 2.997367700723569e-06, "loss": 0.0636, "step": 45252 }, { "epoch": 0.8014096791474629, "grad_norm": 0.5160486698150635, "learning_rate": 2.996851703079633e-06, "loss": 0.0509, "step": 45253 }, { "epoch": 0.8014273886844914, "grad_norm": 0.5118986368179321, "learning_rate": 2.996335744924842e-06, "loss": 0.0349, "step": 45254 }, { "epoch": 0.8014450982215198, "grad_norm": 0.19577637314796448, "learning_rate": 2.9958198262608915e-06, "loss": 0.0281, "step": 45255 }, { "epoch": 0.8014628077585482, "grad_norm": 0.8487849235534668, "learning_rate": 2.9953039470894805e-06, "loss": 0.0559, "step": 45256 }, { "epoch": 0.8014805172955766, "grad_norm": 0.7071772217750549, "learning_rate": 2.9947881074123075e-06, "loss": 0.0502, "step": 45257 }, { "epoch": 0.8014982268326051, "grad_norm": 0.429997056722641, "learning_rate": 2.994272307231064e-06, "loss": 0.0621, "step": 45258 }, { "epoch": 0.8015159363696335, "grad_norm": 0.7448723912239075, "learning_rate": 2.993756546547451e-06, "loss": 0.0667, "step": 45259 }, { "epoch": 0.8015336459066619, "grad_norm": 0.46820640563964844, "learning_rate": 2.9932408253631647e-06, "loss": 0.0622, "step": 45260 }, { "epoch": 0.8015513554436903, "grad_norm": 0.7084914445877075, "learning_rate": 2.992725143679905e-06, "loss": 0.0701, "step": 45261 }, { "epoch": 0.8015690649807188, "grad_norm": 0.41346099972724915, "learning_rate": 2.9922095014993623e-06, "loss": 0.057, "step": 45262 }, { "epoch": 0.8015867745177472, "grad_norm": 0.555262565612793, "learning_rate": 2.991693898823231e-06, "loss": 0.0686, "step": 45263 }, { "epoch": 0.8016044840547756, "grad_norm": 0.4322456121444702, "learning_rate": 2.991178335653215e-06, "loss": 0.0477, "step": 45264 }, { "epoch": 0.801622193591804, "grad_norm": 0.4090997576713562, "learning_rate": 2.9906628119910128e-06, "loss": 0.0611, "step": 45265 }, { "epoch": 0.8016399031288325, "grad_norm": 0.5888237357139587, "learning_rate": 2.9901473278383113e-06, "loss": 0.0679, "step": 45266 }, { "epoch": 0.8016576126658609, "grad_norm": 0.5445589423179626, "learning_rate": 2.9896318831968105e-06, "loss": 0.047, "step": 45267 }, { "epoch": 0.8016753222028893, "grad_norm": 0.9263833165168762, "learning_rate": 2.9891164780682113e-06, "loss": 0.0703, "step": 45268 }, { "epoch": 0.8016930317399178, "grad_norm": 0.5147146582603455, "learning_rate": 2.9886011124541984e-06, "loss": 0.0617, "step": 45269 }, { "epoch": 0.8017107412769462, "grad_norm": 0.7638052701950073, "learning_rate": 2.9880857863564727e-06, "loss": 0.0638, "step": 45270 }, { "epoch": 0.8017284508139746, "grad_norm": 0.7995033860206604, "learning_rate": 2.9875704997767307e-06, "loss": 0.0603, "step": 45271 }, { "epoch": 0.801746160351003, "grad_norm": 0.7219944596290588, "learning_rate": 2.987055252716666e-06, "loss": 0.0638, "step": 45272 }, { "epoch": 0.8017638698880315, "grad_norm": 0.45678451657295227, "learning_rate": 2.9865400451779743e-06, "loss": 0.0347, "step": 45273 }, { "epoch": 0.8017815794250599, "grad_norm": 1.272503137588501, "learning_rate": 2.9860248771623527e-06, "loss": 0.0644, "step": 45274 }, { "epoch": 0.8017992889620883, "grad_norm": 0.655631959438324, "learning_rate": 2.985509748671496e-06, "loss": 0.0574, "step": 45275 }, { "epoch": 0.8018169984991167, "grad_norm": 0.5049391388893127, "learning_rate": 2.9849946597070934e-06, "loss": 0.0567, "step": 45276 }, { "epoch": 0.8018347080361452, "grad_norm": 0.4357692301273346, "learning_rate": 2.984479610270844e-06, "loss": 0.0378, "step": 45277 }, { "epoch": 0.8018524175731736, "grad_norm": 0.6079186797142029, "learning_rate": 2.9839646003644415e-06, "loss": 0.0499, "step": 45278 }, { "epoch": 0.801870127110202, "grad_norm": 0.7327434420585632, "learning_rate": 2.9834496299895835e-06, "loss": 0.0403, "step": 45279 }, { "epoch": 0.8018878366472304, "grad_norm": 0.5889514684677124, "learning_rate": 2.9829346991479555e-06, "loss": 0.0636, "step": 45280 }, { "epoch": 0.8019055461842589, "grad_norm": 0.5006502270698547, "learning_rate": 2.9824198078412596e-06, "loss": 0.0663, "step": 45281 }, { "epoch": 0.8019232557212873, "grad_norm": 0.5220467448234558, "learning_rate": 2.981904956071184e-06, "loss": 0.0455, "step": 45282 }, { "epoch": 0.8019409652583157, "grad_norm": 0.48543545603752136, "learning_rate": 2.981390143839428e-06, "loss": 0.0703, "step": 45283 }, { "epoch": 0.8019586747953442, "grad_norm": 0.6570428609848022, "learning_rate": 2.9808753711476817e-06, "loss": 0.0489, "step": 45284 }, { "epoch": 0.8019763843323726, "grad_norm": 0.3097597658634186, "learning_rate": 2.98036063799764e-06, "loss": 0.0387, "step": 45285 }, { "epoch": 0.801994093869401, "grad_norm": 0.4785602390766144, "learning_rate": 2.979845944391e-06, "loss": 0.0589, "step": 45286 }, { "epoch": 0.8020118034064294, "grad_norm": 0.9207448959350586, "learning_rate": 2.9793312903294478e-06, "loss": 0.083, "step": 45287 }, { "epoch": 0.8020295129434579, "grad_norm": 0.6516146659851074, "learning_rate": 2.9788166758146795e-06, "loss": 0.043, "step": 45288 }, { "epoch": 0.8020472224804863, "grad_norm": 0.6667888164520264, "learning_rate": 2.9783021008483924e-06, "loss": 0.06, "step": 45289 }, { "epoch": 0.8020649320175147, "grad_norm": 0.24005864560604095, "learning_rate": 2.9777875654322716e-06, "loss": 0.0398, "step": 45290 }, { "epoch": 0.8020826415545431, "grad_norm": 0.6986929178237915, "learning_rate": 2.977273069568013e-06, "loss": 0.0577, "step": 45291 }, { "epoch": 0.8021003510915716, "grad_norm": 0.3199070394039154, "learning_rate": 2.976758613257306e-06, "loss": 0.0355, "step": 45292 }, { "epoch": 0.8021180606286, "grad_norm": 0.5881764888763428, "learning_rate": 2.9762441965018566e-06, "loss": 0.04, "step": 45293 }, { "epoch": 0.8021357701656284, "grad_norm": 0.4511006474494934, "learning_rate": 2.9757298193033416e-06, "loss": 0.054, "step": 45294 }, { "epoch": 0.8021534797026568, "grad_norm": 0.7179754972457886, "learning_rate": 2.9752154816634615e-06, "loss": 0.0544, "step": 45295 }, { "epoch": 0.8021711892396853, "grad_norm": 0.6936531662940979, "learning_rate": 2.9747011835839045e-06, "loss": 0.0481, "step": 45296 }, { "epoch": 0.8021888987767137, "grad_norm": 0.4431154429912567, "learning_rate": 2.9741869250663677e-06, "loss": 0.0492, "step": 45297 }, { "epoch": 0.8022066083137421, "grad_norm": 0.554801881313324, "learning_rate": 2.9736727061125367e-06, "loss": 0.0587, "step": 45298 }, { "epoch": 0.8022243178507706, "grad_norm": 0.45426297187805176, "learning_rate": 2.9731585267241067e-06, "loss": 0.0545, "step": 45299 }, { "epoch": 0.802242027387799, "grad_norm": 0.9338871240615845, "learning_rate": 2.972644386902768e-06, "loss": 0.0614, "step": 45300 }, { "epoch": 0.8022597369248274, "grad_norm": 0.644523024559021, "learning_rate": 2.9721302866502124e-06, "loss": 0.0509, "step": 45301 }, { "epoch": 0.8022774464618558, "grad_norm": 0.6956194043159485, "learning_rate": 2.9716162259681323e-06, "loss": 0.0688, "step": 45302 }, { "epoch": 0.8022951559988843, "grad_norm": 0.5091803669929504, "learning_rate": 2.9711022048582195e-06, "loss": 0.074, "step": 45303 }, { "epoch": 0.8023128655359127, "grad_norm": 0.6469160914421082, "learning_rate": 2.9705882233221653e-06, "loss": 0.0398, "step": 45304 }, { "epoch": 0.8023305750729411, "grad_norm": 0.6907944083213806, "learning_rate": 2.970074281361656e-06, "loss": 0.0659, "step": 45305 }, { "epoch": 0.8023482846099695, "grad_norm": 0.5728580951690674, "learning_rate": 2.9695603789783865e-06, "loss": 0.0546, "step": 45306 }, { "epoch": 0.802365994146998, "grad_norm": 0.3367531895637512, "learning_rate": 2.969046516174049e-06, "loss": 0.0414, "step": 45307 }, { "epoch": 0.8023837036840265, "grad_norm": 0.4980587959289551, "learning_rate": 2.9685326929503282e-06, "loss": 0.074, "step": 45308 }, { "epoch": 0.8024014132210548, "grad_norm": 0.7633894085884094, "learning_rate": 2.968018909308918e-06, "loss": 0.05, "step": 45309 }, { "epoch": 0.8024191227580832, "grad_norm": 0.5205928087234497, "learning_rate": 2.9675051652515095e-06, "loss": 0.0631, "step": 45310 }, { "epoch": 0.8024368322951118, "grad_norm": 1.159716010093689, "learning_rate": 2.9669914607797905e-06, "loss": 0.0546, "step": 45311 }, { "epoch": 0.8024545418321402, "grad_norm": 0.7120755314826965, "learning_rate": 2.966477795895453e-06, "loss": 0.068, "step": 45312 }, { "epoch": 0.8024722513691686, "grad_norm": 0.44811439514160156, "learning_rate": 2.965964170600188e-06, "loss": 0.0408, "step": 45313 }, { "epoch": 0.8024899609061971, "grad_norm": 0.35648924112319946, "learning_rate": 2.9654505848956862e-06, "loss": 0.0398, "step": 45314 }, { "epoch": 0.8025076704432255, "grad_norm": 0.64910489320755, "learning_rate": 2.964937038783631e-06, "loss": 0.0432, "step": 45315 }, { "epoch": 0.8025253799802539, "grad_norm": 0.5403565764427185, "learning_rate": 2.9644235322657153e-06, "loss": 0.0629, "step": 45316 }, { "epoch": 0.8025430895172823, "grad_norm": 0.5878075957298279, "learning_rate": 2.9639100653436295e-06, "loss": 0.0667, "step": 45317 }, { "epoch": 0.8025607990543108, "grad_norm": 0.4674239754676819, "learning_rate": 2.9633966380190656e-06, "loss": 0.0483, "step": 45318 }, { "epoch": 0.8025785085913392, "grad_norm": 0.5381550788879395, "learning_rate": 2.9628832502937058e-06, "loss": 0.0686, "step": 45319 }, { "epoch": 0.8025962181283676, "grad_norm": 0.6181016564369202, "learning_rate": 2.962369902169242e-06, "loss": 0.0614, "step": 45320 }, { "epoch": 0.802613927665396, "grad_norm": 0.46163827180862427, "learning_rate": 2.961856593647364e-06, "loss": 0.0525, "step": 45321 }, { "epoch": 0.8026316372024245, "grad_norm": 0.2379528284072876, "learning_rate": 2.961343324729761e-06, "loss": 0.0579, "step": 45322 }, { "epoch": 0.8026493467394529, "grad_norm": 0.7442343235015869, "learning_rate": 2.96083009541812e-06, "loss": 0.0579, "step": 45323 }, { "epoch": 0.8026670562764813, "grad_norm": 0.6751188635826111, "learning_rate": 2.960316905714131e-06, "loss": 0.0701, "step": 45324 }, { "epoch": 0.8026847658135097, "grad_norm": 0.6323877573013306, "learning_rate": 2.9598037556194845e-06, "loss": 0.0602, "step": 45325 }, { "epoch": 0.8027024753505382, "grad_norm": 0.693261981010437, "learning_rate": 2.9592906451358624e-06, "loss": 0.0558, "step": 45326 }, { "epoch": 0.8027201848875666, "grad_norm": 0.6312227249145508, "learning_rate": 2.958777574264957e-06, "loss": 0.0556, "step": 45327 }, { "epoch": 0.802737894424595, "grad_norm": 0.6304011344909668, "learning_rate": 2.958264543008455e-06, "loss": 0.0517, "step": 45328 }, { "epoch": 0.8027556039616235, "grad_norm": 0.6526788473129272, "learning_rate": 2.957751551368043e-06, "loss": 0.0716, "step": 45329 }, { "epoch": 0.8027733134986519, "grad_norm": 0.6459764838218689, "learning_rate": 2.957238599345412e-06, "loss": 0.0732, "step": 45330 }, { "epoch": 0.8027910230356803, "grad_norm": 0.2823282480239868, "learning_rate": 2.9567256869422486e-06, "loss": 0.044, "step": 45331 }, { "epoch": 0.8028087325727087, "grad_norm": 0.866807758808136, "learning_rate": 2.956212814160242e-06, "loss": 0.0624, "step": 45332 }, { "epoch": 0.8028264421097372, "grad_norm": 0.702353835105896, "learning_rate": 2.955699981001075e-06, "loss": 0.098, "step": 45333 }, { "epoch": 0.8028441516467656, "grad_norm": 0.3924790322780609, "learning_rate": 2.955187187466435e-06, "loss": 0.0365, "step": 45334 }, { "epoch": 0.802861861183794, "grad_norm": 0.7773554921150208, "learning_rate": 2.9546744335580123e-06, "loss": 0.0497, "step": 45335 }, { "epoch": 0.8028795707208224, "grad_norm": 0.40078362822532654, "learning_rate": 2.9541617192774954e-06, "loss": 0.0663, "step": 45336 }, { "epoch": 0.8028972802578509, "grad_norm": 0.8533647060394287, "learning_rate": 2.9536490446265634e-06, "loss": 0.073, "step": 45337 }, { "epoch": 0.8029149897948793, "grad_norm": 0.4432297646999359, "learning_rate": 2.9531364096069097e-06, "loss": 0.034, "step": 45338 }, { "epoch": 0.8029326993319077, "grad_norm": 0.6084717512130737, "learning_rate": 2.9526238142202177e-06, "loss": 0.0441, "step": 45339 }, { "epoch": 0.8029504088689361, "grad_norm": 0.710661768913269, "learning_rate": 2.9521112584681746e-06, "loss": 0.0455, "step": 45340 }, { "epoch": 0.8029681184059646, "grad_norm": 0.6917227506637573, "learning_rate": 2.9515987423524675e-06, "loss": 0.0436, "step": 45341 }, { "epoch": 0.802985827942993, "grad_norm": 0.5356377959251404, "learning_rate": 2.951086265874782e-06, "loss": 0.0431, "step": 45342 }, { "epoch": 0.8030035374800214, "grad_norm": 0.5024216771125793, "learning_rate": 2.9505738290368074e-06, "loss": 0.0417, "step": 45343 }, { "epoch": 0.8030212470170499, "grad_norm": 0.32376500964164734, "learning_rate": 2.9500614318402214e-06, "loss": 0.0276, "step": 45344 }, { "epoch": 0.8030389565540783, "grad_norm": 0.45081743597984314, "learning_rate": 2.949549074286716e-06, "loss": 0.0569, "step": 45345 }, { "epoch": 0.8030566660911067, "grad_norm": 0.3850761950016022, "learning_rate": 2.949036756377978e-06, "loss": 0.0632, "step": 45346 }, { "epoch": 0.8030743756281351, "grad_norm": 0.6582124829292297, "learning_rate": 2.9485244781156876e-06, "loss": 0.0609, "step": 45347 }, { "epoch": 0.8030920851651636, "grad_norm": 1.2992905378341675, "learning_rate": 2.9480122395015325e-06, "loss": 0.0478, "step": 45348 }, { "epoch": 0.803109794702192, "grad_norm": 0.8312480449676514, "learning_rate": 2.947500040537197e-06, "loss": 0.0824, "step": 45349 }, { "epoch": 0.8031275042392204, "grad_norm": 0.39568597078323364, "learning_rate": 2.9469878812243687e-06, "loss": 0.0403, "step": 45350 }, { "epoch": 0.8031452137762488, "grad_norm": 0.3396735191345215, "learning_rate": 2.9464757615647297e-06, "loss": 0.0823, "step": 45351 }, { "epoch": 0.8031629233132773, "grad_norm": 0.6557825803756714, "learning_rate": 2.9459636815599685e-06, "loss": 0.0596, "step": 45352 }, { "epoch": 0.8031806328503057, "grad_norm": 0.5822110772132874, "learning_rate": 2.94545164121177e-06, "loss": 0.0665, "step": 45353 }, { "epoch": 0.8031983423873341, "grad_norm": 0.5721052885055542, "learning_rate": 2.9449396405218114e-06, "loss": 0.0466, "step": 45354 }, { "epoch": 0.8032160519243625, "grad_norm": 0.30659207701683044, "learning_rate": 2.9444276794917834e-06, "loss": 0.0435, "step": 45355 }, { "epoch": 0.803233761461391, "grad_norm": 0.6283017992973328, "learning_rate": 2.9439157581233696e-06, "loss": 0.0603, "step": 45356 }, { "epoch": 0.8032514709984194, "grad_norm": 0.7144249677658081, "learning_rate": 2.9434038764182565e-06, "loss": 0.0478, "step": 45357 }, { "epoch": 0.8032691805354478, "grad_norm": 0.49371686577796936, "learning_rate": 2.942892034378118e-06, "loss": 0.0549, "step": 45358 }, { "epoch": 0.8032868900724763, "grad_norm": 0.629463791847229, "learning_rate": 2.942380232004648e-06, "loss": 0.0508, "step": 45359 }, { "epoch": 0.8033045996095047, "grad_norm": 0.7026485800743103, "learning_rate": 2.941868469299533e-06, "loss": 0.0545, "step": 45360 }, { "epoch": 0.8033223091465331, "grad_norm": 0.3789174258708954, "learning_rate": 2.9413567462644453e-06, "loss": 0.046, "step": 45361 }, { "epoch": 0.8033400186835615, "grad_norm": 0.542134702205658, "learning_rate": 2.9408450629010754e-06, "loss": 0.0393, "step": 45362 }, { "epoch": 0.80335772822059, "grad_norm": 0.5877262949943542, "learning_rate": 2.9403334192111063e-06, "loss": 0.0411, "step": 45363 }, { "epoch": 0.8033754377576184, "grad_norm": 0.8694654703140259, "learning_rate": 2.9398218151962226e-06, "loss": 0.052, "step": 45364 }, { "epoch": 0.8033931472946468, "grad_norm": 0.5343571305274963, "learning_rate": 2.939310250858102e-06, "loss": 0.0478, "step": 45365 }, { "epoch": 0.8034108568316752, "grad_norm": 0.22387877106666565, "learning_rate": 2.9387987261984323e-06, "loss": 0.038, "step": 45366 }, { "epoch": 0.8034285663687037, "grad_norm": 0.4472714364528656, "learning_rate": 2.9382872412188933e-06, "loss": 0.0568, "step": 45367 }, { "epoch": 0.8034462759057321, "grad_norm": 0.9204978942871094, "learning_rate": 2.9377757959211704e-06, "loss": 0.0688, "step": 45368 }, { "epoch": 0.8034639854427605, "grad_norm": 1.1085872650146484, "learning_rate": 2.937264390306944e-06, "loss": 0.0788, "step": 45369 }, { "epoch": 0.8034816949797889, "grad_norm": 0.6415991187095642, "learning_rate": 2.9367530243778977e-06, "loss": 0.0609, "step": 45370 }, { "epoch": 0.8034994045168175, "grad_norm": 0.7242180705070496, "learning_rate": 2.9362416981357186e-06, "loss": 0.0513, "step": 45371 }, { "epoch": 0.8035171140538458, "grad_norm": 0.6153404116630554, "learning_rate": 2.935730411582079e-06, "loss": 0.0417, "step": 45372 }, { "epoch": 0.8035348235908742, "grad_norm": 0.5095642805099487, "learning_rate": 2.9352191647186667e-06, "loss": 0.0517, "step": 45373 }, { "epoch": 0.8035525331279028, "grad_norm": 0.6622546315193176, "learning_rate": 2.9347079575471642e-06, "loss": 0.0487, "step": 45374 }, { "epoch": 0.8035702426649312, "grad_norm": 0.29427027702331543, "learning_rate": 2.9341967900692553e-06, "loss": 0.043, "step": 45375 }, { "epoch": 0.8035879522019596, "grad_norm": 0.8902243375778198, "learning_rate": 2.933685662286615e-06, "loss": 0.0577, "step": 45376 }, { "epoch": 0.803605661738988, "grad_norm": 0.7939637303352356, "learning_rate": 2.933174574200927e-06, "loss": 0.0912, "step": 45377 }, { "epoch": 0.8036233712760165, "grad_norm": 0.6001976728439331, "learning_rate": 2.932663525813875e-06, "loss": 0.0637, "step": 45378 }, { "epoch": 0.8036410808130449, "grad_norm": 0.999136209487915, "learning_rate": 2.9321525171271397e-06, "loss": 0.0607, "step": 45379 }, { "epoch": 0.8036587903500733, "grad_norm": 0.8603590726852417, "learning_rate": 2.9316415481424023e-06, "loss": 0.0753, "step": 45380 }, { "epoch": 0.8036764998871017, "grad_norm": 0.5953685641288757, "learning_rate": 2.9311306188613424e-06, "loss": 0.0476, "step": 45381 }, { "epoch": 0.8036942094241302, "grad_norm": 0.47752153873443604, "learning_rate": 2.9306197292856477e-06, "loss": 0.0423, "step": 45382 }, { "epoch": 0.8037119189611586, "grad_norm": 0.6134501695632935, "learning_rate": 2.930108879416988e-06, "loss": 0.0556, "step": 45383 }, { "epoch": 0.803729628498187, "grad_norm": 0.5009266138076782, "learning_rate": 2.929598069257049e-06, "loss": 0.0502, "step": 45384 }, { "epoch": 0.8037473380352154, "grad_norm": 0.48609060049057007, "learning_rate": 2.9290872988075167e-06, "loss": 0.0746, "step": 45385 }, { "epoch": 0.8037650475722439, "grad_norm": 0.3924238681793213, "learning_rate": 2.9285765680700614e-06, "loss": 0.058, "step": 45386 }, { "epoch": 0.8037827571092723, "grad_norm": 0.6489167213439941, "learning_rate": 2.9280658770463655e-06, "loss": 0.0538, "step": 45387 }, { "epoch": 0.8038004666463007, "grad_norm": 0.6149140000343323, "learning_rate": 2.927555225738115e-06, "loss": 0.0496, "step": 45388 }, { "epoch": 0.8038181761833292, "grad_norm": 0.44223034381866455, "learning_rate": 2.927044614146991e-06, "loss": 0.0513, "step": 45389 }, { "epoch": 0.8038358857203576, "grad_norm": 0.47673913836479187, "learning_rate": 2.926534042274665e-06, "loss": 0.0486, "step": 45390 }, { "epoch": 0.803853595257386, "grad_norm": 0.6143803000450134, "learning_rate": 2.926023510122821e-06, "loss": 0.0521, "step": 45391 }, { "epoch": 0.8038713047944144, "grad_norm": 0.6402173042297363, "learning_rate": 2.925513017693141e-06, "loss": 0.0524, "step": 45392 }, { "epoch": 0.8038890143314429, "grad_norm": 0.5057427287101746, "learning_rate": 2.9250025649873003e-06, "loss": 0.0682, "step": 45393 }, { "epoch": 0.8039067238684713, "grad_norm": 0.5945187211036682, "learning_rate": 2.924492152006979e-06, "loss": 0.0569, "step": 45394 }, { "epoch": 0.8039244334054997, "grad_norm": 0.6988889575004578, "learning_rate": 2.923981778753856e-06, "loss": 0.0506, "step": 45395 }, { "epoch": 0.8039421429425281, "grad_norm": 0.4529608488082886, "learning_rate": 2.923471445229613e-06, "loss": 0.0587, "step": 45396 }, { "epoch": 0.8039598524795566, "grad_norm": 0.6330413222312927, "learning_rate": 2.922961151435927e-06, "loss": 0.0643, "step": 45397 }, { "epoch": 0.803977562016585, "grad_norm": 0.4181598126888275, "learning_rate": 2.9224508973744775e-06, "loss": 0.0502, "step": 45398 }, { "epoch": 0.8039952715536134, "grad_norm": 0.3122764229774475, "learning_rate": 2.921940683046942e-06, "loss": 0.0548, "step": 45399 }, { "epoch": 0.8040129810906418, "grad_norm": 0.4976402223110199, "learning_rate": 2.9214305084550047e-06, "loss": 0.0522, "step": 45400 }, { "epoch": 0.8040306906276703, "grad_norm": 0.3594524562358856, "learning_rate": 2.920920373600334e-06, "loss": 0.0575, "step": 45401 }, { "epoch": 0.8040484001646987, "grad_norm": 1.3022027015686035, "learning_rate": 2.9204102784846158e-06, "loss": 0.112, "step": 45402 }, { "epoch": 0.8040661097017271, "grad_norm": 0.3960389196872711, "learning_rate": 2.9199002231095284e-06, "loss": 0.0389, "step": 45403 }, { "epoch": 0.8040838192387556, "grad_norm": 0.2626037299633026, "learning_rate": 2.9193902074767454e-06, "loss": 0.0617, "step": 45404 }, { "epoch": 0.804101528775784, "grad_norm": 0.6282846927642822, "learning_rate": 2.9188802315879456e-06, "loss": 0.0532, "step": 45405 }, { "epoch": 0.8041192383128124, "grad_norm": 0.9204869866371155, "learning_rate": 2.9183702954448073e-06, "loss": 0.0405, "step": 45406 }, { "epoch": 0.8041369478498408, "grad_norm": 0.5051324367523193, "learning_rate": 2.9178603990490094e-06, "loss": 0.0565, "step": 45407 }, { "epoch": 0.8041546573868693, "grad_norm": 0.687000036239624, "learning_rate": 2.9173505424022273e-06, "loss": 0.052, "step": 45408 }, { "epoch": 0.8041723669238977, "grad_norm": 0.5649049282073975, "learning_rate": 2.916840725506141e-06, "loss": 0.0337, "step": 45409 }, { "epoch": 0.8041900764609261, "grad_norm": 0.4660969376564026, "learning_rate": 2.916330948362432e-06, "loss": 0.0476, "step": 45410 }, { "epoch": 0.8042077859979545, "grad_norm": 0.6629412770271301, "learning_rate": 2.9158212109727654e-06, "loss": 0.0547, "step": 45411 }, { "epoch": 0.804225495534983, "grad_norm": 0.4651297628879547, "learning_rate": 2.9153115133388263e-06, "loss": 0.0629, "step": 45412 }, { "epoch": 0.8042432050720114, "grad_norm": 0.7461876273155212, "learning_rate": 2.9148018554622895e-06, "loss": 0.0647, "step": 45413 }, { "epoch": 0.8042609146090398, "grad_norm": 0.640311062335968, "learning_rate": 2.914292237344837e-06, "loss": 0.0639, "step": 45414 }, { "epoch": 0.8042786241460682, "grad_norm": 0.4351128339767456, "learning_rate": 2.9137826589881363e-06, "loss": 0.0493, "step": 45415 }, { "epoch": 0.8042963336830967, "grad_norm": 0.6740987300872803, "learning_rate": 2.9132731203938636e-06, "loss": 0.0639, "step": 45416 }, { "epoch": 0.8043140432201251, "grad_norm": 0.45107200741767883, "learning_rate": 2.9127636215637076e-06, "loss": 0.0513, "step": 45417 }, { "epoch": 0.8043317527571535, "grad_norm": 1.0588374137878418, "learning_rate": 2.9122541624993344e-06, "loss": 0.0526, "step": 45418 }, { "epoch": 0.804349462294182, "grad_norm": 0.5772791504859924, "learning_rate": 2.911744743202422e-06, "loss": 0.0707, "step": 45419 }, { "epoch": 0.8043671718312104, "grad_norm": 0.4387103319168091, "learning_rate": 2.911235363674646e-06, "loss": 0.03, "step": 45420 }, { "epoch": 0.8043848813682388, "grad_norm": 0.66935133934021, "learning_rate": 2.9107260239176865e-06, "loss": 0.0625, "step": 45421 }, { "epoch": 0.8044025909052672, "grad_norm": 0.7355190515518188, "learning_rate": 2.910216723933212e-06, "loss": 0.0536, "step": 45422 }, { "epoch": 0.8044203004422957, "grad_norm": 0.4440400004386902, "learning_rate": 2.909707463722902e-06, "loss": 0.0414, "step": 45423 }, { "epoch": 0.8044380099793241, "grad_norm": 0.7312729954719543, "learning_rate": 2.9091982432884314e-06, "loss": 0.0663, "step": 45424 }, { "epoch": 0.8044557195163525, "grad_norm": 1.2440820932388306, "learning_rate": 2.908689062631475e-06, "loss": 0.0852, "step": 45425 }, { "epoch": 0.8044734290533809, "grad_norm": 1.200684905052185, "learning_rate": 2.90817992175371e-06, "loss": 0.0938, "step": 45426 }, { "epoch": 0.8044911385904094, "grad_norm": 0.6537294983863831, "learning_rate": 2.9076708206568094e-06, "loss": 0.0613, "step": 45427 }, { "epoch": 0.8045088481274378, "grad_norm": 0.4861809015274048, "learning_rate": 2.9071617593424522e-06, "loss": 0.058, "step": 45428 }, { "epoch": 0.8045265576644662, "grad_norm": 1.0853674411773682, "learning_rate": 2.9066527378123076e-06, "loss": 0.064, "step": 45429 }, { "epoch": 0.8045442672014946, "grad_norm": 0.4070436656475067, "learning_rate": 2.906143756068051e-06, "loss": 0.0353, "step": 45430 }, { "epoch": 0.8045619767385231, "grad_norm": 0.34561842679977417, "learning_rate": 2.9056348141113607e-06, "loss": 0.0352, "step": 45431 }, { "epoch": 0.8045796862755515, "grad_norm": 0.6545683145523071, "learning_rate": 2.905125911943906e-06, "loss": 0.0448, "step": 45432 }, { "epoch": 0.8045973958125799, "grad_norm": 0.816428542137146, "learning_rate": 2.904617049567365e-06, "loss": 0.0731, "step": 45433 }, { "epoch": 0.8046151053496085, "grad_norm": 0.8968108296394348, "learning_rate": 2.9041082269834085e-06, "loss": 0.0963, "step": 45434 }, { "epoch": 0.8046328148866368, "grad_norm": 0.7138850688934326, "learning_rate": 2.903599444193713e-06, "loss": 0.085, "step": 45435 }, { "epoch": 0.8046505244236652, "grad_norm": 0.9810782074928284, "learning_rate": 2.9030907011999525e-06, "loss": 0.0705, "step": 45436 }, { "epoch": 0.8046682339606936, "grad_norm": 0.5672577023506165, "learning_rate": 2.9025819980038003e-06, "loss": 0.04, "step": 45437 }, { "epoch": 0.8046859434977222, "grad_norm": 0.3723233938217163, "learning_rate": 2.902073334606929e-06, "loss": 0.0361, "step": 45438 }, { "epoch": 0.8047036530347506, "grad_norm": 0.834656298160553, "learning_rate": 2.901564711011017e-06, "loss": 0.063, "step": 45439 }, { "epoch": 0.804721362571779, "grad_norm": 0.5876978635787964, "learning_rate": 2.901056127217729e-06, "loss": 0.029, "step": 45440 }, { "epoch": 0.8047390721088074, "grad_norm": 0.42855000495910645, "learning_rate": 2.9005475832287424e-06, "loss": 0.0676, "step": 45441 }, { "epoch": 0.8047567816458359, "grad_norm": 0.19446291029453278, "learning_rate": 2.9000390790457366e-06, "loss": 0.0483, "step": 45442 }, { "epoch": 0.8047744911828643, "grad_norm": 0.8969769477844238, "learning_rate": 2.8995306146703726e-06, "loss": 0.0468, "step": 45443 }, { "epoch": 0.8047922007198927, "grad_norm": 0.5965390801429749, "learning_rate": 2.8990221901043294e-06, "loss": 0.0371, "step": 45444 }, { "epoch": 0.8048099102569211, "grad_norm": 0.6689898371696472, "learning_rate": 2.8985138053492743e-06, "loss": 0.0663, "step": 45445 }, { "epoch": 0.8048276197939496, "grad_norm": 0.6360390782356262, "learning_rate": 2.898005460406894e-06, "loss": 0.0666, "step": 45446 }, { "epoch": 0.804845329330978, "grad_norm": 0.5374609231948853, "learning_rate": 2.8974971552788472e-06, "loss": 0.0576, "step": 45447 }, { "epoch": 0.8048630388680064, "grad_norm": 0.5943164229393005, "learning_rate": 2.896988889966811e-06, "loss": 0.0647, "step": 45448 }, { "epoch": 0.8048807484050349, "grad_norm": 0.8362470865249634, "learning_rate": 2.89648066447246e-06, "loss": 0.0507, "step": 45449 }, { "epoch": 0.8048984579420633, "grad_norm": 0.4389861226081848, "learning_rate": 2.8959724787974606e-06, "loss": 0.0501, "step": 45450 }, { "epoch": 0.8049161674790917, "grad_norm": 0.334696888923645, "learning_rate": 2.895464332943488e-06, "loss": 0.0328, "step": 45451 }, { "epoch": 0.8049338770161201, "grad_norm": 0.48813727498054504, "learning_rate": 2.894956226912213e-06, "loss": 0.0614, "step": 45452 }, { "epoch": 0.8049515865531486, "grad_norm": 0.8890788555145264, "learning_rate": 2.8944481607053074e-06, "loss": 0.054, "step": 45453 }, { "epoch": 0.804969296090177, "grad_norm": 0.7200427651405334, "learning_rate": 2.8939401343244425e-06, "loss": 0.0876, "step": 45454 }, { "epoch": 0.8049870056272054, "grad_norm": 0.5002102851867676, "learning_rate": 2.8934321477712904e-06, "loss": 0.0468, "step": 45455 }, { "epoch": 0.8050047151642338, "grad_norm": 0.578263521194458, "learning_rate": 2.8929242010475265e-06, "loss": 0.0452, "step": 45456 }, { "epoch": 0.8050224247012623, "grad_norm": 0.6571401953697205, "learning_rate": 2.8924162941548127e-06, "loss": 0.0617, "step": 45457 }, { "epoch": 0.8050401342382907, "grad_norm": 0.21946853399276733, "learning_rate": 2.8919084270948245e-06, "loss": 0.0341, "step": 45458 }, { "epoch": 0.8050578437753191, "grad_norm": 0.6060929298400879, "learning_rate": 2.8914005998692334e-06, "loss": 0.0551, "step": 45459 }, { "epoch": 0.8050755533123475, "grad_norm": 0.6861991286277771, "learning_rate": 2.8908928124797136e-06, "loss": 0.0772, "step": 45460 }, { "epoch": 0.805093262849376, "grad_norm": 0.6023950576782227, "learning_rate": 2.8903850649279283e-06, "loss": 0.068, "step": 45461 }, { "epoch": 0.8051109723864044, "grad_norm": 0.41163742542266846, "learning_rate": 2.8898773572155517e-06, "loss": 0.0452, "step": 45462 }, { "epoch": 0.8051286819234328, "grad_norm": 0.7136140465736389, "learning_rate": 2.8893696893442523e-06, "loss": 0.0584, "step": 45463 }, { "epoch": 0.8051463914604613, "grad_norm": 0.6089578866958618, "learning_rate": 2.8888620613157018e-06, "loss": 0.0413, "step": 45464 }, { "epoch": 0.8051641009974897, "grad_norm": 0.7813589572906494, "learning_rate": 2.888354473131571e-06, "loss": 0.0502, "step": 45465 }, { "epoch": 0.8051818105345181, "grad_norm": 0.6287369132041931, "learning_rate": 2.887846924793528e-06, "loss": 0.0638, "step": 45466 }, { "epoch": 0.8051995200715465, "grad_norm": 0.792921781539917, "learning_rate": 2.8873394163032485e-06, "loss": 0.0586, "step": 45467 }, { "epoch": 0.805217229608575, "grad_norm": 0.37957316637039185, "learning_rate": 2.886831947662393e-06, "loss": 0.0585, "step": 45468 }, { "epoch": 0.8052349391456034, "grad_norm": 0.3543320894241333, "learning_rate": 2.8863245188726345e-06, "loss": 0.053, "step": 45469 }, { "epoch": 0.8052526486826318, "grad_norm": 0.8319128751754761, "learning_rate": 2.8858171299356438e-06, "loss": 0.0892, "step": 45470 }, { "epoch": 0.8052703582196602, "grad_norm": 0.8143240213394165, "learning_rate": 2.885309780853093e-06, "loss": 0.0696, "step": 45471 }, { "epoch": 0.8052880677566887, "grad_norm": 0.7167632579803467, "learning_rate": 2.884802471626644e-06, "loss": 0.0717, "step": 45472 }, { "epoch": 0.8053057772937171, "grad_norm": 0.45810791850090027, "learning_rate": 2.8842952022579705e-06, "loss": 0.0566, "step": 45473 }, { "epoch": 0.8053234868307455, "grad_norm": 0.3975019156932831, "learning_rate": 2.8837879727487392e-06, "loss": 0.0415, "step": 45474 }, { "epoch": 0.8053411963677739, "grad_norm": 0.5975643992424011, "learning_rate": 2.8832807831006193e-06, "loss": 0.0589, "step": 45475 }, { "epoch": 0.8053589059048024, "grad_norm": 0.4749687612056732, "learning_rate": 2.8827736333152806e-06, "loss": 0.055, "step": 45476 }, { "epoch": 0.8053766154418308, "grad_norm": 0.2084605097770691, "learning_rate": 2.882266523394392e-06, "loss": 0.023, "step": 45477 }, { "epoch": 0.8053943249788592, "grad_norm": 0.5678554773330688, "learning_rate": 2.8817594533396236e-06, "loss": 0.0455, "step": 45478 }, { "epoch": 0.8054120345158877, "grad_norm": 0.5078742504119873, "learning_rate": 2.8812524231526376e-06, "loss": 0.0483, "step": 45479 }, { "epoch": 0.8054297440529161, "grad_norm": 0.7984080910682678, "learning_rate": 2.880745432835106e-06, "loss": 0.0691, "step": 45480 }, { "epoch": 0.8054474535899445, "grad_norm": 0.7345197200775146, "learning_rate": 2.8802384823886996e-06, "loss": 0.0442, "step": 45481 }, { "epoch": 0.8054651631269729, "grad_norm": 0.6450886726379395, "learning_rate": 2.879731571815075e-06, "loss": 0.0746, "step": 45482 }, { "epoch": 0.8054828726640014, "grad_norm": 0.3870491087436676, "learning_rate": 2.87922470111591e-06, "loss": 0.0601, "step": 45483 }, { "epoch": 0.8055005822010298, "grad_norm": 0.7017868757247925, "learning_rate": 2.8787178702928713e-06, "loss": 0.0669, "step": 45484 }, { "epoch": 0.8055182917380582, "grad_norm": 0.5125468373298645, "learning_rate": 2.8782110793476286e-06, "loss": 0.0431, "step": 45485 }, { "epoch": 0.8055360012750866, "grad_norm": 0.8831416964530945, "learning_rate": 2.877704328281842e-06, "loss": 0.0568, "step": 45486 }, { "epoch": 0.8055537108121151, "grad_norm": 1.0627405643463135, "learning_rate": 2.877197617097181e-06, "loss": 0.0656, "step": 45487 }, { "epoch": 0.8055714203491435, "grad_norm": 1.0978999137878418, "learning_rate": 2.876690945795318e-06, "loss": 0.0721, "step": 45488 }, { "epoch": 0.8055891298861719, "grad_norm": 0.6954900026321411, "learning_rate": 2.8761843143779115e-06, "loss": 0.0436, "step": 45489 }, { "epoch": 0.8056068394232003, "grad_norm": 1.0203994512557983, "learning_rate": 2.875677722846634e-06, "loss": 0.0644, "step": 45490 }, { "epoch": 0.8056245489602288, "grad_norm": 0.7386544942855835, "learning_rate": 2.8751711712031483e-06, "loss": 0.061, "step": 45491 }, { "epoch": 0.8056422584972572, "grad_norm": 0.3190356194972992, "learning_rate": 2.874664659449124e-06, "loss": 0.0452, "step": 45492 }, { "epoch": 0.8056599680342856, "grad_norm": 0.49178144335746765, "learning_rate": 2.8741581875862264e-06, "loss": 0.0588, "step": 45493 }, { "epoch": 0.8056776775713141, "grad_norm": 0.7234793305397034, "learning_rate": 2.8736517556161217e-06, "loss": 0.0333, "step": 45494 }, { "epoch": 0.8056953871083425, "grad_norm": 0.25131991505622864, "learning_rate": 2.8731453635404815e-06, "loss": 0.0309, "step": 45495 }, { "epoch": 0.8057130966453709, "grad_norm": 0.5566774606704712, "learning_rate": 2.8726390113609605e-06, "loss": 0.0663, "step": 45496 }, { "epoch": 0.8057308061823993, "grad_norm": 0.7979008555412292, "learning_rate": 2.872132699079232e-06, "loss": 0.0505, "step": 45497 }, { "epoch": 0.8057485157194278, "grad_norm": 0.6091736555099487, "learning_rate": 2.87162642669696e-06, "loss": 0.062, "step": 45498 }, { "epoch": 0.8057662252564562, "grad_norm": 0.683618426322937, "learning_rate": 2.871120194215814e-06, "loss": 0.0694, "step": 45499 }, { "epoch": 0.8057839347934846, "grad_norm": 0.7934120893478394, "learning_rate": 2.870614001637452e-06, "loss": 0.0662, "step": 45500 }, { "epoch": 0.805801644330513, "grad_norm": 0.29212814569473267, "learning_rate": 2.8701078489635435e-06, "loss": 0.0439, "step": 45501 }, { "epoch": 0.8058193538675416, "grad_norm": 0.43913352489471436, "learning_rate": 2.869601736195752e-06, "loss": 0.0746, "step": 45502 }, { "epoch": 0.80583706340457, "grad_norm": 0.8742753267288208, "learning_rate": 2.869095663335745e-06, "loss": 0.068, "step": 45503 }, { "epoch": 0.8058547729415984, "grad_norm": 0.5423089265823364, "learning_rate": 2.868589630385186e-06, "loss": 0.0456, "step": 45504 }, { "epoch": 0.8058724824786268, "grad_norm": 0.7658689618110657, "learning_rate": 2.8680836373457397e-06, "loss": 0.0596, "step": 45505 }, { "epoch": 0.8058901920156553, "grad_norm": 0.25356313586235046, "learning_rate": 2.8675776842190764e-06, "loss": 0.0503, "step": 45506 }, { "epoch": 0.8059079015526837, "grad_norm": 0.5914312601089478, "learning_rate": 2.86707177100685e-06, "loss": 0.0534, "step": 45507 }, { "epoch": 0.8059256110897121, "grad_norm": 1.0069842338562012, "learning_rate": 2.8665658977107306e-06, "loss": 0.0698, "step": 45508 }, { "epoch": 0.8059433206267406, "grad_norm": 0.5643539428710938, "learning_rate": 2.866060064332382e-06, "loss": 0.0783, "step": 45509 }, { "epoch": 0.805961030163769, "grad_norm": 0.48974961042404175, "learning_rate": 2.8655542708734744e-06, "loss": 0.0658, "step": 45510 }, { "epoch": 0.8059787397007974, "grad_norm": 0.6069629192352295, "learning_rate": 2.865048517335655e-06, "loss": 0.0448, "step": 45511 }, { "epoch": 0.8059964492378258, "grad_norm": 0.6362141966819763, "learning_rate": 2.8645428037206054e-06, "loss": 0.0553, "step": 45512 }, { "epoch": 0.8060141587748543, "grad_norm": 0.7041807770729065, "learning_rate": 2.864037130029985e-06, "loss": 0.049, "step": 45513 }, { "epoch": 0.8060318683118827, "grad_norm": 0.8976843953132629, "learning_rate": 2.86353149626545e-06, "loss": 0.0583, "step": 45514 }, { "epoch": 0.8060495778489111, "grad_norm": 0.36530566215515137, "learning_rate": 2.8630259024286716e-06, "loss": 0.0273, "step": 45515 }, { "epoch": 0.8060672873859395, "grad_norm": 0.8018470406532288, "learning_rate": 2.8625203485213075e-06, "loss": 0.0668, "step": 45516 }, { "epoch": 0.806084996922968, "grad_norm": 0.40771183371543884, "learning_rate": 2.8620148345450302e-06, "loss": 0.0357, "step": 45517 }, { "epoch": 0.8061027064599964, "grad_norm": 0.7157770991325378, "learning_rate": 2.86150936050149e-06, "loss": 0.042, "step": 45518 }, { "epoch": 0.8061204159970248, "grad_norm": 0.6778221130371094, "learning_rate": 2.8610039263923595e-06, "loss": 0.0523, "step": 45519 }, { "epoch": 0.8061381255340532, "grad_norm": 0.11019518226385117, "learning_rate": 2.860498532219296e-06, "loss": 0.0347, "step": 45520 }, { "epoch": 0.8061558350710817, "grad_norm": 0.5816936492919922, "learning_rate": 2.8599931779839646e-06, "loss": 0.0524, "step": 45521 }, { "epoch": 0.8061735446081101, "grad_norm": 0.43426790833473206, "learning_rate": 2.859487863688028e-06, "loss": 0.0492, "step": 45522 }, { "epoch": 0.8061912541451385, "grad_norm": 0.6660528182983398, "learning_rate": 2.8589825893331486e-06, "loss": 0.0473, "step": 45523 }, { "epoch": 0.806208963682167, "grad_norm": 0.4576142430305481, "learning_rate": 2.858477354920993e-06, "loss": 0.0444, "step": 45524 }, { "epoch": 0.8062266732191954, "grad_norm": 0.6517877578735352, "learning_rate": 2.8579721604532137e-06, "loss": 0.0423, "step": 45525 }, { "epoch": 0.8062443827562238, "grad_norm": 0.6579267978668213, "learning_rate": 2.8574670059314777e-06, "loss": 0.0742, "step": 45526 }, { "epoch": 0.8062620922932522, "grad_norm": 0.3787129819393158, "learning_rate": 2.856961891357452e-06, "loss": 0.0449, "step": 45527 }, { "epoch": 0.8062798018302807, "grad_norm": 0.6875947713851929, "learning_rate": 2.85645681673279e-06, "loss": 0.0536, "step": 45528 }, { "epoch": 0.8062975113673091, "grad_norm": 0.4640858769416809, "learning_rate": 2.855951782059155e-06, "loss": 0.0441, "step": 45529 }, { "epoch": 0.8063152209043375, "grad_norm": 0.5201005935668945, "learning_rate": 2.8554467873382116e-06, "loss": 0.0675, "step": 45530 }, { "epoch": 0.8063329304413659, "grad_norm": 0.5763734579086304, "learning_rate": 2.8549418325716216e-06, "loss": 0.0365, "step": 45531 }, { "epoch": 0.8063506399783944, "grad_norm": 0.9469609260559082, "learning_rate": 2.8544369177610414e-06, "loss": 0.0567, "step": 45532 }, { "epoch": 0.8063683495154228, "grad_norm": 0.5419652462005615, "learning_rate": 2.8539320429081365e-06, "loss": 0.0513, "step": 45533 }, { "epoch": 0.8063860590524512, "grad_norm": 0.5744800567626953, "learning_rate": 2.853427208014571e-06, "loss": 0.037, "step": 45534 }, { "epoch": 0.8064037685894796, "grad_norm": 0.5216544270515442, "learning_rate": 2.852922413081997e-06, "loss": 0.0504, "step": 45535 }, { "epoch": 0.8064214781265081, "grad_norm": 0.27184435725212097, "learning_rate": 2.852417658112081e-06, "loss": 0.058, "step": 45536 }, { "epoch": 0.8064391876635365, "grad_norm": 0.6027202606201172, "learning_rate": 2.8519129431064816e-06, "loss": 0.0545, "step": 45537 }, { "epoch": 0.8064568972005649, "grad_norm": 0.6632888913154602, "learning_rate": 2.851408268066863e-06, "loss": 0.0301, "step": 45538 }, { "epoch": 0.8064746067375934, "grad_norm": 0.4778890609741211, "learning_rate": 2.8509036329948785e-06, "loss": 0.0619, "step": 45539 }, { "epoch": 0.8064923162746218, "grad_norm": 0.5532606244087219, "learning_rate": 2.8503990378921883e-06, "loss": 0.0699, "step": 45540 }, { "epoch": 0.8065100258116502, "grad_norm": 1.0008457899093628, "learning_rate": 2.8498944827604616e-06, "loss": 0.0936, "step": 45541 }, { "epoch": 0.8065277353486786, "grad_norm": 0.6914786100387573, "learning_rate": 2.849389967601357e-06, "loss": 0.0918, "step": 45542 }, { "epoch": 0.8065454448857071, "grad_norm": 0.48291444778442383, "learning_rate": 2.848885492416524e-06, "loss": 0.0461, "step": 45543 }, { "epoch": 0.8065631544227355, "grad_norm": 0.4819973111152649, "learning_rate": 2.8483810572076306e-06, "loss": 0.0435, "step": 45544 }, { "epoch": 0.8065808639597639, "grad_norm": 0.9836901426315308, "learning_rate": 2.8478766619763387e-06, "loss": 0.087, "step": 45545 }, { "epoch": 0.8065985734967923, "grad_norm": 0.42058202624320984, "learning_rate": 2.8473723067242997e-06, "loss": 0.0466, "step": 45546 }, { "epoch": 0.8066162830338208, "grad_norm": 0.44417378306388855, "learning_rate": 2.846867991453176e-06, "loss": 0.0554, "step": 45547 }, { "epoch": 0.8066339925708492, "grad_norm": 0.9399576187133789, "learning_rate": 2.8463637161646277e-06, "loss": 0.0998, "step": 45548 }, { "epoch": 0.8066517021078776, "grad_norm": 0.8573731780052185, "learning_rate": 2.8458594808603137e-06, "loss": 0.0909, "step": 45549 }, { "epoch": 0.806669411644906, "grad_norm": 0.8741181492805481, "learning_rate": 2.8453552855418912e-06, "loss": 0.0649, "step": 45550 }, { "epoch": 0.8066871211819345, "grad_norm": 0.45321938395500183, "learning_rate": 2.8448511302110218e-06, "loss": 0.0464, "step": 45551 }, { "epoch": 0.8067048307189629, "grad_norm": 0.5268423557281494, "learning_rate": 2.8443470148693663e-06, "loss": 0.0746, "step": 45552 }, { "epoch": 0.8067225402559913, "grad_norm": 0.5242181420326233, "learning_rate": 2.8438429395185762e-06, "loss": 0.0461, "step": 45553 }, { "epoch": 0.8067402497930198, "grad_norm": 0.3879063129425049, "learning_rate": 2.8433389041603137e-06, "loss": 0.0424, "step": 45554 }, { "epoch": 0.8067579593300482, "grad_norm": 0.7795832753181458, "learning_rate": 2.8428349087962357e-06, "loss": 0.0681, "step": 45555 }, { "epoch": 0.8067756688670766, "grad_norm": 0.45031261444091797, "learning_rate": 2.8423309534280047e-06, "loss": 0.0399, "step": 45556 }, { "epoch": 0.806793378404105, "grad_norm": 0.551217794418335, "learning_rate": 2.841827038057272e-06, "loss": 0.0433, "step": 45557 }, { "epoch": 0.8068110879411335, "grad_norm": 0.7063847184181213, "learning_rate": 2.8413231626856983e-06, "loss": 0.0417, "step": 45558 }, { "epoch": 0.8068287974781619, "grad_norm": 0.8150011897087097, "learning_rate": 2.8408193273149424e-06, "loss": 0.0618, "step": 45559 }, { "epoch": 0.8068465070151903, "grad_norm": 0.8429008722305298, "learning_rate": 2.8403155319466595e-06, "loss": 0.0939, "step": 45560 }, { "epoch": 0.8068642165522187, "grad_norm": 0.5861035585403442, "learning_rate": 2.83981177658251e-06, "loss": 0.0684, "step": 45561 }, { "epoch": 0.8068819260892472, "grad_norm": 0.34213581681251526, "learning_rate": 2.839308061224149e-06, "loss": 0.0617, "step": 45562 }, { "epoch": 0.8068996356262756, "grad_norm": 0.696740984916687, "learning_rate": 2.838804385873239e-06, "loss": 0.0607, "step": 45563 }, { "epoch": 0.806917345163304, "grad_norm": 0.9782853126525879, "learning_rate": 2.8383007505314297e-06, "loss": 0.0634, "step": 45564 }, { "epoch": 0.8069350547003324, "grad_norm": 0.7728923559188843, "learning_rate": 2.8377971552003785e-06, "loss": 0.0704, "step": 45565 }, { "epoch": 0.806952764237361, "grad_norm": 0.49301034212112427, "learning_rate": 2.837293599881751e-06, "loss": 0.0404, "step": 45566 }, { "epoch": 0.8069704737743894, "grad_norm": 0.5728018283843994, "learning_rate": 2.8367900845771917e-06, "loss": 0.0663, "step": 45567 }, { "epoch": 0.8069881833114178, "grad_norm": 0.4720447361469269, "learning_rate": 2.8362866092883633e-06, "loss": 0.0525, "step": 45568 }, { "epoch": 0.8070058928484463, "grad_norm": 0.4408617317676544, "learning_rate": 2.8357831740169175e-06, "loss": 0.0574, "step": 45569 }, { "epoch": 0.8070236023854747, "grad_norm": 0.8222511410713196, "learning_rate": 2.8352797787645217e-06, "loss": 0.0595, "step": 45570 }, { "epoch": 0.8070413119225031, "grad_norm": 0.6229516863822937, "learning_rate": 2.8347764235328228e-06, "loss": 0.0429, "step": 45571 }, { "epoch": 0.8070590214595315, "grad_norm": 0.7625983357429504, "learning_rate": 2.834273108323477e-06, "loss": 0.0602, "step": 45572 }, { "epoch": 0.80707673099656, "grad_norm": 0.6972482800483704, "learning_rate": 2.8337698331381443e-06, "loss": 0.0421, "step": 45573 }, { "epoch": 0.8070944405335884, "grad_norm": 0.5089306831359863, "learning_rate": 2.833266597978481e-06, "loss": 0.057, "step": 45574 }, { "epoch": 0.8071121500706168, "grad_norm": 0.4124232530593872, "learning_rate": 2.8327634028461374e-06, "loss": 0.0364, "step": 45575 }, { "epoch": 0.8071298596076452, "grad_norm": 0.6476210355758667, "learning_rate": 2.8322602477427707e-06, "loss": 0.0502, "step": 45576 }, { "epoch": 0.8071475691446737, "grad_norm": 0.7796573638916016, "learning_rate": 2.8317571326700357e-06, "loss": 0.0481, "step": 45577 }, { "epoch": 0.8071652786817021, "grad_norm": 0.8651272058486938, "learning_rate": 2.8312540576295903e-06, "loss": 0.0429, "step": 45578 }, { "epoch": 0.8071829882187305, "grad_norm": 0.42754408717155457, "learning_rate": 2.8307510226230894e-06, "loss": 0.0538, "step": 45579 }, { "epoch": 0.8072006977557589, "grad_norm": 0.6250952482223511, "learning_rate": 2.8302480276521847e-06, "loss": 0.0517, "step": 45580 }, { "epoch": 0.8072184072927874, "grad_norm": 0.5024248361587524, "learning_rate": 2.8297450727185387e-06, "loss": 0.0428, "step": 45581 }, { "epoch": 0.8072361168298158, "grad_norm": 0.6960506439208984, "learning_rate": 2.8292421578237966e-06, "loss": 0.0544, "step": 45582 }, { "epoch": 0.8072538263668442, "grad_norm": 0.5455533862113953, "learning_rate": 2.8287392829696152e-06, "loss": 0.0306, "step": 45583 }, { "epoch": 0.8072715359038727, "grad_norm": 0.45954132080078125, "learning_rate": 2.8282364481576552e-06, "loss": 0.0433, "step": 45584 }, { "epoch": 0.8072892454409011, "grad_norm": 0.5056613683700562, "learning_rate": 2.827733653389563e-06, "loss": 0.0472, "step": 45585 }, { "epoch": 0.8073069549779295, "grad_norm": 0.5586795806884766, "learning_rate": 2.8272308986669947e-06, "loss": 0.0403, "step": 45586 }, { "epoch": 0.8073246645149579, "grad_norm": 0.43494799733161926, "learning_rate": 2.826728183991605e-06, "loss": 0.0423, "step": 45587 }, { "epoch": 0.8073423740519864, "grad_norm": 1.0122677087783813, "learning_rate": 2.8262255093650503e-06, "loss": 0.0633, "step": 45588 }, { "epoch": 0.8073600835890148, "grad_norm": 0.7504366040229797, "learning_rate": 2.825722874788981e-06, "loss": 0.0626, "step": 45589 }, { "epoch": 0.8073777931260432, "grad_norm": 0.3429751992225647, "learning_rate": 2.825220280265054e-06, "loss": 0.0384, "step": 45590 }, { "epoch": 0.8073955026630716, "grad_norm": 0.2174733728170395, "learning_rate": 2.824717725794922e-06, "loss": 0.053, "step": 45591 }, { "epoch": 0.8074132122001001, "grad_norm": 0.559333324432373, "learning_rate": 2.8242152113802354e-06, "loss": 0.0285, "step": 45592 }, { "epoch": 0.8074309217371285, "grad_norm": 0.8952099084854126, "learning_rate": 2.8237127370226485e-06, "loss": 0.0769, "step": 45593 }, { "epoch": 0.8074486312741569, "grad_norm": 0.7990816235542297, "learning_rate": 2.823210302723816e-06, "loss": 0.0581, "step": 45594 }, { "epoch": 0.8074663408111853, "grad_norm": 0.4821105897426605, "learning_rate": 2.822707908485392e-06, "loss": 0.0529, "step": 45595 }, { "epoch": 0.8074840503482138, "grad_norm": 0.5639643669128418, "learning_rate": 2.8222055543090246e-06, "loss": 0.0441, "step": 45596 }, { "epoch": 0.8075017598852422, "grad_norm": 0.8285377025604248, "learning_rate": 2.82170324019637e-06, "loss": 0.066, "step": 45597 }, { "epoch": 0.8075194694222706, "grad_norm": 0.5428064465522766, "learning_rate": 2.821200966149078e-06, "loss": 0.06, "step": 45598 }, { "epoch": 0.8075371789592991, "grad_norm": 0.32121366262435913, "learning_rate": 2.820698732168804e-06, "loss": 0.0571, "step": 45599 }, { "epoch": 0.8075548884963275, "grad_norm": 0.8970029354095459, "learning_rate": 2.8201965382572e-06, "loss": 0.0554, "step": 45600 }, { "epoch": 0.8075725980333559, "grad_norm": 0.41648629307746887, "learning_rate": 2.8196943844159163e-06, "loss": 0.0315, "step": 45601 }, { "epoch": 0.8075903075703843, "grad_norm": 0.7792074680328369, "learning_rate": 2.8191922706466116e-06, "loss": 0.0705, "step": 45602 }, { "epoch": 0.8076080171074128, "grad_norm": 0.5474082827568054, "learning_rate": 2.8186901969509264e-06, "loss": 0.0495, "step": 45603 }, { "epoch": 0.8076257266444412, "grad_norm": 0.5174615979194641, "learning_rate": 2.8181881633305194e-06, "loss": 0.0595, "step": 45604 }, { "epoch": 0.8076434361814696, "grad_norm": 0.45036882162094116, "learning_rate": 2.817686169787046e-06, "loss": 0.0442, "step": 45605 }, { "epoch": 0.807661145718498, "grad_norm": 0.7200860381126404, "learning_rate": 2.8171842163221447e-06, "loss": 0.0353, "step": 45606 }, { "epoch": 0.8076788552555265, "grad_norm": 0.733623743057251, "learning_rate": 2.816682302937477e-06, "loss": 0.0405, "step": 45607 }, { "epoch": 0.8076965647925549, "grad_norm": 0.35762953758239746, "learning_rate": 2.8161804296346944e-06, "loss": 0.0447, "step": 45608 }, { "epoch": 0.8077142743295833, "grad_norm": 0.7983015179634094, "learning_rate": 2.8156785964154496e-06, "loss": 0.0603, "step": 45609 }, { "epoch": 0.8077319838666117, "grad_norm": 0.5128516554832458, "learning_rate": 2.815176803281385e-06, "loss": 0.0668, "step": 45610 }, { "epoch": 0.8077496934036402, "grad_norm": 0.5728208422660828, "learning_rate": 2.814675050234156e-06, "loss": 0.0458, "step": 45611 }, { "epoch": 0.8077674029406686, "grad_norm": 0.6848514676094055, "learning_rate": 2.8141733372754147e-06, "loss": 0.06, "step": 45612 }, { "epoch": 0.807785112477697, "grad_norm": 0.5917202234268188, "learning_rate": 2.8136716644068146e-06, "loss": 0.0644, "step": 45613 }, { "epoch": 0.8078028220147255, "grad_norm": 0.5723278522491455, "learning_rate": 2.8131700316299975e-06, "loss": 0.0754, "step": 45614 }, { "epoch": 0.8078205315517539, "grad_norm": 0.4715602695941925, "learning_rate": 2.812668438946619e-06, "loss": 0.052, "step": 45615 }, { "epoch": 0.8078382410887823, "grad_norm": 0.3152368366718292, "learning_rate": 2.8121668863583296e-06, "loss": 0.0568, "step": 45616 }, { "epoch": 0.8078559506258107, "grad_norm": 0.6314438581466675, "learning_rate": 2.8116653738667758e-06, "loss": 0.0546, "step": 45617 }, { "epoch": 0.8078736601628392, "grad_norm": 0.4805421233177185, "learning_rate": 2.811163901473612e-06, "loss": 0.0431, "step": 45618 }, { "epoch": 0.8078913696998676, "grad_norm": 0.6966507434844971, "learning_rate": 2.810662469180486e-06, "loss": 0.0672, "step": 45619 }, { "epoch": 0.807909079236896, "grad_norm": 0.718349814414978, "learning_rate": 2.8101610769890508e-06, "loss": 0.0557, "step": 45620 }, { "epoch": 0.8079267887739244, "grad_norm": 0.5382356643676758, "learning_rate": 2.809659724900949e-06, "loss": 0.0463, "step": 45621 }, { "epoch": 0.8079444983109529, "grad_norm": 0.8242608904838562, "learning_rate": 2.809158412917834e-06, "loss": 0.0619, "step": 45622 }, { "epoch": 0.8079622078479813, "grad_norm": 0.6086775064468384, "learning_rate": 2.80865714104136e-06, "loss": 0.0658, "step": 45623 }, { "epoch": 0.8079799173850097, "grad_norm": 0.7707135081291199, "learning_rate": 2.8081559092731657e-06, "loss": 0.0702, "step": 45624 }, { "epoch": 0.8079976269220381, "grad_norm": 0.7478429079055786, "learning_rate": 2.8076547176149055e-06, "loss": 0.0435, "step": 45625 }, { "epoch": 0.8080153364590666, "grad_norm": 1.3921693563461304, "learning_rate": 2.8071535660682276e-06, "loss": 0.0619, "step": 45626 }, { "epoch": 0.808033045996095, "grad_norm": 0.2938573658466339, "learning_rate": 2.8066524546347803e-06, "loss": 0.044, "step": 45627 }, { "epoch": 0.8080507555331234, "grad_norm": 0.2741060256958008, "learning_rate": 2.806151383316215e-06, "loss": 0.0318, "step": 45628 }, { "epoch": 0.808068465070152, "grad_norm": 0.5544207692146301, "learning_rate": 2.805650352114178e-06, "loss": 0.0622, "step": 45629 }, { "epoch": 0.8080861746071804, "grad_norm": 0.7672390341758728, "learning_rate": 2.8051493610303215e-06, "loss": 0.0767, "step": 45630 }, { "epoch": 0.8081038841442088, "grad_norm": 0.5835685729980469, "learning_rate": 2.8046484100662857e-06, "loss": 0.0208, "step": 45631 }, { "epoch": 0.8081215936812371, "grad_norm": 0.8253868222236633, "learning_rate": 2.8041474992237243e-06, "loss": 0.0599, "step": 45632 }, { "epoch": 0.8081393032182657, "grad_norm": 0.5060573220252991, "learning_rate": 2.8036466285042826e-06, "loss": 0.0369, "step": 45633 }, { "epoch": 0.8081570127552941, "grad_norm": 0.5536584258079529, "learning_rate": 2.8031457979096146e-06, "loss": 0.0477, "step": 45634 }, { "epoch": 0.8081747222923225, "grad_norm": 0.6519536375999451, "learning_rate": 2.8026450074413575e-06, "loss": 0.0562, "step": 45635 }, { "epoch": 0.8081924318293509, "grad_norm": 0.4136066734790802, "learning_rate": 2.8021442571011655e-06, "loss": 0.0492, "step": 45636 }, { "epoch": 0.8082101413663794, "grad_norm": 0.9541925191879272, "learning_rate": 2.8016435468906905e-06, "loss": 0.0521, "step": 45637 }, { "epoch": 0.8082278509034078, "grad_norm": 0.5248828530311584, "learning_rate": 2.80114287681157e-06, "loss": 0.0612, "step": 45638 }, { "epoch": 0.8082455604404362, "grad_norm": 0.6234706044197083, "learning_rate": 2.8006422468654564e-06, "loss": 0.0704, "step": 45639 }, { "epoch": 0.8082632699774646, "grad_norm": 0.6782158613204956, "learning_rate": 2.800141657053995e-06, "loss": 0.0616, "step": 45640 }, { "epoch": 0.8082809795144931, "grad_norm": 0.5524988174438477, "learning_rate": 2.799641107378838e-06, "loss": 0.0452, "step": 45641 }, { "epoch": 0.8082986890515215, "grad_norm": 0.6313650608062744, "learning_rate": 2.799140597841624e-06, "loss": 0.0558, "step": 45642 }, { "epoch": 0.8083163985885499, "grad_norm": 0.421581506729126, "learning_rate": 2.7986401284440033e-06, "loss": 0.0417, "step": 45643 }, { "epoch": 0.8083341081255784, "grad_norm": 0.4753382205963135, "learning_rate": 2.798139699187623e-06, "loss": 0.0672, "step": 45644 }, { "epoch": 0.8083518176626068, "grad_norm": 0.5930691361427307, "learning_rate": 2.7976393100741283e-06, "loss": 0.071, "step": 45645 }, { "epoch": 0.8083695271996352, "grad_norm": 0.42680492997169495, "learning_rate": 2.7971389611051647e-06, "loss": 0.0553, "step": 45646 }, { "epoch": 0.8083872367366636, "grad_norm": 0.60048907995224, "learning_rate": 2.7966386522823822e-06, "loss": 0.0591, "step": 45647 }, { "epoch": 0.8084049462736921, "grad_norm": 0.8079046010971069, "learning_rate": 2.7961383836074266e-06, "loss": 0.0534, "step": 45648 }, { "epoch": 0.8084226558107205, "grad_norm": 0.503312349319458, "learning_rate": 2.795638155081938e-06, "loss": 0.0306, "step": 45649 }, { "epoch": 0.8084403653477489, "grad_norm": 0.5497791171073914, "learning_rate": 2.7951379667075656e-06, "loss": 0.0598, "step": 45650 }, { "epoch": 0.8084580748847773, "grad_norm": 0.40995657444000244, "learning_rate": 2.794637818485954e-06, "loss": 0.0559, "step": 45651 }, { "epoch": 0.8084757844218058, "grad_norm": 0.5959275364875793, "learning_rate": 2.794137710418754e-06, "loss": 0.0389, "step": 45652 }, { "epoch": 0.8084934939588342, "grad_norm": 0.3548670709133148, "learning_rate": 2.793637642507602e-06, "loss": 0.0601, "step": 45653 }, { "epoch": 0.8085112034958626, "grad_norm": 0.49350878596305847, "learning_rate": 2.7931376147541476e-06, "loss": 0.0578, "step": 45654 }, { "epoch": 0.808528913032891, "grad_norm": 0.5428740382194519, "learning_rate": 2.7926376271600375e-06, "loss": 0.0457, "step": 45655 }, { "epoch": 0.8085466225699195, "grad_norm": 0.5514793992042542, "learning_rate": 2.7921376797269134e-06, "loss": 0.0541, "step": 45656 }, { "epoch": 0.8085643321069479, "grad_norm": 1.0002321004867554, "learning_rate": 2.7916377724564206e-06, "loss": 0.0545, "step": 45657 }, { "epoch": 0.8085820416439763, "grad_norm": 0.584674060344696, "learning_rate": 2.7911379053502066e-06, "loss": 0.0867, "step": 45658 }, { "epoch": 0.8085997511810048, "grad_norm": 0.580626904964447, "learning_rate": 2.790638078409917e-06, "loss": 0.0443, "step": 45659 }, { "epoch": 0.8086174607180332, "grad_norm": 0.4131587743759155, "learning_rate": 2.7901382916371893e-06, "loss": 0.0548, "step": 45660 }, { "epoch": 0.8086351702550616, "grad_norm": 0.4477337598800659, "learning_rate": 2.7896385450336733e-06, "loss": 0.0534, "step": 45661 }, { "epoch": 0.80865287979209, "grad_norm": 0.5109073519706726, "learning_rate": 2.7891388386010137e-06, "loss": 0.053, "step": 45662 }, { "epoch": 0.8086705893291185, "grad_norm": 0.6340342164039612, "learning_rate": 2.788639172340848e-06, "loss": 0.0581, "step": 45663 }, { "epoch": 0.8086882988661469, "grad_norm": 0.5760423541069031, "learning_rate": 2.788139546254821e-06, "loss": 0.0504, "step": 45664 }, { "epoch": 0.8087060084031753, "grad_norm": 1.1526200771331787, "learning_rate": 2.7876399603445835e-06, "loss": 0.0696, "step": 45665 }, { "epoch": 0.8087237179402037, "grad_norm": 0.7175918221473694, "learning_rate": 2.787140414611779e-06, "loss": 0.0487, "step": 45666 }, { "epoch": 0.8087414274772322, "grad_norm": 0.5767340064048767, "learning_rate": 2.786640909058045e-06, "loss": 0.0318, "step": 45667 }, { "epoch": 0.8087591370142606, "grad_norm": 0.6479367017745972, "learning_rate": 2.7861414436850256e-06, "loss": 0.0465, "step": 45668 }, { "epoch": 0.808776846551289, "grad_norm": 1.0390946865081787, "learning_rate": 2.7856420184943693e-06, "loss": 0.0672, "step": 45669 }, { "epoch": 0.8087945560883174, "grad_norm": 0.5638632774353027, "learning_rate": 2.7851426334877125e-06, "loss": 0.0537, "step": 45670 }, { "epoch": 0.8088122656253459, "grad_norm": 0.39799270033836365, "learning_rate": 2.7846432886667e-06, "loss": 0.0579, "step": 45671 }, { "epoch": 0.8088299751623743, "grad_norm": 0.7432653307914734, "learning_rate": 2.7841439840329763e-06, "loss": 0.0398, "step": 45672 }, { "epoch": 0.8088476846994027, "grad_norm": 0.6190977692604065, "learning_rate": 2.783644719588183e-06, "loss": 0.0769, "step": 45673 }, { "epoch": 0.8088653942364312, "grad_norm": 0.7266925573348999, "learning_rate": 2.7831454953339623e-06, "loss": 0.0714, "step": 45674 }, { "epoch": 0.8088831037734596, "grad_norm": 0.7005541920661926, "learning_rate": 2.782646311271958e-06, "loss": 0.0281, "step": 45675 }, { "epoch": 0.808900813310488, "grad_norm": 0.5908018350601196, "learning_rate": 2.7821471674038116e-06, "loss": 0.0577, "step": 45676 }, { "epoch": 0.8089185228475164, "grad_norm": 0.5297214984893799, "learning_rate": 2.781648063731169e-06, "loss": 0.0517, "step": 45677 }, { "epoch": 0.8089362323845449, "grad_norm": 1.0967005491256714, "learning_rate": 2.7811490002556657e-06, "loss": 0.0725, "step": 45678 }, { "epoch": 0.8089539419215733, "grad_norm": 0.7567749619483948, "learning_rate": 2.7806499769789445e-06, "loss": 0.0749, "step": 45679 }, { "epoch": 0.8089716514586017, "grad_norm": 0.7225825786590576, "learning_rate": 2.780150993902653e-06, "loss": 0.0659, "step": 45680 }, { "epoch": 0.8089893609956301, "grad_norm": 0.2515288293361664, "learning_rate": 2.7796520510284252e-06, "loss": 0.0381, "step": 45681 }, { "epoch": 0.8090070705326586, "grad_norm": 0.430707722902298, "learning_rate": 2.7791531483579073e-06, "loss": 0.0345, "step": 45682 }, { "epoch": 0.809024780069687, "grad_norm": 0.5601658821105957, "learning_rate": 2.7786542858927387e-06, "loss": 0.0485, "step": 45683 }, { "epoch": 0.8090424896067154, "grad_norm": 0.6123724579811096, "learning_rate": 2.7781554636345613e-06, "loss": 0.0529, "step": 45684 }, { "epoch": 0.8090601991437438, "grad_norm": 0.7676178216934204, "learning_rate": 2.777656681585015e-06, "loss": 0.0688, "step": 45685 }, { "epoch": 0.8090779086807723, "grad_norm": 0.6764156222343445, "learning_rate": 2.777157939745744e-06, "loss": 0.0667, "step": 45686 }, { "epoch": 0.8090956182178007, "grad_norm": 0.4604261815547943, "learning_rate": 2.7766592381183904e-06, "loss": 0.0504, "step": 45687 }, { "epoch": 0.8091133277548291, "grad_norm": 0.7500559091567993, "learning_rate": 2.7761605767045873e-06, "loss": 0.0578, "step": 45688 }, { "epoch": 0.8091310372918576, "grad_norm": 0.6416105628013611, "learning_rate": 2.7756619555059803e-06, "loss": 0.0333, "step": 45689 }, { "epoch": 0.809148746828886, "grad_norm": 0.868651807308197, "learning_rate": 2.7751633745242084e-06, "loss": 0.0993, "step": 45690 }, { "epoch": 0.8091664563659144, "grad_norm": 0.6039391756057739, "learning_rate": 2.7746648337609165e-06, "loss": 0.0583, "step": 45691 }, { "epoch": 0.8091841659029428, "grad_norm": 0.594190239906311, "learning_rate": 2.774166333217737e-06, "loss": 0.1066, "step": 45692 }, { "epoch": 0.8092018754399714, "grad_norm": 0.6676367521286011, "learning_rate": 2.77366787289631e-06, "loss": 0.0683, "step": 45693 }, { "epoch": 0.8092195849769998, "grad_norm": 0.4936743676662445, "learning_rate": 2.773169452798286e-06, "loss": 0.0494, "step": 45694 }, { "epoch": 0.8092372945140281, "grad_norm": 0.732537031173706, "learning_rate": 2.7726710729252952e-06, "loss": 0.0484, "step": 45695 }, { "epoch": 0.8092550040510565, "grad_norm": 0.6998746991157532, "learning_rate": 2.7721727332789783e-06, "loss": 0.0416, "step": 45696 }, { "epoch": 0.8092727135880851, "grad_norm": 0.5267511606216431, "learning_rate": 2.7716744338609768e-06, "loss": 0.0499, "step": 45697 }, { "epoch": 0.8092904231251135, "grad_norm": 0.48249247670173645, "learning_rate": 2.7711761746729346e-06, "loss": 0.0329, "step": 45698 }, { "epoch": 0.8093081326621419, "grad_norm": 0.8780125379562378, "learning_rate": 2.770677955716481e-06, "loss": 0.0845, "step": 45699 }, { "epoch": 0.8093258421991703, "grad_norm": 0.6307783722877502, "learning_rate": 2.77017977699326e-06, "loss": 0.0428, "step": 45700 }, { "epoch": 0.8093435517361988, "grad_norm": 0.5157628655433655, "learning_rate": 2.7696816385049117e-06, "loss": 0.0705, "step": 45701 }, { "epoch": 0.8093612612732272, "grad_norm": 0.45889750123023987, "learning_rate": 2.7691835402530724e-06, "loss": 0.0668, "step": 45702 }, { "epoch": 0.8093789708102556, "grad_norm": 0.5799201726913452, "learning_rate": 2.7686854822393824e-06, "loss": 0.0436, "step": 45703 }, { "epoch": 0.8093966803472841, "grad_norm": 0.4504215717315674, "learning_rate": 2.768187464465481e-06, "loss": 0.0368, "step": 45704 }, { "epoch": 0.8094143898843125, "grad_norm": 0.5229244232177734, "learning_rate": 2.7676894869330093e-06, "loss": 0.0588, "step": 45705 }, { "epoch": 0.8094320994213409, "grad_norm": 0.6611166000366211, "learning_rate": 2.767191549643597e-06, "loss": 0.0581, "step": 45706 }, { "epoch": 0.8094498089583693, "grad_norm": 0.5766075849533081, "learning_rate": 2.7666936525988883e-06, "loss": 0.0295, "step": 45707 }, { "epoch": 0.8094675184953978, "grad_norm": 0.5284690260887146, "learning_rate": 2.7661957958005245e-06, "loss": 0.0499, "step": 45708 }, { "epoch": 0.8094852280324262, "grad_norm": 0.3258039057254791, "learning_rate": 2.765697979250134e-06, "loss": 0.045, "step": 45709 }, { "epoch": 0.8095029375694546, "grad_norm": 0.42966407537460327, "learning_rate": 2.7652002029493608e-06, "loss": 0.0586, "step": 45710 }, { "epoch": 0.809520647106483, "grad_norm": 0.5309751629829407, "learning_rate": 2.7647024668998393e-06, "loss": 0.049, "step": 45711 }, { "epoch": 0.8095383566435115, "grad_norm": 0.8169436454772949, "learning_rate": 2.7642047711032107e-06, "loss": 0.0583, "step": 45712 }, { "epoch": 0.8095560661805399, "grad_norm": 0.786848247051239, "learning_rate": 2.7637071155611123e-06, "loss": 0.0691, "step": 45713 }, { "epoch": 0.8095737757175683, "grad_norm": 0.27393412590026855, "learning_rate": 2.7632095002751784e-06, "loss": 0.0313, "step": 45714 }, { "epoch": 0.8095914852545967, "grad_norm": 0.46889546513557434, "learning_rate": 2.7627119252470467e-06, "loss": 0.0534, "step": 45715 }, { "epoch": 0.8096091947916252, "grad_norm": 0.6168493628501892, "learning_rate": 2.762214390478359e-06, "loss": 0.0586, "step": 45716 }, { "epoch": 0.8096269043286536, "grad_norm": 0.5843859314918518, "learning_rate": 2.761716895970746e-06, "loss": 0.0377, "step": 45717 }, { "epoch": 0.809644613865682, "grad_norm": 0.9001213312149048, "learning_rate": 2.7612194417258463e-06, "loss": 0.0728, "step": 45718 }, { "epoch": 0.8096623234027105, "grad_norm": 1.069757103919983, "learning_rate": 2.7607220277453e-06, "loss": 0.0885, "step": 45719 }, { "epoch": 0.8096800329397389, "grad_norm": 0.36051657795906067, "learning_rate": 2.7602246540307356e-06, "loss": 0.044, "step": 45720 }, { "epoch": 0.8096977424767673, "grad_norm": 0.44749873876571655, "learning_rate": 2.759727320583796e-06, "loss": 0.0538, "step": 45721 }, { "epoch": 0.8097154520137957, "grad_norm": 0.42568346858024597, "learning_rate": 2.7592300274061103e-06, "loss": 0.0395, "step": 45722 }, { "epoch": 0.8097331615508242, "grad_norm": 0.4431053102016449, "learning_rate": 2.758732774499327e-06, "loss": 0.0398, "step": 45723 }, { "epoch": 0.8097508710878526, "grad_norm": 0.4663524329662323, "learning_rate": 2.7582355618650716e-06, "loss": 0.0571, "step": 45724 }, { "epoch": 0.809768580624881, "grad_norm": 0.31612786650657654, "learning_rate": 2.757738389504984e-06, "loss": 0.0726, "step": 45725 }, { "epoch": 0.8097862901619094, "grad_norm": 0.523366391658783, "learning_rate": 2.7572412574207006e-06, "loss": 0.053, "step": 45726 }, { "epoch": 0.8098039996989379, "grad_norm": 0.5682284832000732, "learning_rate": 2.756744165613852e-06, "loss": 0.0454, "step": 45727 }, { "epoch": 0.8098217092359663, "grad_norm": 0.1437780112028122, "learning_rate": 2.7562471140860784e-06, "loss": 0.0477, "step": 45728 }, { "epoch": 0.8098394187729947, "grad_norm": 0.5853795409202576, "learning_rate": 2.755750102839012e-06, "loss": 0.0544, "step": 45729 }, { "epoch": 0.8098571283100231, "grad_norm": 0.4843183159828186, "learning_rate": 2.7552531318742897e-06, "loss": 0.0474, "step": 45730 }, { "epoch": 0.8098748378470516, "grad_norm": 0.5634669661521912, "learning_rate": 2.754756201193545e-06, "loss": 0.0754, "step": 45731 }, { "epoch": 0.80989254738408, "grad_norm": 0.30888840556144714, "learning_rate": 2.7542593107984154e-06, "loss": 0.0277, "step": 45732 }, { "epoch": 0.8099102569211084, "grad_norm": 0.7274872064590454, "learning_rate": 2.7537624606905377e-06, "loss": 0.0535, "step": 45733 }, { "epoch": 0.8099279664581369, "grad_norm": 0.7290495038032532, "learning_rate": 2.7532656508715385e-06, "loss": 0.0526, "step": 45734 }, { "epoch": 0.8099456759951653, "grad_norm": 0.6435683965682983, "learning_rate": 2.7527688813430575e-06, "loss": 0.0557, "step": 45735 }, { "epoch": 0.8099633855321937, "grad_norm": 0.6292628645896912, "learning_rate": 2.7522721521067275e-06, "loss": 0.0424, "step": 45736 }, { "epoch": 0.8099810950692221, "grad_norm": 0.5391951203346252, "learning_rate": 2.7517754631641873e-06, "loss": 0.0714, "step": 45737 }, { "epoch": 0.8099988046062506, "grad_norm": 0.3413489758968353, "learning_rate": 2.7512788145170626e-06, "loss": 0.0545, "step": 45738 }, { "epoch": 0.810016514143279, "grad_norm": 0.6097316741943359, "learning_rate": 2.7507822061669917e-06, "loss": 0.0541, "step": 45739 }, { "epoch": 0.8100342236803074, "grad_norm": 0.6024215221405029, "learning_rate": 2.750285638115609e-06, "loss": 0.0762, "step": 45740 }, { "epoch": 0.8100519332173358, "grad_norm": 0.23401151597499847, "learning_rate": 2.7497891103645473e-06, "loss": 0.0383, "step": 45741 }, { "epoch": 0.8100696427543643, "grad_norm": 0.5246700644493103, "learning_rate": 2.749292622915441e-06, "loss": 0.0437, "step": 45742 }, { "epoch": 0.8100873522913927, "grad_norm": 0.3458285927772522, "learning_rate": 2.7487961757699217e-06, "loss": 0.0452, "step": 45743 }, { "epoch": 0.8101050618284211, "grad_norm": 0.7549732327461243, "learning_rate": 2.748299768929628e-06, "loss": 0.0489, "step": 45744 }, { "epoch": 0.8101227713654495, "grad_norm": 0.533279299736023, "learning_rate": 2.7478034023961857e-06, "loss": 0.0626, "step": 45745 }, { "epoch": 0.810140480902478, "grad_norm": 0.7293389439582825, "learning_rate": 2.7473070761712317e-06, "loss": 0.0532, "step": 45746 }, { "epoch": 0.8101581904395064, "grad_norm": 1.4058700799942017, "learning_rate": 2.746810790256396e-06, "loss": 0.0688, "step": 45747 }, { "epoch": 0.8101758999765348, "grad_norm": 0.38622158765792847, "learning_rate": 2.7463145446533196e-06, "loss": 0.0433, "step": 45748 }, { "epoch": 0.8101936095135633, "grad_norm": 0.5986588001251221, "learning_rate": 2.745818339363624e-06, "loss": 0.0389, "step": 45749 }, { "epoch": 0.8102113190505917, "grad_norm": 0.6536357402801514, "learning_rate": 2.745322174388946e-06, "loss": 0.0433, "step": 45750 }, { "epoch": 0.8102290285876201, "grad_norm": 0.2372218519449234, "learning_rate": 2.7448260497309186e-06, "loss": 0.0421, "step": 45751 }, { "epoch": 0.8102467381246485, "grad_norm": 0.41058123111724854, "learning_rate": 2.7443299653911745e-06, "loss": 0.0422, "step": 45752 }, { "epoch": 0.810264447661677, "grad_norm": 0.5795214772224426, "learning_rate": 2.7438339213713448e-06, "loss": 0.0675, "step": 45753 }, { "epoch": 0.8102821571987054, "grad_norm": 0.8882120251655579, "learning_rate": 2.743337917673063e-06, "loss": 0.0764, "step": 45754 }, { "epoch": 0.8102998667357338, "grad_norm": 0.4146822392940521, "learning_rate": 2.7428419542979622e-06, "loss": 0.0678, "step": 45755 }, { "epoch": 0.8103175762727622, "grad_norm": 0.4370483160018921, "learning_rate": 2.7423460312476684e-06, "loss": 0.0585, "step": 45756 }, { "epoch": 0.8103352858097908, "grad_norm": 0.4527278542518616, "learning_rate": 2.7418501485238166e-06, "loss": 0.0266, "step": 45757 }, { "epoch": 0.8103529953468191, "grad_norm": 0.3683530390262604, "learning_rate": 2.7413543061280428e-06, "loss": 0.0374, "step": 45758 }, { "epoch": 0.8103707048838475, "grad_norm": 0.8820227384567261, "learning_rate": 2.740858504061963e-06, "loss": 0.0604, "step": 45759 }, { "epoch": 0.8103884144208761, "grad_norm": 0.36054521799087524, "learning_rate": 2.7403627423272256e-06, "loss": 0.0476, "step": 45760 }, { "epoch": 0.8104061239579045, "grad_norm": 0.722740113735199, "learning_rate": 2.739867020925453e-06, "loss": 0.0774, "step": 45761 }, { "epoch": 0.8104238334949329, "grad_norm": 0.36162248253822327, "learning_rate": 2.7393713398582827e-06, "loss": 0.0648, "step": 45762 }, { "epoch": 0.8104415430319613, "grad_norm": 0.15978607535362244, "learning_rate": 2.738875699127335e-06, "loss": 0.0559, "step": 45763 }, { "epoch": 0.8104592525689898, "grad_norm": 0.6405574679374695, "learning_rate": 2.7383800987342487e-06, "loss": 0.0408, "step": 45764 }, { "epoch": 0.8104769621060182, "grad_norm": 0.4214402735233307, "learning_rate": 2.737884538680653e-06, "loss": 0.0432, "step": 45765 }, { "epoch": 0.8104946716430466, "grad_norm": 0.2881225049495697, "learning_rate": 2.7373890189681756e-06, "loss": 0.0532, "step": 45766 }, { "epoch": 0.810512381180075, "grad_norm": 0.7352992296218872, "learning_rate": 2.7368935395984457e-06, "loss": 0.0926, "step": 45767 }, { "epoch": 0.8105300907171035, "grad_norm": 0.73751300573349, "learning_rate": 2.736398100573097e-06, "loss": 0.0612, "step": 45768 }, { "epoch": 0.8105478002541319, "grad_norm": 0.31014516949653625, "learning_rate": 2.735902701893758e-06, "loss": 0.0342, "step": 45769 }, { "epoch": 0.8105655097911603, "grad_norm": 0.48060545325279236, "learning_rate": 2.735407343562058e-06, "loss": 0.0578, "step": 45770 }, { "epoch": 0.8105832193281887, "grad_norm": 0.7198700904846191, "learning_rate": 2.734912025579628e-06, "loss": 0.042, "step": 45771 }, { "epoch": 0.8106009288652172, "grad_norm": 0.14952558279037476, "learning_rate": 2.7344167479481012e-06, "loss": 0.0516, "step": 45772 }, { "epoch": 0.8106186384022456, "grad_norm": 0.27237337827682495, "learning_rate": 2.7339215106690972e-06, "loss": 0.0376, "step": 45773 }, { "epoch": 0.810636347939274, "grad_norm": 0.6246651411056519, "learning_rate": 2.733426313744252e-06, "loss": 0.0847, "step": 45774 }, { "epoch": 0.8106540574763025, "grad_norm": 0.6554000973701477, "learning_rate": 2.732931157175193e-06, "loss": 0.057, "step": 45775 }, { "epoch": 0.8106717670133309, "grad_norm": 0.42518046498298645, "learning_rate": 2.732436040963555e-06, "loss": 0.0537, "step": 45776 }, { "epoch": 0.8106894765503593, "grad_norm": 0.3804517090320587, "learning_rate": 2.7319409651109565e-06, "loss": 0.0347, "step": 45777 }, { "epoch": 0.8107071860873877, "grad_norm": 0.720662534236908, "learning_rate": 2.7314459296190314e-06, "loss": 0.0519, "step": 45778 }, { "epoch": 0.8107248956244162, "grad_norm": 0.5453797578811646, "learning_rate": 2.7309509344894096e-06, "loss": 0.0501, "step": 45779 }, { "epoch": 0.8107426051614446, "grad_norm": 0.6395961046218872, "learning_rate": 2.730455979723717e-06, "loss": 0.0612, "step": 45780 }, { "epoch": 0.810760314698473, "grad_norm": 0.6659092903137207, "learning_rate": 2.729961065323583e-06, "loss": 0.0656, "step": 45781 }, { "epoch": 0.8107780242355014, "grad_norm": 0.5822020173072815, "learning_rate": 2.729466191290637e-06, "loss": 0.0857, "step": 45782 }, { "epoch": 0.8107957337725299, "grad_norm": 0.7070749998092651, "learning_rate": 2.7289713576265094e-06, "loss": 0.0663, "step": 45783 }, { "epoch": 0.8108134433095583, "grad_norm": 0.44600942730903625, "learning_rate": 2.7284765643328208e-06, "loss": 0.0608, "step": 45784 }, { "epoch": 0.8108311528465867, "grad_norm": 0.6121541261672974, "learning_rate": 2.727981811411204e-06, "loss": 0.0833, "step": 45785 }, { "epoch": 0.8108488623836151, "grad_norm": 0.7737833261489868, "learning_rate": 2.7274870988632855e-06, "loss": 0.0513, "step": 45786 }, { "epoch": 0.8108665719206436, "grad_norm": 0.552982747554779, "learning_rate": 2.7269924266906967e-06, "loss": 0.0521, "step": 45787 }, { "epoch": 0.810884281457672, "grad_norm": 0.38867083191871643, "learning_rate": 2.7264977948950524e-06, "loss": 0.0433, "step": 45788 }, { "epoch": 0.8109019909947004, "grad_norm": 0.5723981857299805, "learning_rate": 2.7260032034779956e-06, "loss": 0.0925, "step": 45789 }, { "epoch": 0.8109197005317289, "grad_norm": 0.6152152419090271, "learning_rate": 2.7255086524411483e-06, "loss": 0.0606, "step": 45790 }, { "epoch": 0.8109374100687573, "grad_norm": 0.3230655789375305, "learning_rate": 2.7250141417861335e-06, "loss": 0.0192, "step": 45791 }, { "epoch": 0.8109551196057857, "grad_norm": 0.6099295020103455, "learning_rate": 2.7245196715145804e-06, "loss": 0.0919, "step": 45792 }, { "epoch": 0.8109728291428141, "grad_norm": 0.824806272983551, "learning_rate": 2.7240252416281157e-06, "loss": 0.0629, "step": 45793 }, { "epoch": 0.8109905386798426, "grad_norm": 0.2905190885066986, "learning_rate": 2.723530852128372e-06, "loss": 0.0529, "step": 45794 }, { "epoch": 0.811008248216871, "grad_norm": 0.539054811000824, "learning_rate": 2.723036503016964e-06, "loss": 0.0378, "step": 45795 }, { "epoch": 0.8110259577538994, "grad_norm": 0.5567378997802734, "learning_rate": 2.722542194295524e-06, "loss": 0.0521, "step": 45796 }, { "epoch": 0.8110436672909278, "grad_norm": 0.56305992603302, "learning_rate": 2.7220479259656794e-06, "loss": 0.0228, "step": 45797 }, { "epoch": 0.8110613768279563, "grad_norm": 0.7256256341934204, "learning_rate": 2.7215536980290552e-06, "loss": 0.0387, "step": 45798 }, { "epoch": 0.8110790863649847, "grad_norm": 0.684432327747345, "learning_rate": 2.721059510487278e-06, "loss": 0.06, "step": 45799 }, { "epoch": 0.8110967959020131, "grad_norm": 0.3324499726295471, "learning_rate": 2.7205653633419723e-06, "loss": 0.0384, "step": 45800 }, { "epoch": 0.8111145054390415, "grad_norm": 0.8646567463874817, "learning_rate": 2.7200712565947683e-06, "loss": 0.0862, "step": 45801 }, { "epoch": 0.81113221497607, "grad_norm": 0.34321150183677673, "learning_rate": 2.7195771902472843e-06, "loss": 0.0483, "step": 45802 }, { "epoch": 0.8111499245130984, "grad_norm": 0.5456827282905579, "learning_rate": 2.7190831643011493e-06, "loss": 0.0673, "step": 45803 }, { "epoch": 0.8111676340501268, "grad_norm": 0.22369404137134552, "learning_rate": 2.718589178757992e-06, "loss": 0.0321, "step": 45804 }, { "epoch": 0.8111853435871553, "grad_norm": 0.4749263823032379, "learning_rate": 2.7180952336194308e-06, "loss": 0.0664, "step": 45805 }, { "epoch": 0.8112030531241837, "grad_norm": 0.5799496173858643, "learning_rate": 2.717601328887093e-06, "loss": 0.0583, "step": 45806 }, { "epoch": 0.8112207626612121, "grad_norm": 0.8497409224510193, "learning_rate": 2.717107464562606e-06, "loss": 0.0587, "step": 45807 }, { "epoch": 0.8112384721982405, "grad_norm": 0.6638196706771851, "learning_rate": 2.7166136406475926e-06, "loss": 0.0703, "step": 45808 }, { "epoch": 0.811256181735269, "grad_norm": 0.6189472675323486, "learning_rate": 2.716119857143677e-06, "loss": 0.0745, "step": 45809 }, { "epoch": 0.8112738912722974, "grad_norm": 0.8944550156593323, "learning_rate": 2.7156261140524868e-06, "loss": 0.0879, "step": 45810 }, { "epoch": 0.8112916008093258, "grad_norm": 0.5703518390655518, "learning_rate": 2.7151324113756464e-06, "loss": 0.0494, "step": 45811 }, { "epoch": 0.8113093103463542, "grad_norm": 0.6663873791694641, "learning_rate": 2.7146387491147735e-06, "loss": 0.0322, "step": 45812 }, { "epoch": 0.8113270198833827, "grad_norm": 0.4915742874145508, "learning_rate": 2.714145127271498e-06, "loss": 0.0606, "step": 45813 }, { "epoch": 0.8113447294204111, "grad_norm": 0.47844821214675903, "learning_rate": 2.713651545847442e-06, "loss": 0.0403, "step": 45814 }, { "epoch": 0.8113624389574395, "grad_norm": 0.6742668151855469, "learning_rate": 2.713158004844233e-06, "loss": 0.0709, "step": 45815 }, { "epoch": 0.8113801484944679, "grad_norm": 0.45169782638549805, "learning_rate": 2.7126645042634873e-06, "loss": 0.0451, "step": 45816 }, { "epoch": 0.8113978580314964, "grad_norm": 0.7457423806190491, "learning_rate": 2.712171044106829e-06, "loss": 0.0577, "step": 45817 }, { "epoch": 0.8114155675685248, "grad_norm": 1.1571906805038452, "learning_rate": 2.711677624375889e-06, "loss": 0.0642, "step": 45818 }, { "epoch": 0.8114332771055532, "grad_norm": 0.24378807842731476, "learning_rate": 2.7111842450722916e-06, "loss": 0.043, "step": 45819 }, { "epoch": 0.8114509866425818, "grad_norm": 0.7693747878074646, "learning_rate": 2.7106909061976496e-06, "loss": 0.0539, "step": 45820 }, { "epoch": 0.8114686961796101, "grad_norm": 0.3607056140899658, "learning_rate": 2.7101976077535936e-06, "loss": 0.0492, "step": 45821 }, { "epoch": 0.8114864057166385, "grad_norm": 0.6017051935195923, "learning_rate": 2.709704349741746e-06, "loss": 0.0429, "step": 45822 }, { "epoch": 0.811504115253667, "grad_norm": 0.7123914957046509, "learning_rate": 2.709211132163725e-06, "loss": 0.0606, "step": 45823 }, { "epoch": 0.8115218247906955, "grad_norm": 0.5556067824363708, "learning_rate": 2.708717955021156e-06, "loss": 0.0514, "step": 45824 }, { "epoch": 0.8115395343277239, "grad_norm": 0.8199970722198486, "learning_rate": 2.708224818315664e-06, "loss": 0.0528, "step": 45825 }, { "epoch": 0.8115572438647523, "grad_norm": 0.43691110610961914, "learning_rate": 2.7077317220488658e-06, "loss": 0.0436, "step": 45826 }, { "epoch": 0.8115749534017807, "grad_norm": 0.8527861833572388, "learning_rate": 2.7072386662223885e-06, "loss": 0.0674, "step": 45827 }, { "epoch": 0.8115926629388092, "grad_norm": 0.7724180817604065, "learning_rate": 2.706745650837853e-06, "loss": 0.0756, "step": 45828 }, { "epoch": 0.8116103724758376, "grad_norm": 0.6320728063583374, "learning_rate": 2.706252675896884e-06, "loss": 0.0398, "step": 45829 }, { "epoch": 0.811628082012866, "grad_norm": 0.5049324035644531, "learning_rate": 2.7057597414010976e-06, "loss": 0.0857, "step": 45830 }, { "epoch": 0.8116457915498944, "grad_norm": 0.8628618717193604, "learning_rate": 2.705266847352117e-06, "loss": 0.0915, "step": 45831 }, { "epoch": 0.8116635010869229, "grad_norm": 0.6775133013725281, "learning_rate": 2.704773993751564e-06, "loss": 0.0563, "step": 45832 }, { "epoch": 0.8116812106239513, "grad_norm": 0.7025613784790039, "learning_rate": 2.7042811806010664e-06, "loss": 0.0575, "step": 45833 }, { "epoch": 0.8116989201609797, "grad_norm": 0.49536630511283875, "learning_rate": 2.7037884079022373e-06, "loss": 0.0536, "step": 45834 }, { "epoch": 0.8117166296980082, "grad_norm": 0.5409239530563354, "learning_rate": 2.703295675656699e-06, "loss": 0.0534, "step": 45835 }, { "epoch": 0.8117343392350366, "grad_norm": 0.39203259348869324, "learning_rate": 2.7028029838660735e-06, "loss": 0.0751, "step": 45836 }, { "epoch": 0.811752048772065, "grad_norm": 0.9042271375656128, "learning_rate": 2.7023103325319843e-06, "loss": 0.0923, "step": 45837 }, { "epoch": 0.8117697583090934, "grad_norm": 0.6503090858459473, "learning_rate": 2.7018177216560485e-06, "loss": 0.066, "step": 45838 }, { "epoch": 0.8117874678461219, "grad_norm": 0.4195833206176758, "learning_rate": 2.70132515123989e-06, "loss": 0.0467, "step": 45839 }, { "epoch": 0.8118051773831503, "grad_norm": 0.5019513964653015, "learning_rate": 2.700832621285131e-06, "loss": 0.0494, "step": 45840 }, { "epoch": 0.8118228869201787, "grad_norm": 0.7889889478683472, "learning_rate": 2.7003401317933846e-06, "loss": 0.0515, "step": 45841 }, { "epoch": 0.8118405964572071, "grad_norm": 0.7935657501220703, "learning_rate": 2.699847682766275e-06, "loss": 0.0714, "step": 45842 }, { "epoch": 0.8118583059942356, "grad_norm": 0.7788904309272766, "learning_rate": 2.699355274205427e-06, "loss": 0.0538, "step": 45843 }, { "epoch": 0.811876015531264, "grad_norm": 0.5703156590461731, "learning_rate": 2.698862906112452e-06, "loss": 0.0494, "step": 45844 }, { "epoch": 0.8118937250682924, "grad_norm": 0.2839866578578949, "learning_rate": 2.6983705784889727e-06, "loss": 0.0594, "step": 45845 }, { "epoch": 0.8119114346053208, "grad_norm": 0.7681782841682434, "learning_rate": 2.697878291336607e-06, "loss": 0.0519, "step": 45846 }, { "epoch": 0.8119291441423493, "grad_norm": 0.23372212052345276, "learning_rate": 2.697386044656986e-06, "loss": 0.0584, "step": 45847 }, { "epoch": 0.8119468536793777, "grad_norm": 0.4408295750617981, "learning_rate": 2.6968938384517157e-06, "loss": 0.0602, "step": 45848 }, { "epoch": 0.8119645632164061, "grad_norm": 0.31103745102882385, "learning_rate": 2.696401672722421e-06, "loss": 0.0298, "step": 45849 }, { "epoch": 0.8119822727534346, "grad_norm": 0.45383119583129883, "learning_rate": 2.6959095474707194e-06, "loss": 0.0569, "step": 45850 }, { "epoch": 0.811999982290463, "grad_norm": 0.5154492855072021, "learning_rate": 2.6954174626982348e-06, "loss": 0.0569, "step": 45851 }, { "epoch": 0.8120176918274914, "grad_norm": 0.49524715542793274, "learning_rate": 2.6949254184065787e-06, "loss": 0.0533, "step": 45852 }, { "epoch": 0.8120354013645198, "grad_norm": 0.5174093246459961, "learning_rate": 2.6944334145973732e-06, "loss": 0.0492, "step": 45853 }, { "epoch": 0.8120531109015483, "grad_norm": 0.4222237169742584, "learning_rate": 2.693941451272239e-06, "loss": 0.0545, "step": 45854 }, { "epoch": 0.8120708204385767, "grad_norm": 0.7208566665649414, "learning_rate": 2.693449528432791e-06, "loss": 0.0713, "step": 45855 }, { "epoch": 0.8120885299756051, "grad_norm": 0.6666847467422485, "learning_rate": 2.6929576460806483e-06, "loss": 0.0535, "step": 45856 }, { "epoch": 0.8121062395126335, "grad_norm": 0.6228304505348206, "learning_rate": 2.692465804217431e-06, "loss": 0.0251, "step": 45857 }, { "epoch": 0.812123949049662, "grad_norm": 0.698083221912384, "learning_rate": 2.6919740028447614e-06, "loss": 0.0608, "step": 45858 }, { "epoch": 0.8121416585866904, "grad_norm": 0.5987372994422913, "learning_rate": 2.691482241964248e-06, "loss": 0.0584, "step": 45859 }, { "epoch": 0.8121593681237188, "grad_norm": 0.5069583654403687, "learning_rate": 2.6909905215775134e-06, "loss": 0.048, "step": 45860 }, { "epoch": 0.8121770776607472, "grad_norm": 0.9416531324386597, "learning_rate": 2.690498841686177e-06, "loss": 0.0505, "step": 45861 }, { "epoch": 0.8121947871977757, "grad_norm": 0.48094266653060913, "learning_rate": 2.690007202291853e-06, "loss": 0.0396, "step": 45862 }, { "epoch": 0.8122124967348041, "grad_norm": 0.6935247778892517, "learning_rate": 2.6895156033961588e-06, "loss": 0.0602, "step": 45863 }, { "epoch": 0.8122302062718325, "grad_norm": 0.5361528992652893, "learning_rate": 2.6890240450007127e-06, "loss": 0.0563, "step": 45864 }, { "epoch": 0.812247915808861, "grad_norm": 0.8204512596130371, "learning_rate": 2.6885325271071333e-06, "loss": 0.0669, "step": 45865 }, { "epoch": 0.8122656253458894, "grad_norm": 0.5269089937210083, "learning_rate": 2.688041049717036e-06, "loss": 0.0473, "step": 45866 }, { "epoch": 0.8122833348829178, "grad_norm": 0.5614542961120605, "learning_rate": 2.6875496128320397e-06, "loss": 0.096, "step": 45867 }, { "epoch": 0.8123010444199462, "grad_norm": 0.3708007335662842, "learning_rate": 2.6870582164537616e-06, "loss": 0.0523, "step": 45868 }, { "epoch": 0.8123187539569747, "grad_norm": 0.6943131685256958, "learning_rate": 2.6865668605838152e-06, "loss": 0.0542, "step": 45869 }, { "epoch": 0.8123364634940031, "grad_norm": 0.6191997528076172, "learning_rate": 2.6860755452238154e-06, "loss": 0.0417, "step": 45870 }, { "epoch": 0.8123541730310315, "grad_norm": 0.5311611890792847, "learning_rate": 2.6855842703753835e-06, "loss": 0.0383, "step": 45871 }, { "epoch": 0.8123718825680599, "grad_norm": 0.3668506145477295, "learning_rate": 2.6850930360401376e-06, "loss": 0.0393, "step": 45872 }, { "epoch": 0.8123895921050884, "grad_norm": 0.6153901815414429, "learning_rate": 2.6846018422196867e-06, "loss": 0.0465, "step": 45873 }, { "epoch": 0.8124073016421168, "grad_norm": 0.5034423470497131, "learning_rate": 2.684110688915649e-06, "loss": 0.0275, "step": 45874 }, { "epoch": 0.8124250111791452, "grad_norm": 0.5804414749145508, "learning_rate": 2.683619576129642e-06, "loss": 0.0258, "step": 45875 }, { "epoch": 0.8124427207161736, "grad_norm": 0.762992262840271, "learning_rate": 2.6831285038632815e-06, "loss": 0.037, "step": 45876 }, { "epoch": 0.8124604302532021, "grad_norm": 0.5137544870376587, "learning_rate": 2.68263747211818e-06, "loss": 0.0589, "step": 45877 }, { "epoch": 0.8124781397902305, "grad_norm": 0.7294725179672241, "learning_rate": 2.6821464808959573e-06, "loss": 0.0464, "step": 45878 }, { "epoch": 0.8124958493272589, "grad_norm": 0.8477540612220764, "learning_rate": 2.6816555301982315e-06, "loss": 0.0814, "step": 45879 }, { "epoch": 0.8125135588642874, "grad_norm": 0.3575807511806488, "learning_rate": 2.681164620026608e-06, "loss": 0.0594, "step": 45880 }, { "epoch": 0.8125312684013158, "grad_norm": 0.5243691802024841, "learning_rate": 2.6806737503827063e-06, "loss": 0.0705, "step": 45881 }, { "epoch": 0.8125489779383442, "grad_norm": 0.7333908677101135, "learning_rate": 2.6801829212681473e-06, "loss": 0.0544, "step": 45882 }, { "epoch": 0.8125666874753726, "grad_norm": 0.9268752336502075, "learning_rate": 2.679692132684532e-06, "loss": 0.0645, "step": 45883 }, { "epoch": 0.8125843970124011, "grad_norm": 0.5999270081520081, "learning_rate": 2.679201384633487e-06, "loss": 0.0721, "step": 45884 }, { "epoch": 0.8126021065494295, "grad_norm": 0.833043098449707, "learning_rate": 2.6787106771166237e-06, "loss": 0.0672, "step": 45885 }, { "epoch": 0.812619816086458, "grad_norm": 0.4755249321460724, "learning_rate": 2.6782200101355596e-06, "loss": 0.0426, "step": 45886 }, { "epoch": 0.8126375256234863, "grad_norm": 0.34829181432724, "learning_rate": 2.6777293836919007e-06, "loss": 0.0452, "step": 45887 }, { "epoch": 0.8126552351605149, "grad_norm": 0.5284631252288818, "learning_rate": 2.6772387977872676e-06, "loss": 0.0472, "step": 45888 }, { "epoch": 0.8126729446975433, "grad_norm": 0.5696057677268982, "learning_rate": 2.6767482524232715e-06, "loss": 0.0573, "step": 45889 }, { "epoch": 0.8126906542345717, "grad_norm": 0.6117978692054749, "learning_rate": 2.676257747601532e-06, "loss": 0.0484, "step": 45890 }, { "epoch": 0.8127083637716, "grad_norm": 0.5591332316398621, "learning_rate": 2.6757672833236524e-06, "loss": 0.0641, "step": 45891 }, { "epoch": 0.8127260733086286, "grad_norm": 0.7547710537910461, "learning_rate": 2.675276859591253e-06, "loss": 0.0766, "step": 45892 }, { "epoch": 0.812743782845657, "grad_norm": 0.8838716149330139, "learning_rate": 2.674786476405946e-06, "loss": 0.0814, "step": 45893 }, { "epoch": 0.8127614923826854, "grad_norm": 0.4152829945087433, "learning_rate": 2.674296133769345e-06, "loss": 0.0437, "step": 45894 }, { "epoch": 0.8127792019197139, "grad_norm": 0.6130199432373047, "learning_rate": 2.6738058316830637e-06, "loss": 0.039, "step": 45895 }, { "epoch": 0.8127969114567423, "grad_norm": 0.4230310916900635, "learning_rate": 2.6733155701487142e-06, "loss": 0.0354, "step": 45896 }, { "epoch": 0.8128146209937707, "grad_norm": 0.6913015246391296, "learning_rate": 2.672825349167912e-06, "loss": 0.0445, "step": 45897 }, { "epoch": 0.8128323305307991, "grad_norm": 0.8341261148452759, "learning_rate": 2.672335168742266e-06, "loss": 0.0668, "step": 45898 }, { "epoch": 0.8128500400678276, "grad_norm": 0.3589230179786682, "learning_rate": 2.671845028873389e-06, "loss": 0.0437, "step": 45899 }, { "epoch": 0.812867749604856, "grad_norm": 0.49767354130744934, "learning_rate": 2.671354929562899e-06, "loss": 0.0437, "step": 45900 }, { "epoch": 0.8128854591418844, "grad_norm": 0.5618573427200317, "learning_rate": 2.670864870812401e-06, "loss": 0.0603, "step": 45901 }, { "epoch": 0.8129031686789128, "grad_norm": 1.0400453805923462, "learning_rate": 2.6703748526235104e-06, "loss": 0.0713, "step": 45902 }, { "epoch": 0.8129208782159413, "grad_norm": 0.6661070585250854, "learning_rate": 2.669884874997838e-06, "loss": 0.0805, "step": 45903 }, { "epoch": 0.8129385877529697, "grad_norm": 0.46236950159072876, "learning_rate": 2.6693949379369997e-06, "loss": 0.0342, "step": 45904 }, { "epoch": 0.8129562972899981, "grad_norm": 0.807184636592865, "learning_rate": 2.6689050414426035e-06, "loss": 0.0809, "step": 45905 }, { "epoch": 0.8129740068270265, "grad_norm": 1.7579083442687988, "learning_rate": 2.6684151855162616e-06, "loss": 0.0652, "step": 45906 }, { "epoch": 0.812991716364055, "grad_norm": 0.6543627381324768, "learning_rate": 2.667925370159592e-06, "loss": 0.0482, "step": 45907 }, { "epoch": 0.8130094259010834, "grad_norm": 0.3877839744091034, "learning_rate": 2.667435595374195e-06, "loss": 0.0257, "step": 45908 }, { "epoch": 0.8130271354381118, "grad_norm": 0.4921010434627533, "learning_rate": 2.66694586116169e-06, "loss": 0.0554, "step": 45909 }, { "epoch": 0.8130448449751403, "grad_norm": 0.8025603294372559, "learning_rate": 2.666456167523684e-06, "loss": 0.0378, "step": 45910 }, { "epoch": 0.8130625545121687, "grad_norm": 0.8566778898239136, "learning_rate": 2.665966514461794e-06, "loss": 0.0685, "step": 45911 }, { "epoch": 0.8130802640491971, "grad_norm": 0.6146612167358398, "learning_rate": 2.6654769019776197e-06, "loss": 0.0366, "step": 45912 }, { "epoch": 0.8130979735862255, "grad_norm": 0.6926568746566772, "learning_rate": 2.6649873300727808e-06, "loss": 0.0524, "step": 45913 }, { "epoch": 0.813115683123254, "grad_norm": 0.7644802927970886, "learning_rate": 2.664497798748892e-06, "loss": 0.0712, "step": 45914 }, { "epoch": 0.8131333926602824, "grad_norm": 0.7680058479309082, "learning_rate": 2.6640083080075516e-06, "loss": 0.0498, "step": 45915 }, { "epoch": 0.8131511021973108, "grad_norm": 0.4837193787097931, "learning_rate": 2.6635188578503776e-06, "loss": 0.0522, "step": 45916 }, { "epoch": 0.8131688117343392, "grad_norm": 0.5182812213897705, "learning_rate": 2.6630294482789786e-06, "loss": 0.0523, "step": 45917 }, { "epoch": 0.8131865212713677, "grad_norm": 0.575616180896759, "learning_rate": 2.6625400792949696e-06, "loss": 0.0501, "step": 45918 }, { "epoch": 0.8132042308083961, "grad_norm": 0.48310959339141846, "learning_rate": 2.662050750899951e-06, "loss": 0.0714, "step": 45919 }, { "epoch": 0.8132219403454245, "grad_norm": 0.40559816360473633, "learning_rate": 2.6615614630955383e-06, "loss": 0.0594, "step": 45920 }, { "epoch": 0.8132396498824529, "grad_norm": 0.3434595763683319, "learning_rate": 2.6610722158833384e-06, "loss": 0.0301, "step": 45921 }, { "epoch": 0.8132573594194814, "grad_norm": 0.3870512843132019, "learning_rate": 2.660583009264965e-06, "loss": 0.0493, "step": 45922 }, { "epoch": 0.8132750689565098, "grad_norm": 0.29557177424430847, "learning_rate": 2.660093843242024e-06, "loss": 0.0472, "step": 45923 }, { "epoch": 0.8132927784935382, "grad_norm": 0.6977473497390747, "learning_rate": 2.659604717816127e-06, "loss": 0.0645, "step": 45924 }, { "epoch": 0.8133104880305667, "grad_norm": 0.5647909045219421, "learning_rate": 2.659115632988886e-06, "loss": 0.0499, "step": 45925 }, { "epoch": 0.8133281975675951, "grad_norm": 0.5001556277275085, "learning_rate": 2.6586265887619033e-06, "loss": 0.042, "step": 45926 }, { "epoch": 0.8133459071046235, "grad_norm": 0.37413209676742554, "learning_rate": 2.6581375851367906e-06, "loss": 0.0548, "step": 45927 }, { "epoch": 0.8133636166416519, "grad_norm": 0.4291095733642578, "learning_rate": 2.657648622115157e-06, "loss": 0.0804, "step": 45928 }, { "epoch": 0.8133813261786804, "grad_norm": 0.7265369892120361, "learning_rate": 2.6571596996986143e-06, "loss": 0.0545, "step": 45929 }, { "epoch": 0.8133990357157088, "grad_norm": 0.7542040348052979, "learning_rate": 2.6566708178887627e-06, "loss": 0.0999, "step": 45930 }, { "epoch": 0.8134167452527372, "grad_norm": 0.6756057143211365, "learning_rate": 2.656181976687218e-06, "loss": 0.05, "step": 45931 }, { "epoch": 0.8134344547897656, "grad_norm": 0.7081679105758667, "learning_rate": 2.6556931760955856e-06, "loss": 0.0603, "step": 45932 }, { "epoch": 0.8134521643267941, "grad_norm": 0.7068830728530884, "learning_rate": 2.655204416115474e-06, "loss": 0.0478, "step": 45933 }, { "epoch": 0.8134698738638225, "grad_norm": 0.6044235825538635, "learning_rate": 2.65471569674849e-06, "loss": 0.0559, "step": 45934 }, { "epoch": 0.8134875834008509, "grad_norm": 0.4573228061199188, "learning_rate": 2.654227017996244e-06, "loss": 0.0434, "step": 45935 }, { "epoch": 0.8135052929378793, "grad_norm": 0.6832567453384399, "learning_rate": 2.6537383798603454e-06, "loss": 0.084, "step": 45936 }, { "epoch": 0.8135230024749078, "grad_norm": 0.5186021327972412, "learning_rate": 2.6532497823423955e-06, "loss": 0.0532, "step": 45937 }, { "epoch": 0.8135407120119362, "grad_norm": 0.5271563529968262, "learning_rate": 2.6527612254440054e-06, "loss": 0.0534, "step": 45938 }, { "epoch": 0.8135584215489646, "grad_norm": 1.2210521697998047, "learning_rate": 2.6522727091667846e-06, "loss": 0.0865, "step": 45939 }, { "epoch": 0.8135761310859931, "grad_norm": 0.7285838723182678, "learning_rate": 2.6517842335123343e-06, "loss": 0.0493, "step": 45940 }, { "epoch": 0.8135938406230215, "grad_norm": 0.47468647360801697, "learning_rate": 2.6512957984822626e-06, "loss": 0.0341, "step": 45941 }, { "epoch": 0.8136115501600499, "grad_norm": 0.5388427376747131, "learning_rate": 2.6508074040781806e-06, "loss": 0.0496, "step": 45942 }, { "epoch": 0.8136292596970783, "grad_norm": 0.6394611597061157, "learning_rate": 2.6503190503016976e-06, "loss": 0.0553, "step": 45943 }, { "epoch": 0.8136469692341068, "grad_norm": 0.5271866917610168, "learning_rate": 2.6498307371544133e-06, "loss": 0.0653, "step": 45944 }, { "epoch": 0.8136646787711352, "grad_norm": 0.5404930114746094, "learning_rate": 2.6493424646379345e-06, "loss": 0.0698, "step": 45945 }, { "epoch": 0.8136823883081636, "grad_norm": 0.7404432892799377, "learning_rate": 2.6488542327538743e-06, "loss": 0.0432, "step": 45946 }, { "epoch": 0.813700097845192, "grad_norm": 0.47343698143959045, "learning_rate": 2.6483660415038324e-06, "loss": 0.0494, "step": 45947 }, { "epoch": 0.8137178073822205, "grad_norm": 0.7075443267822266, "learning_rate": 2.6478778908894152e-06, "loss": 0.0401, "step": 45948 }, { "epoch": 0.813735516919249, "grad_norm": 1.5870511531829834, "learning_rate": 2.6473897809122317e-06, "loss": 0.0515, "step": 45949 }, { "epoch": 0.8137532264562773, "grad_norm": 0.3945862948894501, "learning_rate": 2.646901711573885e-06, "loss": 0.0397, "step": 45950 }, { "epoch": 0.8137709359933057, "grad_norm": 0.5796545743942261, "learning_rate": 2.6464136828759833e-06, "loss": 0.0487, "step": 45951 }, { "epoch": 0.8137886455303343, "grad_norm": 0.7799623608589172, "learning_rate": 2.6459256948201292e-06, "loss": 0.0553, "step": 45952 }, { "epoch": 0.8138063550673627, "grad_norm": 0.4799515902996063, "learning_rate": 2.6454377474079323e-06, "loss": 0.0559, "step": 45953 }, { "epoch": 0.813824064604391, "grad_norm": 0.5953014492988586, "learning_rate": 2.6449498406409988e-06, "loss": 0.0705, "step": 45954 }, { "epoch": 0.8138417741414196, "grad_norm": 0.6694595813751221, "learning_rate": 2.6444619745209263e-06, "loss": 0.0442, "step": 45955 }, { "epoch": 0.813859483678448, "grad_norm": 0.6006906032562256, "learning_rate": 2.6439741490493237e-06, "loss": 0.0445, "step": 45956 }, { "epoch": 0.8138771932154764, "grad_norm": 0.4791108965873718, "learning_rate": 2.6434863642278005e-06, "loss": 0.0425, "step": 45957 }, { "epoch": 0.8138949027525048, "grad_norm": 0.6002377867698669, "learning_rate": 2.642998620057955e-06, "loss": 0.0528, "step": 45958 }, { "epoch": 0.8139126122895333, "grad_norm": 0.4051889479160309, "learning_rate": 2.642510916541393e-06, "loss": 0.0601, "step": 45959 }, { "epoch": 0.8139303218265617, "grad_norm": 0.32434147596359253, "learning_rate": 2.6420232536797205e-06, "loss": 0.0575, "step": 45960 }, { "epoch": 0.8139480313635901, "grad_norm": 0.7377725839614868, "learning_rate": 2.6415356314745415e-06, "loss": 0.0519, "step": 45961 }, { "epoch": 0.8139657409006185, "grad_norm": 0.9100404381752014, "learning_rate": 2.6410480499274608e-06, "loss": 0.0701, "step": 45962 }, { "epoch": 0.813983450437647, "grad_norm": 0.5333771109580994, "learning_rate": 2.6405605090400807e-06, "loss": 0.07, "step": 45963 }, { "epoch": 0.8140011599746754, "grad_norm": 0.8211455345153809, "learning_rate": 2.6400730088140116e-06, "loss": 0.0811, "step": 45964 }, { "epoch": 0.8140188695117038, "grad_norm": 0.49259153008461, "learning_rate": 2.6395855492508476e-06, "loss": 0.0655, "step": 45965 }, { "epoch": 0.8140365790487322, "grad_norm": 0.6959768533706665, "learning_rate": 2.6390981303521985e-06, "loss": 0.0643, "step": 45966 }, { "epoch": 0.8140542885857607, "grad_norm": 0.606015682220459, "learning_rate": 2.6386107521196646e-06, "loss": 0.053, "step": 45967 }, { "epoch": 0.8140719981227891, "grad_norm": 0.40759894251823425, "learning_rate": 2.638123414554855e-06, "loss": 0.0537, "step": 45968 }, { "epoch": 0.8140897076598175, "grad_norm": 0.4760890007019043, "learning_rate": 2.637636117659365e-06, "loss": 0.0467, "step": 45969 }, { "epoch": 0.814107417196846, "grad_norm": 0.6934670805931091, "learning_rate": 2.6371488614347983e-06, "loss": 0.0574, "step": 45970 }, { "epoch": 0.8141251267338744, "grad_norm": 0.8976716995239258, "learning_rate": 2.6366616458827703e-06, "loss": 0.0737, "step": 45971 }, { "epoch": 0.8141428362709028, "grad_norm": 0.5211149454116821, "learning_rate": 2.6361744710048697e-06, "loss": 0.0388, "step": 45972 }, { "epoch": 0.8141605458079312, "grad_norm": 0.515856921672821, "learning_rate": 2.635687336802705e-06, "loss": 0.0447, "step": 45973 }, { "epoch": 0.8141782553449597, "grad_norm": 0.47604599595069885, "learning_rate": 2.635200243277879e-06, "loss": 0.0633, "step": 45974 }, { "epoch": 0.8141959648819881, "grad_norm": 0.6278383135795593, "learning_rate": 2.634713190431996e-06, "loss": 0.0518, "step": 45975 }, { "epoch": 0.8142136744190165, "grad_norm": 0.6465855240821838, "learning_rate": 2.6342261782666536e-06, "loss": 0.0657, "step": 45976 }, { "epoch": 0.8142313839560449, "grad_norm": 0.5781513452529907, "learning_rate": 2.6337392067834542e-06, "loss": 0.054, "step": 45977 }, { "epoch": 0.8142490934930734, "grad_norm": 0.42129796743392944, "learning_rate": 2.6332522759840046e-06, "loss": 0.0498, "step": 45978 }, { "epoch": 0.8142668030301018, "grad_norm": 0.7042561173439026, "learning_rate": 2.632765385869903e-06, "loss": 0.0479, "step": 45979 }, { "epoch": 0.8142845125671302, "grad_norm": 0.421106219291687, "learning_rate": 2.632278536442752e-06, "loss": 0.0596, "step": 45980 }, { "epoch": 0.8143022221041586, "grad_norm": 0.5667951107025146, "learning_rate": 2.631791727704154e-06, "loss": 0.0528, "step": 45981 }, { "epoch": 0.8143199316411871, "grad_norm": 0.28118279576301575, "learning_rate": 2.631304959655715e-06, "loss": 0.0629, "step": 45982 }, { "epoch": 0.8143376411782155, "grad_norm": 0.3743949830532074, "learning_rate": 2.630818232299026e-06, "loss": 0.0319, "step": 45983 }, { "epoch": 0.8143553507152439, "grad_norm": 0.37179794907569885, "learning_rate": 2.6303315456356957e-06, "loss": 0.0389, "step": 45984 }, { "epoch": 0.8143730602522724, "grad_norm": 0.5763207077980042, "learning_rate": 2.629844899667326e-06, "loss": 0.0429, "step": 45985 }, { "epoch": 0.8143907697893008, "grad_norm": 0.5567139387130737, "learning_rate": 2.629358294395513e-06, "loss": 0.0509, "step": 45986 }, { "epoch": 0.8144084793263292, "grad_norm": 0.440525621175766, "learning_rate": 2.6288717298218593e-06, "loss": 0.0558, "step": 45987 }, { "epoch": 0.8144261888633576, "grad_norm": 0.5415672659873962, "learning_rate": 2.6283852059479664e-06, "loss": 0.0652, "step": 45988 }, { "epoch": 0.8144438984003861, "grad_norm": 0.5382129549980164, "learning_rate": 2.6278987227754338e-06, "loss": 0.0572, "step": 45989 }, { "epoch": 0.8144616079374145, "grad_norm": 0.8417224287986755, "learning_rate": 2.6274122803058654e-06, "loss": 0.0488, "step": 45990 }, { "epoch": 0.8144793174744429, "grad_norm": 0.5355989933013916, "learning_rate": 2.6269258785408566e-06, "loss": 0.0462, "step": 45991 }, { "epoch": 0.8144970270114713, "grad_norm": 0.2856757640838623, "learning_rate": 2.6264395174820118e-06, "loss": 0.0311, "step": 45992 }, { "epoch": 0.8145147365484998, "grad_norm": 0.5705868601799011, "learning_rate": 2.625953197130934e-06, "loss": 0.0488, "step": 45993 }, { "epoch": 0.8145324460855282, "grad_norm": 0.5347363352775574, "learning_rate": 2.625466917489213e-06, "loss": 0.0487, "step": 45994 }, { "epoch": 0.8145501556225566, "grad_norm": 0.7626974582672119, "learning_rate": 2.624980678558454e-06, "loss": 0.0523, "step": 45995 }, { "epoch": 0.814567865159585, "grad_norm": 0.3335384726524353, "learning_rate": 2.6244944803402615e-06, "loss": 0.0474, "step": 45996 }, { "epoch": 0.8145855746966135, "grad_norm": 0.9193853139877319, "learning_rate": 2.624008322836228e-06, "loss": 0.0578, "step": 45997 }, { "epoch": 0.8146032842336419, "grad_norm": 0.6339312791824341, "learning_rate": 2.623522206047953e-06, "loss": 0.0413, "step": 45998 }, { "epoch": 0.8146209937706703, "grad_norm": 0.8436186909675598, "learning_rate": 2.623036129977036e-06, "loss": 0.089, "step": 45999 }, { "epoch": 0.8146387033076988, "grad_norm": 0.8103863000869751, "learning_rate": 2.622550094625086e-06, "loss": 0.0776, "step": 46000 }, { "epoch": 0.8146564128447272, "grad_norm": 0.6991543173789978, "learning_rate": 2.622064099993689e-06, "loss": 0.0576, "step": 46001 }, { "epoch": 0.8146741223817556, "grad_norm": 0.5689476728439331, "learning_rate": 2.6215781460844498e-06, "loss": 0.0414, "step": 46002 }, { "epoch": 0.814691831918784, "grad_norm": 0.6899864673614502, "learning_rate": 2.6210922328989713e-06, "loss": 0.0271, "step": 46003 }, { "epoch": 0.8147095414558125, "grad_norm": 0.6660233736038208, "learning_rate": 2.6206063604388424e-06, "loss": 0.0468, "step": 46004 }, { "epoch": 0.8147272509928409, "grad_norm": 0.2178506702184677, "learning_rate": 2.6201205287056657e-06, "loss": 0.0284, "step": 46005 }, { "epoch": 0.8147449605298693, "grad_norm": 0.7520202398300171, "learning_rate": 2.6196347377010426e-06, "loss": 0.088, "step": 46006 }, { "epoch": 0.8147626700668977, "grad_norm": 0.4900975823402405, "learning_rate": 2.6191489874265675e-06, "loss": 0.0511, "step": 46007 }, { "epoch": 0.8147803796039262, "grad_norm": 0.7119012475013733, "learning_rate": 2.61866327788384e-06, "loss": 0.0557, "step": 46008 }, { "epoch": 0.8147980891409546, "grad_norm": 0.8776760101318359, "learning_rate": 2.6181776090744573e-06, "loss": 0.0731, "step": 46009 }, { "epoch": 0.814815798677983, "grad_norm": 0.4871952533721924, "learning_rate": 2.6176919810000237e-06, "loss": 0.0419, "step": 46010 }, { "epoch": 0.8148335082150114, "grad_norm": 0.7267175912857056, "learning_rate": 2.617206393662126e-06, "loss": 0.0635, "step": 46011 }, { "epoch": 0.81485121775204, "grad_norm": 0.43358346819877625, "learning_rate": 2.6167208470623677e-06, "loss": 0.0661, "step": 46012 }, { "epoch": 0.8148689272890683, "grad_norm": 0.4840404689311981, "learning_rate": 2.616235341202344e-06, "loss": 0.0716, "step": 46013 }, { "epoch": 0.8148866368260967, "grad_norm": 0.4230319559574127, "learning_rate": 2.615749876083658e-06, "loss": 0.0789, "step": 46014 }, { "epoch": 0.8149043463631253, "grad_norm": 0.23085620999336243, "learning_rate": 2.615264451707899e-06, "loss": 0.0382, "step": 46015 }, { "epoch": 0.8149220559001537, "grad_norm": 0.9939165115356445, "learning_rate": 2.614779068076666e-06, "loss": 0.0586, "step": 46016 }, { "epoch": 0.814939765437182, "grad_norm": 0.4183117747306824, "learning_rate": 2.6142937251915596e-06, "loss": 0.046, "step": 46017 }, { "epoch": 0.8149574749742104, "grad_norm": 1.4297024011611938, "learning_rate": 2.6138084230541716e-06, "loss": 0.0593, "step": 46018 }, { "epoch": 0.814975184511239, "grad_norm": 0.7277358770370483, "learning_rate": 2.6133231616661026e-06, "loss": 0.0478, "step": 46019 }, { "epoch": 0.8149928940482674, "grad_norm": 0.7698593139648438, "learning_rate": 2.612837941028946e-06, "loss": 0.061, "step": 46020 }, { "epoch": 0.8150106035852958, "grad_norm": 0.290698766708374, "learning_rate": 2.612352761144304e-06, "loss": 0.0591, "step": 46021 }, { "epoch": 0.8150283131223242, "grad_norm": 0.4792810082435608, "learning_rate": 2.611867622013766e-06, "loss": 0.059, "step": 46022 }, { "epoch": 0.8150460226593527, "grad_norm": 0.8261794447898865, "learning_rate": 2.61138252363893e-06, "loss": 0.0544, "step": 46023 }, { "epoch": 0.8150637321963811, "grad_norm": 0.3617296814918518, "learning_rate": 2.610897466021391e-06, "loss": 0.0373, "step": 46024 }, { "epoch": 0.8150814417334095, "grad_norm": 0.9595633745193481, "learning_rate": 2.6104124491627525e-06, "loss": 0.0707, "step": 46025 }, { "epoch": 0.8150991512704379, "grad_norm": 0.5663272738456726, "learning_rate": 2.6099274730645983e-06, "loss": 0.0486, "step": 46026 }, { "epoch": 0.8151168608074664, "grad_norm": 0.6092818975448608, "learning_rate": 2.6094425377285296e-06, "loss": 0.0675, "step": 46027 }, { "epoch": 0.8151345703444948, "grad_norm": 0.8526558876037598, "learning_rate": 2.6089576431561417e-06, "loss": 0.0488, "step": 46028 }, { "epoch": 0.8151522798815232, "grad_norm": 0.539775550365448, "learning_rate": 2.6084727893490317e-06, "loss": 0.0686, "step": 46029 }, { "epoch": 0.8151699894185517, "grad_norm": 0.6951112151145935, "learning_rate": 2.6079879763087916e-06, "loss": 0.049, "step": 46030 }, { "epoch": 0.8151876989555801, "grad_norm": 1.0284291505813599, "learning_rate": 2.607503204037017e-06, "loss": 0.0817, "step": 46031 }, { "epoch": 0.8152054084926085, "grad_norm": 0.48106953501701355, "learning_rate": 2.6070184725353073e-06, "loss": 0.0795, "step": 46032 }, { "epoch": 0.8152231180296369, "grad_norm": 0.5171516537666321, "learning_rate": 2.606533781805251e-06, "loss": 0.0407, "step": 46033 }, { "epoch": 0.8152408275666654, "grad_norm": 0.5684090256690979, "learning_rate": 2.6060491318484443e-06, "loss": 0.0581, "step": 46034 }, { "epoch": 0.8152585371036938, "grad_norm": 0.645409882068634, "learning_rate": 2.605564522666486e-06, "loss": 0.0564, "step": 46035 }, { "epoch": 0.8152762466407222, "grad_norm": 0.449689120054245, "learning_rate": 2.6050799542609583e-06, "loss": 0.0454, "step": 46036 }, { "epoch": 0.8152939561777506, "grad_norm": 0.4439229965209961, "learning_rate": 2.60459542663347e-06, "loss": 0.0561, "step": 46037 }, { "epoch": 0.8153116657147791, "grad_norm": 0.6579685211181641, "learning_rate": 2.6041109397856063e-06, "loss": 0.0538, "step": 46038 }, { "epoch": 0.8153293752518075, "grad_norm": 0.7779927253723145, "learning_rate": 2.603626493718969e-06, "loss": 0.0611, "step": 46039 }, { "epoch": 0.8153470847888359, "grad_norm": 0.8443698287010193, "learning_rate": 2.6031420884351427e-06, "loss": 0.0544, "step": 46040 }, { "epoch": 0.8153647943258643, "grad_norm": 0.3583216071128845, "learning_rate": 2.602657723935725e-06, "loss": 0.0267, "step": 46041 }, { "epoch": 0.8153825038628928, "grad_norm": 0.6350289583206177, "learning_rate": 2.6021734002223143e-06, "loss": 0.0834, "step": 46042 }, { "epoch": 0.8154002133999212, "grad_norm": 0.2646568715572357, "learning_rate": 2.601689117296495e-06, "loss": 0.0604, "step": 46043 }, { "epoch": 0.8154179229369496, "grad_norm": 0.5487343072891235, "learning_rate": 2.6012048751598637e-06, "loss": 0.0415, "step": 46044 }, { "epoch": 0.8154356324739781, "grad_norm": 0.23609860241413116, "learning_rate": 2.6007206738140138e-06, "loss": 0.0176, "step": 46045 }, { "epoch": 0.8154533420110065, "grad_norm": 0.3934395909309387, "learning_rate": 2.60023651326054e-06, "loss": 0.0401, "step": 46046 }, { "epoch": 0.8154710515480349, "grad_norm": 1.5369446277618408, "learning_rate": 2.599752393501034e-06, "loss": 0.0532, "step": 46047 }, { "epoch": 0.8154887610850633, "grad_norm": 0.7062675356864929, "learning_rate": 2.599268314537087e-06, "loss": 0.0582, "step": 46048 }, { "epoch": 0.8155064706220918, "grad_norm": 0.5556225776672363, "learning_rate": 2.5987842763702992e-06, "loss": 0.0565, "step": 46049 }, { "epoch": 0.8155241801591202, "grad_norm": 0.7307065725326538, "learning_rate": 2.59830027900225e-06, "loss": 0.0504, "step": 46050 }, { "epoch": 0.8155418896961486, "grad_norm": 0.21757428348064423, "learning_rate": 2.5978163224345415e-06, "loss": 0.051, "step": 46051 }, { "epoch": 0.815559599233177, "grad_norm": 0.6445105671882629, "learning_rate": 2.597332406668761e-06, "loss": 0.0614, "step": 46052 }, { "epoch": 0.8155773087702055, "grad_norm": 0.4997057616710663, "learning_rate": 2.596848531706506e-06, "loss": 0.0562, "step": 46053 }, { "epoch": 0.8155950183072339, "grad_norm": 0.5676831603050232, "learning_rate": 2.5963646975493623e-06, "loss": 0.0637, "step": 46054 }, { "epoch": 0.8156127278442623, "grad_norm": 0.2829837203025818, "learning_rate": 2.595880904198923e-06, "loss": 0.0768, "step": 46055 }, { "epoch": 0.8156304373812907, "grad_norm": 0.39150097966194153, "learning_rate": 2.595397151656782e-06, "loss": 0.0298, "step": 46056 }, { "epoch": 0.8156481469183192, "grad_norm": 0.5622750520706177, "learning_rate": 2.59491343992453e-06, "loss": 0.0581, "step": 46057 }, { "epoch": 0.8156658564553476, "grad_norm": 0.6683281660079956, "learning_rate": 2.594429769003757e-06, "loss": 0.102, "step": 46058 }, { "epoch": 0.815683565992376, "grad_norm": 0.5439863204956055, "learning_rate": 2.5939461388960555e-06, "loss": 0.0437, "step": 46059 }, { "epoch": 0.8157012755294045, "grad_norm": 0.6321873664855957, "learning_rate": 2.5934625496030208e-06, "loss": 0.0521, "step": 46060 }, { "epoch": 0.8157189850664329, "grad_norm": 0.4886986315250397, "learning_rate": 2.5929790011262345e-06, "loss": 0.0878, "step": 46061 }, { "epoch": 0.8157366946034613, "grad_norm": 0.5234572887420654, "learning_rate": 2.592495493467294e-06, "loss": 0.0426, "step": 46062 }, { "epoch": 0.8157544041404897, "grad_norm": 0.7183952927589417, "learning_rate": 2.5920120266277862e-06, "loss": 0.0397, "step": 46063 }, { "epoch": 0.8157721136775182, "grad_norm": 0.5362353324890137, "learning_rate": 2.5915286006093103e-06, "loss": 0.039, "step": 46064 }, { "epoch": 0.8157898232145466, "grad_norm": 0.6238058805465698, "learning_rate": 2.591045215413441e-06, "loss": 0.0746, "step": 46065 }, { "epoch": 0.815807532751575, "grad_norm": 0.6935262680053711, "learning_rate": 2.5905618710417813e-06, "loss": 0.0825, "step": 46066 }, { "epoch": 0.8158252422886034, "grad_norm": 0.5454369187355042, "learning_rate": 2.590078567495922e-06, "loss": 0.059, "step": 46067 }, { "epoch": 0.8158429518256319, "grad_norm": 0.37254926562309265, "learning_rate": 2.5895953047774463e-06, "loss": 0.0516, "step": 46068 }, { "epoch": 0.8158606613626603, "grad_norm": 0.4989995062351227, "learning_rate": 2.5891120828879454e-06, "loss": 0.0308, "step": 46069 }, { "epoch": 0.8158783708996887, "grad_norm": 0.8023236989974976, "learning_rate": 2.588628901829012e-06, "loss": 0.0645, "step": 46070 }, { "epoch": 0.8158960804367171, "grad_norm": 0.5804604887962341, "learning_rate": 2.5881457616022363e-06, "loss": 0.0727, "step": 46071 }, { "epoch": 0.8159137899737456, "grad_norm": 1.1992088556289673, "learning_rate": 2.5876626622092037e-06, "loss": 0.046, "step": 46072 }, { "epoch": 0.815931499510774, "grad_norm": 0.8512734174728394, "learning_rate": 2.5871796036515045e-06, "loss": 0.0436, "step": 46073 }, { "epoch": 0.8159492090478024, "grad_norm": 0.5907270908355713, "learning_rate": 2.586696585930729e-06, "loss": 0.0624, "step": 46074 }, { "epoch": 0.815966918584831, "grad_norm": 0.9983407258987427, "learning_rate": 2.586213609048466e-06, "loss": 0.0865, "step": 46075 }, { "epoch": 0.8159846281218593, "grad_norm": 0.5404838919639587, "learning_rate": 2.5857306730063047e-06, "loss": 0.0688, "step": 46076 }, { "epoch": 0.8160023376588877, "grad_norm": 0.4526934027671814, "learning_rate": 2.585247777805833e-06, "loss": 0.0521, "step": 46077 }, { "epoch": 0.8160200471959161, "grad_norm": 0.5980729460716248, "learning_rate": 2.5847649234486455e-06, "loss": 0.0575, "step": 46078 }, { "epoch": 0.8160377567329447, "grad_norm": 0.4384429454803467, "learning_rate": 2.584282109936322e-06, "loss": 0.0703, "step": 46079 }, { "epoch": 0.816055466269973, "grad_norm": 0.7055227756500244, "learning_rate": 2.5837993372704544e-06, "loss": 0.0472, "step": 46080 }, { "epoch": 0.8160731758070014, "grad_norm": 0.5145920515060425, "learning_rate": 2.5833166054526335e-06, "loss": 0.0659, "step": 46081 }, { "epoch": 0.8160908853440298, "grad_norm": 0.49922677874565125, "learning_rate": 2.582833914484443e-06, "loss": 0.0359, "step": 46082 }, { "epoch": 0.8161085948810584, "grad_norm": 0.5502985715866089, "learning_rate": 2.5823512643674737e-06, "loss": 0.0539, "step": 46083 }, { "epoch": 0.8161263044180868, "grad_norm": 0.9317778944969177, "learning_rate": 2.5818686551033098e-06, "loss": 0.0524, "step": 46084 }, { "epoch": 0.8161440139551152, "grad_norm": 0.19553473591804504, "learning_rate": 2.5813860866935448e-06, "loss": 0.074, "step": 46085 }, { "epoch": 0.8161617234921436, "grad_norm": 0.3914608359336853, "learning_rate": 2.580903559139763e-06, "loss": 0.0521, "step": 46086 }, { "epoch": 0.8161794330291721, "grad_norm": 0.6135289072990417, "learning_rate": 2.580421072443552e-06, "loss": 0.0684, "step": 46087 }, { "epoch": 0.8161971425662005, "grad_norm": 0.6594288349151611, "learning_rate": 2.5799386266065016e-06, "loss": 0.0548, "step": 46088 }, { "epoch": 0.8162148521032289, "grad_norm": 0.34677156805992126, "learning_rate": 2.579456221630195e-06, "loss": 0.0361, "step": 46089 }, { "epoch": 0.8162325616402574, "grad_norm": 0.5962026119232178, "learning_rate": 2.5789738575162214e-06, "loss": 0.0559, "step": 46090 }, { "epoch": 0.8162502711772858, "grad_norm": 0.3875161111354828, "learning_rate": 2.578491534266167e-06, "loss": 0.0792, "step": 46091 }, { "epoch": 0.8162679807143142, "grad_norm": 0.7099282145500183, "learning_rate": 2.578009251881622e-06, "loss": 0.0788, "step": 46092 }, { "epoch": 0.8162856902513426, "grad_norm": 1.2636915445327759, "learning_rate": 2.577527010364168e-06, "loss": 0.0766, "step": 46093 }, { "epoch": 0.8163033997883711, "grad_norm": 0.5211701989173889, "learning_rate": 2.577044809715389e-06, "loss": 0.0679, "step": 46094 }, { "epoch": 0.8163211093253995, "grad_norm": 0.47588077187538147, "learning_rate": 2.5765626499368794e-06, "loss": 0.0485, "step": 46095 }, { "epoch": 0.8163388188624279, "grad_norm": 0.3547170162200928, "learning_rate": 2.576080531030227e-06, "loss": 0.0517, "step": 46096 }, { "epoch": 0.8163565283994563, "grad_norm": 0.2650574743747711, "learning_rate": 2.5755984529970076e-06, "loss": 0.0329, "step": 46097 }, { "epoch": 0.8163742379364848, "grad_norm": 0.3450361490249634, "learning_rate": 2.575116415838815e-06, "loss": 0.0392, "step": 46098 }, { "epoch": 0.8163919474735132, "grad_norm": 0.2580971121788025, "learning_rate": 2.5746344195572345e-06, "loss": 0.0717, "step": 46099 }, { "epoch": 0.8164096570105416, "grad_norm": 0.8114021420478821, "learning_rate": 2.574152464153847e-06, "loss": 0.0417, "step": 46100 }, { "epoch": 0.81642736654757, "grad_norm": 0.11188895255327225, "learning_rate": 2.5736705496302415e-06, "loss": 0.0648, "step": 46101 }, { "epoch": 0.8164450760845985, "grad_norm": 0.4185909330844879, "learning_rate": 2.573188675988001e-06, "loss": 0.0467, "step": 46102 }, { "epoch": 0.8164627856216269, "grad_norm": 0.579925000667572, "learning_rate": 2.572706843228715e-06, "loss": 0.0631, "step": 46103 }, { "epoch": 0.8164804951586553, "grad_norm": 0.7856562733650208, "learning_rate": 2.572225051353966e-06, "loss": 0.0431, "step": 46104 }, { "epoch": 0.8164982046956838, "grad_norm": 0.6582088470458984, "learning_rate": 2.5717433003653395e-06, "loss": 0.0632, "step": 46105 }, { "epoch": 0.8165159142327122, "grad_norm": 0.3319452702999115, "learning_rate": 2.5712615902644247e-06, "loss": 0.0331, "step": 46106 }, { "epoch": 0.8165336237697406, "grad_norm": 0.49039226770401, "learning_rate": 2.570779921052799e-06, "loss": 0.0493, "step": 46107 }, { "epoch": 0.816551333306769, "grad_norm": 0.45517751574516296, "learning_rate": 2.5702982927320486e-06, "loss": 0.038, "step": 46108 }, { "epoch": 0.8165690428437975, "grad_norm": 0.6812382340431213, "learning_rate": 2.5698167053037607e-06, "loss": 0.0519, "step": 46109 }, { "epoch": 0.8165867523808259, "grad_norm": 0.5655150413513184, "learning_rate": 2.5693351587695213e-06, "loss": 0.0574, "step": 46110 }, { "epoch": 0.8166044619178543, "grad_norm": 0.3060482442378998, "learning_rate": 2.5688536531309085e-06, "loss": 0.0421, "step": 46111 }, { "epoch": 0.8166221714548827, "grad_norm": 0.6993008852005005, "learning_rate": 2.56837218838951e-06, "loss": 0.0496, "step": 46112 }, { "epoch": 0.8166398809919112, "grad_norm": 0.7134031653404236, "learning_rate": 2.567890764546911e-06, "loss": 0.0687, "step": 46113 }, { "epoch": 0.8166575905289396, "grad_norm": 0.5866104364395142, "learning_rate": 2.567409381604691e-06, "loss": 0.0365, "step": 46114 }, { "epoch": 0.816675300065968, "grad_norm": 0.6447515487670898, "learning_rate": 2.56692803956444e-06, "loss": 0.0745, "step": 46115 }, { "epoch": 0.8166930096029964, "grad_norm": 0.4717518389225006, "learning_rate": 2.5664467384277364e-06, "loss": 0.0374, "step": 46116 }, { "epoch": 0.8167107191400249, "grad_norm": 0.5513530373573303, "learning_rate": 2.565965478196168e-06, "loss": 0.0464, "step": 46117 }, { "epoch": 0.8167284286770533, "grad_norm": 0.4531940817832947, "learning_rate": 2.5654842588713133e-06, "loss": 0.0625, "step": 46118 }, { "epoch": 0.8167461382140817, "grad_norm": 0.7083025574684143, "learning_rate": 2.5650030804547586e-06, "loss": 0.055, "step": 46119 }, { "epoch": 0.8167638477511102, "grad_norm": 0.6584285497665405, "learning_rate": 2.5645219429480894e-06, "loss": 0.0443, "step": 46120 }, { "epoch": 0.8167815572881386, "grad_norm": 0.34845688939094543, "learning_rate": 2.5640408463528808e-06, "loss": 0.0672, "step": 46121 }, { "epoch": 0.816799266825167, "grad_norm": 0.5797167420387268, "learning_rate": 2.5635597906707204e-06, "loss": 0.0443, "step": 46122 }, { "epoch": 0.8168169763621954, "grad_norm": 0.6390533447265625, "learning_rate": 2.5630787759031847e-06, "loss": 0.0758, "step": 46123 }, { "epoch": 0.8168346858992239, "grad_norm": 0.7711689472198486, "learning_rate": 2.5625978020518716e-06, "loss": 0.0649, "step": 46124 }, { "epoch": 0.8168523954362523, "grad_norm": 0.7120219469070435, "learning_rate": 2.5621168691183505e-06, "loss": 0.0514, "step": 46125 }, { "epoch": 0.8168701049732807, "grad_norm": 0.4736809730529785, "learning_rate": 2.561635977104206e-06, "loss": 0.0469, "step": 46126 }, { "epoch": 0.8168878145103091, "grad_norm": 0.7304673790931702, "learning_rate": 2.5611551260110193e-06, "loss": 0.059, "step": 46127 }, { "epoch": 0.8169055240473376, "grad_norm": 0.584696888923645, "learning_rate": 2.56067431584038e-06, "loss": 0.0681, "step": 46128 }, { "epoch": 0.816923233584366, "grad_norm": 0.6718869209289551, "learning_rate": 2.5601935465938596e-06, "loss": 0.0555, "step": 46129 }, { "epoch": 0.8169409431213944, "grad_norm": 0.470296174287796, "learning_rate": 2.5597128182730457e-06, "loss": 0.0546, "step": 46130 }, { "epoch": 0.8169586526584228, "grad_norm": 0.5762529373168945, "learning_rate": 2.5592321308795165e-06, "loss": 0.058, "step": 46131 }, { "epoch": 0.8169763621954513, "grad_norm": 0.2939126491546631, "learning_rate": 2.5587514844148557e-06, "loss": 0.0537, "step": 46132 }, { "epoch": 0.8169940717324797, "grad_norm": 0.3849479854106903, "learning_rate": 2.5582708788806442e-06, "loss": 0.0519, "step": 46133 }, { "epoch": 0.8170117812695081, "grad_norm": 0.7067864537239075, "learning_rate": 2.5577903142784635e-06, "loss": 0.0566, "step": 46134 }, { "epoch": 0.8170294908065366, "grad_norm": 0.6564866304397583, "learning_rate": 2.557309790609898e-06, "loss": 0.0405, "step": 46135 }, { "epoch": 0.817047200343565, "grad_norm": 0.41937577724456787, "learning_rate": 2.556829307876521e-06, "loss": 0.0553, "step": 46136 }, { "epoch": 0.8170649098805934, "grad_norm": 0.48761796951293945, "learning_rate": 2.5563488660799163e-06, "loss": 0.0457, "step": 46137 }, { "epoch": 0.8170826194176218, "grad_norm": 0.822964608669281, "learning_rate": 2.555868465221669e-06, "loss": 0.0724, "step": 46138 }, { "epoch": 0.8171003289546503, "grad_norm": 0.6826740503311157, "learning_rate": 2.555388105303352e-06, "loss": 0.0408, "step": 46139 }, { "epoch": 0.8171180384916787, "grad_norm": 1.0666282176971436, "learning_rate": 2.55490778632655e-06, "loss": 0.0707, "step": 46140 }, { "epoch": 0.8171357480287071, "grad_norm": 0.28926360607147217, "learning_rate": 2.554427508292843e-06, "loss": 0.0434, "step": 46141 }, { "epoch": 0.8171534575657355, "grad_norm": 0.637176513671875, "learning_rate": 2.553947271203811e-06, "loss": 0.0571, "step": 46142 }, { "epoch": 0.817171167102764, "grad_norm": 1.0365113019943237, "learning_rate": 2.5534670750610316e-06, "loss": 0.0555, "step": 46143 }, { "epoch": 0.8171888766397924, "grad_norm": 0.6261910796165466, "learning_rate": 2.552986919866088e-06, "loss": 0.0483, "step": 46144 }, { "epoch": 0.8172065861768208, "grad_norm": 0.29753750562667847, "learning_rate": 2.5525068056205623e-06, "loss": 0.0569, "step": 46145 }, { "epoch": 0.8172242957138492, "grad_norm": 0.7578927278518677, "learning_rate": 2.5520267323260267e-06, "loss": 0.0752, "step": 46146 }, { "epoch": 0.8172420052508778, "grad_norm": 0.6692886352539062, "learning_rate": 2.5515466999840635e-06, "loss": 0.0818, "step": 46147 }, { "epoch": 0.8172597147879062, "grad_norm": 0.9730962514877319, "learning_rate": 2.5510667085962523e-06, "loss": 0.0505, "step": 46148 }, { "epoch": 0.8172774243249346, "grad_norm": 0.550338864326477, "learning_rate": 2.550586758164177e-06, "loss": 0.0543, "step": 46149 }, { "epoch": 0.8172951338619631, "grad_norm": 0.48860833048820496, "learning_rate": 2.5501068486894084e-06, "loss": 0.053, "step": 46150 }, { "epoch": 0.8173128433989915, "grad_norm": 0.7167680859565735, "learning_rate": 2.5496269801735284e-06, "loss": 0.0592, "step": 46151 }, { "epoch": 0.8173305529360199, "grad_norm": 0.5462200045585632, "learning_rate": 2.5491471526181166e-06, "loss": 0.0739, "step": 46152 }, { "epoch": 0.8173482624730483, "grad_norm": 0.8464419841766357, "learning_rate": 2.548667366024752e-06, "loss": 0.0876, "step": 46153 }, { "epoch": 0.8173659720100768, "grad_norm": 0.49016809463500977, "learning_rate": 2.5481876203950125e-06, "loss": 0.0451, "step": 46154 }, { "epoch": 0.8173836815471052, "grad_norm": 0.6973192691802979, "learning_rate": 2.547707915730475e-06, "loss": 0.0621, "step": 46155 }, { "epoch": 0.8174013910841336, "grad_norm": 0.5989609360694885, "learning_rate": 2.5472282520327238e-06, "loss": 0.0467, "step": 46156 }, { "epoch": 0.817419100621162, "grad_norm": 0.27295249700546265, "learning_rate": 2.5467486293033282e-06, "loss": 0.0446, "step": 46157 }, { "epoch": 0.8174368101581905, "grad_norm": 0.7051045298576355, "learning_rate": 2.54626904754387e-06, "loss": 0.0581, "step": 46158 }, { "epoch": 0.8174545196952189, "grad_norm": 0.6156594157218933, "learning_rate": 2.5457895067559296e-06, "loss": 0.0494, "step": 46159 }, { "epoch": 0.8174722292322473, "grad_norm": 0.7166217565536499, "learning_rate": 2.5453100069410757e-06, "loss": 0.0733, "step": 46160 }, { "epoch": 0.8174899387692757, "grad_norm": 1.0112839937210083, "learning_rate": 2.5448305481008956e-06, "loss": 0.0618, "step": 46161 }, { "epoch": 0.8175076483063042, "grad_norm": 0.49854791164398193, "learning_rate": 2.5443511302369627e-06, "loss": 0.0668, "step": 46162 }, { "epoch": 0.8175253578433326, "grad_norm": 0.708000659942627, "learning_rate": 2.5438717533508595e-06, "loss": 0.0615, "step": 46163 }, { "epoch": 0.817543067380361, "grad_norm": 0.6469886898994446, "learning_rate": 2.543392417444154e-06, "loss": 0.0715, "step": 46164 }, { "epoch": 0.8175607769173895, "grad_norm": 0.6531634330749512, "learning_rate": 2.5429131225184276e-06, "loss": 0.0693, "step": 46165 }, { "epoch": 0.8175784864544179, "grad_norm": 0.5276142954826355, "learning_rate": 2.5424338685752587e-06, "loss": 0.0393, "step": 46166 }, { "epoch": 0.8175961959914463, "grad_norm": 0.6651771068572998, "learning_rate": 2.5419546556162236e-06, "loss": 0.0622, "step": 46167 }, { "epoch": 0.8176139055284747, "grad_norm": 0.6656387448310852, "learning_rate": 2.5414754836428953e-06, "loss": 0.079, "step": 46168 }, { "epoch": 0.8176316150655032, "grad_norm": 0.5555420517921448, "learning_rate": 2.5409963526568527e-06, "loss": 0.0389, "step": 46169 }, { "epoch": 0.8176493246025316, "grad_norm": 0.6699772477149963, "learning_rate": 2.5405172626596716e-06, "loss": 0.0821, "step": 46170 }, { "epoch": 0.81766703413956, "grad_norm": 0.38962116837501526, "learning_rate": 2.540038213652927e-06, "loss": 0.0514, "step": 46171 }, { "epoch": 0.8176847436765884, "grad_norm": 0.6129044890403748, "learning_rate": 2.539559205638199e-06, "loss": 0.0681, "step": 46172 }, { "epoch": 0.8177024532136169, "grad_norm": 0.6495493650436401, "learning_rate": 2.5390802386170593e-06, "loss": 0.0682, "step": 46173 }, { "epoch": 0.8177201627506453, "grad_norm": 0.9522745013237, "learning_rate": 2.53860131259109e-06, "loss": 0.0787, "step": 46174 }, { "epoch": 0.8177378722876737, "grad_norm": 0.8138816356658936, "learning_rate": 2.538122427561857e-06, "loss": 0.0505, "step": 46175 }, { "epoch": 0.8177555818247021, "grad_norm": 1.216806411743164, "learning_rate": 2.537643583530942e-06, "loss": 0.0809, "step": 46176 }, { "epoch": 0.8177732913617306, "grad_norm": 0.5421561002731323, "learning_rate": 2.537164780499922e-06, "loss": 0.0513, "step": 46177 }, { "epoch": 0.817791000898759, "grad_norm": 0.5603459477424622, "learning_rate": 2.536686018470366e-06, "loss": 0.0611, "step": 46178 }, { "epoch": 0.8178087104357874, "grad_norm": 0.669468104839325, "learning_rate": 2.5362072974438525e-06, "loss": 0.031, "step": 46179 }, { "epoch": 0.8178264199728159, "grad_norm": 0.39697137475013733, "learning_rate": 2.535728617421956e-06, "loss": 0.0464, "step": 46180 }, { "epoch": 0.8178441295098443, "grad_norm": 0.30420511960983276, "learning_rate": 2.5352499784062524e-06, "loss": 0.0333, "step": 46181 }, { "epoch": 0.8178618390468727, "grad_norm": 0.8082303404808044, "learning_rate": 2.534771380398314e-06, "loss": 0.0592, "step": 46182 }, { "epoch": 0.8178795485839011, "grad_norm": 0.7253509759902954, "learning_rate": 2.5342928233997186e-06, "loss": 0.0456, "step": 46183 }, { "epoch": 0.8178972581209296, "grad_norm": 0.43788400292396545, "learning_rate": 2.5338143074120414e-06, "loss": 0.0704, "step": 46184 }, { "epoch": 0.817914967657958, "grad_norm": 0.7906172275543213, "learning_rate": 2.533335832436852e-06, "loss": 0.053, "step": 46185 }, { "epoch": 0.8179326771949864, "grad_norm": 0.6032602190971375, "learning_rate": 2.5328573984757252e-06, "loss": 0.0621, "step": 46186 }, { "epoch": 0.8179503867320148, "grad_norm": 0.5048280954360962, "learning_rate": 2.5323790055302376e-06, "loss": 0.0686, "step": 46187 }, { "epoch": 0.8179680962690433, "grad_norm": 0.7478939890861511, "learning_rate": 2.5319006536019664e-06, "loss": 0.0733, "step": 46188 }, { "epoch": 0.8179858058060717, "grad_norm": 0.5832990407943726, "learning_rate": 2.531422342692473e-06, "loss": 0.0494, "step": 46189 }, { "epoch": 0.8180035153431001, "grad_norm": 0.6481248140335083, "learning_rate": 2.530944072803342e-06, "loss": 0.0509, "step": 46190 }, { "epoch": 0.8180212248801285, "grad_norm": 0.7146236300468445, "learning_rate": 2.530465843936147e-06, "loss": 0.0945, "step": 46191 }, { "epoch": 0.818038934417157, "grad_norm": 0.5574405193328857, "learning_rate": 2.5299876560924566e-06, "loss": 0.0652, "step": 46192 }, { "epoch": 0.8180566439541854, "grad_norm": 0.34497693181037903, "learning_rate": 2.5295095092738447e-06, "loss": 0.0736, "step": 46193 }, { "epoch": 0.8180743534912138, "grad_norm": 0.5735399723052979, "learning_rate": 2.5290314034818846e-06, "loss": 0.0436, "step": 46194 }, { "epoch": 0.8180920630282423, "grad_norm": 1.1506022214889526, "learning_rate": 2.5285533387181537e-06, "loss": 0.0681, "step": 46195 }, { "epoch": 0.8181097725652707, "grad_norm": 0.5641031861305237, "learning_rate": 2.528075314984217e-06, "loss": 0.0526, "step": 46196 }, { "epoch": 0.8181274821022991, "grad_norm": 0.577103853225708, "learning_rate": 2.527597332281651e-06, "loss": 0.0581, "step": 46197 }, { "epoch": 0.8181451916393275, "grad_norm": 0.3799495995044708, "learning_rate": 2.5271193906120297e-06, "loss": 0.0475, "step": 46198 }, { "epoch": 0.818162901176356, "grad_norm": 0.4232034683227539, "learning_rate": 2.526641489976923e-06, "loss": 0.044, "step": 46199 }, { "epoch": 0.8181806107133844, "grad_norm": 0.3878227770328522, "learning_rate": 2.526163630377904e-06, "loss": 0.0436, "step": 46200 }, { "epoch": 0.8181983202504128, "grad_norm": 0.7053098082542419, "learning_rate": 2.525685811816545e-06, "loss": 0.0738, "step": 46201 }, { "epoch": 0.8182160297874412, "grad_norm": 0.5368790030479431, "learning_rate": 2.525208034294421e-06, "loss": 0.0777, "step": 46202 }, { "epoch": 0.8182337393244697, "grad_norm": 0.22009074687957764, "learning_rate": 2.524730297813098e-06, "loss": 0.0319, "step": 46203 }, { "epoch": 0.8182514488614981, "grad_norm": 0.27598997950553894, "learning_rate": 2.5242526023741513e-06, "loss": 0.0327, "step": 46204 }, { "epoch": 0.8182691583985265, "grad_norm": 0.7732182145118713, "learning_rate": 2.523774947979151e-06, "loss": 0.0653, "step": 46205 }, { "epoch": 0.8182868679355549, "grad_norm": 0.534285306930542, "learning_rate": 2.5232973346296728e-06, "loss": 0.0512, "step": 46206 }, { "epoch": 0.8183045774725834, "grad_norm": 0.6870436668395996, "learning_rate": 2.52281976232728e-06, "loss": 0.0387, "step": 46207 }, { "epoch": 0.8183222870096118, "grad_norm": 0.3486485183238983, "learning_rate": 2.5223422310735487e-06, "loss": 0.0452, "step": 46208 }, { "epoch": 0.8183399965466402, "grad_norm": 0.5670244693756104, "learning_rate": 2.5218647408700506e-06, "loss": 0.0455, "step": 46209 }, { "epoch": 0.8183577060836688, "grad_norm": 0.9545141458511353, "learning_rate": 2.5213872917183545e-06, "loss": 0.1037, "step": 46210 }, { "epoch": 0.8183754156206972, "grad_norm": 0.5634082555770874, "learning_rate": 2.520909883620032e-06, "loss": 0.0682, "step": 46211 }, { "epoch": 0.8183931251577256, "grad_norm": 0.4812517464160919, "learning_rate": 2.5204325165766527e-06, "loss": 0.0523, "step": 46212 }, { "epoch": 0.818410834694754, "grad_norm": 0.4467628300189972, "learning_rate": 2.519955190589793e-06, "loss": 0.0607, "step": 46213 }, { "epoch": 0.8184285442317825, "grad_norm": 0.5430713891983032, "learning_rate": 2.5194779056610156e-06, "loss": 0.0671, "step": 46214 }, { "epoch": 0.8184462537688109, "grad_norm": 0.47590944170951843, "learning_rate": 2.519000661791892e-06, "loss": 0.0549, "step": 46215 }, { "epoch": 0.8184639633058393, "grad_norm": 0.5830643773078918, "learning_rate": 2.5185234589839993e-06, "loss": 0.0412, "step": 46216 }, { "epoch": 0.8184816728428677, "grad_norm": 0.580321192741394, "learning_rate": 2.5180462972388967e-06, "loss": 0.063, "step": 46217 }, { "epoch": 0.8184993823798962, "grad_norm": 0.6934146285057068, "learning_rate": 2.5175691765581594e-06, "loss": 0.0686, "step": 46218 }, { "epoch": 0.8185170919169246, "grad_norm": 0.784408688545227, "learning_rate": 2.5170920969433543e-06, "loss": 0.0631, "step": 46219 }, { "epoch": 0.818534801453953, "grad_norm": 0.874193549156189, "learning_rate": 2.5166150583960603e-06, "loss": 0.0771, "step": 46220 }, { "epoch": 0.8185525109909814, "grad_norm": 0.5614408850669861, "learning_rate": 2.5161380609178376e-06, "loss": 0.0541, "step": 46221 }, { "epoch": 0.8185702205280099, "grad_norm": 0.8680955767631531, "learning_rate": 2.5156611045102567e-06, "loss": 0.0742, "step": 46222 }, { "epoch": 0.8185879300650383, "grad_norm": 0.6799818873405457, "learning_rate": 2.5151841891748918e-06, "loss": 0.0746, "step": 46223 }, { "epoch": 0.8186056396020667, "grad_norm": 0.37256962060928345, "learning_rate": 2.5147073149133043e-06, "loss": 0.0395, "step": 46224 }, { "epoch": 0.8186233491390952, "grad_norm": 0.3306562602519989, "learning_rate": 2.5142304817270684e-06, "loss": 0.0449, "step": 46225 }, { "epoch": 0.8186410586761236, "grad_norm": 0.28818732500076294, "learning_rate": 2.513753689617749e-06, "loss": 0.0747, "step": 46226 }, { "epoch": 0.818658768213152, "grad_norm": 0.7050449252128601, "learning_rate": 2.513276938586919e-06, "loss": 0.0592, "step": 46227 }, { "epoch": 0.8186764777501804, "grad_norm": 0.5762408971786499, "learning_rate": 2.5128002286361436e-06, "loss": 0.0459, "step": 46228 }, { "epoch": 0.8186941872872089, "grad_norm": 0.7959553003311157, "learning_rate": 2.512323559766994e-06, "loss": 0.0685, "step": 46229 }, { "epoch": 0.8187118968242373, "grad_norm": 0.7510347962379456, "learning_rate": 2.51184693198104e-06, "loss": 0.0555, "step": 46230 }, { "epoch": 0.8187296063612657, "grad_norm": 0.528548538684845, "learning_rate": 2.5113703452798416e-06, "loss": 0.05, "step": 46231 }, { "epoch": 0.8187473158982941, "grad_norm": 0.5491388440132141, "learning_rate": 2.510893799664972e-06, "loss": 0.0668, "step": 46232 }, { "epoch": 0.8187650254353226, "grad_norm": 0.5783403515815735, "learning_rate": 2.5104172951379993e-06, "loss": 0.0334, "step": 46233 }, { "epoch": 0.818782734972351, "grad_norm": 0.49938592314720154, "learning_rate": 2.5099408317004953e-06, "loss": 0.0707, "step": 46234 }, { "epoch": 0.8188004445093794, "grad_norm": 0.5523433685302734, "learning_rate": 2.5094644093540164e-06, "loss": 0.0461, "step": 46235 }, { "epoch": 0.8188181540464078, "grad_norm": 0.6944265365600586, "learning_rate": 2.508988028100137e-06, "loss": 0.0669, "step": 46236 }, { "epoch": 0.8188358635834363, "grad_norm": 0.8142948150634766, "learning_rate": 2.508511687940424e-06, "loss": 0.0541, "step": 46237 }, { "epoch": 0.8188535731204647, "grad_norm": 0.7894437313079834, "learning_rate": 2.5080353888764445e-06, "loss": 0.0601, "step": 46238 }, { "epoch": 0.8188712826574931, "grad_norm": 0.5184439420700073, "learning_rate": 2.5075591309097656e-06, "loss": 0.0688, "step": 46239 }, { "epoch": 0.8188889921945216, "grad_norm": 0.19826576113700867, "learning_rate": 2.507082914041953e-06, "loss": 0.0434, "step": 46240 }, { "epoch": 0.81890670173155, "grad_norm": 0.39531540870666504, "learning_rate": 2.5066067382745763e-06, "loss": 0.0529, "step": 46241 }, { "epoch": 0.8189244112685784, "grad_norm": 0.4440874755382538, "learning_rate": 2.506130603609198e-06, "loss": 0.0324, "step": 46242 }, { "epoch": 0.8189421208056068, "grad_norm": 0.38425761461257935, "learning_rate": 2.5056545100473855e-06, "loss": 0.0313, "step": 46243 }, { "epoch": 0.8189598303426353, "grad_norm": 0.591517984867096, "learning_rate": 2.5051784575907054e-06, "loss": 0.0534, "step": 46244 }, { "epoch": 0.8189775398796637, "grad_norm": 0.5720736384391785, "learning_rate": 2.5047024462407286e-06, "loss": 0.0441, "step": 46245 }, { "epoch": 0.8189952494166921, "grad_norm": 0.41510000824928284, "learning_rate": 2.5042264759990136e-06, "loss": 0.046, "step": 46246 }, { "epoch": 0.8190129589537205, "grad_norm": 0.16644054651260376, "learning_rate": 2.5037505468671305e-06, "loss": 0.0302, "step": 46247 }, { "epoch": 0.819030668490749, "grad_norm": 0.5543477535247803, "learning_rate": 2.503274658846642e-06, "loss": 0.0429, "step": 46248 }, { "epoch": 0.8190483780277774, "grad_norm": 0.6952812075614929, "learning_rate": 2.502798811939118e-06, "loss": 0.035, "step": 46249 }, { "epoch": 0.8190660875648058, "grad_norm": 0.3843969702720642, "learning_rate": 2.502323006146121e-06, "loss": 0.051, "step": 46250 }, { "epoch": 0.8190837971018342, "grad_norm": 0.6524191498756409, "learning_rate": 2.501847241469216e-06, "loss": 0.0637, "step": 46251 }, { "epoch": 0.8191015066388627, "grad_norm": 0.6072300672531128, "learning_rate": 2.5013715179099753e-06, "loss": 0.0412, "step": 46252 }, { "epoch": 0.8191192161758911, "grad_norm": 0.7097004652023315, "learning_rate": 2.500895835469953e-06, "loss": 0.0697, "step": 46253 }, { "epoch": 0.8191369257129195, "grad_norm": 0.7379075288772583, "learning_rate": 2.5004201941507206e-06, "loss": 0.0509, "step": 46254 }, { "epoch": 0.819154635249948, "grad_norm": 0.63033527135849, "learning_rate": 2.4999445939538437e-06, "loss": 0.0886, "step": 46255 }, { "epoch": 0.8191723447869764, "grad_norm": 0.25531458854675293, "learning_rate": 2.4994690348808775e-06, "loss": 0.0359, "step": 46256 }, { "epoch": 0.8191900543240048, "grad_norm": 0.5417921543121338, "learning_rate": 2.4989935169333976e-06, "loss": 0.0448, "step": 46257 }, { "epoch": 0.8192077638610332, "grad_norm": 0.4467291831970215, "learning_rate": 2.4985180401129646e-06, "loss": 0.0348, "step": 46258 }, { "epoch": 0.8192254733980617, "grad_norm": 0.6341840028762817, "learning_rate": 2.4980426044211473e-06, "loss": 0.0507, "step": 46259 }, { "epoch": 0.8192431829350901, "grad_norm": 0.6965456604957581, "learning_rate": 2.497567209859501e-06, "loss": 0.0597, "step": 46260 }, { "epoch": 0.8192608924721185, "grad_norm": 0.7840755581855774, "learning_rate": 2.4970918564295942e-06, "loss": 0.042, "step": 46261 }, { "epoch": 0.8192786020091469, "grad_norm": 0.6700052618980408, "learning_rate": 2.496616544132996e-06, "loss": 0.0451, "step": 46262 }, { "epoch": 0.8192963115461754, "grad_norm": 0.5108935236930847, "learning_rate": 2.496141272971258e-06, "loss": 0.0608, "step": 46263 }, { "epoch": 0.8193140210832038, "grad_norm": 0.5115790963172913, "learning_rate": 2.4956660429459528e-06, "loss": 0.0447, "step": 46264 }, { "epoch": 0.8193317306202322, "grad_norm": 0.726348340511322, "learning_rate": 2.4951908540586404e-06, "loss": 0.0518, "step": 46265 }, { "epoch": 0.8193494401572606, "grad_norm": 0.43283751606941223, "learning_rate": 2.494715706310885e-06, "loss": 0.0541, "step": 46266 }, { "epoch": 0.8193671496942891, "grad_norm": 0.4862424433231354, "learning_rate": 2.4942405997042518e-06, "loss": 0.0392, "step": 46267 }, { "epoch": 0.8193848592313175, "grad_norm": 0.4182875454425812, "learning_rate": 2.493765534240301e-06, "loss": 0.071, "step": 46268 }, { "epoch": 0.8194025687683459, "grad_norm": 0.7427394986152649, "learning_rate": 2.4932905099205966e-06, "loss": 0.0636, "step": 46269 }, { "epoch": 0.8194202783053744, "grad_norm": 1.0292813777923584, "learning_rate": 2.492815526746704e-06, "loss": 0.0633, "step": 46270 }, { "epoch": 0.8194379878424028, "grad_norm": 0.5471522808074951, "learning_rate": 2.492340584720182e-06, "loss": 0.0369, "step": 46271 }, { "epoch": 0.8194556973794312, "grad_norm": 0.5185821652412415, "learning_rate": 2.4918656838425923e-06, "loss": 0.049, "step": 46272 }, { "epoch": 0.8194734069164596, "grad_norm": 0.6800083518028259, "learning_rate": 2.491390824115504e-06, "loss": 0.0583, "step": 46273 }, { "epoch": 0.8194911164534882, "grad_norm": 0.9119364619255066, "learning_rate": 2.490916005540471e-06, "loss": 0.061, "step": 46274 }, { "epoch": 0.8195088259905166, "grad_norm": 0.3930658996105194, "learning_rate": 2.4904412281190585e-06, "loss": 0.0531, "step": 46275 }, { "epoch": 0.819526535527545, "grad_norm": 0.8771128058433533, "learning_rate": 2.4899664918528304e-06, "loss": 0.0803, "step": 46276 }, { "epoch": 0.8195442450645734, "grad_norm": 0.498089998960495, "learning_rate": 2.4894917967433467e-06, "loss": 0.0362, "step": 46277 }, { "epoch": 0.8195619546016019, "grad_norm": 0.6948918104171753, "learning_rate": 2.4890171427921697e-06, "loss": 0.0611, "step": 46278 }, { "epoch": 0.8195796641386303, "grad_norm": 0.6698063611984253, "learning_rate": 2.48854253000086e-06, "loss": 0.061, "step": 46279 }, { "epoch": 0.8195973736756587, "grad_norm": 0.3836788535118103, "learning_rate": 2.4880679583709847e-06, "loss": 0.0538, "step": 46280 }, { "epoch": 0.8196150832126872, "grad_norm": 0.4969446361064911, "learning_rate": 2.4875934279040955e-06, "loss": 0.0558, "step": 46281 }, { "epoch": 0.8196327927497156, "grad_norm": 0.7657475471496582, "learning_rate": 2.48711893860176e-06, "loss": 0.0615, "step": 46282 }, { "epoch": 0.819650502286744, "grad_norm": 0.70530766248703, "learning_rate": 2.486644490465537e-06, "loss": 0.05, "step": 46283 }, { "epoch": 0.8196682118237724, "grad_norm": 0.5861204862594604, "learning_rate": 2.4861700834969907e-06, "loss": 0.0466, "step": 46284 }, { "epoch": 0.8196859213608009, "grad_norm": 0.7374151349067688, "learning_rate": 2.485695717697673e-06, "loss": 0.0339, "step": 46285 }, { "epoch": 0.8197036308978293, "grad_norm": 0.5231979489326477, "learning_rate": 2.485221393069154e-06, "loss": 0.0836, "step": 46286 }, { "epoch": 0.8197213404348577, "grad_norm": 0.7718116044998169, "learning_rate": 2.4847471096129933e-06, "loss": 0.073, "step": 46287 }, { "epoch": 0.8197390499718861, "grad_norm": 0.47317081689834595, "learning_rate": 2.484272867330747e-06, "loss": 0.0521, "step": 46288 }, { "epoch": 0.8197567595089146, "grad_norm": 0.4466642439365387, "learning_rate": 2.4837986662239753e-06, "loss": 0.0775, "step": 46289 }, { "epoch": 0.819774469045943, "grad_norm": 0.6253542900085449, "learning_rate": 2.4833245062942404e-06, "loss": 0.0694, "step": 46290 }, { "epoch": 0.8197921785829714, "grad_norm": 0.4508509635925293, "learning_rate": 2.482850387543106e-06, "loss": 0.0401, "step": 46291 }, { "epoch": 0.8198098881199998, "grad_norm": 0.9104471206665039, "learning_rate": 2.4823763099721245e-06, "loss": 0.0623, "step": 46292 }, { "epoch": 0.8198275976570283, "grad_norm": 0.68455570936203, "learning_rate": 2.4819022735828578e-06, "loss": 0.0427, "step": 46293 }, { "epoch": 0.8198453071940567, "grad_norm": 0.6817499995231628, "learning_rate": 2.4814282783768678e-06, "loss": 0.0791, "step": 46294 }, { "epoch": 0.8198630167310851, "grad_norm": 0.7366629838943481, "learning_rate": 2.4809543243557114e-06, "loss": 0.0468, "step": 46295 }, { "epoch": 0.8198807262681136, "grad_norm": 0.4496321678161621, "learning_rate": 2.4804804115209494e-06, "loss": 0.0566, "step": 46296 }, { "epoch": 0.819898435805142, "grad_norm": 0.7395679354667664, "learning_rate": 2.4800065398741423e-06, "loss": 0.0507, "step": 46297 }, { "epoch": 0.8199161453421704, "grad_norm": 0.7142288684844971, "learning_rate": 2.4795327094168483e-06, "loss": 0.05, "step": 46298 }, { "epoch": 0.8199338548791988, "grad_norm": 0.6996933221817017, "learning_rate": 2.479058920150624e-06, "loss": 0.0698, "step": 46299 }, { "epoch": 0.8199515644162273, "grad_norm": 0.6904250383377075, "learning_rate": 2.478585172077028e-06, "loss": 0.0672, "step": 46300 }, { "epoch": 0.8199692739532557, "grad_norm": 0.6930469274520874, "learning_rate": 2.4781114651976204e-06, "loss": 0.062, "step": 46301 }, { "epoch": 0.8199869834902841, "grad_norm": 0.3699100911617279, "learning_rate": 2.4776377995139633e-06, "loss": 0.0524, "step": 46302 }, { "epoch": 0.8200046930273125, "grad_norm": 0.44479912519454956, "learning_rate": 2.477164175027608e-06, "loss": 0.059, "step": 46303 }, { "epoch": 0.820022402564341, "grad_norm": 0.8131295442581177, "learning_rate": 2.4766905917401156e-06, "loss": 0.0526, "step": 46304 }, { "epoch": 0.8200401121013694, "grad_norm": 0.5853480100631714, "learning_rate": 2.4762170496530446e-06, "loss": 0.0598, "step": 46305 }, { "epoch": 0.8200578216383978, "grad_norm": 0.678743839263916, "learning_rate": 2.475743548767952e-06, "loss": 0.0535, "step": 46306 }, { "epoch": 0.8200755311754262, "grad_norm": 0.5887840390205383, "learning_rate": 2.4752700890863983e-06, "loss": 0.0459, "step": 46307 }, { "epoch": 0.8200932407124547, "grad_norm": 0.8599708676338196, "learning_rate": 2.4747966706099374e-06, "loss": 0.0608, "step": 46308 }, { "epoch": 0.8201109502494831, "grad_norm": 0.5919291377067566, "learning_rate": 2.4743232933401327e-06, "loss": 0.0798, "step": 46309 }, { "epoch": 0.8201286597865115, "grad_norm": 0.5985482335090637, "learning_rate": 2.473849957278535e-06, "loss": 0.0533, "step": 46310 }, { "epoch": 0.82014636932354, "grad_norm": 0.6128856539726257, "learning_rate": 2.4733766624267027e-06, "loss": 0.0393, "step": 46311 }, { "epoch": 0.8201640788605684, "grad_norm": 0.5925926566123962, "learning_rate": 2.4729034087861983e-06, "loss": 0.061, "step": 46312 }, { "epoch": 0.8201817883975968, "grad_norm": 0.53673255443573, "learning_rate": 2.4724301963585703e-06, "loss": 0.0676, "step": 46313 }, { "epoch": 0.8201994979346252, "grad_norm": 0.4025571048259735, "learning_rate": 2.4719570251453776e-06, "loss": 0.0562, "step": 46314 }, { "epoch": 0.8202172074716537, "grad_norm": 0.7175448536872864, "learning_rate": 2.471483895148182e-06, "loss": 0.0468, "step": 46315 }, { "epoch": 0.8202349170086821, "grad_norm": 0.6338608860969543, "learning_rate": 2.4710108063685412e-06, "loss": 0.0578, "step": 46316 }, { "epoch": 0.8202526265457105, "grad_norm": 0.46347910165786743, "learning_rate": 2.4705377588080032e-06, "loss": 0.0406, "step": 46317 }, { "epoch": 0.8202703360827389, "grad_norm": 0.6519311666488647, "learning_rate": 2.4700647524681288e-06, "loss": 0.0616, "step": 46318 }, { "epoch": 0.8202880456197674, "grad_norm": 0.6342260837554932, "learning_rate": 2.469591787350478e-06, "loss": 0.0311, "step": 46319 }, { "epoch": 0.8203057551567958, "grad_norm": 0.5253958106040955, "learning_rate": 2.4691188634565987e-06, "loss": 0.0492, "step": 46320 }, { "epoch": 0.8203234646938242, "grad_norm": 0.3816159963607788, "learning_rate": 2.4686459807880522e-06, "loss": 0.044, "step": 46321 }, { "epoch": 0.8203411742308526, "grad_norm": 0.3764483332633972, "learning_rate": 2.468173139346391e-06, "loss": 0.0404, "step": 46322 }, { "epoch": 0.8203588837678811, "grad_norm": 0.8823021650314331, "learning_rate": 2.467700339133174e-06, "loss": 0.0615, "step": 46323 }, { "epoch": 0.8203765933049095, "grad_norm": 0.5236860513687134, "learning_rate": 2.467227580149955e-06, "loss": 0.0278, "step": 46324 }, { "epoch": 0.8203943028419379, "grad_norm": 0.69953453540802, "learning_rate": 2.4667548623982903e-06, "loss": 0.0557, "step": 46325 }, { "epoch": 0.8204120123789664, "grad_norm": 0.3596857488155365, "learning_rate": 2.466282185879738e-06, "loss": 0.0661, "step": 46326 }, { "epoch": 0.8204297219159948, "grad_norm": 0.7751747965812683, "learning_rate": 2.465809550595846e-06, "loss": 0.0831, "step": 46327 }, { "epoch": 0.8204474314530232, "grad_norm": 0.6917641758918762, "learning_rate": 2.465336956548172e-06, "loss": 0.0425, "step": 46328 }, { "epoch": 0.8204651409900516, "grad_norm": 0.4437548518180847, "learning_rate": 2.464864403738271e-06, "loss": 0.0499, "step": 46329 }, { "epoch": 0.8204828505270801, "grad_norm": 0.4978484511375427, "learning_rate": 2.464391892167704e-06, "loss": 0.0518, "step": 46330 }, { "epoch": 0.8205005600641085, "grad_norm": 1.029701590538025, "learning_rate": 2.463919421838014e-06, "loss": 0.0686, "step": 46331 }, { "epoch": 0.8205182696011369, "grad_norm": 0.645999014377594, "learning_rate": 2.463446992750762e-06, "loss": 0.0504, "step": 46332 }, { "epoch": 0.8205359791381653, "grad_norm": 0.820381760597229, "learning_rate": 2.4629746049075015e-06, "loss": 0.0395, "step": 46333 }, { "epoch": 0.8205536886751938, "grad_norm": 0.38502445816993713, "learning_rate": 2.462502258309788e-06, "loss": 0.063, "step": 46334 }, { "epoch": 0.8205713982122222, "grad_norm": 0.3384809195995331, "learning_rate": 2.4620299529591718e-06, "loss": 0.0472, "step": 46335 }, { "epoch": 0.8205891077492506, "grad_norm": 0.4444694519042969, "learning_rate": 2.46155768885721e-06, "loss": 0.0728, "step": 46336 }, { "epoch": 0.820606817286279, "grad_norm": 0.33126649260520935, "learning_rate": 2.4610854660054583e-06, "loss": 0.043, "step": 46337 }, { "epoch": 0.8206245268233076, "grad_norm": 0.7285369038581848, "learning_rate": 2.4606132844054634e-06, "loss": 0.0513, "step": 46338 }, { "epoch": 0.820642236360336, "grad_norm": 0.43479448556900024, "learning_rate": 2.460141144058783e-06, "loss": 0.0548, "step": 46339 }, { "epoch": 0.8206599458973644, "grad_norm": 0.5583215355873108, "learning_rate": 2.45966904496697e-06, "loss": 0.0551, "step": 46340 }, { "epoch": 0.8206776554343929, "grad_norm": 0.8122903108596802, "learning_rate": 2.4591969871315805e-06, "loss": 0.042, "step": 46341 }, { "epoch": 0.8206953649714213, "grad_norm": 0.9166879057884216, "learning_rate": 2.4587249705541616e-06, "loss": 0.0769, "step": 46342 }, { "epoch": 0.8207130745084497, "grad_norm": 0.5949276685714722, "learning_rate": 2.4582529952362647e-06, "loss": 0.0389, "step": 46343 }, { "epoch": 0.8207307840454781, "grad_norm": 0.5615916848182678, "learning_rate": 2.4577810611794543e-06, "loss": 0.0382, "step": 46344 }, { "epoch": 0.8207484935825066, "grad_norm": 0.5492968559265137, "learning_rate": 2.457309168385272e-06, "loss": 0.0444, "step": 46345 }, { "epoch": 0.820766203119535, "grad_norm": 0.4080226719379425, "learning_rate": 2.456837316855274e-06, "loss": 0.0695, "step": 46346 }, { "epoch": 0.8207839126565634, "grad_norm": 0.8481401205062866, "learning_rate": 2.456365506591013e-06, "loss": 0.0693, "step": 46347 }, { "epoch": 0.8208016221935918, "grad_norm": 0.47341108322143555, "learning_rate": 2.455893737594045e-06, "loss": 0.0332, "step": 46348 }, { "epoch": 0.8208193317306203, "grad_norm": 0.5838336944580078, "learning_rate": 2.4554220098659137e-06, "loss": 0.0637, "step": 46349 }, { "epoch": 0.8208370412676487, "grad_norm": 0.35153570771217346, "learning_rate": 2.4549503234081743e-06, "loss": 0.0418, "step": 46350 }, { "epoch": 0.8208547508046771, "grad_norm": 0.5909801125526428, "learning_rate": 2.4544786782223795e-06, "loss": 0.0751, "step": 46351 }, { "epoch": 0.8208724603417055, "grad_norm": 0.7722433805465698, "learning_rate": 2.4540070743100824e-06, "loss": 0.0493, "step": 46352 }, { "epoch": 0.820890169878734, "grad_norm": 0.354637086391449, "learning_rate": 2.4535355116728318e-06, "loss": 0.0282, "step": 46353 }, { "epoch": 0.8209078794157624, "grad_norm": 0.5847446322441101, "learning_rate": 2.4530639903121802e-06, "loss": 0.0517, "step": 46354 }, { "epoch": 0.8209255889527908, "grad_norm": 0.711076557636261, "learning_rate": 2.4525925102296833e-06, "loss": 0.0455, "step": 46355 }, { "epoch": 0.8209432984898193, "grad_norm": 0.4433608651161194, "learning_rate": 2.452121071426884e-06, "loss": 0.0449, "step": 46356 }, { "epoch": 0.8209610080268477, "grad_norm": 0.8378564715385437, "learning_rate": 2.451649673905338e-06, "loss": 0.0441, "step": 46357 }, { "epoch": 0.8209787175638761, "grad_norm": 0.5315576195716858, "learning_rate": 2.4511783176665975e-06, "loss": 0.0417, "step": 46358 }, { "epoch": 0.8209964271009045, "grad_norm": 0.6030150651931763, "learning_rate": 2.450707002712208e-06, "loss": 0.0586, "step": 46359 }, { "epoch": 0.821014136637933, "grad_norm": 0.6529096364974976, "learning_rate": 2.450235729043725e-06, "loss": 0.0934, "step": 46360 }, { "epoch": 0.8210318461749614, "grad_norm": 0.20629830658435822, "learning_rate": 2.4497644966626955e-06, "loss": 0.0372, "step": 46361 }, { "epoch": 0.8210495557119898, "grad_norm": 0.46702638268470764, "learning_rate": 2.4492933055706713e-06, "loss": 0.0527, "step": 46362 }, { "epoch": 0.8210672652490182, "grad_norm": 0.7633240818977356, "learning_rate": 2.448822155769203e-06, "loss": 0.0439, "step": 46363 }, { "epoch": 0.8210849747860467, "grad_norm": 0.6501232981681824, "learning_rate": 2.4483510472598398e-06, "loss": 0.0638, "step": 46364 }, { "epoch": 0.8211026843230751, "grad_norm": 0.5694591403007507, "learning_rate": 2.4478799800441365e-06, "loss": 0.0532, "step": 46365 }, { "epoch": 0.8211203938601035, "grad_norm": 0.7542759776115417, "learning_rate": 2.447408954123634e-06, "loss": 0.0625, "step": 46366 }, { "epoch": 0.8211381033971319, "grad_norm": 0.3055345416069031, "learning_rate": 2.4469379694998873e-06, "loss": 0.0335, "step": 46367 }, { "epoch": 0.8211558129341604, "grad_norm": 0.5714464783668518, "learning_rate": 2.4464670261744454e-06, "loss": 0.049, "step": 46368 }, { "epoch": 0.8211735224711888, "grad_norm": 0.355270653963089, "learning_rate": 2.4459961241488604e-06, "loss": 0.0576, "step": 46369 }, { "epoch": 0.8211912320082172, "grad_norm": 0.7037729024887085, "learning_rate": 2.4455252634246743e-06, "loss": 0.0774, "step": 46370 }, { "epoch": 0.8212089415452457, "grad_norm": 0.7879378199577332, "learning_rate": 2.445054444003441e-06, "loss": 0.0484, "step": 46371 }, { "epoch": 0.8212266510822741, "grad_norm": 0.7337784767150879, "learning_rate": 2.4445836658867044e-06, "loss": 0.0685, "step": 46372 }, { "epoch": 0.8212443606193025, "grad_norm": 0.4576789140701294, "learning_rate": 2.4441129290760244e-06, "loss": 0.0452, "step": 46373 }, { "epoch": 0.8212620701563309, "grad_norm": 0.5677154660224915, "learning_rate": 2.4436422335729403e-06, "loss": 0.0723, "step": 46374 }, { "epoch": 0.8212797796933594, "grad_norm": 0.41037312150001526, "learning_rate": 2.4431715793790037e-06, "loss": 0.0467, "step": 46375 }, { "epoch": 0.8212974892303878, "grad_norm": 0.26126882433891296, "learning_rate": 2.4427009664957657e-06, "loss": 0.0487, "step": 46376 }, { "epoch": 0.8213151987674162, "grad_norm": 0.4470103681087494, "learning_rate": 2.4422303949247677e-06, "loss": 0.0462, "step": 46377 }, { "epoch": 0.8213329083044446, "grad_norm": 0.7072187662124634, "learning_rate": 2.4417598646675604e-06, "loss": 0.0669, "step": 46378 }, { "epoch": 0.8213506178414731, "grad_norm": 0.2798110246658325, "learning_rate": 2.441289375725694e-06, "loss": 0.0255, "step": 46379 }, { "epoch": 0.8213683273785015, "grad_norm": 0.5400575399398804, "learning_rate": 2.4408189281007165e-06, "loss": 0.0437, "step": 46380 }, { "epoch": 0.8213860369155299, "grad_norm": 0.5459580421447754, "learning_rate": 2.4403485217941725e-06, "loss": 0.0638, "step": 46381 }, { "epoch": 0.8214037464525583, "grad_norm": 0.6169227361679077, "learning_rate": 2.439878156807612e-06, "loss": 0.0543, "step": 46382 }, { "epoch": 0.8214214559895868, "grad_norm": 0.6806714534759521, "learning_rate": 2.439407833142587e-06, "loss": 0.0498, "step": 46383 }, { "epoch": 0.8214391655266152, "grad_norm": 0.9701478481292725, "learning_rate": 2.4389375508006347e-06, "loss": 0.0543, "step": 46384 }, { "epoch": 0.8214568750636436, "grad_norm": 0.45827242732048035, "learning_rate": 2.4384673097833076e-06, "loss": 0.0425, "step": 46385 }, { "epoch": 0.8214745846006721, "grad_norm": 0.2572387754917145, "learning_rate": 2.4379971100921535e-06, "loss": 0.0369, "step": 46386 }, { "epoch": 0.8214922941377005, "grad_norm": 0.5190425515174866, "learning_rate": 2.4375269517287202e-06, "loss": 0.0453, "step": 46387 }, { "epoch": 0.8215100036747289, "grad_norm": 0.8523370027542114, "learning_rate": 2.4370568346945503e-06, "loss": 0.0487, "step": 46388 }, { "epoch": 0.8215277132117573, "grad_norm": 0.3294689953327179, "learning_rate": 2.4365867589911926e-06, "loss": 0.0458, "step": 46389 }, { "epoch": 0.8215454227487858, "grad_norm": 0.5436961054801941, "learning_rate": 2.436116724620193e-06, "loss": 0.0482, "step": 46390 }, { "epoch": 0.8215631322858142, "grad_norm": 0.5285412073135376, "learning_rate": 2.435646731583099e-06, "loss": 0.0591, "step": 46391 }, { "epoch": 0.8215808418228426, "grad_norm": 0.7336783409118652, "learning_rate": 2.4351767798814565e-06, "loss": 0.0383, "step": 46392 }, { "epoch": 0.821598551359871, "grad_norm": 0.5205708742141724, "learning_rate": 2.4347068695168116e-06, "loss": 0.0471, "step": 46393 }, { "epoch": 0.8216162608968995, "grad_norm": 0.5481996536254883, "learning_rate": 2.4342370004907144e-06, "loss": 0.0487, "step": 46394 }, { "epoch": 0.8216339704339279, "grad_norm": 0.6092609167098999, "learning_rate": 2.4337671728047036e-06, "loss": 0.0505, "step": 46395 }, { "epoch": 0.8216516799709563, "grad_norm": 0.7134608030319214, "learning_rate": 2.4332973864603257e-06, "loss": 0.0821, "step": 46396 }, { "epoch": 0.8216693895079847, "grad_norm": 0.5377562642097473, "learning_rate": 2.432827641459132e-06, "loss": 0.0506, "step": 46397 }, { "epoch": 0.8216870990450132, "grad_norm": 0.48256704211235046, "learning_rate": 2.432357937802663e-06, "loss": 0.0422, "step": 46398 }, { "epoch": 0.8217048085820416, "grad_norm": 0.9766667485237122, "learning_rate": 2.431888275492463e-06, "loss": 0.0787, "step": 46399 }, { "epoch": 0.82172251811907, "grad_norm": 0.31673601269721985, "learning_rate": 2.4314186545300802e-06, "loss": 0.0218, "step": 46400 }, { "epoch": 0.8217402276560986, "grad_norm": 0.5040230751037598, "learning_rate": 2.4309490749170582e-06, "loss": 0.0378, "step": 46401 }, { "epoch": 0.821757937193127, "grad_norm": 1.1501954793930054, "learning_rate": 2.430479536654944e-06, "loss": 0.0736, "step": 46402 }, { "epoch": 0.8217756467301554, "grad_norm": 0.45237889885902405, "learning_rate": 2.430010039745279e-06, "loss": 0.0511, "step": 46403 }, { "epoch": 0.8217933562671837, "grad_norm": 1.7354061603546143, "learning_rate": 2.4295405841896147e-06, "loss": 0.069, "step": 46404 }, { "epoch": 0.8218110658042123, "grad_norm": 0.6130449175834656, "learning_rate": 2.4290711699894858e-06, "loss": 0.0577, "step": 46405 }, { "epoch": 0.8218287753412407, "grad_norm": 0.5125086903572083, "learning_rate": 2.4286017971464414e-06, "loss": 0.0436, "step": 46406 }, { "epoch": 0.8218464848782691, "grad_norm": 0.6221961975097656, "learning_rate": 2.4281324656620253e-06, "loss": 0.0712, "step": 46407 }, { "epoch": 0.8218641944152975, "grad_norm": 0.42110323905944824, "learning_rate": 2.427663175537786e-06, "loss": 0.0273, "step": 46408 }, { "epoch": 0.821881903952326, "grad_norm": 0.42305970191955566, "learning_rate": 2.427193926775256e-06, "loss": 0.0543, "step": 46409 }, { "epoch": 0.8218996134893544, "grad_norm": 0.4482024013996124, "learning_rate": 2.42672471937599e-06, "loss": 0.0402, "step": 46410 }, { "epoch": 0.8219173230263828, "grad_norm": 0.391561359167099, "learning_rate": 2.4262555533415275e-06, "loss": 0.0487, "step": 46411 }, { "epoch": 0.8219350325634112, "grad_norm": 0.4332423806190491, "learning_rate": 2.425786428673417e-06, "loss": 0.0394, "step": 46412 }, { "epoch": 0.8219527421004397, "grad_norm": 0.7699779868125916, "learning_rate": 2.4253173453731926e-06, "loss": 0.0613, "step": 46413 }, { "epoch": 0.8219704516374681, "grad_norm": 0.48959577083587646, "learning_rate": 2.4248483034424024e-06, "loss": 0.0629, "step": 46414 }, { "epoch": 0.8219881611744965, "grad_norm": 0.7421094179153442, "learning_rate": 2.424379302882594e-06, "loss": 0.0564, "step": 46415 }, { "epoch": 0.822005870711525, "grad_norm": 0.46209049224853516, "learning_rate": 2.4239103436953007e-06, "loss": 0.038, "step": 46416 }, { "epoch": 0.8220235802485534, "grad_norm": 0.8174872994422913, "learning_rate": 2.423441425882072e-06, "loss": 0.0512, "step": 46417 }, { "epoch": 0.8220412897855818, "grad_norm": 0.34431952238082886, "learning_rate": 2.4229725494444477e-06, "loss": 0.0602, "step": 46418 }, { "epoch": 0.8220589993226102, "grad_norm": 0.5941897034645081, "learning_rate": 2.4225037143839734e-06, "loss": 0.0584, "step": 46419 }, { "epoch": 0.8220767088596387, "grad_norm": 0.8073415160179138, "learning_rate": 2.422034920702188e-06, "loss": 0.052, "step": 46420 }, { "epoch": 0.8220944183966671, "grad_norm": 0.6313717365264893, "learning_rate": 2.421566168400635e-06, "loss": 0.044, "step": 46421 }, { "epoch": 0.8221121279336955, "grad_norm": 0.6568495631217957, "learning_rate": 2.421097457480862e-06, "loss": 0.0694, "step": 46422 }, { "epoch": 0.8221298374707239, "grad_norm": 0.39878717064857483, "learning_rate": 2.4206287879444024e-06, "loss": 0.0401, "step": 46423 }, { "epoch": 0.8221475470077524, "grad_norm": 0.6628056168556213, "learning_rate": 2.4201601597928014e-06, "loss": 0.0636, "step": 46424 }, { "epoch": 0.8221652565447808, "grad_norm": 0.7853272557258606, "learning_rate": 2.4196915730276015e-06, "loss": 0.0529, "step": 46425 }, { "epoch": 0.8221829660818092, "grad_norm": 0.357159823179245, "learning_rate": 2.4192230276503462e-06, "loss": 0.0349, "step": 46426 }, { "epoch": 0.8222006756188376, "grad_norm": 0.7215555310249329, "learning_rate": 2.418754523662573e-06, "loss": 0.0668, "step": 46427 }, { "epoch": 0.8222183851558661, "grad_norm": 0.8641417622566223, "learning_rate": 2.4182860610658237e-06, "loss": 0.0455, "step": 46428 }, { "epoch": 0.8222360946928945, "grad_norm": 0.440579354763031, "learning_rate": 2.41781763986164e-06, "loss": 0.0156, "step": 46429 }, { "epoch": 0.8222538042299229, "grad_norm": 0.9914929866790771, "learning_rate": 2.417349260051565e-06, "loss": 0.0825, "step": 46430 }, { "epoch": 0.8222715137669514, "grad_norm": 0.6797321438789368, "learning_rate": 2.4168809216371384e-06, "loss": 0.0496, "step": 46431 }, { "epoch": 0.8222892233039798, "grad_norm": 0.4120267331600189, "learning_rate": 2.416412624619901e-06, "loss": 0.0792, "step": 46432 }, { "epoch": 0.8223069328410082, "grad_norm": 0.38117650151252747, "learning_rate": 2.415944369001396e-06, "loss": 0.0392, "step": 46433 }, { "epoch": 0.8223246423780366, "grad_norm": 0.9461358189582825, "learning_rate": 2.415476154783158e-06, "loss": 0.0787, "step": 46434 }, { "epoch": 0.8223423519150651, "grad_norm": 0.21336232125759125, "learning_rate": 2.41500798196673e-06, "loss": 0.0407, "step": 46435 }, { "epoch": 0.8223600614520935, "grad_norm": 0.5509926676750183, "learning_rate": 2.4145398505536574e-06, "loss": 0.0486, "step": 46436 }, { "epoch": 0.8223777709891219, "grad_norm": 0.8084811568260193, "learning_rate": 2.4140717605454714e-06, "loss": 0.0424, "step": 46437 }, { "epoch": 0.8223954805261503, "grad_norm": 0.6453938484191895, "learning_rate": 2.413603711943712e-06, "loss": 0.0419, "step": 46438 }, { "epoch": 0.8224131900631788, "grad_norm": 0.34720292687416077, "learning_rate": 2.413135704749928e-06, "loss": 0.0548, "step": 46439 }, { "epoch": 0.8224308996002072, "grad_norm": 0.548617959022522, "learning_rate": 2.412667738965658e-06, "loss": 0.0424, "step": 46440 }, { "epoch": 0.8224486091372356, "grad_norm": 0.6609875559806824, "learning_rate": 2.4121998145924347e-06, "loss": 0.0487, "step": 46441 }, { "epoch": 0.822466318674264, "grad_norm": 0.7084880471229553, "learning_rate": 2.411731931631801e-06, "loss": 0.0631, "step": 46442 }, { "epoch": 0.8224840282112925, "grad_norm": 0.734613835811615, "learning_rate": 2.4112640900852963e-06, "loss": 0.037, "step": 46443 }, { "epoch": 0.8225017377483209, "grad_norm": 0.3499712646007538, "learning_rate": 2.4107962899544624e-06, "loss": 0.0731, "step": 46444 }, { "epoch": 0.8225194472853493, "grad_norm": 0.9312102198600769, "learning_rate": 2.4103285312408312e-06, "loss": 0.1033, "step": 46445 }, { "epoch": 0.8225371568223778, "grad_norm": 0.3841410279273987, "learning_rate": 2.4098608139459457e-06, "loss": 0.0262, "step": 46446 }, { "epoch": 0.8225548663594062, "grad_norm": 0.6471533179283142, "learning_rate": 2.4093931380713452e-06, "loss": 0.0657, "step": 46447 }, { "epoch": 0.8225725758964346, "grad_norm": 0.6791619658470154, "learning_rate": 2.4089255036185676e-06, "loss": 0.0667, "step": 46448 }, { "epoch": 0.822590285433463, "grad_norm": 0.4171818196773529, "learning_rate": 2.408457910589153e-06, "loss": 0.0514, "step": 46449 }, { "epoch": 0.8226079949704915, "grad_norm": 0.23699799180030823, "learning_rate": 2.407990358984637e-06, "loss": 0.0425, "step": 46450 }, { "epoch": 0.8226257045075199, "grad_norm": 0.7274144291877747, "learning_rate": 2.4075228488065614e-06, "loss": 0.0562, "step": 46451 }, { "epoch": 0.8226434140445483, "grad_norm": 0.3702183663845062, "learning_rate": 2.4070553800564604e-06, "loss": 0.0605, "step": 46452 }, { "epoch": 0.8226611235815767, "grad_norm": 0.7460389733314514, "learning_rate": 2.4065879527358725e-06, "loss": 0.0428, "step": 46453 }, { "epoch": 0.8226788331186052, "grad_norm": 0.8151783347129822, "learning_rate": 2.4061205668463413e-06, "loss": 0.0622, "step": 46454 }, { "epoch": 0.8226965426556336, "grad_norm": 0.8090398907661438, "learning_rate": 2.4056532223893946e-06, "loss": 0.0609, "step": 46455 }, { "epoch": 0.822714252192662, "grad_norm": 0.5609081387519836, "learning_rate": 2.405185919366577e-06, "loss": 0.071, "step": 46456 }, { "epoch": 0.8227319617296904, "grad_norm": 0.7022916078567505, "learning_rate": 2.404718657779421e-06, "loss": 0.0568, "step": 46457 }, { "epoch": 0.8227496712667189, "grad_norm": 0.6045122742652893, "learning_rate": 2.404251437629469e-06, "loss": 0.0676, "step": 46458 }, { "epoch": 0.8227673808037473, "grad_norm": 0.44996973872184753, "learning_rate": 2.4037842589182557e-06, "loss": 0.0475, "step": 46459 }, { "epoch": 0.8227850903407757, "grad_norm": 0.7761231660842896, "learning_rate": 2.403317121647317e-06, "loss": 0.0578, "step": 46460 }, { "epoch": 0.8228027998778042, "grad_norm": 0.7912263870239258, "learning_rate": 2.402850025818195e-06, "loss": 0.0814, "step": 46461 }, { "epoch": 0.8228205094148326, "grad_norm": 0.4370388984680176, "learning_rate": 2.4023829714324186e-06, "loss": 0.0411, "step": 46462 }, { "epoch": 0.822838218951861, "grad_norm": 0.7136856317520142, "learning_rate": 2.401915958491528e-06, "loss": 0.0583, "step": 46463 }, { "epoch": 0.8228559284888894, "grad_norm": 0.9949575662612915, "learning_rate": 2.4014489869970586e-06, "loss": 0.0609, "step": 46464 }, { "epoch": 0.822873638025918, "grad_norm": 0.4084045886993408, "learning_rate": 2.4009820569505523e-06, "loss": 0.0838, "step": 46465 }, { "epoch": 0.8228913475629464, "grad_norm": 0.5710830688476562, "learning_rate": 2.4005151683535385e-06, "loss": 0.0585, "step": 46466 }, { "epoch": 0.8229090570999748, "grad_norm": 0.5280239582061768, "learning_rate": 2.4000483212075508e-06, "loss": 0.059, "step": 46467 }, { "epoch": 0.8229267666370031, "grad_norm": 0.5540971159934998, "learning_rate": 2.399581515514136e-06, "loss": 0.0648, "step": 46468 }, { "epoch": 0.8229444761740317, "grad_norm": 0.5475872755050659, "learning_rate": 2.3991147512748214e-06, "loss": 0.0468, "step": 46469 }, { "epoch": 0.8229621857110601, "grad_norm": 0.4595663547515869, "learning_rate": 2.398648028491144e-06, "loss": 0.042, "step": 46470 }, { "epoch": 0.8229798952480885, "grad_norm": 0.8051377534866333, "learning_rate": 2.398181347164639e-06, "loss": 0.0454, "step": 46471 }, { "epoch": 0.8229976047851169, "grad_norm": 0.7538565397262573, "learning_rate": 2.3977147072968474e-06, "loss": 0.0893, "step": 46472 }, { "epoch": 0.8230153143221454, "grad_norm": 0.6058139801025391, "learning_rate": 2.397248108889298e-06, "loss": 0.055, "step": 46473 }, { "epoch": 0.8230330238591738, "grad_norm": 0.4825553894042969, "learning_rate": 2.3967815519435254e-06, "loss": 0.047, "step": 46474 }, { "epoch": 0.8230507333962022, "grad_norm": 1.096971869468689, "learning_rate": 2.3963150364610677e-06, "loss": 0.1056, "step": 46475 }, { "epoch": 0.8230684429332307, "grad_norm": 0.641514003276825, "learning_rate": 2.395848562443458e-06, "loss": 0.0443, "step": 46476 }, { "epoch": 0.8230861524702591, "grad_norm": 0.7520081996917725, "learning_rate": 2.3953821298922317e-06, "loss": 0.048, "step": 46477 }, { "epoch": 0.8231038620072875, "grad_norm": 0.6010354161262512, "learning_rate": 2.394915738808923e-06, "loss": 0.0512, "step": 46478 }, { "epoch": 0.8231215715443159, "grad_norm": 0.6725005507469177, "learning_rate": 2.3944493891950706e-06, "loss": 0.044, "step": 46479 }, { "epoch": 0.8231392810813444, "grad_norm": 0.2634534239768982, "learning_rate": 2.3939830810522016e-06, "loss": 0.0427, "step": 46480 }, { "epoch": 0.8231569906183728, "grad_norm": 0.5221413373947144, "learning_rate": 2.3935168143818533e-06, "loss": 0.0363, "step": 46481 }, { "epoch": 0.8231747001554012, "grad_norm": 0.9660845398902893, "learning_rate": 2.3930505891855585e-06, "loss": 0.0608, "step": 46482 }, { "epoch": 0.8231924096924296, "grad_norm": 0.33711034059524536, "learning_rate": 2.392584405464857e-06, "loss": 0.0343, "step": 46483 }, { "epoch": 0.8232101192294581, "grad_norm": 0.27739769220352173, "learning_rate": 2.392118263221274e-06, "loss": 0.0294, "step": 46484 }, { "epoch": 0.8232278287664865, "grad_norm": 0.32300102710723877, "learning_rate": 2.3916521624563464e-06, "loss": 0.0518, "step": 46485 }, { "epoch": 0.8232455383035149, "grad_norm": 0.6376953721046448, "learning_rate": 2.3911861031716065e-06, "loss": 0.0632, "step": 46486 }, { "epoch": 0.8232632478405433, "grad_norm": 0.3775177001953125, "learning_rate": 2.390720085368591e-06, "loss": 0.0482, "step": 46487 }, { "epoch": 0.8232809573775718, "grad_norm": 0.8567774891853333, "learning_rate": 2.3902541090488294e-06, "loss": 0.0696, "step": 46488 }, { "epoch": 0.8232986669146002, "grad_norm": 0.5499079823493958, "learning_rate": 2.3897881742138585e-06, "loss": 0.0475, "step": 46489 }, { "epoch": 0.8233163764516286, "grad_norm": 0.5966437458992004, "learning_rate": 2.3893222808652116e-06, "loss": 0.0283, "step": 46490 }, { "epoch": 0.8233340859886571, "grad_norm": 0.5587473511695862, "learning_rate": 2.3888564290044147e-06, "loss": 0.0502, "step": 46491 }, { "epoch": 0.8233517955256855, "grad_norm": 0.8568465113639832, "learning_rate": 2.3883906186330046e-06, "loss": 0.0783, "step": 46492 }, { "epoch": 0.8233695050627139, "grad_norm": 0.5241625905036926, "learning_rate": 2.3879248497525188e-06, "loss": 0.0529, "step": 46493 }, { "epoch": 0.8233872145997423, "grad_norm": 0.4967085123062134, "learning_rate": 2.3874591223644806e-06, "loss": 0.0455, "step": 46494 }, { "epoch": 0.8234049241367708, "grad_norm": 0.6683434844017029, "learning_rate": 2.3869934364704252e-06, "loss": 0.0348, "step": 46495 }, { "epoch": 0.8234226336737992, "grad_norm": 1.0748249292373657, "learning_rate": 2.3865277920718826e-06, "loss": 0.058, "step": 46496 }, { "epoch": 0.8234403432108276, "grad_norm": 0.7024447917938232, "learning_rate": 2.3860621891703954e-06, "loss": 0.0572, "step": 46497 }, { "epoch": 0.823458052747856, "grad_norm": 0.37735533714294434, "learning_rate": 2.385596627767483e-06, "loss": 0.05, "step": 46498 }, { "epoch": 0.8234757622848845, "grad_norm": 0.8486716747283936, "learning_rate": 2.385131107864682e-06, "loss": 0.0666, "step": 46499 }, { "epoch": 0.8234934718219129, "grad_norm": 0.5094285011291504, "learning_rate": 2.384665629463528e-06, "loss": 0.044, "step": 46500 }, { "epoch": 0.8235111813589413, "grad_norm": 0.818326473236084, "learning_rate": 2.3842001925655455e-06, "loss": 0.0541, "step": 46501 }, { "epoch": 0.8235288908959697, "grad_norm": 0.6071205735206604, "learning_rate": 2.383734797172267e-06, "loss": 0.0444, "step": 46502 }, { "epoch": 0.8235466004329982, "grad_norm": 0.5549954175949097, "learning_rate": 2.3832694432852243e-06, "loss": 0.0485, "step": 46503 }, { "epoch": 0.8235643099700266, "grad_norm": 1.0296893119812012, "learning_rate": 2.3828041309059503e-06, "loss": 0.0707, "step": 46504 }, { "epoch": 0.823582019507055, "grad_norm": 0.3140742778778076, "learning_rate": 2.3823388600359735e-06, "loss": 0.0478, "step": 46505 }, { "epoch": 0.8235997290440835, "grad_norm": 0.531380295753479, "learning_rate": 2.381873630676827e-06, "loss": 0.0337, "step": 46506 }, { "epoch": 0.8236174385811119, "grad_norm": 0.527603805065155, "learning_rate": 2.3814084428300437e-06, "loss": 0.0538, "step": 46507 }, { "epoch": 0.8236351481181403, "grad_norm": 0.4384734630584717, "learning_rate": 2.380943296497146e-06, "loss": 0.0448, "step": 46508 }, { "epoch": 0.8236528576551687, "grad_norm": 0.4337390661239624, "learning_rate": 2.3804781916796682e-06, "loss": 0.0255, "step": 46509 }, { "epoch": 0.8236705671921972, "grad_norm": 0.6797704100608826, "learning_rate": 2.3800131283791415e-06, "loss": 0.0439, "step": 46510 }, { "epoch": 0.8236882767292256, "grad_norm": 0.3619469702243805, "learning_rate": 2.379548106597097e-06, "loss": 0.0717, "step": 46511 }, { "epoch": 0.823705986266254, "grad_norm": 0.788883626461029, "learning_rate": 2.3790831263350615e-06, "loss": 0.1065, "step": 46512 }, { "epoch": 0.8237236958032824, "grad_norm": 0.8962672352790833, "learning_rate": 2.3786181875945638e-06, "loss": 0.0564, "step": 46513 }, { "epoch": 0.8237414053403109, "grad_norm": 0.4609258472919464, "learning_rate": 2.378153290377138e-06, "loss": 0.0731, "step": 46514 }, { "epoch": 0.8237591148773393, "grad_norm": 0.3928605616092682, "learning_rate": 2.3776884346843093e-06, "loss": 0.0572, "step": 46515 }, { "epoch": 0.8237768244143677, "grad_norm": 0.5567407608032227, "learning_rate": 2.3772236205176096e-06, "loss": 0.0467, "step": 46516 }, { "epoch": 0.8237945339513961, "grad_norm": 0.6428915858268738, "learning_rate": 2.3767588478785677e-06, "loss": 0.0633, "step": 46517 }, { "epoch": 0.8238122434884246, "grad_norm": 0.5628952980041504, "learning_rate": 2.376294116768716e-06, "loss": 0.0613, "step": 46518 }, { "epoch": 0.823829953025453, "grad_norm": 1.0763264894485474, "learning_rate": 2.375829427189577e-06, "loss": 0.0692, "step": 46519 }, { "epoch": 0.8238476625624814, "grad_norm": 0.2586122453212738, "learning_rate": 2.375364779142682e-06, "loss": 0.0857, "step": 46520 }, { "epoch": 0.8238653720995099, "grad_norm": 0.43211647868156433, "learning_rate": 2.374900172629558e-06, "loss": 0.0346, "step": 46521 }, { "epoch": 0.8238830816365383, "grad_norm": 0.42947548627853394, "learning_rate": 2.3744356076517416e-06, "loss": 0.0618, "step": 46522 }, { "epoch": 0.8239007911735667, "grad_norm": 0.5523005127906799, "learning_rate": 2.3739710842107505e-06, "loss": 0.0559, "step": 46523 }, { "epoch": 0.8239185007105951, "grad_norm": 0.41026273369789124, "learning_rate": 2.373506602308117e-06, "loss": 0.0426, "step": 46524 }, { "epoch": 0.8239362102476236, "grad_norm": 0.6051647067070007, "learning_rate": 2.37304216194537e-06, "loss": 0.0382, "step": 46525 }, { "epoch": 0.823953919784652, "grad_norm": 0.50166916847229, "learning_rate": 2.372577763124037e-06, "loss": 0.0531, "step": 46526 }, { "epoch": 0.8239716293216804, "grad_norm": 0.41814470291137695, "learning_rate": 2.3721134058456463e-06, "loss": 0.0639, "step": 46527 }, { "epoch": 0.8239893388587088, "grad_norm": 0.4672573208808899, "learning_rate": 2.3716490901117265e-06, "loss": 0.0472, "step": 46528 }, { "epoch": 0.8240070483957374, "grad_norm": 0.47625091671943665, "learning_rate": 2.3711848159238054e-06, "loss": 0.0469, "step": 46529 }, { "epoch": 0.8240247579327658, "grad_norm": 0.7096912860870361, "learning_rate": 2.370720583283406e-06, "loss": 0.0572, "step": 46530 }, { "epoch": 0.8240424674697941, "grad_norm": 0.7079533338546753, "learning_rate": 2.3702563921920594e-06, "loss": 0.0672, "step": 46531 }, { "epoch": 0.8240601770068225, "grad_norm": 0.6278743743896484, "learning_rate": 2.369792242651294e-06, "loss": 0.0605, "step": 46532 }, { "epoch": 0.8240778865438511, "grad_norm": 0.351240873336792, "learning_rate": 2.369328134662629e-06, "loss": 0.0595, "step": 46533 }, { "epoch": 0.8240955960808795, "grad_norm": 0.09369055181741714, "learning_rate": 2.368864068227599e-06, "loss": 0.0339, "step": 46534 }, { "epoch": 0.8241133056179079, "grad_norm": 1.137860894203186, "learning_rate": 2.368400043347729e-06, "loss": 0.0748, "step": 46535 }, { "epoch": 0.8241310151549364, "grad_norm": 0.681406557559967, "learning_rate": 2.3679360600245497e-06, "loss": 0.0448, "step": 46536 }, { "epoch": 0.8241487246919648, "grad_norm": 0.8592448234558105, "learning_rate": 2.3674721182595792e-06, "loss": 0.0472, "step": 46537 }, { "epoch": 0.8241664342289932, "grad_norm": 0.7257758975028992, "learning_rate": 2.3670082180543477e-06, "loss": 0.0593, "step": 46538 }, { "epoch": 0.8241841437660216, "grad_norm": 0.7365606427192688, "learning_rate": 2.3665443594103843e-06, "loss": 0.0708, "step": 46539 }, { "epoch": 0.8242018533030501, "grad_norm": 0.6151592135429382, "learning_rate": 2.3660805423292087e-06, "loss": 0.0618, "step": 46540 }, { "epoch": 0.8242195628400785, "grad_norm": 0.6854045987129211, "learning_rate": 2.3656167668123502e-06, "loss": 0.0683, "step": 46541 }, { "epoch": 0.8242372723771069, "grad_norm": 0.5265615582466125, "learning_rate": 2.3651530328613356e-06, "loss": 0.0574, "step": 46542 }, { "epoch": 0.8242549819141353, "grad_norm": 0.1956658959388733, "learning_rate": 2.364689340477689e-06, "loss": 0.0471, "step": 46543 }, { "epoch": 0.8242726914511638, "grad_norm": 0.5880740284919739, "learning_rate": 2.3642256896629376e-06, "loss": 0.0394, "step": 46544 }, { "epoch": 0.8242904009881922, "grad_norm": 0.45959994196891785, "learning_rate": 2.3637620804186035e-06, "loss": 0.0501, "step": 46545 }, { "epoch": 0.8243081105252206, "grad_norm": 0.5124398469924927, "learning_rate": 2.3632985127462165e-06, "loss": 0.0465, "step": 46546 }, { "epoch": 0.824325820062249, "grad_norm": 0.661632776260376, "learning_rate": 2.362834986647301e-06, "loss": 0.0494, "step": 46547 }, { "epoch": 0.8243435295992775, "grad_norm": 0.8506311774253845, "learning_rate": 2.3623715021233773e-06, "loss": 0.0535, "step": 46548 }, { "epoch": 0.8243612391363059, "grad_norm": 1.2501254081726074, "learning_rate": 2.3619080591759723e-06, "loss": 0.0566, "step": 46549 }, { "epoch": 0.8243789486733343, "grad_norm": 0.9640704393386841, "learning_rate": 2.361444657806617e-06, "loss": 0.0764, "step": 46550 }, { "epoch": 0.8243966582103628, "grad_norm": 0.5144530534744263, "learning_rate": 2.3609812980168243e-06, "loss": 0.0662, "step": 46551 }, { "epoch": 0.8244143677473912, "grad_norm": 0.6741342544555664, "learning_rate": 2.3605179798081272e-06, "loss": 0.0721, "step": 46552 }, { "epoch": 0.8244320772844196, "grad_norm": 0.6196314096450806, "learning_rate": 2.3600547031820458e-06, "loss": 0.0511, "step": 46553 }, { "epoch": 0.824449786821448, "grad_norm": 0.6048384308815002, "learning_rate": 2.359591468140107e-06, "loss": 0.0475, "step": 46554 }, { "epoch": 0.8244674963584765, "grad_norm": 0.6236261129379272, "learning_rate": 2.3591282746838333e-06, "loss": 0.0468, "step": 46555 }, { "epoch": 0.8244852058955049, "grad_norm": 0.5045507550239563, "learning_rate": 2.35866512281475e-06, "loss": 0.0289, "step": 46556 }, { "epoch": 0.8245029154325333, "grad_norm": 0.6565475463867188, "learning_rate": 2.3582020125343823e-06, "loss": 0.0602, "step": 46557 }, { "epoch": 0.8245206249695617, "grad_norm": 0.5513121485710144, "learning_rate": 2.3577389438442477e-06, "loss": 0.0695, "step": 46558 }, { "epoch": 0.8245383345065902, "grad_norm": 0.541927695274353, "learning_rate": 2.357275916745873e-06, "loss": 0.0643, "step": 46559 }, { "epoch": 0.8245560440436186, "grad_norm": 0.5040336847305298, "learning_rate": 2.356812931240784e-06, "loss": 0.0567, "step": 46560 }, { "epoch": 0.824573753580647, "grad_norm": 0.40604647994041443, "learning_rate": 2.356349987330502e-06, "loss": 0.0393, "step": 46561 }, { "epoch": 0.8245914631176754, "grad_norm": 0.5246812105178833, "learning_rate": 2.3558870850165453e-06, "loss": 0.0655, "step": 46562 }, { "epoch": 0.8246091726547039, "grad_norm": 0.3811929523944855, "learning_rate": 2.3554242243004436e-06, "loss": 0.0374, "step": 46563 }, { "epoch": 0.8246268821917323, "grad_norm": 0.8497042059898376, "learning_rate": 2.354961405183721e-06, "loss": 0.0382, "step": 46564 }, { "epoch": 0.8246445917287607, "grad_norm": 0.7524680495262146, "learning_rate": 2.3544986276678925e-06, "loss": 0.0732, "step": 46565 }, { "epoch": 0.8246623012657892, "grad_norm": 0.45851269364356995, "learning_rate": 2.354035891754486e-06, "loss": 0.0377, "step": 46566 }, { "epoch": 0.8246800108028176, "grad_norm": 0.7381191849708557, "learning_rate": 2.353573197445021e-06, "loss": 0.0692, "step": 46567 }, { "epoch": 0.824697720339846, "grad_norm": 0.8289536237716675, "learning_rate": 2.3531105447410235e-06, "loss": 0.0443, "step": 46568 }, { "epoch": 0.8247154298768744, "grad_norm": 0.5914561748504639, "learning_rate": 2.352647933644012e-06, "loss": 0.048, "step": 46569 }, { "epoch": 0.8247331394139029, "grad_norm": 0.5636134147644043, "learning_rate": 2.3521853641555076e-06, "loss": 0.0346, "step": 46570 }, { "epoch": 0.8247508489509313, "grad_norm": 0.7367326021194458, "learning_rate": 2.351722836277035e-06, "loss": 0.0559, "step": 46571 }, { "epoch": 0.8247685584879597, "grad_norm": 0.47541460394859314, "learning_rate": 2.351260350010115e-06, "loss": 0.0453, "step": 46572 }, { "epoch": 0.8247862680249881, "grad_norm": 0.7026703357696533, "learning_rate": 2.3507979053562696e-06, "loss": 0.0569, "step": 46573 }, { "epoch": 0.8248039775620166, "grad_norm": 0.331007719039917, "learning_rate": 2.350335502317019e-06, "loss": 0.0475, "step": 46574 }, { "epoch": 0.824821687099045, "grad_norm": 0.6944656372070312, "learning_rate": 2.3498731408938897e-06, "loss": 0.0614, "step": 46575 }, { "epoch": 0.8248393966360734, "grad_norm": 0.6370881199836731, "learning_rate": 2.349410821088394e-06, "loss": 0.0439, "step": 46576 }, { "epoch": 0.8248571061731018, "grad_norm": 0.6613925099372864, "learning_rate": 2.348948542902057e-06, "loss": 0.0643, "step": 46577 }, { "epoch": 0.8248748157101303, "grad_norm": 0.4209577739238739, "learning_rate": 2.348486306336403e-06, "loss": 0.0447, "step": 46578 }, { "epoch": 0.8248925252471587, "grad_norm": 0.6566731929779053, "learning_rate": 2.3480241113929468e-06, "loss": 0.054, "step": 46579 }, { "epoch": 0.8249102347841871, "grad_norm": 0.7488106489181519, "learning_rate": 2.34756195807321e-06, "loss": 0.0695, "step": 46580 }, { "epoch": 0.8249279443212156, "grad_norm": 0.18663187325000763, "learning_rate": 2.3470998463787165e-06, "loss": 0.0282, "step": 46581 }, { "epoch": 0.824945653858244, "grad_norm": 0.5564131140708923, "learning_rate": 2.346637776310983e-06, "loss": 0.0605, "step": 46582 }, { "epoch": 0.8249633633952724, "grad_norm": 0.8322188258171082, "learning_rate": 2.3461757478715324e-06, "loss": 0.0534, "step": 46583 }, { "epoch": 0.8249810729323008, "grad_norm": 0.4966207444667816, "learning_rate": 2.3457137610618843e-06, "loss": 0.0242, "step": 46584 }, { "epoch": 0.8249987824693293, "grad_norm": 0.41183361411094666, "learning_rate": 2.3452518158835566e-06, "loss": 0.0528, "step": 46585 }, { "epoch": 0.8250164920063577, "grad_norm": 0.9249739646911621, "learning_rate": 2.344789912338076e-06, "loss": 0.0831, "step": 46586 }, { "epoch": 0.8250342015433861, "grad_norm": 0.6497793793678284, "learning_rate": 2.3443280504269517e-06, "loss": 0.0572, "step": 46587 }, { "epoch": 0.8250519110804145, "grad_norm": 0.20469322800636292, "learning_rate": 2.3438662301517085e-06, "loss": 0.0329, "step": 46588 }, { "epoch": 0.825069620617443, "grad_norm": 0.5323609709739685, "learning_rate": 2.3434044515138687e-06, "loss": 0.0449, "step": 46589 }, { "epoch": 0.8250873301544714, "grad_norm": 0.34801575541496277, "learning_rate": 2.3429427145149446e-06, "loss": 0.043, "step": 46590 }, { "epoch": 0.8251050396914998, "grad_norm": 0.4344382882118225, "learning_rate": 2.342481019156455e-06, "loss": 0.0366, "step": 46591 }, { "epoch": 0.8251227492285282, "grad_norm": 0.24854427576065063, "learning_rate": 2.3420193654399252e-06, "loss": 0.0388, "step": 46592 }, { "epoch": 0.8251404587655568, "grad_norm": 0.6181547045707703, "learning_rate": 2.341557753366876e-06, "loss": 0.0581, "step": 46593 }, { "epoch": 0.8251581683025851, "grad_norm": 0.7872053980827332, "learning_rate": 2.341096182938819e-06, "loss": 0.0664, "step": 46594 }, { "epoch": 0.8251758778396135, "grad_norm": 0.6455066800117493, "learning_rate": 2.340634654157273e-06, "loss": 0.0651, "step": 46595 }, { "epoch": 0.8251935873766421, "grad_norm": 0.6015291810035706, "learning_rate": 2.340173167023764e-06, "loss": 0.0759, "step": 46596 }, { "epoch": 0.8252112969136705, "grad_norm": 0.20468929409980774, "learning_rate": 2.3397117215398004e-06, "loss": 0.064, "step": 46597 }, { "epoch": 0.8252290064506989, "grad_norm": 0.4389702081680298, "learning_rate": 2.3392503177069048e-06, "loss": 0.0525, "step": 46598 }, { "epoch": 0.8252467159877273, "grad_norm": 0.4464627802371979, "learning_rate": 2.338788955526595e-06, "loss": 0.045, "step": 46599 }, { "epoch": 0.8252644255247558, "grad_norm": 0.8852933645248413, "learning_rate": 2.338327635000389e-06, "loss": 0.0433, "step": 46600 }, { "epoch": 0.8252821350617842, "grad_norm": 0.47709792852401733, "learning_rate": 2.3378663561298053e-06, "loss": 0.0381, "step": 46601 }, { "epoch": 0.8252998445988126, "grad_norm": 0.45202362537384033, "learning_rate": 2.337405118916359e-06, "loss": 0.0519, "step": 46602 }, { "epoch": 0.825317554135841, "grad_norm": 0.4939339756965637, "learning_rate": 2.336943923361573e-06, "loss": 0.0646, "step": 46603 }, { "epoch": 0.8253352636728695, "grad_norm": 0.4892447590827942, "learning_rate": 2.3364827694669587e-06, "loss": 0.0489, "step": 46604 }, { "epoch": 0.8253529732098979, "grad_norm": 0.4835788607597351, "learning_rate": 2.3360216572340338e-06, "loss": 0.0344, "step": 46605 }, { "epoch": 0.8253706827469263, "grad_norm": 0.6010051369667053, "learning_rate": 2.3355605866643166e-06, "loss": 0.0478, "step": 46606 }, { "epoch": 0.8253883922839547, "grad_norm": 0.7627060413360596, "learning_rate": 2.3350995577593293e-06, "loss": 0.0521, "step": 46607 }, { "epoch": 0.8254061018209832, "grad_norm": 0.8018949627876282, "learning_rate": 2.3346385705205776e-06, "loss": 0.0854, "step": 46608 }, { "epoch": 0.8254238113580116, "grad_norm": 0.4538145065307617, "learning_rate": 2.334177624949585e-06, "loss": 0.0573, "step": 46609 }, { "epoch": 0.82544152089504, "grad_norm": 0.6795101165771484, "learning_rate": 2.3337167210478673e-06, "loss": 0.0611, "step": 46610 }, { "epoch": 0.8254592304320685, "grad_norm": 0.675754725933075, "learning_rate": 2.333255858816941e-06, "loss": 0.0338, "step": 46611 }, { "epoch": 0.8254769399690969, "grad_norm": 0.4318123459815979, "learning_rate": 2.332795038258321e-06, "loss": 0.0354, "step": 46612 }, { "epoch": 0.8254946495061253, "grad_norm": 0.8754525184631348, "learning_rate": 2.3323342593735246e-06, "loss": 0.0678, "step": 46613 }, { "epoch": 0.8255123590431537, "grad_norm": 0.2429298311471939, "learning_rate": 2.3318735221640702e-06, "loss": 0.0486, "step": 46614 }, { "epoch": 0.8255300685801822, "grad_norm": 0.6209909915924072, "learning_rate": 2.3314128266314677e-06, "loss": 0.0916, "step": 46615 }, { "epoch": 0.8255477781172106, "grad_norm": 0.6330929398536682, "learning_rate": 2.3309521727772344e-06, "loss": 0.0496, "step": 46616 }, { "epoch": 0.825565487654239, "grad_norm": 0.27596044540405273, "learning_rate": 2.3304915606028887e-06, "loss": 0.0385, "step": 46617 }, { "epoch": 0.8255831971912674, "grad_norm": 0.6944398880004883, "learning_rate": 2.3300309901099463e-06, "loss": 0.0666, "step": 46618 }, { "epoch": 0.8256009067282959, "grad_norm": 0.5932546854019165, "learning_rate": 2.329570461299918e-06, "loss": 0.0537, "step": 46619 }, { "epoch": 0.8256186162653243, "grad_norm": 0.303301066160202, "learning_rate": 2.329109974174317e-06, "loss": 0.0311, "step": 46620 }, { "epoch": 0.8256363258023527, "grad_norm": 0.42713090777397156, "learning_rate": 2.3286495287346708e-06, "loss": 0.0426, "step": 46621 }, { "epoch": 0.8256540353393811, "grad_norm": 0.7240142226219177, "learning_rate": 2.3281891249824815e-06, "loss": 0.0802, "step": 46622 }, { "epoch": 0.8256717448764096, "grad_norm": 0.5273076295852661, "learning_rate": 2.327728762919268e-06, "loss": 0.0481, "step": 46623 }, { "epoch": 0.825689454413438, "grad_norm": 0.38624098896980286, "learning_rate": 2.3272684425465453e-06, "loss": 0.0497, "step": 46624 }, { "epoch": 0.8257071639504664, "grad_norm": 0.29734349250793457, "learning_rate": 2.3268081638658324e-06, "loss": 0.0428, "step": 46625 }, { "epoch": 0.8257248734874949, "grad_norm": 0.47271034121513367, "learning_rate": 2.3263479268786345e-06, "loss": 0.0554, "step": 46626 }, { "epoch": 0.8257425830245233, "grad_norm": 0.6241971254348755, "learning_rate": 2.3258877315864696e-06, "loss": 0.0499, "step": 46627 }, { "epoch": 0.8257602925615517, "grad_norm": 0.4695515036582947, "learning_rate": 2.3254275779908526e-06, "loss": 0.0488, "step": 46628 }, { "epoch": 0.8257780020985801, "grad_norm": 0.7351151704788208, "learning_rate": 2.324967466093297e-06, "loss": 0.0703, "step": 46629 }, { "epoch": 0.8257957116356086, "grad_norm": 0.49794554710388184, "learning_rate": 2.3245073958953166e-06, "loss": 0.0231, "step": 46630 }, { "epoch": 0.825813421172637, "grad_norm": 0.3933582901954651, "learning_rate": 2.324047367398424e-06, "loss": 0.0263, "step": 46631 }, { "epoch": 0.8258311307096654, "grad_norm": 0.35441434383392334, "learning_rate": 2.323587380604138e-06, "loss": 0.048, "step": 46632 }, { "epoch": 0.8258488402466938, "grad_norm": 0.6845413446426392, "learning_rate": 2.323127435513964e-06, "loss": 0.0624, "step": 46633 }, { "epoch": 0.8258665497837223, "grad_norm": 0.5135731101036072, "learning_rate": 2.3226675321294183e-06, "loss": 0.0612, "step": 46634 }, { "epoch": 0.8258842593207507, "grad_norm": 0.638536274433136, "learning_rate": 2.3222076704520175e-06, "loss": 0.0514, "step": 46635 }, { "epoch": 0.8259019688577791, "grad_norm": 0.6118924021720886, "learning_rate": 2.321747850483266e-06, "loss": 0.0537, "step": 46636 }, { "epoch": 0.8259196783948075, "grad_norm": 0.2837724983692169, "learning_rate": 2.3212880722246836e-06, "loss": 0.0342, "step": 46637 }, { "epoch": 0.825937387931836, "grad_norm": 0.8986822962760925, "learning_rate": 2.3208283356777804e-06, "loss": 0.0719, "step": 46638 }, { "epoch": 0.8259550974688644, "grad_norm": 0.5945159196853638, "learning_rate": 2.32036864084407e-06, "loss": 0.0391, "step": 46639 }, { "epoch": 0.8259728070058928, "grad_norm": 0.41073477268218994, "learning_rate": 2.3199089877250645e-06, "loss": 0.0487, "step": 46640 }, { "epoch": 0.8259905165429213, "grad_norm": 0.717344343662262, "learning_rate": 2.3194493763222763e-06, "loss": 0.0573, "step": 46641 }, { "epoch": 0.8260082260799497, "grad_norm": 0.8422767519950867, "learning_rate": 2.318989806637219e-06, "loss": 0.0516, "step": 46642 }, { "epoch": 0.8260259356169781, "grad_norm": 0.5681892037391663, "learning_rate": 2.3185302786714014e-06, "loss": 0.0531, "step": 46643 }, { "epoch": 0.8260436451540065, "grad_norm": 0.34371015429496765, "learning_rate": 2.318070792426334e-06, "loss": 0.0443, "step": 46644 }, { "epoch": 0.826061354691035, "grad_norm": 0.5493048429489136, "learning_rate": 2.317611347903532e-06, "loss": 0.0473, "step": 46645 }, { "epoch": 0.8260790642280634, "grad_norm": 0.5074909925460815, "learning_rate": 2.31715194510451e-06, "loss": 0.0367, "step": 46646 }, { "epoch": 0.8260967737650918, "grad_norm": 0.9209973812103271, "learning_rate": 2.3166925840307723e-06, "loss": 0.0756, "step": 46647 }, { "epoch": 0.8261144833021202, "grad_norm": 0.4826359748840332, "learning_rate": 2.316233264683832e-06, "loss": 0.0437, "step": 46648 }, { "epoch": 0.8261321928391487, "grad_norm": 0.8010480999946594, "learning_rate": 2.315773987065197e-06, "loss": 0.055, "step": 46649 }, { "epoch": 0.8261499023761771, "grad_norm": 0.8070230484008789, "learning_rate": 2.3153147511763924e-06, "loss": 0.0425, "step": 46650 }, { "epoch": 0.8261676119132055, "grad_norm": 0.32285076379776, "learning_rate": 2.314855557018914e-06, "loss": 0.0354, "step": 46651 }, { "epoch": 0.8261853214502339, "grad_norm": 0.8283881545066833, "learning_rate": 2.314396404594278e-06, "loss": 0.0637, "step": 46652 }, { "epoch": 0.8262030309872624, "grad_norm": 0.46293574571609497, "learning_rate": 2.3139372939039977e-06, "loss": 0.0289, "step": 46653 }, { "epoch": 0.8262207405242908, "grad_norm": 0.6117249727249146, "learning_rate": 2.3134782249495775e-06, "loss": 0.0599, "step": 46654 }, { "epoch": 0.8262384500613192, "grad_norm": 0.798163652420044, "learning_rate": 2.3130191977325304e-06, "loss": 0.0615, "step": 46655 }, { "epoch": 0.8262561595983478, "grad_norm": 0.4457939863204956, "learning_rate": 2.3125602122543676e-06, "loss": 0.0385, "step": 46656 }, { "epoch": 0.8262738691353761, "grad_norm": 0.704520046710968, "learning_rate": 2.3121012685165976e-06, "loss": 0.0393, "step": 46657 }, { "epoch": 0.8262915786724045, "grad_norm": 0.7858702540397644, "learning_rate": 2.3116423665207327e-06, "loss": 0.0561, "step": 46658 }, { "epoch": 0.826309288209433, "grad_norm": 0.7108458280563354, "learning_rate": 2.3111835062682796e-06, "loss": 0.0452, "step": 46659 }, { "epoch": 0.8263269977464615, "grad_norm": 0.9076237678527832, "learning_rate": 2.3107246877607542e-06, "loss": 0.0595, "step": 46660 }, { "epoch": 0.8263447072834899, "grad_norm": 0.6424387693405151, "learning_rate": 2.3102659109996564e-06, "loss": 0.0326, "step": 46661 }, { "epoch": 0.8263624168205183, "grad_norm": 1.034929633140564, "learning_rate": 2.309807175986501e-06, "loss": 0.1059, "step": 46662 }, { "epoch": 0.8263801263575467, "grad_norm": 0.4764992594718933, "learning_rate": 2.3093484827227956e-06, "loss": 0.0388, "step": 46663 }, { "epoch": 0.8263978358945752, "grad_norm": 0.57608962059021, "learning_rate": 2.308889831210055e-06, "loss": 0.0447, "step": 46664 }, { "epoch": 0.8264155454316036, "grad_norm": 0.7206584215164185, "learning_rate": 2.308431221449779e-06, "loss": 0.0374, "step": 46665 }, { "epoch": 0.826433254968632, "grad_norm": 0.4508243501186371, "learning_rate": 2.3079726534434797e-06, "loss": 0.0648, "step": 46666 }, { "epoch": 0.8264509645056604, "grad_norm": 0.5254000425338745, "learning_rate": 2.3075141271926683e-06, "loss": 0.0918, "step": 46667 }, { "epoch": 0.8264686740426889, "grad_norm": 0.5046557188034058, "learning_rate": 2.307055642698851e-06, "loss": 0.0619, "step": 46668 }, { "epoch": 0.8264863835797173, "grad_norm": 0.7020631432533264, "learning_rate": 2.3065971999635353e-06, "loss": 0.0504, "step": 46669 }, { "epoch": 0.8265040931167457, "grad_norm": 0.3943098485469818, "learning_rate": 2.3061387989882322e-06, "loss": 0.0531, "step": 46670 }, { "epoch": 0.8265218026537742, "grad_norm": 0.5976149439811707, "learning_rate": 2.305680439774453e-06, "loss": 0.0544, "step": 46671 }, { "epoch": 0.8265395121908026, "grad_norm": 0.45519232749938965, "learning_rate": 2.305222122323697e-06, "loss": 0.0444, "step": 46672 }, { "epoch": 0.826557221727831, "grad_norm": 0.7558967471122742, "learning_rate": 2.304763846637476e-06, "loss": 0.0364, "step": 46673 }, { "epoch": 0.8265749312648594, "grad_norm": 0.5999288558959961, "learning_rate": 2.3043056127173007e-06, "loss": 0.0448, "step": 46674 }, { "epoch": 0.8265926408018879, "grad_norm": 0.6926071047782898, "learning_rate": 2.303847420564673e-06, "loss": 0.0634, "step": 46675 }, { "epoch": 0.8266103503389163, "grad_norm": 0.6182568669319153, "learning_rate": 2.3033892701811038e-06, "loss": 0.0594, "step": 46676 }, { "epoch": 0.8266280598759447, "grad_norm": 0.43100765347480774, "learning_rate": 2.3029311615680998e-06, "loss": 0.0313, "step": 46677 }, { "epoch": 0.8266457694129731, "grad_norm": 0.6516731381416321, "learning_rate": 2.3024730947271668e-06, "loss": 0.062, "step": 46678 }, { "epoch": 0.8266634789500016, "grad_norm": 0.5895143151283264, "learning_rate": 2.302015069659813e-06, "loss": 0.0733, "step": 46679 }, { "epoch": 0.82668118848703, "grad_norm": 0.7626825571060181, "learning_rate": 2.3015570863675463e-06, "loss": 0.0527, "step": 46680 }, { "epoch": 0.8266988980240584, "grad_norm": 1.0064325332641602, "learning_rate": 2.3010991448518747e-06, "loss": 0.0676, "step": 46681 }, { "epoch": 0.8267166075610868, "grad_norm": 0.6084462404251099, "learning_rate": 2.3006412451143012e-06, "loss": 0.0601, "step": 46682 }, { "epoch": 0.8267343170981153, "grad_norm": 0.35382598638534546, "learning_rate": 2.300183387156332e-06, "loss": 0.0477, "step": 46683 }, { "epoch": 0.8267520266351437, "grad_norm": 1.0994844436645508, "learning_rate": 2.299725570979475e-06, "loss": 0.0511, "step": 46684 }, { "epoch": 0.8267697361721721, "grad_norm": 0.6779724955558777, "learning_rate": 2.2992677965852403e-06, "loss": 0.062, "step": 46685 }, { "epoch": 0.8267874457092006, "grad_norm": 0.2598514258861542, "learning_rate": 2.298810063975122e-06, "loss": 0.0478, "step": 46686 }, { "epoch": 0.826805155246229, "grad_norm": 0.41174447536468506, "learning_rate": 2.2983523731506383e-06, "loss": 0.0416, "step": 46687 }, { "epoch": 0.8268228647832574, "grad_norm": 0.739166796207428, "learning_rate": 2.29789472411329e-06, "loss": 0.0428, "step": 46688 }, { "epoch": 0.8268405743202858, "grad_norm": 0.4190908968448639, "learning_rate": 2.297437116864588e-06, "loss": 0.0454, "step": 46689 }, { "epoch": 0.8268582838573143, "grad_norm": 0.816120982170105, "learning_rate": 2.296979551406029e-06, "loss": 0.0808, "step": 46690 }, { "epoch": 0.8268759933943427, "grad_norm": 0.33589181303977966, "learning_rate": 2.2965220277391213e-06, "loss": 0.03, "step": 46691 }, { "epoch": 0.8268937029313711, "grad_norm": 0.4474545121192932, "learning_rate": 2.2960645458653762e-06, "loss": 0.0394, "step": 46692 }, { "epoch": 0.8269114124683995, "grad_norm": 0.8710653185844421, "learning_rate": 2.2956071057862888e-06, "loss": 0.0747, "step": 46693 }, { "epoch": 0.826929122005428, "grad_norm": 0.8419129848480225, "learning_rate": 2.2951497075033695e-06, "loss": 0.0562, "step": 46694 }, { "epoch": 0.8269468315424564, "grad_norm": 0.9684644937515259, "learning_rate": 2.294692351018124e-06, "loss": 0.0586, "step": 46695 }, { "epoch": 0.8269645410794848, "grad_norm": 0.5057756304740906, "learning_rate": 2.2942350363320543e-06, "loss": 0.0535, "step": 46696 }, { "epoch": 0.8269822506165132, "grad_norm": 0.5390909910202026, "learning_rate": 2.293777763446667e-06, "loss": 0.0541, "step": 46697 }, { "epoch": 0.8269999601535417, "grad_norm": 0.6820995211601257, "learning_rate": 2.2933205323634653e-06, "loss": 0.0555, "step": 46698 }, { "epoch": 0.8270176696905701, "grad_norm": 0.6696798205375671, "learning_rate": 2.2928633430839573e-06, "loss": 0.0673, "step": 46699 }, { "epoch": 0.8270353792275985, "grad_norm": 0.9422377347946167, "learning_rate": 2.2924061956096405e-06, "loss": 0.0703, "step": 46700 }, { "epoch": 0.827053088764627, "grad_norm": 0.4949797987937927, "learning_rate": 2.2919490899420215e-06, "loss": 0.0355, "step": 46701 }, { "epoch": 0.8270707983016554, "grad_norm": 0.5970438122749329, "learning_rate": 2.2914920260826044e-06, "loss": 0.0457, "step": 46702 }, { "epoch": 0.8270885078386838, "grad_norm": 0.9402358531951904, "learning_rate": 2.291035004032898e-06, "loss": 0.0435, "step": 46703 }, { "epoch": 0.8271062173757122, "grad_norm": 0.759337306022644, "learning_rate": 2.2905780237943964e-06, "loss": 0.053, "step": 46704 }, { "epoch": 0.8271239269127407, "grad_norm": 0.8652229905128479, "learning_rate": 2.2901210853686076e-06, "loss": 0.071, "step": 46705 }, { "epoch": 0.8271416364497691, "grad_norm": 0.5165764093399048, "learning_rate": 2.2896641887570365e-06, "loss": 0.0581, "step": 46706 }, { "epoch": 0.8271593459867975, "grad_norm": 0.45964857935905457, "learning_rate": 2.2892073339611823e-06, "loss": 0.0628, "step": 46707 }, { "epoch": 0.8271770555238259, "grad_norm": 0.8826539516448975, "learning_rate": 2.2887505209825515e-06, "loss": 0.0581, "step": 46708 }, { "epoch": 0.8271947650608544, "grad_norm": 0.8801183700561523, "learning_rate": 2.288293749822646e-06, "loss": 0.0568, "step": 46709 }, { "epoch": 0.8272124745978828, "grad_norm": 0.3197064995765686, "learning_rate": 2.2878370204829705e-06, "loss": 0.0467, "step": 46710 }, { "epoch": 0.8272301841349112, "grad_norm": 0.3699996769428253, "learning_rate": 2.2873803329650235e-06, "loss": 0.0624, "step": 46711 }, { "epoch": 0.8272478936719396, "grad_norm": 0.4747155010700226, "learning_rate": 2.286923687270309e-06, "loss": 0.0656, "step": 46712 }, { "epoch": 0.8272656032089681, "grad_norm": 0.7739575505256653, "learning_rate": 2.2864670834003338e-06, "loss": 0.0573, "step": 46713 }, { "epoch": 0.8272833127459965, "grad_norm": 0.5128983855247498, "learning_rate": 2.2860105213565926e-06, "loss": 0.0622, "step": 46714 }, { "epoch": 0.8273010222830249, "grad_norm": 0.4591022729873657, "learning_rate": 2.2855540011405855e-06, "loss": 0.0349, "step": 46715 }, { "epoch": 0.8273187318200534, "grad_norm": 0.6258465051651001, "learning_rate": 2.285097522753824e-06, "loss": 0.0645, "step": 46716 }, { "epoch": 0.8273364413570818, "grad_norm": 0.5911566019058228, "learning_rate": 2.2846410861978096e-06, "loss": 0.0507, "step": 46717 }, { "epoch": 0.8273541508941102, "grad_norm": 0.25753992795944214, "learning_rate": 2.284184691474036e-06, "loss": 0.0433, "step": 46718 }, { "epoch": 0.8273718604311386, "grad_norm": 0.7582086324691772, "learning_rate": 2.2837283385840092e-06, "loss": 0.038, "step": 46719 }, { "epoch": 0.8273895699681671, "grad_norm": 0.7816804051399231, "learning_rate": 2.2832720275292305e-06, "loss": 0.0515, "step": 46720 }, { "epoch": 0.8274072795051955, "grad_norm": 0.6356595754623413, "learning_rate": 2.282815758311203e-06, "loss": 0.0581, "step": 46721 }, { "epoch": 0.827424989042224, "grad_norm": 0.4498352110385895, "learning_rate": 2.2823595309314215e-06, "loss": 0.045, "step": 46722 }, { "epoch": 0.8274426985792523, "grad_norm": 0.6366477608680725, "learning_rate": 2.2819033453913918e-06, "loss": 0.0429, "step": 46723 }, { "epoch": 0.8274604081162809, "grad_norm": 0.7478622198104858, "learning_rate": 2.281447201692614e-06, "loss": 0.049, "step": 46724 }, { "epoch": 0.8274781176533093, "grad_norm": 0.4219280779361725, "learning_rate": 2.280991099836587e-06, "loss": 0.0367, "step": 46725 }, { "epoch": 0.8274958271903377, "grad_norm": 1.0334669351577759, "learning_rate": 2.2805350398248138e-06, "loss": 0.076, "step": 46726 }, { "epoch": 0.827513536727366, "grad_norm": 0.38458728790283203, "learning_rate": 2.280079021658795e-06, "loss": 0.0586, "step": 46727 }, { "epoch": 0.8275312462643946, "grad_norm": 0.3378327190876007, "learning_rate": 2.2796230453400306e-06, "loss": 0.04, "step": 46728 }, { "epoch": 0.827548955801423, "grad_norm": 0.4919496178627014, "learning_rate": 2.279167110870019e-06, "loss": 0.041, "step": 46729 }, { "epoch": 0.8275666653384514, "grad_norm": 0.4330374598503113, "learning_rate": 2.278711218250259e-06, "loss": 0.0597, "step": 46730 }, { "epoch": 0.8275843748754799, "grad_norm": 0.3619624078273773, "learning_rate": 2.2782553674822576e-06, "loss": 0.079, "step": 46731 }, { "epoch": 0.8276020844125083, "grad_norm": 0.500839114189148, "learning_rate": 2.277799558567504e-06, "loss": 0.0636, "step": 46732 }, { "epoch": 0.8276197939495367, "grad_norm": 0.6218711733818054, "learning_rate": 2.277343791507505e-06, "loss": 0.0626, "step": 46733 }, { "epoch": 0.8276375034865651, "grad_norm": 1.0977768898010254, "learning_rate": 2.2768880663037556e-06, "loss": 0.052, "step": 46734 }, { "epoch": 0.8276552130235936, "grad_norm": 1.153989553451538, "learning_rate": 2.2764323829577587e-06, "loss": 0.0746, "step": 46735 }, { "epoch": 0.827672922560622, "grad_norm": 0.4927876591682434, "learning_rate": 2.275976741471013e-06, "loss": 0.0337, "step": 46736 }, { "epoch": 0.8276906320976504, "grad_norm": 1.0519253015518188, "learning_rate": 2.2755211418450174e-06, "loss": 0.0705, "step": 46737 }, { "epoch": 0.8277083416346788, "grad_norm": 0.7736571431159973, "learning_rate": 2.2750655840812717e-06, "loss": 0.0683, "step": 46738 }, { "epoch": 0.8277260511717073, "grad_norm": 0.34957587718963623, "learning_rate": 2.274610068181272e-06, "loss": 0.037, "step": 46739 }, { "epoch": 0.8277437607087357, "grad_norm": 0.5065015554428101, "learning_rate": 2.274154594146517e-06, "loss": 0.0426, "step": 46740 }, { "epoch": 0.8277614702457641, "grad_norm": 0.5107601284980774, "learning_rate": 2.2736991619785053e-06, "loss": 0.0604, "step": 46741 }, { "epoch": 0.8277791797827925, "grad_norm": 0.4929831922054291, "learning_rate": 2.273243771678741e-06, "loss": 0.0544, "step": 46742 }, { "epoch": 0.827796889319821, "grad_norm": 0.415817528963089, "learning_rate": 2.2727884232487123e-06, "loss": 0.0424, "step": 46743 }, { "epoch": 0.8278145988568494, "grad_norm": 0.6245254278182983, "learning_rate": 2.2723331166899203e-06, "loss": 0.0674, "step": 46744 }, { "epoch": 0.8278323083938778, "grad_norm": 0.5794016718864441, "learning_rate": 2.2718778520038717e-06, "loss": 0.0418, "step": 46745 }, { "epoch": 0.8278500179309063, "grad_norm": 0.5082511901855469, "learning_rate": 2.2714226291920543e-06, "loss": 0.0512, "step": 46746 }, { "epoch": 0.8278677274679347, "grad_norm": 0.6269118189811707, "learning_rate": 2.2709674482559696e-06, "loss": 0.0643, "step": 46747 }, { "epoch": 0.8278854370049631, "grad_norm": 0.3065485954284668, "learning_rate": 2.2705123091971132e-06, "loss": 0.0513, "step": 46748 }, { "epoch": 0.8279031465419915, "grad_norm": 0.5948412418365479, "learning_rate": 2.270057212016989e-06, "loss": 0.0705, "step": 46749 }, { "epoch": 0.82792085607902, "grad_norm": 0.8476461172103882, "learning_rate": 2.2696021567170837e-06, "loss": 0.0615, "step": 46750 }, { "epoch": 0.8279385656160484, "grad_norm": 0.5785330533981323, "learning_rate": 2.2691471432989e-06, "loss": 0.0561, "step": 46751 }, { "epoch": 0.8279562751530768, "grad_norm": 0.8280326128005981, "learning_rate": 2.2686921717639353e-06, "loss": 0.0783, "step": 46752 }, { "epoch": 0.8279739846901052, "grad_norm": 0.48172762989997864, "learning_rate": 2.268237242113686e-06, "loss": 0.0604, "step": 46753 }, { "epoch": 0.8279916942271337, "grad_norm": 0.4060555100440979, "learning_rate": 2.267782354349648e-06, "loss": 0.0612, "step": 46754 }, { "epoch": 0.8280094037641621, "grad_norm": 0.49756357073783875, "learning_rate": 2.267327508473318e-06, "loss": 0.0602, "step": 46755 }, { "epoch": 0.8280271133011905, "grad_norm": 0.5428403615951538, "learning_rate": 2.266872704486195e-06, "loss": 0.0422, "step": 46756 }, { "epoch": 0.8280448228382189, "grad_norm": 0.8752866387367249, "learning_rate": 2.266417942389772e-06, "loss": 0.0623, "step": 46757 }, { "epoch": 0.8280625323752474, "grad_norm": 0.8507532477378845, "learning_rate": 2.265963222185543e-06, "loss": 0.052, "step": 46758 }, { "epoch": 0.8280802419122758, "grad_norm": 0.4267837703227997, "learning_rate": 2.2655085438750095e-06, "loss": 0.0327, "step": 46759 }, { "epoch": 0.8280979514493042, "grad_norm": 0.7344210147857666, "learning_rate": 2.265053907459668e-06, "loss": 0.0664, "step": 46760 }, { "epoch": 0.8281156609863327, "grad_norm": 0.30699533224105835, "learning_rate": 2.2645993129410065e-06, "loss": 0.0521, "step": 46761 }, { "epoch": 0.8281333705233611, "grad_norm": 0.8193833231925964, "learning_rate": 2.2641447603205264e-06, "loss": 0.0666, "step": 46762 }, { "epoch": 0.8281510800603895, "grad_norm": 0.740206778049469, "learning_rate": 2.2636902495997215e-06, "loss": 0.0711, "step": 46763 }, { "epoch": 0.8281687895974179, "grad_norm": 0.6614784002304077, "learning_rate": 2.2632357807800872e-06, "loss": 0.0681, "step": 46764 }, { "epoch": 0.8281864991344464, "grad_norm": 1.1010504961013794, "learning_rate": 2.2627813538631183e-06, "loss": 0.0622, "step": 46765 }, { "epoch": 0.8282042086714748, "grad_norm": 0.45806947350502014, "learning_rate": 2.262326968850312e-06, "loss": 0.0481, "step": 46766 }, { "epoch": 0.8282219182085032, "grad_norm": 0.6819522380828857, "learning_rate": 2.261872625743164e-06, "loss": 0.0466, "step": 46767 }, { "epoch": 0.8282396277455316, "grad_norm": 0.5221810340881348, "learning_rate": 2.261418324543163e-06, "loss": 0.0397, "step": 46768 }, { "epoch": 0.8282573372825601, "grad_norm": 0.7199905514717102, "learning_rate": 2.260964065251809e-06, "loss": 0.0583, "step": 46769 }, { "epoch": 0.8282750468195885, "grad_norm": 0.6269038319587708, "learning_rate": 2.2605098478705966e-06, "loss": 0.0406, "step": 46770 }, { "epoch": 0.8282927563566169, "grad_norm": 0.5245139002799988, "learning_rate": 2.2600556724010154e-06, "loss": 0.0493, "step": 46771 }, { "epoch": 0.8283104658936453, "grad_norm": 0.6371163129806519, "learning_rate": 2.259601538844562e-06, "loss": 0.0671, "step": 46772 }, { "epoch": 0.8283281754306738, "grad_norm": 0.43223902583122253, "learning_rate": 2.2591474472027267e-06, "loss": 0.0739, "step": 46773 }, { "epoch": 0.8283458849677022, "grad_norm": 0.6515772342681885, "learning_rate": 2.2586933974770157e-06, "loss": 0.0776, "step": 46774 }, { "epoch": 0.8283635945047306, "grad_norm": 0.7808247804641724, "learning_rate": 2.2582393896689124e-06, "loss": 0.0447, "step": 46775 }, { "epoch": 0.8283813040417591, "grad_norm": 0.9712402820587158, "learning_rate": 2.2577854237799106e-06, "loss": 0.0645, "step": 46776 }, { "epoch": 0.8283990135787875, "grad_norm": 0.7773005366325378, "learning_rate": 2.257331499811511e-06, "loss": 0.0508, "step": 46777 }, { "epoch": 0.8284167231158159, "grad_norm": 0.5072217583656311, "learning_rate": 2.256877617765197e-06, "loss": 0.0448, "step": 46778 }, { "epoch": 0.8284344326528443, "grad_norm": 0.6481300592422485, "learning_rate": 2.256423777642467e-06, "loss": 0.0411, "step": 46779 }, { "epoch": 0.8284521421898728, "grad_norm": 0.26546579599380493, "learning_rate": 2.255969979444815e-06, "loss": 0.0572, "step": 46780 }, { "epoch": 0.8284698517269012, "grad_norm": 0.6787678003311157, "learning_rate": 2.2555162231737323e-06, "loss": 0.0546, "step": 46781 }, { "epoch": 0.8284875612639296, "grad_norm": 0.7713436484336853, "learning_rate": 2.2550625088307112e-06, "loss": 0.0447, "step": 46782 }, { "epoch": 0.828505270800958, "grad_norm": 0.737815797328949, "learning_rate": 2.254608836417247e-06, "loss": 0.0529, "step": 46783 }, { "epoch": 0.8285229803379865, "grad_norm": 0.5381794571876526, "learning_rate": 2.2541552059348342e-06, "loss": 0.0413, "step": 46784 }, { "epoch": 0.828540689875015, "grad_norm": 0.27641594409942627, "learning_rate": 2.2537016173849574e-06, "loss": 0.0482, "step": 46785 }, { "epoch": 0.8285583994120433, "grad_norm": 0.6035943627357483, "learning_rate": 2.253248070769112e-06, "loss": 0.069, "step": 46786 }, { "epoch": 0.8285761089490717, "grad_norm": 0.6515252590179443, "learning_rate": 2.2527945660887926e-06, "loss": 0.0696, "step": 46787 }, { "epoch": 0.8285938184861003, "grad_norm": 0.6033283472061157, "learning_rate": 2.2523411033454926e-06, "loss": 0.0429, "step": 46788 }, { "epoch": 0.8286115280231287, "grad_norm": 0.24278561770915985, "learning_rate": 2.251887682540697e-06, "loss": 0.0419, "step": 46789 }, { "epoch": 0.828629237560157, "grad_norm": 0.6255496144294739, "learning_rate": 2.251434303675903e-06, "loss": 0.0401, "step": 46790 }, { "epoch": 0.8286469470971856, "grad_norm": 1.0135819911956787, "learning_rate": 2.250980966752599e-06, "loss": 0.0469, "step": 46791 }, { "epoch": 0.828664656634214, "grad_norm": 0.8544949293136597, "learning_rate": 2.250527671772279e-06, "loss": 0.0669, "step": 46792 }, { "epoch": 0.8286823661712424, "grad_norm": 1.0025455951690674, "learning_rate": 2.2500744187364338e-06, "loss": 0.076, "step": 46793 }, { "epoch": 0.8287000757082708, "grad_norm": 1.005012035369873, "learning_rate": 2.249621207646555e-06, "loss": 0.0807, "step": 46794 }, { "epoch": 0.8287177852452993, "grad_norm": 0.5990102291107178, "learning_rate": 2.2491680385041353e-06, "loss": 0.0318, "step": 46795 }, { "epoch": 0.8287354947823277, "grad_norm": 1.019393801689148, "learning_rate": 2.248714911310658e-06, "loss": 0.0739, "step": 46796 }, { "epoch": 0.8287532043193561, "grad_norm": 0.5542280077934265, "learning_rate": 2.2482618260676207e-06, "loss": 0.0467, "step": 46797 }, { "epoch": 0.8287709138563845, "grad_norm": 0.35349756479263306, "learning_rate": 2.247808782776512e-06, "loss": 0.0411, "step": 46798 }, { "epoch": 0.828788623393413, "grad_norm": 0.5759907364845276, "learning_rate": 2.2473557814388256e-06, "loss": 0.0534, "step": 46799 }, { "epoch": 0.8288063329304414, "grad_norm": 0.540166437625885, "learning_rate": 2.2469028220560455e-06, "loss": 0.0364, "step": 46800 }, { "epoch": 0.8288240424674698, "grad_norm": 0.5831007957458496, "learning_rate": 2.2464499046296656e-06, "loss": 0.0616, "step": 46801 }, { "epoch": 0.8288417520044983, "grad_norm": 0.4333939850330353, "learning_rate": 2.245997029161176e-06, "loss": 0.0468, "step": 46802 }, { "epoch": 0.8288594615415267, "grad_norm": 0.33276474475860596, "learning_rate": 2.2455441956520657e-06, "loss": 0.0491, "step": 46803 }, { "epoch": 0.8288771710785551, "grad_norm": 0.4926738440990448, "learning_rate": 2.245091404103825e-06, "loss": 0.0533, "step": 46804 }, { "epoch": 0.8288948806155835, "grad_norm": 0.5382388234138489, "learning_rate": 2.244638654517943e-06, "loss": 0.0737, "step": 46805 }, { "epoch": 0.828912590152612, "grad_norm": 0.37409326434135437, "learning_rate": 2.2441859468959136e-06, "loss": 0.0359, "step": 46806 }, { "epoch": 0.8289302996896404, "grad_norm": 0.9349609613418579, "learning_rate": 2.2437332812392188e-06, "loss": 0.08, "step": 46807 }, { "epoch": 0.8289480092266688, "grad_norm": 0.43317699432373047, "learning_rate": 2.243280657549352e-06, "loss": 0.0402, "step": 46808 }, { "epoch": 0.8289657187636972, "grad_norm": 0.6979619860649109, "learning_rate": 2.2428280758278043e-06, "loss": 0.0715, "step": 46809 }, { "epoch": 0.8289834283007257, "grad_norm": 0.29620614647865295, "learning_rate": 2.242375536076054e-06, "loss": 0.0315, "step": 46810 }, { "epoch": 0.8290011378377541, "grad_norm": 0.7168552279472351, "learning_rate": 2.241923038295602e-06, "loss": 0.0658, "step": 46811 }, { "epoch": 0.8290188473747825, "grad_norm": 0.5590417385101318, "learning_rate": 2.241470582487933e-06, "loss": 0.0387, "step": 46812 }, { "epoch": 0.8290365569118109, "grad_norm": 0.5887582898139954, "learning_rate": 2.241018168654539e-06, "loss": 0.0686, "step": 46813 }, { "epoch": 0.8290542664488394, "grad_norm": 0.472391277551651, "learning_rate": 2.2405657967969024e-06, "loss": 0.0273, "step": 46814 }, { "epoch": 0.8290719759858678, "grad_norm": 0.4019111394882202, "learning_rate": 2.2401134669165124e-06, "loss": 0.0429, "step": 46815 }, { "epoch": 0.8290896855228962, "grad_norm": 0.6192930936813354, "learning_rate": 2.2396611790148614e-06, "loss": 0.0867, "step": 46816 }, { "epoch": 0.8291073950599247, "grad_norm": 0.42511340975761414, "learning_rate": 2.2392089330934313e-06, "loss": 0.0596, "step": 46817 }, { "epoch": 0.8291251045969531, "grad_norm": 0.2886669337749481, "learning_rate": 2.238756729153714e-06, "loss": 0.0659, "step": 46818 }, { "epoch": 0.8291428141339815, "grad_norm": 0.5370832681655884, "learning_rate": 2.2383045671971956e-06, "loss": 0.0369, "step": 46819 }, { "epoch": 0.8291605236710099, "grad_norm": 0.5580765008926392, "learning_rate": 2.2378524472253647e-06, "loss": 0.0485, "step": 46820 }, { "epoch": 0.8291782332080384, "grad_norm": 0.8652564287185669, "learning_rate": 2.237400369239708e-06, "loss": 0.0493, "step": 46821 }, { "epoch": 0.8291959427450668, "grad_norm": 0.5759349465370178, "learning_rate": 2.2369483332417136e-06, "loss": 0.0509, "step": 46822 }, { "epoch": 0.8292136522820952, "grad_norm": 0.6243855953216553, "learning_rate": 2.2364963392328693e-06, "loss": 0.0466, "step": 46823 }, { "epoch": 0.8292313618191236, "grad_norm": 0.5074129104614258, "learning_rate": 2.2360443872146623e-06, "loss": 0.0565, "step": 46824 }, { "epoch": 0.8292490713561521, "grad_norm": 0.3963305950164795, "learning_rate": 2.235592477188577e-06, "loss": 0.0531, "step": 46825 }, { "epoch": 0.8292667808931805, "grad_norm": 0.4212457537651062, "learning_rate": 2.2351406091561004e-06, "loss": 0.0725, "step": 46826 }, { "epoch": 0.8292844904302089, "grad_norm": 0.548109233379364, "learning_rate": 2.2346887831187244e-06, "loss": 0.0475, "step": 46827 }, { "epoch": 0.8293021999672373, "grad_norm": 0.5190895795822144, "learning_rate": 2.2342369990779264e-06, "loss": 0.037, "step": 46828 }, { "epoch": 0.8293199095042658, "grad_norm": 0.41251450777053833, "learning_rate": 2.2337852570351983e-06, "loss": 0.0396, "step": 46829 }, { "epoch": 0.8293376190412942, "grad_norm": 0.4879128932952881, "learning_rate": 2.2333335569920243e-06, "loss": 0.0423, "step": 46830 }, { "epoch": 0.8293553285783226, "grad_norm": 0.4934767782688141, "learning_rate": 2.232881898949893e-06, "loss": 0.0466, "step": 46831 }, { "epoch": 0.8293730381153511, "grad_norm": 0.4925490915775299, "learning_rate": 2.2324302829102893e-06, "loss": 0.038, "step": 46832 }, { "epoch": 0.8293907476523795, "grad_norm": 0.5272459983825684, "learning_rate": 2.2319787088746977e-06, "loss": 0.0343, "step": 46833 }, { "epoch": 0.8294084571894079, "grad_norm": 0.6350548267364502, "learning_rate": 2.231527176844607e-06, "loss": 0.0347, "step": 46834 }, { "epoch": 0.8294261667264363, "grad_norm": 0.8024747967720032, "learning_rate": 2.2310756868214994e-06, "loss": 0.055, "step": 46835 }, { "epoch": 0.8294438762634648, "grad_norm": 0.5586445927619934, "learning_rate": 2.2306242388068598e-06, "loss": 0.0457, "step": 46836 }, { "epoch": 0.8294615858004932, "grad_norm": 0.4035954177379608, "learning_rate": 2.2301728328021757e-06, "loss": 0.0231, "step": 46837 }, { "epoch": 0.8294792953375216, "grad_norm": 0.4007333815097809, "learning_rate": 2.2297214688089346e-06, "loss": 0.0446, "step": 46838 }, { "epoch": 0.82949700487455, "grad_norm": 0.5970226526260376, "learning_rate": 2.2292701468286116e-06, "loss": 0.0452, "step": 46839 }, { "epoch": 0.8295147144115785, "grad_norm": 0.4112245440483093, "learning_rate": 2.2288188668627003e-06, "loss": 0.0679, "step": 46840 }, { "epoch": 0.8295324239486069, "grad_norm": 0.4402986466884613, "learning_rate": 2.228367628912688e-06, "loss": 0.0389, "step": 46841 }, { "epoch": 0.8295501334856353, "grad_norm": 0.44927725195884705, "learning_rate": 2.2279164329800503e-06, "loss": 0.0563, "step": 46842 }, { "epoch": 0.8295678430226637, "grad_norm": 0.636598527431488, "learning_rate": 2.2274652790662746e-06, "loss": 0.0808, "step": 46843 }, { "epoch": 0.8295855525596922, "grad_norm": 0.500494122505188, "learning_rate": 2.227014167172848e-06, "loss": 0.0338, "step": 46844 }, { "epoch": 0.8296032620967206, "grad_norm": 0.41012540459632874, "learning_rate": 2.226563097301257e-06, "loss": 0.0487, "step": 46845 }, { "epoch": 0.829620971633749, "grad_norm": 0.7648283839225769, "learning_rate": 2.2261120694529757e-06, "loss": 0.0716, "step": 46846 }, { "epoch": 0.8296386811707775, "grad_norm": 0.5340782999992371, "learning_rate": 2.225661083629495e-06, "loss": 0.0312, "step": 46847 }, { "epoch": 0.829656390707806, "grad_norm": 0.4300466477870941, "learning_rate": 2.2252101398322983e-06, "loss": 0.0551, "step": 46848 }, { "epoch": 0.8296741002448343, "grad_norm": 0.5343079566955566, "learning_rate": 2.2247592380628664e-06, "loss": 0.0556, "step": 46849 }, { "epoch": 0.8296918097818627, "grad_norm": 0.37450796365737915, "learning_rate": 2.224308378322684e-06, "loss": 0.0613, "step": 46850 }, { "epoch": 0.8297095193188913, "grad_norm": 0.4707295298576355, "learning_rate": 2.223857560613236e-06, "loss": 0.0557, "step": 46851 }, { "epoch": 0.8297272288559197, "grad_norm": 0.8956328630447388, "learning_rate": 2.223406784936009e-06, "loss": 0.0603, "step": 46852 }, { "epoch": 0.829744938392948, "grad_norm": 0.6867386698722839, "learning_rate": 2.2229560512924767e-06, "loss": 0.0546, "step": 46853 }, { "epoch": 0.8297626479299764, "grad_norm": 0.34444838762283325, "learning_rate": 2.2225053596841264e-06, "loss": 0.0569, "step": 46854 }, { "epoch": 0.829780357467005, "grad_norm": 0.5557563900947571, "learning_rate": 2.2220547101124454e-06, "loss": 0.0334, "step": 46855 }, { "epoch": 0.8297980670040334, "grad_norm": 0.29121819138526917, "learning_rate": 2.2216041025789075e-06, "loss": 0.0547, "step": 46856 }, { "epoch": 0.8298157765410618, "grad_norm": 0.5122892260551453, "learning_rate": 2.2211535370849993e-06, "loss": 0.0248, "step": 46857 }, { "epoch": 0.8298334860780902, "grad_norm": 0.5847784876823425, "learning_rate": 2.2207030136322055e-06, "loss": 0.0683, "step": 46858 }, { "epoch": 0.8298511956151187, "grad_norm": 0.4950251281261444, "learning_rate": 2.2202525322220043e-06, "loss": 0.0452, "step": 46859 }, { "epoch": 0.8298689051521471, "grad_norm": 0.5478098392486572, "learning_rate": 2.219802092855881e-06, "loss": 0.0508, "step": 46860 }, { "epoch": 0.8298866146891755, "grad_norm": 0.6242804527282715, "learning_rate": 2.2193516955353147e-06, "loss": 0.0533, "step": 46861 }, { "epoch": 0.829904324226204, "grad_norm": 0.8043885827064514, "learning_rate": 2.2189013402617898e-06, "loss": 0.0596, "step": 46862 }, { "epoch": 0.8299220337632324, "grad_norm": 0.41045886278152466, "learning_rate": 2.218451027036791e-06, "loss": 0.0443, "step": 46863 }, { "epoch": 0.8299397433002608, "grad_norm": 0.5116745233535767, "learning_rate": 2.2180007558617905e-06, "loss": 0.0529, "step": 46864 }, { "epoch": 0.8299574528372892, "grad_norm": 0.5562860369682312, "learning_rate": 2.2175505267382757e-06, "loss": 0.0365, "step": 46865 }, { "epoch": 0.8299751623743177, "grad_norm": 0.519447922706604, "learning_rate": 2.2171003396677288e-06, "loss": 0.0692, "step": 46866 }, { "epoch": 0.8299928719113461, "grad_norm": 0.7112249135971069, "learning_rate": 2.2166501946516265e-06, "loss": 0.0487, "step": 46867 }, { "epoch": 0.8300105814483745, "grad_norm": 0.6554870009422302, "learning_rate": 2.2162000916914483e-06, "loss": 0.0711, "step": 46868 }, { "epoch": 0.8300282909854029, "grad_norm": 0.5676513314247131, "learning_rate": 2.2157500307886824e-06, "loss": 0.0447, "step": 46869 }, { "epoch": 0.8300460005224314, "grad_norm": 0.5566473603248596, "learning_rate": 2.2153000119448097e-06, "loss": 0.0484, "step": 46870 }, { "epoch": 0.8300637100594598, "grad_norm": 0.7894327640533447, "learning_rate": 2.2148500351613037e-06, "loss": 0.0594, "step": 46871 }, { "epoch": 0.8300814195964882, "grad_norm": 1.063652515411377, "learning_rate": 2.214400100439647e-06, "loss": 0.0707, "step": 46872 }, { "epoch": 0.8300991291335166, "grad_norm": 0.41382530331611633, "learning_rate": 2.2139502077813246e-06, "loss": 0.047, "step": 46873 }, { "epoch": 0.8301168386705451, "grad_norm": 0.6998462080955505, "learning_rate": 2.213500357187809e-06, "loss": 0.0687, "step": 46874 }, { "epoch": 0.8301345482075735, "grad_norm": 0.8868466019630432, "learning_rate": 2.213050548660585e-06, "loss": 0.0728, "step": 46875 }, { "epoch": 0.8301522577446019, "grad_norm": 0.6802955865859985, "learning_rate": 2.212600782201131e-06, "loss": 0.0767, "step": 46876 }, { "epoch": 0.8301699672816304, "grad_norm": 0.38286054134368896, "learning_rate": 2.212151057810926e-06, "loss": 0.0469, "step": 46877 }, { "epoch": 0.8301876768186588, "grad_norm": 0.5755757093429565, "learning_rate": 2.211701375491452e-06, "loss": 0.0516, "step": 46878 }, { "epoch": 0.8302053863556872, "grad_norm": 0.7593771815299988, "learning_rate": 2.2112517352441846e-06, "loss": 0.0546, "step": 46879 }, { "epoch": 0.8302230958927156, "grad_norm": 0.6604424715042114, "learning_rate": 2.210802137070612e-06, "loss": 0.0454, "step": 46880 }, { "epoch": 0.8302408054297441, "grad_norm": 0.5818885564804077, "learning_rate": 2.2103525809722015e-06, "loss": 0.0742, "step": 46881 }, { "epoch": 0.8302585149667725, "grad_norm": 0.9528688788414001, "learning_rate": 2.2099030669504376e-06, "loss": 0.0782, "step": 46882 }, { "epoch": 0.8302762245038009, "grad_norm": 0.8114929795265198, "learning_rate": 2.2094535950068e-06, "loss": 0.1139, "step": 46883 }, { "epoch": 0.8302939340408293, "grad_norm": 0.8104997277259827, "learning_rate": 2.209004165142769e-06, "loss": 0.0405, "step": 46884 }, { "epoch": 0.8303116435778578, "grad_norm": 0.4153570234775543, "learning_rate": 2.2085547773598163e-06, "loss": 0.0508, "step": 46885 }, { "epoch": 0.8303293531148862, "grad_norm": 0.37703120708465576, "learning_rate": 2.208105431659426e-06, "loss": 0.0426, "step": 46886 }, { "epoch": 0.8303470626519146, "grad_norm": 0.48170170187950134, "learning_rate": 2.2076561280430756e-06, "loss": 0.0572, "step": 46887 }, { "epoch": 0.830364772188943, "grad_norm": 0.6241121292114258, "learning_rate": 2.20720686651224e-06, "loss": 0.0754, "step": 46888 }, { "epoch": 0.8303824817259715, "grad_norm": 0.8359997868537903, "learning_rate": 2.206757647068402e-06, "loss": 0.0814, "step": 46889 }, { "epoch": 0.8304001912629999, "grad_norm": 0.43401139974594116, "learning_rate": 2.206308469713038e-06, "loss": 0.0562, "step": 46890 }, { "epoch": 0.8304179008000283, "grad_norm": 0.5856935381889343, "learning_rate": 2.2058593344476273e-06, "loss": 0.0554, "step": 46891 }, { "epoch": 0.8304356103370568, "grad_norm": 0.42242807149887085, "learning_rate": 2.205410241273642e-06, "loss": 0.0569, "step": 46892 }, { "epoch": 0.8304533198740852, "grad_norm": 0.43824130296707153, "learning_rate": 2.2049611901925625e-06, "loss": 0.043, "step": 46893 }, { "epoch": 0.8304710294111136, "grad_norm": 0.7426248788833618, "learning_rate": 2.204512181205868e-06, "loss": 0.0579, "step": 46894 }, { "epoch": 0.830488738948142, "grad_norm": 1.0960566997528076, "learning_rate": 2.2040632143150365e-06, "loss": 0.0614, "step": 46895 }, { "epoch": 0.8305064484851705, "grad_norm": 0.48074230551719666, "learning_rate": 2.203614289521539e-06, "loss": 0.0444, "step": 46896 }, { "epoch": 0.8305241580221989, "grad_norm": 0.662456750869751, "learning_rate": 2.203165406826854e-06, "loss": 0.0562, "step": 46897 }, { "epoch": 0.8305418675592273, "grad_norm": 0.394133597612381, "learning_rate": 2.202716566232466e-06, "loss": 0.0247, "step": 46898 }, { "epoch": 0.8305595770962557, "grad_norm": 0.5285132527351379, "learning_rate": 2.202267767739843e-06, "loss": 0.0518, "step": 46899 }, { "epoch": 0.8305772866332842, "grad_norm": 0.771098792552948, "learning_rate": 2.201819011350465e-06, "loss": 0.0665, "step": 46900 }, { "epoch": 0.8305949961703126, "grad_norm": 0.4748961925506592, "learning_rate": 2.201370297065808e-06, "loss": 0.0559, "step": 46901 }, { "epoch": 0.830612705707341, "grad_norm": 0.5398989319801331, "learning_rate": 2.2009216248873526e-06, "loss": 0.0516, "step": 46902 }, { "epoch": 0.8306304152443694, "grad_norm": 0.34627166390419006, "learning_rate": 2.2004729948165665e-06, "loss": 0.0529, "step": 46903 }, { "epoch": 0.8306481247813979, "grad_norm": 0.6337276697158813, "learning_rate": 2.200024406854928e-06, "loss": 0.0544, "step": 46904 }, { "epoch": 0.8306658343184263, "grad_norm": 0.59255450963974, "learning_rate": 2.1995758610039163e-06, "loss": 0.0655, "step": 46905 }, { "epoch": 0.8306835438554547, "grad_norm": 0.7856521606445312, "learning_rate": 2.199127357265005e-06, "loss": 0.0577, "step": 46906 }, { "epoch": 0.8307012533924832, "grad_norm": 0.5868585705757141, "learning_rate": 2.1986788956396695e-06, "loss": 0.068, "step": 46907 }, { "epoch": 0.8307189629295116, "grad_norm": 0.7568991184234619, "learning_rate": 2.198230476129386e-06, "loss": 0.06, "step": 46908 }, { "epoch": 0.83073667246654, "grad_norm": 0.5586929321289062, "learning_rate": 2.197782098735634e-06, "loss": 0.0423, "step": 46909 }, { "epoch": 0.8307543820035684, "grad_norm": 0.7691236138343811, "learning_rate": 2.1973337634598795e-06, "loss": 0.0673, "step": 46910 }, { "epoch": 0.830772091540597, "grad_norm": 0.46891269087791443, "learning_rate": 2.1968854703036013e-06, "loss": 0.0798, "step": 46911 }, { "epoch": 0.8307898010776253, "grad_norm": 0.2791033387184143, "learning_rate": 2.1964372192682798e-06, "loss": 0.0312, "step": 46912 }, { "epoch": 0.8308075106146537, "grad_norm": 0.4900376796722412, "learning_rate": 2.1959890103553803e-06, "loss": 0.0292, "step": 46913 }, { "epoch": 0.8308252201516821, "grad_norm": 0.5133681297302246, "learning_rate": 2.195540843566382e-06, "loss": 0.0619, "step": 46914 }, { "epoch": 0.8308429296887107, "grad_norm": 0.38985249400138855, "learning_rate": 2.195092718902759e-06, "loss": 0.0581, "step": 46915 }, { "epoch": 0.830860639225739, "grad_norm": 0.8096452355384827, "learning_rate": 2.194644636365987e-06, "loss": 0.0629, "step": 46916 }, { "epoch": 0.8308783487627674, "grad_norm": 0.29959890246391296, "learning_rate": 2.1941965959575383e-06, "loss": 0.0538, "step": 46917 }, { "epoch": 0.8308960582997958, "grad_norm": 0.9734693765640259, "learning_rate": 2.1937485976788874e-06, "loss": 0.073, "step": 46918 }, { "epoch": 0.8309137678368244, "grad_norm": 0.71172034740448, "learning_rate": 2.1933006415315114e-06, "loss": 0.0685, "step": 46919 }, { "epoch": 0.8309314773738528, "grad_norm": 0.5776835083961487, "learning_rate": 2.192852727516878e-06, "loss": 0.0628, "step": 46920 }, { "epoch": 0.8309491869108812, "grad_norm": 0.39690518379211426, "learning_rate": 2.192404855636464e-06, "loss": 0.0505, "step": 46921 }, { "epoch": 0.8309668964479097, "grad_norm": 0.8332929611206055, "learning_rate": 2.1919570258917434e-06, "loss": 0.0803, "step": 46922 }, { "epoch": 0.8309846059849381, "grad_norm": 0.454671174287796, "learning_rate": 2.191509238284191e-06, "loss": 0.031, "step": 46923 }, { "epoch": 0.8310023155219665, "grad_norm": 0.8268163204193115, "learning_rate": 2.191061492815273e-06, "loss": 0.0657, "step": 46924 }, { "epoch": 0.8310200250589949, "grad_norm": 0.7821038365364075, "learning_rate": 2.1906137894864703e-06, "loss": 0.0468, "step": 46925 }, { "epoch": 0.8310377345960234, "grad_norm": 0.4408535659313202, "learning_rate": 2.190166128299251e-06, "loss": 0.0646, "step": 46926 }, { "epoch": 0.8310554441330518, "grad_norm": 0.7329530715942383, "learning_rate": 2.1897185092550887e-06, "loss": 0.0508, "step": 46927 }, { "epoch": 0.8310731536700802, "grad_norm": 0.2005041390657425, "learning_rate": 2.1892709323554578e-06, "loss": 0.0417, "step": 46928 }, { "epoch": 0.8310908632071086, "grad_norm": 0.38381102681159973, "learning_rate": 2.1888233976018285e-06, "loss": 0.0355, "step": 46929 }, { "epoch": 0.8311085727441371, "grad_norm": 0.4161844253540039, "learning_rate": 2.18837590499568e-06, "loss": 0.0445, "step": 46930 }, { "epoch": 0.8311262822811655, "grad_norm": 0.7456984519958496, "learning_rate": 2.1879284545384736e-06, "loss": 0.0544, "step": 46931 }, { "epoch": 0.8311439918181939, "grad_norm": 0.899823009967804, "learning_rate": 2.1874810462316875e-06, "loss": 0.0387, "step": 46932 }, { "epoch": 0.8311617013552223, "grad_norm": 0.7853417992591858, "learning_rate": 2.187033680076793e-06, "loss": 0.0549, "step": 46933 }, { "epoch": 0.8311794108922508, "grad_norm": 0.4441468119621277, "learning_rate": 2.186586356075263e-06, "loss": 0.0563, "step": 46934 }, { "epoch": 0.8311971204292792, "grad_norm": 0.20462962985038757, "learning_rate": 2.1861390742285657e-06, "loss": 0.0305, "step": 46935 }, { "epoch": 0.8312148299663076, "grad_norm": 0.7624567747116089, "learning_rate": 2.1856918345381754e-06, "loss": 0.0695, "step": 46936 }, { "epoch": 0.8312325395033361, "grad_norm": 0.2041938751935959, "learning_rate": 2.1852446370055674e-06, "loss": 0.0328, "step": 46937 }, { "epoch": 0.8312502490403645, "grad_norm": 0.6813809275627136, "learning_rate": 2.1847974816322037e-06, "loss": 0.0568, "step": 46938 }, { "epoch": 0.8312679585773929, "grad_norm": 0.4157032072544098, "learning_rate": 2.1843503684195614e-06, "loss": 0.0199, "step": 46939 }, { "epoch": 0.8312856681144213, "grad_norm": 0.6130856275558472, "learning_rate": 2.183903297369108e-06, "loss": 0.1032, "step": 46940 }, { "epoch": 0.8313033776514498, "grad_norm": 0.6020322442054749, "learning_rate": 2.1834562684823223e-06, "loss": 0.0595, "step": 46941 }, { "epoch": 0.8313210871884782, "grad_norm": 0.721138596534729, "learning_rate": 2.1830092817606646e-06, "loss": 0.0622, "step": 46942 }, { "epoch": 0.8313387967255066, "grad_norm": 0.5304065942764282, "learning_rate": 2.18256233720561e-06, "loss": 0.0378, "step": 46943 }, { "epoch": 0.831356506262535, "grad_norm": 1.1857296228408813, "learning_rate": 2.1821154348186282e-06, "loss": 0.1067, "step": 46944 }, { "epoch": 0.8313742157995635, "grad_norm": 0.6534475684165955, "learning_rate": 2.181668574601191e-06, "loss": 0.0587, "step": 46945 }, { "epoch": 0.8313919253365919, "grad_norm": 0.8174201250076294, "learning_rate": 2.1812217565547686e-06, "loss": 0.0562, "step": 46946 }, { "epoch": 0.8314096348736203, "grad_norm": 0.4127821922302246, "learning_rate": 2.1807749806808284e-06, "loss": 0.0677, "step": 46947 }, { "epoch": 0.8314273444106487, "grad_norm": 0.5925143361091614, "learning_rate": 2.1803282469808462e-06, "loss": 0.062, "step": 46948 }, { "epoch": 0.8314450539476772, "grad_norm": 0.6212412118911743, "learning_rate": 2.1798815554562833e-06, "loss": 0.0555, "step": 46949 }, { "epoch": 0.8314627634847056, "grad_norm": 0.7228350639343262, "learning_rate": 2.1794349061086142e-06, "loss": 0.0681, "step": 46950 }, { "epoch": 0.831480473021734, "grad_norm": 0.500665009021759, "learning_rate": 2.178988298939311e-06, "loss": 0.0597, "step": 46951 }, { "epoch": 0.8314981825587625, "grad_norm": 0.6471970081329346, "learning_rate": 2.1785417339498357e-06, "loss": 0.079, "step": 46952 }, { "epoch": 0.8315158920957909, "grad_norm": 0.4601961672306061, "learning_rate": 2.1780952111416625e-06, "loss": 0.0542, "step": 46953 }, { "epoch": 0.8315336016328193, "grad_norm": 0.5259196162223816, "learning_rate": 2.1776487305162578e-06, "loss": 0.0619, "step": 46954 }, { "epoch": 0.8315513111698477, "grad_norm": 0.9207367897033691, "learning_rate": 2.177202292075091e-06, "loss": 0.079, "step": 46955 }, { "epoch": 0.8315690207068762, "grad_norm": 0.6788803935050964, "learning_rate": 2.1767558958196342e-06, "loss": 0.0624, "step": 46956 }, { "epoch": 0.8315867302439046, "grad_norm": 0.7630009651184082, "learning_rate": 2.1763095417513513e-06, "loss": 0.0563, "step": 46957 }, { "epoch": 0.831604439780933, "grad_norm": 0.7567782402038574, "learning_rate": 2.1758632298717173e-06, "loss": 0.0742, "step": 46958 }, { "epoch": 0.8316221493179614, "grad_norm": 0.45053842663764954, "learning_rate": 2.1754169601821935e-06, "loss": 0.0411, "step": 46959 }, { "epoch": 0.8316398588549899, "grad_norm": 0.32604020833969116, "learning_rate": 2.1749707326842494e-06, "loss": 0.067, "step": 46960 }, { "epoch": 0.8316575683920183, "grad_norm": 0.7554330229759216, "learning_rate": 2.1745245473793563e-06, "loss": 0.0567, "step": 46961 }, { "epoch": 0.8316752779290467, "grad_norm": 0.5867983102798462, "learning_rate": 2.174078404268981e-06, "loss": 0.0589, "step": 46962 }, { "epoch": 0.8316929874660751, "grad_norm": 0.604188084602356, "learning_rate": 2.1736323033545856e-06, "loss": 0.0296, "step": 46963 }, { "epoch": 0.8317106970031036, "grad_norm": 0.6595004200935364, "learning_rate": 2.173186244637646e-06, "loss": 0.0638, "step": 46964 }, { "epoch": 0.831728406540132, "grad_norm": 0.7445468902587891, "learning_rate": 2.172740228119626e-06, "loss": 0.0485, "step": 46965 }, { "epoch": 0.8317461160771604, "grad_norm": 0.5345577597618103, "learning_rate": 2.172294253801995e-06, "loss": 0.0593, "step": 46966 }, { "epoch": 0.8317638256141889, "grad_norm": 0.885607898235321, "learning_rate": 2.171848321686217e-06, "loss": 0.0467, "step": 46967 }, { "epoch": 0.8317815351512173, "grad_norm": 0.26934996247291565, "learning_rate": 2.171402431773761e-06, "loss": 0.0484, "step": 46968 }, { "epoch": 0.8317992446882457, "grad_norm": 0.49307969212532043, "learning_rate": 2.170956584066096e-06, "loss": 0.0371, "step": 46969 }, { "epoch": 0.8318169542252741, "grad_norm": 0.594619870185852, "learning_rate": 2.1705107785646814e-06, "loss": 0.0483, "step": 46970 }, { "epoch": 0.8318346637623026, "grad_norm": 0.9560438394546509, "learning_rate": 2.1700650152709913e-06, "loss": 0.0621, "step": 46971 }, { "epoch": 0.831852373299331, "grad_norm": 0.6647818684577942, "learning_rate": 2.1696192941864885e-06, "loss": 0.0667, "step": 46972 }, { "epoch": 0.8318700828363594, "grad_norm": 0.46101152896881104, "learning_rate": 2.1691736153126407e-06, "loss": 0.0433, "step": 46973 }, { "epoch": 0.8318877923733878, "grad_norm": 0.41550129652023315, "learning_rate": 2.1687279786509136e-06, "loss": 0.0432, "step": 46974 }, { "epoch": 0.8319055019104163, "grad_norm": 0.6526923179626465, "learning_rate": 2.168282384202772e-06, "loss": 0.0512, "step": 46975 }, { "epoch": 0.8319232114474447, "grad_norm": 0.8382448554039001, "learning_rate": 2.167836831969689e-06, "loss": 0.0676, "step": 46976 }, { "epoch": 0.8319409209844731, "grad_norm": 0.5179730653762817, "learning_rate": 2.1673913219531212e-06, "loss": 0.051, "step": 46977 }, { "epoch": 0.8319586305215015, "grad_norm": 0.6378034949302673, "learning_rate": 2.166945854154539e-06, "loss": 0.0565, "step": 46978 }, { "epoch": 0.83197634005853, "grad_norm": 0.24274660646915436, "learning_rate": 2.166500428575405e-06, "loss": 0.0492, "step": 46979 }, { "epoch": 0.8319940495955584, "grad_norm": 0.49597087502479553, "learning_rate": 2.166055045217191e-06, "loss": 0.051, "step": 46980 }, { "epoch": 0.8320117591325868, "grad_norm": 0.8623162508010864, "learning_rate": 2.165609704081354e-06, "loss": 0.0867, "step": 46981 }, { "epoch": 0.8320294686696154, "grad_norm": 0.44015294313430786, "learning_rate": 2.165164405169363e-06, "loss": 0.0798, "step": 46982 }, { "epoch": 0.8320471782066438, "grad_norm": 0.3278641998767853, "learning_rate": 2.164719148482684e-06, "loss": 0.0325, "step": 46983 }, { "epoch": 0.8320648877436722, "grad_norm": 0.569781482219696, "learning_rate": 2.16427393402278e-06, "loss": 0.049, "step": 46984 }, { "epoch": 0.8320825972807006, "grad_norm": 0.7594946026802063, "learning_rate": 2.1638287617911156e-06, "loss": 0.0588, "step": 46985 }, { "epoch": 0.8321003068177291, "grad_norm": 0.8536362648010254, "learning_rate": 2.1633836317891577e-06, "loss": 0.0861, "step": 46986 }, { "epoch": 0.8321180163547575, "grad_norm": 0.8587611317634583, "learning_rate": 2.162938544018373e-06, "loss": 0.0613, "step": 46987 }, { "epoch": 0.8321357258917859, "grad_norm": 0.40570002794265747, "learning_rate": 2.1624934984802177e-06, "loss": 0.0467, "step": 46988 }, { "epoch": 0.8321534354288143, "grad_norm": 1.064115047454834, "learning_rate": 2.16204849517616e-06, "loss": 0.0895, "step": 46989 }, { "epoch": 0.8321711449658428, "grad_norm": 0.12250755727291107, "learning_rate": 2.161603534107669e-06, "loss": 0.0343, "step": 46990 }, { "epoch": 0.8321888545028712, "grad_norm": 0.44737860560417175, "learning_rate": 2.1611586152761985e-06, "loss": 0.0617, "step": 46991 }, { "epoch": 0.8322065640398996, "grad_norm": 0.7357984185218811, "learning_rate": 2.160713738683216e-06, "loss": 0.0626, "step": 46992 }, { "epoch": 0.832224273576928, "grad_norm": 0.8019684553146362, "learning_rate": 2.160268904330188e-06, "loss": 0.0498, "step": 46993 }, { "epoch": 0.8322419831139565, "grad_norm": 0.6223999261856079, "learning_rate": 2.1598241122185826e-06, "loss": 0.0472, "step": 46994 }, { "epoch": 0.8322596926509849, "grad_norm": 0.8469865918159485, "learning_rate": 2.1593793623498525e-06, "loss": 0.057, "step": 46995 }, { "epoch": 0.8322774021880133, "grad_norm": 0.5560770630836487, "learning_rate": 2.158934654725466e-06, "loss": 0.049, "step": 46996 }, { "epoch": 0.8322951117250418, "grad_norm": 0.40867388248443604, "learning_rate": 2.1584899893468856e-06, "loss": 0.0599, "step": 46997 }, { "epoch": 0.8323128212620702, "grad_norm": 0.6920099258422852, "learning_rate": 2.158045366215576e-06, "loss": 0.0375, "step": 46998 }, { "epoch": 0.8323305307990986, "grad_norm": 0.5168778300285339, "learning_rate": 2.1576007853329967e-06, "loss": 0.0371, "step": 46999 }, { "epoch": 0.832348240336127, "grad_norm": 0.4825283885002136, "learning_rate": 2.1571562467006116e-06, "loss": 0.0549, "step": 47000 }, { "epoch": 0.8323659498731555, "grad_norm": 0.7054473757743835, "learning_rate": 2.156711750319884e-06, "loss": 0.0577, "step": 47001 }, { "epoch": 0.8323836594101839, "grad_norm": 0.39635249972343445, "learning_rate": 2.1562672961922743e-06, "loss": 0.0635, "step": 47002 }, { "epoch": 0.8324013689472123, "grad_norm": 0.7247903347015381, "learning_rate": 2.1558228843192473e-06, "loss": 0.0469, "step": 47003 }, { "epoch": 0.8324190784842407, "grad_norm": 0.21826156973838806, "learning_rate": 2.1553785147022644e-06, "loss": 0.0632, "step": 47004 }, { "epoch": 0.8324367880212692, "grad_norm": 0.4633188843727112, "learning_rate": 2.1549341873427887e-06, "loss": 0.0534, "step": 47005 }, { "epoch": 0.8324544975582976, "grad_norm": 0.6404669284820557, "learning_rate": 2.1544899022422776e-06, "loss": 0.0639, "step": 47006 }, { "epoch": 0.832472207095326, "grad_norm": 0.8229680061340332, "learning_rate": 2.154045659402195e-06, "loss": 0.0519, "step": 47007 }, { "epoch": 0.8324899166323544, "grad_norm": 0.6207656860351562, "learning_rate": 2.1536014588240085e-06, "loss": 0.0353, "step": 47008 }, { "epoch": 0.8325076261693829, "grad_norm": 0.5707899332046509, "learning_rate": 2.1531573005091677e-06, "loss": 0.0808, "step": 47009 }, { "epoch": 0.8325253357064113, "grad_norm": 0.5385787487030029, "learning_rate": 2.152713184459142e-06, "loss": 0.057, "step": 47010 }, { "epoch": 0.8325430452434397, "grad_norm": 0.48237594962120056, "learning_rate": 2.152269110675388e-06, "loss": 0.0589, "step": 47011 }, { "epoch": 0.8325607547804682, "grad_norm": 0.5424659848213196, "learning_rate": 2.1518250791593725e-06, "loss": 0.0463, "step": 47012 }, { "epoch": 0.8325784643174966, "grad_norm": 0.6616436839103699, "learning_rate": 2.1513810899125515e-06, "loss": 0.0397, "step": 47013 }, { "epoch": 0.832596173854525, "grad_norm": 0.3126750886440277, "learning_rate": 2.1509371429363855e-06, "loss": 0.0357, "step": 47014 }, { "epoch": 0.8326138833915534, "grad_norm": 0.350343257188797, "learning_rate": 2.1504932382323422e-06, "loss": 0.0615, "step": 47015 }, { "epoch": 0.8326315929285819, "grad_norm": 0.5632583498954773, "learning_rate": 2.150049375801872e-06, "loss": 0.0449, "step": 47016 }, { "epoch": 0.8326493024656103, "grad_norm": 0.1744874268770218, "learning_rate": 2.14960555564644e-06, "loss": 0.0555, "step": 47017 }, { "epoch": 0.8326670120026387, "grad_norm": 0.5531653761863708, "learning_rate": 2.149161777767507e-06, "loss": 0.0623, "step": 47018 }, { "epoch": 0.8326847215396671, "grad_norm": 0.7418238520622253, "learning_rate": 2.1487180421665337e-06, "loss": 0.0562, "step": 47019 }, { "epoch": 0.8327024310766956, "grad_norm": 0.5935494899749756, "learning_rate": 2.1482743488449767e-06, "loss": 0.0554, "step": 47020 }, { "epoch": 0.832720140613724, "grad_norm": 0.43765902519226074, "learning_rate": 2.1478306978042924e-06, "loss": 0.0375, "step": 47021 }, { "epoch": 0.8327378501507524, "grad_norm": 0.6201606392860413, "learning_rate": 2.147387089045952e-06, "loss": 0.0559, "step": 47022 }, { "epoch": 0.8327555596877808, "grad_norm": 0.4587598145008087, "learning_rate": 2.1469435225714045e-06, "loss": 0.0562, "step": 47023 }, { "epoch": 0.8327732692248093, "grad_norm": 0.6560243964195251, "learning_rate": 2.1464999983821142e-06, "loss": 0.0624, "step": 47024 }, { "epoch": 0.8327909787618377, "grad_norm": 0.6380679607391357, "learning_rate": 2.1460565164795383e-06, "loss": 0.0578, "step": 47025 }, { "epoch": 0.8328086882988661, "grad_norm": 0.5878050327301025, "learning_rate": 2.145613076865138e-06, "loss": 0.0631, "step": 47026 }, { "epoch": 0.8328263978358946, "grad_norm": 0.6501154899597168, "learning_rate": 2.1451696795403685e-06, "loss": 0.0606, "step": 47027 }, { "epoch": 0.832844107372923, "grad_norm": 0.5731006264686584, "learning_rate": 2.144726324506691e-06, "loss": 0.0472, "step": 47028 }, { "epoch": 0.8328618169099514, "grad_norm": 0.20038586854934692, "learning_rate": 2.144283011765563e-06, "loss": 0.0447, "step": 47029 }, { "epoch": 0.8328795264469798, "grad_norm": 0.7510401606559753, "learning_rate": 2.143839741318445e-06, "loss": 0.0715, "step": 47030 }, { "epoch": 0.8328972359840083, "grad_norm": 0.7240797877311707, "learning_rate": 2.1433965131667927e-06, "loss": 0.0529, "step": 47031 }, { "epoch": 0.8329149455210367, "grad_norm": 0.45994290709495544, "learning_rate": 2.1429533273120646e-06, "loss": 0.0346, "step": 47032 }, { "epoch": 0.8329326550580651, "grad_norm": 0.5950118899345398, "learning_rate": 2.142510183755724e-06, "loss": 0.0467, "step": 47033 }, { "epoch": 0.8329503645950935, "grad_norm": 0.6092032194137573, "learning_rate": 2.142067082499221e-06, "loss": 0.0666, "step": 47034 }, { "epoch": 0.832968074132122, "grad_norm": 0.6355505585670471, "learning_rate": 2.141624023544017e-06, "loss": 0.0442, "step": 47035 }, { "epoch": 0.8329857836691504, "grad_norm": 0.6310681700706482, "learning_rate": 2.1411810068915694e-06, "loss": 0.0504, "step": 47036 }, { "epoch": 0.8330034932061788, "grad_norm": 0.6413637399673462, "learning_rate": 2.1407380325433403e-06, "loss": 0.0606, "step": 47037 }, { "epoch": 0.8330212027432072, "grad_norm": 0.7347878217697144, "learning_rate": 2.1402951005007766e-06, "loss": 0.06, "step": 47038 }, { "epoch": 0.8330389122802357, "grad_norm": 0.5373702049255371, "learning_rate": 2.1398522107653422e-06, "loss": 0.0362, "step": 47039 }, { "epoch": 0.8330566218172641, "grad_norm": 1.1970237493515015, "learning_rate": 2.1394093633384947e-06, "loss": 0.0812, "step": 47040 }, { "epoch": 0.8330743313542925, "grad_norm": 0.8436493277549744, "learning_rate": 2.138966558221688e-06, "loss": 0.0813, "step": 47041 }, { "epoch": 0.833092040891321, "grad_norm": 0.5557639598846436, "learning_rate": 2.1385237954163815e-06, "loss": 0.0687, "step": 47042 }, { "epoch": 0.8331097504283494, "grad_norm": 0.4522665739059448, "learning_rate": 2.1380810749240304e-06, "loss": 0.0553, "step": 47043 }, { "epoch": 0.8331274599653778, "grad_norm": 1.0712698698043823, "learning_rate": 2.1376383967460946e-06, "loss": 0.061, "step": 47044 }, { "epoch": 0.8331451695024062, "grad_norm": 0.3057374060153961, "learning_rate": 2.137195760884024e-06, "loss": 0.0598, "step": 47045 }, { "epoch": 0.8331628790394348, "grad_norm": 0.5512407422065735, "learning_rate": 2.1367531673392787e-06, "loss": 0.0532, "step": 47046 }, { "epoch": 0.8331805885764632, "grad_norm": 0.42035964131355286, "learning_rate": 2.1363106161133184e-06, "loss": 0.0581, "step": 47047 }, { "epoch": 0.8331982981134916, "grad_norm": 0.6183440089225769, "learning_rate": 2.135868107207592e-06, "loss": 0.0512, "step": 47048 }, { "epoch": 0.83321600765052, "grad_norm": 0.3034287989139557, "learning_rate": 2.1354256406235566e-06, "loss": 0.0412, "step": 47049 }, { "epoch": 0.8332337171875485, "grad_norm": 0.1672743260860443, "learning_rate": 2.134983216362668e-06, "loss": 0.0452, "step": 47050 }, { "epoch": 0.8332514267245769, "grad_norm": 0.4075831174850464, "learning_rate": 2.13454083442639e-06, "loss": 0.0358, "step": 47051 }, { "epoch": 0.8332691362616053, "grad_norm": 0.6173322200775146, "learning_rate": 2.1340984948161678e-06, "loss": 0.0588, "step": 47052 }, { "epoch": 0.8332868457986337, "grad_norm": 0.5868191123008728, "learning_rate": 2.133656197533459e-06, "loss": 0.0429, "step": 47053 }, { "epoch": 0.8333045553356622, "grad_norm": 0.7339324951171875, "learning_rate": 2.133213942579725e-06, "loss": 0.0395, "step": 47054 }, { "epoch": 0.8333222648726906, "grad_norm": 0.6298981308937073, "learning_rate": 2.1327717299564124e-06, "loss": 0.0508, "step": 47055 }, { "epoch": 0.833339974409719, "grad_norm": 0.47241270542144775, "learning_rate": 2.1323295596649777e-06, "loss": 0.0399, "step": 47056 }, { "epoch": 0.8333576839467475, "grad_norm": 0.6855455636978149, "learning_rate": 2.1318874317068783e-06, "loss": 0.038, "step": 47057 }, { "epoch": 0.8333753934837759, "grad_norm": 0.02058715932071209, "learning_rate": 2.1314453460835674e-06, "loss": 0.0631, "step": 47058 }, { "epoch": 0.8333931030208043, "grad_norm": 0.5815222263336182, "learning_rate": 2.1310033027964997e-06, "loss": 0.0535, "step": 47059 }, { "epoch": 0.8334108125578327, "grad_norm": 0.5000522136688232, "learning_rate": 2.1305613018471305e-06, "loss": 0.0587, "step": 47060 }, { "epoch": 0.8334285220948612, "grad_norm": 0.7941058874130249, "learning_rate": 2.1301193432369165e-06, "loss": 0.07, "step": 47061 }, { "epoch": 0.8334462316318896, "grad_norm": 0.7982124090194702, "learning_rate": 2.1296774269673038e-06, "loss": 0.0647, "step": 47062 }, { "epoch": 0.833463941168918, "grad_norm": 0.7550958395004272, "learning_rate": 2.1292355530397504e-06, "loss": 0.0616, "step": 47063 }, { "epoch": 0.8334816507059464, "grad_norm": 0.8983702063560486, "learning_rate": 2.1287937214557095e-06, "loss": 0.0635, "step": 47064 }, { "epoch": 0.8334993602429749, "grad_norm": 0.5618849396705627, "learning_rate": 2.1283519322166413e-06, "loss": 0.0398, "step": 47065 }, { "epoch": 0.8335170697800033, "grad_norm": 0.43218743801116943, "learning_rate": 2.1279101853239875e-06, "loss": 0.0362, "step": 47066 }, { "epoch": 0.8335347793170317, "grad_norm": 0.7814365029335022, "learning_rate": 2.127468480779209e-06, "loss": 0.0378, "step": 47067 }, { "epoch": 0.8335524888540601, "grad_norm": 0.552479088306427, "learning_rate": 2.127026818583756e-06, "loss": 0.0527, "step": 47068 }, { "epoch": 0.8335701983910886, "grad_norm": 0.8352043032646179, "learning_rate": 2.126585198739083e-06, "loss": 0.0626, "step": 47069 }, { "epoch": 0.833587907928117, "grad_norm": 0.6600586771965027, "learning_rate": 2.126143621246643e-06, "loss": 0.0476, "step": 47070 }, { "epoch": 0.8336056174651454, "grad_norm": 0.6128355264663696, "learning_rate": 2.1257020861078887e-06, "loss": 0.0453, "step": 47071 }, { "epoch": 0.8336233270021739, "grad_norm": 0.4953824281692505, "learning_rate": 2.1252605933242754e-06, "loss": 0.0674, "step": 47072 }, { "epoch": 0.8336410365392023, "grad_norm": 0.37086600065231323, "learning_rate": 2.1248191428972486e-06, "loss": 0.0505, "step": 47073 }, { "epoch": 0.8336587460762307, "grad_norm": 0.6548470258712769, "learning_rate": 2.1243777348282655e-06, "loss": 0.0527, "step": 47074 }, { "epoch": 0.8336764556132591, "grad_norm": 0.961046576499939, "learning_rate": 2.1239363691187767e-06, "loss": 0.0763, "step": 47075 }, { "epoch": 0.8336941651502876, "grad_norm": 0.49191394448280334, "learning_rate": 2.1234950457702378e-06, "loss": 0.0464, "step": 47076 }, { "epoch": 0.833711874687316, "grad_norm": 0.7283874154090881, "learning_rate": 2.123053764784096e-06, "loss": 0.038, "step": 47077 }, { "epoch": 0.8337295842243444, "grad_norm": 0.18085570633411407, "learning_rate": 2.1226125261618046e-06, "loss": 0.0539, "step": 47078 }, { "epoch": 0.8337472937613728, "grad_norm": 0.6724671721458435, "learning_rate": 2.1221713299048144e-06, "loss": 0.0569, "step": 47079 }, { "epoch": 0.8337650032984013, "grad_norm": 0.6388694643974304, "learning_rate": 2.121730176014578e-06, "loss": 0.0409, "step": 47080 }, { "epoch": 0.8337827128354297, "grad_norm": 0.6056703329086304, "learning_rate": 2.121289064492547e-06, "loss": 0.0795, "step": 47081 }, { "epoch": 0.8338004223724581, "grad_norm": 0.5678777694702148, "learning_rate": 2.1208479953401737e-06, "loss": 0.0811, "step": 47082 }, { "epoch": 0.8338181319094865, "grad_norm": 0.6152296662330627, "learning_rate": 2.1204069685589104e-06, "loss": 0.0569, "step": 47083 }, { "epoch": 0.833835841446515, "grad_norm": 0.6216902732849121, "learning_rate": 2.1199659841502013e-06, "loss": 0.0701, "step": 47084 }, { "epoch": 0.8338535509835434, "grad_norm": 0.6583664417266846, "learning_rate": 2.119525042115503e-06, "loss": 0.045, "step": 47085 }, { "epoch": 0.8338712605205718, "grad_norm": 0.6142829656600952, "learning_rate": 2.1190841424562658e-06, "loss": 0.0497, "step": 47086 }, { "epoch": 0.8338889700576003, "grad_norm": 0.5376850962638855, "learning_rate": 2.118643285173934e-06, "loss": 0.0638, "step": 47087 }, { "epoch": 0.8339066795946287, "grad_norm": 0.3458876311779022, "learning_rate": 2.118202470269965e-06, "loss": 0.0455, "step": 47088 }, { "epoch": 0.8339243891316571, "grad_norm": 0.659030020236969, "learning_rate": 2.117761697745807e-06, "loss": 0.0792, "step": 47089 }, { "epoch": 0.8339420986686855, "grad_norm": 0.4811980426311493, "learning_rate": 2.117320967602914e-06, "loss": 0.0551, "step": 47090 }, { "epoch": 0.833959808205714, "grad_norm": 0.2020990401506424, "learning_rate": 2.116880279842729e-06, "loss": 0.0408, "step": 47091 }, { "epoch": 0.8339775177427424, "grad_norm": 0.4273005723953247, "learning_rate": 2.1164396344667047e-06, "loss": 0.0579, "step": 47092 }, { "epoch": 0.8339952272797708, "grad_norm": 0.510216236114502, "learning_rate": 2.115999031476294e-06, "loss": 0.0556, "step": 47093 }, { "epoch": 0.8340129368167992, "grad_norm": 0.37255698442459106, "learning_rate": 2.1155584708729403e-06, "loss": 0.0311, "step": 47094 }, { "epoch": 0.8340306463538277, "grad_norm": 0.7489438056945801, "learning_rate": 2.115117952658096e-06, "loss": 0.0449, "step": 47095 }, { "epoch": 0.8340483558908561, "grad_norm": 0.40382808446884155, "learning_rate": 2.11467747683321e-06, "loss": 0.0508, "step": 47096 }, { "epoch": 0.8340660654278845, "grad_norm": 0.6932742595672607, "learning_rate": 2.114237043399731e-06, "loss": 0.077, "step": 47097 }, { "epoch": 0.8340837749649129, "grad_norm": 0.5254323482513428, "learning_rate": 2.1137966523591095e-06, "loss": 0.0395, "step": 47098 }, { "epoch": 0.8341014845019414, "grad_norm": 1.140816569328308, "learning_rate": 2.1133563037127947e-06, "loss": 0.065, "step": 47099 }, { "epoch": 0.8341191940389698, "grad_norm": 0.662868082523346, "learning_rate": 2.112915997462233e-06, "loss": 0.044, "step": 47100 }, { "epoch": 0.8341369035759982, "grad_norm": 0.5810996890068054, "learning_rate": 2.112475733608879e-06, "loss": 0.0387, "step": 47101 }, { "epoch": 0.8341546131130267, "grad_norm": 0.4876070022583008, "learning_rate": 2.1120355121541714e-06, "loss": 0.0577, "step": 47102 }, { "epoch": 0.8341723226500551, "grad_norm": 0.744632363319397, "learning_rate": 2.111595333099565e-06, "loss": 0.0435, "step": 47103 }, { "epoch": 0.8341900321870835, "grad_norm": 0.7037315368652344, "learning_rate": 2.1111551964465082e-06, "loss": 0.0833, "step": 47104 }, { "epoch": 0.8342077417241119, "grad_norm": 0.6420142650604248, "learning_rate": 2.1107151021964438e-06, "loss": 0.0535, "step": 47105 }, { "epoch": 0.8342254512611404, "grad_norm": 0.5502688884735107, "learning_rate": 2.1102750503508244e-06, "loss": 0.0487, "step": 47106 }, { "epoch": 0.8342431607981688, "grad_norm": 0.36970391869544983, "learning_rate": 2.1098350409110955e-06, "loss": 0.0508, "step": 47107 }, { "epoch": 0.8342608703351972, "grad_norm": 0.4278024733066559, "learning_rate": 2.109395073878706e-06, "loss": 0.0528, "step": 47108 }, { "epoch": 0.8342785798722256, "grad_norm": 0.5810709595680237, "learning_rate": 2.108955149255103e-06, "loss": 0.0516, "step": 47109 }, { "epoch": 0.8342962894092542, "grad_norm": 0.852524995803833, "learning_rate": 2.1085152670417335e-06, "loss": 0.0432, "step": 47110 }, { "epoch": 0.8343139989462826, "grad_norm": 0.9188588857650757, "learning_rate": 2.1080754272400467e-06, "loss": 0.0521, "step": 47111 }, { "epoch": 0.834331708483311, "grad_norm": 0.5369526147842407, "learning_rate": 2.107635629851486e-06, "loss": 0.0574, "step": 47112 }, { "epoch": 0.8343494180203394, "grad_norm": 0.9021387696266174, "learning_rate": 2.1071958748775007e-06, "loss": 0.0607, "step": 47113 }, { "epoch": 0.8343671275573679, "grad_norm": 0.8444613814353943, "learning_rate": 2.106756162319536e-06, "loss": 0.063, "step": 47114 }, { "epoch": 0.8343848370943963, "grad_norm": 0.39135992527008057, "learning_rate": 2.106316492179042e-06, "loss": 0.0679, "step": 47115 }, { "epoch": 0.8344025466314247, "grad_norm": 0.6092661023139954, "learning_rate": 2.105876864457457e-06, "loss": 0.0519, "step": 47116 }, { "epoch": 0.8344202561684532, "grad_norm": 0.9985671043395996, "learning_rate": 2.105437279156236e-06, "loss": 0.0422, "step": 47117 }, { "epoch": 0.8344379657054816, "grad_norm": 0.6638084053993225, "learning_rate": 2.1049977362768256e-06, "loss": 0.0472, "step": 47118 }, { "epoch": 0.83445567524251, "grad_norm": 0.17104536294937134, "learning_rate": 2.1045582358206656e-06, "loss": 0.0526, "step": 47119 }, { "epoch": 0.8344733847795384, "grad_norm": 0.5618469715118408, "learning_rate": 2.104118777789204e-06, "loss": 0.0452, "step": 47120 }, { "epoch": 0.8344910943165669, "grad_norm": 0.6209607124328613, "learning_rate": 2.1036793621838885e-06, "loss": 0.0347, "step": 47121 }, { "epoch": 0.8345088038535953, "grad_norm": 0.6758988499641418, "learning_rate": 2.103239989006166e-06, "loss": 0.0468, "step": 47122 }, { "epoch": 0.8345265133906237, "grad_norm": 0.5990940928459167, "learning_rate": 2.1028006582574756e-06, "loss": 0.038, "step": 47123 }, { "epoch": 0.8345442229276521, "grad_norm": 0.6658918857574463, "learning_rate": 2.102361369939266e-06, "loss": 0.0599, "step": 47124 }, { "epoch": 0.8345619324646806, "grad_norm": 0.7917517423629761, "learning_rate": 2.101922124052984e-06, "loss": 0.0788, "step": 47125 }, { "epoch": 0.834579642001709, "grad_norm": 0.7974986433982849, "learning_rate": 2.101482920600074e-06, "loss": 0.0507, "step": 47126 }, { "epoch": 0.8345973515387374, "grad_norm": 0.2799181044101715, "learning_rate": 2.1010437595819797e-06, "loss": 0.0587, "step": 47127 }, { "epoch": 0.8346150610757658, "grad_norm": 0.6084710955619812, "learning_rate": 2.1006046410001483e-06, "loss": 0.057, "step": 47128 }, { "epoch": 0.8346327706127943, "grad_norm": 0.13092784583568573, "learning_rate": 2.1001655648560253e-06, "loss": 0.0314, "step": 47129 }, { "epoch": 0.8346504801498227, "grad_norm": 0.38205575942993164, "learning_rate": 2.0997265311510495e-06, "loss": 0.0758, "step": 47130 }, { "epoch": 0.8346681896868511, "grad_norm": 0.7118500471115112, "learning_rate": 2.099287539886668e-06, "loss": 0.0707, "step": 47131 }, { "epoch": 0.8346858992238796, "grad_norm": 0.33972859382629395, "learning_rate": 2.0988485910643295e-06, "loss": 0.0442, "step": 47132 }, { "epoch": 0.834703608760908, "grad_norm": 0.4919033944606781, "learning_rate": 2.0984096846854717e-06, "loss": 0.0554, "step": 47133 }, { "epoch": 0.8347213182979364, "grad_norm": 0.7419977784156799, "learning_rate": 2.09797082075154e-06, "loss": 0.0731, "step": 47134 }, { "epoch": 0.8347390278349648, "grad_norm": 0.5855834484100342, "learning_rate": 2.097531999263979e-06, "loss": 0.0366, "step": 47135 }, { "epoch": 0.8347567373719933, "grad_norm": 0.5898536443710327, "learning_rate": 2.097093220224234e-06, "loss": 0.0692, "step": 47136 }, { "epoch": 0.8347744469090217, "grad_norm": 0.4227801263332367, "learning_rate": 2.0966544836337457e-06, "loss": 0.0662, "step": 47137 }, { "epoch": 0.8347921564460501, "grad_norm": 0.8626636266708374, "learning_rate": 2.096215789493961e-06, "loss": 0.0722, "step": 47138 }, { "epoch": 0.8348098659830785, "grad_norm": 0.5360986590385437, "learning_rate": 2.0957771378063193e-06, "loss": 0.0727, "step": 47139 }, { "epoch": 0.834827575520107, "grad_norm": 1.0840671062469482, "learning_rate": 2.0953385285722704e-06, "loss": 0.0862, "step": 47140 }, { "epoch": 0.8348452850571354, "grad_norm": 0.6334700584411621, "learning_rate": 2.094899961793249e-06, "loss": 0.0675, "step": 47141 }, { "epoch": 0.8348629945941638, "grad_norm": 0.7254167199134827, "learning_rate": 2.094461437470701e-06, "loss": 0.0719, "step": 47142 }, { "epoch": 0.8348807041311922, "grad_norm": 0.8268482089042664, "learning_rate": 2.0940229556060734e-06, "loss": 0.0644, "step": 47143 }, { "epoch": 0.8348984136682207, "grad_norm": 0.6530988812446594, "learning_rate": 2.093584516200802e-06, "loss": 0.0817, "step": 47144 }, { "epoch": 0.8349161232052491, "grad_norm": 0.30528998374938965, "learning_rate": 2.0931461192563274e-06, "loss": 0.0451, "step": 47145 }, { "epoch": 0.8349338327422775, "grad_norm": 0.612901508808136, "learning_rate": 2.092707764774101e-06, "loss": 0.052, "step": 47146 }, { "epoch": 0.834951542279306, "grad_norm": 0.3666090965270996, "learning_rate": 2.0922694527555623e-06, "loss": 0.0491, "step": 47147 }, { "epoch": 0.8349692518163344, "grad_norm": 0.4779406189918518, "learning_rate": 2.09183118320215e-06, "loss": 0.0762, "step": 47148 }, { "epoch": 0.8349869613533628, "grad_norm": 0.6051979064941406, "learning_rate": 2.091392956115307e-06, "loss": 0.0442, "step": 47149 }, { "epoch": 0.8350046708903912, "grad_norm": 0.8001546859741211, "learning_rate": 2.090954771496479e-06, "loss": 0.0583, "step": 47150 }, { "epoch": 0.8350223804274197, "grad_norm": 0.6472022533416748, "learning_rate": 2.0905166293471008e-06, "loss": 0.0552, "step": 47151 }, { "epoch": 0.8350400899644481, "grad_norm": 0.3722326457500458, "learning_rate": 2.0900785296686174e-06, "loss": 0.0454, "step": 47152 }, { "epoch": 0.8350577995014765, "grad_norm": 0.6722990870475769, "learning_rate": 2.0896404724624683e-06, "loss": 0.0626, "step": 47153 }, { "epoch": 0.8350755090385049, "grad_norm": 1.3919895887374878, "learning_rate": 2.089202457730099e-06, "loss": 0.0587, "step": 47154 }, { "epoch": 0.8350932185755334, "grad_norm": 0.4447398781776428, "learning_rate": 2.088764485472946e-06, "loss": 0.0564, "step": 47155 }, { "epoch": 0.8351109281125618, "grad_norm": 0.6668393611907959, "learning_rate": 2.088326555692452e-06, "loss": 0.0655, "step": 47156 }, { "epoch": 0.8351286376495902, "grad_norm": 0.6104159951210022, "learning_rate": 2.0878886683900607e-06, "loss": 0.0615, "step": 47157 }, { "epoch": 0.8351463471866186, "grad_norm": 0.589062511920929, "learning_rate": 2.0874508235672075e-06, "loss": 0.0524, "step": 47158 }, { "epoch": 0.8351640567236471, "grad_norm": 0.5769243240356445, "learning_rate": 2.087013021225333e-06, "loss": 0.0631, "step": 47159 }, { "epoch": 0.8351817662606755, "grad_norm": 0.28520452976226807, "learning_rate": 2.086575261365881e-06, "loss": 0.0738, "step": 47160 }, { "epoch": 0.8351994757977039, "grad_norm": 0.6042367815971375, "learning_rate": 2.086137543990294e-06, "loss": 0.0438, "step": 47161 }, { "epoch": 0.8352171853347324, "grad_norm": 0.6945679187774658, "learning_rate": 2.085699869100006e-06, "loss": 0.0571, "step": 47162 }, { "epoch": 0.8352348948717608, "grad_norm": 0.6927061676979065, "learning_rate": 2.0852622366964563e-06, "loss": 0.0907, "step": 47163 }, { "epoch": 0.8352526044087892, "grad_norm": 0.43315592408180237, "learning_rate": 2.08482464678109e-06, "loss": 0.031, "step": 47164 }, { "epoch": 0.8352703139458176, "grad_norm": 0.5912776589393616, "learning_rate": 2.0843870993553437e-06, "loss": 0.0558, "step": 47165 }, { "epoch": 0.8352880234828461, "grad_norm": 0.39605391025543213, "learning_rate": 2.0839495944206567e-06, "loss": 0.0304, "step": 47166 }, { "epoch": 0.8353057330198745, "grad_norm": 1.1649649143218994, "learning_rate": 2.083512131978469e-06, "loss": 0.0789, "step": 47167 }, { "epoch": 0.8353234425569029, "grad_norm": 0.8117318749427795, "learning_rate": 2.0830747120302244e-06, "loss": 0.0643, "step": 47168 }, { "epoch": 0.8353411520939313, "grad_norm": 0.6309868693351746, "learning_rate": 2.082637334577354e-06, "loss": 0.0648, "step": 47169 }, { "epoch": 0.8353588616309598, "grad_norm": 0.6083328723907471, "learning_rate": 2.0821999996212998e-06, "loss": 0.0694, "step": 47170 }, { "epoch": 0.8353765711679882, "grad_norm": 0.7516359686851501, "learning_rate": 2.081762707163502e-06, "loss": 0.0457, "step": 47171 }, { "epoch": 0.8353942807050166, "grad_norm": 0.7819831967353821, "learning_rate": 2.0813254572053998e-06, "loss": 0.0406, "step": 47172 }, { "epoch": 0.835411990242045, "grad_norm": 0.8695488572120667, "learning_rate": 2.0808882497484285e-06, "loss": 0.0523, "step": 47173 }, { "epoch": 0.8354296997790736, "grad_norm": 0.3801238536834717, "learning_rate": 2.080451084794025e-06, "loss": 0.0496, "step": 47174 }, { "epoch": 0.835447409316102, "grad_norm": 0.36689314246177673, "learning_rate": 2.0800139623436354e-06, "loss": 0.0529, "step": 47175 }, { "epoch": 0.8354651188531304, "grad_norm": 0.5943084359169006, "learning_rate": 2.079576882398692e-06, "loss": 0.0449, "step": 47176 }, { "epoch": 0.8354828283901589, "grad_norm": 0.6128734350204468, "learning_rate": 2.0791398449606326e-06, "loss": 0.0496, "step": 47177 }, { "epoch": 0.8355005379271873, "grad_norm": 0.47850871086120605, "learning_rate": 2.078702850030897e-06, "loss": 0.0396, "step": 47178 }, { "epoch": 0.8355182474642157, "grad_norm": 0.6166843771934509, "learning_rate": 2.0782658976109237e-06, "loss": 0.0444, "step": 47179 }, { "epoch": 0.8355359570012441, "grad_norm": 0.8113179802894592, "learning_rate": 2.0778289877021467e-06, "loss": 0.0642, "step": 47180 }, { "epoch": 0.8355536665382726, "grad_norm": 0.4112311899662018, "learning_rate": 2.0773921203060046e-06, "loss": 0.0501, "step": 47181 }, { "epoch": 0.835571376075301, "grad_norm": 0.324491411447525, "learning_rate": 2.0769552954239403e-06, "loss": 0.0482, "step": 47182 }, { "epoch": 0.8355890856123294, "grad_norm": 0.599928081035614, "learning_rate": 2.076518513057378e-06, "loss": 0.0375, "step": 47183 }, { "epoch": 0.8356067951493578, "grad_norm": 0.8926227688789368, "learning_rate": 2.0760817732077663e-06, "loss": 0.0697, "step": 47184 }, { "epoch": 0.8356245046863863, "grad_norm": 0.6024113297462463, "learning_rate": 2.0756450758765383e-06, "loss": 0.0568, "step": 47185 }, { "epoch": 0.8356422142234147, "grad_norm": 0.6395869851112366, "learning_rate": 2.0752084210651347e-06, "loss": 0.0454, "step": 47186 }, { "epoch": 0.8356599237604431, "grad_norm": 0.7243407368659973, "learning_rate": 2.074771808774984e-06, "loss": 0.0705, "step": 47187 }, { "epoch": 0.8356776332974715, "grad_norm": 0.7752910852432251, "learning_rate": 2.074335239007527e-06, "loss": 0.0666, "step": 47188 }, { "epoch": 0.8356953428345, "grad_norm": 0.5226611495018005, "learning_rate": 2.0738987117642027e-06, "loss": 0.0564, "step": 47189 }, { "epoch": 0.8357130523715284, "grad_norm": 0.3459438383579254, "learning_rate": 2.0734622270464416e-06, "loss": 0.0608, "step": 47190 }, { "epoch": 0.8357307619085568, "grad_norm": 0.6426880955696106, "learning_rate": 2.0730257848556822e-06, "loss": 0.0482, "step": 47191 }, { "epoch": 0.8357484714455853, "grad_norm": 0.6553409099578857, "learning_rate": 2.0725893851933605e-06, "loss": 0.0615, "step": 47192 }, { "epoch": 0.8357661809826137, "grad_norm": 0.44410470128059387, "learning_rate": 2.0721530280609115e-06, "loss": 0.0469, "step": 47193 }, { "epoch": 0.8357838905196421, "grad_norm": 0.5138369798660278, "learning_rate": 2.0717167134597724e-06, "loss": 0.0703, "step": 47194 }, { "epoch": 0.8358016000566705, "grad_norm": 0.49627190828323364, "learning_rate": 2.0712804413913776e-06, "loss": 0.0286, "step": 47195 }, { "epoch": 0.835819309593699, "grad_norm": 0.4622713625431061, "learning_rate": 2.0708442118571657e-06, "loss": 0.059, "step": 47196 }, { "epoch": 0.8358370191307274, "grad_norm": 0.3654061257839203, "learning_rate": 2.0704080248585653e-06, "loss": 0.0456, "step": 47197 }, { "epoch": 0.8358547286677558, "grad_norm": 0.4782998263835907, "learning_rate": 2.0699718803970142e-06, "loss": 0.0604, "step": 47198 }, { "epoch": 0.8358724382047842, "grad_norm": 0.4046078026294708, "learning_rate": 2.0695357784739486e-06, "loss": 0.0691, "step": 47199 }, { "epoch": 0.8358901477418127, "grad_norm": 0.4778413772583008, "learning_rate": 2.069099719090805e-06, "loss": 0.0412, "step": 47200 }, { "epoch": 0.8359078572788411, "grad_norm": 0.841087818145752, "learning_rate": 2.0686637022490116e-06, "loss": 0.0635, "step": 47201 }, { "epoch": 0.8359255668158695, "grad_norm": 0.8220527172088623, "learning_rate": 2.068227727950008e-06, "loss": 0.0509, "step": 47202 }, { "epoch": 0.8359432763528979, "grad_norm": 0.47364768385887146, "learning_rate": 2.067791796195227e-06, "loss": 0.0732, "step": 47203 }, { "epoch": 0.8359609858899264, "grad_norm": 0.5884771347045898, "learning_rate": 2.0673559069861033e-06, "loss": 0.0467, "step": 47204 }, { "epoch": 0.8359786954269548, "grad_norm": 0.8552147150039673, "learning_rate": 2.06692006032407e-06, "loss": 0.0554, "step": 47205 }, { "epoch": 0.8359964049639832, "grad_norm": 0.9253984689712524, "learning_rate": 2.066484256210562e-06, "loss": 0.0724, "step": 47206 }, { "epoch": 0.8360141145010117, "grad_norm": 0.6515365839004517, "learning_rate": 2.0660484946470156e-06, "loss": 0.0446, "step": 47207 }, { "epoch": 0.8360318240380401, "grad_norm": 0.414651095867157, "learning_rate": 2.065612775634858e-06, "loss": 0.0563, "step": 47208 }, { "epoch": 0.8360495335750685, "grad_norm": 0.6367976665496826, "learning_rate": 2.0651770991755253e-06, "loss": 0.0625, "step": 47209 }, { "epoch": 0.8360672431120969, "grad_norm": 0.6517537236213684, "learning_rate": 2.064741465270453e-06, "loss": 0.0475, "step": 47210 }, { "epoch": 0.8360849526491254, "grad_norm": 0.8126893639564514, "learning_rate": 2.064305873921076e-06, "loss": 0.0897, "step": 47211 }, { "epoch": 0.8361026621861538, "grad_norm": 0.42805707454681396, "learning_rate": 2.063870325128818e-06, "loss": 0.0353, "step": 47212 }, { "epoch": 0.8361203717231822, "grad_norm": 0.8257018327713013, "learning_rate": 2.0634348188951206e-06, "loss": 0.065, "step": 47213 }, { "epoch": 0.8361380812602106, "grad_norm": 0.4950700104236603, "learning_rate": 2.062999355221419e-06, "loss": 0.0604, "step": 47214 }, { "epoch": 0.8361557907972391, "grad_norm": 0.4153652787208557, "learning_rate": 2.062563934109136e-06, "loss": 0.0578, "step": 47215 }, { "epoch": 0.8361735003342675, "grad_norm": 0.6284716129302979, "learning_rate": 2.06212855555971e-06, "loss": 0.051, "step": 47216 }, { "epoch": 0.8361912098712959, "grad_norm": 0.887027382850647, "learning_rate": 2.0616932195745714e-06, "loss": 0.0573, "step": 47217 }, { "epoch": 0.8362089194083243, "grad_norm": 0.58315509557724, "learning_rate": 2.0612579261551583e-06, "loss": 0.0655, "step": 47218 }, { "epoch": 0.8362266289453528, "grad_norm": 0.5597625374794006, "learning_rate": 2.0608226753028935e-06, "loss": 0.0504, "step": 47219 }, { "epoch": 0.8362443384823812, "grad_norm": 0.8687658905982971, "learning_rate": 2.0603874670192143e-06, "loss": 0.0667, "step": 47220 }, { "epoch": 0.8362620480194096, "grad_norm": 1.0027672052383423, "learning_rate": 2.059952301305552e-06, "loss": 0.0853, "step": 47221 }, { "epoch": 0.8362797575564381, "grad_norm": 1.2353929281234741, "learning_rate": 2.0595171781633365e-06, "loss": 0.0832, "step": 47222 }, { "epoch": 0.8362974670934665, "grad_norm": 0.745183527469635, "learning_rate": 2.059082097594001e-06, "loss": 0.044, "step": 47223 }, { "epoch": 0.8363151766304949, "grad_norm": 0.6372566819190979, "learning_rate": 2.0586470595989766e-06, "loss": 0.0477, "step": 47224 }, { "epoch": 0.8363328861675233, "grad_norm": 0.633949339389801, "learning_rate": 2.0582120641796982e-06, "loss": 0.063, "step": 47225 }, { "epoch": 0.8363505957045518, "grad_norm": 1.058903455734253, "learning_rate": 2.0577771113375897e-06, "loss": 0.0562, "step": 47226 }, { "epoch": 0.8363683052415802, "grad_norm": 0.5565650463104248, "learning_rate": 2.0573422010740856e-06, "loss": 0.0443, "step": 47227 }, { "epoch": 0.8363860147786086, "grad_norm": 0.2812730371952057, "learning_rate": 2.056907333390619e-06, "loss": 0.045, "step": 47228 }, { "epoch": 0.836403724315637, "grad_norm": 0.42210888862609863, "learning_rate": 2.056472508288614e-06, "loss": 0.0285, "step": 47229 }, { "epoch": 0.8364214338526655, "grad_norm": 0.5729435682296753, "learning_rate": 2.0560377257695085e-06, "loss": 0.0785, "step": 47230 }, { "epoch": 0.8364391433896939, "grad_norm": 0.6613577008247375, "learning_rate": 2.055602985834727e-06, "loss": 0.0828, "step": 47231 }, { "epoch": 0.8364568529267223, "grad_norm": 0.6309910416603088, "learning_rate": 2.0551682884857035e-06, "loss": 0.0533, "step": 47232 }, { "epoch": 0.8364745624637507, "grad_norm": 0.5338796377182007, "learning_rate": 2.054733633723866e-06, "loss": 0.0357, "step": 47233 }, { "epoch": 0.8364922720007792, "grad_norm": 0.41880232095718384, "learning_rate": 2.0542990215506473e-06, "loss": 0.0638, "step": 47234 }, { "epoch": 0.8365099815378076, "grad_norm": 0.3382628560066223, "learning_rate": 2.053864451967477e-06, "loss": 0.0532, "step": 47235 }, { "epoch": 0.836527691074836, "grad_norm": 0.5045187473297119, "learning_rate": 2.0534299249757805e-06, "loss": 0.0457, "step": 47236 }, { "epoch": 0.8365454006118646, "grad_norm": 0.28356122970581055, "learning_rate": 2.0529954405769913e-06, "loss": 0.0232, "step": 47237 }, { "epoch": 0.836563110148893, "grad_norm": 0.3325037956237793, "learning_rate": 2.0525609987725354e-06, "loss": 0.046, "step": 47238 }, { "epoch": 0.8365808196859214, "grad_norm": 0.599830150604248, "learning_rate": 2.0521265995638493e-06, "loss": 0.039, "step": 47239 }, { "epoch": 0.8365985292229497, "grad_norm": 0.6129919290542603, "learning_rate": 2.051692242952353e-06, "loss": 0.0625, "step": 47240 }, { "epoch": 0.8366162387599783, "grad_norm": 0.547997772693634, "learning_rate": 2.0512579289394756e-06, "loss": 0.0295, "step": 47241 }, { "epoch": 0.8366339482970067, "grad_norm": 1.014113426208496, "learning_rate": 2.0508236575266555e-06, "loss": 0.0634, "step": 47242 }, { "epoch": 0.8366516578340351, "grad_norm": 0.6598918437957764, "learning_rate": 2.0503894287153173e-06, "loss": 0.0619, "step": 47243 }, { "epoch": 0.8366693673710635, "grad_norm": 1.1317330598831177, "learning_rate": 2.0499552425068856e-06, "loss": 0.0576, "step": 47244 }, { "epoch": 0.836687076908092, "grad_norm": 0.5049267411231995, "learning_rate": 2.0495210989027907e-06, "loss": 0.0595, "step": 47245 }, { "epoch": 0.8367047864451204, "grad_norm": 0.7779359817504883, "learning_rate": 2.0490869979044645e-06, "loss": 0.0682, "step": 47246 }, { "epoch": 0.8367224959821488, "grad_norm": 0.6720848083496094, "learning_rate": 2.0486529395133295e-06, "loss": 0.0771, "step": 47247 }, { "epoch": 0.8367402055191772, "grad_norm": 0.5108041167259216, "learning_rate": 2.0482189237308165e-06, "loss": 0.0502, "step": 47248 }, { "epoch": 0.8367579150562057, "grad_norm": 0.3135976493358612, "learning_rate": 2.0477849505583535e-06, "loss": 0.0531, "step": 47249 }, { "epoch": 0.8367756245932341, "grad_norm": 0.5565018653869629, "learning_rate": 2.0473510199973664e-06, "loss": 0.0486, "step": 47250 }, { "epoch": 0.8367933341302625, "grad_norm": 0.6278793811798096, "learning_rate": 2.0469171320492865e-06, "loss": 0.0528, "step": 47251 }, { "epoch": 0.836811043667291, "grad_norm": 0.7083854079246521, "learning_rate": 2.0464832867155382e-06, "loss": 0.0612, "step": 47252 }, { "epoch": 0.8368287532043194, "grad_norm": 0.7111929059028625, "learning_rate": 2.0460494839975524e-06, "loss": 0.0536, "step": 47253 }, { "epoch": 0.8368464627413478, "grad_norm": 0.7464408874511719, "learning_rate": 2.0456157238967516e-06, "loss": 0.0464, "step": 47254 }, { "epoch": 0.8368641722783762, "grad_norm": 0.7259339690208435, "learning_rate": 2.0451820064145643e-06, "loss": 0.0669, "step": 47255 }, { "epoch": 0.8368818818154047, "grad_norm": 0.5909767746925354, "learning_rate": 2.0447483315524164e-06, "loss": 0.0492, "step": 47256 }, { "epoch": 0.8368995913524331, "grad_norm": 0.7191835045814514, "learning_rate": 2.0443146993117413e-06, "loss": 0.083, "step": 47257 }, { "epoch": 0.8369173008894615, "grad_norm": 0.783510684967041, "learning_rate": 2.0438811096939563e-06, "loss": 0.0647, "step": 47258 }, { "epoch": 0.8369350104264899, "grad_norm": 0.3524169623851776, "learning_rate": 2.043447562700492e-06, "loss": 0.054, "step": 47259 }, { "epoch": 0.8369527199635184, "grad_norm": 0.48525968194007874, "learning_rate": 2.043014058332776e-06, "loss": 0.0567, "step": 47260 }, { "epoch": 0.8369704295005468, "grad_norm": 0.8673551082611084, "learning_rate": 2.0425805965922315e-06, "loss": 0.1026, "step": 47261 }, { "epoch": 0.8369881390375752, "grad_norm": 0.40331441164016724, "learning_rate": 2.042147177480286e-06, "loss": 0.046, "step": 47262 }, { "epoch": 0.8370058485746036, "grad_norm": 0.48081985116004944, "learning_rate": 2.041713800998367e-06, "loss": 0.0602, "step": 47263 }, { "epoch": 0.8370235581116321, "grad_norm": 0.5593878030776978, "learning_rate": 2.0412804671479028e-06, "loss": 0.0603, "step": 47264 }, { "epoch": 0.8370412676486605, "grad_norm": 0.40228742361068726, "learning_rate": 2.040847175930311e-06, "loss": 0.046, "step": 47265 }, { "epoch": 0.8370589771856889, "grad_norm": 0.5734089016914368, "learning_rate": 2.040413927347022e-06, "loss": 0.0642, "step": 47266 }, { "epoch": 0.8370766867227174, "grad_norm": 0.5945620536804199, "learning_rate": 2.0399807213994626e-06, "loss": 0.053, "step": 47267 }, { "epoch": 0.8370943962597458, "grad_norm": 0.5063464641571045, "learning_rate": 2.039547558089052e-06, "loss": 0.0581, "step": 47268 }, { "epoch": 0.8371121057967742, "grad_norm": 0.24393540620803833, "learning_rate": 2.0391144374172195e-06, "loss": 0.0374, "step": 47269 }, { "epoch": 0.8371298153338026, "grad_norm": 0.8482401967048645, "learning_rate": 2.0386813593853863e-06, "loss": 0.0671, "step": 47270 }, { "epoch": 0.8371475248708311, "grad_norm": 0.4198834300041199, "learning_rate": 2.038248323994989e-06, "loss": 0.0501, "step": 47271 }, { "epoch": 0.8371652344078595, "grad_norm": 0.7966148257255554, "learning_rate": 2.037815331247439e-06, "loss": 0.0598, "step": 47272 }, { "epoch": 0.8371829439448879, "grad_norm": 0.5771611332893372, "learning_rate": 2.037382381144164e-06, "loss": 0.0852, "step": 47273 }, { "epoch": 0.8372006534819163, "grad_norm": 0.48849353194236755, "learning_rate": 2.0369494736865923e-06, "loss": 0.039, "step": 47274 }, { "epoch": 0.8372183630189448, "grad_norm": 0.582033097743988, "learning_rate": 2.0365166088761467e-06, "loss": 0.0671, "step": 47275 }, { "epoch": 0.8372360725559732, "grad_norm": 0.6777358055114746, "learning_rate": 2.036083786714249e-06, "loss": 0.0381, "step": 47276 }, { "epoch": 0.8372537820930016, "grad_norm": 0.5219953060150146, "learning_rate": 2.0356510072023232e-06, "loss": 0.0323, "step": 47277 }, { "epoch": 0.83727149163003, "grad_norm": 0.4232235550880432, "learning_rate": 2.0352182703417947e-06, "loss": 0.0527, "step": 47278 }, { "epoch": 0.8372892011670585, "grad_norm": 0.34192338585853577, "learning_rate": 2.0347855761340876e-06, "loss": 0.0435, "step": 47279 }, { "epoch": 0.8373069107040869, "grad_norm": 0.7263827919960022, "learning_rate": 2.034352924580624e-06, "loss": 0.0627, "step": 47280 }, { "epoch": 0.8373246202411153, "grad_norm": 0.587895393371582, "learning_rate": 2.033920315682828e-06, "loss": 0.0543, "step": 47281 }, { "epoch": 0.8373423297781438, "grad_norm": 0.5587753057479858, "learning_rate": 2.0334877494421245e-06, "loss": 0.0525, "step": 47282 }, { "epoch": 0.8373600393151722, "grad_norm": 0.7087807655334473, "learning_rate": 2.0330552258599326e-06, "loss": 0.0501, "step": 47283 }, { "epoch": 0.8373777488522006, "grad_norm": 0.7319412231445312, "learning_rate": 2.0326227449376783e-06, "loss": 0.0494, "step": 47284 }, { "epoch": 0.837395458389229, "grad_norm": 0.7233925461769104, "learning_rate": 2.0321903066767865e-06, "loss": 0.0512, "step": 47285 }, { "epoch": 0.8374131679262575, "grad_norm": 0.4113162159919739, "learning_rate": 2.0317579110786733e-06, "loss": 0.054, "step": 47286 }, { "epoch": 0.8374308774632859, "grad_norm": 0.5356561541557312, "learning_rate": 2.0313255581447652e-06, "loss": 0.0517, "step": 47287 }, { "epoch": 0.8374485870003143, "grad_norm": 0.40315011143684387, "learning_rate": 2.0308932478764835e-06, "loss": 0.0631, "step": 47288 }, { "epoch": 0.8374662965373427, "grad_norm": 0.8771874904632568, "learning_rate": 2.0304609802752516e-06, "loss": 0.0844, "step": 47289 }, { "epoch": 0.8374840060743712, "grad_norm": 0.4898800551891327, "learning_rate": 2.0300287553424917e-06, "loss": 0.0855, "step": 47290 }, { "epoch": 0.8375017156113996, "grad_norm": 0.671217679977417, "learning_rate": 2.0295965730796264e-06, "loss": 0.0383, "step": 47291 }, { "epoch": 0.837519425148428, "grad_norm": 0.6141162514686584, "learning_rate": 2.0291644334880786e-06, "loss": 0.0576, "step": 47292 }, { "epoch": 0.8375371346854564, "grad_norm": 0.6011127233505249, "learning_rate": 2.0287323365692635e-06, "loss": 0.0749, "step": 47293 }, { "epoch": 0.8375548442224849, "grad_norm": 0.4047144949436188, "learning_rate": 2.028300282324609e-06, "loss": 0.0556, "step": 47294 }, { "epoch": 0.8375725537595133, "grad_norm": 0.507607102394104, "learning_rate": 2.027868270755534e-06, "loss": 0.0849, "step": 47295 }, { "epoch": 0.8375902632965417, "grad_norm": 0.6669837236404419, "learning_rate": 2.027436301863463e-06, "loss": 0.0927, "step": 47296 }, { "epoch": 0.8376079728335702, "grad_norm": 0.5757691860198975, "learning_rate": 2.0270043756498108e-06, "loss": 0.0415, "step": 47297 }, { "epoch": 0.8376256823705986, "grad_norm": 0.2970448136329651, "learning_rate": 2.0265724921160032e-06, "loss": 0.0612, "step": 47298 }, { "epoch": 0.837643391907627, "grad_norm": 0.2279503047466278, "learning_rate": 2.0261406512634596e-06, "loss": 0.0483, "step": 47299 }, { "epoch": 0.8376611014446554, "grad_norm": 0.6670094132423401, "learning_rate": 2.025708853093601e-06, "loss": 0.0461, "step": 47300 }, { "epoch": 0.837678810981684, "grad_norm": 0.5612179636955261, "learning_rate": 2.025277097607848e-06, "loss": 0.034, "step": 47301 }, { "epoch": 0.8376965205187124, "grad_norm": 0.46641698479652405, "learning_rate": 2.024845384807621e-06, "loss": 0.0501, "step": 47302 }, { "epoch": 0.8377142300557407, "grad_norm": 0.9318970441818237, "learning_rate": 2.024413714694343e-06, "loss": 0.0503, "step": 47303 }, { "epoch": 0.8377319395927691, "grad_norm": 0.5721632242202759, "learning_rate": 2.0239820872694297e-06, "loss": 0.0507, "step": 47304 }, { "epoch": 0.8377496491297977, "grad_norm": 0.4893012046813965, "learning_rate": 2.0235505025343014e-06, "loss": 0.0708, "step": 47305 }, { "epoch": 0.8377673586668261, "grad_norm": 0.7212436199188232, "learning_rate": 2.0231189604903837e-06, "loss": 0.0551, "step": 47306 }, { "epoch": 0.8377850682038545, "grad_norm": 0.5479327440261841, "learning_rate": 2.0226874611390856e-06, "loss": 0.0434, "step": 47307 }, { "epoch": 0.8378027777408829, "grad_norm": 0.7532306909561157, "learning_rate": 2.0222560044818357e-06, "loss": 0.0931, "step": 47308 }, { "epoch": 0.8378204872779114, "grad_norm": 0.6615936160087585, "learning_rate": 2.021824590520051e-06, "loss": 0.0534, "step": 47309 }, { "epoch": 0.8378381968149398, "grad_norm": 0.4745367169380188, "learning_rate": 2.021393219255154e-06, "loss": 0.0329, "step": 47310 }, { "epoch": 0.8378559063519682, "grad_norm": 0.8575717806816101, "learning_rate": 2.0209618906885576e-06, "loss": 0.1095, "step": 47311 }, { "epoch": 0.8378736158889967, "grad_norm": 0.7666590213775635, "learning_rate": 2.0205306048216842e-06, "loss": 0.0652, "step": 47312 }, { "epoch": 0.8378913254260251, "grad_norm": 0.5543208718299866, "learning_rate": 2.020099361655952e-06, "loss": 0.045, "step": 47313 }, { "epoch": 0.8379090349630535, "grad_norm": 0.5241492390632629, "learning_rate": 2.019668161192783e-06, "loss": 0.0502, "step": 47314 }, { "epoch": 0.8379267445000819, "grad_norm": 0.5801535844802856, "learning_rate": 2.0192370034335912e-06, "loss": 0.0481, "step": 47315 }, { "epoch": 0.8379444540371104, "grad_norm": 0.18557976186275482, "learning_rate": 2.018805888379795e-06, "loss": 0.044, "step": 47316 }, { "epoch": 0.8379621635741388, "grad_norm": 0.649043083190918, "learning_rate": 2.0183748160328138e-06, "loss": 0.0729, "step": 47317 }, { "epoch": 0.8379798731111672, "grad_norm": 0.6897539496421814, "learning_rate": 2.0179437863940677e-06, "loss": 0.0615, "step": 47318 }, { "epoch": 0.8379975826481956, "grad_norm": 1.0324314832687378, "learning_rate": 2.0175127994649738e-06, "loss": 0.0847, "step": 47319 }, { "epoch": 0.8380152921852241, "grad_norm": 0.5526424646377563, "learning_rate": 2.0170818552469483e-06, "loss": 0.0548, "step": 47320 }, { "epoch": 0.8380330017222525, "grad_norm": 0.8753276467323303, "learning_rate": 2.0166509537414147e-06, "loss": 0.0575, "step": 47321 }, { "epoch": 0.8380507112592809, "grad_norm": 0.49042925238609314, "learning_rate": 2.0162200949497832e-06, "loss": 0.0433, "step": 47322 }, { "epoch": 0.8380684207963094, "grad_norm": 0.6492125988006592, "learning_rate": 2.015789278873473e-06, "loss": 0.0656, "step": 47323 }, { "epoch": 0.8380861303333378, "grad_norm": 0.38719263672828674, "learning_rate": 2.0153585055139084e-06, "loss": 0.0714, "step": 47324 }, { "epoch": 0.8381038398703662, "grad_norm": 0.7887603044509888, "learning_rate": 2.0149277748724957e-06, "loss": 0.054, "step": 47325 }, { "epoch": 0.8381215494073946, "grad_norm": 0.5817367434501648, "learning_rate": 2.014497086950658e-06, "loss": 0.0603, "step": 47326 }, { "epoch": 0.8381392589444231, "grad_norm": 0.8141596913337708, "learning_rate": 2.014066441749812e-06, "loss": 0.0646, "step": 47327 }, { "epoch": 0.8381569684814515, "grad_norm": 0.476288765668869, "learning_rate": 2.0136358392713726e-06, "loss": 0.0541, "step": 47328 }, { "epoch": 0.8381746780184799, "grad_norm": 0.5741013884544373, "learning_rate": 2.013205279516758e-06, "loss": 0.0516, "step": 47329 }, { "epoch": 0.8381923875555083, "grad_norm": 0.38428956270217896, "learning_rate": 2.0127747624873855e-06, "loss": 0.0446, "step": 47330 }, { "epoch": 0.8382100970925368, "grad_norm": 0.5882688760757446, "learning_rate": 2.012344288184673e-06, "loss": 0.0619, "step": 47331 }, { "epoch": 0.8382278066295652, "grad_norm": 0.5821441411972046, "learning_rate": 2.011913856610032e-06, "loss": 0.0567, "step": 47332 }, { "epoch": 0.8382455161665936, "grad_norm": 0.3594056963920593, "learning_rate": 2.011483467764879e-06, "loss": 0.0353, "step": 47333 }, { "epoch": 0.838263225703622, "grad_norm": 0.5878483057022095, "learning_rate": 2.0110531216506333e-06, "loss": 0.0398, "step": 47334 }, { "epoch": 0.8382809352406505, "grad_norm": 0.6998974680900574, "learning_rate": 2.010622818268712e-06, "loss": 0.0531, "step": 47335 }, { "epoch": 0.8382986447776789, "grad_norm": 0.8621758222579956, "learning_rate": 2.0101925576205205e-06, "loss": 0.0635, "step": 47336 }, { "epoch": 0.8383163543147073, "grad_norm": 0.845120906829834, "learning_rate": 2.009762339707486e-06, "loss": 0.0575, "step": 47337 }, { "epoch": 0.8383340638517358, "grad_norm": 0.9692590236663818, "learning_rate": 2.009332164531021e-06, "loss": 0.0611, "step": 47338 }, { "epoch": 0.8383517733887642, "grad_norm": 0.2580869495868683, "learning_rate": 2.008902032092537e-06, "loss": 0.0513, "step": 47339 }, { "epoch": 0.8383694829257926, "grad_norm": 0.5743444561958313, "learning_rate": 2.0084719423934523e-06, "loss": 0.0454, "step": 47340 }, { "epoch": 0.838387192462821, "grad_norm": 0.5285269618034363, "learning_rate": 2.00804189543518e-06, "loss": 0.0513, "step": 47341 }, { "epoch": 0.8384049019998495, "grad_norm": 0.7217251658439636, "learning_rate": 2.007611891219139e-06, "loss": 0.0877, "step": 47342 }, { "epoch": 0.8384226115368779, "grad_norm": 0.509723961353302, "learning_rate": 2.007181929746738e-06, "loss": 0.0736, "step": 47343 }, { "epoch": 0.8384403210739063, "grad_norm": 0.8064953684806824, "learning_rate": 2.0067520110193933e-06, "loss": 0.0517, "step": 47344 }, { "epoch": 0.8384580306109347, "grad_norm": 0.420110285282135, "learning_rate": 2.0063221350385215e-06, "loss": 0.0538, "step": 47345 }, { "epoch": 0.8384757401479632, "grad_norm": 1.050155520439148, "learning_rate": 2.0058923018055347e-06, "loss": 0.0864, "step": 47346 }, { "epoch": 0.8384934496849916, "grad_norm": 0.6791521310806274, "learning_rate": 2.005462511321849e-06, "loss": 0.0572, "step": 47347 }, { "epoch": 0.83851115922202, "grad_norm": 0.5202676057815552, "learning_rate": 2.005032763588876e-06, "loss": 0.0643, "step": 47348 }, { "epoch": 0.8385288687590484, "grad_norm": 1.1670763492584229, "learning_rate": 2.004603058608035e-06, "loss": 0.0629, "step": 47349 }, { "epoch": 0.8385465782960769, "grad_norm": 0.8741835951805115, "learning_rate": 2.0041733963807314e-06, "loss": 0.087, "step": 47350 }, { "epoch": 0.8385642878331053, "grad_norm": 0.519384503364563, "learning_rate": 2.003743776908384e-06, "loss": 0.0678, "step": 47351 }, { "epoch": 0.8385819973701337, "grad_norm": 0.8385119438171387, "learning_rate": 2.003314200192405e-06, "loss": 0.0521, "step": 47352 }, { "epoch": 0.8385997069071622, "grad_norm": 0.5677688121795654, "learning_rate": 2.0028846662342097e-06, "loss": 0.084, "step": 47353 }, { "epoch": 0.8386174164441906, "grad_norm": 0.7048901319503784, "learning_rate": 2.002455175035208e-06, "loss": 0.0658, "step": 47354 }, { "epoch": 0.838635125981219, "grad_norm": 0.5641331672668457, "learning_rate": 2.002025726596812e-06, "loss": 0.0434, "step": 47355 }, { "epoch": 0.8386528355182474, "grad_norm": 0.7464380264282227, "learning_rate": 2.0015963209204375e-06, "loss": 0.0455, "step": 47356 }, { "epoch": 0.8386705450552759, "grad_norm": 0.7948316335678101, "learning_rate": 2.001166958007497e-06, "loss": 0.0741, "step": 47357 }, { "epoch": 0.8386882545923043, "grad_norm": 0.7663198709487915, "learning_rate": 2.0007376378594015e-06, "loss": 0.0759, "step": 47358 }, { "epoch": 0.8387059641293327, "grad_norm": 0.4810161590576172, "learning_rate": 2.000308360477565e-06, "loss": 0.0504, "step": 47359 }, { "epoch": 0.8387236736663611, "grad_norm": 0.7074766755104065, "learning_rate": 1.9998791258634032e-06, "loss": 0.0549, "step": 47360 }, { "epoch": 0.8387413832033896, "grad_norm": 0.06885485351085663, "learning_rate": 1.999449934018321e-06, "loss": 0.0264, "step": 47361 }, { "epoch": 0.838759092740418, "grad_norm": 0.5770400166511536, "learning_rate": 1.9990207849437326e-06, "loss": 0.0528, "step": 47362 }, { "epoch": 0.8387768022774464, "grad_norm": 0.7620833516120911, "learning_rate": 1.9985916786410535e-06, "loss": 0.0568, "step": 47363 }, { "epoch": 0.8387945118144748, "grad_norm": 0.6172811985015869, "learning_rate": 1.9981626151116913e-06, "loss": 0.0434, "step": 47364 }, { "epoch": 0.8388122213515034, "grad_norm": 0.4345378279685974, "learning_rate": 1.997733594357054e-06, "loss": 0.0697, "step": 47365 }, { "epoch": 0.8388299308885317, "grad_norm": 0.38854900002479553, "learning_rate": 1.997304616378561e-06, "loss": 0.0544, "step": 47366 }, { "epoch": 0.8388476404255601, "grad_norm": 0.6195351481437683, "learning_rate": 1.996875681177625e-06, "loss": 0.0596, "step": 47367 }, { "epoch": 0.8388653499625887, "grad_norm": 1.026997447013855, "learning_rate": 1.9964467887556508e-06, "loss": 0.0864, "step": 47368 }, { "epoch": 0.8388830594996171, "grad_norm": 0.620733380317688, "learning_rate": 1.996017939114049e-06, "loss": 0.0607, "step": 47369 }, { "epoch": 0.8389007690366455, "grad_norm": 0.8665915727615356, "learning_rate": 1.9955891322542373e-06, "loss": 0.0504, "step": 47370 }, { "epoch": 0.8389184785736739, "grad_norm": 0.6554027199745178, "learning_rate": 1.995160368177619e-06, "loss": 0.057, "step": 47371 }, { "epoch": 0.8389361881107024, "grad_norm": 0.5511672496795654, "learning_rate": 1.9947316468856085e-06, "loss": 0.0428, "step": 47372 }, { "epoch": 0.8389538976477308, "grad_norm": 1.0352458953857422, "learning_rate": 1.994302968379614e-06, "loss": 0.0601, "step": 47373 }, { "epoch": 0.8389716071847592, "grad_norm": 0.313044935464859, "learning_rate": 1.993874332661047e-06, "loss": 0.0543, "step": 47374 }, { "epoch": 0.8389893167217876, "grad_norm": 0.4611548185348511, "learning_rate": 1.9934457397313188e-06, "loss": 0.0423, "step": 47375 }, { "epoch": 0.8390070262588161, "grad_norm": 0.6927608251571655, "learning_rate": 1.9930171895918387e-06, "loss": 0.0722, "step": 47376 }, { "epoch": 0.8390247357958445, "grad_norm": 0.6960619688034058, "learning_rate": 1.9925886822440205e-06, "loss": 0.0605, "step": 47377 }, { "epoch": 0.8390424453328729, "grad_norm": 0.6285879015922546, "learning_rate": 1.9921602176892643e-06, "loss": 0.0455, "step": 47378 }, { "epoch": 0.8390601548699013, "grad_norm": 0.5077490210533142, "learning_rate": 1.991731795928988e-06, "loss": 0.0356, "step": 47379 }, { "epoch": 0.8390778644069298, "grad_norm": 0.4977383613586426, "learning_rate": 1.9913034169645973e-06, "loss": 0.0606, "step": 47380 }, { "epoch": 0.8390955739439582, "grad_norm": 0.8487392663955688, "learning_rate": 1.9908750807975053e-06, "loss": 0.0351, "step": 47381 }, { "epoch": 0.8391132834809866, "grad_norm": 0.3956109881401062, "learning_rate": 1.990446787429116e-06, "loss": 0.0474, "step": 47382 }, { "epoch": 0.8391309930180151, "grad_norm": 0.42745640873908997, "learning_rate": 1.9900185368608403e-06, "loss": 0.0572, "step": 47383 }, { "epoch": 0.8391487025550435, "grad_norm": 0.5434187650680542, "learning_rate": 1.989590329094087e-06, "loss": 0.0561, "step": 47384 }, { "epoch": 0.8391664120920719, "grad_norm": 0.5669032335281372, "learning_rate": 1.9891621641302664e-06, "loss": 0.0377, "step": 47385 }, { "epoch": 0.8391841216291003, "grad_norm": 0.33371564745903015, "learning_rate": 1.9887340419707877e-06, "loss": 0.0339, "step": 47386 }, { "epoch": 0.8392018311661288, "grad_norm": 0.4297826886177063, "learning_rate": 1.988305962617056e-06, "loss": 0.0689, "step": 47387 }, { "epoch": 0.8392195407031572, "grad_norm": 0.35101237893104553, "learning_rate": 1.987877926070486e-06, "loss": 0.0366, "step": 47388 }, { "epoch": 0.8392372502401856, "grad_norm": 0.5246775150299072, "learning_rate": 1.9874499323324775e-06, "loss": 0.0394, "step": 47389 }, { "epoch": 0.839254959777214, "grad_norm": 0.4152205288410187, "learning_rate": 1.9870219814044428e-06, "loss": 0.0567, "step": 47390 }, { "epoch": 0.8392726693142425, "grad_norm": 0.5066729784011841, "learning_rate": 1.9865940732877906e-06, "loss": 0.0416, "step": 47391 }, { "epoch": 0.8392903788512709, "grad_norm": 0.27897748351097107, "learning_rate": 1.9861662079839305e-06, "loss": 0.0571, "step": 47392 }, { "epoch": 0.8393080883882993, "grad_norm": 0.5955466628074646, "learning_rate": 1.985738385494264e-06, "loss": 0.0704, "step": 47393 }, { "epoch": 0.8393257979253277, "grad_norm": 0.33201080560684204, "learning_rate": 1.985310605820197e-06, "loss": 0.0636, "step": 47394 }, { "epoch": 0.8393435074623562, "grad_norm": 0.6427066326141357, "learning_rate": 1.9848828689631497e-06, "loss": 0.0576, "step": 47395 }, { "epoch": 0.8393612169993846, "grad_norm": 0.5942171812057495, "learning_rate": 1.9844551749245183e-06, "loss": 0.0466, "step": 47396 }, { "epoch": 0.839378926536413, "grad_norm": 0.687333345413208, "learning_rate": 1.9840275237057127e-06, "loss": 0.0806, "step": 47397 }, { "epoch": 0.8393966360734415, "grad_norm": 0.40076354146003723, "learning_rate": 1.98359991530814e-06, "loss": 0.0603, "step": 47398 }, { "epoch": 0.8394143456104699, "grad_norm": 0.5603269338607788, "learning_rate": 1.98317234973321e-06, "loss": 0.0505, "step": 47399 }, { "epoch": 0.8394320551474983, "grad_norm": 0.8634805679321289, "learning_rate": 1.982744826982324e-06, "loss": 0.0743, "step": 47400 }, { "epoch": 0.8394497646845267, "grad_norm": 0.18588897585868835, "learning_rate": 1.9823173470568897e-06, "loss": 0.0414, "step": 47401 }, { "epoch": 0.8394674742215552, "grad_norm": 0.5604966878890991, "learning_rate": 1.9818899099583155e-06, "loss": 0.0576, "step": 47402 }, { "epoch": 0.8394851837585836, "grad_norm": 0.6973680257797241, "learning_rate": 1.9814625156880073e-06, "loss": 0.0482, "step": 47403 }, { "epoch": 0.839502893295612, "grad_norm": 0.6369324922561646, "learning_rate": 1.981035164247369e-06, "loss": 0.0642, "step": 47404 }, { "epoch": 0.8395206028326404, "grad_norm": 0.7251637578010559, "learning_rate": 1.9806078556378094e-06, "loss": 0.0573, "step": 47405 }, { "epoch": 0.8395383123696689, "grad_norm": 1.1501513719558716, "learning_rate": 1.9801805898607374e-06, "loss": 0.0676, "step": 47406 }, { "epoch": 0.8395560219066973, "grad_norm": 0.8245976567268372, "learning_rate": 1.979753366917548e-06, "loss": 0.0726, "step": 47407 }, { "epoch": 0.8395737314437257, "grad_norm": 0.8816580176353455, "learning_rate": 1.979326186809656e-06, "loss": 0.0701, "step": 47408 }, { "epoch": 0.8395914409807541, "grad_norm": 0.4524584114551544, "learning_rate": 1.978899049538467e-06, "loss": 0.0593, "step": 47409 }, { "epoch": 0.8396091505177826, "grad_norm": 0.3751329779624939, "learning_rate": 1.9784719551053787e-06, "loss": 0.0266, "step": 47410 }, { "epoch": 0.839626860054811, "grad_norm": 0.8670227527618408, "learning_rate": 1.978044903511801e-06, "loss": 0.0533, "step": 47411 }, { "epoch": 0.8396445695918394, "grad_norm": 0.3290620744228363, "learning_rate": 1.9776178947591374e-06, "loss": 0.0541, "step": 47412 }, { "epoch": 0.8396622791288679, "grad_norm": 0.36753422021865845, "learning_rate": 1.977190928848795e-06, "loss": 0.0522, "step": 47413 }, { "epoch": 0.8396799886658963, "grad_norm": 0.25547128915786743, "learning_rate": 1.976764005782178e-06, "loss": 0.0306, "step": 47414 }, { "epoch": 0.8396976982029247, "grad_norm": 0.7298502922058105, "learning_rate": 1.9763371255606883e-06, "loss": 0.0554, "step": 47415 }, { "epoch": 0.8397154077399531, "grad_norm": 0.6627143621444702, "learning_rate": 1.975910288185733e-06, "loss": 0.0542, "step": 47416 }, { "epoch": 0.8397331172769816, "grad_norm": 0.4164144992828369, "learning_rate": 1.9754834936587186e-06, "loss": 0.059, "step": 47417 }, { "epoch": 0.83975082681401, "grad_norm": 0.6756030321121216, "learning_rate": 1.9750567419810446e-06, "loss": 0.0818, "step": 47418 }, { "epoch": 0.8397685363510384, "grad_norm": 0.4619101881980896, "learning_rate": 1.9746300331541152e-06, "loss": 0.025, "step": 47419 }, { "epoch": 0.8397862458880668, "grad_norm": 0.7961430549621582, "learning_rate": 1.9742033671793373e-06, "loss": 0.0636, "step": 47420 }, { "epoch": 0.8398039554250953, "grad_norm": 0.44356095790863037, "learning_rate": 1.9737767440581124e-06, "loss": 0.0445, "step": 47421 }, { "epoch": 0.8398216649621237, "grad_norm": 0.5283929109573364, "learning_rate": 1.9733501637918434e-06, "loss": 0.0604, "step": 47422 }, { "epoch": 0.8398393744991521, "grad_norm": 0.20729874074459076, "learning_rate": 1.972923626381931e-06, "loss": 0.0477, "step": 47423 }, { "epoch": 0.8398570840361805, "grad_norm": 0.7144560217857361, "learning_rate": 1.972497131829788e-06, "loss": 0.0755, "step": 47424 }, { "epoch": 0.839874793573209, "grad_norm": 0.683229923248291, "learning_rate": 1.972070680136811e-06, "loss": 0.0492, "step": 47425 }, { "epoch": 0.8398925031102374, "grad_norm": 0.298279732465744, "learning_rate": 1.971644271304401e-06, "loss": 0.0562, "step": 47426 }, { "epoch": 0.8399102126472658, "grad_norm": 0.4907568097114563, "learning_rate": 1.9712179053339698e-06, "loss": 0.0597, "step": 47427 }, { "epoch": 0.8399279221842944, "grad_norm": 0.3254760503768921, "learning_rate": 1.970791582226908e-06, "loss": 0.0197, "step": 47428 }, { "epoch": 0.8399456317213227, "grad_norm": 0.6273698210716248, "learning_rate": 1.9703653019846246e-06, "loss": 0.0278, "step": 47429 }, { "epoch": 0.8399633412583511, "grad_norm": 0.6229100823402405, "learning_rate": 1.969939064608522e-06, "loss": 0.0569, "step": 47430 }, { "epoch": 0.8399810507953795, "grad_norm": 0.4096495807170868, "learning_rate": 1.9695128701000022e-06, "loss": 0.0712, "step": 47431 }, { "epoch": 0.8399987603324081, "grad_norm": 0.7427063584327698, "learning_rate": 1.9690867184604672e-06, "loss": 0.0443, "step": 47432 }, { "epoch": 0.8400164698694365, "grad_norm": 0.6550934314727783, "learning_rate": 1.9686606096913195e-06, "loss": 0.067, "step": 47433 }, { "epoch": 0.8400341794064649, "grad_norm": 0.45579585433006287, "learning_rate": 1.9682345437939614e-06, "loss": 0.0425, "step": 47434 }, { "epoch": 0.8400518889434933, "grad_norm": 0.43103766441345215, "learning_rate": 1.9678085207697916e-06, "loss": 0.0478, "step": 47435 }, { "epoch": 0.8400695984805218, "grad_norm": 1.0302296876907349, "learning_rate": 1.967382540620214e-06, "loss": 0.0666, "step": 47436 }, { "epoch": 0.8400873080175502, "grad_norm": 0.8406309485435486, "learning_rate": 1.966956603346629e-06, "loss": 0.0502, "step": 47437 }, { "epoch": 0.8401050175545786, "grad_norm": 0.3348076641559601, "learning_rate": 1.9665307089504427e-06, "loss": 0.0371, "step": 47438 }, { "epoch": 0.840122727091607, "grad_norm": 0.4546998143196106, "learning_rate": 1.966104857433048e-06, "loss": 0.0623, "step": 47439 }, { "epoch": 0.8401404366286355, "grad_norm": 0.5374855995178223, "learning_rate": 1.9656790487958513e-06, "loss": 0.0476, "step": 47440 }, { "epoch": 0.8401581461656639, "grad_norm": 0.5168685913085938, "learning_rate": 1.9652532830402515e-06, "loss": 0.0552, "step": 47441 }, { "epoch": 0.8401758557026923, "grad_norm": 0.6797997355461121, "learning_rate": 1.9648275601676484e-06, "loss": 0.0906, "step": 47442 }, { "epoch": 0.8401935652397208, "grad_norm": 0.5817446112632751, "learning_rate": 1.9644018801794467e-06, "loss": 0.0576, "step": 47443 }, { "epoch": 0.8402112747767492, "grad_norm": 0.7652617692947388, "learning_rate": 1.9639762430770432e-06, "loss": 0.0807, "step": 47444 }, { "epoch": 0.8402289843137776, "grad_norm": 0.422521710395813, "learning_rate": 1.9635506488618415e-06, "loss": 0.0467, "step": 47445 }, { "epoch": 0.840246693850806, "grad_norm": 0.898941695690155, "learning_rate": 1.9631250975352378e-06, "loss": 0.0664, "step": 47446 }, { "epoch": 0.8402644033878345, "grad_norm": 0.5350015759468079, "learning_rate": 1.962699589098634e-06, "loss": 0.0403, "step": 47447 }, { "epoch": 0.8402821129248629, "grad_norm": 0.6830578446388245, "learning_rate": 1.9622741235534285e-06, "loss": 0.0571, "step": 47448 }, { "epoch": 0.8402998224618913, "grad_norm": 0.43796759843826294, "learning_rate": 1.9618487009010275e-06, "loss": 0.0432, "step": 47449 }, { "epoch": 0.8403175319989197, "grad_norm": 0.8464094996452332, "learning_rate": 1.961423321142822e-06, "loss": 0.0374, "step": 47450 }, { "epoch": 0.8403352415359482, "grad_norm": 0.509741485118866, "learning_rate": 1.9609979842802127e-06, "loss": 0.0476, "step": 47451 }, { "epoch": 0.8403529510729766, "grad_norm": 0.5729888081550598, "learning_rate": 1.960572690314603e-06, "loss": 0.0639, "step": 47452 }, { "epoch": 0.840370660610005, "grad_norm": 0.3270511329174042, "learning_rate": 1.9601474392473915e-06, "loss": 0.0636, "step": 47453 }, { "epoch": 0.8403883701470334, "grad_norm": 0.48239654302597046, "learning_rate": 1.959722231079974e-06, "loss": 0.0428, "step": 47454 }, { "epoch": 0.8404060796840619, "grad_norm": 0.6274145245552063, "learning_rate": 1.959297065813752e-06, "loss": 0.0604, "step": 47455 }, { "epoch": 0.8404237892210903, "grad_norm": 0.5818071365356445, "learning_rate": 1.9588719434501285e-06, "loss": 0.0402, "step": 47456 }, { "epoch": 0.8404414987581187, "grad_norm": 0.8074269890785217, "learning_rate": 1.958446863990492e-06, "loss": 0.0616, "step": 47457 }, { "epoch": 0.8404592082951472, "grad_norm": 0.8113962411880493, "learning_rate": 1.958021827436246e-06, "loss": 0.0529, "step": 47458 }, { "epoch": 0.8404769178321756, "grad_norm": 0.7624372839927673, "learning_rate": 1.9575968337887935e-06, "loss": 0.042, "step": 47459 }, { "epoch": 0.840494627369204, "grad_norm": 0.633639931678772, "learning_rate": 1.957171883049521e-06, "loss": 0.0599, "step": 47460 }, { "epoch": 0.8405123369062324, "grad_norm": 0.5351409912109375, "learning_rate": 1.956746975219838e-06, "loss": 0.0576, "step": 47461 }, { "epoch": 0.8405300464432609, "grad_norm": 0.821750819683075, "learning_rate": 1.956322110301138e-06, "loss": 0.0552, "step": 47462 }, { "epoch": 0.8405477559802893, "grad_norm": 0.28518593311309814, "learning_rate": 1.9558972882948218e-06, "loss": 0.0336, "step": 47463 }, { "epoch": 0.8405654655173177, "grad_norm": 0.5186777114868164, "learning_rate": 1.955472509202281e-06, "loss": 0.0488, "step": 47464 }, { "epoch": 0.8405831750543461, "grad_norm": 0.9503164887428284, "learning_rate": 1.955047773024917e-06, "loss": 0.0417, "step": 47465 }, { "epoch": 0.8406008845913746, "grad_norm": 0.7606750130653381, "learning_rate": 1.9546230797641286e-06, "loss": 0.0749, "step": 47466 }, { "epoch": 0.840618594128403, "grad_norm": 0.22256389260292053, "learning_rate": 1.9541984294213074e-06, "loss": 0.0475, "step": 47467 }, { "epoch": 0.8406363036654314, "grad_norm": 0.38826000690460205, "learning_rate": 1.9537738219978557e-06, "loss": 0.0541, "step": 47468 }, { "epoch": 0.8406540132024598, "grad_norm": 0.29754945635795593, "learning_rate": 1.953349257495166e-06, "loss": 0.0374, "step": 47469 }, { "epoch": 0.8406717227394883, "grad_norm": 0.5946186184883118, "learning_rate": 1.952924735914639e-06, "loss": 0.0426, "step": 47470 }, { "epoch": 0.8406894322765167, "grad_norm": 0.32587626576423645, "learning_rate": 1.95250025725767e-06, "loss": 0.0441, "step": 47471 }, { "epoch": 0.8407071418135451, "grad_norm": 0.4442363381385803, "learning_rate": 1.952075821525656e-06, "loss": 0.0325, "step": 47472 }, { "epoch": 0.8407248513505736, "grad_norm": 0.4878526031970978, "learning_rate": 1.951651428719995e-06, "loss": 0.0522, "step": 47473 }, { "epoch": 0.840742560887602, "grad_norm": 0.5471884608268738, "learning_rate": 1.9512270788420785e-06, "loss": 0.0475, "step": 47474 }, { "epoch": 0.8407602704246304, "grad_norm": 0.6897568106651306, "learning_rate": 1.950802771893305e-06, "loss": 0.0653, "step": 47475 }, { "epoch": 0.8407779799616588, "grad_norm": 0.6635391116142273, "learning_rate": 1.95037850787507e-06, "loss": 0.053, "step": 47476 }, { "epoch": 0.8407956894986873, "grad_norm": 0.6137630939483643, "learning_rate": 1.9499542867887735e-06, "loss": 0.0656, "step": 47477 }, { "epoch": 0.8408133990357157, "grad_norm": 0.48011553287506104, "learning_rate": 1.9495301086358046e-06, "loss": 0.0464, "step": 47478 }, { "epoch": 0.8408311085727441, "grad_norm": 0.4056127071380615, "learning_rate": 1.9491059734175615e-06, "loss": 0.0507, "step": 47479 }, { "epoch": 0.8408488181097725, "grad_norm": 0.2887752950191498, "learning_rate": 1.9486818811354396e-06, "loss": 0.0375, "step": 47480 }, { "epoch": 0.840866527646801, "grad_norm": 0.9721641540527344, "learning_rate": 1.9482578317908345e-06, "loss": 0.0767, "step": 47481 }, { "epoch": 0.8408842371838294, "grad_norm": 0.5482745170593262, "learning_rate": 1.9478338253851404e-06, "loss": 0.0763, "step": 47482 }, { "epoch": 0.8409019467208578, "grad_norm": 0.5005280375480652, "learning_rate": 1.9474098619197523e-06, "loss": 0.0493, "step": 47483 }, { "epoch": 0.8409196562578862, "grad_norm": 0.31658223271369934, "learning_rate": 1.9469859413960695e-06, "loss": 0.0491, "step": 47484 }, { "epoch": 0.8409373657949147, "grad_norm": 0.6007890701293945, "learning_rate": 1.946562063815481e-06, "loss": 0.0448, "step": 47485 }, { "epoch": 0.8409550753319431, "grad_norm": 0.3995112180709839, "learning_rate": 1.9461382291793816e-06, "loss": 0.0541, "step": 47486 }, { "epoch": 0.8409727848689715, "grad_norm": 0.600379228591919, "learning_rate": 1.945714437489166e-06, "loss": 0.035, "step": 47487 }, { "epoch": 0.840990494406, "grad_norm": 0.7138223648071289, "learning_rate": 1.9452906887462347e-06, "loss": 0.0582, "step": 47488 }, { "epoch": 0.8410082039430284, "grad_norm": 0.585563063621521, "learning_rate": 1.9448669829519694e-06, "loss": 0.0659, "step": 47489 }, { "epoch": 0.8410259134800568, "grad_norm": 0.7141944169998169, "learning_rate": 1.9444433201077743e-06, "loss": 0.0688, "step": 47490 }, { "epoch": 0.8410436230170852, "grad_norm": 0.9928746819496155, "learning_rate": 1.9440197002150433e-06, "loss": 0.078, "step": 47491 }, { "epoch": 0.8410613325541137, "grad_norm": 0.6771631240844727, "learning_rate": 1.9435961232751637e-06, "loss": 0.0622, "step": 47492 }, { "epoch": 0.8410790420911421, "grad_norm": 0.6944609880447388, "learning_rate": 1.943172589289534e-06, "loss": 0.054, "step": 47493 }, { "epoch": 0.8410967516281705, "grad_norm": 0.2689250111579895, "learning_rate": 1.9427490982595437e-06, "loss": 0.0366, "step": 47494 }, { "epoch": 0.841114461165199, "grad_norm": 0.848794162273407, "learning_rate": 1.9423256501865926e-06, "loss": 0.0833, "step": 47495 }, { "epoch": 0.8411321707022275, "grad_norm": 0.5297275185585022, "learning_rate": 1.941902245072065e-06, "loss": 0.0315, "step": 47496 }, { "epoch": 0.8411498802392559, "grad_norm": 0.5720003247261047, "learning_rate": 1.94147888291736e-06, "loss": 0.0683, "step": 47497 }, { "epoch": 0.8411675897762843, "grad_norm": 0.4642488360404968, "learning_rate": 1.941055563723868e-06, "loss": 0.0547, "step": 47498 }, { "epoch": 0.8411852993133127, "grad_norm": 0.7366690635681152, "learning_rate": 1.9406322874929812e-06, "loss": 0.0948, "step": 47499 }, { "epoch": 0.8412030088503412, "grad_norm": 0.7349047064781189, "learning_rate": 1.940209054226095e-06, "loss": 0.0518, "step": 47500 }, { "epoch": 0.8412207183873696, "grad_norm": 0.7757663726806641, "learning_rate": 1.9397858639245997e-06, "loss": 0.0649, "step": 47501 }, { "epoch": 0.841238427924398, "grad_norm": 0.6587679386138916, "learning_rate": 1.9393627165898897e-06, "loss": 0.0492, "step": 47502 }, { "epoch": 0.8412561374614265, "grad_norm": 0.435606449842453, "learning_rate": 1.938939612223354e-06, "loss": 0.0449, "step": 47503 }, { "epoch": 0.8412738469984549, "grad_norm": 0.5009075403213501, "learning_rate": 1.9385165508263842e-06, "loss": 0.0529, "step": 47504 }, { "epoch": 0.8412915565354833, "grad_norm": 0.44335687160491943, "learning_rate": 1.938093532400378e-06, "loss": 0.0525, "step": 47505 }, { "epoch": 0.8413092660725117, "grad_norm": 0.7481673359870911, "learning_rate": 1.9376705569467207e-06, "loss": 0.086, "step": 47506 }, { "epoch": 0.8413269756095402, "grad_norm": 0.3578265309333801, "learning_rate": 1.9372476244668048e-06, "loss": 0.0309, "step": 47507 }, { "epoch": 0.8413446851465686, "grad_norm": 0.35280153155326843, "learning_rate": 1.936824734962024e-06, "loss": 0.045, "step": 47508 }, { "epoch": 0.841362394683597, "grad_norm": 0.5664500594139099, "learning_rate": 1.936401888433767e-06, "loss": 0.0564, "step": 47509 }, { "epoch": 0.8413801042206254, "grad_norm": 0.6175605654716492, "learning_rate": 1.935979084883428e-06, "loss": 0.0637, "step": 47510 }, { "epoch": 0.8413978137576539, "grad_norm": 0.6179115772247314, "learning_rate": 1.935556324312396e-06, "loss": 0.0469, "step": 47511 }, { "epoch": 0.8414155232946823, "grad_norm": 0.46016058325767517, "learning_rate": 1.9351336067220643e-06, "loss": 0.0469, "step": 47512 }, { "epoch": 0.8414332328317107, "grad_norm": 0.5355653762817383, "learning_rate": 1.9347109321138206e-06, "loss": 0.0425, "step": 47513 }, { "epoch": 0.8414509423687391, "grad_norm": 0.8123676776885986, "learning_rate": 1.9342883004890554e-06, "loss": 0.0843, "step": 47514 }, { "epoch": 0.8414686519057676, "grad_norm": 0.29848209023475647, "learning_rate": 1.9338657118491595e-06, "loss": 0.0356, "step": 47515 }, { "epoch": 0.841486361442796, "grad_norm": 0.4574749171733856, "learning_rate": 1.9334431661955275e-06, "loss": 0.0542, "step": 47516 }, { "epoch": 0.8415040709798244, "grad_norm": 0.7290122509002686, "learning_rate": 1.9330206635295426e-06, "loss": 0.0458, "step": 47517 }, { "epoch": 0.8415217805168529, "grad_norm": 0.6110768914222717, "learning_rate": 1.9325982038525953e-06, "loss": 0.0637, "step": 47518 }, { "epoch": 0.8415394900538813, "grad_norm": 0.7536449432373047, "learning_rate": 1.9321757871660806e-06, "loss": 0.0688, "step": 47519 }, { "epoch": 0.8415571995909097, "grad_norm": 0.5563398599624634, "learning_rate": 1.931753413471391e-06, "loss": 0.0385, "step": 47520 }, { "epoch": 0.8415749091279381, "grad_norm": 0.47007766366004944, "learning_rate": 1.9313310827699054e-06, "loss": 0.044, "step": 47521 }, { "epoch": 0.8415926186649666, "grad_norm": 0.9263397455215454, "learning_rate": 1.9309087950630195e-06, "loss": 0.0524, "step": 47522 }, { "epoch": 0.841610328201995, "grad_norm": 0.6010749340057373, "learning_rate": 1.9304865503521253e-06, "loss": 0.0651, "step": 47523 }, { "epoch": 0.8416280377390234, "grad_norm": 0.44928258657455444, "learning_rate": 1.930064348638605e-06, "loss": 0.03, "step": 47524 }, { "epoch": 0.8416457472760518, "grad_norm": 0.49971649050712585, "learning_rate": 1.929642189923851e-06, "loss": 0.0453, "step": 47525 }, { "epoch": 0.8416634568130803, "grad_norm": 1.0237077474594116, "learning_rate": 1.9292200742092534e-06, "loss": 0.0728, "step": 47526 }, { "epoch": 0.8416811663501087, "grad_norm": 0.5478154420852661, "learning_rate": 1.9287980014961986e-06, "loss": 0.0459, "step": 47527 }, { "epoch": 0.8416988758871371, "grad_norm": 0.7075037956237793, "learning_rate": 1.9283759717860764e-06, "loss": 0.063, "step": 47528 }, { "epoch": 0.8417165854241655, "grad_norm": 0.48297587037086487, "learning_rate": 1.927953985080275e-06, "loss": 0.0507, "step": 47529 }, { "epoch": 0.841734294961194, "grad_norm": 0.6234224438667297, "learning_rate": 1.9275320413801868e-06, "loss": 0.0699, "step": 47530 }, { "epoch": 0.8417520044982224, "grad_norm": 0.37417590618133545, "learning_rate": 1.9271101406871916e-06, "loss": 0.0432, "step": 47531 }, { "epoch": 0.8417697140352508, "grad_norm": 0.7190631628036499, "learning_rate": 1.9266882830026818e-06, "loss": 0.0551, "step": 47532 }, { "epoch": 0.8417874235722793, "grad_norm": 0.45088812708854675, "learning_rate": 1.9262664683280457e-06, "loss": 0.0615, "step": 47533 }, { "epoch": 0.8418051331093077, "grad_norm": 0.8432279229164124, "learning_rate": 1.925844696664674e-06, "loss": 0.0825, "step": 47534 }, { "epoch": 0.8418228426463361, "grad_norm": 0.588822603225708, "learning_rate": 1.9254229680139478e-06, "loss": 0.0364, "step": 47535 }, { "epoch": 0.8418405521833645, "grad_norm": 0.8175641298294067, "learning_rate": 1.9250012823772574e-06, "loss": 0.0707, "step": 47536 }, { "epoch": 0.841858261720393, "grad_norm": 0.8293092846870422, "learning_rate": 1.924579639755991e-06, "loss": 0.0501, "step": 47537 }, { "epoch": 0.8418759712574214, "grad_norm": 0.508806049823761, "learning_rate": 1.9241580401515333e-06, "loss": 0.0759, "step": 47538 }, { "epoch": 0.8418936807944498, "grad_norm": 0.2214963585138321, "learning_rate": 1.923736483565275e-06, "loss": 0.0354, "step": 47539 }, { "epoch": 0.8419113903314782, "grad_norm": 0.5804436802864075, "learning_rate": 1.923314969998599e-06, "loss": 0.0497, "step": 47540 }, { "epoch": 0.8419290998685067, "grad_norm": 0.7448198199272156, "learning_rate": 1.9228934994529e-06, "loss": 0.0546, "step": 47541 }, { "epoch": 0.8419468094055351, "grad_norm": 0.5643275380134583, "learning_rate": 1.9224720719295537e-06, "loss": 0.0572, "step": 47542 }, { "epoch": 0.8419645189425635, "grad_norm": 0.4681851863861084, "learning_rate": 1.9220506874299515e-06, "loss": 0.0355, "step": 47543 }, { "epoch": 0.8419822284795919, "grad_norm": 0.8046996593475342, "learning_rate": 1.9216293459554845e-06, "loss": 0.0684, "step": 47544 }, { "epoch": 0.8419999380166204, "grad_norm": 0.4134458601474762, "learning_rate": 1.92120804750753e-06, "loss": 0.0614, "step": 47545 }, { "epoch": 0.8420176475536488, "grad_norm": 0.4014345109462738, "learning_rate": 1.9207867920874784e-06, "loss": 0.0642, "step": 47546 }, { "epoch": 0.8420353570906772, "grad_norm": 0.6154193878173828, "learning_rate": 1.9203655796967117e-06, "loss": 0.0274, "step": 47547 }, { "epoch": 0.8420530666277057, "grad_norm": 0.6489955186843872, "learning_rate": 1.919944410336626e-06, "loss": 0.0436, "step": 47548 }, { "epoch": 0.8420707761647341, "grad_norm": 0.7328966856002808, "learning_rate": 1.9195232840085963e-06, "loss": 0.0454, "step": 47549 }, { "epoch": 0.8420884857017625, "grad_norm": 0.9486545324325562, "learning_rate": 1.919102200714012e-06, "loss": 0.0737, "step": 47550 }, { "epoch": 0.8421061952387909, "grad_norm": 0.525558590888977, "learning_rate": 1.918681160454263e-06, "loss": 0.0531, "step": 47551 }, { "epoch": 0.8421239047758194, "grad_norm": 0.6780611872673035, "learning_rate": 1.9182601632307258e-06, "loss": 0.0717, "step": 47552 }, { "epoch": 0.8421416143128478, "grad_norm": 0.5615195035934448, "learning_rate": 1.9178392090447897e-06, "loss": 0.0592, "step": 47553 }, { "epoch": 0.8421593238498762, "grad_norm": 0.4854772686958313, "learning_rate": 1.9174182978978382e-06, "loss": 0.0315, "step": 47554 }, { "epoch": 0.8421770333869046, "grad_norm": 0.8210334777832031, "learning_rate": 1.9169974297912596e-06, "loss": 0.0672, "step": 47555 }, { "epoch": 0.8421947429239331, "grad_norm": 0.47602182626724243, "learning_rate": 1.9165766047264343e-06, "loss": 0.0425, "step": 47556 }, { "epoch": 0.8422124524609615, "grad_norm": 0.6293623447418213, "learning_rate": 1.91615582270475e-06, "loss": 0.0559, "step": 47557 }, { "epoch": 0.84223016199799, "grad_norm": 0.34425678849220276, "learning_rate": 1.915735083727588e-06, "loss": 0.0557, "step": 47558 }, { "epoch": 0.8422478715350183, "grad_norm": 0.8504226803779602, "learning_rate": 1.9153143877963388e-06, "loss": 0.0759, "step": 47559 }, { "epoch": 0.8422655810720469, "grad_norm": 0.5198215246200562, "learning_rate": 1.9148937349123794e-06, "loss": 0.0585, "step": 47560 }, { "epoch": 0.8422832906090753, "grad_norm": 0.36251407861709595, "learning_rate": 1.914473125077095e-06, "loss": 0.0471, "step": 47561 }, { "epoch": 0.8423010001461037, "grad_norm": 0.49334993958473206, "learning_rate": 1.914052558291873e-06, "loss": 0.0476, "step": 47562 }, { "epoch": 0.8423187096831322, "grad_norm": 0.41740643978118896, "learning_rate": 1.9136320345580928e-06, "loss": 0.0421, "step": 47563 }, { "epoch": 0.8423364192201606, "grad_norm": 0.6054365634918213, "learning_rate": 1.9132115538771387e-06, "loss": 0.0485, "step": 47564 }, { "epoch": 0.842354128757189, "grad_norm": 0.6019647121429443, "learning_rate": 1.9127911162503953e-06, "loss": 0.053, "step": 47565 }, { "epoch": 0.8423718382942174, "grad_norm": 0.46715739369392395, "learning_rate": 1.9123707216792443e-06, "loss": 0.0487, "step": 47566 }, { "epoch": 0.8423895478312459, "grad_norm": 0.6225667595863342, "learning_rate": 1.911950370165071e-06, "loss": 0.0551, "step": 47567 }, { "epoch": 0.8424072573682743, "grad_norm": 0.43102797865867615, "learning_rate": 1.911530061709256e-06, "loss": 0.0525, "step": 47568 }, { "epoch": 0.8424249669053027, "grad_norm": 0.5780986547470093, "learning_rate": 1.911109796313189e-06, "loss": 0.0539, "step": 47569 }, { "epoch": 0.8424426764423311, "grad_norm": 0.5525123476982117, "learning_rate": 1.9106895739782417e-06, "loss": 0.0456, "step": 47570 }, { "epoch": 0.8424603859793596, "grad_norm": 0.33630427718162537, "learning_rate": 1.9102693947058024e-06, "loss": 0.0401, "step": 47571 }, { "epoch": 0.842478095516388, "grad_norm": 0.663842499256134, "learning_rate": 1.9098492584972525e-06, "loss": 0.0667, "step": 47572 }, { "epoch": 0.8424958050534164, "grad_norm": 0.1554577350616455, "learning_rate": 1.9094291653539773e-06, "loss": 0.0322, "step": 47573 }, { "epoch": 0.8425135145904448, "grad_norm": 0.7448365688323975, "learning_rate": 1.909009115277354e-06, "loss": 0.054, "step": 47574 }, { "epoch": 0.8425312241274733, "grad_norm": 0.8758299946784973, "learning_rate": 1.9085891082687666e-06, "loss": 0.0738, "step": 47575 }, { "epoch": 0.8425489336645017, "grad_norm": 0.6847776174545288, "learning_rate": 1.908169144329597e-06, "loss": 0.0577, "step": 47576 }, { "epoch": 0.8425666432015301, "grad_norm": 0.7043435573577881, "learning_rate": 1.9077492234612263e-06, "loss": 0.0547, "step": 47577 }, { "epoch": 0.8425843527385586, "grad_norm": 0.6665000319480896, "learning_rate": 1.9073293456650365e-06, "loss": 0.0517, "step": 47578 }, { "epoch": 0.842602062275587, "grad_norm": 0.79825758934021, "learning_rate": 1.9069095109424079e-06, "loss": 0.064, "step": 47579 }, { "epoch": 0.8426197718126154, "grad_norm": 0.81414794921875, "learning_rate": 1.9064897192947278e-06, "loss": 0.0587, "step": 47580 }, { "epoch": 0.8426374813496438, "grad_norm": 0.8857842683792114, "learning_rate": 1.906069970723367e-06, "loss": 0.0427, "step": 47581 }, { "epoch": 0.8426551908866723, "grad_norm": 0.3283874988555908, "learning_rate": 1.9056502652297125e-06, "loss": 0.0518, "step": 47582 }, { "epoch": 0.8426729004237007, "grad_norm": 0.8217394351959229, "learning_rate": 1.9052306028151483e-06, "loss": 0.0848, "step": 47583 }, { "epoch": 0.8426906099607291, "grad_norm": 1.1146806478500366, "learning_rate": 1.9048109834810434e-06, "loss": 0.0803, "step": 47584 }, { "epoch": 0.8427083194977575, "grad_norm": 0.6306638121604919, "learning_rate": 1.90439140722879e-06, "loss": 0.0534, "step": 47585 }, { "epoch": 0.842726029034786, "grad_norm": 1.3866926431655884, "learning_rate": 1.9039718740597634e-06, "loss": 0.1027, "step": 47586 }, { "epoch": 0.8427437385718144, "grad_norm": 0.2997930645942688, "learning_rate": 1.9035523839753494e-06, "loss": 0.0335, "step": 47587 }, { "epoch": 0.8427614481088428, "grad_norm": 0.8938482403755188, "learning_rate": 1.9031329369769185e-06, "loss": 0.0519, "step": 47588 }, { "epoch": 0.8427791576458712, "grad_norm": 0.6871820092201233, "learning_rate": 1.9027135330658563e-06, "loss": 0.071, "step": 47589 }, { "epoch": 0.8427968671828997, "grad_norm": 0.35949772596359253, "learning_rate": 1.9022941722435417e-06, "loss": 0.0233, "step": 47590 }, { "epoch": 0.8428145767199281, "grad_norm": 0.39004021883010864, "learning_rate": 1.9018748545113585e-06, "loss": 0.055, "step": 47591 }, { "epoch": 0.8428322862569565, "grad_norm": 0.5092554092407227, "learning_rate": 1.901455579870679e-06, "loss": 0.0431, "step": 47592 }, { "epoch": 0.842849995793985, "grad_norm": 0.7540440559387207, "learning_rate": 1.9010363483228854e-06, "loss": 0.0609, "step": 47593 }, { "epoch": 0.8428677053310134, "grad_norm": 0.5410040020942688, "learning_rate": 1.9006171598693567e-06, "loss": 0.0646, "step": 47594 }, { "epoch": 0.8428854148680418, "grad_norm": 0.5751733779907227, "learning_rate": 1.9001980145114733e-06, "loss": 0.0613, "step": 47595 }, { "epoch": 0.8429031244050702, "grad_norm": 0.5878022313117981, "learning_rate": 1.8997789122506126e-06, "loss": 0.0581, "step": 47596 }, { "epoch": 0.8429208339420987, "grad_norm": 0.5532020926475525, "learning_rate": 1.899359853088155e-06, "loss": 0.0476, "step": 47597 }, { "epoch": 0.8429385434791271, "grad_norm": 0.8773460388183594, "learning_rate": 1.8989408370254812e-06, "loss": 0.0632, "step": 47598 }, { "epoch": 0.8429562530161555, "grad_norm": 0.7096560001373291, "learning_rate": 1.8985218640639652e-06, "loss": 0.0539, "step": 47599 }, { "epoch": 0.8429739625531839, "grad_norm": 0.4961376488208771, "learning_rate": 1.8981029342049855e-06, "loss": 0.0605, "step": 47600 }, { "epoch": 0.8429916720902124, "grad_norm": 0.4499964416027069, "learning_rate": 1.8976840474499262e-06, "loss": 0.069, "step": 47601 }, { "epoch": 0.8430093816272408, "grad_norm": 0.5287700295448303, "learning_rate": 1.8972652038001564e-06, "loss": 0.0831, "step": 47602 }, { "epoch": 0.8430270911642692, "grad_norm": 1.0868901014328003, "learning_rate": 1.8968464032570598e-06, "loss": 0.0664, "step": 47603 }, { "epoch": 0.8430448007012976, "grad_norm": 0.43255749344825745, "learning_rate": 1.8964276458220137e-06, "loss": 0.0562, "step": 47604 }, { "epoch": 0.8430625102383261, "grad_norm": 0.40054625272750854, "learning_rate": 1.8960089314963952e-06, "loss": 0.0629, "step": 47605 }, { "epoch": 0.8430802197753545, "grad_norm": 0.5590465664863586, "learning_rate": 1.8955902602815816e-06, "loss": 0.0626, "step": 47606 }, { "epoch": 0.8430979293123829, "grad_norm": 0.5663366317749023, "learning_rate": 1.8951716321789502e-06, "loss": 0.0897, "step": 47607 }, { "epoch": 0.8431156388494114, "grad_norm": 0.9120917320251465, "learning_rate": 1.8947530471898833e-06, "loss": 0.0716, "step": 47608 }, { "epoch": 0.8431333483864398, "grad_norm": 0.5445009469985962, "learning_rate": 1.8943345053157497e-06, "loss": 0.041, "step": 47609 }, { "epoch": 0.8431510579234682, "grad_norm": 0.8508191704750061, "learning_rate": 1.8939160065579281e-06, "loss": 0.0682, "step": 47610 }, { "epoch": 0.8431687674604966, "grad_norm": 0.5914254188537598, "learning_rate": 1.8934975509177993e-06, "loss": 0.0833, "step": 47611 }, { "epoch": 0.8431864769975251, "grad_norm": 0.5745458602905273, "learning_rate": 1.8930791383967405e-06, "loss": 0.0534, "step": 47612 }, { "epoch": 0.8432041865345535, "grad_norm": 0.9895949959754944, "learning_rate": 1.8926607689961189e-06, "loss": 0.0547, "step": 47613 }, { "epoch": 0.8432218960715819, "grad_norm": 0.5782217383384705, "learning_rate": 1.8922424427173218e-06, "loss": 0.0762, "step": 47614 }, { "epoch": 0.8432396056086103, "grad_norm": 0.7690711617469788, "learning_rate": 1.8918241595617231e-06, "loss": 0.0727, "step": 47615 }, { "epoch": 0.8432573151456388, "grad_norm": 1.0502004623413086, "learning_rate": 1.891405919530695e-06, "loss": 0.0645, "step": 47616 }, { "epoch": 0.8432750246826672, "grad_norm": 0.43966248631477356, "learning_rate": 1.8909877226256144e-06, "loss": 0.0402, "step": 47617 }, { "epoch": 0.8432927342196956, "grad_norm": 0.6238791346549988, "learning_rate": 1.8905695688478591e-06, "loss": 0.0878, "step": 47618 }, { "epoch": 0.843310443756724, "grad_norm": 0.19712162017822266, "learning_rate": 1.8901514581988078e-06, "loss": 0.0417, "step": 47619 }, { "epoch": 0.8433281532937525, "grad_norm": 0.4193562865257263, "learning_rate": 1.8897333906798292e-06, "loss": 0.0312, "step": 47620 }, { "epoch": 0.843345862830781, "grad_norm": 0.5659565329551697, "learning_rate": 1.8893153662923008e-06, "loss": 0.0559, "step": 47621 }, { "epoch": 0.8433635723678093, "grad_norm": 0.6813003420829773, "learning_rate": 1.8888973850375979e-06, "loss": 0.0644, "step": 47622 }, { "epoch": 0.8433812819048379, "grad_norm": 0.4792899191379547, "learning_rate": 1.888479446917098e-06, "loss": 0.0372, "step": 47623 }, { "epoch": 0.8433989914418663, "grad_norm": 0.7999947667121887, "learning_rate": 1.8880615519321747e-06, "loss": 0.0609, "step": 47624 }, { "epoch": 0.8434167009788947, "grad_norm": 0.48931267857551575, "learning_rate": 1.8876437000842006e-06, "loss": 0.0408, "step": 47625 }, { "epoch": 0.843434410515923, "grad_norm": 0.7656846046447754, "learning_rate": 1.8872258913745577e-06, "loss": 0.0572, "step": 47626 }, { "epoch": 0.8434521200529516, "grad_norm": 0.443374365568161, "learning_rate": 1.8868081258046117e-06, "loss": 0.0458, "step": 47627 }, { "epoch": 0.84346982958998, "grad_norm": 0.5323801040649414, "learning_rate": 1.8863904033757411e-06, "loss": 0.0568, "step": 47628 }, { "epoch": 0.8434875391270084, "grad_norm": 0.6187138557434082, "learning_rate": 1.8859727240893188e-06, "loss": 0.0215, "step": 47629 }, { "epoch": 0.8435052486640368, "grad_norm": 0.5418287515640259, "learning_rate": 1.8855550879467232e-06, "loss": 0.0528, "step": 47630 }, { "epoch": 0.8435229582010653, "grad_norm": 0.5896788835525513, "learning_rate": 1.8851374949493217e-06, "loss": 0.0627, "step": 47631 }, { "epoch": 0.8435406677380937, "grad_norm": 0.4430278241634369, "learning_rate": 1.8847199450984915e-06, "loss": 0.065, "step": 47632 }, { "epoch": 0.8435583772751221, "grad_norm": 0.6275510787963867, "learning_rate": 1.8843024383956064e-06, "loss": 0.0668, "step": 47633 }, { "epoch": 0.8435760868121505, "grad_norm": 0.6909874081611633, "learning_rate": 1.8838849748420388e-06, "loss": 0.0726, "step": 47634 }, { "epoch": 0.843593796349179, "grad_norm": 1.2447649240493774, "learning_rate": 1.8834675544391644e-06, "loss": 0.0572, "step": 47635 }, { "epoch": 0.8436115058862074, "grad_norm": 0.4950539171695709, "learning_rate": 1.8830501771883552e-06, "loss": 0.0322, "step": 47636 }, { "epoch": 0.8436292154232358, "grad_norm": 0.47208184003829956, "learning_rate": 1.8826328430909868e-06, "loss": 0.043, "step": 47637 }, { "epoch": 0.8436469249602643, "grad_norm": 1.0570026636123657, "learning_rate": 1.8822155521484263e-06, "loss": 0.0695, "step": 47638 }, { "epoch": 0.8436646344972927, "grad_norm": 0.6176441311836243, "learning_rate": 1.8817983043620495e-06, "loss": 0.0464, "step": 47639 }, { "epoch": 0.8436823440343211, "grad_norm": 0.7004237174987793, "learning_rate": 1.881381099733232e-06, "loss": 0.0571, "step": 47640 }, { "epoch": 0.8437000535713495, "grad_norm": 0.8190111517906189, "learning_rate": 1.8809639382633425e-06, "loss": 0.0631, "step": 47641 }, { "epoch": 0.843717763108378, "grad_norm": 0.5396704077720642, "learning_rate": 1.8805468199537501e-06, "loss": 0.0289, "step": 47642 }, { "epoch": 0.8437354726454064, "grad_norm": 0.11745922267436981, "learning_rate": 1.8801297448058353e-06, "loss": 0.0333, "step": 47643 }, { "epoch": 0.8437531821824348, "grad_norm": 0.6961464285850525, "learning_rate": 1.87971271282097e-06, "loss": 0.0642, "step": 47644 }, { "epoch": 0.8437708917194632, "grad_norm": 0.3997003436088562, "learning_rate": 1.8792957240005204e-06, "loss": 0.0511, "step": 47645 }, { "epoch": 0.8437886012564917, "grad_norm": 0.6756280064582825, "learning_rate": 1.8788787783458599e-06, "loss": 0.055, "step": 47646 }, { "epoch": 0.8438063107935201, "grad_norm": 0.7644757032394409, "learning_rate": 1.8784618758583643e-06, "loss": 0.0479, "step": 47647 }, { "epoch": 0.8438240203305485, "grad_norm": 1.1854090690612793, "learning_rate": 1.8780450165393974e-06, "loss": 0.0705, "step": 47648 }, { "epoch": 0.8438417298675769, "grad_norm": 0.9707501530647278, "learning_rate": 1.8776282003903368e-06, "loss": 0.04, "step": 47649 }, { "epoch": 0.8438594394046054, "grad_norm": 0.34050050377845764, "learning_rate": 1.8772114274125523e-06, "loss": 0.0612, "step": 47650 }, { "epoch": 0.8438771489416338, "grad_norm": 0.5832022428512573, "learning_rate": 1.8767946976074153e-06, "loss": 0.0578, "step": 47651 }, { "epoch": 0.8438948584786622, "grad_norm": 0.5890021324157715, "learning_rate": 1.8763780109762957e-06, "loss": 0.0697, "step": 47652 }, { "epoch": 0.8439125680156907, "grad_norm": 0.6620262265205383, "learning_rate": 1.8759613675205645e-06, "loss": 0.0456, "step": 47653 }, { "epoch": 0.8439302775527191, "grad_norm": 0.587842583656311, "learning_rate": 1.875544767241597e-06, "loss": 0.0574, "step": 47654 }, { "epoch": 0.8439479870897475, "grad_norm": 0.32891759276390076, "learning_rate": 1.8751282101407558e-06, "loss": 0.0479, "step": 47655 }, { "epoch": 0.8439656966267759, "grad_norm": 0.4034481942653656, "learning_rate": 1.8747116962194162e-06, "loss": 0.0604, "step": 47656 }, { "epoch": 0.8439834061638044, "grad_norm": 0.18535159528255463, "learning_rate": 1.8742952254789469e-06, "loss": 0.0225, "step": 47657 }, { "epoch": 0.8440011157008328, "grad_norm": 0.43615609407424927, "learning_rate": 1.8738787979207223e-06, "loss": 0.0512, "step": 47658 }, { "epoch": 0.8440188252378612, "grad_norm": 0.5821566581726074, "learning_rate": 1.873462413546106e-06, "loss": 0.0406, "step": 47659 }, { "epoch": 0.8440365347748896, "grad_norm": 0.8307154178619385, "learning_rate": 1.8730460723564702e-06, "loss": 0.0751, "step": 47660 }, { "epoch": 0.8440542443119181, "grad_norm": 0.5640813708305359, "learning_rate": 1.8726297743531855e-06, "loss": 0.048, "step": 47661 }, { "epoch": 0.8440719538489465, "grad_norm": 0.3617265522480011, "learning_rate": 1.8722135195376195e-06, "loss": 0.0486, "step": 47662 }, { "epoch": 0.8440896633859749, "grad_norm": 0.8061432838439941, "learning_rate": 1.8717973079111456e-06, "loss": 0.0466, "step": 47663 }, { "epoch": 0.8441073729230033, "grad_norm": 0.7529850006103516, "learning_rate": 1.8713811394751297e-06, "loss": 0.0734, "step": 47664 }, { "epoch": 0.8441250824600318, "grad_norm": 0.46365031599998474, "learning_rate": 1.8709650142309436e-06, "loss": 0.049, "step": 47665 }, { "epoch": 0.8441427919970602, "grad_norm": 0.3340262770652771, "learning_rate": 1.8705489321799534e-06, "loss": 0.0811, "step": 47666 }, { "epoch": 0.8441605015340886, "grad_norm": 0.6808013319969177, "learning_rate": 1.8701328933235296e-06, "loss": 0.036, "step": 47667 }, { "epoch": 0.8441782110711171, "grad_norm": 0.455764502286911, "learning_rate": 1.869716897663039e-06, "loss": 0.0464, "step": 47668 }, { "epoch": 0.8441959206081455, "grad_norm": 0.6078721880912781, "learning_rate": 1.8693009451998561e-06, "loss": 0.0471, "step": 47669 }, { "epoch": 0.8442136301451739, "grad_norm": 0.8603368401527405, "learning_rate": 1.868885035935341e-06, "loss": 0.0568, "step": 47670 }, { "epoch": 0.8442313396822023, "grad_norm": 0.5149073004722595, "learning_rate": 1.8684691698708628e-06, "loss": 0.0591, "step": 47671 }, { "epoch": 0.8442490492192308, "grad_norm": 0.6801449060440063, "learning_rate": 1.8680533470078003e-06, "loss": 0.0496, "step": 47672 }, { "epoch": 0.8442667587562592, "grad_norm": 0.8336951732635498, "learning_rate": 1.867637567347511e-06, "loss": 0.0874, "step": 47673 }, { "epoch": 0.8442844682932876, "grad_norm": 0.4311879277229309, "learning_rate": 1.8672218308913636e-06, "loss": 0.0275, "step": 47674 }, { "epoch": 0.844302177830316, "grad_norm": 0.44087332487106323, "learning_rate": 1.8668061376407302e-06, "loss": 0.0429, "step": 47675 }, { "epoch": 0.8443198873673445, "grad_norm": 0.8614634871482849, "learning_rate": 1.8663904875969784e-06, "loss": 0.0677, "step": 47676 }, { "epoch": 0.8443375969043729, "grad_norm": 0.11691480129957199, "learning_rate": 1.8659748807614718e-06, "loss": 0.0507, "step": 47677 }, { "epoch": 0.8443553064414013, "grad_norm": 0.46042105555534363, "learning_rate": 1.8655593171355779e-06, "loss": 0.0386, "step": 47678 }, { "epoch": 0.8443730159784297, "grad_norm": 0.46664759516716003, "learning_rate": 1.8651437967206654e-06, "loss": 0.0404, "step": 47679 }, { "epoch": 0.8443907255154582, "grad_norm": 0.4510982632637024, "learning_rate": 1.8647283195181015e-06, "loss": 0.0435, "step": 47680 }, { "epoch": 0.8444084350524866, "grad_norm": 0.48840758204460144, "learning_rate": 1.8643128855292534e-06, "loss": 0.0418, "step": 47681 }, { "epoch": 0.844426144589515, "grad_norm": 0.6373915076255798, "learning_rate": 1.8638974947554872e-06, "loss": 0.033, "step": 47682 }, { "epoch": 0.8444438541265435, "grad_norm": 0.30108851194381714, "learning_rate": 1.8634821471981728e-06, "loss": 0.0474, "step": 47683 }, { "epoch": 0.844461563663572, "grad_norm": 0.7273203134536743, "learning_rate": 1.8630668428586695e-06, "loss": 0.0553, "step": 47684 }, { "epoch": 0.8444792732006003, "grad_norm": 0.855771005153656, "learning_rate": 1.8626515817383478e-06, "loss": 0.0467, "step": 47685 }, { "epoch": 0.8444969827376287, "grad_norm": 0.6386040449142456, "learning_rate": 1.862236363838578e-06, "loss": 0.0532, "step": 47686 }, { "epoch": 0.8445146922746573, "grad_norm": 0.5327717065811157, "learning_rate": 1.861821189160718e-06, "loss": 0.061, "step": 47687 }, { "epoch": 0.8445324018116857, "grad_norm": 0.3940850794315338, "learning_rate": 1.8614060577061376e-06, "loss": 0.0419, "step": 47688 }, { "epoch": 0.844550111348714, "grad_norm": 0.4058830440044403, "learning_rate": 1.860990969476201e-06, "loss": 0.0478, "step": 47689 }, { "epoch": 0.8445678208857424, "grad_norm": 0.6213974952697754, "learning_rate": 1.8605759244722759e-06, "loss": 0.0419, "step": 47690 }, { "epoch": 0.844585530422771, "grad_norm": 0.647852897644043, "learning_rate": 1.8601609226957271e-06, "loss": 0.0512, "step": 47691 }, { "epoch": 0.8446032399597994, "grad_norm": 0.44833046197891235, "learning_rate": 1.8597459641479208e-06, "loss": 0.0729, "step": 47692 }, { "epoch": 0.8446209494968278, "grad_norm": 0.7216818928718567, "learning_rate": 1.8593310488302207e-06, "loss": 0.0521, "step": 47693 }, { "epoch": 0.8446386590338562, "grad_norm": 0.9786716103553772, "learning_rate": 1.8589161767439955e-06, "loss": 0.0585, "step": 47694 }, { "epoch": 0.8446563685708847, "grad_norm": 1.1274943351745605, "learning_rate": 1.8585013478906026e-06, "loss": 0.0808, "step": 47695 }, { "epoch": 0.8446740781079131, "grad_norm": 0.6816734671592712, "learning_rate": 1.8580865622714127e-06, "loss": 0.0697, "step": 47696 }, { "epoch": 0.8446917876449415, "grad_norm": 0.45347529649734497, "learning_rate": 1.8576718198877913e-06, "loss": 0.0566, "step": 47697 }, { "epoch": 0.84470949718197, "grad_norm": 0.6000324487686157, "learning_rate": 1.8572571207410972e-06, "loss": 0.0584, "step": 47698 }, { "epoch": 0.8447272067189984, "grad_norm": 0.6727874279022217, "learning_rate": 1.8568424648326976e-06, "loss": 0.0868, "step": 47699 }, { "epoch": 0.8447449162560268, "grad_norm": 0.4914003610610962, "learning_rate": 1.8564278521639533e-06, "loss": 0.0545, "step": 47700 }, { "epoch": 0.8447626257930552, "grad_norm": 0.6635639071464539, "learning_rate": 1.8560132827362398e-06, "loss": 0.0532, "step": 47701 }, { "epoch": 0.8447803353300837, "grad_norm": 0.8393489122390747, "learning_rate": 1.8555987565509092e-06, "loss": 0.0587, "step": 47702 }, { "epoch": 0.8447980448671121, "grad_norm": 0.5115969181060791, "learning_rate": 1.8551842736093289e-06, "loss": 0.0264, "step": 47703 }, { "epoch": 0.8448157544041405, "grad_norm": 0.8912818431854248, "learning_rate": 1.854769833912866e-06, "loss": 0.0622, "step": 47704 }, { "epoch": 0.8448334639411689, "grad_norm": 0.5895289182662964, "learning_rate": 1.8543554374628779e-06, "loss": 0.0806, "step": 47705 }, { "epoch": 0.8448511734781974, "grad_norm": 1.1951631307601929, "learning_rate": 1.8539410842607302e-06, "loss": 0.0288, "step": 47706 }, { "epoch": 0.8448688830152258, "grad_norm": 0.47400450706481934, "learning_rate": 1.8535267743077883e-06, "loss": 0.0275, "step": 47707 }, { "epoch": 0.8448865925522542, "grad_norm": 0.35410159826278687, "learning_rate": 1.8531125076054128e-06, "loss": 0.0469, "step": 47708 }, { "epoch": 0.8449043020892826, "grad_norm": 0.6510490775108337, "learning_rate": 1.8526982841549661e-06, "loss": 0.045, "step": 47709 }, { "epoch": 0.8449220116263111, "grad_norm": 0.4043373465538025, "learning_rate": 1.8522841039578137e-06, "loss": 0.0613, "step": 47710 }, { "epoch": 0.8449397211633395, "grad_norm": 0.4283216893672943, "learning_rate": 1.8518699670153194e-06, "loss": 0.0542, "step": 47711 }, { "epoch": 0.8449574307003679, "grad_norm": 0.6871371269226074, "learning_rate": 1.8514558733288405e-06, "loss": 0.0555, "step": 47712 }, { "epoch": 0.8449751402373964, "grad_norm": 0.49622631072998047, "learning_rate": 1.851041822899741e-06, "loss": 0.0408, "step": 47713 }, { "epoch": 0.8449928497744248, "grad_norm": 0.32051169872283936, "learning_rate": 1.8506278157293848e-06, "loss": 0.0408, "step": 47714 }, { "epoch": 0.8450105593114532, "grad_norm": 0.3778189718723297, "learning_rate": 1.8502138518191357e-06, "loss": 0.0294, "step": 47715 }, { "epoch": 0.8450282688484816, "grad_norm": 0.5471248030662537, "learning_rate": 1.8497999311703512e-06, "loss": 0.0703, "step": 47716 }, { "epoch": 0.8450459783855101, "grad_norm": 0.8163697719573975, "learning_rate": 1.8493860537843931e-06, "loss": 0.0756, "step": 47717 }, { "epoch": 0.8450636879225385, "grad_norm": 0.2543172240257263, "learning_rate": 1.8489722196626257e-06, "loss": 0.0336, "step": 47718 }, { "epoch": 0.8450813974595669, "grad_norm": 0.6145884990692139, "learning_rate": 1.8485584288064094e-06, "loss": 0.0341, "step": 47719 }, { "epoch": 0.8450991069965953, "grad_norm": 0.5983520150184631, "learning_rate": 1.8481446812171048e-06, "loss": 0.072, "step": 47720 }, { "epoch": 0.8451168165336238, "grad_norm": 0.8615394830703735, "learning_rate": 1.847730976896076e-06, "loss": 0.0742, "step": 47721 }, { "epoch": 0.8451345260706522, "grad_norm": 0.5791059136390686, "learning_rate": 1.8473173158446832e-06, "loss": 0.0767, "step": 47722 }, { "epoch": 0.8451522356076806, "grad_norm": 0.3697643578052521, "learning_rate": 1.846903698064284e-06, "loss": 0.0363, "step": 47723 }, { "epoch": 0.845169945144709, "grad_norm": 0.9241374731063843, "learning_rate": 1.8464901235562403e-06, "loss": 0.0667, "step": 47724 }, { "epoch": 0.8451876546817375, "grad_norm": 0.6469677090644836, "learning_rate": 1.846076592321918e-06, "loss": 0.0319, "step": 47725 }, { "epoch": 0.8452053642187659, "grad_norm": 0.5796996355056763, "learning_rate": 1.8456631043626692e-06, "loss": 0.0674, "step": 47726 }, { "epoch": 0.8452230737557943, "grad_norm": 0.7420333027839661, "learning_rate": 1.8452496596798595e-06, "loss": 0.0407, "step": 47727 }, { "epoch": 0.8452407832928228, "grad_norm": 0.4900619685649872, "learning_rate": 1.8448362582748462e-06, "loss": 0.0487, "step": 47728 }, { "epoch": 0.8452584928298512, "grad_norm": 0.5614699125289917, "learning_rate": 1.8444229001489932e-06, "loss": 0.0672, "step": 47729 }, { "epoch": 0.8452762023668796, "grad_norm": 0.6924821734428406, "learning_rate": 1.844009585303656e-06, "loss": 0.0466, "step": 47730 }, { "epoch": 0.845293911903908, "grad_norm": 0.573337972164154, "learning_rate": 1.8435963137401985e-06, "loss": 0.0648, "step": 47731 }, { "epoch": 0.8453116214409365, "grad_norm": 0.3159039616584778, "learning_rate": 1.843183085459978e-06, "loss": 0.0433, "step": 47732 }, { "epoch": 0.8453293309779649, "grad_norm": 0.5948353409767151, "learning_rate": 1.8427699004643566e-06, "loss": 0.0653, "step": 47733 }, { "epoch": 0.8453470405149933, "grad_norm": 0.5283092260360718, "learning_rate": 1.8423567587546885e-06, "loss": 0.0566, "step": 47734 }, { "epoch": 0.8453647500520217, "grad_norm": 0.12728868424892426, "learning_rate": 1.8419436603323358e-06, "loss": 0.0332, "step": 47735 }, { "epoch": 0.8453824595890502, "grad_norm": 0.6755263805389404, "learning_rate": 1.8415306051986625e-06, "loss": 0.0588, "step": 47736 }, { "epoch": 0.8454001691260786, "grad_norm": 0.838323175907135, "learning_rate": 1.841117593355014e-06, "loss": 0.0804, "step": 47737 }, { "epoch": 0.845417878663107, "grad_norm": 0.7650136351585388, "learning_rate": 1.8407046248027626e-06, "loss": 0.0596, "step": 47738 }, { "epoch": 0.8454355882001354, "grad_norm": 0.42570072412490845, "learning_rate": 1.8402916995432605e-06, "loss": 0.0558, "step": 47739 }, { "epoch": 0.8454532977371639, "grad_norm": 1.1043245792388916, "learning_rate": 1.839878817577872e-06, "loss": 0.0689, "step": 47740 }, { "epoch": 0.8454710072741923, "grad_norm": 0.28320834040641785, "learning_rate": 1.839465978907947e-06, "loss": 0.0413, "step": 47741 }, { "epoch": 0.8454887168112207, "grad_norm": 0.6081019639968872, "learning_rate": 1.8390531835348484e-06, "loss": 0.0456, "step": 47742 }, { "epoch": 0.8455064263482492, "grad_norm": 0.3540091812610626, "learning_rate": 1.8386404314599364e-06, "loss": 0.0277, "step": 47743 }, { "epoch": 0.8455241358852776, "grad_norm": 0.6943643689155579, "learning_rate": 1.8382277226845634e-06, "loss": 0.0537, "step": 47744 }, { "epoch": 0.845541845422306, "grad_norm": 0.7549731135368347, "learning_rate": 1.8378150572100899e-06, "loss": 0.0451, "step": 47745 }, { "epoch": 0.8455595549593344, "grad_norm": 0.6520525813102722, "learning_rate": 1.8374024350378749e-06, "loss": 0.0495, "step": 47746 }, { "epoch": 0.845577264496363, "grad_norm": 0.5654156804084778, "learning_rate": 1.8369898561692721e-06, "loss": 0.0627, "step": 47747 }, { "epoch": 0.8455949740333913, "grad_norm": 0.5281041264533997, "learning_rate": 1.8365773206056423e-06, "loss": 0.0391, "step": 47748 }, { "epoch": 0.8456126835704197, "grad_norm": 0.6531608700752258, "learning_rate": 1.836164828348343e-06, "loss": 0.0555, "step": 47749 }, { "epoch": 0.8456303931074481, "grad_norm": 0.6295016407966614, "learning_rate": 1.8357523793987325e-06, "loss": 0.0587, "step": 47750 }, { "epoch": 0.8456481026444767, "grad_norm": 0.8232396245002747, "learning_rate": 1.8353399737581617e-06, "loss": 0.0711, "step": 47751 }, { "epoch": 0.845665812181505, "grad_norm": 0.26873359084129333, "learning_rate": 1.8349276114279895e-06, "loss": 0.0348, "step": 47752 }, { "epoch": 0.8456835217185334, "grad_norm": 0.5743030309677124, "learning_rate": 1.834515292409575e-06, "loss": 0.0504, "step": 47753 }, { "epoch": 0.8457012312555618, "grad_norm": 0.7155440449714661, "learning_rate": 1.8341030167042783e-06, "loss": 0.0532, "step": 47754 }, { "epoch": 0.8457189407925904, "grad_norm": 0.647776186466217, "learning_rate": 1.8336907843134453e-06, "loss": 0.0681, "step": 47755 }, { "epoch": 0.8457366503296188, "grad_norm": 0.4893513023853302, "learning_rate": 1.83327859523844e-06, "loss": 0.0594, "step": 47756 }, { "epoch": 0.8457543598666472, "grad_norm": 0.4718047082424164, "learning_rate": 1.8328664494806148e-06, "loss": 0.0617, "step": 47757 }, { "epoch": 0.8457720694036757, "grad_norm": 0.4504542350769043, "learning_rate": 1.832454347041328e-06, "loss": 0.0341, "step": 47758 }, { "epoch": 0.8457897789407041, "grad_norm": 0.41753366589546204, "learning_rate": 1.832042287921934e-06, "loss": 0.0488, "step": 47759 }, { "epoch": 0.8458074884777325, "grad_norm": 0.7391805052757263, "learning_rate": 1.83163027212379e-06, "loss": 0.0687, "step": 47760 }, { "epoch": 0.8458251980147609, "grad_norm": 0.6045085787773132, "learning_rate": 1.8312182996482529e-06, "loss": 0.0863, "step": 47761 }, { "epoch": 0.8458429075517894, "grad_norm": 0.35059836506843567, "learning_rate": 1.830806370496672e-06, "loss": 0.062, "step": 47762 }, { "epoch": 0.8458606170888178, "grad_norm": 0.5394863486289978, "learning_rate": 1.830394484670408e-06, "loss": 0.0614, "step": 47763 }, { "epoch": 0.8458783266258462, "grad_norm": 1.0405844449996948, "learning_rate": 1.8299826421708126e-06, "loss": 0.0593, "step": 47764 }, { "epoch": 0.8458960361628746, "grad_norm": 0.29394227266311646, "learning_rate": 1.8295708429992453e-06, "loss": 0.0644, "step": 47765 }, { "epoch": 0.8459137456999031, "grad_norm": 0.7324546575546265, "learning_rate": 1.829159087157053e-06, "loss": 0.0619, "step": 47766 }, { "epoch": 0.8459314552369315, "grad_norm": 0.36510413885116577, "learning_rate": 1.8287473746455963e-06, "loss": 0.0786, "step": 47767 }, { "epoch": 0.8459491647739599, "grad_norm": 0.5585154294967651, "learning_rate": 1.8283357054662325e-06, "loss": 0.0787, "step": 47768 }, { "epoch": 0.8459668743109883, "grad_norm": 0.7436206936836243, "learning_rate": 1.8279240796203107e-06, "loss": 0.0483, "step": 47769 }, { "epoch": 0.8459845838480168, "grad_norm": 0.6280882954597473, "learning_rate": 1.8275124971091844e-06, "loss": 0.0548, "step": 47770 }, { "epoch": 0.8460022933850452, "grad_norm": 0.7643278241157532, "learning_rate": 1.8271009579342096e-06, "loss": 0.0885, "step": 47771 }, { "epoch": 0.8460200029220736, "grad_norm": 0.5058475732803345, "learning_rate": 1.8266894620967434e-06, "loss": 0.0567, "step": 47772 }, { "epoch": 0.8460377124591021, "grad_norm": 0.6975399851799011, "learning_rate": 1.8262780095981346e-06, "loss": 0.0283, "step": 47773 }, { "epoch": 0.8460554219961305, "grad_norm": 0.45654240250587463, "learning_rate": 1.8258666004397373e-06, "loss": 0.0588, "step": 47774 }, { "epoch": 0.8460731315331589, "grad_norm": 0.42976269125938416, "learning_rate": 1.825455234622907e-06, "loss": 0.0243, "step": 47775 }, { "epoch": 0.8460908410701873, "grad_norm": 0.36066609621047974, "learning_rate": 1.825043912148996e-06, "loss": 0.0645, "step": 47776 }, { "epoch": 0.8461085506072158, "grad_norm": 0.4575892686843872, "learning_rate": 1.8246326330193598e-06, "loss": 0.0359, "step": 47777 }, { "epoch": 0.8461262601442442, "grad_norm": 0.32213717699050903, "learning_rate": 1.824221397235347e-06, "loss": 0.0524, "step": 47778 }, { "epoch": 0.8461439696812726, "grad_norm": 0.7511731386184692, "learning_rate": 1.8238102047983173e-06, "loss": 0.0563, "step": 47779 }, { "epoch": 0.846161679218301, "grad_norm": 0.6797516942024231, "learning_rate": 1.8233990557096176e-06, "loss": 0.056, "step": 47780 }, { "epoch": 0.8461793887553295, "grad_norm": 0.2733805179595947, "learning_rate": 1.8229879499705998e-06, "loss": 0.0384, "step": 47781 }, { "epoch": 0.8461970982923579, "grad_norm": 0.41260793805122375, "learning_rate": 1.8225768875826249e-06, "loss": 0.0452, "step": 47782 }, { "epoch": 0.8462148078293863, "grad_norm": 0.6239842772483826, "learning_rate": 1.8221658685470333e-06, "loss": 0.0367, "step": 47783 }, { "epoch": 0.8462325173664147, "grad_norm": 0.921500027179718, "learning_rate": 1.821754892865184e-06, "loss": 0.065, "step": 47784 }, { "epoch": 0.8462502269034432, "grad_norm": 0.5376402139663696, "learning_rate": 1.821343960538429e-06, "loss": 0.0526, "step": 47785 }, { "epoch": 0.8462679364404716, "grad_norm": 0.2543429434299469, "learning_rate": 1.8209330715681194e-06, "loss": 0.0496, "step": 47786 }, { "epoch": 0.8462856459775, "grad_norm": 0.6865612268447876, "learning_rate": 1.820522225955607e-06, "loss": 0.0706, "step": 47787 }, { "epoch": 0.8463033555145285, "grad_norm": 0.5373064875602722, "learning_rate": 1.8201114237022426e-06, "loss": 0.0484, "step": 47788 }, { "epoch": 0.8463210650515569, "grad_norm": 0.8419671654701233, "learning_rate": 1.8197006648093817e-06, "loss": 0.0656, "step": 47789 }, { "epoch": 0.8463387745885853, "grad_norm": 0.41957321763038635, "learning_rate": 1.81928994927837e-06, "loss": 0.0325, "step": 47790 }, { "epoch": 0.8463564841256137, "grad_norm": 0.3792288601398468, "learning_rate": 1.818879277110561e-06, "loss": 0.0386, "step": 47791 }, { "epoch": 0.8463741936626422, "grad_norm": 0.848976731300354, "learning_rate": 1.818468648307306e-06, "loss": 0.0358, "step": 47792 }, { "epoch": 0.8463919031996706, "grad_norm": 0.7814878225326538, "learning_rate": 1.8180580628699584e-06, "loss": 0.0538, "step": 47793 }, { "epoch": 0.846409612736699, "grad_norm": 0.7593553066253662, "learning_rate": 1.8176475207998638e-06, "loss": 0.0657, "step": 47794 }, { "epoch": 0.8464273222737274, "grad_norm": 0.4816426634788513, "learning_rate": 1.8172370220983731e-06, "loss": 0.0506, "step": 47795 }, { "epoch": 0.8464450318107559, "grad_norm": 0.4478086531162262, "learning_rate": 1.8168265667668415e-06, "loss": 0.0454, "step": 47796 }, { "epoch": 0.8464627413477843, "grad_norm": 0.7272425293922424, "learning_rate": 1.8164161548066217e-06, "loss": 0.0624, "step": 47797 }, { "epoch": 0.8464804508848127, "grad_norm": 0.5581161975860596, "learning_rate": 1.8160057862190538e-06, "loss": 0.0585, "step": 47798 }, { "epoch": 0.8464981604218411, "grad_norm": 0.2951738238334656, "learning_rate": 1.8155954610054953e-06, "loss": 0.0452, "step": 47799 }, { "epoch": 0.8465158699588696, "grad_norm": 0.6874628067016602, "learning_rate": 1.815185179167297e-06, "loss": 0.0636, "step": 47800 }, { "epoch": 0.846533579495898, "grad_norm": 0.6788164377212524, "learning_rate": 1.8147749407058024e-06, "loss": 0.0633, "step": 47801 }, { "epoch": 0.8465512890329264, "grad_norm": 0.6300170421600342, "learning_rate": 1.8143647456223639e-06, "loss": 0.0537, "step": 47802 }, { "epoch": 0.8465689985699549, "grad_norm": 0.7392427325248718, "learning_rate": 1.813954593918334e-06, "loss": 0.0595, "step": 47803 }, { "epoch": 0.8465867081069833, "grad_norm": 0.45361822843551636, "learning_rate": 1.8135444855950578e-06, "loss": 0.0607, "step": 47804 }, { "epoch": 0.8466044176440117, "grad_norm": 0.41263455152511597, "learning_rate": 1.8131344206538863e-06, "loss": 0.0336, "step": 47805 }, { "epoch": 0.8466221271810401, "grad_norm": 0.2999286949634552, "learning_rate": 1.8127243990961701e-06, "loss": 0.0396, "step": 47806 }, { "epoch": 0.8466398367180686, "grad_norm": 0.6509370803833008, "learning_rate": 1.8123144209232596e-06, "loss": 0.045, "step": 47807 }, { "epoch": 0.846657546255097, "grad_norm": 0.5173619389533997, "learning_rate": 1.811904486136497e-06, "loss": 0.0349, "step": 47808 }, { "epoch": 0.8466752557921254, "grad_norm": 0.5786706209182739, "learning_rate": 1.8114945947372363e-06, "loss": 0.0615, "step": 47809 }, { "epoch": 0.8466929653291538, "grad_norm": 0.43314775824546814, "learning_rate": 1.8110847467268231e-06, "loss": 0.0556, "step": 47810 }, { "epoch": 0.8467106748661823, "grad_norm": 0.7187593579292297, "learning_rate": 1.81067494210661e-06, "loss": 0.0578, "step": 47811 }, { "epoch": 0.8467283844032107, "grad_norm": 0.5778017044067383, "learning_rate": 1.8102651808779403e-06, "loss": 0.0794, "step": 47812 }, { "epoch": 0.8467460939402391, "grad_norm": 2.0308854579925537, "learning_rate": 1.8098554630421631e-06, "loss": 0.1055, "step": 47813 }, { "epoch": 0.8467638034772675, "grad_norm": 0.7197229862213135, "learning_rate": 1.8094457886006277e-06, "loss": 0.0628, "step": 47814 }, { "epoch": 0.846781513014296, "grad_norm": 0.27942904829978943, "learning_rate": 1.809036157554681e-06, "loss": 0.0345, "step": 47815 }, { "epoch": 0.8467992225513244, "grad_norm": 0.26436156034469604, "learning_rate": 1.808626569905672e-06, "loss": 0.038, "step": 47816 }, { "epoch": 0.8468169320883528, "grad_norm": 0.7543558478355408, "learning_rate": 1.8082170256549463e-06, "loss": 0.0569, "step": 47817 }, { "epoch": 0.8468346416253814, "grad_norm": 0.6279534697532654, "learning_rate": 1.8078075248038561e-06, "loss": 0.076, "step": 47818 }, { "epoch": 0.8468523511624098, "grad_norm": 0.8420996069908142, "learning_rate": 1.807398067353742e-06, "loss": 0.0462, "step": 47819 }, { "epoch": 0.8468700606994382, "grad_norm": 0.17006294429302216, "learning_rate": 1.806988653305955e-06, "loss": 0.0196, "step": 47820 }, { "epoch": 0.8468877702364666, "grad_norm": 0.6231796741485596, "learning_rate": 1.8065792826618415e-06, "loss": 0.0678, "step": 47821 }, { "epoch": 0.8469054797734951, "grad_norm": 0.7690649628639221, "learning_rate": 1.8061699554227462e-06, "loss": 0.0485, "step": 47822 }, { "epoch": 0.8469231893105235, "grad_norm": 0.9019702076911926, "learning_rate": 1.8057606715900176e-06, "loss": 0.0611, "step": 47823 }, { "epoch": 0.8469408988475519, "grad_norm": 0.2584977149963379, "learning_rate": 1.8053514311649965e-06, "loss": 0.038, "step": 47824 }, { "epoch": 0.8469586083845803, "grad_norm": 0.603462278842926, "learning_rate": 1.8049422341490435e-06, "loss": 0.0701, "step": 47825 }, { "epoch": 0.8469763179216088, "grad_norm": 0.693076491355896, "learning_rate": 1.8045330805434923e-06, "loss": 0.064, "step": 47826 }, { "epoch": 0.8469940274586372, "grad_norm": 0.6961349844932556, "learning_rate": 1.804123970349692e-06, "loss": 0.0272, "step": 47827 }, { "epoch": 0.8470117369956656, "grad_norm": 0.39610758423805237, "learning_rate": 1.8037149035689916e-06, "loss": 0.0685, "step": 47828 }, { "epoch": 0.847029446532694, "grad_norm": 0.3065946698188782, "learning_rate": 1.803305880202733e-06, "loss": 0.0585, "step": 47829 }, { "epoch": 0.8470471560697225, "grad_norm": 0.5662024021148682, "learning_rate": 1.8028969002522622e-06, "loss": 0.0478, "step": 47830 }, { "epoch": 0.8470648656067509, "grad_norm": 0.6186362504959106, "learning_rate": 1.8024879637189245e-06, "loss": 0.0514, "step": 47831 }, { "epoch": 0.8470825751437793, "grad_norm": 0.736070454120636, "learning_rate": 1.8020790706040691e-06, "loss": 0.047, "step": 47832 }, { "epoch": 0.8471002846808078, "grad_norm": 0.5719627737998962, "learning_rate": 1.8016702209090379e-06, "loss": 0.0239, "step": 47833 }, { "epoch": 0.8471179942178362, "grad_norm": 0.20156311988830566, "learning_rate": 1.8012614146351752e-06, "loss": 0.0185, "step": 47834 }, { "epoch": 0.8471357037548646, "grad_norm": 0.8263511657714844, "learning_rate": 1.8008526517838297e-06, "loss": 0.0565, "step": 47835 }, { "epoch": 0.847153413291893, "grad_norm": 0.4898308515548706, "learning_rate": 1.8004439323563453e-06, "loss": 0.0324, "step": 47836 }, { "epoch": 0.8471711228289215, "grad_norm": 0.30989933013916016, "learning_rate": 1.8000352563540629e-06, "loss": 0.0494, "step": 47837 }, { "epoch": 0.8471888323659499, "grad_norm": 0.5812821388244629, "learning_rate": 1.7996266237783293e-06, "loss": 0.0744, "step": 47838 }, { "epoch": 0.8472065419029783, "grad_norm": 0.5517894625663757, "learning_rate": 1.7992180346304937e-06, "loss": 0.0387, "step": 47839 }, { "epoch": 0.8472242514400067, "grad_norm": 0.5461724996566772, "learning_rate": 1.79880948891189e-06, "loss": 0.0581, "step": 47840 }, { "epoch": 0.8472419609770352, "grad_norm": 0.7383692264556885, "learning_rate": 1.7984009866238688e-06, "loss": 0.074, "step": 47841 }, { "epoch": 0.8472596705140636, "grad_norm": 0.2540260851383209, "learning_rate": 1.7979925277677738e-06, "loss": 0.0318, "step": 47842 }, { "epoch": 0.847277380051092, "grad_norm": 0.8569159507751465, "learning_rate": 1.7975841123449477e-06, "loss": 0.078, "step": 47843 }, { "epoch": 0.8472950895881205, "grad_norm": 0.40087366104125977, "learning_rate": 1.797175740356734e-06, "loss": 0.0348, "step": 47844 }, { "epoch": 0.8473127991251489, "grad_norm": 0.7051114439964294, "learning_rate": 1.7967674118044769e-06, "loss": 0.07, "step": 47845 }, { "epoch": 0.8473305086621773, "grad_norm": 0.36811673641204834, "learning_rate": 1.7963591266895236e-06, "loss": 0.0554, "step": 47846 }, { "epoch": 0.8473482181992057, "grad_norm": 0.7386081218719482, "learning_rate": 1.7959508850132096e-06, "loss": 0.0506, "step": 47847 }, { "epoch": 0.8473659277362342, "grad_norm": 0.16329975426197052, "learning_rate": 1.7955426867768805e-06, "loss": 0.0661, "step": 47848 }, { "epoch": 0.8473836372732626, "grad_norm": 0.4065152406692505, "learning_rate": 1.7951345319818818e-06, "loss": 0.0514, "step": 47849 }, { "epoch": 0.847401346810291, "grad_norm": 0.6910818219184875, "learning_rate": 1.7947264206295577e-06, "loss": 0.0623, "step": 47850 }, { "epoch": 0.8474190563473194, "grad_norm": 0.9740280508995056, "learning_rate": 1.7943183527212454e-06, "loss": 0.0604, "step": 47851 }, { "epoch": 0.8474367658843479, "grad_norm": 0.9725179672241211, "learning_rate": 1.7939103282582886e-06, "loss": 0.0681, "step": 47852 }, { "epoch": 0.8474544754213763, "grad_norm": 0.4771680533885956, "learning_rate": 1.7935023472420314e-06, "loss": 0.0615, "step": 47853 }, { "epoch": 0.8474721849584047, "grad_norm": 0.5390011072158813, "learning_rate": 1.7930944096738162e-06, "loss": 0.0525, "step": 47854 }, { "epoch": 0.8474898944954331, "grad_norm": 0.4992222487926483, "learning_rate": 1.7926865155549848e-06, "loss": 0.0514, "step": 47855 }, { "epoch": 0.8475076040324616, "grad_norm": 0.5359588861465454, "learning_rate": 1.79227866488688e-06, "loss": 0.0562, "step": 47856 }, { "epoch": 0.84752531356949, "grad_norm": 0.6760140061378479, "learning_rate": 1.7918708576708437e-06, "loss": 0.0555, "step": 47857 }, { "epoch": 0.8475430231065184, "grad_norm": 0.5557319521903992, "learning_rate": 1.791463093908215e-06, "loss": 0.0655, "step": 47858 }, { "epoch": 0.8475607326435469, "grad_norm": 0.5313631296157837, "learning_rate": 1.7910553736003343e-06, "loss": 0.0512, "step": 47859 }, { "epoch": 0.8475784421805753, "grad_norm": 0.3467191457748413, "learning_rate": 1.7906476967485508e-06, "loss": 0.0525, "step": 47860 }, { "epoch": 0.8475961517176037, "grad_norm": 0.863932728767395, "learning_rate": 1.7902400633541932e-06, "loss": 0.0693, "step": 47861 }, { "epoch": 0.8476138612546321, "grad_norm": 0.5408490300178528, "learning_rate": 1.7898324734186123e-06, "loss": 0.0569, "step": 47862 }, { "epoch": 0.8476315707916606, "grad_norm": 0.797946572303772, "learning_rate": 1.7894249269431485e-06, "loss": 0.0409, "step": 47863 }, { "epoch": 0.847649280328689, "grad_norm": 0.49181750416755676, "learning_rate": 1.7890174239291407e-06, "loss": 0.0467, "step": 47864 }, { "epoch": 0.8476669898657174, "grad_norm": 0.7818418741226196, "learning_rate": 1.788609964377928e-06, "loss": 0.0407, "step": 47865 }, { "epoch": 0.8476846994027458, "grad_norm": 0.5456316471099854, "learning_rate": 1.7882025482908527e-06, "loss": 0.0739, "step": 47866 }, { "epoch": 0.8477024089397743, "grad_norm": 0.38374680280685425, "learning_rate": 1.7877951756692535e-06, "loss": 0.0628, "step": 47867 }, { "epoch": 0.8477201184768027, "grad_norm": 0.846813976764679, "learning_rate": 1.7873878465144762e-06, "loss": 0.0467, "step": 47868 }, { "epoch": 0.8477378280138311, "grad_norm": 0.3774542212486267, "learning_rate": 1.7869805608278527e-06, "loss": 0.0231, "step": 47869 }, { "epoch": 0.8477555375508595, "grad_norm": 0.5197837352752686, "learning_rate": 1.7865733186107257e-06, "loss": 0.0435, "step": 47870 }, { "epoch": 0.847773247087888, "grad_norm": 0.6392821669578552, "learning_rate": 1.7861661198644375e-06, "loss": 0.0712, "step": 47871 }, { "epoch": 0.8477909566249164, "grad_norm": 0.7653250098228455, "learning_rate": 1.7857589645903266e-06, "loss": 0.065, "step": 47872 }, { "epoch": 0.8478086661619448, "grad_norm": 0.6064956784248352, "learning_rate": 1.7853518527897307e-06, "loss": 0.0715, "step": 47873 }, { "epoch": 0.8478263756989733, "grad_norm": 0.648625910282135, "learning_rate": 1.7849447844639916e-06, "loss": 0.0466, "step": 47874 }, { "epoch": 0.8478440852360017, "grad_norm": 0.6893919110298157, "learning_rate": 1.7845377596144503e-06, "loss": 0.0454, "step": 47875 }, { "epoch": 0.8478617947730301, "grad_norm": 0.7664430737495422, "learning_rate": 1.7841307782424404e-06, "loss": 0.0694, "step": 47876 }, { "epoch": 0.8478795043100585, "grad_norm": 0.5422267913818359, "learning_rate": 1.7837238403493029e-06, "loss": 0.0504, "step": 47877 }, { "epoch": 0.847897213847087, "grad_norm": 0.603356122970581, "learning_rate": 1.7833169459363796e-06, "loss": 0.0552, "step": 47878 }, { "epoch": 0.8479149233841154, "grad_norm": 0.45868170261383057, "learning_rate": 1.7829100950050047e-06, "loss": 0.0611, "step": 47879 }, { "epoch": 0.8479326329211438, "grad_norm": 0.6888536214828491, "learning_rate": 1.782503287556519e-06, "loss": 0.0457, "step": 47880 }, { "epoch": 0.8479503424581722, "grad_norm": 0.7108392119407654, "learning_rate": 1.782096523592261e-06, "loss": 0.0573, "step": 47881 }, { "epoch": 0.8479680519952008, "grad_norm": 0.8749105930328369, "learning_rate": 1.781689803113568e-06, "loss": 0.0753, "step": 47882 }, { "epoch": 0.8479857615322292, "grad_norm": 0.39298850297927856, "learning_rate": 1.7812831261217777e-06, "loss": 0.0619, "step": 47883 }, { "epoch": 0.8480034710692576, "grad_norm": 0.5846363306045532, "learning_rate": 1.7808764926182303e-06, "loss": 0.0465, "step": 47884 }, { "epoch": 0.848021180606286, "grad_norm": 0.44394102692604065, "learning_rate": 1.7804699026042649e-06, "loss": 0.0601, "step": 47885 }, { "epoch": 0.8480388901433145, "grad_norm": 0.5130917429924011, "learning_rate": 1.7800633560812118e-06, "loss": 0.0717, "step": 47886 }, { "epoch": 0.8480565996803429, "grad_norm": 0.5370227694511414, "learning_rate": 1.7796568530504154e-06, "loss": 0.0541, "step": 47887 }, { "epoch": 0.8480743092173713, "grad_norm": 0.6726034879684448, "learning_rate": 1.7792503935132092e-06, "loss": 0.0639, "step": 47888 }, { "epoch": 0.8480920187543998, "grad_norm": 1.0563645362854004, "learning_rate": 1.778843977470936e-06, "loss": 0.0629, "step": 47889 }, { "epoch": 0.8481097282914282, "grad_norm": 0.5106949210166931, "learning_rate": 1.7784376049249223e-06, "loss": 0.0553, "step": 47890 }, { "epoch": 0.8481274378284566, "grad_norm": 0.535595178604126, "learning_rate": 1.7780312758765144e-06, "loss": 0.0484, "step": 47891 }, { "epoch": 0.848145147365485, "grad_norm": 0.6805204749107361, "learning_rate": 1.7776249903270508e-06, "loss": 0.0578, "step": 47892 }, { "epoch": 0.8481628569025135, "grad_norm": 0.35358572006225586, "learning_rate": 1.777218748277859e-06, "loss": 0.0521, "step": 47893 }, { "epoch": 0.8481805664395419, "grad_norm": 0.45322185754776, "learning_rate": 1.7768125497302813e-06, "loss": 0.0257, "step": 47894 }, { "epoch": 0.8481982759765703, "grad_norm": 0.5663179159164429, "learning_rate": 1.7764063946856513e-06, "loss": 0.0536, "step": 47895 }, { "epoch": 0.8482159855135987, "grad_norm": 0.5798418521881104, "learning_rate": 1.7760002831453116e-06, "loss": 0.055, "step": 47896 }, { "epoch": 0.8482336950506272, "grad_norm": 0.7232827544212341, "learning_rate": 1.7755942151105891e-06, "loss": 0.0455, "step": 47897 }, { "epoch": 0.8482514045876556, "grad_norm": 0.7737846374511719, "learning_rate": 1.7751881905828248e-06, "loss": 0.067, "step": 47898 }, { "epoch": 0.848269114124684, "grad_norm": 0.6488012671470642, "learning_rate": 1.7747822095633526e-06, "loss": 0.0647, "step": 47899 }, { "epoch": 0.8482868236617124, "grad_norm": 0.6327608823776245, "learning_rate": 1.7743762720535111e-06, "loss": 0.0597, "step": 47900 }, { "epoch": 0.8483045331987409, "grad_norm": 0.7944143414497375, "learning_rate": 1.773970378054633e-06, "loss": 0.0456, "step": 47901 }, { "epoch": 0.8483222427357693, "grad_norm": 0.504814863204956, "learning_rate": 1.7735645275680533e-06, "loss": 0.0289, "step": 47902 }, { "epoch": 0.8483399522727977, "grad_norm": 0.519732654094696, "learning_rate": 1.7731587205951133e-06, "loss": 0.0372, "step": 47903 }, { "epoch": 0.8483576618098262, "grad_norm": 0.7892249226570129, "learning_rate": 1.7727529571371398e-06, "loss": 0.0711, "step": 47904 }, { "epoch": 0.8483753713468546, "grad_norm": 0.6118608713150024, "learning_rate": 1.7723472371954718e-06, "loss": 0.0728, "step": 47905 }, { "epoch": 0.848393080883883, "grad_norm": 0.39187729358673096, "learning_rate": 1.7719415607714434e-06, "loss": 0.0596, "step": 47906 }, { "epoch": 0.8484107904209114, "grad_norm": 0.5684956312179565, "learning_rate": 1.7715359278663918e-06, "loss": 0.0477, "step": 47907 }, { "epoch": 0.8484284999579399, "grad_norm": 0.4651319086551666, "learning_rate": 1.7711303384816458e-06, "loss": 0.0415, "step": 47908 }, { "epoch": 0.8484462094949683, "grad_norm": 0.48856639862060547, "learning_rate": 1.7707247926185426e-06, "loss": 0.0523, "step": 47909 }, { "epoch": 0.8484639190319967, "grad_norm": 0.4857577383518219, "learning_rate": 1.7703192902784181e-06, "loss": 0.0596, "step": 47910 }, { "epoch": 0.8484816285690251, "grad_norm": 0.8136942386627197, "learning_rate": 1.7699138314626046e-06, "loss": 0.0742, "step": 47911 }, { "epoch": 0.8484993381060536, "grad_norm": 0.3558169901371002, "learning_rate": 1.7695084161724356e-06, "loss": 0.0562, "step": 47912 }, { "epoch": 0.848517047643082, "grad_norm": 0.5891363620758057, "learning_rate": 1.769103044409247e-06, "loss": 0.055, "step": 47913 }, { "epoch": 0.8485347571801104, "grad_norm": 0.4917999804019928, "learning_rate": 1.7686977161743728e-06, "loss": 0.0409, "step": 47914 }, { "epoch": 0.8485524667171388, "grad_norm": 0.6105517148971558, "learning_rate": 1.7682924314691434e-06, "loss": 0.0427, "step": 47915 }, { "epoch": 0.8485701762541673, "grad_norm": 0.4178503751754761, "learning_rate": 1.7678871902948928e-06, "loss": 0.0519, "step": 47916 }, { "epoch": 0.8485878857911957, "grad_norm": 0.4406539499759674, "learning_rate": 1.7674819926529583e-06, "loss": 0.0601, "step": 47917 }, { "epoch": 0.8486055953282241, "grad_norm": 0.43749362230300903, "learning_rate": 1.7670768385446655e-06, "loss": 0.0492, "step": 47918 }, { "epoch": 0.8486233048652526, "grad_norm": 0.5329648852348328, "learning_rate": 1.7666717279713484e-06, "loss": 0.0468, "step": 47919 }, { "epoch": 0.848641014402281, "grad_norm": 0.9277843236923218, "learning_rate": 1.7662666609343475e-06, "loss": 0.0742, "step": 47920 }, { "epoch": 0.8486587239393094, "grad_norm": 0.557849645614624, "learning_rate": 1.7658616374349933e-06, "loss": 0.0564, "step": 47921 }, { "epoch": 0.8486764334763378, "grad_norm": 0.4324583113193512, "learning_rate": 1.7654566574746117e-06, "loss": 0.0325, "step": 47922 }, { "epoch": 0.8486941430133663, "grad_norm": 0.6988852620124817, "learning_rate": 1.7650517210545398e-06, "loss": 0.0628, "step": 47923 }, { "epoch": 0.8487118525503947, "grad_norm": 0.47215694189071655, "learning_rate": 1.764646828176113e-06, "loss": 0.0456, "step": 47924 }, { "epoch": 0.8487295620874231, "grad_norm": 0.4915913939476013, "learning_rate": 1.7642419788406554e-06, "loss": 0.0603, "step": 47925 }, { "epoch": 0.8487472716244515, "grad_norm": 0.6237069964408875, "learning_rate": 1.763837173049503e-06, "loss": 0.064, "step": 47926 }, { "epoch": 0.84876498116148, "grad_norm": 0.6343080997467041, "learning_rate": 1.7634324108039873e-06, "loss": 0.0485, "step": 47927 }, { "epoch": 0.8487826906985084, "grad_norm": 0.8970218896865845, "learning_rate": 1.7630276921054412e-06, "loss": 0.041, "step": 47928 }, { "epoch": 0.8488004002355368, "grad_norm": 0.7993816137313843, "learning_rate": 1.762623016955195e-06, "loss": 0.0649, "step": 47929 }, { "epoch": 0.8488181097725652, "grad_norm": 0.21042005717754364, "learning_rate": 1.7622183853545791e-06, "loss": 0.0418, "step": 47930 }, { "epoch": 0.8488358193095937, "grad_norm": 0.3268081247806549, "learning_rate": 1.7618137973049297e-06, "loss": 0.0555, "step": 47931 }, { "epoch": 0.8488535288466221, "grad_norm": 0.37915652990341187, "learning_rate": 1.7614092528075704e-06, "loss": 0.0575, "step": 47932 }, { "epoch": 0.8488712383836505, "grad_norm": 1.0303876399993896, "learning_rate": 1.7610047518638367e-06, "loss": 0.0545, "step": 47933 }, { "epoch": 0.848888947920679, "grad_norm": 0.9504026174545288, "learning_rate": 1.7606002944750577e-06, "loss": 0.0593, "step": 47934 }, { "epoch": 0.8489066574577074, "grad_norm": 0.7187885046005249, "learning_rate": 1.7601958806425672e-06, "loss": 0.0653, "step": 47935 }, { "epoch": 0.8489243669947358, "grad_norm": 0.4646297097206116, "learning_rate": 1.7597915103676908e-06, "loss": 0.0675, "step": 47936 }, { "epoch": 0.8489420765317642, "grad_norm": 0.6903116106987, "learning_rate": 1.7593871836517612e-06, "loss": 0.0505, "step": 47937 }, { "epoch": 0.8489597860687927, "grad_norm": 0.2727203369140625, "learning_rate": 1.7589829004961066e-06, "loss": 0.0381, "step": 47938 }, { "epoch": 0.8489774956058211, "grad_norm": 0.7539244294166565, "learning_rate": 1.7585786609020616e-06, "loss": 0.0641, "step": 47939 }, { "epoch": 0.8489952051428495, "grad_norm": 0.47188425064086914, "learning_rate": 1.7581744648709513e-06, "loss": 0.0692, "step": 47940 }, { "epoch": 0.8490129146798779, "grad_norm": 0.32879456877708435, "learning_rate": 1.7577703124041101e-06, "loss": 0.0303, "step": 47941 }, { "epoch": 0.8490306242169064, "grad_norm": 0.6814795732498169, "learning_rate": 1.7573662035028665e-06, "loss": 0.0566, "step": 47942 }, { "epoch": 0.8490483337539348, "grad_norm": 0.8652125597000122, "learning_rate": 1.7569621381685464e-06, "loss": 0.0693, "step": 47943 }, { "epoch": 0.8490660432909632, "grad_norm": 0.5717906355857849, "learning_rate": 1.7565581164024801e-06, "loss": 0.0533, "step": 47944 }, { "epoch": 0.8490837528279916, "grad_norm": 0.5636414289474487, "learning_rate": 1.7561541382060004e-06, "loss": 0.0507, "step": 47945 }, { "epoch": 0.8491014623650202, "grad_norm": 0.6953054666519165, "learning_rate": 1.7557502035804358e-06, "loss": 0.0571, "step": 47946 }, { "epoch": 0.8491191719020486, "grad_norm": 0.4808458685874939, "learning_rate": 1.7553463125271101e-06, "loss": 0.0454, "step": 47947 }, { "epoch": 0.849136881439077, "grad_norm": 1.1461856365203857, "learning_rate": 1.7549424650473529e-06, "loss": 0.0584, "step": 47948 }, { "epoch": 0.8491545909761055, "grad_norm": 0.5061191320419312, "learning_rate": 1.7545386611425006e-06, "loss": 0.0681, "step": 47949 }, { "epoch": 0.8491723005131339, "grad_norm": 0.7732082605361938, "learning_rate": 1.7541349008138746e-06, "loss": 0.0514, "step": 47950 }, { "epoch": 0.8491900100501623, "grad_norm": 0.38155797123908997, "learning_rate": 1.753731184062805e-06, "loss": 0.0325, "step": 47951 }, { "epoch": 0.8492077195871907, "grad_norm": 0.5810054540634155, "learning_rate": 1.7533275108906194e-06, "loss": 0.059, "step": 47952 }, { "epoch": 0.8492254291242192, "grad_norm": 0.800325870513916, "learning_rate": 1.7529238812986497e-06, "loss": 0.0715, "step": 47953 }, { "epoch": 0.8492431386612476, "grad_norm": 0.8154260516166687, "learning_rate": 1.752520295288217e-06, "loss": 0.0923, "step": 47954 }, { "epoch": 0.849260848198276, "grad_norm": 0.8201985359191895, "learning_rate": 1.752116752860653e-06, "loss": 0.068, "step": 47955 }, { "epoch": 0.8492785577353044, "grad_norm": 0.49881571531295776, "learning_rate": 1.7517132540172853e-06, "loss": 0.0606, "step": 47956 }, { "epoch": 0.8492962672723329, "grad_norm": 0.5253688097000122, "learning_rate": 1.7513097987594396e-06, "loss": 0.0412, "step": 47957 }, { "epoch": 0.8493139768093613, "grad_norm": 0.6354068517684937, "learning_rate": 1.7509063870884461e-06, "loss": 0.0326, "step": 47958 }, { "epoch": 0.8493316863463897, "grad_norm": 0.5541340112686157, "learning_rate": 1.750503019005631e-06, "loss": 0.064, "step": 47959 }, { "epoch": 0.8493493958834181, "grad_norm": 0.566071093082428, "learning_rate": 1.7500996945123227e-06, "loss": 0.0373, "step": 47960 }, { "epoch": 0.8493671054204466, "grad_norm": 0.3714316487312317, "learning_rate": 1.7496964136098436e-06, "loss": 0.0584, "step": 47961 }, { "epoch": 0.849384814957475, "grad_norm": 0.6142943501472473, "learning_rate": 1.7492931762995229e-06, "loss": 0.0666, "step": 47962 }, { "epoch": 0.8494025244945034, "grad_norm": 0.8003085851669312, "learning_rate": 1.7488899825826909e-06, "loss": 0.0591, "step": 47963 }, { "epoch": 0.8494202340315319, "grad_norm": 0.5233249068260193, "learning_rate": 1.7484868324606669e-06, "loss": 0.055, "step": 47964 }, { "epoch": 0.8494379435685603, "grad_norm": 0.6809048056602478, "learning_rate": 1.7480837259347809e-06, "loss": 0.0576, "step": 47965 }, { "epoch": 0.8494556531055887, "grad_norm": 0.6470529437065125, "learning_rate": 1.7476806630063609e-06, "loss": 0.0592, "step": 47966 }, { "epoch": 0.8494733626426171, "grad_norm": 0.569033145904541, "learning_rate": 1.7472776436767284e-06, "loss": 0.0706, "step": 47967 }, { "epoch": 0.8494910721796456, "grad_norm": 0.5949914455413818, "learning_rate": 1.746874667947213e-06, "loss": 0.0605, "step": 47968 }, { "epoch": 0.849508781716674, "grad_norm": 0.853146493434906, "learning_rate": 1.7464717358191401e-06, "loss": 0.0548, "step": 47969 }, { "epoch": 0.8495264912537024, "grad_norm": 0.4942045211791992, "learning_rate": 1.7460688472938352e-06, "loss": 0.0433, "step": 47970 }, { "epoch": 0.8495442007907308, "grad_norm": 0.5453500151634216, "learning_rate": 1.7456660023726257e-06, "loss": 0.0701, "step": 47971 }, { "epoch": 0.8495619103277593, "grad_norm": 0.5190620422363281, "learning_rate": 1.7452632010568303e-06, "loss": 0.0433, "step": 47972 }, { "epoch": 0.8495796198647877, "grad_norm": 0.6350404620170593, "learning_rate": 1.7448604433477799e-06, "loss": 0.0564, "step": 47973 }, { "epoch": 0.8495973294018161, "grad_norm": 0.44239693880081177, "learning_rate": 1.7444577292468e-06, "loss": 0.0648, "step": 47974 }, { "epoch": 0.8496150389388445, "grad_norm": 0.5999796986579895, "learning_rate": 1.7440550587552112e-06, "loss": 0.0609, "step": 47975 }, { "epoch": 0.849632748475873, "grad_norm": 0.46666252613067627, "learning_rate": 1.743652431874339e-06, "loss": 0.0455, "step": 47976 }, { "epoch": 0.8496504580129014, "grad_norm": 0.7064319252967834, "learning_rate": 1.7432498486055077e-06, "loss": 0.0387, "step": 47977 }, { "epoch": 0.8496681675499298, "grad_norm": 0.6724339723587036, "learning_rate": 1.742847308950049e-06, "loss": 0.0836, "step": 47978 }, { "epoch": 0.8496858770869583, "grad_norm": 0.7869994640350342, "learning_rate": 1.742444812909279e-06, "loss": 0.0602, "step": 47979 }, { "epoch": 0.8497035866239867, "grad_norm": 0.7716795206069946, "learning_rate": 1.7420423604845248e-06, "loss": 0.0645, "step": 47980 }, { "epoch": 0.8497212961610151, "grad_norm": 0.3624289631843567, "learning_rate": 1.7416399516771136e-06, "loss": 0.0318, "step": 47981 }, { "epoch": 0.8497390056980435, "grad_norm": 0.32752200961112976, "learning_rate": 1.741237586488363e-06, "loss": 0.0552, "step": 47982 }, { "epoch": 0.849756715235072, "grad_norm": 0.27001282572746277, "learning_rate": 1.7408352649195997e-06, "loss": 0.0672, "step": 47983 }, { "epoch": 0.8497744247721004, "grad_norm": 0.6326466798782349, "learning_rate": 1.7404329869721464e-06, "loss": 0.0468, "step": 47984 }, { "epoch": 0.8497921343091288, "grad_norm": 0.7731461524963379, "learning_rate": 1.7400307526473287e-06, "loss": 0.0659, "step": 47985 }, { "epoch": 0.8498098438461572, "grad_norm": 0.639704704284668, "learning_rate": 1.739628561946467e-06, "loss": 0.0793, "step": 47986 }, { "epoch": 0.8498275533831857, "grad_norm": 0.6544657945632935, "learning_rate": 1.739226414870887e-06, "loss": 0.039, "step": 47987 }, { "epoch": 0.8498452629202141, "grad_norm": 0.7510262727737427, "learning_rate": 1.7388243114219143e-06, "loss": 0.0654, "step": 47988 }, { "epoch": 0.8498629724572425, "grad_norm": 0.4092474579811096, "learning_rate": 1.7384222516008647e-06, "loss": 0.0487, "step": 47989 }, { "epoch": 0.8498806819942709, "grad_norm": 0.9637469053268433, "learning_rate": 1.7380202354090652e-06, "loss": 0.0864, "step": 47990 }, { "epoch": 0.8498983915312994, "grad_norm": 0.4666910171508789, "learning_rate": 1.7376182628478381e-06, "loss": 0.0301, "step": 47991 }, { "epoch": 0.8499161010683278, "grad_norm": 0.6611672043800354, "learning_rate": 1.7372163339185076e-06, "loss": 0.0427, "step": 47992 }, { "epoch": 0.8499338106053562, "grad_norm": 0.31012070178985596, "learning_rate": 1.7368144486223909e-06, "loss": 0.0527, "step": 47993 }, { "epoch": 0.8499515201423847, "grad_norm": 0.5789828896522522, "learning_rate": 1.7364126069608132e-06, "loss": 0.0424, "step": 47994 }, { "epoch": 0.8499692296794131, "grad_norm": 0.46731138229370117, "learning_rate": 1.7360108089350956e-06, "loss": 0.0411, "step": 47995 }, { "epoch": 0.8499869392164415, "grad_norm": 0.8157058954238892, "learning_rate": 1.735609054546562e-06, "loss": 0.0633, "step": 47996 }, { "epoch": 0.8500046487534699, "grad_norm": 0.6753187775611877, "learning_rate": 1.7352073437965343e-06, "loss": 0.055, "step": 47997 }, { "epoch": 0.8500223582904984, "grad_norm": 1.00486159324646, "learning_rate": 1.7348056766863302e-06, "loss": 0.0649, "step": 47998 }, { "epoch": 0.8500400678275268, "grad_norm": 0.44188663363456726, "learning_rate": 1.7344040532172784e-06, "loss": 0.0332, "step": 47999 }, { "epoch": 0.8500577773645552, "grad_norm": 0.49647238850593567, "learning_rate": 1.734002473390693e-06, "loss": 0.0646, "step": 48000 }, { "epoch": 0.8500754869015836, "grad_norm": 0.43848565220832825, "learning_rate": 1.733600937207896e-06, "loss": 0.0766, "step": 48001 }, { "epoch": 0.8500931964386121, "grad_norm": 0.5515759587287903, "learning_rate": 1.7331994446702132e-06, "loss": 0.047, "step": 48002 }, { "epoch": 0.8501109059756405, "grad_norm": 0.2950817942619324, "learning_rate": 1.7327979957789603e-06, "loss": 0.0513, "step": 48003 }, { "epoch": 0.8501286155126689, "grad_norm": 0.7346221208572388, "learning_rate": 1.7323965905354593e-06, "loss": 0.0513, "step": 48004 }, { "epoch": 0.8501463250496973, "grad_norm": 0.40762361884117126, "learning_rate": 1.731995228941033e-06, "loss": 0.0461, "step": 48005 }, { "epoch": 0.8501640345867258, "grad_norm": 0.40065091848373413, "learning_rate": 1.7315939109969997e-06, "loss": 0.0446, "step": 48006 }, { "epoch": 0.8501817441237542, "grad_norm": 0.4900035560131073, "learning_rate": 1.7311926367046804e-06, "loss": 0.055, "step": 48007 }, { "epoch": 0.8501994536607826, "grad_norm": 0.6487911343574524, "learning_rate": 1.730791406065394e-06, "loss": 0.0514, "step": 48008 }, { "epoch": 0.8502171631978112, "grad_norm": 0.8506035208702087, "learning_rate": 1.7303902190804626e-06, "loss": 0.0531, "step": 48009 }, { "epoch": 0.8502348727348396, "grad_norm": 0.8001949787139893, "learning_rate": 1.7299890757512088e-06, "loss": 0.0485, "step": 48010 }, { "epoch": 0.850252582271868, "grad_norm": 0.6177722811698914, "learning_rate": 1.7295879760789462e-06, "loss": 0.0681, "step": 48011 }, { "epoch": 0.8502702918088964, "grad_norm": 0.5784793496131897, "learning_rate": 1.7291869200649957e-06, "loss": 0.0619, "step": 48012 }, { "epoch": 0.8502880013459249, "grad_norm": 0.5853025913238525, "learning_rate": 1.728785907710681e-06, "loss": 0.0396, "step": 48013 }, { "epoch": 0.8503057108829533, "grad_norm": 0.7487261295318604, "learning_rate": 1.7283849390173129e-06, "loss": 0.0598, "step": 48014 }, { "epoch": 0.8503234204199817, "grad_norm": 0.3529331684112549, "learning_rate": 1.72798401398622e-06, "loss": 0.081, "step": 48015 }, { "epoch": 0.8503411299570101, "grad_norm": 0.5403825044631958, "learning_rate": 1.7275831326187152e-06, "loss": 0.0387, "step": 48016 }, { "epoch": 0.8503588394940386, "grad_norm": 0.5263088941574097, "learning_rate": 1.7271822949161236e-06, "loss": 0.0476, "step": 48017 }, { "epoch": 0.850376549031067, "grad_norm": 0.8443426489830017, "learning_rate": 1.7267815008797578e-06, "loss": 0.0676, "step": 48018 }, { "epoch": 0.8503942585680954, "grad_norm": 0.8031637668609619, "learning_rate": 1.7263807505109364e-06, "loss": 0.0668, "step": 48019 }, { "epoch": 0.8504119681051238, "grad_norm": 0.5277932286262512, "learning_rate": 1.7259800438109834e-06, "loss": 0.0683, "step": 48020 }, { "epoch": 0.8504296776421523, "grad_norm": 0.39181211590766907, "learning_rate": 1.7255793807812115e-06, "loss": 0.0229, "step": 48021 }, { "epoch": 0.8504473871791807, "grad_norm": 0.6731816530227661, "learning_rate": 1.7251787614229392e-06, "loss": 0.0737, "step": 48022 }, { "epoch": 0.8504650967162091, "grad_norm": 0.6815292239189148, "learning_rate": 1.7247781857374872e-06, "loss": 0.0405, "step": 48023 }, { "epoch": 0.8504828062532376, "grad_norm": 0.7392948865890503, "learning_rate": 1.7243776537261729e-06, "loss": 0.0479, "step": 48024 }, { "epoch": 0.850500515790266, "grad_norm": 0.5166553854942322, "learning_rate": 1.723977165390312e-06, "loss": 0.0515, "step": 48025 }, { "epoch": 0.8505182253272944, "grad_norm": 0.3662310540676117, "learning_rate": 1.7235767207312232e-06, "loss": 0.0481, "step": 48026 }, { "epoch": 0.8505359348643228, "grad_norm": 0.8401452898979187, "learning_rate": 1.7231763197502288e-06, "loss": 0.0591, "step": 48027 }, { "epoch": 0.8505536444013513, "grad_norm": 0.34112176299095154, "learning_rate": 1.7227759624486379e-06, "loss": 0.0561, "step": 48028 }, { "epoch": 0.8505713539383797, "grad_norm": 0.27583301067352295, "learning_rate": 1.7223756488277693e-06, "loss": 0.0599, "step": 48029 }, { "epoch": 0.8505890634754081, "grad_norm": 0.6984997391700745, "learning_rate": 1.7219753788889435e-06, "loss": 0.04, "step": 48030 }, { "epoch": 0.8506067730124365, "grad_norm": 0.7537306547164917, "learning_rate": 1.7215751526334784e-06, "loss": 0.0495, "step": 48031 }, { "epoch": 0.850624482549465, "grad_norm": 0.8268123269081116, "learning_rate": 1.7211749700626855e-06, "loss": 0.0458, "step": 48032 }, { "epoch": 0.8506421920864934, "grad_norm": 0.35054779052734375, "learning_rate": 1.7207748311778843e-06, "loss": 0.0429, "step": 48033 }, { "epoch": 0.8506599016235218, "grad_norm": 0.5764608383178711, "learning_rate": 1.72037473598039e-06, "loss": 0.0532, "step": 48034 }, { "epoch": 0.8506776111605502, "grad_norm": 0.42727941274642944, "learning_rate": 1.7199746844715186e-06, "loss": 0.0543, "step": 48035 }, { "epoch": 0.8506953206975787, "grad_norm": 0.8738817572593689, "learning_rate": 1.7195746766525888e-06, "loss": 0.0691, "step": 48036 }, { "epoch": 0.8507130302346071, "grad_norm": 0.40327489376068115, "learning_rate": 1.7191747125249147e-06, "loss": 0.0673, "step": 48037 }, { "epoch": 0.8507307397716355, "grad_norm": 0.7406834363937378, "learning_rate": 1.718774792089815e-06, "loss": 0.0564, "step": 48038 }, { "epoch": 0.850748449308664, "grad_norm": 0.7319343090057373, "learning_rate": 1.7183749153486005e-06, "loss": 0.0513, "step": 48039 }, { "epoch": 0.8507661588456924, "grad_norm": 0.7260041236877441, "learning_rate": 1.7179750823025902e-06, "loss": 0.0772, "step": 48040 }, { "epoch": 0.8507838683827208, "grad_norm": 0.544830858707428, "learning_rate": 1.7175752929530963e-06, "loss": 0.0792, "step": 48041 }, { "epoch": 0.8508015779197492, "grad_norm": 0.5888099670410156, "learning_rate": 1.7171755473014412e-06, "loss": 0.0584, "step": 48042 }, { "epoch": 0.8508192874567777, "grad_norm": 0.4621066451072693, "learning_rate": 1.716775845348927e-06, "loss": 0.0722, "step": 48043 }, { "epoch": 0.8508369969938061, "grad_norm": 0.5493700504302979, "learning_rate": 1.7163761870968808e-06, "loss": 0.0559, "step": 48044 }, { "epoch": 0.8508547065308345, "grad_norm": 0.7547370195388794, "learning_rate": 1.715976572546617e-06, "loss": 0.0888, "step": 48045 }, { "epoch": 0.8508724160678629, "grad_norm": 0.5184974670410156, "learning_rate": 1.7155770016994426e-06, "loss": 0.055, "step": 48046 }, { "epoch": 0.8508901256048914, "grad_norm": 0.26443973183631897, "learning_rate": 1.7151774745566784e-06, "loss": 0.0576, "step": 48047 }, { "epoch": 0.8509078351419198, "grad_norm": 0.5841902494430542, "learning_rate": 1.714777991119635e-06, "loss": 0.0335, "step": 48048 }, { "epoch": 0.8509255446789482, "grad_norm": 0.5588237047195435, "learning_rate": 1.714378551389631e-06, "loss": 0.0593, "step": 48049 }, { "epoch": 0.8509432542159766, "grad_norm": 0.7848354578018188, "learning_rate": 1.713979155367974e-06, "loss": 0.0578, "step": 48050 }, { "epoch": 0.8509609637530051, "grad_norm": 0.7849710583686829, "learning_rate": 1.7135798030559828e-06, "loss": 0.0694, "step": 48051 }, { "epoch": 0.8509786732900335, "grad_norm": 0.36319848895072937, "learning_rate": 1.7131804944549716e-06, "loss": 0.0661, "step": 48052 }, { "epoch": 0.8509963828270619, "grad_norm": 1.0686676502227783, "learning_rate": 1.712781229566251e-06, "loss": 0.0715, "step": 48053 }, { "epoch": 0.8510140923640904, "grad_norm": 0.3244498074054718, "learning_rate": 1.7123820083911362e-06, "loss": 0.0304, "step": 48054 }, { "epoch": 0.8510318019011188, "grad_norm": 0.7424253821372986, "learning_rate": 1.7119828309309416e-06, "loss": 0.0453, "step": 48055 }, { "epoch": 0.8510495114381472, "grad_norm": 0.7757477760314941, "learning_rate": 1.7115836971869824e-06, "loss": 0.0385, "step": 48056 }, { "epoch": 0.8510672209751756, "grad_norm": 0.7370936870574951, "learning_rate": 1.7111846071605646e-06, "loss": 0.0855, "step": 48057 }, { "epoch": 0.8510849305122041, "grad_norm": 0.5494289994239807, "learning_rate": 1.710785560853007e-06, "loss": 0.0466, "step": 48058 }, { "epoch": 0.8511026400492325, "grad_norm": 0.3811931014060974, "learning_rate": 1.710386558265622e-06, "loss": 0.0496, "step": 48059 }, { "epoch": 0.8511203495862609, "grad_norm": 0.30788496136665344, "learning_rate": 1.7099875993997199e-06, "loss": 0.0529, "step": 48060 }, { "epoch": 0.8511380591232893, "grad_norm": 0.6413458585739136, "learning_rate": 1.7095886842566132e-06, "loss": 0.0601, "step": 48061 }, { "epoch": 0.8511557686603178, "grad_norm": 0.32815706729888916, "learning_rate": 1.709189812837616e-06, "loss": 0.0337, "step": 48062 }, { "epoch": 0.8511734781973462, "grad_norm": 0.4680594205856323, "learning_rate": 1.7087909851440387e-06, "loss": 0.0574, "step": 48063 }, { "epoch": 0.8511911877343746, "grad_norm": 0.5583462715148926, "learning_rate": 1.7083922011771968e-06, "loss": 0.0622, "step": 48064 }, { "epoch": 0.851208897271403, "grad_norm": 0.6702632904052734, "learning_rate": 1.707993460938398e-06, "loss": 0.0574, "step": 48065 }, { "epoch": 0.8512266068084315, "grad_norm": 0.4912267029285431, "learning_rate": 1.7075947644289608e-06, "loss": 0.0528, "step": 48066 }, { "epoch": 0.8512443163454599, "grad_norm": 0.5081683993339539, "learning_rate": 1.7071961116501894e-06, "loss": 0.0455, "step": 48067 }, { "epoch": 0.8512620258824883, "grad_norm": 0.3720289468765259, "learning_rate": 1.7067975026033976e-06, "loss": 0.0366, "step": 48068 }, { "epoch": 0.8512797354195168, "grad_norm": 0.9292405247688293, "learning_rate": 1.706398937289898e-06, "loss": 0.0832, "step": 48069 }, { "epoch": 0.8512974449565452, "grad_norm": 0.515883207321167, "learning_rate": 1.7060004157110043e-06, "loss": 0.0598, "step": 48070 }, { "epoch": 0.8513151544935736, "grad_norm": 0.4129527807235718, "learning_rate": 1.7056019378680222e-06, "loss": 0.0447, "step": 48071 }, { "epoch": 0.851332864030602, "grad_norm": 0.715518593788147, "learning_rate": 1.7052035037622605e-06, "loss": 0.0551, "step": 48072 }, { "epoch": 0.8513505735676306, "grad_norm": 0.9077693819999695, "learning_rate": 1.7048051133950382e-06, "loss": 0.0819, "step": 48073 }, { "epoch": 0.851368283104659, "grad_norm": 0.36413395404815674, "learning_rate": 1.704406766767666e-06, "loss": 0.0423, "step": 48074 }, { "epoch": 0.8513859926416874, "grad_norm": 0.8386957049369812, "learning_rate": 1.704008463881448e-06, "loss": 0.0582, "step": 48075 }, { "epoch": 0.8514037021787157, "grad_norm": 0.4150908887386322, "learning_rate": 1.703610204737696e-06, "loss": 0.0312, "step": 48076 }, { "epoch": 0.8514214117157443, "grad_norm": 0.632222592830658, "learning_rate": 1.7032119893377263e-06, "loss": 0.0574, "step": 48077 }, { "epoch": 0.8514391212527727, "grad_norm": 0.33483371138572693, "learning_rate": 1.7028138176828405e-06, "loss": 0.05, "step": 48078 }, { "epoch": 0.8514568307898011, "grad_norm": 0.19757558405399323, "learning_rate": 1.7024156897743514e-06, "loss": 0.0347, "step": 48079 }, { "epoch": 0.8514745403268295, "grad_norm": 0.4928795099258423, "learning_rate": 1.7020176056135712e-06, "loss": 0.0411, "step": 48080 }, { "epoch": 0.851492249863858, "grad_norm": 0.8513126373291016, "learning_rate": 1.7016195652018067e-06, "loss": 0.0442, "step": 48081 }, { "epoch": 0.8515099594008864, "grad_norm": 0.4864376485347748, "learning_rate": 1.7012215685403693e-06, "loss": 0.0607, "step": 48082 }, { "epoch": 0.8515276689379148, "grad_norm": 0.513587236404419, "learning_rate": 1.7008236156305674e-06, "loss": 0.0718, "step": 48083 }, { "epoch": 0.8515453784749433, "grad_norm": 0.9008991718292236, "learning_rate": 1.700425706473715e-06, "loss": 0.0738, "step": 48084 }, { "epoch": 0.8515630880119717, "grad_norm": 0.9516828060150146, "learning_rate": 1.7000278410711128e-06, "loss": 0.0429, "step": 48085 }, { "epoch": 0.8515807975490001, "grad_norm": 0.3555663824081421, "learning_rate": 1.6996300194240733e-06, "loss": 0.0438, "step": 48086 }, { "epoch": 0.8515985070860285, "grad_norm": 0.5079166293144226, "learning_rate": 1.6992322415339068e-06, "loss": 0.0506, "step": 48087 }, { "epoch": 0.851616216623057, "grad_norm": 0.46251314878463745, "learning_rate": 1.698834507401924e-06, "loss": 0.0448, "step": 48088 }, { "epoch": 0.8516339261600854, "grad_norm": 0.7424345016479492, "learning_rate": 1.6984368170294274e-06, "loss": 0.0704, "step": 48089 }, { "epoch": 0.8516516356971138, "grad_norm": 0.5902321338653564, "learning_rate": 1.6980391704177273e-06, "loss": 0.1167, "step": 48090 }, { "epoch": 0.8516693452341422, "grad_norm": 0.800731897354126, "learning_rate": 1.6976415675681344e-06, "loss": 0.0587, "step": 48091 }, { "epoch": 0.8516870547711707, "grad_norm": 0.5636931657791138, "learning_rate": 1.6972440084819529e-06, "loss": 0.0445, "step": 48092 }, { "epoch": 0.8517047643081991, "grad_norm": 1.123805046081543, "learning_rate": 1.696846493160495e-06, "loss": 0.0656, "step": 48093 }, { "epoch": 0.8517224738452275, "grad_norm": 0.6601908206939697, "learning_rate": 1.696449021605066e-06, "loss": 0.0843, "step": 48094 }, { "epoch": 0.8517401833822559, "grad_norm": 0.8283529877662659, "learning_rate": 1.6960515938169785e-06, "loss": 0.0738, "step": 48095 }, { "epoch": 0.8517578929192844, "grad_norm": 0.673125147819519, "learning_rate": 1.6956542097975314e-06, "loss": 0.044, "step": 48096 }, { "epoch": 0.8517756024563128, "grad_norm": 0.6080762147903442, "learning_rate": 1.6952568695480352e-06, "loss": 0.0509, "step": 48097 }, { "epoch": 0.8517933119933412, "grad_norm": 0.4430641829967499, "learning_rate": 1.694859573069804e-06, "loss": 0.0492, "step": 48098 }, { "epoch": 0.8518110215303697, "grad_norm": 0.7103374004364014, "learning_rate": 1.6944623203641335e-06, "loss": 0.0499, "step": 48099 }, { "epoch": 0.8518287310673981, "grad_norm": 0.36554110050201416, "learning_rate": 1.6940651114323374e-06, "loss": 0.0277, "step": 48100 }, { "epoch": 0.8518464406044265, "grad_norm": 0.5437408089637756, "learning_rate": 1.6936679462757183e-06, "loss": 0.0508, "step": 48101 }, { "epoch": 0.8518641501414549, "grad_norm": 0.5525273084640503, "learning_rate": 1.6932708248955914e-06, "loss": 0.0569, "step": 48102 }, { "epoch": 0.8518818596784834, "grad_norm": 0.7457324266433716, "learning_rate": 1.6928737472932543e-06, "loss": 0.0409, "step": 48103 }, { "epoch": 0.8518995692155118, "grad_norm": 0.29846101999282837, "learning_rate": 1.6924767134700175e-06, "loss": 0.0353, "step": 48104 }, { "epoch": 0.8519172787525402, "grad_norm": 0.6215561628341675, "learning_rate": 1.6920797234271885e-06, "loss": 0.1149, "step": 48105 }, { "epoch": 0.8519349882895686, "grad_norm": 0.8351004719734192, "learning_rate": 1.6916827771660692e-06, "loss": 0.055, "step": 48106 }, { "epoch": 0.8519526978265971, "grad_norm": 0.701106607913971, "learning_rate": 1.6912858746879672e-06, "loss": 0.055, "step": 48107 }, { "epoch": 0.8519704073636255, "grad_norm": 0.5738811492919922, "learning_rate": 1.690889015994188e-06, "loss": 0.0518, "step": 48108 }, { "epoch": 0.8519881169006539, "grad_norm": 0.46821123361587524, "learning_rate": 1.6904922010860375e-06, "loss": 0.0609, "step": 48109 }, { "epoch": 0.8520058264376823, "grad_norm": 1.0300520658493042, "learning_rate": 1.6900954299648224e-06, "loss": 0.0729, "step": 48110 }, { "epoch": 0.8520235359747108, "grad_norm": 0.5685996413230896, "learning_rate": 1.6896987026318472e-06, "loss": 0.0857, "step": 48111 }, { "epoch": 0.8520412455117392, "grad_norm": 0.5981286764144897, "learning_rate": 1.6893020190884157e-06, "loss": 0.0516, "step": 48112 }, { "epoch": 0.8520589550487676, "grad_norm": 0.7478236556053162, "learning_rate": 1.6889053793358384e-06, "loss": 0.0466, "step": 48113 }, { "epoch": 0.8520766645857961, "grad_norm": 0.4411180913448334, "learning_rate": 1.6885087833754126e-06, "loss": 0.0352, "step": 48114 }, { "epoch": 0.8520943741228245, "grad_norm": 0.5467243194580078, "learning_rate": 1.6881122312084474e-06, "loss": 0.0386, "step": 48115 }, { "epoch": 0.8521120836598529, "grad_norm": 0.8131869435310364, "learning_rate": 1.6877157228362483e-06, "loss": 0.0611, "step": 48116 }, { "epoch": 0.8521297931968813, "grad_norm": 0.7985805869102478, "learning_rate": 1.687319258260116e-06, "loss": 0.0385, "step": 48117 }, { "epoch": 0.8521475027339098, "grad_norm": 0.5751026272773743, "learning_rate": 1.6869228374813562e-06, "loss": 0.044, "step": 48118 }, { "epoch": 0.8521652122709382, "grad_norm": 0.5085594654083252, "learning_rate": 1.6865264605012742e-06, "loss": 0.0363, "step": 48119 }, { "epoch": 0.8521829218079666, "grad_norm": 0.49215495586395264, "learning_rate": 1.6861301273211726e-06, "loss": 0.0451, "step": 48120 }, { "epoch": 0.852200631344995, "grad_norm": 0.8532695174217224, "learning_rate": 1.685733837942357e-06, "loss": 0.0609, "step": 48121 }, { "epoch": 0.8522183408820235, "grad_norm": 0.4487665891647339, "learning_rate": 1.6853375923661312e-06, "loss": 0.0603, "step": 48122 }, { "epoch": 0.8522360504190519, "grad_norm": 0.36719128489494324, "learning_rate": 1.684941390593801e-06, "loss": 0.0465, "step": 48123 }, { "epoch": 0.8522537599560803, "grad_norm": 0.38969293236732483, "learning_rate": 1.684545232626662e-06, "loss": 0.0491, "step": 48124 }, { "epoch": 0.8522714694931087, "grad_norm": 0.4932412803173065, "learning_rate": 1.6841491184660246e-06, "loss": 0.0446, "step": 48125 }, { "epoch": 0.8522891790301372, "grad_norm": 0.533461332321167, "learning_rate": 1.683753048113188e-06, "loss": 0.0718, "step": 48126 }, { "epoch": 0.8523068885671656, "grad_norm": 0.8097090721130371, "learning_rate": 1.683357021569461e-06, "loss": 0.0487, "step": 48127 }, { "epoch": 0.852324598104194, "grad_norm": 0.6535891890525818, "learning_rate": 1.6829610388361394e-06, "loss": 0.0888, "step": 48128 }, { "epoch": 0.8523423076412225, "grad_norm": 0.665381133556366, "learning_rate": 1.6825650999145287e-06, "loss": 0.0396, "step": 48129 }, { "epoch": 0.8523600171782509, "grad_norm": 0.29116329550743103, "learning_rate": 1.6821692048059312e-06, "loss": 0.0598, "step": 48130 }, { "epoch": 0.8523777267152793, "grad_norm": 0.6610009670257568, "learning_rate": 1.6817733535116508e-06, "loss": 0.0666, "step": 48131 }, { "epoch": 0.8523954362523077, "grad_norm": 0.6448973417282104, "learning_rate": 1.68137754603299e-06, "loss": 0.0709, "step": 48132 }, { "epoch": 0.8524131457893362, "grad_norm": 0.777458906173706, "learning_rate": 1.6809817823712493e-06, "loss": 0.052, "step": 48133 }, { "epoch": 0.8524308553263646, "grad_norm": 0.8991841673851013, "learning_rate": 1.6805860625277342e-06, "loss": 0.0785, "step": 48134 }, { "epoch": 0.852448564863393, "grad_norm": 0.34526240825653076, "learning_rate": 1.6801903865037405e-06, "loss": 0.0406, "step": 48135 }, { "epoch": 0.8524662744004214, "grad_norm": 0.9667937159538269, "learning_rate": 1.6797947543005737e-06, "loss": 0.0755, "step": 48136 }, { "epoch": 0.85248398393745, "grad_norm": 0.6268022060394287, "learning_rate": 1.6793991659195362e-06, "loss": 0.0541, "step": 48137 }, { "epoch": 0.8525016934744784, "grad_norm": 0.08796784281730652, "learning_rate": 1.6790036213619237e-06, "loss": 0.0549, "step": 48138 }, { "epoch": 0.8525194030115067, "grad_norm": 0.5203419327735901, "learning_rate": 1.678608120629045e-06, "loss": 0.0437, "step": 48139 }, { "epoch": 0.8525371125485351, "grad_norm": 0.7445185780525208, "learning_rate": 1.6782126637221989e-06, "loss": 0.0694, "step": 48140 }, { "epoch": 0.8525548220855637, "grad_norm": 0.6643840670585632, "learning_rate": 1.6778172506426864e-06, "loss": 0.0651, "step": 48141 }, { "epoch": 0.8525725316225921, "grad_norm": 0.6586410403251648, "learning_rate": 1.6774218813918062e-06, "loss": 0.0636, "step": 48142 }, { "epoch": 0.8525902411596205, "grad_norm": 0.5603045225143433, "learning_rate": 1.6770265559708608e-06, "loss": 0.0673, "step": 48143 }, { "epoch": 0.852607950696649, "grad_norm": 0.5198807716369629, "learning_rate": 1.6766312743811508e-06, "loss": 0.0429, "step": 48144 }, { "epoch": 0.8526256602336774, "grad_norm": 0.256696879863739, "learning_rate": 1.6762360366239783e-06, "loss": 0.0367, "step": 48145 }, { "epoch": 0.8526433697707058, "grad_norm": 0.6640255451202393, "learning_rate": 1.6758408427006388e-06, "loss": 0.0462, "step": 48146 }, { "epoch": 0.8526610793077342, "grad_norm": 0.48608115315437317, "learning_rate": 1.6754456926124351e-06, "loss": 0.0452, "step": 48147 }, { "epoch": 0.8526787888447627, "grad_norm": 0.507337749004364, "learning_rate": 1.6750505863606674e-06, "loss": 0.0853, "step": 48148 }, { "epoch": 0.8526964983817911, "grad_norm": 0.49729862809181213, "learning_rate": 1.6746555239466366e-06, "loss": 0.0471, "step": 48149 }, { "epoch": 0.8527142079188195, "grad_norm": 0.806760847568512, "learning_rate": 1.6742605053716397e-06, "loss": 0.0505, "step": 48150 }, { "epoch": 0.8527319174558479, "grad_norm": 0.5476839542388916, "learning_rate": 1.6738655306369792e-06, "loss": 0.0504, "step": 48151 }, { "epoch": 0.8527496269928764, "grad_norm": 0.4358840584754944, "learning_rate": 1.673470599743956e-06, "loss": 0.0438, "step": 48152 }, { "epoch": 0.8527673365299048, "grad_norm": 0.39974093437194824, "learning_rate": 1.6730757126938635e-06, "loss": 0.0679, "step": 48153 }, { "epoch": 0.8527850460669332, "grad_norm": 1.1559110879898071, "learning_rate": 1.672680869488004e-06, "loss": 0.0512, "step": 48154 }, { "epoch": 0.8528027556039616, "grad_norm": 0.5372086763381958, "learning_rate": 1.6722860701276804e-06, "loss": 0.045, "step": 48155 }, { "epoch": 0.8528204651409901, "grad_norm": 0.7705947756767273, "learning_rate": 1.6718913146141828e-06, "loss": 0.062, "step": 48156 }, { "epoch": 0.8528381746780185, "grad_norm": 0.3636021912097931, "learning_rate": 1.6714966029488154e-06, "loss": 0.0375, "step": 48157 }, { "epoch": 0.8528558842150469, "grad_norm": 0.39073872566223145, "learning_rate": 1.6711019351328771e-06, "loss": 0.0186, "step": 48158 }, { "epoch": 0.8528735937520754, "grad_norm": 0.8394121527671814, "learning_rate": 1.6707073111676635e-06, "loss": 0.0761, "step": 48159 }, { "epoch": 0.8528913032891038, "grad_norm": 0.9595986008644104, "learning_rate": 1.6703127310544753e-06, "loss": 0.0574, "step": 48160 }, { "epoch": 0.8529090128261322, "grad_norm": 0.5321666598320007, "learning_rate": 1.6699181947946114e-06, "loss": 0.0585, "step": 48161 }, { "epoch": 0.8529267223631606, "grad_norm": 0.6026971936225891, "learning_rate": 1.6695237023893705e-06, "loss": 0.0529, "step": 48162 }, { "epoch": 0.8529444319001891, "grad_norm": 0.5118682980537415, "learning_rate": 1.6691292538400438e-06, "loss": 0.0678, "step": 48163 }, { "epoch": 0.8529621414372175, "grad_norm": 0.46992260217666626, "learning_rate": 1.6687348491479348e-06, "loss": 0.0429, "step": 48164 }, { "epoch": 0.8529798509742459, "grad_norm": 0.5506612062454224, "learning_rate": 1.668340488314341e-06, "loss": 0.0634, "step": 48165 }, { "epoch": 0.8529975605112743, "grad_norm": 0.34525561332702637, "learning_rate": 1.6679461713405598e-06, "loss": 0.0211, "step": 48166 }, { "epoch": 0.8530152700483028, "grad_norm": 0.21459725499153137, "learning_rate": 1.6675518982278848e-06, "loss": 0.0633, "step": 48167 }, { "epoch": 0.8530329795853312, "grad_norm": 0.3443555235862732, "learning_rate": 1.6671576689776118e-06, "loss": 0.0303, "step": 48168 }, { "epoch": 0.8530506891223596, "grad_norm": 0.7223731279373169, "learning_rate": 1.6667634835910484e-06, "loss": 0.0579, "step": 48169 }, { "epoch": 0.853068398659388, "grad_norm": 0.783759355545044, "learning_rate": 1.6663693420694815e-06, "loss": 0.0575, "step": 48170 }, { "epoch": 0.8530861081964165, "grad_norm": 0.784869372844696, "learning_rate": 1.6659752444142118e-06, "loss": 0.0744, "step": 48171 }, { "epoch": 0.8531038177334449, "grad_norm": 0.5340878367424011, "learning_rate": 1.6655811906265333e-06, "loss": 0.0446, "step": 48172 }, { "epoch": 0.8531215272704733, "grad_norm": 0.6543155908584595, "learning_rate": 1.6651871807077467e-06, "loss": 0.0736, "step": 48173 }, { "epoch": 0.8531392368075018, "grad_norm": 0.7143183946609497, "learning_rate": 1.6647932146591444e-06, "loss": 0.066, "step": 48174 }, { "epoch": 0.8531569463445302, "grad_norm": 0.7728418111801147, "learning_rate": 1.6643992924820234e-06, "loss": 0.0813, "step": 48175 }, { "epoch": 0.8531746558815586, "grad_norm": 0.7766959071159363, "learning_rate": 1.6640054141776779e-06, "loss": 0.086, "step": 48176 }, { "epoch": 0.853192365418587, "grad_norm": 0.669898509979248, "learning_rate": 1.6636115797474066e-06, "loss": 0.0637, "step": 48177 }, { "epoch": 0.8532100749556155, "grad_norm": 0.8282049894332886, "learning_rate": 1.6632177891925055e-06, "loss": 0.042, "step": 48178 }, { "epoch": 0.8532277844926439, "grad_norm": 0.5146521925926208, "learning_rate": 1.6628240425142682e-06, "loss": 0.0479, "step": 48179 }, { "epoch": 0.8532454940296723, "grad_norm": 0.6409523487091064, "learning_rate": 1.662430339713994e-06, "loss": 0.0669, "step": 48180 }, { "epoch": 0.8532632035667007, "grad_norm": 0.4009620249271393, "learning_rate": 1.6620366807929715e-06, "loss": 0.0287, "step": 48181 }, { "epoch": 0.8532809131037292, "grad_norm": 0.5703258514404297, "learning_rate": 1.6616430657525e-06, "loss": 0.0373, "step": 48182 }, { "epoch": 0.8532986226407576, "grad_norm": 0.6217457056045532, "learning_rate": 1.6612494945938733e-06, "loss": 0.0471, "step": 48183 }, { "epoch": 0.853316332177786, "grad_norm": 0.515014111995697, "learning_rate": 1.6608559673183888e-06, "loss": 0.0633, "step": 48184 }, { "epoch": 0.8533340417148144, "grad_norm": 0.6744979023933411, "learning_rate": 1.660462483927337e-06, "loss": 0.0442, "step": 48185 }, { "epoch": 0.8533517512518429, "grad_norm": 0.276110976934433, "learning_rate": 1.6600690444220136e-06, "loss": 0.0306, "step": 48186 }, { "epoch": 0.8533694607888713, "grad_norm": 0.5622934699058533, "learning_rate": 1.6596756488037156e-06, "loss": 0.0613, "step": 48187 }, { "epoch": 0.8533871703258997, "grad_norm": 0.17928446829319, "learning_rate": 1.6592822970737342e-06, "loss": 0.0588, "step": 48188 }, { "epoch": 0.8534048798629282, "grad_norm": 0.3925146460533142, "learning_rate": 1.6588889892333647e-06, "loss": 0.0633, "step": 48189 }, { "epoch": 0.8534225893999566, "grad_norm": 0.5882738828659058, "learning_rate": 1.6584957252839012e-06, "loss": 0.0517, "step": 48190 }, { "epoch": 0.853440298936985, "grad_norm": 0.7779645323753357, "learning_rate": 1.658102505226639e-06, "loss": 0.0414, "step": 48191 }, { "epoch": 0.8534580084740134, "grad_norm": 0.8062353730201721, "learning_rate": 1.6577093290628692e-06, "loss": 0.0738, "step": 48192 }, { "epoch": 0.8534757180110419, "grad_norm": 0.5778706669807434, "learning_rate": 1.6573161967938855e-06, "loss": 0.0402, "step": 48193 }, { "epoch": 0.8534934275480703, "grad_norm": 1.004807472229004, "learning_rate": 1.6569231084209836e-06, "loss": 0.0966, "step": 48194 }, { "epoch": 0.8535111370850987, "grad_norm": 0.496114581823349, "learning_rate": 1.656530063945454e-06, "loss": 0.0323, "step": 48195 }, { "epoch": 0.8535288466221271, "grad_norm": 0.2678234279155731, "learning_rate": 1.6561370633685906e-06, "loss": 0.0563, "step": 48196 }, { "epoch": 0.8535465561591556, "grad_norm": 0.7583609223365784, "learning_rate": 1.6557441066916829e-06, "loss": 0.0784, "step": 48197 }, { "epoch": 0.853564265696184, "grad_norm": 0.9391985535621643, "learning_rate": 1.6553511939160326e-06, "loss": 0.0824, "step": 48198 }, { "epoch": 0.8535819752332124, "grad_norm": 0.7761027812957764, "learning_rate": 1.6549583250429257e-06, "loss": 0.0597, "step": 48199 }, { "epoch": 0.8535996847702408, "grad_norm": 0.5763790607452393, "learning_rate": 1.6545655000736559e-06, "loss": 0.0661, "step": 48200 }, { "epoch": 0.8536173943072694, "grad_norm": 0.5031616687774658, "learning_rate": 1.654172719009519e-06, "loss": 0.039, "step": 48201 }, { "epoch": 0.8536351038442977, "grad_norm": 0.5038774013519287, "learning_rate": 1.6537799818518008e-06, "loss": 0.0679, "step": 48202 }, { "epoch": 0.8536528133813261, "grad_norm": 0.5418456792831421, "learning_rate": 1.6533872886017965e-06, "loss": 0.0558, "step": 48203 }, { "epoch": 0.8536705229183547, "grad_norm": 0.7685694098472595, "learning_rate": 1.652994639260797e-06, "loss": 0.0797, "step": 48204 }, { "epoch": 0.8536882324553831, "grad_norm": 0.6488708257675171, "learning_rate": 1.6526020338300962e-06, "loss": 0.0624, "step": 48205 }, { "epoch": 0.8537059419924115, "grad_norm": 0.4287474751472473, "learning_rate": 1.6522094723109849e-06, "loss": 0.0666, "step": 48206 }, { "epoch": 0.8537236515294399, "grad_norm": 0.5285224914550781, "learning_rate": 1.6518169547047535e-06, "loss": 0.0666, "step": 48207 }, { "epoch": 0.8537413610664684, "grad_norm": 0.8964556455612183, "learning_rate": 1.6514244810126978e-06, "loss": 0.0713, "step": 48208 }, { "epoch": 0.8537590706034968, "grad_norm": 0.5395047664642334, "learning_rate": 1.6510320512361033e-06, "loss": 0.0602, "step": 48209 }, { "epoch": 0.8537767801405252, "grad_norm": 0.7883501052856445, "learning_rate": 1.6506396653762623e-06, "loss": 0.0468, "step": 48210 }, { "epoch": 0.8537944896775536, "grad_norm": 0.9323434829711914, "learning_rate": 1.6502473234344656e-06, "loss": 0.0762, "step": 48211 }, { "epoch": 0.8538121992145821, "grad_norm": 0.408739298582077, "learning_rate": 1.6498550254120105e-06, "loss": 0.0382, "step": 48212 }, { "epoch": 0.8538299087516105, "grad_norm": 0.5415124893188477, "learning_rate": 1.6494627713101773e-06, "loss": 0.0493, "step": 48213 }, { "epoch": 0.8538476182886389, "grad_norm": 0.5209469795227051, "learning_rate": 1.6490705611302604e-06, "loss": 0.061, "step": 48214 }, { "epoch": 0.8538653278256673, "grad_norm": 0.814318835735321, "learning_rate": 1.6486783948735518e-06, "loss": 0.0747, "step": 48215 }, { "epoch": 0.8538830373626958, "grad_norm": 0.6105988025665283, "learning_rate": 1.6482862725413423e-06, "loss": 0.0614, "step": 48216 }, { "epoch": 0.8539007468997242, "grad_norm": 0.2421310991048813, "learning_rate": 1.647894194134919e-06, "loss": 0.0266, "step": 48217 }, { "epoch": 0.8539184564367526, "grad_norm": 0.6724823713302612, "learning_rate": 1.6475021596555745e-06, "loss": 0.0693, "step": 48218 }, { "epoch": 0.8539361659737811, "grad_norm": 0.5439916253089905, "learning_rate": 1.6471101691045993e-06, "loss": 0.0582, "step": 48219 }, { "epoch": 0.8539538755108095, "grad_norm": 0.6807570457458496, "learning_rate": 1.6467182224832773e-06, "loss": 0.0666, "step": 48220 }, { "epoch": 0.8539715850478379, "grad_norm": 0.6215760111808777, "learning_rate": 1.646326319792904e-06, "loss": 0.0513, "step": 48221 }, { "epoch": 0.8539892945848663, "grad_norm": 0.6309245228767395, "learning_rate": 1.6459344610347654e-06, "loss": 0.0529, "step": 48222 }, { "epoch": 0.8540070041218948, "grad_norm": 0.6290873885154724, "learning_rate": 1.6455426462101536e-06, "loss": 0.0797, "step": 48223 }, { "epoch": 0.8540247136589232, "grad_norm": 0.6149270534515381, "learning_rate": 1.6451508753203541e-06, "loss": 0.0643, "step": 48224 }, { "epoch": 0.8540424231959516, "grad_norm": 0.7193863391876221, "learning_rate": 1.644759148366658e-06, "loss": 0.0532, "step": 48225 }, { "epoch": 0.85406013273298, "grad_norm": 0.6374552249908447, "learning_rate": 1.644367465350352e-06, "loss": 0.0538, "step": 48226 }, { "epoch": 0.8540778422700085, "grad_norm": 0.5526109933853149, "learning_rate": 1.6439758262727273e-06, "loss": 0.0483, "step": 48227 }, { "epoch": 0.8540955518070369, "grad_norm": 0.5017808079719543, "learning_rate": 1.6435842311350707e-06, "loss": 0.0595, "step": 48228 }, { "epoch": 0.8541132613440653, "grad_norm": 0.5928739309310913, "learning_rate": 1.6431926799386699e-06, "loss": 0.0704, "step": 48229 }, { "epoch": 0.8541309708810937, "grad_norm": 0.6640543937683105, "learning_rate": 1.6428011726848185e-06, "loss": 0.0498, "step": 48230 }, { "epoch": 0.8541486804181222, "grad_norm": 0.7225087285041809, "learning_rate": 1.6424097093747976e-06, "loss": 0.0756, "step": 48231 }, { "epoch": 0.8541663899551506, "grad_norm": 0.39386263489723206, "learning_rate": 1.6420182900098973e-06, "loss": 0.0597, "step": 48232 }, { "epoch": 0.854184099492179, "grad_norm": 0.2729107737541199, "learning_rate": 1.6416269145914087e-06, "loss": 0.047, "step": 48233 }, { "epoch": 0.8542018090292075, "grad_norm": 0.49561581015586853, "learning_rate": 1.6412355831206106e-06, "loss": 0.0296, "step": 48234 }, { "epoch": 0.8542195185662359, "grad_norm": 0.3490099608898163, "learning_rate": 1.6408442955988e-06, "loss": 0.0569, "step": 48235 }, { "epoch": 0.8542372281032643, "grad_norm": 0.8284720182418823, "learning_rate": 1.6404530520272598e-06, "loss": 0.0866, "step": 48236 }, { "epoch": 0.8542549376402927, "grad_norm": 0.5946980714797974, "learning_rate": 1.6400618524072802e-06, "loss": 0.0544, "step": 48237 }, { "epoch": 0.8542726471773212, "grad_norm": 0.5163580179214478, "learning_rate": 1.6396706967401437e-06, "loss": 0.0613, "step": 48238 }, { "epoch": 0.8542903567143496, "grad_norm": 0.4994984269142151, "learning_rate": 1.6392795850271392e-06, "loss": 0.0431, "step": 48239 }, { "epoch": 0.854308066251378, "grad_norm": 0.443220853805542, "learning_rate": 1.6388885172695556e-06, "loss": 0.0358, "step": 48240 }, { "epoch": 0.8543257757884064, "grad_norm": 0.5306351780891418, "learning_rate": 1.6384974934686754e-06, "loss": 0.0398, "step": 48241 }, { "epoch": 0.8543434853254349, "grad_norm": 0.6276335716247559, "learning_rate": 1.6381065136257857e-06, "loss": 0.05, "step": 48242 }, { "epoch": 0.8543611948624633, "grad_norm": 0.5673477649688721, "learning_rate": 1.6377155777421754e-06, "loss": 0.0476, "step": 48243 }, { "epoch": 0.8543789043994917, "grad_norm": 0.3855193853378296, "learning_rate": 1.6373246858191288e-06, "loss": 0.0265, "step": 48244 }, { "epoch": 0.8543966139365201, "grad_norm": 0.5827771425247192, "learning_rate": 1.636933837857933e-06, "loss": 0.0515, "step": 48245 }, { "epoch": 0.8544143234735486, "grad_norm": 0.8640928864479065, "learning_rate": 1.636543033859872e-06, "loss": 0.0325, "step": 48246 }, { "epoch": 0.854432033010577, "grad_norm": 0.4846804141998291, "learning_rate": 1.6361522738262346e-06, "loss": 0.0673, "step": 48247 }, { "epoch": 0.8544497425476054, "grad_norm": 0.44401517510414124, "learning_rate": 1.6357615577583067e-06, "loss": 0.0525, "step": 48248 }, { "epoch": 0.8544674520846339, "grad_norm": 0.6477267742156982, "learning_rate": 1.6353708856573674e-06, "loss": 0.0593, "step": 48249 }, { "epoch": 0.8544851616216623, "grad_norm": 0.330551415681839, "learning_rate": 1.6349802575247086e-06, "loss": 0.0427, "step": 48250 }, { "epoch": 0.8545028711586907, "grad_norm": 0.6358726024627686, "learning_rate": 1.6345896733616146e-06, "loss": 0.0576, "step": 48251 }, { "epoch": 0.8545205806957191, "grad_norm": 0.47097474336624146, "learning_rate": 1.6341991331693656e-06, "loss": 0.0467, "step": 48252 }, { "epoch": 0.8545382902327476, "grad_norm": 0.3578733801841736, "learning_rate": 1.6338086369492494e-06, "loss": 0.0441, "step": 48253 }, { "epoch": 0.854555999769776, "grad_norm": 0.6032176613807678, "learning_rate": 1.6334181847025514e-06, "loss": 0.0486, "step": 48254 }, { "epoch": 0.8545737093068044, "grad_norm": 0.39715349674224854, "learning_rate": 1.633027776430554e-06, "loss": 0.035, "step": 48255 }, { "epoch": 0.8545914188438328, "grad_norm": 0.9135023951530457, "learning_rate": 1.6326374121345444e-06, "loss": 0.0549, "step": 48256 }, { "epoch": 0.8546091283808613, "grad_norm": 0.34894081950187683, "learning_rate": 1.6322470918158067e-06, "loss": 0.0311, "step": 48257 }, { "epoch": 0.8546268379178897, "grad_norm": 0.5746719241142273, "learning_rate": 1.6318568154756263e-06, "loss": 0.0648, "step": 48258 }, { "epoch": 0.8546445474549181, "grad_norm": 0.5829422473907471, "learning_rate": 1.631466583115281e-06, "loss": 0.0695, "step": 48259 }, { "epoch": 0.8546622569919465, "grad_norm": 0.46842581033706665, "learning_rate": 1.6310763947360607e-06, "loss": 0.0453, "step": 48260 }, { "epoch": 0.854679966528975, "grad_norm": 0.2715097963809967, "learning_rate": 1.630686250339245e-06, "loss": 0.0293, "step": 48261 }, { "epoch": 0.8546976760660034, "grad_norm": 0.7764281034469604, "learning_rate": 1.6302961499261227e-06, "loss": 0.0642, "step": 48262 }, { "epoch": 0.8547153856030318, "grad_norm": 0.661502480506897, "learning_rate": 1.6299060934979675e-06, "loss": 0.0481, "step": 48263 }, { "epoch": 0.8547330951400604, "grad_norm": 0.7197816967964172, "learning_rate": 1.6295160810560721e-06, "loss": 0.0717, "step": 48264 }, { "epoch": 0.8547508046770887, "grad_norm": 0.450120747089386, "learning_rate": 1.6291261126017204e-06, "loss": 0.0314, "step": 48265 }, { "epoch": 0.8547685142141171, "grad_norm": 0.5477861762046814, "learning_rate": 1.6287361881361878e-06, "loss": 0.0668, "step": 48266 }, { "epoch": 0.8547862237511455, "grad_norm": 0.3655910789966583, "learning_rate": 1.6283463076607601e-06, "loss": 0.0443, "step": 48267 }, { "epoch": 0.8548039332881741, "grad_norm": 0.7657728791236877, "learning_rate": 1.6279564711767214e-06, "loss": 0.0743, "step": 48268 }, { "epoch": 0.8548216428252025, "grad_norm": 0.5264068841934204, "learning_rate": 1.6275666786853555e-06, "loss": 0.0575, "step": 48269 }, { "epoch": 0.8548393523622309, "grad_norm": 0.874984622001648, "learning_rate": 1.6271769301879397e-06, "loss": 0.0784, "step": 48270 }, { "epoch": 0.8548570618992593, "grad_norm": 0.6017594933509827, "learning_rate": 1.6267872256857597e-06, "loss": 0.0637, "step": 48271 }, { "epoch": 0.8548747714362878, "grad_norm": 0.4713999927043915, "learning_rate": 1.6263975651800978e-06, "loss": 0.0345, "step": 48272 }, { "epoch": 0.8548924809733162, "grad_norm": 0.44088733196258545, "learning_rate": 1.6260079486722346e-06, "loss": 0.0474, "step": 48273 }, { "epoch": 0.8549101905103446, "grad_norm": 0.6037033796310425, "learning_rate": 1.6256183761634525e-06, "loss": 0.0553, "step": 48274 }, { "epoch": 0.854927900047373, "grad_norm": 0.3161298930644989, "learning_rate": 1.6252288476550337e-06, "loss": 0.0469, "step": 48275 }, { "epoch": 0.8549456095844015, "grad_norm": 0.45233091711997986, "learning_rate": 1.6248393631482622e-06, "loss": 0.0288, "step": 48276 }, { "epoch": 0.8549633191214299, "grad_norm": 0.3600277602672577, "learning_rate": 1.624449922644412e-06, "loss": 0.0283, "step": 48277 }, { "epoch": 0.8549810286584583, "grad_norm": 0.3445596992969513, "learning_rate": 1.6240605261447704e-06, "loss": 0.0452, "step": 48278 }, { "epoch": 0.8549987381954868, "grad_norm": 0.8833514451980591, "learning_rate": 1.6236711736506182e-06, "loss": 0.0543, "step": 48279 }, { "epoch": 0.8550164477325152, "grad_norm": 0.8534322381019592, "learning_rate": 1.6232818651632325e-06, "loss": 0.0697, "step": 48280 }, { "epoch": 0.8550341572695436, "grad_norm": 0.3657957911491394, "learning_rate": 1.6228926006838974e-06, "loss": 0.0526, "step": 48281 }, { "epoch": 0.855051866806572, "grad_norm": 0.7615175247192383, "learning_rate": 1.6225033802138916e-06, "loss": 0.0848, "step": 48282 }, { "epoch": 0.8550695763436005, "grad_norm": 0.5855501294136047, "learning_rate": 1.6221142037544978e-06, "loss": 0.0582, "step": 48283 }, { "epoch": 0.8550872858806289, "grad_norm": 0.9334006905555725, "learning_rate": 1.6217250713069932e-06, "loss": 0.0577, "step": 48284 }, { "epoch": 0.8551049954176573, "grad_norm": 0.4295058846473694, "learning_rate": 1.6213359828726615e-06, "loss": 0.0557, "step": 48285 }, { "epoch": 0.8551227049546857, "grad_norm": 0.5663389563560486, "learning_rate": 1.620946938452782e-06, "loss": 0.0376, "step": 48286 }, { "epoch": 0.8551404144917142, "grad_norm": 0.4968852698802948, "learning_rate": 1.6205579380486352e-06, "loss": 0.0498, "step": 48287 }, { "epoch": 0.8551581240287426, "grad_norm": 0.9271144866943359, "learning_rate": 1.6201689816614967e-06, "loss": 0.0706, "step": 48288 }, { "epoch": 0.855175833565771, "grad_norm": 0.3953608572483063, "learning_rate": 1.619780069292649e-06, "loss": 0.0456, "step": 48289 }, { "epoch": 0.8551935431027994, "grad_norm": 0.5388227701187134, "learning_rate": 1.619391200943376e-06, "loss": 0.0556, "step": 48290 }, { "epoch": 0.8552112526398279, "grad_norm": 1.0144208669662476, "learning_rate": 1.619002376614948e-06, "loss": 0.0626, "step": 48291 }, { "epoch": 0.8552289621768563, "grad_norm": 0.6488995552062988, "learning_rate": 1.6186135963086463e-06, "loss": 0.0592, "step": 48292 }, { "epoch": 0.8552466717138847, "grad_norm": 0.5620397925376892, "learning_rate": 1.618224860025756e-06, "loss": 0.0429, "step": 48293 }, { "epoch": 0.8552643812509132, "grad_norm": 0.3224093019962311, "learning_rate": 1.6178361677675546e-06, "loss": 0.0454, "step": 48294 }, { "epoch": 0.8552820907879416, "grad_norm": 0.5934162735939026, "learning_rate": 1.6174475195353162e-06, "loss": 0.0632, "step": 48295 }, { "epoch": 0.85529980032497, "grad_norm": 0.8237841129302979, "learning_rate": 1.6170589153303211e-06, "loss": 0.0601, "step": 48296 }, { "epoch": 0.8553175098619984, "grad_norm": 0.6310744285583496, "learning_rate": 1.616670355153852e-06, "loss": 0.0841, "step": 48297 }, { "epoch": 0.8553352193990269, "grad_norm": 0.4593549370765686, "learning_rate": 1.616281839007181e-06, "loss": 0.0407, "step": 48298 }, { "epoch": 0.8553529289360553, "grad_norm": 0.2990395426750183, "learning_rate": 1.6158933668915887e-06, "loss": 0.0339, "step": 48299 }, { "epoch": 0.8553706384730837, "grad_norm": 0.7579976320266724, "learning_rate": 1.6155049388083542e-06, "loss": 0.0629, "step": 48300 }, { "epoch": 0.8553883480101121, "grad_norm": 0.6126383543014526, "learning_rate": 1.6151165547587532e-06, "loss": 0.0294, "step": 48301 }, { "epoch": 0.8554060575471406, "grad_norm": 0.6449260711669922, "learning_rate": 1.6147282147440662e-06, "loss": 0.0637, "step": 48302 }, { "epoch": 0.855423767084169, "grad_norm": 0.36487817764282227, "learning_rate": 1.6143399187655688e-06, "loss": 0.0434, "step": 48303 }, { "epoch": 0.8554414766211974, "grad_norm": 0.6309124231338501, "learning_rate": 1.6139516668245435e-06, "loss": 0.0647, "step": 48304 }, { "epoch": 0.8554591861582258, "grad_norm": 0.9739925265312195, "learning_rate": 1.6135634589222591e-06, "loss": 0.052, "step": 48305 }, { "epoch": 0.8554768956952543, "grad_norm": 0.8993807435035706, "learning_rate": 1.6131752950599982e-06, "loss": 0.0922, "step": 48306 }, { "epoch": 0.8554946052322827, "grad_norm": 0.41146397590637207, "learning_rate": 1.6127871752390344e-06, "loss": 0.061, "step": 48307 }, { "epoch": 0.8555123147693111, "grad_norm": 0.3909674882888794, "learning_rate": 1.612399099460652e-06, "loss": 0.0453, "step": 48308 }, { "epoch": 0.8555300243063396, "grad_norm": 0.5493356585502625, "learning_rate": 1.612011067726118e-06, "loss": 0.0556, "step": 48309 }, { "epoch": 0.855547733843368, "grad_norm": 0.6628394722938538, "learning_rate": 1.6116230800367149e-06, "loss": 0.0413, "step": 48310 }, { "epoch": 0.8555654433803964, "grad_norm": 0.7435221672058105, "learning_rate": 1.6112351363937167e-06, "loss": 0.0749, "step": 48311 }, { "epoch": 0.8555831529174248, "grad_norm": 0.6236861944198608, "learning_rate": 1.6108472367984007e-06, "loss": 0.0432, "step": 48312 }, { "epoch": 0.8556008624544533, "grad_norm": 0.7353840470314026, "learning_rate": 1.6104593812520423e-06, "loss": 0.0654, "step": 48313 }, { "epoch": 0.8556185719914817, "grad_norm": 0.7365247011184692, "learning_rate": 1.6100715697559192e-06, "loss": 0.0507, "step": 48314 }, { "epoch": 0.8556362815285101, "grad_norm": 0.801315426826477, "learning_rate": 1.6096838023113104e-06, "loss": 0.0788, "step": 48315 }, { "epoch": 0.8556539910655385, "grad_norm": 0.92979896068573, "learning_rate": 1.6092960789194828e-06, "loss": 0.0589, "step": 48316 }, { "epoch": 0.855671700602567, "grad_norm": 0.457201212644577, "learning_rate": 1.6089083995817173e-06, "loss": 0.0438, "step": 48317 }, { "epoch": 0.8556894101395954, "grad_norm": 0.5777432322502136, "learning_rate": 1.6085207642992894e-06, "loss": 0.0451, "step": 48318 }, { "epoch": 0.8557071196766238, "grad_norm": 0.5245455503463745, "learning_rate": 1.6081331730734749e-06, "loss": 0.0939, "step": 48319 }, { "epoch": 0.8557248292136522, "grad_norm": 0.2827112078666687, "learning_rate": 1.6077456259055463e-06, "loss": 0.0761, "step": 48320 }, { "epoch": 0.8557425387506807, "grad_norm": 0.5809934735298157, "learning_rate": 1.607358122796777e-06, "loss": 0.0431, "step": 48321 }, { "epoch": 0.8557602482877091, "grad_norm": 0.5526171326637268, "learning_rate": 1.60697066374845e-06, "loss": 0.0341, "step": 48322 }, { "epoch": 0.8557779578247375, "grad_norm": 0.5980977416038513, "learning_rate": 1.6065832487618337e-06, "loss": 0.0676, "step": 48323 }, { "epoch": 0.855795667361766, "grad_norm": 0.9907322525978088, "learning_rate": 1.6061958778382024e-06, "loss": 0.0743, "step": 48324 }, { "epoch": 0.8558133768987944, "grad_norm": 0.7306222319602966, "learning_rate": 1.6058085509788317e-06, "loss": 0.0809, "step": 48325 }, { "epoch": 0.8558310864358228, "grad_norm": 0.6662831902503967, "learning_rate": 1.6054212681850007e-06, "loss": 0.0626, "step": 48326 }, { "epoch": 0.8558487959728512, "grad_norm": 0.5970261693000793, "learning_rate": 1.6050340294579746e-06, "loss": 0.0469, "step": 48327 }, { "epoch": 0.8558665055098797, "grad_norm": 0.5933638215065002, "learning_rate": 1.604646834799031e-06, "loss": 0.0564, "step": 48328 }, { "epoch": 0.8558842150469081, "grad_norm": 0.3937448561191559, "learning_rate": 1.604259684209446e-06, "loss": 0.0526, "step": 48329 }, { "epoch": 0.8559019245839365, "grad_norm": 0.8389663696289062, "learning_rate": 1.603872577690491e-06, "loss": 0.077, "step": 48330 }, { "epoch": 0.855919634120965, "grad_norm": 0.47080904245376587, "learning_rate": 1.6034855152434407e-06, "loss": 0.0571, "step": 48331 }, { "epoch": 0.8559373436579935, "grad_norm": 0.7676511406898499, "learning_rate": 1.6030984968695672e-06, "loss": 0.0631, "step": 48332 }, { "epoch": 0.8559550531950219, "grad_norm": 0.46312740445137024, "learning_rate": 1.6027115225701493e-06, "loss": 0.0508, "step": 48333 }, { "epoch": 0.8559727627320503, "grad_norm": 0.8271578550338745, "learning_rate": 1.6023245923464512e-06, "loss": 0.065, "step": 48334 }, { "epoch": 0.8559904722690787, "grad_norm": 0.4719274640083313, "learning_rate": 1.6019377061997503e-06, "loss": 0.0538, "step": 48335 }, { "epoch": 0.8560081818061072, "grad_norm": 0.6705155968666077, "learning_rate": 1.601550864131322e-06, "loss": 0.0669, "step": 48336 }, { "epoch": 0.8560258913431356, "grad_norm": 0.4784501791000366, "learning_rate": 1.6011640661424338e-06, "loss": 0.0499, "step": 48337 }, { "epoch": 0.856043600880164, "grad_norm": 0.5923333764076233, "learning_rate": 1.6007773122343595e-06, "loss": 0.0413, "step": 48338 }, { "epoch": 0.8560613104171925, "grad_norm": 0.547667384147644, "learning_rate": 1.6003906024083732e-06, "loss": 0.0404, "step": 48339 }, { "epoch": 0.8560790199542209, "grad_norm": 0.26609042286872864, "learning_rate": 1.6000039366657472e-06, "loss": 0.0306, "step": 48340 }, { "epoch": 0.8560967294912493, "grad_norm": 0.8019350171089172, "learning_rate": 1.599617315007752e-06, "loss": 0.0578, "step": 48341 }, { "epoch": 0.8561144390282777, "grad_norm": 0.7118580341339111, "learning_rate": 1.5992307374356617e-06, "loss": 0.0473, "step": 48342 }, { "epoch": 0.8561321485653062, "grad_norm": 0.6099665760993958, "learning_rate": 1.598844203950749e-06, "loss": 0.0565, "step": 48343 }, { "epoch": 0.8561498581023346, "grad_norm": 0.7877942323684692, "learning_rate": 1.5984577145542805e-06, "loss": 0.0542, "step": 48344 }, { "epoch": 0.856167567639363, "grad_norm": 0.735200822353363, "learning_rate": 1.5980712692475291e-06, "loss": 0.0569, "step": 48345 }, { "epoch": 0.8561852771763914, "grad_norm": 0.4684561491012573, "learning_rate": 1.5976848680317702e-06, "loss": 0.0747, "step": 48346 }, { "epoch": 0.8562029867134199, "grad_norm": 0.6716261506080627, "learning_rate": 1.5972985109082743e-06, "loss": 0.054, "step": 48347 }, { "epoch": 0.8562206962504483, "grad_norm": 0.6297183632850647, "learning_rate": 1.5969121978783075e-06, "loss": 0.0483, "step": 48348 }, { "epoch": 0.8562384057874767, "grad_norm": 0.621922492980957, "learning_rate": 1.596525928943145e-06, "loss": 0.0583, "step": 48349 }, { "epoch": 0.8562561153245051, "grad_norm": 0.697749674320221, "learning_rate": 1.596139704104056e-06, "loss": 0.0534, "step": 48350 }, { "epoch": 0.8562738248615336, "grad_norm": 0.42007413506507874, "learning_rate": 1.595753523362311e-06, "loss": 0.0542, "step": 48351 }, { "epoch": 0.856291534398562, "grad_norm": 0.23032033443450928, "learning_rate": 1.595367386719181e-06, "loss": 0.0249, "step": 48352 }, { "epoch": 0.8563092439355904, "grad_norm": 0.7125635743141174, "learning_rate": 1.594981294175938e-06, "loss": 0.0673, "step": 48353 }, { "epoch": 0.8563269534726189, "grad_norm": 0.6186542510986328, "learning_rate": 1.5945952457338526e-06, "loss": 0.0688, "step": 48354 }, { "epoch": 0.8563446630096473, "grad_norm": 0.5179996490478516, "learning_rate": 1.5942092413941906e-06, "loss": 0.0522, "step": 48355 }, { "epoch": 0.8563623725466757, "grad_norm": 0.4050736129283905, "learning_rate": 1.5938232811582225e-06, "loss": 0.081, "step": 48356 }, { "epoch": 0.8563800820837041, "grad_norm": 1.0175408124923706, "learning_rate": 1.5934373650272228e-06, "loss": 0.0666, "step": 48357 }, { "epoch": 0.8563977916207326, "grad_norm": 0.4188251793384552, "learning_rate": 1.5930514930024563e-06, "loss": 0.0481, "step": 48358 }, { "epoch": 0.856415501157761, "grad_norm": 0.5764959454536438, "learning_rate": 1.5926656650851944e-06, "loss": 0.0557, "step": 48359 }, { "epoch": 0.8564332106947894, "grad_norm": 0.45681145787239075, "learning_rate": 1.592279881276706e-06, "loss": 0.0586, "step": 48360 }, { "epoch": 0.8564509202318178, "grad_norm": 0.3448670506477356, "learning_rate": 1.5918941415782645e-06, "loss": 0.0381, "step": 48361 }, { "epoch": 0.8564686297688463, "grad_norm": 0.6094405651092529, "learning_rate": 1.5915084459911327e-06, "loss": 0.0585, "step": 48362 }, { "epoch": 0.8564863393058747, "grad_norm": 0.44845303893089294, "learning_rate": 1.5911227945165812e-06, "loss": 0.0473, "step": 48363 }, { "epoch": 0.8565040488429031, "grad_norm": 0.4816831648349762, "learning_rate": 1.590737187155879e-06, "loss": 0.0287, "step": 48364 }, { "epoch": 0.8565217583799316, "grad_norm": 0.5947635173797607, "learning_rate": 1.5903516239102983e-06, "loss": 0.0544, "step": 48365 }, { "epoch": 0.85653946791696, "grad_norm": 0.5879095792770386, "learning_rate": 1.5899661047811015e-06, "loss": 0.0594, "step": 48366 }, { "epoch": 0.8565571774539884, "grad_norm": 0.7574335932731628, "learning_rate": 1.589580629769561e-06, "loss": 0.0687, "step": 48367 }, { "epoch": 0.8565748869910168, "grad_norm": 0.37314677238464355, "learning_rate": 1.589195198876942e-06, "loss": 0.0749, "step": 48368 }, { "epoch": 0.8565925965280453, "grad_norm": 1.071239948272705, "learning_rate": 1.5888098121045158e-06, "loss": 0.0832, "step": 48369 }, { "epoch": 0.8566103060650737, "grad_norm": 0.5930922627449036, "learning_rate": 1.5884244694535493e-06, "loss": 0.0566, "step": 48370 }, { "epoch": 0.8566280156021021, "grad_norm": 0.6879421472549438, "learning_rate": 1.5880391709253084e-06, "loss": 0.043, "step": 48371 }, { "epoch": 0.8566457251391305, "grad_norm": 0.3834231197834015, "learning_rate": 1.5876539165210652e-06, "loss": 0.0578, "step": 48372 }, { "epoch": 0.856663434676159, "grad_norm": 0.607391893863678, "learning_rate": 1.587268706242082e-06, "loss": 0.0617, "step": 48373 }, { "epoch": 0.8566811442131874, "grad_norm": 0.491451621055603, "learning_rate": 1.5868835400896282e-06, "loss": 0.0483, "step": 48374 }, { "epoch": 0.8566988537502158, "grad_norm": 0.6143106818199158, "learning_rate": 1.5864984180649722e-06, "loss": 0.0688, "step": 48375 }, { "epoch": 0.8567165632872442, "grad_norm": 0.7365536093711853, "learning_rate": 1.5861133401693783e-06, "loss": 0.0597, "step": 48376 }, { "epoch": 0.8567342728242727, "grad_norm": 0.48315155506134033, "learning_rate": 1.585728306404114e-06, "loss": 0.0378, "step": 48377 }, { "epoch": 0.8567519823613011, "grad_norm": 0.634771466255188, "learning_rate": 1.585343316770448e-06, "loss": 0.0652, "step": 48378 }, { "epoch": 0.8567696918983295, "grad_norm": 0.5052488446235657, "learning_rate": 1.5849583712696442e-06, "loss": 0.0421, "step": 48379 }, { "epoch": 0.856787401435358, "grad_norm": 0.45553871989250183, "learning_rate": 1.584573469902972e-06, "loss": 0.0531, "step": 48380 }, { "epoch": 0.8568051109723864, "grad_norm": 0.6040341258049011, "learning_rate": 1.5841886126716948e-06, "loss": 0.0418, "step": 48381 }, { "epoch": 0.8568228205094148, "grad_norm": 0.5778940320014954, "learning_rate": 1.5838037995770839e-06, "loss": 0.051, "step": 48382 }, { "epoch": 0.8568405300464432, "grad_norm": 0.8465243577957153, "learning_rate": 1.5834190306203978e-06, "loss": 0.0595, "step": 48383 }, { "epoch": 0.8568582395834717, "grad_norm": 0.5485846996307373, "learning_rate": 1.5830343058029057e-06, "loss": 0.0612, "step": 48384 }, { "epoch": 0.8568759491205001, "grad_norm": 0.2530606985092163, "learning_rate": 1.5826496251258748e-06, "loss": 0.03, "step": 48385 }, { "epoch": 0.8568936586575285, "grad_norm": 0.47548791766166687, "learning_rate": 1.5822649885905727e-06, "loss": 0.0657, "step": 48386 }, { "epoch": 0.8569113681945569, "grad_norm": 0.7612427473068237, "learning_rate": 1.5818803961982547e-06, "loss": 0.0463, "step": 48387 }, { "epoch": 0.8569290777315854, "grad_norm": 0.7343817949295044, "learning_rate": 1.5814958479501968e-06, "loss": 0.0552, "step": 48388 }, { "epoch": 0.8569467872686138, "grad_norm": 0.8467622399330139, "learning_rate": 1.581111343847661e-06, "loss": 0.0734, "step": 48389 }, { "epoch": 0.8569644968056422, "grad_norm": 0.32908475399017334, "learning_rate": 1.5807268838919132e-06, "loss": 0.0415, "step": 48390 }, { "epoch": 0.8569822063426706, "grad_norm": 0.6336488723754883, "learning_rate": 1.5803424680842137e-06, "loss": 0.0724, "step": 48391 }, { "epoch": 0.8569999158796991, "grad_norm": 0.7900490164756775, "learning_rate": 1.579958096425832e-06, "loss": 0.0695, "step": 48392 }, { "epoch": 0.8570176254167275, "grad_norm": 0.7573105096817017, "learning_rate": 1.5795737689180317e-06, "loss": 0.0431, "step": 48393 }, { "epoch": 0.857035334953756, "grad_norm": 0.3311643898487091, "learning_rate": 1.5791894855620736e-06, "loss": 0.0497, "step": 48394 }, { "epoch": 0.8570530444907845, "grad_norm": 0.6159719228744507, "learning_rate": 1.578805246359225e-06, "loss": 0.0755, "step": 48395 }, { "epoch": 0.8570707540278129, "grad_norm": 0.42283347249031067, "learning_rate": 1.57842105131075e-06, "loss": 0.0422, "step": 48396 }, { "epoch": 0.8570884635648413, "grad_norm": 0.4843575060367584, "learning_rate": 1.5780369004179123e-06, "loss": 0.0488, "step": 48397 }, { "epoch": 0.8571061731018697, "grad_norm": 0.49316492676734924, "learning_rate": 1.5776527936819762e-06, "loss": 0.044, "step": 48398 }, { "epoch": 0.8571238826388982, "grad_norm": 0.33173054456710815, "learning_rate": 1.5772687311042038e-06, "loss": 0.0631, "step": 48399 }, { "epoch": 0.8571415921759266, "grad_norm": 0.7358466386795044, "learning_rate": 1.5768847126858626e-06, "loss": 0.0572, "step": 48400 }, { "epoch": 0.857159301712955, "grad_norm": 0.4523613154888153, "learning_rate": 1.5765007384282099e-06, "loss": 0.0726, "step": 48401 }, { "epoch": 0.8571770112499834, "grad_norm": 0.671395480632782, "learning_rate": 1.5761168083325128e-06, "loss": 0.0521, "step": 48402 }, { "epoch": 0.8571947207870119, "grad_norm": 0.8452854752540588, "learning_rate": 1.5757329224000338e-06, "loss": 0.0858, "step": 48403 }, { "epoch": 0.8572124303240403, "grad_norm": 0.5900613069534302, "learning_rate": 1.5753490806320385e-06, "loss": 0.0548, "step": 48404 }, { "epoch": 0.8572301398610687, "grad_norm": 0.4041203260421753, "learning_rate": 1.5749652830297829e-06, "loss": 0.043, "step": 48405 }, { "epoch": 0.8572478493980971, "grad_norm": 0.7759423851966858, "learning_rate": 1.5745815295945353e-06, "loss": 0.033, "step": 48406 }, { "epoch": 0.8572655589351256, "grad_norm": 0.581706702709198, "learning_rate": 1.5741978203275553e-06, "loss": 0.0624, "step": 48407 }, { "epoch": 0.857283268472154, "grad_norm": 0.7735551595687866, "learning_rate": 1.5738141552301067e-06, "loss": 0.0435, "step": 48408 }, { "epoch": 0.8573009780091824, "grad_norm": 0.5925207138061523, "learning_rate": 1.5734305343034517e-06, "loss": 0.0598, "step": 48409 }, { "epoch": 0.8573186875462109, "grad_norm": 0.784808337688446, "learning_rate": 1.5730469575488526e-06, "loss": 0.0852, "step": 48410 }, { "epoch": 0.8573363970832393, "grad_norm": 0.5965703129768372, "learning_rate": 1.5726634249675737e-06, "loss": 0.0568, "step": 48411 }, { "epoch": 0.8573541066202677, "grad_norm": 0.41286179423332214, "learning_rate": 1.5722799365608703e-06, "loss": 0.0626, "step": 48412 }, { "epoch": 0.8573718161572961, "grad_norm": 0.6743369102478027, "learning_rate": 1.5718964923300084e-06, "loss": 0.0704, "step": 48413 }, { "epoch": 0.8573895256943246, "grad_norm": 0.6317995190620422, "learning_rate": 1.5715130922762517e-06, "loss": 0.0646, "step": 48414 }, { "epoch": 0.857407235231353, "grad_norm": 0.5148085951805115, "learning_rate": 1.571129736400856e-06, "loss": 0.0434, "step": 48415 }, { "epoch": 0.8574249447683814, "grad_norm": 0.5599604249000549, "learning_rate": 1.5707464247050835e-06, "loss": 0.0445, "step": 48416 }, { "epoch": 0.8574426543054098, "grad_norm": 0.6691621541976929, "learning_rate": 1.5703631571901984e-06, "loss": 0.0378, "step": 48417 }, { "epoch": 0.8574603638424383, "grad_norm": 0.9935517311096191, "learning_rate": 1.5699799338574628e-06, "loss": 0.0607, "step": 48418 }, { "epoch": 0.8574780733794667, "grad_norm": 0.6384130120277405, "learning_rate": 1.5695967547081325e-06, "loss": 0.0398, "step": 48419 }, { "epoch": 0.8574957829164951, "grad_norm": 0.7209140062332153, "learning_rate": 1.5692136197434714e-06, "loss": 0.0563, "step": 48420 }, { "epoch": 0.8575134924535235, "grad_norm": 0.6223021149635315, "learning_rate": 1.568830528964737e-06, "loss": 0.0433, "step": 48421 }, { "epoch": 0.857531201990552, "grad_norm": 0.4927254021167755, "learning_rate": 1.5684474823731965e-06, "loss": 0.0598, "step": 48422 }, { "epoch": 0.8575489115275804, "grad_norm": 0.8998358845710754, "learning_rate": 1.5680644799701006e-06, "loss": 0.0649, "step": 48423 }, { "epoch": 0.8575666210646088, "grad_norm": 0.6685179471969604, "learning_rate": 1.5676815217567147e-06, "loss": 0.0481, "step": 48424 }, { "epoch": 0.8575843306016373, "grad_norm": 0.7893729209899902, "learning_rate": 1.5672986077342983e-06, "loss": 0.0601, "step": 48425 }, { "epoch": 0.8576020401386657, "grad_norm": 0.5684301257133484, "learning_rate": 1.5669157379041115e-06, "loss": 0.0579, "step": 48426 }, { "epoch": 0.8576197496756941, "grad_norm": 0.5657396912574768, "learning_rate": 1.5665329122674122e-06, "loss": 0.0598, "step": 48427 }, { "epoch": 0.8576374592127225, "grad_norm": 0.49926885962486267, "learning_rate": 1.5661501308254623e-06, "loss": 0.0396, "step": 48428 }, { "epoch": 0.857655168749751, "grad_norm": 0.5859932899475098, "learning_rate": 1.5657673935795208e-06, "loss": 0.0517, "step": 48429 }, { "epoch": 0.8576728782867794, "grad_norm": 0.18381349742412567, "learning_rate": 1.5653847005308437e-06, "loss": 0.032, "step": 48430 }, { "epoch": 0.8576905878238078, "grad_norm": 0.44146808981895447, "learning_rate": 1.5650020516806913e-06, "loss": 0.0596, "step": 48431 }, { "epoch": 0.8577082973608362, "grad_norm": 0.8913219571113586, "learning_rate": 1.5646194470303277e-06, "loss": 0.0601, "step": 48432 }, { "epoch": 0.8577260068978647, "grad_norm": 0.5603565573692322, "learning_rate": 1.5642368865810036e-06, "loss": 0.0463, "step": 48433 }, { "epoch": 0.8577437164348931, "grad_norm": 0.5131874680519104, "learning_rate": 1.5638543703339813e-06, "loss": 0.031, "step": 48434 }, { "epoch": 0.8577614259719215, "grad_norm": 0.706731915473938, "learning_rate": 1.5634718982905184e-06, "loss": 0.052, "step": 48435 }, { "epoch": 0.8577791355089499, "grad_norm": 0.6147817373275757, "learning_rate": 1.563089470451875e-06, "loss": 0.0373, "step": 48436 }, { "epoch": 0.8577968450459784, "grad_norm": 0.5425767302513123, "learning_rate": 1.5627070868193072e-06, "loss": 0.0337, "step": 48437 }, { "epoch": 0.8578145545830068, "grad_norm": 0.7547354698181152, "learning_rate": 1.5623247473940755e-06, "loss": 0.0526, "step": 48438 }, { "epoch": 0.8578322641200352, "grad_norm": 1.0349518060684204, "learning_rate": 1.5619424521774372e-06, "loss": 0.0974, "step": 48439 }, { "epoch": 0.8578499736570637, "grad_norm": 0.6336364150047302, "learning_rate": 1.561560201170648e-06, "loss": 0.0461, "step": 48440 }, { "epoch": 0.8578676831940921, "grad_norm": 0.707211434841156, "learning_rate": 1.5611779943749654e-06, "loss": 0.0582, "step": 48441 }, { "epoch": 0.8578853927311205, "grad_norm": 0.9988144040107727, "learning_rate": 1.5607958317916482e-06, "loss": 0.0731, "step": 48442 }, { "epoch": 0.8579031022681489, "grad_norm": 0.5236201286315918, "learning_rate": 1.5604137134219554e-06, "loss": 0.0606, "step": 48443 }, { "epoch": 0.8579208118051774, "grad_norm": 0.6184082627296448, "learning_rate": 1.560031639267141e-06, "loss": 0.0478, "step": 48444 }, { "epoch": 0.8579385213422058, "grad_norm": 0.6120446920394897, "learning_rate": 1.559649609328459e-06, "loss": 0.0503, "step": 48445 }, { "epoch": 0.8579562308792342, "grad_norm": 0.7747011184692383, "learning_rate": 1.5592676236071767e-06, "loss": 0.0511, "step": 48446 }, { "epoch": 0.8579739404162626, "grad_norm": 0.47131940722465515, "learning_rate": 1.5588856821045416e-06, "loss": 0.0534, "step": 48447 }, { "epoch": 0.8579916499532911, "grad_norm": 0.8234347105026245, "learning_rate": 1.5585037848218125e-06, "loss": 0.0763, "step": 48448 }, { "epoch": 0.8580093594903195, "grad_norm": 0.6145737767219543, "learning_rate": 1.5581219317602468e-06, "loss": 0.0416, "step": 48449 }, { "epoch": 0.8580270690273479, "grad_norm": 0.33964359760284424, "learning_rate": 1.5577401229211036e-06, "loss": 0.0503, "step": 48450 }, { "epoch": 0.8580447785643763, "grad_norm": 0.7789707779884338, "learning_rate": 1.5573583583056318e-06, "loss": 0.0785, "step": 48451 }, { "epoch": 0.8580624881014048, "grad_norm": 0.5439966917037964, "learning_rate": 1.5569766379150918e-06, "loss": 0.0488, "step": 48452 }, { "epoch": 0.8580801976384332, "grad_norm": 0.9252006411552429, "learning_rate": 1.5565949617507396e-06, "loss": 0.0772, "step": 48453 }, { "epoch": 0.8580979071754616, "grad_norm": 0.41206541657447815, "learning_rate": 1.556213329813831e-06, "loss": 0.0633, "step": 48454 }, { "epoch": 0.8581156167124901, "grad_norm": 0.9531115889549255, "learning_rate": 1.5558317421056194e-06, "loss": 0.0774, "step": 48455 }, { "epoch": 0.8581333262495185, "grad_norm": 0.7320114970207214, "learning_rate": 1.5554501986273613e-06, "loss": 0.0808, "step": 48456 }, { "epoch": 0.858151035786547, "grad_norm": 0.6946337223052979, "learning_rate": 1.5550686993803165e-06, "loss": 0.0638, "step": 48457 }, { "epoch": 0.8581687453235753, "grad_norm": 0.7750273942947388, "learning_rate": 1.5546872443657313e-06, "loss": 0.0639, "step": 48458 }, { "epoch": 0.8581864548606039, "grad_norm": 0.3919742703437805, "learning_rate": 1.5543058335848658e-06, "loss": 0.0342, "step": 48459 }, { "epoch": 0.8582041643976323, "grad_norm": 0.7243895530700684, "learning_rate": 1.5539244670389745e-06, "loss": 0.0425, "step": 48460 }, { "epoch": 0.8582218739346607, "grad_norm": 0.5096859335899353, "learning_rate": 1.5535431447293147e-06, "loss": 0.0264, "step": 48461 }, { "epoch": 0.858239583471689, "grad_norm": 0.2514970004558563, "learning_rate": 1.5531618666571351e-06, "loss": 0.0328, "step": 48462 }, { "epoch": 0.8582572930087176, "grad_norm": 0.9596647620201111, "learning_rate": 1.5527806328236932e-06, "loss": 0.0615, "step": 48463 }, { "epoch": 0.858275002545746, "grad_norm": 0.5706255435943604, "learning_rate": 1.5523994432302413e-06, "loss": 0.0617, "step": 48464 }, { "epoch": 0.8582927120827744, "grad_norm": 0.7919673919677734, "learning_rate": 1.5520182978780368e-06, "loss": 0.0429, "step": 48465 }, { "epoch": 0.8583104216198028, "grad_norm": 0.7563979029655457, "learning_rate": 1.5516371967683319e-06, "loss": 0.096, "step": 48466 }, { "epoch": 0.8583281311568313, "grad_norm": 1.031133770942688, "learning_rate": 1.5512561399023788e-06, "loss": 0.0499, "step": 48467 }, { "epoch": 0.8583458406938597, "grad_norm": 0.5446734428405762, "learning_rate": 1.550875127281437e-06, "loss": 0.0449, "step": 48468 }, { "epoch": 0.8583635502308881, "grad_norm": 0.4913758337497711, "learning_rate": 1.5504941589067517e-06, "loss": 0.05, "step": 48469 }, { "epoch": 0.8583812597679166, "grad_norm": 0.6651251912117004, "learning_rate": 1.5501132347795822e-06, "loss": 0.0677, "step": 48470 }, { "epoch": 0.858398969304945, "grad_norm": 0.4324833154678345, "learning_rate": 1.5497323549011804e-06, "loss": 0.0528, "step": 48471 }, { "epoch": 0.8584166788419734, "grad_norm": 0.6234728693962097, "learning_rate": 1.5493515192727975e-06, "loss": 0.0408, "step": 48472 }, { "epoch": 0.8584343883790018, "grad_norm": 0.3853164315223694, "learning_rate": 1.548970727895687e-06, "loss": 0.0307, "step": 48473 }, { "epoch": 0.8584520979160303, "grad_norm": 0.42117005586624146, "learning_rate": 1.5485899807711002e-06, "loss": 0.0453, "step": 48474 }, { "epoch": 0.8584698074530587, "grad_norm": 0.4611189365386963, "learning_rate": 1.5482092779002954e-06, "loss": 0.0477, "step": 48475 }, { "epoch": 0.8584875169900871, "grad_norm": 0.42006152868270874, "learning_rate": 1.5478286192845202e-06, "loss": 0.034, "step": 48476 }, { "epoch": 0.8585052265271155, "grad_norm": 0.3461296856403351, "learning_rate": 1.5474480049250289e-06, "loss": 0.0434, "step": 48477 }, { "epoch": 0.858522936064144, "grad_norm": 0.5129451751708984, "learning_rate": 1.547067434823075e-06, "loss": 0.0493, "step": 48478 }, { "epoch": 0.8585406456011724, "grad_norm": 0.9736839532852173, "learning_rate": 1.5466869089799063e-06, "loss": 0.0463, "step": 48479 }, { "epoch": 0.8585583551382008, "grad_norm": 0.614082396030426, "learning_rate": 1.5463064273967763e-06, "loss": 0.059, "step": 48480 }, { "epoch": 0.8585760646752292, "grad_norm": 1.0167813301086426, "learning_rate": 1.5459259900749378e-06, "loss": 0.0652, "step": 48481 }, { "epoch": 0.8585937742122577, "grad_norm": 0.45197051763534546, "learning_rate": 1.5455455970156413e-06, "loss": 0.0705, "step": 48482 }, { "epoch": 0.8586114837492861, "grad_norm": 0.8803799152374268, "learning_rate": 1.545165248220139e-06, "loss": 0.0392, "step": 48483 }, { "epoch": 0.8586291932863145, "grad_norm": 0.6280434131622314, "learning_rate": 1.5447849436896832e-06, "loss": 0.0464, "step": 48484 }, { "epoch": 0.858646902823343, "grad_norm": 0.7801451683044434, "learning_rate": 1.5444046834255266e-06, "loss": 0.0492, "step": 48485 }, { "epoch": 0.8586646123603714, "grad_norm": 0.5805729031562805, "learning_rate": 1.5440244674289145e-06, "loss": 0.0567, "step": 48486 }, { "epoch": 0.8586823218973998, "grad_norm": 0.38177815079689026, "learning_rate": 1.543644295701101e-06, "loss": 0.0507, "step": 48487 }, { "epoch": 0.8587000314344282, "grad_norm": 0.8549968004226685, "learning_rate": 1.5432641682433368e-06, "loss": 0.0604, "step": 48488 }, { "epoch": 0.8587177409714567, "grad_norm": 0.9094500541687012, "learning_rate": 1.5428840850568743e-06, "loss": 0.0474, "step": 48489 }, { "epoch": 0.8587354505084851, "grad_norm": 0.5992032885551453, "learning_rate": 1.5425040461429606e-06, "loss": 0.0699, "step": 48490 }, { "epoch": 0.8587531600455135, "grad_norm": 0.5002512335777283, "learning_rate": 1.5421240515028467e-06, "loss": 0.0651, "step": 48491 }, { "epoch": 0.8587708695825419, "grad_norm": 0.9790782332420349, "learning_rate": 1.541744101137783e-06, "loss": 0.0669, "step": 48492 }, { "epoch": 0.8587885791195704, "grad_norm": 0.6730038523674011, "learning_rate": 1.5413641950490203e-06, "loss": 0.0256, "step": 48493 }, { "epoch": 0.8588062886565988, "grad_norm": 0.5188204050064087, "learning_rate": 1.5409843332378093e-06, "loss": 0.0591, "step": 48494 }, { "epoch": 0.8588239981936272, "grad_norm": 0.7541248202323914, "learning_rate": 1.5406045157053973e-06, "loss": 0.0433, "step": 48495 }, { "epoch": 0.8588417077306556, "grad_norm": 0.6098155379295349, "learning_rate": 1.540224742453038e-06, "loss": 0.0704, "step": 48496 }, { "epoch": 0.8588594172676841, "grad_norm": 0.5587857365608215, "learning_rate": 1.539845013481976e-06, "loss": 0.0498, "step": 48497 }, { "epoch": 0.8588771268047125, "grad_norm": 0.6428380012512207, "learning_rate": 1.5394653287934613e-06, "loss": 0.0807, "step": 48498 }, { "epoch": 0.8588948363417409, "grad_norm": 0.7285224795341492, "learning_rate": 1.539085688388745e-06, "loss": 0.0393, "step": 48499 }, { "epoch": 0.8589125458787694, "grad_norm": 0.49922698736190796, "learning_rate": 1.5387060922690776e-06, "loss": 0.0626, "step": 48500 }, { "epoch": 0.8589302554157978, "grad_norm": 0.5112382769584656, "learning_rate": 1.5383265404357032e-06, "loss": 0.0386, "step": 48501 }, { "epoch": 0.8589479649528262, "grad_norm": 0.4172895848751068, "learning_rate": 1.5379470328898725e-06, "loss": 0.0327, "step": 48502 }, { "epoch": 0.8589656744898546, "grad_norm": 0.6391692161560059, "learning_rate": 1.5375675696328362e-06, "loss": 0.0433, "step": 48503 }, { "epoch": 0.8589833840268831, "grad_norm": 0.34882453083992004, "learning_rate": 1.5371881506658398e-06, "loss": 0.0502, "step": 48504 }, { "epoch": 0.8590010935639115, "grad_norm": 0.4897383153438568, "learning_rate": 1.5368087759901322e-06, "loss": 0.0361, "step": 48505 }, { "epoch": 0.8590188031009399, "grad_norm": 0.7368823289871216, "learning_rate": 1.5364294456069612e-06, "loss": 0.054, "step": 48506 }, { "epoch": 0.8590365126379683, "grad_norm": 0.4470532536506653, "learning_rate": 1.5360501595175803e-06, "loss": 0.0508, "step": 48507 }, { "epoch": 0.8590542221749968, "grad_norm": 0.1453734040260315, "learning_rate": 1.5356709177232287e-06, "loss": 0.0443, "step": 48508 }, { "epoch": 0.8590719317120252, "grad_norm": 1.2384063005447388, "learning_rate": 1.5352917202251587e-06, "loss": 0.0931, "step": 48509 }, { "epoch": 0.8590896412490536, "grad_norm": 0.7704496383666992, "learning_rate": 1.5349125670246195e-06, "loss": 0.0502, "step": 48510 }, { "epoch": 0.859107350786082, "grad_norm": 0.9436314105987549, "learning_rate": 1.5345334581228498e-06, "loss": 0.0743, "step": 48511 }, { "epoch": 0.8591250603231105, "grad_norm": 0.5474034547805786, "learning_rate": 1.5341543935211056e-06, "loss": 0.0593, "step": 48512 }, { "epoch": 0.8591427698601389, "grad_norm": 0.770984411239624, "learning_rate": 1.5337753732206321e-06, "loss": 0.0759, "step": 48513 }, { "epoch": 0.8591604793971673, "grad_norm": 0.8088493943214417, "learning_rate": 1.5333963972226788e-06, "loss": 0.05, "step": 48514 }, { "epoch": 0.8591781889341958, "grad_norm": 0.41618776321411133, "learning_rate": 1.5330174655284862e-06, "loss": 0.0779, "step": 48515 }, { "epoch": 0.8591958984712242, "grad_norm": 0.3100229799747467, "learning_rate": 1.5326385781393049e-06, "loss": 0.0612, "step": 48516 }, { "epoch": 0.8592136080082526, "grad_norm": 0.45774951577186584, "learning_rate": 1.5322597350563822e-06, "loss": 0.0815, "step": 48517 }, { "epoch": 0.859231317545281, "grad_norm": 0.38676273822784424, "learning_rate": 1.5318809362809604e-06, "loss": 0.0282, "step": 48518 }, { "epoch": 0.8592490270823095, "grad_norm": 0.2735002040863037, "learning_rate": 1.5315021818142871e-06, "loss": 0.0449, "step": 48519 }, { "epoch": 0.859266736619338, "grad_norm": 0.5479046106338501, "learning_rate": 1.5311234716576111e-06, "loss": 0.0753, "step": 48520 }, { "epoch": 0.8592844461563663, "grad_norm": 0.6850792169570923, "learning_rate": 1.5307448058121748e-06, "loss": 0.0538, "step": 48521 }, { "epoch": 0.8593021556933947, "grad_norm": 0.5334681868553162, "learning_rate": 1.530366184279227e-06, "loss": 0.0683, "step": 48522 }, { "epoch": 0.8593198652304233, "grad_norm": 0.8888779878616333, "learning_rate": 1.529987607060012e-06, "loss": 0.0574, "step": 48523 }, { "epoch": 0.8593375747674517, "grad_norm": 0.8750950694084167, "learning_rate": 1.5296090741557772e-06, "loss": 0.0486, "step": 48524 }, { "epoch": 0.85935528430448, "grad_norm": 0.7047216296195984, "learning_rate": 1.5292305855677647e-06, "loss": 0.0575, "step": 48525 }, { "epoch": 0.8593729938415084, "grad_norm": 1.082268476486206, "learning_rate": 1.52885214129722e-06, "loss": 0.0784, "step": 48526 }, { "epoch": 0.859390703378537, "grad_norm": 0.46789631247520447, "learning_rate": 1.5284737413453908e-06, "loss": 0.0576, "step": 48527 }, { "epoch": 0.8594084129155654, "grad_norm": 0.4939376413822174, "learning_rate": 1.528095385713521e-06, "loss": 0.0572, "step": 48528 }, { "epoch": 0.8594261224525938, "grad_norm": 0.5683611631393433, "learning_rate": 1.527717074402853e-06, "loss": 0.0591, "step": 48529 }, { "epoch": 0.8594438319896223, "grad_norm": 0.6346588730812073, "learning_rate": 1.5273388074146339e-06, "loss": 0.0713, "step": 48530 }, { "epoch": 0.8594615415266507, "grad_norm": 0.6728652119636536, "learning_rate": 1.5269605847501062e-06, "loss": 0.0822, "step": 48531 }, { "epoch": 0.8594792510636791, "grad_norm": 0.3739544153213501, "learning_rate": 1.5265824064105154e-06, "loss": 0.0513, "step": 48532 }, { "epoch": 0.8594969606007075, "grad_norm": 0.5264652967453003, "learning_rate": 1.5262042723971076e-06, "loss": 0.0568, "step": 48533 }, { "epoch": 0.859514670137736, "grad_norm": 0.7592443823814392, "learning_rate": 1.525826182711123e-06, "loss": 0.0554, "step": 48534 }, { "epoch": 0.8595323796747644, "grad_norm": 0.9870969653129578, "learning_rate": 1.525448137353811e-06, "loss": 0.0656, "step": 48535 }, { "epoch": 0.8595500892117928, "grad_norm": 0.6176568865776062, "learning_rate": 1.5250701363264085e-06, "loss": 0.0611, "step": 48536 }, { "epoch": 0.8595677987488212, "grad_norm": 0.34191855788230896, "learning_rate": 1.5246921796301632e-06, "loss": 0.035, "step": 48537 }, { "epoch": 0.8595855082858497, "grad_norm": 0.5461338758468628, "learning_rate": 1.5243142672663175e-06, "loss": 0.0511, "step": 48538 }, { "epoch": 0.8596032178228781, "grad_norm": 0.5741933584213257, "learning_rate": 1.5239363992361165e-06, "loss": 0.053, "step": 48539 }, { "epoch": 0.8596209273599065, "grad_norm": 0.42919620871543884, "learning_rate": 1.5235585755407982e-06, "loss": 0.0215, "step": 48540 }, { "epoch": 0.8596386368969349, "grad_norm": 0.6000073552131653, "learning_rate": 1.5231807961816097e-06, "loss": 0.0325, "step": 48541 }, { "epoch": 0.8596563464339634, "grad_norm": 0.4802347719669342, "learning_rate": 1.522803061159798e-06, "loss": 0.0325, "step": 48542 }, { "epoch": 0.8596740559709918, "grad_norm": 0.5768892168998718, "learning_rate": 1.5224253704765979e-06, "loss": 0.055, "step": 48543 }, { "epoch": 0.8596917655080202, "grad_norm": 0.8775882124900818, "learning_rate": 1.5220477241332543e-06, "loss": 0.0893, "step": 48544 }, { "epoch": 0.8597094750450487, "grad_norm": 0.7467602491378784, "learning_rate": 1.5216701221310098e-06, "loss": 0.0471, "step": 48545 }, { "epoch": 0.8597271845820771, "grad_norm": 0.38336828351020813, "learning_rate": 1.5212925644711118e-06, "loss": 0.0367, "step": 48546 }, { "epoch": 0.8597448941191055, "grad_norm": 0.49731937050819397, "learning_rate": 1.5209150511547942e-06, "loss": 0.0621, "step": 48547 }, { "epoch": 0.8597626036561339, "grad_norm": 0.5001323819160461, "learning_rate": 1.5205375821833029e-06, "loss": 0.0555, "step": 48548 }, { "epoch": 0.8597803131931624, "grad_norm": 0.2581596374511719, "learning_rate": 1.52016015755788e-06, "loss": 0.0664, "step": 48549 }, { "epoch": 0.8597980227301908, "grad_norm": 0.9321964383125305, "learning_rate": 1.519782777279768e-06, "loss": 0.0707, "step": 48550 }, { "epoch": 0.8598157322672192, "grad_norm": 0.7152893543243408, "learning_rate": 1.5194054413502057e-06, "loss": 0.0579, "step": 48551 }, { "epoch": 0.8598334418042476, "grad_norm": 0.5098767280578613, "learning_rate": 1.5190281497704357e-06, "loss": 0.0593, "step": 48552 }, { "epoch": 0.8598511513412761, "grad_norm": 0.6191819906234741, "learning_rate": 1.5186509025417017e-06, "loss": 0.0508, "step": 48553 }, { "epoch": 0.8598688608783045, "grad_norm": 0.4459203779697418, "learning_rate": 1.5182736996652414e-06, "loss": 0.0433, "step": 48554 }, { "epoch": 0.8598865704153329, "grad_norm": 0.7596091032028198, "learning_rate": 1.5178965411422968e-06, "loss": 0.0698, "step": 48555 }, { "epoch": 0.8599042799523613, "grad_norm": 0.6180038452148438, "learning_rate": 1.517519426974112e-06, "loss": 0.0763, "step": 48556 }, { "epoch": 0.8599219894893898, "grad_norm": 0.6156378388404846, "learning_rate": 1.5171423571619214e-06, "loss": 0.0522, "step": 48557 }, { "epoch": 0.8599396990264182, "grad_norm": 0.49240046739578247, "learning_rate": 1.5167653317069686e-06, "loss": 0.0312, "step": 48558 }, { "epoch": 0.8599574085634466, "grad_norm": 0.38303086161613464, "learning_rate": 1.5163883506104942e-06, "loss": 0.0549, "step": 48559 }, { "epoch": 0.8599751181004751, "grad_norm": 0.43160298466682434, "learning_rate": 1.5160114138737373e-06, "loss": 0.0482, "step": 48560 }, { "epoch": 0.8599928276375035, "grad_norm": 0.5846404433250427, "learning_rate": 1.5156345214979405e-06, "loss": 0.0431, "step": 48561 }, { "epoch": 0.8600105371745319, "grad_norm": 0.4801837205886841, "learning_rate": 1.5152576734843426e-06, "loss": 0.055, "step": 48562 }, { "epoch": 0.8600282467115603, "grad_norm": 0.592972993850708, "learning_rate": 1.5148808698341827e-06, "loss": 0.0531, "step": 48563 }, { "epoch": 0.8600459562485888, "grad_norm": 0.267461359500885, "learning_rate": 1.5145041105487028e-06, "loss": 0.0543, "step": 48564 }, { "epoch": 0.8600636657856172, "grad_norm": 0.6959390044212341, "learning_rate": 1.5141273956291373e-06, "loss": 0.0721, "step": 48565 }, { "epoch": 0.8600813753226456, "grad_norm": 0.8319501280784607, "learning_rate": 1.5137507250767303e-06, "loss": 0.0675, "step": 48566 }, { "epoch": 0.860099084859674, "grad_norm": 0.44323739409446716, "learning_rate": 1.5133740988927203e-06, "loss": 0.0499, "step": 48567 }, { "epoch": 0.8601167943967025, "grad_norm": 0.6417495012283325, "learning_rate": 1.5129975170783433e-06, "loss": 0.0323, "step": 48568 }, { "epoch": 0.8601345039337309, "grad_norm": 0.8067941665649414, "learning_rate": 1.5126209796348384e-06, "loss": 0.0621, "step": 48569 }, { "epoch": 0.8601522134707593, "grad_norm": 0.48048481345176697, "learning_rate": 1.5122444865634477e-06, "loss": 0.0348, "step": 48570 }, { "epoch": 0.8601699230077877, "grad_norm": 0.37375277280807495, "learning_rate": 1.5118680378654122e-06, "loss": 0.0615, "step": 48571 }, { "epoch": 0.8601876325448162, "grad_norm": 0.28681209683418274, "learning_rate": 1.5114916335419637e-06, "loss": 0.0332, "step": 48572 }, { "epoch": 0.8602053420818446, "grad_norm": 0.5623531341552734, "learning_rate": 1.5111152735943434e-06, "loss": 0.0604, "step": 48573 }, { "epoch": 0.860223051618873, "grad_norm": 0.5837752819061279, "learning_rate": 1.5107389580237934e-06, "loss": 0.0455, "step": 48574 }, { "epoch": 0.8602407611559015, "grad_norm": 0.5539703369140625, "learning_rate": 1.510362686831543e-06, "loss": 0.0681, "step": 48575 }, { "epoch": 0.8602584706929299, "grad_norm": 0.7472788095474243, "learning_rate": 1.5099864600188356e-06, "loss": 0.0487, "step": 48576 }, { "epoch": 0.8602761802299583, "grad_norm": 0.4683331251144409, "learning_rate": 1.5096102775869092e-06, "loss": 0.0528, "step": 48577 }, { "epoch": 0.8602938897669867, "grad_norm": 0.8876245617866516, "learning_rate": 1.5092341395370008e-06, "loss": 0.0672, "step": 48578 }, { "epoch": 0.8603115993040152, "grad_norm": 0.6548442840576172, "learning_rate": 1.508858045870346e-06, "loss": 0.0623, "step": 48579 }, { "epoch": 0.8603293088410436, "grad_norm": 0.436898797750473, "learning_rate": 1.508481996588184e-06, "loss": 0.054, "step": 48580 }, { "epoch": 0.860347018378072, "grad_norm": 0.29647934436798096, "learning_rate": 1.5081059916917555e-06, "loss": 0.0619, "step": 48581 }, { "epoch": 0.8603647279151004, "grad_norm": 0.5547342896461487, "learning_rate": 1.5077300311822895e-06, "loss": 0.072, "step": 48582 }, { "epoch": 0.860382437452129, "grad_norm": 0.4265742301940918, "learning_rate": 1.5073541150610281e-06, "loss": 0.0634, "step": 48583 }, { "epoch": 0.8604001469891573, "grad_norm": 0.832844614982605, "learning_rate": 1.5069782433292073e-06, "loss": 0.0691, "step": 48584 }, { "epoch": 0.8604178565261857, "grad_norm": 0.5469692945480347, "learning_rate": 1.506602415988066e-06, "loss": 0.0617, "step": 48585 }, { "epoch": 0.8604355660632141, "grad_norm": 0.43420958518981934, "learning_rate": 1.506226633038833e-06, "loss": 0.0447, "step": 48586 }, { "epoch": 0.8604532756002427, "grad_norm": 0.622611939907074, "learning_rate": 1.5058508944827527e-06, "loss": 0.0186, "step": 48587 }, { "epoch": 0.860470985137271, "grad_norm": 0.4674898087978363, "learning_rate": 1.5054752003210554e-06, "loss": 0.061, "step": 48588 }, { "epoch": 0.8604886946742994, "grad_norm": 0.736311137676239, "learning_rate": 1.505099550554982e-06, "loss": 0.0596, "step": 48589 }, { "epoch": 0.860506404211328, "grad_norm": 0.5279315710067749, "learning_rate": 1.5047239451857648e-06, "loss": 0.0503, "step": 48590 }, { "epoch": 0.8605241137483564, "grad_norm": 0.9340145587921143, "learning_rate": 1.504348384214641e-06, "loss": 0.0516, "step": 48591 }, { "epoch": 0.8605418232853848, "grad_norm": 0.8528077602386475, "learning_rate": 1.50397286764285e-06, "loss": 0.0777, "step": 48592 }, { "epoch": 0.8605595328224132, "grad_norm": 0.7639184594154358, "learning_rate": 1.5035973954716187e-06, "loss": 0.0758, "step": 48593 }, { "epoch": 0.8605772423594417, "grad_norm": 0.5002942085266113, "learning_rate": 1.503221967702188e-06, "loss": 0.055, "step": 48594 }, { "epoch": 0.8605949518964701, "grad_norm": 0.36616456508636475, "learning_rate": 1.5028465843357918e-06, "loss": 0.0554, "step": 48595 }, { "epoch": 0.8606126614334985, "grad_norm": 0.3224720358848572, "learning_rate": 1.5024712453736677e-06, "loss": 0.051, "step": 48596 }, { "epoch": 0.8606303709705269, "grad_norm": 0.5819952487945557, "learning_rate": 1.5020959508170445e-06, "loss": 0.068, "step": 48597 }, { "epoch": 0.8606480805075554, "grad_norm": 0.8196623921394348, "learning_rate": 1.501720700667158e-06, "loss": 0.0874, "step": 48598 }, { "epoch": 0.8606657900445838, "grad_norm": 0.8883424401283264, "learning_rate": 1.501345494925252e-06, "loss": 0.0467, "step": 48599 }, { "epoch": 0.8606834995816122, "grad_norm": 0.6337584853172302, "learning_rate": 1.5009703335925505e-06, "loss": 0.0401, "step": 48600 }, { "epoch": 0.8607012091186406, "grad_norm": 0.4048015773296356, "learning_rate": 1.5005952166702897e-06, "loss": 0.0697, "step": 48601 }, { "epoch": 0.8607189186556691, "grad_norm": 0.8260801434516907, "learning_rate": 1.500220144159708e-06, "loss": 0.0489, "step": 48602 }, { "epoch": 0.8607366281926975, "grad_norm": 0.423717200756073, "learning_rate": 1.499845116062038e-06, "loss": 0.0351, "step": 48603 }, { "epoch": 0.8607543377297259, "grad_norm": 0.34260696172714233, "learning_rate": 1.4994701323785088e-06, "loss": 0.0628, "step": 48604 }, { "epoch": 0.8607720472667544, "grad_norm": 0.775607705116272, "learning_rate": 1.4990951931103574e-06, "loss": 0.0989, "step": 48605 }, { "epoch": 0.8607897568037828, "grad_norm": 0.4584779441356659, "learning_rate": 1.4987202982588182e-06, "loss": 0.0301, "step": 48606 }, { "epoch": 0.8608074663408112, "grad_norm": 0.7441040873527527, "learning_rate": 1.4983454478251235e-06, "loss": 0.0526, "step": 48607 }, { "epoch": 0.8608251758778396, "grad_norm": 0.4540150463581085, "learning_rate": 1.4979706418105055e-06, "loss": 0.0582, "step": 48608 }, { "epoch": 0.8608428854148681, "grad_norm": 0.6507959961891174, "learning_rate": 1.4975958802161998e-06, "loss": 0.0465, "step": 48609 }, { "epoch": 0.8608605949518965, "grad_norm": 0.5155251026153564, "learning_rate": 1.4972211630434408e-06, "loss": 0.0592, "step": 48610 }, { "epoch": 0.8608783044889249, "grad_norm": 0.7204909324645996, "learning_rate": 1.4968464902934553e-06, "loss": 0.0605, "step": 48611 }, { "epoch": 0.8608960140259533, "grad_norm": 0.7579523324966431, "learning_rate": 1.4964718619674777e-06, "loss": 0.0606, "step": 48612 }, { "epoch": 0.8609137235629818, "grad_norm": 0.5884291529655457, "learning_rate": 1.496097278066747e-06, "loss": 0.0485, "step": 48613 }, { "epoch": 0.8609314331000102, "grad_norm": 0.5090678930282593, "learning_rate": 1.4957227385924854e-06, "loss": 0.0594, "step": 48614 }, { "epoch": 0.8609491426370386, "grad_norm": 0.60484778881073, "learning_rate": 1.495348243545932e-06, "loss": 0.083, "step": 48615 }, { "epoch": 0.860966852174067, "grad_norm": 0.44217735528945923, "learning_rate": 1.4949737929283158e-06, "loss": 0.0539, "step": 48616 }, { "epoch": 0.8609845617110955, "grad_norm": 0.7264849543571472, "learning_rate": 1.4945993867408709e-06, "loss": 0.0822, "step": 48617 }, { "epoch": 0.8610022712481239, "grad_norm": 0.5148358345031738, "learning_rate": 1.494225024984826e-06, "loss": 0.0446, "step": 48618 }, { "epoch": 0.8610199807851523, "grad_norm": 0.7269976139068604, "learning_rate": 1.4938507076614171e-06, "loss": 0.0641, "step": 48619 }, { "epoch": 0.8610376903221808, "grad_norm": 0.6354349255561829, "learning_rate": 1.493476434771875e-06, "loss": 0.0478, "step": 48620 }, { "epoch": 0.8610553998592092, "grad_norm": 0.7969744205474854, "learning_rate": 1.4931022063174266e-06, "loss": 0.0729, "step": 48621 }, { "epoch": 0.8610731093962376, "grad_norm": 0.4915315806865692, "learning_rate": 1.4927280222993062e-06, "loss": 0.0773, "step": 48622 }, { "epoch": 0.861090818933266, "grad_norm": 0.616237998008728, "learning_rate": 1.4923538827187444e-06, "loss": 0.034, "step": 48623 }, { "epoch": 0.8611085284702945, "grad_norm": 0.6067892909049988, "learning_rate": 1.4919797875769736e-06, "loss": 0.0594, "step": 48624 }, { "epoch": 0.8611262380073229, "grad_norm": 0.6169929504394531, "learning_rate": 1.4916057368752212e-06, "loss": 0.0475, "step": 48625 }, { "epoch": 0.8611439475443513, "grad_norm": 0.5355044603347778, "learning_rate": 1.4912317306147195e-06, "loss": 0.0408, "step": 48626 }, { "epoch": 0.8611616570813797, "grad_norm": 0.5028564929962158, "learning_rate": 1.4908577687967007e-06, "loss": 0.0361, "step": 48627 }, { "epoch": 0.8611793666184082, "grad_norm": 1.0271716117858887, "learning_rate": 1.4904838514223907e-06, "loss": 0.068, "step": 48628 }, { "epoch": 0.8611970761554366, "grad_norm": 0.6722520589828491, "learning_rate": 1.490109978493025e-06, "loss": 0.073, "step": 48629 }, { "epoch": 0.861214785692465, "grad_norm": 0.6544744372367859, "learning_rate": 1.4897361500098293e-06, "loss": 0.0533, "step": 48630 }, { "epoch": 0.8612324952294934, "grad_norm": 0.5293627381324768, "learning_rate": 1.489362365974038e-06, "loss": 0.0493, "step": 48631 }, { "epoch": 0.8612502047665219, "grad_norm": 0.8489498496055603, "learning_rate": 1.4889886263868763e-06, "loss": 0.0843, "step": 48632 }, { "epoch": 0.8612679143035503, "grad_norm": 0.6968385577201843, "learning_rate": 1.488614931249575e-06, "loss": 0.053, "step": 48633 }, { "epoch": 0.8612856238405787, "grad_norm": 0.34340566396713257, "learning_rate": 1.4882412805633633e-06, "loss": 0.0308, "step": 48634 }, { "epoch": 0.8613033333776072, "grad_norm": 0.6842651963233948, "learning_rate": 1.4878676743294717e-06, "loss": 0.0835, "step": 48635 }, { "epoch": 0.8613210429146356, "grad_norm": 0.7264251112937927, "learning_rate": 1.4874941125491293e-06, "loss": 0.084, "step": 48636 }, { "epoch": 0.861338752451664, "grad_norm": 0.6330491304397583, "learning_rate": 1.4871205952235633e-06, "loss": 0.0288, "step": 48637 }, { "epoch": 0.8613564619886924, "grad_norm": 0.7502567172050476, "learning_rate": 1.486747122354008e-06, "loss": 0.0665, "step": 48638 }, { "epoch": 0.8613741715257209, "grad_norm": 0.5927233099937439, "learning_rate": 1.4863736939416838e-06, "loss": 0.0531, "step": 48639 }, { "epoch": 0.8613918810627493, "grad_norm": 0.23957981169223785, "learning_rate": 1.4860003099878234e-06, "loss": 0.0209, "step": 48640 }, { "epoch": 0.8614095905997777, "grad_norm": 0.9464274048805237, "learning_rate": 1.4856269704936553e-06, "loss": 0.0721, "step": 48641 }, { "epoch": 0.8614273001368061, "grad_norm": 0.308290034532547, "learning_rate": 1.4852536754604107e-06, "loss": 0.0652, "step": 48642 }, { "epoch": 0.8614450096738346, "grad_norm": 0.8063679337501526, "learning_rate": 1.4848804248893133e-06, "loss": 0.069, "step": 48643 }, { "epoch": 0.861462719210863, "grad_norm": 0.7851305603981018, "learning_rate": 1.4845072187815906e-06, "loss": 0.0496, "step": 48644 }, { "epoch": 0.8614804287478914, "grad_norm": 0.3607560992240906, "learning_rate": 1.4841340571384716e-06, "loss": 0.0485, "step": 48645 }, { "epoch": 0.8614981382849198, "grad_norm": 0.6566696166992188, "learning_rate": 1.4837609399611852e-06, "loss": 0.0695, "step": 48646 }, { "epoch": 0.8615158478219483, "grad_norm": 0.9588818550109863, "learning_rate": 1.4833878672509587e-06, "loss": 0.0906, "step": 48647 }, { "epoch": 0.8615335573589767, "grad_norm": 0.4120604395866394, "learning_rate": 1.4830148390090181e-06, "loss": 0.0389, "step": 48648 }, { "epoch": 0.8615512668960051, "grad_norm": 0.8379820585250854, "learning_rate": 1.4826418552365955e-06, "loss": 0.0487, "step": 48649 }, { "epoch": 0.8615689764330337, "grad_norm": 0.5235961675643921, "learning_rate": 1.48226891593491e-06, "loss": 0.0519, "step": 48650 }, { "epoch": 0.861586685970062, "grad_norm": 0.4079459309577942, "learning_rate": 1.4818960211051924e-06, "loss": 0.0512, "step": 48651 }, { "epoch": 0.8616043955070904, "grad_norm": 0.3855242431163788, "learning_rate": 1.4815231707486733e-06, "loss": 0.0476, "step": 48652 }, { "epoch": 0.8616221050441188, "grad_norm": 0.6715825796127319, "learning_rate": 1.481150364866573e-06, "loss": 0.0638, "step": 48653 }, { "epoch": 0.8616398145811474, "grad_norm": 0.6514586210250854, "learning_rate": 1.4807776034601196e-06, "loss": 0.0472, "step": 48654 }, { "epoch": 0.8616575241181758, "grad_norm": 0.4807925224304199, "learning_rate": 1.4804048865305414e-06, "loss": 0.0501, "step": 48655 }, { "epoch": 0.8616752336552042, "grad_norm": 0.8864421844482422, "learning_rate": 1.4800322140790629e-06, "loss": 0.0717, "step": 48656 }, { "epoch": 0.8616929431922326, "grad_norm": 0.4446418881416321, "learning_rate": 1.4796595861069129e-06, "loss": 0.0357, "step": 48657 }, { "epoch": 0.8617106527292611, "grad_norm": 0.6485558152198792, "learning_rate": 1.4792870026153138e-06, "loss": 0.039, "step": 48658 }, { "epoch": 0.8617283622662895, "grad_norm": 0.6681450009346008, "learning_rate": 1.4789144636054947e-06, "loss": 0.0609, "step": 48659 }, { "epoch": 0.8617460718033179, "grad_norm": 0.39267614483833313, "learning_rate": 1.4785419690786777e-06, "loss": 0.0552, "step": 48660 }, { "epoch": 0.8617637813403463, "grad_norm": 0.8895496726036072, "learning_rate": 1.4781695190360905e-06, "loss": 0.0671, "step": 48661 }, { "epoch": 0.8617814908773748, "grad_norm": 0.5993396639823914, "learning_rate": 1.4777971134789587e-06, "loss": 0.0658, "step": 48662 }, { "epoch": 0.8617992004144032, "grad_norm": 0.619547426700592, "learning_rate": 1.4774247524085077e-06, "loss": 0.0499, "step": 48663 }, { "epoch": 0.8618169099514316, "grad_norm": 0.7669135928153992, "learning_rate": 1.4770524358259568e-06, "loss": 0.0429, "step": 48664 }, { "epoch": 0.8618346194884601, "grad_norm": 0.47401440143585205, "learning_rate": 1.4766801637325384e-06, "loss": 0.0586, "step": 48665 }, { "epoch": 0.8618523290254885, "grad_norm": 0.39589688181877136, "learning_rate": 1.476307936129473e-06, "loss": 0.0367, "step": 48666 }, { "epoch": 0.8618700385625169, "grad_norm": 0.7472665905952454, "learning_rate": 1.4759357530179913e-06, "loss": 0.0416, "step": 48667 }, { "epoch": 0.8618877480995453, "grad_norm": 0.8483037352561951, "learning_rate": 1.475563614399309e-06, "loss": 0.0501, "step": 48668 }, { "epoch": 0.8619054576365738, "grad_norm": 0.6759529113769531, "learning_rate": 1.4751915202746551e-06, "loss": 0.0549, "step": 48669 }, { "epoch": 0.8619231671736022, "grad_norm": 0.35061877965927124, "learning_rate": 1.4748194706452572e-06, "loss": 0.0262, "step": 48670 }, { "epoch": 0.8619408767106306, "grad_norm": 0.5222746729850769, "learning_rate": 1.4744474655123308e-06, "loss": 0.048, "step": 48671 }, { "epoch": 0.861958586247659, "grad_norm": 0.39906516671180725, "learning_rate": 1.4740755048771048e-06, "loss": 0.0421, "step": 48672 }, { "epoch": 0.8619762957846875, "grad_norm": 1.0209535360336304, "learning_rate": 1.4737035887408017e-06, "loss": 0.0658, "step": 48673 }, { "epoch": 0.8619940053217159, "grad_norm": 0.5110728740692139, "learning_rate": 1.4733317171046472e-06, "loss": 0.0533, "step": 48674 }, { "epoch": 0.8620117148587443, "grad_norm": 0.3782847225666046, "learning_rate": 1.472959889969862e-06, "loss": 0.0379, "step": 48675 }, { "epoch": 0.8620294243957727, "grad_norm": 0.5146258473396301, "learning_rate": 1.4725881073376702e-06, "loss": 0.058, "step": 48676 }, { "epoch": 0.8620471339328012, "grad_norm": 0.7572519779205322, "learning_rate": 1.4722163692092987e-06, "loss": 0.0723, "step": 48677 }, { "epoch": 0.8620648434698296, "grad_norm": 0.4712481200695038, "learning_rate": 1.4718446755859655e-06, "loss": 0.0646, "step": 48678 }, { "epoch": 0.862082553006858, "grad_norm": 0.7949504256248474, "learning_rate": 1.4714730264688925e-06, "loss": 0.0744, "step": 48679 }, { "epoch": 0.8621002625438865, "grad_norm": 0.499738484621048, "learning_rate": 1.4711014218593073e-06, "loss": 0.0522, "step": 48680 }, { "epoch": 0.8621179720809149, "grad_norm": 0.828877329826355, "learning_rate": 1.470729861758432e-06, "loss": 0.0747, "step": 48681 }, { "epoch": 0.8621356816179433, "grad_norm": 0.5759146809577942, "learning_rate": 1.470358346167484e-06, "loss": 0.0577, "step": 48682 }, { "epoch": 0.8621533911549717, "grad_norm": 0.41501787304878235, "learning_rate": 1.4699868750876876e-06, "loss": 0.0414, "step": 48683 }, { "epoch": 0.8621711006920002, "grad_norm": 0.49281296133995056, "learning_rate": 1.4696154485202684e-06, "loss": 0.0638, "step": 48684 }, { "epoch": 0.8621888102290286, "grad_norm": 0.6671199798583984, "learning_rate": 1.4692440664664437e-06, "loss": 0.0655, "step": 48685 }, { "epoch": 0.862206519766057, "grad_norm": 0.673730731010437, "learning_rate": 1.468872728927439e-06, "loss": 0.0403, "step": 48686 }, { "epoch": 0.8622242293030854, "grad_norm": 0.4227461516857147, "learning_rate": 1.4685014359044734e-06, "loss": 0.0531, "step": 48687 }, { "epoch": 0.8622419388401139, "grad_norm": 0.5435025691986084, "learning_rate": 1.4681301873987714e-06, "loss": 0.0521, "step": 48688 }, { "epoch": 0.8622596483771423, "grad_norm": 0.4889770746231079, "learning_rate": 1.4677589834115512e-06, "loss": 0.0726, "step": 48689 }, { "epoch": 0.8622773579141707, "grad_norm": 0.6323726177215576, "learning_rate": 1.4673878239440342e-06, "loss": 0.0499, "step": 48690 }, { "epoch": 0.8622950674511991, "grad_norm": 0.5459016561508179, "learning_rate": 1.4670167089974456e-06, "loss": 0.0402, "step": 48691 }, { "epoch": 0.8623127769882276, "grad_norm": 0.5436658263206482, "learning_rate": 1.4666456385730015e-06, "loss": 0.0517, "step": 48692 }, { "epoch": 0.862330486525256, "grad_norm": 0.319458544254303, "learning_rate": 1.4662746126719206e-06, "loss": 0.0832, "step": 48693 }, { "epoch": 0.8623481960622844, "grad_norm": 0.5595365166664124, "learning_rate": 1.4659036312954304e-06, "loss": 0.0691, "step": 48694 }, { "epoch": 0.8623659055993129, "grad_norm": 0.6395852565765381, "learning_rate": 1.4655326944447516e-06, "loss": 0.0526, "step": 48695 }, { "epoch": 0.8623836151363413, "grad_norm": 0.5993330478668213, "learning_rate": 1.4651618021210983e-06, "loss": 0.069, "step": 48696 }, { "epoch": 0.8624013246733697, "grad_norm": 0.9270256161689758, "learning_rate": 1.4647909543256926e-06, "loss": 0.0671, "step": 48697 }, { "epoch": 0.8624190342103981, "grad_norm": 0.6290728449821472, "learning_rate": 1.4644201510597606e-06, "loss": 0.0537, "step": 48698 }, { "epoch": 0.8624367437474266, "grad_norm": 0.8143661618232727, "learning_rate": 1.4640493923245124e-06, "loss": 0.0465, "step": 48699 }, { "epoch": 0.862454453284455, "grad_norm": 0.40553873777389526, "learning_rate": 1.4636786781211742e-06, "loss": 0.0443, "step": 48700 }, { "epoch": 0.8624721628214834, "grad_norm": 0.6024847626686096, "learning_rate": 1.4633080084509632e-06, "loss": 0.0442, "step": 48701 }, { "epoch": 0.8624898723585118, "grad_norm": 0.6611975431442261, "learning_rate": 1.4629373833150984e-06, "loss": 0.0557, "step": 48702 }, { "epoch": 0.8625075818955403, "grad_norm": 0.26207345724105835, "learning_rate": 1.462566802714802e-06, "loss": 0.046, "step": 48703 }, { "epoch": 0.8625252914325687, "grad_norm": 0.838861882686615, "learning_rate": 1.4621962666512916e-06, "loss": 0.0659, "step": 48704 }, { "epoch": 0.8625430009695971, "grad_norm": 0.7626208662986755, "learning_rate": 1.4618257751257862e-06, "loss": 0.0864, "step": 48705 }, { "epoch": 0.8625607105066255, "grad_norm": 1.0317438840866089, "learning_rate": 1.4614553281395066e-06, "loss": 0.0578, "step": 48706 }, { "epoch": 0.862578420043654, "grad_norm": 0.5631503462791443, "learning_rate": 1.4610849256936682e-06, "loss": 0.0403, "step": 48707 }, { "epoch": 0.8625961295806824, "grad_norm": 0.763511061668396, "learning_rate": 1.46071456778949e-06, "loss": 0.0831, "step": 48708 }, { "epoch": 0.8626138391177108, "grad_norm": 0.5078301429748535, "learning_rate": 1.4603442544281947e-06, "loss": 0.0552, "step": 48709 }, { "epoch": 0.8626315486547393, "grad_norm": 0.5380498170852661, "learning_rate": 1.4599739856109945e-06, "loss": 0.0601, "step": 48710 }, { "epoch": 0.8626492581917677, "grad_norm": 0.5816616415977478, "learning_rate": 1.45960376133911e-06, "loss": 0.0452, "step": 48711 }, { "epoch": 0.8626669677287961, "grad_norm": 0.39670613408088684, "learning_rate": 1.4592335816137604e-06, "loss": 0.0274, "step": 48712 }, { "epoch": 0.8626846772658245, "grad_norm": 0.7058849334716797, "learning_rate": 1.4588634464361628e-06, "loss": 0.0409, "step": 48713 }, { "epoch": 0.862702386802853, "grad_norm": 0.812793493270874, "learning_rate": 1.458493355807533e-06, "loss": 0.0663, "step": 48714 }, { "epoch": 0.8627200963398814, "grad_norm": 0.3510149121284485, "learning_rate": 1.4581233097290919e-06, "loss": 0.0522, "step": 48715 }, { "epoch": 0.8627378058769098, "grad_norm": 0.7903801798820496, "learning_rate": 1.4577533082020583e-06, "loss": 0.0451, "step": 48716 }, { "epoch": 0.8627555154139382, "grad_norm": 0.581521213054657, "learning_rate": 1.457383351227643e-06, "loss": 0.0672, "step": 48717 }, { "epoch": 0.8627732249509668, "grad_norm": 0.38585445284843445, "learning_rate": 1.4570134388070683e-06, "loss": 0.0278, "step": 48718 }, { "epoch": 0.8627909344879952, "grad_norm": 0.647030770778656, "learning_rate": 1.4566435709415483e-06, "loss": 0.0452, "step": 48719 }, { "epoch": 0.8628086440250236, "grad_norm": 0.6360114812850952, "learning_rate": 1.4562737476323034e-06, "loss": 0.0532, "step": 48720 }, { "epoch": 0.862826353562052, "grad_norm": 0.6593388319015503, "learning_rate": 1.4559039688805465e-06, "loss": 0.0715, "step": 48721 }, { "epoch": 0.8628440630990805, "grad_norm": 0.5057827234268188, "learning_rate": 1.4555342346874928e-06, "loss": 0.047, "step": 48722 }, { "epoch": 0.8628617726361089, "grad_norm": 0.662443995475769, "learning_rate": 1.4551645450543665e-06, "loss": 0.0499, "step": 48723 }, { "epoch": 0.8628794821731373, "grad_norm": 0.4831484258174896, "learning_rate": 1.4547948999823768e-06, "loss": 0.0632, "step": 48724 }, { "epoch": 0.8628971917101658, "grad_norm": 0.7152758240699768, "learning_rate": 1.454425299472741e-06, "loss": 0.0487, "step": 48725 }, { "epoch": 0.8629149012471942, "grad_norm": 0.48569074273109436, "learning_rate": 1.454055743526676e-06, "loss": 0.0582, "step": 48726 }, { "epoch": 0.8629326107842226, "grad_norm": 0.3916333019733429, "learning_rate": 1.4536862321453997e-06, "loss": 0.0296, "step": 48727 }, { "epoch": 0.862950320321251, "grad_norm": 0.5355554223060608, "learning_rate": 1.4533167653301244e-06, "loss": 0.0769, "step": 48728 }, { "epoch": 0.8629680298582795, "grad_norm": 0.6372593641281128, "learning_rate": 1.4529473430820656e-06, "loss": 0.0567, "step": 48729 }, { "epoch": 0.8629857393953079, "grad_norm": 0.5212895274162292, "learning_rate": 1.4525779654024407e-06, "loss": 0.0529, "step": 48730 }, { "epoch": 0.8630034489323363, "grad_norm": 0.35655054450035095, "learning_rate": 1.4522086322924638e-06, "loss": 0.0498, "step": 48731 }, { "epoch": 0.8630211584693647, "grad_norm": 0.5390358567237854, "learning_rate": 1.4518393437533489e-06, "loss": 0.0535, "step": 48732 }, { "epoch": 0.8630388680063932, "grad_norm": 0.41161853075027466, "learning_rate": 1.4514700997863134e-06, "loss": 0.05, "step": 48733 }, { "epoch": 0.8630565775434216, "grad_norm": 0.4701199233531952, "learning_rate": 1.4511009003925746e-06, "loss": 0.068, "step": 48734 }, { "epoch": 0.86307428708045, "grad_norm": 0.4267614781856537, "learning_rate": 1.4507317455733399e-06, "loss": 0.1061, "step": 48735 }, { "epoch": 0.8630919966174784, "grad_norm": 0.605193555355072, "learning_rate": 1.4503626353298266e-06, "loss": 0.0362, "step": 48736 }, { "epoch": 0.8631097061545069, "grad_norm": 0.3171175420284271, "learning_rate": 1.4499935696632505e-06, "loss": 0.0414, "step": 48737 }, { "epoch": 0.8631274156915353, "grad_norm": 0.8873176574707031, "learning_rate": 1.4496245485748272e-06, "loss": 0.0799, "step": 48738 }, { "epoch": 0.8631451252285637, "grad_norm": 1.0505211353302002, "learning_rate": 1.4492555720657658e-06, "loss": 0.084, "step": 48739 }, { "epoch": 0.8631628347655922, "grad_norm": 0.6157662868499756, "learning_rate": 1.4488866401372835e-06, "loss": 0.0425, "step": 48740 }, { "epoch": 0.8631805443026206, "grad_norm": 0.5007155537605286, "learning_rate": 1.4485177527905947e-06, "loss": 0.0499, "step": 48741 }, { "epoch": 0.863198253839649, "grad_norm": 0.27632805705070496, "learning_rate": 1.4481489100269097e-06, "loss": 0.0432, "step": 48742 }, { "epoch": 0.8632159633766774, "grad_norm": 0.9414641857147217, "learning_rate": 1.447780111847446e-06, "loss": 0.0578, "step": 48743 }, { "epoch": 0.8632336729137059, "grad_norm": 0.6312264204025269, "learning_rate": 1.4474113582534126e-06, "loss": 0.044, "step": 48744 }, { "epoch": 0.8632513824507343, "grad_norm": 0.3997417390346527, "learning_rate": 1.4470426492460303e-06, "loss": 0.0371, "step": 48745 }, { "epoch": 0.8632690919877627, "grad_norm": 0.1176401674747467, "learning_rate": 1.4466739848265031e-06, "loss": 0.0417, "step": 48746 }, { "epoch": 0.8632868015247911, "grad_norm": 1.0812150239944458, "learning_rate": 1.4463053649960468e-06, "loss": 0.0539, "step": 48747 }, { "epoch": 0.8633045110618196, "grad_norm": 0.3543102443218231, "learning_rate": 1.4459367897558784e-06, "loss": 0.0335, "step": 48748 }, { "epoch": 0.863322220598848, "grad_norm": 0.95557701587677, "learning_rate": 1.4455682591072055e-06, "loss": 0.0679, "step": 48749 }, { "epoch": 0.8633399301358764, "grad_norm": 0.6100998520851135, "learning_rate": 1.4451997730512407e-06, "loss": 0.0663, "step": 48750 }, { "epoch": 0.8633576396729048, "grad_norm": 0.5618383884429932, "learning_rate": 1.4448313315891959e-06, "loss": 0.0656, "step": 48751 }, { "epoch": 0.8633753492099333, "grad_norm": 0.5325912833213806, "learning_rate": 1.4444629347222888e-06, "loss": 0.0439, "step": 48752 }, { "epoch": 0.8633930587469617, "grad_norm": 0.6330717206001282, "learning_rate": 1.4440945824517266e-06, "loss": 0.0643, "step": 48753 }, { "epoch": 0.8634107682839901, "grad_norm": 0.738040566444397, "learning_rate": 1.4437262747787217e-06, "loss": 0.0385, "step": 48754 }, { "epoch": 0.8634284778210186, "grad_norm": 0.5196520686149597, "learning_rate": 1.4433580117044882e-06, "loss": 0.048, "step": 48755 }, { "epoch": 0.863446187358047, "grad_norm": 0.6286499500274658, "learning_rate": 1.4429897932302332e-06, "loss": 0.0365, "step": 48756 }, { "epoch": 0.8634638968950754, "grad_norm": 0.8393523693084717, "learning_rate": 1.4426216193571713e-06, "loss": 0.0695, "step": 48757 }, { "epoch": 0.8634816064321038, "grad_norm": 0.7415509223937988, "learning_rate": 1.4422534900865126e-06, "loss": 0.0674, "step": 48758 }, { "epoch": 0.8634993159691323, "grad_norm": 0.5028730034828186, "learning_rate": 1.441885405419468e-06, "loss": 0.0633, "step": 48759 }, { "epoch": 0.8635170255061607, "grad_norm": 0.45354336500167847, "learning_rate": 1.44151736535725e-06, "loss": 0.0491, "step": 48760 }, { "epoch": 0.8635347350431891, "grad_norm": 0.5483709573745728, "learning_rate": 1.4411493699010674e-06, "loss": 0.0599, "step": 48761 }, { "epoch": 0.8635524445802175, "grad_norm": 0.8633733987808228, "learning_rate": 1.4407814190521362e-06, "loss": 0.056, "step": 48762 }, { "epoch": 0.863570154117246, "grad_norm": 0.792667031288147, "learning_rate": 1.4404135128116586e-06, "loss": 0.0847, "step": 48763 }, { "epoch": 0.8635878636542744, "grad_norm": 0.5397158861160278, "learning_rate": 1.44004565118085e-06, "loss": 0.0449, "step": 48764 }, { "epoch": 0.8636055731913028, "grad_norm": 0.5023564696311951, "learning_rate": 1.4396778341609202e-06, "loss": 0.0536, "step": 48765 }, { "epoch": 0.8636232827283312, "grad_norm": 0.6293080449104309, "learning_rate": 1.4393100617530808e-06, "loss": 0.0464, "step": 48766 }, { "epoch": 0.8636409922653597, "grad_norm": 0.6017420887947083, "learning_rate": 1.4389423339585362e-06, "loss": 0.0513, "step": 48767 }, { "epoch": 0.8636587018023881, "grad_norm": 0.8297766447067261, "learning_rate": 1.4385746507785002e-06, "loss": 0.0468, "step": 48768 }, { "epoch": 0.8636764113394165, "grad_norm": 0.7018601298332214, "learning_rate": 1.438207012214182e-06, "loss": 0.0395, "step": 48769 }, { "epoch": 0.863694120876445, "grad_norm": 0.4874097406864166, "learning_rate": 1.4378394182667909e-06, "loss": 0.0494, "step": 48770 }, { "epoch": 0.8637118304134734, "grad_norm": 0.6256139278411865, "learning_rate": 1.4374718689375372e-06, "loss": 0.0717, "step": 48771 }, { "epoch": 0.8637295399505018, "grad_norm": 0.7765747308731079, "learning_rate": 1.4371043642276282e-06, "loss": 0.0641, "step": 48772 }, { "epoch": 0.8637472494875302, "grad_norm": 0.5524786114692688, "learning_rate": 1.4367369041382783e-06, "loss": 0.0577, "step": 48773 }, { "epoch": 0.8637649590245587, "grad_norm": 0.639232337474823, "learning_rate": 1.4363694886706879e-06, "loss": 0.0413, "step": 48774 }, { "epoch": 0.8637826685615871, "grad_norm": 0.6498496532440186, "learning_rate": 1.4360021178260713e-06, "loss": 0.052, "step": 48775 }, { "epoch": 0.8638003780986155, "grad_norm": 0.9347018599510193, "learning_rate": 1.435634791605634e-06, "loss": 0.0661, "step": 48776 }, { "epoch": 0.8638180876356439, "grad_norm": 0.6924943923950195, "learning_rate": 1.43526751001059e-06, "loss": 0.049, "step": 48777 }, { "epoch": 0.8638357971726724, "grad_norm": 0.7004614472389221, "learning_rate": 1.4349002730421418e-06, "loss": 0.054, "step": 48778 }, { "epoch": 0.8638535067097008, "grad_norm": 0.8572608232498169, "learning_rate": 1.4345330807014984e-06, "loss": 0.079, "step": 48779 }, { "epoch": 0.8638712162467292, "grad_norm": 0.3029385209083557, "learning_rate": 1.4341659329898705e-06, "loss": 0.0205, "step": 48780 }, { "epoch": 0.8638889257837576, "grad_norm": 0.4497954845428467, "learning_rate": 1.4337988299084636e-06, "loss": 0.0415, "step": 48781 }, { "epoch": 0.8639066353207862, "grad_norm": 0.6967448592185974, "learning_rate": 1.4334317714584871e-06, "loss": 0.0513, "step": 48782 }, { "epoch": 0.8639243448578146, "grad_norm": 0.5292695760726929, "learning_rate": 1.4330647576411466e-06, "loss": 0.0476, "step": 48783 }, { "epoch": 0.863942054394843, "grad_norm": 0.5858584642410278, "learning_rate": 1.4326977884576558e-06, "loss": 0.0441, "step": 48784 }, { "epoch": 0.8639597639318715, "grad_norm": 0.6309805512428284, "learning_rate": 1.4323308639092124e-06, "loss": 0.0686, "step": 48785 }, { "epoch": 0.8639774734688999, "grad_norm": 0.36243462562561035, "learning_rate": 1.4319639839970288e-06, "loss": 0.0648, "step": 48786 }, { "epoch": 0.8639951830059283, "grad_norm": 0.49997419118881226, "learning_rate": 1.431597148722314e-06, "loss": 0.0641, "step": 48787 }, { "epoch": 0.8640128925429567, "grad_norm": 0.28040990233421326, "learning_rate": 1.4312303580862669e-06, "loss": 0.0455, "step": 48788 }, { "epoch": 0.8640306020799852, "grad_norm": 0.6956333518028259, "learning_rate": 1.4308636120901015e-06, "loss": 0.0791, "step": 48789 }, { "epoch": 0.8640483116170136, "grad_norm": 0.7220397591590881, "learning_rate": 1.4304969107350236e-06, "loss": 0.0594, "step": 48790 }, { "epoch": 0.864066021154042, "grad_norm": 0.41218921542167664, "learning_rate": 1.4301302540222405e-06, "loss": 0.0541, "step": 48791 }, { "epoch": 0.8640837306910704, "grad_norm": 0.37957116961479187, "learning_rate": 1.4297636419529531e-06, "loss": 0.0394, "step": 48792 }, { "epoch": 0.8641014402280989, "grad_norm": 0.6438353061676025, "learning_rate": 1.429397074528372e-06, "loss": 0.0594, "step": 48793 }, { "epoch": 0.8641191497651273, "grad_norm": 0.510168194770813, "learning_rate": 1.4290305517497044e-06, "loss": 0.057, "step": 48794 }, { "epoch": 0.8641368593021557, "grad_norm": 0.49074050784111023, "learning_rate": 1.4286640736181494e-06, "loss": 0.0509, "step": 48795 }, { "epoch": 0.8641545688391841, "grad_norm": 0.5924362540245056, "learning_rate": 1.428297640134918e-06, "loss": 0.044, "step": 48796 }, { "epoch": 0.8641722783762126, "grad_norm": 0.3924626410007477, "learning_rate": 1.4279312513012154e-06, "loss": 0.026, "step": 48797 }, { "epoch": 0.864189987913241, "grad_norm": 0.48518145084381104, "learning_rate": 1.4275649071182445e-06, "loss": 0.0563, "step": 48798 }, { "epoch": 0.8642076974502694, "grad_norm": 0.4712730348110199, "learning_rate": 1.427198607587214e-06, "loss": 0.0624, "step": 48799 }, { "epoch": 0.8642254069872979, "grad_norm": 0.4677521884441376, "learning_rate": 1.4268323527093262e-06, "loss": 0.0473, "step": 48800 }, { "epoch": 0.8642431165243263, "grad_norm": 0.667611300945282, "learning_rate": 1.4264661424857906e-06, "loss": 0.0689, "step": 48801 }, { "epoch": 0.8642608260613547, "grad_norm": 0.366218239068985, "learning_rate": 1.4260999769178056e-06, "loss": 0.0569, "step": 48802 }, { "epoch": 0.8642785355983831, "grad_norm": 0.9093300700187683, "learning_rate": 1.425733856006579e-06, "loss": 0.0681, "step": 48803 }, { "epoch": 0.8642962451354116, "grad_norm": 0.420605331659317, "learning_rate": 1.425367779753315e-06, "loss": 0.0363, "step": 48804 }, { "epoch": 0.86431395467244, "grad_norm": 0.6600499153137207, "learning_rate": 1.4250017481592203e-06, "loss": 0.0715, "step": 48805 }, { "epoch": 0.8643316642094684, "grad_norm": 0.6025295257568359, "learning_rate": 1.4246357612254945e-06, "loss": 0.0468, "step": 48806 }, { "epoch": 0.8643493737464968, "grad_norm": 0.9745279550552368, "learning_rate": 1.4242698189533433e-06, "loss": 0.069, "step": 48807 }, { "epoch": 0.8643670832835253, "grad_norm": 1.2722303867340088, "learning_rate": 1.423903921343972e-06, "loss": 0.0755, "step": 48808 }, { "epoch": 0.8643847928205537, "grad_norm": 0.811585009098053, "learning_rate": 1.4235380683985832e-06, "loss": 0.0646, "step": 48809 }, { "epoch": 0.8644025023575821, "grad_norm": 0.5467899441719055, "learning_rate": 1.423172260118381e-06, "loss": 0.0601, "step": 48810 }, { "epoch": 0.8644202118946105, "grad_norm": 0.6809989213943481, "learning_rate": 1.4228064965045678e-06, "loss": 0.0418, "step": 48811 }, { "epoch": 0.864437921431639, "grad_norm": 0.5049115419387817, "learning_rate": 1.4224407775583526e-06, "loss": 0.0402, "step": 48812 }, { "epoch": 0.8644556309686674, "grad_norm": 0.4879102110862732, "learning_rate": 1.4220751032809309e-06, "loss": 0.0587, "step": 48813 }, { "epoch": 0.8644733405056958, "grad_norm": 0.36264073848724365, "learning_rate": 1.4217094736735088e-06, "loss": 0.0403, "step": 48814 }, { "epoch": 0.8644910500427243, "grad_norm": 0.3746357560157776, "learning_rate": 1.4213438887372882e-06, "loss": 0.0452, "step": 48815 }, { "epoch": 0.8645087595797527, "grad_norm": 0.46422505378723145, "learning_rate": 1.4209783484734766e-06, "loss": 0.0682, "step": 48816 }, { "epoch": 0.8645264691167811, "grad_norm": 0.7732109427452087, "learning_rate": 1.4206128528832667e-06, "loss": 0.0795, "step": 48817 }, { "epoch": 0.8645441786538095, "grad_norm": 0.6557161211967468, "learning_rate": 1.420247401967869e-06, "loss": 0.0494, "step": 48818 }, { "epoch": 0.864561888190838, "grad_norm": 0.7733510136604309, "learning_rate": 1.4198819957284875e-06, "loss": 0.0618, "step": 48819 }, { "epoch": 0.8645795977278664, "grad_norm": 0.7382054924964905, "learning_rate": 1.419516634166318e-06, "loss": 0.0535, "step": 48820 }, { "epoch": 0.8645973072648948, "grad_norm": 0.6962934732437134, "learning_rate": 1.419151317282566e-06, "loss": 0.0721, "step": 48821 }, { "epoch": 0.8646150168019232, "grad_norm": 0.5060961246490479, "learning_rate": 1.4187860450784308e-06, "loss": 0.0581, "step": 48822 }, { "epoch": 0.8646327263389517, "grad_norm": 0.6409803032875061, "learning_rate": 1.4184208175551194e-06, "loss": 0.0748, "step": 48823 }, { "epoch": 0.8646504358759801, "grad_norm": 0.43714800477027893, "learning_rate": 1.418055634713828e-06, "loss": 0.0355, "step": 48824 }, { "epoch": 0.8646681454130085, "grad_norm": 0.9950528740882874, "learning_rate": 1.4176904965557585e-06, "loss": 0.0679, "step": 48825 }, { "epoch": 0.8646858549500369, "grad_norm": 0.8219646215438843, "learning_rate": 1.4173254030821136e-06, "loss": 0.0638, "step": 48826 }, { "epoch": 0.8647035644870654, "grad_norm": 0.9114031791687012, "learning_rate": 1.4169603542940957e-06, "loss": 0.076, "step": 48827 }, { "epoch": 0.8647212740240938, "grad_norm": 0.4373294711112976, "learning_rate": 1.4165953501929035e-06, "loss": 0.0578, "step": 48828 }, { "epoch": 0.8647389835611222, "grad_norm": 0.3739641308784485, "learning_rate": 1.416230390779738e-06, "loss": 0.0466, "step": 48829 }, { "epoch": 0.8647566930981507, "grad_norm": 0.7186962366104126, "learning_rate": 1.4158654760558032e-06, "loss": 0.0634, "step": 48830 }, { "epoch": 0.8647744026351791, "grad_norm": 0.4529677927494049, "learning_rate": 1.4155006060222964e-06, "loss": 0.0587, "step": 48831 }, { "epoch": 0.8647921121722075, "grad_norm": 0.5410969853401184, "learning_rate": 1.4151357806804167e-06, "loss": 0.0509, "step": 48832 }, { "epoch": 0.8648098217092359, "grad_norm": 0.7766110897064209, "learning_rate": 1.4147710000313697e-06, "loss": 0.0689, "step": 48833 }, { "epoch": 0.8648275312462644, "grad_norm": 0.5107945799827576, "learning_rate": 1.4144062640763495e-06, "loss": 0.0539, "step": 48834 }, { "epoch": 0.8648452407832928, "grad_norm": 0.19310373067855835, "learning_rate": 1.4140415728165584e-06, "loss": 0.0449, "step": 48835 }, { "epoch": 0.8648629503203212, "grad_norm": 0.8146079182624817, "learning_rate": 1.4136769262531956e-06, "loss": 0.051, "step": 48836 }, { "epoch": 0.8648806598573496, "grad_norm": 0.5606803894042969, "learning_rate": 1.4133123243874618e-06, "loss": 0.0346, "step": 48837 }, { "epoch": 0.8648983693943781, "grad_norm": 0.7145054340362549, "learning_rate": 1.4129477672205576e-06, "loss": 0.063, "step": 48838 }, { "epoch": 0.8649160789314065, "grad_norm": 0.49137669801712036, "learning_rate": 1.4125832547536788e-06, "loss": 0.0474, "step": 48839 }, { "epoch": 0.8649337884684349, "grad_norm": 0.6863333582878113, "learning_rate": 1.4122187869880277e-06, "loss": 0.0718, "step": 48840 }, { "epoch": 0.8649514980054633, "grad_norm": 0.6236162185668945, "learning_rate": 1.4118543639248066e-06, "loss": 0.0493, "step": 48841 }, { "epoch": 0.8649692075424918, "grad_norm": 0.18444885313510895, "learning_rate": 1.4114899855652064e-06, "loss": 0.0349, "step": 48842 }, { "epoch": 0.8649869170795202, "grad_norm": 0.4707461893558502, "learning_rate": 1.4111256519104292e-06, "loss": 0.0364, "step": 48843 }, { "epoch": 0.8650046266165486, "grad_norm": 0.9186208844184875, "learning_rate": 1.4107613629616777e-06, "loss": 0.0724, "step": 48844 }, { "epoch": 0.8650223361535772, "grad_norm": 0.7930185198783875, "learning_rate": 1.4103971187201442e-06, "loss": 0.0571, "step": 48845 }, { "epoch": 0.8650400456906056, "grad_norm": 1.1209644079208374, "learning_rate": 1.4100329191870259e-06, "loss": 0.0609, "step": 48846 }, { "epoch": 0.865057755227634, "grad_norm": 0.7133224010467529, "learning_rate": 1.4096687643635286e-06, "loss": 0.0494, "step": 48847 }, { "epoch": 0.8650754647646623, "grad_norm": 0.47032231092453003, "learning_rate": 1.4093046542508498e-06, "loss": 0.049, "step": 48848 }, { "epoch": 0.8650931743016909, "grad_norm": 0.6821203231811523, "learning_rate": 1.40894058885018e-06, "loss": 0.0693, "step": 48849 }, { "epoch": 0.8651108838387193, "grad_norm": 0.9959122538566589, "learning_rate": 1.4085765681627217e-06, "loss": 0.0596, "step": 48850 }, { "epoch": 0.8651285933757477, "grad_norm": 0.4374881982803345, "learning_rate": 1.4082125921896738e-06, "loss": 0.0746, "step": 48851 }, { "epoch": 0.865146302912776, "grad_norm": 1.0619430541992188, "learning_rate": 1.4078486609322288e-06, "loss": 0.0789, "step": 48852 }, { "epoch": 0.8651640124498046, "grad_norm": 0.24340398609638214, "learning_rate": 1.4074847743915892e-06, "loss": 0.0371, "step": 48853 }, { "epoch": 0.865181721986833, "grad_norm": 0.6397829055786133, "learning_rate": 1.4071209325689471e-06, "loss": 0.0837, "step": 48854 }, { "epoch": 0.8651994315238614, "grad_norm": 0.5766999125480652, "learning_rate": 1.4067571354655035e-06, "loss": 0.0416, "step": 48855 }, { "epoch": 0.8652171410608898, "grad_norm": 0.26960939168930054, "learning_rate": 1.4063933830824538e-06, "loss": 0.0469, "step": 48856 }, { "epoch": 0.8652348505979183, "grad_norm": 0.7716042995452881, "learning_rate": 1.4060296754209956e-06, "loss": 0.074, "step": 48857 }, { "epoch": 0.8652525601349467, "grad_norm": 0.9168776273727417, "learning_rate": 1.4056660124823278e-06, "loss": 0.0548, "step": 48858 }, { "epoch": 0.8652702696719751, "grad_norm": 0.5318204760551453, "learning_rate": 1.4053023942676397e-06, "loss": 0.0525, "step": 48859 }, { "epoch": 0.8652879792090036, "grad_norm": 0.40474724769592285, "learning_rate": 1.4049388207781333e-06, "loss": 0.0436, "step": 48860 }, { "epoch": 0.865305688746032, "grad_norm": 0.6802684664726257, "learning_rate": 1.4045752920150012e-06, "loss": 0.0452, "step": 48861 }, { "epoch": 0.8653233982830604, "grad_norm": 0.6731686592102051, "learning_rate": 1.4042118079794458e-06, "loss": 0.0467, "step": 48862 }, { "epoch": 0.8653411078200888, "grad_norm": 0.5190843939781189, "learning_rate": 1.4038483686726544e-06, "loss": 0.0612, "step": 48863 }, { "epoch": 0.8653588173571173, "grad_norm": 0.45496654510498047, "learning_rate": 1.4034849740958277e-06, "loss": 0.0629, "step": 48864 }, { "epoch": 0.8653765268941457, "grad_norm": 0.6979761719703674, "learning_rate": 1.4031216242501598e-06, "loss": 0.0637, "step": 48865 }, { "epoch": 0.8653942364311741, "grad_norm": 0.4289478063583374, "learning_rate": 1.4027583191368465e-06, "loss": 0.0454, "step": 48866 }, { "epoch": 0.8654119459682025, "grad_norm": 0.3726772665977478, "learning_rate": 1.4023950587570833e-06, "loss": 0.0343, "step": 48867 }, { "epoch": 0.865429655505231, "grad_norm": 0.3461231291294098, "learning_rate": 1.4020318431120642e-06, "loss": 0.0555, "step": 48868 }, { "epoch": 0.8654473650422594, "grad_norm": 0.6086457967758179, "learning_rate": 1.4016686722029886e-06, "loss": 0.0671, "step": 48869 }, { "epoch": 0.8654650745792878, "grad_norm": 0.5402069091796875, "learning_rate": 1.4013055460310453e-06, "loss": 0.0716, "step": 48870 }, { "epoch": 0.8654827841163162, "grad_norm": 0.8150572180747986, "learning_rate": 1.40094246459743e-06, "loss": 0.0783, "step": 48871 }, { "epoch": 0.8655004936533447, "grad_norm": 0.32109174132347107, "learning_rate": 1.4005794279033435e-06, "loss": 0.0454, "step": 48872 }, { "epoch": 0.8655182031903731, "grad_norm": 0.11848801374435425, "learning_rate": 1.4002164359499696e-06, "loss": 0.0292, "step": 48873 }, { "epoch": 0.8655359127274015, "grad_norm": 0.8024773597717285, "learning_rate": 1.3998534887385096e-06, "loss": 0.0676, "step": 48874 }, { "epoch": 0.86555362226443, "grad_norm": 0.4907465875148773, "learning_rate": 1.3994905862701534e-06, "loss": 0.0627, "step": 48875 }, { "epoch": 0.8655713318014584, "grad_norm": 0.5700462460517883, "learning_rate": 1.3991277285461023e-06, "loss": 0.0543, "step": 48876 }, { "epoch": 0.8655890413384868, "grad_norm": 0.49413973093032837, "learning_rate": 1.3987649155675436e-06, "loss": 0.0373, "step": 48877 }, { "epoch": 0.8656067508755152, "grad_norm": 0.4851268231868744, "learning_rate": 1.3984021473356729e-06, "loss": 0.0565, "step": 48878 }, { "epoch": 0.8656244604125437, "grad_norm": 0.6588841080665588, "learning_rate": 1.3980394238516825e-06, "loss": 0.0568, "step": 48879 }, { "epoch": 0.8656421699495721, "grad_norm": 0.5717939734458923, "learning_rate": 1.3976767451167682e-06, "loss": 0.0797, "step": 48880 }, { "epoch": 0.8656598794866005, "grad_norm": 0.4312835931777954, "learning_rate": 1.3973141111321208e-06, "loss": 0.0355, "step": 48881 }, { "epoch": 0.8656775890236289, "grad_norm": 0.5963253378868103, "learning_rate": 1.3969515218989342e-06, "loss": 0.0504, "step": 48882 }, { "epoch": 0.8656952985606574, "grad_norm": 0.3839404284954071, "learning_rate": 1.3965889774183994e-06, "loss": 0.0476, "step": 48883 }, { "epoch": 0.8657130080976858, "grad_norm": 0.6902250647544861, "learning_rate": 1.3962264776917132e-06, "loss": 0.0694, "step": 48884 }, { "epoch": 0.8657307176347142, "grad_norm": 0.6902024745941162, "learning_rate": 1.3958640227200652e-06, "loss": 0.0493, "step": 48885 }, { "epoch": 0.8657484271717427, "grad_norm": 0.864772617816925, "learning_rate": 1.3955016125046478e-06, "loss": 0.052, "step": 48886 }, { "epoch": 0.8657661367087711, "grad_norm": 0.7157331109046936, "learning_rate": 1.395139247046658e-06, "loss": 0.0485, "step": 48887 }, { "epoch": 0.8657838462457995, "grad_norm": 0.6359257102012634, "learning_rate": 1.3947769263472798e-06, "loss": 0.0747, "step": 48888 }, { "epoch": 0.8658015557828279, "grad_norm": 0.7197190523147583, "learning_rate": 1.3944146504077093e-06, "loss": 0.0421, "step": 48889 }, { "epoch": 0.8658192653198564, "grad_norm": 0.6173208355903625, "learning_rate": 1.3940524192291438e-06, "loss": 0.0581, "step": 48890 }, { "epoch": 0.8658369748568848, "grad_norm": 0.5864881277084351, "learning_rate": 1.3936902328127637e-06, "loss": 0.0434, "step": 48891 }, { "epoch": 0.8658546843939132, "grad_norm": 0.7341768145561218, "learning_rate": 1.3933280911597684e-06, "loss": 0.0898, "step": 48892 }, { "epoch": 0.8658723939309416, "grad_norm": 0.5569949746131897, "learning_rate": 1.392965994271347e-06, "loss": 0.0633, "step": 48893 }, { "epoch": 0.8658901034679701, "grad_norm": 0.7402706742286682, "learning_rate": 1.3926039421486913e-06, "loss": 0.0552, "step": 48894 }, { "epoch": 0.8659078130049985, "grad_norm": 0.38849037885665894, "learning_rate": 1.3922419347929927e-06, "loss": 0.0393, "step": 48895 }, { "epoch": 0.8659255225420269, "grad_norm": 0.6788537502288818, "learning_rate": 1.3918799722054414e-06, "loss": 0.0524, "step": 48896 }, { "epoch": 0.8659432320790553, "grad_norm": 0.7783534526824951, "learning_rate": 1.391518054387232e-06, "loss": 0.0613, "step": 48897 }, { "epoch": 0.8659609416160838, "grad_norm": 0.7280173897743225, "learning_rate": 1.3911561813395479e-06, "loss": 0.0556, "step": 48898 }, { "epoch": 0.8659786511531122, "grad_norm": 0.5613798499107361, "learning_rate": 1.3907943530635836e-06, "loss": 0.0721, "step": 48899 }, { "epoch": 0.8659963606901406, "grad_norm": 1.2327592372894287, "learning_rate": 1.3904325695605314e-06, "loss": 0.07, "step": 48900 }, { "epoch": 0.8660140702271691, "grad_norm": 0.6655712723731995, "learning_rate": 1.3900708308315802e-06, "loss": 0.0376, "step": 48901 }, { "epoch": 0.8660317797641975, "grad_norm": 0.5092535614967346, "learning_rate": 1.3897091368779174e-06, "loss": 0.035, "step": 48902 }, { "epoch": 0.8660494893012259, "grad_norm": 0.4327925145626068, "learning_rate": 1.389347487700734e-06, "loss": 0.0596, "step": 48903 }, { "epoch": 0.8660671988382543, "grad_norm": 0.5732183456420898, "learning_rate": 1.388985883301222e-06, "loss": 0.049, "step": 48904 }, { "epoch": 0.8660849083752828, "grad_norm": 0.36747822165489197, "learning_rate": 1.3886243236805691e-06, "loss": 0.0461, "step": 48905 }, { "epoch": 0.8661026179123112, "grad_norm": 0.4028206467628479, "learning_rate": 1.3882628088399656e-06, "loss": 0.046, "step": 48906 }, { "epoch": 0.8661203274493396, "grad_norm": 0.5057902932167053, "learning_rate": 1.387901338780601e-06, "loss": 0.0451, "step": 48907 }, { "epoch": 0.866138036986368, "grad_norm": 0.30016008019447327, "learning_rate": 1.387539913503666e-06, "loss": 0.0415, "step": 48908 }, { "epoch": 0.8661557465233966, "grad_norm": 0.520209789276123, "learning_rate": 1.3871785330103459e-06, "loss": 0.054, "step": 48909 }, { "epoch": 0.866173456060425, "grad_norm": 0.5556319355964661, "learning_rate": 1.3868171973018317e-06, "loss": 0.0634, "step": 48910 }, { "epoch": 0.8661911655974533, "grad_norm": 0.6189303398132324, "learning_rate": 1.3864559063793125e-06, "loss": 0.0621, "step": 48911 }, { "epoch": 0.8662088751344817, "grad_norm": 0.47182539105415344, "learning_rate": 1.3860946602439756e-06, "loss": 0.0459, "step": 48912 }, { "epoch": 0.8662265846715103, "grad_norm": 0.701007068157196, "learning_rate": 1.38573345889701e-06, "loss": 0.0649, "step": 48913 }, { "epoch": 0.8662442942085387, "grad_norm": 0.7425388693809509, "learning_rate": 1.3853723023396047e-06, "loss": 0.0569, "step": 48914 }, { "epoch": 0.866262003745567, "grad_norm": 1.032174825668335, "learning_rate": 1.3850111905729507e-06, "loss": 0.0572, "step": 48915 }, { "epoch": 0.8662797132825956, "grad_norm": 0.584927499294281, "learning_rate": 1.38465012359823e-06, "loss": 0.0456, "step": 48916 }, { "epoch": 0.866297422819624, "grad_norm": 0.3776754140853882, "learning_rate": 1.384289101416632e-06, "loss": 0.029, "step": 48917 }, { "epoch": 0.8663151323566524, "grad_norm": 0.5746302008628845, "learning_rate": 1.3839281240293473e-06, "loss": 0.0547, "step": 48918 }, { "epoch": 0.8663328418936808, "grad_norm": 0.8296841382980347, "learning_rate": 1.3835671914375631e-06, "loss": 0.0537, "step": 48919 }, { "epoch": 0.8663505514307093, "grad_norm": 0.419566810131073, "learning_rate": 1.3832063036424635e-06, "loss": 0.0441, "step": 48920 }, { "epoch": 0.8663682609677377, "grad_norm": 0.4802343249320984, "learning_rate": 1.3828454606452395e-06, "loss": 0.046, "step": 48921 }, { "epoch": 0.8663859705047661, "grad_norm": 0.7979651689529419, "learning_rate": 1.382484662447075e-06, "loss": 0.063, "step": 48922 }, { "epoch": 0.8664036800417945, "grad_norm": 0.652896523475647, "learning_rate": 1.382123909049159e-06, "loss": 0.0548, "step": 48923 }, { "epoch": 0.866421389578823, "grad_norm": 0.3439217507839203, "learning_rate": 1.381763200452677e-06, "loss": 0.0448, "step": 48924 }, { "epoch": 0.8664390991158514, "grad_norm": 0.5863397121429443, "learning_rate": 1.3814025366588185e-06, "loss": 0.063, "step": 48925 }, { "epoch": 0.8664568086528798, "grad_norm": 0.7356769442558289, "learning_rate": 1.3810419176687705e-06, "loss": 0.0578, "step": 48926 }, { "epoch": 0.8664745181899082, "grad_norm": 0.6799341440200806, "learning_rate": 1.380681343483714e-06, "loss": 0.0758, "step": 48927 }, { "epoch": 0.8664922277269367, "grad_norm": 0.2891046404838562, "learning_rate": 1.380320814104838e-06, "loss": 0.0451, "step": 48928 }, { "epoch": 0.8665099372639651, "grad_norm": 0.5135318636894226, "learning_rate": 1.3799603295333312e-06, "loss": 0.031, "step": 48929 }, { "epoch": 0.8665276468009935, "grad_norm": 0.47131064534187317, "learning_rate": 1.3795998897703765e-06, "loss": 0.0733, "step": 48930 }, { "epoch": 0.866545356338022, "grad_norm": 0.42058899998664856, "learning_rate": 1.3792394948171593e-06, "loss": 0.0382, "step": 48931 }, { "epoch": 0.8665630658750504, "grad_norm": 0.5103768110275269, "learning_rate": 1.3788791446748654e-06, "loss": 0.0569, "step": 48932 }, { "epoch": 0.8665807754120788, "grad_norm": 0.6663082242012024, "learning_rate": 1.378518839344684e-06, "loss": 0.048, "step": 48933 }, { "epoch": 0.8665984849491072, "grad_norm": 0.6794207096099854, "learning_rate": 1.3781585788277956e-06, "loss": 0.0502, "step": 48934 }, { "epoch": 0.8666161944861357, "grad_norm": 0.3104431629180908, "learning_rate": 1.3777983631253893e-06, "loss": 0.0436, "step": 48935 }, { "epoch": 0.8666339040231641, "grad_norm": 1.3033137321472168, "learning_rate": 1.3774381922386509e-06, "loss": 0.0854, "step": 48936 }, { "epoch": 0.8666516135601925, "grad_norm": 0.5152018666267395, "learning_rate": 1.3770780661687592e-06, "loss": 0.0479, "step": 48937 }, { "epoch": 0.8666693230972209, "grad_norm": 0.35904258489608765, "learning_rate": 1.3767179849169037e-06, "loss": 0.0294, "step": 48938 }, { "epoch": 0.8666870326342494, "grad_norm": 0.8345000743865967, "learning_rate": 1.376357948484268e-06, "loss": 0.0885, "step": 48939 }, { "epoch": 0.8667047421712778, "grad_norm": 0.5407840609550476, "learning_rate": 1.37599795687204e-06, "loss": 0.0546, "step": 48940 }, { "epoch": 0.8667224517083062, "grad_norm": 0.4822184145450592, "learning_rate": 1.3756380100813948e-06, "loss": 0.0786, "step": 48941 }, { "epoch": 0.8667401612453346, "grad_norm": 0.531911313533783, "learning_rate": 1.3752781081135253e-06, "loss": 0.0576, "step": 48942 }, { "epoch": 0.8667578707823631, "grad_norm": 0.49522680044174194, "learning_rate": 1.3749182509696118e-06, "loss": 0.0619, "step": 48943 }, { "epoch": 0.8667755803193915, "grad_norm": 0.5406851172447205, "learning_rate": 1.3745584386508435e-06, "loss": 0.0742, "step": 48944 }, { "epoch": 0.8667932898564199, "grad_norm": 0.2907579839229584, "learning_rate": 1.3741986711583965e-06, "loss": 0.0406, "step": 48945 }, { "epoch": 0.8668109993934484, "grad_norm": 0.4555967450141907, "learning_rate": 1.3738389484934578e-06, "loss": 0.0516, "step": 48946 }, { "epoch": 0.8668287089304768, "grad_norm": 0.6869322061538696, "learning_rate": 1.3734792706572131e-06, "loss": 0.0909, "step": 48947 }, { "epoch": 0.8668464184675052, "grad_norm": 0.619199812412262, "learning_rate": 1.3731196376508398e-06, "loss": 0.0677, "step": 48948 }, { "epoch": 0.8668641280045336, "grad_norm": 0.6269999742507935, "learning_rate": 1.3727600494755254e-06, "loss": 0.0504, "step": 48949 }, { "epoch": 0.8668818375415621, "grad_norm": 0.8300172090530396, "learning_rate": 1.3724005061324524e-06, "loss": 0.0556, "step": 48950 }, { "epoch": 0.8668995470785905, "grad_norm": 0.3614543080329895, "learning_rate": 1.3720410076228046e-06, "loss": 0.0302, "step": 48951 }, { "epoch": 0.8669172566156189, "grad_norm": 0.6913368701934814, "learning_rate": 1.371681553947761e-06, "loss": 0.05, "step": 48952 }, { "epoch": 0.8669349661526473, "grad_norm": 0.6021667122840881, "learning_rate": 1.3713221451085078e-06, "loss": 0.0958, "step": 48953 }, { "epoch": 0.8669526756896758, "grad_norm": 0.625648558139801, "learning_rate": 1.3709627811062303e-06, "loss": 0.045, "step": 48954 }, { "epoch": 0.8669703852267042, "grad_norm": 0.42989885807037354, "learning_rate": 1.3706034619421027e-06, "loss": 0.0476, "step": 48955 }, { "epoch": 0.8669880947637326, "grad_norm": 0.5182889699935913, "learning_rate": 1.3702441876173105e-06, "loss": 0.0268, "step": 48956 }, { "epoch": 0.867005804300761, "grad_norm": 0.4721493124961853, "learning_rate": 1.369884958133038e-06, "loss": 0.0396, "step": 48957 }, { "epoch": 0.8670235138377895, "grad_norm": 0.3091970980167389, "learning_rate": 1.369525773490466e-06, "loss": 0.0747, "step": 48958 }, { "epoch": 0.8670412233748179, "grad_norm": 0.5322034358978271, "learning_rate": 1.3691666336907748e-06, "loss": 0.0691, "step": 48959 }, { "epoch": 0.8670589329118463, "grad_norm": 0.44318243861198425, "learning_rate": 1.3688075387351456e-06, "loss": 0.0399, "step": 48960 }, { "epoch": 0.8670766424488748, "grad_norm": 0.5716525316238403, "learning_rate": 1.3684484886247622e-06, "loss": 0.0566, "step": 48961 }, { "epoch": 0.8670943519859032, "grad_norm": 0.4560953378677368, "learning_rate": 1.3680894833608038e-06, "loss": 0.0311, "step": 48962 }, { "epoch": 0.8671120615229316, "grad_norm": 0.6468022465705872, "learning_rate": 1.3677305229444509e-06, "loss": 0.0491, "step": 48963 }, { "epoch": 0.86712977105996, "grad_norm": 0.4729291796684265, "learning_rate": 1.3673716073768878e-06, "loss": 0.0471, "step": 48964 }, { "epoch": 0.8671474805969885, "grad_norm": 0.7125588655471802, "learning_rate": 1.367012736659295e-06, "loss": 0.0438, "step": 48965 }, { "epoch": 0.8671651901340169, "grad_norm": 0.48355910181999207, "learning_rate": 1.3666539107928483e-06, "loss": 0.0396, "step": 48966 }, { "epoch": 0.8671828996710453, "grad_norm": 0.39869359135627747, "learning_rate": 1.3662951297787318e-06, "loss": 0.0454, "step": 48967 }, { "epoch": 0.8672006092080737, "grad_norm": 0.42104247212409973, "learning_rate": 1.365936393618128e-06, "loss": 0.0484, "step": 48968 }, { "epoch": 0.8672183187451022, "grad_norm": 0.4963115155696869, "learning_rate": 1.3655777023122124e-06, "loss": 0.0374, "step": 48969 }, { "epoch": 0.8672360282821306, "grad_norm": 0.27401241660118103, "learning_rate": 1.3652190558621625e-06, "loss": 0.0445, "step": 48970 }, { "epoch": 0.867253737819159, "grad_norm": 0.38836848735809326, "learning_rate": 1.3648604542691673e-06, "loss": 0.0298, "step": 48971 }, { "epoch": 0.8672714473561874, "grad_norm": 0.42620885372161865, "learning_rate": 1.3645018975344043e-06, "loss": 0.0496, "step": 48972 }, { "epoch": 0.867289156893216, "grad_norm": 0.5983364582061768, "learning_rate": 1.3641433856590475e-06, "loss": 0.0487, "step": 48973 }, { "epoch": 0.8673068664302443, "grad_norm": 0.4332764148712158, "learning_rate": 1.3637849186442807e-06, "loss": 0.0656, "step": 48974 }, { "epoch": 0.8673245759672727, "grad_norm": 0.5680034756660461, "learning_rate": 1.3634264964912852e-06, "loss": 0.0536, "step": 48975 }, { "epoch": 0.8673422855043013, "grad_norm": 0.2792760729789734, "learning_rate": 1.3630681192012345e-06, "loss": 0.0363, "step": 48976 }, { "epoch": 0.8673599950413297, "grad_norm": 0.3459042012691498, "learning_rate": 1.3627097867753096e-06, "loss": 0.0344, "step": 48977 }, { "epoch": 0.867377704578358, "grad_norm": 0.43632879853248596, "learning_rate": 1.3623514992146913e-06, "loss": 0.054, "step": 48978 }, { "epoch": 0.8673954141153865, "grad_norm": 0.6633635759353638, "learning_rate": 1.3619932565205567e-06, "loss": 0.0733, "step": 48979 }, { "epoch": 0.867413123652415, "grad_norm": 0.5131298303604126, "learning_rate": 1.3616350586940835e-06, "loss": 0.0353, "step": 48980 }, { "epoch": 0.8674308331894434, "grad_norm": 0.5504308938980103, "learning_rate": 1.3612769057364538e-06, "loss": 0.0542, "step": 48981 }, { "epoch": 0.8674485427264718, "grad_norm": 0.3616686165332794, "learning_rate": 1.360918797648842e-06, "loss": 0.0333, "step": 48982 }, { "epoch": 0.8674662522635002, "grad_norm": 0.40218183398246765, "learning_rate": 1.3605607344324317e-06, "loss": 0.0657, "step": 48983 }, { "epoch": 0.8674839618005287, "grad_norm": 0.4643293619155884, "learning_rate": 1.3602027160883924e-06, "loss": 0.048, "step": 48984 }, { "epoch": 0.8675016713375571, "grad_norm": 0.43107935786247253, "learning_rate": 1.3598447426179078e-06, "loss": 0.0533, "step": 48985 }, { "epoch": 0.8675193808745855, "grad_norm": 0.983863353729248, "learning_rate": 1.3594868140221572e-06, "loss": 0.0684, "step": 48986 }, { "epoch": 0.8675370904116139, "grad_norm": 0.6857304573059082, "learning_rate": 1.359128930302313e-06, "loss": 0.0655, "step": 48987 }, { "epoch": 0.8675547999486424, "grad_norm": 0.545573890209198, "learning_rate": 1.3587710914595542e-06, "loss": 0.049, "step": 48988 }, { "epoch": 0.8675725094856708, "grad_norm": 0.5328681468963623, "learning_rate": 1.3584132974950597e-06, "loss": 0.062, "step": 48989 }, { "epoch": 0.8675902190226992, "grad_norm": 0.35305145382881165, "learning_rate": 1.3580555484100055e-06, "loss": 0.0573, "step": 48990 }, { "epoch": 0.8676079285597277, "grad_norm": 0.6512709259986877, "learning_rate": 1.357697844205567e-06, "loss": 0.0612, "step": 48991 }, { "epoch": 0.8676256380967561, "grad_norm": 0.5455548167228699, "learning_rate": 1.3573401848829253e-06, "loss": 0.046, "step": 48992 }, { "epoch": 0.8676433476337845, "grad_norm": 0.33569416403770447, "learning_rate": 1.356982570443256e-06, "loss": 0.0591, "step": 48993 }, { "epoch": 0.8676610571708129, "grad_norm": 0.6756771206855774, "learning_rate": 1.3566250008877313e-06, "loss": 0.0772, "step": 48994 }, { "epoch": 0.8676787667078414, "grad_norm": 0.425360769033432, "learning_rate": 1.3562674762175304e-06, "loss": 0.0388, "step": 48995 }, { "epoch": 0.8676964762448698, "grad_norm": 0.3986888527870178, "learning_rate": 1.355909996433829e-06, "loss": 0.0452, "step": 48996 }, { "epoch": 0.8677141857818982, "grad_norm": 0.5080341100692749, "learning_rate": 1.3555525615378079e-06, "loss": 0.0396, "step": 48997 }, { "epoch": 0.8677318953189266, "grad_norm": 0.7347836494445801, "learning_rate": 1.355195171530636e-06, "loss": 0.0549, "step": 48998 }, { "epoch": 0.8677496048559551, "grad_norm": 0.3983926475048065, "learning_rate": 1.3548378264134875e-06, "loss": 0.0421, "step": 48999 }, { "epoch": 0.8677673143929835, "grad_norm": 0.38811740279197693, "learning_rate": 1.3544805261875498e-06, "loss": 0.0613, "step": 49000 }, { "epoch": 0.8677850239300119, "grad_norm": 0.4371584355831146, "learning_rate": 1.3541232708539869e-06, "loss": 0.0483, "step": 49001 }, { "epoch": 0.8678027334670403, "grad_norm": 0.653389036655426, "learning_rate": 1.3537660604139778e-06, "loss": 0.056, "step": 49002 }, { "epoch": 0.8678204430040688, "grad_norm": 0.763073742389679, "learning_rate": 1.3534088948687002e-06, "loss": 0.0622, "step": 49003 }, { "epoch": 0.8678381525410972, "grad_norm": 0.6394650936126709, "learning_rate": 1.3530517742193277e-06, "loss": 0.0799, "step": 49004 }, { "epoch": 0.8678558620781256, "grad_norm": 0.902974545955658, "learning_rate": 1.352694698467033e-06, "loss": 0.0696, "step": 49005 }, { "epoch": 0.8678735716151541, "grad_norm": 0.5012373328208923, "learning_rate": 1.3523376676129917e-06, "loss": 0.037, "step": 49006 }, { "epoch": 0.8678912811521825, "grad_norm": 0.5710457563400269, "learning_rate": 1.3519806816583797e-06, "loss": 0.0365, "step": 49007 }, { "epoch": 0.8679089906892109, "grad_norm": 0.4713747799396515, "learning_rate": 1.3516237406043708e-06, "loss": 0.0254, "step": 49008 }, { "epoch": 0.8679267002262393, "grad_norm": 0.6779513359069824, "learning_rate": 1.3512668444521393e-06, "loss": 0.0518, "step": 49009 }, { "epoch": 0.8679444097632678, "grad_norm": 0.6450409889221191, "learning_rate": 1.350909993202859e-06, "loss": 0.0592, "step": 49010 }, { "epoch": 0.8679621193002962, "grad_norm": 0.7796157002449036, "learning_rate": 1.3505531868577075e-06, "loss": 0.0599, "step": 49011 }, { "epoch": 0.8679798288373246, "grad_norm": 0.37251487374305725, "learning_rate": 1.3501964254178522e-06, "loss": 0.0417, "step": 49012 }, { "epoch": 0.867997538374353, "grad_norm": 0.7834919095039368, "learning_rate": 1.3498397088844706e-06, "loss": 0.0527, "step": 49013 }, { "epoch": 0.8680152479113815, "grad_norm": 0.5586647391319275, "learning_rate": 1.3494830372587363e-06, "loss": 0.0583, "step": 49014 }, { "epoch": 0.8680329574484099, "grad_norm": 0.63667231798172, "learning_rate": 1.349126410541824e-06, "loss": 0.0639, "step": 49015 }, { "epoch": 0.8680506669854383, "grad_norm": 0.6139299273490906, "learning_rate": 1.348769828734902e-06, "loss": 0.0375, "step": 49016 }, { "epoch": 0.8680683765224667, "grad_norm": 0.3046693801879883, "learning_rate": 1.3484132918391468e-06, "loss": 0.0368, "step": 49017 }, { "epoch": 0.8680860860594952, "grad_norm": 0.6397348642349243, "learning_rate": 1.348056799855732e-06, "loss": 0.0381, "step": 49018 }, { "epoch": 0.8681037955965236, "grad_norm": 0.6009154915809631, "learning_rate": 1.3477003527858283e-06, "loss": 0.0525, "step": 49019 }, { "epoch": 0.868121505133552, "grad_norm": 0.7883073687553406, "learning_rate": 1.34734395063061e-06, "loss": 0.0642, "step": 49020 }, { "epoch": 0.8681392146705805, "grad_norm": 0.4188898801803589, "learning_rate": 1.3469875933912496e-06, "loss": 0.0603, "step": 49021 }, { "epoch": 0.8681569242076089, "grad_norm": 0.7666136622428894, "learning_rate": 1.3466312810689208e-06, "loss": 0.0605, "step": 49022 }, { "epoch": 0.8681746337446373, "grad_norm": 0.6822959780693054, "learning_rate": 1.3462750136647928e-06, "loss": 0.0516, "step": 49023 }, { "epoch": 0.8681923432816657, "grad_norm": 0.1699010133743286, "learning_rate": 1.345918791180038e-06, "loss": 0.0799, "step": 49024 }, { "epoch": 0.8682100528186942, "grad_norm": 0.5122736692428589, "learning_rate": 1.3455626136158322e-06, "loss": 0.0818, "step": 49025 }, { "epoch": 0.8682277623557226, "grad_norm": 0.15162146091461182, "learning_rate": 1.3452064809733428e-06, "loss": 0.0356, "step": 49026 }, { "epoch": 0.868245471892751, "grad_norm": 0.25233352184295654, "learning_rate": 1.344850393253742e-06, "loss": 0.0326, "step": 49027 }, { "epoch": 0.8682631814297794, "grad_norm": 0.5508344173431396, "learning_rate": 1.3444943504581991e-06, "loss": 0.0472, "step": 49028 }, { "epoch": 0.8682808909668079, "grad_norm": 0.8102402687072754, "learning_rate": 1.3441383525878948e-06, "loss": 0.0683, "step": 49029 }, { "epoch": 0.8682986005038363, "grad_norm": 0.5581861734390259, "learning_rate": 1.3437823996439913e-06, "loss": 0.048, "step": 49030 }, { "epoch": 0.8683163100408647, "grad_norm": 0.8764273524284363, "learning_rate": 1.3434264916276611e-06, "loss": 0.0361, "step": 49031 }, { "epoch": 0.8683340195778931, "grad_norm": 0.6271879076957703, "learning_rate": 1.3430706285400818e-06, "loss": 0.0424, "step": 49032 }, { "epoch": 0.8683517291149216, "grad_norm": 0.44688913226127625, "learning_rate": 1.3427148103824138e-06, "loss": 0.0482, "step": 49033 }, { "epoch": 0.86836943865195, "grad_norm": 0.830230176448822, "learning_rate": 1.3423590371558331e-06, "loss": 0.0479, "step": 49034 }, { "epoch": 0.8683871481889784, "grad_norm": 0.26243093609809875, "learning_rate": 1.3420033088615102e-06, "loss": 0.0461, "step": 49035 }, { "epoch": 0.868404857726007, "grad_norm": 0.5803062915802002, "learning_rate": 1.341647625500616e-06, "loss": 0.0515, "step": 49036 }, { "epoch": 0.8684225672630353, "grad_norm": 0.6963814496994019, "learning_rate": 1.3412919870743178e-06, "loss": 0.0534, "step": 49037 }, { "epoch": 0.8684402768000637, "grad_norm": 0.5647442936897278, "learning_rate": 1.3409363935837899e-06, "loss": 0.0366, "step": 49038 }, { "epoch": 0.8684579863370921, "grad_norm": 0.7659803628921509, "learning_rate": 1.340580845030201e-06, "loss": 0.0583, "step": 49039 }, { "epoch": 0.8684756958741207, "grad_norm": 0.8399531245231628, "learning_rate": 1.3402253414147154e-06, "loss": 0.0659, "step": 49040 }, { "epoch": 0.868493405411149, "grad_norm": 0.6446525454521179, "learning_rate": 1.3398698827385086e-06, "loss": 0.0545, "step": 49041 }, { "epoch": 0.8685111149481775, "grad_norm": 0.513472855091095, "learning_rate": 1.3395144690027483e-06, "loss": 0.0493, "step": 49042 }, { "epoch": 0.8685288244852059, "grad_norm": 0.6683397889137268, "learning_rate": 1.339159100208605e-06, "loss": 0.0597, "step": 49043 }, { "epoch": 0.8685465340222344, "grad_norm": 0.9351035356521606, "learning_rate": 1.3388037763572448e-06, "loss": 0.0803, "step": 49044 }, { "epoch": 0.8685642435592628, "grad_norm": 0.5000598430633545, "learning_rate": 1.3384484974498395e-06, "loss": 0.0508, "step": 49045 }, { "epoch": 0.8685819530962912, "grad_norm": 0.28201204538345337, "learning_rate": 1.3380932634875553e-06, "loss": 0.0332, "step": 49046 }, { "epoch": 0.8685996626333196, "grad_norm": 0.5294650197029114, "learning_rate": 1.337738074471563e-06, "loss": 0.0761, "step": 49047 }, { "epoch": 0.8686173721703481, "grad_norm": 0.5410444140434265, "learning_rate": 1.337382930403031e-06, "loss": 0.0584, "step": 49048 }, { "epoch": 0.8686350817073765, "grad_norm": 0.33846327662467957, "learning_rate": 1.3370278312831258e-06, "loss": 0.0366, "step": 49049 }, { "epoch": 0.8686527912444049, "grad_norm": 0.42026638984680176, "learning_rate": 1.336672777113021e-06, "loss": 0.0417, "step": 49050 }, { "epoch": 0.8686705007814334, "grad_norm": 0.3336934447288513, "learning_rate": 1.3363177678938777e-06, "loss": 0.0445, "step": 49051 }, { "epoch": 0.8686882103184618, "grad_norm": 0.6427770256996155, "learning_rate": 1.3359628036268678e-06, "loss": 0.0634, "step": 49052 }, { "epoch": 0.8687059198554902, "grad_norm": 0.637100100517273, "learning_rate": 1.3356078843131576e-06, "loss": 0.0718, "step": 49053 }, { "epoch": 0.8687236293925186, "grad_norm": 0.45567137002944946, "learning_rate": 1.3352530099539173e-06, "loss": 0.064, "step": 49054 }, { "epoch": 0.8687413389295471, "grad_norm": 0.7360872030258179, "learning_rate": 1.3348981805503113e-06, "loss": 0.0751, "step": 49055 }, { "epoch": 0.8687590484665755, "grad_norm": 0.9221479892730713, "learning_rate": 1.334543396103507e-06, "loss": 0.0754, "step": 49056 }, { "epoch": 0.8687767580036039, "grad_norm": 0.3574518859386444, "learning_rate": 1.3341886566146732e-06, "loss": 0.0559, "step": 49057 }, { "epoch": 0.8687944675406323, "grad_norm": 0.5811620354652405, "learning_rate": 1.333833962084976e-06, "loss": 0.0515, "step": 49058 }, { "epoch": 0.8688121770776608, "grad_norm": 0.31272751092910767, "learning_rate": 1.3334793125155842e-06, "loss": 0.0536, "step": 49059 }, { "epoch": 0.8688298866146892, "grad_norm": 0.5021603107452393, "learning_rate": 1.333124707907662e-06, "loss": 0.0741, "step": 49060 }, { "epoch": 0.8688475961517176, "grad_norm": 0.1535128504037857, "learning_rate": 1.33277014826238e-06, "loss": 0.0364, "step": 49061 }, { "epoch": 0.868865305688746, "grad_norm": 0.6083101630210876, "learning_rate": 1.332415633580899e-06, "loss": 0.0651, "step": 49062 }, { "epoch": 0.8688830152257745, "grad_norm": 0.7235421538352966, "learning_rate": 1.3320611638643899e-06, "loss": 0.0706, "step": 49063 }, { "epoch": 0.8689007247628029, "grad_norm": 0.4624097943305969, "learning_rate": 1.3317067391140181e-06, "loss": 0.054, "step": 49064 }, { "epoch": 0.8689184342998313, "grad_norm": 1.0217835903167725, "learning_rate": 1.331352359330943e-06, "loss": 0.0715, "step": 49065 }, { "epoch": 0.8689361438368598, "grad_norm": 0.5363737344741821, "learning_rate": 1.33099802451634e-06, "loss": 0.0617, "step": 49066 }, { "epoch": 0.8689538533738882, "grad_norm": 0.5569771528244019, "learning_rate": 1.3306437346713702e-06, "loss": 0.0594, "step": 49067 }, { "epoch": 0.8689715629109166, "grad_norm": 0.5341365933418274, "learning_rate": 1.3302894897972023e-06, "loss": 0.0447, "step": 49068 }, { "epoch": 0.868989272447945, "grad_norm": 0.942048192024231, "learning_rate": 1.3299352898949974e-06, "loss": 0.069, "step": 49069 }, { "epoch": 0.8690069819849735, "grad_norm": 0.547878623008728, "learning_rate": 1.3295811349659226e-06, "loss": 0.032, "step": 49070 }, { "epoch": 0.8690246915220019, "grad_norm": 0.6421041488647461, "learning_rate": 1.3292270250111455e-06, "loss": 0.0524, "step": 49071 }, { "epoch": 0.8690424010590303, "grad_norm": 0.578869640827179, "learning_rate": 1.3288729600318267e-06, "loss": 0.051, "step": 49072 }, { "epoch": 0.8690601105960587, "grad_norm": 0.6207163333892822, "learning_rate": 1.328518940029132e-06, "loss": 0.0522, "step": 49073 }, { "epoch": 0.8690778201330872, "grad_norm": 0.6471620798110962, "learning_rate": 1.3281649650042287e-06, "loss": 0.0428, "step": 49074 }, { "epoch": 0.8690955296701156, "grad_norm": 0.5691545009613037, "learning_rate": 1.3278110349582795e-06, "loss": 0.0616, "step": 49075 }, { "epoch": 0.869113239207144, "grad_norm": 0.5483299493789673, "learning_rate": 1.3274571498924482e-06, "loss": 0.0503, "step": 49076 }, { "epoch": 0.8691309487441724, "grad_norm": 0.4420487582683563, "learning_rate": 1.3271033098079005e-06, "loss": 0.0504, "step": 49077 }, { "epoch": 0.8691486582812009, "grad_norm": 0.6487451791763306, "learning_rate": 1.326749514705804e-06, "loss": 0.0501, "step": 49078 }, { "epoch": 0.8691663678182293, "grad_norm": 0.6723699569702148, "learning_rate": 1.3263957645873142e-06, "loss": 0.0366, "step": 49079 }, { "epoch": 0.8691840773552577, "grad_norm": 0.4853983223438263, "learning_rate": 1.3260420594535987e-06, "loss": 0.0547, "step": 49080 }, { "epoch": 0.8692017868922862, "grad_norm": 0.938069760799408, "learning_rate": 1.3256883993058234e-06, "loss": 0.0623, "step": 49081 }, { "epoch": 0.8692194964293146, "grad_norm": 0.5164501070976257, "learning_rate": 1.3253347841451535e-06, "loss": 0.0524, "step": 49082 }, { "epoch": 0.869237205966343, "grad_norm": 0.11079201847314835, "learning_rate": 1.3249812139727453e-06, "loss": 0.0515, "step": 49083 }, { "epoch": 0.8692549155033714, "grad_norm": 0.7236977815628052, "learning_rate": 1.324627688789764e-06, "loss": 0.0499, "step": 49084 }, { "epoch": 0.8692726250403999, "grad_norm": 0.23150590062141418, "learning_rate": 1.3242742085973774e-06, "loss": 0.0548, "step": 49085 }, { "epoch": 0.8692903345774283, "grad_norm": 0.7699074149131775, "learning_rate": 1.3239207733967445e-06, "loss": 0.0877, "step": 49086 }, { "epoch": 0.8693080441144567, "grad_norm": 0.44705361127853394, "learning_rate": 1.3235673831890278e-06, "loss": 0.0425, "step": 49087 }, { "epoch": 0.8693257536514851, "grad_norm": 0.5617289543151855, "learning_rate": 1.3232140379753926e-06, "loss": 0.0679, "step": 49088 }, { "epoch": 0.8693434631885136, "grad_norm": 0.4152050018310547, "learning_rate": 1.3228607377570019e-06, "loss": 0.0434, "step": 49089 }, { "epoch": 0.869361172725542, "grad_norm": 0.32772716879844666, "learning_rate": 1.3225074825350126e-06, "loss": 0.0589, "step": 49090 }, { "epoch": 0.8693788822625704, "grad_norm": 0.6164641976356506, "learning_rate": 1.3221542723105923e-06, "loss": 0.0369, "step": 49091 }, { "epoch": 0.8693965917995988, "grad_norm": 0.6896106600761414, "learning_rate": 1.3218011070848984e-06, "loss": 0.0454, "step": 49092 }, { "epoch": 0.8694143013366273, "grad_norm": 0.48264235258102417, "learning_rate": 1.3214479868591002e-06, "loss": 0.0416, "step": 49093 }, { "epoch": 0.8694320108736557, "grad_norm": 0.8548569679260254, "learning_rate": 1.3210949116343479e-06, "loss": 0.0688, "step": 49094 }, { "epoch": 0.8694497204106841, "grad_norm": 0.39102455973625183, "learning_rate": 1.3207418814118127e-06, "loss": 0.0288, "step": 49095 }, { "epoch": 0.8694674299477126, "grad_norm": 0.74607914686203, "learning_rate": 1.3203888961926568e-06, "loss": 0.0609, "step": 49096 }, { "epoch": 0.869485139484741, "grad_norm": 0.5494359731674194, "learning_rate": 1.3200359559780346e-06, "loss": 0.0319, "step": 49097 }, { "epoch": 0.8695028490217694, "grad_norm": 0.9488407373428345, "learning_rate": 1.3196830607691112e-06, "loss": 0.04, "step": 49098 }, { "epoch": 0.8695205585587978, "grad_norm": 0.36370688676834106, "learning_rate": 1.3193302105670463e-06, "loss": 0.0447, "step": 49099 }, { "epoch": 0.8695382680958263, "grad_norm": 0.8060057759284973, "learning_rate": 1.3189774053730035e-06, "loss": 0.0587, "step": 49100 }, { "epoch": 0.8695559776328547, "grad_norm": 0.825393795967102, "learning_rate": 1.318624645188139e-06, "loss": 0.0558, "step": 49101 }, { "epoch": 0.8695736871698831, "grad_norm": 0.5056101679801941, "learning_rate": 1.3182719300136148e-06, "loss": 0.0738, "step": 49102 }, { "epoch": 0.8695913967069115, "grad_norm": 0.5674439072608948, "learning_rate": 1.3179192598505934e-06, "loss": 0.0606, "step": 49103 }, { "epoch": 0.86960910624394, "grad_norm": 0.36868274211883545, "learning_rate": 1.3175666347002324e-06, "loss": 0.0667, "step": 49104 }, { "epoch": 0.8696268157809685, "grad_norm": 0.511694073677063, "learning_rate": 1.317214054563694e-06, "loss": 0.0335, "step": 49105 }, { "epoch": 0.8696445253179969, "grad_norm": 0.823249101638794, "learning_rate": 1.3168615194421373e-06, "loss": 0.0466, "step": 49106 }, { "epoch": 0.8696622348550253, "grad_norm": 0.8155247569084167, "learning_rate": 1.3165090293367249e-06, "loss": 0.0638, "step": 49107 }, { "epoch": 0.8696799443920538, "grad_norm": 0.6592181324958801, "learning_rate": 1.3161565842486123e-06, "loss": 0.0523, "step": 49108 }, { "epoch": 0.8696976539290822, "grad_norm": 0.34574612975120544, "learning_rate": 1.3158041841789587e-06, "loss": 0.0481, "step": 49109 }, { "epoch": 0.8697153634661106, "grad_norm": 0.4316016733646393, "learning_rate": 1.3154518291289296e-06, "loss": 0.0527, "step": 49110 }, { "epoch": 0.8697330730031391, "grad_norm": 0.6952853798866272, "learning_rate": 1.3150995190996763e-06, "loss": 0.0571, "step": 49111 }, { "epoch": 0.8697507825401675, "grad_norm": 0.8412291407585144, "learning_rate": 1.3147472540923605e-06, "loss": 0.0565, "step": 49112 }, { "epoch": 0.8697684920771959, "grad_norm": 0.6057862639427185, "learning_rate": 1.3143950341081435e-06, "loss": 0.054, "step": 49113 }, { "epoch": 0.8697862016142243, "grad_norm": 0.5762847661972046, "learning_rate": 1.3140428591481822e-06, "loss": 0.0656, "step": 49114 }, { "epoch": 0.8698039111512528, "grad_norm": 0.5326551198959351, "learning_rate": 1.3136907292136363e-06, "loss": 0.0531, "step": 49115 }, { "epoch": 0.8698216206882812, "grad_norm": 0.6666935086250305, "learning_rate": 1.3133386443056628e-06, "loss": 0.0564, "step": 49116 }, { "epoch": 0.8698393302253096, "grad_norm": 0.4720725119113922, "learning_rate": 1.3129866044254207e-06, "loss": 0.0471, "step": 49117 }, { "epoch": 0.869857039762338, "grad_norm": 0.2409256100654602, "learning_rate": 1.312634609574071e-06, "loss": 0.0451, "step": 49118 }, { "epoch": 0.8698747492993665, "grad_norm": 0.36289602518081665, "learning_rate": 1.3122826597527677e-06, "loss": 0.0522, "step": 49119 }, { "epoch": 0.8698924588363949, "grad_norm": 0.6824136972427368, "learning_rate": 1.311930754962668e-06, "loss": 0.0587, "step": 49120 }, { "epoch": 0.8699101683734233, "grad_norm": 0.8145967125892639, "learning_rate": 1.3115788952049346e-06, "loss": 0.0606, "step": 49121 }, { "epoch": 0.8699278779104517, "grad_norm": 0.8424911499023438, "learning_rate": 1.3112270804807213e-06, "loss": 0.0914, "step": 49122 }, { "epoch": 0.8699455874474802, "grad_norm": 0.302293598651886, "learning_rate": 1.3108753107911826e-06, "loss": 0.0647, "step": 49123 }, { "epoch": 0.8699632969845086, "grad_norm": 1.2377468347549438, "learning_rate": 1.3105235861374821e-06, "loss": 0.086, "step": 49124 }, { "epoch": 0.869981006521537, "grad_norm": 0.3306931257247925, "learning_rate": 1.3101719065207774e-06, "loss": 0.0395, "step": 49125 }, { "epoch": 0.8699987160585655, "grad_norm": 0.8390690088272095, "learning_rate": 1.3098202719422192e-06, "loss": 0.0535, "step": 49126 }, { "epoch": 0.8700164255955939, "grad_norm": 0.5405842065811157, "learning_rate": 1.3094686824029666e-06, "loss": 0.042, "step": 49127 }, { "epoch": 0.8700341351326223, "grad_norm": 0.5700508952140808, "learning_rate": 1.3091171379041822e-06, "loss": 0.0451, "step": 49128 }, { "epoch": 0.8700518446696507, "grad_norm": 0.8033018112182617, "learning_rate": 1.3087656384470148e-06, "loss": 0.0626, "step": 49129 }, { "epoch": 0.8700695542066792, "grad_norm": 0.8472084999084473, "learning_rate": 1.3084141840326218e-06, "loss": 0.0623, "step": 49130 }, { "epoch": 0.8700872637437076, "grad_norm": 0.2449239045381546, "learning_rate": 1.3080627746621626e-06, "loss": 0.0312, "step": 49131 }, { "epoch": 0.870104973280736, "grad_norm": 0.49430912733078003, "learning_rate": 1.3077114103367927e-06, "loss": 0.0447, "step": 49132 }, { "epoch": 0.8701226828177644, "grad_norm": 0.36220782995224, "learning_rate": 1.3073600910576661e-06, "loss": 0.0429, "step": 49133 }, { "epoch": 0.8701403923547929, "grad_norm": 0.48845386505126953, "learning_rate": 1.3070088168259403e-06, "loss": 0.04, "step": 49134 }, { "epoch": 0.8701581018918213, "grad_norm": 0.6154674887657166, "learning_rate": 1.3066575876427728e-06, "loss": 0.0417, "step": 49135 }, { "epoch": 0.8701758114288497, "grad_norm": 0.7827775478363037, "learning_rate": 1.3063064035093158e-06, "loss": 0.0407, "step": 49136 }, { "epoch": 0.8701935209658781, "grad_norm": 0.7375428676605225, "learning_rate": 1.3059552644267236e-06, "loss": 0.0578, "step": 49137 }, { "epoch": 0.8702112305029066, "grad_norm": 0.3768410086631775, "learning_rate": 1.3056041703961551e-06, "loss": 0.0492, "step": 49138 }, { "epoch": 0.870228940039935, "grad_norm": 0.5635429620742798, "learning_rate": 1.3052531214187662e-06, "loss": 0.0594, "step": 49139 }, { "epoch": 0.8702466495769634, "grad_norm": 0.626197338104248, "learning_rate": 1.304902117495706e-06, "loss": 0.0631, "step": 49140 }, { "epoch": 0.8702643591139919, "grad_norm": 0.8797425031661987, "learning_rate": 1.3045511586281334e-06, "loss": 0.07, "step": 49141 }, { "epoch": 0.8702820686510203, "grad_norm": 0.6939835548400879, "learning_rate": 1.3042002448172008e-06, "loss": 0.0836, "step": 49142 }, { "epoch": 0.8702997781880487, "grad_norm": 0.6692020297050476, "learning_rate": 1.3038493760640656e-06, "loss": 0.0586, "step": 49143 }, { "epoch": 0.8703174877250771, "grad_norm": 0.8606781363487244, "learning_rate": 1.3034985523698806e-06, "loss": 0.0503, "step": 49144 }, { "epoch": 0.8703351972621056, "grad_norm": 0.4877850115299225, "learning_rate": 1.3031477737357994e-06, "loss": 0.0517, "step": 49145 }, { "epoch": 0.870352906799134, "grad_norm": 0.36674168705940247, "learning_rate": 1.3027970401629796e-06, "loss": 0.0433, "step": 49146 }, { "epoch": 0.8703706163361624, "grad_norm": 0.48675692081451416, "learning_rate": 1.3024463516525687e-06, "loss": 0.0355, "step": 49147 }, { "epoch": 0.8703883258731908, "grad_norm": 0.2260931134223938, "learning_rate": 1.3020957082057256e-06, "loss": 0.0272, "step": 49148 }, { "epoch": 0.8704060354102193, "grad_norm": 0.45879867672920227, "learning_rate": 1.3017451098236045e-06, "loss": 0.0473, "step": 49149 }, { "epoch": 0.8704237449472477, "grad_norm": 0.39977169036865234, "learning_rate": 1.3013945565073527e-06, "loss": 0.0423, "step": 49150 }, { "epoch": 0.8704414544842761, "grad_norm": 0.6678788065910339, "learning_rate": 1.3010440482581276e-06, "loss": 0.0477, "step": 49151 }, { "epoch": 0.8704591640213045, "grad_norm": 0.9379112720489502, "learning_rate": 1.3006935850770802e-06, "loss": 0.065, "step": 49152 }, { "epoch": 0.870476873558333, "grad_norm": 0.4149567782878876, "learning_rate": 1.3003431669653693e-06, "loss": 0.0388, "step": 49153 }, { "epoch": 0.8704945830953614, "grad_norm": 0.7404467463493347, "learning_rate": 1.2999927939241424e-06, "loss": 0.0671, "step": 49154 }, { "epoch": 0.8705122926323898, "grad_norm": 0.8670689463615417, "learning_rate": 1.299642465954552e-06, "loss": 0.0661, "step": 49155 }, { "epoch": 0.8705300021694183, "grad_norm": 0.7548733353614807, "learning_rate": 1.2992921830577536e-06, "loss": 0.0662, "step": 49156 }, { "epoch": 0.8705477117064467, "grad_norm": 0.40198251605033875, "learning_rate": 1.2989419452349e-06, "loss": 0.0589, "step": 49157 }, { "epoch": 0.8705654212434751, "grad_norm": 0.49161452054977417, "learning_rate": 1.29859175248714e-06, "loss": 0.0378, "step": 49158 }, { "epoch": 0.8705831307805035, "grad_norm": 0.49634262919425964, "learning_rate": 1.2982416048156259e-06, "loss": 0.0646, "step": 49159 }, { "epoch": 0.870600840317532, "grad_norm": 0.6332255601882935, "learning_rate": 1.2978915022215154e-06, "loss": 0.0696, "step": 49160 }, { "epoch": 0.8706185498545604, "grad_norm": 0.4205540418624878, "learning_rate": 1.297541444705949e-06, "loss": 0.0492, "step": 49161 }, { "epoch": 0.8706362593915888, "grad_norm": 0.31149357557296753, "learning_rate": 1.2971914322700895e-06, "loss": 0.0442, "step": 49162 }, { "epoch": 0.8706539689286172, "grad_norm": 0.9467689990997314, "learning_rate": 1.2968414649150822e-06, "loss": 0.0327, "step": 49163 }, { "epoch": 0.8706716784656457, "grad_norm": 0.6730339527130127, "learning_rate": 1.2964915426420849e-06, "loss": 0.0595, "step": 49164 }, { "epoch": 0.8706893880026741, "grad_norm": 0.49674859642982483, "learning_rate": 1.2961416654522413e-06, "loss": 0.0477, "step": 49165 }, { "epoch": 0.8707070975397025, "grad_norm": 0.46092796325683594, "learning_rate": 1.2957918333467057e-06, "loss": 0.0515, "step": 49166 }, { "epoch": 0.8707248070767309, "grad_norm": 0.5787297487258911, "learning_rate": 1.2954420463266303e-06, "loss": 0.036, "step": 49167 }, { "epoch": 0.8707425166137595, "grad_norm": 0.6107402443885803, "learning_rate": 1.2950923043931628e-06, "loss": 0.0513, "step": 49168 }, { "epoch": 0.8707602261507879, "grad_norm": 0.7655894756317139, "learning_rate": 1.2947426075474555e-06, "loss": 0.0701, "step": 49169 }, { "epoch": 0.8707779356878163, "grad_norm": 0.40227749943733215, "learning_rate": 1.294392955790659e-06, "loss": 0.0314, "step": 49170 }, { "epoch": 0.8707956452248448, "grad_norm": 0.6522651314735413, "learning_rate": 1.2940433491239229e-06, "loss": 0.0425, "step": 49171 }, { "epoch": 0.8708133547618732, "grad_norm": 0.4377966523170471, "learning_rate": 1.293693787548399e-06, "loss": 0.0671, "step": 49172 }, { "epoch": 0.8708310642989016, "grad_norm": 1.1745377779006958, "learning_rate": 1.2933442710652348e-06, "loss": 0.0694, "step": 49173 }, { "epoch": 0.87084877383593, "grad_norm": 0.49316075444221497, "learning_rate": 1.2929947996755848e-06, "loss": 0.0519, "step": 49174 }, { "epoch": 0.8708664833729585, "grad_norm": 0.6702790856361389, "learning_rate": 1.2926453733805927e-06, "loss": 0.0643, "step": 49175 }, { "epoch": 0.8708841929099869, "grad_norm": 0.3296279311180115, "learning_rate": 1.2922959921814126e-06, "loss": 0.04, "step": 49176 }, { "epoch": 0.8709019024470153, "grad_norm": 0.6002660989761353, "learning_rate": 1.2919466560791903e-06, "loss": 0.0571, "step": 49177 }, { "epoch": 0.8709196119840437, "grad_norm": 0.5160822868347168, "learning_rate": 1.29159736507508e-06, "loss": 0.0754, "step": 49178 }, { "epoch": 0.8709373215210722, "grad_norm": 0.6350802183151245, "learning_rate": 1.2912481191702259e-06, "loss": 0.0663, "step": 49179 }, { "epoch": 0.8709550310581006, "grad_norm": 0.6197202205657959, "learning_rate": 1.2908989183657781e-06, "loss": 0.0403, "step": 49180 }, { "epoch": 0.870972740595129, "grad_norm": 0.34150755405426025, "learning_rate": 1.2905497626628866e-06, "loss": 0.0379, "step": 49181 }, { "epoch": 0.8709904501321574, "grad_norm": 0.5240839123725891, "learning_rate": 1.2902006520626997e-06, "loss": 0.0645, "step": 49182 }, { "epoch": 0.8710081596691859, "grad_norm": 0.6080427765846252, "learning_rate": 1.2898515865663653e-06, "loss": 0.0581, "step": 49183 }, { "epoch": 0.8710258692062143, "grad_norm": 0.44033440947532654, "learning_rate": 1.2895025661750337e-06, "loss": 0.0371, "step": 49184 }, { "epoch": 0.8710435787432427, "grad_norm": 0.43149274587631226, "learning_rate": 1.2891535908898544e-06, "loss": 0.0178, "step": 49185 }, { "epoch": 0.8710612882802712, "grad_norm": 0.45392903685569763, "learning_rate": 1.2888046607119697e-06, "loss": 0.0221, "step": 49186 }, { "epoch": 0.8710789978172996, "grad_norm": 0.31345391273498535, "learning_rate": 1.2884557756425302e-06, "loss": 0.0421, "step": 49187 }, { "epoch": 0.871096707354328, "grad_norm": 0.731381356716156, "learning_rate": 1.2881069356826868e-06, "loss": 0.0773, "step": 49188 }, { "epoch": 0.8711144168913564, "grad_norm": 0.6793810129165649, "learning_rate": 1.2877581408335854e-06, "loss": 0.0374, "step": 49189 }, { "epoch": 0.8711321264283849, "grad_norm": 0.6945325136184692, "learning_rate": 1.287409391096368e-06, "loss": 0.0509, "step": 49190 }, { "epoch": 0.8711498359654133, "grad_norm": 0.6418722867965698, "learning_rate": 1.287060686472189e-06, "loss": 0.0397, "step": 49191 }, { "epoch": 0.8711675455024417, "grad_norm": 0.4846142530441284, "learning_rate": 1.2867120269621973e-06, "loss": 0.0508, "step": 49192 }, { "epoch": 0.8711852550394701, "grad_norm": 0.7307937145233154, "learning_rate": 1.2863634125675321e-06, "loss": 0.0556, "step": 49193 }, { "epoch": 0.8712029645764986, "grad_norm": 0.5529274940490723, "learning_rate": 1.2860148432893443e-06, "loss": 0.036, "step": 49194 }, { "epoch": 0.871220674113527, "grad_norm": 0.5930052995681763, "learning_rate": 1.285666319128781e-06, "loss": 0.0442, "step": 49195 }, { "epoch": 0.8712383836505554, "grad_norm": 0.7647815346717834, "learning_rate": 1.2853178400869915e-06, "loss": 0.0636, "step": 49196 }, { "epoch": 0.8712560931875838, "grad_norm": 0.9586454629898071, "learning_rate": 1.2849694061651146e-06, "loss": 0.0714, "step": 49197 }, { "epoch": 0.8712738027246123, "grad_norm": 0.3495754897594452, "learning_rate": 1.284621017364303e-06, "loss": 0.0676, "step": 49198 }, { "epoch": 0.8712915122616407, "grad_norm": 0.559504508972168, "learning_rate": 1.2842726736857007e-06, "loss": 0.0366, "step": 49199 }, { "epoch": 0.8713092217986691, "grad_norm": 0.552072286605835, "learning_rate": 1.2839243751304536e-06, "loss": 0.0441, "step": 49200 }, { "epoch": 0.8713269313356976, "grad_norm": 0.27317744493484497, "learning_rate": 1.283576121699709e-06, "loss": 0.0237, "step": 49201 }, { "epoch": 0.871344640872726, "grad_norm": 0.35665205121040344, "learning_rate": 1.2832279133946106e-06, "loss": 0.0618, "step": 49202 }, { "epoch": 0.8713623504097544, "grad_norm": 0.5143118500709534, "learning_rate": 1.2828797502163081e-06, "loss": 0.0391, "step": 49203 }, { "epoch": 0.8713800599467828, "grad_norm": 0.5267699956893921, "learning_rate": 1.2825316321659403e-06, "loss": 0.0435, "step": 49204 }, { "epoch": 0.8713977694838113, "grad_norm": 0.7858079671859741, "learning_rate": 1.2821835592446563e-06, "loss": 0.0801, "step": 49205 }, { "epoch": 0.8714154790208397, "grad_norm": 1.0101346969604492, "learning_rate": 1.2818355314536052e-06, "loss": 0.0953, "step": 49206 }, { "epoch": 0.8714331885578681, "grad_norm": 0.8302464485168457, "learning_rate": 1.2814875487939226e-06, "loss": 0.0562, "step": 49207 }, { "epoch": 0.8714508980948965, "grad_norm": 0.3188406527042389, "learning_rate": 1.2811396112667596e-06, "loss": 0.0475, "step": 49208 }, { "epoch": 0.871468607631925, "grad_norm": 0.7674953937530518, "learning_rate": 1.28079171887326e-06, "loss": 0.0619, "step": 49209 }, { "epoch": 0.8714863171689534, "grad_norm": 0.5051116347312927, "learning_rate": 1.2804438716145678e-06, "loss": 0.0375, "step": 49210 }, { "epoch": 0.8715040267059818, "grad_norm": 0.8799169063568115, "learning_rate": 1.2800960694918274e-06, "loss": 0.0566, "step": 49211 }, { "epoch": 0.8715217362430102, "grad_norm": 0.43312883377075195, "learning_rate": 1.2797483125061843e-06, "loss": 0.0349, "step": 49212 }, { "epoch": 0.8715394457800387, "grad_norm": 0.6276100277900696, "learning_rate": 1.2794006006587827e-06, "loss": 0.0494, "step": 49213 }, { "epoch": 0.8715571553170671, "grad_norm": 1.4048367738723755, "learning_rate": 1.2790529339507634e-06, "loss": 0.0678, "step": 49214 }, { "epoch": 0.8715748648540955, "grad_norm": 0.5501505732536316, "learning_rate": 1.278705312383272e-06, "loss": 0.0564, "step": 49215 }, { "epoch": 0.871592574391124, "grad_norm": 0.5379178524017334, "learning_rate": 1.2783577359574528e-06, "loss": 0.0682, "step": 49216 }, { "epoch": 0.8716102839281524, "grad_norm": 0.8354009985923767, "learning_rate": 1.2780102046744513e-06, "loss": 0.0667, "step": 49217 }, { "epoch": 0.8716279934651808, "grad_norm": 0.5761688947677612, "learning_rate": 1.277662718535405e-06, "loss": 0.0409, "step": 49218 }, { "epoch": 0.8716457030022092, "grad_norm": 0.43234989047050476, "learning_rate": 1.277315277541458e-06, "loss": 0.025, "step": 49219 }, { "epoch": 0.8716634125392377, "grad_norm": 0.5996469259262085, "learning_rate": 1.276967881693758e-06, "loss": 0.0529, "step": 49220 }, { "epoch": 0.8716811220762661, "grad_norm": 0.7914583683013916, "learning_rate": 1.2766205309934486e-06, "loss": 0.065, "step": 49221 }, { "epoch": 0.8716988316132945, "grad_norm": 0.5934194326400757, "learning_rate": 1.2762732254416677e-06, "loss": 0.0429, "step": 49222 }, { "epoch": 0.8717165411503229, "grad_norm": 0.4599514305591583, "learning_rate": 1.2759259650395605e-06, "loss": 0.0312, "step": 49223 }, { "epoch": 0.8717342506873514, "grad_norm": 0.9372448921203613, "learning_rate": 1.2755787497882698e-06, "loss": 0.0565, "step": 49224 }, { "epoch": 0.8717519602243798, "grad_norm": 0.6658965349197388, "learning_rate": 1.2752315796889346e-06, "loss": 0.0678, "step": 49225 }, { "epoch": 0.8717696697614082, "grad_norm": 0.47883906960487366, "learning_rate": 1.274884454742699e-06, "loss": 0.0528, "step": 49226 }, { "epoch": 0.8717873792984366, "grad_norm": 0.6819540858268738, "learning_rate": 1.2745373749507056e-06, "loss": 0.0673, "step": 49227 }, { "epoch": 0.8718050888354651, "grad_norm": 0.16063451766967773, "learning_rate": 1.2741903403140965e-06, "loss": 0.0418, "step": 49228 }, { "epoch": 0.8718227983724935, "grad_norm": 0.49528318643569946, "learning_rate": 1.2738433508340125e-06, "loss": 0.0374, "step": 49229 }, { "epoch": 0.871840507909522, "grad_norm": 0.31135088205337524, "learning_rate": 1.273496406511596e-06, "loss": 0.0407, "step": 49230 }, { "epoch": 0.8718582174465505, "grad_norm": 0.488558828830719, "learning_rate": 1.2731495073479898e-06, "loss": 0.0289, "step": 49231 }, { "epoch": 0.8718759269835789, "grad_norm": 0.7632902264595032, "learning_rate": 1.272802653344331e-06, "loss": 0.0609, "step": 49232 }, { "epoch": 0.8718936365206073, "grad_norm": 0.6344374418258667, "learning_rate": 1.2724558445017637e-06, "loss": 0.0579, "step": 49233 }, { "epoch": 0.8719113460576357, "grad_norm": 0.6872555613517761, "learning_rate": 1.272109080821427e-06, "loss": 0.0695, "step": 49234 }, { "epoch": 0.8719290555946642, "grad_norm": 0.4106316566467285, "learning_rate": 1.2717623623044667e-06, "loss": 0.0495, "step": 49235 }, { "epoch": 0.8719467651316926, "grad_norm": 1.0421414375305176, "learning_rate": 1.2714156889520167e-06, "loss": 0.0796, "step": 49236 }, { "epoch": 0.871964474668721, "grad_norm": 0.7612805962562561, "learning_rate": 1.2710690607652197e-06, "loss": 0.0618, "step": 49237 }, { "epoch": 0.8719821842057494, "grad_norm": 0.6192677617073059, "learning_rate": 1.2707224777452182e-06, "loss": 0.0421, "step": 49238 }, { "epoch": 0.8719998937427779, "grad_norm": 0.7527357339859009, "learning_rate": 1.2703759398931507e-06, "loss": 0.057, "step": 49239 }, { "epoch": 0.8720176032798063, "grad_norm": 0.9205142259597778, "learning_rate": 1.270029447210157e-06, "loss": 0.0465, "step": 49240 }, { "epoch": 0.8720353128168347, "grad_norm": 0.4714730679988861, "learning_rate": 1.2696829996973791e-06, "loss": 0.0295, "step": 49241 }, { "epoch": 0.8720530223538631, "grad_norm": 0.235728919506073, "learning_rate": 1.2693365973559562e-06, "loss": 0.0495, "step": 49242 }, { "epoch": 0.8720707318908916, "grad_norm": 0.7260687351226807, "learning_rate": 1.2689902401870257e-06, "loss": 0.045, "step": 49243 }, { "epoch": 0.87208844142792, "grad_norm": 0.47522681951522827, "learning_rate": 1.2686439281917284e-06, "loss": 0.0478, "step": 49244 }, { "epoch": 0.8721061509649484, "grad_norm": 0.5371903777122498, "learning_rate": 1.2682976613712066e-06, "loss": 0.0553, "step": 49245 }, { "epoch": 0.8721238605019769, "grad_norm": 0.5026407837867737, "learning_rate": 1.2679514397265928e-06, "loss": 0.0715, "step": 49246 }, { "epoch": 0.8721415700390053, "grad_norm": 0.40978100895881653, "learning_rate": 1.2676052632590312e-06, "loss": 0.0345, "step": 49247 }, { "epoch": 0.8721592795760337, "grad_norm": 0.8134133219718933, "learning_rate": 1.2672591319696558e-06, "loss": 0.1007, "step": 49248 }, { "epoch": 0.8721769891130621, "grad_norm": 0.6162930727005005, "learning_rate": 1.2669130458596157e-06, "loss": 0.0601, "step": 49249 }, { "epoch": 0.8721946986500906, "grad_norm": 0.20412182807922363, "learning_rate": 1.2665670049300382e-06, "loss": 0.0449, "step": 49250 }, { "epoch": 0.872212408187119, "grad_norm": 0.8199100494384766, "learning_rate": 1.2662210091820675e-06, "loss": 0.0691, "step": 49251 }, { "epoch": 0.8722301177241474, "grad_norm": 0.4777541756629944, "learning_rate": 1.2658750586168428e-06, "loss": 0.0479, "step": 49252 }, { "epoch": 0.8722478272611758, "grad_norm": 0.5277412533760071, "learning_rate": 1.2655291532354963e-06, "loss": 0.0444, "step": 49253 }, { "epoch": 0.8722655367982043, "grad_norm": 0.45484256744384766, "learning_rate": 1.2651832930391704e-06, "loss": 0.0453, "step": 49254 }, { "epoch": 0.8722832463352327, "grad_norm": 0.515648365020752, "learning_rate": 1.2648374780290028e-06, "loss": 0.0447, "step": 49255 }, { "epoch": 0.8723009558722611, "grad_norm": 0.36131078004837036, "learning_rate": 1.2644917082061309e-06, "loss": 0.0375, "step": 49256 }, { "epoch": 0.8723186654092895, "grad_norm": 0.5048391819000244, "learning_rate": 1.26414598357169e-06, "loss": 0.0636, "step": 49257 }, { "epoch": 0.872336374946318, "grad_norm": 0.4032880663871765, "learning_rate": 1.2638003041268215e-06, "loss": 0.0517, "step": 49258 }, { "epoch": 0.8723540844833464, "grad_norm": 0.6739362478256226, "learning_rate": 1.2634546698726606e-06, "loss": 0.0604, "step": 49259 }, { "epoch": 0.8723717940203748, "grad_norm": 0.4637129306793213, "learning_rate": 1.2631090808103468e-06, "loss": 0.0501, "step": 49260 }, { "epoch": 0.8723895035574033, "grad_norm": 0.46820399165153503, "learning_rate": 1.2627635369410107e-06, "loss": 0.0448, "step": 49261 }, { "epoch": 0.8724072130944317, "grad_norm": 1.0078030824661255, "learning_rate": 1.2624180382657947e-06, "loss": 0.048, "step": 49262 }, { "epoch": 0.8724249226314601, "grad_norm": 0.29642143845558167, "learning_rate": 1.2620725847858361e-06, "loss": 0.0478, "step": 49263 }, { "epoch": 0.8724426321684885, "grad_norm": 0.6598559021949768, "learning_rate": 1.2617271765022675e-06, "loss": 0.0768, "step": 49264 }, { "epoch": 0.872460341705517, "grad_norm": 0.8650665283203125, "learning_rate": 1.2613818134162264e-06, "loss": 0.0595, "step": 49265 }, { "epoch": 0.8724780512425454, "grad_norm": 0.6343779563903809, "learning_rate": 1.2610364955288484e-06, "loss": 0.062, "step": 49266 }, { "epoch": 0.8724957607795738, "grad_norm": 0.6842278242111206, "learning_rate": 1.260691222841271e-06, "loss": 0.0667, "step": 49267 }, { "epoch": 0.8725134703166022, "grad_norm": 0.38404059410095215, "learning_rate": 1.2603459953546314e-06, "loss": 0.0398, "step": 49268 }, { "epoch": 0.8725311798536307, "grad_norm": 0.33961936831474304, "learning_rate": 1.2600008130700624e-06, "loss": 0.0426, "step": 49269 }, { "epoch": 0.8725488893906591, "grad_norm": 0.8198398947715759, "learning_rate": 1.2596556759887045e-06, "loss": 0.0575, "step": 49270 }, { "epoch": 0.8725665989276875, "grad_norm": 0.4939589202404022, "learning_rate": 1.2593105841116853e-06, "loss": 0.041, "step": 49271 }, { "epoch": 0.8725843084647159, "grad_norm": 0.6017708778381348, "learning_rate": 1.2589655374401454e-06, "loss": 0.0496, "step": 49272 }, { "epoch": 0.8726020180017444, "grad_norm": 1.1094847917556763, "learning_rate": 1.2586205359752205e-06, "loss": 0.0517, "step": 49273 }, { "epoch": 0.8726197275387728, "grad_norm": 0.4137813448905945, "learning_rate": 1.258275579718045e-06, "loss": 0.0589, "step": 49274 }, { "epoch": 0.8726374370758012, "grad_norm": 0.9138281345367432, "learning_rate": 1.2579306686697512e-06, "loss": 0.084, "step": 49275 }, { "epoch": 0.8726551466128297, "grad_norm": 0.4987969994544983, "learning_rate": 1.2575858028314763e-06, "loss": 0.0418, "step": 49276 }, { "epoch": 0.8726728561498581, "grad_norm": 0.31400758028030396, "learning_rate": 1.2572409822043529e-06, "loss": 0.0683, "step": 49277 }, { "epoch": 0.8726905656868865, "grad_norm": 0.19889378547668457, "learning_rate": 1.2568962067895169e-06, "loss": 0.0433, "step": 49278 }, { "epoch": 0.8727082752239149, "grad_norm": 0.5858679413795471, "learning_rate": 1.256551476588102e-06, "loss": 0.0586, "step": 49279 }, { "epoch": 0.8727259847609434, "grad_norm": 0.31995564699172974, "learning_rate": 1.2562067916012427e-06, "loss": 0.0378, "step": 49280 }, { "epoch": 0.8727436942979718, "grad_norm": 0.5640361905097961, "learning_rate": 1.2558621518300761e-06, "loss": 0.0392, "step": 49281 }, { "epoch": 0.8727614038350002, "grad_norm": 0.3144196569919586, "learning_rate": 1.25551755727573e-06, "loss": 0.0333, "step": 49282 }, { "epoch": 0.8727791133720286, "grad_norm": 1.2923177480697632, "learning_rate": 1.2551730079393414e-06, "loss": 0.0625, "step": 49283 }, { "epoch": 0.8727968229090571, "grad_norm": 0.9118556380271912, "learning_rate": 1.2548285038220463e-06, "loss": 0.0686, "step": 49284 }, { "epoch": 0.8728145324460855, "grad_norm": 0.7247717976570129, "learning_rate": 1.254484044924969e-06, "loss": 0.082, "step": 49285 }, { "epoch": 0.8728322419831139, "grad_norm": 0.40019741654396057, "learning_rate": 1.2541396312492514e-06, "loss": 0.0625, "step": 49286 }, { "epoch": 0.8728499515201423, "grad_norm": 0.48766380548477173, "learning_rate": 1.2537952627960248e-06, "loss": 0.0787, "step": 49287 }, { "epoch": 0.8728676610571708, "grad_norm": 0.41720911860466003, "learning_rate": 1.2534509395664244e-06, "loss": 0.0268, "step": 49288 }, { "epoch": 0.8728853705941992, "grad_norm": 0.5668118596076965, "learning_rate": 1.2531066615615765e-06, "loss": 0.0508, "step": 49289 }, { "epoch": 0.8729030801312276, "grad_norm": 0.3526695668697357, "learning_rate": 1.2527624287826184e-06, "loss": 0.0455, "step": 49290 }, { "epoch": 0.8729207896682561, "grad_norm": 0.5340072512626648, "learning_rate": 1.2524182412306806e-06, "loss": 0.0445, "step": 49291 }, { "epoch": 0.8729384992052845, "grad_norm": 1.2479835748672485, "learning_rate": 1.252074098906899e-06, "loss": 0.1186, "step": 49292 }, { "epoch": 0.872956208742313, "grad_norm": 0.464728981256485, "learning_rate": 1.251730001812401e-06, "loss": 0.0677, "step": 49293 }, { "epoch": 0.8729739182793413, "grad_norm": 0.3130306899547577, "learning_rate": 1.2513859499483194e-06, "loss": 0.0409, "step": 49294 }, { "epoch": 0.8729916278163699, "grad_norm": 0.2729790210723877, "learning_rate": 1.2510419433157893e-06, "loss": 0.0569, "step": 49295 }, { "epoch": 0.8730093373533983, "grad_norm": 0.5415177941322327, "learning_rate": 1.2506979819159386e-06, "loss": 0.0614, "step": 49296 }, { "epoch": 0.8730270468904267, "grad_norm": 0.48057109117507935, "learning_rate": 1.2503540657499029e-06, "loss": 0.0574, "step": 49297 }, { "epoch": 0.873044756427455, "grad_norm": 0.480709969997406, "learning_rate": 1.2500101948188098e-06, "loss": 0.0557, "step": 49298 }, { "epoch": 0.8730624659644836, "grad_norm": 0.557595431804657, "learning_rate": 1.2496663691237964e-06, "loss": 0.0551, "step": 49299 }, { "epoch": 0.873080175501512, "grad_norm": 0.6940827369689941, "learning_rate": 1.249322588665987e-06, "loss": 0.0901, "step": 49300 }, { "epoch": 0.8730978850385404, "grad_norm": 0.5399059057235718, "learning_rate": 1.2489788534465141e-06, "loss": 0.0461, "step": 49301 }, { "epoch": 0.8731155945755688, "grad_norm": 0.7067180275917053, "learning_rate": 1.2486351634665133e-06, "loss": 0.0436, "step": 49302 }, { "epoch": 0.8731333041125973, "grad_norm": 0.725567102432251, "learning_rate": 1.2482915187271104e-06, "loss": 0.0451, "step": 49303 }, { "epoch": 0.8731510136496257, "grad_norm": 0.47239312529563904, "learning_rate": 1.2479479192294363e-06, "loss": 0.0464, "step": 49304 }, { "epoch": 0.8731687231866541, "grad_norm": 0.7879846096038818, "learning_rate": 1.2476043649746233e-06, "loss": 0.0828, "step": 49305 }, { "epoch": 0.8731864327236826, "grad_norm": 0.7289179563522339, "learning_rate": 1.2472608559638006e-06, "loss": 0.0454, "step": 49306 }, { "epoch": 0.873204142260711, "grad_norm": 0.5080369114875793, "learning_rate": 1.2469173921980986e-06, "loss": 0.0686, "step": 49307 }, { "epoch": 0.8732218517977394, "grad_norm": 0.5726696252822876, "learning_rate": 1.2465739736786485e-06, "loss": 0.0349, "step": 49308 }, { "epoch": 0.8732395613347678, "grad_norm": 0.7548289895057678, "learning_rate": 1.246230600406581e-06, "loss": 0.0546, "step": 49309 }, { "epoch": 0.8732572708717963, "grad_norm": 0.33121350407600403, "learning_rate": 1.24588727238302e-06, "loss": 0.0296, "step": 49310 }, { "epoch": 0.8732749804088247, "grad_norm": 0.3688454329967499, "learning_rate": 1.2455439896090997e-06, "loss": 0.0272, "step": 49311 }, { "epoch": 0.8732926899458531, "grad_norm": 0.6910992860794067, "learning_rate": 1.2452007520859477e-06, "loss": 0.0508, "step": 49312 }, { "epoch": 0.8733103994828815, "grad_norm": 0.6659531593322754, "learning_rate": 1.2448575598146977e-06, "loss": 0.0549, "step": 49313 }, { "epoch": 0.87332810901991, "grad_norm": 0.7766291499137878, "learning_rate": 1.2445144127964692e-06, "loss": 0.0542, "step": 49314 }, { "epoch": 0.8733458185569384, "grad_norm": 0.3731304407119751, "learning_rate": 1.2441713110323993e-06, "loss": 0.0576, "step": 49315 }, { "epoch": 0.8733635280939668, "grad_norm": 0.5035558342933655, "learning_rate": 1.2438282545236174e-06, "loss": 0.0574, "step": 49316 }, { "epoch": 0.8733812376309952, "grad_norm": 0.8144959211349487, "learning_rate": 1.2434852432712475e-06, "loss": 0.0722, "step": 49317 }, { "epoch": 0.8733989471680237, "grad_norm": 0.8570848703384399, "learning_rate": 1.2431422772764184e-06, "loss": 0.0815, "step": 49318 }, { "epoch": 0.8734166567050521, "grad_norm": 0.5118753910064697, "learning_rate": 1.2427993565402613e-06, "loss": 0.04, "step": 49319 }, { "epoch": 0.8734343662420805, "grad_norm": 0.7840330004692078, "learning_rate": 1.2424564810639033e-06, "loss": 0.0929, "step": 49320 }, { "epoch": 0.873452075779109, "grad_norm": 0.38617536425590515, "learning_rate": 1.2421136508484722e-06, "loss": 0.0371, "step": 49321 }, { "epoch": 0.8734697853161374, "grad_norm": 0.7540550827980042, "learning_rate": 1.2417708658950932e-06, "loss": 0.0523, "step": 49322 }, { "epoch": 0.8734874948531658, "grad_norm": 0.5395162105560303, "learning_rate": 1.2414281262048977e-06, "loss": 0.0441, "step": 49323 }, { "epoch": 0.8735052043901942, "grad_norm": 0.6787984371185303, "learning_rate": 1.241085431779011e-06, "loss": 0.0485, "step": 49324 }, { "epoch": 0.8735229139272227, "grad_norm": 0.4783535301685333, "learning_rate": 1.2407427826185623e-06, "loss": 0.0428, "step": 49325 }, { "epoch": 0.8735406234642511, "grad_norm": 0.6898055076599121, "learning_rate": 1.2404001787246778e-06, "loss": 0.0411, "step": 49326 }, { "epoch": 0.8735583330012795, "grad_norm": 0.8335933685302734, "learning_rate": 1.2400576200984892e-06, "loss": 0.0825, "step": 49327 }, { "epoch": 0.8735760425383079, "grad_norm": 1.0147345066070557, "learning_rate": 1.2397151067411144e-06, "loss": 0.0602, "step": 49328 }, { "epoch": 0.8735937520753364, "grad_norm": 0.4257757067680359, "learning_rate": 1.2393726386536875e-06, "loss": 0.0358, "step": 49329 }, { "epoch": 0.8736114616123648, "grad_norm": 0.7806325554847717, "learning_rate": 1.2390302158373308e-06, "loss": 0.0504, "step": 49330 }, { "epoch": 0.8736291711493932, "grad_norm": 0.6652657389640808, "learning_rate": 1.2386878382931766e-06, "loss": 0.0821, "step": 49331 }, { "epoch": 0.8736468806864216, "grad_norm": 0.7009015083312988, "learning_rate": 1.2383455060223443e-06, "loss": 0.0619, "step": 49332 }, { "epoch": 0.8736645902234501, "grad_norm": 0.3571327030658722, "learning_rate": 1.2380032190259643e-06, "loss": 0.0381, "step": 49333 }, { "epoch": 0.8736822997604785, "grad_norm": 1.0111021995544434, "learning_rate": 1.2376609773051612e-06, "loss": 0.0881, "step": 49334 }, { "epoch": 0.8737000092975069, "grad_norm": 0.4208889305591583, "learning_rate": 1.2373187808610604e-06, "loss": 0.0507, "step": 49335 }, { "epoch": 0.8737177188345354, "grad_norm": 0.8114514946937561, "learning_rate": 1.2369766296947894e-06, "loss": 0.0538, "step": 49336 }, { "epoch": 0.8737354283715638, "grad_norm": 0.5804482102394104, "learning_rate": 1.2366345238074739e-06, "loss": 0.0512, "step": 49337 }, { "epoch": 0.8737531379085922, "grad_norm": 0.21087709069252014, "learning_rate": 1.2362924632002416e-06, "loss": 0.0393, "step": 49338 }, { "epoch": 0.8737708474456206, "grad_norm": 0.6645746827125549, "learning_rate": 1.235950447874211e-06, "loss": 0.044, "step": 49339 }, { "epoch": 0.8737885569826491, "grad_norm": 0.5341893434524536, "learning_rate": 1.2356084778305122e-06, "loss": 0.0373, "step": 49340 }, { "epoch": 0.8738062665196775, "grad_norm": 0.3842235803604126, "learning_rate": 1.2352665530702717e-06, "loss": 0.0824, "step": 49341 }, { "epoch": 0.8738239760567059, "grad_norm": 0.5294696092605591, "learning_rate": 1.2349246735946106e-06, "loss": 0.0693, "step": 49342 }, { "epoch": 0.8738416855937343, "grad_norm": 0.5866441130638123, "learning_rate": 1.2345828394046515e-06, "loss": 0.0637, "step": 49343 }, { "epoch": 0.8738593951307628, "grad_norm": 0.4333319664001465, "learning_rate": 1.234241050501525e-06, "loss": 0.0447, "step": 49344 }, { "epoch": 0.8738771046677912, "grad_norm": 0.09763628989458084, "learning_rate": 1.2338993068863552e-06, "loss": 0.0241, "step": 49345 }, { "epoch": 0.8738948142048196, "grad_norm": 0.5640971660614014, "learning_rate": 1.2335576085602628e-06, "loss": 0.0345, "step": 49346 }, { "epoch": 0.873912523741848, "grad_norm": 0.8019703030586243, "learning_rate": 1.2332159555243721e-06, "loss": 0.0446, "step": 49347 }, { "epoch": 0.8739302332788765, "grad_norm": 0.33970436453819275, "learning_rate": 1.232874347779812e-06, "loss": 0.0455, "step": 49348 }, { "epoch": 0.8739479428159049, "grad_norm": 0.6982502937316895, "learning_rate": 1.2325327853277019e-06, "loss": 0.0462, "step": 49349 }, { "epoch": 0.8739656523529333, "grad_norm": 0.25274017453193665, "learning_rate": 1.2321912681691638e-06, "loss": 0.0236, "step": 49350 }, { "epoch": 0.8739833618899618, "grad_norm": 0.8115518689155579, "learning_rate": 1.2318497963053255e-06, "loss": 0.0509, "step": 49351 }, { "epoch": 0.8740010714269902, "grad_norm": 0.8531124591827393, "learning_rate": 1.2315083697373076e-06, "loss": 0.0514, "step": 49352 }, { "epoch": 0.8740187809640186, "grad_norm": 0.6834799647331238, "learning_rate": 1.2311669884662358e-06, "loss": 0.044, "step": 49353 }, { "epoch": 0.874036490501047, "grad_norm": 0.6589295268058777, "learning_rate": 1.230825652493231e-06, "loss": 0.0496, "step": 49354 }, { "epoch": 0.8740542000380755, "grad_norm": 0.6104035973548889, "learning_rate": 1.2304843618194206e-06, "loss": 0.0335, "step": 49355 }, { "epoch": 0.874071909575104, "grad_norm": 1.0647987127304077, "learning_rate": 1.2301431164459203e-06, "loss": 0.0695, "step": 49356 }, { "epoch": 0.8740896191121323, "grad_norm": 0.669093132019043, "learning_rate": 1.229801916373856e-06, "loss": 0.1077, "step": 49357 }, { "epoch": 0.8741073286491607, "grad_norm": 0.6067035794258118, "learning_rate": 1.2294607616043518e-06, "loss": 0.0564, "step": 49358 }, { "epoch": 0.8741250381861893, "grad_norm": 0.5183969736099243, "learning_rate": 1.2291196521385318e-06, "loss": 0.0585, "step": 49359 }, { "epoch": 0.8741427477232177, "grad_norm": 0.591170608997345, "learning_rate": 1.2287785879775115e-06, "loss": 0.0362, "step": 49360 }, { "epoch": 0.874160457260246, "grad_norm": 0.5811934471130371, "learning_rate": 1.2284375691224187e-06, "loss": 0.0715, "step": 49361 }, { "epoch": 0.8741781667972744, "grad_norm": 0.45638036727905273, "learning_rate": 1.2280965955743707e-06, "loss": 0.0397, "step": 49362 }, { "epoch": 0.874195876334303, "grad_norm": 0.42408886551856995, "learning_rate": 1.2277556673344947e-06, "loss": 0.0544, "step": 49363 }, { "epoch": 0.8742135858713314, "grad_norm": 0.810518205165863, "learning_rate": 1.227414784403907e-06, "loss": 0.0505, "step": 49364 }, { "epoch": 0.8742312954083598, "grad_norm": 0.7378600835800171, "learning_rate": 1.2270739467837327e-06, "loss": 0.0658, "step": 49365 }, { "epoch": 0.8742490049453883, "grad_norm": 0.36324450373649597, "learning_rate": 1.2267331544750949e-06, "loss": 0.0384, "step": 49366 }, { "epoch": 0.8742667144824167, "grad_norm": 0.6550083160400391, "learning_rate": 1.2263924074791105e-06, "loss": 0.0381, "step": 49367 }, { "epoch": 0.8742844240194451, "grad_norm": 0.3260888457298279, "learning_rate": 1.2260517057969007e-06, "loss": 0.0426, "step": 49368 }, { "epoch": 0.8743021335564735, "grad_norm": 1.0502517223358154, "learning_rate": 1.2257110494295875e-06, "loss": 0.0607, "step": 49369 }, { "epoch": 0.874319843093502, "grad_norm": 0.6550854444503784, "learning_rate": 1.2253704383782937e-06, "loss": 0.0551, "step": 49370 }, { "epoch": 0.8743375526305304, "grad_norm": 0.607870876789093, "learning_rate": 1.2250298726441367e-06, "loss": 0.0628, "step": 49371 }, { "epoch": 0.8743552621675588, "grad_norm": 0.392079621553421, "learning_rate": 1.2246893522282354e-06, "loss": 0.074, "step": 49372 }, { "epoch": 0.8743729717045872, "grad_norm": 0.5477204918861389, "learning_rate": 1.2243488771317175e-06, "loss": 0.0611, "step": 49373 }, { "epoch": 0.8743906812416157, "grad_norm": 0.8042119145393372, "learning_rate": 1.224008447355695e-06, "loss": 0.0958, "step": 49374 }, { "epoch": 0.8744083907786441, "grad_norm": 0.49659448862075806, "learning_rate": 1.2236680629012926e-06, "loss": 0.0454, "step": 49375 }, { "epoch": 0.8744261003156725, "grad_norm": 0.7222580313682556, "learning_rate": 1.2233277237696288e-06, "loss": 0.0504, "step": 49376 }, { "epoch": 0.8744438098527009, "grad_norm": 0.719184398651123, "learning_rate": 1.2229874299618265e-06, "loss": 0.0673, "step": 49377 }, { "epoch": 0.8744615193897294, "grad_norm": 0.45048987865448, "learning_rate": 1.222647181478998e-06, "loss": 0.0495, "step": 49378 }, { "epoch": 0.8744792289267578, "grad_norm": 0.3031378984451294, "learning_rate": 1.2223069783222673e-06, "loss": 0.033, "step": 49379 }, { "epoch": 0.8744969384637862, "grad_norm": 0.6131839156150818, "learning_rate": 1.2219668204927536e-06, "loss": 0.0522, "step": 49380 }, { "epoch": 0.8745146480008147, "grad_norm": 0.5736173987388611, "learning_rate": 1.221626707991576e-06, "loss": 0.0453, "step": 49381 }, { "epoch": 0.8745323575378431, "grad_norm": 0.39791548252105713, "learning_rate": 1.221286640819852e-06, "loss": 0.0691, "step": 49382 }, { "epoch": 0.8745500670748715, "grad_norm": 0.749756395816803, "learning_rate": 1.2209466189787023e-06, "loss": 0.0508, "step": 49383 }, { "epoch": 0.8745677766118999, "grad_norm": 0.6673351526260376, "learning_rate": 1.2206066424692458e-06, "loss": 0.0799, "step": 49384 }, { "epoch": 0.8745854861489284, "grad_norm": 0.25158634781837463, "learning_rate": 1.2202667112925987e-06, "loss": 0.0344, "step": 49385 }, { "epoch": 0.8746031956859568, "grad_norm": 1.3207707405090332, "learning_rate": 1.219926825449878e-06, "loss": 0.0695, "step": 49386 }, { "epoch": 0.8746209052229852, "grad_norm": 0.7082471251487732, "learning_rate": 1.2195869849422097e-06, "loss": 0.0698, "step": 49387 }, { "epoch": 0.8746386147600136, "grad_norm": 0.4487445056438446, "learning_rate": 1.2192471897707014e-06, "loss": 0.0408, "step": 49388 }, { "epoch": 0.8746563242970421, "grad_norm": 0.8330845832824707, "learning_rate": 1.218907439936477e-06, "loss": 0.0962, "step": 49389 }, { "epoch": 0.8746740338340705, "grad_norm": 0.547898530960083, "learning_rate": 1.2185677354406539e-06, "loss": 0.0529, "step": 49390 }, { "epoch": 0.8746917433710989, "grad_norm": 0.5875333547592163, "learning_rate": 1.218228076284348e-06, "loss": 0.0431, "step": 49391 }, { "epoch": 0.8747094529081273, "grad_norm": 0.5600684881210327, "learning_rate": 1.217888462468678e-06, "loss": 0.0607, "step": 49392 }, { "epoch": 0.8747271624451558, "grad_norm": 0.43815183639526367, "learning_rate": 1.217548893994762e-06, "loss": 0.0324, "step": 49393 }, { "epoch": 0.8747448719821842, "grad_norm": 0.5478640198707581, "learning_rate": 1.2172093708637155e-06, "loss": 0.0466, "step": 49394 }, { "epoch": 0.8747625815192126, "grad_norm": 0.715289831161499, "learning_rate": 1.2168698930766574e-06, "loss": 0.0574, "step": 49395 }, { "epoch": 0.8747802910562411, "grad_norm": 0.5763295888900757, "learning_rate": 1.2165304606347022e-06, "loss": 0.0404, "step": 49396 }, { "epoch": 0.8747980005932695, "grad_norm": 1.1045974493026733, "learning_rate": 1.2161910735389669e-06, "loss": 0.0472, "step": 49397 }, { "epoch": 0.8748157101302979, "grad_norm": 0.19800466299057007, "learning_rate": 1.2158517317905727e-06, "loss": 0.0815, "step": 49398 }, { "epoch": 0.8748334196673263, "grad_norm": 0.6530094146728516, "learning_rate": 1.2155124353906283e-06, "loss": 0.0372, "step": 49399 }, { "epoch": 0.8748511292043548, "grad_norm": 0.673721194267273, "learning_rate": 1.2151731843402548e-06, "loss": 0.0575, "step": 49400 }, { "epoch": 0.8748688387413832, "grad_norm": 0.3913068175315857, "learning_rate": 1.2148339786405627e-06, "loss": 0.0328, "step": 49401 }, { "epoch": 0.8748865482784116, "grad_norm": 0.6945575475692749, "learning_rate": 1.2144948182926796e-06, "loss": 0.0562, "step": 49402 }, { "epoch": 0.87490425781544, "grad_norm": 0.5992605686187744, "learning_rate": 1.2141557032977112e-06, "loss": 0.0297, "step": 49403 }, { "epoch": 0.8749219673524685, "grad_norm": 0.3448624908924103, "learning_rate": 1.2138166336567752e-06, "loss": 0.0572, "step": 49404 }, { "epoch": 0.8749396768894969, "grad_norm": 0.3621004521846771, "learning_rate": 1.213477609370992e-06, "loss": 0.037, "step": 49405 }, { "epoch": 0.8749573864265253, "grad_norm": 1.7055913209915161, "learning_rate": 1.2131386304414694e-06, "loss": 0.1162, "step": 49406 }, { "epoch": 0.8749750959635537, "grad_norm": 0.5306032299995422, "learning_rate": 1.2127996968693277e-06, "loss": 0.0453, "step": 49407 }, { "epoch": 0.8749928055005822, "grad_norm": 0.4516671895980835, "learning_rate": 1.21246080865568e-06, "loss": 0.065, "step": 49408 }, { "epoch": 0.8750105150376106, "grad_norm": 0.3219003975391388, "learning_rate": 1.2121219658016413e-06, "loss": 0.0471, "step": 49409 }, { "epoch": 0.875028224574639, "grad_norm": 0.44851502776145935, "learning_rate": 1.2117831683083263e-06, "loss": 0.0409, "step": 49410 }, { "epoch": 0.8750459341116675, "grad_norm": 0.526457667350769, "learning_rate": 1.2114444161768507e-06, "loss": 0.0592, "step": 49411 }, { "epoch": 0.8750636436486959, "grad_norm": 0.6159680485725403, "learning_rate": 1.21110570940833e-06, "loss": 0.058, "step": 49412 }, { "epoch": 0.8750813531857243, "grad_norm": 0.4860653281211853, "learning_rate": 1.2107670480038752e-06, "loss": 0.059, "step": 49413 }, { "epoch": 0.8750990627227527, "grad_norm": 0.6752944588661194, "learning_rate": 1.210428431964602e-06, "loss": 0.0717, "step": 49414 }, { "epoch": 0.8751167722597812, "grad_norm": 0.44371065497398376, "learning_rate": 1.2100898612916244e-06, "loss": 0.0504, "step": 49415 }, { "epoch": 0.8751344817968096, "grad_norm": 0.334734171628952, "learning_rate": 1.20975133598606e-06, "loss": 0.0308, "step": 49416 }, { "epoch": 0.875152191333838, "grad_norm": 0.8082041144371033, "learning_rate": 1.2094128560490147e-06, "loss": 0.0457, "step": 49417 }, { "epoch": 0.8751699008708664, "grad_norm": 0.3513374328613281, "learning_rate": 1.2090744214816073e-06, "loss": 0.0346, "step": 49418 }, { "epoch": 0.875187610407895, "grad_norm": 0.3488137125968933, "learning_rate": 1.2087360322849506e-06, "loss": 0.0544, "step": 49419 }, { "epoch": 0.8752053199449233, "grad_norm": 0.3079279661178589, "learning_rate": 1.2083976884601566e-06, "loss": 0.0327, "step": 49420 }, { "epoch": 0.8752230294819517, "grad_norm": 0.5532323122024536, "learning_rate": 1.2080593900083382e-06, "loss": 0.0517, "step": 49421 }, { "epoch": 0.8752407390189803, "grad_norm": 0.5398881435394287, "learning_rate": 1.2077211369306107e-06, "loss": 0.0581, "step": 49422 }, { "epoch": 0.8752584485560087, "grad_norm": 0.5167630910873413, "learning_rate": 1.2073829292280885e-06, "loss": 0.0677, "step": 49423 }, { "epoch": 0.875276158093037, "grad_norm": 0.6155310273170471, "learning_rate": 1.2070447669018792e-06, "loss": 0.0675, "step": 49424 }, { "epoch": 0.8752938676300654, "grad_norm": 0.7413645386695862, "learning_rate": 1.2067066499530966e-06, "loss": 0.0652, "step": 49425 }, { "epoch": 0.875311577167094, "grad_norm": 0.574277937412262, "learning_rate": 1.2063685783828565e-06, "loss": 0.0508, "step": 49426 }, { "epoch": 0.8753292867041224, "grad_norm": 0.5450620055198669, "learning_rate": 1.2060305521922666e-06, "loss": 0.0539, "step": 49427 }, { "epoch": 0.8753469962411508, "grad_norm": 1.0301806926727295, "learning_rate": 1.2056925713824407e-06, "loss": 0.0673, "step": 49428 }, { "epoch": 0.8753647057781792, "grad_norm": 0.48040512204170227, "learning_rate": 1.2053546359544914e-06, "loss": 0.0386, "step": 49429 }, { "epoch": 0.8753824153152077, "grad_norm": 0.3054611086845398, "learning_rate": 1.2050167459095296e-06, "loss": 0.0429, "step": 49430 }, { "epoch": 0.8754001248522361, "grad_norm": 0.9860944151878357, "learning_rate": 1.2046789012486676e-06, "loss": 0.0714, "step": 49431 }, { "epoch": 0.8754178343892645, "grad_norm": 0.4083169102668762, "learning_rate": 1.2043411019730161e-06, "loss": 0.0434, "step": 49432 }, { "epoch": 0.8754355439262929, "grad_norm": 0.6976820826530457, "learning_rate": 1.2040033480836876e-06, "loss": 0.0768, "step": 49433 }, { "epoch": 0.8754532534633214, "grad_norm": 0.6736674904823303, "learning_rate": 1.2036656395817962e-06, "loss": 0.0298, "step": 49434 }, { "epoch": 0.8754709630003498, "grad_norm": 0.5902101993560791, "learning_rate": 1.2033279764684463e-06, "loss": 0.0669, "step": 49435 }, { "epoch": 0.8754886725373782, "grad_norm": 0.6602482795715332, "learning_rate": 1.2029903587447515e-06, "loss": 0.043, "step": 49436 }, { "epoch": 0.8755063820744067, "grad_norm": 0.7133714556694031, "learning_rate": 1.2026527864118246e-06, "loss": 0.0494, "step": 49437 }, { "epoch": 0.8755240916114351, "grad_norm": 0.6966205835342407, "learning_rate": 1.2023152594707715e-06, "loss": 0.0642, "step": 49438 }, { "epoch": 0.8755418011484635, "grad_norm": 1.101453185081482, "learning_rate": 1.2019777779227059e-06, "loss": 0.061, "step": 49439 }, { "epoch": 0.8755595106854919, "grad_norm": 0.5284867286682129, "learning_rate": 1.201640341768739e-06, "loss": 0.0744, "step": 49440 }, { "epoch": 0.8755772202225204, "grad_norm": 0.6393277049064636, "learning_rate": 1.2013029510099828e-06, "loss": 0.0391, "step": 49441 }, { "epoch": 0.8755949297595488, "grad_norm": 0.37814396619796753, "learning_rate": 1.2009656056475415e-06, "loss": 0.0373, "step": 49442 }, { "epoch": 0.8756126392965772, "grad_norm": 0.7343050837516785, "learning_rate": 1.200628305682528e-06, "loss": 0.0668, "step": 49443 }, { "epoch": 0.8756303488336056, "grad_norm": 0.543498158454895, "learning_rate": 1.2002910511160526e-06, "loss": 0.0516, "step": 49444 }, { "epoch": 0.8756480583706341, "grad_norm": 0.7674933075904846, "learning_rate": 1.199953841949223e-06, "loss": 0.0537, "step": 49445 }, { "epoch": 0.8756657679076625, "grad_norm": 0.6532630324363708, "learning_rate": 1.199616678183148e-06, "loss": 0.0655, "step": 49446 }, { "epoch": 0.8756834774446909, "grad_norm": 0.7517073750495911, "learning_rate": 1.1992795598189387e-06, "loss": 0.0555, "step": 49447 }, { "epoch": 0.8757011869817193, "grad_norm": 0.43757885694503784, "learning_rate": 1.1989424868577043e-06, "loss": 0.0396, "step": 49448 }, { "epoch": 0.8757188965187478, "grad_norm": 0.6861650347709656, "learning_rate": 1.1986054593005536e-06, "loss": 0.0468, "step": 49449 }, { "epoch": 0.8757366060557762, "grad_norm": 0.8239896893501282, "learning_rate": 1.198268477148594e-06, "loss": 0.0723, "step": 49450 }, { "epoch": 0.8757543155928046, "grad_norm": 0.7118006348609924, "learning_rate": 1.1979315404029383e-06, "loss": 0.0584, "step": 49451 }, { "epoch": 0.8757720251298331, "grad_norm": 0.4420796036720276, "learning_rate": 1.1975946490646906e-06, "loss": 0.0655, "step": 49452 }, { "epoch": 0.8757897346668615, "grad_norm": 0.7701927423477173, "learning_rate": 1.1972578031349579e-06, "loss": 0.0481, "step": 49453 }, { "epoch": 0.8758074442038899, "grad_norm": 0.41412046551704407, "learning_rate": 1.196921002614853e-06, "loss": 0.0502, "step": 49454 }, { "epoch": 0.8758251537409183, "grad_norm": 0.7988390922546387, "learning_rate": 1.1965842475054833e-06, "loss": 0.0766, "step": 49455 }, { "epoch": 0.8758428632779468, "grad_norm": 0.8753904104232788, "learning_rate": 1.196247537807953e-06, "loss": 0.0549, "step": 49456 }, { "epoch": 0.8758605728149752, "grad_norm": 0.7933498024940491, "learning_rate": 1.1959108735233726e-06, "loss": 0.0676, "step": 49457 }, { "epoch": 0.8758782823520036, "grad_norm": 0.44385650753974915, "learning_rate": 1.1955742546528481e-06, "loss": 0.0423, "step": 49458 }, { "epoch": 0.875895991889032, "grad_norm": 0.8130484819412231, "learning_rate": 1.1952376811974885e-06, "loss": 0.0451, "step": 49459 }, { "epoch": 0.8759137014260605, "grad_norm": 0.40226641297340393, "learning_rate": 1.1949011531584013e-06, "loss": 0.0447, "step": 49460 }, { "epoch": 0.8759314109630889, "grad_norm": 0.4221850037574768, "learning_rate": 1.1945646705366925e-06, "loss": 0.02, "step": 49461 }, { "epoch": 0.8759491205001173, "grad_norm": 0.5867605209350586, "learning_rate": 1.1942282333334726e-06, "loss": 0.0495, "step": 49462 }, { "epoch": 0.8759668300371457, "grad_norm": 0.5047696232795715, "learning_rate": 1.1938918415498424e-06, "loss": 0.0446, "step": 49463 }, { "epoch": 0.8759845395741742, "grad_norm": 0.7718150615692139, "learning_rate": 1.1935554951869126e-06, "loss": 0.0675, "step": 49464 }, { "epoch": 0.8760022491112026, "grad_norm": 0.6622515320777893, "learning_rate": 1.1932191942457876e-06, "loss": 0.0575, "step": 49465 }, { "epoch": 0.876019958648231, "grad_norm": 0.270537793636322, "learning_rate": 1.192882938727578e-06, "loss": 0.0457, "step": 49466 }, { "epoch": 0.8760376681852595, "grad_norm": 0.29422470927238464, "learning_rate": 1.1925467286333813e-06, "loss": 0.0452, "step": 49467 }, { "epoch": 0.8760553777222879, "grad_norm": 0.7787176966667175, "learning_rate": 1.1922105639643132e-06, "loss": 0.0412, "step": 49468 }, { "epoch": 0.8760730872593163, "grad_norm": 0.753287672996521, "learning_rate": 1.1918744447214764e-06, "loss": 0.0697, "step": 49469 }, { "epoch": 0.8760907967963447, "grad_norm": 0.4963335394859314, "learning_rate": 1.1915383709059746e-06, "loss": 0.0646, "step": 49470 }, { "epoch": 0.8761085063333732, "grad_norm": 0.547173798084259, "learning_rate": 1.1912023425189156e-06, "loss": 0.0613, "step": 49471 }, { "epoch": 0.8761262158704016, "grad_norm": 0.3339924216270447, "learning_rate": 1.1908663595614034e-06, "loss": 0.041, "step": 49472 }, { "epoch": 0.87614392540743, "grad_norm": 0.5179367661476135, "learning_rate": 1.190530422034547e-06, "loss": 0.0421, "step": 49473 }, { "epoch": 0.8761616349444584, "grad_norm": 0.5263567566871643, "learning_rate": 1.1901945299394474e-06, "loss": 0.0654, "step": 49474 }, { "epoch": 0.8761793444814869, "grad_norm": 0.4292096495628357, "learning_rate": 1.1898586832772084e-06, "loss": 0.0399, "step": 49475 }, { "epoch": 0.8761970540185153, "grad_norm": 0.46324068307876587, "learning_rate": 1.1895228820489397e-06, "loss": 0.0413, "step": 49476 }, { "epoch": 0.8762147635555437, "grad_norm": 0.7744881510734558, "learning_rate": 1.189187126255743e-06, "loss": 0.051, "step": 49477 }, { "epoch": 0.8762324730925721, "grad_norm": 0.4603841006755829, "learning_rate": 1.188851415898723e-06, "loss": 0.0536, "step": 49478 }, { "epoch": 0.8762501826296006, "grad_norm": 0.34830689430236816, "learning_rate": 1.1885157509789868e-06, "loss": 0.0562, "step": 49479 }, { "epoch": 0.876267892166629, "grad_norm": 0.5099329948425293, "learning_rate": 1.188180131497637e-06, "loss": 0.0514, "step": 49480 }, { "epoch": 0.8762856017036574, "grad_norm": 0.8717197775840759, "learning_rate": 1.1878445574557779e-06, "loss": 0.0546, "step": 49481 }, { "epoch": 0.876303311240686, "grad_norm": 0.545318067073822, "learning_rate": 1.1875090288545116e-06, "loss": 0.062, "step": 49482 }, { "epoch": 0.8763210207777143, "grad_norm": 0.5408758521080017, "learning_rate": 1.1871735456949455e-06, "loss": 0.0349, "step": 49483 }, { "epoch": 0.8763387303147427, "grad_norm": 0.7072839140892029, "learning_rate": 1.1868381079781793e-06, "loss": 0.0486, "step": 49484 }, { "epoch": 0.8763564398517711, "grad_norm": 0.7115569114685059, "learning_rate": 1.1865027157053182e-06, "loss": 0.0521, "step": 49485 }, { "epoch": 0.8763741493887997, "grad_norm": 0.6055009365081787, "learning_rate": 1.1861673688774666e-06, "loss": 0.0508, "step": 49486 }, { "epoch": 0.876391858925828, "grad_norm": 1.4817390441894531, "learning_rate": 1.1858320674957285e-06, "loss": 0.042, "step": 49487 }, { "epoch": 0.8764095684628564, "grad_norm": 0.7887013554573059, "learning_rate": 1.185496811561203e-06, "loss": 0.0477, "step": 49488 }, { "epoch": 0.8764272779998848, "grad_norm": 0.7421310544013977, "learning_rate": 1.1851616010749976e-06, "loss": 0.0676, "step": 49489 }, { "epoch": 0.8764449875369134, "grad_norm": 0.5884192585945129, "learning_rate": 1.184826436038215e-06, "loss": 0.0737, "step": 49490 }, { "epoch": 0.8764626970739418, "grad_norm": 0.7884755730628967, "learning_rate": 1.1844913164519538e-06, "loss": 0.0795, "step": 49491 }, { "epoch": 0.8764804066109702, "grad_norm": 0.6741933226585388, "learning_rate": 1.1841562423173186e-06, "loss": 0.0417, "step": 49492 }, { "epoch": 0.8764981161479986, "grad_norm": 0.6353221535682678, "learning_rate": 1.1838212136354132e-06, "loss": 0.043, "step": 49493 }, { "epoch": 0.8765158256850271, "grad_norm": 0.7014991044998169, "learning_rate": 1.1834862304073403e-06, "loss": 0.0732, "step": 49494 }, { "epoch": 0.8765335352220555, "grad_norm": 0.7922244668006897, "learning_rate": 1.183151292634197e-06, "loss": 0.0607, "step": 49495 }, { "epoch": 0.8765512447590839, "grad_norm": 0.6335809230804443, "learning_rate": 1.182816400317086e-06, "loss": 0.0219, "step": 49496 }, { "epoch": 0.8765689542961124, "grad_norm": 0.5806798934936523, "learning_rate": 1.182481553457118e-06, "loss": 0.0728, "step": 49497 }, { "epoch": 0.8765866638331408, "grad_norm": 0.8134918212890625, "learning_rate": 1.1821467520553842e-06, "loss": 0.0684, "step": 49498 }, { "epoch": 0.8766043733701692, "grad_norm": 0.3653048872947693, "learning_rate": 1.1818119961129897e-06, "loss": 0.0835, "step": 49499 }, { "epoch": 0.8766220829071976, "grad_norm": 0.38687410950660706, "learning_rate": 1.1814772856310373e-06, "loss": 0.0429, "step": 49500 }, { "epoch": 0.8766397924442261, "grad_norm": 0.2873145639896393, "learning_rate": 1.1811426206106297e-06, "loss": 0.0569, "step": 49501 }, { "epoch": 0.8766575019812545, "grad_norm": 0.8515301942825317, "learning_rate": 1.1808080010528622e-06, "loss": 0.0434, "step": 49502 }, { "epoch": 0.8766752115182829, "grad_norm": 0.27120280265808105, "learning_rate": 1.1804734269588374e-06, "loss": 0.046, "step": 49503 }, { "epoch": 0.8766929210553113, "grad_norm": 0.7672659754753113, "learning_rate": 1.180138898329658e-06, "loss": 0.078, "step": 49504 }, { "epoch": 0.8767106305923398, "grad_norm": 0.6201182007789612, "learning_rate": 1.1798044151664245e-06, "loss": 0.0291, "step": 49505 }, { "epoch": 0.8767283401293682, "grad_norm": 0.7883971929550171, "learning_rate": 1.1794699774702362e-06, "loss": 0.0645, "step": 49506 }, { "epoch": 0.8767460496663966, "grad_norm": 0.44177812337875366, "learning_rate": 1.1791355852421937e-06, "loss": 0.0502, "step": 49507 }, { "epoch": 0.876763759203425, "grad_norm": 0.9341632723808289, "learning_rate": 1.1788012384834014e-06, "loss": 0.0708, "step": 49508 }, { "epoch": 0.8767814687404535, "grad_norm": 0.5401288270950317, "learning_rate": 1.1784669371949498e-06, "loss": 0.0618, "step": 49509 }, { "epoch": 0.8767991782774819, "grad_norm": 0.46010133624076843, "learning_rate": 1.1781326813779464e-06, "loss": 0.0584, "step": 49510 }, { "epoch": 0.8768168878145103, "grad_norm": 0.4958053529262543, "learning_rate": 1.177798471033487e-06, "loss": 0.0707, "step": 49511 }, { "epoch": 0.8768345973515388, "grad_norm": 0.7258785367012024, "learning_rate": 1.1774643061626761e-06, "loss": 0.0473, "step": 49512 }, { "epoch": 0.8768523068885672, "grad_norm": 0.714436411857605, "learning_rate": 1.1771301867666056e-06, "loss": 0.0229, "step": 49513 }, { "epoch": 0.8768700164255956, "grad_norm": 0.5718416571617126, "learning_rate": 1.17679611284638e-06, "loss": 0.0502, "step": 49514 }, { "epoch": 0.876887725962624, "grad_norm": 0.7560504674911499, "learning_rate": 1.1764620844030966e-06, "loss": 0.0782, "step": 49515 }, { "epoch": 0.8769054354996525, "grad_norm": 0.8778979778289795, "learning_rate": 1.1761281014378544e-06, "loss": 0.0909, "step": 49516 }, { "epoch": 0.8769231450366809, "grad_norm": 0.5346837043762207, "learning_rate": 1.1757941639517528e-06, "loss": 0.054, "step": 49517 }, { "epoch": 0.8769408545737093, "grad_norm": 0.5013761520385742, "learning_rate": 1.1754602719458906e-06, "loss": 0.0526, "step": 49518 }, { "epoch": 0.8769585641107377, "grad_norm": 0.7223983407020569, "learning_rate": 1.1751264254213688e-06, "loss": 0.0656, "step": 49519 }, { "epoch": 0.8769762736477662, "grad_norm": 0.5718377828598022, "learning_rate": 1.1747926243792784e-06, "loss": 0.0335, "step": 49520 }, { "epoch": 0.8769939831847946, "grad_norm": 0.5231053829193115, "learning_rate": 1.174458868820723e-06, "loss": 0.0662, "step": 49521 }, { "epoch": 0.877011692721823, "grad_norm": 0.6793099641799927, "learning_rate": 1.1741251587468022e-06, "loss": 0.0663, "step": 49522 }, { "epoch": 0.8770294022588514, "grad_norm": 0.9979166984558105, "learning_rate": 1.1737914941586098e-06, "loss": 0.0392, "step": 49523 }, { "epoch": 0.8770471117958799, "grad_norm": 0.3160504102706909, "learning_rate": 1.1734578750572434e-06, "loss": 0.0331, "step": 49524 }, { "epoch": 0.8770648213329083, "grad_norm": 0.5199565291404724, "learning_rate": 1.1731243014437987e-06, "loss": 0.0481, "step": 49525 }, { "epoch": 0.8770825308699367, "grad_norm": 0.7532728910446167, "learning_rate": 1.1727907733193832e-06, "loss": 0.0708, "step": 49526 }, { "epoch": 0.8771002404069652, "grad_norm": 0.8668725490570068, "learning_rate": 1.1724572906850843e-06, "loss": 0.0682, "step": 49527 }, { "epoch": 0.8771179499439936, "grad_norm": 0.34185731410980225, "learning_rate": 1.172123853542003e-06, "loss": 0.0493, "step": 49528 }, { "epoch": 0.877135659481022, "grad_norm": 0.4245266020298004, "learning_rate": 1.1717904618912383e-06, "loss": 0.0314, "step": 49529 }, { "epoch": 0.8771533690180504, "grad_norm": 0.5728054642677307, "learning_rate": 1.171457115733881e-06, "loss": 0.0619, "step": 49530 }, { "epoch": 0.8771710785550789, "grad_norm": 0.9009456038475037, "learning_rate": 1.1711238150710317e-06, "loss": 0.0944, "step": 49531 }, { "epoch": 0.8771887880921073, "grad_norm": 0.8055624961853027, "learning_rate": 1.1707905599037865e-06, "loss": 0.0384, "step": 49532 }, { "epoch": 0.8772064976291357, "grad_norm": 0.5039240121841431, "learning_rate": 1.170457350233241e-06, "loss": 0.0482, "step": 49533 }, { "epoch": 0.8772242071661641, "grad_norm": 0.5601271986961365, "learning_rate": 1.1701241860604929e-06, "loss": 0.0509, "step": 49534 }, { "epoch": 0.8772419167031926, "grad_norm": 0.5880477428436279, "learning_rate": 1.1697910673866357e-06, "loss": 0.0548, "step": 49535 }, { "epoch": 0.877259626240221, "grad_norm": 0.38341209292411804, "learning_rate": 1.1694579942127675e-06, "loss": 0.0291, "step": 49536 }, { "epoch": 0.8772773357772494, "grad_norm": 0.6731241345405579, "learning_rate": 1.1691249665399872e-06, "loss": 0.0786, "step": 49537 }, { "epoch": 0.8772950453142778, "grad_norm": 0.5536233186721802, "learning_rate": 1.168791984369384e-06, "loss": 0.0587, "step": 49538 }, { "epoch": 0.8773127548513063, "grad_norm": 0.4080040454864502, "learning_rate": 1.1684590477020568e-06, "loss": 0.0448, "step": 49539 }, { "epoch": 0.8773304643883347, "grad_norm": 0.5476311445236206, "learning_rate": 1.1681261565391016e-06, "loss": 0.0652, "step": 49540 }, { "epoch": 0.8773481739253631, "grad_norm": 0.46577388048171997, "learning_rate": 1.1677933108816109e-06, "loss": 0.0432, "step": 49541 }, { "epoch": 0.8773658834623916, "grad_norm": 0.5623698234558105, "learning_rate": 1.1674605107306802e-06, "loss": 0.0486, "step": 49542 }, { "epoch": 0.87738359299942, "grad_norm": 0.6254859566688538, "learning_rate": 1.1671277560874054e-06, "loss": 0.0562, "step": 49543 }, { "epoch": 0.8774013025364484, "grad_norm": 0.47694388031959534, "learning_rate": 1.1667950469528827e-06, "loss": 0.0542, "step": 49544 }, { "epoch": 0.8774190120734768, "grad_norm": 0.37111228704452515, "learning_rate": 1.166462383328204e-06, "loss": 0.0441, "step": 49545 }, { "epoch": 0.8774367216105053, "grad_norm": 0.5333617925643921, "learning_rate": 1.1661297652144654e-06, "loss": 0.0459, "step": 49546 }, { "epoch": 0.8774544311475337, "grad_norm": 0.503756582736969, "learning_rate": 1.1657971926127626e-06, "loss": 0.0427, "step": 49547 }, { "epoch": 0.8774721406845621, "grad_norm": 0.3700774610042572, "learning_rate": 1.1654646655241846e-06, "loss": 0.0471, "step": 49548 }, { "epoch": 0.8774898502215905, "grad_norm": 1.0255764722824097, "learning_rate": 1.1651321839498291e-06, "loss": 0.0602, "step": 49549 }, { "epoch": 0.877507559758619, "grad_norm": 0.25373876094818115, "learning_rate": 1.1647997478907901e-06, "loss": 0.0614, "step": 49550 }, { "epoch": 0.8775252692956474, "grad_norm": 0.49394142627716064, "learning_rate": 1.1644673573481618e-06, "loss": 0.0537, "step": 49551 }, { "epoch": 0.8775429788326758, "grad_norm": 0.4645463824272156, "learning_rate": 1.1641350123230348e-06, "loss": 0.064, "step": 49552 }, { "epoch": 0.8775606883697042, "grad_norm": 0.37269702553749084, "learning_rate": 1.1638027128165051e-06, "loss": 0.0285, "step": 49553 }, { "epoch": 0.8775783979067328, "grad_norm": 0.3022450804710388, "learning_rate": 1.1634704588296634e-06, "loss": 0.0262, "step": 49554 }, { "epoch": 0.8775961074437612, "grad_norm": 0.4448876678943634, "learning_rate": 1.1631382503636056e-06, "loss": 0.0663, "step": 49555 }, { "epoch": 0.8776138169807896, "grad_norm": 0.7119043469429016, "learning_rate": 1.1628060874194224e-06, "loss": 0.0508, "step": 49556 }, { "epoch": 0.8776315265178181, "grad_norm": 0.9048237204551697, "learning_rate": 1.1624739699982095e-06, "loss": 0.0649, "step": 49557 }, { "epoch": 0.8776492360548465, "grad_norm": 0.5307517647743225, "learning_rate": 1.1621418981010578e-06, "loss": 0.0422, "step": 49558 }, { "epoch": 0.8776669455918749, "grad_norm": 0.49245721101760864, "learning_rate": 1.1618098717290582e-06, "loss": 0.0573, "step": 49559 }, { "epoch": 0.8776846551289033, "grad_norm": 0.8973622918128967, "learning_rate": 1.1614778908833047e-06, "loss": 0.049, "step": 49560 }, { "epoch": 0.8777023646659318, "grad_norm": 0.7321548461914062, "learning_rate": 1.161145955564893e-06, "loss": 0.0495, "step": 49561 }, { "epoch": 0.8777200742029602, "grad_norm": 0.4393012821674347, "learning_rate": 1.160814065774904e-06, "loss": 0.0401, "step": 49562 }, { "epoch": 0.8777377837399886, "grad_norm": 0.7374226450920105, "learning_rate": 1.16048222151444e-06, "loss": 0.051, "step": 49563 }, { "epoch": 0.877755493277017, "grad_norm": 0.4926578998565674, "learning_rate": 1.1601504227845906e-06, "loss": 0.0462, "step": 49564 }, { "epoch": 0.8777732028140455, "grad_norm": 0.5345960259437561, "learning_rate": 1.1598186695864494e-06, "loss": 0.0561, "step": 49565 }, { "epoch": 0.8777909123510739, "grad_norm": 0.6929172277450562, "learning_rate": 1.1594869619211008e-06, "loss": 0.0719, "step": 49566 }, { "epoch": 0.8778086218881023, "grad_norm": 0.5339841842651367, "learning_rate": 1.1591552997896403e-06, "loss": 0.0467, "step": 49567 }, { "epoch": 0.8778263314251307, "grad_norm": 0.4200216233730316, "learning_rate": 1.1588236831931609e-06, "loss": 0.0452, "step": 49568 }, { "epoch": 0.8778440409621592, "grad_norm": 1.0004377365112305, "learning_rate": 1.158492112132753e-06, "loss": 0.072, "step": 49569 }, { "epoch": 0.8778617504991876, "grad_norm": 0.23907119035720825, "learning_rate": 1.1581605866095024e-06, "loss": 0.0615, "step": 49570 }, { "epoch": 0.877879460036216, "grad_norm": 0.758107602596283, "learning_rate": 1.157829106624505e-06, "loss": 0.0431, "step": 49571 }, { "epoch": 0.8778971695732445, "grad_norm": 0.5669460892677307, "learning_rate": 1.1574976721788482e-06, "loss": 0.0693, "step": 49572 }, { "epoch": 0.8779148791102729, "grad_norm": 0.7264068722724915, "learning_rate": 1.1571662832736263e-06, "loss": 0.0434, "step": 49573 }, { "epoch": 0.8779325886473013, "grad_norm": 0.5612004399299622, "learning_rate": 1.1568349399099248e-06, "loss": 0.0523, "step": 49574 }, { "epoch": 0.8779502981843297, "grad_norm": 0.7159790992736816, "learning_rate": 1.156503642088838e-06, "loss": 0.0651, "step": 49575 }, { "epoch": 0.8779680077213582, "grad_norm": 0.43218910694122314, "learning_rate": 1.1561723898114552e-06, "loss": 0.0384, "step": 49576 }, { "epoch": 0.8779857172583866, "grad_norm": 0.7619028687477112, "learning_rate": 1.1558411830788634e-06, "loss": 0.0565, "step": 49577 }, { "epoch": 0.878003426795415, "grad_norm": 0.5169451832771301, "learning_rate": 1.1555100218921538e-06, "loss": 0.043, "step": 49578 }, { "epoch": 0.8780211363324434, "grad_norm": 0.3920591473579407, "learning_rate": 1.1551789062524187e-06, "loss": 0.0548, "step": 49579 }, { "epoch": 0.8780388458694719, "grad_norm": 0.5837720632553101, "learning_rate": 1.1548478361607423e-06, "loss": 0.0608, "step": 49580 }, { "epoch": 0.8780565554065003, "grad_norm": 0.6108834147453308, "learning_rate": 1.1545168116182153e-06, "loss": 0.0748, "step": 49581 }, { "epoch": 0.8780742649435287, "grad_norm": 0.6356261968612671, "learning_rate": 1.154185832625927e-06, "loss": 0.0464, "step": 49582 }, { "epoch": 0.8780919744805571, "grad_norm": 0.6579058170318604, "learning_rate": 1.153854899184968e-06, "loss": 0.0462, "step": 49583 }, { "epoch": 0.8781096840175856, "grad_norm": 0.5203746557235718, "learning_rate": 1.1535240112964258e-06, "loss": 0.0627, "step": 49584 }, { "epoch": 0.878127393554614, "grad_norm": 0.5188276767730713, "learning_rate": 1.1531931689613894e-06, "loss": 0.06, "step": 49585 }, { "epoch": 0.8781451030916424, "grad_norm": 0.627761960029602, "learning_rate": 1.1528623721809484e-06, "loss": 0.0516, "step": 49586 }, { "epoch": 0.8781628126286709, "grad_norm": 0.5535721182823181, "learning_rate": 1.152531620956188e-06, "loss": 0.0353, "step": 49587 }, { "epoch": 0.8781805221656993, "grad_norm": 0.36164340376853943, "learning_rate": 1.1522009152881979e-06, "loss": 0.0485, "step": 49588 }, { "epoch": 0.8781982317027277, "grad_norm": 0.7131401896476746, "learning_rate": 1.1518702551780668e-06, "loss": 0.0649, "step": 49589 }, { "epoch": 0.8782159412397561, "grad_norm": 0.6267622113227844, "learning_rate": 1.1515396406268841e-06, "loss": 0.0372, "step": 49590 }, { "epoch": 0.8782336507767846, "grad_norm": 0.5477867722511292, "learning_rate": 1.1512090716357288e-06, "loss": 0.0496, "step": 49591 }, { "epoch": 0.878251360313813, "grad_norm": 0.39948534965515137, "learning_rate": 1.1508785482056983e-06, "loss": 0.053, "step": 49592 }, { "epoch": 0.8782690698508414, "grad_norm": 0.9304225444793701, "learning_rate": 1.15054807033788e-06, "loss": 0.078, "step": 49593 }, { "epoch": 0.8782867793878698, "grad_norm": 0.3946150541305542, "learning_rate": 1.1502176380333551e-06, "loss": 0.0468, "step": 49594 }, { "epoch": 0.8783044889248983, "grad_norm": 0.6503432393074036, "learning_rate": 1.1498872512932123e-06, "loss": 0.0536, "step": 49595 }, { "epoch": 0.8783221984619267, "grad_norm": 0.73968505859375, "learning_rate": 1.149556910118541e-06, "loss": 0.0758, "step": 49596 }, { "epoch": 0.8783399079989551, "grad_norm": 0.7079268097877502, "learning_rate": 1.1492266145104268e-06, "loss": 0.0588, "step": 49597 }, { "epoch": 0.8783576175359835, "grad_norm": 0.35065746307373047, "learning_rate": 1.1488963644699558e-06, "loss": 0.0615, "step": 49598 }, { "epoch": 0.878375327073012, "grad_norm": 0.2688060402870178, "learning_rate": 1.1485661599982134e-06, "loss": 0.0494, "step": 49599 }, { "epoch": 0.8783930366100404, "grad_norm": 0.5691354274749756, "learning_rate": 1.148236001096289e-06, "loss": 0.022, "step": 49600 }, { "epoch": 0.8784107461470688, "grad_norm": 0.6842756867408752, "learning_rate": 1.1479058877652664e-06, "loss": 0.0509, "step": 49601 }, { "epoch": 0.8784284556840973, "grad_norm": 0.730407178401947, "learning_rate": 1.147575820006232e-06, "loss": 0.0383, "step": 49602 }, { "epoch": 0.8784461652211257, "grad_norm": 0.9399610161781311, "learning_rate": 1.1472457978202712e-06, "loss": 0.0567, "step": 49603 }, { "epoch": 0.8784638747581541, "grad_norm": 0.3593345582485199, "learning_rate": 1.1469158212084746e-06, "loss": 0.0362, "step": 49604 }, { "epoch": 0.8784815842951825, "grad_norm": 0.6706050634384155, "learning_rate": 1.14658589017192e-06, "loss": 0.048, "step": 49605 }, { "epoch": 0.878499293832211, "grad_norm": 0.5283816456794739, "learning_rate": 1.1462560047116983e-06, "loss": 0.0468, "step": 49606 }, { "epoch": 0.8785170033692394, "grad_norm": 0.4959065020084381, "learning_rate": 1.1459261648288916e-06, "loss": 0.0548, "step": 49607 }, { "epoch": 0.8785347129062678, "grad_norm": 0.6723666191101074, "learning_rate": 1.1455963705245909e-06, "loss": 0.0628, "step": 49608 }, { "epoch": 0.8785524224432962, "grad_norm": 0.49626728892326355, "learning_rate": 1.1452666217998719e-06, "loss": 0.0371, "step": 49609 }, { "epoch": 0.8785701319803247, "grad_norm": 0.4825301468372345, "learning_rate": 1.1449369186558256e-06, "loss": 0.0531, "step": 49610 }, { "epoch": 0.8785878415173531, "grad_norm": 0.32296839356422424, "learning_rate": 1.144607261093536e-06, "loss": 0.0624, "step": 49611 }, { "epoch": 0.8786055510543815, "grad_norm": 0.7981710433959961, "learning_rate": 1.1442776491140872e-06, "loss": 0.0702, "step": 49612 }, { "epoch": 0.8786232605914099, "grad_norm": 0.6796621084213257, "learning_rate": 1.1439480827185617e-06, "loss": 0.0652, "step": 49613 }, { "epoch": 0.8786409701284384, "grad_norm": 0.40797409415245056, "learning_rate": 1.1436185619080469e-06, "loss": 0.0383, "step": 49614 }, { "epoch": 0.8786586796654668, "grad_norm": 0.4014490246772766, "learning_rate": 1.1432890866836288e-06, "loss": 0.0317, "step": 49615 }, { "epoch": 0.8786763892024952, "grad_norm": 0.7641264200210571, "learning_rate": 1.1429596570463846e-06, "loss": 0.0515, "step": 49616 }, { "epoch": 0.8786940987395238, "grad_norm": 0.6919490694999695, "learning_rate": 1.142630272997402e-06, "loss": 0.0644, "step": 49617 }, { "epoch": 0.8787118082765522, "grad_norm": 0.6149043440818787, "learning_rate": 1.1423009345377667e-06, "loss": 0.0669, "step": 49618 }, { "epoch": 0.8787295178135806, "grad_norm": 0.6799280643463135, "learning_rate": 1.141971641668556e-06, "loss": 0.0488, "step": 49619 }, { "epoch": 0.878747227350609, "grad_norm": 0.8387840390205383, "learning_rate": 1.141642394390856e-06, "loss": 0.0808, "step": 49620 }, { "epoch": 0.8787649368876375, "grad_norm": 0.5292229652404785, "learning_rate": 1.1413131927057525e-06, "loss": 0.0585, "step": 49621 }, { "epoch": 0.8787826464246659, "grad_norm": 0.5518171787261963, "learning_rate": 1.1409840366143294e-06, "loss": 0.0491, "step": 49622 }, { "epoch": 0.8788003559616943, "grad_norm": 0.7020053863525391, "learning_rate": 1.140654926117664e-06, "loss": 0.0467, "step": 49623 }, { "epoch": 0.8788180654987227, "grad_norm": 0.5720816254615784, "learning_rate": 1.1403258612168443e-06, "loss": 0.081, "step": 49624 }, { "epoch": 0.8788357750357512, "grad_norm": 0.7544011473655701, "learning_rate": 1.139996841912951e-06, "loss": 0.0371, "step": 49625 }, { "epoch": 0.8788534845727796, "grad_norm": 0.7712688446044922, "learning_rate": 1.1396678682070644e-06, "loss": 0.0656, "step": 49626 }, { "epoch": 0.878871194109808, "grad_norm": 0.5812031030654907, "learning_rate": 1.139338940100269e-06, "loss": 0.069, "step": 49627 }, { "epoch": 0.8788889036468364, "grad_norm": 0.7042313814163208, "learning_rate": 1.1390100575936458e-06, "loss": 0.0504, "step": 49628 }, { "epoch": 0.8789066131838649, "grad_norm": 0.5540236234664917, "learning_rate": 1.1386812206882785e-06, "loss": 0.0525, "step": 49629 }, { "epoch": 0.8789243227208933, "grad_norm": 0.6170026659965515, "learning_rate": 1.1383524293852466e-06, "loss": 0.048, "step": 49630 }, { "epoch": 0.8789420322579217, "grad_norm": 0.47543665766716003, "learning_rate": 1.138023683685634e-06, "loss": 0.0664, "step": 49631 }, { "epoch": 0.8789597417949502, "grad_norm": 0.445081502199173, "learning_rate": 1.1376949835905247e-06, "loss": 0.0525, "step": 49632 }, { "epoch": 0.8789774513319786, "grad_norm": 0.5175133347511292, "learning_rate": 1.1373663291009933e-06, "loss": 0.0422, "step": 49633 }, { "epoch": 0.878995160869007, "grad_norm": 0.9237549901008606, "learning_rate": 1.1370377202181253e-06, "loss": 0.0548, "step": 49634 }, { "epoch": 0.8790128704060354, "grad_norm": 0.5435288548469543, "learning_rate": 1.136709156943e-06, "loss": 0.0706, "step": 49635 }, { "epoch": 0.8790305799430639, "grad_norm": 0.495368629693985, "learning_rate": 1.1363806392767028e-06, "loss": 0.0739, "step": 49636 }, { "epoch": 0.8790482894800923, "grad_norm": 0.6896296143531799, "learning_rate": 1.1360521672203084e-06, "loss": 0.0867, "step": 49637 }, { "epoch": 0.8790659990171207, "grad_norm": 0.5407808423042297, "learning_rate": 1.1357237407749004e-06, "loss": 0.0504, "step": 49638 }, { "epoch": 0.8790837085541491, "grad_norm": 0.7584758400917053, "learning_rate": 1.1353953599415584e-06, "loss": 0.0519, "step": 49639 }, { "epoch": 0.8791014180911776, "grad_norm": 0.3593956530094147, "learning_rate": 1.1350670247213629e-06, "loss": 0.0573, "step": 49640 }, { "epoch": 0.879119127628206, "grad_norm": 0.659106969833374, "learning_rate": 1.1347387351153965e-06, "loss": 0.0776, "step": 49641 }, { "epoch": 0.8791368371652344, "grad_norm": 0.8047075271606445, "learning_rate": 1.1344104911247349e-06, "loss": 0.0599, "step": 49642 }, { "epoch": 0.8791545467022628, "grad_norm": 0.39430996775627136, "learning_rate": 1.134082292750464e-06, "loss": 0.0289, "step": 49643 }, { "epoch": 0.8791722562392913, "grad_norm": 0.9024482369422913, "learning_rate": 1.133754139993658e-06, "loss": 0.041, "step": 49644 }, { "epoch": 0.8791899657763197, "grad_norm": 0.7265311479568481, "learning_rate": 1.1334260328553974e-06, "loss": 0.0551, "step": 49645 }, { "epoch": 0.8792076753133481, "grad_norm": 0.4732920825481415, "learning_rate": 1.1330979713367633e-06, "loss": 0.0468, "step": 49646 }, { "epoch": 0.8792253848503766, "grad_norm": 0.7396175861358643, "learning_rate": 1.1327699554388365e-06, "loss": 0.068, "step": 49647 }, { "epoch": 0.879243094387405, "grad_norm": 0.608534574508667, "learning_rate": 1.132441985162691e-06, "loss": 0.0671, "step": 49648 }, { "epoch": 0.8792608039244334, "grad_norm": 1.104097604751587, "learning_rate": 1.132114060509406e-06, "loss": 0.0661, "step": 49649 }, { "epoch": 0.8792785134614618, "grad_norm": 0.6436877250671387, "learning_rate": 1.131786181480069e-06, "loss": 0.058, "step": 49650 }, { "epoch": 0.8792962229984903, "grad_norm": 0.5371001362800598, "learning_rate": 1.1314583480757506e-06, "loss": 0.0697, "step": 49651 }, { "epoch": 0.8793139325355187, "grad_norm": 0.9313313364982605, "learning_rate": 1.1311305602975302e-06, "loss": 0.0476, "step": 49652 }, { "epoch": 0.8793316420725471, "grad_norm": 0.6780095100402832, "learning_rate": 1.1308028181464885e-06, "loss": 0.0769, "step": 49653 }, { "epoch": 0.8793493516095755, "grad_norm": 0.5397313833236694, "learning_rate": 1.1304751216237064e-06, "loss": 0.0657, "step": 49654 }, { "epoch": 0.879367061146604, "grad_norm": 0.6576514840126038, "learning_rate": 1.1301474707302544e-06, "loss": 0.0381, "step": 49655 }, { "epoch": 0.8793847706836324, "grad_norm": 1.0736509561538696, "learning_rate": 1.1298198654672138e-06, "loss": 0.0826, "step": 49656 }, { "epoch": 0.8794024802206608, "grad_norm": 0.38213104009628296, "learning_rate": 1.1294923058356633e-06, "loss": 0.0536, "step": 49657 }, { "epoch": 0.8794201897576892, "grad_norm": 0.7344914078712463, "learning_rate": 1.1291647918366805e-06, "loss": 0.0484, "step": 49658 }, { "epoch": 0.8794378992947177, "grad_norm": 0.844275176525116, "learning_rate": 1.1288373234713428e-06, "loss": 0.057, "step": 49659 }, { "epoch": 0.8794556088317461, "grad_norm": 0.580599308013916, "learning_rate": 1.1285099007407263e-06, "loss": 0.0473, "step": 49660 }, { "epoch": 0.8794733183687745, "grad_norm": 0.5757383108139038, "learning_rate": 1.128182523645913e-06, "loss": 0.0556, "step": 49661 }, { "epoch": 0.879491027905803, "grad_norm": 0.5133137702941895, "learning_rate": 1.1278551921879726e-06, "loss": 0.0308, "step": 49662 }, { "epoch": 0.8795087374428314, "grad_norm": 0.9317522644996643, "learning_rate": 1.1275279063679871e-06, "loss": 0.0684, "step": 49663 }, { "epoch": 0.8795264469798598, "grad_norm": 0.592650294303894, "learning_rate": 1.1272006661870326e-06, "loss": 0.0564, "step": 49664 }, { "epoch": 0.8795441565168882, "grad_norm": 0.8834739327430725, "learning_rate": 1.1268734716461832e-06, "loss": 0.0545, "step": 49665 }, { "epoch": 0.8795618660539167, "grad_norm": 0.829152524471283, "learning_rate": 1.1265463227465162e-06, "loss": 0.0486, "step": 49666 }, { "epoch": 0.8795795755909451, "grad_norm": 0.28246381878852844, "learning_rate": 1.126219219489109e-06, "loss": 0.0515, "step": 49667 }, { "epoch": 0.8795972851279735, "grad_norm": 0.501973032951355, "learning_rate": 1.125892161875038e-06, "loss": 0.0278, "step": 49668 }, { "epoch": 0.8796149946650019, "grad_norm": 0.8248957395553589, "learning_rate": 1.1255651499053765e-06, "loss": 0.0704, "step": 49669 }, { "epoch": 0.8796327042020304, "grad_norm": 0.4310952126979828, "learning_rate": 1.1252381835812043e-06, "loss": 0.0304, "step": 49670 }, { "epoch": 0.8796504137390588, "grad_norm": 0.32540163397789, "learning_rate": 1.1249112629035968e-06, "loss": 0.038, "step": 49671 }, { "epoch": 0.8796681232760872, "grad_norm": 0.5827983021736145, "learning_rate": 1.1245843878736251e-06, "loss": 0.0629, "step": 49672 }, { "epoch": 0.8796858328131156, "grad_norm": 0.5065528154373169, "learning_rate": 1.1242575584923681e-06, "loss": 0.0457, "step": 49673 }, { "epoch": 0.8797035423501441, "grad_norm": 0.415176123380661, "learning_rate": 1.1239307747609e-06, "loss": 0.0285, "step": 49674 }, { "epoch": 0.8797212518871725, "grad_norm": 0.4226839244365692, "learning_rate": 1.1236040366802985e-06, "loss": 0.0537, "step": 49675 }, { "epoch": 0.8797389614242009, "grad_norm": 0.6455129981040955, "learning_rate": 1.123277344251634e-06, "loss": 0.0337, "step": 49676 }, { "epoch": 0.8797566709612294, "grad_norm": 0.3246997892856598, "learning_rate": 1.1229506974759828e-06, "loss": 0.0317, "step": 49677 }, { "epoch": 0.8797743804982578, "grad_norm": 0.47031915187835693, "learning_rate": 1.1226240963544187e-06, "loss": 0.0393, "step": 49678 }, { "epoch": 0.8797920900352862, "grad_norm": 0.6591874361038208, "learning_rate": 1.1222975408880209e-06, "loss": 0.0585, "step": 49679 }, { "epoch": 0.8798097995723146, "grad_norm": 0.6309069395065308, "learning_rate": 1.1219710310778602e-06, "loss": 0.0661, "step": 49680 }, { "epoch": 0.8798275091093432, "grad_norm": 0.5027257800102234, "learning_rate": 1.1216445669250109e-06, "loss": 0.0542, "step": 49681 }, { "epoch": 0.8798452186463716, "grad_norm": 0.6225005388259888, "learning_rate": 1.1213181484305485e-06, "loss": 0.0604, "step": 49682 }, { "epoch": 0.8798629281834, "grad_norm": 0.2801315188407898, "learning_rate": 1.120991775595544e-06, "loss": 0.0339, "step": 49683 }, { "epoch": 0.8798806377204283, "grad_norm": 0.08457493036985397, "learning_rate": 1.1206654484210715e-06, "loss": 0.0658, "step": 49684 }, { "epoch": 0.8798983472574569, "grad_norm": 0.719603955745697, "learning_rate": 1.120339166908207e-06, "loss": 0.0439, "step": 49685 }, { "epoch": 0.8799160567944853, "grad_norm": 0.4721958637237549, "learning_rate": 1.1200129310580227e-06, "loss": 0.0392, "step": 49686 }, { "epoch": 0.8799337663315137, "grad_norm": 0.5609718561172485, "learning_rate": 1.1196867408715927e-06, "loss": 0.0625, "step": 49687 }, { "epoch": 0.879951475868542, "grad_norm": 0.4288487732410431, "learning_rate": 1.1193605963499881e-06, "loss": 0.0416, "step": 49688 }, { "epoch": 0.8799691854055706, "grad_norm": 0.7538290023803711, "learning_rate": 1.1190344974942862e-06, "loss": 0.0424, "step": 49689 }, { "epoch": 0.879986894942599, "grad_norm": 0.5866663455963135, "learning_rate": 1.1187084443055545e-06, "loss": 0.0612, "step": 49690 }, { "epoch": 0.8800046044796274, "grad_norm": 0.8213121891021729, "learning_rate": 1.1183824367848688e-06, "loss": 0.0687, "step": 49691 }, { "epoch": 0.8800223140166559, "grad_norm": 0.3694644272327423, "learning_rate": 1.1180564749333e-06, "loss": 0.0393, "step": 49692 }, { "epoch": 0.8800400235536843, "grad_norm": 0.8184344172477722, "learning_rate": 1.1177305587519239e-06, "loss": 0.084, "step": 49693 }, { "epoch": 0.8800577330907127, "grad_norm": 0.6455582976341248, "learning_rate": 1.1174046882418077e-06, "loss": 0.0615, "step": 49694 }, { "epoch": 0.8800754426277411, "grad_norm": 0.3320195972919464, "learning_rate": 1.1170788634040258e-06, "loss": 0.0504, "step": 49695 }, { "epoch": 0.8800931521647696, "grad_norm": 0.47662046551704407, "learning_rate": 1.116753084239649e-06, "loss": 0.0542, "step": 49696 }, { "epoch": 0.880110861701798, "grad_norm": 0.5237681269645691, "learning_rate": 1.1164273507497513e-06, "loss": 0.0225, "step": 49697 }, { "epoch": 0.8801285712388264, "grad_norm": 0.851643979549408, "learning_rate": 1.1161016629354038e-06, "loss": 0.0404, "step": 49698 }, { "epoch": 0.8801462807758548, "grad_norm": 1.1370916366577148, "learning_rate": 1.1157760207976787e-06, "loss": 0.081, "step": 49699 }, { "epoch": 0.8801639903128833, "grad_norm": 0.44466885924339294, "learning_rate": 1.1154504243376467e-06, "loss": 0.0477, "step": 49700 }, { "epoch": 0.8801816998499117, "grad_norm": 0.5439746379852295, "learning_rate": 1.1151248735563774e-06, "loss": 0.0499, "step": 49701 }, { "epoch": 0.8801994093869401, "grad_norm": 0.6292408108711243, "learning_rate": 1.1147993684549413e-06, "loss": 0.0394, "step": 49702 }, { "epoch": 0.8802171189239685, "grad_norm": 0.564704954624176, "learning_rate": 1.114473909034414e-06, "loss": 0.0481, "step": 49703 }, { "epoch": 0.880234828460997, "grad_norm": 0.5416990518569946, "learning_rate": 1.114148495295862e-06, "loss": 0.0912, "step": 49704 }, { "epoch": 0.8802525379980254, "grad_norm": 0.5063673853874207, "learning_rate": 1.1138231272403553e-06, "loss": 0.0551, "step": 49705 }, { "epoch": 0.8802702475350538, "grad_norm": 0.4257275462150574, "learning_rate": 1.113497804868967e-06, "loss": 0.0464, "step": 49706 }, { "epoch": 0.8802879570720823, "grad_norm": 0.4132384657859802, "learning_rate": 1.1131725281827659e-06, "loss": 0.0503, "step": 49707 }, { "epoch": 0.8803056666091107, "grad_norm": 0.5742424130439758, "learning_rate": 1.112847297182823e-06, "loss": 0.053, "step": 49708 }, { "epoch": 0.8803233761461391, "grad_norm": 0.7962799668312073, "learning_rate": 1.1125221118702073e-06, "loss": 0.0488, "step": 49709 }, { "epoch": 0.8803410856831675, "grad_norm": 0.4376370906829834, "learning_rate": 1.1121969722459895e-06, "loss": 0.0249, "step": 49710 }, { "epoch": 0.880358795220196, "grad_norm": 0.4695388376712799, "learning_rate": 1.1118718783112407e-06, "loss": 0.0587, "step": 49711 }, { "epoch": 0.8803765047572244, "grad_norm": 0.5186780691146851, "learning_rate": 1.1115468300670267e-06, "loss": 0.0371, "step": 49712 }, { "epoch": 0.8803942142942528, "grad_norm": 0.6130322813987732, "learning_rate": 1.1112218275144198e-06, "loss": 0.0526, "step": 49713 }, { "epoch": 0.8804119238312812, "grad_norm": 0.4391726553440094, "learning_rate": 1.1108968706544891e-06, "loss": 0.0372, "step": 49714 }, { "epoch": 0.8804296333683097, "grad_norm": 0.5855906009674072, "learning_rate": 1.110571959488299e-06, "loss": 0.0554, "step": 49715 }, { "epoch": 0.8804473429053381, "grad_norm": 0.305246502161026, "learning_rate": 1.1102470940169251e-06, "loss": 0.0579, "step": 49716 }, { "epoch": 0.8804650524423665, "grad_norm": 0.9017778635025024, "learning_rate": 1.1099222742414333e-06, "loss": 0.0598, "step": 49717 }, { "epoch": 0.8804827619793949, "grad_norm": 0.5850808024406433, "learning_rate": 1.1095975001628944e-06, "loss": 0.0589, "step": 49718 }, { "epoch": 0.8805004715164234, "grad_norm": 0.22524139285087585, "learning_rate": 1.1092727717823725e-06, "loss": 0.0554, "step": 49719 }, { "epoch": 0.8805181810534518, "grad_norm": 0.6867411136627197, "learning_rate": 1.1089480891009385e-06, "loss": 0.055, "step": 49720 }, { "epoch": 0.8805358905904802, "grad_norm": 0.5952657461166382, "learning_rate": 1.1086234521196631e-06, "loss": 0.0494, "step": 49721 }, { "epoch": 0.8805536001275087, "grad_norm": 0.5813989043235779, "learning_rate": 1.1082988608396088e-06, "loss": 0.041, "step": 49722 }, { "epoch": 0.8805713096645371, "grad_norm": 0.7367503046989441, "learning_rate": 1.107974315261845e-06, "loss": 0.0877, "step": 49723 }, { "epoch": 0.8805890192015655, "grad_norm": 0.38715222477912903, "learning_rate": 1.107649815387442e-06, "loss": 0.0615, "step": 49724 }, { "epoch": 0.8806067287385939, "grad_norm": 0.590526819229126, "learning_rate": 1.1073253612174644e-06, "loss": 0.0535, "step": 49725 }, { "epoch": 0.8806244382756224, "grad_norm": 0.7236036062240601, "learning_rate": 1.1070009527529829e-06, "loss": 0.0534, "step": 49726 }, { "epoch": 0.8806421478126508, "grad_norm": 0.3664149045944214, "learning_rate": 1.1066765899950632e-06, "loss": 0.0562, "step": 49727 }, { "epoch": 0.8806598573496792, "grad_norm": 0.6437376737594604, "learning_rate": 1.106352272944773e-06, "loss": 0.0727, "step": 49728 }, { "epoch": 0.8806775668867076, "grad_norm": 0.6772876977920532, "learning_rate": 1.1060280016031776e-06, "loss": 0.0391, "step": 49729 }, { "epoch": 0.8806952764237361, "grad_norm": 0.6528308987617493, "learning_rate": 1.1057037759713434e-06, "loss": 0.0336, "step": 49730 }, { "epoch": 0.8807129859607645, "grad_norm": 0.7650412321090698, "learning_rate": 1.1053795960503393e-06, "loss": 0.0492, "step": 49731 }, { "epoch": 0.8807306954977929, "grad_norm": 0.6860948801040649, "learning_rate": 1.1050554618412345e-06, "loss": 0.0417, "step": 49732 }, { "epoch": 0.8807484050348213, "grad_norm": 0.6265144944190979, "learning_rate": 1.1047313733450882e-06, "loss": 0.0655, "step": 49733 }, { "epoch": 0.8807661145718498, "grad_norm": 0.8590164184570312, "learning_rate": 1.1044073305629693e-06, "loss": 0.0498, "step": 49734 }, { "epoch": 0.8807838241088782, "grad_norm": 0.46298372745513916, "learning_rate": 1.104083333495947e-06, "loss": 0.0476, "step": 49735 }, { "epoch": 0.8808015336459066, "grad_norm": 0.5073451995849609, "learning_rate": 1.1037593821450842e-06, "loss": 0.0266, "step": 49736 }, { "epoch": 0.8808192431829351, "grad_norm": 0.7667667269706726, "learning_rate": 1.1034354765114464e-06, "loss": 0.063, "step": 49737 }, { "epoch": 0.8808369527199635, "grad_norm": 0.9723518490791321, "learning_rate": 1.1031116165961008e-06, "loss": 0.0891, "step": 49738 }, { "epoch": 0.8808546622569919, "grad_norm": 0.5574008822441101, "learning_rate": 1.1027878024001153e-06, "loss": 0.047, "step": 49739 }, { "epoch": 0.8808723717940203, "grad_norm": 0.5857408046722412, "learning_rate": 1.1024640339245505e-06, "loss": 0.0575, "step": 49740 }, { "epoch": 0.8808900813310488, "grad_norm": 0.5644718408584595, "learning_rate": 1.1021403111704726e-06, "loss": 0.0907, "step": 49741 }, { "epoch": 0.8809077908680772, "grad_norm": 0.6954575777053833, "learning_rate": 1.1018166341389468e-06, "loss": 0.0443, "step": 49742 }, { "epoch": 0.8809255004051056, "grad_norm": 0.19043207168579102, "learning_rate": 1.1014930028310426e-06, "loss": 0.0376, "step": 49743 }, { "epoch": 0.880943209942134, "grad_norm": 0.6825482845306396, "learning_rate": 1.1011694172478144e-06, "loss": 0.0558, "step": 49744 }, { "epoch": 0.8809609194791626, "grad_norm": 0.549427330493927, "learning_rate": 1.1008458773903356e-06, "loss": 0.0554, "step": 49745 }, { "epoch": 0.880978629016191, "grad_norm": 0.49390754103660583, "learning_rate": 1.1005223832596712e-06, "loss": 0.0473, "step": 49746 }, { "epoch": 0.8809963385532193, "grad_norm": 0.2184896469116211, "learning_rate": 1.1001989348568797e-06, "loss": 0.0449, "step": 49747 }, { "epoch": 0.8810140480902477, "grad_norm": 0.4443170726299286, "learning_rate": 1.0998755321830273e-06, "loss": 0.0587, "step": 49748 }, { "epoch": 0.8810317576272763, "grad_norm": 0.530752420425415, "learning_rate": 1.099552175239178e-06, "loss": 0.0413, "step": 49749 }, { "epoch": 0.8810494671643047, "grad_norm": 0.3213083744049072, "learning_rate": 1.0992288640264008e-06, "loss": 0.0441, "step": 49750 }, { "epoch": 0.881067176701333, "grad_norm": 0.8114463686943054, "learning_rate": 1.0989055985457503e-06, "loss": 0.072, "step": 49751 }, { "epoch": 0.8810848862383616, "grad_norm": 0.8544010519981384, "learning_rate": 1.098582378798295e-06, "loss": 0.0368, "step": 49752 }, { "epoch": 0.88110259577539, "grad_norm": 0.7332021594047546, "learning_rate": 1.0982592047850964e-06, "loss": 0.0745, "step": 49753 }, { "epoch": 0.8811203053124184, "grad_norm": 0.665134847164154, "learning_rate": 1.09793607650722e-06, "loss": 0.0511, "step": 49754 }, { "epoch": 0.8811380148494468, "grad_norm": 0.3586706817150116, "learning_rate": 1.0976129939657282e-06, "loss": 0.0602, "step": 49755 }, { "epoch": 0.8811557243864753, "grad_norm": 0.5293325781822205, "learning_rate": 1.0972899571616835e-06, "loss": 0.0701, "step": 49756 }, { "epoch": 0.8811734339235037, "grad_norm": 0.32323333621025085, "learning_rate": 1.0969669660961502e-06, "loss": 0.0337, "step": 49757 }, { "epoch": 0.8811911434605321, "grad_norm": 0.4126747250556946, "learning_rate": 1.0966440207701872e-06, "loss": 0.0609, "step": 49758 }, { "epoch": 0.8812088529975605, "grad_norm": 0.5568069219589233, "learning_rate": 1.0963211211848605e-06, "loss": 0.0351, "step": 49759 }, { "epoch": 0.881226562534589, "grad_norm": 0.6048182249069214, "learning_rate": 1.0959982673412312e-06, "loss": 0.0799, "step": 49760 }, { "epoch": 0.8812442720716174, "grad_norm": 0.74680095911026, "learning_rate": 1.0956754592403612e-06, "loss": 0.0513, "step": 49761 }, { "epoch": 0.8812619816086458, "grad_norm": 0.47114992141723633, "learning_rate": 1.09535269688331e-06, "loss": 0.079, "step": 49762 }, { "epoch": 0.8812796911456742, "grad_norm": 0.4558104872703552, "learning_rate": 1.095029980271145e-06, "loss": 0.0553, "step": 49763 }, { "epoch": 0.8812974006827027, "grad_norm": 0.5145910978317261, "learning_rate": 1.094707309404922e-06, "loss": 0.0453, "step": 49764 }, { "epoch": 0.8813151102197311, "grad_norm": 0.4818088412284851, "learning_rate": 1.094384684285707e-06, "loss": 0.0602, "step": 49765 }, { "epoch": 0.8813328197567595, "grad_norm": 0.5614315271377563, "learning_rate": 1.0940621049145605e-06, "loss": 0.0573, "step": 49766 }, { "epoch": 0.881350529293788, "grad_norm": 0.15009698271751404, "learning_rate": 1.0937395712925451e-06, "loss": 0.025, "step": 49767 }, { "epoch": 0.8813682388308164, "grad_norm": 1.1784201860427856, "learning_rate": 1.0934170834207168e-06, "loss": 0.0607, "step": 49768 }, { "epoch": 0.8813859483678448, "grad_norm": 0.700096845626831, "learning_rate": 1.0930946413001414e-06, "loss": 0.073, "step": 49769 }, { "epoch": 0.8814036579048732, "grad_norm": 0.6773457527160645, "learning_rate": 1.092772244931876e-06, "loss": 0.0825, "step": 49770 }, { "epoch": 0.8814213674419017, "grad_norm": 0.5981056094169617, "learning_rate": 1.0924498943169869e-06, "loss": 0.0471, "step": 49771 }, { "epoch": 0.8814390769789301, "grad_norm": 0.8718134164810181, "learning_rate": 1.092127589456528e-06, "loss": 0.0599, "step": 49772 }, { "epoch": 0.8814567865159585, "grad_norm": 0.7973174452781677, "learning_rate": 1.09180533035156e-06, "loss": 0.069, "step": 49773 }, { "epoch": 0.8814744960529869, "grad_norm": 0.7578096389770508, "learning_rate": 1.0914831170031526e-06, "loss": 0.0686, "step": 49774 }, { "epoch": 0.8814922055900154, "grad_norm": 0.48245683312416077, "learning_rate": 1.0911609494123542e-06, "loss": 0.0236, "step": 49775 }, { "epoch": 0.8815099151270438, "grad_norm": 0.7378877401351929, "learning_rate": 1.090838827580231e-06, "loss": 0.0598, "step": 49776 }, { "epoch": 0.8815276246640722, "grad_norm": 0.6046555638313293, "learning_rate": 1.0905167515078408e-06, "loss": 0.049, "step": 49777 }, { "epoch": 0.8815453342011006, "grad_norm": 0.7947456240653992, "learning_rate": 1.0901947211962454e-06, "loss": 0.0834, "step": 49778 }, { "epoch": 0.8815630437381291, "grad_norm": 0.5702864527702332, "learning_rate": 1.0898727366465012e-06, "loss": 0.0366, "step": 49779 }, { "epoch": 0.8815807532751575, "grad_norm": 0.372424840927124, "learning_rate": 1.089550797859667e-06, "loss": 0.045, "step": 49780 }, { "epoch": 0.8815984628121859, "grad_norm": 0.6686234474182129, "learning_rate": 1.0892289048368054e-06, "loss": 0.0417, "step": 49781 }, { "epoch": 0.8816161723492144, "grad_norm": 0.618063747882843, "learning_rate": 1.0889070575789722e-06, "loss": 0.045, "step": 49782 }, { "epoch": 0.8816338818862428, "grad_norm": 0.5555465221405029, "learning_rate": 1.0885852560872301e-06, "loss": 0.0404, "step": 49783 }, { "epoch": 0.8816515914232712, "grad_norm": 0.6530255675315857, "learning_rate": 1.088263500362633e-06, "loss": 0.0584, "step": 49784 }, { "epoch": 0.8816693009602996, "grad_norm": 0.5545361042022705, "learning_rate": 1.087941790406245e-06, "loss": 0.0404, "step": 49785 }, { "epoch": 0.8816870104973281, "grad_norm": 0.743793249130249, "learning_rate": 1.0876201262191204e-06, "loss": 0.0715, "step": 49786 }, { "epoch": 0.8817047200343565, "grad_norm": 0.5820185542106628, "learning_rate": 1.0872985078023167e-06, "loss": 0.0374, "step": 49787 }, { "epoch": 0.8817224295713849, "grad_norm": 0.7484509944915771, "learning_rate": 1.0869769351568949e-06, "loss": 0.0706, "step": 49788 }, { "epoch": 0.8817401391084133, "grad_norm": 0.43910595774650574, "learning_rate": 1.0866554082839137e-06, "loss": 0.0363, "step": 49789 }, { "epoch": 0.8817578486454418, "grad_norm": 0.2527908384799957, "learning_rate": 1.0863339271844258e-06, "loss": 0.0551, "step": 49790 }, { "epoch": 0.8817755581824702, "grad_norm": 1.0841091871261597, "learning_rate": 1.0860124918594921e-06, "loss": 0.0427, "step": 49791 }, { "epoch": 0.8817932677194986, "grad_norm": 0.4337930977344513, "learning_rate": 1.0856911023101718e-06, "loss": 0.0389, "step": 49792 }, { "epoch": 0.881810977256527, "grad_norm": 0.48563408851623535, "learning_rate": 1.0853697585375189e-06, "loss": 0.0662, "step": 49793 }, { "epoch": 0.8818286867935555, "grad_norm": 0.5431333184242249, "learning_rate": 1.0850484605425926e-06, "loss": 0.0567, "step": 49794 }, { "epoch": 0.8818463963305839, "grad_norm": 0.5621252655982971, "learning_rate": 1.0847272083264486e-06, "loss": 0.0643, "step": 49795 }, { "epoch": 0.8818641058676123, "grad_norm": 0.4065660834312439, "learning_rate": 1.084406001890148e-06, "loss": 0.0318, "step": 49796 }, { "epoch": 0.8818818154046408, "grad_norm": 0.5998914241790771, "learning_rate": 1.0840848412347415e-06, "loss": 0.0421, "step": 49797 }, { "epoch": 0.8818995249416692, "grad_norm": 0.3318560719490051, "learning_rate": 1.0837637263612882e-06, "loss": 0.0354, "step": 49798 }, { "epoch": 0.8819172344786976, "grad_norm": 0.6022109389305115, "learning_rate": 1.0834426572708472e-06, "loss": 0.0397, "step": 49799 }, { "epoch": 0.881934944015726, "grad_norm": 0.7264162302017212, "learning_rate": 1.0831216339644695e-06, "loss": 0.058, "step": 49800 }, { "epoch": 0.8819526535527545, "grad_norm": 0.478755384683609, "learning_rate": 1.082800656443214e-06, "loss": 0.0449, "step": 49801 }, { "epoch": 0.8819703630897829, "grad_norm": 0.5596226453781128, "learning_rate": 1.0824797247081354e-06, "loss": 0.0466, "step": 49802 }, { "epoch": 0.8819880726268113, "grad_norm": 0.39816227555274963, "learning_rate": 1.0821588387602955e-06, "loss": 0.0287, "step": 49803 }, { "epoch": 0.8820057821638397, "grad_norm": 0.428445965051651, "learning_rate": 1.081837998600742e-06, "loss": 0.0257, "step": 49804 }, { "epoch": 0.8820234917008682, "grad_norm": 0.4262326955795288, "learning_rate": 1.0815172042305343e-06, "loss": 0.0447, "step": 49805 }, { "epoch": 0.8820412012378966, "grad_norm": 0.40074819326400757, "learning_rate": 1.0811964556507298e-06, "loss": 0.0394, "step": 49806 }, { "epoch": 0.882058910774925, "grad_norm": 0.5141201019287109, "learning_rate": 1.0808757528623774e-06, "loss": 0.055, "step": 49807 }, { "epoch": 0.8820766203119534, "grad_norm": 0.40861859917640686, "learning_rate": 1.0805550958665367e-06, "loss": 0.0418, "step": 49808 }, { "epoch": 0.882094329848982, "grad_norm": 0.6909496188163757, "learning_rate": 1.0802344846642631e-06, "loss": 0.0615, "step": 49809 }, { "epoch": 0.8821120393860103, "grad_norm": 0.6878398060798645, "learning_rate": 1.0799139192566093e-06, "loss": 0.0662, "step": 49810 }, { "epoch": 0.8821297489230387, "grad_norm": 0.6971307992935181, "learning_rate": 1.0795933996446294e-06, "loss": 0.0806, "step": 49811 }, { "epoch": 0.8821474584600673, "grad_norm": 0.42723509669303894, "learning_rate": 1.079272925829381e-06, "loss": 0.0314, "step": 49812 }, { "epoch": 0.8821651679970957, "grad_norm": 0.3465380072593689, "learning_rate": 1.0789524978119164e-06, "loss": 0.0352, "step": 49813 }, { "epoch": 0.882182877534124, "grad_norm": 0.4764501750469208, "learning_rate": 1.0786321155932916e-06, "loss": 0.0704, "step": 49814 }, { "epoch": 0.8822005870711525, "grad_norm": 0.7753835320472717, "learning_rate": 1.0783117791745572e-06, "loss": 0.0346, "step": 49815 }, { "epoch": 0.882218296608181, "grad_norm": 0.3285350799560547, "learning_rate": 1.0779914885567693e-06, "loss": 0.0264, "step": 49816 }, { "epoch": 0.8822360061452094, "grad_norm": 0.5726030468940735, "learning_rate": 1.0776712437409836e-06, "loss": 0.0497, "step": 49817 }, { "epoch": 0.8822537156822378, "grad_norm": 0.8800382614135742, "learning_rate": 1.0773510447282492e-06, "loss": 0.0939, "step": 49818 }, { "epoch": 0.8822714252192662, "grad_norm": 0.9994755387306213, "learning_rate": 1.0770308915196204e-06, "loss": 0.0626, "step": 49819 }, { "epoch": 0.8822891347562947, "grad_norm": 0.6974009275436401, "learning_rate": 1.0767107841161527e-06, "loss": 0.0576, "step": 49820 }, { "epoch": 0.8823068442933231, "grad_norm": 0.3241463303565979, "learning_rate": 1.076390722518899e-06, "loss": 0.0226, "step": 49821 }, { "epoch": 0.8823245538303515, "grad_norm": 0.8325439691543579, "learning_rate": 1.07607070672891e-06, "loss": 0.0531, "step": 49822 }, { "epoch": 0.8823422633673799, "grad_norm": 0.15674063563346863, "learning_rate": 1.0757507367472414e-06, "loss": 0.037, "step": 49823 }, { "epoch": 0.8823599729044084, "grad_norm": 0.6478204131126404, "learning_rate": 1.0754308125749457e-06, "loss": 0.0658, "step": 49824 }, { "epoch": 0.8823776824414368, "grad_norm": 0.582065224647522, "learning_rate": 1.0751109342130721e-06, "loss": 0.0403, "step": 49825 }, { "epoch": 0.8823953919784652, "grad_norm": 0.38739314675331116, "learning_rate": 1.0747911016626765e-06, "loss": 0.0443, "step": 49826 }, { "epoch": 0.8824131015154937, "grad_norm": 0.4984537363052368, "learning_rate": 1.0744713149248081e-06, "loss": 0.0512, "step": 49827 }, { "epoch": 0.8824308110525221, "grad_norm": 0.9974412322044373, "learning_rate": 1.0741515740005242e-06, "loss": 0.0724, "step": 49828 }, { "epoch": 0.8824485205895505, "grad_norm": 0.4989967942237854, "learning_rate": 1.073831878890869e-06, "loss": 0.0525, "step": 49829 }, { "epoch": 0.8824662301265789, "grad_norm": 0.8338919878005981, "learning_rate": 1.0735122295969003e-06, "loss": 0.0699, "step": 49830 }, { "epoch": 0.8824839396636074, "grad_norm": 1.0416656732559204, "learning_rate": 1.0731926261196668e-06, "loss": 0.0718, "step": 49831 }, { "epoch": 0.8825016492006358, "grad_norm": 0.19805243611335754, "learning_rate": 1.0728730684602212e-06, "loss": 0.0533, "step": 49832 }, { "epoch": 0.8825193587376642, "grad_norm": 0.3490068018436432, "learning_rate": 1.0725535566196143e-06, "loss": 0.0317, "step": 49833 }, { "epoch": 0.8825370682746926, "grad_norm": 0.40384814143180847, "learning_rate": 1.072234090598897e-06, "loss": 0.0322, "step": 49834 }, { "epoch": 0.8825547778117211, "grad_norm": 0.8196366429328918, "learning_rate": 1.0719146703991233e-06, "loss": 0.0638, "step": 49835 }, { "epoch": 0.8825724873487495, "grad_norm": 0.789417028427124, "learning_rate": 1.071595296021341e-06, "loss": 0.0633, "step": 49836 }, { "epoch": 0.8825901968857779, "grad_norm": 0.482033908367157, "learning_rate": 1.0712759674665989e-06, "loss": 0.0749, "step": 49837 }, { "epoch": 0.8826079064228063, "grad_norm": 0.6822658181190491, "learning_rate": 1.0709566847359547e-06, "loss": 0.0704, "step": 49838 }, { "epoch": 0.8826256159598348, "grad_norm": 0.4148520231246948, "learning_rate": 1.0706374478304476e-06, "loss": 0.0682, "step": 49839 }, { "epoch": 0.8826433254968632, "grad_norm": 0.7245595455169678, "learning_rate": 1.0703182567511383e-06, "loss": 0.0753, "step": 49840 }, { "epoch": 0.8826610350338916, "grad_norm": 0.5625267028808594, "learning_rate": 1.0699991114990726e-06, "loss": 0.0685, "step": 49841 }, { "epoch": 0.8826787445709201, "grad_norm": 0.6624566316604614, "learning_rate": 1.0696800120753032e-06, "loss": 0.0611, "step": 49842 }, { "epoch": 0.8826964541079485, "grad_norm": 0.704514741897583, "learning_rate": 1.0693609584808739e-06, "loss": 0.0348, "step": 49843 }, { "epoch": 0.8827141636449769, "grad_norm": 0.28930673003196716, "learning_rate": 1.069041950716839e-06, "loss": 0.0392, "step": 49844 }, { "epoch": 0.8827318731820053, "grad_norm": 0.8004679083824158, "learning_rate": 1.0687229887842498e-06, "loss": 0.0483, "step": 49845 }, { "epoch": 0.8827495827190338, "grad_norm": 0.6570408940315247, "learning_rate": 1.06840407268415e-06, "loss": 0.0395, "step": 49846 }, { "epoch": 0.8827672922560622, "grad_norm": 0.48338747024536133, "learning_rate": 1.0680852024175918e-06, "loss": 0.0576, "step": 49847 }, { "epoch": 0.8827850017930906, "grad_norm": 0.8031371235847473, "learning_rate": 1.067766377985625e-06, "loss": 0.0697, "step": 49848 }, { "epoch": 0.882802711330119, "grad_norm": 0.6653739809989929, "learning_rate": 1.0674475993892969e-06, "loss": 0.0627, "step": 49849 }, { "epoch": 0.8828204208671475, "grad_norm": 0.6317805647850037, "learning_rate": 1.067128866629658e-06, "loss": 0.0497, "step": 49850 }, { "epoch": 0.8828381304041759, "grad_norm": 0.6131787300109863, "learning_rate": 1.0668101797077545e-06, "loss": 0.0421, "step": 49851 }, { "epoch": 0.8828558399412043, "grad_norm": 0.46735846996307373, "learning_rate": 1.0664915386246371e-06, "loss": 0.0427, "step": 49852 }, { "epoch": 0.8828735494782327, "grad_norm": 0.7446147203445435, "learning_rate": 1.066172943381355e-06, "loss": 0.07, "step": 49853 }, { "epoch": 0.8828912590152612, "grad_norm": 0.4445883631706238, "learning_rate": 1.0658543939789523e-06, "loss": 0.0683, "step": 49854 }, { "epoch": 0.8829089685522896, "grad_norm": 0.3222390115261078, "learning_rate": 1.0655358904184798e-06, "loss": 0.0483, "step": 49855 }, { "epoch": 0.882926678089318, "grad_norm": 0.7786174416542053, "learning_rate": 1.0652174327009868e-06, "loss": 0.0525, "step": 49856 }, { "epoch": 0.8829443876263465, "grad_norm": 0.5179431438446045, "learning_rate": 1.0648990208275173e-06, "loss": 0.066, "step": 49857 }, { "epoch": 0.8829620971633749, "grad_norm": 0.7271197438240051, "learning_rate": 1.064580654799119e-06, "loss": 0.0682, "step": 49858 }, { "epoch": 0.8829798067004033, "grad_norm": 0.5496967434883118, "learning_rate": 1.0642623346168423e-06, "loss": 0.049, "step": 49859 }, { "epoch": 0.8829975162374317, "grad_norm": 0.7543161511421204, "learning_rate": 1.0639440602817319e-06, "loss": 0.0669, "step": 49860 }, { "epoch": 0.8830152257744602, "grad_norm": 0.47742098569869995, "learning_rate": 1.0636258317948367e-06, "loss": 0.059, "step": 49861 }, { "epoch": 0.8830329353114886, "grad_norm": 0.6095508933067322, "learning_rate": 1.0633076491572042e-06, "loss": 0.0352, "step": 49862 }, { "epoch": 0.883050644848517, "grad_norm": 0.7828544974327087, "learning_rate": 1.0629895123698801e-06, "loss": 0.0574, "step": 49863 }, { "epoch": 0.8830683543855454, "grad_norm": 0.709835410118103, "learning_rate": 1.0626714214339106e-06, "loss": 0.0676, "step": 49864 }, { "epoch": 0.8830860639225739, "grad_norm": 0.4906417727470398, "learning_rate": 1.0623533763503413e-06, "loss": 0.0596, "step": 49865 }, { "epoch": 0.8831037734596023, "grad_norm": 0.7092807292938232, "learning_rate": 1.0620353771202197e-06, "loss": 0.0766, "step": 49866 }, { "epoch": 0.8831214829966307, "grad_norm": 0.7942652702331543, "learning_rate": 1.0617174237445931e-06, "loss": 0.0351, "step": 49867 }, { "epoch": 0.8831391925336591, "grad_norm": 0.39338454604148865, "learning_rate": 1.0613995162245028e-06, "loss": 0.0542, "step": 49868 }, { "epoch": 0.8831569020706876, "grad_norm": 0.5653578042984009, "learning_rate": 1.0610816545610008e-06, "loss": 0.0397, "step": 49869 }, { "epoch": 0.883174611607716, "grad_norm": 0.4993438422679901, "learning_rate": 1.0607638387551332e-06, "loss": 0.0412, "step": 49870 }, { "epoch": 0.8831923211447444, "grad_norm": 0.8831790685653687, "learning_rate": 1.0604460688079393e-06, "loss": 0.0532, "step": 49871 }, { "epoch": 0.883210030681773, "grad_norm": 0.6391052603721619, "learning_rate": 1.060128344720468e-06, "loss": 0.045, "step": 49872 }, { "epoch": 0.8832277402188013, "grad_norm": 0.33858367800712585, "learning_rate": 1.059810666493765e-06, "loss": 0.0437, "step": 49873 }, { "epoch": 0.8832454497558297, "grad_norm": 0.8828057646751404, "learning_rate": 1.0594930341288783e-06, "loss": 0.0616, "step": 49874 }, { "epoch": 0.8832631592928581, "grad_norm": 0.9782617092132568, "learning_rate": 1.059175447626845e-06, "loss": 0.068, "step": 49875 }, { "epoch": 0.8832808688298867, "grad_norm": 0.6572864055633545, "learning_rate": 1.058857906988716e-06, "loss": 0.0577, "step": 49876 }, { "epoch": 0.883298578366915, "grad_norm": 0.8262085914611816, "learning_rate": 1.058540412215534e-06, "loss": 0.0515, "step": 49877 }, { "epoch": 0.8833162879039435, "grad_norm": 0.719980776309967, "learning_rate": 1.058222963308343e-06, "loss": 0.0369, "step": 49878 }, { "epoch": 0.8833339974409719, "grad_norm": 0.7561241984367371, "learning_rate": 1.0579055602681902e-06, "loss": 0.0464, "step": 49879 }, { "epoch": 0.8833517069780004, "grad_norm": 0.7332740426063538, "learning_rate": 1.057588203096117e-06, "loss": 0.0685, "step": 49880 }, { "epoch": 0.8833694165150288, "grad_norm": 0.5482507944107056, "learning_rate": 1.0572708917931705e-06, "loss": 0.0629, "step": 49881 }, { "epoch": 0.8833871260520572, "grad_norm": 0.6093092560768127, "learning_rate": 1.0569536263603902e-06, "loss": 0.0557, "step": 49882 }, { "epoch": 0.8834048355890856, "grad_norm": 0.40381190180778503, "learning_rate": 1.0566364067988233e-06, "loss": 0.0523, "step": 49883 }, { "epoch": 0.8834225451261141, "grad_norm": 0.5547099113464355, "learning_rate": 1.0563192331095107e-06, "loss": 0.0518, "step": 49884 }, { "epoch": 0.8834402546631425, "grad_norm": 0.7475398778915405, "learning_rate": 1.0560021052935e-06, "loss": 0.0423, "step": 49885 }, { "epoch": 0.8834579642001709, "grad_norm": 0.7871297001838684, "learning_rate": 1.05568502335183e-06, "loss": 0.046, "step": 49886 }, { "epoch": 0.8834756737371994, "grad_norm": 0.8491860032081604, "learning_rate": 1.055367987285547e-06, "loss": 0.0634, "step": 49887 }, { "epoch": 0.8834933832742278, "grad_norm": 0.4651232063770294, "learning_rate": 1.0550509970956917e-06, "loss": 0.0312, "step": 49888 }, { "epoch": 0.8835110928112562, "grad_norm": 0.5920634269714355, "learning_rate": 1.0547340527833082e-06, "loss": 0.063, "step": 49889 }, { "epoch": 0.8835288023482846, "grad_norm": 0.6803314089775085, "learning_rate": 1.0544171543494407e-06, "loss": 0.0261, "step": 49890 }, { "epoch": 0.8835465118853131, "grad_norm": 0.3551623821258545, "learning_rate": 1.0541003017951284e-06, "loss": 0.0485, "step": 49891 }, { "epoch": 0.8835642214223415, "grad_norm": 0.4246135950088501, "learning_rate": 1.0537834951214187e-06, "loss": 0.0464, "step": 49892 }, { "epoch": 0.8835819309593699, "grad_norm": 0.4586973190307617, "learning_rate": 1.053466734329349e-06, "loss": 0.068, "step": 49893 }, { "epoch": 0.8835996404963983, "grad_norm": 0.6280331611633301, "learning_rate": 1.0531500194199623e-06, "loss": 0.0314, "step": 49894 }, { "epoch": 0.8836173500334268, "grad_norm": 0.5406449437141418, "learning_rate": 1.0528333503943038e-06, "loss": 0.048, "step": 49895 }, { "epoch": 0.8836350595704552, "grad_norm": 0.3394051194190979, "learning_rate": 1.0525167272534097e-06, "loss": 0.05, "step": 49896 }, { "epoch": 0.8836527691074836, "grad_norm": 0.3420148491859436, "learning_rate": 1.0522001499983241e-06, "loss": 0.0438, "step": 49897 }, { "epoch": 0.883670478644512, "grad_norm": 0.3426406681537628, "learning_rate": 1.0518836186300912e-06, "loss": 0.0393, "step": 49898 }, { "epoch": 0.8836881881815405, "grad_norm": 0.5701867341995239, "learning_rate": 1.0515671331497517e-06, "loss": 0.0692, "step": 49899 }, { "epoch": 0.8837058977185689, "grad_norm": 0.5740236043930054, "learning_rate": 1.0512506935583448e-06, "loss": 0.0692, "step": 49900 }, { "epoch": 0.8837236072555973, "grad_norm": 0.6971966624259949, "learning_rate": 1.0509342998569117e-06, "loss": 0.0856, "step": 49901 }, { "epoch": 0.8837413167926258, "grad_norm": 1.0153247117996216, "learning_rate": 1.0506179520464958e-06, "loss": 0.0657, "step": 49902 }, { "epoch": 0.8837590263296542, "grad_norm": 0.7317229509353638, "learning_rate": 1.0503016501281338e-06, "loss": 0.0546, "step": 49903 }, { "epoch": 0.8837767358666826, "grad_norm": 0.502358615398407, "learning_rate": 1.0499853941028676e-06, "loss": 0.0466, "step": 49904 }, { "epoch": 0.883794445403711, "grad_norm": 0.4116501212120056, "learning_rate": 1.04966918397174e-06, "loss": 0.0358, "step": 49905 }, { "epoch": 0.8838121549407395, "grad_norm": 0.3766443133354187, "learning_rate": 1.0493530197357898e-06, "loss": 0.0452, "step": 49906 }, { "epoch": 0.8838298644777679, "grad_norm": 0.7675024271011353, "learning_rate": 1.0490369013960565e-06, "loss": 0.0671, "step": 49907 }, { "epoch": 0.8838475740147963, "grad_norm": 0.7148141264915466, "learning_rate": 1.0487208289535793e-06, "loss": 0.0502, "step": 49908 }, { "epoch": 0.8838652835518247, "grad_norm": 0.8736180067062378, "learning_rate": 1.0484048024094039e-06, "loss": 0.0395, "step": 49909 }, { "epoch": 0.8838829930888532, "grad_norm": 0.390259712934494, "learning_rate": 1.0480888217645628e-06, "loss": 0.0332, "step": 49910 }, { "epoch": 0.8839007026258816, "grad_norm": 0.5509942770004272, "learning_rate": 1.0477728870200986e-06, "loss": 0.0677, "step": 49911 }, { "epoch": 0.88391841216291, "grad_norm": 0.6935132145881653, "learning_rate": 1.0474569981770487e-06, "loss": 0.0681, "step": 49912 }, { "epoch": 0.8839361216999384, "grad_norm": 0.7826209664344788, "learning_rate": 1.047141155236459e-06, "loss": 0.0595, "step": 49913 }, { "epoch": 0.8839538312369669, "grad_norm": 0.4042070209980011, "learning_rate": 1.0468253581993587e-06, "loss": 0.0433, "step": 49914 }, { "epoch": 0.8839715407739953, "grad_norm": 0.4633646309375763, "learning_rate": 1.0465096070667935e-06, "loss": 0.0493, "step": 49915 }, { "epoch": 0.8839892503110237, "grad_norm": 0.45986029505729675, "learning_rate": 1.0461939018397993e-06, "loss": 0.0466, "step": 49916 }, { "epoch": 0.8840069598480522, "grad_norm": 0.509552001953125, "learning_rate": 1.045878242519417e-06, "loss": 0.0333, "step": 49917 }, { "epoch": 0.8840246693850806, "grad_norm": 0.5223496556282043, "learning_rate": 1.0455626291066839e-06, "loss": 0.0574, "step": 49918 }, { "epoch": 0.884042378922109, "grad_norm": 0.7015846967697144, "learning_rate": 1.0452470616026378e-06, "loss": 0.0553, "step": 49919 }, { "epoch": 0.8840600884591374, "grad_norm": 0.2685491144657135, "learning_rate": 1.0449315400083193e-06, "loss": 0.0477, "step": 49920 }, { "epoch": 0.8840777979961659, "grad_norm": 0.4846770763397217, "learning_rate": 1.0446160643247626e-06, "loss": 0.0581, "step": 49921 }, { "epoch": 0.8840955075331943, "grad_norm": 0.2850434482097626, "learning_rate": 1.0443006345530071e-06, "loss": 0.054, "step": 49922 }, { "epoch": 0.8841132170702227, "grad_norm": 0.5415358543395996, "learning_rate": 1.0439852506940918e-06, "loss": 0.0362, "step": 49923 }, { "epoch": 0.8841309266072511, "grad_norm": 0.9279198050498962, "learning_rate": 1.0436699127490556e-06, "loss": 0.0789, "step": 49924 }, { "epoch": 0.8841486361442796, "grad_norm": 0.5880638957023621, "learning_rate": 1.0433546207189315e-06, "loss": 0.0567, "step": 49925 }, { "epoch": 0.884166345681308, "grad_norm": 0.625451922416687, "learning_rate": 1.0430393746047566e-06, "loss": 0.0278, "step": 49926 }, { "epoch": 0.8841840552183364, "grad_norm": 0.47894832491874695, "learning_rate": 1.0427241744075738e-06, "loss": 0.0527, "step": 49927 }, { "epoch": 0.8842017647553648, "grad_norm": 0.5086290836334229, "learning_rate": 1.042409020128417e-06, "loss": 0.0404, "step": 49928 }, { "epoch": 0.8842194742923933, "grad_norm": 0.87922203540802, "learning_rate": 1.0420939117683203e-06, "loss": 0.0439, "step": 49929 }, { "epoch": 0.8842371838294217, "grad_norm": 0.7412289381027222, "learning_rate": 1.0417788493283247e-06, "loss": 0.0532, "step": 49930 }, { "epoch": 0.8842548933664501, "grad_norm": 0.4645335078239441, "learning_rate": 1.0414638328094662e-06, "loss": 0.052, "step": 49931 }, { "epoch": 0.8842726029034786, "grad_norm": 0.5743749737739563, "learning_rate": 1.0411488622127786e-06, "loss": 0.0836, "step": 49932 }, { "epoch": 0.884290312440507, "grad_norm": 0.49513962864875793, "learning_rate": 1.0408339375392979e-06, "loss": 0.0649, "step": 49933 }, { "epoch": 0.8843080219775354, "grad_norm": 0.4101271629333496, "learning_rate": 1.0405190587900631e-06, "loss": 0.0365, "step": 49934 }, { "epoch": 0.8843257315145638, "grad_norm": 0.504642903804779, "learning_rate": 1.0402042259661088e-06, "loss": 0.0405, "step": 49935 }, { "epoch": 0.8843434410515923, "grad_norm": 0.45111173391342163, "learning_rate": 1.0398894390684687e-06, "loss": 0.0486, "step": 49936 }, { "epoch": 0.8843611505886207, "grad_norm": 0.5048379302024841, "learning_rate": 1.0395746980981824e-06, "loss": 0.0392, "step": 49937 }, { "epoch": 0.8843788601256491, "grad_norm": 0.37004366517066956, "learning_rate": 1.0392600030562839e-06, "loss": 0.0537, "step": 49938 }, { "epoch": 0.8843965696626775, "grad_norm": 0.3886987864971161, "learning_rate": 1.038945353943807e-06, "loss": 0.0501, "step": 49939 }, { "epoch": 0.884414279199706, "grad_norm": 0.33235543966293335, "learning_rate": 1.0386307507617882e-06, "loss": 0.0632, "step": 49940 }, { "epoch": 0.8844319887367345, "grad_norm": 0.4921340048313141, "learning_rate": 1.0383161935112628e-06, "loss": 0.061, "step": 49941 }, { "epoch": 0.8844496982737629, "grad_norm": 0.5918012261390686, "learning_rate": 1.0380016821932637e-06, "loss": 0.0426, "step": 49942 }, { "epoch": 0.8844674078107914, "grad_norm": 0.7903890609741211, "learning_rate": 1.0376872168088263e-06, "loss": 0.0702, "step": 49943 }, { "epoch": 0.8844851173478198, "grad_norm": 0.6045383214950562, "learning_rate": 1.0373727973589869e-06, "loss": 0.0594, "step": 49944 }, { "epoch": 0.8845028268848482, "grad_norm": 0.5144276022911072, "learning_rate": 1.0370584238447778e-06, "loss": 0.0431, "step": 49945 }, { "epoch": 0.8845205364218766, "grad_norm": 0.3365125358104706, "learning_rate": 1.0367440962672347e-06, "loss": 0.0544, "step": 49946 }, { "epoch": 0.8845382459589051, "grad_norm": 0.659633219242096, "learning_rate": 1.0364298146273904e-06, "loss": 0.0601, "step": 49947 }, { "epoch": 0.8845559554959335, "grad_norm": 0.39644357562065125, "learning_rate": 1.036115578926282e-06, "loss": 0.0548, "step": 49948 }, { "epoch": 0.8845736650329619, "grad_norm": 0.8338201642036438, "learning_rate": 1.0358013891649393e-06, "loss": 0.0419, "step": 49949 }, { "epoch": 0.8845913745699903, "grad_norm": 0.30475613474845886, "learning_rate": 1.0354872453443975e-06, "loss": 0.0432, "step": 49950 }, { "epoch": 0.8846090841070188, "grad_norm": 0.5184409022331238, "learning_rate": 1.035173147465691e-06, "loss": 0.0472, "step": 49951 }, { "epoch": 0.8846267936440472, "grad_norm": 0.5136963725090027, "learning_rate": 1.0348590955298543e-06, "loss": 0.0787, "step": 49952 }, { "epoch": 0.8846445031810756, "grad_norm": 0.40797296166419983, "learning_rate": 1.034545089537916e-06, "loss": 0.053, "step": 49953 }, { "epoch": 0.884662212718104, "grad_norm": 0.5435373187065125, "learning_rate": 1.0342311294909108e-06, "loss": 0.0527, "step": 49954 }, { "epoch": 0.8846799222551325, "grad_norm": 0.7605680227279663, "learning_rate": 1.0339172153898724e-06, "loss": 0.0666, "step": 49955 }, { "epoch": 0.8846976317921609, "grad_norm": 0.21684668958187103, "learning_rate": 1.033603347235837e-06, "loss": 0.0405, "step": 49956 }, { "epoch": 0.8847153413291893, "grad_norm": 0.788844108581543, "learning_rate": 1.0332895250298318e-06, "loss": 0.08, "step": 49957 }, { "epoch": 0.8847330508662178, "grad_norm": 0.38220563530921936, "learning_rate": 1.0329757487728913e-06, "loss": 0.0495, "step": 49958 }, { "epoch": 0.8847507604032462, "grad_norm": 0.43804919719696045, "learning_rate": 1.0326620184660512e-06, "loss": 0.0575, "step": 49959 }, { "epoch": 0.8847684699402746, "grad_norm": 0.9436546564102173, "learning_rate": 1.0323483341103373e-06, "loss": 0.0669, "step": 49960 }, { "epoch": 0.884786179477303, "grad_norm": 0.5745161175727844, "learning_rate": 1.0320346957067855e-06, "loss": 0.0566, "step": 49961 }, { "epoch": 0.8848038890143315, "grad_norm": 0.40328797698020935, "learning_rate": 1.031721103256425e-06, "loss": 0.0441, "step": 49962 }, { "epoch": 0.8848215985513599, "grad_norm": 0.40732917189598083, "learning_rate": 1.0314075567602916e-06, "loss": 0.0553, "step": 49963 }, { "epoch": 0.8848393080883883, "grad_norm": 0.37679123878479004, "learning_rate": 1.0310940562194127e-06, "loss": 0.0548, "step": 49964 }, { "epoch": 0.8848570176254167, "grad_norm": 0.5810728669166565, "learning_rate": 1.0307806016348209e-06, "loss": 0.0684, "step": 49965 }, { "epoch": 0.8848747271624452, "grad_norm": 0.4836725890636444, "learning_rate": 1.0304671930075522e-06, "loss": 0.0516, "step": 49966 }, { "epoch": 0.8848924366994736, "grad_norm": 0.4771929979324341, "learning_rate": 1.0301538303386303e-06, "loss": 0.0531, "step": 49967 }, { "epoch": 0.884910146236502, "grad_norm": 0.9868581295013428, "learning_rate": 1.02984051362909e-06, "loss": 0.0516, "step": 49968 }, { "epoch": 0.8849278557735304, "grad_norm": 0.7141423225402832, "learning_rate": 1.02952724287996e-06, "loss": 0.0618, "step": 49969 }, { "epoch": 0.8849455653105589, "grad_norm": 0.7848959565162659, "learning_rate": 1.0292140180922765e-06, "loss": 0.0743, "step": 49970 }, { "epoch": 0.8849632748475873, "grad_norm": 1.1171000003814697, "learning_rate": 1.0289008392670618e-06, "loss": 0.0744, "step": 49971 }, { "epoch": 0.8849809843846157, "grad_norm": 0.8496453166007996, "learning_rate": 1.0285877064053517e-06, "loss": 0.0599, "step": 49972 }, { "epoch": 0.8849986939216442, "grad_norm": 0.5371045470237732, "learning_rate": 1.0282746195081737e-06, "loss": 0.0455, "step": 49973 }, { "epoch": 0.8850164034586726, "grad_norm": 0.5981389284133911, "learning_rate": 1.027961578576559e-06, "loss": 0.073, "step": 49974 }, { "epoch": 0.885034112995701, "grad_norm": 0.3177223205566406, "learning_rate": 1.027648583611538e-06, "loss": 0.0378, "step": 49975 }, { "epoch": 0.8850518225327294, "grad_norm": 0.5313366651535034, "learning_rate": 1.02733563461414e-06, "loss": 0.0639, "step": 49976 }, { "epoch": 0.8850695320697579, "grad_norm": 0.4724279046058655, "learning_rate": 1.0270227315853974e-06, "loss": 0.0602, "step": 49977 }, { "epoch": 0.8850872416067863, "grad_norm": 0.6115127801895142, "learning_rate": 1.0267098745263332e-06, "loss": 0.056, "step": 49978 }, { "epoch": 0.8851049511438147, "grad_norm": 0.8264868855476379, "learning_rate": 1.026397063437981e-06, "loss": 0.0801, "step": 49979 }, { "epoch": 0.8851226606808431, "grad_norm": 0.6498862504959106, "learning_rate": 1.026084298321372e-06, "loss": 0.062, "step": 49980 }, { "epoch": 0.8851403702178716, "grad_norm": 0.5976417660713196, "learning_rate": 1.0257715791775285e-06, "loss": 0.0686, "step": 49981 }, { "epoch": 0.8851580797549, "grad_norm": 0.4525923430919647, "learning_rate": 1.0254589060074848e-06, "loss": 0.0604, "step": 49982 }, { "epoch": 0.8851757892919284, "grad_norm": 0.580224871635437, "learning_rate": 1.0251462788122668e-06, "loss": 0.0408, "step": 49983 }, { "epoch": 0.8851934988289568, "grad_norm": 0.45761609077453613, "learning_rate": 1.0248336975929034e-06, "loss": 0.0784, "step": 49984 }, { "epoch": 0.8852112083659853, "grad_norm": 0.691609263420105, "learning_rate": 1.024521162350424e-06, "loss": 0.0502, "step": 49985 }, { "epoch": 0.8852289179030137, "grad_norm": 0.32502567768096924, "learning_rate": 1.024208673085858e-06, "loss": 0.032, "step": 49986 }, { "epoch": 0.8852466274400421, "grad_norm": 0.5899583697319031, "learning_rate": 1.0238962298002308e-06, "loss": 0.0397, "step": 49987 }, { "epoch": 0.8852643369770706, "grad_norm": 0.5439389944076538, "learning_rate": 1.0235838324945735e-06, "loss": 0.0442, "step": 49988 }, { "epoch": 0.885282046514099, "grad_norm": 0.46253839135169983, "learning_rate": 1.0232714811699102e-06, "loss": 0.0276, "step": 49989 }, { "epoch": 0.8852997560511274, "grad_norm": 0.31451836228370667, "learning_rate": 1.0229591758272683e-06, "loss": 0.051, "step": 49990 }, { "epoch": 0.8853174655881558, "grad_norm": 0.5679658651351929, "learning_rate": 1.0226469164676805e-06, "loss": 0.0626, "step": 49991 }, { "epoch": 0.8853351751251843, "grad_norm": 0.7640476226806641, "learning_rate": 1.022334703092166e-06, "loss": 0.0815, "step": 49992 }, { "epoch": 0.8853528846622127, "grad_norm": 0.8957228660583496, "learning_rate": 1.0220225357017588e-06, "loss": 0.0722, "step": 49993 }, { "epoch": 0.8853705941992411, "grad_norm": 0.43427005410194397, "learning_rate": 1.0217104142974847e-06, "loss": 0.0581, "step": 49994 }, { "epoch": 0.8853883037362695, "grad_norm": 0.45842334628105164, "learning_rate": 1.02139833888037e-06, "loss": 0.0542, "step": 49995 }, { "epoch": 0.885406013273298, "grad_norm": 0.37493181228637695, "learning_rate": 1.02108630945144e-06, "loss": 0.0492, "step": 49996 }, { "epoch": 0.8854237228103264, "grad_norm": 0.5807842016220093, "learning_rate": 1.020774326011721e-06, "loss": 0.0465, "step": 49997 }, { "epoch": 0.8854414323473548, "grad_norm": 0.4056331217288971, "learning_rate": 1.020462388562245e-06, "loss": 0.0788, "step": 49998 }, { "epoch": 0.8854591418843832, "grad_norm": 0.5873964428901672, "learning_rate": 1.02015049710403e-06, "loss": 0.0397, "step": 49999 }, { "epoch": 0.8854768514214117, "grad_norm": 0.4110702872276306, "learning_rate": 1.0198386516381047e-06, "loss": 0.0451, "step": 50000 }, { "epoch": 0.8854945609584401, "grad_norm": 0.4903040826320648, "learning_rate": 1.019526852165497e-06, "loss": 0.0605, "step": 50001 }, { "epoch": 0.8855122704954685, "grad_norm": 0.4310120642185211, "learning_rate": 1.0192150986872328e-06, "loss": 0.0363, "step": 50002 }, { "epoch": 0.885529980032497, "grad_norm": 0.39566347002983093, "learning_rate": 1.0189033912043361e-06, "loss": 0.0591, "step": 50003 }, { "epoch": 0.8855476895695255, "grad_norm": 0.8780154585838318, "learning_rate": 1.0185917297178344e-06, "loss": 0.0874, "step": 50004 }, { "epoch": 0.8855653991065539, "grad_norm": 0.3302345275878906, "learning_rate": 1.0182801142287518e-06, "loss": 0.0554, "step": 50005 }, { "epoch": 0.8855831086435823, "grad_norm": 0.6458871364593506, "learning_rate": 1.0179685447381126e-06, "loss": 0.0554, "step": 50006 }, { "epoch": 0.8856008181806108, "grad_norm": 0.5608035922050476, "learning_rate": 1.0176570212469428e-06, "loss": 0.0595, "step": 50007 }, { "epoch": 0.8856185277176392, "grad_norm": 0.5511586666107178, "learning_rate": 1.0173455437562678e-06, "loss": 0.0643, "step": 50008 }, { "epoch": 0.8856362372546676, "grad_norm": 0.5301934480667114, "learning_rate": 1.0170341122671123e-06, "loss": 0.0585, "step": 50009 }, { "epoch": 0.885653946791696, "grad_norm": 0.544284462928772, "learning_rate": 1.0167227267804985e-06, "loss": 0.0313, "step": 50010 }, { "epoch": 0.8856716563287245, "grad_norm": 0.8637276887893677, "learning_rate": 1.016411387297454e-06, "loss": 0.0589, "step": 50011 }, { "epoch": 0.8856893658657529, "grad_norm": 0.46999838948249817, "learning_rate": 1.0161000938189997e-06, "loss": 0.0465, "step": 50012 }, { "epoch": 0.8857070754027813, "grad_norm": 0.6808909177780151, "learning_rate": 1.015788846346163e-06, "loss": 0.0491, "step": 50013 }, { "epoch": 0.8857247849398097, "grad_norm": 0.3881435692310333, "learning_rate": 1.0154776448799663e-06, "loss": 0.073, "step": 50014 }, { "epoch": 0.8857424944768382, "grad_norm": 0.8125080466270447, "learning_rate": 1.0151664894214342e-06, "loss": 0.0421, "step": 50015 }, { "epoch": 0.8857602040138666, "grad_norm": 0.5822039842605591, "learning_rate": 1.0148553799715905e-06, "loss": 0.0641, "step": 50016 }, { "epoch": 0.885777913550895, "grad_norm": 0.47323426604270935, "learning_rate": 1.0145443165314577e-06, "loss": 0.0553, "step": 50017 }, { "epoch": 0.8857956230879235, "grad_norm": 0.3746430277824402, "learning_rate": 1.0142332991020587e-06, "loss": 0.0281, "step": 50018 }, { "epoch": 0.8858133326249519, "grad_norm": 0.4471239447593689, "learning_rate": 1.0139223276844189e-06, "loss": 0.0438, "step": 50019 }, { "epoch": 0.8858310421619803, "grad_norm": 0.3876367509365082, "learning_rate": 1.0136114022795596e-06, "loss": 0.0451, "step": 50020 }, { "epoch": 0.8858487516990087, "grad_norm": 0.3794209957122803, "learning_rate": 1.0133005228884995e-06, "loss": 0.0403, "step": 50021 }, { "epoch": 0.8858664612360372, "grad_norm": 0.6003392934799194, "learning_rate": 1.0129896895122697e-06, "loss": 0.0753, "step": 50022 }, { "epoch": 0.8858841707730656, "grad_norm": 0.7037714123725891, "learning_rate": 1.0126789021518929e-06, "loss": 0.0498, "step": 50023 }, { "epoch": 0.885901880310094, "grad_norm": 1.0948739051818848, "learning_rate": 1.012368160808383e-06, "loss": 0.072, "step": 50024 }, { "epoch": 0.8859195898471224, "grad_norm": 0.8680991530418396, "learning_rate": 1.0120574654827692e-06, "loss": 0.0728, "step": 50025 }, { "epoch": 0.8859372993841509, "grad_norm": 0.6797734498977661, "learning_rate": 1.011746816176069e-06, "loss": 0.0582, "step": 50026 }, { "epoch": 0.8859550089211793, "grad_norm": 0.584553599357605, "learning_rate": 1.0114362128893118e-06, "loss": 0.051, "step": 50027 }, { "epoch": 0.8859727184582077, "grad_norm": 0.6632318496704102, "learning_rate": 1.0111256556235115e-06, "loss": 0.0436, "step": 50028 }, { "epoch": 0.8859904279952361, "grad_norm": 0.48686978220939636, "learning_rate": 1.0108151443796925e-06, "loss": 0.0515, "step": 50029 }, { "epoch": 0.8860081375322646, "grad_norm": 1.0072311162948608, "learning_rate": 1.0105046791588757e-06, "loss": 0.0817, "step": 50030 }, { "epoch": 0.886025847069293, "grad_norm": 0.4754481017589569, "learning_rate": 1.0101942599620851e-06, "loss": 0.08, "step": 50031 }, { "epoch": 0.8860435566063214, "grad_norm": 0.6655736565589905, "learning_rate": 1.0098838867903398e-06, "loss": 0.0518, "step": 50032 }, { "epoch": 0.8860612661433499, "grad_norm": 0.7171275019645691, "learning_rate": 1.0095735596446625e-06, "loss": 0.0746, "step": 50033 }, { "epoch": 0.8860789756803783, "grad_norm": 0.6116518378257751, "learning_rate": 1.0092632785260742e-06, "loss": 0.0671, "step": 50034 }, { "epoch": 0.8860966852174067, "grad_norm": 0.7015330791473389, "learning_rate": 1.0089530434355937e-06, "loss": 0.0604, "step": 50035 }, { "epoch": 0.8861143947544351, "grad_norm": 0.5748511552810669, "learning_rate": 1.0086428543742403e-06, "loss": 0.0619, "step": 50036 }, { "epoch": 0.8861321042914636, "grad_norm": 0.6440737843513489, "learning_rate": 1.008332711343042e-06, "loss": 0.0657, "step": 50037 }, { "epoch": 0.886149813828492, "grad_norm": 0.4096154570579529, "learning_rate": 1.0080226143430087e-06, "loss": 0.0429, "step": 50038 }, { "epoch": 0.8861675233655204, "grad_norm": 0.33750540018081665, "learning_rate": 1.0077125633751672e-06, "loss": 0.0314, "step": 50039 }, { "epoch": 0.8861852329025488, "grad_norm": 0.4909588098526001, "learning_rate": 1.0074025584405362e-06, "loss": 0.0569, "step": 50040 }, { "epoch": 0.8862029424395773, "grad_norm": 0.6089372038841248, "learning_rate": 1.0070925995401347e-06, "loss": 0.0665, "step": 50041 }, { "epoch": 0.8862206519766057, "grad_norm": 0.4461371600627899, "learning_rate": 1.0067826866749842e-06, "loss": 0.0604, "step": 50042 }, { "epoch": 0.8862383615136341, "grad_norm": 0.759141743183136, "learning_rate": 1.0064728198461031e-06, "loss": 0.0644, "step": 50043 }, { "epoch": 0.8862560710506625, "grad_norm": 0.5884547233581543, "learning_rate": 1.0061629990545145e-06, "loss": 0.0504, "step": 50044 }, { "epoch": 0.886273780587691, "grad_norm": 0.30260786414146423, "learning_rate": 1.0058532243012309e-06, "loss": 0.0538, "step": 50045 }, { "epoch": 0.8862914901247194, "grad_norm": 0.7301619052886963, "learning_rate": 1.0055434955872745e-06, "loss": 0.0683, "step": 50046 }, { "epoch": 0.8863091996617478, "grad_norm": 0.3273790776729584, "learning_rate": 1.0052338129136645e-06, "loss": 0.0582, "step": 50047 }, { "epoch": 0.8863269091987763, "grad_norm": 0.5309194922447205, "learning_rate": 1.0049241762814222e-06, "loss": 0.0557, "step": 50048 }, { "epoch": 0.8863446187358047, "grad_norm": 0.696383535861969, "learning_rate": 1.0046145856915613e-06, "loss": 0.0635, "step": 50049 }, { "epoch": 0.8863623282728331, "grad_norm": 0.7012754082679749, "learning_rate": 1.004305041145101e-06, "loss": 0.05, "step": 50050 }, { "epoch": 0.8863800378098615, "grad_norm": 0.29731863737106323, "learning_rate": 1.003995542643066e-06, "loss": 0.0445, "step": 50051 }, { "epoch": 0.88639774734689, "grad_norm": 0.6905436515808105, "learning_rate": 1.0036860901864681e-06, "loss": 0.0649, "step": 50052 }, { "epoch": 0.8864154568839184, "grad_norm": 0.3907373547554016, "learning_rate": 1.0033766837763269e-06, "loss": 0.0479, "step": 50053 }, { "epoch": 0.8864331664209468, "grad_norm": 0.5759936571121216, "learning_rate": 1.00306732341366e-06, "loss": 0.0411, "step": 50054 }, { "epoch": 0.8864508759579752, "grad_norm": 0.7226043343544006, "learning_rate": 1.0027580090994897e-06, "loss": 0.0734, "step": 50055 }, { "epoch": 0.8864685854950037, "grad_norm": 0.6798129677772522, "learning_rate": 1.0024487408348271e-06, "loss": 0.0671, "step": 50056 }, { "epoch": 0.8864862950320321, "grad_norm": 0.4157637357711792, "learning_rate": 1.002139518620691e-06, "loss": 0.0739, "step": 50057 }, { "epoch": 0.8865040045690605, "grad_norm": 0.4620645046234131, "learning_rate": 1.001830342458101e-06, "loss": 0.0467, "step": 50058 }, { "epoch": 0.8865217141060889, "grad_norm": 0.4546554684638977, "learning_rate": 1.0015212123480744e-06, "loss": 0.0535, "step": 50059 }, { "epoch": 0.8865394236431174, "grad_norm": 0.7642579078674316, "learning_rate": 1.0012121282916254e-06, "loss": 0.0554, "step": 50060 }, { "epoch": 0.8865571331801458, "grad_norm": 0.6189824342727661, "learning_rate": 1.0009030902897748e-06, "loss": 0.0437, "step": 50061 }, { "epoch": 0.8865748427171742, "grad_norm": 0.48459988832473755, "learning_rate": 1.0005940983435368e-06, "loss": 0.0436, "step": 50062 }, { "epoch": 0.8865925522542027, "grad_norm": 0.5821757316589355, "learning_rate": 1.0002851524539274e-06, "loss": 0.0334, "step": 50063 }, { "epoch": 0.8866102617912311, "grad_norm": 1.0401915311813354, "learning_rate": 9.999762526219642e-07, "loss": 0.0892, "step": 50064 }, { "epoch": 0.8866279713282595, "grad_norm": 0.2968810796737671, "learning_rate": 9.996673988486627e-07, "loss": 0.0413, "step": 50065 }, { "epoch": 0.8866456808652879, "grad_norm": 0.8434152007102966, "learning_rate": 9.993585911350422e-07, "loss": 0.0717, "step": 50066 }, { "epoch": 0.8866633904023165, "grad_norm": 0.7839731574058533, "learning_rate": 9.99049829482112e-07, "loss": 0.0544, "step": 50067 }, { "epoch": 0.8866810999393449, "grad_norm": 0.4171760678291321, "learning_rate": 9.98741113890893e-07, "loss": 0.0475, "step": 50068 }, { "epoch": 0.8866988094763733, "grad_norm": 0.27194976806640625, "learning_rate": 9.984324443623992e-07, "loss": 0.0384, "step": 50069 }, { "epoch": 0.8867165190134016, "grad_norm": 0.46387866139411926, "learning_rate": 9.981238208976468e-07, "loss": 0.0606, "step": 50070 }, { "epoch": 0.8867342285504302, "grad_norm": 1.0321881771087646, "learning_rate": 9.978152434976513e-07, "loss": 0.0495, "step": 50071 }, { "epoch": 0.8867519380874586, "grad_norm": 0.5586514472961426, "learning_rate": 9.975067121634267e-07, "loss": 0.0586, "step": 50072 }, { "epoch": 0.886769647624487, "grad_norm": 0.29739901423454285, "learning_rate": 9.971982268959911e-07, "loss": 0.0504, "step": 50073 }, { "epoch": 0.8867873571615154, "grad_norm": 0.620489239692688, "learning_rate": 9.968897876963552e-07, "loss": 0.0643, "step": 50074 }, { "epoch": 0.8868050666985439, "grad_norm": 0.5577613711357117, "learning_rate": 9.965813945655344e-07, "loss": 0.0662, "step": 50075 }, { "epoch": 0.8868227762355723, "grad_norm": 0.5331082344055176, "learning_rate": 9.962730475045467e-07, "loss": 0.031, "step": 50076 }, { "epoch": 0.8868404857726007, "grad_norm": 0.9177631735801697, "learning_rate": 9.959647465144028e-07, "loss": 0.054, "step": 50077 }, { "epoch": 0.8868581953096292, "grad_norm": 0.43680790066719055, "learning_rate": 9.956564915961186e-07, "loss": 0.0544, "step": 50078 }, { "epoch": 0.8868759048466576, "grad_norm": 0.6102064847946167, "learning_rate": 9.953482827507065e-07, "loss": 0.038, "step": 50079 }, { "epoch": 0.886893614383686, "grad_norm": 0.42951738834381104, "learning_rate": 9.95040119979186e-07, "loss": 0.0582, "step": 50080 }, { "epoch": 0.8869113239207144, "grad_norm": 0.48715898394584656, "learning_rate": 9.947320032825642e-07, "loss": 0.0627, "step": 50081 }, { "epoch": 0.8869290334577429, "grad_norm": 0.5705039501190186, "learning_rate": 9.94423932661857e-07, "loss": 0.0552, "step": 50082 }, { "epoch": 0.8869467429947713, "grad_norm": 0.538493812084198, "learning_rate": 9.941159081180824e-07, "loss": 0.0504, "step": 50083 }, { "epoch": 0.8869644525317997, "grad_norm": 0.8118436932563782, "learning_rate": 9.938079296522456e-07, "loss": 0.0572, "step": 50084 }, { "epoch": 0.8869821620688281, "grad_norm": 0.6375812292098999, "learning_rate": 9.934999972653663e-07, "loss": 0.047, "step": 50085 }, { "epoch": 0.8869998716058566, "grad_norm": 0.5193594694137573, "learning_rate": 9.931921109584536e-07, "loss": 0.0612, "step": 50086 }, { "epoch": 0.887017581142885, "grad_norm": 0.6563283205032349, "learning_rate": 9.928842707325249e-07, "loss": 0.067, "step": 50087 }, { "epoch": 0.8870352906799134, "grad_norm": 0.4874304533004761, "learning_rate": 9.925764765885859e-07, "loss": 0.0526, "step": 50088 }, { "epoch": 0.8870530002169418, "grad_norm": 0.46797072887420654, "learning_rate": 9.92268728527656e-07, "loss": 0.0644, "step": 50089 }, { "epoch": 0.8870707097539703, "grad_norm": 0.9787406325340271, "learning_rate": 9.919610265507445e-07, "loss": 0.0637, "step": 50090 }, { "epoch": 0.8870884192909987, "grad_norm": 0.6031145453453064, "learning_rate": 9.916533706588671e-07, "loss": 0.0621, "step": 50091 }, { "epoch": 0.8871061288280271, "grad_norm": 0.8300238847732544, "learning_rate": 9.913457608530313e-07, "loss": 0.0515, "step": 50092 }, { "epoch": 0.8871238383650556, "grad_norm": 0.7216576933860779, "learning_rate": 9.910381971342514e-07, "loss": 0.0631, "step": 50093 }, { "epoch": 0.887141547902084, "grad_norm": 0.4756604731082916, "learning_rate": 9.907306795035398e-07, "loss": 0.0384, "step": 50094 }, { "epoch": 0.8871592574391124, "grad_norm": 0.7718665599822998, "learning_rate": 9.904232079619057e-07, "loss": 0.0477, "step": 50095 }, { "epoch": 0.8871769669761408, "grad_norm": 0.9584224224090576, "learning_rate": 9.901157825103635e-07, "loss": 0.0716, "step": 50096 }, { "epoch": 0.8871946765131693, "grad_norm": 0.895283579826355, "learning_rate": 9.898084031499222e-07, "loss": 0.0673, "step": 50097 }, { "epoch": 0.8872123860501977, "grad_norm": 0.5017091035842896, "learning_rate": 9.895010698815927e-07, "loss": 0.0532, "step": 50098 }, { "epoch": 0.8872300955872261, "grad_norm": 0.5328173041343689, "learning_rate": 9.891937827063891e-07, "loss": 0.0422, "step": 50099 }, { "epoch": 0.8872478051242545, "grad_norm": 0.685131847858429, "learning_rate": 9.888865416253208e-07, "loss": 0.0497, "step": 50100 }, { "epoch": 0.887265514661283, "grad_norm": 0.32627010345458984, "learning_rate": 9.885793466394017e-07, "loss": 0.0395, "step": 50101 }, { "epoch": 0.8872832241983114, "grad_norm": 0.3157173693180084, "learning_rate": 9.882721977496362e-07, "loss": 0.0516, "step": 50102 }, { "epoch": 0.8873009337353398, "grad_norm": 0.5248382091522217, "learning_rate": 9.879650949570385e-07, "loss": 0.0484, "step": 50103 }, { "epoch": 0.8873186432723682, "grad_norm": 0.7154858708381653, "learning_rate": 9.876580382626176e-07, "loss": 0.0911, "step": 50104 }, { "epoch": 0.8873363528093967, "grad_norm": 0.4574335515499115, "learning_rate": 9.87351027667388e-07, "loss": 0.0592, "step": 50105 }, { "epoch": 0.8873540623464251, "grad_norm": 0.6212021112442017, "learning_rate": 9.87044063172352e-07, "loss": 0.062, "step": 50106 }, { "epoch": 0.8873717718834535, "grad_norm": 0.6293421387672424, "learning_rate": 9.867371447785256e-07, "loss": 0.0735, "step": 50107 }, { "epoch": 0.887389481420482, "grad_norm": 0.675055742263794, "learning_rate": 9.864302724869162e-07, "loss": 0.0523, "step": 50108 }, { "epoch": 0.8874071909575104, "grad_norm": 0.6263607144355774, "learning_rate": 9.86123446298533e-07, "loss": 0.0722, "step": 50109 }, { "epoch": 0.8874249004945388, "grad_norm": 0.8306805491447449, "learning_rate": 9.858166662143868e-07, "loss": 0.0931, "step": 50110 }, { "epoch": 0.8874426100315672, "grad_norm": 0.8907050490379333, "learning_rate": 9.855099322354855e-07, "loss": 0.0708, "step": 50111 }, { "epoch": 0.8874603195685957, "grad_norm": 0.5575915575027466, "learning_rate": 9.852032443628429e-07, "loss": 0.0561, "step": 50112 }, { "epoch": 0.8874780291056241, "grad_norm": 0.9556105136871338, "learning_rate": 9.848966025974615e-07, "loss": 0.0862, "step": 50113 }, { "epoch": 0.8874957386426525, "grad_norm": 0.4847451448440552, "learning_rate": 9.845900069403525e-07, "loss": 0.0465, "step": 50114 }, { "epoch": 0.8875134481796809, "grad_norm": 1.0441893339157104, "learning_rate": 9.842834573925264e-07, "loss": 0.0798, "step": 50115 }, { "epoch": 0.8875311577167094, "grad_norm": 0.3322250247001648, "learning_rate": 9.839769539549875e-07, "loss": 0.0575, "step": 50116 }, { "epoch": 0.8875488672537378, "grad_norm": 0.6362825036048889, "learning_rate": 9.836704966287453e-07, "loss": 0.0595, "step": 50117 }, { "epoch": 0.8875665767907662, "grad_norm": 0.23340940475463867, "learning_rate": 9.833640854148118e-07, "loss": 0.0347, "step": 50118 }, { "epoch": 0.8875842863277946, "grad_norm": 0.5407813787460327, "learning_rate": 9.83057720314195e-07, "loss": 0.0353, "step": 50119 }, { "epoch": 0.8876019958648231, "grad_norm": 0.2576479911804199, "learning_rate": 9.827514013278987e-07, "loss": 0.0462, "step": 50120 }, { "epoch": 0.8876197054018515, "grad_norm": 0.4748474657535553, "learning_rate": 9.824451284569325e-07, "loss": 0.0442, "step": 50121 }, { "epoch": 0.8876374149388799, "grad_norm": 0.22025923430919647, "learning_rate": 9.821389017023052e-07, "loss": 0.043, "step": 50122 }, { "epoch": 0.8876551244759084, "grad_norm": 0.6148548722267151, "learning_rate": 9.818327210650213e-07, "loss": 0.0687, "step": 50123 }, { "epoch": 0.8876728340129368, "grad_norm": 0.5735423564910889, "learning_rate": 9.815265865460916e-07, "loss": 0.042, "step": 50124 }, { "epoch": 0.8876905435499652, "grad_norm": 0.6936764717102051, "learning_rate": 9.812204981465183e-07, "loss": 0.0806, "step": 50125 }, { "epoch": 0.8877082530869936, "grad_norm": 0.6242585778236389, "learning_rate": 9.809144558673143e-07, "loss": 0.0656, "step": 50126 }, { "epoch": 0.8877259626240221, "grad_norm": 0.6019059419631958, "learning_rate": 9.806084597094823e-07, "loss": 0.0511, "step": 50127 }, { "epoch": 0.8877436721610505, "grad_norm": 0.6251385807991028, "learning_rate": 9.803025096740327e-07, "loss": 0.0745, "step": 50128 }, { "epoch": 0.8877613816980789, "grad_norm": 1.13723886013031, "learning_rate": 9.79996605761968e-07, "loss": 0.1029, "step": 50129 }, { "epoch": 0.8877790912351073, "grad_norm": 0.4830167591571808, "learning_rate": 9.796907479742979e-07, "loss": 0.0243, "step": 50130 }, { "epoch": 0.8877968007721359, "grad_norm": 0.3478016257286072, "learning_rate": 9.79384936312026e-07, "loss": 0.0304, "step": 50131 }, { "epoch": 0.8878145103091643, "grad_norm": 0.5552809834480286, "learning_rate": 9.790791707761603e-07, "loss": 0.0578, "step": 50132 }, { "epoch": 0.8878322198461926, "grad_norm": 0.6668935418128967, "learning_rate": 9.787734513677065e-07, "loss": 0.0499, "step": 50133 }, { "epoch": 0.887849929383221, "grad_norm": 0.3284471929073334, "learning_rate": 9.784677780876688e-07, "loss": 0.0504, "step": 50134 }, { "epoch": 0.8878676389202496, "grad_norm": 0.8730542063713074, "learning_rate": 9.781621509370547e-07, "loss": 0.0772, "step": 50135 }, { "epoch": 0.887885348457278, "grad_norm": 0.5810010433197021, "learning_rate": 9.778565699168669e-07, "loss": 0.0427, "step": 50136 }, { "epoch": 0.8879030579943064, "grad_norm": 0.4566809833049774, "learning_rate": 9.775510350281142e-07, "loss": 0.0465, "step": 50137 }, { "epoch": 0.8879207675313349, "grad_norm": 0.3012281358242035, "learning_rate": 9.772455462717993e-07, "loss": 0.0466, "step": 50138 }, { "epoch": 0.8879384770683633, "grad_norm": 0.1998043805360794, "learning_rate": 9.7694010364893e-07, "loss": 0.0501, "step": 50139 }, { "epoch": 0.8879561866053917, "grad_norm": 0.45722827315330505, "learning_rate": 9.766347071605103e-07, "loss": 0.041, "step": 50140 }, { "epoch": 0.8879738961424201, "grad_norm": 0.5839574337005615, "learning_rate": 9.76329356807541e-07, "loss": 0.072, "step": 50141 }, { "epoch": 0.8879916056794486, "grad_norm": 0.7329775094985962, "learning_rate": 9.760240525910313e-07, "loss": 0.0603, "step": 50142 }, { "epoch": 0.888009315216477, "grad_norm": 0.44177258014678955, "learning_rate": 9.757187945119838e-07, "loss": 0.045, "step": 50143 }, { "epoch": 0.8880270247535054, "grad_norm": 0.3653703033924103, "learning_rate": 9.754135825714043e-07, "loss": 0.0421, "step": 50144 }, { "epoch": 0.8880447342905338, "grad_norm": 0.5438298583030701, "learning_rate": 9.75108416770294e-07, "loss": 0.0566, "step": 50145 }, { "epoch": 0.8880624438275623, "grad_norm": 0.48064517974853516, "learning_rate": 9.748032971096566e-07, "loss": 0.0406, "step": 50146 }, { "epoch": 0.8880801533645907, "grad_norm": 0.7205355167388916, "learning_rate": 9.744982235905014e-07, "loss": 0.0621, "step": 50147 }, { "epoch": 0.8880978629016191, "grad_norm": 0.5243480801582336, "learning_rate": 9.741931962138263e-07, "loss": 0.051, "step": 50148 }, { "epoch": 0.8881155724386475, "grad_norm": 0.5613244771957397, "learning_rate": 9.738882149806366e-07, "loss": 0.0339, "step": 50149 }, { "epoch": 0.888133281975676, "grad_norm": 0.40732625126838684, "learning_rate": 9.73583279891937e-07, "loss": 0.0494, "step": 50150 }, { "epoch": 0.8881509915127044, "grad_norm": 0.6734560132026672, "learning_rate": 9.732783909487313e-07, "loss": 0.0482, "step": 50151 }, { "epoch": 0.8881687010497328, "grad_norm": 0.7006860375404358, "learning_rate": 9.729735481520191e-07, "loss": 0.068, "step": 50152 }, { "epoch": 0.8881864105867613, "grad_norm": 0.6376392245292664, "learning_rate": 9.72668751502806e-07, "loss": 0.0443, "step": 50153 }, { "epoch": 0.8882041201237897, "grad_norm": 0.7026619911193848, "learning_rate": 9.72364001002093e-07, "loss": 0.0489, "step": 50154 }, { "epoch": 0.8882218296608181, "grad_norm": 0.7106753587722778, "learning_rate": 9.72059296650884e-07, "loss": 0.0456, "step": 50155 }, { "epoch": 0.8882395391978465, "grad_norm": 0.4879179000854492, "learning_rate": 9.717546384501802e-07, "loss": 0.0591, "step": 50156 }, { "epoch": 0.888257248734875, "grad_norm": 0.3061990439891815, "learning_rate": 9.714500264009857e-07, "loss": 0.0342, "step": 50157 }, { "epoch": 0.8882749582719034, "grad_norm": 0.19949451088905334, "learning_rate": 9.71145460504303e-07, "loss": 0.0505, "step": 50158 }, { "epoch": 0.8882926678089318, "grad_norm": 0.813495397567749, "learning_rate": 9.708409407611313e-07, "loss": 0.0914, "step": 50159 }, { "epoch": 0.8883103773459602, "grad_norm": 0.5462912321090698, "learning_rate": 9.705364671724749e-07, "loss": 0.0468, "step": 50160 }, { "epoch": 0.8883280868829887, "grad_norm": 0.3681747019290924, "learning_rate": 9.702320397393328e-07, "loss": 0.0496, "step": 50161 }, { "epoch": 0.8883457964200171, "grad_norm": 0.6390644907951355, "learning_rate": 9.69927658462711e-07, "loss": 0.0528, "step": 50162 }, { "epoch": 0.8883635059570455, "grad_norm": 0.23671188950538635, "learning_rate": 9.69623323343607e-07, "loss": 0.0342, "step": 50163 }, { "epoch": 0.8883812154940739, "grad_norm": 0.3237416744232178, "learning_rate": 9.693190343830215e-07, "loss": 0.0386, "step": 50164 }, { "epoch": 0.8883989250311024, "grad_norm": 0.3082233667373657, "learning_rate": 9.690147915819575e-07, "loss": 0.0351, "step": 50165 }, { "epoch": 0.8884166345681308, "grad_norm": 0.43248242139816284, "learning_rate": 9.68710594941417e-07, "loss": 0.0387, "step": 50166 }, { "epoch": 0.8884343441051592, "grad_norm": 0.5471786856651306, "learning_rate": 9.68406444462398e-07, "loss": 0.0601, "step": 50167 }, { "epoch": 0.8884520536421877, "grad_norm": 0.4426664113998413, "learning_rate": 9.681023401459028e-07, "loss": 0.0543, "step": 50168 }, { "epoch": 0.8884697631792161, "grad_norm": 0.448615700006485, "learning_rate": 9.677982819929353e-07, "loss": 0.049, "step": 50169 }, { "epoch": 0.8884874727162445, "grad_norm": 0.5947856903076172, "learning_rate": 9.674942700044887e-07, "loss": 0.0626, "step": 50170 }, { "epoch": 0.8885051822532729, "grad_norm": 0.6222347617149353, "learning_rate": 9.671903041815666e-07, "loss": 0.0478, "step": 50171 }, { "epoch": 0.8885228917903014, "grad_norm": 0.5638276934623718, "learning_rate": 9.668863845251718e-07, "loss": 0.0431, "step": 50172 }, { "epoch": 0.8885406013273298, "grad_norm": 0.6304716467857361, "learning_rate": 9.665825110362985e-07, "loss": 0.0297, "step": 50173 }, { "epoch": 0.8885583108643582, "grad_norm": 0.5006122589111328, "learning_rate": 9.66278683715951e-07, "loss": 0.0294, "step": 50174 }, { "epoch": 0.8885760204013866, "grad_norm": 0.42945921421051025, "learning_rate": 9.659749025651232e-07, "loss": 0.0526, "step": 50175 }, { "epoch": 0.8885937299384151, "grad_norm": 0.5919875502586365, "learning_rate": 9.65671167584823e-07, "loss": 0.0683, "step": 50176 }, { "epoch": 0.8886114394754435, "grad_norm": 0.6989203095436096, "learning_rate": 9.65367478776044e-07, "loss": 0.0475, "step": 50177 }, { "epoch": 0.8886291490124719, "grad_norm": 0.29960790276527405, "learning_rate": 9.650638361397863e-07, "loss": 0.0477, "step": 50178 }, { "epoch": 0.8886468585495003, "grad_norm": 0.9175493121147156, "learning_rate": 9.647602396770499e-07, "loss": 0.0575, "step": 50179 }, { "epoch": 0.8886645680865288, "grad_norm": 0.5303193926811218, "learning_rate": 9.64456689388833e-07, "loss": 0.0593, "step": 50180 }, { "epoch": 0.8886822776235572, "grad_norm": 0.4881614148616791, "learning_rate": 9.641531852761327e-07, "loss": 0.0275, "step": 50181 }, { "epoch": 0.8886999871605856, "grad_norm": 0.4249412417411804, "learning_rate": 9.638497273399483e-07, "loss": 0.0514, "step": 50182 }, { "epoch": 0.8887176966976141, "grad_norm": 0.5440525412559509, "learning_rate": 9.63546315581279e-07, "loss": 0.0336, "step": 50183 }, { "epoch": 0.8887354062346425, "grad_norm": 0.7469373941421509, "learning_rate": 9.632429500011224e-07, "loss": 0.0558, "step": 50184 }, { "epoch": 0.8887531157716709, "grad_norm": 0.4267185926437378, "learning_rate": 9.629396306004762e-07, "loss": 0.0417, "step": 50185 }, { "epoch": 0.8887708253086993, "grad_norm": 0.5159078240394592, "learning_rate": 9.626363573803425e-07, "loss": 0.044, "step": 50186 }, { "epoch": 0.8887885348457278, "grad_norm": 0.48103851079940796, "learning_rate": 9.623331303417126e-07, "loss": 0.0556, "step": 50187 }, { "epoch": 0.8888062443827562, "grad_norm": 0.4943610429763794, "learning_rate": 9.620299494855855e-07, "loss": 0.0668, "step": 50188 }, { "epoch": 0.8888239539197846, "grad_norm": 0.29270803928375244, "learning_rate": 9.61726814812962e-07, "loss": 0.0476, "step": 50189 }, { "epoch": 0.888841663456813, "grad_norm": 0.6375848650932312, "learning_rate": 9.614237263248398e-07, "loss": 0.0626, "step": 50190 }, { "epoch": 0.8888593729938415, "grad_norm": 0.5083705186843872, "learning_rate": 9.611206840222097e-07, "loss": 0.0492, "step": 50191 }, { "epoch": 0.8888770825308699, "grad_norm": 0.7032061815261841, "learning_rate": 9.608176879060727e-07, "loss": 0.0418, "step": 50192 }, { "epoch": 0.8888947920678983, "grad_norm": 0.6301054954528809, "learning_rate": 9.605147379774277e-07, "loss": 0.052, "step": 50193 }, { "epoch": 0.8889125016049267, "grad_norm": 0.6181522011756897, "learning_rate": 9.602118342372674e-07, "loss": 0.0494, "step": 50194 }, { "epoch": 0.8889302111419553, "grad_norm": 0.7027011513710022, "learning_rate": 9.599089766865893e-07, "loss": 0.0711, "step": 50195 }, { "epoch": 0.8889479206789836, "grad_norm": 0.567672073841095, "learning_rate": 9.596061653263926e-07, "loss": 0.0475, "step": 50196 }, { "epoch": 0.888965630216012, "grad_norm": 0.7246601581573486, "learning_rate": 9.593034001576734e-07, "loss": 0.0588, "step": 50197 }, { "epoch": 0.8889833397530406, "grad_norm": 0.37894412875175476, "learning_rate": 9.590006811814223e-07, "loss": 0.0392, "step": 50198 }, { "epoch": 0.889001049290069, "grad_norm": 0.6290260553359985, "learning_rate": 9.586980083986402e-07, "loss": 0.0546, "step": 50199 }, { "epoch": 0.8890187588270974, "grad_norm": 0.6186981797218323, "learning_rate": 9.583953818103197e-07, "loss": 0.0818, "step": 50200 }, { "epoch": 0.8890364683641258, "grad_norm": 0.4273056089878082, "learning_rate": 9.580928014174617e-07, "loss": 0.0229, "step": 50201 }, { "epoch": 0.8890541779011543, "grad_norm": 0.8466678857803345, "learning_rate": 9.57790267221057e-07, "loss": 0.0613, "step": 50202 }, { "epoch": 0.8890718874381827, "grad_norm": 0.9905535578727722, "learning_rate": 9.574877792220998e-07, "loss": 0.0726, "step": 50203 }, { "epoch": 0.8890895969752111, "grad_norm": 0.429156631231308, "learning_rate": 9.571853374215895e-07, "loss": 0.0585, "step": 50204 }, { "epoch": 0.8891073065122395, "grad_norm": 0.32553374767303467, "learning_rate": 9.568829418205182e-07, "loss": 0.0502, "step": 50205 }, { "epoch": 0.889125016049268, "grad_norm": 0.3537599444389343, "learning_rate": 9.565805924198806e-07, "loss": 0.0669, "step": 50206 }, { "epoch": 0.8891427255862964, "grad_norm": 0.5442333221435547, "learning_rate": 9.562782892206739e-07, "loss": 0.0348, "step": 50207 }, { "epoch": 0.8891604351233248, "grad_norm": 0.3652356266975403, "learning_rate": 9.559760322238925e-07, "loss": 0.0284, "step": 50208 }, { "epoch": 0.8891781446603532, "grad_norm": 0.8733975887298584, "learning_rate": 9.556738214305289e-07, "loss": 0.0489, "step": 50209 }, { "epoch": 0.8891958541973817, "grad_norm": 0.47405916452407837, "learning_rate": 9.553716568415754e-07, "loss": 0.0489, "step": 50210 }, { "epoch": 0.8892135637344101, "grad_norm": 0.8356356620788574, "learning_rate": 9.550695384580333e-07, "loss": 0.072, "step": 50211 }, { "epoch": 0.8892312732714385, "grad_norm": 0.2725031077861786, "learning_rate": 9.547674662808864e-07, "loss": 0.0616, "step": 50212 }, { "epoch": 0.889248982808467, "grad_norm": 0.8792518973350525, "learning_rate": 9.544654403111359e-07, "loss": 0.0519, "step": 50213 }, { "epoch": 0.8892666923454954, "grad_norm": 0.342183917760849, "learning_rate": 9.541634605497724e-07, "loss": 0.0473, "step": 50214 }, { "epoch": 0.8892844018825238, "grad_norm": 0.66795814037323, "learning_rate": 9.538615269977951e-07, "loss": 0.0689, "step": 50215 }, { "epoch": 0.8893021114195522, "grad_norm": 0.821148693561554, "learning_rate": 9.535596396561886e-07, "loss": 0.057, "step": 50216 }, { "epoch": 0.8893198209565807, "grad_norm": 0.4041442275047302, "learning_rate": 9.532577985259516e-07, "loss": 0.0364, "step": 50217 }, { "epoch": 0.8893375304936091, "grad_norm": 0.4160194993019104, "learning_rate": 9.52956003608077e-07, "loss": 0.0346, "step": 50218 }, { "epoch": 0.8893552400306375, "grad_norm": 0.5800045132637024, "learning_rate": 9.526542549035538e-07, "loss": 0.0465, "step": 50219 }, { "epoch": 0.8893729495676659, "grad_norm": 0.5825235843658447, "learning_rate": 9.52352552413378e-07, "loss": 0.0729, "step": 50220 }, { "epoch": 0.8893906591046944, "grad_norm": 0.5279359221458435, "learning_rate": 9.520508961385404e-07, "loss": 0.0417, "step": 50221 }, { "epoch": 0.8894083686417228, "grad_norm": 0.5524141788482666, "learning_rate": 9.51749286080037e-07, "loss": 0.0518, "step": 50222 }, { "epoch": 0.8894260781787512, "grad_norm": 0.5479284524917603, "learning_rate": 9.514477222388551e-07, "loss": 0.0409, "step": 50223 }, { "epoch": 0.8894437877157796, "grad_norm": 0.2535538673400879, "learning_rate": 9.511462046159908e-07, "loss": 0.0652, "step": 50224 }, { "epoch": 0.8894614972528081, "grad_norm": 0.5635246634483337, "learning_rate": 9.508447332124348e-07, "loss": 0.0509, "step": 50225 }, { "epoch": 0.8894792067898365, "grad_norm": 0.464900940656662, "learning_rate": 9.505433080291781e-07, "loss": 0.0302, "step": 50226 }, { "epoch": 0.8894969163268649, "grad_norm": 0.6411691904067993, "learning_rate": 9.502419290672132e-07, "loss": 0.0564, "step": 50227 }, { "epoch": 0.8895146258638934, "grad_norm": 0.8111296892166138, "learning_rate": 9.499405963275293e-07, "loss": 0.0592, "step": 50228 }, { "epoch": 0.8895323354009218, "grad_norm": 0.7546815872192383, "learning_rate": 9.496393098111239e-07, "loss": 0.036, "step": 50229 }, { "epoch": 0.8895500449379502, "grad_norm": 0.7306285500526428, "learning_rate": 9.493380695189813e-07, "loss": 0.0603, "step": 50230 }, { "epoch": 0.8895677544749786, "grad_norm": 0.32746031880378723, "learning_rate": 9.490368754520957e-07, "loss": 0.0555, "step": 50231 }, { "epoch": 0.8895854640120071, "grad_norm": 0.7038719654083252, "learning_rate": 9.487357276114561e-07, "loss": 0.0588, "step": 50232 }, { "epoch": 0.8896031735490355, "grad_norm": 0.4302736222743988, "learning_rate": 9.484346259980569e-07, "loss": 0.065, "step": 50233 }, { "epoch": 0.8896208830860639, "grad_norm": 0.7805755138397217, "learning_rate": 9.481335706128841e-07, "loss": 0.0467, "step": 50234 }, { "epoch": 0.8896385926230923, "grad_norm": 0.8282676339149475, "learning_rate": 9.478325614569333e-07, "loss": 0.0556, "step": 50235 }, { "epoch": 0.8896563021601208, "grad_norm": 0.9207985997200012, "learning_rate": 9.475315985311922e-07, "loss": 0.0605, "step": 50236 }, { "epoch": 0.8896740116971492, "grad_norm": 0.6100299954414368, "learning_rate": 9.472306818366499e-07, "loss": 0.0538, "step": 50237 }, { "epoch": 0.8896917212341776, "grad_norm": 0.6106241941452026, "learning_rate": 9.469298113742958e-07, "loss": 0.0561, "step": 50238 }, { "epoch": 0.889709430771206, "grad_norm": 0.5495577454566956, "learning_rate": 9.466289871451222e-07, "loss": 0.0532, "step": 50239 }, { "epoch": 0.8897271403082345, "grad_norm": 0.35322627425193787, "learning_rate": 9.463282091501202e-07, "loss": 0.0507, "step": 50240 }, { "epoch": 0.8897448498452629, "grad_norm": 0.5538114905357361, "learning_rate": 9.460274773902722e-07, "loss": 0.0479, "step": 50241 }, { "epoch": 0.8897625593822913, "grad_norm": 0.6042522192001343, "learning_rate": 9.457267918665741e-07, "loss": 0.0476, "step": 50242 }, { "epoch": 0.8897802689193198, "grad_norm": 0.13418377935886383, "learning_rate": 9.454261525800167e-07, "loss": 0.0422, "step": 50243 }, { "epoch": 0.8897979784563482, "grad_norm": 0.41577205061912537, "learning_rate": 9.451255595315828e-07, "loss": 0.0991, "step": 50244 }, { "epoch": 0.8898156879933766, "grad_norm": 0.9458264112472534, "learning_rate": 9.448250127222646e-07, "loss": 0.0637, "step": 50245 }, { "epoch": 0.889833397530405, "grad_norm": 0.5850231647491455, "learning_rate": 9.4452451215305e-07, "loss": 0.1018, "step": 50246 }, { "epoch": 0.8898511070674335, "grad_norm": 0.45671457052230835, "learning_rate": 9.442240578249311e-07, "loss": 0.0453, "step": 50247 }, { "epoch": 0.8898688166044619, "grad_norm": 0.7924697399139404, "learning_rate": 9.439236497388909e-07, "loss": 0.0497, "step": 50248 }, { "epoch": 0.8898865261414903, "grad_norm": 0.39119666814804077, "learning_rate": 9.4362328789592e-07, "loss": 0.0361, "step": 50249 }, { "epoch": 0.8899042356785187, "grad_norm": 0.4896949231624603, "learning_rate": 9.43322972297006e-07, "loss": 0.0417, "step": 50250 }, { "epoch": 0.8899219452155472, "grad_norm": 0.9203076362609863, "learning_rate": 9.430227029431381e-07, "loss": 0.0848, "step": 50251 }, { "epoch": 0.8899396547525756, "grad_norm": 0.7915844917297363, "learning_rate": 9.427224798353057e-07, "loss": 0.0824, "step": 50252 }, { "epoch": 0.889957364289604, "grad_norm": 0.7431432604789734, "learning_rate": 9.424223029744927e-07, "loss": 0.0591, "step": 50253 }, { "epoch": 0.8899750738266324, "grad_norm": 0.5160745978355408, "learning_rate": 9.421221723616902e-07, "loss": 0.0336, "step": 50254 }, { "epoch": 0.8899927833636609, "grad_norm": 0.6359794735908508, "learning_rate": 9.418220879978823e-07, "loss": 0.0575, "step": 50255 }, { "epoch": 0.8900104929006893, "grad_norm": 0.2718244194984436, "learning_rate": 9.415220498840582e-07, "loss": 0.0328, "step": 50256 }, { "epoch": 0.8900282024377177, "grad_norm": 0.6055680513381958, "learning_rate": 9.412220580212072e-07, "loss": 0.0617, "step": 50257 }, { "epoch": 0.8900459119747463, "grad_norm": 0.39211028814315796, "learning_rate": 9.409221124103101e-07, "loss": 0.0608, "step": 50258 }, { "epoch": 0.8900636215117746, "grad_norm": 0.5507662296295166, "learning_rate": 9.406222130523578e-07, "loss": 0.0512, "step": 50259 }, { "epoch": 0.890081331048803, "grad_norm": 0.47617438435554504, "learning_rate": 9.403223599483363e-07, "loss": 0.0694, "step": 50260 }, { "epoch": 0.8900990405858314, "grad_norm": 0.41123247146606445, "learning_rate": 9.400225530992329e-07, "loss": 0.0353, "step": 50261 }, { "epoch": 0.89011675012286, "grad_norm": 0.70555579662323, "learning_rate": 9.397227925060336e-07, "loss": 0.0535, "step": 50262 }, { "epoch": 0.8901344596598884, "grad_norm": 0.6537371873855591, "learning_rate": 9.394230781697228e-07, "loss": 0.0653, "step": 50263 }, { "epoch": 0.8901521691969168, "grad_norm": 0.6428654789924622, "learning_rate": 9.391234100912876e-07, "loss": 0.0688, "step": 50264 }, { "epoch": 0.8901698787339452, "grad_norm": 0.6707932353019714, "learning_rate": 9.388237882717177e-07, "loss": 0.0676, "step": 50265 }, { "epoch": 0.8901875882709737, "grad_norm": 0.6805309653282166, "learning_rate": 9.385242127119936e-07, "loss": 0.0799, "step": 50266 }, { "epoch": 0.8902052978080021, "grad_norm": 0.6828919649124146, "learning_rate": 9.382246834131014e-07, "loss": 0.041, "step": 50267 }, { "epoch": 0.8902230073450305, "grad_norm": 0.7587502002716064, "learning_rate": 9.379252003760302e-07, "loss": 0.0554, "step": 50268 }, { "epoch": 0.8902407168820589, "grad_norm": 0.4182719886302948, "learning_rate": 9.37625763601761e-07, "loss": 0.04, "step": 50269 }, { "epoch": 0.8902584264190874, "grad_norm": 0.5080432891845703, "learning_rate": 9.373263730912779e-07, "loss": 0.0513, "step": 50270 }, { "epoch": 0.8902761359561158, "grad_norm": 1.0461002588272095, "learning_rate": 9.370270288455718e-07, "loss": 0.0779, "step": 50271 }, { "epoch": 0.8902938454931442, "grad_norm": 0.9052455425262451, "learning_rate": 9.367277308656253e-07, "loss": 0.0802, "step": 50272 }, { "epoch": 0.8903115550301727, "grad_norm": 0.9014807343482971, "learning_rate": 9.364284791524208e-07, "loss": 0.0713, "step": 50273 }, { "epoch": 0.8903292645672011, "grad_norm": 0.47021597623825073, "learning_rate": 9.361292737069443e-07, "loss": 0.0572, "step": 50274 }, { "epoch": 0.8903469741042295, "grad_norm": 0.22866810858249664, "learning_rate": 9.358301145301817e-07, "loss": 0.0362, "step": 50275 }, { "epoch": 0.8903646836412579, "grad_norm": 0.38064172863960266, "learning_rate": 9.355310016231139e-07, "loss": 0.0574, "step": 50276 }, { "epoch": 0.8903823931782864, "grad_norm": 0.6007917523384094, "learning_rate": 9.35231934986725e-07, "loss": 0.0577, "step": 50277 }, { "epoch": 0.8904001027153148, "grad_norm": 0.7780700325965881, "learning_rate": 9.349329146220026e-07, "loss": 0.0699, "step": 50278 }, { "epoch": 0.8904178122523432, "grad_norm": 0.5172293782234192, "learning_rate": 9.346339405299275e-07, "loss": 0.0568, "step": 50279 }, { "epoch": 0.8904355217893716, "grad_norm": 0.4818967878818512, "learning_rate": 9.343350127114824e-07, "loss": 0.0536, "step": 50280 }, { "epoch": 0.8904532313264001, "grad_norm": 0.4967963397502899, "learning_rate": 9.340361311676549e-07, "loss": 0.0591, "step": 50281 }, { "epoch": 0.8904709408634285, "grad_norm": 0.4304922819137573, "learning_rate": 9.337372958994273e-07, "loss": 0.0479, "step": 50282 }, { "epoch": 0.8904886504004569, "grad_norm": 0.8718982338905334, "learning_rate": 9.334385069077789e-07, "loss": 0.06, "step": 50283 }, { "epoch": 0.8905063599374853, "grad_norm": 0.413118839263916, "learning_rate": 9.331397641936956e-07, "loss": 0.0336, "step": 50284 }, { "epoch": 0.8905240694745138, "grad_norm": 0.8028171062469482, "learning_rate": 9.328410677581583e-07, "loss": 0.0597, "step": 50285 }, { "epoch": 0.8905417790115422, "grad_norm": 0.4545738697052002, "learning_rate": 9.325424176021547e-07, "loss": 0.0274, "step": 50286 }, { "epoch": 0.8905594885485706, "grad_norm": 0.4990881085395813, "learning_rate": 9.322438137266603e-07, "loss": 0.0593, "step": 50287 }, { "epoch": 0.8905771980855991, "grad_norm": 0.5286584496498108, "learning_rate": 9.319452561326613e-07, "loss": 0.081, "step": 50288 }, { "epoch": 0.8905949076226275, "grad_norm": 0.6785343289375305, "learning_rate": 9.316467448211402e-07, "loss": 0.0678, "step": 50289 }, { "epoch": 0.8906126171596559, "grad_norm": 0.5503495335578918, "learning_rate": 9.313482797930778e-07, "loss": 0.0663, "step": 50290 }, { "epoch": 0.8906303266966843, "grad_norm": 0.756527841091156, "learning_rate": 9.310498610494567e-07, "loss": 0.0619, "step": 50291 }, { "epoch": 0.8906480362337128, "grad_norm": 0.6923876404762268, "learning_rate": 9.307514885912593e-07, "loss": 0.054, "step": 50292 }, { "epoch": 0.8906657457707412, "grad_norm": 0.7567659020423889, "learning_rate": 9.304531624194685e-07, "loss": 0.0645, "step": 50293 }, { "epoch": 0.8906834553077696, "grad_norm": 0.5668583512306213, "learning_rate": 9.301548825350597e-07, "loss": 0.0508, "step": 50294 }, { "epoch": 0.890701164844798, "grad_norm": 0.40559619665145874, "learning_rate": 9.298566489390209e-07, "loss": 0.0309, "step": 50295 }, { "epoch": 0.8907188743818265, "grad_norm": 0.6298712491989136, "learning_rate": 9.29558461632331e-07, "loss": 0.025, "step": 50296 }, { "epoch": 0.8907365839188549, "grad_norm": 0.8105614185333252, "learning_rate": 9.292603206159694e-07, "loss": 0.0723, "step": 50297 }, { "epoch": 0.8907542934558833, "grad_norm": 0.39361968636512756, "learning_rate": 9.28962225890917e-07, "loss": 0.0505, "step": 50298 }, { "epoch": 0.8907720029929117, "grad_norm": 0.5900582075119019, "learning_rate": 9.286641774581545e-07, "loss": 0.0651, "step": 50299 }, { "epoch": 0.8907897125299402, "grad_norm": 0.4073393642902374, "learning_rate": 9.283661753186679e-07, "loss": 0.0552, "step": 50300 }, { "epoch": 0.8908074220669686, "grad_norm": 0.6178814768791199, "learning_rate": 9.280682194734314e-07, "loss": 0.0517, "step": 50301 }, { "epoch": 0.890825131603997, "grad_norm": 0.8081519603729248, "learning_rate": 9.277703099234258e-07, "loss": 0.0754, "step": 50302 }, { "epoch": 0.8908428411410255, "grad_norm": 0.5617513656616211, "learning_rate": 9.274724466696339e-07, "loss": 0.0703, "step": 50303 }, { "epoch": 0.8908605506780539, "grad_norm": 0.37878018617630005, "learning_rate": 9.271746297130362e-07, "loss": 0.0406, "step": 50304 }, { "epoch": 0.8908782602150823, "grad_norm": 0.5479361414909363, "learning_rate": 9.268768590546072e-07, "loss": 0.0391, "step": 50305 }, { "epoch": 0.8908959697521107, "grad_norm": 0.6759623885154724, "learning_rate": 9.265791346953312e-07, "loss": 0.0682, "step": 50306 }, { "epoch": 0.8909136792891392, "grad_norm": 0.41026824712753296, "learning_rate": 9.262814566361872e-07, "loss": 0.0555, "step": 50307 }, { "epoch": 0.8909313888261676, "grad_norm": 0.6887272596359253, "learning_rate": 9.259838248781527e-07, "loss": 0.0529, "step": 50308 }, { "epoch": 0.890949098363196, "grad_norm": 0.42864859104156494, "learning_rate": 9.256862394222071e-07, "loss": 0.0388, "step": 50309 }, { "epoch": 0.8909668079002244, "grad_norm": 0.2629956007003784, "learning_rate": 9.253887002693312e-07, "loss": 0.036, "step": 50310 }, { "epoch": 0.8909845174372529, "grad_norm": 0.599937379360199, "learning_rate": 9.250912074205059e-07, "loss": 0.0657, "step": 50311 }, { "epoch": 0.8910022269742813, "grad_norm": 0.4000749886035919, "learning_rate": 9.247937608767037e-07, "loss": 0.0468, "step": 50312 }, { "epoch": 0.8910199365113097, "grad_norm": 0.6199113130569458, "learning_rate": 9.244963606389057e-07, "loss": 0.0542, "step": 50313 }, { "epoch": 0.8910376460483381, "grad_norm": 0.5799655318260193, "learning_rate": 9.241990067080941e-07, "loss": 0.0542, "step": 50314 }, { "epoch": 0.8910553555853666, "grad_norm": 0.8874366879463196, "learning_rate": 9.239016990852417e-07, "loss": 0.0582, "step": 50315 }, { "epoch": 0.891073065122395, "grad_norm": 0.2777031660079956, "learning_rate": 9.236044377713293e-07, "loss": 0.051, "step": 50316 }, { "epoch": 0.8910907746594234, "grad_norm": 0.7261106371879578, "learning_rate": 9.233072227673345e-07, "loss": 0.0672, "step": 50317 }, { "epoch": 0.8911084841964519, "grad_norm": 1.3939054012298584, "learning_rate": 9.230100540742348e-07, "loss": 0.0629, "step": 50318 }, { "epoch": 0.8911261937334803, "grad_norm": 0.5012447834014893, "learning_rate": 9.227129316930094e-07, "loss": 0.062, "step": 50319 }, { "epoch": 0.8911439032705087, "grad_norm": 0.2215091586112976, "learning_rate": 9.224158556246326e-07, "loss": 0.0296, "step": 50320 }, { "epoch": 0.8911616128075371, "grad_norm": 0.2656104862689972, "learning_rate": 9.221188258700886e-07, "loss": 0.0235, "step": 50321 }, { "epoch": 0.8911793223445656, "grad_norm": 0.2358185201883316, "learning_rate": 9.218218424303449e-07, "loss": 0.0319, "step": 50322 }, { "epoch": 0.891197031881594, "grad_norm": 0.776646077632904, "learning_rate": 9.215249053063857e-07, "loss": 0.0841, "step": 50323 }, { "epoch": 0.8912147414186224, "grad_norm": 0.8082560300827026, "learning_rate": 9.212280144991836e-07, "loss": 0.0544, "step": 50324 }, { "epoch": 0.8912324509556508, "grad_norm": 0.3960466682910919, "learning_rate": 9.209311700097211e-07, "loss": 0.0456, "step": 50325 }, { "epoch": 0.8912501604926794, "grad_norm": 0.6195933818817139, "learning_rate": 9.206343718389676e-07, "loss": 0.0507, "step": 50326 }, { "epoch": 0.8912678700297078, "grad_norm": 0.345611035823822, "learning_rate": 9.203376199879038e-07, "loss": 0.049, "step": 50327 }, { "epoch": 0.8912855795667362, "grad_norm": 0.4679301381111145, "learning_rate": 9.200409144575056e-07, "loss": 0.0528, "step": 50328 }, { "epoch": 0.8913032891037646, "grad_norm": 0.4197074770927429, "learning_rate": 9.197442552487473e-07, "loss": 0.0308, "step": 50329 }, { "epoch": 0.8913209986407931, "grad_norm": 0.6307345032691956, "learning_rate": 9.194476423626064e-07, "loss": 0.0454, "step": 50330 }, { "epoch": 0.8913387081778215, "grad_norm": 0.3984479606151581, "learning_rate": 9.191510758000587e-07, "loss": 0.0679, "step": 50331 }, { "epoch": 0.8913564177148499, "grad_norm": 0.4059644341468811, "learning_rate": 9.188545555620837e-07, "loss": 0.0352, "step": 50332 }, { "epoch": 0.8913741272518784, "grad_norm": 0.416543185710907, "learning_rate": 9.185580816496486e-07, "loss": 0.0554, "step": 50333 }, { "epoch": 0.8913918367889068, "grad_norm": 0.6584420800209045, "learning_rate": 9.182616540637346e-07, "loss": 0.0485, "step": 50334 }, { "epoch": 0.8914095463259352, "grad_norm": 0.3274284303188324, "learning_rate": 9.179652728053156e-07, "loss": 0.0382, "step": 50335 }, { "epoch": 0.8914272558629636, "grad_norm": 0.6440805792808533, "learning_rate": 9.176689378753644e-07, "loss": 0.0437, "step": 50336 }, { "epoch": 0.8914449653999921, "grad_norm": 0.531849205493927, "learning_rate": 9.173726492748602e-07, "loss": 0.0566, "step": 50337 }, { "epoch": 0.8914626749370205, "grad_norm": 0.674877405166626, "learning_rate": 9.170764070047755e-07, "loss": 0.0371, "step": 50338 }, { "epoch": 0.8914803844740489, "grad_norm": 0.4579527974128723, "learning_rate": 9.167802110660861e-07, "loss": 0.0406, "step": 50339 }, { "epoch": 0.8914980940110773, "grad_norm": 0.29403939843177795, "learning_rate": 9.164840614597647e-07, "loss": 0.0396, "step": 50340 }, { "epoch": 0.8915158035481058, "grad_norm": 0.7206377387046814, "learning_rate": 9.161879581867838e-07, "loss": 0.0374, "step": 50341 }, { "epoch": 0.8915335130851342, "grad_norm": 0.3986363708972931, "learning_rate": 9.158919012481226e-07, "loss": 0.0536, "step": 50342 }, { "epoch": 0.8915512226221626, "grad_norm": 0.6294035315513611, "learning_rate": 9.155958906447537e-07, "loss": 0.0423, "step": 50343 }, { "epoch": 0.891568932159191, "grad_norm": 0.8694143295288086, "learning_rate": 9.152999263776479e-07, "loss": 0.0553, "step": 50344 }, { "epoch": 0.8915866416962195, "grad_norm": 0.8009870052337646, "learning_rate": 9.150040084477796e-07, "loss": 0.0532, "step": 50345 }, { "epoch": 0.8916043512332479, "grad_norm": 0.5749913454055786, "learning_rate": 9.147081368561244e-07, "loss": 0.0898, "step": 50346 }, { "epoch": 0.8916220607702763, "grad_norm": 0.5097610354423523, "learning_rate": 9.144123116036551e-07, "loss": 0.0602, "step": 50347 }, { "epoch": 0.8916397703073048, "grad_norm": 0.4026714265346527, "learning_rate": 9.141165326913442e-07, "loss": 0.0547, "step": 50348 }, { "epoch": 0.8916574798443332, "grad_norm": 0.44059836864471436, "learning_rate": 9.138208001201642e-07, "loss": 0.0491, "step": 50349 }, { "epoch": 0.8916751893813616, "grad_norm": 0.5599672198295593, "learning_rate": 9.135251138910927e-07, "loss": 0.0494, "step": 50350 }, { "epoch": 0.89169289891839, "grad_norm": 0.5634294152259827, "learning_rate": 9.132294740050956e-07, "loss": 0.055, "step": 50351 }, { "epoch": 0.8917106084554185, "grad_norm": 0.5274606943130493, "learning_rate": 9.129338804631504e-07, "loss": 0.046, "step": 50352 }, { "epoch": 0.8917283179924469, "grad_norm": 0.6167584657669067, "learning_rate": 9.126383332662281e-07, "loss": 0.0477, "step": 50353 }, { "epoch": 0.8917460275294753, "grad_norm": 0.7297083139419556, "learning_rate": 9.123428324152994e-07, "loss": 0.0465, "step": 50354 }, { "epoch": 0.8917637370665037, "grad_norm": 0.5377959609031677, "learning_rate": 9.120473779113387e-07, "loss": 0.0434, "step": 50355 }, { "epoch": 0.8917814466035322, "grad_norm": 0.5743369460105896, "learning_rate": 9.117519697553167e-07, "loss": 0.0463, "step": 50356 }, { "epoch": 0.8917991561405606, "grad_norm": 0.5430991053581238, "learning_rate": 9.114566079482062e-07, "loss": 0.0564, "step": 50357 }, { "epoch": 0.891816865677589, "grad_norm": 0.7466925978660583, "learning_rate": 9.111612924909779e-07, "loss": 0.0769, "step": 50358 }, { "epoch": 0.8918345752146174, "grad_norm": 0.5867378115653992, "learning_rate": 9.108660233846028e-07, "loss": 0.045, "step": 50359 }, { "epoch": 0.8918522847516459, "grad_norm": 0.34770938754081726, "learning_rate": 9.105708006300567e-07, "loss": 0.0316, "step": 50360 }, { "epoch": 0.8918699942886743, "grad_norm": 0.5524692535400391, "learning_rate": 9.102756242283055e-07, "loss": 0.048, "step": 50361 }, { "epoch": 0.8918877038257027, "grad_norm": 0.6581584215164185, "learning_rate": 9.099804941803236e-07, "loss": 0.05, "step": 50362 }, { "epoch": 0.8919054133627312, "grad_norm": 0.6870017647743225, "learning_rate": 9.096854104870783e-07, "loss": 0.0587, "step": 50363 }, { "epoch": 0.8919231228997596, "grad_norm": 0.5380659699440002, "learning_rate": 9.093903731495473e-07, "loss": 0.0673, "step": 50364 }, { "epoch": 0.891940832436788, "grad_norm": 0.5587170720100403, "learning_rate": 9.090953821686915e-07, "loss": 0.057, "step": 50365 }, { "epoch": 0.8919585419738164, "grad_norm": 0.5540071725845337, "learning_rate": 9.088004375454884e-07, "loss": 0.0322, "step": 50366 }, { "epoch": 0.8919762515108449, "grad_norm": 1.0085097551345825, "learning_rate": 9.085055392809072e-07, "loss": 0.0703, "step": 50367 }, { "epoch": 0.8919939610478733, "grad_norm": 0.8230128288269043, "learning_rate": 9.082106873759188e-07, "loss": 0.061, "step": 50368 }, { "epoch": 0.8920116705849017, "grad_norm": 0.43698403239250183, "learning_rate": 9.079158818314909e-07, "loss": 0.0432, "step": 50369 }, { "epoch": 0.8920293801219301, "grad_norm": 0.10576991736888885, "learning_rate": 9.076211226485942e-07, "loss": 0.0606, "step": 50370 }, { "epoch": 0.8920470896589586, "grad_norm": 0.510913074016571, "learning_rate": 9.073264098282013e-07, "loss": 0.0598, "step": 50371 }, { "epoch": 0.892064799195987, "grad_norm": 0.8709694743156433, "learning_rate": 9.070317433712766e-07, "loss": 0.0875, "step": 50372 }, { "epoch": 0.8920825087330154, "grad_norm": 0.3463559150695801, "learning_rate": 9.067371232787908e-07, "loss": 0.0705, "step": 50373 }, { "epoch": 0.8921002182700438, "grad_norm": 0.6924774646759033, "learning_rate": 9.064425495517148e-07, "loss": 0.058, "step": 50374 }, { "epoch": 0.8921179278070723, "grad_norm": 0.6738122701644897, "learning_rate": 9.061480221910179e-07, "loss": 0.0719, "step": 50375 }, { "epoch": 0.8921356373441007, "grad_norm": 0.5348681807518005, "learning_rate": 9.058535411976692e-07, "loss": 0.0466, "step": 50376 }, { "epoch": 0.8921533468811291, "grad_norm": 0.664051353931427, "learning_rate": 9.055591065726365e-07, "loss": 0.0519, "step": 50377 }, { "epoch": 0.8921710564181576, "grad_norm": 0.3364798128604889, "learning_rate": 9.052647183168905e-07, "loss": 0.0531, "step": 50378 }, { "epoch": 0.892188765955186, "grad_norm": 0.4045029878616333, "learning_rate": 9.049703764313955e-07, "loss": 0.027, "step": 50379 }, { "epoch": 0.8922064754922144, "grad_norm": 0.955943763256073, "learning_rate": 9.046760809171223e-07, "loss": 0.0963, "step": 50380 }, { "epoch": 0.8922241850292428, "grad_norm": 0.8544002175331116, "learning_rate": 9.043818317750402e-07, "loss": 0.044, "step": 50381 }, { "epoch": 0.8922418945662713, "grad_norm": 0.8082528710365295, "learning_rate": 9.040876290061167e-07, "loss": 0.0566, "step": 50382 }, { "epoch": 0.8922596041032997, "grad_norm": 0.5480738282203674, "learning_rate": 9.037934726113178e-07, "loss": 0.0628, "step": 50383 }, { "epoch": 0.8922773136403281, "grad_norm": 0.471151202917099, "learning_rate": 9.034993625916127e-07, "loss": 0.0558, "step": 50384 }, { "epoch": 0.8922950231773565, "grad_norm": 0.6998392343521118, "learning_rate": 9.032052989479705e-07, "loss": 0.0886, "step": 50385 }, { "epoch": 0.892312732714385, "grad_norm": 1.0129671096801758, "learning_rate": 9.029112816813556e-07, "loss": 0.0731, "step": 50386 }, { "epoch": 0.8923304422514134, "grad_norm": 0.3152214586734772, "learning_rate": 9.026173107927354e-07, "loss": 0.038, "step": 50387 }, { "epoch": 0.8923481517884418, "grad_norm": 0.5018886923789978, "learning_rate": 9.02323386283081e-07, "loss": 0.0478, "step": 50388 }, { "epoch": 0.8923658613254702, "grad_norm": 0.4774263799190521, "learning_rate": 9.020295081533564e-07, "loss": 0.0707, "step": 50389 }, { "epoch": 0.8923835708624988, "grad_norm": 0.34724247455596924, "learning_rate": 9.017356764045293e-07, "loss": 0.0271, "step": 50390 }, { "epoch": 0.8924012803995272, "grad_norm": 0.4151620864868164, "learning_rate": 9.014418910375638e-07, "loss": 0.0424, "step": 50391 }, { "epoch": 0.8924189899365556, "grad_norm": 0.2791239023208618, "learning_rate": 9.01148152053431e-07, "loss": 0.0413, "step": 50392 }, { "epoch": 0.8924366994735841, "grad_norm": 0.4117841422557831, "learning_rate": 9.008544594530932e-07, "loss": 0.0308, "step": 50393 }, { "epoch": 0.8924544090106125, "grad_norm": 0.7959025502204895, "learning_rate": 9.005608132375149e-07, "loss": 0.0886, "step": 50394 }, { "epoch": 0.8924721185476409, "grad_norm": 0.6855672001838684, "learning_rate": 9.002672134076683e-07, "loss": 0.0542, "step": 50395 }, { "epoch": 0.8924898280846693, "grad_norm": 0.37067025899887085, "learning_rate": 8.999736599645197e-07, "loss": 0.0379, "step": 50396 }, { "epoch": 0.8925075376216978, "grad_norm": 0.9291982650756836, "learning_rate": 8.996801529090282e-07, "loss": 0.0675, "step": 50397 }, { "epoch": 0.8925252471587262, "grad_norm": 0.3265053629875183, "learning_rate": 8.993866922421628e-07, "loss": 0.045, "step": 50398 }, { "epoch": 0.8925429566957546, "grad_norm": 0.5427234172821045, "learning_rate": 8.990932779648931e-07, "loss": 0.0326, "step": 50399 }, { "epoch": 0.892560666232783, "grad_norm": 0.5353327393531799, "learning_rate": 8.987999100781763e-07, "loss": 0.0476, "step": 50400 }, { "epoch": 0.8925783757698115, "grad_norm": 0.4811578691005707, "learning_rate": 8.985065885829819e-07, "loss": 0.0588, "step": 50401 }, { "epoch": 0.8925960853068399, "grad_norm": 0.6926018595695496, "learning_rate": 8.982133134802739e-07, "loss": 0.0726, "step": 50402 }, { "epoch": 0.8926137948438683, "grad_norm": 0.5885664820671082, "learning_rate": 8.979200847710184e-07, "loss": 0.0395, "step": 50403 }, { "epoch": 0.8926315043808967, "grad_norm": 0.5576995611190796, "learning_rate": 8.976269024561795e-07, "loss": 0.0622, "step": 50404 }, { "epoch": 0.8926492139179252, "grad_norm": 0.6016415953636169, "learning_rate": 8.973337665367215e-07, "loss": 0.0522, "step": 50405 }, { "epoch": 0.8926669234549536, "grad_norm": 0.6861021518707275, "learning_rate": 8.970406770136085e-07, "loss": 0.0685, "step": 50406 }, { "epoch": 0.892684632991982, "grad_norm": 0.7529134154319763, "learning_rate": 8.967476338878083e-07, "loss": 0.0529, "step": 50407 }, { "epoch": 0.8927023425290105, "grad_norm": 0.5925948023796082, "learning_rate": 8.9645463716028e-07, "loss": 0.0443, "step": 50408 }, { "epoch": 0.8927200520660389, "grad_norm": 0.6530311107635498, "learning_rate": 8.961616868319894e-07, "loss": 0.0668, "step": 50409 }, { "epoch": 0.8927377616030673, "grad_norm": 0.8843921422958374, "learning_rate": 8.958687829039008e-07, "loss": 0.0603, "step": 50410 }, { "epoch": 0.8927554711400957, "grad_norm": 0.5907266139984131, "learning_rate": 8.955759253769768e-07, "loss": 0.0493, "step": 50411 }, { "epoch": 0.8927731806771242, "grad_norm": 0.38087883591651917, "learning_rate": 8.952831142521817e-07, "loss": 0.0465, "step": 50412 }, { "epoch": 0.8927908902141526, "grad_norm": 0.5535892248153687, "learning_rate": 8.949903495304778e-07, "loss": 0.0413, "step": 50413 }, { "epoch": 0.892808599751181, "grad_norm": 0.533571183681488, "learning_rate": 8.946976312128297e-07, "loss": 0.0489, "step": 50414 }, { "epoch": 0.8928263092882094, "grad_norm": 0.4811505377292633, "learning_rate": 8.94404959300198e-07, "loss": 0.0772, "step": 50415 }, { "epoch": 0.8928440188252379, "grad_norm": 0.7334222793579102, "learning_rate": 8.941123337935486e-07, "loss": 0.0583, "step": 50416 }, { "epoch": 0.8928617283622663, "grad_norm": 0.644288957118988, "learning_rate": 8.938197546938459e-07, "loss": 0.0393, "step": 50417 }, { "epoch": 0.8928794378992947, "grad_norm": 0.984382152557373, "learning_rate": 8.935272220020458e-07, "loss": 0.0581, "step": 50418 }, { "epoch": 0.8928971474363231, "grad_norm": 0.9652077555656433, "learning_rate": 8.932347357191156e-07, "loss": 0.0674, "step": 50419 }, { "epoch": 0.8929148569733516, "grad_norm": 0.5316885709762573, "learning_rate": 8.929422958460149e-07, "loss": 0.0488, "step": 50420 }, { "epoch": 0.89293256651038, "grad_norm": 0.3417159914970398, "learning_rate": 8.926499023837109e-07, "loss": 0.0523, "step": 50421 }, { "epoch": 0.8929502760474084, "grad_norm": 0.6229439973831177, "learning_rate": 8.923575553331581e-07, "loss": 0.0586, "step": 50422 }, { "epoch": 0.8929679855844369, "grad_norm": 0.5077345967292786, "learning_rate": 8.920652546953206e-07, "loss": 0.0526, "step": 50423 }, { "epoch": 0.8929856951214653, "grad_norm": 0.7268725633621216, "learning_rate": 8.917730004711661e-07, "loss": 0.0584, "step": 50424 }, { "epoch": 0.8930034046584937, "grad_norm": 0.9562714099884033, "learning_rate": 8.914807926616486e-07, "loss": 0.0626, "step": 50425 }, { "epoch": 0.8930211141955221, "grad_norm": 0.6023246049880981, "learning_rate": 8.911886312677325e-07, "loss": 0.0348, "step": 50426 }, { "epoch": 0.8930388237325506, "grad_norm": 0.40375784039497375, "learning_rate": 8.908965162903788e-07, "loss": 0.0495, "step": 50427 }, { "epoch": 0.893056533269579, "grad_norm": 0.5203551054000854, "learning_rate": 8.906044477305513e-07, "loss": 0.0305, "step": 50428 }, { "epoch": 0.8930742428066074, "grad_norm": 0.6837855577468872, "learning_rate": 8.903124255892064e-07, "loss": 0.0487, "step": 50429 }, { "epoch": 0.8930919523436358, "grad_norm": 0.8640766739845276, "learning_rate": 8.900204498673048e-07, "loss": 0.0701, "step": 50430 }, { "epoch": 0.8931096618806643, "grad_norm": 0.6146493554115295, "learning_rate": 8.89728520565809e-07, "loss": 0.0509, "step": 50431 }, { "epoch": 0.8931273714176927, "grad_norm": 0.4878626763820648, "learning_rate": 8.8943663768568e-07, "loss": 0.0709, "step": 50432 }, { "epoch": 0.8931450809547211, "grad_norm": 0.21842952072620392, "learning_rate": 8.891448012278769e-07, "loss": 0.0579, "step": 50433 }, { "epoch": 0.8931627904917495, "grad_norm": 0.694210410118103, "learning_rate": 8.888530111933607e-07, "loss": 0.0283, "step": 50434 }, { "epoch": 0.893180500028778, "grad_norm": 0.4818050265312195, "learning_rate": 8.885612675830923e-07, "loss": 0.0694, "step": 50435 }, { "epoch": 0.8931982095658064, "grad_norm": 0.9110371470451355, "learning_rate": 8.882695703980292e-07, "loss": 0.062, "step": 50436 }, { "epoch": 0.8932159191028348, "grad_norm": 0.26037856936454773, "learning_rate": 8.879779196391308e-07, "loss": 0.0398, "step": 50437 }, { "epoch": 0.8932336286398633, "grad_norm": 0.13778653740882874, "learning_rate": 8.876863153073578e-07, "loss": 0.0274, "step": 50438 }, { "epoch": 0.8932513381768917, "grad_norm": 0.42497679591178894, "learning_rate": 8.873947574036711e-07, "loss": 0.0412, "step": 50439 }, { "epoch": 0.8932690477139201, "grad_norm": 0.3636299669742584, "learning_rate": 8.871032459290268e-07, "loss": 0.0716, "step": 50440 }, { "epoch": 0.8932867572509485, "grad_norm": 0.800216794013977, "learning_rate": 8.868117808843856e-07, "loss": 0.0591, "step": 50441 }, { "epoch": 0.893304466787977, "grad_norm": 0.4149227440357208, "learning_rate": 8.865203622707052e-07, "loss": 0.0314, "step": 50442 }, { "epoch": 0.8933221763250054, "grad_norm": 0.7082532644271851, "learning_rate": 8.862289900889464e-07, "loss": 0.053, "step": 50443 }, { "epoch": 0.8933398858620338, "grad_norm": 0.6237439513206482, "learning_rate": 8.859376643400652e-07, "loss": 0.0258, "step": 50444 }, { "epoch": 0.8933575953990622, "grad_norm": 0.8470540046691895, "learning_rate": 8.856463850250224e-07, "loss": 0.0766, "step": 50445 }, { "epoch": 0.8933753049360907, "grad_norm": 0.9294259548187256, "learning_rate": 8.853551521447789e-07, "loss": 0.0493, "step": 50446 }, { "epoch": 0.8933930144731191, "grad_norm": 0.614600419998169, "learning_rate": 8.850639657002857e-07, "loss": 0.0602, "step": 50447 }, { "epoch": 0.8934107240101475, "grad_norm": 0.4290822446346283, "learning_rate": 8.847728256925053e-07, "loss": 0.0563, "step": 50448 }, { "epoch": 0.8934284335471759, "grad_norm": 0.3386516571044922, "learning_rate": 8.844817321223953e-07, "loss": 0.0487, "step": 50449 }, { "epoch": 0.8934461430842044, "grad_norm": 1.0374665260314941, "learning_rate": 8.841906849909115e-07, "loss": 0.0778, "step": 50450 }, { "epoch": 0.8934638526212328, "grad_norm": 0.6121361255645752, "learning_rate": 8.838996842990133e-07, "loss": 0.0537, "step": 50451 }, { "epoch": 0.8934815621582612, "grad_norm": 0.43494898080825806, "learning_rate": 8.836087300476531e-07, "loss": 0.0592, "step": 50452 }, { "epoch": 0.8934992716952898, "grad_norm": 0.3272676467895508, "learning_rate": 8.833178222377985e-07, "loss": 0.0541, "step": 50453 }, { "epoch": 0.8935169812323182, "grad_norm": 0.6636723875999451, "learning_rate": 8.830269608703972e-07, "loss": 0.051, "step": 50454 }, { "epoch": 0.8935346907693466, "grad_norm": 0.6186853647232056, "learning_rate": 8.827361459464101e-07, "loss": 0.0659, "step": 50455 }, { "epoch": 0.893552400306375, "grad_norm": 0.6598557233810425, "learning_rate": 8.824453774667962e-07, "loss": 0.0655, "step": 50456 }, { "epoch": 0.8935701098434035, "grad_norm": 0.5260990858078003, "learning_rate": 8.821546554325066e-07, "loss": 0.0267, "step": 50457 }, { "epoch": 0.8935878193804319, "grad_norm": 0.38380932807922363, "learning_rate": 8.818639798444988e-07, "loss": 0.0661, "step": 50458 }, { "epoch": 0.8936055289174603, "grad_norm": 0.4406753480434418, "learning_rate": 8.815733507037322e-07, "loss": 0.0693, "step": 50459 }, { "epoch": 0.8936232384544887, "grad_norm": 0.440824419260025, "learning_rate": 8.81282768011159e-07, "loss": 0.0722, "step": 50460 }, { "epoch": 0.8936409479915172, "grad_norm": 0.3889278769493103, "learning_rate": 8.809922317677405e-07, "loss": 0.0554, "step": 50461 }, { "epoch": 0.8936586575285456, "grad_norm": 0.5447256565093994, "learning_rate": 8.807017419744273e-07, "loss": 0.0617, "step": 50462 }, { "epoch": 0.893676367065574, "grad_norm": 0.7230538129806519, "learning_rate": 8.804112986321805e-07, "loss": 0.0532, "step": 50463 }, { "epoch": 0.8936940766026025, "grad_norm": 0.5821992754936218, "learning_rate": 8.801209017419493e-07, "loss": 0.0765, "step": 50464 }, { "epoch": 0.8937117861396309, "grad_norm": 0.6966233849525452, "learning_rate": 8.798305513046928e-07, "loss": 0.0626, "step": 50465 }, { "epoch": 0.8937294956766593, "grad_norm": 0.43976110219955444, "learning_rate": 8.795402473213654e-07, "loss": 0.0591, "step": 50466 }, { "epoch": 0.8937472052136877, "grad_norm": 0.7541826963424683, "learning_rate": 8.792499897929246e-07, "loss": 0.0548, "step": 50467 }, { "epoch": 0.8937649147507162, "grad_norm": 0.6709944009780884, "learning_rate": 8.789597787203213e-07, "loss": 0.0492, "step": 50468 }, { "epoch": 0.8937826242877446, "grad_norm": 0.4318772554397583, "learning_rate": 8.786696141045114e-07, "loss": 0.0512, "step": 50469 }, { "epoch": 0.893800333824773, "grad_norm": 0.4289379417896271, "learning_rate": 8.783794959464509e-07, "loss": 0.0268, "step": 50470 }, { "epoch": 0.8938180433618014, "grad_norm": 0.5604017376899719, "learning_rate": 8.780894242470938e-07, "loss": 0.0486, "step": 50471 }, { "epoch": 0.8938357528988299, "grad_norm": 0.6679508686065674, "learning_rate": 8.777993990073929e-07, "loss": 0.0506, "step": 50472 }, { "epoch": 0.8938534624358583, "grad_norm": 0.6716682314872742, "learning_rate": 8.77509420228304e-07, "loss": 0.053, "step": 50473 }, { "epoch": 0.8938711719728867, "grad_norm": 0.6397109627723694, "learning_rate": 8.772194879107848e-07, "loss": 0.0409, "step": 50474 }, { "epoch": 0.8938888815099151, "grad_norm": 0.6692625880241394, "learning_rate": 8.76929602055781e-07, "loss": 0.0547, "step": 50475 }, { "epoch": 0.8939065910469436, "grad_norm": 0.7809050679206848, "learning_rate": 8.766397626642503e-07, "loss": 0.0551, "step": 50476 }, { "epoch": 0.893924300583972, "grad_norm": 0.770003080368042, "learning_rate": 8.763499697371469e-07, "loss": 0.053, "step": 50477 }, { "epoch": 0.8939420101210004, "grad_norm": 0.7009308338165283, "learning_rate": 8.76060223275425e-07, "loss": 0.0606, "step": 50478 }, { "epoch": 0.8939597196580289, "grad_norm": 0.6531361937522888, "learning_rate": 8.757705232800339e-07, "loss": 0.07, "step": 50479 }, { "epoch": 0.8939774291950573, "grad_norm": 0.6415771245956421, "learning_rate": 8.754808697519312e-07, "loss": 0.0507, "step": 50480 }, { "epoch": 0.8939951387320857, "grad_norm": 0.5614548921585083, "learning_rate": 8.751912626920661e-07, "loss": 0.0286, "step": 50481 }, { "epoch": 0.8940128482691141, "grad_norm": 0.7583789825439453, "learning_rate": 8.749017021013944e-07, "loss": 0.0788, "step": 50482 }, { "epoch": 0.8940305578061426, "grad_norm": 0.453844279050827, "learning_rate": 8.746121879808655e-07, "loss": 0.0568, "step": 50483 }, { "epoch": 0.894048267343171, "grad_norm": 0.38270512223243713, "learning_rate": 8.743227203314352e-07, "loss": 0.0584, "step": 50484 }, { "epoch": 0.8940659768801994, "grad_norm": 0.4158165454864502, "learning_rate": 8.740332991540561e-07, "loss": 0.035, "step": 50485 }, { "epoch": 0.8940836864172278, "grad_norm": 0.4977896809577942, "learning_rate": 8.737439244496776e-07, "loss": 0.0522, "step": 50486 }, { "epoch": 0.8941013959542563, "grad_norm": 0.32478442788124084, "learning_rate": 8.73454596219252e-07, "loss": 0.0329, "step": 50487 }, { "epoch": 0.8941191054912847, "grad_norm": 1.105291724205017, "learning_rate": 8.731653144637336e-07, "loss": 0.0533, "step": 50488 }, { "epoch": 0.8941368150283131, "grad_norm": 0.5801238417625427, "learning_rate": 8.728760791840684e-07, "loss": 0.065, "step": 50489 }, { "epoch": 0.8941545245653415, "grad_norm": 0.37218859791755676, "learning_rate": 8.725868903812156e-07, "loss": 0.0288, "step": 50490 }, { "epoch": 0.89417223410237, "grad_norm": 0.6349887847900391, "learning_rate": 8.722977480561212e-07, "loss": 0.0711, "step": 50491 }, { "epoch": 0.8941899436393984, "grad_norm": 0.36312535405158997, "learning_rate": 8.720086522097409e-07, "loss": 0.035, "step": 50492 }, { "epoch": 0.8942076531764268, "grad_norm": 0.7057237029075623, "learning_rate": 8.717196028430208e-07, "loss": 0.0497, "step": 50493 }, { "epoch": 0.8942253627134553, "grad_norm": 0.35486435890197754, "learning_rate": 8.71430599956915e-07, "loss": 0.0629, "step": 50494 }, { "epoch": 0.8942430722504837, "grad_norm": 0.5904272198677063, "learning_rate": 8.711416435523761e-07, "loss": 0.0618, "step": 50495 }, { "epoch": 0.8942607817875121, "grad_norm": 0.8922981023788452, "learning_rate": 8.708527336303484e-07, "loss": 0.0825, "step": 50496 }, { "epoch": 0.8942784913245405, "grad_norm": 0.2531593143939972, "learning_rate": 8.705638701917878e-07, "loss": 0.0244, "step": 50497 }, { "epoch": 0.894296200861569, "grad_norm": 0.3096296191215515, "learning_rate": 8.702750532376419e-07, "loss": 0.0573, "step": 50498 }, { "epoch": 0.8943139103985974, "grad_norm": 0.7987361550331116, "learning_rate": 8.699862827688615e-07, "loss": 0.049, "step": 50499 }, { "epoch": 0.8943316199356258, "grad_norm": 0.3676549792289734, "learning_rate": 8.696975587863976e-07, "loss": 0.0575, "step": 50500 }, { "epoch": 0.8943493294726542, "grad_norm": 0.4726669192314148, "learning_rate": 8.694088812911994e-07, "loss": 0.0603, "step": 50501 }, { "epoch": 0.8943670390096827, "grad_norm": 0.7483465075492859, "learning_rate": 8.691202502842194e-07, "loss": 0.0549, "step": 50502 }, { "epoch": 0.8943847485467111, "grad_norm": 0.7500127553939819, "learning_rate": 8.688316657664019e-07, "loss": 0.0582, "step": 50503 }, { "epoch": 0.8944024580837395, "grad_norm": 0.23230388760566711, "learning_rate": 8.685431277386996e-07, "loss": 0.0488, "step": 50504 }, { "epoch": 0.8944201676207679, "grad_norm": 0.5967385768890381, "learning_rate": 8.682546362020599e-07, "loss": 0.0458, "step": 50505 }, { "epoch": 0.8944378771577964, "grad_norm": 0.45989957451820374, "learning_rate": 8.679661911574371e-07, "loss": 0.0444, "step": 50506 }, { "epoch": 0.8944555866948248, "grad_norm": 0.42343729734420776, "learning_rate": 8.676777926057722e-07, "loss": 0.0469, "step": 50507 }, { "epoch": 0.8944732962318532, "grad_norm": 0.5767027735710144, "learning_rate": 8.673894405480193e-07, "loss": 0.0538, "step": 50508 }, { "epoch": 0.8944910057688817, "grad_norm": 0.6108847260475159, "learning_rate": 8.671011349851243e-07, "loss": 0.049, "step": 50509 }, { "epoch": 0.8945087153059101, "grad_norm": 0.8533597588539124, "learning_rate": 8.668128759180383e-07, "loss": 0.0742, "step": 50510 }, { "epoch": 0.8945264248429385, "grad_norm": 0.10957948118448257, "learning_rate": 8.665246633477086e-07, "loss": 0.0574, "step": 50511 }, { "epoch": 0.8945441343799669, "grad_norm": 0.6240586638450623, "learning_rate": 8.662364972750831e-07, "loss": 0.0811, "step": 50512 }, { "epoch": 0.8945618439169954, "grad_norm": 0.6578812003135681, "learning_rate": 8.659483777011106e-07, "loss": 0.0584, "step": 50513 }, { "epoch": 0.8945795534540238, "grad_norm": 0.37809017300605774, "learning_rate": 8.656603046267375e-07, "loss": 0.0366, "step": 50514 }, { "epoch": 0.8945972629910522, "grad_norm": 0.4417828917503357, "learning_rate": 8.653722780529127e-07, "loss": 0.0378, "step": 50515 }, { "epoch": 0.8946149725280806, "grad_norm": 0.7837865948677063, "learning_rate": 8.650842979805823e-07, "loss": 0.0566, "step": 50516 }, { "epoch": 0.8946326820651092, "grad_norm": 0.4246079623699188, "learning_rate": 8.647963644106988e-07, "loss": 0.0421, "step": 50517 }, { "epoch": 0.8946503916021376, "grad_norm": 0.48824670910835266, "learning_rate": 8.645084773441997e-07, "loss": 0.0205, "step": 50518 }, { "epoch": 0.894668101139166, "grad_norm": 0.19149546325206757, "learning_rate": 8.64220636782041e-07, "loss": 0.0596, "step": 50519 }, { "epoch": 0.8946858106761943, "grad_norm": 0.6500490307807922, "learning_rate": 8.639328427251686e-07, "loss": 0.0665, "step": 50520 }, { "epoch": 0.8947035202132229, "grad_norm": 0.38443049788475037, "learning_rate": 8.636450951745251e-07, "loss": 0.0294, "step": 50521 }, { "epoch": 0.8947212297502513, "grad_norm": 0.35096827149391174, "learning_rate": 8.633573941310596e-07, "loss": 0.0648, "step": 50522 }, { "epoch": 0.8947389392872797, "grad_norm": 0.13722363114356995, "learning_rate": 8.630697395957182e-07, "loss": 0.0602, "step": 50523 }, { "epoch": 0.8947566488243082, "grad_norm": 0.6449427604675293, "learning_rate": 8.627821315694484e-07, "loss": 0.0369, "step": 50524 }, { "epoch": 0.8947743583613366, "grad_norm": 0.709449827671051, "learning_rate": 8.624945700531961e-07, "loss": 0.0656, "step": 50525 }, { "epoch": 0.894792067898365, "grad_norm": 0.559857189655304, "learning_rate": 8.622070550479039e-07, "loss": 0.038, "step": 50526 }, { "epoch": 0.8948097774353934, "grad_norm": 0.7995956540107727, "learning_rate": 8.619195865545227e-07, "loss": 0.0485, "step": 50527 }, { "epoch": 0.8948274869724219, "grad_norm": 0.4964456856250763, "learning_rate": 8.61632164573995e-07, "loss": 0.0877, "step": 50528 }, { "epoch": 0.8948451965094503, "grad_norm": 0.6325089931488037, "learning_rate": 8.613447891072684e-07, "loss": 0.0435, "step": 50529 }, { "epoch": 0.8948629060464787, "grad_norm": 0.47620806097984314, "learning_rate": 8.610574601552857e-07, "loss": 0.057, "step": 50530 }, { "epoch": 0.8948806155835071, "grad_norm": 0.6284481287002563, "learning_rate": 8.607701777189975e-07, "loss": 0.051, "step": 50531 }, { "epoch": 0.8948983251205356, "grad_norm": 0.7786052227020264, "learning_rate": 8.604829417993432e-07, "loss": 0.075, "step": 50532 }, { "epoch": 0.894916034657564, "grad_norm": 0.3532840609550476, "learning_rate": 8.601957523972687e-07, "loss": 0.0206, "step": 50533 }, { "epoch": 0.8949337441945924, "grad_norm": 0.6765924692153931, "learning_rate": 8.599086095137232e-07, "loss": 0.0697, "step": 50534 }, { "epoch": 0.8949514537316208, "grad_norm": 0.7453917264938354, "learning_rate": 8.596215131496459e-07, "loss": 0.0463, "step": 50535 }, { "epoch": 0.8949691632686493, "grad_norm": 0.5491347312927246, "learning_rate": 8.59334463305983e-07, "loss": 0.0672, "step": 50536 }, { "epoch": 0.8949868728056777, "grad_norm": 0.4514514207839966, "learning_rate": 8.590474599836801e-07, "loss": 0.0342, "step": 50537 }, { "epoch": 0.8950045823427061, "grad_norm": 0.605170488357544, "learning_rate": 8.587605031836814e-07, "loss": 0.0542, "step": 50538 }, { "epoch": 0.8950222918797346, "grad_norm": 0.5155246257781982, "learning_rate": 8.584735929069299e-07, "loss": 0.0261, "step": 50539 }, { "epoch": 0.895040001416763, "grad_norm": 0.5086930990219116, "learning_rate": 8.581867291543693e-07, "loss": 0.0692, "step": 50540 }, { "epoch": 0.8950577109537914, "grad_norm": 0.7084296941757202, "learning_rate": 8.578999119269443e-07, "loss": 0.0541, "step": 50541 }, { "epoch": 0.8950754204908198, "grad_norm": 0.39690232276916504, "learning_rate": 8.576131412256005e-07, "loss": 0.0817, "step": 50542 }, { "epoch": 0.8950931300278483, "grad_norm": 0.5184534788131714, "learning_rate": 8.573264170512773e-07, "loss": 0.0502, "step": 50543 }, { "epoch": 0.8951108395648767, "grad_norm": 0.6941196918487549, "learning_rate": 8.57039739404919e-07, "loss": 0.0678, "step": 50544 }, { "epoch": 0.8951285491019051, "grad_norm": 0.6848750710487366, "learning_rate": 8.567531082874714e-07, "loss": 0.079, "step": 50545 }, { "epoch": 0.8951462586389335, "grad_norm": 0.5795538425445557, "learning_rate": 8.564665236998737e-07, "loss": 0.052, "step": 50546 }, { "epoch": 0.895163968175962, "grad_norm": 0.6959854364395142, "learning_rate": 8.561799856430686e-07, "loss": 0.0627, "step": 50547 }, { "epoch": 0.8951816777129904, "grad_norm": 0.4457816183567047, "learning_rate": 8.558934941180036e-07, "loss": 0.0513, "step": 50548 }, { "epoch": 0.8951993872500188, "grad_norm": 0.5322648882865906, "learning_rate": 8.556070491256196e-07, "loss": 0.0574, "step": 50549 }, { "epoch": 0.8952170967870472, "grad_norm": 0.7931607961654663, "learning_rate": 8.55320650666856e-07, "loss": 0.0729, "step": 50550 }, { "epoch": 0.8952348063240757, "grad_norm": 0.8104336857795715, "learning_rate": 8.550342987426569e-07, "loss": 0.0796, "step": 50551 }, { "epoch": 0.8952525158611041, "grad_norm": 0.5305395722389221, "learning_rate": 8.547479933539665e-07, "loss": 0.0605, "step": 50552 }, { "epoch": 0.8952702253981325, "grad_norm": 0.7383074164390564, "learning_rate": 8.544617345017225e-07, "loss": 0.0583, "step": 50553 }, { "epoch": 0.895287934935161, "grad_norm": 0.5590156316757202, "learning_rate": 8.541755221868675e-07, "loss": 0.0541, "step": 50554 }, { "epoch": 0.8953056444721894, "grad_norm": 0.42041015625, "learning_rate": 8.538893564103456e-07, "loss": 0.0399, "step": 50555 }, { "epoch": 0.8953233540092178, "grad_norm": 0.4590822458267212, "learning_rate": 8.536032371730962e-07, "loss": 0.0526, "step": 50556 }, { "epoch": 0.8953410635462462, "grad_norm": 0.6169940233230591, "learning_rate": 8.533171644760618e-07, "loss": 0.0558, "step": 50557 }, { "epoch": 0.8953587730832747, "grad_norm": 0.6994214057922363, "learning_rate": 8.530311383201816e-07, "loss": 0.0471, "step": 50558 }, { "epoch": 0.8953764826203031, "grad_norm": 0.5590613484382629, "learning_rate": 8.527451587064017e-07, "loss": 0.0712, "step": 50559 }, { "epoch": 0.8953941921573315, "grad_norm": 0.6054847836494446, "learning_rate": 8.524592256356578e-07, "loss": 0.0505, "step": 50560 }, { "epoch": 0.8954119016943599, "grad_norm": 0.5777310729026794, "learning_rate": 8.52173339108891e-07, "loss": 0.0633, "step": 50561 }, { "epoch": 0.8954296112313884, "grad_norm": 0.7085633873939514, "learning_rate": 8.518874991270436e-07, "loss": 0.0511, "step": 50562 }, { "epoch": 0.8954473207684168, "grad_norm": 0.4540424346923828, "learning_rate": 8.516017056910569e-07, "loss": 0.0362, "step": 50563 }, { "epoch": 0.8954650303054452, "grad_norm": 0.627677321434021, "learning_rate": 8.513159588018665e-07, "loss": 0.0538, "step": 50564 }, { "epoch": 0.8954827398424736, "grad_norm": 0.5442948937416077, "learning_rate": 8.510302584604168e-07, "loss": 0.0575, "step": 50565 }, { "epoch": 0.8955004493795021, "grad_norm": 1.083161473274231, "learning_rate": 8.50744604667647e-07, "loss": 0.0596, "step": 50566 }, { "epoch": 0.8955181589165305, "grad_norm": 0.6591689586639404, "learning_rate": 8.504589974244948e-07, "loss": 0.0485, "step": 50567 }, { "epoch": 0.8955358684535589, "grad_norm": 0.8289482593536377, "learning_rate": 8.501734367319026e-07, "loss": 0.0432, "step": 50568 }, { "epoch": 0.8955535779905874, "grad_norm": 0.6576510667800903, "learning_rate": 8.498879225908079e-07, "loss": 0.0543, "step": 50569 }, { "epoch": 0.8955712875276158, "grad_norm": 0.2855701446533203, "learning_rate": 8.496024550021536e-07, "loss": 0.0733, "step": 50570 }, { "epoch": 0.8955889970646442, "grad_norm": 0.40903833508491516, "learning_rate": 8.493170339668737e-07, "loss": 0.0496, "step": 50571 }, { "epoch": 0.8956067066016726, "grad_norm": 0.2744620740413666, "learning_rate": 8.490316594859094e-07, "loss": 0.0323, "step": 50572 }, { "epoch": 0.8956244161387011, "grad_norm": 0.5364493131637573, "learning_rate": 8.487463315602012e-07, "loss": 0.0504, "step": 50573 }, { "epoch": 0.8956421256757295, "grad_norm": 0.4257095456123352, "learning_rate": 8.484610501906853e-07, "loss": 0.0519, "step": 50574 }, { "epoch": 0.8956598352127579, "grad_norm": 0.4896833896636963, "learning_rate": 8.481758153782992e-07, "loss": 0.0345, "step": 50575 }, { "epoch": 0.8956775447497863, "grad_norm": 0.6385256052017212, "learning_rate": 8.478906271239823e-07, "loss": 0.0441, "step": 50576 }, { "epoch": 0.8956952542868148, "grad_norm": 0.36358869075775146, "learning_rate": 8.476054854286785e-07, "loss": 0.0392, "step": 50577 }, { "epoch": 0.8957129638238432, "grad_norm": 0.5940082669258118, "learning_rate": 8.473203902933191e-07, "loss": 0.0519, "step": 50578 }, { "epoch": 0.8957306733608716, "grad_norm": 1.009035587310791, "learning_rate": 8.470353417188448e-07, "loss": 0.0845, "step": 50579 }, { "epoch": 0.8957483828979, "grad_norm": 0.9176327586174011, "learning_rate": 8.467503397061915e-07, "loss": 0.0593, "step": 50580 }, { "epoch": 0.8957660924349286, "grad_norm": 0.3119700849056244, "learning_rate": 8.464653842563003e-07, "loss": 0.0407, "step": 50581 }, { "epoch": 0.895783801971957, "grad_norm": 0.7706219553947449, "learning_rate": 8.461804753701036e-07, "loss": 0.043, "step": 50582 }, { "epoch": 0.8958015115089853, "grad_norm": 0.43578198552131653, "learning_rate": 8.45895613048544e-07, "loss": 0.0523, "step": 50583 }, { "epoch": 0.8958192210460139, "grad_norm": 0.8051226735115051, "learning_rate": 8.456107972925542e-07, "loss": 0.0648, "step": 50584 }, { "epoch": 0.8958369305830423, "grad_norm": 0.9064478278160095, "learning_rate": 8.453260281030734e-07, "loss": 0.0479, "step": 50585 }, { "epoch": 0.8958546401200707, "grad_norm": 0.748410701751709, "learning_rate": 8.450413054810391e-07, "loss": 0.056, "step": 50586 }, { "epoch": 0.895872349657099, "grad_norm": 0.5302443504333496, "learning_rate": 8.447566294273857e-07, "loss": 0.0661, "step": 50587 }, { "epoch": 0.8958900591941276, "grad_norm": 0.4634590446949005, "learning_rate": 8.444719999430556e-07, "loss": 0.0374, "step": 50588 }, { "epoch": 0.895907768731156, "grad_norm": 0.4581626355648041, "learning_rate": 8.441874170289765e-07, "loss": 0.044, "step": 50589 }, { "epoch": 0.8959254782681844, "grad_norm": 0.6536760330200195, "learning_rate": 8.43902880686091e-07, "loss": 0.0483, "step": 50590 }, { "epoch": 0.8959431878052128, "grad_norm": 0.19690896570682526, "learning_rate": 8.43618390915335e-07, "loss": 0.0529, "step": 50591 }, { "epoch": 0.8959608973422413, "grad_norm": 0.21070067584514618, "learning_rate": 8.433339477176394e-07, "loss": 0.0351, "step": 50592 }, { "epoch": 0.8959786068792697, "grad_norm": 0.6422852873802185, "learning_rate": 8.430495510939435e-07, "loss": 0.0512, "step": 50593 }, { "epoch": 0.8959963164162981, "grad_norm": 0.9845614433288574, "learning_rate": 8.42765201045183e-07, "loss": 0.0699, "step": 50594 }, { "epoch": 0.8960140259533265, "grad_norm": 0.6129958629608154, "learning_rate": 8.424808975722941e-07, "loss": 0.0473, "step": 50595 }, { "epoch": 0.896031735490355, "grad_norm": 0.47042718529701233, "learning_rate": 8.421966406762094e-07, "loss": 0.0381, "step": 50596 }, { "epoch": 0.8960494450273834, "grad_norm": 0.39014363288879395, "learning_rate": 8.419124303578663e-07, "loss": 0.045, "step": 50597 }, { "epoch": 0.8960671545644118, "grad_norm": 0.22816739976406097, "learning_rate": 8.416282666182024e-07, "loss": 0.0235, "step": 50598 }, { "epoch": 0.8960848641014403, "grad_norm": 0.8059601783752441, "learning_rate": 8.413441494581469e-07, "loss": 0.0941, "step": 50599 }, { "epoch": 0.8961025736384687, "grad_norm": 0.531101644039154, "learning_rate": 8.41060078878636e-07, "loss": 0.0563, "step": 50600 }, { "epoch": 0.8961202831754971, "grad_norm": 0.4629860818386078, "learning_rate": 8.40776054880607e-07, "loss": 0.048, "step": 50601 }, { "epoch": 0.8961379927125255, "grad_norm": 0.7472347617149353, "learning_rate": 8.404920774649927e-07, "loss": 0.058, "step": 50602 }, { "epoch": 0.896155702249554, "grad_norm": 0.5698291063308716, "learning_rate": 8.402081466327272e-07, "loss": 0.0641, "step": 50603 }, { "epoch": 0.8961734117865824, "grad_norm": 0.7706395387649536, "learning_rate": 8.39924262384743e-07, "loss": 0.0702, "step": 50604 }, { "epoch": 0.8961911213236108, "grad_norm": 0.7119119763374329, "learning_rate": 8.39640424721978e-07, "loss": 0.0449, "step": 50605 }, { "epoch": 0.8962088308606392, "grad_norm": 0.7519643306732178, "learning_rate": 8.393566336453612e-07, "loss": 0.0539, "step": 50606 }, { "epoch": 0.8962265403976677, "grad_norm": 0.6985929608345032, "learning_rate": 8.390728891558303e-07, "loss": 0.0401, "step": 50607 }, { "epoch": 0.8962442499346961, "grad_norm": 0.9268380403518677, "learning_rate": 8.387891912543177e-07, "loss": 0.0571, "step": 50608 }, { "epoch": 0.8962619594717245, "grad_norm": 0.617039680480957, "learning_rate": 8.385055399417579e-07, "loss": 0.0688, "step": 50609 }, { "epoch": 0.8962796690087529, "grad_norm": 0.9189647436141968, "learning_rate": 8.382219352190801e-07, "loss": 0.0486, "step": 50610 }, { "epoch": 0.8962973785457814, "grad_norm": 0.8352312445640564, "learning_rate": 8.379383770872201e-07, "loss": 0.1254, "step": 50611 }, { "epoch": 0.8963150880828098, "grad_norm": 0.541670024394989, "learning_rate": 8.376548655471106e-07, "loss": 0.0318, "step": 50612 }, { "epoch": 0.8963327976198382, "grad_norm": 0.47811341285705566, "learning_rate": 8.373714005996841e-07, "loss": 0.0312, "step": 50613 }, { "epoch": 0.8963505071568667, "grad_norm": 0.4756399393081665, "learning_rate": 8.370879822458716e-07, "loss": 0.0481, "step": 50614 }, { "epoch": 0.8963682166938951, "grad_norm": 0.368796169757843, "learning_rate": 8.36804610486609e-07, "loss": 0.0467, "step": 50615 }, { "epoch": 0.8963859262309235, "grad_norm": 0.5641418695449829, "learning_rate": 8.365212853228288e-07, "loss": 0.0593, "step": 50616 }, { "epoch": 0.8964036357679519, "grad_norm": 0.43515199422836304, "learning_rate": 8.362380067554587e-07, "loss": 0.0562, "step": 50617 }, { "epoch": 0.8964213453049804, "grad_norm": 0.7818514108657837, "learning_rate": 8.359547747854329e-07, "loss": 0.0475, "step": 50618 }, { "epoch": 0.8964390548420088, "grad_norm": 0.49766409397125244, "learning_rate": 8.356715894136824e-07, "loss": 0.0499, "step": 50619 }, { "epoch": 0.8964567643790372, "grad_norm": 0.731821596622467, "learning_rate": 8.35388450641143e-07, "loss": 0.0662, "step": 50620 }, { "epoch": 0.8964744739160656, "grad_norm": 0.7755047082901001, "learning_rate": 8.351053584687407e-07, "loss": 0.0643, "step": 50621 }, { "epoch": 0.8964921834530941, "grad_norm": 0.6463700532913208, "learning_rate": 8.348223128974081e-07, "loss": 0.0563, "step": 50622 }, { "epoch": 0.8965098929901225, "grad_norm": 0.34613606333732605, "learning_rate": 8.345393139280777e-07, "loss": 0.0292, "step": 50623 }, { "epoch": 0.8965276025271509, "grad_norm": 0.7645872831344604, "learning_rate": 8.342563615616805e-07, "loss": 0.0491, "step": 50624 }, { "epoch": 0.8965453120641793, "grad_norm": 0.8224684596061707, "learning_rate": 8.339734557991475e-07, "loss": 0.0743, "step": 50625 }, { "epoch": 0.8965630216012078, "grad_norm": 0.6415380835533142, "learning_rate": 8.336905966414094e-07, "loss": 0.0542, "step": 50626 }, { "epoch": 0.8965807311382362, "grad_norm": 0.46648311614990234, "learning_rate": 8.334077840893972e-07, "loss": 0.0413, "step": 50627 }, { "epoch": 0.8965984406752646, "grad_norm": 0.7011194229125977, "learning_rate": 8.331250181440386e-07, "loss": 0.0577, "step": 50628 }, { "epoch": 0.8966161502122931, "grad_norm": 0.5480430126190186, "learning_rate": 8.328422988062678e-07, "loss": 0.041, "step": 50629 }, { "epoch": 0.8966338597493215, "grad_norm": 0.6487358212471008, "learning_rate": 8.325596260770124e-07, "loss": 0.0566, "step": 50630 }, { "epoch": 0.8966515692863499, "grad_norm": 0.5616053342819214, "learning_rate": 8.322769999572016e-07, "loss": 0.0657, "step": 50631 }, { "epoch": 0.8966692788233783, "grad_norm": 0.14339874684810638, "learning_rate": 8.319944204477664e-07, "loss": 0.059, "step": 50632 }, { "epoch": 0.8966869883604068, "grad_norm": 0.6592146158218384, "learning_rate": 8.317118875496377e-07, "loss": 0.0508, "step": 50633 }, { "epoch": 0.8967046978974352, "grad_norm": 0.5091755390167236, "learning_rate": 8.31429401263743e-07, "loss": 0.0619, "step": 50634 }, { "epoch": 0.8967224074344636, "grad_norm": 0.34969833493232727, "learning_rate": 8.311469615910117e-07, "loss": 0.0579, "step": 50635 }, { "epoch": 0.896740116971492, "grad_norm": 0.7408962845802307, "learning_rate": 8.308645685323746e-07, "loss": 0.0721, "step": 50636 }, { "epoch": 0.8967578265085205, "grad_norm": 0.3222680687904358, "learning_rate": 8.305822220887626e-07, "loss": 0.0493, "step": 50637 }, { "epoch": 0.8967755360455489, "grad_norm": 0.6729866862297058, "learning_rate": 8.302999222611002e-07, "loss": 0.0471, "step": 50638 }, { "epoch": 0.8967932455825773, "grad_norm": 0.3060000538825989, "learning_rate": 8.300176690503164e-07, "loss": 0.0496, "step": 50639 }, { "epoch": 0.8968109551196057, "grad_norm": 0.6567662358283997, "learning_rate": 8.297354624573422e-07, "loss": 0.0663, "step": 50640 }, { "epoch": 0.8968286646566342, "grad_norm": 0.7710612416267395, "learning_rate": 8.294533024831069e-07, "loss": 0.0638, "step": 50641 }, { "epoch": 0.8968463741936626, "grad_norm": 0.524626612663269, "learning_rate": 8.291711891285314e-07, "loss": 0.0366, "step": 50642 }, { "epoch": 0.896864083730691, "grad_norm": 0.24670982360839844, "learning_rate": 8.288891223945533e-07, "loss": 0.0327, "step": 50643 }, { "epoch": 0.8968817932677196, "grad_norm": 0.27392756938934326, "learning_rate": 8.286071022820984e-07, "loss": 0.0545, "step": 50644 }, { "epoch": 0.896899502804748, "grad_norm": 0.48452356457710266, "learning_rate": 8.283251287920912e-07, "loss": 0.0595, "step": 50645 }, { "epoch": 0.8969172123417763, "grad_norm": 0.8255129456520081, "learning_rate": 8.280432019254592e-07, "loss": 0.0847, "step": 50646 }, { "epoch": 0.8969349218788047, "grad_norm": 0.26113054156303406, "learning_rate": 8.277613216831332e-07, "loss": 0.0608, "step": 50647 }, { "epoch": 0.8969526314158333, "grad_norm": 0.5652650594711304, "learning_rate": 8.274794880660408e-07, "loss": 0.0633, "step": 50648 }, { "epoch": 0.8969703409528617, "grad_norm": 0.6544360518455505, "learning_rate": 8.271977010751047e-07, "loss": 0.0726, "step": 50649 }, { "epoch": 0.89698805048989, "grad_norm": 0.48088836669921875, "learning_rate": 8.269159607112558e-07, "loss": 0.0769, "step": 50650 }, { "epoch": 0.8970057600269185, "grad_norm": 0.4326489567756653, "learning_rate": 8.266342669754184e-07, "loss": 0.0435, "step": 50651 }, { "epoch": 0.897023469563947, "grad_norm": 0.22948823869228363, "learning_rate": 8.263526198685218e-07, "loss": 0.0445, "step": 50652 }, { "epoch": 0.8970411791009754, "grad_norm": 0.4418797194957733, "learning_rate": 8.260710193914917e-07, "loss": 0.0367, "step": 50653 }, { "epoch": 0.8970588886380038, "grad_norm": 0.5942702293395996, "learning_rate": 8.257894655452542e-07, "loss": 0.0513, "step": 50654 }, { "epoch": 0.8970765981750322, "grad_norm": 0.6392182111740112, "learning_rate": 8.255079583307368e-07, "loss": 0.0418, "step": 50655 }, { "epoch": 0.8970943077120607, "grad_norm": 0.7696934342384338, "learning_rate": 8.252264977488639e-07, "loss": 0.0742, "step": 50656 }, { "epoch": 0.8971120172490891, "grad_norm": 0.7201910018920898, "learning_rate": 8.249450838005612e-07, "loss": 0.035, "step": 50657 }, { "epoch": 0.8971297267861175, "grad_norm": 1.2151756286621094, "learning_rate": 8.246637164867549e-07, "loss": 0.0992, "step": 50658 }, { "epoch": 0.897147436323146, "grad_norm": 0.48933839797973633, "learning_rate": 8.24382395808374e-07, "loss": 0.0396, "step": 50659 }, { "epoch": 0.8971651458601744, "grad_norm": 0.6882525682449341, "learning_rate": 8.241011217663396e-07, "loss": 0.048, "step": 50660 }, { "epoch": 0.8971828553972028, "grad_norm": 0.6227759122848511, "learning_rate": 8.238198943615776e-07, "loss": 0.0596, "step": 50661 }, { "epoch": 0.8972005649342312, "grad_norm": 0.4795958697795868, "learning_rate": 8.235387135950156e-07, "loss": 0.0584, "step": 50662 }, { "epoch": 0.8972182744712597, "grad_norm": 0.5857579112052917, "learning_rate": 8.232575794675761e-07, "loss": 0.0453, "step": 50663 }, { "epoch": 0.8972359840082881, "grad_norm": 0.5592337846755981, "learning_rate": 8.229764919801869e-07, "loss": 0.0323, "step": 50664 }, { "epoch": 0.8972536935453165, "grad_norm": 0.5307683944702148, "learning_rate": 8.226954511337703e-07, "loss": 0.0667, "step": 50665 }, { "epoch": 0.8972714030823449, "grad_norm": 0.6947304010391235, "learning_rate": 8.224144569292541e-07, "loss": 0.0454, "step": 50666 }, { "epoch": 0.8972891126193734, "grad_norm": 0.1527589112520218, "learning_rate": 8.221335093675575e-07, "loss": 0.0423, "step": 50667 }, { "epoch": 0.8973068221564018, "grad_norm": 0.5177746415138245, "learning_rate": 8.218526084496081e-07, "loss": 0.0432, "step": 50668 }, { "epoch": 0.8973245316934302, "grad_norm": 0.49810144305229187, "learning_rate": 8.215717541763317e-07, "loss": 0.0358, "step": 50669 }, { "epoch": 0.8973422412304586, "grad_norm": 0.7062265872955322, "learning_rate": 8.212909465486479e-07, "loss": 0.0691, "step": 50670 }, { "epoch": 0.8973599507674871, "grad_norm": 0.4329116940498352, "learning_rate": 8.210101855674807e-07, "loss": 0.0419, "step": 50671 }, { "epoch": 0.8973776603045155, "grad_norm": 0.7425131797790527, "learning_rate": 8.207294712337576e-07, "loss": 0.0546, "step": 50672 }, { "epoch": 0.8973953698415439, "grad_norm": 0.5642491579055786, "learning_rate": 8.204488035484031e-07, "loss": 0.0443, "step": 50673 }, { "epoch": 0.8974130793785724, "grad_norm": 0.5382121205329895, "learning_rate": 8.201681825123347e-07, "loss": 0.05, "step": 50674 }, { "epoch": 0.8974307889156008, "grad_norm": 0.8490504622459412, "learning_rate": 8.1988760812648e-07, "loss": 0.0364, "step": 50675 }, { "epoch": 0.8974484984526292, "grad_norm": 0.4161803424358368, "learning_rate": 8.196070803917632e-07, "loss": 0.0478, "step": 50676 }, { "epoch": 0.8974662079896576, "grad_norm": 0.6032371520996094, "learning_rate": 8.19326599309102e-07, "loss": 0.0377, "step": 50677 }, { "epoch": 0.8974839175266861, "grad_norm": 0.8299080729484558, "learning_rate": 8.190461648794223e-07, "loss": 0.0612, "step": 50678 }, { "epoch": 0.8975016270637145, "grad_norm": 0.28987711668014526, "learning_rate": 8.187657771036466e-07, "loss": 0.0374, "step": 50679 }, { "epoch": 0.8975193366007429, "grad_norm": 0.4014575779438019, "learning_rate": 8.184854359826959e-07, "loss": 0.065, "step": 50680 }, { "epoch": 0.8975370461377713, "grad_norm": 0.5552400350570679, "learning_rate": 8.182051415174946e-07, "loss": 0.039, "step": 50681 }, { "epoch": 0.8975547556747998, "grad_norm": 0.7280561327934265, "learning_rate": 8.179248937089634e-07, "loss": 0.074, "step": 50682 }, { "epoch": 0.8975724652118282, "grad_norm": 0.6811298131942749, "learning_rate": 8.176446925580266e-07, "loss": 0.055, "step": 50683 }, { "epoch": 0.8975901747488566, "grad_norm": 0.4852113723754883, "learning_rate": 8.173645380656052e-07, "loss": 0.0356, "step": 50684 }, { "epoch": 0.897607884285885, "grad_norm": 0.6866132616996765, "learning_rate": 8.170844302326169e-07, "loss": 0.056, "step": 50685 }, { "epoch": 0.8976255938229135, "grad_norm": 0.7069003582000732, "learning_rate": 8.168043690599874e-07, "loss": 0.0623, "step": 50686 }, { "epoch": 0.8976433033599419, "grad_norm": 0.8391497731208801, "learning_rate": 8.165243545486395e-07, "loss": 0.0537, "step": 50687 }, { "epoch": 0.8976610128969703, "grad_norm": 0.5958657264709473, "learning_rate": 8.16244386699489e-07, "loss": 0.0791, "step": 50688 }, { "epoch": 0.8976787224339988, "grad_norm": 0.4765409529209137, "learning_rate": 8.159644655134602e-07, "loss": 0.0332, "step": 50689 }, { "epoch": 0.8976964319710272, "grad_norm": 0.7416279315948486, "learning_rate": 8.15684590991474e-07, "loss": 0.0836, "step": 50690 }, { "epoch": 0.8977141415080556, "grad_norm": 0.5844473838806152, "learning_rate": 8.154047631344513e-07, "loss": 0.0652, "step": 50691 }, { "epoch": 0.897731851045084, "grad_norm": 0.6538001298904419, "learning_rate": 8.151249819433115e-07, "loss": 0.0414, "step": 50692 }, { "epoch": 0.8977495605821125, "grad_norm": 0.6262199878692627, "learning_rate": 8.148452474189755e-07, "loss": 0.047, "step": 50693 }, { "epoch": 0.8977672701191409, "grad_norm": 0.37211874127388, "learning_rate": 8.145655595623658e-07, "loss": 0.0356, "step": 50694 }, { "epoch": 0.8977849796561693, "grad_norm": 0.8987404108047485, "learning_rate": 8.142859183743983e-07, "loss": 0.0784, "step": 50695 }, { "epoch": 0.8978026891931977, "grad_norm": 0.5154682397842407, "learning_rate": 8.140063238559975e-07, "loss": 0.0394, "step": 50696 }, { "epoch": 0.8978203987302262, "grad_norm": 0.2550857663154602, "learning_rate": 8.137267760080791e-07, "loss": 0.0395, "step": 50697 }, { "epoch": 0.8978381082672546, "grad_norm": 1.0790281295776367, "learning_rate": 8.134472748315675e-07, "loss": 0.0642, "step": 50698 }, { "epoch": 0.897855817804283, "grad_norm": 0.6521386504173279, "learning_rate": 8.131678203273768e-07, "loss": 0.0588, "step": 50699 }, { "epoch": 0.8978735273413114, "grad_norm": 0.45752429962158203, "learning_rate": 8.128884124964281e-07, "loss": 0.0258, "step": 50700 }, { "epoch": 0.8978912368783399, "grad_norm": 0.495978981256485, "learning_rate": 8.126090513396439e-07, "loss": 0.0548, "step": 50701 }, { "epoch": 0.8979089464153683, "grad_norm": 0.19317126274108887, "learning_rate": 8.123297368579402e-07, "loss": 0.041, "step": 50702 }, { "epoch": 0.8979266559523967, "grad_norm": 0.7191279530525208, "learning_rate": 8.120504690522362e-07, "loss": 0.0581, "step": 50703 }, { "epoch": 0.8979443654894252, "grad_norm": 0.39566823840141296, "learning_rate": 8.117712479234513e-07, "loss": 0.045, "step": 50704 }, { "epoch": 0.8979620750264536, "grad_norm": 0.5002977848052979, "learning_rate": 8.114920734725062e-07, "loss": 0.0356, "step": 50705 }, { "epoch": 0.897979784563482, "grad_norm": 0.36630651354789734, "learning_rate": 8.112129457003153e-07, "loss": 0.0318, "step": 50706 }, { "epoch": 0.8979974941005104, "grad_norm": 0.3653229773044586, "learning_rate": 8.109338646077996e-07, "loss": 0.0472, "step": 50707 }, { "epoch": 0.898015203637539, "grad_norm": 0.4050200581550598, "learning_rate": 8.10654830195875e-07, "loss": 0.037, "step": 50708 }, { "epoch": 0.8980329131745673, "grad_norm": 0.7202024459838867, "learning_rate": 8.103758424654606e-07, "loss": 0.0508, "step": 50709 }, { "epoch": 0.8980506227115957, "grad_norm": 0.6559304594993591, "learning_rate": 8.100969014174741e-07, "loss": 0.0687, "step": 50710 }, { "epoch": 0.8980683322486241, "grad_norm": 0.7662085890769958, "learning_rate": 8.098180070528349e-07, "loss": 0.0671, "step": 50711 }, { "epoch": 0.8980860417856527, "grad_norm": 0.3860379755496979, "learning_rate": 8.095391593724605e-07, "loss": 0.0378, "step": 50712 }, { "epoch": 0.898103751322681, "grad_norm": 0.29299429059028625, "learning_rate": 8.092603583772651e-07, "loss": 0.0398, "step": 50713 }, { "epoch": 0.8981214608597095, "grad_norm": 0.6443781852722168, "learning_rate": 8.08981604068168e-07, "loss": 0.0497, "step": 50714 }, { "epoch": 0.8981391703967379, "grad_norm": 0.5565161108970642, "learning_rate": 8.087028964460851e-07, "loss": 0.0507, "step": 50715 }, { "epoch": 0.8981568799337664, "grad_norm": 0.7753652930259705, "learning_rate": 8.084242355119376e-07, "loss": 0.0485, "step": 50716 }, { "epoch": 0.8981745894707948, "grad_norm": 0.6425228118896484, "learning_rate": 8.081456212666361e-07, "loss": 0.0697, "step": 50717 }, { "epoch": 0.8981922990078232, "grad_norm": 0.2991848587989807, "learning_rate": 8.078670537111016e-07, "loss": 0.0598, "step": 50718 }, { "epoch": 0.8982100085448517, "grad_norm": 0.5617534518241882, "learning_rate": 8.075885328462468e-07, "loss": 0.0375, "step": 50719 }, { "epoch": 0.8982277180818801, "grad_norm": 0.5575249791145325, "learning_rate": 8.073100586729926e-07, "loss": 0.0488, "step": 50720 }, { "epoch": 0.8982454276189085, "grad_norm": 1.0196970701217651, "learning_rate": 8.070316311922515e-07, "loss": 0.0581, "step": 50721 }, { "epoch": 0.8982631371559369, "grad_norm": 0.8783082365989685, "learning_rate": 8.067532504049396e-07, "loss": 0.0807, "step": 50722 }, { "epoch": 0.8982808466929654, "grad_norm": 0.8014366626739502, "learning_rate": 8.064749163119778e-07, "loss": 0.0395, "step": 50723 }, { "epoch": 0.8982985562299938, "grad_norm": 0.6246436834335327, "learning_rate": 8.061966289142752e-07, "loss": 0.0662, "step": 50724 }, { "epoch": 0.8983162657670222, "grad_norm": 0.24703870713710785, "learning_rate": 8.059183882127497e-07, "loss": 0.0666, "step": 50725 }, { "epoch": 0.8983339753040506, "grad_norm": 0.35894936323165894, "learning_rate": 8.056401942083202e-07, "loss": 0.0662, "step": 50726 }, { "epoch": 0.8983516848410791, "grad_norm": 0.5333157777786255, "learning_rate": 8.053620469018963e-07, "loss": 0.0903, "step": 50727 }, { "epoch": 0.8983693943781075, "grad_norm": 0.5617161393165588, "learning_rate": 8.050839462943954e-07, "loss": 0.0517, "step": 50728 }, { "epoch": 0.8983871039151359, "grad_norm": 0.6079291105270386, "learning_rate": 8.048058923867301e-07, "loss": 0.0562, "step": 50729 }, { "epoch": 0.8984048134521643, "grad_norm": 0.5117853283882141, "learning_rate": 8.045278851798232e-07, "loss": 0.0627, "step": 50730 }, { "epoch": 0.8984225229891928, "grad_norm": 0.44923868775367737, "learning_rate": 8.042499246745804e-07, "loss": 0.0391, "step": 50731 }, { "epoch": 0.8984402325262212, "grad_norm": 0.6203206181526184, "learning_rate": 8.039720108719195e-07, "loss": 0.0387, "step": 50732 }, { "epoch": 0.8984579420632496, "grad_norm": 0.6661639213562012, "learning_rate": 8.03694143772758e-07, "loss": 0.0381, "step": 50733 }, { "epoch": 0.8984756516002781, "grad_norm": 0.5992617607116699, "learning_rate": 8.034163233780034e-07, "loss": 0.0456, "step": 50734 }, { "epoch": 0.8984933611373065, "grad_norm": 0.730518639087677, "learning_rate": 8.031385496885735e-07, "loss": 0.0537, "step": 50735 }, { "epoch": 0.8985110706743349, "grad_norm": 0.7357699871063232, "learning_rate": 8.028608227053824e-07, "loss": 0.0666, "step": 50736 }, { "epoch": 0.8985287802113633, "grad_norm": 0.5208606123924255, "learning_rate": 8.025831424293445e-07, "loss": 0.051, "step": 50737 }, { "epoch": 0.8985464897483918, "grad_norm": 0.7378190159797668, "learning_rate": 8.023055088613707e-07, "loss": 0.0464, "step": 50738 }, { "epoch": 0.8985641992854202, "grad_norm": 0.5900072455406189, "learning_rate": 8.020279220023752e-07, "loss": 0.0345, "step": 50739 }, { "epoch": 0.8985819088224486, "grad_norm": 0.5612065196037292, "learning_rate": 8.01750381853274e-07, "loss": 0.0406, "step": 50740 }, { "epoch": 0.898599618359477, "grad_norm": 0.6052120327949524, "learning_rate": 8.014728884149764e-07, "loss": 0.0677, "step": 50741 }, { "epoch": 0.8986173278965055, "grad_norm": 0.8954836130142212, "learning_rate": 8.011954416883965e-07, "loss": 0.0533, "step": 50742 }, { "epoch": 0.8986350374335339, "grad_norm": 0.37746763229370117, "learning_rate": 8.009180416744489e-07, "loss": 0.0501, "step": 50743 }, { "epoch": 0.8986527469705623, "grad_norm": 0.679795503616333, "learning_rate": 8.006406883740458e-07, "loss": 0.0641, "step": 50744 }, { "epoch": 0.8986704565075907, "grad_norm": 0.5705810189247131, "learning_rate": 8.003633817880951e-07, "loss": 0.0476, "step": 50745 }, { "epoch": 0.8986881660446192, "grad_norm": 0.3967714309692383, "learning_rate": 8.000861219175143e-07, "loss": 0.0554, "step": 50746 }, { "epoch": 0.8987058755816476, "grad_norm": 0.8145110011100769, "learning_rate": 7.998089087632143e-07, "loss": 0.0544, "step": 50747 }, { "epoch": 0.898723585118676, "grad_norm": 0.4931681156158447, "learning_rate": 7.995317423261062e-07, "loss": 0.0361, "step": 50748 }, { "epoch": 0.8987412946557045, "grad_norm": 0.40564799308776855, "learning_rate": 7.992546226071023e-07, "loss": 0.0754, "step": 50749 }, { "epoch": 0.8987590041927329, "grad_norm": 0.8355361223220825, "learning_rate": 7.989775496071139e-07, "loss": 0.0497, "step": 50750 }, { "epoch": 0.8987767137297613, "grad_norm": 0.7171691656112671, "learning_rate": 7.987005233270566e-07, "loss": 0.0545, "step": 50751 }, { "epoch": 0.8987944232667897, "grad_norm": 0.5617431998252869, "learning_rate": 7.98423543767835e-07, "loss": 0.0675, "step": 50752 }, { "epoch": 0.8988121328038182, "grad_norm": 0.5014893412590027, "learning_rate": 7.981466109303631e-07, "loss": 0.0436, "step": 50753 }, { "epoch": 0.8988298423408466, "grad_norm": 0.2984004616737366, "learning_rate": 7.978697248155537e-07, "loss": 0.0514, "step": 50754 }, { "epoch": 0.898847551877875, "grad_norm": 0.9069928526878357, "learning_rate": 7.975928854243175e-07, "loss": 0.1028, "step": 50755 }, { "epoch": 0.8988652614149034, "grad_norm": 0.550091028213501, "learning_rate": 7.973160927575623e-07, "loss": 0.0454, "step": 50756 }, { "epoch": 0.8988829709519319, "grad_norm": 0.815467357635498, "learning_rate": 7.970393468162007e-07, "loss": 0.0555, "step": 50757 }, { "epoch": 0.8989006804889603, "grad_norm": 0.7293059825897217, "learning_rate": 7.967626476011436e-07, "loss": 0.0675, "step": 50758 }, { "epoch": 0.8989183900259887, "grad_norm": 1.2145837545394897, "learning_rate": 7.964859951133002e-07, "loss": 0.0769, "step": 50759 }, { "epoch": 0.8989360995630171, "grad_norm": 0.5381598472595215, "learning_rate": 7.962093893535816e-07, "loss": 0.0427, "step": 50760 }, { "epoch": 0.8989538091000456, "grad_norm": 0.5110577344894409, "learning_rate": 7.959328303228969e-07, "loss": 0.0436, "step": 50761 }, { "epoch": 0.898971518637074, "grad_norm": 0.6432902812957764, "learning_rate": 7.956563180221588e-07, "loss": 0.0462, "step": 50762 }, { "epoch": 0.8989892281741024, "grad_norm": 1.1583640575408936, "learning_rate": 7.953798524522731e-07, "loss": 0.0651, "step": 50763 }, { "epoch": 0.8990069377111309, "grad_norm": 0.5800254940986633, "learning_rate": 7.951034336141511e-07, "loss": 0.0367, "step": 50764 }, { "epoch": 0.8990246472481593, "grad_norm": 0.22157829999923706, "learning_rate": 7.948270615087033e-07, "loss": 0.0576, "step": 50765 }, { "epoch": 0.8990423567851877, "grad_norm": 0.3794974386692047, "learning_rate": 7.945507361368343e-07, "loss": 0.0478, "step": 50766 }, { "epoch": 0.8990600663222161, "grad_norm": 0.7854347825050354, "learning_rate": 7.942744574994581e-07, "loss": 0.0646, "step": 50767 }, { "epoch": 0.8990777758592446, "grad_norm": 0.8381199240684509, "learning_rate": 7.939982255974826e-07, "loss": 0.0738, "step": 50768 }, { "epoch": 0.899095485396273, "grad_norm": 0.6398004293441772, "learning_rate": 7.937220404318185e-07, "loss": 0.0498, "step": 50769 }, { "epoch": 0.8991131949333014, "grad_norm": 0.7794337868690491, "learning_rate": 7.934459020033702e-07, "loss": 0.0478, "step": 50770 }, { "epoch": 0.8991309044703298, "grad_norm": 0.697668194770813, "learning_rate": 7.931698103130469e-07, "loss": 0.0556, "step": 50771 }, { "epoch": 0.8991486140073583, "grad_norm": 1.0029823780059814, "learning_rate": 7.928937653617596e-07, "loss": 0.0852, "step": 50772 }, { "epoch": 0.8991663235443867, "grad_norm": 0.7428089380264282, "learning_rate": 7.926177671504142e-07, "loss": 0.0735, "step": 50773 }, { "epoch": 0.8991840330814151, "grad_norm": 0.35318928956985474, "learning_rate": 7.923418156799184e-07, "loss": 0.0341, "step": 50774 }, { "epoch": 0.8992017426184435, "grad_norm": 0.6710242629051208, "learning_rate": 7.920659109511813e-07, "loss": 0.0458, "step": 50775 }, { "epoch": 0.899219452155472, "grad_norm": 0.7190195918083191, "learning_rate": 7.917900529651106e-07, "loss": 0.0564, "step": 50776 }, { "epoch": 0.8992371616925005, "grad_norm": 0.22454214096069336, "learning_rate": 7.91514241722614e-07, "loss": 0.03, "step": 50777 }, { "epoch": 0.8992548712295289, "grad_norm": 0.3285849988460541, "learning_rate": 7.912384772245973e-07, "loss": 0.0376, "step": 50778 }, { "epoch": 0.8992725807665574, "grad_norm": 0.4134937822818756, "learning_rate": 7.909627594719715e-07, "loss": 0.0543, "step": 50779 }, { "epoch": 0.8992902903035858, "grad_norm": 0.8006669282913208, "learning_rate": 7.906870884656392e-07, "loss": 0.0832, "step": 50780 }, { "epoch": 0.8993079998406142, "grad_norm": 0.5133310556411743, "learning_rate": 7.90411464206508e-07, "loss": 0.0456, "step": 50781 }, { "epoch": 0.8993257093776426, "grad_norm": 0.5600560307502747, "learning_rate": 7.901358866954872e-07, "loss": 0.0403, "step": 50782 }, { "epoch": 0.8993434189146711, "grad_norm": 0.5146566033363342, "learning_rate": 7.898603559334827e-07, "loss": 0.0635, "step": 50783 }, { "epoch": 0.8993611284516995, "grad_norm": 0.7378780245780945, "learning_rate": 7.895848719214005e-07, "loss": 0.058, "step": 50784 }, { "epoch": 0.8993788379887279, "grad_norm": 0.46463096141815186, "learning_rate": 7.893094346601448e-07, "loss": 0.0632, "step": 50785 }, { "epoch": 0.8993965475257563, "grad_norm": 0.40948382019996643, "learning_rate": 7.890340441506249e-07, "loss": 0.0133, "step": 50786 }, { "epoch": 0.8994142570627848, "grad_norm": 0.2826770544052124, "learning_rate": 7.887587003937453e-07, "loss": 0.0333, "step": 50787 }, { "epoch": 0.8994319665998132, "grad_norm": 0.6453720927238464, "learning_rate": 7.884834033904114e-07, "loss": 0.0466, "step": 50788 }, { "epoch": 0.8994496761368416, "grad_norm": 0.8437733054161072, "learning_rate": 7.882081531415314e-07, "loss": 0.0713, "step": 50789 }, { "epoch": 0.89946738567387, "grad_norm": 0.6904982328414917, "learning_rate": 7.879329496480092e-07, "loss": 0.0676, "step": 50790 }, { "epoch": 0.8994850952108985, "grad_norm": 0.3554472029209137, "learning_rate": 7.876577929107493e-07, "loss": 0.0538, "step": 50791 }, { "epoch": 0.8995028047479269, "grad_norm": 0.7008830904960632, "learning_rate": 7.873826829306574e-07, "loss": 0.0658, "step": 50792 }, { "epoch": 0.8995205142849553, "grad_norm": 0.7883729338645935, "learning_rate": 7.871076197086397e-07, "loss": 0.0705, "step": 50793 }, { "epoch": 0.8995382238219838, "grad_norm": 0.41993317008018494, "learning_rate": 7.868326032456019e-07, "loss": 0.0703, "step": 50794 }, { "epoch": 0.8995559333590122, "grad_norm": 0.5530096888542175, "learning_rate": 7.865576335424435e-07, "loss": 0.049, "step": 50795 }, { "epoch": 0.8995736428960406, "grad_norm": 0.5229242444038391, "learning_rate": 7.862827106000737e-07, "loss": 0.0785, "step": 50796 }, { "epoch": 0.899591352433069, "grad_norm": 0.736012876033783, "learning_rate": 7.860078344193999e-07, "loss": 0.056, "step": 50797 }, { "epoch": 0.8996090619700975, "grad_norm": 0.37627509236335754, "learning_rate": 7.8573300500132e-07, "loss": 0.0344, "step": 50798 }, { "epoch": 0.8996267715071259, "grad_norm": 0.4827534258365631, "learning_rate": 7.854582223467399e-07, "loss": 0.0463, "step": 50799 }, { "epoch": 0.8996444810441543, "grad_norm": 0.44988399744033813, "learning_rate": 7.851834864565654e-07, "loss": 0.0499, "step": 50800 }, { "epoch": 0.8996621905811827, "grad_norm": 0.48881325125694275, "learning_rate": 7.84908797331701e-07, "loss": 0.0337, "step": 50801 }, { "epoch": 0.8996799001182112, "grad_norm": 0.4930276572704315, "learning_rate": 7.846341549730474e-07, "loss": 0.0454, "step": 50802 }, { "epoch": 0.8996976096552396, "grad_norm": 0.9375274777412415, "learning_rate": 7.843595593815089e-07, "loss": 0.0535, "step": 50803 }, { "epoch": 0.899715319192268, "grad_norm": 0.5352246165275574, "learning_rate": 7.840850105579916e-07, "loss": 0.058, "step": 50804 }, { "epoch": 0.8997330287292964, "grad_norm": 0.7722066640853882, "learning_rate": 7.838105085033948e-07, "loss": 0.0517, "step": 50805 }, { "epoch": 0.8997507382663249, "grad_norm": 0.6375609636306763, "learning_rate": 7.835360532186242e-07, "loss": 0.0382, "step": 50806 }, { "epoch": 0.8997684478033533, "grad_norm": 0.333607017993927, "learning_rate": 7.832616447045809e-07, "loss": 0.0583, "step": 50807 }, { "epoch": 0.8997861573403817, "grad_norm": 0.6476823687553406, "learning_rate": 7.829872829621726e-07, "loss": 0.0669, "step": 50808 }, { "epoch": 0.8998038668774102, "grad_norm": 0.45047372579574585, "learning_rate": 7.82712967992295e-07, "loss": 0.031, "step": 50809 }, { "epoch": 0.8998215764144386, "grad_norm": 0.792759358882904, "learning_rate": 7.824386997958544e-07, "loss": 0.0571, "step": 50810 }, { "epoch": 0.899839285951467, "grad_norm": 0.6378374695777893, "learning_rate": 7.821644783737547e-07, "loss": 0.0513, "step": 50811 }, { "epoch": 0.8998569954884954, "grad_norm": 0.5301241874694824, "learning_rate": 7.818903037268938e-07, "loss": 0.0348, "step": 50812 }, { "epoch": 0.8998747050255239, "grad_norm": 0.7137932777404785, "learning_rate": 7.816161758561757e-07, "loss": 0.0515, "step": 50813 }, { "epoch": 0.8998924145625523, "grad_norm": 0.6623463034629822, "learning_rate": 7.813420947625017e-07, "loss": 0.0441, "step": 50814 }, { "epoch": 0.8999101240995807, "grad_norm": 0.6685974597930908, "learning_rate": 7.81068060446774e-07, "loss": 0.0657, "step": 50815 }, { "epoch": 0.8999278336366091, "grad_norm": 0.654665470123291, "learning_rate": 7.807940729098939e-07, "loss": 0.0715, "step": 50816 }, { "epoch": 0.8999455431736376, "grad_norm": 0.6261911988258362, "learning_rate": 7.805201321527638e-07, "loss": 0.0544, "step": 50817 }, { "epoch": 0.899963252710666, "grad_norm": 0.42271697521209717, "learning_rate": 7.802462381762865e-07, "loss": 0.0393, "step": 50818 }, { "epoch": 0.8999809622476944, "grad_norm": 0.3767203092575073, "learning_rate": 7.799723909813577e-07, "loss": 0.0364, "step": 50819 }, { "epoch": 0.8999986717847228, "grad_norm": 0.10152357816696167, "learning_rate": 7.796985905688836e-07, "loss": 0.0233, "step": 50820 }, { "epoch": 0.9000163813217513, "grad_norm": 0.5486351847648621, "learning_rate": 7.794248369397617e-07, "loss": 0.0712, "step": 50821 }, { "epoch": 0.9000340908587797, "grad_norm": 0.5592359304428101, "learning_rate": 7.791511300948944e-07, "loss": 0.026, "step": 50822 }, { "epoch": 0.9000518003958081, "grad_norm": 0.5591068267822266, "learning_rate": 7.788774700351814e-07, "loss": 0.0678, "step": 50823 }, { "epoch": 0.9000695099328366, "grad_norm": 0.6868748664855957, "learning_rate": 7.786038567615201e-07, "loss": 0.0978, "step": 50824 }, { "epoch": 0.900087219469865, "grad_norm": 0.4461875259876251, "learning_rate": 7.783302902748162e-07, "loss": 0.0512, "step": 50825 }, { "epoch": 0.9001049290068934, "grad_norm": 0.54768967628479, "learning_rate": 7.780567705759695e-07, "loss": 0.0288, "step": 50826 }, { "epoch": 0.9001226385439218, "grad_norm": 0.6895778179168701, "learning_rate": 7.777832976658738e-07, "loss": 0.0651, "step": 50827 }, { "epoch": 0.9001403480809503, "grad_norm": 0.598874032497406, "learning_rate": 7.775098715454337e-07, "loss": 0.0666, "step": 50828 }, { "epoch": 0.9001580576179787, "grad_norm": 0.7549964785575867, "learning_rate": 7.772364922155501e-07, "loss": 0.0576, "step": 50829 }, { "epoch": 0.9001757671550071, "grad_norm": 0.5814896821975708, "learning_rate": 7.769631596771187e-07, "loss": 0.0583, "step": 50830 }, { "epoch": 0.9001934766920355, "grad_norm": 0.7248273491859436, "learning_rate": 7.766898739310375e-07, "loss": 0.0622, "step": 50831 }, { "epoch": 0.900211186229064, "grad_norm": 1.1030688285827637, "learning_rate": 7.764166349782104e-07, "loss": 0.0602, "step": 50832 }, { "epoch": 0.9002288957660924, "grad_norm": 0.6713739633560181, "learning_rate": 7.761434428195319e-07, "loss": 0.0511, "step": 50833 }, { "epoch": 0.9002466053031208, "grad_norm": 0.48282939195632935, "learning_rate": 7.758702974559046e-07, "loss": 0.0568, "step": 50834 }, { "epoch": 0.9002643148401492, "grad_norm": 0.845419704914093, "learning_rate": 7.755971988882243e-07, "loss": 0.0613, "step": 50835 }, { "epoch": 0.9002820243771777, "grad_norm": 0.4594016671180725, "learning_rate": 7.753241471173921e-07, "loss": 0.0503, "step": 50836 }, { "epoch": 0.9002997339142061, "grad_norm": 0.4861299395561218, "learning_rate": 7.75051142144304e-07, "loss": 0.05, "step": 50837 }, { "epoch": 0.9003174434512345, "grad_norm": 0.5508471131324768, "learning_rate": 7.747781839698575e-07, "loss": 0.0478, "step": 50838 }, { "epoch": 0.900335152988263, "grad_norm": 0.4059523344039917, "learning_rate": 7.745052725949536e-07, "loss": 0.0515, "step": 50839 }, { "epoch": 0.9003528625252915, "grad_norm": 0.7387844324111938, "learning_rate": 7.742324080204899e-07, "loss": 0.0619, "step": 50840 }, { "epoch": 0.9003705720623199, "grad_norm": 0.6294388175010681, "learning_rate": 7.739595902473606e-07, "loss": 0.0503, "step": 50841 }, { "epoch": 0.9003882815993483, "grad_norm": 0.9488784670829773, "learning_rate": 7.736868192764651e-07, "loss": 0.0793, "step": 50842 }, { "epoch": 0.9004059911363768, "grad_norm": 0.5633859038352966, "learning_rate": 7.73414095108701e-07, "loss": 0.0658, "step": 50843 }, { "epoch": 0.9004237006734052, "grad_norm": 0.32382380962371826, "learning_rate": 7.731414177449659e-07, "loss": 0.0643, "step": 50844 }, { "epoch": 0.9004414102104336, "grad_norm": 0.45760494470596313, "learning_rate": 7.728687871861556e-07, "loss": 0.0402, "step": 50845 }, { "epoch": 0.900459119747462, "grad_norm": 0.973771870136261, "learning_rate": 7.725962034331696e-07, "loss": 0.0472, "step": 50846 }, { "epoch": 0.9004768292844905, "grad_norm": 0.7034437656402588, "learning_rate": 7.723236664869037e-07, "loss": 0.0591, "step": 50847 }, { "epoch": 0.9004945388215189, "grad_norm": 0.3486018776893616, "learning_rate": 7.720511763482524e-07, "loss": 0.0383, "step": 50848 }, { "epoch": 0.9005122483585473, "grad_norm": 0.7519232630729675, "learning_rate": 7.717787330181148e-07, "loss": 0.069, "step": 50849 }, { "epoch": 0.9005299578955757, "grad_norm": 0.5965666174888611, "learning_rate": 7.715063364973868e-07, "loss": 0.0399, "step": 50850 }, { "epoch": 0.9005476674326042, "grad_norm": 0.9139350652694702, "learning_rate": 7.712339867869611e-07, "loss": 0.05, "step": 50851 }, { "epoch": 0.9005653769696326, "grad_norm": 0.7315079569816589, "learning_rate": 7.709616838877387e-07, "loss": 0.0494, "step": 50852 }, { "epoch": 0.900583086506661, "grad_norm": 0.4498162269592285, "learning_rate": 7.706894278006088e-07, "loss": 0.0609, "step": 50853 }, { "epoch": 0.9006007960436895, "grad_norm": 0.49390876293182373, "learning_rate": 7.704172185264774e-07, "loss": 0.0645, "step": 50854 }, { "epoch": 0.9006185055807179, "grad_norm": 0.5396482348442078, "learning_rate": 7.701450560662305e-07, "loss": 0.0513, "step": 50855 }, { "epoch": 0.9006362151177463, "grad_norm": 0.6600773930549622, "learning_rate": 7.698729404207671e-07, "loss": 0.061, "step": 50856 }, { "epoch": 0.9006539246547747, "grad_norm": 0.663212776184082, "learning_rate": 7.696008715909836e-07, "loss": 0.0521, "step": 50857 }, { "epoch": 0.9006716341918032, "grad_norm": 0.31196731328964233, "learning_rate": 7.693288495777756e-07, "loss": 0.0482, "step": 50858 }, { "epoch": 0.9006893437288316, "grad_norm": 0.5714174509048462, "learning_rate": 7.690568743820342e-07, "loss": 0.0896, "step": 50859 }, { "epoch": 0.90070705326586, "grad_norm": 0.6202690005302429, "learning_rate": 7.68784946004657e-07, "loss": 0.0546, "step": 50860 }, { "epoch": 0.9007247628028884, "grad_norm": 0.7246665358543396, "learning_rate": 7.685130644465366e-07, "loss": 0.051, "step": 50861 }, { "epoch": 0.9007424723399169, "grad_norm": 0.46057504415512085, "learning_rate": 7.682412297085705e-07, "loss": 0.0473, "step": 50862 }, { "epoch": 0.9007601818769453, "grad_norm": 0.5772429704666138, "learning_rate": 7.6796944179165e-07, "loss": 0.0338, "step": 50863 }, { "epoch": 0.9007778914139737, "grad_norm": 0.5615769028663635, "learning_rate": 7.676977006966706e-07, "loss": 0.0523, "step": 50864 }, { "epoch": 0.9007956009510021, "grad_norm": 0.9362895488739014, "learning_rate": 7.674260064245286e-07, "loss": 0.091, "step": 50865 }, { "epoch": 0.9008133104880306, "grad_norm": 0.6681590676307678, "learning_rate": 7.671543589761132e-07, "loss": 0.0633, "step": 50866 }, { "epoch": 0.900831020025059, "grad_norm": 0.5269147753715515, "learning_rate": 7.668827583523202e-07, "loss": 0.0533, "step": 50867 }, { "epoch": 0.9008487295620874, "grad_norm": 0.6176361441612244, "learning_rate": 7.666112045540457e-07, "loss": 0.0492, "step": 50868 }, { "epoch": 0.9008664390991159, "grad_norm": 0.5048737525939941, "learning_rate": 7.663396975821791e-07, "loss": 0.0537, "step": 50869 }, { "epoch": 0.9008841486361443, "grad_norm": 0.6368657946586609, "learning_rate": 7.660682374376143e-07, "loss": 0.0496, "step": 50870 }, { "epoch": 0.9009018581731727, "grad_norm": 0.5238685607910156, "learning_rate": 7.657968241212459e-07, "loss": 0.0546, "step": 50871 }, { "epoch": 0.9009195677102011, "grad_norm": 0.6617191433906555, "learning_rate": 7.655254576339665e-07, "loss": 0.0575, "step": 50872 }, { "epoch": 0.9009372772472296, "grad_norm": 0.34842315316200256, "learning_rate": 7.652541379766687e-07, "loss": 0.0498, "step": 50873 }, { "epoch": 0.900954986784258, "grad_norm": 0.6262992024421692, "learning_rate": 7.64982865150245e-07, "loss": 0.0595, "step": 50874 }, { "epoch": 0.9009726963212864, "grad_norm": 0.591541051864624, "learning_rate": 7.647116391555897e-07, "loss": 0.0303, "step": 50875 }, { "epoch": 0.9009904058583148, "grad_norm": 0.5962300300598145, "learning_rate": 7.644404599935906e-07, "loss": 0.0466, "step": 50876 }, { "epoch": 0.9010081153953433, "grad_norm": 0.8799045085906982, "learning_rate": 7.641693276651435e-07, "loss": 0.0603, "step": 50877 }, { "epoch": 0.9010258249323717, "grad_norm": 0.937934398651123, "learning_rate": 7.638982421711394e-07, "loss": 0.0765, "step": 50878 }, { "epoch": 0.9010435344694001, "grad_norm": 0.7029545307159424, "learning_rate": 7.636272035124708e-07, "loss": 0.072, "step": 50879 }, { "epoch": 0.9010612440064285, "grad_norm": 0.864352822303772, "learning_rate": 7.633562116900289e-07, "loss": 0.053, "step": 50880 }, { "epoch": 0.901078953543457, "grad_norm": 0.8832391500473022, "learning_rate": 7.630852667047028e-07, "loss": 0.0864, "step": 50881 }, { "epoch": 0.9010966630804854, "grad_norm": 0.34116387367248535, "learning_rate": 7.628143685573885e-07, "loss": 0.0473, "step": 50882 }, { "epoch": 0.9011143726175138, "grad_norm": 0.4658203423023224, "learning_rate": 7.625435172489737e-07, "loss": 0.046, "step": 50883 }, { "epoch": 0.9011320821545423, "grad_norm": 0.7213615775108337, "learning_rate": 7.622727127803508e-07, "loss": 0.0775, "step": 50884 }, { "epoch": 0.9011497916915707, "grad_norm": 0.9778649806976318, "learning_rate": 7.62001955152411e-07, "loss": 0.0534, "step": 50885 }, { "epoch": 0.9011675012285991, "grad_norm": 0.6896610260009766, "learning_rate": 7.617312443660452e-07, "loss": 0.06, "step": 50886 }, { "epoch": 0.9011852107656275, "grad_norm": 0.33612656593322754, "learning_rate": 7.614605804221442e-07, "loss": 0.0214, "step": 50887 }, { "epoch": 0.901202920302656, "grad_norm": 0.32042357325553894, "learning_rate": 7.611899633215958e-07, "loss": 0.0526, "step": 50888 }, { "epoch": 0.9012206298396844, "grad_norm": 0.6082167625427246, "learning_rate": 7.609193930652924e-07, "loss": 0.0568, "step": 50889 }, { "epoch": 0.9012383393767128, "grad_norm": 0.6429610848426819, "learning_rate": 7.606488696541236e-07, "loss": 0.0428, "step": 50890 }, { "epoch": 0.9012560489137412, "grad_norm": 0.3686101734638214, "learning_rate": 7.603783930889802e-07, "loss": 0.044, "step": 50891 }, { "epoch": 0.9012737584507697, "grad_norm": 0.7897266745567322, "learning_rate": 7.60107963370753e-07, "loss": 0.0581, "step": 50892 }, { "epoch": 0.9012914679877981, "grad_norm": 0.9438137412071228, "learning_rate": 7.598375805003299e-07, "loss": 0.0817, "step": 50893 }, { "epoch": 0.9013091775248265, "grad_norm": 0.7387513518333435, "learning_rate": 7.595672444786018e-07, "loss": 0.0683, "step": 50894 }, { "epoch": 0.9013268870618549, "grad_norm": 0.514715313911438, "learning_rate": 7.592969553064543e-07, "loss": 0.0704, "step": 50895 }, { "epoch": 0.9013445965988834, "grad_norm": 0.5987470746040344, "learning_rate": 7.590267129847806e-07, "loss": 0.0691, "step": 50896 }, { "epoch": 0.9013623061359118, "grad_norm": 0.6253359913825989, "learning_rate": 7.587565175144712e-07, "loss": 0.0575, "step": 50897 }, { "epoch": 0.9013800156729402, "grad_norm": 0.6946039199829102, "learning_rate": 7.584863688964105e-07, "loss": 0.046, "step": 50898 }, { "epoch": 0.9013977252099687, "grad_norm": 0.4733550548553467, "learning_rate": 7.582162671314879e-07, "loss": 0.0401, "step": 50899 }, { "epoch": 0.9014154347469971, "grad_norm": 0.5723416209220886, "learning_rate": 7.579462122205944e-07, "loss": 0.0546, "step": 50900 }, { "epoch": 0.9014331442840255, "grad_norm": 0.6535903811454773, "learning_rate": 7.576762041646157e-07, "loss": 0.0634, "step": 50901 }, { "epoch": 0.9014508538210539, "grad_norm": 0.3999561667442322, "learning_rate": 7.574062429644446e-07, "loss": 0.0475, "step": 50902 }, { "epoch": 0.9014685633580825, "grad_norm": 0.7346634864807129, "learning_rate": 7.571363286209638e-07, "loss": 0.0562, "step": 50903 }, { "epoch": 0.9014862728951109, "grad_norm": 0.5705762505531311, "learning_rate": 7.568664611350673e-07, "loss": 0.0602, "step": 50904 }, { "epoch": 0.9015039824321393, "grad_norm": 0.4337265193462372, "learning_rate": 7.565966405076363e-07, "loss": 0.0236, "step": 50905 }, { "epoch": 0.9015216919691676, "grad_norm": 0.7484127283096313, "learning_rate": 7.563268667395634e-07, "loss": 0.0703, "step": 50906 }, { "epoch": 0.9015394015061962, "grad_norm": 0.4965610206127167, "learning_rate": 7.560571398317362e-07, "loss": 0.0946, "step": 50907 }, { "epoch": 0.9015571110432246, "grad_norm": 0.5957223176956177, "learning_rate": 7.557874597850373e-07, "loss": 0.0434, "step": 50908 }, { "epoch": 0.901574820580253, "grad_norm": 0.4182746410369873, "learning_rate": 7.555178266003577e-07, "loss": 0.0455, "step": 50909 }, { "epoch": 0.9015925301172814, "grad_norm": 0.402240514755249, "learning_rate": 7.552482402785832e-07, "loss": 0.0453, "step": 50910 }, { "epoch": 0.9016102396543099, "grad_norm": 0.61617112159729, "learning_rate": 7.549787008206016e-07, "loss": 0.0648, "step": 50911 }, { "epoch": 0.9016279491913383, "grad_norm": 0.5899905562400818, "learning_rate": 7.547092082272989e-07, "loss": 0.0643, "step": 50912 }, { "epoch": 0.9016456587283667, "grad_norm": 0.8703833818435669, "learning_rate": 7.544397624995625e-07, "loss": 0.0506, "step": 50913 }, { "epoch": 0.9016633682653952, "grad_norm": 0.4389511048793793, "learning_rate": 7.541703636382802e-07, "loss": 0.0584, "step": 50914 }, { "epoch": 0.9016810778024236, "grad_norm": 0.42446643114089966, "learning_rate": 7.539010116443345e-07, "loss": 0.0493, "step": 50915 }, { "epoch": 0.901698787339452, "grad_norm": 0.6015906929969788, "learning_rate": 7.536317065186133e-07, "loss": 0.0614, "step": 50916 }, { "epoch": 0.9017164968764804, "grad_norm": 0.4351469576358795, "learning_rate": 7.533624482620038e-07, "loss": 0.0516, "step": 50917 }, { "epoch": 0.9017342064135089, "grad_norm": 0.6810952425003052, "learning_rate": 7.530932368753923e-07, "loss": 0.0396, "step": 50918 }, { "epoch": 0.9017519159505373, "grad_norm": 0.9400017857551575, "learning_rate": 7.52824072359658e-07, "loss": 0.0317, "step": 50919 }, { "epoch": 0.9017696254875657, "grad_norm": 0.42048028111457825, "learning_rate": 7.525549547156952e-07, "loss": 0.0555, "step": 50920 }, { "epoch": 0.9017873350245941, "grad_norm": 0.7294089794158936, "learning_rate": 7.522858839443864e-07, "loss": 0.0523, "step": 50921 }, { "epoch": 0.9018050445616226, "grad_norm": 0.7684295773506165, "learning_rate": 7.520168600466143e-07, "loss": 0.0538, "step": 50922 }, { "epoch": 0.901822754098651, "grad_norm": 0.6479881405830383, "learning_rate": 7.517478830232666e-07, "loss": 0.0521, "step": 50923 }, { "epoch": 0.9018404636356794, "grad_norm": 0.3982792794704437, "learning_rate": 7.514789528752258e-07, "loss": 0.0344, "step": 50924 }, { "epoch": 0.9018581731727078, "grad_norm": 0.5187003016471863, "learning_rate": 7.512100696033814e-07, "loss": 0.067, "step": 50925 }, { "epoch": 0.9018758827097363, "grad_norm": 0.27393075823783875, "learning_rate": 7.509412332086107e-07, "loss": 0.032, "step": 50926 }, { "epoch": 0.9018935922467647, "grad_norm": 0.3245440721511841, "learning_rate": 7.50672443691805e-07, "loss": 0.0303, "step": 50927 }, { "epoch": 0.9019113017837931, "grad_norm": 0.5849481821060181, "learning_rate": 7.504037010538434e-07, "loss": 0.0542, "step": 50928 }, { "epoch": 0.9019290113208216, "grad_norm": 0.6408571600914001, "learning_rate": 7.501350052956135e-07, "loss": 0.0532, "step": 50929 }, { "epoch": 0.90194672085785, "grad_norm": 0.72916579246521, "learning_rate": 7.498663564179981e-07, "loss": 0.041, "step": 50930 }, { "epoch": 0.9019644303948784, "grad_norm": 0.91661137342453, "learning_rate": 7.495977544218796e-07, "loss": 0.0546, "step": 50931 }, { "epoch": 0.9019821399319068, "grad_norm": 0.1888502985239029, "learning_rate": 7.493291993081475e-07, "loss": 0.045, "step": 50932 }, { "epoch": 0.9019998494689353, "grad_norm": 0.6469993591308594, "learning_rate": 7.490606910776776e-07, "loss": 0.0564, "step": 50933 }, { "epoch": 0.9020175590059637, "grad_norm": 0.33281975984573364, "learning_rate": 7.48792229731356e-07, "loss": 0.0223, "step": 50934 }, { "epoch": 0.9020352685429921, "grad_norm": 0.8890575766563416, "learning_rate": 7.485238152700668e-07, "loss": 0.0793, "step": 50935 }, { "epoch": 0.9020529780800205, "grad_norm": 0.5478063225746155, "learning_rate": 7.482554476946962e-07, "loss": 0.0363, "step": 50936 }, { "epoch": 0.902070687617049, "grad_norm": 0.4956355392932892, "learning_rate": 7.4798712700612e-07, "loss": 0.0482, "step": 50937 }, { "epoch": 0.9020883971540774, "grad_norm": 0.5147437453269958, "learning_rate": 7.47718853205226e-07, "loss": 0.0374, "step": 50938 }, { "epoch": 0.9021061066911058, "grad_norm": 0.6787532567977905, "learning_rate": 7.474506262928949e-07, "loss": 0.0682, "step": 50939 }, { "epoch": 0.9021238162281342, "grad_norm": 0.762744128704071, "learning_rate": 7.471824462700094e-07, "loss": 0.0602, "step": 50940 }, { "epoch": 0.9021415257651627, "grad_norm": 0.49592071771621704, "learning_rate": 7.469143131374523e-07, "loss": 0.0601, "step": 50941 }, { "epoch": 0.9021592353021911, "grad_norm": 0.5772047638893127, "learning_rate": 7.466462268961061e-07, "loss": 0.0417, "step": 50942 }, { "epoch": 0.9021769448392195, "grad_norm": 0.6441243886947632, "learning_rate": 7.463781875468534e-07, "loss": 0.062, "step": 50943 }, { "epoch": 0.902194654376248, "grad_norm": 0.5793694257736206, "learning_rate": 7.461101950905735e-07, "loss": 0.0291, "step": 50944 }, { "epoch": 0.9022123639132764, "grad_norm": 0.45612505078315735, "learning_rate": 7.458422495281491e-07, "loss": 0.057, "step": 50945 }, { "epoch": 0.9022300734503048, "grad_norm": 0.659065306186676, "learning_rate": 7.455743508604629e-07, "loss": 0.0698, "step": 50946 }, { "epoch": 0.9022477829873332, "grad_norm": 0.640950620174408, "learning_rate": 7.45306499088394e-07, "loss": 0.0504, "step": 50947 }, { "epoch": 0.9022654925243617, "grad_norm": 0.6971873044967651, "learning_rate": 7.450386942128234e-07, "loss": 0.0562, "step": 50948 }, { "epoch": 0.9022832020613901, "grad_norm": 1.1874221563339233, "learning_rate": 7.447709362346355e-07, "loss": 0.1108, "step": 50949 }, { "epoch": 0.9023009115984185, "grad_norm": 0.5104905366897583, "learning_rate": 7.445032251547113e-07, "loss": 0.0725, "step": 50950 }, { "epoch": 0.9023186211354469, "grad_norm": 0.7515747547149658, "learning_rate": 7.442355609739282e-07, "loss": 0.0835, "step": 50951 }, { "epoch": 0.9023363306724754, "grad_norm": 0.4719094932079315, "learning_rate": 7.439679436931674e-07, "loss": 0.0561, "step": 50952 }, { "epoch": 0.9023540402095038, "grad_norm": 0.6473982334136963, "learning_rate": 7.43700373313313e-07, "loss": 0.0531, "step": 50953 }, { "epoch": 0.9023717497465322, "grad_norm": 0.5041863918304443, "learning_rate": 7.434328498352394e-07, "loss": 0.0587, "step": 50954 }, { "epoch": 0.9023894592835606, "grad_norm": 0.4482197165489197, "learning_rate": 7.431653732598293e-07, "loss": 0.0743, "step": 50955 }, { "epoch": 0.9024071688205891, "grad_norm": 0.7738655209541321, "learning_rate": 7.428979435879651e-07, "loss": 0.0559, "step": 50956 }, { "epoch": 0.9024248783576175, "grad_norm": 0.979377031326294, "learning_rate": 7.42630560820523e-07, "loss": 0.0795, "step": 50957 }, { "epoch": 0.9024425878946459, "grad_norm": 0.6677035689353943, "learning_rate": 7.423632249583856e-07, "loss": 0.0536, "step": 50958 }, { "epoch": 0.9024602974316744, "grad_norm": 0.5763461589813232, "learning_rate": 7.420959360024304e-07, "loss": 0.0801, "step": 50959 }, { "epoch": 0.9024780069687028, "grad_norm": 0.16355368494987488, "learning_rate": 7.418286939535368e-07, "loss": 0.04, "step": 50960 }, { "epoch": 0.9024957165057312, "grad_norm": 0.4180770516395569, "learning_rate": 7.415614988125874e-07, "loss": 0.0479, "step": 50961 }, { "epoch": 0.9025134260427596, "grad_norm": 0.6357071995735168, "learning_rate": 7.412943505804565e-07, "loss": 0.0563, "step": 50962 }, { "epoch": 0.9025311355797881, "grad_norm": 0.7269385457038879, "learning_rate": 7.41027249258025e-07, "loss": 0.054, "step": 50963 }, { "epoch": 0.9025488451168165, "grad_norm": 1.0089666843414307, "learning_rate": 7.407601948461739e-07, "loss": 0.0612, "step": 50964 }, { "epoch": 0.9025665546538449, "grad_norm": 0.6800553202629089, "learning_rate": 7.404931873457776e-07, "loss": 0.0783, "step": 50965 }, { "epoch": 0.9025842641908733, "grad_norm": 0.3530985414981842, "learning_rate": 7.402262267577153e-07, "loss": 0.0448, "step": 50966 }, { "epoch": 0.9026019737279019, "grad_norm": 0.3560362160205841, "learning_rate": 7.39959313082868e-07, "loss": 0.0454, "step": 50967 }, { "epoch": 0.9026196832649303, "grad_norm": 0.470852255821228, "learning_rate": 7.396924463221116e-07, "loss": 0.0644, "step": 50968 }, { "epoch": 0.9026373928019586, "grad_norm": 0.6596986055374146, "learning_rate": 7.394256264763239e-07, "loss": 0.0505, "step": 50969 }, { "epoch": 0.902655102338987, "grad_norm": 0.3559616506099701, "learning_rate": 7.391588535463839e-07, "loss": 0.0481, "step": 50970 }, { "epoch": 0.9026728118760156, "grad_norm": 0.5317579507827759, "learning_rate": 7.388921275331712e-07, "loss": 0.0446, "step": 50971 }, { "epoch": 0.902690521413044, "grad_norm": 0.34258517622947693, "learning_rate": 7.386254484375599e-07, "loss": 0.0519, "step": 50972 }, { "epoch": 0.9027082309500724, "grad_norm": 0.4637925326824188, "learning_rate": 7.38358816260426e-07, "loss": 0.0537, "step": 50973 }, { "epoch": 0.9027259404871009, "grad_norm": 0.691770613193512, "learning_rate": 7.380922310026505e-07, "loss": 0.039, "step": 50974 }, { "epoch": 0.9027436500241293, "grad_norm": 0.6403191685676575, "learning_rate": 7.378256926651111e-07, "loss": 0.0551, "step": 50975 }, { "epoch": 0.9027613595611577, "grad_norm": 0.7240532040596008, "learning_rate": 7.37559201248682e-07, "loss": 0.0556, "step": 50976 }, { "epoch": 0.9027790690981861, "grad_norm": 0.73590487241745, "learning_rate": 7.372927567542359e-07, "loss": 0.0694, "step": 50977 }, { "epoch": 0.9027967786352146, "grad_norm": 0.8736926913261414, "learning_rate": 7.370263591826603e-07, "loss": 0.0704, "step": 50978 }, { "epoch": 0.902814488172243, "grad_norm": 0.7807738780975342, "learning_rate": 7.36760008534823e-07, "loss": 0.069, "step": 50979 }, { "epoch": 0.9028321977092714, "grad_norm": 0.6872158050537109, "learning_rate": 7.364937048116017e-07, "loss": 0.0347, "step": 50980 }, { "epoch": 0.9028499072462998, "grad_norm": 0.22463126480579376, "learning_rate": 7.362274480138736e-07, "loss": 0.0374, "step": 50981 }, { "epoch": 0.9028676167833283, "grad_norm": 0.4724258780479431, "learning_rate": 7.359612381425168e-07, "loss": 0.0348, "step": 50982 }, { "epoch": 0.9028853263203567, "grad_norm": 0.4194548428058624, "learning_rate": 7.35695075198402e-07, "loss": 0.0391, "step": 50983 }, { "epoch": 0.9029030358573851, "grad_norm": 0.8801060914993286, "learning_rate": 7.354289591824088e-07, "loss": 0.0642, "step": 50984 }, { "epoch": 0.9029207453944136, "grad_norm": 0.24628044664859772, "learning_rate": 7.351628900954111e-07, "loss": 0.0431, "step": 50985 }, { "epoch": 0.902938454931442, "grad_norm": 1.0615224838256836, "learning_rate": 7.34896867938285e-07, "loss": 0.0889, "step": 50986 }, { "epoch": 0.9029561644684704, "grad_norm": 0.8544940948486328, "learning_rate": 7.34630892711905e-07, "loss": 0.0746, "step": 50987 }, { "epoch": 0.9029738740054988, "grad_norm": 0.6018383502960205, "learning_rate": 7.343649644171469e-07, "loss": 0.0522, "step": 50988 }, { "epoch": 0.9029915835425273, "grad_norm": 0.8220524191856384, "learning_rate": 7.340990830548866e-07, "loss": 0.0788, "step": 50989 }, { "epoch": 0.9030092930795557, "grad_norm": 0.6323995590209961, "learning_rate": 7.338332486259952e-07, "loss": 0.0447, "step": 50990 }, { "epoch": 0.9030270026165841, "grad_norm": 0.5886309146881104, "learning_rate": 7.335674611313503e-07, "loss": 0.0579, "step": 50991 }, { "epoch": 0.9030447121536125, "grad_norm": 0.8030158281326294, "learning_rate": 7.333017205718278e-07, "loss": 0.0683, "step": 50992 }, { "epoch": 0.903062421690641, "grad_norm": 0.6942406296730042, "learning_rate": 7.330360269482972e-07, "loss": 0.0581, "step": 50993 }, { "epoch": 0.9030801312276694, "grad_norm": 0.5907675623893738, "learning_rate": 7.327703802616342e-07, "loss": 0.0564, "step": 50994 }, { "epoch": 0.9030978407646978, "grad_norm": 0.5503144860267639, "learning_rate": 7.325047805127149e-07, "loss": 0.054, "step": 50995 }, { "epoch": 0.9031155503017262, "grad_norm": 0.3812304437160492, "learning_rate": 7.322392277024104e-07, "loss": 0.0241, "step": 50996 }, { "epoch": 0.9031332598387547, "grad_norm": 0.6673949360847473, "learning_rate": 7.319737218315964e-07, "loss": 0.0618, "step": 50997 }, { "epoch": 0.9031509693757831, "grad_norm": 0.8740074038505554, "learning_rate": 7.317082629011457e-07, "loss": 0.051, "step": 50998 }, { "epoch": 0.9031686789128115, "grad_norm": 0.49503016471862793, "learning_rate": 7.314428509119325e-07, "loss": 0.0338, "step": 50999 }, { "epoch": 0.90318638844984, "grad_norm": 0.5783982872962952, "learning_rate": 7.311774858648296e-07, "loss": 0.0542, "step": 51000 }, { "epoch": 0.9032040979868684, "grad_norm": 0.6647672653198242, "learning_rate": 7.309121677607094e-07, "loss": 0.0649, "step": 51001 }, { "epoch": 0.9032218075238968, "grad_norm": 0.2989075779914856, "learning_rate": 7.306468966004431e-07, "loss": 0.0356, "step": 51002 }, { "epoch": 0.9032395170609252, "grad_norm": 1.0023293495178223, "learning_rate": 7.303816723849083e-07, "loss": 0.0486, "step": 51003 }, { "epoch": 0.9032572265979537, "grad_norm": 0.48860102891921997, "learning_rate": 7.301164951149725e-07, "loss": 0.0812, "step": 51004 }, { "epoch": 0.9032749361349821, "grad_norm": 0.4823567271232605, "learning_rate": 7.2985136479151e-07, "loss": 0.0416, "step": 51005 }, { "epoch": 0.9032926456720105, "grad_norm": 0.28068143129348755, "learning_rate": 7.29586281415392e-07, "loss": 0.0284, "step": 51006 }, { "epoch": 0.9033103552090389, "grad_norm": 0.6853114366531372, "learning_rate": 7.293212449874942e-07, "loss": 0.0674, "step": 51007 }, { "epoch": 0.9033280647460674, "grad_norm": 0.49964919686317444, "learning_rate": 7.290562555086843e-07, "loss": 0.0583, "step": 51008 }, { "epoch": 0.9033457742830958, "grad_norm": 0.5484964847564697, "learning_rate": 7.287913129798369e-07, "loss": 0.0828, "step": 51009 }, { "epoch": 0.9033634838201242, "grad_norm": 0.23216506838798523, "learning_rate": 7.285264174018242e-07, "loss": 0.0339, "step": 51010 }, { "epoch": 0.9033811933571526, "grad_norm": 0.44924700260162354, "learning_rate": 7.282615687755139e-07, "loss": 0.0304, "step": 51011 }, { "epoch": 0.9033989028941811, "grad_norm": 0.5113515257835388, "learning_rate": 7.279967671017789e-07, "loss": 0.0424, "step": 51012 }, { "epoch": 0.9034166124312095, "grad_norm": 0.3952217996120453, "learning_rate": 7.277320123814918e-07, "loss": 0.0511, "step": 51013 }, { "epoch": 0.9034343219682379, "grad_norm": 0.37847617268562317, "learning_rate": 7.274673046155234e-07, "loss": 0.0309, "step": 51014 }, { "epoch": 0.9034520315052664, "grad_norm": 0.6045994758605957, "learning_rate": 7.27202643804743e-07, "loss": 0.045, "step": 51015 }, { "epoch": 0.9034697410422948, "grad_norm": 0.4605978727340698, "learning_rate": 7.269380299500233e-07, "loss": 0.0625, "step": 51016 }, { "epoch": 0.9034874505793232, "grad_norm": 0.5018709897994995, "learning_rate": 7.266734630522354e-07, "loss": 0.0269, "step": 51017 }, { "epoch": 0.9035051601163516, "grad_norm": 0.8515301942825317, "learning_rate": 7.264089431122451e-07, "loss": 0.0806, "step": 51018 }, { "epoch": 0.9035228696533801, "grad_norm": 0.5485947132110596, "learning_rate": 7.261444701309267e-07, "loss": 0.0579, "step": 51019 }, { "epoch": 0.9035405791904085, "grad_norm": 0.4237704277038574, "learning_rate": 7.258800441091496e-07, "loss": 0.0473, "step": 51020 }, { "epoch": 0.9035582887274369, "grad_norm": 0.5431010723114014, "learning_rate": 7.256156650477847e-07, "loss": 0.0499, "step": 51021 }, { "epoch": 0.9035759982644653, "grad_norm": 0.31325483322143555, "learning_rate": 7.253513329476997e-07, "loss": 0.0531, "step": 51022 }, { "epoch": 0.9035937078014938, "grad_norm": 0.48297086358070374, "learning_rate": 7.250870478097638e-07, "loss": 0.06, "step": 51023 }, { "epoch": 0.9036114173385222, "grad_norm": 0.4615388512611389, "learning_rate": 7.24822809634848e-07, "loss": 0.0642, "step": 51024 }, { "epoch": 0.9036291268755506, "grad_norm": 0.4256957173347473, "learning_rate": 7.245586184238218e-07, "loss": 0.039, "step": 51025 }, { "epoch": 0.903646836412579, "grad_norm": 0.8561593890190125, "learning_rate": 7.242944741775526e-07, "loss": 0.0623, "step": 51026 }, { "epoch": 0.9036645459496075, "grad_norm": 0.4480453133583069, "learning_rate": 7.240303768969114e-07, "loss": 0.051, "step": 51027 }, { "epoch": 0.9036822554866359, "grad_norm": 0.4733453392982483, "learning_rate": 7.237663265827677e-07, "loss": 0.0303, "step": 51028 }, { "epoch": 0.9036999650236643, "grad_norm": 0.5996343493461609, "learning_rate": 7.235023232359889e-07, "loss": 0.0404, "step": 51029 }, { "epoch": 0.9037176745606929, "grad_norm": 0.5936930179595947, "learning_rate": 7.23238366857441e-07, "loss": 0.045, "step": 51030 }, { "epoch": 0.9037353840977213, "grad_norm": 0.8284489512443542, "learning_rate": 7.229744574479969e-07, "loss": 0.0471, "step": 51031 }, { "epoch": 0.9037530936347496, "grad_norm": 0.506556510925293, "learning_rate": 7.227105950085239e-07, "loss": 0.0685, "step": 51032 }, { "epoch": 0.903770803171778, "grad_norm": 0.6022635698318481, "learning_rate": 7.224467795398864e-07, "loss": 0.038, "step": 51033 }, { "epoch": 0.9037885127088066, "grad_norm": 0.6867653727531433, "learning_rate": 7.221830110429556e-07, "loss": 0.0469, "step": 51034 }, { "epoch": 0.903806222245835, "grad_norm": 0.9046152830123901, "learning_rate": 7.219192895185989e-07, "loss": 0.0748, "step": 51035 }, { "epoch": 0.9038239317828634, "grad_norm": 0.7942625880241394, "learning_rate": 7.216556149676822e-07, "loss": 0.0839, "step": 51036 }, { "epoch": 0.9038416413198918, "grad_norm": 0.5543071627616882, "learning_rate": 7.213919873910751e-07, "loss": 0.0383, "step": 51037 }, { "epoch": 0.9038593508569203, "grad_norm": 0.5207470059394836, "learning_rate": 7.211284067896451e-07, "loss": 0.0374, "step": 51038 }, { "epoch": 0.9038770603939487, "grad_norm": 0.751042902469635, "learning_rate": 7.208648731642597e-07, "loss": 0.0575, "step": 51039 }, { "epoch": 0.9038947699309771, "grad_norm": 0.7327736020088196, "learning_rate": 7.206013865157817e-07, "loss": 0.0524, "step": 51040 }, { "epoch": 0.9039124794680055, "grad_norm": 0.7662848234176636, "learning_rate": 7.203379468450821e-07, "loss": 0.0506, "step": 51041 }, { "epoch": 0.903930189005034, "grad_norm": 0.5412546396255493, "learning_rate": 7.200745541530285e-07, "loss": 0.0474, "step": 51042 }, { "epoch": 0.9039478985420624, "grad_norm": 0.44355762004852295, "learning_rate": 7.198112084404801e-07, "loss": 0.0461, "step": 51043 }, { "epoch": 0.9039656080790908, "grad_norm": 0.44183918833732605, "learning_rate": 7.195479097083113e-07, "loss": 0.053, "step": 51044 }, { "epoch": 0.9039833176161193, "grad_norm": 0.7024041414260864, "learning_rate": 7.192846579573847e-07, "loss": 0.0516, "step": 51045 }, { "epoch": 0.9040010271531477, "grad_norm": 0.433627724647522, "learning_rate": 7.190214531885697e-07, "loss": 0.0606, "step": 51046 }, { "epoch": 0.9040187366901761, "grad_norm": 0.25856131315231323, "learning_rate": 7.187582954027288e-07, "loss": 0.062, "step": 51047 }, { "epoch": 0.9040364462272045, "grad_norm": 0.5335552096366882, "learning_rate": 7.184951846007281e-07, "loss": 0.0796, "step": 51048 }, { "epoch": 0.904054155764233, "grad_norm": 0.4333736002445221, "learning_rate": 7.182321207834353e-07, "loss": 0.0452, "step": 51049 }, { "epoch": 0.9040718653012614, "grad_norm": 0.698992133140564, "learning_rate": 7.179691039517111e-07, "loss": 0.0602, "step": 51050 }, { "epoch": 0.9040895748382898, "grad_norm": 0.8509681224822998, "learning_rate": 7.177061341064251e-07, "loss": 0.0447, "step": 51051 }, { "epoch": 0.9041072843753182, "grad_norm": 0.6449438333511353, "learning_rate": 7.174432112484414e-07, "loss": 0.0709, "step": 51052 }, { "epoch": 0.9041249939123467, "grad_norm": 0.7726522088050842, "learning_rate": 7.171803353786261e-07, "loss": 0.043, "step": 51053 }, { "epoch": 0.9041427034493751, "grad_norm": 0.4701232314109802, "learning_rate": 7.169175064978418e-07, "loss": 0.059, "step": 51054 }, { "epoch": 0.9041604129864035, "grad_norm": 0.6080048084259033, "learning_rate": 7.166547246069543e-07, "loss": 0.0575, "step": 51055 }, { "epoch": 0.9041781225234319, "grad_norm": 0.49536633491516113, "learning_rate": 7.163919897068299e-07, "loss": 0.0499, "step": 51056 }, { "epoch": 0.9041958320604604, "grad_norm": 0.7399088740348816, "learning_rate": 7.161293017983295e-07, "loss": 0.0582, "step": 51057 }, { "epoch": 0.9042135415974888, "grad_norm": 0.3719348609447479, "learning_rate": 7.158666608823189e-07, "loss": 0.07, "step": 51058 }, { "epoch": 0.9042312511345172, "grad_norm": 0.18781109154224396, "learning_rate": 7.156040669596608e-07, "loss": 0.0415, "step": 51059 }, { "epoch": 0.9042489606715457, "grad_norm": 0.6252744197845459, "learning_rate": 7.153415200312247e-07, "loss": 0.0565, "step": 51060 }, { "epoch": 0.9042666702085741, "grad_norm": 0.5036104321479797, "learning_rate": 7.150790200978663e-07, "loss": 0.0566, "step": 51061 }, { "epoch": 0.9042843797456025, "grad_norm": 0.6561504602432251, "learning_rate": 7.14816567160455e-07, "loss": 0.0628, "step": 51062 }, { "epoch": 0.9043020892826309, "grad_norm": 0.49770525097846985, "learning_rate": 7.145541612198519e-07, "loss": 0.0218, "step": 51063 }, { "epoch": 0.9043197988196594, "grad_norm": 0.8182273507118225, "learning_rate": 7.142918022769196e-07, "loss": 0.0581, "step": 51064 }, { "epoch": 0.9043375083566878, "grad_norm": 0.6238003969192505, "learning_rate": 7.140294903325223e-07, "loss": 0.0336, "step": 51065 }, { "epoch": 0.9043552178937162, "grad_norm": 0.5397003293037415, "learning_rate": 7.137672253875244e-07, "loss": 0.04, "step": 51066 }, { "epoch": 0.9043729274307446, "grad_norm": 0.8203269839286804, "learning_rate": 7.135050074427885e-07, "loss": 0.0646, "step": 51067 }, { "epoch": 0.9043906369677731, "grad_norm": 0.7474757432937622, "learning_rate": 7.132428364991738e-07, "loss": 0.0666, "step": 51068 }, { "epoch": 0.9044083465048015, "grad_norm": 0.530662477016449, "learning_rate": 7.129807125575466e-07, "loss": 0.0757, "step": 51069 }, { "epoch": 0.9044260560418299, "grad_norm": 0.5527594685554504, "learning_rate": 7.127186356187676e-07, "loss": 0.0404, "step": 51070 }, { "epoch": 0.9044437655788583, "grad_norm": 0.8980345726013184, "learning_rate": 7.124566056836996e-07, "loss": 0.0569, "step": 51071 }, { "epoch": 0.9044614751158868, "grad_norm": 0.47412577271461487, "learning_rate": 7.121946227532017e-07, "loss": 0.0621, "step": 51072 }, { "epoch": 0.9044791846529152, "grad_norm": 0.5851590633392334, "learning_rate": 7.119326868281401e-07, "loss": 0.0596, "step": 51073 }, { "epoch": 0.9044968941899436, "grad_norm": 0.4547017812728882, "learning_rate": 7.116707979093756e-07, "loss": 0.0462, "step": 51074 }, { "epoch": 0.9045146037269721, "grad_norm": 0.5588226318359375, "learning_rate": 7.114089559977677e-07, "loss": 0.0701, "step": 51075 }, { "epoch": 0.9045323132640005, "grad_norm": 0.6130495071411133, "learning_rate": 7.111471610941806e-07, "loss": 0.0321, "step": 51076 }, { "epoch": 0.9045500228010289, "grad_norm": 0.5336920022964478, "learning_rate": 7.108854131994719e-07, "loss": 0.0797, "step": 51077 }, { "epoch": 0.9045677323380573, "grad_norm": 0.6780487895011902, "learning_rate": 7.106237123145076e-07, "loss": 0.0289, "step": 51078 }, { "epoch": 0.9045854418750858, "grad_norm": 0.4853527247905731, "learning_rate": 7.103620584401454e-07, "loss": 0.0274, "step": 51079 }, { "epoch": 0.9046031514121142, "grad_norm": 0.33395981788635254, "learning_rate": 7.101004515772447e-07, "loss": 0.0272, "step": 51080 }, { "epoch": 0.9046208609491426, "grad_norm": 0.8208884000778198, "learning_rate": 7.098388917266696e-07, "loss": 0.0565, "step": 51081 }, { "epoch": 0.904638570486171, "grad_norm": 0.7057476043701172, "learning_rate": 7.095773788892795e-07, "loss": 0.0583, "step": 51082 }, { "epoch": 0.9046562800231995, "grad_norm": 0.5519248247146606, "learning_rate": 7.093159130659338e-07, "loss": 0.0434, "step": 51083 }, { "epoch": 0.9046739895602279, "grad_norm": 0.7234947085380554, "learning_rate": 7.090544942574917e-07, "loss": 0.0632, "step": 51084 }, { "epoch": 0.9046916990972563, "grad_norm": 0.5844254493713379, "learning_rate": 7.087931224648192e-07, "loss": 0.0546, "step": 51085 }, { "epoch": 0.9047094086342847, "grad_norm": 0.36229395866394043, "learning_rate": 7.08531797688769e-07, "loss": 0.0319, "step": 51086 }, { "epoch": 0.9047271181713132, "grad_norm": 0.45151498913764954, "learning_rate": 7.082705199302036e-07, "loss": 0.0404, "step": 51087 }, { "epoch": 0.9047448277083416, "grad_norm": 0.5795556306838989, "learning_rate": 7.080092891899842e-07, "loss": 0.0437, "step": 51088 }, { "epoch": 0.90476253724537, "grad_norm": 0.5800718069076538, "learning_rate": 7.077481054689683e-07, "loss": 0.0485, "step": 51089 }, { "epoch": 0.9047802467823985, "grad_norm": 0.5148393511772156, "learning_rate": 7.074869687680135e-07, "loss": 0.0704, "step": 51090 }, { "epoch": 0.9047979563194269, "grad_norm": 0.7159242033958435, "learning_rate": 7.072258790879826e-07, "loss": 0.0491, "step": 51091 }, { "epoch": 0.9048156658564553, "grad_norm": 0.5613079071044922, "learning_rate": 7.069648364297316e-07, "loss": 0.05, "step": 51092 }, { "epoch": 0.9048333753934837, "grad_norm": 0.6693741083145142, "learning_rate": 7.067038407941228e-07, "loss": 0.0562, "step": 51093 }, { "epoch": 0.9048510849305123, "grad_norm": 0.33999255299568176, "learning_rate": 7.064428921820109e-07, "loss": 0.0361, "step": 51094 }, { "epoch": 0.9048687944675406, "grad_norm": 0.36066189408302307, "learning_rate": 7.061819905942585e-07, "loss": 0.046, "step": 51095 }, { "epoch": 0.904886504004569, "grad_norm": 0.31324130296707153, "learning_rate": 7.059211360317197e-07, "loss": 0.0765, "step": 51096 }, { "epoch": 0.9049042135415974, "grad_norm": 0.5062476992607117, "learning_rate": 7.05660328495254e-07, "loss": 0.0461, "step": 51097 }, { "epoch": 0.904921923078626, "grad_norm": 0.3953579366207123, "learning_rate": 7.053995679857206e-07, "loss": 0.0443, "step": 51098 }, { "epoch": 0.9049396326156544, "grad_norm": 0.5571668744087219, "learning_rate": 7.051388545039788e-07, "loss": 0.0451, "step": 51099 }, { "epoch": 0.9049573421526828, "grad_norm": 0.6008314490318298, "learning_rate": 7.048781880508832e-07, "loss": 0.0488, "step": 51100 }, { "epoch": 0.9049750516897112, "grad_norm": 0.8037233352661133, "learning_rate": 7.04617568627291e-07, "loss": 0.0549, "step": 51101 }, { "epoch": 0.9049927612267397, "grad_norm": 0.2848840057849884, "learning_rate": 7.043569962340601e-07, "loss": 0.0726, "step": 51102 }, { "epoch": 0.9050104707637681, "grad_norm": 0.31165850162506104, "learning_rate": 7.040964708720515e-07, "loss": 0.0389, "step": 51103 }, { "epoch": 0.9050281803007965, "grad_norm": 0.6478406190872192, "learning_rate": 7.038359925421195e-07, "loss": 0.0593, "step": 51104 }, { "epoch": 0.905045889837825, "grad_norm": 0.5241569876670837, "learning_rate": 7.0357556124512e-07, "loss": 0.0531, "step": 51105 }, { "epoch": 0.9050635993748534, "grad_norm": 0.7451167106628418, "learning_rate": 7.033151769819124e-07, "loss": 0.0625, "step": 51106 }, { "epoch": 0.9050813089118818, "grad_norm": 0.6330872774124146, "learning_rate": 7.03054839753351e-07, "loss": 0.0553, "step": 51107 }, { "epoch": 0.9050990184489102, "grad_norm": 0.5668131709098816, "learning_rate": 7.027945495602916e-07, "loss": 0.0384, "step": 51108 }, { "epoch": 0.9051167279859387, "grad_norm": 0.5152901411056519, "learning_rate": 7.025343064035938e-07, "loss": 0.0734, "step": 51109 }, { "epoch": 0.9051344375229671, "grad_norm": 0.5510295033454895, "learning_rate": 7.022741102841101e-07, "loss": 0.0464, "step": 51110 }, { "epoch": 0.9051521470599955, "grad_norm": 0.21371208131313324, "learning_rate": 7.020139612026999e-07, "loss": 0.0318, "step": 51111 }, { "epoch": 0.9051698565970239, "grad_norm": 0.9929137825965881, "learning_rate": 7.017538591602174e-07, "loss": 0.0755, "step": 51112 }, { "epoch": 0.9051875661340524, "grad_norm": 0.919909656047821, "learning_rate": 7.014938041575203e-07, "loss": 0.0672, "step": 51113 }, { "epoch": 0.9052052756710808, "grad_norm": 0.3758433759212494, "learning_rate": 7.012337961954613e-07, "loss": 0.0231, "step": 51114 }, { "epoch": 0.9052229852081092, "grad_norm": 0.3257284462451935, "learning_rate": 7.009738352748962e-07, "loss": 0.0589, "step": 51115 }, { "epoch": 0.9052406947451376, "grad_norm": 0.6005520820617676, "learning_rate": 7.007139213966795e-07, "loss": 0.0442, "step": 51116 }, { "epoch": 0.9052584042821661, "grad_norm": 0.638634204864502, "learning_rate": 7.004540545616722e-07, "loss": 0.0647, "step": 51117 }, { "epoch": 0.9052761138191945, "grad_norm": 0.4910190999507904, "learning_rate": 7.001942347707202e-07, "loss": 0.0722, "step": 51118 }, { "epoch": 0.9052938233562229, "grad_norm": 0.4702984392642975, "learning_rate": 6.999344620246845e-07, "loss": 0.0275, "step": 51119 }, { "epoch": 0.9053115328932514, "grad_norm": 0.6131471395492554, "learning_rate": 6.996747363244177e-07, "loss": 0.0566, "step": 51120 }, { "epoch": 0.9053292424302798, "grad_norm": 0.45568743348121643, "learning_rate": 6.994150576707759e-07, "loss": 0.0668, "step": 51121 }, { "epoch": 0.9053469519673082, "grad_norm": 0.576373279094696, "learning_rate": 6.991554260646099e-07, "loss": 0.0473, "step": 51122 }, { "epoch": 0.9053646615043366, "grad_norm": 0.4313453733921051, "learning_rate": 6.988958415067775e-07, "loss": 0.0629, "step": 51123 }, { "epoch": 0.9053823710413651, "grad_norm": 0.3490893244743347, "learning_rate": 6.986363039981331e-07, "loss": 0.0684, "step": 51124 }, { "epoch": 0.9054000805783935, "grad_norm": 0.48199206590652466, "learning_rate": 6.983768135395258e-07, "loss": 0.069, "step": 51125 }, { "epoch": 0.9054177901154219, "grad_norm": 0.60325688123703, "learning_rate": 6.981173701318133e-07, "loss": 0.0321, "step": 51126 }, { "epoch": 0.9054354996524503, "grad_norm": 0.3425278067588806, "learning_rate": 6.9785797377585e-07, "loss": 0.0375, "step": 51127 }, { "epoch": 0.9054532091894788, "grad_norm": 0.23213668167591095, "learning_rate": 6.975986244724852e-07, "loss": 0.0253, "step": 51128 }, { "epoch": 0.9054709187265072, "grad_norm": 0.3743344843387604, "learning_rate": 6.97339322222575e-07, "loss": 0.0581, "step": 51129 }, { "epoch": 0.9054886282635356, "grad_norm": 0.5777955055236816, "learning_rate": 6.9708006702697e-07, "loss": 0.0494, "step": 51130 }, { "epoch": 0.905506337800564, "grad_norm": 0.5461886525154114, "learning_rate": 6.968208588865266e-07, "loss": 0.0483, "step": 51131 }, { "epoch": 0.9055240473375925, "grad_norm": 0.6577366590499878, "learning_rate": 6.965616978020955e-07, "loss": 0.0533, "step": 51132 }, { "epoch": 0.9055417568746209, "grad_norm": 0.9226890206336975, "learning_rate": 6.963025837745296e-07, "loss": 0.0673, "step": 51133 }, { "epoch": 0.9055594664116493, "grad_norm": 0.5900266170501709, "learning_rate": 6.960435168046814e-07, "loss": 0.0576, "step": 51134 }, { "epoch": 0.9055771759486778, "grad_norm": 0.6617493629455566, "learning_rate": 6.957844968934052e-07, "loss": 0.0752, "step": 51135 }, { "epoch": 0.9055948854857062, "grad_norm": 0.5377382040023804, "learning_rate": 6.955255240415487e-07, "loss": 0.0514, "step": 51136 }, { "epoch": 0.9056125950227346, "grad_norm": 0.3809633255004883, "learning_rate": 6.952665982499679e-07, "loss": 0.054, "step": 51137 }, { "epoch": 0.905630304559763, "grad_norm": 0.548707902431488, "learning_rate": 6.950077195195137e-07, "loss": 0.0296, "step": 51138 }, { "epoch": 0.9056480140967915, "grad_norm": 0.2897644639015198, "learning_rate": 6.947488878510339e-07, "loss": 0.0359, "step": 51139 }, { "epoch": 0.9056657236338199, "grad_norm": 0.7564823031425476, "learning_rate": 6.944901032453859e-07, "loss": 0.048, "step": 51140 }, { "epoch": 0.9056834331708483, "grad_norm": 0.39555492997169495, "learning_rate": 6.942313657034177e-07, "loss": 0.0347, "step": 51141 }, { "epoch": 0.9057011427078767, "grad_norm": 0.6654629707336426, "learning_rate": 6.939726752259834e-07, "loss": 0.0538, "step": 51142 }, { "epoch": 0.9057188522449052, "grad_norm": 0.33139535784721375, "learning_rate": 6.937140318139307e-07, "loss": 0.043, "step": 51143 }, { "epoch": 0.9057365617819336, "grad_norm": 0.4868530035018921, "learning_rate": 6.934554354681122e-07, "loss": 0.0791, "step": 51144 }, { "epoch": 0.905754271318962, "grad_norm": 0.2314612716436386, "learning_rate": 6.931968861893789e-07, "loss": 0.0689, "step": 51145 }, { "epoch": 0.9057719808559904, "grad_norm": 0.5708388686180115, "learning_rate": 6.929383839785785e-07, "loss": 0.0365, "step": 51146 }, { "epoch": 0.9057896903930189, "grad_norm": 0.45843297243118286, "learning_rate": 6.926799288365654e-07, "loss": 0.0614, "step": 51147 }, { "epoch": 0.9058073999300473, "grad_norm": 0.4494975209236145, "learning_rate": 6.92421520764187e-07, "loss": 0.0472, "step": 51148 }, { "epoch": 0.9058251094670757, "grad_norm": 0.4301082491874695, "learning_rate": 6.921631597622963e-07, "loss": 0.0463, "step": 51149 }, { "epoch": 0.9058428190041042, "grad_norm": 0.8401603698730469, "learning_rate": 6.919048458317406e-07, "loss": 0.0585, "step": 51150 }, { "epoch": 0.9058605285411326, "grad_norm": 0.7143114805221558, "learning_rate": 6.916465789733695e-07, "loss": 0.0465, "step": 51151 }, { "epoch": 0.905878238078161, "grad_norm": 0.7867962718009949, "learning_rate": 6.913883591880372e-07, "loss": 0.0612, "step": 51152 }, { "epoch": 0.9058959476151894, "grad_norm": 0.38613179326057434, "learning_rate": 6.911301864765879e-07, "loss": 0.0477, "step": 51153 }, { "epoch": 0.9059136571522179, "grad_norm": 0.4245016574859619, "learning_rate": 6.908720608398728e-07, "loss": 0.0615, "step": 51154 }, { "epoch": 0.9059313666892463, "grad_norm": 0.6162745356559753, "learning_rate": 6.906139822787428e-07, "loss": 0.0665, "step": 51155 }, { "epoch": 0.9059490762262747, "grad_norm": 0.6013815999031067, "learning_rate": 6.903559507940455e-07, "loss": 0.0389, "step": 51156 }, { "epoch": 0.9059667857633031, "grad_norm": 0.5472996234893799, "learning_rate": 6.900979663866286e-07, "loss": 0.0607, "step": 51157 }, { "epoch": 0.9059844953003316, "grad_norm": 0.3888282775878906, "learning_rate": 6.898400290573415e-07, "loss": 0.0325, "step": 51158 }, { "epoch": 0.90600220483736, "grad_norm": 0.6375094056129456, "learning_rate": 6.89582138807035e-07, "loss": 0.0494, "step": 51159 }, { "epoch": 0.9060199143743884, "grad_norm": 0.7620680332183838, "learning_rate": 6.893242956365537e-07, "loss": 0.0569, "step": 51160 }, { "epoch": 0.9060376239114168, "grad_norm": 0.454166442155838, "learning_rate": 6.890664995467499e-07, "loss": 0.0536, "step": 51161 }, { "epoch": 0.9060553334484454, "grad_norm": 0.5436238050460815, "learning_rate": 6.888087505384682e-07, "loss": 0.0576, "step": 51162 }, { "epoch": 0.9060730429854738, "grad_norm": 0.24809953570365906, "learning_rate": 6.885510486125613e-07, "loss": 0.0546, "step": 51163 }, { "epoch": 0.9060907525225022, "grad_norm": 0.648802638053894, "learning_rate": 6.882933937698715e-07, "loss": 0.0499, "step": 51164 }, { "epoch": 0.9061084620595307, "grad_norm": 0.4770813584327698, "learning_rate": 6.880357860112485e-07, "loss": 0.0456, "step": 51165 }, { "epoch": 0.9061261715965591, "grad_norm": 0.5847816467285156, "learning_rate": 6.877782253375431e-07, "loss": 0.0447, "step": 51166 }, { "epoch": 0.9061438811335875, "grad_norm": 0.859538733959198, "learning_rate": 6.875207117495979e-07, "loss": 0.0612, "step": 51167 }, { "epoch": 0.9061615906706159, "grad_norm": 0.4384443759918213, "learning_rate": 6.872632452482592e-07, "loss": 0.0498, "step": 51168 }, { "epoch": 0.9061793002076444, "grad_norm": 0.703521728515625, "learning_rate": 6.870058258343792e-07, "loss": 0.0493, "step": 51169 }, { "epoch": 0.9061970097446728, "grad_norm": 0.5381247997283936, "learning_rate": 6.867484535088043e-07, "loss": 0.0577, "step": 51170 }, { "epoch": 0.9062147192817012, "grad_norm": 0.5529824495315552, "learning_rate": 6.864911282723768e-07, "loss": 0.0629, "step": 51171 }, { "epoch": 0.9062324288187296, "grad_norm": 0.6552348136901855, "learning_rate": 6.862338501259463e-07, "loss": 0.0585, "step": 51172 }, { "epoch": 0.9062501383557581, "grad_norm": 0.5976102352142334, "learning_rate": 6.859766190703571e-07, "loss": 0.0382, "step": 51173 }, { "epoch": 0.9062678478927865, "grad_norm": 0.5865016579627991, "learning_rate": 6.857194351064616e-07, "loss": 0.0751, "step": 51174 }, { "epoch": 0.9062855574298149, "grad_norm": 0.5397116541862488, "learning_rate": 6.854622982350978e-07, "loss": 0.0389, "step": 51175 }, { "epoch": 0.9063032669668433, "grad_norm": 0.6997097730636597, "learning_rate": 6.852052084571147e-07, "loss": 0.0554, "step": 51176 }, { "epoch": 0.9063209765038718, "grad_norm": 0.7633331418037415, "learning_rate": 6.849481657733603e-07, "loss": 0.0475, "step": 51177 }, { "epoch": 0.9063386860409002, "grad_norm": 0.6215072870254517, "learning_rate": 6.846911701846769e-07, "loss": 0.0574, "step": 51178 }, { "epoch": 0.9063563955779286, "grad_norm": 0.3620312511920929, "learning_rate": 6.844342216919125e-07, "loss": 0.0463, "step": 51179 }, { "epoch": 0.9063741051149571, "grad_norm": 0.8067200779914856, "learning_rate": 6.841773202959128e-07, "loss": 0.0772, "step": 51180 }, { "epoch": 0.9063918146519855, "grad_norm": 0.6743886470794678, "learning_rate": 6.839204659975224e-07, "loss": 0.0443, "step": 51181 }, { "epoch": 0.9064095241890139, "grad_norm": 0.5969347357749939, "learning_rate": 6.836636587975836e-07, "loss": 0.0415, "step": 51182 }, { "epoch": 0.9064272337260423, "grad_norm": 0.4958464503288269, "learning_rate": 6.834068986969428e-07, "loss": 0.0544, "step": 51183 }, { "epoch": 0.9064449432630708, "grad_norm": 0.6738428473472595, "learning_rate": 6.831501856964489e-07, "loss": 0.0692, "step": 51184 }, { "epoch": 0.9064626528000992, "grad_norm": 0.6016605496406555, "learning_rate": 6.828935197969399e-07, "loss": 0.0803, "step": 51185 }, { "epoch": 0.9064803623371276, "grad_norm": 0.6346694827079773, "learning_rate": 6.826369009992634e-07, "loss": 0.0583, "step": 51186 }, { "epoch": 0.906498071874156, "grad_norm": 0.5331048369407654, "learning_rate": 6.823803293042619e-07, "loss": 0.024, "step": 51187 }, { "epoch": 0.9065157814111845, "grad_norm": 0.4709654450416565, "learning_rate": 6.821238047127832e-07, "loss": 0.0532, "step": 51188 }, { "epoch": 0.9065334909482129, "grad_norm": 0.3355453312397003, "learning_rate": 6.818673272256682e-07, "loss": 0.0343, "step": 51189 }, { "epoch": 0.9065512004852413, "grad_norm": 0.9877573251724243, "learning_rate": 6.816108968437612e-07, "loss": 0.0521, "step": 51190 }, { "epoch": 0.9065689100222697, "grad_norm": 0.4744241535663605, "learning_rate": 6.813545135679084e-07, "loss": 0.0488, "step": 51191 }, { "epoch": 0.9065866195592982, "grad_norm": 0.4878239929676056, "learning_rate": 6.810981773989489e-07, "loss": 0.043, "step": 51192 }, { "epoch": 0.9066043290963266, "grad_norm": 0.5537825226783752, "learning_rate": 6.808418883377271e-07, "loss": 0.0522, "step": 51193 }, { "epoch": 0.906622038633355, "grad_norm": 0.5828805565834045, "learning_rate": 6.80585646385089e-07, "loss": 0.0544, "step": 51194 }, { "epoch": 0.9066397481703835, "grad_norm": 0.22021983563899994, "learning_rate": 6.803294515418756e-07, "loss": 0.0648, "step": 51195 }, { "epoch": 0.9066574577074119, "grad_norm": 1.019726276397705, "learning_rate": 6.800733038089296e-07, "loss": 0.0839, "step": 51196 }, { "epoch": 0.9066751672444403, "grad_norm": 0.8057852983474731, "learning_rate": 6.79817203187092e-07, "loss": 0.0511, "step": 51197 }, { "epoch": 0.9066928767814687, "grad_norm": 0.29157915711402893, "learning_rate": 6.79561149677212e-07, "loss": 0.051, "step": 51198 }, { "epoch": 0.9067105863184972, "grad_norm": 0.4150295853614807, "learning_rate": 6.79305143280124e-07, "loss": 0.0497, "step": 51199 }, { "epoch": 0.9067282958555256, "grad_norm": 0.5420644283294678, "learning_rate": 6.790491839966739e-07, "loss": 0.0478, "step": 51200 }, { "epoch": 0.906746005392554, "grad_norm": 0.446347177028656, "learning_rate": 6.787932718277045e-07, "loss": 0.0547, "step": 51201 }, { "epoch": 0.9067637149295824, "grad_norm": 0.9147871136665344, "learning_rate": 6.785374067740585e-07, "loss": 0.0708, "step": 51202 }, { "epoch": 0.9067814244666109, "grad_norm": 0.5580271482467651, "learning_rate": 6.782815888365734e-07, "loss": 0.0593, "step": 51203 }, { "epoch": 0.9067991340036393, "grad_norm": 0.4279964864253998, "learning_rate": 6.780258180160937e-07, "loss": 0.0413, "step": 51204 }, { "epoch": 0.9068168435406677, "grad_norm": 0.6518832445144653, "learning_rate": 6.777700943134584e-07, "loss": 0.0582, "step": 51205 }, { "epoch": 0.9068345530776961, "grad_norm": 0.7732592821121216, "learning_rate": 6.77514417729514e-07, "loss": 0.0483, "step": 51206 }, { "epoch": 0.9068522626147246, "grad_norm": 0.688204824924469, "learning_rate": 6.77258788265096e-07, "loss": 0.062, "step": 51207 }, { "epoch": 0.906869972151753, "grad_norm": 0.6416891813278198, "learning_rate": 6.77003205921049e-07, "loss": 0.076, "step": 51208 }, { "epoch": 0.9068876816887814, "grad_norm": 0.6519184708595276, "learning_rate": 6.767476706982157e-07, "loss": 0.036, "step": 51209 }, { "epoch": 0.9069053912258099, "grad_norm": 0.7421441674232483, "learning_rate": 6.764921825974301e-07, "loss": 0.0639, "step": 51210 }, { "epoch": 0.9069231007628383, "grad_norm": 0.5988540053367615, "learning_rate": 6.762367416195386e-07, "loss": 0.0323, "step": 51211 }, { "epoch": 0.9069408102998667, "grad_norm": 1.0012714862823486, "learning_rate": 6.759813477653787e-07, "loss": 0.0653, "step": 51212 }, { "epoch": 0.9069585198368951, "grad_norm": 0.6351596713066101, "learning_rate": 6.75726001035793e-07, "loss": 0.0459, "step": 51213 }, { "epoch": 0.9069762293739236, "grad_norm": 0.19556090235710144, "learning_rate": 6.754707014316174e-07, "loss": 0.0418, "step": 51214 }, { "epoch": 0.906993938910952, "grad_norm": 0.511615514755249, "learning_rate": 6.752154489536949e-07, "loss": 0.0291, "step": 51215 }, { "epoch": 0.9070116484479804, "grad_norm": 0.6438618898391724, "learning_rate": 6.749602436028646e-07, "loss": 0.0568, "step": 51216 }, { "epoch": 0.9070293579850088, "grad_norm": 0.6199107766151428, "learning_rate": 6.747050853799657e-07, "loss": 0.0586, "step": 51217 }, { "epoch": 0.9070470675220373, "grad_norm": 0.8596423864364624, "learning_rate": 6.744499742858395e-07, "loss": 0.0809, "step": 51218 }, { "epoch": 0.9070647770590657, "grad_norm": 0.4252781569957733, "learning_rate": 6.741949103213218e-07, "loss": 0.0515, "step": 51219 }, { "epoch": 0.9070824865960941, "grad_norm": 0.6218491792678833, "learning_rate": 6.739398934872587e-07, "loss": 0.0464, "step": 51220 }, { "epoch": 0.9071001961331225, "grad_norm": 0.6123531460762024, "learning_rate": 6.736849237844794e-07, "loss": 0.0735, "step": 51221 }, { "epoch": 0.907117905670151, "grad_norm": 0.700045108795166, "learning_rate": 6.7343000121383e-07, "loss": 0.0501, "step": 51222 }, { "epoch": 0.9071356152071794, "grad_norm": 0.5059463381767273, "learning_rate": 6.731751257761482e-07, "loss": 0.0475, "step": 51223 }, { "epoch": 0.9071533247442078, "grad_norm": 0.6919488906860352, "learning_rate": 6.729202974722682e-07, "loss": 0.0522, "step": 51224 }, { "epoch": 0.9071710342812364, "grad_norm": 0.8355405330657959, "learning_rate": 6.726655163030326e-07, "loss": 0.0798, "step": 51225 }, { "epoch": 0.9071887438182648, "grad_norm": 0.7306156754493713, "learning_rate": 6.72410782269276e-07, "loss": 0.0469, "step": 51226 }, { "epoch": 0.9072064533552932, "grad_norm": 0.655612051486969, "learning_rate": 6.721560953718426e-07, "loss": 0.0531, "step": 51227 }, { "epoch": 0.9072241628923216, "grad_norm": 0.7815665006637573, "learning_rate": 6.719014556115649e-07, "loss": 0.0279, "step": 51228 }, { "epoch": 0.9072418724293501, "grad_norm": 0.6215293407440186, "learning_rate": 6.716468629892808e-07, "loss": 0.0422, "step": 51229 }, { "epoch": 0.9072595819663785, "grad_norm": 0.8719958662986755, "learning_rate": 6.713923175058328e-07, "loss": 0.0667, "step": 51230 }, { "epoch": 0.9072772915034069, "grad_norm": 0.5592939257621765, "learning_rate": 6.711378191620521e-07, "loss": 0.0324, "step": 51231 }, { "epoch": 0.9072950010404353, "grad_norm": 0.6024723649024963, "learning_rate": 6.708833679587795e-07, "loss": 0.0606, "step": 51232 }, { "epoch": 0.9073127105774638, "grad_norm": 0.9978859424591064, "learning_rate": 6.70628963896851e-07, "loss": 0.0629, "step": 51233 }, { "epoch": 0.9073304201144922, "grad_norm": 0.6509761810302734, "learning_rate": 6.703746069771028e-07, "loss": 0.0514, "step": 51234 }, { "epoch": 0.9073481296515206, "grad_norm": 0.4153886139392853, "learning_rate": 6.701202972003739e-07, "loss": 0.02, "step": 51235 }, { "epoch": 0.907365839188549, "grad_norm": 0.7195302248001099, "learning_rate": 6.698660345674989e-07, "loss": 0.0277, "step": 51236 }, { "epoch": 0.9073835487255775, "grad_norm": 0.6454133987426758, "learning_rate": 6.696118190793171e-07, "loss": 0.0624, "step": 51237 }, { "epoch": 0.9074012582626059, "grad_norm": 0.4503544569015503, "learning_rate": 6.693576507366628e-07, "loss": 0.0402, "step": 51238 }, { "epoch": 0.9074189677996343, "grad_norm": 0.6012445092201233, "learning_rate": 6.691035295403719e-07, "loss": 0.0507, "step": 51239 }, { "epoch": 0.9074366773366628, "grad_norm": 0.6709925532341003, "learning_rate": 6.688494554912805e-07, "loss": 0.0456, "step": 51240 }, { "epoch": 0.9074543868736912, "grad_norm": 0.123307004570961, "learning_rate": 6.685954285902263e-07, "loss": 0.0286, "step": 51241 }, { "epoch": 0.9074720964107196, "grad_norm": 0.8607020378112793, "learning_rate": 6.683414488380418e-07, "loss": 0.0553, "step": 51242 }, { "epoch": 0.907489805947748, "grad_norm": 0.6542270183563232, "learning_rate": 6.680875162355649e-07, "loss": 0.0657, "step": 51243 }, { "epoch": 0.9075075154847765, "grad_norm": 0.5417961478233337, "learning_rate": 6.678336307836297e-07, "loss": 0.0316, "step": 51244 }, { "epoch": 0.9075252250218049, "grad_norm": 0.4836061894893646, "learning_rate": 6.675797924830724e-07, "loss": 0.0475, "step": 51245 }, { "epoch": 0.9075429345588333, "grad_norm": 0.6098665595054626, "learning_rate": 6.673260013347287e-07, "loss": 0.0394, "step": 51246 }, { "epoch": 0.9075606440958617, "grad_norm": 0.367847204208374, "learning_rate": 6.670722573394317e-07, "loss": 0.044, "step": 51247 }, { "epoch": 0.9075783536328902, "grad_norm": 0.6350553631782532, "learning_rate": 6.668185604980203e-07, "loss": 0.0473, "step": 51248 }, { "epoch": 0.9075960631699186, "grad_norm": 0.6803550720214844, "learning_rate": 6.665649108113242e-07, "loss": 0.0555, "step": 51249 }, { "epoch": 0.907613772706947, "grad_norm": 0.537507176399231, "learning_rate": 6.663113082801791e-07, "loss": 0.0369, "step": 51250 }, { "epoch": 0.9076314822439754, "grad_norm": 0.5861948728561401, "learning_rate": 6.660577529054213e-07, "loss": 0.0319, "step": 51251 }, { "epoch": 0.9076491917810039, "grad_norm": 0.585490882396698, "learning_rate": 6.658042446878832e-07, "loss": 0.054, "step": 51252 }, { "epoch": 0.9076669013180323, "grad_norm": 0.22773133218288422, "learning_rate": 6.655507836283991e-07, "loss": 0.0461, "step": 51253 }, { "epoch": 0.9076846108550607, "grad_norm": 0.593053936958313, "learning_rate": 6.652973697278036e-07, "loss": 0.0555, "step": 51254 }, { "epoch": 0.9077023203920892, "grad_norm": 0.2389914095401764, "learning_rate": 6.650440029869293e-07, "loss": 0.032, "step": 51255 }, { "epoch": 0.9077200299291176, "grad_norm": 0.666601300239563, "learning_rate": 6.647906834066103e-07, "loss": 0.045, "step": 51256 }, { "epoch": 0.907737739466146, "grad_norm": 0.3496081233024597, "learning_rate": 6.645374109876795e-07, "loss": 0.0584, "step": 51257 }, { "epoch": 0.9077554490031744, "grad_norm": 0.6701293587684631, "learning_rate": 6.642841857309712e-07, "loss": 0.0478, "step": 51258 }, { "epoch": 0.9077731585402029, "grad_norm": 0.8169602155685425, "learning_rate": 6.640310076373196e-07, "loss": 0.0722, "step": 51259 }, { "epoch": 0.9077908680772313, "grad_norm": 0.9234173893928528, "learning_rate": 6.637778767075542e-07, "loss": 0.0683, "step": 51260 }, { "epoch": 0.9078085776142597, "grad_norm": 0.6180202960968018, "learning_rate": 6.635247929425093e-07, "loss": 0.0564, "step": 51261 }, { "epoch": 0.9078262871512881, "grad_norm": 0.5516982078552246, "learning_rate": 6.63271756343019e-07, "loss": 0.0563, "step": 51262 }, { "epoch": 0.9078439966883166, "grad_norm": 0.7707581520080566, "learning_rate": 6.63018766909913e-07, "loss": 0.0641, "step": 51263 }, { "epoch": 0.907861706225345, "grad_norm": 0.630283772945404, "learning_rate": 6.627658246440255e-07, "loss": 0.0628, "step": 51264 }, { "epoch": 0.9078794157623734, "grad_norm": 1.1458314657211304, "learning_rate": 6.62512929546189e-07, "loss": 0.0816, "step": 51265 }, { "epoch": 0.9078971252994018, "grad_norm": 0.5254417061805725, "learning_rate": 6.622600816172347e-07, "loss": 0.0318, "step": 51266 }, { "epoch": 0.9079148348364303, "grad_norm": 0.42565295100212097, "learning_rate": 6.620072808579953e-07, "loss": 0.061, "step": 51267 }, { "epoch": 0.9079325443734587, "grad_norm": 0.702713668346405, "learning_rate": 6.617545272692999e-07, "loss": 0.0547, "step": 51268 }, { "epoch": 0.9079502539104871, "grad_norm": 0.4087728261947632, "learning_rate": 6.615018208519847e-07, "loss": 0.0494, "step": 51269 }, { "epoch": 0.9079679634475156, "grad_norm": 0.47266992926597595, "learning_rate": 6.612491616068772e-07, "loss": 0.0406, "step": 51270 }, { "epoch": 0.907985672984544, "grad_norm": 0.4143771529197693, "learning_rate": 6.609965495348086e-07, "loss": 0.0621, "step": 51271 }, { "epoch": 0.9080033825215724, "grad_norm": 0.3465828597545624, "learning_rate": 6.607439846366098e-07, "loss": 0.0651, "step": 51272 }, { "epoch": 0.9080210920586008, "grad_norm": 0.7096288800239563, "learning_rate": 6.604914669131151e-07, "loss": 0.0387, "step": 51273 }, { "epoch": 0.9080388015956293, "grad_norm": 0.6225053071975708, "learning_rate": 6.602389963651522e-07, "loss": 0.0543, "step": 51274 }, { "epoch": 0.9080565111326577, "grad_norm": 0.9047767519950867, "learning_rate": 6.599865729935522e-07, "loss": 0.0707, "step": 51275 }, { "epoch": 0.9080742206696861, "grad_norm": 1.0908406972885132, "learning_rate": 6.597341967991477e-07, "loss": 0.069, "step": 51276 }, { "epoch": 0.9080919302067145, "grad_norm": 0.3473982512950897, "learning_rate": 6.59481867782768e-07, "loss": 0.0299, "step": 51277 }, { "epoch": 0.908109639743743, "grad_norm": 0.49673113226890564, "learning_rate": 6.592295859452407e-07, "loss": 0.0336, "step": 51278 }, { "epoch": 0.9081273492807714, "grad_norm": 0.5956181287765503, "learning_rate": 6.58977351287397e-07, "loss": 0.0539, "step": 51279 }, { "epoch": 0.9081450588177998, "grad_norm": 0.6362984776496887, "learning_rate": 6.587251638100694e-07, "loss": 0.0951, "step": 51280 }, { "epoch": 0.9081627683548282, "grad_norm": 0.8046294450759888, "learning_rate": 6.58473023514084e-07, "loss": 0.0658, "step": 51281 }, { "epoch": 0.9081804778918567, "grad_norm": 0.6629123091697693, "learning_rate": 6.582209304002718e-07, "loss": 0.0566, "step": 51282 }, { "epoch": 0.9081981874288851, "grad_norm": 0.4355977177619934, "learning_rate": 6.579688844694621e-07, "loss": 0.0473, "step": 51283 }, { "epoch": 0.9082158969659135, "grad_norm": 0.8874703645706177, "learning_rate": 6.577168857224841e-07, "loss": 0.0592, "step": 51284 }, { "epoch": 0.908233606502942, "grad_norm": 0.6653252243995667, "learning_rate": 6.574649341601657e-07, "loss": 0.049, "step": 51285 }, { "epoch": 0.9082513160399704, "grad_norm": 0.898144006729126, "learning_rate": 6.572130297833395e-07, "loss": 0.0793, "step": 51286 }, { "epoch": 0.9082690255769988, "grad_norm": 0.5877271890640259, "learning_rate": 6.569611725928315e-07, "loss": 0.0459, "step": 51287 }, { "epoch": 0.9082867351140272, "grad_norm": 0.7975349426269531, "learning_rate": 6.567093625894694e-07, "loss": 0.0803, "step": 51288 }, { "epoch": 0.9083044446510558, "grad_norm": 0.8578516840934753, "learning_rate": 6.564575997740824e-07, "loss": 0.0808, "step": 51289 }, { "epoch": 0.9083221541880842, "grad_norm": 0.7259389758110046, "learning_rate": 6.562058841474999e-07, "loss": 0.072, "step": 51290 }, { "epoch": 0.9083398637251126, "grad_norm": 0.5788561701774597, "learning_rate": 6.559542157105514e-07, "loss": 0.0577, "step": 51291 }, { "epoch": 0.908357573262141, "grad_norm": 0.7141255736351013, "learning_rate": 6.557025944640577e-07, "loss": 0.0446, "step": 51292 }, { "epoch": 0.9083752827991695, "grad_norm": 0.6418374180793762, "learning_rate": 6.554510204088549e-07, "loss": 0.0568, "step": 51293 }, { "epoch": 0.9083929923361979, "grad_norm": 0.5816600918769836, "learning_rate": 6.551994935457672e-07, "loss": 0.0367, "step": 51294 }, { "epoch": 0.9084107018732263, "grad_norm": 0.6472473740577698, "learning_rate": 6.549480138756225e-07, "loss": 0.074, "step": 51295 }, { "epoch": 0.9084284114102547, "grad_norm": 1.0044499635696411, "learning_rate": 6.546965813992467e-07, "loss": 0.1043, "step": 51296 }, { "epoch": 0.9084461209472832, "grad_norm": 0.3801964223384857, "learning_rate": 6.544451961174692e-07, "loss": 0.0303, "step": 51297 }, { "epoch": 0.9084638304843116, "grad_norm": 0.7010223269462585, "learning_rate": 6.541938580311174e-07, "loss": 0.0453, "step": 51298 }, { "epoch": 0.90848154002134, "grad_norm": 0.40384551882743835, "learning_rate": 6.539425671410143e-07, "loss": 0.0594, "step": 51299 }, { "epoch": 0.9084992495583685, "grad_norm": 0.5241950750350952, "learning_rate": 6.536913234479891e-07, "loss": 0.0848, "step": 51300 }, { "epoch": 0.9085169590953969, "grad_norm": 0.8294928669929504, "learning_rate": 6.534401269528694e-07, "loss": 0.0576, "step": 51301 }, { "epoch": 0.9085346686324253, "grad_norm": 0.511965811252594, "learning_rate": 6.531889776564797e-07, "loss": 0.0476, "step": 51302 }, { "epoch": 0.9085523781694537, "grad_norm": 0.7385815382003784, "learning_rate": 6.529378755596477e-07, "loss": 0.0488, "step": 51303 }, { "epoch": 0.9085700877064822, "grad_norm": 0.23510690033435822, "learning_rate": 6.526868206631992e-07, "loss": 0.0478, "step": 51304 }, { "epoch": 0.9085877972435106, "grad_norm": 0.779007077217102, "learning_rate": 6.524358129679619e-07, "loss": 0.0569, "step": 51305 }, { "epoch": 0.908605506780539, "grad_norm": 0.4060427248477936, "learning_rate": 6.52184852474757e-07, "loss": 0.0508, "step": 51306 }, { "epoch": 0.9086232163175674, "grad_norm": 0.31758740544319153, "learning_rate": 6.519339391844137e-07, "loss": 0.0483, "step": 51307 }, { "epoch": 0.9086409258545959, "grad_norm": 0.6268527507781982, "learning_rate": 6.516830730977563e-07, "loss": 0.0517, "step": 51308 }, { "epoch": 0.9086586353916243, "grad_norm": 0.5073484778404236, "learning_rate": 6.514322542156109e-07, "loss": 0.055, "step": 51309 }, { "epoch": 0.9086763449286527, "grad_norm": 0.4106460213661194, "learning_rate": 6.511814825388019e-07, "loss": 0.0297, "step": 51310 }, { "epoch": 0.9086940544656811, "grad_norm": 0.8216328024864197, "learning_rate": 6.509307580681534e-07, "loss": 0.0537, "step": 51311 }, { "epoch": 0.9087117640027096, "grad_norm": 0.5603122711181641, "learning_rate": 6.506800808044916e-07, "loss": 0.0493, "step": 51312 }, { "epoch": 0.908729473539738, "grad_norm": 0.5744502544403076, "learning_rate": 6.504294507486425e-07, "loss": 0.062, "step": 51313 }, { "epoch": 0.9087471830767664, "grad_norm": 0.5660349726676941, "learning_rate": 6.50178867901427e-07, "loss": 0.0548, "step": 51314 }, { "epoch": 0.9087648926137949, "grad_norm": 0.3861946165561676, "learning_rate": 6.499283322636729e-07, "loss": 0.036, "step": 51315 }, { "epoch": 0.9087826021508233, "grad_norm": 0.6986908912658691, "learning_rate": 6.496778438362045e-07, "loss": 0.0737, "step": 51316 }, { "epoch": 0.9088003116878517, "grad_norm": 0.704757809638977, "learning_rate": 6.494274026198427e-07, "loss": 0.0611, "step": 51317 }, { "epoch": 0.9088180212248801, "grad_norm": 0.6656711101531982, "learning_rate": 6.491770086154136e-07, "loss": 0.059, "step": 51318 }, { "epoch": 0.9088357307619086, "grad_norm": 0.8018589615821838, "learning_rate": 6.489266618237416e-07, "loss": 0.0709, "step": 51319 }, { "epoch": 0.908853440298937, "grad_norm": 0.5115852952003479, "learning_rate": 6.486763622456493e-07, "loss": 0.0472, "step": 51320 }, { "epoch": 0.9088711498359654, "grad_norm": 0.5835801362991333, "learning_rate": 6.484261098819561e-07, "loss": 0.0581, "step": 51321 }, { "epoch": 0.9088888593729938, "grad_norm": 0.3301439583301544, "learning_rate": 6.481759047334929e-07, "loss": 0.053, "step": 51322 }, { "epoch": 0.9089065689100223, "grad_norm": 0.4437132477760315, "learning_rate": 6.479257468010808e-07, "loss": 0.0547, "step": 51323 }, { "epoch": 0.9089242784470507, "grad_norm": 0.4416886270046234, "learning_rate": 6.47675636085539e-07, "loss": 0.0271, "step": 51324 }, { "epoch": 0.9089419879840791, "grad_norm": 0.5653444528579712, "learning_rate": 6.474255725876937e-07, "loss": 0.0402, "step": 51325 }, { "epoch": 0.9089596975211075, "grad_norm": 0.9488433003425598, "learning_rate": 6.471755563083675e-07, "loss": 0.0586, "step": 51326 }, { "epoch": 0.908977407058136, "grad_norm": 0.5792772769927979, "learning_rate": 6.469255872483798e-07, "loss": 0.0435, "step": 51327 }, { "epoch": 0.9089951165951644, "grad_norm": 0.7827924489974976, "learning_rate": 6.466756654085565e-07, "loss": 0.0443, "step": 51328 }, { "epoch": 0.9090128261321928, "grad_norm": 0.46483051776885986, "learning_rate": 6.464257907897186e-07, "loss": 0.0294, "step": 51329 }, { "epoch": 0.9090305356692213, "grad_norm": 0.5476272106170654, "learning_rate": 6.461759633926872e-07, "loss": 0.0607, "step": 51330 }, { "epoch": 0.9090482452062497, "grad_norm": 0.5644863247871399, "learning_rate": 6.45926183218285e-07, "loss": 0.0466, "step": 51331 }, { "epoch": 0.9090659547432781, "grad_norm": 0.44361135363578796, "learning_rate": 6.456764502673345e-07, "loss": 0.0387, "step": 51332 }, { "epoch": 0.9090836642803065, "grad_norm": 0.6269487142562866, "learning_rate": 6.45426764540657e-07, "loss": 0.0495, "step": 51333 }, { "epoch": 0.909101373817335, "grad_norm": 0.29976558685302734, "learning_rate": 6.451771260390732e-07, "loss": 0.0444, "step": 51334 }, { "epoch": 0.9091190833543634, "grad_norm": 0.40289679169654846, "learning_rate": 6.449275347634043e-07, "loss": 0.0485, "step": 51335 }, { "epoch": 0.9091367928913918, "grad_norm": 0.8714674115180969, "learning_rate": 6.446779907144729e-07, "loss": 0.0768, "step": 51336 }, { "epoch": 0.9091545024284202, "grad_norm": 0.39052820205688477, "learning_rate": 6.444284938930999e-07, "loss": 0.0507, "step": 51337 }, { "epoch": 0.9091722119654487, "grad_norm": 0.8846328854560852, "learning_rate": 6.441790443001034e-07, "loss": 0.0644, "step": 51338 }, { "epoch": 0.9091899215024771, "grad_norm": 0.684777021408081, "learning_rate": 6.439296419363055e-07, "loss": 0.0599, "step": 51339 }, { "epoch": 0.9092076310395055, "grad_norm": 0.13350224494934082, "learning_rate": 6.436802868025276e-07, "loss": 0.0466, "step": 51340 }, { "epoch": 0.9092253405765339, "grad_norm": 0.6700446605682373, "learning_rate": 6.43430978899589e-07, "loss": 0.0418, "step": 51341 }, { "epoch": 0.9092430501135624, "grad_norm": 0.49120524525642395, "learning_rate": 6.431817182283106e-07, "loss": 0.0367, "step": 51342 }, { "epoch": 0.9092607596505908, "grad_norm": 0.3683769702911377, "learning_rate": 6.429325047895119e-07, "loss": 0.0287, "step": 51343 }, { "epoch": 0.9092784691876192, "grad_norm": 0.3229118585586548, "learning_rate": 6.426833385840153e-07, "loss": 0.0436, "step": 51344 }, { "epoch": 0.9092961787246477, "grad_norm": 0.6468198299407959, "learning_rate": 6.42434219612637e-07, "loss": 0.0391, "step": 51345 }, { "epoch": 0.9093138882616761, "grad_norm": 0.6718223690986633, "learning_rate": 6.421851478761964e-07, "loss": 0.0598, "step": 51346 }, { "epoch": 0.9093315977987045, "grad_norm": 0.361385315656662, "learning_rate": 6.419361233755161e-07, "loss": 0.0556, "step": 51347 }, { "epoch": 0.9093493073357329, "grad_norm": 0.6013808846473694, "learning_rate": 6.416871461114154e-07, "loss": 0.0831, "step": 51348 }, { "epoch": 0.9093670168727614, "grad_norm": 0.3928898572921753, "learning_rate": 6.414382160847087e-07, "loss": 0.0473, "step": 51349 }, { "epoch": 0.9093847264097898, "grad_norm": 0.48709824681282043, "learning_rate": 6.411893332962171e-07, "loss": 0.0462, "step": 51350 }, { "epoch": 0.9094024359468182, "grad_norm": 0.49702009558677673, "learning_rate": 6.409404977467631e-07, "loss": 0.0473, "step": 51351 }, { "epoch": 0.9094201454838466, "grad_norm": 0.5455832481384277, "learning_rate": 6.406917094371612e-07, "loss": 0.0401, "step": 51352 }, { "epoch": 0.9094378550208752, "grad_norm": 0.717891275882721, "learning_rate": 6.404429683682323e-07, "loss": 0.0448, "step": 51353 }, { "epoch": 0.9094555645579036, "grad_norm": 0.4378189146518707, "learning_rate": 6.401942745407924e-07, "loss": 0.055, "step": 51354 }, { "epoch": 0.909473274094932, "grad_norm": 0.5051478147506714, "learning_rate": 6.399456279556626e-07, "loss": 0.049, "step": 51355 }, { "epoch": 0.9094909836319603, "grad_norm": 0.5017276406288147, "learning_rate": 6.396970286136589e-07, "loss": 0.0667, "step": 51356 }, { "epoch": 0.9095086931689889, "grad_norm": 0.8708151578903198, "learning_rate": 6.394484765155989e-07, "loss": 0.0752, "step": 51357 }, { "epoch": 0.9095264027060173, "grad_norm": 0.6515806317329407, "learning_rate": 6.391999716623004e-07, "loss": 0.0524, "step": 51358 }, { "epoch": 0.9095441122430457, "grad_norm": 1.0151597261428833, "learning_rate": 6.389515140545809e-07, "loss": 0.0522, "step": 51359 }, { "epoch": 0.9095618217800742, "grad_norm": 0.704086422920227, "learning_rate": 6.3870310369326e-07, "loss": 0.0446, "step": 51360 }, { "epoch": 0.9095795313171026, "grad_norm": 0.16743752360343933, "learning_rate": 6.384547405791518e-07, "loss": 0.0416, "step": 51361 }, { "epoch": 0.909597240854131, "grad_norm": 0.511897087097168, "learning_rate": 6.382064247130775e-07, "loss": 0.0379, "step": 51362 }, { "epoch": 0.9096149503911594, "grad_norm": 0.6099503636360168, "learning_rate": 6.379581560958497e-07, "loss": 0.0438, "step": 51363 }, { "epoch": 0.9096326599281879, "grad_norm": 0.5027477145195007, "learning_rate": 6.377099347282861e-07, "loss": 0.0609, "step": 51364 }, { "epoch": 0.9096503694652163, "grad_norm": 0.5566677451133728, "learning_rate": 6.374617606112077e-07, "loss": 0.0534, "step": 51365 }, { "epoch": 0.9096680790022447, "grad_norm": 0.5179566144943237, "learning_rate": 6.372136337454237e-07, "loss": 0.0471, "step": 51366 }, { "epoch": 0.9096857885392731, "grad_norm": 0.6847707629203796, "learning_rate": 6.369655541317554e-07, "loss": 0.047, "step": 51367 }, { "epoch": 0.9097034980763016, "grad_norm": 0.9338513612747192, "learning_rate": 6.367175217710169e-07, "loss": 0.0419, "step": 51368 }, { "epoch": 0.90972120761333, "grad_norm": 0.6185346245765686, "learning_rate": 6.364695366640261e-07, "loss": 0.0344, "step": 51369 }, { "epoch": 0.9097389171503584, "grad_norm": 0.5084890127182007, "learning_rate": 6.362215988115972e-07, "loss": 0.0539, "step": 51370 }, { "epoch": 0.9097566266873868, "grad_norm": 0.4897601008415222, "learning_rate": 6.359737082145462e-07, "loss": 0.0569, "step": 51371 }, { "epoch": 0.9097743362244153, "grad_norm": 0.5966421365737915, "learning_rate": 6.357258648736908e-07, "loss": 0.0763, "step": 51372 }, { "epoch": 0.9097920457614437, "grad_norm": 0.6016713976860046, "learning_rate": 6.354780687898421e-07, "loss": 0.0409, "step": 51373 }, { "epoch": 0.9098097552984721, "grad_norm": 0.3049350380897522, "learning_rate": 6.352303199638177e-07, "loss": 0.0357, "step": 51374 }, { "epoch": 0.9098274648355006, "grad_norm": 0.531269907951355, "learning_rate": 6.349826183964336e-07, "loss": 0.0405, "step": 51375 }, { "epoch": 0.909845174372529, "grad_norm": 0.8921526670455933, "learning_rate": 6.347349640885042e-07, "loss": 0.0584, "step": 51376 }, { "epoch": 0.9098628839095574, "grad_norm": 0.3854754865169525, "learning_rate": 6.344873570408438e-07, "loss": 0.0404, "step": 51377 }, { "epoch": 0.9098805934465858, "grad_norm": 0.5713352560997009, "learning_rate": 6.342397972542652e-07, "loss": 0.0506, "step": 51378 }, { "epoch": 0.9098983029836143, "grad_norm": 0.4949719309806824, "learning_rate": 6.339922847295843e-07, "loss": 0.0592, "step": 51379 }, { "epoch": 0.9099160125206427, "grad_norm": 0.5142183899879456, "learning_rate": 6.337448194676187e-07, "loss": 0.0473, "step": 51380 }, { "epoch": 0.9099337220576711, "grad_norm": 0.41869014501571655, "learning_rate": 6.334974014691781e-07, "loss": 0.031, "step": 51381 }, { "epoch": 0.9099514315946995, "grad_norm": 0.7821561694145203, "learning_rate": 6.33250030735078e-07, "loss": 0.0553, "step": 51382 }, { "epoch": 0.909969141131728, "grad_norm": 0.38604065775871277, "learning_rate": 6.330027072661332e-07, "loss": 0.063, "step": 51383 }, { "epoch": 0.9099868506687564, "grad_norm": 0.9183929562568665, "learning_rate": 6.327554310631545e-07, "loss": 0.0818, "step": 51384 }, { "epoch": 0.9100045602057848, "grad_norm": 0.5297287702560425, "learning_rate": 6.325082021269579e-07, "loss": 0.0475, "step": 51385 }, { "epoch": 0.9100222697428132, "grad_norm": 0.5166937112808228, "learning_rate": 6.322610204583562e-07, "loss": 0.039, "step": 51386 }, { "epoch": 0.9100399792798417, "grad_norm": 0.5381089448928833, "learning_rate": 6.320138860581621e-07, "loss": 0.0451, "step": 51387 }, { "epoch": 0.9100576888168701, "grad_norm": 0.48466336727142334, "learning_rate": 6.317667989271897e-07, "loss": 0.0547, "step": 51388 }, { "epoch": 0.9100753983538985, "grad_norm": 0.5312897562980652, "learning_rate": 6.31519759066252e-07, "loss": 0.0663, "step": 51389 }, { "epoch": 0.910093107890927, "grad_norm": 0.8176043033599854, "learning_rate": 6.312727664761614e-07, "loss": 0.0738, "step": 51390 }, { "epoch": 0.9101108174279554, "grad_norm": 0.40020784735679626, "learning_rate": 6.310258211577274e-07, "loss": 0.0704, "step": 51391 }, { "epoch": 0.9101285269649838, "grad_norm": 0.8844920992851257, "learning_rate": 6.307789231117677e-07, "loss": 0.061, "step": 51392 }, { "epoch": 0.9101462365020122, "grad_norm": 0.647214949131012, "learning_rate": 6.305320723390901e-07, "loss": 0.0379, "step": 51393 }, { "epoch": 0.9101639460390407, "grad_norm": 0.6775418519973755, "learning_rate": 6.302852688405103e-07, "loss": 0.0584, "step": 51394 }, { "epoch": 0.9101816555760691, "grad_norm": 0.47418922185897827, "learning_rate": 6.300385126168378e-07, "loss": 0.0567, "step": 51395 }, { "epoch": 0.9101993651130975, "grad_norm": 0.4378948509693146, "learning_rate": 6.297918036688854e-07, "loss": 0.0536, "step": 51396 }, { "epoch": 0.9102170746501259, "grad_norm": 0.42405039072036743, "learning_rate": 6.29545141997464e-07, "loss": 0.0445, "step": 51397 }, { "epoch": 0.9102347841871544, "grad_norm": 0.7971111536026001, "learning_rate": 6.292985276033847e-07, "loss": 0.076, "step": 51398 }, { "epoch": 0.9102524937241828, "grad_norm": 0.7425742745399475, "learning_rate": 6.290519604874601e-07, "loss": 0.0503, "step": 51399 }, { "epoch": 0.9102702032612112, "grad_norm": 0.577621340751648, "learning_rate": 6.288054406505011e-07, "loss": 0.0404, "step": 51400 }, { "epoch": 0.9102879127982396, "grad_norm": 0.6922788023948669, "learning_rate": 6.285589680933224e-07, "loss": 0.0512, "step": 51401 }, { "epoch": 0.9103056223352681, "grad_norm": 0.375946968793869, "learning_rate": 6.283125428167264e-07, "loss": 0.0358, "step": 51402 }, { "epoch": 0.9103233318722965, "grad_norm": 0.6669892072677612, "learning_rate": 6.280661648215307e-07, "loss": 0.0467, "step": 51403 }, { "epoch": 0.9103410414093249, "grad_norm": 0.9679108262062073, "learning_rate": 6.278198341085434e-07, "loss": 0.0731, "step": 51404 }, { "epoch": 0.9103587509463534, "grad_norm": 0.9724667072296143, "learning_rate": 6.275735506785752e-07, "loss": 0.0628, "step": 51405 }, { "epoch": 0.9103764604833818, "grad_norm": 0.38537588715553284, "learning_rate": 6.273273145324354e-07, "loss": 0.0738, "step": 51406 }, { "epoch": 0.9103941700204102, "grad_norm": 0.8789423704147339, "learning_rate": 6.270811256709353e-07, "loss": 0.0639, "step": 51407 }, { "epoch": 0.9104118795574386, "grad_norm": 0.6517059803009033, "learning_rate": 6.268349840948856e-07, "loss": 0.0692, "step": 51408 }, { "epoch": 0.9104295890944671, "grad_norm": 0.4786117374897003, "learning_rate": 6.265888898050925e-07, "loss": 0.0391, "step": 51409 }, { "epoch": 0.9104472986314955, "grad_norm": 0.4737248718738556, "learning_rate": 6.263428428023704e-07, "loss": 0.051, "step": 51410 }, { "epoch": 0.9104650081685239, "grad_norm": 0.7855784296989441, "learning_rate": 6.260968430875252e-07, "loss": 0.0459, "step": 51411 }, { "epoch": 0.9104827177055523, "grad_norm": 0.5973944664001465, "learning_rate": 6.258508906613713e-07, "loss": 0.0399, "step": 51412 }, { "epoch": 0.9105004272425808, "grad_norm": 0.4315544366836548, "learning_rate": 6.256049855247098e-07, "loss": 0.0365, "step": 51413 }, { "epoch": 0.9105181367796092, "grad_norm": 0.6969165802001953, "learning_rate": 6.253591276783565e-07, "loss": 0.0697, "step": 51414 }, { "epoch": 0.9105358463166376, "grad_norm": 0.3838611841201782, "learning_rate": 6.251133171231177e-07, "loss": 0.0417, "step": 51415 }, { "epoch": 0.910553555853666, "grad_norm": 0.4499503970146179, "learning_rate": 6.248675538597992e-07, "loss": 0.0605, "step": 51416 }, { "epoch": 0.9105712653906946, "grad_norm": 0.5463729500770569, "learning_rate": 6.246218378892137e-07, "loss": 0.0421, "step": 51417 }, { "epoch": 0.910588974927723, "grad_norm": 0.8163692355155945, "learning_rate": 6.243761692121691e-07, "loss": 0.0865, "step": 51418 }, { "epoch": 0.9106066844647513, "grad_norm": 0.8352559804916382, "learning_rate": 6.241305478294745e-07, "loss": 0.0627, "step": 51419 }, { "epoch": 0.9106243940017799, "grad_norm": 0.3568136394023895, "learning_rate": 6.238849737419345e-07, "loss": 0.0474, "step": 51420 }, { "epoch": 0.9106421035388083, "grad_norm": 0.540448784828186, "learning_rate": 6.236394469503598e-07, "loss": 0.0934, "step": 51421 }, { "epoch": 0.9106598130758367, "grad_norm": 0.6118605136871338, "learning_rate": 6.233939674555566e-07, "loss": 0.0404, "step": 51422 }, { "epoch": 0.910677522612865, "grad_norm": 0.5450615286827087, "learning_rate": 6.231485352583344e-07, "loss": 0.053, "step": 51423 }, { "epoch": 0.9106952321498936, "grad_norm": 0.7730689644813538, "learning_rate": 6.229031503594973e-07, "loss": 0.0561, "step": 51424 }, { "epoch": 0.910712941686922, "grad_norm": 1.1307873725891113, "learning_rate": 6.226578127598548e-07, "loss": 0.063, "step": 51425 }, { "epoch": 0.9107306512239504, "grad_norm": 0.5005931258201599, "learning_rate": 6.224125224602146e-07, "loss": 0.0599, "step": 51426 }, { "epoch": 0.9107483607609788, "grad_norm": 0.9970412254333496, "learning_rate": 6.221672794613825e-07, "loss": 0.076, "step": 51427 }, { "epoch": 0.9107660702980073, "grad_norm": 0.3837544620037079, "learning_rate": 6.219220837641665e-07, "loss": 0.0425, "step": 51428 }, { "epoch": 0.9107837798350357, "grad_norm": 0.4739409387111664, "learning_rate": 6.216769353693741e-07, "loss": 0.0584, "step": 51429 }, { "epoch": 0.9108014893720641, "grad_norm": 0.7214654684066772, "learning_rate": 6.21431834277808e-07, "loss": 0.0711, "step": 51430 }, { "epoch": 0.9108191989090925, "grad_norm": 0.28299811482429504, "learning_rate": 6.211867804902777e-07, "loss": 0.0403, "step": 51431 }, { "epoch": 0.910836908446121, "grad_norm": 0.38693559169769287, "learning_rate": 6.209417740075874e-07, "loss": 0.0303, "step": 51432 }, { "epoch": 0.9108546179831494, "grad_norm": 0.6713399291038513, "learning_rate": 6.206968148305481e-07, "loss": 0.0836, "step": 51433 }, { "epoch": 0.9108723275201778, "grad_norm": 0.5645778775215149, "learning_rate": 6.204519029599576e-07, "loss": 0.0614, "step": 51434 }, { "epoch": 0.9108900370572063, "grad_norm": 0.3279363512992859, "learning_rate": 6.202070383966268e-07, "loss": 0.0423, "step": 51435 }, { "epoch": 0.9109077465942347, "grad_norm": 0.5768314003944397, "learning_rate": 6.199622211413619e-07, "loss": 0.0591, "step": 51436 }, { "epoch": 0.9109254561312631, "grad_norm": 0.6667590737342834, "learning_rate": 6.197174511949654e-07, "loss": 0.0557, "step": 51437 }, { "epoch": 0.9109431656682915, "grad_norm": 0.5038039684295654, "learning_rate": 6.194727285582435e-07, "loss": 0.0516, "step": 51438 }, { "epoch": 0.91096087520532, "grad_norm": 0.5566660165786743, "learning_rate": 6.192280532320039e-07, "loss": 0.0369, "step": 51439 }, { "epoch": 0.9109785847423484, "grad_norm": 0.1286913901567459, "learning_rate": 6.18983425217049e-07, "loss": 0.0608, "step": 51440 }, { "epoch": 0.9109962942793768, "grad_norm": 0.746874988079071, "learning_rate": 6.187388445141834e-07, "loss": 0.0734, "step": 51441 }, { "epoch": 0.9110140038164052, "grad_norm": 0.5848562121391296, "learning_rate": 6.18494311124213e-07, "loss": 0.0411, "step": 51442 }, { "epoch": 0.9110317133534337, "grad_norm": 0.8888601064682007, "learning_rate": 6.182498250479424e-07, "loss": 0.0445, "step": 51443 }, { "epoch": 0.9110494228904621, "grad_norm": 0.5160703063011169, "learning_rate": 6.18005386286174e-07, "loss": 0.0634, "step": 51444 }, { "epoch": 0.9110671324274905, "grad_norm": 0.9205886721611023, "learning_rate": 6.177609948397106e-07, "loss": 0.0589, "step": 51445 }, { "epoch": 0.9110848419645189, "grad_norm": 0.6138836741447449, "learning_rate": 6.175166507093616e-07, "loss": 0.0447, "step": 51446 }, { "epoch": 0.9111025515015474, "grad_norm": 0.5653001070022583, "learning_rate": 6.172723538959296e-07, "loss": 0.0362, "step": 51447 }, { "epoch": 0.9111202610385758, "grad_norm": 0.4552938938140869, "learning_rate": 6.170281044002157e-07, "loss": 0.039, "step": 51448 }, { "epoch": 0.9111379705756042, "grad_norm": 0.6933162808418274, "learning_rate": 6.167839022230243e-07, "loss": 0.0722, "step": 51449 }, { "epoch": 0.9111556801126327, "grad_norm": 0.7523095011711121, "learning_rate": 6.165397473651579e-07, "loss": 0.0775, "step": 51450 }, { "epoch": 0.9111733896496611, "grad_norm": 0.7291082143783569, "learning_rate": 6.162956398274244e-07, "loss": 0.0725, "step": 51451 }, { "epoch": 0.9111910991866895, "grad_norm": 0.4451204836368561, "learning_rate": 6.160515796106214e-07, "loss": 0.0331, "step": 51452 }, { "epoch": 0.9112088087237179, "grad_norm": 0.7313202023506165, "learning_rate": 6.158075667155532e-07, "loss": 0.0566, "step": 51453 }, { "epoch": 0.9112265182607464, "grad_norm": 0.714526891708374, "learning_rate": 6.155636011430227e-07, "loss": 0.0731, "step": 51454 }, { "epoch": 0.9112442277977748, "grad_norm": 0.3140697181224823, "learning_rate": 6.15319682893834e-07, "loss": 0.0323, "step": 51455 }, { "epoch": 0.9112619373348032, "grad_norm": 0.736028254032135, "learning_rate": 6.150758119687883e-07, "loss": 0.091, "step": 51456 }, { "epoch": 0.9112796468718316, "grad_norm": 0.5826452970504761, "learning_rate": 6.148319883686882e-07, "loss": 0.044, "step": 51457 }, { "epoch": 0.9112973564088601, "grad_norm": 0.44709569215774536, "learning_rate": 6.14588212094338e-07, "loss": 0.0577, "step": 51458 }, { "epoch": 0.9113150659458885, "grad_norm": 0.6954931020736694, "learning_rate": 6.143444831465355e-07, "loss": 0.0537, "step": 51459 }, { "epoch": 0.9113327754829169, "grad_norm": 0.41470327973365784, "learning_rate": 6.141008015260835e-07, "loss": 0.0454, "step": 51460 }, { "epoch": 0.9113504850199453, "grad_norm": 0.5103361010551453, "learning_rate": 6.138571672337878e-07, "loss": 0.0646, "step": 51461 }, { "epoch": 0.9113681945569738, "grad_norm": 0.6162747740745544, "learning_rate": 6.136135802704463e-07, "loss": 0.048, "step": 51462 }, { "epoch": 0.9113859040940022, "grad_norm": 0.7329803705215454, "learning_rate": 6.133700406368597e-07, "loss": 0.0823, "step": 51463 }, { "epoch": 0.9114036136310306, "grad_norm": 0.5901178121566772, "learning_rate": 6.131265483338294e-07, "loss": 0.0294, "step": 51464 }, { "epoch": 0.9114213231680591, "grad_norm": 0.6349249482154846, "learning_rate": 6.128831033621596e-07, "loss": 0.0613, "step": 51465 }, { "epoch": 0.9114390327050875, "grad_norm": 0.6672660708427429, "learning_rate": 6.12639705722648e-07, "loss": 0.0601, "step": 51466 }, { "epoch": 0.9114567422421159, "grad_norm": 0.4429827928543091, "learning_rate": 6.123963554160972e-07, "loss": 0.083, "step": 51467 }, { "epoch": 0.9114744517791443, "grad_norm": 0.657463788986206, "learning_rate": 6.121530524433083e-07, "loss": 0.0561, "step": 51468 }, { "epoch": 0.9114921613161728, "grad_norm": 0.45519083738327026, "learning_rate": 6.119097968050808e-07, "loss": 0.0444, "step": 51469 }, { "epoch": 0.9115098708532012, "grad_norm": 0.7042036056518555, "learning_rate": 6.116665885022121e-07, "loss": 0.0662, "step": 51470 }, { "epoch": 0.9115275803902296, "grad_norm": 0.49260613322257996, "learning_rate": 6.114234275355069e-07, "loss": 0.0503, "step": 51471 }, { "epoch": 0.911545289927258, "grad_norm": 0.8986555933952332, "learning_rate": 6.111803139057642e-07, "loss": 0.0756, "step": 51472 }, { "epoch": 0.9115629994642865, "grad_norm": 0.6237685680389404, "learning_rate": 6.10937247613782e-07, "loss": 0.0725, "step": 51473 }, { "epoch": 0.9115807090013149, "grad_norm": 1.1749026775360107, "learning_rate": 6.106942286603579e-07, "loss": 0.0582, "step": 51474 }, { "epoch": 0.9115984185383433, "grad_norm": 0.7190101146697998, "learning_rate": 6.104512570462995e-07, "loss": 0.0668, "step": 51475 }, { "epoch": 0.9116161280753717, "grad_norm": 0.5183060765266418, "learning_rate": 6.102083327723979e-07, "loss": 0.0569, "step": 51476 }, { "epoch": 0.9116338376124002, "grad_norm": 0.48101162910461426, "learning_rate": 6.099654558394557e-07, "loss": 0.0484, "step": 51477 }, { "epoch": 0.9116515471494286, "grad_norm": 0.4210582673549652, "learning_rate": 6.097226262482724e-07, "loss": 0.0496, "step": 51478 }, { "epoch": 0.911669256686457, "grad_norm": 0.41799741983413696, "learning_rate": 6.094798439996474e-07, "loss": 0.0822, "step": 51479 }, { "epoch": 0.9116869662234856, "grad_norm": 0.43327540159225464, "learning_rate": 6.092371090943766e-07, "loss": 0.0364, "step": 51480 }, { "epoch": 0.911704675760514, "grad_norm": 0.8688549995422363, "learning_rate": 6.089944215332593e-07, "loss": 0.0475, "step": 51481 }, { "epoch": 0.9117223852975423, "grad_norm": 0.5460084676742554, "learning_rate": 6.08751781317095e-07, "loss": 0.0416, "step": 51482 }, { "epoch": 0.9117400948345707, "grad_norm": 0.6832648515701294, "learning_rate": 6.085091884466831e-07, "loss": 0.0773, "step": 51483 }, { "epoch": 0.9117578043715993, "grad_norm": 0.810150146484375, "learning_rate": 6.082666429228195e-07, "loss": 0.0672, "step": 51484 }, { "epoch": 0.9117755139086277, "grad_norm": 0.5814105272293091, "learning_rate": 6.08024144746302e-07, "loss": 0.0372, "step": 51485 }, { "epoch": 0.911793223445656, "grad_norm": 0.8822562098503113, "learning_rate": 6.077816939179331e-07, "loss": 0.0771, "step": 51486 }, { "epoch": 0.9118109329826845, "grad_norm": 0.6365411281585693, "learning_rate": 6.075392904385041e-07, "loss": 0.0599, "step": 51487 }, { "epoch": 0.911828642519713, "grad_norm": 0.5444681644439697, "learning_rate": 6.072969343088141e-07, "loss": 0.0756, "step": 51488 }, { "epoch": 0.9118463520567414, "grad_norm": 0.37366652488708496, "learning_rate": 6.070546255296627e-07, "loss": 0.0452, "step": 51489 }, { "epoch": 0.9118640615937698, "grad_norm": 0.6773934960365295, "learning_rate": 6.068123641018475e-07, "loss": 0.0458, "step": 51490 }, { "epoch": 0.9118817711307982, "grad_norm": 0.7736596465110779, "learning_rate": 6.065701500261628e-07, "loss": 0.0476, "step": 51491 }, { "epoch": 0.9118994806678267, "grad_norm": 0.5020125508308411, "learning_rate": 6.063279833034047e-07, "loss": 0.0486, "step": 51492 }, { "epoch": 0.9119171902048551, "grad_norm": 0.8169512748718262, "learning_rate": 6.060858639343725e-07, "loss": 0.0799, "step": 51493 }, { "epoch": 0.9119348997418835, "grad_norm": 0.38252800703048706, "learning_rate": 6.058437919198622e-07, "loss": 0.0427, "step": 51494 }, { "epoch": 0.911952609278912, "grad_norm": 0.5411134362220764, "learning_rate": 6.056017672606701e-07, "loss": 0.0575, "step": 51495 }, { "epoch": 0.9119703188159404, "grad_norm": 0.6388089060783386, "learning_rate": 6.053597899575919e-07, "loss": 0.0391, "step": 51496 }, { "epoch": 0.9119880283529688, "grad_norm": 0.6351392269134521, "learning_rate": 6.051178600114254e-07, "loss": 0.056, "step": 51497 }, { "epoch": 0.9120057378899972, "grad_norm": 0.4859979450702667, "learning_rate": 6.048759774229651e-07, "loss": 0.0481, "step": 51498 }, { "epoch": 0.9120234474270257, "grad_norm": 0.4384973347187042, "learning_rate": 6.046341421930052e-07, "loss": 0.0528, "step": 51499 }, { "epoch": 0.9120411569640541, "grad_norm": 0.7447147369384766, "learning_rate": 6.04392354322345e-07, "loss": 0.0499, "step": 51500 }, { "epoch": 0.9120588665010825, "grad_norm": 0.2959795296192169, "learning_rate": 6.041506138117775e-07, "loss": 0.0447, "step": 51501 }, { "epoch": 0.9120765760381109, "grad_norm": 0.5078803896903992, "learning_rate": 6.039089206620968e-07, "loss": 0.0491, "step": 51502 }, { "epoch": 0.9120942855751394, "grad_norm": 0.6961978673934937, "learning_rate": 6.036672748740974e-07, "loss": 0.0719, "step": 51503 }, { "epoch": 0.9121119951121678, "grad_norm": 0.269089013338089, "learning_rate": 6.034256764485819e-07, "loss": 0.0556, "step": 51504 }, { "epoch": 0.9121297046491962, "grad_norm": 0.669402003288269, "learning_rate": 6.03184125386338e-07, "loss": 0.0352, "step": 51505 }, { "epoch": 0.9121474141862247, "grad_norm": 0.7499441504478455, "learning_rate": 6.029426216881601e-07, "loss": 0.0621, "step": 51506 }, { "epoch": 0.9121651237232531, "grad_norm": 0.8475503921508789, "learning_rate": 6.027011653548492e-07, "loss": 0.0562, "step": 51507 }, { "epoch": 0.9121828332602815, "grad_norm": 0.6947782635688782, "learning_rate": 6.024597563871914e-07, "loss": 0.0453, "step": 51508 }, { "epoch": 0.9122005427973099, "grad_norm": 0.4418095350265503, "learning_rate": 6.022183947859861e-07, "loss": 0.044, "step": 51509 }, { "epoch": 0.9122182523343384, "grad_norm": 0.2654939889907837, "learning_rate": 6.019770805520242e-07, "loss": 0.0399, "step": 51510 }, { "epoch": 0.9122359618713668, "grad_norm": 0.539610743522644, "learning_rate": 6.017358136861035e-07, "loss": 0.0595, "step": 51511 }, { "epoch": 0.9122536714083952, "grad_norm": 0.3237740099430084, "learning_rate": 6.014945941890148e-07, "loss": 0.0355, "step": 51512 }, { "epoch": 0.9122713809454236, "grad_norm": 0.7629538774490356, "learning_rate": 6.012534220615529e-07, "loss": 0.0627, "step": 51513 }, { "epoch": 0.9122890904824521, "grad_norm": 0.34641170501708984, "learning_rate": 6.010122973045101e-07, "loss": 0.0593, "step": 51514 }, { "epoch": 0.9123068000194805, "grad_norm": 0.7806900143623352, "learning_rate": 6.007712199186826e-07, "loss": 0.0503, "step": 51515 }, { "epoch": 0.9123245095565089, "grad_norm": 0.5207139849662781, "learning_rate": 6.005301899048598e-07, "loss": 0.0483, "step": 51516 }, { "epoch": 0.9123422190935373, "grad_norm": 0.4685104489326477, "learning_rate": 6.002892072638378e-07, "loss": 0.0661, "step": 51517 }, { "epoch": 0.9123599286305658, "grad_norm": 0.3730499744415283, "learning_rate": 6.000482719964074e-07, "loss": 0.0609, "step": 51518 }, { "epoch": 0.9123776381675942, "grad_norm": 0.5323586463928223, "learning_rate": 5.998073841033614e-07, "loss": 0.0447, "step": 51519 }, { "epoch": 0.9123953477046226, "grad_norm": 0.8953759670257568, "learning_rate": 5.995665435854908e-07, "loss": 0.0649, "step": 51520 }, { "epoch": 0.9124130572416511, "grad_norm": 0.2601611912250519, "learning_rate": 5.993257504435917e-07, "loss": 0.0344, "step": 51521 }, { "epoch": 0.9124307667786795, "grad_norm": 0.8004837036132812, "learning_rate": 5.990850046784535e-07, "loss": 0.066, "step": 51522 }, { "epoch": 0.9124484763157079, "grad_norm": 0.9692037105560303, "learning_rate": 5.988443062908689e-07, "loss": 0.075, "step": 51523 }, { "epoch": 0.9124661858527363, "grad_norm": 0.6102454662322998, "learning_rate": 5.986036552816287e-07, "loss": 0.0513, "step": 51524 }, { "epoch": 0.9124838953897648, "grad_norm": 0.6466337442398071, "learning_rate": 5.983630516515292e-07, "loss": 0.0648, "step": 51525 }, { "epoch": 0.9125016049267932, "grad_norm": 0.5093441009521484, "learning_rate": 5.981224954013581e-07, "loss": 0.0451, "step": 51526 }, { "epoch": 0.9125193144638216, "grad_norm": 0.5553624629974365, "learning_rate": 5.978819865319046e-07, "loss": 0.0685, "step": 51527 }, { "epoch": 0.91253702400085, "grad_norm": 0.7457509636878967, "learning_rate": 5.976415250439649e-07, "loss": 0.0351, "step": 51528 }, { "epoch": 0.9125547335378785, "grad_norm": 0.3773040175437927, "learning_rate": 5.974011109383282e-07, "loss": 0.0395, "step": 51529 }, { "epoch": 0.9125724430749069, "grad_norm": 0.45375052094459534, "learning_rate": 5.97160744215784e-07, "loss": 0.0431, "step": 51530 }, { "epoch": 0.9125901526119353, "grad_norm": 0.7538228034973145, "learning_rate": 5.969204248771249e-07, "loss": 0.0588, "step": 51531 }, { "epoch": 0.9126078621489637, "grad_norm": 0.5123744606971741, "learning_rate": 5.966801529231386e-07, "loss": 0.0467, "step": 51532 }, { "epoch": 0.9126255716859922, "grad_norm": 0.4800637662410736, "learning_rate": 5.964399283546196e-07, "loss": 0.0757, "step": 51533 }, { "epoch": 0.9126432812230206, "grad_norm": 0.780987024307251, "learning_rate": 5.961997511723555e-07, "loss": 0.0627, "step": 51534 }, { "epoch": 0.912660990760049, "grad_norm": 0.7193718552589417, "learning_rate": 5.95959621377139e-07, "loss": 0.0792, "step": 51535 }, { "epoch": 0.9126787002970775, "grad_norm": 0.9629935026168823, "learning_rate": 5.957195389697579e-07, "loss": 0.0604, "step": 51536 }, { "epoch": 0.9126964098341059, "grad_norm": 0.6594796180725098, "learning_rate": 5.95479503951003e-07, "loss": 0.0416, "step": 51537 }, { "epoch": 0.9127141193711343, "grad_norm": 0.36369165778160095, "learning_rate": 5.952395163216623e-07, "loss": 0.0401, "step": 51538 }, { "epoch": 0.9127318289081627, "grad_norm": 0.472721129655838, "learning_rate": 5.949995760825283e-07, "loss": 0.0343, "step": 51539 }, { "epoch": 0.9127495384451912, "grad_norm": 0.8162997961044312, "learning_rate": 5.947596832343854e-07, "loss": 0.044, "step": 51540 }, { "epoch": 0.9127672479822196, "grad_norm": 0.6870785355567932, "learning_rate": 5.945198377780281e-07, "loss": 0.0577, "step": 51541 }, { "epoch": 0.912784957519248, "grad_norm": 0.5745872855186462, "learning_rate": 5.942800397142439e-07, "loss": 0.0476, "step": 51542 }, { "epoch": 0.9128026670562764, "grad_norm": 0.600380539894104, "learning_rate": 5.940402890438224e-07, "loss": 0.0612, "step": 51543 }, { "epoch": 0.912820376593305, "grad_norm": 0.9718798995018005, "learning_rate": 5.938005857675493e-07, "loss": 0.0575, "step": 51544 }, { "epoch": 0.9128380861303333, "grad_norm": 0.5462583303451538, "learning_rate": 5.93560929886216e-07, "loss": 0.0531, "step": 51545 }, { "epoch": 0.9128557956673617, "grad_norm": 0.5451692938804626, "learning_rate": 5.933213214006117e-07, "loss": 0.0635, "step": 51546 }, { "epoch": 0.9128735052043901, "grad_norm": 0.6697845458984375, "learning_rate": 5.930817603115207e-07, "loss": 0.0737, "step": 51547 }, { "epoch": 0.9128912147414187, "grad_norm": 0.46290236711502075, "learning_rate": 5.928422466197342e-07, "loss": 0.0383, "step": 51548 }, { "epoch": 0.912908924278447, "grad_norm": 0.38481295108795166, "learning_rate": 5.926027803260398e-07, "loss": 0.0528, "step": 51549 }, { "epoch": 0.9129266338154755, "grad_norm": 0.510549783706665, "learning_rate": 5.923633614312251e-07, "loss": 0.0651, "step": 51550 }, { "epoch": 0.912944343352504, "grad_norm": 0.6694396734237671, "learning_rate": 5.921239899360764e-07, "loss": 0.0471, "step": 51551 }, { "epoch": 0.9129620528895324, "grad_norm": 0.3881388008594513, "learning_rate": 5.918846658413829e-07, "loss": 0.0606, "step": 51552 }, { "epoch": 0.9129797624265608, "grad_norm": 0.3609234392642975, "learning_rate": 5.916453891479323e-07, "loss": 0.0935, "step": 51553 }, { "epoch": 0.9129974719635892, "grad_norm": 0.5931740999221802, "learning_rate": 5.914061598565124e-07, "loss": 0.0481, "step": 51554 }, { "epoch": 0.9130151815006177, "grad_norm": 0.7805912494659424, "learning_rate": 5.911669779679074e-07, "loss": 0.0534, "step": 51555 }, { "epoch": 0.9130328910376461, "grad_norm": 0.9272782802581787, "learning_rate": 5.909278434829052e-07, "loss": 0.0582, "step": 51556 }, { "epoch": 0.9130506005746745, "grad_norm": 0.5847324728965759, "learning_rate": 5.90688756402295e-07, "loss": 0.0555, "step": 51557 }, { "epoch": 0.9130683101117029, "grad_norm": 0.6866058111190796, "learning_rate": 5.904497167268596e-07, "loss": 0.0637, "step": 51558 }, { "epoch": 0.9130860196487314, "grad_norm": 0.32381319999694824, "learning_rate": 5.902107244573868e-07, "loss": 0.037, "step": 51559 }, { "epoch": 0.9131037291857598, "grad_norm": 0.29043424129486084, "learning_rate": 5.899717795946641e-07, "loss": 0.0386, "step": 51560 }, { "epoch": 0.9131214387227882, "grad_norm": 0.33257803320884705, "learning_rate": 5.897328821394759e-07, "loss": 0.0407, "step": 51561 }, { "epoch": 0.9131391482598166, "grad_norm": 0.5648195743560791, "learning_rate": 5.894940320926101e-07, "loss": 0.0369, "step": 51562 }, { "epoch": 0.9131568577968451, "grad_norm": 0.3666941523551941, "learning_rate": 5.892552294548509e-07, "loss": 0.048, "step": 51563 }, { "epoch": 0.9131745673338735, "grad_norm": 0.7867771983146667, "learning_rate": 5.89016474226986e-07, "loss": 0.0414, "step": 51564 }, { "epoch": 0.9131922768709019, "grad_norm": 0.737113893032074, "learning_rate": 5.887777664097981e-07, "loss": 0.0617, "step": 51565 }, { "epoch": 0.9132099864079304, "grad_norm": 0.22649428248405457, "learning_rate": 5.885391060040734e-07, "loss": 0.0678, "step": 51566 }, { "epoch": 0.9132276959449588, "grad_norm": 0.7815830111503601, "learning_rate": 5.883004930105962e-07, "loss": 0.0517, "step": 51567 }, { "epoch": 0.9132454054819872, "grad_norm": 0.5448862314224243, "learning_rate": 5.880619274301557e-07, "loss": 0.0442, "step": 51568 }, { "epoch": 0.9132631150190156, "grad_norm": 0.7614683508872986, "learning_rate": 5.878234092635299e-07, "loss": 0.0535, "step": 51569 }, { "epoch": 0.9132808245560441, "grad_norm": 1.0689018964767456, "learning_rate": 5.875849385115095e-07, "loss": 0.0635, "step": 51570 }, { "epoch": 0.9132985340930725, "grad_norm": 0.6186307072639465, "learning_rate": 5.873465151748791e-07, "loss": 0.0626, "step": 51571 }, { "epoch": 0.9133162436301009, "grad_norm": 0.47802576422691345, "learning_rate": 5.87108139254418e-07, "loss": 0.0316, "step": 51572 }, { "epoch": 0.9133339531671293, "grad_norm": 0.6327151656150818, "learning_rate": 5.868698107509141e-07, "loss": 0.0455, "step": 51573 }, { "epoch": 0.9133516627041578, "grad_norm": 0.7898351550102234, "learning_rate": 5.866315296651498e-07, "loss": 0.0475, "step": 51574 }, { "epoch": 0.9133693722411862, "grad_norm": 0.7693853378295898, "learning_rate": 5.86393295997913e-07, "loss": 0.0631, "step": 51575 }, { "epoch": 0.9133870817782146, "grad_norm": 0.5627695322036743, "learning_rate": 5.861551097499829e-07, "loss": 0.0388, "step": 51576 }, { "epoch": 0.913404791315243, "grad_norm": 0.6137818098068237, "learning_rate": 5.859169709221423e-07, "loss": 0.0297, "step": 51577 }, { "epoch": 0.9134225008522715, "grad_norm": 0.8087446689605713, "learning_rate": 5.856788795151791e-07, "loss": 0.0568, "step": 51578 }, { "epoch": 0.9134402103892999, "grad_norm": 0.7093061804771423, "learning_rate": 5.85440835529874e-07, "loss": 0.0797, "step": 51579 }, { "epoch": 0.9134579199263283, "grad_norm": 0.33218705654144287, "learning_rate": 5.8520283896701e-07, "loss": 0.0665, "step": 51580 }, { "epoch": 0.9134756294633568, "grad_norm": 0.5337648391723633, "learning_rate": 5.849648898273713e-07, "loss": 0.0578, "step": 51581 }, { "epoch": 0.9134933390003852, "grad_norm": 0.45866063237190247, "learning_rate": 5.847269881117423e-07, "loss": 0.0445, "step": 51582 }, { "epoch": 0.9135110485374136, "grad_norm": 0.7943375706672668, "learning_rate": 5.844891338209008e-07, "loss": 0.0795, "step": 51583 }, { "epoch": 0.913528758074442, "grad_norm": 0.45551201701164246, "learning_rate": 5.842513269556326e-07, "loss": 0.0371, "step": 51584 }, { "epoch": 0.9135464676114705, "grad_norm": 0.5701932311058044, "learning_rate": 5.840135675167191e-07, "loss": 0.0558, "step": 51585 }, { "epoch": 0.9135641771484989, "grad_norm": 0.6092145442962646, "learning_rate": 5.837758555049444e-07, "loss": 0.0445, "step": 51586 }, { "epoch": 0.9135818866855273, "grad_norm": 1.0027092695236206, "learning_rate": 5.835381909210879e-07, "loss": 0.0536, "step": 51587 }, { "epoch": 0.9135995962225557, "grad_norm": 0.34762802720069885, "learning_rate": 5.833005737659325e-07, "loss": 0.0353, "step": 51588 }, { "epoch": 0.9136173057595842, "grad_norm": 0.9602874517440796, "learning_rate": 5.830630040402591e-07, "loss": 0.0779, "step": 51589 }, { "epoch": 0.9136350152966126, "grad_norm": 0.662929892539978, "learning_rate": 5.828254817448519e-07, "loss": 0.0744, "step": 51590 }, { "epoch": 0.913652724833641, "grad_norm": 0.8911082148551941, "learning_rate": 5.82588006880489e-07, "loss": 0.0568, "step": 51591 }, { "epoch": 0.9136704343706694, "grad_norm": 0.5347188711166382, "learning_rate": 5.823505794479544e-07, "loss": 0.0492, "step": 51592 }, { "epoch": 0.9136881439076979, "grad_norm": 0.5619606971740723, "learning_rate": 5.821131994480294e-07, "loss": 0.0742, "step": 51593 }, { "epoch": 0.9137058534447263, "grad_norm": 0.3372042179107666, "learning_rate": 5.818758668814916e-07, "loss": 0.0465, "step": 51594 }, { "epoch": 0.9137235629817547, "grad_norm": 0.7256513833999634, "learning_rate": 5.816385817491254e-07, "loss": 0.0701, "step": 51595 }, { "epoch": 0.9137412725187832, "grad_norm": 0.5828270316123962, "learning_rate": 5.814013440517102e-07, "loss": 0.052, "step": 51596 }, { "epoch": 0.9137589820558116, "grad_norm": 0.472122460603714, "learning_rate": 5.811641537900253e-07, "loss": 0.0506, "step": 51597 }, { "epoch": 0.91377669159284, "grad_norm": 0.30572813749313354, "learning_rate": 5.809270109648501e-07, "loss": 0.0693, "step": 51598 }, { "epoch": 0.9137944011298684, "grad_norm": 0.6468698382377625, "learning_rate": 5.806899155769673e-07, "loss": 0.0577, "step": 51599 }, { "epoch": 0.9138121106668969, "grad_norm": 0.3000425100326538, "learning_rate": 5.804528676271597e-07, "loss": 0.0492, "step": 51600 }, { "epoch": 0.9138298202039253, "grad_norm": 0.7567600011825562, "learning_rate": 5.802158671162e-07, "loss": 0.0469, "step": 51601 }, { "epoch": 0.9138475297409537, "grad_norm": 0.7152292132377625, "learning_rate": 5.799789140448724e-07, "loss": 0.0369, "step": 51602 }, { "epoch": 0.9138652392779821, "grad_norm": 0.8164800405502319, "learning_rate": 5.797420084139582e-07, "loss": 0.0951, "step": 51603 }, { "epoch": 0.9138829488150106, "grad_norm": 0.6622253656387329, "learning_rate": 5.795051502242316e-07, "loss": 0.0468, "step": 51604 }, { "epoch": 0.913900658352039, "grad_norm": 0.6158204078674316, "learning_rate": 5.792683394764752e-07, "loss": 0.0518, "step": 51605 }, { "epoch": 0.9139183678890674, "grad_norm": 0.9727866053581238, "learning_rate": 5.790315761714671e-07, "loss": 0.0541, "step": 51606 }, { "epoch": 0.9139360774260958, "grad_norm": 0.41031384468078613, "learning_rate": 5.78794860309988e-07, "loss": 0.038, "step": 51607 }, { "epoch": 0.9139537869631243, "grad_norm": 0.38470199704170227, "learning_rate": 5.785581918928123e-07, "loss": 0.0652, "step": 51608 }, { "epoch": 0.9139714965001527, "grad_norm": 0.576782763004303, "learning_rate": 5.783215709207229e-07, "loss": 0.0508, "step": 51609 }, { "epoch": 0.9139892060371811, "grad_norm": 0.47808071970939636, "learning_rate": 5.78084997394499e-07, "loss": 0.0472, "step": 51610 }, { "epoch": 0.9140069155742097, "grad_norm": 0.6192752718925476, "learning_rate": 5.778484713149152e-07, "loss": 0.0499, "step": 51611 }, { "epoch": 0.914024625111238, "grad_norm": 0.4040614068508148, "learning_rate": 5.776119926827506e-07, "loss": 0.0605, "step": 51612 }, { "epoch": 0.9140423346482665, "grad_norm": 0.8911104798316956, "learning_rate": 5.773755614987847e-07, "loss": 0.055, "step": 51613 }, { "epoch": 0.9140600441852949, "grad_norm": 0.7187819480895996, "learning_rate": 5.771391777637952e-07, "loss": 0.0423, "step": 51614 }, { "epoch": 0.9140777537223234, "grad_norm": 0.15967242419719696, "learning_rate": 5.769028414785565e-07, "loss": 0.0394, "step": 51615 }, { "epoch": 0.9140954632593518, "grad_norm": 0.3686770498752594, "learning_rate": 5.766665526438497e-07, "loss": 0.0501, "step": 51616 }, { "epoch": 0.9141131727963802, "grad_norm": 0.5998835563659668, "learning_rate": 5.764303112604524e-07, "loss": 0.0542, "step": 51617 }, { "epoch": 0.9141308823334086, "grad_norm": 0.6115874648094177, "learning_rate": 5.76194117329139e-07, "loss": 0.0574, "step": 51618 }, { "epoch": 0.9141485918704371, "grad_norm": 0.5638433694839478, "learning_rate": 5.759579708506873e-07, "loss": 0.0582, "step": 51619 }, { "epoch": 0.9141663014074655, "grad_norm": 0.4272490441799164, "learning_rate": 5.757218718258767e-07, "loss": 0.0344, "step": 51620 }, { "epoch": 0.9141840109444939, "grad_norm": 0.61871737241745, "learning_rate": 5.75485820255483e-07, "loss": 0.0554, "step": 51621 }, { "epoch": 0.9142017204815223, "grad_norm": 0.22300277650356293, "learning_rate": 5.752498161402808e-07, "loss": 0.0464, "step": 51622 }, { "epoch": 0.9142194300185508, "grad_norm": 0.6420227885246277, "learning_rate": 5.750138594810478e-07, "loss": 0.0488, "step": 51623 }, { "epoch": 0.9142371395555792, "grad_norm": 0.7074548602104187, "learning_rate": 5.747779502785583e-07, "loss": 0.0437, "step": 51624 }, { "epoch": 0.9142548490926076, "grad_norm": 0.49196502566337585, "learning_rate": 5.74542088533595e-07, "loss": 0.0868, "step": 51625 }, { "epoch": 0.9142725586296361, "grad_norm": 0.6880024075508118, "learning_rate": 5.743062742469257e-07, "loss": 0.0396, "step": 51626 }, { "epoch": 0.9142902681666645, "grad_norm": 0.5512247681617737, "learning_rate": 5.740705074193281e-07, "loss": 0.0525, "step": 51627 }, { "epoch": 0.9143079777036929, "grad_norm": 0.4675739109516144, "learning_rate": 5.738347880515831e-07, "loss": 0.0539, "step": 51628 }, { "epoch": 0.9143256872407213, "grad_norm": 0.7711718678474426, "learning_rate": 5.735991161444604e-07, "loss": 0.0438, "step": 51629 }, { "epoch": 0.9143433967777498, "grad_norm": 0.6683842539787292, "learning_rate": 5.733634916987374e-07, "loss": 0.0375, "step": 51630 }, { "epoch": 0.9143611063147782, "grad_norm": 0.5439420342445374, "learning_rate": 5.731279147151903e-07, "loss": 0.0364, "step": 51631 }, { "epoch": 0.9143788158518066, "grad_norm": 0.5264267921447754, "learning_rate": 5.72892385194595e-07, "loss": 0.0474, "step": 51632 }, { "epoch": 0.914396525388835, "grad_norm": 0.26311346888542175, "learning_rate": 5.72656903137721e-07, "loss": 0.0361, "step": 51633 }, { "epoch": 0.9144142349258635, "grad_norm": 0.8158488273620605, "learning_rate": 5.724214685453477e-07, "loss": 0.0471, "step": 51634 }, { "epoch": 0.9144319444628919, "grad_norm": 0.5295634269714355, "learning_rate": 5.721860814182494e-07, "loss": 0.0543, "step": 51635 }, { "epoch": 0.9144496539999203, "grad_norm": 0.5848770141601562, "learning_rate": 5.719507417571973e-07, "loss": 0.0506, "step": 51636 }, { "epoch": 0.9144673635369487, "grad_norm": 0.697360634803772, "learning_rate": 5.717154495629706e-07, "loss": 0.0584, "step": 51637 }, { "epoch": 0.9144850730739772, "grad_norm": 0.4316225051879883, "learning_rate": 5.714802048363388e-07, "loss": 0.0377, "step": 51638 }, { "epoch": 0.9145027826110056, "grad_norm": 0.11434037983417511, "learning_rate": 5.712450075780812e-07, "loss": 0.0361, "step": 51639 }, { "epoch": 0.914520492148034, "grad_norm": 0.7251946330070496, "learning_rate": 5.710098577889655e-07, "loss": 0.052, "step": 51640 }, { "epoch": 0.9145382016850625, "grad_norm": 0.5412603616714478, "learning_rate": 5.707747554697679e-07, "loss": 0.0493, "step": 51641 }, { "epoch": 0.9145559112220909, "grad_norm": 0.4969876706600189, "learning_rate": 5.705397006212626e-07, "loss": 0.0387, "step": 51642 }, { "epoch": 0.9145736207591193, "grad_norm": 0.510610818862915, "learning_rate": 5.703046932442224e-07, "loss": 0.0596, "step": 51643 }, { "epoch": 0.9145913302961477, "grad_norm": 0.4459380805492401, "learning_rate": 5.700697333394183e-07, "loss": 0.0413, "step": 51644 }, { "epoch": 0.9146090398331762, "grad_norm": 0.5519764423370361, "learning_rate": 5.698348209076265e-07, "loss": 0.0616, "step": 51645 }, { "epoch": 0.9146267493702046, "grad_norm": 0.6825125217437744, "learning_rate": 5.695999559496195e-07, "loss": 0.0583, "step": 51646 }, { "epoch": 0.914644458907233, "grad_norm": 0.7070289850234985, "learning_rate": 5.693651384661669e-07, "loss": 0.0521, "step": 51647 }, { "epoch": 0.9146621684442614, "grad_norm": 0.7574214339256287, "learning_rate": 5.691303684580445e-07, "loss": 0.0571, "step": 51648 }, { "epoch": 0.9146798779812899, "grad_norm": 0.49550366401672363, "learning_rate": 5.68895645926027e-07, "loss": 0.0494, "step": 51649 }, { "epoch": 0.9146975875183183, "grad_norm": 0.9814578890800476, "learning_rate": 5.686609708708784e-07, "loss": 0.0475, "step": 51650 }, { "epoch": 0.9147152970553467, "grad_norm": 0.5083674192428589, "learning_rate": 5.684263432933784e-07, "loss": 0.0369, "step": 51651 }, { "epoch": 0.9147330065923751, "grad_norm": 0.6346532106399536, "learning_rate": 5.681917631942945e-07, "loss": 0.0363, "step": 51652 }, { "epoch": 0.9147507161294036, "grad_norm": 0.9692013263702393, "learning_rate": 5.679572305744013e-07, "loss": 0.0522, "step": 51653 }, { "epoch": 0.914768425666432, "grad_norm": 0.7419523596763611, "learning_rate": 5.67722745434468e-07, "loss": 0.0629, "step": 51654 }, { "epoch": 0.9147861352034604, "grad_norm": 0.397096186876297, "learning_rate": 5.674883077752673e-07, "loss": 0.0531, "step": 51655 }, { "epoch": 0.9148038447404889, "grad_norm": 0.5630737543106079, "learning_rate": 5.672539175975688e-07, "loss": 0.0768, "step": 51656 }, { "epoch": 0.9148215542775173, "grad_norm": 0.9542708396911621, "learning_rate": 5.670195749021484e-07, "loss": 0.0464, "step": 51657 }, { "epoch": 0.9148392638145457, "grad_norm": 0.7452613115310669, "learning_rate": 5.667852796897722e-07, "loss": 0.0529, "step": 51658 }, { "epoch": 0.9148569733515741, "grad_norm": 0.591210663318634, "learning_rate": 5.665510319612127e-07, "loss": 0.0529, "step": 51659 }, { "epoch": 0.9148746828886026, "grad_norm": 1.1941879987716675, "learning_rate": 5.663168317172413e-07, "loss": 0.0901, "step": 51660 }, { "epoch": 0.914892392425631, "grad_norm": 0.7654917240142822, "learning_rate": 5.660826789586271e-07, "loss": 0.0647, "step": 51661 }, { "epoch": 0.9149101019626594, "grad_norm": 0.4802803099155426, "learning_rate": 5.658485736861396e-07, "loss": 0.0473, "step": 51662 }, { "epoch": 0.9149278114996878, "grad_norm": 0.7475554347038269, "learning_rate": 5.656145159005516e-07, "loss": 0.0498, "step": 51663 }, { "epoch": 0.9149455210367163, "grad_norm": 0.6626152396202087, "learning_rate": 5.653805056026306e-07, "loss": 0.0327, "step": 51664 }, { "epoch": 0.9149632305737447, "grad_norm": 0.6741660833358765, "learning_rate": 5.651465427931479e-07, "loss": 0.0402, "step": 51665 }, { "epoch": 0.9149809401107731, "grad_norm": 0.4980275332927704, "learning_rate": 5.649126274728744e-07, "loss": 0.0354, "step": 51666 }, { "epoch": 0.9149986496478015, "grad_norm": 0.6143314242362976, "learning_rate": 5.646787596425795e-07, "loss": 0.0588, "step": 51667 }, { "epoch": 0.91501635918483, "grad_norm": 0.6699214577674866, "learning_rate": 5.644449393030293e-07, "loss": 0.0298, "step": 51668 }, { "epoch": 0.9150340687218584, "grad_norm": 0.6139305233955383, "learning_rate": 5.642111664549949e-07, "loss": 0.0413, "step": 51669 }, { "epoch": 0.9150517782588868, "grad_norm": 0.5700145363807678, "learning_rate": 5.639774410992471e-07, "loss": 0.0638, "step": 51670 }, { "epoch": 0.9150694877959153, "grad_norm": 0.7270573973655701, "learning_rate": 5.637437632365539e-07, "loss": 0.044, "step": 51671 }, { "epoch": 0.9150871973329437, "grad_norm": 0.5566126704216003, "learning_rate": 5.635101328676829e-07, "loss": 0.0557, "step": 51672 }, { "epoch": 0.9151049068699721, "grad_norm": 0.9437659382820129, "learning_rate": 5.632765499934017e-07, "loss": 0.0491, "step": 51673 }, { "epoch": 0.9151226164070005, "grad_norm": 0.8477873206138611, "learning_rate": 5.630430146144816e-07, "loss": 0.0977, "step": 51674 }, { "epoch": 0.915140325944029, "grad_norm": 0.49301061034202576, "learning_rate": 5.628095267316902e-07, "loss": 0.0354, "step": 51675 }, { "epoch": 0.9151580354810575, "grad_norm": 0.6104121804237366, "learning_rate": 5.625760863457935e-07, "loss": 0.0412, "step": 51676 }, { "epoch": 0.9151757450180859, "grad_norm": 0.3341491222381592, "learning_rate": 5.623426934575626e-07, "loss": 0.0368, "step": 51677 }, { "epoch": 0.9151934545551142, "grad_norm": 0.5691235065460205, "learning_rate": 5.621093480677669e-07, "loss": 0.0432, "step": 51678 }, { "epoch": 0.9152111640921428, "grad_norm": 0.44063499569892883, "learning_rate": 5.618760501771674e-07, "loss": 0.042, "step": 51679 }, { "epoch": 0.9152288736291712, "grad_norm": 0.5195287466049194, "learning_rate": 5.616427997865353e-07, "loss": 0.051, "step": 51680 }, { "epoch": 0.9152465831661996, "grad_norm": 0.4040244519710541, "learning_rate": 5.614095968966415e-07, "loss": 0.0498, "step": 51681 }, { "epoch": 0.915264292703228, "grad_norm": 0.3526958227157593, "learning_rate": 5.611764415082454e-07, "loss": 0.0323, "step": 51682 }, { "epoch": 0.9152820022402565, "grad_norm": 0.576071560382843, "learning_rate": 5.609433336221215e-07, "loss": 0.0635, "step": 51683 }, { "epoch": 0.9152997117772849, "grad_norm": 0.6950812339782715, "learning_rate": 5.607102732390307e-07, "loss": 0.0413, "step": 51684 }, { "epoch": 0.9153174213143133, "grad_norm": 0.19338186085224152, "learning_rate": 5.604772603597441e-07, "loss": 0.027, "step": 51685 }, { "epoch": 0.9153351308513418, "grad_norm": 0.8492754101753235, "learning_rate": 5.602442949850279e-07, "loss": 0.0781, "step": 51686 }, { "epoch": 0.9153528403883702, "grad_norm": 0.3935585021972656, "learning_rate": 5.600113771156462e-07, "loss": 0.0427, "step": 51687 }, { "epoch": 0.9153705499253986, "grad_norm": 0.8096914887428284, "learning_rate": 5.597785067523653e-07, "loss": 0.0733, "step": 51688 }, { "epoch": 0.915388259462427, "grad_norm": 0.8569815158843994, "learning_rate": 5.595456838959562e-07, "loss": 0.0522, "step": 51689 }, { "epoch": 0.9154059689994555, "grad_norm": 0.6959325075149536, "learning_rate": 5.593129085471799e-07, "loss": 0.0651, "step": 51690 }, { "epoch": 0.9154236785364839, "grad_norm": 1.3652421236038208, "learning_rate": 5.590801807068025e-07, "loss": 0.0691, "step": 51691 }, { "epoch": 0.9154413880735123, "grad_norm": 0.5011522173881531, "learning_rate": 5.588475003755917e-07, "loss": 0.0408, "step": 51692 }, { "epoch": 0.9154590976105407, "grad_norm": 0.7544482946395874, "learning_rate": 5.586148675543101e-07, "loss": 0.0552, "step": 51693 }, { "epoch": 0.9154768071475692, "grad_norm": 0.4738534986972809, "learning_rate": 5.583822822437257e-07, "loss": 0.0503, "step": 51694 }, { "epoch": 0.9154945166845976, "grad_norm": 0.47085097432136536, "learning_rate": 5.581497444446026e-07, "loss": 0.0349, "step": 51695 }, { "epoch": 0.915512226221626, "grad_norm": 0.6711204051971436, "learning_rate": 5.579172541577087e-07, "loss": 0.0553, "step": 51696 }, { "epoch": 0.9155299357586544, "grad_norm": 0.5369793176651001, "learning_rate": 5.576848113838035e-07, "loss": 0.0686, "step": 51697 }, { "epoch": 0.9155476452956829, "grad_norm": 0.7862162590026855, "learning_rate": 5.57452416123656e-07, "loss": 0.0679, "step": 51698 }, { "epoch": 0.9155653548327113, "grad_norm": 0.5125111937522888, "learning_rate": 5.57220068378031e-07, "loss": 0.0557, "step": 51699 }, { "epoch": 0.9155830643697397, "grad_norm": 0.5524436831474304, "learning_rate": 5.569877681476876e-07, "loss": 0.0452, "step": 51700 }, { "epoch": 0.9156007739067682, "grad_norm": 0.42413973808288574, "learning_rate": 5.567555154333953e-07, "loss": 0.0497, "step": 51701 }, { "epoch": 0.9156184834437966, "grad_norm": 0.6917080283164978, "learning_rate": 5.565233102359151e-07, "loss": 0.0632, "step": 51702 }, { "epoch": 0.915636192980825, "grad_norm": 0.4331885576248169, "learning_rate": 5.562911525560132e-07, "loss": 0.0636, "step": 51703 }, { "epoch": 0.9156539025178534, "grad_norm": 0.8103378415107727, "learning_rate": 5.56059042394454e-07, "loss": 0.0472, "step": 51704 }, { "epoch": 0.9156716120548819, "grad_norm": 0.6087393164634705, "learning_rate": 5.558269797519983e-07, "loss": 0.0495, "step": 51705 }, { "epoch": 0.9156893215919103, "grad_norm": 1.1241838932037354, "learning_rate": 5.555949646294123e-07, "loss": 0.0739, "step": 51706 }, { "epoch": 0.9157070311289387, "grad_norm": 0.16023746132850647, "learning_rate": 5.553629970274554e-07, "loss": 0.0413, "step": 51707 }, { "epoch": 0.9157247406659671, "grad_norm": 0.3418327271938324, "learning_rate": 5.551310769468953e-07, "loss": 0.0218, "step": 51708 }, { "epoch": 0.9157424502029956, "grad_norm": 0.7035106420516968, "learning_rate": 5.548992043884915e-07, "loss": 0.0657, "step": 51709 }, { "epoch": 0.915760159740024, "grad_norm": 0.9741559624671936, "learning_rate": 5.5466737935301e-07, "loss": 0.0901, "step": 51710 }, { "epoch": 0.9157778692770524, "grad_norm": 0.6317218542098999, "learning_rate": 5.5443560184121e-07, "loss": 0.0644, "step": 51711 }, { "epoch": 0.9157955788140808, "grad_norm": 0.6743703484535217, "learning_rate": 5.542038718538561e-07, "loss": 0.0584, "step": 51712 }, { "epoch": 0.9158132883511093, "grad_norm": 0.5047732591629028, "learning_rate": 5.53972189391711e-07, "loss": 0.0362, "step": 51713 }, { "epoch": 0.9158309978881377, "grad_norm": 0.43110883235931396, "learning_rate": 5.537405544555357e-07, "loss": 0.0597, "step": 51714 }, { "epoch": 0.9158487074251661, "grad_norm": 0.4018012285232544, "learning_rate": 5.53508967046093e-07, "loss": 0.0592, "step": 51715 }, { "epoch": 0.9158664169621946, "grad_norm": 0.4000742435455322, "learning_rate": 5.532774271641439e-07, "loss": 0.039, "step": 51716 }, { "epoch": 0.915884126499223, "grad_norm": 0.5822184681892395, "learning_rate": 5.530459348104528e-07, "loss": 0.0489, "step": 51717 }, { "epoch": 0.9159018360362514, "grad_norm": 0.7236577868461609, "learning_rate": 5.52814489985779e-07, "loss": 0.053, "step": 51718 }, { "epoch": 0.9159195455732798, "grad_norm": 0.9936068654060364, "learning_rate": 5.525830926908837e-07, "loss": 0.052, "step": 51719 }, { "epoch": 0.9159372551103083, "grad_norm": 0.5850989818572998, "learning_rate": 5.523517429265296e-07, "loss": 0.0558, "step": 51720 }, { "epoch": 0.9159549646473367, "grad_norm": 0.7165272235870361, "learning_rate": 5.52120440693476e-07, "loss": 0.0772, "step": 51721 }, { "epoch": 0.9159726741843651, "grad_norm": 0.3081452250480652, "learning_rate": 5.518891859924824e-07, "loss": 0.0718, "step": 51722 }, { "epoch": 0.9159903837213935, "grad_norm": 0.7707245945930481, "learning_rate": 5.516579788243132e-07, "loss": 0.0715, "step": 51723 }, { "epoch": 0.916008093258422, "grad_norm": 0.6072269082069397, "learning_rate": 5.514268191897309e-07, "loss": 0.047, "step": 51724 }, { "epoch": 0.9160258027954504, "grad_norm": 0.33403119444847107, "learning_rate": 5.511957070894919e-07, "loss": 0.0266, "step": 51725 }, { "epoch": 0.9160435123324788, "grad_norm": 0.47444942593574524, "learning_rate": 5.509646425243553e-07, "loss": 0.0578, "step": 51726 }, { "epoch": 0.9160612218695072, "grad_norm": 0.1961306631565094, "learning_rate": 5.507336254950857e-07, "loss": 0.0399, "step": 51727 }, { "epoch": 0.9160789314065357, "grad_norm": 0.5329402685165405, "learning_rate": 5.505026560024423e-07, "loss": 0.0355, "step": 51728 }, { "epoch": 0.9160966409435641, "grad_norm": 1.0030356645584106, "learning_rate": 5.502717340471813e-07, "loss": 0.0623, "step": 51729 }, { "epoch": 0.9161143504805925, "grad_norm": 0.37893837690353394, "learning_rate": 5.500408596300655e-07, "loss": 0.0486, "step": 51730 }, { "epoch": 0.916132060017621, "grad_norm": 0.8327152132987976, "learning_rate": 5.498100327518524e-07, "loss": 0.051, "step": 51731 }, { "epoch": 0.9161497695546494, "grad_norm": 0.8439342975616455, "learning_rate": 5.495792534133049e-07, "loss": 0.0374, "step": 51732 }, { "epoch": 0.9161674790916778, "grad_norm": 0.6352697610855103, "learning_rate": 5.493485216151789e-07, "loss": 0.0745, "step": 51733 }, { "epoch": 0.9161851886287062, "grad_norm": 0.5282902717590332, "learning_rate": 5.491178373582339e-07, "loss": 0.0616, "step": 51734 }, { "epoch": 0.9162028981657347, "grad_norm": 0.6027270555496216, "learning_rate": 5.488872006432327e-07, "loss": 0.0608, "step": 51735 }, { "epoch": 0.9162206077027631, "grad_norm": 0.7622061967849731, "learning_rate": 5.48656611470928e-07, "loss": 0.0622, "step": 51736 }, { "epoch": 0.9162383172397915, "grad_norm": 0.5963694453239441, "learning_rate": 5.484260698420823e-07, "loss": 0.047, "step": 51737 }, { "epoch": 0.9162560267768199, "grad_norm": 0.4070796072483063, "learning_rate": 5.481955757574552e-07, "loss": 0.0422, "step": 51738 }, { "epoch": 0.9162737363138485, "grad_norm": 0.5939236879348755, "learning_rate": 5.479651292177995e-07, "loss": 0.0382, "step": 51739 }, { "epoch": 0.9162914458508769, "grad_norm": 0.5927811861038208, "learning_rate": 5.477347302238777e-07, "loss": 0.0526, "step": 51740 }, { "epoch": 0.9163091553879052, "grad_norm": 0.5971565246582031, "learning_rate": 5.475043787764477e-07, "loss": 0.0618, "step": 51741 }, { "epoch": 0.9163268649249336, "grad_norm": 0.434303343296051, "learning_rate": 5.472740748762655e-07, "loss": 0.0545, "step": 51742 }, { "epoch": 0.9163445744619622, "grad_norm": 0.5342601537704468, "learning_rate": 5.470438185240906e-07, "loss": 0.087, "step": 51743 }, { "epoch": 0.9163622839989906, "grad_norm": 0.7832107543945312, "learning_rate": 5.468136097206788e-07, "loss": 0.0736, "step": 51744 }, { "epoch": 0.916379993536019, "grad_norm": 0.3880767822265625, "learning_rate": 5.465834484667915e-07, "loss": 0.0479, "step": 51745 }, { "epoch": 0.9163977030730475, "grad_norm": 0.5068178772926331, "learning_rate": 5.463533347631794e-07, "loss": 0.0494, "step": 51746 }, { "epoch": 0.9164154126100759, "grad_norm": 0.43490496277809143, "learning_rate": 5.461232686106038e-07, "loss": 0.0423, "step": 51747 }, { "epoch": 0.9164331221471043, "grad_norm": 0.5512584447860718, "learning_rate": 5.458932500098207e-07, "loss": 0.0602, "step": 51748 }, { "epoch": 0.9164508316841327, "grad_norm": 0.6257478594779968, "learning_rate": 5.456632789615878e-07, "loss": 0.0715, "step": 51749 }, { "epoch": 0.9164685412211612, "grad_norm": 0.6702908873558044, "learning_rate": 5.454333554666612e-07, "loss": 0.0476, "step": 51750 }, { "epoch": 0.9164862507581896, "grad_norm": 0.17772555351257324, "learning_rate": 5.452034795257937e-07, "loss": 0.0464, "step": 51751 }, { "epoch": 0.916503960295218, "grad_norm": 0.7094018459320068, "learning_rate": 5.449736511397479e-07, "loss": 0.0562, "step": 51752 }, { "epoch": 0.9165216698322464, "grad_norm": 0.5776634812355042, "learning_rate": 5.44743870309275e-07, "loss": 0.0537, "step": 51753 }, { "epoch": 0.9165393793692749, "grad_norm": 0.6607481837272644, "learning_rate": 5.445141370351326e-07, "loss": 0.0494, "step": 51754 }, { "epoch": 0.9165570889063033, "grad_norm": 0.39510732889175415, "learning_rate": 5.442844513180767e-07, "loss": 0.0707, "step": 51755 }, { "epoch": 0.9165747984433317, "grad_norm": 0.4977991282939911, "learning_rate": 5.440548131588652e-07, "loss": 0.0499, "step": 51756 }, { "epoch": 0.9165925079803601, "grad_norm": 0.47819092869758606, "learning_rate": 5.43825222558249e-07, "loss": 0.0226, "step": 51757 }, { "epoch": 0.9166102175173886, "grad_norm": 0.3292398452758789, "learning_rate": 5.435956795169861e-07, "loss": 0.0248, "step": 51758 }, { "epoch": 0.916627927054417, "grad_norm": 0.8685622811317444, "learning_rate": 5.433661840358306e-07, "loss": 0.065, "step": 51759 }, { "epoch": 0.9166456365914454, "grad_norm": 0.38193821907043457, "learning_rate": 5.431367361155387e-07, "loss": 0.0495, "step": 51760 }, { "epoch": 0.9166633461284739, "grad_norm": 0.7288342118263245, "learning_rate": 5.429073357568648e-07, "loss": 0.0715, "step": 51761 }, { "epoch": 0.9166810556655023, "grad_norm": 0.44328516721725464, "learning_rate": 5.426779829605633e-07, "loss": 0.0552, "step": 51762 }, { "epoch": 0.9166987652025307, "grad_norm": 0.6886236071586609, "learning_rate": 5.424486777273901e-07, "loss": 0.0317, "step": 51763 }, { "epoch": 0.9167164747395591, "grad_norm": 0.5241486430168152, "learning_rate": 5.422194200580965e-07, "loss": 0.0529, "step": 51764 }, { "epoch": 0.9167341842765876, "grad_norm": 0.38694509863853455, "learning_rate": 5.419902099534402e-07, "loss": 0.0299, "step": 51765 }, { "epoch": 0.916751893813616, "grad_norm": 0.5677604675292969, "learning_rate": 5.417610474141738e-07, "loss": 0.0328, "step": 51766 }, { "epoch": 0.9167696033506444, "grad_norm": 0.4823395907878876, "learning_rate": 5.415319324410517e-07, "loss": 0.0529, "step": 51767 }, { "epoch": 0.9167873128876728, "grad_norm": 0.4605943262577057, "learning_rate": 5.41302865034825e-07, "loss": 0.05, "step": 51768 }, { "epoch": 0.9168050224247013, "grad_norm": 0.36265870928764343, "learning_rate": 5.410738451962516e-07, "loss": 0.0588, "step": 51769 }, { "epoch": 0.9168227319617297, "grad_norm": 0.48876819014549255, "learning_rate": 5.408448729260806e-07, "loss": 0.0436, "step": 51770 }, { "epoch": 0.9168404414987581, "grad_norm": 0.6421205401420593, "learning_rate": 5.406159482250683e-07, "loss": 0.0391, "step": 51771 }, { "epoch": 0.9168581510357865, "grad_norm": 0.6565386652946472, "learning_rate": 5.403870710939673e-07, "loss": 0.0381, "step": 51772 }, { "epoch": 0.916875860572815, "grad_norm": 0.6021117568016052, "learning_rate": 5.401582415335304e-07, "loss": 0.0576, "step": 51773 }, { "epoch": 0.9168935701098434, "grad_norm": 0.6475727558135986, "learning_rate": 5.39929459544512e-07, "loss": 0.0463, "step": 51774 }, { "epoch": 0.9169112796468718, "grad_norm": 0.5059409141540527, "learning_rate": 5.397007251276614e-07, "loss": 0.0564, "step": 51775 }, { "epoch": 0.9169289891839003, "grad_norm": 0.5660103559494019, "learning_rate": 5.394720382837331e-07, "loss": 0.0554, "step": 51776 }, { "epoch": 0.9169466987209287, "grad_norm": 0.5597602128982544, "learning_rate": 5.392433990134798e-07, "loss": 0.0679, "step": 51777 }, { "epoch": 0.9169644082579571, "grad_norm": 0.680975615978241, "learning_rate": 5.390148073176526e-07, "loss": 0.0542, "step": 51778 }, { "epoch": 0.9169821177949855, "grad_norm": 0.7218264937400818, "learning_rate": 5.387862631970042e-07, "loss": 0.031, "step": 51779 }, { "epoch": 0.916999827332014, "grad_norm": 0.42594113945961, "learning_rate": 5.38557766652284e-07, "loss": 0.049, "step": 51780 }, { "epoch": 0.9170175368690424, "grad_norm": 0.4797849953174591, "learning_rate": 5.383293176842497e-07, "loss": 0.0469, "step": 51781 }, { "epoch": 0.9170352464060708, "grad_norm": 0.6716399788856506, "learning_rate": 5.381009162936473e-07, "loss": 0.0638, "step": 51782 }, { "epoch": 0.9170529559430992, "grad_norm": 0.5742227435112, "learning_rate": 5.378725624812298e-07, "loss": 0.0595, "step": 51783 }, { "epoch": 0.9170706654801277, "grad_norm": 0.4456401467323303, "learning_rate": 5.376442562477513e-07, "loss": 0.025, "step": 51784 }, { "epoch": 0.9170883750171561, "grad_norm": 0.489480584859848, "learning_rate": 5.37415997593958e-07, "loss": 0.0524, "step": 51785 }, { "epoch": 0.9171060845541845, "grad_norm": 0.7578151226043701, "learning_rate": 5.371877865206026e-07, "loss": 0.063, "step": 51786 }, { "epoch": 0.9171237940912129, "grad_norm": 0.7435094118118286, "learning_rate": 5.369596230284379e-07, "loss": 0.0464, "step": 51787 }, { "epoch": 0.9171415036282414, "grad_norm": 0.5860462188720703, "learning_rate": 5.367315071182133e-07, "loss": 0.0509, "step": 51788 }, { "epoch": 0.9171592131652698, "grad_norm": 0.8591665625572205, "learning_rate": 5.365034387906781e-07, "loss": 0.0975, "step": 51789 }, { "epoch": 0.9171769227022982, "grad_norm": 0.06141146272420883, "learning_rate": 5.36275418046585e-07, "loss": 0.0354, "step": 51790 }, { "epoch": 0.9171946322393267, "grad_norm": 0.6112903356552124, "learning_rate": 5.360474448866837e-07, "loss": 0.0457, "step": 51791 }, { "epoch": 0.9172123417763551, "grad_norm": 0.6050291657447815, "learning_rate": 5.358195193117216e-07, "loss": 0.0316, "step": 51792 }, { "epoch": 0.9172300513133835, "grad_norm": 0.470124214887619, "learning_rate": 5.355916413224498e-07, "loss": 0.079, "step": 51793 }, { "epoch": 0.9172477608504119, "grad_norm": 0.536625862121582, "learning_rate": 5.353638109196196e-07, "loss": 0.0435, "step": 51794 }, { "epoch": 0.9172654703874404, "grad_norm": 0.4942810833454132, "learning_rate": 5.351360281039819e-07, "loss": 0.0513, "step": 51795 }, { "epoch": 0.9172831799244688, "grad_norm": 0.7750155329704285, "learning_rate": 5.349082928762794e-07, "loss": 0.0595, "step": 51796 }, { "epoch": 0.9173008894614972, "grad_norm": 0.6779415011405945, "learning_rate": 5.346806052372682e-07, "loss": 0.0428, "step": 51797 }, { "epoch": 0.9173185989985256, "grad_norm": 0.590251088142395, "learning_rate": 5.344529651876928e-07, "loss": 0.0439, "step": 51798 }, { "epoch": 0.9173363085355541, "grad_norm": 0.36333152651786804, "learning_rate": 5.342253727283042e-07, "loss": 0.048, "step": 51799 }, { "epoch": 0.9173540180725825, "grad_norm": 0.7339887619018555, "learning_rate": 5.339978278598518e-07, "loss": 0.0441, "step": 51800 }, { "epoch": 0.9173717276096109, "grad_norm": 0.18772464990615845, "learning_rate": 5.337703305830832e-07, "loss": 0.0398, "step": 51801 }, { "epoch": 0.9173894371466393, "grad_norm": 0.5076167583465576, "learning_rate": 5.335428808987497e-07, "loss": 0.0505, "step": 51802 }, { "epoch": 0.9174071466836679, "grad_norm": 0.2269386500120163, "learning_rate": 5.333154788075939e-07, "loss": 0.0424, "step": 51803 }, { "epoch": 0.9174248562206962, "grad_norm": 0.8560491800308228, "learning_rate": 5.330881243103669e-07, "loss": 0.0605, "step": 51804 }, { "epoch": 0.9174425657577246, "grad_norm": 0.2897915542125702, "learning_rate": 5.328608174078181e-07, "loss": 0.0391, "step": 51805 }, { "epoch": 0.9174602752947532, "grad_norm": 0.7959892153739929, "learning_rate": 5.326335581006935e-07, "loss": 0.0475, "step": 51806 }, { "epoch": 0.9174779848317816, "grad_norm": 0.6646106839179993, "learning_rate": 5.324063463897411e-07, "loss": 0.0328, "step": 51807 }, { "epoch": 0.91749569436881, "grad_norm": 0.44563326239585876, "learning_rate": 5.321791822757083e-07, "loss": 0.0487, "step": 51808 }, { "epoch": 0.9175134039058384, "grad_norm": 0.4642859697341919, "learning_rate": 5.319520657593429e-07, "loss": 0.0444, "step": 51809 }, { "epoch": 0.9175311134428669, "grad_norm": 0.4211231470108032, "learning_rate": 5.317249968413912e-07, "loss": 0.0498, "step": 51810 }, { "epoch": 0.9175488229798953, "grad_norm": 0.6347415447235107, "learning_rate": 5.314979755226007e-07, "loss": 0.061, "step": 51811 }, { "epoch": 0.9175665325169237, "grad_norm": 0.7973580956459045, "learning_rate": 5.312710018037192e-07, "loss": 0.0619, "step": 51812 }, { "epoch": 0.9175842420539521, "grad_norm": 0.40396422147750854, "learning_rate": 5.310440756854928e-07, "loss": 0.0436, "step": 51813 }, { "epoch": 0.9176019515909806, "grad_norm": 0.6998255252838135, "learning_rate": 5.308171971686676e-07, "loss": 0.0528, "step": 51814 }, { "epoch": 0.917619661128009, "grad_norm": 1.0556809902191162, "learning_rate": 5.305903662539912e-07, "loss": 0.0615, "step": 51815 }, { "epoch": 0.9176373706650374, "grad_norm": 0.4750581383705139, "learning_rate": 5.303635829422099e-07, "loss": 0.0452, "step": 51816 }, { "epoch": 0.9176550802020658, "grad_norm": 0.8125238418579102, "learning_rate": 5.301368472340645e-07, "loss": 0.0513, "step": 51817 }, { "epoch": 0.9176727897390943, "grad_norm": 0.8721326589584351, "learning_rate": 5.29910159130308e-07, "loss": 0.0476, "step": 51818 }, { "epoch": 0.9176904992761227, "grad_norm": 0.724094569683075, "learning_rate": 5.296835186316829e-07, "loss": 0.0609, "step": 51819 }, { "epoch": 0.9177082088131511, "grad_norm": 1.2998926639556885, "learning_rate": 5.294569257389386e-07, "loss": 0.0819, "step": 51820 }, { "epoch": 0.9177259183501796, "grad_norm": 0.8391976356506348, "learning_rate": 5.292303804528149e-07, "loss": 0.0604, "step": 51821 }, { "epoch": 0.917743627887208, "grad_norm": 0.47914764285087585, "learning_rate": 5.29003882774059e-07, "loss": 0.0413, "step": 51822 }, { "epoch": 0.9177613374242364, "grad_norm": 0.7684598565101624, "learning_rate": 5.287774327034189e-07, "loss": 0.0515, "step": 51823 }, { "epoch": 0.9177790469612648, "grad_norm": 0.7734165787696838, "learning_rate": 5.285510302416358e-07, "loss": 0.085, "step": 51824 }, { "epoch": 0.9177967564982933, "grad_norm": 0.4898213744163513, "learning_rate": 5.283246753894555e-07, "loss": 0.0407, "step": 51825 }, { "epoch": 0.9178144660353217, "grad_norm": 1.0103729963302612, "learning_rate": 5.280983681476226e-07, "loss": 0.0824, "step": 51826 }, { "epoch": 0.9178321755723501, "grad_norm": 0.5046969056129456, "learning_rate": 5.278721085168847e-07, "loss": 0.0568, "step": 51827 }, { "epoch": 0.9178498851093785, "grad_norm": 0.6118809580802917, "learning_rate": 5.276458964979813e-07, "loss": 0.036, "step": 51828 }, { "epoch": 0.917867594646407, "grad_norm": 0.8631811738014221, "learning_rate": 5.274197320916602e-07, "loss": 0.0655, "step": 51829 }, { "epoch": 0.9178853041834354, "grad_norm": 0.4786010682582855, "learning_rate": 5.271936152986657e-07, "loss": 0.0338, "step": 51830 }, { "epoch": 0.9179030137204638, "grad_norm": 0.7615160346031189, "learning_rate": 5.269675461197404e-07, "loss": 0.0412, "step": 51831 }, { "epoch": 0.9179207232574922, "grad_norm": 0.3484661877155304, "learning_rate": 5.267415245556273e-07, "loss": 0.028, "step": 51832 }, { "epoch": 0.9179384327945207, "grad_norm": 0.6865903735160828, "learning_rate": 5.26515550607069e-07, "loss": 0.0684, "step": 51833 }, { "epoch": 0.9179561423315491, "grad_norm": 0.9464626312255859, "learning_rate": 5.262896242748133e-07, "loss": 0.0536, "step": 51834 }, { "epoch": 0.9179738518685775, "grad_norm": 0.7314403653144836, "learning_rate": 5.260637455595995e-07, "loss": 0.0503, "step": 51835 }, { "epoch": 0.917991561405606, "grad_norm": 0.7055317163467407, "learning_rate": 5.258379144621722e-07, "loss": 0.0708, "step": 51836 }, { "epoch": 0.9180092709426344, "grad_norm": 0.7555413842201233, "learning_rate": 5.25612130983274e-07, "loss": 0.0665, "step": 51837 }, { "epoch": 0.9180269804796628, "grad_norm": 0.5310285687446594, "learning_rate": 5.253863951236476e-07, "loss": 0.0362, "step": 51838 }, { "epoch": 0.9180446900166912, "grad_norm": 0.41174402832984924, "learning_rate": 5.251607068840358e-07, "loss": 0.056, "step": 51839 }, { "epoch": 0.9180623995537197, "grad_norm": 0.6515800356864929, "learning_rate": 5.249350662651814e-07, "loss": 0.0417, "step": 51840 }, { "epoch": 0.9180801090907481, "grad_norm": 0.5385584831237793, "learning_rate": 5.247094732678287e-07, "loss": 0.043, "step": 51841 }, { "epoch": 0.9180978186277765, "grad_norm": 0.35127130150794983, "learning_rate": 5.244839278927155e-07, "loss": 0.0306, "step": 51842 }, { "epoch": 0.9181155281648049, "grad_norm": 0.5781478881835938, "learning_rate": 5.242584301405862e-07, "loss": 0.0393, "step": 51843 }, { "epoch": 0.9181332377018334, "grad_norm": 0.7881512641906738, "learning_rate": 5.240329800121818e-07, "loss": 0.067, "step": 51844 }, { "epoch": 0.9181509472388618, "grad_norm": 0.5555035471916199, "learning_rate": 5.238075775082485e-07, "loss": 0.0675, "step": 51845 }, { "epoch": 0.9181686567758902, "grad_norm": 0.6760643720626831, "learning_rate": 5.23582222629519e-07, "loss": 0.0492, "step": 51846 }, { "epoch": 0.9181863663129186, "grad_norm": 0.5860021710395813, "learning_rate": 5.233569153767426e-07, "loss": 0.0649, "step": 51847 }, { "epoch": 0.9182040758499471, "grad_norm": 0.5930430293083191, "learning_rate": 5.231316557506588e-07, "loss": 0.0568, "step": 51848 }, { "epoch": 0.9182217853869755, "grad_norm": 0.8339915871620178, "learning_rate": 5.229064437520054e-07, "loss": 0.08, "step": 51849 }, { "epoch": 0.9182394949240039, "grad_norm": 0.5472763180732727, "learning_rate": 5.22681279381525e-07, "loss": 0.0511, "step": 51850 }, { "epoch": 0.9182572044610324, "grad_norm": 1.1560685634613037, "learning_rate": 5.224561626399605e-07, "loss": 0.0656, "step": 51851 }, { "epoch": 0.9182749139980608, "grad_norm": 0.6127113699913025, "learning_rate": 5.222310935280511e-07, "loss": 0.0698, "step": 51852 }, { "epoch": 0.9182926235350892, "grad_norm": 0.289582759141922, "learning_rate": 5.220060720465364e-07, "loss": 0.0249, "step": 51853 }, { "epoch": 0.9183103330721176, "grad_norm": 0.5584491491317749, "learning_rate": 5.217810981961573e-07, "loss": 0.0477, "step": 51854 }, { "epoch": 0.9183280426091461, "grad_norm": 0.820786714553833, "learning_rate": 5.215561719776551e-07, "loss": 0.0876, "step": 51855 }, { "epoch": 0.9183457521461745, "grad_norm": 0.8504220247268677, "learning_rate": 5.213312933917675e-07, "loss": 0.0691, "step": 51856 }, { "epoch": 0.9183634616832029, "grad_norm": 0.6133024096488953, "learning_rate": 5.211064624392353e-07, "loss": 0.0459, "step": 51857 }, { "epoch": 0.9183811712202313, "grad_norm": 0.32246828079223633, "learning_rate": 5.208816791207982e-07, "loss": 0.0452, "step": 51858 }, { "epoch": 0.9183988807572598, "grad_norm": 0.46638578176498413, "learning_rate": 5.206569434371971e-07, "loss": 0.0504, "step": 51859 }, { "epoch": 0.9184165902942882, "grad_norm": 1.1027687788009644, "learning_rate": 5.2043225538917e-07, "loss": 0.0666, "step": 51860 }, { "epoch": 0.9184342998313166, "grad_norm": 0.7741055488586426, "learning_rate": 5.202076149774559e-07, "loss": 0.0716, "step": 51861 }, { "epoch": 0.918452009368345, "grad_norm": 0.5921568870544434, "learning_rate": 5.19983022202793e-07, "loss": 0.0621, "step": 51862 }, { "epoch": 0.9184697189053735, "grad_norm": 0.5977989435195923, "learning_rate": 5.197584770659252e-07, "loss": 0.0402, "step": 51863 }, { "epoch": 0.9184874284424019, "grad_norm": 0.8458514213562012, "learning_rate": 5.19533979567584e-07, "loss": 0.0762, "step": 51864 }, { "epoch": 0.9185051379794303, "grad_norm": 0.5539395809173584, "learning_rate": 5.193095297085121e-07, "loss": 0.0452, "step": 51865 }, { "epoch": 0.9185228475164589, "grad_norm": 0.4964965283870697, "learning_rate": 5.190851274894471e-07, "loss": 0.0495, "step": 51866 }, { "epoch": 0.9185405570534872, "grad_norm": 0.5815337896347046, "learning_rate": 5.188607729111267e-07, "loss": 0.0701, "step": 51867 }, { "epoch": 0.9185582665905156, "grad_norm": 0.5444874167442322, "learning_rate": 5.186364659742904e-07, "loss": 0.0611, "step": 51868 }, { "epoch": 0.918575976127544, "grad_norm": 0.19640690088272095, "learning_rate": 5.18412206679676e-07, "loss": 0.0347, "step": 51869 }, { "epoch": 0.9185936856645726, "grad_norm": 0.16018563508987427, "learning_rate": 5.181879950280211e-07, "loss": 0.0419, "step": 51870 }, { "epoch": 0.918611395201601, "grad_norm": 0.6142697930335999, "learning_rate": 5.179638310200618e-07, "loss": 0.0733, "step": 51871 }, { "epoch": 0.9186291047386294, "grad_norm": 0.8125987648963928, "learning_rate": 5.17739714656536e-07, "loss": 0.0722, "step": 51872 }, { "epoch": 0.9186468142756578, "grad_norm": 0.5921007394790649, "learning_rate": 5.175156459381847e-07, "loss": 0.0463, "step": 51873 }, { "epoch": 0.9186645238126863, "grad_norm": 0.675278902053833, "learning_rate": 5.172916248657405e-07, "loss": 0.0501, "step": 51874 }, { "epoch": 0.9186822333497147, "grad_norm": 0.6716240048408508, "learning_rate": 5.170676514399398e-07, "loss": 0.0781, "step": 51875 }, { "epoch": 0.9186999428867431, "grad_norm": 0.9800664186477661, "learning_rate": 5.168437256615233e-07, "loss": 0.0902, "step": 51876 }, { "epoch": 0.9187176524237715, "grad_norm": 0.5734376311302185, "learning_rate": 5.166198475312273e-07, "loss": 0.0571, "step": 51877 }, { "epoch": 0.9187353619608, "grad_norm": 0.7283717393875122, "learning_rate": 5.163960170497862e-07, "loss": 0.0817, "step": 51878 }, { "epoch": 0.9187530714978284, "grad_norm": 0.7963385581970215, "learning_rate": 5.161722342179376e-07, "loss": 0.058, "step": 51879 }, { "epoch": 0.9187707810348568, "grad_norm": 0.4411248564720154, "learning_rate": 5.159484990364177e-07, "loss": 0.0415, "step": 51880 }, { "epoch": 0.9187884905718853, "grad_norm": 0.5470021367073059, "learning_rate": 5.157248115059609e-07, "loss": 0.0619, "step": 51881 }, { "epoch": 0.9188062001089137, "grad_norm": 0.48315128684043884, "learning_rate": 5.155011716273051e-07, "loss": 0.0369, "step": 51882 }, { "epoch": 0.9188239096459421, "grad_norm": 0.37479522824287415, "learning_rate": 5.15277579401186e-07, "loss": 0.0356, "step": 51883 }, { "epoch": 0.9188416191829705, "grad_norm": 0.3210632801055908, "learning_rate": 5.150540348283367e-07, "loss": 0.0388, "step": 51884 }, { "epoch": 0.918859328719999, "grad_norm": 0.4842297434806824, "learning_rate": 5.148305379094964e-07, "loss": 0.0421, "step": 51885 }, { "epoch": 0.9188770382570274, "grad_norm": 0.7502105236053467, "learning_rate": 5.146070886453979e-07, "loss": 0.0629, "step": 51886 }, { "epoch": 0.9188947477940558, "grad_norm": 0.9067124724388123, "learning_rate": 5.14383687036779e-07, "loss": 0.0557, "step": 51887 }, { "epoch": 0.9189124573310842, "grad_norm": 0.6201159954071045, "learning_rate": 5.141603330843691e-07, "loss": 0.06, "step": 51888 }, { "epoch": 0.9189301668681127, "grad_norm": 0.8810535669326782, "learning_rate": 5.139370267889094e-07, "loss": 0.075, "step": 51889 }, { "epoch": 0.9189478764051411, "grad_norm": 0.4703592360019684, "learning_rate": 5.137137681511289e-07, "loss": 0.05, "step": 51890 }, { "epoch": 0.9189655859421695, "grad_norm": 0.9318180680274963, "learning_rate": 5.134905571717691e-07, "loss": 0.0651, "step": 51891 }, { "epoch": 0.9189832954791979, "grad_norm": 0.4817618131637573, "learning_rate": 5.132673938515575e-07, "loss": 0.0604, "step": 51892 }, { "epoch": 0.9190010050162264, "grad_norm": 0.5781813263893127, "learning_rate": 5.130442781912304e-07, "loss": 0.0387, "step": 51893 }, { "epoch": 0.9190187145532548, "grad_norm": 0.46385458111763, "learning_rate": 5.128212101915219e-07, "loss": 0.0382, "step": 51894 }, { "epoch": 0.9190364240902832, "grad_norm": 0.34030041098594666, "learning_rate": 5.125981898531684e-07, "loss": 0.0368, "step": 51895 }, { "epoch": 0.9190541336273117, "grad_norm": 0.5135340094566345, "learning_rate": 5.123752171768992e-07, "loss": 0.0426, "step": 51896 }, { "epoch": 0.9190718431643401, "grad_norm": 0.3163531720638275, "learning_rate": 5.121522921634519e-07, "loss": 0.0527, "step": 51897 }, { "epoch": 0.9190895527013685, "grad_norm": 1.0412321090698242, "learning_rate": 5.119294148135595e-07, "loss": 0.0838, "step": 51898 }, { "epoch": 0.9191072622383969, "grad_norm": 0.4730290174484253, "learning_rate": 5.117065851279512e-07, "loss": 0.0548, "step": 51899 }, { "epoch": 0.9191249717754254, "grad_norm": 0.34862589836120605, "learning_rate": 5.114838031073648e-07, "loss": 0.0583, "step": 51900 }, { "epoch": 0.9191426813124538, "grad_norm": 0.5448756217956543, "learning_rate": 5.112610687525299e-07, "loss": 0.0471, "step": 51901 }, { "epoch": 0.9191603908494822, "grad_norm": 0.6623635292053223, "learning_rate": 5.110383820641823e-07, "loss": 0.059, "step": 51902 }, { "epoch": 0.9191781003865106, "grad_norm": 0.3703829348087311, "learning_rate": 5.108157430430516e-07, "loss": 0.0589, "step": 51903 }, { "epoch": 0.9191958099235391, "grad_norm": 0.8020554184913635, "learning_rate": 5.105931516898704e-07, "loss": 0.0634, "step": 51904 }, { "epoch": 0.9192135194605675, "grad_norm": 0.3575272858142853, "learning_rate": 5.10370608005375e-07, "loss": 0.0445, "step": 51905 }, { "epoch": 0.9192312289975959, "grad_norm": 0.5374888777732849, "learning_rate": 5.101481119902929e-07, "loss": 0.0593, "step": 51906 }, { "epoch": 0.9192489385346243, "grad_norm": 0.6669178605079651, "learning_rate": 5.099256636453586e-07, "loss": 0.0606, "step": 51907 }, { "epoch": 0.9192666480716528, "grad_norm": 1.0807199478149414, "learning_rate": 5.097032629713016e-07, "loss": 0.0857, "step": 51908 }, { "epoch": 0.9192843576086812, "grad_norm": 1.0013890266418457, "learning_rate": 5.09480909968858e-07, "loss": 0.0682, "step": 51909 }, { "epoch": 0.9193020671457096, "grad_norm": 0.7508838772773743, "learning_rate": 5.092586046387554e-07, "loss": 0.061, "step": 51910 }, { "epoch": 0.9193197766827381, "grad_norm": 0.3292061984539032, "learning_rate": 5.090363469817266e-07, "loss": 0.0494, "step": 51911 }, { "epoch": 0.9193374862197665, "grad_norm": 0.6936826109886169, "learning_rate": 5.088141369985011e-07, "loss": 0.0596, "step": 51912 }, { "epoch": 0.9193551957567949, "grad_norm": 0.5863195061683655, "learning_rate": 5.085919746898132e-07, "loss": 0.0469, "step": 51913 }, { "epoch": 0.9193729052938233, "grad_norm": 0.5660568475723267, "learning_rate": 5.083698600563907e-07, "loss": 0.0725, "step": 51914 }, { "epoch": 0.9193906148308518, "grad_norm": 0.17499323189258575, "learning_rate": 5.081477930989664e-07, "loss": 0.0388, "step": 51915 }, { "epoch": 0.9194083243678802, "grad_norm": 0.752659022808075, "learning_rate": 5.079257738182713e-07, "loss": 0.0608, "step": 51916 }, { "epoch": 0.9194260339049086, "grad_norm": 0.546184778213501, "learning_rate": 5.077038022150332e-07, "loss": 0.0481, "step": 51917 }, { "epoch": 0.919443743441937, "grad_norm": 0.5701320171356201, "learning_rate": 5.074818782899831e-07, "loss": 0.0552, "step": 51918 }, { "epoch": 0.9194614529789655, "grad_norm": 0.43333613872528076, "learning_rate": 5.072600020438539e-07, "loss": 0.0747, "step": 51919 }, { "epoch": 0.9194791625159939, "grad_norm": 0.8439506888389587, "learning_rate": 5.070381734773716e-07, "loss": 0.0562, "step": 51920 }, { "epoch": 0.9194968720530223, "grad_norm": 0.5793442130088806, "learning_rate": 5.068163925912689e-07, "loss": 0.0397, "step": 51921 }, { "epoch": 0.9195145815900507, "grad_norm": 0.6063505411148071, "learning_rate": 5.065946593862735e-07, "loss": 0.0418, "step": 51922 }, { "epoch": 0.9195322911270792, "grad_norm": 0.9667619466781616, "learning_rate": 5.063729738631151e-07, "loss": 0.041, "step": 51923 }, { "epoch": 0.9195500006641076, "grad_norm": 0.7378900051116943, "learning_rate": 5.061513360225245e-07, "loss": 0.0408, "step": 51924 }, { "epoch": 0.919567710201136, "grad_norm": 0.7307468056678772, "learning_rate": 5.059297458652296e-07, "loss": 0.0598, "step": 51925 }, { "epoch": 0.9195854197381645, "grad_norm": 0.6323626041412354, "learning_rate": 5.057082033919613e-07, "loss": 0.0595, "step": 51926 }, { "epoch": 0.9196031292751929, "grad_norm": 0.589565634727478, "learning_rate": 5.054867086034459e-07, "loss": 0.0392, "step": 51927 }, { "epoch": 0.9196208388122213, "grad_norm": 0.6059870719909668, "learning_rate": 5.052652615004144e-07, "loss": 0.0811, "step": 51928 }, { "epoch": 0.9196385483492497, "grad_norm": 0.8855079412460327, "learning_rate": 5.050438620835928e-07, "loss": 0.0458, "step": 51929 }, { "epoch": 0.9196562578862782, "grad_norm": 0.6076086759567261, "learning_rate": 5.048225103537124e-07, "loss": 0.0475, "step": 51930 }, { "epoch": 0.9196739674233066, "grad_norm": 0.5295187830924988, "learning_rate": 5.046012063114975e-07, "loss": 0.0449, "step": 51931 }, { "epoch": 0.919691676960335, "grad_norm": 0.6877386569976807, "learning_rate": 5.04379949957679e-07, "loss": 0.0668, "step": 51932 }, { "epoch": 0.9197093864973634, "grad_norm": 0.7186889052391052, "learning_rate": 5.041587412929832e-07, "loss": 0.0449, "step": 51933 }, { "epoch": 0.919727096034392, "grad_norm": 0.44203415513038635, "learning_rate": 5.039375803181428e-07, "loss": 0.0451, "step": 51934 }, { "epoch": 0.9197448055714204, "grad_norm": 0.5628036856651306, "learning_rate": 5.037164670338789e-07, "loss": 0.0552, "step": 51935 }, { "epoch": 0.9197625151084488, "grad_norm": 0.5681347846984863, "learning_rate": 5.034954014409227e-07, "loss": 0.0408, "step": 51936 }, { "epoch": 0.9197802246454772, "grad_norm": 0.7503439784049988, "learning_rate": 5.0327438354e-07, "loss": 0.0517, "step": 51937 }, { "epoch": 0.9197979341825057, "grad_norm": 0.726431667804718, "learning_rate": 5.030534133318387e-07, "loss": 0.0686, "step": 51938 }, { "epoch": 0.9198156437195341, "grad_norm": 0.6858296990394592, "learning_rate": 5.02832490817165e-07, "loss": 0.079, "step": 51939 }, { "epoch": 0.9198333532565625, "grad_norm": 1.0865440368652344, "learning_rate": 5.026116159967048e-07, "loss": 0.0804, "step": 51940 }, { "epoch": 0.919851062793591, "grad_norm": 0.4542803466320038, "learning_rate": 5.023907888711876e-07, "loss": 0.0585, "step": 51941 }, { "epoch": 0.9198687723306194, "grad_norm": 0.7263712286949158, "learning_rate": 5.021700094413378e-07, "loss": 0.0625, "step": 51942 }, { "epoch": 0.9198864818676478, "grad_norm": 0.5723327994346619, "learning_rate": 5.019492777078816e-07, "loss": 0.0374, "step": 51943 }, { "epoch": 0.9199041914046762, "grad_norm": 0.6112474203109741, "learning_rate": 5.017285936715482e-07, "loss": 0.0349, "step": 51944 }, { "epoch": 0.9199219009417047, "grad_norm": 0.4719648063182831, "learning_rate": 5.015079573330605e-07, "loss": 0.0563, "step": 51945 }, { "epoch": 0.9199396104787331, "grad_norm": 0.3763720989227295, "learning_rate": 5.012873686931446e-07, "loss": 0.0453, "step": 51946 }, { "epoch": 0.9199573200157615, "grad_norm": 0.7474746108055115, "learning_rate": 5.010668277525265e-07, "loss": 0.0448, "step": 51947 }, { "epoch": 0.9199750295527899, "grad_norm": 0.6636554002761841, "learning_rate": 5.008463345119324e-07, "loss": 0.0477, "step": 51948 }, { "epoch": 0.9199927390898184, "grad_norm": 0.6572140455245972, "learning_rate": 5.006258889720883e-07, "loss": 0.0693, "step": 51949 }, { "epoch": 0.9200104486268468, "grad_norm": 0.27189216017723083, "learning_rate": 5.004054911337169e-07, "loss": 0.0362, "step": 51950 }, { "epoch": 0.9200281581638752, "grad_norm": 0.6065279245376587, "learning_rate": 5.001851409975444e-07, "loss": 0.0541, "step": 51951 }, { "epoch": 0.9200458677009036, "grad_norm": 0.36911219358444214, "learning_rate": 4.999648385642969e-07, "loss": 0.0451, "step": 51952 }, { "epoch": 0.9200635772379321, "grad_norm": 0.711868405342102, "learning_rate": 4.997445838346987e-07, "loss": 0.0421, "step": 51953 }, { "epoch": 0.9200812867749605, "grad_norm": 0.44702184200286865, "learning_rate": 4.995243768094743e-07, "loss": 0.0505, "step": 51954 }, { "epoch": 0.9200989963119889, "grad_norm": 0.9757828116416931, "learning_rate": 4.993042174893498e-07, "loss": 0.0663, "step": 51955 }, { "epoch": 0.9201167058490174, "grad_norm": 0.1694028079509735, "learning_rate": 4.990841058750462e-07, "loss": 0.0484, "step": 51956 }, { "epoch": 0.9201344153860458, "grad_norm": 0.8782791495323181, "learning_rate": 4.988640419672896e-07, "loss": 0.0694, "step": 51957 }, { "epoch": 0.9201521249230742, "grad_norm": 0.4613897204399109, "learning_rate": 4.986440257668045e-07, "loss": 0.0618, "step": 51958 }, { "epoch": 0.9201698344601026, "grad_norm": 0.7164164781570435, "learning_rate": 4.98424057274312e-07, "loss": 0.0464, "step": 51959 }, { "epoch": 0.9201875439971311, "grad_norm": 0.7474613785743713, "learning_rate": 4.982041364905382e-07, "loss": 0.0588, "step": 51960 }, { "epoch": 0.9202052535341595, "grad_norm": 0.4022536277770996, "learning_rate": 4.979842634162041e-07, "loss": 0.0385, "step": 51961 }, { "epoch": 0.9202229630711879, "grad_norm": 0.47948694229125977, "learning_rate": 4.977644380520375e-07, "loss": 0.0431, "step": 51962 }, { "epoch": 0.9202406726082163, "grad_norm": 0.3942529857158661, "learning_rate": 4.975446603987577e-07, "loss": 0.0563, "step": 51963 }, { "epoch": 0.9202583821452448, "grad_norm": 0.5950343012809753, "learning_rate": 4.973249304570893e-07, "loss": 0.0456, "step": 51964 }, { "epoch": 0.9202760916822732, "grad_norm": 0.6013069152832031, "learning_rate": 4.971052482277566e-07, "loss": 0.0601, "step": 51965 }, { "epoch": 0.9202938012193016, "grad_norm": 0.30036771297454834, "learning_rate": 4.968856137114807e-07, "loss": 0.0354, "step": 51966 }, { "epoch": 0.92031151075633, "grad_norm": 0.5807933807373047, "learning_rate": 4.966660269089829e-07, "loss": 0.0458, "step": 51967 }, { "epoch": 0.9203292202933585, "grad_norm": 0.7917752861976624, "learning_rate": 4.964464878209873e-07, "loss": 0.0517, "step": 51968 }, { "epoch": 0.9203469298303869, "grad_norm": 0.447965145111084, "learning_rate": 4.962269964482169e-07, "loss": 0.048, "step": 51969 }, { "epoch": 0.9203646393674153, "grad_norm": 0.730832040309906, "learning_rate": 4.960075527913893e-07, "loss": 0.0701, "step": 51970 }, { "epoch": 0.9203823489044438, "grad_norm": 0.7481309175491333, "learning_rate": 4.957881568512323e-07, "loss": 0.0575, "step": 51971 }, { "epoch": 0.9204000584414722, "grad_norm": 0.4854484796524048, "learning_rate": 4.955688086284637e-07, "loss": 0.0691, "step": 51972 }, { "epoch": 0.9204177679785006, "grad_norm": 0.5193274021148682, "learning_rate": 4.953495081238096e-07, "loss": 0.0509, "step": 51973 }, { "epoch": 0.920435477515529, "grad_norm": 0.47869399189949036, "learning_rate": 4.951302553379877e-07, "loss": 0.0433, "step": 51974 }, { "epoch": 0.9204531870525575, "grad_norm": 0.37089747190475464, "learning_rate": 4.949110502717191e-07, "loss": 0.0495, "step": 51975 }, { "epoch": 0.9204708965895859, "grad_norm": 0.8239192962646484, "learning_rate": 4.946918929257283e-07, "loss": 0.0819, "step": 51976 }, { "epoch": 0.9204886061266143, "grad_norm": 0.5883974432945251, "learning_rate": 4.944727833007312e-07, "loss": 0.0413, "step": 51977 }, { "epoch": 0.9205063156636427, "grad_norm": 0.3912472128868103, "learning_rate": 4.942537213974524e-07, "loss": 0.0415, "step": 51978 }, { "epoch": 0.9205240252006712, "grad_norm": 0.6376401782035828, "learning_rate": 4.940347072166113e-07, "loss": 0.0382, "step": 51979 }, { "epoch": 0.9205417347376996, "grad_norm": 0.3840750753879547, "learning_rate": 4.938157407589289e-07, "loss": 0.0483, "step": 51980 }, { "epoch": 0.920559444274728, "grad_norm": 0.5665261745452881, "learning_rate": 4.935968220251263e-07, "loss": 0.0591, "step": 51981 }, { "epoch": 0.9205771538117564, "grad_norm": 1.113918662071228, "learning_rate": 4.933779510159214e-07, "loss": 0.085, "step": 51982 }, { "epoch": 0.9205948633487849, "grad_norm": 0.5189319849014282, "learning_rate": 4.931591277320385e-07, "loss": 0.042, "step": 51983 }, { "epoch": 0.9206125728858133, "grad_norm": 0.4794166088104248, "learning_rate": 4.929403521741937e-07, "loss": 0.0401, "step": 51984 }, { "epoch": 0.9206302824228417, "grad_norm": 0.5566710233688354, "learning_rate": 4.927216243431065e-07, "loss": 0.0723, "step": 51985 }, { "epoch": 0.9206479919598702, "grad_norm": 0.5757532119750977, "learning_rate": 4.92502944239498e-07, "loss": 0.0566, "step": 51986 }, { "epoch": 0.9206657014968986, "grad_norm": 1.0640641450881958, "learning_rate": 4.922843118640907e-07, "loss": 0.0497, "step": 51987 }, { "epoch": 0.920683411033927, "grad_norm": 0.7791633605957031, "learning_rate": 4.920657272175977e-07, "loss": 0.06, "step": 51988 }, { "epoch": 0.9207011205709554, "grad_norm": 0.3779413402080536, "learning_rate": 4.918471903007416e-07, "loss": 0.0406, "step": 51989 }, { "epoch": 0.9207188301079839, "grad_norm": 0.6666238307952881, "learning_rate": 4.916287011142401e-07, "loss": 0.0619, "step": 51990 }, { "epoch": 0.9207365396450123, "grad_norm": 0.3045254647731781, "learning_rate": 4.914102596588127e-07, "loss": 0.0434, "step": 51991 }, { "epoch": 0.9207542491820407, "grad_norm": 0.6018356680870056, "learning_rate": 4.911918659351789e-07, "loss": 0.0419, "step": 51992 }, { "epoch": 0.9207719587190691, "grad_norm": 0.5554841756820679, "learning_rate": 4.909735199440562e-07, "loss": 0.031, "step": 51993 }, { "epoch": 0.9207896682560976, "grad_norm": 0.5966249704360962, "learning_rate": 4.907552216861644e-07, "loss": 0.045, "step": 51994 }, { "epoch": 0.920807377793126, "grad_norm": 0.33142098784446716, "learning_rate": 4.905369711622176e-07, "loss": 0.0534, "step": 51995 }, { "epoch": 0.9208250873301544, "grad_norm": 0.5430798530578613, "learning_rate": 4.903187683729388e-07, "loss": 0.0653, "step": 51996 }, { "epoch": 0.9208427968671828, "grad_norm": 0.40395116806030273, "learning_rate": 4.901006133190438e-07, "loss": 0.0518, "step": 51997 }, { "epoch": 0.9208605064042114, "grad_norm": 0.35438111424446106, "learning_rate": 4.898825060012491e-07, "loss": 0.0424, "step": 51998 }, { "epoch": 0.9208782159412398, "grad_norm": 0.4643591046333313, "learning_rate": 4.896644464202721e-07, "loss": 0.0698, "step": 51999 }, { "epoch": 0.9208959254782682, "grad_norm": 0.4523887634277344, "learning_rate": 4.894464345768323e-07, "loss": 0.0496, "step": 52000 }, { "epoch": 0.9209136350152967, "grad_norm": 0.5908466577529907, "learning_rate": 4.892284704716493e-07, "loss": 0.0404, "step": 52001 }, { "epoch": 0.9209313445523251, "grad_norm": 0.4079414904117584, "learning_rate": 4.890105541054341e-07, "loss": 0.0486, "step": 52002 }, { "epoch": 0.9209490540893535, "grad_norm": 0.720893383026123, "learning_rate": 4.88792685478906e-07, "loss": 0.073, "step": 52003 }, { "epoch": 0.9209667636263819, "grad_norm": 0.5523874759674072, "learning_rate": 4.885748645927829e-07, "loss": 0.0448, "step": 52004 }, { "epoch": 0.9209844731634104, "grad_norm": 0.5058995485305786, "learning_rate": 4.883570914477825e-07, "loss": 0.0522, "step": 52005 }, { "epoch": 0.9210021827004388, "grad_norm": 0.40362367033958435, "learning_rate": 4.881393660446176e-07, "loss": 0.0507, "step": 52006 }, { "epoch": 0.9210198922374672, "grad_norm": 0.7525837421417236, "learning_rate": 4.87921688384006e-07, "loss": 0.0667, "step": 52007 }, { "epoch": 0.9210376017744956, "grad_norm": 0.30920112133026123, "learning_rate": 4.877040584666653e-07, "loss": 0.0605, "step": 52008 }, { "epoch": 0.9210553113115241, "grad_norm": 0.6662702560424805, "learning_rate": 4.874864762933101e-07, "loss": 0.046, "step": 52009 }, { "epoch": 0.9210730208485525, "grad_norm": 0.7305753231048584, "learning_rate": 4.872689418646564e-07, "loss": 0.0672, "step": 52010 }, { "epoch": 0.9210907303855809, "grad_norm": 0.6714496612548828, "learning_rate": 4.870514551814187e-07, "loss": 0.0616, "step": 52011 }, { "epoch": 0.9211084399226093, "grad_norm": 0.6937105059623718, "learning_rate": 4.86834016244318e-07, "loss": 0.053, "step": 52012 }, { "epoch": 0.9211261494596378, "grad_norm": 1.1195993423461914, "learning_rate": 4.866166250540621e-07, "loss": 0.0754, "step": 52013 }, { "epoch": 0.9211438589966662, "grad_norm": 0.7558934688568115, "learning_rate": 4.863992816113688e-07, "loss": 0.0617, "step": 52014 }, { "epoch": 0.9211615685336946, "grad_norm": 0.8613884449005127, "learning_rate": 4.861819859169575e-07, "loss": 0.0322, "step": 52015 }, { "epoch": 0.9211792780707231, "grad_norm": 0.2868879437446594, "learning_rate": 4.859647379715377e-07, "loss": 0.031, "step": 52016 }, { "epoch": 0.9211969876077515, "grad_norm": 0.7601189017295837, "learning_rate": 4.857475377758252e-07, "loss": 0.0589, "step": 52017 }, { "epoch": 0.9212146971447799, "grad_norm": 0.510773241519928, "learning_rate": 4.855303853305349e-07, "loss": 0.0651, "step": 52018 }, { "epoch": 0.9212324066818083, "grad_norm": 0.821075975894928, "learning_rate": 4.853132806363808e-07, "loss": 0.0749, "step": 52019 }, { "epoch": 0.9212501162188368, "grad_norm": 0.8193659782409668, "learning_rate": 4.850962236940792e-07, "loss": 0.0451, "step": 52020 }, { "epoch": 0.9212678257558652, "grad_norm": 0.69124835729599, "learning_rate": 4.848792145043429e-07, "loss": 0.0594, "step": 52021 }, { "epoch": 0.9212855352928936, "grad_norm": 0.4247969388961792, "learning_rate": 4.84662253067888e-07, "loss": 0.0583, "step": 52022 }, { "epoch": 0.921303244829922, "grad_norm": 0.38801875710487366, "learning_rate": 4.844453393854237e-07, "loss": 0.0463, "step": 52023 }, { "epoch": 0.9213209543669505, "grad_norm": 0.6168967485427856, "learning_rate": 4.842284734576663e-07, "loss": 0.0628, "step": 52024 }, { "epoch": 0.9213386639039789, "grad_norm": 0.31099337339401245, "learning_rate": 4.840116552853302e-07, "loss": 0.0341, "step": 52025 }, { "epoch": 0.9213563734410073, "grad_norm": 0.4442463219165802, "learning_rate": 4.837948848691265e-07, "loss": 0.0469, "step": 52026 }, { "epoch": 0.9213740829780358, "grad_norm": 0.8149998188018799, "learning_rate": 4.835781622097696e-07, "loss": 0.0664, "step": 52027 }, { "epoch": 0.9213917925150642, "grad_norm": 0.8363265991210938, "learning_rate": 4.833614873079705e-07, "loss": 0.0762, "step": 52028 }, { "epoch": 0.9214095020520926, "grad_norm": 0.4654955267906189, "learning_rate": 4.831448601644473e-07, "loss": 0.0371, "step": 52029 }, { "epoch": 0.921427211589121, "grad_norm": 0.1979491412639618, "learning_rate": 4.829282807799073e-07, "loss": 0.0401, "step": 52030 }, { "epoch": 0.9214449211261495, "grad_norm": 0.3522655963897705, "learning_rate": 4.827117491550654e-07, "loss": 0.0451, "step": 52031 }, { "epoch": 0.9214626306631779, "grad_norm": 0.46847257018089294, "learning_rate": 4.824952652906323e-07, "loss": 0.0378, "step": 52032 }, { "epoch": 0.9214803402002063, "grad_norm": 0.32096707820892334, "learning_rate": 4.822788291873243e-07, "loss": 0.0493, "step": 52033 }, { "epoch": 0.9214980497372347, "grad_norm": 0.6684183478355408, "learning_rate": 4.820624408458473e-07, "loss": 0.0678, "step": 52034 }, { "epoch": 0.9215157592742632, "grad_norm": 0.7514902353286743, "learning_rate": 4.818461002669178e-07, "loss": 0.0559, "step": 52035 }, { "epoch": 0.9215334688112916, "grad_norm": 0.5974710583686829, "learning_rate": 4.816298074512465e-07, "loss": 0.0746, "step": 52036 }, { "epoch": 0.92155117834832, "grad_norm": 0.510077178478241, "learning_rate": 4.814135623995447e-07, "loss": 0.0736, "step": 52037 }, { "epoch": 0.9215688878853484, "grad_norm": 0.5293263792991638, "learning_rate": 4.811973651125235e-07, "loss": 0.0547, "step": 52038 }, { "epoch": 0.9215865974223769, "grad_norm": 0.45696568489074707, "learning_rate": 4.809812155908939e-07, "loss": 0.0438, "step": 52039 }, { "epoch": 0.9216043069594053, "grad_norm": 0.9526198506355286, "learning_rate": 4.807651138353686e-07, "loss": 0.0939, "step": 52040 }, { "epoch": 0.9216220164964337, "grad_norm": 0.9461745619773865, "learning_rate": 4.805490598466572e-07, "loss": 0.0695, "step": 52041 }, { "epoch": 0.9216397260334622, "grad_norm": 0.44140273332595825, "learning_rate": 4.803330536254708e-07, "loss": 0.0667, "step": 52042 }, { "epoch": 0.9216574355704906, "grad_norm": 0.8264976739883423, "learning_rate": 4.801170951725203e-07, "loss": 0.0575, "step": 52043 }, { "epoch": 0.921675145107519, "grad_norm": 0.6653774380683899, "learning_rate": 4.79901184488517e-07, "loss": 0.0306, "step": 52044 }, { "epoch": 0.9216928546445474, "grad_norm": 0.8494346141815186, "learning_rate": 4.796853215741686e-07, "loss": 0.0873, "step": 52045 }, { "epoch": 0.9217105641815759, "grad_norm": 0.8959094882011414, "learning_rate": 4.794695064301863e-07, "loss": 0.1056, "step": 52046 }, { "epoch": 0.9217282737186043, "grad_norm": 0.7600545883178711, "learning_rate": 4.792537390572809e-07, "loss": 0.0633, "step": 52047 }, { "epoch": 0.9217459832556327, "grad_norm": 0.7614609003067017, "learning_rate": 4.790380194561622e-07, "loss": 0.0663, "step": 52048 }, { "epoch": 0.9217636927926611, "grad_norm": 0.6265732645988464, "learning_rate": 4.78822347627541e-07, "loss": 0.0554, "step": 52049 }, { "epoch": 0.9217814023296896, "grad_norm": 1.027816653251648, "learning_rate": 4.786067235721236e-07, "loss": 0.0772, "step": 52050 }, { "epoch": 0.921799111866718, "grad_norm": 0.4699552357196808, "learning_rate": 4.783911472906244e-07, "loss": 0.0665, "step": 52051 }, { "epoch": 0.9218168214037464, "grad_norm": 0.46325698494911194, "learning_rate": 4.781756187837477e-07, "loss": 0.0718, "step": 52052 }, { "epoch": 0.9218345309407748, "grad_norm": 0.5841720104217529, "learning_rate": 4.779601380522031e-07, "loss": 0.0407, "step": 52053 }, { "epoch": 0.9218522404778033, "grad_norm": 0.968323290348053, "learning_rate": 4.777447050967032e-07, "loss": 0.075, "step": 52054 }, { "epoch": 0.9218699500148317, "grad_norm": 0.5310306549072266, "learning_rate": 4.775293199179542e-07, "loss": 0.0706, "step": 52055 }, { "epoch": 0.9218876595518601, "grad_norm": 0.6760796904563904, "learning_rate": 4.773139825166622e-07, "loss": 0.0603, "step": 52056 }, { "epoch": 0.9219053690888886, "grad_norm": 0.537559449672699, "learning_rate": 4.770986928935383e-07, "loss": 0.0443, "step": 52057 }, { "epoch": 0.921923078625917, "grad_norm": 0.8762097954750061, "learning_rate": 4.768834510492953e-07, "loss": 0.048, "step": 52058 }, { "epoch": 0.9219407881629454, "grad_norm": 0.6394121050834656, "learning_rate": 4.7666825698463257e-07, "loss": 0.0479, "step": 52059 }, { "epoch": 0.9219584976999738, "grad_norm": 0.3140279948711395, "learning_rate": 4.7645311070026454e-07, "loss": 0.0539, "step": 52060 }, { "epoch": 0.9219762072370024, "grad_norm": 0.5463111996650696, "learning_rate": 4.7623801219689733e-07, "loss": 0.0519, "step": 52061 }, { "epoch": 0.9219939167740308, "grad_norm": 0.9012595415115356, "learning_rate": 4.760229614752354e-07, "loss": 0.0829, "step": 52062 }, { "epoch": 0.9220116263110592, "grad_norm": 0.471743643283844, "learning_rate": 4.7580795853598977e-07, "loss": 0.0561, "step": 52063 }, { "epoch": 0.9220293358480876, "grad_norm": 1.0699654817581177, "learning_rate": 4.755930033798683e-07, "loss": 0.0611, "step": 52064 }, { "epoch": 0.9220470453851161, "grad_norm": 0.34369173645973206, "learning_rate": 4.7537809600757707e-07, "loss": 0.0354, "step": 52065 }, { "epoch": 0.9220647549221445, "grad_norm": 0.3861740529537201, "learning_rate": 4.751632364198205e-07, "loss": 0.0544, "step": 52066 }, { "epoch": 0.9220824644591729, "grad_norm": 0.731712281703949, "learning_rate": 4.7494842461730803e-07, "loss": 0.0378, "step": 52067 }, { "epoch": 0.9221001739962013, "grad_norm": 0.2253526896238327, "learning_rate": 4.7473366060074907e-07, "loss": 0.0359, "step": 52068 }, { "epoch": 0.9221178835332298, "grad_norm": 0.7931836843490601, "learning_rate": 4.7451894437084476e-07, "loss": 0.0553, "step": 52069 }, { "epoch": 0.9221355930702582, "grad_norm": 0.3014472424983978, "learning_rate": 4.743042759283045e-07, "loss": 0.0546, "step": 52070 }, { "epoch": 0.9221533026072866, "grad_norm": 0.9760137796401978, "learning_rate": 4.7408965527383273e-07, "loss": 0.0597, "step": 52071 }, { "epoch": 0.9221710121443151, "grad_norm": 0.35942116379737854, "learning_rate": 4.738750824081389e-07, "loss": 0.0694, "step": 52072 }, { "epoch": 0.9221887216813435, "grad_norm": 0.573633074760437, "learning_rate": 4.736605573319258e-07, "loss": 0.0501, "step": 52073 }, { "epoch": 0.9222064312183719, "grad_norm": 0.5892528295516968, "learning_rate": 4.7344608004590115e-07, "loss": 0.051, "step": 52074 }, { "epoch": 0.9222241407554003, "grad_norm": 0.7983719706535339, "learning_rate": 4.732316505507678e-07, "loss": 0.058, "step": 52075 }, { "epoch": 0.9222418502924288, "grad_norm": 0.7020514607429504, "learning_rate": 4.730172688472334e-07, "loss": 0.0579, "step": 52076 }, { "epoch": 0.9222595598294572, "grad_norm": 0.5758486390113831, "learning_rate": 4.728029349360041e-07, "loss": 0.0527, "step": 52077 }, { "epoch": 0.9222772693664856, "grad_norm": 0.40068426728248596, "learning_rate": 4.725886488177827e-07, "loss": 0.043, "step": 52078 }, { "epoch": 0.922294978903514, "grad_norm": 0.3387051224708557, "learning_rate": 4.72374410493277e-07, "loss": 0.0364, "step": 52079 }, { "epoch": 0.9223126884405425, "grad_norm": 0.6643640398979187, "learning_rate": 4.72160219963188e-07, "loss": 0.0785, "step": 52080 }, { "epoch": 0.9223303979775709, "grad_norm": 0.7792378067970276, "learning_rate": 4.719460772282236e-07, "loss": 0.0536, "step": 52081 }, { "epoch": 0.9223481075145993, "grad_norm": 0.5222831964492798, "learning_rate": 4.717319822890864e-07, "loss": 0.0393, "step": 52082 }, { "epoch": 0.9223658170516277, "grad_norm": 0.31991150975227356, "learning_rate": 4.7151793514648435e-07, "loss": 0.0284, "step": 52083 }, { "epoch": 0.9223835265886562, "grad_norm": 0.3376482427120209, "learning_rate": 4.713039358011151e-07, "loss": 0.0613, "step": 52084 }, { "epoch": 0.9224012361256846, "grad_norm": 0.43681803345680237, "learning_rate": 4.710899842536881e-07, "loss": 0.0435, "step": 52085 }, { "epoch": 0.922418945662713, "grad_norm": 0.4256456792354584, "learning_rate": 4.708760805049045e-07, "loss": 0.0278, "step": 52086 }, { "epoch": 0.9224366551997415, "grad_norm": 0.37098297476768494, "learning_rate": 4.7066222455547036e-07, "loss": 0.0278, "step": 52087 }, { "epoch": 0.9224543647367699, "grad_norm": 0.627186119556427, "learning_rate": 4.704484164060868e-07, "loss": 0.0507, "step": 52088 }, { "epoch": 0.9224720742737983, "grad_norm": 0.6526144742965698, "learning_rate": 4.702346560574583e-07, "loss": 0.059, "step": 52089 }, { "epoch": 0.9224897838108267, "grad_norm": 0.3882416784763336, "learning_rate": 4.700209435102909e-07, "loss": 0.04, "step": 52090 }, { "epoch": 0.9225074933478552, "grad_norm": 0.8216524124145508, "learning_rate": 4.698072787652824e-07, "loss": 0.0618, "step": 52091 }, { "epoch": 0.9225252028848836, "grad_norm": 0.6334511637687683, "learning_rate": 4.695936618231389e-07, "loss": 0.0799, "step": 52092 }, { "epoch": 0.922542912421912, "grad_norm": 0.6431090831756592, "learning_rate": 4.693800926845632e-07, "loss": 0.0574, "step": 52093 }, { "epoch": 0.9225606219589404, "grad_norm": 0.6569457054138184, "learning_rate": 4.691665713502563e-07, "loss": 0.0501, "step": 52094 }, { "epoch": 0.9225783314959689, "grad_norm": 0.44413450360298157, "learning_rate": 4.6895309782092113e-07, "loss": 0.039, "step": 52095 }, { "epoch": 0.9225960410329973, "grad_norm": 0.8716744184494019, "learning_rate": 4.687396720972603e-07, "loss": 0.0699, "step": 52096 }, { "epoch": 0.9226137505700257, "grad_norm": 0.9976202845573425, "learning_rate": 4.6852629417998e-07, "loss": 0.0596, "step": 52097 }, { "epoch": 0.9226314601070541, "grad_norm": 0.507382869720459, "learning_rate": 4.6831296406977476e-07, "loss": 0.0646, "step": 52098 }, { "epoch": 0.9226491696440826, "grad_norm": 0.4885583221912384, "learning_rate": 4.680996817673505e-07, "loss": 0.0528, "step": 52099 }, { "epoch": 0.922666879181111, "grad_norm": 0.5923681259155273, "learning_rate": 4.6788644727341177e-07, "loss": 0.0703, "step": 52100 }, { "epoch": 0.9226845887181394, "grad_norm": 0.3398302495479584, "learning_rate": 4.67673260588653e-07, "loss": 0.0308, "step": 52101 }, { "epoch": 0.9227022982551679, "grad_norm": 0.9995454549789429, "learning_rate": 4.6746012171378026e-07, "loss": 0.0383, "step": 52102 }, { "epoch": 0.9227200077921963, "grad_norm": 0.5688176155090332, "learning_rate": 4.6724703064949304e-07, "loss": 0.0386, "step": 52103 }, { "epoch": 0.9227377173292247, "grad_norm": 0.5465218424797058, "learning_rate": 4.6703398739649404e-07, "loss": 0.0371, "step": 52104 }, { "epoch": 0.9227554268662531, "grad_norm": 0.7021109461784363, "learning_rate": 4.6682099195548444e-07, "loss": 0.0501, "step": 52105 }, { "epoch": 0.9227731364032816, "grad_norm": 0.6111364364624023, "learning_rate": 4.66608044327162e-07, "loss": 0.0499, "step": 52106 }, { "epoch": 0.92279084594031, "grad_norm": 0.7218324542045593, "learning_rate": 4.663951445122294e-07, "loss": 0.0436, "step": 52107 }, { "epoch": 0.9228085554773384, "grad_norm": 0.5910868644714355, "learning_rate": 4.6618229251138954e-07, "loss": 0.041, "step": 52108 }, { "epoch": 0.9228262650143668, "grad_norm": 0.6560546159744263, "learning_rate": 4.659694883253368e-07, "loss": 0.0487, "step": 52109 }, { "epoch": 0.9228439745513953, "grad_norm": 0.6607282757759094, "learning_rate": 4.6575673195477565e-07, "loss": 0.0647, "step": 52110 }, { "epoch": 0.9228616840884237, "grad_norm": 0.740502119064331, "learning_rate": 4.6554402340040545e-07, "loss": 0.0576, "step": 52111 }, { "epoch": 0.9228793936254521, "grad_norm": 0.6036199927330017, "learning_rate": 4.653313626629241e-07, "loss": 0.0472, "step": 52112 }, { "epoch": 0.9228971031624805, "grad_norm": 0.2400789111852646, "learning_rate": 4.651187497430326e-07, "loss": 0.0471, "step": 52113 }, { "epoch": 0.922914812699509, "grad_norm": 0.4461500644683838, "learning_rate": 4.6490618464143044e-07, "loss": 0.0423, "step": 52114 }, { "epoch": 0.9229325222365374, "grad_norm": 0.8251405358314514, "learning_rate": 4.64693667358817e-07, "loss": 0.0524, "step": 52115 }, { "epoch": 0.9229502317735658, "grad_norm": 0.6144020557403564, "learning_rate": 4.6448119789589017e-07, "loss": 0.033, "step": 52116 }, { "epoch": 0.9229679413105943, "grad_norm": 0.8463085889816284, "learning_rate": 4.6426877625335096e-07, "loss": 0.0509, "step": 52117 }, { "epoch": 0.9229856508476227, "grad_norm": 0.7039926052093506, "learning_rate": 4.6405640243189883e-07, "loss": 0.059, "step": 52118 }, { "epoch": 0.9230033603846511, "grad_norm": 0.38999179005622864, "learning_rate": 4.638440764322283e-07, "loss": 0.0416, "step": 52119 }, { "epoch": 0.9230210699216795, "grad_norm": 0.6111720204353333, "learning_rate": 4.63631798255042e-07, "loss": 0.0485, "step": 52120 }, { "epoch": 0.923038779458708, "grad_norm": 0.8422754406929016, "learning_rate": 4.6341956790103614e-07, "loss": 0.0343, "step": 52121 }, { "epoch": 0.9230564889957364, "grad_norm": 0.22196249663829803, "learning_rate": 4.6320738537091014e-07, "loss": 0.0455, "step": 52122 }, { "epoch": 0.9230741985327648, "grad_norm": 0.22679319977760315, "learning_rate": 4.629952506653617e-07, "loss": 0.0419, "step": 52123 }, { "epoch": 0.9230919080697932, "grad_norm": 0.5728753805160522, "learning_rate": 4.6278316378508546e-07, "loss": 0.0623, "step": 52124 }, { "epoch": 0.9231096176068218, "grad_norm": 0.5771892666816711, "learning_rate": 4.6257112473078736e-07, "loss": 0.0474, "step": 52125 }, { "epoch": 0.9231273271438502, "grad_norm": 0.43336930871009827, "learning_rate": 4.6235913350315684e-07, "loss": 0.0571, "step": 52126 }, { "epoch": 0.9231450366808786, "grad_norm": 0.608609139919281, "learning_rate": 4.621471901028934e-07, "loss": 0.0743, "step": 52127 }, { "epoch": 0.923162746217907, "grad_norm": 0.4842224419116974, "learning_rate": 4.6193529453069647e-07, "loss": 0.0435, "step": 52128 }, { "epoch": 0.9231804557549355, "grad_norm": 0.7017717957496643, "learning_rate": 4.6172344678726386e-07, "loss": 0.0631, "step": 52129 }, { "epoch": 0.9231981652919639, "grad_norm": 0.8849093914031982, "learning_rate": 4.6151164687328996e-07, "loss": 0.0605, "step": 52130 }, { "epoch": 0.9232158748289923, "grad_norm": 0.2283308207988739, "learning_rate": 4.612998947894709e-07, "loss": 0.0576, "step": 52131 }, { "epoch": 0.9232335843660208, "grad_norm": 0.5155208110809326, "learning_rate": 4.6108819053650443e-07, "loss": 0.0578, "step": 52132 }, { "epoch": 0.9232512939030492, "grad_norm": 0.3909761309623718, "learning_rate": 4.6087653411508833e-07, "loss": 0.0688, "step": 52133 }, { "epoch": 0.9232690034400776, "grad_norm": 0.5607946515083313, "learning_rate": 4.606649255259171e-07, "loss": 0.0528, "step": 52134 }, { "epoch": 0.923286712977106, "grad_norm": 0.47310009598731995, "learning_rate": 4.6045336476968846e-07, "loss": 0.043, "step": 52135 }, { "epoch": 0.9233044225141345, "grad_norm": 0.9100618362426758, "learning_rate": 4.6024185184709853e-07, "loss": 0.0744, "step": 52136 }, { "epoch": 0.9233221320511629, "grad_norm": 0.5423189997673035, "learning_rate": 4.6003038675884014e-07, "loss": 0.0408, "step": 52137 }, { "epoch": 0.9233398415881913, "grad_norm": 0.37687647342681885, "learning_rate": 4.598189695056126e-07, "loss": 0.0416, "step": 52138 }, { "epoch": 0.9233575511252197, "grad_norm": 0.6120592355728149, "learning_rate": 4.5960760008811054e-07, "loss": 0.0349, "step": 52139 }, { "epoch": 0.9233752606622482, "grad_norm": 0.6316000819206238, "learning_rate": 4.5939627850702825e-07, "loss": 0.0574, "step": 52140 }, { "epoch": 0.9233929701992766, "grad_norm": 0.32085731625556946, "learning_rate": 4.591850047630602e-07, "loss": 0.0589, "step": 52141 }, { "epoch": 0.923410679736305, "grad_norm": 0.5653387904167175, "learning_rate": 4.589737788569043e-07, "loss": 0.0662, "step": 52142 }, { "epoch": 0.9234283892733334, "grad_norm": 0.5756369829177856, "learning_rate": 4.5876260078925315e-07, "loss": 0.0506, "step": 52143 }, { "epoch": 0.9234460988103619, "grad_norm": 0.6395599842071533, "learning_rate": 4.5855147056080127e-07, "loss": 0.0528, "step": 52144 }, { "epoch": 0.9234638083473903, "grad_norm": 0.41178256273269653, "learning_rate": 4.583403881722464e-07, "loss": 0.0384, "step": 52145 }, { "epoch": 0.9234815178844187, "grad_norm": 0.6486327052116394, "learning_rate": 4.5812935362427977e-07, "loss": 0.0515, "step": 52146 }, { "epoch": 0.9234992274214472, "grad_norm": 0.6410379409790039, "learning_rate": 4.5791836691759737e-07, "loss": 0.0618, "step": 52147 }, { "epoch": 0.9235169369584756, "grad_norm": 0.4624442160129547, "learning_rate": 4.57707428052892e-07, "loss": 0.0359, "step": 52148 }, { "epoch": 0.923534646495504, "grad_norm": 0.6037682890892029, "learning_rate": 4.5749653703085815e-07, "loss": 0.0424, "step": 52149 }, { "epoch": 0.9235523560325324, "grad_norm": 0.5484550595283508, "learning_rate": 4.572856938521919e-07, "loss": 0.0502, "step": 52150 }, { "epoch": 0.9235700655695609, "grad_norm": 0.5582547187805176, "learning_rate": 4.5707489851758264e-07, "loss": 0.0607, "step": 52151 }, { "epoch": 0.9235877751065893, "grad_norm": 0.6304318904876709, "learning_rate": 4.568641510277266e-07, "loss": 0.0651, "step": 52152 }, { "epoch": 0.9236054846436177, "grad_norm": 0.3886624574661255, "learning_rate": 4.5665345138331484e-07, "loss": 0.0376, "step": 52153 }, { "epoch": 0.9236231941806461, "grad_norm": 0.72170090675354, "learning_rate": 4.564427995850451e-07, "loss": 0.0772, "step": 52154 }, { "epoch": 0.9236409037176746, "grad_norm": 0.38284701108932495, "learning_rate": 4.562321956336052e-07, "loss": 0.0233, "step": 52155 }, { "epoch": 0.923658613254703, "grad_norm": 0.7038163542747498, "learning_rate": 4.560216395296912e-07, "loss": 0.0512, "step": 52156 }, { "epoch": 0.9236763227917314, "grad_norm": 0.744731605052948, "learning_rate": 4.5581113127399766e-07, "loss": 0.0574, "step": 52157 }, { "epoch": 0.9236940323287598, "grad_norm": 0.5455191731452942, "learning_rate": 4.5560067086721057e-07, "loss": 0.046, "step": 52158 }, { "epoch": 0.9237117418657883, "grad_norm": 0.4906006157398224, "learning_rate": 4.553902583100278e-07, "loss": 0.0582, "step": 52159 }, { "epoch": 0.9237294514028167, "grad_norm": 0.5977895259857178, "learning_rate": 4.551798936031404e-07, "loss": 0.0523, "step": 52160 }, { "epoch": 0.9237471609398451, "grad_norm": 0.2816818654537201, "learning_rate": 4.549695767472395e-07, "loss": 0.0367, "step": 52161 }, { "epoch": 0.9237648704768736, "grad_norm": 0.5162453651428223, "learning_rate": 4.547593077430162e-07, "loss": 0.0404, "step": 52162 }, { "epoch": 0.923782580013902, "grad_norm": 0.419709175825119, "learning_rate": 4.54549086591165e-07, "loss": 0.037, "step": 52163 }, { "epoch": 0.9238002895509304, "grad_norm": 0.3790963888168335, "learning_rate": 4.54338913292377e-07, "loss": 0.0382, "step": 52164 }, { "epoch": 0.9238179990879588, "grad_norm": 0.564808189868927, "learning_rate": 4.541287878473416e-07, "loss": 0.0549, "step": 52165 }, { "epoch": 0.9238357086249873, "grad_norm": 0.38466906547546387, "learning_rate": 4.539187102567516e-07, "loss": 0.0401, "step": 52166 }, { "epoch": 0.9238534181620157, "grad_norm": 0.5913566946983337, "learning_rate": 4.537086805212981e-07, "loss": 0.0419, "step": 52167 }, { "epoch": 0.9238711276990441, "grad_norm": 0.5813932418823242, "learning_rate": 4.5349869864167226e-07, "loss": 0.066, "step": 52168 }, { "epoch": 0.9238888372360725, "grad_norm": 0.4122737646102905, "learning_rate": 4.532887646185635e-07, "loss": 0.0419, "step": 52169 }, { "epoch": 0.923906546773101, "grad_norm": 0.49894848465919495, "learning_rate": 4.530788784526629e-07, "loss": 0.055, "step": 52170 }, { "epoch": 0.9239242563101294, "grad_norm": 0.9056538939476013, "learning_rate": 4.528690401446617e-07, "loss": 0.0547, "step": 52171 }, { "epoch": 0.9239419658471578, "grad_norm": 0.6137701272964478, "learning_rate": 4.526592496952492e-07, "loss": 0.0587, "step": 52172 }, { "epoch": 0.9239596753841862, "grad_norm": 0.46366873383522034, "learning_rate": 4.5244950710511823e-07, "loss": 0.0634, "step": 52173 }, { "epoch": 0.9239773849212147, "grad_norm": 0.32525917887687683, "learning_rate": 4.5223981237495495e-07, "loss": 0.0406, "step": 52174 }, { "epoch": 0.9239950944582431, "grad_norm": 0.4469418525695801, "learning_rate": 4.520301655054554e-07, "loss": 0.0369, "step": 52175 }, { "epoch": 0.9240128039952715, "grad_norm": 0.25565072894096375, "learning_rate": 4.5182056649730074e-07, "loss": 0.0306, "step": 52176 }, { "epoch": 0.9240305135323, "grad_norm": 0.36913159489631653, "learning_rate": 4.516110153511871e-07, "loss": 0.0516, "step": 52177 }, { "epoch": 0.9240482230693284, "grad_norm": 0.435830295085907, "learning_rate": 4.5140151206780055e-07, "loss": 0.0305, "step": 52178 }, { "epoch": 0.9240659326063568, "grad_norm": 0.6219201683998108, "learning_rate": 4.5119205664783393e-07, "loss": 0.0523, "step": 52179 }, { "epoch": 0.9240836421433852, "grad_norm": 0.5677458047866821, "learning_rate": 4.5098264909197164e-07, "loss": 0.0586, "step": 52180 }, { "epoch": 0.9241013516804137, "grad_norm": 0.7572108507156372, "learning_rate": 4.507732894009048e-07, "loss": 0.0576, "step": 52181 }, { "epoch": 0.9241190612174421, "grad_norm": 0.534574568271637, "learning_rate": 4.5056397757532284e-07, "loss": 0.0489, "step": 52182 }, { "epoch": 0.9241367707544705, "grad_norm": 0.8050143718719482, "learning_rate": 4.503547136159136e-07, "loss": 0.0832, "step": 52183 }, { "epoch": 0.9241544802914989, "grad_norm": 0.4824163317680359, "learning_rate": 4.501454975233649e-07, "loss": 0.0584, "step": 52184 }, { "epoch": 0.9241721898285274, "grad_norm": 0.546899139881134, "learning_rate": 4.499363292983677e-07, "loss": 0.054, "step": 52185 }, { "epoch": 0.9241898993655558, "grad_norm": 0.5961100459098816, "learning_rate": 4.4972720894160823e-07, "loss": 0.0184, "step": 52186 }, { "epoch": 0.9242076089025842, "grad_norm": 0.5867577791213989, "learning_rate": 4.4951813645377427e-07, "loss": 0.0664, "step": 52187 }, { "epoch": 0.9242253184396126, "grad_norm": 0.4729444980621338, "learning_rate": 4.493091118355536e-07, "loss": 0.0449, "step": 52188 }, { "epoch": 0.9242430279766412, "grad_norm": 0.8037331104278564, "learning_rate": 4.491001350876356e-07, "loss": 0.0493, "step": 52189 }, { "epoch": 0.9242607375136696, "grad_norm": 0.7307093143463135, "learning_rate": 4.488912062107031e-07, "loss": 0.0471, "step": 52190 }, { "epoch": 0.924278447050698, "grad_norm": 0.7643007636070251, "learning_rate": 4.486823252054473e-07, "loss": 0.055, "step": 52191 }, { "epoch": 0.9242961565877265, "grad_norm": 0.8735790848731995, "learning_rate": 4.484734920725575e-07, "loss": 0.06, "step": 52192 }, { "epoch": 0.9243138661247549, "grad_norm": 0.8269504308700562, "learning_rate": 4.4826470681271826e-07, "loss": 0.0838, "step": 52193 }, { "epoch": 0.9243315756617833, "grad_norm": 1.1986335515975952, "learning_rate": 4.4805596942661396e-07, "loss": 0.0968, "step": 52194 }, { "epoch": 0.9243492851988117, "grad_norm": 0.8305543661117554, "learning_rate": 4.4784727991493413e-07, "loss": 0.0465, "step": 52195 }, { "epoch": 0.9243669947358402, "grad_norm": 0.9730607867240906, "learning_rate": 4.476386382783665e-07, "loss": 0.0874, "step": 52196 }, { "epoch": 0.9243847042728686, "grad_norm": 0.4244384169578552, "learning_rate": 4.474300445175955e-07, "loss": 0.0691, "step": 52197 }, { "epoch": 0.924402413809897, "grad_norm": 0.5149878859519958, "learning_rate": 4.472214986333073e-07, "loss": 0.0427, "step": 52198 }, { "epoch": 0.9244201233469254, "grad_norm": 0.4293644428253174, "learning_rate": 4.47013000626188e-07, "loss": 0.0727, "step": 52199 }, { "epoch": 0.9244378328839539, "grad_norm": 0.5262821316719055, "learning_rate": 4.4680455049692373e-07, "loss": 0.0408, "step": 52200 }, { "epoch": 0.9244555424209823, "grad_norm": 0.7144618630409241, "learning_rate": 4.4659614824620055e-07, "loss": 0.05, "step": 52201 }, { "epoch": 0.9244732519580107, "grad_norm": 1.0799403190612793, "learning_rate": 4.46387793874703e-07, "loss": 0.0431, "step": 52202 }, { "epoch": 0.9244909614950391, "grad_norm": 0.4381551444530487, "learning_rate": 4.4617948738312045e-07, "loss": 0.0401, "step": 52203 }, { "epoch": 0.9245086710320676, "grad_norm": 0.38884055614471436, "learning_rate": 4.4597122877213403e-07, "loss": 0.036, "step": 52204 }, { "epoch": 0.924526380569096, "grad_norm": 0.8343769311904907, "learning_rate": 4.457630180424299e-07, "loss": 0.0725, "step": 52205 }, { "epoch": 0.9245440901061244, "grad_norm": 0.6767895221710205, "learning_rate": 4.455548551946942e-07, "loss": 0.0443, "step": 52206 }, { "epoch": 0.9245617996431529, "grad_norm": 0.3410630524158478, "learning_rate": 4.453467402296113e-07, "loss": 0.0374, "step": 52207 }, { "epoch": 0.9245795091801813, "grad_norm": 0.6744155883789062, "learning_rate": 4.4513867314786403e-07, "loss": 0.0714, "step": 52208 }, { "epoch": 0.9245972187172097, "grad_norm": 0.37402573227882385, "learning_rate": 4.4493065395013855e-07, "loss": 0.0415, "step": 52209 }, { "epoch": 0.9246149282542381, "grad_norm": 0.4544616639614105, "learning_rate": 4.4472268263711925e-07, "loss": 0.0544, "step": 52210 }, { "epoch": 0.9246326377912666, "grad_norm": 0.5885413289070129, "learning_rate": 4.445147592094906e-07, "loss": 0.0515, "step": 52211 }, { "epoch": 0.924650347328295, "grad_norm": 0.6336187720298767, "learning_rate": 4.4430688366793536e-07, "loss": 0.0461, "step": 52212 }, { "epoch": 0.9246680568653234, "grad_norm": 0.8684285283088684, "learning_rate": 4.4409905601313804e-07, "loss": 0.0467, "step": 52213 }, { "epoch": 0.9246857664023518, "grad_norm": 0.5464064478874207, "learning_rate": 4.438912762457847e-07, "loss": 0.0447, "step": 52214 }, { "epoch": 0.9247034759393803, "grad_norm": 0.6741040349006653, "learning_rate": 4.436835443665566e-07, "loss": 0.0386, "step": 52215 }, { "epoch": 0.9247211854764087, "grad_norm": 0.3648904263973236, "learning_rate": 4.4347586037613463e-07, "loss": 0.0565, "step": 52216 }, { "epoch": 0.9247388950134371, "grad_norm": 0.4978561997413635, "learning_rate": 4.4326822427520676e-07, "loss": 0.0476, "step": 52217 }, { "epoch": 0.9247566045504655, "grad_norm": 0.4615471065044403, "learning_rate": 4.430606360644557e-07, "loss": 0.0438, "step": 52218 }, { "epoch": 0.924774314087494, "grad_norm": 0.6120392680168152, "learning_rate": 4.4285309574455755e-07, "loss": 0.0525, "step": 52219 }, { "epoch": 0.9247920236245224, "grad_norm": 0.7793987989425659, "learning_rate": 4.4264560331620353e-07, "loss": 0.0754, "step": 52220 }, { "epoch": 0.9248097331615508, "grad_norm": 0.4959701597690582, "learning_rate": 4.4243815878007465e-07, "loss": 0.0406, "step": 52221 }, { "epoch": 0.9248274426985793, "grad_norm": 0.43386128544807434, "learning_rate": 4.422307621368504e-07, "loss": 0.0473, "step": 52222 }, { "epoch": 0.9248451522356077, "grad_norm": 0.5697363615036011, "learning_rate": 4.420234133872136e-07, "loss": 0.0713, "step": 52223 }, { "epoch": 0.9248628617726361, "grad_norm": 0.6071164608001709, "learning_rate": 4.418161125318487e-07, "loss": 0.0548, "step": 52224 }, { "epoch": 0.9248805713096645, "grad_norm": 0.6678429245948792, "learning_rate": 4.4160885957143684e-07, "loss": 0.0598, "step": 52225 }, { "epoch": 0.924898280846693, "grad_norm": 0.34222710132598877, "learning_rate": 4.4140165450665904e-07, "loss": 0.0578, "step": 52226 }, { "epoch": 0.9249159903837214, "grad_norm": 0.7349599599838257, "learning_rate": 4.4119449733819485e-07, "loss": 0.0506, "step": 52227 }, { "epoch": 0.9249336999207498, "grad_norm": 0.45624372363090515, "learning_rate": 4.4098738806673035e-07, "loss": 0.0331, "step": 52228 }, { "epoch": 0.9249514094577782, "grad_norm": 0.5484016537666321, "learning_rate": 4.4078032669294497e-07, "loss": 0.065, "step": 52229 }, { "epoch": 0.9249691189948067, "grad_norm": 0.4128352403640747, "learning_rate": 4.4057331321751825e-07, "loss": 0.0397, "step": 52230 }, { "epoch": 0.9249868285318351, "grad_norm": 0.8056715726852417, "learning_rate": 4.4036634764113295e-07, "loss": 0.0418, "step": 52231 }, { "epoch": 0.9250045380688635, "grad_norm": 0.5743746161460876, "learning_rate": 4.401594299644718e-07, "loss": 0.084, "step": 52232 }, { "epoch": 0.9250222476058919, "grad_norm": 0.5063655972480774, "learning_rate": 4.399525601882126e-07, "loss": 0.031, "step": 52233 }, { "epoch": 0.9250399571429204, "grad_norm": 0.6286135911941528, "learning_rate": 4.397457383130349e-07, "loss": 0.0383, "step": 52234 }, { "epoch": 0.9250576666799488, "grad_norm": 0.7522413730621338, "learning_rate": 4.395389643396247e-07, "loss": 0.0511, "step": 52235 }, { "epoch": 0.9250753762169772, "grad_norm": 1.09554922580719, "learning_rate": 4.3933223826865653e-07, "loss": 0.0889, "step": 52236 }, { "epoch": 0.9250930857540057, "grad_norm": 0.5922645926475525, "learning_rate": 4.391255601008115e-07, "loss": 0.0734, "step": 52237 }, { "epoch": 0.9251107952910341, "grad_norm": 0.494940847158432, "learning_rate": 4.389189298367707e-07, "loss": 0.0377, "step": 52238 }, { "epoch": 0.9251285048280625, "grad_norm": 0.5014171600341797, "learning_rate": 4.387123474772153e-07, "loss": 0.0472, "step": 52239 }, { "epoch": 0.9251462143650909, "grad_norm": 0.7482466697692871, "learning_rate": 4.3850581302282143e-07, "loss": 0.0532, "step": 52240 }, { "epoch": 0.9251639239021194, "grad_norm": 0.42008090019226074, "learning_rate": 4.3829932647427184e-07, "loss": 0.0625, "step": 52241 }, { "epoch": 0.9251816334391478, "grad_norm": 0.5962508320808411, "learning_rate": 4.3809288783224763e-07, "loss": 0.0563, "step": 52242 }, { "epoch": 0.9251993429761762, "grad_norm": 0.44568517804145813, "learning_rate": 4.3788649709742166e-07, "loss": 0.0478, "step": 52243 }, { "epoch": 0.9252170525132046, "grad_norm": 0.532217264175415, "learning_rate": 4.376801542704767e-07, "loss": 0.0405, "step": 52244 }, { "epoch": 0.9252347620502331, "grad_norm": 0.7041668891906738, "learning_rate": 4.374738593520905e-07, "loss": 0.0433, "step": 52245 }, { "epoch": 0.9252524715872615, "grad_norm": 0.5555714964866638, "learning_rate": 4.3726761234294423e-07, "loss": 0.0473, "step": 52246 }, { "epoch": 0.9252701811242899, "grad_norm": 0.2417249232530594, "learning_rate": 4.3706141324371395e-07, "loss": 0.0316, "step": 52247 }, { "epoch": 0.9252878906613183, "grad_norm": 0.7864614129066467, "learning_rate": 4.3685526205507586e-07, "loss": 0.0598, "step": 52248 }, { "epoch": 0.9253056001983468, "grad_norm": 0.47033554315567017, "learning_rate": 4.366491587777127e-07, "loss": 0.065, "step": 52249 }, { "epoch": 0.9253233097353752, "grad_norm": 0.5518405437469482, "learning_rate": 4.3644310341230233e-07, "loss": 0.057, "step": 52250 }, { "epoch": 0.9253410192724036, "grad_norm": 0.42727285623550415, "learning_rate": 4.3623709595952077e-07, "loss": 0.0649, "step": 52251 }, { "epoch": 0.9253587288094322, "grad_norm": 0.35451754927635193, "learning_rate": 4.360311364200459e-07, "loss": 0.0343, "step": 52252 }, { "epoch": 0.9253764383464606, "grad_norm": 0.6279718279838562, "learning_rate": 4.3582522479455544e-07, "loss": 0.0457, "step": 52253 }, { "epoch": 0.925394147883489, "grad_norm": 0.5771094560623169, "learning_rate": 4.3561936108372724e-07, "loss": 0.0538, "step": 52254 }, { "epoch": 0.9254118574205173, "grad_norm": 0.4854632019996643, "learning_rate": 4.354135452882374e-07, "loss": 0.0571, "step": 52255 }, { "epoch": 0.9254295669575459, "grad_norm": 0.9869646430015564, "learning_rate": 4.3520777740876373e-07, "loss": 0.0569, "step": 52256 }, { "epoch": 0.9254472764945743, "grad_norm": 0.28869470953941345, "learning_rate": 4.35002057445984e-07, "loss": 0.0506, "step": 52257 }, { "epoch": 0.9254649860316027, "grad_norm": 0.7416869401931763, "learning_rate": 4.3479638540057265e-07, "loss": 0.0812, "step": 52258 }, { "epoch": 0.925482695568631, "grad_norm": 0.8326901793479919, "learning_rate": 4.345907612732092e-07, "loss": 0.0774, "step": 52259 }, { "epoch": 0.9255004051056596, "grad_norm": 0.594730019569397, "learning_rate": 4.3438518506457135e-07, "loss": 0.0436, "step": 52260 }, { "epoch": 0.925518114642688, "grad_norm": 0.4229791760444641, "learning_rate": 4.3417965677533035e-07, "loss": 0.0468, "step": 52261 }, { "epoch": 0.9255358241797164, "grad_norm": 0.5665107369422913, "learning_rate": 4.3397417640616555e-07, "loss": 0.0586, "step": 52262 }, { "epoch": 0.9255535337167448, "grad_norm": 0.7644052505493164, "learning_rate": 4.337687439577515e-07, "loss": 0.0431, "step": 52263 }, { "epoch": 0.9255712432537733, "grad_norm": 0.7348268628120422, "learning_rate": 4.3356335943076753e-07, "loss": 0.048, "step": 52264 }, { "epoch": 0.9255889527908017, "grad_norm": 0.8900131583213806, "learning_rate": 4.333580228258849e-07, "loss": 0.0707, "step": 52265 }, { "epoch": 0.9256066623278301, "grad_norm": 0.6541231870651245, "learning_rate": 4.331527341437813e-07, "loss": 0.0467, "step": 52266 }, { "epoch": 0.9256243718648586, "grad_norm": 0.5180016756057739, "learning_rate": 4.329474933851313e-07, "loss": 0.0423, "step": 52267 }, { "epoch": 0.925642081401887, "grad_norm": 0.3723561465740204, "learning_rate": 4.327423005506126e-07, "loss": 0.036, "step": 52268 }, { "epoch": 0.9256597909389154, "grad_norm": 0.5395162105560303, "learning_rate": 4.3253715564089635e-07, "loss": 0.0506, "step": 52269 }, { "epoch": 0.9256775004759438, "grad_norm": 0.6158802509307861, "learning_rate": 4.3233205865666034e-07, "loss": 0.0537, "step": 52270 }, { "epoch": 0.9256952100129723, "grad_norm": 0.5911784172058105, "learning_rate": 4.3212700959857907e-07, "loss": 0.0738, "step": 52271 }, { "epoch": 0.9257129195500007, "grad_norm": 0.5646731853485107, "learning_rate": 4.3192200846732533e-07, "loss": 0.0486, "step": 52272 }, { "epoch": 0.9257306290870291, "grad_norm": 0.53873610496521, "learning_rate": 4.317170552635752e-07, "loss": 0.056, "step": 52273 }, { "epoch": 0.9257483386240575, "grad_norm": 0.38756251335144043, "learning_rate": 4.3151214998800315e-07, "loss": 0.0518, "step": 52274 }, { "epoch": 0.925766048161086, "grad_norm": 0.6454432010650635, "learning_rate": 4.31307292641282e-07, "loss": 0.0524, "step": 52275 }, { "epoch": 0.9257837576981144, "grad_norm": 0.8098107576370239, "learning_rate": 4.311024832240845e-07, "loss": 0.0643, "step": 52276 }, { "epoch": 0.9258014672351428, "grad_norm": 0.4760443866252899, "learning_rate": 4.3089772173708687e-07, "loss": 0.0328, "step": 52277 }, { "epoch": 0.9258191767721712, "grad_norm": 0.4428602159023285, "learning_rate": 4.3069300818096515e-07, "loss": 0.0225, "step": 52278 }, { "epoch": 0.9258368863091997, "grad_norm": 0.6259384751319885, "learning_rate": 4.3048834255638715e-07, "loss": 0.076, "step": 52279 }, { "epoch": 0.9258545958462281, "grad_norm": 0.8532649278640747, "learning_rate": 4.30283724864029e-07, "loss": 0.0538, "step": 52280 }, { "epoch": 0.9258723053832565, "grad_norm": 0.7141566276550293, "learning_rate": 4.300791551045635e-07, "loss": 0.0327, "step": 52281 }, { "epoch": 0.925890014920285, "grad_norm": 0.5473171472549438, "learning_rate": 4.2987463327866685e-07, "loss": 0.0647, "step": 52282 }, { "epoch": 0.9259077244573134, "grad_norm": 0.9705451130867004, "learning_rate": 4.296701593870067e-07, "loss": 0.0378, "step": 52283 }, { "epoch": 0.9259254339943418, "grad_norm": 0.6938656568527222, "learning_rate": 4.294657334302576e-07, "loss": 0.058, "step": 52284 }, { "epoch": 0.9259431435313702, "grad_norm": 0.5136986970901489, "learning_rate": 4.292613554090924e-07, "loss": 0.0518, "step": 52285 }, { "epoch": 0.9259608530683987, "grad_norm": 0.5997087955474854, "learning_rate": 4.2905702532418377e-07, "loss": 0.0623, "step": 52286 }, { "epoch": 0.9259785626054271, "grad_norm": 0.3255528211593628, "learning_rate": 4.288527431762046e-07, "loss": 0.0273, "step": 52287 }, { "epoch": 0.9259962721424555, "grad_norm": 0.5962058305740356, "learning_rate": 4.28648508965826e-07, "loss": 0.0583, "step": 52288 }, { "epoch": 0.9260139816794839, "grad_norm": 0.6014037132263184, "learning_rate": 4.2844432269372067e-07, "loss": 0.0703, "step": 52289 }, { "epoch": 0.9260316912165124, "grad_norm": 0.36304762959480286, "learning_rate": 4.282401843605582e-07, "loss": 0.0667, "step": 52290 }, { "epoch": 0.9260494007535408, "grad_norm": 0.9050540924072266, "learning_rate": 4.28036093967013e-07, "loss": 0.0347, "step": 52291 }, { "epoch": 0.9260671102905692, "grad_norm": 0.4988533854484558, "learning_rate": 4.2783205151375626e-07, "loss": 0.0606, "step": 52292 }, { "epoch": 0.9260848198275976, "grad_norm": 0.5525727272033691, "learning_rate": 4.276280570014557e-07, "loss": 0.0445, "step": 52293 }, { "epoch": 0.9261025293646261, "grad_norm": 0.5382878184318542, "learning_rate": 4.274241104307858e-07, "loss": 0.0694, "step": 52294 }, { "epoch": 0.9261202389016545, "grad_norm": 0.6816672682762146, "learning_rate": 4.2722021180241597e-07, "loss": 0.0592, "step": 52295 }, { "epoch": 0.9261379484386829, "grad_norm": 0.783258318901062, "learning_rate": 4.270163611170175e-07, "loss": 0.0518, "step": 52296 }, { "epoch": 0.9261556579757114, "grad_norm": 1.0195913314819336, "learning_rate": 4.2681255837526133e-07, "loss": 0.0839, "step": 52297 }, { "epoch": 0.9261733675127398, "grad_norm": 0.7734736800193787, "learning_rate": 4.2660880357781875e-07, "loss": 0.0723, "step": 52298 }, { "epoch": 0.9261910770497682, "grad_norm": 0.714298665523529, "learning_rate": 4.2640509672536076e-07, "loss": 0.0562, "step": 52299 }, { "epoch": 0.9262087865867966, "grad_norm": 0.611535370349884, "learning_rate": 4.262014378185536e-07, "loss": 0.0465, "step": 52300 }, { "epoch": 0.9262264961238251, "grad_norm": 0.24536338448524475, "learning_rate": 4.2599782685807e-07, "loss": 0.0323, "step": 52301 }, { "epoch": 0.9262442056608535, "grad_norm": 0.8645923733711243, "learning_rate": 4.257942638445794e-07, "loss": 0.0626, "step": 52302 }, { "epoch": 0.9262619151978819, "grad_norm": 0.5035749673843384, "learning_rate": 4.2559074877875304e-07, "loss": 0.0525, "step": 52303 }, { "epoch": 0.9262796247349103, "grad_norm": 0.6801701188087463, "learning_rate": 4.253872816612586e-07, "loss": 0.0337, "step": 52304 }, { "epoch": 0.9262973342719388, "grad_norm": 0.9323290586471558, "learning_rate": 4.25183862492764e-07, "loss": 0.0398, "step": 52305 }, { "epoch": 0.9263150438089672, "grad_norm": 0.285494327545166, "learning_rate": 4.249804912739419e-07, "loss": 0.0334, "step": 52306 }, { "epoch": 0.9263327533459956, "grad_norm": 0.6117009520530701, "learning_rate": 4.247771680054585e-07, "loss": 0.0547, "step": 52307 }, { "epoch": 0.926350462883024, "grad_norm": 0.5612025260925293, "learning_rate": 4.2457389268798497e-07, "loss": 0.0609, "step": 52308 }, { "epoch": 0.9263681724200525, "grad_norm": 0.09814240783452988, "learning_rate": 4.2437066532218906e-07, "loss": 0.044, "step": 52309 }, { "epoch": 0.9263858819570809, "grad_norm": 1.0157312154769897, "learning_rate": 4.241674859087402e-07, "loss": 0.0773, "step": 52310 }, { "epoch": 0.9264035914941093, "grad_norm": 0.6261391639709473, "learning_rate": 4.2396435444830464e-07, "loss": 0.0711, "step": 52311 }, { "epoch": 0.9264213010311378, "grad_norm": 0.5629804730415344, "learning_rate": 4.237612709415534e-07, "loss": 0.0514, "step": 52312 }, { "epoch": 0.9264390105681662, "grad_norm": 0.5511452555656433, "learning_rate": 4.2355823538915094e-07, "loss": 0.058, "step": 52313 }, { "epoch": 0.9264567201051946, "grad_norm": 0.3722648322582245, "learning_rate": 4.2335524779177015e-07, "loss": 0.0318, "step": 52314 }, { "epoch": 0.926474429642223, "grad_norm": 0.47293734550476074, "learning_rate": 4.231523081500738e-07, "loss": 0.049, "step": 52315 }, { "epoch": 0.9264921391792516, "grad_norm": 0.37852105498313904, "learning_rate": 4.2294941646473464e-07, "loss": 0.0286, "step": 52316 }, { "epoch": 0.92650984871628, "grad_norm": 0.704238772392273, "learning_rate": 4.2274657273641717e-07, "loss": 0.0704, "step": 52317 }, { "epoch": 0.9265275582533083, "grad_norm": 0.2850615084171295, "learning_rate": 4.225437769657875e-07, "loss": 0.067, "step": 52318 }, { "epoch": 0.9265452677903367, "grad_norm": 0.7560837864875793, "learning_rate": 4.223410291535151e-07, "loss": 0.0599, "step": 52319 }, { "epoch": 0.9265629773273653, "grad_norm": 0.8214122653007507, "learning_rate": 4.221383293002662e-07, "loss": 0.0771, "step": 52320 }, { "epoch": 0.9265806868643937, "grad_norm": 0.8213425874710083, "learning_rate": 4.2193567740670845e-07, "loss": 0.0702, "step": 52321 }, { "epoch": 0.926598396401422, "grad_norm": 0.5341904163360596, "learning_rate": 4.217330734735081e-07, "loss": 0.0622, "step": 52322 }, { "epoch": 0.9266161059384505, "grad_norm": 0.4662392735481262, "learning_rate": 4.2153051750132954e-07, "loss": 0.058, "step": 52323 }, { "epoch": 0.926633815475479, "grad_norm": 0.650166928768158, "learning_rate": 4.2132800949084226e-07, "loss": 0.064, "step": 52324 }, { "epoch": 0.9266515250125074, "grad_norm": 0.6151546239852905, "learning_rate": 4.2112554944271073e-07, "loss": 0.0603, "step": 52325 }, { "epoch": 0.9266692345495358, "grad_norm": 0.8210981488227844, "learning_rate": 4.2092313735760115e-07, "loss": 0.0553, "step": 52326 }, { "epoch": 0.9266869440865643, "grad_norm": 0.5649627447128296, "learning_rate": 4.207207732361812e-07, "loss": 0.0858, "step": 52327 }, { "epoch": 0.9267046536235927, "grad_norm": 0.5314605236053467, "learning_rate": 4.205184570791154e-07, "loss": 0.0512, "step": 52328 }, { "epoch": 0.9267223631606211, "grad_norm": 0.6378924250602722, "learning_rate": 4.2031618888706826e-07, "loss": 0.0771, "step": 52329 }, { "epoch": 0.9267400726976495, "grad_norm": 0.9638583064079285, "learning_rate": 4.201139686607075e-07, "loss": 0.0578, "step": 52330 }, { "epoch": 0.926757782234678, "grad_norm": 0.6625266671180725, "learning_rate": 4.199117964006977e-07, "loss": 0.0551, "step": 52331 }, { "epoch": 0.9267754917717064, "grad_norm": 0.5224379301071167, "learning_rate": 4.1970967210770153e-07, "loss": 0.0512, "step": 52332 }, { "epoch": 0.9267932013087348, "grad_norm": 0.557620644569397, "learning_rate": 4.1950759578238516e-07, "loss": 0.0546, "step": 52333 }, { "epoch": 0.9268109108457632, "grad_norm": 0.4458160698413849, "learning_rate": 4.193055674254148e-07, "loss": 0.0716, "step": 52334 }, { "epoch": 0.9268286203827917, "grad_norm": 0.5135276913642883, "learning_rate": 4.1910358703745487e-07, "loss": 0.0459, "step": 52335 }, { "epoch": 0.9268463299198201, "grad_norm": 0.5973048210144043, "learning_rate": 4.189016546191682e-07, "loss": 0.0583, "step": 52336 }, { "epoch": 0.9268640394568485, "grad_norm": 0.4360724687576294, "learning_rate": 4.186997701712225e-07, "loss": 0.0227, "step": 52337 }, { "epoch": 0.9268817489938769, "grad_norm": 0.6760678887367249, "learning_rate": 4.1849793369427904e-07, "loss": 0.0573, "step": 52338 }, { "epoch": 0.9268994585309054, "grad_norm": 0.6523155570030212, "learning_rate": 4.1829614518900214e-07, "loss": 0.0481, "step": 52339 }, { "epoch": 0.9269171680679338, "grad_norm": 0.32024890184402466, "learning_rate": 4.1809440465605475e-07, "loss": 0.0314, "step": 52340 }, { "epoch": 0.9269348776049622, "grad_norm": 0.4530807137489319, "learning_rate": 4.178927120961029e-07, "loss": 0.054, "step": 52341 }, { "epoch": 0.9269525871419907, "grad_norm": 0.3835364282131195, "learning_rate": 4.176910675098111e-07, "loss": 0.034, "step": 52342 }, { "epoch": 0.9269702966790191, "grad_norm": 0.36796244978904724, "learning_rate": 4.174894708978372e-07, "loss": 0.0529, "step": 52343 }, { "epoch": 0.9269880062160475, "grad_norm": 0.6229506134986877, "learning_rate": 4.1728792226085055e-07, "loss": 0.0569, "step": 52344 }, { "epoch": 0.9270057157530759, "grad_norm": 0.8486785292625427, "learning_rate": 4.1708642159951237e-07, "loss": 0.0661, "step": 52345 }, { "epoch": 0.9270234252901044, "grad_norm": 0.18360821902751923, "learning_rate": 4.168849689144821e-07, "loss": 0.0653, "step": 52346 }, { "epoch": 0.9270411348271328, "grad_norm": 0.3409564793109894, "learning_rate": 4.166835642064276e-07, "loss": 0.0462, "step": 52347 }, { "epoch": 0.9270588443641612, "grad_norm": 0.5323016047477722, "learning_rate": 4.164822074760066e-07, "loss": 0.0412, "step": 52348 }, { "epoch": 0.9270765539011896, "grad_norm": 0.35156989097595215, "learning_rate": 4.1628089872388695e-07, "loss": 0.0322, "step": 52349 }, { "epoch": 0.9270942634382181, "grad_norm": 0.3354872465133667, "learning_rate": 4.160796379507281e-07, "loss": 0.056, "step": 52350 }, { "epoch": 0.9271119729752465, "grad_norm": 0.450265496969223, "learning_rate": 4.1587842515718956e-07, "loss": 0.0596, "step": 52351 }, { "epoch": 0.9271296825122749, "grad_norm": 0.864037275314331, "learning_rate": 4.156772603439374e-07, "loss": 0.0807, "step": 52352 }, { "epoch": 0.9271473920493033, "grad_norm": 0.7067423462867737, "learning_rate": 4.154761435116311e-07, "loss": 0.0464, "step": 52353 }, { "epoch": 0.9271651015863318, "grad_norm": 0.7704333066940308, "learning_rate": 4.1527507466093353e-07, "loss": 0.0511, "step": 52354 }, { "epoch": 0.9271828111233602, "grad_norm": 0.5905799269676208, "learning_rate": 4.1507405379250574e-07, "loss": 0.0499, "step": 52355 }, { "epoch": 0.9272005206603886, "grad_norm": 0.8803460597991943, "learning_rate": 4.1487308090701226e-07, "loss": 0.0635, "step": 52356 }, { "epoch": 0.9272182301974171, "grad_norm": 0.6183152794837952, "learning_rate": 4.1467215600510754e-07, "loss": 0.0318, "step": 52357 }, { "epoch": 0.9272359397344455, "grad_norm": 0.6454723477363586, "learning_rate": 4.1447127908745606e-07, "loss": 0.0413, "step": 52358 }, { "epoch": 0.9272536492714739, "grad_norm": 0.6035510301589966, "learning_rate": 4.1427045015472063e-07, "loss": 0.0648, "step": 52359 }, { "epoch": 0.9272713588085023, "grad_norm": 0.578400731086731, "learning_rate": 4.1406966920756073e-07, "loss": 0.049, "step": 52360 }, { "epoch": 0.9272890683455308, "grad_norm": 0.5130922794342041, "learning_rate": 4.138689362466341e-07, "loss": 0.0417, "step": 52361 }, { "epoch": 0.9273067778825592, "grad_norm": 0.5350878834724426, "learning_rate": 4.136682512726053e-07, "loss": 0.0556, "step": 52362 }, { "epoch": 0.9273244874195876, "grad_norm": 0.5496750473976135, "learning_rate": 4.1346761428613047e-07, "loss": 0.0628, "step": 52363 }, { "epoch": 0.927342196956616, "grad_norm": 0.5476436614990234, "learning_rate": 4.132670252878723e-07, "loss": 0.0556, "step": 52364 }, { "epoch": 0.9273599064936445, "grad_norm": 0.6648581027984619, "learning_rate": 4.1306648427849203e-07, "loss": 0.0425, "step": 52365 }, { "epoch": 0.9273776160306729, "grad_norm": 0.7508862614631653, "learning_rate": 4.128659912586458e-07, "loss": 0.0646, "step": 52366 }, { "epoch": 0.9273953255677013, "grad_norm": 0.6179839372634888, "learning_rate": 4.126655462289963e-07, "loss": 0.0575, "step": 52367 }, { "epoch": 0.9274130351047297, "grad_norm": 0.4749724268913269, "learning_rate": 4.1246514919020153e-07, "loss": 0.0398, "step": 52368 }, { "epoch": 0.9274307446417582, "grad_norm": 0.5215131044387817, "learning_rate": 4.122648001429208e-07, "loss": 0.054, "step": 52369 }, { "epoch": 0.9274484541787866, "grad_norm": 0.36256980895996094, "learning_rate": 4.120644990878153e-07, "loss": 0.0402, "step": 52370 }, { "epoch": 0.927466163715815, "grad_norm": 1.007516622543335, "learning_rate": 4.118642460255395e-07, "loss": 0.0792, "step": 52371 }, { "epoch": 0.9274838732528435, "grad_norm": 0.5144999623298645, "learning_rate": 4.116640409567529e-07, "loss": 0.0662, "step": 52372 }, { "epoch": 0.9275015827898719, "grad_norm": 0.265257865190506, "learning_rate": 4.1146388388211833e-07, "loss": 0.0418, "step": 52373 }, { "epoch": 0.9275192923269003, "grad_norm": 0.6277675628662109, "learning_rate": 4.112637748022935e-07, "loss": 0.0653, "step": 52374 }, { "epoch": 0.9275370018639287, "grad_norm": 0.5206167101860046, "learning_rate": 4.110637137179346e-07, "loss": 0.048, "step": 52375 }, { "epoch": 0.9275547114009572, "grad_norm": 0.22343496978282928, "learning_rate": 4.108637006296995e-07, "loss": 0.0497, "step": 52376 }, { "epoch": 0.9275724209379856, "grad_norm": 0.6000538468360901, "learning_rate": 4.106637355382492e-07, "loss": 0.0582, "step": 52377 }, { "epoch": 0.927590130475014, "grad_norm": 0.8571567535400391, "learning_rate": 4.104638184442383e-07, "loss": 0.0589, "step": 52378 }, { "epoch": 0.9276078400120424, "grad_norm": 1.0268281698226929, "learning_rate": 4.1026394934832457e-07, "loss": 0.0648, "step": 52379 }, { "epoch": 0.927625549549071, "grad_norm": 0.9683937430381775, "learning_rate": 4.1006412825116914e-07, "loss": 0.0591, "step": 52380 }, { "epoch": 0.9276432590860993, "grad_norm": 0.5130600929260254, "learning_rate": 4.098643551534248e-07, "loss": 0.0549, "step": 52381 }, { "epoch": 0.9276609686231277, "grad_norm": 0.6513689756393433, "learning_rate": 4.0966463005575106e-07, "loss": 0.0379, "step": 52382 }, { "epoch": 0.9276786781601561, "grad_norm": 0.5844817161560059, "learning_rate": 4.094649529588057e-07, "loss": 0.0345, "step": 52383 }, { "epoch": 0.9276963876971847, "grad_norm": 0.7390763759613037, "learning_rate": 4.092653238632449e-07, "loss": 0.0597, "step": 52384 }, { "epoch": 0.927714097234213, "grad_norm": 0.6375546455383301, "learning_rate": 4.0906574276972807e-07, "loss": 0.0408, "step": 52385 }, { "epoch": 0.9277318067712415, "grad_norm": 0.9297927021980286, "learning_rate": 4.0886620967890643e-07, "loss": 0.0626, "step": 52386 }, { "epoch": 0.92774951630827, "grad_norm": 0.3506200909614563, "learning_rate": 4.0866672459143937e-07, "loss": 0.0395, "step": 52387 }, { "epoch": 0.9277672258452984, "grad_norm": 0.7304528951644897, "learning_rate": 4.0846728750798314e-07, "loss": 0.0555, "step": 52388 }, { "epoch": 0.9277849353823268, "grad_norm": 0.7302879095077515, "learning_rate": 4.0826789842919375e-07, "loss": 0.0363, "step": 52389 }, { "epoch": 0.9278026449193552, "grad_norm": 0.46826645731925964, "learning_rate": 4.0806855735572745e-07, "loss": 0.0449, "step": 52390 }, { "epoch": 0.9278203544563837, "grad_norm": 1.1665078401565552, "learning_rate": 4.078692642882387e-07, "loss": 0.0706, "step": 52391 }, { "epoch": 0.9278380639934121, "grad_norm": 0.5300600528717041, "learning_rate": 4.0767001922738357e-07, "loss": 0.0743, "step": 52392 }, { "epoch": 0.9278557735304405, "grad_norm": 0.5566372275352478, "learning_rate": 4.0747082217381995e-07, "loss": 0.0485, "step": 52393 }, { "epoch": 0.9278734830674689, "grad_norm": 0.20304612815380096, "learning_rate": 4.0727167312820055e-07, "loss": 0.0465, "step": 52394 }, { "epoch": 0.9278911926044974, "grad_norm": 0.5210675597190857, "learning_rate": 4.070725720911833e-07, "loss": 0.0637, "step": 52395 }, { "epoch": 0.9279089021415258, "grad_norm": 0.4232426583766937, "learning_rate": 4.068735190634193e-07, "loss": 0.021, "step": 52396 }, { "epoch": 0.9279266116785542, "grad_norm": 0.8839104771614075, "learning_rate": 4.0667451404556465e-07, "loss": 0.0807, "step": 52397 }, { "epoch": 0.9279443212155826, "grad_norm": 0.610700786113739, "learning_rate": 4.0647555703827557e-07, "loss": 0.0624, "step": 52398 }, { "epoch": 0.9279620307526111, "grad_norm": 0.6891996264457703, "learning_rate": 4.062766480422081e-07, "loss": 0.0502, "step": 52399 }, { "epoch": 0.9279797402896395, "grad_norm": 0.6212945580482483, "learning_rate": 4.060777870580118e-07, "loss": 0.0555, "step": 52400 }, { "epoch": 0.9279974498266679, "grad_norm": 0.6146154999732971, "learning_rate": 4.058789740863411e-07, "loss": 0.0593, "step": 52401 }, { "epoch": 0.9280151593636964, "grad_norm": 0.5438938140869141, "learning_rate": 4.0568020912785717e-07, "loss": 0.0465, "step": 52402 }, { "epoch": 0.9280328689007248, "grad_norm": 0.5181488990783691, "learning_rate": 4.0548149218320784e-07, "loss": 0.0564, "step": 52403 }, { "epoch": 0.9280505784377532, "grad_norm": 0.7326322793960571, "learning_rate": 4.0528282325304756e-07, "loss": 0.0402, "step": 52404 }, { "epoch": 0.9280682879747816, "grad_norm": 0.6487946510314941, "learning_rate": 4.050842023380308e-07, "loss": 0.0673, "step": 52405 }, { "epoch": 0.9280859975118101, "grad_norm": 0.7132481932640076, "learning_rate": 4.0488562943881203e-07, "loss": 0.0416, "step": 52406 }, { "epoch": 0.9281037070488385, "grad_norm": 0.6072022914886475, "learning_rate": 4.046871045560424e-07, "loss": 0.0574, "step": 52407 }, { "epoch": 0.9281214165858669, "grad_norm": 0.6964673399925232, "learning_rate": 4.0448862769037477e-07, "loss": 0.0728, "step": 52408 }, { "epoch": 0.9281391261228953, "grad_norm": 0.5537377595901489, "learning_rate": 4.042901988424652e-07, "loss": 0.0794, "step": 52409 }, { "epoch": 0.9281568356599238, "grad_norm": 0.48022717237472534, "learning_rate": 4.0409181801296325e-07, "loss": 0.0608, "step": 52410 }, { "epoch": 0.9281745451969522, "grad_norm": 0.7234416604042053, "learning_rate": 4.0389348520252334e-07, "loss": 0.0613, "step": 52411 }, { "epoch": 0.9281922547339806, "grad_norm": 0.7707555890083313, "learning_rate": 4.0369520041179666e-07, "loss": 0.0649, "step": 52412 }, { "epoch": 0.928209964271009, "grad_norm": 0.977813720703125, "learning_rate": 4.034969636414393e-07, "loss": 0.0665, "step": 52413 }, { "epoch": 0.9282276738080375, "grad_norm": 0.5582472085952759, "learning_rate": 4.032987748920991e-07, "loss": 0.0623, "step": 52414 }, { "epoch": 0.9282453833450659, "grad_norm": 0.650160014629364, "learning_rate": 4.0310063416442887e-07, "loss": 0.0595, "step": 52415 }, { "epoch": 0.9282630928820943, "grad_norm": 0.7998287081718445, "learning_rate": 4.029025414590831e-07, "loss": 0.0421, "step": 52416 }, { "epoch": 0.9282808024191228, "grad_norm": 0.45031264424324036, "learning_rate": 4.027044967767096e-07, "loss": 0.0392, "step": 52417 }, { "epoch": 0.9282985119561512, "grad_norm": 0.626340925693512, "learning_rate": 4.025065001179612e-07, "loss": 0.0533, "step": 52418 }, { "epoch": 0.9283162214931796, "grad_norm": 0.32732754945755005, "learning_rate": 4.0230855148349233e-07, "loss": 0.0201, "step": 52419 }, { "epoch": 0.928333931030208, "grad_norm": 0.4305625557899475, "learning_rate": 4.0211065087394913e-07, "loss": 0.0634, "step": 52420 }, { "epoch": 0.9283516405672365, "grad_norm": 0.6858172416687012, "learning_rate": 4.019127982899862e-07, "loss": 0.05, "step": 52421 }, { "epoch": 0.9283693501042649, "grad_norm": 0.6347889304161072, "learning_rate": 4.017149937322545e-07, "loss": 0.0431, "step": 52422 }, { "epoch": 0.9283870596412933, "grad_norm": 0.3528066575527191, "learning_rate": 4.015172372014037e-07, "loss": 0.0267, "step": 52423 }, { "epoch": 0.9284047691783217, "grad_norm": 0.4157332181930542, "learning_rate": 4.013195286980864e-07, "loss": 0.0391, "step": 52424 }, { "epoch": 0.9284224787153502, "grad_norm": 0.6314237117767334, "learning_rate": 4.0112186822294895e-07, "loss": 0.0528, "step": 52425 }, { "epoch": 0.9284401882523786, "grad_norm": 0.6275570392608643, "learning_rate": 4.0092425577664403e-07, "loss": 0.0415, "step": 52426 }, { "epoch": 0.928457897789407, "grad_norm": 0.5414139032363892, "learning_rate": 4.0072669135982455e-07, "loss": 0.0387, "step": 52427 }, { "epoch": 0.9284756073264354, "grad_norm": 0.3479292094707489, "learning_rate": 4.005291749731349e-07, "loss": 0.0439, "step": 52428 }, { "epoch": 0.9284933168634639, "grad_norm": 0.8558695316314697, "learning_rate": 4.0033170661722797e-07, "loss": 0.0374, "step": 52429 }, { "epoch": 0.9285110264004923, "grad_norm": 0.33851271867752075, "learning_rate": 4.001342862927515e-07, "loss": 0.0435, "step": 52430 }, { "epoch": 0.9285287359375207, "grad_norm": 0.5499335527420044, "learning_rate": 3.9993691400035834e-07, "loss": 0.0464, "step": 52431 }, { "epoch": 0.9285464454745492, "grad_norm": 0.3486257791519165, "learning_rate": 3.997395897406964e-07, "loss": 0.0305, "step": 52432 }, { "epoch": 0.9285641550115776, "grad_norm": 0.6112340688705444, "learning_rate": 3.995423135144133e-07, "loss": 0.0513, "step": 52433 }, { "epoch": 0.928581864548606, "grad_norm": 0.5353341102600098, "learning_rate": 3.9934508532216194e-07, "loss": 0.0388, "step": 52434 }, { "epoch": 0.9285995740856344, "grad_norm": 0.5501811504364014, "learning_rate": 3.991479051645852e-07, "loss": 0.0632, "step": 52435 }, { "epoch": 0.9286172836226629, "grad_norm": 0.42023512721061707, "learning_rate": 3.989507730423358e-07, "loss": 0.0429, "step": 52436 }, { "epoch": 0.9286349931596913, "grad_norm": 0.6584190130233765, "learning_rate": 3.987536889560633e-07, "loss": 0.0579, "step": 52437 }, { "epoch": 0.9286527026967197, "grad_norm": 0.883285641670227, "learning_rate": 3.985566529064122e-07, "loss": 0.0703, "step": 52438 }, { "epoch": 0.9286704122337481, "grad_norm": 0.5365832448005676, "learning_rate": 3.983596648940335e-07, "loss": 0.0338, "step": 52439 }, { "epoch": 0.9286881217707766, "grad_norm": 0.6517507433891296, "learning_rate": 3.9816272491957517e-07, "loss": 0.0423, "step": 52440 }, { "epoch": 0.928705831307805, "grad_norm": 0.5798534154891968, "learning_rate": 3.9796583298368496e-07, "loss": 0.0436, "step": 52441 }, { "epoch": 0.9287235408448334, "grad_norm": 0.5517299771308899, "learning_rate": 3.97768989087009e-07, "loss": 0.0292, "step": 52442 }, { "epoch": 0.9287412503818618, "grad_norm": 0.5428752899169922, "learning_rate": 3.9757219323019676e-07, "loss": 0.0717, "step": 52443 }, { "epoch": 0.9287589599188903, "grad_norm": 0.4920530915260315, "learning_rate": 3.9737544541389447e-07, "loss": 0.044, "step": 52444 }, { "epoch": 0.9287766694559187, "grad_norm": 0.6611679792404175, "learning_rate": 3.971787456387516e-07, "loss": 0.054, "step": 52445 }, { "epoch": 0.9287943789929471, "grad_norm": 0.9675062894821167, "learning_rate": 3.969820939054125e-07, "loss": 0.0528, "step": 52446 }, { "epoch": 0.9288120885299757, "grad_norm": 0.5277894735336304, "learning_rate": 3.9678549021452517e-07, "loss": 0.0532, "step": 52447 }, { "epoch": 0.928829798067004, "grad_norm": 0.5578685998916626, "learning_rate": 3.965889345667373e-07, "loss": 0.0431, "step": 52448 }, { "epoch": 0.9288475076040325, "grad_norm": 0.5582689046859741, "learning_rate": 3.963924269626934e-07, "loss": 0.054, "step": 52449 }, { "epoch": 0.9288652171410609, "grad_norm": 0.5665069222450256, "learning_rate": 3.96195967403043e-07, "loss": 0.0634, "step": 52450 }, { "epoch": 0.9288829266780894, "grad_norm": 0.6835631132125854, "learning_rate": 3.9599955588842885e-07, "loss": 0.0746, "step": 52451 }, { "epoch": 0.9289006362151178, "grad_norm": 0.6201303601264954, "learning_rate": 3.9580319241950214e-07, "loss": 0.0391, "step": 52452 }, { "epoch": 0.9289183457521462, "grad_norm": 0.46231311559677124, "learning_rate": 3.95606876996904e-07, "loss": 0.0531, "step": 52453 }, { "epoch": 0.9289360552891746, "grad_norm": 0.6756781339645386, "learning_rate": 3.954106096212806e-07, "loss": 0.0447, "step": 52454 }, { "epoch": 0.9289537648262031, "grad_norm": 0.2839822769165039, "learning_rate": 3.9521439029328145e-07, "loss": 0.0356, "step": 52455 }, { "epoch": 0.9289714743632315, "grad_norm": 0.7570348978042603, "learning_rate": 3.950182190135493e-07, "loss": 0.0562, "step": 52456 }, { "epoch": 0.9289891839002599, "grad_norm": 0.6201011538505554, "learning_rate": 3.948220957827303e-07, "loss": 0.0486, "step": 52457 }, { "epoch": 0.9290068934372883, "grad_norm": 0.8058412075042725, "learning_rate": 3.9462602060146734e-07, "loss": 0.0523, "step": 52458 }, { "epoch": 0.9290246029743168, "grad_norm": 0.32669612765312195, "learning_rate": 3.944299934704082e-07, "loss": 0.0506, "step": 52459 }, { "epoch": 0.9290423125113452, "grad_norm": 0.8540899753570557, "learning_rate": 3.9423401439019733e-07, "loss": 0.0572, "step": 52460 }, { "epoch": 0.9290600220483736, "grad_norm": 0.4232146739959717, "learning_rate": 3.940380833614809e-07, "loss": 0.0405, "step": 52461 }, { "epoch": 0.9290777315854021, "grad_norm": 0.6419615149497986, "learning_rate": 3.9384220038490016e-07, "loss": 0.0668, "step": 52462 }, { "epoch": 0.9290954411224305, "grad_norm": 0.5699419379234314, "learning_rate": 3.936463654611028e-07, "loss": 0.0471, "step": 52463 }, { "epoch": 0.9291131506594589, "grad_norm": 0.3913312554359436, "learning_rate": 3.9345057859073e-07, "loss": 0.0318, "step": 52464 }, { "epoch": 0.9291308601964873, "grad_norm": 0.9201026558876038, "learning_rate": 3.932548397744279e-07, "loss": 0.0652, "step": 52465 }, { "epoch": 0.9291485697335158, "grad_norm": 0.6589488983154297, "learning_rate": 3.93059149012841e-07, "loss": 0.0431, "step": 52466 }, { "epoch": 0.9291662792705442, "grad_norm": 0.4593784213066101, "learning_rate": 3.928635063066088e-07, "loss": 0.0499, "step": 52467 }, { "epoch": 0.9291839888075726, "grad_norm": 0.5075372457504272, "learning_rate": 3.9266791165638084e-07, "loss": 0.0602, "step": 52468 }, { "epoch": 0.929201698344601, "grad_norm": 0.19742920994758606, "learning_rate": 3.9247236506279815e-07, "loss": 0.0315, "step": 52469 }, { "epoch": 0.9292194078816295, "grad_norm": 0.5834944248199463, "learning_rate": 3.9227686652650364e-07, "loss": 0.0557, "step": 52470 }, { "epoch": 0.9292371174186579, "grad_norm": 0.22778114676475525, "learning_rate": 3.9208141604814176e-07, "loss": 0.0484, "step": 52471 }, { "epoch": 0.9292548269556863, "grad_norm": 0.6207494139671326, "learning_rate": 3.9188601362835195e-07, "loss": 0.0565, "step": 52472 }, { "epoch": 0.9292725364927147, "grad_norm": 0.7375244498252869, "learning_rate": 3.9169065926778215e-07, "loss": 0.0547, "step": 52473 }, { "epoch": 0.9292902460297432, "grad_norm": 0.6387628316879272, "learning_rate": 3.914953529670717e-07, "loss": 0.0879, "step": 52474 }, { "epoch": 0.9293079555667716, "grad_norm": 1.0262244939804077, "learning_rate": 3.913000947268636e-07, "loss": 0.0783, "step": 52475 }, { "epoch": 0.9293256651038, "grad_norm": 0.49552127718925476, "learning_rate": 3.911048845478005e-07, "loss": 0.0508, "step": 52476 }, { "epoch": 0.9293433746408285, "grad_norm": 0.5776107907295227, "learning_rate": 3.9090972243052524e-07, "loss": 0.0401, "step": 52477 }, { "epoch": 0.9293610841778569, "grad_norm": 0.7982924580574036, "learning_rate": 3.907146083756774e-07, "loss": 0.0799, "step": 52478 }, { "epoch": 0.9293787937148853, "grad_norm": 0.7595008611679077, "learning_rate": 3.905195423839031e-07, "loss": 0.0772, "step": 52479 }, { "epoch": 0.9293965032519137, "grad_norm": 0.5210590958595276, "learning_rate": 3.9032452445584176e-07, "loss": 0.0457, "step": 52480 }, { "epoch": 0.9294142127889422, "grad_norm": 0.4954878091812134, "learning_rate": 3.901295545921346e-07, "loss": 0.071, "step": 52481 }, { "epoch": 0.9294319223259706, "grad_norm": 0.5428025722503662, "learning_rate": 3.8993463279342446e-07, "loss": 0.0571, "step": 52482 }, { "epoch": 0.929449631862999, "grad_norm": 0.2034255564212799, "learning_rate": 3.897397590603491e-07, "loss": 0.0493, "step": 52483 }, { "epoch": 0.9294673414000274, "grad_norm": 0.5405699014663696, "learning_rate": 3.8954493339355637e-07, "loss": 0.0418, "step": 52484 }, { "epoch": 0.9294850509370559, "grad_norm": 0.8235289454460144, "learning_rate": 3.8935015579367904e-07, "loss": 0.0404, "step": 52485 }, { "epoch": 0.9295027604740843, "grad_norm": 0.652380108833313, "learning_rate": 3.8915542626136335e-07, "loss": 0.055, "step": 52486 }, { "epoch": 0.9295204700111127, "grad_norm": 0.4989157021045685, "learning_rate": 3.8896074479724875e-07, "loss": 0.062, "step": 52487 }, { "epoch": 0.9295381795481411, "grad_norm": 0.7993773818016052, "learning_rate": 3.887661114019747e-07, "loss": 0.0534, "step": 52488 }, { "epoch": 0.9295558890851696, "grad_norm": 0.5460607409477234, "learning_rate": 3.8857152607618243e-07, "loss": 0.0508, "step": 52489 }, { "epoch": 0.929573598622198, "grad_norm": 0.7352560758590698, "learning_rate": 3.88376988820513e-07, "loss": 0.0678, "step": 52490 }, { "epoch": 0.9295913081592264, "grad_norm": 0.5951676964759827, "learning_rate": 3.8818249963560594e-07, "loss": 0.038, "step": 52491 }, { "epoch": 0.9296090176962549, "grad_norm": 0.39653849601745605, "learning_rate": 3.8798805852209906e-07, "loss": 0.0583, "step": 52492 }, { "epoch": 0.9296267272332833, "grad_norm": 0.5419268012046814, "learning_rate": 3.8779366548063356e-07, "loss": 0.0318, "step": 52493 }, { "epoch": 0.9296444367703117, "grad_norm": 0.38675206899642944, "learning_rate": 3.8759932051184885e-07, "loss": 0.0343, "step": 52494 }, { "epoch": 0.9296621463073401, "grad_norm": 0.6467859745025635, "learning_rate": 3.874050236163862e-07, "loss": 0.0445, "step": 52495 }, { "epoch": 0.9296798558443686, "grad_norm": 0.40882813930511475, "learning_rate": 3.8721077479488e-07, "loss": 0.0394, "step": 52496 }, { "epoch": 0.929697565381397, "grad_norm": 0.8704375624656677, "learning_rate": 3.870165740479731e-07, "loss": 0.0447, "step": 52497 }, { "epoch": 0.9297152749184254, "grad_norm": 0.620089054107666, "learning_rate": 3.8682242137630663e-07, "loss": 0.0476, "step": 52498 }, { "epoch": 0.9297329844554538, "grad_norm": 0.37597760558128357, "learning_rate": 3.866283167805135e-07, "loss": 0.0525, "step": 52499 }, { "epoch": 0.9297506939924823, "grad_norm": 0.3964548707008362, "learning_rate": 3.864342602612364e-07, "loss": 0.045, "step": 52500 }, { "epoch": 0.9297684035295107, "grad_norm": 0.41403278708457947, "learning_rate": 3.862402518191116e-07, "loss": 0.0601, "step": 52501 }, { "epoch": 0.9297861130665391, "grad_norm": 0.5966930985450745, "learning_rate": 3.8604629145478023e-07, "loss": 0.0457, "step": 52502 }, { "epoch": 0.9298038226035675, "grad_norm": 0.725645124912262, "learning_rate": 3.8585237916887675e-07, "loss": 0.0448, "step": 52503 }, { "epoch": 0.929821532140596, "grad_norm": 0.41805192828178406, "learning_rate": 3.8565851496204063e-07, "loss": 0.0447, "step": 52504 }, { "epoch": 0.9298392416776244, "grad_norm": 0.4683915972709656, "learning_rate": 3.8546469883490974e-07, "loss": 0.0546, "step": 52505 }, { "epoch": 0.9298569512146528, "grad_norm": 0.4111121594905853, "learning_rate": 3.852709307881219e-07, "loss": 0.0598, "step": 52506 }, { "epoch": 0.9298746607516813, "grad_norm": 0.9006126523017883, "learning_rate": 3.8507721082231493e-07, "loss": 0.0529, "step": 52507 }, { "epoch": 0.9298923702887097, "grad_norm": 0.5315171480178833, "learning_rate": 3.84883538938125e-07, "loss": 0.0383, "step": 52508 }, { "epoch": 0.9299100798257381, "grad_norm": 0.6795787811279297, "learning_rate": 3.846899151361932e-07, "loss": 0.0552, "step": 52509 }, { "epoch": 0.9299277893627665, "grad_norm": 0.4304429292678833, "learning_rate": 3.844963394171508e-07, "loss": 0.0562, "step": 52510 }, { "epoch": 0.929945498899795, "grad_norm": 0.7601416707038879, "learning_rate": 3.843028117816372e-07, "loss": 0.0568, "step": 52511 }, { "epoch": 0.9299632084368235, "grad_norm": 0.535141110420227, "learning_rate": 3.8410933223029023e-07, "loss": 0.0476, "step": 52512 }, { "epoch": 0.9299809179738519, "grad_norm": 0.7735642790794373, "learning_rate": 3.8391590076374273e-07, "loss": 0.0448, "step": 52513 }, { "epoch": 0.9299986275108802, "grad_norm": 0.5998435020446777, "learning_rate": 3.837225173826359e-07, "loss": 0.0493, "step": 52514 }, { "epoch": 0.9300163370479088, "grad_norm": 0.5053936839103699, "learning_rate": 3.8352918208760247e-07, "loss": 0.0569, "step": 52515 }, { "epoch": 0.9300340465849372, "grad_norm": 0.6144227385520935, "learning_rate": 3.8333589487927867e-07, "loss": 0.058, "step": 52516 }, { "epoch": 0.9300517561219656, "grad_norm": 0.34028130769729614, "learning_rate": 3.831426557583023e-07, "loss": 0.0406, "step": 52517 }, { "epoch": 0.930069465658994, "grad_norm": 0.6403326988220215, "learning_rate": 3.829494647253096e-07, "loss": 0.0502, "step": 52518 }, { "epoch": 0.9300871751960225, "grad_norm": 0.6998185515403748, "learning_rate": 3.827563217809349e-07, "loss": 0.0374, "step": 52519 }, { "epoch": 0.9301048847330509, "grad_norm": 0.5360133051872253, "learning_rate": 3.8256322692581115e-07, "loss": 0.0614, "step": 52520 }, { "epoch": 0.9301225942700793, "grad_norm": 0.7546711564064026, "learning_rate": 3.8237018016057777e-07, "loss": 0.049, "step": 52521 }, { "epoch": 0.9301403038071078, "grad_norm": 0.8479281067848206, "learning_rate": 3.82177181485866e-07, "loss": 0.0665, "step": 52522 }, { "epoch": 0.9301580133441362, "grad_norm": 0.5879847407341003, "learning_rate": 3.8198423090231527e-07, "loss": 0.061, "step": 52523 }, { "epoch": 0.9301757228811646, "grad_norm": 0.47040531039237976, "learning_rate": 3.8179132841055674e-07, "loss": 0.0336, "step": 52524 }, { "epoch": 0.930193432418193, "grad_norm": 0.763094425201416, "learning_rate": 3.8159847401122493e-07, "loss": 0.0768, "step": 52525 }, { "epoch": 0.9302111419552215, "grad_norm": 0.8520475625991821, "learning_rate": 3.81405667704956e-07, "loss": 0.0554, "step": 52526 }, { "epoch": 0.9302288514922499, "grad_norm": 0.5641762018203735, "learning_rate": 3.8121290949238605e-07, "loss": 0.0529, "step": 52527 }, { "epoch": 0.9302465610292783, "grad_norm": 0.3874167799949646, "learning_rate": 3.8102019937414466e-07, "loss": 0.0303, "step": 52528 }, { "epoch": 0.9302642705663067, "grad_norm": 0.502927839756012, "learning_rate": 3.808275373508696e-07, "loss": 0.0487, "step": 52529 }, { "epoch": 0.9302819801033352, "grad_norm": 0.5822865962982178, "learning_rate": 3.806349234231937e-07, "loss": 0.0387, "step": 52530 }, { "epoch": 0.9302996896403636, "grad_norm": 0.5872692465782166, "learning_rate": 3.8044235759174983e-07, "loss": 0.0429, "step": 52531 }, { "epoch": 0.930317399177392, "grad_norm": 0.6979943513870239, "learning_rate": 3.8024983985717075e-07, "loss": 0.0831, "step": 52532 }, { "epoch": 0.9303351087144204, "grad_norm": 0.7428339123725891, "learning_rate": 3.80057370220091e-07, "loss": 0.0436, "step": 52533 }, { "epoch": 0.9303528182514489, "grad_norm": 0.6601062417030334, "learning_rate": 3.7986494868114506e-07, "loss": 0.0562, "step": 52534 }, { "epoch": 0.9303705277884773, "grad_norm": 0.6007351279258728, "learning_rate": 3.796725752409641e-07, "loss": 0.032, "step": 52535 }, { "epoch": 0.9303882373255057, "grad_norm": 0.5614728927612305, "learning_rate": 3.7948024990018094e-07, "loss": 0.0401, "step": 52536 }, { "epoch": 0.9304059468625342, "grad_norm": 0.5188308358192444, "learning_rate": 3.7928797265943003e-07, "loss": 0.0451, "step": 52537 }, { "epoch": 0.9304236563995626, "grad_norm": 0.5191901922225952, "learning_rate": 3.790957435193426e-07, "loss": 0.0479, "step": 52538 }, { "epoch": 0.930441365936591, "grad_norm": 0.5118342638015747, "learning_rate": 3.789035624805515e-07, "loss": 0.0489, "step": 52539 }, { "epoch": 0.9304590754736194, "grad_norm": 0.4527573883533478, "learning_rate": 3.7871142954368777e-07, "loss": 0.0486, "step": 52540 }, { "epoch": 0.9304767850106479, "grad_norm": 0.8045178651809692, "learning_rate": 3.7851934470938764e-07, "loss": 0.0552, "step": 52541 }, { "epoch": 0.9304944945476763, "grad_norm": 0.6455563902854919, "learning_rate": 3.783273079782773e-07, "loss": 0.0539, "step": 52542 }, { "epoch": 0.9305122040847047, "grad_norm": 0.699743390083313, "learning_rate": 3.781353193509912e-07, "loss": 0.0449, "step": 52543 }, { "epoch": 0.9305299136217331, "grad_norm": 0.36061891913414, "learning_rate": 3.779433788281622e-07, "loss": 0.0406, "step": 52544 }, { "epoch": 0.9305476231587616, "grad_norm": 0.8283849954605103, "learning_rate": 3.777514864104198e-07, "loss": 0.0776, "step": 52545 }, { "epoch": 0.93056533269579, "grad_norm": 0.5323401689529419, "learning_rate": 3.7755964209839513e-07, "loss": 0.0654, "step": 52546 }, { "epoch": 0.9305830422328184, "grad_norm": 0.5516053438186646, "learning_rate": 3.77367845892721e-07, "loss": 0.031, "step": 52547 }, { "epoch": 0.9306007517698469, "grad_norm": 0.3970833420753479, "learning_rate": 3.771760977940286e-07, "loss": 0.0402, "step": 52548 }, { "epoch": 0.9306184613068753, "grad_norm": 0.3013599216938019, "learning_rate": 3.7698439780294747e-07, "loss": 0.0605, "step": 52549 }, { "epoch": 0.9306361708439037, "grad_norm": 0.20978985726833344, "learning_rate": 3.767927459201087e-07, "loss": 0.0453, "step": 52550 }, { "epoch": 0.9306538803809321, "grad_norm": 0.4160861670970917, "learning_rate": 3.766011421461435e-07, "loss": 0.0389, "step": 52551 }, { "epoch": 0.9306715899179606, "grad_norm": 0.4971446990966797, "learning_rate": 3.764095864816797e-07, "loss": 0.0402, "step": 52552 }, { "epoch": 0.930689299454989, "grad_norm": 0.6323336958885193, "learning_rate": 3.7621807892735007e-07, "loss": 0.0604, "step": 52553 }, { "epoch": 0.9307070089920174, "grad_norm": 0.7248038649559021, "learning_rate": 3.760266194837825e-07, "loss": 0.0455, "step": 52554 }, { "epoch": 0.9307247185290458, "grad_norm": 0.5331973433494568, "learning_rate": 3.758352081516098e-07, "loss": 0.0604, "step": 52555 }, { "epoch": 0.9307424280660743, "grad_norm": 0.3864612877368927, "learning_rate": 3.7564384493145975e-07, "loss": 0.0465, "step": 52556 }, { "epoch": 0.9307601376031027, "grad_norm": 0.4844824969768524, "learning_rate": 3.7545252982396195e-07, "loss": 0.052, "step": 52557 }, { "epoch": 0.9307778471401311, "grad_norm": 0.6597511768341064, "learning_rate": 3.752612628297458e-07, "loss": 0.0373, "step": 52558 }, { "epoch": 0.9307955566771595, "grad_norm": 0.730976939201355, "learning_rate": 3.7507004394944255e-07, "loss": 0.0745, "step": 52559 }, { "epoch": 0.930813266214188, "grad_norm": 0.48871487379074097, "learning_rate": 3.748788731836783e-07, "loss": 0.0353, "step": 52560 }, { "epoch": 0.9308309757512164, "grad_norm": 1.011461615562439, "learning_rate": 3.7468775053308423e-07, "loss": 0.0511, "step": 52561 }, { "epoch": 0.9308486852882448, "grad_norm": 0.7402050495147705, "learning_rate": 3.744966759982882e-07, "loss": 0.0448, "step": 52562 }, { "epoch": 0.9308663948252733, "grad_norm": 0.5547716021537781, "learning_rate": 3.74305649579918e-07, "loss": 0.0472, "step": 52563 }, { "epoch": 0.9308841043623017, "grad_norm": 0.7138859629631042, "learning_rate": 3.7411467127860143e-07, "loss": 0.0623, "step": 52564 }, { "epoch": 0.9309018138993301, "grad_norm": 0.7571509480476379, "learning_rate": 3.739237410949697e-07, "loss": 0.0683, "step": 52565 }, { "epoch": 0.9309195234363585, "grad_norm": 0.5816094875335693, "learning_rate": 3.737328590296524e-07, "loss": 0.0551, "step": 52566 }, { "epoch": 0.930937232973387, "grad_norm": 0.3609199821949005, "learning_rate": 3.7354202508327216e-07, "loss": 0.0634, "step": 52567 }, { "epoch": 0.9309549425104154, "grad_norm": 0.5662539601325989, "learning_rate": 3.7335123925645864e-07, "loss": 0.0597, "step": 52568 }, { "epoch": 0.9309726520474438, "grad_norm": 1.2299398183822632, "learning_rate": 3.731605015498413e-07, "loss": 0.0612, "step": 52569 }, { "epoch": 0.9309903615844722, "grad_norm": 0.5463888645172119, "learning_rate": 3.7296981196404625e-07, "loss": 0.0558, "step": 52570 }, { "epoch": 0.9310080711215007, "grad_norm": 0.46440765261650085, "learning_rate": 3.7277917049969966e-07, "loss": 0.0634, "step": 52571 }, { "epoch": 0.9310257806585291, "grad_norm": 0.44282597303390503, "learning_rate": 3.7258857715743113e-07, "loss": 0.0192, "step": 52572 }, { "epoch": 0.9310434901955575, "grad_norm": 0.5192286372184753, "learning_rate": 3.723980319378667e-07, "loss": 0.052, "step": 52573 }, { "epoch": 0.9310611997325859, "grad_norm": 0.33916690945625305, "learning_rate": 3.722075348416326e-07, "loss": 0.0436, "step": 52574 }, { "epoch": 0.9310789092696145, "grad_norm": 0.6184476017951965, "learning_rate": 3.720170858693567e-07, "loss": 0.059, "step": 52575 }, { "epoch": 0.9310966188066429, "grad_norm": 0.5661070942878723, "learning_rate": 3.7182668502166674e-07, "loss": 0.0465, "step": 52576 }, { "epoch": 0.9311143283436712, "grad_norm": 0.37220317125320435, "learning_rate": 3.716363322991856e-07, "loss": 0.0355, "step": 52577 }, { "epoch": 0.9311320378806998, "grad_norm": 0.39331716299057007, "learning_rate": 3.714460277025428e-07, "loss": 0.0379, "step": 52578 }, { "epoch": 0.9311497474177282, "grad_norm": 0.4728136956691742, "learning_rate": 3.712557712323611e-07, "loss": 0.046, "step": 52579 }, { "epoch": 0.9311674569547566, "grad_norm": 0.5103879570960999, "learning_rate": 3.710655628892701e-07, "loss": 0.0412, "step": 52580 }, { "epoch": 0.931185166491785, "grad_norm": 0.5931305885314941, "learning_rate": 3.708754026738925e-07, "loss": 0.0419, "step": 52581 }, { "epoch": 0.9312028760288135, "grad_norm": 0.8381673097610474, "learning_rate": 3.706852905868563e-07, "loss": 0.0522, "step": 52582 }, { "epoch": 0.9312205855658419, "grad_norm": 1.1028774976730347, "learning_rate": 3.704952266287842e-07, "loss": 0.0552, "step": 52583 }, { "epoch": 0.9312382951028703, "grad_norm": 0.8558123111724854, "learning_rate": 3.703052108003041e-07, "loss": 0.0599, "step": 52584 }, { "epoch": 0.9312560046398987, "grad_norm": 0.49700912833213806, "learning_rate": 3.701152431020388e-07, "loss": 0.0824, "step": 52585 }, { "epoch": 0.9312737141769272, "grad_norm": 0.5689919590950012, "learning_rate": 3.699253235346162e-07, "loss": 0.0554, "step": 52586 }, { "epoch": 0.9312914237139556, "grad_norm": 0.48949238657951355, "learning_rate": 3.6973545209866064e-07, "loss": 0.0509, "step": 52587 }, { "epoch": 0.931309133250984, "grad_norm": 0.3805015981197357, "learning_rate": 3.6954562879479345e-07, "loss": 0.0635, "step": 52588 }, { "epoch": 0.9313268427880124, "grad_norm": 0.6884496808052063, "learning_rate": 3.693558536236424e-07, "loss": 0.0656, "step": 52589 }, { "epoch": 0.9313445523250409, "grad_norm": 0.5859453678131104, "learning_rate": 3.6916612658583037e-07, "loss": 0.0534, "step": 52590 }, { "epoch": 0.9313622618620693, "grad_norm": 0.6763738393783569, "learning_rate": 3.689764476819818e-07, "loss": 0.0321, "step": 52591 }, { "epoch": 0.9313799713990977, "grad_norm": 0.5958318114280701, "learning_rate": 3.6878681691271956e-07, "loss": 0.0553, "step": 52592 }, { "epoch": 0.9313976809361262, "grad_norm": 0.7911232113838196, "learning_rate": 3.6859723427867143e-07, "loss": 0.0531, "step": 52593 }, { "epoch": 0.9314153904731546, "grad_norm": 0.6830123662948608, "learning_rate": 3.684076997804586e-07, "loss": 0.0512, "step": 52594 }, { "epoch": 0.931433100010183, "grad_norm": 0.6340978145599365, "learning_rate": 3.6821821341870234e-07, "loss": 0.0554, "step": 52595 }, { "epoch": 0.9314508095472114, "grad_norm": 0.6276615858078003, "learning_rate": 3.680287751940303e-07, "loss": 0.0515, "step": 52596 }, { "epoch": 0.9314685190842399, "grad_norm": 0.4852025806903839, "learning_rate": 3.6783938510706214e-07, "loss": 0.0377, "step": 52597 }, { "epoch": 0.9314862286212683, "grad_norm": 0.31080570816993713, "learning_rate": 3.67650043158424e-07, "loss": 0.0372, "step": 52598 }, { "epoch": 0.9315039381582967, "grad_norm": 0.39007267355918884, "learning_rate": 3.674607493487353e-07, "loss": 0.0492, "step": 52599 }, { "epoch": 0.9315216476953251, "grad_norm": 0.9069300293922424, "learning_rate": 3.672715036786223e-07, "loss": 0.0547, "step": 52600 }, { "epoch": 0.9315393572323536, "grad_norm": 0.47666481137275696, "learning_rate": 3.670823061487044e-07, "loss": 0.0463, "step": 52601 }, { "epoch": 0.931557066769382, "grad_norm": 0.7547534704208374, "learning_rate": 3.668931567596079e-07, "loss": 0.0647, "step": 52602 }, { "epoch": 0.9315747763064104, "grad_norm": 0.44928449392318726, "learning_rate": 3.6670405551195054e-07, "loss": 0.0383, "step": 52603 }, { "epoch": 0.9315924858434388, "grad_norm": 0.44374367594718933, "learning_rate": 3.665150024063585e-07, "loss": 0.0491, "step": 52604 }, { "epoch": 0.9316101953804673, "grad_norm": 0.6209542751312256, "learning_rate": 3.6632599744345294e-07, "loss": 0.0554, "step": 52605 }, { "epoch": 0.9316279049174957, "grad_norm": 0.3081640899181366, "learning_rate": 3.6613704062385176e-07, "loss": 0.0353, "step": 52606 }, { "epoch": 0.9316456144545241, "grad_norm": 0.5290541648864746, "learning_rate": 3.659481319481811e-07, "loss": 0.049, "step": 52607 }, { "epoch": 0.9316633239915526, "grad_norm": 0.8891879320144653, "learning_rate": 3.657592714170621e-07, "loss": 0.0656, "step": 52608 }, { "epoch": 0.931681033528581, "grad_norm": 0.3206133246421814, "learning_rate": 3.655704590311143e-07, "loss": 0.0358, "step": 52609 }, { "epoch": 0.9316987430656094, "grad_norm": 0.7776090502738953, "learning_rate": 3.6538169479095885e-07, "loss": 0.0563, "step": 52610 }, { "epoch": 0.9317164526026378, "grad_norm": 0.6619060039520264, "learning_rate": 3.651929786972169e-07, "loss": 0.065, "step": 52611 }, { "epoch": 0.9317341621396663, "grad_norm": 0.31230777502059937, "learning_rate": 3.6500431075050964e-07, "loss": 0.0556, "step": 52612 }, { "epoch": 0.9317518716766947, "grad_norm": 0.4068467915058136, "learning_rate": 3.648156909514583e-07, "loss": 0.0697, "step": 52613 }, { "epoch": 0.9317695812137231, "grad_norm": 0.614011824131012, "learning_rate": 3.646271193006839e-07, "loss": 0.0608, "step": 52614 }, { "epoch": 0.9317872907507515, "grad_norm": 0.18528340756893158, "learning_rate": 3.644385957988078e-07, "loss": 0.0466, "step": 52615 }, { "epoch": 0.93180500028778, "grad_norm": 0.16934283077716827, "learning_rate": 3.6425012044644603e-07, "loss": 0.0595, "step": 52616 }, { "epoch": 0.9318227098248084, "grad_norm": 0.5315708518028259, "learning_rate": 3.6406169324421976e-07, "loss": 0.0673, "step": 52617 }, { "epoch": 0.9318404193618368, "grad_norm": 0.7549235224723816, "learning_rate": 3.638733141927519e-07, "loss": 0.0427, "step": 52618 }, { "epoch": 0.9318581288988652, "grad_norm": 0.5088890790939331, "learning_rate": 3.6368498329266196e-07, "loss": 0.031, "step": 52619 }, { "epoch": 0.9318758384358937, "grad_norm": 0.4205702543258667, "learning_rate": 3.634967005445644e-07, "loss": 0.0527, "step": 52620 }, { "epoch": 0.9318935479729221, "grad_norm": 0.7379262447357178, "learning_rate": 3.633084659490837e-07, "loss": 0.0761, "step": 52621 }, { "epoch": 0.9319112575099505, "grad_norm": 0.46109825372695923, "learning_rate": 3.6312027950683947e-07, "loss": 0.0596, "step": 52622 }, { "epoch": 0.931928967046979, "grad_norm": 0.650947630405426, "learning_rate": 3.6293214121844775e-07, "loss": 0.0544, "step": 52623 }, { "epoch": 0.9319466765840074, "grad_norm": 0.6870126724243164, "learning_rate": 3.6274405108452977e-07, "loss": 0.0492, "step": 52624 }, { "epoch": 0.9319643861210358, "grad_norm": 0.9373409748077393, "learning_rate": 3.6255600910570173e-07, "loss": 0.0654, "step": 52625 }, { "epoch": 0.9319820956580642, "grad_norm": 0.4173510670661926, "learning_rate": 3.623680152825865e-07, "loss": 0.0472, "step": 52626 }, { "epoch": 0.9319998051950927, "grad_norm": 0.36282408237457275, "learning_rate": 3.621800696157984e-07, "loss": 0.0409, "step": 52627 }, { "epoch": 0.9320175147321211, "grad_norm": 0.7104399800300598, "learning_rate": 3.619921721059555e-07, "loss": 0.0352, "step": 52628 }, { "epoch": 0.9320352242691495, "grad_norm": 0.28334102034568787, "learning_rate": 3.6180432275368047e-07, "loss": 0.0311, "step": 52629 }, { "epoch": 0.9320529338061779, "grad_norm": 0.2671756446361542, "learning_rate": 3.6161652155958793e-07, "loss": 0.0448, "step": 52630 }, { "epoch": 0.9320706433432064, "grad_norm": 0.5239893794059753, "learning_rate": 3.614287685242956e-07, "loss": 0.0454, "step": 52631 }, { "epoch": 0.9320883528802348, "grad_norm": 0.627297580242157, "learning_rate": 3.6124106364842147e-07, "loss": 0.0504, "step": 52632 }, { "epoch": 0.9321060624172632, "grad_norm": 0.7060213088989258, "learning_rate": 3.610534069325866e-07, "loss": 0.0566, "step": 52633 }, { "epoch": 0.9321237719542916, "grad_norm": 0.45190536975860596, "learning_rate": 3.6086579837740387e-07, "loss": 0.0741, "step": 52634 }, { "epoch": 0.9321414814913201, "grad_norm": 0.520979106426239, "learning_rate": 3.606782379834911e-07, "loss": 0.0536, "step": 52635 }, { "epoch": 0.9321591910283485, "grad_norm": 0.899025559425354, "learning_rate": 3.6049072575146614e-07, "loss": 0.0482, "step": 52636 }, { "epoch": 0.9321769005653769, "grad_norm": 0.5576683282852173, "learning_rate": 3.603032616819485e-07, "loss": 0.0632, "step": 52637 }, { "epoch": 0.9321946101024055, "grad_norm": 0.436592698097229, "learning_rate": 3.6011584577555103e-07, "loss": 0.0416, "step": 52638 }, { "epoch": 0.9322123196394339, "grad_norm": 0.6068785190582275, "learning_rate": 3.599284780328915e-07, "loss": 0.0592, "step": 52639 }, { "epoch": 0.9322300291764622, "grad_norm": 0.6621347665786743, "learning_rate": 3.597411584545862e-07, "loss": 0.0594, "step": 52640 }, { "epoch": 0.9322477387134906, "grad_norm": 0.8956880569458008, "learning_rate": 3.595538870412512e-07, "loss": 0.0791, "step": 52641 }, { "epoch": 0.9322654482505192, "grad_norm": 0.6256825923919678, "learning_rate": 3.593666637935028e-07, "loss": 0.0609, "step": 52642 }, { "epoch": 0.9322831577875476, "grad_norm": 0.46557027101516724, "learning_rate": 3.5917948871195873e-07, "loss": 0.0298, "step": 52643 }, { "epoch": 0.932300867324576, "grad_norm": 0.8689537644386292, "learning_rate": 3.589923617972335e-07, "loss": 0.0496, "step": 52644 }, { "epoch": 0.9323185768616044, "grad_norm": 0.6891384124755859, "learning_rate": 3.5880528304994165e-07, "loss": 0.0477, "step": 52645 }, { "epoch": 0.9323362863986329, "grad_norm": 0.43390363454818726, "learning_rate": 3.5861825247069936e-07, "loss": 0.052, "step": 52646 }, { "epoch": 0.9323539959356613, "grad_norm": 0.8341089487075806, "learning_rate": 3.584312700601244e-07, "loss": 0.0588, "step": 52647 }, { "epoch": 0.9323717054726897, "grad_norm": 0.5419348478317261, "learning_rate": 3.582443358188281e-07, "loss": 0.0558, "step": 52648 }, { "epoch": 0.9323894150097181, "grad_norm": 0.6829473376274109, "learning_rate": 3.580574497474248e-07, "loss": 0.0569, "step": 52649 }, { "epoch": 0.9324071245467466, "grad_norm": 0.4742623567581177, "learning_rate": 3.5787061184653246e-07, "loss": 0.0559, "step": 52650 }, { "epoch": 0.932424834083775, "grad_norm": 0.4230841100215912, "learning_rate": 3.576838221167672e-07, "loss": 0.0422, "step": 52651 }, { "epoch": 0.9324425436208034, "grad_norm": 0.7759424448013306, "learning_rate": 3.574970805587402e-07, "loss": 0.0652, "step": 52652 }, { "epoch": 0.9324602531578319, "grad_norm": 0.39653974771499634, "learning_rate": 3.5731038717306597e-07, "loss": 0.0563, "step": 52653 }, { "epoch": 0.9324779626948603, "grad_norm": 0.37700048089027405, "learning_rate": 3.571237419603607e-07, "loss": 0.0378, "step": 52654 }, { "epoch": 0.9324956722318887, "grad_norm": 0.6410687565803528, "learning_rate": 3.569371449212372e-07, "loss": 0.0431, "step": 52655 }, { "epoch": 0.9325133817689171, "grad_norm": 0.842387855052948, "learning_rate": 3.567505960563083e-07, "loss": 0.0721, "step": 52656 }, { "epoch": 0.9325310913059456, "grad_norm": 0.6531957387924194, "learning_rate": 3.565640953661903e-07, "loss": 0.0348, "step": 52657 }, { "epoch": 0.932548800842974, "grad_norm": 0.5831610560417175, "learning_rate": 3.563776428514942e-07, "loss": 0.0666, "step": 52658 }, { "epoch": 0.9325665103800024, "grad_norm": 0.47833073139190674, "learning_rate": 3.56191238512833e-07, "loss": 0.0575, "step": 52659 }, { "epoch": 0.9325842199170308, "grad_norm": 0.32096731662750244, "learning_rate": 3.560048823508244e-07, "loss": 0.0505, "step": 52660 }, { "epoch": 0.9326019294540593, "grad_norm": 0.7295858860015869, "learning_rate": 3.5581857436607635e-07, "loss": 0.0419, "step": 52661 }, { "epoch": 0.9326196389910877, "grad_norm": 0.3524168133735657, "learning_rate": 3.5563231455920664e-07, "loss": 0.0307, "step": 52662 }, { "epoch": 0.9326373485281161, "grad_norm": 0.5786013007164001, "learning_rate": 3.5544610293082316e-07, "loss": 0.0486, "step": 52663 }, { "epoch": 0.9326550580651445, "grad_norm": 0.7926099896430969, "learning_rate": 3.55259939481542e-07, "loss": 0.0512, "step": 52664 }, { "epoch": 0.932672767602173, "grad_norm": 0.46729692816734314, "learning_rate": 3.550738242119744e-07, "loss": 0.0454, "step": 52665 }, { "epoch": 0.9326904771392014, "grad_norm": 0.4622780680656433, "learning_rate": 3.5488775712273326e-07, "loss": 0.0478, "step": 52666 }, { "epoch": 0.9327081866762298, "grad_norm": 0.6967435479164124, "learning_rate": 3.5470173821442795e-07, "loss": 0.0579, "step": 52667 }, { "epoch": 0.9327258962132583, "grad_norm": 0.5171331763267517, "learning_rate": 3.545157674876748e-07, "loss": 0.0482, "step": 52668 }, { "epoch": 0.9327436057502867, "grad_norm": 0.43825146555900574, "learning_rate": 3.543298449430815e-07, "loss": 0.0358, "step": 52669 }, { "epoch": 0.9327613152873151, "grad_norm": 0.5684449672698975, "learning_rate": 3.5414397058126267e-07, "loss": 0.0563, "step": 52670 }, { "epoch": 0.9327790248243435, "grad_norm": 0.2525639533996582, "learning_rate": 3.539581444028278e-07, "loss": 0.0349, "step": 52671 }, { "epoch": 0.932796734361372, "grad_norm": 0.44654467701911926, "learning_rate": 3.537723664083914e-07, "loss": 0.0482, "step": 52672 }, { "epoch": 0.9328144438984004, "grad_norm": 0.5377139449119568, "learning_rate": 3.535866365985613e-07, "loss": 0.0657, "step": 52673 }, { "epoch": 0.9328321534354288, "grad_norm": 0.38421866297721863, "learning_rate": 3.534009549739503e-07, "loss": 0.0402, "step": 52674 }, { "epoch": 0.9328498629724572, "grad_norm": 0.4865703582763672, "learning_rate": 3.53215321535168e-07, "loss": 0.063, "step": 52675 }, { "epoch": 0.9328675725094857, "grad_norm": 0.4920605719089508, "learning_rate": 3.530297362828272e-07, "loss": 0.0693, "step": 52676 }, { "epoch": 0.9328852820465141, "grad_norm": 0.4190441370010376, "learning_rate": 3.5284419921753576e-07, "loss": 0.0349, "step": 52677 }, { "epoch": 0.9329029915835425, "grad_norm": 0.4776272475719452, "learning_rate": 3.526587103399048e-07, "loss": 0.0545, "step": 52678 }, { "epoch": 0.9329207011205709, "grad_norm": 0.46982064843177795, "learning_rate": 3.524732696505473e-07, "loss": 0.031, "step": 52679 }, { "epoch": 0.9329384106575994, "grad_norm": 0.8018631935119629, "learning_rate": 3.5228787715006927e-07, "loss": 0.0867, "step": 52680 }, { "epoch": 0.9329561201946278, "grad_norm": 0.34006115794181824, "learning_rate": 3.5210253283908366e-07, "loss": 0.0569, "step": 52681 }, { "epoch": 0.9329738297316562, "grad_norm": 0.6214669942855835, "learning_rate": 3.519172367181983e-07, "loss": 0.046, "step": 52682 }, { "epoch": 0.9329915392686847, "grad_norm": 0.7645054459571838, "learning_rate": 3.5173198878802604e-07, "loss": 0.0609, "step": 52683 }, { "epoch": 0.9330092488057131, "grad_norm": 0.9181144833564758, "learning_rate": 3.51546789049173e-07, "loss": 0.0682, "step": 52684 }, { "epoch": 0.9330269583427415, "grad_norm": 0.8098084330558777, "learning_rate": 3.5136163750224713e-07, "loss": 0.07, "step": 52685 }, { "epoch": 0.9330446678797699, "grad_norm": 0.8348621726036072, "learning_rate": 3.511765341478612e-07, "loss": 0.0544, "step": 52686 }, { "epoch": 0.9330623774167984, "grad_norm": 0.8589633107185364, "learning_rate": 3.509914789866231e-07, "loss": 0.0824, "step": 52687 }, { "epoch": 0.9330800869538268, "grad_norm": 0.6223192811012268, "learning_rate": 3.5080647201914063e-07, "loss": 0.0471, "step": 52688 }, { "epoch": 0.9330977964908552, "grad_norm": 0.296019047498703, "learning_rate": 3.50621513246025e-07, "loss": 0.062, "step": 52689 }, { "epoch": 0.9331155060278836, "grad_norm": 0.5362908840179443, "learning_rate": 3.5043660266788237e-07, "loss": 0.0492, "step": 52690 }, { "epoch": 0.9331332155649121, "grad_norm": 0.48552292585372925, "learning_rate": 3.502517402853206e-07, "loss": 0.0561, "step": 52691 }, { "epoch": 0.9331509251019405, "grad_norm": 0.49151289463043213, "learning_rate": 3.500669260989492e-07, "loss": 0.042, "step": 52692 }, { "epoch": 0.9331686346389689, "grad_norm": 0.8943089246749878, "learning_rate": 3.498821601093777e-07, "loss": 0.0683, "step": 52693 }, { "epoch": 0.9331863441759973, "grad_norm": 0.6810007095336914, "learning_rate": 3.496974423172089e-07, "loss": 0.0551, "step": 52694 }, { "epoch": 0.9332040537130258, "grad_norm": 0.36906710267066956, "learning_rate": 3.495127727230557e-07, "loss": 0.0429, "step": 52695 }, { "epoch": 0.9332217632500542, "grad_norm": 0.2617718577384949, "learning_rate": 3.493281513275226e-07, "loss": 0.049, "step": 52696 }, { "epoch": 0.9332394727870826, "grad_norm": 0.6917200684547424, "learning_rate": 3.491435781312191e-07, "loss": 0.0527, "step": 52697 }, { "epoch": 0.9332571823241111, "grad_norm": 0.286786288022995, "learning_rate": 3.4895905313474974e-07, "loss": 0.0477, "step": 52698 }, { "epoch": 0.9332748918611395, "grad_norm": 0.5314967036247253, "learning_rate": 3.48774576338724e-07, "loss": 0.0397, "step": 52699 }, { "epoch": 0.9332926013981679, "grad_norm": 0.5989775657653809, "learning_rate": 3.485901477437481e-07, "loss": 0.0466, "step": 52700 }, { "epoch": 0.9333103109351963, "grad_norm": 0.7021593451499939, "learning_rate": 3.484057673504315e-07, "loss": 0.0584, "step": 52701 }, { "epoch": 0.9333280204722249, "grad_norm": 0.46382415294647217, "learning_rate": 3.4822143515937544e-07, "loss": 0.0558, "step": 52702 }, { "epoch": 0.9333457300092532, "grad_norm": 0.6624515056610107, "learning_rate": 3.480371511711877e-07, "loss": 0.0531, "step": 52703 }, { "epoch": 0.9333634395462816, "grad_norm": 0.9085366129875183, "learning_rate": 3.478529153864779e-07, "loss": 0.0569, "step": 52704 }, { "epoch": 0.93338114908331, "grad_norm": 0.6411680579185486, "learning_rate": 3.4766872780585045e-07, "loss": 0.0536, "step": 52705 }, { "epoch": 0.9333988586203386, "grad_norm": 0.3795621693134308, "learning_rate": 3.4748458842990824e-07, "loss": 0.0553, "step": 52706 }, { "epoch": 0.933416568157367, "grad_norm": 0.6683523654937744, "learning_rate": 3.4730049725925915e-07, "loss": 0.0413, "step": 52707 }, { "epoch": 0.9334342776943954, "grad_norm": 1.0175901651382446, "learning_rate": 3.4711645429451267e-07, "loss": 0.0843, "step": 52708 }, { "epoch": 0.9334519872314238, "grad_norm": 0.6319584846496582, "learning_rate": 3.4693245953627e-07, "loss": 0.0767, "step": 52709 }, { "epoch": 0.9334696967684523, "grad_norm": 0.6128732562065125, "learning_rate": 3.4674851298513723e-07, "loss": 0.0693, "step": 52710 }, { "epoch": 0.9334874063054807, "grad_norm": 0.7628458738327026, "learning_rate": 3.4656461464172064e-07, "loss": 0.0829, "step": 52711 }, { "epoch": 0.9335051158425091, "grad_norm": 0.5928873419761658, "learning_rate": 3.4638076450662303e-07, "loss": 0.0459, "step": 52712 }, { "epoch": 0.9335228253795376, "grad_norm": 0.7568073272705078, "learning_rate": 3.461969625804506e-07, "loss": 0.0343, "step": 52713 }, { "epoch": 0.933540534916566, "grad_norm": 0.7864737510681152, "learning_rate": 3.4601320886380796e-07, "loss": 0.054, "step": 52714 }, { "epoch": 0.9335582444535944, "grad_norm": 1.156504511833191, "learning_rate": 3.458295033573011e-07, "loss": 0.0823, "step": 52715 }, { "epoch": 0.9335759539906228, "grad_norm": 0.7016119956970215, "learning_rate": 3.456458460615314e-07, "loss": 0.0568, "step": 52716 }, { "epoch": 0.9335936635276513, "grad_norm": 0.9871680736541748, "learning_rate": 3.454622369771049e-07, "loss": 0.0928, "step": 52717 }, { "epoch": 0.9336113730646797, "grad_norm": 0.6364214420318604, "learning_rate": 3.4527867610462783e-07, "loss": 0.0671, "step": 52718 }, { "epoch": 0.9336290826017081, "grad_norm": 0.35139092803001404, "learning_rate": 3.4509516344469803e-07, "loss": 0.0706, "step": 52719 }, { "epoch": 0.9336467921387365, "grad_norm": 0.45546191930770874, "learning_rate": 3.449116989979251e-07, "loss": 0.0558, "step": 52720 }, { "epoch": 0.933664501675765, "grad_norm": 0.3636819124221802, "learning_rate": 3.447282827649101e-07, "loss": 0.0494, "step": 52721 }, { "epoch": 0.9336822112127934, "grad_norm": 0.5515963435173035, "learning_rate": 3.445449147462576e-07, "loss": 0.0473, "step": 52722 }, { "epoch": 0.9336999207498218, "grad_norm": 0.46906888484954834, "learning_rate": 3.443615949425688e-07, "loss": 0.0604, "step": 52723 }, { "epoch": 0.9337176302868502, "grad_norm": 0.8093858361244202, "learning_rate": 3.4417832335444656e-07, "loss": 0.0656, "step": 52724 }, { "epoch": 0.9337353398238787, "grad_norm": 0.6403948664665222, "learning_rate": 3.43995099982497e-07, "loss": 0.0525, "step": 52725 }, { "epoch": 0.9337530493609071, "grad_norm": 0.6326605677604675, "learning_rate": 3.438119248273214e-07, "loss": 0.0698, "step": 52726 }, { "epoch": 0.9337707588979355, "grad_norm": 0.6063602566719055, "learning_rate": 3.4362879788952085e-07, "loss": 0.0343, "step": 52727 }, { "epoch": 0.933788468434964, "grad_norm": 0.33716467022895813, "learning_rate": 3.4344571916969993e-07, "loss": 0.0515, "step": 52728 }, { "epoch": 0.9338061779719924, "grad_norm": 0.5958417654037476, "learning_rate": 3.4326268866846144e-07, "loss": 0.0788, "step": 52729 }, { "epoch": 0.9338238875090208, "grad_norm": 0.2294793277978897, "learning_rate": 3.4307970638640496e-07, "loss": 0.0288, "step": 52730 }, { "epoch": 0.9338415970460492, "grad_norm": 0.8309800624847412, "learning_rate": 3.4289677232413497e-07, "loss": 0.0634, "step": 52731 }, { "epoch": 0.9338593065830777, "grad_norm": 0.33035948872566223, "learning_rate": 3.42713886482251e-07, "loss": 0.0537, "step": 52732 }, { "epoch": 0.9338770161201061, "grad_norm": 0.47679075598716736, "learning_rate": 3.4253104886135757e-07, "loss": 0.0439, "step": 52733 }, { "epoch": 0.9338947256571345, "grad_norm": 0.893771767616272, "learning_rate": 3.423482594620525e-07, "loss": 0.077, "step": 52734 }, { "epoch": 0.9339124351941629, "grad_norm": 0.5367746949195862, "learning_rate": 3.421655182849387e-07, "loss": 0.0853, "step": 52735 }, { "epoch": 0.9339301447311914, "grad_norm": 0.5976148843765259, "learning_rate": 3.4198282533061896e-07, "loss": 0.0675, "step": 52736 }, { "epoch": 0.9339478542682198, "grad_norm": 0.5288389325141907, "learning_rate": 3.418001805996929e-07, "loss": 0.0294, "step": 52737 }, { "epoch": 0.9339655638052482, "grad_norm": 0.7167304158210754, "learning_rate": 3.416175840927616e-07, "loss": 0.0381, "step": 52738 }, { "epoch": 0.9339832733422766, "grad_norm": 1.5747421979904175, "learning_rate": 3.4143503581042456e-07, "loss": 0.0702, "step": 52739 }, { "epoch": 0.9340009828793051, "grad_norm": 0.5158734917640686, "learning_rate": 3.412525357532864e-07, "loss": 0.0434, "step": 52740 }, { "epoch": 0.9340186924163335, "grad_norm": 0.4192611575126648, "learning_rate": 3.410700839219433e-07, "loss": 0.0414, "step": 52741 }, { "epoch": 0.9340364019533619, "grad_norm": 0.39889273047447205, "learning_rate": 3.4088768031699634e-07, "loss": 0.0391, "step": 52742 }, { "epoch": 0.9340541114903904, "grad_norm": 0.5841314792633057, "learning_rate": 3.407053249390485e-07, "loss": 0.0508, "step": 52743 }, { "epoch": 0.9340718210274188, "grad_norm": 0.8896269202232361, "learning_rate": 3.405230177886942e-07, "loss": 0.0669, "step": 52744 }, { "epoch": 0.9340895305644472, "grad_norm": 0.37740904092788696, "learning_rate": 3.4034075886653804e-07, "loss": 0.0585, "step": 52745 }, { "epoch": 0.9341072401014756, "grad_norm": 0.3397360146045685, "learning_rate": 3.401585481731778e-07, "loss": 0.0651, "step": 52746 }, { "epoch": 0.9341249496385041, "grad_norm": 0.5221365094184875, "learning_rate": 3.399763857092147e-07, "loss": 0.0426, "step": 52747 }, { "epoch": 0.9341426591755325, "grad_norm": 0.49996915459632874, "learning_rate": 3.39794271475245e-07, "loss": 0.0661, "step": 52748 }, { "epoch": 0.9341603687125609, "grad_norm": 0.4991472363471985, "learning_rate": 3.396122054718681e-07, "loss": 0.0675, "step": 52749 }, { "epoch": 0.9341780782495893, "grad_norm": 0.59133380651474, "learning_rate": 3.394301876996869e-07, "loss": 0.046, "step": 52750 }, { "epoch": 0.9341957877866178, "grad_norm": 0.6003500819206238, "learning_rate": 3.3924821815929595e-07, "loss": 0.0576, "step": 52751 }, { "epoch": 0.9342134973236462, "grad_norm": 0.8816384077072144, "learning_rate": 3.390662968512964e-07, "loss": 0.0903, "step": 52752 }, { "epoch": 0.9342312068606746, "grad_norm": 0.6654621362686157, "learning_rate": 3.3888442377628614e-07, "loss": 0.0348, "step": 52753 }, { "epoch": 0.934248916397703, "grad_norm": 0.4268728196620941, "learning_rate": 3.3870259893486135e-07, "loss": 0.0225, "step": 52754 }, { "epoch": 0.9342666259347315, "grad_norm": 0.6829426288604736, "learning_rate": 3.3852082232762314e-07, "loss": 0.0674, "step": 52755 }, { "epoch": 0.9342843354717599, "grad_norm": 0.7423069477081299, "learning_rate": 3.383390939551678e-07, "loss": 0.0573, "step": 52756 }, { "epoch": 0.9343020450087883, "grad_norm": 0.2151138037443161, "learning_rate": 3.381574138180965e-07, "loss": 0.0361, "step": 52757 }, { "epoch": 0.9343197545458168, "grad_norm": 0.6654110550880432, "learning_rate": 3.3797578191700206e-07, "loss": 0.0442, "step": 52758 }, { "epoch": 0.9343374640828452, "grad_norm": 0.940950870513916, "learning_rate": 3.377941982524857e-07, "loss": 0.0495, "step": 52759 }, { "epoch": 0.9343551736198736, "grad_norm": 0.609103798866272, "learning_rate": 3.376126628251419e-07, "loss": 0.0523, "step": 52760 }, { "epoch": 0.934372883156902, "grad_norm": 0.4368979036808014, "learning_rate": 3.374311756355719e-07, "loss": 0.0404, "step": 52761 }, { "epoch": 0.9343905926939305, "grad_norm": 0.6261388659477234, "learning_rate": 3.3724973668436686e-07, "loss": 0.0546, "step": 52762 }, { "epoch": 0.9344083022309589, "grad_norm": 0.7283005714416504, "learning_rate": 3.3706834597212967e-07, "loss": 0.0716, "step": 52763 }, { "epoch": 0.9344260117679873, "grad_norm": 0.8016351461410522, "learning_rate": 3.368870034994531e-07, "loss": 0.0578, "step": 52764 }, { "epoch": 0.9344437213050157, "grad_norm": 0.30070579051971436, "learning_rate": 3.3670570926693513e-07, "loss": 0.0506, "step": 52765 }, { "epoch": 0.9344614308420442, "grad_norm": 0.46792304515838623, "learning_rate": 3.365244632751735e-07, "loss": 0.0456, "step": 52766 }, { "epoch": 0.9344791403790726, "grad_norm": 0.3345620930194855, "learning_rate": 3.363432655247628e-07, "loss": 0.027, "step": 52767 }, { "epoch": 0.934496849916101, "grad_norm": 0.5505942106246948, "learning_rate": 3.3616211601630084e-07, "loss": 0.0472, "step": 52768 }, { "epoch": 0.9345145594531294, "grad_norm": 0.47815844416618347, "learning_rate": 3.3598101475038055e-07, "loss": 0.0425, "step": 52769 }, { "epoch": 0.934532268990158, "grad_norm": 0.5301689505577087, "learning_rate": 3.3579996172759976e-07, "loss": 0.0588, "step": 52770 }, { "epoch": 0.9345499785271864, "grad_norm": 0.763697624206543, "learning_rate": 3.356189569485529e-07, "loss": 0.0489, "step": 52771 }, { "epoch": 0.9345676880642148, "grad_norm": 0.7468016147613525, "learning_rate": 3.3543800041383964e-07, "loss": 0.0506, "step": 52772 }, { "epoch": 0.9345853976012433, "grad_norm": 0.773331880569458, "learning_rate": 3.3525709212404775e-07, "loss": 0.0392, "step": 52773 }, { "epoch": 0.9346031071382717, "grad_norm": 0.8298646211624146, "learning_rate": 3.350762320797801e-07, "loss": 0.0655, "step": 52774 }, { "epoch": 0.9346208166753001, "grad_norm": 0.875860869884491, "learning_rate": 3.348954202816279e-07, "loss": 0.0639, "step": 52775 }, { "epoch": 0.9346385262123285, "grad_norm": 0.9717580080032349, "learning_rate": 3.3471465673018566e-07, "loss": 0.0677, "step": 52776 }, { "epoch": 0.934656235749357, "grad_norm": 0.30455878376960754, "learning_rate": 3.3453394142604955e-07, "loss": 0.0377, "step": 52777 }, { "epoch": 0.9346739452863854, "grad_norm": 0.5533920526504517, "learning_rate": 3.3435327436981246e-07, "loss": 0.0444, "step": 52778 }, { "epoch": 0.9346916548234138, "grad_norm": 0.5144806504249573, "learning_rate": 3.341726555620722e-07, "loss": 0.0482, "step": 52779 }, { "epoch": 0.9347093643604422, "grad_norm": 0.3811183571815491, "learning_rate": 3.339920850034184e-07, "loss": 0.0502, "step": 52780 }, { "epoch": 0.9347270738974707, "grad_norm": 0.3443973958492279, "learning_rate": 3.338115626944488e-07, "loss": 0.0517, "step": 52781 }, { "epoch": 0.9347447834344991, "grad_norm": 1.0017778873443604, "learning_rate": 3.33631088635753e-07, "loss": 0.0992, "step": 52782 }, { "epoch": 0.9347624929715275, "grad_norm": 0.6845927834510803, "learning_rate": 3.334506628279305e-07, "loss": 0.0776, "step": 52783 }, { "epoch": 0.9347802025085559, "grad_norm": 0.3597458004951477, "learning_rate": 3.3327028527156913e-07, "loss": 0.0469, "step": 52784 }, { "epoch": 0.9347979120455844, "grad_norm": 0.8123801350593567, "learning_rate": 3.3308995596726676e-07, "loss": 0.0636, "step": 52785 }, { "epoch": 0.9348156215826128, "grad_norm": 0.8679076433181763, "learning_rate": 3.329096749156163e-07, "loss": 0.053, "step": 52786 }, { "epoch": 0.9348333311196412, "grad_norm": 0.5086051225662231, "learning_rate": 3.3272944211720726e-07, "loss": 0.0639, "step": 52787 }, { "epoch": 0.9348510406566697, "grad_norm": 0.4361613988876343, "learning_rate": 3.325492575726358e-07, "loss": 0.051, "step": 52788 }, { "epoch": 0.9348687501936981, "grad_norm": 0.5287114977836609, "learning_rate": 3.3236912128249474e-07, "loss": 0.0575, "step": 52789 }, { "epoch": 0.9348864597307265, "grad_norm": 1.0015376806259155, "learning_rate": 3.321890332473737e-07, "loss": 0.0746, "step": 52790 }, { "epoch": 0.9349041692677549, "grad_norm": 0.6417990326881409, "learning_rate": 3.3200899346786883e-07, "loss": 0.063, "step": 52791 }, { "epoch": 0.9349218788047834, "grad_norm": 0.2501267194747925, "learning_rate": 3.3182900194456964e-07, "loss": 0.0371, "step": 52792 }, { "epoch": 0.9349395883418118, "grad_norm": 0.4381430745124817, "learning_rate": 3.3164905867806904e-07, "loss": 0.0579, "step": 52793 }, { "epoch": 0.9349572978788402, "grad_norm": 1.1180098056793213, "learning_rate": 3.3146916366895986e-07, "loss": 0.0691, "step": 52794 }, { "epoch": 0.9349750074158686, "grad_norm": 0.5243354439735413, "learning_rate": 3.3128931691783326e-07, "loss": 0.0647, "step": 52795 }, { "epoch": 0.9349927169528971, "grad_norm": 0.36236709356307983, "learning_rate": 3.311095184252821e-07, "loss": 0.0296, "step": 52796 }, { "epoch": 0.9350104264899255, "grad_norm": 0.7707884311676025, "learning_rate": 3.3092976819189766e-07, "loss": 0.0689, "step": 52797 }, { "epoch": 0.9350281360269539, "grad_norm": 0.7691377997398376, "learning_rate": 3.3075006621826943e-07, "loss": 0.0736, "step": 52798 }, { "epoch": 0.9350458455639823, "grad_norm": 0.2707783281803131, "learning_rate": 3.3057041250498854e-07, "loss": 0.0547, "step": 52799 }, { "epoch": 0.9350635551010108, "grad_norm": 0.35750454664230347, "learning_rate": 3.3039080705264955e-07, "loss": 0.0674, "step": 52800 }, { "epoch": 0.9350812646380392, "grad_norm": 0.39143243432044983, "learning_rate": 3.302112498618404e-07, "loss": 0.0503, "step": 52801 }, { "epoch": 0.9350989741750676, "grad_norm": 0.714778482913971, "learning_rate": 3.300317409331505e-07, "loss": 0.0619, "step": 52802 }, { "epoch": 0.9351166837120961, "grad_norm": 0.630715548992157, "learning_rate": 3.298522802671744e-07, "loss": 0.0862, "step": 52803 }, { "epoch": 0.9351343932491245, "grad_norm": 0.6092198491096497, "learning_rate": 3.296728678645e-07, "loss": 0.0606, "step": 52804 }, { "epoch": 0.9351521027861529, "grad_norm": 0.5849571824073792, "learning_rate": 3.2949350372571853e-07, "loss": 0.0572, "step": 52805 }, { "epoch": 0.9351698123231813, "grad_norm": 0.44145339727401733, "learning_rate": 3.2931418785141944e-07, "loss": 0.0596, "step": 52806 }, { "epoch": 0.9351875218602098, "grad_norm": 0.33290696144104004, "learning_rate": 3.291349202421939e-07, "loss": 0.0482, "step": 52807 }, { "epoch": 0.9352052313972382, "grad_norm": 0.9287997484207153, "learning_rate": 3.2895570089862823e-07, "loss": 0.0691, "step": 52808 }, { "epoch": 0.9352229409342666, "grad_norm": 0.625369668006897, "learning_rate": 3.2877652982131525e-07, "loss": 0.0468, "step": 52809 }, { "epoch": 0.935240650471295, "grad_norm": 0.5365417003631592, "learning_rate": 3.285974070108444e-07, "loss": 0.0501, "step": 52810 }, { "epoch": 0.9352583600083235, "grad_norm": 0.600121796131134, "learning_rate": 3.284183324678036e-07, "loss": 0.0496, "step": 52811 }, { "epoch": 0.9352760695453519, "grad_norm": 0.3224358856678009, "learning_rate": 3.282393061927824e-07, "loss": 0.0438, "step": 52812 }, { "epoch": 0.9352937790823803, "grad_norm": 0.5807640552520752, "learning_rate": 3.2806032818636865e-07, "loss": 0.0248, "step": 52813 }, { "epoch": 0.9353114886194087, "grad_norm": 0.478727251291275, "learning_rate": 3.278813984491552e-07, "loss": 0.0423, "step": 52814 }, { "epoch": 0.9353291981564372, "grad_norm": 0.6374425292015076, "learning_rate": 3.277025169817266e-07, "loss": 0.0617, "step": 52815 }, { "epoch": 0.9353469076934656, "grad_norm": 0.578629195690155, "learning_rate": 3.2752368378467235e-07, "loss": 0.0257, "step": 52816 }, { "epoch": 0.935364617230494, "grad_norm": 0.43375611305236816, "learning_rate": 3.27344898858582e-07, "loss": 0.07, "step": 52817 }, { "epoch": 0.9353823267675225, "grad_norm": 0.4841075539588928, "learning_rate": 3.271661622040434e-07, "loss": 0.039, "step": 52818 }, { "epoch": 0.9354000363045509, "grad_norm": 0.42470040917396545, "learning_rate": 3.2698747382164275e-07, "loss": 0.0521, "step": 52819 }, { "epoch": 0.9354177458415793, "grad_norm": 0.8021770715713501, "learning_rate": 3.268088337119696e-07, "loss": 0.0797, "step": 52820 }, { "epoch": 0.9354354553786077, "grad_norm": 0.7542093992233276, "learning_rate": 3.2663024187561007e-07, "loss": 0.0618, "step": 52821 }, { "epoch": 0.9354531649156362, "grad_norm": 0.6574246883392334, "learning_rate": 3.264516983131538e-07, "loss": 0.0743, "step": 52822 }, { "epoch": 0.9354708744526646, "grad_norm": 0.3542669713497162, "learning_rate": 3.262732030251886e-07, "loss": 0.035, "step": 52823 }, { "epoch": 0.935488583989693, "grad_norm": 0.5150747299194336, "learning_rate": 3.26094756012299e-07, "loss": 0.0367, "step": 52824 }, { "epoch": 0.9355062935267214, "grad_norm": 0.6148107051849365, "learning_rate": 3.259163572750745e-07, "loss": 0.0625, "step": 52825 }, { "epoch": 0.9355240030637499, "grad_norm": 0.3631219267845154, "learning_rate": 3.257380068140997e-07, "loss": 0.0357, "step": 52826 }, { "epoch": 0.9355417126007783, "grad_norm": 0.517718493938446, "learning_rate": 3.255597046299641e-07, "loss": 0.0287, "step": 52827 }, { "epoch": 0.9355594221378067, "grad_norm": 0.37897762656211853, "learning_rate": 3.253814507232522e-07, "loss": 0.0534, "step": 52828 }, { "epoch": 0.9355771316748351, "grad_norm": 0.2613771855831146, "learning_rate": 3.2520324509455026e-07, "loss": 0.0291, "step": 52829 }, { "epoch": 0.9355948412118636, "grad_norm": 0.6403154134750366, "learning_rate": 3.2502508774444605e-07, "loss": 0.0579, "step": 52830 }, { "epoch": 0.935612550748892, "grad_norm": 0.9945515990257263, "learning_rate": 3.248469786735242e-07, "loss": 0.0856, "step": 52831 }, { "epoch": 0.9356302602859204, "grad_norm": 0.5352607369422913, "learning_rate": 3.2466891788237254e-07, "loss": 0.0427, "step": 52832 }, { "epoch": 0.935647969822949, "grad_norm": 0.7325893640518188, "learning_rate": 3.2449090537157553e-07, "loss": 0.0501, "step": 52833 }, { "epoch": 0.9356656793599774, "grad_norm": 0.8436564207077026, "learning_rate": 3.243129411417195e-07, "loss": 0.0369, "step": 52834 }, { "epoch": 0.9356833888970058, "grad_norm": 1.2413629293441772, "learning_rate": 3.241350251933889e-07, "loss": 0.068, "step": 52835 }, { "epoch": 0.9357010984340342, "grad_norm": 0.5317454934120178, "learning_rate": 3.239571575271716e-07, "loss": 0.0507, "step": 52836 }, { "epoch": 0.9357188079710627, "grad_norm": 0.6268422603607178, "learning_rate": 3.237793381436488e-07, "loss": 0.0525, "step": 52837 }, { "epoch": 0.9357365175080911, "grad_norm": 0.6079674959182739, "learning_rate": 3.236015670434067e-07, "loss": 0.0355, "step": 52838 }, { "epoch": 0.9357542270451195, "grad_norm": 0.5617266893386841, "learning_rate": 3.234238442270332e-07, "loss": 0.0747, "step": 52839 }, { "epoch": 0.9357719365821479, "grad_norm": 0.5071214437484741, "learning_rate": 3.2324616969510945e-07, "loss": 0.0511, "step": 52840 }, { "epoch": 0.9357896461191764, "grad_norm": 0.6692012548446655, "learning_rate": 3.230685434482217e-07, "loss": 0.0474, "step": 52841 }, { "epoch": 0.9358073556562048, "grad_norm": 0.6499070525169373, "learning_rate": 3.2289096548695274e-07, "loss": 0.0513, "step": 52842 }, { "epoch": 0.9358250651932332, "grad_norm": 0.39895692467689514, "learning_rate": 3.227134358118905e-07, "loss": 0.0567, "step": 52843 }, { "epoch": 0.9358427747302616, "grad_norm": 0.4691073000431061, "learning_rate": 3.225359544236162e-07, "loss": 0.0604, "step": 52844 }, { "epoch": 0.9358604842672901, "grad_norm": 0.509326159954071, "learning_rate": 3.223585213227126e-07, "loss": 0.0752, "step": 52845 }, { "epoch": 0.9358781938043185, "grad_norm": 0.8206255435943604, "learning_rate": 3.22181136509766e-07, "loss": 0.0528, "step": 52846 }, { "epoch": 0.9358959033413469, "grad_norm": 0.6612269282341003, "learning_rate": 3.220037999853592e-07, "loss": 0.0467, "step": 52847 }, { "epoch": 0.9359136128783754, "grad_norm": 1.1382797956466675, "learning_rate": 3.2182651175007347e-07, "loss": 0.0614, "step": 52848 }, { "epoch": 0.9359313224154038, "grad_norm": 0.7440088391304016, "learning_rate": 3.21649271804495e-07, "loss": 0.0443, "step": 52849 }, { "epoch": 0.9359490319524322, "grad_norm": 0.4344409704208374, "learning_rate": 3.214720801492049e-07, "loss": 0.0322, "step": 52850 }, { "epoch": 0.9359667414894606, "grad_norm": 1.3669352531433105, "learning_rate": 3.2129493678478783e-07, "loss": 0.0738, "step": 52851 }, { "epoch": 0.9359844510264891, "grad_norm": 0.5763616561889648, "learning_rate": 3.2111784171182654e-07, "loss": 0.0541, "step": 52852 }, { "epoch": 0.9360021605635175, "grad_norm": 0.3730606138706207, "learning_rate": 3.209407949309023e-07, "loss": 0.0315, "step": 52853 }, { "epoch": 0.9360198701005459, "grad_norm": 0.5991912484169006, "learning_rate": 3.2076379644259804e-07, "loss": 0.0607, "step": 52854 }, { "epoch": 0.9360375796375743, "grad_norm": 0.5967766642570496, "learning_rate": 3.2058684624749644e-07, "loss": 0.0437, "step": 52855 }, { "epoch": 0.9360552891746028, "grad_norm": 0.52310711145401, "learning_rate": 3.204099443461789e-07, "loss": 0.0438, "step": 52856 }, { "epoch": 0.9360729987116312, "grad_norm": 0.718984842300415, "learning_rate": 3.2023309073922814e-07, "loss": 0.0548, "step": 52857 }, { "epoch": 0.9360907082486596, "grad_norm": 0.6902899146080017, "learning_rate": 3.200562854272254e-07, "loss": 0.0808, "step": 52858 }, { "epoch": 0.936108417785688, "grad_norm": 0.5231379270553589, "learning_rate": 3.198795284107536e-07, "loss": 0.0539, "step": 52859 }, { "epoch": 0.9361261273227165, "grad_norm": 0.551990270614624, "learning_rate": 3.197028196903923e-07, "loss": 0.0429, "step": 52860 }, { "epoch": 0.9361438368597449, "grad_norm": 0.7861149311065674, "learning_rate": 3.1952615926672426e-07, "loss": 0.0902, "step": 52861 }, { "epoch": 0.9361615463967733, "grad_norm": 0.5989391803741455, "learning_rate": 3.193495471403307e-07, "loss": 0.0682, "step": 52862 }, { "epoch": 0.9361792559338018, "grad_norm": 0.619819164276123, "learning_rate": 3.191729833117912e-07, "loss": 0.0474, "step": 52863 }, { "epoch": 0.9361969654708302, "grad_norm": 0.8012160658836365, "learning_rate": 3.189964677816887e-07, "loss": 0.0556, "step": 52864 }, { "epoch": 0.9362146750078586, "grad_norm": 0.726201593875885, "learning_rate": 3.188200005506026e-07, "loss": 0.0563, "step": 52865 }, { "epoch": 0.936232384544887, "grad_norm": 0.40715864300727844, "learning_rate": 3.1864358161911414e-07, "loss": 0.0648, "step": 52866 }, { "epoch": 0.9362500940819155, "grad_norm": 0.8672199249267578, "learning_rate": 3.1846721098780454e-07, "loss": 0.0718, "step": 52867 }, { "epoch": 0.9362678036189439, "grad_norm": 0.6380281448364258, "learning_rate": 3.1829088865725e-07, "loss": 0.0512, "step": 52868 }, { "epoch": 0.9362855131559723, "grad_norm": 0.8609520196914673, "learning_rate": 3.1811461462803336e-07, "loss": 0.0526, "step": 52869 }, { "epoch": 0.9363032226930007, "grad_norm": 0.7790198922157288, "learning_rate": 3.1793838890073756e-07, "loss": 0.0521, "step": 52870 }, { "epoch": 0.9363209322300292, "grad_norm": 0.592499852180481, "learning_rate": 3.1776221147593874e-07, "loss": 0.0409, "step": 52871 }, { "epoch": 0.9363386417670576, "grad_norm": 0.3777119219303131, "learning_rate": 3.175860823542165e-07, "loss": 0.0422, "step": 52872 }, { "epoch": 0.936356351304086, "grad_norm": 0.8160537481307983, "learning_rate": 3.174100015361503e-07, "loss": 0.0555, "step": 52873 }, { "epoch": 0.9363740608411144, "grad_norm": 0.31161361932754517, "learning_rate": 3.1723396902232137e-07, "loss": 0.0433, "step": 52874 }, { "epoch": 0.9363917703781429, "grad_norm": 0.8519728779792786, "learning_rate": 3.170579848133076e-07, "loss": 0.0779, "step": 52875 }, { "epoch": 0.9364094799151713, "grad_norm": 0.572420597076416, "learning_rate": 3.1688204890968684e-07, "loss": 0.0354, "step": 52876 }, { "epoch": 0.9364271894521997, "grad_norm": 0.7610859274864197, "learning_rate": 3.1670616131204035e-07, "loss": 0.0612, "step": 52877 }, { "epoch": 0.9364448989892282, "grad_norm": 0.3639846742153168, "learning_rate": 3.165303220209442e-07, "loss": 0.0523, "step": 52878 }, { "epoch": 0.9364626085262566, "grad_norm": 0.45864158868789673, "learning_rate": 3.163545310369781e-07, "loss": 0.0426, "step": 52879 }, { "epoch": 0.936480318063285, "grad_norm": 0.7838624715805054, "learning_rate": 3.1617878836072144e-07, "loss": 0.0654, "step": 52880 }, { "epoch": 0.9364980276003134, "grad_norm": 0.2599656581878662, "learning_rate": 3.1600309399275053e-07, "loss": 0.0437, "step": 52881 }, { "epoch": 0.9365157371373419, "grad_norm": 0.919273853302002, "learning_rate": 3.158274479336465e-07, "loss": 0.0541, "step": 52882 }, { "epoch": 0.9365334466743703, "grad_norm": 0.5309863686561584, "learning_rate": 3.156518501839822e-07, "loss": 0.0366, "step": 52883 }, { "epoch": 0.9365511562113987, "grad_norm": 0.5027696490287781, "learning_rate": 3.15476300744339e-07, "loss": 0.0397, "step": 52884 }, { "epoch": 0.9365688657484271, "grad_norm": 0.7598627209663391, "learning_rate": 3.153007996152946e-07, "loss": 0.0685, "step": 52885 }, { "epoch": 0.9365865752854556, "grad_norm": 0.9378980994224548, "learning_rate": 3.1512534679742355e-07, "loss": 0.0551, "step": 52886 }, { "epoch": 0.936604284822484, "grad_norm": 0.9227977395057678, "learning_rate": 3.149499422913055e-07, "loss": 0.0669, "step": 52887 }, { "epoch": 0.9366219943595124, "grad_norm": 0.6262799501419067, "learning_rate": 3.1477458609751654e-07, "loss": 0.0573, "step": 52888 }, { "epoch": 0.9366397038965408, "grad_norm": 0.717010498046875, "learning_rate": 3.145992782166346e-07, "loss": 0.0559, "step": 52889 }, { "epoch": 0.9366574134335693, "grad_norm": 0.7683833837509155, "learning_rate": 3.144240186492359e-07, "loss": 0.0433, "step": 52890 }, { "epoch": 0.9366751229705977, "grad_norm": 0.5289404392242432, "learning_rate": 3.142488073958949e-07, "loss": 0.0668, "step": 52891 }, { "epoch": 0.9366928325076261, "grad_norm": 0.4485286474227905, "learning_rate": 3.140736444571929e-07, "loss": 0.0379, "step": 52892 }, { "epoch": 0.9367105420446546, "grad_norm": 0.672265887260437, "learning_rate": 3.1389852983370105e-07, "loss": 0.0545, "step": 52893 }, { "epoch": 0.936728251581683, "grad_norm": 0.7830076813697815, "learning_rate": 3.137234635259989e-07, "loss": 0.0743, "step": 52894 }, { "epoch": 0.9367459611187114, "grad_norm": 0.5375023484230042, "learning_rate": 3.1354844553465934e-07, "loss": 0.0494, "step": 52895 }, { "epoch": 0.9367636706557398, "grad_norm": 0.8774081468582153, "learning_rate": 3.1337347586026187e-07, "loss": 0.0635, "step": 52896 }, { "epoch": 0.9367813801927684, "grad_norm": 0.4968189597129822, "learning_rate": 3.131985545033778e-07, "loss": 0.025, "step": 52897 }, { "epoch": 0.9367990897297968, "grad_norm": 0.6791654825210571, "learning_rate": 3.130236814645865e-07, "loss": 0.0604, "step": 52898 }, { "epoch": 0.9368167992668252, "grad_norm": 0.7301731705665588, "learning_rate": 3.128488567444643e-07, "loss": 0.0613, "step": 52899 }, { "epoch": 0.9368345088038535, "grad_norm": 0.4362671971321106, "learning_rate": 3.126740803435807e-07, "loss": 0.0467, "step": 52900 }, { "epoch": 0.9368522183408821, "grad_norm": 0.6911998987197876, "learning_rate": 3.1249935226251357e-07, "loss": 0.049, "step": 52901 }, { "epoch": 0.9368699278779105, "grad_norm": 0.7384824752807617, "learning_rate": 3.1232467250183916e-07, "loss": 0.051, "step": 52902 }, { "epoch": 0.9368876374149389, "grad_norm": 0.27408266067504883, "learning_rate": 3.1215004106213197e-07, "loss": 0.0268, "step": 52903 }, { "epoch": 0.9369053469519673, "grad_norm": 0.6153414249420166, "learning_rate": 3.1197545794396486e-07, "loss": 0.0642, "step": 52904 }, { "epoch": 0.9369230564889958, "grad_norm": 0.6743077635765076, "learning_rate": 3.118009231479124e-07, "loss": 0.0594, "step": 52905 }, { "epoch": 0.9369407660260242, "grad_norm": 0.8080425262451172, "learning_rate": 3.116264366745492e-07, "loss": 0.0521, "step": 52906 }, { "epoch": 0.9369584755630526, "grad_norm": 0.5128065347671509, "learning_rate": 3.1145199852444796e-07, "loss": 0.073, "step": 52907 }, { "epoch": 0.9369761851000811, "grad_norm": 0.3787071406841278, "learning_rate": 3.112776086981867e-07, "loss": 0.0375, "step": 52908 }, { "epoch": 0.9369938946371095, "grad_norm": 0.8459905385971069, "learning_rate": 3.1110326719633484e-07, "loss": 0.0576, "step": 52909 }, { "epoch": 0.9370116041741379, "grad_norm": 0.5609033107757568, "learning_rate": 3.109289740194687e-07, "loss": 0.0539, "step": 52910 }, { "epoch": 0.9370293137111663, "grad_norm": 0.7345889806747437, "learning_rate": 3.107547291681595e-07, "loss": 0.0842, "step": 52911 }, { "epoch": 0.9370470232481948, "grad_norm": 0.2531670033931732, "learning_rate": 3.105805326429817e-07, "loss": 0.0306, "step": 52912 }, { "epoch": 0.9370647327852232, "grad_norm": 0.6783785223960876, "learning_rate": 3.1040638444450817e-07, "loss": 0.0526, "step": 52913 }, { "epoch": 0.9370824423222516, "grad_norm": 0.6230100989341736, "learning_rate": 3.1023228457331353e-07, "loss": 0.0639, "step": 52914 }, { "epoch": 0.93710015185928, "grad_norm": 0.9729108810424805, "learning_rate": 3.100582330299673e-07, "loss": 0.0652, "step": 52915 }, { "epoch": 0.9371178613963085, "grad_norm": 0.6195590496063232, "learning_rate": 3.0988422981504227e-07, "loss": 0.0661, "step": 52916 }, { "epoch": 0.9371355709333369, "grad_norm": 0.15802407264709473, "learning_rate": 3.0971027492911475e-07, "loss": 0.0332, "step": 52917 }, { "epoch": 0.9371532804703653, "grad_norm": 0.625127375125885, "learning_rate": 3.095363683727526e-07, "loss": 0.0684, "step": 52918 }, { "epoch": 0.9371709900073937, "grad_norm": 0.37536701560020447, "learning_rate": 3.0936251014653025e-07, "loss": 0.0448, "step": 52919 }, { "epoch": 0.9371886995444222, "grad_norm": 0.557094395160675, "learning_rate": 3.0918870025102073e-07, "loss": 0.0656, "step": 52920 }, { "epoch": 0.9372064090814506, "grad_norm": 0.9616273045539856, "learning_rate": 3.090149386867952e-07, "loss": 0.073, "step": 52921 }, { "epoch": 0.937224118618479, "grad_norm": 0.6794387698173523, "learning_rate": 3.0884122545442317e-07, "loss": 0.0654, "step": 52922 }, { "epoch": 0.9372418281555075, "grad_norm": 0.45093387365341187, "learning_rate": 3.086675605544775e-07, "loss": 0.0392, "step": 52923 }, { "epoch": 0.9372595376925359, "grad_norm": 0.1786866933107376, "learning_rate": 3.084939439875312e-07, "loss": 0.0289, "step": 52924 }, { "epoch": 0.9372772472295643, "grad_norm": 0.7506381869316101, "learning_rate": 3.0832037575415195e-07, "loss": 0.0561, "step": 52925 }, { "epoch": 0.9372949567665927, "grad_norm": 0.27409982681274414, "learning_rate": 3.081468558549111e-07, "loss": 0.0498, "step": 52926 }, { "epoch": 0.9373126663036212, "grad_norm": 0.4769093096256256, "learning_rate": 3.0797338429038313e-07, "loss": 0.0557, "step": 52927 }, { "epoch": 0.9373303758406496, "grad_norm": 0.5897948145866394, "learning_rate": 3.0779996106113763e-07, "loss": 0.0567, "step": 52928 }, { "epoch": 0.937348085377678, "grad_norm": 0.5795590281486511, "learning_rate": 3.076265861677441e-07, "loss": 0.0435, "step": 52929 }, { "epoch": 0.9373657949147064, "grad_norm": 0.7179215550422668, "learning_rate": 3.074532596107721e-07, "loss": 0.068, "step": 52930 }, { "epoch": 0.9373835044517349, "grad_norm": 0.6030864715576172, "learning_rate": 3.072799813907945e-07, "loss": 0.0474, "step": 52931 }, { "epoch": 0.9374012139887633, "grad_norm": 0.8755555152893066, "learning_rate": 3.0710675150837755e-07, "loss": 0.0588, "step": 52932 }, { "epoch": 0.9374189235257917, "grad_norm": 0.522813618183136, "learning_rate": 3.0693356996409405e-07, "loss": 0.0379, "step": 52933 }, { "epoch": 0.9374366330628201, "grad_norm": 0.1740579903125763, "learning_rate": 3.0676043675851194e-07, "loss": 0.0554, "step": 52934 }, { "epoch": 0.9374543425998486, "grad_norm": 0.30879613757133484, "learning_rate": 3.065873518922041e-07, "loss": 0.0573, "step": 52935 }, { "epoch": 0.937472052136877, "grad_norm": 0.7025598883628845, "learning_rate": 3.0641431536573505e-07, "loss": 0.0605, "step": 52936 }, { "epoch": 0.9374897616739054, "grad_norm": 0.5835527181625366, "learning_rate": 3.062413271796777e-07, "loss": 0.0434, "step": 52937 }, { "epoch": 0.9375074712109339, "grad_norm": 0.5622405409812927, "learning_rate": 3.0606838733460317e-07, "loss": 0.0423, "step": 52938 }, { "epoch": 0.9375251807479623, "grad_norm": 0.5038002133369446, "learning_rate": 3.0589549583107445e-07, "loss": 0.0587, "step": 52939 }, { "epoch": 0.9375428902849907, "grad_norm": 0.7045947909355164, "learning_rate": 3.057226526696627e-07, "loss": 0.0428, "step": 52940 }, { "epoch": 0.9375605998220191, "grad_norm": 0.7491433024406433, "learning_rate": 3.0554985785093914e-07, "loss": 0.061, "step": 52941 }, { "epoch": 0.9375783093590476, "grad_norm": 0.557259738445282, "learning_rate": 3.0537711137546996e-07, "loss": 0.0522, "step": 52942 }, { "epoch": 0.937596018896076, "grad_norm": 0.3513893783092499, "learning_rate": 3.0520441324382307e-07, "loss": 0.0664, "step": 52943 }, { "epoch": 0.9376137284331044, "grad_norm": 0.33853548765182495, "learning_rate": 3.0503176345656803e-07, "loss": 0.0696, "step": 52944 }, { "epoch": 0.9376314379701328, "grad_norm": 0.881261944770813, "learning_rate": 3.04859162014271e-07, "loss": 0.0709, "step": 52945 }, { "epoch": 0.9376491475071613, "grad_norm": 0.6766186952590942, "learning_rate": 3.046866089175016e-07, "loss": 0.0576, "step": 52946 }, { "epoch": 0.9376668570441897, "grad_norm": 0.5418056845664978, "learning_rate": 3.0451410416682756e-07, "loss": 0.055, "step": 52947 }, { "epoch": 0.9376845665812181, "grad_norm": 0.7133521437644958, "learning_rate": 3.0434164776281524e-07, "loss": 0.0455, "step": 52948 }, { "epoch": 0.9377022761182465, "grad_norm": 0.502336323261261, "learning_rate": 3.041692397060325e-07, "loss": 0.041, "step": 52949 }, { "epoch": 0.937719985655275, "grad_norm": 0.4811071753501892, "learning_rate": 3.0399687999704717e-07, "loss": 0.067, "step": 52950 }, { "epoch": 0.9377376951923034, "grad_norm": 0.3853170871734619, "learning_rate": 3.038245686364255e-07, "loss": 0.0344, "step": 52951 }, { "epoch": 0.9377554047293318, "grad_norm": 0.7802589535713196, "learning_rate": 3.036523056247337e-07, "loss": 0.039, "step": 52952 }, { "epoch": 0.9377731142663603, "grad_norm": 0.29428860545158386, "learning_rate": 3.0348009096254127e-07, "loss": 0.0492, "step": 52953 }, { "epoch": 0.9377908238033887, "grad_norm": 0.8489664793014526, "learning_rate": 3.033079246504111e-07, "loss": 0.0481, "step": 52954 }, { "epoch": 0.9378085333404171, "grad_norm": 1.1400928497314453, "learning_rate": 3.031358066889095e-07, "loss": 0.0665, "step": 52955 }, { "epoch": 0.9378262428774455, "grad_norm": 0.8490676283836365, "learning_rate": 3.029637370786093e-07, "loss": 0.0436, "step": 52956 }, { "epoch": 0.937843952414474, "grad_norm": 0.565746009349823, "learning_rate": 3.0279171582006824e-07, "loss": 0.0427, "step": 52957 }, { "epoch": 0.9378616619515024, "grad_norm": 0.7874495983123779, "learning_rate": 3.0261974291385777e-07, "loss": 0.0641, "step": 52958 }, { "epoch": 0.9378793714885308, "grad_norm": 0.7898041009902954, "learning_rate": 3.0244781836054056e-07, "loss": 0.0853, "step": 52959 }, { "epoch": 0.9378970810255592, "grad_norm": 0.15377134084701538, "learning_rate": 3.0227594216068467e-07, "loss": 0.0359, "step": 52960 }, { "epoch": 0.9379147905625878, "grad_norm": 0.7290698289871216, "learning_rate": 3.021041143148545e-07, "loss": 0.0579, "step": 52961 }, { "epoch": 0.9379325000996162, "grad_norm": 0.3898449242115021, "learning_rate": 3.0193233482361307e-07, "loss": 0.0561, "step": 52962 }, { "epoch": 0.9379502096366445, "grad_norm": 0.5183236002922058, "learning_rate": 3.017606036875298e-07, "loss": 0.0378, "step": 52963 }, { "epoch": 0.937967919173673, "grad_norm": 0.5869367718696594, "learning_rate": 3.015889209071659e-07, "loss": 0.0526, "step": 52964 }, { "epoch": 0.9379856287107015, "grad_norm": 0.7604442834854126, "learning_rate": 3.0141728648308773e-07, "loss": 0.0661, "step": 52965 }, { "epoch": 0.9380033382477299, "grad_norm": 0.6950852870941162, "learning_rate": 3.012457004158614e-07, "loss": 0.0728, "step": 52966 }, { "epoch": 0.9380210477847583, "grad_norm": 0.7228584289550781, "learning_rate": 3.010741627060515e-07, "loss": 0.0609, "step": 52967 }, { "epoch": 0.9380387573217868, "grad_norm": 0.4287872314453125, "learning_rate": 3.009026733542175e-07, "loss": 0.0496, "step": 52968 }, { "epoch": 0.9380564668588152, "grad_norm": 0.6242888569831848, "learning_rate": 3.0073123236092904e-07, "loss": 0.0303, "step": 52969 }, { "epoch": 0.9380741763958436, "grad_norm": 0.546919047832489, "learning_rate": 3.0055983972674727e-07, "loss": 0.0543, "step": 52970 }, { "epoch": 0.938091885932872, "grad_norm": 0.9559131264686584, "learning_rate": 3.0038849545223846e-07, "loss": 0.0704, "step": 52971 }, { "epoch": 0.9381095954699005, "grad_norm": 0.8156539797782898, "learning_rate": 3.0021719953796214e-07, "loss": 0.0729, "step": 52972 }, { "epoch": 0.9381273050069289, "grad_norm": 0.42442694306373596, "learning_rate": 3.0004595198448614e-07, "loss": 0.0749, "step": 52973 }, { "epoch": 0.9381450145439573, "grad_norm": 0.7812365293502808, "learning_rate": 2.998747527923701e-07, "loss": 0.0549, "step": 52974 }, { "epoch": 0.9381627240809857, "grad_norm": 0.7295936942100525, "learning_rate": 2.9970360196218184e-07, "loss": 0.0462, "step": 52975 }, { "epoch": 0.9381804336180142, "grad_norm": 0.5499519109725952, "learning_rate": 2.9953249949447925e-07, "loss": 0.0589, "step": 52976 }, { "epoch": 0.9381981431550426, "grad_norm": 0.4247666895389557, "learning_rate": 2.9936144538983024e-07, "loss": 0.0663, "step": 52977 }, { "epoch": 0.938215852692071, "grad_norm": 0.624750554561615, "learning_rate": 2.99190439648796e-07, "loss": 0.0407, "step": 52978 }, { "epoch": 0.9382335622290994, "grad_norm": 0.9002726674079895, "learning_rate": 2.990194822719361e-07, "loss": 0.0439, "step": 52979 }, { "epoch": 0.9382512717661279, "grad_norm": 0.553883969783783, "learning_rate": 2.988485732598151e-07, "loss": 0.0445, "step": 52980 }, { "epoch": 0.9382689813031563, "grad_norm": 0.4200381934642792, "learning_rate": 2.9867771261299746e-07, "loss": 0.0423, "step": 52981 }, { "epoch": 0.9382866908401847, "grad_norm": 0.6144787669181824, "learning_rate": 2.985069003320412e-07, "loss": 0.0485, "step": 52982 }, { "epoch": 0.9383044003772132, "grad_norm": 0.5464800596237183, "learning_rate": 2.9833613641751077e-07, "loss": 0.056, "step": 52983 }, { "epoch": 0.9383221099142416, "grad_norm": 0.729469358921051, "learning_rate": 2.9816542086996577e-07, "loss": 0.0415, "step": 52984 }, { "epoch": 0.93833981945127, "grad_norm": 0.8249973654747009, "learning_rate": 2.979947536899724e-07, "loss": 0.0735, "step": 52985 }, { "epoch": 0.9383575289882984, "grad_norm": 0.49673566222190857, "learning_rate": 2.978241348780886e-07, "loss": 0.0404, "step": 52986 }, { "epoch": 0.9383752385253269, "grad_norm": 0.3607100546360016, "learning_rate": 2.976535644348755e-07, "loss": 0.0606, "step": 52987 }, { "epoch": 0.9383929480623553, "grad_norm": 0.5451846122741699, "learning_rate": 2.974830423608976e-07, "loss": 0.049, "step": 52988 }, { "epoch": 0.9384106575993837, "grad_norm": 0.5018711686134338, "learning_rate": 2.973125686567113e-07, "loss": 0.0492, "step": 52989 }, { "epoch": 0.9384283671364121, "grad_norm": 0.589795708656311, "learning_rate": 2.9714214332287934e-07, "loss": 0.0823, "step": 52990 }, { "epoch": 0.9384460766734406, "grad_norm": 0.6093077659606934, "learning_rate": 2.9697176635996305e-07, "loss": 0.0668, "step": 52991 }, { "epoch": 0.938463786210469, "grad_norm": 0.5221249461174011, "learning_rate": 2.9680143776852187e-07, "loss": 0.0427, "step": 52992 }, { "epoch": 0.9384814957474974, "grad_norm": 0.3300619125366211, "learning_rate": 2.9663115754911705e-07, "loss": 0.0551, "step": 52993 }, { "epoch": 0.9384992052845258, "grad_norm": 0.9332820773124695, "learning_rate": 2.964609257023082e-07, "loss": 0.0635, "step": 52994 }, { "epoch": 0.9385169148215543, "grad_norm": 0.410512775182724, "learning_rate": 2.9629074222865814e-07, "loss": 0.063, "step": 52995 }, { "epoch": 0.9385346243585827, "grad_norm": 0.34167179465293884, "learning_rate": 2.961206071287215e-07, "loss": 0.0469, "step": 52996 }, { "epoch": 0.9385523338956111, "grad_norm": 0.3260292410850525, "learning_rate": 2.9595052040306267e-07, "loss": 0.0237, "step": 52997 }, { "epoch": 0.9385700434326396, "grad_norm": 0.577324628829956, "learning_rate": 2.957804820522381e-07, "loss": 0.049, "step": 52998 }, { "epoch": 0.938587752969668, "grad_norm": 0.6404862999916077, "learning_rate": 2.9561049207681045e-07, "loss": 0.0342, "step": 52999 }, { "epoch": 0.9386054625066964, "grad_norm": 0.4326699376106262, "learning_rate": 2.954405504773361e-07, "loss": 0.0363, "step": 53000 }, { "epoch": 0.9386231720437248, "grad_norm": 0.6639801859855652, "learning_rate": 2.952706572543745e-07, "loss": 0.0532, "step": 53001 }, { "epoch": 0.9386408815807533, "grad_norm": 0.4861240088939667, "learning_rate": 2.951008124084853e-07, "loss": 0.055, "step": 53002 }, { "epoch": 0.9386585911177817, "grad_norm": 0.596803605556488, "learning_rate": 2.949310159402263e-07, "loss": 0.0222, "step": 53003 }, { "epoch": 0.9386763006548101, "grad_norm": 0.9497172832489014, "learning_rate": 2.947612678501571e-07, "loss": 0.0716, "step": 53004 }, { "epoch": 0.9386940101918385, "grad_norm": 0.3011549413204193, "learning_rate": 2.9459156813883724e-07, "loss": 0.0518, "step": 53005 }, { "epoch": 0.938711719728867, "grad_norm": 0.7209103107452393, "learning_rate": 2.9442191680682294e-07, "loss": 0.0478, "step": 53006 }, { "epoch": 0.9387294292658954, "grad_norm": 1.0404932498931885, "learning_rate": 2.9425231385467377e-07, "loss": 0.0991, "step": 53007 }, { "epoch": 0.9387471388029238, "grad_norm": 0.23830538988113403, "learning_rate": 2.940827592829459e-07, "loss": 0.0397, "step": 53008 }, { "epoch": 0.9387648483399522, "grad_norm": 0.9062669277191162, "learning_rate": 2.9391325309219896e-07, "loss": 0.0558, "step": 53009 }, { "epoch": 0.9387825578769807, "grad_norm": 0.11865098774433136, "learning_rate": 2.937437952829924e-07, "loss": 0.0301, "step": 53010 }, { "epoch": 0.9388002674140091, "grad_norm": 0.7545549273490906, "learning_rate": 2.935743858558793e-07, "loss": 0.0667, "step": 53011 }, { "epoch": 0.9388179769510375, "grad_norm": 0.5182685256004333, "learning_rate": 2.934050248114173e-07, "loss": 0.0401, "step": 53012 }, { "epoch": 0.938835686488066, "grad_norm": 0.6507366299629211, "learning_rate": 2.932357121501678e-07, "loss": 0.0763, "step": 53013 }, { "epoch": 0.9388533960250944, "grad_norm": 0.779720664024353, "learning_rate": 2.9306644787268353e-07, "loss": 0.0513, "step": 53014 }, { "epoch": 0.9388711055621228, "grad_norm": 0.43649500608444214, "learning_rate": 2.9289723197952256e-07, "loss": 0.0365, "step": 53015 }, { "epoch": 0.9388888150991512, "grad_norm": 0.7260960936546326, "learning_rate": 2.9272806447124436e-07, "loss": 0.0587, "step": 53016 }, { "epoch": 0.9389065246361797, "grad_norm": 0.5426547527313232, "learning_rate": 2.9255894534840345e-07, "loss": 0.0529, "step": 53017 }, { "epoch": 0.9389242341732081, "grad_norm": 0.6030489206314087, "learning_rate": 2.923898746115544e-07, "loss": 0.07, "step": 53018 }, { "epoch": 0.9389419437102365, "grad_norm": 0.8961561322212219, "learning_rate": 2.9222085226125516e-07, "loss": 0.0464, "step": 53019 }, { "epoch": 0.9389596532472649, "grad_norm": 0.7965514659881592, "learning_rate": 2.9205187829806354e-07, "loss": 0.061, "step": 53020 }, { "epoch": 0.9389773627842934, "grad_norm": 0.33689308166503906, "learning_rate": 2.9188295272253086e-07, "loss": 0.0352, "step": 53021 }, { "epoch": 0.9389950723213218, "grad_norm": 0.6597983241081238, "learning_rate": 2.917140755352166e-07, "loss": 0.0549, "step": 53022 }, { "epoch": 0.9390127818583502, "grad_norm": 0.513307511806488, "learning_rate": 2.9154524673667525e-07, "loss": 0.0519, "step": 53023 }, { "epoch": 0.9390304913953786, "grad_norm": 0.6285771727561951, "learning_rate": 2.9137646632746484e-07, "loss": 0.0591, "step": 53024 }, { "epoch": 0.9390482009324072, "grad_norm": 0.6531983613967896, "learning_rate": 2.912077343081365e-07, "loss": 0.064, "step": 53025 }, { "epoch": 0.9390659104694355, "grad_norm": 0.907795250415802, "learning_rate": 2.910390506792465e-07, "loss": 0.0665, "step": 53026 }, { "epoch": 0.939083620006464, "grad_norm": 0.8904793858528137, "learning_rate": 2.9087041544135105e-07, "loss": 0.0767, "step": 53027 }, { "epoch": 0.9391013295434925, "grad_norm": 0.6653880476951599, "learning_rate": 2.9070182859500305e-07, "loss": 0.0506, "step": 53028 }, { "epoch": 0.9391190390805209, "grad_norm": 0.612235963344574, "learning_rate": 2.905332901407587e-07, "loss": 0.0737, "step": 53029 }, { "epoch": 0.9391367486175493, "grad_norm": 0.5068774223327637, "learning_rate": 2.9036480007917255e-07, "loss": 0.0605, "step": 53030 }, { "epoch": 0.9391544581545777, "grad_norm": 0.24929594993591309, "learning_rate": 2.901963584107975e-07, "loss": 0.0488, "step": 53031 }, { "epoch": 0.9391721676916062, "grad_norm": 0.48102474212646484, "learning_rate": 2.900279651361898e-07, "loss": 0.0445, "step": 53032 }, { "epoch": 0.9391898772286346, "grad_norm": 0.5980851054191589, "learning_rate": 2.898596202559023e-07, "loss": 0.0581, "step": 53033 }, { "epoch": 0.939207586765663, "grad_norm": 0.2680261731147766, "learning_rate": 2.896913237704879e-07, "loss": 0.0423, "step": 53034 }, { "epoch": 0.9392252963026914, "grad_norm": 0.6731177568435669, "learning_rate": 2.8952307568050283e-07, "loss": 0.0554, "step": 53035 }, { "epoch": 0.9392430058397199, "grad_norm": 0.4348765015602112, "learning_rate": 2.893548759864967e-07, "loss": 0.0332, "step": 53036 }, { "epoch": 0.9392607153767483, "grad_norm": 0.4459305703639984, "learning_rate": 2.891867246890256e-07, "loss": 0.0255, "step": 53037 }, { "epoch": 0.9392784249137767, "grad_norm": 0.6383624076843262, "learning_rate": 2.8901862178864427e-07, "loss": 0.0446, "step": 53038 }, { "epoch": 0.9392961344508051, "grad_norm": 0.675747811794281, "learning_rate": 2.888505672859021e-07, "loss": 0.0803, "step": 53039 }, { "epoch": 0.9393138439878336, "grad_norm": 0.6241121292114258, "learning_rate": 2.886825611813537e-07, "loss": 0.0439, "step": 53040 }, { "epoch": 0.939331553524862, "grad_norm": 0.5767930150032043, "learning_rate": 2.8851460347555204e-07, "loss": 0.0414, "step": 53041 }, { "epoch": 0.9393492630618904, "grad_norm": 0.6531241536140442, "learning_rate": 2.883466941690482e-07, "loss": 0.0842, "step": 53042 }, { "epoch": 0.9393669725989189, "grad_norm": 0.6850525736808777, "learning_rate": 2.881788332623969e-07, "loss": 0.0586, "step": 53043 }, { "epoch": 0.9393846821359473, "grad_norm": 0.5266762375831604, "learning_rate": 2.8801102075614927e-07, "loss": 0.0571, "step": 53044 }, { "epoch": 0.9394023916729757, "grad_norm": 0.4881921112537384, "learning_rate": 2.878432566508582e-07, "loss": 0.0449, "step": 53045 }, { "epoch": 0.9394201012100041, "grad_norm": 0.39937782287597656, "learning_rate": 2.8767554094707327e-07, "loss": 0.0342, "step": 53046 }, { "epoch": 0.9394378107470326, "grad_norm": 0.509734034538269, "learning_rate": 2.875078736453474e-07, "loss": 0.0601, "step": 53047 }, { "epoch": 0.939455520284061, "grad_norm": 0.46916303038597107, "learning_rate": 2.873402547462334e-07, "loss": 0.0409, "step": 53048 }, { "epoch": 0.9394732298210894, "grad_norm": 0.5165058374404907, "learning_rate": 2.8717268425028256e-07, "loss": 0.0589, "step": 53049 }, { "epoch": 0.9394909393581178, "grad_norm": 0.494322806596756, "learning_rate": 2.870051621580444e-07, "loss": 0.068, "step": 53050 }, { "epoch": 0.9395086488951463, "grad_norm": 0.5024682283401489, "learning_rate": 2.8683768847007017e-07, "loss": 0.0504, "step": 53051 }, { "epoch": 0.9395263584321747, "grad_norm": 0.5814605951309204, "learning_rate": 2.8667026318691446e-07, "loss": 0.0665, "step": 53052 }, { "epoch": 0.9395440679692031, "grad_norm": 0.2595466375350952, "learning_rate": 2.865028863091235e-07, "loss": 0.0374, "step": 53053 }, { "epoch": 0.9395617775062315, "grad_norm": 0.8979823589324951, "learning_rate": 2.8633555783725007e-07, "loss": 0.0427, "step": 53054 }, { "epoch": 0.93957948704326, "grad_norm": 0.466383159160614, "learning_rate": 2.8616827777184387e-07, "loss": 0.0668, "step": 53055 }, { "epoch": 0.9395971965802884, "grad_norm": 0.4968687891960144, "learning_rate": 2.8600104611345766e-07, "loss": 0.0455, "step": 53056 }, { "epoch": 0.9396149061173168, "grad_norm": 0.8352411389350891, "learning_rate": 2.8583386286263947e-07, "loss": 0.0509, "step": 53057 }, { "epoch": 0.9396326156543453, "grad_norm": 0.4524264931678772, "learning_rate": 2.8566672801993876e-07, "loss": 0.0367, "step": 53058 }, { "epoch": 0.9396503251913737, "grad_norm": 0.4683888852596283, "learning_rate": 2.8549964158590516e-07, "loss": 0.0469, "step": 53059 }, { "epoch": 0.9396680347284021, "grad_norm": 0.38304558396339417, "learning_rate": 2.8533260356109145e-07, "loss": 0.0356, "step": 53060 }, { "epoch": 0.9396857442654305, "grad_norm": 0.5094757080078125, "learning_rate": 2.8516561394604403e-07, "loss": 0.0663, "step": 53061 }, { "epoch": 0.939703453802459, "grad_norm": 0.582271158695221, "learning_rate": 2.84998672741314e-07, "loss": 0.0388, "step": 53062 }, { "epoch": 0.9397211633394874, "grad_norm": 0.630204975605011, "learning_rate": 2.848317799474509e-07, "loss": 0.0734, "step": 53063 }, { "epoch": 0.9397388728765158, "grad_norm": 0.30442777276039124, "learning_rate": 2.8466493556500106e-07, "loss": 0.0437, "step": 53064 }, { "epoch": 0.9397565824135442, "grad_norm": 0.6010584831237793, "learning_rate": 2.8449813959451566e-07, "loss": 0.0618, "step": 53065 }, { "epoch": 0.9397742919505727, "grad_norm": 0.7937980890274048, "learning_rate": 2.8433139203654426e-07, "loss": 0.0625, "step": 53066 }, { "epoch": 0.9397920014876011, "grad_norm": 0.8133076429367065, "learning_rate": 2.8416469289163304e-07, "loss": 0.056, "step": 53067 }, { "epoch": 0.9398097110246295, "grad_norm": 0.3696237802505493, "learning_rate": 2.8399804216033163e-07, "loss": 0.0536, "step": 53068 }, { "epoch": 0.939827420561658, "grad_norm": 0.37837404012680054, "learning_rate": 2.8383143984318795e-07, "loss": 0.0312, "step": 53069 }, { "epoch": 0.9398451300986864, "grad_norm": 0.5693193674087524, "learning_rate": 2.8366488594075145e-07, "loss": 0.0336, "step": 53070 }, { "epoch": 0.9398628396357148, "grad_norm": 0.4502609372138977, "learning_rate": 2.834983804535685e-07, "loss": 0.0324, "step": 53071 }, { "epoch": 0.9398805491727432, "grad_norm": 0.30280664563179016, "learning_rate": 2.8333192338218685e-07, "loss": 0.048, "step": 53072 }, { "epoch": 0.9398982587097717, "grad_norm": 0.7087447047233582, "learning_rate": 2.831655147271561e-07, "loss": 0.0426, "step": 53073 }, { "epoch": 0.9399159682468001, "grad_norm": 0.6107736229896545, "learning_rate": 2.829991544890209e-07, "loss": 0.0421, "step": 53074 }, { "epoch": 0.9399336777838285, "grad_norm": 0.4910055100917816, "learning_rate": 2.828328426683291e-07, "loss": 0.0602, "step": 53075 }, { "epoch": 0.9399513873208569, "grad_norm": 0.5861549973487854, "learning_rate": 2.8266657926563023e-07, "loss": 0.0575, "step": 53076 }, { "epoch": 0.9399690968578854, "grad_norm": 0.6805272698402405, "learning_rate": 2.825003642814705e-07, "loss": 0.0508, "step": 53077 }, { "epoch": 0.9399868063949138, "grad_norm": 0.5369800925254822, "learning_rate": 2.8233419771639457e-07, "loss": 0.0378, "step": 53078 }, { "epoch": 0.9400045159319422, "grad_norm": 0.6533229351043701, "learning_rate": 2.8216807957095025e-07, "loss": 0.0401, "step": 53079 }, { "epoch": 0.9400222254689706, "grad_norm": 0.5771180987358093, "learning_rate": 2.8200200984568383e-07, "loss": 0.0422, "step": 53080 }, { "epoch": 0.9400399350059991, "grad_norm": 0.46236181259155273, "learning_rate": 2.818359885411448e-07, "loss": 0.0557, "step": 53081 }, { "epoch": 0.9400576445430275, "grad_norm": 0.3712458908557892, "learning_rate": 2.816700156578744e-07, "loss": 0.0405, "step": 53082 }, { "epoch": 0.9400753540800559, "grad_norm": 0.40551522374153137, "learning_rate": 2.8150409119642227e-07, "loss": 0.0503, "step": 53083 }, { "epoch": 0.9400930636170844, "grad_norm": 0.39805570244789124, "learning_rate": 2.813382151573329e-07, "loss": 0.0266, "step": 53084 }, { "epoch": 0.9401107731541128, "grad_norm": 0.5735127329826355, "learning_rate": 2.811723875411526e-07, "loss": 0.0402, "step": 53085 }, { "epoch": 0.9401284826911412, "grad_norm": 0.5099251866340637, "learning_rate": 2.810066083484242e-07, "loss": 0.0479, "step": 53086 }, { "epoch": 0.9401461922281696, "grad_norm": 0.32486188411712646, "learning_rate": 2.8084087757969725e-07, "loss": 0.0518, "step": 53087 }, { "epoch": 0.9401639017651982, "grad_norm": 0.8693501353263855, "learning_rate": 2.8067519523551464e-07, "loss": 0.05, "step": 53088 }, { "epoch": 0.9401816113022265, "grad_norm": 0.7735651731491089, "learning_rate": 2.8050956131642103e-07, "loss": 0.0603, "step": 53089 }, { "epoch": 0.940199320839255, "grad_norm": 0.4246765375137329, "learning_rate": 2.8034397582296255e-07, "loss": 0.0413, "step": 53090 }, { "epoch": 0.9402170303762833, "grad_norm": 0.8608455061912537, "learning_rate": 2.8017843875568384e-07, "loss": 0.0624, "step": 53091 }, { "epoch": 0.9402347399133119, "grad_norm": 0.6329243779182434, "learning_rate": 2.800129501151294e-07, "loss": 0.0638, "step": 53092 }, { "epoch": 0.9402524494503403, "grad_norm": 0.6261668801307678, "learning_rate": 2.7984750990184384e-07, "loss": 0.0748, "step": 53093 }, { "epoch": 0.9402701589873687, "grad_norm": 0.7082064747810364, "learning_rate": 2.7968211811637e-07, "loss": 0.0575, "step": 53094 }, { "epoch": 0.940287868524397, "grad_norm": 0.5496521592140198, "learning_rate": 2.795167747592559e-07, "loss": 0.0486, "step": 53095 }, { "epoch": 0.9403055780614256, "grad_norm": 0.7720293402671814, "learning_rate": 2.7935147983103926e-07, "loss": 0.0561, "step": 53096 }, { "epoch": 0.940323287598454, "grad_norm": 0.5349395275115967, "learning_rate": 2.791862333322681e-07, "loss": 0.0565, "step": 53097 }, { "epoch": 0.9403409971354824, "grad_norm": 0.6483533978462219, "learning_rate": 2.79021035263487e-07, "loss": 0.041, "step": 53098 }, { "epoch": 0.9403587066725109, "grad_norm": 0.7306020855903625, "learning_rate": 2.7885588562523546e-07, "loss": 0.067, "step": 53099 }, { "epoch": 0.9403764162095393, "grad_norm": 0.8003203868865967, "learning_rate": 2.7869078441805973e-07, "loss": 0.0467, "step": 53100 }, { "epoch": 0.9403941257465677, "grad_norm": 0.8967124223709106, "learning_rate": 2.785257316425027e-07, "loss": 0.0744, "step": 53101 }, { "epoch": 0.9404118352835961, "grad_norm": 0.5320172905921936, "learning_rate": 2.78360727299109e-07, "loss": 0.0406, "step": 53102 }, { "epoch": 0.9404295448206246, "grad_norm": 0.4624961018562317, "learning_rate": 2.7819577138841813e-07, "loss": 0.0322, "step": 53103 }, { "epoch": 0.940447254357653, "grad_norm": 0.25975194573402405, "learning_rate": 2.78030863910973e-07, "loss": 0.0611, "step": 53104 }, { "epoch": 0.9404649638946814, "grad_norm": 0.9835751056671143, "learning_rate": 2.7786600486731986e-07, "loss": 0.0641, "step": 53105 }, { "epoch": 0.9404826734317098, "grad_norm": 0.5559824109077454, "learning_rate": 2.7770119425799654e-07, "loss": 0.0557, "step": 53106 }, { "epoch": 0.9405003829687383, "grad_norm": 0.7329511642456055, "learning_rate": 2.7753643208354605e-07, "loss": 0.071, "step": 53107 }, { "epoch": 0.9405180925057667, "grad_norm": 0.7939459681510925, "learning_rate": 2.773717183445129e-07, "loss": 0.0433, "step": 53108 }, { "epoch": 0.9405358020427951, "grad_norm": 0.38050025701522827, "learning_rate": 2.772070530414383e-07, "loss": 0.043, "step": 53109 }, { "epoch": 0.9405535115798235, "grad_norm": 0.6441528797149658, "learning_rate": 2.770424361748619e-07, "loss": 0.0452, "step": 53110 }, { "epoch": 0.940571221116852, "grad_norm": 0.5308616161346436, "learning_rate": 2.768778677453282e-07, "loss": 0.0444, "step": 53111 }, { "epoch": 0.9405889306538804, "grad_norm": 0.6291329264640808, "learning_rate": 2.7671334775337674e-07, "loss": 0.0708, "step": 53112 }, { "epoch": 0.9406066401909088, "grad_norm": 1.0220788717269897, "learning_rate": 2.765488761995488e-07, "loss": 0.0628, "step": 53113 }, { "epoch": 0.9406243497279373, "grad_norm": 0.4034197926521301, "learning_rate": 2.7638445308438565e-07, "loss": 0.04, "step": 53114 }, { "epoch": 0.9406420592649657, "grad_norm": 0.36187586188316345, "learning_rate": 2.762200784084301e-07, "loss": 0.0426, "step": 53115 }, { "epoch": 0.9406597688019941, "grad_norm": 0.5029687881469727, "learning_rate": 2.760557521722201e-07, "loss": 0.0526, "step": 53116 }, { "epoch": 0.9406774783390225, "grad_norm": 0.20756611227989197, "learning_rate": 2.758914743762969e-07, "loss": 0.0312, "step": 53117 }, { "epoch": 0.940695187876051, "grad_norm": 0.6583952903747559, "learning_rate": 2.757272450212017e-07, "loss": 0.0847, "step": 53118 }, { "epoch": 0.9407128974130794, "grad_norm": 0.33861276507377625, "learning_rate": 2.755630641074741e-07, "loss": 0.0461, "step": 53119 }, { "epoch": 0.9407306069501078, "grad_norm": 0.5993914008140564, "learning_rate": 2.75398931635657e-07, "loss": 0.0555, "step": 53120 }, { "epoch": 0.9407483164871362, "grad_norm": 0.5431880354881287, "learning_rate": 2.752348476062866e-07, "loss": 0.0463, "step": 53121 }, { "epoch": 0.9407660260241647, "grad_norm": 0.7885175943374634, "learning_rate": 2.7507081201990413e-07, "loss": 0.0558, "step": 53122 }, { "epoch": 0.9407837355611931, "grad_norm": 0.4555976986885071, "learning_rate": 2.749068248770509e-07, "loss": 0.0465, "step": 53123 }, { "epoch": 0.9408014450982215, "grad_norm": 0.5077033638954163, "learning_rate": 2.7474288617826303e-07, "loss": 0.0669, "step": 53124 }, { "epoch": 0.9408191546352499, "grad_norm": 0.8239967226982117, "learning_rate": 2.745789959240819e-07, "loss": 0.0697, "step": 53125 }, { "epoch": 0.9408368641722784, "grad_norm": 1.0281529426574707, "learning_rate": 2.744151541150469e-07, "loss": 0.0494, "step": 53126 }, { "epoch": 0.9408545737093068, "grad_norm": 0.5542204976081848, "learning_rate": 2.742513607516961e-07, "loss": 0.0718, "step": 53127 }, { "epoch": 0.9408722832463352, "grad_norm": 0.4521181285381317, "learning_rate": 2.74087615834569e-07, "loss": 0.0692, "step": 53128 }, { "epoch": 0.9408899927833637, "grad_norm": 0.44214174151420593, "learning_rate": 2.7392391936420345e-07, "loss": 0.0625, "step": 53129 }, { "epoch": 0.9409077023203921, "grad_norm": 0.5636930465698242, "learning_rate": 2.737602713411408e-07, "loss": 0.0381, "step": 53130 }, { "epoch": 0.9409254118574205, "grad_norm": 0.6168779730796814, "learning_rate": 2.7359667176591716e-07, "loss": 0.0742, "step": 53131 }, { "epoch": 0.9409431213944489, "grad_norm": 0.5477027893066406, "learning_rate": 2.734331206390689e-07, "loss": 0.0552, "step": 53132 }, { "epoch": 0.9409608309314774, "grad_norm": 0.5875053405761719, "learning_rate": 2.732696179611371e-07, "loss": 0.0565, "step": 53133 }, { "epoch": 0.9409785404685058, "grad_norm": 0.49761080741882324, "learning_rate": 2.731061637326615e-07, "loss": 0.0566, "step": 53134 }, { "epoch": 0.9409962500055342, "grad_norm": 0.6978718638420105, "learning_rate": 2.7294275795417325e-07, "loss": 0.0324, "step": 53135 }, { "epoch": 0.9410139595425626, "grad_norm": 1.1047418117523193, "learning_rate": 2.727794006262152e-07, "loss": 0.1023, "step": 53136 }, { "epoch": 0.9410316690795911, "grad_norm": 0.5465813279151917, "learning_rate": 2.726160917493237e-07, "loss": 0.0417, "step": 53137 }, { "epoch": 0.9410493786166195, "grad_norm": 0.45680415630340576, "learning_rate": 2.7245283132403487e-07, "loss": 0.0404, "step": 53138 }, { "epoch": 0.9410670881536479, "grad_norm": 0.32100358605384827, "learning_rate": 2.722896193508867e-07, "loss": 0.0352, "step": 53139 }, { "epoch": 0.9410847976906763, "grad_norm": 0.7732186913490295, "learning_rate": 2.7212645583041707e-07, "loss": 0.0575, "step": 53140 }, { "epoch": 0.9411025072277048, "grad_norm": 0.6246312260627747, "learning_rate": 2.719633407631622e-07, "loss": 0.0629, "step": 53141 }, { "epoch": 0.9411202167647332, "grad_norm": 0.5601674318313599, "learning_rate": 2.7180027414965667e-07, "loss": 0.0483, "step": 53142 }, { "epoch": 0.9411379263017616, "grad_norm": 0.48854413628578186, "learning_rate": 2.7163725599043844e-07, "loss": 0.0569, "step": 53143 }, { "epoch": 0.9411556358387901, "grad_norm": 0.43280911445617676, "learning_rate": 2.7147428628604696e-07, "loss": 0.0551, "step": 53144 }, { "epoch": 0.9411733453758185, "grad_norm": 0.408389687538147, "learning_rate": 2.7131136503701194e-07, "loss": 0.0303, "step": 53145 }, { "epoch": 0.9411910549128469, "grad_norm": 0.7033774852752686, "learning_rate": 2.7114849224387283e-07, "loss": 0.0631, "step": 53146 }, { "epoch": 0.9412087644498753, "grad_norm": 0.6035990118980408, "learning_rate": 2.7098566790716596e-07, "loss": 0.0516, "step": 53147 }, { "epoch": 0.9412264739869038, "grad_norm": 0.8202301263809204, "learning_rate": 2.7082289202742914e-07, "loss": 0.0528, "step": 53148 }, { "epoch": 0.9412441835239322, "grad_norm": 0.5031407475471497, "learning_rate": 2.7066016460519205e-07, "loss": 0.0497, "step": 53149 }, { "epoch": 0.9412618930609606, "grad_norm": 0.5620816349983215, "learning_rate": 2.7049748564099417e-07, "loss": 0.0621, "step": 53150 }, { "epoch": 0.941279602597989, "grad_norm": 0.49439308047294617, "learning_rate": 2.7033485513537015e-07, "loss": 0.0504, "step": 53151 }, { "epoch": 0.9412973121350175, "grad_norm": 0.8821092844009399, "learning_rate": 2.7017227308885617e-07, "loss": 0.0465, "step": 53152 }, { "epoch": 0.941315021672046, "grad_norm": 0.7939366698265076, "learning_rate": 2.7000973950198346e-07, "loss": 0.0764, "step": 53153 }, { "epoch": 0.9413327312090743, "grad_norm": 0.4925289750099182, "learning_rate": 2.6984725437528837e-07, "loss": 0.0412, "step": 53154 }, { "epoch": 0.9413504407461027, "grad_norm": 0.5582097768783569, "learning_rate": 2.69684817709307e-07, "loss": 0.0448, "step": 53155 }, { "epoch": 0.9413681502831313, "grad_norm": 0.36352500319480896, "learning_rate": 2.695224295045723e-07, "loss": 0.0483, "step": 53156 }, { "epoch": 0.9413858598201597, "grad_norm": 0.4486555755138397, "learning_rate": 2.6936008976161886e-07, "loss": 0.0309, "step": 53157 }, { "epoch": 0.941403569357188, "grad_norm": 0.4450494647026062, "learning_rate": 2.6919779848098126e-07, "loss": 0.0381, "step": 53158 }, { "epoch": 0.9414212788942166, "grad_norm": 0.9194880127906799, "learning_rate": 2.6903555566319403e-07, "loss": 0.051, "step": 53159 }, { "epoch": 0.941438988431245, "grad_norm": 0.7329432964324951, "learning_rate": 2.6887336130878847e-07, "loss": 0.0574, "step": 53160 }, { "epoch": 0.9414566979682734, "grad_norm": 0.4373255670070648, "learning_rate": 2.687112154182991e-07, "loss": 0.0583, "step": 53161 }, { "epoch": 0.9414744075053018, "grad_norm": 0.46295562386512756, "learning_rate": 2.6854911799226054e-07, "loss": 0.0444, "step": 53162 }, { "epoch": 0.9414921170423303, "grad_norm": 0.3923989534378052, "learning_rate": 2.683870690312057e-07, "loss": 0.0452, "step": 53163 }, { "epoch": 0.9415098265793587, "grad_norm": 0.5227832198143005, "learning_rate": 2.682250685356674e-07, "loss": 0.0439, "step": 53164 }, { "epoch": 0.9415275361163871, "grad_norm": 0.7208828926086426, "learning_rate": 2.68063116506177e-07, "loss": 0.0568, "step": 53165 }, { "epoch": 0.9415452456534155, "grad_norm": 0.5458025932312012, "learning_rate": 2.679012129432706e-07, "loss": 0.0391, "step": 53166 }, { "epoch": 0.941562955190444, "grad_norm": 0.4143844246864319, "learning_rate": 2.6773935784747794e-07, "loss": 0.0501, "step": 53167 }, { "epoch": 0.9415806647274724, "grad_norm": 0.5076063871383667, "learning_rate": 2.675775512193335e-07, "loss": 0.0558, "step": 53168 }, { "epoch": 0.9415983742645008, "grad_norm": 0.43672120571136475, "learning_rate": 2.6741579305937016e-07, "loss": 0.0493, "step": 53169 }, { "epoch": 0.9416160838015292, "grad_norm": 0.3266739845275879, "learning_rate": 2.672540833681175e-07, "loss": 0.0459, "step": 53170 }, { "epoch": 0.9416337933385577, "grad_norm": 0.7202927470207214, "learning_rate": 2.670924221461085e-07, "loss": 0.0666, "step": 53171 }, { "epoch": 0.9416515028755861, "grad_norm": 0.8199607729911804, "learning_rate": 2.669308093938744e-07, "loss": 0.0739, "step": 53172 }, { "epoch": 0.9416692124126145, "grad_norm": 0.5714172720909119, "learning_rate": 2.6676924511195135e-07, "loss": 0.0504, "step": 53173 }, { "epoch": 0.941686921949643, "grad_norm": 0.492306113243103, "learning_rate": 2.66607729300864e-07, "loss": 0.0541, "step": 53174 }, { "epoch": 0.9417046314866714, "grad_norm": 0.544677734375, "learning_rate": 2.6644626196114686e-07, "loss": 0.0606, "step": 53175 }, { "epoch": 0.9417223410236998, "grad_norm": 0.6399375796318054, "learning_rate": 2.662848430933329e-07, "loss": 0.0569, "step": 53176 }, { "epoch": 0.9417400505607282, "grad_norm": 0.814054012298584, "learning_rate": 2.6612347269795e-07, "loss": 0.0549, "step": 53177 }, { "epoch": 0.9417577600977567, "grad_norm": 0.909028172492981, "learning_rate": 2.659621507755311e-07, "loss": 0.0516, "step": 53178 }, { "epoch": 0.9417754696347851, "grad_norm": 0.6277225017547607, "learning_rate": 2.658008773266074e-07, "loss": 0.0556, "step": 53179 }, { "epoch": 0.9417931791718135, "grad_norm": 0.6838393211364746, "learning_rate": 2.656396523517085e-07, "loss": 0.05, "step": 53180 }, { "epoch": 0.9418108887088419, "grad_norm": 0.7329341173171997, "learning_rate": 2.654784758513656e-07, "loss": 0.0531, "step": 53181 }, { "epoch": 0.9418285982458704, "grad_norm": 0.45998460054397583, "learning_rate": 2.6531734782610674e-07, "loss": 0.0582, "step": 53182 }, { "epoch": 0.9418463077828988, "grad_norm": 0.565765917301178, "learning_rate": 2.65156268276463e-07, "loss": 0.058, "step": 53183 }, { "epoch": 0.9418640173199272, "grad_norm": 0.8182215094566345, "learning_rate": 2.649952372029657e-07, "loss": 0.0679, "step": 53184 }, { "epoch": 0.9418817268569556, "grad_norm": 0.9054136872291565, "learning_rate": 2.6483425460614276e-07, "loss": 0.0504, "step": 53185 }, { "epoch": 0.9418994363939841, "grad_norm": 0.544108510017395, "learning_rate": 2.646733204865254e-07, "loss": 0.0356, "step": 53186 }, { "epoch": 0.9419171459310125, "grad_norm": 0.5493524074554443, "learning_rate": 2.6451243484464317e-07, "loss": 0.0392, "step": 53187 }, { "epoch": 0.9419348554680409, "grad_norm": 0.49842238426208496, "learning_rate": 2.64351597681024e-07, "loss": 0.076, "step": 53188 }, { "epoch": 0.9419525650050694, "grad_norm": 0.3780060410499573, "learning_rate": 2.641908089961975e-07, "loss": 0.0431, "step": 53189 }, { "epoch": 0.9419702745420978, "grad_norm": 0.7900628447532654, "learning_rate": 2.6403006879069324e-07, "loss": 0.0548, "step": 53190 }, { "epoch": 0.9419879840791262, "grad_norm": 0.8489546179771423, "learning_rate": 2.6386937706504076e-07, "loss": 0.0567, "step": 53191 }, { "epoch": 0.9420056936161546, "grad_norm": 0.72934490442276, "learning_rate": 2.6370873381976635e-07, "loss": 0.0532, "step": 53192 }, { "epoch": 0.9420234031531831, "grad_norm": 0.42304202914237976, "learning_rate": 2.6354813905539953e-07, "loss": 0.0397, "step": 53193 }, { "epoch": 0.9420411126902115, "grad_norm": 0.822003960609436, "learning_rate": 2.6338759277246824e-07, "loss": 0.0554, "step": 53194 }, { "epoch": 0.9420588222272399, "grad_norm": 0.5797649025917053, "learning_rate": 2.632270949715004e-07, "loss": 0.0461, "step": 53195 }, { "epoch": 0.9420765317642683, "grad_norm": 0.7200466394424438, "learning_rate": 2.630666456530273e-07, "loss": 0.0535, "step": 53196 }, { "epoch": 0.9420942413012968, "grad_norm": 0.6901001930236816, "learning_rate": 2.629062448175734e-07, "loss": 0.0653, "step": 53197 }, { "epoch": 0.9421119508383252, "grad_norm": 0.3107817471027374, "learning_rate": 2.627458924656684e-07, "loss": 0.0629, "step": 53198 }, { "epoch": 0.9421296603753536, "grad_norm": 0.7027764320373535, "learning_rate": 2.6258558859783676e-07, "loss": 0.0498, "step": 53199 }, { "epoch": 0.942147369912382, "grad_norm": 0.5413005352020264, "learning_rate": 2.6242533321460984e-07, "loss": 0.0727, "step": 53200 }, { "epoch": 0.9421650794494105, "grad_norm": 0.25884121656417847, "learning_rate": 2.622651263165121e-07, "loss": 0.0248, "step": 53201 }, { "epoch": 0.9421827889864389, "grad_norm": 0.6733890771865845, "learning_rate": 2.621049679040716e-07, "loss": 0.058, "step": 53202 }, { "epoch": 0.9422004985234673, "grad_norm": 0.6348169445991516, "learning_rate": 2.6194485797781443e-07, "loss": 0.0599, "step": 53203 }, { "epoch": 0.9422182080604958, "grad_norm": 0.5454867482185364, "learning_rate": 2.617847965382669e-07, "loss": 0.0694, "step": 53204 }, { "epoch": 0.9422359175975242, "grad_norm": 0.6466275453567505, "learning_rate": 2.616247835859603e-07, "loss": 0.0506, "step": 53205 }, { "epoch": 0.9422536271345526, "grad_norm": 0.5129519701004028, "learning_rate": 2.6146481912141584e-07, "loss": 0.0408, "step": 53206 }, { "epoch": 0.942271336671581, "grad_norm": 0.44291073083877563, "learning_rate": 2.613049031451598e-07, "loss": 0.0346, "step": 53207 }, { "epoch": 0.9422890462086095, "grad_norm": 0.5583074688911438, "learning_rate": 2.611450356577233e-07, "loss": 0.0635, "step": 53208 }, { "epoch": 0.9423067557456379, "grad_norm": 0.3678140938282013, "learning_rate": 2.6098521665962603e-07, "loss": 0.0501, "step": 53209 }, { "epoch": 0.9423244652826663, "grad_norm": 0.5561071038246155, "learning_rate": 2.608254461513976e-07, "loss": 0.0561, "step": 53210 }, { "epoch": 0.9423421748196947, "grad_norm": 0.52176433801651, "learning_rate": 2.606657241335625e-07, "loss": 0.0538, "step": 53211 }, { "epoch": 0.9423598843567232, "grad_norm": 0.7654610872268677, "learning_rate": 2.6050605060664533e-07, "loss": 0.0375, "step": 53212 }, { "epoch": 0.9423775938937516, "grad_norm": 0.47271785140037537, "learning_rate": 2.6034642557117404e-07, "loss": 0.0761, "step": 53213 }, { "epoch": 0.94239530343078, "grad_norm": 0.6777644157409668, "learning_rate": 2.601868490276715e-07, "loss": 0.0478, "step": 53214 }, { "epoch": 0.9424130129678084, "grad_norm": 0.5853783488273621, "learning_rate": 2.6002732097666403e-07, "loss": 0.0769, "step": 53215 }, { "epoch": 0.942430722504837, "grad_norm": 0.6093705296516418, "learning_rate": 2.598678414186745e-07, "loss": 0.0384, "step": 53216 }, { "epoch": 0.9424484320418653, "grad_norm": 0.42005106806755066, "learning_rate": 2.5970841035423076e-07, "loss": 0.0518, "step": 53217 }, { "epoch": 0.9424661415788937, "grad_norm": 0.5718355178833008, "learning_rate": 2.595490277838541e-07, "loss": 0.0408, "step": 53218 }, { "epoch": 0.9424838511159223, "grad_norm": 0.5562273263931274, "learning_rate": 2.593896937080709e-07, "loss": 0.0638, "step": 53219 }, { "epoch": 0.9425015606529507, "grad_norm": 0.5026108622550964, "learning_rate": 2.5923040812740384e-07, "loss": 0.048, "step": 53220 }, { "epoch": 0.942519270189979, "grad_norm": 0.47328758239746094, "learning_rate": 2.5907117104237766e-07, "loss": 0.0599, "step": 53221 }, { "epoch": 0.9425369797270075, "grad_norm": 0.45442721247673035, "learning_rate": 2.5891198245351687e-07, "loss": 0.0743, "step": 53222 }, { "epoch": 0.942554689264036, "grad_norm": 0.513562023639679, "learning_rate": 2.5875284236134446e-07, "loss": 0.0755, "step": 53223 }, { "epoch": 0.9425723988010644, "grad_norm": 0.8264095783233643, "learning_rate": 2.585937507663849e-07, "loss": 0.0534, "step": 53224 }, { "epoch": 0.9425901083380928, "grad_norm": 0.7212926149368286, "learning_rate": 2.5843470766915957e-07, "loss": 0.0568, "step": 53225 }, { "epoch": 0.9426078178751212, "grad_norm": 0.7434232234954834, "learning_rate": 2.582757130701963e-07, "loss": 0.0425, "step": 53226 }, { "epoch": 0.9426255274121497, "grad_norm": 0.5111492276191711, "learning_rate": 2.581167669700113e-07, "loss": 0.0491, "step": 53227 }, { "epoch": 0.9426432369491781, "grad_norm": 0.5539354085922241, "learning_rate": 2.579578693691326e-07, "loss": 0.0648, "step": 53228 }, { "epoch": 0.9426609464862065, "grad_norm": 0.8649277687072754, "learning_rate": 2.577990202680813e-07, "loss": 0.0667, "step": 53229 }, { "epoch": 0.9426786560232349, "grad_norm": 0.35837605595588684, "learning_rate": 2.576402196673805e-07, "loss": 0.0519, "step": 53230 }, { "epoch": 0.9426963655602634, "grad_norm": 0.4975360333919525, "learning_rate": 2.5748146756755133e-07, "loss": 0.051, "step": 53231 }, { "epoch": 0.9427140750972918, "grad_norm": 0.6965152025222778, "learning_rate": 2.5732276396911834e-07, "loss": 0.0726, "step": 53232 }, { "epoch": 0.9427317846343202, "grad_norm": 0.8177094459533691, "learning_rate": 2.571641088726012e-07, "loss": 0.0784, "step": 53233 }, { "epoch": 0.9427494941713487, "grad_norm": 0.4675358533859253, "learning_rate": 2.5700550227852284e-07, "loss": 0.0435, "step": 53234 }, { "epoch": 0.9427672037083771, "grad_norm": 0.4751582741737366, "learning_rate": 2.5684694418740605e-07, "loss": 0.0617, "step": 53235 }, { "epoch": 0.9427849132454055, "grad_norm": 0.5780284404754639, "learning_rate": 2.566884345997705e-07, "loss": 0.0368, "step": 53236 }, { "epoch": 0.9428026227824339, "grad_norm": 0.7717617154121399, "learning_rate": 2.5652997351614084e-07, "loss": 0.0801, "step": 53237 }, { "epoch": 0.9428203323194624, "grad_norm": 0.896757185459137, "learning_rate": 2.5637156093703486e-07, "loss": 0.0481, "step": 53238 }, { "epoch": 0.9428380418564908, "grad_norm": 0.39329054951667786, "learning_rate": 2.562131968629755e-07, "loss": 0.0521, "step": 53239 }, { "epoch": 0.9428557513935192, "grad_norm": 0.4064190089702606, "learning_rate": 2.5605488129448407e-07, "loss": 0.0463, "step": 53240 }, { "epoch": 0.9428734609305476, "grad_norm": 0.4019046425819397, "learning_rate": 2.5589661423207846e-07, "loss": 0.0414, "step": 53241 }, { "epoch": 0.9428911704675761, "grad_norm": 0.8657130002975464, "learning_rate": 2.557383956762832e-07, "loss": 0.0577, "step": 53242 }, { "epoch": 0.9429088800046045, "grad_norm": 0.4168666899204254, "learning_rate": 2.5558022562761794e-07, "loss": 0.0459, "step": 53243 }, { "epoch": 0.9429265895416329, "grad_norm": 0.7366830110549927, "learning_rate": 2.554221040866023e-07, "loss": 0.0699, "step": 53244 }, { "epoch": 0.9429442990786613, "grad_norm": 0.5065993666648865, "learning_rate": 2.552640310537557e-07, "loss": 0.0504, "step": 53245 }, { "epoch": 0.9429620086156898, "grad_norm": 0.4037296772003174, "learning_rate": 2.551060065295996e-07, "loss": 0.0413, "step": 53246 }, { "epoch": 0.9429797181527182, "grad_norm": 0.45318207144737244, "learning_rate": 2.549480305146551e-07, "loss": 0.0545, "step": 53247 }, { "epoch": 0.9429974276897466, "grad_norm": 0.6525231599807739, "learning_rate": 2.547901030094385e-07, "loss": 0.052, "step": 53248 }, { "epoch": 0.9430151372267751, "grad_norm": 0.6717847585678101, "learning_rate": 2.5463222401447105e-07, "loss": 0.0623, "step": 53249 }, { "epoch": 0.9430328467638035, "grad_norm": 1.046242594718933, "learning_rate": 2.5447439353027235e-07, "loss": 0.0629, "step": 53250 }, { "epoch": 0.9430505563008319, "grad_norm": 0.19949133694171906, "learning_rate": 2.543166115573603e-07, "loss": 0.0439, "step": 53251 }, { "epoch": 0.9430682658378603, "grad_norm": 0.3305628001689911, "learning_rate": 2.5415887809625614e-07, "loss": 0.0364, "step": 53252 }, { "epoch": 0.9430859753748888, "grad_norm": 0.4282114803791046, "learning_rate": 2.540011931474778e-07, "loss": 0.0518, "step": 53253 }, { "epoch": 0.9431036849119172, "grad_norm": 0.5439421534538269, "learning_rate": 2.5384355671154493e-07, "loss": 0.0527, "step": 53254 }, { "epoch": 0.9431213944489456, "grad_norm": 0.5355374217033386, "learning_rate": 2.536859687889753e-07, "loss": 0.0472, "step": 53255 }, { "epoch": 0.943139103985974, "grad_norm": 0.39696046710014343, "learning_rate": 2.5352842938028697e-07, "loss": 0.0304, "step": 53256 }, { "epoch": 0.9431568135230025, "grad_norm": 1.0213607549667358, "learning_rate": 2.533709384859978e-07, "loss": 0.0759, "step": 53257 }, { "epoch": 0.9431745230600309, "grad_norm": 0.47649556398391724, "learning_rate": 2.5321349610662737e-07, "loss": 0.0593, "step": 53258 }, { "epoch": 0.9431922325970593, "grad_norm": 0.552578330039978, "learning_rate": 2.530561022426936e-07, "loss": 0.049, "step": 53259 }, { "epoch": 0.9432099421340877, "grad_norm": 0.7535125613212585, "learning_rate": 2.5289875689471273e-07, "loss": 0.0565, "step": 53260 }, { "epoch": 0.9432276516711162, "grad_norm": 0.7691670060157776, "learning_rate": 2.5274146006320277e-07, "loss": 0.0666, "step": 53261 }, { "epoch": 0.9432453612081446, "grad_norm": 0.32907184958457947, "learning_rate": 2.525842117486832e-07, "loss": 0.0417, "step": 53262 }, { "epoch": 0.943263070745173, "grad_norm": 0.7404769659042358, "learning_rate": 2.524270119516686e-07, "loss": 0.0844, "step": 53263 }, { "epoch": 0.9432807802822015, "grad_norm": 0.29849570989608765, "learning_rate": 2.52269860672677e-07, "loss": 0.0305, "step": 53264 }, { "epoch": 0.9432984898192299, "grad_norm": 0.47201886773109436, "learning_rate": 2.5211275791222954e-07, "loss": 0.0457, "step": 53265 }, { "epoch": 0.9433161993562583, "grad_norm": 0.3650870621204376, "learning_rate": 2.5195570367083587e-07, "loss": 0.049, "step": 53266 }, { "epoch": 0.9433339088932867, "grad_norm": 0.7524091601371765, "learning_rate": 2.517986979490172e-07, "loss": 0.0423, "step": 53267 }, { "epoch": 0.9433516184303152, "grad_norm": 0.5932761430740356, "learning_rate": 2.5164174074728985e-07, "loss": 0.0485, "step": 53268 }, { "epoch": 0.9433693279673436, "grad_norm": 0.8566834330558777, "learning_rate": 2.5148483206617003e-07, "loss": 0.0715, "step": 53269 }, { "epoch": 0.943387037504372, "grad_norm": 0.3538961708545685, "learning_rate": 2.5132797190617063e-07, "loss": 0.0368, "step": 53270 }, { "epoch": 0.9434047470414004, "grad_norm": 0.4063817262649536, "learning_rate": 2.511711602678113e-07, "loss": 0.0396, "step": 53271 }, { "epoch": 0.9434224565784289, "grad_norm": 0.3100293278694153, "learning_rate": 2.510143971516099e-07, "loss": 0.0465, "step": 53272 }, { "epoch": 0.9434401661154573, "grad_norm": 0.4668145477771759, "learning_rate": 2.5085768255807604e-07, "loss": 0.065, "step": 53273 }, { "epoch": 0.9434578756524857, "grad_norm": 0.6289874911308289, "learning_rate": 2.50701016487731e-07, "loss": 0.0477, "step": 53274 }, { "epoch": 0.9434755851895141, "grad_norm": 0.7191242575645447, "learning_rate": 2.5054439894108604e-07, "loss": 0.0504, "step": 53275 }, { "epoch": 0.9434932947265426, "grad_norm": 0.7456366419792175, "learning_rate": 2.503878299186607e-07, "loss": 0.0596, "step": 53276 }, { "epoch": 0.943511004263571, "grad_norm": 0.7136600613594055, "learning_rate": 2.502313094209646e-07, "loss": 0.0561, "step": 53277 }, { "epoch": 0.9435287138005994, "grad_norm": 0.40310272574424744, "learning_rate": 2.500748374485173e-07, "loss": 0.0286, "step": 53278 }, { "epoch": 0.943546423337628, "grad_norm": 0.5797324180603027, "learning_rate": 2.499184140018318e-07, "loss": 0.0379, "step": 53279 }, { "epoch": 0.9435641328746563, "grad_norm": 0.9931843876838684, "learning_rate": 2.497620390814209e-07, "loss": 0.0676, "step": 53280 }, { "epoch": 0.9435818424116847, "grad_norm": 0.48212045431137085, "learning_rate": 2.4960571268780264e-07, "loss": 0.0568, "step": 53281 }, { "epoch": 0.9435995519487131, "grad_norm": 0.37434133887290955, "learning_rate": 2.4944943482148983e-07, "loss": 0.0329, "step": 53282 }, { "epoch": 0.9436172614857417, "grad_norm": 0.5714825391769409, "learning_rate": 2.4929320548299715e-07, "loss": 0.0837, "step": 53283 }, { "epoch": 0.94363497102277, "grad_norm": 0.590294599533081, "learning_rate": 2.491370246728375e-07, "loss": 0.0549, "step": 53284 }, { "epoch": 0.9436526805597985, "grad_norm": 0.6080230474472046, "learning_rate": 2.489808923915238e-07, "loss": 0.0632, "step": 53285 }, { "epoch": 0.9436703900968268, "grad_norm": 0.462287038564682, "learning_rate": 2.4882480863957224e-07, "loss": 0.0403, "step": 53286 }, { "epoch": 0.9436880996338554, "grad_norm": 0.4616624712944031, "learning_rate": 2.4866877341749583e-07, "loss": 0.0595, "step": 53287 }, { "epoch": 0.9437058091708838, "grad_norm": 0.5547499060630798, "learning_rate": 2.4851278672580415e-07, "loss": 0.0353, "step": 53288 }, { "epoch": 0.9437235187079122, "grad_norm": 0.6251385807991028, "learning_rate": 2.4835684856501506e-07, "loss": 0.0336, "step": 53289 }, { "epoch": 0.9437412282449406, "grad_norm": 0.3059825301170349, "learning_rate": 2.482009589356399e-07, "loss": 0.0329, "step": 53290 }, { "epoch": 0.9437589377819691, "grad_norm": 0.4711190462112427, "learning_rate": 2.480451178381915e-07, "loss": 0.0443, "step": 53291 }, { "epoch": 0.9437766473189975, "grad_norm": 1.0069468021392822, "learning_rate": 2.4788932527318286e-07, "loss": 0.0731, "step": 53292 }, { "epoch": 0.9437943568560259, "grad_norm": 0.8318538665771484, "learning_rate": 2.477335812411252e-07, "loss": 0.0825, "step": 53293 }, { "epoch": 0.9438120663930544, "grad_norm": 0.4706767499446869, "learning_rate": 2.475778857425348e-07, "loss": 0.0506, "step": 53294 }, { "epoch": 0.9438297759300828, "grad_norm": 0.3942328989505768, "learning_rate": 2.474222387779179e-07, "loss": 0.0383, "step": 53295 }, { "epoch": 0.9438474854671112, "grad_norm": 0.8953578472137451, "learning_rate": 2.472666403477908e-07, "loss": 0.0516, "step": 53296 }, { "epoch": 0.9438651950041396, "grad_norm": 0.8620986938476562, "learning_rate": 2.4711109045266635e-07, "loss": 0.0524, "step": 53297 }, { "epoch": 0.9438829045411681, "grad_norm": 0.8005481362342834, "learning_rate": 2.4695558909305247e-07, "loss": 0.0538, "step": 53298 }, { "epoch": 0.9439006140781965, "grad_norm": 0.7349593639373779, "learning_rate": 2.4680013626946053e-07, "loss": 0.0627, "step": 53299 }, { "epoch": 0.9439183236152249, "grad_norm": 0.2624419331550598, "learning_rate": 2.46644731982405e-07, "loss": 0.0251, "step": 53300 }, { "epoch": 0.9439360331522533, "grad_norm": 0.5842353105545044, "learning_rate": 2.464893762323989e-07, "loss": 0.0603, "step": 53301 }, { "epoch": 0.9439537426892818, "grad_norm": 0.30410444736480713, "learning_rate": 2.4633406901994837e-07, "loss": 0.0409, "step": 53302 }, { "epoch": 0.9439714522263102, "grad_norm": 0.5915537476539612, "learning_rate": 2.4617881034556644e-07, "loss": 0.0632, "step": 53303 }, { "epoch": 0.9439891617633386, "grad_norm": 0.43243998289108276, "learning_rate": 2.4602360020976433e-07, "loss": 0.0392, "step": 53304 }, { "epoch": 0.944006871300367, "grad_norm": 0.46116673946380615, "learning_rate": 2.4586843861305165e-07, "loss": 0.0311, "step": 53305 }, { "epoch": 0.9440245808373955, "grad_norm": 0.2868518531322479, "learning_rate": 2.4571332555593963e-07, "loss": 0.0496, "step": 53306 }, { "epoch": 0.9440422903744239, "grad_norm": 0.42235496640205383, "learning_rate": 2.455582610389395e-07, "loss": 0.0352, "step": 53307 }, { "epoch": 0.9440599999114523, "grad_norm": 1.371431589126587, "learning_rate": 2.454032450625593e-07, "loss": 0.0607, "step": 53308 }, { "epoch": 0.9440777094484808, "grad_norm": 0.4375934302806854, "learning_rate": 2.4524827762731016e-07, "loss": 0.0747, "step": 53309 }, { "epoch": 0.9440954189855092, "grad_norm": 0.7268303632736206, "learning_rate": 2.450933587337018e-07, "loss": 0.0451, "step": 53310 }, { "epoch": 0.9441131285225376, "grad_norm": 0.287168025970459, "learning_rate": 2.449384883822453e-07, "loss": 0.04, "step": 53311 }, { "epoch": 0.944130838059566, "grad_norm": 0.746364176273346, "learning_rate": 2.447836665734471e-07, "loss": 0.0654, "step": 53312 }, { "epoch": 0.9441485475965945, "grad_norm": 0.48240983486175537, "learning_rate": 2.4462889330781833e-07, "loss": 0.0426, "step": 53313 }, { "epoch": 0.9441662571336229, "grad_norm": 0.7429214715957642, "learning_rate": 2.444741685858687e-07, "loss": 0.0617, "step": 53314 }, { "epoch": 0.9441839666706513, "grad_norm": 0.24639910459518433, "learning_rate": 2.443194924081077e-07, "loss": 0.0246, "step": 53315 }, { "epoch": 0.9442016762076797, "grad_norm": 0.6872931122779846, "learning_rate": 2.441648647750416e-07, "loss": 0.049, "step": 53316 }, { "epoch": 0.9442193857447082, "grad_norm": 0.9930703639984131, "learning_rate": 2.4401028568718174e-07, "loss": 0.0612, "step": 53317 }, { "epoch": 0.9442370952817366, "grad_norm": 0.6427808403968811, "learning_rate": 2.4385575514503435e-07, "loss": 0.054, "step": 53318 }, { "epoch": 0.944254804818765, "grad_norm": 0.6104506850242615, "learning_rate": 2.4370127314910893e-07, "loss": 0.0594, "step": 53319 }, { "epoch": 0.9442725143557934, "grad_norm": 0.13125817477703094, "learning_rate": 2.4354683969991354e-07, "loss": 0.0411, "step": 53320 }, { "epoch": 0.9442902238928219, "grad_norm": 0.47727566957473755, "learning_rate": 2.433924547979577e-07, "loss": 0.055, "step": 53321 }, { "epoch": 0.9443079334298503, "grad_norm": 0.6858457326889038, "learning_rate": 2.4323811844374934e-07, "loss": 0.0331, "step": 53322 }, { "epoch": 0.9443256429668787, "grad_norm": 0.5087614059448242, "learning_rate": 2.4308383063779305e-07, "loss": 0.0326, "step": 53323 }, { "epoch": 0.9443433525039072, "grad_norm": 0.4464401602745056, "learning_rate": 2.4292959138059844e-07, "loss": 0.0481, "step": 53324 }, { "epoch": 0.9443610620409356, "grad_norm": 0.7550824284553528, "learning_rate": 2.4277540067267344e-07, "loss": 0.0387, "step": 53325 }, { "epoch": 0.944378771577964, "grad_norm": 0.9810619354248047, "learning_rate": 2.42621258514526e-07, "loss": 0.0634, "step": 53326 }, { "epoch": 0.9443964811149924, "grad_norm": 0.35667529702186584, "learning_rate": 2.42467164906659e-07, "loss": 0.0495, "step": 53327 }, { "epoch": 0.9444141906520209, "grad_norm": 0.7712287306785583, "learning_rate": 2.423131198495837e-07, "loss": 0.0857, "step": 53328 }, { "epoch": 0.9444319001890493, "grad_norm": 0.4757002592086792, "learning_rate": 2.421591233438064e-07, "loss": 0.0403, "step": 53329 }, { "epoch": 0.9444496097260777, "grad_norm": 0.6961156725883484, "learning_rate": 2.4200517538983167e-07, "loss": 0.0616, "step": 53330 }, { "epoch": 0.9444673192631061, "grad_norm": 0.8441092371940613, "learning_rate": 2.4185127598816915e-07, "loss": 0.0715, "step": 53331 }, { "epoch": 0.9444850288001346, "grad_norm": 1.046168327331543, "learning_rate": 2.416974251393217e-07, "loss": 0.069, "step": 53332 }, { "epoch": 0.944502738337163, "grad_norm": 0.4605602025985718, "learning_rate": 2.4154362284379727e-07, "loss": 0.0393, "step": 53333 }, { "epoch": 0.9445204478741914, "grad_norm": 0.49574941396713257, "learning_rate": 2.4138986910210217e-07, "loss": 0.0474, "step": 53334 }, { "epoch": 0.9445381574112198, "grad_norm": 0.41097426414489746, "learning_rate": 2.412361639147409e-07, "loss": 0.0273, "step": 53335 }, { "epoch": 0.9445558669482483, "grad_norm": 0.22184306383132935, "learning_rate": 2.410825072822198e-07, "loss": 0.0319, "step": 53336 }, { "epoch": 0.9445735764852767, "grad_norm": 0.9970529675483704, "learning_rate": 2.4092889920504513e-07, "loss": 0.0611, "step": 53337 }, { "epoch": 0.9445912860223051, "grad_norm": 0.7915511727333069, "learning_rate": 2.407753396837198e-07, "loss": 0.0658, "step": 53338 }, { "epoch": 0.9446089955593336, "grad_norm": 0.7090768218040466, "learning_rate": 2.4062182871875336e-07, "loss": 0.0845, "step": 53339 }, { "epoch": 0.944626705096362, "grad_norm": 0.47201645374298096, "learning_rate": 2.4046836631064715e-07, "loss": 0.0606, "step": 53340 }, { "epoch": 0.9446444146333904, "grad_norm": 0.5398136377334595, "learning_rate": 2.403149524599074e-07, "loss": 0.0611, "step": 53341 }, { "epoch": 0.9446621241704188, "grad_norm": 0.5430447459220886, "learning_rate": 2.4016158716703875e-07, "loss": 0.0361, "step": 53342 }, { "epoch": 0.9446798337074473, "grad_norm": 0.717450737953186, "learning_rate": 2.400082704325457e-07, "loss": 0.0592, "step": 53343 }, { "epoch": 0.9446975432444757, "grad_norm": 0.606573760509491, "learning_rate": 2.398550022569329e-07, "loss": 0.0467, "step": 53344 }, { "epoch": 0.9447152527815041, "grad_norm": 0.600667417049408, "learning_rate": 2.397017826407033e-07, "loss": 0.0513, "step": 53345 }, { "epoch": 0.9447329623185325, "grad_norm": 0.4486101269721985, "learning_rate": 2.3954861158436145e-07, "loss": 0.0405, "step": 53346 }, { "epoch": 0.944750671855561, "grad_norm": 0.5002361536026001, "learning_rate": 2.39395489088412e-07, "loss": 0.0283, "step": 53347 }, { "epoch": 0.9447683813925895, "grad_norm": 0.6359162926673889, "learning_rate": 2.392424151533579e-07, "loss": 0.0759, "step": 53348 }, { "epoch": 0.9447860909296178, "grad_norm": 0.2593976855278015, "learning_rate": 2.390893897797053e-07, "loss": 0.0602, "step": 53349 }, { "epoch": 0.9448038004666462, "grad_norm": 0.605729341506958, "learning_rate": 2.3893641296795553e-07, "loss": 0.0459, "step": 53350 }, { "epoch": 0.9448215100036748, "grad_norm": 0.5986744165420532, "learning_rate": 2.387834847186099e-07, "loss": 0.0822, "step": 53351 }, { "epoch": 0.9448392195407032, "grad_norm": 0.6557185649871826, "learning_rate": 2.386306050321746e-07, "loss": 0.0361, "step": 53352 }, { "epoch": 0.9448569290777316, "grad_norm": 0.6967528462409973, "learning_rate": 2.384777739091526e-07, "loss": 0.0389, "step": 53353 }, { "epoch": 0.9448746386147601, "grad_norm": 0.7186834812164307, "learning_rate": 2.3832499135004516e-07, "loss": 0.052, "step": 53354 }, { "epoch": 0.9448923481517885, "grad_norm": 0.7114190459251404, "learning_rate": 2.381722573553552e-07, "loss": 0.0568, "step": 53355 }, { "epoch": 0.9449100576888169, "grad_norm": 0.945709228515625, "learning_rate": 2.38019571925584e-07, "loss": 0.0492, "step": 53356 }, { "epoch": 0.9449277672258453, "grad_norm": 0.8136155605316162, "learning_rate": 2.3786693506123614e-07, "loss": 0.0738, "step": 53357 }, { "epoch": 0.9449454767628738, "grad_norm": 0.798953115940094, "learning_rate": 2.3771434676281457e-07, "loss": 0.0595, "step": 53358 }, { "epoch": 0.9449631862999022, "grad_norm": 0.6640825271606445, "learning_rate": 2.375618070308172e-07, "loss": 0.0556, "step": 53359 }, { "epoch": 0.9449808958369306, "grad_norm": 0.22444838285446167, "learning_rate": 2.3740931586575032e-07, "loss": 0.0613, "step": 53360 }, { "epoch": 0.944998605373959, "grad_norm": 0.413675993680954, "learning_rate": 2.3725687326811352e-07, "loss": 0.0545, "step": 53361 }, { "epoch": 0.9450163149109875, "grad_norm": 0.44702309370040894, "learning_rate": 2.3710447923840806e-07, "loss": 0.0581, "step": 53362 }, { "epoch": 0.9450340244480159, "grad_norm": 0.44318270683288574, "learning_rate": 2.369521337771352e-07, "loss": 0.0506, "step": 53363 }, { "epoch": 0.9450517339850443, "grad_norm": 0.5691185593605042, "learning_rate": 2.367998368847979e-07, "loss": 0.0525, "step": 53364 }, { "epoch": 0.9450694435220727, "grad_norm": 0.601826548576355, "learning_rate": 2.3664758856189405e-07, "loss": 0.0443, "step": 53365 }, { "epoch": 0.9450871530591012, "grad_norm": 0.5703416466712952, "learning_rate": 2.3649538880892663e-07, "loss": 0.0523, "step": 53366 }, { "epoch": 0.9451048625961296, "grad_norm": 0.44131970405578613, "learning_rate": 2.3634323762639688e-07, "loss": 0.035, "step": 53367 }, { "epoch": 0.945122572133158, "grad_norm": 0.623607873916626, "learning_rate": 2.3619113501480604e-07, "loss": 0.0747, "step": 53368 }, { "epoch": 0.9451402816701865, "grad_norm": 0.31413358449935913, "learning_rate": 2.360390809746521e-07, "loss": 0.0402, "step": 53369 }, { "epoch": 0.9451579912072149, "grad_norm": 0.38541173934936523, "learning_rate": 2.3588707550643463e-07, "loss": 0.0245, "step": 53370 }, { "epoch": 0.9451757007442433, "grad_norm": 0.44479408860206604, "learning_rate": 2.3573511861065656e-07, "loss": 0.0235, "step": 53371 }, { "epoch": 0.9451934102812717, "grad_norm": 0.7728418111801147, "learning_rate": 2.3558321028781914e-07, "loss": 0.0755, "step": 53372 }, { "epoch": 0.9452111198183002, "grad_norm": 0.4777921736240387, "learning_rate": 2.3543135053841703e-07, "loss": 0.0481, "step": 53373 }, { "epoch": 0.9452288293553286, "grad_norm": 0.33113664388656616, "learning_rate": 2.3527953936295144e-07, "loss": 0.052, "step": 53374 }, { "epoch": 0.945246538892357, "grad_norm": 0.7999538779258728, "learning_rate": 2.3512777676192365e-07, "loss": 0.0536, "step": 53375 }, { "epoch": 0.9452642484293854, "grad_norm": 0.6434815526008606, "learning_rate": 2.3497606273583328e-07, "loss": 0.0394, "step": 53376 }, { "epoch": 0.9452819579664139, "grad_norm": 0.715192437171936, "learning_rate": 2.3482439728517656e-07, "loss": 0.0442, "step": 53377 }, { "epoch": 0.9452996675034423, "grad_norm": 0.3754776120185852, "learning_rate": 2.3467278041045482e-07, "loss": 0.054, "step": 53378 }, { "epoch": 0.9453173770404707, "grad_norm": 1.1459933519363403, "learning_rate": 2.3452121211216758e-07, "loss": 0.0706, "step": 53379 }, { "epoch": 0.9453350865774991, "grad_norm": 0.6115582585334778, "learning_rate": 2.3436969239081118e-07, "loss": 0.0688, "step": 53380 }, { "epoch": 0.9453527961145276, "grad_norm": 0.6959279775619507, "learning_rate": 2.3421822124688352e-07, "loss": 0.052, "step": 53381 }, { "epoch": 0.945370505651556, "grad_norm": 0.3972444534301758, "learning_rate": 2.3406679868088755e-07, "loss": 0.047, "step": 53382 }, { "epoch": 0.9453882151885844, "grad_norm": 0.7329756021499634, "learning_rate": 2.3391542469331618e-07, "loss": 0.0535, "step": 53383 }, { "epoch": 0.9454059247256129, "grad_norm": 0.5205880403518677, "learning_rate": 2.3376409928466902e-07, "loss": 0.0449, "step": 53384 }, { "epoch": 0.9454236342626413, "grad_norm": 0.2522103190422058, "learning_rate": 2.3361282245544568e-07, "loss": 0.057, "step": 53385 }, { "epoch": 0.9454413437996697, "grad_norm": 0.5719450116157532, "learning_rate": 2.3346159420614245e-07, "loss": 0.0471, "step": 53386 }, { "epoch": 0.9454590533366981, "grad_norm": 0.5306521058082581, "learning_rate": 2.333104145372572e-07, "loss": 0.0452, "step": 53387 }, { "epoch": 0.9454767628737266, "grad_norm": 0.4874001443386078, "learning_rate": 2.3315928344928628e-07, "loss": 0.0344, "step": 53388 }, { "epoch": 0.945494472410755, "grad_norm": 0.7022498250007629, "learning_rate": 2.3300820094272922e-07, "loss": 0.0689, "step": 53389 }, { "epoch": 0.9455121819477834, "grad_norm": 0.7459948658943176, "learning_rate": 2.328571670180807e-07, "loss": 0.0492, "step": 53390 }, { "epoch": 0.9455298914848118, "grad_norm": 0.6949121952056885, "learning_rate": 2.327061816758369e-07, "loss": 0.0655, "step": 53391 }, { "epoch": 0.9455476010218403, "grad_norm": 0.496015340089798, "learning_rate": 2.325552449164975e-07, "loss": 0.036, "step": 53392 }, { "epoch": 0.9455653105588687, "grad_norm": 0.4313420057296753, "learning_rate": 2.324043567405587e-07, "loss": 0.0507, "step": 53393 }, { "epoch": 0.9455830200958971, "grad_norm": 0.7641237378120422, "learning_rate": 2.3225351714851516e-07, "loss": 0.0689, "step": 53394 }, { "epoch": 0.9456007296329255, "grad_norm": 0.8586199879646301, "learning_rate": 2.3210272614086315e-07, "loss": 0.0621, "step": 53395 }, { "epoch": 0.945618439169954, "grad_norm": 0.5297712087631226, "learning_rate": 2.319519837180989e-07, "loss": 0.0411, "step": 53396 }, { "epoch": 0.9456361487069824, "grad_norm": 0.37599828839302063, "learning_rate": 2.3180128988072203e-07, "loss": 0.0453, "step": 53397 }, { "epoch": 0.9456538582440108, "grad_norm": 0.7656087279319763, "learning_rate": 2.3165064462922382e-07, "loss": 0.0791, "step": 53398 }, { "epoch": 0.9456715677810393, "grad_norm": 0.3059498071670532, "learning_rate": 2.315000479641005e-07, "loss": 0.0463, "step": 53399 }, { "epoch": 0.9456892773180677, "grad_norm": 0.48635998368263245, "learning_rate": 2.3134949988584842e-07, "loss": 0.0385, "step": 53400 }, { "epoch": 0.9457069868550961, "grad_norm": 0.44967004656791687, "learning_rate": 2.3119900039496377e-07, "loss": 0.0456, "step": 53401 }, { "epoch": 0.9457246963921245, "grad_norm": 0.5312245488166809, "learning_rate": 2.3104854949193956e-07, "loss": 0.0391, "step": 53402 }, { "epoch": 0.945742405929153, "grad_norm": 0.4332066476345062, "learning_rate": 2.3089814717727032e-07, "loss": 0.0482, "step": 53403 }, { "epoch": 0.9457601154661814, "grad_norm": 0.4055479168891907, "learning_rate": 2.3074779345145402e-07, "loss": 0.0419, "step": 53404 }, { "epoch": 0.9457778250032098, "grad_norm": 0.49007803201675415, "learning_rate": 2.305974883149836e-07, "loss": 0.0374, "step": 53405 }, { "epoch": 0.9457955345402382, "grad_norm": 0.36176320910453796, "learning_rate": 2.304472317683537e-07, "loss": 0.0242, "step": 53406 }, { "epoch": 0.9458132440772667, "grad_norm": 0.7372989654541016, "learning_rate": 2.3029702381206054e-07, "loss": 0.0589, "step": 53407 }, { "epoch": 0.9458309536142951, "grad_norm": 0.7403830885887146, "learning_rate": 2.3014686444659371e-07, "loss": 0.0508, "step": 53408 }, { "epoch": 0.9458486631513235, "grad_norm": 0.6466141939163208, "learning_rate": 2.2999675367244955e-07, "loss": 0.0355, "step": 53409 }, { "epoch": 0.9458663726883519, "grad_norm": 0.5912095308303833, "learning_rate": 2.2984669149012262e-07, "loss": 0.0729, "step": 53410 }, { "epoch": 0.9458840822253805, "grad_norm": 0.9247999787330627, "learning_rate": 2.2969667790010752e-07, "loss": 0.0452, "step": 53411 }, { "epoch": 0.9459017917624089, "grad_norm": 0.26444879174232483, "learning_rate": 2.2954671290289554e-07, "loss": 0.0531, "step": 53412 }, { "epoch": 0.9459195012994372, "grad_norm": 0.82243812084198, "learning_rate": 2.2939679649898126e-07, "loss": 0.0535, "step": 53413 }, { "epoch": 0.9459372108364658, "grad_norm": 0.6958222985267639, "learning_rate": 2.29246928688856e-07, "loss": 0.0514, "step": 53414 }, { "epoch": 0.9459549203734942, "grad_norm": 0.6897854804992676, "learning_rate": 2.29097109473016e-07, "loss": 0.0359, "step": 53415 }, { "epoch": 0.9459726299105226, "grad_norm": 0.7472134232521057, "learning_rate": 2.2894733885195252e-07, "loss": 0.0598, "step": 53416 }, { "epoch": 0.945990339447551, "grad_norm": 1.0321000814437866, "learning_rate": 2.2879761682615686e-07, "loss": 0.0837, "step": 53417 }, { "epoch": 0.9460080489845795, "grad_norm": 1.0116761922836304, "learning_rate": 2.2864794339612526e-07, "loss": 0.0695, "step": 53418 }, { "epoch": 0.9460257585216079, "grad_norm": 0.7529173493385315, "learning_rate": 2.284983185623457e-07, "loss": 0.0364, "step": 53419 }, { "epoch": 0.9460434680586363, "grad_norm": 0.9116851091384888, "learning_rate": 2.2834874232531443e-07, "loss": 0.0754, "step": 53420 }, { "epoch": 0.9460611775956647, "grad_norm": 0.9625017046928406, "learning_rate": 2.2819921468552107e-07, "loss": 0.1049, "step": 53421 }, { "epoch": 0.9460788871326932, "grad_norm": 0.4735634922981262, "learning_rate": 2.2804973564345856e-07, "loss": 0.058, "step": 53422 }, { "epoch": 0.9460965966697216, "grad_norm": 0.7137125134468079, "learning_rate": 2.2790030519961646e-07, "loss": 0.0445, "step": 53423 }, { "epoch": 0.94611430620675, "grad_norm": 0.5067129731178284, "learning_rate": 2.2775092335448943e-07, "loss": 0.0542, "step": 53424 }, { "epoch": 0.9461320157437784, "grad_norm": 0.7112773656845093, "learning_rate": 2.2760159010856874e-07, "loss": 0.0606, "step": 53425 }, { "epoch": 0.9461497252808069, "grad_norm": 0.6978863477706909, "learning_rate": 2.2745230546234397e-07, "loss": 0.0525, "step": 53426 }, { "epoch": 0.9461674348178353, "grad_norm": 0.6432269215583801, "learning_rate": 2.2730306941630642e-07, "loss": 0.0406, "step": 53427 }, { "epoch": 0.9461851443548637, "grad_norm": 0.6794123649597168, "learning_rate": 2.2715388197094732e-07, "loss": 0.0456, "step": 53428 }, { "epoch": 0.9462028538918922, "grad_norm": 0.43759775161743164, "learning_rate": 2.2700474312675968e-07, "loss": 0.0364, "step": 53429 }, { "epoch": 0.9462205634289206, "grad_norm": 0.20231474936008453, "learning_rate": 2.268556528842314e-07, "loss": 0.0345, "step": 53430 }, { "epoch": 0.946238272965949, "grad_norm": 1.0064244270324707, "learning_rate": 2.2670661124385206e-07, "loss": 0.079, "step": 53431 }, { "epoch": 0.9462559825029774, "grad_norm": 0.32495665550231934, "learning_rate": 2.2655761820611464e-07, "loss": 0.0563, "step": 53432 }, { "epoch": 0.9462736920400059, "grad_norm": 0.5882090330123901, "learning_rate": 2.2640867377150877e-07, "loss": 0.045, "step": 53433 }, { "epoch": 0.9462914015770343, "grad_norm": 1.0128145217895508, "learning_rate": 2.26259777940524e-07, "loss": 0.0685, "step": 53434 }, { "epoch": 0.9463091111140627, "grad_norm": 0.4114426076412201, "learning_rate": 2.2611093071365164e-07, "loss": 0.064, "step": 53435 }, { "epoch": 0.9463268206510911, "grad_norm": 0.40861231088638306, "learning_rate": 2.259621320913796e-07, "loss": 0.0446, "step": 53436 }, { "epoch": 0.9463445301881196, "grad_norm": 0.5236387848854065, "learning_rate": 2.2581338207419587e-07, "loss": 0.0457, "step": 53437 }, { "epoch": 0.946362239725148, "grad_norm": 0.6159573197364807, "learning_rate": 2.2566468066259338e-07, "loss": 0.0437, "step": 53438 }, { "epoch": 0.9463799492621764, "grad_norm": 0.5487358570098877, "learning_rate": 2.2551602785706005e-07, "loss": 0.0907, "step": 53439 }, { "epoch": 0.9463976587992048, "grad_norm": 0.6144688725471497, "learning_rate": 2.2536742365808383e-07, "loss": 0.1199, "step": 53440 }, { "epoch": 0.9464153683362333, "grad_norm": 0.7591511011123657, "learning_rate": 2.2521886806615433e-07, "loss": 0.0706, "step": 53441 }, { "epoch": 0.9464330778732617, "grad_norm": 0.44812652468681335, "learning_rate": 2.2507036108176115e-07, "loss": 0.0502, "step": 53442 }, { "epoch": 0.9464507874102901, "grad_norm": 0.5085449814796448, "learning_rate": 2.2492190270539058e-07, "loss": 0.075, "step": 53443 }, { "epoch": 0.9464684969473186, "grad_norm": 0.6906750202178955, "learning_rate": 2.247734929375339e-07, "loss": 0.0531, "step": 53444 }, { "epoch": 0.946486206484347, "grad_norm": 0.22132788598537445, "learning_rate": 2.2462513177867738e-07, "loss": 0.055, "step": 53445 }, { "epoch": 0.9465039160213754, "grad_norm": 0.5818219184875488, "learning_rate": 2.2447681922931062e-07, "loss": 0.0539, "step": 53446 }, { "epoch": 0.9465216255584038, "grad_norm": 0.4162002205848694, "learning_rate": 2.2432855528991992e-07, "loss": 0.0568, "step": 53447 }, { "epoch": 0.9465393350954323, "grad_norm": 0.6032841801643372, "learning_rate": 2.2418033996099318e-07, "loss": 0.0503, "step": 53448 }, { "epoch": 0.9465570446324607, "grad_norm": 0.530331552028656, "learning_rate": 2.2403217324302005e-07, "loss": 0.0361, "step": 53449 }, { "epoch": 0.9465747541694891, "grad_norm": 0.39982253313064575, "learning_rate": 2.238840551364868e-07, "loss": 0.0398, "step": 53450 }, { "epoch": 0.9465924637065175, "grad_norm": 0.5169664025306702, "learning_rate": 2.237359856418797e-07, "loss": 0.0699, "step": 53451 }, { "epoch": 0.946610173243546, "grad_norm": 0.5583809018135071, "learning_rate": 2.23587964759685e-07, "loss": 0.0646, "step": 53452 }, { "epoch": 0.9466278827805744, "grad_norm": 0.6306973695755005, "learning_rate": 2.2343999249039403e-07, "loss": 0.061, "step": 53453 }, { "epoch": 0.9466455923176028, "grad_norm": 0.6361962556838989, "learning_rate": 2.232920688344897e-07, "loss": 0.0724, "step": 53454 }, { "epoch": 0.9466633018546312, "grad_norm": 0.6911697387695312, "learning_rate": 2.2314419379245998e-07, "loss": 0.0478, "step": 53455 }, { "epoch": 0.9466810113916597, "grad_norm": 0.7640243768692017, "learning_rate": 2.2299636736479113e-07, "loss": 0.0694, "step": 53456 }, { "epoch": 0.9466987209286881, "grad_norm": 0.6632713675498962, "learning_rate": 2.2284858955197106e-07, "loss": 0.0333, "step": 53457 }, { "epoch": 0.9467164304657165, "grad_norm": 0.2737153172492981, "learning_rate": 2.2270086035448278e-07, "loss": 0.0368, "step": 53458 }, { "epoch": 0.946734140002745, "grad_norm": 0.8302243947982788, "learning_rate": 2.225531797728142e-07, "loss": 0.0777, "step": 53459 }, { "epoch": 0.9467518495397734, "grad_norm": 0.5245027542114258, "learning_rate": 2.2240554780744992e-07, "loss": 0.0496, "step": 53460 }, { "epoch": 0.9467695590768018, "grad_norm": 0.5132774114608765, "learning_rate": 2.2225796445887958e-07, "loss": 0.0596, "step": 53461 }, { "epoch": 0.9467872686138302, "grad_norm": 0.26267847418785095, "learning_rate": 2.2211042972758276e-07, "loss": 0.0486, "step": 53462 }, { "epoch": 0.9468049781508587, "grad_norm": 0.5337221622467041, "learning_rate": 2.2196294361405078e-07, "loss": 0.0491, "step": 53463 }, { "epoch": 0.9468226876878871, "grad_norm": 0.3725220263004303, "learning_rate": 2.2181550611876489e-07, "loss": 0.0437, "step": 53464 }, { "epoch": 0.9468403972249155, "grad_norm": 0.7597692608833313, "learning_rate": 2.2166811724221137e-07, "loss": 0.0791, "step": 53465 }, { "epoch": 0.9468581067619439, "grad_norm": 0.22928474843502045, "learning_rate": 2.2152077698487482e-07, "loss": 0.0471, "step": 53466 }, { "epoch": 0.9468758162989724, "grad_norm": 0.5636752843856812, "learning_rate": 2.2137348534723988e-07, "loss": 0.0797, "step": 53467 }, { "epoch": 0.9468935258360008, "grad_norm": 0.4480256736278534, "learning_rate": 2.2122624232979284e-07, "loss": 0.0486, "step": 53468 }, { "epoch": 0.9469112353730292, "grad_norm": 0.6249865889549255, "learning_rate": 2.2107904793301492e-07, "loss": 0.0514, "step": 53469 }, { "epoch": 0.9469289449100576, "grad_norm": 0.4505530595779419, "learning_rate": 2.2093190215739246e-07, "loss": 0.0609, "step": 53470 }, { "epoch": 0.9469466544470861, "grad_norm": 0.4442879855632782, "learning_rate": 2.2078480500340836e-07, "loss": 0.0446, "step": 53471 }, { "epoch": 0.9469643639841145, "grad_norm": 0.4354024827480316, "learning_rate": 2.2063775647154894e-07, "loss": 0.0439, "step": 53472 }, { "epoch": 0.9469820735211429, "grad_norm": 0.6558942794799805, "learning_rate": 2.2049075656229544e-07, "loss": 0.0807, "step": 53473 }, { "epoch": 0.9469997830581715, "grad_norm": 0.6204367280006409, "learning_rate": 2.2034380527613418e-07, "loss": 0.0499, "step": 53474 }, { "epoch": 0.9470174925951999, "grad_norm": 0.8238844871520996, "learning_rate": 2.2019690261354642e-07, "loss": 0.0603, "step": 53475 }, { "epoch": 0.9470352021322282, "grad_norm": 0.617195725440979, "learning_rate": 2.200500485750151e-07, "loss": 0.0678, "step": 53476 }, { "epoch": 0.9470529116692566, "grad_norm": 0.7213006019592285, "learning_rate": 2.1990324316102484e-07, "loss": 0.0599, "step": 53477 }, { "epoch": 0.9470706212062852, "grad_norm": 0.28122153878211975, "learning_rate": 2.1975648637205858e-07, "loss": 0.0275, "step": 53478 }, { "epoch": 0.9470883307433136, "grad_norm": 0.5436161160469055, "learning_rate": 2.1960977820859762e-07, "loss": 0.0516, "step": 53479 }, { "epoch": 0.947106040280342, "grad_norm": 0.9710304141044617, "learning_rate": 2.194631186711249e-07, "loss": 0.0526, "step": 53480 }, { "epoch": 0.9471237498173704, "grad_norm": 0.3101158142089844, "learning_rate": 2.1931650776012336e-07, "loss": 0.0385, "step": 53481 }, { "epoch": 0.9471414593543989, "grad_norm": 0.5710287690162659, "learning_rate": 2.1916994547607594e-07, "loss": 0.0413, "step": 53482 }, { "epoch": 0.9471591688914273, "grad_norm": 0.3876475393772125, "learning_rate": 2.1902343181946562e-07, "loss": 0.0548, "step": 53483 }, { "epoch": 0.9471768784284557, "grad_norm": 0.7838523983955383, "learning_rate": 2.1887696679077195e-07, "loss": 0.0597, "step": 53484 }, { "epoch": 0.9471945879654841, "grad_norm": 0.4192134141921997, "learning_rate": 2.1873055039047963e-07, "loss": 0.0546, "step": 53485 }, { "epoch": 0.9472122975025126, "grad_norm": 0.8325838446617126, "learning_rate": 2.185841826190682e-07, "loss": 0.0452, "step": 53486 }, { "epoch": 0.947230007039541, "grad_norm": 0.17318525910377502, "learning_rate": 2.1843786347701899e-07, "loss": 0.0653, "step": 53487 }, { "epoch": 0.9472477165765694, "grad_norm": 0.5869089365005493, "learning_rate": 2.1829159296481326e-07, "loss": 0.0429, "step": 53488 }, { "epoch": 0.9472654261135979, "grad_norm": 0.667728066444397, "learning_rate": 2.1814537108293397e-07, "loss": 0.0435, "step": 53489 }, { "epoch": 0.9472831356506263, "grad_norm": 0.4073238968849182, "learning_rate": 2.179991978318624e-07, "loss": 0.0638, "step": 53490 }, { "epoch": 0.9473008451876547, "grad_norm": 0.9695720076560974, "learning_rate": 2.1785307321207647e-07, "loss": 0.0687, "step": 53491 }, { "epoch": 0.9473185547246831, "grad_norm": 1.1013662815093994, "learning_rate": 2.1770699722406086e-07, "loss": 0.0677, "step": 53492 }, { "epoch": 0.9473362642617116, "grad_norm": 0.285335898399353, "learning_rate": 2.1756096986829344e-07, "loss": 0.0548, "step": 53493 }, { "epoch": 0.94735397379874, "grad_norm": 0.5428723096847534, "learning_rate": 2.1741499114525388e-07, "loss": 0.0464, "step": 53494 }, { "epoch": 0.9473716833357684, "grad_norm": 0.6073154807090759, "learning_rate": 2.172690610554251e-07, "loss": 0.0421, "step": 53495 }, { "epoch": 0.9473893928727968, "grad_norm": 0.5262463688850403, "learning_rate": 2.1712317959928673e-07, "loss": 0.0498, "step": 53496 }, { "epoch": 0.9474071024098253, "grad_norm": 0.6849785447120667, "learning_rate": 2.1697734677731674e-07, "loss": 0.0608, "step": 53497 }, { "epoch": 0.9474248119468537, "grad_norm": 0.7064769268035889, "learning_rate": 2.1683156258999637e-07, "loss": 0.0426, "step": 53498 }, { "epoch": 0.9474425214838821, "grad_norm": 0.5825780034065247, "learning_rate": 2.1668582703780527e-07, "loss": 0.0586, "step": 53499 }, { "epoch": 0.9474602310209105, "grad_norm": 0.4687184691429138, "learning_rate": 2.1654014012122136e-07, "loss": 0.0335, "step": 53500 }, { "epoch": 0.947477940557939, "grad_norm": 0.5605431795120239, "learning_rate": 2.1639450184072596e-07, "loss": 0.048, "step": 53501 }, { "epoch": 0.9474956500949674, "grad_norm": 0.7116332650184631, "learning_rate": 2.1624891219679698e-07, "loss": 0.0806, "step": 53502 }, { "epoch": 0.9475133596319958, "grad_norm": 0.48417147994041443, "learning_rate": 2.1610337118991409e-07, "loss": 0.048, "step": 53503 }, { "epoch": 0.9475310691690243, "grad_norm": 0.6714490652084351, "learning_rate": 2.1595787882055683e-07, "loss": 0.0587, "step": 53504 }, { "epoch": 0.9475487787060527, "grad_norm": 0.569794774055481, "learning_rate": 2.1581243508920156e-07, "loss": 0.0628, "step": 53505 }, { "epoch": 0.9475664882430811, "grad_norm": 0.5930147171020508, "learning_rate": 2.1566703999632786e-07, "loss": 0.0625, "step": 53506 }, { "epoch": 0.9475841977801095, "grad_norm": 1.1909875869750977, "learning_rate": 2.1552169354241535e-07, "loss": 0.0464, "step": 53507 }, { "epoch": 0.947601907317138, "grad_norm": 0.5498263835906982, "learning_rate": 2.153763957279403e-07, "loss": 0.0423, "step": 53508 }, { "epoch": 0.9476196168541664, "grad_norm": 0.46261757612228394, "learning_rate": 2.152311465533807e-07, "loss": 0.0536, "step": 53509 }, { "epoch": 0.9476373263911948, "grad_norm": 0.4369247853755951, "learning_rate": 2.1508594601921617e-07, "loss": 0.047, "step": 53510 }, { "epoch": 0.9476550359282232, "grad_norm": 0.5280593037605286, "learning_rate": 2.1494079412592292e-07, "loss": 0.0627, "step": 53511 }, { "epoch": 0.9476727454652517, "grad_norm": 0.36042359471321106, "learning_rate": 2.1479569087398067e-07, "loss": 0.0424, "step": 53512 }, { "epoch": 0.9476904550022801, "grad_norm": 1.2366621494293213, "learning_rate": 2.1465063626386393e-07, "loss": 0.0987, "step": 53513 }, { "epoch": 0.9477081645393085, "grad_norm": 0.8824082016944885, "learning_rate": 2.1450563029605242e-07, "loss": 0.0473, "step": 53514 }, { "epoch": 0.9477258740763369, "grad_norm": 0.5982304811477661, "learning_rate": 2.143606729710207e-07, "loss": 0.0592, "step": 53515 }, { "epoch": 0.9477435836133654, "grad_norm": 0.6648561358451843, "learning_rate": 2.1421576428924672e-07, "loss": 0.0628, "step": 53516 }, { "epoch": 0.9477612931503938, "grad_norm": 1.0443285703659058, "learning_rate": 2.1407090425120846e-07, "loss": 0.0508, "step": 53517 }, { "epoch": 0.9477790026874222, "grad_norm": 0.8840147256851196, "learning_rate": 2.1392609285737884e-07, "loss": 0.0714, "step": 53518 }, { "epoch": 0.9477967122244507, "grad_norm": 0.49455276131629944, "learning_rate": 2.137813301082392e-07, "loss": 0.0618, "step": 53519 }, { "epoch": 0.9478144217614791, "grad_norm": 0.9497451186180115, "learning_rate": 2.1363661600426243e-07, "loss": 0.0697, "step": 53520 }, { "epoch": 0.9478321312985075, "grad_norm": 0.37146201729774475, "learning_rate": 2.1349195054592652e-07, "loss": 0.0515, "step": 53521 }, { "epoch": 0.9478498408355359, "grad_norm": 0.6625003218650818, "learning_rate": 2.1334733373370608e-07, "loss": 0.0522, "step": 53522 }, { "epoch": 0.9478675503725644, "grad_norm": 0.8022570610046387, "learning_rate": 2.132027655680774e-07, "loss": 0.0461, "step": 53523 }, { "epoch": 0.9478852599095928, "grad_norm": 0.44652581214904785, "learning_rate": 2.1305824604951675e-07, "loss": 0.0442, "step": 53524 }, { "epoch": 0.9479029694466212, "grad_norm": 0.9900009632110596, "learning_rate": 2.1291377517849708e-07, "loss": 0.0791, "step": 53525 }, { "epoch": 0.9479206789836496, "grad_norm": 0.5285304188728333, "learning_rate": 2.127693529554964e-07, "loss": 0.0562, "step": 53526 }, { "epoch": 0.9479383885206781, "grad_norm": 0.6768749356269836, "learning_rate": 2.1262497938098924e-07, "loss": 0.0572, "step": 53527 }, { "epoch": 0.9479560980577065, "grad_norm": 0.5457151532173157, "learning_rate": 2.1248065445545027e-07, "loss": 0.0511, "step": 53528 }, { "epoch": 0.9479738075947349, "grad_norm": 0.5739552974700928, "learning_rate": 2.1233637817935414e-07, "loss": 0.0479, "step": 53529 }, { "epoch": 0.9479915171317633, "grad_norm": 0.8405108451843262, "learning_rate": 2.1219215055317542e-07, "loss": 0.0428, "step": 53530 }, { "epoch": 0.9480092266687918, "grad_norm": 0.6001597046852112, "learning_rate": 2.1204797157738876e-07, "loss": 0.0774, "step": 53531 }, { "epoch": 0.9480269362058202, "grad_norm": 0.8225812315940857, "learning_rate": 2.1190384125246875e-07, "loss": 0.037, "step": 53532 }, { "epoch": 0.9480446457428486, "grad_norm": 0.8958259224891663, "learning_rate": 2.1175975957889004e-07, "loss": 0.0513, "step": 53533 }, { "epoch": 0.9480623552798771, "grad_norm": 0.3785223364830017, "learning_rate": 2.1161572655712392e-07, "loss": 0.0438, "step": 53534 }, { "epoch": 0.9480800648169055, "grad_norm": 0.6776413917541504, "learning_rate": 2.1147174218764998e-07, "loss": 0.0426, "step": 53535 }, { "epoch": 0.9480977743539339, "grad_norm": 0.45596739649772644, "learning_rate": 2.1132780647093453e-07, "loss": 0.0538, "step": 53536 }, { "epoch": 0.9481154838909623, "grad_norm": 0.6764503717422485, "learning_rate": 2.1118391940745717e-07, "loss": 0.0485, "step": 53537 }, { "epoch": 0.9481331934279909, "grad_norm": 0.6350529789924622, "learning_rate": 2.1104008099768757e-07, "loss": 0.0802, "step": 53538 }, { "epoch": 0.9481509029650192, "grad_norm": 0.3511234223842621, "learning_rate": 2.1089629124210031e-07, "loss": 0.0336, "step": 53539 }, { "epoch": 0.9481686125020476, "grad_norm": 0.7686315774917603, "learning_rate": 2.1075255014116835e-07, "loss": 0.0441, "step": 53540 }, { "epoch": 0.948186322039076, "grad_norm": 0.49758803844451904, "learning_rate": 2.1060885769536464e-07, "loss": 0.0371, "step": 53541 }, { "epoch": 0.9482040315761046, "grad_norm": 0.6981683969497681, "learning_rate": 2.1046521390516382e-07, "loss": 0.0599, "step": 53542 }, { "epoch": 0.948221741113133, "grad_norm": 0.3228099048137665, "learning_rate": 2.103216187710355e-07, "loss": 0.0512, "step": 53543 }, { "epoch": 0.9482394506501614, "grad_norm": 1.3204890489578247, "learning_rate": 2.1017807229345265e-07, "loss": 0.0691, "step": 53544 }, { "epoch": 0.9482571601871898, "grad_norm": 0.4584653377532959, "learning_rate": 2.100345744728882e-07, "loss": 0.0463, "step": 53545 }, { "epoch": 0.9482748697242183, "grad_norm": 0.5673827528953552, "learning_rate": 2.0989112530981346e-07, "loss": 0.07, "step": 53546 }, { "epoch": 0.9482925792612467, "grad_norm": 0.48020005226135254, "learning_rate": 2.097477248047014e-07, "loss": 0.0522, "step": 53547 }, { "epoch": 0.9483102887982751, "grad_norm": 0.5273916721343994, "learning_rate": 2.0960437295802325e-07, "loss": 0.0395, "step": 53548 }, { "epoch": 0.9483279983353036, "grad_norm": 0.45166337490081787, "learning_rate": 2.0946106977025204e-07, "loss": 0.0381, "step": 53549 }, { "epoch": 0.948345707872332, "grad_norm": 0.691825807094574, "learning_rate": 2.0931781524185733e-07, "loss": 0.0464, "step": 53550 }, { "epoch": 0.9483634174093604, "grad_norm": 0.7398556470870972, "learning_rate": 2.0917460937331045e-07, "loss": 0.054, "step": 53551 }, { "epoch": 0.9483811269463888, "grad_norm": 0.5182325839996338, "learning_rate": 2.0903145216508268e-07, "loss": 0.0577, "step": 53552 }, { "epoch": 0.9483988364834173, "grad_norm": 0.3776726722717285, "learning_rate": 2.0888834361764696e-07, "loss": 0.039, "step": 53553 }, { "epoch": 0.9484165460204457, "grad_norm": 0.8714854121208191, "learning_rate": 2.087452837314713e-07, "loss": 0.0602, "step": 53554 }, { "epoch": 0.9484342555574741, "grad_norm": 0.5773964524269104, "learning_rate": 2.0860227250702856e-07, "loss": 0.0364, "step": 53555 }, { "epoch": 0.9484519650945025, "grad_norm": 0.5309892892837524, "learning_rate": 2.0845930994478678e-07, "loss": 0.0393, "step": 53556 }, { "epoch": 0.948469674631531, "grad_norm": 0.8774341940879822, "learning_rate": 2.083163960452189e-07, "loss": 0.0543, "step": 53557 }, { "epoch": 0.9484873841685594, "grad_norm": 0.419968843460083, "learning_rate": 2.0817353080879286e-07, "loss": 0.0483, "step": 53558 }, { "epoch": 0.9485050937055878, "grad_norm": 0.5690173506736755, "learning_rate": 2.0803071423598165e-07, "loss": 0.0619, "step": 53559 }, { "epoch": 0.9485228032426162, "grad_norm": 0.6175791621208191, "learning_rate": 2.0788794632725317e-07, "loss": 0.0681, "step": 53560 }, { "epoch": 0.9485405127796447, "grad_norm": 0.35358768701553345, "learning_rate": 2.0774522708307542e-07, "loss": 0.0516, "step": 53561 }, { "epoch": 0.9485582223166731, "grad_norm": 0.5304602980613708, "learning_rate": 2.0760255650392135e-07, "loss": 0.0484, "step": 53562 }, { "epoch": 0.9485759318537015, "grad_norm": 0.6577045917510986, "learning_rate": 2.074599345902589e-07, "loss": 0.0634, "step": 53563 }, { "epoch": 0.94859364139073, "grad_norm": 0.782880961894989, "learning_rate": 2.0731736134255607e-07, "loss": 0.0605, "step": 53564 }, { "epoch": 0.9486113509277584, "grad_norm": 0.5234897136688232, "learning_rate": 2.0717483676128247e-07, "loss": 0.0425, "step": 53565 }, { "epoch": 0.9486290604647868, "grad_norm": 0.7178432941436768, "learning_rate": 2.070323608469077e-07, "loss": 0.0412, "step": 53566 }, { "epoch": 0.9486467700018152, "grad_norm": 0.084145687520504, "learning_rate": 2.068899335999014e-07, "loss": 0.0232, "step": 53567 }, { "epoch": 0.9486644795388437, "grad_norm": 0.5712634325027466, "learning_rate": 2.0674755502072983e-07, "loss": 0.04, "step": 53568 }, { "epoch": 0.9486821890758721, "grad_norm": 0.9557926058769226, "learning_rate": 2.0660522510986267e-07, "loss": 0.0524, "step": 53569 }, { "epoch": 0.9486998986129005, "grad_norm": 0.5090975165367126, "learning_rate": 2.0646294386776788e-07, "loss": 0.0718, "step": 53570 }, { "epoch": 0.9487176081499289, "grad_norm": 0.5873278975486755, "learning_rate": 2.0632071129491503e-07, "loss": 0.0496, "step": 53571 }, { "epoch": 0.9487353176869574, "grad_norm": 0.6314638257026672, "learning_rate": 2.0617852739177046e-07, "loss": 0.0516, "step": 53572 }, { "epoch": 0.9487530272239858, "grad_norm": 0.4050745666027069, "learning_rate": 2.060363921588021e-07, "loss": 0.0588, "step": 53573 }, { "epoch": 0.9487707367610142, "grad_norm": 0.6314139366149902, "learning_rate": 2.0589430559647793e-07, "loss": 0.0431, "step": 53574 }, { "epoch": 0.9487884462980426, "grad_norm": 0.5040107369422913, "learning_rate": 2.0575226770526424e-07, "loss": 0.0425, "step": 53575 }, { "epoch": 0.9488061558350711, "grad_norm": 0.6861864924430847, "learning_rate": 2.056102784856273e-07, "loss": 0.0377, "step": 53576 }, { "epoch": 0.9488238653720995, "grad_norm": 0.23545058071613312, "learning_rate": 2.0546833793803843e-07, "loss": 0.0472, "step": 53577 }, { "epoch": 0.9488415749091279, "grad_norm": 0.5182316899299622, "learning_rate": 2.053264460629639e-07, "loss": 0.0812, "step": 53578 }, { "epoch": 0.9488592844461564, "grad_norm": 0.22044329345226288, "learning_rate": 2.0518460286086672e-07, "loss": 0.0671, "step": 53579 }, { "epoch": 0.9488769939831848, "grad_norm": 0.43104156851768494, "learning_rate": 2.0504280833221644e-07, "loss": 0.0667, "step": 53580 }, { "epoch": 0.9488947035202132, "grad_norm": 0.28889334201812744, "learning_rate": 2.049010624774794e-07, "loss": 0.0298, "step": 53581 }, { "epoch": 0.9489124130572416, "grad_norm": 0.6073416471481323, "learning_rate": 2.0475936529712191e-07, "loss": 0.0413, "step": 53582 }, { "epoch": 0.9489301225942701, "grad_norm": 0.7402695417404175, "learning_rate": 2.0461771679160858e-07, "loss": 0.0503, "step": 53583 }, { "epoch": 0.9489478321312985, "grad_norm": 0.6927594542503357, "learning_rate": 2.0447611696140567e-07, "loss": 0.0509, "step": 53584 }, { "epoch": 0.9489655416683269, "grad_norm": 0.3853904902935028, "learning_rate": 2.0433456580698117e-07, "loss": 0.0429, "step": 53585 }, { "epoch": 0.9489832512053553, "grad_norm": 0.3988405466079712, "learning_rate": 2.0419306332879804e-07, "loss": 0.0539, "step": 53586 }, { "epoch": 0.9490009607423838, "grad_norm": 0.380156546831131, "learning_rate": 2.0405160952732427e-07, "loss": 0.0486, "step": 53587 }, { "epoch": 0.9490186702794122, "grad_norm": 0.6696500778198242, "learning_rate": 2.0391020440302611e-07, "loss": 0.0539, "step": 53588 }, { "epoch": 0.9490363798164406, "grad_norm": 0.6694120168685913, "learning_rate": 2.0376884795636487e-07, "loss": 0.048, "step": 53589 }, { "epoch": 0.949054089353469, "grad_norm": 0.36111903190612793, "learning_rate": 2.0362754018780682e-07, "loss": 0.0468, "step": 53590 }, { "epoch": 0.9490717988904975, "grad_norm": 0.5214544534683228, "learning_rate": 2.0348628109781832e-07, "loss": 0.0829, "step": 53591 }, { "epoch": 0.9490895084275259, "grad_norm": 0.7831245064735413, "learning_rate": 2.0334507068686393e-07, "loss": 0.0575, "step": 53592 }, { "epoch": 0.9491072179645543, "grad_norm": 0.39506420493125916, "learning_rate": 2.0320390895540664e-07, "loss": 0.0269, "step": 53593 }, { "epoch": 0.9491249275015828, "grad_norm": 0.5550159811973572, "learning_rate": 2.0306279590391274e-07, "loss": 0.0638, "step": 53594 }, { "epoch": 0.9491426370386112, "grad_norm": 0.6566033959388733, "learning_rate": 2.029217315328452e-07, "loss": 0.049, "step": 53595 }, { "epoch": 0.9491603465756396, "grad_norm": 0.5053915977478027, "learning_rate": 2.0278071584266866e-07, "loss": 0.0625, "step": 53596 }, { "epoch": 0.949178056112668, "grad_norm": 0.6031528115272522, "learning_rate": 2.0263974883384605e-07, "loss": 0.0586, "step": 53597 }, { "epoch": 0.9491957656496965, "grad_norm": 0.7154484987258911, "learning_rate": 2.024988305068437e-07, "loss": 0.0619, "step": 53598 }, { "epoch": 0.9492134751867249, "grad_norm": 0.6111194491386414, "learning_rate": 2.0235796086212287e-07, "loss": 0.0417, "step": 53599 }, { "epoch": 0.9492311847237533, "grad_norm": 0.7609778642654419, "learning_rate": 2.0221713990014823e-07, "loss": 0.0507, "step": 53600 }, { "epoch": 0.9492488942607817, "grad_norm": 0.1698765903711319, "learning_rate": 2.0207636762138104e-07, "loss": 0.0467, "step": 53601 }, { "epoch": 0.9492666037978102, "grad_norm": 0.5976370573043823, "learning_rate": 2.0193564402628594e-07, "loss": 0.0368, "step": 53602 }, { "epoch": 0.9492843133348386, "grad_norm": 0.6501526832580566, "learning_rate": 2.0179496911532757e-07, "loss": 0.0539, "step": 53603 }, { "epoch": 0.949302022871867, "grad_norm": 0.7583099007606506, "learning_rate": 2.0165434288896556e-07, "loss": 0.0629, "step": 53604 }, { "epoch": 0.9493197324088956, "grad_norm": 0.879335880279541, "learning_rate": 2.0151376534766285e-07, "loss": 0.0838, "step": 53605 }, { "epoch": 0.949337441945924, "grad_norm": 0.5797887444496155, "learning_rate": 2.0137323649188577e-07, "loss": 0.0363, "step": 53606 }, { "epoch": 0.9493551514829524, "grad_norm": 0.4301254451274872, "learning_rate": 2.0123275632209225e-07, "loss": 0.0391, "step": 53607 }, { "epoch": 0.9493728610199808, "grad_norm": 0.7009970545768738, "learning_rate": 2.0109232483874695e-07, "loss": 0.0394, "step": 53608 }, { "epoch": 0.9493905705570093, "grad_norm": 0.8486696481704712, "learning_rate": 2.0095194204231116e-07, "loss": 0.0698, "step": 53609 }, { "epoch": 0.9494082800940377, "grad_norm": 0.6825524568557739, "learning_rate": 2.0081160793324615e-07, "loss": 0.0615, "step": 53610 }, { "epoch": 0.9494259896310661, "grad_norm": 0.4091872572898865, "learning_rate": 2.0067132251201491e-07, "loss": 0.0395, "step": 53611 }, { "epoch": 0.9494436991680945, "grad_norm": 0.4777953028678894, "learning_rate": 2.0053108577907708e-07, "loss": 0.0501, "step": 53612 }, { "epoch": 0.949461408705123, "grad_norm": 0.5835278630256653, "learning_rate": 2.003908977348956e-07, "loss": 0.0727, "step": 53613 }, { "epoch": 0.9494791182421514, "grad_norm": 0.4645870625972748, "learning_rate": 2.0025075837993012e-07, "loss": 0.0386, "step": 53614 }, { "epoch": 0.9494968277791798, "grad_norm": 0.8229319453239441, "learning_rate": 2.001106677146436e-07, "loss": 0.0671, "step": 53615 }, { "epoch": 0.9495145373162082, "grad_norm": 0.6226233839988708, "learning_rate": 1.9997062573949566e-07, "loss": 0.0653, "step": 53616 }, { "epoch": 0.9495322468532367, "grad_norm": 0.2855505049228668, "learning_rate": 1.9983063245494926e-07, "loss": 0.0561, "step": 53617 }, { "epoch": 0.9495499563902651, "grad_norm": 0.4664682149887085, "learning_rate": 1.9969068786146238e-07, "loss": 0.0366, "step": 53618 }, { "epoch": 0.9495676659272935, "grad_norm": 0.793146550655365, "learning_rate": 1.9955079195949467e-07, "loss": 0.0576, "step": 53619 }, { "epoch": 0.949585375464322, "grad_norm": 0.44011208415031433, "learning_rate": 1.994109447495107e-07, "loss": 0.043, "step": 53620 }, { "epoch": 0.9496030850013504, "grad_norm": 0.5406572222709656, "learning_rate": 1.9927114623196517e-07, "loss": 0.0401, "step": 53621 }, { "epoch": 0.9496207945383788, "grad_norm": 0.39181163907051086, "learning_rate": 1.9913139640732104e-07, "loss": 0.0427, "step": 53622 }, { "epoch": 0.9496385040754072, "grad_norm": 0.5378925800323486, "learning_rate": 1.989916952760379e-07, "loss": 0.046, "step": 53623 }, { "epoch": 0.9496562136124357, "grad_norm": 0.299835205078125, "learning_rate": 1.9885204283857371e-07, "loss": 0.0452, "step": 53624 }, { "epoch": 0.9496739231494641, "grad_norm": 0.2882060110569, "learning_rate": 1.987124390953915e-07, "loss": 0.0393, "step": 53625 }, { "epoch": 0.9496916326864925, "grad_norm": 0.7024217844009399, "learning_rate": 1.9857288404694584e-07, "loss": 0.0693, "step": 53626 }, { "epoch": 0.9497093422235209, "grad_norm": 0.6621454954147339, "learning_rate": 1.984333776937014e-07, "loss": 0.0447, "step": 53627 }, { "epoch": 0.9497270517605494, "grad_norm": 0.4029763340950012, "learning_rate": 1.982939200361128e-07, "loss": 0.0511, "step": 53628 }, { "epoch": 0.9497447612975778, "grad_norm": 0.5040832161903381, "learning_rate": 1.9815451107463966e-07, "loss": 0.0355, "step": 53629 }, { "epoch": 0.9497624708346062, "grad_norm": 0.382325142621994, "learning_rate": 1.9801515080973997e-07, "loss": 0.0435, "step": 53630 }, { "epoch": 0.9497801803716346, "grad_norm": 0.5620217323303223, "learning_rate": 1.97875839241875e-07, "loss": 0.075, "step": 53631 }, { "epoch": 0.9497978899086631, "grad_norm": 0.5400004386901855, "learning_rate": 1.9773657637150112e-07, "loss": 0.0396, "step": 53632 }, { "epoch": 0.9498155994456915, "grad_norm": 0.684669017791748, "learning_rate": 1.975973621990762e-07, "loss": 0.0649, "step": 53633 }, { "epoch": 0.9498333089827199, "grad_norm": 0.6950770020484924, "learning_rate": 1.9745819672505826e-07, "loss": 0.0501, "step": 53634 }, { "epoch": 0.9498510185197484, "grad_norm": 0.6897353529930115, "learning_rate": 1.9731907994990694e-07, "loss": 0.0497, "step": 53635 }, { "epoch": 0.9498687280567768, "grad_norm": 0.7616205215454102, "learning_rate": 1.9718001187407853e-07, "loss": 0.0658, "step": 53636 }, { "epoch": 0.9498864375938052, "grad_norm": 0.5191478133201599, "learning_rate": 1.9704099249802932e-07, "loss": 0.0553, "step": 53637 }, { "epoch": 0.9499041471308336, "grad_norm": 0.38082683086395264, "learning_rate": 1.9690202182222062e-07, "loss": 0.0359, "step": 53638 }, { "epoch": 0.9499218566678621, "grad_norm": 0.4518571197986603, "learning_rate": 1.9676309984710538e-07, "loss": 0.0616, "step": 53639 }, { "epoch": 0.9499395662048905, "grad_norm": 0.4351899325847626, "learning_rate": 1.9662422657314161e-07, "loss": 0.0682, "step": 53640 }, { "epoch": 0.9499572757419189, "grad_norm": 0.27751442790031433, "learning_rate": 1.9648540200078724e-07, "loss": 0.0492, "step": 53641 }, { "epoch": 0.9499749852789473, "grad_norm": 0.777283787727356, "learning_rate": 1.9634662613049858e-07, "loss": 0.0479, "step": 53642 }, { "epoch": 0.9499926948159758, "grad_norm": 0.7510127425193787, "learning_rate": 1.9620789896273194e-07, "loss": 0.0471, "step": 53643 }, { "epoch": 0.9500104043530042, "grad_norm": 0.3069444000720978, "learning_rate": 1.960692204979453e-07, "loss": 0.0466, "step": 53644 }, { "epoch": 0.9500281138900326, "grad_norm": 1.301768183708191, "learning_rate": 1.9593059073659325e-07, "loss": 0.0849, "step": 53645 }, { "epoch": 0.950045823427061, "grad_norm": 0.5490161776542664, "learning_rate": 1.9579200967913213e-07, "loss": 0.047, "step": 53646 }, { "epoch": 0.9500635329640895, "grad_norm": 0.1991792917251587, "learning_rate": 1.9565347732601824e-07, "loss": 0.0626, "step": 53647 }, { "epoch": 0.9500812425011179, "grad_norm": 0.9438685178756714, "learning_rate": 1.955149936777062e-07, "loss": 0.0435, "step": 53648 }, { "epoch": 0.9500989520381463, "grad_norm": 0.3549827039241791, "learning_rate": 1.9537655873465232e-07, "loss": 0.0397, "step": 53649 }, { "epoch": 0.9501166615751748, "grad_norm": 0.6553602814674377, "learning_rate": 1.952381724973129e-07, "loss": 0.0418, "step": 53650 }, { "epoch": 0.9501343711122032, "grad_norm": 0.9113306403160095, "learning_rate": 1.950998349661426e-07, "loss": 0.0721, "step": 53651 }, { "epoch": 0.9501520806492316, "grad_norm": 0.7452832460403442, "learning_rate": 1.94961546141596e-07, "loss": 0.0643, "step": 53652 }, { "epoch": 0.95016979018626, "grad_norm": 0.479361355304718, "learning_rate": 1.9482330602412778e-07, "loss": 0.0552, "step": 53653 }, { "epoch": 0.9501874997232885, "grad_norm": 0.3750080466270447, "learning_rate": 1.9468511461419425e-07, "loss": 0.0498, "step": 53654 }, { "epoch": 0.9502052092603169, "grad_norm": 0.7234089374542236, "learning_rate": 1.9454697191224835e-07, "loss": 0.0658, "step": 53655 }, { "epoch": 0.9502229187973453, "grad_norm": 0.5733305215835571, "learning_rate": 1.944088779187464e-07, "loss": 0.0535, "step": 53656 }, { "epoch": 0.9502406283343737, "grad_norm": 0.7854490876197815, "learning_rate": 1.9427083263414136e-07, "loss": 0.0805, "step": 53657 }, { "epoch": 0.9502583378714022, "grad_norm": 0.3396439254283905, "learning_rate": 1.9413283605888787e-07, "loss": 0.0572, "step": 53658 }, { "epoch": 0.9502760474084306, "grad_norm": 0.4825059771537781, "learning_rate": 1.9399488819344057e-07, "loss": 0.0483, "step": 53659 }, { "epoch": 0.950293756945459, "grad_norm": 0.5440409779548645, "learning_rate": 1.9385698903825243e-07, "loss": 0.0588, "step": 53660 }, { "epoch": 0.9503114664824874, "grad_norm": 0.5034157037734985, "learning_rate": 1.9371913859377644e-07, "loss": 0.0376, "step": 53661 }, { "epoch": 0.9503291760195159, "grad_norm": 0.4489127993583679, "learning_rate": 1.935813368604672e-07, "loss": 0.0513, "step": 53662 }, { "epoch": 0.9503468855565443, "grad_norm": 0.5378717184066772, "learning_rate": 1.934435838387777e-07, "loss": 0.0492, "step": 53663 }, { "epoch": 0.9503645950935727, "grad_norm": 0.5390704274177551, "learning_rate": 1.933058795291609e-07, "loss": 0.0488, "step": 53664 }, { "epoch": 0.9503823046306012, "grad_norm": 1.000792145729065, "learning_rate": 1.9316822393206978e-07, "loss": 0.0744, "step": 53665 }, { "epoch": 0.9504000141676296, "grad_norm": 0.5445042848587036, "learning_rate": 1.93030617047959e-07, "loss": 0.0428, "step": 53666 }, { "epoch": 0.950417723704658, "grad_norm": 0.19057093560695648, "learning_rate": 1.928930588772798e-07, "loss": 0.0423, "step": 53667 }, { "epoch": 0.9504354332416864, "grad_norm": 0.412887841463089, "learning_rate": 1.9275554942048358e-07, "loss": 0.0424, "step": 53668 }, { "epoch": 0.950453142778715, "grad_norm": 0.6888561844825745, "learning_rate": 1.9261808867802487e-07, "loss": 0.0753, "step": 53669 }, { "epoch": 0.9504708523157434, "grad_norm": 0.49602675437927246, "learning_rate": 1.9248067665035508e-07, "loss": 0.0412, "step": 53670 }, { "epoch": 0.9504885618527718, "grad_norm": 0.7629618644714355, "learning_rate": 1.9234331333792376e-07, "loss": 0.0215, "step": 53671 }, { "epoch": 0.9505062713898002, "grad_norm": 0.4381861090660095, "learning_rate": 1.9220599874118727e-07, "loss": 0.0408, "step": 53672 }, { "epoch": 0.9505239809268287, "grad_norm": 0.7075226306915283, "learning_rate": 1.9206873286059522e-07, "loss": 0.0474, "step": 53673 }, { "epoch": 0.9505416904638571, "grad_norm": 0.5396338105201721, "learning_rate": 1.919315156966006e-07, "loss": 0.0648, "step": 53674 }, { "epoch": 0.9505594000008855, "grad_norm": 0.6220386624336243, "learning_rate": 1.9179434724965139e-07, "loss": 0.0528, "step": 53675 }, { "epoch": 0.9505771095379139, "grad_norm": 0.6934633851051331, "learning_rate": 1.916572275202022e-07, "loss": 0.0292, "step": 53676 }, { "epoch": 0.9505948190749424, "grad_norm": 0.8714168071746826, "learning_rate": 1.915201565087027e-07, "loss": 0.0713, "step": 53677 }, { "epoch": 0.9506125286119708, "grad_norm": 0.6658216118812561, "learning_rate": 1.9138313421560416e-07, "loss": 0.0475, "step": 53678 }, { "epoch": 0.9506302381489992, "grad_norm": 0.5951671600341797, "learning_rate": 1.912461606413579e-07, "loss": 0.0629, "step": 53679 }, { "epoch": 0.9506479476860277, "grad_norm": 0.5271709561347961, "learning_rate": 1.9110923578641194e-07, "loss": 0.0709, "step": 53680 }, { "epoch": 0.9506656572230561, "grad_norm": 0.8307682871818542, "learning_rate": 1.9097235965122085e-07, "loss": 0.0709, "step": 53681 }, { "epoch": 0.9506833667600845, "grad_norm": 0.3693753182888031, "learning_rate": 1.9083553223623095e-07, "loss": 0.06, "step": 53682 }, { "epoch": 0.9507010762971129, "grad_norm": 0.5657933354377747, "learning_rate": 1.9069875354189527e-07, "loss": 0.0518, "step": 53683 }, { "epoch": 0.9507187858341414, "grad_norm": 0.8146768808364868, "learning_rate": 1.9056202356866503e-07, "loss": 0.0784, "step": 53684 }, { "epoch": 0.9507364953711698, "grad_norm": 0.8972392082214355, "learning_rate": 1.9042534231698494e-07, "loss": 0.0531, "step": 53685 }, { "epoch": 0.9507542049081982, "grad_norm": 0.2975582480430603, "learning_rate": 1.9028870978730793e-07, "loss": 0.0306, "step": 53686 }, { "epoch": 0.9507719144452266, "grad_norm": 0.4807371497154236, "learning_rate": 1.901521259800837e-07, "loss": 0.0663, "step": 53687 }, { "epoch": 0.9507896239822551, "grad_norm": 1.126024842262268, "learning_rate": 1.9001559089576182e-07, "loss": 0.0973, "step": 53688 }, { "epoch": 0.9508073335192835, "grad_norm": 0.546902596950531, "learning_rate": 1.898791045347903e-07, "loss": 0.0538, "step": 53689 }, { "epoch": 0.9508250430563119, "grad_norm": 0.5441349148750305, "learning_rate": 1.897426668976171e-07, "loss": 0.0578, "step": 53690 }, { "epoch": 0.9508427525933403, "grad_norm": 0.8328808546066284, "learning_rate": 1.8960627798469355e-07, "loss": 0.0512, "step": 53691 }, { "epoch": 0.9508604621303688, "grad_norm": 0.5046835541725159, "learning_rate": 1.894699377964676e-07, "loss": 0.0429, "step": 53692 }, { "epoch": 0.9508781716673972, "grad_norm": 0.750484049320221, "learning_rate": 1.8933364633338722e-07, "loss": 0.078, "step": 53693 }, { "epoch": 0.9508958812044256, "grad_norm": 0.46744251251220703, "learning_rate": 1.8919740359590044e-07, "loss": 0.0495, "step": 53694 }, { "epoch": 0.9509135907414541, "grad_norm": 0.5670018196105957, "learning_rate": 1.890612095844585e-07, "loss": 0.0679, "step": 53695 }, { "epoch": 0.9509313002784825, "grad_norm": 0.6918146014213562, "learning_rate": 1.8892506429950606e-07, "loss": 0.0386, "step": 53696 }, { "epoch": 0.9509490098155109, "grad_norm": 0.27774813771247864, "learning_rate": 1.8878896774149113e-07, "loss": 0.0495, "step": 53697 }, { "epoch": 0.9509667193525393, "grad_norm": 0.6284881830215454, "learning_rate": 1.886529199108633e-07, "loss": 0.0535, "step": 53698 }, { "epoch": 0.9509844288895678, "grad_norm": 0.475307434797287, "learning_rate": 1.8851692080806892e-07, "loss": 0.0541, "step": 53699 }, { "epoch": 0.9510021384265962, "grad_norm": 0.6712431907653809, "learning_rate": 1.8838097043355429e-07, "loss": 0.0464, "step": 53700 }, { "epoch": 0.9510198479636246, "grad_norm": 0.7536234855651855, "learning_rate": 1.882450687877707e-07, "loss": 0.0738, "step": 53701 }, { "epoch": 0.951037557500653, "grad_norm": 0.797482430934906, "learning_rate": 1.8810921587116282e-07, "loss": 0.043, "step": 53702 }, { "epoch": 0.9510552670376815, "grad_norm": 0.5040937662124634, "learning_rate": 1.8797341168417527e-07, "loss": 0.0452, "step": 53703 }, { "epoch": 0.9510729765747099, "grad_norm": 0.6586959958076477, "learning_rate": 1.8783765622725768e-07, "loss": 0.0445, "step": 53704 }, { "epoch": 0.9510906861117383, "grad_norm": 0.5524454116821289, "learning_rate": 1.877019495008564e-07, "loss": 0.0479, "step": 53705 }, { "epoch": 0.9511083956487667, "grad_norm": 0.5213131904602051, "learning_rate": 1.8756629150541937e-07, "loss": 0.0609, "step": 53706 }, { "epoch": 0.9511261051857952, "grad_norm": 0.4173625409603119, "learning_rate": 1.8743068224138958e-07, "loss": 0.0445, "step": 53707 }, { "epoch": 0.9511438147228236, "grad_norm": 0.8786997199058533, "learning_rate": 1.8729512170921338e-07, "loss": 0.0579, "step": 53708 }, { "epoch": 0.951161524259852, "grad_norm": 0.4078359603881836, "learning_rate": 1.8715960990933868e-07, "loss": 0.0827, "step": 53709 }, { "epoch": 0.9511792337968805, "grad_norm": 0.5223971605300903, "learning_rate": 1.8702414684221182e-07, "loss": 0.0546, "step": 53710 }, { "epoch": 0.9511969433339089, "grad_norm": 0.6133947372436523, "learning_rate": 1.8688873250827576e-07, "loss": 0.0589, "step": 53711 }, { "epoch": 0.9512146528709373, "grad_norm": 0.5657796263694763, "learning_rate": 1.867533669079785e-07, "loss": 0.0688, "step": 53712 }, { "epoch": 0.9512323624079657, "grad_norm": 0.73028564453125, "learning_rate": 1.8661805004176469e-07, "loss": 0.0517, "step": 53713 }, { "epoch": 0.9512500719449942, "grad_norm": 0.8284139037132263, "learning_rate": 1.8648278191007895e-07, "loss": 0.0888, "step": 53714 }, { "epoch": 0.9512677814820226, "grad_norm": 0.608796238899231, "learning_rate": 1.8634756251336594e-07, "loss": 0.049, "step": 53715 }, { "epoch": 0.951285491019051, "grad_norm": 0.4713643491268158, "learning_rate": 1.8621239185207196e-07, "loss": 0.0656, "step": 53716 }, { "epoch": 0.9513032005560794, "grad_norm": 0.1673918068408966, "learning_rate": 1.8607726992664e-07, "loss": 0.0529, "step": 53717 }, { "epoch": 0.9513209100931079, "grad_norm": 0.865010678768158, "learning_rate": 1.8594219673751633e-07, "loss": 0.0573, "step": 53718 }, { "epoch": 0.9513386196301363, "grad_norm": 0.6231929659843445, "learning_rate": 1.8580717228514399e-07, "loss": 0.0525, "step": 53719 }, { "epoch": 0.9513563291671647, "grad_norm": 0.4441368281841278, "learning_rate": 1.856721965699676e-07, "loss": 0.0314, "step": 53720 }, { "epoch": 0.9513740387041931, "grad_norm": 0.87776780128479, "learning_rate": 1.8553726959243012e-07, "loss": 0.0938, "step": 53721 }, { "epoch": 0.9513917482412216, "grad_norm": 0.38074782490730286, "learning_rate": 1.8540239135297787e-07, "loss": 0.0497, "step": 53722 }, { "epoch": 0.95140945777825, "grad_norm": 0.9306635856628418, "learning_rate": 1.8526756185205385e-07, "loss": 0.0479, "step": 53723 }, { "epoch": 0.9514271673152784, "grad_norm": 0.4346558153629303, "learning_rate": 1.85132781090101e-07, "loss": 0.0339, "step": 53724 }, { "epoch": 0.9514448768523069, "grad_norm": 1.0408605337142944, "learning_rate": 1.8499804906756236e-07, "loss": 0.0588, "step": 53725 }, { "epoch": 0.9514625863893353, "grad_norm": 1.024692177772522, "learning_rate": 1.848633657848825e-07, "loss": 0.0577, "step": 53726 }, { "epoch": 0.9514802959263637, "grad_norm": 0.44104793667793274, "learning_rate": 1.8472873124250277e-07, "loss": 0.0481, "step": 53727 }, { "epoch": 0.9514980054633921, "grad_norm": 0.48062244057655334, "learning_rate": 1.8459414544086783e-07, "loss": 0.0395, "step": 53728 }, { "epoch": 0.9515157150004206, "grad_norm": 0.22335322201251984, "learning_rate": 1.844596083804173e-07, "loss": 0.0497, "step": 53729 }, { "epoch": 0.951533424537449, "grad_norm": 0.6531790494918823, "learning_rate": 1.843251200615992e-07, "loss": 0.0246, "step": 53730 }, { "epoch": 0.9515511340744774, "grad_norm": 0.20919275283813477, "learning_rate": 1.8419068048485144e-07, "loss": 0.052, "step": 53731 }, { "epoch": 0.9515688436115058, "grad_norm": 0.429395467042923, "learning_rate": 1.8405628965061872e-07, "loss": 0.0647, "step": 53732 }, { "epoch": 0.9515865531485344, "grad_norm": 0.43415454030036926, "learning_rate": 1.8392194755934066e-07, "loss": 0.0717, "step": 53733 }, { "epoch": 0.9516042626855628, "grad_norm": 0.4042188823223114, "learning_rate": 1.837876542114636e-07, "loss": 0.0356, "step": 53734 }, { "epoch": 0.9516219722225912, "grad_norm": 0.5180478692054749, "learning_rate": 1.8365340960742383e-07, "loss": 0.0726, "step": 53735 }, { "epoch": 0.9516396817596195, "grad_norm": 0.3322024941444397, "learning_rate": 1.8351921374766767e-07, "loss": 0.0243, "step": 53736 }, { "epoch": 0.9516573912966481, "grad_norm": 0.694124162197113, "learning_rate": 1.833850666326331e-07, "loss": 0.0522, "step": 53737 }, { "epoch": 0.9516751008336765, "grad_norm": 1.1986786127090454, "learning_rate": 1.8325096826276311e-07, "loss": 0.0596, "step": 53738 }, { "epoch": 0.9516928103707049, "grad_norm": 0.5748795866966248, "learning_rate": 1.8311691863850067e-07, "loss": 0.0472, "step": 53739 }, { "epoch": 0.9517105199077334, "grad_norm": 0.6491461992263794, "learning_rate": 1.829829177602821e-07, "loss": 0.041, "step": 53740 }, { "epoch": 0.9517282294447618, "grad_norm": 0.5359241366386414, "learning_rate": 1.828489656285537e-07, "loss": 0.0589, "step": 53741 }, { "epoch": 0.9517459389817902, "grad_norm": 0.8140634298324585, "learning_rate": 1.8271506224375344e-07, "loss": 0.0579, "step": 53742 }, { "epoch": 0.9517636485188186, "grad_norm": 0.7325196266174316, "learning_rate": 1.8258120760632103e-07, "loss": 0.0619, "step": 53743 }, { "epoch": 0.9517813580558471, "grad_norm": 0.6380775570869446, "learning_rate": 1.8244740171669606e-07, "loss": 0.0396, "step": 53744 }, { "epoch": 0.9517990675928755, "grad_norm": 0.896894097328186, "learning_rate": 1.8231364457532318e-07, "loss": 0.1156, "step": 53745 }, { "epoch": 0.9518167771299039, "grad_norm": 0.3455113470554352, "learning_rate": 1.8217993618263874e-07, "loss": 0.0282, "step": 53746 }, { "epoch": 0.9518344866669323, "grad_norm": 0.565898597240448, "learning_rate": 1.8204627653908402e-07, "loss": 0.0522, "step": 53747 }, { "epoch": 0.9518521962039608, "grad_norm": 0.4343191683292389, "learning_rate": 1.81912665645097e-07, "loss": 0.0528, "step": 53748 }, { "epoch": 0.9518699057409892, "grad_norm": 0.5170320272445679, "learning_rate": 1.8177910350111904e-07, "loss": 0.0463, "step": 53749 }, { "epoch": 0.9518876152780176, "grad_norm": 0.5797188878059387, "learning_rate": 1.8164559010758973e-07, "loss": 0.0592, "step": 53750 }, { "epoch": 0.951905324815046, "grad_norm": 0.5792394876480103, "learning_rate": 1.815121254649471e-07, "loss": 0.0479, "step": 53751 }, { "epoch": 0.9519230343520745, "grad_norm": 0.41806602478027344, "learning_rate": 1.8137870957363245e-07, "loss": 0.0566, "step": 53752 }, { "epoch": 0.9519407438891029, "grad_norm": 0.9878588318824768, "learning_rate": 1.812453424340804e-07, "loss": 0.0547, "step": 53753 }, { "epoch": 0.9519584534261313, "grad_norm": 0.754988968372345, "learning_rate": 1.81112024046734e-07, "loss": 0.0482, "step": 53754 }, { "epoch": 0.9519761629631598, "grad_norm": 0.4869875907897949, "learning_rate": 1.809787544120295e-07, "loss": 0.042, "step": 53755 }, { "epoch": 0.9519938725001882, "grad_norm": 0.5581526756286621, "learning_rate": 1.8084553353040656e-07, "loss": 0.0841, "step": 53756 }, { "epoch": 0.9520115820372166, "grad_norm": 0.8398380279541016, "learning_rate": 1.807123614023032e-07, "loss": 0.0462, "step": 53757 }, { "epoch": 0.952029291574245, "grad_norm": 0.5884783267974854, "learning_rate": 1.80579238028154e-07, "loss": 0.0542, "step": 53758 }, { "epoch": 0.9520470011112735, "grad_norm": 0.3058605492115021, "learning_rate": 1.804461634084037e-07, "loss": 0.0583, "step": 53759 }, { "epoch": 0.9520647106483019, "grad_norm": 0.609390914440155, "learning_rate": 1.803131375434869e-07, "loss": 0.0378, "step": 53760 }, { "epoch": 0.9520824201853303, "grad_norm": 0.756102442741394, "learning_rate": 1.801801604338399e-07, "loss": 0.0645, "step": 53761 }, { "epoch": 0.9521001297223587, "grad_norm": 0.6892519593238831, "learning_rate": 1.8004723207990072e-07, "loss": 0.0519, "step": 53762 }, { "epoch": 0.9521178392593872, "grad_norm": 0.6288892030715942, "learning_rate": 1.799143524821073e-07, "loss": 0.0492, "step": 53763 }, { "epoch": 0.9521355487964156, "grad_norm": 0.8498178720474243, "learning_rate": 1.7978152164089768e-07, "loss": 0.0276, "step": 53764 }, { "epoch": 0.952153258333444, "grad_norm": 0.7757754325866699, "learning_rate": 1.7964873955670648e-07, "loss": 0.0327, "step": 53765 }, { "epoch": 0.9521709678704724, "grad_norm": 0.6578748822212219, "learning_rate": 1.7951600622997334e-07, "loss": 0.038, "step": 53766 }, { "epoch": 0.9521886774075009, "grad_norm": 0.8561909198760986, "learning_rate": 1.7938332166113292e-07, "loss": 0.0688, "step": 53767 }, { "epoch": 0.9522063869445293, "grad_norm": 0.9119229316711426, "learning_rate": 1.7925068585062155e-07, "loss": 0.0695, "step": 53768 }, { "epoch": 0.9522240964815577, "grad_norm": 0.6082603931427002, "learning_rate": 1.7911809879887885e-07, "loss": 0.0309, "step": 53769 }, { "epoch": 0.9522418060185862, "grad_norm": 0.8001160621643066, "learning_rate": 1.7898556050633618e-07, "loss": 0.0515, "step": 53770 }, { "epoch": 0.9522595155556146, "grad_norm": 0.5417875051498413, "learning_rate": 1.7885307097343317e-07, "loss": 0.0603, "step": 53771 }, { "epoch": 0.952277225092643, "grad_norm": 0.7668594121932983, "learning_rate": 1.787206302006028e-07, "loss": 0.0658, "step": 53772 }, { "epoch": 0.9522949346296714, "grad_norm": 0.9756107926368713, "learning_rate": 1.7858823818828473e-07, "loss": 0.0759, "step": 53773 }, { "epoch": 0.9523126441666999, "grad_norm": 0.33508750796318054, "learning_rate": 1.7845589493691194e-07, "loss": 0.0397, "step": 53774 }, { "epoch": 0.9523303537037283, "grad_norm": 0.8842409253120422, "learning_rate": 1.783236004469191e-07, "loss": 0.0579, "step": 53775 }, { "epoch": 0.9523480632407567, "grad_norm": 0.6659464836120605, "learning_rate": 1.7819135471874247e-07, "loss": 0.0478, "step": 53776 }, { "epoch": 0.9523657727777851, "grad_norm": 0.7033784985542297, "learning_rate": 1.780591577528168e-07, "loss": 0.0555, "step": 53777 }, { "epoch": 0.9523834823148136, "grad_norm": 0.5853169560432434, "learning_rate": 1.779270095495783e-07, "loss": 0.0729, "step": 53778 }, { "epoch": 0.952401191851842, "grad_norm": 0.5456191301345825, "learning_rate": 1.7779491010946003e-07, "loss": 0.0505, "step": 53779 }, { "epoch": 0.9524189013888704, "grad_norm": 0.6607117652893066, "learning_rate": 1.7766285943289828e-07, "loss": 0.0367, "step": 53780 }, { "epoch": 0.9524366109258988, "grad_norm": 0.679460346698761, "learning_rate": 1.7753085752032605e-07, "loss": 0.0491, "step": 53781 }, { "epoch": 0.9524543204629273, "grad_norm": 1.0442092418670654, "learning_rate": 1.7739890437217797e-07, "loss": 0.081, "step": 53782 }, { "epoch": 0.9524720299999557, "grad_norm": 0.3461480736732483, "learning_rate": 1.7726699998888872e-07, "loss": 0.0595, "step": 53783 }, { "epoch": 0.9524897395369841, "grad_norm": 0.469769150018692, "learning_rate": 1.7713514437089296e-07, "loss": 0.0434, "step": 53784 }, { "epoch": 0.9525074490740126, "grad_norm": 0.6922518610954285, "learning_rate": 1.7700333751862196e-07, "loss": 0.0449, "step": 53785 }, { "epoch": 0.952525158611041, "grad_norm": 0.7299587726593018, "learning_rate": 1.7687157943251043e-07, "loss": 0.0645, "step": 53786 }, { "epoch": 0.9525428681480694, "grad_norm": 0.6253238320350647, "learning_rate": 1.7673987011299132e-07, "loss": 0.0399, "step": 53787 }, { "epoch": 0.9525605776850978, "grad_norm": 0.4068475067615509, "learning_rate": 1.7660820956049928e-07, "loss": 0.0369, "step": 53788 }, { "epoch": 0.9525782872221263, "grad_norm": 0.3155151307582855, "learning_rate": 1.764765977754673e-07, "loss": 0.0452, "step": 53789 }, { "epoch": 0.9525959967591547, "grad_norm": 0.6004643440246582, "learning_rate": 1.7634503475832843e-07, "loss": 0.0597, "step": 53790 }, { "epoch": 0.9526137062961831, "grad_norm": 0.6354189515113831, "learning_rate": 1.7621352050951557e-07, "loss": 0.0629, "step": 53791 }, { "epoch": 0.9526314158332115, "grad_norm": 0.6273449063301086, "learning_rate": 1.7608205502946006e-07, "loss": 0.0645, "step": 53792 }, { "epoch": 0.95264912537024, "grad_norm": 0.3491506576538086, "learning_rate": 1.7595063831859492e-07, "loss": 0.053, "step": 53793 }, { "epoch": 0.9526668349072684, "grad_norm": 0.7909927368164062, "learning_rate": 1.758192703773548e-07, "loss": 0.0382, "step": 53794 }, { "epoch": 0.9526845444442968, "grad_norm": 0.35622134804725647, "learning_rate": 1.7568795120616765e-07, "loss": 0.0415, "step": 53795 }, { "epoch": 0.9527022539813252, "grad_norm": 0.5837118625640869, "learning_rate": 1.7555668080546816e-07, "loss": 0.0476, "step": 53796 }, { "epoch": 0.9527199635183538, "grad_norm": 0.70560622215271, "learning_rate": 1.7542545917568765e-07, "loss": 0.0611, "step": 53797 }, { "epoch": 0.9527376730553822, "grad_norm": 0.4936220049858093, "learning_rate": 1.752942863172591e-07, "loss": 0.0577, "step": 53798 }, { "epoch": 0.9527553825924105, "grad_norm": 0.6544216871261597, "learning_rate": 1.7516316223061213e-07, "loss": 0.0685, "step": 53799 }, { "epoch": 0.9527730921294391, "grad_norm": 0.7139471173286438, "learning_rate": 1.7503208691617812e-07, "loss": 0.054, "step": 53800 }, { "epoch": 0.9527908016664675, "grad_norm": 0.7449265718460083, "learning_rate": 1.7490106037439168e-07, "loss": 0.0537, "step": 53801 }, { "epoch": 0.9528085112034959, "grad_norm": 0.3997323215007782, "learning_rate": 1.7477008260567917e-07, "loss": 0.024, "step": 53802 }, { "epoch": 0.9528262207405243, "grad_norm": 0.30180713534355164, "learning_rate": 1.7463915361047356e-07, "loss": 0.0567, "step": 53803 }, { "epoch": 0.9528439302775528, "grad_norm": 0.6926308274269104, "learning_rate": 1.7450827338920616e-07, "loss": 0.0857, "step": 53804 }, { "epoch": 0.9528616398145812, "grad_norm": 0.3883962631225586, "learning_rate": 1.7437744194230664e-07, "loss": 0.054, "step": 53805 }, { "epoch": 0.9528793493516096, "grad_norm": 0.6308318972587585, "learning_rate": 1.7424665927020467e-07, "loss": 0.0612, "step": 53806 }, { "epoch": 0.952897058888638, "grad_norm": 0.5001434683799744, "learning_rate": 1.741159253733332e-07, "loss": 0.0503, "step": 53807 }, { "epoch": 0.9529147684256665, "grad_norm": 0.5693075060844421, "learning_rate": 1.7398524025212026e-07, "loss": 0.0626, "step": 53808 }, { "epoch": 0.9529324779626949, "grad_norm": 0.793148398399353, "learning_rate": 1.738546039069988e-07, "loss": 0.0568, "step": 53809 }, { "epoch": 0.9529501874997233, "grad_norm": 0.41329899430274963, "learning_rate": 1.7372401633839353e-07, "loss": 0.0631, "step": 53810 }, { "epoch": 0.9529678970367517, "grad_norm": 0.6460240483283997, "learning_rate": 1.7359347754673737e-07, "loss": 0.0856, "step": 53811 }, { "epoch": 0.9529856065737802, "grad_norm": 0.4644482731819153, "learning_rate": 1.7346298753245836e-07, "loss": 0.0488, "step": 53812 }, { "epoch": 0.9530033161108086, "grad_norm": 0.5016517043113708, "learning_rate": 1.7333254629598783e-07, "loss": 0.0596, "step": 53813 }, { "epoch": 0.953021025647837, "grad_norm": 0.6277070045471191, "learning_rate": 1.7320215383775208e-07, "loss": 0.0598, "step": 53814 }, { "epoch": 0.9530387351848655, "grad_norm": 0.733630359172821, "learning_rate": 1.730718101581824e-07, "loss": 0.0567, "step": 53815 }, { "epoch": 0.9530564447218939, "grad_norm": 0.550438404083252, "learning_rate": 1.7294151525770684e-07, "loss": 0.0391, "step": 53816 }, { "epoch": 0.9530741542589223, "grad_norm": 0.32924503087997437, "learning_rate": 1.7281126913675338e-07, "loss": 0.0424, "step": 53817 }, { "epoch": 0.9530918637959507, "grad_norm": 0.3245057761669159, "learning_rate": 1.7268107179575164e-07, "loss": 0.0291, "step": 53818 }, { "epoch": 0.9531095733329792, "grad_norm": 0.7205632328987122, "learning_rate": 1.7255092323512966e-07, "loss": 0.0713, "step": 53819 }, { "epoch": 0.9531272828700076, "grad_norm": 0.6645939946174622, "learning_rate": 1.724208234553154e-07, "loss": 0.064, "step": 53820 }, { "epoch": 0.953144992407036, "grad_norm": 0.6649988889694214, "learning_rate": 1.7229077245673685e-07, "loss": 0.0372, "step": 53821 }, { "epoch": 0.9531627019440644, "grad_norm": 0.8239068388938904, "learning_rate": 1.7216077023982201e-07, "loss": 0.0935, "step": 53822 }, { "epoch": 0.9531804114810929, "grad_norm": 0.4047757685184479, "learning_rate": 1.720308168049989e-07, "loss": 0.0342, "step": 53823 }, { "epoch": 0.9531981210181213, "grad_norm": 0.363132506608963, "learning_rate": 1.7190091215269378e-07, "loss": 0.0535, "step": 53824 }, { "epoch": 0.9532158305551497, "grad_norm": 0.5628295540809631, "learning_rate": 1.7177105628333467e-07, "loss": 0.0465, "step": 53825 }, { "epoch": 0.9532335400921781, "grad_norm": 0.8429918885231018, "learning_rate": 1.7164124919734957e-07, "loss": 0.0452, "step": 53826 }, { "epoch": 0.9532512496292066, "grad_norm": 0.593770444393158, "learning_rate": 1.7151149089516484e-07, "loss": 0.0303, "step": 53827 }, { "epoch": 0.953268959166235, "grad_norm": 0.3992845416069031, "learning_rate": 1.7138178137720838e-07, "loss": 0.0239, "step": 53828 }, { "epoch": 0.9532866687032634, "grad_norm": 0.4982379376888275, "learning_rate": 1.7125212064390493e-07, "loss": 0.0672, "step": 53829 }, { "epoch": 0.9533043782402919, "grad_norm": 0.5575075745582581, "learning_rate": 1.7112250869568246e-07, "loss": 0.0451, "step": 53830 }, { "epoch": 0.9533220877773203, "grad_norm": 0.16940607130527496, "learning_rate": 1.709929455329673e-07, "loss": 0.0291, "step": 53831 }, { "epoch": 0.9533397973143487, "grad_norm": 0.569575846195221, "learning_rate": 1.7086343115618574e-07, "loss": 0.0544, "step": 53832 }, { "epoch": 0.9533575068513771, "grad_norm": 0.5634685754776001, "learning_rate": 1.7073396556576248e-07, "loss": 0.029, "step": 53833 }, { "epoch": 0.9533752163884056, "grad_norm": 0.47905030846595764, "learning_rate": 1.706045487621255e-07, "loss": 0.0575, "step": 53834 }, { "epoch": 0.953392925925434, "grad_norm": 0.479022353887558, "learning_rate": 1.7047518074569945e-07, "loss": 0.0447, "step": 53835 }, { "epoch": 0.9534106354624624, "grad_norm": 0.20177702605724335, "learning_rate": 1.70345861516909e-07, "loss": 0.0385, "step": 53836 }, { "epoch": 0.9534283449994908, "grad_norm": 0.9228288531303406, "learning_rate": 1.7021659107618214e-07, "loss": 0.0865, "step": 53837 }, { "epoch": 0.9534460545365193, "grad_norm": 0.5385511517524719, "learning_rate": 1.7008736942394353e-07, "loss": 0.0398, "step": 53838 }, { "epoch": 0.9534637640735477, "grad_norm": 0.48090657591819763, "learning_rate": 1.6995819656061618e-07, "loss": 0.0341, "step": 53839 }, { "epoch": 0.9534814736105761, "grad_norm": 0.6799531579017639, "learning_rate": 1.6982907248662639e-07, "loss": 0.0413, "step": 53840 }, { "epoch": 0.9534991831476045, "grad_norm": 0.7427071928977966, "learning_rate": 1.696999972024005e-07, "loss": 0.053, "step": 53841 }, { "epoch": 0.953516892684633, "grad_norm": 0.6526899933815002, "learning_rate": 1.695709707083598e-07, "loss": 0.0462, "step": 53842 }, { "epoch": 0.9535346022216614, "grad_norm": 0.7060678005218506, "learning_rate": 1.6944199300493068e-07, "loss": 0.0386, "step": 53843 }, { "epoch": 0.9535523117586898, "grad_norm": 0.7852798700332642, "learning_rate": 1.6931306409253776e-07, "loss": 0.0484, "step": 53844 }, { "epoch": 0.9535700212957183, "grad_norm": 0.42343568801879883, "learning_rate": 1.6918418397160406e-07, "loss": 0.0687, "step": 53845 }, { "epoch": 0.9535877308327467, "grad_norm": 0.7680594325065613, "learning_rate": 1.690553526425559e-07, "loss": 0.051, "step": 53846 }, { "epoch": 0.9536054403697751, "grad_norm": 0.9882162809371948, "learning_rate": 1.6892657010581457e-07, "loss": 0.0691, "step": 53847 }, { "epoch": 0.9536231499068035, "grad_norm": 0.4993714392185211, "learning_rate": 1.6879783636180646e-07, "loss": 0.0467, "step": 53848 }, { "epoch": 0.953640859443832, "grad_norm": 0.6069727540016174, "learning_rate": 1.686691514109512e-07, "loss": 0.0759, "step": 53849 }, { "epoch": 0.9536585689808604, "grad_norm": 0.7600352764129639, "learning_rate": 1.6854051525367508e-07, "loss": 0.0577, "step": 53850 }, { "epoch": 0.9536762785178888, "grad_norm": 0.4106998145580292, "learning_rate": 1.6841192789040115e-07, "loss": 0.0414, "step": 53851 }, { "epoch": 0.9536939880549172, "grad_norm": 0.9331269860267639, "learning_rate": 1.6828338932155073e-07, "loss": 0.0733, "step": 53852 }, { "epoch": 0.9537116975919457, "grad_norm": 0.544277012348175, "learning_rate": 1.6815489954754848e-07, "loss": 0.0435, "step": 53853 }, { "epoch": 0.9537294071289741, "grad_norm": 0.7013797760009766, "learning_rate": 1.680264585688157e-07, "loss": 0.0767, "step": 53854 }, { "epoch": 0.9537471166660025, "grad_norm": 0.4024549722671509, "learning_rate": 1.678980663857771e-07, "loss": 0.0738, "step": 53855 }, { "epoch": 0.9537648262030309, "grad_norm": 0.6712486743927002, "learning_rate": 1.677697229988523e-07, "loss": 0.0628, "step": 53856 }, { "epoch": 0.9537825357400594, "grad_norm": 0.5586337447166443, "learning_rate": 1.6764142840846597e-07, "loss": 0.0614, "step": 53857 }, { "epoch": 0.9538002452770878, "grad_norm": 0.5130954384803772, "learning_rate": 1.6751318261503946e-07, "loss": 0.0588, "step": 53858 }, { "epoch": 0.9538179548141162, "grad_norm": 0.45763882994651794, "learning_rate": 1.6738498561899406e-07, "loss": 0.0561, "step": 53859 }, { "epoch": 0.9538356643511448, "grad_norm": 0.45718318223953247, "learning_rate": 1.6725683742075114e-07, "loss": 0.0695, "step": 53860 }, { "epoch": 0.9538533738881732, "grad_norm": 0.8901939392089844, "learning_rate": 1.6712873802073202e-07, "loss": 0.0769, "step": 53861 }, { "epoch": 0.9538710834252015, "grad_norm": 0.4220563769340515, "learning_rate": 1.6700068741936136e-07, "loss": 0.0773, "step": 53862 }, { "epoch": 0.95388879296223, "grad_norm": 0.4274069368839264, "learning_rate": 1.6687268561705716e-07, "loss": 0.0802, "step": 53863 }, { "epoch": 0.9539065024992585, "grad_norm": 0.6001713275909424, "learning_rate": 1.6674473261424072e-07, "loss": 0.0464, "step": 53864 }, { "epoch": 0.9539242120362869, "grad_norm": 0.2398863583803177, "learning_rate": 1.6661682841133508e-07, "loss": 0.0426, "step": 53865 }, { "epoch": 0.9539419215733153, "grad_norm": 0.2687094211578369, "learning_rate": 1.6648897300875987e-07, "loss": 0.0385, "step": 53866 }, { "epoch": 0.9539596311103437, "grad_norm": 0.5565531849861145, "learning_rate": 1.6636116640693478e-07, "loss": 0.0721, "step": 53867 }, { "epoch": 0.9539773406473722, "grad_norm": 0.10564904659986496, "learning_rate": 1.6623340860628112e-07, "loss": 0.0364, "step": 53868 }, { "epoch": 0.9539950501844006, "grad_norm": 0.49427974224090576, "learning_rate": 1.6610569960722022e-07, "loss": 0.0309, "step": 53869 }, { "epoch": 0.954012759721429, "grad_norm": 0.5545613765716553, "learning_rate": 1.6597803941017008e-07, "loss": 0.0612, "step": 53870 }, { "epoch": 0.9540304692584574, "grad_norm": 0.7803744673728943, "learning_rate": 1.6585042801555206e-07, "loss": 0.0681, "step": 53871 }, { "epoch": 0.9540481787954859, "grad_norm": 0.6110813021659851, "learning_rate": 1.657228654237858e-07, "loss": 0.0389, "step": 53872 }, { "epoch": 0.9540658883325143, "grad_norm": 0.6326417326927185, "learning_rate": 1.6559535163529093e-07, "loss": 0.0454, "step": 53873 }, { "epoch": 0.9540835978695427, "grad_norm": 0.44146063923835754, "learning_rate": 1.6546788665048718e-07, "loss": 0.031, "step": 53874 }, { "epoch": 0.9541013074065712, "grad_norm": 0.4744402766227722, "learning_rate": 1.653404704697925e-07, "loss": 0.0274, "step": 53875 }, { "epoch": 0.9541190169435996, "grad_norm": 0.5862691402435303, "learning_rate": 1.6521310309362993e-07, "loss": 0.0463, "step": 53876 }, { "epoch": 0.954136726480628, "grad_norm": 0.4209764301776886, "learning_rate": 1.6508578452241407e-07, "loss": 0.0279, "step": 53877 }, { "epoch": 0.9541544360176564, "grad_norm": 0.7927573919296265, "learning_rate": 1.6495851475656464e-07, "loss": 0.0648, "step": 53878 }, { "epoch": 0.9541721455546849, "grad_norm": 0.5161710977554321, "learning_rate": 1.6483129379650296e-07, "loss": 0.0508, "step": 53879 }, { "epoch": 0.9541898550917133, "grad_norm": 0.6630352139472961, "learning_rate": 1.6470412164264536e-07, "loss": 0.0626, "step": 53880 }, { "epoch": 0.9542075646287417, "grad_norm": 0.5431767702102661, "learning_rate": 1.6457699829540983e-07, "loss": 0.0327, "step": 53881 }, { "epoch": 0.9542252741657701, "grad_norm": 0.28227904438972473, "learning_rate": 1.644499237552144e-07, "loss": 0.0464, "step": 53882 }, { "epoch": 0.9542429837027986, "grad_norm": 0.4879455864429474, "learning_rate": 1.6432289802248034e-07, "loss": 0.0611, "step": 53883 }, { "epoch": 0.954260693239827, "grad_norm": 0.3115696310997009, "learning_rate": 1.6419592109762404e-07, "loss": 0.0476, "step": 53884 }, { "epoch": 0.9542784027768554, "grad_norm": 0.42203792929649353, "learning_rate": 1.6406899298106014e-07, "loss": 0.0681, "step": 53885 }, { "epoch": 0.9542961123138838, "grad_norm": 0.5462269186973572, "learning_rate": 1.6394211367321e-07, "loss": 0.0479, "step": 53886 }, { "epoch": 0.9543138218509123, "grad_norm": 0.40584439039230347, "learning_rate": 1.6381528317449158e-07, "loss": 0.0348, "step": 53887 }, { "epoch": 0.9543315313879407, "grad_norm": 0.6055155396461487, "learning_rate": 1.636885014853179e-07, "loss": 0.0574, "step": 53888 }, { "epoch": 0.9543492409249691, "grad_norm": 0.6250322461128235, "learning_rate": 1.635617686061086e-07, "loss": 0.0468, "step": 53889 }, { "epoch": 0.9543669504619976, "grad_norm": 0.9507407546043396, "learning_rate": 1.6343508453728006e-07, "loss": 0.0547, "step": 53890 }, { "epoch": 0.954384659999026, "grad_norm": 0.3195367753505707, "learning_rate": 1.6330844927925026e-07, "loss": 0.0462, "step": 53891 }, { "epoch": 0.9544023695360544, "grad_norm": 0.46378564834594727, "learning_rate": 1.631818628324355e-07, "loss": 0.0718, "step": 53892 }, { "epoch": 0.9544200790730828, "grad_norm": 0.561353862285614, "learning_rate": 1.6305532519725054e-07, "loss": 0.0345, "step": 53893 }, { "epoch": 0.9544377886101113, "grad_norm": 0.49624374508857727, "learning_rate": 1.6292883637411492e-07, "loss": 0.0459, "step": 53894 }, { "epoch": 0.9544554981471397, "grad_norm": 0.3240104615688324, "learning_rate": 1.6280239636344006e-07, "loss": 0.0526, "step": 53895 }, { "epoch": 0.9544732076841681, "grad_norm": 0.9531935453414917, "learning_rate": 1.6267600516564728e-07, "loss": 0.0602, "step": 53896 }, { "epoch": 0.9544909172211965, "grad_norm": 0.3410671651363373, "learning_rate": 1.6254966278114792e-07, "loss": 0.0612, "step": 53897 }, { "epoch": 0.954508626758225, "grad_norm": 0.4590894281864166, "learning_rate": 1.6242336921035993e-07, "loss": 0.0543, "step": 53898 }, { "epoch": 0.9545263362952534, "grad_norm": 0.7355741262435913, "learning_rate": 1.6229712445369804e-07, "loss": 0.0434, "step": 53899 }, { "epoch": 0.9545440458322818, "grad_norm": 0.6161550879478455, "learning_rate": 1.6217092851157855e-07, "loss": 0.0515, "step": 53900 }, { "epoch": 0.9545617553693102, "grad_norm": 0.5895287394523621, "learning_rate": 1.6204478138441613e-07, "loss": 0.0643, "step": 53901 }, { "epoch": 0.9545794649063387, "grad_norm": 0.8924232125282288, "learning_rate": 1.619186830726238e-07, "loss": 0.0835, "step": 53902 }, { "epoch": 0.9545971744433671, "grad_norm": 0.4794312119483948, "learning_rate": 1.6179263357661955e-07, "loss": 0.0624, "step": 53903 }, { "epoch": 0.9546148839803955, "grad_norm": 0.4474903643131256, "learning_rate": 1.6166663289681805e-07, "loss": 0.0406, "step": 53904 }, { "epoch": 0.954632593517424, "grad_norm": 0.5197781920433044, "learning_rate": 1.6154068103363063e-07, "loss": 0.0563, "step": 53905 }, { "epoch": 0.9546503030544524, "grad_norm": 0.7896189093589783, "learning_rate": 1.614147779874736e-07, "loss": 0.0879, "step": 53906 }, { "epoch": 0.9546680125914808, "grad_norm": 0.6719802021980286, "learning_rate": 1.612889237587617e-07, "loss": 0.0571, "step": 53907 }, { "epoch": 0.9546857221285092, "grad_norm": 0.499286949634552, "learning_rate": 1.6116311834790952e-07, "loss": 0.0557, "step": 53908 }, { "epoch": 0.9547034316655377, "grad_norm": 0.6988208889961243, "learning_rate": 1.6103736175532845e-07, "loss": 0.0486, "step": 53909 }, { "epoch": 0.9547211412025661, "grad_norm": 0.3211759030818939, "learning_rate": 1.6091165398143314e-07, "loss": 0.0273, "step": 53910 }, { "epoch": 0.9547388507395945, "grad_norm": 0.8091603517532349, "learning_rate": 1.6078599502663826e-07, "loss": 0.0857, "step": 53911 }, { "epoch": 0.9547565602766229, "grad_norm": 0.3838993012905121, "learning_rate": 1.606603848913568e-07, "loss": 0.0535, "step": 53912 }, { "epoch": 0.9547742698136514, "grad_norm": 0.7254442572593689, "learning_rate": 1.6053482357600013e-07, "loss": 0.0345, "step": 53913 }, { "epoch": 0.9547919793506798, "grad_norm": 0.6133832931518555, "learning_rate": 1.6040931108098456e-07, "loss": 0.0633, "step": 53914 }, { "epoch": 0.9548096888877082, "grad_norm": 0.5830919146537781, "learning_rate": 1.6028384740672307e-07, "loss": 0.0525, "step": 53915 }, { "epoch": 0.9548273984247366, "grad_norm": 0.8011120557785034, "learning_rate": 1.6015843255362373e-07, "loss": 0.0626, "step": 53916 }, { "epoch": 0.9548451079617651, "grad_norm": 0.3322025537490845, "learning_rate": 1.600330665221028e-07, "loss": 0.0415, "step": 53917 }, { "epoch": 0.9548628174987935, "grad_norm": 0.4314223527908325, "learning_rate": 1.5990774931257334e-07, "loss": 0.0417, "step": 53918 }, { "epoch": 0.9548805270358219, "grad_norm": 0.4654703736305237, "learning_rate": 1.59782480925445e-07, "loss": 0.0542, "step": 53919 }, { "epoch": 0.9548982365728504, "grad_norm": 0.5677297711372375, "learning_rate": 1.5965726136113245e-07, "loss": 0.0495, "step": 53920 }, { "epoch": 0.9549159461098788, "grad_norm": 0.5197263956069946, "learning_rate": 1.5953209062004536e-07, "loss": 0.0491, "step": 53921 }, { "epoch": 0.9549336556469072, "grad_norm": 0.7233276963233948, "learning_rate": 1.594069687025984e-07, "loss": 0.0505, "step": 53922 }, { "epoch": 0.9549513651839356, "grad_norm": 0.568585991859436, "learning_rate": 1.592818956091996e-07, "loss": 0.0552, "step": 53923 }, { "epoch": 0.9549690747209642, "grad_norm": 0.3762344419956207, "learning_rate": 1.5915687134026357e-07, "loss": 0.0446, "step": 53924 }, { "epoch": 0.9549867842579925, "grad_norm": 0.7683997750282288, "learning_rate": 1.5903189589620004e-07, "loss": 0.061, "step": 53925 }, { "epoch": 0.955004493795021, "grad_norm": 0.801895797252655, "learning_rate": 1.589069692774203e-07, "loss": 0.0428, "step": 53926 }, { "epoch": 0.9550222033320493, "grad_norm": 0.7646582126617432, "learning_rate": 1.5878209148433575e-07, "loss": 0.0775, "step": 53927 }, { "epoch": 0.9550399128690779, "grad_norm": 0.42679452896118164, "learning_rate": 1.5865726251735768e-07, "loss": 0.0455, "step": 53928 }, { "epoch": 0.9550576224061063, "grad_norm": 0.2714958190917969, "learning_rate": 1.585324823768941e-07, "loss": 0.0475, "step": 53929 }, { "epoch": 0.9550753319431347, "grad_norm": 0.7060483694076538, "learning_rate": 1.584077510633597e-07, "loss": 0.0725, "step": 53930 }, { "epoch": 0.955093041480163, "grad_norm": 0.7007304430007935, "learning_rate": 1.5828306857716078e-07, "loss": 0.0804, "step": 53931 }, { "epoch": 0.9551107510171916, "grad_norm": 0.8490713834762573, "learning_rate": 1.581584349187104e-07, "loss": 0.0678, "step": 53932 }, { "epoch": 0.95512846055422, "grad_norm": 0.2205817997455597, "learning_rate": 1.580338500884182e-07, "loss": 0.0455, "step": 53933 }, { "epoch": 0.9551461700912484, "grad_norm": 0.44473204016685486, "learning_rate": 1.5790931408669217e-07, "loss": 0.0606, "step": 53934 }, { "epoch": 0.9551638796282769, "grad_norm": 0.5933470129966736, "learning_rate": 1.577848269139437e-07, "loss": 0.0419, "step": 53935 }, { "epoch": 0.9551815891653053, "grad_norm": 1.0977014303207397, "learning_rate": 1.5766038857058408e-07, "loss": 0.0892, "step": 53936 }, { "epoch": 0.9551992987023337, "grad_norm": 0.5597808361053467, "learning_rate": 1.5753599905701798e-07, "loss": 0.0332, "step": 53937 }, { "epoch": 0.9552170082393621, "grad_norm": 0.6241452693939209, "learning_rate": 1.5741165837365846e-07, "loss": 0.0609, "step": 53938 }, { "epoch": 0.9552347177763906, "grad_norm": 0.7398422956466675, "learning_rate": 1.572873665209118e-07, "loss": 0.0497, "step": 53939 }, { "epoch": 0.955252427313419, "grad_norm": 0.8583515882492065, "learning_rate": 1.5716312349918938e-07, "loss": 0.038, "step": 53940 }, { "epoch": 0.9552701368504474, "grad_norm": 0.23488864302635193, "learning_rate": 1.570389293088992e-07, "loss": 0.0397, "step": 53941 }, { "epoch": 0.9552878463874758, "grad_norm": 0.5124145746231079, "learning_rate": 1.5691478395044922e-07, "loss": 0.0549, "step": 53942 }, { "epoch": 0.9553055559245043, "grad_norm": 0.26916036009788513, "learning_rate": 1.5679068742424918e-07, "loss": 0.035, "step": 53943 }, { "epoch": 0.9553232654615327, "grad_norm": 0.5143234133720398, "learning_rate": 1.5666663973070705e-07, "loss": 0.0383, "step": 53944 }, { "epoch": 0.9553409749985611, "grad_norm": 0.9826094508171082, "learning_rate": 1.5654264087022917e-07, "loss": 0.0621, "step": 53945 }, { "epoch": 0.9553586845355895, "grad_norm": 0.7402482628822327, "learning_rate": 1.5641869084322358e-07, "loss": 0.0564, "step": 53946 }, { "epoch": 0.955376394072618, "grad_norm": 1.0147755146026611, "learning_rate": 1.5629478965010159e-07, "loss": 0.0807, "step": 53947 }, { "epoch": 0.9553941036096464, "grad_norm": 0.598791241645813, "learning_rate": 1.561709372912662e-07, "loss": 0.0528, "step": 53948 }, { "epoch": 0.9554118131466748, "grad_norm": 0.7185289859771729, "learning_rate": 1.560471337671271e-07, "loss": 0.0621, "step": 53949 }, { "epoch": 0.9554295226837033, "grad_norm": 0.3023046851158142, "learning_rate": 1.559233790780923e-07, "loss": 0.0462, "step": 53950 }, { "epoch": 0.9554472322207317, "grad_norm": 0.7255122065544128, "learning_rate": 1.5579967322456978e-07, "loss": 0.0839, "step": 53951 }, { "epoch": 0.9554649417577601, "grad_norm": 0.5706227421760559, "learning_rate": 1.5567601620696424e-07, "loss": 0.0377, "step": 53952 }, { "epoch": 0.9554826512947885, "grad_norm": 0.8306478261947632, "learning_rate": 1.5555240802568205e-07, "loss": 0.0404, "step": 53953 }, { "epoch": 0.955500360831817, "grad_norm": 0.5173287391662598, "learning_rate": 1.5542884868113115e-07, "loss": 0.0704, "step": 53954 }, { "epoch": 0.9555180703688454, "grad_norm": 0.2868915796279907, "learning_rate": 1.5530533817371794e-07, "loss": 0.0391, "step": 53955 }, { "epoch": 0.9555357799058738, "grad_norm": 0.4974358081817627, "learning_rate": 1.551818765038504e-07, "loss": 0.0478, "step": 53956 }, { "epoch": 0.9555534894429022, "grad_norm": 0.721062421798706, "learning_rate": 1.5505846367193154e-07, "loss": 0.0548, "step": 53957 }, { "epoch": 0.9555711989799307, "grad_norm": 0.43108034133911133, "learning_rate": 1.5493509967836772e-07, "loss": 0.0495, "step": 53958 }, { "epoch": 0.9555889085169591, "grad_norm": 0.29066669940948486, "learning_rate": 1.5481178452356693e-07, "loss": 0.0452, "step": 53959 }, { "epoch": 0.9556066180539875, "grad_norm": 0.6474660634994507, "learning_rate": 1.5468851820793384e-07, "loss": 0.063, "step": 53960 }, { "epoch": 0.9556243275910159, "grad_norm": 0.6197543144226074, "learning_rate": 1.545653007318748e-07, "loss": 0.0447, "step": 53961 }, { "epoch": 0.9556420371280444, "grad_norm": 0.4536674916744232, "learning_rate": 1.544421320957945e-07, "loss": 0.0391, "step": 53962 }, { "epoch": 0.9556597466650728, "grad_norm": 0.5103203058242798, "learning_rate": 1.543190123000976e-07, "loss": 0.0739, "step": 53963 }, { "epoch": 0.9556774562021012, "grad_norm": 0.41475802659988403, "learning_rate": 1.5419594134518878e-07, "loss": 0.0441, "step": 53964 }, { "epoch": 0.9556951657391297, "grad_norm": 0.5712601542472839, "learning_rate": 1.5407291923147436e-07, "loss": 0.051, "step": 53965 }, { "epoch": 0.9557128752761581, "grad_norm": 0.7645696401596069, "learning_rate": 1.5394994595935908e-07, "loss": 0.0679, "step": 53966 }, { "epoch": 0.9557305848131865, "grad_norm": 0.35960087180137634, "learning_rate": 1.538270215292459e-07, "loss": 0.0519, "step": 53967 }, { "epoch": 0.9557482943502149, "grad_norm": 0.5663022398948669, "learning_rate": 1.5370414594154113e-07, "loss": 0.0429, "step": 53968 }, { "epoch": 0.9557660038872434, "grad_norm": 0.638030469417572, "learning_rate": 1.5358131919664619e-07, "loss": 0.052, "step": 53969 }, { "epoch": 0.9557837134242718, "grad_norm": 0.5115770697593689, "learning_rate": 1.5345854129496906e-07, "loss": 0.0514, "step": 53970 }, { "epoch": 0.9558014229613002, "grad_norm": 0.2741702198982239, "learning_rate": 1.5333581223691106e-07, "loss": 0.0422, "step": 53971 }, { "epoch": 0.9558191324983286, "grad_norm": 0.24911849200725555, "learning_rate": 1.532131320228769e-07, "loss": 0.0534, "step": 53972 }, { "epoch": 0.9558368420353571, "grad_norm": 0.7125676274299622, "learning_rate": 1.5309050065326957e-07, "loss": 0.0394, "step": 53973 }, { "epoch": 0.9558545515723855, "grad_norm": 0.643102765083313, "learning_rate": 1.5296791812849208e-07, "loss": 0.0434, "step": 53974 }, { "epoch": 0.9558722611094139, "grad_norm": 0.7921132445335388, "learning_rate": 1.528453844489508e-07, "loss": 0.0458, "step": 53975 }, { "epoch": 0.9558899706464423, "grad_norm": 0.5956748723983765, "learning_rate": 1.5272289961504538e-07, "loss": 0.0443, "step": 53976 }, { "epoch": 0.9559076801834708, "grad_norm": 0.2576507329940796, "learning_rate": 1.5260046362717717e-07, "loss": 0.0385, "step": 53977 }, { "epoch": 0.9559253897204992, "grad_norm": 0.9560567736625671, "learning_rate": 1.5247807648575419e-07, "loss": 0.0579, "step": 53978 }, { "epoch": 0.9559430992575276, "grad_norm": 0.5265175700187683, "learning_rate": 1.5235573819117777e-07, "loss": 0.0604, "step": 53979 }, { "epoch": 0.9559608087945561, "grad_norm": 0.914020836353302, "learning_rate": 1.522334487438476e-07, "loss": 0.0597, "step": 53980 }, { "epoch": 0.9559785183315845, "grad_norm": 0.7470394968986511, "learning_rate": 1.521112081441667e-07, "loss": 0.09, "step": 53981 }, { "epoch": 0.9559962278686129, "grad_norm": 0.3388148546218872, "learning_rate": 1.5198901639253971e-07, "loss": 0.0308, "step": 53982 }, { "epoch": 0.9560139374056413, "grad_norm": 0.6903115510940552, "learning_rate": 1.51866873489368e-07, "loss": 0.077, "step": 53983 }, { "epoch": 0.9560316469426698, "grad_norm": 0.40590813755989075, "learning_rate": 1.5174477943505127e-07, "loss": 0.0276, "step": 53984 }, { "epoch": 0.9560493564796982, "grad_norm": 0.25799334049224854, "learning_rate": 1.5162273422999085e-07, "loss": 0.0444, "step": 53985 }, { "epoch": 0.9560670660167266, "grad_norm": 0.8397407531738281, "learning_rate": 1.515007378745914e-07, "loss": 0.0621, "step": 53986 }, { "epoch": 0.956084775553755, "grad_norm": 0.4321483373641968, "learning_rate": 1.513787903692526e-07, "loss": 0.0257, "step": 53987 }, { "epoch": 0.9561024850907835, "grad_norm": 0.8170979022979736, "learning_rate": 1.5125689171437585e-07, "loss": 0.079, "step": 53988 }, { "epoch": 0.956120194627812, "grad_norm": 0.9532256722450256, "learning_rate": 1.5113504191036242e-07, "loss": 0.0544, "step": 53989 }, { "epoch": 0.9561379041648403, "grad_norm": 0.4700666069984436, "learning_rate": 1.5101324095761204e-07, "loss": 0.0453, "step": 53990 }, { "epoch": 0.9561556137018687, "grad_norm": 0.7585503458976746, "learning_rate": 1.5089148885652604e-07, "loss": 0.0428, "step": 53991 }, { "epoch": 0.9561733232388973, "grad_norm": 0.6793407797813416, "learning_rate": 1.5076978560750575e-07, "loss": 0.0835, "step": 53992 }, { "epoch": 0.9561910327759257, "grad_norm": 0.609874427318573, "learning_rate": 1.5064813121095256e-07, "loss": 0.0429, "step": 53993 }, { "epoch": 0.956208742312954, "grad_norm": 0.6661333441734314, "learning_rate": 1.5052652566726278e-07, "loss": 0.0511, "step": 53994 }, { "epoch": 0.9562264518499826, "grad_norm": 0.7467592358589172, "learning_rate": 1.5040496897683942e-07, "loss": 0.076, "step": 53995 }, { "epoch": 0.956244161387011, "grad_norm": 0.7282138466835022, "learning_rate": 1.502834611400805e-07, "loss": 0.0517, "step": 53996 }, { "epoch": 0.9562618709240394, "grad_norm": 0.5056408643722534, "learning_rate": 1.5016200215738907e-07, "loss": 0.0534, "step": 53997 }, { "epoch": 0.9562795804610678, "grad_norm": 0.4878973960876465, "learning_rate": 1.5004059202916143e-07, "loss": 0.0467, "step": 53998 }, { "epoch": 0.9562972899980963, "grad_norm": 0.7525886297225952, "learning_rate": 1.4991923075579894e-07, "loss": 0.0443, "step": 53999 }, { "epoch": 0.9563149995351247, "grad_norm": 0.9344980120658875, "learning_rate": 1.4979791833769962e-07, "loss": 0.0708, "step": 54000 }, { "epoch": 0.9563327090721531, "grad_norm": 0.7810451984405518, "learning_rate": 1.4967665477526314e-07, "loss": 0.0446, "step": 54001 }, { "epoch": 0.9563504186091815, "grad_norm": 0.569889485836029, "learning_rate": 1.4955544006888922e-07, "loss": 0.0646, "step": 54002 }, { "epoch": 0.95636812814621, "grad_norm": 0.5640438199043274, "learning_rate": 1.4943427421897416e-07, "loss": 0.0517, "step": 54003 }, { "epoch": 0.9563858376832384, "grad_norm": 0.601296603679657, "learning_rate": 1.49313157225921e-07, "loss": 0.0599, "step": 54004 }, { "epoch": 0.9564035472202668, "grad_norm": 0.45377451181411743, "learning_rate": 1.491920890901244e-07, "loss": 0.0647, "step": 54005 }, { "epoch": 0.9564212567572952, "grad_norm": 0.8076468110084534, "learning_rate": 1.4907106981198236e-07, "loss": 0.0784, "step": 54006 }, { "epoch": 0.9564389662943237, "grad_norm": 0.7323328256607056, "learning_rate": 1.489500993918963e-07, "loss": 0.0863, "step": 54007 }, { "epoch": 0.9564566758313521, "grad_norm": 0.8162305951118469, "learning_rate": 1.4882917783026084e-07, "loss": 0.0521, "step": 54008 }, { "epoch": 0.9564743853683805, "grad_norm": 0.5957688093185425, "learning_rate": 1.4870830512747568e-07, "loss": 0.0469, "step": 54009 }, { "epoch": 0.956492094905409, "grad_norm": 0.5612189173698425, "learning_rate": 1.4858748128393717e-07, "loss": 0.0637, "step": 54010 }, { "epoch": 0.9565098044424374, "grad_norm": 0.45307081937789917, "learning_rate": 1.4846670630004667e-07, "loss": 0.055, "step": 54011 }, { "epoch": 0.9565275139794658, "grad_norm": 0.7962256669998169, "learning_rate": 1.4834598017619715e-07, "loss": 0.0524, "step": 54012 }, { "epoch": 0.9565452235164942, "grad_norm": 0.22474455833435059, "learning_rate": 1.4822530291278502e-07, "loss": 0.0603, "step": 54013 }, { "epoch": 0.9565629330535227, "grad_norm": 0.662650465965271, "learning_rate": 1.4810467451021326e-07, "loss": 0.045, "step": 54014 }, { "epoch": 0.9565806425905511, "grad_norm": 0.3855744004249573, "learning_rate": 1.4798409496887155e-07, "loss": 0.0462, "step": 54015 }, { "epoch": 0.9565983521275795, "grad_norm": 1.0391337871551514, "learning_rate": 1.4786356428916126e-07, "loss": 0.0471, "step": 54016 }, { "epoch": 0.9566160616646079, "grad_norm": 0.6151096224784851, "learning_rate": 1.4774308247147705e-07, "loss": 0.0561, "step": 54017 }, { "epoch": 0.9566337712016364, "grad_norm": 0.6932613253593445, "learning_rate": 1.4762264951621695e-07, "loss": 0.0554, "step": 54018 }, { "epoch": 0.9566514807386648, "grad_norm": 0.6923483610153198, "learning_rate": 1.4750226542377564e-07, "loss": 0.0637, "step": 54019 }, { "epoch": 0.9566691902756932, "grad_norm": 0.6582266688346863, "learning_rate": 1.473819301945495e-07, "loss": 0.0585, "step": 54020 }, { "epoch": 0.9566868998127216, "grad_norm": 0.43006470799446106, "learning_rate": 1.472616438289348e-07, "loss": 0.0325, "step": 54021 }, { "epoch": 0.9567046093497501, "grad_norm": 1.2788214683532715, "learning_rate": 1.4714140632732797e-07, "loss": 0.0952, "step": 54022 }, { "epoch": 0.9567223188867785, "grad_norm": 0.8250582218170166, "learning_rate": 1.47021217690122e-07, "loss": 0.0505, "step": 54023 }, { "epoch": 0.9567400284238069, "grad_norm": 0.6007468700408936, "learning_rate": 1.469010779177149e-07, "loss": 0.0581, "step": 54024 }, { "epoch": 0.9567577379608354, "grad_norm": 0.5588834881782532, "learning_rate": 1.4678098701050136e-07, "loss": 0.053, "step": 54025 }, { "epoch": 0.9567754474978638, "grad_norm": 0.5721551775932312, "learning_rate": 1.4666094496887438e-07, "loss": 0.0572, "step": 54026 }, { "epoch": 0.9567931570348922, "grad_norm": 0.5853538513183594, "learning_rate": 1.46540951793232e-07, "loss": 0.0469, "step": 54027 }, { "epoch": 0.9568108665719206, "grad_norm": 1.1435495615005493, "learning_rate": 1.4642100748396724e-07, "loss": 0.0702, "step": 54028 }, { "epoch": 0.9568285761089491, "grad_norm": 0.6039490103721619, "learning_rate": 1.4630111204147477e-07, "loss": 0.0418, "step": 54029 }, { "epoch": 0.9568462856459775, "grad_norm": 0.6463648676872253, "learning_rate": 1.461812654661493e-07, "loss": 0.0414, "step": 54030 }, { "epoch": 0.9568639951830059, "grad_norm": 0.5886176228523254, "learning_rate": 1.4606146775838547e-07, "loss": 0.0494, "step": 54031 }, { "epoch": 0.9568817047200343, "grad_norm": 0.500408411026001, "learning_rate": 1.4594171891857632e-07, "loss": 0.0614, "step": 54032 }, { "epoch": 0.9568994142570628, "grad_norm": 0.6750918626785278, "learning_rate": 1.458220189471182e-07, "loss": 0.0542, "step": 54033 }, { "epoch": 0.9569171237940912, "grad_norm": 0.4963870048522949, "learning_rate": 1.4570236784440084e-07, "loss": 0.0495, "step": 54034 }, { "epoch": 0.9569348333311196, "grad_norm": 0.6861589550971985, "learning_rate": 1.455827656108205e-07, "loss": 0.0567, "step": 54035 }, { "epoch": 0.956952542868148, "grad_norm": 0.4222205877304077, "learning_rate": 1.4546321224677194e-07, "loss": 0.0552, "step": 54036 }, { "epoch": 0.9569702524051765, "grad_norm": 0.5295131206512451, "learning_rate": 1.4534370775264648e-07, "loss": 0.0364, "step": 54037 }, { "epoch": 0.9569879619422049, "grad_norm": 0.6297895908355713, "learning_rate": 1.4522425212883717e-07, "loss": 0.0406, "step": 54038 }, { "epoch": 0.9570056714792333, "grad_norm": 0.765943706035614, "learning_rate": 1.4510484537573865e-07, "loss": 0.0425, "step": 54039 }, { "epoch": 0.9570233810162618, "grad_norm": 0.49855563044548035, "learning_rate": 1.449854874937423e-07, "loss": 0.0357, "step": 54040 }, { "epoch": 0.9570410905532902, "grad_norm": 0.6011205315589905, "learning_rate": 1.4486617848324114e-07, "loss": 0.0486, "step": 54041 }, { "epoch": 0.9570588000903186, "grad_norm": 1.1654585599899292, "learning_rate": 1.447469183446265e-07, "loss": 0.0986, "step": 54042 }, { "epoch": 0.957076509627347, "grad_norm": 0.4958702623844147, "learning_rate": 1.4462770707829476e-07, "loss": 0.0522, "step": 54043 }, { "epoch": 0.9570942191643755, "grad_norm": 0.7556800842285156, "learning_rate": 1.4450854468463225e-07, "loss": 0.0873, "step": 54044 }, { "epoch": 0.9571119287014039, "grad_norm": 0.41362616419792175, "learning_rate": 1.4438943116403535e-07, "loss": 0.053, "step": 54045 }, { "epoch": 0.9571296382384323, "grad_norm": 0.4698972702026367, "learning_rate": 1.442703665168954e-07, "loss": 0.0383, "step": 54046 }, { "epoch": 0.9571473477754607, "grad_norm": 0.6494103074073792, "learning_rate": 1.4415135074360208e-07, "loss": 0.0672, "step": 54047 }, { "epoch": 0.9571650573124892, "grad_norm": 0.6238466501235962, "learning_rate": 1.440323838445501e-07, "loss": 0.0569, "step": 54048 }, { "epoch": 0.9571827668495176, "grad_norm": 0.6044312715530396, "learning_rate": 1.4391346582012744e-07, "loss": 0.0698, "step": 54049 }, { "epoch": 0.957200476386546, "grad_norm": 0.8428415656089783, "learning_rate": 1.4379459667072713e-07, "loss": 0.0482, "step": 54050 }, { "epoch": 0.9572181859235744, "grad_norm": 0.7189658284187317, "learning_rate": 1.4367577639674057e-07, "loss": 0.073, "step": 54051 }, { "epoch": 0.957235895460603, "grad_norm": 0.6625562906265259, "learning_rate": 1.435570049985574e-07, "loss": 0.07, "step": 54052 }, { "epoch": 0.9572536049976313, "grad_norm": 0.20916643738746643, "learning_rate": 1.43438282476569e-07, "loss": 0.0303, "step": 54053 }, { "epoch": 0.9572713145346597, "grad_norm": 0.5079460144042969, "learning_rate": 1.433196088311667e-07, "loss": 0.065, "step": 54054 }, { "epoch": 0.9572890240716883, "grad_norm": 0.3569742441177368, "learning_rate": 1.4320098406273852e-07, "loss": 0.0567, "step": 54055 }, { "epoch": 0.9573067336087167, "grad_norm": 0.5416486859321594, "learning_rate": 1.430824081716775e-07, "loss": 0.0595, "step": 54056 }, { "epoch": 0.957324443145745, "grad_norm": 0.2568753659725189, "learning_rate": 1.4296388115837332e-07, "loss": 0.034, "step": 54057 }, { "epoch": 0.9573421526827735, "grad_norm": 0.6035928726196289, "learning_rate": 1.428454030232157e-07, "loss": 0.0497, "step": 54058 }, { "epoch": 0.957359862219802, "grad_norm": 0.7247380018234253, "learning_rate": 1.427269737665926e-07, "loss": 0.0624, "step": 54059 }, { "epoch": 0.9573775717568304, "grad_norm": 0.7625588178634644, "learning_rate": 1.4260859338889541e-07, "loss": 0.0525, "step": 54060 }, { "epoch": 0.9573952812938588, "grad_norm": 0.3984747529029846, "learning_rate": 1.4249026189051384e-07, "loss": 0.0358, "step": 54061 }, { "epoch": 0.9574129908308872, "grad_norm": 0.7234870195388794, "learning_rate": 1.4237197927183588e-07, "loss": 0.0501, "step": 54062 }, { "epoch": 0.9574307003679157, "grad_norm": 0.5147953033447266, "learning_rate": 1.422537455332512e-07, "loss": 0.0425, "step": 54063 }, { "epoch": 0.9574484099049441, "grad_norm": 0.6959670186042786, "learning_rate": 1.4213556067514954e-07, "loss": 0.0584, "step": 54064 }, { "epoch": 0.9574661194419725, "grad_norm": 0.3917657732963562, "learning_rate": 1.420174246979189e-07, "loss": 0.0559, "step": 54065 }, { "epoch": 0.9574838289790009, "grad_norm": 0.5451606512069702, "learning_rate": 1.418993376019473e-07, "loss": 0.0341, "step": 54066 }, { "epoch": 0.9575015385160294, "grad_norm": 0.6974650621414185, "learning_rate": 1.417812993876244e-07, "loss": 0.0386, "step": 54067 }, { "epoch": 0.9575192480530578, "grad_norm": 0.561414361000061, "learning_rate": 1.4166331005533995e-07, "loss": 0.0478, "step": 54068 }, { "epoch": 0.9575369575900862, "grad_norm": 0.36669614911079407, "learning_rate": 1.4154536960547858e-07, "loss": 0.0728, "step": 54069 }, { "epoch": 0.9575546671271147, "grad_norm": 0.7241666316986084, "learning_rate": 1.414274780384317e-07, "loss": 0.0433, "step": 54070 }, { "epoch": 0.9575723766641431, "grad_norm": 1.0686087608337402, "learning_rate": 1.4130963535458397e-07, "loss": 0.0827, "step": 54071 }, { "epoch": 0.9575900862011715, "grad_norm": 0.5319087505340576, "learning_rate": 1.4119184155432509e-07, "loss": 0.0507, "step": 54072 }, { "epoch": 0.9576077957381999, "grad_norm": 0.5201557278633118, "learning_rate": 1.4107409663804137e-07, "loss": 0.0425, "step": 54073 }, { "epoch": 0.9576255052752284, "grad_norm": 0.2984943091869354, "learning_rate": 1.409564006061226e-07, "loss": 0.0125, "step": 54074 }, { "epoch": 0.9576432148122568, "grad_norm": 0.37827712297439575, "learning_rate": 1.4083875345895335e-07, "loss": 0.0437, "step": 54075 }, { "epoch": 0.9576609243492852, "grad_norm": 0.6143608689308167, "learning_rate": 1.4072115519692175e-07, "loss": 0.0563, "step": 54076 }, { "epoch": 0.9576786338863136, "grad_norm": 0.6087765097618103, "learning_rate": 1.4060360582041576e-07, "loss": 0.0476, "step": 54077 }, { "epoch": 0.9576963434233421, "grad_norm": 0.5337245464324951, "learning_rate": 1.4048610532982009e-07, "loss": 0.0359, "step": 54078 }, { "epoch": 0.9577140529603705, "grad_norm": 0.47417575120925903, "learning_rate": 1.4036865372552278e-07, "loss": 0.0479, "step": 54079 }, { "epoch": 0.9577317624973989, "grad_norm": 0.42387041449546814, "learning_rate": 1.4025125100790847e-07, "loss": 0.06, "step": 54080 }, { "epoch": 0.9577494720344273, "grad_norm": 0.4518044590950012, "learning_rate": 1.4013389717736523e-07, "loss": 0.0612, "step": 54081 }, { "epoch": 0.9577671815714558, "grad_norm": 0.8325488567352295, "learning_rate": 1.4001659223427775e-07, "loss": 0.0412, "step": 54082 }, { "epoch": 0.9577848911084842, "grad_norm": 0.8688989281654358, "learning_rate": 1.3989933617903238e-07, "loss": 0.06, "step": 54083 }, { "epoch": 0.9578026006455126, "grad_norm": 0.6035081744194031, "learning_rate": 1.3978212901201714e-07, "loss": 0.0784, "step": 54084 }, { "epoch": 0.9578203101825411, "grad_norm": 0.500249981880188, "learning_rate": 1.3966497073361505e-07, "loss": 0.0508, "step": 54085 }, { "epoch": 0.9578380197195695, "grad_norm": 0.6081519722938538, "learning_rate": 1.3954786134421082e-07, "loss": 0.0692, "step": 54086 }, { "epoch": 0.9578557292565979, "grad_norm": 0.48700952529907227, "learning_rate": 1.3943080084419246e-07, "loss": 0.0565, "step": 54087 }, { "epoch": 0.9578734387936263, "grad_norm": 0.5755921602249146, "learning_rate": 1.39313789233943e-07, "loss": 0.0537, "step": 54088 }, { "epoch": 0.9578911483306548, "grad_norm": 0.5186179280281067, "learning_rate": 1.391968265138488e-07, "loss": 0.0435, "step": 54089 }, { "epoch": 0.9579088578676832, "grad_norm": 1.173625111579895, "learning_rate": 1.3907991268429287e-07, "loss": 0.0909, "step": 54090 }, { "epoch": 0.9579265674047116, "grad_norm": 0.9918203949928284, "learning_rate": 1.389630477456616e-07, "loss": 0.0663, "step": 54091 }, { "epoch": 0.95794427694174, "grad_norm": 0.9542779326438904, "learning_rate": 1.3884623169833966e-07, "loss": 0.0698, "step": 54092 }, { "epoch": 0.9579619864787685, "grad_norm": 1.025520920753479, "learning_rate": 1.387294645427084e-07, "loss": 0.0899, "step": 54093 }, { "epoch": 0.9579796960157969, "grad_norm": 1.098532795906067, "learning_rate": 1.3861274627915588e-07, "loss": 0.0804, "step": 54094 }, { "epoch": 0.9579974055528253, "grad_norm": 0.6419084072113037, "learning_rate": 1.3849607690806343e-07, "loss": 0.0416, "step": 54095 }, { "epoch": 0.9580151150898537, "grad_norm": 0.9369722008705139, "learning_rate": 1.3837945642981743e-07, "loss": 0.0722, "step": 54096 }, { "epoch": 0.9580328246268822, "grad_norm": 0.3811973035335541, "learning_rate": 1.3826288484479755e-07, "loss": 0.0568, "step": 54097 }, { "epoch": 0.9580505341639106, "grad_norm": 0.6694692373275757, "learning_rate": 1.3814636215339016e-07, "loss": 0.0723, "step": 54098 }, { "epoch": 0.958068243700939, "grad_norm": 0.3037244975566864, "learning_rate": 1.380298883559783e-07, "loss": 0.0394, "step": 54099 }, { "epoch": 0.9580859532379675, "grad_norm": 0.6923321485519409, "learning_rate": 1.3791346345294498e-07, "loss": 0.051, "step": 54100 }, { "epoch": 0.9581036627749959, "grad_norm": 0.6512410044670105, "learning_rate": 1.3779708744467324e-07, "loss": 0.0537, "step": 54101 }, { "epoch": 0.9581213723120243, "grad_norm": 0.6987846493721008, "learning_rate": 1.376807603315461e-07, "loss": 0.0516, "step": 54102 }, { "epoch": 0.9581390818490527, "grad_norm": 0.5007713437080383, "learning_rate": 1.375644821139449e-07, "loss": 0.0604, "step": 54103 }, { "epoch": 0.9581567913860812, "grad_norm": 0.6391656994819641, "learning_rate": 1.3744825279225436e-07, "loss": 0.0549, "step": 54104 }, { "epoch": 0.9581745009231096, "grad_norm": 0.449595183134079, "learning_rate": 1.3733207236685584e-07, "loss": 0.0398, "step": 54105 }, { "epoch": 0.958192210460138, "grad_norm": 0.7089588046073914, "learning_rate": 1.372159408381307e-07, "loss": 0.0712, "step": 54106 }, { "epoch": 0.9582099199971664, "grad_norm": 0.8237382173538208, "learning_rate": 1.3709985820646364e-07, "loss": 0.0724, "step": 54107 }, { "epoch": 0.9582276295341949, "grad_norm": 0.5547512173652649, "learning_rate": 1.3698382447223267e-07, "loss": 0.0806, "step": 54108 }, { "epoch": 0.9582453390712233, "grad_norm": 1.1331199407577515, "learning_rate": 1.368678396358225e-07, "loss": 0.0778, "step": 54109 }, { "epoch": 0.9582630486082517, "grad_norm": 0.7863284945487976, "learning_rate": 1.3675190369761448e-07, "loss": 0.0342, "step": 54110 }, { "epoch": 0.9582807581452801, "grad_norm": 0.7586444616317749, "learning_rate": 1.366360166579883e-07, "loss": 0.0678, "step": 54111 }, { "epoch": 0.9582984676823086, "grad_norm": 0.96428382396698, "learning_rate": 1.36520178517327e-07, "loss": 0.0576, "step": 54112 }, { "epoch": 0.958316177219337, "grad_norm": 0.6177461743354797, "learning_rate": 1.3640438927601028e-07, "loss": 0.0508, "step": 54113 }, { "epoch": 0.9583338867563654, "grad_norm": 0.5708399415016174, "learning_rate": 1.3628864893441951e-07, "loss": 0.0408, "step": 54114 }, { "epoch": 0.958351596293394, "grad_norm": 0.38534924387931824, "learning_rate": 1.3617295749293767e-07, "loss": 0.0616, "step": 54115 }, { "epoch": 0.9583693058304223, "grad_norm": 0.6670891046524048, "learning_rate": 1.360573149519412e-07, "loss": 0.0452, "step": 54116 }, { "epoch": 0.9583870153674507, "grad_norm": 0.7998049259185791, "learning_rate": 1.359417213118147e-07, "loss": 0.0726, "step": 54117 }, { "epoch": 0.9584047249044791, "grad_norm": 0.4557029902935028, "learning_rate": 1.358261765729346e-07, "loss": 0.0574, "step": 54118 }, { "epoch": 0.9584224344415077, "grad_norm": 0.6026411652565002, "learning_rate": 1.3571068073568392e-07, "loss": 0.0489, "step": 54119 }, { "epoch": 0.958440143978536, "grad_norm": 0.30609479546546936, "learning_rate": 1.355952338004407e-07, "loss": 0.045, "step": 54120 }, { "epoch": 0.9584578535155645, "grad_norm": 0.5629124641418457, "learning_rate": 1.3547983576758626e-07, "loss": 0.044, "step": 54121 }, { "epoch": 0.9584755630525928, "grad_norm": 0.4575270414352417, "learning_rate": 1.3536448663750035e-07, "loss": 0.0346, "step": 54122 }, { "epoch": 0.9584932725896214, "grad_norm": 0.58229660987854, "learning_rate": 1.3524918641056095e-07, "loss": 0.0457, "step": 54123 }, { "epoch": 0.9585109821266498, "grad_norm": 0.6086667776107788, "learning_rate": 1.3513393508714945e-07, "loss": 0.0584, "step": 54124 }, { "epoch": 0.9585286916636782, "grad_norm": 0.41322383284568787, "learning_rate": 1.3501873266764387e-07, "loss": 0.0406, "step": 54125 }, { "epoch": 0.9585464012007067, "grad_norm": 0.41975468397140503, "learning_rate": 1.3490357915242224e-07, "loss": 0.0821, "step": 54126 }, { "epoch": 0.9585641107377351, "grad_norm": 0.7202336192131042, "learning_rate": 1.3478847454186426e-07, "loss": 0.0728, "step": 54127 }, { "epoch": 0.9585818202747635, "grad_norm": 0.6526439189910889, "learning_rate": 1.3467341883634965e-07, "loss": 0.0568, "step": 54128 }, { "epoch": 0.9585995298117919, "grad_norm": 0.9433062672615051, "learning_rate": 1.3455841203625475e-07, "loss": 0.0398, "step": 54129 }, { "epoch": 0.9586172393488204, "grad_norm": 0.5062025189399719, "learning_rate": 1.3444345414195924e-07, "loss": 0.0506, "step": 54130 }, { "epoch": 0.9586349488858488, "grad_norm": 0.8322847485542297, "learning_rate": 1.343285451538412e-07, "loss": 0.0684, "step": 54131 }, { "epoch": 0.9586526584228772, "grad_norm": 0.6305689811706543, "learning_rate": 1.342136850722786e-07, "loss": 0.0559, "step": 54132 }, { "epoch": 0.9586703679599056, "grad_norm": 0.5429587364196777, "learning_rate": 1.340988738976512e-07, "loss": 0.0759, "step": 54133 }, { "epoch": 0.9586880774969341, "grad_norm": 0.44331416487693787, "learning_rate": 1.3398411163033198e-07, "loss": 0.0365, "step": 54134 }, { "epoch": 0.9587057870339625, "grad_norm": 0.40192848443984985, "learning_rate": 1.3386939827070398e-07, "loss": 0.0342, "step": 54135 }, { "epoch": 0.9587234965709909, "grad_norm": 0.5197286009788513, "learning_rate": 1.3375473381914193e-07, "loss": 0.0704, "step": 54136 }, { "epoch": 0.9587412061080193, "grad_norm": 0.6346374750137329, "learning_rate": 1.3364011827602218e-07, "loss": 0.0603, "step": 54137 }, { "epoch": 0.9587589156450478, "grad_norm": 0.5610177516937256, "learning_rate": 1.3352555164172276e-07, "loss": 0.0679, "step": 54138 }, { "epoch": 0.9587766251820762, "grad_norm": 0.446842223405838, "learning_rate": 1.3341103391662003e-07, "loss": 0.0685, "step": 54139 }, { "epoch": 0.9587943347191046, "grad_norm": 0.7925527691841125, "learning_rate": 1.33296565101092e-07, "loss": 0.0458, "step": 54140 }, { "epoch": 0.9588120442561331, "grad_norm": 0.45005014538764954, "learning_rate": 1.3318214519551342e-07, "loss": 0.0525, "step": 54141 }, { "epoch": 0.9588297537931615, "grad_norm": 0.551476001739502, "learning_rate": 1.3306777420026396e-07, "loss": 0.0524, "step": 54142 }, { "epoch": 0.9588474633301899, "grad_norm": 0.6185634136199951, "learning_rate": 1.329534521157183e-07, "loss": 0.0498, "step": 54143 }, { "epoch": 0.9588651728672183, "grad_norm": 0.7388573884963989, "learning_rate": 1.328391789422495e-07, "loss": 0.0688, "step": 54144 }, { "epoch": 0.9588828824042468, "grad_norm": 0.7482065558433533, "learning_rate": 1.3272495468023727e-07, "loss": 0.0623, "step": 54145 }, { "epoch": 0.9589005919412752, "grad_norm": 0.9169151782989502, "learning_rate": 1.3261077933005794e-07, "loss": 0.0607, "step": 54146 }, { "epoch": 0.9589183014783036, "grad_norm": 0.464903861284256, "learning_rate": 1.324966528920829e-07, "loss": 0.0446, "step": 54147 }, { "epoch": 0.958936011015332, "grad_norm": 0.7816649675369263, "learning_rate": 1.323825753666902e-07, "loss": 0.0575, "step": 54148 }, { "epoch": 0.9589537205523605, "grad_norm": 0.6348491907119751, "learning_rate": 1.3226854675425616e-07, "loss": 0.0682, "step": 54149 }, { "epoch": 0.9589714300893889, "grad_norm": 0.6921451687812805, "learning_rate": 1.3215456705515383e-07, "loss": 0.0449, "step": 54150 }, { "epoch": 0.9589891396264173, "grad_norm": 0.91243976354599, "learning_rate": 1.320406362697596e-07, "loss": 0.0701, "step": 54151 }, { "epoch": 0.9590068491634457, "grad_norm": 0.44116494059562683, "learning_rate": 1.3192675439844816e-07, "loss": 0.049, "step": 54152 }, { "epoch": 0.9590245587004742, "grad_norm": 0.6640956997871399, "learning_rate": 1.318129214415942e-07, "loss": 0.0553, "step": 54153 }, { "epoch": 0.9590422682375026, "grad_norm": 0.6635775566101074, "learning_rate": 1.316991373995724e-07, "loss": 0.0915, "step": 54154 }, { "epoch": 0.959059977774531, "grad_norm": 0.39642417430877686, "learning_rate": 1.3158540227275583e-07, "loss": 0.0392, "step": 54155 }, { "epoch": 0.9590776873115595, "grad_norm": 0.5419033765792847, "learning_rate": 1.3147171606151919e-07, "loss": 0.0657, "step": 54156 }, { "epoch": 0.9590953968485879, "grad_norm": 0.49207982420921326, "learning_rate": 1.3135807876623717e-07, "loss": 0.0475, "step": 54157 }, { "epoch": 0.9591131063856163, "grad_norm": 0.9485709071159363, "learning_rate": 1.3124449038728282e-07, "loss": 0.0717, "step": 54158 }, { "epoch": 0.9591308159226447, "grad_norm": 0.6680274605751038, "learning_rate": 1.3113095092502913e-07, "loss": 0.0749, "step": 54159 }, { "epoch": 0.9591485254596732, "grad_norm": 0.5435264706611633, "learning_rate": 1.3101746037985252e-07, "loss": 0.0706, "step": 54160 }, { "epoch": 0.9591662349967016, "grad_norm": 0.6861501336097717, "learning_rate": 1.309040187521243e-07, "loss": 0.0503, "step": 54161 }, { "epoch": 0.95918394453373, "grad_norm": 0.38505834341049194, "learning_rate": 1.307906260422176e-07, "loss": 0.0422, "step": 54162 }, { "epoch": 0.9592016540707584, "grad_norm": 0.7043203711509705, "learning_rate": 1.3067728225050535e-07, "loss": 0.0649, "step": 54163 }, { "epoch": 0.9592193636077869, "grad_norm": 0.457112580537796, "learning_rate": 1.3056398737736063e-07, "loss": 0.0358, "step": 54164 }, { "epoch": 0.9592370731448153, "grad_norm": 0.5691918730735779, "learning_rate": 1.3045074142315815e-07, "loss": 0.0495, "step": 54165 }, { "epoch": 0.9592547826818437, "grad_norm": 0.5393441915512085, "learning_rate": 1.3033754438826762e-07, "loss": 0.0778, "step": 54166 }, { "epoch": 0.9592724922188721, "grad_norm": 0.5328322052955627, "learning_rate": 1.302243962730637e-07, "loss": 0.0667, "step": 54167 }, { "epoch": 0.9592902017559006, "grad_norm": 0.6364489793777466, "learning_rate": 1.3011129707791447e-07, "loss": 0.0675, "step": 54168 }, { "epoch": 0.959307911292929, "grad_norm": 0.8785656094551086, "learning_rate": 1.2999824680319795e-07, "loss": 0.0765, "step": 54169 }, { "epoch": 0.9593256208299574, "grad_norm": 0.8145707249641418, "learning_rate": 1.2988524544928215e-07, "loss": 0.053, "step": 54170 }, { "epoch": 0.9593433303669859, "grad_norm": 0.4404129683971405, "learning_rate": 1.2977229301654015e-07, "loss": 0.0564, "step": 54171 }, { "epoch": 0.9593610399040143, "grad_norm": 0.34446945786476135, "learning_rate": 1.296593895053433e-07, "loss": 0.033, "step": 54172 }, { "epoch": 0.9593787494410427, "grad_norm": 0.9406505823135376, "learning_rate": 1.2954653491606295e-07, "loss": 0.0838, "step": 54173 }, { "epoch": 0.9593964589780711, "grad_norm": 0.5940608978271484, "learning_rate": 1.2943372924907048e-07, "loss": 0.0534, "step": 54174 }, { "epoch": 0.9594141685150996, "grad_norm": 0.26974499225616455, "learning_rate": 1.293209725047373e-07, "loss": 0.0751, "step": 54175 }, { "epoch": 0.959431878052128, "grad_norm": 0.7818130850791931, "learning_rate": 1.2920826468343307e-07, "loss": 0.072, "step": 54176 }, { "epoch": 0.9594495875891564, "grad_norm": 0.45512375235557556, "learning_rate": 1.2909560578553081e-07, "loss": 0.0421, "step": 54177 }, { "epoch": 0.9594672971261848, "grad_norm": 0.4385887384414673, "learning_rate": 1.289829958113986e-07, "loss": 0.0678, "step": 54178 }, { "epoch": 0.9594850066632133, "grad_norm": 0.33047834038734436, "learning_rate": 1.2887043476140948e-07, "loss": 0.0385, "step": 54179 }, { "epoch": 0.9595027162002417, "grad_norm": 0.6995708346366882, "learning_rate": 1.2875792263593145e-07, "loss": 0.05, "step": 54180 }, { "epoch": 0.9595204257372701, "grad_norm": 0.5166359543800354, "learning_rate": 1.286454594353359e-07, "loss": 0.0663, "step": 54181 }, { "epoch": 0.9595381352742985, "grad_norm": 0.887066662311554, "learning_rate": 1.2853304515999254e-07, "loss": 0.0562, "step": 54182 }, { "epoch": 0.959555844811327, "grad_norm": 0.6610804200172424, "learning_rate": 1.2842067981027273e-07, "loss": 0.0511, "step": 54183 }, { "epoch": 0.9595735543483555, "grad_norm": 0.43934470415115356, "learning_rate": 1.2830836338654283e-07, "loss": 0.0762, "step": 54184 }, { "epoch": 0.9595912638853838, "grad_norm": 0.43817955255508423, "learning_rate": 1.2819609588917593e-07, "loss": 0.0357, "step": 54185 }, { "epoch": 0.9596089734224124, "grad_norm": 0.5045604705810547, "learning_rate": 1.2808387731853833e-07, "loss": 0.0613, "step": 54186 }, { "epoch": 0.9596266829594408, "grad_norm": 0.8840851783752441, "learning_rate": 1.2797170767500143e-07, "loss": 0.0556, "step": 54187 }, { "epoch": 0.9596443924964692, "grad_norm": 0.6731297969818115, "learning_rate": 1.278595869589333e-07, "loss": 0.0364, "step": 54188 }, { "epoch": 0.9596621020334976, "grad_norm": 0.6008096933364868, "learning_rate": 1.2774751517070194e-07, "loss": 0.0517, "step": 54189 }, { "epoch": 0.9596798115705261, "grad_norm": 0.41281166672706604, "learning_rate": 1.2763549231067705e-07, "loss": 0.0352, "step": 54190 }, { "epoch": 0.9596975211075545, "grad_norm": 0.5207260847091675, "learning_rate": 1.2752351837922837e-07, "loss": 0.0339, "step": 54191 }, { "epoch": 0.9597152306445829, "grad_norm": 0.4822312593460083, "learning_rate": 1.2741159337672225e-07, "loss": 0.0354, "step": 54192 }, { "epoch": 0.9597329401816113, "grad_norm": 0.6220794320106506, "learning_rate": 1.272997173035284e-07, "loss": 0.0432, "step": 54193 }, { "epoch": 0.9597506497186398, "grad_norm": 0.5046756863594055, "learning_rate": 1.271878901600132e-07, "loss": 0.0414, "step": 54194 }, { "epoch": 0.9597683592556682, "grad_norm": 0.6535106897354126, "learning_rate": 1.2707611194654635e-07, "loss": 0.0505, "step": 54195 }, { "epoch": 0.9597860687926966, "grad_norm": 0.3716464042663574, "learning_rate": 1.2696438266349587e-07, "loss": 0.0577, "step": 54196 }, { "epoch": 0.959803778329725, "grad_norm": 0.4939554035663605, "learning_rate": 1.2685270231122482e-07, "loss": 0.045, "step": 54197 }, { "epoch": 0.9598214878667535, "grad_norm": 0.5796768069267273, "learning_rate": 1.267410708901062e-07, "loss": 0.0658, "step": 54198 }, { "epoch": 0.9598391974037819, "grad_norm": 0.31094950437545776, "learning_rate": 1.2662948840050646e-07, "loss": 0.0665, "step": 54199 }, { "epoch": 0.9598569069408103, "grad_norm": 0.36326122283935547, "learning_rate": 1.265179548427886e-07, "loss": 0.0291, "step": 54200 }, { "epoch": 0.9598746164778388, "grad_norm": 0.8894774913787842, "learning_rate": 1.26406470217324e-07, "loss": 0.0894, "step": 54201 }, { "epoch": 0.9598923260148672, "grad_norm": 0.31059885025024414, "learning_rate": 1.2629503452447732e-07, "loss": 0.0328, "step": 54202 }, { "epoch": 0.9599100355518956, "grad_norm": 0.43417462706565857, "learning_rate": 1.2618364776461667e-07, "loss": 0.0505, "step": 54203 }, { "epoch": 0.959927745088924, "grad_norm": 0.753731369972229, "learning_rate": 1.260723099381067e-07, "loss": 0.0628, "step": 54204 }, { "epoch": 0.9599454546259525, "grad_norm": 0.7542871236801147, "learning_rate": 1.2596102104531382e-07, "loss": 0.0647, "step": 54205 }, { "epoch": 0.9599631641629809, "grad_norm": 0.7055186033248901, "learning_rate": 1.2584978108660439e-07, "loss": 0.0654, "step": 54206 }, { "epoch": 0.9599808737000093, "grad_norm": 0.4481116831302643, "learning_rate": 1.2573859006234478e-07, "loss": 0.0547, "step": 54207 }, { "epoch": 0.9599985832370377, "grad_norm": 0.6296294927597046, "learning_rate": 1.2562744797290138e-07, "loss": 0.0486, "step": 54208 }, { "epoch": 0.9600162927740662, "grad_norm": 0.3940204679965973, "learning_rate": 1.2551635481863888e-07, "loss": 0.0426, "step": 54209 }, { "epoch": 0.9600340023110946, "grad_norm": 0.6074244976043701, "learning_rate": 1.2540531059992532e-07, "loss": 0.0373, "step": 54210 }, { "epoch": 0.960051711848123, "grad_norm": 0.4690227508544922, "learning_rate": 1.2529431531712044e-07, "loss": 0.045, "step": 54211 }, { "epoch": 0.9600694213851514, "grad_norm": 0.5536454916000366, "learning_rate": 1.2518336897059557e-07, "loss": 0.0298, "step": 54212 }, { "epoch": 0.9600871309221799, "grad_norm": 0.7084553837776184, "learning_rate": 1.250724715607121e-07, "loss": 0.0724, "step": 54213 }, { "epoch": 0.9601048404592083, "grad_norm": 0.540283203125, "learning_rate": 1.2496162308783476e-07, "loss": 0.0357, "step": 54214 }, { "epoch": 0.9601225499962367, "grad_norm": 0.8409181237220764, "learning_rate": 1.248508235523299e-07, "loss": 0.0745, "step": 54215 }, { "epoch": 0.9601402595332652, "grad_norm": 0.5137902498245239, "learning_rate": 1.247400729545606e-07, "loss": 0.0488, "step": 54216 }, { "epoch": 0.9601579690702936, "grad_norm": 0.3493478298187256, "learning_rate": 1.246293712948915e-07, "loss": 0.04, "step": 54217 }, { "epoch": 0.960175678607322, "grad_norm": 0.8068432211875916, "learning_rate": 1.2451871857368902e-07, "loss": 0.0463, "step": 54218 }, { "epoch": 0.9601933881443504, "grad_norm": 0.6174222230911255, "learning_rate": 1.2440811479131454e-07, "loss": 0.0602, "step": 54219 }, { "epoch": 0.9602110976813789, "grad_norm": 0.5009452104568481, "learning_rate": 1.2429755994813274e-07, "loss": 0.0589, "step": 54220 }, { "epoch": 0.9602288072184073, "grad_norm": 0.7529411911964417, "learning_rate": 1.241870540445067e-07, "loss": 0.0546, "step": 54221 }, { "epoch": 0.9602465167554357, "grad_norm": 0.9233635067939758, "learning_rate": 1.240765970808011e-07, "loss": 0.0564, "step": 54222 }, { "epoch": 0.9602642262924641, "grad_norm": 0.44149187207221985, "learning_rate": 1.23966189057379e-07, "loss": 0.0437, "step": 54223 }, { "epoch": 0.9602819358294926, "grad_norm": 0.45767199993133545, "learning_rate": 1.238558299746051e-07, "loss": 0.0398, "step": 54224 }, { "epoch": 0.960299645366521, "grad_norm": 0.5247423648834229, "learning_rate": 1.2374551983283911e-07, "loss": 0.0382, "step": 54225 }, { "epoch": 0.9603173549035494, "grad_norm": 0.7161356806755066, "learning_rate": 1.2363525863244408e-07, "loss": 0.0742, "step": 54226 }, { "epoch": 0.9603350644405778, "grad_norm": 0.5121904015541077, "learning_rate": 1.2352504637378637e-07, "loss": 0.0502, "step": 54227 }, { "epoch": 0.9603527739776063, "grad_norm": 1.0074366331100464, "learning_rate": 1.2341488305722737e-07, "loss": 0.0722, "step": 54228 }, { "epoch": 0.9603704835146347, "grad_norm": 0.4346064329147339, "learning_rate": 1.2330476868312846e-07, "loss": 0.0432, "step": 54229 }, { "epoch": 0.9603881930516631, "grad_norm": 0.48339560627937317, "learning_rate": 1.23194703251851e-07, "loss": 0.0426, "step": 54230 }, { "epoch": 0.9604059025886916, "grad_norm": 0.21905609965324402, "learning_rate": 1.2308468676375973e-07, "loss": 0.0607, "step": 54231 }, { "epoch": 0.96042361212572, "grad_norm": 0.4301682710647583, "learning_rate": 1.2297471921921434e-07, "loss": 0.0405, "step": 54232 }, { "epoch": 0.9604413216627484, "grad_norm": 0.7466744780540466, "learning_rate": 1.228648006185762e-07, "loss": 0.0472, "step": 54233 }, { "epoch": 0.9604590311997768, "grad_norm": 0.3288300931453705, "learning_rate": 1.2275493096221003e-07, "loss": 0.0388, "step": 54234 }, { "epoch": 0.9604767407368053, "grad_norm": 0.782447099685669, "learning_rate": 1.2264511025047386e-07, "loss": 0.066, "step": 54235 }, { "epoch": 0.9604944502738337, "grad_norm": 0.35388967394828796, "learning_rate": 1.2253533848373076e-07, "loss": 0.0535, "step": 54236 }, { "epoch": 0.9605121598108621, "grad_norm": 0.6824039220809937, "learning_rate": 1.2242561566234212e-07, "loss": 0.0671, "step": 54237 }, { "epoch": 0.9605298693478905, "grad_norm": 0.29786616563796997, "learning_rate": 1.2231594178666927e-07, "loss": 0.0527, "step": 54238 }, { "epoch": 0.960547578884919, "grad_norm": 0.9151933789253235, "learning_rate": 1.2220631685707197e-07, "loss": 0.056, "step": 54239 }, { "epoch": 0.9605652884219474, "grad_norm": 0.9025933146476746, "learning_rate": 1.2209674087390988e-07, "loss": 0.0719, "step": 54240 }, { "epoch": 0.9605829979589758, "grad_norm": 0.8674540519714355, "learning_rate": 1.2198721383754607e-07, "loss": 0.064, "step": 54241 }, { "epoch": 0.9606007074960042, "grad_norm": 0.41030773520469666, "learning_rate": 1.218777357483386e-07, "loss": 0.0506, "step": 54242 }, { "epoch": 0.9606184170330327, "grad_norm": 0.435185045003891, "learning_rate": 1.2176830660664884e-07, "loss": 0.0546, "step": 54243 }, { "epoch": 0.9606361265700611, "grad_norm": 0.2782019376754761, "learning_rate": 1.216589264128365e-07, "loss": 0.037, "step": 54244 }, { "epoch": 0.9606538361070895, "grad_norm": 0.4326086640357971, "learning_rate": 1.2154959516726128e-07, "loss": 0.0361, "step": 54245 }, { "epoch": 0.960671545644118, "grad_norm": 0.6348140239715576, "learning_rate": 1.214403128702829e-07, "loss": 0.0304, "step": 54246 }, { "epoch": 0.9606892551811465, "grad_norm": 0.4276624321937561, "learning_rate": 1.2133107952226107e-07, "loss": 0.0638, "step": 54247 }, { "epoch": 0.9607069647181748, "grad_norm": 0.626467227935791, "learning_rate": 1.2122189512355552e-07, "loss": 0.0398, "step": 54248 }, { "epoch": 0.9607246742552032, "grad_norm": 0.5708062648773193, "learning_rate": 1.211127596745243e-07, "loss": 0.0494, "step": 54249 }, { "epoch": 0.9607423837922318, "grad_norm": 0.6920025944709778, "learning_rate": 1.2100367317552709e-07, "loss": 0.0615, "step": 54250 }, { "epoch": 0.9607600933292602, "grad_norm": 1.3934810161590576, "learning_rate": 1.208946356269236e-07, "loss": 0.0675, "step": 54251 }, { "epoch": 0.9607778028662886, "grad_norm": 0.5249220132827759, "learning_rate": 1.2078564702907191e-07, "loss": 0.0806, "step": 54252 }, { "epoch": 0.960795512403317, "grad_norm": 0.8101881742477417, "learning_rate": 1.2067670738233006e-07, "loss": 0.0819, "step": 54253 }, { "epoch": 0.9608132219403455, "grad_norm": 0.69176185131073, "learning_rate": 1.2056781668705774e-07, "loss": 0.0358, "step": 54254 }, { "epoch": 0.9608309314773739, "grad_norm": 0.6979894638061523, "learning_rate": 1.2045897494361134e-07, "loss": 0.0457, "step": 54255 }, { "epoch": 0.9608486410144023, "grad_norm": 0.9608949422836304, "learning_rate": 1.2035018215235062e-07, "loss": 0.0667, "step": 54256 }, { "epoch": 0.9608663505514307, "grad_norm": 0.597069501876831, "learning_rate": 1.2024143831363187e-07, "loss": 0.0577, "step": 54257 }, { "epoch": 0.9608840600884592, "grad_norm": 0.7843254804611206, "learning_rate": 1.2013274342781488e-07, "loss": 0.0608, "step": 54258 }, { "epoch": 0.9609017696254876, "grad_norm": 0.5467050075531006, "learning_rate": 1.20024097495256e-07, "loss": 0.0327, "step": 54259 }, { "epoch": 0.960919479162516, "grad_norm": 0.6895470023155212, "learning_rate": 1.1991550051631162e-07, "loss": 0.0596, "step": 54260 }, { "epoch": 0.9609371886995445, "grad_norm": 0.6369293332099915, "learning_rate": 1.1980695249134144e-07, "loss": 0.036, "step": 54261 }, { "epoch": 0.9609548982365729, "grad_norm": 0.6370996832847595, "learning_rate": 1.196984534207002e-07, "loss": 0.0544, "step": 54262 }, { "epoch": 0.9609726077736013, "grad_norm": 0.9874947667121887, "learning_rate": 1.1959000330474756e-07, "loss": 0.0726, "step": 54263 }, { "epoch": 0.9609903173106297, "grad_norm": 0.7904955744743347, "learning_rate": 1.1948160214383662e-07, "loss": 0.0862, "step": 54264 }, { "epoch": 0.9610080268476582, "grad_norm": 0.3184739351272583, "learning_rate": 1.1937324993832878e-07, "loss": 0.0319, "step": 54265 }, { "epoch": 0.9610257363846866, "grad_norm": 0.38604214787483215, "learning_rate": 1.1926494668857534e-07, "loss": 0.0493, "step": 54266 }, { "epoch": 0.961043445921715, "grad_norm": 0.7097412943840027, "learning_rate": 1.1915669239493776e-07, "loss": 0.0541, "step": 54267 }, { "epoch": 0.9610611554587434, "grad_norm": 1.1231303215026855, "learning_rate": 1.1904848705776738e-07, "loss": 0.0572, "step": 54268 }, { "epoch": 0.9610788649957719, "grad_norm": 0.6626105308532715, "learning_rate": 1.1894033067742227e-07, "loss": 0.0799, "step": 54269 }, { "epoch": 0.9610965745328003, "grad_norm": 0.5153364539146423, "learning_rate": 1.1883222325426046e-07, "loss": 0.0491, "step": 54270 }, { "epoch": 0.9611142840698287, "grad_norm": 0.7020211815834045, "learning_rate": 1.1872416478863501e-07, "loss": 0.0801, "step": 54271 }, { "epoch": 0.9611319936068571, "grad_norm": 0.5090973377227783, "learning_rate": 1.1861615528090064e-07, "loss": 0.0378, "step": 54272 }, { "epoch": 0.9611497031438856, "grad_norm": 0.5928424596786499, "learning_rate": 1.1850819473141372e-07, "loss": 0.0585, "step": 54273 }, { "epoch": 0.961167412680914, "grad_norm": 0.5418581962585449, "learning_rate": 1.1840028314053064e-07, "loss": 0.0346, "step": 54274 }, { "epoch": 0.9611851222179424, "grad_norm": 0.7293668389320374, "learning_rate": 1.1829242050860445e-07, "loss": 0.0561, "step": 54275 }, { "epoch": 0.9612028317549709, "grad_norm": 0.9353277683258057, "learning_rate": 1.1818460683599152e-07, "loss": 0.0508, "step": 54276 }, { "epoch": 0.9612205412919993, "grad_norm": 0.7434295415878296, "learning_rate": 1.1807684212304659e-07, "loss": 0.056, "step": 54277 }, { "epoch": 0.9612382508290277, "grad_norm": 0.5767846703529358, "learning_rate": 1.1796912637012436e-07, "loss": 0.0408, "step": 54278 }, { "epoch": 0.9612559603660561, "grad_norm": 0.5748132467269897, "learning_rate": 1.1786145957757621e-07, "loss": 0.05, "step": 54279 }, { "epoch": 0.9612736699030846, "grad_norm": 0.36109957098960876, "learning_rate": 1.1775384174576021e-07, "loss": 0.0399, "step": 54280 }, { "epoch": 0.961291379440113, "grad_norm": 1.079048752784729, "learning_rate": 1.1764627287502939e-07, "loss": 0.0941, "step": 54281 }, { "epoch": 0.9613090889771414, "grad_norm": 0.6466845273971558, "learning_rate": 1.1753875296573513e-07, "loss": 0.0485, "step": 54282 }, { "epoch": 0.9613267985141698, "grad_norm": 0.5936622619628906, "learning_rate": 1.1743128201823383e-07, "loss": 0.0612, "step": 54283 }, { "epoch": 0.9613445080511983, "grad_norm": 1.0584430694580078, "learning_rate": 1.1732386003287854e-07, "loss": 0.0552, "step": 54284 }, { "epoch": 0.9613622175882267, "grad_norm": 0.34247922897338867, "learning_rate": 1.172164870100223e-07, "loss": 0.0657, "step": 54285 }, { "epoch": 0.9613799271252551, "grad_norm": 0.49721240997314453, "learning_rate": 1.1710916295001983e-07, "loss": 0.0344, "step": 54286 }, { "epoch": 0.9613976366622835, "grad_norm": 0.8388711810112, "learning_rate": 1.1700188785322086e-07, "loss": 0.0583, "step": 54287 }, { "epoch": 0.961415346199312, "grad_norm": 0.7808666229248047, "learning_rate": 1.1689466171998175e-07, "loss": 0.0445, "step": 54288 }, { "epoch": 0.9614330557363404, "grad_norm": 0.3962932229042053, "learning_rate": 1.1678748455065392e-07, "loss": 0.0533, "step": 54289 }, { "epoch": 0.9614507652733688, "grad_norm": 0.3598844110965729, "learning_rate": 1.1668035634558872e-07, "loss": 0.03, "step": 54290 }, { "epoch": 0.9614684748103973, "grad_norm": 0.511035680770874, "learning_rate": 1.1657327710514087e-07, "loss": 0.0431, "step": 54291 }, { "epoch": 0.9614861843474257, "grad_norm": 0.44056084752082825, "learning_rate": 1.1646624682966011e-07, "loss": 0.0497, "step": 54292 }, { "epoch": 0.9615038938844541, "grad_norm": 0.4582826793193817, "learning_rate": 1.1635926551950116e-07, "loss": 0.0355, "step": 54293 }, { "epoch": 0.9615216034214825, "grad_norm": 0.9615321755409241, "learning_rate": 1.1625233317501538e-07, "loss": 0.0695, "step": 54294 }, { "epoch": 0.961539312958511, "grad_norm": 0.8189330697059631, "learning_rate": 1.1614544979655418e-07, "loss": 0.0336, "step": 54295 }, { "epoch": 0.9615570224955394, "grad_norm": 0.6240364909172058, "learning_rate": 1.1603861538446725e-07, "loss": 0.0627, "step": 54296 }, { "epoch": 0.9615747320325678, "grad_norm": 0.7023629546165466, "learning_rate": 1.1593182993910933e-07, "loss": 0.054, "step": 54297 }, { "epoch": 0.9615924415695962, "grad_norm": 0.6627855896949768, "learning_rate": 1.1582509346082847e-07, "loss": 0.0958, "step": 54298 }, { "epoch": 0.9616101511066247, "grad_norm": 0.534403920173645, "learning_rate": 1.157184059499794e-07, "loss": 0.0447, "step": 54299 }, { "epoch": 0.9616278606436531, "grad_norm": 0.9616130590438843, "learning_rate": 1.1561176740691181e-07, "loss": 0.0709, "step": 54300 }, { "epoch": 0.9616455701806815, "grad_norm": 0.9885607361793518, "learning_rate": 1.1550517783197378e-07, "loss": 0.067, "step": 54301 }, { "epoch": 0.9616632797177099, "grad_norm": 0.7785462141036987, "learning_rate": 1.1539863722552001e-07, "loss": 0.0696, "step": 54302 }, { "epoch": 0.9616809892547384, "grad_norm": 0.4186299741268158, "learning_rate": 1.1529214558789858e-07, "loss": 0.053, "step": 54303 }, { "epoch": 0.9616986987917668, "grad_norm": 0.7323938608169556, "learning_rate": 1.1518570291946084e-07, "loss": 0.0705, "step": 54304 }, { "epoch": 0.9617164083287952, "grad_norm": 0.8116856813430786, "learning_rate": 1.1507930922055654e-07, "loss": 0.0552, "step": 54305 }, { "epoch": 0.9617341178658237, "grad_norm": 0.5730430483818054, "learning_rate": 1.1497296449153538e-07, "loss": 0.0557, "step": 54306 }, { "epoch": 0.9617518274028521, "grad_norm": 0.5996447801589966, "learning_rate": 1.1486666873274875e-07, "loss": 0.0547, "step": 54307 }, { "epoch": 0.9617695369398805, "grad_norm": 0.5097126960754395, "learning_rate": 1.1476042194454473e-07, "loss": 0.0446, "step": 54308 }, { "epoch": 0.9617872464769089, "grad_norm": 0.776253879070282, "learning_rate": 1.14654224127273e-07, "loss": 0.0671, "step": 54309 }, { "epoch": 0.9618049560139375, "grad_norm": 0.42427659034729004, "learning_rate": 1.145480752812833e-07, "loss": 0.0642, "step": 54310 }, { "epoch": 0.9618226655509658, "grad_norm": 0.2502480149269104, "learning_rate": 1.1444197540692536e-07, "loss": 0.0518, "step": 54311 }, { "epoch": 0.9618403750879942, "grad_norm": 0.38630151748657227, "learning_rate": 1.1433592450454722e-07, "loss": 0.0473, "step": 54312 }, { "epoch": 0.9618580846250226, "grad_norm": 0.6304858922958374, "learning_rate": 1.1422992257449694e-07, "loss": 0.0569, "step": 54313 }, { "epoch": 0.9618757941620512, "grad_norm": 0.3786158561706543, "learning_rate": 1.141239696171259e-07, "loss": 0.0564, "step": 54314 }, { "epoch": 0.9618935036990796, "grad_norm": 0.8402836918830872, "learning_rate": 1.1401806563278216e-07, "loss": 0.0607, "step": 54315 }, { "epoch": 0.961911213236108, "grad_norm": 0.8714662194252014, "learning_rate": 1.1391221062181378e-07, "loss": 0.0706, "step": 54316 }, { "epoch": 0.9619289227731364, "grad_norm": 0.579269528388977, "learning_rate": 1.1380640458456715e-07, "loss": 0.0881, "step": 54317 }, { "epoch": 0.9619466323101649, "grad_norm": 0.7378469109535217, "learning_rate": 1.1370064752139197e-07, "loss": 0.0527, "step": 54318 }, { "epoch": 0.9619643418471933, "grad_norm": 0.5262088775634766, "learning_rate": 1.1359493943263633e-07, "loss": 0.0404, "step": 54319 }, { "epoch": 0.9619820513842217, "grad_norm": 0.5350400805473328, "learning_rate": 1.1348928031864825e-07, "loss": 0.0619, "step": 54320 }, { "epoch": 0.9619997609212502, "grad_norm": 0.284223735332489, "learning_rate": 1.1338367017977247e-07, "loss": 0.0577, "step": 54321 }, { "epoch": 0.9620174704582786, "grad_norm": 0.5673019886016846, "learning_rate": 1.1327810901636038e-07, "loss": 0.0532, "step": 54322 }, { "epoch": 0.962035179995307, "grad_norm": 0.7995391488075256, "learning_rate": 1.1317259682875835e-07, "loss": 0.0534, "step": 54323 }, { "epoch": 0.9620528895323354, "grad_norm": 0.5364030003547668, "learning_rate": 1.130671336173128e-07, "loss": 0.0718, "step": 54324 }, { "epoch": 0.9620705990693639, "grad_norm": 0.42673754692077637, "learning_rate": 1.1296171938236844e-07, "loss": 0.0509, "step": 54325 }, { "epoch": 0.9620883086063923, "grad_norm": 0.36418965458869934, "learning_rate": 1.1285635412427664e-07, "loss": 0.0562, "step": 54326 }, { "epoch": 0.9621060181434207, "grad_norm": 0.7002084851264954, "learning_rate": 1.1275103784338048e-07, "loss": 0.0608, "step": 54327 }, { "epoch": 0.9621237276804491, "grad_norm": 0.7358030080795288, "learning_rate": 1.12645770540028e-07, "loss": 0.0733, "step": 54328 }, { "epoch": 0.9621414372174776, "grad_norm": 0.6779360771179199, "learning_rate": 1.1254055221456561e-07, "loss": 0.0711, "step": 54329 }, { "epoch": 0.962159146754506, "grad_norm": 0.5121392011642456, "learning_rate": 1.12435382867338e-07, "loss": 0.0248, "step": 54330 }, { "epoch": 0.9621768562915344, "grad_norm": 0.6366267204284668, "learning_rate": 1.123302624986916e-07, "loss": 0.044, "step": 54331 }, { "epoch": 0.9621945658285628, "grad_norm": 0.6109011769294739, "learning_rate": 1.1222519110897445e-07, "loss": 0.0357, "step": 54332 }, { "epoch": 0.9622122753655913, "grad_norm": 0.8009446263313293, "learning_rate": 1.1212016869852958e-07, "loss": 0.0649, "step": 54333 }, { "epoch": 0.9622299849026197, "grad_norm": 0.5427448749542236, "learning_rate": 1.120151952677051e-07, "loss": 0.051, "step": 54334 }, { "epoch": 0.9622476944396481, "grad_norm": 0.38472801446914673, "learning_rate": 1.1191027081684402e-07, "loss": 0.0685, "step": 54335 }, { "epoch": 0.9622654039766766, "grad_norm": 0.39485377073287964, "learning_rate": 1.1180539534629108e-07, "loss": 0.0681, "step": 54336 }, { "epoch": 0.962283113513705, "grad_norm": 0.5458704829216003, "learning_rate": 1.1170056885639435e-07, "loss": 0.0551, "step": 54337 }, { "epoch": 0.9623008230507334, "grad_norm": 0.5985321402549744, "learning_rate": 1.1159579134749687e-07, "loss": 0.0385, "step": 54338 }, { "epoch": 0.9623185325877618, "grad_norm": 0.4629595875740051, "learning_rate": 1.114910628199417e-07, "loss": 0.0442, "step": 54339 }, { "epoch": 0.9623362421247903, "grad_norm": 0.6292720437049866, "learning_rate": 1.1138638327407525e-07, "loss": 0.0416, "step": 54340 }, { "epoch": 0.9623539516618187, "grad_norm": 0.591998279094696, "learning_rate": 1.1128175271024221e-07, "loss": 0.0632, "step": 54341 }, { "epoch": 0.9623716611988471, "grad_norm": 0.41692495346069336, "learning_rate": 1.1117717112878734e-07, "loss": 0.0422, "step": 54342 }, { "epoch": 0.9623893707358755, "grad_norm": 0.2777302861213684, "learning_rate": 1.11072638530052e-07, "loss": 0.0243, "step": 54343 }, { "epoch": 0.962407080272904, "grad_norm": 0.5215235948562622, "learning_rate": 1.1096815491438261e-07, "loss": 0.0383, "step": 54344 }, { "epoch": 0.9624247898099324, "grad_norm": 0.7036596536636353, "learning_rate": 1.1086372028212221e-07, "loss": 0.059, "step": 54345 }, { "epoch": 0.9624424993469608, "grad_norm": 0.49163487553596497, "learning_rate": 1.1075933463361388e-07, "loss": 0.0485, "step": 54346 }, { "epoch": 0.9624602088839892, "grad_norm": 0.49806639552116394, "learning_rate": 1.106549979692023e-07, "loss": 0.0407, "step": 54347 }, { "epoch": 0.9624779184210177, "grad_norm": 0.5885295867919922, "learning_rate": 1.1055071028922892e-07, "loss": 0.064, "step": 54348 }, { "epoch": 0.9624956279580461, "grad_norm": 0.9039290547370911, "learning_rate": 1.1044647159403676e-07, "loss": 0.0815, "step": 54349 }, { "epoch": 0.9625133374950745, "grad_norm": 0.5853576064109802, "learning_rate": 1.1034228188397056e-07, "loss": 0.0386, "step": 54350 }, { "epoch": 0.962531047032103, "grad_norm": 0.6370306611061096, "learning_rate": 1.102381411593717e-07, "loss": 0.0676, "step": 54351 }, { "epoch": 0.9625487565691314, "grad_norm": 0.9874761700630188, "learning_rate": 1.1013404942058491e-07, "loss": 0.056, "step": 54352 }, { "epoch": 0.9625664661061598, "grad_norm": 1.1374503374099731, "learning_rate": 1.1003000666794993e-07, "loss": 0.0627, "step": 54353 }, { "epoch": 0.9625841756431882, "grad_norm": 0.4346654713153839, "learning_rate": 1.0992601290181147e-07, "loss": 0.0305, "step": 54354 }, { "epoch": 0.9626018851802167, "grad_norm": 0.7784353494644165, "learning_rate": 1.0982206812250928e-07, "loss": 0.0619, "step": 54355 }, { "epoch": 0.9626195947172451, "grad_norm": 0.8173156380653381, "learning_rate": 1.0971817233038638e-07, "loss": 0.0585, "step": 54356 }, { "epoch": 0.9626373042542735, "grad_norm": 0.48593297600746155, "learning_rate": 1.0961432552578587e-07, "loss": 0.0545, "step": 54357 }, { "epoch": 0.9626550137913019, "grad_norm": 0.3747599422931671, "learning_rate": 1.095105277090458e-07, "loss": 0.0461, "step": 54358 }, { "epoch": 0.9626727233283304, "grad_norm": 0.2644527852535248, "learning_rate": 1.0940677888051253e-07, "loss": 0.0519, "step": 54359 }, { "epoch": 0.9626904328653588, "grad_norm": 0.6649871468544006, "learning_rate": 1.093030790405225e-07, "loss": 0.0509, "step": 54360 }, { "epoch": 0.9627081424023872, "grad_norm": 0.48880499601364136, "learning_rate": 1.0919942818942042e-07, "loss": 0.0303, "step": 54361 }, { "epoch": 0.9627258519394156, "grad_norm": 0.8311973214149475, "learning_rate": 1.0909582632754767e-07, "loss": 0.0706, "step": 54362 }, { "epoch": 0.9627435614764441, "grad_norm": 0.5978823304176331, "learning_rate": 1.0899227345524065e-07, "loss": 0.0694, "step": 54363 }, { "epoch": 0.9627612710134725, "grad_norm": 0.17380036413669586, "learning_rate": 1.088887695728441e-07, "loss": 0.0235, "step": 54364 }, { "epoch": 0.9627789805505009, "grad_norm": 0.6424598097801208, "learning_rate": 1.0878531468069774e-07, "loss": 0.0472, "step": 54365 }, { "epoch": 0.9627966900875294, "grad_norm": 0.6432672142982483, "learning_rate": 1.086819087791413e-07, "loss": 0.0667, "step": 54366 }, { "epoch": 0.9628143996245578, "grad_norm": 0.12236057966947556, "learning_rate": 1.0857855186851617e-07, "loss": 0.0484, "step": 54367 }, { "epoch": 0.9628321091615862, "grad_norm": 0.24917495250701904, "learning_rate": 1.0847524394916042e-07, "loss": 0.0697, "step": 54368 }, { "epoch": 0.9628498186986146, "grad_norm": 0.9995154738426208, "learning_rate": 1.0837198502141377e-07, "loss": 0.0696, "step": 54369 }, { "epoch": 0.9628675282356431, "grad_norm": 0.37138447165489197, "learning_rate": 1.0826877508561927e-07, "loss": 0.0393, "step": 54370 }, { "epoch": 0.9628852377726715, "grad_norm": 0.3679628372192383, "learning_rate": 1.0816561414211167e-07, "loss": 0.037, "step": 54371 }, { "epoch": 0.9629029473096999, "grad_norm": 0.3080956041812897, "learning_rate": 1.080625021912357e-07, "loss": 0.041, "step": 54372 }, { "epoch": 0.9629206568467283, "grad_norm": 0.5891778469085693, "learning_rate": 1.0795943923332608e-07, "loss": 0.0608, "step": 54373 }, { "epoch": 0.9629383663837568, "grad_norm": 0.430289089679718, "learning_rate": 1.0785642526872585e-07, "loss": 0.0528, "step": 54374 }, { "epoch": 0.9629560759207852, "grad_norm": 0.5135793089866638, "learning_rate": 1.0775346029776977e-07, "loss": 0.0443, "step": 54375 }, { "epoch": 0.9629737854578136, "grad_norm": 0.4976084530353546, "learning_rate": 1.0765054432079757e-07, "loss": 0.0346, "step": 54376 }, { "epoch": 0.962991494994842, "grad_norm": 0.9279630184173584, "learning_rate": 1.0754767733815229e-07, "loss": 0.0692, "step": 54377 }, { "epoch": 0.9630092045318706, "grad_norm": 0.5165858864784241, "learning_rate": 1.0744485935016535e-07, "loss": 0.0414, "step": 54378 }, { "epoch": 0.963026914068899, "grad_norm": 0.5361785888671875, "learning_rate": 1.073420903571798e-07, "loss": 0.0648, "step": 54379 }, { "epoch": 0.9630446236059274, "grad_norm": 0.6084617972373962, "learning_rate": 1.0723937035953369e-07, "loss": 0.0619, "step": 54380 }, { "epoch": 0.9630623331429559, "grad_norm": 0.3056568205356598, "learning_rate": 1.0713669935756176e-07, "loss": 0.0414, "step": 54381 }, { "epoch": 0.9630800426799843, "grad_norm": 0.7885757088661194, "learning_rate": 1.0703407735160542e-07, "loss": 0.0537, "step": 54382 }, { "epoch": 0.9630977522170127, "grad_norm": 0.2626197934150696, "learning_rate": 1.0693150434199938e-07, "loss": 0.0324, "step": 54383 }, { "epoch": 0.9631154617540411, "grad_norm": 0.5225681662559509, "learning_rate": 1.068289803290834e-07, "loss": 0.0513, "step": 54384 }, { "epoch": 0.9631331712910696, "grad_norm": 0.27349990606307983, "learning_rate": 1.0672650531319384e-07, "loss": 0.049, "step": 54385 }, { "epoch": 0.963150880828098, "grad_norm": 0.4831523001194, "learning_rate": 1.066240792946671e-07, "loss": 0.063, "step": 54386 }, { "epoch": 0.9631685903651264, "grad_norm": 0.4161449074745178, "learning_rate": 1.0652170227383962e-07, "loss": 0.0485, "step": 54387 }, { "epoch": 0.9631862999021548, "grad_norm": 0.7432027459144592, "learning_rate": 1.0641937425105108e-07, "loss": 0.0659, "step": 54388 }, { "epoch": 0.9632040094391833, "grad_norm": 0.6319400668144226, "learning_rate": 1.0631709522663624e-07, "loss": 0.048, "step": 54389 }, { "epoch": 0.9632217189762117, "grad_norm": 0.5432865619659424, "learning_rate": 1.0621486520092982e-07, "loss": 0.0567, "step": 54390 }, { "epoch": 0.9632394285132401, "grad_norm": 0.7078805565834045, "learning_rate": 1.0611268417427323e-07, "loss": 0.0589, "step": 54391 }, { "epoch": 0.9632571380502685, "grad_norm": 0.5348806381225586, "learning_rate": 1.0601055214699784e-07, "loss": 0.0665, "step": 54392 }, { "epoch": 0.963274847587297, "grad_norm": 0.7255948781967163, "learning_rate": 1.0590846911944008e-07, "loss": 0.0558, "step": 54393 }, { "epoch": 0.9632925571243254, "grad_norm": 0.5757752656936646, "learning_rate": 1.0580643509193966e-07, "loss": 0.0544, "step": 54394 }, { "epoch": 0.9633102666613538, "grad_norm": 0.8983049392700195, "learning_rate": 1.0570445006482798e-07, "loss": 0.0635, "step": 54395 }, { "epoch": 0.9633279761983823, "grad_norm": 0.4293586015701294, "learning_rate": 1.0560251403844146e-07, "loss": 0.0702, "step": 54396 }, { "epoch": 0.9633456857354107, "grad_norm": 0.8664786219596863, "learning_rate": 1.0550062701311813e-07, "loss": 0.0498, "step": 54397 }, { "epoch": 0.9633633952724391, "grad_norm": 0.5311856269836426, "learning_rate": 1.0539878898918942e-07, "loss": 0.0478, "step": 54398 }, { "epoch": 0.9633811048094675, "grad_norm": 0.3999275267124176, "learning_rate": 1.0529699996699338e-07, "loss": 0.0789, "step": 54399 }, { "epoch": 0.963398814346496, "grad_norm": 0.38021230697631836, "learning_rate": 1.0519525994686308e-07, "loss": 0.0419, "step": 54400 }, { "epoch": 0.9634165238835244, "grad_norm": 0.7242507934570312, "learning_rate": 1.0509356892913324e-07, "loss": 0.0718, "step": 54401 }, { "epoch": 0.9634342334205528, "grad_norm": 0.606536328792572, "learning_rate": 1.0499192691414194e-07, "loss": 0.0639, "step": 54402 }, { "epoch": 0.9634519429575812, "grad_norm": 0.434823215007782, "learning_rate": 1.0489033390221725e-07, "loss": 0.0297, "step": 54403 }, { "epoch": 0.9634696524946097, "grad_norm": 0.585117757320404, "learning_rate": 1.0478878989369889e-07, "loss": 0.059, "step": 54404 }, { "epoch": 0.9634873620316381, "grad_norm": 0.6190431714057922, "learning_rate": 1.0468729488891826e-07, "loss": 0.0518, "step": 54405 }, { "epoch": 0.9635050715686665, "grad_norm": 0.7030487060546875, "learning_rate": 1.045858488882101e-07, "loss": 0.0868, "step": 54406 }, { "epoch": 0.9635227811056949, "grad_norm": 0.6494905352592468, "learning_rate": 1.0448445189190747e-07, "loss": 0.0623, "step": 54407 }, { "epoch": 0.9635404906427234, "grad_norm": 0.40253567695617676, "learning_rate": 1.0438310390034345e-07, "loss": 0.0417, "step": 54408 }, { "epoch": 0.9635582001797518, "grad_norm": 0.343735933303833, "learning_rate": 1.0428180491385442e-07, "loss": 0.0581, "step": 54409 }, { "epoch": 0.9635759097167802, "grad_norm": 0.47061383724212646, "learning_rate": 1.0418055493277012e-07, "loss": 0.0559, "step": 54410 }, { "epoch": 0.9635936192538087, "grad_norm": 0.2481168508529663, "learning_rate": 1.040793539574253e-07, "loss": 0.0527, "step": 54411 }, { "epoch": 0.9636113287908371, "grad_norm": 0.4444483816623688, "learning_rate": 1.0397820198815467e-07, "loss": 0.0485, "step": 54412 }, { "epoch": 0.9636290383278655, "grad_norm": 0.48443350195884705, "learning_rate": 1.0387709902528797e-07, "loss": 0.0466, "step": 54413 }, { "epoch": 0.9636467478648939, "grad_norm": 0.7338395714759827, "learning_rate": 1.0377604506915827e-07, "loss": 0.0553, "step": 54414 }, { "epoch": 0.9636644574019224, "grad_norm": 0.2861582040786743, "learning_rate": 1.0367504012009865e-07, "loss": 0.0334, "step": 54415 }, { "epoch": 0.9636821669389508, "grad_norm": 0.44435733556747437, "learning_rate": 1.0357408417844217e-07, "loss": 0.0577, "step": 54416 }, { "epoch": 0.9636998764759792, "grad_norm": 0.4313703775405884, "learning_rate": 1.0347317724451854e-07, "loss": 0.0344, "step": 54417 }, { "epoch": 0.9637175860130076, "grad_norm": 0.4266025125980377, "learning_rate": 1.033723193186642e-07, "loss": 0.0577, "step": 54418 }, { "epoch": 0.9637352955500361, "grad_norm": 0.9055685997009277, "learning_rate": 1.032715104012072e-07, "loss": 0.0505, "step": 54419 }, { "epoch": 0.9637530050870645, "grad_norm": 0.44586411118507385, "learning_rate": 1.0317075049247893e-07, "loss": 0.0314, "step": 54420 }, { "epoch": 0.9637707146240929, "grad_norm": 0.6228700876235962, "learning_rate": 1.0307003959281413e-07, "loss": 0.0631, "step": 54421 }, { "epoch": 0.9637884241611213, "grad_norm": 0.6628942489624023, "learning_rate": 1.0296937770254088e-07, "loss": 0.059, "step": 54422 }, { "epoch": 0.9638061336981498, "grad_norm": 0.803034782409668, "learning_rate": 1.0286876482199225e-07, "loss": 0.0657, "step": 54423 }, { "epoch": 0.9638238432351782, "grad_norm": 0.8361524343490601, "learning_rate": 1.0276820095149797e-07, "loss": 0.0594, "step": 54424 }, { "epoch": 0.9638415527722066, "grad_norm": 0.4218991994857788, "learning_rate": 1.026676860913911e-07, "loss": 0.0652, "step": 54425 }, { "epoch": 0.9638592623092351, "grad_norm": 0.5623206496238708, "learning_rate": 1.0256722024199972e-07, "loss": 0.0694, "step": 54426 }, { "epoch": 0.9638769718462635, "grad_norm": 0.5360141396522522, "learning_rate": 1.0246680340365522e-07, "loss": 0.0432, "step": 54427 }, { "epoch": 0.9638946813832919, "grad_norm": 0.6449146866798401, "learning_rate": 1.0236643557668734e-07, "loss": 0.0398, "step": 54428 }, { "epoch": 0.9639123909203203, "grad_norm": 0.42183569073677063, "learning_rate": 1.0226611676142749e-07, "loss": 0.0471, "step": 54429 }, { "epoch": 0.9639301004573488, "grad_norm": 0.6543046832084656, "learning_rate": 1.0216584695820708e-07, "loss": 0.0396, "step": 54430 }, { "epoch": 0.9639478099943772, "grad_norm": 0.5405094623565674, "learning_rate": 1.0206562616735248e-07, "loss": 0.0455, "step": 54431 }, { "epoch": 0.9639655195314056, "grad_norm": 0.7959280014038086, "learning_rate": 1.0196545438919513e-07, "loss": 0.05, "step": 54432 }, { "epoch": 0.963983229068434, "grad_norm": 0.7022355198860168, "learning_rate": 1.0186533162406475e-07, "loss": 0.0599, "step": 54433 }, { "epoch": 0.9640009386054625, "grad_norm": 0.48246291279792786, "learning_rate": 1.0176525787229274e-07, "loss": 0.0574, "step": 54434 }, { "epoch": 0.9640186481424909, "grad_norm": 0.7545044422149658, "learning_rate": 1.0166523313420384e-07, "loss": 0.079, "step": 54435 }, { "epoch": 0.9640363576795193, "grad_norm": 0.840015172958374, "learning_rate": 1.0156525741012945e-07, "loss": 0.051, "step": 54436 }, { "epoch": 0.9640540672165477, "grad_norm": 0.8760009407997131, "learning_rate": 1.0146533070039932e-07, "loss": 0.0684, "step": 54437 }, { "epoch": 0.9640717767535762, "grad_norm": 0.3107677400112152, "learning_rate": 1.0136545300533984e-07, "loss": 0.0419, "step": 54438 }, { "epoch": 0.9640894862906046, "grad_norm": 0.6572622656822205, "learning_rate": 1.0126562432528241e-07, "loss": 0.0417, "step": 54439 }, { "epoch": 0.964107195827633, "grad_norm": 0.9711140394210815, "learning_rate": 1.0116584466055345e-07, "loss": 0.0529, "step": 54440 }, { "epoch": 0.9641249053646616, "grad_norm": 0.8144819736480713, "learning_rate": 1.0106611401148435e-07, "loss": 0.0541, "step": 54441 }, { "epoch": 0.96414261490169, "grad_norm": 0.7126541137695312, "learning_rate": 1.009664323783982e-07, "loss": 0.0476, "step": 54442 }, { "epoch": 0.9641603244387184, "grad_norm": 0.6742630004882812, "learning_rate": 1.0086679976162639e-07, "loss": 0.051, "step": 54443 }, { "epoch": 0.9641780339757468, "grad_norm": 0.386972576379776, "learning_rate": 1.0076721616149531e-07, "loss": 0.0494, "step": 54444 }, { "epoch": 0.9641957435127753, "grad_norm": 0.7527950406074524, "learning_rate": 1.0066768157833306e-07, "loss": 0.0456, "step": 54445 }, { "epoch": 0.9642134530498037, "grad_norm": 0.5390361547470093, "learning_rate": 1.0056819601246769e-07, "loss": 0.0612, "step": 54446 }, { "epoch": 0.9642311625868321, "grad_norm": 0.646514356136322, "learning_rate": 1.0046875946422563e-07, "loss": 0.0472, "step": 54447 }, { "epoch": 0.9642488721238605, "grad_norm": 0.22805242240428925, "learning_rate": 1.0036937193393491e-07, "loss": 0.052, "step": 54448 }, { "epoch": 0.964266581660889, "grad_norm": 0.30315306782722473, "learning_rate": 1.0027003342192198e-07, "loss": 0.0473, "step": 54449 }, { "epoch": 0.9642842911979174, "grad_norm": 0.6415720582008362, "learning_rate": 1.0017074392851321e-07, "loss": 0.0463, "step": 54450 }, { "epoch": 0.9643020007349458, "grad_norm": 0.4002981185913086, "learning_rate": 1.0007150345403671e-07, "loss": 0.0468, "step": 54451 }, { "epoch": 0.9643197102719742, "grad_norm": 0.4862237870693207, "learning_rate": 9.997231199881718e-08, "loss": 0.0375, "step": 54452 }, { "epoch": 0.9643374198090027, "grad_norm": 1.5074717998504639, "learning_rate": 9.987316956318104e-08, "loss": 0.042, "step": 54453 }, { "epoch": 0.9643551293460311, "grad_norm": 0.7366793155670166, "learning_rate": 9.977407614745471e-08, "loss": 0.0511, "step": 54454 }, { "epoch": 0.9643728388830595, "grad_norm": 0.6243178844451904, "learning_rate": 9.967503175196623e-08, "loss": 0.0658, "step": 54455 }, { "epoch": 0.964390548420088, "grad_norm": 0.430890828371048, "learning_rate": 9.957603637703872e-08, "loss": 0.0337, "step": 54456 }, { "epoch": 0.9644082579571164, "grad_norm": 0.5042861700057983, "learning_rate": 9.947709002299854e-08, "loss": 0.0472, "step": 54457 }, { "epoch": 0.9644259674941448, "grad_norm": 0.6658439040184021, "learning_rate": 9.937819269017379e-08, "loss": 0.064, "step": 54458 }, { "epoch": 0.9644436770311732, "grad_norm": 0.2495531588792801, "learning_rate": 9.927934437888586e-08, "loss": 0.0268, "step": 54459 }, { "epoch": 0.9644613865682017, "grad_norm": 0.4179067313671112, "learning_rate": 9.918054508946117e-08, "loss": 0.0438, "step": 54460 }, { "epoch": 0.9644790961052301, "grad_norm": 0.8212449550628662, "learning_rate": 9.908179482222613e-08, "loss": 0.0535, "step": 54461 }, { "epoch": 0.9644968056422585, "grad_norm": 0.5410963296890259, "learning_rate": 9.898309357750712e-08, "loss": 0.0516, "step": 54462 }, { "epoch": 0.9645145151792869, "grad_norm": 0.7901284694671631, "learning_rate": 9.888444135562391e-08, "loss": 0.0609, "step": 54463 }, { "epoch": 0.9645322247163154, "grad_norm": 0.6523035764694214, "learning_rate": 9.878583815690456e-08, "loss": 0.053, "step": 54464 }, { "epoch": 0.9645499342533438, "grad_norm": 0.6112050414085388, "learning_rate": 9.868728398167215e-08, "loss": 0.0713, "step": 54465 }, { "epoch": 0.9645676437903722, "grad_norm": 1.1770837306976318, "learning_rate": 9.858877883025141e-08, "loss": 0.0762, "step": 54466 }, { "epoch": 0.9645853533274006, "grad_norm": 0.5501044392585754, "learning_rate": 9.849032270296709e-08, "loss": 0.0311, "step": 54467 }, { "epoch": 0.9646030628644291, "grad_norm": 0.47518259286880493, "learning_rate": 9.83919156001406e-08, "loss": 0.0588, "step": 54468 }, { "epoch": 0.9646207724014575, "grad_norm": 0.7703143358230591, "learning_rate": 9.82935575221e-08, "loss": 0.0672, "step": 54469 }, { "epoch": 0.9646384819384859, "grad_norm": 0.380654513835907, "learning_rate": 9.819524846916505e-08, "loss": 0.0528, "step": 54470 }, { "epoch": 0.9646561914755144, "grad_norm": 0.4139431118965149, "learning_rate": 9.809698844166048e-08, "loss": 0.0521, "step": 54471 }, { "epoch": 0.9646739010125428, "grad_norm": 0.8319405317306519, "learning_rate": 9.799877743991104e-08, "loss": 0.0473, "step": 54472 }, { "epoch": 0.9646916105495712, "grad_norm": 0.5856063961982727, "learning_rate": 9.790061546423645e-08, "loss": 0.0475, "step": 54473 }, { "epoch": 0.9647093200865996, "grad_norm": 0.4674232304096222, "learning_rate": 9.780250251496147e-08, "loss": 0.0535, "step": 54474 }, { "epoch": 0.9647270296236281, "grad_norm": 0.5801594853401184, "learning_rate": 9.770443859241085e-08, "loss": 0.0465, "step": 54475 }, { "epoch": 0.9647447391606565, "grad_norm": 0.6604599356651306, "learning_rate": 9.760642369690432e-08, "loss": 0.0421, "step": 54476 }, { "epoch": 0.9647624486976849, "grad_norm": 0.4460245668888092, "learning_rate": 9.750845782876494e-08, "loss": 0.0511, "step": 54477 }, { "epoch": 0.9647801582347133, "grad_norm": 0.6588929891586304, "learning_rate": 9.741054098831582e-08, "loss": 0.0691, "step": 54478 }, { "epoch": 0.9647978677717418, "grad_norm": 0.9507951140403748, "learning_rate": 9.731267317587833e-08, "loss": 0.0601, "step": 54479 }, { "epoch": 0.9648155773087702, "grad_norm": 0.510344386100769, "learning_rate": 9.721485439177724e-08, "loss": 0.0478, "step": 54480 }, { "epoch": 0.9648332868457986, "grad_norm": 0.5389220714569092, "learning_rate": 9.711708463632896e-08, "loss": 0.0575, "step": 54481 }, { "epoch": 0.964850996382827, "grad_norm": 0.4349086284637451, "learning_rate": 9.701936390985988e-08, "loss": 0.0552, "step": 54482 }, { "epoch": 0.9648687059198555, "grad_norm": 0.4511837661266327, "learning_rate": 9.69216922126881e-08, "loss": 0.0675, "step": 54483 }, { "epoch": 0.9648864154568839, "grad_norm": 0.5405542254447937, "learning_rate": 9.682406954513667e-08, "loss": 0.0338, "step": 54484 }, { "epoch": 0.9649041249939123, "grad_norm": 0.8725873827934265, "learning_rate": 9.672649590752869e-08, "loss": 0.0422, "step": 54485 }, { "epoch": 0.9649218345309408, "grad_norm": 0.6596354842185974, "learning_rate": 9.662897130018056e-08, "loss": 0.0736, "step": 54486 }, { "epoch": 0.9649395440679692, "grad_norm": 0.6468281149864197, "learning_rate": 9.653149572341868e-08, "loss": 0.0494, "step": 54487 }, { "epoch": 0.9649572536049976, "grad_norm": 0.48235762119293213, "learning_rate": 9.643406917755948e-08, "loss": 0.0468, "step": 54488 }, { "epoch": 0.964974963142026, "grad_norm": 0.46394944190979004, "learning_rate": 9.633669166292436e-08, "loss": 0.0519, "step": 54489 }, { "epoch": 0.9649926726790545, "grad_norm": 1.1653718948364258, "learning_rate": 9.623936317983473e-08, "loss": 0.0684, "step": 54490 }, { "epoch": 0.9650103822160829, "grad_norm": 0.647918164730072, "learning_rate": 9.614208372861033e-08, "loss": 0.0501, "step": 54491 }, { "epoch": 0.9650280917531113, "grad_norm": 0.6003056764602661, "learning_rate": 9.604485330956924e-08, "loss": 0.0606, "step": 54492 }, { "epoch": 0.9650458012901397, "grad_norm": 0.9549034833908081, "learning_rate": 9.594767192303456e-08, "loss": 0.0609, "step": 54493 }, { "epoch": 0.9650635108271682, "grad_norm": 0.9259005188941956, "learning_rate": 9.585053956932433e-08, "loss": 0.0478, "step": 54494 }, { "epoch": 0.9650812203641966, "grad_norm": 0.6621429920196533, "learning_rate": 9.575345624875831e-08, "loss": 0.0311, "step": 54495 }, { "epoch": 0.965098929901225, "grad_norm": 0.412558913230896, "learning_rate": 9.565642196165625e-08, "loss": 0.0436, "step": 54496 }, { "epoch": 0.9651166394382534, "grad_norm": 0.8892053961753845, "learning_rate": 9.555943670833789e-08, "loss": 0.0675, "step": 54497 }, { "epoch": 0.9651343489752819, "grad_norm": 0.6583789587020874, "learning_rate": 9.546250048911964e-08, "loss": 0.0617, "step": 54498 }, { "epoch": 0.9651520585123103, "grad_norm": 0.320093035697937, "learning_rate": 9.536561330432292e-08, "loss": 0.0456, "step": 54499 }, { "epoch": 0.9651697680493387, "grad_norm": 0.5005850195884705, "learning_rate": 9.526877515426579e-08, "loss": 0.054, "step": 54500 }, { "epoch": 0.9651874775863672, "grad_norm": 0.5124083161354065, "learning_rate": 9.517198603926803e-08, "loss": 0.0537, "step": 54501 }, { "epoch": 0.9652051871233956, "grad_norm": 0.6295589208602905, "learning_rate": 9.5075245959646e-08, "loss": 0.0543, "step": 54502 }, { "epoch": 0.965222896660424, "grad_norm": 0.5225439667701721, "learning_rate": 9.497855491571783e-08, "loss": 0.0464, "step": 54503 }, { "epoch": 0.9652406061974524, "grad_norm": 0.5017364025115967, "learning_rate": 9.488191290780323e-08, "loss": 0.049, "step": 54504 }, { "epoch": 0.965258315734481, "grad_norm": 0.5043889284133911, "learning_rate": 9.47853199362203e-08, "loss": 0.049, "step": 54505 }, { "epoch": 0.9652760252715094, "grad_norm": 0.5530347228050232, "learning_rate": 9.468877600128546e-08, "loss": 0.0563, "step": 54506 }, { "epoch": 0.9652937348085378, "grad_norm": 0.2923831343650818, "learning_rate": 9.459228110331841e-08, "loss": 0.0355, "step": 54507 }, { "epoch": 0.9653114443455661, "grad_norm": 0.40873709321022034, "learning_rate": 9.449583524263395e-08, "loss": 0.0282, "step": 54508 }, { "epoch": 0.9653291538825947, "grad_norm": 0.753772497177124, "learning_rate": 9.439943841955012e-08, "loss": 0.0623, "step": 54509 }, { "epoch": 0.9653468634196231, "grad_norm": 0.7297279834747314, "learning_rate": 9.430309063438502e-08, "loss": 0.0681, "step": 54510 }, { "epoch": 0.9653645729566515, "grad_norm": 0.7336264848709106, "learning_rate": 9.420679188745507e-08, "loss": 0.0735, "step": 54511 }, { "epoch": 0.9653822824936799, "grad_norm": 0.540511429309845, "learning_rate": 9.411054217907832e-08, "loss": 0.0687, "step": 54512 }, { "epoch": 0.9653999920307084, "grad_norm": 0.5781465768814087, "learning_rate": 9.401434150956956e-08, "loss": 0.0511, "step": 54513 }, { "epoch": 0.9654177015677368, "grad_norm": 0.6911824345588684, "learning_rate": 9.391818987924517e-08, "loss": 0.0549, "step": 54514 }, { "epoch": 0.9654354111047652, "grad_norm": 0.29775673151016235, "learning_rate": 9.382208728842323e-08, "loss": 0.0395, "step": 54515 }, { "epoch": 0.9654531206417937, "grad_norm": 0.37331539392471313, "learning_rate": 9.372603373741851e-08, "loss": 0.0555, "step": 54516 }, { "epoch": 0.9654708301788221, "grad_norm": 0.3966100215911865, "learning_rate": 9.36300292265474e-08, "loss": 0.065, "step": 54517 }, { "epoch": 0.9654885397158505, "grad_norm": 0.457341730594635, "learning_rate": 9.353407375612466e-08, "loss": 0.0419, "step": 54518 }, { "epoch": 0.9655062492528789, "grad_norm": 0.6391999125480652, "learning_rate": 9.343816732647003e-08, "loss": 0.0466, "step": 54519 }, { "epoch": 0.9655239587899074, "grad_norm": 0.740253746509552, "learning_rate": 9.334230993789328e-08, "loss": 0.0663, "step": 54520 }, { "epoch": 0.9655416683269358, "grad_norm": 0.7069156765937805, "learning_rate": 9.324650159071412e-08, "loss": 0.0665, "step": 54521 }, { "epoch": 0.9655593778639642, "grad_norm": 0.34603556990623474, "learning_rate": 9.315074228524567e-08, "loss": 0.0508, "step": 54522 }, { "epoch": 0.9655770874009926, "grad_norm": 0.6999260783195496, "learning_rate": 9.305503202180266e-08, "loss": 0.0586, "step": 54523 }, { "epoch": 0.9655947969380211, "grad_norm": 0.8350076675415039, "learning_rate": 9.295937080070315e-08, "loss": 0.0564, "step": 54524 }, { "epoch": 0.9656125064750495, "grad_norm": 0.9486151933670044, "learning_rate": 9.286375862225694e-08, "loss": 0.0507, "step": 54525 }, { "epoch": 0.9656302160120779, "grad_norm": 0.6473945379257202, "learning_rate": 9.276819548678373e-08, "loss": 0.0535, "step": 54526 }, { "epoch": 0.9656479255491063, "grad_norm": 0.24326778948307037, "learning_rate": 9.267268139459328e-08, "loss": 0.042, "step": 54527 }, { "epoch": 0.9656656350861348, "grad_norm": 0.5174395442008972, "learning_rate": 9.257721634600369e-08, "loss": 0.0471, "step": 54528 }, { "epoch": 0.9656833446231632, "grad_norm": 0.5725148320198059, "learning_rate": 9.248180034132636e-08, "loss": 0.0447, "step": 54529 }, { "epoch": 0.9657010541601916, "grad_norm": 0.6293433904647827, "learning_rate": 9.238643338087604e-08, "loss": 0.0554, "step": 54530 }, { "epoch": 0.9657187636972201, "grad_norm": 0.8176918029785156, "learning_rate": 9.22911154649675e-08, "loss": 0.0599, "step": 54531 }, { "epoch": 0.9657364732342485, "grad_norm": 0.32726770639419556, "learning_rate": 9.219584659391045e-08, "loss": 0.0485, "step": 54532 }, { "epoch": 0.9657541827712769, "grad_norm": 0.5426756143569946, "learning_rate": 9.210062676802467e-08, "loss": 0.0671, "step": 54533 }, { "epoch": 0.9657718923083053, "grad_norm": 0.7470918297767639, "learning_rate": 9.200545598761823e-08, "loss": 0.0528, "step": 54534 }, { "epoch": 0.9657896018453338, "grad_norm": 0.651069700717926, "learning_rate": 9.191033425300588e-08, "loss": 0.0342, "step": 54535 }, { "epoch": 0.9658073113823622, "grad_norm": 0.7271209359169006, "learning_rate": 9.181526156450237e-08, "loss": 0.0688, "step": 54536 }, { "epoch": 0.9658250209193906, "grad_norm": 0.6726546287536621, "learning_rate": 9.172023792241747e-08, "loss": 0.0669, "step": 54537 }, { "epoch": 0.965842730456419, "grad_norm": 0.23735825717449188, "learning_rate": 9.162526332706423e-08, "loss": 0.0196, "step": 54538 }, { "epoch": 0.9658604399934475, "grad_norm": 0.7203468680381775, "learning_rate": 9.153033777875741e-08, "loss": 0.0771, "step": 54539 }, { "epoch": 0.9658781495304759, "grad_norm": 0.3281897008419037, "learning_rate": 9.143546127780677e-08, "loss": 0.0441, "step": 54540 }, { "epoch": 0.9658958590675043, "grad_norm": 0.28255990147590637, "learning_rate": 9.134063382452706e-08, "loss": 0.0539, "step": 54541 }, { "epoch": 0.9659135686045327, "grad_norm": 0.5974255204200745, "learning_rate": 9.124585541922802e-08, "loss": 0.0504, "step": 54542 }, { "epoch": 0.9659312781415612, "grad_norm": 0.8117036819458008, "learning_rate": 9.11511260622211e-08, "loss": 0.0711, "step": 54543 }, { "epoch": 0.9659489876785896, "grad_norm": 0.4037877023220062, "learning_rate": 9.1056445753821e-08, "loss": 0.0517, "step": 54544 }, { "epoch": 0.965966697215618, "grad_norm": 0.6282229423522949, "learning_rate": 9.096181449433583e-08, "loss": 0.0764, "step": 54545 }, { "epoch": 0.9659844067526465, "grad_norm": 0.24872174859046936, "learning_rate": 9.086723228407867e-08, "loss": 0.0172, "step": 54546 }, { "epoch": 0.9660021162896749, "grad_norm": 0.14533507823944092, "learning_rate": 9.077269912336095e-08, "loss": 0.0316, "step": 54547 }, { "epoch": 0.9660198258267033, "grad_norm": 0.5583797097206116, "learning_rate": 9.067821501249241e-08, "loss": 0.0432, "step": 54548 }, { "epoch": 0.9660375353637317, "grad_norm": 0.6338199973106384, "learning_rate": 9.058377995178446e-08, "loss": 0.0382, "step": 54549 }, { "epoch": 0.9660552449007602, "grad_norm": 0.6118683815002441, "learning_rate": 9.048939394154854e-08, "loss": 0.0492, "step": 54550 }, { "epoch": 0.9660729544377886, "grad_norm": 0.26133933663368225, "learning_rate": 9.039505698209272e-08, "loss": 0.0392, "step": 54551 }, { "epoch": 0.966090663974817, "grad_norm": 0.24178926646709442, "learning_rate": 9.030076907373175e-08, "loss": 0.0357, "step": 54552 }, { "epoch": 0.9661083735118454, "grad_norm": 0.5506497621536255, "learning_rate": 9.020653021677039e-08, "loss": 0.0681, "step": 54553 }, { "epoch": 0.9661260830488739, "grad_norm": 0.6349990963935852, "learning_rate": 9.011234041152505e-08, "loss": 0.0501, "step": 54554 }, { "epoch": 0.9661437925859023, "grad_norm": 0.6353957653045654, "learning_rate": 9.001819965829883e-08, "loss": 0.0297, "step": 54555 }, { "epoch": 0.9661615021229307, "grad_norm": 0.5831093788146973, "learning_rate": 8.992410795740647e-08, "loss": 0.0396, "step": 54556 }, { "epoch": 0.9661792116599591, "grad_norm": 0.17276334762573242, "learning_rate": 8.983006530915439e-08, "loss": 0.0332, "step": 54557 }, { "epoch": 0.9661969211969876, "grad_norm": 0.3671835958957672, "learning_rate": 8.973607171385567e-08, "loss": 0.0434, "step": 54558 }, { "epoch": 0.966214630734016, "grad_norm": 0.5866199731826782, "learning_rate": 8.964212717181508e-08, "loss": 0.0376, "step": 54559 }, { "epoch": 0.9662323402710444, "grad_norm": 0.6050878167152405, "learning_rate": 8.954823168334402e-08, "loss": 0.0736, "step": 54560 }, { "epoch": 0.9662500498080729, "grad_norm": 0.6515370011329651, "learning_rate": 8.945438524875226e-08, "loss": 0.0491, "step": 54561 }, { "epoch": 0.9662677593451013, "grad_norm": 1.165703296661377, "learning_rate": 8.936058786834623e-08, "loss": 0.0561, "step": 54562 }, { "epoch": 0.9662854688821297, "grad_norm": 0.863649308681488, "learning_rate": 8.926683954243564e-08, "loss": 0.0616, "step": 54563 }, { "epoch": 0.9663031784191581, "grad_norm": 0.39822328090667725, "learning_rate": 8.917314027133028e-08, "loss": 0.0516, "step": 54564 }, { "epoch": 0.9663208879561866, "grad_norm": 0.49914616346359253, "learning_rate": 8.907949005533656e-08, "loss": 0.064, "step": 54565 }, { "epoch": 0.966338597493215, "grad_norm": 0.30354881286621094, "learning_rate": 8.898588889476256e-08, "loss": 0.0413, "step": 54566 }, { "epoch": 0.9663563070302434, "grad_norm": 0.8211790919303894, "learning_rate": 8.889233678991803e-08, "loss": 0.0501, "step": 54567 }, { "epoch": 0.9663740165672718, "grad_norm": 0.4468359649181366, "learning_rate": 8.879883374110942e-08, "loss": 0.0397, "step": 54568 }, { "epoch": 0.9663917261043004, "grad_norm": 0.5680283308029175, "learning_rate": 8.870537974864312e-08, "loss": 0.038, "step": 54569 }, { "epoch": 0.9664094356413288, "grad_norm": 0.23197585344314575, "learning_rate": 8.86119748128289e-08, "loss": 0.0489, "step": 54570 }, { "epoch": 0.9664271451783571, "grad_norm": 0.39639607071876526, "learning_rate": 8.851861893397151e-08, "loss": 0.0535, "step": 54571 }, { "epoch": 0.9664448547153855, "grad_norm": 0.645939290523529, "learning_rate": 8.842531211238237e-08, "loss": 0.0781, "step": 54572 }, { "epoch": 0.9664625642524141, "grad_norm": 0.3784724771976471, "learning_rate": 8.833205434836456e-08, "loss": 0.0509, "step": 54573 }, { "epoch": 0.9664802737894425, "grad_norm": 0.38062554597854614, "learning_rate": 8.823884564222617e-08, "loss": 0.0584, "step": 54574 }, { "epoch": 0.9664979833264709, "grad_norm": 0.9524561762809753, "learning_rate": 8.814568599427363e-08, "loss": 0.0662, "step": 54575 }, { "epoch": 0.9665156928634994, "grad_norm": 0.7325955033302307, "learning_rate": 8.805257540481337e-08, "loss": 0.0814, "step": 54576 }, { "epoch": 0.9665334024005278, "grad_norm": 0.6559839248657227, "learning_rate": 8.79595138741518e-08, "loss": 0.0852, "step": 54577 }, { "epoch": 0.9665511119375562, "grad_norm": 0.5906270742416382, "learning_rate": 8.786650140259533e-08, "loss": 0.0474, "step": 54578 }, { "epoch": 0.9665688214745846, "grad_norm": 0.3616287112236023, "learning_rate": 8.777353799044873e-08, "loss": 0.0665, "step": 54579 }, { "epoch": 0.9665865310116131, "grad_norm": 0.5001258850097656, "learning_rate": 8.76806236380201e-08, "loss": 0.0492, "step": 54580 }, { "epoch": 0.9666042405486415, "grad_norm": 0.3668493926525116, "learning_rate": 8.758775834561416e-08, "loss": 0.0445, "step": 54581 }, { "epoch": 0.9666219500856699, "grad_norm": 0.33798879384994507, "learning_rate": 8.74949421135357e-08, "loss": 0.0402, "step": 54582 }, { "epoch": 0.9666396596226983, "grad_norm": 0.4819510877132416, "learning_rate": 8.740217494209113e-08, "loss": 0.0621, "step": 54583 }, { "epoch": 0.9666573691597268, "grad_norm": 0.49952733516693115, "learning_rate": 8.73094568315852e-08, "loss": 0.0398, "step": 54584 }, { "epoch": 0.9666750786967552, "grad_norm": 0.65826416015625, "learning_rate": 8.721678778232101e-08, "loss": 0.0463, "step": 54585 }, { "epoch": 0.9666927882337836, "grad_norm": 0.8629919290542603, "learning_rate": 8.71241677946083e-08, "loss": 0.0624, "step": 54586 }, { "epoch": 0.966710497770812, "grad_norm": 0.5158237218856812, "learning_rate": 8.703159686874685e-08, "loss": 0.0535, "step": 54587 }, { "epoch": 0.9667282073078405, "grad_norm": 0.41683417558670044, "learning_rate": 8.693907500504305e-08, "loss": 0.059, "step": 54588 }, { "epoch": 0.9667459168448689, "grad_norm": 0.6322031617164612, "learning_rate": 8.684660220380003e-08, "loss": 0.0439, "step": 54589 }, { "epoch": 0.9667636263818973, "grad_norm": 0.7240686416625977, "learning_rate": 8.675417846532586e-08, "loss": 0.0558, "step": 54590 }, { "epoch": 0.9667813359189258, "grad_norm": 0.509263277053833, "learning_rate": 8.666180378992028e-08, "loss": 0.0483, "step": 54591 }, { "epoch": 0.9667990454559542, "grad_norm": 0.985753059387207, "learning_rate": 8.656947817788974e-08, "loss": 0.0727, "step": 54592 }, { "epoch": 0.9668167549929826, "grad_norm": 0.416517049074173, "learning_rate": 8.647720162953732e-08, "loss": 0.0494, "step": 54593 }, { "epoch": 0.966834464530011, "grad_norm": 0.349555104970932, "learning_rate": 8.638497414516611e-08, "loss": 0.0449, "step": 54594 }, { "epoch": 0.9668521740670395, "grad_norm": 0.23946775496006012, "learning_rate": 8.629279572508086e-08, "loss": 0.0508, "step": 54595 }, { "epoch": 0.9668698836040679, "grad_norm": 0.46062469482421875, "learning_rate": 8.620066636958301e-08, "loss": 0.0293, "step": 54596 }, { "epoch": 0.9668875931410963, "grad_norm": 0.9067901968955994, "learning_rate": 8.610858607897731e-08, "loss": 0.072, "step": 54597 }, { "epoch": 0.9669053026781247, "grad_norm": 0.6924030184745789, "learning_rate": 8.601655485356352e-08, "loss": 0.0621, "step": 54598 }, { "epoch": 0.9669230122151532, "grad_norm": 0.4130571782588959, "learning_rate": 8.592457269364806e-08, "loss": 0.0502, "step": 54599 }, { "epoch": 0.9669407217521816, "grad_norm": 0.6494779586791992, "learning_rate": 8.583263959953402e-08, "loss": 0.047, "step": 54600 }, { "epoch": 0.96695843128921, "grad_norm": 0.6358806490898132, "learning_rate": 8.574075557152117e-08, "loss": 0.0291, "step": 54601 }, { "epoch": 0.9669761408262384, "grad_norm": 0.6482139825820923, "learning_rate": 8.564892060991092e-08, "loss": 0.0639, "step": 54602 }, { "epoch": 0.9669938503632669, "grad_norm": 0.4661094546318054, "learning_rate": 8.555713471500969e-08, "loss": 0.0368, "step": 54603 }, { "epoch": 0.9670115599002953, "grad_norm": 0.9387749433517456, "learning_rate": 8.546539788711561e-08, "loss": 0.0781, "step": 54604 }, { "epoch": 0.9670292694373237, "grad_norm": 0.37800249457359314, "learning_rate": 8.537371012653338e-08, "loss": 0.0436, "step": 54605 }, { "epoch": 0.9670469789743522, "grad_norm": 0.6571308970451355, "learning_rate": 8.528207143356115e-08, "loss": 0.0494, "step": 54606 }, { "epoch": 0.9670646885113806, "grad_norm": 0.8127606511116028, "learning_rate": 8.519048180850364e-08, "loss": 0.0595, "step": 54607 }, { "epoch": 0.967082398048409, "grad_norm": 0.6361210942268372, "learning_rate": 8.509894125166062e-08, "loss": 0.0746, "step": 54608 }, { "epoch": 0.9671001075854374, "grad_norm": 0.8447361588478088, "learning_rate": 8.500744976333352e-08, "loss": 0.0605, "step": 54609 }, { "epoch": 0.9671178171224659, "grad_norm": 1.194366693496704, "learning_rate": 8.49160073438221e-08, "loss": 0.0839, "step": 54610 }, { "epoch": 0.9671355266594943, "grad_norm": 0.6518704295158386, "learning_rate": 8.482461399343111e-08, "loss": 0.0799, "step": 54611 }, { "epoch": 0.9671532361965227, "grad_norm": 0.5231149196624756, "learning_rate": 8.473326971245698e-08, "loss": 0.0409, "step": 54612 }, { "epoch": 0.9671709457335511, "grad_norm": 0.4149947762489319, "learning_rate": 8.464197450120114e-08, "loss": 0.0319, "step": 54613 }, { "epoch": 0.9671886552705796, "grad_norm": 0.5018342137336731, "learning_rate": 8.455072835996503e-08, "loss": 0.0525, "step": 54614 }, { "epoch": 0.967206364807608, "grad_norm": 0.6066031455993652, "learning_rate": 8.445953128905004e-08, "loss": 0.0599, "step": 54615 }, { "epoch": 0.9672240743446364, "grad_norm": 0.676730751991272, "learning_rate": 8.436838328875262e-08, "loss": 0.0509, "step": 54616 }, { "epoch": 0.9672417838816648, "grad_norm": 1.0144226551055908, "learning_rate": 8.427728435937421e-08, "loss": 0.0515, "step": 54617 }, { "epoch": 0.9672594934186933, "grad_norm": 0.516858696937561, "learning_rate": 8.418623450121621e-08, "loss": 0.0444, "step": 54618 }, { "epoch": 0.9672772029557217, "grad_norm": 0.5953471660614014, "learning_rate": 8.409523371457673e-08, "loss": 0.046, "step": 54619 }, { "epoch": 0.9672949124927501, "grad_norm": 0.2907501757144928, "learning_rate": 8.400428199975552e-08, "loss": 0.0378, "step": 54620 }, { "epoch": 0.9673126220297786, "grad_norm": 0.40306076407432556, "learning_rate": 8.391337935705068e-08, "loss": 0.045, "step": 54621 }, { "epoch": 0.967330331566807, "grad_norm": 1.0801194906234741, "learning_rate": 8.382252578676363e-08, "loss": 0.0781, "step": 54622 }, { "epoch": 0.9673480411038354, "grad_norm": 0.49039193987846375, "learning_rate": 8.373172128918915e-08, "loss": 0.0456, "step": 54623 }, { "epoch": 0.9673657506408638, "grad_norm": 0.5925212502479553, "learning_rate": 8.364096586463033e-08, "loss": 0.0362, "step": 54624 }, { "epoch": 0.9673834601778923, "grad_norm": 0.603331983089447, "learning_rate": 8.355025951338524e-08, "loss": 0.0568, "step": 54625 }, { "epoch": 0.9674011697149207, "grad_norm": 0.5752996206283569, "learning_rate": 8.345960223574866e-08, "loss": 0.0562, "step": 54626 }, { "epoch": 0.9674188792519491, "grad_norm": 0.4368322193622589, "learning_rate": 8.336899403202203e-08, "loss": 0.0586, "step": 54627 }, { "epoch": 0.9674365887889775, "grad_norm": 0.7782881259918213, "learning_rate": 8.327843490250341e-08, "loss": 0.0437, "step": 54628 }, { "epoch": 0.967454298326006, "grad_norm": 0.7414345145225525, "learning_rate": 8.318792484748927e-08, "loss": 0.0782, "step": 54629 }, { "epoch": 0.9674720078630344, "grad_norm": 0.4757167100906372, "learning_rate": 8.309746386727934e-08, "loss": 0.0537, "step": 54630 }, { "epoch": 0.9674897174000628, "grad_norm": 0.39844292402267456, "learning_rate": 8.30070519621684e-08, "loss": 0.0379, "step": 54631 }, { "epoch": 0.9675074269370912, "grad_norm": 0.6255101561546326, "learning_rate": 8.291668913245787e-08, "loss": 0.0392, "step": 54632 }, { "epoch": 0.9675251364741198, "grad_norm": 0.5838735699653625, "learning_rate": 8.282637537844085e-08, "loss": 0.0447, "step": 54633 }, { "epoch": 0.9675428460111481, "grad_norm": 0.2666074335575104, "learning_rate": 8.273611070041542e-08, "loss": 0.0215, "step": 54634 }, { "epoch": 0.9675605555481765, "grad_norm": 0.9083022475242615, "learning_rate": 8.264589509868136e-08, "loss": 0.0652, "step": 54635 }, { "epoch": 0.9675782650852051, "grad_norm": 0.46249935030937195, "learning_rate": 8.255572857353177e-08, "loss": 0.0529, "step": 54636 }, { "epoch": 0.9675959746222335, "grad_norm": 0.5050856471061707, "learning_rate": 8.24656111252664e-08, "loss": 0.0691, "step": 54637 }, { "epoch": 0.9676136841592619, "grad_norm": 0.7328581213951111, "learning_rate": 8.237554275417835e-08, "loss": 0.0427, "step": 54638 }, { "epoch": 0.9676313936962903, "grad_norm": 0.6060830950737, "learning_rate": 8.228552346056906e-08, "loss": 0.0536, "step": 54639 }, { "epoch": 0.9676491032333188, "grad_norm": 0.4379855990409851, "learning_rate": 8.219555324472994e-08, "loss": 0.0462, "step": 54640 }, { "epoch": 0.9676668127703472, "grad_norm": 0.5071396231651306, "learning_rate": 8.210563210695743e-08, "loss": 0.0369, "step": 54641 }, { "epoch": 0.9676845223073756, "grad_norm": 0.6896289587020874, "learning_rate": 8.201576004754962e-08, "loss": 0.0706, "step": 54642 }, { "epoch": 0.967702231844404, "grad_norm": 0.6131548285484314, "learning_rate": 8.192593706680129e-08, "loss": 0.0291, "step": 54643 }, { "epoch": 0.9677199413814325, "grad_norm": 0.5077244639396667, "learning_rate": 8.18361631650072e-08, "loss": 0.0544, "step": 54644 }, { "epoch": 0.9677376509184609, "grad_norm": 0.6368948817253113, "learning_rate": 8.174643834246376e-08, "loss": 0.0546, "step": 54645 }, { "epoch": 0.9677553604554893, "grad_norm": 0.6698891520500183, "learning_rate": 8.165676259946576e-08, "loss": 0.0635, "step": 54646 }, { "epoch": 0.9677730699925178, "grad_norm": 0.7302291989326477, "learning_rate": 8.156713593630794e-08, "loss": 0.0367, "step": 54647 }, { "epoch": 0.9677907795295462, "grad_norm": 0.7161552309989929, "learning_rate": 8.147755835328508e-08, "loss": 0.0511, "step": 54648 }, { "epoch": 0.9678084890665746, "grad_norm": 0.5835979580879211, "learning_rate": 8.138802985069193e-08, "loss": 0.0541, "step": 54649 }, { "epoch": 0.967826198603603, "grad_norm": 0.77508544921875, "learning_rate": 8.129855042882495e-08, "loss": 0.0561, "step": 54650 }, { "epoch": 0.9678439081406315, "grad_norm": 0.5266026258468628, "learning_rate": 8.120912008797554e-08, "loss": 0.0428, "step": 54651 }, { "epoch": 0.9678616176776599, "grad_norm": 0.626305878162384, "learning_rate": 8.111973882843849e-08, "loss": 0.0679, "step": 54652 }, { "epoch": 0.9678793272146883, "grad_norm": 0.530467689037323, "learning_rate": 8.103040665050854e-08, "loss": 0.0389, "step": 54653 }, { "epoch": 0.9678970367517167, "grad_norm": 0.5621276497840881, "learning_rate": 8.094112355448047e-08, "loss": 0.0386, "step": 54654 }, { "epoch": 0.9679147462887452, "grad_norm": 0.7546732425689697, "learning_rate": 8.085188954064738e-08, "loss": 0.0552, "step": 54655 }, { "epoch": 0.9679324558257736, "grad_norm": 0.8294627666473389, "learning_rate": 8.076270460930069e-08, "loss": 0.0592, "step": 54656 }, { "epoch": 0.967950165362802, "grad_norm": 0.2965046763420105, "learning_rate": 8.067356876073851e-08, "loss": 0.0475, "step": 54657 }, { "epoch": 0.9679678748998304, "grad_norm": 0.7423866391181946, "learning_rate": 8.058448199524893e-08, "loss": 0.0447, "step": 54658 }, { "epoch": 0.9679855844368589, "grad_norm": 0.5934613347053528, "learning_rate": 8.049544431312839e-08, "loss": 0.0341, "step": 54659 }, { "epoch": 0.9680032939738873, "grad_norm": 0.8789088129997253, "learning_rate": 8.040645571466998e-08, "loss": 0.0394, "step": 54660 }, { "epoch": 0.9680210035109157, "grad_norm": 1.3334380388259888, "learning_rate": 8.031751620016348e-08, "loss": 0.0865, "step": 54661 }, { "epoch": 0.9680387130479442, "grad_norm": 0.7010683417320251, "learning_rate": 8.022862576990364e-08, "loss": 0.0633, "step": 54662 }, { "epoch": 0.9680564225849726, "grad_norm": 0.7304130792617798, "learning_rate": 8.013978442418358e-08, "loss": 0.0525, "step": 54663 }, { "epoch": 0.968074132122001, "grad_norm": 0.5210750699043274, "learning_rate": 8.005099216329304e-08, "loss": 0.0552, "step": 54664 }, { "epoch": 0.9680918416590294, "grad_norm": 0.5455872416496277, "learning_rate": 7.996224898752679e-08, "loss": 0.0226, "step": 54665 }, { "epoch": 0.9681095511960579, "grad_norm": 0.570910632610321, "learning_rate": 7.987355489717463e-08, "loss": 0.0423, "step": 54666 }, { "epoch": 0.9681272607330863, "grad_norm": 0.4874800741672516, "learning_rate": 7.978490989253129e-08, "loss": 0.0691, "step": 54667 }, { "epoch": 0.9681449702701147, "grad_norm": 0.3635445535182953, "learning_rate": 7.969631397388488e-08, "loss": 0.0615, "step": 54668 }, { "epoch": 0.9681626798071431, "grad_norm": 0.5441648960113525, "learning_rate": 7.960776714153018e-08, "loss": 0.0384, "step": 54669 }, { "epoch": 0.9681803893441716, "grad_norm": 0.5625121593475342, "learning_rate": 7.951926939575527e-08, "loss": 0.0486, "step": 54670 }, { "epoch": 0.9681980988812, "grad_norm": 0.4372404217720032, "learning_rate": 7.943082073685326e-08, "loss": 0.0424, "step": 54671 }, { "epoch": 0.9682158084182284, "grad_norm": 0.8191574811935425, "learning_rate": 7.934242116511392e-08, "loss": 0.0555, "step": 54672 }, { "epoch": 0.9682335179552568, "grad_norm": 0.6939843893051147, "learning_rate": 7.925407068083034e-08, "loss": 0.0386, "step": 54673 }, { "epoch": 0.9682512274922853, "grad_norm": 0.5007637143135071, "learning_rate": 7.916576928429064e-08, "loss": 0.0667, "step": 54674 }, { "epoch": 0.9682689370293137, "grad_norm": 0.5713555216789246, "learning_rate": 7.907751697578625e-08, "loss": 0.0551, "step": 54675 }, { "epoch": 0.9682866465663421, "grad_norm": 0.223445862531662, "learning_rate": 7.898931375560858e-08, "loss": 0.0451, "step": 54676 }, { "epoch": 0.9683043561033706, "grad_norm": 0.3723292350769043, "learning_rate": 7.890115962404576e-08, "loss": 0.0366, "step": 54677 }, { "epoch": 0.968322065640399, "grad_norm": 0.45256882905960083, "learning_rate": 7.881305458139087e-08, "loss": 0.0427, "step": 54678 }, { "epoch": 0.9683397751774274, "grad_norm": 0.44175782799720764, "learning_rate": 7.872499862793037e-08, "loss": 0.0491, "step": 54679 }, { "epoch": 0.9683574847144558, "grad_norm": 0.9570667743682861, "learning_rate": 7.863699176395566e-08, "loss": 0.0716, "step": 54680 }, { "epoch": 0.9683751942514843, "grad_norm": 0.3976995646953583, "learning_rate": 7.854903398975654e-08, "loss": 0.0424, "step": 54681 }, { "epoch": 0.9683929037885127, "grad_norm": 0.40762314200401306, "learning_rate": 7.84611253056211e-08, "loss": 0.0637, "step": 54682 }, { "epoch": 0.9684106133255411, "grad_norm": 0.496850848197937, "learning_rate": 7.837326571184078e-08, "loss": 0.0488, "step": 54683 }, { "epoch": 0.9684283228625695, "grad_norm": 0.580102801322937, "learning_rate": 7.828545520870201e-08, "loss": 0.0412, "step": 54684 }, { "epoch": 0.968446032399598, "grad_norm": 0.6271683573722839, "learning_rate": 7.819769379649288e-08, "loss": 0.0366, "step": 54685 }, { "epoch": 0.9684637419366264, "grad_norm": 0.5297700762748718, "learning_rate": 7.810998147550652e-08, "loss": 0.0339, "step": 54686 }, { "epoch": 0.9684814514736548, "grad_norm": 0.41816508769989014, "learning_rate": 7.802231824602935e-08, "loss": 0.0726, "step": 54687 }, { "epoch": 0.9684991610106832, "grad_norm": 0.9389750361442566, "learning_rate": 7.793470410834946e-08, "loss": 0.0577, "step": 54688 }, { "epoch": 0.9685168705477117, "grad_norm": 0.47604018449783325, "learning_rate": 7.784713906275497e-08, "loss": 0.0479, "step": 54689 }, { "epoch": 0.9685345800847401, "grad_norm": 0.5822592377662659, "learning_rate": 7.775962310953233e-08, "loss": 0.0443, "step": 54690 }, { "epoch": 0.9685522896217685, "grad_norm": 0.5467196106910706, "learning_rate": 7.767215624897295e-08, "loss": 0.0836, "step": 54691 }, { "epoch": 0.968569999158797, "grad_norm": 0.6607233285903931, "learning_rate": 7.75847384813616e-08, "loss": 0.0444, "step": 54692 }, { "epoch": 0.9685877086958254, "grad_norm": 0.6219402551651001, "learning_rate": 7.749736980698807e-08, "loss": 0.0744, "step": 54693 }, { "epoch": 0.9686054182328538, "grad_norm": 0.4076291024684906, "learning_rate": 7.741005022613879e-08, "loss": 0.0305, "step": 54694 }, { "epoch": 0.9686231277698822, "grad_norm": 0.4270839989185333, "learning_rate": 7.732277973910184e-08, "loss": 0.0617, "step": 54695 }, { "epoch": 0.9686408373069108, "grad_norm": 0.5891066193580627, "learning_rate": 7.723555834616369e-08, "loss": 0.0628, "step": 54696 }, { "epoch": 0.9686585468439391, "grad_norm": 0.28135377168655396, "learning_rate": 7.71483860476091e-08, "loss": 0.0547, "step": 54697 }, { "epoch": 0.9686762563809675, "grad_norm": 0.9367096424102783, "learning_rate": 7.70612628437295e-08, "loss": 0.0565, "step": 54698 }, { "epoch": 0.968693965917996, "grad_norm": 0.5918601155281067, "learning_rate": 7.697418873480633e-08, "loss": 0.0462, "step": 54699 }, { "epoch": 0.9687116754550245, "grad_norm": 0.5602195858955383, "learning_rate": 7.688716372113103e-08, "loss": 0.052, "step": 54700 }, { "epoch": 0.9687293849920529, "grad_norm": 0.5501986742019653, "learning_rate": 7.68001878029867e-08, "loss": 0.0505, "step": 54701 }, { "epoch": 0.9687470945290813, "grad_norm": 0.1655363142490387, "learning_rate": 7.671326098065978e-08, "loss": 0.0333, "step": 54702 }, { "epoch": 0.9687648040661097, "grad_norm": 0.8297324180603027, "learning_rate": 7.662638325443839e-08, "loss": 0.0579, "step": 54703 }, { "epoch": 0.9687825136031382, "grad_norm": 0.28776639699935913, "learning_rate": 7.65395546246056e-08, "loss": 0.0323, "step": 54704 }, { "epoch": 0.9688002231401666, "grad_norm": 0.7618363499641418, "learning_rate": 7.645277509144788e-08, "loss": 0.0718, "step": 54705 }, { "epoch": 0.968817932677195, "grad_norm": 0.5485036373138428, "learning_rate": 7.636604465525165e-08, "loss": 0.0479, "step": 54706 }, { "epoch": 0.9688356422142235, "grad_norm": 0.44577786326408386, "learning_rate": 7.62793633163017e-08, "loss": 0.0581, "step": 54707 }, { "epoch": 0.9688533517512519, "grad_norm": 0.3952772915363312, "learning_rate": 7.619273107488278e-08, "loss": 0.0326, "step": 54708 }, { "epoch": 0.9688710612882803, "grad_norm": 0.9585800170898438, "learning_rate": 7.610614793128134e-08, "loss": 0.0592, "step": 54709 }, { "epoch": 0.9688887708253087, "grad_norm": 0.6218886971473694, "learning_rate": 7.60196138857805e-08, "loss": 0.0546, "step": 54710 }, { "epoch": 0.9689064803623372, "grad_norm": 0.710944652557373, "learning_rate": 7.593312893866666e-08, "loss": 0.0587, "step": 54711 }, { "epoch": 0.9689241898993656, "grad_norm": 0.6575601100921631, "learning_rate": 7.584669309022129e-08, "loss": 0.0448, "step": 54712 }, { "epoch": 0.968941899436394, "grad_norm": 0.3668593168258667, "learning_rate": 7.57603063407325e-08, "loss": 0.0424, "step": 54713 }, { "epoch": 0.9689596089734224, "grad_norm": 0.4501402974128723, "learning_rate": 7.567396869048171e-08, "loss": 0.0655, "step": 54714 }, { "epoch": 0.9689773185104509, "grad_norm": 0.7618653178215027, "learning_rate": 7.558768013975537e-08, "loss": 0.0634, "step": 54715 }, { "epoch": 0.9689950280474793, "grad_norm": 0.5454624891281128, "learning_rate": 7.550144068883491e-08, "loss": 0.0701, "step": 54716 }, { "epoch": 0.9690127375845077, "grad_norm": 0.8015183806419373, "learning_rate": 7.541525033800512e-08, "loss": 0.0669, "step": 54717 }, { "epoch": 0.9690304471215361, "grad_norm": 0.8197290897369385, "learning_rate": 7.532910908755075e-08, "loss": 0.0451, "step": 54718 }, { "epoch": 0.9690481566585646, "grad_norm": 0.7292099595069885, "learning_rate": 7.524301693775326e-08, "loss": 0.0682, "step": 54719 }, { "epoch": 0.969065866195593, "grad_norm": 0.6111737489700317, "learning_rate": 7.515697388889742e-08, "loss": 0.0593, "step": 54720 }, { "epoch": 0.9690835757326214, "grad_norm": 0.577315628528595, "learning_rate": 7.507097994126633e-08, "loss": 0.0262, "step": 54721 }, { "epoch": 0.9691012852696499, "grad_norm": 0.3132065236568451, "learning_rate": 7.498503509513976e-08, "loss": 0.0396, "step": 54722 }, { "epoch": 0.9691189948066783, "grad_norm": 0.616328239440918, "learning_rate": 7.489913935080251e-08, "loss": 0.0849, "step": 54723 }, { "epoch": 0.9691367043437067, "grad_norm": 0.7636467218399048, "learning_rate": 7.481329270853931e-08, "loss": 0.0423, "step": 54724 }, { "epoch": 0.9691544138807351, "grad_norm": 0.6866546273231506, "learning_rate": 7.472749516863165e-08, "loss": 0.0811, "step": 54725 }, { "epoch": 0.9691721234177636, "grad_norm": 1.0304853916168213, "learning_rate": 7.464174673135926e-08, "loss": 0.0687, "step": 54726 }, { "epoch": 0.969189832954792, "grad_norm": 0.5279222130775452, "learning_rate": 7.455604739700694e-08, "loss": 0.0653, "step": 54727 }, { "epoch": 0.9692075424918204, "grad_norm": 0.5571911334991455, "learning_rate": 7.447039716585447e-08, "loss": 0.0494, "step": 54728 }, { "epoch": 0.9692252520288488, "grad_norm": 0.6696692109107971, "learning_rate": 7.438479603818494e-08, "loss": 0.0523, "step": 54729 }, { "epoch": 0.9692429615658773, "grad_norm": 0.6415078639984131, "learning_rate": 7.429924401428146e-08, "loss": 0.0778, "step": 54730 }, { "epoch": 0.9692606711029057, "grad_norm": 0.5184906125068665, "learning_rate": 7.421374109442214e-08, "loss": 0.065, "step": 54731 }, { "epoch": 0.9692783806399341, "grad_norm": 0.6926127672195435, "learning_rate": 7.41282872788901e-08, "loss": 0.0825, "step": 54732 }, { "epoch": 0.9692960901769625, "grad_norm": 0.408226877450943, "learning_rate": 7.404288256796676e-08, "loss": 0.04, "step": 54733 }, { "epoch": 0.969313799713991, "grad_norm": 0.5478594899177551, "learning_rate": 7.395752696193192e-08, "loss": 0.0472, "step": 54734 }, { "epoch": 0.9693315092510194, "grad_norm": 1.1503283977508545, "learning_rate": 7.387222046106868e-08, "loss": 0.0725, "step": 54735 }, { "epoch": 0.9693492187880478, "grad_norm": 0.6135846376419067, "learning_rate": 7.378696306565513e-08, "loss": 0.0619, "step": 54736 }, { "epoch": 0.9693669283250763, "grad_norm": 0.40839245915412903, "learning_rate": 7.370175477597273e-08, "loss": 0.0506, "step": 54737 }, { "epoch": 0.9693846378621047, "grad_norm": 0.5697280764579773, "learning_rate": 7.361659559230128e-08, "loss": 0.0455, "step": 54738 }, { "epoch": 0.9694023473991331, "grad_norm": 0.44425973296165466, "learning_rate": 7.353148551492217e-08, "loss": 0.0274, "step": 54739 }, { "epoch": 0.9694200569361615, "grad_norm": 0.5476217269897461, "learning_rate": 7.344642454411521e-08, "loss": 0.0562, "step": 54740 }, { "epoch": 0.96943776647319, "grad_norm": 0.6660053133964539, "learning_rate": 7.33614126801585e-08, "loss": 0.0434, "step": 54741 }, { "epoch": 0.9694554760102184, "grad_norm": 0.8891292810440063, "learning_rate": 7.327644992333349e-08, "loss": 0.0294, "step": 54742 }, { "epoch": 0.9694731855472468, "grad_norm": 0.5323050022125244, "learning_rate": 7.319153627391994e-08, "loss": 0.0354, "step": 54743 }, { "epoch": 0.9694908950842752, "grad_norm": 0.6252221465110779, "learning_rate": 7.310667173219598e-08, "loss": 0.059, "step": 54744 }, { "epoch": 0.9695086046213037, "grad_norm": 0.604363739490509, "learning_rate": 7.302185629844138e-08, "loss": 0.0329, "step": 54745 }, { "epoch": 0.9695263141583321, "grad_norm": 0.642369270324707, "learning_rate": 7.293708997293592e-08, "loss": 0.0418, "step": 54746 }, { "epoch": 0.9695440236953605, "grad_norm": 0.5266346335411072, "learning_rate": 7.285237275595602e-08, "loss": 0.0605, "step": 54747 }, { "epoch": 0.9695617332323889, "grad_norm": 0.8525661826133728, "learning_rate": 7.276770464778316e-08, "loss": 0.0724, "step": 54748 }, { "epoch": 0.9695794427694174, "grad_norm": 0.2313433587551117, "learning_rate": 7.268308564869541e-08, "loss": 0.0271, "step": 54749 }, { "epoch": 0.9695971523064458, "grad_norm": 0.26373669505119324, "learning_rate": 7.259851575896925e-08, "loss": 0.0385, "step": 54750 }, { "epoch": 0.9696148618434742, "grad_norm": 0.26826754212379456, "learning_rate": 7.251399497888444e-08, "loss": 0.0199, "step": 54751 }, { "epoch": 0.9696325713805027, "grad_norm": 0.7917393445968628, "learning_rate": 7.242952330871743e-08, "loss": 0.0572, "step": 54752 }, { "epoch": 0.9696502809175311, "grad_norm": 0.37896236777305603, "learning_rate": 7.234510074874967e-08, "loss": 0.0274, "step": 54753 }, { "epoch": 0.9696679904545595, "grad_norm": 0.35704275965690613, "learning_rate": 7.226072729925592e-08, "loss": 0.0556, "step": 54754 }, { "epoch": 0.9696856999915879, "grad_norm": 0.6740049123764038, "learning_rate": 7.21764029605143e-08, "loss": 0.037, "step": 54755 }, { "epoch": 0.9697034095286164, "grad_norm": 0.45214608311653137, "learning_rate": 7.209212773280294e-08, "loss": 0.0338, "step": 54756 }, { "epoch": 0.9697211190656448, "grad_norm": 0.5213009119033813, "learning_rate": 7.200790161639825e-08, "loss": 0.0639, "step": 54757 }, { "epoch": 0.9697388286026732, "grad_norm": 0.6775357127189636, "learning_rate": 7.192372461157836e-08, "loss": 0.0521, "step": 54758 }, { "epoch": 0.9697565381397016, "grad_norm": 0.7729012966156006, "learning_rate": 7.183959671861806e-08, "loss": 0.059, "step": 54759 }, { "epoch": 0.9697742476767302, "grad_norm": 0.8719190955162048, "learning_rate": 7.175551793779711e-08, "loss": 0.0403, "step": 54760 }, { "epoch": 0.9697919572137585, "grad_norm": 0.40438583493232727, "learning_rate": 7.167148826938863e-08, "loss": 0.0584, "step": 54761 }, { "epoch": 0.969809666750787, "grad_norm": 0.6475053429603577, "learning_rate": 7.158750771367239e-08, "loss": 0.0462, "step": 54762 }, { "epoch": 0.9698273762878153, "grad_norm": 0.6227352023124695, "learning_rate": 7.150357627092318e-08, "loss": 0.0455, "step": 54763 }, { "epoch": 0.9698450858248439, "grad_norm": 0.43045613169670105, "learning_rate": 7.141969394141912e-08, "loss": 0.0425, "step": 54764 }, { "epoch": 0.9698627953618723, "grad_norm": 0.5122536420822144, "learning_rate": 7.133586072543163e-08, "loss": 0.0599, "step": 54765 }, { "epoch": 0.9698805048989007, "grad_norm": 0.338627427816391, "learning_rate": 7.125207662324051e-08, "loss": 0.0662, "step": 54766 }, { "epoch": 0.9698982144359292, "grad_norm": 0.6226585507392883, "learning_rate": 7.116834163512054e-08, "loss": 0.0401, "step": 54767 }, { "epoch": 0.9699159239729576, "grad_norm": 0.5681803822517395, "learning_rate": 7.108465576134481e-08, "loss": 0.0602, "step": 54768 }, { "epoch": 0.969933633509986, "grad_norm": 0.437052458524704, "learning_rate": 7.100101900219146e-08, "loss": 0.0291, "step": 54769 }, { "epoch": 0.9699513430470144, "grad_norm": 0.5518612265586853, "learning_rate": 7.091743135793527e-08, "loss": 0.0531, "step": 54770 }, { "epoch": 0.9699690525840429, "grad_norm": 0.530547559261322, "learning_rate": 7.0833892828851e-08, "loss": 0.0457, "step": 54771 }, { "epoch": 0.9699867621210713, "grad_norm": 0.643515408039093, "learning_rate": 7.075040341521177e-08, "loss": 0.0619, "step": 54772 }, { "epoch": 0.9700044716580997, "grad_norm": 0.43190550804138184, "learning_rate": 7.06669631172957e-08, "loss": 0.0397, "step": 54773 }, { "epoch": 0.9700221811951281, "grad_norm": 0.4042787253856659, "learning_rate": 7.058357193537423e-08, "loss": 0.0847, "step": 54774 }, { "epoch": 0.9700398907321566, "grad_norm": 0.5148805975914001, "learning_rate": 7.050022986972215e-08, "loss": 0.0559, "step": 54775 }, { "epoch": 0.970057600269185, "grad_norm": 0.9346925020217896, "learning_rate": 7.04169369206159e-08, "loss": 0.0343, "step": 54776 }, { "epoch": 0.9700753098062134, "grad_norm": 0.5813769102096558, "learning_rate": 7.033369308832694e-08, "loss": 0.0581, "step": 54777 }, { "epoch": 0.9700930193432418, "grad_norm": 0.9003203511238098, "learning_rate": 7.025049837313002e-08, "loss": 0.0558, "step": 54778 }, { "epoch": 0.9701107288802703, "grad_norm": 0.45541366934776306, "learning_rate": 7.016735277529995e-08, "loss": 0.0606, "step": 54779 }, { "epoch": 0.9701284384172987, "grad_norm": 0.199165940284729, "learning_rate": 7.008425629510651e-08, "loss": 0.0519, "step": 54780 }, { "epoch": 0.9701461479543271, "grad_norm": 0.303119421005249, "learning_rate": 7.00012089328278e-08, "loss": 0.0288, "step": 54781 }, { "epoch": 0.9701638574913556, "grad_norm": 1.5415090322494507, "learning_rate": 6.991821068873527e-08, "loss": 0.0595, "step": 54782 }, { "epoch": 0.970181567028384, "grad_norm": 0.7353207468986511, "learning_rate": 6.983526156310205e-08, "loss": 0.0557, "step": 54783 }, { "epoch": 0.9701992765654124, "grad_norm": 0.6297281384468079, "learning_rate": 6.975236155619957e-08, "loss": 0.0654, "step": 54784 }, { "epoch": 0.9702169861024408, "grad_norm": 0.5560879707336426, "learning_rate": 6.966951066830263e-08, "loss": 0.0587, "step": 54785 }, { "epoch": 0.9702346956394693, "grad_norm": 0.5322774052619934, "learning_rate": 6.958670889968266e-08, "loss": 0.0263, "step": 54786 }, { "epoch": 0.9702524051764977, "grad_norm": 0.6329523324966431, "learning_rate": 6.950395625061112e-08, "loss": 0.0458, "step": 54787 }, { "epoch": 0.9702701147135261, "grad_norm": 0.5153746008872986, "learning_rate": 6.94212527213628e-08, "loss": 0.07, "step": 54788 }, { "epoch": 0.9702878242505545, "grad_norm": 0.6212686896324158, "learning_rate": 6.933859831220746e-08, "loss": 0.0707, "step": 54789 }, { "epoch": 0.970305533787583, "grad_norm": 0.34222185611724854, "learning_rate": 6.925599302341823e-08, "loss": 0.0554, "step": 54790 }, { "epoch": 0.9703232433246114, "grad_norm": 0.6593627333641052, "learning_rate": 6.917343685526656e-08, "loss": 0.0519, "step": 54791 }, { "epoch": 0.9703409528616398, "grad_norm": 0.7731807231903076, "learning_rate": 6.909092980802556e-08, "loss": 0.0728, "step": 54792 }, { "epoch": 0.9703586623986682, "grad_norm": 0.5941442847251892, "learning_rate": 6.900847188196335e-08, "loss": 0.0617, "step": 54793 }, { "epoch": 0.9703763719356967, "grad_norm": 0.5276944041252136, "learning_rate": 6.892606307735472e-08, "loss": 0.0723, "step": 54794 }, { "epoch": 0.9703940814727251, "grad_norm": 0.466738760471344, "learning_rate": 6.884370339446777e-08, "loss": 0.0585, "step": 54795 }, { "epoch": 0.9704117910097535, "grad_norm": 0.5620360374450684, "learning_rate": 6.876139283357563e-08, "loss": 0.0493, "step": 54796 }, { "epoch": 0.970429500546782, "grad_norm": 1.1488773822784424, "learning_rate": 6.867913139494808e-08, "loss": 0.0757, "step": 54797 }, { "epoch": 0.9704472100838104, "grad_norm": 0.4274999797344208, "learning_rate": 6.859691907885657e-08, "loss": 0.0574, "step": 54798 }, { "epoch": 0.9704649196208388, "grad_norm": 0.21870917081832886, "learning_rate": 6.851475588556921e-08, "loss": 0.0376, "step": 54799 }, { "epoch": 0.9704826291578672, "grad_norm": 0.5953943133354187, "learning_rate": 6.843264181535913e-08, "loss": 0.0532, "step": 54800 }, { "epoch": 0.9705003386948957, "grad_norm": 0.3182675838470459, "learning_rate": 6.835057686849611e-08, "loss": 0.057, "step": 54801 }, { "epoch": 0.9705180482319241, "grad_norm": 0.7002854347229004, "learning_rate": 6.826856104524826e-08, "loss": 0.0919, "step": 54802 }, { "epoch": 0.9705357577689525, "grad_norm": 0.49327459931373596, "learning_rate": 6.818659434588703e-08, "loss": 0.0519, "step": 54803 }, { "epoch": 0.9705534673059809, "grad_norm": 0.3783864974975586, "learning_rate": 6.810467677068222e-08, "loss": 0.0798, "step": 54804 }, { "epoch": 0.9705711768430094, "grad_norm": 0.5252849459648132, "learning_rate": 6.802280831990193e-08, "loss": 0.0493, "step": 54805 }, { "epoch": 0.9705888863800378, "grad_norm": 1.2007535696029663, "learning_rate": 6.794098899381762e-08, "loss": 0.0846, "step": 54806 }, { "epoch": 0.9706065959170662, "grad_norm": 0.6199641227722168, "learning_rate": 6.785921879269574e-08, "loss": 0.0543, "step": 54807 }, { "epoch": 0.9706243054540946, "grad_norm": 0.7548437118530273, "learning_rate": 6.777749771680775e-08, "loss": 0.0573, "step": 54808 }, { "epoch": 0.9706420149911231, "grad_norm": 0.6790045499801636, "learning_rate": 6.769582576642008e-08, "loss": 0.0572, "step": 54809 }, { "epoch": 0.9706597245281515, "grad_norm": 0.7210732102394104, "learning_rate": 6.76142029418042e-08, "loss": 0.0479, "step": 54810 }, { "epoch": 0.9706774340651799, "grad_norm": 0.8410405516624451, "learning_rate": 6.753262924322656e-08, "loss": 0.0584, "step": 54811 }, { "epoch": 0.9706951436022084, "grad_norm": 0.7113348841667175, "learning_rate": 6.745110467095695e-08, "loss": 0.0607, "step": 54812 }, { "epoch": 0.9707128531392368, "grad_norm": 0.7530766129493713, "learning_rate": 6.73696292252618e-08, "loss": 0.0511, "step": 54813 }, { "epoch": 0.9707305626762652, "grad_norm": 0.6862155199050903, "learning_rate": 6.728820290641091e-08, "loss": 0.0843, "step": 54814 }, { "epoch": 0.9707482722132936, "grad_norm": 0.5237832069396973, "learning_rate": 6.72068257146724e-08, "loss": 0.0714, "step": 54815 }, { "epoch": 0.9707659817503221, "grad_norm": 0.31551653146743774, "learning_rate": 6.712549765031106e-08, "loss": 0.0422, "step": 54816 }, { "epoch": 0.9707836912873505, "grad_norm": 0.8703738451004028, "learning_rate": 6.704421871359667e-08, "loss": 0.0656, "step": 54817 }, { "epoch": 0.9708014008243789, "grad_norm": 0.6519786715507507, "learning_rate": 6.696298890479735e-08, "loss": 0.0639, "step": 54818 }, { "epoch": 0.9708191103614073, "grad_norm": 0.9199588298797607, "learning_rate": 6.688180822417955e-08, "loss": 0.0612, "step": 54819 }, { "epoch": 0.9708368198984358, "grad_norm": 0.5942396521568298, "learning_rate": 6.680067667200973e-08, "loss": 0.0548, "step": 54820 }, { "epoch": 0.9708545294354642, "grad_norm": 0.8158605694770813, "learning_rate": 6.671959424855601e-08, "loss": 0.0811, "step": 54821 }, { "epoch": 0.9708722389724926, "grad_norm": 0.3648143410682678, "learning_rate": 6.663856095408482e-08, "loss": 0.0387, "step": 54822 }, { "epoch": 0.970889948509521, "grad_norm": 0.4000729024410248, "learning_rate": 6.655757678886099e-08, "loss": 0.0285, "step": 54823 }, { "epoch": 0.9709076580465495, "grad_norm": 0.4760880470275879, "learning_rate": 6.64766417531526e-08, "loss": 0.0478, "step": 54824 }, { "epoch": 0.970925367583578, "grad_norm": 0.2770320475101471, "learning_rate": 6.639575584722613e-08, "loss": 0.0438, "step": 54825 }, { "epoch": 0.9709430771206063, "grad_norm": 0.5565032362937927, "learning_rate": 6.631491907134635e-08, "loss": 0.0443, "step": 54826 }, { "epoch": 0.9709607866576349, "grad_norm": 0.6826890110969543, "learning_rate": 6.623413142577971e-08, "loss": 0.0596, "step": 54827 }, { "epoch": 0.9709784961946633, "grad_norm": 0.49097004532814026, "learning_rate": 6.615339291079436e-08, "loss": 0.042, "step": 54828 }, { "epoch": 0.9709962057316917, "grad_norm": 0.39351773262023926, "learning_rate": 6.607270352665173e-08, "loss": 0.0446, "step": 54829 }, { "epoch": 0.97101391526872, "grad_norm": 0.5386072993278503, "learning_rate": 6.599206327361995e-08, "loss": 0.042, "step": 54830 }, { "epoch": 0.9710316248057486, "grad_norm": 0.462879478931427, "learning_rate": 6.591147215196547e-08, "loss": 0.0659, "step": 54831 }, { "epoch": 0.971049334342777, "grad_norm": 0.47546422481536865, "learning_rate": 6.583093016194975e-08, "loss": 0.0333, "step": 54832 }, { "epoch": 0.9710670438798054, "grad_norm": 0.31986069679260254, "learning_rate": 6.57504373038409e-08, "loss": 0.0566, "step": 54833 }, { "epoch": 0.9710847534168338, "grad_norm": 0.5531526207923889, "learning_rate": 6.566999357790204e-08, "loss": 0.0423, "step": 54834 }, { "epoch": 0.9711024629538623, "grad_norm": 0.8245074152946472, "learning_rate": 6.558959898439798e-08, "loss": 0.0585, "step": 54835 }, { "epoch": 0.9711201724908907, "grad_norm": 0.824070394039154, "learning_rate": 6.550925352359516e-08, "loss": 0.0672, "step": 54836 }, { "epoch": 0.9711378820279191, "grad_norm": 0.7884644865989685, "learning_rate": 6.542895719575503e-08, "loss": 0.08, "step": 54837 }, { "epoch": 0.9711555915649475, "grad_norm": 0.7820003628730774, "learning_rate": 6.53487100011424e-08, "loss": 0.0344, "step": 54838 }, { "epoch": 0.971173301101976, "grad_norm": 0.4342394173145294, "learning_rate": 6.52685119400237e-08, "loss": 0.0604, "step": 54839 }, { "epoch": 0.9711910106390044, "grad_norm": 0.5359641909599304, "learning_rate": 6.51883630126604e-08, "loss": 0.0514, "step": 54840 }, { "epoch": 0.9712087201760328, "grad_norm": 0.5210751891136169, "learning_rate": 6.510826321931563e-08, "loss": 0.0421, "step": 54841 }, { "epoch": 0.9712264297130613, "grad_norm": 0.45559513568878174, "learning_rate": 6.502821256025581e-08, "loss": 0.0445, "step": 54842 }, { "epoch": 0.9712441392500897, "grad_norm": 0.7619799375534058, "learning_rate": 6.494821103574244e-08, "loss": 0.0724, "step": 54843 }, { "epoch": 0.9712618487871181, "grad_norm": 0.6159107089042664, "learning_rate": 6.486825864603862e-08, "loss": 0.028, "step": 54844 }, { "epoch": 0.9712795583241465, "grad_norm": 0.5404706597328186, "learning_rate": 6.478835539140748e-08, "loss": 0.058, "step": 54845 }, { "epoch": 0.971297267861175, "grad_norm": 0.518794596195221, "learning_rate": 6.470850127211214e-08, "loss": 0.0465, "step": 54846 }, { "epoch": 0.9713149773982034, "grad_norm": 0.44980213046073914, "learning_rate": 6.462869628841406e-08, "loss": 0.0414, "step": 54847 }, { "epoch": 0.9713326869352318, "grad_norm": 0.8100374937057495, "learning_rate": 6.454894044057802e-08, "loss": 0.0685, "step": 54848 }, { "epoch": 0.9713503964722602, "grad_norm": 0.6298811435699463, "learning_rate": 6.446923372886549e-08, "loss": 0.0479, "step": 54849 }, { "epoch": 0.9713681060092887, "grad_norm": 0.6232229471206665, "learning_rate": 6.438957615353791e-08, "loss": 0.0501, "step": 54850 }, { "epoch": 0.9713858155463171, "grad_norm": 0.6506872177124023, "learning_rate": 6.430996771485842e-08, "loss": 0.0293, "step": 54851 }, { "epoch": 0.9714035250833455, "grad_norm": 0.5242139101028442, "learning_rate": 6.423040841308848e-08, "loss": 0.0259, "step": 54852 }, { "epoch": 0.9714212346203739, "grad_norm": 0.6958763599395752, "learning_rate": 6.415089824849119e-08, "loss": 0.0433, "step": 54853 }, { "epoch": 0.9714389441574024, "grad_norm": 0.7067924737930298, "learning_rate": 6.40714372213247e-08, "loss": 0.0623, "step": 54854 }, { "epoch": 0.9714566536944308, "grad_norm": 0.9909313917160034, "learning_rate": 6.399202533185378e-08, "loss": 0.073, "step": 54855 }, { "epoch": 0.9714743632314592, "grad_norm": 0.4817776381969452, "learning_rate": 6.391266258033657e-08, "loss": 0.059, "step": 54856 }, { "epoch": 0.9714920727684877, "grad_norm": 0.3987243175506592, "learning_rate": 6.383334896703785e-08, "loss": 0.0433, "step": 54857 }, { "epoch": 0.9715097823055161, "grad_norm": 0.6314260363578796, "learning_rate": 6.375408449221576e-08, "loss": 0.0475, "step": 54858 }, { "epoch": 0.9715274918425445, "grad_norm": 0.7218994498252869, "learning_rate": 6.367486915613174e-08, "loss": 0.042, "step": 54859 }, { "epoch": 0.9715452013795729, "grad_norm": 0.43262970447540283, "learning_rate": 6.359570295904893e-08, "loss": 0.0518, "step": 54860 }, { "epoch": 0.9715629109166014, "grad_norm": 0.6300865411758423, "learning_rate": 6.351658590122377e-08, "loss": 0.0578, "step": 54861 }, { "epoch": 0.9715806204536298, "grad_norm": 0.8882845640182495, "learning_rate": 6.343751798291774e-08, "loss": 0.0643, "step": 54862 }, { "epoch": 0.9715983299906582, "grad_norm": 0.5415992736816406, "learning_rate": 6.33584992043923e-08, "loss": 0.0488, "step": 54863 }, { "epoch": 0.9716160395276866, "grad_norm": 0.576029896736145, "learning_rate": 6.327952956590722e-08, "loss": 0.0292, "step": 54864 }, { "epoch": 0.9716337490647151, "grad_norm": 0.6758381724357605, "learning_rate": 6.320060906772062e-08, "loss": 0.0906, "step": 54865 }, { "epoch": 0.9716514586017435, "grad_norm": 0.7794586420059204, "learning_rate": 6.312173771009399e-08, "loss": 0.0747, "step": 54866 }, { "epoch": 0.9716691681387719, "grad_norm": 0.43220964074134827, "learning_rate": 6.304291549328545e-08, "loss": 0.0553, "step": 54867 }, { "epoch": 0.9716868776758003, "grad_norm": 0.769141674041748, "learning_rate": 6.296414241755477e-08, "loss": 0.0408, "step": 54868 }, { "epoch": 0.9717045872128288, "grad_norm": 0.48499003052711487, "learning_rate": 6.288541848316176e-08, "loss": 0.0451, "step": 54869 }, { "epoch": 0.9717222967498572, "grad_norm": 0.658341109752655, "learning_rate": 6.28067436903662e-08, "loss": 0.0614, "step": 54870 }, { "epoch": 0.9717400062868856, "grad_norm": 0.36948439478874207, "learning_rate": 6.272811803942457e-08, "loss": 0.0517, "step": 54871 }, { "epoch": 0.9717577158239141, "grad_norm": 0.6921157240867615, "learning_rate": 6.264954153059499e-08, "loss": 0.0372, "step": 54872 }, { "epoch": 0.9717754253609425, "grad_norm": 0.5795513391494751, "learning_rate": 6.257101416414057e-08, "loss": 0.0555, "step": 54873 }, { "epoch": 0.9717931348979709, "grad_norm": 0.5213643312454224, "learning_rate": 6.249253594031446e-08, "loss": 0.0379, "step": 54874 }, { "epoch": 0.9718108444349993, "grad_norm": 0.8757326006889343, "learning_rate": 6.24141068593781e-08, "loss": 0.0793, "step": 54875 }, { "epoch": 0.9718285539720278, "grad_norm": 0.5676910877227783, "learning_rate": 6.233572692158795e-08, "loss": 0.0581, "step": 54876 }, { "epoch": 0.9718462635090562, "grad_norm": 0.42805349826812744, "learning_rate": 6.225739612720216e-08, "loss": 0.0425, "step": 54877 }, { "epoch": 0.9718639730460846, "grad_norm": 0.45548373460769653, "learning_rate": 6.217911447647884e-08, "loss": 0.0427, "step": 54878 }, { "epoch": 0.971881682583113, "grad_norm": 0.41298702359199524, "learning_rate": 6.210088196967612e-08, "loss": 0.0508, "step": 54879 }, { "epoch": 0.9718993921201415, "grad_norm": 0.6077884435653687, "learning_rate": 6.202269860705045e-08, "loss": 0.0527, "step": 54880 }, { "epoch": 0.9719171016571699, "grad_norm": 0.4653911590576172, "learning_rate": 6.194456438885832e-08, "loss": 0.0402, "step": 54881 }, { "epoch": 0.9719348111941983, "grad_norm": 0.5431618690490723, "learning_rate": 6.18664793153595e-08, "loss": 0.0528, "step": 54882 }, { "epoch": 0.9719525207312267, "grad_norm": 0.6270307302474976, "learning_rate": 6.178844338680711e-08, "loss": 0.0377, "step": 54883 }, { "epoch": 0.9719702302682552, "grad_norm": 0.15886275470256805, "learning_rate": 6.171045660346097e-08, "loss": 0.0612, "step": 54884 }, { "epoch": 0.9719879398052836, "grad_norm": 0.6758516430854797, "learning_rate": 6.163251896557586e-08, "loss": 0.0673, "step": 54885 }, { "epoch": 0.972005649342312, "grad_norm": 0.3378843665122986, "learning_rate": 6.155463047340825e-08, "loss": 0.0531, "step": 54886 }, { "epoch": 0.9720233588793405, "grad_norm": 0.5285148024559021, "learning_rate": 6.147679112721627e-08, "loss": 0.0597, "step": 54887 }, { "epoch": 0.972041068416369, "grad_norm": 0.5336790680885315, "learning_rate": 6.13990009272547e-08, "loss": 0.0628, "step": 54888 }, { "epoch": 0.9720587779533973, "grad_norm": 0.3706553876399994, "learning_rate": 6.132125987377834e-08, "loss": 0.0437, "step": 54889 }, { "epoch": 0.9720764874904257, "grad_norm": 0.13969135284423828, "learning_rate": 6.124356796704533e-08, "loss": 0.0425, "step": 54890 }, { "epoch": 0.9720941970274543, "grad_norm": 0.7471635937690735, "learning_rate": 6.11659252073088e-08, "loss": 0.0689, "step": 54891 }, { "epoch": 0.9721119065644827, "grad_norm": 0.4509957730770111, "learning_rate": 6.108833159482686e-08, "loss": 0.0518, "step": 54892 }, { "epoch": 0.972129616101511, "grad_norm": 0.7949771285057068, "learning_rate": 6.101078712985264e-08, "loss": 0.0478, "step": 54893 }, { "epoch": 0.9721473256385395, "grad_norm": 0.57297682762146, "learning_rate": 6.093329181264095e-08, "loss": 0.0467, "step": 54894 }, { "epoch": 0.972165035175568, "grad_norm": 0.4604955315589905, "learning_rate": 6.085584564344827e-08, "loss": 0.0889, "step": 54895 }, { "epoch": 0.9721827447125964, "grad_norm": 0.4597122073173523, "learning_rate": 6.077844862252935e-08, "loss": 0.0591, "step": 54896 }, { "epoch": 0.9722004542496248, "grad_norm": 0.6541334390640259, "learning_rate": 6.070110075013901e-08, "loss": 0.0625, "step": 54897 }, { "epoch": 0.9722181637866532, "grad_norm": 0.4285624921321869, "learning_rate": 6.062380202653039e-08, "loss": 0.0611, "step": 54898 }, { "epoch": 0.9722358733236817, "grad_norm": 0.35681506991386414, "learning_rate": 6.054655245195829e-08, "loss": 0.0389, "step": 54899 }, { "epoch": 0.9722535828607101, "grad_norm": 0.38231825828552246, "learning_rate": 6.046935202667581e-08, "loss": 0.0469, "step": 54900 }, { "epoch": 0.9722712923977385, "grad_norm": 0.6631875038146973, "learning_rate": 6.039220075093943e-08, "loss": 0.0407, "step": 54901 }, { "epoch": 0.972289001934767, "grad_norm": 0.5289257168769836, "learning_rate": 6.031509862500228e-08, "loss": 0.0591, "step": 54902 }, { "epoch": 0.9723067114717954, "grad_norm": 0.6568787693977356, "learning_rate": 6.023804564911584e-08, "loss": 0.0773, "step": 54903 }, { "epoch": 0.9723244210088238, "grad_norm": 0.4268381893634796, "learning_rate": 6.016104182353654e-08, "loss": 0.04, "step": 54904 }, { "epoch": 0.9723421305458522, "grad_norm": 0.6687943935394287, "learning_rate": 6.008408714851421e-08, "loss": 0.0562, "step": 54905 }, { "epoch": 0.9723598400828807, "grad_norm": 0.7900252342224121, "learning_rate": 6.000718162430697e-08, "loss": 0.0604, "step": 54906 }, { "epoch": 0.9723775496199091, "grad_norm": 0.619408905506134, "learning_rate": 5.993032525116293e-08, "loss": 0.0582, "step": 54907 }, { "epoch": 0.9723952591569375, "grad_norm": 0.7636301517486572, "learning_rate": 5.985351802933859e-08, "loss": 0.0403, "step": 54908 }, { "epoch": 0.9724129686939659, "grad_norm": 0.4584738612174988, "learning_rate": 5.977675995908371e-08, "loss": 0.0402, "step": 54909 }, { "epoch": 0.9724306782309944, "grad_norm": 0.664621889591217, "learning_rate": 5.970005104065312e-08, "loss": 0.0535, "step": 54910 }, { "epoch": 0.9724483877680228, "grad_norm": 0.3608122766017914, "learning_rate": 5.962339127429827e-08, "loss": 0.0394, "step": 54911 }, { "epoch": 0.9724660973050512, "grad_norm": 0.9778138399124146, "learning_rate": 5.954678066027064e-08, "loss": 0.0426, "step": 54912 }, { "epoch": 0.9724838068420796, "grad_norm": 0.7265605330467224, "learning_rate": 5.947021919882334e-08, "loss": 0.0531, "step": 54913 }, { "epoch": 0.9725015163791081, "grad_norm": 0.7380088567733765, "learning_rate": 5.9393706890207846e-08, "loss": 0.1044, "step": 54914 }, { "epoch": 0.9725192259161365, "grad_norm": 0.4947240352630615, "learning_rate": 5.931724373467562e-08, "loss": 0.0576, "step": 54915 }, { "epoch": 0.9725369354531649, "grad_norm": 0.8647392988204956, "learning_rate": 5.924082973247813e-08, "loss": 0.0431, "step": 54916 }, { "epoch": 0.9725546449901934, "grad_norm": 0.5385186076164246, "learning_rate": 5.9164464883868506e-08, "loss": 0.0549, "step": 54917 }, { "epoch": 0.9725723545272218, "grad_norm": 0.3629927635192871, "learning_rate": 5.908814918909489e-08, "loss": 0.0493, "step": 54918 }, { "epoch": 0.9725900640642502, "grad_norm": 0.29496556520462036, "learning_rate": 5.901188264841206e-08, "loss": 0.0543, "step": 54919 }, { "epoch": 0.9726077736012786, "grad_norm": 0.45200175046920776, "learning_rate": 5.893566526206817e-08, "loss": 0.0331, "step": 54920 }, { "epoch": 0.9726254831383071, "grad_norm": 0.6345718502998352, "learning_rate": 5.885949703031468e-08, "loss": 0.0544, "step": 54921 }, { "epoch": 0.9726431926753355, "grad_norm": 0.917894721031189, "learning_rate": 5.878337795340139e-08, "loss": 0.0555, "step": 54922 }, { "epoch": 0.9726609022123639, "grad_norm": 0.5407027006149292, "learning_rate": 5.8707308031579754e-08, "loss": 0.0494, "step": 54923 }, { "epoch": 0.9726786117493923, "grad_norm": 0.5049844980239868, "learning_rate": 5.863128726509959e-08, "loss": 0.0439, "step": 54924 }, { "epoch": 0.9726963212864208, "grad_norm": 0.30001091957092285, "learning_rate": 5.855531565421235e-08, "loss": 0.0457, "step": 54925 }, { "epoch": 0.9727140308234492, "grad_norm": 0.8820924162864685, "learning_rate": 5.847939319916618e-08, "loss": 0.0474, "step": 54926 }, { "epoch": 0.9727317403604776, "grad_norm": 0.6712769269943237, "learning_rate": 5.8403519900212535e-08, "loss": 0.0584, "step": 54927 }, { "epoch": 0.972749449897506, "grad_norm": 0.47091707587242126, "learning_rate": 5.832769575759955e-08, "loss": 0.07, "step": 54928 }, { "epoch": 0.9727671594345345, "grad_norm": 0.387346476316452, "learning_rate": 5.825192077157704e-08, "loss": 0.0434, "step": 54929 }, { "epoch": 0.9727848689715629, "grad_norm": 0.6699082851409912, "learning_rate": 5.817619494239479e-08, "loss": 0.0468, "step": 54930 }, { "epoch": 0.9728025785085913, "grad_norm": 0.47034987807273865, "learning_rate": 5.810051827030094e-08, "loss": 0.0645, "step": 54931 }, { "epoch": 0.9728202880456198, "grad_norm": 0.6607725024223328, "learning_rate": 5.802489075554529e-08, "loss": 0.0425, "step": 54932 }, { "epoch": 0.9728379975826482, "grad_norm": 0.578097403049469, "learning_rate": 5.794931239837598e-08, "loss": 0.0435, "step": 54933 }, { "epoch": 0.9728557071196766, "grad_norm": 0.42570194602012634, "learning_rate": 5.7873783199044464e-08, "loss": 0.0512, "step": 54934 }, { "epoch": 0.972873416656705, "grad_norm": 0.2745231091976166, "learning_rate": 5.7798303157793887e-08, "loss": 0.0409, "step": 54935 }, { "epoch": 0.9728911261937335, "grad_norm": 0.39819514751434326, "learning_rate": 5.772287227487738e-08, "loss": 0.0416, "step": 54936 }, { "epoch": 0.9729088357307619, "grad_norm": 0.6643266677856445, "learning_rate": 5.7647490550541416e-08, "loss": 0.0774, "step": 54937 }, { "epoch": 0.9729265452677903, "grad_norm": 0.6093443036079407, "learning_rate": 5.7572157985032456e-08, "loss": 0.0587, "step": 54938 }, { "epoch": 0.9729442548048187, "grad_norm": 0.49669700860977173, "learning_rate": 5.749687457860031e-08, "loss": 0.0529, "step": 54939 }, { "epoch": 0.9729619643418472, "grad_norm": 0.5847472548484802, "learning_rate": 5.742164033149311e-08, "loss": 0.0408, "step": 54940 }, { "epoch": 0.9729796738788756, "grad_norm": 0.6438632607460022, "learning_rate": 5.734645524395565e-08, "loss": 0.0757, "step": 54941 }, { "epoch": 0.972997383415904, "grad_norm": 0.7759942412376404, "learning_rate": 5.727131931623775e-08, "loss": 0.0452, "step": 54942 }, { "epoch": 0.9730150929529324, "grad_norm": 0.7442989349365234, "learning_rate": 5.7196232548587525e-08, "loss": 0.059, "step": 54943 }, { "epoch": 0.9730328024899609, "grad_norm": 0.5040945410728455, "learning_rate": 5.7121194941248126e-08, "loss": 0.0532, "step": 54944 }, { "epoch": 0.9730505120269893, "grad_norm": 0.5722774863243103, "learning_rate": 5.7046206494469346e-08, "loss": 0.0398, "step": 54945 }, { "epoch": 0.9730682215640177, "grad_norm": 0.5572210550308228, "learning_rate": 5.697126720849766e-08, "loss": 0.0528, "step": 54946 }, { "epoch": 0.9730859311010462, "grad_norm": 0.3946298360824585, "learning_rate": 5.6896377083577866e-08, "loss": 0.0177, "step": 54947 }, { "epoch": 0.9731036406380746, "grad_norm": 0.3476121425628662, "learning_rate": 5.682153611995977e-08, "loss": 0.0477, "step": 54948 }, { "epoch": 0.973121350175103, "grad_norm": 0.5379712581634521, "learning_rate": 5.6746744317884844e-08, "loss": 0.0508, "step": 54949 }, { "epoch": 0.9731390597121314, "grad_norm": 0.4415334463119507, "learning_rate": 5.667200167760289e-08, "loss": 0.0467, "step": 54950 }, { "epoch": 0.97315676924916, "grad_norm": 0.5760126709938049, "learning_rate": 5.659730819935871e-08, "loss": 0.0447, "step": 54951 }, { "epoch": 0.9731744787861883, "grad_norm": 0.6012268662452698, "learning_rate": 5.652266388339711e-08, "loss": 0.0782, "step": 54952 }, { "epoch": 0.9731921883232167, "grad_norm": 0.5507386326789856, "learning_rate": 5.6448068729966216e-08, "loss": 0.0633, "step": 54953 }, { "epoch": 0.9732098978602451, "grad_norm": 0.664839506149292, "learning_rate": 5.6373522739307514e-08, "loss": 0.0731, "step": 54954 }, { "epoch": 0.9732276073972737, "grad_norm": 0.40828588604927063, "learning_rate": 5.62990259116708e-08, "loss": 0.0564, "step": 54955 }, { "epoch": 0.973245316934302, "grad_norm": 0.5514529943466187, "learning_rate": 5.622457824729754e-08, "loss": 0.0556, "step": 54956 }, { "epoch": 0.9732630264713305, "grad_norm": 0.30393850803375244, "learning_rate": 5.6150179746434216e-08, "loss": 0.059, "step": 54957 }, { "epoch": 0.9732807360083588, "grad_norm": 0.353540301322937, "learning_rate": 5.607583040932396e-08, "loss": 0.0237, "step": 54958 }, { "epoch": 0.9732984455453874, "grad_norm": 0.9103314280509949, "learning_rate": 5.600153023621491e-08, "loss": 0.0787, "step": 54959 }, { "epoch": 0.9733161550824158, "grad_norm": 0.6544302701950073, "learning_rate": 5.592727922734853e-08, "loss": 0.0457, "step": 54960 }, { "epoch": 0.9733338646194442, "grad_norm": 0.489734023809433, "learning_rate": 5.5853077382969634e-08, "loss": 0.0377, "step": 54961 }, { "epoch": 0.9733515741564727, "grad_norm": 1.000946044921875, "learning_rate": 5.5778924703321355e-08, "loss": 0.0497, "step": 54962 }, { "epoch": 0.9733692836935011, "grad_norm": 0.5802645087242126, "learning_rate": 5.5704821188651834e-08, "loss": 0.0794, "step": 54963 }, { "epoch": 0.9733869932305295, "grad_norm": 0.6114477515220642, "learning_rate": 5.563076683919921e-08, "loss": 0.0607, "step": 54964 }, { "epoch": 0.9734047027675579, "grad_norm": 0.44980093836784363, "learning_rate": 5.555676165521162e-08, "loss": 0.0396, "step": 54965 }, { "epoch": 0.9734224123045864, "grad_norm": 0.21964488923549652, "learning_rate": 5.5482805636930536e-08, "loss": 0.0376, "step": 54966 }, { "epoch": 0.9734401218416148, "grad_norm": 0.6181844472885132, "learning_rate": 5.540889878459909e-08, "loss": 0.0429, "step": 54967 }, { "epoch": 0.9734578313786432, "grad_norm": 1.0648823976516724, "learning_rate": 5.5335041098460435e-08, "loss": 0.0776, "step": 54968 }, { "epoch": 0.9734755409156716, "grad_norm": 0.6292725205421448, "learning_rate": 5.52612325787577e-08, "loss": 0.0484, "step": 54969 }, { "epoch": 0.9734932504527001, "grad_norm": 0.4442940950393677, "learning_rate": 5.5187473225734034e-08, "loss": 0.0316, "step": 54970 }, { "epoch": 0.9735109599897285, "grad_norm": 0.507453978061676, "learning_rate": 5.511376303963256e-08, "loss": 0.0396, "step": 54971 }, { "epoch": 0.9735286695267569, "grad_norm": 0.6586418747901917, "learning_rate": 5.5040102020696424e-08, "loss": 0.0716, "step": 54972 }, { "epoch": 0.9735463790637853, "grad_norm": 0.38319364190101624, "learning_rate": 5.496649016916544e-08, "loss": 0.0366, "step": 54973 }, { "epoch": 0.9735640886008138, "grad_norm": 0.7066130042076111, "learning_rate": 5.489292748528274e-08, "loss": 0.0565, "step": 54974 }, { "epoch": 0.9735817981378422, "grad_norm": 0.7617493271827698, "learning_rate": 5.4819413969291465e-08, "loss": 0.0554, "step": 54975 }, { "epoch": 0.9735995076748706, "grad_norm": 0.563616931438446, "learning_rate": 5.4745949621431426e-08, "loss": 0.0405, "step": 54976 }, { "epoch": 0.9736172172118991, "grad_norm": 0.6831980347633362, "learning_rate": 5.467253444194742e-08, "loss": 0.0717, "step": 54977 }, { "epoch": 0.9736349267489275, "grad_norm": 0.42759382724761963, "learning_rate": 5.45991684310776e-08, "loss": 0.064, "step": 54978 }, { "epoch": 0.9736526362859559, "grad_norm": 0.695731520652771, "learning_rate": 5.452585158906509e-08, "loss": 0.0506, "step": 54979 }, { "epoch": 0.9736703458229843, "grad_norm": 0.28664377331733704, "learning_rate": 5.445258391615138e-08, "loss": 0.0642, "step": 54980 }, { "epoch": 0.9736880553600128, "grad_norm": 0.6670951247215271, "learning_rate": 5.437936541257793e-08, "loss": 0.0622, "step": 54981 }, { "epoch": 0.9737057648970412, "grad_norm": 0.5971547961235046, "learning_rate": 5.430619607858289e-08, "loss": 0.0337, "step": 54982 }, { "epoch": 0.9737234744340696, "grad_norm": 0.6702582240104675, "learning_rate": 5.42330759144094e-08, "loss": 0.0451, "step": 54983 }, { "epoch": 0.973741183971098, "grad_norm": 1.1779485940933228, "learning_rate": 5.416000492029893e-08, "loss": 0.049, "step": 54984 }, { "epoch": 0.9737588935081265, "grad_norm": 0.6837769746780396, "learning_rate": 5.408698309648796e-08, "loss": 0.0424, "step": 54985 }, { "epoch": 0.9737766030451549, "grad_norm": 0.41011083126068115, "learning_rate": 5.40140104432213e-08, "loss": 0.0453, "step": 54986 }, { "epoch": 0.9737943125821833, "grad_norm": 0.7667937278747559, "learning_rate": 5.394108696073541e-08, "loss": 0.0522, "step": 54987 }, { "epoch": 0.9738120221192117, "grad_norm": 0.46336430311203003, "learning_rate": 5.3868212649271775e-08, "loss": 0.0488, "step": 54988 }, { "epoch": 0.9738297316562402, "grad_norm": 0.4773644506931305, "learning_rate": 5.37953875090702e-08, "loss": 0.0566, "step": 54989 }, { "epoch": 0.9738474411932686, "grad_norm": 0.706069827079773, "learning_rate": 5.37226115403705e-08, "loss": 0.0532, "step": 54990 }, { "epoch": 0.973865150730297, "grad_norm": 0.5935582518577576, "learning_rate": 5.364988474341082e-08, "loss": 0.0385, "step": 54991 }, { "epoch": 0.9738828602673255, "grad_norm": 0.4075019955635071, "learning_rate": 5.357720711843262e-08, "loss": 0.06, "step": 54992 }, { "epoch": 0.9739005698043539, "grad_norm": 0.5124244093894958, "learning_rate": 5.350457866567238e-08, "loss": 0.0559, "step": 54993 }, { "epoch": 0.9739182793413823, "grad_norm": 0.615764856338501, "learning_rate": 5.343199938536991e-08, "loss": 0.0625, "step": 54994 }, { "epoch": 0.9739359888784107, "grad_norm": 0.6987655162811279, "learning_rate": 5.335946927776669e-08, "loss": 0.0312, "step": 54995 }, { "epoch": 0.9739536984154392, "grad_norm": 1.2725880146026611, "learning_rate": 5.328698834309753e-08, "loss": 0.1015, "step": 54996 }, { "epoch": 0.9739714079524676, "grad_norm": 0.42240777611732483, "learning_rate": 5.321455658160224e-08, "loss": 0.0573, "step": 54997 }, { "epoch": 0.973989117489496, "grad_norm": 0.6777465343475342, "learning_rate": 5.314217399351895e-08, "loss": 0.0701, "step": 54998 }, { "epoch": 0.9740068270265244, "grad_norm": 0.5012499094009399, "learning_rate": 5.306984057908748e-08, "loss": 0.0282, "step": 54999 }, { "epoch": 0.9740245365635529, "grad_norm": 0.41709965467453003, "learning_rate": 5.299755633854264e-08, "loss": 0.0388, "step": 55000 }, { "epoch": 0.9740422461005813, "grad_norm": 1.1192065477371216, "learning_rate": 5.29253212721259e-08, "loss": 0.0834, "step": 55001 }, { "epoch": 0.9740599556376097, "grad_norm": 0.5953419804573059, "learning_rate": 5.2853135380072084e-08, "loss": 0.0682, "step": 55002 }, { "epoch": 0.9740776651746381, "grad_norm": 0.7482724189758301, "learning_rate": 5.278099866261932e-08, "loss": 0.0547, "step": 55003 }, { "epoch": 0.9740953747116666, "grad_norm": 0.4973399341106415, "learning_rate": 5.2708911120005756e-08, "loss": 0.062, "step": 55004 }, { "epoch": 0.974113084248695, "grad_norm": 0.8250840902328491, "learning_rate": 5.263687275246787e-08, "loss": 0.0776, "step": 55005 }, { "epoch": 0.9741307937857234, "grad_norm": 0.7586232423782349, "learning_rate": 5.2564883560243804e-08, "loss": 0.0376, "step": 55006 }, { "epoch": 0.9741485033227519, "grad_norm": 0.5906006693840027, "learning_rate": 5.249294354356837e-08, "loss": 0.0294, "step": 55007 }, { "epoch": 0.9741662128597803, "grad_norm": 0.5410860180854797, "learning_rate": 5.242105270267972e-08, "loss": 0.0423, "step": 55008 }, { "epoch": 0.9741839223968087, "grad_norm": 0.5951564908027649, "learning_rate": 5.234921103781265e-08, "loss": 0.0665, "step": 55009 }, { "epoch": 0.9742016319338371, "grad_norm": 0.7408517003059387, "learning_rate": 5.227741854920698e-08, "loss": 0.0581, "step": 55010 }, { "epoch": 0.9742193414708656, "grad_norm": 0.43415969610214233, "learning_rate": 5.2205675237095854e-08, "loss": 0.0233, "step": 55011 }, { "epoch": 0.974237051007894, "grad_norm": 0.8538000583648682, "learning_rate": 5.2133981101715746e-08, "loss": 0.0714, "step": 55012 }, { "epoch": 0.9742547605449224, "grad_norm": 0.43314793705940247, "learning_rate": 5.206233614330313e-08, "loss": 0.0607, "step": 55013 }, { "epoch": 0.9742724700819508, "grad_norm": 0.8075572848320007, "learning_rate": 5.1990740362092836e-08, "loss": 0.0536, "step": 55014 }, { "epoch": 0.9742901796189793, "grad_norm": 0.5836537480354309, "learning_rate": 5.191919375832299e-08, "loss": 0.034, "step": 55015 }, { "epoch": 0.9743078891560077, "grad_norm": 0.7546282410621643, "learning_rate": 5.184769633222675e-08, "loss": 0.0849, "step": 55016 }, { "epoch": 0.9743255986930361, "grad_norm": 0.4732609689235687, "learning_rate": 5.1776248084038916e-08, "loss": 0.0468, "step": 55017 }, { "epoch": 0.9743433082300645, "grad_norm": 0.5179817080497742, "learning_rate": 5.17048490139943e-08, "loss": 0.0437, "step": 55018 }, { "epoch": 0.974361017767093, "grad_norm": 0.48296836018562317, "learning_rate": 5.16334991223294e-08, "loss": 0.0547, "step": 55019 }, { "epoch": 0.9743787273041215, "grad_norm": 0.49118733406066895, "learning_rate": 5.156219840927901e-08, "loss": 0.0424, "step": 55020 }, { "epoch": 0.9743964368411498, "grad_norm": 0.33154380321502686, "learning_rate": 5.149094687507627e-08, "loss": 0.0576, "step": 55021 }, { "epoch": 0.9744141463781784, "grad_norm": 0.37785348296165466, "learning_rate": 5.141974451995768e-08, "loss": 0.0501, "step": 55022 }, { "epoch": 0.9744318559152068, "grad_norm": 0.7053034901618958, "learning_rate": 5.1348591344154705e-08, "loss": 0.057, "step": 55023 }, { "epoch": 0.9744495654522352, "grad_norm": 0.46249449253082275, "learning_rate": 5.127748734790383e-08, "loss": 0.0518, "step": 55024 }, { "epoch": 0.9744672749892636, "grad_norm": 0.5798645615577698, "learning_rate": 5.12064325314382e-08, "loss": 0.0578, "step": 55025 }, { "epoch": 0.9744849845262921, "grad_norm": 0.7453286647796631, "learning_rate": 5.113542689499096e-08, "loss": 0.0364, "step": 55026 }, { "epoch": 0.9745026940633205, "grad_norm": 0.7855237722396851, "learning_rate": 5.106447043879692e-08, "loss": 0.0728, "step": 55027 }, { "epoch": 0.9745204036003489, "grad_norm": 0.46453192830085754, "learning_rate": 5.099356316308756e-08, "loss": 0.0433, "step": 55028 }, { "epoch": 0.9745381131373773, "grad_norm": 0.47262808680534363, "learning_rate": 5.092270506809771e-08, "loss": 0.0506, "step": 55029 }, { "epoch": 0.9745558226744058, "grad_norm": 0.7543955445289612, "learning_rate": 5.0851896154060494e-08, "loss": 0.0604, "step": 55030 }, { "epoch": 0.9745735322114342, "grad_norm": 0.8090897798538208, "learning_rate": 5.078113642120741e-08, "loss": 0.0561, "step": 55031 }, { "epoch": 0.9745912417484626, "grad_norm": 0.41044139862060547, "learning_rate": 5.0710425869773256e-08, "loss": 0.0514, "step": 55032 }, { "epoch": 0.974608951285491, "grad_norm": 0.7133612632751465, "learning_rate": 5.0639764499991194e-08, "loss": 0.0486, "step": 55033 }, { "epoch": 0.9746266608225195, "grad_norm": 0.6037382483482361, "learning_rate": 5.0569152312091026e-08, "loss": 0.0578, "step": 55034 }, { "epoch": 0.9746443703595479, "grad_norm": 0.7979623675346375, "learning_rate": 5.0498589306305906e-08, "loss": 0.0499, "step": 55035 }, { "epoch": 0.9746620798965763, "grad_norm": 0.5717501044273376, "learning_rate": 5.0428075482868986e-08, "loss": 0.0428, "step": 55036 }, { "epoch": 0.9746797894336048, "grad_norm": 0.9641760587692261, "learning_rate": 5.035761084201007e-08, "loss": 0.0781, "step": 55037 }, { "epoch": 0.9746974989706332, "grad_norm": 1.009135365486145, "learning_rate": 5.028719538396398e-08, "loss": 0.0738, "step": 55038 }, { "epoch": 0.9747152085076616, "grad_norm": 0.38374996185302734, "learning_rate": 5.0216829108960526e-08, "loss": 0.0375, "step": 55039 }, { "epoch": 0.97473291804469, "grad_norm": 0.4112775921821594, "learning_rate": 5.014651201723286e-08, "loss": 0.0397, "step": 55040 }, { "epoch": 0.9747506275817185, "grad_norm": 0.8526771068572998, "learning_rate": 5.0076244109010794e-08, "loss": 0.0518, "step": 55041 }, { "epoch": 0.9747683371187469, "grad_norm": 0.5722682476043701, "learning_rate": 5.000602538452415e-08, "loss": 0.0455, "step": 55042 }, { "epoch": 0.9747860466557753, "grad_norm": 0.6818804740905762, "learning_rate": 4.993585584400606e-08, "loss": 0.0498, "step": 55043 }, { "epoch": 0.9748037561928037, "grad_norm": 0.5650399327278137, "learning_rate": 4.9865735487688023e-08, "loss": 0.0479, "step": 55044 }, { "epoch": 0.9748214657298322, "grad_norm": 0.6652479767799377, "learning_rate": 4.9795664315798185e-08, "loss": 0.0365, "step": 55045 }, { "epoch": 0.9748391752668606, "grad_norm": 0.8534939289093018, "learning_rate": 4.972564232856969e-08, "loss": 0.0563, "step": 55046 }, { "epoch": 0.974856884803889, "grad_norm": 0.7118238210678101, "learning_rate": 4.965566952623068e-08, "loss": 0.0474, "step": 55047 }, { "epoch": 0.9748745943409174, "grad_norm": 0.6234242916107178, "learning_rate": 4.9585745909014325e-08, "loss": 0.0556, "step": 55048 }, { "epoch": 0.9748923038779459, "grad_norm": 0.624275803565979, "learning_rate": 4.951587147714709e-08, "loss": 0.0716, "step": 55049 }, { "epoch": 0.9749100134149743, "grad_norm": 0.764096200466156, "learning_rate": 4.9446046230860464e-08, "loss": 0.0362, "step": 55050 }, { "epoch": 0.9749277229520027, "grad_norm": 0.4754524528980255, "learning_rate": 4.937627017038426e-08, "loss": 0.0503, "step": 55051 }, { "epoch": 0.9749454324890312, "grad_norm": 0.555842936038971, "learning_rate": 4.9306543295948305e-08, "loss": 0.0612, "step": 55052 }, { "epoch": 0.9749631420260596, "grad_norm": 0.36449092626571655, "learning_rate": 4.9236865607780735e-08, "loss": 0.0467, "step": 55053 }, { "epoch": 0.974980851563088, "grad_norm": 0.5592160820960999, "learning_rate": 4.916723710611304e-08, "loss": 0.09, "step": 55054 }, { "epoch": 0.9749985611001164, "grad_norm": 0.32988983392715454, "learning_rate": 4.909765779117337e-08, "loss": 0.0366, "step": 55055 }, { "epoch": 0.9750162706371449, "grad_norm": 0.7736194729804993, "learning_rate": 4.90281276631882e-08, "loss": 0.0738, "step": 55056 }, { "epoch": 0.9750339801741733, "grad_norm": 0.8153558373451233, "learning_rate": 4.895864672238903e-08, "loss": 0.0551, "step": 55057 }, { "epoch": 0.9750516897112017, "grad_norm": 0.5204774737358093, "learning_rate": 4.8889214969004e-08, "loss": 0.029, "step": 55058 }, { "epoch": 0.9750693992482301, "grad_norm": 0.32840776443481445, "learning_rate": 4.8819832403261265e-08, "loss": 0.0456, "step": 55059 }, { "epoch": 0.9750871087852586, "grad_norm": 0.420938640832901, "learning_rate": 4.875049902538897e-08, "loss": 0.0463, "step": 55060 }, { "epoch": 0.975104818322287, "grad_norm": 0.3965080976486206, "learning_rate": 4.8681214835615274e-08, "loss": 0.0494, "step": 55061 }, { "epoch": 0.9751225278593154, "grad_norm": 0.3932340741157532, "learning_rate": 4.861197983416832e-08, "loss": 0.0486, "step": 55062 }, { "epoch": 0.9751402373963438, "grad_norm": 0.6366393566131592, "learning_rate": 4.854279402127626e-08, "loss": 0.0378, "step": 55063 }, { "epoch": 0.9751579469333723, "grad_norm": 0.857040286064148, "learning_rate": 4.8473657397165584e-08, "loss": 0.0668, "step": 55064 }, { "epoch": 0.9751756564704007, "grad_norm": 0.6334042549133301, "learning_rate": 4.8404569962064436e-08, "loss": 0.0554, "step": 55065 }, { "epoch": 0.9751933660074291, "grad_norm": 0.20312288403511047, "learning_rate": 4.833553171619931e-08, "loss": 0.0394, "step": 55066 }, { "epoch": 0.9752110755444576, "grad_norm": 0.6818885207176208, "learning_rate": 4.826654265979835e-08, "loss": 0.0672, "step": 55067 }, { "epoch": 0.975228785081486, "grad_norm": 0.5879457592964172, "learning_rate": 4.8197602793088044e-08, "loss": 0.031, "step": 55068 }, { "epoch": 0.9752464946185144, "grad_norm": 0.39336344599723816, "learning_rate": 4.8128712116296546e-08, "loss": 0.0389, "step": 55069 }, { "epoch": 0.9752642041555428, "grad_norm": 0.669883668422699, "learning_rate": 4.805987062964867e-08, "loss": 0.0491, "step": 55070 }, { "epoch": 0.9752819136925713, "grad_norm": 0.9801656603813171, "learning_rate": 4.799107833337091e-08, "loss": 0.0784, "step": 55071 }, { "epoch": 0.9752996232295997, "grad_norm": 0.737183153629303, "learning_rate": 4.792233522768974e-08, "loss": 0.0617, "step": 55072 }, { "epoch": 0.9753173327666281, "grad_norm": 0.5198779106140137, "learning_rate": 4.7853641312833316e-08, "loss": 0.0551, "step": 55073 }, { "epoch": 0.9753350423036565, "grad_norm": 0.43893858790397644, "learning_rate": 4.77849965890248e-08, "loss": 0.0405, "step": 55074 }, { "epoch": 0.975352751840685, "grad_norm": 0.7121006846427917, "learning_rate": 4.7716401056492334e-08, "loss": 0.0461, "step": 55075 }, { "epoch": 0.9753704613777134, "grad_norm": 0.5189580917358398, "learning_rate": 4.7647854715460736e-08, "loss": 0.0771, "step": 55076 }, { "epoch": 0.9753881709147418, "grad_norm": 0.44716230034828186, "learning_rate": 4.757935756615317e-08, "loss": 0.0569, "step": 55077 }, { "epoch": 0.9754058804517702, "grad_norm": 0.7867108583450317, "learning_rate": 4.751090960879945e-08, "loss": 0.0893, "step": 55078 }, { "epoch": 0.9754235899887987, "grad_norm": 0.6219741702079773, "learning_rate": 4.744251084362106e-08, "loss": 0.0531, "step": 55079 }, { "epoch": 0.9754412995258271, "grad_norm": 0.5150837302207947, "learning_rate": 4.737416127084615e-08, "loss": 0.0439, "step": 55080 }, { "epoch": 0.9754590090628555, "grad_norm": 0.6312962770462036, "learning_rate": 4.730586089069622e-08, "loss": 0.0542, "step": 55081 }, { "epoch": 0.975476718599884, "grad_norm": 0.4756198525428772, "learning_rate": 4.723760970339941e-08, "loss": 0.0356, "step": 55082 }, { "epoch": 0.9754944281369125, "grad_norm": 0.5055896043777466, "learning_rate": 4.7169407709177215e-08, "loss": 0.0717, "step": 55083 }, { "epoch": 0.9755121376739408, "grad_norm": 0.5031940937042236, "learning_rate": 4.710125490825445e-08, "loss": 0.0289, "step": 55084 }, { "epoch": 0.9755298472109692, "grad_norm": 0.6364059448242188, "learning_rate": 4.703315130085761e-08, "loss": 0.0295, "step": 55085 }, { "epoch": 0.9755475567479978, "grad_norm": 0.8143738508224487, "learning_rate": 4.6965096887209846e-08, "loss": 0.0594, "step": 55086 }, { "epoch": 0.9755652662850262, "grad_norm": 0.6688119769096375, "learning_rate": 4.689709166753264e-08, "loss": 0.0463, "step": 55087 }, { "epoch": 0.9755829758220546, "grad_norm": 0.9525862336158752, "learning_rate": 4.682913564205249e-08, "loss": 0.0508, "step": 55088 }, { "epoch": 0.975600685359083, "grad_norm": 0.3529917001724243, "learning_rate": 4.676122881099254e-08, "loss": 0.061, "step": 55089 }, { "epoch": 0.9756183948961115, "grad_norm": 0.5310977697372437, "learning_rate": 4.669337117457429e-08, "loss": 0.0627, "step": 55090 }, { "epoch": 0.9756361044331399, "grad_norm": 0.7600628733634949, "learning_rate": 4.662556273302421e-08, "loss": 0.0608, "step": 55091 }, { "epoch": 0.9756538139701683, "grad_norm": 0.13592754304409027, "learning_rate": 4.6557803486562134e-08, "loss": 0.0515, "step": 55092 }, { "epoch": 0.9756715235071967, "grad_norm": 0.4816470146179199, "learning_rate": 4.649009343541288e-08, "loss": 0.0715, "step": 55093 }, { "epoch": 0.9756892330442252, "grad_norm": 0.3804153800010681, "learning_rate": 4.6422432579799613e-08, "loss": 0.0689, "step": 55094 }, { "epoch": 0.9757069425812536, "grad_norm": 0.6732572913169861, "learning_rate": 4.6354820919942145e-08, "loss": 0.0572, "step": 55095 }, { "epoch": 0.975724652118282, "grad_norm": 0.49717989563941956, "learning_rate": 4.628725845606696e-08, "loss": 0.0574, "step": 55096 }, { "epoch": 0.9757423616553105, "grad_norm": 0.8289556503295898, "learning_rate": 4.6219745188392225e-08, "loss": 0.0826, "step": 55097 }, { "epoch": 0.9757600711923389, "grad_norm": 0.960503339767456, "learning_rate": 4.615228111714442e-08, "loss": 0.0793, "step": 55098 }, { "epoch": 0.9757777807293673, "grad_norm": 0.3653044104576111, "learning_rate": 4.608486624254171e-08, "loss": 0.0351, "step": 55099 }, { "epoch": 0.9757954902663957, "grad_norm": 0.7860211133956909, "learning_rate": 4.601750056480725e-08, "loss": 0.0494, "step": 55100 }, { "epoch": 0.9758131998034242, "grad_norm": 0.5488837957382202, "learning_rate": 4.5950184084162514e-08, "loss": 0.0547, "step": 55101 }, { "epoch": 0.9758309093404526, "grad_norm": 0.37059393525123596, "learning_rate": 4.588291680082901e-08, "loss": 0.045, "step": 55102 }, { "epoch": 0.975848618877481, "grad_norm": 0.4853089451789856, "learning_rate": 4.5815698715029885e-08, "loss": 0.0357, "step": 55103 }, { "epoch": 0.9758663284145094, "grad_norm": 0.21985499560832977, "learning_rate": 4.57485298269833e-08, "loss": 0.0506, "step": 55104 }, { "epoch": 0.9758840379515379, "grad_norm": 0.6368761658668518, "learning_rate": 4.568141013691074e-08, "loss": 0.0608, "step": 55105 }, { "epoch": 0.9759017474885663, "grad_norm": 0.7418611645698547, "learning_rate": 4.561433964503536e-08, "loss": 0.0643, "step": 55106 }, { "epoch": 0.9759194570255947, "grad_norm": 0.2904396951198578, "learning_rate": 4.554731835157533e-08, "loss": 0.0244, "step": 55107 }, { "epoch": 0.9759371665626231, "grad_norm": 0.6770220994949341, "learning_rate": 4.548034625675212e-08, "loss": 0.0719, "step": 55108 }, { "epoch": 0.9759548760996516, "grad_norm": 0.644889235496521, "learning_rate": 4.5413423360785574e-08, "loss": 0.0429, "step": 55109 }, { "epoch": 0.97597258563668, "grad_norm": 0.38335567712783813, "learning_rate": 4.534654966389717e-08, "loss": 0.0488, "step": 55110 }, { "epoch": 0.9759902951737084, "grad_norm": 0.4552821218967438, "learning_rate": 4.527972516630674e-08, "loss": 0.059, "step": 55111 }, { "epoch": 0.9760080047107369, "grad_norm": 0.3652592599391937, "learning_rate": 4.521294986823243e-08, "loss": 0.023, "step": 55112 }, { "epoch": 0.9760257142477653, "grad_norm": 0.28294628858566284, "learning_rate": 4.514622376989408e-08, "loss": 0.054, "step": 55113 }, { "epoch": 0.9760434237847937, "grad_norm": 0.35909396409988403, "learning_rate": 4.5079546871513164e-08, "loss": 0.0584, "step": 55114 }, { "epoch": 0.9760611333218221, "grad_norm": 0.4737277626991272, "learning_rate": 4.5012919173307856e-08, "loss": 0.0619, "step": 55115 }, { "epoch": 0.9760788428588506, "grad_norm": 0.668217122554779, "learning_rate": 4.494634067549796e-08, "loss": 0.0741, "step": 55116 }, { "epoch": 0.976096552395879, "grad_norm": 0.5229017734527588, "learning_rate": 4.4879811378299994e-08, "loss": 0.0546, "step": 55117 }, { "epoch": 0.9761142619329074, "grad_norm": 0.9151774048805237, "learning_rate": 4.4813331281937096e-08, "loss": 0.0452, "step": 55118 }, { "epoch": 0.9761319714699358, "grad_norm": 0.3560175597667694, "learning_rate": 4.4746900386624104e-08, "loss": 0.062, "step": 55119 }, { "epoch": 0.9761496810069643, "grad_norm": 0.1820632815361023, "learning_rate": 4.46805186925825e-08, "loss": 0.0289, "step": 55120 }, { "epoch": 0.9761673905439927, "grad_norm": 0.40459898114204407, "learning_rate": 4.4614186200028774e-08, "loss": 0.0524, "step": 55121 }, { "epoch": 0.9761851000810211, "grad_norm": 0.7351487874984741, "learning_rate": 4.454790290918276e-08, "loss": 0.0482, "step": 55122 }, { "epoch": 0.9762028096180495, "grad_norm": 0.660612165927887, "learning_rate": 4.448166882025928e-08, "loss": 0.0693, "step": 55123 }, { "epoch": 0.976220519155078, "grad_norm": 0.8945354223251343, "learning_rate": 4.441548393347983e-08, "loss": 0.0591, "step": 55124 }, { "epoch": 0.9762382286921064, "grad_norm": 0.48525506258010864, "learning_rate": 4.434934824905923e-08, "loss": 0.0562, "step": 55125 }, { "epoch": 0.9762559382291348, "grad_norm": 0.8759883046150208, "learning_rate": 4.428326176721897e-08, "loss": 0.0653, "step": 55126 }, { "epoch": 0.9762736477661633, "grad_norm": 0.48249995708465576, "learning_rate": 4.421722448817222e-08, "loss": 0.0522, "step": 55127 }, { "epoch": 0.9762913573031917, "grad_norm": 0.559592068195343, "learning_rate": 4.415123641213881e-08, "loss": 0.0443, "step": 55128 }, { "epoch": 0.9763090668402201, "grad_norm": 0.7467687726020813, "learning_rate": 4.408529753933521e-08, "loss": 0.0582, "step": 55129 }, { "epoch": 0.9763267763772485, "grad_norm": 0.5166910290718079, "learning_rate": 4.40194078699796e-08, "loss": 0.0453, "step": 55130 }, { "epoch": 0.976344485914277, "grad_norm": 0.27609002590179443, "learning_rate": 4.3953567404285135e-08, "loss": 0.0573, "step": 55131 }, { "epoch": 0.9763621954513054, "grad_norm": 0.32703712582588196, "learning_rate": 4.3887776142471634e-08, "loss": 0.0349, "step": 55132 }, { "epoch": 0.9763799049883338, "grad_norm": 0.7747929692268372, "learning_rate": 4.3822034084755603e-08, "loss": 0.0554, "step": 55133 }, { "epoch": 0.9763976145253622, "grad_norm": 0.6524978280067444, "learning_rate": 4.375634123135186e-08, "loss": 0.0597, "step": 55134 }, { "epoch": 0.9764153240623907, "grad_norm": 1.000654697418213, "learning_rate": 4.369069758247524e-08, "loss": 0.0677, "step": 55135 }, { "epoch": 0.9764330335994191, "grad_norm": 0.21987281739711761, "learning_rate": 4.362510313834556e-08, "loss": 0.0426, "step": 55136 }, { "epoch": 0.9764507431364475, "grad_norm": 0.48529574275016785, "learning_rate": 4.3559557899175986e-08, "loss": 0.0571, "step": 55137 }, { "epoch": 0.9764684526734759, "grad_norm": 0.5373584628105164, "learning_rate": 4.349406186518301e-08, "loss": 0.0436, "step": 55138 }, { "epoch": 0.9764861622105044, "grad_norm": 0.5135824680328369, "learning_rate": 4.3428615036581465e-08, "loss": 0.0412, "step": 55139 }, { "epoch": 0.9765038717475328, "grad_norm": 0.657799482345581, "learning_rate": 4.336321741358617e-08, "loss": 0.0609, "step": 55140 }, { "epoch": 0.9765215812845612, "grad_norm": 0.7951173782348633, "learning_rate": 4.32978689964153e-08, "loss": 0.0538, "step": 55141 }, { "epoch": 0.9765392908215897, "grad_norm": 0.37846437096595764, "learning_rate": 4.323256978527867e-08, "loss": 0.0378, "step": 55142 }, { "epoch": 0.9765570003586181, "grad_norm": 0.5397406816482544, "learning_rate": 4.316731978039612e-08, "loss": 0.0562, "step": 55143 }, { "epoch": 0.9765747098956465, "grad_norm": 0.9194743633270264, "learning_rate": 4.3102118981980796e-08, "loss": 0.0371, "step": 55144 }, { "epoch": 0.9765924194326749, "grad_norm": 0.5558164715766907, "learning_rate": 4.303696739024421e-08, "loss": 0.0471, "step": 55145 }, { "epoch": 0.9766101289697035, "grad_norm": 0.8808044791221619, "learning_rate": 4.297186500540618e-08, "loss": 0.0338, "step": 55146 }, { "epoch": 0.9766278385067318, "grad_norm": 0.5592027306556702, "learning_rate": 4.290681182767653e-08, "loss": 0.0489, "step": 55147 }, { "epoch": 0.9766455480437602, "grad_norm": 0.35215774178504944, "learning_rate": 4.28418078572701e-08, "loss": 0.0494, "step": 55148 }, { "epoch": 0.9766632575807886, "grad_norm": 0.8289638757705688, "learning_rate": 4.277685309440171e-08, "loss": 0.0508, "step": 55149 }, { "epoch": 0.9766809671178172, "grad_norm": 0.6201543807983398, "learning_rate": 4.2711947539284535e-08, "loss": 0.055, "step": 55150 }, { "epoch": 0.9766986766548456, "grad_norm": 0.5508887767791748, "learning_rate": 4.2647091192133394e-08, "loss": 0.0518, "step": 55151 }, { "epoch": 0.976716386191874, "grad_norm": 0.5585706233978271, "learning_rate": 4.2582284053158115e-08, "loss": 0.0603, "step": 55152 }, { "epoch": 0.9767340957289024, "grad_norm": 0.696215808391571, "learning_rate": 4.25175261225752e-08, "loss": 0.0775, "step": 55153 }, { "epoch": 0.9767518052659309, "grad_norm": 0.5079901218414307, "learning_rate": 4.245281740059781e-08, "loss": 0.0438, "step": 55154 }, { "epoch": 0.9767695148029593, "grad_norm": 0.6036777496337891, "learning_rate": 4.238815788743744e-08, "loss": 0.0532, "step": 55155 }, { "epoch": 0.9767872243399877, "grad_norm": 0.5841967463493347, "learning_rate": 4.2323547583307254e-08, "loss": 0.0532, "step": 55156 }, { "epoch": 0.9768049338770162, "grad_norm": 0.37819811701774597, "learning_rate": 4.225898648841875e-08, "loss": 0.0369, "step": 55157 }, { "epoch": 0.9768226434140446, "grad_norm": 0.6028366088867188, "learning_rate": 4.2194474602986754e-08, "loss": 0.0564, "step": 55158 }, { "epoch": 0.976840352951073, "grad_norm": 0.5577080845832825, "learning_rate": 4.21300119272211e-08, "loss": 0.0649, "step": 55159 }, { "epoch": 0.9768580624881014, "grad_norm": 0.6661328077316284, "learning_rate": 4.206559846133495e-08, "loss": 0.0376, "step": 55160 }, { "epoch": 0.9768757720251299, "grad_norm": 0.5772796869277954, "learning_rate": 4.2001234205539806e-08, "loss": 0.0492, "step": 55161 }, { "epoch": 0.9768934815621583, "grad_norm": 0.5978769659996033, "learning_rate": 4.193691916004716e-08, "loss": 0.0728, "step": 55162 }, { "epoch": 0.9769111910991867, "grad_norm": 0.595684826374054, "learning_rate": 4.1872653325070174e-08, "loss": 0.0759, "step": 55163 }, { "epoch": 0.9769289006362151, "grad_norm": 0.7735510468482971, "learning_rate": 4.180843670081869e-08, "loss": 0.0505, "step": 55164 }, { "epoch": 0.9769466101732436, "grad_norm": 0.7149447798728943, "learning_rate": 4.1744269287505855e-08, "loss": 0.0308, "step": 55165 }, { "epoch": 0.976964319710272, "grad_norm": 0.6647375226020813, "learning_rate": 4.1680151085339845e-08, "loss": 0.0546, "step": 55166 }, { "epoch": 0.9769820292473004, "grad_norm": 1.0235576629638672, "learning_rate": 4.1616082094533827e-08, "loss": 0.0647, "step": 55167 }, { "epoch": 0.9769997387843289, "grad_norm": 0.2736614942550659, "learning_rate": 4.1552062315297626e-08, "loss": 0.0426, "step": 55168 }, { "epoch": 0.9770174483213573, "grad_norm": 0.8472163081169128, "learning_rate": 4.1488091747842735e-08, "loss": 0.0586, "step": 55169 }, { "epoch": 0.9770351578583857, "grad_norm": 0.5056101083755493, "learning_rate": 4.1424170392379e-08, "loss": 0.0465, "step": 55170 }, { "epoch": 0.9770528673954141, "grad_norm": 0.3902592360973358, "learning_rate": 4.1360298249116245e-08, "loss": 0.0399, "step": 55171 }, { "epoch": 0.9770705769324426, "grad_norm": 0.6825157999992371, "learning_rate": 4.12964753182643e-08, "loss": 0.0648, "step": 55172 }, { "epoch": 0.977088286469471, "grad_norm": 0.5214478969573975, "learning_rate": 4.1232701600036337e-08, "loss": 0.0446, "step": 55173 }, { "epoch": 0.9771059960064994, "grad_norm": 0.44734302163124084, "learning_rate": 4.1168977094637184e-08, "loss": 0.0458, "step": 55174 }, { "epoch": 0.9771237055435278, "grad_norm": 0.5476452708244324, "learning_rate": 4.110530180228167e-08, "loss": 0.0822, "step": 55175 }, { "epoch": 0.9771414150805563, "grad_norm": 0.7166728377342224, "learning_rate": 4.104167572317463e-08, "loss": 0.0456, "step": 55176 }, { "epoch": 0.9771591246175847, "grad_norm": 0.9195577502250671, "learning_rate": 4.097809885752924e-08, "loss": 0.0838, "step": 55177 }, { "epoch": 0.9771768341546131, "grad_norm": 0.4559374451637268, "learning_rate": 4.091457120555198e-08, "loss": 0.0506, "step": 55178 }, { "epoch": 0.9771945436916415, "grad_norm": 0.6839544177055359, "learning_rate": 4.085109276745269e-08, "loss": 0.0691, "step": 55179 }, { "epoch": 0.97721225322867, "grad_norm": 0.441654771566391, "learning_rate": 4.0787663543439547e-08, "loss": 0.0246, "step": 55180 }, { "epoch": 0.9772299627656984, "grad_norm": 0.46709227561950684, "learning_rate": 4.072428353372237e-08, "loss": 0.066, "step": 55181 }, { "epoch": 0.9772476723027268, "grad_norm": 0.5934597849845886, "learning_rate": 4.066095273850934e-08, "loss": 0.0377, "step": 55182 }, { "epoch": 0.9772653818397553, "grad_norm": 0.737705647945404, "learning_rate": 4.059767115801027e-08, "loss": 0.0779, "step": 55183 }, { "epoch": 0.9772830913767837, "grad_norm": 0.6694193482398987, "learning_rate": 4.053443879243002e-08, "loss": 0.0824, "step": 55184 }, { "epoch": 0.9773008009138121, "grad_norm": 0.4286736845970154, "learning_rate": 4.04712556419784e-08, "loss": 0.0465, "step": 55185 }, { "epoch": 0.9773185104508405, "grad_norm": 0.5614365935325623, "learning_rate": 4.0408121706865256e-08, "loss": 0.039, "step": 55186 }, { "epoch": 0.977336219987869, "grad_norm": 0.3174816370010376, "learning_rate": 4.034503698729375e-08, "loss": 0.0508, "step": 55187 }, { "epoch": 0.9773539295248974, "grad_norm": 1.0794274806976318, "learning_rate": 4.028200148347538e-08, "loss": 0.05, "step": 55188 }, { "epoch": 0.9773716390619258, "grad_norm": 0.47863471508026123, "learning_rate": 4.021901519561666e-08, "loss": 0.0395, "step": 55189 }, { "epoch": 0.9773893485989542, "grad_norm": 0.4783492982387543, "learning_rate": 4.015607812392241e-08, "loss": 0.0578, "step": 55190 }, { "epoch": 0.9774070581359827, "grad_norm": 0.7323175668716431, "learning_rate": 4.0093190268602474e-08, "loss": 0.0438, "step": 55191 }, { "epoch": 0.9774247676730111, "grad_norm": 0.6783080697059631, "learning_rate": 4.0030351629863346e-08, "loss": 0.0501, "step": 55192 }, { "epoch": 0.9774424772100395, "grad_norm": 0.31812378764152527, "learning_rate": 3.996756220791153e-08, "loss": 0.0463, "step": 55193 }, { "epoch": 0.9774601867470679, "grad_norm": 0.6041973829269409, "learning_rate": 3.9904822002951865e-08, "loss": 0.0474, "step": 55194 }, { "epoch": 0.9774778962840964, "grad_norm": 0.2826117277145386, "learning_rate": 3.984213101519418e-08, "loss": 0.0519, "step": 55195 }, { "epoch": 0.9774956058211248, "grad_norm": 0.5830562710762024, "learning_rate": 3.977948924484165e-08, "loss": 0.0429, "step": 55196 }, { "epoch": 0.9775133153581532, "grad_norm": 0.5915881991386414, "learning_rate": 3.9716896692100765e-08, "loss": 0.0538, "step": 55197 }, { "epoch": 0.9775310248951817, "grad_norm": 0.638374388217926, "learning_rate": 3.965435335717971e-08, "loss": 0.0754, "step": 55198 }, { "epoch": 0.9775487344322101, "grad_norm": 0.4122377038002014, "learning_rate": 3.9591859240281634e-08, "loss": 0.0462, "step": 55199 }, { "epoch": 0.9775664439692385, "grad_norm": 0.799359142780304, "learning_rate": 3.9529414341613055e-08, "loss": 0.0533, "step": 55200 }, { "epoch": 0.9775841535062669, "grad_norm": 0.7292945981025696, "learning_rate": 3.946701866138047e-08, "loss": 0.0815, "step": 55201 }, { "epoch": 0.9776018630432954, "grad_norm": 1.2080305814743042, "learning_rate": 3.940467219978705e-08, "loss": 0.0347, "step": 55202 }, { "epoch": 0.9776195725803238, "grad_norm": 0.6093379259109497, "learning_rate": 3.9342374957039296e-08, "loss": 0.0673, "step": 55203 }, { "epoch": 0.9776372821173522, "grad_norm": 0.2984056770801544, "learning_rate": 3.928012693334204e-08, "loss": 0.0408, "step": 55204 }, { "epoch": 0.9776549916543806, "grad_norm": 0.3367099165916443, "learning_rate": 3.921792812890013e-08, "loss": 0.0358, "step": 55205 }, { "epoch": 0.9776727011914091, "grad_norm": 0.6622586846351624, "learning_rate": 3.915577854391839e-08, "loss": 0.0489, "step": 55206 }, { "epoch": 0.9776904107284375, "grad_norm": 0.332120418548584, "learning_rate": 3.909367817860166e-08, "loss": 0.0385, "step": 55207 }, { "epoch": 0.9777081202654659, "grad_norm": 0.5925386548042297, "learning_rate": 3.9031627033153106e-08, "loss": 0.0606, "step": 55208 }, { "epoch": 0.9777258298024943, "grad_norm": 0.4800269603729248, "learning_rate": 3.896962510777591e-08, "loss": 0.0476, "step": 55209 }, { "epoch": 0.9777435393395228, "grad_norm": 0.6147804260253906, "learning_rate": 3.8907672402676566e-08, "loss": 0.0433, "step": 55210 }, { "epoch": 0.9777612488765512, "grad_norm": 0.2988406717777252, "learning_rate": 3.884576891805658e-08, "loss": 0.0416, "step": 55211 }, { "epoch": 0.9777789584135796, "grad_norm": 0.4868374466896057, "learning_rate": 3.8783914654122455e-08, "loss": 0.0581, "step": 55212 }, { "epoch": 0.9777966679506082, "grad_norm": 0.3956894874572754, "learning_rate": 3.872210961107403e-08, "loss": 0.0371, "step": 55213 }, { "epoch": 0.9778143774876366, "grad_norm": 0.4055383801460266, "learning_rate": 3.866035378911781e-08, "loss": 0.0589, "step": 55214 }, { "epoch": 0.977832087024665, "grad_norm": 0.6399456262588501, "learning_rate": 3.859864718845696e-08, "loss": 0.0705, "step": 55215 }, { "epoch": 0.9778497965616934, "grad_norm": 0.2058061808347702, "learning_rate": 3.8536989809291326e-08, "loss": 0.0305, "step": 55216 }, { "epoch": 0.9778675060987219, "grad_norm": 0.6238610148429871, "learning_rate": 3.84753816518274e-08, "loss": 0.0581, "step": 55217 }, { "epoch": 0.9778852156357503, "grad_norm": 0.359226256608963, "learning_rate": 3.8413822716265035e-08, "loss": 0.0446, "step": 55218 }, { "epoch": 0.9779029251727787, "grad_norm": 0.6967686414718628, "learning_rate": 3.8352313002807394e-08, "loss": 0.0558, "step": 55219 }, { "epoch": 0.9779206347098071, "grad_norm": 0.29462215304374695, "learning_rate": 3.829085251165931e-08, "loss": 0.0345, "step": 55220 }, { "epoch": 0.9779383442468356, "grad_norm": 0.6655312180519104, "learning_rate": 3.822944124301897e-08, "loss": 0.068, "step": 55221 }, { "epoch": 0.977956053783864, "grad_norm": 0.3742806613445282, "learning_rate": 3.8168079197092864e-08, "loss": 0.0511, "step": 55222 }, { "epoch": 0.9779737633208924, "grad_norm": 0.4110771715641022, "learning_rate": 3.8106766374077506e-08, "loss": 0.0486, "step": 55223 }, { "epoch": 0.9779914728579208, "grad_norm": 0.4096860885620117, "learning_rate": 3.8045502774179394e-08, "loss": 0.0421, "step": 55224 }, { "epoch": 0.9780091823949493, "grad_norm": 0.5745574831962585, "learning_rate": 3.7984288397598374e-08, "loss": 0.0378, "step": 55225 }, { "epoch": 0.9780268919319777, "grad_norm": 0.4502899646759033, "learning_rate": 3.792312324453595e-08, "loss": 0.0735, "step": 55226 }, { "epoch": 0.9780446014690061, "grad_norm": 0.5881596207618713, "learning_rate": 3.786200731519196e-08, "loss": 0.0578, "step": 55227 }, { "epoch": 0.9780623110060346, "grad_norm": 0.5735450983047485, "learning_rate": 3.780094060976957e-08, "loss": 0.0602, "step": 55228 }, { "epoch": 0.978080020543063, "grad_norm": 0.6535680890083313, "learning_rate": 3.773992312846863e-08, "loss": 0.0363, "step": 55229 }, { "epoch": 0.9780977300800914, "grad_norm": 0.5229599475860596, "learning_rate": 3.767895487148898e-08, "loss": 0.064, "step": 55230 }, { "epoch": 0.9781154396171198, "grad_norm": 0.5842304825782776, "learning_rate": 3.761803583903212e-08, "loss": 0.0804, "step": 55231 }, { "epoch": 0.9781331491541483, "grad_norm": 0.43423688411712646, "learning_rate": 3.755716603130121e-08, "loss": 0.0471, "step": 55232 }, { "epoch": 0.9781508586911767, "grad_norm": 1.0360771417617798, "learning_rate": 3.749634544849112e-08, "loss": 0.0693, "step": 55233 }, { "epoch": 0.9781685682282051, "grad_norm": 0.4563506245613098, "learning_rate": 3.7435574090806665e-08, "loss": 0.0354, "step": 55234 }, { "epoch": 0.9781862777652335, "grad_norm": 0.45541614294052124, "learning_rate": 3.7374851958444366e-08, "loss": 0.0526, "step": 55235 }, { "epoch": 0.978203987302262, "grad_norm": 0.41649341583251953, "learning_rate": 3.731417905160572e-08, "loss": 0.0451, "step": 55236 }, { "epoch": 0.9782216968392904, "grad_norm": 0.8624700307846069, "learning_rate": 3.7253555370490574e-08, "loss": 0.0691, "step": 55237 }, { "epoch": 0.9782394063763188, "grad_norm": 0.4831531047821045, "learning_rate": 3.71929809152971e-08, "loss": 0.041, "step": 55238 }, { "epoch": 0.9782571159133472, "grad_norm": 0.8482987880706787, "learning_rate": 3.713245568622514e-08, "loss": 0.0403, "step": 55239 }, { "epoch": 0.9782748254503757, "grad_norm": 0.5268115997314453, "learning_rate": 3.707197968347453e-08, "loss": 0.0689, "step": 55240 }, { "epoch": 0.9782925349874041, "grad_norm": 0.8157222270965576, "learning_rate": 3.7011552907243454e-08, "loss": 0.0865, "step": 55241 }, { "epoch": 0.9783102445244325, "grad_norm": 0.4828988313674927, "learning_rate": 3.6951175357731737e-08, "loss": 0.0502, "step": 55242 }, { "epoch": 0.978327954061461, "grad_norm": 0.5663291215896606, "learning_rate": 3.6890847035137564e-08, "loss": 0.0534, "step": 55243 }, { "epoch": 0.9783456635984894, "grad_norm": 0.7054060697555542, "learning_rate": 3.683056793965745e-08, "loss": 0.0555, "step": 55244 }, { "epoch": 0.9783633731355178, "grad_norm": 0.5703775882720947, "learning_rate": 3.6770338071491215e-08, "loss": 0.0583, "step": 55245 }, { "epoch": 0.9783810826725462, "grad_norm": 0.44277918338775635, "learning_rate": 3.671015743083872e-08, "loss": 0.054, "step": 55246 }, { "epoch": 0.9783987922095747, "grad_norm": 0.6547017693519592, "learning_rate": 3.6650026017896465e-08, "loss": 0.0491, "step": 55247 }, { "epoch": 0.9784165017466031, "grad_norm": 0.582689106464386, "learning_rate": 3.658994383286096e-08, "loss": 0.0473, "step": 55248 }, { "epoch": 0.9784342112836315, "grad_norm": 0.7013058066368103, "learning_rate": 3.6529910875930384e-08, "loss": 0.0483, "step": 55249 }, { "epoch": 0.9784519208206599, "grad_norm": 0.3248640298843384, "learning_rate": 3.646992714730457e-08, "loss": 0.0387, "step": 55250 }, { "epoch": 0.9784696303576884, "grad_norm": 0.757009744644165, "learning_rate": 3.640999264717837e-08, "loss": 0.0482, "step": 55251 }, { "epoch": 0.9784873398947168, "grad_norm": 0.9739649891853333, "learning_rate": 3.635010737574995e-08, "loss": 0.0721, "step": 55252 }, { "epoch": 0.9785050494317452, "grad_norm": 0.4763326644897461, "learning_rate": 3.629027133321583e-08, "loss": 0.0675, "step": 55253 }, { "epoch": 0.9785227589687736, "grad_norm": 0.6375137567520142, "learning_rate": 3.6230484519774174e-08, "loss": 0.0556, "step": 55254 }, { "epoch": 0.9785404685058021, "grad_norm": 0.8236886262893677, "learning_rate": 3.61707469356215e-08, "loss": 0.056, "step": 55255 }, { "epoch": 0.9785581780428305, "grad_norm": 0.4597083628177643, "learning_rate": 3.611105858095265e-08, "loss": 0.0296, "step": 55256 }, { "epoch": 0.9785758875798589, "grad_norm": 0.43720850348472595, "learning_rate": 3.605141945596413e-08, "loss": 0.0423, "step": 55257 }, { "epoch": 0.9785935971168874, "grad_norm": 0.4292893409729004, "learning_rate": 3.5991829560852454e-08, "loss": 0.0633, "step": 55258 }, { "epoch": 0.9786113066539158, "grad_norm": 0.5428699254989624, "learning_rate": 3.593228889581579e-08, "loss": 0.0499, "step": 55259 }, { "epoch": 0.9786290161909442, "grad_norm": 0.5348628163337708, "learning_rate": 3.5872797461047323e-08, "loss": 0.069, "step": 55260 }, { "epoch": 0.9786467257279726, "grad_norm": 0.305854469537735, "learning_rate": 3.581335525674523e-08, "loss": 0.0354, "step": 55261 }, { "epoch": 0.9786644352650011, "grad_norm": 0.4640876054763794, "learning_rate": 3.575396228310268e-08, "loss": 0.0328, "step": 55262 }, { "epoch": 0.9786821448020295, "grad_norm": 0.64371657371521, "learning_rate": 3.569461854031453e-08, "loss": 0.0538, "step": 55263 }, { "epoch": 0.9786998543390579, "grad_norm": 0.6252237558364868, "learning_rate": 3.5635324028580604e-08, "loss": 0.0363, "step": 55264 }, { "epoch": 0.9787175638760863, "grad_norm": 0.7235230803489685, "learning_rate": 3.557607874809077e-08, "loss": 0.0552, "step": 55265 }, { "epoch": 0.9787352734131148, "grad_norm": 0.8171104788780212, "learning_rate": 3.5516882699043184e-08, "loss": 0.0631, "step": 55266 }, { "epoch": 0.9787529829501432, "grad_norm": 0.6947441697120667, "learning_rate": 3.545773588162937e-08, "loss": 0.0466, "step": 55267 }, { "epoch": 0.9787706924871716, "grad_norm": 0.6446865797042847, "learning_rate": 3.53986382960475e-08, "loss": 0.0446, "step": 55268 }, { "epoch": 0.9787884020242, "grad_norm": 1.0063362121582031, "learning_rate": 3.533958994249076e-08, "loss": 0.0625, "step": 55269 }, { "epoch": 0.9788061115612285, "grad_norm": 0.33814018964767456, "learning_rate": 3.528059082115398e-08, "loss": 0.0338, "step": 55270 }, { "epoch": 0.9788238210982569, "grad_norm": 0.8541043400764465, "learning_rate": 3.522164093222868e-08, "loss": 0.0562, "step": 55271 }, { "epoch": 0.9788415306352853, "grad_norm": 0.5106627345085144, "learning_rate": 3.516274027591138e-08, "loss": 0.0489, "step": 55272 }, { "epoch": 0.9788592401723138, "grad_norm": 0.2587485611438751, "learning_rate": 3.510388885239524e-08, "loss": 0.0285, "step": 55273 }, { "epoch": 0.9788769497093422, "grad_norm": 0.6672817468643188, "learning_rate": 3.5045086661873446e-08, "loss": 0.0365, "step": 55274 }, { "epoch": 0.9788946592463706, "grad_norm": 0.8220829367637634, "learning_rate": 3.498633370454085e-08, "loss": 0.0801, "step": 55275 }, { "epoch": 0.978912368783399, "grad_norm": 1.2732417583465576, "learning_rate": 3.492762998058896e-08, "loss": 0.0841, "step": 55276 }, { "epoch": 0.9789300783204276, "grad_norm": 0.452696293592453, "learning_rate": 3.486897549020929e-08, "loss": 0.0517, "step": 55277 }, { "epoch": 0.978947787857456, "grad_norm": 0.5782681107521057, "learning_rate": 3.481037023360001e-08, "loss": 0.0497, "step": 55278 }, { "epoch": 0.9789654973944844, "grad_norm": 0.7237594723701477, "learning_rate": 3.4751814210949306e-08, "loss": 0.0386, "step": 55279 }, { "epoch": 0.9789832069315128, "grad_norm": 0.46580013632774353, "learning_rate": 3.4693307422452025e-08, "loss": 0.0426, "step": 55280 }, { "epoch": 0.9790009164685413, "grad_norm": 0.4693000912666321, "learning_rate": 3.463484986830134e-08, "loss": 0.0501, "step": 55281 }, { "epoch": 0.9790186260055697, "grad_norm": 0.7317027449607849, "learning_rate": 3.457644154868711e-08, "loss": 0.0691, "step": 55282 }, { "epoch": 0.9790363355425981, "grad_norm": 0.5065625905990601, "learning_rate": 3.4518082463802504e-08, "loss": 0.0701, "step": 55283 }, { "epoch": 0.9790540450796265, "grad_norm": 0.7176231145858765, "learning_rate": 3.44597726138407e-08, "loss": 0.0733, "step": 55284 }, { "epoch": 0.979071754616655, "grad_norm": 0.6154808402061462, "learning_rate": 3.440151199899155e-08, "loss": 0.0643, "step": 55285 }, { "epoch": 0.9790894641536834, "grad_norm": 0.5226113796234131, "learning_rate": 3.434330061944824e-08, "loss": 0.0407, "step": 55286 }, { "epoch": 0.9791071736907118, "grad_norm": 0.4358307421207428, "learning_rate": 3.42851384754006e-08, "loss": 0.0466, "step": 55287 }, { "epoch": 0.9791248832277403, "grad_norm": 0.44543200731277466, "learning_rate": 3.422702556704182e-08, "loss": 0.0325, "step": 55288 }, { "epoch": 0.9791425927647687, "grad_norm": 0.33527857065200806, "learning_rate": 3.4168961894563424e-08, "loss": 0.0552, "step": 55289 }, { "epoch": 0.9791603023017971, "grad_norm": 0.5205538272857666, "learning_rate": 3.411094745815524e-08, "loss": 0.0717, "step": 55290 }, { "epoch": 0.9791780118388255, "grad_norm": 0.5878251194953918, "learning_rate": 3.4052982258007124e-08, "loss": 0.0562, "step": 55291 }, { "epoch": 0.979195721375854, "grad_norm": 0.3979378044605255, "learning_rate": 3.399506629431226e-08, "loss": 0.0337, "step": 55292 }, { "epoch": 0.9792134309128824, "grad_norm": 0.4740865230560303, "learning_rate": 3.393719956725883e-08, "loss": 0.0468, "step": 55293 }, { "epoch": 0.9792311404499108, "grad_norm": 0.1738668978214264, "learning_rate": 3.387938207704e-08, "loss": 0.049, "step": 55294 }, { "epoch": 0.9792488499869392, "grad_norm": 0.41072359681129456, "learning_rate": 3.38216138238423e-08, "loss": 0.0504, "step": 55295 }, { "epoch": 0.9792665595239677, "grad_norm": 0.506713330745697, "learning_rate": 3.376389480785891e-08, "loss": 0.0491, "step": 55296 }, { "epoch": 0.9792842690609961, "grad_norm": 1.0341159105300903, "learning_rate": 3.3706225029278005e-08, "loss": 0.0504, "step": 55297 }, { "epoch": 0.9793019785980245, "grad_norm": 0.8595702052116394, "learning_rate": 3.364860448828944e-08, "loss": 0.0542, "step": 55298 }, { "epoch": 0.9793196881350529, "grad_norm": 0.5524059534072876, "learning_rate": 3.359103318508472e-08, "loss": 0.0634, "step": 55299 }, { "epoch": 0.9793373976720814, "grad_norm": 0.5640700459480286, "learning_rate": 3.353351111985037e-08, "loss": 0.044, "step": 55300 }, { "epoch": 0.9793551072091098, "grad_norm": 0.5453413128852844, "learning_rate": 3.34760382927779e-08, "loss": 0.0442, "step": 55301 }, { "epoch": 0.9793728167461382, "grad_norm": 0.5297130346298218, "learning_rate": 3.341861470405549e-08, "loss": 0.0287, "step": 55302 }, { "epoch": 0.9793905262831667, "grad_norm": 0.6052441596984863, "learning_rate": 3.3361240353871334e-08, "loss": 0.0431, "step": 55303 }, { "epoch": 0.9794082358201951, "grad_norm": 0.30260586738586426, "learning_rate": 3.330391524241527e-08, "loss": 0.0501, "step": 55304 }, { "epoch": 0.9794259453572235, "grad_norm": 0.7166643738746643, "learning_rate": 3.3246639369875486e-08, "loss": 0.0465, "step": 55305 }, { "epoch": 0.9794436548942519, "grad_norm": 0.5538071990013123, "learning_rate": 3.318941273644016e-08, "loss": 0.0662, "step": 55306 }, { "epoch": 0.9794613644312804, "grad_norm": 0.8489021062850952, "learning_rate": 3.3132235342299144e-08, "loss": 0.0586, "step": 55307 }, { "epoch": 0.9794790739683088, "grad_norm": 0.416928768157959, "learning_rate": 3.307510718763729e-08, "loss": 0.0444, "step": 55308 }, { "epoch": 0.9794967835053372, "grad_norm": 0.47451457381248474, "learning_rate": 3.301802827264611e-08, "loss": 0.0446, "step": 55309 }, { "epoch": 0.9795144930423656, "grad_norm": 0.5211293697357178, "learning_rate": 3.296099859751045e-08, "loss": 0.046, "step": 55310 }, { "epoch": 0.9795322025793941, "grad_norm": 0.8951501250267029, "learning_rate": 3.290401816242184e-08, "loss": 0.0593, "step": 55311 }, { "epoch": 0.9795499121164225, "grad_norm": 0.5598911046981812, "learning_rate": 3.284708696756178e-08, "loss": 0.0413, "step": 55312 }, { "epoch": 0.9795676216534509, "grad_norm": 0.3730245530605316, "learning_rate": 3.2790205013123466e-08, "loss": 0.0495, "step": 55313 }, { "epoch": 0.9795853311904793, "grad_norm": 0.9251983165740967, "learning_rate": 3.273337229929008e-08, "loss": 0.0505, "step": 55314 }, { "epoch": 0.9796030407275078, "grad_norm": 0.7368046641349792, "learning_rate": 3.267658882625146e-08, "loss": 0.0471, "step": 55315 }, { "epoch": 0.9796207502645362, "grad_norm": 0.7181791663169861, "learning_rate": 3.261985459419081e-08, "loss": 0.0551, "step": 55316 }, { "epoch": 0.9796384598015646, "grad_norm": 0.8934387564659119, "learning_rate": 3.2563169603299636e-08, "loss": 0.0552, "step": 55317 }, { "epoch": 0.9796561693385931, "grad_norm": 0.4675983190536499, "learning_rate": 3.2506533853759455e-08, "loss": 0.0333, "step": 55318 }, { "epoch": 0.9796738788756215, "grad_norm": 0.4370546042919159, "learning_rate": 3.244994734576012e-08, "loss": 0.044, "step": 55319 }, { "epoch": 0.9796915884126499, "grad_norm": 0.6263466477394104, "learning_rate": 3.239341007948649e-08, "loss": 0.0378, "step": 55320 }, { "epoch": 0.9797092979496783, "grad_norm": 0.5446743965148926, "learning_rate": 3.2336922055125065e-08, "loss": 0.0472, "step": 55321 }, { "epoch": 0.9797270074867068, "grad_norm": 0.5092133283615112, "learning_rate": 3.2280483272860706e-08, "loss": 0.0714, "step": 55322 }, { "epoch": 0.9797447170237352, "grad_norm": 0.441344290971756, "learning_rate": 3.2224093732879936e-08, "loss": 0.0454, "step": 55323 }, { "epoch": 0.9797624265607636, "grad_norm": 0.8363280892372131, "learning_rate": 3.2167753435367594e-08, "loss": 0.0549, "step": 55324 }, { "epoch": 0.979780136097792, "grad_norm": 0.600343644618988, "learning_rate": 3.211146238051021e-08, "loss": 0.0697, "step": 55325 }, { "epoch": 0.9797978456348205, "grad_norm": 0.5137032270431519, "learning_rate": 3.2055220568490976e-08, "loss": 0.0322, "step": 55326 }, { "epoch": 0.9798155551718489, "grad_norm": 0.5773435831069946, "learning_rate": 3.199902799949639e-08, "loss": 0.0496, "step": 55327 }, { "epoch": 0.9798332647088773, "grad_norm": 0.40344375371932983, "learning_rate": 3.194288467371298e-08, "loss": 0.0676, "step": 55328 }, { "epoch": 0.9798509742459057, "grad_norm": 0.11784727871417999, "learning_rate": 3.18867905913206e-08, "loss": 0.0278, "step": 55329 }, { "epoch": 0.9798686837829342, "grad_norm": 0.4626430869102478, "learning_rate": 3.18307457525091e-08, "loss": 0.0454, "step": 55330 }, { "epoch": 0.9798863933199626, "grad_norm": 0.18650241196155548, "learning_rate": 3.177475015746001e-08, "loss": 0.0424, "step": 55331 }, { "epoch": 0.979904102856991, "grad_norm": 0.8322761058807373, "learning_rate": 3.17188038063565e-08, "loss": 0.0654, "step": 55332 }, { "epoch": 0.9799218123940195, "grad_norm": 0.7540187239646912, "learning_rate": 3.166290669938676e-08, "loss": 0.038, "step": 55333 }, { "epoch": 0.9799395219310479, "grad_norm": 0.501907467842102, "learning_rate": 3.160705883673065e-08, "loss": 0.0644, "step": 55334 }, { "epoch": 0.9799572314680763, "grad_norm": 0.82813560962677, "learning_rate": 3.155126021857302e-08, "loss": 0.0681, "step": 55335 }, { "epoch": 0.9799749410051047, "grad_norm": 0.6607086658477783, "learning_rate": 3.1495510845097056e-08, "loss": 0.0498, "step": 55336 }, { "epoch": 0.9799926505421332, "grad_norm": 0.30304399132728577, "learning_rate": 3.14398107164876e-08, "loss": 0.0376, "step": 55337 }, { "epoch": 0.9800103600791616, "grad_norm": 0.6200608015060425, "learning_rate": 3.138415983292786e-08, "loss": 0.0488, "step": 55338 }, { "epoch": 0.98002806961619, "grad_norm": 0.7006233930587769, "learning_rate": 3.132855819460101e-08, "loss": 0.0378, "step": 55339 }, { "epoch": 0.9800457791532184, "grad_norm": 1.0056684017181396, "learning_rate": 3.127300580168857e-08, "loss": 0.0775, "step": 55340 }, { "epoch": 0.980063488690247, "grad_norm": 0.4728216528892517, "learning_rate": 3.121750265437206e-08, "loss": 0.0455, "step": 55341 }, { "epoch": 0.9800811982272754, "grad_norm": 0.5560653805732727, "learning_rate": 3.116204875283801e-08, "loss": 0.0451, "step": 55342 }, { "epoch": 0.9800989077643038, "grad_norm": 0.6529059410095215, "learning_rate": 3.110664409726627e-08, "loss": 0.0744, "step": 55343 }, { "epoch": 0.9801166173013321, "grad_norm": 0.5444855690002441, "learning_rate": 3.105128868784002e-08, "loss": 0.0493, "step": 55344 }, { "epoch": 0.9801343268383607, "grad_norm": 0.5384231805801392, "learning_rate": 3.099598252473912e-08, "loss": 0.0533, "step": 55345 }, { "epoch": 0.9801520363753891, "grad_norm": 0.41538217663764954, "learning_rate": 3.094072560815009e-08, "loss": 0.0421, "step": 55346 }, { "epoch": 0.9801697459124175, "grad_norm": 0.896754264831543, "learning_rate": 3.088551793825112e-08, "loss": 0.0684, "step": 55347 }, { "epoch": 0.980187455449446, "grad_norm": 0.850591778755188, "learning_rate": 3.083035951522373e-08, "loss": 0.0493, "step": 55348 }, { "epoch": 0.9802051649864744, "grad_norm": 0.7269749045372009, "learning_rate": 3.0775250339251104e-08, "loss": 0.0673, "step": 55349 }, { "epoch": 0.9802228745235028, "grad_norm": 0.15785305202007294, "learning_rate": 3.072019041051477e-08, "loss": 0.0363, "step": 55350 }, { "epoch": 0.9802405840605312, "grad_norm": 0.4175727665424347, "learning_rate": 3.066517972919458e-08, "loss": 0.0374, "step": 55351 }, { "epoch": 0.9802582935975597, "grad_norm": 0.6480048298835754, "learning_rate": 3.0610218295470395e-08, "loss": 0.071, "step": 55352 }, { "epoch": 0.9802760031345881, "grad_norm": 0.809660792350769, "learning_rate": 3.055530610952539e-08, "loss": 0.0835, "step": 55353 }, { "epoch": 0.9802937126716165, "grad_norm": 0.6607531905174255, "learning_rate": 3.050044317153944e-08, "loss": 0.0475, "step": 55354 }, { "epoch": 0.9803114222086449, "grad_norm": 0.7852681875228882, "learning_rate": 3.044562948169405e-08, "loss": 0.051, "step": 55355 }, { "epoch": 0.9803291317456734, "grad_norm": 0.511978268623352, "learning_rate": 3.039086504016742e-08, "loss": 0.0512, "step": 55356 }, { "epoch": 0.9803468412827018, "grad_norm": 0.7888544797897339, "learning_rate": 3.033614984714106e-08, "loss": 0.0585, "step": 55357 }, { "epoch": 0.9803645508197302, "grad_norm": 0.28625571727752686, "learning_rate": 3.028148390279317e-08, "loss": 0.0652, "step": 55358 }, { "epoch": 0.9803822603567586, "grad_norm": 0.8048872351646423, "learning_rate": 3.0226867207306944e-08, "loss": 0.0776, "step": 55359 }, { "epoch": 0.9803999698937871, "grad_norm": 0.7928190231323242, "learning_rate": 3.017229976086055e-08, "loss": 0.0589, "step": 55360 }, { "epoch": 0.9804176794308155, "grad_norm": 0.4871630370616913, "learning_rate": 3.0117781563632206e-08, "loss": 0.0462, "step": 55361 }, { "epoch": 0.9804353889678439, "grad_norm": 0.5918567776679993, "learning_rate": 3.006331261580175e-08, "loss": 0.0506, "step": 55362 }, { "epoch": 0.9804530985048724, "grad_norm": 0.7053704857826233, "learning_rate": 3.000889291754905e-08, "loss": 0.0546, "step": 55363 }, { "epoch": 0.9804708080419008, "grad_norm": 0.996062695980072, "learning_rate": 2.9954522469053943e-08, "loss": 0.0613, "step": 55364 }, { "epoch": 0.9804885175789292, "grad_norm": 0.5042852163314819, "learning_rate": 2.9900201270492975e-08, "loss": 0.0489, "step": 55365 }, { "epoch": 0.9805062271159576, "grad_norm": 0.7306790947914124, "learning_rate": 2.984592932204766e-08, "loss": 0.0588, "step": 55366 }, { "epoch": 0.9805239366529861, "grad_norm": 0.49758094549179077, "learning_rate": 2.979170662389452e-08, "loss": 0.0523, "step": 55367 }, { "epoch": 0.9805416461900145, "grad_norm": 0.6006535887718201, "learning_rate": 2.973753317621175e-08, "loss": 0.0439, "step": 55368 }, { "epoch": 0.9805593557270429, "grad_norm": 0.5592117309570312, "learning_rate": 2.9683408979179205e-08, "loss": 0.0461, "step": 55369 }, { "epoch": 0.9805770652640713, "grad_norm": 0.6144980192184448, "learning_rate": 2.962933403297341e-08, "loss": 0.0492, "step": 55370 }, { "epoch": 0.9805947748010998, "grad_norm": 0.8455274105072021, "learning_rate": 2.957530833777422e-08, "loss": 0.0697, "step": 55371 }, { "epoch": 0.9806124843381282, "grad_norm": 0.8975721001625061, "learning_rate": 2.95213318937565e-08, "loss": 0.0601, "step": 55372 }, { "epoch": 0.9806301938751566, "grad_norm": 0.5072780251502991, "learning_rate": 2.9467404701100097e-08, "loss": 0.0491, "step": 55373 }, { "epoch": 0.980647903412185, "grad_norm": 0.7778937816619873, "learning_rate": 2.9413526759981547e-08, "loss": 0.0381, "step": 55374 }, { "epoch": 0.9806656129492135, "grad_norm": 0.6348828673362732, "learning_rate": 2.9359698070579033e-08, "loss": 0.073, "step": 55375 }, { "epoch": 0.9806833224862419, "grad_norm": 0.6715130805969238, "learning_rate": 2.9305918633069084e-08, "loss": 0.0635, "step": 55376 }, { "epoch": 0.9807010320232703, "grad_norm": 0.4603259563446045, "learning_rate": 2.9252188447628226e-08, "loss": 0.0524, "step": 55377 }, { "epoch": 0.9807187415602988, "grad_norm": 0.6159343123435974, "learning_rate": 2.919850751443298e-08, "loss": 0.0482, "step": 55378 }, { "epoch": 0.9807364510973272, "grad_norm": 0.35985177755355835, "learning_rate": 2.914487583365988e-08, "loss": 0.0437, "step": 55379 }, { "epoch": 0.9807541606343556, "grad_norm": 1.2746689319610596, "learning_rate": 2.9091293405487106e-08, "loss": 0.0694, "step": 55380 }, { "epoch": 0.980771870171384, "grad_norm": 0.5267320871353149, "learning_rate": 2.9037760230089527e-08, "loss": 0.0443, "step": 55381 }, { "epoch": 0.9807895797084125, "grad_norm": 0.7814785242080688, "learning_rate": 2.8984276307643665e-08, "loss": 0.0671, "step": 55382 }, { "epoch": 0.9808072892454409, "grad_norm": 0.8232007026672363, "learning_rate": 2.8930841638324378e-08, "loss": 0.0424, "step": 55383 }, { "epoch": 0.9808249987824693, "grad_norm": 0.4358956515789032, "learning_rate": 2.8877456222308195e-08, "loss": 0.0522, "step": 55384 }, { "epoch": 0.9808427083194977, "grad_norm": 0.7216913104057312, "learning_rate": 2.8824120059773308e-08, "loss": 0.0847, "step": 55385 }, { "epoch": 0.9808604178565262, "grad_norm": 0.6135263442993164, "learning_rate": 2.877083315089124e-08, "loss": 0.0704, "step": 55386 }, { "epoch": 0.9808781273935546, "grad_norm": 0.3187699019908905, "learning_rate": 2.8717595495838522e-08, "loss": 0.0409, "step": 55387 }, { "epoch": 0.980895836930583, "grad_norm": 0.6116864085197449, "learning_rate": 2.8664407094790013e-08, "loss": 0.0779, "step": 55388 }, { "epoch": 0.9809135464676114, "grad_norm": 0.8669129610061646, "learning_rate": 2.86112679479239e-08, "loss": 0.0617, "step": 55389 }, { "epoch": 0.9809312560046399, "grad_norm": 0.10967322438955307, "learning_rate": 2.8558178055410057e-08, "loss": 0.0571, "step": 55390 }, { "epoch": 0.9809489655416683, "grad_norm": 0.7001522183418274, "learning_rate": 2.8505137417426664e-08, "loss": 0.0626, "step": 55391 }, { "epoch": 0.9809666750786967, "grad_norm": 0.7337080240249634, "learning_rate": 2.8452146034146918e-08, "loss": 0.0897, "step": 55392 }, { "epoch": 0.9809843846157252, "grad_norm": 0.7701380252838135, "learning_rate": 2.839920390574402e-08, "loss": 0.0315, "step": 55393 }, { "epoch": 0.9810020941527536, "grad_norm": 0.3569599688053131, "learning_rate": 2.8346311032394488e-08, "loss": 0.0392, "step": 55394 }, { "epoch": 0.981019803689782, "grad_norm": 0.40554511547088623, "learning_rate": 2.8293467414271523e-08, "loss": 0.0612, "step": 55395 }, { "epoch": 0.9810375132268104, "grad_norm": 0.46502751111984253, "learning_rate": 2.824067305154998e-08, "loss": 0.0564, "step": 55396 }, { "epoch": 0.9810552227638389, "grad_norm": 0.8921391367912292, "learning_rate": 2.8187927944399726e-08, "loss": 0.0571, "step": 55397 }, { "epoch": 0.9810729323008673, "grad_norm": 0.4856698513031006, "learning_rate": 2.813523209299895e-08, "loss": 0.0591, "step": 55398 }, { "epoch": 0.9810906418378957, "grad_norm": 0.31253212690353394, "learning_rate": 2.808258549751752e-08, "loss": 0.0586, "step": 55399 }, { "epoch": 0.9811083513749241, "grad_norm": 0.8443610072135925, "learning_rate": 2.802998815813196e-08, "loss": 0.0511, "step": 55400 }, { "epoch": 0.9811260609119526, "grad_norm": 0.4635331928730011, "learning_rate": 2.7977440075010463e-08, "loss": 0.065, "step": 55401 }, { "epoch": 0.981143770448981, "grad_norm": 1.051921010017395, "learning_rate": 2.7924941248331225e-08, "loss": 0.0734, "step": 55402 }, { "epoch": 0.9811614799860094, "grad_norm": 0.4450821280479431, "learning_rate": 2.787249167826411e-08, "loss": 0.051, "step": 55403 }, { "epoch": 0.9811791895230378, "grad_norm": 0.3896760046482086, "learning_rate": 2.7820091364982314e-08, "loss": 0.0702, "step": 55404 }, { "epoch": 0.9811968990600664, "grad_norm": 0.4114904999732971, "learning_rate": 2.7767740308657364e-08, "loss": 0.057, "step": 55405 }, { "epoch": 0.9812146085970948, "grad_norm": 0.5248851180076599, "learning_rate": 2.7715438509462453e-08, "loss": 0.0577, "step": 55406 }, { "epoch": 0.9812323181341231, "grad_norm": 0.7984095215797424, "learning_rate": 2.7663185967569114e-08, "loss": 0.0373, "step": 55407 }, { "epoch": 0.9812500276711517, "grad_norm": 0.4461634159088135, "learning_rate": 2.7610982683150544e-08, "loss": 0.0637, "step": 55408 }, { "epoch": 0.9812677372081801, "grad_norm": 0.7041371464729309, "learning_rate": 2.75588286563766e-08, "loss": 0.0731, "step": 55409 }, { "epoch": 0.9812854467452085, "grad_norm": 0.4923570454120636, "learning_rate": 2.750672388742048e-08, "loss": 0.0528, "step": 55410 }, { "epoch": 0.9813031562822369, "grad_norm": 0.3575940430164337, "learning_rate": 2.745466837645205e-08, "loss": 0.0597, "step": 55411 }, { "epoch": 0.9813208658192654, "grad_norm": 0.44580885767936707, "learning_rate": 2.740266212364284e-08, "loss": 0.0489, "step": 55412 }, { "epoch": 0.9813385753562938, "grad_norm": 0.6237473487854004, "learning_rate": 2.7350705129166042e-08, "loss": 0.0636, "step": 55413 }, { "epoch": 0.9813562848933222, "grad_norm": 0.69925856590271, "learning_rate": 2.7298797393189855e-08, "loss": 0.0752, "step": 55414 }, { "epoch": 0.9813739944303506, "grad_norm": 0.8877773284912109, "learning_rate": 2.7246938915887477e-08, "loss": 0.0702, "step": 55415 }, { "epoch": 0.9813917039673791, "grad_norm": 0.5145431756973267, "learning_rate": 2.71951296974271e-08, "loss": 0.0371, "step": 55416 }, { "epoch": 0.9814094135044075, "grad_norm": 0.6880705952644348, "learning_rate": 2.7143369737981927e-08, "loss": 0.054, "step": 55417 }, { "epoch": 0.9814271230414359, "grad_norm": 0.4926607310771942, "learning_rate": 2.7091659037718485e-08, "loss": 0.0557, "step": 55418 }, { "epoch": 0.9814448325784643, "grad_norm": 0.5279960632324219, "learning_rate": 2.7039997596809973e-08, "loss": 0.0431, "step": 55419 }, { "epoch": 0.9814625421154928, "grad_norm": 0.48007461428642273, "learning_rate": 2.6988385415426254e-08, "loss": 0.0621, "step": 55420 }, { "epoch": 0.9814802516525212, "grad_norm": 0.6644617319107056, "learning_rate": 2.6936822493735526e-08, "loss": 0.0552, "step": 55421 }, { "epoch": 0.9814979611895496, "grad_norm": 0.5723509192466736, "learning_rate": 2.688530883190765e-08, "loss": 0.0702, "step": 55422 }, { "epoch": 0.9815156707265781, "grad_norm": 0.8001711964607239, "learning_rate": 2.68338444301125e-08, "loss": 0.0749, "step": 55423 }, { "epoch": 0.9815333802636065, "grad_norm": 0.7047444581985474, "learning_rate": 2.6782429288521593e-08, "loss": 0.0646, "step": 55424 }, { "epoch": 0.9815510898006349, "grad_norm": 0.5735244154930115, "learning_rate": 2.6731063407299806e-08, "loss": 0.059, "step": 55425 }, { "epoch": 0.9815687993376633, "grad_norm": 0.5730841755867004, "learning_rate": 2.667974678662033e-08, "loss": 0.0591, "step": 55426 }, { "epoch": 0.9815865088746918, "grad_norm": 0.26943397521972656, "learning_rate": 2.6628479426648033e-08, "loss": 0.0295, "step": 55427 }, { "epoch": 0.9816042184117202, "grad_norm": 0.5385921597480774, "learning_rate": 2.6577261327556112e-08, "loss": 0.0669, "step": 55428 }, { "epoch": 0.9816219279487486, "grad_norm": 0.5422024130821228, "learning_rate": 2.652609248950777e-08, "loss": 0.0469, "step": 55429 }, { "epoch": 0.981639637485777, "grad_norm": 0.5107825398445129, "learning_rate": 2.64749729126762e-08, "loss": 0.0584, "step": 55430 }, { "epoch": 0.9816573470228055, "grad_norm": 0.2869512736797333, "learning_rate": 2.642390259722627e-08, "loss": 0.0499, "step": 55431 }, { "epoch": 0.9816750565598339, "grad_norm": 0.16456235945224762, "learning_rate": 2.6372881543327844e-08, "loss": 0.0451, "step": 55432 }, { "epoch": 0.9816927660968623, "grad_norm": 0.2976897954940796, "learning_rate": 2.6321909751147454e-08, "loss": 0.0459, "step": 55433 }, { "epoch": 0.9817104756338907, "grad_norm": 0.7525925636291504, "learning_rate": 2.6270987220853303e-08, "loss": 0.0456, "step": 55434 }, { "epoch": 0.9817281851709192, "grad_norm": 0.6510891318321228, "learning_rate": 2.6220113952615256e-08, "loss": 0.0458, "step": 55435 }, { "epoch": 0.9817458947079476, "grad_norm": 0.04310772940516472, "learning_rate": 2.616928994659651e-08, "loss": 0.0601, "step": 55436 }, { "epoch": 0.981763604244976, "grad_norm": 0.7417455315589905, "learning_rate": 2.611851520296693e-08, "loss": 0.0626, "step": 55437 }, { "epoch": 0.9817813137820045, "grad_norm": 0.43242761492729187, "learning_rate": 2.6067789721891388e-08, "loss": 0.061, "step": 55438 }, { "epoch": 0.9817990233190329, "grad_norm": 0.4514688551425934, "learning_rate": 2.6017113503539746e-08, "loss": 0.0447, "step": 55439 }, { "epoch": 0.9818167328560613, "grad_norm": 0.5219502449035645, "learning_rate": 2.5966486548076872e-08, "loss": 0.0344, "step": 55440 }, { "epoch": 0.9818344423930897, "grad_norm": 0.6875864267349243, "learning_rate": 2.5915908855670965e-08, "loss": 0.0547, "step": 55441 }, { "epoch": 0.9818521519301182, "grad_norm": 0.54344242811203, "learning_rate": 2.5865380426486895e-08, "loss": 0.0361, "step": 55442 }, { "epoch": 0.9818698614671466, "grad_norm": 0.5407072305679321, "learning_rate": 2.5814901260689528e-08, "loss": 0.0481, "step": 55443 }, { "epoch": 0.981887571004175, "grad_norm": 0.7212746739387512, "learning_rate": 2.5764471358448726e-08, "loss": 0.0735, "step": 55444 }, { "epoch": 0.9819052805412034, "grad_norm": 0.6283810138702393, "learning_rate": 2.5714090719926032e-08, "loss": 0.0464, "step": 55445 }, { "epoch": 0.9819229900782319, "grad_norm": 0.6378257274627686, "learning_rate": 2.5663759345291303e-08, "loss": 0.0654, "step": 55446 }, { "epoch": 0.9819406996152603, "grad_norm": 0.48244163393974304, "learning_rate": 2.561347723470775e-08, "loss": 0.0283, "step": 55447 }, { "epoch": 0.9819584091522887, "grad_norm": 0.9182543754577637, "learning_rate": 2.5563244388340235e-08, "loss": 0.0619, "step": 55448 }, { "epoch": 0.9819761186893171, "grad_norm": 0.9253103137016296, "learning_rate": 2.5513060806355293e-08, "loss": 0.0631, "step": 55449 }, { "epoch": 0.9819938282263456, "grad_norm": 1.0719060897827148, "learning_rate": 2.5462926488919458e-08, "loss": 0.0645, "step": 55450 }, { "epoch": 0.982011537763374, "grad_norm": 0.605873703956604, "learning_rate": 2.5412841436194267e-08, "loss": 0.0673, "step": 55451 }, { "epoch": 0.9820292473004024, "grad_norm": 0.7201575636863708, "learning_rate": 2.5362805648346253e-08, "loss": 0.0491, "step": 55452 }, { "epoch": 0.9820469568374309, "grad_norm": 0.7671942114830017, "learning_rate": 2.5312819125541954e-08, "loss": 0.0666, "step": 55453 }, { "epoch": 0.9820646663744593, "grad_norm": 0.9840885996818542, "learning_rate": 2.5262881867941235e-08, "loss": 0.0503, "step": 55454 }, { "epoch": 0.9820823759114877, "grad_norm": 0.5621209740638733, "learning_rate": 2.5212993875712298e-08, "loss": 0.0717, "step": 55455 }, { "epoch": 0.9821000854485161, "grad_norm": 0.564570426940918, "learning_rate": 2.5163155149018346e-08, "loss": 0.049, "step": 55456 }, { "epoch": 0.9821177949855446, "grad_norm": 0.9735825657844543, "learning_rate": 2.5113365688024248e-08, "loss": 0.068, "step": 55457 }, { "epoch": 0.982135504522573, "grad_norm": 0.45461130142211914, "learning_rate": 2.5063625492889873e-08, "loss": 0.0412, "step": 55458 }, { "epoch": 0.9821532140596014, "grad_norm": 0.4593794345855713, "learning_rate": 2.501393456378176e-08, "loss": 0.0411, "step": 55459 }, { "epoch": 0.9821709235966298, "grad_norm": 0.3470030128955841, "learning_rate": 2.496429290086477e-08, "loss": 0.0415, "step": 55460 }, { "epoch": 0.9821886331336583, "grad_norm": 0.7757998704910278, "learning_rate": 2.4914700504300446e-08, "loss": 0.0545, "step": 55461 }, { "epoch": 0.9822063426706867, "grad_norm": 0.5661094188690186, "learning_rate": 2.486515737425199e-08, "loss": 0.0395, "step": 55462 }, { "epoch": 0.9822240522077151, "grad_norm": 0.8482633233070374, "learning_rate": 2.4815663510882602e-08, "loss": 0.05, "step": 55463 }, { "epoch": 0.9822417617447435, "grad_norm": 0.9044672846794128, "learning_rate": 2.476621891435382e-08, "loss": 0.0497, "step": 55464 }, { "epoch": 0.982259471281772, "grad_norm": 0.5841654539108276, "learning_rate": 2.4716823584830516e-08, "loss": 0.0532, "step": 55465 }, { "epoch": 0.9822771808188004, "grad_norm": 0.5208471417427063, "learning_rate": 2.4667477522474223e-08, "loss": 0.0343, "step": 55466 }, { "epoch": 0.9822948903558288, "grad_norm": 0.4243660271167755, "learning_rate": 2.4618180727446484e-08, "loss": 0.0374, "step": 55467 }, { "epoch": 0.9823125998928574, "grad_norm": 0.5047572255134583, "learning_rate": 2.4568933199910494e-08, "loss": 0.0322, "step": 55468 }, { "epoch": 0.9823303094298858, "grad_norm": 0.3289116621017456, "learning_rate": 2.4519734940027793e-08, "loss": 0.0455, "step": 55469 }, { "epoch": 0.9823480189669141, "grad_norm": 0.5784407258033752, "learning_rate": 2.447058594796159e-08, "loss": 0.044, "step": 55470 }, { "epoch": 0.9823657285039425, "grad_norm": 0.5235517621040344, "learning_rate": 2.4421486223871748e-08, "loss": 0.0641, "step": 55471 }, { "epoch": 0.9823834380409711, "grad_norm": 0.48737776279449463, "learning_rate": 2.4372435767921474e-08, "loss": 0.0635, "step": 55472 }, { "epoch": 0.9824011475779995, "grad_norm": 0.7311444282531738, "learning_rate": 2.4323434580270642e-08, "loss": 0.0544, "step": 55473 }, { "epoch": 0.9824188571150279, "grad_norm": 0.4480958878993988, "learning_rate": 2.4274482661080788e-08, "loss": 0.0734, "step": 55474 }, { "epoch": 0.9824365666520563, "grad_norm": 0.7208805680274963, "learning_rate": 2.422558001051345e-08, "loss": 0.056, "step": 55475 }, { "epoch": 0.9824542761890848, "grad_norm": 0.40174782276153564, "learning_rate": 2.4176726628728496e-08, "loss": 0.0414, "step": 55476 }, { "epoch": 0.9824719857261132, "grad_norm": 0.5405436158180237, "learning_rate": 2.412792251588747e-08, "loss": 0.0632, "step": 55477 }, { "epoch": 0.9824896952631416, "grad_norm": 0.6404121518135071, "learning_rate": 2.40791676721519e-08, "loss": 0.0455, "step": 55478 }, { "epoch": 0.98250740480017, "grad_norm": 0.3953624963760376, "learning_rate": 2.4030462097680007e-08, "loss": 0.0208, "step": 55479 }, { "epoch": 0.9825251143371985, "grad_norm": 0.6722357273101807, "learning_rate": 2.3981805792633315e-08, "loss": 0.0441, "step": 55480 }, { "epoch": 0.9825428238742269, "grad_norm": 0.4301299750804901, "learning_rate": 2.393319875717337e-08, "loss": 0.0508, "step": 55481 }, { "epoch": 0.9825605334112553, "grad_norm": 0.4857714772224426, "learning_rate": 2.3884640991456706e-08, "loss": 0.0461, "step": 55482 }, { "epoch": 0.9825782429482838, "grad_norm": 0.2961670458316803, "learning_rate": 2.3836132495644868e-08, "loss": 0.0414, "step": 55483 }, { "epoch": 0.9825959524853122, "grad_norm": 0.5714017152786255, "learning_rate": 2.3787673269897724e-08, "loss": 0.0607, "step": 55484 }, { "epoch": 0.9826136620223406, "grad_norm": 0.6022112965583801, "learning_rate": 2.373926331437515e-08, "loss": 0.0506, "step": 55485 }, { "epoch": 0.982631371559369, "grad_norm": 0.5490885376930237, "learning_rate": 2.3690902629235344e-08, "loss": 0.0572, "step": 55486 }, { "epoch": 0.9826490810963975, "grad_norm": 0.4179690480232239, "learning_rate": 2.364259121463652e-08, "loss": 0.0614, "step": 55487 }, { "epoch": 0.9826667906334259, "grad_norm": 0.509353756904602, "learning_rate": 2.3594329070740216e-08, "loss": 0.0489, "step": 55488 }, { "epoch": 0.9826845001704543, "grad_norm": 0.8540618419647217, "learning_rate": 2.354611619770297e-08, "loss": 0.0607, "step": 55489 }, { "epoch": 0.9827022097074827, "grad_norm": 0.5001215934753418, "learning_rate": 2.3497952595684656e-08, "loss": 0.0451, "step": 55490 }, { "epoch": 0.9827199192445112, "grad_norm": 0.538834273815155, "learning_rate": 2.3449838264841818e-08, "loss": 0.0488, "step": 55491 }, { "epoch": 0.9827376287815396, "grad_norm": 0.4277114272117615, "learning_rate": 2.3401773205335987e-08, "loss": 0.0242, "step": 55492 }, { "epoch": 0.982755338318568, "grad_norm": 0.6654377579689026, "learning_rate": 2.3353757417323707e-08, "loss": 0.0395, "step": 55493 }, { "epoch": 0.9827730478555964, "grad_norm": 0.675333559513092, "learning_rate": 2.330579090096152e-08, "loss": 0.0615, "step": 55494 }, { "epoch": 0.9827907573926249, "grad_norm": 0.82606440782547, "learning_rate": 2.3257873656409302e-08, "loss": 0.0571, "step": 55495 }, { "epoch": 0.9828084669296533, "grad_norm": 0.6071526408195496, "learning_rate": 2.3210005683823587e-08, "loss": 0.0357, "step": 55496 }, { "epoch": 0.9828261764666817, "grad_norm": 0.5801470875740051, "learning_rate": 2.3162186983362588e-08, "loss": 0.0244, "step": 55497 }, { "epoch": 0.9828438860037102, "grad_norm": 0.40009236335754395, "learning_rate": 2.3114417555182843e-08, "loss": 0.0283, "step": 55498 }, { "epoch": 0.9828615955407386, "grad_norm": 0.2114272117614746, "learning_rate": 2.306669739944256e-08, "loss": 0.0311, "step": 55499 }, { "epoch": 0.982879305077767, "grad_norm": 0.3197104036808014, "learning_rate": 2.3019026516296615e-08, "loss": 0.0577, "step": 55500 }, { "epoch": 0.9828970146147954, "grad_norm": 0.5537397861480713, "learning_rate": 2.2971404905903214e-08, "loss": 0.0391, "step": 55501 }, { "epoch": 0.9829147241518239, "grad_norm": 0.8739393353462219, "learning_rate": 2.2923832568420567e-08, "loss": 0.0688, "step": 55502 }, { "epoch": 0.9829324336888523, "grad_norm": 0.6765508651733398, "learning_rate": 2.2876309504003544e-08, "loss": 0.0747, "step": 55503 }, { "epoch": 0.9829501432258807, "grad_norm": 0.699401319026947, "learning_rate": 2.2828835712807027e-08, "loss": 0.0946, "step": 55504 }, { "epoch": 0.9829678527629091, "grad_norm": 0.6878949403762817, "learning_rate": 2.278141119498922e-08, "loss": 0.0408, "step": 55505 }, { "epoch": 0.9829855622999376, "grad_norm": 0.6428361535072327, "learning_rate": 2.2734035950706668e-08, "loss": 0.0478, "step": 55506 }, { "epoch": 0.983003271836966, "grad_norm": 0.7399658560752869, "learning_rate": 2.268670998011424e-08, "loss": 0.0392, "step": 55507 }, { "epoch": 0.9830209813739944, "grad_norm": 0.3813297748565674, "learning_rate": 2.2639433283366817e-08, "loss": 0.0399, "step": 55508 }, { "epoch": 0.9830386909110228, "grad_norm": 0.5373409986495972, "learning_rate": 2.259220586062094e-08, "loss": 0.045, "step": 55509 }, { "epoch": 0.9830564004480513, "grad_norm": 0.8047507405281067, "learning_rate": 2.254502771203315e-08, "loss": 0.0593, "step": 55510 }, { "epoch": 0.9830741099850797, "grad_norm": 0.6128607988357544, "learning_rate": 2.2497898837754993e-08, "loss": 0.0547, "step": 55511 }, { "epoch": 0.9830918195221081, "grad_norm": 0.5957903265953064, "learning_rate": 2.2450819237944676e-08, "loss": 0.0487, "step": 55512 }, { "epoch": 0.9831095290591366, "grad_norm": 0.4721018671989441, "learning_rate": 2.2403788912757074e-08, "loss": 0.0425, "step": 55513 }, { "epoch": 0.983127238596165, "grad_norm": 0.47932714223861694, "learning_rate": 2.2356807862347062e-08, "loss": 0.0275, "step": 55514 }, { "epoch": 0.9831449481331934, "grad_norm": 0.6601106524467468, "learning_rate": 2.2309876086866186e-08, "loss": 0.0666, "step": 55515 }, { "epoch": 0.9831626576702218, "grad_norm": 0.30709508061408997, "learning_rate": 2.226299358647266e-08, "loss": 0.0421, "step": 55516 }, { "epoch": 0.9831803672072503, "grad_norm": 0.7158224582672119, "learning_rate": 2.221616036131635e-08, "loss": 0.039, "step": 55517 }, { "epoch": 0.9831980767442787, "grad_norm": 0.4880772829055786, "learning_rate": 2.216937641155714e-08, "loss": 0.0463, "step": 55518 }, { "epoch": 0.9832157862813071, "grad_norm": 0.5149673223495483, "learning_rate": 2.212264173734324e-08, "loss": 0.0812, "step": 55519 }, { "epoch": 0.9832334958183355, "grad_norm": 0.402308851480484, "learning_rate": 2.207595633883286e-08, "loss": 0.0634, "step": 55520 }, { "epoch": 0.983251205355364, "grad_norm": 0.8273376822471619, "learning_rate": 2.2029320216175875e-08, "loss": 0.0381, "step": 55521 }, { "epoch": 0.9832689148923924, "grad_norm": 0.5362792015075684, "learning_rate": 2.198273336952883e-08, "loss": 0.0437, "step": 55522 }, { "epoch": 0.9832866244294208, "grad_norm": 0.42942917346954346, "learning_rate": 2.1936195799043268e-08, "loss": 0.0243, "step": 55523 }, { "epoch": 0.9833043339664492, "grad_norm": 0.7046492695808411, "learning_rate": 2.1889707504872402e-08, "loss": 0.044, "step": 55524 }, { "epoch": 0.9833220435034777, "grad_norm": 0.5057495832443237, "learning_rate": 2.1843268487169443e-08, "loss": 0.0529, "step": 55525 }, { "epoch": 0.9833397530405061, "grad_norm": 0.5647251605987549, "learning_rate": 2.17968787460876e-08, "loss": 0.0593, "step": 55526 }, { "epoch": 0.9833574625775345, "grad_norm": 0.6942359805107117, "learning_rate": 2.175053828177842e-08, "loss": 0.0581, "step": 55527 }, { "epoch": 0.983375172114563, "grad_norm": 0.7909529805183411, "learning_rate": 2.1704247094395115e-08, "loss": 0.0768, "step": 55528 }, { "epoch": 0.9833928816515914, "grad_norm": 0.47435861825942993, "learning_rate": 2.165800518409089e-08, "loss": 0.049, "step": 55529 }, { "epoch": 0.9834105911886198, "grad_norm": 0.6379308104515076, "learning_rate": 2.161181255101563e-08, "loss": 0.0364, "step": 55530 }, { "epoch": 0.9834283007256482, "grad_norm": 0.7968963384628296, "learning_rate": 2.1565669195324212e-08, "loss": 0.0659, "step": 55531 }, { "epoch": 0.9834460102626768, "grad_norm": 0.8103414177894592, "learning_rate": 2.1519575117164847e-08, "loss": 0.0604, "step": 55532 }, { "epoch": 0.9834637197997051, "grad_norm": 0.6404439806938171, "learning_rate": 2.1473530316692414e-08, "loss": 0.0612, "step": 55533 }, { "epoch": 0.9834814293367335, "grad_norm": 0.3767624795436859, "learning_rate": 2.1427534794056792e-08, "loss": 0.0389, "step": 55534 }, { "epoch": 0.983499138873762, "grad_norm": 0.4358609914779663, "learning_rate": 2.1381588549409525e-08, "loss": 0.074, "step": 55535 }, { "epoch": 0.9835168484107905, "grad_norm": 0.4826076030731201, "learning_rate": 2.1335691582900497e-08, "loss": 0.0549, "step": 55536 }, { "epoch": 0.9835345579478189, "grad_norm": 0.6213324069976807, "learning_rate": 2.1289843894682915e-08, "loss": 0.062, "step": 55537 }, { "epoch": 0.9835522674848473, "grad_norm": 0.41927894949913025, "learning_rate": 2.1244045484908326e-08, "loss": 0.0369, "step": 55538 }, { "epoch": 0.9835699770218757, "grad_norm": 0.6650677919387817, "learning_rate": 2.119829635372328e-08, "loss": 0.0628, "step": 55539 }, { "epoch": 0.9835876865589042, "grad_norm": 0.6846715807914734, "learning_rate": 2.115259650128265e-08, "loss": 0.0359, "step": 55540 }, { "epoch": 0.9836053960959326, "grad_norm": 0.6839513182640076, "learning_rate": 2.1106945927734654e-08, "loss": 0.0608, "step": 55541 }, { "epoch": 0.983623105632961, "grad_norm": 0.8589057326316833, "learning_rate": 2.1061344633229175e-08, "loss": 0.0687, "step": 55542 }, { "epoch": 0.9836408151699895, "grad_norm": 0.3672359883785248, "learning_rate": 2.1015792617917752e-08, "loss": 0.0642, "step": 55543 }, { "epoch": 0.9836585247070179, "grad_norm": 0.6561914086341858, "learning_rate": 2.09702898819486e-08, "loss": 0.0523, "step": 55544 }, { "epoch": 0.9836762342440463, "grad_norm": 0.31430989503860474, "learning_rate": 2.0924836425471605e-08, "loss": 0.0519, "step": 55545 }, { "epoch": 0.9836939437810747, "grad_norm": 0.74048912525177, "learning_rate": 2.087943224863831e-08, "loss": 0.0395, "step": 55546 }, { "epoch": 0.9837116533181032, "grad_norm": 0.3346073031425476, "learning_rate": 2.083407735159526e-08, "loss": 0.0434, "step": 55547 }, { "epoch": 0.9837293628551316, "grad_norm": 0.6939727067947388, "learning_rate": 2.0788771734494004e-08, "loss": 0.0602, "step": 55548 }, { "epoch": 0.98374707239216, "grad_norm": 0.5244675874710083, "learning_rate": 2.074351539748276e-08, "loss": 0.0653, "step": 55549 }, { "epoch": 0.9837647819291884, "grad_norm": 0.818735659122467, "learning_rate": 2.069830834070974e-08, "loss": 0.0597, "step": 55550 }, { "epoch": 0.9837824914662169, "grad_norm": 0.966139554977417, "learning_rate": 2.065315056432482e-08, "loss": 0.0581, "step": 55551 }, { "epoch": 0.9838002010032453, "grad_norm": 0.45107144117355347, "learning_rate": 2.0608042068474552e-08, "loss": 0.0672, "step": 55552 }, { "epoch": 0.9838179105402737, "grad_norm": 0.5590296387672424, "learning_rate": 2.0562982853310485e-08, "loss": 0.0287, "step": 55553 }, { "epoch": 0.9838356200773021, "grad_norm": 0.2640828788280487, "learning_rate": 2.05179729189775e-08, "loss": 0.044, "step": 55554 }, { "epoch": 0.9838533296143306, "grad_norm": 0.5540536046028137, "learning_rate": 2.047301226562548e-08, "loss": 0.0569, "step": 55555 }, { "epoch": 0.983871039151359, "grad_norm": 0.6412041187286377, "learning_rate": 2.04281008934043e-08, "loss": 0.0518, "step": 55556 }, { "epoch": 0.9838887486883874, "grad_norm": 0.39630115032196045, "learning_rate": 2.038323880245718e-08, "loss": 0.055, "step": 55557 }, { "epoch": 0.9839064582254159, "grad_norm": 0.4020909368991852, "learning_rate": 2.0338425992935672e-08, "loss": 0.026, "step": 55558 }, { "epoch": 0.9839241677624443, "grad_norm": 0.5503687262535095, "learning_rate": 2.0293662464984653e-08, "loss": 0.039, "step": 55559 }, { "epoch": 0.9839418772994727, "grad_norm": 0.4222947955131531, "learning_rate": 2.0248948218752338e-08, "loss": 0.0328, "step": 55560 }, { "epoch": 0.9839595868365011, "grad_norm": 0.6763361692428589, "learning_rate": 2.0204283254386944e-08, "loss": 0.0756, "step": 55561 }, { "epoch": 0.9839772963735296, "grad_norm": 0.38093966245651245, "learning_rate": 2.0159667572033357e-08, "loss": 0.0709, "step": 55562 }, { "epoch": 0.983995005910558, "grad_norm": 0.5747602581977844, "learning_rate": 2.011510117183979e-08, "loss": 0.0632, "step": 55563 }, { "epoch": 0.9840127154475864, "grad_norm": 0.6569401621818542, "learning_rate": 2.0070584053951123e-08, "loss": 0.0802, "step": 55564 }, { "epoch": 0.9840304249846148, "grad_norm": 0.5970139503479004, "learning_rate": 2.0026116218517243e-08, "loss": 0.0455, "step": 55565 }, { "epoch": 0.9840481345216433, "grad_norm": 0.6309560537338257, "learning_rate": 1.9981697665681363e-08, "loss": 0.0281, "step": 55566 }, { "epoch": 0.9840658440586717, "grad_norm": 0.7034043669700623, "learning_rate": 1.9937328395590036e-08, "loss": 0.0646, "step": 55567 }, { "epoch": 0.9840835535957001, "grad_norm": 0.5130185484886169, "learning_rate": 1.9893008408389813e-08, "loss": 0.0696, "step": 55568 }, { "epoch": 0.9841012631327285, "grad_norm": 0.3409600853919983, "learning_rate": 1.984873770422724e-08, "loss": 0.0568, "step": 55569 }, { "epoch": 0.984118972669757, "grad_norm": 0.5365053415298462, "learning_rate": 1.9804516283247198e-08, "loss": 0.0465, "step": 55570 }, { "epoch": 0.9841366822067854, "grad_norm": 0.4869025647640228, "learning_rate": 1.9760344145594578e-08, "loss": 0.0407, "step": 55571 }, { "epoch": 0.9841543917438138, "grad_norm": 0.2883959114551544, "learning_rate": 1.971622129141426e-08, "loss": 0.0383, "step": 55572 }, { "epoch": 0.9841721012808423, "grad_norm": 0.4294719994068146, "learning_rate": 1.9672147720854462e-08, "loss": 0.0854, "step": 55573 }, { "epoch": 0.9841898108178707, "grad_norm": 0.7391061782836914, "learning_rate": 1.962812343405673e-08, "loss": 0.0693, "step": 55574 }, { "epoch": 0.9842075203548991, "grad_norm": 0.4697010815143585, "learning_rate": 1.9584148431167626e-08, "loss": 0.042, "step": 55575 }, { "epoch": 0.9842252298919275, "grad_norm": 0.5964667201042175, "learning_rate": 1.9540222712332024e-08, "loss": 0.0488, "step": 55576 }, { "epoch": 0.984242939428956, "grad_norm": 0.34737226366996765, "learning_rate": 1.9496346277693143e-08, "loss": 0.0452, "step": 55577 }, { "epoch": 0.9842606489659844, "grad_norm": 0.6167478561401367, "learning_rate": 1.9452519127395874e-08, "loss": 0.0889, "step": 55578 }, { "epoch": 0.9842783585030128, "grad_norm": 0.991178035736084, "learning_rate": 1.940874126158676e-08, "loss": 0.0665, "step": 55579 }, { "epoch": 0.9842960680400412, "grad_norm": 0.6838277578353882, "learning_rate": 1.936501268040569e-08, "loss": 0.0511, "step": 55580 }, { "epoch": 0.9843137775770697, "grad_norm": 0.5990946888923645, "learning_rate": 1.932133338399922e-08, "loss": 0.0611, "step": 55581 }, { "epoch": 0.9843314871140981, "grad_norm": 0.8959499001502991, "learning_rate": 1.927770337250889e-08, "loss": 0.0595, "step": 55582 }, { "epoch": 0.9843491966511265, "grad_norm": 0.4487107992172241, "learning_rate": 1.9234122646081264e-08, "loss": 0.0603, "step": 55583 }, { "epoch": 0.9843669061881549, "grad_norm": 0.4706409275531769, "learning_rate": 1.9190591204857888e-08, "loss": 0.0377, "step": 55584 }, { "epoch": 0.9843846157251834, "grad_norm": 0.895858883857727, "learning_rate": 1.914710904898198e-08, "loss": 0.0538, "step": 55585 }, { "epoch": 0.9844023252622118, "grad_norm": 0.3449549973011017, "learning_rate": 1.9103676178596764e-08, "loss": 0.0517, "step": 55586 }, { "epoch": 0.9844200347992402, "grad_norm": 0.16973255574703217, "learning_rate": 1.9060292593845453e-08, "loss": 0.0323, "step": 55587 }, { "epoch": 0.9844377443362687, "grad_norm": 0.6413801908493042, "learning_rate": 1.9016958294871268e-08, "loss": 0.0614, "step": 55588 }, { "epoch": 0.9844554538732971, "grad_norm": 0.8259175419807434, "learning_rate": 1.8973673281815763e-08, "loss": 0.0801, "step": 55589 }, { "epoch": 0.9844731634103255, "grad_norm": 0.34094852209091187, "learning_rate": 1.893043755482049e-08, "loss": 0.0326, "step": 55590 }, { "epoch": 0.9844908729473539, "grad_norm": 0.8841454982757568, "learning_rate": 1.8887251114030334e-08, "loss": 0.0668, "step": 55591 }, { "epoch": 0.9845085824843824, "grad_norm": 0.5103493928909302, "learning_rate": 1.8844113959585186e-08, "loss": 0.0343, "step": 55592 }, { "epoch": 0.9845262920214108, "grad_norm": 0.540357768535614, "learning_rate": 1.8801026091628258e-08, "loss": 0.0396, "step": 55593 }, { "epoch": 0.9845440015584392, "grad_norm": 0.5181887149810791, "learning_rate": 1.8757987510299447e-08, "loss": 0.0454, "step": 55594 }, { "epoch": 0.9845617110954676, "grad_norm": 0.6725274324417114, "learning_rate": 1.8714998215741964e-08, "loss": 0.0715, "step": 55595 }, { "epoch": 0.9845794206324961, "grad_norm": 0.6911676526069641, "learning_rate": 1.8672058208097366e-08, "loss": 0.0571, "step": 55596 }, { "epoch": 0.9845971301695245, "grad_norm": 0.6938372850418091, "learning_rate": 1.862916748750554e-08, "loss": 0.0437, "step": 55597 }, { "epoch": 0.984614839706553, "grad_norm": 0.5085515975952148, "learning_rate": 1.8586326054109702e-08, "loss": 0.0467, "step": 55598 }, { "epoch": 0.9846325492435813, "grad_norm": 1.0329885482788086, "learning_rate": 1.8543533908049747e-08, "loss": 0.0725, "step": 55599 }, { "epoch": 0.9846502587806099, "grad_norm": 0.7982615828514099, "learning_rate": 1.850079104946556e-08, "loss": 0.0651, "step": 55600 }, { "epoch": 0.9846679683176383, "grad_norm": 0.9188159108161926, "learning_rate": 1.8458097478497028e-08, "loss": 0.075, "step": 55601 }, { "epoch": 0.9846856778546667, "grad_norm": 0.7493042349815369, "learning_rate": 1.8415453195289033e-08, "loss": 0.084, "step": 55602 }, { "epoch": 0.9847033873916952, "grad_norm": 0.3953940272331238, "learning_rate": 1.837285819997647e-08, "loss": 0.0558, "step": 55603 }, { "epoch": 0.9847210969287236, "grad_norm": 0.5275830030441284, "learning_rate": 1.833031249270256e-08, "loss": 0.0342, "step": 55604 }, { "epoch": 0.984738806465752, "grad_norm": 0.47639748454093933, "learning_rate": 1.828781607360719e-08, "loss": 0.0339, "step": 55605 }, { "epoch": 0.9847565160027804, "grad_norm": 0.7343656420707703, "learning_rate": 1.824536894282858e-08, "loss": 0.0697, "step": 55606 }, { "epoch": 0.9847742255398089, "grad_norm": 0.5420412421226501, "learning_rate": 1.8202971100508283e-08, "loss": 0.0376, "step": 55607 }, { "epoch": 0.9847919350768373, "grad_norm": 0.8152760863304138, "learning_rate": 1.816062254678452e-08, "loss": 0.0396, "step": 55608 }, { "epoch": 0.9848096446138657, "grad_norm": 0.760222315788269, "learning_rate": 1.8118323281798853e-08, "loss": 0.0703, "step": 55609 }, { "epoch": 0.9848273541508941, "grad_norm": 0.2340749055147171, "learning_rate": 1.807607330568617e-08, "loss": 0.0247, "step": 55610 }, { "epoch": 0.9848450636879226, "grad_norm": 0.6475274562835693, "learning_rate": 1.8033872618588023e-08, "loss": 0.0801, "step": 55611 }, { "epoch": 0.984862773224951, "grad_norm": 0.7210534811019897, "learning_rate": 1.7991721220644298e-08, "loss": 0.0275, "step": 55612 }, { "epoch": 0.9848804827619794, "grad_norm": 0.46764904260635376, "learning_rate": 1.7949619111991556e-08, "loss": 0.0445, "step": 55613 }, { "epoch": 0.9848981922990078, "grad_norm": 0.6734029054641724, "learning_rate": 1.7907566292769683e-08, "loss": 0.0528, "step": 55614 }, { "epoch": 0.9849159018360363, "grad_norm": 0.7154271602630615, "learning_rate": 1.786556276311524e-08, "loss": 0.0548, "step": 55615 }, { "epoch": 0.9849336113730647, "grad_norm": 0.5033795833587646, "learning_rate": 1.7823608523169776e-08, "loss": 0.033, "step": 55616 }, { "epoch": 0.9849513209100931, "grad_norm": 0.7787259221076965, "learning_rate": 1.7781703573068185e-08, "loss": 0.0464, "step": 55617 }, { "epoch": 0.9849690304471216, "grad_norm": 0.3075595200061798, "learning_rate": 1.7739847912950358e-08, "loss": 0.0487, "step": 55618 }, { "epoch": 0.98498673998415, "grad_norm": 0.4467565417289734, "learning_rate": 1.7698041542951184e-08, "loss": 0.04, "step": 55619 }, { "epoch": 0.9850044495211784, "grad_norm": 0.5504165887832642, "learning_rate": 1.7656284463212213e-08, "loss": 0.0683, "step": 55620 }, { "epoch": 0.9850221590582068, "grad_norm": 0.8318390250205994, "learning_rate": 1.761457667386668e-08, "loss": 0.0633, "step": 55621 }, { "epoch": 0.9850398685952353, "grad_norm": 0.5320653915405273, "learning_rate": 1.7572918175054464e-08, "loss": 0.0565, "step": 55622 }, { "epoch": 0.9850575781322637, "grad_norm": 0.847001850605011, "learning_rate": 1.7531308966913794e-08, "loss": 0.051, "step": 55623 }, { "epoch": 0.9850752876692921, "grad_norm": 0.6325038075447083, "learning_rate": 1.7489749049577896e-08, "loss": 0.057, "step": 55624 }, { "epoch": 0.9850929972063205, "grad_norm": 0.39326947927474976, "learning_rate": 1.744823842318499e-08, "loss": 0.0668, "step": 55625 }, { "epoch": 0.985110706743349, "grad_norm": 0.8504701256752014, "learning_rate": 1.7406777087873306e-08, "loss": 0.0644, "step": 55626 }, { "epoch": 0.9851284162803774, "grad_norm": 0.6578500270843506, "learning_rate": 1.7365365043777724e-08, "loss": 0.1035, "step": 55627 }, { "epoch": 0.9851461258174058, "grad_norm": 0.8602349162101746, "learning_rate": 1.7324002291034814e-08, "loss": 0.0577, "step": 55628 }, { "epoch": 0.9851638353544342, "grad_norm": 0.44848042726516724, "learning_rate": 1.7282688829779458e-08, "loss": 0.0461, "step": 55629 }, { "epoch": 0.9851815448914627, "grad_norm": 0.6188582181930542, "learning_rate": 1.7241424660148218e-08, "loss": 0.0463, "step": 55630 }, { "epoch": 0.9851992544284911, "grad_norm": 0.5131548047065735, "learning_rate": 1.720020978227932e-08, "loss": 0.0475, "step": 55631 }, { "epoch": 0.9852169639655195, "grad_norm": 0.6328814625740051, "learning_rate": 1.7159044196304318e-08, "loss": 0.0312, "step": 55632 }, { "epoch": 0.985234673502548, "grad_norm": 0.779593288898468, "learning_rate": 1.7117927902361442e-08, "loss": 0.0667, "step": 55633 }, { "epoch": 0.9852523830395764, "grad_norm": 0.43684378266334534, "learning_rate": 1.707686090058558e-08, "loss": 0.0438, "step": 55634 }, { "epoch": 0.9852700925766048, "grad_norm": 0.8299548029899597, "learning_rate": 1.703584319111162e-08, "loss": 0.0724, "step": 55635 }, { "epoch": 0.9852878021136332, "grad_norm": 0.5052317976951599, "learning_rate": 1.6994874774074466e-08, "loss": 0.0401, "step": 55636 }, { "epoch": 0.9853055116506617, "grad_norm": 0.9818083643913269, "learning_rate": 1.6953955649607335e-08, "loss": 0.0691, "step": 55637 }, { "epoch": 0.9853232211876901, "grad_norm": 0.40949660539627075, "learning_rate": 1.6913085817846784e-08, "loss": 0.0501, "step": 55638 }, { "epoch": 0.9853409307247185, "grad_norm": 0.6249476075172424, "learning_rate": 1.6872265278927714e-08, "loss": 0.0418, "step": 55639 }, { "epoch": 0.9853586402617469, "grad_norm": 0.7186302542686462, "learning_rate": 1.6831494032981675e-08, "loss": 0.0497, "step": 55640 }, { "epoch": 0.9853763497987754, "grad_norm": 0.5297726392745972, "learning_rate": 1.6790772080146897e-08, "loss": 0.0553, "step": 55641 }, { "epoch": 0.9853940593358038, "grad_norm": 0.6174829602241516, "learning_rate": 1.6750099420553277e-08, "loss": 0.0477, "step": 55642 }, { "epoch": 0.9854117688728322, "grad_norm": 0.6184360980987549, "learning_rate": 1.6709476054335705e-08, "loss": 0.034, "step": 55643 }, { "epoch": 0.9854294784098606, "grad_norm": 0.6835264563560486, "learning_rate": 1.6668901981630734e-08, "loss": 0.0322, "step": 55644 }, { "epoch": 0.9854471879468891, "grad_norm": 0.6098225712776184, "learning_rate": 1.6628377202568266e-08, "loss": 0.0489, "step": 55645 }, { "epoch": 0.9854648974839175, "grad_norm": 0.8102971911430359, "learning_rate": 1.6587901717283194e-08, "loss": 0.0323, "step": 55646 }, { "epoch": 0.9854826070209459, "grad_norm": 0.7941629886627197, "learning_rate": 1.6547475525907075e-08, "loss": 0.064, "step": 55647 }, { "epoch": 0.9855003165579744, "grad_norm": 0.5851717591285706, "learning_rate": 1.6507098628576468e-08, "loss": 0.0447, "step": 55648 }, { "epoch": 0.9855180260950028, "grad_norm": 0.8895248770713806, "learning_rate": 1.6466771025421266e-08, "loss": 0.0618, "step": 55649 }, { "epoch": 0.9855357356320312, "grad_norm": 0.8815199136734009, "learning_rate": 1.6426492716574705e-08, "loss": 0.0611, "step": 55650 }, { "epoch": 0.9855534451690596, "grad_norm": 0.2035965919494629, "learning_rate": 1.6386263702168336e-08, "loss": 0.0443, "step": 55651 }, { "epoch": 0.9855711547060881, "grad_norm": 0.4428490400314331, "learning_rate": 1.6346083982337057e-08, "loss": 0.0499, "step": 55652 }, { "epoch": 0.9855888642431165, "grad_norm": 0.7842006683349609, "learning_rate": 1.6305953557212427e-08, "loss": 0.0358, "step": 55653 }, { "epoch": 0.9856065737801449, "grad_norm": 0.560583233833313, "learning_rate": 1.6265872426924346e-08, "loss": 0.0554, "step": 55654 }, { "epoch": 0.9856242833171733, "grad_norm": 0.300068199634552, "learning_rate": 1.6225840591606034e-08, "loss": 0.039, "step": 55655 }, { "epoch": 0.9856419928542018, "grad_norm": 0.38603082299232483, "learning_rate": 1.6185858051390724e-08, "loss": 0.0668, "step": 55656 }, { "epoch": 0.9856597023912302, "grad_norm": 0.5870351195335388, "learning_rate": 1.6145924806406643e-08, "loss": 0.0318, "step": 55657 }, { "epoch": 0.9856774119282586, "grad_norm": 0.44056689739227295, "learning_rate": 1.610604085678702e-08, "loss": 0.0607, "step": 55658 }, { "epoch": 0.985695121465287, "grad_norm": 0.6275739669799805, "learning_rate": 1.6066206202663415e-08, "loss": 0.0423, "step": 55659 }, { "epoch": 0.9857128310023155, "grad_norm": 0.34522631764411926, "learning_rate": 1.6026420844167387e-08, "loss": 0.0266, "step": 55660 }, { "epoch": 0.985730540539344, "grad_norm": 0.6176301836967468, "learning_rate": 1.5986684781427173e-08, "loss": 0.0407, "step": 55661 }, { "epoch": 0.9857482500763723, "grad_norm": 0.4600253701210022, "learning_rate": 1.5946998014575998e-08, "loss": 0.0501, "step": 55662 }, { "epoch": 0.9857659596134009, "grad_norm": 0.3567117750644684, "learning_rate": 1.5907360543743755e-08, "loss": 0.0406, "step": 55663 }, { "epoch": 0.9857836691504293, "grad_norm": 0.6089068055152893, "learning_rate": 1.5867772369062006e-08, "loss": 0.0752, "step": 55664 }, { "epoch": 0.9858013786874577, "grad_norm": 0.5653368234634399, "learning_rate": 1.5828233490657317e-08, "loss": 0.0599, "step": 55665 }, { "epoch": 0.985819088224486, "grad_norm": 0.5251937508583069, "learning_rate": 1.5788743908664584e-08, "loss": 0.0399, "step": 55666 }, { "epoch": 0.9858367977615146, "grad_norm": 0.6656915545463562, "learning_rate": 1.5749303623210365e-08, "loss": 0.052, "step": 55667 }, { "epoch": 0.985854507298543, "grad_norm": 0.47619858384132385, "learning_rate": 1.5709912634424562e-08, "loss": 0.0438, "step": 55668 }, { "epoch": 0.9858722168355714, "grad_norm": 0.5908463001251221, "learning_rate": 1.5670570942437068e-08, "loss": 0.0564, "step": 55669 }, { "epoch": 0.9858899263725998, "grad_norm": 0.7094197869300842, "learning_rate": 1.5631278547379447e-08, "loss": 0.0422, "step": 55670 }, { "epoch": 0.9859076359096283, "grad_norm": 0.5452536940574646, "learning_rate": 1.5592035449378262e-08, "loss": 0.0709, "step": 55671 }, { "epoch": 0.9859253454466567, "grad_norm": 0.31473463773727417, "learning_rate": 1.5552841648565074e-08, "loss": 0.0492, "step": 55672 }, { "epoch": 0.9859430549836851, "grad_norm": 1.0051859617233276, "learning_rate": 1.551369714506645e-08, "loss": 0.0883, "step": 55673 }, { "epoch": 0.9859607645207135, "grad_norm": 0.7918427586555481, "learning_rate": 1.5474601939012288e-08, "loss": 0.0555, "step": 55674 }, { "epoch": 0.985978474057742, "grad_norm": 0.39636731147766113, "learning_rate": 1.5435556030530817e-08, "loss": 0.0555, "step": 55675 }, { "epoch": 0.9859961835947704, "grad_norm": 0.6544347405433655, "learning_rate": 1.5396559419751932e-08, "loss": 0.0639, "step": 55676 }, { "epoch": 0.9860138931317988, "grad_norm": 0.3512127697467804, "learning_rate": 1.5357612106800534e-08, "loss": 0.0538, "step": 55677 }, { "epoch": 0.9860316026688273, "grad_norm": 0.830108106136322, "learning_rate": 1.5318714091808185e-08, "loss": 0.0497, "step": 55678 }, { "epoch": 0.9860493122058557, "grad_norm": 0.6320138573646545, "learning_rate": 1.5279865374903114e-08, "loss": 0.0367, "step": 55679 }, { "epoch": 0.9860670217428841, "grad_norm": 0.3901319205760956, "learning_rate": 1.524106595621022e-08, "loss": 0.0503, "step": 55680 }, { "epoch": 0.9860847312799125, "grad_norm": 0.7092639207839966, "learning_rate": 1.5202315835857738e-08, "loss": 0.0619, "step": 55681 }, { "epoch": 0.986102440816941, "grad_norm": 0.6365274786949158, "learning_rate": 1.5163615013975563e-08, "loss": 0.0324, "step": 55682 }, { "epoch": 0.9861201503539694, "grad_norm": 0.4359127879142761, "learning_rate": 1.512496349068859e-08, "loss": 0.0398, "step": 55683 }, { "epoch": 0.9861378598909978, "grad_norm": 0.41570621728897095, "learning_rate": 1.508636126612506e-08, "loss": 0.0467, "step": 55684 }, { "epoch": 0.9861555694280262, "grad_norm": 0.8193755745887756, "learning_rate": 1.5047808340413193e-08, "loss": 0.031, "step": 55685 }, { "epoch": 0.9861732789650547, "grad_norm": 0.8878175616264343, "learning_rate": 1.5009304713676232e-08, "loss": 0.0753, "step": 55686 }, { "epoch": 0.9861909885020831, "grad_norm": 1.0242966413497925, "learning_rate": 1.4970850386044068e-08, "loss": 0.0574, "step": 55687 }, { "epoch": 0.9862086980391115, "grad_norm": 0.6272690892219543, "learning_rate": 1.49324453576416e-08, "loss": 0.0726, "step": 55688 }, { "epoch": 0.98622640757614, "grad_norm": 0.4906201958656311, "learning_rate": 1.4894089628597063e-08, "loss": 0.0617, "step": 55689 }, { "epoch": 0.9862441171131684, "grad_norm": 0.4547909200191498, "learning_rate": 1.485578319903369e-08, "loss": 0.0406, "step": 55690 }, { "epoch": 0.9862618266501968, "grad_norm": 0.518521785736084, "learning_rate": 1.4817526069079713e-08, "loss": 0.0466, "step": 55691 }, { "epoch": 0.9862795361872252, "grad_norm": 0.6961977481842041, "learning_rate": 1.4779318238860029e-08, "loss": 0.0556, "step": 55692 }, { "epoch": 0.9862972457242537, "grad_norm": 0.5960334539413452, "learning_rate": 1.4741159708501206e-08, "loss": 0.0438, "step": 55693 }, { "epoch": 0.9863149552612821, "grad_norm": 0.27682313323020935, "learning_rate": 1.4703050478129808e-08, "loss": 0.0436, "step": 55694 }, { "epoch": 0.9863326647983105, "grad_norm": 0.5900205373764038, "learning_rate": 1.4664990547867408e-08, "loss": 0.0393, "step": 55695 }, { "epoch": 0.9863503743353389, "grad_norm": 0.7013446688652039, "learning_rate": 1.462697991784223e-08, "loss": 0.0593, "step": 55696 }, { "epoch": 0.9863680838723674, "grad_norm": 0.6398376226425171, "learning_rate": 1.4589018588179182e-08, "loss": 0.0552, "step": 55697 }, { "epoch": 0.9863857934093958, "grad_norm": 0.6494256854057312, "learning_rate": 1.4551106559001493e-08, "loss": 0.0541, "step": 55698 }, { "epoch": 0.9864035029464242, "grad_norm": 0.5605993270874023, "learning_rate": 1.4513243830435729e-08, "loss": 0.0395, "step": 55699 }, { "epoch": 0.9864212124834526, "grad_norm": 0.4322059452533722, "learning_rate": 1.4475430402605128e-08, "loss": 0.043, "step": 55700 }, { "epoch": 0.9864389220204811, "grad_norm": 0.46857354044914246, "learning_rate": 1.4437666275636252e-08, "loss": 0.0471, "step": 55701 }, { "epoch": 0.9864566315575095, "grad_norm": 0.5459872484207153, "learning_rate": 1.4399951449650672e-08, "loss": 0.0575, "step": 55702 }, { "epoch": 0.9864743410945379, "grad_norm": 0.8965749144554138, "learning_rate": 1.4362285924773288e-08, "loss": 0.0753, "step": 55703 }, { "epoch": 0.9864920506315664, "grad_norm": 0.4025256335735321, "learning_rate": 1.4324669701127334e-08, "loss": 0.0384, "step": 55704 }, { "epoch": 0.9865097601685948, "grad_norm": 0.5643288493156433, "learning_rate": 1.4287102778839379e-08, "loss": 0.0676, "step": 55705 }, { "epoch": 0.9865274697056232, "grad_norm": 0.663030207157135, "learning_rate": 1.4249585158030987e-08, "loss": 0.069, "step": 55706 }, { "epoch": 0.9865451792426516, "grad_norm": 0.7555832266807556, "learning_rate": 1.421211683882373e-08, "loss": 0.0409, "step": 55707 }, { "epoch": 0.9865628887796801, "grad_norm": 0.6226457953453064, "learning_rate": 1.4174697821345838e-08, "loss": 0.058, "step": 55708 }, { "epoch": 0.9865805983167085, "grad_norm": 0.5930636525154114, "learning_rate": 1.4137328105715553e-08, "loss": 0.0521, "step": 55709 }, { "epoch": 0.9865983078537369, "grad_norm": 0.5726172924041748, "learning_rate": 1.410000769205777e-08, "loss": 0.0503, "step": 55710 }, { "epoch": 0.9866160173907653, "grad_norm": 1.0182439088821411, "learning_rate": 1.4062736580495728e-08, "loss": 0.0803, "step": 55711 }, { "epoch": 0.9866337269277938, "grad_norm": 0.9303700923919678, "learning_rate": 1.4025514771150993e-08, "loss": 0.058, "step": 55712 }, { "epoch": 0.9866514364648222, "grad_norm": 0.45533737540245056, "learning_rate": 1.3988342264146803e-08, "loss": 0.0379, "step": 55713 }, { "epoch": 0.9866691460018506, "grad_norm": 0.6203941106796265, "learning_rate": 1.395121905960639e-08, "loss": 0.0532, "step": 55714 }, { "epoch": 0.986686855538879, "grad_norm": 0.7669447064399719, "learning_rate": 1.3914145157649661e-08, "loss": 0.0566, "step": 55715 }, { "epoch": 0.9867045650759075, "grad_norm": 0.3323383629322052, "learning_rate": 1.3877120558398181e-08, "loss": 0.0447, "step": 55716 }, { "epoch": 0.9867222746129359, "grad_norm": 0.6739885210990906, "learning_rate": 1.3840145261976855e-08, "loss": 0.0501, "step": 55717 }, { "epoch": 0.9867399841499643, "grad_norm": 0.3181808888912201, "learning_rate": 1.3803219268505585e-08, "loss": 0.0555, "step": 55718 }, { "epoch": 0.9867576936869928, "grad_norm": 0.816117525100708, "learning_rate": 1.376634257810594e-08, "loss": 0.0591, "step": 55719 }, { "epoch": 0.9867754032240212, "grad_norm": 1.1104743480682373, "learning_rate": 1.3729515190899488e-08, "loss": 0.0836, "step": 55720 }, { "epoch": 0.9867931127610496, "grad_norm": 0.8327904343605042, "learning_rate": 1.3692737107007803e-08, "loss": 0.0679, "step": 55721 }, { "epoch": 0.986810822298078, "grad_norm": 0.6705148220062256, "learning_rate": 1.3656008326549119e-08, "loss": 0.0619, "step": 55722 }, { "epoch": 0.9868285318351065, "grad_norm": 0.6997935175895691, "learning_rate": 1.3619328849648338e-08, "loss": 0.061, "step": 55723 }, { "epoch": 0.986846241372135, "grad_norm": 0.5264473557472229, "learning_rate": 1.35826986764237e-08, "loss": 0.0643, "step": 55724 }, { "epoch": 0.9868639509091633, "grad_norm": 0.6088201999664307, "learning_rate": 1.3546117806996772e-08, "loss": 0.0596, "step": 55725 }, { "epoch": 0.9868816604461917, "grad_norm": 0.5939140319824219, "learning_rate": 1.3509586241485795e-08, "loss": 0.0416, "step": 55726 }, { "epoch": 0.9868993699832203, "grad_norm": 0.5236981511116028, "learning_rate": 1.3473103980012336e-08, "loss": 0.0426, "step": 55727 }, { "epoch": 0.9869170795202487, "grad_norm": 0.6110216379165649, "learning_rate": 1.3436671022697966e-08, "loss": 0.046, "step": 55728 }, { "epoch": 0.986934789057277, "grad_norm": 0.4856932759284973, "learning_rate": 1.3400287369660924e-08, "loss": 0.0533, "step": 55729 }, { "epoch": 0.9869524985943054, "grad_norm": 0.41982972621917725, "learning_rate": 1.3363953021021114e-08, "loss": 0.046, "step": 55730 }, { "epoch": 0.986970208131334, "grad_norm": 0.5668182969093323, "learning_rate": 1.3327667976898439e-08, "loss": 0.0551, "step": 55731 }, { "epoch": 0.9869879176683624, "grad_norm": 0.3270799219608307, "learning_rate": 1.329143223741114e-08, "loss": 0.0641, "step": 55732 }, { "epoch": 0.9870056272053908, "grad_norm": 0.5184877514839172, "learning_rate": 1.3255245802679116e-08, "loss": 0.0532, "step": 55733 }, { "epoch": 0.9870233367424193, "grad_norm": 0.5953505039215088, "learning_rate": 1.3219108672822277e-08, "loss": 0.0591, "step": 55734 }, { "epoch": 0.9870410462794477, "grad_norm": 1.195499062538147, "learning_rate": 1.3183020847957195e-08, "loss": 0.0831, "step": 55735 }, { "epoch": 0.9870587558164761, "grad_norm": 0.44248878955841064, "learning_rate": 1.3146982328205437e-08, "loss": 0.038, "step": 55736 }, { "epoch": 0.9870764653535045, "grad_norm": 0.3950614333152771, "learning_rate": 1.3110993113683578e-08, "loss": 0.0678, "step": 55737 }, { "epoch": 0.987094174890533, "grad_norm": 0.5819411277770996, "learning_rate": 1.3075053204511523e-08, "loss": 0.0348, "step": 55738 }, { "epoch": 0.9871118844275614, "grad_norm": 0.22843126952648163, "learning_rate": 1.3039162600805843e-08, "loss": 0.043, "step": 55739 }, { "epoch": 0.9871295939645898, "grad_norm": 0.5294094085693359, "learning_rate": 1.3003321302686443e-08, "loss": 0.0569, "step": 55740 }, { "epoch": 0.9871473035016182, "grad_norm": 0.5497263669967651, "learning_rate": 1.2967529310269899e-08, "loss": 0.0571, "step": 55741 }, { "epoch": 0.9871650130386467, "grad_norm": 0.4801214039325714, "learning_rate": 1.2931786623674446e-08, "loss": 0.0329, "step": 55742 }, { "epoch": 0.9871827225756751, "grad_norm": 0.2787350118160248, "learning_rate": 1.2896093243018325e-08, "loss": 0.026, "step": 55743 }, { "epoch": 0.9872004321127035, "grad_norm": 0.4859921634197235, "learning_rate": 1.2860449168416444e-08, "loss": 0.0632, "step": 55744 }, { "epoch": 0.9872181416497319, "grad_norm": 0.4624616503715515, "learning_rate": 1.2824854399988706e-08, "loss": 0.1096, "step": 55745 }, { "epoch": 0.9872358511867604, "grad_norm": 0.4579697847366333, "learning_rate": 1.2789308937851684e-08, "loss": 0.0746, "step": 55746 }, { "epoch": 0.9872535607237888, "grad_norm": 0.39962708950042725, "learning_rate": 1.2753812782121955e-08, "loss": 0.0541, "step": 55747 }, { "epoch": 0.9872712702608172, "grad_norm": 0.7882727384567261, "learning_rate": 1.2718365932916087e-08, "loss": 0.0539, "step": 55748 }, { "epoch": 0.9872889797978457, "grad_norm": 0.38609299063682556, "learning_rate": 1.2682968390350657e-08, "loss": 0.0399, "step": 55749 }, { "epoch": 0.9873066893348741, "grad_norm": 0.7031871676445007, "learning_rate": 1.2647620154542238e-08, "loss": 0.062, "step": 55750 }, { "epoch": 0.9873243988719025, "grad_norm": 0.6874836683273315, "learning_rate": 1.2612321225607405e-08, "loss": 0.0451, "step": 55751 }, { "epoch": 0.9873421084089309, "grad_norm": 0.43186765909194946, "learning_rate": 1.2577071603662726e-08, "loss": 0.0556, "step": 55752 }, { "epoch": 0.9873598179459594, "grad_norm": 0.39177048206329346, "learning_rate": 1.2541871288823115e-08, "loss": 0.0554, "step": 55753 }, { "epoch": 0.9873775274829878, "grad_norm": 0.465310662984848, "learning_rate": 1.2506720281203476e-08, "loss": 0.0403, "step": 55754 }, { "epoch": 0.9873952370200162, "grad_norm": 0.5014140605926514, "learning_rate": 1.247161858092205e-08, "loss": 0.0594, "step": 55755 }, { "epoch": 0.9874129465570446, "grad_norm": 0.5742853879928589, "learning_rate": 1.2436566188093746e-08, "loss": 0.0599, "step": 55756 }, { "epoch": 0.9874306560940731, "grad_norm": 0.558978259563446, "learning_rate": 1.2401563102833468e-08, "loss": 0.0546, "step": 55757 }, { "epoch": 0.9874483656311015, "grad_norm": 0.5984024405479431, "learning_rate": 1.2366609325256129e-08, "loss": 0.0514, "step": 55758 }, { "epoch": 0.9874660751681299, "grad_norm": 0.4025561511516571, "learning_rate": 1.2331704855474968e-08, "loss": 0.0453, "step": 55759 }, { "epoch": 0.9874837847051583, "grad_norm": 0.5868176221847534, "learning_rate": 1.2296849693608226e-08, "loss": 0.0524, "step": 55760 }, { "epoch": 0.9875014942421868, "grad_norm": 0.367423415184021, "learning_rate": 1.2262043839767479e-08, "loss": 0.0256, "step": 55761 }, { "epoch": 0.9875192037792152, "grad_norm": 0.9133152961730957, "learning_rate": 1.2227287294070966e-08, "loss": 0.0735, "step": 55762 }, { "epoch": 0.9875369133162436, "grad_norm": 0.6433402299880981, "learning_rate": 1.21925800566286e-08, "loss": 0.0654, "step": 55763 }, { "epoch": 0.9875546228532721, "grad_norm": 0.5348601937294006, "learning_rate": 1.2157922127556953e-08, "loss": 0.0687, "step": 55764 }, { "epoch": 0.9875723323903005, "grad_norm": 0.6227992177009583, "learning_rate": 1.2123313506970935e-08, "loss": 0.0376, "step": 55765 }, { "epoch": 0.9875900419273289, "grad_norm": 0.4692395329475403, "learning_rate": 1.2088754194982122e-08, "loss": 0.0381, "step": 55766 }, { "epoch": 0.9876077514643573, "grad_norm": 0.7468745708465576, "learning_rate": 1.2054244191707086e-08, "loss": 0.0446, "step": 55767 }, { "epoch": 0.9876254610013858, "grad_norm": 0.14848528802394867, "learning_rate": 1.2019783497255742e-08, "loss": 0.0442, "step": 55768 }, { "epoch": 0.9876431705384142, "grad_norm": 0.927139163017273, "learning_rate": 1.1985372111744663e-08, "loss": 0.0698, "step": 55769 }, { "epoch": 0.9876608800754426, "grad_norm": 0.5051469206809998, "learning_rate": 1.1951010035285426e-08, "loss": 0.0395, "step": 55770 }, { "epoch": 0.987678589612471, "grad_norm": 0.8272497653961182, "learning_rate": 1.1916697267991273e-08, "loss": 0.0674, "step": 55771 }, { "epoch": 0.9876962991494995, "grad_norm": 0.632625937461853, "learning_rate": 1.1882433809975445e-08, "loss": 0.0757, "step": 55772 }, { "epoch": 0.9877140086865279, "grad_norm": 0.4356434941291809, "learning_rate": 1.1848219661349524e-08, "loss": 0.0375, "step": 55773 }, { "epoch": 0.9877317182235563, "grad_norm": 0.4322008192539215, "learning_rate": 1.1814054822228414e-08, "loss": 0.035, "step": 55774 }, { "epoch": 0.9877494277605847, "grad_norm": 0.9670458436012268, "learning_rate": 1.177993929272203e-08, "loss": 0.0634, "step": 55775 }, { "epoch": 0.9877671372976132, "grad_norm": 0.3693578839302063, "learning_rate": 1.1745873072943614e-08, "loss": 0.0169, "step": 55776 }, { "epoch": 0.9877848468346416, "grad_norm": 0.36106348037719727, "learning_rate": 1.1711856163006407e-08, "loss": 0.0367, "step": 55777 }, { "epoch": 0.98780255637167, "grad_norm": 0.856449544429779, "learning_rate": 1.1677888563020323e-08, "loss": 0.0566, "step": 55778 }, { "epoch": 0.9878202659086985, "grad_norm": 0.746386706829071, "learning_rate": 1.1643970273098603e-08, "loss": 0.0681, "step": 55779 }, { "epoch": 0.9878379754457269, "grad_norm": 0.4801674485206604, "learning_rate": 1.161010129335116e-08, "loss": 0.0614, "step": 55780 }, { "epoch": 0.9878556849827553, "grad_norm": 0.6762864589691162, "learning_rate": 1.1576281623891239e-08, "loss": 0.0498, "step": 55781 }, { "epoch": 0.9878733945197837, "grad_norm": 0.7245732545852661, "learning_rate": 1.1542511264828748e-08, "loss": 0.0593, "step": 55782 }, { "epoch": 0.9878911040568122, "grad_norm": 0.5305841565132141, "learning_rate": 1.1508790216276933e-08, "loss": 0.0695, "step": 55783 }, { "epoch": 0.9879088135938406, "grad_norm": 0.6716864705085754, "learning_rate": 1.1475118478344038e-08, "loss": 0.0626, "step": 55784 }, { "epoch": 0.987926523130869, "grad_norm": 0.4483845829963684, "learning_rate": 1.1441496051141641e-08, "loss": 0.0532, "step": 55785 }, { "epoch": 0.9879442326678974, "grad_norm": 0.8265349268913269, "learning_rate": 1.1407922934781322e-08, "loss": 0.0624, "step": 55786 }, { "epoch": 0.987961942204926, "grad_norm": 0.4105913043022156, "learning_rate": 1.1374399129372991e-08, "loss": 0.0569, "step": 55787 }, { "epoch": 0.9879796517419543, "grad_norm": 0.49089309573173523, "learning_rate": 1.1340924635026562e-08, "loss": 0.0544, "step": 55788 }, { "epoch": 0.9879973612789827, "grad_norm": 0.497364342212677, "learning_rate": 1.1307499451853609e-08, "loss": 0.056, "step": 55789 }, { "epoch": 0.9880150708160111, "grad_norm": 0.41888973116874695, "learning_rate": 1.127412357996238e-08, "loss": 0.0486, "step": 55790 }, { "epoch": 0.9880327803530397, "grad_norm": 0.7247154116630554, "learning_rate": 1.124079701946279e-08, "loss": 0.0462, "step": 55791 }, { "epoch": 0.988050489890068, "grad_norm": 0.6070161461830139, "learning_rate": 1.1207519770464746e-08, "loss": 0.0468, "step": 55792 }, { "epoch": 0.9880681994270964, "grad_norm": 0.8240073323249817, "learning_rate": 1.1174291833078165e-08, "loss": 0.058, "step": 55793 }, { "epoch": 0.988085908964125, "grad_norm": 0.3264811635017395, "learning_rate": 1.114111320741129e-08, "loss": 0.0889, "step": 55794 }, { "epoch": 0.9881036185011534, "grad_norm": 0.7279253005981445, "learning_rate": 1.1107983893575701e-08, "loss": 0.0612, "step": 55795 }, { "epoch": 0.9881213280381818, "grad_norm": 0.4118216633796692, "learning_rate": 1.1074903891677978e-08, "loss": 0.0625, "step": 55796 }, { "epoch": 0.9881390375752102, "grad_norm": 0.7710176706314087, "learning_rate": 1.1041873201828034e-08, "loss": 0.0778, "step": 55797 }, { "epoch": 0.9881567471122387, "grad_norm": 0.6929205656051636, "learning_rate": 1.1008891824134116e-08, "loss": 0.0392, "step": 55798 }, { "epoch": 0.9881744566492671, "grad_norm": 0.7935324311256409, "learning_rate": 1.0975959758706134e-08, "loss": 0.0843, "step": 55799 }, { "epoch": 0.9881921661862955, "grad_norm": 0.8973535299301147, "learning_rate": 1.0943077005649006e-08, "loss": 0.0585, "step": 55800 }, { "epoch": 0.9882098757233239, "grad_norm": 0.4886888265609741, "learning_rate": 1.0910243565074307e-08, "loss": 0.043, "step": 55801 }, { "epoch": 0.9882275852603524, "grad_norm": 0.4720108211040497, "learning_rate": 1.0877459437088621e-08, "loss": 0.0732, "step": 55802 }, { "epoch": 0.9882452947973808, "grad_norm": 0.7513032555580139, "learning_rate": 1.0844724621800195e-08, "loss": 0.0399, "step": 55803 }, { "epoch": 0.9882630043344092, "grad_norm": 0.3606230318546295, "learning_rate": 1.0812039119317273e-08, "loss": 0.0537, "step": 55804 }, { "epoch": 0.9882807138714376, "grad_norm": 0.4122535288333893, "learning_rate": 1.077940292974644e-08, "loss": 0.0392, "step": 55805 }, { "epoch": 0.9882984234084661, "grad_norm": 0.4711274206638336, "learning_rate": 1.0746816053194276e-08, "loss": 0.0531, "step": 55806 }, { "epoch": 0.9883161329454945, "grad_norm": 0.8515456318855286, "learning_rate": 1.0714278489769025e-08, "loss": 0.0638, "step": 55807 }, { "epoch": 0.9883338424825229, "grad_norm": 0.8328676819801331, "learning_rate": 1.0681790239578937e-08, "loss": 0.0627, "step": 55808 }, { "epoch": 0.9883515520195514, "grad_norm": 0.8269935250282288, "learning_rate": 1.0649351302728926e-08, "loss": 0.0647, "step": 55809 }, { "epoch": 0.9883692615565798, "grad_norm": 0.676293134689331, "learning_rate": 1.0616961679325576e-08, "loss": 0.0623, "step": 55810 }, { "epoch": 0.9883869710936082, "grad_norm": 0.5593075752258301, "learning_rate": 1.0584621369477132e-08, "loss": 0.0399, "step": 55811 }, { "epoch": 0.9884046806306366, "grad_norm": 1.0840719938278198, "learning_rate": 1.0552330373290176e-08, "loss": 0.0522, "step": 55812 }, { "epoch": 0.9884223901676651, "grad_norm": 0.48388341069221497, "learning_rate": 1.0520088690869623e-08, "loss": 0.0422, "step": 55813 }, { "epoch": 0.9884400997046935, "grad_norm": 0.5453547239303589, "learning_rate": 1.048789632232039e-08, "loss": 0.0423, "step": 55814 }, { "epoch": 0.9884578092417219, "grad_norm": 0.3745473325252533, "learning_rate": 1.0455753267750723e-08, "loss": 0.0586, "step": 55815 }, { "epoch": 0.9884755187787503, "grad_norm": 0.19578589498996735, "learning_rate": 1.0423659527265539e-08, "loss": 0.0194, "step": 55816 }, { "epoch": 0.9884932283157788, "grad_norm": 0.5269662737846375, "learning_rate": 1.0391615100971419e-08, "loss": 0.0418, "step": 55817 }, { "epoch": 0.9885109378528072, "grad_norm": 0.6522767543792725, "learning_rate": 1.0359619988971614e-08, "loss": 0.0516, "step": 55818 }, { "epoch": 0.9885286473898356, "grad_norm": 0.4939640760421753, "learning_rate": 1.0327674191372705e-08, "loss": 0.0601, "step": 55819 }, { "epoch": 0.988546356926864, "grad_norm": 0.30312490463256836, "learning_rate": 1.0295777708279608e-08, "loss": 0.0247, "step": 55820 }, { "epoch": 0.9885640664638925, "grad_norm": 0.3892797827720642, "learning_rate": 1.0263930539797239e-08, "loss": 0.0313, "step": 55821 }, { "epoch": 0.9885817760009209, "grad_norm": 0.9050271511077881, "learning_rate": 1.0232132686032181e-08, "loss": 0.0566, "step": 55822 }, { "epoch": 0.9885994855379493, "grad_norm": 0.556134045124054, "learning_rate": 1.0200384147084352e-08, "loss": 0.0543, "step": 55823 }, { "epoch": 0.9886171950749778, "grad_norm": 1.103507161140442, "learning_rate": 1.0168684923063665e-08, "loss": 0.0353, "step": 55824 }, { "epoch": 0.9886349046120062, "grad_norm": 0.40236911177635193, "learning_rate": 1.0137035014070039e-08, "loss": 0.0856, "step": 55825 }, { "epoch": 0.9886526141490346, "grad_norm": 0.38045579195022583, "learning_rate": 1.0105434420210059e-08, "loss": 0.0404, "step": 55826 }, { "epoch": 0.988670323686063, "grad_norm": 0.5520045161247253, "learning_rate": 1.007388314158697e-08, "loss": 0.0517, "step": 55827 }, { "epoch": 0.9886880332230915, "grad_norm": 0.7569853067398071, "learning_rate": 1.0042381178305693e-08, "loss": 0.0606, "step": 55828 }, { "epoch": 0.9887057427601199, "grad_norm": 0.5536710023880005, "learning_rate": 1.0010928530467811e-08, "loss": 0.0603, "step": 55829 }, { "epoch": 0.9887234522971483, "grad_norm": 0.7095898389816284, "learning_rate": 9.97952519817824e-09, "loss": 0.0607, "step": 55830 }, { "epoch": 0.9887411618341767, "grad_norm": 0.43342646956443787, "learning_rate": 9.948171181538569e-09, "loss": 0.0365, "step": 55831 }, { "epoch": 0.9887588713712052, "grad_norm": 0.6417940855026245, "learning_rate": 9.916866480653707e-09, "loss": 0.0324, "step": 55832 }, { "epoch": 0.9887765809082336, "grad_norm": 0.3984829783439636, "learning_rate": 9.885611095626911e-09, "loss": 0.0824, "step": 55833 }, { "epoch": 0.988794290445262, "grad_norm": 0.6994772553443909, "learning_rate": 9.85440502656143e-09, "loss": 0.0634, "step": 55834 }, { "epoch": 0.9888119999822904, "grad_norm": 0.34102585911750793, "learning_rate": 9.823248273557184e-09, "loss": 0.0402, "step": 55835 }, { "epoch": 0.9888297095193189, "grad_norm": 0.6744744181632996, "learning_rate": 9.792140836719088e-09, "loss": 0.0526, "step": 55836 }, { "epoch": 0.9888474190563473, "grad_norm": 0.4019332528114319, "learning_rate": 9.761082716147062e-09, "loss": 0.0737, "step": 55837 }, { "epoch": 0.9888651285933757, "grad_norm": 0.4792421758174896, "learning_rate": 9.730073911946025e-09, "loss": 0.0576, "step": 55838 }, { "epoch": 0.9888828381304042, "grad_norm": 0.9453420639038086, "learning_rate": 9.699114424217558e-09, "loss": 0.0643, "step": 55839 }, { "epoch": 0.9889005476674326, "grad_norm": 0.7913751602172852, "learning_rate": 9.668204253061585e-09, "loss": 0.0537, "step": 55840 }, { "epoch": 0.988918257204461, "grad_norm": 0.6156312227249146, "learning_rate": 9.63734339858302e-09, "loss": 0.0603, "step": 55841 }, { "epoch": 0.9889359667414894, "grad_norm": 0.470974862575531, "learning_rate": 9.606531860878454e-09, "loss": 0.0645, "step": 55842 }, { "epoch": 0.9889536762785179, "grad_norm": 0.5501421689987183, "learning_rate": 9.575769640054466e-09, "loss": 0.0581, "step": 55843 }, { "epoch": 0.9889713858155463, "grad_norm": 0.4282935559749603, "learning_rate": 9.545056736209312e-09, "loss": 0.0415, "step": 55844 }, { "epoch": 0.9889890953525747, "grad_norm": 0.35693633556365967, "learning_rate": 9.514393149444578e-09, "loss": 0.0557, "step": 55845 }, { "epoch": 0.9890068048896031, "grad_norm": 0.39751729369163513, "learning_rate": 9.48377887986185e-09, "loss": 0.0508, "step": 55846 }, { "epoch": 0.9890245144266316, "grad_norm": 0.6652473211288452, "learning_rate": 9.453213927561043e-09, "loss": 0.0677, "step": 55847 }, { "epoch": 0.98904222396366, "grad_norm": 0.5900747776031494, "learning_rate": 9.422698292642085e-09, "loss": 0.0386, "step": 55848 }, { "epoch": 0.9890599335006884, "grad_norm": 0.48351195454597473, "learning_rate": 9.392231975208221e-09, "loss": 0.0401, "step": 55849 }, { "epoch": 0.9890776430377168, "grad_norm": 0.7535977959632874, "learning_rate": 9.361814975356042e-09, "loss": 0.0822, "step": 55850 }, { "epoch": 0.9890953525747453, "grad_norm": 0.835951030254364, "learning_rate": 9.331447293187135e-09, "loss": 0.0488, "step": 55851 }, { "epoch": 0.9891130621117737, "grad_norm": 0.9765211939811707, "learning_rate": 9.301128928803082e-09, "loss": 0.0638, "step": 55852 }, { "epoch": 0.9891307716488021, "grad_norm": 0.7049991488456726, "learning_rate": 9.270859882300475e-09, "loss": 0.0627, "step": 55853 }, { "epoch": 0.9891484811858307, "grad_norm": 0.5316997766494751, "learning_rate": 9.240640153780899e-09, "loss": 0.0513, "step": 55854 }, { "epoch": 0.989166190722859, "grad_norm": 0.3705279529094696, "learning_rate": 9.210469743344274e-09, "loss": 0.063, "step": 55855 }, { "epoch": 0.9891839002598874, "grad_norm": 0.45464977622032166, "learning_rate": 9.180348651088854e-09, "loss": 0.0591, "step": 55856 }, { "epoch": 0.9892016097969158, "grad_norm": 0.25786924362182617, "learning_rate": 9.150276877112895e-09, "loss": 0.0341, "step": 55857 }, { "epoch": 0.9892193193339444, "grad_norm": 0.7283669710159302, "learning_rate": 9.120254421516316e-09, "loss": 0.0622, "step": 55858 }, { "epoch": 0.9892370288709728, "grad_norm": 0.8270745277404785, "learning_rate": 9.090281284399037e-09, "loss": 0.0629, "step": 55859 }, { "epoch": 0.9892547384080012, "grad_norm": 0.47926005721092224, "learning_rate": 9.06035746585765e-09, "loss": 0.0449, "step": 55860 }, { "epoch": 0.9892724479450296, "grad_norm": 0.5281662940979004, "learning_rate": 9.030482965992071e-09, "loss": 0.0668, "step": 55861 }, { "epoch": 0.9892901574820581, "grad_norm": 0.5841387510299683, "learning_rate": 9.00065778489889e-09, "loss": 0.0404, "step": 55862 }, { "epoch": 0.9893078670190865, "grad_norm": 0.5641486048698425, "learning_rate": 8.970881922678032e-09, "loss": 0.0615, "step": 55863 }, { "epoch": 0.9893255765561149, "grad_norm": 0.9100887775421143, "learning_rate": 8.94115537942608e-09, "loss": 0.0683, "step": 55864 }, { "epoch": 0.9893432860931433, "grad_norm": 0.3934786915779114, "learning_rate": 8.911478155241293e-09, "loss": 0.0429, "step": 55865 }, { "epoch": 0.9893609956301718, "grad_norm": 0.4513666033744812, "learning_rate": 8.88185025022359e-09, "loss": 0.0508, "step": 55866 }, { "epoch": 0.9893787051672002, "grad_norm": 1.2002390623092651, "learning_rate": 8.852271664466228e-09, "loss": 0.0597, "step": 55867 }, { "epoch": 0.9893964147042286, "grad_norm": 0.5034666061401367, "learning_rate": 8.822742398069128e-09, "loss": 0.0601, "step": 55868 }, { "epoch": 0.9894141242412571, "grad_norm": 0.5005087852478027, "learning_rate": 8.793262451128881e-09, "loss": 0.0479, "step": 55869 }, { "epoch": 0.9894318337782855, "grad_norm": 0.8209421634674072, "learning_rate": 8.76383182374374e-09, "loss": 0.054, "step": 55870 }, { "epoch": 0.9894495433153139, "grad_norm": 0.8443045616149902, "learning_rate": 8.734450516006964e-09, "loss": 0.0697, "step": 55871 }, { "epoch": 0.9894672528523423, "grad_norm": 1.0111535787582397, "learning_rate": 8.70511852802014e-09, "loss": 0.0705, "step": 55872 }, { "epoch": 0.9894849623893708, "grad_norm": 0.9937906265258789, "learning_rate": 8.675835859874858e-09, "loss": 0.0665, "step": 55873 }, { "epoch": 0.9895026719263992, "grad_norm": 0.47648000717163086, "learning_rate": 8.646602511671042e-09, "loss": 0.071, "step": 55874 }, { "epoch": 0.9895203814634276, "grad_norm": 0.5091878771781921, "learning_rate": 8.617418483503615e-09, "loss": 0.0383, "step": 55875 }, { "epoch": 0.989538091000456, "grad_norm": 0.7367745041847229, "learning_rate": 8.588283775469163e-09, "loss": 0.0485, "step": 55876 }, { "epoch": 0.9895558005374845, "grad_norm": 0.8161070346832275, "learning_rate": 8.559198387662614e-09, "loss": 0.0759, "step": 55877 }, { "epoch": 0.9895735100745129, "grad_norm": 0.6755622029304504, "learning_rate": 8.530162320178892e-09, "loss": 0.0616, "step": 55878 }, { "epoch": 0.9895912196115413, "grad_norm": 0.4656756520271301, "learning_rate": 8.501175573116249e-09, "loss": 0.0573, "step": 55879 }, { "epoch": 0.9896089291485697, "grad_norm": 0.7436966300010681, "learning_rate": 8.472238146566281e-09, "loss": 0.057, "step": 55880 }, { "epoch": 0.9896266386855982, "grad_norm": 0.6890082955360413, "learning_rate": 8.443350040628905e-09, "loss": 0.0787, "step": 55881 }, { "epoch": 0.9896443482226266, "grad_norm": 0.47208738327026367, "learning_rate": 8.414511255395717e-09, "loss": 0.0201, "step": 55882 }, { "epoch": 0.989662057759655, "grad_norm": 0.48280519247055054, "learning_rate": 8.385721790961643e-09, "loss": 0.0494, "step": 55883 }, { "epoch": 0.9896797672966835, "grad_norm": 0.6824872493743896, "learning_rate": 8.356981647421602e-09, "loss": 0.0436, "step": 55884 }, { "epoch": 0.9896974768337119, "grad_norm": 0.5205600261688232, "learning_rate": 8.328290824872186e-09, "loss": 0.069, "step": 55885 }, { "epoch": 0.9897151863707403, "grad_norm": 0.6887112259864807, "learning_rate": 8.299649323406654e-09, "loss": 0.0636, "step": 55886 }, { "epoch": 0.9897328959077687, "grad_norm": 0.6898291110992432, "learning_rate": 8.271057143118265e-09, "loss": 0.0495, "step": 55887 }, { "epoch": 0.9897506054447972, "grad_norm": 0.5749167203903198, "learning_rate": 8.242514284101943e-09, "loss": 0.056, "step": 55888 }, { "epoch": 0.9897683149818256, "grad_norm": 0.6424967646598816, "learning_rate": 8.214020746450946e-09, "loss": 0.0399, "step": 55889 }, { "epoch": 0.989786024518854, "grad_norm": 0.33779001235961914, "learning_rate": 8.185576530260198e-09, "loss": 0.0376, "step": 55890 }, { "epoch": 0.9898037340558824, "grad_norm": 0.47809872031211853, "learning_rate": 8.157181635622956e-09, "loss": 0.0535, "step": 55891 }, { "epoch": 0.9898214435929109, "grad_norm": 1.5308760404586792, "learning_rate": 8.128836062630817e-09, "loss": 0.0808, "step": 55892 }, { "epoch": 0.9898391531299393, "grad_norm": 0.46137893199920654, "learning_rate": 8.10053981138037e-09, "loss": 0.0364, "step": 55893 }, { "epoch": 0.9898568626669677, "grad_norm": 0.8360546231269836, "learning_rate": 8.07229288196154e-09, "loss": 0.0512, "step": 55894 }, { "epoch": 0.9898745722039961, "grad_norm": 0.38717886805534363, "learning_rate": 8.04409527446759e-09, "loss": 0.0341, "step": 55895 }, { "epoch": 0.9898922817410246, "grad_norm": 0.43276339769363403, "learning_rate": 8.015946988993439e-09, "loss": 0.0349, "step": 55896 }, { "epoch": 0.989909991278053, "grad_norm": 0.5392817258834839, "learning_rate": 7.987848025630684e-09, "loss": 0.0501, "step": 55897 }, { "epoch": 0.9899277008150814, "grad_norm": 0.7475136518478394, "learning_rate": 7.959798384470918e-09, "loss": 0.0704, "step": 55898 }, { "epoch": 0.9899454103521099, "grad_norm": 0.44493556022644043, "learning_rate": 7.931798065607398e-09, "loss": 0.0262, "step": 55899 }, { "epoch": 0.9899631198891383, "grad_norm": 0.6471849083900452, "learning_rate": 7.90384706913172e-09, "loss": 0.0544, "step": 55900 }, { "epoch": 0.9899808294261667, "grad_norm": 0.7877731919288635, "learning_rate": 7.87594539513714e-09, "loss": 0.048, "step": 55901 }, { "epoch": 0.9899985389631951, "grad_norm": 0.5709165930747986, "learning_rate": 7.848093043713589e-09, "loss": 0.0433, "step": 55902 }, { "epoch": 0.9900162485002236, "grad_norm": 0.42919790744781494, "learning_rate": 7.820290014952657e-09, "loss": 0.0516, "step": 55903 }, { "epoch": 0.990033958037252, "grad_norm": 0.5398619174957275, "learning_rate": 7.792536308947606e-09, "loss": 0.0695, "step": 55904 }, { "epoch": 0.9900516675742804, "grad_norm": 0.5475980043411255, "learning_rate": 7.764831925788363e-09, "loss": 0.0599, "step": 55905 }, { "epoch": 0.9900693771113088, "grad_norm": 2.5570523738861084, "learning_rate": 7.73717686556652e-09, "loss": 0.0742, "step": 55906 }, { "epoch": 0.9900870866483373, "grad_norm": 0.6713703870773315, "learning_rate": 7.709571128372005e-09, "loss": 0.0441, "step": 55907 }, { "epoch": 0.9901047961853657, "grad_norm": 0.8444082736968994, "learning_rate": 7.68201471429808e-09, "loss": 0.062, "step": 55908 }, { "epoch": 0.9901225057223941, "grad_norm": 0.9504433274269104, "learning_rate": 7.654507623433004e-09, "loss": 0.1201, "step": 55909 }, { "epoch": 0.9901402152594225, "grad_norm": 0.5511475801467896, "learning_rate": 7.627049855868373e-09, "loss": 0.0549, "step": 55910 }, { "epoch": 0.990157924796451, "grad_norm": 1.2515259981155396, "learning_rate": 7.599641411694114e-09, "loss": 0.0945, "step": 55911 }, { "epoch": 0.9901756343334794, "grad_norm": 0.8671427965164185, "learning_rate": 7.572282291001819e-09, "loss": 0.0568, "step": 55912 }, { "epoch": 0.9901933438705078, "grad_norm": 0.8568766713142395, "learning_rate": 7.544972493879754e-09, "loss": 0.0632, "step": 55913 }, { "epoch": 0.9902110534075363, "grad_norm": 0.351029634475708, "learning_rate": 7.517712020417844e-09, "loss": 0.0663, "step": 55914 }, { "epoch": 0.9902287629445647, "grad_norm": 0.49816229939460754, "learning_rate": 7.490500870706018e-09, "loss": 0.0575, "step": 55915 }, { "epoch": 0.9902464724815931, "grad_norm": 0.4411080479621887, "learning_rate": 7.46333904483587e-09, "loss": 0.0391, "step": 55916 }, { "epoch": 0.9902641820186215, "grad_norm": 0.29185763001441956, "learning_rate": 7.436226542892333e-09, "loss": 0.0566, "step": 55917 }, { "epoch": 0.99028189155565, "grad_norm": 0.7559646964073181, "learning_rate": 7.409163364968663e-09, "loss": 0.0685, "step": 55918 }, { "epoch": 0.9902996010926784, "grad_norm": 0.6703712344169617, "learning_rate": 7.382149511153124e-09, "loss": 0.0716, "step": 55919 }, { "epoch": 0.9903173106297068, "grad_norm": 0.6310096383094788, "learning_rate": 7.355184981532315e-09, "loss": 0.0707, "step": 55920 }, { "epoch": 0.9903350201667352, "grad_norm": 0.8253975510597229, "learning_rate": 7.328269776197827e-09, "loss": 0.0611, "step": 55921 }, { "epoch": 0.9903527297037638, "grad_norm": 0.5720941424369812, "learning_rate": 7.301403895236259e-09, "loss": 0.0452, "step": 55922 }, { "epoch": 0.9903704392407922, "grad_norm": 0.32781168818473816, "learning_rate": 7.274587338737537e-09, "loss": 0.041, "step": 55923 }, { "epoch": 0.9903881487778206, "grad_norm": 0.3438457250595093, "learning_rate": 7.247820106788261e-09, "loss": 0.0554, "step": 55924 }, { "epoch": 0.990405858314849, "grad_norm": 1.0060358047485352, "learning_rate": 7.2211021994766926e-09, "loss": 0.0449, "step": 55925 }, { "epoch": 0.9904235678518775, "grad_norm": 0.7820664048194885, "learning_rate": 7.194433616891094e-09, "loss": 0.0343, "step": 55926 }, { "epoch": 0.9904412773889059, "grad_norm": 0.5207416415214539, "learning_rate": 7.167814359119729e-09, "loss": 0.0361, "step": 55927 }, { "epoch": 0.9904589869259343, "grad_norm": 0.7485432028770447, "learning_rate": 7.141244426250859e-09, "loss": 0.0625, "step": 55928 }, { "epoch": 0.9904766964629628, "grad_norm": 0.8231899738311768, "learning_rate": 7.1147238183710825e-09, "loss": 0.0553, "step": 55929 }, { "epoch": 0.9904944059999912, "grad_norm": 0.5302553772926331, "learning_rate": 7.088252535565332e-09, "loss": 0.0656, "step": 55930 }, { "epoch": 0.9905121155370196, "grad_norm": 0.4923699200153351, "learning_rate": 7.061830577923533e-09, "loss": 0.0349, "step": 55931 }, { "epoch": 0.990529825074048, "grad_norm": 0.7391575574874878, "learning_rate": 7.035457945533952e-09, "loss": 0.0478, "step": 55932 }, { "epoch": 0.9905475346110765, "grad_norm": 0.5846974849700928, "learning_rate": 7.009134638478187e-09, "loss": 0.0456, "step": 55933 }, { "epoch": 0.9905652441481049, "grad_norm": 0.5612425208091736, "learning_rate": 6.9828606568478335e-09, "loss": 0.0221, "step": 55934 }, { "epoch": 0.9905829536851333, "grad_norm": 0.6436780095100403, "learning_rate": 6.956636000727823e-09, "loss": 0.0752, "step": 55935 }, { "epoch": 0.9906006632221617, "grad_norm": 0.6758379340171814, "learning_rate": 6.930460670203087e-09, "loss": 0.0652, "step": 55936 }, { "epoch": 0.9906183727591902, "grad_norm": 0.48353680968284607, "learning_rate": 6.904334665360224e-09, "loss": 0.0473, "step": 55937 }, { "epoch": 0.9906360822962186, "grad_norm": 0.43485888838768005, "learning_rate": 6.878257986285829e-09, "loss": 0.0437, "step": 55938 }, { "epoch": 0.990653791833247, "grad_norm": 0.24168966710567474, "learning_rate": 6.852230633066503e-09, "loss": 0.0394, "step": 55939 }, { "epoch": 0.9906715013702754, "grad_norm": 0.48069387674331665, "learning_rate": 6.82625260578551e-09, "loss": 0.0603, "step": 55940 }, { "epoch": 0.9906892109073039, "grad_norm": 0.7518093585968018, "learning_rate": 6.800323904531114e-09, "loss": 0.0481, "step": 55941 }, { "epoch": 0.9907069204443323, "grad_norm": 0.330672949552536, "learning_rate": 6.774444529384916e-09, "loss": 0.0463, "step": 55942 }, { "epoch": 0.9907246299813607, "grad_norm": 0.36290794610977173, "learning_rate": 6.748614480436844e-09, "loss": 0.068, "step": 55943 }, { "epoch": 0.9907423395183892, "grad_norm": 0.5900275111198425, "learning_rate": 6.722833757768498e-09, "loss": 0.0626, "step": 55944 }, { "epoch": 0.9907600490554176, "grad_norm": 0.27740228176116943, "learning_rate": 6.697102361464813e-09, "loss": 0.0324, "step": 55945 }, { "epoch": 0.990777758592446, "grad_norm": 0.6019006371498108, "learning_rate": 6.671420291612384e-09, "loss": 0.0539, "step": 55946 }, { "epoch": 0.9907954681294744, "grad_norm": 0.696567177772522, "learning_rate": 6.645787548292815e-09, "loss": 0.0582, "step": 55947 }, { "epoch": 0.9908131776665029, "grad_norm": 0.6107199788093567, "learning_rate": 6.620204131594365e-09, "loss": 0.0422, "step": 55948 }, { "epoch": 0.9908308872035313, "grad_norm": 0.7541971802711487, "learning_rate": 6.594670041596973e-09, "loss": 0.0423, "step": 55949 }, { "epoch": 0.9908485967405597, "grad_norm": 0.760262668132782, "learning_rate": 6.569185278387235e-09, "loss": 0.0601, "step": 55950 }, { "epoch": 0.9908663062775881, "grad_norm": 0.20368792116641998, "learning_rate": 6.543749842048419e-09, "loss": 0.0517, "step": 55951 }, { "epoch": 0.9908840158146166, "grad_norm": 0.7203751802444458, "learning_rate": 6.518363732665455e-09, "loss": 0.0783, "step": 55952 }, { "epoch": 0.990901725351645, "grad_norm": 0.9070571064949036, "learning_rate": 6.493026950318282e-09, "loss": 0.0504, "step": 55953 }, { "epoch": 0.9909194348886734, "grad_norm": 0.35695457458496094, "learning_rate": 6.467739495093494e-09, "loss": 0.0325, "step": 55954 }, { "epoch": 0.9909371444257018, "grad_norm": 0.48411375284194946, "learning_rate": 6.44250136707436e-09, "loss": 0.0574, "step": 55955 }, { "epoch": 0.9909548539627303, "grad_norm": 0.4960483908653259, "learning_rate": 6.417312566342482e-09, "loss": 0.0682, "step": 55956 }, { "epoch": 0.9909725634997587, "grad_norm": 0.35199543833732605, "learning_rate": 6.3921730929794584e-09, "loss": 0.0415, "step": 55957 }, { "epoch": 0.9909902730367871, "grad_norm": 0.5761773586273193, "learning_rate": 6.36708294707189e-09, "loss": 0.0721, "step": 55958 }, { "epoch": 0.9910079825738156, "grad_norm": 0.7046918272972107, "learning_rate": 6.3420421286980445e-09, "loss": 0.0555, "step": 55959 }, { "epoch": 0.991025692110844, "grad_norm": 0.5704370141029358, "learning_rate": 6.317050637944522e-09, "loss": 0.042, "step": 55960 }, { "epoch": 0.9910434016478724, "grad_norm": 0.7349465489387512, "learning_rate": 6.292108474889591e-09, "loss": 0.0706, "step": 55961 }, { "epoch": 0.9910611111849008, "grad_norm": 0.3134431540966034, "learning_rate": 6.267215639618184e-09, "loss": 0.0416, "step": 55962 }, { "epoch": 0.9910788207219293, "grad_norm": 0.6029155850410461, "learning_rate": 6.242372132210239e-09, "loss": 0.0394, "step": 55963 }, { "epoch": 0.9910965302589577, "grad_norm": 0.45055902004241943, "learning_rate": 6.217577952749021e-09, "loss": 0.0486, "step": 55964 }, { "epoch": 0.9911142397959861, "grad_norm": 0.6569663882255554, "learning_rate": 6.192833101316131e-09, "loss": 0.0484, "step": 55965 }, { "epoch": 0.9911319493330145, "grad_norm": 0.31731438636779785, "learning_rate": 6.168137577991506e-09, "loss": 0.0449, "step": 55966 }, { "epoch": 0.991149658870043, "grad_norm": 0.7474635243415833, "learning_rate": 6.1434913828567474e-09, "loss": 0.0521, "step": 55967 }, { "epoch": 0.9911673684070714, "grad_norm": 0.4676962196826935, "learning_rate": 6.1188945159934564e-09, "loss": 0.0443, "step": 55968 }, { "epoch": 0.9911850779440998, "grad_norm": 0.41710108518600464, "learning_rate": 6.094346977483234e-09, "loss": 0.0699, "step": 55969 }, { "epoch": 0.9912027874811282, "grad_norm": 0.58547043800354, "learning_rate": 6.069848767406016e-09, "loss": 0.0406, "step": 55970 }, { "epoch": 0.9912204970181567, "grad_norm": 0.6008710265159607, "learning_rate": 6.045399885841741e-09, "loss": 0.0457, "step": 55971 }, { "epoch": 0.9912382065551851, "grad_norm": 0.23167432844638824, "learning_rate": 6.021000332872007e-09, "loss": 0.0557, "step": 55972 }, { "epoch": 0.9912559160922135, "grad_norm": 0.5191537737846375, "learning_rate": 5.996650108576751e-09, "loss": 0.0426, "step": 55973 }, { "epoch": 0.991273625629242, "grad_norm": 0.3886866867542267, "learning_rate": 5.972349213034245e-09, "loss": 0.0525, "step": 55974 }, { "epoch": 0.9912913351662704, "grad_norm": 0.6065993905067444, "learning_rate": 5.948097646327755e-09, "loss": 0.0601, "step": 55975 }, { "epoch": 0.9913090447032988, "grad_norm": 0.4078840911388397, "learning_rate": 5.923895408535551e-09, "loss": 0.0614, "step": 55976 }, { "epoch": 0.9913267542403272, "grad_norm": 0.6066601872444153, "learning_rate": 5.89974249973757e-09, "loss": 0.033, "step": 55977 }, { "epoch": 0.9913444637773557, "grad_norm": 0.31606245040893555, "learning_rate": 5.875638920012083e-09, "loss": 0.0402, "step": 55978 }, { "epoch": 0.9913621733143841, "grad_norm": 0.7345893979072571, "learning_rate": 5.851584669439025e-09, "loss": 0.0521, "step": 55979 }, { "epoch": 0.9913798828514125, "grad_norm": 1.0062297582626343, "learning_rate": 5.827579748098333e-09, "loss": 0.0796, "step": 55980 }, { "epoch": 0.9913975923884409, "grad_norm": 0.6505994200706482, "learning_rate": 5.803624156068277e-09, "loss": 0.049, "step": 55981 }, { "epoch": 0.9914153019254694, "grad_norm": 0.46690434217453003, "learning_rate": 5.7797178934271274e-09, "loss": 0.0385, "step": 55982 }, { "epoch": 0.9914330114624978, "grad_norm": 0.6212347745895386, "learning_rate": 5.755860960254822e-09, "loss": 0.0383, "step": 55983 }, { "epoch": 0.9914507209995262, "grad_norm": 0.2973712682723999, "learning_rate": 5.732053356627964e-09, "loss": 0.0434, "step": 55984 }, { "epoch": 0.9914684305365546, "grad_norm": 0.7832808494567871, "learning_rate": 5.708295082626491e-09, "loss": 0.0429, "step": 55985 }, { "epoch": 0.9914861400735832, "grad_norm": 0.3060525357723236, "learning_rate": 5.684586138328673e-09, "loss": 0.0745, "step": 55986 }, { "epoch": 0.9915038496106116, "grad_norm": 0.48598477244377136, "learning_rate": 5.660926523811116e-09, "loss": 0.0559, "step": 55987 }, { "epoch": 0.99152155914764, "grad_norm": 0.951300859451294, "learning_rate": 5.637316239153756e-09, "loss": 0.0874, "step": 55988 }, { "epoch": 0.9915392686846685, "grad_norm": 0.850066065788269, "learning_rate": 5.6137552844331975e-09, "loss": 0.085, "step": 55989 }, { "epoch": 0.9915569782216969, "grad_norm": 0.39629945158958435, "learning_rate": 5.5902436597260466e-09, "loss": 0.0628, "step": 55990 }, { "epoch": 0.9915746877587253, "grad_norm": 0.6281546354293823, "learning_rate": 5.566781365110574e-09, "loss": 0.0582, "step": 55991 }, { "epoch": 0.9915923972957537, "grad_norm": 0.3442215919494629, "learning_rate": 5.543368400663384e-09, "loss": 0.0452, "step": 55992 }, { "epoch": 0.9916101068327822, "grad_norm": 0.7319508790969849, "learning_rate": 5.520004766462749e-09, "loss": 0.0522, "step": 55993 }, { "epoch": 0.9916278163698106, "grad_norm": 0.5214645266532898, "learning_rate": 5.496690462583609e-09, "loss": 0.0533, "step": 55994 }, { "epoch": 0.991645525906839, "grad_norm": 0.6404017806053162, "learning_rate": 5.4734254891042335e-09, "loss": 0.04, "step": 55995 }, { "epoch": 0.9916632354438674, "grad_norm": 0.5697265863418579, "learning_rate": 5.450209846101228e-09, "loss": 0.0403, "step": 55996 }, { "epoch": 0.9916809449808959, "grad_norm": 0.7001110911369324, "learning_rate": 5.427043533649534e-09, "loss": 0.0656, "step": 55997 }, { "epoch": 0.9916986545179243, "grad_norm": 0.7141333222389221, "learning_rate": 5.40392655182742e-09, "loss": 0.0695, "step": 55998 }, { "epoch": 0.9917163640549527, "grad_norm": 0.6282785534858704, "learning_rate": 5.380858900708163e-09, "loss": 0.0589, "step": 55999 }, { "epoch": 0.9917340735919811, "grad_norm": 0.5720378160476685, "learning_rate": 5.3578405803700325e-09, "loss": 0.0396, "step": 56000 }, { "epoch": 0.9917517831290096, "grad_norm": 0.7513495683670044, "learning_rate": 5.3348715908879685e-09, "loss": 0.0713, "step": 56001 }, { "epoch": 0.991769492666038, "grad_norm": 1.0270811319351196, "learning_rate": 5.311951932338577e-09, "loss": 0.0383, "step": 56002 }, { "epoch": 0.9917872022030664, "grad_norm": 0.5609460473060608, "learning_rate": 5.289081604795132e-09, "loss": 0.0649, "step": 56003 }, { "epoch": 0.9918049117400949, "grad_norm": 0.8431770205497742, "learning_rate": 5.266260608334239e-09, "loss": 0.0732, "step": 56004 }, { "epoch": 0.9918226212771233, "grad_norm": 0.34222611784935, "learning_rate": 5.2434889430308385e-09, "loss": 0.0479, "step": 56005 }, { "epoch": 0.9918403308141517, "grad_norm": 0.6684064865112305, "learning_rate": 5.220766608958205e-09, "loss": 0.0421, "step": 56006 }, { "epoch": 0.9918580403511801, "grad_norm": 0.7152336835861206, "learning_rate": 5.19809360619461e-09, "loss": 0.0724, "step": 56007 }, { "epoch": 0.9918757498882086, "grad_norm": 0.5855464339256287, "learning_rate": 5.1754699348116604e-09, "loss": 0.0562, "step": 56008 }, { "epoch": 0.991893459425237, "grad_norm": 0.5670018792152405, "learning_rate": 5.152895594885965e-09, "loss": 0.0614, "step": 56009 }, { "epoch": 0.9919111689622654, "grad_norm": 0.9403606057167053, "learning_rate": 5.130370586489131e-09, "loss": 0.0418, "step": 56010 }, { "epoch": 0.9919288784992938, "grad_norm": 0.6993036866188049, "learning_rate": 5.1078949096977635e-09, "loss": 0.0779, "step": 56011 }, { "epoch": 0.9919465880363223, "grad_norm": 0.6389455795288086, "learning_rate": 5.085468564585138e-09, "loss": 0.0637, "step": 56012 }, { "epoch": 0.9919642975733507, "grad_norm": 0.5598847270011902, "learning_rate": 5.063091551222865e-09, "loss": 0.0438, "step": 56013 }, { "epoch": 0.9919820071103791, "grad_norm": 0.49651142954826355, "learning_rate": 5.040763869687548e-09, "loss": 0.0435, "step": 56014 }, { "epoch": 0.9919997166474075, "grad_norm": 0.496585875749588, "learning_rate": 5.018485520052463e-09, "loss": 0.0472, "step": 56015 }, { "epoch": 0.992017426184436, "grad_norm": 0.3682384490966797, "learning_rate": 4.996256502387553e-09, "loss": 0.0488, "step": 56016 }, { "epoch": 0.9920351357214644, "grad_norm": 0.7101843953132629, "learning_rate": 4.974076816769424e-09, "loss": 0.0589, "step": 56017 }, { "epoch": 0.9920528452584928, "grad_norm": 0.48070117831230164, "learning_rate": 4.951946463271351e-09, "loss": 0.0597, "step": 56018 }, { "epoch": 0.9920705547955213, "grad_norm": 0.9363362193107605, "learning_rate": 4.929865441963277e-09, "loss": 0.0715, "step": 56019 }, { "epoch": 0.9920882643325497, "grad_norm": 0.5641918182373047, "learning_rate": 4.907833752918478e-09, "loss": 0.0621, "step": 56020 }, { "epoch": 0.9921059738695781, "grad_norm": 0.47644373774528503, "learning_rate": 4.885851396211893e-09, "loss": 0.0465, "step": 56021 }, { "epoch": 0.9921236834066065, "grad_norm": 0.38513705134391785, "learning_rate": 4.863918371913467e-09, "loss": 0.0554, "step": 56022 }, { "epoch": 0.992141392943635, "grad_norm": 0.8402393460273743, "learning_rate": 4.842034680096474e-09, "loss": 0.0563, "step": 56023 }, { "epoch": 0.9921591024806634, "grad_norm": 0.6802376508712769, "learning_rate": 4.820200320832524e-09, "loss": 0.0573, "step": 56024 }, { "epoch": 0.9921768120176918, "grad_norm": 0.8222596645355225, "learning_rate": 4.798415294193225e-09, "loss": 0.0717, "step": 56025 }, { "epoch": 0.9921945215547202, "grad_norm": 0.40604233741760254, "learning_rate": 4.776679600250189e-09, "loss": 0.0303, "step": 56026 }, { "epoch": 0.9922122310917487, "grad_norm": 0.7609058022499084, "learning_rate": 4.7549932390750225e-09, "loss": 0.0668, "step": 56027 }, { "epoch": 0.9922299406287771, "grad_norm": 0.7330237627029419, "learning_rate": 4.733356210739337e-09, "loss": 0.0384, "step": 56028 }, { "epoch": 0.9922476501658055, "grad_norm": 0.3837956190109253, "learning_rate": 4.7117685153147404e-09, "loss": 0.0518, "step": 56029 }, { "epoch": 0.9922653597028339, "grad_norm": 0.6247158050537109, "learning_rate": 4.690230152871178e-09, "loss": 0.0358, "step": 56030 }, { "epoch": 0.9922830692398624, "grad_norm": 0.24095095694065094, "learning_rate": 4.6687411234802576e-09, "loss": 0.0471, "step": 56031 }, { "epoch": 0.9923007787768908, "grad_norm": 0.6100821495056152, "learning_rate": 4.6473014272119255e-09, "loss": 0.0507, "step": 56032 }, { "epoch": 0.9923184883139192, "grad_norm": 0.8214970231056213, "learning_rate": 4.625911064139454e-09, "loss": 0.0672, "step": 56033 }, { "epoch": 0.9923361978509477, "grad_norm": 0.5060888528823853, "learning_rate": 4.604570034329459e-09, "loss": 0.0619, "step": 56034 }, { "epoch": 0.9923539073879761, "grad_norm": 0.809988796710968, "learning_rate": 4.583278337853547e-09, "loss": 0.0636, "step": 56035 }, { "epoch": 0.9923716169250045, "grad_norm": 1.118173599243164, "learning_rate": 4.562035974781664e-09, "loss": 0.08, "step": 56036 }, { "epoch": 0.9923893264620329, "grad_norm": 1.5115258693695068, "learning_rate": 4.5408429451854196e-09, "loss": 0.0857, "step": 56037 }, { "epoch": 0.9924070359990614, "grad_norm": 0.7227131724357605, "learning_rate": 4.519699249131426e-09, "loss": 0.0684, "step": 56038 }, { "epoch": 0.9924247455360898, "grad_norm": 0.5030069947242737, "learning_rate": 4.4986048866912935e-09, "loss": 0.0793, "step": 56039 }, { "epoch": 0.9924424550731182, "grad_norm": 0.4271393418312073, "learning_rate": 4.477559857934965e-09, "loss": 0.0543, "step": 56040 }, { "epoch": 0.9924601646101466, "grad_norm": 0.9008066654205322, "learning_rate": 4.4565641629307204e-09, "loss": 0.0452, "step": 56041 }, { "epoch": 0.9924778741471751, "grad_norm": 0.5381762385368347, "learning_rate": 4.435617801746838e-09, "loss": 0.0561, "step": 56042 }, { "epoch": 0.9924955836842035, "grad_norm": 0.3948960602283478, "learning_rate": 4.414720774453263e-09, "loss": 0.0437, "step": 56043 }, { "epoch": 0.9925132932212319, "grad_norm": 0.6953791975975037, "learning_rate": 4.393873081118271e-09, "loss": 0.0486, "step": 56044 }, { "epoch": 0.9925310027582603, "grad_norm": 1.0012478828430176, "learning_rate": 4.373074721811809e-09, "loss": 0.0687, "step": 56045 }, { "epoch": 0.9925487122952888, "grad_norm": 1.0256998538970947, "learning_rate": 4.35232569660049e-09, "loss": 0.0716, "step": 56046 }, { "epoch": 0.9925664218323172, "grad_norm": 0.7106518149375916, "learning_rate": 4.331626005554257e-09, "loss": 0.0628, "step": 56047 }, { "epoch": 0.9925841313693456, "grad_norm": 0.9061825275421143, "learning_rate": 4.3109756487380584e-09, "loss": 0.069, "step": 56048 }, { "epoch": 0.9926018409063742, "grad_norm": 0.35274747014045715, "learning_rate": 4.290374626223503e-09, "loss": 0.0449, "step": 56049 }, { "epoch": 0.9926195504434026, "grad_norm": 0.9409925937652588, "learning_rate": 4.269822938077206e-09, "loss": 0.0548, "step": 56050 }, { "epoch": 0.992637259980431, "grad_norm": 0.6269252896308899, "learning_rate": 4.249320584365779e-09, "loss": 0.0528, "step": 56051 }, { "epoch": 0.9926549695174594, "grad_norm": 0.48034271597862244, "learning_rate": 4.228867565157502e-09, "loss": 0.0344, "step": 56052 }, { "epoch": 0.9926726790544879, "grad_norm": 0.513160765171051, "learning_rate": 4.208463880520652e-09, "loss": 0.0522, "step": 56053 }, { "epoch": 0.9926903885915163, "grad_norm": 0.42012637853622437, "learning_rate": 4.188109530520179e-09, "loss": 0.0521, "step": 56054 }, { "epoch": 0.9927080981285447, "grad_norm": 0.24780745804309845, "learning_rate": 4.167804515222695e-09, "loss": 0.0459, "step": 56055 }, { "epoch": 0.9927258076655731, "grad_norm": 0.2336082011461258, "learning_rate": 4.147548834698145e-09, "loss": 0.0478, "step": 56056 }, { "epoch": 0.9927435172026016, "grad_norm": 0.5237352252006531, "learning_rate": 4.127342489011477e-09, "loss": 0.038, "step": 56057 }, { "epoch": 0.99276122673963, "grad_norm": 0.5577192902565002, "learning_rate": 4.107185478229303e-09, "loss": 0.0659, "step": 56058 }, { "epoch": 0.9927789362766584, "grad_norm": 0.5636258125305176, "learning_rate": 4.087077802416572e-09, "loss": 0.0517, "step": 56059 }, { "epoch": 0.9927966458136868, "grad_norm": 0.5328343510627747, "learning_rate": 4.0670194616415635e-09, "loss": 0.0394, "step": 56060 }, { "epoch": 0.9928143553507153, "grad_norm": 0.5782381892204285, "learning_rate": 4.047010455969225e-09, "loss": 0.0637, "step": 56061 }, { "epoch": 0.9928320648877437, "grad_norm": 0.4834292232990265, "learning_rate": 4.0270507854645034e-09, "loss": 0.0628, "step": 56062 }, { "epoch": 0.9928497744247721, "grad_norm": 0.3027121126651764, "learning_rate": 4.0071404501940136e-09, "loss": 0.0336, "step": 56063 }, { "epoch": 0.9928674839618006, "grad_norm": 0.3418150544166565, "learning_rate": 3.987279450224368e-09, "loss": 0.0293, "step": 56064 }, { "epoch": 0.992885193498829, "grad_norm": 0.6479394435882568, "learning_rate": 3.967467785620516e-09, "loss": 0.0806, "step": 56065 }, { "epoch": 0.9929029030358574, "grad_norm": 0.5111286044120789, "learning_rate": 3.947705456445738e-09, "loss": 0.0296, "step": 56066 }, { "epoch": 0.9929206125728858, "grad_norm": 0.9855242967605591, "learning_rate": 3.927992462764984e-09, "loss": 0.0551, "step": 56067 }, { "epoch": 0.9929383221099143, "grad_norm": 0.5519845485687256, "learning_rate": 3.908328804646532e-09, "loss": 0.0528, "step": 56068 }, { "epoch": 0.9929560316469427, "grad_norm": 0.49112051725387573, "learning_rate": 3.888714482152e-09, "loss": 0.0448, "step": 56069 }, { "epoch": 0.9929737411839711, "grad_norm": 0.7184768915176392, "learning_rate": 3.869149495346336e-09, "loss": 0.0476, "step": 56070 }, { "epoch": 0.9929914507209995, "grad_norm": 0.22857879102230072, "learning_rate": 3.849633844296152e-09, "loss": 0.0394, "step": 56071 }, { "epoch": 0.993009160258028, "grad_norm": 0.5633224248886108, "learning_rate": 3.830167529061402e-09, "loss": 0.057, "step": 56072 }, { "epoch": 0.9930268697950564, "grad_norm": 0.4701499342918396, "learning_rate": 3.810750549710362e-09, "loss": 0.0405, "step": 56073 }, { "epoch": 0.9930445793320848, "grad_norm": 0.5883582234382629, "learning_rate": 3.791382906304653e-09, "loss": 0.0397, "step": 56074 }, { "epoch": 0.9930622888691132, "grad_norm": 0.524603545665741, "learning_rate": 3.77206459890922e-09, "loss": 0.0763, "step": 56075 }, { "epoch": 0.9930799984061417, "grad_norm": 0.6635290384292603, "learning_rate": 3.752795627587347e-09, "loss": 0.0539, "step": 56076 }, { "epoch": 0.9930977079431701, "grad_norm": 0.9177208542823792, "learning_rate": 3.7335759924006506e-09, "loss": 0.0559, "step": 56077 }, { "epoch": 0.9931154174801985, "grad_norm": 0.27481821179389954, "learning_rate": 3.7144056934140803e-09, "loss": 0.045, "step": 56078 }, { "epoch": 0.993133127017227, "grad_norm": 0.5880502462387085, "learning_rate": 3.6952847306892522e-09, "loss": 0.0502, "step": 56079 }, { "epoch": 0.9931508365542554, "grad_norm": 0.3854734003543854, "learning_rate": 3.676213104291115e-09, "loss": 0.0449, "step": 56080 }, { "epoch": 0.9931685460912838, "grad_norm": 0.5838336944580078, "learning_rate": 3.6571908142829513e-09, "loss": 0.0322, "step": 56081 }, { "epoch": 0.9931862556283122, "grad_norm": 0.4654935598373413, "learning_rate": 3.6382178607230475e-09, "loss": 0.0311, "step": 56082 }, { "epoch": 0.9932039651653407, "grad_norm": 0.7294833064079285, "learning_rate": 3.6192942436780173e-09, "loss": 0.0574, "step": 56083 }, { "epoch": 0.9932216747023691, "grad_norm": 0.6363264918327332, "learning_rate": 3.6004199632094782e-09, "loss": 0.0419, "step": 56084 }, { "epoch": 0.9932393842393975, "grad_norm": 0.8998425602912903, "learning_rate": 3.581595019377382e-09, "loss": 0.0658, "step": 56085 }, { "epoch": 0.9932570937764259, "grad_norm": 0.6731346845626831, "learning_rate": 3.5628194122466763e-09, "loss": 0.0333, "step": 56086 }, { "epoch": 0.9932748033134544, "grad_norm": 0.4468100666999817, "learning_rate": 3.5440931418756484e-09, "loss": 0.0854, "step": 56087 }, { "epoch": 0.9932925128504828, "grad_norm": 0.4846389591693878, "learning_rate": 3.5254162083292464e-09, "loss": 0.0596, "step": 56088 }, { "epoch": 0.9933102223875112, "grad_norm": 0.3757529556751251, "learning_rate": 3.506788611667422e-09, "loss": 0.0314, "step": 56089 }, { "epoch": 0.9933279319245396, "grad_norm": 0.7369664311408997, "learning_rate": 3.4882103519501275e-09, "loss": 0.0651, "step": 56090 }, { "epoch": 0.9933456414615681, "grad_norm": 0.45075172185897827, "learning_rate": 3.4696814292406452e-09, "loss": 0.0285, "step": 56091 }, { "epoch": 0.9933633509985965, "grad_norm": 0.7457826137542725, "learning_rate": 3.4512018435989278e-09, "loss": 0.0465, "step": 56092 }, { "epoch": 0.9933810605356249, "grad_norm": 0.5949856638908386, "learning_rate": 3.4327715950865923e-09, "loss": 0.0421, "step": 56093 }, { "epoch": 0.9933987700726534, "grad_norm": 0.3042069673538208, "learning_rate": 3.4143906837619256e-09, "loss": 0.0269, "step": 56094 }, { "epoch": 0.9934164796096818, "grad_norm": 0.5377402305603027, "learning_rate": 3.39605910968821e-09, "loss": 0.0721, "step": 56095 }, { "epoch": 0.9934341891467102, "grad_norm": 0.5120760798454285, "learning_rate": 3.3777768729253977e-09, "loss": 0.036, "step": 56096 }, { "epoch": 0.9934518986837386, "grad_norm": 0.3578394651412964, "learning_rate": 3.3595439735317757e-09, "loss": 0.0561, "step": 56097 }, { "epoch": 0.9934696082207671, "grad_norm": 0.5628923177719116, "learning_rate": 3.341360411568961e-09, "loss": 0.0511, "step": 56098 }, { "epoch": 0.9934873177577955, "grad_norm": 0.7031801342964172, "learning_rate": 3.323226187095241e-09, "loss": 0.0576, "step": 56099 }, { "epoch": 0.9935050272948239, "grad_norm": 0.42014098167419434, "learning_rate": 3.305141300172232e-09, "loss": 0.0416, "step": 56100 }, { "epoch": 0.9935227368318523, "grad_norm": 0.8189441561698914, "learning_rate": 3.2871057508582215e-09, "loss": 0.0559, "step": 56101 }, { "epoch": 0.9935404463688808, "grad_norm": 0.8017730712890625, "learning_rate": 3.2691195392131614e-09, "loss": 0.0637, "step": 56102 }, { "epoch": 0.9935581559059092, "grad_norm": 0.9099120497703552, "learning_rate": 3.251182665295338e-09, "loss": 0.0612, "step": 56103 }, { "epoch": 0.9935758654429376, "grad_norm": 0.6000470519065857, "learning_rate": 3.233295129164704e-09, "loss": 0.0428, "step": 56104 }, { "epoch": 0.993593574979966, "grad_norm": 0.6042441725730896, "learning_rate": 3.2154569308795454e-09, "loss": 0.0725, "step": 56105 }, { "epoch": 0.9936112845169945, "grad_norm": 0.571807324886322, "learning_rate": 3.1976680704998153e-09, "loss": 0.0606, "step": 56106 }, { "epoch": 0.9936289940540229, "grad_norm": 0.5090352892875671, "learning_rate": 3.1799285480804684e-09, "loss": 0.0621, "step": 56107 }, { "epoch": 0.9936467035910513, "grad_norm": 0.665044903755188, "learning_rate": 3.1622383636847883e-09, "loss": 0.0593, "step": 56108 }, { "epoch": 0.9936644131280798, "grad_norm": 0.2084517627954483, "learning_rate": 3.144597517367731e-09, "loss": 0.0271, "step": 56109 }, { "epoch": 0.9936821226651082, "grad_norm": 0.4444899260997772, "learning_rate": 3.127006009187583e-09, "loss": 0.0508, "step": 56110 }, { "epoch": 0.9936998322021366, "grad_norm": 0.6907276511192322, "learning_rate": 3.109463839202631e-09, "loss": 0.062, "step": 56111 }, { "epoch": 0.993717541739165, "grad_norm": 0.6763873100280762, "learning_rate": 3.0919710074711614e-09, "loss": 0.0703, "step": 56112 }, { "epoch": 0.9937352512761936, "grad_norm": 0.4705551564693451, "learning_rate": 3.0745275140497965e-09, "loss": 0.0284, "step": 56113 }, { "epoch": 0.993752960813222, "grad_norm": 0.7348297238349915, "learning_rate": 3.057133358996822e-09, "loss": 0.0639, "step": 56114 }, { "epoch": 0.9937706703502504, "grad_norm": 0.6109908223152161, "learning_rate": 3.03978854236886e-09, "loss": 0.0767, "step": 56115 }, { "epoch": 0.9937883798872787, "grad_norm": 0.47857147455215454, "learning_rate": 3.022493064222531e-09, "loss": 0.0526, "step": 56116 }, { "epoch": 0.9938060894243073, "grad_norm": 0.3747742772102356, "learning_rate": 3.005246924616123e-09, "loss": 0.0554, "step": 56117 }, { "epoch": 0.9938237989613357, "grad_norm": 0.6258236169815063, "learning_rate": 2.9880501236062564e-09, "loss": 0.072, "step": 56118 }, { "epoch": 0.9938415084983641, "grad_norm": 0.6146595478057861, "learning_rate": 2.970902661247887e-09, "loss": 0.0569, "step": 56119 }, { "epoch": 0.9938592180353925, "grad_norm": 0.47577598690986633, "learning_rate": 2.953804537599303e-09, "loss": 0.0695, "step": 56120 }, { "epoch": 0.993876927572421, "grad_norm": 0.4163631498813629, "learning_rate": 2.9367557527137935e-09, "loss": 0.0522, "step": 56121 }, { "epoch": 0.9938946371094494, "grad_norm": 0.6839221715927124, "learning_rate": 2.9197563066513112e-09, "loss": 0.061, "step": 56122 }, { "epoch": 0.9939123466464778, "grad_norm": 0.6289983987808228, "learning_rate": 2.9028061994651465e-09, "loss": 0.0451, "step": 56123 }, { "epoch": 0.9939300561835063, "grad_norm": 0.6648182272911072, "learning_rate": 2.8859054312119215e-09, "loss": 0.0441, "step": 56124 }, { "epoch": 0.9939477657205347, "grad_norm": 0.8665350675582886, "learning_rate": 2.869054001948257e-09, "loss": 0.0526, "step": 56125 }, { "epoch": 0.9939654752575631, "grad_norm": 0.6018106937408447, "learning_rate": 2.852251911727444e-09, "loss": 0.0628, "step": 56126 }, { "epoch": 0.9939831847945915, "grad_norm": 0.5355915427207947, "learning_rate": 2.8354991606061033e-09, "loss": 0.073, "step": 56127 }, { "epoch": 0.99400089433162, "grad_norm": 0.6607397198677063, "learning_rate": 2.8187957486391912e-09, "loss": 0.03, "step": 56128 }, { "epoch": 0.9940186038686484, "grad_norm": 0.5190093517303467, "learning_rate": 2.8021416758816642e-09, "loss": 0.05, "step": 56129 }, { "epoch": 0.9940363134056768, "grad_norm": 0.4767889976501465, "learning_rate": 2.785536942388478e-09, "loss": 0.0407, "step": 56130 }, { "epoch": 0.9940540229427052, "grad_norm": 0.5562220811843872, "learning_rate": 2.768981548212923e-09, "loss": 0.0595, "step": 56131 }, { "epoch": 0.9940717324797337, "grad_norm": 0.8501244783401489, "learning_rate": 2.7524754934116216e-09, "loss": 0.0539, "step": 56132 }, { "epoch": 0.9940894420167621, "grad_norm": 0.5456467866897583, "learning_rate": 2.7360187780378632e-09, "loss": 0.0597, "step": 56133 }, { "epoch": 0.9941071515537905, "grad_norm": 0.8896065950393677, "learning_rate": 2.7196114021449392e-09, "loss": 0.061, "step": 56134 }, { "epoch": 0.9941248610908189, "grad_norm": 0.666046142578125, "learning_rate": 2.7032533657878057e-09, "loss": 0.0557, "step": 56135 }, { "epoch": 0.9941425706278474, "grad_norm": 0.6682331562042236, "learning_rate": 2.686944669019753e-09, "loss": 0.0488, "step": 56136 }, { "epoch": 0.9941602801648758, "grad_norm": 0.47966495156288147, "learning_rate": 2.6706853118957375e-09, "loss": 0.056, "step": 56137 }, { "epoch": 0.9941779897019042, "grad_norm": 0.3611738681793213, "learning_rate": 2.6544752944673846e-09, "loss": 0.0306, "step": 56138 }, { "epoch": 0.9941956992389327, "grad_norm": 0.8415361046791077, "learning_rate": 2.63831461678965e-09, "loss": 0.0746, "step": 56139 }, { "epoch": 0.9942134087759611, "grad_norm": 0.5507253408432007, "learning_rate": 2.6222032789141592e-09, "loss": 0.0471, "step": 56140 }, { "epoch": 0.9942311183129895, "grad_norm": 0.49993473291397095, "learning_rate": 2.6061412808958686e-09, "loss": 0.0539, "step": 56141 }, { "epoch": 0.9942488278500179, "grad_norm": 0.6166626811027527, "learning_rate": 2.590128622786403e-09, "loss": 0.0514, "step": 56142 }, { "epoch": 0.9942665373870464, "grad_norm": 0.547751784324646, "learning_rate": 2.574165304637388e-09, "loss": 0.0438, "step": 56143 }, { "epoch": 0.9942842469240748, "grad_norm": 0.9008592367172241, "learning_rate": 2.5582513265021144e-09, "loss": 0.062, "step": 56144 }, { "epoch": 0.9943019564611032, "grad_norm": 0.7472985982894897, "learning_rate": 2.5423866884355383e-09, "loss": 0.0621, "step": 56145 }, { "epoch": 0.9943196659981316, "grad_norm": 0.6748130321502686, "learning_rate": 2.5265713904859542e-09, "loss": 0.0774, "step": 56146 }, { "epoch": 0.9943373755351601, "grad_norm": 0.7566420435905457, "learning_rate": 2.5108054327066533e-09, "loss": 0.0437, "step": 56147 }, { "epoch": 0.9943550850721885, "grad_norm": 0.8948885798454285, "learning_rate": 2.4950888151509254e-09, "loss": 0.0645, "step": 56148 }, { "epoch": 0.9943727946092169, "grad_norm": 0.6280741095542908, "learning_rate": 2.4794215378687315e-09, "loss": 0.0458, "step": 56149 }, { "epoch": 0.9943905041462453, "grad_norm": 0.47857218980789185, "learning_rate": 2.4638036009133613e-09, "loss": 0.053, "step": 56150 }, { "epoch": 0.9944082136832738, "grad_norm": 0.5224462151527405, "learning_rate": 2.4482350043347758e-09, "loss": 0.0425, "step": 56151 }, { "epoch": 0.9944259232203022, "grad_norm": 0.9254876971244812, "learning_rate": 2.4327157481845997e-09, "loss": 0.0626, "step": 56152 }, { "epoch": 0.9944436327573306, "grad_norm": 0.511083722114563, "learning_rate": 2.4172458325127935e-09, "loss": 0.0528, "step": 56153 }, { "epoch": 0.9944613422943591, "grad_norm": 0.6046938300132751, "learning_rate": 2.4018252573726475e-09, "loss": 0.0434, "step": 56154 }, { "epoch": 0.9944790518313875, "grad_norm": 0.2589757740497589, "learning_rate": 2.3864540228124566e-09, "loss": 0.0312, "step": 56155 }, { "epoch": 0.9944967613684159, "grad_norm": 0.3638736307621002, "learning_rate": 2.371132128883846e-09, "loss": 0.0704, "step": 56156 }, { "epoch": 0.9945144709054443, "grad_norm": 0.4530695378780365, "learning_rate": 2.3558595756384416e-09, "loss": 0.0223, "step": 56157 }, { "epoch": 0.9945321804424728, "grad_norm": 0.43911653757095337, "learning_rate": 2.3406363631245377e-09, "loss": 0.0565, "step": 56158 }, { "epoch": 0.9945498899795012, "grad_norm": 0.6781673431396484, "learning_rate": 2.3254624913937593e-09, "loss": 0.0587, "step": 56159 }, { "epoch": 0.9945675995165296, "grad_norm": 0.5513975024223328, "learning_rate": 2.310337960494402e-09, "loss": 0.0522, "step": 56160 }, { "epoch": 0.994585309053558, "grad_norm": 0.6861286759376526, "learning_rate": 2.2952627704780905e-09, "loss": 0.0521, "step": 56161 }, { "epoch": 0.9946030185905865, "grad_norm": 0.7269485592842102, "learning_rate": 2.2802369213931197e-09, "loss": 0.0661, "step": 56162 }, { "epoch": 0.9946207281276149, "grad_norm": 0.5837116837501526, "learning_rate": 2.2652604132894496e-09, "loss": 0.0322, "step": 56163 }, { "epoch": 0.9946384376646433, "grad_norm": 0.9373772740364075, "learning_rate": 2.2503332462153747e-09, "loss": 0.0571, "step": 56164 }, { "epoch": 0.9946561472016717, "grad_norm": 0.3594749867916107, "learning_rate": 2.2354554202208555e-09, "loss": 0.0291, "step": 56165 }, { "epoch": 0.9946738567387002, "grad_norm": 0.5225095748901367, "learning_rate": 2.2206269353558516e-09, "loss": 0.0515, "step": 56166 }, { "epoch": 0.9946915662757286, "grad_norm": 0.6777315735816956, "learning_rate": 2.2058477916669928e-09, "loss": 0.0462, "step": 56167 }, { "epoch": 0.994709275812757, "grad_norm": 0.4636123776435852, "learning_rate": 2.1911179892042387e-09, "loss": 0.0562, "step": 56168 }, { "epoch": 0.9947269853497855, "grad_norm": 0.44505831599235535, "learning_rate": 2.1764375280158844e-09, "loss": 0.0552, "step": 56169 }, { "epoch": 0.9947446948868139, "grad_norm": 0.5003305077552795, "learning_rate": 2.1618064081502243e-09, "loss": 0.0594, "step": 56170 }, { "epoch": 0.9947624044238423, "grad_norm": 0.6409093737602234, "learning_rate": 2.147224629653888e-09, "loss": 0.0573, "step": 56171 }, { "epoch": 0.9947801139608707, "grad_norm": 0.8961049318313599, "learning_rate": 2.1326921925785003e-09, "loss": 0.0705, "step": 56172 }, { "epoch": 0.9947978234978992, "grad_norm": 0.49061158299446106, "learning_rate": 2.1182090969673607e-09, "loss": 0.0488, "step": 56173 }, { "epoch": 0.9948155330349276, "grad_norm": 0.6372871398925781, "learning_rate": 2.103775342872094e-09, "loss": 0.0602, "step": 56174 }, { "epoch": 0.994833242571956, "grad_norm": 0.40408217906951904, "learning_rate": 2.0893909303376644e-09, "loss": 0.0446, "step": 56175 }, { "epoch": 0.9948509521089844, "grad_norm": 0.2325192540884018, "learning_rate": 2.075055859412367e-09, "loss": 0.0278, "step": 56176 }, { "epoch": 0.994868661646013, "grad_norm": 0.904091477394104, "learning_rate": 2.0607701301428306e-09, "loss": 0.0689, "step": 56177 }, { "epoch": 0.9948863711830414, "grad_norm": 0.4533552825450897, "learning_rate": 2.04653374257735e-09, "loss": 0.0409, "step": 56178 }, { "epoch": 0.9949040807200698, "grad_norm": 0.427865207195282, "learning_rate": 2.0323466967608895e-09, "loss": 0.0712, "step": 56179 }, { "epoch": 0.9949217902570981, "grad_norm": 0.5493322610855103, "learning_rate": 2.0182089927417435e-09, "loss": 0.0387, "step": 56180 }, { "epoch": 0.9949394997941267, "grad_norm": 0.7221601605415344, "learning_rate": 2.0041206305648763e-09, "loss": 0.0528, "step": 56181 }, { "epoch": 0.9949572093311551, "grad_norm": 0.9327787160873413, "learning_rate": 1.990081610278582e-09, "loss": 0.0803, "step": 56182 }, { "epoch": 0.9949749188681835, "grad_norm": 0.37891432642936707, "learning_rate": 1.976091931927826e-09, "loss": 0.021, "step": 56183 }, { "epoch": 0.994992628405212, "grad_norm": 0.4713606536388397, "learning_rate": 1.9621515955575707e-09, "loss": 0.0463, "step": 56184 }, { "epoch": 0.9950103379422404, "grad_norm": 0.5733397006988525, "learning_rate": 1.948260601216112e-09, "loss": 0.0485, "step": 56185 }, { "epoch": 0.9950280474792688, "grad_norm": 0.5442774891853333, "learning_rate": 1.9344189489467477e-09, "loss": 0.05, "step": 56186 }, { "epoch": 0.9950457570162972, "grad_norm": 0.7845129370689392, "learning_rate": 1.9206266387977734e-09, "loss": 0.0586, "step": 56187 }, { "epoch": 0.9950634665533257, "grad_norm": 0.4649858772754669, "learning_rate": 1.906883670810822e-09, "loss": 0.0344, "step": 56188 }, { "epoch": 0.9950811760903541, "grad_norm": 0.44251078367233276, "learning_rate": 1.893190045035853e-09, "loss": 0.038, "step": 56189 }, { "epoch": 0.9950988856273825, "grad_norm": 0.5863775014877319, "learning_rate": 1.8795457615128355e-09, "loss": 0.0481, "step": 56190 }, { "epoch": 0.9951165951644109, "grad_norm": 0.47716546058654785, "learning_rate": 1.8659508202917285e-09, "loss": 0.0552, "step": 56191 }, { "epoch": 0.9951343047014394, "grad_norm": 0.29158124327659607, "learning_rate": 1.8524052214125009e-09, "loss": 0.0395, "step": 56192 }, { "epoch": 0.9951520142384678, "grad_norm": 0.523270845413208, "learning_rate": 1.8389089649217816e-09, "loss": 0.035, "step": 56193 }, { "epoch": 0.9951697237754962, "grad_norm": 0.7381253242492676, "learning_rate": 1.8254620508662e-09, "loss": 0.0556, "step": 56194 }, { "epoch": 0.9951874333125246, "grad_norm": 0.642205536365509, "learning_rate": 1.8120644792857243e-09, "loss": 0.0701, "step": 56195 }, { "epoch": 0.9952051428495531, "grad_norm": 0.4953170716762543, "learning_rate": 1.798716250228649e-09, "loss": 0.0259, "step": 56196 }, { "epoch": 0.9952228523865815, "grad_norm": 0.3477655053138733, "learning_rate": 1.7854173637349424e-09, "loss": 0.0212, "step": 56197 }, { "epoch": 0.9952405619236099, "grad_norm": 0.6088738441467285, "learning_rate": 1.7721678198512337e-09, "loss": 0.0377, "step": 56198 }, { "epoch": 0.9952582714606384, "grad_norm": 0.3633171319961548, "learning_rate": 1.758967618619156e-09, "loss": 0.0321, "step": 56199 }, { "epoch": 0.9952759809976668, "grad_norm": 0.5057743191719055, "learning_rate": 1.7458167600836738e-09, "loss": 0.0404, "step": 56200 }, { "epoch": 0.9952936905346952, "grad_norm": 0.5304741263389587, "learning_rate": 1.7327152442880856e-09, "loss": 0.0561, "step": 56201 }, { "epoch": 0.9953114000717236, "grad_norm": 0.5194215774536133, "learning_rate": 1.7196630712740247e-09, "loss": 0.0439, "step": 56202 }, { "epoch": 0.9953291096087521, "grad_norm": 0.2731957733631134, "learning_rate": 1.7066602410864552e-09, "loss": 0.0581, "step": 56203 }, { "epoch": 0.9953468191457805, "grad_norm": 0.563593327999115, "learning_rate": 1.6937067537653451e-09, "loss": 0.0549, "step": 56204 }, { "epoch": 0.9953645286828089, "grad_norm": 0.5924323201179504, "learning_rate": 1.6808026093556582e-09, "loss": 0.0306, "step": 56205 }, { "epoch": 0.9953822382198373, "grad_norm": 0.7851725816726685, "learning_rate": 1.6679478078990284e-09, "loss": 0.0566, "step": 56206 }, { "epoch": 0.9953999477568658, "grad_norm": 0.5133508443832397, "learning_rate": 1.655142349438754e-09, "loss": 0.0675, "step": 56207 }, { "epoch": 0.9954176572938942, "grad_norm": 0.5361252427101135, "learning_rate": 1.6423862340148033e-09, "loss": 0.037, "step": 56208 }, { "epoch": 0.9954353668309226, "grad_norm": 0.8885935544967651, "learning_rate": 1.6296794616704747e-09, "loss": 0.0694, "step": 56209 }, { "epoch": 0.9954530763679511, "grad_norm": 0.5976641178131104, "learning_rate": 1.617022032449067e-09, "loss": 0.0608, "step": 56210 }, { "epoch": 0.9954707859049795, "grad_norm": 0.8149344325065613, "learning_rate": 1.604413946388883e-09, "loss": 0.0596, "step": 56211 }, { "epoch": 0.9954884954420079, "grad_norm": 0.6123572587966919, "learning_rate": 1.5918552035348865e-09, "loss": 0.0516, "step": 56212 }, { "epoch": 0.9955062049790363, "grad_norm": 0.6156291365623474, "learning_rate": 1.5793458039253805e-09, "loss": 0.0498, "step": 56213 }, { "epoch": 0.9955239145160648, "grad_norm": 0.42454051971435547, "learning_rate": 1.5668857476019983e-09, "loss": 0.0344, "step": 56214 }, { "epoch": 0.9955416240530932, "grad_norm": 0.6431016325950623, "learning_rate": 1.5544750346080383e-09, "loss": 0.0585, "step": 56215 }, { "epoch": 0.9955593335901216, "grad_norm": 0.355381578207016, "learning_rate": 1.5421136649834688e-09, "loss": 0.0337, "step": 56216 }, { "epoch": 0.99557704312715, "grad_norm": 0.7864057421684265, "learning_rate": 1.5298016387665925e-09, "loss": 0.0702, "step": 56217 }, { "epoch": 0.9955947526641785, "grad_norm": 0.5400519967079163, "learning_rate": 1.5175389560007081e-09, "loss": 0.0446, "step": 56218 }, { "epoch": 0.9956124622012069, "grad_norm": 1.1358015537261963, "learning_rate": 1.505325616724118e-09, "loss": 0.0604, "step": 56219 }, { "epoch": 0.9956301717382353, "grad_norm": 0.4824100732803345, "learning_rate": 1.4931616209801214e-09, "loss": 0.0579, "step": 56220 }, { "epoch": 0.9956478812752637, "grad_norm": 0.7247211337089539, "learning_rate": 1.4810469688053553e-09, "loss": 0.0733, "step": 56221 }, { "epoch": 0.9956655908122922, "grad_norm": 0.6313799619674683, "learning_rate": 1.4689816602397876e-09, "loss": 0.0431, "step": 56222 }, { "epoch": 0.9956833003493206, "grad_norm": 0.9785242080688477, "learning_rate": 1.4569656953250521e-09, "loss": 0.0744, "step": 56223 }, { "epoch": 0.995701009886349, "grad_norm": 0.7501779794692993, "learning_rate": 1.4449990741011165e-09, "loss": 0.0545, "step": 56224 }, { "epoch": 0.9957187194233775, "grad_norm": 0.7346696257591248, "learning_rate": 1.4330817966046183e-09, "loss": 0.0607, "step": 56225 }, { "epoch": 0.9957364289604059, "grad_norm": 0.6645909547805786, "learning_rate": 1.421213862877191e-09, "loss": 0.0355, "step": 56226 }, { "epoch": 0.9957541384974343, "grad_norm": 0.3692678213119507, "learning_rate": 1.409395272957137e-09, "loss": 0.0706, "step": 56227 }, { "epoch": 0.9957718480344627, "grad_norm": 0.41916513442993164, "learning_rate": 1.3976260268810937e-09, "loss": 0.0825, "step": 56228 }, { "epoch": 0.9957895575714912, "grad_norm": 0.6201558113098145, "learning_rate": 1.38590612469236e-09, "loss": 0.042, "step": 56229 }, { "epoch": 0.9958072671085196, "grad_norm": 0.5254788398742676, "learning_rate": 1.3742355664259076e-09, "loss": 0.0368, "step": 56230 }, { "epoch": 0.995824976645548, "grad_norm": 0.9714922308921814, "learning_rate": 1.3626143521217049e-09, "loss": 0.0621, "step": 56231 }, { "epoch": 0.9958426861825764, "grad_norm": 0.3835074007511139, "learning_rate": 1.3510424818163892e-09, "loss": 0.0433, "step": 56232 }, { "epoch": 0.9958603957196049, "grad_norm": 0.4958973824977875, "learning_rate": 1.3395199555499283e-09, "loss": 0.0482, "step": 56233 }, { "epoch": 0.9958781052566333, "grad_norm": 0.6626959443092346, "learning_rate": 1.3280467733589597e-09, "loss": 0.0392, "step": 56234 }, { "epoch": 0.9958958147936617, "grad_norm": 0.4839259088039398, "learning_rate": 1.316622935281786e-09, "loss": 0.0451, "step": 56235 }, { "epoch": 0.9959135243306901, "grad_norm": 0.7967732548713684, "learning_rate": 1.30524844135671e-09, "loss": 0.0439, "step": 56236 }, { "epoch": 0.9959312338677186, "grad_norm": 0.6680967211723328, "learning_rate": 1.2939232916187037e-09, "loss": 0.0762, "step": 56237 }, { "epoch": 0.995948943404747, "grad_norm": 0.5673735737800598, "learning_rate": 1.2826474861077352e-09, "loss": 0.0544, "step": 56238 }, { "epoch": 0.9959666529417754, "grad_norm": 0.677210807800293, "learning_rate": 1.2714210248604419e-09, "loss": 0.0477, "step": 56239 }, { "epoch": 0.995984362478804, "grad_norm": 0.4202744662761688, "learning_rate": 1.2602439079117956e-09, "loss": 0.0456, "step": 56240 }, { "epoch": 0.9960020720158324, "grad_norm": 0.3851499557495117, "learning_rate": 1.2491161353017644e-09, "loss": 0.0272, "step": 56241 }, { "epoch": 0.9960197815528608, "grad_norm": 0.23014506697654724, "learning_rate": 1.238037707063655e-09, "loss": 0.0505, "step": 56242 }, { "epoch": 0.9960374910898891, "grad_norm": 0.7164465188980103, "learning_rate": 1.2270086232357702e-09, "loss": 0.0493, "step": 56243 }, { "epoch": 0.9960552006269177, "grad_norm": 0.9139718413352966, "learning_rate": 1.216028883853082e-09, "loss": 0.0509, "step": 56244 }, { "epoch": 0.9960729101639461, "grad_norm": 0.7600525617599487, "learning_rate": 1.205098488953893e-09, "loss": 0.051, "step": 56245 }, { "epoch": 0.9960906197009745, "grad_norm": 0.6198582053184509, "learning_rate": 1.19421743857151e-09, "loss": 0.0563, "step": 56246 }, { "epoch": 0.9961083292380029, "grad_norm": 0.9118953943252563, "learning_rate": 1.1833857327442354e-09, "loss": 0.0783, "step": 56247 }, { "epoch": 0.9961260387750314, "grad_norm": 0.5598082542419434, "learning_rate": 1.1726033715070416e-09, "loss": 0.0521, "step": 56248 }, { "epoch": 0.9961437483120598, "grad_norm": 0.6355445981025696, "learning_rate": 1.1618703548932353e-09, "loss": 0.0708, "step": 56249 }, { "epoch": 0.9961614578490882, "grad_norm": 1.0759145021438599, "learning_rate": 1.1511866829411188e-09, "loss": 0.064, "step": 56250 }, { "epoch": 0.9961791673861166, "grad_norm": 0.48263585567474365, "learning_rate": 1.1405523556839992e-09, "loss": 0.0305, "step": 56251 }, { "epoch": 0.9961968769231451, "grad_norm": 0.4824199974536896, "learning_rate": 1.1299673731585135e-09, "loss": 0.0387, "step": 56252 }, { "epoch": 0.9962145864601735, "grad_norm": 0.663665771484375, "learning_rate": 1.119431735397969e-09, "loss": 0.0539, "step": 56253 }, { "epoch": 0.9962322959972019, "grad_norm": 0.5678579211235046, "learning_rate": 1.1089454424373368e-09, "loss": 0.0491, "step": 56254 }, { "epoch": 0.9962500055342304, "grad_norm": 0.49049055576324463, "learning_rate": 1.0985084943099243e-09, "loss": 0.066, "step": 56255 }, { "epoch": 0.9962677150712588, "grad_norm": 0.5043612122535706, "learning_rate": 1.0881208910523687e-09, "loss": 0.0541, "step": 56256 }, { "epoch": 0.9962854246082872, "grad_norm": 0.6450850963592529, "learning_rate": 1.0777826326979766e-09, "loss": 0.0488, "step": 56257 }, { "epoch": 0.9963031341453156, "grad_norm": 0.7695220112800598, "learning_rate": 1.067493719280055e-09, "loss": 0.0585, "step": 56258 }, { "epoch": 0.9963208436823441, "grad_norm": 0.3928075134754181, "learning_rate": 1.0572541508335753e-09, "loss": 0.0372, "step": 56259 }, { "epoch": 0.9963385532193725, "grad_norm": 0.5557178854942322, "learning_rate": 1.0470639273918448e-09, "loss": 0.0472, "step": 56260 }, { "epoch": 0.9963562627564009, "grad_norm": 0.5906175971031189, "learning_rate": 1.0369230489881698e-09, "loss": 0.0662, "step": 56261 }, { "epoch": 0.9963739722934293, "grad_norm": 0.6487547755241394, "learning_rate": 1.0268315156558571e-09, "loss": 0.0317, "step": 56262 }, { "epoch": 0.9963916818304578, "grad_norm": 0.6840857267379761, "learning_rate": 1.0167893274282137e-09, "loss": 0.0868, "step": 56263 }, { "epoch": 0.9964093913674862, "grad_norm": 0.3585069179534912, "learning_rate": 1.0067964843385457e-09, "loss": 0.0567, "step": 56264 }, { "epoch": 0.9964271009045146, "grad_norm": 0.8265216946601868, "learning_rate": 9.96852986418495e-10, "loss": 0.0581, "step": 56265 }, { "epoch": 0.996444810441543, "grad_norm": 0.28727835416793823, "learning_rate": 9.869588337030333e-10, "loss": 0.0592, "step": 56266 }, { "epoch": 0.9964625199785715, "grad_norm": 0.5651264190673828, "learning_rate": 9.771140262221368e-10, "loss": 0.0406, "step": 56267 }, { "epoch": 0.9964802295155999, "grad_norm": 0.4338971972465515, "learning_rate": 9.673185640107774e-10, "loss": 0.0582, "step": 56268 }, { "epoch": 0.9964979390526283, "grad_norm": 0.48449602723121643, "learning_rate": 9.575724470989312e-10, "loss": 0.0653, "step": 56269 }, { "epoch": 0.9965156485896568, "grad_norm": 0.7185707688331604, "learning_rate": 9.478756755199046e-10, "loss": 0.0506, "step": 56270 }, { "epoch": 0.9965333581266852, "grad_norm": 0.5002660751342773, "learning_rate": 9.382282493053395e-10, "loss": 0.038, "step": 56271 }, { "epoch": 0.9965510676637136, "grad_norm": 0.5142331719398499, "learning_rate": 9.28630168486877e-10, "loss": 0.0521, "step": 56272 }, { "epoch": 0.996568777200742, "grad_norm": 0.7445099353790283, "learning_rate": 9.190814330961583e-10, "loss": 0.0473, "step": 56273 }, { "epoch": 0.9965864867377705, "grad_norm": 0.33953627943992615, "learning_rate": 9.095820431648249e-10, "loss": 0.0388, "step": 56274 }, { "epoch": 0.9966041962747989, "grad_norm": 0.9168040752410889, "learning_rate": 9.001319987228529e-10, "loss": 0.048, "step": 56275 }, { "epoch": 0.9966219058118273, "grad_norm": 0.49006903171539307, "learning_rate": 8.907312998035488e-10, "loss": 0.0488, "step": 56276 }, { "epoch": 0.9966396153488557, "grad_norm": 0.3940713405609131, "learning_rate": 8.813799464352235e-10, "loss": 0.0378, "step": 56277 }, { "epoch": 0.9966573248858842, "grad_norm": 0.8613235354423523, "learning_rate": 8.720779386511834e-10, "loss": 0.0927, "step": 56278 }, { "epoch": 0.9966750344229126, "grad_norm": 0.24625825881958008, "learning_rate": 8.628252764814048e-10, "loss": 0.0402, "step": 56279 }, { "epoch": 0.996692743959941, "grad_norm": 0.45917651057243347, "learning_rate": 8.536219599558636e-10, "loss": 0.0501, "step": 56280 }, { "epoch": 0.9967104534969694, "grad_norm": 0.4950627386569977, "learning_rate": 8.44467989104536e-10, "loss": 0.0781, "step": 56281 }, { "epoch": 0.9967281630339979, "grad_norm": 0.8394597768783569, "learning_rate": 8.353633639573977e-10, "loss": 0.0796, "step": 56282 }, { "epoch": 0.9967458725710263, "grad_norm": 0.5853080153465271, "learning_rate": 8.263080845444249e-10, "loss": 0.0506, "step": 56283 }, { "epoch": 0.9967635821080547, "grad_norm": 0.8300614953041077, "learning_rate": 8.173021508972589e-10, "loss": 0.0716, "step": 56284 }, { "epoch": 0.9967812916450832, "grad_norm": 0.5652140378952026, "learning_rate": 8.083455630425451e-10, "loss": 0.0753, "step": 56285 }, { "epoch": 0.9967990011821116, "grad_norm": 0.6641860008239746, "learning_rate": 7.994383210119249e-10, "loss": 0.0484, "step": 56286 }, { "epoch": 0.99681671071914, "grad_norm": 0.4699726998806, "learning_rate": 7.905804248353743e-10, "loss": 0.0497, "step": 56287 }, { "epoch": 0.9968344202561684, "grad_norm": 0.5105248093605042, "learning_rate": 7.817718745395386e-10, "loss": 0.0467, "step": 56288 }, { "epoch": 0.9968521297931969, "grad_norm": 0.6505424380302429, "learning_rate": 7.730126701543938e-10, "loss": 0.0398, "step": 56289 }, { "epoch": 0.9968698393302253, "grad_norm": 0.675769567489624, "learning_rate": 7.643028117082506e-10, "loss": 0.0604, "step": 56290 }, { "epoch": 0.9968875488672537, "grad_norm": 1.0153242349624634, "learning_rate": 7.556422992327505e-10, "loss": 0.0791, "step": 56291 }, { "epoch": 0.9969052584042821, "grad_norm": 0.6133859157562256, "learning_rate": 7.47031132751208e-10, "loss": 0.0544, "step": 56292 }, { "epoch": 0.9969229679413106, "grad_norm": 0.6584493517875671, "learning_rate": 7.3846931229693e-10, "loss": 0.0563, "step": 56293 }, { "epoch": 0.996940677478339, "grad_norm": 0.4336360991001129, "learning_rate": 7.299568378948962e-10, "loss": 0.0514, "step": 56294 }, { "epoch": 0.9969583870153674, "grad_norm": 0.3447802662849426, "learning_rate": 7.214937095750828e-10, "loss": 0.0396, "step": 56295 }, { "epoch": 0.9969760965523958, "grad_norm": 0.28820013999938965, "learning_rate": 7.130799273641353e-10, "loss": 0.0579, "step": 56296 }, { "epoch": 0.9969938060894243, "grad_norm": 0.5190265774726868, "learning_rate": 7.047154912903642e-10, "loss": 0.0574, "step": 56297 }, { "epoch": 0.9970115156264527, "grad_norm": 0.4526291787624359, "learning_rate": 6.964004013804148e-10, "loss": 0.0247, "step": 56298 }, { "epoch": 0.9970292251634811, "grad_norm": 1.5462894439697266, "learning_rate": 6.88134657662598e-10, "loss": 0.0698, "step": 56299 }, { "epoch": 0.9970469347005096, "grad_norm": 0.5760831832885742, "learning_rate": 6.799182601635589e-10, "loss": 0.0357, "step": 56300 }, { "epoch": 0.997064644237538, "grad_norm": 0.6550770401954651, "learning_rate": 6.717512089116085e-10, "loss": 0.0461, "step": 56301 }, { "epoch": 0.9970823537745664, "grad_norm": 0.8036562204360962, "learning_rate": 6.636335039317265e-10, "loss": 0.0679, "step": 56302 }, { "epoch": 0.9971000633115948, "grad_norm": 0.6699053049087524, "learning_rate": 6.555651452505584e-10, "loss": 0.0445, "step": 56303 }, { "epoch": 0.9971177728486234, "grad_norm": 0.5721903443336487, "learning_rate": 6.475461328964149e-10, "loss": 0.0468, "step": 56304 }, { "epoch": 0.9971354823856518, "grad_norm": 0.6319437623023987, "learning_rate": 6.395764668942761e-10, "loss": 0.051, "step": 56305 }, { "epoch": 0.9971531919226801, "grad_norm": 0.41029536724090576, "learning_rate": 6.316561472707871e-10, "loss": 0.0576, "step": 56306 }, { "epoch": 0.9971709014597085, "grad_norm": 0.5201777815818787, "learning_rate": 6.237851740525935e-10, "loss": 0.0543, "step": 56307 }, { "epoch": 0.9971886109967371, "grad_norm": 0.5048030018806458, "learning_rate": 6.159635472646752e-10, "loss": 0.0506, "step": 56308 }, { "epoch": 0.9972063205337655, "grad_norm": 0.5639444589614868, "learning_rate": 6.081912669336776e-10, "loss": 0.0567, "step": 56309 }, { "epoch": 0.9972240300707939, "grad_norm": 0.6385101079940796, "learning_rate": 6.004683330829153e-10, "loss": 0.0439, "step": 56310 }, { "epoch": 0.9972417396078223, "grad_norm": 0.44643309712409973, "learning_rate": 5.92794745740699e-10, "loss": 0.0449, "step": 56311 }, { "epoch": 0.9972594491448508, "grad_norm": 0.8168236017227173, "learning_rate": 5.851705049320089e-10, "loss": 0.087, "step": 56312 }, { "epoch": 0.9972771586818792, "grad_norm": 0.9101377725601196, "learning_rate": 5.775956106801594e-10, "loss": 0.0665, "step": 56313 }, { "epoch": 0.9972948682189076, "grad_norm": 0.6547563076019287, "learning_rate": 5.700700630101308e-10, "loss": 0.0503, "step": 56314 }, { "epoch": 0.9973125777559361, "grad_norm": 0.9188401699066162, "learning_rate": 5.625938619485682e-10, "loss": 0.0734, "step": 56315 }, { "epoch": 0.9973302872929645, "grad_norm": 0.9390664100646973, "learning_rate": 5.551670075187864e-10, "loss": 0.0594, "step": 56316 }, { "epoch": 0.9973479968299929, "grad_norm": 0.7071933150291443, "learning_rate": 5.477894997457655e-10, "loss": 0.1086, "step": 56317 }, { "epoch": 0.9973657063670213, "grad_norm": 0.4111967980861664, "learning_rate": 5.404613386528201e-10, "loss": 0.0554, "step": 56318 }, { "epoch": 0.9973834159040498, "grad_norm": 0.6412701606750488, "learning_rate": 5.331825242649302e-10, "loss": 0.0821, "step": 56319 }, { "epoch": 0.9974011254410782, "grad_norm": 0.7157458066940308, "learning_rate": 5.259530566054105e-10, "loss": 0.072, "step": 56320 }, { "epoch": 0.9974188349781066, "grad_norm": 0.3304073214530945, "learning_rate": 5.187729356975757e-10, "loss": 0.051, "step": 56321 }, { "epoch": 0.997436544515135, "grad_norm": 0.4119534194469452, "learning_rate": 5.116421615680711e-10, "loss": 0.0448, "step": 56322 }, { "epoch": 0.9974542540521635, "grad_norm": 0.37346404790878296, "learning_rate": 5.045607342368807e-10, "loss": 0.0531, "step": 56323 }, { "epoch": 0.9974719635891919, "grad_norm": 0.767387330532074, "learning_rate": 4.975286537289847e-10, "loss": 0.0603, "step": 56324 }, { "epoch": 0.9974896731262203, "grad_norm": 1.0186127424240112, "learning_rate": 4.905459200660322e-10, "loss": 0.0427, "step": 56325 }, { "epoch": 0.9975073826632487, "grad_norm": 0.40908554196357727, "learning_rate": 4.836125332730035e-10, "loss": 0.0714, "step": 56326 }, { "epoch": 0.9975250922002772, "grad_norm": 0.7790197134017944, "learning_rate": 4.767284933715477e-10, "loss": 0.1016, "step": 56327 }, { "epoch": 0.9975428017373056, "grad_norm": 0.49869611859321594, "learning_rate": 4.698938003849796e-10, "loss": 0.0483, "step": 56328 }, { "epoch": 0.997560511274334, "grad_norm": 0.8381267189979553, "learning_rate": 4.631084543349484e-10, "loss": 0.0828, "step": 56329 }, { "epoch": 0.9975782208113625, "grad_norm": 0.6207663416862488, "learning_rate": 4.5637245524310365e-10, "loss": 0.046, "step": 56330 }, { "epoch": 0.9975959303483909, "grad_norm": 0.7084871530532837, "learning_rate": 4.496858031344253e-10, "loss": 0.0645, "step": 56331 }, { "epoch": 0.9976136398854193, "grad_norm": 1.0882648229599, "learning_rate": 4.4304849802889733e-10, "loss": 0.0931, "step": 56332 }, { "epoch": 0.9976313494224477, "grad_norm": 0.666614294052124, "learning_rate": 4.3646053994816916e-10, "loss": 0.0716, "step": 56333 }, { "epoch": 0.9976490589594762, "grad_norm": 0.4185929596424103, "learning_rate": 4.2992192891389005e-10, "loss": 0.0462, "step": 56334 }, { "epoch": 0.9976667684965046, "grad_norm": 0.7333757877349854, "learning_rate": 4.234326649493747e-10, "loss": 0.0333, "step": 56335 }, { "epoch": 0.997684478033533, "grad_norm": 0.7591397762298584, "learning_rate": 4.1699274807294186e-10, "loss": 0.0488, "step": 56336 }, { "epoch": 0.9977021875705614, "grad_norm": 0.841353714466095, "learning_rate": 4.1060217830790616e-10, "loss": 0.0838, "step": 56337 }, { "epoch": 0.9977198971075899, "grad_norm": 0.33378082513809204, "learning_rate": 4.042609556759169e-10, "loss": 0.0415, "step": 56338 }, { "epoch": 0.9977376066446183, "grad_norm": 0.8863450884819031, "learning_rate": 3.9796908019695823e-10, "loss": 0.0661, "step": 56339 }, { "epoch": 0.9977553161816467, "grad_norm": 0.8977382183074951, "learning_rate": 3.91726551891014e-10, "loss": 0.0538, "step": 56340 }, { "epoch": 0.9977730257186751, "grad_norm": 0.6546470522880554, "learning_rate": 3.855333707780684e-10, "loss": 0.058, "step": 56341 }, { "epoch": 0.9977907352557036, "grad_norm": 0.39308443665504456, "learning_rate": 3.7938953688143597e-10, "loss": 0.0788, "step": 56342 }, { "epoch": 0.997808444792732, "grad_norm": 0.8081027865409851, "learning_rate": 3.732950502177701e-10, "loss": 0.0653, "step": 56343 }, { "epoch": 0.9978261543297604, "grad_norm": 0.33485206961631775, "learning_rate": 3.672499108103855e-10, "loss": 0.0498, "step": 56344 }, { "epoch": 0.9978438638667889, "grad_norm": 0.6426995992660522, "learning_rate": 3.612541186759355e-10, "loss": 0.0533, "step": 56345 }, { "epoch": 0.9978615734038173, "grad_norm": 0.5615160465240479, "learning_rate": 3.5530767383773475e-10, "loss": 0.0407, "step": 56346 }, { "epoch": 0.9978792829408457, "grad_norm": 0.6205453872680664, "learning_rate": 3.4941057631243667e-10, "loss": 0.0478, "step": 56347 }, { "epoch": 0.9978969924778741, "grad_norm": 0.7033224105834961, "learning_rate": 3.435628261200252e-10, "loss": 0.0542, "step": 56348 }, { "epoch": 0.9979147020149026, "grad_norm": 0.6700589656829834, "learning_rate": 3.377644232804844e-10, "loss": 0.0421, "step": 56349 }, { "epoch": 0.997932411551931, "grad_norm": 0.7677657008171082, "learning_rate": 3.3201536781379825e-10, "loss": 0.0797, "step": 56350 }, { "epoch": 0.9979501210889594, "grad_norm": 0.5294189453125, "learning_rate": 3.2631565973662013e-10, "loss": 0.0732, "step": 56351 }, { "epoch": 0.9979678306259878, "grad_norm": 1.0286792516708374, "learning_rate": 3.20665299068934e-10, "loss": 0.0704, "step": 56352 }, { "epoch": 0.9979855401630163, "grad_norm": 1.0089514255523682, "learning_rate": 3.150642858273933e-10, "loss": 0.0724, "step": 56353 }, { "epoch": 0.9980032497000447, "grad_norm": 0.4758734703063965, "learning_rate": 3.095126200353127e-10, "loss": 0.0472, "step": 56354 }, { "epoch": 0.9980209592370731, "grad_norm": 0.43116500973701477, "learning_rate": 3.0401030170601474e-10, "loss": 0.0368, "step": 56355 }, { "epoch": 0.9980386687741015, "grad_norm": 0.6480433344841003, "learning_rate": 2.9855733085948356e-10, "loss": 0.0891, "step": 56356 }, { "epoch": 0.99805637831113, "grad_norm": 0.33651065826416016, "learning_rate": 2.931537075123725e-10, "loss": 0.0441, "step": 56357 }, { "epoch": 0.9980740878481584, "grad_norm": 0.6084065437316895, "learning_rate": 2.8779943168633085e-10, "loss": 0.0614, "step": 56358 }, { "epoch": 0.9980917973851868, "grad_norm": 0.6219797134399414, "learning_rate": 2.824945033946813e-10, "loss": 0.049, "step": 56359 }, { "epoch": 0.9981095069222153, "grad_norm": 0.7782607674598694, "learning_rate": 2.7723892265574257e-10, "loss": 0.0537, "step": 56360 }, { "epoch": 0.9981272164592437, "grad_norm": 0.6287626624107361, "learning_rate": 2.7203268948949864e-10, "loss": 0.0628, "step": 56361 }, { "epoch": 0.9981449259962721, "grad_norm": 0.32004228234291077, "learning_rate": 2.6687580390927224e-10, "loss": 0.0429, "step": 56362 }, { "epoch": 0.9981626355333005, "grad_norm": 0.34651339054107666, "learning_rate": 2.6176826593504733e-10, "loss": 0.0535, "step": 56363 }, { "epoch": 0.998180345070329, "grad_norm": 0.2885667085647583, "learning_rate": 2.5671007558347724e-10, "loss": 0.0542, "step": 56364 }, { "epoch": 0.9981980546073574, "grad_norm": 0.7795165777206421, "learning_rate": 2.5170123286788473e-10, "loss": 0.0806, "step": 56365 }, { "epoch": 0.9982157641443858, "grad_norm": 0.45243075489997864, "learning_rate": 2.467417378082537e-10, "loss": 0.0406, "step": 56366 }, { "epoch": 0.9982334736814142, "grad_norm": 0.4391928017139435, "learning_rate": 2.4183159041957225e-10, "loss": 0.0392, "step": 56367 }, { "epoch": 0.9982511832184428, "grad_norm": 0.7633500099182129, "learning_rate": 2.369707907184937e-10, "loss": 0.06, "step": 56368 }, { "epoch": 0.9982688927554711, "grad_norm": 0.43766865134239197, "learning_rate": 2.3215933872000605e-10, "loss": 0.0444, "step": 56369 }, { "epoch": 0.9982866022924995, "grad_norm": 0.6632774472236633, "learning_rate": 2.2739723444076265e-10, "loss": 0.0442, "step": 56370 }, { "epoch": 0.998304311829528, "grad_norm": 0.1942588835954666, "learning_rate": 2.2268447789741685e-10, "loss": 0.0473, "step": 56371 }, { "epoch": 0.9983220213665565, "grad_norm": 0.5634713768959045, "learning_rate": 2.1802106910329134e-10, "loss": 0.0594, "step": 56372 }, { "epoch": 0.9983397309035849, "grad_norm": 0.3838638365268707, "learning_rate": 2.134070080733741e-10, "loss": 0.042, "step": 56373 }, { "epoch": 0.9983574404406133, "grad_norm": 0.7633088827133179, "learning_rate": 2.0884229482598383e-10, "loss": 0.0534, "step": 56374 }, { "epoch": 0.9983751499776418, "grad_norm": 0.4738968014717102, "learning_rate": 2.043269293744432e-10, "loss": 0.0815, "step": 56375 }, { "epoch": 0.9983928595146702, "grad_norm": 0.3483390808105469, "learning_rate": 1.9986091173207489e-10, "loss": 0.0298, "step": 56376 }, { "epoch": 0.9984105690516986, "grad_norm": 0.4350152313709259, "learning_rate": 1.9544424191719757e-10, "loss": 0.0455, "step": 56377 }, { "epoch": 0.998428278588727, "grad_norm": 0.6476709842681885, "learning_rate": 1.9107691993980326e-10, "loss": 0.0415, "step": 56378 }, { "epoch": 0.9984459881257555, "grad_norm": 0.4026968479156494, "learning_rate": 1.8675894581821063e-10, "loss": 0.0264, "step": 56379 }, { "epoch": 0.9984636976627839, "grad_norm": 0.4736410677433014, "learning_rate": 1.8249031956407702e-10, "loss": 0.0431, "step": 56380 }, { "epoch": 0.9984814071998123, "grad_norm": 0.5970602035522461, "learning_rate": 1.782710411940558e-10, "loss": 0.0571, "step": 56381 }, { "epoch": 0.9984991167368407, "grad_norm": 0.3574751019477844, "learning_rate": 1.7410111071813896e-10, "loss": 0.0526, "step": 56382 }, { "epoch": 0.9985168262738692, "grad_norm": 0.595150351524353, "learning_rate": 1.6998052815297983e-10, "loss": 0.0736, "step": 56383 }, { "epoch": 0.9985345358108976, "grad_norm": 0.6019747853279114, "learning_rate": 1.6590929351190108e-10, "loss": 0.0389, "step": 56384 }, { "epoch": 0.998552245347926, "grad_norm": 0.40664976835250854, "learning_rate": 1.6188740680822544e-10, "loss": 0.0392, "step": 56385 }, { "epoch": 0.9985699548849544, "grad_norm": 0.3537991940975189, "learning_rate": 1.5791486805527556e-10, "loss": 0.0484, "step": 56386 }, { "epoch": 0.9985876644219829, "grad_norm": 0.5397231578826904, "learning_rate": 1.5399167726470876e-10, "loss": 0.0535, "step": 56387 }, { "epoch": 0.9986053739590113, "grad_norm": 0.7057232856750488, "learning_rate": 1.5011783444984772e-10, "loss": 0.0446, "step": 56388 }, { "epoch": 0.9986230834960397, "grad_norm": 0.5711401104927063, "learning_rate": 1.4629333962401514e-10, "loss": 0.0341, "step": 56389 }, { "epoch": 0.9986407930330682, "grad_norm": 0.18105751276016235, "learning_rate": 1.425181928005337e-10, "loss": 0.0399, "step": 56390 }, { "epoch": 0.9986585025700966, "grad_norm": 0.5567518472671509, "learning_rate": 1.387923939910607e-10, "loss": 0.0393, "step": 56391 }, { "epoch": 0.998676212107125, "grad_norm": 0.4212878942489624, "learning_rate": 1.351159432072535e-10, "loss": 0.0507, "step": 56392 }, { "epoch": 0.9986939216441534, "grad_norm": 0.707930326461792, "learning_rate": 1.314888404624348e-10, "loss": 0.0731, "step": 56393 }, { "epoch": 0.9987116311811819, "grad_norm": 0.5298145413398743, "learning_rate": 1.279110857665966e-10, "loss": 0.0539, "step": 56394 }, { "epoch": 0.9987293407182103, "grad_norm": 0.6712641716003418, "learning_rate": 1.2438267913306156e-10, "loss": 0.0455, "step": 56395 }, { "epoch": 0.9987470502552387, "grad_norm": 0.47960165143013, "learning_rate": 1.2090362057515237e-10, "loss": 0.0228, "step": 56396 }, { "epoch": 0.9987647597922671, "grad_norm": 0.5292540788650513, "learning_rate": 1.1747391009953036e-10, "loss": 0.0697, "step": 56397 }, { "epoch": 0.9987824693292956, "grad_norm": 0.6610271334648132, "learning_rate": 1.1409354772284885e-10, "loss": 0.0564, "step": 56398 }, { "epoch": 0.998800178866324, "grad_norm": 0.5915583968162537, "learning_rate": 1.1076253345176924e-10, "loss": 0.055, "step": 56399 }, { "epoch": 0.9988178884033524, "grad_norm": 0.5740044116973877, "learning_rate": 1.0748086729961415e-10, "loss": 0.0648, "step": 56400 }, { "epoch": 0.9988355979403808, "grad_norm": 0.767818808555603, "learning_rate": 1.0424854927637562e-10, "loss": 0.0916, "step": 56401 }, { "epoch": 0.9988533074774093, "grad_norm": 0.5082961916923523, "learning_rate": 1.0106557939371097e-10, "loss": 0.0758, "step": 56402 }, { "epoch": 0.9988710170144377, "grad_norm": 0.7962104678153992, "learning_rate": 9.793195766161222e-11, "loss": 0.0731, "step": 56403 }, { "epoch": 0.9988887265514661, "grad_norm": 0.5777062773704529, "learning_rate": 9.484768409007138e-11, "loss": 0.0702, "step": 56404 }, { "epoch": 0.9989064360884946, "grad_norm": 0.4455835521221161, "learning_rate": 9.181275868908045e-11, "loss": 0.0484, "step": 56405 }, { "epoch": 0.998924145625523, "grad_norm": 0.7683792114257812, "learning_rate": 8.882718146863145e-11, "loss": 0.0399, "step": 56406 }, { "epoch": 0.9989418551625514, "grad_norm": 0.7433175444602966, "learning_rate": 8.589095243871636e-11, "loss": 0.0663, "step": 56407 }, { "epoch": 0.9989595646995798, "grad_norm": 0.4070419669151306, "learning_rate": 8.300407160932722e-11, "loss": 0.0308, "step": 56408 }, { "epoch": 0.9989772742366083, "grad_norm": 0.4462023675441742, "learning_rate": 8.016653898879067e-11, "loss": 0.0421, "step": 56409 }, { "epoch": 0.9989949837736367, "grad_norm": 1.1123251914978027, "learning_rate": 7.737835458876408e-11, "loss": 0.0294, "step": 56410 }, { "epoch": 0.9990126933106651, "grad_norm": 0.6967952251434326, "learning_rate": 7.463951841590877e-11, "loss": 0.0535, "step": 56411 }, { "epoch": 0.9990304028476935, "grad_norm": 0.5915445685386658, "learning_rate": 7.195003048188209e-11, "loss": 0.0412, "step": 56412 }, { "epoch": 0.999048112384722, "grad_norm": 0.7343975901603699, "learning_rate": 6.930989079334538e-11, "loss": 0.0703, "step": 56413 }, { "epoch": 0.9990658219217504, "grad_norm": 0.44572681188583374, "learning_rate": 6.67190993586253e-11, "loss": 0.0319, "step": 56414 }, { "epoch": 0.9990835314587788, "grad_norm": 0.696948230266571, "learning_rate": 6.417765618771388e-11, "loss": 0.0441, "step": 56415 }, { "epoch": 0.9991012409958072, "grad_norm": 0.4885699152946472, "learning_rate": 6.168556128893777e-11, "loss": 0.0406, "step": 56416 }, { "epoch": 0.9991189505328357, "grad_norm": 0.8648345470428467, "learning_rate": 5.924281467062364e-11, "loss": 0.083, "step": 56417 }, { "epoch": 0.9991366600698641, "grad_norm": 0.3918311893939972, "learning_rate": 5.6849416339432855e-11, "loss": 0.0279, "step": 56418 }, { "epoch": 0.9991543696068925, "grad_norm": 0.6526279449462891, "learning_rate": 5.4505366303692074e-11, "loss": 0.0517, "step": 56419 }, { "epoch": 0.999172079143921, "grad_norm": 0.4224960505962372, "learning_rate": 5.2210664571727964e-11, "loss": 0.0395, "step": 56420 }, { "epoch": 0.9991897886809494, "grad_norm": 0.4272979497909546, "learning_rate": 4.9965311150201864e-11, "loss": 0.0262, "step": 56421 }, { "epoch": 0.9992074982179778, "grad_norm": 0.5183130502700806, "learning_rate": 4.776930604910579e-11, "loss": 0.0444, "step": 56422 }, { "epoch": 0.9992252077550062, "grad_norm": 1.3248997926712036, "learning_rate": 4.56226492717704e-11, "loss": 0.0759, "step": 56423 }, { "epoch": 0.9992429172920347, "grad_norm": 0.9029396772384644, "learning_rate": 4.352534082818771e-11, "loss": 0.0854, "step": 56424 }, { "epoch": 0.9992606268290631, "grad_norm": 1.1421040296554565, "learning_rate": 4.147738072335372e-11, "loss": 0.061, "step": 56425 }, { "epoch": 0.9992783363660915, "grad_norm": 0.6526013016700745, "learning_rate": 3.9478768965595105e-11, "loss": 0.0588, "step": 56426 }, { "epoch": 0.9992960459031199, "grad_norm": 0.6318877935409546, "learning_rate": 3.75295055615732e-11, "loss": 0.0501, "step": 56427 }, { "epoch": 0.9993137554401484, "grad_norm": 0.38416847586631775, "learning_rate": 3.5629590516284004e-11, "loss": 0.0417, "step": 56428 }, { "epoch": 0.9993314649771768, "grad_norm": 1.0606491565704346, "learning_rate": 3.377902383638887e-11, "loss": 0.0685, "step": 56429 }, { "epoch": 0.9993491745142052, "grad_norm": 0.3897482454776764, "learning_rate": 3.197780552688378e-11, "loss": 0.0408, "step": 56430 }, { "epoch": 0.9993668840512336, "grad_norm": 0.794891893863678, "learning_rate": 3.0225935597760765e-11, "loss": 0.0746, "step": 56431 }, { "epoch": 0.9993845935882621, "grad_norm": 0.8604879379272461, "learning_rate": 2.8523414050685148e-11, "loss": 0.0501, "step": 56432 }, { "epoch": 0.9994023031252905, "grad_norm": 0.5911352634429932, "learning_rate": 2.6870240892318265e-11, "loss": 0.0611, "step": 56433 }, { "epoch": 0.999420012662319, "grad_norm": 0.36827966570854187, "learning_rate": 2.526641612765612e-11, "loss": 0.0679, "step": 56434 }, { "epoch": 0.9994377221993475, "grad_norm": 0.5811046361923218, "learning_rate": 2.371193976502539e-11, "loss": 0.0539, "step": 56435 }, { "epoch": 0.9994554317363759, "grad_norm": 0.4242349863052368, "learning_rate": 2.2206811804426074e-11, "loss": 0.0375, "step": 56436 }, { "epoch": 0.9994731412734043, "grad_norm": 0.5744441151618958, "learning_rate": 2.0751032255850176e-11, "loss": 0.0561, "step": 56437 }, { "epoch": 0.9994908508104327, "grad_norm": 0.5274975299835205, "learning_rate": 1.9344601120963036e-11, "loss": 0.0642, "step": 56438 }, { "epoch": 0.9995085603474612, "grad_norm": 0.7059852480888367, "learning_rate": 1.798751840476065e-11, "loss": 0.0524, "step": 56439 }, { "epoch": 0.9995262698844896, "grad_norm": 0.4349272847175598, "learning_rate": 1.6679784113904362e-11, "loss": 0.0508, "step": 56440 }, { "epoch": 0.999543979421518, "grad_norm": 0.8410070538520813, "learning_rate": 1.5421398250059503e-11, "loss": 0.0541, "step": 56441 }, { "epoch": 0.9995616889585464, "grad_norm": 0.36186760663986206, "learning_rate": 1.421236081822208e-11, "loss": 0.0284, "step": 56442 }, { "epoch": 0.9995793984955749, "grad_norm": 0.6467425227165222, "learning_rate": 1.3052671821722762e-11, "loss": 0.0411, "step": 56443 }, { "epoch": 0.9995971080326033, "grad_norm": 0.7914958000183105, "learning_rate": 1.194233126555755e-11, "loss": 0.0535, "step": 56444 }, { "epoch": 0.9996148175696317, "grad_norm": 0.7071537971496582, "learning_rate": 1.0881339153057112e-11, "loss": 0.0811, "step": 56445 }, { "epoch": 0.9996325271066601, "grad_norm": 0.8757293224334717, "learning_rate": 9.869695489217456e-12, "loss": 0.0599, "step": 56446 }, { "epoch": 0.9996502366436886, "grad_norm": 0.3350847363471985, "learning_rate": 8.90740027403858e-12, "loss": 0.048, "step": 56447 }, { "epoch": 0.999667946180717, "grad_norm": 0.560255765914917, "learning_rate": 7.994453512516487e-12, "loss": 0.0207, "step": 56448 }, { "epoch": 0.9996856557177454, "grad_norm": 0.7196671366691589, "learning_rate": 7.130855207981846e-12, "loss": 0.0545, "step": 56449 }, { "epoch": 0.9997033652547739, "grad_norm": 0.8167745471000671, "learning_rate": 6.316605362099992e-12, "loss": 0.0714, "step": 56450 }, { "epoch": 0.9997210747918023, "grad_norm": 0.8301746845245361, "learning_rate": 5.551703978201595e-12, "loss": 0.0691, "step": 56451 }, { "epoch": 0.9997387843288307, "grad_norm": 0.5142480134963989, "learning_rate": 4.836151059617322e-12, "loss": 0.0319, "step": 56452 }, { "epoch": 0.9997564938658591, "grad_norm": 0.3846062123775482, "learning_rate": 4.169946608012509e-12, "loss": 0.0561, "step": 56453 }, { "epoch": 0.9997742034028876, "grad_norm": 0.63724285364151, "learning_rate": 3.553090625052491e-12, "loss": 0.0799, "step": 56454 }, { "epoch": 0.999791912939916, "grad_norm": 0.7958111763000488, "learning_rate": 2.985583114067936e-12, "loss": 0.0604, "step": 56455 }, { "epoch": 0.9998096224769444, "grad_norm": 0.6240877509117126, "learning_rate": 2.467424075058844e-12, "loss": 0.0693, "step": 56456 }, { "epoch": 0.9998273320139728, "grad_norm": 0.33696770668029785, "learning_rate": 1.9986135113558845e-12, "loss": 0.0424, "step": 56457 }, { "epoch": 0.9998450415510013, "grad_norm": 0.4716333746910095, "learning_rate": 1.5791514229590577e-12, "loss": 0.0562, "step": 56458 }, { "epoch": 0.9998627510880297, "grad_norm": 0.6137022972106934, "learning_rate": 1.209037813199032e-12, "loss": 0.0456, "step": 56459 }, { "epoch": 0.9998804606250581, "grad_norm": 0.4314163327217102, "learning_rate": 8.88272682075808e-13, "loss": 0.0446, "step": 56460 }, { "epoch": 0.9998981701620865, "grad_norm": 0.5064791440963745, "learning_rate": 6.1685603125472e-13, "loss": 0.0375, "step": 56461 }, { "epoch": 0.999915879699115, "grad_norm": 0.9169848561286926, "learning_rate": 3.94787860735768e-13, "loss": 0.0686, "step": 56462 }, { "epoch": 0.9999335892361434, "grad_norm": 0.5309838056564331, "learning_rate": 2.2206817218428655e-13, "loss": 0.0438, "step": 56463 }, { "epoch": 0.9999512987731718, "grad_norm": 0.5554822683334351, "learning_rate": 9.869696560027564e-14, "loss": 0.0675, "step": 56464 }, { "epoch": 0.9999690083102003, "grad_norm": 0.5348848104476929, "learning_rate": 2.4674240983735276e-14, "loss": 0.066, "step": 56465 }, { "epoch": 0.9999867178472287, "grad_norm": 0.6021392941474915, "learning_rate": 0.0, "loss": 0.0565, "step": 56466 }, { "epoch": 0.9999867178472287, "step": 56466, "total_flos": 5.522238675674464e+18, "train_loss": 0.09166212584712534, "train_runtime": 325079.5254, "train_samples_per_second": 16.675, "train_steps_per_second": 0.174 } ], "logging_steps": 1.0, "max_steps": 56466, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.522238675674464e+18, "train_batch_size": 12, "trial_name": null, "trial_params": null }