{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9996413750704441, "eval_steps": 500, "global_step": 2439, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0004098570623495056, "grad_norm": 49.01335709008168, "learning_rate": 1.3513513513513515e-07, "loss": 2.2934, "step": 1 }, { "epoch": 0.0008197141246990112, "grad_norm": 59.21541636222851, "learning_rate": 2.702702702702703e-07, "loss": 2.3983, "step": 2 }, { "epoch": 0.001229571187048517, "grad_norm": 57.32845705936646, "learning_rate": 4.0540540540540546e-07, "loss": 2.4133, "step": 3 }, { "epoch": 0.0016394282493980225, "grad_norm": 53.10138107158891, "learning_rate": 5.405405405405406e-07, "loss": 2.3242, "step": 4 }, { "epoch": 0.002049285311747528, "grad_norm": 48.32310245089257, "learning_rate": 6.756756756756758e-07, "loss": 2.2415, "step": 5 }, { "epoch": 0.002459142374097034, "grad_norm": 49.31889023890138, "learning_rate": 8.108108108108109e-07, "loss": 2.2124, "step": 6 }, { "epoch": 0.002868999436446539, "grad_norm": 47.45000482614807, "learning_rate": 9.459459459459461e-07, "loss": 2.1413, "step": 7 }, { "epoch": 0.003278856498796045, "grad_norm": 46.37094092198512, "learning_rate": 1.0810810810810812e-06, "loss": 2.0534, "step": 8 }, { "epoch": 0.0036887135611455503, "grad_norm": 41.36162100620571, "learning_rate": 1.2162162162162164e-06, "loss": 1.964, "step": 9 }, { "epoch": 0.004098570623495056, "grad_norm": 30.456632611243325, "learning_rate": 1.3513513513513515e-06, "loss": 1.6305, "step": 10 }, { "epoch": 0.0045084276858445614, "grad_norm": 27.241661585563122, "learning_rate": 1.4864864864864868e-06, "loss": 1.4412, "step": 11 }, { "epoch": 0.004918284748194068, "grad_norm": 23.77683663255652, "learning_rate": 1.6216216216216219e-06, "loss": 1.5066, "step": 12 }, { "epoch": 0.005328141810543573, "grad_norm": 5.913663897608947, "learning_rate": 1.756756756756757e-06, "loss": 1.1372, "step": 13 }, { "epoch": 0.005737998872893078, "grad_norm": 5.461038261852845, "learning_rate": 1.8918918918918922e-06, "loss": 1.0456, "step": 14 }, { "epoch": 0.006147855935242585, "grad_norm": 4.912429842482721, "learning_rate": 2.0270270270270273e-06, "loss": 1.0382, "step": 15 }, { "epoch": 0.00655771299759209, "grad_norm": 4.5109701055377425, "learning_rate": 2.1621621621621623e-06, "loss": 0.9461, "step": 16 }, { "epoch": 0.006967570059941595, "grad_norm": 4.532430641821237, "learning_rate": 2.297297297297298e-06, "loss": 1.034, "step": 17 }, { "epoch": 0.007377427122291101, "grad_norm": 5.425286559535307, "learning_rate": 2.432432432432433e-06, "loss": 0.8286, "step": 18 }, { "epoch": 0.007787284184640607, "grad_norm": 5.038292800632896, "learning_rate": 2.5675675675675675e-06, "loss": 0.9372, "step": 19 }, { "epoch": 0.008197141246990112, "grad_norm": 4.544477855523407, "learning_rate": 2.702702702702703e-06, "loss": 0.8883, "step": 20 }, { "epoch": 0.008606998309339618, "grad_norm": 4.373102375018528, "learning_rate": 2.837837837837838e-06, "loss": 0.8893, "step": 21 }, { "epoch": 0.009016855371689123, "grad_norm": 3.2370075376600234, "learning_rate": 2.9729729729729736e-06, "loss": 0.7992, "step": 22 }, { "epoch": 0.009426712434038628, "grad_norm": 1.794338556752799, "learning_rate": 3.1081081081081082e-06, "loss": 0.7882, "step": 23 }, { "epoch": 0.009836569496388135, "grad_norm": 1.7068718652568642, "learning_rate": 3.2432432432432437e-06, "loss": 0.8435, "step": 24 }, { "epoch": 0.01024642655873764, "grad_norm": 1.662807369546924, "learning_rate": 3.3783783783783788e-06, "loss": 0.8346, "step": 25 }, { "epoch": 0.010656283621087146, "grad_norm": 1.4049587594995725, "learning_rate": 3.513513513513514e-06, "loss": 0.7559, "step": 26 }, { "epoch": 0.011066140683436651, "grad_norm": 1.3451508132913392, "learning_rate": 3.648648648648649e-06, "loss": 0.7333, "step": 27 }, { "epoch": 0.011475997745786157, "grad_norm": 1.29451204547309, "learning_rate": 3.7837837837837844e-06, "loss": 0.7947, "step": 28 }, { "epoch": 0.011885854808135662, "grad_norm": 1.1453856958737136, "learning_rate": 3.918918918918919e-06, "loss": 0.7061, "step": 29 }, { "epoch": 0.01229571187048517, "grad_norm": 1.2118899595282928, "learning_rate": 4.0540540540540545e-06, "loss": 0.7319, "step": 30 }, { "epoch": 0.012705568932834675, "grad_norm": 1.1856627020901178, "learning_rate": 4.189189189189189e-06, "loss": 0.7457, "step": 31 }, { "epoch": 0.01311542599518418, "grad_norm": 1.1755108006456914, "learning_rate": 4.324324324324325e-06, "loss": 0.6754, "step": 32 }, { "epoch": 0.013525283057533685, "grad_norm": 1.2595771852598254, "learning_rate": 4.45945945945946e-06, "loss": 0.7436, "step": 33 }, { "epoch": 0.01393514011988319, "grad_norm": 1.0934250663221914, "learning_rate": 4.594594594594596e-06, "loss": 0.6993, "step": 34 }, { "epoch": 0.014344997182232696, "grad_norm": 1.2493329306520378, "learning_rate": 4.72972972972973e-06, "loss": 0.7389, "step": 35 }, { "epoch": 0.014754854244582201, "grad_norm": 1.1478790185395542, "learning_rate": 4.864864864864866e-06, "loss": 0.6885, "step": 36 }, { "epoch": 0.015164711306931708, "grad_norm": 1.2416469441573061, "learning_rate": 5e-06, "loss": 0.7514, "step": 37 }, { "epoch": 0.015574568369281214, "grad_norm": 1.1656743378361971, "learning_rate": 5.135135135135135e-06, "loss": 0.6883, "step": 38 }, { "epoch": 0.015984425431630717, "grad_norm": 1.183437011474807, "learning_rate": 5.2702702702702705e-06, "loss": 0.6818, "step": 39 }, { "epoch": 0.016394282493980224, "grad_norm": 1.2068094873021005, "learning_rate": 5.405405405405406e-06, "loss": 0.7525, "step": 40 }, { "epoch": 0.01680413955632973, "grad_norm": 1.1685562405286085, "learning_rate": 5.540540540540541e-06, "loss": 0.7605, "step": 41 }, { "epoch": 0.017213996618679235, "grad_norm": 1.1704098372409182, "learning_rate": 5.675675675675676e-06, "loss": 0.6944, "step": 42 }, { "epoch": 0.017623853681028742, "grad_norm": 1.127182240266125, "learning_rate": 5.810810810810811e-06, "loss": 0.7404, "step": 43 }, { "epoch": 0.018033710743378246, "grad_norm": 1.2214453163369945, "learning_rate": 5.945945945945947e-06, "loss": 0.6942, "step": 44 }, { "epoch": 0.018443567805727753, "grad_norm": 1.081474086286073, "learning_rate": 6.081081081081082e-06, "loss": 0.6826, "step": 45 }, { "epoch": 0.018853424868077256, "grad_norm": 1.157898254407067, "learning_rate": 6.2162162162162164e-06, "loss": 0.713, "step": 46 }, { "epoch": 0.019263281930426764, "grad_norm": 1.1872211188948483, "learning_rate": 6.351351351351351e-06, "loss": 0.7309, "step": 47 }, { "epoch": 0.01967313899277627, "grad_norm": 1.1014977885747814, "learning_rate": 6.486486486486487e-06, "loss": 0.6961, "step": 48 }, { "epoch": 0.020082996055125774, "grad_norm": 1.1937154186947874, "learning_rate": 6.621621621621622e-06, "loss": 0.7549, "step": 49 }, { "epoch": 0.02049285311747528, "grad_norm": 1.2881723334840716, "learning_rate": 6.7567567567567575e-06, "loss": 0.7535, "step": 50 }, { "epoch": 0.020902710179824785, "grad_norm": 1.05402085982103, "learning_rate": 6.891891891891892e-06, "loss": 0.6774, "step": 51 }, { "epoch": 0.021312567242174292, "grad_norm": 1.1714039601252912, "learning_rate": 7.027027027027028e-06, "loss": 0.7248, "step": 52 }, { "epoch": 0.021722424304523796, "grad_norm": 1.292734308217589, "learning_rate": 7.162162162162163e-06, "loss": 0.7461, "step": 53 }, { "epoch": 0.022132281366873303, "grad_norm": 1.1544178374967604, "learning_rate": 7.297297297297298e-06, "loss": 0.7291, "step": 54 }, { "epoch": 0.02254213842922281, "grad_norm": 1.1793062422415168, "learning_rate": 7.4324324324324324e-06, "loss": 0.7436, "step": 55 }, { "epoch": 0.022951995491572313, "grad_norm": 1.2404333132020975, "learning_rate": 7.567567567567569e-06, "loss": 0.682, "step": 56 }, { "epoch": 0.02336185255392182, "grad_norm": 1.1751050706468302, "learning_rate": 7.702702702702704e-06, "loss": 0.7059, "step": 57 }, { "epoch": 0.023771709616271324, "grad_norm": 1.2428490460020951, "learning_rate": 7.837837837837838e-06, "loss": 0.7209, "step": 58 }, { "epoch": 0.02418156667862083, "grad_norm": 1.1338525659661436, "learning_rate": 7.972972972972974e-06, "loss": 0.701, "step": 59 }, { "epoch": 0.02459142374097034, "grad_norm": 1.134959422437001, "learning_rate": 8.108108108108109e-06, "loss": 0.7122, "step": 60 }, { "epoch": 0.025001280803319842, "grad_norm": 1.1655518201428814, "learning_rate": 8.243243243243245e-06, "loss": 0.7229, "step": 61 }, { "epoch": 0.02541113786566935, "grad_norm": 1.1014613286208041, "learning_rate": 8.378378378378378e-06, "loss": 0.6993, "step": 62 }, { "epoch": 0.025820994928018853, "grad_norm": 1.1060640009887053, "learning_rate": 8.513513513513514e-06, "loss": 0.6944, "step": 63 }, { "epoch": 0.02623085199036836, "grad_norm": 1.2668666740192445, "learning_rate": 8.64864864864865e-06, "loss": 0.6894, "step": 64 }, { "epoch": 0.026640709052717863, "grad_norm": 1.1102589789011996, "learning_rate": 8.783783783783785e-06, "loss": 0.654, "step": 65 }, { "epoch": 0.02705056611506737, "grad_norm": 1.1453769694903757, "learning_rate": 8.91891891891892e-06, "loss": 0.6925, "step": 66 }, { "epoch": 0.027460423177416877, "grad_norm": 1.1271643953794501, "learning_rate": 9.054054054054054e-06, "loss": 0.6568, "step": 67 }, { "epoch": 0.02787028023976638, "grad_norm": 1.0648645427829413, "learning_rate": 9.189189189189191e-06, "loss": 0.7062, "step": 68 }, { "epoch": 0.028280137302115888, "grad_norm": 1.105232315977265, "learning_rate": 9.324324324324325e-06, "loss": 0.6849, "step": 69 }, { "epoch": 0.028689994364465392, "grad_norm": 1.2277445089194814, "learning_rate": 9.45945945945946e-06, "loss": 0.713, "step": 70 }, { "epoch": 0.0290998514268149, "grad_norm": 1.1027944747500484, "learning_rate": 9.594594594594594e-06, "loss": 0.6318, "step": 71 }, { "epoch": 0.029509708489164403, "grad_norm": 1.1094902354952703, "learning_rate": 9.729729729729732e-06, "loss": 0.6535, "step": 72 }, { "epoch": 0.02991956555151391, "grad_norm": 1.2045330953803275, "learning_rate": 9.864864864864865e-06, "loss": 0.7215, "step": 73 }, { "epoch": 0.030329422613863417, "grad_norm": 1.0517930133628461, "learning_rate": 1e-05, "loss": 0.6752, "step": 74 }, { "epoch": 0.03073927967621292, "grad_norm": 1.1278919275121349, "learning_rate": 9.999995588590236e-06, "loss": 0.6725, "step": 75 }, { "epoch": 0.031149136738562427, "grad_norm": 1.0977649378435859, "learning_rate": 9.99998235436873e-06, "loss": 0.6463, "step": 76 }, { "epoch": 0.031558993800911934, "grad_norm": 1.0911438685359953, "learning_rate": 9.999960297358836e-06, "loss": 0.6958, "step": 77 }, { "epoch": 0.031968850863261435, "grad_norm": 1.1116578864429443, "learning_rate": 9.999929417599469e-06, "loss": 0.6987, "step": 78 }, { "epoch": 0.03237870792561094, "grad_norm": 1.08688280879502, "learning_rate": 9.999889715145124e-06, "loss": 0.6941, "step": 79 }, { "epoch": 0.03278856498796045, "grad_norm": 1.2451282028462956, "learning_rate": 9.999841190065856e-06, "loss": 0.7219, "step": 80 }, { "epoch": 0.033198422050309956, "grad_norm": 1.1790622366966503, "learning_rate": 9.999783842447291e-06, "loss": 0.7096, "step": 81 }, { "epoch": 0.03360827911265946, "grad_norm": 1.0876000545374471, "learning_rate": 9.999717672390624e-06, "loss": 0.6659, "step": 82 }, { "epoch": 0.03401813617500896, "grad_norm": 1.0019594342611564, "learning_rate": 9.999642680012614e-06, "loss": 0.6654, "step": 83 }, { "epoch": 0.03442799323735847, "grad_norm": 1.0830496390891027, "learning_rate": 9.99955886544559e-06, "loss": 0.6862, "step": 84 }, { "epoch": 0.03483785029970798, "grad_norm": 1.0673551928084517, "learning_rate": 9.999466228837452e-06, "loss": 0.6694, "step": 85 }, { "epoch": 0.035247707362057484, "grad_norm": 1.1511611157610206, "learning_rate": 9.999364770351659e-06, "loss": 0.689, "step": 86 }, { "epoch": 0.03565756442440699, "grad_norm": 1.1632475594360108, "learning_rate": 9.99925449016724e-06, "loss": 0.6754, "step": 87 }, { "epoch": 0.03606742148675649, "grad_norm": 1.0601017077569383, "learning_rate": 9.999135388478796e-06, "loss": 0.6319, "step": 88 }, { "epoch": 0.036477278549106, "grad_norm": 1.047208038053008, "learning_rate": 9.999007465496488e-06, "loss": 0.677, "step": 89 }, { "epoch": 0.036887135611455506, "grad_norm": 1.0713542913744256, "learning_rate": 9.998870721446042e-06, "loss": 0.6642, "step": 90 }, { "epoch": 0.03729699267380501, "grad_norm": 1.1013064270069832, "learning_rate": 9.998725156568755e-06, "loss": 0.6742, "step": 91 }, { "epoch": 0.03770684973615451, "grad_norm": 1.0120171585386426, "learning_rate": 9.998570771121481e-06, "loss": 0.6601, "step": 92 }, { "epoch": 0.03811670679850402, "grad_norm": 1.0559277865685155, "learning_rate": 9.998407565376649e-06, "loss": 0.6656, "step": 93 }, { "epoch": 0.03852656386085353, "grad_norm": 0.983241380367742, "learning_rate": 9.998235539622239e-06, "loss": 0.6598, "step": 94 }, { "epoch": 0.038936420923203034, "grad_norm": 1.1623614670017297, "learning_rate": 9.998054694161807e-06, "loss": 0.7103, "step": 95 }, { "epoch": 0.03934627798555254, "grad_norm": 1.1233712851445272, "learning_rate": 9.997865029314464e-06, "loss": 0.7028, "step": 96 }, { "epoch": 0.03975613504790204, "grad_norm": 1.0989992958760133, "learning_rate": 9.997666545414887e-06, "loss": 0.7208, "step": 97 }, { "epoch": 0.04016599211025155, "grad_norm": 1.088236044327103, "learning_rate": 9.997459242813312e-06, "loss": 0.7138, "step": 98 }, { "epoch": 0.040575849172601056, "grad_norm": 1.1337556767937405, "learning_rate": 9.997243121875538e-06, "loss": 0.6828, "step": 99 }, { "epoch": 0.04098570623495056, "grad_norm": 1.1819738528676442, "learning_rate": 9.997018182982925e-06, "loss": 0.7214, "step": 100 }, { "epoch": 0.04139556329730007, "grad_norm": 1.1570275816882574, "learning_rate": 9.996784426532394e-06, "loss": 0.6396, "step": 101 }, { "epoch": 0.04180542035964957, "grad_norm": 1.092931513861752, "learning_rate": 9.996541852936417e-06, "loss": 0.6774, "step": 102 }, { "epoch": 0.04221527742199908, "grad_norm": 1.0426333955128184, "learning_rate": 9.996290462623037e-06, "loss": 0.6129, "step": 103 }, { "epoch": 0.042625134484348584, "grad_norm": 1.0672346378292075, "learning_rate": 9.996030256035845e-06, "loss": 0.6644, "step": 104 }, { "epoch": 0.04303499154669809, "grad_norm": 1.0848067273768167, "learning_rate": 9.995761233633993e-06, "loss": 0.6857, "step": 105 }, { "epoch": 0.04344484860904759, "grad_norm": 1.13663633580764, "learning_rate": 9.995483395892188e-06, "loss": 0.696, "step": 106 }, { "epoch": 0.0438547056713971, "grad_norm": 1.1656324701688152, "learning_rate": 9.995196743300693e-06, "loss": 0.6947, "step": 107 }, { "epoch": 0.044264562733746605, "grad_norm": 1.1576176390793491, "learning_rate": 9.994901276365323e-06, "loss": 0.6985, "step": 108 }, { "epoch": 0.04467441979609611, "grad_norm": 1.1106013388002076, "learning_rate": 9.99459699560745e-06, "loss": 0.704, "step": 109 }, { "epoch": 0.04508427685844562, "grad_norm": 1.1206714046451864, "learning_rate": 9.994283901563999e-06, "loss": 0.6825, "step": 110 }, { "epoch": 0.04549413392079512, "grad_norm": 1.065880699315399, "learning_rate": 9.99396199478744e-06, "loss": 0.679, "step": 111 }, { "epoch": 0.04590399098314463, "grad_norm": 1.1464290577836065, "learning_rate": 9.993631275845798e-06, "loss": 0.6951, "step": 112 }, { "epoch": 0.046313848045494134, "grad_norm": 1.097226361694393, "learning_rate": 9.993291745322652e-06, "loss": 0.6931, "step": 113 }, { "epoch": 0.04672370510784364, "grad_norm": 1.1153645326304609, "learning_rate": 9.992943403817123e-06, "loss": 0.6192, "step": 114 }, { "epoch": 0.04713356217019315, "grad_norm": 1.0355318318463236, "learning_rate": 9.992586251943884e-06, "loss": 0.654, "step": 115 }, { "epoch": 0.04754341923254265, "grad_norm": 1.0527668359082654, "learning_rate": 9.992220290333149e-06, "loss": 0.6549, "step": 116 }, { "epoch": 0.047953276294892155, "grad_norm": 1.0580358398414265, "learning_rate": 9.991845519630679e-06, "loss": 0.6834, "step": 117 }, { "epoch": 0.04836313335724166, "grad_norm": 1.0842317517940556, "learning_rate": 9.991461940497786e-06, "loss": 0.6242, "step": 118 }, { "epoch": 0.04877299041959117, "grad_norm": 1.0645831923418094, "learning_rate": 9.991069553611317e-06, "loss": 0.6671, "step": 119 }, { "epoch": 0.04918284748194068, "grad_norm": 0.9393604851023265, "learning_rate": 9.990668359663664e-06, "loss": 0.631, "step": 120 }, { "epoch": 0.04959270454429018, "grad_norm": 1.0993479462743998, "learning_rate": 9.99025835936276e-06, "loss": 0.7129, "step": 121 }, { "epoch": 0.050002561606639684, "grad_norm": 1.1264133055728651, "learning_rate": 9.989839553432079e-06, "loss": 0.7009, "step": 122 }, { "epoch": 0.05041241866898919, "grad_norm": 1.134307839048874, "learning_rate": 9.989411942610625e-06, "loss": 0.6779, "step": 123 }, { "epoch": 0.0508222757313387, "grad_norm": 1.0367700459851987, "learning_rate": 9.98897552765295e-06, "loss": 0.6621, "step": 124 }, { "epoch": 0.0512321327936882, "grad_norm": 1.0917731030269913, "learning_rate": 9.988530309329132e-06, "loss": 0.6903, "step": 125 }, { "epoch": 0.051641989856037705, "grad_norm": 1.0564257612846901, "learning_rate": 9.988076288424791e-06, "loss": 0.6546, "step": 126 }, { "epoch": 0.05205184691838721, "grad_norm": 1.0156442807800463, "learning_rate": 9.987613465741072e-06, "loss": 0.6381, "step": 127 }, { "epoch": 0.05246170398073672, "grad_norm": 1.1193274043015407, "learning_rate": 9.987141842094659e-06, "loss": 0.6616, "step": 128 }, { "epoch": 0.052871561043086227, "grad_norm": 1.030978592049532, "learning_rate": 9.986661418317759e-06, "loss": 0.6584, "step": 129 }, { "epoch": 0.05328141810543573, "grad_norm": 1.083236035685641, "learning_rate": 9.986172195258112e-06, "loss": 0.6979, "step": 130 }, { "epoch": 0.053691275167785234, "grad_norm": 0.9634816558589199, "learning_rate": 9.985674173778984e-06, "loss": 0.6649, "step": 131 }, { "epoch": 0.05410113223013474, "grad_norm": 1.1699966394203942, "learning_rate": 9.985167354759164e-06, "loss": 0.66, "step": 132 }, { "epoch": 0.05451098929248425, "grad_norm": 1.042947531276349, "learning_rate": 9.98465173909297e-06, "loss": 0.6671, "step": 133 }, { "epoch": 0.054920846354833755, "grad_norm": 1.1435745295556656, "learning_rate": 9.984127327690232e-06, "loss": 0.7009, "step": 134 }, { "epoch": 0.055330703417183255, "grad_norm": 1.102771062284289, "learning_rate": 9.983594121476314e-06, "loss": 0.6872, "step": 135 }, { "epoch": 0.05574056047953276, "grad_norm": 1.0687279906323341, "learning_rate": 9.98305212139209e-06, "loss": 0.6989, "step": 136 }, { "epoch": 0.05615041754188227, "grad_norm": 1.0516557476121642, "learning_rate": 9.982501328393954e-06, "loss": 0.6319, "step": 137 }, { "epoch": 0.056560274604231776, "grad_norm": 0.9624668129610545, "learning_rate": 9.981941743453815e-06, "loss": 0.6555, "step": 138 }, { "epoch": 0.056970131666581277, "grad_norm": 1.088525181954864, "learning_rate": 9.981373367559095e-06, "loss": 0.7547, "step": 139 }, { "epoch": 0.057379988728930784, "grad_norm": 1.033265356003468, "learning_rate": 9.980796201712734e-06, "loss": 0.6406, "step": 140 }, { "epoch": 0.05778984579128029, "grad_norm": 1.1134925030662137, "learning_rate": 9.980210246933174e-06, "loss": 0.7159, "step": 141 }, { "epoch": 0.0581997028536298, "grad_norm": 1.0909440379906783, "learning_rate": 9.979615504254371e-06, "loss": 0.7044, "step": 142 }, { "epoch": 0.058609559915979305, "grad_norm": 1.0340502676045038, "learning_rate": 9.979011974725787e-06, "loss": 0.6459, "step": 143 }, { "epoch": 0.059019416978328805, "grad_norm": 1.042852776271893, "learning_rate": 9.978399659412388e-06, "loss": 0.6635, "step": 144 }, { "epoch": 0.05942927404067831, "grad_norm": 1.018315316080827, "learning_rate": 9.977778559394642e-06, "loss": 0.689, "step": 145 }, { "epoch": 0.05983913110302782, "grad_norm": 1.0880843930477184, "learning_rate": 9.977148675768523e-06, "loss": 0.6911, "step": 146 }, { "epoch": 0.060248988165377326, "grad_norm": 1.1335809066439522, "learning_rate": 9.976510009645495e-06, "loss": 0.6483, "step": 147 }, { "epoch": 0.06065884522772683, "grad_norm": 1.053328399603831, "learning_rate": 9.975862562152531e-06, "loss": 0.6679, "step": 148 }, { "epoch": 0.061068702290076333, "grad_norm": 1.0236775485286118, "learning_rate": 9.975206334432094e-06, "loss": 0.66, "step": 149 }, { "epoch": 0.06147855935242584, "grad_norm": 1.0146541563297151, "learning_rate": 9.974541327642135e-06, "loss": 0.665, "step": 150 }, { "epoch": 0.06188841641477535, "grad_norm": 1.030895184661015, "learning_rate": 9.973867542956104e-06, "loss": 0.6598, "step": 151 }, { "epoch": 0.062298273477124855, "grad_norm": 1.0218939177326452, "learning_rate": 9.973184981562937e-06, "loss": 0.6501, "step": 152 }, { "epoch": 0.06270813053947435, "grad_norm": 1.0975563932359482, "learning_rate": 9.972493644667056e-06, "loss": 0.7118, "step": 153 }, { "epoch": 0.06311798760182387, "grad_norm": 1.128920499178967, "learning_rate": 9.97179353348837e-06, "loss": 0.745, "step": 154 }, { "epoch": 0.06352784466417337, "grad_norm": 1.1887090759416137, "learning_rate": 9.97108464926227e-06, "loss": 0.7285, "step": 155 }, { "epoch": 0.06393770172652287, "grad_norm": 1.083805370237057, "learning_rate": 9.970366993239627e-06, "loss": 0.668, "step": 156 }, { "epoch": 0.06434755878887238, "grad_norm": 1.063613225004549, "learning_rate": 9.96964056668679e-06, "loss": 0.6323, "step": 157 }, { "epoch": 0.06475741585122188, "grad_norm": 1.057270931722657, "learning_rate": 9.968905370885586e-06, "loss": 0.6564, "step": 158 }, { "epoch": 0.0651672729135714, "grad_norm": 1.0771682716609274, "learning_rate": 9.968161407133317e-06, "loss": 0.6836, "step": 159 }, { "epoch": 0.0655771299759209, "grad_norm": 1.1367061387644046, "learning_rate": 9.96740867674275e-06, "loss": 0.7043, "step": 160 }, { "epoch": 0.0659869870382704, "grad_norm": 1.029623849670837, "learning_rate": 9.966647181042133e-06, "loss": 0.6501, "step": 161 }, { "epoch": 0.06639684410061991, "grad_norm": 1.0786104556322362, "learning_rate": 9.965876921375165e-06, "loss": 0.6817, "step": 162 }, { "epoch": 0.06680670116296941, "grad_norm": 1.0372664887316847, "learning_rate": 9.965097899101025e-06, "loss": 0.6793, "step": 163 }, { "epoch": 0.06721655822531893, "grad_norm": 1.0379062158938954, "learning_rate": 9.964310115594347e-06, "loss": 0.6628, "step": 164 }, { "epoch": 0.06762641528766843, "grad_norm": 1.0532437013480616, "learning_rate": 9.963513572245223e-06, "loss": 0.6877, "step": 165 }, { "epoch": 0.06803627235001793, "grad_norm": 1.025080182095345, "learning_rate": 9.962708270459205e-06, "loss": 0.6508, "step": 166 }, { "epoch": 0.06844612941236744, "grad_norm": 1.1030208777412314, "learning_rate": 9.961894211657298e-06, "loss": 0.7071, "step": 167 }, { "epoch": 0.06885598647471694, "grad_norm": 1.1316643367566548, "learning_rate": 9.961071397275965e-06, "loss": 0.7121, "step": 168 }, { "epoch": 0.06926584353706645, "grad_norm": 1.0595144140873094, "learning_rate": 9.96023982876711e-06, "loss": 0.6794, "step": 169 }, { "epoch": 0.06967570059941595, "grad_norm": 1.004840225482448, "learning_rate": 9.959399507598092e-06, "loss": 0.6285, "step": 170 }, { "epoch": 0.07008555766176545, "grad_norm": 0.9898112399487675, "learning_rate": 9.958550435251713e-06, "loss": 0.6612, "step": 171 }, { "epoch": 0.07049541472411497, "grad_norm": 0.987965590004236, "learning_rate": 9.95769261322621e-06, "loss": 0.6584, "step": 172 }, { "epoch": 0.07090527178646447, "grad_norm": 1.0199732213137955, "learning_rate": 9.956826043035268e-06, "loss": 0.6692, "step": 173 }, { "epoch": 0.07131512884881398, "grad_norm": 1.0568233066065653, "learning_rate": 9.955950726208006e-06, "loss": 0.6664, "step": 174 }, { "epoch": 0.07172498591116348, "grad_norm": 0.9821311546451016, "learning_rate": 9.955066664288975e-06, "loss": 0.6785, "step": 175 }, { "epoch": 0.07213484297351298, "grad_norm": 1.0423055733119724, "learning_rate": 9.954173858838159e-06, "loss": 0.626, "step": 176 }, { "epoch": 0.0725447000358625, "grad_norm": 0.965613128873412, "learning_rate": 9.953272311430972e-06, "loss": 0.628, "step": 177 }, { "epoch": 0.072954557098212, "grad_norm": 0.9599081651224648, "learning_rate": 9.952362023658248e-06, "loss": 0.5941, "step": 178 }, { "epoch": 0.0733644141605615, "grad_norm": 0.9240749409872056, "learning_rate": 9.951442997126253e-06, "loss": 0.6607, "step": 179 }, { "epoch": 0.07377427122291101, "grad_norm": 1.0880044125612822, "learning_rate": 9.950515233456666e-06, "loss": 0.7116, "step": 180 }, { "epoch": 0.07418412828526051, "grad_norm": 1.0076938413841008, "learning_rate": 9.949578734286582e-06, "loss": 0.6845, "step": 181 }, { "epoch": 0.07459398534761003, "grad_norm": 1.065855442220535, "learning_rate": 9.948633501268518e-06, "loss": 0.6973, "step": 182 }, { "epoch": 0.07500384240995953, "grad_norm": 1.0698511851775558, "learning_rate": 9.947679536070399e-06, "loss": 0.7363, "step": 183 }, { "epoch": 0.07541369947230903, "grad_norm": 0.9993646901849131, "learning_rate": 9.946716840375552e-06, "loss": 0.6597, "step": 184 }, { "epoch": 0.07582355653465854, "grad_norm": 0.9914837384412523, "learning_rate": 9.94574541588272e-06, "loss": 0.6578, "step": 185 }, { "epoch": 0.07623341359700804, "grad_norm": 1.0291124949973818, "learning_rate": 9.94476526430604e-06, "loss": 0.6855, "step": 186 }, { "epoch": 0.07664327065935755, "grad_norm": 0.9812929043856575, "learning_rate": 9.943776387375056e-06, "loss": 0.6345, "step": 187 }, { "epoch": 0.07705312772170705, "grad_norm": 0.983769103190469, "learning_rate": 9.9427787868347e-06, "loss": 0.6252, "step": 188 }, { "epoch": 0.07746298478405655, "grad_norm": 0.9909847221573748, "learning_rate": 9.941772464445306e-06, "loss": 0.654, "step": 189 }, { "epoch": 0.07787284184640607, "grad_norm": 1.0749688310783163, "learning_rate": 9.940757421982592e-06, "loss": 0.6833, "step": 190 }, { "epoch": 0.07828269890875557, "grad_norm": 0.9705960102474551, "learning_rate": 9.939733661237667e-06, "loss": 0.6444, "step": 191 }, { "epoch": 0.07869255597110508, "grad_norm": 1.0130395679355733, "learning_rate": 9.93870118401702e-06, "loss": 0.6135, "step": 192 }, { "epoch": 0.07910241303345458, "grad_norm": 1.0257758818469116, "learning_rate": 9.937659992142525e-06, "loss": 0.6544, "step": 193 }, { "epoch": 0.07951227009580408, "grad_norm": 1.0134619215500047, "learning_rate": 9.936610087451428e-06, "loss": 0.6585, "step": 194 }, { "epoch": 0.0799221271581536, "grad_norm": 1.032513029711096, "learning_rate": 9.935551471796358e-06, "loss": 0.6637, "step": 195 }, { "epoch": 0.0803319842205031, "grad_norm": 1.0884040431125903, "learning_rate": 9.934484147045308e-06, "loss": 0.6884, "step": 196 }, { "epoch": 0.08074184128285261, "grad_norm": 1.0463573530163388, "learning_rate": 9.933408115081639e-06, "loss": 0.6645, "step": 197 }, { "epoch": 0.08115169834520211, "grad_norm": 1.0162066486028827, "learning_rate": 9.93232337780408e-06, "loss": 0.6569, "step": 198 }, { "epoch": 0.08156155540755161, "grad_norm": 1.1010297420405675, "learning_rate": 9.931229937126719e-06, "loss": 0.699, "step": 199 }, { "epoch": 0.08197141246990113, "grad_norm": 0.9777246407752307, "learning_rate": 9.930127794979004e-06, "loss": 0.637, "step": 200 }, { "epoch": 0.08238126953225063, "grad_norm": 1.0398688925176418, "learning_rate": 9.929016953305729e-06, "loss": 0.671, "step": 201 }, { "epoch": 0.08279112659460014, "grad_norm": 1.16215028723065, "learning_rate": 9.92789741406705e-06, "loss": 0.731, "step": 202 }, { "epoch": 0.08320098365694964, "grad_norm": 1.1043990383574276, "learning_rate": 9.926769179238467e-06, "loss": 0.6804, "step": 203 }, { "epoch": 0.08361084071929914, "grad_norm": 1.0204872859073235, "learning_rate": 9.925632250810817e-06, "loss": 0.6894, "step": 204 }, { "epoch": 0.08402069778164865, "grad_norm": 1.037481718466932, "learning_rate": 9.924486630790287e-06, "loss": 0.6241, "step": 205 }, { "epoch": 0.08443055484399815, "grad_norm": 0.9660114538134681, "learning_rate": 9.923332321198396e-06, "loss": 0.6554, "step": 206 }, { "epoch": 0.08484041190634767, "grad_norm": 0.9890957710165509, "learning_rate": 9.922169324071997e-06, "loss": 0.6799, "step": 207 }, { "epoch": 0.08525026896869717, "grad_norm": 0.9889151428600476, "learning_rate": 9.920997641463273e-06, "loss": 0.6607, "step": 208 }, { "epoch": 0.08566012603104667, "grad_norm": 0.9225542452088764, "learning_rate": 9.91981727543973e-06, "loss": 0.6283, "step": 209 }, { "epoch": 0.08606998309339618, "grad_norm": 1.0038539929734707, "learning_rate": 9.918628228084204e-06, "loss": 0.6784, "step": 210 }, { "epoch": 0.08647984015574568, "grad_norm": 0.9611108155234929, "learning_rate": 9.917430501494841e-06, "loss": 0.6466, "step": 211 }, { "epoch": 0.08688969721809518, "grad_norm": 1.0525087017265211, "learning_rate": 9.91622409778511e-06, "loss": 0.6321, "step": 212 }, { "epoch": 0.0872995542804447, "grad_norm": 0.9726270063782991, "learning_rate": 9.915009019083781e-06, "loss": 0.6172, "step": 213 }, { "epoch": 0.0877094113427942, "grad_norm": 0.9149491313393994, "learning_rate": 9.913785267534945e-06, "loss": 0.645, "step": 214 }, { "epoch": 0.08811926840514371, "grad_norm": 1.0410273210777379, "learning_rate": 9.912552845297988e-06, "loss": 0.6763, "step": 215 }, { "epoch": 0.08852912546749321, "grad_norm": 1.0704555293652906, "learning_rate": 9.911311754547594e-06, "loss": 0.7155, "step": 216 }, { "epoch": 0.08893898252984271, "grad_norm": 1.0370844494372882, "learning_rate": 9.910061997473753e-06, "loss": 0.7087, "step": 217 }, { "epoch": 0.08934883959219223, "grad_norm": 0.9979433956864201, "learning_rate": 9.908803576281736e-06, "loss": 0.62, "step": 218 }, { "epoch": 0.08975869665454173, "grad_norm": 0.9882328236452865, "learning_rate": 9.90753649319211e-06, "loss": 0.6753, "step": 219 }, { "epoch": 0.09016855371689124, "grad_norm": 0.935450584155394, "learning_rate": 9.906260750440724e-06, "loss": 0.6282, "step": 220 }, { "epoch": 0.09057841077924074, "grad_norm": 1.0869525809347043, "learning_rate": 9.904976350278705e-06, "loss": 0.6909, "step": 221 }, { "epoch": 0.09098826784159024, "grad_norm": 1.0104027085547709, "learning_rate": 9.903683294972464e-06, "loss": 0.6501, "step": 222 }, { "epoch": 0.09139812490393975, "grad_norm": 0.9792665492255069, "learning_rate": 9.902381586803677e-06, "loss": 0.6787, "step": 223 }, { "epoch": 0.09180798196628925, "grad_norm": 0.9953950468471995, "learning_rate": 9.90107122806929e-06, "loss": 0.6362, "step": 224 }, { "epoch": 0.09221783902863877, "grad_norm": 1.0080516093853384, "learning_rate": 9.899752221081518e-06, "loss": 0.6846, "step": 225 }, { "epoch": 0.09262769609098827, "grad_norm": 1.0193999536794152, "learning_rate": 9.89842456816783e-06, "loss": 0.6854, "step": 226 }, { "epoch": 0.09303755315333777, "grad_norm": 0.973928244117763, "learning_rate": 9.897088271670957e-06, "loss": 0.6018, "step": 227 }, { "epoch": 0.09344741021568728, "grad_norm": 1.0266667291493825, "learning_rate": 9.895743333948875e-06, "loss": 0.6935, "step": 228 }, { "epoch": 0.09385726727803678, "grad_norm": 0.990626591489092, "learning_rate": 9.894389757374818e-06, "loss": 0.6765, "step": 229 }, { "epoch": 0.0942671243403863, "grad_norm": 0.9337789410683777, "learning_rate": 9.893027544337257e-06, "loss": 0.6345, "step": 230 }, { "epoch": 0.0946769814027358, "grad_norm": 0.9759740654092015, "learning_rate": 9.891656697239902e-06, "loss": 0.6202, "step": 231 }, { "epoch": 0.0950868384650853, "grad_norm": 1.0363592872853675, "learning_rate": 9.890277218501701e-06, "loss": 0.6723, "step": 232 }, { "epoch": 0.09549669552743481, "grad_norm": 0.983033866659552, "learning_rate": 9.888889110556834e-06, "loss": 0.6809, "step": 233 }, { "epoch": 0.09590655258978431, "grad_norm": 0.9128608394396988, "learning_rate": 9.887492375854705e-06, "loss": 0.6618, "step": 234 }, { "epoch": 0.09631640965213382, "grad_norm": 1.026820117093061, "learning_rate": 9.886087016859941e-06, "loss": 0.6943, "step": 235 }, { "epoch": 0.09672626671448332, "grad_norm": 1.034164516673056, "learning_rate": 9.884673036052389e-06, "loss": 0.6722, "step": 236 }, { "epoch": 0.09713612377683283, "grad_norm": 1.0218180313554959, "learning_rate": 9.883250435927108e-06, "loss": 0.6805, "step": 237 }, { "epoch": 0.09754598083918234, "grad_norm": 1.024332895199548, "learning_rate": 9.881819218994366e-06, "loss": 0.6639, "step": 238 }, { "epoch": 0.09795583790153184, "grad_norm": 1.0026178838869635, "learning_rate": 9.880379387779637e-06, "loss": 0.5831, "step": 239 }, { "epoch": 0.09836569496388135, "grad_norm": 1.0472383078027763, "learning_rate": 9.878930944823596e-06, "loss": 0.6979, "step": 240 }, { "epoch": 0.09877555202623085, "grad_norm": 0.9966873562194005, "learning_rate": 9.877473892682112e-06, "loss": 0.6656, "step": 241 }, { "epoch": 0.09918540908858035, "grad_norm": 0.9300133421528517, "learning_rate": 9.87600823392625e-06, "loss": 0.6178, "step": 242 }, { "epoch": 0.09959526615092987, "grad_norm": 1.1050788349858656, "learning_rate": 9.874533971142255e-06, "loss": 0.6966, "step": 243 }, { "epoch": 0.10000512321327937, "grad_norm": 1.0691770635694937, "learning_rate": 9.873051106931557e-06, "loss": 0.6918, "step": 244 }, { "epoch": 0.10041498027562887, "grad_norm": 0.950417074176108, "learning_rate": 9.871559643910769e-06, "loss": 0.6196, "step": 245 }, { "epoch": 0.10082483733797838, "grad_norm": 0.9328310651561157, "learning_rate": 9.870059584711668e-06, "loss": 0.6438, "step": 246 }, { "epoch": 0.10123469440032788, "grad_norm": 0.9855732640479077, "learning_rate": 9.86855093198121e-06, "loss": 0.6732, "step": 247 }, { "epoch": 0.1016445514626774, "grad_norm": 1.071394804523662, "learning_rate": 9.867033688381502e-06, "loss": 0.6597, "step": 248 }, { "epoch": 0.1020544085250269, "grad_norm": 0.9889795349381163, "learning_rate": 9.865507856589822e-06, "loss": 0.6121, "step": 249 }, { "epoch": 0.1024642655873764, "grad_norm": 0.9914843112349575, "learning_rate": 9.863973439298597e-06, "loss": 0.657, "step": 250 }, { "epoch": 0.10287412264972591, "grad_norm": 0.9838042467254268, "learning_rate": 9.862430439215407e-06, "loss": 0.6295, "step": 251 }, { "epoch": 0.10328397971207541, "grad_norm": 1.0703938872088097, "learning_rate": 9.860878859062967e-06, "loss": 0.7167, "step": 252 }, { "epoch": 0.10369383677442492, "grad_norm": 1.0229591438238304, "learning_rate": 9.859318701579148e-06, "loss": 0.6422, "step": 253 }, { "epoch": 0.10410369383677442, "grad_norm": 1.0315399538588015, "learning_rate": 9.857749969516942e-06, "loss": 0.7207, "step": 254 }, { "epoch": 0.10451355089912392, "grad_norm": 1.0235016242671329, "learning_rate": 9.856172665644476e-06, "loss": 0.6931, "step": 255 }, { "epoch": 0.10492340796147344, "grad_norm": 0.9425660873272028, "learning_rate": 9.854586792745009e-06, "loss": 0.6119, "step": 256 }, { "epoch": 0.10533326502382294, "grad_norm": 0.9909947730328975, "learning_rate": 9.85299235361691e-06, "loss": 0.5855, "step": 257 }, { "epoch": 0.10574312208617245, "grad_norm": 1.0599554058119396, "learning_rate": 9.851389351073671e-06, "loss": 0.6731, "step": 258 }, { "epoch": 0.10615297914852195, "grad_norm": 0.9946247593512222, "learning_rate": 9.84977778794389e-06, "loss": 0.685, "step": 259 }, { "epoch": 0.10656283621087145, "grad_norm": 1.0327985245912068, "learning_rate": 9.84815766707128e-06, "loss": 0.691, "step": 260 }, { "epoch": 0.10697269327322097, "grad_norm": 1.0274125852744223, "learning_rate": 9.846528991314638e-06, "loss": 0.6676, "step": 261 }, { "epoch": 0.10738255033557047, "grad_norm": 0.9273784671764042, "learning_rate": 9.844891763547874e-06, "loss": 0.6145, "step": 262 }, { "epoch": 0.10779240739791998, "grad_norm": 0.9917671462360524, "learning_rate": 9.843245986659977e-06, "loss": 0.6485, "step": 263 }, { "epoch": 0.10820226446026948, "grad_norm": 1.0042249509508812, "learning_rate": 9.84159166355503e-06, "loss": 0.6179, "step": 264 }, { "epoch": 0.10861212152261898, "grad_norm": 1.017929126508688, "learning_rate": 9.839928797152185e-06, "loss": 0.6669, "step": 265 }, { "epoch": 0.1090219785849685, "grad_norm": 0.9461120380362925, "learning_rate": 9.838257390385682e-06, "loss": 0.5978, "step": 266 }, { "epoch": 0.109431835647318, "grad_norm": 1.003605542923786, "learning_rate": 9.836577446204822e-06, "loss": 0.6088, "step": 267 }, { "epoch": 0.10984169270966751, "grad_norm": 1.0318467634967208, "learning_rate": 9.834888967573977e-06, "loss": 0.7083, "step": 268 }, { "epoch": 0.11025154977201701, "grad_norm": 0.9474685418742693, "learning_rate": 9.83319195747257e-06, "loss": 0.6291, "step": 269 }, { "epoch": 0.11066140683436651, "grad_norm": 1.0139376648573803, "learning_rate": 9.831486418895088e-06, "loss": 0.6251, "step": 270 }, { "epoch": 0.11107126389671602, "grad_norm": 1.0243415000658127, "learning_rate": 9.829772354851064e-06, "loss": 0.6163, "step": 271 }, { "epoch": 0.11148112095906552, "grad_norm": 0.9959274109647012, "learning_rate": 9.82804976836507e-06, "loss": 0.6699, "step": 272 }, { "epoch": 0.11189097802141502, "grad_norm": 0.9852649312468624, "learning_rate": 9.82631866247672e-06, "loss": 0.6225, "step": 273 }, { "epoch": 0.11230083508376454, "grad_norm": 0.9293592387878767, "learning_rate": 9.824579040240663e-06, "loss": 0.6293, "step": 274 }, { "epoch": 0.11271069214611404, "grad_norm": 0.9452347301165976, "learning_rate": 9.822830904726573e-06, "loss": 0.6591, "step": 275 }, { "epoch": 0.11312054920846355, "grad_norm": 0.9395930840377683, "learning_rate": 9.821074259019145e-06, "loss": 0.6233, "step": 276 }, { "epoch": 0.11353040627081305, "grad_norm": 0.9950251201164159, "learning_rate": 9.819309106218095e-06, "loss": 0.7433, "step": 277 }, { "epoch": 0.11394026333316255, "grad_norm": 0.9475334634743996, "learning_rate": 9.81753544943815e-06, "loss": 0.6926, "step": 278 }, { "epoch": 0.11435012039551207, "grad_norm": 0.9499935104292627, "learning_rate": 9.815753291809035e-06, "loss": 0.6564, "step": 279 }, { "epoch": 0.11475997745786157, "grad_norm": 0.9620021028562664, "learning_rate": 9.813962636475485e-06, "loss": 0.6572, "step": 280 }, { "epoch": 0.11516983452021108, "grad_norm": 0.9772068830433337, "learning_rate": 9.812163486597224e-06, "loss": 0.6564, "step": 281 }, { "epoch": 0.11557969158256058, "grad_norm": 0.9696261760073119, "learning_rate": 9.810355845348968e-06, "loss": 0.6929, "step": 282 }, { "epoch": 0.11598954864491008, "grad_norm": 0.940415573270125, "learning_rate": 9.808539715920415e-06, "loss": 0.6835, "step": 283 }, { "epoch": 0.1163994057072596, "grad_norm": 0.9845065902203242, "learning_rate": 9.806715101516243e-06, "loss": 0.6265, "step": 284 }, { "epoch": 0.1168092627696091, "grad_norm": 0.9845044975221721, "learning_rate": 9.804882005356098e-06, "loss": 0.6581, "step": 285 }, { "epoch": 0.11721911983195861, "grad_norm": 1.007873091378526, "learning_rate": 9.803040430674597e-06, "loss": 0.6513, "step": 286 }, { "epoch": 0.11762897689430811, "grad_norm": 0.9830980979116998, "learning_rate": 9.801190380721316e-06, "loss": 0.6629, "step": 287 }, { "epoch": 0.11803883395665761, "grad_norm": 0.982100764201484, "learning_rate": 9.799331858760785e-06, "loss": 0.6485, "step": 288 }, { "epoch": 0.11844869101900712, "grad_norm": 0.9999862985433592, "learning_rate": 9.797464868072489e-06, "loss": 0.5955, "step": 289 }, { "epoch": 0.11885854808135662, "grad_norm": 1.0119540170106385, "learning_rate": 9.795589411950845e-06, "loss": 0.6747, "step": 290 }, { "epoch": 0.11926840514370614, "grad_norm": 1.008077722256422, "learning_rate": 9.793705493705223e-06, "loss": 0.6483, "step": 291 }, { "epoch": 0.11967826220605564, "grad_norm": 0.948987094361649, "learning_rate": 9.791813116659912e-06, "loss": 0.6283, "step": 292 }, { "epoch": 0.12008811926840514, "grad_norm": 0.9839196462016332, "learning_rate": 9.789912284154135e-06, "loss": 0.6656, "step": 293 }, { "epoch": 0.12049797633075465, "grad_norm": 0.9734231453237092, "learning_rate": 9.78800299954203e-06, "loss": 0.6127, "step": 294 }, { "epoch": 0.12090783339310415, "grad_norm": 1.0695644132044406, "learning_rate": 9.786085266192655e-06, "loss": 0.6937, "step": 295 }, { "epoch": 0.12131769045545367, "grad_norm": 1.037363884931726, "learning_rate": 9.78415908748997e-06, "loss": 0.7042, "step": 296 }, { "epoch": 0.12172754751780317, "grad_norm": 1.0046955336650036, "learning_rate": 9.78222446683284e-06, "loss": 0.6498, "step": 297 }, { "epoch": 0.12213740458015267, "grad_norm": 0.9838728935843674, "learning_rate": 9.780281407635031e-06, "loss": 0.6785, "step": 298 }, { "epoch": 0.12254726164250218, "grad_norm": 0.9582995254984139, "learning_rate": 9.77832991332519e-06, "loss": 0.6968, "step": 299 }, { "epoch": 0.12295711870485168, "grad_norm": 0.9740835800754538, "learning_rate": 9.776369987346857e-06, "loss": 0.6131, "step": 300 }, { "epoch": 0.1233669757672012, "grad_norm": 1.0504332572616737, "learning_rate": 9.774401633158446e-06, "loss": 0.6748, "step": 301 }, { "epoch": 0.1237768328295507, "grad_norm": 0.9873260983115744, "learning_rate": 9.772424854233243e-06, "loss": 0.6425, "step": 302 }, { "epoch": 0.1241866898919002, "grad_norm": 0.9340365600979239, "learning_rate": 9.7704396540594e-06, "loss": 0.6506, "step": 303 }, { "epoch": 0.12459654695424971, "grad_norm": 0.9912995724472821, "learning_rate": 9.768446036139932e-06, "loss": 0.6761, "step": 304 }, { "epoch": 0.1250064040165992, "grad_norm": 1.002768688292176, "learning_rate": 9.766444003992704e-06, "loss": 0.687, "step": 305 }, { "epoch": 0.1254162610789487, "grad_norm": 0.9819828803500713, "learning_rate": 9.764433561150426e-06, "loss": 0.6927, "step": 306 }, { "epoch": 0.1258261181412982, "grad_norm": 0.9227383970144794, "learning_rate": 9.76241471116066e-06, "loss": 0.6424, "step": 307 }, { "epoch": 0.12623597520364774, "grad_norm": 0.9810799613303911, "learning_rate": 9.76038745758579e-06, "loss": 0.6312, "step": 308 }, { "epoch": 0.12664583226599724, "grad_norm": 0.9974704913537001, "learning_rate": 9.758351804003037e-06, "loss": 0.6251, "step": 309 }, { "epoch": 0.12705568932834674, "grad_norm": 0.9722714613787725, "learning_rate": 9.756307754004444e-06, "loss": 0.6835, "step": 310 }, { "epoch": 0.12746554639069624, "grad_norm": 0.9689679105364238, "learning_rate": 9.754255311196863e-06, "loss": 0.6493, "step": 311 }, { "epoch": 0.12787540345304574, "grad_norm": 0.9978264920105784, "learning_rate": 9.752194479201961e-06, "loss": 0.652, "step": 312 }, { "epoch": 0.12828526051539527, "grad_norm": 0.9603852364283471, "learning_rate": 9.750125261656213e-06, "loss": 0.6389, "step": 313 }, { "epoch": 0.12869511757774477, "grad_norm": 0.9386764549595017, "learning_rate": 9.74804766221088e-06, "loss": 0.6003, "step": 314 }, { "epoch": 0.12910497464009427, "grad_norm": 0.8630537339943082, "learning_rate": 9.745961684532022e-06, "loss": 0.6125, "step": 315 }, { "epoch": 0.12951483170244377, "grad_norm": 1.0007403231884078, "learning_rate": 9.743867332300478e-06, "loss": 0.6661, "step": 316 }, { "epoch": 0.12992468876479327, "grad_norm": 0.9655141616929799, "learning_rate": 9.74176460921187e-06, "loss": 0.6625, "step": 317 }, { "epoch": 0.1303345458271428, "grad_norm": 0.9570326597066976, "learning_rate": 9.739653518976581e-06, "loss": 0.6569, "step": 318 }, { "epoch": 0.1307444028894923, "grad_norm": 0.9029920933096656, "learning_rate": 9.737534065319772e-06, "loss": 0.6541, "step": 319 }, { "epoch": 0.1311542599518418, "grad_norm": 0.9757451857258855, "learning_rate": 9.73540625198135e-06, "loss": 0.658, "step": 320 }, { "epoch": 0.1315641170141913, "grad_norm": 0.9764930871820565, "learning_rate": 9.733270082715976e-06, "loss": 0.6391, "step": 321 }, { "epoch": 0.1319739740765408, "grad_norm": 0.9749512131650747, "learning_rate": 9.731125561293061e-06, "loss": 0.6632, "step": 322 }, { "epoch": 0.13238383113889032, "grad_norm": 0.9533053637752533, "learning_rate": 9.728972691496749e-06, "loss": 0.6295, "step": 323 }, { "epoch": 0.13279368820123982, "grad_norm": 0.9796508160609343, "learning_rate": 9.726811477125915e-06, "loss": 0.7261, "step": 324 }, { "epoch": 0.13320354526358932, "grad_norm": 0.9379918325433638, "learning_rate": 9.72464192199416e-06, "loss": 0.6681, "step": 325 }, { "epoch": 0.13361340232593882, "grad_norm": 1.0062001305784496, "learning_rate": 9.722464029929806e-06, "loss": 0.6595, "step": 326 }, { "epoch": 0.13402325938828832, "grad_norm": 0.9890663888550544, "learning_rate": 9.720277804775879e-06, "loss": 0.6344, "step": 327 }, { "epoch": 0.13443311645063785, "grad_norm": 0.9564707204931682, "learning_rate": 9.718083250390113e-06, "loss": 0.6647, "step": 328 }, { "epoch": 0.13484297351298735, "grad_norm": 0.9527911154843529, "learning_rate": 9.715880370644943e-06, "loss": 0.5871, "step": 329 }, { "epoch": 0.13525283057533685, "grad_norm": 0.936993342982377, "learning_rate": 9.713669169427487e-06, "loss": 0.5794, "step": 330 }, { "epoch": 0.13566268763768635, "grad_norm": 1.0413447898566646, "learning_rate": 9.711449650639553e-06, "loss": 0.6581, "step": 331 }, { "epoch": 0.13607254470003585, "grad_norm": 1.001411902036785, "learning_rate": 9.709221818197626e-06, "loss": 0.6117, "step": 332 }, { "epoch": 0.13648240176238538, "grad_norm": 1.0108297001510702, "learning_rate": 9.706985676032851e-06, "loss": 0.62, "step": 333 }, { "epoch": 0.13689225882473488, "grad_norm": 0.9312544247084872, "learning_rate": 9.704741228091052e-06, "loss": 0.63, "step": 334 }, { "epoch": 0.13730211588708438, "grad_norm": 0.9581592955693357, "learning_rate": 9.702488478332698e-06, "loss": 0.6701, "step": 335 }, { "epoch": 0.13771197294943388, "grad_norm": 0.9547531902321051, "learning_rate": 9.70022743073291e-06, "loss": 0.6677, "step": 336 }, { "epoch": 0.13812183001178338, "grad_norm": 1.0333665180332312, "learning_rate": 9.697958089281448e-06, "loss": 0.6588, "step": 337 }, { "epoch": 0.1385316870741329, "grad_norm": 0.9960904605736769, "learning_rate": 9.695680457982713e-06, "loss": 0.7155, "step": 338 }, { "epoch": 0.1389415441364824, "grad_norm": 1.0190009991005813, "learning_rate": 9.693394540855732e-06, "loss": 0.6755, "step": 339 }, { "epoch": 0.1393514011988319, "grad_norm": 1.0192230085577085, "learning_rate": 9.691100341934149e-06, "loss": 0.6643, "step": 340 }, { "epoch": 0.1397612582611814, "grad_norm": 0.8532054275528237, "learning_rate": 9.688797865266227e-06, "loss": 0.6037, "step": 341 }, { "epoch": 0.1401711153235309, "grad_norm": 1.0866324879614682, "learning_rate": 9.686487114914833e-06, "loss": 0.7335, "step": 342 }, { "epoch": 0.14058097238588044, "grad_norm": 0.9647217668035047, "learning_rate": 9.684168094957432e-06, "loss": 0.6797, "step": 343 }, { "epoch": 0.14099082944822994, "grad_norm": 0.9913683135101609, "learning_rate": 9.681840809486083e-06, "loss": 0.6592, "step": 344 }, { "epoch": 0.14140068651057944, "grad_norm": 0.9833661844479759, "learning_rate": 9.679505262607432e-06, "loss": 0.6631, "step": 345 }, { "epoch": 0.14181054357292894, "grad_norm": 0.8939272876983049, "learning_rate": 9.6771614584427e-06, "loss": 0.6616, "step": 346 }, { "epoch": 0.14222040063527844, "grad_norm": 0.9180503904046442, "learning_rate": 9.674809401127676e-06, "loss": 0.5888, "step": 347 }, { "epoch": 0.14263025769762797, "grad_norm": 0.9196948818570934, "learning_rate": 9.672449094812719e-06, "loss": 0.6324, "step": 348 }, { "epoch": 0.14304011475997747, "grad_norm": 0.9958072721786914, "learning_rate": 9.670080543662742e-06, "loss": 0.6514, "step": 349 }, { "epoch": 0.14344997182232697, "grad_norm": 0.9975289727495877, "learning_rate": 9.667703751857199e-06, "loss": 0.6656, "step": 350 }, { "epoch": 0.14385982888467647, "grad_norm": 0.8975124335949476, "learning_rate": 9.665318723590096e-06, "loss": 0.6335, "step": 351 }, { "epoch": 0.14426968594702597, "grad_norm": 0.9854929972952096, "learning_rate": 9.662925463069967e-06, "loss": 0.6573, "step": 352 }, { "epoch": 0.1446795430093755, "grad_norm": 0.9095513674755179, "learning_rate": 9.66052397451987e-06, "loss": 0.5936, "step": 353 }, { "epoch": 0.145089400071725, "grad_norm": 0.9797689349816915, "learning_rate": 9.65811426217739e-06, "loss": 0.6498, "step": 354 }, { "epoch": 0.1454992571340745, "grad_norm": 1.044494668840864, "learning_rate": 9.655696330294613e-06, "loss": 0.6953, "step": 355 }, { "epoch": 0.145909114196424, "grad_norm": 1.0378226965188013, "learning_rate": 9.65327018313814e-06, "loss": 0.7067, "step": 356 }, { "epoch": 0.1463189712587735, "grad_norm": 0.9799800711683213, "learning_rate": 9.650835824989057e-06, "loss": 0.7068, "step": 357 }, { "epoch": 0.146728828321123, "grad_norm": 1.0702114068605417, "learning_rate": 9.648393260142949e-06, "loss": 0.6665, "step": 358 }, { "epoch": 0.14713868538347252, "grad_norm": 0.9688527183973731, "learning_rate": 9.645942492909875e-06, "loss": 0.6291, "step": 359 }, { "epoch": 0.14754854244582202, "grad_norm": 0.9165202187407978, "learning_rate": 9.643483527614372e-06, "loss": 0.6085, "step": 360 }, { "epoch": 0.14795839950817152, "grad_norm": 1.0186830542017917, "learning_rate": 9.64101636859544e-06, "loss": 0.6841, "step": 361 }, { "epoch": 0.14836825657052102, "grad_norm": 0.9381578582434479, "learning_rate": 9.638541020206542e-06, "loss": 0.6034, "step": 362 }, { "epoch": 0.14877811363287052, "grad_norm": 0.9023052544748195, "learning_rate": 9.636057486815583e-06, "loss": 0.65, "step": 363 }, { "epoch": 0.14918797069522005, "grad_norm": 0.9393682845737674, "learning_rate": 9.63356577280492e-06, "loss": 0.6334, "step": 364 }, { "epoch": 0.14959782775756955, "grad_norm": 1.0252135867854424, "learning_rate": 9.631065882571342e-06, "loss": 0.6558, "step": 365 }, { "epoch": 0.15000768481991905, "grad_norm": 0.9196112985003105, "learning_rate": 9.628557820526065e-06, "loss": 0.6442, "step": 366 }, { "epoch": 0.15041754188226855, "grad_norm": 0.9515940711212271, "learning_rate": 9.626041591094723e-06, "loss": 0.6217, "step": 367 }, { "epoch": 0.15082739894461805, "grad_norm": 0.9390594917514403, "learning_rate": 9.623517198717363e-06, "loss": 0.6007, "step": 368 }, { "epoch": 0.15123725600696758, "grad_norm": 0.9228185926253952, "learning_rate": 9.62098464784844e-06, "loss": 0.6317, "step": 369 }, { "epoch": 0.15164711306931708, "grad_norm": 1.030879309420988, "learning_rate": 9.6184439429568e-06, "loss": 0.6797, "step": 370 }, { "epoch": 0.15205697013166658, "grad_norm": 0.913181128938046, "learning_rate": 9.615895088525677e-06, "loss": 0.6119, "step": 371 }, { "epoch": 0.15246682719401608, "grad_norm": 0.9732445688981177, "learning_rate": 9.613338089052691e-06, "loss": 0.6579, "step": 372 }, { "epoch": 0.15287668425636558, "grad_norm": 0.9730246283409615, "learning_rate": 9.610772949049829e-06, "loss": 0.6265, "step": 373 }, { "epoch": 0.1532865413187151, "grad_norm": 0.9611971727845967, "learning_rate": 9.608199673043447e-06, "loss": 0.6037, "step": 374 }, { "epoch": 0.1536963983810646, "grad_norm": 0.968024532623284, "learning_rate": 9.60561826557425e-06, "loss": 0.6488, "step": 375 }, { "epoch": 0.1541062554434141, "grad_norm": 0.9937879084584703, "learning_rate": 9.603028731197303e-06, "loss": 0.6138, "step": 376 }, { "epoch": 0.1545161125057636, "grad_norm": 0.9684926257428884, "learning_rate": 9.600431074482e-06, "loss": 0.5797, "step": 377 }, { "epoch": 0.1549259695681131, "grad_norm": 0.9898070088134615, "learning_rate": 9.597825300012073e-06, "loss": 0.6375, "step": 378 }, { "epoch": 0.15533582663046264, "grad_norm": 0.9682216827962415, "learning_rate": 9.595211412385579e-06, "loss": 0.6604, "step": 379 }, { "epoch": 0.15574568369281214, "grad_norm": 0.9945498252582468, "learning_rate": 9.592589416214889e-06, "loss": 0.6322, "step": 380 }, { "epoch": 0.15615554075516164, "grad_norm": 0.9574435119169162, "learning_rate": 9.589959316126683e-06, "loss": 0.6473, "step": 381 }, { "epoch": 0.15656539781751114, "grad_norm": 1.010087921257916, "learning_rate": 9.587321116761938e-06, "loss": 0.6805, "step": 382 }, { "epoch": 0.15697525487986064, "grad_norm": 1.023071700167666, "learning_rate": 9.58467482277593e-06, "loss": 0.6339, "step": 383 }, { "epoch": 0.15738511194221017, "grad_norm": 0.8965712537564404, "learning_rate": 9.582020438838213e-06, "loss": 0.6302, "step": 384 }, { "epoch": 0.15779496900455967, "grad_norm": 1.0343006680468283, "learning_rate": 9.579357969632614e-06, "loss": 0.6693, "step": 385 }, { "epoch": 0.15820482606690917, "grad_norm": 0.9258650676740177, "learning_rate": 9.57668741985723e-06, "loss": 0.6192, "step": 386 }, { "epoch": 0.15861468312925867, "grad_norm": 0.9353285905175047, "learning_rate": 9.574008794224423e-06, "loss": 0.6452, "step": 387 }, { "epoch": 0.15902454019160817, "grad_norm": 0.9180024943700854, "learning_rate": 9.571322097460793e-06, "loss": 0.6086, "step": 388 }, { "epoch": 0.1594343972539577, "grad_norm": 0.9391501815261133, "learning_rate": 9.56862733430719e-06, "loss": 0.6434, "step": 389 }, { "epoch": 0.1598442543163072, "grad_norm": 0.9195519607845012, "learning_rate": 9.565924509518693e-06, "loss": 0.6362, "step": 390 }, { "epoch": 0.1602541113786567, "grad_norm": 0.983645292672998, "learning_rate": 9.563213627864615e-06, "loss": 0.603, "step": 391 }, { "epoch": 0.1606639684410062, "grad_norm": 0.9668523634596651, "learning_rate": 9.560494694128475e-06, "loss": 0.6588, "step": 392 }, { "epoch": 0.1610738255033557, "grad_norm": 0.9198069899687034, "learning_rate": 9.557767713108009e-06, "loss": 0.6844, "step": 393 }, { "epoch": 0.16148368256570522, "grad_norm": 0.9709234260959786, "learning_rate": 9.555032689615145e-06, "loss": 0.6216, "step": 394 }, { "epoch": 0.16189353962805472, "grad_norm": 0.9619199081664456, "learning_rate": 9.552289628476009e-06, "loss": 0.6632, "step": 395 }, { "epoch": 0.16230339669040422, "grad_norm": 0.9293076526438114, "learning_rate": 9.549538534530908e-06, "loss": 0.6188, "step": 396 }, { "epoch": 0.16271325375275372, "grad_norm": 0.961793847975169, "learning_rate": 9.546779412634324e-06, "loss": 0.6327, "step": 397 }, { "epoch": 0.16312311081510322, "grad_norm": 0.9865959834193898, "learning_rate": 9.544012267654901e-06, "loss": 0.6463, "step": 398 }, { "epoch": 0.16353296787745275, "grad_norm": 0.9633509831401007, "learning_rate": 9.541237104475445e-06, "loss": 0.6255, "step": 399 }, { "epoch": 0.16394282493980225, "grad_norm": 1.0289708988291155, "learning_rate": 9.53845392799291e-06, "loss": 0.6791, "step": 400 }, { "epoch": 0.16435268200215175, "grad_norm": 0.9295507551770691, "learning_rate": 9.535662743118386e-06, "loss": 0.6394, "step": 401 }, { "epoch": 0.16476253906450125, "grad_norm": 0.9742837310496094, "learning_rate": 9.532863554777099e-06, "loss": 0.6786, "step": 402 }, { "epoch": 0.16517239612685075, "grad_norm": 0.9884791600270346, "learning_rate": 9.530056367908393e-06, "loss": 0.5855, "step": 403 }, { "epoch": 0.16558225318920028, "grad_norm": 0.9036095646732858, "learning_rate": 9.527241187465735e-06, "loss": 0.654, "step": 404 }, { "epoch": 0.16599211025154978, "grad_norm": 0.9534732983415365, "learning_rate": 9.524418018416684e-06, "loss": 0.6249, "step": 405 }, { "epoch": 0.16640196731389928, "grad_norm": 0.9978081910885006, "learning_rate": 9.521586865742904e-06, "loss": 0.6616, "step": 406 }, { "epoch": 0.16681182437624878, "grad_norm": 0.8294778569152337, "learning_rate": 9.518747734440145e-06, "loss": 0.5677, "step": 407 }, { "epoch": 0.16722168143859828, "grad_norm": 0.9165934538282671, "learning_rate": 9.51590062951824e-06, "loss": 0.622, "step": 408 }, { "epoch": 0.1676315385009478, "grad_norm": 0.947108865241617, "learning_rate": 9.513045556001082e-06, "loss": 0.6839, "step": 409 }, { "epoch": 0.1680413955632973, "grad_norm": 0.939729967996245, "learning_rate": 9.51018251892663e-06, "loss": 0.6466, "step": 410 }, { "epoch": 0.1684512526256468, "grad_norm": 0.9598080638158942, "learning_rate": 9.5073115233469e-06, "loss": 0.663, "step": 411 }, { "epoch": 0.1688611096879963, "grad_norm": 0.8808260250001861, "learning_rate": 9.504432574327946e-06, "loss": 0.6605, "step": 412 }, { "epoch": 0.1692709667503458, "grad_norm": 0.9088117451532064, "learning_rate": 9.501545676949856e-06, "loss": 0.5971, "step": 413 }, { "epoch": 0.16968082381269534, "grad_norm": 1.0564755806230108, "learning_rate": 9.498650836306748e-06, "loss": 0.6128, "step": 414 }, { "epoch": 0.17009068087504484, "grad_norm": 0.9098361079588699, "learning_rate": 9.49574805750675e-06, "loss": 0.6395, "step": 415 }, { "epoch": 0.17050053793739434, "grad_norm": 0.9670878237860614, "learning_rate": 9.492837345672e-06, "loss": 0.6458, "step": 416 }, { "epoch": 0.17091039499974384, "grad_norm": 1.0723508280154284, "learning_rate": 9.48991870593864e-06, "loss": 0.6422, "step": 417 }, { "epoch": 0.17132025206209334, "grad_norm": 0.8893687154833078, "learning_rate": 9.486992143456792e-06, "loss": 0.5853, "step": 418 }, { "epoch": 0.17173010912444284, "grad_norm": 0.9405016459165743, "learning_rate": 9.484057663390565e-06, "loss": 0.6358, "step": 419 }, { "epoch": 0.17213996618679236, "grad_norm": 1.0841421566043472, "learning_rate": 9.481115270918034e-06, "loss": 0.6104, "step": 420 }, { "epoch": 0.17254982324914186, "grad_norm": 1.0027295151851066, "learning_rate": 9.47816497123124e-06, "loss": 0.6933, "step": 421 }, { "epoch": 0.17295968031149137, "grad_norm": 0.9243538370240459, "learning_rate": 9.475206769536177e-06, "loss": 0.6708, "step": 422 }, { "epoch": 0.17336953737384087, "grad_norm": 0.9651548017091147, "learning_rate": 9.472240671052777e-06, "loss": 0.6739, "step": 423 }, { "epoch": 0.17377939443619037, "grad_norm": 0.9338693243299134, "learning_rate": 9.469266681014914e-06, "loss": 0.6341, "step": 424 }, { "epoch": 0.1741892514985399, "grad_norm": 1.0236582753878702, "learning_rate": 9.466284804670382e-06, "loss": 0.6731, "step": 425 }, { "epoch": 0.1745991085608894, "grad_norm": 0.9594492393538745, "learning_rate": 9.463295047280892e-06, "loss": 0.6579, "step": 426 }, { "epoch": 0.1750089656232389, "grad_norm": 0.9225362286675274, "learning_rate": 9.460297414122062e-06, "loss": 0.6787, "step": 427 }, { "epoch": 0.1754188226855884, "grad_norm": 1.0184058003785699, "learning_rate": 9.457291910483408e-06, "loss": 0.6705, "step": 428 }, { "epoch": 0.1758286797479379, "grad_norm": 0.9505757465844947, "learning_rate": 9.454278541668334e-06, "loss": 0.6374, "step": 429 }, { "epoch": 0.17623853681028742, "grad_norm": 0.9318912708248303, "learning_rate": 9.45125731299412e-06, "loss": 0.6306, "step": 430 }, { "epoch": 0.17664839387263692, "grad_norm": 0.9504313232590225, "learning_rate": 9.448228229791918e-06, "loss": 0.628, "step": 431 }, { "epoch": 0.17705825093498642, "grad_norm": 1.0083568003073151, "learning_rate": 9.445191297406737e-06, "loss": 0.6622, "step": 432 }, { "epoch": 0.17746810799733592, "grad_norm": 0.9814792176611703, "learning_rate": 9.442146521197441e-06, "loss": 0.6552, "step": 433 }, { "epoch": 0.17787796505968542, "grad_norm": 0.9461902074077306, "learning_rate": 9.439093906536732e-06, "loss": 0.5746, "step": 434 }, { "epoch": 0.17828782212203495, "grad_norm": 0.9444112458315801, "learning_rate": 9.436033458811142e-06, "loss": 0.6281, "step": 435 }, { "epoch": 0.17869767918438445, "grad_norm": 0.9608921378730612, "learning_rate": 9.43296518342103e-06, "loss": 0.6998, "step": 436 }, { "epoch": 0.17910753624673395, "grad_norm": 1.0586631822879702, "learning_rate": 9.429889085780559e-06, "loss": 0.6463, "step": 437 }, { "epoch": 0.17951739330908345, "grad_norm": 0.8983826974598419, "learning_rate": 9.4268051713177e-06, "loss": 0.6283, "step": 438 }, { "epoch": 0.17992725037143295, "grad_norm": 0.9870164035568378, "learning_rate": 9.423713445474224e-06, "loss": 0.6109, "step": 439 }, { "epoch": 0.18033710743378248, "grad_norm": 0.9240073492550492, "learning_rate": 9.420613913705672e-06, "loss": 0.6433, "step": 440 }, { "epoch": 0.18074696449613198, "grad_norm": 0.9538948184178875, "learning_rate": 9.417506581481368e-06, "loss": 0.647, "step": 441 }, { "epoch": 0.18115682155848148, "grad_norm": 1.0139364786223164, "learning_rate": 9.4143914542844e-06, "loss": 0.7205, "step": 442 }, { "epoch": 0.18156667862083098, "grad_norm": 1.0204075948946638, "learning_rate": 9.411268537611607e-06, "loss": 0.6926, "step": 443 }, { "epoch": 0.18197653568318048, "grad_norm": 1.0102313069906208, "learning_rate": 9.408137836973574e-06, "loss": 0.6389, "step": 444 }, { "epoch": 0.18238639274553, "grad_norm": 0.9917141100865329, "learning_rate": 9.404999357894623e-06, "loss": 0.6672, "step": 445 }, { "epoch": 0.1827962498078795, "grad_norm": 0.9043532685880183, "learning_rate": 9.401853105912805e-06, "loss": 0.5835, "step": 446 }, { "epoch": 0.183206106870229, "grad_norm": 0.8819943005114933, "learning_rate": 9.398699086579879e-06, "loss": 0.5997, "step": 447 }, { "epoch": 0.1836159639325785, "grad_norm": 0.9587162327216129, "learning_rate": 9.395537305461312e-06, "loss": 0.6564, "step": 448 }, { "epoch": 0.184025820994928, "grad_norm": 0.8905401311086402, "learning_rate": 9.392367768136272e-06, "loss": 0.5938, "step": 449 }, { "epoch": 0.18443567805727754, "grad_norm": 1.0086927939483707, "learning_rate": 9.389190480197609e-06, "loss": 0.6304, "step": 450 }, { "epoch": 0.18484553511962704, "grad_norm": 0.9228901223723927, "learning_rate": 9.38600544725185e-06, "loss": 0.6424, "step": 451 }, { "epoch": 0.18525539218197654, "grad_norm": 0.9459502184024712, "learning_rate": 9.38281267491919e-06, "loss": 0.6717, "step": 452 }, { "epoch": 0.18566524924432604, "grad_norm": 0.9118099713702045, "learning_rate": 9.37961216883348e-06, "loss": 0.6422, "step": 453 }, { "epoch": 0.18607510630667554, "grad_norm": 1.0111734015658451, "learning_rate": 9.37640393464222e-06, "loss": 0.6484, "step": 454 }, { "epoch": 0.18648496336902506, "grad_norm": 0.894701994877936, "learning_rate": 9.373187978006538e-06, "loss": 0.6173, "step": 455 }, { "epoch": 0.18689482043137456, "grad_norm": 0.8956320766447815, "learning_rate": 9.3699643046012e-06, "loss": 0.6079, "step": 456 }, { "epoch": 0.18730467749372406, "grad_norm": 0.9313703885950092, "learning_rate": 9.366732920114581e-06, "loss": 0.6445, "step": 457 }, { "epoch": 0.18771453455607356, "grad_norm": 0.9356392632603555, "learning_rate": 9.363493830248666e-06, "loss": 0.5974, "step": 458 }, { "epoch": 0.18812439161842306, "grad_norm": 0.9811457771880149, "learning_rate": 9.36024704071904e-06, "loss": 0.6535, "step": 459 }, { "epoch": 0.1885342486807726, "grad_norm": 0.9543826178980104, "learning_rate": 9.356992557254865e-06, "loss": 0.6515, "step": 460 }, { "epoch": 0.1889441057431221, "grad_norm": 0.9114801976819517, "learning_rate": 9.353730385598887e-06, "loss": 0.615, "step": 461 }, { "epoch": 0.1893539628054716, "grad_norm": 0.9586440862247613, "learning_rate": 9.350460531507417e-06, "loss": 0.6476, "step": 462 }, { "epoch": 0.1897638198678211, "grad_norm": 0.960323413383666, "learning_rate": 9.347183000750321e-06, "loss": 0.5928, "step": 463 }, { "epoch": 0.1901736769301706, "grad_norm": 0.9623663843640659, "learning_rate": 9.343897799111012e-06, "loss": 0.6721, "step": 464 }, { "epoch": 0.19058353399252012, "grad_norm": 0.9318826272360204, "learning_rate": 9.340604932386439e-06, "loss": 0.6392, "step": 465 }, { "epoch": 0.19099339105486962, "grad_norm": 0.9127019726050232, "learning_rate": 9.337304406387073e-06, "loss": 0.6029, "step": 466 }, { "epoch": 0.19140324811721912, "grad_norm": 0.958251386833172, "learning_rate": 9.333996226936906e-06, "loss": 0.6976, "step": 467 }, { "epoch": 0.19181310517956862, "grad_norm": 1.0294295988974038, "learning_rate": 9.330680399873432e-06, "loss": 0.6671, "step": 468 }, { "epoch": 0.19222296224191812, "grad_norm": 0.9936365294509376, "learning_rate": 9.327356931047636e-06, "loss": 0.6809, "step": 469 }, { "epoch": 0.19263281930426765, "grad_norm": 0.925472070343213, "learning_rate": 9.324025826323995e-06, "loss": 0.5941, "step": 470 }, { "epoch": 0.19304267636661715, "grad_norm": 0.9622759554136099, "learning_rate": 9.320687091580453e-06, "loss": 0.5985, "step": 471 }, { "epoch": 0.19345253342896665, "grad_norm": 0.9767105107144355, "learning_rate": 9.317340732708425e-06, "loss": 0.6316, "step": 472 }, { "epoch": 0.19386239049131615, "grad_norm": 0.9672747363505229, "learning_rate": 9.313986755612772e-06, "loss": 0.5967, "step": 473 }, { "epoch": 0.19427224755366565, "grad_norm": 0.9111971993863457, "learning_rate": 9.310625166211802e-06, "loss": 0.6117, "step": 474 }, { "epoch": 0.19468210461601518, "grad_norm": 0.9106259211353186, "learning_rate": 9.307255970437255e-06, "loss": 0.6239, "step": 475 }, { "epoch": 0.19509196167836468, "grad_norm": 0.9443559110066804, "learning_rate": 9.30387917423429e-06, "loss": 0.6281, "step": 476 }, { "epoch": 0.19550181874071418, "grad_norm": 0.9042114378089574, "learning_rate": 9.300494783561479e-06, "loss": 0.6218, "step": 477 }, { "epoch": 0.19591167580306368, "grad_norm": 0.9314039688647906, "learning_rate": 9.297102804390798e-06, "loss": 0.6094, "step": 478 }, { "epoch": 0.19632153286541318, "grad_norm": 0.8911736179653077, "learning_rate": 9.293703242707613e-06, "loss": 0.6208, "step": 479 }, { "epoch": 0.1967313899277627, "grad_norm": 0.9463027635509804, "learning_rate": 9.290296104510666e-06, "loss": 0.6328, "step": 480 }, { "epoch": 0.1971412469901122, "grad_norm": 0.9344386117077448, "learning_rate": 9.286881395812066e-06, "loss": 0.6153, "step": 481 }, { "epoch": 0.1975511040524617, "grad_norm": 0.9401198409592969, "learning_rate": 9.28345912263729e-06, "loss": 0.6431, "step": 482 }, { "epoch": 0.1979609611148112, "grad_norm": 0.9552949094477372, "learning_rate": 9.280029291025156e-06, "loss": 0.6773, "step": 483 }, { "epoch": 0.1983708181771607, "grad_norm": 0.9798261271838966, "learning_rate": 9.27659190702782e-06, "loss": 0.6185, "step": 484 }, { "epoch": 0.1987806752395102, "grad_norm": 0.9178068277490171, "learning_rate": 9.27314697671077e-06, "loss": 0.6255, "step": 485 }, { "epoch": 0.19919053230185974, "grad_norm": 1.0011584323679699, "learning_rate": 9.269694506152799e-06, "loss": 0.6078, "step": 486 }, { "epoch": 0.19960038936420924, "grad_norm": 1.015575006214679, "learning_rate": 9.266234501446016e-06, "loss": 0.6538, "step": 487 }, { "epoch": 0.20001024642655874, "grad_norm": 0.9238461710656591, "learning_rate": 9.26276696869582e-06, "loss": 0.6183, "step": 488 }, { "epoch": 0.20042010348890824, "grad_norm": 1.0067782203021383, "learning_rate": 9.259291914020893e-06, "loss": 0.5913, "step": 489 }, { "epoch": 0.20082996055125774, "grad_norm": 1.0433831356664511, "learning_rate": 9.255809343553196e-06, "loss": 0.6602, "step": 490 }, { "epoch": 0.20123981761360726, "grad_norm": 1.0228384870478606, "learning_rate": 9.25231926343794e-06, "loss": 0.6634, "step": 491 }, { "epoch": 0.20164967467595676, "grad_norm": 0.9831918995593597, "learning_rate": 9.248821679833596e-06, "loss": 0.6666, "step": 492 }, { "epoch": 0.20205953173830626, "grad_norm": 0.899159574820684, "learning_rate": 9.245316598911877e-06, "loss": 0.6574, "step": 493 }, { "epoch": 0.20246938880065576, "grad_norm": 0.9829027814297675, "learning_rate": 9.24180402685772e-06, "loss": 0.661, "step": 494 }, { "epoch": 0.20287924586300526, "grad_norm": 0.9739329084479487, "learning_rate": 9.238283969869284e-06, "loss": 0.6514, "step": 495 }, { "epoch": 0.2032891029253548, "grad_norm": 0.9291735399877861, "learning_rate": 9.234756434157935e-06, "loss": 0.6076, "step": 496 }, { "epoch": 0.2036989599877043, "grad_norm": 1.0845033022536577, "learning_rate": 9.231221425948232e-06, "loss": 0.6408, "step": 497 }, { "epoch": 0.2041088170500538, "grad_norm": 0.8751728135259016, "learning_rate": 9.227678951477925e-06, "loss": 0.6214, "step": 498 }, { "epoch": 0.2045186741124033, "grad_norm": 0.9745108956546873, "learning_rate": 9.224129016997938e-06, "loss": 0.6498, "step": 499 }, { "epoch": 0.2049285311747528, "grad_norm": 0.9371066122769106, "learning_rate": 9.220571628772354e-06, "loss": 0.598, "step": 500 }, { "epoch": 0.20533838823710232, "grad_norm": 0.9300863292154004, "learning_rate": 9.217006793078415e-06, "loss": 0.6194, "step": 501 }, { "epoch": 0.20574824529945182, "grad_norm": 1.0013026351477596, "learning_rate": 9.213434516206502e-06, "loss": 0.6496, "step": 502 }, { "epoch": 0.20615810236180132, "grad_norm": 1.0081358476293858, "learning_rate": 9.209854804460121e-06, "loss": 0.6899, "step": 503 }, { "epoch": 0.20656795942415082, "grad_norm": 0.9314994697238682, "learning_rate": 9.206267664155906e-06, "loss": 0.5903, "step": 504 }, { "epoch": 0.20697781648650032, "grad_norm": 1.0024013610500238, "learning_rate": 9.202673101623595e-06, "loss": 0.6702, "step": 505 }, { "epoch": 0.20738767354884985, "grad_norm": 0.958721849175664, "learning_rate": 9.199071123206023e-06, "loss": 0.6565, "step": 506 }, { "epoch": 0.20779753061119935, "grad_norm": 1.0148666865298996, "learning_rate": 9.195461735259108e-06, "loss": 0.6841, "step": 507 }, { "epoch": 0.20820738767354885, "grad_norm": 0.9314234018878591, "learning_rate": 9.19184494415185e-06, "loss": 0.6258, "step": 508 }, { "epoch": 0.20861724473589835, "grad_norm": 0.958486024592098, "learning_rate": 9.188220756266309e-06, "loss": 0.633, "step": 509 }, { "epoch": 0.20902710179824785, "grad_norm": 0.9360019858928802, "learning_rate": 9.184589177997592e-06, "loss": 0.6586, "step": 510 }, { "epoch": 0.20943695886059738, "grad_norm": 0.9137986886941724, "learning_rate": 9.180950215753851e-06, "loss": 0.5958, "step": 511 }, { "epoch": 0.20984681592294688, "grad_norm": 0.8868136850323548, "learning_rate": 9.177303875956269e-06, "loss": 0.6422, "step": 512 }, { "epoch": 0.21025667298529638, "grad_norm": 0.8807602676130643, "learning_rate": 9.173650165039047e-06, "loss": 0.6228, "step": 513 }, { "epoch": 0.21066653004764588, "grad_norm": 0.9897983603305393, "learning_rate": 9.16998908944939e-06, "loss": 0.5989, "step": 514 }, { "epoch": 0.21107638710999538, "grad_norm": 0.9979824044081529, "learning_rate": 9.1663206556475e-06, "loss": 0.6983, "step": 515 }, { "epoch": 0.2114862441723449, "grad_norm": 0.9915039417393606, "learning_rate": 9.162644870106563e-06, "loss": 0.6682, "step": 516 }, { "epoch": 0.2118961012346944, "grad_norm": 0.9697232436771064, "learning_rate": 9.158961739312735e-06, "loss": 0.6183, "step": 517 }, { "epoch": 0.2123059582970439, "grad_norm": 0.8680428002054329, "learning_rate": 9.155271269765141e-06, "loss": 0.6387, "step": 518 }, { "epoch": 0.2127158153593934, "grad_norm": 0.9695524707456274, "learning_rate": 9.151573467975846e-06, "loss": 0.6242, "step": 519 }, { "epoch": 0.2131256724217429, "grad_norm": 0.8869848086392278, "learning_rate": 9.147868340469857e-06, "loss": 0.6357, "step": 520 }, { "epoch": 0.21353552948409243, "grad_norm": 0.9497554375780068, "learning_rate": 9.144155893785112e-06, "loss": 0.6264, "step": 521 }, { "epoch": 0.21394538654644193, "grad_norm": 0.9423966874051808, "learning_rate": 9.140436134472457e-06, "loss": 0.6082, "step": 522 }, { "epoch": 0.21435524360879143, "grad_norm": 0.9481538809621833, "learning_rate": 9.136709069095647e-06, "loss": 0.6294, "step": 523 }, { "epoch": 0.21476510067114093, "grad_norm": 0.8733836986859059, "learning_rate": 9.132974704231328e-06, "loss": 0.6447, "step": 524 }, { "epoch": 0.21517495773349044, "grad_norm": 0.8889395113649008, "learning_rate": 9.129233046469021e-06, "loss": 0.6208, "step": 525 }, { "epoch": 0.21558481479583996, "grad_norm": 0.9431852018542756, "learning_rate": 9.125484102411125e-06, "loss": 0.6067, "step": 526 }, { "epoch": 0.21599467185818946, "grad_norm": 0.8750826296087421, "learning_rate": 9.12172787867289e-06, "loss": 0.6158, "step": 527 }, { "epoch": 0.21640452892053896, "grad_norm": 0.9416120392089519, "learning_rate": 9.117964381882412e-06, "loss": 0.5894, "step": 528 }, { "epoch": 0.21681438598288846, "grad_norm": 0.9394757955106943, "learning_rate": 9.114193618680623e-06, "loss": 0.6546, "step": 529 }, { "epoch": 0.21722424304523796, "grad_norm": 0.9928633041944767, "learning_rate": 9.110415595721273e-06, "loss": 0.6559, "step": 530 }, { "epoch": 0.2176341001075875, "grad_norm": 0.9819017364555566, "learning_rate": 9.106630319670928e-06, "loss": 0.6355, "step": 531 }, { "epoch": 0.218043957169937, "grad_norm": 0.8828187427482812, "learning_rate": 9.102837797208946e-06, "loss": 0.6452, "step": 532 }, { "epoch": 0.2184538142322865, "grad_norm": 0.9242440456263338, "learning_rate": 9.099038035027478e-06, "loss": 0.5799, "step": 533 }, { "epoch": 0.218863671294636, "grad_norm": 0.9216167617802524, "learning_rate": 9.095231039831449e-06, "loss": 0.6461, "step": 534 }, { "epoch": 0.2192735283569855, "grad_norm": 0.9568715024901074, "learning_rate": 9.09141681833854e-06, "loss": 0.617, "step": 535 }, { "epoch": 0.21968338541933502, "grad_norm": 0.8873165465798084, "learning_rate": 9.087595377279192e-06, "loss": 0.5951, "step": 536 }, { "epoch": 0.22009324248168452, "grad_norm": 0.9808566346737617, "learning_rate": 9.083766723396582e-06, "loss": 0.594, "step": 537 }, { "epoch": 0.22050309954403402, "grad_norm": 0.870024219570733, "learning_rate": 9.079930863446612e-06, "loss": 0.6838, "step": 538 }, { "epoch": 0.22091295660638352, "grad_norm": 1.0026621706313608, "learning_rate": 9.076087804197907e-06, "loss": 0.6664, "step": 539 }, { "epoch": 0.22132281366873302, "grad_norm": 0.8990223922941152, "learning_rate": 9.072237552431787e-06, "loss": 0.6183, "step": 540 }, { "epoch": 0.22173267073108255, "grad_norm": 0.9085516749560856, "learning_rate": 9.068380114942266e-06, "loss": 0.5986, "step": 541 }, { "epoch": 0.22214252779343205, "grad_norm": 0.883893652508556, "learning_rate": 9.064515498536041e-06, "loss": 0.6146, "step": 542 }, { "epoch": 0.22255238485578155, "grad_norm": 0.9641628945292434, "learning_rate": 9.060643710032473e-06, "loss": 0.6753, "step": 543 }, { "epoch": 0.22296224191813105, "grad_norm": 0.9365486870253191, "learning_rate": 9.056764756263585e-06, "loss": 0.6609, "step": 544 }, { "epoch": 0.22337209898048055, "grad_norm": 1.0130092992701936, "learning_rate": 9.052878644074032e-06, "loss": 0.6418, "step": 545 }, { "epoch": 0.22378195604283005, "grad_norm": 0.8526468896762813, "learning_rate": 9.048985380321114e-06, "loss": 0.5711, "step": 546 }, { "epoch": 0.22419181310517958, "grad_norm": 0.9835966586138113, "learning_rate": 9.045084971874738e-06, "loss": 0.6414, "step": 547 }, { "epoch": 0.22460167016752908, "grad_norm": 0.958216268758148, "learning_rate": 9.041177425617427e-06, "loss": 0.6284, "step": 548 }, { "epoch": 0.22501152722987858, "grad_norm": 0.9312451071245191, "learning_rate": 9.037262748444296e-06, "loss": 0.6089, "step": 549 }, { "epoch": 0.22542138429222808, "grad_norm": 0.972319827014016, "learning_rate": 9.03334094726304e-06, "loss": 0.6327, "step": 550 }, { "epoch": 0.22583124135457758, "grad_norm": 0.9173069560414837, "learning_rate": 9.029412028993934e-06, "loss": 0.6922, "step": 551 }, { "epoch": 0.2262410984169271, "grad_norm": 0.9032188308911839, "learning_rate": 9.025476000569799e-06, "loss": 0.6504, "step": 552 }, { "epoch": 0.2266509554792766, "grad_norm": 0.9466230496971504, "learning_rate": 9.021532868936012e-06, "loss": 0.6727, "step": 553 }, { "epoch": 0.2270608125416261, "grad_norm": 0.9163513499599063, "learning_rate": 9.017582641050481e-06, "loss": 0.6219, "step": 554 }, { "epoch": 0.2274706696039756, "grad_norm": 1.0201278463934658, "learning_rate": 9.013625323883633e-06, "loss": 0.6511, "step": 555 }, { "epoch": 0.2278805266663251, "grad_norm": 0.8725586831930378, "learning_rate": 9.009660924418411e-06, "loss": 0.591, "step": 556 }, { "epoch": 0.22829038372867463, "grad_norm": 0.8804186330909698, "learning_rate": 9.005689449650246e-06, "loss": 0.5763, "step": 557 }, { "epoch": 0.22870024079102413, "grad_norm": 0.9325814195190042, "learning_rate": 9.001710906587064e-06, "loss": 0.6354, "step": 558 }, { "epoch": 0.22911009785337363, "grad_norm": 0.988480656446707, "learning_rate": 8.997725302249255e-06, "loss": 0.6241, "step": 559 }, { "epoch": 0.22951995491572313, "grad_norm": 1.00092574534484, "learning_rate": 8.993732643669675e-06, "loss": 0.6085, "step": 560 }, { "epoch": 0.22992981197807263, "grad_norm": 0.8988829219087683, "learning_rate": 8.989732937893623e-06, "loss": 0.6547, "step": 561 }, { "epoch": 0.23033966904042216, "grad_norm": 0.9088078042189096, "learning_rate": 8.985726191978839e-06, "loss": 0.6426, "step": 562 }, { "epoch": 0.23074952610277166, "grad_norm": 0.9165261634155963, "learning_rate": 8.981712412995478e-06, "loss": 0.633, "step": 563 }, { "epoch": 0.23115938316512116, "grad_norm": 0.9854849847281505, "learning_rate": 8.977691608026112e-06, "loss": 0.6752, "step": 564 }, { "epoch": 0.23156924022747066, "grad_norm": 0.9242141558279566, "learning_rate": 8.973663784165705e-06, "loss": 0.6332, "step": 565 }, { "epoch": 0.23197909728982016, "grad_norm": 0.886412053284817, "learning_rate": 8.969628948521616e-06, "loss": 0.5911, "step": 566 }, { "epoch": 0.2323889543521697, "grad_norm": 0.9766102342060892, "learning_rate": 8.965587108213565e-06, "loss": 0.7151, "step": 567 }, { "epoch": 0.2327988114145192, "grad_norm": 0.9214020691749254, "learning_rate": 8.96153827037364e-06, "loss": 0.686, "step": 568 }, { "epoch": 0.2332086684768687, "grad_norm": 0.9072564226580885, "learning_rate": 8.957482442146271e-06, "loss": 0.6019, "step": 569 }, { "epoch": 0.2336185255392182, "grad_norm": 1.0961096582511347, "learning_rate": 8.953419630688229e-06, "loss": 0.6751, "step": 570 }, { "epoch": 0.2340283826015677, "grad_norm": 1.0569740107997059, "learning_rate": 8.949349843168605e-06, "loss": 0.6524, "step": 571 }, { "epoch": 0.23443823966391722, "grad_norm": 0.9523941348497468, "learning_rate": 8.945273086768798e-06, "loss": 0.6541, "step": 572 }, { "epoch": 0.23484809672626672, "grad_norm": 0.896414208120836, "learning_rate": 8.941189368682503e-06, "loss": 0.6296, "step": 573 }, { "epoch": 0.23525795378861622, "grad_norm": 0.9310681360799072, "learning_rate": 8.937098696115707e-06, "loss": 0.6183, "step": 574 }, { "epoch": 0.23566781085096572, "grad_norm": 0.9554555579090108, "learning_rate": 8.933001076286659e-06, "loss": 0.618, "step": 575 }, { "epoch": 0.23607766791331522, "grad_norm": 0.9351976381544213, "learning_rate": 8.92889651642587e-06, "loss": 0.6623, "step": 576 }, { "epoch": 0.23648752497566475, "grad_norm": 1.0043579392925355, "learning_rate": 8.924785023776102e-06, "loss": 0.6693, "step": 577 }, { "epoch": 0.23689738203801425, "grad_norm": 0.942704010499086, "learning_rate": 8.920666605592343e-06, "loss": 0.6763, "step": 578 }, { "epoch": 0.23730723910036375, "grad_norm": 0.9088698842921653, "learning_rate": 8.916541269141807e-06, "loss": 0.6464, "step": 579 }, { "epoch": 0.23771709616271325, "grad_norm": 0.9698743009285515, "learning_rate": 8.912409021703914e-06, "loss": 0.6604, "step": 580 }, { "epoch": 0.23812695322506275, "grad_norm": 0.950199352228834, "learning_rate": 8.908269870570278e-06, "loss": 0.6435, "step": 581 }, { "epoch": 0.23853681028741228, "grad_norm": 0.8318810958162008, "learning_rate": 8.904123823044694e-06, "loss": 0.6345, "step": 582 }, { "epoch": 0.23894666734976178, "grad_norm": 0.980538840852774, "learning_rate": 8.899970886443133e-06, "loss": 0.68, "step": 583 }, { "epoch": 0.23935652441211128, "grad_norm": 0.9255507093555948, "learning_rate": 8.895811068093711e-06, "loss": 0.6617, "step": 584 }, { "epoch": 0.23976638147446078, "grad_norm": 0.9889219755147091, "learning_rate": 8.891644375336696e-06, "loss": 0.6534, "step": 585 }, { "epoch": 0.24017623853681028, "grad_norm": 1.0015019613871579, "learning_rate": 8.887470815524486e-06, "loss": 0.6265, "step": 586 }, { "epoch": 0.2405860955991598, "grad_norm": 0.982363947378659, "learning_rate": 8.883290396021589e-06, "loss": 0.6969, "step": 587 }, { "epoch": 0.2409959526615093, "grad_norm": 0.9214104040735684, "learning_rate": 8.879103124204626e-06, "loss": 0.5782, "step": 588 }, { "epoch": 0.2414058097238588, "grad_norm": 0.9414090359393414, "learning_rate": 8.874909007462306e-06, "loss": 0.6563, "step": 589 }, { "epoch": 0.2418156667862083, "grad_norm": 0.8667718671656868, "learning_rate": 8.870708053195414e-06, "loss": 0.5895, "step": 590 }, { "epoch": 0.2422255238485578, "grad_norm": 0.9580086683366452, "learning_rate": 8.866500268816803e-06, "loss": 0.623, "step": 591 }, { "epoch": 0.24263538091090733, "grad_norm": 0.9280008828335006, "learning_rate": 8.862285661751375e-06, "loss": 0.6023, "step": 592 }, { "epoch": 0.24304523797325683, "grad_norm": 0.8048730520441765, "learning_rate": 8.858064239436077e-06, "loss": 0.6095, "step": 593 }, { "epoch": 0.24345509503560633, "grad_norm": 0.9274020095140763, "learning_rate": 8.85383600931988e-06, "loss": 0.6587, "step": 594 }, { "epoch": 0.24386495209795583, "grad_norm": 0.9324134564733432, "learning_rate": 8.849600978863762e-06, "loss": 0.63, "step": 595 }, { "epoch": 0.24427480916030533, "grad_norm": 0.912223711888179, "learning_rate": 8.845359155540705e-06, "loss": 0.6368, "step": 596 }, { "epoch": 0.24468466622265486, "grad_norm": 0.930652739731215, "learning_rate": 8.84111054683568e-06, "loss": 0.6272, "step": 597 }, { "epoch": 0.24509452328500436, "grad_norm": 1.0456790371379918, "learning_rate": 8.836855160245629e-06, "loss": 0.7112, "step": 598 }, { "epoch": 0.24550438034735386, "grad_norm": 1.0155553476063137, "learning_rate": 8.83259300327945e-06, "loss": 0.6411, "step": 599 }, { "epoch": 0.24591423740970336, "grad_norm": 0.9303283657198675, "learning_rate": 8.828324083457992e-06, "loss": 0.6625, "step": 600 }, { "epoch": 0.24632409447205286, "grad_norm": 0.9174451171480336, "learning_rate": 8.824048408314042e-06, "loss": 0.614, "step": 601 }, { "epoch": 0.2467339515344024, "grad_norm": 0.8989629758214904, "learning_rate": 8.819765985392297e-06, "loss": 0.5985, "step": 602 }, { "epoch": 0.2471438085967519, "grad_norm": 0.919697639767833, "learning_rate": 8.815476822249365e-06, "loss": 0.6644, "step": 603 }, { "epoch": 0.2475536656591014, "grad_norm": 0.9146124179239646, "learning_rate": 8.811180926453753e-06, "loss": 0.6095, "step": 604 }, { "epoch": 0.2479635227214509, "grad_norm": 0.9669091724678797, "learning_rate": 8.80687830558584e-06, "loss": 0.6585, "step": 605 }, { "epoch": 0.2483733797838004, "grad_norm": 0.8833082368107967, "learning_rate": 8.802568967237876e-06, "loss": 0.6071, "step": 606 }, { "epoch": 0.24878323684614992, "grad_norm": 0.8955666335436591, "learning_rate": 8.798252919013965e-06, "loss": 0.5924, "step": 607 }, { "epoch": 0.24919309390849942, "grad_norm": 0.8833424786684243, "learning_rate": 8.79393016853005e-06, "loss": 0.5898, "step": 608 }, { "epoch": 0.24960295097084892, "grad_norm": 0.9787548447046432, "learning_rate": 8.7896007234139e-06, "loss": 0.7031, "step": 609 }, { "epoch": 0.2500128080331984, "grad_norm": 0.9078696131369395, "learning_rate": 8.785264591305098e-06, "loss": 0.619, "step": 610 }, { "epoch": 0.25042266509554795, "grad_norm": 0.8947151923374936, "learning_rate": 8.780921779855025e-06, "loss": 0.6399, "step": 611 }, { "epoch": 0.2508325221578974, "grad_norm": 0.9225474838199544, "learning_rate": 8.776572296726851e-06, "loss": 0.6201, "step": 612 }, { "epoch": 0.25124237922024695, "grad_norm": 0.910295813934489, "learning_rate": 8.772216149595515e-06, "loss": 0.5727, "step": 613 }, { "epoch": 0.2516522362825964, "grad_norm": 0.9324494733078766, "learning_rate": 8.767853346147718e-06, "loss": 0.6325, "step": 614 }, { "epoch": 0.25206209334494595, "grad_norm": 0.9102967028168402, "learning_rate": 8.763483894081906e-06, "loss": 0.6648, "step": 615 }, { "epoch": 0.2524719504072955, "grad_norm": 0.9253587373713067, "learning_rate": 8.759107801108257e-06, "loss": 0.6484, "step": 616 }, { "epoch": 0.25288180746964495, "grad_norm": 0.885597055705801, "learning_rate": 8.754725074948663e-06, "loss": 0.6202, "step": 617 }, { "epoch": 0.2532916645319945, "grad_norm": 0.9230470246241361, "learning_rate": 8.750335723336729e-06, "loss": 0.6221, "step": 618 }, { "epoch": 0.25370152159434395, "grad_norm": 0.9000481822879236, "learning_rate": 8.745939754017744e-06, "loss": 0.6155, "step": 619 }, { "epoch": 0.2541113786566935, "grad_norm": 0.9796164482582176, "learning_rate": 8.741537174748678e-06, "loss": 0.6686, "step": 620 }, { "epoch": 0.254521235719043, "grad_norm": 0.9165267074600955, "learning_rate": 8.737127993298161e-06, "loss": 0.6229, "step": 621 }, { "epoch": 0.2549310927813925, "grad_norm": 0.8696647358928813, "learning_rate": 8.73271221744648e-06, "loss": 0.5941, "step": 622 }, { "epoch": 0.255340949843742, "grad_norm": 0.9925987025589506, "learning_rate": 8.728289854985547e-06, "loss": 0.6345, "step": 623 }, { "epoch": 0.2557508069060915, "grad_norm": 0.9704683086949344, "learning_rate": 8.72386091371891e-06, "loss": 0.6508, "step": 624 }, { "epoch": 0.256160663968441, "grad_norm": 0.9708619217125203, "learning_rate": 8.719425401461716e-06, "loss": 0.7137, "step": 625 }, { "epoch": 0.25657052103079053, "grad_norm": 0.8991304574173983, "learning_rate": 8.714983326040707e-06, "loss": 0.6137, "step": 626 }, { "epoch": 0.25698037809314, "grad_norm": 0.9858062736461212, "learning_rate": 8.710534695294215e-06, "loss": 0.6324, "step": 627 }, { "epoch": 0.25739023515548953, "grad_norm": 0.9431049492489176, "learning_rate": 8.706079517072129e-06, "loss": 0.6213, "step": 628 }, { "epoch": 0.257800092217839, "grad_norm": 0.8903528878074898, "learning_rate": 8.701617799235896e-06, "loss": 0.6056, "step": 629 }, { "epoch": 0.25820994928018853, "grad_norm": 0.9034287273388876, "learning_rate": 8.697149549658501e-06, "loss": 0.6304, "step": 630 }, { "epoch": 0.25861980634253806, "grad_norm": 0.9985923276259432, "learning_rate": 8.69267477622446e-06, "loss": 0.628, "step": 631 }, { "epoch": 0.25902966340488753, "grad_norm": 0.9144083895516348, "learning_rate": 8.688193486829795e-06, "loss": 0.657, "step": 632 }, { "epoch": 0.25943952046723706, "grad_norm": 0.9157360102110803, "learning_rate": 8.683705689382025e-06, "loss": 0.647, "step": 633 }, { "epoch": 0.25984937752958653, "grad_norm": 0.8830974637674581, "learning_rate": 8.67921139180016e-06, "loss": 0.6185, "step": 634 }, { "epoch": 0.26025923459193606, "grad_norm": 1.0009747553177724, "learning_rate": 8.674710602014672e-06, "loss": 0.7026, "step": 635 }, { "epoch": 0.2606690916542856, "grad_norm": 0.9359788153032813, "learning_rate": 8.67020332796749e-06, "loss": 0.6558, "step": 636 }, { "epoch": 0.26107894871663506, "grad_norm": 0.8691072756474223, "learning_rate": 8.665689577611993e-06, "loss": 0.6319, "step": 637 }, { "epoch": 0.2614888057789846, "grad_norm": 0.9393485229714715, "learning_rate": 8.661169358912977e-06, "loss": 0.6438, "step": 638 }, { "epoch": 0.26189866284133406, "grad_norm": 0.87688657572448, "learning_rate": 8.656642679846661e-06, "loss": 0.5903, "step": 639 }, { "epoch": 0.2623085199036836, "grad_norm": 0.9445784049565163, "learning_rate": 8.652109548400656e-06, "loss": 0.705, "step": 640 }, { "epoch": 0.2627183769660331, "grad_norm": 0.8696006378896625, "learning_rate": 8.647569972573962e-06, "loss": 0.6204, "step": 641 }, { "epoch": 0.2631282340283826, "grad_norm": 0.8480959124992289, "learning_rate": 8.643023960376953e-06, "loss": 0.6046, "step": 642 }, { "epoch": 0.2635380910907321, "grad_norm": 0.8947307637849959, "learning_rate": 8.638471519831358e-06, "loss": 0.6306, "step": 643 }, { "epoch": 0.2639479481530816, "grad_norm": 0.913733929140852, "learning_rate": 8.633912658970247e-06, "loss": 0.6525, "step": 644 }, { "epoch": 0.2643578052154311, "grad_norm": 0.9499269578436106, "learning_rate": 8.629347385838023e-06, "loss": 0.6351, "step": 645 }, { "epoch": 0.26476766227778065, "grad_norm": 0.9280453503469189, "learning_rate": 8.624775708490403e-06, "loss": 0.6163, "step": 646 }, { "epoch": 0.2651775193401301, "grad_norm": 0.9206869672257686, "learning_rate": 8.620197634994401e-06, "loss": 0.6495, "step": 647 }, { "epoch": 0.26558737640247965, "grad_norm": 0.8746643924177638, "learning_rate": 8.61561317342832e-06, "loss": 0.6409, "step": 648 }, { "epoch": 0.2659972334648291, "grad_norm": 0.9176170780091806, "learning_rate": 8.611022331881742e-06, "loss": 0.6351, "step": 649 }, { "epoch": 0.26640709052717865, "grad_norm": 0.9165924857968336, "learning_rate": 8.606425118455492e-06, "loss": 0.6324, "step": 650 }, { "epoch": 0.2668169475895282, "grad_norm": 0.9450499917590055, "learning_rate": 8.601821541261653e-06, "loss": 0.5495, "step": 651 }, { "epoch": 0.26722680465187765, "grad_norm": 0.8832337848949606, "learning_rate": 8.597211608423527e-06, "loss": 0.6477, "step": 652 }, { "epoch": 0.2676366617142272, "grad_norm": 0.9379251952627018, "learning_rate": 8.592595328075636e-06, "loss": 0.6631, "step": 653 }, { "epoch": 0.26804651877657665, "grad_norm": 0.8838200145611845, "learning_rate": 8.587972708363703e-06, "loss": 0.6275, "step": 654 }, { "epoch": 0.2684563758389262, "grad_norm": 0.9407422642750393, "learning_rate": 8.583343757444636e-06, "loss": 0.6364, "step": 655 }, { "epoch": 0.2688662329012757, "grad_norm": 0.9256025934183475, "learning_rate": 8.578708483486513e-06, "loss": 0.6188, "step": 656 }, { "epoch": 0.2692760899636252, "grad_norm": 0.8393932342608722, "learning_rate": 8.574066894668573e-06, "loss": 0.5956, "step": 657 }, { "epoch": 0.2696859470259747, "grad_norm": 1.0026736355698835, "learning_rate": 8.569418999181194e-06, "loss": 0.6597, "step": 658 }, { "epoch": 0.2700958040883242, "grad_norm": 2.2514238060255916, "learning_rate": 8.564764805225887e-06, "loss": 0.6664, "step": 659 }, { "epoch": 0.2705056611506737, "grad_norm": 0.9009540811471138, "learning_rate": 8.560104321015274e-06, "loss": 0.5923, "step": 660 }, { "epoch": 0.27091551821302323, "grad_norm": 0.9137839082128703, "learning_rate": 8.555437554773074e-06, "loss": 0.5878, "step": 661 }, { "epoch": 0.2713253752753727, "grad_norm": 0.8902893611231019, "learning_rate": 8.550764514734098e-06, "loss": 0.5919, "step": 662 }, { "epoch": 0.27173523233772223, "grad_norm": 0.9794615475030007, "learning_rate": 8.546085209144224e-06, "loss": 0.6298, "step": 663 }, { "epoch": 0.2721450894000717, "grad_norm": 4.6577353925060825, "learning_rate": 8.541399646260384e-06, "loss": 0.68, "step": 664 }, { "epoch": 0.27255494646242123, "grad_norm": 1.016688437124396, "learning_rate": 8.536707834350553e-06, "loss": 0.6745, "step": 665 }, { "epoch": 0.27296480352477076, "grad_norm": 0.973310323394724, "learning_rate": 8.532009781693736e-06, "loss": 0.6564, "step": 666 }, { "epoch": 0.27337466058712023, "grad_norm": 0.9841863112598264, "learning_rate": 8.52730549657994e-06, "loss": 0.6496, "step": 667 }, { "epoch": 0.27378451764946976, "grad_norm": 0.8836816779752752, "learning_rate": 8.522594987310184e-06, "loss": 0.5814, "step": 668 }, { "epoch": 0.27419437471181923, "grad_norm": 0.9904750902160162, "learning_rate": 8.517878262196462e-06, "loss": 0.5697, "step": 669 }, { "epoch": 0.27460423177416876, "grad_norm": 0.8944366817854605, "learning_rate": 8.513155329561731e-06, "loss": 0.6172, "step": 670 }, { "epoch": 0.2750140888365183, "grad_norm": 0.8798219095674473, "learning_rate": 8.508426197739914e-06, "loss": 0.6276, "step": 671 }, { "epoch": 0.27542394589886776, "grad_norm": 0.941329119845467, "learning_rate": 8.503690875075862e-06, "loss": 0.6435, "step": 672 }, { "epoch": 0.2758338029612173, "grad_norm": 0.9375163502081082, "learning_rate": 8.49894936992536e-06, "loss": 0.6461, "step": 673 }, { "epoch": 0.27624366002356676, "grad_norm": 0.8837407983816019, "learning_rate": 8.494201690655088e-06, "loss": 0.6677, "step": 674 }, { "epoch": 0.2766535170859163, "grad_norm": 0.9167815444837252, "learning_rate": 8.489447845642638e-06, "loss": 0.612, "step": 675 }, { "epoch": 0.2770633741482658, "grad_norm": 0.9737296950327986, "learning_rate": 8.48468784327647e-06, "loss": 0.632, "step": 676 }, { "epoch": 0.2774732312106153, "grad_norm": 0.9569523081196121, "learning_rate": 8.479921691955908e-06, "loss": 0.6231, "step": 677 }, { "epoch": 0.2778830882729648, "grad_norm": 0.8617241651861582, "learning_rate": 8.475149400091139e-06, "loss": 0.5781, "step": 678 }, { "epoch": 0.2782929453353143, "grad_norm": 0.9391609374121477, "learning_rate": 8.470370976103171e-06, "loss": 0.682, "step": 679 }, { "epoch": 0.2787028023976638, "grad_norm": 0.8938643579837721, "learning_rate": 8.46558642842384e-06, "loss": 0.6104, "step": 680 }, { "epoch": 0.27911265946001335, "grad_norm": 0.8801049040274936, "learning_rate": 8.460795765495785e-06, "loss": 0.6225, "step": 681 }, { "epoch": 0.2795225165223628, "grad_norm": 0.9502035342409964, "learning_rate": 8.45599899577244e-06, "loss": 0.635, "step": 682 }, { "epoch": 0.27993237358471235, "grad_norm": 0.9478382778276946, "learning_rate": 8.451196127718012e-06, "loss": 0.656, "step": 683 }, { "epoch": 0.2803422306470618, "grad_norm": 1.043886011454192, "learning_rate": 8.446387169807463e-06, "loss": 0.6379, "step": 684 }, { "epoch": 0.28075208770941135, "grad_norm": 0.9290791546699341, "learning_rate": 8.441572130526512e-06, "loss": 0.6492, "step": 685 }, { "epoch": 0.2811619447717609, "grad_norm": 0.9973378951662397, "learning_rate": 8.4367510183716e-06, "loss": 0.6198, "step": 686 }, { "epoch": 0.28157180183411035, "grad_norm": 1.0149840239453447, "learning_rate": 8.431923841849891e-06, "loss": 0.7149, "step": 687 }, { "epoch": 0.2819816588964599, "grad_norm": 1.0256654792175157, "learning_rate": 8.427090609479246e-06, "loss": 0.6252, "step": 688 }, { "epoch": 0.28239151595880935, "grad_norm": 0.9152345637318333, "learning_rate": 8.422251329788207e-06, "loss": 0.628, "step": 689 }, { "epoch": 0.2828013730211589, "grad_norm": 0.8820796588761827, "learning_rate": 8.417406011316e-06, "loss": 0.6149, "step": 690 }, { "epoch": 0.2832112300835084, "grad_norm": 0.9555730695508452, "learning_rate": 8.412554662612491e-06, "loss": 0.6461, "step": 691 }, { "epoch": 0.2836210871458579, "grad_norm": 0.9775983973207598, "learning_rate": 8.407697292238203e-06, "loss": 0.6676, "step": 692 }, { "epoch": 0.2840309442082074, "grad_norm": 0.921238396603472, "learning_rate": 8.40283390876427e-06, "loss": 0.6197, "step": 693 }, { "epoch": 0.2844408012705569, "grad_norm": 0.8877646973852603, "learning_rate": 8.397964520772446e-06, "loss": 0.6635, "step": 694 }, { "epoch": 0.2848506583329064, "grad_norm": 0.8758488315982328, "learning_rate": 8.393089136855079e-06, "loss": 0.5921, "step": 695 }, { "epoch": 0.28526051539525593, "grad_norm": 0.9597422603682262, "learning_rate": 8.388207765615091e-06, "loss": 0.6488, "step": 696 }, { "epoch": 0.2856703724576054, "grad_norm": 0.9072153349473043, "learning_rate": 8.383320415665977e-06, "loss": 0.6118, "step": 697 }, { "epoch": 0.28608022951995493, "grad_norm": 1.0029362466232405, "learning_rate": 8.378427095631776e-06, "loss": 0.6307, "step": 698 }, { "epoch": 0.2864900865823044, "grad_norm": 0.954627247124495, "learning_rate": 8.373527814147067e-06, "loss": 0.6648, "step": 699 }, { "epoch": 0.28689994364465393, "grad_norm": 0.9147237635924793, "learning_rate": 8.368622579856943e-06, "loss": 0.6739, "step": 700 }, { "epoch": 0.28730980070700346, "grad_norm": 0.9556000011922184, "learning_rate": 8.363711401417e-06, "loss": 0.684, "step": 701 }, { "epoch": 0.28771965776935293, "grad_norm": 0.9294198547473005, "learning_rate": 8.358794287493332e-06, "loss": 0.6157, "step": 702 }, { "epoch": 0.28812951483170246, "grad_norm": 0.9139699742225578, "learning_rate": 8.353871246762499e-06, "loss": 0.6052, "step": 703 }, { "epoch": 0.28853937189405193, "grad_norm": 0.901835126469778, "learning_rate": 8.34894228791152e-06, "loss": 0.5805, "step": 704 }, { "epoch": 0.28894922895640146, "grad_norm": 0.9198639884193597, "learning_rate": 8.34400741963786e-06, "loss": 0.6448, "step": 705 }, { "epoch": 0.289359086018751, "grad_norm": 0.9170977165619594, "learning_rate": 8.339066650649409e-06, "loss": 0.645, "step": 706 }, { "epoch": 0.28976894308110046, "grad_norm": 0.9833621123770627, "learning_rate": 8.334119989664465e-06, "loss": 0.6665, "step": 707 }, { "epoch": 0.29017880014345, "grad_norm": 0.968702923881669, "learning_rate": 8.329167445411733e-06, "loss": 0.6571, "step": 708 }, { "epoch": 0.29058865720579946, "grad_norm": 0.9550855310765778, "learning_rate": 8.324209026630293e-06, "loss": 0.6416, "step": 709 }, { "epoch": 0.290998514268149, "grad_norm": 0.9754478169638664, "learning_rate": 8.319244742069587e-06, "loss": 0.6857, "step": 710 }, { "epoch": 0.29140837133049846, "grad_norm": 0.8921853470788165, "learning_rate": 8.314274600489419e-06, "loss": 0.5992, "step": 711 }, { "epoch": 0.291818228392848, "grad_norm": 1.0062683512996866, "learning_rate": 8.309298610659917e-06, "loss": 0.6928, "step": 712 }, { "epoch": 0.2922280854551975, "grad_norm": 1.0100642758781135, "learning_rate": 8.304316781361534e-06, "loss": 0.6283, "step": 713 }, { "epoch": 0.292637942517547, "grad_norm": 0.8484844184652135, "learning_rate": 8.299329121385027e-06, "loss": 0.5803, "step": 714 }, { "epoch": 0.2930477995798965, "grad_norm": 0.8246284078765677, "learning_rate": 8.294335639531441e-06, "loss": 0.575, "step": 715 }, { "epoch": 0.293457656642246, "grad_norm": 0.9815809509212354, "learning_rate": 8.289336344612094e-06, "loss": 0.6834, "step": 716 }, { "epoch": 0.2938675137045955, "grad_norm": 0.8677394350742974, "learning_rate": 8.284331245448558e-06, "loss": 0.6117, "step": 717 }, { "epoch": 0.29427737076694505, "grad_norm": 0.9085577994986176, "learning_rate": 8.279320350872655e-06, "loss": 0.6102, "step": 718 }, { "epoch": 0.2946872278292945, "grad_norm": 0.8390598517026674, "learning_rate": 8.274303669726427e-06, "loss": 0.5711, "step": 719 }, { "epoch": 0.29509708489164405, "grad_norm": 0.9739680333197049, "learning_rate": 8.269281210862125e-06, "loss": 0.7113, "step": 720 }, { "epoch": 0.2955069419539935, "grad_norm": 0.9369252883882595, "learning_rate": 8.264252983142204e-06, "loss": 0.6517, "step": 721 }, { "epoch": 0.29591679901634305, "grad_norm": 0.9290425477445975, "learning_rate": 8.25921899543929e-06, "loss": 0.6046, "step": 722 }, { "epoch": 0.2963266560786926, "grad_norm": 0.9221360005386883, "learning_rate": 8.25417925663618e-06, "loss": 0.6452, "step": 723 }, { "epoch": 0.29673651314104205, "grad_norm": 0.9537935595481676, "learning_rate": 8.249133775625809e-06, "loss": 0.6233, "step": 724 }, { "epoch": 0.2971463702033916, "grad_norm": 0.9454715024003711, "learning_rate": 8.244082561311253e-06, "loss": 0.7009, "step": 725 }, { "epoch": 0.29755622726574105, "grad_norm": 0.9230581874199104, "learning_rate": 8.239025622605705e-06, "loss": 0.6504, "step": 726 }, { "epoch": 0.2979660843280906, "grad_norm": 0.8936872053169469, "learning_rate": 8.233962968432454e-06, "loss": 0.5919, "step": 727 }, { "epoch": 0.2983759413904401, "grad_norm": 0.8916772989038152, "learning_rate": 8.228894607724878e-06, "loss": 0.6162, "step": 728 }, { "epoch": 0.2987857984527896, "grad_norm": 0.9673512213338538, "learning_rate": 8.22382054942642e-06, "loss": 0.6666, "step": 729 }, { "epoch": 0.2991956555151391, "grad_norm": 0.911165964090236, "learning_rate": 8.218740802490587e-06, "loss": 0.6258, "step": 730 }, { "epoch": 0.2996055125774886, "grad_norm": 0.9999116978196771, "learning_rate": 8.213655375880912e-06, "loss": 0.6615, "step": 731 }, { "epoch": 0.3000153696398381, "grad_norm": 0.9603578872042428, "learning_rate": 8.208564278570955e-06, "loss": 0.5718, "step": 732 }, { "epoch": 0.30042522670218763, "grad_norm": 0.9581679609025451, "learning_rate": 8.203467519544285e-06, "loss": 0.6016, "step": 733 }, { "epoch": 0.3008350837645371, "grad_norm": 0.9746473123550218, "learning_rate": 8.198365107794457e-06, "loss": 0.6304, "step": 734 }, { "epoch": 0.30124494082688663, "grad_norm": 0.9050415221994591, "learning_rate": 8.193257052325004e-06, "loss": 0.6524, "step": 735 }, { "epoch": 0.3016547978892361, "grad_norm": 0.9739181694860202, "learning_rate": 8.188143362149417e-06, "loss": 0.6617, "step": 736 }, { "epoch": 0.30206465495158563, "grad_norm": 0.9021460836370256, "learning_rate": 8.183024046291127e-06, "loss": 0.6115, "step": 737 }, { "epoch": 0.30247451201393516, "grad_norm": 0.950666410783993, "learning_rate": 8.177899113783493e-06, "loss": 0.6719, "step": 738 }, { "epoch": 0.30288436907628463, "grad_norm": 0.8831446006933701, "learning_rate": 8.172768573669789e-06, "loss": 0.6247, "step": 739 }, { "epoch": 0.30329422613863416, "grad_norm": 0.9183705987276262, "learning_rate": 8.167632435003181e-06, "loss": 0.6263, "step": 740 }, { "epoch": 0.30370408320098363, "grad_norm": 0.8968230575291446, "learning_rate": 8.16249070684671e-06, "loss": 0.6219, "step": 741 }, { "epoch": 0.30411394026333316, "grad_norm": 0.9125435234490126, "learning_rate": 8.157343398273289e-06, "loss": 0.5997, "step": 742 }, { "epoch": 0.3045237973256827, "grad_norm": 0.836389035934933, "learning_rate": 8.15219051836567e-06, "loss": 0.5762, "step": 743 }, { "epoch": 0.30493365438803216, "grad_norm": 0.8905630130470841, "learning_rate": 8.147032076216439e-06, "loss": 0.6243, "step": 744 }, { "epoch": 0.3053435114503817, "grad_norm": 0.850590006431916, "learning_rate": 8.141868080927998e-06, "loss": 0.5844, "step": 745 }, { "epoch": 0.30575336851273116, "grad_norm": 0.8389999395831045, "learning_rate": 8.136698541612544e-06, "loss": 0.5949, "step": 746 }, { "epoch": 0.3061632255750807, "grad_norm": 0.8214342651690416, "learning_rate": 8.131523467392063e-06, "loss": 0.5538, "step": 747 }, { "epoch": 0.3065730826374302, "grad_norm": 0.8554195190905365, "learning_rate": 8.126342867398301e-06, "loss": 0.5878, "step": 748 }, { "epoch": 0.3069829396997797, "grad_norm": 0.918590715579228, "learning_rate": 8.121156750772761e-06, "loss": 0.6002, "step": 749 }, { "epoch": 0.3073927967621292, "grad_norm": 0.8692408367027756, "learning_rate": 8.115965126666673e-06, "loss": 0.6068, "step": 750 }, { "epoch": 0.3078026538244787, "grad_norm": 0.9774109598589927, "learning_rate": 8.110768004240993e-06, "loss": 0.6665, "step": 751 }, { "epoch": 0.3082125108868282, "grad_norm": 0.9155669186278471, "learning_rate": 8.105565392666378e-06, "loss": 0.6136, "step": 752 }, { "epoch": 0.30862236794917774, "grad_norm": 0.978929181706621, "learning_rate": 8.100357301123162e-06, "loss": 0.5679, "step": 753 }, { "epoch": 0.3090322250115272, "grad_norm": 0.8823101163906769, "learning_rate": 8.095143738801358e-06, "loss": 0.6315, "step": 754 }, { "epoch": 0.30944208207387675, "grad_norm": 0.8832363005757313, "learning_rate": 8.089924714900631e-06, "loss": 0.5948, "step": 755 }, { "epoch": 0.3098519391362262, "grad_norm": 1.051617568380832, "learning_rate": 8.084700238630283e-06, "loss": 0.6379, "step": 756 }, { "epoch": 0.31026179619857575, "grad_norm": 0.9548824217890742, "learning_rate": 8.079470319209236e-06, "loss": 0.6161, "step": 757 }, { "epoch": 0.3106716532609253, "grad_norm": 0.9890769204877043, "learning_rate": 8.074234965866013e-06, "loss": 0.6319, "step": 758 }, { "epoch": 0.31108151032327475, "grad_norm": 0.9030176352786934, "learning_rate": 8.068994187838733e-06, "loss": 0.6194, "step": 759 }, { "epoch": 0.3114913673856243, "grad_norm": 0.8841654201598095, "learning_rate": 8.063747994375085e-06, "loss": 0.6436, "step": 760 }, { "epoch": 0.31190122444797375, "grad_norm": 0.8988890252854391, "learning_rate": 8.058496394732309e-06, "loss": 0.6524, "step": 761 }, { "epoch": 0.3123110815103233, "grad_norm": 0.902191770785708, "learning_rate": 8.053239398177191e-06, "loss": 0.6447, "step": 762 }, { "epoch": 0.3127209385726728, "grad_norm": 0.9320760692899376, "learning_rate": 8.047977013986037e-06, "loss": 0.6935, "step": 763 }, { "epoch": 0.3131307956350223, "grad_norm": 0.8583575400783752, "learning_rate": 8.042709251444657e-06, "loss": 0.549, "step": 764 }, { "epoch": 0.3135406526973718, "grad_norm": 0.8572322210065338, "learning_rate": 8.03743611984836e-06, "loss": 0.6118, "step": 765 }, { "epoch": 0.3139505097597213, "grad_norm": 0.9209423770878413, "learning_rate": 8.03215762850192e-06, "loss": 0.6248, "step": 766 }, { "epoch": 0.3143603668220708, "grad_norm": 0.9016129795227299, "learning_rate": 8.026873786719574e-06, "loss": 0.6278, "step": 767 }, { "epoch": 0.31477022388442033, "grad_norm": 0.885127538582919, "learning_rate": 8.021584603824996e-06, "loss": 0.5806, "step": 768 }, { "epoch": 0.3151800809467698, "grad_norm": 0.9405924609444561, "learning_rate": 8.016290089151293e-06, "loss": 0.6063, "step": 769 }, { "epoch": 0.31558993800911933, "grad_norm": 0.9721783558464843, "learning_rate": 8.010990252040967e-06, "loss": 0.6314, "step": 770 }, { "epoch": 0.3159997950714688, "grad_norm": 0.9877145188779, "learning_rate": 8.005685101845923e-06, "loss": 0.6504, "step": 771 }, { "epoch": 0.31640965213381833, "grad_norm": 0.9603088090072154, "learning_rate": 8.000374647927437e-06, "loss": 0.6285, "step": 772 }, { "epoch": 0.31681950919616786, "grad_norm": 0.8358842007965986, "learning_rate": 7.995058899656146e-06, "loss": 0.6605, "step": 773 }, { "epoch": 0.31722936625851733, "grad_norm": 0.9137862157884057, "learning_rate": 7.989737866412025e-06, "loss": 0.6739, "step": 774 }, { "epoch": 0.31763922332086686, "grad_norm": 1.0161366840631083, "learning_rate": 7.984411557584377e-06, "loss": 0.6873, "step": 775 }, { "epoch": 0.31804908038321633, "grad_norm": 0.9287981956070639, "learning_rate": 7.979079982571818e-06, "loss": 0.6302, "step": 776 }, { "epoch": 0.31845893744556586, "grad_norm": 0.8566970758094414, "learning_rate": 7.97374315078225e-06, "loss": 0.6309, "step": 777 }, { "epoch": 0.3188687945079154, "grad_norm": 1.0181197406837985, "learning_rate": 7.968401071632854e-06, "loss": 0.6894, "step": 778 }, { "epoch": 0.31927865157026486, "grad_norm": 0.934545843942762, "learning_rate": 7.96305375455007e-06, "loss": 0.6536, "step": 779 }, { "epoch": 0.3196885086326144, "grad_norm": 0.9391137842084358, "learning_rate": 7.95770120896958e-06, "loss": 0.6178, "step": 780 }, { "epoch": 0.32009836569496386, "grad_norm": 0.8747712927818503, "learning_rate": 7.952343444336295e-06, "loss": 0.5835, "step": 781 }, { "epoch": 0.3205082227573134, "grad_norm": 0.9085282734389628, "learning_rate": 7.94698047010433e-06, "loss": 0.6315, "step": 782 }, { "epoch": 0.3209180798196629, "grad_norm": 0.8598437619177859, "learning_rate": 7.941612295737e-06, "loss": 0.5778, "step": 783 }, { "epoch": 0.3213279368820124, "grad_norm": 0.9029082636071951, "learning_rate": 7.93623893070679e-06, "loss": 0.6233, "step": 784 }, { "epoch": 0.3217377939443619, "grad_norm": 0.9615999707934129, "learning_rate": 7.930860384495342e-06, "loss": 0.6267, "step": 785 }, { "epoch": 0.3221476510067114, "grad_norm": 0.9178848434189926, "learning_rate": 7.92547666659345e-06, "loss": 0.6602, "step": 786 }, { "epoch": 0.3225575080690609, "grad_norm": 0.8867416545387492, "learning_rate": 7.920087786501025e-06, "loss": 0.6046, "step": 787 }, { "epoch": 0.32296736513141044, "grad_norm": 0.9546892732838623, "learning_rate": 7.914693753727093e-06, "loss": 0.5718, "step": 788 }, { "epoch": 0.3233772221937599, "grad_norm": 0.9391218458536794, "learning_rate": 7.909294577789765e-06, "loss": 0.6542, "step": 789 }, { "epoch": 0.32378707925610944, "grad_norm": 0.8987966459118402, "learning_rate": 7.903890268216237e-06, "loss": 0.6311, "step": 790 }, { "epoch": 0.3241969363184589, "grad_norm": 0.9105154724657066, "learning_rate": 7.898480834542755e-06, "loss": 0.6584, "step": 791 }, { "epoch": 0.32460679338080844, "grad_norm": 0.931956734028986, "learning_rate": 7.893066286314613e-06, "loss": 0.6496, "step": 792 }, { "epoch": 0.325016650443158, "grad_norm": 0.8745993255582439, "learning_rate": 7.887646633086126e-06, "loss": 0.6196, "step": 793 }, { "epoch": 0.32542650750550745, "grad_norm": 0.915921856669605, "learning_rate": 7.882221884420617e-06, "loss": 0.6322, "step": 794 }, { "epoch": 0.325836364567857, "grad_norm": 0.851710371280093, "learning_rate": 7.876792049890405e-06, "loss": 0.6001, "step": 795 }, { "epoch": 0.32624622163020645, "grad_norm": 0.8609237395885269, "learning_rate": 7.87135713907678e-06, "loss": 0.6115, "step": 796 }, { "epoch": 0.326656078692556, "grad_norm": 0.9760918655871068, "learning_rate": 7.865917161569987e-06, "loss": 0.6551, "step": 797 }, { "epoch": 0.3270659357549055, "grad_norm": 0.9082989747326331, "learning_rate": 7.860472126969213e-06, "loss": 0.6531, "step": 798 }, { "epoch": 0.327475792817255, "grad_norm": 0.8970187648365274, "learning_rate": 7.855022044882572e-06, "loss": 0.6331, "step": 799 }, { "epoch": 0.3278856498796045, "grad_norm": 0.9613976267988735, "learning_rate": 7.849566924927082e-06, "loss": 0.6263, "step": 800 }, { "epoch": 0.328295506941954, "grad_norm": 0.9182352691678264, "learning_rate": 7.84410677672865e-06, "loss": 0.5984, "step": 801 }, { "epoch": 0.3287053640043035, "grad_norm": 0.9621655202233856, "learning_rate": 7.838641609922057e-06, "loss": 0.6728, "step": 802 }, { "epoch": 0.32911522106665303, "grad_norm": 0.926391165891455, "learning_rate": 7.833171434150939e-06, "loss": 0.6425, "step": 803 }, { "epoch": 0.3295250781290025, "grad_norm": 0.8541024980611409, "learning_rate": 7.82769625906777e-06, "loss": 0.6344, "step": 804 }, { "epoch": 0.32993493519135203, "grad_norm": 0.9183525524647806, "learning_rate": 7.822216094333847e-06, "loss": 0.6062, "step": 805 }, { "epoch": 0.3303447922537015, "grad_norm": 0.8821898722350946, "learning_rate": 7.81673094961927e-06, "loss": 0.6476, "step": 806 }, { "epoch": 0.33075464931605103, "grad_norm": 0.9405161084841117, "learning_rate": 7.811240834602929e-06, "loss": 0.6623, "step": 807 }, { "epoch": 0.33116450637840056, "grad_norm": 0.9275946276886832, "learning_rate": 7.805745758972482e-06, "loss": 0.6577, "step": 808 }, { "epoch": 0.33157436344075003, "grad_norm": 0.8765827597482941, "learning_rate": 7.80024573242434e-06, "loss": 0.5781, "step": 809 }, { "epoch": 0.33198422050309956, "grad_norm": 0.8632522841477129, "learning_rate": 7.794740764663653e-06, "loss": 0.6065, "step": 810 }, { "epoch": 0.33239407756544903, "grad_norm": 0.9133232897887319, "learning_rate": 7.789230865404287e-06, "loss": 0.6639, "step": 811 }, { "epoch": 0.33280393462779856, "grad_norm": 0.9688724996067842, "learning_rate": 7.783716044368812e-06, "loss": 0.6818, "step": 812 }, { "epoch": 0.3332137916901481, "grad_norm": 0.9309180234059397, "learning_rate": 7.778196311288484e-06, "loss": 0.6093, "step": 813 }, { "epoch": 0.33362364875249756, "grad_norm": 0.7981993744317325, "learning_rate": 7.772671675903222e-06, "loss": 0.6532, "step": 814 }, { "epoch": 0.3340335058148471, "grad_norm": 0.9372811600794941, "learning_rate": 7.767142147961598e-06, "loss": 0.658, "step": 815 }, { "epoch": 0.33444336287719656, "grad_norm": 0.9994933311706636, "learning_rate": 7.76160773722082e-06, "loss": 0.6716, "step": 816 }, { "epoch": 0.3348532199395461, "grad_norm": 0.8871540380550142, "learning_rate": 7.756068453446707e-06, "loss": 0.5693, "step": 817 }, { "epoch": 0.3352630770018956, "grad_norm": 0.8798510736514145, "learning_rate": 7.750524306413681e-06, "loss": 0.655, "step": 818 }, { "epoch": 0.3356729340642451, "grad_norm": 0.9400949361683093, "learning_rate": 7.744975305904742e-06, "loss": 0.6515, "step": 819 }, { "epoch": 0.3360827911265946, "grad_norm": 0.8808034927577336, "learning_rate": 7.739421461711457e-06, "loss": 0.5925, "step": 820 }, { "epoch": 0.3364926481889441, "grad_norm": 0.8837080311911811, "learning_rate": 7.73386278363394e-06, "loss": 0.5894, "step": 821 }, { "epoch": 0.3369025052512936, "grad_norm": 0.8974936026278227, "learning_rate": 7.728299281480833e-06, "loss": 0.654, "step": 822 }, { "epoch": 0.33731236231364314, "grad_norm": 0.861379854492864, "learning_rate": 7.722730965069291e-06, "loss": 0.5725, "step": 823 }, { "epoch": 0.3377222193759926, "grad_norm": 0.9678666371710299, "learning_rate": 7.717157844224962e-06, "loss": 0.6692, "step": 824 }, { "epoch": 0.33813207643834214, "grad_norm": 0.9330962447862109, "learning_rate": 7.711579928781977e-06, "loss": 0.6907, "step": 825 }, { "epoch": 0.3385419335006916, "grad_norm": 0.8752760763425423, "learning_rate": 7.705997228582924e-06, "loss": 0.6095, "step": 826 }, { "epoch": 0.33895179056304114, "grad_norm": 0.9177930028211304, "learning_rate": 7.700409753478832e-06, "loss": 0.6343, "step": 827 }, { "epoch": 0.33936164762539067, "grad_norm": 0.9224435544014402, "learning_rate": 7.694817513329159e-06, "loss": 0.6431, "step": 828 }, { "epoch": 0.33977150468774014, "grad_norm": 0.8711457449460842, "learning_rate": 7.689220518001771e-06, "loss": 0.6187, "step": 829 }, { "epoch": 0.3401813617500897, "grad_norm": 0.9060459228019416, "learning_rate": 7.683618777372923e-06, "loss": 0.6536, "step": 830 }, { "epoch": 0.34059121881243914, "grad_norm": 0.8645443880862171, "learning_rate": 7.678012301327244e-06, "loss": 0.5648, "step": 831 }, { "epoch": 0.3410010758747887, "grad_norm": 0.849352651839283, "learning_rate": 7.672401099757722e-06, "loss": 0.6077, "step": 832 }, { "epoch": 0.3414109329371382, "grad_norm": 0.9789808530727929, "learning_rate": 7.666785182565676e-06, "loss": 0.6313, "step": 833 }, { "epoch": 0.3418207899994877, "grad_norm": 0.932899523145675, "learning_rate": 7.661164559660754e-06, "loss": 0.6395, "step": 834 }, { "epoch": 0.3422306470618372, "grad_norm": 0.8229426090610606, "learning_rate": 7.655539240960905e-06, "loss": 0.5995, "step": 835 }, { "epoch": 0.3426405041241867, "grad_norm": 0.9937682251294773, "learning_rate": 7.649909236392361e-06, "loss": 0.6561, "step": 836 }, { "epoch": 0.3430503611865362, "grad_norm": 0.9345959328766529, "learning_rate": 7.644274555889625e-06, "loss": 0.6053, "step": 837 }, { "epoch": 0.3434602182488857, "grad_norm": 0.9340255101816487, "learning_rate": 7.638635209395454e-06, "loss": 0.676, "step": 838 }, { "epoch": 0.3438700753112352, "grad_norm": 0.9790666094667553, "learning_rate": 7.632991206860831e-06, "loss": 0.6453, "step": 839 }, { "epoch": 0.34427993237358473, "grad_norm": 0.8676322649497654, "learning_rate": 7.627342558244963e-06, "loss": 0.553, "step": 840 }, { "epoch": 0.3446897894359342, "grad_norm": 0.8296517289408768, "learning_rate": 7.621689273515248e-06, "loss": 0.5477, "step": 841 }, { "epoch": 0.34509964649828373, "grad_norm": 0.8985636213411187, "learning_rate": 7.61603136264727e-06, "loss": 0.6586, "step": 842 }, { "epoch": 0.3455095035606332, "grad_norm": 0.8597856626488813, "learning_rate": 7.6103688356247755e-06, "loss": 0.6059, "step": 843 }, { "epoch": 0.34591936062298273, "grad_norm": 0.8714011571880431, "learning_rate": 7.604701702439652e-06, "loss": 0.6341, "step": 844 }, { "epoch": 0.34632921768533226, "grad_norm": 0.9103146536165035, "learning_rate": 7.599029973091921e-06, "loss": 0.636, "step": 845 }, { "epoch": 0.34673907474768173, "grad_norm": 0.8819310809343326, "learning_rate": 7.59335365758971e-06, "loss": 0.6267, "step": 846 }, { "epoch": 0.34714893181003126, "grad_norm": 0.8431416833873004, "learning_rate": 7.5876727659492414e-06, "loss": 0.58, "step": 847 }, { "epoch": 0.34755878887238073, "grad_norm": 0.9885654682823908, "learning_rate": 7.5819873081948105e-06, "loss": 0.6219, "step": 848 }, { "epoch": 0.34796864593473026, "grad_norm": 0.9186670954024708, "learning_rate": 7.576297294358772e-06, "loss": 0.7024, "step": 849 }, { "epoch": 0.3483785029970798, "grad_norm": 0.8911553854983014, "learning_rate": 7.570602734481517e-06, "loss": 0.6119, "step": 850 }, { "epoch": 0.34878836005942926, "grad_norm": 0.8941040555584264, "learning_rate": 7.564903638611463e-06, "loss": 0.6474, "step": 851 }, { "epoch": 0.3491982171217788, "grad_norm": 0.8885613388823898, "learning_rate": 7.559200016805027e-06, "loss": 0.63, "step": 852 }, { "epoch": 0.34960807418412826, "grad_norm": 0.9366390942594093, "learning_rate": 7.553491879126613e-06, "loss": 0.6595, "step": 853 }, { "epoch": 0.3500179312464778, "grad_norm": 0.9092904387822276, "learning_rate": 7.547779235648598e-06, "loss": 0.6407, "step": 854 }, { "epoch": 0.3504277883088273, "grad_norm": 0.9051701207799567, "learning_rate": 7.542062096451306e-06, "loss": 0.6146, "step": 855 }, { "epoch": 0.3508376453711768, "grad_norm": 0.9647812086414473, "learning_rate": 7.536340471622991e-06, "loss": 0.6768, "step": 856 }, { "epoch": 0.3512475024335263, "grad_norm": 0.82605540183469, "learning_rate": 7.53061437125983e-06, "loss": 0.5775, "step": 857 }, { "epoch": 0.3516573594958758, "grad_norm": 0.9201502462794282, "learning_rate": 7.524883805465889e-06, "loss": 0.6266, "step": 858 }, { "epoch": 0.3520672165582253, "grad_norm": 0.8988083582915508, "learning_rate": 7.519148784353122e-06, "loss": 0.6116, "step": 859 }, { "epoch": 0.35247707362057484, "grad_norm": 0.9062896789005043, "learning_rate": 7.513409318041336e-06, "loss": 0.6419, "step": 860 }, { "epoch": 0.3528869306829243, "grad_norm": 0.8465626957302279, "learning_rate": 7.507665416658188e-06, "loss": 0.6088, "step": 861 }, { "epoch": 0.35329678774527384, "grad_norm": 0.8802985208161099, "learning_rate": 7.501917090339158e-06, "loss": 0.6055, "step": 862 }, { "epoch": 0.3537066448076233, "grad_norm": 0.8913583333896032, "learning_rate": 7.496164349227537e-06, "loss": 0.6158, "step": 863 }, { "epoch": 0.35411650186997284, "grad_norm": 0.9029468615941765, "learning_rate": 7.490407203474403e-06, "loss": 0.611, "step": 864 }, { "epoch": 0.35452635893232237, "grad_norm": 0.8767100752818348, "learning_rate": 7.48464566323861e-06, "loss": 0.6424, "step": 865 }, { "epoch": 0.35493621599467184, "grad_norm": 0.906626838461339, "learning_rate": 7.4788797386867596e-06, "loss": 0.5458, "step": 866 }, { "epoch": 0.35534607305702137, "grad_norm": 0.9208390152656397, "learning_rate": 7.473109439993199e-06, "loss": 0.6279, "step": 867 }, { "epoch": 0.35575593011937084, "grad_norm": 1.0210116202749338, "learning_rate": 7.467334777339985e-06, "loss": 0.6629, "step": 868 }, { "epoch": 0.3561657871817204, "grad_norm": 0.8493588762943369, "learning_rate": 7.46155576091688e-06, "loss": 0.6404, "step": 869 }, { "epoch": 0.3565756442440699, "grad_norm": 0.8634293851343712, "learning_rate": 7.4557724009213276e-06, "loss": 0.5981, "step": 870 }, { "epoch": 0.3569855013064194, "grad_norm": 0.9004281706475237, "learning_rate": 7.449984707558437e-06, "loss": 0.6403, "step": 871 }, { "epoch": 0.3573953583687689, "grad_norm": 0.8340249602285648, "learning_rate": 7.444192691040963e-06, "loss": 0.5896, "step": 872 }, { "epoch": 0.3578052154311184, "grad_norm": 0.9213028807087345, "learning_rate": 7.438396361589288e-06, "loss": 0.6036, "step": 873 }, { "epoch": 0.3582150724934679, "grad_norm": 0.8843298180627532, "learning_rate": 7.432595729431408e-06, "loss": 0.6105, "step": 874 }, { "epoch": 0.35862492955581743, "grad_norm": 0.9231583852212196, "learning_rate": 7.426790804802907e-06, "loss": 0.6082, "step": 875 }, { "epoch": 0.3590347866181669, "grad_norm": 0.829860314697199, "learning_rate": 7.420981597946945e-06, "loss": 0.6287, "step": 876 }, { "epoch": 0.35944464368051643, "grad_norm": 0.8957700087040924, "learning_rate": 7.41516811911424e-06, "loss": 0.6063, "step": 877 }, { "epoch": 0.3598545007428659, "grad_norm": 0.8676424805373951, "learning_rate": 7.409350378563047e-06, "loss": 0.6166, "step": 878 }, { "epoch": 0.36026435780521543, "grad_norm": 0.904333979065802, "learning_rate": 7.403528386559138e-06, "loss": 0.5862, "step": 879 }, { "epoch": 0.36067421486756496, "grad_norm": 0.9056873753422969, "learning_rate": 7.397702153375795e-06, "loss": 0.6253, "step": 880 }, { "epoch": 0.36108407192991443, "grad_norm": 0.8994872881890624, "learning_rate": 7.391871689293774e-06, "loss": 0.6392, "step": 881 }, { "epoch": 0.36149392899226396, "grad_norm": 0.8974519573478622, "learning_rate": 7.386037004601306e-06, "loss": 0.6649, "step": 882 }, { "epoch": 0.36190378605461343, "grad_norm": 0.8504060532137677, "learning_rate": 7.380198109594063e-06, "loss": 0.5931, "step": 883 }, { "epoch": 0.36231364311696296, "grad_norm": 0.9002926638579786, "learning_rate": 7.374355014575148e-06, "loss": 0.6308, "step": 884 }, { "epoch": 0.3627235001793125, "grad_norm": 0.978828783395832, "learning_rate": 7.368507729855074e-06, "loss": 0.6717, "step": 885 }, { "epoch": 0.36313335724166196, "grad_norm": 0.9272430428983665, "learning_rate": 7.362656265751752e-06, "loss": 0.6308, "step": 886 }, { "epoch": 0.3635432143040115, "grad_norm": 0.9388056856902545, "learning_rate": 7.356800632590462e-06, "loss": 0.6747, "step": 887 }, { "epoch": 0.36395307136636096, "grad_norm": 0.8624326564035893, "learning_rate": 7.350940840703842e-06, "loss": 0.6538, "step": 888 }, { "epoch": 0.3643629284287105, "grad_norm": 0.9255270815799045, "learning_rate": 7.3450769004318714e-06, "loss": 0.6423, "step": 889 }, { "epoch": 0.36477278549106, "grad_norm": 0.9546159762602388, "learning_rate": 7.339208822121847e-06, "loss": 0.6315, "step": 890 }, { "epoch": 0.3651826425534095, "grad_norm": 0.9073379193528323, "learning_rate": 7.333336616128369e-06, "loss": 0.6247, "step": 891 }, { "epoch": 0.365592499615759, "grad_norm": 0.9026575721052461, "learning_rate": 7.327460292813319e-06, "loss": 0.62, "step": 892 }, { "epoch": 0.3660023566781085, "grad_norm": 0.9445188304895413, "learning_rate": 7.3215798625458445e-06, "loss": 0.6029, "step": 893 }, { "epoch": 0.366412213740458, "grad_norm": 0.8542055354118856, "learning_rate": 7.315695335702341e-06, "loss": 0.6176, "step": 894 }, { "epoch": 0.36682207080280754, "grad_norm": 0.9469223590356858, "learning_rate": 7.3098067226664315e-06, "loss": 0.6209, "step": 895 }, { "epoch": 0.367231927865157, "grad_norm": 0.9239697413497688, "learning_rate": 7.303914033828951e-06, "loss": 0.6288, "step": 896 }, { "epoch": 0.36764178492750654, "grad_norm": 0.9013983709072323, "learning_rate": 7.2980172795879255e-06, "loss": 0.6673, "step": 897 }, { "epoch": 0.368051641989856, "grad_norm": 0.9075326363082719, "learning_rate": 7.292116470348554e-06, "loss": 0.6156, "step": 898 }, { "epoch": 0.36846149905220554, "grad_norm": 0.8856235709339374, "learning_rate": 7.286211616523193e-06, "loss": 0.6203, "step": 899 }, { "epoch": 0.36887135611455507, "grad_norm": 0.8496073841571679, "learning_rate": 7.280302728531333e-06, "loss": 0.626, "step": 900 }, { "epoch": 0.36928121317690454, "grad_norm": 0.9484757117247062, "learning_rate": 7.274389816799585e-06, "loss": 0.6523, "step": 901 }, { "epoch": 0.36969107023925407, "grad_norm": 0.8485292996467835, "learning_rate": 7.26847289176166e-06, "loss": 0.6296, "step": 902 }, { "epoch": 0.37010092730160354, "grad_norm": 0.8961902198382509, "learning_rate": 7.2625519638583485e-06, "loss": 0.6264, "step": 903 }, { "epoch": 0.37051078436395307, "grad_norm": 0.9287827187351518, "learning_rate": 7.256627043537508e-06, "loss": 0.568, "step": 904 }, { "epoch": 0.3709206414263026, "grad_norm": 0.9016810468434724, "learning_rate": 7.250698141254038e-06, "loss": 0.6522, "step": 905 }, { "epoch": 0.37133049848865207, "grad_norm": 0.9174845159085119, "learning_rate": 7.244765267469867e-06, "loss": 0.631, "step": 906 }, { "epoch": 0.3717403555510016, "grad_norm": 0.8968043041955962, "learning_rate": 7.238828432653928e-06, "loss": 0.6667, "step": 907 }, { "epoch": 0.3721502126133511, "grad_norm": 0.8950007277723915, "learning_rate": 7.232887647282147e-06, "loss": 0.6233, "step": 908 }, { "epoch": 0.3725600696757006, "grad_norm": 0.97231187989343, "learning_rate": 7.226942921837419e-06, "loss": 0.6546, "step": 909 }, { "epoch": 0.37296992673805013, "grad_norm": 0.962830883212956, "learning_rate": 7.220994266809591e-06, "loss": 0.6644, "step": 910 }, { "epoch": 0.3733797838003996, "grad_norm": 0.9259615305665083, "learning_rate": 7.215041692695446e-06, "loss": 0.6287, "step": 911 }, { "epoch": 0.37378964086274913, "grad_norm": 0.8610209172422043, "learning_rate": 7.209085209998681e-06, "loss": 0.5446, "step": 912 }, { "epoch": 0.3741994979250986, "grad_norm": 0.9416549643604464, "learning_rate": 7.2031248292298905e-06, "loss": 0.6774, "step": 913 }, { "epoch": 0.37460935498744813, "grad_norm": 0.8924670471973993, "learning_rate": 7.197160560906547e-06, "loss": 0.5982, "step": 914 }, { "epoch": 0.37501921204979766, "grad_norm": 0.9553614398635903, "learning_rate": 7.191192415552983e-06, "loss": 0.6672, "step": 915 }, { "epoch": 0.37542906911214713, "grad_norm": 0.9483459311309098, "learning_rate": 7.185220403700373e-06, "loss": 0.5998, "step": 916 }, { "epoch": 0.37583892617449666, "grad_norm": 0.9059016386996558, "learning_rate": 7.179244535886715e-06, "loss": 0.6213, "step": 917 }, { "epoch": 0.37624878323684613, "grad_norm": 0.9153623938356362, "learning_rate": 7.1732648226568055e-06, "loss": 0.6821, "step": 918 }, { "epoch": 0.37665864029919566, "grad_norm": 0.9455193631304288, "learning_rate": 7.167281274562235e-06, "loss": 0.6298, "step": 919 }, { "epoch": 0.3770684973615452, "grad_norm": 0.8769185562981799, "learning_rate": 7.161293902161353e-06, "loss": 0.6156, "step": 920 }, { "epoch": 0.37747835442389466, "grad_norm": 0.8968180698908108, "learning_rate": 7.155302716019263e-06, "loss": 0.5935, "step": 921 }, { "epoch": 0.3778882114862442, "grad_norm": 0.8580285658874974, "learning_rate": 7.149307726707795e-06, "loss": 0.611, "step": 922 }, { "epoch": 0.37829806854859366, "grad_norm": 0.8713353706287862, "learning_rate": 7.143308944805491e-06, "loss": 0.6117, "step": 923 }, { "epoch": 0.3787079256109432, "grad_norm": 0.9508709320838208, "learning_rate": 7.137306380897585e-06, "loss": 0.6012, "step": 924 }, { "epoch": 0.3791177826732927, "grad_norm": 0.913799829074204, "learning_rate": 7.131300045575984e-06, "loss": 0.6346, "step": 925 }, { "epoch": 0.3795276397356422, "grad_norm": 0.9241633619335718, "learning_rate": 7.125289949439251e-06, "loss": 0.6543, "step": 926 }, { "epoch": 0.3799374967979917, "grad_norm": 0.9507110226067006, "learning_rate": 7.119276103092585e-06, "loss": 0.6604, "step": 927 }, { "epoch": 0.3803473538603412, "grad_norm": 0.7922364762788225, "learning_rate": 7.113258517147802e-06, "loss": 0.5459, "step": 928 }, { "epoch": 0.3807572109226907, "grad_norm": 0.8628749253809763, "learning_rate": 7.107237202223316e-06, "loss": 0.6083, "step": 929 }, { "epoch": 0.38116706798504024, "grad_norm": 0.9131553899219389, "learning_rate": 7.101212168944124e-06, "loss": 0.6176, "step": 930 }, { "epoch": 0.3815769250473897, "grad_norm": 0.8591053562125689, "learning_rate": 7.095183427941781e-06, "loss": 0.6075, "step": 931 }, { "epoch": 0.38198678210973924, "grad_norm": 0.9322055784616016, "learning_rate": 7.089150989854385e-06, "loss": 0.6284, "step": 932 }, { "epoch": 0.3823966391720887, "grad_norm": 0.8777102312780416, "learning_rate": 7.08311486532656e-06, "loss": 0.5989, "step": 933 }, { "epoch": 0.38280649623443824, "grad_norm": 0.9338146803832802, "learning_rate": 7.0770750650094335e-06, "loss": 0.6375, "step": 934 }, { "epoch": 0.38321635329678777, "grad_norm": 0.9437888194437303, "learning_rate": 7.071031599560617e-06, "loss": 0.6266, "step": 935 }, { "epoch": 0.38362621035913724, "grad_norm": 0.9139745810318491, "learning_rate": 7.064984479644193e-06, "loss": 0.6164, "step": 936 }, { "epoch": 0.38403606742148677, "grad_norm": 0.8647299547152973, "learning_rate": 7.058933715930693e-06, "loss": 0.5872, "step": 937 }, { "epoch": 0.38444592448383624, "grad_norm": 0.8414152163300519, "learning_rate": 7.052879319097072e-06, "loss": 0.5769, "step": 938 }, { "epoch": 0.38485578154618577, "grad_norm": 0.9309031696054278, "learning_rate": 7.046821299826703e-06, "loss": 0.6393, "step": 939 }, { "epoch": 0.3852656386085353, "grad_norm": 0.939751869627146, "learning_rate": 7.040759668809348e-06, "loss": 0.5968, "step": 940 }, { "epoch": 0.38567549567088477, "grad_norm": 0.8873543586748119, "learning_rate": 7.0346944367411414e-06, "loss": 0.5799, "step": 941 }, { "epoch": 0.3860853527332343, "grad_norm": 0.8441568637166218, "learning_rate": 7.028625614324574e-06, "loss": 0.5504, "step": 942 }, { "epoch": 0.38649520979558377, "grad_norm": 0.9134776497218471, "learning_rate": 7.022553212268469e-06, "loss": 0.6534, "step": 943 }, { "epoch": 0.3869050668579333, "grad_norm": 0.9367615863246458, "learning_rate": 7.016477241287969e-06, "loss": 0.6293, "step": 944 }, { "epoch": 0.3873149239202828, "grad_norm": 0.9105095862565333, "learning_rate": 7.010397712104513e-06, "loss": 0.5795, "step": 945 }, { "epoch": 0.3877247809826323, "grad_norm": 0.9301045487363986, "learning_rate": 7.0043146354458195e-06, "loss": 0.6468, "step": 946 }, { "epoch": 0.38813463804498183, "grad_norm": 0.8475351201149668, "learning_rate": 6.998228022045864e-06, "loss": 0.6383, "step": 947 }, { "epoch": 0.3885444951073313, "grad_norm": 0.8694968077830771, "learning_rate": 6.9921378826448686e-06, "loss": 0.5712, "step": 948 }, { "epoch": 0.38895435216968083, "grad_norm": 0.9299498224008264, "learning_rate": 6.9860442279892686e-06, "loss": 0.68, "step": 949 }, { "epoch": 0.38936420923203036, "grad_norm": 0.9406137260678079, "learning_rate": 6.979947068831709e-06, "loss": 0.5706, "step": 950 }, { "epoch": 0.38977406629437983, "grad_norm": 0.8865134186172849, "learning_rate": 6.973846415931018e-06, "loss": 0.6187, "step": 951 }, { "epoch": 0.39018392335672936, "grad_norm": 0.8813056898175655, "learning_rate": 6.967742280052187e-06, "loss": 0.5906, "step": 952 }, { "epoch": 0.39059378041907883, "grad_norm": 0.9576638973713656, "learning_rate": 6.961634671966352e-06, "loss": 0.6455, "step": 953 }, { "epoch": 0.39100363748142836, "grad_norm": 0.8877462123033258, "learning_rate": 6.95552360245078e-06, "loss": 0.6633, "step": 954 }, { "epoch": 0.3914134945437779, "grad_norm": 0.8802558099346542, "learning_rate": 6.949409082288844e-06, "loss": 0.6477, "step": 955 }, { "epoch": 0.39182335160612736, "grad_norm": 0.8701657539214442, "learning_rate": 6.943291122270004e-06, "loss": 0.6139, "step": 956 }, { "epoch": 0.3922332086684769, "grad_norm": 0.9758955253096684, "learning_rate": 6.937169733189793e-06, "loss": 0.6206, "step": 957 }, { "epoch": 0.39264306573082636, "grad_norm": 0.8759700502454508, "learning_rate": 6.931044925849789e-06, "loss": 0.5806, "step": 958 }, { "epoch": 0.3930529227931759, "grad_norm": 0.9252391981328264, "learning_rate": 6.924916711057611e-06, "loss": 0.6309, "step": 959 }, { "epoch": 0.3934627798555254, "grad_norm": 0.8712951145627621, "learning_rate": 6.918785099626883e-06, "loss": 0.6368, "step": 960 }, { "epoch": 0.3938726369178749, "grad_norm": 0.91818511984685, "learning_rate": 6.912650102377227e-06, "loss": 0.6124, "step": 961 }, { "epoch": 0.3942824939802244, "grad_norm": 0.8631297805231067, "learning_rate": 6.906511730134238e-06, "loss": 0.5966, "step": 962 }, { "epoch": 0.3946923510425739, "grad_norm": 0.9363453421810264, "learning_rate": 6.900369993729462e-06, "loss": 0.6461, "step": 963 }, { "epoch": 0.3951022081049234, "grad_norm": 0.9349421805695657, "learning_rate": 6.89422490400039e-06, "loss": 0.6207, "step": 964 }, { "epoch": 0.3955120651672729, "grad_norm": 0.907468805942568, "learning_rate": 6.888076471790423e-06, "loss": 0.6449, "step": 965 }, { "epoch": 0.3959219222296224, "grad_norm": 0.931178283377802, "learning_rate": 6.881924707948864e-06, "loss": 0.6306, "step": 966 }, { "epoch": 0.39633177929197194, "grad_norm": 0.8972442391921164, "learning_rate": 6.875769623330892e-06, "loss": 0.6427, "step": 967 }, { "epoch": 0.3967416363543214, "grad_norm": 0.9206093894599513, "learning_rate": 6.869611228797547e-06, "loss": 0.623, "step": 968 }, { "epoch": 0.39715149341667094, "grad_norm": 0.8882164744124571, "learning_rate": 6.863449535215711e-06, "loss": 0.6103, "step": 969 }, { "epoch": 0.3975613504790204, "grad_norm": 0.9218674456849281, "learning_rate": 6.857284553458085e-06, "loss": 0.6303, "step": 970 }, { "epoch": 0.39797120754136994, "grad_norm": 0.9926370180288921, "learning_rate": 6.851116294403177e-06, "loss": 0.6447, "step": 971 }, { "epoch": 0.39838106460371947, "grad_norm": 0.8890293360533097, "learning_rate": 6.8449447689352684e-06, "loss": 0.6643, "step": 972 }, { "epoch": 0.39879092166606894, "grad_norm": 0.9264750734474754, "learning_rate": 6.838769987944414e-06, "loss": 0.6149, "step": 973 }, { "epoch": 0.39920077872841847, "grad_norm": 0.8979245863308853, "learning_rate": 6.832591962326408e-06, "loss": 0.6112, "step": 974 }, { "epoch": 0.39961063579076794, "grad_norm": 0.8737684304265955, "learning_rate": 6.826410702982773e-06, "loss": 0.6177, "step": 975 }, { "epoch": 0.40002049285311747, "grad_norm": 0.9083031157207516, "learning_rate": 6.820226220820733e-06, "loss": 0.604, "step": 976 }, { "epoch": 0.400430349915467, "grad_norm": 0.8937871042786354, "learning_rate": 6.814038526753205e-06, "loss": 0.6478, "step": 977 }, { "epoch": 0.40084020697781647, "grad_norm": 0.8978026343674134, "learning_rate": 6.80784763169877e-06, "loss": 0.6159, "step": 978 }, { "epoch": 0.401250064040166, "grad_norm": 0.9198488115333758, "learning_rate": 6.8016535465816595e-06, "loss": 0.6533, "step": 979 }, { "epoch": 0.40165992110251547, "grad_norm": 0.924480746747995, "learning_rate": 6.795456282331729e-06, "loss": 0.623, "step": 980 }, { "epoch": 0.402069778164865, "grad_norm": 0.9009966910784907, "learning_rate": 6.789255849884449e-06, "loss": 0.6063, "step": 981 }, { "epoch": 0.4024796352272145, "grad_norm": 0.8959773326105209, "learning_rate": 6.783052260180878e-06, "loss": 0.6331, "step": 982 }, { "epoch": 0.402889492289564, "grad_norm": 0.8896347097830651, "learning_rate": 6.776845524167648e-06, "loss": 0.6195, "step": 983 }, { "epoch": 0.4032993493519135, "grad_norm": 0.9255834264047984, "learning_rate": 6.770635652796939e-06, "loss": 0.6304, "step": 984 }, { "epoch": 0.403709206414263, "grad_norm": 0.8733151552532676, "learning_rate": 6.764422657026468e-06, "loss": 0.6134, "step": 985 }, { "epoch": 0.40411906347661253, "grad_norm": 0.930630252974523, "learning_rate": 6.758206547819464e-06, "loss": 0.6663, "step": 986 }, { "epoch": 0.40452892053896206, "grad_norm": 0.8840088290957417, "learning_rate": 6.7519873361446475e-06, "loss": 0.5733, "step": 987 }, { "epoch": 0.40493877760131153, "grad_norm": 0.8868175407091178, "learning_rate": 6.745765032976215e-06, "loss": 0.6314, "step": 988 }, { "epoch": 0.40534863466366106, "grad_norm": 0.8986869343273223, "learning_rate": 6.739539649293817e-06, "loss": 0.6265, "step": 989 }, { "epoch": 0.40575849172601053, "grad_norm": 0.9126100490780487, "learning_rate": 6.733311196082542e-06, "loss": 0.6457, "step": 990 }, { "epoch": 0.40616834878836006, "grad_norm": 0.9805892893705136, "learning_rate": 6.727079684332893e-06, "loss": 0.6706, "step": 991 }, { "epoch": 0.4065782058507096, "grad_norm": 0.916831338722316, "learning_rate": 6.720845125040772e-06, "loss": 0.5822, "step": 992 }, { "epoch": 0.40698806291305906, "grad_norm": 0.8843356662758197, "learning_rate": 6.714607529207457e-06, "loss": 0.6743, "step": 993 }, { "epoch": 0.4073979199754086, "grad_norm": 0.915642095925482, "learning_rate": 6.708366907839583e-06, "loss": 0.6264, "step": 994 }, { "epoch": 0.40780777703775806, "grad_norm": 1.0182445827560593, "learning_rate": 6.702123271949128e-06, "loss": 0.6348, "step": 995 }, { "epoch": 0.4082176341001076, "grad_norm": 0.9740984385402105, "learning_rate": 6.695876632553383e-06, "loss": 0.6278, "step": 996 }, { "epoch": 0.4086274911624571, "grad_norm": 0.9094092949685881, "learning_rate": 6.6896270006749455e-06, "loss": 0.6029, "step": 997 }, { "epoch": 0.4090373482248066, "grad_norm": 0.899155354341766, "learning_rate": 6.683374387341688e-06, "loss": 0.6038, "step": 998 }, { "epoch": 0.4094472052871561, "grad_norm": 0.8887269556607833, "learning_rate": 6.677118803586747e-06, "loss": 0.6115, "step": 999 }, { "epoch": 0.4098570623495056, "grad_norm": 0.8735530024798817, "learning_rate": 6.670860260448501e-06, "loss": 0.6224, "step": 1000 }, { "epoch": 0.4102669194118551, "grad_norm": 0.8955078111791012, "learning_rate": 6.664598768970547e-06, "loss": 0.6178, "step": 1001 }, { "epoch": 0.41067677647420464, "grad_norm": 0.9052107201309103, "learning_rate": 6.658334340201689e-06, "loss": 0.6489, "step": 1002 }, { "epoch": 0.4110866335365541, "grad_norm": 0.9785629078013838, "learning_rate": 6.652066985195913e-06, "loss": 0.5859, "step": 1003 }, { "epoch": 0.41149649059890364, "grad_norm": 0.9047277711653311, "learning_rate": 6.645796715012363e-06, "loss": 0.5933, "step": 1004 }, { "epoch": 0.4119063476612531, "grad_norm": 0.8541870376942784, "learning_rate": 6.639523540715335e-06, "loss": 0.6099, "step": 1005 }, { "epoch": 0.41231620472360264, "grad_norm": 0.9290822460623029, "learning_rate": 6.633247473374247e-06, "loss": 0.6326, "step": 1006 }, { "epoch": 0.41272606178595217, "grad_norm": 0.9814625786995784, "learning_rate": 6.626968524063617e-06, "loss": 0.6632, "step": 1007 }, { "epoch": 0.41313591884830164, "grad_norm": 0.8665787907883268, "learning_rate": 6.620686703863054e-06, "loss": 0.6043, "step": 1008 }, { "epoch": 0.41354577591065117, "grad_norm": 0.8981042597412753, "learning_rate": 6.614402023857231e-06, "loss": 0.6264, "step": 1009 }, { "epoch": 0.41395563297300064, "grad_norm": 0.9286626435334508, "learning_rate": 6.60811449513587e-06, "loss": 0.5682, "step": 1010 }, { "epoch": 0.41436549003535017, "grad_norm": 0.8442033166706318, "learning_rate": 6.601824128793713e-06, "loss": 0.5616, "step": 1011 }, { "epoch": 0.4147753470976997, "grad_norm": 0.9135168751828947, "learning_rate": 6.595530935930516e-06, "loss": 0.613, "step": 1012 }, { "epoch": 0.41518520416004917, "grad_norm": 0.9103837144383286, "learning_rate": 6.589234927651021e-06, "loss": 0.6108, "step": 1013 }, { "epoch": 0.4155950612223987, "grad_norm": 0.9057249809406045, "learning_rate": 6.582936115064934e-06, "loss": 0.5952, "step": 1014 }, { "epoch": 0.41600491828474817, "grad_norm": 0.8722764522423979, "learning_rate": 6.576634509286914e-06, "loss": 0.6035, "step": 1015 }, { "epoch": 0.4164147753470977, "grad_norm": 0.8847116359949693, "learning_rate": 6.570330121436546e-06, "loss": 0.5817, "step": 1016 }, { "epoch": 0.4168246324094472, "grad_norm": 1.0387855635464343, "learning_rate": 6.564022962638328e-06, "loss": 0.6426, "step": 1017 }, { "epoch": 0.4172344894717967, "grad_norm": 0.9771500698014963, "learning_rate": 6.557713044021642e-06, "loss": 0.6184, "step": 1018 }, { "epoch": 0.4176443465341462, "grad_norm": 0.8786730549976817, "learning_rate": 6.551400376720744e-06, "loss": 0.6258, "step": 1019 }, { "epoch": 0.4180542035964957, "grad_norm": 0.9076783791074367, "learning_rate": 6.545084971874738e-06, "loss": 0.6309, "step": 1020 }, { "epoch": 0.4184640606588452, "grad_norm": 0.9360471711066288, "learning_rate": 6.53876684062756e-06, "loss": 0.655, "step": 1021 }, { "epoch": 0.41887391772119476, "grad_norm": 0.8948799027800506, "learning_rate": 6.532445994127956e-06, "loss": 0.5962, "step": 1022 }, { "epoch": 0.4192837747835442, "grad_norm": 0.9701355592249925, "learning_rate": 6.526122443529464e-06, "loss": 0.6565, "step": 1023 }, { "epoch": 0.41969363184589376, "grad_norm": 0.9297914713304658, "learning_rate": 6.5197961999903925e-06, "loss": 0.6008, "step": 1024 }, { "epoch": 0.42010348890824323, "grad_norm": 0.926714742206596, "learning_rate": 6.513467274673804e-06, "loss": 0.6375, "step": 1025 }, { "epoch": 0.42051334597059276, "grad_norm": 0.9171296213682146, "learning_rate": 6.50713567874749e-06, "loss": 0.6548, "step": 1026 }, { "epoch": 0.4209232030329423, "grad_norm": 0.9330977942293359, "learning_rate": 6.500801423383955e-06, "loss": 0.5703, "step": 1027 }, { "epoch": 0.42133306009529176, "grad_norm": 0.9516727781091443, "learning_rate": 6.494464519760402e-06, "loss": 0.6648, "step": 1028 }, { "epoch": 0.4217429171576413, "grad_norm": 0.8934792600620406, "learning_rate": 6.4881249790586975e-06, "loss": 0.5808, "step": 1029 }, { "epoch": 0.42215277421999076, "grad_norm": 0.9341053135614495, "learning_rate": 6.4817828124653695e-06, "loss": 0.603, "step": 1030 }, { "epoch": 0.4225626312823403, "grad_norm": 0.8924836258523059, "learning_rate": 6.475438031171574e-06, "loss": 0.6435, "step": 1031 }, { "epoch": 0.4229724883446898, "grad_norm": 0.9514661591942672, "learning_rate": 6.469090646373084e-06, "loss": 0.644, "step": 1032 }, { "epoch": 0.4233823454070393, "grad_norm": 0.8998562360531134, "learning_rate": 6.462740669270266e-06, "loss": 0.6442, "step": 1033 }, { "epoch": 0.4237922024693888, "grad_norm": 0.8772883202018791, "learning_rate": 6.45638811106806e-06, "loss": 0.6323, "step": 1034 }, { "epoch": 0.4242020595317383, "grad_norm": 0.9825142769200704, "learning_rate": 6.450032982975961e-06, "loss": 0.6313, "step": 1035 }, { "epoch": 0.4246119165940878, "grad_norm": 0.8858796519161557, "learning_rate": 6.443675296207998e-06, "loss": 0.5542, "step": 1036 }, { "epoch": 0.42502177365643734, "grad_norm": 0.8585143022677548, "learning_rate": 6.437315061982719e-06, "loss": 0.5744, "step": 1037 }, { "epoch": 0.4254316307187868, "grad_norm": 0.9263325382512895, "learning_rate": 6.430952291523158e-06, "loss": 0.619, "step": 1038 }, { "epoch": 0.42584148778113634, "grad_norm": 0.9293172044907352, "learning_rate": 6.424586996056834e-06, "loss": 0.652, "step": 1039 }, { "epoch": 0.4262513448434858, "grad_norm": 0.8911620073790109, "learning_rate": 6.418219186815716e-06, "loss": 0.5699, "step": 1040 }, { "epoch": 0.42666120190583534, "grad_norm": 0.8639981138441963, "learning_rate": 6.411848875036212e-06, "loss": 0.5943, "step": 1041 }, { "epoch": 0.42707105896818487, "grad_norm": 0.9469172785893666, "learning_rate": 6.405476071959142e-06, "loss": 0.624, "step": 1042 }, { "epoch": 0.42748091603053434, "grad_norm": 0.9379593871247665, "learning_rate": 6.3991007888297265e-06, "loss": 0.6105, "step": 1043 }, { "epoch": 0.42789077309288387, "grad_norm": 0.989914113810862, "learning_rate": 6.392723036897559e-06, "loss": 0.6502, "step": 1044 }, { "epoch": 0.42830063015523334, "grad_norm": 0.9306785569146953, "learning_rate": 6.386342827416591e-06, "loss": 0.6134, "step": 1045 }, { "epoch": 0.42871048721758287, "grad_norm": 0.9721792070088338, "learning_rate": 6.3799601716451074e-06, "loss": 0.616, "step": 1046 }, { "epoch": 0.4291203442799324, "grad_norm": 0.9414609225940453, "learning_rate": 6.373575080845714e-06, "loss": 0.6401, "step": 1047 }, { "epoch": 0.42953020134228187, "grad_norm": 0.9851041142021457, "learning_rate": 6.3671875662853135e-06, "loss": 0.6953, "step": 1048 }, { "epoch": 0.4299400584046314, "grad_norm": 0.926384087694355, "learning_rate": 6.36079763923508e-06, "loss": 0.6284, "step": 1049 }, { "epoch": 0.43034991546698087, "grad_norm": 0.9331625360013056, "learning_rate": 6.35440531097045e-06, "loss": 0.6516, "step": 1050 }, { "epoch": 0.4307597725293304, "grad_norm": 0.9162804070127303, "learning_rate": 6.348010592771096e-06, "loss": 0.6727, "step": 1051 }, { "epoch": 0.4311696295916799, "grad_norm": 0.930189481930973, "learning_rate": 6.341613495920906e-06, "loss": 0.6549, "step": 1052 }, { "epoch": 0.4315794866540294, "grad_norm": 0.9046045349903002, "learning_rate": 6.335214031707966e-06, "loss": 0.6186, "step": 1053 }, { "epoch": 0.4319893437163789, "grad_norm": 0.8848507115381941, "learning_rate": 6.328812211424539e-06, "loss": 0.6268, "step": 1054 }, { "epoch": 0.4323992007787284, "grad_norm": 0.9155377054090952, "learning_rate": 6.322408046367046e-06, "loss": 0.6045, "step": 1055 }, { "epoch": 0.4328090578410779, "grad_norm": 0.8941711461430181, "learning_rate": 6.316001547836046e-06, "loss": 0.5991, "step": 1056 }, { "epoch": 0.43321891490342745, "grad_norm": 0.8444519020040466, "learning_rate": 6.309592727136216e-06, "loss": 0.6022, "step": 1057 }, { "epoch": 0.4336287719657769, "grad_norm": 0.9370354000363083, "learning_rate": 6.303181595576328e-06, "loss": 0.6358, "step": 1058 }, { "epoch": 0.43403862902812645, "grad_norm": 0.8611781829386218, "learning_rate": 6.296768164469236e-06, "loss": 0.6228, "step": 1059 }, { "epoch": 0.4344484860904759, "grad_norm": 0.9336172003510546, "learning_rate": 6.290352445131848e-06, "loss": 0.5999, "step": 1060 }, { "epoch": 0.43485834315282546, "grad_norm": 0.9240167395336799, "learning_rate": 6.2839344488851085e-06, "loss": 0.595, "step": 1061 }, { "epoch": 0.435268200215175, "grad_norm": 0.8762721847117991, "learning_rate": 6.2775141870539835e-06, "loss": 0.6162, "step": 1062 }, { "epoch": 0.43567805727752446, "grad_norm": 0.9004911552632834, "learning_rate": 6.271091670967437e-06, "loss": 0.5589, "step": 1063 }, { "epoch": 0.436087914339874, "grad_norm": 0.8956450914986459, "learning_rate": 6.264666911958404e-06, "loss": 0.5607, "step": 1064 }, { "epoch": 0.43649777140222346, "grad_norm": 0.9458892806657289, "learning_rate": 6.258239921363788e-06, "loss": 0.659, "step": 1065 }, { "epoch": 0.436907628464573, "grad_norm": 0.8331348610000855, "learning_rate": 6.251810710524422e-06, "loss": 0.6121, "step": 1066 }, { "epoch": 0.4373174855269225, "grad_norm": 0.8475266029266756, "learning_rate": 6.245379290785061e-06, "loss": 0.5752, "step": 1067 }, { "epoch": 0.437727342589272, "grad_norm": 0.8667551722265131, "learning_rate": 6.238945673494354e-06, "loss": 0.5901, "step": 1068 }, { "epoch": 0.4381371996516215, "grad_norm": 0.9256660492658586, "learning_rate": 6.232509870004831e-06, "loss": 0.6068, "step": 1069 }, { "epoch": 0.438547056713971, "grad_norm": 0.9038030754651661, "learning_rate": 6.226071891672878e-06, "loss": 0.6165, "step": 1070 }, { "epoch": 0.4389569137763205, "grad_norm": 0.920707493517189, "learning_rate": 6.21963174985872e-06, "loss": 0.6218, "step": 1071 }, { "epoch": 0.43936677083867004, "grad_norm": 0.8977361892029014, "learning_rate": 6.213189455926397e-06, "loss": 0.6404, "step": 1072 }, { "epoch": 0.4397766279010195, "grad_norm": 0.9171485112941635, "learning_rate": 6.206745021243751e-06, "loss": 0.7032, "step": 1073 }, { "epoch": 0.44018648496336904, "grad_norm": 0.9175790743368268, "learning_rate": 6.200298457182396e-06, "loss": 0.6025, "step": 1074 }, { "epoch": 0.4405963420257185, "grad_norm": 0.9070457209056723, "learning_rate": 6.193849775117709e-06, "loss": 0.6256, "step": 1075 }, { "epoch": 0.44100619908806804, "grad_norm": 0.9683224200895547, "learning_rate": 6.187398986428802e-06, "loss": 0.6378, "step": 1076 }, { "epoch": 0.44141605615041757, "grad_norm": 0.8907292061241514, "learning_rate": 6.180946102498501e-06, "loss": 0.6044, "step": 1077 }, { "epoch": 0.44182591321276704, "grad_norm": 0.8998045134039416, "learning_rate": 6.174491134713333e-06, "loss": 0.6372, "step": 1078 }, { "epoch": 0.44223577027511657, "grad_norm": 0.9551165871989905, "learning_rate": 6.168034094463501e-06, "loss": 0.6561, "step": 1079 }, { "epoch": 0.44264562733746604, "grad_norm": 0.9136717017830294, "learning_rate": 6.1615749931428655e-06, "loss": 0.6542, "step": 1080 }, { "epoch": 0.44305548439981557, "grad_norm": 0.8683697675098729, "learning_rate": 6.155113842148923e-06, "loss": 0.6083, "step": 1081 }, { "epoch": 0.4434653414621651, "grad_norm": 0.8649047890124095, "learning_rate": 6.148650652882789e-06, "loss": 0.5914, "step": 1082 }, { "epoch": 0.44387519852451457, "grad_norm": 0.8865735633967374, "learning_rate": 6.142185436749174e-06, "loss": 0.5935, "step": 1083 }, { "epoch": 0.4442850555868641, "grad_norm": 0.9142687283009775, "learning_rate": 6.135718205156363e-06, "loss": 0.567, "step": 1084 }, { "epoch": 0.44469491264921357, "grad_norm": 0.8733197798865401, "learning_rate": 6.129248969516202e-06, "loss": 0.5796, "step": 1085 }, { "epoch": 0.4451047697115631, "grad_norm": 0.9379948095887551, "learning_rate": 6.122777741244067e-06, "loss": 0.6148, "step": 1086 }, { "epoch": 0.4455146267739126, "grad_norm": 0.9716709140341622, "learning_rate": 6.116304531758857e-06, "loss": 0.6236, "step": 1087 }, { "epoch": 0.4459244838362621, "grad_norm": 0.9180993494681584, "learning_rate": 6.109829352482963e-06, "loss": 0.6205, "step": 1088 }, { "epoch": 0.4463343408986116, "grad_norm": 0.8438437249517798, "learning_rate": 6.103352214842252e-06, "loss": 0.5618, "step": 1089 }, { "epoch": 0.4467441979609611, "grad_norm": 0.8353057658242161, "learning_rate": 6.096873130266047e-06, "loss": 0.5908, "step": 1090 }, { "epoch": 0.4471540550233106, "grad_norm": 0.994681220752144, "learning_rate": 6.090392110187108e-06, "loss": 0.6271, "step": 1091 }, { "epoch": 0.4475639120856601, "grad_norm": 0.8697914272706765, "learning_rate": 6.08390916604161e-06, "loss": 0.6226, "step": 1092 }, { "epoch": 0.4479737691480096, "grad_norm": 0.88875863732712, "learning_rate": 6.07742430926912e-06, "loss": 0.6133, "step": 1093 }, { "epoch": 0.44838362621035915, "grad_norm": 0.8822554936294235, "learning_rate": 6.070937551312583e-06, "loss": 0.5766, "step": 1094 }, { "epoch": 0.4487934832727086, "grad_norm": 0.8609481602919532, "learning_rate": 6.064448903618298e-06, "loss": 0.5679, "step": 1095 }, { "epoch": 0.44920334033505815, "grad_norm": 0.9298471404345084, "learning_rate": 6.0579583776358985e-06, "loss": 0.6295, "step": 1096 }, { "epoch": 0.4496131973974076, "grad_norm": 0.9355125029892031, "learning_rate": 6.051465984818332e-06, "loss": 0.6129, "step": 1097 }, { "epoch": 0.45002305445975715, "grad_norm": 0.8924007096439216, "learning_rate": 6.044971736621842e-06, "loss": 0.5803, "step": 1098 }, { "epoch": 0.4504329115221067, "grad_norm": 0.902617120619383, "learning_rate": 6.038475644505942e-06, "loss": 0.6291, "step": 1099 }, { "epoch": 0.45084276858445615, "grad_norm": 0.9589614042826818, "learning_rate": 6.031977719933404e-06, "loss": 0.6504, "step": 1100 }, { "epoch": 0.4512526256468057, "grad_norm": 0.8874044480806513, "learning_rate": 6.02547797437023e-06, "loss": 0.6013, "step": 1101 }, { "epoch": 0.45166248270915516, "grad_norm": 0.8915013298763051, "learning_rate": 6.018976419285636e-06, "loss": 0.5858, "step": 1102 }, { "epoch": 0.4520723397715047, "grad_norm": 0.8589833610915024, "learning_rate": 6.012473066152034e-06, "loss": 0.5964, "step": 1103 }, { "epoch": 0.4524821968338542, "grad_norm": 0.8527274109557783, "learning_rate": 6.005967926445002e-06, "loss": 0.5943, "step": 1104 }, { "epoch": 0.4528920538962037, "grad_norm": 0.9194219456121153, "learning_rate": 5.999461011643278e-06, "loss": 0.606, "step": 1105 }, { "epoch": 0.4533019109585532, "grad_norm": 0.906677366524736, "learning_rate": 5.9929523332287275e-06, "loss": 0.6659, "step": 1106 }, { "epoch": 0.4537117680209027, "grad_norm": 0.870323612080412, "learning_rate": 5.986441902686331e-06, "loss": 0.5917, "step": 1107 }, { "epoch": 0.4541216250832522, "grad_norm": 0.880162193277191, "learning_rate": 5.979929731504158e-06, "loss": 0.6309, "step": 1108 }, { "epoch": 0.45453148214560174, "grad_norm": 0.8800665382255013, "learning_rate": 5.97341583117335e-06, "loss": 0.6051, "step": 1109 }, { "epoch": 0.4549413392079512, "grad_norm": 0.9093574081889094, "learning_rate": 5.9669002131881025e-06, "loss": 0.6514, "step": 1110 }, { "epoch": 0.45535119627030074, "grad_norm": 0.9630334218404192, "learning_rate": 5.960382889045638e-06, "loss": 0.6674, "step": 1111 }, { "epoch": 0.4557610533326502, "grad_norm": 0.8430267328547052, "learning_rate": 5.953863870246193e-06, "loss": 0.5205, "step": 1112 }, { "epoch": 0.45617091039499974, "grad_norm": 0.8420776202082421, "learning_rate": 5.947343168292991e-06, "loss": 0.6439, "step": 1113 }, { "epoch": 0.45658076745734927, "grad_norm": 0.8845074120613557, "learning_rate": 5.940820794692228e-06, "loss": 0.5592, "step": 1114 }, { "epoch": 0.45699062451969874, "grad_norm": 0.8520187212002416, "learning_rate": 5.93429676095305e-06, "loss": 0.6342, "step": 1115 }, { "epoch": 0.45740048158204827, "grad_norm": 0.9173132777543993, "learning_rate": 5.9277710785875306e-06, "loss": 0.645, "step": 1116 }, { "epoch": 0.45781033864439774, "grad_norm": 0.9216033601655572, "learning_rate": 5.921243759110653e-06, "loss": 0.6302, "step": 1117 }, { "epoch": 0.45822019570674727, "grad_norm": 0.8714440134262055, "learning_rate": 5.91471481404029e-06, "loss": 0.602, "step": 1118 }, { "epoch": 0.4586300527690968, "grad_norm": 0.9075461148534371, "learning_rate": 5.908184254897183e-06, "loss": 0.5877, "step": 1119 }, { "epoch": 0.45903990983144627, "grad_norm": 0.8857025942049206, "learning_rate": 5.90165209320492e-06, "loss": 0.5958, "step": 1120 }, { "epoch": 0.4594497668937958, "grad_norm": 0.8643128457253737, "learning_rate": 5.8951183404899185e-06, "loss": 0.5895, "step": 1121 }, { "epoch": 0.45985962395614527, "grad_norm": 1.0000685352346246, "learning_rate": 5.888583008281401e-06, "loss": 0.6609, "step": 1122 }, { "epoch": 0.4602694810184948, "grad_norm": 0.8740524701821923, "learning_rate": 5.882046108111381e-06, "loss": 0.6292, "step": 1123 }, { "epoch": 0.4606793380808443, "grad_norm": 0.9233497673306182, "learning_rate": 5.875507651514636e-06, "loss": 0.6128, "step": 1124 }, { "epoch": 0.4610891951431938, "grad_norm": 0.9533200842379719, "learning_rate": 5.868967650028689e-06, "loss": 0.62, "step": 1125 }, { "epoch": 0.4614990522055433, "grad_norm": 0.9273708369189544, "learning_rate": 5.8624261151937935e-06, "loss": 0.5708, "step": 1126 }, { "epoch": 0.4619089092678928, "grad_norm": 0.9435970642203969, "learning_rate": 5.855883058552904e-06, "loss": 0.6191, "step": 1127 }, { "epoch": 0.4623187663302423, "grad_norm": 0.914981411816178, "learning_rate": 5.8493384916516614e-06, "loss": 0.6177, "step": 1128 }, { "epoch": 0.46272862339259185, "grad_norm": 0.8916495003409933, "learning_rate": 5.842792426038373e-06, "loss": 0.5981, "step": 1129 }, { "epoch": 0.4631384804549413, "grad_norm": 0.9581236889692265, "learning_rate": 5.836244873263989e-06, "loss": 0.6132, "step": 1130 }, { "epoch": 0.46354833751729085, "grad_norm": 0.9401161760897846, "learning_rate": 5.8296958448820874e-06, "loss": 0.625, "step": 1131 }, { "epoch": 0.4639581945796403, "grad_norm": 0.8789978154977602, "learning_rate": 5.823145352448845e-06, "loss": 0.6128, "step": 1132 }, { "epoch": 0.46436805164198985, "grad_norm": 0.8229411800329829, "learning_rate": 5.816593407523025e-06, "loss": 0.6222, "step": 1133 }, { "epoch": 0.4647779087043394, "grad_norm": 0.9609437421202073, "learning_rate": 5.8100400216659505e-06, "loss": 0.6177, "step": 1134 }, { "epoch": 0.46518776576668885, "grad_norm": 0.9431493262691141, "learning_rate": 5.803485206441493e-06, "loss": 0.6146, "step": 1135 }, { "epoch": 0.4655976228290384, "grad_norm": 0.8759641012798117, "learning_rate": 5.7969289734160426e-06, "loss": 0.614, "step": 1136 }, { "epoch": 0.46600747989138785, "grad_norm": 0.9356616083334219, "learning_rate": 5.79037133415849e-06, "loss": 0.6213, "step": 1137 }, { "epoch": 0.4664173369537374, "grad_norm": 0.8806105070267529, "learning_rate": 5.783812300240209e-06, "loss": 0.6196, "step": 1138 }, { "epoch": 0.4668271940160869, "grad_norm": 0.9135560402438904, "learning_rate": 5.7772518832350345e-06, "loss": 0.6094, "step": 1139 }, { "epoch": 0.4672370510784364, "grad_norm": 0.8984385802333817, "learning_rate": 5.7706900947192414e-06, "loss": 0.63, "step": 1140 }, { "epoch": 0.4676469081407859, "grad_norm": 0.8501040620251654, "learning_rate": 5.764126946271526e-06, "loss": 0.5909, "step": 1141 }, { "epoch": 0.4680567652031354, "grad_norm": 0.8616909075329598, "learning_rate": 5.757562449472981e-06, "loss": 0.6107, "step": 1142 }, { "epoch": 0.4684666222654849, "grad_norm": 0.8859179872896573, "learning_rate": 5.7509966159070806e-06, "loss": 0.5961, "step": 1143 }, { "epoch": 0.46887647932783444, "grad_norm": 0.8807477006195026, "learning_rate": 5.744429457159661e-06, "loss": 0.599, "step": 1144 }, { "epoch": 0.4692863363901839, "grad_norm": 0.9149638131683132, "learning_rate": 5.737860984818889e-06, "loss": 0.6481, "step": 1145 }, { "epoch": 0.46969619345253344, "grad_norm": 0.8858569084702431, "learning_rate": 5.731291210475257e-06, "loss": 0.5952, "step": 1146 }, { "epoch": 0.4701060505148829, "grad_norm": 0.8791226671350758, "learning_rate": 5.724720145721551e-06, "loss": 0.5978, "step": 1147 }, { "epoch": 0.47051590757723244, "grad_norm": 0.9389060111490877, "learning_rate": 5.7181478021528335e-06, "loss": 0.6562, "step": 1148 }, { "epoch": 0.47092576463958197, "grad_norm": 0.9171146327561434, "learning_rate": 5.711574191366427e-06, "loss": 0.5775, "step": 1149 }, { "epoch": 0.47133562170193144, "grad_norm": 0.9299916481769966, "learning_rate": 5.704999324961885e-06, "loss": 0.5682, "step": 1150 }, { "epoch": 0.47174547876428097, "grad_norm": 0.9470000210256874, "learning_rate": 5.6984232145409805e-06, "loss": 0.6729, "step": 1151 }, { "epoch": 0.47215533582663044, "grad_norm": 0.8638709816747699, "learning_rate": 5.691845871707682e-06, "loss": 0.5882, "step": 1152 }, { "epoch": 0.47256519288897997, "grad_norm": 0.9555546644787014, "learning_rate": 5.685267308068129e-06, "loss": 0.6649, "step": 1153 }, { "epoch": 0.4729750499513295, "grad_norm": 0.8015903153966794, "learning_rate": 5.6786875352306205e-06, "loss": 0.5946, "step": 1154 }, { "epoch": 0.47338490701367897, "grad_norm": 0.8769291126034893, "learning_rate": 5.6721065648055825e-06, "loss": 0.6189, "step": 1155 }, { "epoch": 0.4737947640760285, "grad_norm": 0.9081338325517627, "learning_rate": 5.665524408405561e-06, "loss": 0.6404, "step": 1156 }, { "epoch": 0.47420462113837797, "grad_norm": 0.949006173950421, "learning_rate": 5.65894107764519e-06, "loss": 0.6442, "step": 1157 }, { "epoch": 0.4746144782007275, "grad_norm": 0.8368625879968804, "learning_rate": 5.652356584141178e-06, "loss": 0.6097, "step": 1158 }, { "epoch": 0.475024335263077, "grad_norm": 0.9048148384238749, "learning_rate": 5.645770939512284e-06, "loss": 0.6481, "step": 1159 }, { "epoch": 0.4754341923254265, "grad_norm": 0.8756357640373862, "learning_rate": 5.639184155379297e-06, "loss": 0.5753, "step": 1160 }, { "epoch": 0.475844049387776, "grad_norm": 0.8607969664723035, "learning_rate": 5.632596243365021e-06, "loss": 0.5873, "step": 1161 }, { "epoch": 0.4762539064501255, "grad_norm": 0.9726262733010295, "learning_rate": 5.626007215094249e-06, "loss": 0.6443, "step": 1162 }, { "epoch": 0.476663763512475, "grad_norm": 0.8958138984027599, "learning_rate": 5.61941708219374e-06, "loss": 0.6066, "step": 1163 }, { "epoch": 0.47707362057482455, "grad_norm": 0.887268965458129, "learning_rate": 5.6128258562922065e-06, "loss": 0.6489, "step": 1164 }, { "epoch": 0.477483477637174, "grad_norm": 0.8701100233297195, "learning_rate": 5.606233549020286e-06, "loss": 0.623, "step": 1165 }, { "epoch": 0.47789333469952355, "grad_norm": 0.9793112719307859, "learning_rate": 5.599640172010525e-06, "loss": 0.6634, "step": 1166 }, { "epoch": 0.478303191761873, "grad_norm": 0.9166041690453104, "learning_rate": 5.593045736897362e-06, "loss": 0.6313, "step": 1167 }, { "epoch": 0.47871304882422255, "grad_norm": 0.8931589220428093, "learning_rate": 5.586450255317097e-06, "loss": 0.6102, "step": 1168 }, { "epoch": 0.4791229058865721, "grad_norm": 0.8572071786978973, "learning_rate": 5.579853738907878e-06, "loss": 0.6155, "step": 1169 }, { "epoch": 0.47953276294892155, "grad_norm": 0.9620721239711421, "learning_rate": 5.573256199309681e-06, "loss": 0.6544, "step": 1170 }, { "epoch": 0.4799426200112711, "grad_norm": 0.899038145636369, "learning_rate": 5.566657648164287e-06, "loss": 0.6168, "step": 1171 }, { "epoch": 0.48035247707362055, "grad_norm": 0.8917630696141741, "learning_rate": 5.560058097115262e-06, "loss": 0.6001, "step": 1172 }, { "epoch": 0.4807623341359701, "grad_norm": 0.8908263679730704, "learning_rate": 5.5534575578079334e-06, "loss": 0.607, "step": 1173 }, { "epoch": 0.4811721911983196, "grad_norm": 0.8784384804895291, "learning_rate": 5.546856041889374e-06, "loss": 0.5507, "step": 1174 }, { "epoch": 0.4815820482606691, "grad_norm": 0.8516403577187097, "learning_rate": 5.540253561008381e-06, "loss": 0.6019, "step": 1175 }, { "epoch": 0.4819919053230186, "grad_norm": 0.8817554433247085, "learning_rate": 5.533650126815455e-06, "loss": 0.6043, "step": 1176 }, { "epoch": 0.4824017623853681, "grad_norm": 0.93306868018001, "learning_rate": 5.527045750962777e-06, "loss": 0.6032, "step": 1177 }, { "epoch": 0.4828116194477176, "grad_norm": 0.8912074350765491, "learning_rate": 5.520440445104189e-06, "loss": 0.6274, "step": 1178 }, { "epoch": 0.48322147651006714, "grad_norm": 0.933964425223672, "learning_rate": 5.513834220895179e-06, "loss": 0.635, "step": 1179 }, { "epoch": 0.4836313335724166, "grad_norm": 0.9262296597680673, "learning_rate": 5.5072270899928495e-06, "loss": 0.6162, "step": 1180 }, { "epoch": 0.48404119063476614, "grad_norm": 0.9434601289640452, "learning_rate": 5.500619064055904e-06, "loss": 0.5891, "step": 1181 }, { "epoch": 0.4844510476971156, "grad_norm": 0.9082334186673355, "learning_rate": 5.494010154744629e-06, "loss": 0.6139, "step": 1182 }, { "epoch": 0.48486090475946514, "grad_norm": 0.8336105447597228, "learning_rate": 5.487400373720865e-06, "loss": 0.5977, "step": 1183 }, { "epoch": 0.48527076182181467, "grad_norm": 0.904836859647803, "learning_rate": 5.4807897326479935e-06, "loss": 0.6388, "step": 1184 }, { "epoch": 0.48568061888416414, "grad_norm": 0.884210486105013, "learning_rate": 5.4741782431909144e-06, "loss": 0.6159, "step": 1185 }, { "epoch": 0.48609047594651367, "grad_norm": 0.8600445876946173, "learning_rate": 5.467565917016022e-06, "loss": 0.5903, "step": 1186 }, { "epoch": 0.48650033300886314, "grad_norm": 0.8250842784310378, "learning_rate": 5.4609527657911885e-06, "loss": 0.5641, "step": 1187 }, { "epoch": 0.48691019007121267, "grad_norm": 0.9283013746648655, "learning_rate": 5.454338801185746e-06, "loss": 0.6567, "step": 1188 }, { "epoch": 0.4873200471335622, "grad_norm": 0.9221359506065854, "learning_rate": 5.447724034870451e-06, "loss": 0.624, "step": 1189 }, { "epoch": 0.48772990419591167, "grad_norm": 0.9123072888406125, "learning_rate": 5.441108478517485e-06, "loss": 0.6142, "step": 1190 }, { "epoch": 0.4881397612582612, "grad_norm": 0.8983616152124795, "learning_rate": 5.434492143800419e-06, "loss": 0.5653, "step": 1191 }, { "epoch": 0.48854961832061067, "grad_norm": 0.8598197478592601, "learning_rate": 5.4278750423942e-06, "loss": 0.6028, "step": 1192 }, { "epoch": 0.4889594753829602, "grad_norm": 0.8826794489725446, "learning_rate": 5.421257185975124e-06, "loss": 0.5567, "step": 1193 }, { "epoch": 0.4893693324453097, "grad_norm": 0.9657478894962029, "learning_rate": 5.414638586220824e-06, "loss": 0.6344, "step": 1194 }, { "epoch": 0.4897791895076592, "grad_norm": 0.8666666662684932, "learning_rate": 5.408019254810238e-06, "loss": 0.603, "step": 1195 }, { "epoch": 0.4901890465700087, "grad_norm": 0.8900979567393951, "learning_rate": 5.4013992034236065e-06, "loss": 0.618, "step": 1196 }, { "epoch": 0.4905989036323582, "grad_norm": 0.9531858846609488, "learning_rate": 5.394778443742426e-06, "loss": 0.639, "step": 1197 }, { "epoch": 0.4910087606947077, "grad_norm": 0.8970084455525545, "learning_rate": 5.388156987449455e-06, "loss": 0.6325, "step": 1198 }, { "epoch": 0.49141861775705725, "grad_norm": 0.909829987950285, "learning_rate": 5.381534846228673e-06, "loss": 0.6357, "step": 1199 }, { "epoch": 0.4918284748194067, "grad_norm": 0.9418354303845616, "learning_rate": 5.374912031765274e-06, "loss": 0.6428, "step": 1200 }, { "epoch": 0.49223833188175625, "grad_norm": 0.8693192388937186, "learning_rate": 5.368288555745635e-06, "loss": 0.6164, "step": 1201 }, { "epoch": 0.4926481889441057, "grad_norm": 0.8603068988817488, "learning_rate": 5.361664429857303e-06, "loss": 0.6098, "step": 1202 }, { "epoch": 0.49305804600645525, "grad_norm": 0.885496183927996, "learning_rate": 5.355039665788974e-06, "loss": 0.5781, "step": 1203 }, { "epoch": 0.4934679030688048, "grad_norm": 0.8481759310774216, "learning_rate": 5.348414275230464e-06, "loss": 0.5791, "step": 1204 }, { "epoch": 0.49387776013115425, "grad_norm": 0.9342142877679858, "learning_rate": 5.3417882698727005e-06, "loss": 0.6292, "step": 1205 }, { "epoch": 0.4942876171935038, "grad_norm": 0.9363505456497911, "learning_rate": 5.335161661407693e-06, "loss": 0.6138, "step": 1206 }, { "epoch": 0.49469747425585325, "grad_norm": 0.8690017076897805, "learning_rate": 5.328534461528515e-06, "loss": 0.6124, "step": 1207 }, { "epoch": 0.4951073313182028, "grad_norm": 0.8625542425282678, "learning_rate": 5.321906681929284e-06, "loss": 0.5844, "step": 1208 }, { "epoch": 0.4955171883805523, "grad_norm": 0.8638035542384932, "learning_rate": 5.315278334305143e-06, "loss": 0.5865, "step": 1209 }, { "epoch": 0.4959270454429018, "grad_norm": 0.897969156916707, "learning_rate": 5.3086494303522304e-06, "loss": 0.6398, "step": 1210 }, { "epoch": 0.4963369025052513, "grad_norm": 0.8820666209722386, "learning_rate": 5.302019981767675e-06, "loss": 0.5878, "step": 1211 }, { "epoch": 0.4967467595676008, "grad_norm": 0.9103986890969428, "learning_rate": 5.295390000249562e-06, "loss": 0.6503, "step": 1212 }, { "epoch": 0.4971566166299503, "grad_norm": 0.9314539819391834, "learning_rate": 5.288759497496916e-06, "loss": 0.6335, "step": 1213 }, { "epoch": 0.49756647369229984, "grad_norm": 0.9394605753562004, "learning_rate": 5.2821284852096835e-06, "loss": 0.6481, "step": 1214 }, { "epoch": 0.4979763307546493, "grad_norm": 0.908256468587114, "learning_rate": 5.275496975088708e-06, "loss": 0.5913, "step": 1215 }, { "epoch": 0.49838618781699884, "grad_norm": 0.9221703940024301, "learning_rate": 5.268864978835715e-06, "loss": 0.6217, "step": 1216 }, { "epoch": 0.4987960448793483, "grad_norm": 0.8917345770199103, "learning_rate": 5.262232508153286e-06, "loss": 0.6018, "step": 1217 }, { "epoch": 0.49920590194169784, "grad_norm": 0.9004445630764327, "learning_rate": 5.255599574744836e-06, "loss": 0.6163, "step": 1218 }, { "epoch": 0.49961575900404737, "grad_norm": 0.8529214107807, "learning_rate": 5.248966190314604e-06, "loss": 0.6019, "step": 1219 }, { "epoch": 0.5000256160663968, "grad_norm": 0.8903815224076395, "learning_rate": 5.2423323665676184e-06, "loss": 0.6459, "step": 1220 }, { "epoch": 0.5004354731287464, "grad_norm": 0.8172214069010677, "learning_rate": 5.235698115209685e-06, "loss": 0.5276, "step": 1221 }, { "epoch": 0.5008453301910959, "grad_norm": 0.8754468702329112, "learning_rate": 5.229063447947365e-06, "loss": 0.5507, "step": 1222 }, { "epoch": 0.5012551872534453, "grad_norm": 0.8352037343598464, "learning_rate": 5.222428376487954e-06, "loss": 0.5961, "step": 1223 }, { "epoch": 0.5016650443157948, "grad_norm": 0.9345233886419388, "learning_rate": 5.215792912539458e-06, "loss": 0.6218, "step": 1224 }, { "epoch": 0.5020749013781444, "grad_norm": 0.8809277315492825, "learning_rate": 5.209157067810578e-06, "loss": 0.5932, "step": 1225 }, { "epoch": 0.5024847584404939, "grad_norm": 0.9236947887499363, "learning_rate": 5.202520854010683e-06, "loss": 0.6036, "step": 1226 }, { "epoch": 0.5028946155028434, "grad_norm": 0.8850943055881872, "learning_rate": 5.195884282849801e-06, "loss": 0.6182, "step": 1227 }, { "epoch": 0.5033044725651928, "grad_norm": 0.8660206660227343, "learning_rate": 5.1892473660385835e-06, "loss": 0.6158, "step": 1228 }, { "epoch": 0.5037143296275424, "grad_norm": 0.8909312519058709, "learning_rate": 5.182610115288296e-06, "loss": 0.605, "step": 1229 }, { "epoch": 0.5041241866898919, "grad_norm": 0.8888099628151006, "learning_rate": 5.175972542310789e-06, "loss": 0.6383, "step": 1230 }, { "epoch": 0.5045340437522414, "grad_norm": 0.9114257837601683, "learning_rate": 5.169334658818484e-06, "loss": 0.6106, "step": 1231 }, { "epoch": 0.504943900814591, "grad_norm": 0.9191939507426241, "learning_rate": 5.162696476524353e-06, "loss": 0.6249, "step": 1232 }, { "epoch": 0.5053537578769404, "grad_norm": 0.9323316247288369, "learning_rate": 5.156058007141893e-06, "loss": 0.6416, "step": 1233 }, { "epoch": 0.5057636149392899, "grad_norm": 0.949610774459351, "learning_rate": 5.149419262385106e-06, "loss": 0.6737, "step": 1234 }, { "epoch": 0.5061734720016394, "grad_norm": 0.8725297944382487, "learning_rate": 5.142780253968481e-06, "loss": 0.6315, "step": 1235 }, { "epoch": 0.506583329063989, "grad_norm": 0.8791178470654853, "learning_rate": 5.136140993606975e-06, "loss": 0.6024, "step": 1236 }, { "epoch": 0.5069931861263385, "grad_norm": 0.8805570799721937, "learning_rate": 5.129501493015986e-06, "loss": 0.6464, "step": 1237 }, { "epoch": 0.5074030431886879, "grad_norm": 0.8604280272254023, "learning_rate": 5.1228617639113355e-06, "loss": 0.612, "step": 1238 }, { "epoch": 0.5078129002510374, "grad_norm": 0.9086760421456714, "learning_rate": 5.116221818009251e-06, "loss": 0.6059, "step": 1239 }, { "epoch": 0.508222757313387, "grad_norm": 0.8914727775670025, "learning_rate": 5.109581667026341e-06, "loss": 0.5697, "step": 1240 }, { "epoch": 0.5086326143757365, "grad_norm": 0.834964310146916, "learning_rate": 5.102941322679577e-06, "loss": 0.5564, "step": 1241 }, { "epoch": 0.509042471438086, "grad_norm": 0.9867986193402781, "learning_rate": 5.096300796686271e-06, "loss": 0.6333, "step": 1242 }, { "epoch": 0.5094523285004354, "grad_norm": 0.8357965756811988, "learning_rate": 5.0896601007640545e-06, "loss": 0.5783, "step": 1243 }, { "epoch": 0.509862185562785, "grad_norm": 0.9573878939700226, "learning_rate": 5.083019246630862e-06, "loss": 0.6234, "step": 1244 }, { "epoch": 0.5102720426251345, "grad_norm": 0.8712506526190286, "learning_rate": 5.076378246004903e-06, "loss": 0.5768, "step": 1245 }, { "epoch": 0.510681899687484, "grad_norm": 0.8946523480557282, "learning_rate": 5.069737110604647e-06, "loss": 0.6316, "step": 1246 }, { "epoch": 0.5110917567498335, "grad_norm": 0.8802464888732894, "learning_rate": 5.063095852148803e-06, "loss": 0.6017, "step": 1247 }, { "epoch": 0.511501613812183, "grad_norm": 0.8799889041370137, "learning_rate": 5.056454482356295e-06, "loss": 0.6063, "step": 1248 }, { "epoch": 0.5119114708745325, "grad_norm": 0.8941781183218833, "learning_rate": 5.049813012946246e-06, "loss": 0.6487, "step": 1249 }, { "epoch": 0.512321327936882, "grad_norm": 0.9064880619128819, "learning_rate": 5.043171455637952e-06, "loss": 0.5887, "step": 1250 }, { "epoch": 0.5127311849992315, "grad_norm": 0.8883026941593168, "learning_rate": 5.036529822150865e-06, "loss": 0.5803, "step": 1251 }, { "epoch": 0.5131410420615811, "grad_norm": 0.9471338072737294, "learning_rate": 5.029888124204574e-06, "loss": 0.61, "step": 1252 }, { "epoch": 0.5135508991239305, "grad_norm": 0.8454668307748958, "learning_rate": 5.023246373518777e-06, "loss": 0.5994, "step": 1253 }, { "epoch": 0.51396075618628, "grad_norm": 0.8228752470660619, "learning_rate": 5.016604581813269e-06, "loss": 0.5693, "step": 1254 }, { "epoch": 0.5143706132486295, "grad_norm": 0.8451564780494096, "learning_rate": 5.009962760807915e-06, "loss": 0.5892, "step": 1255 }, { "epoch": 0.5147804703109791, "grad_norm": 0.8476750524399441, "learning_rate": 5.003320922222632e-06, "loss": 0.5904, "step": 1256 }, { "epoch": 0.5151903273733286, "grad_norm": 0.8740896790863493, "learning_rate": 4.99667907777737e-06, "loss": 0.6155, "step": 1257 }, { "epoch": 0.515600184435678, "grad_norm": 0.8263632444194938, "learning_rate": 4.990037239192087e-06, "loss": 0.5767, "step": 1258 }, { "epoch": 0.5160100414980275, "grad_norm": 0.9192409907263347, "learning_rate": 4.983395418186732e-06, "loss": 0.6512, "step": 1259 }, { "epoch": 0.5164198985603771, "grad_norm": 0.8775347799501683, "learning_rate": 4.976753626481225e-06, "loss": 0.6314, "step": 1260 }, { "epoch": 0.5168297556227266, "grad_norm": 0.8802228813437273, "learning_rate": 4.970111875795428e-06, "loss": 0.5712, "step": 1261 }, { "epoch": 0.5172396126850761, "grad_norm": 0.8447322088628753, "learning_rate": 4.963470177849135e-06, "loss": 0.6153, "step": 1262 }, { "epoch": 0.5176494697474255, "grad_norm": 0.9359849994712266, "learning_rate": 4.95682854436205e-06, "loss": 0.6255, "step": 1263 }, { "epoch": 0.5180593268097751, "grad_norm": 0.843848502677891, "learning_rate": 4.950186987053755e-06, "loss": 0.6295, "step": 1264 }, { "epoch": 0.5184691838721246, "grad_norm": 0.8270801784518665, "learning_rate": 4.943545517643707e-06, "loss": 0.6483, "step": 1265 }, { "epoch": 0.5188790409344741, "grad_norm": 0.9114451221769354, "learning_rate": 4.936904147851199e-06, "loss": 0.5811, "step": 1266 }, { "epoch": 0.5192888979968237, "grad_norm": 0.8952016553945555, "learning_rate": 4.930262889395356e-06, "loss": 0.5844, "step": 1267 }, { "epoch": 0.5196987550591731, "grad_norm": 0.8876683141957712, "learning_rate": 4.9236217539951e-06, "loss": 0.6066, "step": 1268 }, { "epoch": 0.5201086121215226, "grad_norm": 0.9385628152502488, "learning_rate": 4.916980753369141e-06, "loss": 0.6525, "step": 1269 }, { "epoch": 0.5205184691838721, "grad_norm": 0.9238561342799797, "learning_rate": 4.910339899235947e-06, "loss": 0.6049, "step": 1270 }, { "epoch": 0.5209283262462217, "grad_norm": 0.9219639808254085, "learning_rate": 4.90369920331373e-06, "loss": 0.6057, "step": 1271 }, { "epoch": 0.5213381833085712, "grad_norm": 0.9246228491500559, "learning_rate": 4.897058677320425e-06, "loss": 0.6073, "step": 1272 }, { "epoch": 0.5217480403709206, "grad_norm": 0.8584061030770449, "learning_rate": 4.89041833297366e-06, "loss": 0.5939, "step": 1273 }, { "epoch": 0.5221578974332701, "grad_norm": 0.9302601545079059, "learning_rate": 4.883778181990752e-06, "loss": 0.6455, "step": 1274 }, { "epoch": 0.5225677544956197, "grad_norm": 0.7979070630237438, "learning_rate": 4.877138236088666e-06, "loss": 0.5701, "step": 1275 }, { "epoch": 0.5229776115579692, "grad_norm": 0.9039040286007842, "learning_rate": 4.8704985069840165e-06, "loss": 0.6075, "step": 1276 }, { "epoch": 0.5233874686203187, "grad_norm": 0.8875856467520153, "learning_rate": 4.8638590063930265e-06, "loss": 0.6154, "step": 1277 }, { "epoch": 0.5237973256826681, "grad_norm": 0.9133495218337578, "learning_rate": 4.85721974603152e-06, "loss": 0.6304, "step": 1278 }, { "epoch": 0.5242071827450177, "grad_norm": 0.9148213610785779, "learning_rate": 4.850580737614896e-06, "loss": 0.6197, "step": 1279 }, { "epoch": 0.5246170398073672, "grad_norm": 0.9036499192911676, "learning_rate": 4.843941992858107e-06, "loss": 0.6121, "step": 1280 }, { "epoch": 0.5250268968697167, "grad_norm": 0.9125378403496753, "learning_rate": 4.837303523475648e-06, "loss": 0.6376, "step": 1281 }, { "epoch": 0.5254367539320662, "grad_norm": 0.9034491859103583, "learning_rate": 4.830665341181516e-06, "loss": 0.6024, "step": 1282 }, { "epoch": 0.5258466109944157, "grad_norm": 0.9067456703562705, "learning_rate": 4.824027457689214e-06, "loss": 0.6473, "step": 1283 }, { "epoch": 0.5262564680567652, "grad_norm": 0.9736640691392519, "learning_rate": 4.817389884711706e-06, "loss": 0.5922, "step": 1284 }, { "epoch": 0.5266663251191147, "grad_norm": 0.905788660768375, "learning_rate": 4.810752633961418e-06, "loss": 0.6168, "step": 1285 }, { "epoch": 0.5270761821814642, "grad_norm": 0.9215561191135068, "learning_rate": 4.8041157171502e-06, "loss": 0.5845, "step": 1286 }, { "epoch": 0.5274860392438138, "grad_norm": 0.8896638094171557, "learning_rate": 4.797479145989319e-06, "loss": 0.622, "step": 1287 }, { "epoch": 0.5278958963061632, "grad_norm": 0.9171656787328262, "learning_rate": 4.790842932189425e-06, "loss": 0.6114, "step": 1288 }, { "epoch": 0.5283057533685127, "grad_norm": 0.9043503918545099, "learning_rate": 4.784207087460543e-06, "loss": 0.645, "step": 1289 }, { "epoch": 0.5287156104308622, "grad_norm": 0.9025516953275236, "learning_rate": 4.777571623512047e-06, "loss": 0.5886, "step": 1290 }, { "epoch": 0.5291254674932118, "grad_norm": 0.9053265756326917, "learning_rate": 4.770936552052634e-06, "loss": 0.6229, "step": 1291 }, { "epoch": 0.5295353245555613, "grad_norm": 0.9019337670609221, "learning_rate": 4.764301884790316e-06, "loss": 0.6054, "step": 1292 }, { "epoch": 0.5299451816179107, "grad_norm": 0.8971810066240222, "learning_rate": 4.757667633432383e-06, "loss": 0.5873, "step": 1293 }, { "epoch": 0.5303550386802602, "grad_norm": 0.9518747370193238, "learning_rate": 4.751033809685397e-06, "loss": 0.6535, "step": 1294 }, { "epoch": 0.5307648957426098, "grad_norm": 0.8574319611519631, "learning_rate": 4.744400425255165e-06, "loss": 0.5798, "step": 1295 }, { "epoch": 0.5311747528049593, "grad_norm": 0.9242058570016168, "learning_rate": 4.7377674918467175e-06, "loss": 0.5717, "step": 1296 }, { "epoch": 0.5315846098673088, "grad_norm": 0.9437016201446258, "learning_rate": 4.731135021164286e-06, "loss": 0.6041, "step": 1297 }, { "epoch": 0.5319944669296582, "grad_norm": 0.870786725817425, "learning_rate": 4.7245030249112925e-06, "loss": 0.6189, "step": 1298 }, { "epoch": 0.5324043239920078, "grad_norm": 0.8104401011547898, "learning_rate": 4.717871514790319e-06, "loss": 0.5588, "step": 1299 }, { "epoch": 0.5328141810543573, "grad_norm": 0.8850968604448693, "learning_rate": 4.7112405025030855e-06, "loss": 0.6621, "step": 1300 }, { "epoch": 0.5332240381167068, "grad_norm": 0.9235485856878897, "learning_rate": 4.70460999975044e-06, "loss": 0.596, "step": 1301 }, { "epoch": 0.5336338951790564, "grad_norm": 0.8742994967634934, "learning_rate": 4.697980018232326e-06, "loss": 0.6129, "step": 1302 }, { "epoch": 0.5340437522414058, "grad_norm": 0.9094252155465166, "learning_rate": 4.691350569647771e-06, "loss": 0.6251, "step": 1303 }, { "epoch": 0.5344536093037553, "grad_norm": 0.8410972082547192, "learning_rate": 4.684721665694859e-06, "loss": 0.6029, "step": 1304 }, { "epoch": 0.5348634663661048, "grad_norm": 0.9264148535161596, "learning_rate": 4.678093318070715e-06, "loss": 0.6694, "step": 1305 }, { "epoch": 0.5352733234284544, "grad_norm": 0.9324284368488158, "learning_rate": 4.671465538471487e-06, "loss": 0.6145, "step": 1306 }, { "epoch": 0.5356831804908039, "grad_norm": 0.9159955855030233, "learning_rate": 4.664838338592307e-06, "loss": 0.554, "step": 1307 }, { "epoch": 0.5360930375531533, "grad_norm": 0.8713178669498598, "learning_rate": 4.658211730127301e-06, "loss": 0.5538, "step": 1308 }, { "epoch": 0.5365028946155028, "grad_norm": 0.9031789364782815, "learning_rate": 4.651585724769537e-06, "loss": 0.5982, "step": 1309 }, { "epoch": 0.5369127516778524, "grad_norm": 0.9208726638312602, "learning_rate": 4.64496033421103e-06, "loss": 0.628, "step": 1310 }, { "epoch": 0.5373226087402019, "grad_norm": 0.8973307673411189, "learning_rate": 4.638335570142698e-06, "loss": 0.5564, "step": 1311 }, { "epoch": 0.5377324658025514, "grad_norm": 0.9234742353166404, "learning_rate": 4.631711444254368e-06, "loss": 0.6139, "step": 1312 }, { "epoch": 0.5381423228649008, "grad_norm": 0.958432256569356, "learning_rate": 4.6250879682347285e-06, "loss": 0.6362, "step": 1313 }, { "epoch": 0.5385521799272504, "grad_norm": 0.91223622619854, "learning_rate": 4.618465153771327e-06, "loss": 0.6209, "step": 1314 }, { "epoch": 0.5389620369895999, "grad_norm": 0.9835603432901022, "learning_rate": 4.611843012550546e-06, "loss": 0.6594, "step": 1315 }, { "epoch": 0.5393718940519494, "grad_norm": 0.8941930656951431, "learning_rate": 4.6052215562575745e-06, "loss": 0.5757, "step": 1316 }, { "epoch": 0.5397817511142989, "grad_norm": 0.9027757014417672, "learning_rate": 4.598600796576395e-06, "loss": 0.6105, "step": 1317 }, { "epoch": 0.5401916081766484, "grad_norm": 0.8542765786497386, "learning_rate": 4.591980745189762e-06, "loss": 0.5712, "step": 1318 }, { "epoch": 0.5406014652389979, "grad_norm": 0.9646718018884227, "learning_rate": 4.585361413779179e-06, "loss": 0.6141, "step": 1319 }, { "epoch": 0.5410113223013474, "grad_norm": 0.8359309593646923, "learning_rate": 4.578742814024878e-06, "loss": 0.6267, "step": 1320 }, { "epoch": 0.5414211793636969, "grad_norm": 0.8435733335271379, "learning_rate": 4.572124957605803e-06, "loss": 0.5899, "step": 1321 }, { "epoch": 0.5418310364260465, "grad_norm": 0.8200867769816164, "learning_rate": 4.565507856199582e-06, "loss": 0.541, "step": 1322 }, { "epoch": 0.5422408934883959, "grad_norm": 0.9466449565013993, "learning_rate": 4.5588915214825154e-06, "loss": 0.596, "step": 1323 }, { "epoch": 0.5426507505507454, "grad_norm": 0.9477241946335815, "learning_rate": 4.552275965129551e-06, "loss": 0.6427, "step": 1324 }, { "epoch": 0.5430606076130949, "grad_norm": 0.8650124629012952, "learning_rate": 4.545661198814257e-06, "loss": 0.5779, "step": 1325 }, { "epoch": 0.5434704646754445, "grad_norm": 0.9496796961049592, "learning_rate": 4.539047234208812e-06, "loss": 0.6882, "step": 1326 }, { "epoch": 0.543880321737794, "grad_norm": 0.8860585698176044, "learning_rate": 4.5324340829839785e-06, "loss": 0.6245, "step": 1327 }, { "epoch": 0.5442901788001434, "grad_norm": 0.8956760224718001, "learning_rate": 4.525821756809088e-06, "loss": 0.5956, "step": 1328 }, { "epoch": 0.5447000358624929, "grad_norm": 0.8956714884052235, "learning_rate": 4.519210267352007e-06, "loss": 0.6007, "step": 1329 }, { "epoch": 0.5451098929248425, "grad_norm": 1.0109991428810816, "learning_rate": 4.512599626279138e-06, "loss": 0.6706, "step": 1330 }, { "epoch": 0.545519749987192, "grad_norm": 0.9228802720127786, "learning_rate": 4.505989845255373e-06, "loss": 0.6214, "step": 1331 }, { "epoch": 0.5459296070495415, "grad_norm": 0.8306054475854066, "learning_rate": 4.4993809359440975e-06, "loss": 0.6303, "step": 1332 }, { "epoch": 0.5463394641118909, "grad_norm": 0.8893570669528911, "learning_rate": 4.492772910007152e-06, "loss": 0.6207, "step": 1333 }, { "epoch": 0.5467493211742405, "grad_norm": 0.854509801929049, "learning_rate": 4.486165779104822e-06, "loss": 0.6031, "step": 1334 }, { "epoch": 0.54715917823659, "grad_norm": 0.8902547452733099, "learning_rate": 4.479559554895812e-06, "loss": 0.6541, "step": 1335 }, { "epoch": 0.5475690352989395, "grad_norm": 0.911058329346076, "learning_rate": 4.472954249037224e-06, "loss": 0.6399, "step": 1336 }, { "epoch": 0.547978892361289, "grad_norm": 0.924269371099788, "learning_rate": 4.466349873184548e-06, "loss": 0.6071, "step": 1337 }, { "epoch": 0.5483887494236385, "grad_norm": 0.9040955544231293, "learning_rate": 4.4597464389916206e-06, "loss": 0.6198, "step": 1338 }, { "epoch": 0.548798606485988, "grad_norm": 0.8190140593233403, "learning_rate": 4.4531439581106295e-06, "loss": 0.5958, "step": 1339 }, { "epoch": 0.5492084635483375, "grad_norm": 0.8981640429951159, "learning_rate": 4.446542442192069e-06, "loss": 0.6098, "step": 1340 }, { "epoch": 0.549618320610687, "grad_norm": 0.8810810638657716, "learning_rate": 4.439941902884739e-06, "loss": 0.6008, "step": 1341 }, { "epoch": 0.5500281776730366, "grad_norm": 0.9202884565928008, "learning_rate": 4.433342351835714e-06, "loss": 0.578, "step": 1342 }, { "epoch": 0.550438034735386, "grad_norm": 0.9253829222672632, "learning_rate": 4.426743800690318e-06, "loss": 0.6725, "step": 1343 }, { "epoch": 0.5508478917977355, "grad_norm": 0.9033415891266737, "learning_rate": 4.4201462610921235e-06, "loss": 0.6041, "step": 1344 }, { "epoch": 0.551257748860085, "grad_norm": 0.871966852702791, "learning_rate": 4.4135497446829044e-06, "loss": 0.5895, "step": 1345 }, { "epoch": 0.5516676059224346, "grad_norm": 0.8873304788987425, "learning_rate": 4.406954263102641e-06, "loss": 0.5532, "step": 1346 }, { "epoch": 0.5520774629847841, "grad_norm": 0.8787376651404979, "learning_rate": 4.400359827989475e-06, "loss": 0.597, "step": 1347 }, { "epoch": 0.5524873200471335, "grad_norm": 0.9082982875240359, "learning_rate": 4.3937664509797175e-06, "loss": 0.5836, "step": 1348 }, { "epoch": 0.552897177109483, "grad_norm": 0.8987282901984691, "learning_rate": 4.387174143707794e-06, "loss": 0.6338, "step": 1349 }, { "epoch": 0.5533070341718326, "grad_norm": 0.9192251135295589, "learning_rate": 4.38058291780626e-06, "loss": 0.6211, "step": 1350 }, { "epoch": 0.5537168912341821, "grad_norm": 0.8814440847381059, "learning_rate": 4.373992784905753e-06, "loss": 0.5713, "step": 1351 }, { "epoch": 0.5541267482965316, "grad_norm": 0.8262089871294825, "learning_rate": 4.367403756634979e-06, "loss": 0.5812, "step": 1352 }, { "epoch": 0.554536605358881, "grad_norm": 0.799429526603619, "learning_rate": 4.360815844620704e-06, "loss": 0.5916, "step": 1353 }, { "epoch": 0.5549464624212306, "grad_norm": 0.8632033796714843, "learning_rate": 4.354229060487718e-06, "loss": 0.6506, "step": 1354 }, { "epoch": 0.5553563194835801, "grad_norm": 0.9042869418215003, "learning_rate": 4.347643415858825e-06, "loss": 0.6556, "step": 1355 }, { "epoch": 0.5557661765459296, "grad_norm": 0.9114514359995793, "learning_rate": 4.341058922354811e-06, "loss": 0.582, "step": 1356 }, { "epoch": 0.5561760336082792, "grad_norm": 0.9333252953814914, "learning_rate": 4.33447559159444e-06, "loss": 0.6284, "step": 1357 }, { "epoch": 0.5565858906706286, "grad_norm": 0.913110066055317, "learning_rate": 4.327893435194418e-06, "loss": 0.5613, "step": 1358 }, { "epoch": 0.5569957477329781, "grad_norm": 0.8879204200859762, "learning_rate": 4.32131246476938e-06, "loss": 0.5917, "step": 1359 }, { "epoch": 0.5574056047953276, "grad_norm": 0.8548733869086996, "learning_rate": 4.314732691931871e-06, "loss": 0.607, "step": 1360 }, { "epoch": 0.5578154618576772, "grad_norm": 0.833823048094666, "learning_rate": 4.308154128292318e-06, "loss": 0.6088, "step": 1361 }, { "epoch": 0.5582253189200267, "grad_norm": 0.9344598772550454, "learning_rate": 4.30157678545902e-06, "loss": 0.612, "step": 1362 }, { "epoch": 0.5586351759823761, "grad_norm": 0.9089051989286111, "learning_rate": 4.295000675038116e-06, "loss": 0.6461, "step": 1363 }, { "epoch": 0.5590450330447256, "grad_norm": 0.8868857370416103, "learning_rate": 4.2884258086335755e-06, "loss": 0.6199, "step": 1364 }, { "epoch": 0.5594548901070752, "grad_norm": 0.872361053062591, "learning_rate": 4.281852197847167e-06, "loss": 0.608, "step": 1365 }, { "epoch": 0.5598647471694247, "grad_norm": 0.8911161187264081, "learning_rate": 4.275279854278451e-06, "loss": 0.5525, "step": 1366 }, { "epoch": 0.5602746042317742, "grad_norm": 0.8604581130384225, "learning_rate": 4.2687087895247444e-06, "loss": 0.5671, "step": 1367 }, { "epoch": 0.5606844612941236, "grad_norm": 0.9267328792917775, "learning_rate": 4.262139015181112e-06, "loss": 0.6147, "step": 1368 }, { "epoch": 0.5610943183564732, "grad_norm": 0.8611696460777428, "learning_rate": 4.255570542840342e-06, "loss": 0.6445, "step": 1369 }, { "epoch": 0.5615041754188227, "grad_norm": 0.8764819796139601, "learning_rate": 4.249003384092919e-06, "loss": 0.5666, "step": 1370 }, { "epoch": 0.5619140324811722, "grad_norm": 0.9395246474591258, "learning_rate": 4.242437550527021e-06, "loss": 0.5926, "step": 1371 }, { "epoch": 0.5623238895435217, "grad_norm": 0.8977271935869883, "learning_rate": 4.235873053728475e-06, "loss": 0.6419, "step": 1372 }, { "epoch": 0.5627337466058712, "grad_norm": 0.8578802771478791, "learning_rate": 4.22930990528076e-06, "loss": 0.588, "step": 1373 }, { "epoch": 0.5631436036682207, "grad_norm": 0.9265113619466834, "learning_rate": 4.222748116764967e-06, "loss": 0.6363, "step": 1374 }, { "epoch": 0.5635534607305702, "grad_norm": 0.8527047883524943, "learning_rate": 4.2161876997597936e-06, "loss": 0.604, "step": 1375 }, { "epoch": 0.5639633177929197, "grad_norm": 0.849110748849593, "learning_rate": 4.2096286658415116e-06, "loss": 0.5588, "step": 1376 }, { "epoch": 0.5643731748552693, "grad_norm": 0.9323628185858212, "learning_rate": 4.203071026583958e-06, "loss": 0.5936, "step": 1377 }, { "epoch": 0.5647830319176187, "grad_norm": 0.9381230906273655, "learning_rate": 4.196514793558508e-06, "loss": 0.6098, "step": 1378 }, { "epoch": 0.5651928889799682, "grad_norm": 0.850584536720403, "learning_rate": 4.18995997833405e-06, "loss": 0.6236, "step": 1379 }, { "epoch": 0.5656027460423177, "grad_norm": 0.952476241269481, "learning_rate": 4.183406592476978e-06, "loss": 0.6263, "step": 1380 }, { "epoch": 0.5660126031046673, "grad_norm": 0.8950838966337443, "learning_rate": 4.176854647551158e-06, "loss": 0.5745, "step": 1381 }, { "epoch": 0.5664224601670168, "grad_norm": 0.8841947417588049, "learning_rate": 4.170304155117914e-06, "loss": 0.5977, "step": 1382 }, { "epoch": 0.5668323172293662, "grad_norm": 0.863092429745221, "learning_rate": 4.163755126736011e-06, "loss": 0.5848, "step": 1383 }, { "epoch": 0.5672421742917158, "grad_norm": 0.9271238606074361, "learning_rate": 4.157207573961627e-06, "loss": 0.6254, "step": 1384 }, { "epoch": 0.5676520313540653, "grad_norm": 0.9197228427253256, "learning_rate": 4.15066150834834e-06, "loss": 0.5845, "step": 1385 }, { "epoch": 0.5680618884164148, "grad_norm": 0.7832231619214584, "learning_rate": 4.144116941447096e-06, "loss": 0.5541, "step": 1386 }, { "epoch": 0.5684717454787643, "grad_norm": 0.9292590198769374, "learning_rate": 4.137573884806207e-06, "loss": 0.6092, "step": 1387 }, { "epoch": 0.5688816025411138, "grad_norm": 0.8721117486012088, "learning_rate": 4.13103234997131e-06, "loss": 0.6114, "step": 1388 }, { "epoch": 0.5692914596034633, "grad_norm": 0.8711061834383341, "learning_rate": 4.124492348485366e-06, "loss": 0.582, "step": 1389 }, { "epoch": 0.5697013166658128, "grad_norm": 0.7788195010820959, "learning_rate": 4.1179538918886205e-06, "loss": 0.5758, "step": 1390 }, { "epoch": 0.5701111737281623, "grad_norm": 0.8531423753017332, "learning_rate": 4.111416991718601e-06, "loss": 0.59, "step": 1391 }, { "epoch": 0.5705210307905119, "grad_norm": 0.8620062487728033, "learning_rate": 4.104881659510084e-06, "loss": 0.5511, "step": 1392 }, { "epoch": 0.5709308878528613, "grad_norm": 0.825016922590123, "learning_rate": 4.09834790679508e-06, "loss": 0.6119, "step": 1393 }, { "epoch": 0.5713407449152108, "grad_norm": 0.8636707820626904, "learning_rate": 4.091815745102818e-06, "loss": 0.5993, "step": 1394 }, { "epoch": 0.5717506019775603, "grad_norm": 0.8610311535691259, "learning_rate": 4.08528518595971e-06, "loss": 0.6014, "step": 1395 }, { "epoch": 0.5721604590399099, "grad_norm": 0.9302571469041263, "learning_rate": 4.0787562408893485e-06, "loss": 0.6501, "step": 1396 }, { "epoch": 0.5725703161022594, "grad_norm": 0.8839848089444177, "learning_rate": 4.072228921412471e-06, "loss": 0.5676, "step": 1397 }, { "epoch": 0.5729801731646088, "grad_norm": 0.8239194507418837, "learning_rate": 4.065703239046951e-06, "loss": 0.5772, "step": 1398 }, { "epoch": 0.5733900302269583, "grad_norm": 0.939745228380715, "learning_rate": 4.059179205307773e-06, "loss": 0.599, "step": 1399 }, { "epoch": 0.5737998872893079, "grad_norm": 0.9195197371930826, "learning_rate": 4.052656831707012e-06, "loss": 0.6068, "step": 1400 }, { "epoch": 0.5742097443516574, "grad_norm": 0.9301455462954117, "learning_rate": 4.046136129753809e-06, "loss": 0.6847, "step": 1401 }, { "epoch": 0.5746196014140069, "grad_norm": 0.8541984074749708, "learning_rate": 4.039617110954362e-06, "loss": 0.5823, "step": 1402 }, { "epoch": 0.5750294584763563, "grad_norm": 0.9297759719413224, "learning_rate": 4.033099786811899e-06, "loss": 0.6416, "step": 1403 }, { "epoch": 0.5754393155387059, "grad_norm": 0.8397358962663878, "learning_rate": 4.02658416882665e-06, "loss": 0.6006, "step": 1404 }, { "epoch": 0.5758491726010554, "grad_norm": 0.8425948760611377, "learning_rate": 4.020070268495844e-06, "loss": 0.6154, "step": 1405 }, { "epoch": 0.5762590296634049, "grad_norm": 0.9204582726393317, "learning_rate": 4.01355809731367e-06, "loss": 0.6301, "step": 1406 }, { "epoch": 0.5766688867257544, "grad_norm": 0.9208058205292039, "learning_rate": 4.007047666771274e-06, "loss": 0.6409, "step": 1407 }, { "epoch": 0.5770787437881039, "grad_norm": 0.8995612509700791, "learning_rate": 4.000538988356723e-06, "loss": 0.6291, "step": 1408 }, { "epoch": 0.5774886008504534, "grad_norm": 0.9280752767095183, "learning_rate": 3.9940320735550005e-06, "loss": 0.6312, "step": 1409 }, { "epoch": 0.5778984579128029, "grad_norm": 0.8800314853762585, "learning_rate": 3.987526933847969e-06, "loss": 0.6374, "step": 1410 }, { "epoch": 0.5783083149751524, "grad_norm": 0.8939235426726458, "learning_rate": 3.981023580714364e-06, "loss": 0.5848, "step": 1411 }, { "epoch": 0.578718172037502, "grad_norm": 0.8716709472540803, "learning_rate": 3.974522025629771e-06, "loss": 0.5788, "step": 1412 }, { "epoch": 0.5791280290998514, "grad_norm": 0.9632219475484204, "learning_rate": 3.9680222800665974e-06, "loss": 0.6475, "step": 1413 }, { "epoch": 0.5795378861622009, "grad_norm": 0.8247852565767617, "learning_rate": 3.9615243554940595e-06, "loss": 0.5519, "step": 1414 }, { "epoch": 0.5799477432245504, "grad_norm": 0.9201197083296649, "learning_rate": 3.95502826337816e-06, "loss": 0.644, "step": 1415 }, { "epoch": 0.5803576002869, "grad_norm": 0.9269750065919564, "learning_rate": 3.948534015181671e-06, "loss": 0.6285, "step": 1416 }, { "epoch": 0.5807674573492494, "grad_norm": 0.9069114896384848, "learning_rate": 3.942041622364103e-06, "loss": 0.6185, "step": 1417 }, { "epoch": 0.5811773144115989, "grad_norm": 0.904374399043228, "learning_rate": 3.935551096381705e-06, "loss": 0.5999, "step": 1418 }, { "epoch": 0.5815871714739484, "grad_norm": 0.8811716562840162, "learning_rate": 3.929062448687418e-06, "loss": 0.5844, "step": 1419 }, { "epoch": 0.581997028536298, "grad_norm": 0.9413814386029583, "learning_rate": 3.922575690730882e-06, "loss": 0.6021, "step": 1420 }, { "epoch": 0.5824068855986475, "grad_norm": 0.9585044591744503, "learning_rate": 3.916090833958391e-06, "loss": 0.5459, "step": 1421 }, { "epoch": 0.5828167426609969, "grad_norm": 0.9366256192667047, "learning_rate": 3.909607889812892e-06, "loss": 0.5995, "step": 1422 }, { "epoch": 0.5832265997233465, "grad_norm": 0.9199689738552599, "learning_rate": 3.903126869733955e-06, "loss": 0.6275, "step": 1423 }, { "epoch": 0.583636456785696, "grad_norm": 0.8919296955552073, "learning_rate": 3.89664778515775e-06, "loss": 0.6238, "step": 1424 }, { "epoch": 0.5840463138480455, "grad_norm": 0.82894982013289, "learning_rate": 3.8901706475170395e-06, "loss": 0.6338, "step": 1425 }, { "epoch": 0.584456170910395, "grad_norm": 0.9266480962344984, "learning_rate": 3.883695468241144e-06, "loss": 0.5945, "step": 1426 }, { "epoch": 0.5848660279727445, "grad_norm": 0.8231406759631504, "learning_rate": 3.8772222587559345e-06, "loss": 0.5779, "step": 1427 }, { "epoch": 0.585275885035094, "grad_norm": 0.893671163960851, "learning_rate": 3.8707510304838e-06, "loss": 0.6334, "step": 1428 }, { "epoch": 0.5856857420974435, "grad_norm": 0.8984970075574249, "learning_rate": 3.8642817948436374e-06, "loss": 0.5982, "step": 1429 }, { "epoch": 0.586095599159793, "grad_norm": 1.0108691917601638, "learning_rate": 3.857814563250828e-06, "loss": 0.6625, "step": 1430 }, { "epoch": 0.5865054562221426, "grad_norm": 0.8736435555404135, "learning_rate": 3.851349347117211e-06, "loss": 0.5986, "step": 1431 }, { "epoch": 0.586915313284492, "grad_norm": 0.8803716697892656, "learning_rate": 3.844886157851078e-06, "loss": 0.5664, "step": 1432 }, { "epoch": 0.5873251703468415, "grad_norm": 0.8600954774964656, "learning_rate": 3.838425006857135e-06, "loss": 0.6175, "step": 1433 }, { "epoch": 0.587735027409191, "grad_norm": 0.9311547066733731, "learning_rate": 3.8319659055365016e-06, "loss": 0.6503, "step": 1434 }, { "epoch": 0.5881448844715406, "grad_norm": 0.9067878405051956, "learning_rate": 3.825508865286669e-06, "loss": 0.6585, "step": 1435 }, { "epoch": 0.5885547415338901, "grad_norm": 0.8305117895119674, "learning_rate": 3.8190538975015015e-06, "loss": 0.5927, "step": 1436 }, { "epoch": 0.5889645985962395, "grad_norm": 0.8934716553863579, "learning_rate": 3.8126010135711987e-06, "loss": 0.616, "step": 1437 }, { "epoch": 0.589374455658589, "grad_norm": 0.9165536333671126, "learning_rate": 3.80615022488229e-06, "loss": 0.5626, "step": 1438 }, { "epoch": 0.5897843127209386, "grad_norm": 0.8674815278006818, "learning_rate": 3.799701542817604e-06, "loss": 0.6238, "step": 1439 }, { "epoch": 0.5901941697832881, "grad_norm": 0.8682313820597992, "learning_rate": 3.7932549787562493e-06, "loss": 0.5902, "step": 1440 }, { "epoch": 0.5906040268456376, "grad_norm": 0.9154569200843851, "learning_rate": 3.7868105440736042e-06, "loss": 0.6181, "step": 1441 }, { "epoch": 0.591013883907987, "grad_norm": 0.9479887655071425, "learning_rate": 3.7803682501412814e-06, "loss": 0.5917, "step": 1442 }, { "epoch": 0.5914237409703366, "grad_norm": 0.9110163487484266, "learning_rate": 3.773928108327124e-06, "loss": 0.6025, "step": 1443 }, { "epoch": 0.5918335980326861, "grad_norm": 0.854623364073732, "learning_rate": 3.7674901299951706e-06, "loss": 0.5754, "step": 1444 }, { "epoch": 0.5922434550950356, "grad_norm": 0.8611076334614156, "learning_rate": 3.761054326505648e-06, "loss": 0.5521, "step": 1445 }, { "epoch": 0.5926533121573851, "grad_norm": 0.7913098719956619, "learning_rate": 3.754620709214941e-06, "loss": 0.5965, "step": 1446 }, { "epoch": 0.5930631692197346, "grad_norm": 0.8879101652978972, "learning_rate": 3.7481892894755776e-06, "loss": 0.6377, "step": 1447 }, { "epoch": 0.5934730262820841, "grad_norm": 0.8769695968336179, "learning_rate": 3.7417600786362134e-06, "loss": 0.6121, "step": 1448 }, { "epoch": 0.5938828833444336, "grad_norm": 0.9238080182383583, "learning_rate": 3.7353330880415963e-06, "loss": 0.6092, "step": 1449 }, { "epoch": 0.5942927404067831, "grad_norm": 0.949191066095733, "learning_rate": 3.7289083290325668e-06, "loss": 0.6426, "step": 1450 }, { "epoch": 0.5947025974691327, "grad_norm": 0.8520740849353962, "learning_rate": 3.7224858129460174e-06, "loss": 0.5856, "step": 1451 }, { "epoch": 0.5951124545314821, "grad_norm": 0.9258413805886108, "learning_rate": 3.7160655511148936e-06, "loss": 0.6272, "step": 1452 }, { "epoch": 0.5955223115938316, "grad_norm": 0.9180655596764429, "learning_rate": 3.7096475548681532e-06, "loss": 0.6434, "step": 1453 }, { "epoch": 0.5959321686561811, "grad_norm": 0.9047590124052799, "learning_rate": 3.703231835530765e-06, "loss": 0.5868, "step": 1454 }, { "epoch": 0.5963420257185307, "grad_norm": 0.871998227022875, "learning_rate": 3.696818404423672e-06, "loss": 0.6338, "step": 1455 }, { "epoch": 0.5967518827808802, "grad_norm": 0.9483419558295044, "learning_rate": 3.6904072728637843e-06, "loss": 0.6331, "step": 1456 }, { "epoch": 0.5971617398432296, "grad_norm": 0.8487497142158914, "learning_rate": 3.6839984521639556e-06, "loss": 0.616, "step": 1457 }, { "epoch": 0.5975715969055791, "grad_norm": 0.8664489229958486, "learning_rate": 3.677591953632955e-06, "loss": 0.5908, "step": 1458 }, { "epoch": 0.5979814539679287, "grad_norm": 0.926086786034206, "learning_rate": 3.671187788575464e-06, "loss": 0.6526, "step": 1459 }, { "epoch": 0.5983913110302782, "grad_norm": 0.8519982139697357, "learning_rate": 3.664785968292036e-06, "loss": 0.5478, "step": 1460 }, { "epoch": 0.5988011680926277, "grad_norm": 0.8847370370617151, "learning_rate": 3.6583865040790965e-06, "loss": 0.5554, "step": 1461 }, { "epoch": 0.5992110251549772, "grad_norm": 0.9010146400356691, "learning_rate": 3.6519894072289053e-06, "loss": 0.5391, "step": 1462 }, { "epoch": 0.5996208822173267, "grad_norm": 0.9508541656080028, "learning_rate": 3.6455946890295513e-06, "loss": 0.632, "step": 1463 }, { "epoch": 0.6000307392796762, "grad_norm": 0.9241386092497408, "learning_rate": 3.6392023607649215e-06, "loss": 0.6162, "step": 1464 }, { "epoch": 0.6004405963420257, "grad_norm": 0.8766790055980226, "learning_rate": 3.6328124337146873e-06, "loss": 0.575, "step": 1465 }, { "epoch": 0.6008504534043753, "grad_norm": 0.807998572766778, "learning_rate": 3.6264249191542866e-06, "loss": 0.5711, "step": 1466 }, { "epoch": 0.6012603104667247, "grad_norm": 0.8588067139726856, "learning_rate": 3.6200398283548934e-06, "loss": 0.5728, "step": 1467 }, { "epoch": 0.6016701675290742, "grad_norm": 0.943186071451963, "learning_rate": 3.613657172583412e-06, "loss": 0.6138, "step": 1468 }, { "epoch": 0.6020800245914237, "grad_norm": 0.8436515797077088, "learning_rate": 3.607276963102442e-06, "loss": 0.5869, "step": 1469 }, { "epoch": 0.6024898816537733, "grad_norm": 0.8932340179940947, "learning_rate": 3.600899211170275e-06, "loss": 0.5889, "step": 1470 }, { "epoch": 0.6028997387161228, "grad_norm": 0.8676002482820034, "learning_rate": 3.5945239280408596e-06, "loss": 0.5614, "step": 1471 }, { "epoch": 0.6033095957784722, "grad_norm": 0.8744225983755127, "learning_rate": 3.588151124963789e-06, "loss": 0.6096, "step": 1472 }, { "epoch": 0.6037194528408217, "grad_norm": 0.9383150903969648, "learning_rate": 3.581780813184286e-06, "loss": 0.642, "step": 1473 }, { "epoch": 0.6041293099031713, "grad_norm": 0.8714664015419062, "learning_rate": 3.575413003943167e-06, "loss": 0.6221, "step": 1474 }, { "epoch": 0.6045391669655208, "grad_norm": 0.8377524671253196, "learning_rate": 3.569047708476844e-06, "loss": 0.5923, "step": 1475 }, { "epoch": 0.6049490240278703, "grad_norm": 0.9284021330679016, "learning_rate": 3.5626849380172824e-06, "loss": 0.6127, "step": 1476 }, { "epoch": 0.6053588810902197, "grad_norm": 0.876549485885289, "learning_rate": 3.556324703792002e-06, "loss": 0.6138, "step": 1477 }, { "epoch": 0.6057687381525693, "grad_norm": 0.874155762754558, "learning_rate": 3.5499670170240396e-06, "loss": 0.6168, "step": 1478 }, { "epoch": 0.6061785952149188, "grad_norm": 0.8619607434788205, "learning_rate": 3.5436118889319414e-06, "loss": 0.6114, "step": 1479 }, { "epoch": 0.6065884522772683, "grad_norm": 0.8805913047468303, "learning_rate": 3.5372593307297355e-06, "loss": 0.6244, "step": 1480 }, { "epoch": 0.6069983093396178, "grad_norm": 0.8814405961407834, "learning_rate": 3.530909353626916e-06, "loss": 0.566, "step": 1481 }, { "epoch": 0.6074081664019673, "grad_norm": 0.9013203262752785, "learning_rate": 3.5245619688284277e-06, "loss": 0.6099, "step": 1482 }, { "epoch": 0.6078180234643168, "grad_norm": 0.8981527943983368, "learning_rate": 3.5182171875346318e-06, "loss": 0.6312, "step": 1483 }, { "epoch": 0.6082278805266663, "grad_norm": 0.8880279448135808, "learning_rate": 3.5118750209413046e-06, "loss": 0.635, "step": 1484 }, { "epoch": 0.6086377375890158, "grad_norm": 1.0112534739850612, "learning_rate": 3.5055354802396003e-06, "loss": 0.5707, "step": 1485 }, { "epoch": 0.6090475946513654, "grad_norm": 0.8929590058117503, "learning_rate": 3.4991985766160456e-06, "loss": 0.6193, "step": 1486 }, { "epoch": 0.6094574517137148, "grad_norm": 0.8840358548025935, "learning_rate": 3.4928643212525125e-06, "loss": 0.6231, "step": 1487 }, { "epoch": 0.6098673087760643, "grad_norm": 0.7924976073503969, "learning_rate": 3.486532725326199e-06, "loss": 0.583, "step": 1488 }, { "epoch": 0.6102771658384138, "grad_norm": 0.9023076420579409, "learning_rate": 3.4802038000096087e-06, "loss": 0.5792, "step": 1489 }, { "epoch": 0.6106870229007634, "grad_norm": 0.8660120520932516, "learning_rate": 3.4738775564705368e-06, "loss": 0.6115, "step": 1490 }, { "epoch": 0.6110968799631129, "grad_norm": 0.9348306052832493, "learning_rate": 3.467554005872046e-06, "loss": 0.5749, "step": 1491 }, { "epoch": 0.6115067370254623, "grad_norm": 0.85025822861439, "learning_rate": 3.46123315937244e-06, "loss": 0.5635, "step": 1492 }, { "epoch": 0.6119165940878118, "grad_norm": 0.9247556347069951, "learning_rate": 3.4549150281252635e-06, "loss": 0.584, "step": 1493 }, { "epoch": 0.6123264511501614, "grad_norm": 0.8916183520144882, "learning_rate": 3.4485996232792574e-06, "loss": 0.5923, "step": 1494 }, { "epoch": 0.6127363082125109, "grad_norm": 0.9097008435975372, "learning_rate": 3.44228695597836e-06, "loss": 0.5999, "step": 1495 }, { "epoch": 0.6131461652748604, "grad_norm": 0.8495594457304793, "learning_rate": 3.435977037361673e-06, "loss": 0.6421, "step": 1496 }, { "epoch": 0.6135560223372098, "grad_norm": 0.9221122881831858, "learning_rate": 3.4296698785634554e-06, "loss": 0.6154, "step": 1497 }, { "epoch": 0.6139658793995594, "grad_norm": 0.9136442133078445, "learning_rate": 3.4233654907130875e-06, "loss": 0.596, "step": 1498 }, { "epoch": 0.6143757364619089, "grad_norm": 0.8885567538945889, "learning_rate": 3.417063884935067e-06, "loss": 0.6177, "step": 1499 }, { "epoch": 0.6147855935242584, "grad_norm": 0.9217791424411353, "learning_rate": 3.4107650723489805e-06, "loss": 0.6325, "step": 1500 }, { "epoch": 0.615195450586608, "grad_norm": 0.9139917047344356, "learning_rate": 3.4044690640694844e-06, "loss": 0.5864, "step": 1501 }, { "epoch": 0.6156053076489574, "grad_norm": 0.8723542436476888, "learning_rate": 3.398175871206288e-06, "loss": 0.6205, "step": 1502 }, { "epoch": 0.6160151647113069, "grad_norm": 0.9140336741832694, "learning_rate": 3.391885504864132e-06, "loss": 0.6078, "step": 1503 }, { "epoch": 0.6164250217736564, "grad_norm": 0.8845893570323918, "learning_rate": 3.3855979761427705e-06, "loss": 0.627, "step": 1504 }, { "epoch": 0.616834878836006, "grad_norm": 0.8641622243020904, "learning_rate": 3.379313296136947e-06, "loss": 0.5933, "step": 1505 }, { "epoch": 0.6172447358983555, "grad_norm": 0.9230555006082778, "learning_rate": 3.3730314759363857e-06, "loss": 0.6267, "step": 1506 }, { "epoch": 0.6176545929607049, "grad_norm": 0.8780816921972185, "learning_rate": 3.366752526625755e-06, "loss": 0.5837, "step": 1507 }, { "epoch": 0.6180644500230544, "grad_norm": 0.932524206945896, "learning_rate": 3.3604764592846637e-06, "loss": 0.6244, "step": 1508 }, { "epoch": 0.618474307085404, "grad_norm": 0.9536388177816704, "learning_rate": 3.354203284987637e-06, "loss": 0.6086, "step": 1509 }, { "epoch": 0.6188841641477535, "grad_norm": 0.8531248430451454, "learning_rate": 3.347933014804089e-06, "loss": 0.5795, "step": 1510 }, { "epoch": 0.619294021210103, "grad_norm": 0.8947170178782422, "learning_rate": 3.341665659798312e-06, "loss": 0.6414, "step": 1511 }, { "epoch": 0.6197038782724524, "grad_norm": 0.8356837434710035, "learning_rate": 3.3354012310294537e-06, "loss": 0.5446, "step": 1512 }, { "epoch": 0.620113735334802, "grad_norm": 0.8236371355395576, "learning_rate": 3.3291397395515017e-06, "loss": 0.5859, "step": 1513 }, { "epoch": 0.6205235923971515, "grad_norm": 0.9434171011722012, "learning_rate": 3.3228811964132546e-06, "loss": 0.598, "step": 1514 }, { "epoch": 0.620933449459501, "grad_norm": 0.8995452692548453, "learning_rate": 3.316625612658315e-06, "loss": 0.5722, "step": 1515 }, { "epoch": 0.6213433065218505, "grad_norm": 0.9417927428039422, "learning_rate": 3.3103729993250566e-06, "loss": 0.6247, "step": 1516 }, { "epoch": 0.6217531635842, "grad_norm": 0.9080731646539058, "learning_rate": 3.3041233674466185e-06, "loss": 0.6212, "step": 1517 }, { "epoch": 0.6221630206465495, "grad_norm": 0.9082347982578306, "learning_rate": 3.2978767280508737e-06, "loss": 0.634, "step": 1518 }, { "epoch": 0.622572877708899, "grad_norm": 0.9194417285486438, "learning_rate": 3.291633092160418e-06, "loss": 0.6017, "step": 1519 }, { "epoch": 0.6229827347712485, "grad_norm": 0.8933694092481198, "learning_rate": 3.2853924707925454e-06, "loss": 0.5999, "step": 1520 }, { "epoch": 0.6233925918335981, "grad_norm": 0.9565405928415834, "learning_rate": 3.279154874959229e-06, "loss": 0.661, "step": 1521 }, { "epoch": 0.6238024488959475, "grad_norm": 0.8180764442654088, "learning_rate": 3.2729203156671085e-06, "loss": 0.5878, "step": 1522 }, { "epoch": 0.624212305958297, "grad_norm": 0.799050717766393, "learning_rate": 3.2666888039174592e-06, "loss": 0.548, "step": 1523 }, { "epoch": 0.6246221630206465, "grad_norm": 0.8793157318894942, "learning_rate": 3.2604603507061857e-06, "loss": 0.5685, "step": 1524 }, { "epoch": 0.6250320200829961, "grad_norm": 0.901700845049934, "learning_rate": 3.254234967023787e-06, "loss": 0.5803, "step": 1525 }, { "epoch": 0.6254418771453456, "grad_norm": 0.8772549822674647, "learning_rate": 3.2480126638553533e-06, "loss": 0.615, "step": 1526 }, { "epoch": 0.625851734207695, "grad_norm": 0.8881014290698543, "learning_rate": 3.241793452180537e-06, "loss": 0.6397, "step": 1527 }, { "epoch": 0.6262615912700445, "grad_norm": 0.8646461388450529, "learning_rate": 3.2355773429735316e-06, "loss": 0.5632, "step": 1528 }, { "epoch": 0.6266714483323941, "grad_norm": 0.8637671946513139, "learning_rate": 3.229364347203062e-06, "loss": 0.5651, "step": 1529 }, { "epoch": 0.6270813053947436, "grad_norm": 0.907942403953918, "learning_rate": 3.223154475832354e-06, "loss": 0.6099, "step": 1530 }, { "epoch": 0.6274911624570931, "grad_norm": 0.8864650520934255, "learning_rate": 3.216947739819125e-06, "loss": 0.5655, "step": 1531 }, { "epoch": 0.6279010195194425, "grad_norm": 0.8118757289679949, "learning_rate": 3.2107441501155534e-06, "loss": 0.5477, "step": 1532 }, { "epoch": 0.6283108765817921, "grad_norm": 0.8772843970216238, "learning_rate": 3.2045437176682743e-06, "loss": 0.5916, "step": 1533 }, { "epoch": 0.6287207336441416, "grad_norm": 0.8814702284500177, "learning_rate": 3.198346453418343e-06, "loss": 0.5703, "step": 1534 }, { "epoch": 0.6291305907064911, "grad_norm": 0.8928385519222547, "learning_rate": 3.19215236830123e-06, "loss": 0.5838, "step": 1535 }, { "epoch": 0.6295404477688407, "grad_norm": 0.8780969706857987, "learning_rate": 3.1859614732467957e-06, "loss": 0.6042, "step": 1536 }, { "epoch": 0.6299503048311901, "grad_norm": 0.9256792836155763, "learning_rate": 3.1797737791792672e-06, "loss": 0.6293, "step": 1537 }, { "epoch": 0.6303601618935396, "grad_norm": 0.8576291246130883, "learning_rate": 3.17358929701723e-06, "loss": 0.5934, "step": 1538 }, { "epoch": 0.6307700189558891, "grad_norm": 0.9600672308752053, "learning_rate": 3.1674080376735926e-06, "loss": 0.6278, "step": 1539 }, { "epoch": 0.6311798760182387, "grad_norm": 0.9021006737291225, "learning_rate": 3.161230012055588e-06, "loss": 0.5808, "step": 1540 }, { "epoch": 0.6315897330805882, "grad_norm": 0.9375681746664533, "learning_rate": 3.1550552310647324e-06, "loss": 0.5962, "step": 1541 }, { "epoch": 0.6319995901429376, "grad_norm": 0.9323534125587528, "learning_rate": 3.148883705596826e-06, "loss": 0.6156, "step": 1542 }, { "epoch": 0.6324094472052871, "grad_norm": 0.9236284537433478, "learning_rate": 3.142715446541915e-06, "loss": 0.5866, "step": 1543 }, { "epoch": 0.6328193042676367, "grad_norm": 0.8464791358452364, "learning_rate": 3.1365504647842895e-06, "loss": 0.6193, "step": 1544 }, { "epoch": 0.6332291613299862, "grad_norm": 0.8428028821016579, "learning_rate": 3.130388771202455e-06, "loss": 0.6386, "step": 1545 }, { "epoch": 0.6336390183923357, "grad_norm": 0.8682332970249893, "learning_rate": 3.1242303766691096e-06, "loss": 0.5719, "step": 1546 }, { "epoch": 0.6340488754546851, "grad_norm": 0.8698708983434168, "learning_rate": 3.118075292051138e-06, "loss": 0.617, "step": 1547 }, { "epoch": 0.6344587325170347, "grad_norm": 0.8652192914911497, "learning_rate": 3.111923528209577e-06, "loss": 0.5997, "step": 1548 }, { "epoch": 0.6348685895793842, "grad_norm": 0.9460222594171827, "learning_rate": 3.1057750959996115e-06, "loss": 0.5967, "step": 1549 }, { "epoch": 0.6352784466417337, "grad_norm": 0.8596580159743465, "learning_rate": 3.0996300062705387e-06, "loss": 0.5615, "step": 1550 }, { "epoch": 0.6356883037040832, "grad_norm": 0.841440271088491, "learning_rate": 3.093488269865764e-06, "loss": 0.6152, "step": 1551 }, { "epoch": 0.6360981607664327, "grad_norm": 0.9531565328246459, "learning_rate": 3.0873498976227735e-06, "loss": 0.6182, "step": 1552 }, { "epoch": 0.6365080178287822, "grad_norm": 0.862810131833411, "learning_rate": 3.0812149003731164e-06, "loss": 0.5886, "step": 1553 }, { "epoch": 0.6369178748911317, "grad_norm": 0.9198503799356978, "learning_rate": 3.0750832889423906e-06, "loss": 0.5464, "step": 1554 }, { "epoch": 0.6373277319534812, "grad_norm": 0.8777206287638182, "learning_rate": 3.0689550741502116e-06, "loss": 0.5802, "step": 1555 }, { "epoch": 0.6377375890158308, "grad_norm": 0.8810722490548843, "learning_rate": 3.0628302668102108e-06, "loss": 0.6253, "step": 1556 }, { "epoch": 0.6381474460781802, "grad_norm": 0.9610632479707393, "learning_rate": 3.056708877729997e-06, "loss": 0.6119, "step": 1557 }, { "epoch": 0.6385573031405297, "grad_norm": 0.8841281416499689, "learning_rate": 3.0505909177111575e-06, "loss": 0.604, "step": 1558 }, { "epoch": 0.6389671602028792, "grad_norm": 0.9281846260137709, "learning_rate": 3.044476397549221e-06, "loss": 0.6328, "step": 1559 }, { "epoch": 0.6393770172652288, "grad_norm": 0.8563464391953525, "learning_rate": 3.038365328033648e-06, "loss": 0.5508, "step": 1560 }, { "epoch": 0.6397868743275783, "grad_norm": 0.9101071544315449, "learning_rate": 3.0322577199478153e-06, "loss": 0.605, "step": 1561 }, { "epoch": 0.6401967313899277, "grad_norm": 0.8773865608136665, "learning_rate": 3.0261535840689825e-06, "loss": 0.6353, "step": 1562 }, { "epoch": 0.6406065884522772, "grad_norm": 0.8560578158143477, "learning_rate": 3.0200529311682925e-06, "loss": 0.6317, "step": 1563 }, { "epoch": 0.6410164455146268, "grad_norm": 0.8729805161513811, "learning_rate": 3.0139557720107327e-06, "loss": 0.6081, "step": 1564 }, { "epoch": 0.6414263025769763, "grad_norm": 0.9016757029462947, "learning_rate": 3.0078621173551335e-06, "loss": 0.6174, "step": 1565 }, { "epoch": 0.6418361596393258, "grad_norm": 0.8391630559841188, "learning_rate": 3.0017719779541365e-06, "loss": 0.6, "step": 1566 }, { "epoch": 0.6422460167016752, "grad_norm": 0.8988113710179686, "learning_rate": 2.9956853645541817e-06, "loss": 0.5799, "step": 1567 }, { "epoch": 0.6426558737640248, "grad_norm": 0.8254116401895403, "learning_rate": 2.989602287895488e-06, "loss": 0.6073, "step": 1568 }, { "epoch": 0.6430657308263743, "grad_norm": 0.927891271327787, "learning_rate": 2.983522758712031e-06, "loss": 0.6266, "step": 1569 }, { "epoch": 0.6434755878887238, "grad_norm": 0.9191578973239759, "learning_rate": 2.977446787731532e-06, "loss": 0.64, "step": 1570 }, { "epoch": 0.6438854449510734, "grad_norm": 0.87728648063979, "learning_rate": 2.9713743856754264e-06, "loss": 0.6148, "step": 1571 }, { "epoch": 0.6442953020134228, "grad_norm": 0.8041904822793888, "learning_rate": 2.9653055632588594e-06, "loss": 0.5772, "step": 1572 }, { "epoch": 0.6447051590757723, "grad_norm": 0.8546877228706548, "learning_rate": 2.9592403311906526e-06, "loss": 0.6213, "step": 1573 }, { "epoch": 0.6451150161381218, "grad_norm": 0.8452799594839825, "learning_rate": 2.953178700173298e-06, "loss": 0.5515, "step": 1574 }, { "epoch": 0.6455248732004714, "grad_norm": 0.9645304205858869, "learning_rate": 2.9471206809029297e-06, "loss": 0.6201, "step": 1575 }, { "epoch": 0.6459347302628209, "grad_norm": 0.9009050235613497, "learning_rate": 2.9410662840693104e-06, "loss": 0.5901, "step": 1576 }, { "epoch": 0.6463445873251703, "grad_norm": 0.8440548808952641, "learning_rate": 2.935015520355808e-06, "loss": 0.5889, "step": 1577 }, { "epoch": 0.6467544443875198, "grad_norm": 0.8644757414402617, "learning_rate": 2.9289684004393835e-06, "loss": 0.6052, "step": 1578 }, { "epoch": 0.6471643014498694, "grad_norm": 0.9163106017428029, "learning_rate": 2.9229249349905686e-06, "loss": 0.641, "step": 1579 }, { "epoch": 0.6475741585122189, "grad_norm": 0.971164632520686, "learning_rate": 2.9168851346734412e-06, "loss": 0.6928, "step": 1580 }, { "epoch": 0.6479840155745684, "grad_norm": 0.9026447081294051, "learning_rate": 2.910849010145617e-06, "loss": 0.5873, "step": 1581 }, { "epoch": 0.6483938726369178, "grad_norm": 0.8765655055085161, "learning_rate": 2.9048165720582205e-06, "loss": 0.6054, "step": 1582 }, { "epoch": 0.6488037296992674, "grad_norm": 0.8390353615963051, "learning_rate": 2.898787831055878e-06, "loss": 0.5967, "step": 1583 }, { "epoch": 0.6492135867616169, "grad_norm": 0.8820048982886288, "learning_rate": 2.892762797776685e-06, "loss": 0.5945, "step": 1584 }, { "epoch": 0.6496234438239664, "grad_norm": 0.9478410316038012, "learning_rate": 2.8867414828522e-06, "loss": 0.6262, "step": 1585 }, { "epoch": 0.650033300886316, "grad_norm": 0.9236028165806325, "learning_rate": 2.880723896907416e-06, "loss": 0.6323, "step": 1586 }, { "epoch": 0.6504431579486654, "grad_norm": 0.8580333839353153, "learning_rate": 2.8747100505607494e-06, "loss": 0.5678, "step": 1587 }, { "epoch": 0.6508530150110149, "grad_norm": 0.975564091617787, "learning_rate": 2.8686999544240173e-06, "loss": 0.6487, "step": 1588 }, { "epoch": 0.6512628720733644, "grad_norm": 0.9104417040560052, "learning_rate": 2.8626936191024156e-06, "loss": 0.5826, "step": 1589 }, { "epoch": 0.651672729135714, "grad_norm": 0.8984790927396092, "learning_rate": 2.8566910551945105e-06, "loss": 0.6195, "step": 1590 }, { "epoch": 0.6520825861980635, "grad_norm": 0.9044223413607072, "learning_rate": 2.850692273292205e-06, "loss": 0.5895, "step": 1591 }, { "epoch": 0.6524924432604129, "grad_norm": 0.8311142352572907, "learning_rate": 2.8446972839807384e-06, "loss": 0.5905, "step": 1592 }, { "epoch": 0.6529023003227624, "grad_norm": 0.9416787430852014, "learning_rate": 2.8387060978386473e-06, "loss": 0.6161, "step": 1593 }, { "epoch": 0.653312157385112, "grad_norm": 0.8309674117644229, "learning_rate": 2.832718725437767e-06, "loss": 0.5449, "step": 1594 }, { "epoch": 0.6537220144474615, "grad_norm": 0.8439007196335412, "learning_rate": 2.826735177343196e-06, "loss": 0.5685, "step": 1595 }, { "epoch": 0.654131871509811, "grad_norm": 0.8785860971751855, "learning_rate": 2.8207554641132872e-06, "loss": 0.5328, "step": 1596 }, { "epoch": 0.6545417285721604, "grad_norm": 0.8784307681403498, "learning_rate": 2.8147795962996284e-06, "loss": 0.5579, "step": 1597 }, { "epoch": 0.65495158563451, "grad_norm": 0.8690074786433809, "learning_rate": 2.808807584447018e-06, "loss": 0.5814, "step": 1598 }, { "epoch": 0.6553614426968595, "grad_norm": 0.9445270108197001, "learning_rate": 2.8028394390934554e-06, "loss": 0.6036, "step": 1599 }, { "epoch": 0.655771299759209, "grad_norm": 0.85835505510223, "learning_rate": 2.7968751707701115e-06, "loss": 0.5312, "step": 1600 }, { "epoch": 0.6561811568215585, "grad_norm": 0.8494911956475815, "learning_rate": 2.790914790001322e-06, "loss": 0.6, "step": 1601 }, { "epoch": 0.656591013883908, "grad_norm": 0.8865022263509248, "learning_rate": 2.7849583073045563e-06, "loss": 0.6012, "step": 1602 }, { "epoch": 0.6570008709462575, "grad_norm": 0.9129212447130838, "learning_rate": 2.779005733190412e-06, "loss": 0.6396, "step": 1603 }, { "epoch": 0.657410728008607, "grad_norm": 0.9198144158967311, "learning_rate": 2.773057078162583e-06, "loss": 0.6724, "step": 1604 }, { "epoch": 0.6578205850709565, "grad_norm": 0.9044418586506562, "learning_rate": 2.7671123527178533e-06, "loss": 0.6031, "step": 1605 }, { "epoch": 0.6582304421333061, "grad_norm": 0.838248298856063, "learning_rate": 2.7611715673460737e-06, "loss": 0.5664, "step": 1606 }, { "epoch": 0.6586402991956555, "grad_norm": 0.8739515205534798, "learning_rate": 2.755234732530134e-06, "loss": 0.5784, "step": 1607 }, { "epoch": 0.659050156258005, "grad_norm": 0.8418576205933842, "learning_rate": 2.749301858745963e-06, "loss": 0.5605, "step": 1608 }, { "epoch": 0.6594600133203545, "grad_norm": 0.8799720678258484, "learning_rate": 2.7433729564624923e-06, "loss": 0.5815, "step": 1609 }, { "epoch": 0.6598698703827041, "grad_norm": 0.8576266838218546, "learning_rate": 2.737448036141653e-06, "loss": 0.5623, "step": 1610 }, { "epoch": 0.6602797274450536, "grad_norm": 0.8798261692153692, "learning_rate": 2.7315271082383433e-06, "loss": 0.564, "step": 1611 }, { "epoch": 0.660689584507403, "grad_norm": 0.8231785258907751, "learning_rate": 2.725610183200416e-06, "loss": 0.5728, "step": 1612 }, { "epoch": 0.6610994415697525, "grad_norm": 0.9114851695834766, "learning_rate": 2.7196972714686688e-06, "loss": 0.6134, "step": 1613 }, { "epoch": 0.6615092986321021, "grad_norm": 0.8590014092370797, "learning_rate": 2.7137883834768076e-06, "loss": 0.5783, "step": 1614 }, { "epoch": 0.6619191556944516, "grad_norm": 0.9342070399695606, "learning_rate": 2.7078835296514473e-06, "loss": 0.6232, "step": 1615 }, { "epoch": 0.6623290127568011, "grad_norm": 0.9150569056855263, "learning_rate": 2.7019827204120753e-06, "loss": 0.6507, "step": 1616 }, { "epoch": 0.6627388698191505, "grad_norm": 0.8669280789005324, "learning_rate": 2.6960859661710507e-06, "loss": 0.6328, "step": 1617 }, { "epoch": 0.6631487268815001, "grad_norm": 0.937758470930089, "learning_rate": 2.6901932773335694e-06, "loss": 0.6301, "step": 1618 }, { "epoch": 0.6635585839438496, "grad_norm": 0.9124928526619556, "learning_rate": 2.6843046642976616e-06, "loss": 0.6275, "step": 1619 }, { "epoch": 0.6639684410061991, "grad_norm": 0.9146869910260284, "learning_rate": 2.6784201374541576e-06, "loss": 0.6441, "step": 1620 }, { "epoch": 0.6643782980685486, "grad_norm": 0.8793252397265476, "learning_rate": 2.672539707186683e-06, "loss": 0.5741, "step": 1621 }, { "epoch": 0.6647881551308981, "grad_norm": 0.9042652331170686, "learning_rate": 2.6666633838716317e-06, "loss": 0.6221, "step": 1622 }, { "epoch": 0.6651980121932476, "grad_norm": 0.8978035820659765, "learning_rate": 2.660791177878152e-06, "loss": 0.5955, "step": 1623 }, { "epoch": 0.6656078692555971, "grad_norm": 0.9135142412774876, "learning_rate": 2.654923099568129e-06, "loss": 0.5577, "step": 1624 }, { "epoch": 0.6660177263179466, "grad_norm": 0.8790750657794328, "learning_rate": 2.649059159296158e-06, "loss": 0.5583, "step": 1625 }, { "epoch": 0.6664275833802962, "grad_norm": 0.9198909129771091, "learning_rate": 2.6431993674095403e-06, "loss": 0.5753, "step": 1626 }, { "epoch": 0.6668374404426456, "grad_norm": 0.8714608098992723, "learning_rate": 2.6373437342482507e-06, "loss": 0.6213, "step": 1627 }, { "epoch": 0.6672472975049951, "grad_norm": 0.9536104843309492, "learning_rate": 2.6314922701449284e-06, "loss": 0.6275, "step": 1628 }, { "epoch": 0.6676571545673446, "grad_norm": 0.9499604873041759, "learning_rate": 2.6256449854248543e-06, "loss": 0.6127, "step": 1629 }, { "epoch": 0.6680670116296942, "grad_norm": 0.8576830340880099, "learning_rate": 2.61980189040594e-06, "loss": 0.58, "step": 1630 }, { "epoch": 0.6684768686920437, "grad_norm": 0.8903316708289928, "learning_rate": 2.6139629953986946e-06, "loss": 0.5926, "step": 1631 }, { "epoch": 0.6688867257543931, "grad_norm": 1.0418210346010384, "learning_rate": 2.608128310706225e-06, "loss": 0.5829, "step": 1632 }, { "epoch": 0.6692965828167426, "grad_norm": 0.93103674378346, "learning_rate": 2.6022978466242066e-06, "loss": 0.5796, "step": 1633 }, { "epoch": 0.6697064398790922, "grad_norm": 0.9070431364208482, "learning_rate": 2.596471613440862e-06, "loss": 0.5852, "step": 1634 }, { "epoch": 0.6701162969414417, "grad_norm": 0.9516542216476052, "learning_rate": 2.590649621436956e-06, "loss": 0.6477, "step": 1635 }, { "epoch": 0.6705261540037912, "grad_norm": 0.896151705286909, "learning_rate": 2.584831880885761e-06, "loss": 0.5776, "step": 1636 }, { "epoch": 0.6709360110661406, "grad_norm": 0.90372786757659, "learning_rate": 2.579018402053057e-06, "loss": 0.6054, "step": 1637 }, { "epoch": 0.6713458681284902, "grad_norm": 0.9207192911558773, "learning_rate": 2.5732091951970938e-06, "loss": 0.6113, "step": 1638 }, { "epoch": 0.6717557251908397, "grad_norm": 0.8335498485188018, "learning_rate": 2.5674042705685914e-06, "loss": 0.5818, "step": 1639 }, { "epoch": 0.6721655822531892, "grad_norm": 0.8485885652094718, "learning_rate": 2.561603638410711e-06, "loss": 0.6088, "step": 1640 }, { "epoch": 0.6725754393155388, "grad_norm": 0.899719155296411, "learning_rate": 2.555807308959036e-06, "loss": 0.5732, "step": 1641 }, { "epoch": 0.6729852963778882, "grad_norm": 0.8778185934793691, "learning_rate": 2.5500152924415635e-06, "loss": 0.6261, "step": 1642 }, { "epoch": 0.6733951534402377, "grad_norm": 0.965014631616205, "learning_rate": 2.5442275990786737e-06, "loss": 0.6465, "step": 1643 }, { "epoch": 0.6738050105025872, "grad_norm": 0.8990108952426236, "learning_rate": 2.5384442390831233e-06, "loss": 0.5672, "step": 1644 }, { "epoch": 0.6742148675649368, "grad_norm": 0.8439117117080631, "learning_rate": 2.532665222660018e-06, "loss": 0.5781, "step": 1645 }, { "epoch": 0.6746247246272863, "grad_norm": 0.9420561213803303, "learning_rate": 2.5268905600068046e-06, "loss": 0.5991, "step": 1646 }, { "epoch": 0.6750345816896357, "grad_norm": 0.8860943859397696, "learning_rate": 2.5211202613132413e-06, "loss": 0.6169, "step": 1647 }, { "epoch": 0.6754444387519852, "grad_norm": 0.8458981914512151, "learning_rate": 2.5153543367613908e-06, "loss": 0.5496, "step": 1648 }, { "epoch": 0.6758542958143348, "grad_norm": 0.8421389312926563, "learning_rate": 2.5095927965255973e-06, "loss": 0.5525, "step": 1649 }, { "epoch": 0.6762641528766843, "grad_norm": 0.9287998930562015, "learning_rate": 2.5038356507724627e-06, "loss": 0.5852, "step": 1650 }, { "epoch": 0.6766740099390338, "grad_norm": 0.8236184740124035, "learning_rate": 2.4980829096608433e-06, "loss": 0.5971, "step": 1651 }, { "epoch": 0.6770838670013832, "grad_norm": 0.8810466373548466, "learning_rate": 2.4923345833418134e-06, "loss": 0.5976, "step": 1652 }, { "epoch": 0.6774937240637328, "grad_norm": 0.8609712493920686, "learning_rate": 2.4865906819586664e-06, "loss": 0.6293, "step": 1653 }, { "epoch": 0.6779035811260823, "grad_norm": 0.9207894518936478, "learning_rate": 2.4808512156468795e-06, "loss": 0.5502, "step": 1654 }, { "epoch": 0.6783134381884318, "grad_norm": 0.8196942398810521, "learning_rate": 2.475116194534112e-06, "loss": 0.574, "step": 1655 }, { "epoch": 0.6787232952507813, "grad_norm": 0.8540678202898504, "learning_rate": 2.4693856287401713e-06, "loss": 0.5964, "step": 1656 }, { "epoch": 0.6791331523131308, "grad_norm": 0.9216975201829984, "learning_rate": 2.463659528377008e-06, "loss": 0.6092, "step": 1657 }, { "epoch": 0.6795430093754803, "grad_norm": 0.865293792622588, "learning_rate": 2.457937903548695e-06, "loss": 0.5495, "step": 1658 }, { "epoch": 0.6799528664378298, "grad_norm": 0.9017408839967425, "learning_rate": 2.452220764351403e-06, "loss": 0.6405, "step": 1659 }, { "epoch": 0.6803627235001793, "grad_norm": 0.870804258622246, "learning_rate": 2.446508120873389e-06, "loss": 0.5649, "step": 1660 }, { "epoch": 0.6807725805625289, "grad_norm": 0.9095686300123436, "learning_rate": 2.440799983194975e-06, "loss": 0.5967, "step": 1661 }, { "epoch": 0.6811824376248783, "grad_norm": 0.8241393217725199, "learning_rate": 2.43509636138854e-06, "loss": 0.5527, "step": 1662 }, { "epoch": 0.6815922946872278, "grad_norm": 0.8668787462460482, "learning_rate": 2.4293972655184842e-06, "loss": 0.614, "step": 1663 }, { "epoch": 0.6820021517495773, "grad_norm": 0.8762851270790394, "learning_rate": 2.423702705641231e-06, "loss": 0.56, "step": 1664 }, { "epoch": 0.6824120088119269, "grad_norm": 0.9266905251335416, "learning_rate": 2.418012691805191e-06, "loss": 0.6099, "step": 1665 }, { "epoch": 0.6828218658742764, "grad_norm": 0.9191941960179448, "learning_rate": 2.4123272340507594e-06, "loss": 0.6074, "step": 1666 }, { "epoch": 0.6832317229366258, "grad_norm": 0.8949197511320647, "learning_rate": 2.4066463424102915e-06, "loss": 0.6525, "step": 1667 }, { "epoch": 0.6836415799989753, "grad_norm": 0.9189018620229463, "learning_rate": 2.4009700269080793e-06, "loss": 0.6265, "step": 1668 }, { "epoch": 0.6840514370613249, "grad_norm": 0.8788498074414858, "learning_rate": 2.3952982975603494e-06, "loss": 0.6384, "step": 1669 }, { "epoch": 0.6844612941236744, "grad_norm": 0.9071449939983807, "learning_rate": 2.3896311643752258e-06, "loss": 0.5816, "step": 1670 }, { "epoch": 0.6848711511860238, "grad_norm": 0.9435134050481072, "learning_rate": 2.3839686373527308e-06, "loss": 0.6861, "step": 1671 }, { "epoch": 0.6852810082483733, "grad_norm": 0.8243255702581793, "learning_rate": 2.378310726484752e-06, "loss": 0.5729, "step": 1672 }, { "epoch": 0.6856908653107229, "grad_norm": 0.8737114709760552, "learning_rate": 2.3726574417550387e-06, "loss": 0.5749, "step": 1673 }, { "epoch": 0.6861007223730724, "grad_norm": 0.8795959682764446, "learning_rate": 2.3670087931391683e-06, "loss": 0.5638, "step": 1674 }, { "epoch": 0.6865105794354219, "grad_norm": 0.8753144068125237, "learning_rate": 2.3613647906045472e-06, "loss": 0.6126, "step": 1675 }, { "epoch": 0.6869204364977713, "grad_norm": 0.854002695690522, "learning_rate": 2.355725444110376e-06, "loss": 0.5785, "step": 1676 }, { "epoch": 0.6873302935601209, "grad_norm": 0.8685582443149517, "learning_rate": 2.3500907636076396e-06, "loss": 0.5901, "step": 1677 }, { "epoch": 0.6877401506224704, "grad_norm": 0.840671533290867, "learning_rate": 2.344460759039097e-06, "loss": 0.5623, "step": 1678 }, { "epoch": 0.6881500076848199, "grad_norm": 0.9533557271782459, "learning_rate": 2.3388354403392466e-06, "loss": 0.6527, "step": 1679 }, { "epoch": 0.6885598647471695, "grad_norm": 0.8350358097873841, "learning_rate": 2.3332148174343257e-06, "loss": 0.5348, "step": 1680 }, { "epoch": 0.6889697218095189, "grad_norm": 0.8770348947526252, "learning_rate": 2.32759890024228e-06, "loss": 0.5853, "step": 1681 }, { "epoch": 0.6893795788718684, "grad_norm": 0.8738803622661594, "learning_rate": 2.3219876986727576e-06, "loss": 0.6111, "step": 1682 }, { "epoch": 0.6897894359342179, "grad_norm": 0.916828746244095, "learning_rate": 2.3163812226270782e-06, "loss": 0.6082, "step": 1683 }, { "epoch": 0.6901992929965675, "grad_norm": 0.884322574265004, "learning_rate": 2.310779481998229e-06, "loss": 0.5917, "step": 1684 }, { "epoch": 0.690609150058917, "grad_norm": 0.9308514126682158, "learning_rate": 2.3051824866708422e-06, "loss": 0.7145, "step": 1685 }, { "epoch": 0.6910190071212664, "grad_norm": 0.9101999043684257, "learning_rate": 2.2995902465211683e-06, "loss": 0.5892, "step": 1686 }, { "epoch": 0.6914288641836159, "grad_norm": 0.892675167977413, "learning_rate": 2.2940027714170777e-06, "loss": 0.6204, "step": 1687 }, { "epoch": 0.6918387212459655, "grad_norm": 0.8642066036418659, "learning_rate": 2.2884200712180225e-06, "loss": 0.5991, "step": 1688 }, { "epoch": 0.692248578308315, "grad_norm": 0.9353492983447449, "learning_rate": 2.2828421557750386e-06, "loss": 0.5874, "step": 1689 }, { "epoch": 0.6926584353706645, "grad_norm": 0.8149878185998833, "learning_rate": 2.2772690349307095e-06, "loss": 0.5482, "step": 1690 }, { "epoch": 0.6930682924330139, "grad_norm": 0.8925874485261046, "learning_rate": 2.2717007185191673e-06, "loss": 0.5529, "step": 1691 }, { "epoch": 0.6934781494953635, "grad_norm": 0.9173593439694244, "learning_rate": 2.266137216366061e-06, "loss": 0.5508, "step": 1692 }, { "epoch": 0.693888006557713, "grad_norm": 0.8226891654600464, "learning_rate": 2.2605785382885425e-06, "loss": 0.5366, "step": 1693 }, { "epoch": 0.6942978636200625, "grad_norm": 0.8653259401104415, "learning_rate": 2.255024694095259e-06, "loss": 0.6115, "step": 1694 }, { "epoch": 0.694707720682412, "grad_norm": 0.8964923594228996, "learning_rate": 2.24947569358632e-06, "loss": 0.6138, "step": 1695 }, { "epoch": 0.6951175777447615, "grad_norm": 0.8311871729623819, "learning_rate": 2.2439315465532947e-06, "loss": 0.577, "step": 1696 }, { "epoch": 0.695527434807111, "grad_norm": 0.9320391110605261, "learning_rate": 2.2383922627791815e-06, "loss": 0.5943, "step": 1697 }, { "epoch": 0.6959372918694605, "grad_norm": 0.9023889286521704, "learning_rate": 2.2328578520384035e-06, "loss": 0.6134, "step": 1698 }, { "epoch": 0.69634714893181, "grad_norm": 0.8007211391518744, "learning_rate": 2.2273283240967795e-06, "loss": 0.562, "step": 1699 }, { "epoch": 0.6967570059941596, "grad_norm": 1.0130385365238825, "learning_rate": 2.2218036887115186e-06, "loss": 0.6381, "step": 1700 }, { "epoch": 0.697166863056509, "grad_norm": 0.9252823840969002, "learning_rate": 2.2162839556311884e-06, "loss": 0.6069, "step": 1701 }, { "epoch": 0.6975767201188585, "grad_norm": 0.908400160958456, "learning_rate": 2.2107691345957133e-06, "loss": 0.5907, "step": 1702 }, { "epoch": 0.697986577181208, "grad_norm": 0.9586403983958389, "learning_rate": 2.2052592353363482e-06, "loss": 0.6308, "step": 1703 }, { "epoch": 0.6983964342435576, "grad_norm": 0.9482292875013271, "learning_rate": 2.1997542675756604e-06, "loss": 0.6399, "step": 1704 }, { "epoch": 0.6988062913059071, "grad_norm": 0.9052998492228839, "learning_rate": 2.19425424102752e-06, "loss": 0.6242, "step": 1705 }, { "epoch": 0.6992161483682565, "grad_norm": 0.9034624927647482, "learning_rate": 2.1887591653970714e-06, "loss": 0.5903, "step": 1706 }, { "epoch": 0.699626005430606, "grad_norm": 0.8530797046877084, "learning_rate": 2.183269050380731e-06, "loss": 0.5886, "step": 1707 }, { "epoch": 0.7000358624929556, "grad_norm": 0.8718637780179547, "learning_rate": 2.1777839056661555e-06, "loss": 0.583, "step": 1708 }, { "epoch": 0.7004457195553051, "grad_norm": 1.0026263366596035, "learning_rate": 2.172303740932233e-06, "loss": 0.646, "step": 1709 }, { "epoch": 0.7008555766176546, "grad_norm": 0.9452652050145466, "learning_rate": 2.1668285658490636e-06, "loss": 0.6306, "step": 1710 }, { "epoch": 0.701265433680004, "grad_norm": 0.9017542875791703, "learning_rate": 2.1613583900779438e-06, "loss": 0.5994, "step": 1711 }, { "epoch": 0.7016752907423536, "grad_norm": 0.9721192191036614, "learning_rate": 2.1558932232713513e-06, "loss": 0.6508, "step": 1712 }, { "epoch": 0.7020851478047031, "grad_norm": 0.825542344315657, "learning_rate": 2.1504330750729185e-06, "loss": 0.5614, "step": 1713 }, { "epoch": 0.7024950048670526, "grad_norm": 0.8199892220298124, "learning_rate": 2.1449779551174292e-06, "loss": 0.5873, "step": 1714 }, { "epoch": 0.7029048619294022, "grad_norm": 0.8649096007721814, "learning_rate": 2.1395278730307878e-06, "loss": 0.5899, "step": 1715 }, { "epoch": 0.7033147189917516, "grad_norm": 0.8556390565691211, "learning_rate": 2.134082838430016e-06, "loss": 0.6224, "step": 1716 }, { "epoch": 0.7037245760541011, "grad_norm": 0.8969142999476226, "learning_rate": 2.1286428609232213e-06, "loss": 0.5923, "step": 1717 }, { "epoch": 0.7041344331164506, "grad_norm": 0.9805630468618897, "learning_rate": 2.123207950109596e-06, "loss": 0.6804, "step": 1718 }, { "epoch": 0.7045442901788002, "grad_norm": 0.9358175672153682, "learning_rate": 2.1177781155793826e-06, "loss": 0.6238, "step": 1719 }, { "epoch": 0.7049541472411497, "grad_norm": 0.9712220604975952, "learning_rate": 2.1123533669138748e-06, "loss": 0.6182, "step": 1720 }, { "epoch": 0.7053640043034991, "grad_norm": 0.8852328701050542, "learning_rate": 2.106933713685388e-06, "loss": 0.6039, "step": 1721 }, { "epoch": 0.7057738613658486, "grad_norm": 1.0222022977377285, "learning_rate": 2.1015191654572446e-06, "loss": 0.6412, "step": 1722 }, { "epoch": 0.7061837184281982, "grad_norm": 0.8867417277997708, "learning_rate": 2.0961097317837646e-06, "loss": 0.603, "step": 1723 }, { "epoch": 0.7065935754905477, "grad_norm": 0.8732939153482571, "learning_rate": 2.0907054222102367e-06, "loss": 0.6051, "step": 1724 }, { "epoch": 0.7070034325528972, "grad_norm": 0.8897550687949493, "learning_rate": 2.0853062462729112e-06, "loss": 0.5423, "step": 1725 }, { "epoch": 0.7074132896152466, "grad_norm": 0.9245165303593922, "learning_rate": 2.079912213498977e-06, "loss": 0.5638, "step": 1726 }, { "epoch": 0.7078231466775962, "grad_norm": 0.8248676879426412, "learning_rate": 2.0745233334065513e-06, "loss": 0.5973, "step": 1727 }, { "epoch": 0.7082330037399457, "grad_norm": 0.9102526746881753, "learning_rate": 2.0691396155046593e-06, "loss": 0.6333, "step": 1728 }, { "epoch": 0.7086428608022952, "grad_norm": 0.8940437998234426, "learning_rate": 2.063761069293212e-06, "loss": 0.5951, "step": 1729 }, { "epoch": 0.7090527178646447, "grad_norm": 0.8679973549919896, "learning_rate": 2.058387704263001e-06, "loss": 0.6289, "step": 1730 }, { "epoch": 0.7094625749269942, "grad_norm": 0.9742794244363233, "learning_rate": 2.053019529895669e-06, "loss": 0.6384, "step": 1731 }, { "epoch": 0.7098724319893437, "grad_norm": 0.9087894661227974, "learning_rate": 2.0476565556637066e-06, "loss": 0.6227, "step": 1732 }, { "epoch": 0.7102822890516932, "grad_norm": 0.907741630566839, "learning_rate": 2.04229879103042e-06, "loss": 0.6023, "step": 1733 }, { "epoch": 0.7106921461140427, "grad_norm": 0.8748428796075575, "learning_rate": 2.0369462454499323e-06, "loss": 0.5957, "step": 1734 }, { "epoch": 0.7111020031763923, "grad_norm": 0.9260732201258572, "learning_rate": 2.0315989283671474e-06, "loss": 0.6014, "step": 1735 }, { "epoch": 0.7115118602387417, "grad_norm": 0.8437287862904438, "learning_rate": 2.02625684921775e-06, "loss": 0.5416, "step": 1736 }, { "epoch": 0.7119217173010912, "grad_norm": 0.9371437337413182, "learning_rate": 2.020920017428183e-06, "loss": 0.6464, "step": 1737 }, { "epoch": 0.7123315743634407, "grad_norm": 0.8839122993370073, "learning_rate": 2.0155884424156243e-06, "loss": 0.6151, "step": 1738 }, { "epoch": 0.7127414314257903, "grad_norm": 0.8827711520358525, "learning_rate": 2.010262133587977e-06, "loss": 0.5787, "step": 1739 }, { "epoch": 0.7131512884881398, "grad_norm": 0.9459311308625015, "learning_rate": 2.004941100343857e-06, "loss": 0.643, "step": 1740 }, { "epoch": 0.7135611455504892, "grad_norm": 0.9235439473845157, "learning_rate": 1.9996253520725657e-06, "loss": 0.6538, "step": 1741 }, { "epoch": 0.7139710026128387, "grad_norm": 0.8601685116650073, "learning_rate": 1.9943148981540793e-06, "loss": 0.5949, "step": 1742 }, { "epoch": 0.7143808596751883, "grad_norm": 0.8957139905090667, "learning_rate": 1.9890097479590364e-06, "loss": 0.6212, "step": 1743 }, { "epoch": 0.7147907167375378, "grad_norm": 0.8523502773840603, "learning_rate": 1.98370991084871e-06, "loss": 0.647, "step": 1744 }, { "epoch": 0.7152005737998873, "grad_norm": 0.8681229589819881, "learning_rate": 1.9784153961750037e-06, "loss": 0.5926, "step": 1745 }, { "epoch": 0.7156104308622367, "grad_norm": 0.9158829257371237, "learning_rate": 1.9731262132804275e-06, "loss": 0.5774, "step": 1746 }, { "epoch": 0.7160202879245863, "grad_norm": 0.8856156015992114, "learning_rate": 1.96784237149808e-06, "loss": 0.597, "step": 1747 }, { "epoch": 0.7164301449869358, "grad_norm": 0.8575913929555173, "learning_rate": 1.9625638801516404e-06, "loss": 0.5856, "step": 1748 }, { "epoch": 0.7168400020492853, "grad_norm": 0.8800401194949675, "learning_rate": 1.957290748555342e-06, "loss": 0.5593, "step": 1749 }, { "epoch": 0.7172498591116349, "grad_norm": 0.8341369737182651, "learning_rate": 1.952022986013965e-06, "loss": 0.5703, "step": 1750 }, { "epoch": 0.7176597161739843, "grad_norm": 0.8781828626778929, "learning_rate": 1.946760601822809e-06, "loss": 0.6157, "step": 1751 }, { "epoch": 0.7180695732363338, "grad_norm": 0.8913278892898012, "learning_rate": 1.941503605267692e-06, "loss": 0.5758, "step": 1752 }, { "epoch": 0.7184794302986833, "grad_norm": 0.8881484835815793, "learning_rate": 1.9362520056249158e-06, "loss": 0.6033, "step": 1753 }, { "epoch": 0.7188892873610329, "grad_norm": 0.9576614754846304, "learning_rate": 1.931005812161267e-06, "loss": 0.5736, "step": 1754 }, { "epoch": 0.7192991444233824, "grad_norm": 0.8602136581033364, "learning_rate": 1.9257650341339873e-06, "loss": 0.5601, "step": 1755 }, { "epoch": 0.7197090014857318, "grad_norm": 1.002847131654057, "learning_rate": 1.9205296807907664e-06, "loss": 0.6759, "step": 1756 }, { "epoch": 0.7201188585480813, "grad_norm": 0.8957851666706909, "learning_rate": 1.9152997613697184e-06, "loss": 0.5806, "step": 1757 }, { "epoch": 0.7205287156104309, "grad_norm": 0.8962880651489786, "learning_rate": 1.9100752850993686e-06, "loss": 0.6151, "step": 1758 }, { "epoch": 0.7209385726727804, "grad_norm": 0.8644790245881326, "learning_rate": 1.9048562611986432e-06, "loss": 0.5889, "step": 1759 }, { "epoch": 0.7213484297351299, "grad_norm": 0.937163716090849, "learning_rate": 1.8996426988768397e-06, "loss": 0.633, "step": 1760 }, { "epoch": 0.7217582867974793, "grad_norm": 0.8283434277855685, "learning_rate": 1.894434607333625e-06, "loss": 0.6315, "step": 1761 }, { "epoch": 0.7221681438598289, "grad_norm": 0.9892665799295803, "learning_rate": 1.889231995759006e-06, "loss": 0.6476, "step": 1762 }, { "epoch": 0.7225780009221784, "grad_norm": 0.9339593074441698, "learning_rate": 1.8840348733333257e-06, "loss": 0.6174, "step": 1763 }, { "epoch": 0.7229878579845279, "grad_norm": 0.9105544674873023, "learning_rate": 1.8788432492272407e-06, "loss": 0.6348, "step": 1764 }, { "epoch": 0.7233977150468774, "grad_norm": 0.9015276972677025, "learning_rate": 1.8736571326016988e-06, "loss": 0.6026, "step": 1765 }, { "epoch": 0.7238075721092269, "grad_norm": 0.8995229752080219, "learning_rate": 1.8684765326079384e-06, "loss": 0.6261, "step": 1766 }, { "epoch": 0.7242174291715764, "grad_norm": 0.8607551214627502, "learning_rate": 1.863301458387456e-06, "loss": 0.5814, "step": 1767 }, { "epoch": 0.7246272862339259, "grad_norm": 0.877629739949186, "learning_rate": 1.8581319190720038e-06, "loss": 0.5742, "step": 1768 }, { "epoch": 0.7250371432962754, "grad_norm": 0.7762722689210652, "learning_rate": 1.8529679237835613e-06, "loss": 0.5149, "step": 1769 }, { "epoch": 0.725447000358625, "grad_norm": 0.8949173990011016, "learning_rate": 1.8478094816343312e-06, "loss": 0.5998, "step": 1770 }, { "epoch": 0.7258568574209744, "grad_norm": 0.8538950617066742, "learning_rate": 1.842656601726711e-06, "loss": 0.548, "step": 1771 }, { "epoch": 0.7262667144833239, "grad_norm": 0.9043062231809332, "learning_rate": 1.83750929315329e-06, "loss": 0.6254, "step": 1772 }, { "epoch": 0.7266765715456734, "grad_norm": 0.8640530309754448, "learning_rate": 1.8323675649968215e-06, "loss": 0.5751, "step": 1773 }, { "epoch": 0.727086428608023, "grad_norm": 0.8922725239661011, "learning_rate": 1.8272314263302115e-06, "loss": 0.5868, "step": 1774 }, { "epoch": 0.7274962856703725, "grad_norm": 0.882486233742087, "learning_rate": 1.822100886216509e-06, "loss": 0.6117, "step": 1775 }, { "epoch": 0.7279061427327219, "grad_norm": 0.9574530806950167, "learning_rate": 1.8169759537088756e-06, "loss": 0.599, "step": 1776 }, { "epoch": 0.7283159997950714, "grad_norm": 0.8568553981809975, "learning_rate": 1.8118566378505864e-06, "loss": 0.5879, "step": 1777 }, { "epoch": 0.728725856857421, "grad_norm": 0.866738096152974, "learning_rate": 1.806742947674997e-06, "loss": 0.627, "step": 1778 }, { "epoch": 0.7291357139197705, "grad_norm": 0.9282029369649878, "learning_rate": 1.8016348922055448e-06, "loss": 0.6014, "step": 1779 }, { "epoch": 0.72954557098212, "grad_norm": 0.8838425013171837, "learning_rate": 1.7965324804557166e-06, "loss": 0.6132, "step": 1780 }, { "epoch": 0.7299554280444694, "grad_norm": 0.9137849527283729, "learning_rate": 1.791435721429045e-06, "loss": 0.6405, "step": 1781 }, { "epoch": 0.730365285106819, "grad_norm": 0.9109737700074448, "learning_rate": 1.7863446241190901e-06, "loss": 0.612, "step": 1782 }, { "epoch": 0.7307751421691685, "grad_norm": 0.9261334440907791, "learning_rate": 1.7812591975094133e-06, "loss": 0.5994, "step": 1783 }, { "epoch": 0.731184999231518, "grad_norm": 0.887528716481261, "learning_rate": 1.7761794505735796e-06, "loss": 0.5891, "step": 1784 }, { "epoch": 0.7315948562938676, "grad_norm": 0.9179835500672755, "learning_rate": 1.771105392275123e-06, "loss": 0.6309, "step": 1785 }, { "epoch": 0.732004713356217, "grad_norm": 0.8595549235035053, "learning_rate": 1.7660370315675473e-06, "loss": 0.5907, "step": 1786 }, { "epoch": 0.7324145704185665, "grad_norm": 0.8382193960228206, "learning_rate": 1.7609743773942956e-06, "loss": 0.6134, "step": 1787 }, { "epoch": 0.732824427480916, "grad_norm": 0.8561046409434647, "learning_rate": 1.7559174386887478e-06, "loss": 0.5494, "step": 1788 }, { "epoch": 0.7332342845432656, "grad_norm": 0.8857346008756272, "learning_rate": 1.7508662243741937e-06, "loss": 0.575, "step": 1789 }, { "epoch": 0.7336441416056151, "grad_norm": 0.8669653610208399, "learning_rate": 1.7458207433638225e-06, "loss": 0.5566, "step": 1790 }, { "epoch": 0.7340539986679645, "grad_norm": 0.9051703706542786, "learning_rate": 1.7407810045607104e-06, "loss": 0.6277, "step": 1791 }, { "epoch": 0.734463855730314, "grad_norm": 0.8653433994773081, "learning_rate": 1.735747016857796e-06, "loss": 0.5402, "step": 1792 }, { "epoch": 0.7348737127926636, "grad_norm": 0.8291573011224311, "learning_rate": 1.7307187891378757e-06, "loss": 0.5573, "step": 1793 }, { "epoch": 0.7352835698550131, "grad_norm": 0.7937301101050542, "learning_rate": 1.7256963302735752e-06, "loss": 0.5737, "step": 1794 }, { "epoch": 0.7356934269173626, "grad_norm": 0.7902063437891279, "learning_rate": 1.720679649127347e-06, "loss": 0.5346, "step": 1795 }, { "epoch": 0.736103283979712, "grad_norm": 0.9173358605595614, "learning_rate": 1.7156687545514423e-06, "loss": 0.6259, "step": 1796 }, { "epoch": 0.7365131410420616, "grad_norm": 0.9262628229658131, "learning_rate": 1.7106636553879087e-06, "loss": 0.6476, "step": 1797 }, { "epoch": 0.7369229981044111, "grad_norm": 0.8873530960867846, "learning_rate": 1.7056643604685597e-06, "loss": 0.5844, "step": 1798 }, { "epoch": 0.7373328551667606, "grad_norm": 0.8805309964065172, "learning_rate": 1.7006708786149723e-06, "loss": 0.6304, "step": 1799 }, { "epoch": 0.7377427122291101, "grad_norm": 0.9138035074614655, "learning_rate": 1.6956832186384664e-06, "loss": 0.5736, "step": 1800 }, { "epoch": 0.7381525692914596, "grad_norm": 0.9408617250304644, "learning_rate": 1.6907013893400838e-06, "loss": 0.6153, "step": 1801 }, { "epoch": 0.7385624263538091, "grad_norm": 0.9392328418944441, "learning_rate": 1.6857253995105827e-06, "loss": 0.6255, "step": 1802 }, { "epoch": 0.7389722834161586, "grad_norm": 0.9838391113983769, "learning_rate": 1.6807552579304143e-06, "loss": 0.6413, "step": 1803 }, { "epoch": 0.7393821404785081, "grad_norm": 0.834415758984027, "learning_rate": 1.6757909733697092e-06, "loss": 0.551, "step": 1804 }, { "epoch": 0.7397919975408577, "grad_norm": 0.9069734781360201, "learning_rate": 1.6708325545882687e-06, "loss": 0.6296, "step": 1805 }, { "epoch": 0.7402018546032071, "grad_norm": 0.8743995349238052, "learning_rate": 1.6658800103355355e-06, "loss": 0.6098, "step": 1806 }, { "epoch": 0.7406117116655566, "grad_norm": 0.8596233749354123, "learning_rate": 1.6609333493505935e-06, "loss": 0.6373, "step": 1807 }, { "epoch": 0.7410215687279061, "grad_norm": 0.8553816876846967, "learning_rate": 1.6559925803621401e-06, "loss": 0.5426, "step": 1808 }, { "epoch": 0.7414314257902557, "grad_norm": 0.986393189393883, "learning_rate": 1.6510577120884803e-06, "loss": 0.6648, "step": 1809 }, { "epoch": 0.7418412828526052, "grad_norm": 0.8428829490799723, "learning_rate": 1.6461287532375009e-06, "loss": 0.5566, "step": 1810 }, { "epoch": 0.7422511399149546, "grad_norm": 0.9597441667904945, "learning_rate": 1.6412057125066689e-06, "loss": 0.6185, "step": 1811 }, { "epoch": 0.7426609969773041, "grad_norm": 0.8391547192783783, "learning_rate": 1.6362885985830001e-06, "loss": 0.6106, "step": 1812 }, { "epoch": 0.7430708540396537, "grad_norm": 0.961549217128666, "learning_rate": 1.631377420143061e-06, "loss": 0.6161, "step": 1813 }, { "epoch": 0.7434807111020032, "grad_norm": 0.8795036561346022, "learning_rate": 1.6264721858529347e-06, "loss": 0.6397, "step": 1814 }, { "epoch": 0.7438905681643527, "grad_norm": 0.9474100924059752, "learning_rate": 1.6215729043682239e-06, "loss": 0.5998, "step": 1815 }, { "epoch": 0.7443004252267021, "grad_norm": 0.8281649434841892, "learning_rate": 1.6166795843340244e-06, "loss": 0.6067, "step": 1816 }, { "epoch": 0.7447102822890517, "grad_norm": 0.8737199138525926, "learning_rate": 1.611792234384909e-06, "loss": 0.559, "step": 1817 }, { "epoch": 0.7451201393514012, "grad_norm": 0.8801589557102809, "learning_rate": 1.6069108631449226e-06, "loss": 0.6479, "step": 1818 }, { "epoch": 0.7455299964137507, "grad_norm": 0.8679196274458997, "learning_rate": 1.602035479227555e-06, "loss": 0.6479, "step": 1819 }, { "epoch": 0.7459398534761003, "grad_norm": 0.9330203818757545, "learning_rate": 1.5971660912357306e-06, "loss": 0.6308, "step": 1820 }, { "epoch": 0.7463497105384497, "grad_norm": 0.810788175488313, "learning_rate": 1.5923027077617998e-06, "loss": 0.5672, "step": 1821 }, { "epoch": 0.7467595676007992, "grad_norm": 0.8637899363988385, "learning_rate": 1.5874453373875109e-06, "loss": 0.5505, "step": 1822 }, { "epoch": 0.7471694246631487, "grad_norm": 0.8336796226248692, "learning_rate": 1.5825939886840036e-06, "loss": 0.6248, "step": 1823 }, { "epoch": 0.7475792817254983, "grad_norm": 0.8751265139274129, "learning_rate": 1.577748670211794e-06, "loss": 0.56, "step": 1824 }, { "epoch": 0.7479891387878478, "grad_norm": 0.857685347631554, "learning_rate": 1.5729093905207577e-06, "loss": 0.6121, "step": 1825 }, { "epoch": 0.7483989958501972, "grad_norm": 0.8477392380240091, "learning_rate": 1.5680761581501097e-06, "loss": 0.5771, "step": 1826 }, { "epoch": 0.7488088529125467, "grad_norm": 0.8567296435221169, "learning_rate": 1.5632489816284009e-06, "loss": 0.5752, "step": 1827 }, { "epoch": 0.7492187099748963, "grad_norm": 0.8935656830620647, "learning_rate": 1.5584278694734888e-06, "loss": 0.641, "step": 1828 }, { "epoch": 0.7496285670372458, "grad_norm": 0.9019603274775163, "learning_rate": 1.5536128301925384e-06, "loss": 0.5827, "step": 1829 }, { "epoch": 0.7500384240995953, "grad_norm": 0.8297770415109389, "learning_rate": 1.5488038722819897e-06, "loss": 0.59, "step": 1830 }, { "epoch": 0.7504482811619447, "grad_norm": 0.9009123123411157, "learning_rate": 1.5440010042275605e-06, "loss": 0.5838, "step": 1831 }, { "epoch": 0.7508581382242943, "grad_norm": 0.8841540863917182, "learning_rate": 1.5392042345042147e-06, "loss": 0.6067, "step": 1832 }, { "epoch": 0.7512679952866438, "grad_norm": 0.9273765576876872, "learning_rate": 1.5344135715761604e-06, "loss": 0.6644, "step": 1833 }, { "epoch": 0.7516778523489933, "grad_norm": 0.916523217002708, "learning_rate": 1.5296290238968303e-06, "loss": 0.6003, "step": 1834 }, { "epoch": 0.7520877094113428, "grad_norm": 0.861070884092072, "learning_rate": 1.5248505999088637e-06, "loss": 0.6045, "step": 1835 }, { "epoch": 0.7524975664736923, "grad_norm": 0.8939636556315104, "learning_rate": 1.5200783080440922e-06, "loss": 0.5935, "step": 1836 }, { "epoch": 0.7529074235360418, "grad_norm": 0.8910225644333942, "learning_rate": 1.5153121567235334e-06, "loss": 0.5871, "step": 1837 }, { "epoch": 0.7533172805983913, "grad_norm": 0.9182712776374516, "learning_rate": 1.5105521543573648e-06, "loss": 0.5845, "step": 1838 }, { "epoch": 0.7537271376607408, "grad_norm": 0.9121804821975116, "learning_rate": 1.5057983093449124e-06, "loss": 0.5727, "step": 1839 }, { "epoch": 0.7541369947230904, "grad_norm": 0.8284171166451666, "learning_rate": 1.5010506300746435e-06, "loss": 0.5644, "step": 1840 }, { "epoch": 0.7545468517854398, "grad_norm": 0.8551708915887705, "learning_rate": 1.496309124924138e-06, "loss": 0.6113, "step": 1841 }, { "epoch": 0.7549567088477893, "grad_norm": 0.87017994155162, "learning_rate": 1.4915738022600862e-06, "loss": 0.5975, "step": 1842 }, { "epoch": 0.7553665659101388, "grad_norm": 0.9026989408562619, "learning_rate": 1.4868446704382694e-06, "loss": 0.576, "step": 1843 }, { "epoch": 0.7557764229724884, "grad_norm": 0.8990402568915792, "learning_rate": 1.4821217378035396e-06, "loss": 0.6385, "step": 1844 }, { "epoch": 0.7561862800348379, "grad_norm": 0.8773246154644005, "learning_rate": 1.4774050126898164e-06, "loss": 0.6332, "step": 1845 }, { "epoch": 0.7565961370971873, "grad_norm": 0.9010469611469428, "learning_rate": 1.4726945034200597e-06, "loss": 0.6273, "step": 1846 }, { "epoch": 0.7570059941595368, "grad_norm": 0.8441086402976813, "learning_rate": 1.4679902183062673e-06, "loss": 0.5664, "step": 1847 }, { "epoch": 0.7574158512218864, "grad_norm": 0.8834862858452494, "learning_rate": 1.463292165649447e-06, "loss": 0.5463, "step": 1848 }, { "epoch": 0.7578257082842359, "grad_norm": 0.9020569325472798, "learning_rate": 1.4586003537396171e-06, "loss": 0.6237, "step": 1849 }, { "epoch": 0.7582355653465854, "grad_norm": 0.8723327287948588, "learning_rate": 1.453914790855776e-06, "loss": 0.5915, "step": 1850 }, { "epoch": 0.7586454224089348, "grad_norm": 0.9209783654863992, "learning_rate": 1.4492354852659024e-06, "loss": 0.6095, "step": 1851 }, { "epoch": 0.7590552794712844, "grad_norm": 0.9167828759695262, "learning_rate": 1.4445624452269258e-06, "loss": 0.6591, "step": 1852 }, { "epoch": 0.7594651365336339, "grad_norm": 0.8552049737910455, "learning_rate": 1.4398956789847285e-06, "loss": 0.6072, "step": 1853 }, { "epoch": 0.7598749935959834, "grad_norm": 0.864786477838781, "learning_rate": 1.435235194774115e-06, "loss": 0.6338, "step": 1854 }, { "epoch": 0.760284850658333, "grad_norm": 0.8939880012205083, "learning_rate": 1.4305810008188064e-06, "loss": 0.5821, "step": 1855 }, { "epoch": 0.7606947077206824, "grad_norm": 0.8799228474835237, "learning_rate": 1.425933105331429e-06, "loss": 0.5825, "step": 1856 }, { "epoch": 0.7611045647830319, "grad_norm": 0.9191651154656485, "learning_rate": 1.4212915165134877e-06, "loss": 0.5997, "step": 1857 }, { "epoch": 0.7615144218453814, "grad_norm": 0.9532072892580656, "learning_rate": 1.4166562425553658e-06, "loss": 0.571, "step": 1858 }, { "epoch": 0.761924278907731, "grad_norm": 0.8532465073582729, "learning_rate": 1.4120272916362981e-06, "loss": 0.591, "step": 1859 }, { "epoch": 0.7623341359700805, "grad_norm": 0.8247073580200713, "learning_rate": 1.4074046719243645e-06, "loss": 0.6214, "step": 1860 }, { "epoch": 0.7627439930324299, "grad_norm": 0.9544008219127902, "learning_rate": 1.402788391576475e-06, "loss": 0.6087, "step": 1861 }, { "epoch": 0.7631538500947794, "grad_norm": 0.8681464754519892, "learning_rate": 1.3981784587383485e-06, "loss": 0.5601, "step": 1862 }, { "epoch": 0.763563707157129, "grad_norm": 0.8922688429630601, "learning_rate": 1.393574881544509e-06, "loss": 0.5661, "step": 1863 }, { "epoch": 0.7639735642194785, "grad_norm": 0.9642858738611146, "learning_rate": 1.3889776681182592e-06, "loss": 0.5936, "step": 1864 }, { "epoch": 0.764383421281828, "grad_norm": 0.9450487375075157, "learning_rate": 1.3843868265716798e-06, "loss": 0.5991, "step": 1865 }, { "epoch": 0.7647932783441774, "grad_norm": 0.8522766419102317, "learning_rate": 1.3798023650056003e-06, "loss": 0.6034, "step": 1866 }, { "epoch": 0.765203135406527, "grad_norm": 0.824612686421063, "learning_rate": 1.3752242915095993e-06, "loss": 0.5932, "step": 1867 }, { "epoch": 0.7656129924688765, "grad_norm": 0.8175894404339339, "learning_rate": 1.370652614161979e-06, "loss": 0.5663, "step": 1868 }, { "epoch": 0.766022849531226, "grad_norm": 1.0154587454935202, "learning_rate": 1.3660873410297542e-06, "loss": 0.6249, "step": 1869 }, { "epoch": 0.7664327065935755, "grad_norm": 0.8422128507072014, "learning_rate": 1.3615284801686447e-06, "loss": 0.5961, "step": 1870 }, { "epoch": 0.766842563655925, "grad_norm": 0.826362486122805, "learning_rate": 1.356976039623048e-06, "loss": 0.5233, "step": 1871 }, { "epoch": 0.7672524207182745, "grad_norm": 0.9562175956016475, "learning_rate": 1.35243002742604e-06, "loss": 0.5745, "step": 1872 }, { "epoch": 0.767662277780624, "grad_norm": 0.9159015781205679, "learning_rate": 1.3478904515993464e-06, "loss": 0.6005, "step": 1873 }, { "epoch": 0.7680721348429735, "grad_norm": 0.9010277515811626, "learning_rate": 1.3433573201533417e-06, "loss": 0.6083, "step": 1874 }, { "epoch": 0.7684819919053231, "grad_norm": 0.84810667408981, "learning_rate": 1.3388306410870238e-06, "loss": 0.5916, "step": 1875 }, { "epoch": 0.7688918489676725, "grad_norm": 0.8193933921303916, "learning_rate": 1.33431042238801e-06, "loss": 0.5469, "step": 1876 }, { "epoch": 0.769301706030022, "grad_norm": 0.8765067584134995, "learning_rate": 1.3297966720325112e-06, "loss": 0.5981, "step": 1877 }, { "epoch": 0.7697115630923715, "grad_norm": 0.9211663034616906, "learning_rate": 1.3252893979853304e-06, "loss": 0.5956, "step": 1878 }, { "epoch": 0.7701214201547211, "grad_norm": 0.9148486908976645, "learning_rate": 1.3207886081998423e-06, "loss": 0.5694, "step": 1879 }, { "epoch": 0.7705312772170706, "grad_norm": 0.9018722728863999, "learning_rate": 1.3162943106179748e-06, "loss": 0.5657, "step": 1880 }, { "epoch": 0.77094113427942, "grad_norm": 0.8307635964021953, "learning_rate": 1.3118065131702067e-06, "loss": 0.5732, "step": 1881 }, { "epoch": 0.7713509913417695, "grad_norm": 0.8759106456731064, "learning_rate": 1.30732522377554e-06, "loss": 0.6159, "step": 1882 }, { "epoch": 0.7717608484041191, "grad_norm": 0.877149631164817, "learning_rate": 1.3028504503414996e-06, "loss": 0.6325, "step": 1883 }, { "epoch": 0.7721707054664686, "grad_norm": 0.9444576903659722, "learning_rate": 1.2983822007641073e-06, "loss": 0.6396, "step": 1884 }, { "epoch": 0.7725805625288181, "grad_norm": 0.8480389836774167, "learning_rate": 1.2939204829278733e-06, "loss": 0.5563, "step": 1885 }, { "epoch": 0.7729904195911675, "grad_norm": 0.907172093024098, "learning_rate": 1.2894653047057875e-06, "loss": 0.6264, "step": 1886 }, { "epoch": 0.7734002766535171, "grad_norm": 0.924204807055541, "learning_rate": 1.2850166739592934e-06, "loss": 0.6162, "step": 1887 }, { "epoch": 0.7738101337158666, "grad_norm": 0.9423258400058596, "learning_rate": 1.2805745985382867e-06, "loss": 0.6238, "step": 1888 }, { "epoch": 0.7742199907782161, "grad_norm": 0.8920992486546333, "learning_rate": 1.2761390862810907e-06, "loss": 0.5445, "step": 1889 }, { "epoch": 0.7746298478405657, "grad_norm": 0.8920022407817766, "learning_rate": 1.2717101450144537e-06, "loss": 0.588, "step": 1890 }, { "epoch": 0.7750397049029151, "grad_norm": 0.8027262506001859, "learning_rate": 1.2672877825535223e-06, "loss": 0.5773, "step": 1891 }, { "epoch": 0.7754495619652646, "grad_norm": 0.9138321770003313, "learning_rate": 1.2628720067018402e-06, "loss": 0.6437, "step": 1892 }, { "epoch": 0.7758594190276141, "grad_norm": 0.9795148170525917, "learning_rate": 1.2584628252513232e-06, "loss": 0.585, "step": 1893 }, { "epoch": 0.7762692760899637, "grad_norm": 0.8296712478650149, "learning_rate": 1.254060245982256e-06, "loss": 0.5331, "step": 1894 }, { "epoch": 0.7766791331523132, "grad_norm": 0.9212868700964323, "learning_rate": 1.2496642766632716e-06, "loss": 0.6059, "step": 1895 }, { "epoch": 0.7770889902146626, "grad_norm": 0.903368156749697, "learning_rate": 1.245274925051337e-06, "loss": 0.602, "step": 1896 }, { "epoch": 0.7774988472770121, "grad_norm": 0.9110975310461027, "learning_rate": 1.240892198891745e-06, "loss": 0.6414, "step": 1897 }, { "epoch": 0.7779087043393617, "grad_norm": 0.9068967289458066, "learning_rate": 1.2365161059180942e-06, "loss": 0.5695, "step": 1898 }, { "epoch": 0.7783185614017112, "grad_norm": 0.8736137307692283, "learning_rate": 1.232146653852283e-06, "loss": 0.6249, "step": 1899 }, { "epoch": 0.7787284184640607, "grad_norm": 0.8275222559595068, "learning_rate": 1.227783850404487e-06, "loss": 0.5844, "step": 1900 }, { "epoch": 0.7791382755264101, "grad_norm": 0.8912404918264887, "learning_rate": 1.2234277032731506e-06, "loss": 0.5735, "step": 1901 }, { "epoch": 0.7795481325887597, "grad_norm": 0.8590404051774451, "learning_rate": 1.219078220144977e-06, "loss": 0.5095, "step": 1902 }, { "epoch": 0.7799579896511092, "grad_norm": 0.8820676306571729, "learning_rate": 1.2147354086949032e-06, "loss": 0.5634, "step": 1903 }, { "epoch": 0.7803678467134587, "grad_norm": 0.8872702095566435, "learning_rate": 1.2103992765861017e-06, "loss": 0.604, "step": 1904 }, { "epoch": 0.7807777037758082, "grad_norm": 0.8806421047226447, "learning_rate": 1.206069831469951e-06, "loss": 0.6483, "step": 1905 }, { "epoch": 0.7811875608381577, "grad_norm": 0.903990994961449, "learning_rate": 1.2017470809860365e-06, "loss": 0.5971, "step": 1906 }, { "epoch": 0.7815974179005072, "grad_norm": 0.9307581187605714, "learning_rate": 1.1974310327621248e-06, "loss": 0.5973, "step": 1907 }, { "epoch": 0.7820072749628567, "grad_norm": 0.890982150733259, "learning_rate": 1.1931216944141622e-06, "loss": 0.5988, "step": 1908 }, { "epoch": 0.7824171320252062, "grad_norm": 0.9050663155129954, "learning_rate": 1.1888190735462486e-06, "loss": 0.5991, "step": 1909 }, { "epoch": 0.7828269890875558, "grad_norm": 0.887199466063509, "learning_rate": 1.184523177750636e-06, "loss": 0.6486, "step": 1910 }, { "epoch": 0.7832368461499052, "grad_norm": 0.9141559931425783, "learning_rate": 1.1802340146077045e-06, "loss": 0.5494, "step": 1911 }, { "epoch": 0.7836467032122547, "grad_norm": 0.9040462292583253, "learning_rate": 1.175951591685958e-06, "loss": 0.5768, "step": 1912 }, { "epoch": 0.7840565602746042, "grad_norm": 0.94180897773021, "learning_rate": 1.171675916542007e-06, "loss": 0.5952, "step": 1913 }, { "epoch": 0.7844664173369538, "grad_norm": 0.857490210102407, "learning_rate": 1.1674069967205504e-06, "loss": 0.566, "step": 1914 }, { "epoch": 0.7848762743993033, "grad_norm": 0.8462279534824871, "learning_rate": 1.163144839754372e-06, "loss": 0.5422, "step": 1915 }, { "epoch": 0.7852861314616527, "grad_norm": 0.8989709884700051, "learning_rate": 1.1588894531643208e-06, "loss": 0.6122, "step": 1916 }, { "epoch": 0.7856959885240022, "grad_norm": 0.8607222429610976, "learning_rate": 1.154640844459295e-06, "loss": 0.5758, "step": 1917 }, { "epoch": 0.7861058455863518, "grad_norm": 0.985983624053414, "learning_rate": 1.1503990211362404e-06, "loss": 0.6183, "step": 1918 }, { "epoch": 0.7865157026487013, "grad_norm": 0.8842416126305849, "learning_rate": 1.1461639906801225e-06, "loss": 0.5929, "step": 1919 }, { "epoch": 0.7869255597110508, "grad_norm": 0.9506051937627028, "learning_rate": 1.1419357605639231e-06, "loss": 0.6139, "step": 1920 }, { "epoch": 0.7873354167734002, "grad_norm": 0.8354992416762942, "learning_rate": 1.1377143382486256e-06, "loss": 0.6084, "step": 1921 }, { "epoch": 0.7877452738357498, "grad_norm": 0.8749108987176284, "learning_rate": 1.1334997311832003e-06, "loss": 0.599, "step": 1922 }, { "epoch": 0.7881551308980993, "grad_norm": 0.8990142447476395, "learning_rate": 1.1292919468045876e-06, "loss": 0.5981, "step": 1923 }, { "epoch": 0.7885649879604488, "grad_norm": 0.8049600454786139, "learning_rate": 1.1250909925376963e-06, "loss": 0.5221, "step": 1924 }, { "epoch": 0.7889748450227984, "grad_norm": 0.8438659311066307, "learning_rate": 1.1208968757953742e-06, "loss": 0.5863, "step": 1925 }, { "epoch": 0.7893847020851478, "grad_norm": 0.8918025438006278, "learning_rate": 1.116709603978412e-06, "loss": 0.599, "step": 1926 }, { "epoch": 0.7897945591474973, "grad_norm": 0.8639387138904571, "learning_rate": 1.1125291844755153e-06, "loss": 0.6177, "step": 1927 }, { "epoch": 0.7902044162098468, "grad_norm": 0.9008198690199652, "learning_rate": 1.1083556246633047e-06, "loss": 0.6277, "step": 1928 }, { "epoch": 0.7906142732721964, "grad_norm": 0.9138527041099548, "learning_rate": 1.1041889319062898e-06, "loss": 0.5729, "step": 1929 }, { "epoch": 0.7910241303345458, "grad_norm": 0.8277274065781082, "learning_rate": 1.100029113556868e-06, "loss": 0.5899, "step": 1930 }, { "epoch": 0.7914339873968953, "grad_norm": 0.8665314602674099, "learning_rate": 1.0958761769553061e-06, "loss": 0.58, "step": 1931 }, { "epoch": 0.7918438444592448, "grad_norm": 0.8902794140201253, "learning_rate": 1.0917301294297244e-06, "loss": 0.564, "step": 1932 }, { "epoch": 0.7922537015215944, "grad_norm": 0.9005525804392157, "learning_rate": 1.0875909782960887e-06, "loss": 0.6018, "step": 1933 }, { "epoch": 0.7926635585839439, "grad_norm": 0.8895492067497688, "learning_rate": 1.083458730858195e-06, "loss": 0.6105, "step": 1934 }, { "epoch": 0.7930734156462933, "grad_norm": 0.9604163844310234, "learning_rate": 1.0793333944076596e-06, "loss": 0.6285, "step": 1935 }, { "epoch": 0.7934832727086428, "grad_norm": 0.942924701800689, "learning_rate": 1.0752149762239007e-06, "loss": 0.6145, "step": 1936 }, { "epoch": 0.7938931297709924, "grad_norm": 0.8815627799497991, "learning_rate": 1.0711034835741323e-06, "loss": 0.6203, "step": 1937 }, { "epoch": 0.7943029868333419, "grad_norm": 0.8576137000419685, "learning_rate": 1.0669989237133437e-06, "loss": 0.574, "step": 1938 }, { "epoch": 0.7947128438956914, "grad_norm": 0.8945902840790864, "learning_rate": 1.062901303884294e-06, "loss": 0.6203, "step": 1939 }, { "epoch": 0.7951227009580408, "grad_norm": 0.8738806712271309, "learning_rate": 1.0588106313174968e-06, "loss": 0.5788, "step": 1940 }, { "epoch": 0.7955325580203904, "grad_norm": 0.9134601097286962, "learning_rate": 1.0547269132312027e-06, "loss": 0.531, "step": 1941 }, { "epoch": 0.7959424150827399, "grad_norm": 0.8599200355478647, "learning_rate": 1.0506501568313959e-06, "loss": 0.5685, "step": 1942 }, { "epoch": 0.7963522721450894, "grad_norm": 0.839952719550253, "learning_rate": 1.0465803693117705e-06, "loss": 0.5594, "step": 1943 }, { "epoch": 0.7967621292074389, "grad_norm": 0.892303311460195, "learning_rate": 1.04251755785373e-06, "loss": 0.6201, "step": 1944 }, { "epoch": 0.7971719862697884, "grad_norm": 0.8996170168953704, "learning_rate": 1.0384617296263617e-06, "loss": 0.6238, "step": 1945 }, { "epoch": 0.7975818433321379, "grad_norm": 0.9510357103044653, "learning_rate": 1.0344128917864365e-06, "loss": 0.6049, "step": 1946 }, { "epoch": 0.7979917003944874, "grad_norm": 0.8736102083130333, "learning_rate": 1.030371051478385e-06, "loss": 0.5928, "step": 1947 }, { "epoch": 0.7984015574568369, "grad_norm": 0.9720631642236364, "learning_rate": 1.0263362158342948e-06, "loss": 0.6194, "step": 1948 }, { "epoch": 0.7988114145191865, "grad_norm": 0.8822117475048493, "learning_rate": 1.0223083919738907e-06, "loss": 0.6043, "step": 1949 }, { "epoch": 0.7992212715815359, "grad_norm": 0.8149732195753991, "learning_rate": 1.0182875870045238e-06, "loss": 0.58, "step": 1950 }, { "epoch": 0.7996311286438854, "grad_norm": 0.9265778821149682, "learning_rate": 1.0142738080211634e-06, "loss": 0.6043, "step": 1951 }, { "epoch": 0.8000409857062349, "grad_norm": 0.8992145042220231, "learning_rate": 1.0102670621063776e-06, "loss": 0.5668, "step": 1952 }, { "epoch": 0.8004508427685845, "grad_norm": 0.9312924825456177, "learning_rate": 1.0062673563303265e-06, "loss": 0.5866, "step": 1953 }, { "epoch": 0.800860699830934, "grad_norm": 0.8849067670345098, "learning_rate": 1.0022746977507458e-06, "loss": 0.5921, "step": 1954 }, { "epoch": 0.8012705568932834, "grad_norm": 0.8945067185148857, "learning_rate": 9.98289093412938e-07, "loss": 0.6121, "step": 1955 }, { "epoch": 0.8016804139556329, "grad_norm": 0.8584037516082549, "learning_rate": 9.943105503497546e-07, "loss": 0.603, "step": 1956 }, { "epoch": 0.8020902710179825, "grad_norm": 0.8805782893104103, "learning_rate": 9.903390755815907e-07, "loss": 0.6102, "step": 1957 }, { "epoch": 0.802500128080332, "grad_norm": 0.8584880668995438, "learning_rate": 9.863746761163677e-07, "loss": 0.5895, "step": 1958 }, { "epoch": 0.8029099851426815, "grad_norm": 0.8837330705022524, "learning_rate": 9.8241735894952e-07, "loss": 0.604, "step": 1959 }, { "epoch": 0.8033198422050309, "grad_norm": 0.8409561238844894, "learning_rate": 9.784671310639888e-07, "loss": 0.6068, "step": 1960 }, { "epoch": 0.8037296992673805, "grad_norm": 0.8909117708769527, "learning_rate": 9.745239994302013e-07, "loss": 0.6035, "step": 1961 }, { "epoch": 0.80413955632973, "grad_norm": 0.8392792007086016, "learning_rate": 9.705879710060679e-07, "loss": 0.5335, "step": 1962 }, { "epoch": 0.8045494133920795, "grad_norm": 0.8426574395734817, "learning_rate": 9.66659052736959e-07, "loss": 0.5679, "step": 1963 }, { "epoch": 0.804959270454429, "grad_norm": 0.9065782840179462, "learning_rate": 9.62737251555706e-07, "loss": 0.5985, "step": 1964 }, { "epoch": 0.8053691275167785, "grad_norm": 0.8755024072837768, "learning_rate": 9.588225743825747e-07, "loss": 0.5939, "step": 1965 }, { "epoch": 0.805778984579128, "grad_norm": 0.7797432933742591, "learning_rate": 9.549150281252633e-07, "loss": 0.5173, "step": 1966 }, { "epoch": 0.8061888416414775, "grad_norm": 0.8321277373539501, "learning_rate": 9.510146196788883e-07, "loss": 0.5499, "step": 1967 }, { "epoch": 0.806598698703827, "grad_norm": 0.9605491313814586, "learning_rate": 9.471213559259684e-07, "loss": 0.6312, "step": 1968 }, { "epoch": 0.8070085557661766, "grad_norm": 0.8527761293522313, "learning_rate": 9.432352437364173e-07, "loss": 0.557, "step": 1969 }, { "epoch": 0.807418412828526, "grad_norm": 0.899422190196483, "learning_rate": 9.393562899675268e-07, "loss": 0.6161, "step": 1970 }, { "epoch": 0.8078282698908755, "grad_norm": 0.8595169904844934, "learning_rate": 9.354845014639613e-07, "loss": 0.6187, "step": 1971 }, { "epoch": 0.8082381269532251, "grad_norm": 0.9059118344216212, "learning_rate": 9.316198850577357e-07, "loss": 0.5565, "step": 1972 }, { "epoch": 0.8086479840155746, "grad_norm": 0.9082192568863762, "learning_rate": 9.277624475682163e-07, "loss": 0.5755, "step": 1973 }, { "epoch": 0.8090578410779241, "grad_norm": 0.8464207195792819, "learning_rate": 9.239121958020941e-07, "loss": 0.5536, "step": 1974 }, { "epoch": 0.8094676981402735, "grad_norm": 0.8680342133580455, "learning_rate": 9.20069136553387e-07, "loss": 0.6091, "step": 1975 }, { "epoch": 0.8098775552026231, "grad_norm": 0.9243319075970977, "learning_rate": 9.162332766034193e-07, "loss": 0.6107, "step": 1976 }, { "epoch": 0.8102874122649726, "grad_norm": 0.8937738294734132, "learning_rate": 9.124046227208083e-07, "loss": 0.6299, "step": 1977 }, { "epoch": 0.8106972693273221, "grad_norm": 0.8851903881013399, "learning_rate": 9.08583181661461e-07, "loss": 0.6141, "step": 1978 }, { "epoch": 0.8111071263896716, "grad_norm": 0.8500864327455063, "learning_rate": 9.047689601685522e-07, "loss": 0.5651, "step": 1979 }, { "epoch": 0.8115169834520211, "grad_norm": 0.8891244390738063, "learning_rate": 9.009619649725221e-07, "loss": 0.5965, "step": 1980 }, { "epoch": 0.8119268405143706, "grad_norm": 0.8877397889228426, "learning_rate": 8.971622027910554e-07, "loss": 0.5762, "step": 1981 }, { "epoch": 0.8123366975767201, "grad_norm": 0.9094452033523018, "learning_rate": 8.933696803290742e-07, "loss": 0.5967, "step": 1982 }, { "epoch": 0.8127465546390696, "grad_norm": 0.8944846084927052, "learning_rate": 8.895844042787288e-07, "loss": 0.6065, "step": 1983 }, { "epoch": 0.8131564117014192, "grad_norm": 0.9501003907064649, "learning_rate": 8.85806381319379e-07, "loss": 0.5962, "step": 1984 }, { "epoch": 0.8135662687637686, "grad_norm": 0.899978618752124, "learning_rate": 8.820356181175893e-07, "loss": 0.611, "step": 1985 }, { "epoch": 0.8139761258261181, "grad_norm": 0.8182013658865862, "learning_rate": 8.782721213271106e-07, "loss": 0.5708, "step": 1986 }, { "epoch": 0.8143859828884676, "grad_norm": 0.8936236254471126, "learning_rate": 8.745158975888756e-07, "loss": 0.6075, "step": 1987 }, { "epoch": 0.8147958399508172, "grad_norm": 0.8855467947511614, "learning_rate": 8.707669535309793e-07, "loss": 0.5812, "step": 1988 }, { "epoch": 0.8152056970131667, "grad_norm": 0.9049462133639875, "learning_rate": 8.670252957686748e-07, "loss": 0.6048, "step": 1989 }, { "epoch": 0.8156155540755161, "grad_norm": 0.8878965726967509, "learning_rate": 8.632909309043536e-07, "loss": 0.6116, "step": 1990 }, { "epoch": 0.8160254111378656, "grad_norm": 0.8797623230970256, "learning_rate": 8.595638655275434e-07, "loss": 0.6494, "step": 1991 }, { "epoch": 0.8164352682002152, "grad_norm": 0.8829127048420798, "learning_rate": 8.558441062148898e-07, "loss": 0.6063, "step": 1992 }, { "epoch": 0.8168451252625647, "grad_norm": 0.9064277536416342, "learning_rate": 8.521316595301438e-07, "loss": 0.5609, "step": 1993 }, { "epoch": 0.8172549823249142, "grad_norm": 0.8938832731751305, "learning_rate": 8.484265320241563e-07, "loss": 0.5684, "step": 1994 }, { "epoch": 0.8176648393872636, "grad_norm": 0.9056817281309898, "learning_rate": 8.447287302348606e-07, "loss": 0.606, "step": 1995 }, { "epoch": 0.8180746964496132, "grad_norm": 0.9159078479550525, "learning_rate": 8.410382606872652e-07, "loss": 0.6078, "step": 1996 }, { "epoch": 0.8184845535119627, "grad_norm": 0.9026802673054497, "learning_rate": 8.373551298934395e-07, "loss": 0.5924, "step": 1997 }, { "epoch": 0.8188944105743122, "grad_norm": 0.92800791407488, "learning_rate": 8.33679344352501e-07, "loss": 0.5434, "step": 1998 }, { "epoch": 0.8193042676366618, "grad_norm": 0.8808002359960316, "learning_rate": 8.30010910550611e-07, "loss": 0.6497, "step": 1999 }, { "epoch": 0.8197141246990112, "grad_norm": 0.8937577950766179, "learning_rate": 8.263498349609533e-07, "loss": 0.5808, "step": 2000 }, { "epoch": 0.8201239817613607, "grad_norm": 0.9133111485335524, "learning_rate": 8.226961240437315e-07, "loss": 0.5684, "step": 2001 }, { "epoch": 0.8205338388237102, "grad_norm": 0.8412059875368931, "learning_rate": 8.190497842461498e-07, "loss": 0.599, "step": 2002 }, { "epoch": 0.8209436958860598, "grad_norm": 0.9507953203284668, "learning_rate": 8.154108220024104e-07, "loss": 0.6004, "step": 2003 }, { "epoch": 0.8213535529484093, "grad_norm": 0.9140620910272804, "learning_rate": 8.117792437336924e-07, "loss": 0.6403, "step": 2004 }, { "epoch": 0.8217634100107587, "grad_norm": 0.8664392082353177, "learning_rate": 8.081550558481499e-07, "loss": 0.6059, "step": 2005 }, { "epoch": 0.8221732670731082, "grad_norm": 0.9414987609704617, "learning_rate": 8.045382647408917e-07, "loss": 0.6066, "step": 2006 }, { "epoch": 0.8225831241354578, "grad_norm": 0.9920322376202505, "learning_rate": 8.009288767939793e-07, "loss": 0.6107, "step": 2007 }, { "epoch": 0.8229929811978073, "grad_norm": 0.8084361600976814, "learning_rate": 7.97326898376406e-07, "loss": 0.5812, "step": 2008 }, { "epoch": 0.8234028382601568, "grad_norm": 0.8517864676508012, "learning_rate": 7.937323358440935e-07, "loss": 0.5632, "step": 2009 }, { "epoch": 0.8238126953225062, "grad_norm": 0.8669460091516806, "learning_rate": 7.901451955398792e-07, "loss": 0.5886, "step": 2010 }, { "epoch": 0.8242225523848558, "grad_norm": 0.9217770694268587, "learning_rate": 7.865654837934988e-07, "loss": 0.5899, "step": 2011 }, { "epoch": 0.8246324094472053, "grad_norm": 0.923855231857185, "learning_rate": 7.829932069215845e-07, "loss": 0.6148, "step": 2012 }, { "epoch": 0.8250422665095548, "grad_norm": 0.9259769383224292, "learning_rate": 7.794283712276463e-07, "loss": 0.5736, "step": 2013 }, { "epoch": 0.8254521235719043, "grad_norm": 0.9232979433438705, "learning_rate": 7.758709830020638e-07, "loss": 0.5907, "step": 2014 }, { "epoch": 0.8258619806342538, "grad_norm": 0.8533661649360548, "learning_rate": 7.723210485220755e-07, "loss": 0.5454, "step": 2015 }, { "epoch": 0.8262718376966033, "grad_norm": 0.91723497594892, "learning_rate": 7.687785740517701e-07, "loss": 0.611, "step": 2016 }, { "epoch": 0.8266816947589528, "grad_norm": 0.9046886009213528, "learning_rate": 7.652435658420676e-07, "loss": 0.5913, "step": 2017 }, { "epoch": 0.8270915518213023, "grad_norm": 0.8535748025923917, "learning_rate": 7.61716030130717e-07, "loss": 0.5249, "step": 2018 }, { "epoch": 0.8275014088836519, "grad_norm": 0.8894268458997914, "learning_rate": 7.581959731422811e-07, "loss": 0.558, "step": 2019 }, { "epoch": 0.8279112659460013, "grad_norm": 0.9016164601311275, "learning_rate": 7.546834010881238e-07, "loss": 0.5731, "step": 2020 }, { "epoch": 0.8283211230083508, "grad_norm": 0.8497997555294498, "learning_rate": 7.511783201664053e-07, "loss": 0.5604, "step": 2021 }, { "epoch": 0.8287309800707003, "grad_norm": 0.9868833055380724, "learning_rate": 7.476807365620625e-07, "loss": 0.68, "step": 2022 }, { "epoch": 0.8291408371330499, "grad_norm": 0.8381061111334754, "learning_rate": 7.441906564468071e-07, "loss": 0.6087, "step": 2023 }, { "epoch": 0.8295506941953994, "grad_norm": 0.9120664571734121, "learning_rate": 7.407080859791066e-07, "loss": 0.6339, "step": 2024 }, { "epoch": 0.8299605512577488, "grad_norm": 0.8905162013134806, "learning_rate": 7.372330313041809e-07, "loss": 0.5941, "step": 2025 }, { "epoch": 0.8303704083200983, "grad_norm": 0.8250828820567961, "learning_rate": 7.337654985539844e-07, "loss": 0.5504, "step": 2026 }, { "epoch": 0.8307802653824479, "grad_norm": 0.8959525044510788, "learning_rate": 7.303054938472015e-07, "loss": 0.6026, "step": 2027 }, { "epoch": 0.8311901224447974, "grad_norm": 0.8495945154655385, "learning_rate": 7.268530232892318e-07, "loss": 0.5718, "step": 2028 }, { "epoch": 0.8315999795071469, "grad_norm": 0.8901536942014083, "learning_rate": 7.234080929721804e-07, "loss": 0.5972, "step": 2029 }, { "epoch": 0.8320098365694963, "grad_norm": 0.8818503851444517, "learning_rate": 7.199707089748459e-07, "loss": 0.6564, "step": 2030 }, { "epoch": 0.8324196936318459, "grad_norm": 0.9138264741383277, "learning_rate": 7.165408773627108e-07, "loss": 0.573, "step": 2031 }, { "epoch": 0.8328295506941954, "grad_norm": 0.9091101232149591, "learning_rate": 7.131186041879357e-07, "loss": 0.5775, "step": 2032 }, { "epoch": 0.8332394077565449, "grad_norm": 0.8696451575062535, "learning_rate": 7.097038954893364e-07, "loss": 0.6152, "step": 2033 }, { "epoch": 0.8336492648188945, "grad_norm": 0.8698864528263816, "learning_rate": 7.062967572923885e-07, "loss": 0.5857, "step": 2034 }, { "epoch": 0.8340591218812439, "grad_norm": 0.9316461513429456, "learning_rate": 7.028971956092017e-07, "loss": 0.5429, "step": 2035 }, { "epoch": 0.8344689789435934, "grad_norm": 0.8581056730135299, "learning_rate": 6.995052164385213e-07, "loss": 0.53, "step": 2036 }, { "epoch": 0.8348788360059429, "grad_norm": 0.9123316419618139, "learning_rate": 6.961208257657126e-07, "loss": 0.5938, "step": 2037 }, { "epoch": 0.8352886930682925, "grad_norm": 0.90518884142337, "learning_rate": 6.927440295627469e-07, "loss": 0.6125, "step": 2038 }, { "epoch": 0.835698550130642, "grad_norm": 0.914311803754745, "learning_rate": 6.893748337881995e-07, "loss": 0.5885, "step": 2039 }, { "epoch": 0.8361084071929914, "grad_norm": 0.8973055936279172, "learning_rate": 6.86013244387228e-07, "loss": 0.5983, "step": 2040 }, { "epoch": 0.8365182642553409, "grad_norm": 0.9032400398804291, "learning_rate": 6.826592672915755e-07, "loss": 0.6071, "step": 2041 }, { "epoch": 0.8369281213176905, "grad_norm": 0.9279908146627429, "learning_rate": 6.793129084195466e-07, "loss": 0.5781, "step": 2042 }, { "epoch": 0.83733797838004, "grad_norm": 0.8939450941717418, "learning_rate": 6.759741736760062e-07, "loss": 0.6051, "step": 2043 }, { "epoch": 0.8377478354423895, "grad_norm": 0.8532535831387348, "learning_rate": 6.726430689523644e-07, "loss": 0.5796, "step": 2044 }, { "epoch": 0.8381576925047389, "grad_norm": 0.8769628573436061, "learning_rate": 6.693196001265701e-07, "loss": 0.6282, "step": 2045 }, { "epoch": 0.8385675495670885, "grad_norm": 0.9122087265373554, "learning_rate": 6.660037730630953e-07, "loss": 0.6251, "step": 2046 }, { "epoch": 0.838977406629438, "grad_norm": 0.9420047153213889, "learning_rate": 6.62695593612927e-07, "loss": 0.6185, "step": 2047 }, { "epoch": 0.8393872636917875, "grad_norm": 0.8631431472906177, "learning_rate": 6.593950676135624e-07, "loss": 0.5705, "step": 2048 }, { "epoch": 0.839797120754137, "grad_norm": 0.9778110151635642, "learning_rate": 6.561022008889884e-07, "loss": 0.6842, "step": 2049 }, { "epoch": 0.8402069778164865, "grad_norm": 0.8208063208257476, "learning_rate": 6.528169992496802e-07, "loss": 0.5304, "step": 2050 }, { "epoch": 0.840616834878836, "grad_norm": 0.8511160117190656, "learning_rate": 6.495394684925843e-07, "loss": 0.5431, "step": 2051 }, { "epoch": 0.8410266919411855, "grad_norm": 0.9177785357827354, "learning_rate": 6.462696144011149e-07, "loss": 0.6495, "step": 2052 }, { "epoch": 0.841436549003535, "grad_norm": 0.8699092473248573, "learning_rate": 6.430074427451371e-07, "loss": 0.6085, "step": 2053 }, { "epoch": 0.8418464060658846, "grad_norm": 0.9449232425369916, "learning_rate": 6.397529592809615e-07, "loss": 0.6036, "step": 2054 }, { "epoch": 0.842256263128234, "grad_norm": 0.932610257465311, "learning_rate": 6.365061697513342e-07, "loss": 0.5986, "step": 2055 }, { "epoch": 0.8426661201905835, "grad_norm": 0.9346763601753311, "learning_rate": 6.332670798854201e-07, "loss": 0.6181, "step": 2056 }, { "epoch": 0.843075977252933, "grad_norm": 0.8488140338962235, "learning_rate": 6.300356953988024e-07, "loss": 0.6544, "step": 2057 }, { "epoch": 0.8434858343152826, "grad_norm": 0.9750441658981688, "learning_rate": 6.268120219934631e-07, "loss": 0.6138, "step": 2058 }, { "epoch": 0.8438956913776321, "grad_norm": 0.9314988477839027, "learning_rate": 6.235960653577828e-07, "loss": 0.6239, "step": 2059 }, { "epoch": 0.8443055484399815, "grad_norm": 0.9326025868390853, "learning_rate": 6.203878311665196e-07, "loss": 0.6424, "step": 2060 }, { "epoch": 0.844715405502331, "grad_norm": 0.9066289681885018, "learning_rate": 6.171873250808103e-07, "loss": 0.6074, "step": 2061 }, { "epoch": 0.8451252625646806, "grad_norm": 0.9482313922215743, "learning_rate": 6.139945527481512e-07, "loss": 0.6063, "step": 2062 }, { "epoch": 0.8455351196270301, "grad_norm": 0.9180749932442456, "learning_rate": 6.108095198023922e-07, "loss": 0.6281, "step": 2063 }, { "epoch": 0.8459449766893796, "grad_norm": 0.8724225133177839, "learning_rate": 6.076322318637301e-07, "loss": 0.6006, "step": 2064 }, { "epoch": 0.846354833751729, "grad_norm": 0.9370191471403171, "learning_rate": 6.044626945386894e-07, "loss": 0.5912, "step": 2065 }, { "epoch": 0.8467646908140786, "grad_norm": 0.8963060662815779, "learning_rate": 6.013009134201237e-07, "loss": 0.5506, "step": 2066 }, { "epoch": 0.8471745478764281, "grad_norm": 1.036461787491957, "learning_rate": 5.981468940871959e-07, "loss": 0.6403, "step": 2067 }, { "epoch": 0.8475844049387776, "grad_norm": 0.8042978925744757, "learning_rate": 5.950006421053773e-07, "loss": 0.5869, "step": 2068 }, { "epoch": 0.8479942620011272, "grad_norm": 0.9616360474876823, "learning_rate": 5.918621630264271e-07, "loss": 0.5907, "step": 2069 }, { "epoch": 0.8484041190634766, "grad_norm": 0.8578701738152952, "learning_rate": 5.887314623883944e-07, "loss": 0.5622, "step": 2070 }, { "epoch": 0.8488139761258261, "grad_norm": 0.8856372036162837, "learning_rate": 5.856085457156013e-07, "loss": 0.6114, "step": 2071 }, { "epoch": 0.8492238331881756, "grad_norm": 0.8838796031956833, "learning_rate": 5.82493418518632e-07, "loss": 0.5945, "step": 2072 }, { "epoch": 0.8496336902505252, "grad_norm": 0.8985554418623318, "learning_rate": 5.793860862943291e-07, "loss": 0.5873, "step": 2073 }, { "epoch": 0.8500435473128747, "grad_norm": 0.909677021814174, "learning_rate": 5.762865545257768e-07, "loss": 0.5865, "step": 2074 }, { "epoch": 0.8504534043752241, "grad_norm": 0.8450651007657664, "learning_rate": 5.731948286822992e-07, "loss": 0.5773, "step": 2075 }, { "epoch": 0.8508632614375736, "grad_norm": 0.8840692463855224, "learning_rate": 5.701109142194422e-07, "loss": 0.6121, "step": 2076 }, { "epoch": 0.8512731184999232, "grad_norm": 0.9149248579933463, "learning_rate": 5.670348165789718e-07, "loss": 0.5822, "step": 2077 }, { "epoch": 0.8516829755622727, "grad_norm": 0.8865422853634312, "learning_rate": 5.639665411888584e-07, "loss": 0.5554, "step": 2078 }, { "epoch": 0.8520928326246222, "grad_norm": 0.8718148511876513, "learning_rate": 5.609060934632682e-07, "loss": 0.6036, "step": 2079 }, { "epoch": 0.8525026896869716, "grad_norm": 0.9297185439353768, "learning_rate": 5.578534788025592e-07, "loss": 0.6481, "step": 2080 }, { "epoch": 0.8529125467493212, "grad_norm": 0.8354005293963193, "learning_rate": 5.54808702593263e-07, "loss": 0.5422, "step": 2081 }, { "epoch": 0.8533224038116707, "grad_norm": 0.9611079677089466, "learning_rate": 5.517717702080844e-07, "loss": 0.6232, "step": 2082 }, { "epoch": 0.8537322608740202, "grad_norm": 0.9365871377130434, "learning_rate": 5.487426870058815e-07, "loss": 0.6358, "step": 2083 }, { "epoch": 0.8541421179363697, "grad_norm": 0.9067335912009503, "learning_rate": 5.457214583316678e-07, "loss": 0.5837, "step": 2084 }, { "epoch": 0.8545519749987192, "grad_norm": 0.904733361936646, "learning_rate": 5.427080895165926e-07, "loss": 0.6006, "step": 2085 }, { "epoch": 0.8549618320610687, "grad_norm": 0.8910095004929571, "learning_rate": 5.397025858779392e-07, "loss": 0.5906, "step": 2086 }, { "epoch": 0.8553716891234182, "grad_norm": 0.9734385207988107, "learning_rate": 5.367049527191093e-07, "loss": 0.6717, "step": 2087 }, { "epoch": 0.8557815461857677, "grad_norm": 0.8811717191231156, "learning_rate": 5.337151953296188e-07, "loss": 0.6177, "step": 2088 }, { "epoch": 0.8561914032481173, "grad_norm": 0.9076424911401267, "learning_rate": 5.307333189850866e-07, "loss": 0.5988, "step": 2089 }, { "epoch": 0.8566012603104667, "grad_norm": 0.8769011996531583, "learning_rate": 5.277593289472227e-07, "loss": 0.5634, "step": 2090 }, { "epoch": 0.8570111173728162, "grad_norm": 0.9367003145036098, "learning_rate": 5.247932304638243e-07, "loss": 0.6084, "step": 2091 }, { "epoch": 0.8574209744351657, "grad_norm": 0.9496727279236847, "learning_rate": 5.218350287687596e-07, "loss": 0.6315, "step": 2092 }, { "epoch": 0.8578308314975153, "grad_norm": 0.8968313797954266, "learning_rate": 5.188847290819665e-07, "loss": 0.6011, "step": 2093 }, { "epoch": 0.8582406885598648, "grad_norm": 0.8942760409813149, "learning_rate": 5.159423366094369e-07, "loss": 0.5871, "step": 2094 }, { "epoch": 0.8586505456222142, "grad_norm": 0.8905278669604273, "learning_rate": 5.130078565432089e-07, "loss": 0.5781, "step": 2095 }, { "epoch": 0.8590604026845637, "grad_norm": 0.8780524701554937, "learning_rate": 5.100812940613609e-07, "loss": 0.6045, "step": 2096 }, { "epoch": 0.8594702597469133, "grad_norm": 0.9190709475548663, "learning_rate": 5.071626543279989e-07, "loss": 0.5948, "step": 2097 }, { "epoch": 0.8598801168092628, "grad_norm": 0.8944193039426347, "learning_rate": 5.042519424932512e-07, "loss": 0.5568, "step": 2098 }, { "epoch": 0.8602899738716123, "grad_norm": 0.8521360971947538, "learning_rate": 5.013491636932527e-07, "loss": 0.5724, "step": 2099 }, { "epoch": 0.8606998309339617, "grad_norm": 0.8944490806302615, "learning_rate": 4.984543230501443e-07, "loss": 0.5908, "step": 2100 }, { "epoch": 0.8611096879963113, "grad_norm": 0.9148537281789568, "learning_rate": 4.955674256720544e-07, "loss": 0.6044, "step": 2101 }, { "epoch": 0.8615195450586608, "grad_norm": 0.873959640120682, "learning_rate": 4.926884766531004e-07, "loss": 0.5875, "step": 2102 }, { "epoch": 0.8619294021210103, "grad_norm": 0.9375222442838818, "learning_rate": 4.898174810733703e-07, "loss": 0.6358, "step": 2103 }, { "epoch": 0.8623392591833599, "grad_norm": 0.8784099740961642, "learning_rate": 4.869544439989204e-07, "loss": 0.6078, "step": 2104 }, { "epoch": 0.8627491162457093, "grad_norm": 0.913545791357898, "learning_rate": 4.840993704817615e-07, "loss": 0.629, "step": 2105 }, { "epoch": 0.8631589733080588, "grad_norm": 0.8873686268038194, "learning_rate": 4.81252265559854e-07, "loss": 0.6498, "step": 2106 }, { "epoch": 0.8635688303704083, "grad_norm": 0.952627853878165, "learning_rate": 4.784131342570969e-07, "loss": 0.5324, "step": 2107 }, { "epoch": 0.8639786874327579, "grad_norm": 0.9138533866589095, "learning_rate": 4.7558198158331735e-07, "loss": 0.6172, "step": 2108 }, { "epoch": 0.8643885444951074, "grad_norm": 0.9033973756097475, "learning_rate": 4.727588125342669e-07, "loss": 0.6042, "step": 2109 }, { "epoch": 0.8647984015574568, "grad_norm": 0.8742411152368539, "learning_rate": 4.6994363209160733e-07, "loss": 0.5947, "step": 2110 }, { "epoch": 0.8652082586198063, "grad_norm": 0.9536270777110067, "learning_rate": 4.6713644522290324e-07, "loss": 0.58, "step": 2111 }, { "epoch": 0.8656181156821559, "grad_norm": 0.8718154097494986, "learning_rate": 4.643372568816157e-07, "loss": 0.6065, "step": 2112 }, { "epoch": 0.8660279727445054, "grad_norm": 0.8771738503819537, "learning_rate": 4.6154607200709246e-07, "loss": 0.5972, "step": 2113 }, { "epoch": 0.8664378298068549, "grad_norm": 0.8836379077464842, "learning_rate": 4.58762895524556e-07, "loss": 0.5744, "step": 2114 }, { "epoch": 0.8668476868692043, "grad_norm": 0.912471367918489, "learning_rate": 4.559877323451001e-07, "loss": 0.6185, "step": 2115 }, { "epoch": 0.8672575439315539, "grad_norm": 0.8975024271494856, "learning_rate": 4.532205873656781e-07, "loss": 0.5911, "step": 2116 }, { "epoch": 0.8676674009939034, "grad_norm": 0.9012172847071127, "learning_rate": 4.504614654690925e-07, "loss": 0.5907, "step": 2117 }, { "epoch": 0.8680772580562529, "grad_norm": 0.9029374956658528, "learning_rate": 4.477103715239922e-07, "loss": 0.5933, "step": 2118 }, { "epoch": 0.8684871151186024, "grad_norm": 0.8713105268821532, "learning_rate": 4.4496731038485675e-07, "loss": 0.6086, "step": 2119 }, { "epoch": 0.8688969721809519, "grad_norm": 0.8372420922136203, "learning_rate": 4.422322868919937e-07, "loss": 0.5685, "step": 2120 }, { "epoch": 0.8693068292433014, "grad_norm": 0.8586949806791252, "learning_rate": 4.395053058715254e-07, "loss": 0.6047, "step": 2121 }, { "epoch": 0.8697166863056509, "grad_norm": 0.8987064048637243, "learning_rate": 4.367863721353866e-07, "loss": 0.6032, "step": 2122 }, { "epoch": 0.8701265433680004, "grad_norm": 0.9201184368718575, "learning_rate": 4.340754904813066e-07, "loss": 0.6006, "step": 2123 }, { "epoch": 0.87053640043035, "grad_norm": 0.8789441037175114, "learning_rate": 4.3137266569281245e-07, "loss": 0.5869, "step": 2124 }, { "epoch": 0.8709462574926994, "grad_norm": 0.8928642371359647, "learning_rate": 4.286779025392085e-07, "loss": 0.5625, "step": 2125 }, { "epoch": 0.8713561145550489, "grad_norm": 0.9979708290656718, "learning_rate": 4.259912057755783e-07, "loss": 0.588, "step": 2126 }, { "epoch": 0.8717659716173984, "grad_norm": 0.8862328585451924, "learning_rate": 4.2331258014276956e-07, "loss": 0.6149, "step": 2127 }, { "epoch": 0.872175828679748, "grad_norm": 0.9426764097492047, "learning_rate": 4.206420303673875e-07, "loss": 0.6263, "step": 2128 }, { "epoch": 0.8725856857420975, "grad_norm": 0.871748211635695, "learning_rate": 4.1797956116178926e-07, "loss": 0.5582, "step": 2129 }, { "epoch": 0.8729955428044469, "grad_norm": 0.8320802053065114, "learning_rate": 4.1532517722407074e-07, "loss": 0.5923, "step": 2130 }, { "epoch": 0.8734053998667964, "grad_norm": 0.9403222501003953, "learning_rate": 4.1267888323806294e-07, "loss": 0.6288, "step": 2131 }, { "epoch": 0.873815256929146, "grad_norm": 0.9549420399735035, "learning_rate": 4.1004068387331953e-07, "loss": 0.6089, "step": 2132 }, { "epoch": 0.8742251139914955, "grad_norm": 0.8857594545542118, "learning_rate": 4.074105837851122e-07, "loss": 0.5789, "step": 2133 }, { "epoch": 0.874634971053845, "grad_norm": 0.9268718842177583, "learning_rate": 4.0478858761442253e-07, "loss": 0.575, "step": 2134 }, { "epoch": 0.8750448281161944, "grad_norm": 0.8887834563619261, "learning_rate": 4.021746999879278e-07, "loss": 0.5982, "step": 2135 }, { "epoch": 0.875454685178544, "grad_norm": 0.933833790851661, "learning_rate": 3.995689255180019e-07, "loss": 0.5758, "step": 2136 }, { "epoch": 0.8758645422408935, "grad_norm": 1.0005454759056844, "learning_rate": 3.9697126880269774e-07, "loss": 0.6225, "step": 2137 }, { "epoch": 0.876274399303243, "grad_norm": 0.8883127059943112, "learning_rate": 3.9438173442575e-07, "loss": 0.6139, "step": 2138 }, { "epoch": 0.8766842563655926, "grad_norm": 0.9081393536106999, "learning_rate": 3.91800326956554e-07, "loss": 0.5782, "step": 2139 }, { "epoch": 0.877094113427942, "grad_norm": 0.8912893214854087, "learning_rate": 3.892270509501711e-07, "loss": 0.5964, "step": 2140 }, { "epoch": 0.8775039704902915, "grad_norm": 0.8537810977374987, "learning_rate": 3.8666191094730967e-07, "loss": 0.5903, "step": 2141 }, { "epoch": 0.877913827552641, "grad_norm": 0.8478722234206522, "learning_rate": 3.841049114743239e-07, "loss": 0.582, "step": 2142 }, { "epoch": 0.8783236846149906, "grad_norm": 0.9079503998251378, "learning_rate": 3.815560570432031e-07, "loss": 0.605, "step": 2143 }, { "epoch": 0.8787335416773401, "grad_norm": 0.8689990357451747, "learning_rate": 3.790153521515616e-07, "loss": 0.5846, "step": 2144 }, { "epoch": 0.8791433987396895, "grad_norm": 0.8905331879931616, "learning_rate": 3.7648280128263826e-07, "loss": 0.5848, "step": 2145 }, { "epoch": 0.879553255802039, "grad_norm": 0.894890635130002, "learning_rate": 3.739584089052789e-07, "loss": 0.5843, "step": 2146 }, { "epoch": 0.8799631128643886, "grad_norm": 0.8019063511471083, "learning_rate": 3.71442179473937e-07, "loss": 0.5455, "step": 2147 }, { "epoch": 0.8803729699267381, "grad_norm": 0.896934418364146, "learning_rate": 3.6893411742865814e-07, "loss": 0.632, "step": 2148 }, { "epoch": 0.8807828269890876, "grad_norm": 0.9165526369833742, "learning_rate": 3.664342271950799e-07, "loss": 0.6134, "step": 2149 }, { "epoch": 0.881192684051437, "grad_norm": 0.9155004355315545, "learning_rate": 3.6394251318441763e-07, "loss": 0.6156, "step": 2150 }, { "epoch": 0.8816025411137866, "grad_norm": 0.9391813211051775, "learning_rate": 3.6145897979345977e-07, "loss": 0.5764, "step": 2151 }, { "epoch": 0.8820123981761361, "grad_norm": 0.8994723527287757, "learning_rate": 3.589836314045603e-07, "loss": 0.5737, "step": 2152 }, { "epoch": 0.8824222552384856, "grad_norm": 0.8364339985911291, "learning_rate": 3.5651647238562904e-07, "loss": 0.5634, "step": 2153 }, { "epoch": 0.8828321123008351, "grad_norm": 0.9465523200025411, "learning_rate": 3.540575070901259e-07, "loss": 0.6062, "step": 2154 }, { "epoch": 0.8832419693631846, "grad_norm": 0.9062941811512927, "learning_rate": 3.5160673985705217e-07, "loss": 0.599, "step": 2155 }, { "epoch": 0.8836518264255341, "grad_norm": 0.9294137078704131, "learning_rate": 3.4916417501094424e-07, "loss": 0.6483, "step": 2156 }, { "epoch": 0.8840616834878836, "grad_norm": 0.9513951283838668, "learning_rate": 3.467298168618616e-07, "loss": 0.6688, "step": 2157 }, { "epoch": 0.8844715405502331, "grad_norm": 0.9379112914599437, "learning_rate": 3.4430366970538755e-07, "loss": 0.6007, "step": 2158 }, { "epoch": 0.8848813976125827, "grad_norm": 0.9292153333159985, "learning_rate": 3.4188573782261203e-07, "loss": 0.638, "step": 2159 }, { "epoch": 0.8852912546749321, "grad_norm": 0.9752008243808401, "learning_rate": 3.3947602548013046e-07, "loss": 0.6427, "step": 2160 }, { "epoch": 0.8857011117372816, "grad_norm": 0.9159686043817655, "learning_rate": 3.3707453693003543e-07, "loss": 0.5903, "step": 2161 }, { "epoch": 0.8861109687996311, "grad_norm": 0.9088677004263518, "learning_rate": 3.34681276409905e-07, "loss": 0.6114, "step": 2162 }, { "epoch": 0.8865208258619807, "grad_norm": 0.8777321047979779, "learning_rate": 3.3229624814280216e-07, "loss": 0.5355, "step": 2163 }, { "epoch": 0.8869306829243302, "grad_norm": 0.8948435066121029, "learning_rate": 3.299194563372604e-07, "loss": 0.6086, "step": 2164 }, { "epoch": 0.8873405399866796, "grad_norm": 0.8708502247997444, "learning_rate": 3.275509051872816e-07, "loss": 0.6443, "step": 2165 }, { "epoch": 0.8877503970490291, "grad_norm": 0.8960395039899209, "learning_rate": 3.251905988723253e-07, "loss": 0.5778, "step": 2166 }, { "epoch": 0.8881602541113787, "grad_norm": 0.8914218601779839, "learning_rate": 3.228385415573021e-07, "loss": 0.614, "step": 2167 }, { "epoch": 0.8885701111737282, "grad_norm": 0.907770943992168, "learning_rate": 3.204947373925693e-07, "loss": 0.6366, "step": 2168 }, { "epoch": 0.8889799682360777, "grad_norm": 0.8921631425142479, "learning_rate": 3.181591905139175e-07, "loss": 0.5926, "step": 2169 }, { "epoch": 0.8893898252984271, "grad_norm": 0.912815004743411, "learning_rate": 3.1583190504256956e-07, "loss": 0.5648, "step": 2170 }, { "epoch": 0.8897996823607767, "grad_norm": 0.991567545840345, "learning_rate": 3.135128850851682e-07, "loss": 0.644, "step": 2171 }, { "epoch": 0.8902095394231262, "grad_norm": 0.8768806781594873, "learning_rate": 3.1120213473377346e-07, "loss": 0.58, "step": 2172 }, { "epoch": 0.8906193964854757, "grad_norm": 0.8259518843870569, "learning_rate": 3.08899658065851e-07, "loss": 0.5943, "step": 2173 }, { "epoch": 0.8910292535478253, "grad_norm": 0.8551628657369315, "learning_rate": 3.066054591442691e-07, "loss": 0.5958, "step": 2174 }, { "epoch": 0.8914391106101747, "grad_norm": 0.8894334428951951, "learning_rate": 3.043195420172879e-07, "loss": 0.5688, "step": 2175 }, { "epoch": 0.8918489676725242, "grad_norm": 0.9038904294807689, "learning_rate": 3.0204191071855347e-07, "loss": 0.5756, "step": 2176 }, { "epoch": 0.8922588247348737, "grad_norm": 0.862539534508749, "learning_rate": 2.997725692670928e-07, "loss": 0.5894, "step": 2177 }, { "epoch": 0.8926686817972233, "grad_norm": 0.835359989855224, "learning_rate": 2.97511521667303e-07, "loss": 0.592, "step": 2178 }, { "epoch": 0.8930785388595728, "grad_norm": 0.8598152035388135, "learning_rate": 2.9525877190894894e-07, "loss": 0.6038, "step": 2179 }, { "epoch": 0.8934883959219222, "grad_norm": 0.8231217549573135, "learning_rate": 2.93014323967149e-07, "loss": 0.5718, "step": 2180 }, { "epoch": 0.8938982529842717, "grad_norm": 0.9011508168023575, "learning_rate": 2.9077818180237693e-07, "loss": 0.6409, "step": 2181 }, { "epoch": 0.8943081100466213, "grad_norm": 0.9656620545116291, "learning_rate": 2.885503493604475e-07, "loss": 0.5865, "step": 2182 }, { "epoch": 0.8947179671089708, "grad_norm": 0.8614438601601626, "learning_rate": 2.8633083057251374e-07, "loss": 0.5414, "step": 2183 }, { "epoch": 0.8951278241713202, "grad_norm": 0.8848192477588062, "learning_rate": 2.84119629355058e-07, "loss": 0.5739, "step": 2184 }, { "epoch": 0.8955376812336697, "grad_norm": 0.8762094716731819, "learning_rate": 2.819167496098868e-07, "loss": 0.5478, "step": 2185 }, { "epoch": 0.8959475382960193, "grad_norm": 0.8517249992910771, "learning_rate": 2.7972219522412194e-07, "loss": 0.5693, "step": 2186 }, { "epoch": 0.8963573953583688, "grad_norm": 0.9159611630705358, "learning_rate": 2.775359700701946e-07, "loss": 0.6041, "step": 2187 }, { "epoch": 0.8967672524207183, "grad_norm": 0.9061660958158559, "learning_rate": 2.7535807800583957e-07, "loss": 0.5711, "step": 2188 }, { "epoch": 0.8971771094830677, "grad_norm": 0.8868585837551417, "learning_rate": 2.731885228740866e-07, "loss": 0.5879, "step": 2189 }, { "epoch": 0.8975869665454173, "grad_norm": 0.9154023459272088, "learning_rate": 2.7102730850325276e-07, "loss": 0.6253, "step": 2190 }, { "epoch": 0.8979968236077668, "grad_norm": 0.8442469736843361, "learning_rate": 2.688744387069403e-07, "loss": 0.5259, "step": 2191 }, { "epoch": 0.8984066806701163, "grad_norm": 0.8868201441524003, "learning_rate": 2.667299172840254e-07, "loss": 0.5732, "step": 2192 }, { "epoch": 0.8988165377324658, "grad_norm": 0.8985638806130297, "learning_rate": 2.6459374801865225e-07, "loss": 0.6011, "step": 2193 }, { "epoch": 0.8992263947948153, "grad_norm": 0.8826843072920587, "learning_rate": 2.6246593468022883e-07, "loss": 0.6338, "step": 2194 }, { "epoch": 0.8996362518571648, "grad_norm": 0.8064687881812549, "learning_rate": 2.603464810234185e-07, "loss": 0.5607, "step": 2195 }, { "epoch": 0.9000461089195143, "grad_norm": 0.9050182957204542, "learning_rate": 2.582353907881313e-07, "loss": 0.6301, "step": 2196 }, { "epoch": 0.9004559659818638, "grad_norm": 0.8627160226238599, "learning_rate": 2.5613266769952183e-07, "loss": 0.6495, "step": 2197 }, { "epoch": 0.9008658230442134, "grad_norm": 0.8625969582839287, "learning_rate": 2.5403831546797876e-07, "loss": 0.5931, "step": 2198 }, { "epoch": 0.9012756801065628, "grad_norm": 0.931359664995304, "learning_rate": 2.5195233778912086e-07, "loss": 0.6409, "step": 2199 }, { "epoch": 0.9016855371689123, "grad_norm": 0.8919011820615685, "learning_rate": 2.4987473834378826e-07, "loss": 0.5461, "step": 2200 }, { "epoch": 0.9020953942312618, "grad_norm": 0.8639643486013949, "learning_rate": 2.4780552079803897e-07, "loss": 0.5896, "step": 2201 }, { "epoch": 0.9025052512936114, "grad_norm": 0.8791783613650447, "learning_rate": 2.457446888031384e-07, "loss": 0.5799, "step": 2202 }, { "epoch": 0.9029151083559609, "grad_norm": 0.8805854914097088, "learning_rate": 2.436922459955576e-07, "loss": 0.602, "step": 2203 }, { "epoch": 0.9033249654183103, "grad_norm": 0.896398999607441, "learning_rate": 2.41648195996963e-07, "loss": 0.5575, "step": 2204 }, { "epoch": 0.9037348224806598, "grad_norm": 0.9021699143715028, "learning_rate": 2.396125424142104e-07, "loss": 0.5814, "step": 2205 }, { "epoch": 0.9041446795430094, "grad_norm": 0.8556319676261277, "learning_rate": 2.3758528883934097e-07, "loss": 0.5975, "step": 2206 }, { "epoch": 0.9045545366053589, "grad_norm": 0.8930638297897249, "learning_rate": 2.3556643884957432e-07, "loss": 0.5901, "step": 2207 }, { "epoch": 0.9049643936677084, "grad_norm": 0.8248636273967198, "learning_rate": 2.3355599600729916e-07, "loss": 0.5651, "step": 2208 }, { "epoch": 0.9053742507300578, "grad_norm": 0.9399108016991761, "learning_rate": 2.3155396386006945e-07, "loss": 0.6109, "step": 2209 }, { "epoch": 0.9057841077924074, "grad_norm": 0.8298660818850947, "learning_rate": 2.2956034594060095e-07, "loss": 0.5835, "step": 2210 }, { "epoch": 0.9061939648547569, "grad_norm": 0.9043365371236329, "learning_rate": 2.2757514576675854e-07, "loss": 0.627, "step": 2211 }, { "epoch": 0.9066038219171064, "grad_norm": 0.855990680644192, "learning_rate": 2.2559836684155456e-07, "loss": 0.6022, "step": 2212 }, { "epoch": 0.907013678979456, "grad_norm": 0.8745170387325478, "learning_rate": 2.2363001265314376e-07, "loss": 0.5505, "step": 2213 }, { "epoch": 0.9074235360418054, "grad_norm": 0.8505092237965338, "learning_rate": 2.2167008667481005e-07, "loss": 0.5901, "step": 2214 }, { "epoch": 0.9078333931041549, "grad_norm": 0.9336689072863538, "learning_rate": 2.1971859236497084e-07, "loss": 0.6144, "step": 2215 }, { "epoch": 0.9082432501665044, "grad_norm": 0.8626562936747512, "learning_rate": 2.1777553316715994e-07, "loss": 0.6447, "step": 2216 }, { "epoch": 0.908653107228854, "grad_norm": 0.805936686561404, "learning_rate": 2.1584091251003192e-07, "loss": 0.5468, "step": 2217 }, { "epoch": 0.9090629642912035, "grad_norm": 0.9002926936296656, "learning_rate": 2.1391473380734663e-07, "loss": 0.6024, "step": 2218 }, { "epoch": 0.9094728213535529, "grad_norm": 0.8401308004532361, "learning_rate": 2.1199700045797077e-07, "loss": 0.5887, "step": 2219 }, { "epoch": 0.9098826784159024, "grad_norm": 0.907336848845651, "learning_rate": 2.1008771584586584e-07, "loss": 0.6439, "step": 2220 }, { "epoch": 0.910292535478252, "grad_norm": 0.8438706520430641, "learning_rate": 2.0818688334008853e-07, "loss": 0.5869, "step": 2221 }, { "epoch": 0.9107023925406015, "grad_norm": 0.9056092884628097, "learning_rate": 2.06294506294778e-07, "loss": 0.6176, "step": 2222 }, { "epoch": 0.911112249602951, "grad_norm": 0.912616209517273, "learning_rate": 2.044105880491548e-07, "loss": 0.6026, "step": 2223 }, { "epoch": 0.9115221066653004, "grad_norm": 0.8731489404331498, "learning_rate": 2.0253513192751374e-07, "loss": 0.5726, "step": 2224 }, { "epoch": 0.91193196372765, "grad_norm": 0.8843790494982418, "learning_rate": 2.006681412392153e-07, "loss": 0.6086, "step": 2225 }, { "epoch": 0.9123418207899995, "grad_norm": 0.9074996667194064, "learning_rate": 1.9880961927868537e-07, "loss": 0.6001, "step": 2226 }, { "epoch": 0.912751677852349, "grad_norm": 0.8409770811765664, "learning_rate": 1.9695956932540394e-07, "loss": 0.5844, "step": 2227 }, { "epoch": 0.9131615349146985, "grad_norm": 0.829396446825123, "learning_rate": 1.9511799464390247e-07, "loss": 0.5739, "step": 2228 }, { "epoch": 0.913571391977048, "grad_norm": 0.9080808194958845, "learning_rate": 1.932848984837582e-07, "loss": 0.6373, "step": 2229 }, { "epoch": 0.9139812490393975, "grad_norm": 0.9380079611876301, "learning_rate": 1.9146028407958483e-07, "loss": 0.613, "step": 2230 }, { "epoch": 0.914391106101747, "grad_norm": 0.8449288491132558, "learning_rate": 1.8964415465103246e-07, "loss": 0.5757, "step": 2231 }, { "epoch": 0.9148009631640965, "grad_norm": 0.868968197813747, "learning_rate": 1.8783651340277597e-07, "loss": 0.6156, "step": 2232 }, { "epoch": 0.9152108202264461, "grad_norm": 0.9653448109239274, "learning_rate": 1.8603736352451608e-07, "loss": 0.6563, "step": 2233 }, { "epoch": 0.9156206772887955, "grad_norm": 0.8731475485461045, "learning_rate": 1.8424670819096545e-07, "loss": 0.6039, "step": 2234 }, { "epoch": 0.916030534351145, "grad_norm": 0.8706784092959162, "learning_rate": 1.8246455056185164e-07, "loss": 0.5647, "step": 2235 }, { "epoch": 0.9164403914134945, "grad_norm": 0.8675145656154148, "learning_rate": 1.8069089378190408e-07, "loss": 0.5692, "step": 2236 }, { "epoch": 0.9168502484758441, "grad_norm": 0.8890308913708053, "learning_rate": 1.7892574098085535e-07, "loss": 0.5455, "step": 2237 }, { "epoch": 0.9172601055381936, "grad_norm": 0.7883433833410753, "learning_rate": 1.771690952734284e-07, "loss": 0.5606, "step": 2238 }, { "epoch": 0.917669962600543, "grad_norm": 0.890562346363791, "learning_rate": 1.754209597593387e-07, "loss": 0.6256, "step": 2239 }, { "epoch": 0.9180798196628925, "grad_norm": 0.841634222943691, "learning_rate": 1.736813375232821e-07, "loss": 0.5424, "step": 2240 }, { "epoch": 0.9184896767252421, "grad_norm": 0.8599228801314799, "learning_rate": 1.7195023163493253e-07, "loss": 0.5672, "step": 2241 }, { "epoch": 0.9188995337875916, "grad_norm": 0.8882252414544117, "learning_rate": 1.702276451489382e-07, "loss": 0.5642, "step": 2242 }, { "epoch": 0.9193093908499411, "grad_norm": 0.8949435011003288, "learning_rate": 1.685135811049121e-07, "loss": 0.6179, "step": 2243 }, { "epoch": 0.9197192479122905, "grad_norm": 0.9213020013195863, "learning_rate": 1.66808042527431e-07, "loss": 0.6066, "step": 2244 }, { "epoch": 0.9201291049746401, "grad_norm": 0.9168183756529438, "learning_rate": 1.6511103242602466e-07, "loss": 0.5991, "step": 2245 }, { "epoch": 0.9205389620369896, "grad_norm": 0.9229632531950482, "learning_rate": 1.634225537951778e-07, "loss": 0.5933, "step": 2246 }, { "epoch": 0.9209488190993391, "grad_norm": 0.8687429455707264, "learning_rate": 1.6174260961431875e-07, "loss": 0.5611, "step": 2247 }, { "epoch": 0.9213586761616887, "grad_norm": 0.9108550686658172, "learning_rate": 1.6007120284781518e-07, "loss": 0.5924, "step": 2248 }, { "epoch": 0.9217685332240381, "grad_norm": 0.8932591067780931, "learning_rate": 1.5840833644497176e-07, "loss": 0.5983, "step": 2249 }, { "epoch": 0.9221783902863876, "grad_norm": 0.9172263350504684, "learning_rate": 1.567540133400225e-07, "loss": 0.6306, "step": 2250 }, { "epoch": 0.9225882473487371, "grad_norm": 0.8520371736226666, "learning_rate": 1.5510823645212726e-07, "loss": 0.5904, "step": 2251 }, { "epoch": 0.9229981044110867, "grad_norm": 0.8228445604611377, "learning_rate": 1.5347100868536246e-07, "loss": 0.5905, "step": 2252 }, { "epoch": 0.9234079614734362, "grad_norm": 0.9038144726891748, "learning_rate": 1.5184233292872274e-07, "loss": 0.5792, "step": 2253 }, { "epoch": 0.9238178185357856, "grad_norm": 0.9327891269475601, "learning_rate": 1.5022221205611033e-07, "loss": 0.6015, "step": 2254 }, { "epoch": 0.9242276755981351, "grad_norm": 0.9654996912936659, "learning_rate": 1.4861064892633114e-07, "loss": 0.6148, "step": 2255 }, { "epoch": 0.9246375326604847, "grad_norm": 0.8987113726352637, "learning_rate": 1.4700764638309217e-07, "loss": 0.545, "step": 2256 }, { "epoch": 0.9250473897228342, "grad_norm": 0.8901760171825935, "learning_rate": 1.4541320725499296e-07, "loss": 0.6067, "step": 2257 }, { "epoch": 0.9254572467851837, "grad_norm": 0.840695682736227, "learning_rate": 1.4382733435552464e-07, "loss": 0.5538, "step": 2258 }, { "epoch": 0.9258671038475331, "grad_norm": 0.8446941041815835, "learning_rate": 1.422500304830604e-07, "loss": 0.5568, "step": 2259 }, { "epoch": 0.9262769609098827, "grad_norm": 0.8767402239774456, "learning_rate": 1.4068129842085386e-07, "loss": 0.6219, "step": 2260 }, { "epoch": 0.9266868179722322, "grad_norm": 0.9104611764693494, "learning_rate": 1.3912114093703244e-07, "loss": 0.5981, "step": 2261 }, { "epoch": 0.9270966750345817, "grad_norm": 0.8898259556199708, "learning_rate": 1.3756956078459505e-07, "loss": 0.6055, "step": 2262 }, { "epoch": 0.9275065320969312, "grad_norm": 0.8675290840676714, "learning_rate": 1.3602656070140275e-07, "loss": 0.6178, "step": 2263 }, { "epoch": 0.9279163891592807, "grad_norm": 0.9290970213968772, "learning_rate": 1.3449214341017814e-07, "loss": 0.6105, "step": 2264 }, { "epoch": 0.9283262462216302, "grad_norm": 0.9241579322086784, "learning_rate": 1.329663116184987e-07, "loss": 0.6386, "step": 2265 }, { "epoch": 0.9287361032839797, "grad_norm": 0.8012605952212051, "learning_rate": 1.3144906801879242e-07, "loss": 0.5351, "step": 2266 }, { "epoch": 0.9291459603463292, "grad_norm": 0.9452384308995927, "learning_rate": 1.2994041528833267e-07, "loss": 0.6517, "step": 2267 }, { "epoch": 0.9295558174086788, "grad_norm": 0.8750400102607148, "learning_rate": 1.2844035608923222e-07, "loss": 0.5793, "step": 2268 }, { "epoch": 0.9299656744710282, "grad_norm": 0.8887862419757315, "learning_rate": 1.2694889306844372e-07, "loss": 0.5809, "step": 2269 }, { "epoch": 0.9303755315333777, "grad_norm": 0.8400610572714103, "learning_rate": 1.2546602885774695e-07, "loss": 0.568, "step": 2270 }, { "epoch": 0.9307853885957272, "grad_norm": 0.9412563897567954, "learning_rate": 1.2399176607375162e-07, "loss": 0.6145, "step": 2271 }, { "epoch": 0.9311952456580768, "grad_norm": 0.9165564032102114, "learning_rate": 1.2252610731788793e-07, "loss": 0.6448, "step": 2272 }, { "epoch": 0.9316051027204263, "grad_norm": 0.8821969813915393, "learning_rate": 1.2106905517640488e-07, "loss": 0.6037, "step": 2273 }, { "epoch": 0.9320149597827757, "grad_norm": 0.8776856854062772, "learning_rate": 1.196206122203647e-07, "loss": 0.5762, "step": 2274 }, { "epoch": 0.9324248168451252, "grad_norm": 0.9122861546568017, "learning_rate": 1.1818078100563569e-07, "loss": 0.5558, "step": 2275 }, { "epoch": 0.9328346739074748, "grad_norm": 0.9270168667620055, "learning_rate": 1.1674956407289439e-07, "loss": 0.6104, "step": 2276 }, { "epoch": 0.9332445309698243, "grad_norm": 0.9069148810167252, "learning_rate": 1.1532696394761234e-07, "loss": 0.577, "step": 2277 }, { "epoch": 0.9336543880321738, "grad_norm": 0.9069752860517225, "learning_rate": 1.1391298314006038e-07, "loss": 0.5934, "step": 2278 }, { "epoch": 0.9340642450945232, "grad_norm": 0.9079174024955636, "learning_rate": 1.1250762414529604e-07, "loss": 0.6308, "step": 2279 }, { "epoch": 0.9344741021568728, "grad_norm": 0.9249791900521629, "learning_rate": 1.1111088944316673e-07, "loss": 0.6462, "step": 2280 }, { "epoch": 0.9348839592192223, "grad_norm": 0.8056245412279032, "learning_rate": 1.0972278149829929e-07, "loss": 0.5214, "step": 2281 }, { "epoch": 0.9352938162815718, "grad_norm": 0.8876982096998818, "learning_rate": 1.083433027600983e-07, "loss": 0.6176, "step": 2282 }, { "epoch": 0.9357036733439213, "grad_norm": 0.8419674254560541, "learning_rate": 1.0697245566274384e-07, "loss": 0.5804, "step": 2283 }, { "epoch": 0.9361135304062708, "grad_norm": 0.914294174247639, "learning_rate": 1.0561024262518204e-07, "loss": 0.6249, "step": 2284 }, { "epoch": 0.9365233874686203, "grad_norm": 0.8610601351100121, "learning_rate": 1.0425666605112516e-07, "loss": 0.6229, "step": 2285 }, { "epoch": 0.9369332445309698, "grad_norm": 0.834943389588605, "learning_rate": 1.0291172832904539e-07, "loss": 0.5764, "step": 2286 }, { "epoch": 0.9373431015933193, "grad_norm": 0.8441512489767133, "learning_rate": 1.01575431832171e-07, "loss": 0.5984, "step": 2287 }, { "epoch": 0.9377529586556689, "grad_norm": 0.8527082609168035, "learning_rate": 1.0024777891848358e-07, "loss": 0.6173, "step": 2288 }, { "epoch": 0.9381628157180183, "grad_norm": 0.8773595679522962, "learning_rate": 9.892877193071082e-08, "loss": 0.5824, "step": 2289 }, { "epoch": 0.9385726727803678, "grad_norm": 0.8888954964591689, "learning_rate": 9.761841319632426e-08, "loss": 0.642, "step": 2290 }, { "epoch": 0.9389825298427174, "grad_norm": 0.8713948689678723, "learning_rate": 9.631670502753654e-08, "loss": 0.617, "step": 2291 }, { "epoch": 0.9393923869050669, "grad_norm": 0.8468216742066577, "learning_rate": 9.50236497212953e-08, "loss": 0.5449, "step": 2292 }, { "epoch": 0.9398022439674164, "grad_norm": 0.9821663137953306, "learning_rate": 9.37392495592776e-08, "loss": 0.6579, "step": 2293 }, { "epoch": 0.9402121010297658, "grad_norm": 0.8910598437990892, "learning_rate": 9.246350680789163e-08, "loss": 0.6089, "step": 2294 }, { "epoch": 0.9406219580921154, "grad_norm": 0.8883509377232548, "learning_rate": 9.119642371826498e-08, "loss": 0.5924, "step": 2295 }, { "epoch": 0.9410318151544649, "grad_norm": 0.8640923361942259, "learning_rate": 8.993800252624863e-08, "loss": 0.6262, "step": 2296 }, { "epoch": 0.9414416722168144, "grad_norm": 0.902417695314022, "learning_rate": 8.868824545240573e-08, "loss": 0.5699, "step": 2297 }, { "epoch": 0.9418515292791639, "grad_norm": 0.9128185889642079, "learning_rate": 8.744715470201337e-08, "loss": 0.566, "step": 2298 }, { "epoch": 0.9422613863415134, "grad_norm": 0.8768706340474982, "learning_rate": 8.621473246505475e-08, "loss": 0.6065, "step": 2299 }, { "epoch": 0.9426712434038629, "grad_norm": 0.9077420477878166, "learning_rate": 8.49909809162186e-08, "loss": 0.6318, "step": 2300 }, { "epoch": 0.9430811004662124, "grad_norm": 0.9772658966703641, "learning_rate": 8.377590221489207e-08, "loss": 0.661, "step": 2301 }, { "epoch": 0.9434909575285619, "grad_norm": 0.905172600220024, "learning_rate": 8.256949850516006e-08, "loss": 0.5991, "step": 2302 }, { "epoch": 0.9439008145909115, "grad_norm": 0.9250400157380279, "learning_rate": 8.137177191579748e-08, "loss": 0.551, "step": 2303 }, { "epoch": 0.9443106716532609, "grad_norm": 0.9065920302075992, "learning_rate": 8.018272456027043e-08, "loss": 0.6171, "step": 2304 }, { "epoch": 0.9447205287156104, "grad_norm": 0.9301284453710261, "learning_rate": 7.900235853672888e-08, "loss": 0.6605, "step": 2305 }, { "epoch": 0.9451303857779599, "grad_norm": 0.8647951395657065, "learning_rate": 7.783067592800398e-08, "loss": 0.6005, "step": 2306 }, { "epoch": 0.9455402428403095, "grad_norm": 0.8734467531080343, "learning_rate": 7.666767880160464e-08, "loss": 0.6256, "step": 2307 }, { "epoch": 0.945950099902659, "grad_norm": 0.9155509558314178, "learning_rate": 7.551336920971375e-08, "loss": 0.602, "step": 2308 }, { "epoch": 0.9463599569650084, "grad_norm": 0.949971453100473, "learning_rate": 7.436774918918365e-08, "loss": 0.6555, "step": 2309 }, { "epoch": 0.9467698140273579, "grad_norm": 0.9324579118271419, "learning_rate": 7.32308207615351e-08, "loss": 0.6063, "step": 2310 }, { "epoch": 0.9471796710897075, "grad_norm": 0.9234211582716132, "learning_rate": 7.210258593294994e-08, "loss": 0.6244, "step": 2311 }, { "epoch": 0.947589528152057, "grad_norm": 0.9268170927094423, "learning_rate": 7.09830466942718e-08, "loss": 0.5994, "step": 2312 }, { "epoch": 0.9479993852144065, "grad_norm": 0.8381094893271532, "learning_rate": 6.987220502099823e-08, "loss": 0.5708, "step": 2313 }, { "epoch": 0.9484092422767559, "grad_norm": 0.8694507186799701, "learning_rate": 6.877006287328125e-08, "loss": 0.6137, "step": 2314 }, { "epoch": 0.9488190993391055, "grad_norm": 0.861676823582488, "learning_rate": 6.767662219591964e-08, "loss": 0.59, "step": 2315 }, { "epoch": 0.949228956401455, "grad_norm": 0.9167524695404661, "learning_rate": 6.659188491836111e-08, "loss": 0.6286, "step": 2316 }, { "epoch": 0.9496388134638045, "grad_norm": 0.9587984777498696, "learning_rate": 6.551585295469287e-08, "loss": 0.6528, "step": 2317 }, { "epoch": 0.950048670526154, "grad_norm": 0.9004675354827532, "learning_rate": 6.444852820364222e-08, "loss": 0.617, "step": 2318 }, { "epoch": 0.9504585275885035, "grad_norm": 0.8442960036330057, "learning_rate": 6.338991254857207e-08, "loss": 0.5415, "step": 2319 }, { "epoch": 0.950868384650853, "grad_norm": 0.8265172314337503, "learning_rate": 6.234000785747708e-08, "loss": 0.5775, "step": 2320 }, { "epoch": 0.9512782417132025, "grad_norm": 0.8986407572463885, "learning_rate": 6.129881598298138e-08, "loss": 0.6076, "step": 2321 }, { "epoch": 0.951688098775552, "grad_norm": 0.8996509331375855, "learning_rate": 6.026633876233423e-08, "loss": 0.596, "step": 2322 }, { "epoch": 0.9520979558379016, "grad_norm": 0.9062177156766617, "learning_rate": 5.9242578017408844e-08, "loss": 0.555, "step": 2323 }, { "epoch": 0.952507812900251, "grad_norm": 0.8659782565445687, "learning_rate": 5.8227535554694624e-08, "loss": 0.604, "step": 2324 }, { "epoch": 0.9529176699626005, "grad_norm": 0.8988424636780541, "learning_rate": 5.7221213165300496e-08, "loss": 0.6444, "step": 2325 }, { "epoch": 0.95332752702495, "grad_norm": 0.8624105366051791, "learning_rate": 5.622361262494602e-08, "loss": 0.5997, "step": 2326 }, { "epoch": 0.9537373840872996, "grad_norm": 0.9179933787434269, "learning_rate": 5.5234735693960875e-08, "loss": 0.5823, "step": 2327 }, { "epoch": 0.9541472411496491, "grad_norm": 0.9499655068040062, "learning_rate": 5.425458411728202e-08, "loss": 0.6409, "step": 2328 }, { "epoch": 0.9545570982119985, "grad_norm": 0.9229608201449481, "learning_rate": 5.3283159624448745e-08, "loss": 0.5739, "step": 2329 }, { "epoch": 0.954966955274348, "grad_norm": 0.824768416924879, "learning_rate": 5.2320463929603215e-08, "loss": 0.594, "step": 2330 }, { "epoch": 0.9553768123366976, "grad_norm": 0.8809507156983671, "learning_rate": 5.1366498731481584e-08, "loss": 0.6453, "step": 2331 }, { "epoch": 0.9557866693990471, "grad_norm": 0.9194472440152661, "learning_rate": 5.042126571341843e-08, "loss": 0.6378, "step": 2332 }, { "epoch": 0.9561965264613966, "grad_norm": 0.8554602955934468, "learning_rate": 4.948476654333567e-08, "loss": 0.5383, "step": 2333 }, { "epoch": 0.956606383523746, "grad_norm": 0.9004291161138254, "learning_rate": 4.8557002873747536e-08, "loss": 0.5861, "step": 2334 }, { "epoch": 0.9570162405860956, "grad_norm": 0.9153048263506908, "learning_rate": 4.763797634175227e-08, "loss": 0.6046, "step": 2335 }, { "epoch": 0.9574260976484451, "grad_norm": 0.9679332787170581, "learning_rate": 4.6727688569029316e-08, "loss": 0.6114, "step": 2336 }, { "epoch": 0.9578359547107946, "grad_norm": 0.9275439921168828, "learning_rate": 4.5826141161841584e-08, "loss": 0.5258, "step": 2337 }, { "epoch": 0.9582458117731442, "grad_norm": 0.8751419537189106, "learning_rate": 4.4933335711025986e-08, "loss": 0.5625, "step": 2338 }, { "epoch": 0.9586556688354936, "grad_norm": 0.9474945207238491, "learning_rate": 4.4049273791995086e-08, "loss": 0.5712, "step": 2339 }, { "epoch": 0.9590655258978431, "grad_norm": 0.914066556062614, "learning_rate": 4.3173956964732145e-08, "loss": 0.5977, "step": 2340 }, { "epoch": 0.9594753829601926, "grad_norm": 0.8597289406682977, "learning_rate": 4.2307386773791094e-08, "loss": 0.5767, "step": 2341 }, { "epoch": 0.9598852400225422, "grad_norm": 0.8976217269203397, "learning_rate": 4.144956474828876e-08, "loss": 0.5846, "step": 2342 }, { "epoch": 0.9602950970848917, "grad_norm": 0.9143565050102782, "learning_rate": 4.060049240190711e-08, "loss": 0.6449, "step": 2343 }, { "epoch": 0.9607049541472411, "grad_norm": 0.8379017470345417, "learning_rate": 3.976017123288989e-08, "loss": 0.5828, "step": 2344 }, { "epoch": 0.9611148112095906, "grad_norm": 0.8531628208079497, "learning_rate": 3.892860272403598e-08, "loss": 0.5795, "step": 2345 }, { "epoch": 0.9615246682719402, "grad_norm": 0.8635319694297592, "learning_rate": 3.810578834270273e-08, "loss": 0.6154, "step": 2346 }, { "epoch": 0.9619345253342897, "grad_norm": 0.8211794141368746, "learning_rate": 3.729172954079707e-08, "loss": 0.5939, "step": 2347 }, { "epoch": 0.9623443823966392, "grad_norm": 0.8646688314605971, "learning_rate": 3.6486427754778844e-08, "loss": 0.552, "step": 2348 }, { "epoch": 0.9627542394589886, "grad_norm": 0.8219208782630941, "learning_rate": 3.568988440565413e-08, "loss": 0.6019, "step": 2349 }, { "epoch": 0.9631640965213382, "grad_norm": 0.8567171878903391, "learning_rate": 3.490210089897417e-08, "loss": 0.5696, "step": 2350 }, { "epoch": 0.9635739535836877, "grad_norm": 0.9064267553439412, "learning_rate": 3.4123078624834214e-08, "loss": 0.5769, "step": 2351 }, { "epoch": 0.9639838106460372, "grad_norm": 0.8716584896601687, "learning_rate": 3.3352818957868574e-08, "loss": 0.6241, "step": 2352 }, { "epoch": 0.9643936677083867, "grad_norm": 0.8557714406315321, "learning_rate": 3.25913232572489e-08, "loss": 0.6049, "step": 2353 }, { "epoch": 0.9648035247707362, "grad_norm": 0.8590011666898412, "learning_rate": 3.1838592866684225e-08, "loss": 0.5585, "step": 2354 }, { "epoch": 0.9652133818330857, "grad_norm": 0.9503657133158787, "learning_rate": 3.10946291144143e-08, "loss": 0.6402, "step": 2355 }, { "epoch": 0.9656232388954352, "grad_norm": 0.7910430938637529, "learning_rate": 3.035943331321123e-08, "loss": 0.548, "step": 2356 }, { "epoch": 0.9660330959577847, "grad_norm": 0.8943750145271225, "learning_rate": 2.963300676037506e-08, "loss": 0.5754, "step": 2357 }, { "epoch": 0.9664429530201343, "grad_norm": 0.9176274478817221, "learning_rate": 2.891535073773155e-08, "loss": 0.6405, "step": 2358 }, { "epoch": 0.9668528100824837, "grad_norm": 0.834748536404841, "learning_rate": 2.8206466511631615e-08, "loss": 0.5959, "step": 2359 }, { "epoch": 0.9672626671448332, "grad_norm": 0.8704355233675241, "learning_rate": 2.750635533294521e-08, "loss": 0.564, "step": 2360 }, { "epoch": 0.9676725242071827, "grad_norm": 0.9225286211014754, "learning_rate": 2.6815018437064132e-08, "loss": 0.6466, "step": 2361 }, { "epoch": 0.9680823812695323, "grad_norm": 0.885968429700448, "learning_rate": 2.6132457043896442e-08, "loss": 0.588, "step": 2362 }, { "epoch": 0.9684922383318818, "grad_norm": 0.9020254066795549, "learning_rate": 2.5458672357865366e-08, "loss": 0.5856, "step": 2363 }, { "epoch": 0.9689020953942312, "grad_norm": 0.871490505263075, "learning_rate": 2.4793665567907076e-08, "loss": 0.6044, "step": 2364 }, { "epoch": 0.9693119524565807, "grad_norm": 0.8306472118675584, "learning_rate": 2.4137437847467915e-08, "loss": 0.5263, "step": 2365 }, { "epoch": 0.9697218095189303, "grad_norm": 0.8928647657937997, "learning_rate": 2.3489990354504945e-08, "loss": 0.5799, "step": 2366 }, { "epoch": 0.9701316665812798, "grad_norm": 0.8578479917184003, "learning_rate": 2.2851324231479842e-08, "loss": 0.54, "step": 2367 }, { "epoch": 0.9705415236436293, "grad_norm": 0.925565184585107, "learning_rate": 2.2221440605359468e-08, "loss": 0.6175, "step": 2368 }, { "epoch": 0.9709513807059788, "grad_norm": 0.896395719711481, "learning_rate": 2.1600340587614176e-08, "loss": 0.6061, "step": 2369 }, { "epoch": 0.9713612377683283, "grad_norm": 0.9020754000193265, "learning_rate": 2.0988025274213954e-08, "loss": 0.614, "step": 2370 }, { "epoch": 0.9717710948306778, "grad_norm": 0.9531579542645661, "learning_rate": 2.0384495745630062e-08, "loss": 0.6516, "step": 2371 }, { "epoch": 0.9721809518930273, "grad_norm": 0.9212822478045413, "learning_rate": 1.9789753066826735e-08, "loss": 0.6095, "step": 2372 }, { "epoch": 0.9725908089553769, "grad_norm": 0.9108993906892016, "learning_rate": 1.920379828726726e-08, "loss": 0.597, "step": 2373 }, { "epoch": 0.9730006660177263, "grad_norm": 0.8939594384374872, "learning_rate": 1.8626632440904568e-08, "loss": 0.5845, "step": 2374 }, { "epoch": 0.9734105230800758, "grad_norm": 0.898857340610807, "learning_rate": 1.8058256546186205e-08, "loss": 0.6047, "step": 2375 }, { "epoch": 0.9738203801424253, "grad_norm": 0.8323091408151926, "learning_rate": 1.7498671606047125e-08, "loss": 0.5964, "step": 2376 }, { "epoch": 0.9742302372047749, "grad_norm": 0.8740383559324798, "learning_rate": 1.6947878607910806e-08, "loss": 0.604, "step": 2377 }, { "epoch": 0.9746400942671244, "grad_norm": 0.8245525632863494, "learning_rate": 1.640587852368647e-08, "loss": 0.5906, "step": 2378 }, { "epoch": 0.9750499513294738, "grad_norm": 0.9188340963380507, "learning_rate": 1.5872672309767966e-08, "loss": 0.5822, "step": 2379 }, { "epoch": 0.9754598083918233, "grad_norm": 0.9223884784159965, "learning_rate": 1.534826090703212e-08, "loss": 0.5845, "step": 2380 }, { "epoch": 0.9758696654541729, "grad_norm": 0.8729721292511555, "learning_rate": 1.483264524083594e-08, "loss": 0.6012, "step": 2381 }, { "epoch": 0.9762795225165224, "grad_norm": 0.9523020964368102, "learning_rate": 1.4325826221016637e-08, "loss": 0.6202, "step": 2382 }, { "epoch": 0.9766893795788719, "grad_norm": 0.8245771472234896, "learning_rate": 1.3827804741888273e-08, "loss": 0.5575, "step": 2383 }, { "epoch": 0.9770992366412213, "grad_norm": 0.9001107512953423, "learning_rate": 1.333858168224178e-08, "loss": 0.5898, "step": 2384 }, { "epoch": 0.9775090937035709, "grad_norm": 0.9720443380585607, "learning_rate": 1.2858157905342173e-08, "loss": 0.6503, "step": 2385 }, { "epoch": 0.9779189507659204, "grad_norm": 0.8748119091366308, "learning_rate": 1.2386534258929105e-08, "loss": 0.6161, "step": 2386 }, { "epoch": 0.9783288078282699, "grad_norm": 0.9041736082659865, "learning_rate": 1.1923711575210772e-08, "loss": 0.6153, "step": 2387 }, { "epoch": 0.9787386648906194, "grad_norm": 0.9056889252435393, "learning_rate": 1.1469690670868894e-08, "loss": 0.5744, "step": 2388 }, { "epoch": 0.9791485219529689, "grad_norm": 0.84447713345573, "learning_rate": 1.1024472347052061e-08, "loss": 0.5762, "step": 2389 }, { "epoch": 0.9795583790153184, "grad_norm": 0.9053484413374181, "learning_rate": 1.0588057389375739e-08, "loss": 0.5874, "step": 2390 }, { "epoch": 0.9799682360776679, "grad_norm": 0.9622564674615585, "learning_rate": 1.016044656792281e-08, "loss": 0.5953, "step": 2391 }, { "epoch": 0.9803780931400174, "grad_norm": 0.9131751829748632, "learning_rate": 9.741640637239703e-09, "loss": 0.5718, "step": 2392 }, { "epoch": 0.980787950202367, "grad_norm": 0.9709449881080938, "learning_rate": 9.331640336335822e-09, "loss": 0.6266, "step": 2393 }, { "epoch": 0.9811978072647164, "grad_norm": 0.8382481840645458, "learning_rate": 8.930446388683567e-09, "loss": 0.5629, "step": 2394 }, { "epoch": 0.9816076643270659, "grad_norm": 0.8219882215355527, "learning_rate": 8.538059502214979e-09, "loss": 0.5494, "step": 2395 }, { "epoch": 0.9820175213894154, "grad_norm": 0.914547970011823, "learning_rate": 8.15448036932176e-09, "loss": 0.6665, "step": 2396 }, { "epoch": 0.982427378451765, "grad_norm": 0.860304440172228, "learning_rate": 7.779709666853597e-09, "loss": 0.6182, "step": 2397 }, { "epoch": 0.9828372355141145, "grad_norm": 0.9981164584651809, "learning_rate": 7.413748056117609e-09, "loss": 0.5959, "step": 2398 }, { "epoch": 0.9832470925764639, "grad_norm": 0.929983920436559, "learning_rate": 7.056596182876685e-09, "loss": 0.5868, "step": 2399 }, { "epoch": 0.9836569496388134, "grad_norm": 0.9519524986984178, "learning_rate": 6.708254677347814e-09, "loss": 0.6325, "step": 2400 }, { "epoch": 0.984066806701163, "grad_norm": 0.8983595961929064, "learning_rate": 6.368724154201534e-09, "loss": 0.6267, "step": 2401 }, { "epoch": 0.9844766637635125, "grad_norm": 0.8923132560631897, "learning_rate": 6.038005212561926e-09, "loss": 0.5457, "step": 2402 }, { "epoch": 0.984886520825862, "grad_norm": 0.9362325140174048, "learning_rate": 5.716098436002737e-09, "loss": 0.611, "step": 2403 }, { "epoch": 0.9852963778882114, "grad_norm": 0.8882020975887004, "learning_rate": 5.4030043925501486e-09, "loss": 0.6167, "step": 2404 }, { "epoch": 0.985706234950561, "grad_norm": 0.9099278589940002, "learning_rate": 5.098723634677782e-09, "loss": 0.6278, "step": 2405 }, { "epoch": 0.9861160920129105, "grad_norm": 0.9066082478631138, "learning_rate": 4.803256699308923e-09, "loss": 0.5636, "step": 2406 }, { "epoch": 0.98652594907526, "grad_norm": 0.8683172511266976, "learning_rate": 4.516604107813183e-09, "loss": 0.5605, "step": 2407 }, { "epoch": 0.9869358061376096, "grad_norm": 0.876505228349541, "learning_rate": 4.238766366008173e-09, "loss": 0.5991, "step": 2408 }, { "epoch": 0.987345663199959, "grad_norm": 0.9367594056253123, "learning_rate": 3.96974396415617e-09, "loss": 0.5859, "step": 2409 }, { "epoch": 0.9877555202623085, "grad_norm": 0.9827129023482096, "learning_rate": 3.709537376964112e-09, "loss": 0.617, "step": 2410 }, { "epoch": 0.988165377324658, "grad_norm": 0.8262748818023851, "learning_rate": 3.4581470635836056e-09, "loss": 0.599, "step": 2411 }, { "epoch": 0.9885752343870076, "grad_norm": 0.8877062694205977, "learning_rate": 3.2155734676081463e-09, "loss": 0.5917, "step": 2412 }, { "epoch": 0.9889850914493571, "grad_norm": 0.9182169620634345, "learning_rate": 2.981817017074784e-09, "loss": 0.5972, "step": 2413 }, { "epoch": 0.9893949485117065, "grad_norm": 0.7963827105838494, "learning_rate": 2.7568781244624587e-09, "loss": 0.5616, "step": 2414 }, { "epoch": 0.989804805574056, "grad_norm": 0.8883145036652959, "learning_rate": 2.5407571866886694e-09, "loss": 0.5981, "step": 2415 }, { "epoch": 0.9902146626364056, "grad_norm": 0.9032060904517343, "learning_rate": 2.3334545851139144e-09, "loss": 0.6331, "step": 2416 }, { "epoch": 0.9906245196987551, "grad_norm": 0.8504249947187965, "learning_rate": 2.134970685536697e-09, "loss": 0.5933, "step": 2417 }, { "epoch": 0.9910343767611046, "grad_norm": 0.9170060252922844, "learning_rate": 1.945305838194078e-09, "loss": 0.5861, "step": 2418 }, { "epoch": 0.991444233823454, "grad_norm": 0.8987529161174707, "learning_rate": 1.7644603777616786e-09, "loss": 0.6137, "step": 2419 }, { "epoch": 0.9918540908858036, "grad_norm": 0.852694645729217, "learning_rate": 1.592434623353678e-09, "loss": 0.6285, "step": 2420 }, { "epoch": 0.9922639479481531, "grad_norm": 0.8394860373908164, "learning_rate": 1.429228878519484e-09, "loss": 0.6069, "step": 2421 }, { "epoch": 0.9926738050105026, "grad_norm": 0.9194866246552107, "learning_rate": 1.2748434312470636e-09, "loss": 0.6331, "step": 2422 }, { "epoch": 0.9930836620728521, "grad_norm": 0.9074234634210182, "learning_rate": 1.1292785539585016e-09, "loss": 0.6064, "step": 2423 }, { "epoch": 0.9934935191352016, "grad_norm": 0.9115809914878777, "learning_rate": 9.925345035133316e-10, "loss": 0.596, "step": 2424 }, { "epoch": 0.9939033761975511, "grad_norm": 0.9114461821181523, "learning_rate": 8.646115212040951e-10, "loss": 0.618, "step": 2425 }, { "epoch": 0.9943132332599006, "grad_norm": 0.9050817896486356, "learning_rate": 7.455098327596721e-10, "loss": 0.6139, "step": 2426 }, { "epoch": 0.9947230903222501, "grad_norm": 0.8542881604864829, "learning_rate": 6.352296483430608e-10, "loss": 0.5437, "step": 2427 }, { "epoch": 0.9951329473845997, "grad_norm": 0.919024998111466, "learning_rate": 5.337711625497122e-10, "loss": 0.5898, "step": 2428 }, { "epoch": 0.9955428044469491, "grad_norm": 0.8503120030168121, "learning_rate": 4.4113455440975005e-10, "loss": 0.609, "step": 2429 }, { "epoch": 0.9959526615092986, "grad_norm": 0.8809316679631194, "learning_rate": 3.573199873874167e-10, "loss": 0.5806, "step": 2430 }, { "epoch": 0.9963625185716481, "grad_norm": 0.8775696554476818, "learning_rate": 2.823276093777416e-10, "loss": 0.6019, "step": 2431 }, { "epoch": 0.9967723756339977, "grad_norm": 0.9502617643804362, "learning_rate": 2.1615755270987248e-10, "loss": 0.6439, "step": 2432 }, { "epoch": 0.9971822326963472, "grad_norm": 0.8898227986916293, "learning_rate": 1.5880993414540969e-10, "loss": 0.5851, "step": 2433 }, { "epoch": 0.9975920897586966, "grad_norm": 0.8993616696422122, "learning_rate": 1.1028485487729612e-10, "loss": 0.5879, "step": 2434 }, { "epoch": 0.9980019468210461, "grad_norm": 0.9068389648507963, "learning_rate": 7.058240053203769e-11, "loss": 0.5663, "step": 2435 }, { "epoch": 0.9984118038833957, "grad_norm": 0.883130321589962, "learning_rate": 3.970264116637257e-11, "loss": 0.6552, "step": 2436 }, { "epoch": 0.9988216609457452, "grad_norm": 0.8176722892979932, "learning_rate": 1.7645631270046814e-11, "loss": 0.5423, "step": 2437 }, { "epoch": 0.9992315180080947, "grad_norm": 0.8559341732739159, "learning_rate": 4.411409763593888e-12, "loss": 0.5649, "step": 2438 }, { "epoch": 0.9996413750704441, "grad_norm": 0.771475536335532, "learning_rate": 0.0, "loss": 0.5469, "step": 2439 }, { "epoch": 0.9996413750704441, "step": 2439, "total_flos": 4694577437212672.0, "train_loss": 0.6278860935572476, "train_runtime": 15496.6103, "train_samples_per_second": 40.306, "train_steps_per_second": 0.157 } ], "logging_steps": 1.0, "max_steps": 2439, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50000.0, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4694577437212672.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }